From 1a42152e6fb647674198f370d7efd8d36d86191d Mon Sep 17 00:00:00 2001 From: Andrey Avtomonov Date: Sun, 10 May 2026 23:12:26 +0200 Subject: [PATCH] Initial open-source release --- .github/workflows/ci.yml | 72 + .gitignore | 61 + AGENTS.md | 251 + CLAUDE.md | 1 + GEMINI.md | 1 + LICENSE | 202 + README.md | 270 + examples/README.md | 40 + examples/local-warehouse/README.md | 20 + examples/local-warehouse/klo.yaml | 25 + .../knowledge/global/revenue.md | 15 + .../semantic-layer/warehouse/orders.yaml | 18 + .../local-warehouse/source/orders/orders.json | 1 + .../orbit-relationship-verification/README.md | 33 + .../orbit-relationship-verification/klo.yaml | 28 + examples/package-artifacts/README.md | 17 + examples/postgres-historic/README.md | 115 + examples/postgres-historic/docker-compose.yml | 24 + .../postgres-historic/init/001-schema.sql | 51 + .../scripts/generate-workload.sh | 33 + examples/postgres-historic/scripts/smoke.sh | 152 + package.json | 54 + packages/cli/assets/demo/orbit/demo.db | Bin 0 -> 1175552 bytes .../knowledge/global/activation-policy.md | 18 + .../knowledge/global/arr-contract-first.md | 18 + .../global/customer-health-scoring.md | 20 + .../knowledge/global/discount-expiration.md | 19 + .../global/internal-test-exclusion.md | 16 + .../orbit/knowledge/global/nrr-retention.md | 19 + .../knowledge/global/procurement-workflows.md | 17 + .../knowledge/global/revenue-gross-to-net.md | 17 + .../global/segment-classification.md | 17 + .../knowledge/global/support-escalation.md | 17 + .../assets/demo/orbit/links/provenance.json | 209 + packages/cli/assets/demo/orbit/manifest.json | 58 + .../bi/account_retention.view.lkml | 67 + .../orbit/raw-sources/bi/arr_daily.view.lkml | 28 + .../raw-sources/bi/customer_health.view.lkml | 50 + .../bi/procurement_activity.view.lkml | 46 + .../bi/retention_exec_q1.dashboard.lookml | 28 + .../raw-sources/bi/revenue_daily.view.lkml | 52 + .../bi/revenue_exec.dashboard.lookml | 28 + 
.../orbit/raw-sources/dbt/dbt_project.yml | 10 + .../dbt/models/marts/mart_arr_daily.sql | 5 + .../dbt/models/marts/mart_customer_health.sql | 10 + .../dbt/models/marts/mart_revenue_daily.sql | 8 + .../demo/orbit/raw-sources/dbt/schema.yml | 455 + .../demo/orbit/raw-sources/dbt/sources.yml | 48 + .../activation-policy-decision-record.md | 49 + .../raw-sources/notion/analyst-onboarding.md | 35 + .../arr-and-contract-reporting-notes.md | 64 + .../notion/customer-health-playbook.md | 55 + .../retention-and-nrr-definition-notes.md | 58 + .../notion/revenue-reporting-policy.md | 64 + .../notion/sales-ops-segmentation-guide.md | 58 + .../notion/support-escalation-runbook.md | 38 + .../orbit/raw-sources/warehouse/accounts.csv | 211 + .../raw-sources/warehouse/arr_movements.csv | 721 + .../orbit/raw-sources/warehouse/contracts.csv | 321 + .../orbit/raw-sources/warehouse/invoices.csv | 3001 ++++ .../orbit/raw-sources/warehouse/plans.csv | 5 + .../warehouse/purchase_requests.csv | 5201 ++++++ .../raw-sources/warehouse/support_tickets.csv | 521 + .../orbit/raw-sources/warehouse/users.csv | 1261 ++ .../demo/orbit/replay.memory-flow.v1.json | 707 + .../orbit/reports/seeded-demo-report.json | 40 + .../semantic-layer/orbit_demo/accounts.yaml | 44 + .../orbit_demo/arr_movements.yaml | 38 + .../semantic-layer/orbit_demo/contracts.yaml | 39 + .../semantic-layer/orbit_demo/invoices.yaml | 33 + .../orbit_demo/purchase_requests.yaml | 33 + .../orbit_demo/support_tickets.yaml | 37 + packages/cli/package.json | 72 + packages/cli/scripts/build-demo-assets.mjs | 954 ++ packages/cli/src/agent-runtime.test.ts | 108 + packages/cli/src/agent-runtime.ts | 81 + .../cli/src/agent-search-readiness.test.ts | 51 + packages/cli/src/agent-search-readiness.ts | 94 + packages/cli/src/agent.test.ts | 393 + packages/cli/src/agent.ts | 214 + packages/cli/src/bin.ts | 9 + packages/cli/src/clack.ts | 11 + packages/cli/src/cli-program.ts | 268 + packages/cli/src/cli-runtime.ts | 89 + 
packages/cli/src/command-schemas.ts | 85 + packages/cli/src/commands/agent-commands.ts | 137 + .../cli/src/commands/completion-commands.ts | 47 + .../cli/src/commands/connection-commands.ts | 346 + .../src/commands/connection-mapping.test.ts | 329 + .../cli/src/commands/connection-mapping.ts | 426 + .../commands/connection-metabase-commands.ts | 132 + .../connection-metabase-setup.test.ts | 1136 ++ .../src/commands/connection-metabase-setup.ts | 782 + .../commands/connection-notion-commands.ts | 92 + .../commands/connection-notion-tree.test.ts | 283 + .../src/commands/connection-notion-tree.ts | 529 + .../commands/connection-notion-tui.test.tsx | 384 + .../src/commands/connection-notion-tui.tsx | 338 + .../src/commands/connection-notion.test.ts | 466 + .../cli/src/commands/connection-notion.ts | 278 + .../cli/src/commands/demo-commands.test.ts | 26 + packages/cli/src/commands/demo-commands.ts | 273 + packages/cli/src/commands/doctor-commands.ts | 53 + packages/cli/src/commands/ingest-commands.ts | 171 + .../cli/src/commands/knowledge-commands.ts | 90 + .../src/commands/public-ingest-commands.ts | 109 + packages/cli/src/commands/scan-commands.ts | 353 + packages/cli/src/commands/serve-commands.ts | 47 + packages/cli/src/commands/setup-commands.ts | 517 + packages/cli/src/commands/sl-commands.ts | 148 + packages/cli/src/commands/status-commands.ts | 23 + packages/cli/src/completion.ts | 353 + packages/cli/src/connection.test.ts | 649 + packages/cli/src/connection.ts | 415 + packages/cli/src/context-build-view.test.ts | 303 + packages/cli/src/context-build-view.ts | 414 + packages/cli/src/demo-assets.test.ts | 272 + packages/cli/src/demo-assets.ts | 281 + packages/cli/src/demo-full.test.ts | 201 + packages/cli/src/demo-full.ts | 213 + packages/cli/src/demo-interaction.test.ts | 127 + packages/cli/src/demo-interaction.ts | 202 + packages/cli/src/demo-metrics.test.ts | 137 + packages/cli/src/demo-metrics.ts | 174 + packages/cli/src/demo-progress.test.ts | 228 + 
packages/cli/src/demo-progress.ts | 77 + packages/cli/src/demo-replay-store.test.ts | 60 + packages/cli/src/demo-replay-store.ts | 68 + packages/cli/src/demo-scan.test.ts | 31 + packages/cli/src/demo-scan.ts | 223 + packages/cli/src/demo-seeded-inspect.test.ts | 123 + packages/cli/src/demo-seeded-inspect.ts | 299 + packages/cli/src/demo-seeded.test.ts | 117 + packages/cli/src/demo-seeded.ts | 41 + packages/cli/src/demo.test.ts | 751 + packages/cli/src/demo.ts | 544 + packages/cli/src/dev.test.ts | 670 + packages/cli/src/dev.ts | 61 + packages/cli/src/doctor.test.ts | 460 + packages/cli/src/doctor.ts | 488 + packages/cli/src/example-smoke.test.ts | 252 + packages/cli/src/historic-sql-doctor.test.ts | 173 + packages/cli/src/historic-sql-doctor.ts | 160 + packages/cli/src/index.test.ts | 1977 +++ packages/cli/src/index.ts | 53 + packages/cli/src/ingest-report-file.test.ts | 78 + packages/cli/src/ingest-report-file.ts | 20 + packages/cli/src/ingest.test.ts | 2275 +++ packages/cli/src/ingest.ts | 425 + packages/cli/src/io/mode.test.ts | 60 + packages/cli/src/io/mode.ts | 40 + packages/cli/src/io/print-list.test.ts | 171 + packages/cli/src/io/print-list.ts | 164 + packages/cli/src/io/symbols.ts | 37 + packages/cli/src/knowledge.test.ts | 95 + packages/cli/src/knowledge.ts | 90 + packages/cli/src/local-adapters.ts | 173 + .../cli/src/local-scan-connectors.test.ts | 163 + packages/cli/src/local-scan-connectors.ts | 84 + packages/cli/src/memory-flow-hud.tsx | 597 + .../cli/src/memory-flow-interactive.test.ts | 125 + packages/cli/src/memory-flow-interactive.ts | 143 + packages/cli/src/memory-flow-tui.test.tsx | 315 + packages/cli/src/memory-flow-tui.tsx | 552 + packages/cli/src/next-steps.test.ts | 129 + packages/cli/src/next-steps.ts | 104 + packages/cli/src/project-dir.test.ts | 172 + packages/cli/src/project-dir.ts | 5 + packages/cli/src/project-resolver.test.ts | 70 + packages/cli/src/project-resolver.ts | 56 + packages/cli/src/prompt-navigation.test.ts | 48 + 
packages/cli/src/prompt-navigation.ts | 45 + packages/cli/src/public-ingest.test.ts | 292 + packages/cli/src/public-ingest.ts | 315 + packages/cli/src/scan.test.ts | 2151 +++ packages/cli/src/scan.ts | 737 + packages/cli/src/serve.test.ts | 431 + packages/cli/src/serve.ts | 119 + packages/cli/src/setup-agents.test.ts | 176 + packages/cli/src/setup-agents.ts | 336 + packages/cli/src/setup-context.test.ts | 405 + packages/cli/src/setup-context.ts | 765 + packages/cli/src/setup-databases.test.ts | 1396 ++ packages/cli/src/setup-databases.ts | 1285 ++ packages/cli/src/setup-embeddings.test.ts | 381 + packages/cli/src/setup-embeddings.ts | 485 + packages/cli/src/setup-interrupt.test.ts | 90 + packages/cli/src/setup-interrupt.ts | 90 + packages/cli/src/setup-models.test.ts | 679 + packages/cli/src/setup-models.ts | 438 + packages/cli/src/setup-project.test.ts | 335 + packages/cli/src/setup-project.ts | 365 + packages/cli/src/setup-ready-menu.test.ts | 41 + packages/cli/src/setup-ready-menu.ts | 63 + packages/cli/src/setup-secrets.test.ts | 37 + packages/cli/src/setup-secrets.ts | 25 + packages/cli/src/setup-sources.test.ts | 790 + packages/cli/src/setup-sources.ts | 1185 ++ packages/cli/src/setup.test.ts | 1502 ++ packages/cli/src/setup.ts | 713 + packages/cli/src/sl.test.ts | 372 + packages/cli/src/sl.ts | 129 + packages/cli/src/standalone-smoke.test.ts | 926 ++ packages/cli/src/startup-profile.ts | 54 + packages/cli/src/viz-fallback.test.ts | 120 + packages/cli/src/viz-fallback.ts | 93 + packages/cli/tsconfig.json | 10 + packages/cli/vitest.config.ts | 15 + packages/connector-bigquery/package.json | 47 + .../connector-bigquery/src/connector.test.ts | 307 + packages/connector-bigquery/src/connector.ts | 492 + .../connector-bigquery/src/dialect.test.ts | 52 + packages/connector-bigquery/src/dialect.ts | 207 + packages/connector-bigquery/src/index.ts | 18 + .../src/live-database-introspection.ts | 34 + .../src/package-exports.test.ts | 11 + 
packages/connector-bigquery/tsconfig.json | 9 + packages/connector-clickhouse/package.json | 47 + .../src/connector.test.ts | 296 + .../connector-clickhouse/src/connector.ts | 525 + .../connector-clickhouse/src/dialect.test.ts | 49 + packages/connector-clickhouse/src/dialect.ts | 279 + packages/connector-clickhouse/src/index.ts | 16 + .../src/live-database-introspection.ts | 40 + .../src/package-exports.test.ts | 12 + packages/connector-clickhouse/tsconfig.json | 9 + packages/connector-mysql/package.json | 47 + .../connector-mysql/src/connector.test.ts | 292 + packages/connector-mysql/src/connector.ts | 578 + packages/connector-mysql/src/dialect.test.ts | 49 + packages/connector-mysql/src/dialect.ts | 202 + packages/connector-mysql/src/index.ts | 15 + .../src/live-database-introspection.ts | 37 + .../src/package-exports.test.ts | 13 + packages/connector-mysql/tsconfig.json | 9 + packages/connector-postgres/package.json | 48 + .../connector-postgres/src/connector.test.ts | 342 + packages/connector-postgres/src/connector.ts | 707 + .../connector-postgres/src/dialect.test.ts | 52 + packages/connector-postgres/src/dialect.ts | 213 + .../src/historic-sql-query-client.test.ts | 50 + .../src/historic-sql-query-client.ts | 37 + packages/connector-postgres/src/index.ts | 21 + .../src/live-database-introspection.ts | 37 + .../src/package-exports.test.ts | 13 + packages/connector-postgres/tsconfig.json | 9 + packages/connector-posthog/package.json | 46 + .../connector-posthog/src/connector.test.ts | 400 + packages/connector-posthog/src/connector.ts | 609 + .../connector-posthog/src/dialect.test.ts | 48 + packages/connector-posthog/src/dialect.ts | 258 + packages/connector-posthog/src/index.ts | 19 + .../src/live-database-introspection.ts | 34 + .../src/package-exports.test.ts | 11 + .../src/schema-descriptions.ts | 99 + packages/connector-posthog/tsconfig.json | 9 + packages/connector-snowflake/package.json | 47 + .../connector-snowflake/src/connector.test.ts | 257 + 
packages/connector-snowflake/src/connector.ts | 689 + .../connector-snowflake/src/dialect.test.ts | 50 + packages/connector-snowflake/src/dialect.ts | 187 + packages/connector-snowflake/src/index.ts | 18 + .../src/live-database-introspection.ts | 40 + .../src/package-exports.test.ts | 11 + packages/connector-snowflake/tsconfig.json | 9 + packages/connector-sqlite/package.json | 48 + .../connector-sqlite/src/connector.test.ts | 255 + packages/connector-sqlite/src/connector.ts | 371 + packages/connector-sqlite/src/dialect.test.ts | 33 + packages/connector-sqlite/src/dialect.ts | 177 + packages/connector-sqlite/src/index.ts | 16 + .../src/live-database-introspection.ts | 30 + .../src/package-exports.test.ts | 13 + packages/connector-sqlite/tsconfig.json | 9 + packages/connector-sqlserver/package.json | 48 + .../connector-sqlserver/src/connector.test.ts | 358 + packages/connector-sqlserver/src/connector.ts | 701 + .../connector-sqlserver/src/dialect.test.ts | 49 + packages/connector-sqlserver/src/dialect.ts | 201 + packages/connector-sqlserver/src/index.ts | 17 + .../src/live-database-introspection.ts | 40 + .../src/package-exports.test.ts | 12 + packages/connector-sqlserver/tsconfig.json | 9 + packages/context/package.json | 166 + .../context/prompts/memory_agent_backfill.md | 21 + .../memory_agent_bundle_ingest_reconcile.md | 27 + .../memory_agent_bundle_ingest_work_unit.md | 28 + .../prompts/memory_agent_external_ingest.md | 28 + .../context/prompts/memory_agent_research.md | 30 + .../prompts/skills/light_extraction.md | 40 + .../prompts/skills/page_triage_classifier.md | 102 + .../scripts/pglite-hybrid-search-spike.mjs | 354 + .../pglite-owner-process-prototype.mjs | 317 + .../scripts/pglite-sl-search-prototype.mjs | 263 + .../scripts/relationship-benchmark-report.mjs | 52 + packages/context/skills/dbt_ingest/SKILL.md | 34 + .../skills/historic_sql_curator/SKILL.md | 153 + .../skills/historic_sql_ingest/SKILL.md | 170 + .../context/skills/ingest_triage/SKILL.md | 
77 + .../context/skills/knowledge_capture/SKILL.md | 124 + .../skills/live_database_ingest/SKILL.md | 58 + .../context/skills/looker_ingest/SKILL.md | 217 + .../context/skills/lookml_ingest/SKILL.md | 180 + .../context/skills/metabase_ingest/SKILL.md | 218 + .../context/skills/metricflow_ingest/SKILL.md | 274 + .../context/skills/notion_synthesize/SKILL.md | 69 + packages/context/skills/sl/SKILL.md | 240 + packages/context/skills/sl_capture/SKILL.md | 276 + .../src/agent/agent-runner.service.test.ts | 330 + .../context/src/agent/agent-runner.service.ts | 101 + packages/context/src/agent/index.ts | 9 + .../src/connections/connection-type.ts | 28 + packages/context/src/connections/index.ts | 27 + .../connections/local-query-executor.test.ts | 59 + .../src/connections/local-query-executor.ts | 34 + .../local-warehouse-descriptor.test.ts | 63 + .../connections/local-warehouse-descriptor.ts | 102 + .../src/connections/notion-config.test.ts | 120 + .../context/src/connections/notion-config.ts | 196 + .../postgres-query-executor.test.ts | 111 + .../connections/postgres-query-executor.ts | 80 + .../context/src/connections/query-executor.ts | 25 + .../src/connections/read-only-sql.test.ts | 30 + .../context/src/connections/read-only-sql.ts | 22 + .../connections/sqlite-query-executor.test.ts | 148 + .../src/connections/sqlite-query-executor.ts | 94 + .../context/src/core/config-reference.test.ts | 34 + packages/context/src/core/config-reference.ts | 36 + packages/context/src/core/config.ts | 42 + packages/context/src/core/embedding.ts | 5 + packages/context/src/core/file-store.ts | 43 + packages/context/src/core/git-env.ts | 29 + .../git.service.assert-worktree-clean.test.ts | 75 + .../git.service.delete-directories.test.ts | 78 + .../src/core/git.service.reset-hard.test.ts | 56 + packages/context/src/core/git.service.test.ts | 358 + packages/context/src/core/git.service.ts | 855 + packages/context/src/core/index.ts | 27 + packages/context/src/core/redaction.ts | 47 + 
.../src/core/session-worktree.service.test.ts | 124 + .../src/core/session-worktree.service.ts | 113 + packages/context/src/daemon/index.ts | 1 + .../src/daemon/semantic-layer-compute.test.ts | 339 + .../src/daemon/semantic-layer-compute.ts | 304 + packages/context/src/index.test.ts | 12 + packages/context/src/index.ts | 144 + .../src/ingest/action-identity.test.ts | 42 + .../context/src/ingest/action-identity.ts | 9 + .../dbt-descriptions/match-tables.test.ts | 75 + .../adapters/dbt-descriptions/match-tables.ts | 127 + .../merge-semantic-model-tables.test.ts | 62 + .../merge-semantic-model-tables.ts | 37 + .../dbt-descriptions/parse-schema.test.ts | 214 + .../adapters/dbt-descriptions/parse-schema.ts | 655 + .../to-description-updates.test.ts | 102 + .../to-description-updates.ts | 70 + .../to-metadata-updates.test.ts | 70 + .../dbt-descriptions/to-metadata-updates.ts | 74 + .../to-relationship-updates.test.ts | 62 + .../to-relationship-updates.ts | 57 + .../dbt-extraction-golden-parity.test.ts | 410 + .../src/ingest/adapters/dbt/chunk.test.ts | 36 + .../context/src/ingest/adapters/dbt/chunk.ts | 130 + .../ingest/adapters/dbt/dbt.adapter.test.ts | 51 + .../src/ingest/adapters/dbt/dbt.adapter.ts | 48 + .../context/src/ingest/adapters/dbt/detect.ts | 12 + .../src/ingest/adapters/dbt/fetch.test.ts | 38 + .../context/src/ingest/adapters/dbt/fetch.ts | 60 + .../src/ingest/adapters/dbt/parse.test.ts | 8 + .../context/src/ingest/adapters/dbt/parse.ts | 32 + .../src/ingest/adapters/fake/fake.adapter.ts | 48 + .../postgres/eviction-churn/input.json | 146 + .../postgres/first-run/input.json | 144 + .../postgres/normal-delta/input.json | 181 + .../postgres/reset-detected/input.json | 159 + .../postgres/version-change/input.json | 159 + .../bigquery-query-history-reader.test.ts | 200 + .../bigquery-query-history-reader.ts | 219 + .../adapters/historic-sql/chunk.test.ts | 251 + .../src/ingest/adapters/historic-sql/chunk.ts | 86 + .../adapters/historic-sql/detect.test.ts | 197 
+ .../ingest/adapters/historic-sql/detect.ts | 37 + .../ingest/adapters/historic-sql/errors.ts | 61 + .../historic-sql/historic-sql.adapter.test.ts | 304 + .../historic-sql/historic-sql.adapter.ts | 135 + ...postgres-pgss-query-history-reader.test.ts | 281 + .../postgres-pgss-query-history-reader.ts | 262 + .../snowflake-query-history-reader.test.ts | 193 + .../snowflake-query-history-reader.ts | 203 + .../historic-sql/stage-pgss-golden.test.ts | 152 + .../adapters/historic-sql/stage-pgss.test.ts | 652 + .../adapters/historic-sql/stage-pgss.ts | 508 + .../adapters/historic-sql/stage.test.ts | 798 + .../src/ingest/adapters/historic-sql/stage.ts | 630 + .../src/ingest/adapters/historic-sql/types.ts | 201 + .../adapters/live-database/chunk.test.ts | 107 + .../ingest/adapters/live-database/chunk.ts | 58 + .../daemon-introspection.test.ts | 224 + .../live-database/daemon-introspection.ts | 256 + .../live-database/extracted-schema.test.ts | 136 + .../live-database/extracted-schema.ts | 61 + .../live-database.adapter.test.ts | 59 + .../live-database/live-database.adapter.ts | 28 + .../adapters/live-database/manifest.test.ts | 252 + .../ingest/adapters/live-database/manifest.ts | 270 + .../adapters/live-database/stage.test.ts | 152 + .../ingest/adapters/live-database/stage.ts | 138 + .../live-database/structural-sync.test.ts | 428 + .../adapters/live-database/structural-sync.ts | 525 + .../ingest/adapters/live-database/types.ts | 10 + .../src/ingest/adapters/looker/chunk.test.ts | 154 + .../src/ingest/adapters/looker/chunk.ts | 198 + .../adapters/looker/client-boundary.test.ts | 14 + .../src/ingest/adapters/looker/client.test.ts | 455 + .../src/ingest/adapters/looker/client.ts | 732 + .../daemon-table-identifier-parser.test.ts | 44 + .../looker/daemon-table-identifier-parser.ts | 81 + .../src/ingest/adapters/looker/detect.test.ts | 47 + .../src/ingest/adapters/looker/detect.ts | 28 + .../looker/evidence-documents.test.ts | 188 + .../adapters/looker/evidence-documents.ts | 
378 + .../ingest/adapters/looker/factory.test.ts | 74 + .../src/ingest/adapters/looker/factory.ts | 32 + .../adapters/looker/fetch-report.test.ts | 77 + .../ingest/adapters/looker/fetch-report.ts | 22 + .../src/ingest/adapters/looker/fetch.test.ts | 645 + .../src/ingest/adapters/looker/fetch.ts | 555 + .../adapters/looker/local-looker.adapter.ts | 67 + .../looker/local-runtime-store.test.ts | 116 + .../adapters/looker/local-runtime-store.ts | 280 + .../adapters/looker/looker.adapter.test.ts | 125 + .../ingest/adapters/looker/looker.adapter.ts | 70 + .../ingest/adapters/looker/mapping.test.ts | 384 + .../src/ingest/adapters/looker/mapping.ts | 442 + .../ingest/adapters/looker/reconcile.test.ts | 25 + .../src/ingest/adapters/looker/reconcile.ts | 21 + .../src/ingest/adapters/looker/scope.test.ts | 101 + .../src/ingest/adapters/looker/scope.ts | 63 + .../looker/target-connections.test.ts | 86 + .../adapters/looker/target-connections.ts | 41 + .../tools/looker-query-to-sl.tool.test.ts | 243 + .../looker/tools/looker-query-to-sl.tool.ts | 305 + .../src/ingest/adapters/looker/types.test.ts | 329 + .../src/ingest/adapters/looker/types.ts | 255 + .../src/ingest/adapters/lookml/chunk.test.ts | 230 + .../src/ingest/adapters/lookml/chunk.ts | 159 + .../src/ingest/adapters/lookml/detect.test.ts | 46 + .../src/ingest/adapters/lookml/detect.ts | 13 + .../adapters/lookml/fetch-report.test.ts | 113 + .../ingest/adapters/lookml/fetch-report.ts | 125 + .../src/ingest/adapters/lookml/fetch.test.ts | 146 + .../src/ingest/adapters/lookml/fetch.ts | 75 + .../src/ingest/adapters/lookml/graph.test.ts | 118 + .../src/ingest/adapters/lookml/graph.ts | 114 + .../ingest/adapters/lookml/lookml-parser.d.ts | 43 + .../adapters/lookml/lookml.adapter.test.ts | 60 + .../ingest/adapters/lookml/lookml.adapter.ts | 55 + .../src/ingest/adapters/lookml/parse.test.ts | 166 + .../src/ingest/adapters/lookml/parse.ts | 202 + .../adapters/lookml/pull-config.test.ts | 140 + 
.../src/ingest/adapters/lookml/pull-config.ts | 39 + .../adapters/metabase/card-references.test.ts | 44 + .../adapters/metabase/card-references.ts | 47 + .../ingest/adapters/metabase/chunk.test.ts | 307 + .../src/ingest/adapters/metabase/chunk.ts | 243 + .../adapters/metabase/client-boundary.test.ts | 43 + .../adapters/metabase/client-port.test.ts | 104 + .../ingest/adapters/metabase/client-port.ts | 196 + .../ingest/adapters/metabase/client.test.ts | 377 + .../src/ingest/adapters/metabase/client.ts | 783 + .../ingest/adapters/metabase/detect.test.ts | 49 + .../src/ingest/adapters/metabase/detect.ts | 19 + .../adapters/metabase/fanout-planner.test.ts | 52 + .../adapters/metabase/fanout-planner.ts | 49 + .../adapters/metabase/fetch-scope.test.ts | 144 + .../ingest/adapters/metabase/fetch-scope.ts | 82 + .../ingest/adapters/metabase/fetch.test.ts | 515 + .../src/ingest/adapters/metabase/fetch.ts | 315 + .../metabase/local-metabase.adapter.test.ts | 59 + .../metabase/local-metabase.adapter.ts | 80 + .../metabase/local-source-state-store.test.ts | 314 + .../metabase/local-source-state-store.ts | 560 + .../ingest/adapters/metabase/mapping.test.ts | 295 + .../src/ingest/adapters/metabase/mapping.ts | 344 + .../metabase/metabase.adapter.test.ts | 153 + .../adapters/metabase/metabase.adapter.ts | 51 + .../adapters/metabase/serialize-card.test.ts | 222 + .../adapters/metabase/serialize-card.ts | 127 + .../adapters/metabase/source-state-port.ts | 25 + .../ingest/adapters/metabase/types.test.ts | 87 + .../src/ingest/adapters/metabase/types.ts | 137 + .../ingest/adapters/metricflow/chunk.test.ts | 124 + .../src/ingest/adapters/metricflow/chunk.ts | 93 + .../adapters/metricflow/deep-parse.test.ts | 1304 ++ .../ingest/adapters/metricflow/deep-parse.ts | 700 + .../ingest/adapters/metricflow/detect.test.ts | 51 + .../src/ingest/adapters/metricflow/detect.ts | 34 + .../ingest/adapters/metricflow/fetch.test.ts | 110 + .../src/ingest/adapters/metricflow/fetch.ts | 67 + 
.../ingest/adapters/metricflow/graph.test.ts | 268 + .../src/ingest/adapters/metricflow/graph.ts | 205 + .../metricflow/import-semantic-models.test.ts | 382 + .../metricflow/import-semantic-models.ts | 293 + .../metricflow/metricflow.adapter.test.ts | 121 + .../adapters/metricflow/metricflow.adapter.ts | 47 + .../ingest/adapters/metricflow/parse.test.ts | 206 + .../src/ingest/adapters/metricflow/parse.ts | 241 + .../adapters/metricflow/pull-config.test.ts | 68 + .../ingest/adapters/metricflow/pull-config.ts | 37 + .../metricflow/semantic-models.test.ts | 258 + .../adapters/metricflow/semantic-models.ts | 387 + .../src/ingest/adapters/notion/chunk.ts | 153 + .../ingest/adapters/notion/cluster.test.ts | 119 + .../src/ingest/adapters/notion/cluster.ts | 90 + .../src/ingest/adapters/notion/detect.ts | 20 + .../src/ingest/adapters/notion/fetch.test.ts | 395 + .../src/ingest/adapters/notion/fetch.ts | 653 + .../ingest/adapters/notion/normalize.test.ts | 73 + .../src/ingest/adapters/notion/normalize.ts | 185 + .../adapters/notion/notion-client.test.ts | 65 + .../ingest/adapters/notion/notion-client.ts | 205 + .../adapters/notion/notion.adapter.test.ts | 350 + .../ingest/adapters/notion/notion.adapter.ts | 160 + .../src/ingest/adapters/notion/pull-config.ts | 5 + .../src/ingest/adapters/notion/types.ts | 84 + .../context/src/ingest/canonical-pins.test.ts | 76 + packages/context/src/ingest/canonical-pins.ts | 66 + .../src/ingest/clustering/kmeans.test.ts | 67 + .../context/src/ingest/clustering/kmeans.ts | 114 + .../candidate-dedup.service.test.ts | 268 + .../candidate-dedup.service.ts | 315 + ...ext-candidate-carryforward.service.test.ts | 183 + .../context-candidate-carryforward.service.ts | 195 + .../curator-pagination.service.test.ts | 196 + .../curator-pagination.service.ts | 333 + .../context-candidates/embedding-text.test.ts | 13 + .../context-candidates/embedding-text.ts | 8 + .../src/ingest/context-candidates/index.ts | 29 + 
.../ingest/context-candidates/store.test.ts | 76 + .../src/ingest/context-candidates/store.ts | 30 + .../src/ingest/context-candidates/types.ts | 121 + .../context-evidence-index.service.test.ts | 479 + .../context-evidence-index.service.ts | 447 + .../src/ingest/context-evidence/index.ts | 12 + .../sqlite-context-evidence-store.test.ts | 490 + .../sqlite-context-evidence-store.ts | 1418 ++ .../src/ingest/context-evidence/store.test.ts | 66 + .../src/ingest/context-evidence/store.ts | 17 + .../src/ingest/context-evidence/types.ts | 55 + .../ingest/dbt-shared/project-vars.test.ts | 118 + .../src/ingest/dbt-shared/project-vars.ts | 121 + .../ingest/dbt-shared/schema-files.test.ts | 41 + .../src/ingest/dbt-shared/schema-files.ts | 76 + .../src/ingest/diff-set.service.test.ts | 163 + .../context/src/ingest/diff-set.service.ts | 54 + packages/context/src/ingest/git-env.ts | 20 + packages/context/src/ingest/index.ts | 640 + .../src/ingest/ingest-bundle.runner.test.ts | 1853 +++ .../src/ingest/ingest-bundle.runner.ts | 1381 ++ .../context/src/ingest/ingest-prompts.test.ts | 76 + .../src/ingest/ingest-runtime-assets.test.ts | 132 + .../context/src/ingest/local-adapters.test.ts | 444 + packages/context/src/ingest/local-adapters.ts | 256 + .../src/ingest/local-bundle-ingest.test.ts | 608 + .../src/ingest/local-bundle-runtime.test.ts | 144 + .../src/ingest/local-bundle-runtime.ts | 594 + ...cal-embedding-provider.integration.test.ts | 162 + packages/context/src/ingest/local-ingest.ts | 459 + .../ingest/local-mapping-reconcile.test.ts | 79 + .../src/ingest/local-mapping-reconcile.ts | 65 + .../src/ingest/local-metabase-ingest.test.ts | 328 + .../src/ingest/local-stage-ingest.test.ts | 706 + .../context/src/ingest/local-stage-ingest.ts | 411 + .../ingest/memory-flow/acceptance-fixtures.ts | 168 + .../src/ingest/memory-flow/acceptance.test.ts | 62 + .../src/ingest/memory-flow/events.test.ts | 332 + .../context/src/ingest/memory-flow/events.ts | 247 + 
.../context/src/ingest/memory-flow/index.ts | 17 + .../ingest/memory-flow/interaction.test.ts | 326 + .../src/ingest/memory-flow/interaction.ts | 450 + .../memory-flow/interactive-render.test.ts | 177 + .../ingest/memory-flow/interactive-render.ts | 160 + .../ingest/memory-flow/live-buffer.test.ts | 91 + .../src/ingest/memory-flow/live-buffer.ts | 74 + .../ingest/memory-flow/package-export.test.ts | 11 + .../src/ingest/memory-flow/render.test.ts | 114 + .../context/src/ingest/memory-flow/render.ts | 99 + .../src/ingest/memory-flow/schema.test.ts | 164 + .../context/src/ingest/memory-flow/schema.ts | 171 + .../src/ingest/memory-flow/summary.test.ts | 125 + .../context/src/ingest/memory-flow/summary.ts | 93 + .../context/src/ingest/memory-flow/types.ts | 246 + .../src/ingest/memory-flow/view-model.test.ts | 436 + .../src/ingest/memory-flow/view-model.ts | 523 + .../src/ingest/memory-flow/visuals.test.ts | 70 + .../context/src/ingest/memory-flow/visuals.ts | 78 + .../context/src/ingest/metabase-mapping.ts | 23 + .../context/src/ingest/page-triage/index.ts | 9 + .../page-triage/page-triage.service.test.ts | 569 + .../ingest/page-triage/page-triage.service.ts | 481 + .../context/src/ingest/parsed-target-table.ts | 28 + packages/context/src/ingest/ports.ts | 386 + .../src/ingest/raw-sources-paths.test.ts | 24 + .../context/src/ingest/raw-sources-paths.ts | 19 + .../context/src/ingest/repo-fetch.test.ts | 227 + packages/context/src/ingest/repo-fetch.ts | 155 + .../src/ingest/report-snapshot.test.ts | 219 + .../context/src/ingest/report-snapshot.ts | 194 + packages/context/src/ingest/reports.ts | 101 + .../ingest/source-adapter-registry.test.ts | 41 + .../src/ingest/source-adapter-registry.ts | 29 + .../ingest/sqlite-bundle-ingest-store.test.ts | 373 + .../src/ingest/sqlite-bundle-ingest-store.ts | 598 + .../ingest/sqlite-local-ingest-store.test.ts | 156 + .../src/ingest/sqlite-local-ingest-store.ts | 233 + ...concile-context.context-candidates.test.ts | 124 + 
.../stages/build-reconcile-context.test.ts | 83 + .../ingest/stages/build-reconcile-context.ts | 192 + .../ingest/stages/build-wu-context.test.ts | 188 + .../src/ingest/stages/build-wu-context.ts | 114 + .../stages/stage-1-stage-raw-files.test.ts | 59 + .../ingest/stages/stage-1-stage-raw-files.ts | 39 + .../ingest/stages/stage-3-work-units.test.ts | 150 + .../src/ingest/stages/stage-3-work-units.ts | 152 + .../stages/stage-4-reconciliation.test.ts | 149 + .../ingest/stages/stage-4-reconciliation.ts | 45 + .../src/ingest/stages/stage-index.types.ts | 65 + .../ingest/stages/validate-wu-sources.test.ts | 35 + .../src/ingest/stages/validate-wu-sources.ts | 24 + .../tools/emit-artifact-resolution.tool.ts | 53 + .../tools/emit-conflict-resolution.tool.ts | 38 + .../tools/emit-eviction-decision.tool.ts | 51 + .../emit-reconciliation-records.tool.test.ts | 228 + .../tools/emit-unmapped-fallback.tool.ts | 52 + .../ingest/tools/eviction-list.tool.test.ts | 56 + .../src/ingest/tools/eviction-list.tool.ts | 39 + .../ingest/tools/read-raw-file.tool.test.ts | 69 + .../src/ingest/tools/read-raw-file.tool.ts | 41 + .../ingest/tools/read-raw-span.tool.test.ts | 53 + .../src/ingest/tools/read-raw-span.tool.ts | 46 + .../src/ingest/tools/stage-diff.tool.test.ts | 131 + .../src/ingest/tools/stage-diff.tool.ts | 44 + .../src/ingest/tools/stage-list.tool.test.ts | 58 + .../src/ingest/tools/stage-list.tool.ts | 30 + .../src/ingest/tools/tool-call-logger.ts | 106 + packages/context/src/ingest/types.ts | 163 + .../src/llm/debug-request-recorder.test.ts | 123 + .../context/src/llm/debug-request-recorder.ts | 131 + .../context/src/llm/embedding-port.test.ts | 38 + packages/context/src/llm/embedding-port.ts | 39 + packages/context/src/llm/generation.ts | 63 + packages/context/src/llm/index.ts | 18 + packages/context/src/llm/local-config.test.ts | 127 + packages/context/src/llm/local-config.ts | 122 + packages/context/src/mcp/context-tools.ts | 509 + packages/context/src/mcp/index.ts | 33 + 
.../src/mcp/local-project-ports.test.ts | 1044 ++ .../context/src/mcp/local-project-ports.ts | 683 + packages/context/src/mcp/server.test.ts | 869 + packages/context/src/mcp/server.ts | 94 + packages/context/src/mcp/types.ts | 326 + .../context/src/memory/capture-signals.ts | 128 + packages/context/src/memory/index.ts | 41 + .../context/src/memory/local-memory-runs.ts | 211 + .../context/src/memory/local-memory.test.ts | 204 + packages/context/src/memory/local-memory.ts | 482 + .../memory-agent.service.ingest.test.ts | 375 + .../src/memory/memory-agent.service.test.ts | 475 + .../src/memory/memory-agent.service.ts | 658 + .../context/src/memory/memory-runs.test.ts | 198 + packages/context/src/memory/memory-runs.ts | 133 + .../src/memory/memory-runtime-assets.test.ts | 100 + packages/context/src/memory/types.ts | 157 + packages/context/src/package-exports.test.ts | 253 + packages/context/src/project/config.test.ts | 391 + packages/context/src/project/config.ts | 551 + packages/context/src/project/index.ts | 33 + .../src/project/local-git-file-store.test.ts | 101 + .../src/project/local-git-file-store.ts | 190 + .../context/src/project/local-state-db.ts | 6 + .../src/project/mappings-yaml-schema.test.ts | 85 + .../src/project/mappings-yaml-schema.ts | 135 + packages/context/src/project/project.test.ts | 78 + packages/context/src/project/project.ts | 143 + .../context/src/project/setup-config.test.ts | 76 + packages/context/src/project/setup-config.ts | 55 + packages/context/src/prompts/index.ts | 2 + .../src/prompts/prompt.service.test.ts | 54 + .../context/src/prompts/prompt.service.ts | 108 + packages/context/src/scan/credentials.test.ts | 183 + packages/context/src/scan/credentials.ts | 50 + .../context/src/scan/data-dictionary.test.ts | 114 + packages/context/src/scan/data-dictionary.ts | 109 + .../src/scan/description-generation.test.ts | 318 + .../src/scan/description-generation.ts | 582 + .../context/src/scan/embedding-text.test.ts | 47 + 
packages/context/src/scan/embedding-text.ts | 45 + .../context/src/scan/enrichment-state.test.ts | 175 + packages/context/src/scan/enrichment-state.ts | 108 + .../src/scan/enrichment-summary.test.ts | 42 + .../context/src/scan/enrichment-summary.ts | 52 + .../context/src/scan/enrichment-types.test.ts | 159 + packages/context/src/scan/enrichment-types.ts | 130 + packages/context/src/scan/index.ts | 400 + .../scan/local-enrichment-artifacts.test.ts | 852 + .../src/scan/local-enrichment-artifacts.ts | 417 + .../context/src/scan/local-enrichment.test.ts | 742 + packages/context/src/scan/local-enrichment.ts | 659 + packages/context/src/scan/local-scan.test.ts | 1494 ++ packages/context/src/scan/local-scan.ts | 516 + .../scan/local-structural-artifacts.test.ts | 196 + .../src/scan/local-structural-artifacts.ts | 125 + .../context/src/scan/orchestrator.test.ts | 376 + packages/context/src/scan/orchestrator.ts | 297 + .../src/scan/relationship-artifacts.test.ts | 310 + .../src/scan/relationship-artifacts.ts | 75 + .../relationship-benchmark-report.test.ts | 451 + .../src/scan/relationship-benchmark-report.ts | 363 + .../src/scan/relationship-benchmarks.test.ts | 1269 ++ .../src/scan/relationship-benchmarks.ts | 902 ++ .../src/scan/relationship-budget.test.ts | 86 + .../context/src/scan/relationship-budget.ts | 60 + .../src/scan/relationship-candidates.test.ts | 881 + .../src/scan/relationship-candidates.ts | 756 + .../relationship-composite-candidates.test.ts | 84 + .../scan/relationship-composite-candidates.ts | 622 + .../src/scan/relationship-diagnostics.test.ts | 373 + .../src/scan/relationship-diagnostics.ts | 364 + .../src/scan/relationship-discovery.test.ts | 699 + .../src/scan/relationship-discovery.ts | 338 + .../relationship-feedback-calibration.test.ts | 211 + .../scan/relationship-feedback-calibration.ts | 300 + .../scan/relationship-feedback-export.test.ts | 270 + .../src/scan/relationship-feedback-export.ts | 179 + .../scan/relationship-formal-metadata.test.ts 
| 134 + .../src/scan/relationship-formal-metadata.ts | 61 + .../scan/relationship-graph-resolver.test.ts | 649 + .../src/scan/relationship-graph-resolver.ts | 508 + .../scan/relationship-llm-proposal.test.ts | 240 + .../src/scan/relationship-llm-proposal.ts | 281 + .../src/scan/relationship-locality.test.ts | 151 + .../context/src/scan/relationship-locality.ts | 164 + .../scan/relationship-name-similarity.test.ts | 81 + .../src/scan/relationship-name-similarity.ts | 151 + .../src/scan/relationship-profiling.test.ts | 354 + .../src/scan/relationship-profiling.ts | 467 + .../scan/relationship-review-apply.test.ts | 352 + .../src/scan/relationship-review-apply.ts | 231 + .../relationship-review-decisions.test.ts | 365 + .../src/scan/relationship-review-decisions.ts | 182 + .../src/scan/relationship-scoring.test.ts | 108 + .../context/src/scan/relationship-scoring.ts | 155 + .../relationship-threshold-advice.test.ts | 241 + .../src/scan/relationship-threshold-advice.ts | 335 + .../src/scan/relationship-validation.test.ts | 492 + .../src/scan/relationship-validation.ts | 370 + .../sqlite-local-enrichment-state-store.ts | 237 + .../src/scan/type-normalization.test.ts | 24 + .../context/src/scan/type-normalization.ts | 32 + packages/context/src/scan/types.test.ts | 258 + packages/context/src/scan/types.ts | 391 + .../src/search/backend-conformance.test.ts | 472 + .../context/src/search/backend-conformance.ts | 151 + .../src/search/hybrid-search-core.test.ts | 127 + .../context/src/search/hybrid-search-core.ts | 141 + packages/context/src/search/index.ts | 35 + .../src/search/pglite-owner-process.test.ts | 331 + .../src/search/pglite-owner-process.ts | 114 + .../search/pglite-runtime-boundary.test.ts | 66 + .../context/src/search/pglite-spike.test.ts | 302 + packages/context/src/search/query.test.ts | 26 + packages/context/src/search/query.ts | 19 + packages/context/src/search/rrf.test.ts | 52 + packages/context/src/search/rrf.ts | 18 + packages/context/src/search/types.ts 
| 85 + packages/context/src/skills/index.ts | 2 + .../skills/skills-registry.service.test.ts | 212 + .../src/skills/skills-registry.service.ts | 255 + packages/context/src/sl/descriptions.ts | 34 + packages/context/src/sl/index.ts | 32 + packages/context/src/sl/local-query.test.ts | 260 + packages/context/src/sl/local-query.ts | 150 + packages/context/src/sl/local-sl.test.ts | 321 + packages/context/src/sl/local-sl.ts | 595 + .../src/sl/pglite-sl-search-prototype.test.ts | 268 + .../src/sl/pglite-sl-search-prototype.ts | 569 + packages/context/src/sl/ports.ts | 53 + packages/context/src/sl/schemas.ts | 149 + .../src/sl/semantic-layer.service.test.ts | 678 + .../context/src/sl/semantic-layer.service.ts | 1130 ++ .../src/sl/sl-dictionary-profile.test.ts | 115 + .../context/src/sl/sl-dictionary-profile.ts | 120 + .../context/src/sl/sl-search.service.test.ts | 165 + packages/context/src/sl/sl-search.service.ts | 168 + packages/context/src/sl/sl-validator.port.ts | 8 + .../src/sl/sqlite-sl-sources-index.test.ts | 164 + .../context/src/sl/sqlite-sl-sources-index.ts | 549 + .../src/sl/tools/base-semantic-layer.tool.ts | 154 + .../src/sl/tools/connection-id-schema.test.ts | 18 + .../src/sl/tools/connection-id-schema.ts | 6 + packages/context/src/sl/tools/index.ts | 11 + .../context/src/sl/tools/sl-discover.tool.ts | 337 + .../src/sl/tools/sl-edit-source.tool.test.ts | 187 + .../src/sl/tools/sl-edit-source.tool.ts | 200 + .../tools/sl-read-source.tool.session.test.ts | 75 + .../src/sl/tools/sl-read-source.tool.ts | 63 + .../src/sl/tools/sl-rollback.tool.test.ts | 67 + .../context/src/sl/tools/sl-rollback.tool.ts | 87 + .../src/sl/tools/sl-validate.tool.test.ts | 66 + .../context/src/sl/tools/sl-validate.tool.ts | 130 + .../sl/tools/sl-warehouse-validation.test.ts | 120 + .../src/sl/tools/sl-warehouse-validation.ts | 325 + .../src/sl/tools/sl-write-source.tool.test.ts | 267 + .../src/sl/tools/sl-write-source.tool.ts | 380 + packages/context/src/sl/types.ts | 88 + 
.../http-sql-analysis-port.test.ts | 61 + .../sql-analysis/http-sql-analysis-port.ts | 159 + packages/context/src/sql-analysis/index.ts | 9 + packages/context/src/sql-analysis/ports.ts | 30 + .../context/src/test/make-local-git-repo.ts | 45 + packages/context/src/tools/authors.ts | 13 + packages/context/src/tools/base-tool.ts | 174 + .../src/tools/context-candidate-mark.tool.ts | 64 + .../src/tools/context-candidate-write.tool.ts | 179 + .../context/src/tools/context-evidence-ids.ts | 16 + .../tools/context-evidence-neighbors.tool.ts | 99 + .../src/tools/context-evidence-read.tool.ts | 153 + .../src/tools/context-evidence-search.tool.ts | 142 + .../src/tools/context-evidence-tool-store.ts | 145 + .../src/tools/context-evidence-tools.test.ts | 598 + .../src/tools/context-ingest-metadata.ts | 23 + packages/context/src/tools/index.ts | 43 + .../context/src/tools/sql-edit-replacer.ts | 229 + packages/context/src/tools/tool-session.ts | 54 + .../src/tools/touched-sl-sources.test.ts | 45 + .../context/src/tools/touched-sl-sources.ts | 60 + packages/context/src/wiki/index.ts | 29 + .../context/src/wiki/knowledge-search-text.ts | 7 + .../src/wiki/knowledge-wiki.service.test.ts | 118 + .../src/wiki/knowledge-wiki.service.ts | 437 + .../context/src/wiki/local-knowledge.test.ts | 236 + packages/context/src/wiki/local-knowledge.ts | 391 + packages/context/src/wiki/ports.ts | 68 + .../src/wiki/sqlite-knowledge-index.test.ts | 115 + .../src/wiki/sqlite-knowledge-index.ts | 276 + packages/context/src/wiki/tools/index.ts | 5 + .../wiki/tools/wiki-list-tags.tool.test.ts | 42 + .../src/wiki/tools/wiki-list-tags.tool.ts | 49 + .../context/src/wiki/tools/wiki-read.tool.ts | 82 + .../src/wiki/tools/wiki-remove.tool.test.ts | 59 + .../src/wiki/tools/wiki-remove.tool.ts | 85 + .../src/wiki/tools/wiki-search.tool.test.ts | 41 + .../src/wiki/tools/wiki-search.tool.ts | 92 + .../src/wiki/tools/wiki-write.tool.test.ts | 168 + .../context/src/wiki/tools/wiki-write.tool.ts | 167 + 
packages/context/src/wiki/types.ts | 55 + .../lookml/extends-chain/orders.model.lkml | 5 + .../lookml/extends-chain/views/base.view.lkml | 11 + .../extends-chain/views/orders.view.lkml | 12 + .../extends-chain/views/orders_ext.view.lkml | 7 + .../lookml/multi-model/marketing.model.lkml | 11 + .../lookml/multi-model/orders.model.lkml | 11 + .../multi-model/views/campaigns.view.lkml | 16 + .../lookml/multi-model/views/orders.view.lkml | 15 + .../multi-model/views/shared_dims.view.lkml | 12 + .../lookml/single-model/orders.model.lkml | 10 + .../single-model/views/customers.view.lkml | 12 + .../single-model/views/orders.view.lkml | 20 + .../lookml/three-churn/billing.model.lkml | 5 + .../lookml/three-churn/customers.model.lkml | 5 + .../lookml/three-churn/support.model.lkml | 5 + .../billing/billing_churn_risk.view.lkml | 16 + .../customers/customer_churn_risk.view.lkml | 16 + .../support/support_churn_risk.view.lkml | 16 + .../fixtures/metabase/card-ref/cards/10.json | 15 + .../fixtures/metabase/card-ref/cards/11.json | 15 + .../metabase/card-ref/collections/5.json | 5 + .../metabase/card-ref/databases/42.json | 6 + .../metabase/card-ref/sync-config.json | 13 + .../metabase/multi-collection/cards/1.json | 32 + .../metabase/multi-collection/cards/2.json | 32 + .../metabase/multi-collection/cards/3.json | 32 + .../multi-collection/collections/5.json | 1 + .../multi-collection/collections/6.json | 1 + .../multi-collection/databases/42.json | 6 + .../multi-collection/sync-config.json | 16 + .../fixtures/metabase/simple/cards/1.json | 32 + .../fixtures/metabase/simple/cards/2.json | 32 + .../metabase/simple/collections/5.json | 5 + .../metabase/simple/databases/42.json | 6 + .../fixtures/metabase/simple/sync-config.json | 13 + .../metricflow/dbt-mixed/dbt_project.yml | 5 + .../metricflow/dbt-mixed/models/orders.yml | 7 + .../extends-chain/metrics/orders_final.yml | 9 + .../extends-chain/models/orders.yml | 19 + .../extends-chain/models/orders_ext.yml | 9 + 
.../models/marketing/campaigns.yml | 7 + .../multi-component/models/sales/orders.yml | 7 + .../metricflow/single-model/models/orders.yml | 33 + .../data.sqlite | Bin 0 -> 20480 bytes .../expected-links.yaml | 32 + .../fixture.yaml | 7 + .../snapshot.json | 170 + .../expected-links.yaml | 878 + .../fixture.yaml | 14 + .../snapshot.json | 5961 +++++++ .../expected-links.yaml | 126 + .../fixture.yaml | 14 + .../snapshot.json | 1224 ++ .../data.sqlite | Bin 0 -> 20480 bytes .../expected-links.yaml | 36 + .../fixture.yaml | 7 + .../snapshot.json | 179 + .../expected-links.yaml | 113 + .../fixture.yaml | 14 + .../snapshot.json | 801 + .../data.sqlite | Bin 0 -> 12288 bytes .../expected-links.yaml | 11 + .../fixture.yaml | 10 + .../snapshot.json | 103 + .../demo_b2b_declared_metadata/data.sqlite | Bin 0 -> 139264 bytes .../expected-links.yaml | 51 + .../demo_b2b_declared_metadata/fixture.yaml | 13 + .../demo_b2b_declared_metadata/snapshot.json | 137 + .../data.sqlite | Bin 0 -> 139264 bytes .../expected-links.yaml | 51 + .../fixture.yaml | 10 + .../snapshot.json | 137 + .../data.sqlite | Bin 0 -> 24576 bytes .../expected-links.yaml | 39 + .../fixture.yaml | 7 + .../snapshot.json | 208 + .../data.sqlite | Bin 0 -> 12288 bytes .../expected-links.yaml | 11 + .../fixture.yaml | 10 + .../snapshot.json | 67 + .../data.sqlite | Bin 0 -> 20480 bytes .../expected-links.yaml | 22 + .../fixture.yaml | 7 + .../snapshot.json | 161 + .../expected-links.yaml | 135 + .../fixture.yaml | 14 + .../snapshot.json | 1055 ++ .../data.sqlite | Bin 0 -> 28672 bytes .../expected-links.yaml | 59 + .../fixture.yaml | 11 + .../snapshot.json | 264 + .../data.sqlite | Bin 0 -> 16384 bytes .../expected-links.yaml | 24 + .../fixture.yaml | 11 + .../snapshot.json | 186 + .../data.sqlite | Bin 0 -> 16384 bytes .../expected-links.yaml | 22 + .../fixture.yaml | 7 + .../snapshot.json | 132 + .../expected-links.yaml | 206 + .../fixture.yaml | 14 + .../snapshot.json | 1173 ++ .../data.sqlite.gz | Bin 0 -> 
161115 bytes .../expected-links.yaml | 13362 ++++++++++++++++ .../fixture.yaml | 8 + .../snapshot.json.gz | Bin 0 -> 44596 bytes .../column-embeddings.json | 6 + .../data.sqlite | Bin 0 -> 12288 bytes .../expected-links.yaml | 11 + .../fixture.yaml | 8 + .../snapshot.json | 67 + packages/context/tsconfig.json | 9 + packages/context/vitest.config.ts | 8 + packages/llm/package.json | 49 + packages/llm/src/embedding-health.test.ts | 106 + packages/llm/src/embedding-health.ts | 54 + packages/llm/src/embedding-provider.test.ts | 146 + packages/llm/src/embedding-provider.ts | 379 + packages/llm/src/index.ts | 30 + packages/llm/src/message-builder.test.ts | 113 + packages/llm/src/message-builder.ts | 197 + packages/llm/src/model-health.test.ts | 61 + packages/llm/src/model-health.ts | 60 + packages/llm/src/model-provider.test.ts | 173 + packages/llm/src/model-provider.ts | 152 + packages/llm/src/package-exports.test.ts | 19 + packages/llm/src/repair.test.ts | 93 + packages/llm/src/repair.ts | 88 + packages/llm/src/types.ts | 95 + packages/llm/tsconfig.json | 9 + packages/llm/vitest.config.ts | 8 + pnpm-lock.yaml | 6313 ++++++++ pnpm-workspace.yaml | 13 + pyproject.toml | 43 + python/klo-daemon/README.md | 104 + python/klo-daemon/pyproject.toml | 50 + python/klo-daemon/src/klo_daemon/__init__.py | 6 + python/klo-daemon/src/klo_daemon/__main__.py | 172 + python/klo-daemon/src/klo_daemon/app.py | 228 + .../src/klo_daemon/code_execution.py | 333 + .../src/klo_daemon/database_introspection.py | 284 + .../klo-daemon/src/klo_daemon/embeddings.py | 172 + python/klo-daemon/src/klo_daemon/lookml.py | 1056 ++ .../src/klo_daemon/semantic_layer.py | 136 + .../src/klo_daemon/source_generation.py | 254 + .../src/klo_daemon/table_identifier.py | 66 + python/klo-daemon/tests/test_app.py | 442 + python/klo-daemon/tests/test_cli.py | 426 + .../klo-daemon/tests/test_code_execution.py | 210 + .../tests/test_database_introspection.py | 153 + python/klo-daemon/tests/test_embeddings.py | 107 + 
python/klo-daemon/tests/test_lookml.py | 134 + python/klo-daemon/tests/test_package.py | 6 + .../klo-daemon/tests/test_semantic_layer.py | 64 + .../tests/test_source_generation.py | 161 + python/klo-sl/AGENTS.md | 161 + python/klo-sl/CLAUDE.md | 1 + python/klo-sl/README.md | 0 python/klo-sl/demos/complex_cte_join.yaml | 222 + python/klo-sl/demos/run_complex_cte_join.sh | 60 + python/klo-sl/pyproject.toml | 59 + python/klo-sl/scripts/gen_b2b_saas_model.py | 219 + python/klo-sl/scripts/slquery.py | 110 + python/klo-sl/scripts/tpch_runner.py | 166 + python/klo-sl/semantic_layer/__init__.py | 4 + python/klo-sl/semantic_layer/__main__.py | 3 + python/klo-sl/semantic_layer/cli.py | 268 + .../klo-sl/semantic_layer/duplicate_check.py | 99 + python/klo-sl/semantic_layer/engine.py | 360 + python/klo-sl/semantic_layer/generator.py | 1419 ++ python/klo-sl/semantic_layer/graph.py | 285 + python/klo-sl/semantic_layer/loader.py | 210 + python/klo-sl/semantic_layer/manifest.py | 233 + python/klo-sl/semantic_layer/models.py | 235 + python/klo-sl/semantic_layer/parser.py | 303 + python/klo-sl/semantic_layer/planner.py | 1445 ++ .../semantic_layer/sql_table_extractor.py | 72 + .../semantic_layer/table_identifier_parser.py | 111 + .../sources/b2b_saas/abm_engagements.yaml | 15 + .../b2b_saas/account_intent_signals.yaml | 18 + python/klo-sl/sources/b2b_saas/accounts.yaml | 23 + .../klo-sl/sources/b2b_saas/activities.yaml | 36 + .../klo-sl/sources/b2b_saas/ad_accounts.yaml | 13 + .../klo-sl/sources/b2b_saas/ad_ad_stats.yaml | 24 + .../klo-sl/sources/b2b_saas/ad_campaigns.yaml | 28 + .../sources/b2b_saas/ad_creative_stats.yaml | 24 + .../klo-sl/sources/b2b_saas/ad_creatives.yaml | 20 + python/klo-sl/sources/b2b_saas/ad_groups.yaml | 17 + python/klo-sl/sources/b2b_saas/ad_stats.yaml | 24 + python/klo-sl/sources/b2b_saas/ads.yaml | 20 + python/klo-sl/sources/b2b_saas/ap_bills.yaml | 23 + python/klo-sl/sources/b2b_saas/approvals.yaml | 26 + .../sources/b2b_saas/attribution_credits.yaml | 22 
+ python/klo-sl/sources/b2b_saas/budgets.yaml | 15 + python/klo-sl/sources/b2b_saas/calls.yaml | 28 + .../sources/b2b_saas/campaign_members.yaml | 23 + python/klo-sl/sources/b2b_saas/campaigns.yaml | 19 + .../sources/b2b_saas/card_transactions.yaml | 22 + .../sources/b2b_saas/cash_balances.yaml | 12 + python/klo-sl/sources/b2b_saas/charges.yaml | 24 + .../klo-sl/sources/b2b_saas/churn_risk.yaml | 290 + python/klo-sl/sources/b2b_saas/contacts.yaml | 23 + .../sources/b2b_saas/content_assets.yaml | 16 + .../sources/b2b_saas/content_touches.yaml | 33 + python/klo-sl/sources/b2b_saas/contracts.yaml | 30 + python/klo-sl/sources/b2b_saas/crm_notes.yaml | 23 + .../klo-sl/sources/b2b_saas/currencies.yaml | 9 + .../sources/b2b_saas/departments_hr.yaml | 9 + python/klo-sl/sources/b2b_saas/disputes.yaml | 23 + .../klo-sl/sources/b2b_saas/email_events.yaml | 18 + .../klo-sl/sources/b2b_saas/email_sends.yaml | 33 + python/klo-sl/sources/b2b_saas/employees.yaml | 33 + python/klo-sl/sources/b2b_saas/etl_runs.yaml | 21 + .../sources/b2b_saas/fiscal_calendar.yaml | 17 + .../sources/b2b_saas/forecast_snapshots.yaml | 23 + python/klo-sl/sources/b2b_saas/fx_rates.yaml | 14 + .../sources/b2b_saas/ga4_event_params.yaml | 23 + .../klo-sl/sources/b2b_saas/ga4_events.yaml | 25 + .../klo-sl/sources/b2b_saas/gl_accounts.yaml | 13 + .../klo-sl/sources/b2b_saas/identities.yaml | 22 + .../sources/b2b_saas/identity_links.yaml | 25 + .../sources/b2b_saas/invoice_lines.yaml | 24 + python/klo-sl/sources/b2b_saas/invoices.yaml | 28 + .../sources/b2b_saas/journal_entries.yaml | 12 + .../sources/b2b_saas/journal_lines.yaml | 25 + .../sources/b2b_saas/keyword_rankings.yaml | 20 + .../sources/b2b_saas/lead_status_history.yaml | 18 + python/klo-sl/sources/b2b_saas/leads.yaml | 43 + .../sources/b2b_saas/meeting_bookings.yaml | 22 + .../klo-sl/sources/b2b_saas/open_roles.yaml | 22 + .../sources/b2b_saas/opportunities.yaml | 40 + .../b2b_saas/opportunity_contact_roles.yaml | 20 + 
.../b2b_saas/opportunity_line_items.yaml | 24 + .../b2b_saas/opportunity_stage_history.yaml | 21 + .../sources/b2b_saas/payment_intents.yaml | 22 + python/klo-sl/sources/b2b_saas/payments.yaml | 20 + .../klo-sl/sources/b2b_saas/payroll_runs.yaml | 17 + .../sources/b2b_saas/pricebook_entries.yaml | 20 + .../klo-sl/sources/b2b_saas/pricebooks.yaml | 13 + .../sources/b2b_saas/product_costs.yaml | 15 + .../sources/b2b_saas/product_usage.yaml | 20 + python/klo-sl/sources/b2b_saas/products.yaml | 13 + python/klo-sl/sources/b2b_saas/quotas.yaml | 19 + .../sources/b2b_saas/quote_line_items.yaml | 24 + python/klo-sl/sources/b2b_saas/quotes.yaml | 28 + python/klo-sl/sources/b2b_saas/refunds.yaml | 20 + .../sources/b2b_saas/revenue_schedules.yaml | 28 + .../sources/b2b_saas/reverse_etl_jobs.yaml | 16 + .../klo-sl/sources/b2b_saas/sales_reps.yaml | 27 + .../klo-sl/sources/b2b_saas/sales_teams.yaml | 11 + .../b2b_saas/search_console_stats.yaml | 20 + .../b2b_saas/sequence_enrollments.yaml | 28 + .../sources/b2b_saas/sequence_steps.yaml | 21 + .../sources/b2b_saas/sequence_touches.yaml | 25 + python/klo-sl/sources/b2b_saas/sequences.yaml | 14 + .../sources/b2b_saas/stage_weights.yaml | 9 + .../sources/b2b_saas/subscription_items.yaml | 18 + .../sources/b2b_saas/subscriptions.yaml | 24 + .../sources/b2b_saas/support_tickets.yaml | 31 + .../sources/b2b_saas/target_accounts.yaml | 16 + .../klo-sl/sources/b2b_saas/touchpoints.yaml | 34 + python/klo-sl/sources/b2b_saas/vendors.yaml | 11 + .../klo-sl/sources/b2b_saas/web_events.yaml | 22 + .../klo-sl/sources/b2b_saas/web_sessions.yaml | 30 + .../sources/b2b_saas/webinar_attendance.yaml | 22 + .../b2b_saas/webinar_registrations.yaml | 21 + python/klo-sl/sources/b2b_saas/webinars.yaml | 19 + .../klo-sl/sources/ecommerce/churn_risk.yaml | 35 + .../klo-sl/sources/ecommerce/customers.yaml | 19 + .../klo-sl/sources/ecommerce/order_items.yaml | 21 + python/klo-sl/sources/ecommerce/orders.yaml | 39 + 
python/klo-sl/sources/ecommerce/products.yaml | 12 + python/klo-sl/sources/ecommerce/regions.yaml | 8 + python/klo-sl/sources/tpch/customer.yaml | 27 + python/klo-sl/sources/tpch/lineitem.yaml | 69 + python/klo-sl/sources/tpch/nation.yaml | 16 + python/klo-sl/sources/tpch/orders.yaml | 36 + python/klo-sl/sources/tpch/part.yaml | 22 + python/klo-sl/sources/tpch/partsupp.yaml | 27 + python/klo-sl/sources/tpch/region.yaml | 10 + python/klo-sl/sources/tpch/supplier.yaml | 22 + python/klo-sl/tests/__init__.py | 0 python/klo-sl/tests/conftest.py | 90 + .../klo-sl/tests/test_aggregate_locality.py | 1735 ++ python/klo-sl/tests/test_cli.py | 447 + python/klo-sl/tests/test_computed_columns.py | 313 + .../tests/test_corner_case_regressions.py | 288 + python/klo-sl/tests/test_coverage_gaps.py | 740 + python/klo-sl/tests/test_duplicate_check.py | 220 + python/klo-sl/tests/test_engine.py | 1380 ++ python/klo-sl/tests/test_generator.py | 2302 +++ python/klo-sl/tests/test_graph.py | 731 + python/klo-sl/tests/test_loader.py | 171 + python/klo-sl/tests/test_manifest.py | 619 + python/klo-sl/tests/test_models.py | 373 + python/klo-sl/tests/test_parser.py | 279 + python/klo-sl/tests/test_planner.py | 1509 ++ python/klo-sl/tests/test_segments.py | 293 + python/klo-sl/tests/test_snowflake.py | 470 + python/klo-sl/tests/test_sql_join_coverage.py | 296 + .../tests/test_table_identifier_parser.py | 77 + python/klo-sl/tests/test_tpch.py | 360 + python/klo-sl/tests/test_validator.py | 299 + release-policy.json | 38 + scripts/acquire-public-benchmark-fixtures.mjs | 60 + ...acquire-public-benchmark-fixtures.test.mjs | 168 + scripts/adventureworks-oltp-source.json | 13 + scripts/adventureworks-oltp-source.test.mjs | 25 + scripts/anti-fixture-conditional.test.mjs | 66 + scripts/build-adventureworks-oltp-fixture.mjs | 260 + scripts/build-benchmark-snapshot.mjs | 267 + scripts/build-benchmark-snapshot.test.mjs | 253 + ...d-evidence-fusion-adversarial-fixtures.mjs | 492 + 
scripts/check-boundaries.mjs | 213 + scripts/check-boundaries.test.mjs | 147 + scripts/ci-artifact-upload.test.mjs | 70 + scripts/examples-docs.test.mjs | 174 + scripts/installed-live-database-smoke.mjs | 432 + .../installed-live-database-smoke.test.mjs | 128 + scripts/link-dev-cli.mjs | 197 + scripts/link-dev-cli.test.mjs | 45 + scripts/package-artifacts.mjs | 1686 ++ scripts/package-artifacts.test.mjs | 655 + scripts/precommit-check.mjs | 195 + scripts/precommit-check.test.mjs | 33 + scripts/prepare-cli-bin.mjs | 44 + scripts/public-benchmark-manifest.json | 36 + scripts/published-package-smoke-config.mjs | 152 + scripts/published-package-smoke.mjs | 164 + scripts/published-package-smoke.test.mjs | 256 + scripts/relationship-orbit-verification.mjs | 330 + .../relationship-orbit-verification.test.mjs | 244 + scripts/release-readiness.mjs | 246 + scripts/release-readiness.test.mjs | 376 + scripts/run-klo.mjs | 175 + scripts/run-klo.test.mjs | 243 + scripts/setup-dev.mjs | 74 + scripts/setup-dev.test.mjs | 56 + scripts/standalone-ci-workflow.test.mjs | 67 + scripts/validate-llm-debug-jsonl.mjs | 98 + scripts/validate-llm-debug-jsonl.test.mjs | 112 + tsconfig.base.json | 18 + uv.lock | 1723 ++ 1199 files changed, 257054 insertions(+) create mode 100644 .github/workflows/ci.yml create mode 100644 .gitignore create mode 100644 AGENTS.md create mode 120000 CLAUDE.md create mode 120000 GEMINI.md create mode 100644 LICENSE create mode 100644 README.md create mode 100644 examples/README.md create mode 100644 examples/local-warehouse/README.md create mode 100644 examples/local-warehouse/klo.yaml create mode 100644 examples/local-warehouse/knowledge/global/revenue.md create mode 100644 examples/local-warehouse/semantic-layer/warehouse/orders.yaml create mode 100644 examples/local-warehouse/source/orders/orders.json create mode 100644 examples/orbit-relationship-verification/README.md create mode 100644 examples/orbit-relationship-verification/klo.yaml create mode 100644 
examples/package-artifacts/README.md create mode 100644 examples/postgres-historic/README.md create mode 100644 examples/postgres-historic/docker-compose.yml create mode 100644 examples/postgres-historic/init/001-schema.sql create mode 100755 examples/postgres-historic/scripts/generate-workload.sh create mode 100755 examples/postgres-historic/scripts/smoke.sh create mode 100644 package.json create mode 100644 packages/cli/assets/demo/orbit/demo.db create mode 100644 packages/cli/assets/demo/orbit/knowledge/global/activation-policy.md create mode 100644 packages/cli/assets/demo/orbit/knowledge/global/arr-contract-first.md create mode 100644 packages/cli/assets/demo/orbit/knowledge/global/customer-health-scoring.md create mode 100644 packages/cli/assets/demo/orbit/knowledge/global/discount-expiration.md create mode 100644 packages/cli/assets/demo/orbit/knowledge/global/internal-test-exclusion.md create mode 100644 packages/cli/assets/demo/orbit/knowledge/global/nrr-retention.md create mode 100644 packages/cli/assets/demo/orbit/knowledge/global/procurement-workflows.md create mode 100644 packages/cli/assets/demo/orbit/knowledge/global/revenue-gross-to-net.md create mode 100644 packages/cli/assets/demo/orbit/knowledge/global/segment-classification.md create mode 100644 packages/cli/assets/demo/orbit/knowledge/global/support-escalation.md create mode 100644 packages/cli/assets/demo/orbit/links/provenance.json create mode 100644 packages/cli/assets/demo/orbit/manifest.json create mode 100644 packages/cli/assets/demo/orbit/raw-sources/bi/account_retention.view.lkml create mode 100644 packages/cli/assets/demo/orbit/raw-sources/bi/arr_daily.view.lkml create mode 100644 packages/cli/assets/demo/orbit/raw-sources/bi/customer_health.view.lkml create mode 100644 packages/cli/assets/demo/orbit/raw-sources/bi/procurement_activity.view.lkml create mode 100644 packages/cli/assets/demo/orbit/raw-sources/bi/retention_exec_q1.dashboard.lookml create mode 100644 
packages/cli/assets/demo/orbit/raw-sources/bi/revenue_daily.view.lkml create mode 100644 packages/cli/assets/demo/orbit/raw-sources/bi/revenue_exec.dashboard.lookml create mode 100644 packages/cli/assets/demo/orbit/raw-sources/dbt/dbt_project.yml create mode 100644 packages/cli/assets/demo/orbit/raw-sources/dbt/models/marts/mart_arr_daily.sql create mode 100644 packages/cli/assets/demo/orbit/raw-sources/dbt/models/marts/mart_customer_health.sql create mode 100644 packages/cli/assets/demo/orbit/raw-sources/dbt/models/marts/mart_revenue_daily.sql create mode 100644 packages/cli/assets/demo/orbit/raw-sources/dbt/schema.yml create mode 100644 packages/cli/assets/demo/orbit/raw-sources/dbt/sources.yml create mode 100644 packages/cli/assets/demo/orbit/raw-sources/notion/activation-policy-decision-record.md create mode 100644 packages/cli/assets/demo/orbit/raw-sources/notion/analyst-onboarding.md create mode 100644 packages/cli/assets/demo/orbit/raw-sources/notion/arr-and-contract-reporting-notes.md create mode 100644 packages/cli/assets/demo/orbit/raw-sources/notion/customer-health-playbook.md create mode 100644 packages/cli/assets/demo/orbit/raw-sources/notion/retention-and-nrr-definition-notes.md create mode 100644 packages/cli/assets/demo/orbit/raw-sources/notion/revenue-reporting-policy.md create mode 100644 packages/cli/assets/demo/orbit/raw-sources/notion/sales-ops-segmentation-guide.md create mode 100644 packages/cli/assets/demo/orbit/raw-sources/notion/support-escalation-runbook.md create mode 100644 packages/cli/assets/demo/orbit/raw-sources/warehouse/accounts.csv create mode 100644 packages/cli/assets/demo/orbit/raw-sources/warehouse/arr_movements.csv create mode 100644 packages/cli/assets/demo/orbit/raw-sources/warehouse/contracts.csv create mode 100644 packages/cli/assets/demo/orbit/raw-sources/warehouse/invoices.csv create mode 100644 packages/cli/assets/demo/orbit/raw-sources/warehouse/plans.csv create mode 100644 
packages/cli/assets/demo/orbit/raw-sources/warehouse/purchase_requests.csv create mode 100644 packages/cli/assets/demo/orbit/raw-sources/warehouse/support_tickets.csv create mode 100644 packages/cli/assets/demo/orbit/raw-sources/warehouse/users.csv create mode 100644 packages/cli/assets/demo/orbit/replay.memory-flow.v1.json create mode 100644 packages/cli/assets/demo/orbit/reports/seeded-demo-report.json create mode 100644 packages/cli/assets/demo/orbit/semantic-layer/orbit_demo/accounts.yaml create mode 100644 packages/cli/assets/demo/orbit/semantic-layer/orbit_demo/arr_movements.yaml create mode 100644 packages/cli/assets/demo/orbit/semantic-layer/orbit_demo/contracts.yaml create mode 100644 packages/cli/assets/demo/orbit/semantic-layer/orbit_demo/invoices.yaml create mode 100644 packages/cli/assets/demo/orbit/semantic-layer/orbit_demo/purchase_requests.yaml create mode 100644 packages/cli/assets/demo/orbit/semantic-layer/orbit_demo/support_tickets.yaml create mode 100644 packages/cli/package.json create mode 100644 packages/cli/scripts/build-demo-assets.mjs create mode 100644 packages/cli/src/agent-runtime.test.ts create mode 100644 packages/cli/src/agent-runtime.ts create mode 100644 packages/cli/src/agent-search-readiness.test.ts create mode 100644 packages/cli/src/agent-search-readiness.ts create mode 100644 packages/cli/src/agent.test.ts create mode 100644 packages/cli/src/agent.ts create mode 100644 packages/cli/src/bin.ts create mode 100644 packages/cli/src/clack.ts create mode 100644 packages/cli/src/cli-program.ts create mode 100644 packages/cli/src/cli-runtime.ts create mode 100644 packages/cli/src/command-schemas.ts create mode 100644 packages/cli/src/commands/agent-commands.ts create mode 100644 packages/cli/src/commands/completion-commands.ts create mode 100644 packages/cli/src/commands/connection-commands.ts create mode 100644 packages/cli/src/commands/connection-mapping.test.ts create mode 100644 packages/cli/src/commands/connection-mapping.ts 
create mode 100644 packages/cli/src/commands/connection-metabase-commands.ts create mode 100644 packages/cli/src/commands/connection-metabase-setup.test.ts create mode 100644 packages/cli/src/commands/connection-metabase-setup.ts create mode 100644 packages/cli/src/commands/connection-notion-commands.ts create mode 100644 packages/cli/src/commands/connection-notion-tree.test.ts create mode 100644 packages/cli/src/commands/connection-notion-tree.ts create mode 100644 packages/cli/src/commands/connection-notion-tui.test.tsx create mode 100644 packages/cli/src/commands/connection-notion-tui.tsx create mode 100644 packages/cli/src/commands/connection-notion.test.ts create mode 100644 packages/cli/src/commands/connection-notion.ts create mode 100644 packages/cli/src/commands/demo-commands.test.ts create mode 100644 packages/cli/src/commands/demo-commands.ts create mode 100644 packages/cli/src/commands/doctor-commands.ts create mode 100644 packages/cli/src/commands/ingest-commands.ts create mode 100644 packages/cli/src/commands/knowledge-commands.ts create mode 100644 packages/cli/src/commands/public-ingest-commands.ts create mode 100644 packages/cli/src/commands/scan-commands.ts create mode 100644 packages/cli/src/commands/serve-commands.ts create mode 100644 packages/cli/src/commands/setup-commands.ts create mode 100644 packages/cli/src/commands/sl-commands.ts create mode 100644 packages/cli/src/commands/status-commands.ts create mode 100644 packages/cli/src/completion.ts create mode 100644 packages/cli/src/connection.test.ts create mode 100644 packages/cli/src/connection.ts create mode 100644 packages/cli/src/context-build-view.test.ts create mode 100644 packages/cli/src/context-build-view.ts create mode 100644 packages/cli/src/demo-assets.test.ts create mode 100644 packages/cli/src/demo-assets.ts create mode 100644 packages/cli/src/demo-full.test.ts create mode 100644 packages/cli/src/demo-full.ts create mode 100644 packages/cli/src/demo-interaction.test.ts create 
mode 100644 packages/cli/src/demo-interaction.ts create mode 100644 packages/cli/src/demo-metrics.test.ts create mode 100644 packages/cli/src/demo-metrics.ts create mode 100644 packages/cli/src/demo-progress.test.ts create mode 100644 packages/cli/src/demo-progress.ts create mode 100644 packages/cli/src/demo-replay-store.test.ts create mode 100644 packages/cli/src/demo-replay-store.ts create mode 100644 packages/cli/src/demo-scan.test.ts create mode 100644 packages/cli/src/demo-scan.ts create mode 100644 packages/cli/src/demo-seeded-inspect.test.ts create mode 100644 packages/cli/src/demo-seeded-inspect.ts create mode 100644 packages/cli/src/demo-seeded.test.ts create mode 100644 packages/cli/src/demo-seeded.ts create mode 100644 packages/cli/src/demo.test.ts create mode 100644 packages/cli/src/demo.ts create mode 100644 packages/cli/src/dev.test.ts create mode 100644 packages/cli/src/dev.ts create mode 100644 packages/cli/src/doctor.test.ts create mode 100644 packages/cli/src/doctor.ts create mode 100644 packages/cli/src/example-smoke.test.ts create mode 100644 packages/cli/src/historic-sql-doctor.test.ts create mode 100644 packages/cli/src/historic-sql-doctor.ts create mode 100644 packages/cli/src/index.test.ts create mode 100644 packages/cli/src/index.ts create mode 100644 packages/cli/src/ingest-report-file.test.ts create mode 100644 packages/cli/src/ingest-report-file.ts create mode 100644 packages/cli/src/ingest.test.ts create mode 100644 packages/cli/src/ingest.ts create mode 100644 packages/cli/src/io/mode.test.ts create mode 100644 packages/cli/src/io/mode.ts create mode 100644 packages/cli/src/io/print-list.test.ts create mode 100644 packages/cli/src/io/print-list.ts create mode 100644 packages/cli/src/io/symbols.ts create mode 100644 packages/cli/src/knowledge.test.ts create mode 100644 packages/cli/src/knowledge.ts create mode 100644 packages/cli/src/local-adapters.ts create mode 100644 packages/cli/src/local-scan-connectors.test.ts create mode 100644 
packages/cli/src/local-scan-connectors.ts create mode 100644 packages/cli/src/memory-flow-hud.tsx create mode 100644 packages/cli/src/memory-flow-interactive.test.ts create mode 100644 packages/cli/src/memory-flow-interactive.ts create mode 100644 packages/cli/src/memory-flow-tui.test.tsx create mode 100644 packages/cli/src/memory-flow-tui.tsx create mode 100644 packages/cli/src/next-steps.test.ts create mode 100644 packages/cli/src/next-steps.ts create mode 100644 packages/cli/src/project-dir.test.ts create mode 100644 packages/cli/src/project-dir.ts create mode 100644 packages/cli/src/project-resolver.test.ts create mode 100644 packages/cli/src/project-resolver.ts create mode 100644 packages/cli/src/prompt-navigation.test.ts create mode 100644 packages/cli/src/prompt-navigation.ts create mode 100644 packages/cli/src/public-ingest.test.ts create mode 100644 packages/cli/src/public-ingest.ts create mode 100644 packages/cli/src/scan.test.ts create mode 100644 packages/cli/src/scan.ts create mode 100644 packages/cli/src/serve.test.ts create mode 100644 packages/cli/src/serve.ts create mode 100644 packages/cli/src/setup-agents.test.ts create mode 100644 packages/cli/src/setup-agents.ts create mode 100644 packages/cli/src/setup-context.test.ts create mode 100644 packages/cli/src/setup-context.ts create mode 100644 packages/cli/src/setup-databases.test.ts create mode 100644 packages/cli/src/setup-databases.ts create mode 100644 packages/cli/src/setup-embeddings.test.ts create mode 100644 packages/cli/src/setup-embeddings.ts create mode 100644 packages/cli/src/setup-interrupt.test.ts create mode 100644 packages/cli/src/setup-interrupt.ts create mode 100644 packages/cli/src/setup-models.test.ts create mode 100644 packages/cli/src/setup-models.ts create mode 100644 packages/cli/src/setup-project.test.ts create mode 100644 packages/cli/src/setup-project.ts create mode 100644 packages/cli/src/setup-ready-menu.test.ts create mode 100644 packages/cli/src/setup-ready-menu.ts 
create mode 100644 packages/cli/src/setup-secrets.test.ts create mode 100644 packages/cli/src/setup-secrets.ts create mode 100644 packages/cli/src/setup-sources.test.ts create mode 100644 packages/cli/src/setup-sources.ts create mode 100644 packages/cli/src/setup.test.ts create mode 100644 packages/cli/src/setup.ts create mode 100644 packages/cli/src/sl.test.ts create mode 100644 packages/cli/src/sl.ts create mode 100644 packages/cli/src/standalone-smoke.test.ts create mode 100644 packages/cli/src/startup-profile.ts create mode 100644 packages/cli/src/viz-fallback.test.ts create mode 100644 packages/cli/src/viz-fallback.ts create mode 100644 packages/cli/tsconfig.json create mode 100644 packages/cli/vitest.config.ts create mode 100644 packages/connector-bigquery/package.json create mode 100644 packages/connector-bigquery/src/connector.test.ts create mode 100644 packages/connector-bigquery/src/connector.ts create mode 100644 packages/connector-bigquery/src/dialect.test.ts create mode 100644 packages/connector-bigquery/src/dialect.ts create mode 100644 packages/connector-bigquery/src/index.ts create mode 100644 packages/connector-bigquery/src/live-database-introspection.ts create mode 100644 packages/connector-bigquery/src/package-exports.test.ts create mode 100644 packages/connector-bigquery/tsconfig.json create mode 100644 packages/connector-clickhouse/package.json create mode 100644 packages/connector-clickhouse/src/connector.test.ts create mode 100644 packages/connector-clickhouse/src/connector.ts create mode 100644 packages/connector-clickhouse/src/dialect.test.ts create mode 100644 packages/connector-clickhouse/src/dialect.ts create mode 100644 packages/connector-clickhouse/src/index.ts create mode 100644 packages/connector-clickhouse/src/live-database-introspection.ts create mode 100644 packages/connector-clickhouse/src/package-exports.test.ts create mode 100644 packages/connector-clickhouse/tsconfig.json create mode 100644 
packages/connector-mysql/package.json create mode 100644 packages/connector-mysql/src/connector.test.ts create mode 100644 packages/connector-mysql/src/connector.ts create mode 100644 packages/connector-mysql/src/dialect.test.ts create mode 100644 packages/connector-mysql/src/dialect.ts create mode 100644 packages/connector-mysql/src/index.ts create mode 100644 packages/connector-mysql/src/live-database-introspection.ts create mode 100644 packages/connector-mysql/src/package-exports.test.ts create mode 100644 packages/connector-mysql/tsconfig.json create mode 100644 packages/connector-postgres/package.json create mode 100644 packages/connector-postgres/src/connector.test.ts create mode 100644 packages/connector-postgres/src/connector.ts create mode 100644 packages/connector-postgres/src/dialect.test.ts create mode 100644 packages/connector-postgres/src/dialect.ts create mode 100644 packages/connector-postgres/src/historic-sql-query-client.test.ts create mode 100644 packages/connector-postgres/src/historic-sql-query-client.ts create mode 100644 packages/connector-postgres/src/index.ts create mode 100644 packages/connector-postgres/src/live-database-introspection.ts create mode 100644 packages/connector-postgres/src/package-exports.test.ts create mode 100644 packages/connector-postgres/tsconfig.json create mode 100644 packages/connector-posthog/package.json create mode 100644 packages/connector-posthog/src/connector.test.ts create mode 100644 packages/connector-posthog/src/connector.ts create mode 100644 packages/connector-posthog/src/dialect.test.ts create mode 100644 packages/connector-posthog/src/dialect.ts create mode 100644 packages/connector-posthog/src/index.ts create mode 100644 packages/connector-posthog/src/live-database-introspection.ts create mode 100644 packages/connector-posthog/src/package-exports.test.ts create mode 100644 packages/connector-posthog/src/schema-descriptions.ts create mode 100644 packages/connector-posthog/tsconfig.json create mode 
100644 packages/connector-snowflake/package.json create mode 100644 packages/connector-snowflake/src/connector.test.ts create mode 100644 packages/connector-snowflake/src/connector.ts create mode 100644 packages/connector-snowflake/src/dialect.test.ts create mode 100644 packages/connector-snowflake/src/dialect.ts create mode 100644 packages/connector-snowflake/src/index.ts create mode 100644 packages/connector-snowflake/src/live-database-introspection.ts create mode 100644 packages/connector-snowflake/src/package-exports.test.ts create mode 100644 packages/connector-snowflake/tsconfig.json create mode 100644 packages/connector-sqlite/package.json create mode 100644 packages/connector-sqlite/src/connector.test.ts create mode 100644 packages/connector-sqlite/src/connector.ts create mode 100644 packages/connector-sqlite/src/dialect.test.ts create mode 100644 packages/connector-sqlite/src/dialect.ts create mode 100644 packages/connector-sqlite/src/index.ts create mode 100644 packages/connector-sqlite/src/live-database-introspection.ts create mode 100644 packages/connector-sqlite/src/package-exports.test.ts create mode 100644 packages/connector-sqlite/tsconfig.json create mode 100644 packages/connector-sqlserver/package.json create mode 100644 packages/connector-sqlserver/src/connector.test.ts create mode 100644 packages/connector-sqlserver/src/connector.ts create mode 100644 packages/connector-sqlserver/src/dialect.test.ts create mode 100644 packages/connector-sqlserver/src/dialect.ts create mode 100644 packages/connector-sqlserver/src/index.ts create mode 100644 packages/connector-sqlserver/src/live-database-introspection.ts create mode 100644 packages/connector-sqlserver/src/package-exports.test.ts create mode 100644 packages/connector-sqlserver/tsconfig.json create mode 100644 packages/context/package.json create mode 100644 packages/context/prompts/memory_agent_backfill.md create mode 100644 packages/context/prompts/memory_agent_bundle_ingest_reconcile.md create 
mode 100644 packages/context/prompts/memory_agent_bundle_ingest_work_unit.md create mode 100644 packages/context/prompts/memory_agent_external_ingest.md create mode 100644 packages/context/prompts/memory_agent_research.md create mode 100644 packages/context/prompts/skills/light_extraction.md create mode 100644 packages/context/prompts/skills/page_triage_classifier.md create mode 100644 packages/context/scripts/pglite-hybrid-search-spike.mjs create mode 100644 packages/context/scripts/pglite-owner-process-prototype.mjs create mode 100644 packages/context/scripts/pglite-sl-search-prototype.mjs create mode 100644 packages/context/scripts/relationship-benchmark-report.mjs create mode 100644 packages/context/skills/dbt_ingest/SKILL.md create mode 100644 packages/context/skills/historic_sql_curator/SKILL.md create mode 100644 packages/context/skills/historic_sql_ingest/SKILL.md create mode 100644 packages/context/skills/ingest_triage/SKILL.md create mode 100644 packages/context/skills/knowledge_capture/SKILL.md create mode 100644 packages/context/skills/live_database_ingest/SKILL.md create mode 100644 packages/context/skills/looker_ingest/SKILL.md create mode 100644 packages/context/skills/lookml_ingest/SKILL.md create mode 100644 packages/context/skills/metabase_ingest/SKILL.md create mode 100644 packages/context/skills/metricflow_ingest/SKILL.md create mode 100644 packages/context/skills/notion_synthesize/SKILL.md create mode 100644 packages/context/skills/sl/SKILL.md create mode 100644 packages/context/skills/sl_capture/SKILL.md create mode 100644 packages/context/src/agent/agent-runner.service.test.ts create mode 100644 packages/context/src/agent/agent-runner.service.ts create mode 100644 packages/context/src/agent/index.ts create mode 100644 packages/context/src/connections/connection-type.ts create mode 100644 packages/context/src/connections/index.ts create mode 100644 packages/context/src/connections/local-query-executor.test.ts create mode 100644 
packages/context/src/connections/local-query-executor.ts create mode 100644 packages/context/src/connections/local-warehouse-descriptor.test.ts create mode 100644 packages/context/src/connections/local-warehouse-descriptor.ts create mode 100644 packages/context/src/connections/notion-config.test.ts create mode 100644 packages/context/src/connections/notion-config.ts create mode 100644 packages/context/src/connections/postgres-query-executor.test.ts create mode 100644 packages/context/src/connections/postgres-query-executor.ts create mode 100644 packages/context/src/connections/query-executor.ts create mode 100644 packages/context/src/connections/read-only-sql.test.ts create mode 100644 packages/context/src/connections/read-only-sql.ts create mode 100644 packages/context/src/connections/sqlite-query-executor.test.ts create mode 100644 packages/context/src/connections/sqlite-query-executor.ts create mode 100644 packages/context/src/core/config-reference.test.ts create mode 100644 packages/context/src/core/config-reference.ts create mode 100644 packages/context/src/core/config.ts create mode 100644 packages/context/src/core/embedding.ts create mode 100644 packages/context/src/core/file-store.ts create mode 100644 packages/context/src/core/git-env.ts create mode 100644 packages/context/src/core/git.service.assert-worktree-clean.test.ts create mode 100644 packages/context/src/core/git.service.delete-directories.test.ts create mode 100644 packages/context/src/core/git.service.reset-hard.test.ts create mode 100644 packages/context/src/core/git.service.test.ts create mode 100644 packages/context/src/core/git.service.ts create mode 100644 packages/context/src/core/index.ts create mode 100644 packages/context/src/core/redaction.ts create mode 100644 packages/context/src/core/session-worktree.service.test.ts create mode 100644 packages/context/src/core/session-worktree.service.ts create mode 100644 packages/context/src/daemon/index.ts create mode 100644 
packages/context/src/daemon/semantic-layer-compute.test.ts create mode 100644 packages/context/src/daemon/semantic-layer-compute.ts create mode 100644 packages/context/src/index.test.ts create mode 100644 packages/context/src/index.ts create mode 100644 packages/context/src/ingest/action-identity.test.ts create mode 100644 packages/context/src/ingest/action-identity.ts create mode 100644 packages/context/src/ingest/adapters/dbt-descriptions/match-tables.test.ts create mode 100644 packages/context/src/ingest/adapters/dbt-descriptions/match-tables.ts create mode 100644 packages/context/src/ingest/adapters/dbt-descriptions/merge-semantic-model-tables.test.ts create mode 100644 packages/context/src/ingest/adapters/dbt-descriptions/merge-semantic-model-tables.ts create mode 100644 packages/context/src/ingest/adapters/dbt-descriptions/parse-schema.test.ts create mode 100644 packages/context/src/ingest/adapters/dbt-descriptions/parse-schema.ts create mode 100644 packages/context/src/ingest/adapters/dbt-descriptions/to-description-updates.test.ts create mode 100644 packages/context/src/ingest/adapters/dbt-descriptions/to-description-updates.ts create mode 100644 packages/context/src/ingest/adapters/dbt-descriptions/to-metadata-updates.test.ts create mode 100644 packages/context/src/ingest/adapters/dbt-descriptions/to-metadata-updates.ts create mode 100644 packages/context/src/ingest/adapters/dbt-descriptions/to-relationship-updates.test.ts create mode 100644 packages/context/src/ingest/adapters/dbt-descriptions/to-relationship-updates.ts create mode 100644 packages/context/src/ingest/adapters/dbt-extraction-golden-parity.test.ts create mode 100644 packages/context/src/ingest/adapters/dbt/chunk.test.ts create mode 100644 packages/context/src/ingest/adapters/dbt/chunk.ts create mode 100644 packages/context/src/ingest/adapters/dbt/dbt.adapter.test.ts create mode 100644 packages/context/src/ingest/adapters/dbt/dbt.adapter.ts create mode 100644 
packages/context/src/ingest/adapters/dbt/detect.ts create mode 100644 packages/context/src/ingest/adapters/dbt/fetch.test.ts create mode 100644 packages/context/src/ingest/adapters/dbt/fetch.ts create mode 100644 packages/context/src/ingest/adapters/dbt/parse.test.ts create mode 100644 packages/context/src/ingest/adapters/dbt/parse.ts create mode 100644 packages/context/src/ingest/adapters/fake/fake.adapter.ts create mode 100644 packages/context/src/ingest/adapters/historic-sql/__fixtures__/postgres/eviction-churn/input.json create mode 100644 packages/context/src/ingest/adapters/historic-sql/__fixtures__/postgres/first-run/input.json create mode 100644 packages/context/src/ingest/adapters/historic-sql/__fixtures__/postgres/normal-delta/input.json create mode 100644 packages/context/src/ingest/adapters/historic-sql/__fixtures__/postgres/reset-detected/input.json create mode 100644 packages/context/src/ingest/adapters/historic-sql/__fixtures__/postgres/version-change/input.json create mode 100644 packages/context/src/ingest/adapters/historic-sql/bigquery-query-history-reader.test.ts create mode 100644 packages/context/src/ingest/adapters/historic-sql/bigquery-query-history-reader.ts create mode 100644 packages/context/src/ingest/adapters/historic-sql/chunk.test.ts create mode 100644 packages/context/src/ingest/adapters/historic-sql/chunk.ts create mode 100644 packages/context/src/ingest/adapters/historic-sql/detect.test.ts create mode 100644 packages/context/src/ingest/adapters/historic-sql/detect.ts create mode 100644 packages/context/src/ingest/adapters/historic-sql/errors.ts create mode 100644 packages/context/src/ingest/adapters/historic-sql/historic-sql.adapter.test.ts create mode 100644 packages/context/src/ingest/adapters/historic-sql/historic-sql.adapter.ts create mode 100644 packages/context/src/ingest/adapters/historic-sql/postgres-pgss-query-history-reader.test.ts create mode 100644 
packages/context/src/ingest/adapters/historic-sql/postgres-pgss-query-history-reader.ts create mode 100644 packages/context/src/ingest/adapters/historic-sql/snowflake-query-history-reader.test.ts create mode 100644 packages/context/src/ingest/adapters/historic-sql/snowflake-query-history-reader.ts create mode 100644 packages/context/src/ingest/adapters/historic-sql/stage-pgss-golden.test.ts create mode 100644 packages/context/src/ingest/adapters/historic-sql/stage-pgss.test.ts create mode 100644 packages/context/src/ingest/adapters/historic-sql/stage-pgss.ts create mode 100644 packages/context/src/ingest/adapters/historic-sql/stage.test.ts create mode 100644 packages/context/src/ingest/adapters/historic-sql/stage.ts create mode 100644 packages/context/src/ingest/adapters/historic-sql/types.ts create mode 100644 packages/context/src/ingest/adapters/live-database/chunk.test.ts create mode 100644 packages/context/src/ingest/adapters/live-database/chunk.ts create mode 100644 packages/context/src/ingest/adapters/live-database/daemon-introspection.test.ts create mode 100644 packages/context/src/ingest/adapters/live-database/daemon-introspection.ts create mode 100644 packages/context/src/ingest/adapters/live-database/extracted-schema.test.ts create mode 100644 packages/context/src/ingest/adapters/live-database/extracted-schema.ts create mode 100644 packages/context/src/ingest/adapters/live-database/live-database.adapter.test.ts create mode 100644 packages/context/src/ingest/adapters/live-database/live-database.adapter.ts create mode 100644 packages/context/src/ingest/adapters/live-database/manifest.test.ts create mode 100644 packages/context/src/ingest/adapters/live-database/manifest.ts create mode 100644 packages/context/src/ingest/adapters/live-database/stage.test.ts create mode 100644 packages/context/src/ingest/adapters/live-database/stage.ts create mode 100644 packages/context/src/ingest/adapters/live-database/structural-sync.test.ts create mode 100644 
packages/context/src/ingest/adapters/live-database/structural-sync.ts create mode 100644 packages/context/src/ingest/adapters/live-database/types.ts create mode 100644 packages/context/src/ingest/adapters/looker/chunk.test.ts create mode 100644 packages/context/src/ingest/adapters/looker/chunk.ts create mode 100644 packages/context/src/ingest/adapters/looker/client-boundary.test.ts create mode 100644 packages/context/src/ingest/adapters/looker/client.test.ts create mode 100644 packages/context/src/ingest/adapters/looker/client.ts create mode 100644 packages/context/src/ingest/adapters/looker/daemon-table-identifier-parser.test.ts create mode 100644 packages/context/src/ingest/adapters/looker/daemon-table-identifier-parser.ts create mode 100644 packages/context/src/ingest/adapters/looker/detect.test.ts create mode 100644 packages/context/src/ingest/adapters/looker/detect.ts create mode 100644 packages/context/src/ingest/adapters/looker/evidence-documents.test.ts create mode 100644 packages/context/src/ingest/adapters/looker/evidence-documents.ts create mode 100644 packages/context/src/ingest/adapters/looker/factory.test.ts create mode 100644 packages/context/src/ingest/adapters/looker/factory.ts create mode 100644 packages/context/src/ingest/adapters/looker/fetch-report.test.ts create mode 100644 packages/context/src/ingest/adapters/looker/fetch-report.ts create mode 100644 packages/context/src/ingest/adapters/looker/fetch.test.ts create mode 100644 packages/context/src/ingest/adapters/looker/fetch.ts create mode 100644 packages/context/src/ingest/adapters/looker/local-looker.adapter.ts create mode 100644 packages/context/src/ingest/adapters/looker/local-runtime-store.test.ts create mode 100644 packages/context/src/ingest/adapters/looker/local-runtime-store.ts create mode 100644 packages/context/src/ingest/adapters/looker/looker.adapter.test.ts create mode 100644 packages/context/src/ingest/adapters/looker/looker.adapter.ts create mode 100644 
packages/context/src/ingest/adapters/looker/mapping.test.ts create mode 100644 packages/context/src/ingest/adapters/looker/mapping.ts create mode 100644 packages/context/src/ingest/adapters/looker/reconcile.test.ts create mode 100644 packages/context/src/ingest/adapters/looker/reconcile.ts create mode 100644 packages/context/src/ingest/adapters/looker/scope.test.ts create mode 100644 packages/context/src/ingest/adapters/looker/scope.ts create mode 100644 packages/context/src/ingest/adapters/looker/target-connections.test.ts create mode 100644 packages/context/src/ingest/adapters/looker/target-connections.ts create mode 100644 packages/context/src/ingest/adapters/looker/tools/looker-query-to-sl.tool.test.ts create mode 100644 packages/context/src/ingest/adapters/looker/tools/looker-query-to-sl.tool.ts create mode 100644 packages/context/src/ingest/adapters/looker/types.test.ts create mode 100644 packages/context/src/ingest/adapters/looker/types.ts create mode 100644 packages/context/src/ingest/adapters/lookml/chunk.test.ts create mode 100644 packages/context/src/ingest/adapters/lookml/chunk.ts create mode 100644 packages/context/src/ingest/adapters/lookml/detect.test.ts create mode 100644 packages/context/src/ingest/adapters/lookml/detect.ts create mode 100644 packages/context/src/ingest/adapters/lookml/fetch-report.test.ts create mode 100644 packages/context/src/ingest/adapters/lookml/fetch-report.ts create mode 100644 packages/context/src/ingest/adapters/lookml/fetch.test.ts create mode 100644 packages/context/src/ingest/adapters/lookml/fetch.ts create mode 100644 packages/context/src/ingest/adapters/lookml/graph.test.ts create mode 100644 packages/context/src/ingest/adapters/lookml/graph.ts create mode 100644 packages/context/src/ingest/adapters/lookml/lookml-parser.d.ts create mode 100644 packages/context/src/ingest/adapters/lookml/lookml.adapter.test.ts create mode 100644 packages/context/src/ingest/adapters/lookml/lookml.adapter.ts create mode 100644 
packages/context/src/ingest/adapters/lookml/parse.test.ts create mode 100644 packages/context/src/ingest/adapters/lookml/parse.ts create mode 100644 packages/context/src/ingest/adapters/lookml/pull-config.test.ts create mode 100644 packages/context/src/ingest/adapters/lookml/pull-config.ts create mode 100644 packages/context/src/ingest/adapters/metabase/card-references.test.ts create mode 100644 packages/context/src/ingest/adapters/metabase/card-references.ts create mode 100644 packages/context/src/ingest/adapters/metabase/chunk.test.ts create mode 100644 packages/context/src/ingest/adapters/metabase/chunk.ts create mode 100644 packages/context/src/ingest/adapters/metabase/client-boundary.test.ts create mode 100644 packages/context/src/ingest/adapters/metabase/client-port.test.ts create mode 100644 packages/context/src/ingest/adapters/metabase/client-port.ts create mode 100644 packages/context/src/ingest/adapters/metabase/client.test.ts create mode 100644 packages/context/src/ingest/adapters/metabase/client.ts create mode 100644 packages/context/src/ingest/adapters/metabase/detect.test.ts create mode 100644 packages/context/src/ingest/adapters/metabase/detect.ts create mode 100644 packages/context/src/ingest/adapters/metabase/fanout-planner.test.ts create mode 100644 packages/context/src/ingest/adapters/metabase/fanout-planner.ts create mode 100644 packages/context/src/ingest/adapters/metabase/fetch-scope.test.ts create mode 100644 packages/context/src/ingest/adapters/metabase/fetch-scope.ts create mode 100644 packages/context/src/ingest/adapters/metabase/fetch.test.ts create mode 100644 packages/context/src/ingest/adapters/metabase/fetch.ts create mode 100644 packages/context/src/ingest/adapters/metabase/local-metabase.adapter.test.ts create mode 100644 packages/context/src/ingest/adapters/metabase/local-metabase.adapter.ts create mode 100644 packages/context/src/ingest/adapters/metabase/local-source-state-store.test.ts create mode 100644 
packages/context/src/ingest/adapters/metabase/local-source-state-store.ts create mode 100644 packages/context/src/ingest/adapters/metabase/mapping.test.ts create mode 100644 packages/context/src/ingest/adapters/metabase/mapping.ts create mode 100644 packages/context/src/ingest/adapters/metabase/metabase.adapter.test.ts create mode 100644 packages/context/src/ingest/adapters/metabase/metabase.adapter.ts create mode 100644 packages/context/src/ingest/adapters/metabase/serialize-card.test.ts create mode 100644 packages/context/src/ingest/adapters/metabase/serialize-card.ts create mode 100644 packages/context/src/ingest/adapters/metabase/source-state-port.ts create mode 100644 packages/context/src/ingest/adapters/metabase/types.test.ts create mode 100644 packages/context/src/ingest/adapters/metabase/types.ts create mode 100644 packages/context/src/ingest/adapters/metricflow/chunk.test.ts create mode 100644 packages/context/src/ingest/adapters/metricflow/chunk.ts create mode 100644 packages/context/src/ingest/adapters/metricflow/deep-parse.test.ts create mode 100644 packages/context/src/ingest/adapters/metricflow/deep-parse.ts create mode 100644 packages/context/src/ingest/adapters/metricflow/detect.test.ts create mode 100644 packages/context/src/ingest/adapters/metricflow/detect.ts create mode 100644 packages/context/src/ingest/adapters/metricflow/fetch.test.ts create mode 100644 packages/context/src/ingest/adapters/metricflow/fetch.ts create mode 100644 packages/context/src/ingest/adapters/metricflow/graph.test.ts create mode 100644 packages/context/src/ingest/adapters/metricflow/graph.ts create mode 100644 packages/context/src/ingest/adapters/metricflow/import-semantic-models.test.ts create mode 100644 packages/context/src/ingest/adapters/metricflow/import-semantic-models.ts create mode 100644 packages/context/src/ingest/adapters/metricflow/metricflow.adapter.test.ts create mode 100644 packages/context/src/ingest/adapters/metricflow/metricflow.adapter.ts create mode 
100644 packages/context/src/ingest/adapters/metricflow/parse.test.ts create mode 100644 packages/context/src/ingest/adapters/metricflow/parse.ts create mode 100644 packages/context/src/ingest/adapters/metricflow/pull-config.test.ts create mode 100644 packages/context/src/ingest/adapters/metricflow/pull-config.ts create mode 100644 packages/context/src/ingest/adapters/metricflow/semantic-models.test.ts create mode 100644 packages/context/src/ingest/adapters/metricflow/semantic-models.ts create mode 100644 packages/context/src/ingest/adapters/notion/chunk.ts create mode 100644 packages/context/src/ingest/adapters/notion/cluster.test.ts create mode 100644 packages/context/src/ingest/adapters/notion/cluster.ts create mode 100644 packages/context/src/ingest/adapters/notion/detect.ts create mode 100644 packages/context/src/ingest/adapters/notion/fetch.test.ts create mode 100644 packages/context/src/ingest/adapters/notion/fetch.ts create mode 100644 packages/context/src/ingest/adapters/notion/normalize.test.ts create mode 100644 packages/context/src/ingest/adapters/notion/normalize.ts create mode 100644 packages/context/src/ingest/adapters/notion/notion-client.test.ts create mode 100644 packages/context/src/ingest/adapters/notion/notion-client.ts create mode 100644 packages/context/src/ingest/adapters/notion/notion.adapter.test.ts create mode 100644 packages/context/src/ingest/adapters/notion/notion.adapter.ts create mode 100644 packages/context/src/ingest/adapters/notion/pull-config.ts create mode 100644 packages/context/src/ingest/adapters/notion/types.ts create mode 100644 packages/context/src/ingest/canonical-pins.test.ts create mode 100644 packages/context/src/ingest/canonical-pins.ts create mode 100644 packages/context/src/ingest/clustering/kmeans.test.ts create mode 100644 packages/context/src/ingest/clustering/kmeans.ts create mode 100644 packages/context/src/ingest/context-candidates/candidate-dedup.service.test.ts create mode 100644 
packages/context/src/ingest/context-candidates/candidate-dedup.service.ts create mode 100644 packages/context/src/ingest/context-candidates/context-candidate-carryforward.service.test.ts create mode 100644 packages/context/src/ingest/context-candidates/context-candidate-carryforward.service.ts create mode 100644 packages/context/src/ingest/context-candidates/curator-pagination.service.test.ts create mode 100644 packages/context/src/ingest/context-candidates/curator-pagination.service.ts create mode 100644 packages/context/src/ingest/context-candidates/embedding-text.test.ts create mode 100644 packages/context/src/ingest/context-candidates/embedding-text.ts create mode 100644 packages/context/src/ingest/context-candidates/index.ts create mode 100644 packages/context/src/ingest/context-candidates/store.test.ts create mode 100644 packages/context/src/ingest/context-candidates/store.ts create mode 100644 packages/context/src/ingest/context-candidates/types.ts create mode 100644 packages/context/src/ingest/context-evidence/context-evidence-index.service.test.ts create mode 100644 packages/context/src/ingest/context-evidence/context-evidence-index.service.ts create mode 100644 packages/context/src/ingest/context-evidence/index.ts create mode 100644 packages/context/src/ingest/context-evidence/sqlite-context-evidence-store.test.ts create mode 100644 packages/context/src/ingest/context-evidence/sqlite-context-evidence-store.ts create mode 100644 packages/context/src/ingest/context-evidence/store.test.ts create mode 100644 packages/context/src/ingest/context-evidence/store.ts create mode 100644 packages/context/src/ingest/context-evidence/types.ts create mode 100644 packages/context/src/ingest/dbt-shared/project-vars.test.ts create mode 100644 packages/context/src/ingest/dbt-shared/project-vars.ts create mode 100644 packages/context/src/ingest/dbt-shared/schema-files.test.ts create mode 100644 packages/context/src/ingest/dbt-shared/schema-files.ts create mode 100644 
packages/context/src/ingest/diff-set.service.test.ts create mode 100644 packages/context/src/ingest/diff-set.service.ts create mode 100644 packages/context/src/ingest/git-env.ts create mode 100644 packages/context/src/ingest/index.ts create mode 100644 packages/context/src/ingest/ingest-bundle.runner.test.ts create mode 100644 packages/context/src/ingest/ingest-bundle.runner.ts create mode 100644 packages/context/src/ingest/ingest-prompts.test.ts create mode 100644 packages/context/src/ingest/ingest-runtime-assets.test.ts create mode 100644 packages/context/src/ingest/local-adapters.test.ts create mode 100644 packages/context/src/ingest/local-adapters.ts create mode 100644 packages/context/src/ingest/local-bundle-ingest.test.ts create mode 100644 packages/context/src/ingest/local-bundle-runtime.test.ts create mode 100644 packages/context/src/ingest/local-bundle-runtime.ts create mode 100644 packages/context/src/ingest/local-embedding-provider.integration.test.ts create mode 100644 packages/context/src/ingest/local-ingest.ts create mode 100644 packages/context/src/ingest/local-mapping-reconcile.test.ts create mode 100644 packages/context/src/ingest/local-mapping-reconcile.ts create mode 100644 packages/context/src/ingest/local-metabase-ingest.test.ts create mode 100644 packages/context/src/ingest/local-stage-ingest.test.ts create mode 100644 packages/context/src/ingest/local-stage-ingest.ts create mode 100644 packages/context/src/ingest/memory-flow/acceptance-fixtures.ts create mode 100644 packages/context/src/ingest/memory-flow/acceptance.test.ts create mode 100644 packages/context/src/ingest/memory-flow/events.test.ts create mode 100644 packages/context/src/ingest/memory-flow/events.ts create mode 100644 packages/context/src/ingest/memory-flow/index.ts create mode 100644 packages/context/src/ingest/memory-flow/interaction.test.ts create mode 100644 packages/context/src/ingest/memory-flow/interaction.ts create mode 100644 
packages/context/src/ingest/memory-flow/interactive-render.test.ts create mode 100644 packages/context/src/ingest/memory-flow/interactive-render.ts create mode 100644 packages/context/src/ingest/memory-flow/live-buffer.test.ts create mode 100644 packages/context/src/ingest/memory-flow/live-buffer.ts create mode 100644 packages/context/src/ingest/memory-flow/package-export.test.ts create mode 100644 packages/context/src/ingest/memory-flow/render.test.ts create mode 100644 packages/context/src/ingest/memory-flow/render.ts create mode 100644 packages/context/src/ingest/memory-flow/schema.test.ts create mode 100644 packages/context/src/ingest/memory-flow/schema.ts create mode 100644 packages/context/src/ingest/memory-flow/summary.test.ts create mode 100644 packages/context/src/ingest/memory-flow/summary.ts create mode 100644 packages/context/src/ingest/memory-flow/types.ts create mode 100644 packages/context/src/ingest/memory-flow/view-model.test.ts create mode 100644 packages/context/src/ingest/memory-flow/view-model.ts create mode 100644 packages/context/src/ingest/memory-flow/visuals.test.ts create mode 100644 packages/context/src/ingest/memory-flow/visuals.ts create mode 100644 packages/context/src/ingest/metabase-mapping.ts create mode 100644 packages/context/src/ingest/page-triage/index.ts create mode 100644 packages/context/src/ingest/page-triage/page-triage.service.test.ts create mode 100644 packages/context/src/ingest/page-triage/page-triage.service.ts create mode 100644 packages/context/src/ingest/parsed-target-table.ts create mode 100644 packages/context/src/ingest/ports.ts create mode 100644 packages/context/src/ingest/raw-sources-paths.test.ts create mode 100644 packages/context/src/ingest/raw-sources-paths.ts create mode 100644 packages/context/src/ingest/repo-fetch.test.ts create mode 100644 packages/context/src/ingest/repo-fetch.ts create mode 100644 packages/context/src/ingest/report-snapshot.test.ts create mode 100644 
packages/context/src/ingest/report-snapshot.ts create mode 100644 packages/context/src/ingest/reports.ts create mode 100644 packages/context/src/ingest/source-adapter-registry.test.ts create mode 100644 packages/context/src/ingest/source-adapter-registry.ts create mode 100644 packages/context/src/ingest/sqlite-bundle-ingest-store.test.ts create mode 100644 packages/context/src/ingest/sqlite-bundle-ingest-store.ts create mode 100644 packages/context/src/ingest/sqlite-local-ingest-store.test.ts create mode 100644 packages/context/src/ingest/sqlite-local-ingest-store.ts create mode 100644 packages/context/src/ingest/stages/build-reconcile-context.context-candidates.test.ts create mode 100644 packages/context/src/ingest/stages/build-reconcile-context.test.ts create mode 100644 packages/context/src/ingest/stages/build-reconcile-context.ts create mode 100644 packages/context/src/ingest/stages/build-wu-context.test.ts create mode 100644 packages/context/src/ingest/stages/build-wu-context.ts create mode 100644 packages/context/src/ingest/stages/stage-1-stage-raw-files.test.ts create mode 100644 packages/context/src/ingest/stages/stage-1-stage-raw-files.ts create mode 100644 packages/context/src/ingest/stages/stage-3-work-units.test.ts create mode 100644 packages/context/src/ingest/stages/stage-3-work-units.ts create mode 100644 packages/context/src/ingest/stages/stage-4-reconciliation.test.ts create mode 100644 packages/context/src/ingest/stages/stage-4-reconciliation.ts create mode 100644 packages/context/src/ingest/stages/stage-index.types.ts create mode 100644 packages/context/src/ingest/stages/validate-wu-sources.test.ts create mode 100644 packages/context/src/ingest/stages/validate-wu-sources.ts create mode 100644 packages/context/src/ingest/tools/emit-artifact-resolution.tool.ts create mode 100644 packages/context/src/ingest/tools/emit-conflict-resolution.tool.ts create mode 100644 packages/context/src/ingest/tools/emit-eviction-decision.tool.ts create mode 100644 
packages/context/src/ingest/tools/emit-reconciliation-records.tool.test.ts create mode 100644 packages/context/src/ingest/tools/emit-unmapped-fallback.tool.ts create mode 100644 packages/context/src/ingest/tools/eviction-list.tool.test.ts create mode 100644 packages/context/src/ingest/tools/eviction-list.tool.ts create mode 100644 packages/context/src/ingest/tools/read-raw-file.tool.test.ts create mode 100644 packages/context/src/ingest/tools/read-raw-file.tool.ts create mode 100644 packages/context/src/ingest/tools/read-raw-span.tool.test.ts create mode 100644 packages/context/src/ingest/tools/read-raw-span.tool.ts create mode 100644 packages/context/src/ingest/tools/stage-diff.tool.test.ts create mode 100644 packages/context/src/ingest/tools/stage-diff.tool.ts create mode 100644 packages/context/src/ingest/tools/stage-list.tool.test.ts create mode 100644 packages/context/src/ingest/tools/stage-list.tool.ts create mode 100644 packages/context/src/ingest/tools/tool-call-logger.ts create mode 100644 packages/context/src/ingest/types.ts create mode 100644 packages/context/src/llm/debug-request-recorder.test.ts create mode 100644 packages/context/src/llm/debug-request-recorder.ts create mode 100644 packages/context/src/llm/embedding-port.test.ts create mode 100644 packages/context/src/llm/embedding-port.ts create mode 100644 packages/context/src/llm/generation.ts create mode 100644 packages/context/src/llm/index.ts create mode 100644 packages/context/src/llm/local-config.test.ts create mode 100644 packages/context/src/llm/local-config.ts create mode 100644 packages/context/src/mcp/context-tools.ts create mode 100644 packages/context/src/mcp/index.ts create mode 100644 packages/context/src/mcp/local-project-ports.test.ts create mode 100644 packages/context/src/mcp/local-project-ports.ts create mode 100644 packages/context/src/mcp/server.test.ts create mode 100644 packages/context/src/mcp/server.ts create mode 100644 packages/context/src/mcp/types.ts create mode 100644 
packages/context/src/memory/capture-signals.ts create mode 100644 packages/context/src/memory/index.ts create mode 100644 packages/context/src/memory/local-memory-runs.ts create mode 100644 packages/context/src/memory/local-memory.test.ts create mode 100644 packages/context/src/memory/local-memory.ts create mode 100644 packages/context/src/memory/memory-agent.service.ingest.test.ts create mode 100644 packages/context/src/memory/memory-agent.service.test.ts create mode 100644 packages/context/src/memory/memory-agent.service.ts create mode 100644 packages/context/src/memory/memory-runs.test.ts create mode 100644 packages/context/src/memory/memory-runs.ts create mode 100644 packages/context/src/memory/memory-runtime-assets.test.ts create mode 100644 packages/context/src/memory/types.ts create mode 100644 packages/context/src/package-exports.test.ts create mode 100644 packages/context/src/project/config.test.ts create mode 100644 packages/context/src/project/config.ts create mode 100644 packages/context/src/project/index.ts create mode 100644 packages/context/src/project/local-git-file-store.test.ts create mode 100644 packages/context/src/project/local-git-file-store.ts create mode 100644 packages/context/src/project/local-state-db.ts create mode 100644 packages/context/src/project/mappings-yaml-schema.test.ts create mode 100644 packages/context/src/project/mappings-yaml-schema.ts create mode 100644 packages/context/src/project/project.test.ts create mode 100644 packages/context/src/project/project.ts create mode 100644 packages/context/src/project/setup-config.test.ts create mode 100644 packages/context/src/project/setup-config.ts create mode 100644 packages/context/src/prompts/index.ts create mode 100644 packages/context/src/prompts/prompt.service.test.ts create mode 100644 packages/context/src/prompts/prompt.service.ts create mode 100644 packages/context/src/scan/credentials.test.ts create mode 100644 packages/context/src/scan/credentials.ts create mode 100644 
packages/context/src/scan/data-dictionary.test.ts create mode 100644 packages/context/src/scan/data-dictionary.ts create mode 100644 packages/context/src/scan/description-generation.test.ts create mode 100644 packages/context/src/scan/description-generation.ts create mode 100644 packages/context/src/scan/embedding-text.test.ts create mode 100644 packages/context/src/scan/embedding-text.ts create mode 100644 packages/context/src/scan/enrichment-state.test.ts create mode 100644 packages/context/src/scan/enrichment-state.ts create mode 100644 packages/context/src/scan/enrichment-summary.test.ts create mode 100644 packages/context/src/scan/enrichment-summary.ts create mode 100644 packages/context/src/scan/enrichment-types.test.ts create mode 100644 packages/context/src/scan/enrichment-types.ts create mode 100644 packages/context/src/scan/index.ts create mode 100644 packages/context/src/scan/local-enrichment-artifacts.test.ts create mode 100644 packages/context/src/scan/local-enrichment-artifacts.ts create mode 100644 packages/context/src/scan/local-enrichment.test.ts create mode 100644 packages/context/src/scan/local-enrichment.ts create mode 100644 packages/context/src/scan/local-scan.test.ts create mode 100644 packages/context/src/scan/local-scan.ts create mode 100644 packages/context/src/scan/local-structural-artifacts.test.ts create mode 100644 packages/context/src/scan/local-structural-artifacts.ts create mode 100644 packages/context/src/scan/orchestrator.test.ts create mode 100644 packages/context/src/scan/orchestrator.ts create mode 100644 packages/context/src/scan/relationship-artifacts.test.ts create mode 100644 packages/context/src/scan/relationship-artifacts.ts create mode 100644 packages/context/src/scan/relationship-benchmark-report.test.ts create mode 100644 packages/context/src/scan/relationship-benchmark-report.ts create mode 100644 packages/context/src/scan/relationship-benchmarks.test.ts create mode 100644 
packages/context/src/scan/relationship-benchmarks.ts create mode 100644 packages/context/src/scan/relationship-budget.test.ts create mode 100644 packages/context/src/scan/relationship-budget.ts create mode 100644 packages/context/src/scan/relationship-candidates.test.ts create mode 100644 packages/context/src/scan/relationship-candidates.ts create mode 100644 packages/context/src/scan/relationship-composite-candidates.test.ts create mode 100644 packages/context/src/scan/relationship-composite-candidates.ts create mode 100644 packages/context/src/scan/relationship-diagnostics.test.ts create mode 100644 packages/context/src/scan/relationship-diagnostics.ts create mode 100644 packages/context/src/scan/relationship-discovery.test.ts create mode 100644 packages/context/src/scan/relationship-discovery.ts create mode 100644 packages/context/src/scan/relationship-feedback-calibration.test.ts create mode 100644 packages/context/src/scan/relationship-feedback-calibration.ts create mode 100644 packages/context/src/scan/relationship-feedback-export.test.ts create mode 100644 packages/context/src/scan/relationship-feedback-export.ts create mode 100644 packages/context/src/scan/relationship-formal-metadata.test.ts create mode 100644 packages/context/src/scan/relationship-formal-metadata.ts create mode 100644 packages/context/src/scan/relationship-graph-resolver.test.ts create mode 100644 packages/context/src/scan/relationship-graph-resolver.ts create mode 100644 packages/context/src/scan/relationship-llm-proposal.test.ts create mode 100644 packages/context/src/scan/relationship-llm-proposal.ts create mode 100644 packages/context/src/scan/relationship-locality.test.ts create mode 100644 packages/context/src/scan/relationship-locality.ts create mode 100644 packages/context/src/scan/relationship-name-similarity.test.ts create mode 100644 packages/context/src/scan/relationship-name-similarity.ts create mode 100644 packages/context/src/scan/relationship-profiling.test.ts create mode 
100644 packages/context/src/scan/relationship-profiling.ts create mode 100644 packages/context/src/scan/relationship-review-apply.test.ts create mode 100644 packages/context/src/scan/relationship-review-apply.ts create mode 100644 packages/context/src/scan/relationship-review-decisions.test.ts create mode 100644 packages/context/src/scan/relationship-review-decisions.ts create mode 100644 packages/context/src/scan/relationship-scoring.test.ts create mode 100644 packages/context/src/scan/relationship-scoring.ts create mode 100644 packages/context/src/scan/relationship-threshold-advice.test.ts create mode 100644 packages/context/src/scan/relationship-threshold-advice.ts create mode 100644 packages/context/src/scan/relationship-validation.test.ts create mode 100644 packages/context/src/scan/relationship-validation.ts create mode 100644 packages/context/src/scan/sqlite-local-enrichment-state-store.ts create mode 100644 packages/context/src/scan/type-normalization.test.ts create mode 100644 packages/context/src/scan/type-normalization.ts create mode 100644 packages/context/src/scan/types.test.ts create mode 100644 packages/context/src/scan/types.ts create mode 100644 packages/context/src/search/backend-conformance.test.ts create mode 100644 packages/context/src/search/backend-conformance.ts create mode 100644 packages/context/src/search/hybrid-search-core.test.ts create mode 100644 packages/context/src/search/hybrid-search-core.ts create mode 100644 packages/context/src/search/index.ts create mode 100644 packages/context/src/search/pglite-owner-process.test.ts create mode 100644 packages/context/src/search/pglite-owner-process.ts create mode 100644 packages/context/src/search/pglite-runtime-boundary.test.ts create mode 100644 packages/context/src/search/pglite-spike.test.ts create mode 100644 packages/context/src/search/query.test.ts create mode 100644 packages/context/src/search/query.ts create mode 100644 packages/context/src/search/rrf.test.ts create mode 100644 
packages/context/src/search/rrf.ts create mode 100644 packages/context/src/search/types.ts create mode 100644 packages/context/src/skills/index.ts create mode 100644 packages/context/src/skills/skills-registry.service.test.ts create mode 100644 packages/context/src/skills/skills-registry.service.ts create mode 100644 packages/context/src/sl/descriptions.ts create mode 100644 packages/context/src/sl/index.ts create mode 100644 packages/context/src/sl/local-query.test.ts create mode 100644 packages/context/src/sl/local-query.ts create mode 100644 packages/context/src/sl/local-sl.test.ts create mode 100644 packages/context/src/sl/local-sl.ts create mode 100644 packages/context/src/sl/pglite-sl-search-prototype.test.ts create mode 100644 packages/context/src/sl/pglite-sl-search-prototype.ts create mode 100644 packages/context/src/sl/ports.ts create mode 100644 packages/context/src/sl/schemas.ts create mode 100644 packages/context/src/sl/semantic-layer.service.test.ts create mode 100644 packages/context/src/sl/semantic-layer.service.ts create mode 100644 packages/context/src/sl/sl-dictionary-profile.test.ts create mode 100644 packages/context/src/sl/sl-dictionary-profile.ts create mode 100644 packages/context/src/sl/sl-search.service.test.ts create mode 100644 packages/context/src/sl/sl-search.service.ts create mode 100644 packages/context/src/sl/sl-validator.port.ts create mode 100644 packages/context/src/sl/sqlite-sl-sources-index.test.ts create mode 100644 packages/context/src/sl/sqlite-sl-sources-index.ts create mode 100644 packages/context/src/sl/tools/base-semantic-layer.tool.ts create mode 100644 packages/context/src/sl/tools/connection-id-schema.test.ts create mode 100644 packages/context/src/sl/tools/connection-id-schema.ts create mode 100644 packages/context/src/sl/tools/index.ts create mode 100644 packages/context/src/sl/tools/sl-discover.tool.ts create mode 100644 packages/context/src/sl/tools/sl-edit-source.tool.test.ts create mode 100644 
packages/context/src/sl/tools/sl-edit-source.tool.ts create mode 100644 packages/context/src/sl/tools/sl-read-source.tool.session.test.ts create mode 100644 packages/context/src/sl/tools/sl-read-source.tool.ts create mode 100644 packages/context/src/sl/tools/sl-rollback.tool.test.ts create mode 100644 packages/context/src/sl/tools/sl-rollback.tool.ts create mode 100644 packages/context/src/sl/tools/sl-validate.tool.test.ts create mode 100644 packages/context/src/sl/tools/sl-validate.tool.ts create mode 100644 packages/context/src/sl/tools/sl-warehouse-validation.test.ts create mode 100644 packages/context/src/sl/tools/sl-warehouse-validation.ts create mode 100644 packages/context/src/sl/tools/sl-write-source.tool.test.ts create mode 100644 packages/context/src/sl/tools/sl-write-source.tool.ts create mode 100644 packages/context/src/sl/types.ts create mode 100644 packages/context/src/sql-analysis/http-sql-analysis-port.test.ts create mode 100644 packages/context/src/sql-analysis/http-sql-analysis-port.ts create mode 100644 packages/context/src/sql-analysis/index.ts create mode 100644 packages/context/src/sql-analysis/ports.ts create mode 100644 packages/context/src/test/make-local-git-repo.ts create mode 100644 packages/context/src/tools/authors.ts create mode 100644 packages/context/src/tools/base-tool.ts create mode 100644 packages/context/src/tools/context-candidate-mark.tool.ts create mode 100644 packages/context/src/tools/context-candidate-write.tool.ts create mode 100644 packages/context/src/tools/context-evidence-ids.ts create mode 100644 packages/context/src/tools/context-evidence-neighbors.tool.ts create mode 100644 packages/context/src/tools/context-evidence-read.tool.ts create mode 100644 packages/context/src/tools/context-evidence-search.tool.ts create mode 100644 packages/context/src/tools/context-evidence-tool-store.ts create mode 100644 packages/context/src/tools/context-evidence-tools.test.ts create mode 100644 
packages/context/src/tools/context-ingest-metadata.ts create mode 100644 packages/context/src/tools/index.ts create mode 100644 packages/context/src/tools/sql-edit-replacer.ts create mode 100644 packages/context/src/tools/tool-session.ts create mode 100644 packages/context/src/tools/touched-sl-sources.test.ts create mode 100644 packages/context/src/tools/touched-sl-sources.ts create mode 100644 packages/context/src/wiki/index.ts create mode 100644 packages/context/src/wiki/knowledge-search-text.ts create mode 100644 packages/context/src/wiki/knowledge-wiki.service.test.ts create mode 100644 packages/context/src/wiki/knowledge-wiki.service.ts create mode 100644 packages/context/src/wiki/local-knowledge.test.ts create mode 100644 packages/context/src/wiki/local-knowledge.ts create mode 100644 packages/context/src/wiki/ports.ts create mode 100644 packages/context/src/wiki/sqlite-knowledge-index.test.ts create mode 100644 packages/context/src/wiki/sqlite-knowledge-index.ts create mode 100644 packages/context/src/wiki/tools/index.ts create mode 100644 packages/context/src/wiki/tools/wiki-list-tags.tool.test.ts create mode 100644 packages/context/src/wiki/tools/wiki-list-tags.tool.ts create mode 100644 packages/context/src/wiki/tools/wiki-read.tool.ts create mode 100644 packages/context/src/wiki/tools/wiki-remove.tool.test.ts create mode 100644 packages/context/src/wiki/tools/wiki-remove.tool.ts create mode 100644 packages/context/src/wiki/tools/wiki-search.tool.test.ts create mode 100644 packages/context/src/wiki/tools/wiki-search.tool.ts create mode 100644 packages/context/src/wiki/tools/wiki-write.tool.test.ts create mode 100644 packages/context/src/wiki/tools/wiki-write.tool.ts create mode 100644 packages/context/src/wiki/types.ts create mode 100644 packages/context/test/fixtures/lookml/extends-chain/orders.model.lkml create mode 100644 packages/context/test/fixtures/lookml/extends-chain/views/base.view.lkml create mode 100644 
packages/context/test/fixtures/lookml/extends-chain/views/orders.view.lkml create mode 100644 packages/context/test/fixtures/lookml/extends-chain/views/orders_ext.view.lkml create mode 100644 packages/context/test/fixtures/lookml/multi-model/marketing.model.lkml create mode 100644 packages/context/test/fixtures/lookml/multi-model/orders.model.lkml create mode 100644 packages/context/test/fixtures/lookml/multi-model/views/campaigns.view.lkml create mode 100644 packages/context/test/fixtures/lookml/multi-model/views/orders.view.lkml create mode 100644 packages/context/test/fixtures/lookml/multi-model/views/shared_dims.view.lkml create mode 100644 packages/context/test/fixtures/lookml/single-model/orders.model.lkml create mode 100644 packages/context/test/fixtures/lookml/single-model/views/customers.view.lkml create mode 100644 packages/context/test/fixtures/lookml/single-model/views/orders.view.lkml create mode 100644 packages/context/test/fixtures/lookml/three-churn/billing.model.lkml create mode 100644 packages/context/test/fixtures/lookml/three-churn/customers.model.lkml create mode 100644 packages/context/test/fixtures/lookml/three-churn/support.model.lkml create mode 100644 packages/context/test/fixtures/lookml/three-churn/views/billing/billing_churn_risk.view.lkml create mode 100644 packages/context/test/fixtures/lookml/three-churn/views/customers/customer_churn_risk.view.lkml create mode 100644 packages/context/test/fixtures/lookml/three-churn/views/support/support_churn_risk.view.lkml create mode 100644 packages/context/test/fixtures/metabase/card-ref/cards/10.json create mode 100644 packages/context/test/fixtures/metabase/card-ref/cards/11.json create mode 100644 packages/context/test/fixtures/metabase/card-ref/collections/5.json create mode 100644 packages/context/test/fixtures/metabase/card-ref/databases/42.json create mode 100644 packages/context/test/fixtures/metabase/card-ref/sync-config.json create mode 100644 
packages/context/test/fixtures/metabase/multi-collection/cards/1.json create mode 100644 packages/context/test/fixtures/metabase/multi-collection/cards/2.json create mode 100644 packages/context/test/fixtures/metabase/multi-collection/cards/3.json create mode 100644 packages/context/test/fixtures/metabase/multi-collection/collections/5.json create mode 100644 packages/context/test/fixtures/metabase/multi-collection/collections/6.json create mode 100644 packages/context/test/fixtures/metabase/multi-collection/databases/42.json create mode 100644 packages/context/test/fixtures/metabase/multi-collection/sync-config.json create mode 100644 packages/context/test/fixtures/metabase/simple/cards/1.json create mode 100644 packages/context/test/fixtures/metabase/simple/cards/2.json create mode 100644 packages/context/test/fixtures/metabase/simple/collections/5.json create mode 100644 packages/context/test/fixtures/metabase/simple/databases/42.json create mode 100644 packages/context/test/fixtures/metabase/simple/sync-config.json create mode 100644 packages/context/test/fixtures/metricflow/dbt-mixed/dbt_project.yml create mode 100644 packages/context/test/fixtures/metricflow/dbt-mixed/models/orders.yml create mode 100644 packages/context/test/fixtures/metricflow/extends-chain/metrics/orders_final.yml create mode 100644 packages/context/test/fixtures/metricflow/extends-chain/models/orders.yml create mode 100644 packages/context/test/fixtures/metricflow/extends-chain/models/orders_ext.yml create mode 100644 packages/context/test/fixtures/metricflow/multi-component/models/marketing/campaigns.yml create mode 100644 packages/context/test/fixtures/metricflow/multi-component/models/sales/orders.yml create mode 100644 packages/context/test/fixtures/metricflow/single-model/models/orders.yml create mode 100644 packages/context/test/fixtures/relationship-benchmarks/abbreviated_legacy_no_declared_constraints/data.sqlite create mode 100644 
packages/context/test/fixtures/relationship-benchmarks/abbreviated_legacy_no_declared_constraints/expected-links.yaml create mode 100644 packages/context/test/fixtures/relationship-benchmarks/abbreviated_legacy_no_declared_constraints/fixture.yaml create mode 100644 packages/context/test/fixtures/relationship-benchmarks/abbreviated_legacy_no_declared_constraints/snapshot.json create mode 100644 packages/context/test/fixtures/relationship-benchmarks/adventureworks_oltp_with_declared_metadata/expected-links.yaml create mode 100644 packages/context/test/fixtures/relationship-benchmarks/adventureworks_oltp_with_declared_metadata/fixture.yaml create mode 100644 packages/context/test/fixtures/relationship-benchmarks/adventureworks_oltp_with_declared_metadata/snapshot.json create mode 100644 packages/context/test/fixtures/relationship-benchmarks/adventureworkslt_with_declared_metadata/expected-links.yaml create mode 100644 packages/context/test/fixtures/relationship-benchmarks/adventureworkslt_with_declared_metadata/fixture.yaml create mode 100644 packages/context/test/fixtures/relationship-benchmarks/adventureworkslt_with_declared_metadata/snapshot.json create mode 100644 packages/context/test/fixtures/relationship-benchmarks/analytical_warehouse_no_naming_convention/data.sqlite create mode 100644 packages/context/test/fixtures/relationship-benchmarks/analytical_warehouse_no_naming_convention/expected-links.yaml create mode 100644 packages/context/test/fixtures/relationship-benchmarks/analytical_warehouse_no_naming_convention/fixture.yaml create mode 100644 packages/context/test/fixtures/relationship-benchmarks/analytical_warehouse_no_naming_convention/snapshot.json create mode 100644 packages/context/test/fixtures/relationship-benchmarks/chinook_with_declared_metadata/expected-links.yaml create mode 100644 packages/context/test/fixtures/relationship-benchmarks/chinook_with_declared_metadata/fixture.yaml create mode 100644 
packages/context/test/fixtures/relationship-benchmarks/chinook_with_declared_metadata/snapshot.json create mode 100644 packages/context/test/fixtures/relationship-benchmarks/composite_keys_no_declared_constraints/data.sqlite create mode 100644 packages/context/test/fixtures/relationship-benchmarks/composite_keys_no_declared_constraints/expected-links.yaml create mode 100644 packages/context/test/fixtures/relationship-benchmarks/composite_keys_no_declared_constraints/fixture.yaml create mode 100644 packages/context/test/fixtures/relationship-benchmarks/composite_keys_no_declared_constraints/snapshot.json create mode 100644 packages/context/test/fixtures/relationship-benchmarks/demo_b2b_declared_metadata/data.sqlite create mode 100644 packages/context/test/fixtures/relationship-benchmarks/demo_b2b_declared_metadata/expected-links.yaml create mode 100644 packages/context/test/fixtures/relationship-benchmarks/demo_b2b_declared_metadata/fixture.yaml create mode 100644 packages/context/test/fixtures/relationship-benchmarks/demo_b2b_declared_metadata/snapshot.json create mode 100644 packages/context/test/fixtures/relationship-benchmarks/demo_b2b_no_declared_constraints/data.sqlite create mode 100644 packages/context/test/fixtures/relationship-benchmarks/demo_b2b_no_declared_constraints/expected-links.yaml create mode 100644 packages/context/test/fixtures/relationship-benchmarks/demo_b2b_no_declared_constraints/fixture.yaml create mode 100644 packages/context/test/fixtures/relationship-benchmarks/demo_b2b_no_declared_constraints/snapshot.json create mode 100644 packages/context/test/fixtures/relationship-benchmarks/mixed_case_within_schema_no_declared_constraints/data.sqlite create mode 100644 packages/context/test/fixtures/relationship-benchmarks/mixed_case_within_schema_no_declared_constraints/expected-links.yaml create mode 100644 packages/context/test/fixtures/relationship-benchmarks/mixed_case_within_schema_no_declared_constraints/fixture.yaml create mode 100644 
packages/context/test/fixtures/relationship-benchmarks/mixed_case_within_schema_no_declared_constraints/snapshot.json create mode 100644 packages/context/test/fixtures/relationship-benchmarks/natural_keys_no_declared_constraints/data.sqlite create mode 100644 packages/context/test/fixtures/relationship-benchmarks/natural_keys_no_declared_constraints/expected-links.yaml create mode 100644 packages/context/test/fixtures/relationship-benchmarks/natural_keys_no_declared_constraints/fixture.yaml create mode 100644 packages/context/test/fixtures/relationship-benchmarks/natural_keys_no_declared_constraints/snapshot.json create mode 100644 packages/context/test/fixtures/relationship-benchmarks/non_english_naming_no_declared_constraints/data.sqlite create mode 100644 packages/context/test/fixtures/relationship-benchmarks/non_english_naming_no_declared_constraints/expected-links.yaml create mode 100644 packages/context/test/fixtures/relationship-benchmarks/non_english_naming_no_declared_constraints/fixture.yaml create mode 100644 packages/context/test/fixtures/relationship-benchmarks/non_english_naming_no_declared_constraints/snapshot.json create mode 100644 packages/context/test/fixtures/relationship-benchmarks/northwind_with_declared_metadata/expected-links.yaml create mode 100644 packages/context/test/fixtures/relationship-benchmarks/northwind_with_declared_metadata/fixture.yaml create mode 100644 packages/context/test/fixtures/relationship-benchmarks/northwind_with_declared_metadata/snapshot.json create mode 100644 packages/context/test/fixtures/relationship-benchmarks/orbit_style_product_no_declared_constraints/data.sqlite create mode 100644 packages/context/test/fixtures/relationship-benchmarks/orbit_style_product_no_declared_constraints/expected-links.yaml create mode 100644 packages/context/test/fixtures/relationship-benchmarks/orbit_style_product_no_declared_constraints/fixture.yaml create mode 100644 
packages/context/test/fixtures/relationship-benchmarks/orbit_style_product_no_declared_constraints/snapshot.json create mode 100644 packages/context/test/fixtures/relationship-benchmarks/plan_code_no_declared_constraints/data.sqlite create mode 100644 packages/context/test/fixtures/relationship-benchmarks/plan_code_no_declared_constraints/expected-links.yaml create mode 100644 packages/context/test/fixtures/relationship-benchmarks/plan_code_no_declared_constraints/fixture.yaml create mode 100644 packages/context/test/fixtures/relationship-benchmarks/plan_code_no_declared_constraints/snapshot.json create mode 100644 packages/context/test/fixtures/relationship-benchmarks/polymorphic_partial_overlap_no_declared_constraints/data.sqlite create mode 100644 packages/context/test/fixtures/relationship-benchmarks/polymorphic_partial_overlap_no_declared_constraints/expected-links.yaml create mode 100644 packages/context/test/fixtures/relationship-benchmarks/polymorphic_partial_overlap_no_declared_constraints/fixture.yaml create mode 100644 packages/context/test/fixtures/relationship-benchmarks/polymorphic_partial_overlap_no_declared_constraints/snapshot.json create mode 100644 packages/context/test/fixtures/relationship-benchmarks/sakila_with_declared_metadata/expected-links.yaml create mode 100644 packages/context/test/fixtures/relationship-benchmarks/sakila_with_declared_metadata/fixture.yaml create mode 100644 packages/context/test/fixtures/relationship-benchmarks/sakila_with_declared_metadata/snapshot.json create mode 100644 packages/context/test/fixtures/relationship-benchmarks/scale_stress_no_declared_constraints/data.sqlite.gz create mode 100644 packages/context/test/fixtures/relationship-benchmarks/scale_stress_no_declared_constraints/expected-links.yaml create mode 100644 packages/context/test/fixtures/relationship-benchmarks/scale_stress_no_declared_constraints/fixture.yaml create mode 100644 
packages/context/test/fixtures/relationship-benchmarks/scale_stress_no_declared_constraints/snapshot.json.gz create mode 100644 packages/context/test/fixtures/relationship-benchmarks/semantic_embedding_aliases_no_declared_constraints/column-embeddings.json create mode 100644 packages/context/test/fixtures/relationship-benchmarks/semantic_embedding_aliases_no_declared_constraints/data.sqlite create mode 100644 packages/context/test/fixtures/relationship-benchmarks/semantic_embedding_aliases_no_declared_constraints/expected-links.yaml create mode 100644 packages/context/test/fixtures/relationship-benchmarks/semantic_embedding_aliases_no_declared_constraints/fixture.yaml create mode 100644 packages/context/test/fixtures/relationship-benchmarks/semantic_embedding_aliases_no_declared_constraints/snapshot.json create mode 100644 packages/context/tsconfig.json create mode 100644 packages/context/vitest.config.ts create mode 100644 packages/llm/package.json create mode 100644 packages/llm/src/embedding-health.test.ts create mode 100644 packages/llm/src/embedding-health.ts create mode 100644 packages/llm/src/embedding-provider.test.ts create mode 100644 packages/llm/src/embedding-provider.ts create mode 100644 packages/llm/src/index.ts create mode 100644 packages/llm/src/message-builder.test.ts create mode 100644 packages/llm/src/message-builder.ts create mode 100644 packages/llm/src/model-health.test.ts create mode 100644 packages/llm/src/model-health.ts create mode 100644 packages/llm/src/model-provider.test.ts create mode 100644 packages/llm/src/model-provider.ts create mode 100644 packages/llm/src/package-exports.test.ts create mode 100644 packages/llm/src/repair.test.ts create mode 100644 packages/llm/src/repair.ts create mode 100644 packages/llm/src/types.ts create mode 100644 packages/llm/tsconfig.json create mode 100644 packages/llm/vitest.config.ts create mode 100644 pnpm-lock.yaml create mode 100644 pnpm-workspace.yaml create mode 100644 pyproject.toml create mode 
100644 python/klo-daemon/README.md create mode 100644 python/klo-daemon/pyproject.toml create mode 100644 python/klo-daemon/src/klo_daemon/__init__.py create mode 100644 python/klo-daemon/src/klo_daemon/__main__.py create mode 100644 python/klo-daemon/src/klo_daemon/app.py create mode 100644 python/klo-daemon/src/klo_daemon/code_execution.py create mode 100644 python/klo-daemon/src/klo_daemon/database_introspection.py create mode 100644 python/klo-daemon/src/klo_daemon/embeddings.py create mode 100644 python/klo-daemon/src/klo_daemon/lookml.py create mode 100644 python/klo-daemon/src/klo_daemon/semantic_layer.py create mode 100644 python/klo-daemon/src/klo_daemon/source_generation.py create mode 100644 python/klo-daemon/src/klo_daemon/table_identifier.py create mode 100644 python/klo-daemon/tests/test_app.py create mode 100644 python/klo-daemon/tests/test_cli.py create mode 100644 python/klo-daemon/tests/test_code_execution.py create mode 100644 python/klo-daemon/tests/test_database_introspection.py create mode 100644 python/klo-daemon/tests/test_embeddings.py create mode 100644 python/klo-daemon/tests/test_lookml.py create mode 100644 python/klo-daemon/tests/test_package.py create mode 100644 python/klo-daemon/tests/test_semantic_layer.py create mode 100644 python/klo-daemon/tests/test_source_generation.py create mode 100644 python/klo-sl/AGENTS.md create mode 120000 python/klo-sl/CLAUDE.md create mode 100644 python/klo-sl/README.md create mode 100644 python/klo-sl/demos/complex_cte_join.yaml create mode 100755 python/klo-sl/demos/run_complex_cte_join.sh create mode 100644 python/klo-sl/pyproject.toml create mode 100644 python/klo-sl/scripts/gen_b2b_saas_model.py create mode 100644 python/klo-sl/scripts/slquery.py create mode 100644 python/klo-sl/scripts/tpch_runner.py create mode 100644 python/klo-sl/semantic_layer/__init__.py create mode 100644 python/klo-sl/semantic_layer/__main__.py create mode 100644 python/klo-sl/semantic_layer/cli.py create mode 100644 
python/klo-sl/semantic_layer/duplicate_check.py create mode 100644 python/klo-sl/semantic_layer/engine.py create mode 100644 python/klo-sl/semantic_layer/generator.py create mode 100644 python/klo-sl/semantic_layer/graph.py create mode 100644 python/klo-sl/semantic_layer/loader.py create mode 100644 python/klo-sl/semantic_layer/manifest.py create mode 100644 python/klo-sl/semantic_layer/models.py create mode 100644 python/klo-sl/semantic_layer/parser.py create mode 100644 python/klo-sl/semantic_layer/planner.py create mode 100644 python/klo-sl/semantic_layer/sql_table_extractor.py create mode 100644 python/klo-sl/semantic_layer/table_identifier_parser.py create mode 100644 python/klo-sl/sources/b2b_saas/abm_engagements.yaml create mode 100644 python/klo-sl/sources/b2b_saas/account_intent_signals.yaml create mode 100644 python/klo-sl/sources/b2b_saas/accounts.yaml create mode 100644 python/klo-sl/sources/b2b_saas/activities.yaml create mode 100644 python/klo-sl/sources/b2b_saas/ad_accounts.yaml create mode 100644 python/klo-sl/sources/b2b_saas/ad_ad_stats.yaml create mode 100644 python/klo-sl/sources/b2b_saas/ad_campaigns.yaml create mode 100644 python/klo-sl/sources/b2b_saas/ad_creative_stats.yaml create mode 100644 python/klo-sl/sources/b2b_saas/ad_creatives.yaml create mode 100644 python/klo-sl/sources/b2b_saas/ad_groups.yaml create mode 100644 python/klo-sl/sources/b2b_saas/ad_stats.yaml create mode 100644 python/klo-sl/sources/b2b_saas/ads.yaml create mode 100644 python/klo-sl/sources/b2b_saas/ap_bills.yaml create mode 100644 python/klo-sl/sources/b2b_saas/approvals.yaml create mode 100644 python/klo-sl/sources/b2b_saas/attribution_credits.yaml create mode 100644 python/klo-sl/sources/b2b_saas/budgets.yaml create mode 100644 python/klo-sl/sources/b2b_saas/calls.yaml create mode 100644 python/klo-sl/sources/b2b_saas/campaign_members.yaml create mode 100644 python/klo-sl/sources/b2b_saas/campaigns.yaml create mode 100644 
python/klo-sl/sources/b2b_saas/card_transactions.yaml create mode 100644 python/klo-sl/sources/b2b_saas/cash_balances.yaml create mode 100644 python/klo-sl/sources/b2b_saas/charges.yaml create mode 100644 python/klo-sl/sources/b2b_saas/churn_risk.yaml create mode 100644 python/klo-sl/sources/b2b_saas/contacts.yaml create mode 100644 python/klo-sl/sources/b2b_saas/content_assets.yaml create mode 100644 python/klo-sl/sources/b2b_saas/content_touches.yaml create mode 100644 python/klo-sl/sources/b2b_saas/contracts.yaml create mode 100644 python/klo-sl/sources/b2b_saas/crm_notes.yaml create mode 100644 python/klo-sl/sources/b2b_saas/currencies.yaml create mode 100644 python/klo-sl/sources/b2b_saas/departments_hr.yaml create mode 100644 python/klo-sl/sources/b2b_saas/disputes.yaml create mode 100644 python/klo-sl/sources/b2b_saas/email_events.yaml create mode 100644 python/klo-sl/sources/b2b_saas/email_sends.yaml create mode 100644 python/klo-sl/sources/b2b_saas/employees.yaml create mode 100644 python/klo-sl/sources/b2b_saas/etl_runs.yaml create mode 100644 python/klo-sl/sources/b2b_saas/fiscal_calendar.yaml create mode 100644 python/klo-sl/sources/b2b_saas/forecast_snapshots.yaml create mode 100644 python/klo-sl/sources/b2b_saas/fx_rates.yaml create mode 100644 python/klo-sl/sources/b2b_saas/ga4_event_params.yaml create mode 100644 python/klo-sl/sources/b2b_saas/ga4_events.yaml create mode 100644 python/klo-sl/sources/b2b_saas/gl_accounts.yaml create mode 100644 python/klo-sl/sources/b2b_saas/identities.yaml create mode 100644 python/klo-sl/sources/b2b_saas/identity_links.yaml create mode 100644 python/klo-sl/sources/b2b_saas/invoice_lines.yaml create mode 100644 python/klo-sl/sources/b2b_saas/invoices.yaml create mode 100644 python/klo-sl/sources/b2b_saas/journal_entries.yaml create mode 100644 python/klo-sl/sources/b2b_saas/journal_lines.yaml create mode 100644 python/klo-sl/sources/b2b_saas/keyword_rankings.yaml create mode 100644 
python/klo-sl/sources/b2b_saas/lead_status_history.yaml create mode 100644 python/klo-sl/sources/b2b_saas/leads.yaml create mode 100644 python/klo-sl/sources/b2b_saas/meeting_bookings.yaml create mode 100644 python/klo-sl/sources/b2b_saas/open_roles.yaml create mode 100644 python/klo-sl/sources/b2b_saas/opportunities.yaml create mode 100644 python/klo-sl/sources/b2b_saas/opportunity_contact_roles.yaml create mode 100644 python/klo-sl/sources/b2b_saas/opportunity_line_items.yaml create mode 100644 python/klo-sl/sources/b2b_saas/opportunity_stage_history.yaml create mode 100644 python/klo-sl/sources/b2b_saas/payment_intents.yaml create mode 100644 python/klo-sl/sources/b2b_saas/payments.yaml create mode 100644 python/klo-sl/sources/b2b_saas/payroll_runs.yaml create mode 100644 python/klo-sl/sources/b2b_saas/pricebook_entries.yaml create mode 100644 python/klo-sl/sources/b2b_saas/pricebooks.yaml create mode 100644 python/klo-sl/sources/b2b_saas/product_costs.yaml create mode 100644 python/klo-sl/sources/b2b_saas/product_usage.yaml create mode 100644 python/klo-sl/sources/b2b_saas/products.yaml create mode 100644 python/klo-sl/sources/b2b_saas/quotas.yaml create mode 100644 python/klo-sl/sources/b2b_saas/quote_line_items.yaml create mode 100644 python/klo-sl/sources/b2b_saas/quotes.yaml create mode 100644 python/klo-sl/sources/b2b_saas/refunds.yaml create mode 100644 python/klo-sl/sources/b2b_saas/revenue_schedules.yaml create mode 100644 python/klo-sl/sources/b2b_saas/reverse_etl_jobs.yaml create mode 100644 python/klo-sl/sources/b2b_saas/sales_reps.yaml create mode 100644 python/klo-sl/sources/b2b_saas/sales_teams.yaml create mode 100644 python/klo-sl/sources/b2b_saas/search_console_stats.yaml create mode 100644 python/klo-sl/sources/b2b_saas/sequence_enrollments.yaml create mode 100644 python/klo-sl/sources/b2b_saas/sequence_steps.yaml create mode 100644 python/klo-sl/sources/b2b_saas/sequence_touches.yaml create mode 100644 
python/klo-sl/sources/b2b_saas/sequences.yaml create mode 100644 python/klo-sl/sources/b2b_saas/stage_weights.yaml create mode 100644 python/klo-sl/sources/b2b_saas/subscription_items.yaml create mode 100644 python/klo-sl/sources/b2b_saas/subscriptions.yaml create mode 100644 python/klo-sl/sources/b2b_saas/support_tickets.yaml create mode 100644 python/klo-sl/sources/b2b_saas/target_accounts.yaml create mode 100644 python/klo-sl/sources/b2b_saas/touchpoints.yaml create mode 100644 python/klo-sl/sources/b2b_saas/vendors.yaml create mode 100644 python/klo-sl/sources/b2b_saas/web_events.yaml create mode 100644 python/klo-sl/sources/b2b_saas/web_sessions.yaml create mode 100644 python/klo-sl/sources/b2b_saas/webinar_attendance.yaml create mode 100644 python/klo-sl/sources/b2b_saas/webinar_registrations.yaml create mode 100644 python/klo-sl/sources/b2b_saas/webinars.yaml create mode 100644 python/klo-sl/sources/ecommerce/churn_risk.yaml create mode 100644 python/klo-sl/sources/ecommerce/customers.yaml create mode 100644 python/klo-sl/sources/ecommerce/order_items.yaml create mode 100644 python/klo-sl/sources/ecommerce/orders.yaml create mode 100644 python/klo-sl/sources/ecommerce/products.yaml create mode 100644 python/klo-sl/sources/ecommerce/regions.yaml create mode 100644 python/klo-sl/sources/tpch/customer.yaml create mode 100644 python/klo-sl/sources/tpch/lineitem.yaml create mode 100644 python/klo-sl/sources/tpch/nation.yaml create mode 100644 python/klo-sl/sources/tpch/orders.yaml create mode 100644 python/klo-sl/sources/tpch/part.yaml create mode 100644 python/klo-sl/sources/tpch/partsupp.yaml create mode 100644 python/klo-sl/sources/tpch/region.yaml create mode 100644 python/klo-sl/sources/tpch/supplier.yaml create mode 100644 python/klo-sl/tests/__init__.py create mode 100644 python/klo-sl/tests/conftest.py create mode 100644 python/klo-sl/tests/test_aggregate_locality.py create mode 100644 python/klo-sl/tests/test_cli.py create mode 100644 
python/klo-sl/tests/test_computed_columns.py create mode 100644 python/klo-sl/tests/test_corner_case_regressions.py create mode 100644 python/klo-sl/tests/test_coverage_gaps.py create mode 100644 python/klo-sl/tests/test_duplicate_check.py create mode 100644 python/klo-sl/tests/test_engine.py create mode 100644 python/klo-sl/tests/test_generator.py create mode 100644 python/klo-sl/tests/test_graph.py create mode 100644 python/klo-sl/tests/test_loader.py create mode 100644 python/klo-sl/tests/test_manifest.py create mode 100644 python/klo-sl/tests/test_models.py create mode 100644 python/klo-sl/tests/test_parser.py create mode 100644 python/klo-sl/tests/test_planner.py create mode 100644 python/klo-sl/tests/test_segments.py create mode 100644 python/klo-sl/tests/test_snowflake.py create mode 100644 python/klo-sl/tests/test_sql_join_coverage.py create mode 100644 python/klo-sl/tests/test_table_identifier_parser.py create mode 100644 python/klo-sl/tests/test_tpch.py create mode 100644 python/klo-sl/tests/test_validator.py create mode 100644 release-policy.json create mode 100644 scripts/acquire-public-benchmark-fixtures.mjs create mode 100644 scripts/acquire-public-benchmark-fixtures.test.mjs create mode 100644 scripts/adventureworks-oltp-source.json create mode 100644 scripts/adventureworks-oltp-source.test.mjs create mode 100644 scripts/anti-fixture-conditional.test.mjs create mode 100644 scripts/build-adventureworks-oltp-fixture.mjs create mode 100644 scripts/build-benchmark-snapshot.mjs create mode 100644 scripts/build-benchmark-snapshot.test.mjs create mode 100644 scripts/build-evidence-fusion-adversarial-fixtures.mjs create mode 100644 scripts/check-boundaries.mjs create mode 100644 scripts/check-boundaries.test.mjs create mode 100644 scripts/ci-artifact-upload.test.mjs create mode 100644 scripts/examples-docs.test.mjs create mode 100644 scripts/installed-live-database-smoke.mjs create mode 100644 scripts/installed-live-database-smoke.test.mjs create mode 100644 
scripts/link-dev-cli.mjs create mode 100644 scripts/link-dev-cli.test.mjs create mode 100644 scripts/package-artifacts.mjs create mode 100644 scripts/package-artifacts.test.mjs create mode 100644 scripts/precommit-check.mjs create mode 100644 scripts/precommit-check.test.mjs create mode 100644 scripts/prepare-cli-bin.mjs create mode 100644 scripts/public-benchmark-manifest.json create mode 100644 scripts/published-package-smoke-config.mjs create mode 100644 scripts/published-package-smoke.mjs create mode 100644 scripts/published-package-smoke.test.mjs create mode 100644 scripts/relationship-orbit-verification.mjs create mode 100644 scripts/relationship-orbit-verification.test.mjs create mode 100644 scripts/release-readiness.mjs create mode 100644 scripts/release-readiness.test.mjs create mode 100644 scripts/run-klo.mjs create mode 100644 scripts/run-klo.test.mjs create mode 100644 scripts/setup-dev.mjs create mode 100644 scripts/setup-dev.test.mjs create mode 100644 scripts/standalone-ci-workflow.test.mjs create mode 100644 scripts/validate-llm-debug-jsonl.mjs create mode 100644 scripts/validate-llm-debug-jsonl.test.mjs create mode 100644 tsconfig.base.json create mode 100644 uv.lock diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 00000000..184826a3 --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,72 @@ +name: KLO CI + +on: + push: + branches: [main] + pull_request: + types: [opened, synchronize, reopened, ready_for_review] + workflow_dispatch: + +permissions: + contents: read + +concurrency: + group: klo-ci-${{ github.ref }} + cancel-in-progress: true + +jobs: + check: + runs-on: ubuntu-latest + steps: + - name: Checkout repository + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + + - name: Setup pnpm + uses: pnpm/action-setup@41ff72655975bd51cab0327fa583b6e92b6d3061 # v4.2.0 + with: + run_install: false + + - name: Setup Node.js + uses: 
actions/setup-node@6044e13b5dc448c55e2357c09f80417699197238 # v6.2.0 + with: + node-version: "24" + cache: "pnpm" + cache-dependency-path: "pnpm-lock.yaml" + + - name: Install TypeScript dependencies + run: pnpm install --frozen-lockfile + + - name: Run TypeScript checks + run: pnpm run check + + - name: Setup Python + uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0 + with: + python-version: "3.13" + + - name: Setup uv + uses: astral-sh/setup-uv@eac588ad8def6316056a12d4907a9d4d84ff7a3b # v7.3.0 + with: + enable-cache: true + cache-dependency-glob: "uv.lock" + + - name: Install Python dependencies + run: uv sync --all-packages + + - name: Run Python checks + run: uv run pytest + + - name: Build and verify package artifacts + run: pnpm run artifacts:check + + - name: Upload package artifacts + uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f # v6.0.0 + with: + name: klo-package-artifacts-${{ github.sha }} + path: | + dist/artifacts/manifest.json + dist/artifacts/npm/*.tgz + dist/artifacts/python/*.whl + dist/artifacts/python/*.tar.gz + if-no-files-found: error + retention-days: 7 diff --git a/.gitignore b/.gitignore new file mode 100644 index 00000000..c3f7e607 --- /dev/null +++ b/.gitignore @@ -0,0 +1,61 @@ +# Python +__pycache__/ +*.py[cod] +*.so +.Python +.venv/ +venv/ +env/ +build/ +dist/ +*.egg-info/ +.pytest_cache/ +.coverage +coverage/ +htmlcov/ +.ruff_cache/ +.mypy_cache/ +.hypothesis/ + +# Secrets and local environment +.env +.env.* +!.env.example +*.pem +*.key +*.p12 +*.crt +*.cert + +# Node +node_modules/ +.npm/ +.pnpm-store/ +*.tsbuildinfo +*.log +npm-debug.log* +yarn-debug.log* +yarn-error.log* +.pnpm-debug.log* + +# Local project runtime state +.klo/ +*.db +*.sqlite +*.sqlite3 +!packages/cli/assets/demo/orbit/demo.db +!packages/context/test/fixtures/relationship-benchmarks/**/data.sqlite + +# Private local agent overlays +.agents/ +.claude/ + +# Editors and OS files +.idea/ +.vscode/ +.DS_Store 
+.DS_Store? +._* +*.swp +*.swo +*~ diff --git a/AGENTS.md b/AGENTS.md new file mode 100644 index 00000000..8b35d70b --- /dev/null +++ b/AGENTS.md @@ -0,0 +1,251 @@ +# KTX Development Notes + +KTX is a standalone open-source context layer for database agents. These +instructions apply to all agents working in this repository (Codex, Claude, +Gemini, and similar tools). Do not assume an external app server, frontend, +database migrations, ORPC contracts, or `python-service/` layout exist here. + +## Critical Rules + +### Absolute Requirements + +- **MUST**: Use the active agent's task tracker for tasks with 3+ steps or + complex operations (`TodoWrite` in Claude, `update_plan` in Codex). +- **MUST**: Read files before editing them. +- **MUST**: Complete all tracked tasks before finishing. +- **MUST**: Activate `.venv` before running Python code when a local virtualenv + exists. If no `.venv` exists, use `uv run ...` from the relevant project root. +- **MUST**: After modifying Python files, run the relevant Python tests and run + `uv run pre-commit run --files [FILES]` when a pre-commit config exists. If + pre-commit cannot run because config or tool versions are missing, state that + explicitly and run the closest available checks. +- **MUST**: Remove dead code; do not leave commented-out code, unused wrappers, + or empty directories. +- **MUST**: Keep package/public API changes intentional. Do not add compatibility + wrappers for old KLO names unless the user explicitly asks for a migration + bridge. + +### Absolute Prohibitions + +- **MUST NOT**: Use raw `pip`; use `uv`. +- **MUST NOT**: Use `npm` or `bun`; use `pnpm`. +- **MUST NOT**: Run destructive git cleanup commands (`git clean`, + `git reset --hard`, `git checkout .`) unless the user explicitly requested + that exact operation. +- **MUST NOT**: Run `git stash`, `git stash pop`, `git stash apply`, or + `git stash drop` without explicit user instruction. 
Prefer a branch plus + commit when the user asks to save work in progress. +- **MUST NOT**: Reintroduce external app conventions such as ORPC contracts, + NestJS controllers, frontend routes, `routeTree.gen.ts`, or app database + migration commands unless those systems are intentionally added to KTX later. + +### Language Convention + +- **MUST**: Absolute requirement, never deviate. +- **MUST NOT**: Absolute prohibition. +- **SHOULD**: Strong recommendation, deviate only with good reason. +- **MAY**: Optional, at agent's discretion. + +## Priority Hierarchy + +When rules conflict, follow this order: + +1. Safety and user intent +2. Correctness: code works and verification passes +3. Single source of truth and DRY design +4. Code quality: types, readable boundaries, focused modules +5. Performance where it matters + +## Repository Shape + +KTX is a pnpm + uv workspace. + +- TypeScript packages: `packages/*` +- CLI package: `packages/cli` +- Core context package: `packages/context` +- LLM package: `packages/llm` +- Database connectors: `packages/connector-*` +- Python semantic layer: `python/klo-sl` +- Python daemon: `python/klo-daemon` +- Examples and fixtures: `examples/` +- Workspace scripts: `scripts/` +- Local agent skills are private overlays. Do not commit `.agents/` or + `.claude/` to this public repository. + +Some package names still contain `klo` during the split. Do not mass-rename +symbols, package names, paths, or docs to `ktx` unless the task asks for that +rename. 
+ +## Quick Commands + +### TypeScript Workspace + +```bash +pnpm install +pnpm run build +pnpm run type-check +pnpm run test +pnpm run check +pnpm --filter @klo/cli run smoke +pnpm --filter './packages/*' run build +pnpm --filter './packages/*' run test +pnpm --filter './packages/*' run type-check +``` + +### Python Workspace + +```bash +uv sync --all-groups +uv run pytest -q +uv run pytest python/klo-sl/tests -q +uv run pytest python/klo-daemon/tests -q +uv run pre-commit run --files [FILES] +``` + +If `pyproject.toml` pins a newer `uv` than the local binary, do not edit the +pin just to make checks pass. Report the version mismatch and run checks that +do not require changing project configuration. + +### CLI and Release Checks + +```bash +pnpm run setup:dev +pnpm run link:dev +pnpm run artifacts:verify +pnpm run release:readiness +pnpm run release:published-smoke +``` + +## Verification After Changes + +Choose the smallest checks that cover the changed surface, then broaden when +shared contracts or package exports are affected. + +- TypeScript package code: `pnpm --filter <package> run type-check` and + `pnpm --filter <package> run test` +- Cross-package TypeScript changes: `pnpm run type-check` and `pnpm run test` +- Build/export changes: `pnpm run build` +- Workspace scripts: `node --test scripts/*.test.mjs` or the specific script + test file +- Python semantic layer: `uv run pytest python/klo-sl/tests -q` +- Python daemon: `uv run pytest python/klo-daemon/tests -q` +- Python files: also run `uv run pre-commit run --files [FILES]` when + pre-commit is configured + +For test suites that take a while, capture full output once and inspect that +file instead of rerunning to apply different filters: + +```bash +pnpm run test 2>&1 | tee /tmp/ktx-test-output.log +``` + +## TypeScript Standards + +- Use Node 22+ and pnpm workspace commands. +- Keep packages ESM (`"type": "module"`) and preserve `NodeNext` TypeScript + semantics. 
+- Prefer strict types over `any`; do not use `as unknown as`. +- Keep package exports, `types`, and built `dist` expectations aligned when + changing public APIs. +- Use `zod` schemas for runtime validation at CLI/config/API boundaries. +- Keep connector packages thin: connector-specific scanning/auth behavior + belongs in `packages/connector-*`; shared types and orchestration belong in + `packages/context`. +- Avoid circular package dependencies. Shared code should move to the lowest + sensible package, not be duplicated across connectors. +- Do not manually edit generated or built output under `dist/`; edit source and + rebuild. + +### Zod Naming Convention + +```typescript +const userSchema = z.object({ + id: z.uuid(), + email: z.string().email(), + name: z.string(), +}); + +type User = z.infer<typeof userSchema>; +``` + +Runtime schemas use `camelCase` plus the `Schema` suffix. Static inferred types +use `PascalCase` without the suffix. + +## Python Standards + +- Use `pyproject.toml`; do not add `requirements.txt`. +- Use type hints for new and changed Python code. +- Use `pathlib` instead of `os.path`. +- Use `logger.exception()` when catching and logging exceptions. +- Prefer explicit exception types over broad `except Exception`. +- Keep `python/klo-sl` focused on semantic-layer planning and SQL generation. +- Keep `python/klo-daemon` focused on portable daemon/API behavior around the + semantic layer. + +### SQL and Structured Parsing + +- Prefer AST-based parsing over regex for structured input. +- For SQL, use `sqlglot`; it is already a dependency. +- In `python/klo-sl`, follow the local `python/klo-sl/AGENTS.md` guidance: + parse expressions with sqlglot, quote reserved identifiers before parsing, + and generate postgres-shaped SQL before final dialect transpilation. +- Regex may be used for non-structural sanitization, but not to interpret SQL + structure. 
+ +## Documentation and Specs + +- Keep public documentation in `README.md`, package READMEs, and example + READMEs unless the repository intentionally adds a public docs tree. +- Prefer concrete commands, file paths, and acceptance criteria over broad + prose. +- When documenting examples, ensure referenced files and commands exist in the + standalone KTX tree. +- Remove or rewrite stale external app references unless the doc is explicitly + historical. + +## LLM and Prompt Development + +When creating or modifying agent prompts, system prompts, tool descriptions, or +skills: + +- Use XML tags for major structure when it helps model reliability, e.g. + `<role>`, `<instructions>`, `<examples>`, `<output_format>`. +- Use positive framing: tell the model what to do. +- Keep prompts compact and avoid duplicating the same rule in multiple places. +- Include 1-3 concrete examples when examples materially reduce ambiguity. +- Use AI SDK v6 patterns for TypeScript LLM work. +- Use the local `ai-sdk` skill when working with AI SDK code. + +## Context7 and External Docs + +- Use Context7 when official, current library documentation would materially + reduce risk. +- Context7 "Monthly quota exceeded" errors are often transient. Retry before + assuming the quota is exhausted. +- If Context7 remains unavailable, state the blocked lookup and use the best + available local/source documentation. + +## When to Ask vs Act + +Act without asking when: + +- Following explicit user instructions +- Running verification +- Fixing clear bugs or tool failures within the requested scope + +Ask first when: + +- Requirements are ambiguous +- The next step is destructive or would discard user work +- A breaking public API decision is not already implied by the task +- Missing credentials, live services, or external accounts are required + +## Git and Worktree Safety + +- The worktree may contain unrelated user changes. Do not revert files you did + not change unless explicitly asked. 
+- Before committing, inspect `git status --short` and commit only intended + files. +- Do not commit ignored dependency/build artifacts such as `node_modules/`, + `.venv/`, `dist/`, coverage output, or local databases unless the task + explicitly concerns packaged artifacts. diff --git a/CLAUDE.md b/CLAUDE.md new file mode 120000 index 00000000..47dc3e3d --- /dev/null +++ b/CLAUDE.md @@ -0,0 +1 @@ +AGENTS.md \ No newline at end of file diff --git a/GEMINI.md b/GEMINI.md new file mode 120000 index 00000000..47dc3e3d --- /dev/null +++ b/GEMINI.md @@ -0,0 +1 @@ +AGENTS.md \ No newline at end of file diff --git a/LICENSE b/LICENSE new file mode 100644 index 00000000..57bc88a1 --- /dev/null +++ b/LICENSE @@ -0,0 +1,202 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. 
+ + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." 
+ + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. 
You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. 
+ + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. 
In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. 
+ + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + diff --git a/README.md b/README.md new file mode 100644 index 00000000..71a8af77 --- /dev/null +++ b/README.md @@ -0,0 +1,270 @@ +# KLO + +KLO is a workspace-first context layer for database agents. It stores warehouse +memory in a project directory, generates and validates semantic-layer YAML, +indexes knowledge, scans database schemas, and exposes the result through a CLI +and MCP server. + +KLO projects are plain files: YAML, Markdown, SQLite state, and generated +artifacts. You can inspect them, commit them, and serve them to any MCP client. + +## What KLO provides + +- Durable warehouse memory with semantic-layer sources and knowledge pages. +- Native scan connectors for SQLite, Postgres, MySQL, ClickHouse, SQL Server, + BigQuery, Snowflake, and PostHog. +- Agentic ingest with provenance links, tool transcripts, and replay metadata. +- Local semantic-layer query planning and optional query execution. +- A stdio MCP server with tools for connections, knowledge, semantic-layer + sources, ingest reports, and replay. + +## Quick start + +Run the pre-seeded demo from the repository root: + +```bash +pnpm install +pnpm run setup:dev +pnpm run klo -- setup demo --no-input +pnpm run klo -- setup demo inspect +``` + +The default demo uses packaged sample data and prebuilt context. It does not +require API keys, network access, or an LLM provider. 
+ +To replay the packaged ingest run, use: + +```bash +pnpm run klo -- setup demo --mode replay --no-input +``` + +To run the full agentic demo with an LLM provider, set a provider key for the +current process: + +```bash +ANTHROPIC_API_KEY=$YOUR_ANTHROPIC_API_KEY \ + pnpm run klo -- setup demo --mode full --no-input +``` + +Interactive full-demo setup can prompt for a provider key without writing the +key to `klo.yaml`. + +## Build a local project + +Create a project from the repository root: + +```bash +uv sync --all-packages +source .venv/bin/activate + +PROJECT_DIR="$(mktemp -d)/klo-demo" +pnpm run klo -- init "$PROJECT_DIR" --name klo-demo +``` + +Create a SQLite warehouse: + +```bash +python - "$PROJECT_DIR/demo.db" <<'PY' +import sqlite3 +import sys + +conn = sqlite3.connect(sys.argv[1]) +conn.executescript(""" +DROP TABLE IF EXISTS accounts; +CREATE TABLE accounts ( + account_id INTEGER PRIMARY KEY, + account_name TEXT NOT NULL, + segment TEXT NOT NULL, + region TEXT NOT NULL +); +INSERT INTO accounts VALUES + (1, 'Acme Analytics', 'Mid-Market', 'NA'), + (2, 'Beacon Bank', 'Enterprise', 'EMEA'), + (3, 'Cobalt Coffee', 'SMB', 'NA'), + (4, 'Delta Devices', 'Mid-Market', 'APAC'), + (5, 'Evergreen Energy', 'Enterprise', 'NA'); +""") +conn.close() +PY +``` + +Replace the generated `klo.yaml`: + +```bash +cat > "$PROJECT_DIR/klo.yaml" <" +memory: + auto_commit: true +YAML +``` + +Write and validate a semantic-layer source: + +```bash +pnpm run klo -- sl write accounts --project-dir "$PROJECT_DIR" \ + --connection-id warehouse --yaml 'name: accounts +table: accounts +description: CRM accounts with segmentation attributes. 
+grain: + - account_id +columns: + - name: account_id + type: number + - name: account_name + type: string + - name: segment + type: string + - name: region + type: string +measures: + - name: account_count + expr: count(account_id) +joins: [] +' + +pnpm run klo -- sl validate accounts --project-dir "$PROJECT_DIR" \ + --connection-id warehouse +``` + +Generate SQL and execute the query: + +```bash +pnpm run klo -- sl query --project-dir "$PROJECT_DIR" \ + --connection-id warehouse \ + --measure accounts.account_count \ + --dimension accounts.segment \ + --order-by accounts.account_count:desc \ + --limit 5 \ + --format sql + +pnpm run klo -- sl query --project-dir "$PROJECT_DIR" \ + --connection-id warehouse \ + --measure accounts.account_count \ + --dimension accounts.segment \ + --order-by accounts.account_count:desc \ + --limit 5 \ + --execute \ + --max-rows 5 +``` + +List and test the warehouse connection: + +```bash +pnpm run klo -- connection list --project-dir "$PROJECT_DIR" +pnpm run klo -- connection test warehouse --project-dir "$PROJECT_DIR" +``` + +The connection test prints the configured driver and discovered table count: + +```text +Driver: sqlite +Tables: 1 +``` + +### Scan the demo warehouse + +Scan artifacts are written under +`raw-sources/warehouse/live-database//` in the project directory. + +```bash + +SCAN_OUTPUT="$(pnpm run klo -- scan warehouse --project-dir "$PROJECT_DIR")" +printf '%s\n' "$SCAN_OUTPUT" +SCAN_RUN_ID="$(printf '%s\n' "$SCAN_OUTPUT" | awk '/^Run: / { print $2 }')" +pnpm run klo -- scan status --project-dir "$PROJECT_DIR" "$SCAN_RUN_ID" +pnpm run klo -- scan report --project-dir "$PROJECT_DIR" "$SCAN_RUN_ID" +``` + +For non-SQLite drivers, prefer credential references such as `--url env:NAME` +or `--url file:PATH` over literal credential URLs. 
+ +## Serve MCP + +Start the Python compute daemon in one terminal: + +```bash +source .venv/bin/activate +uv run klo-daemon serve-http --host 127.0.0.1 --port 8765 +``` + +Start the stdio MCP server in another terminal: + +```bash +pnpm run klo -- serve --mcp stdio --project-dir "$PROJECT_DIR" \ + --user-id local \ + --semantic-compute-url http://127.0.0.1:8765 \ + --execute-queries +``` + +The MCP server exposes `connection_list`, `knowledge_search`, +`knowledge_read`, `knowledge_write`, `sl_list_sources`, `sl_read_source`, +`sl_write_source`, `sl_validate`, `sl_query`, `ingest_trigger`, +`ingest_status`, `ingest_report`, and `ingest_replay`. + +## Workspace packages + +- `packages/context`: core TypeScript context library. +- `packages/cli`: CLI wrapper over the context package. +- `packages/llm`: LLM and embedding provider helpers. +- `packages/connector-bigquery`: BigQuery scan connector. +- `packages/connector-clickhouse`: ClickHouse scan connector. +- `packages/connector-mysql`: MySQL scan connector. +- `packages/connector-postgres`: Postgres scan connector. +- `packages/connector-posthog`: PostHog scan connector. +- `packages/connector-snowflake`: Snowflake scan connector. +- `packages/connector-sqlite`: SQLite scan connector. +- `packages/connector-sqlserver`: SQL Server scan connector. +- `python/klo-sl`: semantic-layer engine. +- `python/klo-daemon`: portable compute service for semantic-layer operations. + +## Development + +Install dependencies and run checks: + +```bash +pnpm install +pnpm run check +uv sync --all-packages +source .venv/bin/activate +uv run pytest +``` + +Use the optional development binary when you want a local `klo-dev` command: + +```bash +pnpm run link:dev +klo-dev --help +``` + +The repository uses `pnpm` for TypeScript packages and `uv` for Python +packages. + +## Release status + +This repository is prepared for source publication. 
Package publishing is still +disabled by `release-policy.json`; registry names, public versions, package +visibility, and provenance policy must be chosen before publishing artifacts to +npm or Python package indexes. + +Build local package artifacts with: + +```bash +source .venv/bin/activate +pnpm run artifacts:check +pnpm run release:readiness +``` + +## License + +KLO is licensed under the Apache License, Version 2.0. See `LICENSE`. diff --git a/examples/README.md b/examples/README.md new file mode 100644 index 00000000..da5a4916 --- /dev/null +++ b/examples/README.md @@ -0,0 +1,40 @@ +# klo examples + +## local-warehouse + +`local-warehouse/` is a runnable standalone KLO project for local CLI and MCP +smoke testing. It uses the fake ingest adapter and does not require a database +or external app server. + +Copy it before running commands: + +```bash +pnpm --filter @klo/cli run build +EXAMPLE_DIR="$(mktemp -d)/local-warehouse" +cp -R examples/local-warehouse "$EXAMPLE_DIR" +node packages/cli/dist/bin.js knowledge list --project-dir "$EXAMPLE_DIR" +node packages/cli/dist/bin.js sl list --project-dir "$EXAMPLE_DIR" --connection-id warehouse +node packages/cli/dist/bin.js ingest run --project-dir "$EXAMPLE_DIR" --connection-id warehouse --adapter fake --source-dir "$EXAMPLE_DIR/source" +``` + +The copied project initializes its own Git repository on first use. + +## orbit-relationship-verification + +`orbit-relationship-verification/` is a checked-in KLO project used by +`pnpm run relationships:verify-orbit`. It points the `orbit` SQLite connection +at the Orbit-style no-declared-constraint relationship fixture and verifies that +relationship enrichment writes nine accepted joins without requiring a local +warehouse credential. + +## postgres-historic + +`postgres-historic/` is a manual Docker-backed smoke for Postgres +historic-SQL ingest via `pg_stat_statements`. 
It verifies setup, first-run +baseline creation, delta-only follow-up ingest, and reset handling without +requiring a managed Postgres service. + +## package-artifacts + +`package-artifacts/` documents the artifact smoke checks. Those checks create +temporary projects instead of storing sample projects in this directory. diff --git a/examples/local-warehouse/README.md b/examples/local-warehouse/README.md new file mode 100644 index 00000000..a8a58ff2 --- /dev/null +++ b/examples/local-warehouse/README.md @@ -0,0 +1,20 @@ +# Local Warehouse Example + +This example is a standalone KLO project that can be copied to a temp directory +and used with the local CLI and stdio MCP server. It uses the `fake` ingest +adapter so it does not require a database or external app server. + +Run the example from the repository root after building the CLI: + +```bash +pnpm --filter @klo/cli run build +EXAMPLE_DIR="$(mktemp -d)/local-warehouse" +cp -R examples/local-warehouse "$EXAMPLE_DIR" +node packages/cli/dist/bin.js knowledge list --project-dir "$EXAMPLE_DIR" +node packages/cli/dist/bin.js sl list --project-dir "$EXAMPLE_DIR" --connection-id warehouse +node packages/cli/dist/bin.js ingest run --project-dir "$EXAMPLE_DIR" --connection-id warehouse --adapter fake --source-dir "$EXAMPLE_DIR/source" +``` + +The copied project creates its own Git repository on first use. Keep commands +pointed at a copy when experimenting so the checked-in example fixture stays +unchanged. 
diff --git a/examples/local-warehouse/klo.yaml b/examples/local-warehouse/klo.yaml new file mode 100644 index 00000000..959c8fa8 --- /dev/null +++ b/examples/local-warehouse/klo.yaml @@ -0,0 +1,25 @@ +project: local-warehouse +connections: + warehouse: + driver: postgres + readonly: true +storage: + state: sqlite + search: sqlite-fts5 + git: + auto_commit: true + author: "klo " +ingest: + adapters: + - fake + - live-database +agent: + run_research: + enabled: false + max_iterations: 20 + default_toolset: + - sl_query + - knowledge_search + - sl_read_source +memory: + auto_commit: true diff --git a/examples/local-warehouse/knowledge/global/revenue.md b/examples/local-warehouse/knowledge/global/revenue.md new file mode 100644 index 00000000..a076f6c5 --- /dev/null +++ b/examples/local-warehouse/knowledge/global/revenue.md @@ -0,0 +1,15 @@ +--- +summary: Paid order value after refunds +tags: + - finance + - orders +refs: [] +sl_refs: + - warehouse.orders +usage_mode: auto +--- + +Revenue is paid order amount after refund adjustments. + +Use `orders.total_revenue` for recognized order value and `orders.order_count` +for paid order volume. diff --git a/examples/local-warehouse/semantic-layer/warehouse/orders.yaml b/examples/local-warehouse/semantic-layer/warehouse/orders.yaml new file mode 100644 index 00000000..ffcca12b --- /dev/null +++ b/examples/local-warehouse/semantic-layer/warehouse/orders.yaml @@ -0,0 +1,18 @@ +name: orders +table: public.orders +description: Orders placed through the storefront. 
+grain: + - id +columns: + - name: id + type: number + - name: status + type: string + - name: amount + type: number +measures: + - name: order_count + expr: count(*) + - name: total_revenue + expr: sum(amount) +joins: [] diff --git a/examples/local-warehouse/source/orders/orders.json b/examples/local-warehouse/source/orders/orders.json new file mode 100644 index 00000000..98afcebb --- /dev/null +++ b/examples/local-warehouse/source/orders/orders.json @@ -0,0 +1 @@ +{"source":"orders","description":"Example raw file staged by the fake adapter"} diff --git a/examples/orbit-relationship-verification/README.md b/examples/orbit-relationship-verification/README.md new file mode 100644 index 00000000..7f0bde1f --- /dev/null +++ b/examples/orbit-relationship-verification/README.md @@ -0,0 +1,33 @@ +# Orbit-style relationship discovery verification + +This KLO project backs the default `relationships:verify-orbit` command. It uses +the checked-in Orbit-style SQLite fixture from the relationship discovery +benchmark corpus, with no declared primary keys or foreign keys in the database +schema. 
+ +Run from the KLO workspace root: + +```bash +pnpm run relationships:verify-orbit +``` + +Expected relationship summary: + +```text +Accepted: 9 +Review: 0 +Rejected: 0 +Skipped: 0 +``` + +The command refreshes: + +```text +examples/orbit-relationship-verification/reports/orbit-verification.md +``` + +Use a real local Orbit project by overriding the project directory: + +```bash +KLO_ORBIT_PROJECT_DIR=/path/to/orbit-project pnpm run relationships:verify-orbit +``` diff --git a/examples/orbit-relationship-verification/klo.yaml b/examples/orbit-relationship-verification/klo.yaml new file mode 100644 index 00000000..db124bab --- /dev/null +++ b/examples/orbit-relationship-verification/klo.yaml @@ -0,0 +1,28 @@ +project: orbit-relationship-verification +connections: + orbit: + driver: sqlite + path: ../../packages/context/test/fixtures/relationship-benchmarks/orbit_style_product_no_declared_constraints/data.sqlite + readonly: true +storage: + state: sqlite + search: sqlite-fts5 + git: + auto_commit: true + author: "klo " +ingest: + adapters: + - live-database +scan: + enrichment: + backend: none + relationships: + enabled: true + llm_proposals: false + validation_required_for_manifest: true + accept_threshold: 0.85 + review_threshold: 0.55 + max_llm_tables_per_batch: 40 + max_candidates_per_column: 25 + profile_sample_rows: 10000 + validation_concurrency: 4 diff --git a/examples/package-artifacts/README.md b/examples/package-artifacts/README.md new file mode 100644 index 00000000..0e7c05a0 --- /dev/null +++ b/examples/package-artifacts/README.md @@ -0,0 +1,17 @@ +# Package artifact smoke checks + +The package artifact smoke checks create temporary projects instead of storing +sample projects in this directory. 
Run the checks from `klo/`: + +```bash +source .venv/bin/activate +pnpm run artifacts:check +``` + +The npm smoke project installs the generated `@klo/context` and `@klo/cli` +tarballs, imports public package entry points, and runs installed `klo` +commands against a generated local project. + +The Python smoke project installs `klo-daemon` through the local artifact +directory, imports `semantic_layer` and `klo_daemon`, and runs +`python -m klo_daemon semantic-validate`. diff --git a/examples/postgres-historic/README.md b/examples/postgres-historic/README.md new file mode 100644 index 00000000..04c943be --- /dev/null +++ b/examples/postgres-historic/README.md @@ -0,0 +1,115 @@ +# Postgres Historic SQL Example + +This example is a manual smoke for Postgres historic-SQL ingest through +`pg_stat_statements`. It starts Postgres 14 with the extension preloaded, +generates query workload under separate users, runs `klo setup` with +`--enable-historic-sql`, and verifies three local ingest runs: + +- first run creates a fresh PGSS baseline +- second run emits only positive deltas +- reset run treats `pg_stat_statements_reset()` as a fresh baseline + +## Prerequisites + +- Docker with Compose v2 +- Node and pnpm matching the KLO workspace +- `python-service/.venv` already created, or `KLO_SQL_ANALYSIS_URL` pointing at + a running service that exposes `/api/sql/analyze-for-fingerprint` + +## Run + +From the KLO repository root: + +```bash +examples/postgres-historic/scripts/smoke.sh +``` + +The smoke creates a temporary KLO project, starts Postgres on +`127.0.0.1:55432`, and uses this connection URL: + +```bash +postgresql://klo_reader:klo_reader@127.0.0.1:55432/analytics # pragma: allowlist secret +``` + +Set `KLO_POSTGRES_HISTORIC_KEEP_DOCKER=1` to leave the container running after +the script exits. + +The smoke validates the historic-SQL raw snapshot path without requiring LLM +credentials. 
It uses KLO's local stage-only ingest API after `klo setup` so the +PGSS baseline and delta behavior can be checked independently from curation. + +## Manual Commands + +Start Postgres and generate the base workload: + +```bash +docker compose -f examples/postgres-historic/docker-compose.yml up -d --wait +examples/postgres-historic/scripts/generate-workload.sh base +``` + +Create a project and enable historic SQL: + +```bash +export WAREHOUSE_DATABASE_URL=postgresql://klo_reader:klo_reader@127.0.0.1:55432/analytics # pragma: allowlist secret +pnpm --filter @klo/cli run build +node packages/cli/dist/bin.js --project-dir /tmp/klo-postgres-historic setup \ + --new \ + --skip-agents \ + --skip-llm \ + --skip-embeddings \ + --skip-sources \ + --database postgres \ + --new-database-connection-id warehouse \ + --database-url env:WAREHOUSE_DATABASE_URL \ + --database-schema public \ + --enable-historic-sql \ + --historic-sql-min-calls 2 \ + --yes \ + --no-input +``` + +### Readiness check + +```bash +pnpm run klo -- dev doctor --project-dir /tmp/klo-postgres-historic --no-input +``` + +The installed CLI form is `klo dev doctor --project-dir +/tmp/klo-postgres-historic --no-input`. Expected output includes `PASS Postgres +Historic SQL (warehouse)` when `pg_stat_statements` is installed, +`pg_read_all_stats` is granted, tracking is enabled, and +`pg_stat_statements.max` is at least 5000. + +Run local historic-SQL ingest: + +```bash +node packages/cli/dist/bin.js --project-dir /tmp/klo-postgres-historic dev ingest run \ + --connection-id warehouse \ + --adapter historic-sql \ + --plain \ + --no-input +``` + +The full `dev ingest run` path also runs curation work units, so it requires a +configured LLM provider. 
+ +Inspect the latest manifest: + +```bash +find /tmp/klo-postgres-historic/raw-sources/warehouse/historic-sql -name manifest.json | sort | tail -n 1 +``` + +The manifest should have `dialect: "postgres"`, `degraded: true`, +`baselineFirstRun: true` on the first run, and populated `pgServerVersion` and +`statsResetAt`. + +## Troubleshooting + +- Missing extension: confirm `shared_preload_libraries=pg_stat_statements` and + `CREATE EXTENSION pg_stat_statements;` both happened in the `analytics` + database. +- Missing grants: confirm `GRANT pg_read_all_stats TO klo_reader;`. +- Empty templates: rerun `scripts/generate-workload.sh base` and keep + `--historic-sql-min-calls 2` for the smoke. +- SQL-analysis failures: set `KLO_SQL_ANALYSIS_URL` to the running service URL + or create `python-service/.venv` before running `scripts/smoke.sh`. diff --git a/examples/postgres-historic/docker-compose.yml b/examples/postgres-historic/docker-compose.yml new file mode 100644 index 00000000..2aa09eb4 --- /dev/null +++ b/examples/postgres-historic/docker-compose.yml @@ -0,0 +1,24 @@ +services: + postgres: + image: postgres:14 + command: + - postgres + - -c + - shared_preload_libraries=pg_stat_statements + - -c + - pg_stat_statements.track=top + - -c + - pg_stat_statements.max=10000 + environment: + POSTGRES_DB: analytics + POSTGRES_USER: postgres + POSTGRES_PASSWORD: postgres # pragma: allowlist secret + ports: + - "55432:5432" + healthcheck: + test: ["CMD-SHELL", "pg_isready -U postgres -d analytics"] + interval: 2s + timeout: 5s + retries: 30 + volumes: + - ./init:/docker-entrypoint-initdb.d:ro diff --git a/examples/postgres-historic/init/001-schema.sql b/examples/postgres-historic/init/001-schema.sql new file mode 100644 index 00000000..8eae7eaf --- /dev/null +++ b/examples/postgres-historic/init/001-schema.sql @@ -0,0 +1,51 @@ +CREATE EXTENSION IF NOT EXISTS pg_stat_statements; + +CREATE ROLE app_user LOGIN PASSWORD 'app_pass'; +CREATE ROLE etl_user LOGIN PASSWORD 'etl_pass'; 
+CREATE ROLE klo_reader LOGIN PASSWORD 'klo_reader'; + +GRANT pg_read_all_stats TO klo_reader; + +CREATE TABLE customers ( + id integer PRIMARY KEY, + region text NOT NULL, + plan text NOT NULL +); + +CREATE TABLE orders ( + id integer PRIMARY KEY, + customer_id integer NOT NULL REFERENCES customers(id), + status text NOT NULL, + total numeric(12, 2) NOT NULL, + created_at timestamptz NOT NULL +); + +CREATE TABLE events ( + id integer PRIMARY KEY, + customer_id integer NOT NULL REFERENCES customers(id), + event_name text NOT NULL, + occurred_at timestamptz NOT NULL +); + +INSERT INTO customers (id, region, plan) VALUES + (1, 'na', 'enterprise'), + (2, 'na', 'team'), + (3, 'eu', 'enterprise'), + (4, 'apac', 'team'); + +INSERT INTO orders (id, customer_id, status, total, created_at) VALUES + (1, 1, 'paid', 125.50, now() - interval '9 days'), + (2, 1, 'paid', 89.00, now() - interval '4 days'), + (3, 2, 'pending', 42.00, now() - interval '2 days'), + (4, 3, 'paid', 301.25, now() - interval '1 day'), + (5, 4, 'refunded', 77.70, now() - interval '3 hours'); + +INSERT INTO events (id, customer_id, event_name, occurred_at) VALUES + (1, 1, 'dashboard_viewed', now() - interval '1 day'), + (2, 1, 'export_started', now() - interval '8 hours'), + (3, 2, 'dashboard_viewed', now() - interval '7 hours'), + (4, 3, 'sync_completed', now() - interval '6 hours'), + (5, 4, 'dashboard_viewed', now() - interval '5 hours'); + +GRANT USAGE ON SCHEMA public TO app_user, etl_user, klo_reader; +GRANT SELECT ON ALL TABLES IN SCHEMA public TO app_user, etl_user, klo_reader; diff --git a/examples/postgres-historic/scripts/generate-workload.sh b/examples/postgres-historic/scripts/generate-workload.sh new file mode 100755 index 00000000..76659f0b --- /dev/null +++ b/examples/postgres-historic/scripts/generate-workload.sh @@ -0,0 +1,33 @@ +#!/usr/bin/env bash +set -euo pipefail + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +EXAMPLE_DIR="$(cd "$SCRIPT_DIR/.." 
&& pwd)" +COMPOSE_FILE="$EXAMPLE_DIR/docker-compose.yml" +MODE="${1:-base}" + +run_sql() { + local user="$1" + local password="$2" + local sql="$3" + docker compose -f "$COMPOSE_FILE" exec -T -e PGPASSWORD="$password" postgres \ + psql -h 127.0.0.1 -U "$user" -d analytics -v ON_ERROR_STOP=1 -c "$sql" >/dev/null +} + +for _ in $(seq 1 12); do + run_sql app_user app_pass "SELECT c.region, count(*) AS order_count FROM orders o JOIN customers c ON c.id = o.customer_id WHERE o.status = 'paid' GROUP BY c.region ORDER BY c.region" +done + +for _ in $(seq 1 7); do + run_sql app_user app_pass "SELECT c.plan, sum(o.total) AS revenue FROM orders o JOIN customers c ON c.id = o.customer_id WHERE o.created_at >= now() - interval '14 days' GROUP BY c.plan ORDER BY revenue DESC" +done + +for _ in $(seq 1 5); do + run_sql etl_user etl_pass "SELECT e.event_name, count(*) AS event_count FROM events e JOIN customers c ON c.id = e.customer_id WHERE c.region = 'na' GROUP BY e.event_name ORDER BY event_count DESC" +done + +if [[ "$MODE" == "extra" ]]; then + for _ in $(seq 1 4); do + run_sql etl_user etl_pass "SELECT c.region, avg(o.total) AS avg_total FROM orders o JOIN customers c ON c.id = o.customer_id WHERE o.status <> 'refunded' GROUP BY c.region ORDER BY avg_total DESC" + done +fi diff --git a/examples/postgres-historic/scripts/smoke.sh b/examples/postgres-historic/scripts/smoke.sh new file mode 100755 index 00000000..4fec1e4b --- /dev/null +++ b/examples/postgres-historic/scripts/smoke.sh @@ -0,0 +1,152 @@ +#!/usr/bin/env bash +set -euo pipefail + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +EXAMPLE_DIR="$(cd "$SCRIPT_DIR/.." && pwd)" +KLO_ROOT="$(cd "$EXAMPLE_DIR/../.." && pwd)" +REPO_ROOT="$(cd "$KLO_ROOT/.." 
&& pwd)" +COMPOSE_FILE="$EXAMPLE_DIR/docker-compose.yml" +PROJECT_PARENT="${KLO_POSTGRES_HISTORIC_PROJECT_PARENT:-$(mktemp -d)}" +PROJECT_DIR="$PROJECT_PARENT/postgres-historic-klo" +KLO_BIN="$KLO_ROOT/packages/cli/dist/bin.js" +PYTHON_SERVICE_LOG="$PROJECT_PARENT/python-service.log" +PYTHON_SERVICE_PID="" + +cleanup() { # EXIT trap: stop the SQL-analysis service this script started, then tear down Docker unless KLO_POSTGRES_HISTORIC_KEEP_DOCKER=1 + if [[ -n "$PYTHON_SERVICE_PID" ]]; then + kill "$PYTHON_SERVICE_PID" >/dev/null 2>&1 || true + fi + if [[ "${KLO_POSTGRES_HISTORIC_KEEP_DOCKER:-0}" != "1" ]]; then + docker compose -f "$COMPOSE_FILE" down -v >/dev/null 2>&1 || true + fi +} +trap cleanup EXIT + +start_sql_analysis_if_needed() { # No-op when KLO_SQL_ANALYSIS_URL is set; otherwise starts uvicorn from python-service/.venv on 127.0.0.1:18081 and polls /health for up to 60 attempts + if [[ -n "${KLO_SQL_ANALYSIS_URL:-}" ]]; then + return + fi + if [[ ! -d "$REPO_ROOT/python-service/.venv" ]]; then + echo "Set KLO_SQL_ANALYSIS_URL or create python-service/.venv before running this smoke." >&2 + exit 1 + fi + ( + cd "$REPO_ROOT/python-service" + source .venv/bin/activate + exec uvicorn app.main:app --host 127.0.0.1 --port 18081 >"$PYTHON_SERVICE_LOG" 2>&1 # exec: make $! the uvicorn PID so cleanup's kill stops the server, not just the wrapper subshell + ) & + PYTHON_SERVICE_PID="$!" + export KLO_SQL_ANALYSIS_URL="http://127.0.0.1:18081" + for _ in $(seq 1 60); do + if curl -fsS "$KLO_SQL_ANALYSIS_URL/health" >/dev/null 2>&1; then + return + fi + sleep 1 + done + echo "SQL analysis service did not become healthy. 
Log: $PYTHON_SERVICE_LOG" >&2 + exit 1 +} + +latest_manifest() { # Print the manifest.json path that sorts last under the historic-sql raw-source directory + find "$PROJECT_DIR/raw-sources/warehouse/historic-sql" -name manifest.json | sort | tail -n 1 +} + +assert_manifest() { # $1: manifest path; $2: expected baselineFirstRun ("true" or anything else for false); the node script throws on any mismatch + local manifest_path="$1" + local expected_first_run="$2" + node - "$manifest_path" "$expected_first_run" <<'NODE' +const { readFileSync } = require('node:fs'); +const manifestPath = process.argv[2]; +const expectedFirstRun = process.argv[3] === 'true'; +const manifest = JSON.parse(readFileSync(manifestPath, 'utf8')); +if (manifest.dialect !== 'postgres') throw new Error(`Expected dialect postgres, got ${manifest.dialect}`); +if (manifest.degraded !== true) throw new Error('Expected degraded:true for Postgres PGSS v1'); +if (manifest.baselineFirstRun !== expectedFirstRun) { + throw new Error(`Expected baselineFirstRun:${expectedFirstRun}, got ${manifest.baselineFirstRun}`); +} +if (!manifest.pgServerVersion) throw new Error('Expected pgServerVersion'); +if (!manifest.statsResetAt) throw new Error('Expected statsResetAt'); +if (!Array.isArray(manifest.templates) || manifest.templates.length === 0) { + throw new Error('Expected at least one staged historic-SQL template'); +} +NODE +} + +run_historic_stage_only() { # $1: job id; stages one historic-sql pull via the built packages and prints its syncId. The script uses top-level await, so stdin is forced to ESM for Node releases that do not auto-detect it + local job_id="$1" + node --input-type=module - "$KLO_ROOT" "$PROJECT_DIR" "$job_id" <<'NODE' +const { join } = await import('node:path'); + +const kloRoot = process.argv[2]; +const projectDir = process.argv[3]; +const jobId = process.argv[4]; +const { loadKloProject } = await import(join(kloRoot, 'packages/context/dist/project/index.js')); +const { runLocalStageOnlyIngest } = await import(join(kloRoot, 'packages/context/dist/ingest/index.js')); +const { createKloCliLocalIngestAdapters } = await import(join(kloRoot, 'packages/cli/dist/local-adapters.js')); + +const project = await loadKloProject({ projectDir }); +const adapters = createKloCliLocalIngestAdapters(project, { historicSqlConnectionId: 'warehouse' }); +const adapter = adapters.find((candidate) => candidate.source === 
'historic-sql'); +if (!adapter) throw new Error('historic-sql adapter was not registered for local run'); +const record = await runLocalStageOnlyIngest({ + project, + adapters, + adapter: 'historic-sql', + connectionId: 'warehouse', + trigger: 'manual_resync', + jobId, +}); +await adapter.onPullSucceeded?.({ + connectionId: 'warehouse', + sourceKey: 'historic-sql', + syncId: record.syncId, + trigger: 'manual_resync', + completedAt: new Date(record.completedAt), + stagedDir: join(project.projectDir, '.klo/cache/local-ingest', jobId, 'staged'), +}); +console.log(record.syncId); +NODE +} + +cd "$KLO_ROOT" +pnpm --filter @klo/context run build +pnpm --filter @klo/cli run build +start_sql_analysis_if_needed + +docker compose -f "$COMPOSE_FILE" up -d --wait +"$EXAMPLE_DIR/scripts/generate-workload.sh" base + +export WAREHOUSE_DATABASE_URL="${WAREHOUSE_DATABASE_URL:-postgresql://klo_reader:klo_reader@127.0.0.1:55432/analytics}" # pragma: allowlist secret +node "$KLO_BIN" --project-dir "$PROJECT_DIR" setup \ + --new \ + --skip-agents \ + --skip-llm \ + --skip-embeddings \ + --skip-sources \ + --database postgres \ + --new-database-connection-id warehouse \ + --database-url env:WAREHOUSE_DATABASE_URL \ + --database-schema public \ + --enable-historic-sql \ + --historic-sql-min-calls 2 \ + --yes \ + --no-input + +run_historic_stage_only "historic-first-$$" +FIRST_MANIFEST="$(latest_manifest)" +assert_manifest "$FIRST_MANIFEST" true + +"$EXAMPLE_DIR/scripts/generate-workload.sh" extra +run_historic_stage_only "historic-second-$$" +SECOND_MANIFEST="$(latest_manifest)" +assert_manifest "$SECOND_MANIFEST" false + +docker compose -f "$COMPOSE_FILE" exec -T postgres \ + psql -U postgres -d analytics -v ON_ERROR_STOP=1 -c "SELECT pg_stat_statements_reset();" >/dev/null +"$EXAMPLE_DIR/scripts/generate-workload.sh" extra +run_historic_stage_only "historic-reset-$$" +RESET_MANIFEST="$(latest_manifest)" +assert_manifest "$RESET_MANIFEST" true + +echo "Postgres historic SQL smoke 
passed" +echo "Project dir: $PROJECT_DIR" diff --git a/package.json b/package.json new file mode 100644 index 00000000..681c9fb3 --- /dev/null +++ b/package.json @@ -0,0 +1,54 @@ +{ + "name": "klo-workspace", + "version": "0.0.0-private", + "description": "Workspace root for klo packages", + "private": true, + "type": "module", + "packageManager": "pnpm@10.28.0", + "engines": { + "node": ">=22.0.0", + "pnpm": ">=10.20.0" + }, + "scripts": { + "artifacts:build": "node scripts/package-artifacts.mjs build", + "artifacts:check": "node scripts/package-artifacts.mjs check", + "artifacts:live-db-smoke": "node scripts/installed-live-database-smoke.mjs", + "artifacts:verify": "node scripts/package-artifacts.mjs verify", + "artifacts:verify-demo": "node scripts/package-artifacts.mjs verify-demo", + "artifacts:verify-manifest": "node scripts/package-artifacts.mjs verify-manifest", + "build": "pnpm --filter './packages/*' run build", + "check": "node scripts/check-boundaries.mjs && node --test scripts/*.test.mjs && pnpm --filter './packages/*' run build && pnpm --filter './packages/*' run test", + "klo": "node scripts/run-klo.mjs", + "link:dev": "node scripts/link-dev-cli.mjs", + "native:rebuild": "pnpm -r rebuild better-sqlite3", + "setup:dev": "node scripts/setup-dev.mjs", + "release:published-smoke": "node scripts/published-package-smoke.mjs --require-config", + "release:readiness": "node scripts/release-readiness.mjs", + "relationships:acquire-public-fixtures": "node scripts/acquire-public-benchmark-fixtures.mjs", + "relationships:rebuild-public-snapshots": "node scripts/build-benchmark-snapshot.mjs --rebuild-all", + "relationships:build-adventureworks-oltp": "node scripts/build-adventureworks-oltp-fixture.mjs", + "relationships:verify-orbit": "node scripts/relationship-orbit-verification.mjs", + "smoke": "pnpm run build && pnpm --filter @klo/cli run smoke", + "test": "node --test scripts/*.test.mjs && pnpm --filter './packages/*' run test", + "type-check": "pnpm --filter 
'./packages/*' run type-check" + }, + "devDependencies": { + "@types/node": "^24.3.0", + "typescript": "^5.9.3", + "vitest": "^4.0.18" + }, + "pnpm": { + "onlyBuiltDependencies": [ + "better-sqlite3" + ] + }, + "license": "Apache-2.0", + "repository": { + "type": "git", + "url": "git+https://github.com/kaelio/ktx.git" + }, + "bugs": { + "url": "https://github.com/kaelio/ktx/issues" + }, + "homepage": "https://github.com/kaelio/ktx#readme" +} diff --git a/packages/cli/assets/demo/orbit/demo.db b/packages/cli/assets/demo/orbit/demo.db new file mode 100644 index 0000000000000000000000000000000000000000..f0f261f1f5897de6468342337143c55909ab5fab GIT binary patch literal 1175552 zcmeFa3793>RUVj`l}lw+R;Gm1YL%)>-L00o)sm_eH{!)xgm$U5FGy3cP7!U{y9>xr-0f7-31~UvFgBP$^g@i3&u=ucSfAinC z@80vDxHsax*PpOw=F9%{Wkkh$igVsQC(a+|#Q9IW=?$+rdu8XQ-HUtYx3An(EIejm zadF{=H{G29e+B-3v;x&F2iLB@{`#fyEmyYhIk$7^-1ddb z^`FHXU-HH~Uv$@wae zh3)flvkT)xyiu<(No?VDeye@WvY|Eld%r!HQ-aD@*|gzfB|ym}db zXbDd3?QCD!IUV%Hs$9OZedX%quq4~(>$9EOxo|}eUcP$i(z&yAs@um4t83Rk^O;N6 z%KCTOFI?Qaa`MXAQ}5lma=G!(h5ugv#>U6?7wSW-@?5{~^3HubduOja5Pr@n{M>sl z?#Y41&rK}I-p=KV=kC*sbn*QcnyY#H_>Pse>u5-%L5^V47ojiZ>zMb=S<VPd0qw%J$xsvls3?X}_R9+PQE#@xkrYaCz7K|SIHl?3pClEvNo5m-gEiX-q}l6&R)F0U-kcQwFsBC&vsn~r>^c{p-)Zx=JxSp z57^+d_{d_dy1lF}*MF|lO4rA%#?1>aVX&R^+h@;(W!}4ZPHa&}nmSu~(?W;e@d}-eZ3&j2-zY zVgX{7atpYwKG294aEhJx10CejxqZC3SiAMLi!Khhczk*CYc4P8Rq%)F>pl+M?LXMB zj)JSx7vV&5t)9Jb`s(E?9PPHx?O4>h7nekElFMg5uygXB?F#~9=g#i#oOk z@2|dNc2ICm4OhJmQJ)u_QHf@|-6v-n+a#AE` zMU=lzwUNnAptBInkq3CL+MAxaN$p$CPh}2pAs#+cGtwv(2 ziDIjv*lJp0>nz6UpD-%AKDJgwM^_`!)kM+NP;@oz(RCIZl_v~G*QLkQx*XRwlIUup z=xQjsnq+jHZkgT*!?AT~t5!mLtC84hqS$IEwwgq2oy97}38SLxX03#du12D(iK45a z=xU}!*I7)HpD-L3abq(bYuJ)lhUbEzxzlam)$Bv2}f;R-@sq5?f6aTMfll z(;i!Aun_>qsOWmQ)Mt<{R?dx1=%YZgVuc*qTvn&62To z7R&TcWGcF@)N1JHnn`rcqUf4Ybj=dcbr!4G+=jq+W4TsB-wR|CU9%{@sVxbc~d7ajib!?*3{|F$*}MbVt1Xxi2eLO zs>~qQw=KpP}r%_M9UH<=o&Hul&`JN-6MgIS* z8!tQjQ-_N~-*xE5^>1I_IQZfE`Tyrv-@Wpic>e!$%g;IRj}F|n^mmsYxA+a906c&K 
z!lU`GT7Hfz38^O`r4%sBDD^HW^)4T6>W#pziZJV=L_w&O@?{y7FH0(4mhI)sS&CDr z>2jTWdFTp4>M3F=1u3DPWdS0kfn6X4ziAoNgl&t+*PoT-2KAa(gM|%Q7lomQ=nhljX}< zEQ5X(uIMi1wHmtIUdqFvGI}^v(!-%L@o;DstDrK&rQ8iuE1}EnJ$XFTiyja4=sYq;>Tv|&#o2G<@TOL*IpD|dlX%J z?a_4x8`-Cb{KRbOj#?L8ZtqES?M2bGN71#HjIL8HWAg}(txG3rCA7EpB)0aV*xIAm z+DpXN8LWb5hpGu-^I5eLI=c2Gy7r>z+N0>&n-X1TFb$p|s)mH+XVwboNAEp}uDvL_ z_9(jcTB7Szo=+Xz_%$JXVi)|zNww5>6Tgz9bF5Fu0<4G z3yQ8qBD&6C72u3&L)dsqt%SZ8C?vWTQFJXRx)xKS>r~U&1cx7_t=?R#)9_Y_twj`D z3yQ5pOKhFNIJRR&Mc12ZMRas6B)S$+bS)^l7VXh>1{>L?6^^b;H`co7dx1itYY|1) zf}(4YjIL8HWBXM&wk|!nR-)mp5?hNXwiXmyi$rXl#VV*)QFL|r|HXx^i<_U{+&S{w zN4{p`=Qf^u_{R=E6Z!vR*S~3f?cjsR|4*&{&g$D&es$%g%Rjc99r*qOHzNOESsX8X zdEw$*$$z!})`SxbC1eewkTs-`HEa)A_NSi*P3Zjg9$0PZ=R%EMkma{HCs673P=eMl z3R*)7TEk?}vc5P?jYb)0;mytsR3T(2L2DQVtsw=iVIpXOb~0rpbP=f9ysK70L+nsO z)-VcLLkd~LDIseH)7ZR$ZmZ7*1B$ML_UJl`jZmoKyX2+U)VgSh z9Y}N?MA3CX(RGlFuG1~k)Afg?SJz5tZyiW%9YnEpK(Td@h^@0&1^EKs9B;m=RzgSD zfkf9q6kP`tT?bR5>nx^0-oVjy`IWUoI^7;fbR9&|bwJT|&=Osz8wUj$j;-sjsMTnA ztHjnp6k7)rTLGnXP>mZ7*1B$MLM0A~M6-c7OLXl=(X~&}wciq5XE2VfQqiq)wf2%)5q&Svm+0D$qHCX`Yrj3ZPPLIORB>!w zesQgd_SU|{)_xRQ`xIOI$=EuJWl*Ey`oq$TYBh9p?Mrm+N71!U(Y2q5uCrJLo$ef6 zH(ppPq3;Fy5?%XIbnR1g?N5oW(@ldX-5gt2Ur?*l@K%Yf{V2BfDYo`oV(To%L3X2} z>+@?xbad@YbnQpcwNKHt-yU6Ou@O=nj;>43t98-$0)2_D{V2NjDZ2KP(RI3IkmPV| zU3zY62%VM9|vYxbSfBLknw^0OIxa=H) z6K^9lNbE*xNch61tcItUPg9{$0@~b*-N6V661$NK61?y!tB|IcPg6*jgR0f{)e5Qb zWrPHY-AEw`U-*<2(iHP)iZmuOtX--VQDAFK2w&KZ6p`?SPg#)$#eAA3$RRkQFJG)R zQDAF?1c}{96A51Ulr{0*Y9q%qH40scSh`TFp~9CD5+rsbH6(oDQ&vML=F^nObrE9Y ze6556TO%|`>^7Ak8YDhtC1QV_S`*2PE=H`LtJTroDkMk~ksxtI0!bSQat7nrdKHKD z_4n3_D6lm`g2ZmLE^=##PkCLGx6TmOY{`ludbRYPS{DVjCX?tI(ID|5ElM)FPPL4! 
zR`I>^(%D)G?X5zBG!Y3BM@Wf8Y@NX>wpPWr$(v_tB^1~iAwgm{vI<4lX(Y%QOoI$U zwL>i5TPvgzZ&vXSif+t%{@T#=C1J)V)A9lEiz|jIQ{Ql$dhwI@L6` zZpFoj)pynEG`uy$)@;;_tvEvJwA{JQU>y6LTSeEetrbzxH5-NKirvUZpFY@*HsBpDe16r-z{3R8t47EGP86~wl;)|@&T)^5W{#0v~ z8^WZJ)$}A*#YrV~JmQP2iHcQI&4lpHeJIs(<;E{5Xf@RE7^jxh@Q5$6D)Xe-4GzXq z<>oJ`Xl1D5G0rKe;}KtEJ!a3d8x)L<%1vHU;A*JpF-|C{=@DOKEn=sgY8`}d)dsOz zxe-j-WvS;ePAaMA(X{nE&KAd28^n6$CNU{^HPrMNXO`6Th%fTWD3_frhU;dC%1vQX z_-d%{G0rfl?-5^QWxUf)Hx3F^Tzpun+z2M^vefn%rCTe}e5mF`@XJ;^u%^vtJ zd7*NXnDnhd5$+02)cuGfq(>soPBn~88TjsavvL!dw9CpoViV0H#u3qB+C1VJtYe>b z^R4n~wczdw)uWL-+!dN=AQ6s`MlE-RGuX#I?B+*yYY*3o=sSZv zsaCSpDs`j$p;{B|uzAS2P2SA8afCEU#@Ja*(;#FlJyffqgKQphZj(22ZX6*s9$n6j zq(RjmHa=J@p>GTFkaL^7nRDX^DKX_TZAlT(Dv+av5OR}P9Z9ATFpsCCiz1bN80P2SA8afB8n8DOVd z23Ze>*QNVwB^ur;(KT=8+$N{!nkS;`3|6s;j*74Eua(f@RR$89Xdp3;$VyBbNIZjS zY`(+cb@{$pA$>z2BZ*Bkk{Cxwp_b@6)i^fS;n=!kKxs z)hddvHvfO}!p9c2?%(_Zp8x;)kynHN|MJ7Xc=)-8e&*2g*MDMtc<_@4&DxKwJ#+O3 zR&QGQu9c@Qf9LWI2fp>dbxYrdHvoL|;xhivUxEJ~s2o%D=wlxzuLdyGsTolK@u93q zs#EjA_T}^Uq=^8H0@YgOsG;UMB|L=HEF%6H>d?eK-=M|WmbFT&D!NSdaOL=+H)~Bo zM9m`FpP|-F?6W3v%W5x`Rt*|;sfEh1LvPcT2(dJaD1V09GO^DpsJqn;gXp5vLzUx$ z9@nc7LTDCI{tR_vVxLv;X;r5cG&AJE$`L`2YWX30W)Z{BP#Y%pS-#W&S}o8ZgsfDK z`)QN~s0B0P^%-ix#6F9U-Lbp%7SDI=4m0f1A zPtU2&@lfU1oi}SUNNkKVS8A;^ZLO8j@j98Mv07Z%1SMjrlX~@(;$6S z)p)3Kd`=x3sh2X&S*e%Oqseq_@X!~rSV0kZG2IuA&_TqY+R_^+mtj= z)99i&Bc;(rrekzbhe1%qp&ROlD);^*?Tn>F#yB6PiA5$ov8c-mzz42UJXpCGC+XNo zql)4LltvYq)Tp8^3)mVK$HtY)y(>urHH|5X(@q*wWTuTNYBz%|WKm>%xN9m{15yk!9G>*u$jU$?xc4rN0#(L#mkfif&nnn~i zeA6@{(=m-`7Be;I8OxP>J(4bK_9S%1t=}|`$fU;+&0-u)&2ahu_bzOG-R6JT{J@bf z9(mu!|Gjbk@D~o>bLbBaonHSp>+e4J8wcOI_FHRjSpD_Ym#zHr%Bz+?wfy1(KYn1e z^y5p#;twz0vhclMf%JdMDLzm+Vk>Ssj$h|r9GFWxX%;aS@nNVAU_ZVGt}koSk#DG7G-4y-!yq5~ya2IN)|W?%CYr?Q0j@_`2`$3E3nf{|1S zqUpYddOoqw@|`=jTfi2~C<}lyI=ZQLM(Xv9X8IcH^~634kDam047PekDZ#^)Q(Ut`v>zH8fob`z$_o#||@Sg2}^`o9o z9UEz!QZ%gB&^RURvkJ-?J1+?ojVLxgSh-ow8mMWKQZ%gB&?F`7vl8ANJFI}B5kuNo;fxX)quIQM1}I^lrB4THTQJkH(M@gEv5`h6MH6`qjZVToi;tbL%M7-9MzQgs 
z$_;MTKuuGVqItZArY2#ZH4%4gH-s&kab#So+|Xw2j5IANn#F5qS`zkI0p*OHmxN}E zJXE<+%{n#?qzn+v;x#lT3Hz*scg8L&03WzV!{+}hN2-ObYg>z(j~w}r*xmnce+Bke zV1EVnS73hy_E%tk1@>29e+BkeV1EVnSKx1A1@5ohglW_RuDkxaa{JV&D<`vD<~n83 z@L+rxp5tJj6F^lDxUzHkO8pSYKHYRL3~o?bmMb??8pQyTE#sWBXkKtm~Su`p*r!h|0XEhp?fP`?(VR^W6v!qc4VBvx2ltm+g^GurY zvCqO&Re*#Bn!NIX%8idk34k?_5vI3tzxb+*>vmd|p z<*hGm{rT1xw*GMIcej3f>o*R6+u>&&`t+fD*8h6_Z?B&?_)7=hg-L+lvi6MCpId#~ z%D-Rvrj@5H|J3r^5B$Y}j~}>c>8F?8viNToA6dM4;V1qkE|d72JGUOb?Z#V(2YP&uu?V#5|7047?a|6|j)!76T#oK2=2A$Rhe)g!n?ypI^l3i|@a1@80(5 zoqKO-5i|)q8F7ZOJNIwdx&PAkh0A9zUf6kx2vWA``ClZ>6JQ9>Ey&yEW)Y)oQ+P2_ z+y?D;ZZXz2H;D*ko7{_#2YYyz$=o8uHaE5if;&3Dmfi-ib>|i&wt2D$QntywAgM)z zjyty?Z=2&HhHaCNCSrtUn_+iuG1fLWhzPb#?jwNkW;5*0EkbN_tVNKI0m6&TusgRP zvCWf25ZfmAksy~yfZe$TdD|QnG0HYR4hXFk;c?%cmbZ1cFZAmd|za6vQd&Mio6^Hm~Ha-H#NZt&)bBhq$ z9B2{bV}Oj>MJhPIAhFGo2vWB3ksy<-0K0SlB5#{T5u;m0pScVyF>OEppF5`Q z=l^rZwEg`5QO30W{QptJwEg^l?wGcp|IZ!M_VfQo8PgtB{vRG~`*_g$h#U9NF8}}0 zg{}W~>yFLOY@R;ylVL&=l{nReiS8zzrp|ReT@`FxSBNZbul7O zV0Tg!!Q=hQkxFBK7No4&7*(_laB&MiW0^O6=pzCupp`>;E= zAhFGhMG)Jj?;}B9D&*LmTadTSi$sjFjgJE~ix0bVi?OzOp@>kn@ezRL?O}Ir5n`Jc zv3IHb%s-ZF)Wq&@4CX&Mn5;CKC~Co1TvVG@K2)bBhq$JiA4Zj{!7&4ZCv-65HG% zg4i}a9|>r(8g}Ov1fW-!VRvp3Vw-2S2=Xz2 z-bjYsxdn-Bo*{yiZG0r47mi_fZb9BQw~H8M8y^SgWn$QMVtTV*p4K)`7ZJ)fJ_68t z!mvB{FA>|^mKId_7(lNA!|vRIf^BU6f7`-a=70YGCG`K_y8JWC?>O+62fpFJQ<6sh{^S0u%pDHZDWQ)I_B0 z=+Un`^P%?Hy`9rR``x`D=SK1@VpT6e!M z$T>!?h45vc{ z>&(R<=SC%Ga*^m4+lyuJ0%U8MdXth9RxH@&^o1bln6k~}TEV&G7x`-izsL%Lq;xJvW$-b(pZLfzl#(X1G0LPk`mUz1qIve+#`bMs=<5Cy`T^W>`T(;7Bn%<_O^(j%LZ>W_hN)^ zh6$H*i?O!3TSSm;@TzhzLh5FiUpcpkaM|2@vPF=;Tc-Jx*qvLD*yi0LNZH2UGt(qW z?9MI7+vZ&&M%l*SG1Hhy?9MI5+U9FTgtCplS5_|)pI?O7=AA8q{M|AQi^T5SzeQ~G z4iThmqkBX;K&UIeWg8y}Xlx>OouEl3fNpRHq_`!R#&3Ruh*7rj zaexLRVt4M}V{P*_B0|~5M}T3-1aQ|p5mESG;J59+L~Qf=w4i~H0W{|jyY9ElAxLcV zIuXRS8Td#*V+^r7w;+GryjH}pZ3aFL(6B=6&MgMEanJuhv9R^+TfNQyeRCHR0Kel% zZ{t@s?%7y4{OyOcL;no?|1Yoq9X$X46!`ytvG(z`TUUR2^{$mK;Q9ZJ%m2&rs}Fqc 
zKy}~=OP^eN)#7It?_bYjJmb*a^QVa3k|xoquJMO-fK$wu!UeT{-4z{fo$3@kWt} zAgWsjc4Xe`xm4V3Qi3K+S6jz1SEEJqb!)sUM1D)at6tCL;%?I+B3S24%fqN7#9uPCM6~;U48X9{%X8vJ`zy%nkrefc`|37$^C>5pp!)->p1>uB*<5< z4OOoLM>6jfE)uv+>en=_vs=0T)q2_Z+VL}B49$QE97zar@xX0Tg2J+etH&`|>tDq7 zDSSM@=ob$QRLm+|JaC(opvf<)t>YN1@uE%c#SKQk1Q8Mk3Ks|5rb9%q&Y8+}u-1#l z*G}Oh0ZoCSVpicIf!m}6g+-&Q$8lKe-=ys0BLarMgx@3)p>PqwZBn8p%NDHTIIOXC zd^EuDmmntG!}ban4cw+fOdZ=?4{N+=J`NNi4jAlCZj*b3ivwvo(&a81xJ~N!gr%#mo`w2WIa)M73&zMS7`q#UxJZ!Ko0O2rqKS1L zZjs{y0p`E>HN$R@;{riGbB%uYLqU$J1p7%aMkc}7-5|$BgS@py9~RADI}ZgpsuG<0 zU?8JlOc(`|PoA1iHXZ9`P4hvK!0w#;7$B2iOqc`%2`&b>O^1YFnKKUtIqEr0?t_2~ zf-zwb41n4N0k=uX2`eUH?E^uQda{-KSddFBz;3XOiv@YTNl9u%wFg9sx@>$HK#mhU zO@dnP!hqX!NU=ff{vbiGn7{sI4h)iT`5pE#5MGI1Z&DJ%iUr&3zCTD(&uRQ17$bvV z?5AkkllF8 zP@-d83~-y2l(260s<{$m=oRyK12PB3ggG#Hdg`u!w@Jxp#xs}y|GkB+|86VW{Kd_C z(EtCpkKDHLsf~9Y{)@vOKm61~KYi$~^*>qv$oh>3|L23RUi;kI!)s4i{p9K^RzACO z|H_f&Pb|OW!2fmN;(?8&?_PS&;;%2BTU=TAdta&if46eos!>oM`AdLm)S)91`KG%B z+@@m@BY_jPP(rH3muWjY29Vu zHYqX6aVpoZ8ZDZyQsb>cBE%yHgt%+KZBjy-p0iuIe$`lzheE1Shn`3Xa+iVIbO;L8 z*{xi^YQ1cHRr@M6-Vh|e$ln=5kh>P#CM75=o5#Y+b*$FEh(qqsgCW(aLsujOxjSRG zNeP-PU45V8I#%OF^JqwQ>e6EeL^w2bn+_2n;-0Bo$7;Q347)=Qhg7EyU6Byv;LvSS zg2JK&`<$*^&uaaflzluN4g((IvHm3LLX^Wpw@HbbEL*US>sgJh$tAf$U44OjW_9tAc+S97Z2Q~Lr{oqu4}bkI=*&%ts3vk5i$N54}On} z25yrQ6PB*NdR*UXyl5U3iTf3LBO$~^0=G#CnJgMv$Mvnof_x+(?iWf;?nVNFTqJOt z4ne^>yOrx(t(T3j9nbwT#Qo|?JQ%om;5I2iVcEjf<2qOCU!?5g;{mb1P;3gn$i;&} zy-5k0EM09K*SQ)mnvVkmi32D#1raU|45r-|2kV@vT<2=NXzJSWk$~7=p*s@Rago4n zQokiETCmUQ%Jr_+ze(B0M+D-3q1Y6DlZyzobI))`d3WV{S0n3q{+A*CSC}%4%CL(D zZj<^&Vx2RU>s^f&&GWwu@xQ{9VTf>Xz->|@!lDKH>{hOOwf;41pT55{AO={NGYnBK z7Pw7H)a2KYb#9frWVViHfEi+dg*n3zIW7YE#?LqxF7nd-*yTU1i+Szv}(V4=W-=YlR0xJ^n> zShQfD)76uMC{@SsJTOB%uux-yC>IgjCM9aJY{5FmMGRZV^S})8z(R=$Vq7$En+`Fs z&Y9|lAVM#izb_yjSg0{Ugo^`ilM)dYE!bzbIu=B!%e3^d0I5!J1i3RPT`X{$l&Hy~ zk#(LVg81qweLNs0SnlHiev^v_Zqp%XVjY+Nf9Jy1w{7L%|KEM&ua5jTN1nOy^BZqJ z{FjHn5&8fBdgv|dUtE7={n)`DKlu8!KU#Ze?TM>Dg8ctquY6$Tam(Mg{DK4j>cHg# 
zhnD`q(sLJo6Z!w*!gqe9^8bZ}yQ_4gYVK>tc64}HZC^MlN>?MLQgNr{Sm$Mve#g2;Dr zUp>aF4n&Z@=ZB#BdYEiF1O;4ly{h%n(UqP1;;|hb9)6F%=ZBd3T9|B7V#3nZk=J#r z#)~$&4(#~wAjD_?5K>&`zp^2y#>L5|Lr}P~cPrPe zS}&Wj&IA|j`0yaeU)c~;lTC++V4X9S>sPH8jqWLPUqZG6#QkpxCFG)Xwd2Vq zB`7Rfu+Qnrb*$FEiS6U7$aaKy5G4`8m5<9=tH^g(u46T_jxQtI5#m9NL<3hpF5An< zVx2RU>sXB!ZGwxLRFUnj%U-Z(E)KL;k;9?|`|MV(XSMz{%03fZv_r(huaQ__Tr6;# zlzk?@hOFayR%7e?V?jO=ph+hD8i@ps?^U`@hoE4c z-O6>X*2|`@9Ul+SBohQlJg6g1=DOOF`Eppc5ZheeYW<7YKA!lMC1;A4j`20wbo`>) zI<9XuS~So6vV+9^ZwdV}=#OxB25!?KB3S24<@#3ZMdNG76Tj>r@gPVdLFVoY+$JR` zELyP7>B@Dk*1w7Ef2G@WEL(_euMA?io{@X{mmMY^tRvCD@xDs8 zNr?$dS6@A^2txFtO(Fqw4no4bA%_F@x77cogv7D!IxiA2$~wM&Ml7%}a|mKw{mgAT z#MH6vg+YW~G+#LzNgP1o$;APPsN=w7lM)dYE!fB9|KDMs|KCwR|Nray`Tx(?&;LJK zKmUJY{rvxU{rvxr>gWGIP(S~_uYUgjqWby&q5Ap%uhq~0zvC;D|DPF47b!17?yJZd zRgtmlgP|)Qm+e*Lun6KgW9cJh&&hoqIior{?;fYSNONCD&Zv%z-5|!H zVtXA~uIn>n=_1vO=24NV$k+`c+}&||6*(+g@SNST^pUCn-}k6Ub>whm7o{s8Pc|th zo%|ZIPP&UU?|W3l#q8lKT-OlfP_ey^JQ3tkaV%Y=3Ke}7Iio6axUM0_Wy@}p`aNOk zY}_47AE{n6e}7DMTCmUQN;*kZZ1Z*GjOxhP z4We8`n6-|4ca?4=%{@CSL&Hq4j@%!f>0Q6imi@zF37mdztQ1i5(N zHXVX|Y^$W1RK+&W%|a7f5EFW5iqf^VO*SbpVd?67O6ez6i{{x`8L_iM_Y8!%NH9A) zYqv_bljfeCm7!tAFIwoGfgqPJx3jaF*4eG3pH#&*&&|q+n}uEHad-sd?hD+ebJ=`s ztE8n=#kQW$mx-N)UGLh+nDJlfHYq`J_3T#Zj#7|_CZ2|dnXqUQ2ORIGbej$l;o3P< zT~6M-f}!9ANhAopGq8@!mfa>LDEJy(J$pfvifug~5qc64up5@mMTA+|S$9|O6ESqh zk9J%CizHtTy)*blE*iA6v&1@Qs!KtHifug~2Z);$dS@WQ#R0eJT(n@H-Rfcxr7ly? 
z&dP|L6}o4N($TUeo0O>L)pNSKAc81g&OJLT{f6$+(Xu9+4nZNdoeyGEZ1dc#3=K2h zu|w|+evgX=?cA*3Z*^>wDNuURd?lUOS)qFdLR=)6ot?E?y*KAfEnGXh)q8><7266Q4~Ux;dS@W0_O;0-B`7Rgh;3(sAQjsR9}kF~6}o32$i)M< zNePmxXSX^dBB+{?7d{RY5(lvB6< zZwZSQ>~p%>4Wd+RD||!{b`~m(@|#>ls249;NuT_tiFI85|M%_u{~n(I-*e=z@%;Y; z`v2dK`TyUH{QoBpy$#R*KeB!t&;Q?m{{M&8p0xUpk^leh$_MfM{}anE!t?*D=>Pu* zOV3~YE#&_P7XJQMCjWP5xqB>qrR+UL=IcjNNv1JkeOE!8Y*H&u-ZM^jl@`8|Y)6Mp z0tihJ>$?*2WYZx+yk{(3rR+V0uOr*pVL_0;N5(_>+L)6~N>IXk#?n{H-cxi>66?DX z@??_|rM>6wak{It@RejcJuLhpUrELZ>7n9fU;-_SqdvU#W_{zLJcgVqTQpbzuYcei5QceH}TYIx=>HAcu_Yb>y&Y9vR2d zSIV(1A9-Y?N-}nXpgJCuvs99I$LX%peB_alD#_RlBHSIaD=2p?TDW%3jHRoTFVoy} zwKC#rVK)eJ_sH#Bt*~gpKBvdhSIV&sO2y*eDovRg=EyT8?a{J0T^U&WZ6I%-0E1t10(vt&G@O z*bRbQJZNWYY3tlGeyl8=vW~wmAg&g6gBTYL+PPX`>FU__m>@(invVnni3HdULR=)6 zovpPy-f9u#BLT6sup0!qNYKvKnk*VN+ZQgMy?9}Fycq29e+B-FtibMAdP@0z9}mA=)y$6g@}+v2_T{?gvdO07rZwDA?v7uOymc)+ zQ!72}tM8avlTC*py5E0(5XATUh39Fdr+xJuL2I%}35xIcpBDu2{eIz@TIqpbeMixn zY*Kk}caIU*0-_sg?H~MQgI@5D~0%X8hGb5U1ru_p7=3t}%bI zNeK#z7VL9+{G1?)YZ*oN+qwFVqBYs1L`{}0WMk>6O@(J_rC-j~cNDG3rbA3!%aEx} zdeJ7ia{BFDeMiumY*He^q6Pcxj%99>x=hU^TTZ{AtM4dUlTAw0WYNev>8VY4R6U6Y z!qmdaC)qMQuFjSxn+`z}>)bPzsZCs`Ej&*v{erH(<7rJcDKTN`>Z?cQHmODPOs(`g zy84cyHQA(uOcsr-lb+gCc&1kR6J1O@Btj%8|-ifx{!m3~K8-#zf0Y*K>4 zvQ6x>0`SQN6*1z2P&8=VC`t;T>Z2k1sKiT?`tsmI> zo~{28EP-#``o^t~ZdF?kZe88FxOH~x)YiMU-n#XMtygc|x%Gmr(N?i_V(Yf8o41Z_ zUAMKhwZ3&=^FM9=)#jgXeqr+uH-Fa&O5odF-~^o_6)}&fZDu zY=KQ)^z5+{?$`@YUA=te;(7b;qIbv6{oCg+o!hzN)W!3AJMX)?bNR~7-j%(pJGydW7>ge@WyX#}^e9f@PMZ1Ev%UQea z)924#Na==^E1J3$tXsjl6{K5H-S7mf+woX8UurCj=$5l?M(K9x(%!}Ul7=46B#1dYz6pI+2^Eeunikte+wMOm*z>Esp2wn)yid z%SgX`#QL>6UN$_DXHD%g)-Gf1GSV)qjy^8!c>Z<|^D}~WX2jYJS-UpJ%LXURsG*w~ zk#1&0x|xyaW=7QwUzK(|e}{;H8=~8gbsH$%+8r3Dc^>Y%CLfb|=&ego2PP#t?r+VT8NDMGX8*Ju4o*000ydM8Z3 zsa>D7>$7%!(ym_}-D+_>|I{ZJyf7ter|_cH@ro0si4~?~-AdN2B;CsDhRqho^H`A! 
zRv5;LTw%p_hvVfZOcN{&gN0$RFa#E+R~h`asdm&V38}Z=yW`D!ZZQHFkl!4 z3`2lnit5-#i{p8~$OSMA14ga@quucUwWf&|hVjBMUKqj)ljA)^Esp2$A{V?cj2F4W zi#EqISh*%v7{&_2SYZe&jHzxo)Z%y^D~w=;VXQC;D>@txP-~iCVHhk7gM}fmFj;kc zJ?(ft8^CCl5K%G)3!}iI)A2?pvL;|;3>X;$Mn-^{TMKe0*rRY z1Jq_syvP_YGRBLH@FE*lN7vGh=d%yYa1reoFO0&AHpc_hW=*Wf7%MWyij1%#8&o%} zrX9~GESUcyx-nK5g%ur+2dK@OV39FcWDFJ=fkoD@j<2*ho(BsfV39Fc7zGxcj#r+@ znt+ipU}Ov!839ICR>ziG9M1!W5x~e8FpL6>4#(@A$eMVOF^ zJ^sGrV;{ob7k_LAe_!;m*Wm99KUU!H3qJNF{C)n%7V-CaA3cM=&;97@@%O7g+Q;AL zeDpZ}jy}4Kzr&Bb7k>vIc_aSzKQhAK@*_9lZ|@^(_*;DV0{-S7elz}>4?h=wvk%{b zzt8^gA^g4Lk@w;6iAUaszt4K)1^D~SM{dR6XFOt`#ovDI3jRL*+B@<0wrelO-&?QU zj=xX4_89zq>iGTmd&~IU`1_Rco%nn6_*wXS)A(`tdt>zh{C#q@jlajMSK#jr)wA*U zSoH+_ebPhtbM!R+Jn_}|bA66KPdI929$olb3tM+@{@mu%j(l|Ek2hX%_y-Sf9=fpp zYwN|qZ(IBKYj0WoiPalcKDhj^(d+-+2M#QqUi{_7XW;+x75;BO|KHF5ZMBDQ=L&T@ z*MSZD`M)oW@jcys{=c99@8|!v$h)8aQ+Z-P|3~t_pZ|k|M@6yy{2vn|_w)b#{2yqs zpa1`VKmSK2F<}E5sTF6OT$ovti!48pHH*a=7mG747H3o}&hT(?v8C|ji^WEY#Tgfi zjVcy*7M{$BtXVM5xL}-d!8oIWab{fp|Md%7+nD}$8}k3pZM^324+ib{EH9l|{1xQ?U;hcKvLtUEH4#W;RiMxTk8@x<1_A zpAbDJ?s4t9$F=Jo)vkM}UEh&*yfEg8io-osyH>^F4#%?(O`2u@9@nmWT)Xa3?Yf8B z^@$e83$<&h4fhmwsoHR-UBCE>J!b< ze~+=N$Jo^)?CPO0=IJet=K-T94?BAd7(Ml{v%~SwpU_199^*xi@uEj~(L-a*ZE42~ zbGCW{X9eR$Ps3T8r@*Y;@c^~aW8#AGqF}rz2rmjW#@y24cpfi$@~pF9yy$5-YjHgECq$2l z3&x6qv7#WXD9{-5lorSHSkV(KD;O(!3d=ei4^SIDCN3B(3I>aUz@k85?B*87^I*{v zC@UB&dJ4)q9S{8pO~5D^FbW2Yf&imHW6Vu0j^_cRCty}EVDuE2wL6~ePiW#r!FW+H zUKE5E1sY>+Y;in~7d^p?g7KoK@S^oGG5QmlSWz%m6pR%GVMT$)m?yV5p2vzpu%ckB zC=^z7IG*iKXo5w-U{Nqw6a*Fpl8fVM#|tkyD+DZZ28%+0MW^GTKcNX2IRi${fRPhm zk0Bu_7m|$dOz;DeZXSU2KJ5Mb21JD6HsmJmh7~#+aPJB4@D32`uuy z%m2S=Ve9nfFKyn6{QnCZuRZ)ps;^k^g_)^6xFb82SI&(%HpN zBme)VuXz5C6IMrGoi=2c0)JZ0K-2Tb^j5{(zxtz|b`y=o$dJ z?o2ygn1({o&{xoH*ql+EoiDs7AXXxrPboB|k`lyS&sKxPoouN@uJUo(I>p7RU2=L8Poc z<3*z>tIhGy)7iv|K4V3nv7%2{(MMhE`7Ms;v4R*`ea4DHVMRy8v(I#zV9{r==rdUK z2`u`ki#@N!@jO@%A*;_|p&PO~9S=!%6EOM=7<~qeJ^@A_b+PBRIGzU#qGa_MFmeSL z?T%;56HUD6GhXx=FZzTRebmLiI_-Gj%``;HDj6?wg%@o%>$W`6#EO!!qGYTn2`fs} 
z#h#OPyznv`Vq}$!6}iHS4#%_Qi6&T-3>GDWMM+>$qAoT{J6?DN4iU0S28&#QMW^GT zJkbP@tHbe7 zo@hqAlJTNsyeJ7TN+cIWi{p8`AW~Mzc+qIeYI8i4C!&bQSWz-ol!O)K;HV+ApifXwYRSRIP(7wE&tB)3y}Y>EbT1*67v6V_{!)1 z)v?n_D`Motqya#X0mrC$O(R366`Ls{P9&U8{6jdM_(y1(#C~=3RN9JRIy_;+Na2>o zhIT7PDdLE6Ys9!UBHS7Qx9+jJ#SHr}P@aHcq;N|CLs$4fcHO)HMvPk{#;po{%xokA;U=FmdYgB9naP# zny5cw+!`@%jR?0!z^#)lju&uC9!iW9ZmB$?mP#Z#9WTK*9Wic=7`H}* zTO;7s*S0vG$1OsJ5#yFdhIYrZ&vTk7;)ro;#JDvg+!_J5-r3@K0k@>^G*Y;w@`yIa zOYu!dj9Vkdtr6kY2t~1Xv^bu}3hKrhF;-}%ZinNg_@*NQ3ydfdu)se8SYW?8{`R!v zg_j3VGuDv7LQ{1+9WTK*9Wr1H88C(f7(*1r-j;T}@a6$(#~LzVXu58P<0bf}L&l3C zI zT`i910fX9ghYT3HUANuw0JTwyIApvSGF}V`FNP?Jy{X0VJYG<@?vU|9ck8w}9%>WO zgTx_Y#gMULNLVpMQS6N^j_0w0y0M0g6}lU%!|?#MQHnTZuoyB}3<)fTD2lzI#qm5? zP&3w$!9q7Lk5f?1ICa5V~C>I*R(jE2MlV*8Zuz$cC2>Cv$cul9rBRz zV#s(gB)k|Rxp;lr@xm)6s3B{>c+qIcYD+Fqn`owp1ICI0W5s~5Vu0l0b!o>7@1&q^ ztN~+%?#AkHJX@P+-X0GaECvh~0|JWyl8e`-9WT7gf|{`g3>LZ>tJCpNn`lP70RzT> z0b@XbF+g(hnzZ8$LbI5(V+|NEbURjub?k#lJDNNoSI8xBjP+BwM5i5_Ng__f z+7WW$6VeXI^`O-*>aRDsM=n7J93$--=+G8G;%-x%hIJ$4!Y8B~kn00hx2QT{a*tdB z4LC-+X=vyQAaS=T&cx~wa^VwF56JaEi{l043TckX6>_OGr_=Fp%hY&|h*PnCgk1Q9 z^aFC;-{N=yxkB$1CeHHmsRt(Qj%Od|G?GM|h_xf+!Y8C1kn8nYEj# zESBgt#c4=4p8tnWL^nJC@4mF-h53I14Hy>9y3JY^vyXEc&k=DZR*#SipOAV$uB&Os z3-kY^Q5PeoS-siIVsW=APR05Wa^Vxw56E>T?Ra7SpLAnkJ~it%V_D2rCmOd*I1y_{ z$c0ZxJCwyPw>X~9bfh24DE`0h)or_1x7CTpEfY?|x-nK5!U}`3*j|g{d908=U8Cmz z=|0`Ah!=O8;!La_g9Xk1!?&vA?`v^94;Ip>i|N^HSOsOB5ijmG#i>|71`L}2hi_HK zF10wG2Mp=P!su<%Pj_RrN4zZVHpPipJH`u||A%i?M=!QGp2rL6)y1@K)=uF?tK-Gp zrZ^4j##lk~|M0Eqh6^o@=dnVXu`tz}byHZ;<#=(oDbB>|F<8+2KYXh?em?DZVg8@= zVqq{itEa%C)A8aN&o~w7$MgU2iNqAk|38;@yfFVyy0I`*ob^+H(e8M*I?;@HJpT`8 zm7*g4LE51#_TIGPh53KdkA*?xtewJ(HpjEoiDty(`F}Vk>qb~%=l{JY?Ra7SpEP4( zb~)>&u%g59Y;~fMB;rhLSOyE4|A%i?$IrGno(Bu*#lmaESv~a(rPJ}^8P7Ns>&JjW z^Z)Rz>e!hU$Mb+8-B?D=|I^)A?T%-w6OAMhCt~dwFNpt-Z&gR{ZE-x07t)V~cbN0) zDZFTNJlmw$ygTOke>f*Q9wirc{@-qk<9V!*W-O!l|GF8g!|_m^XtrhJOuTdq7R3L@ zx2oeiEsp2GLVB_AvUFNH-HX-fc=3!UoQm~hz##rVzUBJ=uQ>kytw%n*@rRiI_x*`jmi~O{uEif)eEfnPQ}`(Vt8S>$1L5(K 
zk;#L8f&s9jnqK;Z5NB6A~z{zkXLjN4SVvOL1~qKj!-56InmY^{>)1;Y}d>6BsD6zwXR! zjTdomDNfDGF;EfNAK!xWrH8_q!1gC7P;7tQmfICD;^tDEo%Lg+BDO!i1?Njog*Uwb zRpF63j#0$WP|)doad#eDk+31zQ69yZFfFfk7(W`G2b7O z_}ZXCnC1Id>ACQx=fR2qK@t8NfY4G2OZ1oG)Vy{KRzx8Qu~!SJRR&`N4eit(@E zpu_oWMWT6|#EgH4;>DvB;b>7vM^2O+5uyDu3v7zPQ5_r{@J^xFX6wz6Iw? zkA^or4;|8lg=2Je=nma>=d(9T&0-jH{&7}bKSB=6`LEKm;Y}d@OCuJJv3e?jXpMYv zk10+=YsaL2d?I1llK!joaCj3~|I&tqW2~IcA39;T`2-PXV*NP1A=W>>1?Njohc|)t zFKt*j#`>uoqSN`}E>oO}6=dij+CRPp=Sz==H-YyrUAj2N3aTuk!};PiQ=Ev^W8@&- zKfVR$OV5Wlg+~tQ(#0`WPd8(=L_V-5qx_JG|2QXI8ASZY2XMahfOu1Q)Q}ck9Ao8l z4$$zSvbb(DdcF2 ze1P33M`ZFp&dJKr6BJASuhK)}@eV*gS}YL6ag3Eyb*FCFjgmxW|Kqf*AHjxY|5xcL zaVF6Jr5y{$SU&|Doz54Jc*3d3`I!EXPXszF{l8jB*Czu1Uplgk;{WT8taj%E>_)BG z%>Tz(Y5j=*j}NM&52x!Bf&VW(Sw`{y8$DTVfiIr%gwycSF=!C~AK$8O_)xk&5%~Yo zkYyDAUpHiRxSp*~G;f!g|Buu1($P~C&;P%=uytzlQ|SM{w(&4}9~|Uo5?8@kbZGYT@dmO#bcv4Z$Kk4bB9fKY@Yb z`D?yLTd+uUmg3Z8Fy{H=6Y+J+^RLq5;7lO;6A~z*zea*?i$zZjndlEuWHBQ8;{ybX z^gK8dnEnI=is?VcOLXGiQkZA0!8)Ly}2F1B5p3l$yq%{ zDx&)1TX4SgL^v}DNF`vPxc<5~x7GRL?oynZm1CqLu0Os7=Sz=-!@Gn%QiX9LIM#{; zUCtM`m*PyUA0ri!{qZe0UwS5-32c7?0>$>%K+uU)aepaJPYN>IAD_t8YuWx)dMKO; zbbrDEMfcZO(C&QpHmR8)GTk4N=;9^1KR$r-rKiG~!1pIOP<;Oe9JD}{eTLJZUE|ce zb_`X-_s6&3eCe@pCJ_FOl%5pfU$G(sSWV-zPXmDo={> zuiJAwoiFY&#pzi=PI8Fxk8i>G(u3hl-zPcLg{3I}x(ln_`D}HfiS9~ z@&54voG(2d&IIB=wPGpazi!g)az3;uHerK_{}9Fc(GwL*{KqRhQw&NInE%v@rI`PE zZepkN#nYW|da^Dv|M7{yu4VpL=>c&jQ2(hLOHu!IH&(mz0d}JVk*WWX#Op^{j-~!r z=?QVB^vFRCS&IALXvk`be6~8#U?$+yympKl#Qn#&;C$&3ai;XBLG4(I{NI?H*j^d~ z>_#aflm8)#7mr}WlK-ppo7qk2VT0PS6#HMdV|6-TJmCqa=LKZwAof4L1?Nk@n%$Hh zI;bN{(f{@I#CGSi)rn?lk?H@C#Op_;F-!li((h(BrAH2G%2ND)Jv^~B^2IZraB5yV zMh)Ws<6G|e|7H99|DSH&g#7;xHePc0yOICjyZ%e-w<7=l{Msv5zaRPk`Q`s@`B}*S z|7_{?i$9F~|MH{G|6#85D7ZZE`~?YcjFD7TyE@I)e1M1($%^0~VlMn6gfi^IT#(~aQk!USts=r~XKmI`rXn`s{4$cIwKOuqQ`fDWUaK1!u zDRccHiPw*S%5wdy^gK8-3P2?(jw1Uvu%ONP63wMd_J|dn^!kLi=Dgpw< z_SZnr<$Or4oBj06_J=6ekATXu{j2muI5P@BB~RBC-CyHCr}HJ+OPTHuS*#%CIF{~T zy)b=?J_%U4*hBHGC8`PnzX#ct=tJC=sZKh27hb&f*pu^JstMq6%GxX3w 
zU0I6vue-83oDc8nXd(ym{vnCgBjm8W|0+Ej&I~qoF*ng3OKI-D7L*r1*)Mg7-3 zS)I-ok9Wf9<&GZzUnw`EJgm;V-wr*LtC6^ww*BfA96@JBLCw9IA8j$Y$mY(sVPgb{~J>iyNeS| z*kJZQM6rGZ80)S9Ao`d>8aEC;uce!iWMZN!Y8Dlg{t&KxI7O~6{dyY7%QlH zbUUI&++vCov3dkm_=ME6P?a7Dmj|}LpaG7tda6aY)%oHUQ=EpBW41p&5#=n~ze>-9 z%LCnCpa92MIn|!qeV^29F~yl!KY}WJLi$;#N)Lt01K(ee0LNHA)t}qxd~u5@PQ?mR zmV-}7K?_yssc?B9{O6(ggk!9r>d|d?K3kq>Ks`>x>Je1o6H?FC#?oWqa^sU6Y0Sbg zQm@gN)pC<=?~@udM4X0|W7HtVKfVR$OV5SNjZbi-D+|Y1IfWV>&SxLuG+~1&|2Qq7 z8d3i70h}*A7%n$H!;z*e9Ao_yY;-zb++vDT@d7e*5a%D?g7c*(!{x?jIVKdJaEujH z(9!OE_90FaIhgd1v-0{8a#+%Tl^zY38;=~)qKjj!oDh3(@u(qPSvbbZDb(n2zC@EL&cymLY!K}q--7d{hr{K8_b*LZIL7)Z*ywb=_)VfX z6)DKPe|#dL#q$2E^mMp95dWnu3&&VN1sxsE7r#mrCt~#&If(d=Z^8M}MJlzSWVdWS#GC~c@{IAmU;qpNJm#!=vW91ZTbU7c=>n3b4^&h9DE0?JM z_yEqA9uSvj9yX*Y3&%)5-IUeoeDQoIoQfBap@X>p_!gWmJs~d7JakA~7LJjEx-F~S z`2f4k3=t<{^%yyb{Eu(J`O+_C%QKH0(wK!~te!@;cGzvcDF&xuNA zEzdk^NLLn)v2qI5I-C!%+icdxnOHxD4Wj?!TX4SgYuWP5!-h0v;TY?uP_5JX;u%jk z6)VWlLHvJw%RT?!TiCjL^A|Rsiv0igH(qx5e?tDhyZ-6*XCVLoleO2ZeiHfrx#eG5 z&XE6qap{eV{}}oI-lNR_7htmVIJi8p{|O8f`(JnHcAG595t;oDQ4}wT{f`e|vh+N- zJh1->3l#fbx9CnWS#y#pPERId_CG!mu&UYr>49*0VE+>uDE7Z@(`}Czwm{J+2jk?d z9)lIJ|M4xvi}XaeJh1->4;1@fcj>l*RorEYQ?qgmR>c0tx8Qu~k#Kon{}UJ}_P+*$ z4(CgBnd0oMAA=RK|M4w2UwS579@zhc1&aNzv7poW;x1F1o)u)UBKAMN1+YpFh06o` zpU^R8SGPfvx*1N)!wK(YU49Hbk+K@_KEQw7a2i&QQKKN#sM-JN5pj87|4U~U zj-GO4kOl|M3ALU;52#d7%GGa~6)VdI~u@ zV7EC57^mU2W7Ht}KfZ;?mwq){9{B&#nT2DloI;Hb=d%ThCTuYOAE)KTqlYU0)%gG8 z^nkcL@c#)86#rj0>ULNxZ#0_X)MPQ{|Kk(!cFX@Crw7F4f&Wifp!ol~Nw?c#dDCLd z|A#2D81et{L3Mnb9uSuY{y#y1;{WR|-Og|kH=5$~ynu{V#Q(>)s$=8yfVe#H{|O8f z|6h0NwucK_pJ>*Dng0(-yncjLmj6Fa4~WYH|DWJM@&9$BZkzKZ8clI(UOUDr;{W4Y zaK7|_xIFOx2@4eeUpMJ?IG?>yYNm+H|A#1EJi;o={~xCZ#N~niPf(!v|GG=J)A`~? 
zQ=Fa_koo`kM6O=T{~xCZ#N~niPhg<<|GHDR!};PyQ=FXDW2_?nKfVR$OAm<41OK1k zK=J=|qi(D7#f_#oH7mzhMf`t!3(l7w5SIu3Kec8l{=aU`>T*8h*G-^j{y#*K^AZ0a zAHez21LE?)|EJC@#sAk`x}DA!H=5$~ynvkM5dRHr{%>X*s%Qn@k;tGJ@EgfOE*{if8C|q>3ngcDNe-; z$k0Lje|!thmmUz82mXIvT)n)rcOuI&9ODI4X+*p80d}Luh|K@TS$X{kIV}HwoE{LD z2mXH^USo@6te#3D+MExt8$Cv3{y)x1$`St`AHez21LE?)|IfqZ6C7jZbPCZ~pNJkK zGXEc^W&Nmi%<})o=>c(h;Qv!+mg4{Gfmxl-7tegc>B;$+|Bp`ub}j#ZypV314g7!V z%~Je--J8|neDTbuocaIw2d^JxIW_-3Js~a+{D11sQv82CGpp74;+ao4HLo3`2J!## zEu@I)5pj9o|5Iz0;{WS;Ssjtj)+d^9D_sQLftH?!q||4*G+ivO<%W_3DW zJo5>s=LKZwApSqTh01LD)ogj-|5I<4;{VV3He0}M)UnO{e@No>Bjl+0|LJ$L<$?cC z-C2tNuV-epIUis*>ey!fKjg4-R2=jC|4ZHc|LnA-z%h!ndNC$}LW;okUR?>c#%jPk&LqBs zf5>{^A0d!oA25}k4wnb^KXoT4_J5;0p{0zTYBFW^KjesV_(xC<`&hj6bhtdQ|EV=W zvHx{zLPw;q_ejm`klFtb#rhFUS@!=pJsmC&?0@P^Q0#x*na~+25>2Me{)a49kYH+L zVJbZxE)VQ~>P=AWf8CqV0aJ-4Q)d4|601ir1@`}w#%?-X^Ww+@`#&3nTR6r3&qjCb z+`oPP(z%^GPF*}-M~c*?VQUi&9wN>ymj?d9+EHQ5(*MWl@o;&d|7U?-k7IP{X7fD6 zX{Luv|Ho-rKf)r98otFj6Jm^_|7XdEILWI6$#nxba_0ZzAFQAN2R?v;>G^PZ;QwcmekuNc zrqZv@*q3NCW&S^8k%Gkk#|QAf^nAEH@c%POz!d*KQwdmm?6XgCn%Keoe@J5W2sm{~xF4!{vehPhDAx|E~vTbvj=> z^9iRX+-3ehK9SpI%l{vz=fmZJ|4)5civO>NW_37UJo5=BXZ094hJ+lJ|36O8hsy*1 zpE|P?|6k9{YReDpdpnvfcg+8X95OEP|M3BwFa55zJn;XiElctL^}MVu=R>~M?73t9 zKSZ&91RIwBKTf}^Ef4&E>dI35e?2g()A{0=PdGhUm-+wrM4Zp^|HtWfwdH~TPkmX6 z|F8S9+MUnlyUobQ{C`N|^&{l){Qq;p|Nrd9iw^(8!z+hQum1w(|9y1rkJetc`hBbG zD`%E}8TtR$FMWRLRmlH07tYO}|5r~QyWw!nd$=M{|79s~j1maF6v0->o4B~a6z39S z;UBUZ_(w1n_F=5_bhsjL|D`bj$5=y^;&d9T*7I?0{36~5^#)_Q`t>Nq)0TF;!La`!4y6r{VYtS zr^6M2`!8(?IL7*^^d@*;stGY}FvY1@K}vD(2`Olw{k!RK@#W!D9BE7_6!%{@CQO_! 
zd2z%YrZ^MpM>vH~NI#2H>G5zyApc8m0*>(lstl*i`)qNd(P4_ykaA4^$0wqkCI64p zI((~bp!2Xxk1RP@p z6?n9JpDj)_?vijKR*$j65O!Gh|2RD#t_bXZX->c~R!?C^Tj&GuHuFQAhLvO7Aof4L z1>?H;aPg(#aYH&23dR1{oeAFgQddW!#}ubx1sOPq{*P}#!SsN*BGCV(GXckV^%QV) zdLOD2P4Hm)Kh8_HeMJAq2k^f1fVd*i|D`tp$5=rH9_`*|I~E(IF`S6jFJtT=`aiw} z?@JGeD+2vrx)X4W)l=Bf=6wL(W`c;*uyTwWME}RP5c<*s;)+23m(~OvW91ZXc<)Qy zRmCHpa3nz7AKyagOTVnG2>gF( zO~5hMPXR~pzSQLr&wRqESV0C4;{W4Yj{kpN`2WARaVOsYw}Sb9{|xj0KDzeDYp+`U z1oHp0nE!tQ`TrL%|L=#9|DT^f|6jmrxbbk&wHT=X{qPW{P}KkaTn}-YIM3XFoR@%r zxc~S73a0176@mNT4-av0jLg-a9jW3DQ=E#-#oT{)n+d6Gfuf1>%>BnX$+*P*#|MZN z>G^O);Qsd|!%^J-{;Wuq=rG0EdGQ#ji2ILk0jb=4xajH#-2XB>{Vx>vzf@2EL##+5 zRor5VbFqeuR7C#Ax8Qy00dYkj|CgZy0LRF@<&5RAM2jg-#Og6p5&0kAg7>8d#1(=3 zUxo$%9AounL#nvN6sKY37^#T-k8i>I(gWg(K>jbo!+#uOci!zC3)IQwn$}@_(to zV{)66yg1?>(}LOm_y;c_mB%dmf1I8WR|NKdDcGUd|E0o?j{Br2M`ZRtKMc zy!3>)BC!8U!41X!FBNWd-zPPnA>!<0+=79F*#Gzzye~Z=t_bY^Qh-CT|4RiNA@-&3 zlcE%n+5eEm3rOWL%l;pyC&U$j{V#)KaEvbx1s?4;>Gm;BGeu|929JWCtyud1I6We+ z2=ssHPrxx&P=QCg_t^qP^R}7k|2Qk#m$1Xq|HtVOaYdm2ONRoEv3d$ST0>tv@(HKm zwPV~M`aiw}0pKhDdmN5EnE|Ks$GxFYcX zr9A=1c=Z%;bb4Pr^9iS-6=eQDK9Quu^8d%_7q%6F|1S*+IK~Pp@aXWqc;*vM#Og72 z5dR9g*j5DozqBae7^|nSqt*N3nNK(kE62D&{C|84p)dWywj%KVr9T13SUH6o z-uqIxhK#KV9L)d6Y03F=0uImrNB{q+%};IKg86@ci1~ltgZ%%@`Y*5FhW!8YYp=%p z|JKp}|EtT-LjM2n@czFaLjM0~`~RObcGKaat1(djhmzzd>i8P1*-IXxFV4Mhmzqa^8avlpo+Upae7)oCja9TQLrZer{}{J zf&4#|EJu<5hqD4z++~WBvw93vME=LO095Jua77^h4<*S_H)DFx#;Q=FbJ4+aln|KnS@eBFS!`10`J zF%a-j?EirRkBRf8E)H9uXr_ov|1X&SkAF~Tw$-upgt#Km{|AB{ivB-P*wN;Fwm#A5 zu*a!s@tFRPPei$z{-2%@R|NY1KyX9R{|5>;I=s);Cz=@|)BhogysxC@v6}wBlVr9P zf&M=b;867cfdYr^Icqsb+K!Ha`=!-`_;pD6y zV@F9z4(R_+8oLQ`(Zvzy|9!y@MgQ+B>}XFq(7M=|?N~7XAOB$MQgzJo|HtVOaYf+& z_XRf;|G%$rqr>|WGoNsFUOfhml7PeV|HtV!wiSW@-xuIe{QtfJj!y53XFlQdtRRC2 z@&EBHc;DU0;S)vR|Mvwv6#u`kz@x+a;+ao4IjhImQ4)4o{{J}r#FzmNC-eHZfo-StncKNb1^=hj}n`n|~i-?RMb z<=c_}|LM}J7k>cx|Aj~K{2#Obues51%?q*T+5bk^|2QVq5FBTMa|cQ#x=e8QkHt&RhATY#-w693 z$5^>pQ7SRV6lapf!#`L*LMhAszm}d2S9tcn5%xchv3|3oRHDh0+5b2%D@f09Ec^dj 
zdNy3)+5bk^|2W19&WKXBIMKXCV)j4I%IXnHS@!=mHyf^baTK2YZ-o7iW2D}U?0;LG zXx<|+`yVG|?FgkT`~O;cI9%b`|3=vVIL6w|ic+>X(Y!}u_CHR``VmT5_W!l?aJa&= z|BbN!ag6nw9iOKy;1D{kxFlZ^Cd5jM3*Vk{|l!7;~(UGD2-kFf7p8ySZ%iKEbrdicj()< z`#%K7G{z$~b`#7{^__2I2w=c8HrRk|1~YUw?QU$-!R@v&PMC82pV7vcIf)b{QUnPZ zKp9XHp(r4kAP0yNp$MS}MS>(2aU>^-P!bU-%Dc|4Rr}d{t*Wk{NY_NVmUa5ns`}Si zch}nW*50+BwO`8L&}aPrQinJV{(ou2V>$b5a-#pNLj3=l_`ePio-G{9%fnsI`2VGj za2ovo(rCw0_DOid@R0bw{toXhbmIyC&&$JI&-nkPj&2(K|I+BjO7=;3!xu#2|N2vm z55n<;|L5i5u4nxJQinGU{(otNW6r*;d&Htoa?eK}#RK@iuG09rl!uGm!>7hBb!x1^ z|1XVrtPMJ1)u(In|2kj;LOY)L|GYlj_00cY>eN`n|6dyISjxVn=#yNd@yP${Ozj#^ z{C~b%+x5)%;Ax{~s&=FB>D=s8cpQ z(IvYaF+7ZMbOzx`r=FMkaG%>1Pux5IKUV%W3l zQ#ykHrBlyKdAQg;e2`Q5|2_HtaWnsKZ%N$;M0sL-&?%ikd^S0j*N59X|36m#Up6-2 zCY`cRINjn2xj}H`|8=GYXZ-*3QXejK4d?&I%Kz`l|I5Dd9o;_RbZaN%iSa?FbO!O+ z5l-aQ=U+{QsW(zwFKZU0UF55N!YKBmXa_&<*haI-%^#>%;Ax{~s&= zFB=+fJO3Y-n937dkHP`|Usoyn^7?Rl=l{pb|I5bs8sYdXZ%MoU*+>3ge%XK!599xz z=k?+C&i{{<|CfycHsZ1Rmedar<%sP^?EwF;tMus0>%;Ax{~s&=FB`*av}5rVJuWeo z8wN++0ROM6baP96xY#vZH!Aovl$ntieClUyM{^8Y$hA1=oKKhF!q?VbN0EB`MW18l@&Ir}8Mec2~DVtCXJ@c+6> z*_RiH+dKb1R{mc$hSzAvQuf*OL{}A9ZWtVO1N^_P(xWdg5Vv>!f2{n!Yz(f^jg`Tf%$qj>} zZh-&SRm#46v$nnS|6}Fyk3c-Vsf*9ko<@)B`-=l{pb|I5bq+oV)UDO0&&+fk{&|LZC}EAkR? 
zd*}bh%Kyv8;I=B2UX#KQk^H~hvhgs)G5-H~DG?WXhx7kq<^T8O|7GXBou3tH_lS#3 z<%=yyr2_x2tCW3tjkvw@|6}Fx6!|9M^_Ztwj6Sowe17~giKifc^eiUCro!2jziHDDknWuLt#b!$xJ2Emd4*O{st#{WMrHR3|o zaQ=U+{QsW(zwAxdSl!?(3=zry%Po2b_GC)j^8UO!0FA=wQ{(r3ezibS!5sy{j4MRlo|MJT4pdH5lKQATXLg#S)f2{of zp8UV;&HY`R9*c`i<%iiv-MGeQ732S(=QZN?&i{{<|CbGox0(O935s4g$p6bNTMxov z{QvV(BQExi>yLf>>U)2YY2z&|IhOpaeL?g$IAc9#@1_u zV>SC?*(bR|faL#mrizF0|If>wZJ~QO|36m#e^35jc6N{D>`Tf%$q~b&c7XraRm#46 z%eKAq|6}F<65f8%w%jl{>IV3KU8P4~zGd6q`Tuc0fBu(^!8N+!L*7i+ zV%aBoVtf=1@c+81jQ`(B{{MNY5Epu*^Z#Sz|M%qoWoIru-K=zg6W5u_4_br#zs}T5 zH~#;5ULtPq{Qp?_f7x)iw)6jSnW;Q6J_;51e_f?VMJW*%dZP3HW99$%#aU2 zVl7j-Vt^DX@c+6>kBYoP+}`>BvGV`2u>m&xBO-;@6zxAXrtLD9=R`G5IkfDo#a z2~}PqZtwj6Sowe1*m~QADlRjXD`X$}f1Rn_*ZBYEr9@ol9?t)dmH*$9|CgQJV_B$d zexet8^8fOR_5=T~6Ux54Lfqc@|FQD_va$U(36;Gj^&>>`|MJP;Fu*bX|9M^^Ztwj6 zSowe17~EE&vbUsu^$GcZxn+D9;TZq_yi|w_y~FwcvGV_W^8d2)-p-G{R6OE3Q~6@{ zag+o8Usoyn@)B`-=l{pb|I3CJ+|2*SWu|h(@TeW&|8e|5*8d+0b~K`F|;IU-n6!*m@KW@c+6>+4n2*g|p88 zkCp%5lmC~Ut+$$evFwvvF+hq3_Sh`Ec+xkY&+@(_xQHLUyLobf?l+xVXwxo@#68px!DS%tD~wb*u9FaC_(f2dq>h|1Ue^Te($Xd`SLZ zf2RhggE=_;uIEIl4;NI*`Tqf{)X4wK-UPS$4JQl_$^Y-k|LXwpnM&p5;r7n|4_Kx~ z{$F;+xB8rjbxh^H-d!EA0ijY){C{2^Ztwj6fTe2W|7B-@E6<5o$5hT49#!gs5zZ6; zU&_Pvon!C(|A3WhcE-1weHvi*Ul7Uv@5%q`0Nuk>DlZSW zcm99CQZ@4bvNOQt?8_FJlKL%Z-SeWFB1-%o#-{5{J;D%z$zR% zp=V!SA8zmb|4<9Z$Od5>8{t^ZzGRUp`G5IEfZ+diLfKd9!-ekQ{Qppk$Lz`f%gz>D z&OW^(g)fNY|K*k8K|7xK|GYfh-ueHb){c>l?bm3>QubwwOv(StCxe4-Jn{c|dAPmv z|3j@CBO8NjbYpFJCUQoF|wfrhekYBvM+8fDo0RW^8Y$hAI)c*8Y}hTV&`z} zsI_DE4gWth+Oe2@vFwxFFgT8G&d?3x|DWgO;r28CU+c!m#oHc z57e$<{QvW^Wn1VPGyh-f#_SvZe`s`LCHri0B7BqL$A9ww`WuMP1|9j1ZTp%3uZ3e| zW8*c#v6_9c?2}xf`<^Hs;Qw`%9)0cju;-sI68yyq*FJgLR{Y&56=G&b)A}Q3~!rG#U-Y4!{DG(I)mV( zQ#W~mxP$ZmLtUdL8-v@dQ+iA428i;+_@GldgZQLVH+g}$gY*AGU8E)(LO{owUx`y-rLtUeGApb8rpDimol`JupC&q^X zj?N%H>C{bLAnxG&|44|QHC{K)!!U6tYS1J2Sg}B%|e1KC6#~jH257&)w%*va+ zqsJwta>W2C9wWrV`2U-{K-|Ik|DhI-k&O-5h{wtRQJ-*n?I8azuXry2|F08zMb8Vw z9i0CkYV8==7+#|tORwn35>vTh+fg^b|LZEf0+#}Dv1_<))VeVT^8Z6yH^!4MeXELV 
zOy!CJQaHf>>na79SBN_}|3B2iF|x7s8sS*YzF782t`H#kf1RlhmnZ(eREP`R!}Y;QaqkYsbjO_G`3bG5ccKC%Iv8)D7_e zx=I7Yyg=N+`TwETjggJPHM-&1mpxfDy6y&u^2GQk9N_2V`O7|jd0A_mvxU=_DQZ7AjJdx zzpi?R`Tv_zA};hr=l_SgNbNxWUv~CIzgt;XwD+Vw*UA6OFLE9HzfR~qDX$TCaQ=U& ztJGw}?P_@GF7yuP|A)Ft?LhutcHY}L`Lgbz&p2U>Nd8}rd3!NE zX8iw6UL)?{{Qpo_s>y~HY*(tSWM8ahDo1QUD%Bp-W5)mAlp1ldbGTB~g=z=#|3kY_ zZAqzYf}&S>^8fP5w!;X=`2U-{MBKso|Di5YlZ|b+RjF)(q8}oX|Cd|Fhv_ln|8MdV zaR=xBhq_8lHpaJIsq~uEeM6Kh-d!pc_yj`yt~j1Sg3&$MD{|{~9nAPlyWuN4Vx0m8^4e>Dk|0XXH zcX0lHsKsMsV*@tgv66kU?2{ZZJZi@^w8QxSo4iEa!TJB8){c>l;WgT^^yo{E^e>>92cwQkIT{QuC_jq&8mgu}4P@a*IC*foU%{J;L70?cc~9i0Ck zYT+2!*m{j{tY%*<`y^LrLGu4PQ^mvh|C_RFTj(Cn{|~iz%z^yB?0mQ^XP-?_bZhVA zh~ZH?!2jziWnaE&+rjz&q1KL(jqTTH$5Qs$1V#VoBmXa-c(j23*9krP@=eeBJSY)|4{d-kqvjN-KS>#ZuQGd)QxHm#aU2;xbdYVt^DX@c+6>kBYoP+`;+(p>9?q8yj$|P{n1Ya>Vc` zRN()0l^zv&g}8(B|3lrYMmC1GO{g?K(T@mMd z_o?yZ%X&vr%v7$}dK9V?MmWa*-{d9Y4$l7%b)y>D*m~QADlRjXD+EaXUuUXN8UKG% zO2mck;r#zlH>)|2|CgQJV_B%QfFg_#$^XkMZZPn}kZQ zNj-%qH*7l!)e!?6z4@gV;% zzYGxKVf_D1sSp>thl@uo9&;f7KeWYTrsPXIhn6n(vt#7{<(Bb5JB zX8iw6ULx+`{Qpqv#>j@oYwO0WWuL~^{ScA-zuYqWARNa3-;{0JV()O_sD)z=@CuJ4FP=l{pb|I1dJLUwk< z6_wJh>M=xls+pvN+8R2TpJV!6w<@m>cX0k6JJra>_%^AO-Kt*V$^Xk|4NeDhaQa=( ziBcaf^h4+Wu}jT?{J-pMyp`vKEuQG}oczDsGCru(#Z)RU4|j0>AKTQ(#>U&OQpqAy z^8fP70HIRG|KH^0;SSFKW2+k37~ocwN*0-t|Cd*WhY^nP|2L&PT;Dkk&i`Yl8rcxu zR{lR0GL@g&HFUt}pi;*F-{ke-4$l8$mm1j^-PR?}_6?_>MJN9+w~P-eW&Hn5sSg)> z$D~r&rshEYUv~D6Rh3E>nUepPUj~RF&c#$JFAsNc{vTV_$i@cTs8TjL(GL&F|H~`G zgGw3yf0LJoJ2?N3ooZxbc$-wpCMWt?bn^f5$>5+;#{b`x@^Gf2{n!Y;3(oI99VSS!7E7Uw#oF_lBLw&40ZQa?N-|1Yl$587e;|4m*V?%@3YSowe1*nW+6th^(I;UW2d z`DAd=4defB^73#8=l{pb|I5bU8r|^hOUC~84X1zfk^h%l#s}dr{{NrA^5D(-3Z}R$Z2j~CC%KyuT7TnDLCuN_=|H~`GgLWAI ze^cti#m?c{QThJ^`Tubv{~z}jl^Y1}nYsb~Usoyn^73#8=l{pb|I5aU-gv0p%o4h>S!TJBO^8d0izD79a>`S_bRxS2>i^>%nkm7NMco_eGQ_91| z?&0E5`Tqm?|8X<_Z?hBqdo%fexn%=FJBE zeo`U-uRl-+9mfCPlr7sr*Kq!Sto;9h{J-pcwyb4eEc+x+xVhy2bq4X-pd;V0?cn_X 
zSowe1*m#X_tY)7^ZC%+Xxnh765AgrGN{_yL$F{@F{||KmK*RqJjd-jIZ&Yk!OGT*_VFc`TPIg`+w%-uEqU{_vY$@!tD4e)Ns6e*MqA z{^o1n`s$Co`jzkbfmeR)m9KgE&%gZarBA)|!v80oDh1;Dj(BwbADh(3hI{oQ{C`|y zDqr7cfT4(4pq@48ueg}9^h|JbNTHb&SCa#n9v{{c~s7#?&=XAqw0RH+cxcgCah z|JbKSHip-Xah9GGnx5!CAj%DcgHGuTf-{}U3&b6r|HmFRvN5==I+ZLjl_$mrozfY^ zXF8P^h&wv}k4iaP zF+S*&&LBS1sk}nm(fNPuQzIK2aGOrWC8lzN;K={$Obzad|1TBdLf3HqAA8gs$^UnI z)U4@LTw*Fuj1L1Gok4u2Q+a{7qx1jRq((M2-gcdeOHAd80YayA1_7E*VeUQ#yn2Y;vp=hzp&=`Tw!<|3~uwvN!J`YoBmBk(VDvN8JGb zudCE`^9pfC=l{pb|I3EP+sywbOHAd7@liOy|LZDcU#SomdxsBjD*t~Z|3AI}{~wo_ z$`u2oc!2-cRm#4+K-|&!|FQD_vatah@mLk!eu=3ZF+6Gq_R^KW^v$lQpJt#Q-TB;Qw`%0?aGK z9i9IlEB`MWTdxt0)pzt*_DQY~Ao+itsSg+9|8Gi#xX?YE{~s&=exbpC&={J(5$zeYQjvd@ZRb^}DYVQ|z9@c+6>kG{M>+|l{} zvGV`2F}OxI)`ho6dgY1nQ8>W=>naU8N`bi0JDmR?EB}8a|1Uf5?VNn+ z;sO3&S1J4Q3UNp0|HsPz%Z3)*%KuAw`w^lXF+6Gq_?=FA#m?c{QThKP`Tubn z|8LV1U2R9XVQ|z9@c+6>*_UtFc69!Kto*-hY`aD`*0N926Wv!8d18DN4)FiFN{_yL z!?vUI|6}Fr52_Z)|FQD_vatc1QO;ua#WkjKgT^EOuQOFQ zp7{S#BQA6e=l{p!%>N(B|I3~!Z}u7z*OI z#D&h`{Qp?_|0DT-*_->jG(DCqGL;`T9(Cgyy79#S=QZMv&i{{<|CbGox0(N!@`f=Y z`G2`(>*2e~6aQao#Kqp>!cqDEBl-XF1^EBCwWwUN0Vy8f|8yhhy7`Tw!<|FW_58sS)d_QkSKa)kiN z|LaT@k0<`W?AjK(hx7kq<^PZ5|7GXHWhMJ!*(W(-c+?K?|GG-qmv7p3bpC&={J(5$ zzeYP2voDr?k{bp`-2nfut29B8Z`yWr{(r3ezibSy(G4H+X1W&3KFJf~qj2o-UF9c& z|GzC2;zDnn`Tx3B?bz`D?OL^`o0Sf5;yP3LL2Hoz*O{8>#{b{uCE|`V|6kXs$yV>y z(5_QkzguC5Nd8}LAynZ1bwV%cr9@oli8KFS7pff_{=Z$Qw)&{h7`tN>Axb9|Cd*`9|k$b|KH{n;*K-_U)QS1#_%=?mEMxV z2$B51d@?wE|2O{swp56VU1Jiep{`RqHvE6PPR)}q>mBwTr(0(#SG>6tD)9fhN&)62 z;*K-_Ul*#$#@5>|RB@T9Tp>X6|2k8a1<)=|GG+#zPv)*(fNPuQzKgquG^=^ zvo8~>xXe_Z7$1e|2%$3m|F%?!```yZc=-Geedx<=p7ndemULx-3{Qp?_f7uw}#>HdFGE+HXc+`#~206z6-|5*8d*%)6V9G^va`(>tb#Q-TDM~H{<|F@+=Tl+LSjoOv_DPNy z9<}2D?J)lTHs7`F==}d!`G46MUZWjLk3J(S`<092|K*d(2iaEg2Z4DjF&oTY3Tb0*`J39Z5ooZxbe4A8CwI{%Mt zYGh;MZC9yektz9q`DK7mDdYcd^YU;<=l`))jcg2Xqe|K2M6dGX|K*k8L8XlUzb)nA z`p$85{vSKl$cFIRm1>JqV>Ua{ubv?PFQ<$SDrNltZC)Sl==?u+sgaG*HKUuA?6b*< ze)R@V2Q`vdEPDzkD({sFd;l 
zx1~H>>>55bhPpB2|A)42j3-|z9C35eBl&+FFu*DtI-vmb`fx|*|A*c6oPFf~W#=2_ zYW5|IOv(T2Zy`YN|2m=UEA`<*_i+Ay*wqC9kpy~FwcVVw6>K9c{Jo$<}dmv)b2l_~juIc9_q599xD^ZIZ{ z=l_TCTyHPMV{F7@Is0sKqSp@c|N2V|587e;|81!c7dwY*$5^$4{QuC_j#m%IOgojx<@SgB=-!E;sO3&SE&I@dAQg;Ts&&=m?QcBp)DS>Ht2|TpX7=0 zQ9I7i4&(oC^ZIZ{=l_RVJ4QAr@7QR^Quf)_9sT%_{J(snYk>dP2{m5XvMqEC=l_RV zH|9wGUv@rQRYe5PZ%cu=z9XKT|HmdZ zvY{#3MQZcCN_8r(F_o{{8ak-iu7f$7^t*0WULo$}{699Tk&O{<)Tv~NsT?sp=#dU;}e&?%ike5O-*g}9UR|JbKSHa6fUowB#24nve11V{c~XKHZ9 z|KFAhaiMEC|BpRtPUQc)J!)2T%HEQ?l_&DV_%J!9Gl*;gvW#oplqoP8Az^8drW5sq1Tvv>5k#Pme|Uk4Cg@c%lY?8^(pot*z4_VXY| zHa1`*9;kG{M>+{yX>VLy*?WMg=Zb}YW4$0eq6!{DeJ;Qw`% zZf+?M7rTb*My(rjBL6?^UxcdcM~HI804W^c|8OpY`sP}R4-n;s!BIEB|LZC}`tkyCC+Gi%S~o^E2G{7u%A-%h z+t&q_C&owN0ROM6G&xoZ#D(7B{Qpo3$DGLj%g%c{Cto_~h;^UjixE;h!2jziWnW$) z?&SRcP>aXNh8Ap#$1G=GQuaxX7#_6){J*YJ_LUvmV&`z}sI_BG+cmnel6^Kk(GNPv|I01o!=S_X|J!`Swv+S! zLoFO58{=z)W6r)@c>4w0a>ct#@c{p?tKMn;zm$jzebM=UZALSXU&;S>o7DK-O1olQ zWO^e1uLImJ@c%lY+m+XdJ30T4U20^*?K*E)s<_5fu5i1^|LaVB4SwSPOO3eD8J+*f zPBka;|J_bCE6<8p%2bXR9+e9Gzpm1=A}!=qAx|JPN@zEUGD zb`Dpnx=`&z{=eI(W=W}Rf}$THlK+=a-d!lw6aSx=h&ws|k421um>|F5gmeM^bB*gbrhQ;Wx($o~&* z@t7(3(#{e07L_N)NA1{Sdd&F$+q_2H$@%}`R7W}F|7GWk=2G@0YfR-D!IA&hnW`Jc z|KFAxaiMEC|394S2#5T?>`ic={`6l6T%*3dc3Rs~G=(TWZ9`-r>Sg3&)(u{}0FQ(_?&SRcPz%S%hDXb>5suYoUo871SLhz(|8=H{hw=ZnW!JXQJ)Hj^ zj#WI!|I5zXyPSPi9oV`O9dHQKS1eG=Y&(YD;M z?Wh~z|8FVRp{_3+Y zQ~m${>HS~*)}K}V|8ITsN8bF9>i_@P8(;hSzpeWJpL+Gbdi7_k{{N4^^7SwO1=aum z^b42&fBFY+|FpYOAujYr=l`)+&58Vfw^z-wP{nno@`KhO|F1JO(~bYX%S*(aod3r@ zHL}&aB|9^H{ceRJBKd#0#j_0jzfQdUg?F!g;LE=Fo4z>oMCbptA1$kEXL=(4uLA~% z5svZycX^4plk@-Bs75xn-gcqV7`vBx^8fOS0Kxz3gtG4?>mHvUx`*@s*sSJ6{=eI- zW?86gexet8^8fP6@URBpiT}?l#GRb~$6htEvHdm)mCa9d%S`14kG>;?3jDvWl2GLp z;!ZRFU-zkzjlpddDx06^SD%poms>U-MmSIWf2j}`dWZA>!=;XJ$p6dENAvvXOS?y0 zXL=(4uLDL1@p$6@^Ad3<=l_RG9p#Y!mz@zVXP0gt`|LZR?JPdN4`2SKOE_Mzd z#UpQa8Z=>x8l|uMl@~{(rdC0S@_p+1Ymf?8{%1!dFD{ z|N2vGJP5}V|DRWgJ30S9TAh>P9BM>&@& z9_0UrOS1@IO1`vn#C4`8^8Y$u1428V`2V~_+{yX>;ZkSE$p6dE23*X(SoTS-(Rk$l 
zb*8@dJ@NmgL|o_^&i@aWI=~_SFMAW*`us#M9OVB`;ZkSE$p6dE0GA(q65cREB>%6!#PFaU zPyBzW5EnX!^Z!Gw9djc8FFV6q9CS!|!w`}Dznn5U=*AQOpO=U`IsZS@x-qg<<(0jO zZaMp;ynWdxd1CLNaDe~URm#4yZCmUeE*!OR%!&N}&=!ta&AwRnNv;?m#RL4mu2S~p zySAO2{~v1c7}?l>jd(0)pUqG7vt#7{<(2mr+VRBy=exF@oc|wc?HJh@UZWk0kG@#; zNp9G7)QtlM9q&B*_rBVDiv3m%2RC(9n@Q;gZV+L z-!(qW>%*O#|Hn=>vN67GDisTv%1sSU2Q@ey%)#k*O-_{haG@VM|Bqd2PUQc)U24|f zk-`^5^8a$n_@Gk8|KH{1;ZDx~W1AY;*m&DjDp_Po{$G9>AeJ~A|9_X4hdVj{kF9EC zV}Kh~$|fgzl_&o%uM7_=W&HnLDG%3oj+68M*r`S~gtw9Zx7mqa;>rKZDWii*8UKHm z*M~bf|Bqd2WMgz&RmxUR^y5SF|8mRtFuXDT|E|=Bi@jq~DQ!pNkCqep|8AR_Rh7~^ zQoqQQ{J;D%KunDp|9_X4hdVj{kF9ECV*_qfDf@=g4-d)z%PYf!N*VuumzRe-IscEH zYGh-0n^ela;q(hn$p6bHgM&&L|9@A?!^N)QQ)89?Kau|*xAXt@4X0mvLjGTV8DJF- zolt;zeYlhJ|6}F5+H#{b{t<>5}w z|BsdbmyN+Sy0I?2;R7Q1f4OCR5Dw%2?@D>N&^w&}A1nWVBL6Qt@9ms?>F8gx6TNtl z|CeJ%2=Orf|1Pf&cXIxJto*-hXu+-gzj2lQvyc40yfQp!hw=Y+r9NEj9IhRe|38uc zAGh)U##Q#uKJx$a$>5+HXHAXW<>lc{&i{{<|Cfzz*XYJt_Gxh4Kl;f3%Pr%BaGbSp z>@F`4cXIxJto*-hjIR-nIs39_ODy{&S9oteQ#`=`>nb&1DGwLBhl@w$|4-!q$Ibk| z%}#`89|j$A%LasY82^8l*M~bf|36m#Up6*iqa91nKAW8AKdF%amrn!-{$D55cxB7B z&^4U@A1nWVBL6QtpDiodXOk2COEdX@xn+D14&(pt@*Uew&i{{<|Cf!8*9gaI_QkzL z%{nM?5?K zk4coOs00c&*cBd zcB7iL$+5V`RGt_gbV_FspXpRyA@1z_KlZ7SjSaX-r}UQ8j}YYs!IA&hnHrq&|97QA zT<99k|6`AuGx`5+kD3*o()2_>K$IuO2c6Ox#AiB{7l=DM|Bp>-WMkuP*C`FLyVWOh z#Q>pGI)eaBr}6@EXXpR1QH^X2aHCGy^hEcPBu5MnI;ArR&nCx8fw<5)oc|vy|9>X` zFMIPIvc&(}{6zPbBtMLfx&i)QS1J4Q3UO!W|HsPz%ZA3=%>Uc;ME8~?PmGVk0sdcC zDf>!=xY#>rAjJdxzphgDI>dMx`SR|t^&zs}T$i}C+= zr9xcj9?t)dmH$7J|CgN)mzC^`JB!K@!=rY9|JPN@zPv!(+4=vm^8d22{Tl69%)VIm zNp2V%bp!mruF|6~FA#Tj{(r3ezibSy(T#QC?FWeR#P}#2;Qw`%Cda<~X~}Hp9nSxc zmH$7J|CgQjc22%@axB(;k}pO`@c{p?tCW3tg}Afx|6}F|GQ0U{BC7k(Qa3t>*W8>1ReDzBCF0J`|6`LH*%;hrrBbbsZX#Wt7$21i z{J*Y}Qk4>Mp?5g{k6mic>c=joly3b8ga36xKdUA|4jbB+o)#cJt+(k$^XkIgM(5T|9_X4 zh&wy~k4%S*(ao&U!!HL@|j?MkIFcJ~cYu9$pO zD)9fhN)1>_#KrF6!<@?hpUMA^oB4lxPwGA*$`j+`C@+Pk#_8F&t_L2XW zTVx;jf1ObF{fZQ9J3IeBR{mc$HeMqf^Rq7raENP6<%$hR@c{p?tMus0OT?X>{~s&= 
zFB=1F#A8)>`)`PH#PFyc*O(qN{{OC&hzp&=`Tw!<|7Y_5vN!j4X?o0NDEjF!^8a$m zyNk~%#{b{tHR8_B|Bsdbmko`#o&S$zpX7Gahp$ z|1UcqF3Z^`;q9SbIYRr9|JRwSZN~rK<(syho&O&z|1TTcuhEXB?6V1q{<8}CfB9tF z;j@bI|9APOZD;5I$IAc9#^4&=SQp-Y(Y8FX@hBYN|8><*0RLYq#D(7I{6F@pIg|hI z_NsZhS=j)`zT@xAxBULx-7{6F@ok*(e>*_r9bhzmW@`G0LlGf$w9|L-=cS$$N*Wu|BH|2klR5UMBsKd%sXcK#om)yT#M+$vOY znW-Eh+sXgyOnp>5@&9>+xU=*B*sDf1hPO?qVl7j-VQ>_x6NKuC|1TBdV%M02YN+ef z&L_YzvS-Pc^^T;Nsa!EY3e^d-V^945C-Rqc=l`)$&6)ha>$ISl^ zGyi{X`2V39;mpaGc8|EuRKD1P6ptfDImZ9reKdbbpZWh`=Ks$P|KApmST=V>>C>0SQp;#6_NbE+%i50hw=Y+d4;(1%>NHF|1TTkYlLIYzJ>XTes+xfzx*;l zh==k2ccnsH>>e&2wRp_A;s1w5Jf`GJJ4f7JRG!#5s2xXWhw=Y+d5O65%>NHF|1TRG zu+fgCmvpO+*$)xP|H~&D5B$GQsPRgPxX?9b{(qO;GehMDqV~%IKgQ z#{b{tCF0J`|Bsdbmko`#ng6%>iC#F!|I01ogK!xCe^<6`i@n2V$14ASCjURa0ROKs zb}t^}|K*nfLOhKBzsq-RJ3IeBR{mc$Hee$j%h_l16aAMJ^8fP6@Sq*W|KH`iww;~- zA1nVa8^dd~W9iXn^Ao*pkpGuY1_#}E=lTEpQXa1Fh!^Mohq_cvw%Qc3vm>sklx|i3 z1yP>rb4&-dHFPjP$MpN#pLd_vhdVp}kDY2{V|<%b$|fiJG$;QrpEWog%)#mRx4+=N z)Q1cG(D{GtQgbH%-|bSfqEgu+Q}X|E%lM#D#{b{v<>AiG|6`jP+1Pm7RVrCzO8#Gd z86Z^3`2YL7Jlxs&{{c(Y$p6cpWMB3Q7weeHc}+eYFg&Q#6aQbz!}Xox?EF7=s*w%t z*RE7sR;gIXRDKv8Rq6tjdgA}{`fz9G|FKJrY>aN})R^9p!uXKc)`&AKJPx z{^(1GHyT{`8c+WJO#WX72++Ra;De+#!4{J%~p z`$~Pd&^?_0AFk^Vhy1_n3~==wDJ(K2|9>X`uY=ltPyByg9`5Y?|8QMLIOP9jXLxJx zNd54T{J;JV+YY+%#Q*2z;m*$g57%>bGuhZR8r|^hON1j^WJ>;DZW$kh5l_ zaQ=VT)u}P^|FX03=HyGeN3zP4{QsH!zYcJFZE7s94|jI{f7sQjG4lViGs5NUQ-!WD zJ|zFIzr+TFc0BR_r9NEj9IhR;cFdXl|FGLMHD;3&y>5{Imrn)<-FV{v^YUu)hYh{qHEU&_P9?&0E5i^rVF{|{~Pn3X|?%}(^%amJuSZW$l6%*O${~v1Y z7}@Z6*)`g+^z5_AiGF-Y{$D;39Qc2o(BwqfvMqEC=l_RVH|9+KUv}Qzwd{*!pX3Rz zrR4v0raoDo`2T#zwzKp9LoFO58yl|?j@9hbsI37cfT4(4pq?;0WI72+<=|6`*X*%;wQow83j-OZ9Ch6kO}8HA@#IQOMOT;CZl&i`va z8vpz+8^hbAQ}zj`TRR~)3=TS_GYC$faPIR0aTn+Ru}6(;3~sAVB}+`@iSa?FbO!P1 z6V82JAnxM)KQ^h6jqz>Qskp>ct{5P6N@ozDKH=P#0&%f>xK7oDY8UeV-9|NQIu+}f z$`j**PU#Hdv&pf%Lfpmqf9z8u8yj$&PQ@jra)aQ=|LaT*&iMcPQXwvM4d?%{N6m%& zf44`?noh+frt-x2Fgd0(h|ea+@&a)e=l`)ujcjbZ?K%~gn93Ccgih%U0<_7oyg=N= 
z`G0IwBO3$Us8cpQ(UDnl#PFa~I)m`^3Fp2Phzp&=`TyZCk8v*K|7CC9Llze}$2F$% z!|13R;Qw`%vM;X?cX9rIILzZ4+0b~0MmN^7Pj5-RaFG9(Tj(12f1ObFl?rjOclZFO z7LK`){~r#GaLme^y`#q^rgFswqnc6^@&a)e=l_SpJjRiY;WgT^^onkuaJm7a+%P!m2KawnrJGv{#Ko@Rx>4)K zT*&_qZQU48zU-~a<|n!lqFgaR3J3UqU8Ml?3UL?b|A)goIVKxhuMv*b?2BcezEp?{-NX6+;V@5*UC95-&WFo#_Sy7AH$aplhDYrH|F5f*eR+Yni}U}( zVV)e5jqTTH$5Qs$^h7s6lp6*|-2nfutMus03&dTV{~r$XuF(z8zU;{o%Rb2y z6*cOjj$-Y?jNsbsEwFCUWu2S}u9ou5(aP6qIV=m~iC{Qpqv#>mFDYjk5R`!u}nR-VWcUn)5^^Z(eTMmF58c9)vZa=ZGEh;oH|BLA;5RjG{szb`f7LT7aT zA3N1t$p3dc)hs_NY=WX!dh-ABiuMEluM>J!o0Q5XD7ue`a>KTx zQi1>1ReDzBCE_m5|6`LH*%;hbrLqZ%UgpXF%PkuZLmcD(?@Nie&^w&}$1XJ&^8ei~ zHFNT%-6PgAl`pm+l?wd7u2S~pHR3ML|6`{b+0cURO10(evyE2zF(UbYd1V7)dd&F$ z`%)t=b`Dpnx=`&x{=eI(W>Kl)8dJGp+fk|Zm>x6!|2{7fcX9q7o7Bk0w%e>!agC`w zF+M8Q9@AsS|KH~&;x5krW0xA)7~giKifc^eiUCro!2jziHDDm0YG4lViGrXtd%XGtLDEcuX`Tq;~e;pwEYm0YG4lViH_ndemULx+| z{Qq#O(_`fSWoH8}XP<<(|A;8(43F9Y{$E$=(U+HqyEy+poa*!#`G46N-r}P#mVJ_I z21nfh|F5fbb4!W1*fm@?YTcL%`TwD<8?!p-h;^UjiUCqM!2jzi1(?@}yEy+p)WR{c zL3vM&aI8N2V%aCTLV)D|b*6S-<`y@vUkJ&i@a!Zj5XUuF(z8 zzD(C**(Z5od=w7w|GMg(=l}0Zg}Bfgo&U#PH5c;#-Ci||LY1sDl^?VQ`G1|MnQr|5 zeO@B&;`~4MsgbSTE!mmr>vt;*5y}6{ErjX>p)&sezLbayJ<<7pZAdduppgIXHmX^D zRK#Vb7xMo)V1O9q82^8tSBSef|BuaTWMczv6e^pa=!KsAzq~R$404SBzt1bgU7Y{N zUNy2YyiG!7Z%O?;F!_J^WN`TYZ~Xs#sSp>t#w1i@<^M0_|7FjTFMW-R#Z2Xjy@Nsp z{$E!qz`R7<#rc11R3jT(Z@W;%Wu|h40LlOBOzpnL|KFDqaiM!S|BuaTF695a&1zPJ zDlRjXBZfzz0{^e8lzn-HxQp}u*sDf1w%<0PipxyphQU#&!2jziJ^JzraTn+Ru}_U` z3~sYf#bu`Q#P}#w;Qw`%X2(i}xX?SC{~s&=eI>FFPA>)LQmkM#Qd$@R1{{KS$f85OfOL@Z(k^H~hvH`Jp z%=rKNyhPl^`Tw!<|FW?G8|_%iJ}GZs_DOEgc;x?ers{_A|M#UtT<99k|BsdbzmWfz zz3CdO^Aq7KBKd#0MdN}0*9m1`ULo${{Qp?_f7#f0jc}}HUo871S8PCv2l#(orAJ?0 zA?`Bs|3jS}Yxw`65sy{j4OlPvfBhx4A7;ml|GzI4;zH+e{(rdC*)j6}vN!j4aq(EJ z`*b1yuLDMhL5K1G_j!r9i}U}(rOu9#|ChaqZaMpsvQP5Pn@iz1Ksb#5zc1Ui#opnw zW0yMUApbvH8sS*YzF79@LjGR|Y(R*I@&EVvu5B0R|A$MR9V7oQI~#B%`?PeiUwlXY 
zUw?_=K|759zt4AVyEy+pT@&BbhTAm-wp`2V~-+{O9-vGV`2F~E%~W#4f6wL;|o<(1(N|L%{die6PI^8ejVHH%YYHapQT6e9nxKg8&uQpW$^=k?((&i`YV8rjfz z?JhMd*_SOcCI2tCj1MYh{QrHa4;Oof4{s{}enZ> z+{O8SY*iy08*r;iC5ueS|H~`GgGw3yf1j6!yEy-kooZxbc-vGeS!8nNMU$L{$D;B9CX9@|NFc=+{O9-vGV`2F}OxIRvvv?JQ0S6Z<7=KvPJU$^2+d_9mfCPm-=wAbGUX?{{KS$f856Z+vG&QY?1uGd@?xbhVlRR zd3m^t^Z#Sz|7By_HM+5qeG=Y&;~06G-%(1~ z{~s&=FB==M(T>GuUo871HwcdWzs^+UHU9s;Y}ppNhV%bp<^M0_|7GX1WhMKPvQP5F z_$VCU|8+~SiWEHO1V46f>wok4J>Q)z)XUAObL^I15vF}STd6_=QrC&pKG%FZA@)2XyT zoUR)^KA(m&8{^xqQyO8nLT|1ZVAUx*g8)sZN`bi8JzS?|{$Kx=lVj~hHB<6sFL5?M z(eerAiSbpPvNMR!bSkY7r*97zZ$Mw*Y&JIFCY`eBiFS#pxj}H?|Lsf-?uq{|72-nI zaQ?sUQKNr*W^h||DlRcKPmHhXl$}9*rc-HwI9+%0#`6WvW@CKYbt)_|HCGI<>Xe;9 zfTmMvfjC`v^Z@-6j@cOCR-Fn%|0b2$G$&iudrEwy7S{~y+v znjc0--KhM(U8Sy@R*2I%*NvI~HyavnJO3Y+n3^ZXN8zaazg?y5D;45m?{MLm`F~wc z;dlZ5KP)jdR}7HiQTcznO4*kdh|@V2kD32B8ym0@k5%FA28iZ};ZZv(|8H06(U%s8 z(>d3Ung2H%!)vr-MR>abqPbyk)Q!sj+f}-`r9fQl8m=2N|F7$*8{7GRo1f@~1N^`F z#amV7|LueVOe@6ci;D}#%>SE>t=9<0>N~oI*u8jw|2Mw~u=4+QLfKa;#D(tR{Qo%f z|GJ*yv6cUq@OB>%%@Jd3Tng2H%gKKnSWq>H*?dk%XC&owNsQkZOrOB~UATIO{=l{o<|JU^t zju+woL)|Cy#Rw@LmH)S^lznN1IGuCxnE8LRp#?Yd|Do)YIbwL!j>`YrRm#4yV_WPT zt{pS~uj{ED8~OiG_Q~8ZIO;~_|LrPeU%FwN&be;P{J+`Qc8zYVWS>n>^p8I9|K^rY z77RL``2TdnHvJ7Z1~!)!7s8{))TrsfK*0si04)VECI{~yvCaatocDV1_=UZ2qL|HF2r3bjnlJpu&( zZ)a+N#{WN*8gZdB&iwzpQceFBN~J7i3$DH=g&`vNfAflTul&EAc>D7n(h_kxHz}1; zuC^bPN_K{~q*Sq#skvs`QK>5bZ&$tj1rKS7IGuB)y3`R4`TuT{nzd(z#@8*JVBQ%Y zm1>V6j`9Bwr9@ol9nSyPU25c&O4aUCGe0ZR?xFXjb_I(0VuVzx%KzI{%D%KloX)vY z%{$eY4NtSJ{J(w1X&0cFBZfz%+GBdm`2UAeBQAChSE`x+mscuPyHU+j_St8gcA=HI zVQ^Hc%KzI{%D%KjoX)vY&70JijcvD8sq8aOyZXdDF+M6)<^Sy}J^IoTaXRNpHSbbm zHpaJIslpmlbHxCuRF(g?tJHv{L|p71KFpc12T{HgwAuSQ7>wJha^Z#aJ<2AxDKl`$eh+&PXxnh76kIMhsReJQL zCE|3>#bf6G&Bg$mVa}@Xh9M&OfAh-w3+*ue|DlwK3!TIH|8eI3<%rs`k^hg2OwA7) zkGfI$f4fTAm)3~WIoFMu|2G>NZ!`a|35qaA1pjYt*?JHTd3Ung2H%!)vr->CtBs6yecF-MFT1RQ})om~L(<5f{6L>&DFg>w42jhqLkj4`tW3 z&^?_0A7}nw|CZvhng0)EpUe@%85Qu=ejZT|7K%wjc%+BIzrhe^ThZl9F_mKtKND3|DjZf3%$|#eUt 
zqVoTCLXV14A};hq=l_**H9iQH?2K>qQ4yAz>Q7Us!2jEs`lvAe{~@gqr|Tx6QqI)? zAyl$6z-6Hl-w{TL;Q#F}vHdU|XZ-&|S|LvVXc8(d0;%CasAOk&i;s#>%hX(>D}w*G zGiW;#s!|~?c8y7>v>c=c2ceR^32sWhbod|EnJNs9a84Ae%KzI{3NS4ZCp6b*$L57< zW~RZSj~X`O?l2wih)|5I6Aub_O@tX2;SJary$|+A-EIE9C#RI)n|ln0;Y;QFD#p z!2jEs8l3U}52Zw0=o-%dYYoWUHOT+V-UPSu+Nbe#|Lg<*uaGDlmH)RB%D%KhoX)v$ zjPM^ZAAL4I(SKP1 z|8IW@o#O!QF#i9cREP_mW9I+o+A(rO?PzPqJSAV&HKe>9*K2;*c+`yp79AV^|B#l5 zlLyz0xo(WvpuBC}n3ZRrl(*YF#yl}T3PGOUnu)zZrFCzjRSP!o#+1_N_n`xBWfu^S!Akw^9QYLXo_~J+L}tm zRi^S(TSEu+R_S0C0{yOAmDYz-2`N`8tW+cauVm$OY~@ykFNom(?eEm!bT9{}-}Rg* z_2EK4bpBsi=#3|2i;cHgrIJOa;QtjLRqBFoIL7}!q~+oCk6fj&OpW}%vXzau`kY7> znS%efzr_Z`5XbochqOGL{*kK`ma38e*D?_XxbmC`bxh59O+FnkJgAiM{|}`+T;DlV z24w0SSgA(-zuT#1QKj?^CyWol|0`6g)CDSK{QpB*A5QR z!csNz|5`!923*cQdq?Vrhv5J1FR}fgQpW#3q~+oCk6fj&QjPq-7KSjqrAMEA!|4}K zfd99@!{DG&#{WN*@^G?`%*Licd~Uu!^S@gV;%I|E$KKAW8A zr^dklD++4I8QNj||3g|HPXEZYV_!!&lc_BOIDeJ z|5t<*k2A!>`2UBrKAirMi^p}H8YBO&bt1gIE7=z|7d7WlUhw~RruJUr{~t$?@c{pCe~SS^JdFQ;DCOZ|_i*u; zi^s?j2OVwkm?`e(HaXFM zQUU*OKGArU|F;urys~9m=o-%dYyHRkY$5+IJMZpF_Sxh_FC5_i6$XW)^8a>1*_ZCv zrgJVFbKw}XvGE$=Sk1mr_Q_n~?w%+fmH)S^^yo`>Y|}XxkGXh^*%)9W9xIQ&Q1;0j zF+6HV<^S!f^8No)FI|1)*`XjG?<$HeMl^=WMYhM2IFF$+fQ!l;n|4FAxfw;aSszAszz^RMWWJ6Q5 zi`3?Om374-t}&Ib+8R2j*{*|G2=u#dR$3uW<-uL2>OwWy7~xi(ic3u8h~Z(3qcaH4 zbgEQ{>pP4|O< zU7i>pbV_FspXpRuAWreQPSr(fvN68xIu)0g$`u2IPU#E+G@U91;$ru3ovI7fR5XME zHiMiMowDMXozTk@zKo>r`E&CL0@XyH3R=rgFsq zp;J1808OXT0&xn^b*e5@lZ^px)Tv~NsT?sp=#!>IaQOUaL5yPW)fdAK3di13Q;uM~1$8jFx$j0y*?O1w6PnMX<4TGa@fdAK3y1Au5 zTj(+Y7q<-&2CC&y%CfQ@jhW?wA( zBv%NK{J+lBhl}z552Zp}=pN4hYi-EKhz-2nfutCW4|hHYv)*Ns{?Mm7f5=*CL++4MxW@ z#pl9N3&+UD_!{AuvoGr&vFwvvF+hq3_hqoazy20FBlv%v(6ge{ zhzp(3`G2Km4G_Z|*%{#Svm#kzI+Fj_0mH*6=ZXK%OT>-N|10Hccu*?Y8Q#*f!an14 z^T2YARNqsn!2jziJuC7OapTPY4^^oe{=eO%#}|J^P%bM|H3Bd#%( zdj?3Q0{^e8)PSW#T>`idXE1c~; zsh?XU|F8W)(0Jhgbwb&fmx$Bm9Nu_UILQBN*$CrX&AzzCRPNb;6c6zKx=O;Emx$A* z9Ufp65Ay$7DZ&6(9({3*shl%BYR5IE$Da8AQX(#N4(I>12Bh9!YDZf;<|+BIu3OpRCT#>j@Z;I?ke+Ose0EvjiH3J3Xrok8!g>9JBH 
zF7^%=j#@ZI8+kCkW{9(zeX;D5T(JQu9^n6Vm9j4{5vR>LyaB6tkpI`Zk&W4>35tG* zNd8}ciM%V9U5TwV?^@*^2^qPa6Iwt!@dh{Av6Ouh-hR=x+%P!m2KawnrAJ@7X`6y`-KceAWMgoRZg}=3Z&fzp z?FSv?|K*nV7Q*op!T&#&3UQ$~I{&YPn{$c$zwFGVMWM3qIQ-N|JQwL^!*G%C3_Ry>T6OMB9i}?Tih-1|2pyZ7e1B}aiJ$V|F4v*tp}l!o$;+c zD&jKJk^H|77$6n_82|s6SBO)Ekn$|6o7JcRF&`&816+PoB+E?Y2)&W~zs}(P>O0P3 zULkID{$GngYWrb2PIiX3^r%RdnaVYTqfmkW*HyZ?r9xcn8k10IIY+{z;7 zq?qYQ{$B^YxezMj{~z-baijD9S`sn`Nd8}T2DqAiaha*yqXo(T>rB19#{WN-5^ zIRCFTAvHkGj&0@tlifu}^8Y$O`+@)031weiA#Qa3Un@dtcn~Vt*?x=J7nhmJHQSCt zb;S37Uv|E~jNpT6Tf<|X1r=l``Pq((^b*vkLgcbs17$^Yvw zu>mp2G5-Itl!%L+V;bb>fn39*c5LJSZGNI3A(H$Gk$EcI}vSqw@c< z)z>8DBJb|{Ytq<%MI`?(w`@EJhw=Z9d4;&q`G2hfsf|bBcme)DE;E&Tc3+Ce5sSx+ z|9>nM;$ru3@u>X&Nd8|lp=`jF*)fA)`ynFvf4OA?LOYIHJocEEh|{(meuJxakpI^( zp7AX{`@;63x+ByL^8Y$hv(Nbd$5J9LbPebK^}0UaUGo33H^Hqu`@-&`ijN~4^8Y%6 z_%u8Am{*7!o&VP|kQyI_;|2KtSoTTok$ne>2l#(orAJ?0A#Qa3UmrYcfE16d{C`sR zNzNG_wFCUWu6p~YJ(db_p>sI@uLU7BJZi^A{@=*TUN^}9j|a|>Vx~mH!{f|7&>&d&jK2$ryIf7`qn_ z^8fP703ja6|3ButwrSUn>Df~Gf7u`&Ln9u`gASXY=(U6Vzq~R$XvY)(pYPf>I{&Xl zAoc!IJBCez4x69o7oU*-*WY1q(2aMV|9>py;rfoabN*lX=1n0R?$l=f-@f5=w@RMs zgF*+jHFPjPX!W~PDz6VWI{&Xc^!Q|Be4A9tCMWt^MgCttYj8T4gVXPNPL%p^p&vT` zk6mg;^8d24AFjV6g+-?1|K*nPL8XlUf6U9njn4lo5o_a7rCxylPZpVy|JUDQfEeN! 
z|NoemhZ~*$*K&{=AXREJ{~zm^%6Uyb9WXqMaE$+dEal<)&M`XwkDY2{L-Msd)htep zWvfid|H~<(!|2BN|Hr&O-01wj7KYTuqe^Y(|C2?g5x>|FtrthDVj!#{b(loL=F{|LgBCIH;8I|Bt0STN{$2R`o zCMWtucjW)|ci48&4deeG^YU<`^Z!~GQiG#zZ07&7MW*Ec^`{sggv0p%$5I|H^bY6$ zwHl%(blj;VW8{$Dn9kInplQuc}bzr3;mp&iEm zKbHD%v2(a~RQ`V?|F4xH>>MkT6JdBr{$D;B9CX9@|Hr&MoOb5$wyU~9{$Hy{n&4I^ zC&KWM{J;Jbr!71D2G>Cc`G2hpVSJ0(7t21$HG(7m zuQRprjQ@WuTegL+;rzcAhRk=D{J-o?a4XrT!F4~cNdA8$|E~kYXM>J>$F|Y=e=Py2 z@liOgn{O(gMR@zNPjb%&q^~NivdnrwX2ZAjx_*e zXMlWMwBgwlMYmc=ju;-aN@ozBY1LFEF0|jIRlABP>{tUpc7|6+iCu7dQ|eX<$qj>p zR_P3aGp#B`;$quPTBY0L$-ZL^0NI=1rl&=6ZCpab@e zI?T}tC0{8L7sB%r0PGtq0nqi0rQ}OineJEuKnDm8B>;3n$u|{=>wAWm0ASBx34p$5 zEGJ*Gx#*510Cd3ks#NKOlCRW=i;Xv}Q)A~~34pG1tR`PvWx8Vt039&EI?mAvC10r# z7XzHusnSlCRW=i{VY>W7s!X0-);~i^-=Mie5EX z0bn;$HBbRSCzO1pMqCUoQ4LlA=zGRW^2J4_J5~VD0UHmpVHE%#ry6mgb0o6C3IJW_ zSWUjT$aKdF06JiRkPfQ=@K{R3#Rg2IgB1X}-m#o~_7SH86n3ltpaX^n<**6>kEKLh z3@=d*RsiVw#!~XxN1W~p;*J#nbim-C8ma*B1&^gfTnsKz4ORf?d&Y9|C5udVEC8Sb z#s}H30sxPtL|ll^3jk0ySOB2w9IMF}>ptyR06+%_5Cs5qLdiFki0gZY7XYAiumC{U zJC>7AvfF<_+_3P372;xiiEIr2m;;Vj_eos~?|kI{ zb*8@a8UO!SD#XPA6X_uT-}R1_jcVtkMd)Ub_0|vmbu;GgSZYe|Y~_zx8iv{Qt=}|I0Uj zmd5}8^BceV^*^ui|EFI4uU{QB{{LTJ`SmaVM;iZs<{xwX|B`N5sT23X4}NYt0pLXd zBqj40kwpMx=NK%0%dvs1?uPneTxhyu4FDbBmZ1iKPUx1EI&mRBuK^&znB!v&0NEK| zd&}mLYza_<>^|pN17OD*06M783#$R}=yl?{2J{*L(vlh=RyoVg0QtOV!_%8m7$dR< zz>YNlbb#`Fqdx}-tIaZS|t~1@S z1b`0MfDkJy0q|JL#Kiz-u|n^_mz=J5EGJ*G%yh>R06JiJ5GyMI@aSdY`o1xX75WAi zId^?yDfw)Iq8}r&1i)@ju|f#|olx?XDsi#xX0bxgzzTrAXDlb5O;Gexi!1@K+f%Gi z0zfB}e5FcUjBgezbPjyU={m=1^6BZ^PcO0rz-~{mLJ0t!Q1X>3aWOz2=dgEhcC76k zE6Epk7u~S}fDV{^80J_7fX7lLE`;Y50H_?S0MPY~#pH{tOn0mRpaTSl3IIBxgP$7#vi?3IIHoB5^UeL^W6dpzj&W$!8N3-B-jN3jpYV@j*7OZFbCy#Pyvc zkqs6A=sL%0@@ayimkt&H*j-aPPyj$DlzgQ|Tx`HZI#>Xp>mAFLq2LcI5wcfV&I+Uni7& z)2?m(41n|hoE;wa_1Twc{Qslx|CMk3n;QRrn~d-N-@7gFZVSBI0`IoKyDji;3%uI`@3z1{ z!7ZS-^im@(jKPjR%)#UsD*$xgRrr?P&1~yMvtK~L0suP}0MJ2w(R?ZZP-?`5@Vo#3 zC&yR-pq~Pm-_pC_V%?`53jpWcvaB_?V0J`yi^OoMl7q=JP 
zu>gP$7$1f>2b&x#CE{Y^%|o0zb9`uqIL$Yec8J&psExD2XG9hN*s%bB4(i=~DgaPQ z#Ki#TVNRVoK46t|HvzDmd^SJPPmZww!0y1wF%$sM2@MfTiMSYEGC6j5@yRippXku6 z9r=G9@aAH2?1}#`CE{Xm$>i8!`IE{@^4a`EKRHJJe@Fgb2e`L3Ip*88^_?S`93%hV zb&l2Ki)Ej7DXa>O^SAT_V1alJ-_uKvfX|NiRl zT>Y)9zj^i7uKvo^U%L8>SAXH^&t3f+SO4nOpT7E|SKohibM*(WzW3@gSD(82$TVdg<9ediD>W{rIzg@a*qD z`@7Hn_Ot)y*Df8kF*eD3Fb+wcA!K?B2Zm{L%` zeAn;!9WouR^?}`%3+Z82fBVL9_w$EA|NqUO{@AyD%Qt<)FaPj9c$#u$IP4}*TJSu1 z+SlKrr|aNpx~Br@75 zee!gi|NR`DN{j zO%mNxlH*?*Jb8{^PdR=)XOVWsa!*N)|McL=bNqVB@#{H@^gwn`NsfO>@Z>puJ>~fI zoJHEo-aREb{>8zQ=lJ!M()ul;rpq1y7#i*HeyP&sj9o=#m`&!rqgA5*l_> zj_+y~)##EO|AOGjb9^`D_-@W3{uVRy^MfbP@!gc;yE%*aTlDnPf+x@M-IU|IIg9vP z^z?bbljrzu%5lC8_`TC`C-l3Y8a#Q9@1`8rN}1{2^S9{fgTa&M_-@K^eNuH#_$_++ zDZ!KH_-@Ma-JC`IEqeOg;K_4*H|4k>2#*QBMNj{9@Z>qZn{r%!+!KC_o<67dfcOCC|*af+x@MamsO3ZS*X_Z!rtr7d&~6k5i6oS1tF1-=e2CdruC&48xS; zd>e3|*?YlzgD21NVajp74a}aB_kuTqC(rR=%5lC8%$|}Qe?55e93Q3}=i9*SDarBI zf+x@M;qYA=DSqF-^g;c6`%iydKmWx){qywm5B;g%r=NfDPyI^$`~!dLbM*7~|H+T& z=kNQIU#Xw}!k_rP`uWfQiC?Cl-}@)7^z--r@!z7K-}A>mq@Ta%kD1T!{$sybKfmjb zy``U@`J=y4Kfm*jewluL`iH(lKfmLLet~}e?jL$XKmWNu@(ud=sXy|?`uV$l@OSCw zxBuWtKfmn<-=m-3`iH+>KY!;R{(SxX;Dfg{e_pV{$q{izx3JPdG<%1{qhg|qYwQ1ANb@4zTo|T>-`V!|7YL&hj0BmZ+-Nw zpYp!H`M&Ra-}%iSfAi10`ORdHrX<_V-`= zmtXt3*Is?~FTMKdSHI#tfA>9q{5>Cj<^Or*FTV2auYAeNfBWS>{PI`5^ndA3{eSqs zcgO$pQ)XS~-yQ$^Q)XS~-yQ$Y?KHpDst8(mD}HzUKeyA|1GUaRw9~uee{Ux$er%_A z$NzIX%_BFhUJm*6?)cx^>D}=^zIyC@Z1nEh$s=Pb?pWEr(@qhi6 zI{Xtp{(t(u%dY``7qxRhP|jDwOoGpG4bvPKzXr^nl6T+t1W!K4HB56{{2DNOO6Is; z37-5pK1_35{2DNOO5O`z4xW6DYnbM^R8Vi4nc{Duo&J9|{{Qywzk3{}nI`@6^vzHI z>;K*BBjj%jo~D^5ZN@Npn!h`7PCT^Whrcy=nlf#iGEM(G$25-~w45a5?C%Vorc4{B zOw<3mC(emSPoE5)rcBd><@8@Szjv;WkUtSTO_?@MnWq1pW17GFsJKD+ulttZX_`$P zhccV0|D8R}cTsf+f~Svno(A8YLBEtQznA%|*t!z_j^N2>Q!mqO>ZN`$nD1gXLjG9r zJT^Q!mpOgUg)boD+{N{@a5m zf0|yVr|G4(PQ8msj{mmc$)Bc|>1leITZePvF{a-dJo(e~GCfT%bL((UJbL;q!IM8t zFVoZXGUqtw#G|Kg4xapr!DaelaG6`DKB-iDIpp|n?mc-Sd2Qrfa{N5!I5jbP`c1); z=lFTb@$;PH^+_cVImGlEgD21N^OWP~ImhdhN|mL8r{54fd5)i_96!%FPECxSzA1R} 
z96wJvex7r@KB)xahM2xFc=8-SPdR>`bA0^^`8Nblp5x~!$Io+)uYV!`^}&puo^t#==lJ>;@~`hbdBN-9H0Ahd&T)P# zk1>5+@Z>punsWR!=lJ@2!LJFPJjYK{j-TcnUw<$7)xneJ_-V@V)12edz3FW&doTFf z;K_6RH0Ahd&hhp4g0BglJjYK{j-TcnUw<$7Rl$?z_-V@V)12e{Rvz!&hl3~2@za#! zr#Z*@tvq`AmBEwe_-V@V)12e$?*(5SJb8|vrW`-bcZ}c4V@$uI_vCx!4#z3Sk8_Um zTY2>K%Y!G+@#B=^$2rIOtvq`AWxc&++4wJ+K;^U2VNUr{n1zdl~=#+)%U#TKY7om-t)z;{I{<> zzVeG+{@-8zvoC-1%U3V`@JruYKL3CC?S8ps-3WF{8W|*tRxU~f>FaOx(eXXs@wAlx zSG_&0xBQ?}0op!w+Bjp%8X1ZM)jr2>)z!Cr=3}4yZo>lJ?!UOyEmo(zk)ilfB{2L} zeSOELKk>VtT=h#X>t?7^;>b{3sc08|tF9(k;cMQW)?9vG{V$v{NBf7C@%&TfsIgT4 z+j407t-AW;$G+`TpLil4-tL!Oj`KdFQ}W1AT&P+%eyh&rmY>#LqUEP2r1nXi;<6{C zq7kEl->R$H^8Lz7{df3XEYC<8W?%TN`kGt5UwT=0+MJLK8Hy|07OMVMUCk{&t-VCc zPftnh6*&DD+EY@^uCjmlt-30? zCx8S!pc@H#01Ti9fJAo>00|hd&bQavd%bhcjVrxTJUuBsUA^4z{Ohc5o3qz<_A=8a z;Y#GG!!+c@d48XA%J zzc87;1UwkUb!cND{O}{xVjA+|JipPvyt4&waDIt+3;XeDaDrvX_i{|kBI8fwXBzlJ zY#zQpCAr#D8B64;$1LP^b={iZY2e>5g>OYkuJ%->3i)1WLbMEQ^|R1G7I^wIL~i2FiT5JUn*i4#g~(I!eYqx zvP_J#(z?j`<#P>~z}Jl=S9_{;CGyl|8uH>izt_Mhs#q89N+QkLQ<*B{`_2sFf|f}ZDN7#sZ1sE?9CYR;yl0Ez?i1RK=bp# zvgN5v8S;I5X06XQ@DT6f^4SK~)n>)Sx|XRi`2r>KcSWXG$m`CnHNV@yzhR)wb*ZOX zS0ewHE@@nEM0tL=fj@)+q&7);+_jsl67YW>EQ#^_aAU3451CCeesXO7@Snm`!LBJ# zYks(ae`8H;ut`1DnhN$m4yzL1`RT@5Z(o~261J9kB>DS<`X9oQfiAl9(~Y%WzBV!> zEG@%B(o%x{`(aUHJGC2FNg7DDyS7ZA|8B4(vh&-Gwcfloh9hjYr-F)Pzw+C`lDN)q zH`-59_pq^R`JNWDGxF!j(rh=}# zuLet^IltXl*Ll?KE974emPB)YyRokGsM}Y_zZfjZuk+iD_42|6I*sOh8WI2bU`ZtB zw;Ss^kGg$@{IkK5NX~CJ)^#3r`wID|gC%jC-)`U?(Qmqah5VDjk~q$9H`a9?b^8kW z$AcwtoZoJ&>pbeV74nrI^_KMQ#+u)5;NSdp%~>sb=b3^y{=>nNIL>c3)^#3r`;+q@ z43@-ke!H=*^QhaNoVWUa=dQnZ*MtB5gWrAd`A__Bo_O-a!w3G42mY-GKJ&o+_y4E& z|MLDcklgQ@BKIK{n)*C-}4{e^OJjC zcK3gG_xauLeBA%}xPSX`UwYhA9{XQB_79j8@IOE1Uw_Pp9`mS2|A&wM-lJdesDJXP z(?`AKu78gIM}Ld|x$;QgP{A;qi1RB_6@LKm5$72%mRsU`hxPCy?5JeG!R0qH71;;= zu$XGmJwn&=RBvJhoJabm3a+Y=vB*F0hges;a=o6)m?F+2?5bqQ!QB}$6?G>55C#zE z88O;ZnaYs!NZ(h%H5ar+#^MiQtKDW^Pi0Fn=aIg#g6mpjD#}d!Aq=#O$Lpy~k^B!o z!p=$|zoHLZk+ZmNH4L=8sGiD{A>TJwa22Z76*)Kl5GLA%-t|<*5_xu440-xNTqdz3 
zYq-TU47BUF>#0l?@_l~=|JPbpq}=#JS|D(+Y86T%-#1urAE->l(F*<$rrM>}^;FAB zVyWT?8#wPBU_-udvaC~lAwkF@zF2Zw0f*BJD^U`8c3Ben^`ZPB z)ff0f{4ktcBQE#&(?%lc3rk@^e#A#C9cBRTKoZzA6}TGodO`xIc{4`B*b92eN0 zCdjkXlE|+Q75AyYz#qaCsyME%J(Vd#zHhdy4;A_;!oVNG7^*m~usxNrM4sIiLte3; zDvb4_!oVh?2IC6bQ<*B{`+f^Dz91?dqS#q<7-5SGge@z76ZyX3vOZM!rw{{wh+l}y zzW7qMr&?BWo*frMUh$twjP;_#h?9%p7C7(4uOZ(zUDgs~;30nEBF0!auOu927*_r! z^6a`4@(1EP)fnqVjR8|Qxu8RBPxTWE`M&RhA@)R`dJOy_;DVp0M{Q4KE0OOTFE!ep zf(-m2Ou^45=Ow{BEDh<=AuzGce9P zmW7NaIlrE9ekV*B^5Q&|8JIS{m}(hKa^7@bCl zJ~=-rgCBn6jbW;g--Sx9o;_0)y@>xN~C^VDhJWe&JtEB(A@5sCb32UGI%6l$y& zp$3+P+$K5iIYj#T*MuoUUYw^=W4$OfBDYD-dk$g9zq((S#2RagHSllz#5T*Jo@yHt z`B#OhLVizvo@$NtqSnAqgm+K&dlFH|zj9cXI8VLCdeLjZRvVKL%W6-xtVI45gDG*I zf{pbe*nlZyG|7I?ArkqQhp9q-Pn@S>W4$OgUk{WF+E_254H!d4 zlOErbh(!LSVakvf=c(FQFRBfgLKc!9-;;+-YB9VXbU`m{) zaAUm)H?S;ZH0kj@he+gK6s8P$ah}SJ^`hK}+$KG~=Maf})c=3~t_T16;P)PU!BYRf z`GJ4_z~5Wy|9^P@ukL@%QvbjIzW?{We{ZS(|Iy(pD%O!K0bO(A#vCeUU<5 z+KLsqhI?z{C!$89!)Q!1ej+(v+6sju z)M!SoM4tVTLSEVm-NT8Im+`|8yGgG0RO>3_`wj_)P;yz6eBcjZ3lW?9*`CT)BHy=2 zFfUT3cotXM3I!uXapP7Z&mKu3FKvbH;=~NdSQTP7x3fLfstozQO|p@;Lf3KP4`B?k zn+~--m9a#geG)@noTugkV-&*_JTgmTgdkv6O7g4vMBk$AHo(+Ba-u8(kAEomI+4S$rL$X z+6o|#|&oRafi&>HgJ@3Og#v=u8}&xt>TF`P>z=RJu? z&VMgV8S>&hH6NJ&7N+3clk;BD7V_U6mL<;fhEBMcw8i~|)fwl%GnkT}r{n{p)xs2V zo8-Lb5Xt#(hbco|oaY^#n25HRY8g#(-g5{;{#*UJq^(%-nodkCi*+HR;fA)SauJFA zH^WpRFKxw&nh%T}3scBwlJlNK6!PB~mL<+p@_{K~VG9{ea^6eYTLlzku;#xH{f0@t3(RwDnU!IU^p-N#135V;Mx z)t<^!BLBrOWyp*3)P1xyQ8!B1T5e-GgdzWheqEATtSJ51C>SE6A-UR9tt*lLe3&Za z*XlD;`>|0hL`Fm0+EbY-k{WF0of=R!p|q?J&8!N;^HhNVZgF0~!RpBQ?En9D>Hq&@^#9+y^#4DL{{KH*`v3nU z`v31=`v2cV|NkE?{r~?B{r^ua{r{h$|Nl>y{{R1l{{L5&{{J7*|Nm!8|NqhG|Nmg= z|DU1%|1bY`{r`urveA)Sy499K%CnqB;}7w3;k*c4dzx+O)xOoSUF)ccTZ%t~vCEEE zud>yVTe@9yOCjgsx+-*Yg$TFiFLFz-_MHwy*T{4r=%KCLQUxQ}GKZm;=_)%NS)y$v zmw=*&rYdxEf$gBR;L^)2HWV=QVx+}uUIOjr9>$5wI)sCvSf@|(+! 
zSFf_wQOHX!0a*`CW$0Ecf*Z3~RLJ+8j;-Vp68PW`K?pc6dn&jRd3HJqdC4W9?BTL9 zbSoCMgVr*Y$g|N=$V)B(X%9^$=M{_EL2H>Z#KM{qQJ=IT400RkncMk8_6Yfg$Di*woszkQ`t)7+385+HVGN<$p2}Dv&sK*auh2~y$EM^G7R1e=7V>?k z1Jw#ETB!dE}@DR2NafnCGu=^81hP2DdO0aT*3r$ zbI$jzj*a9JE6O93P za*6Wzo<$_`FBnWIU8Ra+Q*sHNilbPY`T6IEsX|_Ii4{d0o03bYo<(jm_w&yi)}?fn zGLB8jCCcM_5|PM1H%uAw;yh&>o03bE$M+ilZ90<|ke0hA&A`W08QMI4)wODKmxZe#w`kblasE^(eRj!nrWltkcI!cx>k{>fp= zkQe7E1rV4q5gt7=^IWx}RH<%LVsp8m_TtYbnvYZ*`A0MU)dC4VK z6me`yE}i?&q|Np0@{{Oq^|37A_ z|6hdu|MjK*{|A4o{{LN9uJ)ad?H>13${dKu;~l!}c=akf9ho(4_qeA}U*-5@lY;@$FZndWnww`nW6 z1SC0JS5;=j4Zg=-%<^M>rvv>K)@Zs;r6RVX5p3C;nSP9&4ntlwn(k8(NS7V2US*@h zkXMaHVa~SX5|F6xCFfPr zB6eE_mfnV)jzV5?3EhyfExCkRw20l7fF<&5bQJQEOW5->x)Z*^=X zm%!B+G?tuKN!vbRDxxOxY;_p&;ymRN+mcHtKH|)H%K5(2f&Mi)&l@uE2XS5j61HZ3 zo}CUuUYw^~V!Ko4&jb$V&oj=m(P7Ao^OQ?$OD@6i*v_Bloc~0ZHEkuASo4Yu{2_iK z&LtQi+tUR3kB6y3UUCUsjS-tzatXypoJ&kO|FL0N%55l@*p^&E0TNlxjPoBIOeweF z4H?^#ODKncpEqtL=RXpr3VF#Ta8m|WwGIpi6`M_XGoTpr3TXG5I z5Xf?jTZ#O`Vakvf=P8%imRy24L_3!-ZW;0q_3M&c0(WJ^HkMpMNd$5mBUd7SB}^6a zl1t!f44SIkh7~Px8*`|I{QHMxiSv|8Y{e;fcfOSEsb0lI{(XZfah`ICtr!LG&IPuo zGL^``H%t}sl1t!b4F1C8ymAQSHgnFuXIPgwPr1ZaltM~|92Ms|Hq;K|Bn{^|9=nt|2r1_|Cgcv|IZiw z|NjF0{|6WS|M#K)|Hq5||6lwq_5W9n`cB7=c63Fi;ikzTZ97w~r*TJ**y+d}-O-LB zOfNgSDnYv(#|5XWM}4DXXU!dzE@xGOcEzIk>FN<19ho!jthuA0J(pD#8uP>X>FN>t z9QiKoNG*{AbxY7Lzj4v&>QUe6*pXUdrJ1@VXqV%dpROLU(_zTdi>;{QSXG54$AM^T zuVs3%qrTCxBeleeA`Y}lf(Ec~+tbw}HaZM>da)Hn94o8P$g=tA>Jj@KhCIF4iW&|G z$d^?$-xaUr@>Ddf$bHmzI(DR%(4`kEtI(439*pG0j@aoa~j?IQcL83-4e7b6kT+> zdenD1cBGbAX~u2|+Evg_Pgfs0VyDB97w4(sfP!2+RVYHVopQc!bfEHK=U8KQt3p#O zf-Q?hiXTU8bQtpDJVhKUyiiL^;<68yj@aigeTi&K`bZk>Cp>aD_p)vnL47dC%k!PbLk>5%! 
zfx9latn~9b??nu^^eZ{fK1U+Im0AKl!t14%4Dq$suA>Vg8wo*&rvI`okQwh4( z`G-qK>~t9N%2qXYq$)Jkvp7kZa=veLVEcJ0p}`|op)t{c$G7A@k-w+Und)+hP{lz& zF3u~5K!s+>eIkE%m@?#*t!ng$RcNuDtwOWZ-jIJ>zb>gIaM?v5ZxtFh6j_duD>?tz zFjdG)ErH7}Xo?siqv3Y8r;0zt`Ns^)QnsoABmxN;4SiU9DqD&CqX$#URyBb{i54;% za;rU+sYL!!VXBaqS^`&H@E0cMl|vx6F>V?1cMa=Og{CPa0tp!nlZf_I>k{%;{}QGQ zd2wE2NQ4nQGLwk*RHhR7tA8GrCC+OQi9mvdJqYA)}!`ZBMl< zA%FFcVakvf=QWCi5VedZIq&6cA%FD`{ko)^(=Cm8RxJ5ZdjH$uYn{22^mdt-V549{*t{=($EatP!$Q_f%g^{_5+UQsJ&Q=>e=(R6=QWu`AYJz1($$}bDMMbI*Ju(#1kPXf;nLNg_3M&a z0+(IHHVP!1IVa~miAdys8m0<)sU>jP1x+RAl|-~pnCi6_@~Qv7lkfjKVxJ?Erk#DI ztGZsLwp4z0ezj#ML0X;0&pUODRf96J(ANS710L&DZ&hpR{Ia}@FtOGuX! z0~cZHvcuIQwmAxUi6t}%2a^}$ceNwcda7S&$oGwo9f>7Yx?V-RzwB`Jh>Z?IUL~4t zS5fkeaLX08r&^a@rYlE%p9A$POEg`tqGk{*Fac{%WlL`Ch!obRtP|wf=P2YQme7?K5ReBwFF9O&_=s(eLSABtoQzX~b~R|*Ygwx@ z&i9Ru9f>7yVoA8FMB@tECro8b0py5{4ntm?*L0i`wCk-z;GJ^5?{nawg`C%XoD#I_ zrG%{+=h^2l9 z_{wu6HD5nA4OqiPeq9IHtcg0@)Aqv z%8OMA+O1eKZmCc>VwHlxjyYMCpxuf!BUd8- z<}PX4*%9X{;#ifST}cFv60D#tylVPcV1vfW2_4$nmv`VM1C(!73U?E zfIA3Al%QP&ElRX0=XZx?spUcq2PEXNEaW!!RLe@v?+m8Ya-oJ}Rf2Xa)@GdFhN(hc zVu=+c9IFzvD~CXqGw1wfSQksQ6-6AY60|FcK$bJ({5nh-^5Q&297t3HEo33~RA?3F zSHrTzd1^Qiz|xjIm90enje{w1o*E7$s$mMbjXjmAk zI94TSR}z8TX2$tfhABf{oTrEbiE6A1S%^K=x{C9!7?vf@Q^Ns_(w04ytwjFigDG*I z8V)3?VG0?IJ(a2C{L8|WAurBT!hu9}G1dHhWE0C%nM&ld|NqxJ`ToCSwmNc8cQ@Qq z33KdDM+GBzG|TRDPmlXf2O7m}DR58W>$RtfMzCekNPBwBPDf@L}zTmq4LN zQ<<+S7+r9@dfaz9b|sgPF2`pp^37}qt+lK~o}G?DUUCV%Ib2rOXgWEZAFm#>(NV}t zE|D{AN-m*bbkXtZao_6Lm0SYjYq+kg(QbddddyabAurBrj!nrWRMEntO*!9pI-qDE zyEMtB^1FMPHlT^M4wm9&XG8y$tbgt7BJk30-=zF1dtC zTC~xOT!}ne9frI(Pr1ap_{%5J1-z159Dz!LEzd`xrjubosL9) zC*=~n^8&*b!xS3E;VjzC)pjhN$p56znk1Ldl@~T!Q8@(sy!lf@{>ObbC%J?!y;zr^ zT}cFLv>E4r6s8P$<*S-pQgR6;5y(fTod4mlEaj^jT~cxhm9(%mQ_5F0x}@Y1 zDru41%sBu3FqNF&IaI!?!6hY^U=Go$wK?a%*RM-*iIql|lw3k3EpnR~=f4}K3VF#T z5Ke>ha#zS`=quV&@w7Pqoncw3(KNb5{t6jQa^9P?$@y;&rc|S8bcx&*GMePPm$b?G zZ-uEsUUCUtd12!fl|#VK&pH3iVO^@xG`gft;FLtnKU_NcMwl|>#d%FGsS`LQ5y(fT 
zod5c;EOB0=OXRO$fzF~m)h;ITUmHw`^BP?ucZH0GIYfIZQ;Gan!;~Q}&TDW9-4&S5 z9AbGYQ|4{H(yvQ$2^?7lQRT2M`*7*#%VDaJms|onMpK#3t5K{{NkP|KBki9l52u+EUG;*(d_FLsf05$D-QOm+6?Dj!c?%C6~Yy8vepe zR~3ueL2H@P%XG{}hasDZNALc$yym#ARW4q6MYM4p|FLSAwS33F^*qGHkfc=ec# zjzV5?3Eg<%;}Vne9*Ye5zSXfSxrFY#*r<+ehpGYB{CM@4tqwz8oYy@jeyAFz5N<91 zD$e(v4$N#I=XH&VLJ%xuvE~(RBF|2TA+Him6G9Y$E<0X5W~0N97w0t~q~sEp9IswE z?pqzZl1t#kGNMt*B@~U?H%vvY^fqjD6!MZwAT-icGF`C;5CO@$oHKN94?acluIC$(_4doZ*woy zF*_ZGyf{y}gpEs7f(Sov*?l6(Mq&A=bziJOL7UESO!r`E}S*1R^PWQ( z=btgGOPr@%VqJ0xB@xJNW`6$ZVakvf=P8#!qAH#$oj_(X_47{~mL<+pE&V?1_Ydn5=P8$1ms|oQTat+BmrGIq z-|_eV9rvA%y^W5VNHxFHk?6I9)=DXIPuc0ntZ8qfqb5?#?{vKF@#--f9l57_+S8mx zV-gsqJQp3W9`~(|y$$y?r_t1&s-&GCuO742k#EzUT%!d*pplW?;Y2brx7Y?+d*r+ z)|s!e)nUjpcI(y)8<(hHbiwiJao_0xEy;OJqba$BqES0&tz{+h>~t9N;yhIxJ}!~L zX#VNaF&iC*yf{x0hmT8)VANu_1tW_feXC;)+D*aiZTw&PZZ~ME{<~vOCl2aPlu^Qephmd6=fVgE-^Xp z6>TB^sbN{lZFuR0jZ0KXiyCdl`A-g}l-p3nf%%E?3orY0>G%_2Dv{q!xdiXLuyKjX zA&}e5IsfrtUCLKABc$XKN+OWk%sBtCFlETAM$?Rtl1nIwKt3|%{6~jnsYcU?kdjO2 zSRA%yod3vRN;R5Bgp^!D2js|YtmsJ2e>hAT@~Y7^A*AFI%puyj#N5w+s9%@l5;(F9 zFIaL3m9)rmW}N?Em@4EYmw+RtsXUiZMGHS~6>V|;1H-aZqiIBlatBmqc|zgG;^}Q3 z9!#l5(})md4yeqM{hmW4@(+cnLSAwS99Z%f=DCD&2;?@#E%P>4hINVanh~PZ0U1qY zIYzGZHt!EphP*hh86hQ?@Tc<4h5Y-5Wr_0|5mIsqe=1vv{Cfve;=D$LD08^%)1~A0 zgegN_oY#a9xUE*kwv5J}YE_2(yZd!XE`cM<*hZxe@b2aD&7mgp?+R0eyyOx%v81Ww zymVeTf1Yywox`%kd5s8B?tqLYIqz9SBL9xTlsK;uA<7&e5lYT`4nfYR{{N1?|LDbebuE?})?W2Ox{B-piI~}>Bd)iTaNx1Cjw)Rn>=z`PLYki|*Z(os{MDrURv!md# zNIQCsjgHEh_7%BFG{4dDwx_Gt*yqT1X-{eijiT8q7eu^f`qiFh@6xrt)3GPDgoe@h zv_vL|^V8L9>~t9N%2p}N*(w=CjAeq@o@!m`#jf>@4txN`Y?UGoMB7Mo+L>xSm90dc zjSfRz*(yaG7@rsh+L>xSl_|xKYwU9v^2%1J;nk zV_oyp)obi@6!KC_;7dZ9N-w5RbiwKBwZ74@C$)s`yzps>w>@3G#zsdWFSP_-VJ<6u zmqO9}boCnh9EH5p61wtYt6Z?1sRmdVovvQ%I~{vcOX$)IpO(nvetNq4&^2~C40&;$ zvuL(@15ryc6t$-!S8={?bl^)pa-MfyAT5kv1`Ek~k44FOHaZM>ah@U$o0h0hgj&s# z`{X?P9EQ9&PYs7nOJpcQ4EIn}k^5TT>DZH60^brYjPPlR$$2km6}hjm(^1GvEwPe{ z!!DPgLq+7a^sA8X8y$O6OX$uEpO%=M_kuQ&XQQK#ms$d`hQBa5FBdldaOoQR9EH5p 
z61wujrzIxmy__}V`%cH6)DpV%!lxxB=e?j!k{Xw;@Fg0LP^B@!=>Zjg(*W`oTrMzrX?zg zKxQ)I{PAyxWr_0?ap0v9=T*?c)`auNzZp!4^AvITv_$$-&hH5@)I zF*)x!gmL~?{ko)Oo}W@-^;B83UZ!hobL5upZ@H!TkZ{Sr(1lqHw16GZEl*{tw)9%x z=-Ahm;#0yfru?QIsm8kIhpX4v=*TVI*OsnzkBQQocBC4nE;wAh*7rH~C6; z7`0>8`i&Vt*ykwZC6;7PVv6GL^`)%~8lpETKs>=)%PBYDcQ|RHh91zR|HS zv4m#Ppbrx!E<0Sk#zu!BPd6rE4(=WgQwXy5RO`~qbmdy#=YZ3Sbakz3OsYgvMGLDI zj1qbFIShGmUe}n2QI{RAUSpfXkQe7EmhfST$$pPThJ4@X*q2yBcV1w6V!(CT;p#Ou zItqD-CDs~6Q(_6lq6-dJul0S7eTgM>Hk5*VyJLmUCDWMXy^ArU1OuekQe7Qil*+C zs6#oAMaB8P&w;C($a&48DY1kqT7>Qy=h^2l`N@6 zD=#oTQJhyrJO6O$8v7iDyu=a+tii1kOXyU5{^8O!wmAxUi6wO7#kNLql*jja){yTT z9s3eX=*|mFPmG_4LuY#`=bXsDy-S+*SK>Uy65A3>l*jjiwvc~Ym&@UfF>;<_2_Kf2 ziL1A0)6c(kFeT1YEV125rkBU}iZ(g_mM~?=i}Msq*sw(A5UoVBde)GCbGNQNi6wAN zZ!n_75=tUaqFEA=$iFE}CFl1fmRM6Pv9-G;=vtB6SQ1gl9}de>6OCdC8G-^0}SRyN0*s_W?k>3uc z$oUmD95yUb747`PrE8loWyq8BxTKdfE$<$AGdS;u;)eXXUzfxZD~dR_C6-V{JO6O$ z+A2&H@)Ap|DB{>w7O#T#f)AIjy>VEUjwPt!@L`GM8M2%i=ie}x(y;_J96l^DIqwy1 za{l#Us*smhVnqqZR_`AUBa-u;Lm2X}8`h;`35qy;SYmSClZfQ}Yr~WwFV0iM;lmP> z^PWVMw|UL5EODM14j-17ocAmuIsfXxlsHcfhYd?q4uNXca)?C!Rbk4I7w0MAXlPR` z!5pI1uroiO{r|t-$@l-Au+@=!x__Y9tzk5Er=x-qe2)dA+|!f3(}BBZ8M`%&rUdOO zX<^Hvk@ob2osPFIqCw;49 zUvde2OSm{iAD75ibAG&f!d6EiFS!K1B&4Z2jo^!U!SU)z-|5(wTtd1WAD75CbAG&f z!cIpaFS!KjL@q09G`DCw;49Uvi0@MpNSwRnpFn zS5MgLFyzH~4WqFdjd}hB$EzoOrvrB+lk=KJV>KFqn;);9u+w44i}RXBW8)GTjONFy zCv0>W^5VQ^(bWBaENENoo^!r$b?i$np-V4(Tw-$G!;vD$30ob7yyOyV4Wp_1|5VW; zc3T3L-sW20>DZTCLU&%Qsz%cxCStc`VDdKC*y$+bC6_=d5}dCZO<&Qr8=@-7%m~6p zMQf_ni)0=1tC1E`fS6Z6)WuNt?*C(_zSq^OQ@V6BDKov6J&&(kAk3bQtpD zJmnHA8K9goz4r4io7yTW(|i)R6yCzb?ro)|wGgHJX%O$Zd>ViTn@4R3R_9#9A{#sz$>OEcH8b z8*`|I{11j@>0Cl1Lc}R}cV-dosdh1u|NdY~=MowbB1XZxC+EGSP2|59rV4qTB^ z?O|Esyhemnr$xu&usY-Xw+2(3pDP9O?Wv3<@?Q^Ag}mevYt0A|rI67i=RJuirBxdeV0~FpMbDRV0M8mFX&e>k^C*x-A&#WjbM}Ba@~B$t5(6 z2A!BN&<<7Wsn{VmcEU!7Ax}4^Sv2UwEC!nMideKfm8p7}PWo2If#ed%2jeHoOSD7P zSl9e`^@OdCLSAx-m2Ov2rO^&m!_)=Gt0#S@<3Mr=33Gg0B6&7HUOi!_qmY+eLc$y$ 
zm&o@LLEPTUM4pX~LSAwS&7$#fiDX$jXq_V8w>l0amq1lOg7r1>~Ojk6Tf4OwRR!1Q(xrFY#z}@3wx?<4< zUoM^WosI*^CE!;ULZA~9wh+3L^B#;6d3HJqdC4V^4$>4~K!on(yvL&CJR2Q_yyOzP z@dACASXKL8sh*~s?^_)Ql1qS7Tvu{lG(+f~ah|OXLtdQUaL}HOOH9stL0g>fI~|zr zM$S{k0d~eOgLmhCwx{}yi99I1or<@SdB%Z7C+x1o%~$0gFCBDb+5B9VV-m@?$W`87FT;}Vrbz)M>}Tgbm;Se7`y zj@$-rRGe4&0b4W9zj!bu&Qrx<;}VraAj_F?{zYNRkQe7E;;?av%pqECW4XN{|H5uv z`;tpQYZ;uc`~Q@zBDa}w{smzwk>8hG0$NL&NLG zjPuVMOo{VUarn5z3%sKy@VO`=pWgIJ0v?u31 ziAc^rJ4_k!;ymxXz&DQKsgekkRa4GCYgiUJzouLQ(LQaZpZ6whdYfksrgSVpxrB{N zR1SgMX2$tvgegOwoW~`-L_YKR%f4JXd3wJt$t7@2Zy2+2iQG`+HZ#sYEld^il1t!{ zUYbhID~UjEGv)kKhh^zlf^rETmzbRQQ}IOpDT65;OHeN1;}Vneo{fB@yuR=1|G`sQ>Ty`~Ob*PRGH4{FtWE)Cf#P zqxtdb2|FE?H66&0X&Oz9z*I152d$H=iH(lj(*x~k&Z4RN{}hbcL2H?^2-3GY4i4nU zG>xYAR3+{Fc=d#>j(nRAB$tph2aOo{9t9)BTYE7pcK4kQ(r zSIp3{IBZ!-o8E?<4ntm?r;1~}lYU+$EzTt@)k);p=rH8Pd5SoETw-RfUeFryU+A)? z1IZ zZS1MSPa^;EFlES#^AvI58^^^|JB_fXGG)krtY4Sp63|)(QA;kNBmyAvRme*& zvC`m@^6ttZklW1t{0E11iSrs=Qr=xj1kRsloc}yOIdxBU3;B@USd# zo+=IyU4CB2;;=R2{6m8&ah@s;eB~(4tE5G4GxPIT!jvH|&TDW947HwTJ9Q4vx;#xe z|Neeml1o5q8JsWgE}G3hT{?MRm@4EYmsn|XNqKkC47rUt)bcj(9hN1|YjjC@ckyig z>C(x222m!ZqvLSP9hEL;Q#-0yG(TNEWuqf=ro%0FRJxo^?WjT#BDcMkiXW%! 
zbL6{pD7Az}(QE`!J5!Ap^rF+%)4tPjD76H_G~vo@RRt{~w-vO?nNHd1Fy!gQkPgyR zo=PYb!Sh%s%6I9sZ*-vl78M%uLE0)W#{GmXi$%(rPTA-%zlGYdtD zJo_AmJiXXjx0qDCzU*}MwC{8rN-d#FFE#?HovFsU=BKNt>~s|JQcLL4i;czWlzb_*S9@gJ!PMxke6CQS6=wE#N@n(qRN?0 z`%cH9)Dn1ygIpD_mGVqaS06fMr^ApJ=c$(1RJ>LwLRDqtD&+e{2l~Y1Jk=85K+pgd zSlYIyijm2AHaZM>ah}6yHWjZGicq0hqLZ9wpTm$B=QSUs)DjFuEpl6$W61ZNjzg&> z*1Ys$U1|xXJo68iPTA=wNhCyiH3~ z$^$=d4mJHeI~|6+I8U{NO-odKL{&BAeBbDRpn;sHS^_^zoL7K^tr_PZ>~f~VwKz|; zgilMPKgBupjPp+nQ--`aPqlw zke6CQS6UM zfz%R^T86hNwS)pBa+?|FA04I=`2(pXAho2a^z(|3?GvV%pMTV_EODM{i8Vf)T$Hys zi%8CU7LlC4YcM6wQ!QcB5|u;DKU_NbOPEUJ52Th@Q!Rln9Aj1ObESH!Us%YW{CQZH zI8U|2y3`U%B9Pk{x#Vq5{uHJRd2ya<34G!xo+^nzK4K0vBkai^hh>TLyz>HqB5g54 zg)Q@_LjL3rgDG+zN=p)gTZfE>3v5pXmpFg&`!Hq5lk-qo(o~&F#QIupW85<2f7h=| zY6)F>;nNbi!uf|wC%+9-g}l@fy7U5Ly#p;|G<2x#seYo6PyPR$eE;7m`y7=t9coL- zG{4d@H`im4w)B*3j@;72L%A_cqQPB0JZP$7&0|q*>1p5SIMkLRA6)ohS2`*h%@0>k z+33hEJ=B&;m}6HuDi&RExO&?6ISwV3kS@oDB`O%r4_8mw=P2YQmXI#Th9xQ%%@0>k z+2$zZC6>rZG$oc$J$upN>S^ETIFwid`5=*}muN?-@h;2{S5MjKFy!UNG>WFg5{g9# zx8^VOGF>_C`y41Sn67FTO^GEGj9|+ghTPaG`y7V6LbqnolvqNsXnweQ$~K1~FV1Tc zO^GEgJ6t{O8y$xdOCTQ%)FMWgxQ>M0u?g}lTP$Ombv+!za5xG{@Gg?!)VIFwjI zS6-9EH5Z61wtYQ(}qR9gMoaB zB~;O(M6>KZk!Pc$ke65j`5;Xt=T*?6M4NKH?{geVETJned{`n=L>xa`0+z_L&r!%r zEP-^8%Sy3?8np8dmrmK{DC8xU(2W;9EHOFn^{gS^H#!a_mH^CLS8`qzEsiB-oPT$h zG#&1V^Ath@b-tOu*Vx5jBy2=U_^lr&waMlNpV2 z2>AIK=id>g40&;$VhJ0T$Q%Nk_j=Zle|x_!i6t;SJCHB2gpvs4HZ#t@Eld^XC6>VS zY??~WD~UjEGv)kShh?cm(}WPaTOv8{6>TE_mcf)tG))MxVToi}`-G`BF}=;3!&D(J zv4n2C@L`F`dCwsX`8N&gQi-M!Atjbj5`o-i#`(iAWyp*36iaMMETJR<`N)*>2g9=D z=P8yzF(uBcqJ^I~kDAEu52nOiC>b~`LfoY#a9frN~P z>}pR%)I@$Wm=fnTAw)%g`&=oubN=Dd={igm@)Ap|H6TO~wa=BpRC}sb8S<-PUE;h( zga{;LG<2x#sf;D^Zwyn0yf{y>#6}pwBlFwK_Ee?{`8N#966ZA`L?FQe_p?2ftwjFy zgDG)d6G8-1`&=pDYENY~tt=f+_B)rqTGF zj<-EtJ!7LodkRzB(-oQ4-fNyqu%K6Npt)q!)uL&8O_`M5+njQR2E8CxBN zyyOxqa?Qsj(qUY1yn5DkIv+ba@R%OU{tq%M|`mq&-IW{g)o_2n`dd60VAurDJ(hD1xC{K%c zYr&{E-*q~$EODMH4j-4uR0p<( 
zVz(tahJ4rRK-MkJ^Lmx7vgdZF8vb>Dyn4n~MA^1FSc@5?Q^C0h3%<`st9t%Mn@qpxrDB~uyKhBMra++ zIp4K9@Ds&($~bIXqDorSXy#BecC*!C$cyva_%PJRCDNfH-p)9G+I2dxEODMH4j-48 zocAWJIDg7chaoS{Q^n!q5|i_5OTdKuDH|Pzyf{x0hmT84&U-;yoImYa9k}8`oR^HF z}2}{7z+njct4lGNYr;5YICF-SwpSKJw zk!Po)ke6IScV28zdM}GSteMISDm{`zlbb1){( zQ^sNA67^Q1Mw@Z|S7FMK7w0#@c^{XUocD^hkpJbdEODMH4%9CJ5;B_Pyk`-K{4WMm z;yhIxJ}xmi?DdO;PiOG2{XpQqf>(?c@1jrwSRb4Kjk`}qmjPpMY zQ-!?b5-Z9$Y+Ry}2>5w(sD=DbhGmKKRB_n2L}d}E(afVJ@;@F-u}0IK7d9?YIRtVW z<5nX7qcBy-OD+NO6D%?c8I3*F>to3Oa9Ec(PZ`I?#wC(m$Zaf%NN@9lFlES#^J{Xx zE|;ik%`4hM{`uKMHZ#tDEle5m;yh&>J}!|C6}gQi5yknh4$Bhfsp5c(k)PMGIBZ$w zOwMQj|4zRD?~IKOZRr} zQyJ6CbjDVPwiKqgr6{myDmPNG=z`W4c~N>ELC@t7q(V6!MZwNSNc}61ko8slT7iE?AQUPam9Wyh;$Y;_p&a$~w(MQs9mu!q#n%SI^k#Fys}wb-jvSz{`$T&)DcNBlfmmIHNIqO;-&^Ib{YZ^_- zCFH~qx~-y3Z^KqcAuqWE?m$~!>#dDTOwM~O$^dfKbvm#tabDACY+NFJ*!;_-Gj=)( zdC4Vo<%N$+OwM~ON^iqPMaf!)!uV*WCpLMMc{6uk{lS{TXiX&SJUePA< zY;_p&;`{-RB}y(K7Y09X`Bx#|bvm#tah@s;+(IAz3Ko*{-lSE!dd5zNAurBT#bM(T z<-*!GOcluzc{Vx>d2ya14jY%qdbS-uTmEIpcdZTxam9JcIDA}U`gyNt6M42e3VF#T zR+Mr0xWweV7qo@^YyW1_gk_2IRB`yY#N@nZ5sCb322)B`sp8nyC=R(W98g&PmB_z3 zOcnByOX$W6AD5V%_j=Zlf7P%qah@^`8<(gg0v_M8`$YbgVakvf=l98ZyIX=u1o9Ch zS0Vq3VOiolRU9@hQ57w08M_ksmk*}Id8#ZbB{-yo8B$ojB!#S5+LKQ7?8%t3W`Im&LLSAx-6=fVgE|Idk&GvKPOBX^5Q&29NUshunYp{J%=#PKf7O-p zZZqTjv%*v%FS*2uG7cY?n4I?{qB#G|VOiolRUAGpF*)y9L~{NagDG*IDh?l)n4I?< zB9VW3m@4EYmsnB6VdD~&L!gGW9Kw))+ORHho-z&_m#8EH)$Hi!qyE3+@BcgNJ01A$ zpCjGd0mn(*DfOocMX?sQZznjf#8vC)xxiqHOKN*7@Ab-es|^{j7o ztarGlYfYo6`~OtZ&W~5m*y_l)2_ODzT|Ad4NxOm(;;p@y^)~gL4tNFG;<-dg+HZfn zdd5zNAx}S+(`ZW4t`;p~w*{koo7m_uLjK*p-X7?8yub%at4t)2oWrZ5e#w98m&5u{l*y$+bC6~~p z7r4SFf>AqEt*1$)z(z+QFS&&7ynsIo1Blr6RHh91zSXgoTms~elPasxnBLEiSI^k$ zFyzH~4Wp?VO~DB9cFOs_(}DkcAy3X%jdt7P)hlQ0bQtpDyr$9A{eNoF;)G(xc{Vx> zd2wE|XzKnywrE@Iwgk+O?^_*f$t85_g^f#8IGTUDbjDUkAuqXvZoPn1C>YT`SgNOD zM0uO8(*Z=pdCDbhT%t~s|Jl1u2)3rK~;bhT*bpDvxT(NV}tE}=Uwd|YC3 z-V0hozH4>hC(6%LE&-`ftP2s_o(it46xix8TGYYBbC43;8b%%M#})m#}e(Iu=Kb 
zW+iPR|HZ+SavNTH0jZEUuN(rojg_>C{1?JhAuqXv?!54EiR2ct9P_7!{O5;tiSwEf zQj&Hh5y)+C# zpSGu3RwDngFjdG)F0rOu0t|@Xh1@1N?>U5V{-eXX#Cggkppy$@$Y|(L+f%J8k^e}T zGUUa1$|W!#N9KhWrP*b)mh;^i4BD^ai#MmSSD)X@dO2VJeYFdr_vM z=K`rv7-%0X)l-=&&ObCPOPr@%!p9|&XUJ`4oWC-d66YzGfK(_J(>_?Lr&<;{pZfnh z`u@M`eWwH0K*ct!biGRLs9uHn>FRZMI&w#G4U|mr&ez&ey$Khbu3qmO9r)~@wsgHp z)n$4W=BKOI+32X8>5w~$M)u;TafOfe5&tfsXnwkSoqdjam!y`^C>nUPFwo9a>#4|5 z@6z?Y(}C~)^%LW?WG$cC&Q!x#dn#k{V%OQ}Fy!gQ(8=acEH9QF1rJ5}E?w^%9rztu z7BBafOKoSWv8?&&>UB0c40(F7m9AHjOGUgUaP6snq2kAN_BjlBdNEvq#$T9T?6T9< z>wTvK*Ffnf^3qrN)OMyCKXHD#dYzq)LSAYK&7x_Y_k~i3*Yy7FseWQ1-#0q&*?%lc zcV4uP=3)!+`nIR5*V*VOUTgstSf z_h=Qlud~r%$SZPFEdixaEDP~EIq#t;k!PR7kXPi^e2}{TkFd4KZRwYJo4(VrmRbVj zk8HIB?E(&w+lr3#Htcj1@={CS4m6rd&I`8-K3uxqH#+dyKarR#+CHILuI~|6+I8U_%WOCxX@@Z66=1`0CeWL^MRLJurt_1B$r(w(3mB>G}%bC{u z;yl$7J}r?8oPW4<{V8F}kQe8vmhfqb;ZNJCg!xlL{>lBiq?Q2rgQz8FR}z8T#*+I) z{z+k~ke6CwO|^tiOQb_ZZe#UqA^+g8EODM{37eLvECL?i3fe^eiGwL+t5i$av_$0) zsL*Dde;`a1@={Ca$_ty8sN4%#&Ybi259^Yjr&_|MB`S$PZZqTj6T*}sFV0gff$2En zypjmyBU8@bH!Mq>r&8x?*CH~f!t=s`MbkZAuqKA?m(ld^z)Z|xODw-!?MJAswHe% zqOyqjhfCKVJD3vZsg|&5iOM0`CrlGR|ClgU$V)A;rdk3@q2L2D8s-q~sZ1I2j~><~ z&QmP`nOqn{M#D(dp2}Eyn@5EyLtdPxS_0E?#8V{^$V{f3ziU{QI8U_%epuQ{&U+Ri zZ*%sS!IU^pwS-SgOwM}_As&Y%4;OcnA{ORTAufKn*Xf_Jy4+C=&J)c@bf_y1jIpCgkdTmuzZK+dAs z&vaF>XnweQoo$ZXQd|QiQ%s_3OBsu>xgLvZORx8hj*Yf-rCBt!rHV#yWA~Qru z+Z=_w#1c7)rtbe!J=>01i?oJ(-{{y#ERnNlN-Uv@c7C{eosAAdo^DLS9Ngg}*P~d3 zaBJ_SUZyM8`#uL!QX$XEvBVMzMzCcGnB3TP_BjlBabB}%N-UvRG(TLu&Nhc3FV1Tc zO^GEKi<(2TSY*iejgF1P61wxkh9xQ*%@0?vv(Zt=ODv%~FW}DP#uSU%F>5iRknj5( z8;K>*$6mav4@+dYoFA@UXP={xmsmnqUih#?#-eu2TFX)Zxz096Auq9nZoF8RSVHwI zLie2WeWPO|v4rltfKVvlLcnIUY)`eW^fqjC81mx0M$wd5LMH(T-BZr@eGZ(zlJlBH zQ(_4PBa~>yuH-!X9EQ9&PYnlzLh%bxm@(67PxT8EdA2zWd2yaMUO*_cm}-StxUl7^ zOd03$?9>6)j3M%XAWXHaZGF=*kNSg#vjTMvz3$5%ZIIg!zDJKIzJLPP#- z!@AUR(d3d6ODKszZZqTjTf>wgFV6GMi*<=5ltjSKPdWdVVOiq5CYO|0LKQ7+%{c$& z!IU^p4TlX&R1SgMX2$t9g(*W`oY&lv5=$_LXt~Xt^N0PqB$ilda!H9LRM8^0nQ{Ig 
zOcnAHOTbA5BNPjf(In?Ri73wR56cqgHMvB=5E)H!-m{4G^Lv9SabA;4bhwL*COPjp zL~?#NOcnAHOQ4O-UznU%4uRZe&iS2TUE;hZmnay*yVF^;r+R_X+ib&>AurBT#DVcR z@h&0@$(D;J5rzC_Se7_X4F_tAw8a1ktH!QGem$5H=c(baVTsBiklPry68Tk_GUUa1 zN;sN_Y2Kz~G{LRqslZBall}j{-pTj>-C(OD_Y~JaMZA?T2fq*zSXgjTw<+zOqAfXL)G}P?WxwKx9J939f|x#a*4I>F;RYl zh|L77J(a0KzVCG48YuooU1OpIryZ)svgXIDH`wV&yb>JPN{{a)Z6eQ3 zhas=nO%;caOC+}tyUm{_@@#Y%@`~LWTvBogywi%}=1&Xx>wT*O*FfnfQX*7x3G?`t z-V6EbY;_d!l1reK9od}S|5w%8jPuv~P6w`m;XwgIEkKc{`w#KtVwbSG_v^%GhZ!_?>U4a|NB0hlUzcVUii4g$v7ex5Q8AD5V%_aq`Y|C2Cf$cyv)%Wr_1taUiUwEmmc)WmyC{pZ)(k`ToBfY;@$7;u@$( zC~_7}DPUB6Ty(s8qi=QK8YmfKx>{SRigtdydV{Tw+|unXw^Yg;yV6my2nCkKqTJFO zeWwEl##$EB)!I@8BiJ&BAvbn|osLYJa1E49@mQkd5{gChQc>k2pK;}ZFf&W~4bu+>q>OD?h2Fq)D}sQPFJtyOar@_nZR*FbSunnqJ{ z2?e8e&|0<^;n`)$oHKN^fk0Ba=zpe3P!MH!6=bur^ApJ z=QWL{n@bl(RD`~pXw>oeQlzt*PUvdeZVZzUwLsjU$!B&SMFV0gg zu_?KP3R)Z)PdVRrI?#aAvdH<8OQ@oStr_Rp=`iHQdCDbxTq6A`j-M^NPvqI?FyzH~ z$|ZbUV&q@#_}ToaA>X$;wvtP%DVKmwF0vCGIy2{KPvr#@dA2$VdC4Wv$__@9TtWpc zj-RKTe`A+4;Tk9+Pq~DTOJsK6zG13mrMG#*U`of&TOL1`TtYbnjwNQCe|?xL}Uu z&nqwBMwPCrqJ^y)=U+XT66YzG*z8ncmYnyBHaY*QFlES#^OQ^YxWweV=McvESN7|Y zTmr4^*v67eD2YIBGvoX#!c-wIxx|`siA~8RRL~-~nR5Q+!?MJAO$f1ZiOG4dXp{3V z8%&Awnh;`lOR$E7pEqtL@-Gcjg}mevxbB5uCFhkxT=eDAjh77T66YzGfKD#Rg^Y&B z;_azk>-07+4pWA_IIj^QC6`bVfqcXqYDL%=4a*YeH6f(r5;_%!E%T^}{0j$D;=Cq= z)ZG%wA&}b`w-WgmgegN_oY#Pml1ng$XvY%9E#v(2`*le!fmU{GW633yM9jZjy79a) zRme*&0Z!6X9ZN8YKyG6WwUB@Auq<(26GFtP_PtV2qdgTN68Yy0ro?$o2oa;&_ex=^ zJ(a0M{@G!wke6IytpOoo6f&Cpio&5u`au+x!Q)6PmqO{ALL>8N0Y8qKiFJ!PXK_jE^ls#!FCr(=3F z4@S19eXC<<#XZ$D8o$#qch|#F`Z2aT@@?9YTtd>Et=t5nY#us!Fsiqy?{w@)E}>~O zaAQF=*s`a31@diTr^Aq^A6rqG?{sV>m(ZmbHZD=VV}87P zgPo2)w~|X>cmqu()5Wrjj#qE=t&Xka61w%m$0d?n z^W)VUY;_p&jNQ8RVyjXEQI_OtPsP*XeBbFn>K2s(#y9XcCez8Y6>M2FO3t&>VaSW~ zRB_n2LxLy3Eg_(;}VneUeYG=Y;_p&%2z3ufKE=n zS?A(7OPF%L?{uKXV0Fw9AxI3)YD7e5h|8B?(KR4yWs|9Y4zdlr$%e|0dWe3h48__)O6yyp#ID}T%vB55Y6VFF5UQ2m@?$WdCDbxTw?lpuV~BLd~sNoI8V6*>KAcd z$Kvqw=1~**FAS!{dCDbhT%vLa)M&=7ME>((%8(c5DVMNuiOeBdZe!ds&VR07m*f&? 
zWydy_TtZ0%avO7~$@$NQsX|_I3AD0lDmkz61G$Y=w1xa;3^OQ^YxI{Ws zB=VmKQ--`aPr1aV?jKEjNF87e!6;-osQhm9qlM0K24<;Qz*LNboFN6=-AoSsf4aq5$$mn z!C2IuDxuJh-ejXAbEchLol59>6{XJYOf`P%{B-pu`yBZ$?MN-5Q8XyG!$3Pzt*2U5 zd9j;)r(;KI2@Rt`!5t-{{zpS^~oxxQpq< z6pLWX-pfRujSfRz6`HPBQR>{zRO7eKPgifU&tb@`Leu3cC1}6wboFN6>DZB4LYH3n zv_z6?e!6;-osL3YY6+yKftH#_{!4OD|TEl(aL|__5Q|)rW4f z(_zSq^O{?t+!--8Iq#vUINvuq(8}Yf1c%X}-j2n<7GKKtRKGDf&qjwKFV0iMv68!L zpDTsY_Ee^nGu>pL!;lx}HHxOx5|@3rbhGbt>_{!KqKac%Y6-oSh}`B-l{4LBr=yUU zS^_%Ph4VfwF*)z`Y$4w_I(DR%(480CQcI|yg`c^ADG9 zvd>Y-OD&-*FKk+(-pTd}Q*EO8`M%S!BejGsy|8JCDrnm$Ol2&QXQ#uE7w4(sfJ{z2 zmFO5}$5TJwH#+c%J~_X`lekh#=%s|M8Rwtb`Kl*WiX}Oh9VA|mZ(E<TL6mg)R zt@o#TFJ90l@(&EA#CeK1wmZxmkkQyvz14~Q{b98Rzc_Q-!?L5-Vyrd|F~=_MSr+@^=sG66dMn@M($3c~2q|`NxGR zLtdPxiep=92_+H8N2Z*A?653xo+1vI66dA!f-Uo?$@#|&ro?%QIDA@Sa^7=@enT;1co;RQA;hMBm#ckl88k9t}s=|OD%x}kfxIJN+M9h zPB@?X|2z5qznkoHWYV;|=9cCxni8}t7R?V=Z?esiTe`dEmaa94rUdPm9j@N&8y&ma zQd}{y>;o$9U~M~6t*3IYxutA$#3HNFBAJ5g}lTPcyYL_Y8>+oogc2=WSgUqmslbv(UhQFvFM`1)th~zV^?B{oJC`T zc4qhU!_}K?bQtn-S;_g#~7PvnnhEBb_FBYvKKRvXP?86 z7w0vLrUdPZMf1ben{0C!^5VQE(O8KV+j-&P>dn5TC-@ZL`%+lG)m61(UHjS zNGyTUF344ab``W29IoE%`y4wGOX$iAAC{P$_h6LBv(J&p??^16D=&OlVshSNQ3epU zITHCDi6z#WL{s9|Hm}-axo2U?=B28YS{|Vv z#Xu{}lJj2A8uERkV@F~MOm7I}twc-Cdqtbbv(Zt=ODuttL7GZGuYwk#+iH$NzVCDF zNGzc%FML>Ha^5T27$=f$c7?W$&hB^Dy&;HNC7LSQ`G-q4-x8(@ zd5I-ZkI__eUIi_3nB6o!> zBsuTL;^l4jhGmKKnp`4(1zRkH+Ee|;M1FTLCC*dB0VTUQuTybkIWx}hgegN_oY&kE z7^3)@IYi5HW}MId|6lLq`~S|_>c~Ca-BZ4*drWFi{i*gL_w>B)bYSv1^Hp7AQhVx8 zWlMW{&Q3>WO}l%_S9Og^?WsSNDZNeSY;@$F?(QjH)h#Bqr~Xu?>TNpjTOGTSOW=x$ z#rdG*UI?;+_p+xlrr3SXR!1Q(xdduXnkqkLPi3l*?>im4l1oUJgNi$r)ecqbsca?k z>~s|Jl1oUJW8)GPjONFy=WKKo@{&tv7R_2A2od{|mCz@AVk^BT)m<#7d3HJqdC4Vo=Y@?+R4|%(JZ}B~s|Jl1u2$3m=zAZlOjqe=6i}{-w*Bb|sg<@CN?EWV&()oIhJZ zTgc!1bC=ESN-m*GFMM1g$%S(XbErc8=AXipAurBTF5%-6NiLivSVf!2-~8jSEODN4 z33xtn9{;u+@q!PRZvJ5~CC*bWVdD~&L%`!(dY{PuK1>zox8gkI5;iW81#PR*rkua| zyMA4gOROoEK*=3H47m**YI`c8Ci1@xQ-!?b5{S&fh=b(3k_gmjGtS@q&9E%iXlu$P 
zd|V=VhAd~o`J2BUOtD7OofkeXF*)xgZ6g1xFjdG)E`i|<{DsMRD`q?Aj_F?{ug1&kQe7Em+*0k$$3vAit|4omL<+pE`bm&rmLhymSe{_@-{br zHkcCUDVMNuiOM17A1>YeX_zwP#d*plY+NF9h*qP`IscP>U6M;+d_y2#f_5bl$Z}?! z|8bZqjgH*XJ#A^uqOtq`=>0D`UOn$y z9eZmXHIZuT+S1z}ub#8jkz2aA=9WsCV^=yV7Qyw{dzo(Ryzg|NyBq21O4Dd+OBIY@ z%U(>qOy}%$WYV-JxkOH*DY=9y+WGP7IU5~@Jl)ueL_@v35UmC5crTZy*~@g^w>tJD zmp~q}_#RZ;!P<7HT2Ez6q5GVzjzV5?iIs-Y)ct>oMHd{ep7))OJ;@~`%)u>_v6#z_ zSI^n$DC8xVkTA!`CEoUU^_-24LSAwS-FSgJCSz5XAFrPGt&Tm(C9nn}pP^e7?fiK4 zoUIN+UYyr3nvzQ>7Qu~8Ip23WAVVPMHI1g^5(-B18g zTtYWq*tkU1v+WzEGF74byl-{vN-m*0FV=PcpDNn+4O1CQ3i-a%0ksS{PZb9|a@yic30qdtrk`i0!;lx}sp9Z)iCp0P%cXNRIt+Pno+1t( zml*4VpZ9v!kndX^yOK-bBb;DF$t83wKL2v*oUM*RUUG>QWgMut11%gz(4n@c8S>jc zX_8z*S6<+j$*=`K&p_0k%2p!38BD2-MiqyROH>Xq|8nVk9i|HTluPi&i#5J&TMBh# zG+bYMs#O{Ct6^PAR}Z5^TbEoyNyPlirSmt2DMMcAD&-QWw_{z%LXz`d&=%+4Ff2>y zD&-PL9nw~E-YeQf{`G??m1vYppxzEs$Y_%DUeTtXe_fa|5gf)=^Wl=H71mZcJnatRxksET&} z<yxLo1A}1m@?$Wc?}3Dxdd~Fb}V5zgmM1G{kkNVSZhQ`$t6_L&c9qbe^Hn!gsp!IU_!2_ecHkkM3@W86yQ zpC6_QdC4W#8W5t)p?$9ut7=cRDntHx!@9(IjR+~Zgg=!rdYh>K@A&)wZuOmxy*=)! 
zlsTAvtesRenjf#;Vy7dsroBDIZe6cZJE;~eVz<4R+S6NXbmX4yX-_qaW>Y(aD)jvA3sl30<#JJE@X(e!P0lR!6=~dy-2?nzK=Z1F@UIs67=u>TT*f9ea{XtaQCf zi6j(_V9TOWzD?|O81l+jb-jwR!FH$`+c`g8J!hlCkXOE{%T@57O9B)AqWLlN7xp&w zt&Tm(C2&%>IG>Hu!FH$`>zW_0p0m|a$V)DPb3&R*@1rvJ5&v0^W)WXwmJ-X)o8k1MQ6sG2%E}gT{VaSW~6me|${z*?5-Z9$ zY+Rz8So?^njAg#cR!1Q(xx|Vx4j-4u>>g3v$W`8^?{w@*E}=^=d|V>gh1hMWPI8`| zjzV5?3A}f~dA)#$-Q-q#s$VESf6hinAuqXv?!54EiR2byxA{{;zHfExNiLyVFE)Au z+XqYHyqC0zJX;-xyg0APC3*wlk?By|Q~kt3zVCG4`zdmMmsi^B1%!p9VFh$t6~jag3 z%sKzrVO`=pZ@sW_iAo~opDvw$CQKRf;yh&>D7eLWB@xI+j9lgCKRqlD&!@XSW(8Y-p_nqNd$5mOCk#Sj}6Nb=c(fGaf!?#klPr$68Vn~ zro?%wIDA|pa|q-%=1I zwWA6}7o4u%>Kh&VYwoCYId-F?V$u9`^%ff)nKSLLxud9BxU8zsNV)mx>Miy;@?F}O zS|Uf$lv+Ze=%UlrTYaZvUuuaQMpJ4D<(B7QvSJ3%yIX*yu3i>BVvwO{pc6PR~zQZ?Vr|$kU6hHHxOx5|^E> z-s(FY`%+8j(hHxKNOytAZSF#m`xZMLg}l@fYt5o5wS+=ZJ7=vVppfqy9s5#C=+28x zsU;MP=BKN-*yt$arItW-9h}Eu)6)97>~!@O`y7S5)DpV#VpD1fg`$g2S8w&5j(w>m zfHK#`G8XaLo(ir+o}CUuUYyq~no>(B6d~GLJzL24jSg^$oYyd#QcEZn!Is6M)GOb-{^oqjGU)h0xxs82%JjTQ@x6b{B!%9=}?@fTEeF#(x2i~ z!u)A+{yAaFkQe8vmhfqb$$2kl4f$vH>ylam-{1rzN-d!z0)E~K+C=_YVXBaqT4GJL z#HQ2|s%KH5O*#L}VOh#;c;|&pOH>wt+-Ao4XAGv4+fXgBscDJIA?6=0-FkYMD&(b> z(3KaPQcI|uMQ&p`gdzX5VO{E?@zRS;sU?&|Ah(%u{;6TgkXMCv7()9^sU?&|ARn3f z`KJuaQiVnl2V#ymUuSY&&?e`fJeX32MiIwmCz-B-7P-xg^G^y>hP)~?YB+3KB6EmV zp&7Rf`3L)TNiDIWieqD!OJv8z3)xG?;@Fg0LOBGo9OG7cn+LSAYKe1XGXnBHAE1aceW zmLY%Nur8fSXmClXC6q)U%dsRPk$-%cGUUa1%`GXlgpvs4BSx-5{@!6(;=Bfzlv;vW z#04KN-MVKmCC+PbNvS1tC=New{xp%lJ4_k!;=IO|lv;v0L_2++bN+Gtx}=s^X>duY zC6q*%MMp>^^J~wZ7I$Pmp2oWkHb@6cDQ}y5`N@6D=&OlBH1-RT)o9UMlP-{-*LB1<%iB@k`{Bv_!cXixPUlk@Cz81mvg z#S%U&kvR`Sx8+~yZP?~8`N?xZ*YPURhUU-g3xUlSR&6x zMF0GSj$?_j zq44l6wmAxUi6wO7g$+v-Z1D4Q&i9RueTgM>=YqEWrFR_Gfyx5djLOBFVG~<>b|Gr^e zI+mbVVpC!XB@xJNj9iKQd&86=FV0geVZ#!YL?9nAhg!(LXIPdvPq74YUHN%cw6J9r zZ6g2f!IU^pv4jsxB)5>;m_JSA-xa0|d2ya%2_Kdi>ubjnmY^E)@9ftlu>`)s38I!* zLKW@&!=+pA2vdc;#1d7;ylF?HY`zD#QejhTW=doiSrao z*sw(95c3b0ZoM^374i~GtSOe*lvqMJ1ag}>=if4{OPr@z!iFU(i9l{M)yTA-mC6Cx4!D89Xurc1ayC| 
zA)b5gKI`sroxRztpP&SobJsOrec0`u|Dk(1Rrc0{EzS`;4BbGW$^1{5jt0>ffl_j{kU-gQG z`C+AFM|25v$K|#X>DW|XbH`d{jbyuzSm`jy3-fBPA_7jARE?$D>D40^I!yAyyqc>N zp?zI?^=Me?*b!YqO)qR*qF~leuO6|~QIZ#3LQOAb3N^ZhJ7hPhe(IpBsy6Z}@RQIa@k(g(pqa-i7gj!zsxJ2qx*lu&c z(t&svR!|E1Q`2h_*04taR*% zE&+UVTZwsvv{95yadAuaU0g|sPo7CKDw!aPYF%c4sVWzh2;(3<3j zrH*aUCDio7#w7}7$V8h9mXc?wBPG8rx&#zyFD)CFD40!LTsj(7I<`fZfF|WyspnOs zZC_kEVx=P`zb(4Nj4TcxmzbFMkTxZM_t0w+T|zA{d|YB;-hBv@{9QwDPIL)WQTd9A zdF9317nhFijHOKS!u&iizpUmGs?oyBY3lhqM%$8aGY`z;UbVg-g|yJ~7Sc-3fBT!G zrG$C1IBZ;^e2DhNrEh;DmP*WT?+WuIaoD&-`VgIOW6pg^{@cGd+?VJQm^&MiFS>-B z2zWW>-V5{J{(3A`k{4Y9b7#3!`8GUG_%^0cQ`>y|cSqY&>6k1IAD2jU!OJm?nv(z8 zXepJB$>Q*Fi9AmDHl|O7`EP$UmMY1ME}@neJ}!~^6uymN%OwAm(Y{nVCXK_#CDMt2 zZ(|BICI97E$|Nt$v+2dM=n`@wpqovhmgK)Q+LmH8(j{zMqC#4%)x!K2M@td&xPe1J zq!%wA0=|vu)0F%{EM<}>=5YguONEGlbovneuQJvq`R9iF5?um!aAL8dOUQ|UZ_~p3 zv$0f3UUZ2$=@QGLOUQ|UZ_~v5{%BjuOOP(%;}R3|?nNZ#zc5-#c?r@bd|YB;-hGIa z{4=psNnUgb+`yr*=x4#BNzA(sVUmA(v@hi)NSE+&iHUi4A`JsMUzc0`vDF~`Ow${4lNt4FMKl;lO1 z5HV*~jCR?gc6#-Qg^rTE=o0F9fs5MV%5_QA`qeTZN5fLbj_49l4is0qtBPpb>D42a zI!yAyyoS*fqg}QL=C+CXVWne7bO|)a<+jpYl`+CvEzGmhVUm}-s%bREXjk3@rrZ24 z8IU6uI!yAyyk^l^>4@(_8qD!DRZoHx~+7t6`jbuQMSn4Rri!OoQxLhhR zuYeY&+x)N6Hp5ECj_49`c*(p$+OT= zk{4Y<9WQWETS!+tJ9TsEXjtml5nTfRONuKouZXsNbLohs4wJkvzs-Ez!a}3mqnTVV)!oAD0;4BrxwK zTGR8xQpb+y5>TWe`3lh#(Y9|c9kJ9=k{4Y9ij+$w<`vLF&zqxDnBN{aO*^7XsO5!? 
zOOzLZ5Y4>%w6I&Fr4*u(#bM(Thg@a8)}#Ab2$PtG>n5Hp^JbBrnW!aEXsgB)H&@7`RIEi_x}(`7L6;812f6V=dFD zDf#(mDPf*0j@d?rXv&Jix3P#eB|nR$O!C4!NgO^dF@rUaXHD|&8tzMU2`JJ4s)cAt z3iXIKCI5r5R7qZR3Cx}4Qi*vv5zzBZ%)fKAEu|&M;;?av@*?2dv@rjU(Ne-ZSsXSl zQ9cBGn-=EZ9!r(vMVFXqZb|k3lMewer;YiyjrJwXYjR03+T}#Rw=sp9l7DL~Ws(=> zH>u}~(Jm(f{zwz^KQP*sFi#c-GTHGdVg>tEpJHPEEu*D`c}*@cMth>)eTc;Tn`0@H zyf9A^hmT84%)1X^n19o7U!qGukp@tW{+yWih&Cnv##pK(FS-O2DVIvj%ZZq{xpegS zXj{U(CYO|Umlx5#xpee~(Ne;^CYO|Umk-gtxpegUSgIs1x&-tn->`fe`&D=_$-i#2 zFJWGjOG>-TiD=$jiunJw@Beo^taR)ylxk8NlR8s{wC(ijF)JPEHSI2xYEm1MI#U^= zc6#-gg^oPaU7aa<3Z=!x%3^1#$<2|zrel^m@@v`^ zU1F|2CXx|!4?oQAS79w}_wlgO0g@M~j%s5fAz=l6qusAsOM2{>l@60U^_Zs7;Iejz zt4pfZuUbla?3jfPlRWj9X3=yDkZ(B>D42aI!f}Q zOU%{BL^A4DU!rLQOAxT;iqa)gx9qO7fyh%+w9+<5taOy*MVFXsM2K1J#Jq>JDR~w;O7fyhsON=`OXP7*U0pgF zmO6GtmjI>$^JTFW(jrhII(9{u!257piFp}F ztYzp*$+Oa7k{9MRB1BO?A~PzB?pFizEOeOUg?UW~ss4X-dAj_wIroP7y`k46x&#zy z|B5~?k>Eo9+1&e-{O`n4Dfu1IB`|lEOQpXm`-r@RDb$kuXGhzTZ$r9-k4q%F;M-V8 zo09*`Xes$Nq)Yg?MEVf$ZA_mg=06=vrQ~-+mr&0OT-6SB1wBuF+Wo3SG|7Kzv@iKK zq)XViL>Wlv`4;A%ilt2Q!aV5`HZD<41pJXE=07>wmV6u1C7=VP=Vc(VRtxi=7%e5t zlP=-o5)<I`x0FOiZp;)ly(_N_%9dV z$%`(5xwBjNACr8^7=1G_Eafyj}4`~zgzdc$?m?vGr$0a7_-G@lY|5hwj zk{4Y9dX#UNn3s8+y1I1qo1=XR^Q23lCKTZ6E-clr`hZf~{6;Kgk{9Mlm#}e(vXAg` znwbCf(YAznHod@Jgn1cAtkuH&uZ@->=AlO^E*>3xo5Z}Q#Z&UX8cUhviFu5j!**O&jySGTfKw5>TYESapk&69F%$h528OrAqRmOF)rwsl>c^UdT%{G5<@WZ7DB7 z7Ke{ZOw2pcD>46zqotIWAdADsCCZ0@o;Q6;$tVB+w(kFTJgju=>O@7%v5t;1M$lqr zjPxxXv(k|#x~mfvF~>SO$`nmxSC5BVGYuR_HZ|RtY4wJkrw}#OaTcTn$ zH$~E7$EzA%xe@)u_b7Vx(LlokzsyV>DU!p0&gJnI}Kd9fwb^1`PjGMq*B*|24jA67bc#g>5G;=U5; z3TTm)Fol|uXQjg=FU*rI;nNc7=OB|{@oY(cSm;2^$?P-P5-?!GJpSDqNUUW6ZA$*J zfzPzNDa?~CVbc=jL$oh09lth~GRX_`WJ}nzMEVe2gx1FVYliz0TLOwSBwuU^IT7$} zTA2U-SgIs1wgeO@mrBgbiGXj@#QdvA+mdg?o)>l zQ%(f@ktXI}G1`_ePqqa7tXwNG?_NY={^g^kgn6 zO3XhzT1uGLfRJKK$cI4o*|3!s_MuqHBrnWsK1lWdqYu&fHl|Na@(&L8CAI_2~9+4OkCbH&9VwRLk%HjZi*8Xz%6PUj-6lFa(m|mEVBaN|Q|(piNEOkxldH!pbm&O2 z)Q*L>K5;y#bKw73i6*sI5$<8IGtuP6C_g6FIZE;(ONf`VknaqG 
zeQ9#_m}QQVyvP!oL}SAe70*s3SC0pU4!k1O7zmDWjKcl8>M`paCV63A?Ny2_AzRd?S<9{^=2_-2$qVypu7Zt{ zEU_-RdORp}V3e0Iul6c(p1YK4zwE4?Ts>x?qa-h~#7x~)Dgh^3G?83A9@IIoEn!~m zRpdLvU?=9?7$xRe=P1dGETNVc3;E7nN;TfFnOyzgG0Pk!d66a5@nRu$)m>MLrMh2r zh^B1@g$}%;Ft5oam4MS%*}k}R%tD7rUYO@7nuXLAj4Yjt?pN)rw9TN-(amYNB2SXd5!M6QvIqoOv!&FmNLl;^O{>yWC?zgoo~~|{D&{@YrZS8 z1QdRMjQB6H8+bH{d3PdG@*j$&gn9fA<=cqFVZ#y=^B&NaJUJXtF|jRpG>LhSXcO~4HCjrT zCx^p_B_`%QqD{=dKbA7d3-hFK_^`y(^Io7eJ^#MpzC@OQ!Vke~sQ}RC< zOO@nBmO$?yE>$6#{VH&k*erG$BMIBZy=dUsGP@NLY8Fv-7Xv@c2ka zCr$>X4!D8xS2c~s2rvvg+UeC3mO4uEqDzRFv#4>2GDcmZwV}sO29*wMOL|PxXo@bO zkhYy(Jz=GzBrm#zcsYxrOUM|t)2k;ebd=;pm&jQ(MVF9=2-|Hzhe>`=>cA^Xk7*iB z(Iw;}w$rO8EOnUVrN=akrsxtfMic4PlR>4Uiwo$VXc~<*m!LI*9y3=bG0#eeNnUzP z(`bq=A!F1|ub!~bVUm~aCW&KFbcuE8)ssP~1M!M5uW2+zmykJXr&mu{>L|&JE-}+E z8XK2LW7JNsK5;UrbYNS;yr$9ExI|i`_Qj`E~j2bcJ`#KgP@v?lrcE_!oVOqeH)V`1YG6Z7swq~!07rA+d|{5COf%_T0sxO8&Q zXj{TOSsc)O!n{IS*o>B*zk9TlFi#eTjZ0KW3*V-N`MYAN!u+N%PZEcXOQa9c`8I9L z-#Ofu=n_!)f%&3K$ccb&)5826u~bQ3bcq>h96l~F^}I*4iTUGijz6rAD77Egm2Tp{PFL_QYCrOCDii5$0a7_J)kYjAAfzcFJYcEjz!TW zloyAdH*g8_$G;m(ndF7}En>dt5^^Hoj~KX0@?RTmOPD8%14(CLULh^kGIXWnzdBk< zm?w*4v623~e2DhNrQ@%}QYLv}o+J(*ml)Ur<~^V_$$xpcFVQ8S@B{Njm(a~#_%`OE zrsTgAOO@nBmza^pu_(HPoCx?f2CkC)7f0I?=E>slafuXM`{L5^!DuOAo-7U@mq>jI z-^P3hS=i&}VyTk6=n_!)6jtUXqdTsnSsv@c;^qf3e|AtwU)=N9JoV=0roFt5oa zMVF8h0nem~`7eyNCCqDdNzo;g7ROp+%xC@ocJBXo!a_$LDK0=WWap7Ct6zw0(Nuc% zWLWB$FL|VM9;vl-R7Be)TFaT!$8^F{M;<8%FWOhAF>mR3d3yC^Sm{8nUu#8L!dp7h z7X?fzF(+NZ$0c%y?eyviOC2S7(Iw_&<}77_puEy?bz78b>|{{sKv|t?OvD@; zmndV@POqM@(ovEZT|&g1r7TdFR1K@tPOqM@&{2{XT|%>HY+Ryj(Nuc%WKinBD+=?Z zOZd3N3f2rXs)eRJuAm5!3U=n`sqv6KZuV3wG7Ta=P#p`#=( zx`aAjEEUDVV3!d$-`*rYD0Sc!g?Z8?d|YB;-Xq$SJWCxWd10P(2_Ki3nD>CTw9TN> z(J>mRoJp7Pafyj}k7!fRv(jOb7v@Qq@NtQWd5>sQ@+@?iS~#JoEZCHbwFvsDPf*; z2_Ki3nD>Y_CBG3%ndF6e(j{zMB7KN1L^Eue{v=k{qMVF8h0WZfK)RO#sv@KztbO{@mC@%uOjcL@B{A{$8Fi*OKk4q%B;MIg;av2#Y^(<7;Q_KCtbqFB{HJLS{Bi! 
zNtdv3iS!|Wd5>pJ@^2mPOLPe+{D`!RE+HoZzD*1BKM+fmgFi*OKk4sF4lF=B)Zz^)l*hFO7fyhsOg1| zOQbPsr&mu{=qSmHE}>a8GmFgV+)t%fPllzAx#$v_Ml(~bu}i9kKsx@{=)m+s)Fln0;Rys`bvfUaHBJyl^VJVjCe$}U# z7KDWkle}!VCWKf|i7-Z;?KTIjw9Uz|)G-%bLepq`Tw-G0%~5(yEOnIRMVFXsMo2|w zl3RC)sa`R)&B?IRfjiKYyhemnWG2CNmzZj;lsqdPC3(>$)bs*VzCzyXt}dOh&{2{X zT|zxCY+RxOTIhL8;+W)zrH;Ai66$&}6JcrH)uj`bI!yAyJn0fptU{;chG@+ku#)_+ z(g9pc^2EH1Ay!Dtdq|s-XQjg=FU)I1h>T%(VM&4l9;ln((tZy{|;elM0P$%`(bo)G{crj{&zRpf$qVzOOW3$XIT7$jOraL$KQr2vFi*OKk4vO`kF`vrrsO|8 zT1uEFUBbsD5?k%7ODCU-rA+d|Jn0fXE-`$s&bKjrYLb6yxG&Kqpzs5zMVHWpLijeO zP*d`sjHOEQqD#z4msoBj=J8SYj4${$7SWdEKQY>tFi*OKjZ2jC)xNrP^6}A9!aV5` zHZD;ZEkB;^w%#$u*;}YdWw689mJQ+)w zj9Ej&_mH@ll`uj(ze}uTwn*R7DGMDYd1|p7 zMpJ}#nWA=f^^|oElRUK;yxd;#%!<%XQ`BiO`z*_HpAIV>3$Z2C^unek${4k?tEa4V zl;p*hP}2(#9%Zj(iYBtFr^7))7yL!q(M@e36iJ6Ac6ro*t09bBw zzhpm7S?4Ipi!G6(Xlz;{Q8txbJsnm$7Gg`N=>-_>AuibK%d)E^BP8Dg!aU|+oHrg3mqnTVP3;%iqI}o)V{cM$~uQhUYOS?n(F^Y zQv}SrDKg9tD;*25C1zxCfbbZS@2)G=ud(GmWu>DeFSZ1xdd0U`gm%TV6Bn0GhlP%X z*b+1Lyzps>^uJ)a&HqZtv(QnJ7h6I-FQ8ZByU1XgpB(HU=(vtEaDvrAqQ*OQ6dUmr6aaVgh^{Q>Z2R$41+dZ$lD? zPfMgmg>Pf%O3A-=w3K`sk~n-?B99ZkjXC#;`Pal!C3&$WQ2*f@Cg$Zsz_&4dYLfr{ z(Y{o1S+L?_gmx-a_%`N5q~u>6OPS<_c{aU3Ga=zrP6YJ4`SvCG?;CAPm?wz?dLh?J z%)1wnl7H1`DPf)@4(Qcb3LXvhY4@u>pOpM7V=0roFi#GLO-rN?(fKxQ%zy7-Uuc-X zZ;>nxDAw2)JetJ32ec{qSHw~&c@$c-6fs|fb~zF7ZJL;W`Dk0hJV_irEip0g0c}eD zdqztM^Xz%y(-IT&9?+)bUlvQHcr-lD?pGb6Vg99~eF^g#Tw;Xw z#JoEZDf#b?rA+d|yylh^pq@~f7fVR!n_8T6ro)KE!Jx3`D>%4gn5!UpjU-? z`4H`kOQ$c1rA+d|JUJXTEs;J%=i9U~|LAaEVoS`(;z0K}VP0AB_Qj>sM`Edxyx0;m z%`K5FghxXsqWe{dkhV$w|LxrW@04|pa+-E|q?$zohP#eb5p6rUddf0K9x3=P@iF1c z=?w@*eDzCp20OmX{#S|4KBi%zW1%C(P_J05j#SpDom@R-p(BrUp(CBEk4YUV_IFuw z^>kS00N;i0s5U0@Z@QFftkq7gp0dtSk{4M*yc`f7V}UNETE9wMsm565D9MW~p-D70 zEK#;-D!F<(EOdbHqE}Q0%nH%C!*+7@l!XqHJk^+pIdeg(OR2^yP9#@Phjk8|nYP95 z-pY|+1#YnWRpLqm!a9dZUYJ)K6Cr9{a`lvD4wJkvPqIY!@sQBIF1dO-EOabHmQc?N zFx*2(T}rin)hng}VWFcWpJWO4yqF78FxZKCw?$H8Pn-_x91D>pG>gWDCCV7JldGq! 
zbCl#omQc$J%=n7;>QbuptKKj%&oW0zUStVPqL~X(2+Zhzb-!vU(>B9G$3kQY^}Lu1 zQr&f>*jM}F(kTlaCV81|4K7i>9!54X?*VN|epu&#ZVAj|rdKFIR}$tu zqD{=R&{2{XSpq}7xKxE`_Nx$4k{{MN79vZ`G`U2WLSU99WG~UCXua@M0dEm=IB*Z*hUI1LVmi?-=Qu4nvT1uEFhXZ;wmV!rP zziO$J{4d5*CV63Ab4#lKAC{!X_juMM{|m!?i7YV_iNpH;mBu&kJ|+M2u~cDRWC=7m z>Jb6O8m|aFZ@=ml3-doW+Ll5zayV>QqPz(BHl|Ti@*f#3r4WrA4jYzO8s8(@l>CQd zsgk_N5@>P6H%!cz#`k#EB>$n&z7(R7#Nopd6Z7swq~w1#mNLl;^Xz%y!x9to9?+KL ze`d5TVV)cg=)gEdcr^B_PBA6_)1#$?d2%>>SYl${BifYw2V*Idyf9A+hYw3k%)1X^ zlK;SPUm{D)NaBEEjaOWEaq0AlSgIs1vII&%fry<%x||4vXco|xGZgO9}JjaM-ZK()b?HrsUrjOO@nBmOzUmzF}fsJ_LN5wx0jV(Y}Ovk~n-= zVq)H%h{XJRV=0roFi#SP4@*qUds@6C{}ZEa3G?J|;OK<;()jL0q~w2mw3IMU4hQsV zECr9oe%0rbl7CMuWs(=>N#THA?U(8z*FahStCmX4XZ`<=Z|DAhXDoH(nS$@axJtul zcJd?@en1PEG0HPN8&*1y@6uZISL;k=jj)z|nL5)mRyxva0^dbTMM=SWI?5Qe)2nAJ zbmWKQ8?CV63A(`bq=A!F1|ub#2cVUic-HH)U`66?~dXTwqldP(RN z8KYJIKbfO;di9K@j*`6S5^8z@!efAIU3&G2vtgwJy(G9TO`|EggyN6(#icV=I!f}Q zOQ`3Ck4sF`4v(QnJ7hOUvFMM1gjS*~jv!rk~EOnrlgkF)DFS>-1n26R)p{BNB zsly~M%#$vGF<&8h#Au9YyI=KLm!2P1I#58+wut$nODLqpT9&*|J^SF zwuKmtPEYr%ww01+p~ECE%#$twz1lC;S?q9h`d_tF+U_ULhNTYllF%y>^F^0XNQ?Zl zDb$oaOC2S7(Iqg}tM}%hScQ4zFeff9oee7;=q15zkuKrm5-GU$#icV=I!f}QOQ`1s zrhLV=y6Z~ytHhO(e|X?EEk&1@lP+Q766Hfc&zlcnl7DF6&7qfsUXhqDx`dnv==m1r zAB?3;^1?jn5*YIpF9t6pG4By=Vg7;9wuE`oC1CAyEeZ~+wlIJHXenWyJuiG*Vq)HX zh?M+&u~cDxF3giI;o}k$^B&NeB6dt#}Qyyz0B0dc9s zyqpO5Hs(Z>_mdhPKf2ZOq>>+Ltg-y2QNb5^^HCD@^q*O52=%GnO*R3-hE)*tkSF5%5P^dj9kq zqiqTEq)Wgp73LMvVyy<|Pk(Q;lrT@a#C#+5ynG1wHVr+0`t?}KBrnX9F5%-66Z0O> z7UoZXcd#$?lAyLBT>{O7;uYc1P+4@p8j}B7ER~X9h%SK_3q%xMLJ=*zoEGL!zdG8M zFs~6IHZCzS?_Pv3fBKctQo_7Ogw(i1`4H`kOQ&CsrBd<>(Irp;;u|LBx^0dzn%O4ow3l7N4nIJY8DMB?ty<8Z2MLCuaD_$Sn62r@xvQcX_0^na!ojblb05Dv$JRSn0qQ9p5;vW^=7nV=_iC-S%PXV>)A{qnswu zC1ymLk4sF2=C&ve2n!u1d8)CQnyVCDq6%tmi%jyvQpZws30%#NS1h`OBHAv|TH;E{ zv(!$)bYZ{B_`(G7MbLSrH-ZO5}HN>in}mh1vR%uDS4JUO!C6Kx~nMC zfRRniyDcir4=WuoJ;c1)t0>gKT8Vx)Mk#q#I!y93-I_+TsN&h>>D4n9I!yAyyqc>( zR4SeY%BIq*Pn-=)9ZS(A)bj!q_mF&dU#Wf-xKi>gb(G{qmr&1(g(8jazEUhTadYWx 
zSm{_MU4qkSKyi;1y8BA?tG1PrXQiVgFS>+UUii2~qO5&$>5PSrlDy~=>Ugmz?Jkr} z-CQ~wmO7TAOQ`1sDDLrMFxUiF_p4qpCC^faNnWO#bcux`4H#J})b3X;RhS=EI^ZJF zm(zrh((c$^PxQiCmLW{Zv(jObm%FM7A<9=G=}eS$ziL~lZCL0q$;(~UfDqH}iFuD_ zP1_7h9ZS(AFxD%Sb7^Z2RcMqJVrRWlB zdC`qU>E(^2b7I~j+LZi{jh2$TstF;b-G#FD&84#+jipNRqD!E~5#KPiyHEx%$9#K} z{Ev+GC3kg)PDE*U!3Ikh{_6MEjVB`NuLjg}JTNtdv3iSi+kel~2S(o&wuE`oC45{WJ=FHirL(t1#S?r6awj zjSZfum^t3l@zV6_ISU`LNQlA-aT`UV!5snh<%3#JpRh#5^k$)bzr}C0?FhJ!he#Brm!|&Y~&0 zgpAQtdi8u*>evunBB#+5T|yykJH2|&Qin-ic?k`pDY}G=5$tU%y?Q>Zbl~nLF|TPf zMVF8@!diwdN&bwL4wJkvuW2+zmrzIx+ik8++HMv)O!C6KX3-Q~f&p!3yUn>T%%2TQ z9UG!csOtqd?rE_JuJ+ZXGnP6^@}f(iL=u=Ux&)T&BP>{PGe%{*hn0>E(IwRM0vvZ? zULkG!>e3l29VL0uCDio7#w99o(p_RImm($4LPtqnbP4smuyKhBXc42C17?yRmO3^> zmr&OWaNOe+5t%WK)BUP_rPst#he=+T-(c)mbO}YYh^tzdKO0s$P&6avHyAn=T|yx( z)@os%l@61b~suyKjbZpB?T?bcq>h9E+k$$ccb&)582`VyTk6=o0XW0`u1YFVXK2 zZRz<>kG3Vulf_}<66Hm7mze5PObh#|(Ne-ZSsXSlQ9cA>G}EUk`KMy3lDy~=xSGv3 ztQgIH)viqPpB(K=m}l3EMbRbXM8LN(aHY2SL@Z^J7v@RhSX6fjIT7$jOre(KKR()) zFi#c-_7dg?E7-4k$CUiXMoS6vWN|Du=sCcnv0t@RO8%ph0;Cjxrj^5QA^-;SkH@=MVrkg()ZiF7#;(DUX*l;nSFv@Kzt zEDjr&C@-RYb?NLkM@tFwWO3NIMEMZ#ZN`{S{{L;=|L=TQ>DbVT;%;_-3}CniXuF(h z{VJsCM9*32$P?Yri6S4)rSe2&iYBtF=fgtB#x75Eu38)8e~ZZ;Q7Z&gWr zSZ)hCQu3^HnB=L&M9f(h0Y;_>*49i>eoN=WLI;c(t7v3PV5!t%JWuE|`z}-REOeOU zsm11GODwIq1hLi5uAZ~bVUnj7o0BbJ(-PNZSI>u)jt#LT)bzrpB`Top@~nl3lsqdP zC3&$W;N`|4D(GLET|FNbIyS_XP|pjWmUwA)^_+!{lDyaw>UrVQ5_z2M?CLq|93^?N zB{Yi0rzO%9O=VZlhn0>Eu_e^>!lxx(mRX{vD`C?T70*J?H!(jfbZm$%p`I5uEl~k2)@ot?Ed!rvV_V)T zNgOsUQ9eZb;?nt>V=0royj5~IY+53Hh%WnV?w3jaO~ZYOErGk)0aOVIzD;7@orskD z8)K=Gyx0<0flDRkv z^|4e*UTg`qyzps>iFx-SO!BWA?Mo3FSsXSkQ3etbnt>}N|5z+#k{9O5;;?Cnif7@E zSUg+W=Cz}3DMBNO!=@!Fpv78-u9W<1MoS6vByrfZMEMZ#ZA_o0adBQ}W*%OO@osmY9*l0fu|(d4@sI z_@+-y@~;@}OPD8%!=@$5i9q(bh547qQYLv}o-7WVmMA9z{zwz^-!s~lFi#SPO-ocj zi?v#qf7xg$VV)!opO#3$!MAB){-v>$NnV&Ihr_2OCg$CTFv)-Sa9?6epzV18wb&9O zdcn78Vg9>fsgk_d60lobDlspb7krxr=9B+_JNN%PXPqORrp-;ct7>CXA(|pum~InY 
zeoQQLyoSItaF&;g?Y6x@sw)3VV7pDZ6)Se<}k?%^Xg)P|2ac6|5Zzwuaj!lsz)bhfHCCV7JldI>f zbCl#omQc$J8K#+^taFs)MV3&@3mcZGta$t4 z(mBfBZQ#JtC|Ci!8ZV^d@a^}LuVjz!WrG4By=O8z$nPSfT>m?v4n zhb1QF-H9m4|N6j}Lz$SECs_i_abeznH6;IQqoss-O$bpg93D+#-Xq$?{IAARCV63= zWCM@y;XLb8MpOC+}7+gL=KlK+KRsw6M61TJRt z4HN0|A*L=ao&Wr3UkuUI^TLNE5@zsi%tcMf|6D9(k{9MlmRJ^9Lhb}S6LTU;@*f#( zOPD8F!iOa?qQzPk(Wc}-JX%VaCt1RVCCZzC##i_0w9N-cO9}HNOZc$F#Ju|uDfth?QYCqjCFUecEH@JKif5-TE}cIy+Ltg-vV;#y zOw4;ko0$KpSjr?X%#$qP!xE`b;h8is|Nhaogn5!Ba8+D-UNkSP)x!MyMoS6vBugwe zO1smCfW~iO{wHH8le{obvV;vwqz}=>uq{2G_5VMl6*0$pI?5P9 z_t=M-dhGhJ(t&_*fgmpyD?+=>Q9HeQou!VH{HEv<2(Y1{hBE5QjSn1dlT|&GZbjuCNcS+UyHP&O-S?Ng0Z;CDfZ;RWiYy^!_JH2|Hg^rZ` zrsxtg&7vtnyNuCPdiDCS)UheL1opsvrI)IZww+$R&Qga-UYOS~nj*Bz7(w?mF+Z$y zfaglgYZ^@v+GUNfRtxj2beQCYdG@@(jhr~TE~#3->JVi?uCvf#k{9Ml;_z{aA-2wT zw=q8~b!>_*F(Zw`$0cS-!9!YEkn1dUl;lO1K!6>GScG;(wC(ij6W528j!n@e)bj#Y za)fk+w6NV~jZ)jN(ovEZT>@bNx0RTeF+yI#j8RISg^rTE=n`sqF{^Qj3T>w@E?pm% zIyOa@P}7T95!w~fwl6MSXQ{&^FU)gt32x+sv{2-w+`3-{uF~_vN(cJa5%V+Z`69F{ zq{Uh-%(K#Ak{9O5;=qj@AzdLYY_~aJDR~w;O!C4!NgTM5(=XLo?8LkWw5I2WrH)O} zCDio7$0a7_J)}*{v(!5uaYi-8#%EXcp=6ve;}4B$%`%lGtZ^Ux1kdO-^LVbN&f!PwiKh0E@9&mhvY+Ryzi1x*$>-WY|C3(>$)be6xFO7fyh07-%Q>i;Jv z0=`WP^XI=e+Lkb{5g|r@W=Pwti%aLf8%vesMVFXsLP#}> zlMgXA`6*N3H!txX=OrqRsnNL8(Er&q7D z)R9NJwJCR1-Bs#HWs6{L?Yqn)y*{jTzyYVbs`e^%q)=yFxps!N?88h0!b(RvO?)F;jb$qDv^EZKqeSv(!J`&zVyVL^^1{a@Cg$B3rQ})ZD9MX1fjw|rnU;_(YTsPC&O%2?UUUg{yqN3z>y%#{ zV)v_rRZi3OVX0$FbP4smm={Sx5pDbC(sh-eF zBQ1fCCf*F%o#)y8s&`Duv(jOb7v{<0uyKh}VF=L-Td8eW=rG9(^CWTLN{%qkc(x1C zES@#V4@(_eqDvsc4n*h+L|{g6b-(Hr6Z0%}l;lO1Kzz-m67y1F2+>TTmgL_%aGJJ6 zmr%Cp!QYCrOCDic(V`znR zsj#V=OV{5s+LwHrO}38H7YL7r3bp%Hub7g5cPwR+7v?p%L|-6uGZkw0tClLs|JZ0- z!u$p?uMZF_B<8(Ho09+0(Ne-ZSsXSlQ9cBGn-=DOB$hJC3-cs#%&Y$&eTXjo+{XM5 z5BDXy1S0Gn5xA5S#|V!mG4By=O8$ppsgk_t5{R$4RAOE#48Bbh^Sh&MF+@|#3m=!5 znD>Y_CBHLTN|+~$!^b63g~7LJVSYQ7D#?p3f%uwln3$Ij0pG@a2-7xOqkReUq;dGT z#KgQi5h?l2Sjr?X%#+4p;}WI9pz)iS-xzI6m}koiED(Ue3W<3y(x&8>qoss-vN&v9 
zqEr}sn-=C5v6M+(m?w$D#wF5+=n~Ow%+H7W5?x|O8i$WdOw79zk&>UqQYCrOC1#{? zU=D3atGlmMzgpVnU88LY^JH=OxWvS~dl4!59~>zHpvPGFt2vFP=n}FUm} z)2laF>M+UEcB|_J?&Jvb@~B~Xo0uO~I?xJ_nAbF#qD#mcVXYSCS?Msz3-g*rQ*;SA z)Ue$abjX6-V4=e#FU*rJfjc?<8+H~uj8Xrq@L-r9mO8dXmq3UelCS=M3TfL{mu|4s zQIZ#30s%IcO3cd`Ax1M}l-lO{u+p(5x`diupj)nxu8VBjjr4=Wv@xDfNCOTgyjTK22Zm6B(r!z3@vlP=-o66xw7 zFJZ1uN}h!dle{obx`dBQO!Rv|Ym)!+&}$N10wH!F!utPJv}W#oO8!f+R7qZRi8<*K zHZD<41oV6p^IsfoOTG=8Uf8%qc@gk!T9`i=EhXQEO)qR*qI?MWHZ9CQ7fY4oMVC;| z3m=zQ(OMhx&yMyb--cZ;=0%r~69M0*h57wh$|SECZ8xa)xRWEC%87tK(!~51M%z-1 zw#&Rk(IqM?=Uzl&{+ZEIiqXj8@NtQWc@Jq5^H0Z8CV9nZBysq-#KgP@w1)Z55BDXy z#Edi!+{y`YLA%?p`m7W4pNpkR@}f&1z>d$SDD840Cax}B-y3a9F&bGMHZDH245sgk_t5(u#QhKY3f5b$lCv)hqQ_Lr$$Q&^JH<%HxlXc zA>iAz^!z7dDU-Y~PZEcXOQa70-^P6i!~7?P`x0FOA$A;Nb&Hb|f&8;6)WrP9W2ut7 z=n@F9xm5W!bSL23m_jWJ`?1lsgn6weXzn3DhKXenWyERK26CFDcE zw=rx{+a&-0w(kFTV_50f)``wFjHXUh0d1FOEz#*jZ?MvlC%UZ@#mI#IL>ESY$xM@* zqCC+X!$QaQrYyJGnAC~N7Qu4ccc~M-!9qv+Oxv5X+-hS|Cn{6a&aU2Iog=@cZLuXZ zipHlU@&ld9uHG0{I=01@&@h@su_YAH!g4n%3O88kFv-hXRUeaLOUM*Ki<$dXk{=d2 z5OdO7RU4CHOUM>sE%U!p@+@?iY*7|%(^20*Mw%8KtdEwI%>50K| zw=mB_M@e3633$1D!;H{mieR}di6fus2J0Lpd9fwb^1`PjCg$A~mHoIetaNOPEup3t z3+dD@ry44|nO*(B4OTi#^1?jX5(|sV2rfiuW{OJk!$Jo}BM|c%5Ta5eR-oXzUv-L! 
zc@{cM^1?jX5;iSSrU((5VJjuiI)_PKnAdy|2uRH(LZ5b)+puMlA67cH#g;&T9f(kJ z7xtRq>VDNLrk-b|qa-i31j1`BRhF9`4=lHVt0X@xbZm<)p`I5$Es^MgA-CI9BpQu1vyAf)0l`4I4J zT9|)RELD;hTS6@_7BYp1%M$bMLzv{>INF!85@bs(WDF6PCFb3UNXb7QOPS<_d9o!I z7MG=-_pEqnn>UQMCCrm80l*9M5*&P+mY#q8XenV{141k=OU%0uk(hs7EM<}x<~1Lr z*b)?5mwj&Q`NxL)5?f+UwuDbhOw79zk&=IHELD;hTLR&Ak9pk432DKjNj>jQL`nWN zqiqTE?0MnS5)v6M+(m?vAprX|XWfIngiwIu(_(YAznvL#@^ zgn2n%Sj*6rlK(c^zQmRQ zc|DG?`u~aO1>eS;h?M;I#8M@Bu_fkYOZc?J@V;j7ZA_s`^2z_do%{dYV4WkKrky#D zG-uK5@@s^_wqNxH5NcY)RD>>b!pbx zR~{(~9eJcXI?@@TwlD%rDKyg;I?^X@4C@>_B1?#uW5W_JPp;lzouec#vV?d!HY`!L zsGVHB!7@ilUSx@!L{nr5GvcN$O!C7*$BxJnFtg$56j{QIxao_OJPRErd8#oHb9`7L z^~FST^~SKy0TM#Et251_DYAr&5!Nz=k!p-}4wJkvuURxjmM|l3>LM}EGKWcCnAaql zB1^1GuHG0HI<`fYP|u4+ktJk}+R4=$EOeygw?&pv&kGxtC|fjo1?Fv7BCRD%w}C4$&q9YuUYOS?nj%Zc7Qx&axJug$>l`?KVxAlhpdd{*jSzA%(LaiqR0}8Xrb{^T1N_EOeCQMV3HQNiLPzT>&jZH1oeo^20jEw#X7QayWcgVipuUqD{%O&QX#V zSz<;GhYw3k%zH$el4qHtBrmdrI$rp&MCwzdB`k+ydVW~w*cMp=Dv#pgZo2DA0kiH` z!@@o}beeXA`FXg}iy}+Ni9k}JiTU3i`f?&msO1IJl`yY}7HhRI|68M_gn4o}Y*?av z2&A7|nE%aK$|Nt$lfq%c5*g2SA)5IRCi&kO?n`8e8A%+AB1_1LfN#^n{IADSg?W)B z&{Q%G(RxczwIWz+V*b}g+fosY91b6rn3(s7HZlLJqoq_tBZtF>B_`%QqD{>IN-R~9 z7g<6bFML>HV&3Cfll(7__NBrxNgRtJOUQ{pf}(}_Uy7wn^1?iOUih#?I?nJ%nwbB^ z(Y6$#kt_kTD$FaQg`PKbrR0BMv=lLq_?m!7FJ3-G`{L4#pO2+X^29viYc7@kJbj2R zL^EueDbwlzp808b*2hwv6g8JeN8u6=}50> zXG^x5yqsm7sX|)VZqpY!)0-@G+yIm@C;$QZTLt2bHbD9MX1p;h97{o}OR9$c zZ>LwExH+tJ?1(O*o)bLle{p$9X^GROQd%Jd)vhP zu+o8qAThrkcT0R+Vq)Gy+Qd979VU5Uo-7U@mzbFMkT&%^3mqnTVV)$8?tf0Y1OwX6 zcAGvm$q!2%JEBXVsbolAkm{~01yH+R4au|AQIZ#3Vn!OrQjqGdD+%)+(U!IuRyuY> zmr&0O8<(h%w!6Yq+e*o^(ovEZU1CNShmA{AiMD-l>E_D@Uek`~5^8y|RMo$`t`u)L zb#dwDO9$TEj_4Bb7%2JF^Kv5E7ng2+cPwR+7v{H!c~$-4g(T+Pi73f`*JxY9{1!2< zBnj3^%zH?ilD{@uN|+~$!^b5i=G})#%)ca-D$Fm1d6GDMTw>~Z_aO}Pj}G@Gx&)d^ zhU5h)cr=N5cOp{TJQ7Ql4**^Q3Y3xWvS~I}wunjo*u=+hxKye!MIRIC)tkdg$FArSBIfwGM53#mUcJdmM@e3E2@!LA zTq2LNonF1kLPtqnbP3I(L68tf*Ckc!S5xxCQpc|75^yaku2f!F>XP*8O_n-L^1{6O 
zn3TbmEt*KL-W*msK)oU6)yAY4?J`E~^y*DkI!y93-D-J(X%6vTT~f7vmADe~EOeOU zg?SAKp@{mVYQI$ftClj%4@(_8qD!df#Y{p%V8#ROe$`?!AU9d+NXhSrE}@Ul5HrsP@aFv$z^nh+u(b@!EodC!WcD$QB z!U}nd!z0?1{Ev^866Q4_L_&gZL&0^wYFjD!_ry{rd10P(2_Kgj*y_9-!$=A=t3i!LGi2;at1geCbO9c@dcW6~vjTq4m0 z-^M)Dl>Cp3mJ;Skm+*0k)Ti)mtVElV|KV7wBrm$eoEBMWYPB{0}0>8vN$RE?a@+7 zOR(jIk4sF$&{L9cSbBcV&83^O(Y}~2YqX&TK= z+HP5+c6#*|D;?=I?QThrX&Ma}0euA2B~|NJ$xdf_i-nFn(_Niu&Y~$wyNuCPdiB<@ z)UmrIJ*H_iMQK+^3)^kxNEYN4OC9+&?TRiTYL1UfEGupwX6mt9!%D}l=n}a5(&HBR zk{**af)%$fQ(x09Rys`bvfZRhV4OoN1&eLJ8j@$B!z3@;tywfiX^#)Hv)yKlN{`(d zmO6Gtmq1g=c*UZ$%N#+EnR}m-XQ`tkFS-PpN^+^vWA>|Fu_QmNbnJ>Qp{5r$E>R(E zJH2{~m5!3U=n`srfy%a!E@RYAuij#zqa-i7M9!iqO1njC=75>xhoz2P(IsH0DK0wo zu-Nvi#Fdg~sly~M+pS?VMQN8YnnCqqHaH-58nVhoz2P(IwDSG9+J=c8k_pm}jY@Brm$eT*GK; zT%wE-Vl*>Gh52EnV^?$uHNCKLi59I{)REeTm5!3U=n`|%B~aNG=2fC?UtPMzLPtqn zbP4sm@NtP{#oL%4mO6Gtm%ukn$tTh)TJw-rn7_$Vhe=+TCtU)iZQ-<{HP4Hu}OZd1%q6}V+`4B1jFUL|PdC?`*^TNj^5@qmh%!e?^ ze`&NY`8K3W%!<-3Cj!2WDb$qw7h@@tyf9C?1WMb&shkM-BNow?QQGB1z{_c2elM0X z$qVzOOQ5tZoGRrDf24`|-x+O7m?vEVHYe9g%)1wnnE&i(DPf*;iP=VJcZRg>t4lXO z6HA%og?Z8?Y+NFJ2w>iQ2*doRhx-y;0!=037^_>HoQU?-rJJ9MrAqRmOQ5MFmrBgb ziGXj@)bme`wk6DKM2OL!6Z4)HPsx9Bw3INf5g|o?mJb2nW~}GC`2Y3yz2}Z=-*Nvx zEcD`^7bWnb1YVTDixT+%jRZb;i**iJZqOQOxz+Nba{)+brzz?zx0#}{+_#36j$N@O zW^8)l(-P_Ov@b5*Vx^-jx7ZRhvN+}{2Eks_ba~nr zmu|7pQL(Yu66$$@!gjb{h|B0&biZm_@>XxL&LPQz1G>oz4jNlh`8S2Lh|tWnH^~nx z9lK&n%rv;9@^8wDBSJGrM-kdBRys`b^j6jM0+nrfn2Ki+Rhe&Jk{=d2Fhou77|9a2 zySWyfd#q*XlFxMOT|=K~m)% zIDA@SV&3D~!ueD@^tMB<6XCuq^DhFi#SPPfJY9dr~|x&ntu` zd1WQYmhfqbiFx-SOwSK55$=jDp{5r;Eip0gS@FdDD~FO7TS84QY+9n62U7xx0|Ge`L?|kJQ|K7h&r}ybM zJp7i&9)HX0AA9`OuX)YKo_+So&p-2}r=A1_P3T>2eBooyKK1lL|KC3G%+m+Ye(V$R zmmW5AoTU-OvbMcHq}6ZyQwu05~6fdEAe<2?JsXTJFC(;xrhCqMnv z!IPhR=DFwINpQf@#h+dNhp&BqJnix2;AuN@R(I-YJN~rJG~sCr6czNJfzMBV_PI|y z^F@Fei2dxdA3MN*dMA-t&)$fKfA!V*>XDt+a5h)L+T?&K~`WiAPI8ElNQxTtW5m&v*9dmrp!ex@u9nYT>%7kCwi( zM}N=6qouYMrMBt|?mn*W|Gu+FzpVXe)iMwrCc@{g!TMnAJA3p?Cmtp}KTH1o>1z|ulir$d)~UMQ>Vw7?@Vu8yJWtAMUdn3j%Bs)yynyFDI`KSd 
zsCj9qxofCCCG-NG_sGQaq?YETmgcUO`n1stc;3Sk&yy~imoA#SF6zs&FW`9(O*~Hu zXkH2k4omsHeFEwQJnzBw^K?@a@p6e-7m%dK?0-kr@#Pg(IW~E%D*X#rPjR>r)Rd9cR4o|GeYl7vukur`s{L7kDxL zkC^+#_`mn3zxrbQZ&}cVtlx|Azx&)T#{UWkCUS~XdA%3o|GE707vukSK4mJu@nZa+ ze$0#Uzt!2M@bY5(pLlsO{=WiV{=XRidw%%NiF||di?hlv&fM4OyPjMz-(b({;{V6* z_=R@-|E`Dr!9(wU@J}E7@B@GLz%%#%U-$q1{ja$1Z{7FKd;h_`AH3(E-1D)!|M}hD zxclL|Zrt_Qoqzw%ci-`!W0~&H!TaJB-t?|lz53PWq$b^=QiyX;A$EmjUb~-fKKm5P z_n-YzU;6y?Q%^sIBIr|}eDX7&eeU3yXTQ4G?P35F+WUWP^nW|}$$0AH?|Svcr!tW^ zuS6n#$}f6ymV{DJ&oUndcWLxc&?oxGMF7^ZD)49jy*#eryE2^!j+{Pk{Pd?5&8=t& zqi14975=b>fJWin-}ZhY9{IB2Bk8e#OIMGyjABw$yBB@>sV|Qo)|W&=wrCL8|Lx$% zC(e@T#CfF?@zdY02z_hLvJY!Rws_P2ZwK$0I7=oKLCdRGw4@^XI2D4lndVt8dr^i! z!Ak8Vy#MZrvt)1u246kZQa*DLV>`hn#bhL2G!w!fOknE&wcG!D@5d(2lG#Nh@#~S6 zT}%S))LEj73;rx3Ww6)@ss8B1SyHut3Rn*_)shU`?qQdHm&=X*sr^4eFX;b$@FNpv zDJIn{3mEvabZFgoNeWc=RQ>1nS@x&5h_3`(s%{VaKiob`HFlWoRvBa0${2I7dpdm`U_^CI-i;74%d6vD#Sy1n6^#4BCZJ(t&M1ZGQ z4|Ba!RKs;=8SmEr6JJRG?}MH8S*n=?D2?@0S3SC9xArWl*?Uckp0547?r%?=rAivR zRnpi^sC0rg9W?!2Qmyx}!8qCe(R*7HXQ`^jZYm(qCH*W-0X21O|Cmq(7&zPi?O=1_ zES1#RO}zs`r9Z4LvtIsPhSCY0fXTF>**7N6QiYA(R6PKwKb4fp^=H{@ozMxeC;i{{ zmlJ0x_hvVB5LlZ2)S$Vo`z}Mgf!5_#|F^xx#969>x|<3JY)*fqDWKL_#wp4R+3cnI zr8%Yg_KQR?HP@JJl-}7ey@UVWTP~Chibeu$_uMByYkl(Br#|^bctaq8QKN@0z&E+O z9V-4yvx(=bjJi?EXTy}wj6=v?@N?fa@m!TtH%bp}m>$yY?ic*rADnou(l#5Vi8f3V z=_>dOeeN^wjORY~_-ihZ?bHth)VS1u8>Wl!-*vubS1jDYWu#uk#&M#1B>(>OJM_Sd zyjS%M!5#8?plKrAEys(tAIJykdC7Zy`^594j5bOcZJ09B1@!0rythp}PiknR)X;{h zA(W3V7wFXcy>;SwQa~G}fHq74=_dR0e!m}>c%HP+Mrof7(>{6K{rR5XTiVZ4rxRGX z#LLnZ5O2gk@ArH2#Pg(hmZf->u6ULa06p*Ly=mfkQaH;}I7?SJyjuUfpZCUz=Sk5l zOVKP{(eVEL^M2mr6VH>gvn&O(bOpl(_|NxwQUCvwJFcCh{{KfW*?9C%AN|Eg{@Ej+ zdH7#G{D%*}^r3(2p?5v_pFH^f2mbK`pSb_e@BiBU_uO~uzBi)&zjM!janG;b{bzT7 z?ymp)uD^QMEAIT8cP{VvlRoNm5-(eGUdMW^V5 zgg<+qiboCta6}GzWT`BtWTA&hMW@pa1%pz|cV`|;>3u*!L;f#fP23^ahA#&*%cwDYR&1J^|3|kEEyq& zgSIj2|9$XV6K5&CWBee`)6UiNXYE-s_t&c&SndAb2fsOSmQp>-R6XF+>d+7ht36Qn zRQ>1Xcgaw(*FE4k^&iXrZ%mw}bWq(DnlH^~LSF1jXW1)(j86N%?fv@1SxNydQvuDF 
z<~GeYOaZmdG7^6ZO1DF*zt%oW-NVKN3HeO;De-kuHNA7Wj_u_c3TU?63eEnj?X%RC z?fD{A4}RKH)w^izS%yd%pfU-#|10gY%vShyTL=Rx{Ha$;SchOS&aL}{9>m?h_I|m2 zmOD~FbxR0?Ec}tCfSPC7Yu8Y*z-s^F;Fl)OQhH~R{u3r$_`^)^Tz;0J*?X0P8boOJ zUz|8gsh&kfX>-34gaI@4g=jO)?=nP6M+8&IJHhV%!o*oh2NfF#^E0$OaI5)79fGEH zYzQ{pk?x7Jy`P^rODUj5DImWEgqbz9{b*6C_k*nJKQ2Q)(e;z4`dZa5L4QcA&(>%*wg~ykH?jil(4t}Rt~DRT3Yhs_V<$!!A8bmUKe(EH*;3|4jQVx21ZD-GjC({!~*v`p?a?WL2j-!Wp_* zguVRfiL;arsyjZYSZVq0T}=l~pC#2g-I2(v?tO6LETw?znh(lV{zy|mt+S*;4=-c` zSqc0(_`t+jO7GMi9~7$mVOLF9hb~u|d^a@v6BB1C)iW>E<2QYv`sxeOX4+@j>vB0F z;mGd))Wlgz2i1KaRIIdocb2AuE;-8{uIY~Sf7^Ti#92xK%}W7Yh%eO~X6m~Pg`QqW zca*b;|NrQYYk&FL_q^oaMg0GtKKjHX{~hB0|MlUoKYR`G|C=BDdk?-B@&B*i|3BUT zWyJqS_r3Doe;@JxKfLFIcmMZyf99@#ao5-GdgRW(cIWHw_@jTlq5r{(SCfXJu!akx zakcan(yewBJLbAvu0q1J_aN`DM|WUfk)bqBDJANJ`THvomxkvlMY3Iq98b*RN^PmHX+^@gE%fw~?1B|9FOAMqnq)gQ35t>a zs^-eHtU`E9($Az9tVmrNo~K$a%(v2oK@rlQW-iR--)H}NQqZiPuI^~s1vn1OZ)~L- zgV4(!tCp%aetoQY}y6HOnPc5BhS;$w@n#MA17@*HTG7-#NTMz1$Yg5 zWGSMhE23?4W?H978y~s=!;-r9JNUhL*yu3j!)%rM@%vBEh6UHl53_%{3>Evo9ejP_ zEak>*l>)ku-_Xd3^=BD2zI(-vjMIOk`@cJJmQqAprHC%%H@3~6x#TQEz4uBF0b|(s zuT7k#l+acwp$qv9&9Io#w1F7Li4^X^S0~O=s%NWI&xQPkhFq*ywEfF5R74-bPPhkO znK(-+pRH0p7xEjLhq3l7soohgMxJB;%k8sN*M#|IDWMDb4UW!OLoi_I)_;%i5PE&I z_oen(W~b=QQa~5-8yc`Nb(Rd;=#DUJ^u_jBsy_m{x~GZ0kl)YV}9 z_F3wd1RB8CQ(g6Jnd*`1ZTmpIC>b+il1zu7{pTjmQaWg}bkK$TM$PrO)CKB+lf0__ zZ+p*9oTU`dW+|Wx`Hii0aa2gPdoAdlQ11u(6K5&CvsrrQLVjb*^v>lvw#Uv=p<-zE zFHD@JRL^Fqo(uU6jS^Y=T{3#3FA{P2{xcJ2DIK&~I_N@vLqkWpr(U9CL#>A@=n|y( zv-kAGSxNzImICrFCiAT|-cP92bVGWDe(?GBS*mXW3P$;bT*z;1UD^5R-U&I?_*V$` z;B)P>%wgC@`?-+c(43RCXBnQmVn%T4@MnLoeU>>k+ekkZ`XDsXNR-RZGSqsgbG&a4 z-o4*xpJk5MHonh={Klp!pqB4LU%h`X(4TDP&gusa4IAI{~LE)JG=H;#Q(EL|Iwo#dgPxx@=?V9 z-+1``hfW@P!-IeK!R-hBHGftzQg-oa__BsUw6;nzvsv9{?ofZa@U{T z_4J+p$DM!S&X?ZtH~!Vf|NAS^kp@wdS~?=CF+9)LUe>+%_mo$umvytS) zb%}*q{jNn@JWw%WWHsDBW)73SB1LISbO(bpX=DnP1f|h=N_kA&m;eXjis#92EgZCEq?`{{BeXhGWu~>I;g4#Wt zLL&BH#m1n+?{nSFiN%#~PB3ii)d6C}FzzdL2)%ec4S`c6Z>FwJEUtcaqJLmk75l&K 
zuh<;)vEg}A8_g!}PAsl=cOrDW%8KOMuhd2xG^D{ec|%NLY>k* zWD)aV#YP~b^P~pCm5F+q<;fT7BHcah@*pY%8%jv!%$1se4$qTO6tG#3btTk&;S2QX zUT;5pyV9$?uCmy_`~HdzKu70EZG=k`^;pZ9F<+~8$Fb$Oz*AO`EzuhR7JQo zQO|T$)L%*Et2WOww0ndy9hU}w4pwaTIXq9@pqTGeAhl^(Wc;t|4KF{>{v_p??Dl^< zSgFzH;5>7b=}skaHqA+xt55y<^X%2Ue3LlW{S}*h4$m{Ep6*mSXEUov2&L51zI*KS zc`|yVhqBds{(CES_ZXgMjz`@|e+W|``>EVnff-dCDK!sU*&( zeV^!Le#P?)HJ|Zfgm5eN^&Hwy-J8JN;{@%deV=o+(qDU?^yaA+c6u4_uhiaipl>(i z5ABpT+O+R;j%ll@7A`r@UKNFap-B3_?XB42b9A0c=jYPLX4c`2XyVYk%q5OJDN05&!>3kN(6X{|(~*|K;IlAHE0i|EnJScOKk8 z{QrUb|Ev2yiTMAk_uX~xb;SRF=bpvg{~6-{e|p!)@BH5o|37%g*}t0j|6tXo3c)>K zoogcf0*|2H|8j)IXVm`_h0p%q2dg$$2!4VdTN=ajFN-VZUl_Pi8%O-VziN|(7V9oB zEUtKgA)+?w~$ztsdhOp>i+&XS!Z_(Ryp~bo@3>Xb{)$amw=jIikaKjG;2|K4A<2}AG`_}Ga{4EQYTvC|rZ zQCsBjmQXYJv$tw9hTtUVnWZr4DAJk}Q(cjy)0GhQ13U4|Revx5u#fuER@{vxC+q`?L zc8d=<2|CYGMHez0>uxeIUJDJx^5wy*-Qxr9fu2@Ai(h3}T=^S@B5BLdsZ2B)3E;B5ycA0?|on9dc@T+#44}86P zW~w5@T*4?akZW%vzVAbyR0YK|%sevwBX;{<<=rtJ*25xjrf)sPGYB&2p;H1Z< z61s4w0mbbro+kr0ia6RF?XTM1K0pc3V^bSlxYPiBU5~xQe)h^IgT?-Dd#iT457Zp= z%u+>O^+)5NdS>%H`;#OMXBqnFVAbyTG2f8}T9gLzvOjpt^|Z^s&;An$xn@yw)o%C! 
znzLTAl#ti`7gxU5Ks6t}3H;OUo7rErJANQ=;$tT+Ho#ic+fJV+b-aSbUfz4FcFPZN z@AS;{hj7V3ItTi8-OUDScWNNA;aBYWF^q`UU2Om%wchaZ^9(gl$AonFm0ErbGFI#E zHlWN|kJY=k^=+@%<(rTXzhcLaVMMH%gt*Zlf9S&HhQ$>xH}pDQS?ulpZ+k0t`xs_8 zwxx4x7*S{6W1FSa1kZzfBv0&%iHvBL4yj*I8VmJ;JPqMB?Lyb#L>A z;4tW!sXfq6Mqd!L$GZFrt$Y7WWbbI*+r%Nb3VK>)fc;j(;>x!gcW z_zZe%x-e)Z<1cwt?ZF@}G&3>>-D~f!+SDQV40>#8j|;aN7FWB~zz9x(BdvRzI|PS8 z&rC-KH6edhb7ZEiyRvsmBelsxa252l^k2|Z#-Dao?ZF@}oWv|^w%J3_4SH;9j|;aN z7CYv`Xg6(Z&`VRAnJoC#r<>6+deHl>StQ_PsK9_)&T!s zdS*H@7j88yb}X^eI!{`6s%7%vSM3HIa25EpiCYc$>PiD$Y7HW6OCDbDxgx`#HJlL^0Sx8Q)!pvRUr@@ox?D_(2pHEjlt{onRh?H(L(81&3i zMSiaV`HPZp^E~@^i_azS+SM1^O*r5x=xL>a{9*$N`Sr9*&y#9CytO5-GOyZQIN&qr zv89CkX2asjHyaqlg<9af^!-)44aY(QBjyuV8_>M7-gf%?WCW+cvDdqMt9Bm_F!J=O zrHU?OJJ#K82u&+4YFuJIShX8*fQ+Z7l?L+54e)~N4KF{>UZfc^_J2EAwL5WOO+B`h zkl$`tT={lG;Fhr?+tuu^+O0Ui%Hv}vt~cPI>TRdbGa|U~iV--{eztpYfRd+YmMZf5 z4U2X68yLIMF^M?iVAXEM0WzMRRvO4JIN&Z%z2T+jNj0yKu?O$Ls@;tPtUNuol#q9D zSX}*vLoe3&Vvt?#zm@$}yB!Bud3tPVBfsLXxZ)Lu(D9*)y5JUn_EznF9H8XsnWc&@ zWINX0ap?8A;>Df*ZwIS(LypB<{?NoFhsD(|IrM5?j!F1_2dj2R4zTj{lBI+$WIL{W z%OTXfd=oO`SM8P@i@CJX#5D&5$A#YM?~|@PJrq*oSM8o0a47VurHU?OJJ#KEpa*gqo*X zh;H#s#t=*( zk9s2ig0ApaMOc5-QpIG%AGR4o@Dudd)EH<@UZD(H4c24zpW6{sf4%`|06QY$!!~0G zeu5sG{>z0M4j|mtW2ZF+bF%#lBkBr^e%NLVK_Gf&Y78_)*E@j$wwBQsf(?HVu50%V z9~`zBLvRoHw265Tomj7yPoqT-zXgUP9@}ON!B5a*Q)65h2!Wz{Jy!4DKF_e|p^Z^+ z>K?m)*k%mDPtar2f4MLb0*RS=>?P+J7M(g7fXAP`!!~0GPJ*79&dY@x4$HMS9Kxb^ z59w|#hDASoF!a+P-;E1JdRqD{7iL0$%2jW8sRf9G%rLQk`oZA?@z_EAAIGi7rrQEd zpo4p|>pWhM)qifDCj+<8#-x$#AGZ5&EViVLCT=(^uXe+MQ5$vgPH%no4%>Y=;3Vi( zOBH$jf7uVwzJzfbDlhzQ^kC@U^x&}Fhhwqsh6CzCCD=>PGgKb)t7?CD*zUuD|6jdi zDIve%u)OkMi10)eFUDw*{lj)24)_W2*r~}70DryhCEsT+Qw58g{onQu+kH6TBl(qX#~2iyZaEj18YkLruShjQf`4gsL}N)aYf&A0n-fR(4m zrV_$s2Y;-karB>?-zU|)>Wlr`?yuU7IKax&V^bU9vV+#|-qy6yCFdDBUcqAjx4l)n z69-D3dS~c)R zh;R4eKuJxHO(k?;K*aLu{!p*j6)*NLiwZ%z83$N-dTjbb7bZlYTwHHEeV&Zk=%I)a z-|ohNo#~mWiY|rKl^$E#$Zt5{c33_3lJg9YvqusT z(En|3)$YgvN}irss>p9RfNEIJY<{1i-9sB}L_6?(^nL<;6ZTlW&FlY%ZT8S|UH)Z$MRs3*{>tHz9e>zn 
z55Zv|+NOFto9LbHi0v-@b>!$}0CG#=Y#55Z^9 zV@rEn$aY+r-AAL>Jq`C4LwD^Tw%J4Q8T8oF9v9kaUXk6``-Tb}VXN_H@374tg2SL^ zmiD-i?O4n1>+JyRi<}&GaM)%K!Bx=H%73{qu;I$=J_d0L8GFed9Jbj*I0QYmw1?kz zcphDg`v*!hN4kCg@a>_r25d0+41DawZ3obtOIJ=?gRnW$&uoKt__lcFu>Qa9wgY;% z*E27%?g3m*4?8$)_ve7Cpf@c4<-*X0tF!w;u%Q-~ENdRN`*Xl&&|}Mm@oWbg5Y|gx z>aEebQ!PZUZvU{|p94OF9$VVTZ#!JIb#d<#hJX<`_J7+uZ1?AY!=PuDD)MXx$jh*ENdRN`*SST-FA2$BZouHN9@Sa z82g9q{v2TC>1|URUAXOV)tSS+gz2GdLn1j#pvyOH~c z?fx84R($NlZ3mRu>ao-QP`L6LIQC!8-eJ2x2Pk=ZW~m~t|F0iB-0SnqW%qwOIBfUl z02xnDD-Cp^(czV|%c17uRpBI1&A0n=fR(4mmJ;&%KWc0x*h|kd)O@I0SeO2E_YYsz z_HWnScDU;7;ofV(*B(_qV^jUEf8&-^Q{X5NzXDRnN~(GU;OC z+(>uC_`i-ALO`xx>Lvac1T*>lkG!GvOWnKi`n2X+M+`y0X;`l@P&Jpf5`yApL|<>X zIa4?<`V(D83}N5*!!F5iz-HmMU9K_QiS8VTKtp#sV7t~4LwPL20SxXurnD0XQAt5%@pXfS2M;^;?u#y+M6P=93?yv1y=jQHkBAMX;GIxGeRo}W3ySOdun2XGPcXh*N7Gu)gKB1XJg=jX^{84jtPKgZ3j(?+R_*6h|gKL^}B z^tP;xg5nKE{v0>MPE?Q`=j2o8=YR*7#$;6_GaTX-fZh4oUrbzSuUW10bD)w0jm8>C zW;g)%WL!^oa(Xkd+q|<{=jVWuq+wYJ$qa|o&Y#omTe_RL+tF^V^K$@|*AKfS!vT}U zZ+nEk?KZ797Q4T;Yn`71kUSdGTTpOzk@h- z4yl5R_pv6K;_Y}tb}BSgS`%LZ@Bep!NEv&Yn`71s5}~$wUNwluoeRD zWW^hc-Cx_a&d&iz9*xQOp}@lt%~)1cUG7B{Wg~2utGUtcYb!C0A+XoJF5%*ejCeh zNbUSNogSh@8EOCDVfp`S9X$lovS0bsYmbm^!ietcR@@to-Cx_ajvmTm{+Bp^aktQ3 zE%(c=qlfZXw!;ei#oe-dH(r0b>*ygs7-(X>_6V{a5QyMomQF(5nsTSRyRG$HW4pDE z9>UQ`!+PxzWIJF?9yWTqw~7j-Qrs(V*E)I#*Bu(uYmXq?0R)9Srgr;wrw)1V9PVva z>*%38mhFI3lSe!9_TAt1$z<%_byn->p*)uDVCOIHjX0;7dl<=XeWO!agZMv!=l$F+ zspnwjFYaE_9e+@i{ptQnH|8Mz4_rBo=`S)twnLo1*quGSS|&G=&d-s@vK?07FYeS1 zspY=RSFQ7Npxgt!q`$%h*$yl47kBST*e62A>--#fEZf1zU)+b@`Wqsl<8^)x;G5`G zSrrBG|0uf8O>D1$2pzBUbL6pXhZXpX-NbfsiTGl#=5>A!7(euqtb}B?!wUSxPR;jf z!M*Knt@Cr_v1|t`f3ef?PA3#DM!j9@{2X~K+abNyfHW~4_UQAo+dykfR~VmJ#sz!NKCI5MxC_o);d2&9?NzBoP&p5 zejn;BI5HeNRkU5}{2bWe^r~JJ1=$WDDC9A<_3X7fT}(QyFspTbjy#s_0G=uy?a1|X z3c;;vo7Ushg3ix@UFKdA;!iYniOhBYFCh2)4R3K&ymNn9js-6yIn&b z?H(Op>--!@2cuW@_o1+!LzF+K(*txd>Cy4E&d-s@vK>|^4BROMx|sCn_*&=ZKun5W zl9iCmc1X?qIsI+kX(OCP?Kax2zfr&6#3>7i|%L@F(-ORgr 
zC(z_>qa%hu2GMWqCB-z2{Kfs6_d2;p#5X!(2$@ARCTk2?O*774+>O(#;=aDutkDre zfTPi9d}omv4lD2%JHxHh{4f*U!PDr7p(2*yumXRvvoE?+75)i#qBlBX2$y4eTfV=@ z3bL6oMhZXpXyOngC3D}J8uUVt> zbL6oMhZXpXotk%Qq32uOZFGJP)J3JaWo;xg9IX7s-Aa06vHNSg(fK*@ScXHCzu0Ma zvJqW2^Q_VNIr3PB!wUSxoth^{cAwVV==>arPSC`9B@|>hfCz^L<;WX%Z@Bep!O>vv zHab5?9?NjBRtWBPkGmq^s?hzl-RS%rfU44~dQ}u;IK(RicRGhIChnd;Yjl1NFiZBM zEy-|bEks*}L$Bt0Gto`+tkL;7@>qt$3KfE#ns;{-JxX@*%9$@o7=30Yz^ z3(83*1grhm&+n|(`8o1fhJ(FAaDRz+7n44M^`6Z9mx9&FI zy)0nP-COV0IzLAq%Wyz=jE7x*AL@0y(+R|gSL^&7c`U;rULn|N_g))N{Qo=D`z`2_ z`iV_FJ_jjIG=t!@wr4mvHSg2{)t{?ht=@0*ScZeWLatch>0Wp(2*;VCOIHPxtO9 z!mzz{tfPl;2-8b??Ga=Z-EZbp)YQav;_jeO_FWzl*evTrR?O?4I+{ubJ z9J{}^8=ap6c-~$pxYOrkFZSs8M(5`!V%ZKL12M{HH?IKZbdu7?Z)c6p z&w(f&y(B9kneAY&7TgZ-EZYG~#BY0qHRUY0ZY;cU_t$o#^K%rj zYzGh&@|fEB>Gr65LBvBkftfWrKL^P3`iU*QH3f@W+@{Uh4!xQuld%(+S)=oF6tQfF z6{-by`lr8}ApGstQ|IR>V%ZMHYQepZcRB$M`gY9iM(5`!V%ZKz+&X$a-A#z>MQ0t& z8l9h`h-EvhP%YR^Y=1E!GfC&?0KlKN5i6m<+hNXja9;HOZqnxh>^Aq+@3*mR2Wz$9 zP6@&3)cym+sr{er=05uUHkR!WuNK^mLl+aMHvGFI#{YH1P!aRLtiWIFPINczeWB7> zqa%ijnEwSyDlA5%zm7n}6F;gm#CIDVF;v7d9IX7s-AlSR#R<@T*=}^iP!Y>;i1QbB zCykFUc7M$p9WhkIG8|UmFK%}-_{_Wiae4z5v5pu*HTYg*q+VkL84h;-;@)uUwXu_{ z-9|?Y6|oElD}Ql!%#ltW^(VTH7%E0|c}e}{CCG4y^A~qh>f?+3iGIoHr(yg*mf-;H z#)5LR1=UTdHxv64eaj6yjQ__n99G~jb_Y+d7F=17-A3oHhQkW{#ofLod(nx^Zlm*a6tN5k>_&duBP^)S zw?W2YkBDz{evTrR;SjG7-0fA0FS`0(vqtCVC}J57D^v*X)J}gf=}Xhi8l9h`h-Ekc zf5#Jh_V`xH+`|KvgR@vqtCVfD!2A6KM?|5kC?2tyZWI?B=%n zY|HMn!|paZKSvSEaIjVg?sg5`P~0BcZghSQ*y!}CUKIry4)F@Xoj#|FiSx0}8l9h` zh-Emyk!fAePDZ?$*sFP+pQDInIIK`1xKpvcz1Z`u?lw9i64NhQkUKf}NUoN(g4F8}_mK{Wg~2utJ4k zr{G5?E^zqq&H+>8Ly+s?>#qoaq)nExftU)(J;#TQ*R^Q_U) zLuD-6p^bgn(0%SQ>P^P}5Yo{@Wh~nvweuHu#|~*`kYnKAZlj}z%2>8TYUD3=Cy-Mc z#E#eK=%F%}?GWWJ?oW4jfkDD)`-ZbdM-P>;Y=;&2i`~SM$=ILnI(n##Wjn0EU+m_# zeaV*cyFcB(-6^d>{J)H4J6QRPodwtHXLs}8ZeHca9K`<-5$O$$f-f>M+ab8OwHnPsu2s?p*gKV}H8q{2Z8ddP%;*$ZUrd_=~#@)W^RvrwMl(ou8wK zWjk2;i{0kw_6`zty1%v?ou8wKWjjRqi~G~P+e6r~{pqgra}=>`2h<2PuBUr1Z!-3$ 
zyUx#1#IhY$;4kjgpSwwIf_`egO$IyQ$F5s?EczrbbgK^mhBMdFYfk@Q@4mO zc7M$pou8wKWjolb1@~&+?N%!K)#&^j2$A;knR_J^WIL=-E!e5~{%+#V(cMPp=O|*? z4%TYH-JH@5#d$!t8=aq{h-Ev(s|9zGNf#5B%{*&#evTrR?Eo!mluu_>(8Z)rYi@LY zjv|)rutK%qZsYbQO`jXN+vxlpMJ(IFS}oXFaCAfI{@QMIevTrR?GUdP-05>4U+n&x zH99{>5zBV4R}1bh@!fFnF869)=jSM5*$yjI3+@zx8xE&SH^;k;&d*WAvK_3|f}M_c zs)*R}8l9g5_=#Q>Wvq&1wnMyHu+#2N0}(r3qw{kVv22GGss%eW-wp2B@fw|<11?f} zNq--LWkhbrw)(RG2~x8jopp2D4W1wqq}@-u&5P9gZ4t|MfDo`YTJBwTnwC7;qyGGS zp?be9V%ZMyYQb(|yEmg7{2egHF~u?z=0f3aIhiZAwv_{F{4u!H!28Ov~3fxp`pwax?5y5%6zL z-|GAvWh}!XULm*}hb|^PB7ULsbCj_R2h;`SH#~AZoyzlOVmHmRh0f1W#xfjMs1V#K z_TG^VKI3+7cNaQ8M;Xg-uvQ4}M3WFuJtBUg^K+E342O7y;7*^D|JotqXNzO{{Wg~2 zutJ64PSv?v7(m&*nqSlJx3LU|6)FUGik&ieyIpigkN+=p^iUb|zgYQ;-TL?X+3mXR zg^nI7WB!*of3aKkUM;&?U$cdd9x7wm4lD2%cgybMi``$dg^nI7W7!UN{^H(<>ov36 z9;3jD?H5OQ}I zV#izP=phsk>reO8Utxl5hZXpX-RbVM7HH<%eYCrHj8j^}_62! zPAL~)ccJrhl(B3FD}S-G;G9kXAzeGw+Y6naql{%cq*4CjZm*KPh|3oK%@#U8M;Xg@ zSb@K|Q}f`!-{pQDUrJFLK8+`b+`CY^GCeCqrhWh~pl$Y0!Ba84(H(z|_v?S;U^lT&2~k*cq4RT;v22GGss+2b?LHHb>bdpQ`8mp1wu7}=u(RO$ z8%mFkU+DZCWh~nvUM;wj5duE-==g=s&r!y*9ag9o+^HXLGWKd-=jTAJQ750vvK>~a z7ThWJ{%(R}*R7|{&r!y*9jw)Y+d<$LasT5~()L2<=RmzadR4EAf^3I0%AeD{6J1Ok zcld0f^K+E3Y=;%91v@q0tA$Cg=5>CKGM4SILbYJ0=KH&e^TF;e9;)AOW7!VYYQf!T zbVKRm|BHv{_uE*uL%dpWr*r6H;t;%d#Q49C7^-6amlgPnyQTJKV)xfjnA z`fCd$kWcg#_>0}a(`(}%5x>w8Lscxp!OCCUtvTuBzFz!7M+{Z542LLxaetya1qD8* zR!_|qI%24bWjM6nZd-;!52NW6a?&UI&K5disETDctiWIF=GJLroR006-Gz=As$v-q zR{r8{_mEC@duV&1BZjJ2hC`gcxI1Fpsv>`}`)juN7N?&E@&788;jjXKakqgy47ESe zU*Luv#Q&>UhQkW{#oY!XZS1nab{9H7M-|I(u<{poyN7ggCor&}bbbyn(DkaQU{xeD z9HRWiy}jlx4k!ZG4nAAx{2Wy*!(j#fVt1lDwLpcw7CJu%bR@kbUt45`!wUSx-E{i+ zV)xd&3!R^%jAc0BqQ}~3c|6L6-rNKuzSwE}?S;H4D5x>y+Im%du!wMCGyZ!3pi``$dh0f1W#xfjMs1V#K1Rr1Q zw$tuH=jSM684eImi+nn5;Elyj<8Lo?eh%ozUKQnD6@~R2q7{PMEed1b{ZF^J(6>52 zM;Xg-z-;jw9vLt0LoeN^n3i76>--#LEW=@i3c*gz_i6#a0{q)u==>aIEW^QCA=v47 zrxS<~Z=v&Zl(7tlc!l6j575Pg;{Q57M;Xg-SfN62r*`OK(lg>MbbgL9mf>Kp5bV@E 
z-A&x>Z+D^dbCj_R2Wy33H^c4#MM>v&4{a}WevUGh;eaeSmY5^(bN$7{Sx2+Q6V>}| z8Ov~3p+az{3CNM%6NfLJpx$p$U8h$<87m=af38p=xSLfswZQ*(GIIyd|3B2xLsiWG zV&yM(r#tCqce)?y=%Fg+e~I%KyVJeh(op42u4acidZ>zJJFLK8?3UfBW$<*iqt6a? z^iUPcc36SGxRa{hcI=}yyN5b@sETDfSow>c1?O&SC;`>J>h__I9;#y54srhCwtLWa zy8m(WJ3G|TLscx>!OmaY8*%P7k@soMhdO!)VWVDqjJ@^<>p7GHVoXjgH9yK>B?+5~ zs|fz>E?)1H)*$|0#j+i6#bwQWgf-<(_g+8u?0AdUxiJUv|0i*Y-l^=RiD=UX@jm%yx+L7k83L zzHE4_@o%=!`8ldswgVp8D4*TDw$^3)ub?Kg=)c0&G%}dXUALU z{2Wy*+hK)j!A{LPwUE18!|p=o=cr=Y4#sN1y#+@%lip;~bFo_*2`*e9KQ>iis4 zEZYG}jkVG8`;fbRdBd@LS|ie{uWf3AypyMw=b#h@mRxf3foy_l8@q zjorrHJ=76HRV>2+ow4*T*BHIkM#iE$(GPXRP!-E?i1Qb_6TMf(efrh7wRj~|*75IzY!Q*ad-~w(p)$XB=7^-3!4o3ds{zPwwYQetlL_gFK zLscxpAyMtsqo=!ZH#2Ryd@kK&d*WBG92Rk#qLD!HBgU;Kh*g-s#u1@3jD>Lh`4uznb`d`JJk6( zs#u1@3jD=R&3Eq%w`;eZb`N!ajw+VnV671BbiC6Ec_05j)cHB8ScXFu%W&wmySt(F z(ahPQ&d*WBG8|T@5bP$ldsPrnbbrkbb$$+n21WmzSrzf=jW(m84fE{2<}AF zn~B|DvqPPqql#rXtWY7i(?d?(65s0Xq0Y}y#WEbM6@r~|Cu6buYx_{==cr;C4$%t1 zy>{<4P>+Z|)cHB8Scbz26@s0b?=L1jBK}b4=RlETC!fnQ98emT1?A|7k5ltb8})^z zcMo-bjw+VnV670`jYc<=9ua@2^K-z}L$B&pQIO#fuMpg6Ao5@LuA{?q`u#SR;ebmV zPwdh6Avdx8#iUOhK0K@6Z(|t_D^v(}bL-A+*%3T%>QELFLo!8n~}bT z=k}qF9va2`FH!#D{&aV@wZ7KZ>`+G!jbhmjEASV0%igPH3Tqzf=pke$_NRNv@u&c8 z0yEHq@{iOW&cof?j(`ocyKVPSM-Po+*$!6z;%-lS!?F8o`%p&@!3?2SW$lql_N^$o zubWd(tkC^6JJiubqgb{>JFR|VkJcXDcJ(G>f4b}Fp;0W`VFmu;Zr_q-?$Pmw-{6$i zApSp!Wjk2;i`z|;kbZXO-}d2EZp=aae-z7hi1HWrr+crKdvyGv&d)K5Wjn0EU+hkI zr-XVM$=RXK&oPQ+JFLK8><%HPjX(p@&h+k~&d)K5Wjk2;i`@y-4uxgy(eZ~mKL_sf zy?hp|ie$D!oWIy>f&R^WU6X8)^HuLOI z=jW(m*$yjI3+_b2n~c4h*ZDcBShm9o)q*>P(A$oEv}X5E=jW(m*$&of!OnuC8%p=T z+lM+oM-|I)myJ(sB=~yy-{l#&JOh_!;PMPyo`K6VaCrtU z&%oswxI6=wXW;S-+>vHrcBu1nj52cLEXi9mdEwtF?CH(T&$c`Jw#~oV{Nm;xZ~p$~?{5D3<}Ypj!sfr<{F%+4-24}t z|7`OIHa~mjJ!c+q`Ug)x=G2d!de+JRc=9DDe&@vg@xML(rsEfm-E!zH# zeEy*ieCS=TxO-VXbawqqx38PMsKr4$&j`Nk?_GB@TH3Z7Zv6IZZ@TH`8(($p4KKR+ z#{Jp#ul&{CM;Uv97)0rt5CrxqfdCnbdnPz2U~a>t04fBn1yaZThh3&I8+F?|$EK*p%Ki zq`Fc**rQ+<!-+(S?fk=g!cz7oq=AR4 
ztP7h}*3rwodut0@_PaPMdGs=}u$fh2vts{V+QJqzXu*RP@rAWYY?klbQ(IWlP8n;b zEMLB`R*B8BgL`NTOL{3|y_99BKtR_~0(e!qROgNm(zYW9!1E;%GZkJ*A_) zJ1YxIx1S_s^^}ex3!7S%R9fxdNm*FB`yd|UB{r?13!7TCQ(Eq9Y70x+DP`@Hmdh8` zs+ZE@U_)D2(n~4prL-_FtW`aw`QEy=u%w<+R!?c{@|0SYRGRHy)E1VM6#hG1p3?Z` zDYa^+G~KzNEi7rLl(kd3{PL7q^-`J~oYxkX^is-tDK%f7QmcAOCa*r%&2e}uCd6Tq zc=d#6C<)KiE@XwD%p+s9r-MBx-u!Kwlg+@@RHid*6yi_G${r$kF!=)W%VxR4ij&qM0$|Ie*$)|+?T zxMkxJ>z`eJ^u?dL_^b<`zi{pOFPz^$_ZR2hc<#<;Z#nybGe2-_4t^6NP>V;#W%W-^rC;#iN>uxceO$532H= zuWyGP;1GykDXF|S4qunvk&?iB9y|xjd?wSk2XPeDG*x zVLiuE>0<>+X^v`|9ZcnDZ=x)$=Ugg%%pfUaQOzW9>>jb3%4&bCEUaf-!hffQE#py5 z=>#}ZV#{)8q%AD@TuRnX<#MaW3@|B+gGyUi3KEp8mr8R~(+n^v^Sx4ASPB-Dte(nP zRMQMFDYN}TTUb(3`0svU<5o=;gkz4{Pw7ssEi7rLlC@L0JgRASFqO$crY$V#rIPhh zX?6)`!CmZ#mn|lHsj{%d!ju%Oo{AVGEVD{%G2Ty6#34hw1p+@RIqj`mS3LC4yGbIc%-(lq?ZcTONIIJlv>qO zk?wt!wy>n03RX`=?DAv=m=wwWS85AON(%qoFKqns6k3W$Xs3L#^9W^OsqRsdvv$gt zU!KeWlYD&eaAjesVW|KT z=-YYF_~prL?&RgpL$!q^?Ub{2%9mfB%;rvB96Ur@Skg;5>!sX$c`} zWVtP4*0Yo3;Q89Z(lue?^%59lt^Pu@o*hs2zDZeF;y_Bqte(a(TgL2G8jts%rz|Wn zASL+kw6Np2En{{ojYm7*s4OfoASGkgPUGdajM=R;t`4qI7M945k}>P0vDua}yOqY} z-X(2eNj;5OJ&j|wjM=R;F7~&yg(W40|LzwyZp)b6O5=QIsx2&Or!i}%@p4{c3Q z2hY_Omh{q?_0rgE%b49t<8<#i+QO208nb#D$7~t1TWOr^U#%@HDJlGSzp(Mkli95_ zn(RDVSy-w-l#EzAjh0`Y%xq?KEQTG&?|xz9mnXBiGfH=!tSu~Q zrx9zX(elfa*{w864xXefEa{~Y>!p$T@?>@^Rg=9ZDho^HiIR%dQx&^Bna!PQy#EAc zVW~J#g8xnnTg5L=X17u`+W7`$VW~J#Qn7ZbmS3LC=1x_)`2UZsZGQLW`o@Q0|Nqc> zaq(Zn{{N*5-+cZLVE?b3tIypP_WvW!eD=(vVgEnt)aOrK3;Tcn#9y3v5(ufcye*xtO$iwW)El-L09h1LjPBT;yxDo??zyg%=9VnRBA)E9DQIsmjq z4Pm1$$GGXvyvvFSR$!>^ki?|CB+ODmly$3W(2_3$l@B+IYu4t&)04*_9;;U;EUf6l9W>d z@HMuwHuM1oE%pzzh3)SqDgJw}&+#?Bur~JB~bZDIRcNy?M}_!?PQqx%YjCi}0}7M9c$rv!jrH@dJ! 
z_f?9N|DA7F7M3UhQlZIOOya9 zQv%>?Y+fFkEu*N^N0DFEJ$mn9-~fYjj^Bv^Lq>Q5Kdc0YHrA4}v6idosGO zFz9&y24!J=2_j|#099I4VvX)A3_IG{RuKoH%qCBg?|iGau%w-s z2LR^EQi(NNJV|!&a&2KrFLCleZXMQzH5)uhx_6znu%w7nXI1Xa z6~s$Q0DpYp;iF<^z3~9RKUW|xDIxsvg-12Wnf1y8g#TQ@yrcy2#}^)VxuBUfXq6qz z70^rS%O^9&A8l4iGe!WQl+(TWgB4Np^^_UppGHCcW*rI;|6HMwOno_JM){*HemdHq zr8op8J9C9cG9{QlzWBvG0_JblqJa6&6(Y%$aQ^th!Heg~v0SEN1T!@cwg!Q8FdKKfd_kqkw<2`v`#kxk4$K z65=0Uc+|MjV)h>a@jq8MB~ya@;|q_w+-fm9kO29gE2NSsVgB)j$1|HPW)D(<3zfnu znHFTXFz6p`k?XAy0R>wG7b=BTGA+n(Vbnj`qGzrV0)=sLp;CAy(}FA)hW(>$spVpJ zA_4Y4SBNFkf=n02{iDr#p)z}sfcu{-%#vwAwhIIQ(H2`=tFIft{<%UenHJ<>VdOvB zqKj+xBLVq8SGXlpg8t)+m!u=0|7J%Lp#O7)Trwr>KfdsI<_neC6aw~t-myy#!Zif` zPZ|7=uaU(yyOIFzpLfiX!zgRdw3ssbA7A5(YrSCr{Xg%xC5KT~(o+ooPmh5An@u6W z|L3O_r$bH}it+!c9sh4Og&h9>!E2vc+kE@xy*54u`~RP;KL+vtCtdi(3oktXd$9k% zbZ&m`0_^|$o%zI>D`EeC{i&Zh^&Hs$FFEmr6EoQVhsW2Cy%+ZXXTH4V|FE~6xdM1e z3Ez(|UIvc9_nQ?e@cnZI@sbk2A76Mpv&@+lDggd-1@e*-!XICF_?XO@{qjKY&lSu| zN)Ug1;ZY4TX9I4*6%CnFKrgAUY0MaZv@Ja?%?cG5|M^vl%=vo84DwIyAb+z$1;~G{ zkVmAxz%irz(e~w1s6g@0750eK*EeREKia;03KcN_xk4Y2`U1y{^GDnA#kI!yf%Bg$ z{1K?~+{0EF=#Mt*RmiMR0ra0M1QIDB{qe=iP!#EJ_ALR)KUWweQiA&93y&H%V$2SD zK>g}9C9prf@OWk;#_U@H*nh5&NTj~>F{Ay_7Li=D zZwYArxxylm`pU-)_eWb)a?QRa!2Rb6jYR58A2Z$`ZA&HB2r_MpCmqZc9*NYKJ!Zf^ z+N_dmM44iE7%>WwL^>uTh7td0i!H8IT>;TQSC}NyF{vwt{G%#80R7JuGKrK>|MRG<{iAKEeM1o;2_5v^%5a6b_KKV|$szAjx{vnfRQ|BtS1zH##|8}Eeuf7^Nq{{JUk__+(u z1ONZ^=l=NIcb@%g*#CDs^WifOg#AA{^^>Qrg8lz|)c?B<_Wx^-pE>qM*#93LTI^q* zzxU?~@+2d$eZr1Sw8e$Ywhqap1=}molZ?Rj341irwsakg8WiI}$Q0~JMqqo?YNY*! 
zHnWgfS?^k~y#hYT2y8FdOwjh_(V)qs1>3)@-Rz-{FC(zMAT>c-d~t0)K8yctwMPZA`2yQQ!OweXtT$={P@UU7Gx`@nE@Uu%3ZL!6*dFL@I ztQLhYBFp+a0qV*69c=*I8@=-w7gmcx7?EZDWk8e&McdNFHEK|d2dhP4jL5S78qnqo z^@#;&vo5aDnS|kCwJ4MkS=QeMkT%4_$E+5kcOIj{YEd{NGS9=$xj|@)O0E& zpDUyh8KLe)nNYMXUtFu?0(C!ESR*om-HQsLXfrRaRdNBlpDVNx8R70lrBJj*B-iLn z!X(3LQFtRV0^W;)p=gUQt_`S!abdM6#1WY%;MBeIk<6sH8C z&APZoXA*{o)uK>GWGN{sP7Oj^WO0qoB#a8HMd6OfJRd)22%#;yxJG9Z#>It7A&upgtq02YXd4_JY1+0_J}O)trk%!1a0QUwO%m*zn?4g5gFm{Ma59G#TM79u7Izf zEBp}|0q{lPP_)Gt*Q%!gz@IAw5*Z=zMafXKEni&g-2w>wxxyfk5d>dU3q_lGajl99 z5d679A(44LeohfWTV!#KKqZh8;XI{ z_WwQ3y!Xr_VE<1}eeTrLVE@11#IK&%f&Ksb<7>y>4*UOOUmkn^_Py-Y3j8D^B!0ra zOteMT!wC5VfnBj*EAW$ykoXCEG|?6nHY4N{|*rSQI=;E3+ zDUkTJ!r+h*5?>IRpl#{mnl&kq__e~|kP#AJaG0RYy0~Ud3M785FgRp{#1~v9Xp1eb zRg(f}Un>j_86ojSVGy*%7uR~@YmxX0g+oS2eAEH)8vF>-eIr5%m>F0v3Wq~h(sh7y zh0tbRT5fVS)Tp_es7uW1mYLWN~kwZpEeB>1Q*Nr0a%}%8jiLWp@ zWQ4>=RS+IEZoQbDN-Yv!p>oIwiH~|9JnnMq#R&OKCM^|IAsWJ5y>?>m0Bde!sd_>5Q)j`l^UR>)HqebE?qz)M&@kNyo zv_&M>YznnVe1+8^^E~{VIE1#SLl|3eC8BETTIlfrSE{RuNBlu<^lLjErPbVs9D4R0N~dO>Ll|3d?pn^+j3E} z>QMmj>xZ{{J>;LuJOH2hM9^klT&o@h0KZnq9WoEVXI>Gs#TM7&PP55Q-7 z5wyh@*LvRr0KZnq9Z)HjDsu3Z;0OS|*`)*keyxx@WFCOe6+qBtUR7uW1k0sy~O$Q@87ws)3uq7d4aO0L|?BA$Q0KfDhaUk7u@Bj7e{JT{wdka)*on_z3Fq z@G;xPi1fs$a0V;n4jBRP5fS21=?%rTaG_Gj9WnyoBa?{7UG5AvyOaRHuN88Ki~#rvrwF0V zy0~VS5&-zMLhg_e0AExGL0e>T%`PS2>uZJFAtM03C=!CUsN|YmN&w*33b{i@0DMs% z1Z_(t*O>H%>k=+h=M__kc=2czm=iT^6H{ zz9YY{6%a~Jczh6wkc^|vDs$#6V&L&>1%#3l9$!$5pe-VEMjt&mEg^FX2qh;xz91Sw zTU6$Z3Kio*=APWjK$!HB6CPhMjG%4l;u;ky#)Hf$E9Lhg_g z9$%E{KwCsD8+}U{6-JCg?vN86UsUTrTU0F@eM=Y@MvOx4kP{wX)agLm(#19UmM|WS z7=_#+Cp>=2czm>37uN_^!tgL+6mo~0@c5!s2ijtbYgJbOv(^f^Lr!>nQLzJU@x`_I z!11;)ViaEsMF#1RuqqK^etgjT&NUshn(>EDdX|c7GGSeSE94G2;qg(jgGY^DsEno%VpF(KJ?i$RQ095=xNZm9mP)SC6vB98XaC=U^Z#oF ze3BCmA9Nyq9Sk^pvj%N(_zL(WCmcR_M0ngIgv=KC2PHj(OaY(dgu@4e2#;qMGFx1& zW5ll&@JUWMd=QE7@KGVN-hEme{wvze9_Ih&gu@4&2#;zIGOGq{arg?kLryq+K`DZ^ zr9x)bpe+txA$Q0LhcC!P&}Kd!ZQl75@YTIqA$Q0Lhc7rq&=$G-m^EmN!&k^1a>C(@ 
z0v>3KACFcW4LJN-A$Q0Lhc7C3pl#{m8Z{_p7gmcx?vN7>U)1bCn{{!`&LrUQYlYk) zCmg;g-GR2q;+maF0I}8zxkFAkd{pi5l54=>o1IC(;nxbeLryq+l4H?>rV4 zR*ORJkP{9cg*tdVvm4m#OaczSR>&Q4!r_b39cYU!uGLfn4!>5&9dg3qivk{Ki%PE9 znFJhut<9gu@q=JJ7aVa;?@GaQL-C?vN7>U)1bCn^|(LiV8UVS|NAH35PFAcc3k@ zxMpV(K&-Vw?vN7>KV=*~+M2{`;(A$Q0Lhc9Y& zpv}6tW@i#`__adrkP{AHlo!h1?-09KI;vfwuVKS}#<<;nxbeLryq+ zQMm(c%NN(Gp@74$6>^81aQLET2inYwYrRkbhhHn?4msiQMd=Q-#TM79u7Ja@6>^81 zaQLEt2il^GYc_>|!><)`hn#TuDA(c7z$0+@W@i#`__adrkP{9+WgI@*tcz=QCIN?E zE94G2;qXzfgI_mxp)xy@5R|H)p_oF%bBC%OJZjtxYy>K`Mdkki!T zH7StzjY95_6A~Xm86J1J^jJbE94G2A@N1Y4zw+mT(eWDMdB;u4mlz5MX?UFStZx(RBDm< z3b{j0NPJPV18ottY(^-xNPLCdAtxlhsN8|Js9H8Vm0BdeLhg_g5+8Ls{H8hriEqq) z1LZ%g7lqs*CnP>9bntlQ3zfaj0jw8=+#x3d4@X-sExy}Ks7lqs*CnP?~b?~V1 z3zgZa)FSZ}a)+Fd_^8vt<1W8YnVm{45?>*A$O(y$3LQM2`9fuODz!*_h1?-0B)+KG zfwqX`8nfRJ8-zPpA$Q0Li7zU5pe-u7W>cs|;w$71IU(^y$quwFRm)~ms72x{z41p$@;dqC9tIQ>ZHLxcUE$f;uS(fG;RT&}Ll?qaKBY z2-`(LofHJX7tA7Pi;0>|k788VE(+?TAOODL7eQNG)U0d=0N^(Y>ZBk5z91Gs+j3E} zp&js^uw5S6?)5PLzaRj9!jvLtvo5Ytk79V(E(*CrK>&QgEP}S!;@VtF7!|gQLheuy z06$^BCfeeQYcotSZobnfH*^*hiOU0kC}2^rCookk&dCEaq)N*Hf^&?w{%1p)9y$quwx7uV>V$MB=QMj>}72!Jmtcc3k@xJH)}Mujt2A$KSU zfG_HIpe??*R>=hbzfs5?3IgCKoG660<%?^TTmbMJh1{Va0KO>Mfj0BvS|t|%{6-;n zC; zoWTmYLqP!igj0mjW?fvPO9>dQOF$%0^qkPk9`;iZSlpm>L~#58-?7VAOJp69(mm57b>Gm37ZKQ zDuvvkAOJp69(g?Th01IS0f65qUW?my0~VS z5&-y(Lhg_g0ACdDK-*HuHM^8hXujFFy(yFv&z&gQfi|n;8ciXDLP!5#^8XtJgi;V5 zUoecIZRxTYee@kKexra;3c}+HZV|Lum&NF#$MBFj1%y%%9$!$5pzX`0PyuFb6c9>5 zczi)Lg0`s48GZEN?1Icay_JD5|GywSzF-(Z+w#S=UipB>ZxnKeg7El)TLf+9#kJo2 zfX8nXa)*NO_<~{tZIQ(_0zWayFk%#Phl23}72#+u7cc5+Q z;u;ky#)A=~kUJEF$47+5A6IrfzEPoKco;DXxkEvC{Dc#S&=y-<>-`US{6-;nCG2ocPI#tFDiGS%__M@A3eZQFk%#P zhl23G2ocPI#tFA8{|EwZ>~-x8d@jY95F5FS6_G9GA)FRoQ` zfyZwYa)*NO_@aIX+LkY_RdRvHZxnKeg7EmFatGSXi)*#nfX8nXa)*NO_@aOZ+G2}q zmRww@6mo}x@c5#P2ioF`YrSCrkKZWd4h7-yMg0!6EnQr*ZwYw(Mj>}72#+r+cc9I> zxMtrH@c4~F?obdOUli~_TV!#~z9r!C8-?7VAUwV(`1pu@-dF&LrUQ8-?7V@Hl*~+<~^Gi)(f!0f*lx`Vd$KfM3Lvgin+-flbJ`sw5)uNC)6okVUl{?V3d~vNB3OM{m 
zA$KSUhmV=??`g;3o1IC(;WrAoLqRxvOb8Djvs#RRPxJ|^MIm=62#1f@1CMIBP+4yn zz~MIvxkEuXe0WZI+~pT4voi@e{6-;nC@Zze4#Q@(m_ISp;E{l3c}%w(j91vEw1&V0UUm#kUJEF!xsfS&=x&&&884=_>DsD zP!JAZRPI3AQnhS$CIN@vDC7z9SXwXi_#rvi!83$nFKFy zbG2d$5zn0{;87TO?#!l8mACf)y-xo8+UA=#FKm2pZZ^|@0|!~etOf0t+A@(f&_fjis`>@O5@hmw%^ zqH+h?qQ|6BlY-!Sva?Xg9ZEvti;^8^TYgO1oJtsPe6Uc+9ZEvti((yUGar*SrxJ!A z?JX2?hmw%^qGku$V#lP-WW%V{{z4&lC<%!#DtDkQzPL8062>ie77DpTNl1KAvIA|) z7uR;z!+6EPLLql335hR?b)e0>xHhK}hR^pF3b{i`NPJPV18tGTH9D0rYPP>n$Q?>T z;)}{1Xp1hc(W!)SVZA8i4kaP+Mad4dEnQrrQwih2dQr$7N^7?koclx2ile{uFT;)`M(XtOS^(W!*t;SN^F9ZEvti<%v1 zi!83usf1AhVo}H)N%Hov&}-#5Ro z`8%7xw)vkne}404H-CEbCpJH~`A;|h@#beXKel3Ya4&L@n;)jsO@ve=xZoF}$-Z;HNE zKdk@U`oCHKm+L>e{=@4(xPI&UC)Pi_{=W6^U4P5^8`l5P`fJu_>#tmY#rjLuUyx0z zB-z`)>82a5zwYK2p@v7bb;a2SKJcOC1Mm5Wo7x66m3`@K=AJ`ah7_tRlKhHRY|=+3 z3I6Z-d;2fhxqddg?q%5oJ74x%PQSYA_5R#zLA`nxYyzMg^$MY&Ua!6BrkihkRof{N zTV$uj^iji3bL!OlSCP%_JH^hweWxg4Av?{t9yxsfj5_sBRHSVCPJw8kPR09YVXxWr ztA>x-SDP2S4GHoo>HQH#pkDd?p?l<`rqiz+cAE5$T1?2_i$rVRDR2n4?-VO6J59D8 zF?fFuk^}Cx4|5@l+INbQ3e+jT|9BGgnop(=A3Umu!WBNMgIsCftJqck{m0|5)A95x zhMiKCcjik#2DI-~+f?IG*y(8NVZ-;YXi|Md1_{o-Q?aSU`-f7MSJQ_MAC;tvSq_lC z?0Xfvir+sJs=S;&WY{SQ6?3v8N7;9(ZK`o7RC%%W;Nkm|P%-5yGLLw(R-e>rzBvqWVj~;!mVps9|heDNA(|Zp) zC86Tlok+FyoobtE6bcoNvwIESpM)y+X{^Yk^__}MCEh=js;rpabNHwvRa|~E$ylny zuHyF(g(}OZ_ZW6cLd8Wjkqqk}RozsfP$A}b_u>1KP;o*$Qe1tfVpECt52Y$gr*|7Z zDoItwQiVR1UB&Mo3RRX&?>gv|e2XHD9ezmPskW)AP^b`fC84RLFuqH+X;YgNe**_{@B#VpH+^m!VW8lj+&ESJ+hd|ID-S zdB4v*2%q=;^v(FZ&!?Y&&wGFR9{9Z1r(S{2dw%NC_`JubF5>gyz5T*{OxzX z1fRe4&H|sm`Oee${EhE@9zK8ldmo9CYH|BrsXTSS#`20V<8}q;R@7|30U;DRj#{94Sn>S*0l5g!%aK?|Ey`}|DV=3 z;q#x=?fm~x-Om62I)6Dne{kN;{}0UD`Txi5^nd@AZT$brwcFM@{cC&fIq9nlo#sZ##X> z>9te0ox0}K+R58aUUPEo#BC?8Ik9&9w&T|vUpsc&v1^X4q4O`#zv-0&`S+QD%u)}F z-ixZHR4&{9f&Eupw*N2N|DpfO=dW>Hm&^7)U#ism&p5nQUAF%(+y8^3%2S(Ow*T3y z!crw})&8nJ3We&j{eRj19|#q}e&F=-o9eRt@9)2t?SCJt@k#~m9~uAo|JMF5IsQjD z7g4S(0f`v7Duh?bKWZ5|)spe_%wYc$h(%N*OF;EQooY^{GIXi|v~;@bl;fX-R1u}d 
z60mbnr(#nH|3~OmORDLq;iHmQO_*v(qNiTPt%~12bgCug^yIKpa;k|?En=0_sb>F` zp;HaXX(xv7Pfj(i0tTRo?^Jjy#ruaxRgzDS49Jv_ zMg||#GbTLnoyxaYuIZt{ieaAQU@{g(r6>6-8osPG@aq#|>eGO!*KdHy8sWhQN>C~{-(e#?( zqmut$$p4sCl_FHAj2d=YO)m{Q^_q&28e`T}icp~xYS?MHwKaTyuc;WRF=kCAHkDT@ zC~g||T1=p8>sC!rEZ zjWG+A*i_>EL#Z04)2oM%O8$QV)flr>iCx9-9}3ktnLc~asZTc+NR1H-mA0v}P^dq)u@<0 zb=WBhl|X8YSg5p3m4!kz%D0{}e18%ufz%kWP>D??-anM8Q8s<@@KH&s1XN?hQYCg3 zzkeuHqjdVDVW%Wi0;w@#p;9+hC{&|l>xqN+_o=Z0sZp^|iA}}tk9vRdRIMh{Ck!6d zr@jiPM#WMkc9nmBl;{dN9Z$bu*eMB>@YGZ+RNAIWL!qihTUQO=pM**vH7XV=v8lxS zhf-Bl)5i}Vm842QH7b@Wv8(v~L!qk5>Eni-l28exM#VyEL#e8g>0<_u>IsrF#y^!TRbp59_eW~HT&i+1ee|$Xy8j8J zM#(~@Z7L8$2c3?$CWH6)+{VIFQ?gKrO~vmIGS{Hj(R4g~RFWzI)hJo2#IEA^4~43% zrlVn}BvctAHA)sLZBqf%8a!#aRSn;tgi0VaN){@ysl@w-QdJhya`>nuRRXF}vQ&v( z#qS>qRhdtVVW%Wi0;y55P^p_L6sj`Y%7^byLM4zINa7;Q(G2e*aLY%4C`jI`w3}DGL?o$EZ_fQ)QD-sEWx}GI)Q_)r$;9l7BD;QKw>4 z`S;Hzp;Q&)>DLS&m5wT5s)6i-dKJ5h-ai9(hTN(}$Nzuj+DFzlZ{2L||64a&`~TMU z*8acsVr&23dZD%dZ$01I|F@oN?f+ZPw)X$6XIlIJ*3+&1f9t8%{=fC)r4xU5;xi|n zd;G7DfAaXVkA3Ob$BsQ??ThIA|NXzMxdZ)pQyO`#J?N8#9&Or7`>8~sZ~4~v`E>4J zKi)xyd{ye!i#^(Wx0)=4SZe%wI`3dV?Dj)GD)s7(NwoQ1HDMYD8VbMOnmfOrm;$e& zyfujoRNt#AP2)gE;pfx2^Zbb^@Jh;Cen>R+-D=_#!m07=>3rb%L#}8)1uXh#^Svta zgtFS^G{5H*Hd2jk^bqsRU`^UvBO@| z>3rb)LqaD_ftP-?`Ciq%73eAadTZ{ye>?>qQq9tj+)dxB*jp?JA*LEXpU&4prGi9E zdRXtOL7VSZY%WqNs8StFK{lSge%L8V7kg@udFeaVwpNI##;>qje_GJzux-J z;ro+hv8M(ZkG@l}t;G9>qJ^O9Yle?XlEtnXz(0MjVq5Y1hk})5)9)B|N^-@X8e}W_ zPSs5n%2k$by?XfmBv$OHX}hH3Pi!jj{-Im}RQ>IPNA;j;cGV!&(Dy2Km4E*bRE=Lx zUp4HMgo-^i$S3rjYMUy=RO8oM`-Ar5cN zVA=J(ie1I;9|~1kP4|YKl2EayCKaG+ZBtdDP@#0s&BOO6p<+)Bcyj%dicKZnKa?tf zs&|KvN>atH8qnYRUd68B_YZ|C&8IgFJ0+oFPYuw|zEgEmg+hgDIyVmApM;7%HK4Bb zor+B*-anKofT~|Pd{mMucGZBO*7quQ6~BKdRB1BZ8FcDF)$FN3@hRV_wy8o)HGaKy z!{GgWOqD$~sfekHO~vmY237I%>GtqZ>8N5?O)7$_VpsY14}+@s_4M0@osv+orv{bP z{rhX1stkn+{0`R--=BnvJvFI_sftY{-anKo1XW)#d{mMucGaXJs48|9zkeuHNiqG_ zVW%Wi?5RmbP*vMhWhhjr?(f$P^gl0`m$lCBvkCF!T*o%RNYjeP$kLMOT+g^_I7{&5uTa}3zgVZ{Qe-A7I$ED 
zF?~t+s9-&%UKvy~;iF3ID*yhVh7CF$Pp=(zNq*vqpcSY-=Bmk z^Cnwcxs6szY%1~ofmGq=(-#dNm843zY9@UD6T6DvKM*QVnND9g?39E`cxonmRB4+E z6+wdcFSfpA`2HkR!c#NhlS*tV@&18SfrixK|6kkM|63b>v+?PT=dAxF^5>s*@h>iZ z-^Hh2__GTizVMXue|-Lf=bw1)56`{t+*N0P|LnWZe*Kx>JM+C~9(($CPrnU(fWLL> zEvKrJzkc#fC-W1(a^ejqlH~ST6@gMsQcz4H>^7gyomY>oMWTTgsdBi~`G-|R zrOnP4tBPMw=L4@^O6<sG80PzP;2FBeQU$%};^0 zx3WMD;n!PpmoX!zfb@`X#)yci*jr)>Lis|-bnZAa#1xPw65bdQGZmYQryvxs5yWf2 znE_LV?0a^`h@h#uw?gq6Wm|Jcnjxlu&UbdjrXpx6_Li7}P`(f~oj)?v@CX6Y!?Jvd zu&LNwJO!b6jUZlMHR#pHO&K?ninyt^x5BU~e!VsS%JASwecV)dV?^9k>@A*xFmQ@4 zrt?P(AC?YzM$V-oaw;~LpMo%QieFFX4!HJ5NxXzhMnq4wy#Lis}YbpGJs!;*XnuZ)PFip|AS5QDmIs&f-r=d zmJqK2g+}UQsKPNLVyN2Q3PY&qVQcOWjDv-;4;OYI_TWKvED&+zuU$hL{2pFX5aKVN|iV#1w?` zg*fW`UIRh!#aD%UMg&sD=He*`#jAvP-E-J0iI;HCh)Alsw?gqMaXY-nKo|G|s=_}b zBB^3;i75!>3!&8c-3JfrW2wSJBVwsybI}x_sCuwo1;p!a!(Pck6&@N9OjY(4N~edt z;&yn~!4&wirou%df~jI}`6+DWKC!cxePlnb=!m3PSlRaC~hJAC}}x_-O=9nb=%B1)+Eq z5U-74uXKC~KaJoiQ}pWNC1 zPw4Fb$9MMs*LC*)V>DBG^w7Yqm-%2R%Qo8l!z$u}nm}Q%xJEm_o|ppCLoC{v z2$pK%QiK##`1y41JbPjaND;AYXChpxhzpANgbxcnH1Ow8x6vvIv;YopVo20iE-Vn9D@ORBSHN7$~L!}d|hWvcG2P`pq#X6{mEcnW;TH1m$wOoU9u-r^|;W2X4|bncR7cnX+_ z%E$LJ5i}K>%TGZVG{vu{^Fh)~=A)+Uibb2B0&Q=FK~wyCYyQ0O=*&13)njMS=6e-; zOH4s1UkIDdzcD=IG9s848BS>P-HOe{QxIIE@$2dQnqjXbUhJ;PMBr50TU97tDX!6% zhEqV|#lX2t1Wv`?5>pVI^Z5C6zBPPUk}vkxKo0yAh|R@Q5Q-P*sOHmQuOwa!p2pkZ~Gka)4XCKC}fGhfh$$fXVt-90VyI$s@f3vOg}l`HAcqF;N{$si6h#nK-CLn}q2l4(CDDi}An{^% zO(ueZikN>P63IRaMy?wWwE!!6a?~xi_~=f z^}~lH`4avbk)$j(7f(STUg-ql^>xEuNxX!+Mr0|gdn*tx#2&WhUptrrpQX$uT`IDa z#opp62-B4D^Xc5B)OcNxQXy%OGkZZ0dCFpQ`6&qVl+nXxgr{H%OC6njfdK`38i9A7T0Moa<87u$5HNL3b_i>DwIuW{+@ z|AV#7&u+H%|ED%u`~TzXt^NPei>>|tp$o13|AF(Z{r}!`t^NP5v#tIAjx(+O|JKv3 z{r}ykTKoTvCtLgf@I-6>*T-A?|MkaO`@iL3RR3+&&b8+!D)VJ%S+vpSr(e90pQthn zrsC(*+WGeUL}fmh%Ce0%->o7pWf)Awuc!6EwU>>&9^#`@1XC4pDYHNip@*&7x%R{q zcs<1Sq==J7i1)&tjG<}19iY@^Nh zs)$R83vtkEx>Y;Zo|po!hrDPb4)1%_#085PcAHFV=iB2c@WE7;?Mwty#pWV$L8Z2! 
zSLC491J_>WgQ+arX!8%N?X57DieGQl&b7x=;H#^$XlEjpD)tr&LKsZN&!@FZo)J?( z=e+ROh+wMNTs#GV9?Fo0S`Tt(GG7~2xNAf(Roh!>poh@IR_$_U#1xQtu`4zcu~f0Q z#1w?`g#1w?`g<$Um44n2&Rh7#ZwT97m`uy6N6rTFqPponFyw8 zdn=5k;@4aC@xc`MSgLT>h*+xFTRa6}Ff~IkwLUg{Si&kWye1RDRI$1I6okQ4{CZli z4SOZ=Vt7p^f~nfxN<#4}agBcca0=*{7w#GnOBH)dOhG7LZ7}t9!-pmL68;(yOw|-u z7);GdT%%t*?3KhzxNAf(Roh!hC|)J5(Jp_6b%Bqi3U`f&rHZ{JrXZBBHkj%%Xjm7N zBwxZ`BZ8@7bMY<+#S2-f^Fam;ZVtUJ5bhcgOjY+*C|)J5(JqCCr@+Tjg}X+?QpMi# zQ-Hcy!SU4wQ(Y1bPXTA9`m1Rsf~jJ2(G;NiRoE-8(SszK%m-5$UV}DGadEF<45Pp` z+GWv*DIoD;bWJ8=sbX)5DG24O4W_z08ZiYVU&3D_f~jJ2@xzAVRY1IE13@TAyo9?( z1XGp0RfH9vvI0)ly@4($NW2(blZjZW*jr)>LiuWgsW%TFmX0ssuMxpivAK8(Lh&jf zUc19yNxX!+Mg&vUy%maA0VnHC!zm!~Vs{O$)!N387Q}OHR{FTFA$;ry@noMLXYkMn9R>rTl<~xHa@X5;TuE|8Qve;WZ1!1-_em-1Yrm%{!pPyE`6H=f9j|MKxS9#4<`^0C8XSFZi?+Cnw{MdMcOTzvl3FgLfG zFtoV|%dSB5$uoQUS8HMA!mz~8udbbs@19hJFVAj2V`%f;igkF|&DXws)?&(oE@7(< z9DE2L)06s$Y8z7SBC2>&lz*ghaWB30l`p;Sh8s}X4<}vFEiTgo2Vd?(s_flDo9`C& z9jV)!ulu&^UfLERge1yd5mBuN4!+z+RM`SXo9`9%p{Z9A@YW3y^a@*b;NZ)BM3pUI zv{A2Z{b%Zx7YmXYbUV7bb`CyXEFV&36F8@kYIa3i-kG{}i-l7z=(5_Xop+D7l@F(~ z`I}QX6PaDK5MhC#0|R0rO@JSA1c_AlCe zuR_E5#X_e+ulZK(5^Tg2kYus>3!UM6)g-HRf&|?{vRsCZSS*q(hAZS0PsPj*C9Cy- z1YJV1Ty~9kQj#n-ev#+mpH!2q)&mlB3&|R!*W^B!nt9_FZN6JYvRXK~>=nV(x(E-s z+~=aQ@ryRqQm5pDt`Ci3-^^( zW#c!eU}|6Z2RUEOU2WQ4eM5`b*cn?rkCok9zH9{78|}; zWdE$Q>KE1K{Lb?0)w)$gxA^&1-6z*@w{(jZ zwlBp}amZ@Y)%sO}ZpW8umtFHf?v|WDoLT~cB>%Lkc(q(#LBFHt)GoiquNaC9(=0Gp zfugB++}p`l>sAT+tS;3qxkfxI30UH5ouTZoe^yn%TE|MzFTAXMdJWG3Nm$}Jv`~52 z_p3=5_DR?+q8Ki}=KkC*iCDt15sIc_3RDpbB{!Qu#$0}lm<5ut#LF0Poc?K58NZ8Y>?F*0>!Ya&OBh8{anLFf)`HY!uL!2r4;uDL0+x7GK}NyU@UBG~u`Q$(^orx_ zfrBaV!PJC(OB75EuUlRtwkY+WTXX4^{=;P<8{TI)1teXx)w*_rZV@EB_wZp!xP-$-Bs43!pbQh5GjKoGgY+8M zX1#m~e~n0J7J|T2FxXo-z+8HbzjS6kpIP{8L_V|FTRa6}LNmIU)-J!sTL_s?Xci6| zkp^-=<`bHQzeXf9YkMoqXGRY${{L>W|8L!xuK&&Yr`NB(_%|0n zb@A#8e|h2K7p^}4rSl&@|IBk=JooW)&p7+VvmZVC)H7c^^N}-8J^iPrZ#n(cQ-5;m zmQznU`9~*jIr+pBe{|ygC$2jFN5|iH{HkMraO~a3u3G#3wRa0s-}ZG{JExzYz}!b! 
z$6i69jkGDsIQWYNO<)T%7dsQb-m0C?&#!jwqpV}EpwQ-f6+6?fc8fC)y2bt4IsN=; zl2@dl#{T?Ao9|YfQr(TM1(^q3LOTsSez^~^j{NbDHs7TM%_xYh3;!D$UJyhXs3b4FRw^Djr`${Hs7nV+uI{5=oQ*&;PHbavKRP~Kl{<< zdlkFgFIEdO54y$C?>v5Du}C|O{L!CNj5V}ey2WY{=0TS@`klK^Y%5YtBY*Vc0P;^N zHmhH(7GfTBi=*H9`&b5ifOX`L{+t4=q1i%<)mA%}y&_q)9%S6)KEOI+$)W%&dKH_M zB&%iS3VMZPxr`g00-vlpV#%WTYG}0ZVhtp#gk-st8(yrEBx}TyMFCcHD=t-Tu?CWb z=xXiKZN!t(xnHqlQFIkoKXH!mVhtn<(bf7T;h~q8Bv}hVhtn<(bf9e zVXq`v6-yRHSJA5?S?vx877LQ~;_%#uGrpIsiY1GptLRmetQKD$bPLIPQP-_lERw8> zC5xh~=vG`(y2Wb2zfBt;6tkwOBRJz(W^KKykxaS zkAiL?SC9pi7*&Y;lZxF+lGRqe3f>=*^~_?V>ksQS!@BLQ{SuDuU-wdpz@$wNY>N4ZdtN&k}NiX zbBd{AX2pK(7OO>+2VFw4o;G|^k}NiV!7b*WRBTqiSS_SH=oXUo)WNj+oMyIvbINHB zZ&wt_Ti@^#xB{ZW;O|n`KNYE`L zYmi@)XCzsSRzMr=0eQpni#3p}43g#YYgk@=LNkLEa!P2%!zz;1&Oq?2J^$aETl;?s z{{PQ*_W!5Wrx*Xav;V(z;Zql$-P!-oIQOM_~_Vg9Q&SQ)!J{Yz1e8=Hv>msL2*^^ zk5k$N+Gy6KX+pEsE`@kM{Ce6rN1vayg5s*66z5)99|<*2TYL}zMGsqz^Yi(QgVWa? zbYLatURfWB`;*_|gCHonxVmwUKEK6_wAV+VAm?rwu^=R;(?^2S(Q@b_0b>#il0xLftRnKxGE^f{gZNBRZ0#m)*u3k9;VH}%U4ib6(MTw)%$&< z;P}M~6$g6QY6f1ug5s*+Am?7a-$x1#FIK2L(8bk_^YZaxQCt=4!`=FmURoz^u|maB z;5yhi7awmc%1{NVICts&KGHJb#R@e?0gm6s<=LL#O#kpHAS<*D|i#3oe+z6XK z&4z+LMbe-2pcUs{y<|z##4lF+tWsc;{8HocY<$oMx6ppWGNc%7e#5GwH3)*@_s?lu zri~BN6$!}}2UgKW{mM{^TdqL>6u-UH+{vAEJOw0N99YGE^nHpWiPvio0mbjPnoV~u zQ@@}^qB-!=g*M->IG1?A2FE=DtIfu++y1x*RD)IwTuHuLapUtVHVA;?=a-uGa2CLe zL{IDG3vIqzaWJub4T7Nf{d1a&UB7>$j^`K1xy+#{fdP`i~s*i&4uB!_9s3V z#<@=~VbVeoD;7#FxRth=^TVm_k9=^9bH851q>aKW7D{eDfsCCSc1tqGfz_M?tI|l} z6$@nyKi_K3x>=yGB22P=#W=8vHd-<12=*&B2!i70S2t(6ZoNj#DXyAwTorA;Td{4s z1seoF@zbrwW!iX+h)X;@sXxBZ=DQSI*00wf0*aqc8<%S1HDXS2RnU_A<>I)i1hCL@ z4I-fEVcHBbZBTfK9+u^c@6i{3hyyX0>Uc97{!iyE2iuoAF zmrJ$b#iA@#>_YC=OP9QExy2d?7lNvdOS2JAO4sQL3l{}dLnDP3Yam+)sy2f>o7}TR zPgu4ns2W-+ez69k)dp2vo{iW7WT;O3@r6=Ew6mm@;umWmS#41Dbpy%rB+(OpF@&qa z_bN^VzgR7GhOBO{`(W|ikX|Y;R zdC)5i)$bTgfsd$8Sh6Uhie82J&x=S9jgAWcjq_2}>5GHKSYM0_Y^GMU)3m z3PbhVhfhk9HD<}8lxB1(tQap@IM>5&VW_@pIIU!;j#;uOry1RfyOv+9fn<#!S^L9Y 
zNwUT)S(MWZ&zi7e{9@rzgDHSyxeOatE}kEH%#uYZ&FEE=td`X+=oXUYQf$OxkqciPfP)#m?Sv+;MG{r}hhlestlzU`{2eb4vQMb3#8)^%OG>mo=Xfj|O* z?6CGh#kxSn5=E?wAHKWZ`*7hABnsCkJh_kCp8u1#$$fB%hGzs6AGG`WT62%l`|8;u z`rd8L*7$*}tTVFC9IMYc$N0>?_U#YdetlE_KXL0bxBjxJ|KES}i5vga)c>En{{HK) zLH+;CwMVah^6H;meH8rvy;mN+{P&mN3;zGyrT1QX#l`2q{~tPg?(98h4}t$b_4(ER zylZ}L-9Ge$kLxIjDz4>hCj!d1-Pk8?ug*)iFQ=wK5Y;Y;Dz0T%5m5}Fu}|DxotK86 zM2$tCM0G%-sv3&7z5cqEqy=}+&r82A-=RSeReY>GR%cv`XH^a5)HG_(Be;EbUK)P+ zwhw}+qI1Box#+8inyCr@rv0Q?^sU=B7{-p8L>1TaNku?RR(le`?bUhd_Q|luhMGim zK%%N5%4FeLTf>TOP-*x{)Ywmxs18U}RYQ?1L!Y>NeqQ>0lC0QHlc<7xHer`@1aFL_3NK|nxpY)WnjD6zv>by8@IIKZb zyCkXu5>+izB3Z^had&lI%r+7=ltrRCAW_vSC6i^!8bDdaYr|v(QSFkb4oFm$rZ8Ei ztO1nuV0+OY1W`rvER{u~`hrvThCcDf=jShJFZzQ)D2qfD*CJV3q@=QjrYsQE^Y3pj z&x0VU`0O~JG)q;jQZiYltO1nuVviciB2gWXsA`oG$ujhbyXWT*_z;A$NK^rTa|q;p z&14z=#O<^5X%m*pitqk*NmK_UsxLTXZ|D=Z&d(?Qq@gSlRp7#WQrSvnvW$J=_Ue3W z!x}`jOQJd;QPnCXl4a}@cUR{lj~dD%Q5}$|YEw9qWy%6kJs;W-1X1mhs18U}wI|GE znX>v&Rx#T!SwU3MK+gA-L{$rwW*Wqs+*$@eRL{+6GYF#EB~it-NS1blnJiNlep2_m zIBhs#g-f+dqKe-u%CNEyWU@?IeJHD#Z6s|pPd)8jU+49ED}}xUQvdXJyw$}L!Y>HetvY?M6(Kn{J3V3sN!0N zJf$pSpSZm`FGd>1IG^Y3iJGFd@X;UeS1ayKZ2N=cR}3ya$McleXW znnj{IAW_u~Rw}DL{jK{pd?Nb%+kIGLH%Ov7AW_v;a3;%?)q}FW&7+30NK^+Ts=C37 zWSQ+4chAqi)rTO~EZV=ZWM#5WDa)+KxP5m1ElpTSRw#@9?*ZM+y2omgWwvA7IzRtr zf6`DEzvorYR!29pZm=?0<~zpi)%iEsu!fr%Km47q72V9b!-`~??|1+=^LcUDP_N-; zhAEa&6JF>BE0bl)!n$@|3^tqw!_AD}{btlumhQ1qS@q}F^nL9@S;b+)WQCg$an%3MH0S@HLH++c>i?JH{Qr9{z3k%uxcJVC zFFE_~XKz0nga6-SW&f}Izu9?d^X0G&1F6Ew7Jo-vix*NUN;7Qc{=}{GbL;U91F6bS z4B}dblm#h=tsXT=Fm9ipmmXg_-iCox~Tht-xwT< zS5+qZc*y~>>}kPNOP?>9Dk{9PgvyT$;#xkfO!WDd17ulTFxArOOF}mcrrP0o1zgLx z(vm9qGWLmk@17s~e8E1^wvA`a?isFSOxZ+9!rF~4+&@1rjlLXOd>tv`a#kW!RsCef z41OZS{Jb>#By4o1<;Vh=s$vvo%;u$bO8T%a6NcER&Ee)jFkz)%)s#>LF!i;~S zVKO`aMO*a)sdnVl0+Fg#DowtOeGY1*`V0P~Auu_#K%}ZHCDUc_6P0^){$^YC1F3fS z9kcxvJ$ECumXOs zD5J`DE0tyZ6SvRKi@Ao$3Z#m&nYnBcsp@vCB+HaFhO*3AGl;K5<8$K-nHxi`Qc_tH zQx-lGef~z%Kzt?|-y3IGZVcsZ&19LffK<O;|}*RH{U(__d-8EAMNQEMuRzb$) 
z&5-i4X0i-^;`ZwNt_^D-Rs1}7CW}Z_cUh4v^BE8Djp*~aM~&_(-OB^Im({kI$uecZ zy?kET5QKXfzZlMEO=W2(n8~s=YXoJzz6r}@g?kytlNpw4mTa<0vP@a{wcYb4{Yj$- zMECN5?q%IzFzWJ=FEZt*evdn4>b~HOL<{C*>D2wjp0o}{G$7+&gHe=jc z{QnLAFW>*KZhz+X2jTz!G|vA&aqH9Y|38lN|L?!~IQ;+5;Qaq5uRjC-|MLz1e;)q- zM;iYB2>ky~H~jx;`2U}6`2Vx;|3CQe;{S`%?aR>vJwC3Z96el1wM&tbqi66Fx6c-( z-It>`%EYstPK}YEK}zb-u7(-!OP;hbI+qEk{+>B_^vqgW&dR zQF?tctg)BoNrfRDs#>B%vW$GaiKj4Iu=R2G)QR_hWxb@Aox6(3(mijk$V$WxW2lw_H*z*84r=1&?` zDo-llYf1T}r<7HXs&+%cQx{+A!y3xsNdQu$op-(Bx$R}>E7S~K!!Baatseqp<%CNEy zWU@?I;HitN9yNA@JgG30uR}|-DwAc(!uED?#fBhwYKJEkhWt9TRwzuCDGNMxak&Z0 zWCc%!Z7in&ejQr(SdFskU!Bs$1$*1YC4bUT7EdY+`E_XBV5PF^2W^FgvM&0thO&54 zVMrIV_JAW<<~PRO)#A*fhO&54VJKgR7Q_KJ^ zT+H&Tf(7M6OWSO!s^ykG`!)Y*F%<`*(Lw+4v z534X)rmW8K`+x8JU-SLHr_Nq>;iDJc`L9s__P?GlN~=X$ zZ*lwGMQQaVP#p$P6%#nFMOZCSIDB})T6zaiU6f{D0@YyvRjeF2GZCn&hMJiPCO(Tf zy1O{G`U0p91E`7(9M|$$ML;<<)lu8ein*!u`uJi_k8_jwz=s5?r_R(H`NZATqO|)Y z>TzxoANY_wRYepDGw_MK=Zn(qEq(lP&|FxOKj;ULOnsWp3({r{Y>ZsqFePTSh){d$lONJ`QW})Gm4IkUUk(l$e|9 z9~9E%2|RUCtThrfdeh{oL-JHDQ!-gLH-V=vinE5v3ZB{}PaTq{DobIqOk3coi(;)| zvVy0I0encFsz<&`vP@dwsSESf3@6c>CQrq+NS0P9OqMANJazG?tt74!FyJXOn- zOqMANJazG~M-63>rw++ewH2JnGG&3ME*`QW2%akD?;&}rvJ@uElm(u;cv%yc%1WL} z^Y@TERa?O&S*9%T)Wu8vNn<}v^Y>6Js#2veS*9%T)J5^taKZ|n+M)S-C>B*&2O?QU zK5=)oDApQ@8vALQzlY?hTBby@41D75`Jy;$I1Q#nmF91F_TqijDkYO;@DsPs7R6d4 z$%_3n&EG@vRIOB+WEuFxt@DNXYCubl`)QiLaV?+pl(LL`;`VA$d^H@_;He#&zlY?h zy2;99nXY_Mnn5^Kb9h$#~@-=ALWHDK$Eb!FD z$dna46~{7jKPX>=mP(~0%ajG4x)_?W@@vpMsW2o@)eTlEtF{yCRscM8F=)cdutufI zlL|wA4O+{TNS2XL++8jD9yOH3lL|xfRNY`jvJ8CU?)jqULlBiJPb%O;!12E7!Qf1m z!B5;iTXdVSR961tE>9{9$y0TY)g;SI#<+F9==hU{vUpMfATgd)H&~f0a~b3IYVmzG ztl?em@T9_!-euikWwK0J@GdXD*Q18Acv4|V@3L;NGFhf9c$XL7V?z+$`#yQ!Gxt4q>-k&H+OsyxGN?|gQbe=dqkG1sp9$`k)^75n5^~$g4<_{(&}Tff~AVX*e@QK@Ji(;;kWF7a>;`$zurRqtQCRqkO zaqE2Xo9(3zpWHYqi!2q_@<~rAtNy(oSwL`mwRl^5T^rH;r_wLHmWnX+n@`c@l)V5#Ez9+9PLb&|?5$0u%|Eq=WT%VY&h71#HOELGdU zC0VAdnx+1lKWX%S$WljSsal<+vW$A-_GWC~=E0iW# 
z20d}>d|`eXQ1hd*$Wm*Peq^ayoiJIZteT}3KMg0WV5#Ez9+9PLd6LO8Wz{UTSZX9{ zD2pt0M3$=MNhZsbRkPIMsNpmiEVWCPIwDKe>V(O%HLGT+#Ztp$1xppz_lPW2E0mHf zQ&!DV%}+B*Zz@?Tu0^tRgO$lL>WSN{Me)0j>uAVkJTj0 zpeJsfFMi6OG?Yb_IwDKe4HlDS$^uJW{A3fB!x}7AT;C(IRNY}^vP@ZEsf(ZRsG%&f z)Dc;#Zm=?0rYuariyyZk2$m|Y?-5z5?y;CGQx?7mz4);vER_}ZWpRCvXkXSnR!NpA zs{<7Hqvc5h1&+eLEUs@{i`PmwSWH&^q`Jev{JZ#(GOmmp(&ER~NAxdi130r~Mq}JQ zU;MC#4Q=sj>m&M?b&HkhGM_PSzq=@=nuNFLpBxther_GtauiOA%e=<8|H(zM)g-(f zg@0MTw~lKWSN2)QgtdLfxcBbjXsY4JhP?Pg*7#EyQ}7BiW=3P&KVKAAjd1b!CUh^o zDnt60b)UtIwPlOj<^2DDobUfFt=oq-V@Y>MhLA*6b(BMB>=PlXW$E^Dh6ncf>WEJKvb8d;U`g#GZfy(XiJSqR8>jw%3~wn(kF=Ovh@47_6JcF|M!SQRfUu@ z)YN7u5Y=UA_;H2`qKfyC51S_xUvR3v{-LR;sCMZ7#d^c>B2m?1C6Z1IGWoh_HvV!~Ytip&yRqK^zb~5ydTjxu2)}YmXR2I)F;95MX z)+$U^dlJFz)v`EiIIKZb#rr)XQPpB4lV!>RQC${ujYJJ)@vOp#L{*EGOqMANM0Hub zHB43zRq=k0NK~~}VX{nFAgar6wzWTqs(8OgB&u4klw_H*Kvb9CRM!4D`#TDUYKLbP za4nN{N?FD}aeKA=#WzKk_G($2HJq^0p-Q4UB2m?1C6Zv8N_c9g(PNu@cEL^ohIY z%i^u!G?)%m5>*Up41w;kGFgT{ar4M0G@>s(Y*^S%yAw>wIa>8vIv} zn|~5jT+1gtr7Ylz*1iQqby=J>9M&MJ;{6_xsOkRQC)tKDJy5>?$`MY7Ch zjJxN{`+NwZSfzUzb4$Fhy2r|7nbjD#&z85EuvAvKm%DT?kLX_3Jyw$}vl-*o`SPYe zX()@|S%)C=Nlz&YrplJG;9g$duwf1Na+mJq5#7tW!^&itvfy4`7IO{tI*3jX-OD4o zmvw`c$uecZy}T^m8cu`ZUY75ykLX_3elU||%7S}&Sc_4=b@jn3AGz`re*fCQjAbB$L&iI&gQjER8;i8k=YyR2UPd zYHgCqYR??Fd%i4fJ|-)0s+hjV#Hm`EA;wn`E*~S-5BUS{s7Esbcyb6Q^ov!ep7UfK!)`H({Bqz^P*T9uuc( z|F=n&aZlVjU%tklG>+2qpu(6qRZEjhmVr;)UM*kk!y3xsL4`4Ks@5ivECZjoyIMZx zQA1fgs4yl@)!HPIW!w{Y&zHqe!)Y*ZYKI3E(4dL;RZEjhma$LVK3f(?jU+3S#e)iC z;#4h9O0rB@xM68_nsHiFc~Ak@@<~rA3w*FOVFRZwi=Bqb3Y;pY?=f+z)+U)OQxRSqwEyR^ZeQ4=Rj_Q?)c@srEqs7X3E4xB2c zZ(NI!)&6fuR{f;94uYpHkA9k zs?|xR%fKgYpD$nPVMAOzt1u={)jd|`%eW_Qzq@>}37b3>eaYjFkcSm;El1(hx&^nG zjc)JB$sM2r8YQbtgSKJJ712=v&Jrd6!vBL5 zwDZ+UsE-dqi66Fx6f9k-It>`PKPRaDz0T%(NHsb6PrZ9Q&-mO z0~m`Z&6D-GmLWwxIXT(qZgG3HD!smZ)yKh8#rutG8CDb&uR1=E&|3SzQ&*+kCsB`6 zll=Vmm^@WMO1$d!!CTxtUzKhjlNCHwyx(K;R22}D)t*Ff`)pO(eN0yHRPla~$x~H9 zO|lGp;@0`fdVS;Isq)+3xE7d9mo`NZwjs`UC~Sfe*ho;oH^)oLY@W#kigSF6(Q 
zlc>jesY{+ZCQsFBC6Z;}6L-&7#a|=I3T2U}B3C&C(zeQE8T`cUvsE$JNV1OWzIeaK z$!f37xO=|*qxL#F4xTFBZ(Pf$((z@oOj$Kg{lg|Kla)M` z-tRGaswNhK{-jZ`()&FoPt{^2m1X1;w^z&G_hF4qHND?s@>H!>B3VX0 zad)-+J&zhIKE2;#@>H!>B3TALarb=ryFLW5;?w($=_-J~zmt>i;YM%Pr{-m*?ruQ3DM?UE(W!X>5;`VC!TQ;o0Q^or|CQsE;C6i^! zs(ETL*if&*Q&IW`PaTt|DoDv>nX+o0TKqMf27{-H_j^p9s&xvJWy-2~YBAU_S;14q z`#mO4)l#KNmVr;)I$xT*W}FUHdcScklBMn7RF;uX++Hno>ai9okJk+YbML!CvKlDi@`>c z70TjCg)w=mc7#i^Oj$Kg{S{MIehr!@6|j2dlgcu{WZ9Zk^VDB9Wd%aiKe9}|O!jFEoc7t#*FN?c|6IM8wJ3Of{ zrh{4A!I>;m797mWVz7~@p)8(M7}LS58>~#0DGLtfW%1W=8Vm=sd}n=32ea<6m@HFP z|M>mC-RJ)P-#^{_i<_^$@%)WHyYb4#|NpXUe+&Nq;MI>_eP_e}-+AT5mp|O_{|{e! z`=xQi|Ho(l?QGZZ|J~scig}yZk?}=ExzbPO@gO( z$y0GHLrT$~6Oxfn++M9phfjtz%2V>x33;lLlz7LDeB$nEReF39HOf=+)Cqa2DkqX< z;1hSxSEa=#$%^unJQaPP90Fk}OjdjL!0oeD>G4UjqC6!}osg%hbV{fdqzaSOo;q-QwJHrh4r}mKF@8_TQ?)k9WVNRb++D4TpGKlac}ku- zAy3uXB$H*z!V0@8b{Zxtc&Zq`C*-MGnq;yJe&Y7os`zP`tl+6){GO1fDoJUQW#ALH z&R1rpnWR0HJQde6S*Mg`BUg{@lPbE*qwG1l|r6kLg zg?VE2o&KbuEb>&eAv2^b1F0S*it&3wo~pG;Cd-rsp1S&Wj~dD% zPo0pb>S2{kmMN<UXN?2D1gaAPRV_}KE+e0~f4(YynzTDk;s(-W3h4(s7?q}wMaQ;%rpz2x;lDl_;!WB2vqS?zxk{u1!iAZ#{KhEG1cVIjsvKQ z0UXyduI#j!v9@q=`+QZ5H4-)?MxZ(-P}L%Z8EY#Sx8GeAV@+mk96(hp;JB7gds1Vy z#6)Abf42JSqOsV;jRUBP2OQTju4~M|CvKguzN$!!Awy#PP!7eLtXSPgH2_~Q|dC8F>bF` zFS20`?{c@7?(Z?Z%evVLahb~)cUP+~@u;CKdY8xaF6&+^lV#F^cX{>2HU!~amQSsZ z>0Q=NFq37TbzIA231LZP znadcrSF7Typ|bL8(DW{k>0Q>%RwT<@#<;s$6>E(|4Q0{0Jf?S9TfvbmGa2LV!vEi0 z*ZA(F0sQ{or*FI;zyJ3s{QlqL*FK5g z|9kA}^H-m_`Y3+??|oMux%~X)_u}{eK7Q%xOOIT94!{5R@Y%CxPoF)6-~W3zYy8ji z|5dBf?#sXGBtQDg0mQY;i-;%&(BLQTu2!YvCsB_BDCX}8k*aDa2GH0i?w+qo!;fW9 ze)N}z74Wj>vq~o*-*z*8;`Z69bo}ydpQJ^VhZS%w!^-;9yzRz5aqE0#?LNF?$0i!h z-?)|`WogQ{9Sfqac(}bQnEaRWJeYPqcKPD@Xs+hkgM5>BYnq(RK#I5s{ zwfiPvQ5Ewyu0^u6Uddz`{KW0msyJ+9Sff47!wM53RV`Q|Sq49GceN@e8;Kgq;$eje zk*czkNS3ir+&y0vkBubjxafCySON2W4uQO{OqT6`fmBz;WFyIn_B74k6CzcuSW2?2 zW(87R{YZPo!#sOb77r`nT0W_4rI;*J7Le-dhuh2ZB#|o3-?$cGwOGkynX-UXS8w#F zp)4L&m=LLIt&+(yWdW(Ke#nL(kt)sK6CzcuR5Dq{KXLnP^@B}VCM%Gtn7=1Ps)|#Z 
zWEuO!t@G6n_>;zVnn-m*q^c|>lV$J|w^yq-_^?Jdh)8upq^fmFB+K9@?ygpMJ!&la zM5+@aRb?rWEMuRzd%l|c5X5$xNEO?je6da`%Z#76eYUEau#&8(R*6(6M5OPdJrj{1dm&R>fq)WCc5xhm8ztD2qsSLZqryN+iqRC+@CR#bhH< zLs>+s6CzcuQX*N#K5_Sa^=eaAeiWKW6+8bNf>X+}Pc7s2+3GPgrK{(ohzWDo(t{lj;VG$ueaDsjgntgypaXQWf*}gh*9)SeYzS7Le-dl^!*e zMWi|*Qq>JsCd-rsq`G>=+x=whDG#jGcSBU$D*#@*HGA&(l$ zqKkP#7qf1#B3WiP#@+K(aoBJg3>R~kE@o7}`M%0Zz+~BO5H99bao9+*V$Gt9c|sSn z?y*X;Oj&R-ua54T=-1(Z_i@dliy7DANp*w8WSO$yVqO(<4V4uxX8GLugf3>?VP&#R zS-s=;|F-q_|NkxD|NF$X|9tI{tABs>Pp&?U@BhE+%1hz@f5+tqF8vMs|I>?q1ONXB z{{O?-=)zxL*nVDB|9ovNK5T1`duWbYz10nes?^arYW6dqxVc)F7GI9qG>EErzi}iEVsH$+{6*u&W&)Ut`rOB7C_%w*Bc)xKipHAjRK@&_YZ>;Wvn*oVhpx_Dk;N~Egg2@}?4pB-$r*VjC3$cqOS zrbMb*pJc`ieRhCU*H?WMLSH1*dTPP3S$jzS6`^q&kgn{&sj|0oO9F zV9FveL!TWW)%9h6)=(JFEa20r8S|vV@UQh*asPaMsXerlK&oN_$F+=mV)63dm&NV# z^+gXG8k18CM5;NHA%NN+;aZ!*H3nZ#qq%c`+bs@xTRm?S# ztXRI}zygV?7AhrK?G?%9HOVsciCgEZxA~LC+QqX96B1S3Yh|*Ged6|N^&38{v2^jQ z!h}Rs+rW`5W1qOYTD{eyhO&58VM3y+d#zBG*^F`beD&)-1feXRRlpk%@2fU~nJiOQ z?Oy)1CafeYltu6NgzjbCZk1%2vTFD8Tl`5wSv;$N9OsjsQWkb%x@zI}YW1r&tl?hn z@T|gw?qzKQXR=ILwR^dkYpB<8D2rzmCUh^KI#rKTO08$D-OI&W!)Y+w%krJ|3Ej)O z*UDs>)fl(WR>fSyWQBVfgPHV_?q%I-HOVrY@uYSyo3myT?q&JTI<7^sbeomRGM_PS zFZ}=M3-|xk{ciyO|F3tdI~Q($2>k!E;Q!UF&w&5W!2dsZ^9k_(4}kxF`uh99|33x( z|M=BUg8x4T{{PICN5TK!2mb&3rT2pWe;oY(k+bK({~rebfBN&P|LvyPx^(<<1gC*i z#s7_K@z+%qH6v*3vje2Mwszk%kgE8 zMe({D{Oka!u1m*Hq8{fa@qbT=R8>SVH`zyT(eRkBOT#Z;cWj%=S}MQ&jcfU=Qm*9d zZr)E6H?wu=_(`&&iAJP4B~n!tm1MOi5wttzYisvmV-!!Ct3Iy9ld6K4toAH|c1N`? 
z?LH1`AXV{yPl;4jMVYMjEP{?lwJse$iF%xyP*sQAr$nk+t3BYnq(RK>;S2*&15r8mn!|=xR%K}r7VM= z9U#?pG1y${A!Osqm>bm%BBxEy!(*KQXF$7wrFj?&-8YTX0J#Mf1)A;IdhyHI|%dmbz5o4}P9=aX#^~j$zR(<-v zQ7~jkXQ4iyivJteGOX0qnJiNlkm`EiQDfDo|9eWLs%1(f%h+cJ zNOj$}Aqb=@{_iP~s#YnPEaRUYAk}rR2}@-KQWgLAlt@*(!i}EW$+Vyj%wZUVGU*Ru)>r`Rm+q}mch>skm~yTJZdP5hZUwos#>OGvP@Y( zs_SC2;WQXXwZp>-D5~Sd(kg|?GGzg&u8Yq`k`?=D9#)tVscNNCl4Z&QQeB(L2FHVs za{v!3;95TEDP;{zSwO1mVzS|c6-ZV5-%}!0EmJaCrYs=Ub@ACq)KC@=D@=)0b%Pbj zGWLls$9!FEHcVC^Rq=mMiBz>p$z&P->;S2*i_eD13ZyFj?kmfK&7Nq^Fc+KQfCRN45S28`eOo;{To!sp<|ZlV!@nhoRSB?@>cp zJghLKky$rbkt}l@;|q85_1D=DgppbN-%}cy^}I?Z%Us7D7@5~!+k|DZ!pJP2T%Xd& ztb42`S>`wfot>}0#-B9SEFM;v(#WhEtW=g+jyo_iuZy#W6IL(QEFM;v(#WhktVotw zjyo_iuZy`xqK2|~SYb*dv$ldWS*9#(7uLmFLuK_sSv;(O=4!mJ+6iW|Oj$58uZy`x zk`>D0VTCD;%-RYr$ued2j^F=l`2YX6@Bck_`HwF@eCcDC-gW8w;s1Zf#YyA;@BbV4 ze`)c>kfFwj(lmK$eJzGeRg*)8eX#C!fk|iU(&LLELx?C%lc(0#GOUy)&5#-R#7FJs z>vn?=e>_Ulnv%=9H+20qc)xVt_!`HtWdfBrZjbrY!8*D|J*CX%o=`NWs) z=Iheslc7D%NZkagh?|`sElx6HMn1bWP%WK4345H9x(QSfwx5qHuPigxo|DksxVtW< z8p&7)EP*P*rogm1k&Lw`CA{TNu8XZkG8PI;po+MuFfCDz88g+^K=o*WURjBt#5nFTYl-yZ~FBgha#b|;;MDFylf#Z@>E>Q zu(BwWWVN-0{>FU$WB#O}E&9LlHfBia0W(=`X<>b+)<0^)8a!3(-&68bEm1OArY!K( zb#c~kdlozu=e4r4N}j4EN+ip`X9qlWU5quc*MQh!@#@FoRbPuC&Mm{^>sq5mbk*J|8^3*ALs+K63EK?SE>be+fBw3*>@>I;NIRvMaW#3rFc4xLO z&KgNpD2qIGN}j4!N=cR}3p{l_H)ZA5pvhA)A;**IRx6cdUs%QtsajX2tl+8Q{hpGi z>Q*b0Wy%6iUBBL=#+pT*Iweol5+#yl;Ijjsx_;7zAb6^Hzo+D>TA`$}41RXNQ`fU5 zERz*H6*FH>1LUc?(`u4s;Ijjsx_-i+G}bKsZuOKrReQjhEHfE*@HOc5>wH*4S+p)s zX@V_tm6d?WvNa3V z<@IZtu#&7$7Ol%uT9u>Rtl=~m)@Aw5`jpmX z-D734%xByg9l!tA`2Xj3K3nsD`2XwQ|Eu}`?AAZu`ar|~KXCK4H$K(y|4&{2^Xrc_ z{Qt45|8Vtv4gY`Nl~-Q=c*Flce(6sxJ>2mBhtK}@?A;Cj|9ANRH>KSdBZyk-ID#l6 zvbZEqRT0Gq8u;vjr*2BOFGdhikE=fFe@4w!UqzH7Xz;TOp1LXRzIfdc_E@pf{9Rv* zcT;PX=5;sl*#%GCw0nK{ZwI>mL>TbSi_xkWu$GHimKSOeEI(7JW*q%jTVyHHy z*T-Q^o|@)wGz(a&N*5rKW#kjxj%rg}HWD?I)uFP;Qx&8{vJ8Bp*D>D|lMR!VJT=YV zc-B;w9#+X@8T>@AW40+S8zw7wD$U;ro61s_(j?2kXBRwm)3(^)kH@N?Jhi?S%F-$& 
zm1X3!3!b|9lGgf+5V7hfPpz+ISos`eCab+ZcfnIPU+ht1)lZ&^sHrS1Q!-hmEb!FL z7qwP>M2J;Cd1`$vpH(_+OqMANJazMhtyLc(V%1NcT3^est}Fwe=yA+9U*Jy~tA6rS zgiK{=mBM71vcOX}_uH@rPo?=A{jyY+mMM`eBcEOH)Xg1_8moTtR7A}_tAdnBmVr-v z>2AKcZ9@<|mF90mO=W47lF2go*#%GC+}DJqvVy15{Ee`wEUi=;Wf}PFf~Rh7`ICmS zlBXhMDod-BOqP+)E_mvu7;HFU1yAiJPes^NmX;}*EK?SE>ZVw1Bx)!tc`Bl&vb0Re zWSO$SQ#Zw6!)Y)rs>xFkHI=1R3X^5Z0#Dr(gN-CBl$AUcVN+RJsgz{dngyP^Iht#Z z@Ed{v4W#kjzx2rZ6ny@mgv1TPtMc7o9?yxdhrmUK${*gxw zWhGBV)Kr$1DVZ!&R?SoY(1#$_tmLVPn#$5nFq377ei%A!%m|yx(mrq| z%am2yn2WVWqK2~4#*C<`EIq7}$uebikKg}$$N!4&|9$N2kIx>w@R1Aeu%rE-^B?D% z(&-CA2cjJ{YBwl2)RAMS`pK~~*JlqK!%gY-N!a7yv7>;nIe4m{96T(D3jTUvs+-d7 zOQs5GA5Z*cO96j|^U|pfT0I@Kox5ZNZA8Z-5k4p z!Bla_F(!{K;99=yQls!?N7Qy)Ec&J0$M-UrDt+KMK#)_DiYPK>@Ush~x+xt$i5i8f z99tk#RSiYLjD2>2R5zvJ$3;JoDsA9+R!&V)q~z3O{Id(Bx+xt$PECPS=>kVsPEFDR zXwqfuvkRoUX}A0E$D;)#XBLQ5wMfZq8T{-5scwp`Mus)yB}W#BRJBUUWZBdNq`E22 z8i^X}k`oI=s#>LFvfAS60;z6_u||>=;*tXkM5W}T?Pe8YpJ;K+H*H%D{&*}}d03&o7K@e^DNL3r3rKZSY&D#)0;$sc zjl*cUW@(iY$ujua1ybDS6-X7^oD?jPs&2FzWf}YI0;z7=wi-NDC@T*u)Ymdua#$skW$?2L zq`E1#8X4A5RvuPB*i@EQDVZ!&7Le+uIBO(oC@T*uAZjX0w^*4hQx=fwren&=uR-Tw z1w>6{X_3NYnX-UXH{WN<%CAA^VFiRuW$7NPB+J$;Al1$H`jdvT@~{Fzrm}Q{#blYX zfK)f%(}d-)22$2R5#yc zLl8)n=5ItzW$7L(lVv{RF1`l6`OYRRl@-=y3}z~e)@9veHOVrMF}`m%-+YHZX(%fX zE8s~}S-Qc>WSPmh3+wXc+kIF=S$S9iVN+SU!^&itvS3}_e49rNW#wT7L``Ms1}l?g z%7S%yQ;ap72E)3HnxCVZ%F;a+lV!?+b$L^qHIl4Yv+}S49yXPwd#sWyQx>eto3^b6 ze>{|xhZX8;F%9Yli^(!&!MeODwi+rctjqk)I@n_>OLth2EHfG7hvka@|Lqs<|3B`Z z-}%g)zr6Fh+n>7qf!mMY_o@5-{JzI-ed^XTw;sFs4>#X;^D+GX-}{>1|9ju{N3MPR z+Mir|`0B^6K8^4H{q2=^UwQcQvzOm}`K6aWdg4kR?f2BJn!A%{-o6GLaz zvsbg!((03_F)8I6iKsa#se0l)H|W``S!(I@<$Deg8qXSp8c}mnQuX9}Zs4<5v((b+ z%l8~GG{Od@Mp$m7h59w`xk1lf%~Gw;2M`(|W2#5UoRm~LOjdjP=+!K>^!a31b5aUK zjj)`Qq?pcRwWp6>%~DINPon0e6!i(Ba#E63Zziifd-Q6SS~`7_tk_6*W2#3~PD-cF zCK~ue$9=Xbtv*RsOiDuu%SlN#QYBgK*`rspRI}IMu=g=p*-yZern0m=$z&Pz?A0u_ z*lRef!BX+!A7^E>)lykXQ6gDJJ$p4vEe;!r8p_Ik0-~m}6rx1340`rzmRbxpOjfW| zy!U+8RF-m-RF;9yUd>Y9(q8TXLt|tqSh7?tQJQ2K^z79v^;g@=JwRxLq*%#PwLD?6 
zOj$Kc{gw8@+zpn>egcLym8JDbCd-snv(#VqsG+RvCm?DnOY4(NmMN=dslVhy5X#Da z0-~m}v^-(5+G{hY(`@sLO;{?cAIcg~S!Ai&2QJAnWz{V87yL;>S=mp(lcut?JjrAk z_3YIw_02Y{!BXX8a>7!zK8a)*_3YIw_2)fmC@cF3XaT3Pv_6Ss8T9PcEcNGX2!f^J z3_&W3ELF>sOqPMqUd>XAt%k`8mWp!(44Z3~mMBfK40`rzmfH5z!1x!+%6imgVXhO)AsfQL)RYx06=w+ethr|C9xIb&;Ijvoy7`eNERz*174JU7rn0m|X_95ovxh}( z^TYn6p{(pD;P0Qx((;7KGG&3KZr3WSO$MP}UE5)KFIT6A(3( zr9I$GmMIJC+U5s+2x86ZagRw0v+l8&EK^n&%KCvOtRyRxmHh-fY${9lSS4AetS(sK z8_JU=3k(ahd`wO(%(}s1vg#*Q#)y?|bGM9Z#tjcM-u!&8=wa3^R%Xl0#yzZTn_{dH zdK=oxjshMfwWV9EOqY3$d+;!Cim^uMZI)UT7d_0n&0@aHZ`|vQpWGB{jnLcJ5oS*T zkDKb!ebzByZLe_;9_G!_Si_MGd1X@pF;iZ;%VNgNY~1Vo$Nm1_wsidX(kD_C-4m&* zmYOj%_Spkc-CDa3JT#UQ7M4V+s-ql2e7>jk&I74#OS_NDNq+TLR8OR;T8ekxe(6g_LhRJ{8{%+na4m z$4`csS*9!?)opRs$gsxRFQ;pWRJB;i zWZBveq`EEU8i^XplAaBbsunAmEK?Sc>h^1G?VrTjFD)A)RjpN+EK?Sc>h^1F?Vp6Q zq-{f_s`W}qmMIHJb^F!+q@gTn*$}B}t&+(y_=!TZ+J048`r}A3faviZke&^ZsunAe zEQ6mtAl2w_o0b zrLq#K<}nb2&2?Yvl_punK6^l_+b{Dc4P^~jSQ4pft-@rPvVc^#U+Tje%95T9k*XFe znJiP*7|ME)M-633&xS}tN$E!SvTm@LEK?TT%iH3t;e<7eHA{Ln@-=87n3*h77Tn9* zVy=;>p)9H1mG~AGG*27H}9ka^)RY9=QDB%lDe!|GRf-eDSZF-~apTv)$Rh;roBv(&)<} zL$~}mA5-G!^JQsz>L`8v(nuW%lr`IyPG1fgSZIVz^bfCTgw@id88Ty@eW28>wfVq7 zBV@LXfLAl5oSw}0+n{G3D0N%feEB8GVm|oQx7B z!f2XNMK$q$8}sY~rEW`~kI71uN{Pc~no&hH`F;t84OP`O)N|Z{81FvRSS&&My z+OtLTUz;~^0H>K40`r~Qn$rT!(mO7%6=@2rWsb= z)<~8?&puG<_VM;I4-k6PE88?Q7bLCpc(C&21@srWsY<*G!gi z&puG<_SNlW9xOD%=C%}G(+n%6NlBI|3l-t^F@Mrn=GmPAUd@p5vNBnwETGiwqdu&m zEVgI5M5$VrWU@?IK&jhTdDKuA+cWr_Ry?fMC7CQ!7AnH+D}4w;S?tb$_heLA379NX z7EtQ;ktQsa6}D74RD}ZGvJ84+u&V9L zZCDefvONQ%X@)(aEQ6kXpww-#(@?L8QrVt?(KMr;P?j;zJ}Sa(ano=bOq9y*41A^; z^^~%Vd-j1+x5ZAwWF<;vhX!8L4C~6mPNB7D!O(BbOVbNmDvpvx4GEuVhCHP#Oq1=f zP*(BM$gqa8*q-SUrD|Og%EBwsjtXTJJB>sQWwAYj&uPWO>IN&6)qJ-2t#6ADwf0dF zZi|~nk`>BgcLvBaqn=V$tykR*c6(25i=8IPTra)`Er)9GJ*^D;0%g4*X7}#)fg-IS zbG>A#Y|r3tm@!W+TCn%ovv%g&Y0*~3O_s_A4gOrlJ*6#!o_&n|cH&`U*PC(Hb8?6+WnT-1w|83vH zhQipUslO|7d|cgVF=HlNc$c?5ABB(@+cbD>GpxL~k}+Ep@jd8mw}~qmi+!OKa`HWB 
zd2f#yGu@*7zU`D}&5dDtm)WSnljO6?ijd2fnT#<%R@-8$$+-*0<0!eZk`wE)_JAW{ zW-{)jZ~7nf z`+vLA?#mGb4L$x{d7KWW(@eL_{xO0EKKtOQyVC8;5d;j4s9aXbQ&mSff(AeP;HkUP z?&Ag^d8(+MJXKjr^S)1P0|1`7vtC~>d8#O$JXJwTzV7zP+dg>euJrn3SYv(?#gnHh zNr~4TW8Rvf!Bcmo-6v6FdJ?^pr>clDSvHG+r|wF(Pm-0h6Zns1g&{Y|nAeD=Xpch>7e0UE+&--bL@3zkflkxz7os%`1@aafb5vM~$B z8c(VfOC-z4XTRpD#b6^*Ls`@?UvTC0CJ03NZC5;>MR0S!SEK^p^Q-9mm(&VYqw;@kemcnG2vTC0CTTNI=Rwzq) zH{_|xQkrBL`0Up_^*8-VLs`aRCpnXKfg(z_u~ z)zc~^S*EO-r~aBhX>6*cZ$qA{Csmj%Q&!DW-{Qj>%96$nd8(ExnJiOQ%~OlPhST6E zlqHQD@>H!>GFhgqnx__j4X44hsPbeDIsy3#NGpKJGG)~~wHRz9S)nZH-H@m1X_Y2f z20r^WPc?T98j;5}OZqnCsamOIvW$F!5>(sbuHmpIPnE_Ed8+QPB3VX0`!!E31{;YQ z%96$nd8%%(B3TAL`!!E3{u(ALd8+hn$WwKXmB}*r*{^x(&zrK6r%LaJJXQBtC0VAd zny3DpDJ#DQEqxpERNY`PS*EO-r~a%dD|xCkZpc%0hn2}PWz{_OO&&FrC5;>MR4r37 zS*EO-r~Zr&LF@*3vIY&YP?qkom@HEkcDti2tQ)LMmU)c<%Bt;8+OVdBS-LiKFzXI0l4V}wJ{-*3pYW)mENR-%!K@ps zNS0ZR`*1LCf82&39n4a_(ZQ^HtW1{Kjr(vgZ;Qc($w~*abZzKh);(59mMIGk=B>GF zU_XwEFbx}UHOFhE8!RTvlm!R#wzz9JVa3;=`DqncR?@SgJFHBWDQf^_6@!gL4P{C7 zMhCNQurgVutikd7f4k51{lC}U{KuPre)G{ApMd}Wk?YT0|KsZqU;7yN|AXNFPhEWp z`2Rb>|37m19pL{TzI5->0~h}W{C|A**Jt<6M)>`|UCZMCFaATdE1kal`vHhXGSWNJ zBT`l6#HiU%d=7wAccs@SQDf97@g9+?3MWRbPO$Wm2SBR3(&@{^0|DY$vy22y$`|KU z@67loGB?|mPG7#_V4@K=OUWLQsw$^>#f^OqfK+$KE?-n8;Gq#R+eWaP=98Z4oEiKa z0IBXulaIriNR=9g-!#JtOo?O}{2TzO?n;|aqUL;*yIo+`j4B(bNS3kB0k$r?(&b~a z5~;F51I(IHrRLB12xmN6?*NeMuC)1>tVF78(g3q&SUIp#lGUC#2H3aktjE_+cPb^` zBU07MB-5?%UdHGJt4=%>w}Nn`YP($};#l08-t3t49rGu|osD zX+}MvEMuPoOu)Nuu^~vL$_5SWrWy5=vW$NYfK+$i+=OMa5~=d^94x01R;!beEK?Tt zExT{>CykY!4H{t93@P9*m1XdA0C(!{8+}+qS?tjCh*Y&S31u1l9CV?qZ}6z0EOuym zM5YCN<-`#zE8G_)6SVxX878^8BKn{VtuT<6x;`W~0eO(hbc~3u?D!Vk` ztQl8c*pe+{pM!4q?(XQO;j@Oe*rEYv&6x7KGF=8g@tu|VuGnePND3etV?c3{scLnS z`7-!9z%0BgmKq5g>SC7$hSPjpSqhl2Hv1fasqTuUCjF`XWUB1az;7C1wLD?QjDHTm zRCmQx!;B?UWvd3B(~K*4<(M&B4Z&1*M@vm+EPi7b|NY>cUOCp*?yW*>nupu$FYVd*Q90Gxq%$Ti-V5++>HI3!>pxLTH%nW-{ zW3_ZuyB#cFFDe?#U0i+-n*ADl<2mEH#*BRq&?DM?NqJO;42iKvpS=mwAl)urBZJ_a}|zOL{f5 
zF6&+^)nz8*KCH{TJ3g!-E@{@#x~!Y6OjcW4eOQ-ww>@fTOPV#bF6&+^lhu}1A76vs z6=O|q3{hMhN0o&fwndq&Q_`vr*@&`mn0mG=&KgNpYz(DeL+i43f}3QS$G8vc^3H5E zDD>htOHJ|j9vdmq=F(~K&ufOy*teGWiWcctH#Z#!^kJS&$|5>?euPEBK5r9o78rQw%v zdq0UP1rOV4KCFnSdE3XfK!d36tlQU5qDr-cvSvtGnwYHiBr*U|-IZ>i3~NqJ$^UyK zs(S7#lhvL?1|X`t((se0IW@6wL!zo0%4D_YkO7G5uJrpPSvfVO;L%@*A<%jylV$i5 z?a|q;H2fr4IW@6$L!zoQrAd~d&jE<)&P+DIp)oa4StP1juVk`}eWI;g?TX2U!hASE1feWx+mNVgPdJlh_!ER+wtKP( zOJ$8hS<<>8QPpClNtU6{0f_2u=1&^R>QPxFs#>RHvW$HKtyH@wY*>@1O5cV=RXf6w zEMuPo5Y^r5JZdOQ`Zgr0ic%t3hCT-%s=L?P5F}BRwhf7@(iA4klm()?D?S@0D~YPK zZb(#>rj%rvvOrXKX0qugQI)n0iK-SUOqMANM0Hn8Hk`1Ms7l|4L{+PlOqMANM0Hnu zHWD?IC4CzbRjpDoS*9!y)m^dKa2ia9s&s8gRCSM)$uj&2P&V5YpN%9dlqHQD5>+i! znq(RJ9Du0q%wz)`dR()3um)ZKSX{J7$z&P(1YN9l#bm=_O`)!oZYSxHo-VMC&-MGBK;$^uc{J=}z4vXZDu&xS-* z_gE!arYsQE-9!GQS*T944Md`<8!RTvlm()?dzlYw>;|P>L!zoXtW1_E3q*DIQjZ$S zl5P!&s&23{S*9#}6MFZc4?(P1(yO77S@&3(EOQ;>Z#LV#qzNm@3S~*ZhDK&>3OC6z z$MFD0=H2)ElZLX`s{!~5W$6YhlVz6U0gTMM7u&F=k(tdJn42@KR0NSMvm6g#WZo5< z4fWa&W$|DQzVe(=rK*l(nd5i>BlE79Y&Z?3k(p;}@R8??dWvAcysce@0gTMMVzObf z(#Xty4L~7|0~r0AG|d~{r^GK|DV3`0o4DWZtDL}Tzent|Nr~ypWyrdAG`9$ zR~`iae<%3=M=res{Qtw?{|}t~4fy{U{Qus+Q0Bkz@@!w)d^v1jqA#d=`$VcLoo3ij zVreFZxM6QSzCj{Y4jV9QW z^GUK|LgJAmSWh#oNT*4bvCkpum%a7)z(eCnv*N9L^5Jw6*<`W|ej>BgzV!Gwtcg^4 zSOJ(d!m4y4Sq49cK&t!F=98!~A+hL%?=+(dMag8fXO1E2mwoB-Fl z!y3wB)!Qdh)!HPJWy-=m`{Jlc-ZSWivRL%Ocbda0Yf>hwepVGMnCkv=SwVw(Vf~FU zptQ(TwKie4jDKQx{N(;pS?n1%w8gp?oHZZ!1==$9IRsPPU-V}Uak1#d0ji97Qd|Z< zhp1upXJxt1XHBNc$`_n9;%a@ugtg)45I5~Fc-W8^D_0yAT#+?uKWo`(&MvHb0mscMyy8mlFy8pBPy;;rF~ zm`s)RFAlKf)1F$sjDMn9ob8IahBIO!RTjW~B2}$anuHnq9O8zZIcqSxA7=v=znGfi zNwrGJ8PVY9utuuIS(7s&fM|qeYl%oz%all$!OuaBRExPrqK3Yt{v}e?y;fw)*yo@| zs>NHA$pR0JXC)0GQq?LYCq(0)gBqz8b4?}-Of?-y&?{3q-xF@@X%w( z4l7k6Ro!bbS#50%YNYz>MFhD`&#ysua$|_^JLfwfZ)+y2Ev-R~RDaE*hO|1QAw;U$ z3eIG;l{Ki5>RXzqlB`(0I=OlQv*xqPijc`N{)vXkZ1<~8SV>kaULE2PB30dPHOVse zIjE89ulSROvO2jj1ZIsV)jn`0%it%PSJm#9ZCDejcBm{ORqX>uvdm{Zz}KL6zvNLv 
zSsijlx|emY70EK2@t}4u|Dp{+x|cgt7TwFb-D0xZ`^`b^UjBt9ER&V)WjS9%_p)xc zO0rB@wR`!^{-mKSY1YuatbJf6%am2Smy5H86V@bFE$P2%GZ~Jf|5}s+D-vjeHKlQ}?CaCsAWwn&i9$&uK=Lr72$ZQF{Uz zf~W3Fw~xt6o;t~S36|50Dy0gO)t*F#;Hmr4?qjl&r%rOk2V>2!vNn}uwI`4vcpX zx#IW9Qx&8{vJ89FFhnJj~!AaApMG1*A6LRn)fi#%0XN|P)D zpF{A}y*X^aT8;z2bw0 z#=}xs`y^ao&r;e#C@>H!*m@HEkcUhxOXQ^!;md8$?^C0V8{ z@YMbH`IE+qKcce8Q?){2vP@avsr&EsVGU)Cs4VhSEm1OArY!K({r7m(P}YdbB2U#4 zC6Z;}a|oWg|85_GP}YdbB2U!{C6i_F69YEee^(Qh%1Vdoh{_^Q)heY)mVwV9c@0j;Jj1R4q{=Sw=pG;Hmp!vY}p+r;eyB@>D&n zlF2eRPu&-XjSOojYe;30r)r6k$ueaPpsZrDk*J}pA(cg*swGM!%fROl zJau0@Hj=DR){x2~Pt^(~lV$J|bH;36Og56NShI#y7I~^xDNV8rd=9}=_g`BkQ}(MfNS->RvdB|)kHut}vcOaKU)_XdvXZCrqyoP7d}@zXl4Z&QPu+i&KWVI4EPVUq zsk*^pvP@a{F!cT_eONO)yy;ZZ|btbOsR=Zq?)OC-ze#zVN6 z_h0Tq5Xxfhi=Y`*s+UZbS&p%8&h}r{gq384vRM4W*_>gedTEkncH<#j%=@FSCc0{{ zARjkaEPUZ?&X7{NWU|a}JcNsRUu-p;u+qiMlM49MbB2}LC6Z-+;~`wk`{JyTsG%&L zRKTa6BdTt&GFhf9+_Nvn8cu`hV&+K&eCau(o&pyzdbhgkz2o=)8vp+s^*{Lk?a$u+ zFR1@({y)3*=v{{MlSuSNY|^Z!p>|8vy;pF;ir*wueP{r?#1|My*a&ulfJy>;K=E9$yX{XlThXuv{D+g6TB=f?Auzu-Qj% zM>S6^ExsH!z|e@A6B0b98C8^%?>Jsn9kx-;Q%jF8-|=DaR6GJ7mJ^alr+LTC^*O3} zsx|nA!Bg>Ra)xAiDzeFUyq=E)pNz1??n{F&$>*?poRIKsa)#xEB+`j@ym{DGSOC@1 zQy0)j;*wsWyad`(g;9xUtBfJSO8Ugp`1^f0@Dgb zGS;4$MgXe&;;UiC0;nQMjGGG6GUb>tQ!Rk%{^+X7jD<-R-zaCyl$h2ilCk!pKSDQS z{}W|-X57#if$D%jRjU+c%%t1Fs{iBdRe#tCjS;902voI9iHsTe9HF7H|FQO}k5cdW z+LEVYlN#@VRwF?u)fXqK2}_ zQ(-!ds9L6kvJ89@>KXv^I?7M zGVnPBPu-iZ1`Vxv(p&nszH>h831!(gmWSY}`{Jx&vVx~}$y4!p=ZtzvS@wTIVP#mOQk~|;uuq<< zTdho%DGNMx{~C`PmFhJ2g?;i=Z3Tz2415m3Q}?g-A&5$KN@bC!Y9~0SL4%)Y$IteU zHDRf&{B_-Fu37lRbG!q((`u4sCgUN#2EBjOpEN4fDV0U`eZaH;mMvSCf{ z@|4P=cUgB?kt}l=58++jztW=~m4)49zO3{v>mDnU)!q>f;a%Q8VndML->|NjFw zAIJIs+W-Hl>(5+&yz&1(cJ=wI&ouu3y8eH@@&AAP($kk7Y5e~WpFMl_bmRYj_X`jD z|0`FjLuvQrUll;~xI7W54v17$L@|H{KS!fMbtoM_i5iPOk?Md*RW%d?XzX(Yq_6i+rdO5Y)(z^on~0sPBm}4vCk2Z>cQH5 zxFtg7)C91TA!RF-Z@a-yoL#66rQOG24W!y7QibU>!m5ZeS?x(=1f+T>9Y2X0i~cyL zCScZ#DpWm_)t*B}K&pq*@ME$9sdkA}fmt)EY^9j2_B=8IQazN8ACnbG6&{C}M}b*0 
ztZb)Bvf6XV2uSr{?Y?1HRJ%l~10q#rDNI&-5*Y!h9*V<8hBcH$q&gr{RhSaVGWao2a)x#|tf*SSage6PC#e zqzao)DvL-}o5GE<(0^-(gtBhYC{xA^>E#XHIzl9Iv`Tj z11pg%gP)@Tly%LchO&rM2SloRUL}-e>~jR9dbsLC5XvG_#iQmBoKlwYPprnX!<8ni zBrB9fq&gr{)k397ma)$fkm})RunF)39(t4jkt#f=@uXU$WU>r?;tX4LDE=BwSb{GVGFhf9Ak{;0*hth+7Lh7Urx{h2flQVu3rO`){56~g1F3e2RN*XynPQq?_HNtP+AMyf||O(rWWs$C-00g_V>?~UH4-(HMWl*PJ!e!|1|nI;K1VfD{e4qbd>Fb* zqzbuaR9Oi!S;jxHBc1Jk&y*D(hVBxn;%mYNYzR{-mKSB2~=u8B&&k zOqRjVQH@l8rwPkp4W!y7QpKm9Gwcav*|(NQHB$X;j~dD%QpKm9GwKOt8T%a7NcFdD z2m+~giBtzfs=CKwvP@a{F!cU6o3K%A$*TKo_%~R|#dA-FSql zVqc6koCd?ij2{H&sHU>?yh%!arm1{I8_^M-&G}q(K_{l?Q_a#yt1yYsYrGW1=Gsw^ckX6$ne zqIxI|KZ$yrnmRnS0LmI8pc;yV8TuS!%Y7*QzMPuSLpkP)hZa!%WmH+6GGB&2;Tf47 zO2aRwCLqxWn=K$v)(k5fzb0LVKF8Q^AIw<;92z0B1q91!h7^>N*)sMy22ni}XH5<( zcxZ%8d4aNK*c0+H_BjGkJrr||L=APxfdvv(MJbsq+fIY19*VanlLZ_a&zj-_WzAfyz4&6RUlE- zA|;h&_!G9N*3N6G~NL%RGMTN`W%6%9!CD8p)5J8K%%NBC6i_B6W#ji zFtlL}qT1od&)_-DCzZD~lVyuOi0WbBQA1fgs{qewMwNvilV!>RQ9bl+2!g0~`0X=T zP9v%oDNL3r3qY;dRI1Q#l zmELbO0W(>$5@fOrf5JLAI}~$`BrBA~vkI`CW>{GXnq(RJ9D%4F%vl2l6UoXo3*F-k zDVwZJma$KmL#sn^)^J#psPe1=zVDo2PbkYiusi}$J$$FV0^?8?&nn>i&KdQDvg{kn zBM{ZYcbKx0sPe4BfJ9Y`6ei1*1)_TR_9iTo6+{(9T`G%2RrgpWS*9!y)x)>>lSZLR zq6*3ye}COzF~jR7diWNP8igu}>VQO5kE?{T z%w{~o*PsvI>_ZUERl1ka2+wCdr7W`=!#+DZd{Yxvk`;yOBzJ>wG-p^>mf4I)a4#Re z(Vz6FtnP%$qI+34SeY#I8RNmL!#CKlhI_d?;ck%bWo-jzvTV(Qd-+hzHPq|qs4N@_ z&zF_%W!+$9vTV(Qd-+hjHJk>+y^J3LXVhG?bdQzFLc!G94Z^*ADCQa_E8NRyY%pwA zTDr$-l!dyt9TLhiXAP{!Wg5hffHPz&OE*|dmMLp={Qlp&F5Lfr-d~^p|KOe1-Tr@W z|L?b7ci%tV_ZRoQ?$$rv`ionyx%r8ke{u8G`2D{>yYZ^)pSb>?ufOWr-(UOFYp=rh z|NivqE3W+Al|Q}ma-9Ev&*hh0`nyYieCcHuKX&n57hiVvKhECO)c^020{`>*zq3PW z_T}IKhaM+<4&IP7RY^)Scqm|W@WwSwwN4*U=rJS*4}|rUcyjQJdyZ?GS~`9CPJ)F- z*gWVlBu!NT#hY&2b6nHZ((IF{vEcKZ0xYNbu(C46n{L>1T+`Ij>&rJCC^Vim`)3$| zh^qR@H{H-D=JDB~H2Wl3vEbAC4cBRgm40uNEW@7TnxXs zACna{wZn4?L()_&P?)Ut95RMM^-wG}OjgiTxEo>)8Iq=Iky4Un%7Q`lVE&p>8dP~s zVMv;)hrKdc#y!V1O)dT!8P-r1&nXN^Q}wV{B+IzxxTdMaVk1#QSv;oz%W1p*+Y3H$XoSrLe@L3DMM{$_!=B@s 
zrvAJ?X()^56hK(>Nlz)uzIKZ}aCP{(_JThOn%bfD8HqWnWqzgQgyeuZGDAnkqjm zhz~vI5J>soD64)@IjS?pUi)CCno)ien&%XTq^Vk1Jb zPpUhtP?mAeaSzJ+0goEW;yDF;^a7E;qZngsw6AcES^)qW-z15`^a7E;V}0n4Q271LcMX1Z$e8s&1Bhb5H$5r z*{}vp?eLrezVe)5rJT-W*_wrKLLXl5QA1fgrvP7bMwOQ}lV!@n>F~poHU!~e?(m!f ze9akE3IZm})+~6K53?pLlNBCjkj~r<@|)1Q$12G(WpzOVpD0h7G%!5O9iCHwuQ{Gn zH&{$o{iLc`bTkjeRYTAkCgu(gD!|yBab+RMY?<45j19w~IBO(qXp1KmhVoVDQ`$1K z@farNLvhxS--d~~!=nl)iSlVripvbg=fM|rx4K)~Rh7}n#@4JD| z3D$*s*6RZdjgVQYV$3t7w0rY?H}W|FPrX-qeO#b}r-}m{j5WfljN)~-U;7+`r`{{= zK8bpqobW29-pNzdmJ*W_{*$d22t4&(>Gox^07GL4vV#W3n$Id4VzO*<0#ChH+I^WU z0MQ7`$%#BwSxQNkO(Ni__pH|k7qjlV#*{44!(g^!j92qc=^S3eRbV zl?5u2W#n@Vo_epe`y}ddYU+}w!gHEYWqpcd8TcH7r`{{Z8c9|ti#!!pjf^VmQzpyc zClJZ(UUAk)vO-zpsj!`9SXY*T&oOxFJ+sxIeILn6Wx?Zp)7t&8;mucRLhi{2911T zl&gF9+pq>t74J7Zrx{k>)=ZWu3q18+an?|;!BcSvF#rDKsR~j;SrAC;eFaawSBy2B z27{-H_j^d5sw^d^K^!Y=hlR3=vxdn^o=WfckUUj;!6jLyEb!EOW~&*cLzUj|A$h7c zf-_l0KF8px_lm7XhBcZ%^nMS?Q+0}f@0?*zC=2bV*2@Z>`u{Tb=3n<-Rh94e z{q}wA`@ZkM(>K8c5)w>IV$PTI1{E9ZP*GF_v2T8s;d}aX5-Jq25{(68-2aI?pejnJ zq(Fm)8%EvvJo{N|&S&Ouq3)<39HV})D&JZ@?_PWEz1RBexzE`}j~b;az2A7>IipIE zlF2e&0LGeMD%(IN%S^^&yas)C&WAOW#hVJSGiO-Y2SQn9G9Hi61o~@_8p`5L z1-$Q^QI9CgJjUb7y8Kr@1feY6Q~+nqsK=CLK4YwIv%mezI;qAWCZTTv_P za&gv3)KC_0Dhz2|)&o{33)8M06-}UGtl=^k*5wXwDhz2|*2^lXEL@#$hxPaWK6d-g ztuNeq?$#qW|Lx{8Hy^t3*&83e@!<8Bu0M7C{%bE@d+OSKS6{rkzk2VL7q9HD+q~FC_^JB$|2}nLabaBl{@?h2`~APO(&)=!0|#wR{3NMxnnp4dp~SG6 z+j9bvdRF>;Ic&h75jD%x5lO02lpMBd;z&bk0+M=G8hts($3arX@jW6*RfoA*c0W!?5wo;Bw10O((yea zNmc2TWVKh02}tT$)8Y%OBM!uotSnDqG|exS4T;HWFCC~>=VztE$6*bU+9gQ^UCpqv zDP^+SOUDEx^{lvRBx)#&By~iRs@GI9S?!f$0+M=GJT**KkW_Jek4RFrG-0w#Sse&PBtsbZF_}0ArgY!d1 zmHC>tq@7oM1wq-Q!;xdlpG5=xY4^dPgObWz2H| zl6qFmG+eNPq;^SC;WW*#vJXVEjCoFaP*(BNNYqdkNh+MC8TE*=40%paZ=V%A4VS_6 zrIMuL44qMrDa)`Y+C;On;-`^hg|bLeVK&XMt}L^9PC!!6%uItStPv|oYQ@nTlBx$R zCd-rsl6p2a4FpLQ$M=XNRZm!%EK?Rp>RIvAFj@H;G)XGncg~MJqAYu3d4dUjR_ruP zR*=*#Nh+Rqj;MOZN@Z1g)ze^ieCKRXloi->93)j7-+1CV!^#<}DC>1GM^Bvfi?kRs 
zq(zj9_nkAQoUoWJW1bVB)U#em7vsjRMU*-sO4aHl(`C$a0+f2z^{^o>qEtNaoS%D4 zT!uU+K&fXPABEVrh*D8XX4s?ZGVBTa$(^$wuj3|49S2J75~bpe=ZyP0d6~(0g7=`$ z-ss;N`l9a}7rQg&v7M`HE@SlXPtV?9LmTF0F@D3)oNo7ED zT-g(f#wyvWZZWgZ%vS^BakC=Qyo~pqGp0=1)R?)9C-5$x6<-b4uJA6)lk4y^XIPoE zAu)3qPdeDXinT_fhQjDw#{140Ri!AiuoI+(~K%QiUBnCIRR2#l!hO-lz8+P+i0{V(VUH_Dx;k5#y`Oy zXJ@73$NhgAO*H-jIDDrWR%BF9cVnNE3aMJV55%p^_6~ml9IPNiih^>s+l#lr;q$Z7 z?&GiqQtkB8?lU4%RhW{=vSp+~s-@#6QJd8X_dar1iBy%PWU_1#sgP=E_%T_5R6BgE zenh0IIEBfwb)-V7rQ^qB1yaRe@?*I+$;w|R%h>0nLaNs80}pMI#mfpKBGse!>J5HQ zDx_MxH8QNxp5|o*m`-yJ$gwJtW$<%SA=N)FTS*q!X;I~61(;4FsunDfEMuRO3aJ)% zjU+4D)4Z$zK#@`9RF%mx{)q`QJ1h1YNmeL}mlfbU&9JU4W1o`>ss3Tv^Z56NvS|JW zFUyd}lx4P0TnU+v#))x7nw6;dts8i^Xp;$?*qk*eNT z$z++bDx_N6HCzT0snYyCB2qniufDqXsb}8`sTO+;lNCr+%-J;x8CI&4OqMANNcHTO zJZjXcM5=h-Iitz}E0bl)!fViHzi2}cNL9?=c-}dq%1*#!nX>R2^w}@eVX3U}FN^sb zp5_cIJ3*Z+Ga66eUq1VJ|I$zv{mbw)M@T(jWwOj^Jb{1t?43TWp)9_$4o`E2l@nGZ z%bdm&_?ORq&ZCC1=wHVB&KdQHvdm~afq(g|xNEo!hJU%spRUH*l2MN-%gn|&KF`j6 z#*-C&R{EFm#B+vqW#RGJ*2)V1@|k&Sa0YFbLH=|#cFzoXOj-8AG77}`S@G6TS>a#q z(!V^Se_2mhnJiP*pn3oAZ@%>Ve}8}DvFo3|{!#eivYY$xg%+;r^zU9iN!T;~M z{OQZv%lCl)uP)tl@xNbOUA+6kCoilnOwWJv{Mq@@$tO?#_GE>BWzZuV-i(WPK;W=y>h_HGGCNd zpG1wFo_7_-B&v#1Vm+#sPT7)SWI0`wMqf_xNf1?WeUC|0m8Rr+WcafKqPi%JzFd!h zL}LiDIvta!zV0f$p--4tP8Us+FOwBSwZpp#Fq=lQ^i85nma$KGSmuk;H?<(M6o>65> zVzSzc2by=YMd|WMvf>QQy9%JJ8CLeBdh;;5X9q-eVRjnyaGLzn^^NK?L&|={WSO!+ zR2Rig!(k1gDz0z1O(U$9CYdZ#7KrMiIBFznD2sO$#w4m*nq;y}SvU|bilK(d3Zg2m zZ~V>I^|sjm`(Fb zWk1Sf8T;&js4l*%y(fc*HebrS3UHfd*dxj^_KBw9eDR$gHI&7>3UHfd)Fa9=^w~kn zZSfsG1feXtzEPZJ)MLsr{E4RFZ1L@NSSqU*%A)HVhSLn|%CaAqMaS@T@ooO4p)B52 zs3ui-Q|0``WSO!+R2ScB!x}_YT;F37RlTng$};xZ0a0Cii$@J*@vg#{L{+b=gt98R z>Ymk|JhAxZG6aDWCqY!j^*tt0)$T8q^}4v@JBwneN!&9Dq$;*=VAdR3nXpA#hCVxR zr!KzHzcsYQ+X`bMRqg&VUB*5;*xMGxQIj@O@X$tFysiMdX@09r*vyx)&km64qPS`# zY^aO(6<{~burgtpu(sn9jl(Av#Z{B;RJ1zdYZF+YNt0n^J7C5Pf5J6+XHk4LlCjVi zZ!7?_MqE8Jx2vB53Asz@w9w-XZMr3F0joMEL#Va9Av1X5kxW%D+WYKONL@V;|~l|4Z+X4>sw 
z`?_7n<%$?cRSe)`B2|51rOue4&kpvt#jWzGc`yv3Dh6;+*8Ei25mI8tK06?)i<@O& z88#HgOAC14Il~@Nn6b|ei0a~oM-74T&H~T#GZDut&s&|EL?y1n%WUG1uh5a6Aod$#n&~m-Va_+M2edHGzA1QM@&I z+!}{_83*SicXTgnBRHjnzq$RfP*yS5WU|KLUY3{E;b_hw@ZHO7#uK=g7v`*i^|%>T zxvN0;vbKR!S>`jIOq=)ro;r8e|8>_p?mBn-$w zB>r#sPBW^AD5kryPk35R7p38s(;YlCzLk4Fk*bO)r@Qe_Y-h7Y>G(;qnzgA*qzdb4 zeynUz_1a|Y6ONYCg|+)Ib>mC(f<1DaAw@ws-LWI8FmZC5FG{BYGFiqyJ3y+7 z;cUJm;Gs>jh*V)ajbv%TlF2go*#S~r6qAh%Ybc9I z6~5C9E61rwmch>skm{oNY$R$Zi%1o|(~K&UHIilQvje2MnAu)B4rLLkB3Btz<|~tB zTQrdB;&I#c$Du4DRaj3mtemGxvf6|Ojl|Q%Py3gKvWQf%%w>EEFjgzBQ^wqRK@>2CQ{W>C6i_R zvje2M_^CQ9m6b@9{_io7s&<9zWEuPH0I4n>_AiZHpZ;%P)<~ANgfm$NKRZCGi-&wz z<2+6OH+-iV_K30!es+LV7eDDy<2+6OH+-iV^@y^JeWI0kx+pdqE`xzoJM@2Jxyq== zlx5ye)HJh2@!3eS;yg|NH>{@_)|F+(Pc#xw7iO}-ZNah(cIf}cR-Yk{DGRA=?O8yo zi(<0jf)z+r{NH0D)ngaz&G^{?Qe70EjYN&}H2vRWB2_(Lg|h6SEQI9Fh_$NyH+2T!gSV>kWiVzJo21jr2!ComRXKFcoKRs@u;CJURHp)Iil(TE0k3o z1hml#BlBWxLl8!0@qfeIoKa=Irn1a++<}pKF{;BdSz%XG%Phwo7?~HvWWxn(5PKFcE8vmm3@fuWl4X|T&H&0PHXDf= z%Hm}OJo21TWwJ)H%yEns;_0H8Y^bb!5}KD4aB$72GGCc2I}O6fyeK9cNmeL}mleh| zGV2+uB+E{NgXaCen*Y!1_y6a&Uc2@Dt=Y|2QU5=F99pE&u@N&hS7|I5 z{HBH=Per|(LndW(4w=DE^u}k)(&Nh^o2EmRJas~zs-;OiWcXjGN`#5!bZHGfw7}v^ zvor-`%`cTDKIgYRbPKjLUzP@6&ctc(RPlaK$WxW1#Qd(lRhEq|c880j;zS0PL_dB zJQsPovlPwFxCt!P2NbBkaz6s zBx;nVwQq()^_a4ZeByuqd{MkK9M<5e;{BeGr|NB$OqMAN zL%;Z|vdstVnWjUPJQbeP{8-tKGFer@tAee7>R*=KlVRh*5$GLyvs(exKlg8qz7K&aJf|7+sJM)LqGfox_%k2cP#1wJ zAZx}wrY)RdsJWM`$XgL z&f<^jxCvCJ=}{$6h3_=uzD{5UKG8OOV(~}*t)VaiRd`M_=23;=KeuNsfa(WrXalH< z34B7JsvLz9Yqx*86;Lh4n&h<80IEoSoK7bMs>g2B*Ai2Wp*UO=V+~it0IFRA)d_*B zc7ao420yzMPc6{dM0Y&Fy1sa?9jVL8ow9aET*&u+z2 zi>-z$VmegGQ{g#{uv(%-x{Q3nxIAAJXN^P+eUYcabDB|Q!iK&Ke4=-Fx+umPNmj^< zJQW}yqso+Jvf5{9Oz_#FIBO(Xv3-%J;)&-BD^*HKR{JcC&f)3ey+vF>sR0I??Tb7W zucu~6*%9)rVdN9DcfRze`)Mp#cBLMhWh*MI z%ZuWyk*J|8T9?PPE~{rTlx1%$cVJy!6k|;;gOji>cWGV5^UgU0$CPD0x;Mk5%qt?|7SP8fcn4U|Ib~24E2A-|382A@2);v^Z$o0|I6i%)cpS= zm)>^q<(mJ$eBr|v?yvd({U@I}+1LDkZ)pF^|6k|J((cOZ8oLZYHrn~Z#R!Bdx|-6v6-wW&k% 
zH%zA)RSK3&mTk}Asms#slVmk(6E1Rc2)H)M_QYh_77d=dEbTr?R&>yKQ(;1$sw}16 zNKhZC+`HhZOY8OFxK-9B+~MSxa&3}gg~@8KBIskxm!;RoVGW)t=5H|87*;J-GFk2Y zxeK1UEG`>~8og=WRG5&bDoBZB8Tjmir!I@hhRF(^D(3GAd8+>EZzjv&XBRwmSzIN8oh$>NUGUVUS!||ZQSI=i0z9V~@|d!We0IT8mp|6tpQocx7H=xR zbedt0D9gxa7d&CQI@uhme zVzO+{0#98Qiw%c0c&eDcC*-Mm!pdaXo&}z|EG`>~8s};9R6O;ZA1m8HB+I}jo{>CV z7LyH=6+BhU-+1XcqsmT@$ujua1y5amizzF3s+hm=+H;0=Wf}PFf~PLO*_4$JLzAcC zrRNNJOj$-gyWpwIZ>qz}utudyo{FcQGwcy%8Tssjr!K$IqlU7`Q}NVuMm?e|`-NF_ z5Koui;6o6VDtRid*k;sY%CeuBMF(-V{Q5d9l@&j+i_>!MS>&mD#;TQtKcjUT#KX|b zuk$YrWzoirnmN8y4_K)zGaRFPIA4CP4Qtq##rzFxbB2`$Kq#wPShNoc8}ssOJZdP5 zHfB8aoKcS`%lyV&*qE1JZ9@ zxK5V&jk~ZhFB@Y`bk?R}W5!XLAyZk}2hL=f;TZ2n&X>hkLuKW|(6lk*splNlBg!(v zaThk`WwF*s)KC^}%y{ZKqaIOK&-Sc-^Zwt5s``J9`~M%S>;LC*|NphR{(lws{~xdG z|3`8E|Jl0!e+KveU%~zVhcAB)_x~Tl{r{&gy%qKUlNaA|;h!(8FWhtfznx#4AD?{U z{MEmhFH4Uv{yzL2afaroO^H-hIWffze)cM)TH1UPHO|l+wJDLRmL{=z z82jv1NVRnNV)MYa#u=KI70@`#T!?UTilYIhdc?n;ElZm(ra1n;M|H}}3R5CgRZcy{ zjeYhiq}uNBA!MAPY5uO3jRvWzYI2HK8@;R?__I!zrN zrPCLL4sl=a|DKYmYJpPnW$d#Drn+qUd<~~X%(y+p3ky>+Rjp1WVQt%I4@`Ag3^g*e zAurxon3Ab#eUce7_}K$fT^3V~gbjW1%EFXPRSOhmti3Gtz*LvTR3jM+f$`1)TF5aG z^xBGKtbLY-aPBONtwu5y3ge{(aMp|~XQ?J*rdlx7GJ2w?woOhsfrDJN~Wr}y_hkRZU>wG&$c#w{5_#D-dg}?&CivyRAkK9XAf)f z@@HC`KB9)icx_=yq^deeur1lt@+Ez>zJ3pFJSe<=mr&ym)0{N~Ee+S29_q zF094NxBC!;x_Dy&G$n^XiWDZx#08|fyipgRquD*kU^)(k6WsZ3T|SqL#-7H5q_4Q0t~1tQgB zSL%&@c7arv#aI((4g4<(Wyxg)B2_I?GFiqyyFjYT;;e~eA#Chj{Pk~O)))dkZq>;$ z_SpqeU7D?Cnif^LtU#oyXRTD0!Ot#`>ay5sWLQI4a#?{$RZWYLEQ6n2Ak}4Y)=1P) zmRwdKQq{9oD9hMq7f5yaP)k`D)limPRv=Q<;}(2qJ4O}e5(yXAXPDc1G9z%^r)4|GM{mG0%hG_ho!Q@x-8~z z*qJk|6bE&(%wybzb$NN8e`)MlysQ8_bB2^=RVK?!#$8yKmv8Z54Q27N0_@BgR`!8N zmYIyZur4p(>`_BmysUutoinPOu`*fhJqzu_(`7N%a2X8iGAb8NtlYEoCrget z%i^q&WQDSLSpiQx=f}!UP?BZJf^~Umwi+C3nmvn`6|f*@NI76JS*9#lmzTv>LuG|^ zxznSvXkFG5Rwm1oHE!Pj+kNTx|Nfz_|DT2b|KIES{}uTEAF1pAhv5JJXjA_`bn&wn zpN9YcCHVgzIR6s-|M$WF|K!Pi@c-|=vihI@>U7oa_Te1Z3?MFXMyheJ%K;PtkxNmR8`iP>)Gvj?KO zD*e8g?fBNX1Gw&pnoS=!I2kpUCM{B!toAwrlf!IP8h(gsZ9dq&vU^y&Sc 
zlBjBxlF2eHVIPsA`px$uebusID&h5X5ns-ft8XF<-SvVX{nFAgZei zbyzBE7@I!5-%}D*EmTUfOj#hRtMmS)vFX$Mjru;n^w^$NjjHxpK~z^KHmpHZ#rr)a zQPnCXl4b0(2co(<=TYN0P4D-VL{+PlNS2|`UPV;@+J+#As(8PrB&u4ZWU>r@_9~)U zd^Sv05LNMhPf1j@P^pt;=(AT5)y8Afa33^ULRq}4FeOpdA|;h&?6X%9)nc*Xf|U+c z-c^{AsA`px$uebCM75Y~Bx)#&cNL~2s#>LFvP@YOQ7sl5E`#Y%3R4nQJz!4 zBQxGf%5y7?%zDC#WSQl-R~ea$xrTb}g|c{8VM-&j9VH zLdltK#?LRktYcEZGFx6Gl?PJEGP)(+am|U8q{FjWi*Ct%6 zny*T`FIN$mOk)gUZNkSS_)!%_#teS;fmBze<0nz0Qca|asEJgyOo@aU`|RW2tV+W# z*CzBR;#=b$6{2Q)TC0@Im+{X&km{@8jRBM(w>6bF&%MM5>6I;?gRG$uezqP^k{vyFMm;gpD2$!lt&gQYp#8f0dH5 zkAJWl_?L#X5~)`IpJF4?Duu~1WdW(K`nKx_QWXa{Fl&ZAazoMJXAekq)$^#KEF#q@ zk*by{kt}1MJs{Opw><=~n8v7vvS5)-Woea?$uj=g15#ae>aeLS*i0jAC<|e8>(WZ4 zPL{FH9+2wl$Nft~SwyP9tU0Q}QZiWvKYKu`t2g?vhO&rMr$nk+ri8K#e)fP=S8wpB zp)4ZRDUqs{DVeOcvU)(OtKzN6We}@#GpaH#5)EM5=nws*^Rg`*1+2tG2TScbGz1M57vJ8IqfK*rCXBx=Ypovtc zM58uaQrJZdPb*UQOD_p%&1Y{ua^sa7&)j(E`YZVTzlZAI|9j}_XRkhe^}#DI;r+h{F28j7smu3W zdg;=Wm+rgx;>F#?`!0O?!tTPo=U+I#JAco~3n%N7yU)FFZuJ#Z{rOK$SEbXJBZo@A z?4k61qhQIAQ}N`;4cmVvI+(LnY4zpE;hnk&%X752Q$;=X$eH1@U$Io{^Py)GA!E14 zmvT)y);%-o*{@h?>GS1;gts)pvO*HF^D{oY)ZWwu7LjC%Gfmip`M-5%c3m|0X7S*k*mNR~m*K3MAN*V=n1tfdh(DpY)H zDod-AOcqv?mJGmBSHIfc?O`u1Vd?uu*i@EQD0Q+7diKFmSHI$48oPb6RD?`rX?4P6 zwfE&dSnBGReOP0+PnL?XsVpr|GFhf9u+-JNJZkLr$x;zDm8IoLCd-rsmb&^SAA&eX zCrd@tRF+mJOqMANEOqsZbyzAZ-Koh^5jK_8m2*@{mMIG?b@dDWrE!iNQv@3}Cqokfka_iDVh{?1QDQ zimQgpV6asBzVWT8EUiv5Sq47)V5zI(s$sH%rPB9}u&FGqQ0inE^z4JBuFO&cYiT3a zWT^<5%F^nD$ueburLKylMus(%l`IuuQ(1b#%4C_cz*1MmRU=VDS;3L``LBb;4wsvcOVT#Z@E8igR?bRD?}s=^3je%ajF{x_Y}QD<6eU zmWq(6EUiv5Sw=l!Sf8)%n6iSUc9W%|)11oE@+6XF)Uywkx|(^^s8ExoB5Ixn^?()0 zGU(aIzgazQLl7*LzHdZLW$76!lV#wu50<+6={hWv6)cs$Z-h-{=^3j|7ETYXZ4m!p z^_YKYC@WbiLZ-6xfW>5)vhXPM>QNuoP*ysa5jK^jC#+1CDXRx%J>pSAS?OR#)Kr!p zurgVuEc~0*Px%mpveLnfsHrSHV=-B#tR9s0a2-~X70OBnGs32_v;|y}Wy*UqM6-{*6h(^-0cCIfMp32jHpe((cPCk4x1NHd+e^ z%k@c0>w3x?_#9xvU0bgYcXT6U6hH{c^+_s~obtF5r!#&4p1Ll*J{i`iR%w7hiIoMA z^m}8*+s}OtI)nMTwEHA#RIA-Mrr}F+eNr7|vfAs@02}VQbo(S(QLWMdAv{%O#ALPC 
zC!A|%>(cI%WJR@_tp$8+u1_kXk}UjJSw{xgaM#xB!{5>*i&qu!Au+02sbsQ@d?I!8 zb?Noxu)^~fVc7>FPt{T-l4azx51zU%)*6W#%95K3%%IbrsuD{0i=xHcRZYq$cYN?XRvYj71b^X;IHIyYc706SyRLNwumDLANU4NDB z{J~RE_-9!~o~o4!lhsxhO1s(mF5CIjqMGJ!BrBDr)k;ZLTUmYZ)b(xu(omM%R3J~) zN+pwJzhN@UjbBTZNS2Y$K6vW-rbi8Bl}MI>&pvqS z`i2cb@KhB3nJn^DtyD5uW9tKfr>?KpVX3U(sVM##HkGB-N}VhNpMCJu^)>&}P*&bl zz~7&HmR2fEmMIH7bzOWlT(E+tcJihI!g9|#@}9Q+x@;djbzQ7A5;c^SHx&>y_be?{ zGFhf9@YHp2)^Hh2i)!9fKvbToym8GRhB+I~OA3SwkoHa~V@Kl<=5tXMwIb&tAMy4$A)b%-2R`67szY#W-rPWHE zECZkYil_dyDJx%t&YKDdnaa`w7L#Sls(9*O)nR2=W6#Q)3J9CZ(i2uD%l52_r~ai! z4Q1s`1w>6{=>aQ~Wy-2}>R^sefLFrLyAJby581GDx1P zXRMMeQx;x>Uj3PWX(%giD&R{~S$e?AWSPqtcwoNzQybRsE~E0#FQs=`Pgs#Ga~bz5 z@A98`)KFI5Q~<(EW$6Jcl4U02e&t>MV;h3-F7usrL``Ms87q@zM&o|vT`txdCM&$l z7)%a9Dof8;b+XK4+^@XL=BvR|&doBIHx=-usVqHUFE(|4P|9fjHszBJz!wro?a6ERU48ND?&^J4K7D0(<=)FLT;AdL|6Ztn|L=v1EByZ7 zr!JgbxcmGk@%w-O`|tm)OP??QzFzuLb26gl%~4b3#HisvwyHC*)OBg~Nz@oM{%iu? z)5_{hivAe2L3`mCfTgZWr!O}T^a`7alXJ1E&oipj{5i#|d84J|5IcQo_2m@rr7tz- zBf{oRuga;XxIxbWSnAsPeBcZ*#e+~IB-bOU`E!aJ^&Ehuu1lYf!IhS5Br8^Vj4Ib7xuwEnwO5V-Sn9f1YGhbr zr5S*n&MnJiNlSn9f%Y9wkXOFA@Usal$3vP@ZEsq12?kz|Fkq(MWLs+9?oWy%6e zT^CD@BrDF){PjDOR3TWcPU>VC^c;YtuHVw$=J2>`ldNndAX%v_g(#UUqn@~7GGD*B zy(hy}8eu6|vQ#ZiB3VW~2Vkk|dp&9>D_aTZ(dACB5G9gj&~pHmy1vJTAXq9}35c3I zy;dffECZhdu+;TW)M1&dV5yk<%}$T7xzlTPQj%rL0!v-L$-gv|m8}GXOl4_h!ep7U zz*5(D`>=+x1{5q=s+K01EK?R(>U!!?Ls{8Mz{lpEr4S{PWy%6eT~B-nLRr~LK-Ao` zv@&6`Oj%&5>v0`ck`>CzRszDNva~v>lV#9z0G7HoFAaXyw~;`$5)d+#rIkr0%cv)Q zQ+&QIUK%b~!BW{uK$|xAEGh z84Q++d7ly>OV!FGlV#v@0G7Hgb{ZxtSSsc|!=|$Ij8&3l%ECIaHZM&teW}?>z~7(B z(#nL%GG&3Ku8WsOhBcJMcjSbnYH5ONinXdO4HXD{D_&SnCzN@`2bSeY)Po&ziu>mTv3A+GEu zAZ&_D&sdo+gPsGx)b$VBDCB$4*-k*%+_$s?ED38je+J$0o%IjZanlKl_ne!IWj_IN zQ(k(|Dj746@t`|;V*P{ut+8`uLjf^UUwX`vjG1n+UaWt>hc*P39R&5!}J!~jUsyKR=^`OO!nQ*)PC)UMQllU9cK1P-jqjy;kTaq!8F1*X@Vyuyjg~p_i zqjyQR`tjiEUuHCdQ z>tQPrW+vl7=RfBAe{1XZVU|U*eMN~_-Plh#@CkmcKRYjD^vSp+q zs-@v4QFC?TmP(?k8p>qZB2p36((jXG#o{D_CsEaUg~_sYq#~-N;U~$8wMhg|qN+7Z 
zoh(D2gNmq{y9Q-#lPqc5kf>_ClF2gm2@A`7UEDPs)+DOZw;@s0f+dn=>~l~N)nc%b zsG%(B+mNVg!4kN{&=paAZ`q+4Hk2iO8xmD5STb3rtcs}qjzLtt0JoJsl!rPlTenlZb($MW~q~9=yOmJ z)!*_j4Q1sG5ZqYHJxlAAOqQ`v*tq8F-?U*(qAGnG5>+i&B3Z^h2NhBM4UZbi$}1qa z2$#odEm$I1hCT-sQGK@!K@wGI+mNVgy^_f?{5hzIYBAU_SxHo-bwi@6B}++`DGNk( zZSES_OPgg-+BPJrTCFfyrYsQEb#d2l!3u{e?$qT8jYL(el}wf?YYb%-gN;NDWl7(L zL{*EGOqMANM0H*KHCzV6p~@Q|)m_MAXDlYmlm()?E(RM(RwzqaHzcZh#;TKL=yL#~ zx;A$Wx|>b1@&*XLG?k?XtW1`%Pgoh}>*B8AuqIKJz72`0p0FZW#y$ris_S1cWsO2v z(zhW|)dN-}%h2ZlM0Nf1rmQ5Y(zYQ{)iYKm%kbv_M0NenIxLfwL{(Zh@*1?-85Fv#AN#Gb{sD#XF8XDlYmlm!R#`iVNMBrB99of|rs^^8>~%dEx&IGER` z{-vQTY1+`itOu-2mU)fg=AEzSHmvDjmZl9I%zDC#WSQ4^00;BB7;LE5VJJ(gH#(TL zDICc%tMLF1=5_Jca2ZSovvh3eVAeBMCJQ{Y)n7|2NN;f)~^?=1>nX-n>`+sL&>ixg}=f>aPcntpk|8o7|+W-I7t1n-D68!%~ z@c;WRe;WM%Uhx0jrF$;EaB&U(|H6e8`2VNC|L;EeB!2&IeC`wH&eGif<^G4+rnLKV z*x)aXr9L-MSXN_GQt8ygWkHeZsl_KvFsj74$Sq49cK&qS4=98$oAf-tIKZlwhD_yEc zma)$vkm{y%`IxLks+2i?4K<_6eHA9Fy?P9RR5!&@!(=5=Wm^`%h?-&L!b(Y2d*v7c zscy_p(+_(puX*8-%2bw?CQOzo3rKZS>@+g0vDLF9+a*%f+9Z=@%IZT|#ZegJm~YAA~>nl6#5Vw6ahvCkn6+M92*AxNak4o#OxRZEjpmi@r&5J+|N4Ru&5E0HR@ zG+o+LwLB@wGG(EYx%qnk(%7@uq3IH-YH5quk&FIWwAxm6?>}8)=ZWu z3rKbIwH`H;#TE^o(~6m;wMiz+l-27$u_=a{SJ{;?Dg(! 
zilrubPd}L|+cbDkE91%ytCB5apF=R!O=GADri#BczLgsxA~rvq^ROW?Ua{$tsp?6K88hXAscx>;VI^as zG4^VJK64a~YOIo&YOIUx>q;G$D`GdcFE(ttWU6}HsxxNnbBL3}=5qPeJQyZYWwQo9 zex08xXRS<_!OtO(>gG}z*bJLUmE9Vc)-tRdwjy8VGalkK=*>ls8Ukatrt$$b>w&g{ zBVA@Q9-^taxnM((?qxP>Di1_Nl|m-Hubyy}V-OFs);97Zxb$OZ1c!;Lv z=ET1=wl6kox^yq=S&PXsalyU3Db5-m;W{BMc5AwHFKZt-lV#fKpa)dUH4-(nC50T_ z%i0RgWSO*Za#$B{4Nvj$8Z_>gMJE{3HAYoC!AzDZt8y;*T4bpqT1LK8N6`o6_!+sJSlXWmWtvYEDWy z|HV`{@Hqre-IQ)0la)MG)J~qN0%Edkiv~~Kly)DJl{{6{PM)d?D#@}11Uz+Py}o|D z_)BXZuF~YLuj*m4Yy|;N-IQLR3~Q`QqIL3AtyVHwwt|4CZc4jPqQ<%;QYTNr?qUAc<6qAi4E7v8SRLN7dV5yU3;ByF`x-o|h zJSdH_q-R5(svsqkW#khOXuc^98xCvoR5oV2`qIFuz#8}d}W ztP;sG@Hqre-8^jjX!2C)*^s9yOJTB1S>UOghiu1Bo=Qhlcbq%EmMbM$rY!K(%}@H5 z#*QyN8}d}GRG2JN7I^CBZ9c4_ENR-1r)s5=$uebur*0nfsG%%r+K{Jep_0imWr3$| z9`GRuWl7J5JXIUQnJj~!zz(y`TkEh?RytIrZ$qA{l}eo~1D`|i)Xn|=rJ*cdt3j(L z)&MP1GFe7G@qe9f?z3S{o+?cn@>DHSB3VX0hv2E3VzQxLlc!44hCEe4N+ip`=MX$~ zQ#>|Y29u{s&xSlzD-P;0U%ajG4x+xAD8P-siG;PRJ^@NqlGG&3MZi>l9qK2}hX+xf>2dqq%DGNMxQ#>}3 ztWcJ;Z1^Iyp0P4n20uaGW}9NNkz~c5#hW#FB|2oFXRJC|20n-2shf!@D_?}>wHmZL zGo&1_GFe7GA&L2BY|2WWDh(U*R6SuuvW$EV!BaOQj~aWH^lQjd^?()0GVnPBPu&b{ z2$H8tw}w1b&sa>BDGNMxGpNHdS;Zb2s8he)XYRFUdfW>5) zvhX7Grsu;N%936UUCi1J&SaUg;9}l%J!&XRS~YYr>j5j1Wy*q!dDHPB2xUp5hAw73 zV`Z|;a*P3+ZGOBCE6ECFN%=NRW$78KPL|n?hj1}(%wYpNZq!*kVA92`2dqq%`HitH z%s0hh!v!l{%u>G5#jGc+NS67Hhj1}(ipNHxhO(r3ql;M&SdlEV8xP@P-V}!om%(%~ zOQVJ_LhBie$ueaPn)m-cbndQ?-*t-m|Kq6tzj*5(QU8At_5bX~7f}DtQ2#%7{V~-4 z&(-z+N3XsO_5Xjm^480r0snu?rGJ9||4kP^RrCK(U07Tg*ZhBc@_(OvsOJA4`f}L6 z{P+2`^!RevU@(n#cfx=C7;3JbQk%rE!Gf#J$PswzwzT+i*x)aXsLB8FTc{cJSlbN# z!*)&9%_VD>C|%^C00A+=yYohzCrR-Y8=0XnjwYyJ@QnoO)^=gEbIlFKQ2*&Aq{$=EVgKR8`@%@2FBHlD+r~e%fRQT0;+%L z-x}g#lcxI8KpjWLW#khR=5+G`8`=b_Y}3HFnsJY*%YI#UQ~}lZd)SZ{`!qcQRV`35 zV+KA)6;S;H8-)a_?9=oJRJA~%zHpnnH50l3s_(1gCQyaRv{|~?tbuVgMnO4BlQ9!* z1yq0Ezcmii?9}wSxp!%aLWLor?QaE8Ev}km)r0tfUGY5j2voI5p~OtN6;LhC8VMU3 zW3#45psH00GiDk?>Afk=8m@?GQf0FSQpk}#rZIz`C_rbMVy)qdm^_sYn;vFGJt8n8pQDPW7Hf?} 
z4Slg&gBPMBs-CqXTLwNy6;CbB8YU}wDw{QUAv&YVaVzCz@N-o0)ZZxT3Z#tZoMU9O zVS^WD%*WwK0Jn9!SF^&yDei_IDgX^yJQ*G!hdPxN7Cn_sEJ zQd#lqy7GQaualdXwu0+qnaOyBx_k4>{-vQTHfxZJ{8BkP?mIS=v~%Ua3sr2#zT0QH^0~(0zBm$-WlM%w#--cX?yJ8a(A3Ayce)Av#0KZ7?RQ zt*jxu%bVh>NoDo(HE33HU3!=s)ImP_D{?1N zRhSY3IBx%`BOujn>G(<17(ljXdqk?Lp%_48pCcgEZE5&%AIV35Meszbs-c|i#y`O` zX4}&7%h?W(X-rKPm_({7qIzjE_BjGl-CDa39qb61{2!O{bGD0sa<&`%#D~wfrQOG2 zO{B`+EPfU>!-|L^Sq48xK&so)@sp^zG^N_{v#1$W&Qh7I_8Kw*Qr(t@ACr|xRoXU0 zs;VI-%XVlW)otncFb96{Bx)#&H*0!Cs#>ZfK<1|V3rKbQy*31i zRHbc0q^d;RQr&(}9hS*Tq$;hOQ7TJuN=cR}3rKbQ-TtMq=}X&&NL5)1lV!>R zQr&)+4{In(`Zh$WTBT&NOj$sx+wb(Kp)6_J5UDCliDVi39093rzr%+hlqF3YB311P zXR?fcA_ueWx7T4MS)nZH+YqU0p;9Ny*yjjHb!!eAfXqe$(y}2^)gmR6W$+VQ*nC?Y zHe9e0sY=U+NL8zpNS49R5s>P(m~13!C`&pvM5=mUC6i^!0#e-;j}4c>M5@xSAyU;M zg~>8y0jX|_$%e^Fq$({NB2_(Om1LQ+fK<2Uuo8-hftG(`1?RP~I-WSO$? zF!c6z9hS;U7qj$h=wjA0R!NpA3ohpEE&tNkv!quuNM-2(i^(!&!Nt72>BAbzl4cEE z%zDDgWSO!CP}U8P8p@LDjV@+AU`4XbZajjEd0V_STn5v{EY%xb%zDPkWSQj{yYXyW zyfu=n*t4X3ql;P3Saq_@ZajjEdE5AEvR}uCp($3nnDu~_$uht32rlMrvDHvn>0)NH z29HI@>Z&KKNS67HNB!pgzrQ(m*T?R92j2g$>i^eneeBlT>-zuk8?W8?hr0fM_WI-3 z{=KgMU%C41)ko_3|DnsTT>dEP|Ic3fFzWyRaq)2R&G7$kFTAPd|0DSS|Mp}A|Nq}e zW&frB$J4E~_+T%M>NH0U{{Hy$R5dwj#y;`GKl5#A@#Uz&Um9Vv=!I)F!m4s&)Xelb z22tIXE}uk=^@ueueiAjK3Z#rF4!5m#WXCuVZcCFdr#S4T@vT|&;uldf>amtInw9Oa zSdU7VFQ@o0i7JcUK8dO-r=H?4R<%Q7J+c2e+lgmt1C; zM3qHvpF~xalgX++R@mnlN5XAs^GVoPkXZEgiBwfNnJq(~V<6RSY4fFj4%>Q@E>^v` ztdQxFsy|nx>Q|Q9pWC~$Eseeebnuu)+`O&;-)hDcic<1r=yMFDx@{~qfoZW-L`a<&ZfK0< zZ=Xn2tCYx?q0ceShTB;ggxE@P#BHW6%U@jF$%F|_$%GmHL}zEVeY_6K6)}k_>)$?! 
zsun7Bz6^biaWdTgw0~&`jI}Rbe2FjBA|=yh>~jpFx_!)sHHj+g-#&?|RwDW$1H^9d28^H9TviLzU$(PFp!&k15OWCmNiyZ86tKvSROI{o5x| z)#Fy3EJL4TblK) zvW$I>KvcK)x0QvroFgh*wj`>0*2-kHl{ErU-QL$$7M^mBsCh8#k*Ml%E0tyVa|EKg zeM=oSlZEG;BW&W&9*L?Rw@R|w${K;FZr|)*8oO2}m4z3gGg)#SjLB*%YXqXYz1N2| zHmy#+ecmHc)i!V@tF5dNi0bwpj~dGA@DeMDs&;}SS!OdH;Wg;(PxuhTp4H(cR=St9 z5uC{~t1)`~v+bMeu#&7$Rwqw|J-U~*6I>_DY{nzFm$!HOmxi)Bc_>6B9?8-+a3;%q z#sH-A?bL=f-OC*+i|%D@14pvVXFP&?d0Wgi)axXa)uFQJUe+^KCd-rs_wu%QYjPRH zQ_hV9I#d?j%X-RUvf6vr2=3)=G1o9z>0Xw84c*In$STP)Wx>6?HD}Er-OJLep?g^m zSWK2FYtp>`_tckq|L-;M|LXq#^Qiw{t?&Q8iu(W2E1w7de-!oqGnXF*|9|GvL*V}( z0ssHpg{Q&)Uk3kw@Z=@%|NFuJpZd~8|CjzRvt8-<

TFsZMik!jGfo+?0Z)9zk43 zYt>aisyl1<4HKzOa&77psj7l<1gi>2tIP3lzANp%oS4IW^>>nMQ=dpxRTR_R;O7`f zbyqrm5;fMQNv=(OB2^VpOm}0SV<6RCY53)I2R3M?`y|&Um?3ghg{tRtH~tA)KHHU! zpCl{Rrb(_%eIiv=QJpMfpJO1^owfV$zcs!z*Cuqe^Gl_;&SV+<#0ogym3AM8HIeEh z*CzZjYKE2CI+N92MaDp?yVCKKsIfLpa&5vdqh?e&Ol7j#i^v#AbypgGOjaV*Nv=)! zVbqAKRSJ{UUP#73s=MN|VX_jbPN*y*RjpJ?vP@Y(syj2;4AZ4Lrm~1swMt>KOj#o+ ztC(zLSYy{8Q&~i+TBby@41SJBP*(BTNYqf)n93qj)iNcLW$bedq`Eub-b+Ewn)y1W zvWQf*O37py|HSGx+nuy`eb`JRER{v1s+CHeEMuQzAl2PD|I$#_n93qj)hZ>EW$+V+ zfcf^X+q*tIrtzgz7Llr!DVZ!&R)tjm%A5!v!mm z>WIoBQq?jglV!@PkZLj6NYqf)h{_^T)iNcMWy-3MYO&ZbS&39fR2Gq{p0SuLQ&xpk zi^Yb?N~AiXvWQgmj8&3l%BqlRW3S0%<-^cJDvL-}4_Hi=DXT)N?=ubL!_Y%2i%3<= zlt`Ar&+!1t`hAZY${JExM5=ngiewr499Ky7_v)yUtk|=LR2Gq{Rw zhm~Z7vW8R^k*ZcIb+U|ojw_`4UjNeAvxZa_k*Z!+$z&P)99Ky7cWhV_sSc?uB2_(M zWwK0Jco=&7+a5KPHKej=WYz;#Cd-sn8JXW>Ly$)1A(cfVv!1b-EK?R9hTi^G9hS*T zBQq~6^l4<)Gge8KDGLunZ-3LjH1;eOzkM2+^?=1>nX>v&R&myF!Rp7J#R9lbBeR~c zB3WiR9`~WFVy=;>p)6ij=+nro2dqezIgZDbk-2zlsH}V#nwJ$2G>`65y=1b?b&OhX zwk_rwNmeL}mlg0>bcU7crB0SPj>rAx{lCAh`2YX8_y0b9{&4=@lNU}_CzErZI`_9< zy2@|=mnU|m%a?c#$K`T_#LMLIY9f}S{PU#d=cpO{#LfFVyVB=NpbC#^#LWe1K%lD9 zDZ^%_&k2C)uIcgxMnfkeV&;le{di`0RE1FHB*!C|nC2h&;QQY9-uJxw{qL%P%jvGP z`I3CXU>b3ANy0CqW?Vs(Im^xTIl;QME3G~W+pI}IayhgFsw$qG=6EKr^|b)1yVB}o z#sa9~d2)tji7M@0$yj?;ngFQoO0$m{3!sWy6O5bPAFWZEjJ21g34rRZY4v5s;?Z9` zP|ldyn$`kEGS*&~CS6pirPr4$$S6Fjc%qzf^M_QmM2UW$+Uol-aI0YjRzJ#WccFVC1P< zrPS#%@HxTQ@61+%eqMy6zQ|LxLdk3y`J8~K?uxC3!x}slPn74dlBa5klF4fC_!IEd zU8lX{!(SR7OLdW_YKfA`GI3$(cR$|VM`16Gs1z4@s#YjWmT3z-b@#^hjz0>XihYAa zklNBJr6kLwg`wZQ!N0WG@w>cBJRnci3WdosWr3&ee$0op+41o_IlnZOr6o!v%gE=X z17-cFM-63>rw+(dwM2l%*9)Cd=R_tXQ+%57%L-tZ=B} zd2$Xx?)X}z)X6gNIl<8He#pNxltrG3DV|>{J>X21kx$%Vn(uzFJ*@DTHnWxYiQ!t! 
zu(B0ovf9cTgQxC_vnKTle`!QbW%bEZwM5BewUspnPu&$`O)i75mqyf77F?_Ot#XcH zvf9cTgQxC_vnG=Ti)n;SWx=(YVWkaRlGRq$7(8`nwi?(=n~>zGaIHp2tx%Y(wz9_H zsk>sUkzoyGk*C77nqlRn70EL4IR;PN6=#h^4P}w1_Q_NAoE6D3@Hqxg-4$bvBrB9f zo{AbRzg13JnJj~!a3juk#aScCiam=wwNIX^RZ5*K1D|8?)ZKTql?AL5$;v$oZL|E+ zW6Cn}iBX>KzTK1+Jhe-n+9yxd5+##m$^uW_eVa!OWs#@$$y4=+mB}(?fv4`i)rKH= zYL`5s{M(Jc!|(rn{`%it|EX*La_#S~J$&_FuKs7d|MxFfK62%4 zm;dMG4_|)UrI&I4|E(84bMeC$@4xVw3x^ADKL45X`}1!;`R9|($(zsp)45H%%Kviz zxB0I0`f~V2@#HVc(-=PdC~D41*_&ecpj!PqClyRBt-c(-QNYwLVJdzTHKWQ!xMliBP>j%*C$b9%O^}75T>evB3VW~ClyRBtv*RsZ25$# z==Nu_jd}4E$?Mkmtk`-J2I2R`TE^3CAV_%&tqn?urrdp#9Yge=76Q<%zBBYim zOjdgd!I6EwD~&!5Yrxd*I2R`TC~Ah4*_z2}FCi1a)LpUINYvQ!2~+W-s2Nq(`%IQ8 z3ovz895zf=z|<~b>VPm+OO#X=-k)m805Ek|EH+G5z*O`_QwD^o3Q|h4Oj&@bJM-6! z!k^kDOoeAP|9&Y^GFgT_CxEHDUutj8qhTnEFm*tfsx?X^%dqEU2xa}EM-62WrVa>G zm7+wljCxK0Q+L16-tuuNqnWRSseoQN1X7}8vW$G!8F2BS%j%trqszY z>Nx>S-M!PlG?Yb{3PHw~Dn((kOj&@byPvaR4Vc;`OdSxWYK@Y~GGzg#?ta#zhO!7# z2ZX8JBg#T6uO$J%)ZNe65ClxcyW}YW!c;9$m@HEkVCwE2byzAZU@Gtc!{(l)z2K58 zQ`P{=dcwan%2dMC0b#0^D48t7o)f^-UGdd$!3vn#B}^RFE@A3`FjZ@mOqMANFm+da zH4-(HMVN}0qGK7*16C%>lm(c&E4CUYD`0AuFcmLFXH+?3FP(qns;Vb2L*>h7oNurjQ%XA!0j2vfC2iDVh} zob;irhdpX2i!gOSn5s2OB+IDh1Tb~?kPkuZS%j&OYYc&&u`*dkK2a*qc0XB%rLy8z zcDsbB1Hx481J}tiv+)G4Lhs(@UmD8dD}8t=I=@u5fmD{cjbYoE?;f;a4G%MZ3p}S4 zJPxkL!`SbNtA?mG zOw9N(aK_Evs@_$}Y?<45g2CSvXN`mnZPCPxx1wWi=@Bc_WoF|^??34KfBVwz%hhQd zNVUTu9EwL(bQD8q@Uzn$%=e|^CsCUb>~I8!Vo{YvKSt2lX9owFeQEgRR)TBpWjF2c z(gIGtkr$OwPI%*=@W;*erQ??q9v;&OnYVWQ;%x^=bzjHw+kOUIAN3ZyE(EQnu5&9Kt4 zD#>cEA|0G!_SWtjr$v>w6<}M~x^4 z;;fOVp)6ij7!s*!p%Te5_Sr$FXkUyqlB`e`FDsw`%pnk*lF2gu39LEW7iWzmE0jg^ zH+~s4!@9DJeRgn)+23q$df)<$vS|KBr!PYuQ`V$?qMib&?r+!*9Z0p)l(U01F7z>_?HIwk;@81s#>R{vJ8GsfK>OFeONnkm~-DM-64kWd$NtWhtR7W1kZs)%`^uf>4%RRv=QB3TAMClyjH&Kijt%Hm~(0gN>pOXry{+TH&UxVgl1@r;)TV*H6 zWEua&gqiLB)RdL4LG!W#UWm@Ha>lBYW$bfOA=N+eFAZhU{0-ofA!Qq2vP@YOQvKsP zEQd9bDlaPxh*b52mB}(?RY>)ZJZdP5mlXy?s@e?BWSO!mr20V{f<&sktS}%_)m|`@ zWy->9(7QjZ!%|scT}B@!l|}2aHiJvDOj(t6`2+r?(O;!?881Xfvh;wJ$ug7iWDI4! 
z--k82K(sCoXkFG$a3sr2#*;CW^#>lcQC64Mv@Q>5UDh*Joh z{O=c67w^9C$qTCs)AOG^e|G+V`~APYb@_0ne&{2XdVl`SczxFOFDNb0`y`ErVn zgQRwNM`1{ks=~?jsG2u&;O&5&B+>mDnBz13HzHyM$ z4(})oNm5lcnJiGH|2#!k;W3PX}qWhjv>W1bz5)O~66Nz~Zsc}HPLlB&Xq zWEt}8fTZqAn@^GzXK3D0zzDmSInHv}XHZ z9hS-(gtBP(9+ISLbyAXL$^uE<5By6*StBZoBvpIAOqMANBz51nVGWYn;T?q`Nvf76 znJnA0KvMTTj~dG29fcuDsv?w3mMIG)b>Foi2$I_29fcuDs!|jt%ajF@x-X6zCM!rP z_ViR1NvcAWI$4H1(ThLbo1JDHB(=jk3PX}qtxPgm#ymS9srzE5;ewU6RNhe-lB8;B z63H^=*#Sx27e|dm4Q26;!jL3YOOr^JAy0JUPxr-8!(}jSsl20rjWCp@XRJ(?VNY~* zXZzx)kz|Fkct>GKlB(57NtP)KBz13g8a(CPtXI6FfES|kOJy5ivP@YZsrzE5;jjit z#g3lSiX>G}SeYzS7D(#;hfG<$P!{hf3`tV;fR)KIWnlr{|DY)=NGkSpek)mp)cj1A zDXTZWv;ToQY+5VFK~iBxWmvKbsZL6=sxQ?OCQ$1B`^%>WO2u2wO}2PR0Z&9nw)BL> zY#HUJo1M8su&IAyKM!fHPlaGw$F$ z=>7NjD8#2{&9;;muE!oCb=Hb)^frl+kY zW2P~z#CvnrjKjX%;dOVtvUQ|Fs-@#6QJdun_dZhZM5SP)FgjeizZ{`|UOq-BIs&KAmvIMDTvJ8H9Dx_M>H8QNB zEFx9>GHQmE*&4|*_zAn%d|&)E5;c_7mgOhZ{js%SIj zx5}|9lV$u9cV=e$-zWTC|j8wUyPWkm_&vmxi*4RKW`3OSNEOvf9eR z(QUqeciH*#O9QEP>Hi)QscOZN$uebCNcGn}YAB0Hbx5SD6-y?|lm(=^|1}$eK&oB- zoYas=RSOm-%eH7B)%~y5VX3S@s`v$!Tn30#6{pn6GWOX4Qr-WGe`zR-NOef0ss&3X z%iw1RNOk|qKCGcEBGn<0s#YwKEQ6mNAl3c5JZdP5NOef0s$Jnoma$JX=}-5?Uc+TD zkSg|cj%q4Po5Gnac04En5;yq^nc@p=p0#T2h_KUsf%ajGAx_`TWX_Tw< ze`DD4OQjk}WuaZv>RSM*?(f*J22#b2o=Xdns-CbyS(qTLsCW%}Kl7+jtkVBIq=8uv zSeYzS77Wb$$88A0z>Gbe-jXR=^9n*X0&7@z+{&Hq1f@}ZOdSN{8dhtliIA;Y1j**wWpF=^s&Q`O{< z_1k|Rx*@Ye>GkE1!CD$&^Bz6isu@<6`FhCklY^>n_`^;QO`k7Lx+sVvBui8BRFzE5 zZzG>*s?QIl&6hKA5%`X+%#QZk$35(eLP#S#_HMV*3)Dd~AswU=lH3FRv zd@x=q?1HBrN}G?#3Z5#S?-6;bswR_V@Ush^dMIr^CM$R<2uUnPBl1+0O`R+QpIz|O zL(}5RWCc$Z&-aKtRm+l0mXS~R!{&$5;geyF12lQ+h&)y6l1P@3&n|fCq4fA9Y8;@+ zQ}J7&G9b#f>>zBzgWr3$2?yAF5S?NqA zPaTn`YVWrs%ajG4dbsUh8pS>UOMTQ;o0Q^oT=B2U%2B$H*z0#7{@ zI}P<3JQW*yZUf}0T9;(9Oj*6j6NloaN!T+9o+_U2kvLQ3@Wf7zKnc!@sAG0QX^qQU4v|?jtEq>Hetfr;1ln$K5;0P8p&A5i$E2J=#ZBl zw3so2pXkWnITTZkWGr?r0#&>bopEJ9XfkG+1yDU)ECOT3LSO`{)f|W)*p>dTWXyz% z)7arcQCP+epeo*Pyb&F7^`ym&nQ-xs4(B~=Y+f{fj|f!tq{WPxasgBiCpHQJRJ#PK 
zBLY=DZAr#VV_j@t=jymz5d)}-{d>e8*wy1!oiPKS-HNCFb@|jh7zR%j`}c@ERnJ

hoYo>$0A;m@E@lWnC_|8g4XUUB>2~OE;~{df3WjnYJqHa&gv3)X)~K%XlF=hE>m6 znJkkQ{>{D^Yj}ze>#{txju)ac>apz$rqb5#)oI@U+n>AZ<9D6n{{P2s&u@M4)<52w z-TdOs=Wot#eBs7(_3!^Zcm1(zpRa%a@AFsx?&`z!@Bck~`Cl%7r2hTCk6e1&#h2^f z|9knuhcDb;|Nh_oC!aal*T4U_|8g(>z3#uy52fA55sX)V#r%!mMUB5zK}w9EfzK{@ z>Y;S|ID)}b#rzH1YDSe-Ku&jqpIz|OLuvPMx(81c^EYg(8CFzOPj>^KUGUU{_4?p1 zEz`Y2^EZALHABhG%A zTC|j8*&+g-dazy}Zk|N4xHgfeYQe%}*(w5_dMLd<4r}n#PLFF7d8$?{nJimHz*7&! zWg}6WwF#F#^6w{4)ruvPWs3-S>Y)ruvO zW#kiHvH9Vx9yOHJ<*}MPRY6K5%fKhBVyB1uZPyQ;+Tl$F(1{oVy{y7ynXfv6G8p`5Lg%NqG7Au)7QxfufPrJ*d|R2Y$`YN3+JGV<94PdyZi4HvBBsl2H$ zB2U#qC6Z<2vkRVjC@vd`8p`5Lg%NqG)+v!J1D{>+)I%}Za2X7XDjsxUvQk-Er7&5h zEb!DraoI?+V$b4Dg%NqGRw^Y~rY!K(gIR2F0kK&Ic~b$)TqsKqSWK2F3q18uEH)h0 z3q18uTs9Ill*OA0cqBSMRt{L1EK?SE>YFXLkf;{kTVsN|iiyM4qY#tVouDPc-RI4{!7#h)R__6@wbd z(lZv5Wy%6iJ-nd~OJ&8c>tat&!IG!y8LK4Al!XVO4?pH#8kH(-%-HJlOONeYIDxnJ zEZCS2KWf7oHfAw@k7#4o6ILe6lm#2};YU1bRI0Qwk7#4o16C%>lm#2};fHMq!p1C5 zt&eD9)~0YK%RI+j*q9H+Wy55Jjai;rAJN9FXRJC|<~Q!b#(Xe~4PJ9D%b+~9jz^*+ zS$e=qWtri)3mfyHSZt`Qd=Q#8W;_y|VWlG+$uh%ncL-$_lZ`|TWzohwqK#P(SdlFA z8>2~o3&r;v&wZ$>|L1p|yZy1c{(t_~{N`(Q{r~EX=WjfI{nhKwU4Qi2tJj{r_Q=(L zz50=>4_*16S3Z2@e)#_nm+uAtUthZW;tRF^|5F#vF5F%F|HmhvI5|5Rf&YK#b@lzx z|A+aLrN@_l-!v_%95vWhW7M=XiBU87*{hIhY4b_c*y%ZH_$}0oDn);cTCcru^eUuU zx_r5LplELPPhM8Qp(LZqmXuSx*Iqo(b)Owdn=hyMG%cz$e~*b&RXOz($NxfIIC>RQ zwH_ZH@QW|a>J&eP8egia$tiB|6XP*IlpY_4HIORqmBq?ACQ?=9WU|^TN3TMvrOhW% zW2fh3g)xz;mL`!bW1qbWsg^DulNCr+%->@oRTWMq%lK!nLaL?B$7BUk74!F)NL5)% zoh)OYy$Y#Xk8he5RhqwHTg_z2WX)t5{OnaowRmY{Sfe`SWd-~eYKE2B8p$&F*{hIh zvC~M@P!=yM;I~jSsvMv)S*ENCsTMblBrBA~%L+J{XH+>qF$Bg(fvIpQ`ORh*)smw z>w&5MUL7|H-87i0*uP<1&9RkJR7sby&tAn;f7ib?3RK=#fN?cr9u=3t&tAn;-|Isg z>f(ikF`24fT4BQ4@UvGj)!*^3Aurxo7?Y_gMahgA`|MRr^|yT#LSMYHfGfW_3P<&2 zKQN1be&_I>I<90a1jaiHV=`4OQksmJXe*}LcxpJiV(;Rmg{nXclj>20;Xk)$EtqOC z)g-Q-22&Lm_?S#pYZPY8lv^>?Vyuy{v3v2_0*tHqxpM4d#!O@AXB>*LhAUz)Rq=tt zxSCCII?{i{OonHeHCYoL=AoM&cc{TRnJ zT;vt_7uKxixWy|AXw}5G>T!$7vRx5K_3*BuE`|+t@x}sPh|aJA|4On3m+Tm@5F_Ee^f-_m>Gw$Iv=)*hfuvAu9m&N@(rgd36!F96CW86bG 
z{EZi)Gg)%h%4C_zxCiU<;b(nVW9#B&g)yzm+6InfnaQ}`eBVW~J2V-1(VurA9x>*!d<5NIQq$uecZx_l_k8c9|timg>~X!wi;0PW)0wF1-uZQUwTYg*dQ0;Y|8~{y|Gne(|ElZ%*KU36*4yj)|M45I-S~&P{(tuR|W(B?eaNofcO|d6;RH0EM2Vu1W|pmbo+7ur$JQZx4*|Es>)Jg z0PP1qdmyS$mWH21jUF0_>X<}TH59Ym&}R=s^~uuj<7^M2+9gpPi$isK+U?%m>EQpo zGiSTu&mM^ClcnLu*&aj{TSxBD;!u?nRXy7cefB_9pR{h@G>EGF_BVbQHIk(Q%48Y) z?1890S-O2PtkIn&QN=H#W>^tXB+J-m4@C9J((se0&C=8*QN=H#W>h&#WwLAy0a1Oj z^!p@P&8ClgW*k*6O;W8eS+SMcuIsvX*F_?MBm@!&WFPiE zSc?@Zii#lC#b;5xqKUgM0ve0<-v3Eo^hJ#jjeo%-; zrZ1Ca?6U`=y8JR*o`a}%NmR!qs>)I#S;jtlAgaqR^{AmN64fz@s-l!kmMIHFb@?Se z1feVvRpcs%;5=oS?-SjK+476)uvFGKltrRCCQ;Qwr6kLg1){qAB7f3Q7Ktj3WaX2d zr!0JYrL|^(s4lLFvP@YZs>?6%sG%$p)iH^xRwht-fv7-@xJOFi^;Mz3q*BUd^VD-*iO^?Jtk4rJyuDUDGNk(X(k)g^3A@N z-ftXB%qNwO6_aJk0#RKSlMRP8h^lzMaanYP)hZ>EWy%6kUEVci4Wm${_j^pDs#QuP z%g|>JM0I(`lodo(yx+JiI-gZmf=rg-&mM^C@^&4T$qJ$>-fvtMond8PAJ!;Tc~)UeqN?XrB3Z^hdmyUI8y+>3#j^@y z5>?$`WwK0JxEOkQ-G?ArKs>8}RXc}3Rstr=lm#R6@>(5Mk`>D0S%oo;%(}-a$uecZ z$h^GjPa4YNSp^);%_luiS!Ou~ft@d}*sz9?S*~0k)5xqltW1_E3r6N;G1ySAgHRUF zDvW7l)(uu9%N)l&7@3#FUc+fHjLdT7`j|#$Z3Snt%yry@k$G9{HB43*ndQoLToxT8 zt9z_ES>`zI!N|O9tTovJ3?s8#xsJ=CGvs;7GRtvq(A@v`-U|=>&j;QN{*UwjcQ2s+ zpV#OA=cxamuh0KKkNW@R`uzXPsQ;g;&;LJ#`u~Xf{~xaGuDts4hcEx(<%ci*-KF2Z z^w7n>g8%=)vk!v*kHP=n`zcEO_P?4fOPeo-t&i4DQ=ZWO4d-h7Mnx(0uo?U81F0_C zJwAktBOXMm_#kSAJg;+R@Usu3x-31ucpDKm%2Og$d=WLnige-~hjmGJdwn3)Woh$C z)YwE5sZNMgRXOpF<9ft)R7^;v%NOrBo;5bnM5?HH<5^WX`HrK?Y=^~!RN8!!tk^^o zsp6xk8CDjhlC1Wc+y_!!wtIYd(%3{3sp5-f8B!LcTs^9&LMd^7!(v%_d;w+Zfe@k(uYS2M1xOC?{%KKo#*%f?dE5IV#Rb&;vUyP7f2RTr3U zD`VJhFN>!}hBo9yrV8(B#MJ^NGiLC!kL~ud_-Z6<=!;AhA4Scu^2#z}CR;Go7)Vu|;1eQMZ3Wl)GWOXAQeD2* zpELwUq&gu|)hZ>^W$?2Pq`G{K4{PX)NOeM_s%1)K%iw1pNOk$RM-6!qsZNMgwH2Jn zGIe3ky)51uE?_cOjaOOY@+fVAX3#2NLBpbxFI^j%2q3r)m9cl z%$FmN8p@*odrbH8d8g{lX552&c{ywi0se<#&Elu4aYJ-It8BGWS!Olv!M(g3)L|o8 z2phY?ekuz$L`PWNY1PRxn{f~Bo2lw)_IBR0U!js0D zMfdWU?q%%*N3zUk+=F|0SYNU}m%bT5y28?P?&b|7!mKiTD3Ked}fT|DVG7 z|M%nk|Hp0|YyN+P|No_$|GxzP|Km0Pe;oe*Rn7lb@c+M1^Zys%|9|*D#s62O+ZUsU 
zb)^|S@qUA-$LOhma`f=VYEp1$e6}j>z8F1(ZAMSL-}o|Wh7|?Xqc^e%1Uz-s?)4#L zGcR>`QUS|ZgjDt9t6o(jUP`Qhanz%iM?EIwsY+7fRX6h42TxsGjF5HuDl{6+UUM_*$Z5vf499 zA3Swc+I^sl{Q#VGW)t-tP%{ zs+K5`EF+)&il_csYsJU*GL*%W3KQ~FEm0y_20r^0PyN-_I*O=qxSrnc33;kkD48sS zpZ$ub{z_}bN7!b?7w`9kJXKjroh$>N{fejlvOj4kizgK(0JuD&SyMzOUyg3v)zk zC8&7nyX&x2Ryb5quH@uGo~l(!NtP+A;;BFHPa4YNNd*)(@ub=gX0l9K6;FMa4QujL zdcP;+sam2$vW$H8E1p_RHq>kKRGw6rkf$n0iDVi0>{mRscx*TgCQqgJdqSS76-p}0 z;Ag+$sl{Z&WCc$Z@Arf}RU5)}vJ8CoE1udIY_e084ps70xK=Y+QlVtBjC}Sho?84h zGOV$eCQqG^r|J$XlV!@PcxrLjNYvO%lc!F|Q}wV)Cd-sn@zmn4kz~bQnmiTf3u6d$ zkHut}vMQch{56uS*h`bA;^sXuAT%8Q}NQ&HS!NU5ipEK^p+Q{QIF z3Z5$7Z(J6gVTJrfvW$H8E1voj9yO{|^3(}=s&23%Sq47)6;J(f8-n1e;{BeGr|KRn zlV$L;U-8r*tHUx`!BfTiJt0rkJyxA81E2khr@qynG^$j7!g@lUsy*OLmidkQxEOl* zqdu&mEV`H{bTMlWIFn_{f{S_iBOW!BMHlmgE@s_eWwK0Ja4|1`*oPpLMHe%U*v5;c zU0^25lm!>_@`viMlB`e`UCa}@n01d;l4Z(*i+S0$)u1I7{hx8Vm~mNjKIwVNsvcD9 zEV!7L#a6=!D_qQ7x|k<)G3yR1l4XA5K3vSp;;fOVp)9(XaanXetQ7x|EVCQ;;bLAE zV-2Ula52l3>$ogBqsmH<$ui4vzu(;d_dEZU_y0X|@xvFNz4-9ihtB@T*+UmTc;R>2 zkpGkar^Tvt`4Z3J1A5I#FAf}xtML<6J2`L$J_i7*tJ395po%Z(McBMUX-c4~;^D{{ z{2cVg_g1CPmp~O4_eR_-QDIz-$5r{%?>W9-)OsTa0II8|%NH08m-aR>d0b&ipsL!* zS*dH}bAXzARoZ+qw6T@ud4(x~s#YlZrbBcppaGVIRcZA}*w{<+z`~S3RRzS1wI`+l zfaH!-GG7Ki2jHozVy(%A1a^uDOM#K6 zYNb-A%fROVZ}qC}tLei^(S+oI1=v<&RJBUUY#I3+fTym$sl7gT)1u1r3RCh_EmJaC zrmik}K;P(5LtQ+sFeOjbG9{B`;=(Jv`iAzZ55^fokUSMVcdVEVY5=5lBa&cnR)}C1JvECuk$AjY4NNA{%iT9a$<$aGG&3MuD;fWHF&Dnzo+D> zTBby@jC>BjQ&(T(QA1gDe^1F%wM>a*8TcIFg>P{<@W$?2Pp1S&iwzBYf+z6X%)`UD&H(GVF41D&%Q&*qgRu(>u+k}*p3goG} z$;xCI`Rs$Iu0F30E5jOFLOH2Go~nJ|OjcW2eel%P=X%somYh@|Pt`3}CabNiK6vWt zb9@M5&61M}(5uh#Ckjo>5 zWhUc3yvwTxYzV@;EcdKW=v~%5Rwm1g#(j8~SH)VxWQBKG?pdGEyR3VxI$35i?!&vh zYWr${Z$nu;sel`zW4_W2Rwm0_#(j8~SH)LDW#w(qJgG3DcUk+unJiNlyvwU%t&ymq zES^-D(7UV~tW1_EYtr2R_Xm~#|K8pIb@xy1-n;YfcmC+kz1#nG`;Tr{-~aogTW`4e zFE^jN`MMkba^rO4b-4d;fBkjW{^{EO+H0@=!`1!O*WmpB=E`H2|Kalb@?)1ieCZD_ zJ$CW$FaF`hSDpRs*&m+0^1|O<_=8Vg@t^x|=Bv`|%OBcJgDQdQlm=CmPz;}WJ_i+0 
zE&V=;+6skK8e~)Oei1%drb*cRY8#~BcB5R)m3TtFQvy}3Q8HPkECAJ2ao9+*LRkc= z*hj?>XoHsA`Q8$ujgg08m}M*`tQC2vny8s#>E&vW$EV0904^Z3qIW ziuHR+psFQGCd=sO06=xMsKYW@2~=tQo)V~PL%2?skT2#!8cRN{-%|orEm4>( zQx<^g>Pl=#eoqNhwM;3=GGzg%t{P)a&~NxCin2PiexnR zP+h&ol$F;)^QZzU>U>t&V=-B#ECAKj-1JS*9!i)zz#0NkdsY zs(`DaGvs;7GW3c4#(ecy9hSoyKvk^YQvy}pVMVeGeGULrSC4wsP!^9WObJwVgB8g# z@;LxdT|Ht$5I|L|-%|orZ3Abr%y2xwwa}|q)nTcu@Gyf+=GK)SX5C}e$uhI?03PPm zEB#4h&Eio7TooP3(hXKB3r3k%?E(+;>R}(&P!^9WOzC0P9abjGl+}l_Ug1$gSv;yR zrH5HJSeYzS7OsU}6`cE7mL?RhZJltb44I zEK^n=Jg_m3Prsg@a|Qvse{ZjiA=(t@5k_?uyR1QW9jg%xygA z{r9~8Z*AQ^q_x>lCsM7cPk2UzcuQPENfbs`$c4qfBg9If8QHD@M@R z=MY47T^fE8H8#{qR1uYj7G%zk_ubIv5WB;5>G$RP-V372&wwLpp7u}`<@;{VyiQkbmv~jdBx-RA#i5kjEqKc@gEUi;QS%yA`*d4Bm zw}#0IqRP*JBWfy3%alx(;m;w6>iTMX*~hKDF|t$^iK^0+I$4H3hiG4~ue6u_UOH5h zsNzXeSz4wrS*9!y)%E4}0^JLu+D)R0u&FGqQ!-hmED+W8C65})N}`IWsVuEiGFk0q ze~8`T`l1g(C@YC7qNcL6OkuL{7jo_o<>G98R)?jsI-#s2s`xLZvb0nw$uebOZ@9kT zPa4WfqFOyiEYDh|WU`EXqKP|S{bqYuab<7wS`A{&0%T2PX`K?uGWI#Bi0W^6)KC_Q z>XbxPQA#As(C45cs=wYI0(=}do;8UoT&p<*Ql@0G41W$PqFT%~nJj!DH^PRpFa)_~ zX)m}=mZ8rys8)-RLQ zR@R^*s>NI*QA1fIs#6kGty3~tZDkEAqFTH)ISpdp6VJ+1KqRVKrZ8D;Wuap-TNQJS zBrB9f@As5MRZEqUthTZS6;U;34a&}VQnpt~RJBaWWEuMe$C$5*vxdVOL{+@sxFI@{ zr8}%hma)%4MO1&@lr;%uk*H2dRCR+D$ujggsEF#jOj$ux<=fv=5>+izGFgT{2Nh9$ zXC0Qw3ZjZjq_R{cQPn+Goh(D2gNmsBoIhzOi$oPSMCYhVmBM71vMQqb4j~o{*^CF3d%1XPI1Pq-S?*b%(!H#EtW1_! 
zjR%!`xtMF1tZ*-5Pm#)^ds+8bb+XK6JgD5u=B&ZTahqw7?&T@n%euj0vP@ZH-v9Ui zJg~U?A9tU>`^G!}cIW+fUVr&xqp zTzlc#GuIx$_y3-$zyJ5ll~>^VfA72e;H3{;`aOLA?>~M2-?}vVa@cTJZ~2XQ2eXzW zNmc38!)C~H2$H(CE+6jcZ9?X-;T(QWNGh8gwn2O97=om(OP4R-#$I0an`aadHkW#> zP2wHLzijPGK~mSH(I-)x2??b>e?K?TvV)3ue9)dbh9Ify(&x)}9F~L_f>`SDtelX} z>z!3Ej&7nsQrD%?C&_9iq)wW?5jHo`DxEr6hCGKLscY-<0bInB#!`=voRDM#mB}*Z z331NXrOU@*4U$UJH#`%$)T?wNS;jnvAgSxp=#!|;goMH&lSPuMwMit)kmnF2bzRzg zOjeLoe()R5%1!inAAT_GIRr^v7i$fZ6(p6WZ-nJ0T3Tu)S?!r)2$H&PY&8MrdSOjP zrO)4=%F@z=$uebuq^^srMus)I(s@P!Pnyco+9Z=@$^uDU7hjD;4Q1sS1w>6{=`oc| zmMIG)bvK-5&0mL|D+R3o6u0!dx>+e>|K7|O~s3J9CZ((FquA2${;#(uBz}Wr3uwzr}|&Hql8^5jK^jwMiz+lm(Ky{$`IF zo9HB|h?>gM+9Z=@$^uDU7efuF!62#KB&mp+%F@z=$uebuq^^sjMv@hq=p?BKo66F2 zDs{39c@9BR*Jh`|*Kx}n&`pwxkf|&!O)^==Jcl5u>td(jgcT%}rf-}BN@Zzn63H^= zIRr^v7e|dmjZJitR76c>=>{v3Wyo^~lDaO28YU}9Dox*rn#$5WRwm1^=MW@yT^uz` zR*+Phz7aN+rF*QBEK?Rp>e}oy_&RQrtR$%jnaa}Agvl~xfuyd#$~2I-K_^K?*i@F* zCYdZ#7D(#)D?Mr`D@iJ%rn0(1Rx(+pERfXoSJY7@Sy7%QNk!CDmX;<=mMN>>ySM)G zI&4}id;L&Wl2n9EW$7NPB&&K-T?c_u*I!m1HBc%(j~m(Ic91AlcUa7pF;AQfUaY^g z3~a{D`=Ar0f`O#AwEdguGN17f_d&0}#KVTT(!PwaDK2dRXTHp4JcNCD{lzv4VPB^4 z8(~vjy3dk?wXMd(-uT}7i|V+{SlE{_Z{+@s_GR5@m5iCqcnJIQ`V0M8LtklM#i+TNhVN!tY)$maeofBW?;zH(Jb?=@#qTx;SejY$z=4%Ltna(~TB0X2OMi zd0m_}Xx*8A^y|KGZF z`*MoHv{A-Su9Op=szS=~gL_S<6V#!zb!qqIdIIxrgpC&-(1*h({J$PP1D_+zVQcI4 z!ARGH%uS>5lm+5}+R2(E|q={e02}=3^@wyxN992BEwEHA# zY^$@ofT*0Gqz@2pyMfP9#ZybSkI4$2igN^fR!&c{N@cPPevT@hTH1X~R`67I7x1vT z_^XCWvf7i#2t&WNULUUOjbz1gf{>h?&g)Ivb+@C6rH!>GFk08 zWK{9g((aR}QLuKSyMTwysY$DqOjdga8DaWb7i*0qE0o16<%FkdvBG5Ge^(Z2v}0!L z;;oTng|fO-7I~@`EOoLBe2y@4t<6}2$-7AwPb%Q|MP;eQN-E39CzinZx)^J6SaCCJ zgpFk3>w2jytyUsgMm~q&sp}suD}S~^rlBl3sX(5p)k-AGz~>M=b^SwSF=fmTr84Q0tm1@cs_Rx(*_Weveo*Wd3^Ls@cCfjm{Kl}uJ!SwryD_4oM@gtFwM z0(q(yD@;~fS?Ea4*56x)O=aQA-sXLklM3XiTCmi~GVnPBPhEeHKWQjSPAZV6YO#{Z zGV%!sG+%$W4QudJe)t<7m`r79wGzoP@;L-gT^DZ+^%^`C6KZZ!$y2pjiDVi09D=8= zi?t@FL3|oFMm0_<;8{~yTC8NU41Ny5Q`f~?!(;_dMg7l*O=W4lQj%rL0#99=u?C;U 
zZJsnwD&R@EW=T7k$uebur>={!Mus(%mELcJ<$hY)!I>;m7I^BqcxxnTC@W7YAZo5z zTB&5ROj+Qm>td~uWQDTQ`;DmFO$()DvP@avsq5mckz|Fk@}vU7ayKY@tU6f+K8N6` z>zOGlZ-Y+nH$vu`rBzBM%g87G<9z+JDJys?z29Bj5FJaQRwXgn4;w#u`9_W*W?s3J96X(hXK7%WTGw#C%H6q!k}{>ZUaMBx+7b zv4cWXPDoNqXR_Ke#|S)iQ`&rztelWy16391vC^q>GFb*cakOK$DQ!MUR!m6Jr6EsM z>D0+G@Hqlc-83yeCMzZ+>Clj;YH5q!~snVh$Pu1Eal4ay`1fIGn zej14y%90ihd8*bXkt_qBBkv@lJTEmL^PA^{~3a0;q0^ zp(bGuzK(2 zA3zoFe!f`*s=CR_d>Q#10jO?@r$)kty0V{u4sfbVH(5+r8+?u`pjtdN%vbzg0jkSyS$#n=+b*IIQnQ$@w>tFS-A+c;K;Bix8y3=CDOt}?M{S_aD&=|jc zCqPwqTaq!;SPzxzFV}H7BlbdLG(ZSY)$LZDF*6yDF#PLZDvz2Q!|*QSKmlKZl$h?d zGGXR2#v3zV|6&=~3>)5My#E=N-ev6rN50HuJgU6Qzu-|rVA)mxzD$AXUMtdNCgTyt ze*JD6g77ZO=i9`)tlO>3mKlvlm3O&VYnZI?F3acJ#JjB9t&%KL7lwXqz8ZWQx0w&K zt$^R3>e9UylV#$nyvxN`!-XbqgU+@B!sha&o2^WiX{(DKP_foX)X-M86%aMGrF*SR zmPxDI-2eA%RsYW)xN!H+?#}Q0$DQZz%x{1E_Vc%A`2OGXx1PrL|2}~4|Gj+U{rLXh z$F3i*KXvV+`2OFMS6{k%y!r&b|F^sH_~n-_Z}I)V7cZ?YJ$~^c`2OFcXD^(s&K|+{ z|DOGXQvd(-59XWF?#rJ#2#YGah@k2mKxz5L0NU3+#~rNyrQ;`2V*si4>gZG4)Gh@} z3?RNSs_XO^8=6gN_+=S{t!cbNDfsHNM?@8>o^QMHPc*V-o6_;iw|x*6RS|q;Dvz+z z4XEFCW1nN}W;WLDgC62Z!(D(;<KT|!-^|=BP^FwB2^VpB+KCE z2uO8PI(`y0rzS3@M5?NxNS3kB5s>PpH2gAIxUn~$mBl5Is%nVIvUvnZbyGTinJnDe z8)4bHAyUxM{GYnD1$#y&?ts+)&x#S4oneLML5sVuEmGFb*c@!<2#D{OgAq$+(IB2_I| zB3TAMM?k8Z@9?OhEa}@2scOLz$ujmi0#e;PWJ8chRoXU0s#>lvS*9!?)y;!-SSBlx zsS|yWZ$^uf|jC=?}S<<#4Qq@W&lV$u9JY%*Q)?p=Cp)6_L5UFahQYXvU z=Lkr3V-6d*yP9b*kAUFFS}IHHluVYvPw02PDGnP>Scz1nZ$qT2bxI`5;O7WPbyG|> z5;c@1eH$WGEmI;{#y&?ts+;1m;WU^?RoXU0s#>HlS*9!?)lD(kFjuEE#buC4Rf`lR%ajGAx+xAD8P-si^lgY#wMxlknX<-ERx#N~)KHf6 zZHQF0O37rIvVc@K-(t#&`=EIQq`F)tw{5b=%48Y;1k;;szS)!&_d%m&ndKjms_wDs zWEuM$0jX}j$)7ZoB|RG=Ro!4^vJ8G=37c=eu@1{&O{6M48zNQRVMVeGevW`tH{ak< zLs`V=Lkr3^Yu0aiBzRwL!_#EEGEm8h5Mj4Uss2vveLyYJsY~1b&pk& zWy*q!dGodYq@gTn*U-hR8!RTvlm!>_=4*UdLs`)~56qAi4E7mOO*U-hRd#pNHW;Y(e z#k?_x4P3slb!V@JE@s_eWwOj~jBVU}Qyex_R=Sv_Swk1I?yw?R<~JSG4H} z3wmuvjS>ediJ4PXlcQ$r69wRWQ(AmEYPhL4!e$-WB~ex7#Hbnj99KlObonG|&PQ2C z;ySH-SSkAB6^Bo-)k>&{YH9N2D~^kL<5_e4?2@ReaPk$0N2(nb^HJ&Y 
zl3fy2RZjhiW3_6B#C&8ezCjXIN*wq9=IeBw4YCW=9r1z8C^!DVZ$8pV-69Hl@oa z$;$bNO&StatxoD>8TuSoMAhsxxFw^>e;)HfH!_u_l}RSc*yp$+s>M#jVNIf%$Gp&v z%wvgKnnbdUeU2-lS{yYJHI&7cY?nk;OOs5NDXSu?#Zbd!B~fKZwo9U_l?jt&%BqO! z+uJKWF6)hvrLyQy)#{`q%am0S)t_pw^n-Ay@|YJ!HI=2636o{Ys)*`Ow%6oA5><9+ zx+JPvnq;y}S^dFc^R^N-=+vMe%3_BG*J*_Yv^2?NRnIEub6g?SpD1I%upupWXwc}+ zuu}9hTZTWelfSq5@j7lI)j=Xvp02@-S{YXwz$IOVKF1YO{V{*m5EpwixK1l$%1Xd| z8T%YpNcF8Yw24&NrRfr>YI(whwb|#mLaIONVMAVQ)8Mna`M6S_WX24Aj)7D+KVqYh zNR@3Ge0Dd(%6cFfGu;BIZi=yn8B3(fUJX9Gn{nl4)h1&mS|HU;W2?!xD?hQzMh!l@ z8!@#)k&KygF%NHwuO^Y|AUvvQsz;61B~sNIg&8yDVlLhkZ;gZvjj>mQi?#A`#kI(c znZ_^|Z;H2uGh%pDaa{+WHZ`W(t<0F=Pn5N@O)=MS`AVY7ehseH%BOXS8TuS!Cf=B{ z2H2;WWZA2!0yZQ>iNVC)$LZTE?i^Y4vFRKJN-$cP-U;COQNcKEhfvf1){onvkz-*4B4#dlBnutE0bl? z0#V)E_oyMQ^y|P6oYK<0Rwm1og?V_h@F9rRi@h4SjB`|FMM!0t)fkSA(nQGo&mAnJn`ekKtb4+_Pa#_p(%SbT8{> zE0SeC<1yUJn_{k^UOS;IHft&$P%{naUMrGiHsdj-;7#$?a2ibaGJ7?Z2O==5_^X&K zQx@FIn_{kEveLcGehp6RXISx9m1LQ+F#T@KSu;rYGJ7>$x|emY#blYXI?erms|yeO z&j;QN{{QE9=XWol{-59e5Agpv>i_3&&A|VkNB#fujSqnTzkL1u;Qt>3|9|T0N5TJ} z1phx?c>?@@2mb%kr7igXi{Sr{pM3=U|55P&)u&be+aJxgrQ^pDOr$D`CsNg-r5-_~ ztTi_Qscx;^hbwzyA>jf|q^bo=jv(IecG%&3TiShmG2_->>CNIM;K;RBEb+Pnqqn0D z=G)Tolc+H_iQI`)RYdW+8~YpsscuWdkFR^&`itv%a@8kNRT1UuZu}FJWVS6GKS@^3 zO?d>MOQfnrOPwrZpJO1^t+o5m6mOCxawk&Nf+dq>@Dryj=G)Tllc+g2v2jDBsw^dwWs?Yy>b5len5;yqqIe=z#VJgdO(a07+v2lfvJ$CE z?}kWKaY{*+DGNw-YbKikU8+PzE>NL5dKWwK0JK&so~ zvyrHwENR>jsVYl}WEuM$1F3Gm$kx(HtorQEV&50DfZ~))mhn%J!rArRQr&(|9hS;Uq{_oJT_RPj zR7$c;SwO1W&-N#cHB0(7M5@{nX0l9KK&so%@?i~SN#lk{Rm+r2mMIHJb^DnfHIyZN z8zNO@DUmE=pJO1^ZL!&K8cd`rT^k}*tx_^s#y^qT*|zv>Bw3*>Y1|O0YNb*q%h=}_ zNOfx_8~jPLW=YS6NL8zpOqRhsXL=9z0 z%Z5l*H&~f0Qx=fww%BZ#tVF8Pupv^_JrFIX1Ce)N*z{`70Qx^4UwwuvFcDJK5tUIhsmMN<;G8co5L=9z0w}wV$-C#ws%yB%f zjLgMeLuKW~&{Dn8$gF#;OqRKhvB=Lh#a<)H3S~+8MkBNCvFc=*<9Ix5?*FU#|Gd8c ze}3oVIR8Jp{W9wRr*Z!O1Gk<+{r`TP|Nq#HBkKQ;;{5-U*Iq*X{{+teZ?8Uf<;5$@ zE3dlzxA6bJ;?iH&{{O$I`Tt+Q|KI=g`Tw@G_;SdQ$*9h=hD7%~H_+#`%?y5`G?;Bm 
zk1vO8m=0A+yhom@vZ;s6z~=-!b!!biT-O^$x5C|iTB7;RW>DA?O9_2p1QRL-!OS9^^IGABUvh$oQ$fdf+#2b z;bL1Fd`V@7$x|ut9(k(PC7G;G2s>D8OOsE+=4_PxnI3_v7ABc31D_KB)ot<9q~j6u zS@UYKBMXF-gK)0441S_QzPBxon#6O%1gdP(;3D9R>pkh}&X)GbcJFT+KTY5{T-Y1W zN_7#aYH1<~Ym-mh+PK&jLk)*Efht?FJpxrNPBLRgJ|_UG+b7CmpO4!MeX&W?BT&`q zgc&p00;q0ZTh>s94dTNlO^-lTs}l+gR^Ha=0;q0ZQsb(wEpX~UX4m7SU%d8(Euku4*i6Y$h+ zan^A8N}kG2O^-ZP+rXJDQy2bbTZ}bazUWX*7Zkp@8$+NK3X^5x0#Dr*XAP5;JeAEF zd~r9!%DI)2EYlXsn624r@M+v;He{owN1mz`3X^5h0#Dr*Ta65BNQ<4C9(k&kD48r% z7I^BmIBO(oD2tt%9(k(nwIW#tJ}3B_Z86qJvO-yG)Ib3_s5-@EcB@X7fzJv4U^_Bp@iu5KUF4~{*UDrW`Gk#iz8#vflBcp$(<4vS%~mAK z$maw+bvy8=v2d|d(<4vSy;dg6l!d?9_H77~r?OGgBTv=s7L#Sl0#Duc>aa{!@>HI! z!54Sq9nkGoNtP)Kf3WTPlZLX`sOgcX>Q;-%GG*a5=xxV`HI&6pO^?=P?Ez=9Oj#W$ z>)SnQD2tt%9<9sT299Kz$9RIj*?yZ3K`4uj8VqTU>UqjCpE0<>Z2PTsSV>kWi_MxI zt;@R8s*`0N;|cy?YqlDE8aHaAJjIO^$CK(VE0bj=V_3Q8+hVK92@BuGjj$$2{#GFfe9jbUBh7Gq6LgZMOVJZlzm_~LFptDLM# zW#M3mRAnidEQ6nzG-um)*{V;ADwRc^s+CHeECZjDil@HQpEQ&u zEgSMwtx_^sMn17?pKpK8hBbMrv~0*zwM@xmnX)RL`VNm8%94%^d8&exOqMCD;;BDt zLy$aG`ZeUKTBR^qrmTvm{!AT~%1WLpEgSMwtyD^~Oj#9A{b_&F*iTEhhCEfP6ei1* zRq@nfvEhW3JXN|iK-eT zW$+W*=h^lxbyzAZKeWq}HTd{$)>O_f%x^rw#n9Uy@F$HmOPV#bG3y2^lVyfuG;ikH z@3&!18?$t4Xk*qLRwm1o1sn7B`#fqWOS(0*G3y2^lV!?+jd}aMHUw#7mSzoY%(};7 zvP@aH7wDJK3tQ)LImidh*gXaFf-~O+=|8IHev5PNU zeCFaS&i?xBeP`1P|L=w0wxj)@>_0AcrOB6sZj?-w!`3HLRq5og8T`a0!HZpK^hwxH z9JfB1s!Auv&DbYy3BJE8jlN{6*eyiamt`-a=G-CH#C*%0f1-nTZ&zA<$y9M~Z^X^Q z7k2?iTvbo~n!{FtH}YNYe#blB{7R9RF;44o!Blsp+b3aTT4L$jCsS4ZFk|g`3AZNR z-<573GnOV**1ml*RTYqAtUWQ|!>jjprQgSlrAd|5Z=Xz6%akT#?U@PS?f$Om_GQM> zq{`B_Po}Chie#)kHQ}Dr`L5V%auOM(NtN|)pG;M&l+c*L&km64t~hHXYDkRbZ=Xn2 ztCUEXvCj_r8M|Vv$sHPY&dpoP@)rWjx9dEG8UI8xd$udinp{V4Yj1?5z=%|}P^r^p z>=VBH#qMj`D;~Op5t91Cb2dm-iHtby8DVcER&T;mGy6* zNL34!k}Q)J=8D~y`;%s&n$G7wk*XFcOqMANNOkvRKCH2wX8qeIQq?LYl4bC-1Ejk9 zQjZ$SV)@%AQq?LYl4a}@w?`~?U*ba$%3}G8B_`fiEmAUB#y`PxX1g!0!%DJ3S*(Bi zM5_QmIE@<|1$XR-`_!j3=R6|qK znJiNlkm{~DYb0tYi{)>hNL8zpOqMANQ^l?rYd8%iQf2wuCsNf$Fq37<0#e--XAP5; 
zNR{<(pGZ}ATP0bhEX)%-v(@11xJ|NH`}T=cb*IH-nX-UXcg0pC!y3wB{o5x})hZ>D zW$?2Dq`E848i^XpV)@%AQq_G{B+J+*wpxqbXPL6{HfWZ=*l5LzrA106%lId-=4|(w zrmVaTn)Pp=NL34!I$6d(aZmVS_Zj}Ap)A(Ecp39a&r_DcPkeKGzI&h!o5PCFSv^{pb%T}4GL!KH zw?Xf2`mlzwI(buFkJe>v1BbHAWIVxb(7PKRHI&sMW~6mlH&~G@^B7NHUEURAO-_So zBQ~Sj$$AT=M!Z+;TQHDIRk zq`4bJ=PW~>rz|rWV`nhm6L*`zoXOWY1oiuTSDJk}ez>tW!sht(2~<@>F@89U+=>dI zx-0!Yi5fH0G>aO(rBx=k|lbo6Q1ga{bNS2Y$4uI;ewEHAkF*8kaW`Y+Y z-dC+rGFe7Hv7MXkO21E%6*JQ$XQn=Zs@5rWvW$Gr&Bd2BErHWHXnSp=$DqcB;f zECAJA@z`)!6R1w8ECN+6QZiYlECAJAvDrw}P}YRXB2d*LC6i^!0#Mx*mkpDZKy^Z8 z5vXd7!ep7U091ExYOndD1gaA%i$GOjN=cR}3qW;uuf67v(x5t~vIta_q-3%TeRcp; zceD2DJW8NCrm_fBwMdC%8T#yupsc4oYA9<=Wf7=qkrK%=^4S4U-M!I=Ae1$xvItbQ zM#*Fu{X|7G+r6OP?r~FAnSz{`TKvioLCd-rspt^g# z4Qm3`F_lH2szpjB%ajG6x_g~R4P}j~ECN+6QZiYlECAKrlQskiRL4{nfvVOhOqMAN zKy_CPHcVCm)iIStpsLN_k}Ojefa*983@B~TqvSp=%u2+m{~`oxEM=ey#q;e?e2 z)e)6NpsGbmB+Jle2S9aK+%*z4lr^HV2voI5iDVi1#CLfYyW*|kG?)g}5tT)ts(Y+V zmeEfX_Oo5_)=08KStBZoKvnlxb+U|nb^ug&W~{-^xLLDCR2G4%Zm^gvQx<^gt{7`L ztO-;{R2G4%?yxdhrYr!}-K$JlgHYCp$|6wJ4OS-0lm(!=d!;EWf$E6LB2d*m7L#Sl z0#MyOT!&?{5~z--ECN;CW0ho?vH(cUX}uL!X@il=YBD4P_0fECN;CU`4XbaE#BBE_M(45X71_q_XH^);(4x%S^}U zZOwMmI;&Ai#}%EU@=*yEclprV;k1=F%PLM z`j~ZxmB}(?!NkNFYUg3Gsl5caZNeH^8P=8>+!xD{OtAy^L^>~Nz`U?!ZqcL%DYLWFBR{* zu}@qQyx5n9U%v0CO3Px}B~r!Q6s4-FDBpMEpTI=3ed+k+`;KdSBWzAi10q$GQJpPg zpSU1+vA1>~j4cr|C#Pym!=);gbtbDlIiZO+-`TLs$qJ;3JIWcAjvsknnXLBYgiC^F`(mzP zvI42%o(G0aFP)YuC0XrB1Q!G^_U5b^hf5VVlrv;1OUsl@mcdV45YmMN=3s=sPO5J(j>Prg`0s#>Nn zS*ENCss2hGmdXmGikXLDQ(0Q7lw_H*Dx~_${-n)@8q*F#rn0n5$z&P)#6hn4?w5R6 zLs>+s_~>rDty-rh%`*6j zmu$W(&YGOC@Oj(_o65pRcQdSHJPkYAXQvX&akO0Emcag+RExwNY$J*qqq&a zOQedA?#7d9nUcyf_}QtD>d&+_FpArtaW^@iG?k@0tVou@&rXF@f7+vlvWQgsM5?;M ziewr4#OHVyySLX-C0Vg%5vih3%OQ~WHI-%j6BGPw_fvIPNmeL}NVQL-s(Y+DS;js) z6;l04f6`D^CwGJBb?1}H28+pRuUR-LJm0;|hBc5X=ABFyk*YR=Gg+oA+y=e-36C1e zqI~TKO=amGi^*!QSsmO4z5B5`ERz-PW&A0_rm}R8 zRgz`O!fnvIxB8Pt6KI-iR-f)=-C$+1%xB!G+{?vT!wG8=O`s{2MfbAqup(LJGw#5> 
zyesA!iP|U&(++=s)>*p2ie#D1xC8g{u6S#xth^0+nrjxC74g369xIb&R%2{(X1ijp zkz_T>!py^m%{5E+Saq_@X55)H_y4`8;{W&V{@dO6-+kkqkKH-mdE)jz+}_=O{ML)N z*0&zL`Qpvh%|~zi{f%dDJbL|w>(5?)<+Z=L_UyHXul~)|-@p3MmA|?2zAF!1{_D%X zd-#4WuMHfN&&F{q7D0$Q<_VjRc5Maa1CnRO=EZ%ajF-x-Xs@4r^f4E-~tW7*z|COqMAN7U0~GxH~W)DZAy$fAVyVwlF2gQ*#$=3f0GYuD2o_%K#Zz|NhHgFXLkr? zeWOPWWf7weh*6cFM6!%`c7aj%-{3yk{CF;6%TX8W(N!%|t{OU2B? zWaT=qwMj{qDGL~N|8@SPp)6ulte*L#=N(G4Z_5HH&G%nx!x|VB^G-e~F{&0OnJiNl zFzUYeX{gu0sF-#bmHkvHOfp%fEMU}qvD0uG42;?(M#U$0a|mQ5V6seEz^MD;r(v=J zqk`h`VN+RpNTp7e@lJe+cd<7!4Ze-rOoPOz_~dRrsZ`OaECZfhVAOpv)5x%fvWQU! z#HhN%iewq^>;j|ii=RfKhO&rJ2gInl!HQ%V@5IM=7yDwTkz|Fkh*9yVF$B8D%48Yy zg!yK+FMb+HR;*dXr~_hD-D8zxnX-UU_g`YlitC^;?J)nTEZtx+S+-^Yqwc@hloc2i z^G=2(M%5iwCd-rsjJp3Kj~Z(hG3tOARX13fEK?RR>i!FD2m+&aiBSi{s9KjWS*EQ1 z_}=~t>ac`>fl-lI4nZnQ_gE!a)syNv2#&h{{PL*5QOChi@ey#wOlj#3i`g=laTgat z??0~$EaQf@XkNxgck_LfJyxd6T*h6Pm-nCRVMAOredDXU5mxtDnJ>F!8Kmp}{&Rd3 zLS4L~50cHW!Yn0WZKE-e$-Vt&*Ks9dAuryxj<4=!T%ndFV`ef2>AJsfEHxb2Sh@H) z1yoZR^SqS{-D!IhI*Wbr)TGN8pT}(`S(=vzG%xE$iy1TB!o0jMz8VP|3Zr>>K=ZP0 zw3smyF3ijO;;TvX@;J=PU7D8%G%xFBOEPBC?fo~s|8HN~eYvDe^44FD;gA+p1t~Fx z)Amor9l?uz>GtK4G6|lF!!RNJp;%PU`_>~YPCBFDlHl3CwEJ>NnFLS8JIAm&OKHtg zzw-t@aY686Z@s=r@KpJvAZ`NAS6*b4la!IqUd2;OuTO@xnWH*9sW23aswgSmcq5;^ zF4q6j?vtp^6eZsj9FnK1jv`qGK5<9zVqdy_lB{Ni>hPoj4ie@N$QCf)cY~iefH2#a zcAq4xnV>qn{A@^`sxm6cYR@CMAb7F2ULUUPE%OL2pJ4tuKb_Z`#s&55*9z-@>Gg3~ zgQw#1i404gss&3Xt38e2j^O#ecxxnTGdlsxF)DY~GVy1!Oj&rL_r+SnWCc&fy%Hm0dA$h7E_=;p1`RswG?!VWghO&54VMv~;ASIGz;1hQQFZSP4 z)_y)~D2pc*PzZ*yv|h<%8T`cG&i3D3mVSl}WzqaSBu`bAQj%rL!VA6sE`QQc7Eda` zl9W#>dnzW&lm(u;|IYTX;?CYiu)S0kZUT<5TCilY+RDNm!Snr_J!&XRPAZV6YQd7p zYAdS?p1Qx^9s*q18_$}`!cD;Wtmi4qjGwq9c(z~EVN+S-;HenQP}V>!sDHYB3TALyWpw&Vy($(5N5(AS#nZ=JXPzJRF=U{97UV$i?>FS70Qy63goF; zvXo@Cm4y%SF7{@u!Au!XnrjwZdHGtMr!2c+8EuUDz8Gsbtie-p=|sL(yN6Qei-z zs)b4<%gARJJazwCj~dG2NreG#gVqgJB+I~O7d&d#p^B!B3PA zv;E_BSSl;OuFI1O1KtL$d#sWyQxzlWIqp$!f1z=;Y1!kJ+%M 
zd6_2_1~e~gM>vyZ%7S@$|ENa|W$~oKfaYc0U}dsQS-1^)|A-AinwNP}VLSUSA7$4$Y?9Es+4)b!C=H&s+%eukJWSPyl3-j{6 z7;C7kxDC2X^YVb^W!+&#vdm`O9Ya~gTO&~$WntRk@6T3{Zm=R*<}&V%oBRL3|Ns1f z`2!d3{#kwh|MPd|@c;kO?Kj-|=&k2&J$dt`o7s)y3=lc+Htanyz+sw$iqHAA005Y6Z*kqN>WtWVL6G9*F9pwE84!%tt({FeFh` z;bgMf69?`EUK~oJkI4$6it{gg)?DdTIGHTNpFI%OLuvFeSwU3M#%0)C>6NC`$ujiW z15rIRO}drsQ zW$~=SkVI8cN+iqBXAea6P~0?i;KvWMm+shofO^sN2RsnmCeA4rjh2POq7KrNMMte=31X1nqtiq5)RZEjhmMIHF z^>E#zhO&58VMt@DmL{1jQx=Ho;hGIW5LH|@krE(L)ygE3W%#oPqI$Smhh?&YsDflL zY${9Jzjd+F7{$<+kt}1M zJrLEyC65});#q|uiK><+kt{=>JrLEyMIVAt7SAeR638JqZ_R@HuJyixs2=|&zftNRwh(d^{|3J z@j>2u`{JibK!;D{#?Vq);!KtPZ^@RSPkfE{{=RY31fawBahsStt$@$&M!Iy1MQs`T z#3y+d`(md_k~Ycvpm|&Y-`&l)^1^1mjD7Yhq*^RB5;oMu^9n;ERo!GUVQut@Px9X1 z|5A|`Mz56`(G^jV%*RdPb}cOyBSwbtuz@k%~nYD7yMa6 zU_7#byRI^(l+%(i6E427yMK2bmqQy!6<1E=TT7&>J1u6+gj*ripZBmKF&LmpVDX2Zt1ta~jc%fwY#my4~23r$#;aqC3BRVh04=le7AWp=@FU3W`Ie;Rd96$`Z{#4u%JU^6fUk>0j zh^lzMMBp@T`M#2- z#|Fn)8fB5FVrvu0(t3r-GG&3N9*V~%hZR@$Mp#ZwB&u4lWU|`I!WF^uL$TRN)KC_Q z>X1ZLQA#GOt*jo1>Y=!7GFiB>H=dQNK8dQOC+-Jc9LBcjr$d!Qbx5MB^-3no*eC7?o*zcGJO@$5ofCZ0+*WJB63H_5i7SHV zhoMIeWs#^3NmR99iDVi2?188r20jF#ED}|88DmtnC(LBEm4)r+?9i{nrm}ErZ}Se2 zs18X~wPGpBYAXx(11}Cef6`DEz29Ja`J}Q8Fj;M7q31Y1bZuCJsN&9vOcsf%7Au)7 zQx=Hoq2p0QStP3X=59W$ysVilQx=Ho;oEHpf~bo38{gc`sIn4dvJ8LrKvWOKX2WC! 
zQ5Em^kl)wUrf{7sL!Uhm)q{C#CP7ri`;Bkz=98XxzTVhp4@C7)JT{!L!l5eO?;(k* z)+v!JW1l?`)kCq_NYqdkiRzFuLlxf-;)|8a(w;DrWy%6k zJrtXbBrB9fqB+iyGFgT{ zdmyTZuc^Z_SxHps{T`C2>K?03mZ8rci0a|1{Yj%2MDI7gxtqy4Pg%x3dmyTZukvAy zLY3a{A&IK)up(K;K6@alhp+UgQK-`UJ(TO9WrufYEwJo_N{{}}xLy`NsykH4B7N~14_Z5l{b?B65a^{djUht1e$ze1`_ zlP_9a(?F_X|Hl2lsmAj}ao2B`NOeS{s?v#f+~8-wLaL?BCsAXm zCsG{|sj70~9q+Ygj(&wyOP4R-ag+?r0!pNc7bk~6c6<4boAndj{Mn(j`6OAf)Dx+W zh*VWNC0Xq?xnChw>+zvq7f+fK5~_rJQjtx*(m{YgVvM5-eqRV_`JtoEXe^Gox?kF=NiX&_ax ze~*Y%wKmCQnX>xR#o>ob)IeU-xa(K!-y>Y;dRBy6l+WU3=FRo!Ya zW2P}Iafjlq;qnzsRh-}>v8kS?G2@^3B=77{%r#uT0;!4;tJD7DWlU-^|;fiB$2?-FOYOOo?E;Oq#zQ;Q|JXMEfvckQLqh1_> ztW zKbCG^j2_0JnU}=-jkZ*dp3L?+dgJy_#nyIqEbYD+J%nvWPrTn_ai|JYuSXAEAYGhs zIq>4x?)6m}D;pH?e#3s5Pb#H#zUp?}En>`%rPs%oGI*+Zzi|t2h82<$uezB&aX;|< zSlWFOH9BbYevimgRY38o8~DWiz>8z)_C>PrKNJpCdcScCa6YTd_L(e$pSU1+b}a3_ zNEX7zK}CAMaSL#U6$RDFGVs|4Pd!?%ZyG$cL+>|k0gjNWo=ldJPh1c@KbBsf3~ThJ z>HWqnz!_H7rbw2N&pvqSv9$XnYBMi&)r2o$J^ib)_1-IS4Z?0 z$KtV(WQDTiqyl-WmMTnETUn^?X2)W(kz|Fk&Q@ub3tJXLFyNS2Y$K6vW!0goEW;z@-Od8&ex zOqMANJoR|jhai;2lM1+HHD&>A2s2rxEb!FhojNR)l@3*&R2Y$`YL!xwWy%6iJ>K>w z4Q2790w&~q((~4=p(zVI^?1vMHF&Cczwyc4413O+WuKSD$9U(*VzQxLgQsGh&X<)u zRS&B~vJ8CUW4w!F@z`)044x|9Z+vn$pY^;o%it%z#ydL}lMRyR6F#3kH}NC9h}NC@`uCvSQ8R zNre%4s#Yl_S*9%T)Z;}{R@@8S;YkJTOY%vjo@TO4S>UP1GgDUbRC>QhTlFxnXKTc z;{C=acVlF=BU~rTz-PbWslV<|8dWNJ>WDm5H&~f0^BebZFZAKpd|0DOrHdJ#+|4JI z+9i@@e&hZKRqC&L)TmPFVjj`OtQ)LMmMN=pG5?AWL8C0(I+2qrUCg@2VzO+_s$9&! 
zT!)opMU~1ASC8mo);(59mMN=pF*oL#?5c%}8COp5NmE(6!D6yZS-2PaP`ovqu)@WR zJ0~(MUCg?}%4C_cDi?Ec*GSap|Io!eqKjEKSdlEV8}}<0bMe-28Vnb++_*lHd!f%$ zmRXMb!{+|K+W)^m{r~5+|Nl==|NjT-|Jkj7L;XKP{r`cRZ$SP30o4B=yM99b|1s46 zPhNfL>gMWeu6*Rm^2)0&|IOuRE`P_RzsCLlgF$C;EDgS( zGWb|lTusK43S;tAtxYmn)x%2JKLAiY7DJ7M4QcVH!k9o+i<8Wjflpi#eE(PsHG${w zw4p89zsCfsTAVOl20w94@V#TP)C8VG+z=Q2-(vz*Whfs>jAq)8IM840Z9e z0`3CNgq^FdPMdrV0922UmX(=tLtZ?tFeXsd0wptM(P4NBWtICScrwxJezydbtF$!9tNXAUK0IJ7VmL;EYLt#9zFeXsdGNs9wsTM%> z_^>}~NQ_4o(B;i%m7SDG%%lsG>hTpev;kDb29CRcGp@9OnK9E?5BurwC}Eo|3bY>@ zqYr#cpsFAxGG^ctw*)Va50yb+)Q}j@EZ{2Od{}vHGhqflaZB**_+TBDGh*;myy3C& z!6$b!tW+v>z6^ZgOT3HY)Som2#uE$p;%v%?r)n!WlV$2cfp{#=8ZKXHQRQ(36sqy8TBR^qCNA*Q zW3kpqvO-)ut$;7?W>}$=C0V8|6o|*Ruclh4n(itOD^xF4SX9r`7F?OFbp|~3SbQ}c z*5Ik){~nX4>SimGWy%6iJr-+?L=9!}sKS^$RrgwvECZhd4E?bt@uUKXUq0!1$}*Sn z0N&-}H`=gNPa)KC^rDvaq}){Ryq%S^`j7VqNt^)>|I zUGDIt!kFG=J*<++GNbVT-sNMl)-YM&UB=!!lSS{c?zHM;naOy7UGA~%tHFC0YZgx` zjOkt0T~;c~T*d=gB#y$rXQf;^UFt0XKlYIDlOr)wTC6i_F6L$p9kHujl z!y4^rBGoaGs#YnPEK^p6REx<*qK2}FRL4ZB%2F~}wmMfxwRmhKS<#**QU&^p_f^lU zFj=Oo3aJ*8jU+4D(?qIcB2~pHC0VAd3aP%sR=l*R5~*TE%O{m0g~>8yRY>(`ZFvr) zD(3Gok*ZcHkt~CsxFUFd{27lL$|6!76RB#I63H_5i7SE^$Dg(#2&5|J?=g|8;*?C5 z@lV_lJUhO<4$EW(QWf*}m`GKd!gaEYeGV$5`cwX-p)4X*Tm&4+(jq03W$+Vs1kaB@ z>BAbzB2pa_scMyy$uebCNcC+VHIzl9Iwn%pDkYO;%Bqm+PxugovWQfXs~iGZ379NX zR)ti5ybdeL3S|+g;`6&1R$2fhS*ENCskR+9*aya%MWl+2U4}eQS=FfOnpGjy;;`X_ z6-ZUg-}wA)gw-k~l4bA{ALgAOi^)c!hO&rM$3&`Hr9`rfed5Esi(~QFa2gDxD(3Go zk*XFcnJnX<_%iS8SWGrdRw7lJzsE$X+7qsmW$Y7QQjqNne-(w@^Cyi$mF92MHu0po z!D6yZSwO1C@2$gfSOckw`Fl*HsynPmmcdVan0J2s9*-J@D$U>c{BAz16akmFs6%HcUYM$Qx;sz$NL^Nl*PjeW4f4i zgO$lLWx>UKEXEp6gW+QC@UQ|_?HpBE379Nfv*2Pr7H5qlE1Ii3tT3jFS@&2aS*9$w zn2&8+4LSm$EFM-seIHM%8!RTvlm!>_vDj*;tZ*^QmFr`=n01F0$uht3V9?zE_gnu} z_y2wP()!XP7ys_!A6$Is>_cb&<7{%_FE9L7l=YwT|K2|}ExtfVfXQ(&v+~vDOo*PKZ=hI`NX@ zR-pFN0;!%#pD&SWH;}5>zi}6Ej)H8V@-6Qg{v7tl_fDnLmq@i6NLBpbxC=PrihAnT z9Jd15vySecnm%7m+hBWV7` zv*wL26B1RWDVZU0_U9HK{b`qK8Ik82;BkR+-T 
z5>+iyGF!$zaYyj{^d;@(8Qs4KoA>`sNK~~-$z++jKvYj(>`_BqB&rh+dT zNK~u+X~X@rNMW)}Ss<#X&$nR>qAKq135lv!DUmE=pF1Tzf>f5_PwbLsr(&*=WQDRwR3{{=y4xzr zGG$@0s|pHyB}Cd-rsqIxRM8V+j^Rq=jLNK~~-iDVi39D=BxZkw_? zp)3;B35n`?r|J!T4lx0rZke)zsCG$ICnT!6)5>HS{v3j+o^IA*nXDkH*n0tr?!y{u7K!SFL{;0snJiNli0bK@ zM-63>s7^>!b&D0sGMh2J#=AIO^&yBgi{5XrfEc`E;cYE6ECF(fd82 zds+8bC0V8{^oUNE{YgVvyk{LHNewJ-2ea5pDA?j-Mo}nVUL1tS})`RTY(F;X$>e z1X4X&yARfmc+zzIU=JEks#QuR%it%@1I$mQ-N#`Kq}t(Og$a?Wswk3W@Dq0g&rhY} zCsCWZ3DbNoq(rJ(rbM!ged3Pb#i=y>n5;mm;{ToyscMyy$ujsw^dwWy%6lJw0V>={S_d!wNW967Q>4DNL3v(Lk!F*W0Q; z4rTGM!h}dwaY{*+DGNyT^g4giP!pbX0=m4;O&%it%D#LQ1mwpV>L0plG=FWrPl zRm+q}mch>J>p&*Ulm(=Edd#DSvUpfwLZqr?N+!#c1*Cc^4jWE`iBx%50ULpMR;^N)EK?Sc z>Zv$vBw3*>9#+8jcQdT?07|k(aWZUvD&`ta zScz2W|DF)3YMBzrGWa#NL4pjkt}1M_%iR}RLnI@Rw7mUzb8bh zy2r|78UGvtsh*0thRF(~D*o>Yk*e;o>SP)F90IAH%vsaTi=l~BCq$~c!AfP>Wy?b# z)zgEffxH-+NOeM_s%1(h%ajGAdYXFFs8oqmCq$~c!OCQrvVc@ilRBy-D=JkYRqTi3 zebqe{lV!>RQaz39u#&8(REbn4M5?;SD#d#N%N)n}GVkKlw;>24v-rO! zG&1WRE0ble;~|X9r(PYF$qFMgepv1XX=K(tR-G(!91me+K6U*`n>9mHE8bt`~8al|KGX) z@9!@B{wFE%KgmD7|6J+yB~S%4YF1H>9Zu=T*r|LtcGz+0`-4r#z2{1|FM(>W9}7MQ zZ%Ux50^;CFm5K|37tdL*57*?!vu2fw>rC@m&+DKW`Gmz_{#@zx6Ii zVP)4Buey;>+z>o}uC)6kYOMG?vM?o2RRP7TZr~Fa1TUT|-M)O)d%;uX8{kv&R25LB z%it$22%bGx+I{(|<145!vNY}J^~tctUYf@hrsSzwqGYn#^Ahd{o}Ws)PolzGVFh?qatP#oWwK0K6;CbB8c9}2i)R(422U09_mn(UOO!~Kkx$$YJU{(Pd&Tc{Ls>kjFeOjb z5+#yl;1l-)FHXPQUPpVuQ{@}rQ}R@0DVZ#TpSU1+cKW6Eir)*KD&FrYd8)FMI#~uj zM-@-~MSs#z7EdZn$y2pLVY1pQJ}wBJpMJrIHIzl~_mn(UOO#BODXZeC@AjynES^-D zlBa5klF2e<;creq??VvE;zr%3WdosWmP=&U3FM0D;=uzeox6$wMr?;GG$df z^_~8tp)8(MfVVrJ^gLx5`GnbQe)>5Z*5Ik){l*t}BdnGvkt`#h_!jT{RGc-`Yw%Ps z#Mqur$y2pNiDVi0#J6}Cr(&$(G#ETpyx;iZZa%Bj{+TR;pZFN>>{OgJOjhtz@qXir zyBSuhlsZ`kKJg*m#mQ_ny>zJZqyoOUn<1q_NoC<4n$}*Z;;F?}Bf}cX;z@-md8+QQ zGFhgqil-K5jYJJ)@ub3(JXLpCnJiNl{^nGSHIl4Q7Eda`wU|S2-kJr+b$bY)tm3SZ zWW}1rlL}MvRIO47C%(nIIK8z!1l{1N;{C=Kck@~0eNAN< z{2W$1^+)TlnXGQ`RPlaK$WwKXRVT~9CqBfxIQ@t}X{=dtQh_{GH&{$oTUq!R@BH+` 
zKCGcEIjKPFvi5*8S#4zvVO>7`kVg$=$w>uTmvw`c$!aSL-{M`Ie$a;?lqDw>XkFIB zDoj>eS!hGfPH(BhO0q&(a#DfTW!+ zN=KH0-j9vnPZK)=JWjKm`it9A(n-VM~E(evLtI{)NmG0DjYGotQ)Kl%QD9J7H|E@ z60FH(uouzgL*BD~#OSi_u~IC{8RKKTS5Sq&?8Kk$>;KnY{VMMNf6bMDx$?;?_h0_! z%m4lIeV4v+>EoB~yZGgcAHR6dg)dzA_=Q)V|HAo?p1vFzUQC`Eug$<XaB&y^~^f*N!nT>bx}hM67m6>Jg*j^^sE`TPTXvT|dC_ zX7keJ6S3MQ2`4{u8%d0+=4r$--idpG>$$b~07F777O2FiTAZX<20ZaTt>&f0$7v0W zDw*#oF{)N4A(jD8Tn${!OSncxZI>jJ4*B;dM%C&h#4_HAtAXoziPli8z^Ib>o)V*K zagt&g^Tgf2vw4ZuP^`eHIQhvRo7-qbC`BxD7BK3(jnxF1!UmM-68YqfUuYm7kM&E1$;El`SB<}6^;`49RpjcqhD z-}vTk4k^V6#WH6Bqs~j5h6`3;R2=)vHGmjZtCJMVoCSlo(a_SSgkd=IV?kDLBqq`%#tJ4M76GqZyVYFut#gOO|qR zAx70bRuSvpjoG^~e@^+S5v|10kk(o}p#XT4KlNB^SgaPoY{DD@xJvbj8F2` z^OCB`oN<2;2IDz}DdWq!(Mp5ikbCE$MSOW)k~K1HIE)7sri?G^MvID>aR=X~_y5gH zyD!%$d;+!It0qzfpvdEiTCT(t!Vx-yxFWckmyVx|8vE)*su&fwuk^>a9`GlTl+|ok$g<<^rWI3bBlR;)>vUUK)NBE0Ah0uhYh;*`8JtrC7#4aYyiMUOIjh zs~5eXyiOa#=Du2SN)fC3_Q(Ce_1xNh_yTGxRw7jlnQwosRw!0?6~P_B)x5-NWLm>n ziBvIcI!nuy6w90iq&hFj8W}a5l}Ht%rn9tMNwK|_9NR{Dl44clK^wL-DX zSwO1unNMptE0HROO=oGjl46;&fK=xX`>5fpM5-7you%bUiiJNQ8(>@!T+d(QQxMKd zq>6tiou$$$~hFz@X$m`HX1*UNXd z^n+6@gP%yOR`U|8$pxz)NL9k$QzBI@S3)dcpZPe2&|#P@h-^OCGdvHF2jCH#$#?&h#kuoSVn&ce5N z>$$~h`WaCrQpHDib4aOGC|1{5_#SUHFR>b#)^HY)>Xb-T`@tzz*ID=+Z#6H;8W}a5 zMWi|Jsiq&=27)W(~PuE#Eq}e{NM5=hMVhVJRmCmv|meK5=&0pDdRzGipCQ_Xe zsp=l95zE-;7)W(~xBt>`7Lh8Brsgj_&RGUO;ooY0S2HZ9HIS-=zo$g1y2A>w41VHs zyw&_pA2pmsq&g*1)eTmNW$Y85B30dErC63T#`kz<^E;Yh>8yw@ zOZa=r=(6syidg0>M3?8c`!9`m7Ng5kMwfMimCmw|F}}xJ&0pcu8qVT9>r+OTb%&K= znX_;k^!(*MYB-CZuAVZwtQ)Kp%bbPVpywr6!(}j{%W}^;*4B7lb&o}{y6-Fiui3mL zYecMgXEC}wWpr8hSdCbgF~+xe>$$~hz#`f;fcLE9cy|6$=~|^&7Ba>wteTft4V@Ly zWw~d4%ILE0utF>g8RK)jh5!G%`~R+A{_5rB>DaA7QiPIvpMQQViSV4XAqyogsJ+xLQjaUXgaVc=UXahAAE1Z>qZw#5v(#j;o zGV+N6uv(Nn4W~7DDnIxQ2AtKYmL?&Vkx$$UTrEnXMn(;1@YKbr?wfuPu~dHW z8$+hEv@)Ss<}C2k#mVkFd5}CcPby&8be5JTDV8}4Jauv4qlUBcqyk1wXDLWYvCLWE zsf!an1>vj=d}GvfmUe$rEQ6moG&P&QzZsU!8ilj+qymObXKDMl5zD|QE(NaVf9$_B 
zoRudPFl0JQE0Yw<$R~Or)%<-nt;tg}@ZCEkPu0>S#4_@Udx5L@dwtY!R-RPAsOc;% zO+qXKpZFSYJ%5i)LGsiLd}GvfmR2Sd%bd06sU=ZEv4+u6O`eKj(^=a7En=Co_B_?% zG=t!&IQ5weHhHR6CKSt@wdbiNPQwK&W2wng@k`TLTAHL-=BzzWEr}W#HTKZSQ!#2f zOG}d!%bYbF+?ba@O-3t0q1v5k@>GnP&eA;=#oGT^?I-PlYKhb&o*M*E#T84O+O(E# zvWi*;J|{x})izKQcn%wz_^sRz5~ylzLb>dQWh5c$c}dhHM;ioCW#$_Pg7USco2=B! z$S1zYThAXU?#iD#hzd1|f?`&SeH54n-%e-zK!=}A-yH&(8cVW4hTe4;l z>1E!wjv>=sJ!u0|EHhXC+xPyzMd|qE4B|tmQMAV64wR5Hs5WY55Z9RNTM8Eh*9&X+ z;UlOqWV}%@Bxi71_j;-G`5$w!btx4EUv7o1_(xVj29zELV%t?&Guu zQe}q$QgCe&7ll{`KXFHJwJ04w88x=n*R(B!6CBd^rN!d`WK&p85+l3Uvrn9tYDPozku=ieA#AcW&)$A}}$aIz#EELO} z1*Ezt5gVD-a8{4bB2v|gCB-sl0jVxZ%0@;FXZ7Mmf*+gjeXUqhEc^l40^^S0deQH` zOR+PJ=atSPQq_Vb#WMbh7V&J+>%R4ehv6({A@+`qYvm#RV$Vd%h)IG z2(A~mcBfzvNR|0-{MK}q)+-dN>nvOnJX^f988)3Y2&BsVH-=4TX~|N=>N*P-1lNm~ z_%DrjmK;_fQq^iDorV8GeOhrzaJBeJpVn}e99AGw)oLZ2h0K3N*Rb#RZEo=%lIcK$!t+lHX>FyOAad#scN;-h-K^( z-{P$o7O_EUEPiQhFra@rOAD10%it$ow$-9UY;wUG1X5-G8%KQ8Sz4%sSO!1wIo@hf zQZ_PbI4ch;VAOnP=>{vrGWIzEQeBjg4aEwidYFe5FlstWs}zc5&H_?hl#~s{3Z%;X zH-=4T=^m?yWzGUpU0B3skhekSVFe7C?=0P5Q7m&7km}-v=7GEoIu9#gSndV|tEX7z zEFjgz5BjL#tURoMQS+Up8>|$|oCT!1_e6)|SsvyNfYS-Qt6VwtlLV_rPZe`&n4@~{GiOlRo^i(;9x5My4Hhz%F4 zNxZZ2umXlnXXy?r#WH72;H;9ekx|20c~}9Xrn9swoMM@?5My4HkPV%cw?X#_2N`45 zrf`a7nPW&hTa=WIh!xJt!wUGZ=`8IEH)2`d7~kR*{{PMI|9kMlKVNwK!u{vJeEwtS z?>_e=^8a_7{oL6PoxQE$|F@m~%;`rP{{QHy$;rQM`2XJ?eBfa4AK?F`%a>Dz=6?I~ z90aA_S0%B3RM+H`8T`ZxVYVn;zMQgQG8>^u^8o( zh*5JfQrE=uZMWRwQs8<~T6`2Md1^k07?q2WlqD3ayK>-G;Mt<|_$XHL)I0-%VYwK| z3#o|JT{UncaJ{ew-!OSvj95gHr)pgiVj1|vrNH%~q-jK~aF%pu$Wyf}NwExmVqY{{lsJuu6^oH{ zXvkBwG-<>#@QE9N>xE@$@EU9_kmo-jRys?|k`&9xC(;_#qGV|}t;tiRJwu+VbxDY2 z&(s<2FcZNJw%MyxZ&H_(e{Gv~5I7`|yd{%`salt$So`0qT^|6|#p}x)aM-Yxv}XuZwJxDr20u|8 zTwnZrGj8I!VE|Qj67X#0i(ZS9BA0Y}7-s8|5i!)zyD+81^%XpoEd>ml7SrukBbb3te2TYTyrwuT zH-^Deah`yWY&uN$TB(?;@RhKO?zpDLb1$U;HitBX@;d(!Bg@0bJ%p3Znuh9 zW-ja;77zF@4Rd8n0e}BAm+rMFmU#=eK`-w2X$@};=q*Nvk)}vYauxMYF}d&9EX?Yz(E6V{}=!Ta8$j zF~)~@>xIQ?P~}H$L}#HijN724Fi5d1WQ-@fT9jA~H--^i#)F@a6{E|#*$S~NWQ=d| 
zR*RCXkx|20*-`+qNoVO^E5x#lF}}rHFG{e6%V0#8@ptBLO=szLi(;9xdhPvxA31UD zudmftzkc3E#4&S9nF+dS_0FVt-%S_k#gdVTl=Y7CjxB@UM6kWyaf<8D{pqHd~|rPn9Z znrl;Tf_vntTCv3AZlC+ahOt_fcAt!zYg0CE_9vb~EUB(+-$0FF>38x}tyn@VBcHexp*mRcgRIONwSY2n~g5Y{-5gU90 zHHJ)Q;mfKamKG}%%iho6smrVOex8Q2`spm(v6aKh`zgh;_cJ1@%PT%=I7=EgDHTQY>>8clKmL+8)V#Pa48aL#rTB$T*8TiD9c&^VIj4vnJszY1fdaYL!B<%vpP$ z`bW*MGOh8>l6DPws+K7!mN{$BQ~%IM4QENWhCEfvln~3nC%(m7FaE%%Aa;Yp++$+r z9`9Fe3a3~GKe1@e7VmC`rL*$uy3(*APt`qEBbMcj@gd%N@%#Qu!&%a-VT@TfSSgkT zj#0Q)i+92*y%NVmXYj`cRZm=kpIcpzd zF5wzFE3Sn`6_lkquZ7kfR*GfL8pBy7UL&K1v!q+YYoT?66=GT57~kS8{Qm>{`oFq% z;_9cG`hRn!y8N}K{(tt;=F-DW{r}K~XD=)+Jaqn<^G}?A=-kuio;Y{^*)OC1zvs*s z&iv(>J5E1&`p-|l{M2VoJ$mZ4ll%PtGI{v0|2>wf70bh66B~- z^XDUO{1fH;Y+2fT`G}7)qDq(JKCK*9>i*^tH};9sto71*eAv0hFU>L%a5IOL$|N6g zgP+*VRLj!ii~^(gDe9wVx1oD{3OaDYM8%hKhe zScz2W@*a_@8i!(a7Y{I~*|N0xC{`j>y1YlEs?|vmtGjR@%~~(5$2W?ID$jU<*JgF9 zl?lb_t{lKm^|EAX63mPuqRKN~7&fa@ElpCa{f`y+c?hPuEO{CkHmoHr8ZuR_O;Rmm zpE%KaV_EVvN$6ntDABVii#Sa@v09r@F5{nQYF%HJL`@R9Q8HEO(~zlZfl~A`_6h!a zWBH2mstZClijb-_WpSTY{?=pNW%u2JKh?{Zmlt!6n@p8$njV>|)+ec$!A~%#dU=}< z8~)M50|GvCLa|qL&t{8AU{ux2&MGp06+6Yf&t-7Le+)#A;+(!&+?C^oUfo zN=dQISwO1GlB|(Y!&##&DRHw_{{3aI6=E6t1c0rVKWWbDhqKtLLB29aJ|E1w9_Gi@4@`P7xyHue9*m%ess z-PHe!iw|G;*QWk|`uyVjgKho)^x3~Ud;giI&V1s`{inZj`V*(`J@v&?pEz~T$uFLK z?BrbsUp)Be!Cfalf8xX6tm;4R|5q=q+lRw}?Mt6-hez^nDhA34H1>&Szgm`VA15%0 zs@R=GRZSEVh%a(`tPpCJM2PeS*JM%hK?ZQDbQmx09%9r4nKp`ot%W>SgKoiCFQ{7sHdNYMqi| z8UBQiXUo#?6R~n>V(W%PRZEpdEJL68z)`)lkPW7&6-(MSB&u4aq*%s2QNdTs60+g6 zCQ)T;7PoALVYNz0vCLU}qFQn`GHN(W`Zgr0ic(T6+xPB?YKhrUtR$+^wjohfnnJP6 zS$m@T+xF5=qRQ@Uk3>~zN)gN6&U>QzW_#%~p-N|wsA`cyvCLU}qWW9*dQPG$Z5t9* ztx`fPW1q;gR?FY?QNvksu!clcQA&tq=o5L?difhZ1>r2|*^sDek&?Up6eLlVb`6QD_Jk>xISWK}S#mZMD~YOf zY)DkKP$^=Wvp`gr7P1*7QI%#5iK-SU6w90iqPi?08!lLxP?cs4iK=#lLo8#ThajrU zlCzOf!&%a-AyL&TCB!oHc?hDqEHN7{gPBm3UJZ$=_JmU`!=C`Kvt`NIh*;q)>DQ2` z>K>~R%g`snto5?Z*krRRFNT(04T-95uu?2zpXlUP%M!8Sv?fuNW(|p|wu4hFa~6o| z^5@K1<8YQVYe-aegOy^Lvp`grKWolPqAI-_UJR{!EQ)2$0#RMQt{IkMB~j(snjVR& 
z?y-tk<}47^DMsGtb43R zEK3~Y4(WP1^Isb8Ea}xS$gCTz6w4yVU>Vi&VVl+rGP8W^G03bttQ5uZBTp-D9P*AZ4e{LXde`!Zj2tgUnLCG03cY ztRj{<3qj_kC2K|*WR}v6L1x`xQ7m)TsJ;L1(G%DH=GyDP|8f5R>Iu~URdfEoLjAvK z&i`*v|3BND|9=+s|3l6B|A)>#efEjw{QncE|DVG7|9eh-`P5^l?mGG8$v;1N$H8Y0 z{_Nlg{QuG8YW!pWv)QV&`EuIEiBxIxK9Q=HC(X3&XH6g_=!o4_Kfz$@Rq6C`YLltb>U}a*H4qi+u1i3$^{RCH zWY}Dn@~}^zOjS!1D%M??fM9Q|O1F=SB~zu{`(&zGqKIPMl?e>?`l>{0s8}*pHfQ@} zs#>SCiglMJ@YfrwHdvF2Wloj7nm(DT7Ac|_9xnCnU{tM2vL=`Oaehga-I_j`s+K9C zn88mF*lJb6H8N^gjNO_(k*by{A(*jG0N8rywhhr)p~Hw&wMt37jDMm;PQf0%YPo%1qN+XxCPtezT^*#Pe!(VLH^odlpN=da0euBVOtLJv#`r|~Z?AG*& zRJBY=vCLgSs;lqzQNvyA*7S)~wM|2q^gxl5zDNF_uJ|_{Flaln$4O%k*ZcH6w90iq`G>JPiyR_*{$glscM-L zVj2AG0jaL8`KaM6c5C`Xs#>OmSjIj9VCz-M*5noqTj$nUY}SCN#`{34loZSOCos`$ zRl+qQR_v$Qu;~-2YNgVMW$Y95wO(1W1`YW5rF?VYcSgAEIBsF^69l$em1GSUtVF8p z*7S)~wMusij_!}4VylZs_wRmSmrFe)>f9R8Ru=#Y}WLNRCTA7&axYpL13%ZS@S^N2F-3w zpGZ~9ln~3{Cje}Ak)ii*7TWP)=d`0GH316 z%YWPqOR+M&%!W;$>1Ewx6|u}&c&RPl=f5=GSyII@y{sE7ie>MteR{bhYq($?#yd+Y zIi{C&hZSO3&bW7o9#9F_$f)5gspFVl)(uvOWiex9o$F=E*3emb8#J3WuvIuq_gE>G zMUC-v&z2=zBVvWK*s$p{y{vnzMl6dNBj_yr|C`_c_Ym^`PvHFj(>VYCz`3WI^Z!rb z{QtcT|GyXc|Hm5s{}}TBpKtj8=aK)v<3Gj!SEbv>*~^4#egJC!{fL~s<2q;tKk<5; ztxCI(vzH0g`~Vb&dovK`0RnFuB_LG2dsVL=LeuLWWMp$J^85P%y6gh0Z(0( zULPOIDTHBdb2IJ?*Rt>CGv((R*I z$y4(KQ24ERh)NBVk2+4sbce+ozqI=(R`OJFJ9(-)sEB0?2zctsdVTl;YAcplojg_D zlVTb9#P^M=Rq6G~wC1|RmJNBTmM9^XkZ-K+WYk=jSWl9tYKanJ8TiEajp|j& z*oav1#+QB#d8$?@DVD)cY!PRx60#Aoa$VxampoOgltwHApFpqm$`UqMWJ9bxR)gky ztN~h~P%Lv6cDHRLM$VnJ@C}kT|R0!i|6S2 zPt^(~#WMJbGHJH@>1J3uiwRXai#%1UltwHApZKa#y}HAHX*f$7HRP#Up-?Py7I^CF zcAM7ZsnV$-Pt_77#WH7sr>;uChJH<+DxDhgR0Sz1mN^SNbyeauTn3Y;N~4B6RVx&V zWzGUmU6ptZ#Y&zk%^LDltx}3u<}C2kRU54d_&v^qsx)fIQ?)`#v5b87z*AQxStHXL z&XP_Id8(EuA(oNP-Wbj**%}!&oF$za@>DHRLM#KHJ@C|3N!Ey1;Vfy?kf-V%E5$PS ziB7?6RgyI#R=l%#vZha-s(Y+PECZiC@YL1NoW+Zw`OYFw)eRQKGG~FOt_J3;s#4QENGhAC#C`aAtQ)Kp%bbN2^Qr`ExC~~BSsFD=G3y?SVwtms?frj${{QO! 
zzq?NS!-+pXmi2$b|FB+{US9%LdGkDu_2H8FY8|n8ug!=DCIKv3Gy^sm3CjQP52CI44W6p4#-p0MU7epK7n5A zmG$~?MPv+_Tl4{Ws#Yl!tGhM*KU0Pvvoi0ePyHDJfQWZ32C*RwY^^qlURy z{|?AgwM%R3T8Bt{cJRnciN~ICY zz$egay?VR<(r^|}D(nNg5mA*sFvT)w?Ro0kY+942vH%{Cr)rs!Vwtn{JoT+UYB-Da z?|?j2%ajz$oVDkvziU&FJeBqDfIL;J6pCfe+Vj-6G{e$a$x~SX56DxsQYm7Yv-Ujo zcl?*eewxMafIL;JloZRzC-7^vD!CdiSjkga01wDhwM+@IjC_K=R;v=Nkx|20tbYgO zsamFlSOz{pU+Yy#)^Hijh$`z}yj$XV)hZ>$GWZF@%vL2@BVxrniv{q2JXI@|Ml1uL zK(F=6ay7UtxLpHS{0_)db&o}{%vpP$T5>g<*5s)?sW2c<)jd{$=KFZ&hO=1w;t|hZDlv76WzIr&dG!n3X&q;FSqflgmvx7gVs)L>Lw0%fdLK2M zCG{_}%euizvAWJe(z#y!e0K`QnO&Cpm)T|AV^OTGvwFAcu3mNyO?fri^|Np6L*Kz;<`s!=0{L7Umt~_}8pD+K_ z<@+yv<bi9LWYn0rX-?dLC{?`^6KBk`50tttjXu6V^0MD43sj<1^-ezJ#ywGY z&DN#Umrpsqe%ij7r(BYVQq?@oQ$Dn{11NQEZ9eRWVn`OKM5$_=e9DL2#RJX%YF*lV zoYq9C)9iK+h*H%&DOPv!0C=s|rPC*)#*#G6Lka_;RP|1Z)m=OKK&k7}=%ZMPQm1VG z5T&YjC{}m<=mVv$OQ(-wB}$!ULVG}zsu-ma%a~^$D0OXZzDY(?k8(*G5T$Byl42S3 z>;t8)OR`3$HD2~dbQV#nRwp5rLC-!=>bit$WYloh5uHVps?|w|Wy}-cwO*HKjffR5 z`y)DwC{>G-6wA0LdJnU8iPnf%;jAM%izrnKlp>Zn3n+E{;_jOcH9_mFBRY#HRf`jf zWzGUhUB9UNTAn0IJ)*OSQnflsvCLUOsp}W|sNt+5I*TY(tCJMVoCTD+{z039M5#w~ z7E!7eClt$^1(dq}fo52Wl_>Ry<?H0;Lhlm}eg-b$yHf(kM_TbQV#n7AGl|LC-!= z>iYYATEkfrI*TY(tCJARpl2T_b^QV#HJmk}vxri)Itj6idG>))*Wc$;5YC#=SwyK? 
zoTOOBJ<)!ft)JfvD`JJSCUh22sun0kEOQo6>e}KoD2rQXP3SD5R4q;@mN^S3bzR~# zT(AbfLqWYlohgw7&L)#@b0GG_s$u1lbX%V46^37tihs>KP# zGG_s$u1lhZVkJtQ&{;&Oy2on7GUnL_N?lu=W|Gm=F`Y$}svE2n%b;f;D0N-pG%~H> ztTCNMl&U+d5X+!ve*|ZhM2(CZ&KlEMM5(&L3bBlN_JLB@&oO7^h0tRuct$ybyX!XA!099;=9D&H_qZU-e%a&KlEMM5(&LqFClEpw#u1W>`*ZqSP^+ zMU<*LtQ5iUvRL88<#okf(Ydn}4&&KiOQUTlUX z3C#5Jn9gE)S@&2)to<)l5{UTn`a=1s5v`nLe0fA`F}|!jEUINWW5k{7_4zWf95=i* zqPG}d);(6rWjW(Mu7qBn^I^kWBbtlxW!+p-DZhm=3B&<*CkaW zSUkQ5Hlo28U)GHl6*J!szCG{%TU)mecWjoIe~(u^pdgJNni?smaN7N=0>D=5((TJ7 z3SU8uVRHtDB&zDAm_Z!5SBHYXR_oI6lTq6x3Q!_G_An$-RVT$GKkY72;IH+%^!xJV ze-uO&33-nC%}>?uZx@i|1ORrnE)BnY`5y&QML?dz=GCNXq~?J)^a=P{udUm66hsx_ zcn+DD{;G#kEMuPlu+_SB`(#?%1q$JK4*N~-tT*-v{#va|!%s$S*C#~dIci?|t0oGu z41I#X*6Y&m6S3NDHNx>6HM`SVtmN`!_!9tjwk{1n5vyIDdL*ht5>+i&idfx61n{+9 zTe1eHXj%)9sP5;qC8}DiP%Lv6i0Zl|YdEbzRFRC&2_R9`Y9*Xym)?TER_hY3kx|20 zB&tIaRjpQ1EOQoiGwYJAp;$pwk&WkXO=oGbLb1$QAgb$GdHKgd-J>9?$i{Qnbe0w@ zjaY_00blF&!{y!2A=`o#HV zYW->-HJnAFIwVomY9+)n^a=i2uOIR$2xpO~;{6=Ys}?IMmf=qT*xCBQW>`9_7tSJ4 z9g?VO!BWIBXJH?+{u%$J;Vcr>A&IIMD-_F|1){osz@{~bDw6T}43Ma5wUT1lI}1d0 zeZP+y&LUAAlBjC6l46;&u#Z{aXHyVF71? 
zf~675&?n$)y|!e{Q4m$!OwJ+GSz4^5SjIj9V5@aW)^Nd!getBk=dk(C(rP8dGWH4n zTCGdCMn(;1k*E$yRJB?Ou?&5Jzt-!Lt>H2l302%o&fl8O(mhs+W%v_2ve~+XYecMY z7LODUNmO-@Rm3u9VfkHKvIe31_MJtdihnBB0Nr3wEOQo!>bfLra#|-rRFRC22^f&5 zYN?W9b)D4*QC;8Bb=G7W&LUAAkf`bgE5+(Ms}G{OzP;Si0I=2iHlNmb zXOXB5NK|!)6=E6t1b?m8Qy(>)MWQ+&QPmArh-ES3K5m0vANdr-JB#UM)M@d&>K-e_ zvZyhNlG%FF3@c)VvzT5UFukmMtRhy|SqM7U>#_gRa2C_cKvelljo>uGG`&Zye`?ATm~nRUPd;a zzcrnudn}4&&cbcb>k_V^Sdm`FFW|80EZt)@Vp+_%kM#1|k~Nb|FHiHGHDG#KH&`i_ z<&67B$KU_AyK?>VS1&Ivzvj|2m!7zE|HUs~d>s7$ix(cda96|s?>hJSb02Q_|A)`s zaprRk|Nq?S51qcP;s3Xt{0zSTx99(l9!&mY_y285yDz8hD0nKG&M|F6@>I1>Gi?Sw zfnMv4_4#0A@fzQw#jKT@(KJ}ZAz~%pT?uO>bFOpIwVik+9aNFBcFrGu-cSv zpNtv{5_#&7JXM_&&$xll0eI@B^!oA{M;O0-fs&`9+Zi@j=j1a!=&l~fNz68-*C%4d zf<&G=Bu`cA6tTK1#{fKa)AsqYGzp%HL_EdH1xal~vARnKw5v9y&BtjCo{C6(4ojY@ z)=9CtOUD2_byFIBGHNVH72JixazT=%z7fm7=Kwr)(*|l%tca%e$Ww>psal$(SVlev;HjIEr;%xm zS3P;^kUUjulMu_u=U|LieM!{FsNpQ~)FFAQ)+QmAfzQF7r~YyG1&aG9T4#}`qTwIT z($a)tnX~ph^?lt}{n0p_MV>k&Pu23Ih-J>&^VIkHFO63{c`6P$X1BDOOte#kA{5Wx zn(r*#V^OUAk5yW+2dX7flX&hZfGQqCj+@rfO;%CMz~^8ORNFvJ;JKp!s<@GyV{$(z zU(upmMm`67p!$|#1CAT!B2XO?sOlyw^)m7~*aOwy@nOSV1gb*NS7saVP3gdD_zjp!%COwQ(ObqVf485U6Ssn2MQkaUb;hH+72imY*~3|?bi5QRIeSOlzbe+U*`~Dna`vW~Q0456 z$Wzrq&FtByK7n5AjrIC)J|KQ+7OeXRJ#PG!8&vaAH}Z)GxZ0FnUp|!6;HmgNdQJd& zsyZkh^`q_*G6YZEly;wt+OA9pd2!UdSV60mc+~B}ThP~fQ@VW=D|jltkIqqZVNwI7 zSO!0b;HjI^?xR@2Q}KCp4x8(eI;auLz~>NuU}L?$Y2NtDlL{kAsLK71DVCAXA$aPh z^!j94+c*CqPb!SaQ?*(Nv5b6zzE+#k?vqj5b?J~N6-MN#TCIdw20lSw>rDyRh*;q) zo>aj0H=b85Rw$M^3p{mG5;h`MyDlB_q{4_iRST9PR`=};^jdE$UV}3-t+RMi0oO+6 zFFnp#c<6LX3H(}ZO1y^C8ax$OisWNOo~qSKie=6MPu;wS4VRZi1be0w?DVD*{A$aQMC%bR_Y4B9+kvMEROAD4pECZiI z{DI9+_%Ds4^*pICl7#AU&NA{j1W(=kxKC?1izgLEOsHzL5@H$o9D=8Ae#}P=XYr)M zh&)xRl@QCoC+KUv`B9&Oa28K0pbCuVRcjTBWzGUm-TX*1ES;4JRi0EBk*8`$xQJ!W z!XMcDu>aC<7EdaS$Wyga$z>2vN5@&rwDjOl3T*L-JHDR8p+2vp`?#O$pfKGI$g`6(2_DsOc;%Q&KF0p9ApJO-b0KSVzHA zu}pE;be4968?g+04!~147Oy$VgsPlWAWzjICB-uG3H(}ZO1ws6B33v{PAZV6YK20vy3RsXH`|nijffTR 
zEIFw_o~nDSB39Q~K(F=Y`?}7;Y037TB_|cgQ+0zyvAWIzeyujoH)jP;#b?p;u_8~^ z9af5E&H_)}e6Noh?<}5F7?P*z1}nufXMv|~o@Y}KJQW{C=Wk7CX@!zv8T=f8r*6Kd z8J1!NPsN2?95$V$d#pw*1D^x%)Xj7Km&QAbCl!X|sk*^Ru`Fy1{90|k+ov_0#ghs{ zMwoSn6=GS~7=h<%^Ibk_IEyD0hKw-l1}ns}tT6)5_2xT$3c^`Dseoo+ytA|+OtH*a zh%j%yqZwAj3TN@8!jKVW-D4H8%vp#qZ!BJe1H-Mecv1lx$1l|l7R54WA;P>V@ft2z z8DZv0g&`x%y2DDb%vp#qZ%V>OMh$22q{5I9X5C<=SmrE5m^US0!(}id%si7{WAGIB2QK4#Izas1bwYH zrOTJoHVvMN&!NZ4F=9Sdos-jM@N>B5sin)8&-gTWDq7AQmJ5=UC(Sc%;B&a=scnZZ zi-_QRqMnvZsZg6wc3;(pNtv{5>G0O z$WzrhDOPvo0DY}DrNt*=#e&3>3Wz4A&4p4@tnTUo{5so|9-oL63ldK%jL1{fIz_DR z$^rCRZ>+(G9$>p5@uUKFNcl^TYf9sTHJwc~cQ}HqMe5}Y*wKhqyx=RP> zYqcqP8W}YfB%V|lk*8{H5@H$n1bwYHB~C-Jf~Vqh==`nus@Kva#WMIg1W(gH#=uli}`Q+ZNhM4qbUNfFDO1)jQjo&VBs7EdamO2}U-&0mUT&H_)}+^}g4o_ffW z3M2AVtxZxabJlQLZ|X8?P`~LgoW+w0Bl1+OO;W7=Zx!$f{CZos8c{4>T|BTbB2d*TrB%#K3!u7b zLp6N5!eKnIFd|UZjg}~8&P76XQ&KgY+5oDDJhCt%P}Qv#6*K1osBTKKMurWK@yx=A zKvk<0DrO!V;Pq9KHGF*qP{o(g(+hGR^l=_D_&Eem-P~U+mb_KB`m&TUqR!RkE}PcKE+6u+!id>r-E5^; zW-VlwH+TA|VJ)6j7%{u7d#x19oP|HL`DvSi$Sxo9sKQ8YgO=wt#j>FB5ZUETiPli8 z$Sz|!PG`w&(DJ-CVp+&|i2c~cay8S)E+6uw!bom|K8{;h%6N$E@}}f!=&ZaAnkN-T zavQX4wn8jR84vq#R*BZgsNpQ0R2cC#Xx(dtSQauK_S^gaKC-X>t7|8&e(Gv<XWP>8i>DpKwl97Oe~*b&wPtCacGO+nA$#?<+wQ}V?b6gEQXLbi zYQ2*0eOwgW9rm!=mUbVfHIS-=zsE$XTCk*8-9=;sq`EB~KN+=MnjlcPeN3dPEG5J; z_6Y!6Z%f0EVg*u_@b{RP{;G#kEaRUeAk}T@_))AtsuKPl^U`0%DUDdhK1XN?ZoBP1 z0J-+1FX8Vok*d}!DVD)c5ZG#45;iie;VdH6F_EekEFqS`PXO3zTS7K6YB-BXbxfqH zEG5MD;OlJ!4U(}YY68;|X(qAo9QY_=2Lm<`d>1Nmv3&W1v)s;wf#7lp*R%ygC z_BjMn-JbGa8qOk89TBN&nUZ1|{2T(QZcqBOhO>xNM?|Vxri54qKZii7+XEjpoJFKM zB2v|+aEjG+)(}W_d%~w6oJFLHE@M2eTBJ~{uCvfOnQh+R3@c)VvxrnjM5@{pE@E|^ zHQXarOW2^|YV{{l#nYI-^f+hXn2^4+fETMxN!Y}Kg$FpU0wPi!5vgjGl46;&_DHpa zY-H4M7Ln?RNL8zp5X;yn0BpS}8Jk!J@ms@LM5-fxU{{Nj6wCPMaF0|=$cADCQkC%c zh#%P1Jys)@vCrWisanEj8WGhVk?M#@RX14aEQ6mQu+^p{Y-C!)SwyNMB30dCg;)kZ z0br|53E9Y~;VdH65s|8Huu?2@)*h+;zBwyyh3*llqSGGFtM0KVmN{#WRNrOJid&(3 zM5-eqRo!D1vCLU}r20<(rQs|hRV>K)OQlGmSmvxfQhi4=ET=V)s)WBsM5?;OO0mpY 
zd!+h^j~dP*QXLVg>IN&sGWH1oTW@~PrXY~2guh2bs=CKYu`F^70z2Ehy&0Cy$`muh z-y^1&b&u7EWpQH!p6kuq{Fg=xh~e*%+zKttsuaue#>0Jz`K>;!t+Nh!-};CtX5C?h zSe7?N=DFJZu8-O}>yY=YkCmI9!WzO2CnA>no_Uj_Wd^qKyaJd!wIA`JGE1h@NKE+(JHFQ>_ zm=Ag1`iLoJ-C?Cz=B#mh|KFeN@Be?@)qiX1|F2#7)RosZ_5Z_{zIJKd)c=c%4`2A# zrv88W{NntBsQ>@!+^bOkfBft#&-^3u|977L`-cC28u$NCPktKt|1t9ae|j)R{{K(D zm6HENz3sO6Py@w!M52mZE5kzw8opBM0HG} zs-;Pa)m=HjU#o5D^2w<2rYBJylc=h3QmpR60sdNVOOsE;iaj)mDo$&}^Qx6eu1C1R zs`I>psBTM_PsEBnG>Ph%L{+PkB35_d0DP^t-8c=z3TKh1qNvYbDrZ$FmN{#5SZ_<5 zCb$gx0O2eW)iH^xmL@6I{>Q3efmF98QX|8LwTM*5M5~Co7m@0iNLA~TRLs~X2yDF__^{zGBGoaGsun0z%xnv!y6yWc zgu#eZac(IdTP;vTF*7cZ>bBR68%Pz3g~Nzc$3&`HrL>BfYk^d^hyGi`VnnKAB2_I> zL@~22km~lQY-$6kN(y{Tq^d;<6*G?w@cMdL8MfV_AngzyBT^j`scMxHiW&L@fUUQ; zmPz2KVKEZbF^Q@cDJhuY&k=~~wuEcAA_h^F6!@4Q*ws#OqnDx25s2!xo2TA_6Y!6ZA-F-FPb2#JrdP1iKWx-H2XPHPZViGPntRJBS;vCLT@s@oqnXZ6EbBVr2@Ro!caScX2q zU+e7;nX`hZO89$BqN>}i6wB}@0PJl0;$~Qi6+~6C-(wP0-EK8v8TuSyo!Gv}e`z?2 zM0HG}s(Y;z%h)FXY_)x%PiwrnNL0rps=C<Q)w{{JQL z|9cO<2>yRJ`2Sy0ceXY0Mb|0`nJA;Ru z!3mM7Iw)t*;3o)dwJq(wd@y@)>#yX$Cq$}Ru*Bnzt)Z4d0I=1zbo^x0c5RaU_k>7Q zO%#tiUW(mOu{M>4Uq0@j{q3wW|BcR6I$lhak2_X`?yy*!O2<#cYS$*oe@}>1)kQ_D z?ji#ET5r4UzCFW-9n`gP#Df z)wXo}WYl(TfaGf;^#!Z>#2Sm8*(UEaRUbu(NH+ z*`!$LpT@9xjtbvajbXJ`X~Z)23Hn-ZyCEB#-w0 zSY2nKD>>W#n!WX>;Ve0Xy-!&!1zfk;)W6pGb#7A#S1-)L|B zK&pq#e~*b&wM+@I41NN@R@-0kQNvk0tS}~0)iNc-GWH1oTW{ZBQxHg1^50`3RjpD| zEaRUbu(RziH^b6dfmE@D%4L8^RV$T7EMuRbul4qq{FjEacvxXfq^ea)ie>N<1h(4# zqEBl$i-#4)M5LCv+AQl*l zi&iNV%bW$Ix-B^y5i6X}RjpKtSmrDs)onLqg9B&bEFM9PE!$DRG%&#HDr zfmF98WWxn3kgDXr$3&`Hri54qKLKE?ZOPflsNpOgRu~hh>IN&sGWH1oTW?FuhGGR$ zmHhXZNLBY(DVFh15ZKwa=X30-nJQ=>{i9a(1$#%FeXyf z4OTkK;3o)dwSBF5ATEYJUP$Q zDq_Vui-#2ujg9A3_gECmoCT!1eYhD`#0qEeu)>%~RV$SumN^SZb^99srQs|dR=^?3 z{H4b^YybCGX91~hUv1NxNR|2TF_Egaf+OR!1sP;!{(H3D?M|t+RRzGLIQ# z)(uvQWzIs7d0VnIbXHyr%^))-HJ(@9V^J(~7J|&%60Q-kT4(hbWF9lftb42?mN{$O z-v8I||LW@3uWoSu|7$q^KfC-a>i>sv{(pJtA=Lj1od16Y`TvK`KaKPM51e}n_5c0g z|Bs)&7xn)qaQ^>Gr$2i7m8U*`>cgjQKl#5-e(2=%;O`Fp>%rh#=l{FX;>#&RE2Aw- 
zIAvIlbIPO{l~abJYie;kW3yf9@#U1EV;aNeCmtu{scM^M%Jw75ssZ$RW7jtL0;8c@ z8bh))B~Mk$O2%osPr-xC5=^-VnBIB3`Xxd5uW(&dw3V>#kk zg$aSG`X-+6o`FyB*BiUi&ENFQmigi~dthd*9 z&n>SI&Td$YM;0aos#>G8ikWQzRCjHvhGT}ucxC}%ocyiQs1n7@yTik3S7J4}+~Xsa z?bF3W3ljoWEm1--BcH&p)$Ti+8IV!KVmz}jAy3s3B?L3@3Hn;^zN7mdMNBDvYxaL8 zKNrz%TnSY|Hp)ZJyDf-o0PD`0~j z53E)w6wAB?p1Ql#49nM7CRBM?VM3m&RZ0=dtOcIByXe0(ti`hm2&CmNJ937C0RqUf~QLMdqSS7ZQw>M1D`;z^{yMM!A2#V#ghsX@>H!* z(pg46fnTd#iPgxohO>B5VM3m&B}$5A&g#QiC0Qe*hO>B5VM3m&d#x19oP}+}t^{jD ztZ)`jDxgS>=T)~`6w90ip1Lc^8WAhrT0E&RAy3uaRuRjbh0XTvggGm3gXT$veY-wx zgFf!yuidbWHhi^xzd0*-s${<>K0#mW z?fYyBf~QLMdqSS76-tU_@DuoTwta6iEX4|*D%tM|d8%%-8nFy~0=?GT_xLXjXYr)M zggjL@S?Meb83Vso+duMY4QKJB!i3Rf-DIU$=B#~m`44^6a28K0Oc-6(Emn$U&cb$K z`v*P+;Vhn1K%XX_SM35*EOXXAy8P~DSP?6n#ghsXMwfMuRm3u9VY{&H#%gevbiA{8 zQUTHA{H4b^3$(CP$>BEW?YnGRBf2d2tWOwS)*V)eWg%k(ovUq0)^Hij+n{+;VZ!LL zZm>cu%NQf*TyIOThRa|?m*t-I38Txp$4ap*XN;tC;r~tkzefEJ{*V0s7WMz0|IaRc z74`p~|1U4T8ukC4|37p7lc@jq{QrTo|AhK~&;K7k^D5N;d;b5WQy)8ZcfGP8b5x;}IfDj2 zfnR64((dEq9z0dT-}tg>4lDbq=5aUh3G`a;tk>5Io+{z*33;kIDAy+3;L~~B!Bcmo z*C*53u1!7i)Cqa2mMQVLE`!|2- zamVY8d}2|lcBR+HX$_t#;qM7~s+K7smXS};*J@W%HZp4a*2glRe}D2+EmJ})1D~L; z^{#|$C|2-P34i0usxbvxrKDH}KY?FoyOOe@Siw`}%ik07RAniRSOz}F;Hf){*r0LR zibbA^FRSJ+J?_RAd~B%W0r1q_o%SL*3TKh0PRLWWOi8iKS>UO=pY~D1S>&k`@>DHT zQY>>8cgU?oJF1r?w3<=oU`yLh3*uh2Z( zOT$^@sn~Q?*P9v5b6zzE-;uu8~p0S>&k` z@>DHTLM#KHps)3=L~AHk@KgzZPsmeskCkE>`~-fT?OtZiN}kH__k=uE_gIZs20npa z>)oy9th^YS;qM7~s&23-mN^SNb@x*9K=4!ve^1C$wM6yO;Q=QK>TgJt0rk z4OWU}&H_)}{iIJpRH_VrW4X%b^*CqQb<5CTw)=@@SUM{{w2K3bxww$0>K?0zWzNFI z(7PY^UmBGv!{6ve#4ptiR*GeTV;p6!c0Xp*8Zl-Ge@_@=)*V)eWr1Uao~zxD`l!(Z z;+5+Y#+Y@36=GT57@_BS_ain15o4As*C&iIYbQ9JWtn57o@cuft)W;EW0ot|CyX)c z9;*?{^2SI!*E`GA^diPASFTSOW7Z88#WH6h#=I-J8ZKCQF*HvqOc-Ojo>uGG~pN`~OZ}tD5`&>j<_vt}8K`#_t3lD-GA7=$j$=O20@A}O*;TNRG0rYjyArEm z{U6a~xo3UC=(4tfqpM&cV+5V|{QvaT|9#@x|G4&FuHBA*ar1v}&cMwXxH$tiXW-@x z+?;`%GjMYTZqC5X8MrwEH)r7H4E%pL1Gk?(cgroeJpZ|)x4-3$Z+*kz;qdK`yzLEd 
zf6KexF?-}~FP!23{^lcZd-pqk>z$9h>rJTEN4NG4Zv|FwKD4|4*6yoLox9~lFM84Q z;vXNp>8= 20000000 then 'over_200k' + else 'under_200k' + end ;; + description: "Contract ARR band represented by the procurement activity threshold." + } + + measure: weekly_active_requesters { + type: sum + sql: ${TABLE}.active_requesters ;; + description: "Distinct non-internal requesters with qualifying procurement workflow actions during the requested week." + } + + measure: purchase_requests { + type: sum + sql: 0 ;; + } + + measure: approval_actions { + type: sum + sql: 0 ;; + } +} diff --git a/packages/cli/assets/demo/orbit/raw-sources/bi/retention_exec_q1.dashboard.lookml b/packages/cli/assets/demo/orbit/raw-sources/bi/retention_exec_q1.dashboard.lookml new file mode 100644 index 00000000..9981f308 --- /dev/null +++ b/packages/cli/assets/demo/orbit/raw-sources/bi/retention_exec_q1.dashboard.lookml @@ -0,0 +1,28 @@ +# looker_dashboard_id: dash_retention_exec_q1 +dashboard: retention_exec_q1 { + title: "Enterprise Retention Executive Review" + + element: retention_tile { + title: "Enterprise NRR" + explore: retention + fields: [retention.fiscal_quarter, retention.nrr] + } + + element: movement_breakout_tile { + title: "Movement Breakout" + explore: retention + fields: [retention.expansion_arr, retention.contraction_arr, retention.churned_arr] + } + + element: discount_expiration_contraction_tile { + title: "Discount Expiration Contraction" + explore: retention + fields: [retention.parent_account_id, retention.contraction_arr] + } + + element: q4_vs_q1_comparison_tile { + title: "Q4 vs Q1 Comparison" + explore: retention + fields: [retention.fiscal_quarter, retention.nrr] + } +} diff --git a/packages/cli/assets/demo/orbit/raw-sources/bi/revenue_daily.view.lkml b/packages/cli/assets/demo/orbit/raw-sources/bi/revenue_daily.view.lkml new file mode 100644 index 00000000..8ffcefa6 --- /dev/null +++ b/packages/cli/assets/demo/orbit/raw-sources/bi/revenue_daily.view.lkml @@ -0,0 +1,52 @@ +view: 
revenue_daily { + sql_table_name: orbit_analytics.mart_revenue_daily ;; + description: "Revenue recognition mart dbt://ktx_demo.mart_revenue_daily governed by notion://notion_page_revenue_reporting_policy#gross-to-net-reconciliation." + + dimension: revenue_daily_key { + primary_key: yes + type: string + sql: CONCAT('all_accounts-', ${TABLE}.revenue_date) ;; + } + + dimension: account_id { + type: string + sql: 'all_accounts' ;; + } + + dimension_group: revenue { + type: time + timeframes: [date, week, quarter] + sql: ${TABLE}.revenue_date ;; + } + + dimension: revenue_month { + type: string + sql: TO_CHAR(${TABLE}.revenue_date, 'YYYY-MM') ;; + } + + measure: gross_revenue { + type: sum + sql: ${TABLE}.gross_revenue_cents ;; + value_format_name: usd_0 + description: "Paid invoice line revenue before credits and refunds." + } + + measure: credits { + type: sum + sql: ${TABLE}.credits_cents ;; + value_format_name: usd_0 + } + + measure: refunds { + type: sum + sql: ${TABLE}.refunds_cents ;; + value_format_name: usd_0 + } + + measure: net_revenue { + type: sum + sql: ${TABLE}.net_revenue_cents ;; + value_format_name: usd_0 + description: "Gross revenue minus credits and successful refunds, recognized by paid/refund dates." 
+ } +} diff --git a/packages/cli/assets/demo/orbit/raw-sources/bi/revenue_exec.dashboard.lookml b/packages/cli/assets/demo/orbit/raw-sources/bi/revenue_exec.dashboard.lookml new file mode 100644 index 00000000..32d6ebdb --- /dev/null +++ b/packages/cli/assets/demo/orbit/raw-sources/bi/revenue_exec.dashboard.lookml @@ -0,0 +1,28 @@ +# looker_dashboard_id: dash_revenue_exec +dashboard: revenue_exec { + title: "Gross and Net Revenue Executive Dashboard" + + element: gross_revenue_tile { + title: "Gross Revenue" + explore: revenue + fields: [revenue.revenue_month, revenue.gross_revenue] + } + + element: credits_tile { + title: "Credits" + explore: revenue + fields: [revenue.revenue_month, revenue.credits] + } + + element: refunds_tile { + title: "Refunds" + explore: revenue + fields: [revenue.revenue_month, revenue.refunds] + } + + element: february_reconciliation_tile { + title: "February Reconciliation" + explore: revenue + fields: [revenue.gross_revenue, revenue.credits, revenue.refunds, revenue.net_revenue] + } +} diff --git a/packages/cli/assets/demo/orbit/raw-sources/dbt/dbt_project.yml b/packages/cli/assets/demo/orbit/raw-sources/dbt/dbt_project.yml new file mode 100644 index 00000000..999a6f50 --- /dev/null +++ b/packages/cli/assets/demo/orbit/raw-sources/dbt/dbt_project.yml @@ -0,0 +1,10 @@ +name: ktx_demo +version: "1.0.0" +config-version: 2 +profile: ktx_demo + +model-paths: ["models"] + +models: + ktx_demo: + +materialized: view diff --git a/packages/cli/assets/demo/orbit/raw-sources/dbt/models/marts/mart_arr_daily.sql b/packages/cli/assets/demo/orbit/raw-sources/dbt/models/marts/mart_arr_daily.sql new file mode 100644 index 00000000..9ccd305e --- /dev/null +++ b/packages/cli/assets/demo/orbit/raw-sources/dbt/models/marts/mart_arr_daily.sql @@ -0,0 +1,5 @@ +select + date '2026-03-31' as metric_date, + sum(contract_arr_cents)::bigint as arr_cents, + '$18.742M' as display +from {{ ref('int_active_contract_arr') }} diff --git 
a/packages/cli/assets/demo/orbit/raw-sources/dbt/models/marts/mart_customer_health.sql b/packages/cli/assets/demo/orbit/raw-sources/dbt/models/marts/mart_customer_health.sql new file mode 100644 index 00000000..79274467 --- /dev/null +++ b/packages/cli/assets/demo/orbit/raw-sources/dbt/models/marts/mart_customer_health.sql @@ -0,0 +1,10 @@ +select + date '2026-03-31' as as_of_date, + account_id, + parent_account_id, + account_name, + is_active_customer, + has_unresolved_high_ticket, + has_recent_procurement_activity, + risk_level +from {{ ref('int_customer_health_signals') }} diff --git a/packages/cli/assets/demo/orbit/raw-sources/dbt/models/marts/mart_revenue_daily.sql b/packages/cli/assets/demo/orbit/raw-sources/dbt/models/marts/mart_revenue_daily.sql new file mode 100644 index 00000000..302ca887 --- /dev/null +++ b/packages/cli/assets/demo/orbit/raw-sources/dbt/models/marts/mart_revenue_daily.sql @@ -0,0 +1,8 @@ +select + revenue_date, + gross_revenue_cents::bigint as gross_revenue_cents, + credits_cents::bigint as credits_cents, + refunds_cents::bigint as refunds_cents, + net_revenue_cents::bigint as net_revenue_cents, + (gross_revenue_cents - credits_cents - refunds_cents = net_revenue_cents) as reconciliation_check +from {{ ref('int_revenue_components') }} diff --git a/packages/cli/assets/demo/orbit/raw-sources/dbt/schema.yml b/packages/cli/assets/demo/orbit/raw-sources/dbt/schema.yml new file mode 100644 index 00000000..80aa0b72 --- /dev/null +++ b/packages/cli/assets/demo/orbit/raw-sources/dbt/schema.yml @@ -0,0 +1,455 @@ +version: 2 + +models: + - name: stg_accounts + description: 'Customer and internal/test account records for Orbit.' 
+ columns: + - name: account_id + data_tests: + - not_null + - unique + - name: sales_region + data_tests: + - accepted_values: + arguments: + values: [na, emea, apac] + - name: size_band + data_tests: + - accepted_values: + arguments: + values: [smb, mid_market, enterprise] + - name: lifecycle_status + data_tests: + - accepted_values: + arguments: + values: [prospect, active, churned, internal, test] + - name: stg_account_hierarchy + description: 'Parent-child account relationships used for enterprise retention grain.' + columns: + - name: account_hierarchy_id + data_tests: + - not_null + - unique + - name: relationship_type + data_tests: + - accepted_values: + arguments: + values: [subsidiary, division, billing_group] + - name: stg_plans + description: 'Canonical and historical Orbit pricing plans.' + columns: + - name: plan_id + data_tests: + - not_null + - unique + - name: canonical_plan_code + data_tests: + - accepted_values: + arguments: + values: [starter, growth, enterprise] + - name: stg_contracts + description: 'Contract records that provide contract-first ARR for active accounts.' + columns: + - name: contract_id + data_tests: + - not_null + - unique + - name: status + data_tests: + - accepted_values: + arguments: + values: [draft, active, cancelled, expired] + - name: renewal_type + data_tests: + - accepted_values: + arguments: + values: [new, renewal, expansion, downgrade] + - name: stg_subscriptions + description: 'Subscription rows used when active contract ARR is not present for a covered period.' + columns: + - name: subscription_id + data_tests: + - not_null + - unique + - name: status + data_tests: + - accepted_values: + arguments: + values: [active, cancelled, past_due, trialing] + - name: stg_contract_discount_terms + description: 'Contract discount terms that explain Q1 2026 enterprise contraction movement.' 
+ columns: + - name: discount_term_id + data_tests: + - not_null + - unique + - name: discount_type + data_tests: + - accepted_values: + arguments: + values: [launch, renewal, migration, goodwill] + - name: stg_arr_movements + description: 'ARR movement ledger used by retention and expansion marts.' + columns: + - name: arr_movement_id + data_tests: + - not_null + - unique + - name: movement_type + data_tests: + - accepted_values: + arguments: + values: [new, expansion, contraction, churn, reactivation] + - name: stg_invoices + description: 'Billing invoices that anchor gross revenue recognition dates.' + columns: + - name: invoice_id + data_tests: + - not_null + - unique + - name: status + data_tests: + - accepted_values: + arguments: + values: [draft, open, paid, void, failed] + - name: currency + data_tests: + - accepted_values: + arguments: + values: [USD] + - name: stg_invoice_line_items + description: 'Invoice line items used to split gross revenue, credits, seats, usage, and addons.' + columns: + - name: invoice_line_item_id + data_tests: + - not_null + - unique + - name: line_item_type + data_tests: + - accepted_values: + arguments: + values: [subscription, seat, usage, addon, credit] + - name: stg_refunds + description: 'Refund events that reduce net revenue in the refund month.' + columns: + - name: refund_id + data_tests: + - not_null + - unique + - name: status + data_tests: + - accepted_values: + arguments: + values: [pending, succeeded, failed, cancelled] + - name: stg_plan_segment_mapping + description: 'Effective-dated mapping from canonical plans and size bands to reporting segments.' 
+ columns: + - name: plan_segment_mapping_id + data_tests: + - not_null + - unique + - name: canonical_plan_code + data_tests: + - accepted_values: + arguments: + values: [starter, growth, enterprise] + - name: size_band + data_tests: + - accepted_values: + arguments: + values: [smb, mid_market, enterprise] + - name: segment + data_tests: + - accepted_values: + arguments: + values: [self_serve, commercial, enterprise] + - name: stg_users + description: 'Orbit user identities shared across warehouse, Slack, Looker, Notion, and Drive artifacts.' + columns: + - name: user_id + data_tests: + - not_null + - unique + - name: stg_activation_events + description: 'Account and requester activation events across the January policy change.' + columns: + - name: activation_event_id + data_tests: + - not_null + - unique + - name: event_type + data_tests: + - accepted_values: + arguments: + values: [first_requester_login, requester_activated, first_approved_purchase_request, account_activated] + - name: policy_version + data_tests: + - accepted_values: + arguments: + values: [pre_2026_01_15, post_2026_01_15] + - name: stg_sessions + description: 'Product sessions used for pre-policy activation and activity exclusions.' + columns: + - name: session_id + data_tests: + - not_null + - unique + - name: stg_purchase_requests + description: 'Procurement request records used for activation, requester activity, and health signals.' + columns: + - name: purchase_request_id + data_tests: + - not_null + - unique + - name: status + data_tests: + - accepted_values: + arguments: + values: [draft, submitted, approved, rejected, cancelled] + - name: stg_approval_events + description: 'Approval decisions tied to procurement requests.' 
+ columns: + - name: approval_event_id + data_tests: + - not_null + - unique + - name: decision + data_tests: + - accepted_values: + arguments: + values: [approved, rejected, returned] + - name: stg_suppliers + description: 'Supplier directory records associated with procurement workflow events.' + columns: + - name: supplier_id + data_tests: + - not_null + - unique + - name: status + data_tests: + - accepted_values: + arguments: + values: [invited, onboarding, active, inactive] + - name: stg_supplier_onboarding_events + description: 'Supplier onboarding milestones that qualify as procurement workflow activity.' + columns: + - name: supplier_onboarding_event_id + data_tests: + - not_null + - unique + - name: event_type + data_tests: + - accepted_values: + arguments: + values: [invited, profile_started, profile_completed, approved] + - name: status + data_tests: + - accepted_values: + arguments: + values: [pending, completed, blocked] + - name: stg_purchase_orders + description: 'Purchase orders generated from approved procurement requests.' + columns: + - name: purchase_order_id + data_tests: + - not_null + - unique + - name: status + data_tests: + - accepted_values: + arguments: + values: [created, sent, fulfilled, cancelled] + - name: stg_support_tickets + description: 'Customer support tickets that inform account health and risk.' + columns: + - name: support_ticket_id + data_tests: + - not_null + - unique + - name: severity + data_tests: + - accepted_values: + arguments: + values: [low, medium, high, critical] + - name: status + data_tests: + - accepted_values: + arguments: + values: [open, pending, solved, closed] + - name: stg_account_owners + description: 'Effective-dated ownership assignments for account health, renewals, and escalation context.' 
+ columns: + - name: account_owner_id + data_tests: + - not_null + - unique + - name: owner_team + data_tests: + - accepted_values: + arguments: + values: [sales_ops, customer_success, finance] + - name: int_active_contract_arr + description: Active contract ARR as of 2026-03-31. + columns: + - name: contract_id + data_tests: + - not_null + - unique + - name: int_parent_account_arr_movements + description: Parent-account movement rollups for retention metrics. + columns: + - name: arr_movement_id + data_tests: + - not_null + - unique + - name: movement_type + data_tests: + - accepted_values: + arguments: + values: [new, expansion, contraction, churn, reactivation] + - name: is_discount_expiration_contraction + description: Discount expiration contraction flag used to keep discount movement separate from churn. + - name: int_revenue_components + description: Daily gross, credit, refund, and net revenue components. + - name: int_procurement_qualifying_actions + description: Non-internal, non-test requester activity for large active contracts in the golden week. + - name: int_activation_policy_windows + description: Activation cohort counts around the January 2026 policy change. + - name: int_customer_health_signals + description: Support-ticket and recent-procurement signals for customer health risk. + - name: mart_arr_daily + description: Board-prep ARR as of the metric date. + meta: + governed_metric_key: arr + owner_team: finance + notion_locator: notion://notion_page_arr_contract_reporting#arr-contract-first + expected_answer: expected-answer://arr_as_of_2026_03_31 + columns: + - name: metric_date + data_tests: + - not_null + - unique + - name: arr_cents + data_tests: + - accepted_values: + arguments: + values: [1874200000] + quote: false + - name: mart_nrr_quarterly + description: Enterprise quarterly net revenue retention. 
+ meta: + governed_metric_key: net_revenue_retention + owner_team: analytics + notion_locator: notion://notion_page_retention_policy_current#nrr-definition + expected_answer: expected-answer://enterprise_nrr_q1_vs_q4_breakout + columns: + - name: quarter_label + data_tests: + - not_null + - name: net_revenue_retention + data_tests: + - accepted_values: + arguments: + values: [1.018] + quote: false + config: + where: "quarter_label = '2026-Q1' and segment = 'enterprise'" + - accepted_values: + arguments: + values: [1.064] + quote: false + config: + where: "quarter_label = '2025-Q4' and segment = 'enterprise'" + - name: mart_retention_movement_breakout + description: Q1 2026 enterprise retention movement breakout. + meta: + governed_metric_key: net_revenue_retention + owner_team: analytics + notion_locator: notion://notion_page_retention_policy_current#discount-expiration-treatment + expected_answer: expected-answer://enterprise_expansions_q1_2026 + columns: + - name: movement_type + data_tests: + - accepted_values: + arguments: + values: [expansion, contraction, churn] + - name: movement_reason + description: Includes discount_expiration contraction, which is not churn. + - name: parent_account_count + data_tests: + - accepted_values: + arguments: + values: [11] + quote: false + config: + where: "movement_type = 'contraction' and movement_reason = 'discount_expiration'" + - name: expansion_arr_cents + description: Expansion ARR cents for Q1 enterprise movement rows. + - name: mart_revenue_daily + description: Daily revenue mart that reconciles gross, credits, refunds, and net revenue. 
+ meta: + governed_metric_key: net_revenue + owner_team: finance + notion_locator: notion://notion_page_revenue_reporting_policy#gross-to-net-reconciliation + expected_answer: expected-answer://revenue_net_vs_gross_reconciliation + columns: + - name: revenue_date + data_tests: + - not_null + - unique + - name: reconciliation_check + data_tests: + - accepted_values: + arguments: + values: [true] + quote: false + - name: net_revenue_cents + description: Daily net revenue in cents; February 2026 total is covered by assert_february_2026_net_revenue. + - name: mart_account_activity + description: Activation policy comparison values. + meta: + governed_metric_key: activated_accounts + owner_team: growth + notion_locator: notion://notion_page_activation_policy_decision#policy-change + expected_answer: expected-answer://activation_after_policy_change + - name: mart_procurement_activity + description: Weekly active requester counts for large active contracts. + meta: + governed_metric_key: weekly_active_requesters + owner_team: product + notion_locator: notion://notion_page_procurement_instrumentation#qualifying-procurement-actions + expected_answer: expected-answer://active_requesters_last_week_large_contracts + columns: + - name: active_requesters + description: Weekly active requesters for large active contracts. + - name: mart_customer_health + description: Customer-health risk mart as of 2026-03-31. + meta: + governed_metric_key: active_customers + owner_team: customer_success + notion_locator: notion://notion_page_customer_health_playbook#risk-definition + expected_answer: expected-answer://customer_health_risk_accounts + columns: + - name: account_id + data_tests: + - not_null + - unique + - name: risk_level + data_tests: + - accepted_values: + arguments: + values: [low, medium, high] + - name: mart_account_segments + description: Current plan, size band, and reporting segment for accounts. 
+ meta: + governed_metric_key: segment + owner_team: sales_ops + notion_locator: notion://notion_page_sales_ops_segmentation#growth-plan-normalization + expected_answer: expected-answer://enterprise_nrr_q1_vs_q4_breakout + columns: + - name: account_id + data_tests: + - not_null + - unique + - name: normalized_plan_code + description: pro_plus is normalized to growth through plans.canonical_plan_code. diff --git a/packages/cli/assets/demo/orbit/raw-sources/dbt/sources.yml b/packages/cli/assets/demo/orbit/raw-sources/dbt/sources.yml new file mode 100644 index 00000000..5acbac71 --- /dev/null +++ b/packages/cli/assets/demo/orbit/raw-sources/dbt/sources.yml @@ -0,0 +1,48 @@ +version: 2 + +sources: + - name: orbit_raw + schema: orbit_raw + tables: + - name: accounts + description: 'Customer and internal/test account records for Orbit.' + - name: account_hierarchy + description: 'Parent-child account relationships used for enterprise retention grain.' + - name: plans + description: 'Canonical and historical Orbit pricing plans.' + - name: contracts + description: 'Contract records that provide contract-first ARR for active accounts.' + - name: subscriptions + description: 'Subscription rows used when active contract ARR is not present for a covered period.' + - name: contract_discount_terms + description: 'Contract discount terms that explain Q1 2026 enterprise contraction movement.' + - name: arr_movements + description: 'ARR movement ledger used by retention and expansion marts.' + - name: invoices + description: 'Billing invoices that anchor gross revenue recognition dates.' + - name: invoice_line_items + description: 'Invoice line items used to split gross revenue, credits, seats, usage, and addons.' + - name: refunds + description: 'Refund events that reduce net revenue in the refund month.' + - name: plan_segment_mapping + description: 'Effective-dated mapping from canonical plans and size bands to reporting segments.' 
+ - name: users + description: 'Orbit user identities shared across warehouse, Slack, Looker, Notion, and Drive artifacts.' + - name: activation_events + description: 'Account and requester activation events across the January policy change.' + - name: sessions + description: 'Product sessions used for pre-policy activation and activity exclusions.' + - name: purchase_requests + description: 'Procurement request records used for activation, requester activity, and health signals.' + - name: approval_events + description: 'Approval decisions tied to procurement requests.' + - name: suppliers + description: 'Supplier directory records associated with procurement workflow events.' + - name: supplier_onboarding_events + description: 'Supplier onboarding milestones that qualify as procurement workflow activity.' + - name: purchase_orders + description: 'Purchase orders generated from approved procurement requests.' + - name: support_tickets + description: 'Customer support tickets that inform account health and risk.' + - name: account_owners + description: 'Effective-dated ownership assignments for account health, renewals, and escalation context.' 
diff --git a/packages/cli/assets/demo/orbit/raw-sources/notion/activation-policy-decision-record.md b/packages/cli/assets/demo/orbit/raw-sources/notion/activation-policy-decision-record.md new file mode 100644 index 00000000..adef4f2f --- /dev/null +++ b/packages/cli/assets/demo/orbit/raw-sources/notion/activation-policy-decision-record.md @@ -0,0 +1,49 @@ +--- +page_id: notion_page_activation_policy_decision +title: 'Activation Policy Decision Record' +owner_person_key: leo_martin +owner_team: growth +owner_notion_user_id: notion_user_0003 +status: current +created_time: 2026-01-10T14:00:00-08:00 +last_edited_time: 2026-02-18T11:10:00-08:00 +tags: + - growth + - activation + - policy +related_expected_answers: + - activation_after_policy_change +related_metric_keys: + - activated_accounts +anchors: + - notion://notion_page_activation_policy_decision#policy-change +--- + +# Activation Policy Decision Record + +Owner: Leo Martin (growth) + +## Policy Change +Anchor: notion://notion_page_activation_policy_decision#policy-change + +Before 2026-01-15, account activation means first requester login. + +On and after 2026-01-15, account activation means first approved purchase request plus at least three activated requesters. + +Activated requesters are non-internal, non-test requester users with either a qualifying session before the policy date or a qualifying procurement action after it. + +The governed comparison reports a 0.563 pre-policy 30-day activation rate and a 0.639 post-policy 30-day activation rate. 
+ +## Pre-Change Definition +Anchor: notion://notion_page_activation_policy_decision#pre-change-definition + +## Post-Change Definition +Anchor: notion://notion_page_activation_policy_decision#post-change-definition + +## Dashboard Impact +Anchor: notion://notion_page_activation_policy_decision#dashboard-impact + +## Related Evidence + +- notion://notion_page_activation_policy_decision#policy-change +- expected-answer://activation_after_policy_change diff --git a/packages/cli/assets/demo/orbit/raw-sources/notion/analyst-onboarding.md b/packages/cli/assets/demo/orbit/raw-sources/notion/analyst-onboarding.md new file mode 100644 index 00000000..9acb1722 --- /dev/null +++ b/packages/cli/assets/demo/orbit/raw-sources/notion/analyst-onboarding.md @@ -0,0 +1,35 @@ +--- +page_id: notion_page_analyst_onboarding +title: 'Analyst Onboarding' +owner_person_key: maya_chen +owner_team: analytics +owner_notion_user_id: notion_user_0001 +status: current +created_time: 2026-03-02T09:00:00-08:00 +last_edited_time: 2026-03-17T15:30:00-07:00 +parent_page_id: notion_page_analytics_team_handbook +tags: + - analytics + - onboarding +source_anchors: + - notion://notion_page_analyst_onboarding#first-week + - lookml://orbit/account_retention.view.lkml#measure=nrr + - slack://analytics-team/2026-03-31/1774942174.200142?thread_ts=1774942174.200142 +--- + +# Analyst Onboarding + +Owner: Maya Chen (analytics) + +## Operating Context +Anchor: notion://notion_page_analyst_onboarding#analyst-onboarding + +New analysts start with dbt://ktx_demo.mart_arr_daily and then review LookML field ownership. +Do not answer metric questions from raw tables when lookml://orbit/account_retention.view.lkml#measure=nrr or a governed mart exists. +Escalate unclear board-week requests in slack://analytics-team/2026-03-31/1774942174.200142?thread_ts=1774942174.200142. 
+ +## Source Anchors + +- notion://notion_page_analyst_onboarding#first-week +- lookml://orbit/account_retention.view.lkml#measure=nrr +- slack://analytics-team/2026-03-31/1774942174.200142?thread_ts=1774942174.200142 diff --git a/packages/cli/assets/demo/orbit/raw-sources/notion/arr-and-contract-reporting-notes.md b/packages/cli/assets/demo/orbit/raw-sources/notion/arr-and-contract-reporting-notes.md new file mode 100644 index 00000000..c387b915 --- /dev/null +++ b/packages/cli/assets/demo/orbit/raw-sources/notion/arr-and-contract-reporting-notes.md @@ -0,0 +1,64 @@ +--- +page_id: notion_page_arr_contract_reporting +title: 'ARR and Contract Reporting Notes' +owner_person_key: rina_patel +owner_team: finance +owner_notion_user_id: notion_user_0002 +status: current +created_time: 2025-11-20T10:30:00-08:00 +last_edited_time: 2026-03-26T15:05:00-07:00 +tags: + - finance + - arr + - contracts +related_expected_answers: + - arr_as_of_2026_03_31 + - active_requesters_last_week_large_contracts +related_metric_keys: + - arr + - contract_arr_band +anchors: + - notion://notion_page_arr_contract_reporting#arr-contract-first + - notion://notion_page_arr_contract_reporting#contract-arr-band +--- + +# ARR and Contract Reporting Notes + +Owner: Rina Patel (finance) + +## ARR Contract First +Anchor: notion://notion_page_arr_contract_reporting#arr-contract-first + +ARR uses active contract_arr_cents first when a contract covers the account and metric date. + +Recurring subscription MRR is annualized only for account periods without active contract ARR. + +Contract ARR banding uses active contract ARR as of 2026-03-31, including the contracts over 20000000 cents threshold. + +Booked ARR is not active ARR, and internal or test accounts are excluded from board reporting. + +## Contract ARR Band +Anchor: notion://notion_page_arr_contract_reporting#contract-arr-band + +ARR uses active contract_arr_cents first when a contract covers the account and metric date. 
+ +Recurring subscription MRR is annualized only for account periods without active contract ARR. + +Contract ARR banding uses active contract ARR as of 2026-03-31, including the contracts over 20000000 cents threshold. + +## Booked ARR vs Active ARR +Anchor: notion://notion_page_arr_contract_reporting#booked-arr-vs-active-arr + +Booked ARR is not active ARR, and internal or test accounts are excluded from board reporting. + +## Internal and Test Exclusions +Anchor: notion://notion_page_arr_contract_reporting#internal-and-test-exclusions + +Booked ARR is not active ARR, and internal or test accounts are excluded from board reporting. + +## Related Evidence + +- notion://notion_page_arr_contract_reporting#arr-contract-first +- notion://notion_page_arr_contract_reporting#contract-arr-band +- expected-answer://arr_as_of_2026_03_31 +- expected-answer://active_requesters_last_week_large_contracts diff --git a/packages/cli/assets/demo/orbit/raw-sources/notion/customer-health-playbook.md b/packages/cli/assets/demo/orbit/raw-sources/notion/customer-health-playbook.md new file mode 100644 index 00000000..1fbf0700 --- /dev/null +++ b/packages/cli/assets/demo/orbit/raw-sources/notion/customer-health-playbook.md @@ -0,0 +1,55 @@ +--- +page_id: notion_page_customer_health_playbook +title: 'Customer Health Playbook' +owner_person_key: priya_shah +owner_team: customer_success +owner_notion_user_id: notion_user_0005 +status: current +created_time: 2026-02-03T13:45:00-08:00 +last_edited_time: 2026-03-27T14:50:00-07:00 +tags: + - customer-success + - health + - risk +related_expected_answers: + - customer_health_risk_accounts +related_metric_keys: + - active_customers +anchors: + - notion://notion_page_customer_health_playbook#risk-definition +--- + +# Customer Health Playbook + +Owner: Priya Shah (customer_success) + +## Risk Definition +Anchor: notion://notion_page_customer_health_playbook#risk-definition + +Customer health combines support ticket severity and recent requisition 
or approval usage. + +Active customers must have an active paid subscription and at least one qualifying procurement action in the trailing 30-day window. + +High-risk accounts are reviewed for renewal action when severe open tickets coincide with falling workflow activity. + +As of 2026-03-31 the governed customer-health mart reports 9 high-risk accounts. + +## Support Signals +Anchor: notion://notion_page_customer_health_playbook#support-signals + +Customer health combines support ticket severity and recent requisition or approval usage. + +## Procurement Activity Signals +Anchor: notion://notion_page_customer_health_playbook#procurement-activity-signals + +Active customers must have an active paid subscription and at least one qualifying procurement action in the trailing 30-day window. + +## Renewal Review +Anchor: notion://notion_page_customer_health_playbook#renewal-review + +High-risk accounts are reviewed for renewal action when severe open tickets coincide with falling workflow activity. 
+ +## Related Evidence + +- notion://notion_page_customer_health_playbook#risk-definition +- expected-answer://customer_health_risk_accounts diff --git a/packages/cli/assets/demo/orbit/raw-sources/notion/retention-and-nrr-definition-notes.md b/packages/cli/assets/demo/orbit/raw-sources/notion/retention-and-nrr-definition-notes.md new file mode 100644 index 00000000..aee01a60 --- /dev/null +++ b/packages/cli/assets/demo/orbit/raw-sources/notion/retention-and-nrr-definition-notes.md @@ -0,0 +1,58 @@ +--- +page_id: notion_page_retention_policy_current +title: 'Retention and NRR Definition Notes' +owner_person_key: maya_chen +owner_team: analytics +owner_notion_user_id: notion_user_0001 +status: current +created_time: 2026-01-08T10:00:00-08:00 +last_edited_time: 2026-03-30T16:40:00-07:00 +tags: + - analytics + - retention + - board-reporting +related_expected_answers: + - enterprise_nrr_q1_vs_q4_breakout + - enterprise_expansions_q1_2026 +related_metric_keys: + - net_revenue_retention + - segment +anchors: + - notion://notion_page_retention_policy_current#nrr-definition + - notion://notion_page_retention_policy_current#discount-expiration-treatment +--- + +# Retention and NRR Definition Notes + +Owner: Maya Chen (analytics) + +## NRR Definition +Anchor: notion://notion_page_retention_policy_current#nrr-definition + +Enterprise NRR is calculated as (starting_arr + expansion_arr - contraction_arr - churned_arr) / starting_arr. + +Movement classification happens after child accounts roll up to parent_account_id. + +Reactivations within 30 days are excluded from NRR movement components and kept in audit columns. + +Q1 2026 discount expiration is contraction, not churn; the board-prep view calls out 11 enterprise parent accounts. 
+ +## Parent-Account Grain +Anchor: notion://notion_page_retention_policy_current#parent-account-grain + +## Reactivation Exclusion +Anchor: notion://notion_page_retention_policy_current#reactivation-exclusion + +Reactivations within 30 days are excluded from NRR movement components and kept in audit columns. + +## Discount Expiration Treatment +Anchor: notion://notion_page_retention_policy_current#discount-expiration-treatment + +Q1 2026 discount expiration is contraction, not churn; the board-prep view calls out 11 enterprise parent accounts. + +## Related Evidence + +- notion://notion_page_retention_policy_current#nrr-definition +- notion://notion_page_retention_policy_current#discount-expiration-treatment +- expected-answer://enterprise_nrr_q1_vs_q4_breakout +- expected-answer://enterprise_expansions_q1_2026 diff --git a/packages/cli/assets/demo/orbit/raw-sources/notion/revenue-reporting-policy.md b/packages/cli/assets/demo/orbit/raw-sources/notion/revenue-reporting-policy.md new file mode 100644 index 00000000..59bd2beb --- /dev/null +++ b/packages/cli/assets/demo/orbit/raw-sources/notion/revenue-reporting-policy.md @@ -0,0 +1,64 @@ +--- +page_id: notion_page_revenue_reporting_policy +title: 'Revenue Reporting Policy' +owner_person_key: rina_patel +owner_team: finance +owner_notion_user_id: notion_user_0002 +status: current +created_time: 2025-12-12T09:30:00-08:00 +last_edited_time: 2026-03-28T13:15:00-07:00 +tags: + - finance + - revenue + - board-reporting +related_expected_answers: + - revenue_net_vs_gross_reconciliation +related_metric_keys: + - gross_revenue + - net_revenue +anchors: + - notion://notion_page_revenue_reporting_policy#gross-revenue + - notion://notion_page_revenue_reporting_policy#gross-to-net-reconciliation +--- + +# Revenue Reporting Policy + +Owner: Rina Patel (finance) + +## Gross Revenue +Anchor: notion://notion_page_revenue_reporting_policy#gross-revenue + +Gross revenue includes paid subscription, seat, usage, and addon invoice line 
items recognized on invoices.paid_at. + +Credit line items are negative in raw invoice lines and reported as absolute credits. + +Successful refunds reduce net revenue in the refund month based on refunds.refunded_at. + +For February 2026 the governed reconciliation is gross revenue 213000000 cents, credits 13400000 cents, refunds 31200000 cents, and net revenue 168400000 cents. + +## Credits +Anchor: notion://notion_page_revenue_reporting_policy#credits + +Credit line items are negative in raw invoice lines and reported as absolute credits. + +For February 2026 the governed reconciliation is gross revenue 213000000 cents, credits 13400000 cents, refunds 31200000 cents, and net revenue 168400000 cents. + +## Refunds +Anchor: notion://notion_page_revenue_reporting_policy#refunds + +Successful refunds reduce net revenue in the refund month based on refunds.refunded_at. + +For February 2026 the governed reconciliation is gross revenue 213000000 cents, credits 13400000 cents, refunds 31200000 cents, and net revenue 168400000 cents. + +## Gross To Net Reconciliation +Anchor: notion://notion_page_revenue_reporting_policy#gross-to-net-reconciliation + +Gross revenue includes paid subscription, seat, usage, and addon invoice line items recognized on invoices.paid_at. + +For February 2026 the governed reconciliation is gross revenue 213000000 cents, credits 13400000 cents, refunds 31200000 cents, and net revenue 168400000 cents. 
+ +## Related Evidence + +- notion://notion_page_revenue_reporting_policy#gross-revenue +- notion://notion_page_revenue_reporting_policy#gross-to-net-reconciliation +- expected-answer://revenue_net_vs_gross_reconciliation diff --git a/packages/cli/assets/demo/orbit/raw-sources/notion/sales-ops-segmentation-guide.md b/packages/cli/assets/demo/orbit/raw-sources/notion/sales-ops-segmentation-guide.md new file mode 100644 index 00000000..825d52c2 --- /dev/null +++ b/packages/cli/assets/demo/orbit/raw-sources/notion/sales-ops-segmentation-guide.md @@ -0,0 +1,58 @@ +--- +page_id: notion_page_sales_ops_segmentation +title: 'Sales Ops Segmentation Guide' +owner_person_key: jordan_lee +owner_team: sales_ops +owner_notion_user_id: notion_user_0004 +status: current +created_time: 2025-10-03T09:00:00-07:00 +last_edited_time: 2026-03-25T10:35:00-07:00 +tags: + - sales-ops + - segmentation + - plans +related_expected_answers: + - enterprise_nrr_q1_vs_q4_breakout + - enterprise_expansions_q1_2026 +related_metric_keys: + - segment + - net_revenue_retention +anchors: + - notion://notion_page_sales_ops_segmentation#growth-plan-normalization +--- + +# Sales Ops Segmentation Guide + +Owner: Jordan Lee (sales_ops) + +## Growth Plan Normalization +Anchor: notion://notion_page_sales_ops_segmentation#growth-plan-normalization + +The current plan language is starter, growth, and enterprise. + +Raw historical pro_plus values normalize to growth for current artifacts after 2025-10-01. + +Retention cohort membership is evaluated at quarter start unless the golden question states another as-of date. + +Segment membership changes are bridge items and are not silently classified as expansion or churn. + +## Segment Membership +Anchor: notion://notion_page_sales_ops_segmentation#segment-membership + +Segment membership changes are bridge items and are not silently classified as expansion or churn. 
+ +## Quarter Start Cohorts +Anchor: notion://notion_page_sales_ops_segmentation#quarter-start-cohorts + +Retention cohort membership is evaluated at quarter start unless the golden question states another as-of date. + +## Historical Plan Alias +Anchor: notion://notion_page_sales_ops_segmentation#historical-plan-alias + +Raw historical pro_plus values normalize to growth for current artifacts after 2025-10-01. + +## Related Evidence + +- notion://notion_page_sales_ops_segmentation#growth-plan-normalization +- expected-answer://enterprise_nrr_q1_vs_q4_breakout +- expected-answer://enterprise_expansions_q1_2026 diff --git a/packages/cli/assets/demo/orbit/raw-sources/notion/support-escalation-runbook.md b/packages/cli/assets/demo/orbit/raw-sources/notion/support-escalation-runbook.md new file mode 100644 index 00000000..66914f0c --- /dev/null +++ b/packages/cli/assets/demo/orbit/raw-sources/notion/support-escalation-runbook.md @@ -0,0 +1,38 @@ +--- +page_id: notion_page_support_escalation_runbook +title: 'Support Escalation Runbook' +owner_person_key: priya_shah +owner_team: customer_success +owner_notion_user_id: notion_user_0005 +status: current +created_time: 2026-03-06T09:00:00-08:00 +last_edited_time: 2026-03-21T15:30:00-07:00 +parent_page_id: notion_page_customer_health_playbook +tags: + - customer-success + - support + - risk +source_anchors: + - notion://notion_page_support_escalation_runbook#triage + - looker://dashboard/dash_customer_health_risk + - drive://drive_file_customer_health_scorecard_q1#high-risk-accounts + - slack://customer-success/2026-03-31/1774976400.000100?thread_ts=1774976400.000100 +--- + +# Support Escalation Runbook + +Owner: Priya Shah (customer_success) + +## Operating Context +Anchor: notion://notion_page_support_escalation_runbook#support-escalation-runbook + +High-risk review combines open support tickets with recent requisition and approval activity drops. 
+Use looker://dashboard/dash_customer_health_risk for the list and drive://drive_file_customer_health_scorecard_q1#high-risk-accounts for the scorecard. +Escalations are coordinated in slack://customer-success/2026-03-31/1774976400.000100?thread_ts=1774976400.000100. + +## Source Anchors + +- notion://notion_page_support_escalation_runbook#triage +- looker://dashboard/dash_customer_health_risk +- drive://drive_file_customer_health_scorecard_q1#high-risk-accounts +- slack://customer-success/2026-03-31/1774976400.000100?thread_ts=1774976400.000100 diff --git a/packages/cli/assets/demo/orbit/raw-sources/warehouse/accounts.csv b/packages/cli/assets/demo/orbit/raw-sources/warehouse/accounts.csv new file mode 100644 index 00000000..61c3f285 --- /dev/null +++ b/packages/cli/assets/demo/orbit/raw-sources/warehouse/accounts.csv @@ -0,0 +1,211 @@ +account_id,parent_account_id,account_name,domain,industry,sales_region,size_band,lifecycle_status,is_internal,is_test,created_at +acct_0001,parent_0001,Orbit Customer 001,customer-001.example.com,software,na,enterprise,active,false,false,2025-01-01T00:00:00Z +acct_0002,parent_0002,Orbit Customer 002,customer-002.example.com,manufacturing,emea,enterprise,active,false,false,2025-02-02T00:00:00Z +acct_0003,parent_0003,Orbit Customer 003,customer-003.example.com,healthcare,apac,enterprise,active,false,false,2025-03-03T00:00:00Z +acct_0004,parent_0004,Orbit Customer 004,customer-004.example.com,financial_services,na,enterprise,active,false,false,2025-04-04T00:00:00Z +acct_0005,parent_0005,Orbit Customer 005,customer-005.example.com,retail,emea,enterprise,active,false,false,2025-05-05T00:00:00Z +acct_0006,parent_0006,Orbit Customer 006,customer-006.example.com,software,apac,enterprise,active,false,false,2025-06-06T00:00:00Z +acct_0007,parent_0007,Orbit Customer 007,customer-007.example.com,manufacturing,na,enterprise,active,false,false,2025-07-07T00:00:00Z +acct_0008,parent_0008,Orbit Customer 
008,customer-008.example.com,healthcare,emea,enterprise,active,false,false,2025-08-08T00:00:00Z +acct_0009,parent_0009,Orbit Customer 009,customer-009.example.com,financial_services,apac,enterprise,active,false,false,2025-09-09T00:00:00Z +acct_0010,parent_0010,Orbit Customer 010,customer-010.example.com,retail,na,enterprise,active,false,false,2025-10-10T00:00:00Z +acct_0011,parent_0011,Orbit Customer 011,customer-011.example.com,software,emea,enterprise,active,false,false,2025-11-11T00:00:00Z +acct_0012,parent_0012,Orbit Customer 012,customer-012.example.com,manufacturing,apac,enterprise,active,false,false,2025-12-12T00:00:00Z +acct_0013,parent_0013,Orbit Customer 013,customer-013.example.com,healthcare,na,enterprise,active,false,false,2025-01-13T00:00:00Z +acct_0014,parent_0014,Orbit Customer 014,customer-014.example.com,financial_services,emea,enterprise,active,false,false,2025-02-14T00:00:00Z +acct_0015,parent_0015,Orbit Customer 015,customer-015.example.com,retail,apac,enterprise,active,false,false,2025-03-15T00:00:00Z +acct_0016,parent_0016,Orbit Customer 016,customer-016.example.com,software,na,enterprise,active,false,false,2025-04-16T00:00:00Z +acct_0017,parent_0017,Orbit Customer 017,customer-017.example.com,manufacturing,emea,enterprise,active,false,false,2025-05-17T00:00:00Z +acct_0018,parent_0018,Orbit Customer 018,customer-018.example.com,healthcare,apac,enterprise,active,false,false,2025-06-18T00:00:00Z +acct_0019,parent_0019,Orbit Customer 019,customer-019.example.com,financial_services,na,enterprise,active,false,false,2025-07-19T00:00:00Z +acct_0020,parent_0020,Orbit Customer 020,customer-020.example.com,retail,emea,enterprise,active,false,false,2025-08-20T00:00:00Z +acct_0021,parent_0021,Orbit Customer 021,customer-021.example.com,software,apac,enterprise,active,false,false,2025-09-21T00:00:00Z +acct_0022,parent_0022,Orbit Customer 022,customer-022.example.com,manufacturing,na,enterprise,active,false,false,2025-10-22T00:00:00Z 
+acct_0023,parent_0023,Orbit Customer 023,customer-023.example.com,healthcare,emea,enterprise,active,false,false,2025-11-23T00:00:00Z +acct_0024,parent_0024,Orbit Customer 024,customer-024.example.com,financial_services,apac,enterprise,active,false,false,2025-12-24T00:00:00Z +acct_0025,parent_0025,Orbit Customer 025,customer-025.example.com,retail,na,enterprise,active,false,false,2025-01-25T00:00:00Z +acct_0026,parent_0026,Orbit Customer 026,customer-026.example.com,software,emea,enterprise,active,false,false,2025-02-26T00:00:00Z +acct_0027,parent_0027,Orbit Customer 027,customer-027.example.com,manufacturing,apac,enterprise,active,false,false,2025-03-27T00:00:00Z +acct_0028,parent_0028,Orbit Customer 028,customer-028.example.com,healthcare,na,enterprise,active,false,false,2025-04-28T00:00:00Z +acct_0029,parent_0029,Orbit Customer 029,customer-029.example.com,financial_services,emea,enterprise,active,false,false,2025-05-01T00:00:00Z +acct_0030,parent_0030,Orbit Customer 030,customer-030.example.com,retail,apac,enterprise,active,false,false,2025-06-02T00:00:00Z +acct_0031,parent_0031,Orbit Customer 031,customer-031.example.com,software,na,enterprise,active,false,false,2025-07-03T00:00:00Z +acct_0032,parent_0032,Orbit Customer 032,customer-032.example.com,manufacturing,emea,enterprise,active,false,false,2025-08-04T00:00:00Z +acct_0033,parent_0033,Orbit Customer 033,customer-033.example.com,healthcare,apac,enterprise,active,false,false,2025-09-05T00:00:00Z +acct_0034,parent_0034,Orbit Customer 034,customer-034.example.com,financial_services,na,enterprise,active,false,false,2025-10-06T00:00:00Z +acct_0035,parent_0035,Orbit Customer 035,customer-035.example.com,retail,emea,enterprise,active,false,false,2025-11-07T00:00:00Z +acct_0036,parent_0036,Orbit Customer 036,customer-036.example.com,software,apac,enterprise,active,false,false,2025-12-08T00:00:00Z +acct_0037,parent_0037,Orbit Customer 
037,customer-037.example.com,manufacturing,na,enterprise,active,false,false,2025-01-09T00:00:00Z +acct_0038,parent_0038,Orbit Customer 038,customer-038.example.com,healthcare,emea,enterprise,active,false,false,2025-02-10T00:00:00Z +acct_0039,parent_0039,Orbit Customer 039,customer-039.example.com,financial_services,apac,enterprise,active,false,false,2025-03-11T00:00:00Z +acct_0040,parent_0040,Orbit Customer 040,customer-040.example.com,retail,na,enterprise,active,false,false,2025-04-12T00:00:00Z +acct_0041,parent_0041,Orbit Customer 041,customer-041.example.com,software,emea,enterprise,active,false,false,2025-05-13T00:00:00Z +acct_0042,parent_0042,Orbit Customer 042,customer-042.example.com,manufacturing,apac,enterprise,active,false,false,2025-06-14T00:00:00Z +acct_0043,parent_0043,Orbit Customer 043,customer-043.example.com,healthcare,na,enterprise,active,false,false,2025-07-15T00:00:00Z +acct_0044,parent_0044,Orbit Customer 044,customer-044.example.com,financial_services,emea,enterprise,active,false,false,2025-08-16T00:00:00Z +acct_0045,parent_0045,Orbit Customer 045,customer-045.example.com,retail,apac,enterprise,active,false,false,2025-09-17T00:00:00Z +acct_0046,parent_0046,Orbit Customer 046,customer-046.example.com,software,na,enterprise,active,false,false,2025-10-18T00:00:00Z +acct_0047,parent_0047,Orbit Customer 047,customer-047.example.com,manufacturing,emea,enterprise,active,false,false,2025-11-19T00:00:00Z +acct_0048,parent_0048,Orbit Customer 048,customer-048.example.com,healthcare,apac,enterprise,active,false,false,2025-12-20T00:00:00Z +acct_0049,parent_0049,Orbit Customer 049,customer-049.example.com,financial_services,na,enterprise,active,false,false,2025-01-21T00:00:00Z +acct_0050,parent_0050,Orbit Customer 050,customer-050.example.com,retail,emea,enterprise,active,false,false,2025-02-22T00:00:00Z +acct_0051,parent_0051,Orbit Customer 051,customer-051.example.com,software,apac,enterprise,active,false,false,2025-03-23T00:00:00Z 
+acct_0052,parent_0052,Orbit Customer 052,customer-052.example.com,manufacturing,na,enterprise,active,false,false,2025-04-24T00:00:00Z +acct_0053,parent_0053,Orbit Customer 053,customer-053.example.com,healthcare,emea,enterprise,active,false,false,2025-05-25T00:00:00Z +acct_0054,parent_0054,Orbit Customer 054,customer-054.example.com,financial_services,apac,enterprise,active,false,false,2025-06-26T00:00:00Z +acct_0055,parent_0055,Orbit Customer 055,customer-055.example.com,retail,na,enterprise,active,false,false,2025-07-27T00:00:00Z +acct_0056,parent_0056,Orbit Customer 056,customer-056.example.com,software,emea,enterprise,active,false,false,2025-08-28T00:00:00Z +acct_0057,parent_0057,Orbit Customer 057,customer-057.example.com,manufacturing,apac,enterprise,active,false,false,2025-09-01T00:00:00Z +acct_0058,parent_0058,Orbit Customer 058,customer-058.example.com,healthcare,na,enterprise,active,false,false,2025-10-02T00:00:00Z +acct_0059,parent_0059,Orbit Customer 059,customer-059.example.com,financial_services,emea,enterprise,active,false,false,2025-11-03T00:00:00Z +acct_0060,parent_0060,Orbit Customer 060,customer-060.example.com,retail,apac,enterprise,active,false,false,2025-12-04T00:00:00Z +acct_0061,parent_0061,Orbit Customer 061,customer-061.example.com,software,na,enterprise,active,false,false,2025-01-05T00:00:00Z +acct_0062,parent_0062,Orbit Customer 062,customer-062.example.com,manufacturing,emea,enterprise,active,false,false,2025-02-06T00:00:00Z +acct_0063,parent_0063,Orbit Customer 063,customer-063.example.com,healthcare,apac,enterprise,active,false,false,2025-03-07T00:00:00Z +acct_0064,parent_0064,Orbit Customer 064,customer-064.example.com,financial_services,na,enterprise,active,false,false,2025-04-08T00:00:00Z +acct_0065,parent_0065,Orbit Customer 065,customer-065.example.com,retail,emea,enterprise,active,false,false,2025-05-09T00:00:00Z +acct_0066,parent_0066,Orbit Customer 
066,customer-066.example.com,software,apac,enterprise,active,false,false,2025-06-10T00:00:00Z +acct_0067,parent_0067,Orbit Customer 067,customer-067.example.com,manufacturing,na,enterprise,active,false,false,2025-07-11T00:00:00Z +acct_0068,parent_0068,Orbit Customer 068,customer-068.example.com,healthcare,emea,enterprise,active,false,false,2025-08-12T00:00:00Z +acct_0069,parent_0069,Orbit Customer 069,customer-069.example.com,financial_services,apac,enterprise,active,false,false,2025-09-13T00:00:00Z +acct_0070,parent_0070,Orbit Customer 070,customer-070.example.com,retail,na,enterprise,active,false,false,2025-10-14T00:00:00Z +acct_0071,parent_0071,Orbit Customer 071,customer-071.example.com,software,emea,enterprise,active,false,false,2025-11-15T00:00:00Z +acct_0072,parent_0072,Orbit Customer 072,customer-072.example.com,manufacturing,apac,enterprise,active,false,false,2025-12-16T00:00:00Z +acct_0073,parent_0073,Orbit Customer 073,customer-073.example.com,healthcare,na,enterprise,active,false,false,2025-01-17T00:00:00Z +acct_0074,parent_0074,Orbit Customer 074,customer-074.example.com,financial_services,emea,enterprise,active,false,false,2025-02-18T00:00:00Z +acct_0075,parent_0075,Orbit Customer 075,customer-075.example.com,retail,apac,enterprise,active,false,false,2025-03-19T00:00:00Z +acct_0076,parent_0076,Orbit Customer 076,customer-076.example.com,software,na,enterprise,active,false,false,2025-04-20T00:00:00Z +acct_0077,parent_0077,Orbit Customer 077,customer-077.example.com,manufacturing,emea,enterprise,active,false,false,2025-05-21T00:00:00Z +acct_0078,parent_0078,Orbit Customer 078,customer-078.example.com,healthcare,apac,enterprise,active,false,false,2025-06-22T00:00:00Z +acct_0079,parent_0079,Orbit Customer 079,customer-079.example.com,financial_services,na,enterprise,active,false,false,2025-07-23T00:00:00Z +acct_0080,parent_0080,Orbit Customer 080,customer-080.example.com,retail,emea,enterprise,active,false,false,2025-08-24T00:00:00Z 
+acct_0081,parent_0081,Orbit Customer 081,customer-081.example.com,software,apac,mid_market,active,false,false,2025-09-25T00:00:00Z +acct_0082,parent_0082,Orbit Customer 082,customer-082.example.com,manufacturing,na,mid_market,active,false,false,2025-10-26T00:00:00Z +acct_0083,parent_0083,Orbit Customer 083,customer-083.example.com,healthcare,emea,mid_market,active,false,false,2025-11-27T00:00:00Z +acct_0084,parent_0084,Orbit Customer 084,customer-084.example.com,financial_services,apac,mid_market,active,false,false,2025-12-28T00:00:00Z +acct_0085,parent_0085,Orbit Customer 085,customer-085.example.com,retail,na,mid_market,active,false,false,2025-01-01T00:00:00Z +acct_0086,parent_0086,Orbit Customer 086,customer-086.example.com,software,emea,mid_market,active,false,false,2025-02-02T00:00:00Z +acct_0087,parent_0087,Orbit Customer 087,customer-087.example.com,manufacturing,apac,mid_market,active,false,false,2025-03-03T00:00:00Z +acct_0088,parent_0088,Orbit Customer 088,customer-088.example.com,healthcare,na,mid_market,active,false,false,2025-04-04T00:00:00Z +acct_0089,parent_0089,Orbit Customer 089,customer-089.example.com,financial_services,emea,mid_market,active,false,false,2025-05-05T00:00:00Z +acct_0090,parent_0090,Orbit Customer 090,customer-090.example.com,retail,apac,mid_market,active,false,false,2025-06-06T00:00:00Z +acct_0091,parent_0091,Orbit Customer 091,customer-091.example.com,software,na,mid_market,active,false,false,2025-07-07T00:00:00Z +acct_0092,parent_0092,Orbit Customer 092,customer-092.example.com,manufacturing,emea,mid_market,active,false,false,2025-08-08T00:00:00Z +acct_0093,parent_0093,Orbit Customer 093,customer-093.example.com,healthcare,apac,mid_market,active,false,false,2025-09-09T00:00:00Z +acct_0094,parent_0094,Orbit Customer 094,customer-094.example.com,financial_services,na,mid_market,active,false,false,2025-10-10T00:00:00Z +acct_0095,parent_0095,Orbit Customer 
095,customer-095.example.com,retail,emea,mid_market,active,false,false,2025-11-11T00:00:00Z +acct_0096,parent_0096,Orbit Customer 096,customer-096.example.com,software,apac,mid_market,active,false,false,2025-12-12T00:00:00Z +acct_0097,parent_0097,Orbit Customer 097,customer-097.example.com,manufacturing,na,mid_market,active,false,false,2025-01-13T00:00:00Z +acct_0098,parent_0098,Orbit Customer 098,customer-098.example.com,healthcare,emea,mid_market,active,false,false,2025-02-14T00:00:00Z +acct_0099,parent_0099,Orbit Customer 099,customer-099.example.com,financial_services,apac,mid_market,active,false,false,2025-03-15T00:00:00Z +acct_0100,parent_0100,Orbit Customer 100,customer-100.example.com,retail,na,mid_market,active,false,false,2025-04-16T00:00:00Z +acct_0101,parent_0101,Orbit Customer 101,customer-101.example.com,software,emea,mid_market,active,false,false,2025-05-17T00:00:00Z +acct_0102,parent_0102,Orbit Customer 102,customer-102.example.com,manufacturing,apac,mid_market,active,false,false,2025-06-18T00:00:00Z +acct_0103,parent_0103,Orbit Customer 103,customer-103.example.com,healthcare,na,mid_market,active,false,false,2025-07-19T00:00:00Z +acct_0104,parent_0104,Orbit Customer 104,customer-104.example.com,financial_services,emea,mid_market,active,false,false,2025-08-20T00:00:00Z +acct_0105,parent_0105,Orbit Customer 105,customer-105.example.com,retail,apac,mid_market,active,false,false,2025-09-21T00:00:00Z +acct_0106,parent_0106,Orbit Customer 106,customer-106.example.com,software,na,mid_market,active,false,false,2025-10-22T00:00:00Z +acct_0107,parent_0107,Orbit Customer 107,customer-107.example.com,manufacturing,emea,mid_market,active,false,false,2025-11-23T00:00:00Z +acct_0108,parent_0108,Orbit Customer 108,customer-108.example.com,healthcare,apac,mid_market,active,false,false,2025-12-24T00:00:00Z +acct_0109,parent_0109,Orbit Customer 109,customer-109.example.com,financial_services,na,mid_market,active,false,false,2025-01-25T00:00:00Z 
+acct_0110,parent_0110,Orbit Customer 110,customer-110.example.com,retail,emea,mid_market,active,false,false,2025-02-26T00:00:00Z +acct_0111,parent_0111,Orbit Customer 111,customer-111.example.com,software,apac,mid_market,active,false,false,2025-03-27T00:00:00Z +acct_0112,parent_0112,Orbit Customer 112,customer-112.example.com,manufacturing,na,mid_market,active,false,false,2025-04-28T00:00:00Z +acct_0113,parent_0113,Orbit Customer 113,customer-113.example.com,healthcare,emea,mid_market,active,false,false,2025-05-01T00:00:00Z +acct_0114,parent_0114,Orbit Customer 114,customer-114.example.com,financial_services,apac,mid_market,active,false,false,2025-06-02T00:00:00Z +acct_0115,parent_0115,Orbit Customer 115,customer-115.example.com,retail,na,mid_market,active,false,false,2025-07-03T00:00:00Z +acct_0116,parent_0116,Orbit Customer 116,customer-116.example.com,software,emea,mid_market,active,false,false,2025-08-04T00:00:00Z +acct_0117,parent_0117,Orbit Customer 117,customer-117.example.com,manufacturing,apac,mid_market,active,false,false,2025-09-05T00:00:00Z +acct_0118,parent_0118,Orbit Customer 118,customer-118.example.com,healthcare,na,mid_market,active,false,false,2025-10-06T00:00:00Z +acct_0119,parent_0119,Orbit Customer 119,customer-119.example.com,financial_services,emea,mid_market,active,false,false,2025-11-07T00:00:00Z +acct_0120,parent_0120,Orbit Customer 120,customer-120.example.com,retail,apac,mid_market,active,false,false,2025-12-08T00:00:00Z +acct_0121,parent_0121,Orbit Customer 121,customer-121.example.com,software,na,mid_market,active,false,false,2025-01-09T00:00:00Z +acct_0122,parent_0122,Orbit Customer 122,customer-122.example.com,manufacturing,emea,mid_market,active,false,false,2025-02-10T00:00:00Z +acct_0123,parent_0123,Orbit Customer 123,customer-123.example.com,healthcare,apac,mid_market,active,false,false,2025-03-11T00:00:00Z +acct_0124,parent_0124,Orbit Customer 
124,customer-124.example.com,financial_services,na,mid_market,active,false,false,2025-04-12T00:00:00Z +acct_0125,parent_0125,Orbit Customer 125,customer-125.example.com,retail,emea,mid_market,active,false,false,2025-05-13T00:00:00Z +acct_0126,parent_0126,Orbit Customer 126,customer-126.example.com,software,apac,mid_market,active,false,false,2025-06-14T00:00:00Z +acct_0127,parent_0127,Orbit Customer 127,customer-127.example.com,manufacturing,na,mid_market,active,false,false,2025-07-15T00:00:00Z +acct_0128,parent_0128,Orbit Customer 128,customer-128.example.com,healthcare,emea,mid_market,active,false,false,2025-08-16T00:00:00Z +acct_0129,parent_0129,Orbit Customer 129,customer-129.example.com,financial_services,apac,mid_market,active,false,false,2025-09-17T00:00:00Z +acct_0130,parent_0130,Orbit Customer 130,customer-130.example.com,retail,na,mid_market,active,false,false,2025-10-18T00:00:00Z +acct_0131,parent_0131,Orbit Customer 131,customer-131.example.com,software,emea,mid_market,active,false,false,2025-11-19T00:00:00Z +acct_0132,parent_0132,Orbit Customer 132,customer-132.example.com,manufacturing,apac,mid_market,active,false,false,2025-12-20T00:00:00Z +acct_0133,parent_0001,Orbit Customer 133,customer-133.example.com,healthcare,na,mid_market,active,false,false,2025-01-21T00:00:00Z +acct_0134,parent_0002,Orbit Customer 134,customer-134.example.com,financial_services,emea,mid_market,active,false,false,2025-02-22T00:00:00Z +acct_0135,parent_0003,Orbit Customer 135,customer-135.example.com,retail,apac,mid_market,active,false,false,2025-03-23T00:00:00Z +acct_0136,parent_0004,Orbit Customer 136,customer-136.example.com,software,na,mid_market,active,false,false,2025-04-24T00:00:00Z +acct_0137,parent_0005,Orbit Customer 137,customer-137.example.com,manufacturing,emea,mid_market,active,false,false,2025-05-25T00:00:00Z +acct_0138,parent_0006,Orbit Customer 138,customer-138.example.com,healthcare,apac,mid_market,active,false,false,2025-06-26T00:00:00Z 
+acct_0139,parent_0007,Orbit Customer 139,customer-139.example.com,financial_services,na,mid_market,active,false,false,2025-07-27T00:00:00Z +acct_0140,parent_0008,Orbit Customer 140,customer-140.example.com,retail,emea,mid_market,active,false,false,2025-08-28T00:00:00Z +acct_0141,parent_0009,Orbit Customer 141,customer-141.example.com,software,apac,mid_market,active,false,false,2025-09-01T00:00:00Z +acct_0142,parent_0010,Orbit Customer 142,customer-142.example.com,manufacturing,na,mid_market,active,false,false,2025-10-02T00:00:00Z +acct_0143,parent_0011,Orbit Customer 143,customer-143.example.com,healthcare,emea,mid_market,active,false,false,2025-11-03T00:00:00Z +acct_0144,parent_0012,Orbit Customer 144,customer-144.example.com,financial_services,apac,mid_market,active,false,false,2025-12-04T00:00:00Z +acct_0145,parent_0013,Orbit Customer 145,customer-145.example.com,retail,na,mid_market,active,false,false,2025-01-05T00:00:00Z +acct_0146,parent_0014,Orbit Customer 146,customer-146.example.com,software,emea,mid_market,active,false,false,2025-02-06T00:00:00Z +acct_0147,parent_0015,Orbit Customer 147,customer-147.example.com,manufacturing,apac,mid_market,active,false,false,2025-03-07T00:00:00Z +acct_0148,parent_0016,Orbit Customer 148,customer-148.example.com,healthcare,na,mid_market,active,false,false,2025-04-08T00:00:00Z +acct_0149,parent_0017,Orbit Customer 149,customer-149.example.com,financial_services,emea,mid_market,active,false,false,2025-05-09T00:00:00Z +acct_0150,parent_0018,Orbit Customer 150,customer-150.example.com,retail,apac,mid_market,active,false,false,2025-06-10T00:00:00Z +acct_0151,parent_0019,Orbit Customer 151,customer-151.example.com,software,na,smb,active,false,false,2025-07-11T00:00:00Z +acct_0152,parent_0020,Orbit Customer 152,customer-152.example.com,manufacturing,emea,smb,active,false,false,2025-08-12T00:00:00Z +acct_0153,parent_0021,Orbit Customer 153,customer-153.example.com,healthcare,apac,smb,active,false,false,2025-09-13T00:00:00Z 
+acct_0154,parent_0022,Orbit Customer 154,customer-154.example.com,financial_services,na,smb,active,false,false,2025-10-14T00:00:00Z +acct_0155,parent_0023,Orbit Customer 155,customer-155.example.com,retail,emea,smb,active,false,false,2025-11-15T00:00:00Z +acct_0156,parent_0024,Orbit Customer 156,customer-156.example.com,software,apac,smb,active,false,false,2025-12-16T00:00:00Z +acct_0157,parent_0025,Orbit Customer 157,customer-157.example.com,manufacturing,na,smb,active,false,false,2025-01-17T00:00:00Z +acct_0158,parent_0026,Orbit Customer 158,customer-158.example.com,healthcare,emea,smb,active,false,false,2025-02-18T00:00:00Z +acct_0159,parent_0027,Orbit Customer 159,customer-159.example.com,financial_services,apac,smb,active,false,false,2025-03-19T00:00:00Z +acct_0160,parent_0028,Orbit Customer 160,customer-160.example.com,retail,na,smb,active,false,false,2025-04-20T00:00:00Z +acct_0161,parent_0029,Orbit Customer 161,customer-161.example.com,software,emea,smb,active,false,false,2025-05-21T00:00:00Z +acct_0162,parent_0030,Orbit Customer 162,customer-162.example.com,manufacturing,apac,smb,active,false,false,2025-06-22T00:00:00Z +acct_0163,parent_0031,Orbit Customer 163,customer-163.example.com,healthcare,na,smb,active,false,false,2025-07-23T00:00:00Z +acct_0164,parent_0032,Orbit Customer 164,customer-164.example.com,financial_services,emea,smb,active,false,false,2025-08-24T00:00:00Z +acct_0165,parent_0033,Orbit Customer 165,customer-165.example.com,retail,apac,smb,active,false,false,2025-09-25T00:00:00Z +acct_0166,parent_0034,Orbit Customer 166,customer-166.example.com,software,na,smb,active,false,false,2025-10-26T00:00:00Z +acct_0167,parent_0035,Orbit Customer 167,customer-167.example.com,manufacturing,emea,smb,active,false,false,2025-11-27T00:00:00Z +acct_0168,parent_0036,Orbit Customer 168,customer-168.example.com,healthcare,apac,smb,active,false,false,2025-12-28T00:00:00Z +acct_0169,parent_0037,Orbit Customer 
169,customer-169.example.com,financial_services,na,smb,active,false,false,2025-01-01T00:00:00Z +acct_0170,parent_0038,Orbit Customer 170,customer-170.example.com,retail,emea,smb,active,false,false,2025-02-02T00:00:00Z +acct_0171,parent_0039,Orbit Customer 171,customer-171.example.com,software,apac,smb,active,false,false,2025-03-03T00:00:00Z +acct_0172,parent_0040,Orbit Customer 172,customer-172.example.com,manufacturing,na,smb,active,false,false,2025-04-04T00:00:00Z +acct_0173,parent_0041,Orbit Customer 173,customer-173.example.com,healthcare,emea,smb,active,false,false,2025-05-05T00:00:00Z +acct_0174,parent_0042,Orbit Customer 174,customer-174.example.com,financial_services,apac,smb,active,false,false,2025-06-06T00:00:00Z +acct_0175,parent_0043,Orbit Customer 175,customer-175.example.com,retail,na,smb,active,false,false,2025-07-07T00:00:00Z +acct_0176,parent_0044,Orbit Customer 176,customer-176.example.com,software,emea,smb,active,false,false,2025-08-08T00:00:00Z +acct_0177,parent_0045,Orbit Customer 177,customer-177.example.com,manufacturing,apac,smb,active,false,false,2025-09-09T00:00:00Z +acct_0178,parent_0046,Orbit Customer 178,customer-178.example.com,healthcare,na,smb,active,false,false,2025-10-10T00:00:00Z +acct_0179,parent_0047,Orbit Customer 179,customer-179.example.com,financial_services,emea,smb,active,false,false,2025-11-11T00:00:00Z +acct_0180,parent_0048,Orbit Customer 180,customer-180.example.com,retail,apac,smb,active,false,false,2025-12-12T00:00:00Z +acct_0181,parent_0049,Orbit Customer 181,customer-181.example.com,software,na,smb,active,false,false,2025-01-13T00:00:00Z +acct_0182,parent_0050,Orbit Customer 182,customer-182.example.com,manufacturing,emea,smb,active,false,false,2025-02-14T00:00:00Z +acct_0183,parent_0051,Orbit Customer 183,customer-183.example.com,healthcare,apac,smb,active,false,false,2025-03-15T00:00:00Z +acct_0184,parent_0052,Orbit Customer 
184,customer-184.example.com,financial_services,na,smb,active,false,false,2025-04-16T00:00:00Z +acct_0185,parent_0053,Orbit Customer 185,customer-185.example.com,retail,emea,smb,active,false,false,2025-05-17T00:00:00Z +acct_0186,parent_0054,Orbit Customer 186,customer-186.example.com,software,apac,smb,active,false,false,2025-06-18T00:00:00Z +acct_0187,parent_0055,Orbit Customer 187,customer-187.example.com,manufacturing,na,smb,active,false,false,2025-07-19T00:00:00Z +acct_0188,parent_0056,Orbit Customer 188,customer-188.example.com,healthcare,emea,smb,active,false,false,2025-08-20T00:00:00Z +acct_0189,parent_0057,Orbit Customer 189,customer-189.example.com,financial_services,apac,smb,active,false,false,2025-09-21T00:00:00Z +acct_0190,parent_0058,Orbit Customer 190,customer-190.example.com,retail,na,smb,active,false,false,2025-10-22T00:00:00Z +acct_0191,parent_0059,Orbit Customer 191,customer-191.example.com,software,emea,smb,active,false,false,2025-11-23T00:00:00Z +acct_0192,parent_0060,Orbit Customer 192,customer-192.example.com,manufacturing,apac,smb,active,false,false,2025-12-24T00:00:00Z +acct_0193,parent_0061,Orbit Customer 193,customer-193.example.com,healthcare,na,smb,active,false,false,2025-01-25T00:00:00Z +acct_0194,parent_0062,Orbit Customer 194,customer-194.example.com,financial_services,emea,smb,active,false,false,2025-02-26T00:00:00Z +acct_0195,parent_0063,Orbit Customer 195,customer-195.example.com,retail,apac,smb,active,false,false,2025-03-27T00:00:00Z +acct_0196,parent_0064,Orbit Customer 196,customer-196.example.com,software,na,smb,active,false,false,2025-04-28T00:00:00Z +acct_0197,parent_0065,Orbit Customer 197,customer-197.example.com,manufacturing,emea,smb,active,false,false,2025-05-01T00:00:00Z +acct_0198,parent_0066,Orbit Customer 198,customer-198.example.com,healthcare,apac,smb,active,false,false,2025-06-02T00:00:00Z +acct_0199,parent_0067,Orbit Customer 
199,customer-199.example.com,financial_services,na,smb,churned,false,false,2025-07-03T00:00:00Z +acct_0200,parent_0068,Orbit Customer 200,customer-200.example.com,retail,emea,smb,churned,false,false,2025-08-04T00:00:00Z +acct_0201,parent_0069,Orbit Customer 201,customer-201.example.com,software,apac,smb,internal,true,false,2025-09-05T00:00:00Z +acct_0202,parent_0070,Orbit Customer 202,customer-202.example.com,manufacturing,na,smb,internal,true,false,2025-10-06T00:00:00Z +acct_0203,parent_0071,Orbit Customer 203,customer-203.example.com,healthcare,emea,smb,internal,true,false,2025-11-07T00:00:00Z +acct_0204,parent_0072,Orbit Customer 204,customer-204.example.com,financial_services,apac,smb,internal,true,false,2025-12-08T00:00:00Z +acct_0205,parent_0073,Orbit Customer 205,customer-205.example.com,retail,na,smb,internal,true,false,2025-01-09T00:00:00Z +acct_0206,parent_0074,Orbit Customer 206,customer-206.example.com,software,emea,smb,test,false,true,2025-02-10T00:00:00Z +acct_0207,parent_0075,Orbit Customer 207,customer-207.example.com,manufacturing,apac,smb,test,false,true,2025-03-11T00:00:00Z +acct_0208,parent_0076,Orbit Customer 208,customer-208.example.com,healthcare,na,smb,test,false,true,2025-04-12T00:00:00Z +acct_0209,parent_0077,Orbit Customer 209,customer-209.example.com,financial_services,emea,smb,test,false,true,2025-05-13T00:00:00Z +acct_0210,parent_0078,Orbit Customer 210,customer-210.example.com,retail,apac,smb,test,false,true,2025-06-14T00:00:00Z diff --git a/packages/cli/assets/demo/orbit/raw-sources/warehouse/arr_movements.csv b/packages/cli/assets/demo/orbit/raw-sources/warehouse/arr_movements.csv new file mode 100644 index 00000000..e0f61327 --- /dev/null +++ b/packages/cli/assets/demo/orbit/raw-sources/warehouse/arr_movements.csv @@ -0,0 +1,721 @@ +arr_movement_id,account_id,parent_account_id,contract_id,movement_date,movement_type,movement_reason,arr_delta_cents,starting_arr_cents,ending_arr_cents 
+arr_move_0001,acct_0001,parent_0001,contract_0001,2026-02-15,expansion,seat_growth,4500000,30000000,34500000 +arr_move_0002,acct_0002,parent_0002,contract_0002,2026-02-15,expansion,seat_growth,4500000,30000000,34500000 +arr_move_0003,acct_0003,parent_0003,contract_0003,2026-02-15,expansion,seat_growth,4500000,30000000,34500000 +arr_move_0004,acct_0004,parent_0004,contract_0004,2026-02-15,expansion,seat_growth,4500000,30000000,34500000 +arr_move_0005,acct_0005,parent_0005,contract_0005,2026-02-15,expansion,seat_growth,4500000,30000000,34500000 +arr_move_0006,acct_0006,parent_0006,contract_0006,2026-02-15,expansion,seat_growth,4500000,30000000,34500000 +arr_move_0007,acct_0007,parent_0007,contract_0007,2026-02-15,expansion,seat_growth,4500000,30000000,34500000 +arr_move_0008,acct_0008,parent_0008,contract_0008,2026-02-15,expansion,seat_growth,4500000,30000000,34500000 +arr_move_0009,acct_0009,parent_0009,contract_0009,2026-02-15,expansion,seat_growth,4500000,30000000,34500000 +arr_move_0010,acct_0010,parent_0010,contract_0010,2026-02-15,expansion,seat_growth,4500000,30000000,34500000 +arr_move_0011,acct_0011,parent_0011,contract_0011,2026-02-15,expansion,seat_growth,4500000,30000000,34500000 +arr_move_0012,acct_0012,parent_0012,contract_0012,2026-02-15,expansion,seat_growth,4500000,30000000,34500000 +arr_move_0013,acct_0013,parent_0013,contract_0013,2026-02-15,expansion,seat_growth,4500000,30000000,34500000 +arr_move_0014,acct_0014,parent_0014,contract_0014,2026-02-15,expansion,seat_growth,4500000,30000000,34500000 +arr_move_0015,acct_0015,parent_0015,contract_0015,2026-02-15,expansion,seat_growth,4500000,30000000,34500000 +arr_move_0016,acct_0016,parent_0016,contract_0016,2026-02-15,expansion,seat_growth,4500000,30000000,34500000 +arr_move_0017,acct_0017,parent_0017,contract_0017,2026-02-15,expansion,seat_growth,6000000,70000000,76000000 +arr_move_0018,acct_0018,parent_0018,contract_0018,2026-02-20,contraction,discount_expiration,-4500000,50000000,45500000 
+arr_move_0019,acct_0019,parent_0019,contract_0019,2026-02-20,contraction,discount_expiration,-4500000,50000000,45500000 +arr_move_0020,acct_0020,parent_0020,contract_0020,2026-02-20,contraction,discount_expiration,-4500000,50000000,45500000 +arr_move_0021,acct_0021,parent_0021,contract_0021,2026-02-20,contraction,discount_expiration,-4500000,50000000,45500000 +arr_move_0022,acct_0022,parent_0022,contract_0022,2026-02-20,contraction,discount_expiration,-4500000,50000000,45500000 +arr_move_0023,acct_0023,parent_0023,contract_0023,2026-02-20,contraction,discount_expiration,-4500000,50000000,45500000 +arr_move_0024,acct_0024,parent_0024,contract_0024,2026-02-20,contraction,discount_expiration,-4500000,50000000,45500000 +arr_move_0025,acct_0025,parent_0025,contract_0025,2026-02-20,contraction,discount_expiration,-4500000,50000000,45500000 +arr_move_0026,acct_0026,parent_0026,contract_0026,2026-02-20,contraction,discount_expiration,-4500000,50000000,45500000 +arr_move_0027,acct_0027,parent_0027,contract_0027,2026-02-20,contraction,discount_expiration,-4500000,50000000,45500000 +arr_move_0028,acct_0028,parent_0028,contract_0028,2026-02-20,contraction,discount_expiration,-5500000,100000000,94500000 +arr_move_0029,acct_0029,parent_0029,contract_0029,2026-03-10,churn,budget_loss,-5000000,100000000,95000000 +arr_move_0030,acct_0030,parent_0030,contract_0030,2025-11-15,expansion,seat_growth,5000000,50000000,55000000 +arr_move_0031,acct_0031,parent_0031,contract_0031,2025-11-15,expansion,seat_growth,5000000,50000000,55000000 +arr_move_0032,acct_0032,parent_0032,contract_0032,2025-11-15,expansion,seat_growth,5000000,50000000,55000000 +arr_move_0033,acct_0033,parent_0033,contract_0033,2025-11-15,expansion,seat_growth,5000000,50000000,55000000 +arr_move_0034,acct_0034,parent_0034,contract_0034,2025-11-15,expansion,seat_growth,5000000,50000000,55000000 +arr_move_0035,acct_0035,parent_0035,contract_0035,2025-11-15,expansion,seat_growth,5000000,50000000,55000000 
+arr_move_0036,acct_0036,parent_0036,contract_0036,2025-11-15,expansion,seat_growth,5000000,50000000,55000000 +arr_move_0037,acct_0037,parent_0037,contract_0037,2025-11-15,expansion,seat_growth,5000000,50000000,55000000 +arr_move_0038,acct_0038,parent_0038,contract_0038,2025-11-15,expansion,seat_growth,5000000,50000000,55000000 +arr_move_0039,acct_0039,parent_0039,contract_0039,2025-11-15,expansion,seat_growth,5000000,50000000,55000000 +arr_move_0040,acct_0040,parent_0040,contract_0040,2025-11-15,expansion,seat_growth,5000000,50000000,55000000 +arr_move_0041,acct_0041,parent_0041,contract_0041,2025-11-15,expansion,seat_growth,5000000,50000000,55000000 +arr_move_0042,acct_0042,parent_0042,contract_0042,2025-11-15,expansion,seat_growth,5000000,50000000,55000000 +arr_move_0043,acct_0043,parent_0043,contract_0043,2025-11-15,expansion,seat_growth,5000000,50000000,55000000 +arr_move_0044,acct_0044,parent_0044,contract_0044,2025-11-15,expansion,seat_growth,5000000,50000000,55000000 +arr_move_0045,acct_0045,parent_0045,contract_0045,2025-11-15,expansion,seat_growth,11800000,50000000,61800000 +arr_move_0046,acct_0046,parent_0046,contract_0046,2025-11-20,contraction,scope_reduction,-2500000,100000000,97500000 +arr_move_0047,acct_0047,parent_0047,contract_0047,2025-11-20,contraction,scope_reduction,-2500000,100000000,97500000 +arr_move_0048,acct_0048,parent_0048,contract_0048,2025-11-20,contraction,scope_reduction,-2500000,100000000,97500000 +arr_move_0049,acct_0049,parent_0049,contract_0049,2025-11-20,contraction,scope_reduction,-2500000,100000000,97500000 +arr_move_0050,acct_0090,parent_0090,contract_0101,2025-06-15,new,generated_history,360000,3200000,3560000 +arr_move_0051,acct_0091,parent_0091,contract_0102,2025-06-15,expansion,generated_history,370000,3300000,3670000 +arr_move_0052,acct_0092,parent_0092,contract_0103,2025-06-15,contraction,generated_history,-250000,3400000,3150000 
+arr_move_0053,acct_0093,parent_0093,contract_0104,2025-06-15,reactivation,generated_history,260000,3500000,3760000 +arr_move_0054,acct_0094,parent_0094,contract_0105,2025-06-15,new,generated_history,270000,3600000,3870000 +arr_move_0055,acct_0095,parent_0095,contract_0106,2025-06-15,expansion,generated_history,280000,3700000,3980000 +arr_move_0056,acct_0096,parent_0096,contract_0107,2025-06-15,contraction,generated_history,-290000,3800000,3510000 +arr_move_0057,acct_0097,parent_0097,contract_0108,2025-06-15,reactivation,generated_history,300000,2000000,2300000 +arr_move_0058,acct_0098,parent_0098,contract_0109,2025-06-15,new,generated_history,310000,2100000,2410000 +arr_move_0059,acct_0099,parent_0099,contract_0110,2025-06-15,expansion,generated_history,320000,2200000,2520000 +arr_move_0060,acct_0100,parent_0100,contract_0111,2025-06-15,contraction,generated_history,-330000,2300000,1970000 +arr_move_0061,acct_0101,parent_0101,contract_0112,2025-06-15,reactivation,generated_history,340000,2400000,2740000 +arr_move_0062,acct_0102,parent_0102,contract_0113,2025-06-15,new,generated_history,350000,2500000,2850000 +arr_move_0063,acct_0103,parent_0103,contract_0114,2025-06-15,expansion,generated_history,360000,2600000,2960000 +arr_move_0064,acct_0104,parent_0104,contract_0115,2025-06-15,contraction,generated_history,-370000,2700000,2330000 +arr_move_0065,acct_0105,parent_0105,contract_0116,2025-06-15,reactivation,generated_history,250000,2800000,3050000 +arr_move_0066,acct_0106,parent_0106,contract_0117,2025-06-15,new,generated_history,260000,2900000,3160000 +arr_move_0067,acct_0107,parent_0107,contract_0118,2025-06-15,expansion,generated_history,270000,3000000,3270000 +arr_move_0068,acct_0108,parent_0108,contract_0119,2025-06-15,contraction,generated_history,-280000,3100000,2820000 +arr_move_0069,acct_0109,parent_0109,contract_0120,2025-06-15,reactivation,generated_history,290000,3200000,3490000 
+arr_move_0070,acct_0110,parent_0110,contract_0121,2025-06-15,new,generated_history,300000,3300000,3600000 +arr_move_0071,acct_0111,parent_0111,contract_0122,2025-06-15,expansion,generated_history,310000,3400000,3710000 +arr_move_0072,acct_0112,parent_0112,contract_0123,2025-06-15,contraction,generated_history,-320000,3500000,3180000 +arr_move_0073,acct_0113,parent_0113,contract_0124,2025-06-15,reactivation,generated_history,330000,3600000,3930000 +arr_move_0074,acct_0114,parent_0114,contract_0125,2025-06-15,new,generated_history,340000,3700000,4040000 +arr_move_0075,acct_0115,parent_0115,contract_0126,2025-06-15,expansion,generated_history,350000,3800000,4150000 +arr_move_0076,acct_0116,parent_0116,contract_0127,2025-06-15,contraction,generated_history,-360000,2000000,1640000 +arr_move_0077,acct_0117,parent_0117,contract_0128,2025-06-15,reactivation,generated_history,370000,2100000,2470000 +arr_move_0078,acct_0118,parent_0118,contract_0129,2025-06-15,new,generated_history,250000,2200000,2450000 +arr_move_0079,acct_0119,parent_0119,contract_0130,2025-06-15,expansion,generated_history,260000,2300000,2560000 +arr_move_0080,acct_0120,parent_0120,contract_0131,2025-06-15,contraction,generated_history,-270000,2400000,2130000 +arr_move_0081,acct_0121,parent_0121,contract_0132,2025-06-15,reactivation,generated_history,280000,2500000,2780000 +arr_move_0082,acct_0122,parent_0122,contract_0133,2025-06-15,new,generated_history,290000,2600000,2890000 +arr_move_0083,acct_0123,parent_0123,contract_0134,2025-06-15,expansion,generated_history,300000,2700000,3000000 +arr_move_0084,acct_0124,parent_0124,contract_0135,2025-06-15,contraction,generated_history,-310000,2800000,2490000 +arr_move_0085,acct_0125,parent_0125,contract_0136,2025-06-15,reactivation,generated_history,320000,2900000,3220000 +arr_move_0086,acct_0126,parent_0126,contract_0137,2025-06-15,new,generated_history,330000,3000000,3330000 
+arr_move_0087,acct_0127,parent_0127,contract_0138,2025-06-15,expansion,generated_history,340000,3100000,3440000 +arr_move_0088,acct_0128,parent_0128,contract_0139,2025-06-15,contraction,generated_history,-350000,3200000,2850000 +arr_move_0089,acct_0129,parent_0129,contract_0140,2025-06-15,reactivation,generated_history,360000,3300000,3660000 +arr_move_0090,acct_0130,parent_0130,contract_0141,2025-06-15,new,generated_history,370000,3400000,3770000 +arr_move_0091,acct_0131,parent_0131,contract_0142,2025-06-15,expansion,generated_history,250000,3500000,3750000 +arr_move_0092,acct_0132,parent_0132,contract_0143,2025-06-15,contraction,generated_history,-260000,3600000,3340000 +arr_move_0093,acct_0133,parent_0001,contract_0144,2025-06-15,reactivation,generated_history,270000,3700000,3970000 +arr_move_0094,acct_0134,parent_0002,contract_0145,2025-06-15,new,generated_history,280000,3800000,4080000 +arr_move_0095,acct_0135,parent_0003,contract_0146,2025-06-15,expansion,generated_history,290000,2000000,2290000 +arr_move_0096,acct_0136,parent_0004,contract_0147,2025-06-15,contraction,generated_history,-300000,2100000,1800000 +arr_move_0097,acct_0137,parent_0005,contract_0148,2025-06-15,reactivation,generated_history,310000,2200000,2510000 +arr_move_0098,acct_0138,parent_0006,contract_0149,2025-06-15,new,generated_history,320000,2300000,2620000 +arr_move_0099,acct_0139,parent_0007,contract_0150,2025-06-15,expansion,generated_history,330000,2400000,2730000 +arr_move_0100,acct_0140,parent_0008,contract_0151,2025-06-15,contraction,generated_history,-340000,2500000,2160000 +arr_move_0101,acct_0141,parent_0009,contract_0152,2025-06-15,reactivation,generated_history,350000,2600000,2950000 +arr_move_0102,acct_0142,parent_0010,contract_0153,2025-06-15,new,generated_history,360000,2700000,3060000 +arr_move_0103,acct_0143,parent_0011,contract_0154,2025-06-15,expansion,generated_history,370000,2800000,3170000 
+arr_move_0104,acct_0144,parent_0012,contract_0155,2025-06-15,contraction,generated_history,-250000,2900000,2650000 +arr_move_0105,acct_0145,parent_0013,contract_0156,2025-06-15,reactivation,generated_history,260000,3000000,3260000 +arr_move_0106,acct_0146,parent_0014,contract_0157,2025-06-15,new,generated_history,270000,3100000,3370000 +arr_move_0107,acct_0147,parent_0015,contract_0158,2025-06-15,expansion,generated_history,280000,3200000,3480000 +arr_move_0108,acct_0148,parent_0016,contract_0159,2025-06-15,contraction,generated_history,-290000,3300000,3010000 +arr_move_0109,acct_0149,parent_0017,contract_0160,2025-06-15,reactivation,generated_history,300000,3400000,3700000 +arr_move_0110,acct_0150,parent_0018,contract_0161,2025-06-15,new,generated_history,310000,3500000,3810000 +arr_move_0111,acct_0151,parent_0019,contract_0162,2025-06-15,expansion,generated_history,320000,3600000,3920000 +arr_move_0112,acct_0152,parent_0020,contract_0163,2025-06-15,contraction,generated_history,-330000,3700000,3370000 +arr_move_0113,acct_0153,parent_0021,contract_0164,2025-06-15,reactivation,generated_history,340000,3800000,4140000 +arr_move_0114,acct_0154,parent_0022,contract_0165,2025-06-15,new,generated_history,350000,2000000,2350000 +arr_move_0115,acct_0155,parent_0023,contract_0166,2025-06-15,expansion,generated_history,360000,2100000,2460000 +arr_move_0116,acct_0156,parent_0024,contract_0167,2025-06-15,contraction,generated_history,-370000,2200000,1830000 +arr_move_0117,acct_0157,parent_0025,contract_0168,2025-06-15,reactivation,generated_history,250000,2300000,2550000 +arr_move_0118,acct_0158,parent_0026,contract_0169,2025-06-15,new,generated_history,260000,2400000,2660000 +arr_move_0119,acct_0159,parent_0027,contract_0170,2025-06-15,expansion,generated_history,270000,2500000,2770000 +arr_move_0120,acct_0160,parent_0028,contract_0171,2025-06-15,contraction,generated_history,-280000,2600000,2320000 
+arr_move_0121,acct_0161,parent_0029,contract_0172,2025-06-15,reactivation,generated_history,290000,2700000,2990000 +arr_move_0122,acct_0162,parent_0030,contract_0173,2025-06-15,new,generated_history,300000,2800000,3100000 +arr_move_0123,acct_0163,parent_0031,contract_0174,2025-06-15,expansion,generated_history,310000,2900000,3210000 +arr_move_0124,acct_0164,parent_0032,contract_0175,2025-06-15,contraction,generated_history,-320000,3000000,2680000 +arr_move_0125,acct_0165,parent_0033,contract_0176,2025-06-15,reactivation,generated_history,330000,3100000,3430000 +arr_move_0126,acct_0166,parent_0034,contract_0177,2025-06-15,new,generated_history,340000,3200000,3540000 +arr_move_0127,acct_0167,parent_0035,contract_0178,2025-06-15,expansion,generated_history,350000,3300000,3650000 +arr_move_0128,acct_0168,parent_0036,contract_0179,2025-06-15,contraction,generated_history,-360000,3400000,3040000 +arr_move_0129,acct_0169,parent_0037,contract_0180,2025-06-15,reactivation,generated_history,370000,3500000,3870000 +arr_move_0130,acct_0170,parent_0038,contract_0181,2025-06-15,new,generated_history,250000,3600000,3850000 +arr_move_0131,acct_0171,parent_0039,contract_0182,2025-06-15,expansion,generated_history,260000,3700000,3960000 +arr_move_0132,acct_0172,parent_0040,contract_0183,2025-06-15,contraction,generated_history,-270000,3800000,3530000 +arr_move_0133,acct_0173,parent_0041,contract_0184,2025-06-15,reactivation,generated_history,280000,2000000,2280000 +arr_move_0134,acct_0174,parent_0042,contract_0185,2025-06-15,new,generated_history,290000,2100000,2390000 +arr_move_0135,acct_0175,parent_0043,contract_0186,2025-06-15,expansion,generated_history,300000,2200000,2500000 +arr_move_0136,acct_0176,parent_0044,contract_0187,2025-06-15,contraction,generated_history,-310000,2300000,1990000 +arr_move_0137,acct_0177,parent_0045,contract_0188,2025-06-15,reactivation,generated_history,320000,2400000,2720000 
+arr_move_0138,acct_0178,parent_0046,contract_0189,2025-06-15,new,generated_history,330000,2500000,2830000 +arr_move_0139,acct_0179,parent_0047,contract_0190,2025-06-15,expansion,generated_history,340000,2600000,2940000 +arr_move_0140,acct_0180,parent_0048,contract_0191,2025-06-15,contraction,generated_history,-350000,2700000,2350000 +arr_move_0141,acct_0181,parent_0049,contract_0192,2025-06-15,reactivation,generated_history,360000,2800000,3160000 +arr_move_0142,acct_0182,parent_0050,contract_0193,2025-06-15,new,generated_history,370000,2900000,3270000 +arr_move_0143,acct_0183,parent_0051,contract_0194,2025-06-15,expansion,generated_history,250000,3000000,3250000 +arr_move_0144,acct_0184,parent_0052,contract_0195,2025-06-15,contraction,generated_history,-260000,3100000,2840000 +arr_move_0145,acct_0185,parent_0053,contract_0196,2025-06-15,reactivation,generated_history,270000,3200000,3470000 +arr_move_0146,acct_0186,parent_0054,contract_0197,2025-06-15,new,generated_history,280000,3300000,3580000 +arr_move_0147,acct_0187,parent_0055,contract_0198,2025-06-15,expansion,generated_history,290000,3400000,3690000 +arr_move_0148,acct_0188,parent_0056,contract_0199,2025-06-15,contraction,generated_history,-300000,3500000,3200000 +arr_move_0149,acct_0189,parent_0057,contract_0200,2025-06-15,reactivation,generated_history,310000,3600000,3910000 +arr_move_0150,acct_0090,parent_0090,contract_0201,2025-06-15,new,generated_history,320000,3700000,4020000 +arr_move_0151,acct_0091,parent_0091,contract_0202,2025-06-15,expansion,generated_history,330000,3800000,4130000 +arr_move_0152,acct_0092,parent_0092,contract_0203,2025-06-15,contraction,generated_history,-340000,2000000,1660000 +arr_move_0153,acct_0093,parent_0093,contract_0204,2025-06-15,reactivation,generated_history,350000,2100000,2450000 +arr_move_0154,acct_0094,parent_0094,contract_0205,2025-06-15,new,generated_history,360000,2200000,2560000 
+arr_move_0155,acct_0095,parent_0095,contract_0206,2025-06-15,expansion,generated_history,370000,2300000,2670000 +arr_move_0156,acct_0096,parent_0096,contract_0207,2025-06-15,contraction,generated_history,-250000,2400000,2150000 +arr_move_0157,acct_0097,parent_0097,contract_0208,2025-06-15,reactivation,generated_history,260000,2500000,2760000 +arr_move_0158,acct_0098,parent_0098,contract_0209,2025-06-15,new,generated_history,270000,2600000,2870000 +arr_move_0159,acct_0099,parent_0099,contract_0210,2025-06-15,expansion,generated_history,280000,2700000,2980000 +arr_move_0160,acct_0100,parent_0100,contract_0211,2025-06-15,contraction,generated_history,-290000,2800000,2510000 +arr_move_0161,acct_0101,parent_0101,contract_0212,2025-06-15,reactivation,generated_history,300000,2900000,3200000 +arr_move_0162,acct_0102,parent_0102,contract_0213,2025-06-15,new,generated_history,310000,3000000,3310000 +arr_move_0163,acct_0103,parent_0103,contract_0214,2025-06-15,expansion,generated_history,320000,3100000,3420000 +arr_move_0164,acct_0104,parent_0104,contract_0215,2025-06-15,contraction,generated_history,-330000,3200000,2870000 +arr_move_0165,acct_0105,parent_0105,contract_0216,2025-06-15,reactivation,generated_history,340000,3300000,3640000 +arr_move_0166,acct_0106,parent_0106,contract_0217,2025-06-15,new,generated_history,350000,3400000,3750000 +arr_move_0167,acct_0107,parent_0107,contract_0218,2025-06-15,expansion,generated_history,360000,3500000,3860000 +arr_move_0168,acct_0108,parent_0108,contract_0219,2025-06-15,contraction,generated_history,-370000,3600000,3230000 +arr_move_0169,acct_0109,parent_0109,contract_0220,2025-06-15,reactivation,generated_history,250000,3700000,3950000 +arr_move_0170,acct_0110,parent_0110,contract_0221,2025-06-15,new,generated_history,260000,3800000,4060000 +arr_move_0171,acct_0111,parent_0111,contract_0222,2025-06-15,expansion,generated_history,270000,2000000,2270000 
+arr_move_0172,acct_0112,parent_0112,contract_0223,2025-06-15,contraction,generated_history,-280000,2100000,1820000 +arr_move_0173,acct_0113,parent_0113,contract_0224,2025-06-15,reactivation,generated_history,290000,2200000,2490000 +arr_move_0174,acct_0114,parent_0114,contract_0225,2025-06-15,new,generated_history,300000,2300000,2600000 +arr_move_0175,acct_0115,parent_0115,contract_0226,2025-06-15,expansion,generated_history,310000,2400000,2710000 +arr_move_0176,acct_0116,parent_0116,contract_0227,2025-06-15,contraction,generated_history,-320000,2500000,2180000 +arr_move_0177,acct_0117,parent_0117,contract_0228,2025-06-15,reactivation,generated_history,330000,2600000,2930000 +arr_move_0178,acct_0118,parent_0118,contract_0229,2025-06-15,new,generated_history,340000,2700000,3040000 +arr_move_0179,acct_0119,parent_0119,contract_0230,2025-06-15,expansion,generated_history,350000,2800000,3150000 +arr_move_0180,acct_0120,parent_0120,contract_0231,2025-06-15,contraction,generated_history,-360000,2900000,2540000 +arr_move_0181,acct_0121,parent_0121,contract_0232,2025-06-15,reactivation,generated_history,370000,3000000,3370000 +arr_move_0182,acct_0122,parent_0122,contract_0233,2025-06-15,new,generated_history,250000,3100000,3350000 +arr_move_0183,acct_0123,parent_0123,contract_0234,2025-06-15,expansion,generated_history,260000,3200000,3460000 +arr_move_0184,acct_0124,parent_0124,contract_0235,2025-06-15,contraction,generated_history,-270000,3300000,3030000 +arr_move_0185,acct_0125,parent_0125,contract_0236,2025-06-15,reactivation,generated_history,280000,3400000,3680000 +arr_move_0186,acct_0126,parent_0126,contract_0237,2025-06-15,new,generated_history,290000,3500000,3790000 +arr_move_0187,acct_0127,parent_0127,contract_0238,2025-06-15,expansion,generated_history,300000,3600000,3900000 +arr_move_0188,acct_0128,parent_0128,contract_0239,2025-06-15,contraction,generated_history,-310000,3700000,3390000 
+arr_move_0189,acct_0129,parent_0129,contract_0240,2025-06-15,reactivation,generated_history,320000,3800000,4120000 +arr_move_0190,acct_0130,parent_0130,contract_0241,2025-06-15,new,generated_history,330000,2000000,2330000 +arr_move_0191,acct_0131,parent_0131,contract_0242,2025-06-15,expansion,generated_history,340000,2100000,2440000 +arr_move_0192,acct_0132,parent_0132,contract_0243,2025-06-15,contraction,generated_history,-350000,2200000,1850000 +arr_move_0193,acct_0133,parent_0001,contract_0244,2025-06-15,reactivation,generated_history,360000,2300000,2660000 +arr_move_0194,acct_0134,parent_0002,contract_0245,2025-06-15,new,generated_history,370000,2400000,2770000 +arr_move_0195,acct_0135,parent_0003,contract_0246,2025-06-15,expansion,generated_history,250000,2500000,2750000 +arr_move_0196,acct_0136,parent_0004,contract_0247,2025-06-15,contraction,generated_history,-260000,2600000,2340000 +arr_move_0197,acct_0137,parent_0005,contract_0248,2025-06-15,reactivation,generated_history,270000,2700000,2970000 +arr_move_0198,acct_0138,parent_0006,contract_0249,2025-06-15,new,generated_history,280000,2800000,3080000 +arr_move_0199,acct_0139,parent_0007,contract_0250,2025-06-15,expansion,generated_history,290000,2900000,3190000 +arr_move_0200,acct_0140,parent_0008,contract_0251,2025-06-15,contraction,generated_history,-300000,3000000,2700000 +arr_move_0201,acct_0141,parent_0009,contract_0252,2025-06-15,reactivation,generated_history,310000,3100000,3410000 +arr_move_0202,acct_0142,parent_0010,contract_0253,2025-06-15,new,generated_history,320000,3200000,3520000 +arr_move_0203,acct_0143,parent_0011,contract_0254,2025-06-15,expansion,generated_history,330000,3300000,3630000 +arr_move_0204,acct_0144,parent_0012,contract_0255,2025-06-15,contraction,generated_history,-340000,3400000,3060000 +arr_move_0205,acct_0145,parent_0013,contract_0256,2025-06-15,reactivation,generated_history,350000,3500000,3850000 
+arr_move_0206,acct_0146,parent_0014,contract_0257,2025-06-15,new,generated_history,360000,3600000,3960000 +arr_move_0207,acct_0147,parent_0015,contract_0258,2025-06-15,expansion,generated_history,370000,3700000,4070000 +arr_move_0208,acct_0148,parent_0016,contract_0259,2025-06-15,contraction,generated_history,-250000,3800000,3550000 +arr_move_0209,acct_0149,parent_0017,contract_0260,2025-06-15,reactivation,generated_history,260000,2000000,2260000 +arr_move_0210,acct_0150,parent_0018,contract_0261,2025-06-15,new,generated_history,270000,2100000,2370000 +arr_move_0211,acct_0151,parent_0019,contract_0262,2025-06-15,expansion,generated_history,280000,2200000,2480000 +arr_move_0212,acct_0152,parent_0020,contract_0263,2025-06-15,contraction,generated_history,-290000,2300000,2010000 +arr_move_0213,acct_0153,parent_0021,contract_0264,2025-06-15,reactivation,generated_history,300000,2400000,2700000 +arr_move_0214,acct_0154,parent_0022,contract_0265,2025-06-15,new,generated_history,310000,2500000,2810000 +arr_move_0215,acct_0155,parent_0023,contract_0266,2025-06-15,expansion,generated_history,320000,2600000,2920000 +arr_move_0216,acct_0156,parent_0024,contract_0267,2025-06-15,contraction,generated_history,-330000,2700000,2370000 +arr_move_0217,acct_0157,parent_0025,contract_0268,2025-06-15,reactivation,generated_history,340000,2800000,3140000 +arr_move_0218,acct_0158,parent_0026,contract_0269,2025-06-15,new,generated_history,350000,2900000,3250000 +arr_move_0219,acct_0159,parent_0027,contract_0270,2025-06-15,expansion,generated_history,360000,3000000,3360000 +arr_move_0220,acct_0160,parent_0028,contract_0271,2025-06-15,contraction,generated_history,-370000,3100000,2730000 +arr_move_0221,acct_0161,parent_0029,contract_0272,2025-06-15,reactivation,generated_history,250000,3200000,3450000 +arr_move_0222,acct_0162,parent_0030,contract_0273,2025-06-15,new,generated_history,260000,3300000,3560000 
+arr_move_0223,acct_0163,parent_0031,contract_0274,2025-06-15,expansion,generated_history,270000,3400000,3670000 +arr_move_0224,acct_0164,parent_0032,contract_0275,2025-06-15,contraction,generated_history,-280000,3500000,3220000 +arr_move_0225,acct_0165,parent_0033,contract_0276,2025-06-15,reactivation,generated_history,290000,3600000,3890000 +arr_move_0226,acct_0166,parent_0034,contract_0277,2025-06-15,new,generated_history,300000,3700000,4000000 +arr_move_0227,acct_0167,parent_0035,contract_0278,2025-06-15,expansion,generated_history,310000,3800000,4110000 +arr_move_0228,acct_0168,parent_0036,contract_0279,2025-06-15,contraction,generated_history,-320000,2000000,1680000 +arr_move_0229,acct_0169,parent_0037,contract_0280,2025-06-15,reactivation,generated_history,330000,2100000,2430000 +arr_move_0230,acct_0170,parent_0038,contract_0281,2025-06-15,new,generated_history,340000,2200000,2540000 +arr_move_0231,acct_0171,parent_0039,contract_0282,2025-06-15,expansion,generated_history,350000,2300000,2650000 +arr_move_0232,acct_0172,parent_0040,contract_0283,2025-06-15,contraction,generated_history,-360000,2400000,2040000 +arr_move_0233,acct_0173,parent_0041,contract_0284,2025-06-15,reactivation,generated_history,370000,2500000,2870000 +arr_move_0234,acct_0174,parent_0042,contract_0285,2025-06-15,new,generated_history,250000,2600000,2850000 +arr_move_0235,acct_0175,parent_0043,contract_0286,2025-06-15,expansion,generated_history,260000,2700000,2960000 +arr_move_0236,acct_0176,parent_0044,contract_0287,2025-06-15,contraction,generated_history,-270000,2800000,2530000 +arr_move_0237,acct_0177,parent_0045,contract_0288,2025-06-15,reactivation,generated_history,280000,2900000,3180000 +arr_move_0238,acct_0178,parent_0046,contract_0289,2025-06-15,new,generated_history,290000,3000000,3290000 +arr_move_0239,acct_0179,parent_0047,contract_0290,2025-06-15,expansion,generated_history,300000,3100000,3400000 
+arr_move_0240,acct_0180,parent_0048,contract_0291,2025-06-15,contraction,generated_history,-310000,3200000,2890000 +arr_move_0241,acct_0181,parent_0049,contract_0292,2025-06-15,reactivation,generated_history,320000,3300000,3620000 +arr_move_0242,acct_0182,parent_0050,contract_0293,2025-06-15,new,generated_history,330000,3400000,3730000 +arr_move_0243,acct_0183,parent_0051,contract_0294,2025-06-15,expansion,generated_history,340000,3500000,3840000 +arr_move_0244,acct_0184,parent_0052,contract_0295,2025-06-15,contraction,generated_history,-350000,3600000,3250000 +arr_move_0245,acct_0185,parent_0053,contract_0296,2025-06-15,reactivation,generated_history,360000,3700000,4060000 +arr_move_0246,acct_0186,parent_0054,contract_0297,2025-06-15,new,generated_history,370000,3800000,4170000 +arr_move_0247,acct_0187,parent_0055,contract_0298,2025-06-15,expansion,generated_history,250000,2000000,2250000 +arr_move_0248,acct_0188,parent_0056,contract_0299,2025-06-15,contraction,generated_history,-260000,2100000,1840000 +arr_move_0249,acct_0189,parent_0057,contract_0300,2025-06-15,reactivation,generated_history,270000,2200000,2470000 +arr_move_0250,acct_0090,parent_0090,contract_0301,2025-06-15,new,generated_history,280000,2300000,2580000 +arr_move_0251,acct_0091,parent_0091,contract_0302,2025-06-15,expansion,generated_history,290000,2400000,2690000 +arr_move_0252,acct_0092,parent_0092,contract_0303,2025-06-15,contraction,generated_history,-300000,2500000,2200000 +arr_move_0253,acct_0093,parent_0093,contract_0304,2025-06-15,reactivation,generated_history,310000,2600000,2910000 +arr_move_0254,acct_0094,parent_0094,contract_0305,2025-06-15,new,generated_history,320000,2700000,3020000 +arr_move_0255,acct_0095,parent_0095,contract_0306,2025-06-15,expansion,generated_history,330000,2800000,3130000 +arr_move_0256,acct_0096,parent_0096,contract_0307,2025-06-15,contraction,generated_history,-340000,2900000,2560000 
+arr_move_0257,acct_0097,parent_0097,contract_0308,2025-06-15,reactivation,generated_history,350000,3000000,3350000 +arr_move_0258,acct_0098,parent_0098,contract_0309,2025-06-15,new,generated_history,360000,3100000,3460000 +arr_move_0259,acct_0099,parent_0099,contract_0310,2025-06-15,expansion,generated_history,370000,3200000,3570000 +arr_move_0260,acct_0100,parent_0100,contract_0311,2025-06-15,contraction,generated_history,-250000,3300000,3050000 +arr_move_0261,acct_0101,parent_0101,contract_0312,2025-06-15,reactivation,generated_history,260000,3400000,3660000 +arr_move_0262,acct_0102,parent_0102,contract_0313,2025-06-15,new,generated_history,270000,3500000,3770000 +arr_move_0263,acct_0103,parent_0103,contract_0314,2025-06-15,expansion,generated_history,280000,3600000,3880000 +arr_move_0264,acct_0104,parent_0104,contract_0315,2025-06-15,contraction,generated_history,-290000,3700000,3410000 +arr_move_0265,acct_0105,parent_0105,contract_0316,2025-06-15,reactivation,generated_history,300000,3800000,4100000 +arr_move_0266,acct_0106,parent_0106,contract_0317,2025-06-15,new,generated_history,310000,2000000,2310000 +arr_move_0267,acct_0107,parent_0107,contract_0318,2025-06-15,expansion,generated_history,320000,2100000,2420000 +arr_move_0268,acct_0108,parent_0108,contract_0319,2025-06-15,contraction,generated_history,-330000,2200000,1870000 +arr_move_0269,acct_0109,parent_0109,contract_0320,2025-06-15,reactivation,generated_history,340000,2300000,2640000 +arr_move_0270,acct_0110,parent_0110,contract_0101,2025-06-15,new,generated_history,350000,2400000,2750000 +arr_move_0271,acct_0111,parent_0111,contract_0102,2025-06-15,expansion,generated_history,360000,2500000,2860000 +arr_move_0272,acct_0112,parent_0112,contract_0103,2025-06-15,contraction,generated_history,-370000,2600000,2230000 +arr_move_0273,acct_0113,parent_0113,contract_0104,2025-06-15,reactivation,generated_history,250000,2700000,2950000 
+arr_move_0274,acct_0114,parent_0114,contract_0105,2025-06-15,new,generated_history,260000,2800000,3060000 +arr_move_0275,acct_0115,parent_0115,contract_0106,2025-06-15,expansion,generated_history,270000,2900000,3170000 +arr_move_0276,acct_0116,parent_0116,contract_0107,2025-06-15,contraction,generated_history,-280000,3000000,2720000 +arr_move_0277,acct_0117,parent_0117,contract_0108,2025-06-15,reactivation,generated_history,290000,3100000,3390000 +arr_move_0278,acct_0118,parent_0118,contract_0109,2025-06-15,new,generated_history,300000,3200000,3500000 +arr_move_0279,acct_0119,parent_0119,contract_0110,2025-06-15,expansion,generated_history,310000,3300000,3610000 +arr_move_0280,acct_0120,parent_0120,contract_0111,2025-06-15,contraction,generated_history,-320000,3400000,3080000 +arr_move_0281,acct_0121,parent_0121,contract_0112,2025-06-15,reactivation,generated_history,330000,3500000,3830000 +arr_move_0282,acct_0122,parent_0122,contract_0113,2025-06-15,new,generated_history,340000,3600000,3940000 +arr_move_0283,acct_0123,parent_0123,contract_0114,2025-06-15,expansion,generated_history,350000,3700000,4050000 +arr_move_0284,acct_0124,parent_0124,contract_0115,2025-06-15,contraction,generated_history,-360000,3800000,3440000 +arr_move_0285,acct_0125,parent_0125,contract_0116,2025-06-15,reactivation,generated_history,370000,2000000,2370000 +arr_move_0286,acct_0126,parent_0126,contract_0117,2025-06-15,new,generated_history,250000,2100000,2350000 +arr_move_0287,acct_0127,parent_0127,contract_0118,2025-06-15,expansion,generated_history,260000,2200000,2460000 +arr_move_0288,acct_0128,parent_0128,contract_0119,2025-06-15,contraction,generated_history,-270000,2300000,2030000 +arr_move_0289,acct_0129,parent_0129,contract_0120,2025-06-15,reactivation,generated_history,280000,2400000,2680000 +arr_move_0290,acct_0130,parent_0130,contract_0121,2025-06-15,new,generated_history,290000,2500000,2790000 
+arr_move_0291,acct_0131,parent_0131,contract_0122,2025-06-15,expansion,generated_history,300000,2600000,2900000 +arr_move_0292,acct_0132,parent_0132,contract_0123,2025-06-15,contraction,generated_history,-310000,2700000,2390000 +arr_move_0293,acct_0133,parent_0001,contract_0124,2025-06-15,reactivation,generated_history,320000,2800000,3120000 +arr_move_0294,acct_0134,parent_0002,contract_0125,2025-06-15,new,generated_history,330000,2900000,3230000 +arr_move_0295,acct_0135,parent_0003,contract_0126,2025-06-15,expansion,generated_history,340000,3000000,3340000 +arr_move_0296,acct_0136,parent_0004,contract_0127,2025-06-15,contraction,generated_history,-350000,3100000,2750000 +arr_move_0297,acct_0137,parent_0005,contract_0128,2025-06-15,reactivation,generated_history,360000,3200000,3560000 +arr_move_0298,acct_0138,parent_0006,contract_0129,2025-06-15,new,generated_history,370000,3300000,3670000 +arr_move_0299,acct_0139,parent_0007,contract_0130,2025-06-15,expansion,generated_history,250000,3400000,3650000 +arr_move_0300,acct_0140,parent_0008,contract_0131,2025-06-15,contraction,generated_history,-260000,3500000,3240000 +arr_move_0301,acct_0141,parent_0009,contract_0132,2025-06-15,reactivation,generated_history,270000,3600000,3870000 +arr_move_0302,acct_0142,parent_0010,contract_0133,2025-06-15,new,generated_history,280000,3700000,3980000 +arr_move_0303,acct_0143,parent_0011,contract_0134,2025-06-15,expansion,generated_history,290000,3800000,4090000 +arr_move_0304,acct_0144,parent_0012,contract_0135,2025-06-15,contraction,generated_history,-300000,2000000,1700000 +arr_move_0305,acct_0145,parent_0013,contract_0136,2025-06-15,reactivation,generated_history,310000,2100000,2410000 +arr_move_0306,acct_0146,parent_0014,contract_0137,2025-06-15,new,generated_history,320000,2200000,2520000 +arr_move_0307,acct_0147,parent_0015,contract_0138,2025-06-15,expansion,generated_history,330000,2300000,2630000 
+arr_move_0308,acct_0148,parent_0016,contract_0139,2025-06-15,contraction,generated_history,-340000,2400000,2060000 +arr_move_0309,acct_0149,parent_0017,contract_0140,2025-06-15,reactivation,generated_history,350000,2500000,2850000 +arr_move_0310,acct_0150,parent_0018,contract_0141,2025-06-15,new,generated_history,360000,2600000,2960000 +arr_move_0311,acct_0151,parent_0019,contract_0142,2025-06-15,expansion,generated_history,370000,2700000,3070000 +arr_move_0312,acct_0152,parent_0020,contract_0143,2025-06-15,contraction,generated_history,-250000,2800000,2550000 +arr_move_0313,acct_0153,parent_0021,contract_0144,2025-06-15,reactivation,generated_history,260000,2900000,3160000 +arr_move_0314,acct_0154,parent_0022,contract_0145,2025-06-15,new,generated_history,270000,3000000,3270000 +arr_move_0315,acct_0155,parent_0023,contract_0146,2025-06-15,expansion,generated_history,280000,3100000,3380000 +arr_move_0316,acct_0156,parent_0024,contract_0147,2025-06-15,contraction,generated_history,-290000,3200000,2910000 +arr_move_0317,acct_0157,parent_0025,contract_0148,2025-06-15,reactivation,generated_history,300000,3300000,3600000 +arr_move_0318,acct_0158,parent_0026,contract_0149,2025-06-15,new,generated_history,310000,3400000,3710000 +arr_move_0319,acct_0159,parent_0027,contract_0150,2025-06-15,expansion,generated_history,320000,3500000,3820000 +arr_move_0320,acct_0160,parent_0028,contract_0151,2025-06-15,contraction,generated_history,-330000,3600000,3270000 +arr_move_0321,acct_0161,parent_0029,contract_0152,2025-06-15,reactivation,generated_history,340000,3700000,4040000 +arr_move_0322,acct_0162,parent_0030,contract_0153,2025-06-15,new,generated_history,350000,3800000,4150000 +arr_move_0323,acct_0163,parent_0031,contract_0154,2025-06-15,expansion,generated_history,360000,2000000,2360000 +arr_move_0324,acct_0164,parent_0032,contract_0155,2025-06-15,contraction,generated_history,-370000,2100000,1730000 
+arr_move_0325,acct_0165,parent_0033,contract_0156,2025-06-15,reactivation,generated_history,250000,2200000,2450000 +arr_move_0326,acct_0166,parent_0034,contract_0157,2025-06-15,new,generated_history,260000,2300000,2560000 +arr_move_0327,acct_0167,parent_0035,contract_0158,2025-06-15,expansion,generated_history,270000,2400000,2670000 +arr_move_0328,acct_0168,parent_0036,contract_0159,2025-06-15,contraction,generated_history,-280000,2500000,2220000 +arr_move_0329,acct_0169,parent_0037,contract_0160,2025-06-15,reactivation,generated_history,290000,2600000,2890000 +arr_move_0330,acct_0170,parent_0038,contract_0161,2025-06-15,new,generated_history,300000,2700000,3000000 +arr_move_0331,acct_0171,parent_0039,contract_0162,2025-06-15,expansion,generated_history,310000,2800000,3110000 +arr_move_0332,acct_0172,parent_0040,contract_0163,2025-06-15,contraction,generated_history,-320000,2900000,2580000 +arr_move_0333,acct_0173,parent_0041,contract_0164,2025-06-15,reactivation,generated_history,330000,3000000,3330000 +arr_move_0334,acct_0174,parent_0042,contract_0165,2025-06-15,new,generated_history,340000,3100000,3440000 +arr_move_0335,acct_0175,parent_0043,contract_0166,2025-06-15,expansion,generated_history,350000,3200000,3550000 +arr_move_0336,acct_0176,parent_0044,contract_0167,2025-06-15,contraction,generated_history,-360000,3300000,2940000 +arr_move_0337,acct_0177,parent_0045,contract_0168,2025-06-15,reactivation,generated_history,370000,3400000,3770000 +arr_move_0338,acct_0178,parent_0046,contract_0169,2025-06-15,new,generated_history,250000,3500000,3750000 +arr_move_0339,acct_0179,parent_0047,contract_0170,2025-06-15,expansion,generated_history,260000,3600000,3860000 +arr_move_0340,acct_0180,parent_0048,contract_0171,2025-06-15,contraction,generated_history,-270000,3700000,3430000 +arr_move_0341,acct_0181,parent_0049,contract_0172,2025-06-15,reactivation,generated_history,280000,3800000,4080000 
+arr_move_0342,acct_0182,parent_0050,contract_0173,2025-06-15,new,generated_history,290000,2000000,2290000 +arr_move_0343,acct_0183,parent_0051,contract_0174,2025-06-15,expansion,generated_history,300000,2100000,2400000 +arr_move_0344,acct_0184,parent_0052,contract_0175,2025-06-15,contraction,generated_history,-310000,2200000,1890000 +arr_move_0345,acct_0185,parent_0053,contract_0176,2025-06-15,reactivation,generated_history,320000,2300000,2620000 +arr_move_0346,acct_0186,parent_0054,contract_0177,2025-06-15,new,generated_history,330000,2400000,2730000 +arr_move_0347,acct_0187,parent_0055,contract_0178,2025-06-15,expansion,generated_history,340000,2500000,2840000 +arr_move_0348,acct_0188,parent_0056,contract_0179,2025-06-15,contraction,generated_history,-350000,2600000,2250000 +arr_move_0349,acct_0189,parent_0057,contract_0180,2025-06-15,reactivation,generated_history,360000,2700000,3060000 +arr_move_0350,acct_0090,parent_0090,contract_0181,2025-06-15,new,generated_history,370000,2800000,3170000 +arr_move_0351,acct_0091,parent_0091,contract_0182,2025-06-15,expansion,generated_history,250000,2900000,3150000 +arr_move_0352,acct_0092,parent_0092,contract_0183,2025-06-15,contraction,generated_history,-260000,3000000,2740000 +arr_move_0353,acct_0093,parent_0093,contract_0184,2025-06-15,reactivation,generated_history,270000,3100000,3370000 +arr_move_0354,acct_0094,parent_0094,contract_0185,2025-06-15,new,generated_history,280000,3200000,3480000 +arr_move_0355,acct_0095,parent_0095,contract_0186,2025-06-15,expansion,generated_history,290000,3300000,3590000 +arr_move_0356,acct_0096,parent_0096,contract_0187,2025-06-15,contraction,generated_history,-300000,3400000,3100000 +arr_move_0357,acct_0097,parent_0097,contract_0188,2025-06-15,reactivation,generated_history,310000,3500000,3810000 +arr_move_0358,acct_0098,parent_0098,contract_0189,2025-06-15,new,generated_history,320000,3600000,3920000 
+arr_move_0359,acct_0099,parent_0099,contract_0190,2025-06-15,expansion,generated_history,330000,3700000,4030000 +arr_move_0360,acct_0100,parent_0100,contract_0191,2025-06-15,contraction,generated_history,-340000,3800000,3460000 +arr_move_0361,acct_0101,parent_0101,contract_0192,2025-06-15,reactivation,generated_history,350000,2000000,2350000 +arr_move_0362,acct_0102,parent_0102,contract_0193,2025-06-15,new,generated_history,360000,2100000,2460000 +arr_move_0363,acct_0103,parent_0103,contract_0194,2025-06-15,expansion,generated_history,370000,2200000,2570000 +arr_move_0364,acct_0104,parent_0104,contract_0195,2025-06-15,contraction,generated_history,-250000,2300000,2050000 +arr_move_0365,acct_0105,parent_0105,contract_0196,2025-06-15,reactivation,generated_history,260000,2400000,2660000 +arr_move_0366,acct_0106,parent_0106,contract_0197,2025-06-15,new,generated_history,270000,2500000,2770000 +arr_move_0367,acct_0107,parent_0107,contract_0198,2025-06-15,expansion,generated_history,280000,2600000,2880000 +arr_move_0368,acct_0108,parent_0108,contract_0199,2025-06-15,contraction,generated_history,-290000,2700000,2410000 +arr_move_0369,acct_0109,parent_0109,contract_0200,2025-06-15,reactivation,generated_history,300000,2800000,3100000 +arr_move_0370,acct_0110,parent_0110,contract_0201,2025-06-15,new,generated_history,310000,2900000,3210000 +arr_move_0371,acct_0111,parent_0111,contract_0202,2025-06-15,expansion,generated_history,320000,3000000,3320000 +arr_move_0372,acct_0112,parent_0112,contract_0203,2025-06-15,contraction,generated_history,-330000,3100000,2770000 +arr_move_0373,acct_0113,parent_0113,contract_0204,2025-06-15,reactivation,generated_history,340000,3200000,3540000 +arr_move_0374,acct_0114,parent_0114,contract_0205,2025-06-15,new,generated_history,350000,3300000,3650000 +arr_move_0375,acct_0115,parent_0115,contract_0206,2025-06-15,expansion,generated_history,360000,3400000,3760000 
+arr_move_0376,acct_0116,parent_0116,contract_0207,2025-06-15,contraction,generated_history,-370000,3500000,3130000 +arr_move_0377,acct_0117,parent_0117,contract_0208,2025-06-15,reactivation,generated_history,250000,3600000,3850000 +arr_move_0378,acct_0118,parent_0118,contract_0209,2025-06-15,new,generated_history,260000,3700000,3960000 +arr_move_0379,acct_0119,parent_0119,contract_0210,2025-06-15,expansion,generated_history,270000,3800000,4070000 +arr_move_0380,acct_0120,parent_0120,contract_0211,2025-06-15,contraction,generated_history,-280000,2000000,1720000 +arr_move_0381,acct_0121,parent_0121,contract_0212,2025-06-15,reactivation,generated_history,290000,2100000,2390000 +arr_move_0382,acct_0122,parent_0122,contract_0213,2025-06-15,new,generated_history,300000,2200000,2500000 +arr_move_0383,acct_0123,parent_0123,contract_0214,2025-06-15,expansion,generated_history,310000,2300000,2610000 +arr_move_0384,acct_0124,parent_0124,contract_0215,2025-06-15,contraction,generated_history,-320000,2400000,2080000 +arr_move_0385,acct_0125,parent_0125,contract_0216,2025-06-15,reactivation,generated_history,330000,2500000,2830000 +arr_move_0386,acct_0126,parent_0126,contract_0217,2025-06-15,new,generated_history,340000,2600000,2940000 +arr_move_0387,acct_0127,parent_0127,contract_0218,2025-06-15,expansion,generated_history,350000,2700000,3050000 +arr_move_0388,acct_0128,parent_0128,contract_0219,2025-06-15,contraction,generated_history,-360000,2800000,2440000 +arr_move_0389,acct_0129,parent_0129,contract_0220,2025-06-15,reactivation,generated_history,370000,2900000,3270000 +arr_move_0390,acct_0130,parent_0130,contract_0221,2025-06-15,new,generated_history,250000,3000000,3250000 +arr_move_0391,acct_0131,parent_0131,contract_0222,2025-06-15,expansion,generated_history,260000,3100000,3360000 +arr_move_0392,acct_0132,parent_0132,contract_0223,2025-06-15,contraction,generated_history,-270000,3200000,2930000 
+arr_move_0393,acct_0133,parent_0001,contract_0224,2025-06-15,reactivation,generated_history,280000,3300000,3580000 +arr_move_0394,acct_0134,parent_0002,contract_0225,2025-06-15,new,generated_history,290000,3400000,3690000 +arr_move_0395,acct_0135,parent_0003,contract_0226,2025-06-15,expansion,generated_history,300000,3500000,3800000 +arr_move_0396,acct_0136,parent_0004,contract_0227,2025-06-15,contraction,generated_history,-310000,3600000,3290000 +arr_move_0397,acct_0137,parent_0005,contract_0228,2025-06-15,reactivation,generated_history,320000,3700000,4020000 +arr_move_0398,acct_0138,parent_0006,contract_0229,2025-06-15,new,generated_history,330000,3800000,4130000 +arr_move_0399,acct_0139,parent_0007,contract_0230,2025-06-15,expansion,generated_history,340000,2000000,2340000 +arr_move_0400,acct_0140,parent_0008,contract_0231,2025-06-15,contraction,generated_history,-350000,2100000,1750000 +arr_move_0401,acct_0141,parent_0009,contract_0232,2025-06-15,reactivation,generated_history,360000,2200000,2560000 +arr_move_0402,acct_0142,parent_0010,contract_0233,2025-06-15,new,generated_history,370000,2300000,2670000 +arr_move_0403,acct_0143,parent_0011,contract_0234,2025-06-15,expansion,generated_history,250000,2400000,2650000 +arr_move_0404,acct_0144,parent_0012,contract_0235,2025-06-15,contraction,generated_history,-260000,2500000,2240000 +arr_move_0405,acct_0145,parent_0013,contract_0236,2025-06-15,reactivation,generated_history,270000,2600000,2870000 +arr_move_0406,acct_0146,parent_0014,contract_0237,2025-06-15,new,generated_history,280000,2700000,2980000 +arr_move_0407,acct_0147,parent_0015,contract_0238,2025-06-15,expansion,generated_history,290000,2800000,3090000 +arr_move_0408,acct_0148,parent_0016,contract_0239,2025-06-15,contraction,generated_history,-300000,2900000,2600000 +arr_move_0409,acct_0149,parent_0017,contract_0240,2025-06-15,reactivation,generated_history,310000,3000000,3310000 
+arr_move_0410,acct_0150,parent_0018,contract_0241,2025-06-15,new,generated_history,320000,3100000,3420000 +arr_move_0411,acct_0151,parent_0019,contract_0242,2025-06-15,expansion,generated_history,330000,3200000,3530000 +arr_move_0412,acct_0152,parent_0020,contract_0243,2025-06-15,contraction,generated_history,-340000,3300000,2960000 +arr_move_0413,acct_0153,parent_0021,contract_0244,2025-06-15,reactivation,generated_history,350000,3400000,3750000 +arr_move_0414,acct_0154,parent_0022,contract_0245,2025-06-15,new,generated_history,360000,3500000,3860000 +arr_move_0415,acct_0155,parent_0023,contract_0246,2025-06-15,expansion,generated_history,370000,3600000,3970000 +arr_move_0416,acct_0156,parent_0024,contract_0247,2025-06-15,contraction,generated_history,-250000,3700000,3450000 +arr_move_0417,acct_0157,parent_0025,contract_0248,2025-06-15,reactivation,generated_history,260000,3800000,4060000 +arr_move_0418,acct_0158,parent_0026,contract_0249,2025-06-15,new,generated_history,270000,2000000,2270000 +arr_move_0419,acct_0159,parent_0027,contract_0250,2025-06-15,expansion,generated_history,280000,2100000,2380000 +arr_move_0420,acct_0160,parent_0028,contract_0251,2025-06-15,contraction,generated_history,-290000,2200000,1910000 +arr_move_0421,acct_0161,parent_0029,contract_0252,2025-06-15,reactivation,generated_history,300000,2300000,2600000 +arr_move_0422,acct_0162,parent_0030,contract_0253,2025-06-15,new,generated_history,310000,2400000,2710000 +arr_move_0423,acct_0163,parent_0031,contract_0254,2025-06-15,expansion,generated_history,320000,2500000,2820000 +arr_move_0424,acct_0164,parent_0032,contract_0255,2025-06-15,contraction,generated_history,-330000,2600000,2270000 +arr_move_0425,acct_0165,parent_0033,contract_0256,2025-06-15,reactivation,generated_history,340000,2700000,3040000 +arr_move_0426,acct_0166,parent_0034,contract_0257,2025-06-15,new,generated_history,350000,2800000,3150000 
+arr_move_0427,acct_0167,parent_0035,contract_0258,2025-06-15,expansion,generated_history,360000,2900000,3260000 +arr_move_0428,acct_0168,parent_0036,contract_0259,2025-06-15,contraction,generated_history,-370000,3000000,2630000 +arr_move_0429,acct_0169,parent_0037,contract_0260,2025-06-15,reactivation,generated_history,250000,3100000,3350000 +arr_move_0430,acct_0170,parent_0038,contract_0261,2025-06-15,new,generated_history,260000,3200000,3460000 +arr_move_0431,acct_0171,parent_0039,contract_0262,2025-06-15,expansion,generated_history,270000,3300000,3570000 +arr_move_0432,acct_0172,parent_0040,contract_0263,2025-06-15,contraction,generated_history,-280000,3400000,3120000 +arr_move_0433,acct_0173,parent_0041,contract_0264,2025-06-15,reactivation,generated_history,290000,3500000,3790000 +arr_move_0434,acct_0174,parent_0042,contract_0265,2025-06-15,new,generated_history,300000,3600000,3900000 +arr_move_0435,acct_0175,parent_0043,contract_0266,2025-06-15,expansion,generated_history,310000,3700000,4010000 +arr_move_0436,acct_0176,parent_0044,contract_0267,2025-06-15,contraction,generated_history,-320000,3800000,3480000 +arr_move_0437,acct_0177,parent_0045,contract_0268,2025-06-15,reactivation,generated_history,330000,2000000,2330000 +arr_move_0438,acct_0178,parent_0046,contract_0269,2025-06-15,new,generated_history,340000,2100000,2440000 +arr_move_0439,acct_0179,parent_0047,contract_0270,2025-06-15,expansion,generated_history,350000,2200000,2550000 +arr_move_0440,acct_0180,parent_0048,contract_0271,2025-06-15,contraction,generated_history,-360000,2300000,1940000 +arr_move_0441,acct_0181,parent_0049,contract_0272,2025-06-15,reactivation,generated_history,370000,2400000,2770000 +arr_move_0442,acct_0182,parent_0050,contract_0273,2025-06-15,new,generated_history,250000,2500000,2750000 +arr_move_0443,acct_0183,parent_0051,contract_0274,2025-06-15,expansion,generated_history,260000,2600000,2860000 
+arr_move_0444,acct_0184,parent_0052,contract_0275,2025-06-15,contraction,generated_history,-270000,2700000,2430000 +arr_move_0445,acct_0185,parent_0053,contract_0276,2025-06-15,reactivation,generated_history,280000,2800000,3080000 +arr_move_0446,acct_0186,parent_0054,contract_0277,2025-06-15,new,generated_history,290000,2900000,3190000 +arr_move_0447,acct_0187,parent_0055,contract_0278,2025-06-15,expansion,generated_history,300000,3000000,3300000 +arr_move_0448,acct_0188,parent_0056,contract_0279,2025-06-15,contraction,generated_history,-310000,3100000,2790000 +arr_move_0449,acct_0189,parent_0057,contract_0280,2025-06-15,reactivation,generated_history,320000,3200000,3520000 +arr_move_0450,acct_0090,parent_0090,contract_0281,2025-06-15,new,generated_history,330000,3300000,3630000 +arr_move_0451,acct_0091,parent_0091,contract_0282,2025-06-15,expansion,generated_history,340000,3400000,3740000 +arr_move_0452,acct_0092,parent_0092,contract_0283,2025-06-15,contraction,generated_history,-350000,3500000,3150000 +arr_move_0453,acct_0093,parent_0093,contract_0284,2025-06-15,reactivation,generated_history,360000,3600000,3960000 +arr_move_0454,acct_0094,parent_0094,contract_0285,2025-06-15,new,generated_history,370000,3700000,4070000 +arr_move_0455,acct_0095,parent_0095,contract_0286,2025-06-15,expansion,generated_history,250000,3800000,4050000 +arr_move_0456,acct_0096,parent_0096,contract_0287,2025-06-15,contraction,generated_history,-260000,2000000,1740000 +arr_move_0457,acct_0097,parent_0097,contract_0288,2025-06-15,reactivation,generated_history,270000,2100000,2370000 +arr_move_0458,acct_0098,parent_0098,contract_0289,2025-06-15,new,generated_history,280000,2200000,2480000 +arr_move_0459,acct_0099,parent_0099,contract_0290,2025-06-15,expansion,generated_history,290000,2300000,2590000 +arr_move_0460,acct_0100,parent_0100,contract_0291,2025-06-15,contraction,generated_history,-300000,2400000,2100000 
+arr_move_0461,acct_0101,parent_0101,contract_0292,2025-06-15,reactivation,generated_history,310000,2500000,2810000 +arr_move_0462,acct_0102,parent_0102,contract_0293,2025-06-15,new,generated_history,320000,2600000,2920000 +arr_move_0463,acct_0103,parent_0103,contract_0294,2025-06-15,expansion,generated_history,330000,2700000,3030000 +arr_move_0464,acct_0104,parent_0104,contract_0295,2025-06-15,contraction,generated_history,-340000,2800000,2460000 +arr_move_0465,acct_0105,parent_0105,contract_0296,2025-06-15,reactivation,generated_history,350000,2900000,3250000 +arr_move_0466,acct_0106,parent_0106,contract_0297,2025-06-15,new,generated_history,360000,3000000,3360000 +arr_move_0467,acct_0107,parent_0107,contract_0298,2025-06-15,expansion,generated_history,370000,3100000,3470000 +arr_move_0468,acct_0108,parent_0108,contract_0299,2025-06-15,contraction,generated_history,-250000,3200000,2950000 +arr_move_0469,acct_0109,parent_0109,contract_0300,2025-06-15,reactivation,generated_history,260000,3300000,3560000 +arr_move_0470,acct_0110,parent_0110,contract_0301,2025-06-15,new,generated_history,270000,3400000,3670000 +arr_move_0471,acct_0111,parent_0111,contract_0302,2025-06-15,expansion,generated_history,280000,3500000,3780000 +arr_move_0472,acct_0112,parent_0112,contract_0303,2025-06-15,contraction,generated_history,-290000,3600000,3310000 +arr_move_0473,acct_0113,parent_0113,contract_0304,2025-06-15,reactivation,generated_history,300000,3700000,4000000 +arr_move_0474,acct_0114,parent_0114,contract_0305,2025-06-15,new,generated_history,310000,3800000,4110000 +arr_move_0475,acct_0115,parent_0115,contract_0306,2025-06-15,expansion,generated_history,320000,2000000,2320000 +arr_move_0476,acct_0116,parent_0116,contract_0307,2025-06-15,contraction,generated_history,-330000,2100000,1770000 +arr_move_0477,acct_0117,parent_0117,contract_0308,2025-06-15,reactivation,generated_history,340000,2200000,2540000 
+arr_move_0478,acct_0118,parent_0118,contract_0309,2025-06-15,new,generated_history,350000,2300000,2650000 +arr_move_0479,acct_0119,parent_0119,contract_0310,2025-06-15,expansion,generated_history,360000,2400000,2760000 +arr_move_0480,acct_0120,parent_0120,contract_0311,2025-06-15,contraction,generated_history,-370000,2500000,2130000 +arr_move_0481,acct_0121,parent_0121,contract_0312,2025-06-15,reactivation,generated_history,250000,2600000,2850000 +arr_move_0482,acct_0122,parent_0122,contract_0313,2025-06-15,new,generated_history,260000,2700000,2960000 +arr_move_0483,acct_0123,parent_0123,contract_0314,2025-06-15,expansion,generated_history,270000,2800000,3070000 +arr_move_0484,acct_0124,parent_0124,contract_0315,2025-06-15,contraction,generated_history,-280000,2900000,2620000 +arr_move_0485,acct_0125,parent_0125,contract_0316,2025-06-15,reactivation,generated_history,290000,3000000,3290000 +arr_move_0486,acct_0126,parent_0126,contract_0317,2025-06-15,new,generated_history,300000,3100000,3400000 +arr_move_0487,acct_0127,parent_0127,contract_0318,2025-06-15,expansion,generated_history,310000,3200000,3510000 +arr_move_0488,acct_0128,parent_0128,contract_0319,2025-06-15,contraction,generated_history,-320000,3300000,2980000 +arr_move_0489,acct_0129,parent_0129,contract_0320,2025-06-15,reactivation,generated_history,330000,3400000,3730000 +arr_move_0490,acct_0130,parent_0130,contract_0101,2025-06-15,new,generated_history,340000,3500000,3840000 +arr_move_0491,acct_0131,parent_0131,contract_0102,2025-06-15,expansion,generated_history,350000,3600000,3950000 +arr_move_0492,acct_0132,parent_0132,contract_0103,2025-06-15,contraction,generated_history,-360000,3700000,3340000 +arr_move_0493,acct_0133,parent_0001,contract_0104,2025-06-15,reactivation,generated_history,370000,3800000,4170000 +arr_move_0494,acct_0134,parent_0002,contract_0105,2025-06-15,new,generated_history,250000,2000000,2250000 
+arr_move_0495,acct_0135,parent_0003,contract_0106,2025-06-15,expansion,generated_history,260000,2100000,2360000 +arr_move_0496,acct_0136,parent_0004,contract_0107,2025-06-15,contraction,generated_history,-270000,2200000,1930000 +arr_move_0497,acct_0137,parent_0005,contract_0108,2025-06-15,reactivation,generated_history,280000,2300000,2580000 +arr_move_0498,acct_0138,parent_0006,contract_0109,2025-06-15,new,generated_history,290000,2400000,2690000 +arr_move_0499,acct_0139,parent_0007,contract_0110,2025-06-15,expansion,generated_history,300000,2500000,2800000 +arr_move_0500,acct_0140,parent_0008,contract_0111,2025-06-15,contraction,generated_history,-310000,2600000,2290000 +arr_move_0501,acct_0141,parent_0009,contract_0112,2025-06-15,reactivation,generated_history,320000,2700000,3020000 +arr_move_0502,acct_0142,parent_0010,contract_0113,2025-06-15,new,generated_history,330000,2800000,3130000 +arr_move_0503,acct_0143,parent_0011,contract_0114,2025-06-15,expansion,generated_history,340000,2900000,3240000 +arr_move_0504,acct_0144,parent_0012,contract_0115,2025-06-15,contraction,generated_history,-350000,3000000,2650000 +arr_move_0505,acct_0145,parent_0013,contract_0116,2025-06-15,reactivation,generated_history,360000,3100000,3460000 +arr_move_0506,acct_0146,parent_0014,contract_0117,2025-06-15,new,generated_history,370000,3200000,3570000 +arr_move_0507,acct_0147,parent_0015,contract_0118,2025-06-15,expansion,generated_history,250000,3300000,3550000 +arr_move_0508,acct_0148,parent_0016,contract_0119,2025-06-15,contraction,generated_history,-260000,3400000,3140000 +arr_move_0509,acct_0149,parent_0017,contract_0120,2025-06-15,reactivation,generated_history,270000,3500000,3770000 +arr_move_0510,acct_0150,parent_0018,contract_0121,2025-06-15,new,generated_history,280000,3600000,3880000 +arr_move_0511,acct_0151,parent_0019,contract_0122,2025-06-15,expansion,generated_history,290000,3700000,3990000 
+arr_move_0512,acct_0152,parent_0020,contract_0123,2025-06-15,contraction,generated_history,-300000,3800000,3500000 +arr_move_0513,acct_0153,parent_0021,contract_0124,2025-06-15,reactivation,generated_history,310000,2000000,2310000 +arr_move_0514,acct_0154,parent_0022,contract_0125,2025-06-15,new,generated_history,320000,2100000,2420000 +arr_move_0515,acct_0155,parent_0023,contract_0126,2025-06-15,expansion,generated_history,330000,2200000,2530000 +arr_move_0516,acct_0156,parent_0024,contract_0127,2025-06-15,contraction,generated_history,-340000,2300000,1960000 +arr_move_0517,acct_0157,parent_0025,contract_0128,2025-06-15,reactivation,generated_history,350000,2400000,2750000 +arr_move_0518,acct_0158,parent_0026,contract_0129,2025-06-15,new,generated_history,360000,2500000,2860000 +arr_move_0519,acct_0159,parent_0027,contract_0130,2025-06-15,expansion,generated_history,370000,2600000,2970000 +arr_move_0520,acct_0160,parent_0028,contract_0131,2025-06-15,contraction,generated_history,-250000,2700000,2450000 +arr_move_0521,acct_0161,parent_0029,contract_0132,2025-06-15,reactivation,generated_history,260000,2800000,3060000 +arr_move_0522,acct_0162,parent_0030,contract_0133,2025-06-15,new,generated_history,270000,2900000,3170000 +arr_move_0523,acct_0163,parent_0031,contract_0134,2025-06-15,expansion,generated_history,280000,3000000,3280000 +arr_move_0524,acct_0164,parent_0032,contract_0135,2025-06-15,contraction,generated_history,-290000,3100000,2810000 +arr_move_0525,acct_0165,parent_0033,contract_0136,2025-06-15,reactivation,generated_history,300000,3200000,3500000 +arr_move_0526,acct_0166,parent_0034,contract_0137,2025-06-15,new,generated_history,310000,3300000,3610000 +arr_move_0527,acct_0167,parent_0035,contract_0138,2025-06-15,expansion,generated_history,320000,3400000,3720000 +arr_move_0528,acct_0168,parent_0036,contract_0139,2025-06-15,contraction,generated_history,-330000,3500000,3170000 
+arr_move_0529,acct_0169,parent_0037,contract_0140,2025-06-15,reactivation,generated_history,340000,3600000,3940000 +arr_move_0530,acct_0170,parent_0038,contract_0141,2025-06-15,new,generated_history,350000,3700000,4050000 +arr_move_0531,acct_0171,parent_0039,contract_0142,2025-06-15,expansion,generated_history,360000,3800000,4160000 +arr_move_0532,acct_0172,parent_0040,contract_0143,2025-06-15,contraction,generated_history,-370000,2000000,1630000 +arr_move_0533,acct_0173,parent_0041,contract_0144,2025-06-15,reactivation,generated_history,250000,2100000,2350000 +arr_move_0534,acct_0174,parent_0042,contract_0145,2025-06-15,new,generated_history,260000,2200000,2460000 +arr_move_0535,acct_0175,parent_0043,contract_0146,2025-06-15,expansion,generated_history,270000,2300000,2570000 +arr_move_0536,acct_0176,parent_0044,contract_0147,2025-06-15,contraction,generated_history,-280000,2400000,2120000 +arr_move_0537,acct_0177,parent_0045,contract_0148,2025-06-15,reactivation,generated_history,290000,2500000,2790000 +arr_move_0538,acct_0178,parent_0046,contract_0149,2025-06-15,new,generated_history,300000,2600000,2900000 +arr_move_0539,acct_0179,parent_0047,contract_0150,2025-06-15,expansion,generated_history,310000,2700000,3010000 +arr_move_0540,acct_0180,parent_0048,contract_0151,2025-06-15,contraction,generated_history,-320000,2800000,2480000 +arr_move_0541,acct_0181,parent_0049,contract_0152,2025-06-15,reactivation,generated_history,330000,2900000,3230000 +arr_move_0542,acct_0182,parent_0050,contract_0153,2025-06-15,new,generated_history,340000,3000000,3340000 +arr_move_0543,acct_0183,parent_0051,contract_0154,2025-06-15,expansion,generated_history,350000,3100000,3450000 +arr_move_0544,acct_0184,parent_0052,contract_0155,2025-06-15,contraction,generated_history,-360000,3200000,2840000 +arr_move_0545,acct_0185,parent_0053,contract_0156,2025-06-15,reactivation,generated_history,370000,3300000,3670000 
+arr_move_0546,acct_0186,parent_0054,contract_0157,2025-06-15,new,generated_history,250000,3400000,3650000 +arr_move_0547,acct_0187,parent_0055,contract_0158,2025-06-15,expansion,generated_history,260000,3500000,3760000 +arr_move_0548,acct_0188,parent_0056,contract_0159,2025-06-15,contraction,generated_history,-270000,3600000,3330000 +arr_move_0549,acct_0189,parent_0057,contract_0160,2025-06-15,reactivation,generated_history,280000,3700000,3980000 +arr_move_0550,acct_0090,parent_0090,contract_0161,2025-06-15,new,generated_history,290000,3800000,4090000 +arr_move_0551,acct_0091,parent_0091,contract_0162,2025-06-15,expansion,generated_history,300000,2000000,2300000 +arr_move_0552,acct_0092,parent_0092,contract_0163,2025-06-15,contraction,generated_history,-310000,2100000,1790000 +arr_move_0553,acct_0093,parent_0093,contract_0164,2025-06-15,reactivation,generated_history,320000,2200000,2520000 +arr_move_0554,acct_0094,parent_0094,contract_0165,2025-06-15,new,generated_history,330000,2300000,2630000 +arr_move_0555,acct_0095,parent_0095,contract_0166,2025-06-15,expansion,generated_history,340000,2400000,2740000 +arr_move_0556,acct_0096,parent_0096,contract_0167,2025-06-15,contraction,generated_history,-350000,2500000,2150000 +arr_move_0557,acct_0097,parent_0097,contract_0168,2025-06-15,reactivation,generated_history,360000,2600000,2960000 +arr_move_0558,acct_0098,parent_0098,contract_0169,2025-06-15,new,generated_history,370000,2700000,3070000 +arr_move_0559,acct_0099,parent_0099,contract_0170,2025-06-15,expansion,generated_history,250000,2800000,3050000 +arr_move_0560,acct_0100,parent_0100,contract_0171,2025-06-15,contraction,generated_history,-260000,2900000,2640000 +arr_move_0561,acct_0101,parent_0101,contract_0172,2025-06-15,reactivation,generated_history,270000,3000000,3270000 +arr_move_0562,acct_0102,parent_0102,contract_0173,2025-06-15,new,generated_history,280000,3100000,3380000 
+arr_move_0563,acct_0103,parent_0103,contract_0174,2025-06-15,expansion,generated_history,290000,3200000,3490000 +arr_move_0564,acct_0104,parent_0104,contract_0175,2025-06-15,contraction,generated_history,-300000,3300000,3000000 +arr_move_0565,acct_0105,parent_0105,contract_0176,2025-06-15,reactivation,generated_history,310000,3400000,3710000 +arr_move_0566,acct_0106,parent_0106,contract_0177,2025-06-15,new,generated_history,320000,3500000,3820000 +arr_move_0567,acct_0107,parent_0107,contract_0178,2025-06-15,expansion,generated_history,330000,3600000,3930000 +arr_move_0568,acct_0108,parent_0108,contract_0179,2025-06-15,contraction,generated_history,-340000,3700000,3360000 +arr_move_0569,acct_0109,parent_0109,contract_0180,2025-06-15,reactivation,generated_history,350000,3800000,4150000 +arr_move_0570,acct_0110,parent_0110,contract_0181,2025-06-15,new,generated_history,360000,2000000,2360000 +arr_move_0571,acct_0111,parent_0111,contract_0182,2025-06-15,expansion,generated_history,370000,2100000,2470000 +arr_move_0572,acct_0112,parent_0112,contract_0183,2025-06-15,contraction,generated_history,-250000,2200000,1950000 +arr_move_0573,acct_0113,parent_0113,contract_0184,2025-06-15,reactivation,generated_history,260000,2300000,2560000 +arr_move_0574,acct_0114,parent_0114,contract_0185,2025-06-15,new,generated_history,270000,2400000,2670000 +arr_move_0575,acct_0115,parent_0115,contract_0186,2025-06-15,expansion,generated_history,280000,2500000,2780000 +arr_move_0576,acct_0116,parent_0116,contract_0187,2025-06-15,contraction,generated_history,-290000,2600000,2310000 +arr_move_0577,acct_0117,parent_0117,contract_0188,2025-06-15,reactivation,generated_history,300000,2700000,3000000 +arr_move_0578,acct_0118,parent_0118,contract_0189,2025-06-15,new,generated_history,310000,2800000,3110000 +arr_move_0579,acct_0119,parent_0119,contract_0190,2025-06-15,expansion,generated_history,320000,2900000,3220000 
+arr_move_0580,acct_0120,parent_0120,contract_0191,2025-06-15,contraction,generated_history,-330000,3000000,2670000 +arr_move_0581,acct_0121,parent_0121,contract_0192,2025-06-15,reactivation,generated_history,340000,3100000,3440000 +arr_move_0582,acct_0122,parent_0122,contract_0193,2025-06-15,new,generated_history,350000,3200000,3550000 +arr_move_0583,acct_0123,parent_0123,contract_0194,2025-06-15,expansion,generated_history,360000,3300000,3660000 +arr_move_0584,acct_0124,parent_0124,contract_0195,2025-06-15,contraction,generated_history,-370000,3400000,3030000 +arr_move_0585,acct_0125,parent_0125,contract_0196,2025-06-15,reactivation,generated_history,250000,3500000,3750000 +arr_move_0586,acct_0126,parent_0126,contract_0197,2025-06-15,new,generated_history,260000,3600000,3860000 +arr_move_0587,acct_0127,parent_0127,contract_0198,2025-06-15,expansion,generated_history,270000,3700000,3970000 +arr_move_0588,acct_0128,parent_0128,contract_0199,2025-06-15,contraction,generated_history,-280000,3800000,3520000 +arr_move_0589,acct_0129,parent_0129,contract_0200,2025-06-15,reactivation,generated_history,290000,2000000,2290000 +arr_move_0590,acct_0130,parent_0130,contract_0201,2025-06-15,new,generated_history,300000,2100000,2400000 +arr_move_0591,acct_0131,parent_0131,contract_0202,2025-06-15,expansion,generated_history,310000,2200000,2510000 +arr_move_0592,acct_0132,parent_0132,contract_0203,2025-06-15,contraction,generated_history,-320000,2300000,1980000 +arr_move_0593,acct_0133,parent_0001,contract_0204,2025-06-15,reactivation,generated_history,330000,2400000,2730000 +arr_move_0594,acct_0134,parent_0002,contract_0205,2025-06-15,new,generated_history,340000,2500000,2840000 +arr_move_0595,acct_0135,parent_0003,contract_0206,2025-06-15,expansion,generated_history,350000,2600000,2950000 +arr_move_0596,acct_0136,parent_0004,contract_0207,2025-06-15,contraction,generated_history,-360000,2700000,2340000 
+arr_move_0597,acct_0137,parent_0005,contract_0208,2025-06-15,reactivation,generated_history,370000,2800000,3170000 +arr_move_0598,acct_0138,parent_0006,contract_0209,2025-06-15,new,generated_history,250000,2900000,3150000 +arr_move_0599,acct_0139,parent_0007,contract_0210,2025-06-15,expansion,generated_history,260000,3000000,3260000 +arr_move_0600,acct_0140,parent_0008,contract_0211,2025-06-15,contraction,generated_history,-270000,3100000,2830000 +arr_move_0601,acct_0141,parent_0009,contract_0212,2025-06-15,reactivation,generated_history,280000,3200000,3480000 +arr_move_0602,acct_0142,parent_0010,contract_0213,2025-06-15,new,generated_history,290000,3300000,3590000 +arr_move_0603,acct_0143,parent_0011,contract_0214,2025-06-15,expansion,generated_history,300000,3400000,3700000 +arr_move_0604,acct_0144,parent_0012,contract_0215,2025-06-15,contraction,generated_history,-310000,3500000,3190000 +arr_move_0605,acct_0145,parent_0013,contract_0216,2025-06-15,reactivation,generated_history,320000,3600000,3920000 +arr_move_0606,acct_0146,parent_0014,contract_0217,2025-06-15,new,generated_history,330000,3700000,4030000 +arr_move_0607,acct_0147,parent_0015,contract_0218,2025-06-15,expansion,generated_history,340000,3800000,4140000 +arr_move_0608,acct_0148,parent_0016,contract_0219,2025-06-15,contraction,generated_history,-350000,2000000,1650000 +arr_move_0609,acct_0149,parent_0017,contract_0220,2025-06-15,reactivation,generated_history,360000,2100000,2460000 +arr_move_0610,acct_0150,parent_0018,contract_0221,2025-06-15,new,generated_history,370000,2200000,2570000 +arr_move_0611,acct_0151,parent_0019,contract_0222,2025-06-15,expansion,generated_history,250000,2300000,2550000 +arr_move_0612,acct_0152,parent_0020,contract_0223,2025-06-15,contraction,generated_history,-260000,2400000,2140000 +arr_move_0613,acct_0153,parent_0021,contract_0224,2025-06-15,reactivation,generated_history,270000,2500000,2770000 
+arr_move_0614,acct_0154,parent_0022,contract_0225,2025-06-15,new,generated_history,280000,2600000,2880000 +arr_move_0615,acct_0155,parent_0023,contract_0226,2025-06-15,expansion,generated_history,290000,2700000,2990000 +arr_move_0616,acct_0156,parent_0024,contract_0227,2025-06-15,contraction,generated_history,-300000,2800000,2500000 +arr_move_0617,acct_0157,parent_0025,contract_0228,2025-06-15,reactivation,generated_history,310000,2900000,3210000 +arr_move_0618,acct_0158,parent_0026,contract_0229,2025-06-15,new,generated_history,320000,3000000,3320000 +arr_move_0619,acct_0159,parent_0027,contract_0230,2025-06-15,expansion,generated_history,330000,3100000,3430000 +arr_move_0620,acct_0160,parent_0028,contract_0231,2025-06-15,contraction,generated_history,-340000,3200000,2860000 +arr_move_0621,acct_0161,parent_0029,contract_0232,2025-06-15,reactivation,generated_history,350000,3300000,3650000 +arr_move_0622,acct_0162,parent_0030,contract_0233,2025-06-15,new,generated_history,360000,3400000,3760000 +arr_move_0623,acct_0163,parent_0031,contract_0234,2025-06-15,expansion,generated_history,370000,3500000,3870000 +arr_move_0624,acct_0164,parent_0032,contract_0235,2025-06-15,contraction,generated_history,-250000,3600000,3350000 +arr_move_0625,acct_0165,parent_0033,contract_0236,2025-06-15,reactivation,generated_history,260000,3700000,3960000 +arr_move_0626,acct_0166,parent_0034,contract_0237,2025-06-15,new,generated_history,270000,3800000,4070000 +arr_move_0627,acct_0167,parent_0035,contract_0238,2025-06-15,expansion,generated_history,280000,2000000,2280000 +arr_move_0628,acct_0168,parent_0036,contract_0239,2025-06-15,contraction,generated_history,-290000,2100000,1810000 +arr_move_0629,acct_0169,parent_0037,contract_0240,2025-06-15,reactivation,generated_history,300000,2200000,2500000 +arr_move_0630,acct_0170,parent_0038,contract_0241,2025-06-15,new,generated_history,310000,2300000,2610000 
+arr_move_0631,acct_0171,parent_0039,contract_0242,2025-06-15,expansion,generated_history,320000,2400000,2720000 +arr_move_0632,acct_0172,parent_0040,contract_0243,2025-06-15,contraction,generated_history,-330000,2500000,2170000 +arr_move_0633,acct_0173,parent_0041,contract_0244,2025-06-15,reactivation,generated_history,340000,2600000,2940000 +arr_move_0634,acct_0174,parent_0042,contract_0245,2025-06-15,new,generated_history,350000,2700000,3050000 +arr_move_0635,acct_0175,parent_0043,contract_0246,2025-06-15,expansion,generated_history,360000,2800000,3160000 +arr_move_0636,acct_0176,parent_0044,contract_0247,2025-06-15,contraction,generated_history,-370000,2900000,2530000 +arr_move_0637,acct_0177,parent_0045,contract_0248,2025-06-15,reactivation,generated_history,250000,3000000,3250000 +arr_move_0638,acct_0178,parent_0046,contract_0249,2025-06-15,new,generated_history,260000,3100000,3360000 +arr_move_0639,acct_0179,parent_0047,contract_0250,2025-06-15,expansion,generated_history,270000,3200000,3470000 +arr_move_0640,acct_0180,parent_0048,contract_0251,2025-06-15,contraction,generated_history,-280000,3300000,3020000 +arr_move_0641,acct_0181,parent_0049,contract_0252,2025-06-15,reactivation,generated_history,290000,3400000,3690000 +arr_move_0642,acct_0182,parent_0050,contract_0253,2025-06-15,new,generated_history,300000,3500000,3800000 +arr_move_0643,acct_0183,parent_0051,contract_0254,2025-06-15,expansion,generated_history,310000,3600000,3910000 +arr_move_0644,acct_0184,parent_0052,contract_0255,2025-06-15,contraction,generated_history,-320000,3700000,3380000 +arr_move_0645,acct_0185,parent_0053,contract_0256,2025-06-15,reactivation,generated_history,330000,3800000,4130000 +arr_move_0646,acct_0186,parent_0054,contract_0257,2025-06-15,new,generated_history,340000,2000000,2340000 +arr_move_0647,acct_0187,parent_0055,contract_0258,2025-06-15,expansion,generated_history,350000,2100000,2450000 
+arr_move_0648,acct_0188,parent_0056,contract_0259,2025-06-15,contraction,generated_history,-360000,2200000,1840000 +arr_move_0649,acct_0189,parent_0057,contract_0260,2025-06-15,reactivation,generated_history,370000,2300000,2670000 +arr_move_0650,acct_0090,parent_0090,contract_0261,2025-06-15,new,generated_history,250000,2400000,2650000 +arr_move_0651,acct_0091,parent_0091,contract_0262,2025-06-15,expansion,generated_history,260000,2500000,2760000 +arr_move_0652,acct_0092,parent_0092,contract_0263,2025-06-15,contraction,generated_history,-270000,2600000,2330000 +arr_move_0653,acct_0093,parent_0093,contract_0264,2025-06-15,reactivation,generated_history,280000,2700000,2980000 +arr_move_0654,acct_0094,parent_0094,contract_0265,2025-06-15,new,generated_history,290000,2800000,3090000 +arr_move_0655,acct_0095,parent_0095,contract_0266,2025-06-15,expansion,generated_history,300000,2900000,3200000 +arr_move_0656,acct_0096,parent_0096,contract_0267,2025-06-15,contraction,generated_history,-310000,3000000,2690000 +arr_move_0657,acct_0097,parent_0097,contract_0268,2025-06-15,reactivation,generated_history,320000,3100000,3420000 +arr_move_0658,acct_0098,parent_0098,contract_0269,2025-06-15,new,generated_history,330000,3200000,3530000 +arr_move_0659,acct_0099,parent_0099,contract_0270,2025-06-15,expansion,generated_history,340000,3300000,3640000 +arr_move_0660,acct_0100,parent_0100,contract_0271,2025-06-15,contraction,generated_history,-350000,3400000,3050000 +arr_move_0661,acct_0101,parent_0101,contract_0272,2025-06-15,reactivation,generated_history,360000,3500000,3860000 +arr_move_0662,acct_0102,parent_0102,contract_0273,2025-06-15,new,generated_history,370000,3600000,3970000 +arr_move_0663,acct_0103,parent_0103,contract_0274,2025-06-15,expansion,generated_history,250000,3700000,3950000 +arr_move_0664,acct_0104,parent_0104,contract_0275,2025-06-15,contraction,generated_history,-260000,3800000,3540000 
+arr_move_0665,acct_0105,parent_0105,contract_0276,2025-06-15,reactivation,generated_history,270000,2000000,2270000 +arr_move_0666,acct_0106,parent_0106,contract_0277,2025-06-15,new,generated_history,280000,2100000,2380000 +arr_move_0667,acct_0107,parent_0107,contract_0278,2025-06-15,expansion,generated_history,290000,2200000,2490000 +arr_move_0668,acct_0108,parent_0108,contract_0279,2025-06-15,contraction,generated_history,-300000,2300000,2000000 +arr_move_0669,acct_0109,parent_0109,contract_0280,2025-06-15,reactivation,generated_history,310000,2400000,2710000 +arr_move_0670,acct_0110,parent_0110,contract_0281,2025-06-15,new,generated_history,320000,2500000,2820000 +arr_move_0671,acct_0111,parent_0111,contract_0282,2025-06-15,expansion,generated_history,330000,2600000,2930000 +arr_move_0672,acct_0112,parent_0112,contract_0283,2025-06-15,contraction,generated_history,-340000,2700000,2360000 +arr_move_0673,acct_0113,parent_0113,contract_0284,2025-06-15,reactivation,generated_history,350000,2800000,3150000 +arr_move_0674,acct_0114,parent_0114,contract_0285,2025-06-15,new,generated_history,360000,2900000,3260000 +arr_move_0675,acct_0115,parent_0115,contract_0286,2025-06-15,expansion,generated_history,370000,3000000,3370000 +arr_move_0676,acct_0116,parent_0116,contract_0287,2025-06-15,contraction,generated_history,-250000,3100000,2850000 +arr_move_0677,acct_0117,parent_0117,contract_0288,2025-06-15,reactivation,generated_history,260000,3200000,3460000 +arr_move_0678,acct_0118,parent_0118,contract_0289,2025-06-15,new,generated_history,270000,3300000,3570000 +arr_move_0679,acct_0119,parent_0119,contract_0290,2025-06-15,expansion,generated_history,280000,3400000,3680000 +arr_move_0680,acct_0120,parent_0120,contract_0291,2025-06-15,contraction,generated_history,-290000,3500000,3210000 +arr_move_0681,acct_0121,parent_0121,contract_0292,2025-06-15,reactivation,generated_history,300000,3600000,3900000 
+arr_move_0682,acct_0122,parent_0122,contract_0293,2025-06-15,new,generated_history,310000,3700000,4010000 +arr_move_0683,acct_0123,parent_0123,contract_0294,2025-06-15,expansion,generated_history,320000,3800000,4120000 +arr_move_0684,acct_0124,parent_0124,contract_0295,2025-06-15,contraction,generated_history,-330000,2000000,1670000 +arr_move_0685,acct_0125,parent_0125,contract_0296,2025-06-15,reactivation,generated_history,340000,2100000,2440000 +arr_move_0686,acct_0126,parent_0126,contract_0297,2025-06-15,new,generated_history,350000,2200000,2550000 +arr_move_0687,acct_0127,parent_0127,contract_0298,2025-06-15,expansion,generated_history,360000,2300000,2660000 +arr_move_0688,acct_0128,parent_0128,contract_0299,2025-06-15,contraction,generated_history,-370000,2400000,2030000 +arr_move_0689,acct_0129,parent_0129,contract_0300,2025-06-15,reactivation,generated_history,250000,2500000,2750000 +arr_move_0690,acct_0130,parent_0130,contract_0301,2025-06-15,new,generated_history,260000,2600000,2860000 +arr_move_0691,acct_0131,parent_0131,contract_0302,2025-06-15,expansion,generated_history,270000,2700000,2970000 +arr_move_0692,acct_0132,parent_0132,contract_0303,2025-06-15,contraction,generated_history,-280000,2800000,2520000 +arr_move_0693,acct_0133,parent_0001,contract_0304,2025-06-15,reactivation,generated_history,290000,2900000,3190000 +arr_move_0694,acct_0134,parent_0002,contract_0305,2025-06-15,new,generated_history,300000,3000000,3300000 +arr_move_0695,acct_0135,parent_0003,contract_0306,2025-06-15,expansion,generated_history,310000,3100000,3410000 +arr_move_0696,acct_0136,parent_0004,contract_0307,2025-06-15,contraction,generated_history,-320000,3200000,2880000 +arr_move_0697,acct_0137,parent_0005,contract_0308,2025-06-15,reactivation,generated_history,330000,3300000,3630000 +arr_move_0698,acct_0138,parent_0006,contract_0309,2025-06-15,new,generated_history,340000,3400000,3740000 
+arr_move_0699,acct_0139,parent_0007,contract_0310,2025-06-15,expansion,generated_history,350000,3500000,3850000 +arr_move_0700,acct_0140,parent_0008,contract_0311,2025-06-15,contraction,generated_history,-360000,3600000,3240000 +arr_move_0701,acct_0141,parent_0009,contract_0312,2025-06-15,reactivation,generated_history,370000,3700000,4070000 +arr_move_0702,acct_0142,parent_0010,contract_0313,2025-06-15,new,generated_history,250000,3800000,4050000 +arr_move_0703,acct_0143,parent_0011,contract_0314,2025-06-15,expansion,generated_history,260000,2000000,2260000 +arr_move_0704,acct_0144,parent_0012,contract_0315,2025-06-15,contraction,generated_history,-270000,2100000,1830000 +arr_move_0705,acct_0145,parent_0013,contract_0316,2025-06-15,reactivation,generated_history,280000,2200000,2480000 +arr_move_0706,acct_0146,parent_0014,contract_0317,2025-06-15,new,generated_history,290000,2300000,2590000 +arr_move_0707,acct_0147,parent_0015,contract_0318,2025-06-15,expansion,generated_history,300000,2400000,2700000 +arr_move_0708,acct_0148,parent_0016,contract_0319,2025-06-15,contraction,generated_history,-310000,2500000,2190000 +arr_move_0709,acct_0149,parent_0017,contract_0320,2025-06-15,reactivation,generated_history,320000,2600000,2920000 +arr_move_0710,acct_0150,parent_0018,contract_0101,2025-06-15,new,generated_history,330000,2700000,3030000 +arr_move_0711,acct_0151,parent_0019,contract_0102,2025-06-15,expansion,generated_history,340000,2800000,3140000 +arr_move_0712,acct_0152,parent_0020,contract_0103,2025-06-15,contraction,generated_history,-350000,2900000,2550000 +arr_move_0713,acct_0153,parent_0021,contract_0104,2025-06-15,reactivation,generated_history,360000,3000000,3360000 +arr_move_0714,acct_0154,parent_0022,contract_0105,2025-06-15,new,generated_history,370000,3100000,3470000 +arr_move_0715,acct_0155,parent_0023,contract_0106,2025-06-15,expansion,generated_history,250000,3200000,3450000 
+arr_move_0716,acct_0156,parent_0024,contract_0107,2025-06-15,contraction,generated_history,-260000,3300000,3040000 +arr_move_0717,acct_0157,parent_0025,contract_0108,2025-06-15,reactivation,generated_history,270000,3400000,3670000 +arr_move_0718,acct_0158,parent_0026,contract_0109,2025-06-15,new,generated_history,280000,3500000,3780000 +arr_move_0719,acct_0159,parent_0027,contract_0110,2025-06-15,expansion,generated_history,290000,3600000,3890000 +arr_move_0720,acct_0160,parent_0028,contract_0111,2025-06-15,contraction,generated_history,-300000,3700000,3400000 diff --git a/packages/cli/assets/demo/orbit/raw-sources/warehouse/contracts.csv b/packages/cli/assets/demo/orbit/raw-sources/warehouse/contracts.csv new file mode 100644 index 00000000..a69c321d --- /dev/null +++ b/packages/cli/assets/demo/orbit/raw-sources/warehouse/contracts.csv @@ -0,0 +1,321 @@ +contract_id,account_id,parent_account_id,plan_id,contract_arr_cents,booked_arr_cents,start_date,end_date,status,renewal_type +contract_0001,acct_0001,parent_0001,plan_003,25000000,25000000,2025-01-01,2026-12-31,active,new +contract_0002,acct_0002,parent_0002,plan_003,25000000,25000000,2025-01-01,2026-12-31,active,renewal +contract_0003,acct_0003,parent_0003,plan_003,25000000,25000000,2025-01-01,2026-12-31,active,expansion +contract_0004,acct_0004,parent_0004,plan_003,25000000,25000000,2025-01-01,2026-12-31,active,downgrade +contract_0005,acct_0005,parent_0005,plan_003,25000000,25000000,2025-01-01,2026-12-31,active,new +contract_0006,acct_0006,parent_0006,plan_003,25000000,25000000,2025-01-01,2026-12-31,active,renewal +contract_0007,acct_0007,parent_0007,plan_003,25000000,25000000,2025-01-01,2026-12-31,active,expansion +contract_0008,acct_0008,parent_0008,plan_003,25000000,25000000,2025-01-01,2026-12-31,active,downgrade +contract_0009,acct_0009,parent_0009,plan_003,25000000,25000000,2025-01-01,2026-12-31,active,new +contract_0010,acct_0010,parent_0010,plan_003,25000000,25000000,2025-01-01,2026-12-31,active,renewal 
+contract_0011,acct_0011,parent_0011,plan_003,25000000,25000000,2025-01-01,2026-12-31,active,expansion +contract_0012,acct_0012,parent_0012,plan_003,25000000,25000000,2025-01-01,2026-12-31,active,downgrade +contract_0013,acct_0013,parent_0013,plan_003,25000000,25000000,2025-01-01,2026-12-31,active,new +contract_0014,acct_0014,parent_0014,plan_003,25000000,25000000,2025-01-01,2026-12-31,active,renewal +contract_0015,acct_0015,parent_0015,plan_003,25000000,25000000,2025-01-01,2026-12-31,active,expansion +contract_0016,acct_0016,parent_0016,plan_003,25000000,25000000,2025-01-01,2026-12-31,active,downgrade +contract_0017,acct_0017,parent_0017,plan_003,25000000,25000000,2025-01-01,2026-12-31,active,new +contract_0018,acct_0018,parent_0018,plan_003,25000000,25000000,2025-01-01,2026-12-31,active,renewal +contract_0019,acct_0019,parent_0019,plan_003,25000000,25000000,2025-01-01,2026-12-31,active,expansion +contract_0020,acct_0020,parent_0020,plan_003,25000000,25000000,2025-01-01,2026-12-31,active,downgrade +contract_0021,acct_0021,parent_0021,plan_003,25000000,25000000,2025-01-01,2026-12-31,active,new +contract_0022,acct_0022,parent_0022,plan_003,25000000,25000000,2025-01-01,2026-12-31,active,renewal +contract_0023,acct_0023,parent_0023,plan_003,25000000,25000000,2025-01-01,2026-12-31,active,expansion +contract_0024,acct_0024,parent_0024,plan_003,25000000,25000000,2025-01-01,2026-12-31,active,downgrade +contract_0025,acct_0025,parent_0025,plan_003,25000000,25000000,2025-01-01,2026-12-31,active,new +contract_0026,acct_0026,parent_0026,plan_003,25000000,25000000,2025-01-01,2026-12-31,active,renewal +contract_0027,acct_0027,parent_0027,plan_003,25000000,25000000,2025-01-01,2026-12-31,active,expansion +contract_0028,acct_0028,parent_0028,plan_003,25000000,25000000,2025-01-01,2026-12-31,active,downgrade +contract_0029,acct_0029,parent_0029,plan_003,25000000,25000000,2025-01-01,2026-12-31,active,new 
+contract_0030,acct_0030,parent_0030,plan_003,25000000,25000000,2025-01-01,2026-12-31,active,renewal +contract_0031,acct_0031,parent_0031,plan_003,25000000,25000000,2025-01-01,2026-12-31,active,expansion +contract_0032,acct_0032,parent_0032,plan_003,25000000,25000000,2025-01-01,2026-12-31,active,downgrade +contract_0033,acct_0033,parent_0033,plan_003,25000000,25000000,2025-01-01,2026-12-31,active,new +contract_0034,acct_0034,parent_0034,plan_003,25000000,25000000,2025-01-01,2026-12-31,active,renewal +contract_0035,acct_0035,parent_0035,plan_003,25000000,25000000,2025-01-01,2026-12-31,active,expansion +contract_0036,acct_0036,parent_0036,plan_003,25000000,25000000,2025-01-01,2026-12-31,active,downgrade +contract_0037,acct_0037,parent_0037,plan_003,25000000,25000000,2025-01-01,2026-12-31,active,new +contract_0038,acct_0038,parent_0038,plan_003,25000000,25000000,2025-01-01,2026-12-31,active,renewal +contract_0039,acct_0039,parent_0039,plan_003,25000000,25000000,2025-01-01,2026-12-31,active,expansion +contract_0040,acct_0040,parent_0040,plan_003,25000000,25000000,2025-01-01,2026-12-31,active,downgrade +contract_0041,acct_0041,parent_0041,plan_004,25000000,25000000,2025-01-01,2026-12-31,active,new +contract_0042,acct_0042,parent_0042,plan_003,25000000,25000000,2025-01-01,2026-12-31,active,renewal +contract_0043,acct_0043,parent_0043,plan_003,25000000,25000000,2025-01-01,2026-12-31,active,expansion +contract_0044,acct_0044,parent_0044,plan_003,25000000,25000000,2025-01-01,2026-12-31,active,downgrade +contract_0045,acct_0045,parent_0045,plan_003,25000000,25000000,2025-01-01,2026-12-31,active,new +contract_0046,acct_0046,parent_0046,plan_003,25000000,25000000,2025-01-01,2026-12-31,active,renewal +contract_0047,acct_0047,parent_0047,plan_003,25000000,25000000,2025-01-01,2026-12-31,active,expansion +contract_0048,acct_0048,parent_0048,plan_003,25000000,25000000,2025-01-01,2026-12-31,active,downgrade 
+contract_0049,acct_0049,parent_0049,plan_003,25000000,25000000,2025-01-01,2026-12-31,active,new +contract_0050,acct_0050,parent_0050,plan_003,25000000,25000000,2025-01-01,2026-12-31,active,renewal +contract_0051,acct_0051,parent_0051,plan_003,12000000,12000000,2025-01-01,2026-12-31,active,expansion +contract_0052,acct_0052,parent_0052,plan_003,12000000,12000000,2025-01-01,2026-12-31,active,downgrade +contract_0053,acct_0053,parent_0053,plan_003,12000000,12000000,2025-01-01,2026-12-31,active,new +contract_0054,acct_0054,parent_0054,plan_003,12000000,12000000,2025-01-01,2026-12-31,active,renewal +contract_0055,acct_0055,parent_0055,plan_003,12000000,12000000,2025-01-01,2026-12-31,active,expansion +contract_0056,acct_0056,parent_0056,plan_003,12000000,12000000,2025-01-01,2026-12-31,active,downgrade +contract_0057,acct_0057,parent_0057,plan_003,12000000,12000000,2025-01-01,2026-12-31,active,new +contract_0058,acct_0058,parent_0058,plan_003,12000000,12000000,2025-01-01,2026-12-31,active,renewal +contract_0059,acct_0059,parent_0059,plan_003,12000000,12000000,2025-01-01,2026-12-31,active,expansion +contract_0060,acct_0060,parent_0060,plan_003,12000000,12000000,2025-01-01,2026-12-31,active,downgrade +contract_0061,acct_0061,parent_0061,plan_003,12000000,12000000,2025-01-01,2026-12-31,active,new +contract_0062,acct_0062,parent_0062,plan_003,12000000,12000000,2025-01-01,2026-12-31,active,renewal +contract_0063,acct_0063,parent_0063,plan_003,12000000,12000000,2025-01-01,2026-12-31,active,expansion +contract_0064,acct_0064,parent_0064,plan_003,12000000,12000000,2025-01-01,2026-12-31,active,downgrade +contract_0065,acct_0065,parent_0065,plan_003,12000000,12000000,2025-01-01,2026-12-31,active,new +contract_0066,acct_0066,parent_0066,plan_003,12000000,12000000,2025-01-01,2026-12-31,active,renewal +contract_0067,acct_0067,parent_0067,plan_003,12000000,12000000,2025-01-01,2026-12-31,active,expansion 
+contract_0068,acct_0068,parent_0068,plan_003,12000000,12000000,2025-01-01,2026-12-31,active,downgrade +contract_0069,acct_0069,parent_0069,plan_003,12000000,12000000,2025-01-01,2026-12-31,active,new +contract_0070,acct_0070,parent_0070,plan_003,12000000,12000000,2025-01-01,2026-12-31,active,renewal +contract_0071,acct_0071,parent_0071,plan_003,12000000,12000000,2025-01-01,2026-12-31,active,expansion +contract_0072,acct_0072,parent_0072,plan_003,12000000,12000000,2025-01-01,2026-12-31,active,downgrade +contract_0073,acct_0073,parent_0073,plan_003,12000000,12000000,2025-01-01,2026-12-31,active,new +contract_0074,acct_0074,parent_0074,plan_003,12000000,12000000,2025-01-01,2026-12-31,active,renewal +contract_0075,acct_0075,parent_0075,plan_003,12000000,12000000,2025-01-01,2026-12-31,active,expansion +contract_0076,acct_0076,parent_0076,plan_003,12000000,12000000,2025-01-01,2026-12-31,active,downgrade +contract_0077,acct_0077,parent_0077,plan_003,12000000,12000000,2025-01-01,2026-12-31,active,new +contract_0078,acct_0078,parent_0078,plan_003,12000000,12000000,2025-01-01,2026-12-31,active,renewal +contract_0079,acct_0079,parent_0079,plan_003,12000000,12000000,2025-01-01,2026-12-31,active,expansion +contract_0080,acct_0080,parent_0080,plan_003,12000000,12000000,2025-01-01,2026-12-31,active,downgrade +contract_0081,acct_0081,parent_0081,plan_002,12000000,12000000,2025-01-01,2026-12-31,active,new +contract_0082,acct_0082,parent_0082,plan_004,12000000,12000000,2025-01-01,2026-12-31,active,renewal +contract_0083,acct_0083,parent_0083,plan_002,12000000,12000000,2025-01-01,2026-12-31,active,expansion +contract_0084,acct_0084,parent_0084,plan_002,12000000,12000000,2025-01-01,2026-12-31,active,downgrade +contract_0085,acct_0085,parent_0085,plan_002,12000000,12000000,2025-01-01,2026-12-31,active,new +contract_0086,acct_0086,parent_0086,plan_002,12000000,12000000,2025-01-01,2026-12-31,active,renewal 
+contract_0087,acct_0087,parent_0087,plan_002,12000000,12000000,2025-01-01,2026-12-31,active,expansion +contract_0088,acct_0088,parent_0088,plan_002,12000000,12000000,2025-01-01,2026-12-31,active,downgrade +contract_0089,acct_0089,parent_0089,plan_002,12000000,12000000,2025-01-01,2026-12-31,active,new +contract_0090,acct_0090,parent_0090,plan_002,12000000,12000000,2025-01-01,2026-12-31,active,renewal +contract_0091,acct_0091,parent_0091,plan_002,12000000,12000000,2025-01-01,2026-12-31,active,expansion +contract_0092,acct_0092,parent_0092,plan_002,12000000,12000000,2025-01-01,2026-12-31,active,downgrade +contract_0093,acct_0093,parent_0093,plan_002,12000000,12000000,2025-01-01,2026-12-31,active,new +contract_0094,acct_0094,parent_0094,plan_002,12000000,12000000,2025-01-01,2026-12-31,active,renewal +contract_0095,acct_0095,parent_0095,plan_002,12000000,12000000,2025-01-01,2026-12-31,active,expansion +contract_0096,acct_0096,parent_0096,plan_002,12000000,12000000,2025-01-01,2026-12-31,active,downgrade +contract_0097,acct_0097,parent_0097,plan_002,12000000,12000000,2025-01-01,2026-12-31,active,new +contract_0098,acct_0098,parent_0098,plan_002,12000000,12000000,2025-01-01,2026-12-31,active,renewal +contract_0099,acct_0099,parent_0099,plan_002,12000000,12000000,2025-01-01,2026-12-31,active,expansion +contract_0100,acct_0100,parent_0100,plan_002,36200000,36200000,2025-01-01,2026-12-31,active,downgrade +contract_0101,acct_0101,parent_0101,plan_002,4010000,4010000,2025-03-01,2025-12-31,expired,new +contract_0102,acct_0102,parent_0102,plan_002,4020000,4020000,2025-03-01,2025-12-31,cancelled,renewal +contract_0103,acct_0103,parent_0103,plan_002,4030000,4030000,2025-03-01,2025-12-31,expired,expansion +contract_0104,acct_0104,parent_0104,plan_002,4040000,4040000,2025-03-01,2025-12-31,expired,downgrade +contract_0105,acct_0105,parent_0105,plan_002,4050000,4050000,2025-03-01,2025-12-31,cancelled,new 
+contract_0106,acct_0106,parent_0106,plan_002,4060000,4060000,2025-03-01,2025-12-31,expired,renewal +contract_0107,acct_0107,parent_0107,plan_002,4070000,4070000,2025-03-01,2025-12-31,expired,expansion +contract_0108,acct_0108,parent_0108,plan_002,4080000,4080000,2025-03-01,2025-12-31,cancelled,downgrade +contract_0109,acct_0109,parent_0109,plan_002,4090000,4090000,2025-03-01,2025-12-31,expired,new +contract_0110,acct_0110,parent_0110,plan_002,4100000,4100000,2025-03-01,2025-12-31,expired,renewal +contract_0111,acct_0111,parent_0111,plan_002,4110000,4110000,2025-03-01,2025-12-31,cancelled,expansion +contract_0112,acct_0112,parent_0112,plan_002,4120000,4120000,2025-03-01,2025-12-31,expired,downgrade +contract_0113,acct_0113,parent_0113,plan_002,4130000,4130000,2025-03-01,2025-12-31,expired,new +contract_0114,acct_0114,parent_0114,plan_002,4140000,4140000,2025-03-01,2025-12-31,cancelled,renewal +contract_0115,acct_0115,parent_0115,plan_002,4150000,4150000,2025-03-01,2025-12-31,expired,expansion +contract_0116,acct_0116,parent_0116,plan_002,4160000,4160000,2025-03-01,2025-12-31,expired,downgrade +contract_0117,acct_0117,parent_0117,plan_002,4170000,4170000,2025-03-01,2025-12-31,cancelled,new +contract_0118,acct_0118,parent_0118,plan_002,4180000,4180000,2025-03-01,2025-12-31,expired,renewal +contract_0119,acct_0119,parent_0119,plan_002,4190000,4190000,2025-03-01,2025-12-31,expired,expansion +contract_0120,acct_0120,parent_0120,plan_002,4200000,4200000,2025-03-01,2025-12-31,cancelled,downgrade +contract_0121,acct_0121,parent_0121,plan_002,4210000,4210000,2025-03-01,2025-12-31,expired,new +contract_0122,acct_0122,parent_0122,plan_002,4220000,4220000,2025-03-01,2025-12-31,expired,renewal +contract_0123,acct_0123,parent_0123,plan_004,4230000,4230000,2025-03-01,2025-12-31,cancelled,expansion +contract_0124,acct_0124,parent_0124,plan_002,4240000,4240000,2025-03-01,2025-12-31,expired,downgrade 
+contract_0125,acct_0125,parent_0125,plan_002,4250000,4250000,2025-03-01,2025-12-31,expired,new +contract_0126,acct_0126,parent_0126,plan_002,4260000,4260000,2025-03-01,2025-12-31,cancelled,renewal +contract_0127,acct_0127,parent_0127,plan_002,4270000,4270000,2025-03-01,2025-12-31,expired,expansion +contract_0128,acct_0128,parent_0128,plan_002,4280000,4280000,2025-03-01,2025-12-31,expired,downgrade +contract_0129,acct_0129,parent_0129,plan_002,4290000,4290000,2025-03-01,2025-12-31,cancelled,new +contract_0130,acct_0130,parent_0130,plan_002,4300000,4300000,2025-03-01,2025-12-31,expired,renewal +contract_0131,acct_0131,parent_0131,plan_002,4310000,4310000,2025-03-01,2025-12-31,expired,expansion +contract_0132,acct_0132,parent_0132,plan_002,4320000,4320000,2025-03-01,2025-12-31,cancelled,downgrade +contract_0133,acct_0133,parent_0001,plan_002,4330000,4330000,2025-03-01,2025-12-31,expired,new +contract_0134,acct_0134,parent_0002,plan_002,4340000,4340000,2025-03-01,2025-12-31,expired,renewal +contract_0135,acct_0135,parent_0003,plan_002,4350000,4350000,2025-03-01,2025-12-31,cancelled,expansion +contract_0136,acct_0136,parent_0004,plan_002,4360000,4360000,2025-03-01,2025-12-31,expired,downgrade +contract_0137,acct_0137,parent_0005,plan_002,4370000,4370000,2025-03-01,2025-12-31,expired,new +contract_0138,acct_0138,parent_0006,plan_002,4380000,4380000,2025-03-01,2025-12-31,cancelled,renewal +contract_0139,acct_0139,parent_0007,plan_002,4390000,4390000,2025-03-01,2025-12-31,expired,expansion +contract_0140,acct_0140,parent_0008,plan_002,4400000,4400000,2025-03-01,2025-12-31,expired,downgrade +contract_0141,acct_0141,parent_0009,plan_002,4410000,4410000,2025-03-01,2025-12-31,cancelled,new +contract_0142,acct_0142,parent_0010,plan_002,4420000,4420000,2025-03-01,2025-12-31,expired,renewal +contract_0143,acct_0143,parent_0011,plan_002,4430000,4430000,2025-03-01,2025-12-31,expired,expansion 
+contract_0144,acct_0144,parent_0012,plan_002,4440000,4440000,2025-03-01,2025-12-31,cancelled,downgrade +contract_0145,acct_0145,parent_0013,plan_002,4450000,4450000,2025-03-01,2025-12-31,expired,new +contract_0146,acct_0146,parent_0014,plan_002,4460000,4460000,2025-03-01,2025-12-31,expired,renewal +contract_0147,acct_0147,parent_0015,plan_002,4470000,4470000,2025-03-01,2025-12-31,cancelled,expansion +contract_0148,acct_0148,parent_0016,plan_002,4480000,4480000,2025-03-01,2025-12-31,expired,downgrade +contract_0149,acct_0149,parent_0017,plan_002,4490000,4490000,2025-03-01,2025-12-31,expired,new +contract_0150,acct_0150,parent_0018,plan_002,4500000,4500000,2025-03-01,2025-12-31,cancelled,renewal +contract_0151,acct_0151,parent_0019,plan_001,4510000,4510000,2025-03-01,2025-12-31,expired,expansion +contract_0152,acct_0152,parent_0020,plan_001,4520000,4520000,2025-03-01,2025-12-31,expired,downgrade +contract_0153,acct_0153,parent_0021,plan_001,4530000,4530000,2025-03-01,2025-12-31,cancelled,new +contract_0154,acct_0154,parent_0022,plan_001,4540000,4540000,2025-03-01,2025-12-31,expired,renewal +contract_0155,acct_0155,parent_0023,plan_001,4550000,4550000,2025-03-01,2025-12-31,expired,expansion +contract_0156,acct_0156,parent_0024,plan_001,4560000,4560000,2025-03-01,2025-12-31,cancelled,downgrade +contract_0157,acct_0157,parent_0025,plan_001,4570000,4570000,2025-03-01,2025-12-31,expired,new +contract_0158,acct_0158,parent_0026,plan_001,4580000,4580000,2025-03-01,2025-12-31,expired,renewal +contract_0159,acct_0159,parent_0027,plan_001,4590000,4590000,2025-03-01,2025-12-31,cancelled,expansion +contract_0160,acct_0160,parent_0028,plan_001,4600000,4600000,2025-03-01,2025-12-31,expired,downgrade +contract_0161,acct_0161,parent_0029,plan_001,4610000,4610000,2025-03-01,2025-12-31,expired,new +contract_0162,acct_0162,parent_0030,plan_001,4620000,4620000,2025-03-01,2025-12-31,cancelled,renewal 
+contract_0163,acct_0163,parent_0031,plan_001,4630000,4630000,2025-03-01,2025-12-31,expired,expansion +contract_0164,acct_0164,parent_0032,plan_004,4640000,4640000,2025-03-01,2025-12-31,expired,downgrade +contract_0165,acct_0165,parent_0033,plan_001,4650000,4650000,2025-03-01,2025-12-31,cancelled,new +contract_0166,acct_0166,parent_0034,plan_001,4660000,4660000,2025-03-01,2025-12-31,expired,renewal +contract_0167,acct_0167,parent_0035,plan_001,4670000,4670000,2025-03-01,2025-12-31,expired,expansion +contract_0168,acct_0168,parent_0036,plan_001,4680000,4680000,2025-03-01,2025-12-31,cancelled,downgrade +contract_0169,acct_0169,parent_0037,plan_001,4690000,4690000,2025-03-01,2025-12-31,expired,new +contract_0170,acct_0170,parent_0038,plan_001,4700000,4700000,2025-03-01,2025-12-31,expired,renewal +contract_0171,acct_0171,parent_0039,plan_001,4710000,4710000,2025-03-01,2025-12-31,cancelled,expansion +contract_0172,acct_0172,parent_0040,plan_001,4720000,4720000,2025-03-01,2025-12-31,expired,downgrade +contract_0173,acct_0173,parent_0041,plan_001,4730000,4730000,2025-03-01,2025-12-31,expired,new +contract_0174,acct_0174,parent_0042,plan_001,4740000,4740000,2025-03-01,2025-12-31,cancelled,renewal +contract_0175,acct_0175,parent_0043,plan_001,4750000,4750000,2025-03-01,2025-12-31,expired,expansion +contract_0176,acct_0176,parent_0044,plan_001,4760000,4760000,2025-03-01,2025-12-31,expired,downgrade +contract_0177,acct_0177,parent_0045,plan_001,4770000,4770000,2025-03-01,2025-12-31,cancelled,new +contract_0178,acct_0178,parent_0046,plan_001,4780000,4780000,2025-03-01,2025-12-31,expired,renewal +contract_0179,acct_0179,parent_0047,plan_001,4790000,4790000,2025-03-01,2025-12-31,expired,expansion +contract_0180,acct_0180,parent_0048,plan_001,4800000,4800000,2025-03-01,2025-12-31,cancelled,downgrade +contract_0181,acct_0181,parent_0049,plan_001,4810000,4810000,2025-03-01,2025-12-31,expired,new 
+contract_0182,acct_0182,parent_0050,plan_001,4820000,4820000,2025-03-01,2025-12-31,expired,renewal +contract_0183,acct_0183,parent_0051,plan_001,4830000,4830000,2025-03-01,2025-12-31,cancelled,expansion +contract_0184,acct_0184,parent_0052,plan_001,4840000,4840000,2025-03-01,2025-12-31,expired,downgrade +contract_0185,acct_0185,parent_0053,plan_001,4850000,4850000,2025-03-01,2025-12-31,expired,new +contract_0186,acct_0186,parent_0054,plan_001,4860000,4860000,2025-03-01,2025-12-31,cancelled,renewal +contract_0187,acct_0187,parent_0055,plan_001,4870000,4870000,2025-03-01,2025-12-31,expired,expansion +contract_0188,acct_0188,parent_0056,plan_001,4880000,4880000,2025-03-01,2025-12-31,expired,downgrade +contract_0189,acct_0189,parent_0057,plan_001,4890000,4890000,2025-03-01,2025-12-31,cancelled,new +contract_0190,acct_0190,parent_0058,plan_001,4900000,4900000,2025-03-01,2025-12-31,expired,renewal +contract_0191,acct_0191,parent_0059,plan_001,4910000,4910000,2025-03-01,2025-12-31,expired,expansion +contract_0192,acct_0192,parent_0060,plan_001,4920000,4920000,2025-03-01,2025-12-31,cancelled,downgrade +contract_0193,acct_0193,parent_0061,plan_001,4930000,4930000,2025-03-01,2025-12-31,expired,new +contract_0194,acct_0194,parent_0062,plan_001,4940000,4940000,2025-03-01,2025-12-31,expired,renewal +contract_0195,acct_0195,parent_0063,plan_001,4950000,4950000,2025-03-01,2025-12-31,cancelled,expansion +contract_0196,acct_0196,parent_0064,plan_001,4960000,4960000,2025-03-01,2025-12-31,expired,downgrade +contract_0197,acct_0197,parent_0065,plan_001,4970000,4970000,2025-03-01,2025-12-31,expired,new +contract_0198,acct_0198,parent_0066,plan_001,4980000,4980000,2025-03-01,2025-12-31,cancelled,renewal +contract_0199,acct_0199,parent_0067,plan_001,4990000,4990000,2025-03-01,2025-12-31,expired,expansion +contract_0200,acct_0200,parent_0068,plan_001,5000000,5000000,2025-03-01,2025-12-31,expired,downgrade 
+contract_0201,acct_0001,parent_0001,plan_003,5010000,5010000,2025-03-01,2025-12-31,cancelled,new +contract_0202,acct_0002,parent_0002,plan_003,5020000,5020000,2025-03-01,2025-12-31,expired,renewal +contract_0203,acct_0003,parent_0003,plan_003,5030000,5030000,2025-03-01,2025-12-31,expired,expansion +contract_0204,acct_0004,parent_0004,plan_003,5040000,5040000,2025-03-01,2025-12-31,cancelled,downgrade +contract_0205,acct_0005,parent_0005,plan_004,5050000,5050000,2025-03-01,2025-12-31,expired,new +contract_0206,acct_0006,parent_0006,plan_003,5060000,5060000,2025-03-01,2025-12-31,expired,renewal +contract_0207,acct_0007,parent_0007,plan_003,5070000,5070000,2025-03-01,2025-12-31,cancelled,expansion +contract_0208,acct_0008,parent_0008,plan_003,5080000,5080000,2025-03-01,2025-12-31,expired,downgrade +contract_0209,acct_0009,parent_0009,plan_003,5090000,5090000,2025-03-01,2025-12-31,expired,new +contract_0210,acct_0010,parent_0010,plan_003,5100000,5100000,2025-03-01,2025-12-31,cancelled,renewal +contract_0211,acct_0011,parent_0011,plan_003,5110000,5110000,2025-03-01,2025-12-31,expired,expansion +contract_0212,acct_0012,parent_0012,plan_003,5120000,5120000,2025-03-01,2025-12-31,expired,downgrade +contract_0213,acct_0013,parent_0013,plan_003,5130000,5130000,2025-03-01,2025-12-31,cancelled,new +contract_0214,acct_0014,parent_0014,plan_003,5140000,5140000,2025-03-01,2025-12-31,expired,renewal +contract_0215,acct_0015,parent_0015,plan_003,5150000,5150000,2025-03-01,2025-12-31,expired,expansion +contract_0216,acct_0016,parent_0016,plan_003,5160000,5160000,2025-03-01,2025-12-31,cancelled,downgrade +contract_0217,acct_0017,parent_0017,plan_003,5170000,5170000,2025-03-01,2025-12-31,expired,new +contract_0218,acct_0018,parent_0018,plan_003,5180000,5180000,2025-03-01,2025-12-31,expired,renewal +contract_0219,acct_0019,parent_0019,plan_003,5190000,5190000,2025-03-01,2025-12-31,cancelled,expansion 
+contract_0220,acct_0020,parent_0020,plan_003,5200000,5200000,2025-03-01,2025-12-31,expired,downgrade +contract_0221,acct_0021,parent_0021,plan_003,5210000,5210000,2025-03-01,2025-12-31,expired,new +contract_0222,acct_0022,parent_0022,plan_003,5220000,5220000,2025-03-01,2025-12-31,cancelled,renewal +contract_0223,acct_0023,parent_0023,plan_003,5230000,5230000,2025-03-01,2025-12-31,expired,expansion +contract_0224,acct_0024,parent_0024,plan_003,5240000,5240000,2025-03-01,2025-12-31,expired,downgrade +contract_0225,acct_0025,parent_0025,plan_003,5250000,5250000,2025-03-01,2025-12-31,cancelled,new +contract_0226,acct_0026,parent_0026,plan_003,5260000,5260000,2025-03-01,2025-12-31,expired,renewal +contract_0227,acct_0027,parent_0027,plan_003,5270000,5270000,2025-03-01,2025-12-31,expired,expansion +contract_0228,acct_0028,parent_0028,plan_003,5280000,5280000,2025-03-01,2025-12-31,cancelled,downgrade +contract_0229,acct_0029,parent_0029,plan_003,5290000,5290000,2025-03-01,2025-12-31,expired,new +contract_0230,acct_0030,parent_0030,plan_003,5300000,5300000,2025-03-01,2025-12-31,expired,renewal +contract_0231,acct_0031,parent_0031,plan_003,5310000,5310000,2025-03-01,2025-12-31,cancelled,expansion +contract_0232,acct_0032,parent_0032,plan_003,5320000,5320000,2025-03-01,2025-12-31,expired,downgrade +contract_0233,acct_0033,parent_0033,plan_003,5330000,5330000,2025-03-01,2025-12-31,expired,new +contract_0234,acct_0034,parent_0034,plan_003,5340000,5340000,2025-03-01,2025-12-31,cancelled,renewal +contract_0235,acct_0035,parent_0035,plan_003,5350000,5350000,2025-03-01,2025-12-31,expired,expansion +contract_0236,acct_0036,parent_0036,plan_003,5360000,5360000,2025-03-01,2025-12-31,expired,downgrade +contract_0237,acct_0037,parent_0037,plan_003,5370000,5370000,2025-03-01,2025-12-31,cancelled,new +contract_0238,acct_0038,parent_0038,plan_003,5380000,5380000,2025-03-01,2025-12-31,expired,renewal 
+contract_0239,acct_0039,parent_0039,plan_003,5390000,5390000,2025-03-01,2025-12-31,expired,expansion +contract_0240,acct_0040,parent_0040,plan_003,5400000,5400000,2025-03-01,2025-12-31,cancelled,downgrade +contract_0241,acct_0041,parent_0041,plan_003,5410000,5410000,2025-03-01,2025-12-31,expired,new +contract_0242,acct_0042,parent_0042,plan_003,5420000,5420000,2025-03-01,2025-12-31,expired,renewal +contract_0243,acct_0043,parent_0043,plan_003,5430000,5430000,2025-03-01,2025-12-31,cancelled,expansion +contract_0244,acct_0044,parent_0044,plan_003,5440000,5440000,2025-03-01,2025-12-31,expired,downgrade +contract_0245,acct_0045,parent_0045,plan_003,5450000,5450000,2025-03-01,2025-12-31,expired,new +contract_0246,acct_0046,parent_0046,plan_004,5460000,5460000,2025-03-01,2025-12-31,cancelled,renewal +contract_0247,acct_0047,parent_0047,plan_003,5470000,5470000,2025-03-01,2025-12-31,expired,expansion +contract_0248,acct_0048,parent_0048,plan_003,5480000,5480000,2025-03-01,2025-12-31,expired,downgrade +contract_0249,acct_0049,parent_0049,plan_003,5490000,5490000,2025-03-01,2025-12-31,cancelled,new +contract_0250,acct_0050,parent_0050,plan_003,5500000,5500000,2025-03-01,2025-12-31,expired,renewal +contract_0251,acct_0051,parent_0051,plan_003,5510000,5510000,2025-03-01,2025-12-31,expired,expansion +contract_0252,acct_0052,parent_0052,plan_003,5520000,5520000,2025-03-01,2025-12-31,cancelled,downgrade +contract_0253,acct_0053,parent_0053,plan_003,5530000,5530000,2025-03-01,2025-12-31,expired,new +contract_0254,acct_0054,parent_0054,plan_003,5540000,5540000,2025-03-01,2025-12-31,expired,renewal +contract_0255,acct_0055,parent_0055,plan_003,5550000,5550000,2025-03-01,2025-12-31,cancelled,expansion +contract_0256,acct_0056,parent_0056,plan_003,5560000,5560000,2025-03-01,2025-12-31,expired,downgrade +contract_0257,acct_0057,parent_0057,plan_003,5570000,5570000,2025-03-01,2025-12-31,expired,new 
+contract_0258,acct_0058,parent_0058,plan_003,5580000,5580000,2025-03-01,2025-12-31,cancelled,renewal +contract_0259,acct_0059,parent_0059,plan_003,5590000,5590000,2025-03-01,2025-12-31,expired,expansion +contract_0260,acct_0060,parent_0060,plan_003,5600000,5600000,2025-03-01,2025-12-31,expired,downgrade +contract_0261,acct_0061,parent_0061,plan_003,5610000,5610000,2025-03-01,2025-12-31,cancelled,new +contract_0262,acct_0062,parent_0062,plan_003,5620000,5620000,2025-03-01,2025-12-31,expired,renewal +contract_0263,acct_0063,parent_0063,plan_003,5630000,5630000,2025-03-01,2025-12-31,expired,expansion +contract_0264,acct_0064,parent_0064,plan_003,5640000,5640000,2025-03-01,2025-12-31,cancelled,downgrade +contract_0265,acct_0065,parent_0065,plan_003,5650000,5650000,2025-03-01,2025-12-31,expired,new +contract_0266,acct_0066,parent_0066,plan_003,5660000,5660000,2025-03-01,2025-12-31,expired,renewal +contract_0267,acct_0067,parent_0067,plan_003,5670000,5670000,2025-03-01,2025-12-31,cancelled,expansion +contract_0268,acct_0068,parent_0068,plan_003,5680000,5680000,2025-03-01,2025-12-31,expired,downgrade +contract_0269,acct_0069,parent_0069,plan_003,5690000,5690000,2025-03-01,2025-12-31,expired,new +contract_0270,acct_0070,parent_0070,plan_003,5700000,5700000,2025-03-01,2025-12-31,cancelled,renewal +contract_0271,acct_0071,parent_0071,plan_003,5710000,5710000,2025-03-01,2025-12-31,expired,expansion +contract_0272,acct_0072,parent_0072,plan_003,5720000,5720000,2025-03-01,2025-12-31,expired,downgrade +contract_0273,acct_0073,parent_0073,plan_003,5730000,5730000,2025-03-01,2025-12-31,cancelled,new +contract_0274,acct_0074,parent_0074,plan_003,5740000,5740000,2025-03-01,2025-12-31,expired,renewal +contract_0275,acct_0075,parent_0075,plan_003,5750000,5750000,2025-03-01,2025-12-31,expired,expansion +contract_0276,acct_0076,parent_0076,plan_003,5760000,5760000,2025-03-01,2025-12-31,cancelled,downgrade 
+contract_0277,acct_0077,parent_0077,plan_003,5770000,5770000,2025-03-01,2025-12-31,expired,new +contract_0278,acct_0078,parent_0078,plan_003,5780000,5780000,2025-03-01,2025-12-31,expired,renewal +contract_0279,acct_0079,parent_0079,plan_003,5790000,5790000,2025-03-01,2025-12-31,cancelled,expansion +contract_0280,acct_0080,parent_0080,plan_003,5800000,5800000,2025-03-01,2025-12-31,expired,downgrade +contract_0281,acct_0081,parent_0081,plan_002,5810000,5810000,2025-03-01,2025-12-31,expired,new +contract_0282,acct_0082,parent_0082,plan_002,5820000,5820000,2025-03-01,2025-12-31,cancelled,renewal +contract_0283,acct_0083,parent_0083,plan_002,5830000,5830000,2025-03-01,2025-12-31,expired,expansion +contract_0284,acct_0084,parent_0084,plan_002,5840000,5840000,2025-03-01,2025-12-31,expired,downgrade +contract_0285,acct_0085,parent_0085,plan_002,5850000,5850000,2025-03-01,2025-12-31,cancelled,new +contract_0286,acct_0086,parent_0086,plan_002,5860000,5860000,2025-03-01,2025-12-31,expired,renewal +contract_0287,acct_0087,parent_0087,plan_004,5870000,5870000,2025-03-01,2025-12-31,expired,expansion +contract_0288,acct_0088,parent_0088,plan_002,5880000,5880000,2025-03-01,2025-12-31,cancelled,downgrade +contract_0289,acct_0089,parent_0089,plan_002,5890000,5890000,2025-03-01,2025-12-31,expired,new +contract_0290,acct_0090,parent_0090,plan_002,5900000,5900000,2025-03-01,2025-12-31,expired,renewal +contract_0291,acct_0091,parent_0091,plan_002,5910000,5910000,2025-03-01,2025-12-31,cancelled,expansion +contract_0292,acct_0092,parent_0092,plan_002,5920000,5920000,2025-03-01,2025-12-31,expired,downgrade +contract_0293,acct_0093,parent_0093,plan_002,5930000,5930000,2025-03-01,2025-12-31,expired,new +contract_0294,acct_0094,parent_0094,plan_002,5940000,5940000,2025-03-01,2025-12-31,cancelled,renewal +contract_0295,acct_0095,parent_0095,plan_002,5950000,5950000,2025-03-01,2025-12-31,expired,expansion 
+contract_0296,acct_0096,parent_0096,plan_002,5960000,5960000,2025-03-01,2025-12-31,expired,downgrade +contract_0297,acct_0097,parent_0097,plan_002,5970000,5970000,2025-03-01,2025-12-31,cancelled,new +contract_0298,acct_0098,parent_0098,plan_002,5980000,5980000,2025-03-01,2025-12-31,expired,renewal +contract_0299,acct_0099,parent_0099,plan_002,5990000,5990000,2025-03-01,2025-12-31,expired,expansion +contract_0300,acct_0100,parent_0100,plan_002,6000000,6000000,2025-03-01,2025-12-31,cancelled,downgrade +contract_0301,acct_0101,parent_0101,plan_002,6010000,6010000,2025-03-01,2025-12-31,expired,new +contract_0302,acct_0102,parent_0102,plan_002,6020000,6020000,2025-03-01,2025-12-31,expired,renewal +contract_0303,acct_0103,parent_0103,plan_002,6030000,6030000,2025-03-01,2025-12-31,cancelled,expansion +contract_0304,acct_0104,parent_0104,plan_002,6040000,6040000,2025-03-01,2025-12-31,expired,downgrade +contract_0305,acct_0105,parent_0105,plan_002,6050000,6050000,2025-03-01,2025-12-31,expired,new +contract_0306,acct_0106,parent_0106,plan_002,6060000,6060000,2025-03-01,2025-12-31,cancelled,renewal +contract_0307,acct_0107,parent_0107,plan_002,6070000,6070000,2025-03-01,2025-12-31,expired,expansion +contract_0308,acct_0108,parent_0108,plan_002,6080000,6080000,2025-03-01,2025-12-31,expired,downgrade +contract_0309,acct_0109,parent_0109,plan_002,6090000,6090000,2025-03-01,2025-12-31,cancelled,new +contract_0310,acct_0110,parent_0110,plan_002,6100000,6100000,2025-03-01,2025-12-31,expired,renewal +contract_0311,acct_0111,parent_0111,plan_002,6110000,6110000,2025-03-01,2025-12-31,expired,expansion +contract_0312,acct_0112,parent_0112,plan_002,6120000,6120000,2025-03-01,2025-12-31,cancelled,downgrade +contract_0313,acct_0113,parent_0113,plan_002,6130000,6130000,2025-03-01,2025-12-31,expired,new +contract_0314,acct_0114,parent_0114,plan_002,6140000,6140000,2025-03-01,2025-12-31,expired,renewal 
+contract_0315,acct_0115,parent_0115,plan_002,6150000,6150000,2025-03-01,2025-12-31,cancelled,expansion +contract_0316,acct_0116,parent_0116,plan_002,6160000,6160000,2025-03-01,2025-12-31,expired,downgrade +contract_0317,acct_0117,parent_0117,plan_002,6170000,6170000,2025-03-01,2025-12-31,expired,new +contract_0318,acct_0118,parent_0118,plan_002,6180000,6180000,2025-03-01,2025-12-31,cancelled,renewal +contract_0319,acct_0119,parent_0119,plan_002,6190000,6190000,2025-03-01,2025-12-31,expired,expansion +contract_0320,acct_0120,parent_0120,plan_002,6200000,6200000,2025-03-01,2025-12-31,expired,downgrade diff --git a/packages/cli/assets/demo/orbit/raw-sources/warehouse/invoices.csv b/packages/cli/assets/demo/orbit/raw-sources/warehouse/invoices.csv new file mode 100644 index 00000000..cd0e91ae --- /dev/null +++ b/packages/cli/assets/demo/orbit/raw-sources/warehouse/invoices.csv @@ -0,0 +1,3001 @@ +invoice_id,account_id,subscription_id,invoice_date,paid_at,status,currency +inv_000001,acct_0001,sub_0001,2026-02-01,2026-02-01T12:00:00Z,paid,USD +inv_000002,acct_0002,sub_0002,2026-02-02,2026-02-02T12:00:00Z,paid,USD +inv_000003,acct_0003,sub_0003,2026-02-03,2026-02-03T12:00:00Z,paid,USD +inv_000004,acct_0004,sub_0004,2026-02-04,2026-02-04T12:00:00Z,paid,USD +inv_000005,acct_0005,sub_0005,2026-02-05,2026-02-05T12:00:00Z,paid,USD +inv_000006,acct_0006,sub_0006,2026-02-06,2026-02-06T12:00:00Z,paid,USD +inv_000007,acct_0007,sub_0007,2026-02-07,2026-02-07T12:00:00Z,paid,USD +inv_000008,acct_0008,sub_0008,2026-02-08,2026-02-08T12:00:00Z,paid,USD +inv_000009,acct_0009,sub_0009,2026-02-09,2026-02-09T12:00:00Z,paid,USD +inv_000010,acct_0010,sub_0010,2026-02-10,2026-02-10T12:00:00Z,paid,USD +inv_000011,acct_0011,sub_0011,2026-02-11,2026-02-11T12:00:00Z,paid,USD +inv_000012,acct_0012,sub_0012,2026-02-12,2026-02-12T12:00:00Z,paid,USD +inv_000013,acct_0013,sub_0013,2026-02-13,2026-02-13T12:00:00Z,paid,USD +inv_000014,acct_0014,sub_0014,2026-02-14,2026-02-14T12:00:00Z,paid,USD 
+inv_000015,acct_0015,sub_0015,2026-02-15,2026-02-15T12:00:00Z,paid,USD +inv_000016,acct_0016,sub_0016,2026-02-16,2026-02-16T12:00:00Z,paid,USD +inv_000017,acct_0017,sub_0017,2026-02-17,2026-02-17T12:00:00Z,paid,USD +inv_000018,acct_0018,sub_0018,2026-02-18,2026-02-18T12:00:00Z,paid,USD +inv_000019,acct_0019,sub_0019,2026-02-19,2026-02-19T12:00:00Z,paid,USD +inv_000020,acct_0020,sub_0020,2026-02-20,2026-02-20T12:00:00Z,paid,USD +inv_000021,acct_0021,sub_0021,2026-02-21,2026-02-21T12:00:00Z,paid,USD +inv_000022,acct_0022,sub_0022,2026-02-22,2026-02-22T12:00:00Z,paid,USD +inv_000023,acct_0023,sub_0023,2026-02-23,2026-02-23T12:00:00Z,paid,USD +inv_000024,acct_0024,sub_0024,2026-02-24,2026-02-24T12:00:00Z,paid,USD +inv_000025,acct_0025,sub_0025,2026-02-25,2026-02-25T12:00:00Z,paid,USD +inv_000026,acct_0026,sub_0026,2026-02-26,2026-02-26T12:00:00Z,paid,USD +inv_000027,acct_0027,sub_0027,2026-02-27,2026-02-27T12:00:00Z,paid,USD +inv_000028,acct_0028,sub_0028,2026-02-28,2026-02-28T12:00:00Z,paid,USD +inv_000029,acct_0029,sub_0029,2026-02-01,2026-02-01T12:00:00Z,paid,USD +inv_000030,acct_0030,sub_0030,2026-02-02,2026-02-02T12:00:00Z,paid,USD +inv_000031,acct_0031,sub_0031,2026-02-03,2026-02-03T12:00:00Z,paid,USD +inv_000032,acct_0032,sub_0032,2026-02-04,2026-02-04T12:00:00Z,paid,USD +inv_000033,acct_0033,sub_0033,2026-02-05,2026-02-05T12:00:00Z,paid,USD +inv_000034,acct_0034,sub_0034,2026-02-06,2026-02-06T12:00:00Z,paid,USD +inv_000035,acct_0035,sub_0035,2026-02-07,2026-02-07T12:00:00Z,paid,USD +inv_000036,acct_0036,sub_0036,2026-02-08,2026-02-08T12:00:00Z,paid,USD +inv_000037,acct_0037,sub_0037,2026-02-09,2026-02-09T12:00:00Z,paid,USD +inv_000038,acct_0038,sub_0038,2026-02-10,2026-02-10T12:00:00Z,paid,USD +inv_000039,acct_0039,sub_0039,2026-02-11,2026-02-11T12:00:00Z,paid,USD +inv_000040,acct_0040,sub_0040,2026-02-12,2026-02-12T12:00:00Z,paid,USD +inv_000041,acct_0041,sub_0041,2026-02-13,2026-02-13T12:00:00Z,paid,USD 
+inv_000042,acct_0042,sub_0042,2026-02-14,2026-02-14T12:00:00Z,paid,USD +inv_000043,acct_0043,sub_0043,2026-02-15,2026-02-15T12:00:00Z,paid,USD +inv_000044,acct_0044,sub_0044,2026-02-16,2026-02-16T12:00:00Z,paid,USD +inv_000045,acct_0045,sub_0045,2026-02-17,2026-02-17T12:00:00Z,paid,USD +inv_000046,acct_0046,sub_0046,2026-02-18,2026-02-18T12:00:00Z,paid,USD +inv_000047,acct_0047,sub_0047,2026-02-19,2026-02-19T12:00:00Z,paid,USD +inv_000048,acct_0048,sub_0048,2026-02-20,2026-02-20T12:00:00Z,paid,USD +inv_000049,acct_0049,sub_0049,2026-02-21,2026-02-21T12:00:00Z,paid,USD +inv_000050,acct_0050,sub_0050,2026-02-22,2026-02-22T12:00:00Z,paid,USD +inv_000051,acct_0051,sub_0051,2026-02-23,2026-02-23T12:00:00Z,paid,USD +inv_000052,acct_0052,sub_0052,2026-02-24,2026-02-24T12:00:00Z,paid,USD +inv_000053,acct_0053,sub_0053,2026-02-25,2026-02-25T12:00:00Z,paid,USD +inv_000054,acct_0054,sub_0054,2026-02-26,2026-02-26T12:00:00Z,paid,USD +inv_000055,acct_0055,sub_0055,2026-02-27,2026-02-27T12:00:00Z,paid,USD +inv_000056,acct_0056,sub_0056,2026-02-28,2026-02-28T12:00:00Z,paid,USD +inv_000057,acct_0057,sub_0057,2026-02-01,2026-02-01T12:00:00Z,paid,USD +inv_000058,acct_0058,sub_0058,2026-02-02,2026-02-02T12:00:00Z,paid,USD +inv_000059,acct_0059,sub_0059,2026-02-03,2026-02-03T12:00:00Z,paid,USD +inv_000060,acct_0060,sub_0060,2026-02-04,2026-02-04T12:00:00Z,paid,USD +inv_000061,acct_0061,sub_0061,2026-02-05,2026-02-05T12:00:00Z,paid,USD +inv_000062,acct_0062,sub_0062,2026-02-06,2026-02-06T12:00:00Z,paid,USD +inv_000063,acct_0063,sub_0063,2026-02-07,2026-02-07T12:00:00Z,paid,USD +inv_000064,acct_0064,sub_0064,2026-02-08,2026-02-08T12:00:00Z,paid,USD +inv_000065,acct_0065,sub_0065,2026-02-09,2026-02-09T12:00:00Z,paid,USD +inv_000066,acct_0066,sub_0066,2026-02-10,2026-02-10T12:00:00Z,paid,USD +inv_000067,acct_0067,sub_0067,2026-02-11,2026-02-11T12:00:00Z,paid,USD +inv_000068,acct_0068,sub_0068,2026-02-12,2026-02-12T12:00:00Z,paid,USD 
+inv_000069,acct_0069,sub_0069,2026-02-13,2026-02-13T12:00:00Z,paid,USD +inv_000070,acct_0070,sub_0070,2026-02-14,2026-02-14T12:00:00Z,paid,USD +inv_000071,acct_0071,sub_0071,2026-02-15,2026-02-15T12:00:00Z,paid,USD +inv_000072,acct_0072,sub_0072,2026-02-16,2026-02-16T12:00:00Z,paid,USD +inv_000073,acct_0073,sub_0073,2026-02-17,2026-02-17T12:00:00Z,paid,USD +inv_000074,acct_0074,sub_0074,2026-02-18,2026-02-18T12:00:00Z,paid,USD +inv_000075,acct_0075,sub_0075,2026-02-19,2026-02-19T12:00:00Z,paid,USD +inv_000076,acct_0076,sub_0076,2026-02-20,2026-02-20T12:00:00Z,paid,USD +inv_000077,acct_0077,sub_0077,2026-02-21,2026-02-21T12:00:00Z,paid,USD +inv_000078,acct_0078,sub_0078,2026-02-22,2026-02-22T12:00:00Z,paid,USD +inv_000079,acct_0079,sub_0079,2026-02-23,2026-02-23T12:00:00Z,paid,USD +inv_000080,acct_0080,sub_0080,2026-02-24,2026-02-24T12:00:00Z,paid,USD +inv_000081,acct_0081,sub_0081,2026-02-25,2026-02-25T12:00:00Z,paid,USD +inv_000082,acct_0082,sub_0082,2026-02-26,2026-02-26T12:00:00Z,paid,USD +inv_000083,acct_0083,sub_0083,2026-02-27,2026-02-27T12:00:00Z,paid,USD +inv_000084,acct_0084,sub_0084,2026-02-28,2026-02-28T12:00:00Z,paid,USD +inv_000085,acct_0085,sub_0085,2026-02-01,2026-02-01T12:00:00Z,paid,USD +inv_000086,acct_0086,sub_0086,2026-02-02,2026-02-02T12:00:00Z,paid,USD +inv_000087,acct_0087,sub_0087,2026-02-03,2026-02-03T12:00:00Z,paid,USD +inv_000088,acct_0088,sub_0088,2026-02-04,2026-02-04T12:00:00Z,paid,USD +inv_000089,acct_0089,sub_0089,2026-02-05,2026-02-05T12:00:00Z,paid,USD +inv_000090,acct_0090,sub_0090,2026-02-06,2026-02-06T12:00:00Z,paid,USD +inv_000091,acct_0091,sub_0091,2026-02-07,2026-02-07T12:00:00Z,paid,USD +inv_000092,acct_0092,sub_0092,2026-02-08,2026-02-08T12:00:00Z,paid,USD +inv_000093,acct_0093,sub_0093,2026-02-09,2026-02-09T12:00:00Z,paid,USD +inv_000094,acct_0094,sub_0094,2026-02-10,2026-02-10T12:00:00Z,paid,USD +inv_000095,acct_0095,sub_0095,2026-02-11,2026-02-11T12:00:00Z,paid,USD 
+inv_000096,acct_0096,sub_0096,2026-02-12,2026-02-12T12:00:00Z,paid,USD +inv_000097,acct_0097,sub_0097,2026-02-13,2026-02-13T12:00:00Z,paid,USD +inv_000098,acct_0098,sub_0098,2026-02-14,2026-02-14T12:00:00Z,paid,USD +inv_000099,acct_0099,sub_0099,2026-02-15,2026-02-15T12:00:00Z,paid,USD +inv_000100,acct_0100,sub_0100,2026-02-16,2026-02-16T12:00:00Z,paid,USD +inv_000101,acct_0101,sub_0101,2026-02-17,2026-02-17T12:00:00Z,paid,USD +inv_000102,acct_0102,sub_0102,2026-02-18,2026-02-18T12:00:00Z,paid,USD +inv_000103,acct_0103,sub_0103,2026-02-19,2026-02-19T12:00:00Z,paid,USD +inv_000104,acct_0104,sub_0104,2026-02-20,2026-02-20T12:00:00Z,paid,USD +inv_000105,acct_0105,sub_0105,2026-02-21,2026-02-21T12:00:00Z,paid,USD +inv_000106,acct_0106,sub_0106,2026-02-22,2026-02-22T12:00:00Z,paid,USD +inv_000107,acct_0107,sub_0107,2026-02-23,2026-02-23T12:00:00Z,paid,USD +inv_000108,acct_0108,sub_0108,2026-02-24,2026-02-24T12:00:00Z,paid,USD +inv_000109,acct_0109,sub_0109,2026-02-25,2026-02-25T12:00:00Z,paid,USD +inv_000110,acct_0110,sub_0110,2026-02-26,2026-02-26T12:00:00Z,paid,USD +inv_000111,acct_0111,sub_0111,2026-02-27,2026-02-27T12:00:00Z,paid,USD +inv_000112,acct_0112,sub_0112,2026-02-28,2026-02-28T12:00:00Z,paid,USD +inv_000113,acct_0113,sub_0113,2026-02-01,2026-02-01T12:00:00Z,paid,USD +inv_000114,acct_0114,sub_0114,2026-02-02,2026-02-02T12:00:00Z,paid,USD +inv_000115,acct_0115,sub_0115,2026-02-03,2026-02-03T12:00:00Z,paid,USD +inv_000116,acct_0116,sub_0116,2026-02-04,2026-02-04T12:00:00Z,paid,USD +inv_000117,acct_0117,sub_0117,2026-02-05,2026-02-05T12:00:00Z,paid,USD +inv_000118,acct_0118,sub_0118,2026-02-06,2026-02-06T12:00:00Z,paid,USD +inv_000119,acct_0119,sub_0119,2026-02-07,2026-02-07T12:00:00Z,paid,USD +inv_000120,acct_0120,sub_0120,2026-02-08,2026-02-08T12:00:00Z,paid,USD +inv_000121,acct_0121,sub_0121,2025-01-09,2025-01-09T12:00:00Z,paid,USD +inv_000122,acct_0122,sub_0122,2025-03-10,2025-03-10T12:00:00Z,open,USD 
+inv_000123,acct_0123,sub_0123,2025-03-11,2025-03-11T12:00:00Z,draft,USD +inv_000124,acct_0124,sub_0124,2025-04-12,2025-04-12T12:00:00Z,void,USD +inv_000125,acct_0125,sub_0125,2025-05-13,2025-05-13T12:00:00Z,failed,USD +inv_000126,acct_0126,sub_0126,2025-06-14,2025-06-14T12:00:00Z,paid,USD +inv_000127,acct_0127,sub_0127,2025-07-15,2025-07-15T12:00:00Z,open,USD +inv_000128,acct_0128,sub_0128,2025-08-16,2025-08-16T12:00:00Z,draft,USD +inv_000129,acct_0129,sub_0129,2025-09-17,2025-09-17T12:00:00Z,void,USD +inv_000130,acct_0130,sub_0130,2025-10-18,2025-10-18T12:00:00Z,failed,USD +inv_000131,acct_0131,sub_0131,2025-01-19,2025-01-19T12:00:00Z,paid,USD +inv_000132,acct_0132,sub_0132,2025-03-20,2025-03-20T12:00:00Z,open,USD +inv_000133,acct_0133,sub_0133,2025-03-21,2025-03-21T12:00:00Z,draft,USD +inv_000134,acct_0134,sub_0134,2025-04-22,2025-04-22T12:00:00Z,void,USD +inv_000135,acct_0135,sub_0135,2025-05-23,2025-05-23T12:00:00Z,failed,USD +inv_000136,acct_0136,sub_0136,2025-06-24,2025-06-24T12:00:00Z,paid,USD +inv_000137,acct_0137,sub_0137,2025-07-25,2025-07-25T12:00:00Z,open,USD +inv_000138,acct_0138,sub_0138,2025-08-26,2025-08-26T12:00:00Z,draft,USD +inv_000139,acct_0139,sub_0139,2025-09-27,2025-09-27T12:00:00Z,void,USD +inv_000140,acct_0140,sub_0140,2025-10-28,2025-10-28T12:00:00Z,failed,USD +inv_000141,acct_0141,sub_0141,2025-01-01,2025-01-01T12:00:00Z,paid,USD +inv_000142,acct_0142,sub_0142,2025-03-02,2025-03-02T12:00:00Z,open,USD +inv_000143,acct_0143,sub_0143,2025-03-03,2025-03-03T12:00:00Z,draft,USD +inv_000144,acct_0144,sub_0144,2025-04-04,2025-04-04T12:00:00Z,void,USD +inv_000145,acct_0145,sub_0145,2025-05-05,2025-05-05T12:00:00Z,failed,USD +inv_000146,acct_0146,sub_0146,2025-06-06,2025-06-06T12:00:00Z,paid,USD +inv_000147,acct_0147,sub_0147,2025-07-07,2025-07-07T12:00:00Z,open,USD +inv_000148,acct_0148,sub_0148,2025-08-08,2025-08-08T12:00:00Z,draft,USD +inv_000149,acct_0149,sub_0149,2025-09-09,2025-09-09T12:00:00Z,void,USD 
+inv_000150,acct_0150,sub_0150,2025-10-10,2025-10-10T12:00:00Z,failed,USD +inv_000151,acct_0151,sub_0151,2025-01-11,2025-01-11T12:00:00Z,paid,USD +inv_000152,acct_0152,sub_0152,2025-03-12,2025-03-12T12:00:00Z,open,USD +inv_000153,acct_0153,sub_0153,2025-03-13,2025-03-13T12:00:00Z,draft,USD +inv_000154,acct_0154,sub_0154,2025-04-14,2025-04-14T12:00:00Z,void,USD +inv_000155,acct_0155,sub_0155,2025-05-15,2025-05-15T12:00:00Z,failed,USD +inv_000156,acct_0156,sub_0156,2025-06-16,2025-06-16T12:00:00Z,paid,USD +inv_000157,acct_0157,sub_0157,2025-07-17,2025-07-17T12:00:00Z,open,USD +inv_000158,acct_0158,sub_0158,2025-08-18,2025-08-18T12:00:00Z,draft,USD +inv_000159,acct_0159,sub_0159,2025-09-19,2025-09-19T12:00:00Z,void,USD +inv_000160,acct_0160,sub_0160,2025-10-20,2025-10-20T12:00:00Z,failed,USD +inv_000161,acct_0161,sub_0161,2025-01-21,2025-01-21T12:00:00Z,paid,USD +inv_000162,acct_0162,sub_0162,2025-03-22,2025-03-22T12:00:00Z,open,USD +inv_000163,acct_0163,sub_0163,2025-03-23,2025-03-23T12:00:00Z,draft,USD +inv_000164,acct_0164,sub_0164,2025-04-24,2025-04-24T12:00:00Z,void,USD +inv_000165,acct_0165,sub_0165,2025-05-25,2025-05-25T12:00:00Z,failed,USD +inv_000166,acct_0166,sub_0166,2025-06-26,2025-06-26T12:00:00Z,paid,USD +inv_000167,acct_0167,sub_0167,2025-07-27,2025-07-27T12:00:00Z,open,USD +inv_000168,acct_0168,sub_0168,2025-08-28,2025-08-28T12:00:00Z,draft,USD +inv_000169,acct_0169,sub_0169,2025-09-01,2025-09-01T12:00:00Z,void,USD +inv_000170,acct_0170,sub_0170,2025-10-02,2025-10-02T12:00:00Z,failed,USD +inv_000171,acct_0171,sub_0171,2025-01-03,2025-01-03T12:00:00Z,paid,USD +inv_000172,acct_0172,sub_0172,2025-03-04,2025-03-04T12:00:00Z,open,USD +inv_000173,acct_0173,sub_0173,2025-03-05,2025-03-05T12:00:00Z,draft,USD +inv_000174,acct_0174,sub_0174,2025-04-06,2025-04-06T12:00:00Z,void,USD +inv_000175,acct_0175,sub_0175,2025-05-07,2025-05-07T12:00:00Z,failed,USD +inv_000176,acct_0176,sub_0176,2025-06-08,2025-06-08T12:00:00Z,paid,USD 
+inv_000177,acct_0177,sub_0177,2025-07-09,2025-07-09T12:00:00Z,open,USD +inv_000178,acct_0178,sub_0178,2025-08-10,2025-08-10T12:00:00Z,draft,USD +inv_000179,acct_0179,sub_0179,2025-09-11,2025-09-11T12:00:00Z,void,USD +inv_000180,acct_0180,sub_0180,2025-10-12,2025-10-12T12:00:00Z,failed,USD +inv_000181,acct_0181,sub_0181,2025-01-13,2025-01-13T12:00:00Z,paid,USD +inv_000182,acct_0182,sub_0182,2025-03-14,2025-03-14T12:00:00Z,open,USD +inv_000183,acct_0183,sub_0183,2025-03-15,2025-03-15T12:00:00Z,draft,USD +inv_000184,acct_0184,sub_0184,2025-04-16,2025-04-16T12:00:00Z,void,USD +inv_000185,acct_0185,sub_0185,2025-05-17,2025-05-17T12:00:00Z,failed,USD +inv_000186,acct_0186,sub_0186,2025-06-18,2025-06-18T12:00:00Z,paid,USD +inv_000187,acct_0187,sub_0187,2025-07-19,2025-07-19T12:00:00Z,open,USD +inv_000188,acct_0188,sub_0188,2025-08-20,2025-08-20T12:00:00Z,draft,USD +inv_000189,acct_0189,sub_0189,2025-09-21,2025-09-21T12:00:00Z,void,USD +inv_000190,acct_0190,sub_0190,2025-10-22,2025-10-22T12:00:00Z,failed,USD +inv_000191,acct_0191,sub_0191,2025-01-23,2025-01-23T12:00:00Z,paid,USD +inv_000192,acct_0192,sub_0192,2025-03-24,2025-03-24T12:00:00Z,open,USD +inv_000193,acct_0193,sub_0193,2025-03-25,2025-03-25T12:00:00Z,draft,USD +inv_000194,acct_0194,sub_0194,2025-04-26,2025-04-26T12:00:00Z,void,USD +inv_000195,acct_0195,sub_0195,2025-05-27,2025-05-27T12:00:00Z,failed,USD +inv_000196,acct_0196,sub_0196,2025-06-28,2025-06-28T12:00:00Z,paid,USD +inv_000197,acct_0197,sub_0197,2025-07-01,2025-07-01T12:00:00Z,open,USD +inv_000198,acct_0198,sub_0198,2025-08-02,2025-08-02T12:00:00Z,draft,USD +inv_000199,acct_0001,sub_0199,2025-09-03,2025-09-03T12:00:00Z,void,USD +inv_000200,acct_0002,sub_0200,2025-10-04,2025-10-04T12:00:00Z,failed,USD +inv_000201,acct_0003,sub_0201,2025-01-05,2025-01-05T12:00:00Z,paid,USD +inv_000202,acct_0004,sub_0202,2025-03-06,2025-03-06T12:00:00Z,open,USD +inv_000203,acct_0005,sub_0203,2025-03-07,2025-03-07T12:00:00Z,draft,USD 
+inv_000204,acct_0006,sub_0204,2025-04-08,2025-04-08T12:00:00Z,void,USD +inv_000205,acct_0007,sub_0205,2025-05-09,2025-05-09T12:00:00Z,failed,USD +inv_000206,acct_0008,sub_0206,2025-06-10,2025-06-10T12:00:00Z,paid,USD +inv_000207,acct_0009,sub_0207,2025-07-11,2025-07-11T12:00:00Z,open,USD +inv_000208,acct_0010,sub_0208,2025-08-12,2025-08-12T12:00:00Z,draft,USD +inv_000209,acct_0011,sub_0209,2025-09-13,2025-09-13T12:00:00Z,void,USD +inv_000210,acct_0012,sub_0210,2025-10-14,2025-10-14T12:00:00Z,failed,USD +inv_000211,acct_0013,sub_0211,2025-01-15,2025-01-15T12:00:00Z,paid,USD +inv_000212,acct_0014,sub_0212,2025-03-16,2025-03-16T12:00:00Z,open,USD +inv_000213,acct_0015,sub_0213,2025-03-17,2025-03-17T12:00:00Z,draft,USD +inv_000214,acct_0016,sub_0214,2025-04-18,2025-04-18T12:00:00Z,void,USD +inv_000215,acct_0017,sub_0215,2025-05-19,2025-05-19T12:00:00Z,failed,USD +inv_000216,acct_0018,sub_0216,2025-06-20,2025-06-20T12:00:00Z,paid,USD +inv_000217,acct_0019,sub_0217,2025-07-21,2025-07-21T12:00:00Z,open,USD +inv_000218,acct_0020,sub_0218,2025-08-22,2025-08-22T12:00:00Z,draft,USD +inv_000219,acct_0021,sub_0219,2025-09-23,2025-09-23T12:00:00Z,void,USD +inv_000220,acct_0022,sub_0220,2025-10-24,2025-10-24T12:00:00Z,failed,USD +inv_000221,acct_0023,sub_0221,2025-01-25,2025-01-25T12:00:00Z,paid,USD +inv_000222,acct_0024,sub_0222,2025-03-26,2025-03-26T12:00:00Z,open,USD +inv_000223,acct_0025,sub_0223,2025-03-27,2025-03-27T12:00:00Z,draft,USD +inv_000224,acct_0026,sub_0224,2025-04-28,2025-04-28T12:00:00Z,void,USD +inv_000225,acct_0027,sub_0225,2025-05-01,2025-05-01T12:00:00Z,failed,USD +inv_000226,acct_0028,sub_0226,2025-06-02,2025-06-02T12:00:00Z,paid,USD +inv_000227,acct_0029,sub_0227,2025-07-03,2025-07-03T12:00:00Z,open,USD +inv_000228,acct_0030,sub_0228,2025-08-04,2025-08-04T12:00:00Z,draft,USD +inv_000229,acct_0031,sub_0229,2025-09-05,2025-09-05T12:00:00Z,void,USD +inv_000230,acct_0032,sub_0230,2025-10-06,2025-10-06T12:00:00Z,failed,USD 
+inv_000231,acct_0033,sub_0231,2025-01-07,2025-01-07T12:00:00Z,paid,USD +inv_000232,acct_0034,sub_0232,2025-03-08,2025-03-08T12:00:00Z,open,USD +inv_000233,acct_0035,sub_0233,2025-03-09,2025-03-09T12:00:00Z,draft,USD +inv_000234,acct_0036,sub_0234,2025-04-10,2025-04-10T12:00:00Z,void,USD +inv_000235,acct_0037,sub_0235,2025-05-11,2025-05-11T12:00:00Z,failed,USD +inv_000236,acct_0038,sub_0236,2025-06-12,2025-06-12T12:00:00Z,paid,USD +inv_000237,acct_0039,sub_0237,2025-07-13,2025-07-13T12:00:00Z,open,USD +inv_000238,acct_0040,sub_0238,2025-08-14,2025-08-14T12:00:00Z,draft,USD +inv_000239,acct_0041,sub_0239,2025-09-15,2025-09-15T12:00:00Z,void,USD +inv_000240,acct_0042,sub_0240,2025-10-16,2025-10-16T12:00:00Z,failed,USD +inv_000241,acct_0043,sub_0241,2025-01-17,2025-01-17T12:00:00Z,paid,USD +inv_000242,acct_0044,sub_0242,2025-03-18,2025-03-18T12:00:00Z,open,USD +inv_000243,acct_0045,sub_0243,2025-03-19,2025-03-19T12:00:00Z,draft,USD +inv_000244,acct_0046,sub_0244,2025-04-20,2025-04-20T12:00:00Z,void,USD +inv_000245,acct_0047,sub_0245,2025-05-21,2025-05-21T12:00:00Z,failed,USD +inv_000246,acct_0048,sub_0246,2025-06-22,2025-06-22T12:00:00Z,paid,USD +inv_000247,acct_0049,sub_0247,2025-07-23,2025-07-23T12:00:00Z,open,USD +inv_000248,acct_0050,sub_0248,2025-08-24,2025-08-24T12:00:00Z,draft,USD +inv_000249,acct_0051,sub_0249,2025-09-25,2025-09-25T12:00:00Z,void,USD +inv_000250,acct_0052,sub_0250,2025-10-26,2025-10-26T12:00:00Z,failed,USD +inv_000251,acct_0053,sub_0251,2025-01-27,2025-01-27T12:00:00Z,paid,USD +inv_000252,acct_0054,sub_0252,2025-03-28,2025-03-28T12:00:00Z,open,USD +inv_000253,acct_0055,sub_0253,2025-03-01,2025-03-01T12:00:00Z,draft,USD +inv_000254,acct_0056,sub_0254,2025-04-02,2025-04-02T12:00:00Z,void,USD +inv_000255,acct_0057,sub_0255,2025-05-03,2025-05-03T12:00:00Z,failed,USD +inv_000256,acct_0058,sub_0256,2025-06-04,2025-06-04T12:00:00Z,paid,USD +inv_000257,acct_0059,sub_0257,2025-07-05,2025-07-05T12:00:00Z,open,USD 
+inv_000258,acct_0060,sub_0258,2025-08-06,2025-08-06T12:00:00Z,draft,USD +inv_000259,acct_0061,sub_0259,2025-09-07,2025-09-07T12:00:00Z,void,USD +inv_000260,acct_0062,sub_0260,2025-10-08,2025-10-08T12:00:00Z,failed,USD +inv_000261,acct_0063,sub_0261,2025-01-09,2025-01-09T12:00:00Z,paid,USD +inv_000262,acct_0064,sub_0262,2025-03-10,2025-03-10T12:00:00Z,open,USD +inv_000263,acct_0065,sub_0263,2025-03-11,2025-03-11T12:00:00Z,draft,USD +inv_000264,acct_0066,sub_0264,2025-04-12,2025-04-12T12:00:00Z,void,USD +inv_000265,acct_0067,sub_0265,2025-05-13,2025-05-13T12:00:00Z,failed,USD +inv_000266,acct_0068,sub_0266,2025-06-14,2025-06-14T12:00:00Z,paid,USD +inv_000267,acct_0069,sub_0267,2025-07-15,2025-07-15T12:00:00Z,open,USD +inv_000268,acct_0070,sub_0268,2025-08-16,2025-08-16T12:00:00Z,draft,USD +inv_000269,acct_0071,sub_0269,2025-09-17,2025-09-17T12:00:00Z,void,USD +inv_000270,acct_0072,sub_0270,2025-10-18,2025-10-18T12:00:00Z,failed,USD +inv_000271,acct_0073,sub_0271,2025-01-19,2025-01-19T12:00:00Z,paid,USD +inv_000272,acct_0074,sub_0272,2025-03-20,2025-03-20T12:00:00Z,open,USD +inv_000273,acct_0075,sub_0273,2025-03-21,2025-03-21T12:00:00Z,draft,USD +inv_000274,acct_0076,sub_0274,2025-04-22,2025-04-22T12:00:00Z,void,USD +inv_000275,acct_0077,sub_0275,2025-05-23,2025-05-23T12:00:00Z,failed,USD +inv_000276,acct_0078,sub_0276,2025-06-24,2025-06-24T12:00:00Z,paid,USD +inv_000277,acct_0079,sub_0277,2025-07-25,2025-07-25T12:00:00Z,open,USD +inv_000278,acct_0080,sub_0278,2025-08-26,2025-08-26T12:00:00Z,draft,USD +inv_000279,acct_0081,sub_0279,2025-09-27,2025-09-27T12:00:00Z,void,USD +inv_000280,acct_0082,sub_0280,2025-10-28,2025-10-28T12:00:00Z,failed,USD +inv_000281,acct_0083,sub_0281,2025-01-01,2025-01-01T12:00:00Z,paid,USD +inv_000282,acct_0084,sub_0282,2025-03-02,2025-03-02T12:00:00Z,open,USD +inv_000283,acct_0085,sub_0283,2025-03-03,2025-03-03T12:00:00Z,draft,USD +inv_000284,acct_0086,sub_0284,2025-04-04,2025-04-04T12:00:00Z,void,USD 
+inv_000285,acct_0087,sub_0285,2025-05-05,2025-05-05T12:00:00Z,failed,USD +inv_000286,acct_0088,sub_0286,2025-06-06,2025-06-06T12:00:00Z,paid,USD +inv_000287,acct_0089,sub_0287,2025-07-07,2025-07-07T12:00:00Z,open,USD +inv_000288,acct_0090,sub_0288,2025-08-08,2025-08-08T12:00:00Z,draft,USD +inv_000289,acct_0091,sub_0289,2025-09-09,2025-09-09T12:00:00Z,void,USD +inv_000290,acct_0092,sub_0290,2025-10-10,2025-10-10T12:00:00Z,failed,USD +inv_000291,acct_0093,sub_0291,2025-01-11,2025-01-11T12:00:00Z,paid,USD +inv_000292,acct_0094,sub_0292,2025-03-12,2025-03-12T12:00:00Z,open,USD +inv_000293,acct_0095,sub_0293,2025-03-13,2025-03-13T12:00:00Z,draft,USD +inv_000294,acct_0096,sub_0294,2025-04-14,2025-04-14T12:00:00Z,void,USD +inv_000295,acct_0097,sub_0295,2025-05-15,2025-05-15T12:00:00Z,failed,USD +inv_000296,acct_0098,sub_0296,2025-06-16,2025-06-16T12:00:00Z,paid,USD +inv_000297,acct_0099,sub_0297,2025-07-17,2025-07-17T12:00:00Z,open,USD +inv_000298,acct_0100,sub_0298,2025-08-18,2025-08-18T12:00:00Z,draft,USD +inv_000299,acct_0101,sub_0299,2025-09-19,2025-09-19T12:00:00Z,void,USD +inv_000300,acct_0102,sub_0300,2025-10-20,2025-10-20T12:00:00Z,failed,USD +inv_000301,acct_0103,sub_0301,2025-01-21,2025-01-21T12:00:00Z,paid,USD +inv_000302,acct_0104,sub_0302,2025-03-22,2025-03-22T12:00:00Z,open,USD +inv_000303,acct_0105,sub_0303,2025-03-23,2025-03-23T12:00:00Z,draft,USD +inv_000304,acct_0106,sub_0304,2025-04-24,2025-04-24T12:00:00Z,void,USD +inv_000305,acct_0107,sub_0305,2025-05-25,2025-05-25T12:00:00Z,failed,USD +inv_000306,acct_0108,sub_0306,2025-06-26,2025-06-26T12:00:00Z,paid,USD +inv_000307,acct_0109,sub_0307,2025-07-27,2025-07-27T12:00:00Z,open,USD +inv_000308,acct_0110,sub_0308,2025-08-28,2025-08-28T12:00:00Z,draft,USD +inv_000309,acct_0111,sub_0309,2025-09-01,2025-09-01T12:00:00Z,void,USD +inv_000310,acct_0112,sub_0310,2025-10-02,2025-10-02T12:00:00Z,failed,USD +inv_000311,acct_0113,sub_0311,2025-01-03,2025-01-03T12:00:00Z,paid,USD 
+inv_000312,acct_0114,sub_0312,2025-03-04,2025-03-04T12:00:00Z,open,USD +inv_000313,acct_0115,sub_0313,2025-03-05,2025-03-05T12:00:00Z,draft,USD +inv_000314,acct_0116,sub_0314,2025-04-06,2025-04-06T12:00:00Z,void,USD +inv_000315,acct_0117,sub_0315,2025-05-07,2025-05-07T12:00:00Z,failed,USD +inv_000316,acct_0118,sub_0316,2025-06-08,2025-06-08T12:00:00Z,paid,USD +inv_000317,acct_0119,sub_0317,2025-07-09,2025-07-09T12:00:00Z,open,USD +inv_000318,acct_0120,sub_0318,2025-08-10,2025-08-10T12:00:00Z,draft,USD +inv_000319,acct_0121,sub_0319,2025-09-11,2025-09-11T12:00:00Z,void,USD +inv_000320,acct_0122,sub_0320,2025-10-12,2025-10-12T12:00:00Z,failed,USD +inv_000321,acct_0123,sub_0321,2025-01-13,2025-01-13T12:00:00Z,paid,USD +inv_000322,acct_0124,sub_0322,2025-03-14,2025-03-14T12:00:00Z,open,USD +inv_000323,acct_0125,sub_0323,2025-03-15,2025-03-15T12:00:00Z,draft,USD +inv_000324,acct_0126,sub_0324,2025-04-16,2025-04-16T12:00:00Z,void,USD +inv_000325,acct_0127,sub_0325,2025-05-17,2025-05-17T12:00:00Z,failed,USD +inv_000326,acct_0128,sub_0326,2025-06-18,2025-06-18T12:00:00Z,paid,USD +inv_000327,acct_0129,sub_0327,2025-07-19,2025-07-19T12:00:00Z,open,USD +inv_000328,acct_0130,sub_0328,2025-08-20,2025-08-20T12:00:00Z,draft,USD +inv_000329,acct_0131,sub_0329,2025-09-21,2025-09-21T12:00:00Z,void,USD +inv_000330,acct_0132,sub_0330,2025-10-22,2025-10-22T12:00:00Z,failed,USD +inv_000331,acct_0133,sub_0331,2025-01-23,2025-01-23T12:00:00Z,paid,USD +inv_000332,acct_0134,sub_0332,2025-03-24,2025-03-24T12:00:00Z,open,USD +inv_000333,acct_0135,sub_0333,2025-03-25,2025-03-25T12:00:00Z,draft,USD +inv_000334,acct_0136,sub_0334,2025-04-26,2025-04-26T12:00:00Z,void,USD +inv_000335,acct_0137,sub_0335,2025-05-27,2025-05-27T12:00:00Z,failed,USD +inv_000336,acct_0138,sub_0336,2025-06-28,2025-06-28T12:00:00Z,paid,USD +inv_000337,acct_0139,sub_0337,2025-07-01,2025-07-01T12:00:00Z,open,USD +inv_000338,acct_0140,sub_0338,2025-08-02,2025-08-02T12:00:00Z,draft,USD 
+inv_000339,acct_0141,sub_0339,2025-09-03,2025-09-03T12:00:00Z,void,USD +inv_000340,acct_0142,sub_0340,2025-10-04,2025-10-04T12:00:00Z,failed,USD +inv_000341,acct_0143,sub_0341,2025-01-05,2025-01-05T12:00:00Z,paid,USD +inv_000342,acct_0144,sub_0342,2025-03-06,2025-03-06T12:00:00Z,open,USD +inv_000343,acct_0145,sub_0343,2025-03-07,2025-03-07T12:00:00Z,draft,USD +inv_000344,acct_0146,sub_0344,2025-04-08,2025-04-08T12:00:00Z,void,USD +inv_000345,acct_0147,sub_0345,2025-05-09,2025-05-09T12:00:00Z,failed,USD +inv_000346,acct_0148,sub_0346,2025-06-10,2025-06-10T12:00:00Z,paid,USD +inv_000347,acct_0149,sub_0347,2025-07-11,2025-07-11T12:00:00Z,open,USD +inv_000348,acct_0150,sub_0348,2025-08-12,2025-08-12T12:00:00Z,draft,USD +inv_000349,acct_0151,sub_0349,2025-09-13,2025-09-13T12:00:00Z,void,USD +inv_000350,acct_0152,sub_0350,2025-10-14,2025-10-14T12:00:00Z,failed,USD +inv_000351,acct_0153,sub_0351,2025-01-15,2025-01-15T12:00:00Z,paid,USD +inv_000352,acct_0154,sub_0352,2025-03-16,2025-03-16T12:00:00Z,open,USD +inv_000353,acct_0155,sub_0353,2025-03-17,2025-03-17T12:00:00Z,draft,USD +inv_000354,acct_0156,sub_0354,2025-04-18,2025-04-18T12:00:00Z,void,USD +inv_000355,acct_0157,sub_0355,2025-05-19,2025-05-19T12:00:00Z,failed,USD +inv_000356,acct_0158,sub_0356,2025-06-20,2025-06-20T12:00:00Z,paid,USD +inv_000357,acct_0159,sub_0357,2025-07-21,2025-07-21T12:00:00Z,open,USD +inv_000358,acct_0160,sub_0358,2025-08-22,2025-08-22T12:00:00Z,draft,USD +inv_000359,acct_0161,sub_0359,2025-09-23,2025-09-23T12:00:00Z,void,USD +inv_000360,acct_0162,sub_0360,2025-10-24,2025-10-24T12:00:00Z,failed,USD +inv_000361,acct_0163,sub_0001,2025-01-25,2025-01-25T12:00:00Z,paid,USD +inv_000362,acct_0164,sub_0002,2025-03-26,2025-03-26T12:00:00Z,open,USD +inv_000363,acct_0165,sub_0003,2025-03-27,2025-03-27T12:00:00Z,draft,USD +inv_000364,acct_0166,sub_0004,2025-04-28,2025-04-28T12:00:00Z,void,USD +inv_000365,acct_0167,sub_0005,2025-05-01,2025-05-01T12:00:00Z,failed,USD 
+inv_000366,acct_0168,sub_0006,2025-06-02,2025-06-02T12:00:00Z,paid,USD +inv_000367,acct_0169,sub_0007,2025-07-03,2025-07-03T12:00:00Z,open,USD +inv_000368,acct_0170,sub_0008,2025-08-04,2025-08-04T12:00:00Z,draft,USD +inv_000369,acct_0171,sub_0009,2025-09-05,2025-09-05T12:00:00Z,void,USD +inv_000370,acct_0172,sub_0010,2025-10-06,2025-10-06T12:00:00Z,failed,USD +inv_000371,acct_0173,sub_0011,2025-01-07,2025-01-07T12:00:00Z,paid,USD +inv_000372,acct_0174,sub_0012,2025-03-08,2025-03-08T12:00:00Z,open,USD +inv_000373,acct_0175,sub_0013,2025-03-09,2025-03-09T12:00:00Z,draft,USD +inv_000374,acct_0176,sub_0014,2025-04-10,2025-04-10T12:00:00Z,void,USD +inv_000375,acct_0177,sub_0015,2025-05-11,2025-05-11T12:00:00Z,failed,USD +inv_000376,acct_0178,sub_0016,2025-06-12,2025-06-12T12:00:00Z,paid,USD +inv_000377,acct_0179,sub_0017,2025-07-13,2025-07-13T12:00:00Z,open,USD +inv_000378,acct_0180,sub_0018,2025-08-14,2025-08-14T12:00:00Z,draft,USD +inv_000379,acct_0181,sub_0019,2025-09-15,2025-09-15T12:00:00Z,void,USD +inv_000380,acct_0182,sub_0020,2025-10-16,2025-10-16T12:00:00Z,failed,USD +inv_000381,acct_0183,sub_0021,2025-01-17,2025-01-17T12:00:00Z,paid,USD +inv_000382,acct_0184,sub_0022,2025-03-18,2025-03-18T12:00:00Z,open,USD +inv_000383,acct_0185,sub_0023,2025-03-19,2025-03-19T12:00:00Z,draft,USD +inv_000384,acct_0186,sub_0024,2025-04-20,2025-04-20T12:00:00Z,void,USD +inv_000385,acct_0187,sub_0025,2025-05-21,2025-05-21T12:00:00Z,failed,USD +inv_000386,acct_0188,sub_0026,2025-06-22,2025-06-22T12:00:00Z,paid,USD +inv_000387,acct_0189,sub_0027,2025-07-23,2025-07-23T12:00:00Z,open,USD +inv_000388,acct_0190,sub_0028,2025-08-24,2025-08-24T12:00:00Z,draft,USD +inv_000389,acct_0191,sub_0029,2025-09-25,2025-09-25T12:00:00Z,void,USD +inv_000390,acct_0192,sub_0030,2025-10-26,2025-10-26T12:00:00Z,failed,USD +inv_000391,acct_0193,sub_0031,2025-01-27,2025-01-27T12:00:00Z,paid,USD +inv_000392,acct_0194,sub_0032,2025-03-28,2025-03-28T12:00:00Z,open,USD 
+inv_000393,acct_0195,sub_0033,2025-03-01,2025-03-01T12:00:00Z,draft,USD +inv_000394,acct_0196,sub_0034,2025-04-02,2025-04-02T12:00:00Z,void,USD +inv_000395,acct_0197,sub_0035,2025-05-03,2025-05-03T12:00:00Z,failed,USD +inv_000396,acct_0198,sub_0036,2025-06-04,2025-06-04T12:00:00Z,paid,USD +inv_000397,acct_0001,sub_0037,2025-07-05,2025-07-05T12:00:00Z,open,USD +inv_000398,acct_0002,sub_0038,2025-08-06,2025-08-06T12:00:00Z,draft,USD +inv_000399,acct_0003,sub_0039,2025-09-07,2025-09-07T12:00:00Z,void,USD +inv_000400,acct_0004,sub_0040,2025-10-08,2025-10-08T12:00:00Z,failed,USD +inv_000401,acct_0005,sub_0041,2025-01-09,2025-01-09T12:00:00Z,paid,USD +inv_000402,acct_0006,sub_0042,2025-03-10,2025-03-10T12:00:00Z,open,USD +inv_000403,acct_0007,sub_0043,2025-03-11,2025-03-11T12:00:00Z,draft,USD +inv_000404,acct_0008,sub_0044,2025-04-12,2025-04-12T12:00:00Z,void,USD +inv_000405,acct_0009,sub_0045,2025-05-13,2025-05-13T12:00:00Z,failed,USD +inv_000406,acct_0010,sub_0046,2025-06-14,2025-06-14T12:00:00Z,paid,USD +inv_000407,acct_0011,sub_0047,2025-07-15,2025-07-15T12:00:00Z,open,USD +inv_000408,acct_0012,sub_0048,2025-08-16,2025-08-16T12:00:00Z,draft,USD +inv_000409,acct_0013,sub_0049,2025-09-17,2025-09-17T12:00:00Z,void,USD +inv_000410,acct_0014,sub_0050,2025-10-18,2025-10-18T12:00:00Z,failed,USD +inv_000411,acct_0015,sub_0051,2025-01-19,2025-01-19T12:00:00Z,paid,USD +inv_000412,acct_0016,sub_0052,2025-03-20,2025-03-20T12:00:00Z,open,USD +inv_000413,acct_0017,sub_0053,2025-03-21,2025-03-21T12:00:00Z,draft,USD +inv_000414,acct_0018,sub_0054,2025-04-22,2025-04-22T12:00:00Z,void,USD +inv_000415,acct_0019,sub_0055,2025-05-23,2025-05-23T12:00:00Z,failed,USD +inv_000416,acct_0020,sub_0056,2025-06-24,2025-06-24T12:00:00Z,paid,USD +inv_000417,acct_0021,sub_0057,2025-07-25,2025-07-25T12:00:00Z,open,USD +inv_000418,acct_0022,sub_0058,2025-08-26,2025-08-26T12:00:00Z,draft,USD +inv_000419,acct_0023,sub_0059,2025-09-27,2025-09-27T12:00:00Z,void,USD 
+inv_000420,acct_0024,sub_0060,2025-10-28,2025-10-28T12:00:00Z,failed,USD +inv_000421,acct_0025,sub_0061,2025-01-01,2025-01-01T12:00:00Z,paid,USD +inv_000422,acct_0026,sub_0062,2025-03-02,2025-03-02T12:00:00Z,open,USD +inv_000423,acct_0027,sub_0063,2025-03-03,2025-03-03T12:00:00Z,draft,USD +inv_000424,acct_0028,sub_0064,2025-04-04,2025-04-04T12:00:00Z,void,USD +inv_000425,acct_0029,sub_0065,2025-05-05,2025-05-05T12:00:00Z,failed,USD +inv_000426,acct_0030,sub_0066,2025-06-06,2025-06-06T12:00:00Z,paid,USD +inv_000427,acct_0031,sub_0067,2025-07-07,2025-07-07T12:00:00Z,open,USD +inv_000428,acct_0032,sub_0068,2025-08-08,2025-08-08T12:00:00Z,draft,USD +inv_000429,acct_0033,sub_0069,2025-09-09,2025-09-09T12:00:00Z,void,USD +inv_000430,acct_0034,sub_0070,2025-10-10,2025-10-10T12:00:00Z,failed,USD +inv_000431,acct_0035,sub_0071,2025-01-11,2025-01-11T12:00:00Z,paid,USD +inv_000432,acct_0036,sub_0072,2025-03-12,2025-03-12T12:00:00Z,open,USD +inv_000433,acct_0037,sub_0073,2025-03-13,2025-03-13T12:00:00Z,draft,USD +inv_000434,acct_0038,sub_0074,2025-04-14,2025-04-14T12:00:00Z,void,USD +inv_000435,acct_0039,sub_0075,2025-05-15,2025-05-15T12:00:00Z,failed,USD +inv_000436,acct_0040,sub_0076,2025-06-16,2025-06-16T12:00:00Z,paid,USD +inv_000437,acct_0041,sub_0077,2025-07-17,2025-07-17T12:00:00Z,open,USD +inv_000438,acct_0042,sub_0078,2025-08-18,2025-08-18T12:00:00Z,draft,USD +inv_000439,acct_0043,sub_0079,2025-09-19,2025-09-19T12:00:00Z,void,USD +inv_000440,acct_0044,sub_0080,2025-10-20,2025-10-20T12:00:00Z,failed,USD +inv_000441,acct_0045,sub_0081,2025-01-21,2025-01-21T12:00:00Z,paid,USD +inv_000442,acct_0046,sub_0082,2025-03-22,2025-03-22T12:00:00Z,open,USD +inv_000443,acct_0047,sub_0083,2025-03-23,2025-03-23T12:00:00Z,draft,USD +inv_000444,acct_0048,sub_0084,2025-04-24,2025-04-24T12:00:00Z,void,USD +inv_000445,acct_0049,sub_0085,2025-05-25,2025-05-25T12:00:00Z,failed,USD +inv_000446,acct_0050,sub_0086,2025-06-26,2025-06-26T12:00:00Z,paid,USD 
+inv_000447,acct_0051,sub_0087,2025-07-27,2025-07-27T12:00:00Z,open,USD +inv_000448,acct_0052,sub_0088,2025-08-28,2025-08-28T12:00:00Z,draft,USD +inv_000449,acct_0053,sub_0089,2025-09-01,2025-09-01T12:00:00Z,void,USD +inv_000450,acct_0054,sub_0090,2025-10-02,2025-10-02T12:00:00Z,failed,USD +inv_000451,acct_0055,sub_0091,2025-01-03,2025-01-03T12:00:00Z,paid,USD +inv_000452,acct_0056,sub_0092,2025-03-04,2025-03-04T12:00:00Z,open,USD +inv_000453,acct_0057,sub_0093,2025-03-05,2025-03-05T12:00:00Z,draft,USD +inv_000454,acct_0058,sub_0094,2025-04-06,2025-04-06T12:00:00Z,void,USD +inv_000455,acct_0059,sub_0095,2025-05-07,2025-05-07T12:00:00Z,failed,USD +inv_000456,acct_0060,sub_0096,2025-06-08,2025-06-08T12:00:00Z,paid,USD +inv_000457,acct_0061,sub_0097,2025-07-09,2025-07-09T12:00:00Z,open,USD +inv_000458,acct_0062,sub_0098,2025-08-10,2025-08-10T12:00:00Z,draft,USD +inv_000459,acct_0063,sub_0099,2025-09-11,2025-09-11T12:00:00Z,void,USD +inv_000460,acct_0064,sub_0100,2025-10-12,2025-10-12T12:00:00Z,failed,USD +inv_000461,acct_0065,sub_0101,2025-01-13,2025-01-13T12:00:00Z,paid,USD +inv_000462,acct_0066,sub_0102,2025-03-14,2025-03-14T12:00:00Z,open,USD +inv_000463,acct_0067,sub_0103,2025-03-15,2025-03-15T12:00:00Z,draft,USD +inv_000464,acct_0068,sub_0104,2025-04-16,2025-04-16T12:00:00Z,void,USD +inv_000465,acct_0069,sub_0105,2025-05-17,2025-05-17T12:00:00Z,failed,USD +inv_000466,acct_0070,sub_0106,2025-06-18,2025-06-18T12:00:00Z,paid,USD +inv_000467,acct_0071,sub_0107,2025-07-19,2025-07-19T12:00:00Z,open,USD +inv_000468,acct_0072,sub_0108,2025-08-20,2025-08-20T12:00:00Z,draft,USD +inv_000469,acct_0073,sub_0109,2025-09-21,2025-09-21T12:00:00Z,void,USD +inv_000470,acct_0074,sub_0110,2025-10-22,2025-10-22T12:00:00Z,failed,USD +inv_000471,acct_0075,sub_0111,2025-01-23,2025-01-23T12:00:00Z,paid,USD +inv_000472,acct_0076,sub_0112,2025-03-24,2025-03-24T12:00:00Z,open,USD +inv_000473,acct_0077,sub_0113,2025-03-25,2025-03-25T12:00:00Z,draft,USD 
+inv_000474,acct_0078,sub_0114,2025-04-26,2025-04-26T12:00:00Z,void,USD +inv_000475,acct_0079,sub_0115,2025-05-27,2025-05-27T12:00:00Z,failed,USD +inv_000476,acct_0080,sub_0116,2025-06-28,2025-06-28T12:00:00Z,paid,USD +inv_000477,acct_0081,sub_0117,2025-07-01,2025-07-01T12:00:00Z,open,USD +inv_000478,acct_0082,sub_0118,2025-08-02,2025-08-02T12:00:00Z,draft,USD +inv_000479,acct_0083,sub_0119,2025-09-03,2025-09-03T12:00:00Z,void,USD +inv_000480,acct_0084,sub_0120,2025-10-04,2025-10-04T12:00:00Z,failed,USD +inv_000481,acct_0085,sub_0121,2025-01-05,2025-01-05T12:00:00Z,paid,USD +inv_000482,acct_0086,sub_0122,2025-03-06,2025-03-06T12:00:00Z,open,USD +inv_000483,acct_0087,sub_0123,2025-03-07,2025-03-07T12:00:00Z,draft,USD +inv_000484,acct_0088,sub_0124,2025-04-08,2025-04-08T12:00:00Z,void,USD +inv_000485,acct_0089,sub_0125,2025-05-09,2025-05-09T12:00:00Z,failed,USD +inv_000486,acct_0090,sub_0126,2025-06-10,2025-06-10T12:00:00Z,paid,USD +inv_000487,acct_0091,sub_0127,2025-07-11,2025-07-11T12:00:00Z,open,USD +inv_000488,acct_0092,sub_0128,2025-08-12,2025-08-12T12:00:00Z,draft,USD +inv_000489,acct_0093,sub_0129,2025-09-13,2025-09-13T12:00:00Z,void,USD +inv_000490,acct_0094,sub_0130,2025-10-14,2025-10-14T12:00:00Z,failed,USD +inv_000491,acct_0095,sub_0131,2025-01-15,2025-01-15T12:00:00Z,paid,USD +inv_000492,acct_0096,sub_0132,2025-03-16,2025-03-16T12:00:00Z,open,USD +inv_000493,acct_0097,sub_0133,2025-03-17,2025-03-17T12:00:00Z,draft,USD +inv_000494,acct_0098,sub_0134,2025-04-18,2025-04-18T12:00:00Z,void,USD +inv_000495,acct_0099,sub_0135,2025-05-19,2025-05-19T12:00:00Z,failed,USD +inv_000496,acct_0100,sub_0136,2025-06-20,2025-06-20T12:00:00Z,paid,USD +inv_000497,acct_0101,sub_0137,2025-07-21,2025-07-21T12:00:00Z,open,USD +inv_000498,acct_0102,sub_0138,2025-08-22,2025-08-22T12:00:00Z,draft,USD +inv_000499,acct_0103,sub_0139,2025-09-23,2025-09-23T12:00:00Z,void,USD +inv_000500,acct_0104,sub_0140,2025-10-24,2025-10-24T12:00:00Z,failed,USD 
+inv_000501,acct_0105,sub_0141,2025-01-25,2025-01-25T12:00:00Z,paid,USD +inv_000502,acct_0106,sub_0142,2025-03-26,2025-03-26T12:00:00Z,open,USD +inv_000503,acct_0107,sub_0143,2025-03-27,2025-03-27T12:00:00Z,draft,USD +inv_000504,acct_0108,sub_0144,2025-04-28,2025-04-28T12:00:00Z,void,USD +inv_000505,acct_0109,sub_0145,2025-05-01,2025-05-01T12:00:00Z,failed,USD +inv_000506,acct_0110,sub_0146,2025-06-02,2025-06-02T12:00:00Z,paid,USD +inv_000507,acct_0111,sub_0147,2025-07-03,2025-07-03T12:00:00Z,open,USD +inv_000508,acct_0112,sub_0148,2025-08-04,2025-08-04T12:00:00Z,draft,USD +inv_000509,acct_0113,sub_0149,2025-09-05,2025-09-05T12:00:00Z,void,USD +inv_000510,acct_0114,sub_0150,2025-10-06,2025-10-06T12:00:00Z,failed,USD +inv_000511,acct_0115,sub_0151,2025-01-07,2025-01-07T12:00:00Z,paid,USD +inv_000512,acct_0116,sub_0152,2025-03-08,2025-03-08T12:00:00Z,open,USD +inv_000513,acct_0117,sub_0153,2025-03-09,2025-03-09T12:00:00Z,draft,USD +inv_000514,acct_0118,sub_0154,2025-04-10,2025-04-10T12:00:00Z,void,USD +inv_000515,acct_0119,sub_0155,2025-05-11,2025-05-11T12:00:00Z,failed,USD +inv_000516,acct_0120,sub_0156,2025-06-12,2025-06-12T12:00:00Z,paid,USD +inv_000517,acct_0121,sub_0157,2025-07-13,2025-07-13T12:00:00Z,open,USD +inv_000518,acct_0122,sub_0158,2025-08-14,2025-08-14T12:00:00Z,draft,USD +inv_000519,acct_0123,sub_0159,2025-09-15,2025-09-15T12:00:00Z,void,USD +inv_000520,acct_0124,sub_0160,2025-10-16,2025-10-16T12:00:00Z,failed,USD +inv_000521,acct_0125,sub_0161,2025-01-17,2025-01-17T12:00:00Z,paid,USD +inv_000522,acct_0126,sub_0162,2025-03-18,2025-03-18T12:00:00Z,open,USD +inv_000523,acct_0127,sub_0163,2025-03-19,2025-03-19T12:00:00Z,draft,USD +inv_000524,acct_0128,sub_0164,2025-04-20,2025-04-20T12:00:00Z,void,USD +inv_000525,acct_0129,sub_0165,2025-05-21,2025-05-21T12:00:00Z,failed,USD +inv_000526,acct_0130,sub_0166,2025-06-22,2025-06-22T12:00:00Z,paid,USD +inv_000527,acct_0131,sub_0167,2025-07-23,2025-07-23T12:00:00Z,open,USD 
+inv_000528,acct_0132,sub_0168,2025-08-24,2025-08-24T12:00:00Z,draft,USD +inv_000529,acct_0133,sub_0169,2025-09-25,2025-09-25T12:00:00Z,void,USD +inv_000530,acct_0134,sub_0170,2025-10-26,2025-10-26T12:00:00Z,failed,USD +inv_000531,acct_0135,sub_0171,2025-01-27,2025-01-27T12:00:00Z,paid,USD +inv_000532,acct_0136,sub_0172,2025-03-28,2025-03-28T12:00:00Z,open,USD +inv_000533,acct_0137,sub_0173,2025-03-01,2025-03-01T12:00:00Z,draft,USD +inv_000534,acct_0138,sub_0174,2025-04-02,2025-04-02T12:00:00Z,void,USD +inv_000535,acct_0139,sub_0175,2025-05-03,2025-05-03T12:00:00Z,failed,USD +inv_000536,acct_0140,sub_0176,2025-06-04,2025-06-04T12:00:00Z,paid,USD +inv_000537,acct_0141,sub_0177,2025-07-05,2025-07-05T12:00:00Z,open,USD +inv_000538,acct_0142,sub_0178,2025-08-06,2025-08-06T12:00:00Z,draft,USD +inv_000539,acct_0143,sub_0179,2025-09-07,2025-09-07T12:00:00Z,void,USD +inv_000540,acct_0144,sub_0180,2025-10-08,2025-10-08T12:00:00Z,failed,USD +inv_000541,acct_0145,sub_0181,2025-01-09,2025-01-09T12:00:00Z,paid,USD +inv_000542,acct_0146,sub_0182,2025-03-10,2025-03-10T12:00:00Z,open,USD +inv_000543,acct_0147,sub_0183,2025-03-11,2025-03-11T12:00:00Z,draft,USD +inv_000544,acct_0148,sub_0184,2025-04-12,2025-04-12T12:00:00Z,void,USD +inv_000545,acct_0149,sub_0185,2025-05-13,2025-05-13T12:00:00Z,failed,USD +inv_000546,acct_0150,sub_0186,2025-06-14,2025-06-14T12:00:00Z,paid,USD +inv_000547,acct_0151,sub_0187,2025-07-15,2025-07-15T12:00:00Z,open,USD +inv_000548,acct_0152,sub_0188,2025-08-16,2025-08-16T12:00:00Z,draft,USD +inv_000549,acct_0153,sub_0189,2025-09-17,2025-09-17T12:00:00Z,void,USD +inv_000550,acct_0154,sub_0190,2025-10-18,2025-10-18T12:00:00Z,failed,USD +inv_000551,acct_0155,sub_0191,2025-01-19,2025-01-19T12:00:00Z,paid,USD +inv_000552,acct_0156,sub_0192,2025-03-20,2025-03-20T12:00:00Z,open,USD +inv_000553,acct_0157,sub_0193,2025-03-21,2025-03-21T12:00:00Z,draft,USD +inv_000554,acct_0158,sub_0194,2025-04-22,2025-04-22T12:00:00Z,void,USD 
+inv_000555,acct_0159,sub_0195,2025-05-23,2025-05-23T12:00:00Z,failed,USD +inv_000556,acct_0160,sub_0196,2025-06-24,2025-06-24T12:00:00Z,paid,USD +inv_000557,acct_0161,sub_0197,2025-07-25,2025-07-25T12:00:00Z,open,USD +inv_000558,acct_0162,sub_0198,2025-08-26,2025-08-26T12:00:00Z,draft,USD +inv_000559,acct_0163,sub_0199,2025-09-27,2025-09-27T12:00:00Z,void,USD +inv_000560,acct_0164,sub_0200,2025-10-28,2025-10-28T12:00:00Z,failed,USD +inv_000561,acct_0165,sub_0201,2025-01-01,2025-01-01T12:00:00Z,paid,USD +inv_000562,acct_0166,sub_0202,2025-03-02,2025-03-02T12:00:00Z,open,USD +inv_000563,acct_0167,sub_0203,2025-03-03,2025-03-03T12:00:00Z,draft,USD +inv_000564,acct_0168,sub_0204,2025-04-04,2025-04-04T12:00:00Z,void,USD +inv_000565,acct_0169,sub_0205,2025-05-05,2025-05-05T12:00:00Z,failed,USD +inv_000566,acct_0170,sub_0206,2025-06-06,2025-06-06T12:00:00Z,paid,USD +inv_000567,acct_0171,sub_0207,2025-07-07,2025-07-07T12:00:00Z,open,USD +inv_000568,acct_0172,sub_0208,2025-08-08,2025-08-08T12:00:00Z,draft,USD +inv_000569,acct_0173,sub_0209,2025-09-09,2025-09-09T12:00:00Z,void,USD +inv_000570,acct_0174,sub_0210,2025-10-10,2025-10-10T12:00:00Z,failed,USD +inv_000571,acct_0175,sub_0211,2025-01-11,2025-01-11T12:00:00Z,paid,USD +inv_000572,acct_0176,sub_0212,2025-03-12,2025-03-12T12:00:00Z,open,USD +inv_000573,acct_0177,sub_0213,2025-03-13,2025-03-13T12:00:00Z,draft,USD +inv_000574,acct_0178,sub_0214,2025-04-14,2025-04-14T12:00:00Z,void,USD +inv_000575,acct_0179,sub_0215,2025-05-15,2025-05-15T12:00:00Z,failed,USD +inv_000576,acct_0180,sub_0216,2025-06-16,2025-06-16T12:00:00Z,paid,USD +inv_000577,acct_0181,sub_0217,2025-07-17,2025-07-17T12:00:00Z,open,USD +inv_000578,acct_0182,sub_0218,2025-08-18,2025-08-18T12:00:00Z,draft,USD +inv_000579,acct_0183,sub_0219,2025-09-19,2025-09-19T12:00:00Z,void,USD +inv_000580,acct_0184,sub_0220,2025-10-20,2025-10-20T12:00:00Z,failed,USD +inv_000581,acct_0185,sub_0221,2025-01-21,2025-01-21T12:00:00Z,paid,USD 
+inv_000582,acct_0186,sub_0222,2025-03-22,2025-03-22T12:00:00Z,open,USD +inv_000583,acct_0187,sub_0223,2025-03-23,2025-03-23T12:00:00Z,draft,USD +inv_000584,acct_0188,sub_0224,2025-04-24,2025-04-24T12:00:00Z,void,USD +inv_000585,acct_0189,sub_0225,2025-05-25,2025-05-25T12:00:00Z,failed,USD +inv_000586,acct_0190,sub_0226,2025-06-26,2025-06-26T12:00:00Z,paid,USD +inv_000587,acct_0191,sub_0227,2025-07-27,2025-07-27T12:00:00Z,open,USD +inv_000588,acct_0192,sub_0228,2025-08-28,2025-08-28T12:00:00Z,draft,USD +inv_000589,acct_0193,sub_0229,2025-09-01,2025-09-01T12:00:00Z,void,USD +inv_000590,acct_0194,sub_0230,2025-10-02,2025-10-02T12:00:00Z,failed,USD +inv_000591,acct_0195,sub_0231,2025-01-03,2025-01-03T12:00:00Z,paid,USD +inv_000592,acct_0196,sub_0232,2025-03-04,2025-03-04T12:00:00Z,open,USD +inv_000593,acct_0197,sub_0233,2025-03-05,2025-03-05T12:00:00Z,draft,USD +inv_000594,acct_0198,sub_0234,2025-04-06,2025-04-06T12:00:00Z,void,USD +inv_000595,acct_0001,sub_0235,2025-05-07,2025-05-07T12:00:00Z,failed,USD +inv_000596,acct_0002,sub_0236,2025-06-08,2025-06-08T12:00:00Z,paid,USD +inv_000597,acct_0003,sub_0237,2025-07-09,2025-07-09T12:00:00Z,open,USD +inv_000598,acct_0004,sub_0238,2025-08-10,2025-08-10T12:00:00Z,draft,USD +inv_000599,acct_0005,sub_0239,2025-09-11,2025-09-11T12:00:00Z,void,USD +inv_000600,acct_0006,sub_0240,2025-10-12,2025-10-12T12:00:00Z,failed,USD +inv_000601,acct_0007,sub_0241,2025-01-13,2025-01-13T12:00:00Z,paid,USD +inv_000602,acct_0008,sub_0242,2025-03-14,2025-03-14T12:00:00Z,open,USD +inv_000603,acct_0009,sub_0243,2025-03-15,2025-03-15T12:00:00Z,draft,USD +inv_000604,acct_0010,sub_0244,2025-04-16,2025-04-16T12:00:00Z,void,USD +inv_000605,acct_0011,sub_0245,2025-05-17,2025-05-17T12:00:00Z,failed,USD +inv_000606,acct_0012,sub_0246,2025-06-18,2025-06-18T12:00:00Z,paid,USD +inv_000607,acct_0013,sub_0247,2025-07-19,2025-07-19T12:00:00Z,open,USD +inv_000608,acct_0014,sub_0248,2025-08-20,2025-08-20T12:00:00Z,draft,USD 
+inv_000609,acct_0015,sub_0249,2025-09-21,2025-09-21T12:00:00Z,void,USD +inv_000610,acct_0016,sub_0250,2025-10-22,2025-10-22T12:00:00Z,failed,USD +inv_000611,acct_0017,sub_0251,2025-01-23,2025-01-23T12:00:00Z,paid,USD +inv_000612,acct_0018,sub_0252,2025-03-24,2025-03-24T12:00:00Z,open,USD +inv_000613,acct_0019,sub_0253,2025-03-25,2025-03-25T12:00:00Z,draft,USD +inv_000614,acct_0020,sub_0254,2025-04-26,2025-04-26T12:00:00Z,void,USD +inv_000615,acct_0021,sub_0255,2025-05-27,2025-05-27T12:00:00Z,failed,USD +inv_000616,acct_0022,sub_0256,2025-06-28,2025-06-28T12:00:00Z,paid,USD +inv_000617,acct_0023,sub_0257,2025-07-01,2025-07-01T12:00:00Z,open,USD +inv_000618,acct_0024,sub_0258,2025-08-02,2025-08-02T12:00:00Z,draft,USD +inv_000619,acct_0025,sub_0259,2025-09-03,2025-09-03T12:00:00Z,void,USD +inv_000620,acct_0026,sub_0260,2025-10-04,2025-10-04T12:00:00Z,failed,USD +inv_000621,acct_0027,sub_0261,2025-01-05,2025-01-05T12:00:00Z,paid,USD +inv_000622,acct_0028,sub_0262,2025-03-06,2025-03-06T12:00:00Z,open,USD +inv_000623,acct_0029,sub_0263,2025-03-07,2025-03-07T12:00:00Z,draft,USD +inv_000624,acct_0030,sub_0264,2025-04-08,2025-04-08T12:00:00Z,void,USD +inv_000625,acct_0031,sub_0265,2025-05-09,2025-05-09T12:00:00Z,failed,USD +inv_000626,acct_0032,sub_0266,2025-06-10,2025-06-10T12:00:00Z,paid,USD +inv_000627,acct_0033,sub_0267,2025-07-11,2025-07-11T12:00:00Z,open,USD +inv_000628,acct_0034,sub_0268,2025-08-12,2025-08-12T12:00:00Z,draft,USD +inv_000629,acct_0035,sub_0269,2025-09-13,2025-09-13T12:00:00Z,void,USD +inv_000630,acct_0036,sub_0270,2025-10-14,2025-10-14T12:00:00Z,failed,USD +inv_000631,acct_0037,sub_0271,2025-01-15,2025-01-15T12:00:00Z,paid,USD +inv_000632,acct_0038,sub_0272,2025-03-16,2025-03-16T12:00:00Z,open,USD +inv_000633,acct_0039,sub_0273,2025-03-17,2025-03-17T12:00:00Z,draft,USD +inv_000634,acct_0040,sub_0274,2025-04-18,2025-04-18T12:00:00Z,void,USD +inv_000635,acct_0041,sub_0275,2025-05-19,2025-05-19T12:00:00Z,failed,USD 
+inv_000636,acct_0042,sub_0276,2025-06-20,2025-06-20T12:00:00Z,paid,USD +inv_000637,acct_0043,sub_0277,2025-07-21,2025-07-21T12:00:00Z,open,USD +inv_000638,acct_0044,sub_0278,2025-08-22,2025-08-22T12:00:00Z,draft,USD +inv_000639,acct_0045,sub_0279,2025-09-23,2025-09-23T12:00:00Z,void,USD +inv_000640,acct_0046,sub_0280,2025-10-24,2025-10-24T12:00:00Z,failed,USD +inv_000641,acct_0047,sub_0281,2025-01-25,2025-01-25T12:00:00Z,paid,USD +inv_000642,acct_0048,sub_0282,2025-03-26,2025-03-26T12:00:00Z,open,USD +inv_000643,acct_0049,sub_0283,2025-03-27,2025-03-27T12:00:00Z,draft,USD +inv_000644,acct_0050,sub_0284,2025-04-28,2025-04-28T12:00:00Z,void,USD +inv_000645,acct_0051,sub_0285,2025-05-01,2025-05-01T12:00:00Z,failed,USD +inv_000646,acct_0052,sub_0286,2025-06-02,2025-06-02T12:00:00Z,paid,USD +inv_000647,acct_0053,sub_0287,2025-07-03,2025-07-03T12:00:00Z,open,USD +inv_000648,acct_0054,sub_0288,2025-08-04,2025-08-04T12:00:00Z,draft,USD +inv_000649,acct_0055,sub_0289,2025-09-05,2025-09-05T12:00:00Z,void,USD +inv_000650,acct_0056,sub_0290,2025-10-06,2025-10-06T12:00:00Z,failed,USD +inv_000651,acct_0057,sub_0291,2025-01-07,2025-01-07T12:00:00Z,paid,USD +inv_000652,acct_0058,sub_0292,2025-03-08,2025-03-08T12:00:00Z,open,USD +inv_000653,acct_0059,sub_0293,2025-03-09,2025-03-09T12:00:00Z,draft,USD +inv_000654,acct_0060,sub_0294,2025-04-10,2025-04-10T12:00:00Z,void,USD +inv_000655,acct_0061,sub_0295,2025-05-11,2025-05-11T12:00:00Z,failed,USD +inv_000656,acct_0062,sub_0296,2025-06-12,2025-06-12T12:00:00Z,paid,USD +inv_000657,acct_0063,sub_0297,2025-07-13,2025-07-13T12:00:00Z,open,USD +inv_000658,acct_0064,sub_0298,2025-08-14,2025-08-14T12:00:00Z,draft,USD +inv_000659,acct_0065,sub_0299,2025-09-15,2025-09-15T12:00:00Z,void,USD +inv_000660,acct_0066,sub_0300,2025-10-16,2025-10-16T12:00:00Z,failed,USD +inv_000661,acct_0067,sub_0301,2025-01-17,2025-01-17T12:00:00Z,paid,USD +inv_000662,acct_0068,sub_0302,2025-03-18,2025-03-18T12:00:00Z,open,USD 
+inv_000663,acct_0069,sub_0303,2025-03-19,2025-03-19T12:00:00Z,draft,USD +inv_000664,acct_0070,sub_0304,2025-04-20,2025-04-20T12:00:00Z,void,USD +inv_000665,acct_0071,sub_0305,2025-05-21,2025-05-21T12:00:00Z,failed,USD +inv_000666,acct_0072,sub_0306,2025-06-22,2025-06-22T12:00:00Z,paid,USD +inv_000667,acct_0073,sub_0307,2025-07-23,2025-07-23T12:00:00Z,open,USD +inv_000668,acct_0074,sub_0308,2025-08-24,2025-08-24T12:00:00Z,draft,USD +inv_000669,acct_0075,sub_0309,2025-09-25,2025-09-25T12:00:00Z,void,USD +inv_000670,acct_0076,sub_0310,2025-10-26,2025-10-26T12:00:00Z,failed,USD +inv_000671,acct_0077,sub_0311,2025-01-27,2025-01-27T12:00:00Z,paid,USD +inv_000672,acct_0078,sub_0312,2025-03-28,2025-03-28T12:00:00Z,open,USD +inv_000673,acct_0079,sub_0313,2025-03-01,2025-03-01T12:00:00Z,draft,USD +inv_000674,acct_0080,sub_0314,2025-04-02,2025-04-02T12:00:00Z,void,USD +inv_000675,acct_0081,sub_0315,2025-05-03,2025-05-03T12:00:00Z,failed,USD +inv_000676,acct_0082,sub_0316,2025-06-04,2025-06-04T12:00:00Z,paid,USD +inv_000677,acct_0083,sub_0317,2025-07-05,2025-07-05T12:00:00Z,open,USD +inv_000678,acct_0084,sub_0318,2025-08-06,2025-08-06T12:00:00Z,draft,USD +inv_000679,acct_0085,sub_0319,2025-09-07,2025-09-07T12:00:00Z,void,USD +inv_000680,acct_0086,sub_0320,2025-10-08,2025-10-08T12:00:00Z,failed,USD +inv_000681,acct_0087,sub_0321,2025-01-09,2025-01-09T12:00:00Z,paid,USD +inv_000682,acct_0088,sub_0322,2025-03-10,2025-03-10T12:00:00Z,open,USD +inv_000683,acct_0089,sub_0323,2025-03-11,2025-03-11T12:00:00Z,draft,USD +inv_000684,acct_0090,sub_0324,2025-04-12,2025-04-12T12:00:00Z,void,USD +inv_000685,acct_0091,sub_0325,2025-05-13,2025-05-13T12:00:00Z,failed,USD +inv_000686,acct_0092,sub_0326,2025-06-14,2025-06-14T12:00:00Z,paid,USD +inv_000687,acct_0093,sub_0327,2025-07-15,2025-07-15T12:00:00Z,open,USD +inv_000688,acct_0094,sub_0328,2025-08-16,2025-08-16T12:00:00Z,draft,USD +inv_000689,acct_0095,sub_0329,2025-09-17,2025-09-17T12:00:00Z,void,USD 
+inv_000690,acct_0096,sub_0330,2025-10-18,2025-10-18T12:00:00Z,failed,USD +inv_000691,acct_0097,sub_0331,2025-01-19,2025-01-19T12:00:00Z,paid,USD +inv_000692,acct_0098,sub_0332,2025-03-20,2025-03-20T12:00:00Z,open,USD +inv_000693,acct_0099,sub_0333,2025-03-21,2025-03-21T12:00:00Z,draft,USD +inv_000694,acct_0100,sub_0334,2025-04-22,2025-04-22T12:00:00Z,void,USD +inv_000695,acct_0101,sub_0335,2025-05-23,2025-05-23T12:00:00Z,failed,USD +inv_000696,acct_0102,sub_0336,2025-06-24,2025-06-24T12:00:00Z,paid,USD +inv_000697,acct_0103,sub_0337,2025-07-25,2025-07-25T12:00:00Z,open,USD +inv_000698,acct_0104,sub_0338,2025-08-26,2025-08-26T12:00:00Z,draft,USD +inv_000699,acct_0105,sub_0339,2025-09-27,2025-09-27T12:00:00Z,void,USD +inv_000700,acct_0106,sub_0340,2025-10-28,2025-10-28T12:00:00Z,failed,USD +inv_000701,acct_0107,sub_0341,2025-01-01,2025-01-01T12:00:00Z,paid,USD +inv_000702,acct_0108,sub_0342,2025-03-02,2025-03-02T12:00:00Z,open,USD +inv_000703,acct_0109,sub_0343,2025-03-03,2025-03-03T12:00:00Z,draft,USD +inv_000704,acct_0110,sub_0344,2025-04-04,2025-04-04T12:00:00Z,void,USD +inv_000705,acct_0111,sub_0345,2025-05-05,2025-05-05T12:00:00Z,failed,USD +inv_000706,acct_0112,sub_0346,2025-06-06,2025-06-06T12:00:00Z,paid,USD +inv_000707,acct_0113,sub_0347,2025-07-07,2025-07-07T12:00:00Z,open,USD +inv_000708,acct_0114,sub_0348,2025-08-08,2025-08-08T12:00:00Z,draft,USD +inv_000709,acct_0115,sub_0349,2025-09-09,2025-09-09T12:00:00Z,void,USD +inv_000710,acct_0116,sub_0350,2025-10-10,2025-10-10T12:00:00Z,failed,USD +inv_000711,acct_0117,sub_0351,2025-01-11,2025-01-11T12:00:00Z,paid,USD +inv_000712,acct_0118,sub_0352,2025-03-12,2025-03-12T12:00:00Z,open,USD +inv_000713,acct_0119,sub_0353,2025-03-13,2025-03-13T12:00:00Z,draft,USD +inv_000714,acct_0120,sub_0354,2025-04-14,2025-04-14T12:00:00Z,void,USD +inv_000715,acct_0121,sub_0355,2025-05-15,2025-05-15T12:00:00Z,failed,USD +inv_000716,acct_0122,sub_0356,2025-06-16,2025-06-16T12:00:00Z,paid,USD 
+inv_000717,acct_0123,sub_0357,2025-07-17,2025-07-17T12:00:00Z,open,USD +inv_000718,acct_0124,sub_0358,2025-08-18,2025-08-18T12:00:00Z,draft,USD +inv_000719,acct_0125,sub_0359,2025-09-19,2025-09-19T12:00:00Z,void,USD +inv_000720,acct_0126,sub_0360,2025-10-20,2025-10-20T12:00:00Z,failed,USD +inv_000721,acct_0127,sub_0001,2025-01-21,2025-01-21T12:00:00Z,paid,USD +inv_000722,acct_0128,sub_0002,2025-03-22,2025-03-22T12:00:00Z,open,USD +inv_000723,acct_0129,sub_0003,2025-03-23,2025-03-23T12:00:00Z,draft,USD +inv_000724,acct_0130,sub_0004,2025-04-24,2025-04-24T12:00:00Z,void,USD +inv_000725,acct_0131,sub_0005,2025-05-25,2025-05-25T12:00:00Z,failed,USD +inv_000726,acct_0132,sub_0006,2025-06-26,2025-06-26T12:00:00Z,paid,USD +inv_000727,acct_0133,sub_0007,2025-07-27,2025-07-27T12:00:00Z,open,USD +inv_000728,acct_0134,sub_0008,2025-08-28,2025-08-28T12:00:00Z,draft,USD +inv_000729,acct_0135,sub_0009,2025-09-01,2025-09-01T12:00:00Z,void,USD +inv_000730,acct_0136,sub_0010,2025-10-02,2025-10-02T12:00:00Z,failed,USD +inv_000731,acct_0137,sub_0011,2025-01-03,2025-01-03T12:00:00Z,paid,USD +inv_000732,acct_0138,sub_0012,2025-03-04,2025-03-04T12:00:00Z,open,USD +inv_000733,acct_0139,sub_0013,2025-03-05,2025-03-05T12:00:00Z,draft,USD +inv_000734,acct_0140,sub_0014,2025-04-06,2025-04-06T12:00:00Z,void,USD +inv_000735,acct_0141,sub_0015,2025-05-07,2025-05-07T12:00:00Z,failed,USD +inv_000736,acct_0142,sub_0016,2025-06-08,2025-06-08T12:00:00Z,paid,USD +inv_000737,acct_0143,sub_0017,2025-07-09,2025-07-09T12:00:00Z,open,USD +inv_000738,acct_0144,sub_0018,2025-08-10,2025-08-10T12:00:00Z,draft,USD +inv_000739,acct_0145,sub_0019,2025-09-11,2025-09-11T12:00:00Z,void,USD +inv_000740,acct_0146,sub_0020,2025-10-12,2025-10-12T12:00:00Z,failed,USD +inv_000741,acct_0147,sub_0021,2025-01-13,2025-01-13T12:00:00Z,paid,USD +inv_000742,acct_0148,sub_0022,2025-03-14,2025-03-14T12:00:00Z,open,USD +inv_000743,acct_0149,sub_0023,2025-03-15,2025-03-15T12:00:00Z,draft,USD 
+inv_000744,acct_0150,sub_0024,2025-04-16,2025-04-16T12:00:00Z,void,USD +inv_000745,acct_0151,sub_0025,2025-05-17,2025-05-17T12:00:00Z,failed,USD +inv_000746,acct_0152,sub_0026,2025-06-18,2025-06-18T12:00:00Z,paid,USD +inv_000747,acct_0153,sub_0027,2025-07-19,2025-07-19T12:00:00Z,open,USD +inv_000748,acct_0154,sub_0028,2025-08-20,2025-08-20T12:00:00Z,draft,USD +inv_000749,acct_0155,sub_0029,2025-09-21,2025-09-21T12:00:00Z,void,USD +inv_000750,acct_0156,sub_0030,2025-10-22,2025-10-22T12:00:00Z,failed,USD +inv_000751,acct_0157,sub_0031,2025-01-23,2025-01-23T12:00:00Z,paid,USD +inv_000752,acct_0158,sub_0032,2025-03-24,2025-03-24T12:00:00Z,open,USD +inv_000753,acct_0159,sub_0033,2025-03-25,2025-03-25T12:00:00Z,draft,USD +inv_000754,acct_0160,sub_0034,2025-04-26,2025-04-26T12:00:00Z,void,USD +inv_000755,acct_0161,sub_0035,2025-05-27,2025-05-27T12:00:00Z,failed,USD +inv_000756,acct_0162,sub_0036,2025-06-28,2025-06-28T12:00:00Z,paid,USD +inv_000757,acct_0163,sub_0037,2025-07-01,2025-07-01T12:00:00Z,open,USD +inv_000758,acct_0164,sub_0038,2025-08-02,2025-08-02T12:00:00Z,draft,USD +inv_000759,acct_0165,sub_0039,2025-09-03,2025-09-03T12:00:00Z,void,USD +inv_000760,acct_0166,sub_0040,2025-10-04,2025-10-04T12:00:00Z,failed,USD +inv_000761,acct_0167,sub_0041,2025-01-05,2025-01-05T12:00:00Z,paid,USD +inv_000762,acct_0168,sub_0042,2025-03-06,2025-03-06T12:00:00Z,open,USD +inv_000763,acct_0169,sub_0043,2025-03-07,2025-03-07T12:00:00Z,draft,USD +inv_000764,acct_0170,sub_0044,2025-04-08,2025-04-08T12:00:00Z,void,USD +inv_000765,acct_0171,sub_0045,2025-05-09,2025-05-09T12:00:00Z,failed,USD +inv_000766,acct_0172,sub_0046,2025-06-10,2025-06-10T12:00:00Z,paid,USD +inv_000767,acct_0173,sub_0047,2025-07-11,2025-07-11T12:00:00Z,open,USD +inv_000768,acct_0174,sub_0048,2025-08-12,2025-08-12T12:00:00Z,draft,USD +inv_000769,acct_0175,sub_0049,2025-09-13,2025-09-13T12:00:00Z,void,USD +inv_000770,acct_0176,sub_0050,2025-10-14,2025-10-14T12:00:00Z,failed,USD 
+inv_000771,acct_0177,sub_0051,2025-01-15,2025-01-15T12:00:00Z,paid,USD +inv_000772,acct_0178,sub_0052,2025-03-16,2025-03-16T12:00:00Z,open,USD +inv_000773,acct_0179,sub_0053,2025-03-17,2025-03-17T12:00:00Z,draft,USD +inv_000774,acct_0180,sub_0054,2025-04-18,2025-04-18T12:00:00Z,void,USD +inv_000775,acct_0181,sub_0055,2025-05-19,2025-05-19T12:00:00Z,failed,USD +inv_000776,acct_0182,sub_0056,2025-06-20,2025-06-20T12:00:00Z,paid,USD +inv_000777,acct_0183,sub_0057,2025-07-21,2025-07-21T12:00:00Z,open,USD +inv_000778,acct_0184,sub_0058,2025-08-22,2025-08-22T12:00:00Z,draft,USD +inv_000779,acct_0185,sub_0059,2025-09-23,2025-09-23T12:00:00Z,void,USD +inv_000780,acct_0186,sub_0060,2025-10-24,2025-10-24T12:00:00Z,failed,USD +inv_000781,acct_0187,sub_0061,2025-01-25,2025-01-25T12:00:00Z,paid,USD +inv_000782,acct_0188,sub_0062,2025-03-26,2025-03-26T12:00:00Z,open,USD +inv_000783,acct_0189,sub_0063,2025-03-27,2025-03-27T12:00:00Z,draft,USD +inv_000784,acct_0190,sub_0064,2025-04-28,2025-04-28T12:00:00Z,void,USD +inv_000785,acct_0191,sub_0065,2025-05-01,2025-05-01T12:00:00Z,failed,USD +inv_000786,acct_0192,sub_0066,2025-06-02,2025-06-02T12:00:00Z,paid,USD +inv_000787,acct_0193,sub_0067,2025-07-03,2025-07-03T12:00:00Z,open,USD +inv_000788,acct_0194,sub_0068,2025-08-04,2025-08-04T12:00:00Z,draft,USD +inv_000789,acct_0195,sub_0069,2025-09-05,2025-09-05T12:00:00Z,void,USD +inv_000790,acct_0196,sub_0070,2025-10-06,2025-10-06T12:00:00Z,failed,USD +inv_000791,acct_0197,sub_0071,2025-01-07,2025-01-07T12:00:00Z,paid,USD +inv_000792,acct_0198,sub_0072,2025-03-08,2025-03-08T12:00:00Z,open,USD +inv_000793,acct_0001,sub_0073,2025-03-09,2025-03-09T12:00:00Z,draft,USD +inv_000794,acct_0002,sub_0074,2025-04-10,2025-04-10T12:00:00Z,void,USD +inv_000795,acct_0003,sub_0075,2025-05-11,2025-05-11T12:00:00Z,failed,USD +inv_000796,acct_0004,sub_0076,2025-06-12,2025-06-12T12:00:00Z,paid,USD +inv_000797,acct_0005,sub_0077,2025-07-13,2025-07-13T12:00:00Z,open,USD 
+inv_000798,acct_0006,sub_0078,2025-08-14,2025-08-14T12:00:00Z,draft,USD +inv_000799,acct_0007,sub_0079,2025-09-15,2025-09-15T12:00:00Z,void,USD +inv_000800,acct_0008,sub_0080,2025-10-16,2025-10-16T12:00:00Z,failed,USD +inv_000801,acct_0009,sub_0081,2025-01-17,2025-01-17T12:00:00Z,paid,USD +inv_000802,acct_0010,sub_0082,2025-03-18,2025-03-18T12:00:00Z,open,USD +inv_000803,acct_0011,sub_0083,2025-03-19,2025-03-19T12:00:00Z,draft,USD +inv_000804,acct_0012,sub_0084,2025-04-20,2025-04-20T12:00:00Z,void,USD +inv_000805,acct_0013,sub_0085,2025-05-21,2025-05-21T12:00:00Z,failed,USD +inv_000806,acct_0014,sub_0086,2025-06-22,2025-06-22T12:00:00Z,paid,USD +inv_000807,acct_0015,sub_0087,2025-07-23,2025-07-23T12:00:00Z,open,USD +inv_000808,acct_0016,sub_0088,2025-08-24,2025-08-24T12:00:00Z,draft,USD +inv_000809,acct_0017,sub_0089,2025-09-25,2025-09-25T12:00:00Z,void,USD +inv_000810,acct_0018,sub_0090,2025-10-26,2025-10-26T12:00:00Z,failed,USD +inv_000811,acct_0019,sub_0091,2025-01-27,2025-01-27T12:00:00Z,paid,USD +inv_000812,acct_0020,sub_0092,2025-03-28,2025-03-28T12:00:00Z,open,USD +inv_000813,acct_0021,sub_0093,2025-03-01,2025-03-01T12:00:00Z,draft,USD +inv_000814,acct_0022,sub_0094,2025-04-02,2025-04-02T12:00:00Z,void,USD +inv_000815,acct_0023,sub_0095,2025-05-03,2025-05-03T12:00:00Z,failed,USD +inv_000816,acct_0024,sub_0096,2025-06-04,2025-06-04T12:00:00Z,paid,USD +inv_000817,acct_0025,sub_0097,2025-07-05,2025-07-05T12:00:00Z,open,USD +inv_000818,acct_0026,sub_0098,2025-08-06,2025-08-06T12:00:00Z,draft,USD +inv_000819,acct_0027,sub_0099,2025-09-07,2025-09-07T12:00:00Z,void,USD +inv_000820,acct_0028,sub_0100,2025-10-08,2025-10-08T12:00:00Z,failed,USD +inv_000821,acct_0029,sub_0101,2025-01-09,2025-01-09T12:00:00Z,paid,USD +inv_000822,acct_0030,sub_0102,2025-03-10,2025-03-10T12:00:00Z,open,USD +inv_000823,acct_0031,sub_0103,2025-03-11,2025-03-11T12:00:00Z,draft,USD +inv_000824,acct_0032,sub_0104,2025-04-12,2025-04-12T12:00:00Z,void,USD 
+inv_000825,acct_0033,sub_0105,2025-05-13,2025-05-13T12:00:00Z,failed,USD +inv_000826,acct_0034,sub_0106,2025-06-14,2025-06-14T12:00:00Z,paid,USD +inv_000827,acct_0035,sub_0107,2025-07-15,2025-07-15T12:00:00Z,open,USD +inv_000828,acct_0036,sub_0108,2025-08-16,2025-08-16T12:00:00Z,draft,USD +inv_000829,acct_0037,sub_0109,2025-09-17,2025-09-17T12:00:00Z,void,USD +inv_000830,acct_0038,sub_0110,2025-10-18,2025-10-18T12:00:00Z,failed,USD +inv_000831,acct_0039,sub_0111,2025-01-19,2025-01-19T12:00:00Z,paid,USD +inv_000832,acct_0040,sub_0112,2025-03-20,2025-03-20T12:00:00Z,open,USD +inv_000833,acct_0041,sub_0113,2025-03-21,2025-03-21T12:00:00Z,draft,USD +inv_000834,acct_0042,sub_0114,2025-04-22,2025-04-22T12:00:00Z,void,USD +inv_000835,acct_0043,sub_0115,2025-05-23,2025-05-23T12:00:00Z,failed,USD +inv_000836,acct_0044,sub_0116,2025-06-24,2025-06-24T12:00:00Z,paid,USD +inv_000837,acct_0045,sub_0117,2025-07-25,2025-07-25T12:00:00Z,open,USD +inv_000838,acct_0046,sub_0118,2025-08-26,2025-08-26T12:00:00Z,draft,USD +inv_000839,acct_0047,sub_0119,2025-09-27,2025-09-27T12:00:00Z,void,USD +inv_000840,acct_0048,sub_0120,2025-10-28,2025-10-28T12:00:00Z,failed,USD +inv_000841,acct_0049,sub_0121,2025-01-01,2025-01-01T12:00:00Z,paid,USD +inv_000842,acct_0050,sub_0122,2025-03-02,2025-03-02T12:00:00Z,open,USD +inv_000843,acct_0051,sub_0123,2025-03-03,2025-03-03T12:00:00Z,draft,USD +inv_000844,acct_0052,sub_0124,2025-04-04,2025-04-04T12:00:00Z,void,USD +inv_000845,acct_0053,sub_0125,2025-05-05,2025-05-05T12:00:00Z,failed,USD +inv_000846,acct_0054,sub_0126,2025-06-06,2025-06-06T12:00:00Z,paid,USD +inv_000847,acct_0055,sub_0127,2025-07-07,2025-07-07T12:00:00Z,open,USD +inv_000848,acct_0056,sub_0128,2025-08-08,2025-08-08T12:00:00Z,draft,USD +inv_000849,acct_0057,sub_0129,2025-09-09,2025-09-09T12:00:00Z,void,USD +inv_000850,acct_0058,sub_0130,2025-10-10,2025-10-10T12:00:00Z,failed,USD +inv_000851,acct_0059,sub_0131,2025-01-11,2025-01-11T12:00:00Z,paid,USD 
+inv_000852,acct_0060,sub_0132,2025-03-12,2025-03-12T12:00:00Z,open,USD +inv_000853,acct_0061,sub_0133,2025-03-13,2025-03-13T12:00:00Z,draft,USD +inv_000854,acct_0062,sub_0134,2025-04-14,2025-04-14T12:00:00Z,void,USD +inv_000855,acct_0063,sub_0135,2025-05-15,2025-05-15T12:00:00Z,failed,USD +inv_000856,acct_0064,sub_0136,2025-06-16,2025-06-16T12:00:00Z,paid,USD +inv_000857,acct_0065,sub_0137,2025-07-17,2025-07-17T12:00:00Z,open,USD +inv_000858,acct_0066,sub_0138,2025-08-18,2025-08-18T12:00:00Z,draft,USD +inv_000859,acct_0067,sub_0139,2025-09-19,2025-09-19T12:00:00Z,void,USD +inv_000860,acct_0068,sub_0140,2025-10-20,2025-10-20T12:00:00Z,failed,USD +inv_000861,acct_0069,sub_0141,2025-01-21,2025-01-21T12:00:00Z,paid,USD +inv_000862,acct_0070,sub_0142,2025-03-22,2025-03-22T12:00:00Z,open,USD +inv_000863,acct_0071,sub_0143,2025-03-23,2025-03-23T12:00:00Z,draft,USD +inv_000864,acct_0072,sub_0144,2025-04-24,2025-04-24T12:00:00Z,void,USD +inv_000865,acct_0073,sub_0145,2025-05-25,2025-05-25T12:00:00Z,failed,USD +inv_000866,acct_0074,sub_0146,2025-06-26,2025-06-26T12:00:00Z,paid,USD +inv_000867,acct_0075,sub_0147,2025-07-27,2025-07-27T12:00:00Z,open,USD +inv_000868,acct_0076,sub_0148,2025-08-28,2025-08-28T12:00:00Z,draft,USD +inv_000869,acct_0077,sub_0149,2025-09-01,2025-09-01T12:00:00Z,void,USD +inv_000870,acct_0078,sub_0150,2025-10-02,2025-10-02T12:00:00Z,failed,USD +inv_000871,acct_0079,sub_0151,2025-01-03,2025-01-03T12:00:00Z,paid,USD +inv_000872,acct_0080,sub_0152,2025-03-04,2025-03-04T12:00:00Z,open,USD +inv_000873,acct_0081,sub_0153,2025-03-05,2025-03-05T12:00:00Z,draft,USD +inv_000874,acct_0082,sub_0154,2025-04-06,2025-04-06T12:00:00Z,void,USD +inv_000875,acct_0083,sub_0155,2025-05-07,2025-05-07T12:00:00Z,failed,USD +inv_000876,acct_0084,sub_0156,2025-06-08,2025-06-08T12:00:00Z,paid,USD +inv_000877,acct_0085,sub_0157,2025-07-09,2025-07-09T12:00:00Z,open,USD +inv_000878,acct_0086,sub_0158,2025-08-10,2025-08-10T12:00:00Z,draft,USD 
+inv_000879,acct_0087,sub_0159,2025-09-11,2025-09-11T12:00:00Z,void,USD +inv_000880,acct_0088,sub_0160,2025-10-12,2025-10-12T12:00:00Z,failed,USD +inv_000881,acct_0089,sub_0161,2025-01-13,2025-01-13T12:00:00Z,paid,USD +inv_000882,acct_0090,sub_0162,2025-03-14,2025-03-14T12:00:00Z,open,USD +inv_000883,acct_0091,sub_0163,2025-03-15,2025-03-15T12:00:00Z,draft,USD +inv_000884,acct_0092,sub_0164,2025-04-16,2025-04-16T12:00:00Z,void,USD +inv_000885,acct_0093,sub_0165,2025-05-17,2025-05-17T12:00:00Z,failed,USD +inv_000886,acct_0094,sub_0166,2025-06-18,2025-06-18T12:00:00Z,paid,USD +inv_000887,acct_0095,sub_0167,2025-07-19,2025-07-19T12:00:00Z,open,USD +inv_000888,acct_0096,sub_0168,2025-08-20,2025-08-20T12:00:00Z,draft,USD +inv_000889,acct_0097,sub_0169,2025-09-21,2025-09-21T12:00:00Z,void,USD +inv_000890,acct_0098,sub_0170,2025-10-22,2025-10-22T12:00:00Z,failed,USD +inv_000891,acct_0099,sub_0171,2025-01-23,2025-01-23T12:00:00Z,paid,USD +inv_000892,acct_0100,sub_0172,2025-03-24,2025-03-24T12:00:00Z,open,USD +inv_000893,acct_0101,sub_0173,2025-03-25,2025-03-25T12:00:00Z,draft,USD +inv_000894,acct_0102,sub_0174,2025-04-26,2025-04-26T12:00:00Z,void,USD +inv_000895,acct_0103,sub_0175,2025-05-27,2025-05-27T12:00:00Z,failed,USD +inv_000896,acct_0104,sub_0176,2025-06-28,2025-06-28T12:00:00Z,paid,USD +inv_000897,acct_0105,sub_0177,2025-07-01,2025-07-01T12:00:00Z,open,USD +inv_000898,acct_0106,sub_0178,2025-08-02,2025-08-02T12:00:00Z,draft,USD +inv_000899,acct_0107,sub_0179,2025-09-03,2025-09-03T12:00:00Z,void,USD +inv_000900,acct_0108,sub_0180,2025-10-04,2025-10-04T12:00:00Z,failed,USD +inv_000901,acct_0109,sub_0181,2025-01-05,2025-01-05T12:00:00Z,paid,USD +inv_000902,acct_0110,sub_0182,2025-03-06,2025-03-06T12:00:00Z,open,USD +inv_000903,acct_0111,sub_0183,2025-03-07,2025-03-07T12:00:00Z,draft,USD +inv_000904,acct_0112,sub_0184,2025-04-08,2025-04-08T12:00:00Z,void,USD +inv_000905,acct_0113,sub_0185,2025-05-09,2025-05-09T12:00:00Z,failed,USD 
+inv_000906,acct_0114,sub_0186,2025-06-10,2025-06-10T12:00:00Z,paid,USD +inv_000907,acct_0115,sub_0187,2025-07-11,2025-07-11T12:00:00Z,open,USD +inv_000908,acct_0116,sub_0188,2025-08-12,2025-08-12T12:00:00Z,draft,USD +inv_000909,acct_0117,sub_0189,2025-09-13,2025-09-13T12:00:00Z,void,USD +inv_000910,acct_0118,sub_0190,2025-10-14,2025-10-14T12:00:00Z,failed,USD +inv_000911,acct_0119,sub_0191,2025-01-15,2025-01-15T12:00:00Z,paid,USD +inv_000912,acct_0120,sub_0192,2025-03-16,2025-03-16T12:00:00Z,open,USD +inv_000913,acct_0121,sub_0193,2025-03-17,2025-03-17T12:00:00Z,draft,USD +inv_000914,acct_0122,sub_0194,2025-04-18,2025-04-18T12:00:00Z,void,USD +inv_000915,acct_0123,sub_0195,2025-05-19,2025-05-19T12:00:00Z,failed,USD +inv_000916,acct_0124,sub_0196,2025-06-20,2025-06-20T12:00:00Z,paid,USD +inv_000917,acct_0125,sub_0197,2025-07-21,2025-07-21T12:00:00Z,open,USD +inv_000918,acct_0126,sub_0198,2025-08-22,2025-08-22T12:00:00Z,draft,USD +inv_000919,acct_0127,sub_0199,2025-09-23,2025-09-23T12:00:00Z,void,USD +inv_000920,acct_0128,sub_0200,2025-10-24,2025-10-24T12:00:00Z,failed,USD +inv_000921,acct_0129,sub_0201,2025-01-25,2025-01-25T12:00:00Z,paid,USD +inv_000922,acct_0130,sub_0202,2025-03-26,2025-03-26T12:00:00Z,open,USD +inv_000923,acct_0131,sub_0203,2025-03-27,2025-03-27T12:00:00Z,draft,USD +inv_000924,acct_0132,sub_0204,2025-04-28,2025-04-28T12:00:00Z,void,USD +inv_000925,acct_0133,sub_0205,2025-05-01,2025-05-01T12:00:00Z,failed,USD +inv_000926,acct_0134,sub_0206,2025-06-02,2025-06-02T12:00:00Z,paid,USD +inv_000927,acct_0135,sub_0207,2025-07-03,2025-07-03T12:00:00Z,open,USD +inv_000928,acct_0136,sub_0208,2025-08-04,2025-08-04T12:00:00Z,draft,USD +inv_000929,acct_0137,sub_0209,2025-09-05,2025-09-05T12:00:00Z,void,USD +inv_000930,acct_0138,sub_0210,2025-10-06,2025-10-06T12:00:00Z,failed,USD +inv_000931,acct_0139,sub_0211,2025-01-07,2025-01-07T12:00:00Z,paid,USD +inv_000932,acct_0140,sub_0212,2025-03-08,2025-03-08T12:00:00Z,open,USD 
+inv_000933,acct_0141,sub_0213,2025-03-09,2025-03-09T12:00:00Z,draft,USD +inv_000934,acct_0142,sub_0214,2025-04-10,2025-04-10T12:00:00Z,void,USD +inv_000935,acct_0143,sub_0215,2025-05-11,2025-05-11T12:00:00Z,failed,USD +inv_000936,acct_0144,sub_0216,2025-06-12,2025-06-12T12:00:00Z,paid,USD +inv_000937,acct_0145,sub_0217,2025-07-13,2025-07-13T12:00:00Z,open,USD +inv_000938,acct_0146,sub_0218,2025-08-14,2025-08-14T12:00:00Z,draft,USD +inv_000939,acct_0147,sub_0219,2025-09-15,2025-09-15T12:00:00Z,void,USD +inv_000940,acct_0148,sub_0220,2025-10-16,2025-10-16T12:00:00Z,failed,USD +inv_000941,acct_0149,sub_0221,2025-01-17,2025-01-17T12:00:00Z,paid,USD +inv_000942,acct_0150,sub_0222,2025-03-18,2025-03-18T12:00:00Z,open,USD +inv_000943,acct_0151,sub_0223,2025-03-19,2025-03-19T12:00:00Z,draft,USD +inv_000944,acct_0152,sub_0224,2025-04-20,2025-04-20T12:00:00Z,void,USD +inv_000945,acct_0153,sub_0225,2025-05-21,2025-05-21T12:00:00Z,failed,USD +inv_000946,acct_0154,sub_0226,2025-06-22,2025-06-22T12:00:00Z,paid,USD +inv_000947,acct_0155,sub_0227,2025-07-23,2025-07-23T12:00:00Z,open,USD +inv_000948,acct_0156,sub_0228,2025-08-24,2025-08-24T12:00:00Z,draft,USD +inv_000949,acct_0157,sub_0229,2025-09-25,2025-09-25T12:00:00Z,void,USD +inv_000950,acct_0158,sub_0230,2025-10-26,2025-10-26T12:00:00Z,failed,USD +inv_000951,acct_0159,sub_0231,2025-01-27,2025-01-27T12:00:00Z,paid,USD +inv_000952,acct_0160,sub_0232,2025-03-28,2025-03-28T12:00:00Z,open,USD +inv_000953,acct_0161,sub_0233,2025-03-01,2025-03-01T12:00:00Z,draft,USD +inv_000954,acct_0162,sub_0234,2025-04-02,2025-04-02T12:00:00Z,void,USD +inv_000955,acct_0163,sub_0235,2025-05-03,2025-05-03T12:00:00Z,failed,USD +inv_000956,acct_0164,sub_0236,2025-06-04,2025-06-04T12:00:00Z,paid,USD +inv_000957,acct_0165,sub_0237,2025-07-05,2025-07-05T12:00:00Z,open,USD +inv_000958,acct_0166,sub_0238,2025-08-06,2025-08-06T12:00:00Z,draft,USD +inv_000959,acct_0167,sub_0239,2025-09-07,2025-09-07T12:00:00Z,void,USD 
+inv_000960,acct_0168,sub_0240,2025-10-08,2025-10-08T12:00:00Z,failed,USD +inv_000961,acct_0169,sub_0241,2025-01-09,2025-01-09T12:00:00Z,paid,USD +inv_000962,acct_0170,sub_0242,2025-03-10,2025-03-10T12:00:00Z,open,USD +inv_000963,acct_0171,sub_0243,2025-03-11,2025-03-11T12:00:00Z,draft,USD +inv_000964,acct_0172,sub_0244,2025-04-12,2025-04-12T12:00:00Z,void,USD +inv_000965,acct_0173,sub_0245,2025-05-13,2025-05-13T12:00:00Z,failed,USD +inv_000966,acct_0174,sub_0246,2025-06-14,2025-06-14T12:00:00Z,paid,USD +inv_000967,acct_0175,sub_0247,2025-07-15,2025-07-15T12:00:00Z,open,USD +inv_000968,acct_0176,sub_0248,2025-08-16,2025-08-16T12:00:00Z,draft,USD +inv_000969,acct_0177,sub_0249,2025-09-17,2025-09-17T12:00:00Z,void,USD +inv_000970,acct_0178,sub_0250,2025-10-18,2025-10-18T12:00:00Z,failed,USD +inv_000971,acct_0179,sub_0251,2025-01-19,2025-01-19T12:00:00Z,paid,USD +inv_000972,acct_0180,sub_0252,2025-03-20,2025-03-20T12:00:00Z,open,USD +inv_000973,acct_0181,sub_0253,2025-03-21,2025-03-21T12:00:00Z,draft,USD +inv_000974,acct_0182,sub_0254,2025-04-22,2025-04-22T12:00:00Z,void,USD +inv_000975,acct_0183,sub_0255,2025-05-23,2025-05-23T12:00:00Z,failed,USD +inv_000976,acct_0184,sub_0256,2025-06-24,2025-06-24T12:00:00Z,paid,USD +inv_000977,acct_0185,sub_0257,2025-07-25,2025-07-25T12:00:00Z,open,USD +inv_000978,acct_0186,sub_0258,2025-08-26,2025-08-26T12:00:00Z,draft,USD +inv_000979,acct_0187,sub_0259,2025-09-27,2025-09-27T12:00:00Z,void,USD +inv_000980,acct_0188,sub_0260,2025-10-28,2025-10-28T12:00:00Z,failed,USD +inv_000981,acct_0189,sub_0261,2025-01-01,2025-01-01T12:00:00Z,paid,USD +inv_000982,acct_0190,sub_0262,2025-03-02,2025-03-02T12:00:00Z,open,USD +inv_000983,acct_0191,sub_0263,2025-03-03,2025-03-03T12:00:00Z,draft,USD +inv_000984,acct_0192,sub_0264,2025-04-04,2025-04-04T12:00:00Z,void,USD +inv_000985,acct_0193,sub_0265,2025-05-05,2025-05-05T12:00:00Z,failed,USD +inv_000986,acct_0194,sub_0266,2025-06-06,2025-06-06T12:00:00Z,paid,USD 
+inv_000987,acct_0195,sub_0267,2025-07-07,2025-07-07T12:00:00Z,open,USD +inv_000988,acct_0196,sub_0268,2025-08-08,2025-08-08T12:00:00Z,draft,USD +inv_000989,acct_0197,sub_0269,2025-09-09,2025-09-09T12:00:00Z,void,USD +inv_000990,acct_0198,sub_0270,2025-10-10,2025-10-10T12:00:00Z,failed,USD +inv_000991,acct_0001,sub_0271,2025-01-11,2025-01-11T12:00:00Z,paid,USD +inv_000992,acct_0002,sub_0272,2025-03-12,2025-03-12T12:00:00Z,open,USD +inv_000993,acct_0003,sub_0273,2025-03-13,2025-03-13T12:00:00Z,draft,USD +inv_000994,acct_0004,sub_0274,2025-04-14,2025-04-14T12:00:00Z,void,USD +inv_000995,acct_0005,sub_0275,2025-05-15,2025-05-15T12:00:00Z,failed,USD +inv_000996,acct_0006,sub_0276,2025-06-16,2025-06-16T12:00:00Z,paid,USD +inv_000997,acct_0007,sub_0277,2025-07-17,2025-07-17T12:00:00Z,open,USD +inv_000998,acct_0008,sub_0278,2025-08-18,2025-08-18T12:00:00Z,draft,USD +inv_000999,acct_0009,sub_0279,2025-09-19,2025-09-19T12:00:00Z,void,USD +inv_001000,acct_0010,sub_0280,2025-10-20,2025-10-20T12:00:00Z,failed,USD +inv_001001,acct_0011,sub_0281,2025-01-21,2025-01-21T12:00:00Z,paid,USD +inv_001002,acct_0012,sub_0282,2025-03-22,2025-03-22T12:00:00Z,open,USD +inv_001003,acct_0013,sub_0283,2025-03-23,2025-03-23T12:00:00Z,draft,USD +inv_001004,acct_0014,sub_0284,2025-04-24,2025-04-24T12:00:00Z,void,USD +inv_001005,acct_0015,sub_0285,2025-05-25,2025-05-25T12:00:00Z,failed,USD +inv_001006,acct_0016,sub_0286,2025-06-26,2025-06-26T12:00:00Z,paid,USD +inv_001007,acct_0017,sub_0287,2025-07-27,2025-07-27T12:00:00Z,open,USD +inv_001008,acct_0018,sub_0288,2025-08-28,2025-08-28T12:00:00Z,draft,USD +inv_001009,acct_0019,sub_0289,2025-09-01,2025-09-01T12:00:00Z,void,USD +inv_001010,acct_0020,sub_0290,2025-10-02,2025-10-02T12:00:00Z,failed,USD +inv_001011,acct_0021,sub_0291,2025-01-03,2025-01-03T12:00:00Z,paid,USD +inv_001012,acct_0022,sub_0292,2025-03-04,2025-03-04T12:00:00Z,open,USD +inv_001013,acct_0023,sub_0293,2025-03-05,2025-03-05T12:00:00Z,draft,USD 
+inv_001014,acct_0024,sub_0294,2025-04-06,2025-04-06T12:00:00Z,void,USD +inv_001015,acct_0025,sub_0295,2025-05-07,2025-05-07T12:00:00Z,failed,USD +inv_001016,acct_0026,sub_0296,2025-06-08,2025-06-08T12:00:00Z,paid,USD +inv_001017,acct_0027,sub_0297,2025-07-09,2025-07-09T12:00:00Z,open,USD +inv_001018,acct_0028,sub_0298,2025-08-10,2025-08-10T12:00:00Z,draft,USD +inv_001019,acct_0029,sub_0299,2025-09-11,2025-09-11T12:00:00Z,void,USD +inv_001020,acct_0030,sub_0300,2025-10-12,2025-10-12T12:00:00Z,failed,USD +inv_001021,acct_0031,sub_0301,2025-01-13,2025-01-13T12:00:00Z,paid,USD +inv_001022,acct_0032,sub_0302,2025-03-14,2025-03-14T12:00:00Z,open,USD +inv_001023,acct_0033,sub_0303,2025-03-15,2025-03-15T12:00:00Z,draft,USD +inv_001024,acct_0034,sub_0304,2025-04-16,2025-04-16T12:00:00Z,void,USD +inv_001025,acct_0035,sub_0305,2025-05-17,2025-05-17T12:00:00Z,failed,USD +inv_001026,acct_0036,sub_0306,2025-06-18,2025-06-18T12:00:00Z,paid,USD +inv_001027,acct_0037,sub_0307,2025-07-19,2025-07-19T12:00:00Z,open,USD +inv_001028,acct_0038,sub_0308,2025-08-20,2025-08-20T12:00:00Z,draft,USD +inv_001029,acct_0039,sub_0309,2025-09-21,2025-09-21T12:00:00Z,void,USD +inv_001030,acct_0040,sub_0310,2025-10-22,2025-10-22T12:00:00Z,failed,USD +inv_001031,acct_0041,sub_0311,2025-01-23,2025-01-23T12:00:00Z,paid,USD +inv_001032,acct_0042,sub_0312,2025-03-24,2025-03-24T12:00:00Z,open,USD +inv_001033,acct_0043,sub_0313,2025-03-25,2025-03-25T12:00:00Z,draft,USD +inv_001034,acct_0044,sub_0314,2025-04-26,2025-04-26T12:00:00Z,void,USD +inv_001035,acct_0045,sub_0315,2025-05-27,2025-05-27T12:00:00Z,failed,USD +inv_001036,acct_0046,sub_0316,2025-06-28,2025-06-28T12:00:00Z,paid,USD +inv_001037,acct_0047,sub_0317,2025-07-01,2025-07-01T12:00:00Z,open,USD +inv_001038,acct_0048,sub_0318,2025-08-02,2025-08-02T12:00:00Z,draft,USD +inv_001039,acct_0049,sub_0319,2025-09-03,2025-09-03T12:00:00Z,void,USD +inv_001040,acct_0050,sub_0320,2025-10-04,2025-10-04T12:00:00Z,failed,USD 
+inv_001041,acct_0051,sub_0321,2025-01-05,2025-01-05T12:00:00Z,paid,USD +inv_001042,acct_0052,sub_0322,2025-03-06,2025-03-06T12:00:00Z,open,USD +inv_001043,acct_0053,sub_0323,2025-03-07,2025-03-07T12:00:00Z,draft,USD +inv_001044,acct_0054,sub_0324,2025-04-08,2025-04-08T12:00:00Z,void,USD +inv_001045,acct_0055,sub_0325,2025-05-09,2025-05-09T12:00:00Z,failed,USD +inv_001046,acct_0056,sub_0326,2025-06-10,2025-06-10T12:00:00Z,paid,USD +inv_001047,acct_0057,sub_0327,2025-07-11,2025-07-11T12:00:00Z,open,USD +inv_001048,acct_0058,sub_0328,2025-08-12,2025-08-12T12:00:00Z,draft,USD +inv_001049,acct_0059,sub_0329,2025-09-13,2025-09-13T12:00:00Z,void,USD +inv_001050,acct_0060,sub_0330,2025-10-14,2025-10-14T12:00:00Z,failed,USD +inv_001051,acct_0061,sub_0331,2025-01-15,2025-01-15T12:00:00Z,paid,USD +inv_001052,acct_0062,sub_0332,2025-03-16,2025-03-16T12:00:00Z,open,USD +inv_001053,acct_0063,sub_0333,2025-03-17,2025-03-17T12:00:00Z,draft,USD +inv_001054,acct_0064,sub_0334,2025-04-18,2025-04-18T12:00:00Z,void,USD +inv_001055,acct_0065,sub_0335,2025-05-19,2025-05-19T12:00:00Z,failed,USD +inv_001056,acct_0066,sub_0336,2025-06-20,2025-06-20T12:00:00Z,paid,USD +inv_001057,acct_0067,sub_0337,2025-07-21,2025-07-21T12:00:00Z,open,USD +inv_001058,acct_0068,sub_0338,2025-08-22,2025-08-22T12:00:00Z,draft,USD +inv_001059,acct_0069,sub_0339,2025-09-23,2025-09-23T12:00:00Z,void,USD +inv_001060,acct_0070,sub_0340,2025-10-24,2025-10-24T12:00:00Z,failed,USD +inv_001061,acct_0071,sub_0341,2025-01-25,2025-01-25T12:00:00Z,paid,USD +inv_001062,acct_0072,sub_0342,2025-03-26,2025-03-26T12:00:00Z,open,USD +inv_001063,acct_0073,sub_0343,2025-03-27,2025-03-27T12:00:00Z,draft,USD +inv_001064,acct_0074,sub_0344,2025-04-28,2025-04-28T12:00:00Z,void,USD +inv_001065,acct_0075,sub_0345,2025-05-01,2025-05-01T12:00:00Z,failed,USD +inv_001066,acct_0076,sub_0346,2025-06-02,2025-06-02T12:00:00Z,paid,USD +inv_001067,acct_0077,sub_0347,2025-07-03,2025-07-03T12:00:00Z,open,USD 
+inv_001068,acct_0078,sub_0348,2025-08-04,2025-08-04T12:00:00Z,draft,USD +inv_001069,acct_0079,sub_0349,2025-09-05,2025-09-05T12:00:00Z,void,USD +inv_001070,acct_0080,sub_0350,2025-10-06,2025-10-06T12:00:00Z,failed,USD +inv_001071,acct_0081,sub_0351,2025-01-07,2025-01-07T12:00:00Z,paid,USD +inv_001072,acct_0082,sub_0352,2025-03-08,2025-03-08T12:00:00Z,open,USD +inv_001073,acct_0083,sub_0353,2025-03-09,2025-03-09T12:00:00Z,draft,USD +inv_001074,acct_0084,sub_0354,2025-04-10,2025-04-10T12:00:00Z,void,USD +inv_001075,acct_0085,sub_0355,2025-05-11,2025-05-11T12:00:00Z,failed,USD +inv_001076,acct_0086,sub_0356,2025-06-12,2025-06-12T12:00:00Z,paid,USD +inv_001077,acct_0087,sub_0357,2025-07-13,2025-07-13T12:00:00Z,open,USD +inv_001078,acct_0088,sub_0358,2025-08-14,2025-08-14T12:00:00Z,draft,USD +inv_001079,acct_0089,sub_0359,2025-09-15,2025-09-15T12:00:00Z,void,USD +inv_001080,acct_0090,sub_0360,2025-10-16,2025-10-16T12:00:00Z,failed,USD +inv_001081,acct_0091,sub_0001,2025-01-17,2025-01-17T12:00:00Z,paid,USD +inv_001082,acct_0092,sub_0002,2025-03-18,2025-03-18T12:00:00Z,open,USD +inv_001083,acct_0093,sub_0003,2025-03-19,2025-03-19T12:00:00Z,draft,USD +inv_001084,acct_0094,sub_0004,2025-04-20,2025-04-20T12:00:00Z,void,USD +inv_001085,acct_0095,sub_0005,2025-05-21,2025-05-21T12:00:00Z,failed,USD +inv_001086,acct_0096,sub_0006,2025-06-22,2025-06-22T12:00:00Z,paid,USD +inv_001087,acct_0097,sub_0007,2025-07-23,2025-07-23T12:00:00Z,open,USD +inv_001088,acct_0098,sub_0008,2025-08-24,2025-08-24T12:00:00Z,draft,USD +inv_001089,acct_0099,sub_0009,2025-09-25,2025-09-25T12:00:00Z,void,USD +inv_001090,acct_0100,sub_0010,2025-10-26,2025-10-26T12:00:00Z,failed,USD +inv_001091,acct_0101,sub_0011,2025-01-27,2025-01-27T12:00:00Z,paid,USD +inv_001092,acct_0102,sub_0012,2025-03-28,2025-03-28T12:00:00Z,open,USD +inv_001093,acct_0103,sub_0013,2025-03-01,2025-03-01T12:00:00Z,draft,USD +inv_001094,acct_0104,sub_0014,2025-04-02,2025-04-02T12:00:00Z,void,USD 
+inv_001095,acct_0105,sub_0015,2025-05-03,2025-05-03T12:00:00Z,failed,USD +inv_001096,acct_0106,sub_0016,2025-06-04,2025-06-04T12:00:00Z,paid,USD +inv_001097,acct_0107,sub_0017,2025-07-05,2025-07-05T12:00:00Z,open,USD +inv_001098,acct_0108,sub_0018,2025-08-06,2025-08-06T12:00:00Z,draft,USD +inv_001099,acct_0109,sub_0019,2025-09-07,2025-09-07T12:00:00Z,void,USD +inv_001100,acct_0110,sub_0020,2025-10-08,2025-10-08T12:00:00Z,failed,USD +inv_001101,acct_0111,sub_0021,2025-01-09,2025-01-09T12:00:00Z,paid,USD +inv_001102,acct_0112,sub_0022,2025-03-10,2025-03-10T12:00:00Z,open,USD +inv_001103,acct_0113,sub_0023,2025-03-11,2025-03-11T12:00:00Z,draft,USD +inv_001104,acct_0114,sub_0024,2025-04-12,2025-04-12T12:00:00Z,void,USD +inv_001105,acct_0115,sub_0025,2025-05-13,2025-05-13T12:00:00Z,failed,USD +inv_001106,acct_0116,sub_0026,2025-06-14,2025-06-14T12:00:00Z,paid,USD +inv_001107,acct_0117,sub_0027,2025-07-15,2025-07-15T12:00:00Z,open,USD +inv_001108,acct_0118,sub_0028,2025-08-16,2025-08-16T12:00:00Z,draft,USD +inv_001109,acct_0119,sub_0029,2025-09-17,2025-09-17T12:00:00Z,void,USD +inv_001110,acct_0120,sub_0030,2025-10-18,2025-10-18T12:00:00Z,failed,USD +inv_001111,acct_0121,sub_0031,2025-01-19,2025-01-19T12:00:00Z,paid,USD +inv_001112,acct_0122,sub_0032,2025-03-20,2025-03-20T12:00:00Z,open,USD +inv_001113,acct_0123,sub_0033,2025-03-21,2025-03-21T12:00:00Z,draft,USD +inv_001114,acct_0124,sub_0034,2025-04-22,2025-04-22T12:00:00Z,void,USD +inv_001115,acct_0125,sub_0035,2025-05-23,2025-05-23T12:00:00Z,failed,USD +inv_001116,acct_0126,sub_0036,2025-06-24,2025-06-24T12:00:00Z,paid,USD +inv_001117,acct_0127,sub_0037,2025-07-25,2025-07-25T12:00:00Z,open,USD +inv_001118,acct_0128,sub_0038,2025-08-26,2025-08-26T12:00:00Z,draft,USD +inv_001119,acct_0129,sub_0039,2025-09-27,2025-09-27T12:00:00Z,void,USD +inv_001120,acct_0130,sub_0040,2025-10-28,2025-10-28T12:00:00Z,failed,USD +inv_001121,acct_0131,sub_0041,2025-01-01,2025-01-01T12:00:00Z,paid,USD 
+inv_001122,acct_0132,sub_0042,2025-03-02,2025-03-02T12:00:00Z,open,USD +inv_001123,acct_0133,sub_0043,2025-03-03,2025-03-03T12:00:00Z,draft,USD +inv_001124,acct_0134,sub_0044,2025-04-04,2025-04-04T12:00:00Z,void,USD +inv_001125,acct_0135,sub_0045,2025-05-05,2025-05-05T12:00:00Z,failed,USD +inv_001126,acct_0136,sub_0046,2025-06-06,2025-06-06T12:00:00Z,paid,USD +inv_001127,acct_0137,sub_0047,2025-07-07,2025-07-07T12:00:00Z,open,USD +inv_001128,acct_0138,sub_0048,2025-08-08,2025-08-08T12:00:00Z,draft,USD +inv_001129,acct_0139,sub_0049,2025-09-09,2025-09-09T12:00:00Z,void,USD +inv_001130,acct_0140,sub_0050,2025-10-10,2025-10-10T12:00:00Z,failed,USD +inv_001131,acct_0141,sub_0051,2025-01-11,2025-01-11T12:00:00Z,paid,USD +inv_001132,acct_0142,sub_0052,2025-03-12,2025-03-12T12:00:00Z,open,USD +inv_001133,acct_0143,sub_0053,2025-03-13,2025-03-13T12:00:00Z,draft,USD +inv_001134,acct_0144,sub_0054,2025-04-14,2025-04-14T12:00:00Z,void,USD +inv_001135,acct_0145,sub_0055,2025-05-15,2025-05-15T12:00:00Z,failed,USD +inv_001136,acct_0146,sub_0056,2025-06-16,2025-06-16T12:00:00Z,paid,USD +inv_001137,acct_0147,sub_0057,2025-07-17,2025-07-17T12:00:00Z,open,USD +inv_001138,acct_0148,sub_0058,2025-08-18,2025-08-18T12:00:00Z,draft,USD +inv_001139,acct_0149,sub_0059,2025-09-19,2025-09-19T12:00:00Z,void,USD +inv_001140,acct_0150,sub_0060,2025-10-20,2025-10-20T12:00:00Z,failed,USD +inv_001141,acct_0151,sub_0061,2025-01-21,2025-01-21T12:00:00Z,paid,USD +inv_001142,acct_0152,sub_0062,2025-03-22,2025-03-22T12:00:00Z,open,USD +inv_001143,acct_0153,sub_0063,2025-03-23,2025-03-23T12:00:00Z,draft,USD +inv_001144,acct_0154,sub_0064,2025-04-24,2025-04-24T12:00:00Z,void,USD +inv_001145,acct_0155,sub_0065,2025-05-25,2025-05-25T12:00:00Z,failed,USD +inv_001146,acct_0156,sub_0066,2025-06-26,2025-06-26T12:00:00Z,paid,USD +inv_001147,acct_0157,sub_0067,2025-07-27,2025-07-27T12:00:00Z,open,USD +inv_001148,acct_0158,sub_0068,2025-08-28,2025-08-28T12:00:00Z,draft,USD 
+inv_001149,acct_0159,sub_0069,2025-09-01,2025-09-01T12:00:00Z,void,USD +inv_001150,acct_0160,sub_0070,2025-10-02,2025-10-02T12:00:00Z,failed,USD +inv_001151,acct_0161,sub_0071,2025-01-03,2025-01-03T12:00:00Z,paid,USD +inv_001152,acct_0162,sub_0072,2025-03-04,2025-03-04T12:00:00Z,open,USD +inv_001153,acct_0163,sub_0073,2025-03-05,2025-03-05T12:00:00Z,draft,USD +inv_001154,acct_0164,sub_0074,2025-04-06,2025-04-06T12:00:00Z,void,USD +inv_001155,acct_0165,sub_0075,2025-05-07,2025-05-07T12:00:00Z,failed,USD +inv_001156,acct_0166,sub_0076,2025-06-08,2025-06-08T12:00:00Z,paid,USD +inv_001157,acct_0167,sub_0077,2025-07-09,2025-07-09T12:00:00Z,open,USD +inv_001158,acct_0168,sub_0078,2025-08-10,2025-08-10T12:00:00Z,draft,USD +inv_001159,acct_0169,sub_0079,2025-09-11,2025-09-11T12:00:00Z,void,USD +inv_001160,acct_0170,sub_0080,2025-10-12,2025-10-12T12:00:00Z,failed,USD +inv_001161,acct_0171,sub_0081,2025-01-13,2025-01-13T12:00:00Z,paid,USD +inv_001162,acct_0172,sub_0082,2025-03-14,2025-03-14T12:00:00Z,open,USD +inv_001163,acct_0173,sub_0083,2025-03-15,2025-03-15T12:00:00Z,draft,USD +inv_001164,acct_0174,sub_0084,2025-04-16,2025-04-16T12:00:00Z,void,USD +inv_001165,acct_0175,sub_0085,2025-05-17,2025-05-17T12:00:00Z,failed,USD +inv_001166,acct_0176,sub_0086,2025-06-18,2025-06-18T12:00:00Z,paid,USD +inv_001167,acct_0177,sub_0087,2025-07-19,2025-07-19T12:00:00Z,open,USD +inv_001168,acct_0178,sub_0088,2025-08-20,2025-08-20T12:00:00Z,draft,USD +inv_001169,acct_0179,sub_0089,2025-09-21,2025-09-21T12:00:00Z,void,USD +inv_001170,acct_0180,sub_0090,2025-10-22,2025-10-22T12:00:00Z,failed,USD +inv_001171,acct_0181,sub_0091,2025-01-23,2025-01-23T12:00:00Z,paid,USD +inv_001172,acct_0182,sub_0092,2025-03-24,2025-03-24T12:00:00Z,open,USD +inv_001173,acct_0183,sub_0093,2025-03-25,2025-03-25T12:00:00Z,draft,USD +inv_001174,acct_0184,sub_0094,2025-04-26,2025-04-26T12:00:00Z,void,USD +inv_001175,acct_0185,sub_0095,2025-05-27,2025-05-27T12:00:00Z,failed,USD 
+inv_001176,acct_0186,sub_0096,2025-06-28,2025-06-28T12:00:00Z,paid,USD +inv_001177,acct_0187,sub_0097,2025-07-01,2025-07-01T12:00:00Z,open,USD +inv_001178,acct_0188,sub_0098,2025-08-02,2025-08-02T12:00:00Z,draft,USD +inv_001179,acct_0189,sub_0099,2025-09-03,2025-09-03T12:00:00Z,void,USD +inv_001180,acct_0190,sub_0100,2025-10-04,2025-10-04T12:00:00Z,failed,USD +inv_001181,acct_0191,sub_0101,2025-01-05,2025-01-05T12:00:00Z,paid,USD +inv_001182,acct_0192,sub_0102,2025-03-06,2025-03-06T12:00:00Z,open,USD +inv_001183,acct_0193,sub_0103,2025-03-07,2025-03-07T12:00:00Z,draft,USD +inv_001184,acct_0194,sub_0104,2025-04-08,2025-04-08T12:00:00Z,void,USD +inv_001185,acct_0195,sub_0105,2025-05-09,2025-05-09T12:00:00Z,failed,USD +inv_001186,acct_0196,sub_0106,2025-06-10,2025-06-10T12:00:00Z,paid,USD +inv_001187,acct_0197,sub_0107,2025-07-11,2025-07-11T12:00:00Z,open,USD +inv_001188,acct_0198,sub_0108,2025-08-12,2025-08-12T12:00:00Z,draft,USD +inv_001189,acct_0001,sub_0109,2025-09-13,2025-09-13T12:00:00Z,void,USD +inv_001190,acct_0002,sub_0110,2025-10-14,2025-10-14T12:00:00Z,failed,USD +inv_001191,acct_0003,sub_0111,2025-01-15,2025-01-15T12:00:00Z,paid,USD +inv_001192,acct_0004,sub_0112,2025-03-16,2025-03-16T12:00:00Z,open,USD +inv_001193,acct_0005,sub_0113,2025-03-17,2025-03-17T12:00:00Z,draft,USD +inv_001194,acct_0006,sub_0114,2025-04-18,2025-04-18T12:00:00Z,void,USD +inv_001195,acct_0007,sub_0115,2025-05-19,2025-05-19T12:00:00Z,failed,USD +inv_001196,acct_0008,sub_0116,2025-06-20,2025-06-20T12:00:00Z,paid,USD +inv_001197,acct_0009,sub_0117,2025-07-21,2025-07-21T12:00:00Z,open,USD +inv_001198,acct_0010,sub_0118,2025-08-22,2025-08-22T12:00:00Z,draft,USD +inv_001199,acct_0011,sub_0119,2025-09-23,2025-09-23T12:00:00Z,void,USD +inv_001200,acct_0012,sub_0120,2025-10-24,2025-10-24T12:00:00Z,failed,USD +inv_001201,acct_0013,sub_0121,2025-01-25,2025-01-25T12:00:00Z,paid,USD +inv_001202,acct_0014,sub_0122,2025-03-26,2025-03-26T12:00:00Z,open,USD 
+inv_001203,acct_0015,sub_0123,2025-03-27,2025-03-27T12:00:00Z,draft,USD +inv_001204,acct_0016,sub_0124,2025-04-28,2025-04-28T12:00:00Z,void,USD +inv_001205,acct_0017,sub_0125,2025-05-01,2025-05-01T12:00:00Z,failed,USD +inv_001206,acct_0018,sub_0126,2025-06-02,2025-06-02T12:00:00Z,paid,USD +inv_001207,acct_0019,sub_0127,2025-07-03,2025-07-03T12:00:00Z,open,USD +inv_001208,acct_0020,sub_0128,2025-08-04,2025-08-04T12:00:00Z,draft,USD +inv_001209,acct_0021,sub_0129,2025-09-05,2025-09-05T12:00:00Z,void,USD +inv_001210,acct_0022,sub_0130,2025-10-06,2025-10-06T12:00:00Z,failed,USD +inv_001211,acct_0023,sub_0131,2025-01-07,2025-01-07T12:00:00Z,paid,USD +inv_001212,acct_0024,sub_0132,2025-03-08,2025-03-08T12:00:00Z,open,USD +inv_001213,acct_0025,sub_0133,2025-03-09,2025-03-09T12:00:00Z,draft,USD +inv_001214,acct_0026,sub_0134,2025-04-10,2025-04-10T12:00:00Z,void,USD +inv_001215,acct_0027,sub_0135,2025-05-11,2025-05-11T12:00:00Z,failed,USD +inv_001216,acct_0028,sub_0136,2025-06-12,2025-06-12T12:00:00Z,paid,USD +inv_001217,acct_0029,sub_0137,2025-07-13,2025-07-13T12:00:00Z,open,USD +inv_001218,acct_0030,sub_0138,2025-08-14,2025-08-14T12:00:00Z,draft,USD +inv_001219,acct_0031,sub_0139,2025-09-15,2025-09-15T12:00:00Z,void,USD +inv_001220,acct_0032,sub_0140,2025-10-16,2025-10-16T12:00:00Z,failed,USD +inv_001221,acct_0033,sub_0141,2025-01-17,2025-01-17T12:00:00Z,paid,USD +inv_001222,acct_0034,sub_0142,2025-03-18,2025-03-18T12:00:00Z,open,USD +inv_001223,acct_0035,sub_0143,2025-03-19,2025-03-19T12:00:00Z,draft,USD +inv_001224,acct_0036,sub_0144,2025-04-20,2025-04-20T12:00:00Z,void,USD +inv_001225,acct_0037,sub_0145,2025-05-21,2025-05-21T12:00:00Z,failed,USD +inv_001226,acct_0038,sub_0146,2025-06-22,2025-06-22T12:00:00Z,paid,USD +inv_001227,acct_0039,sub_0147,2025-07-23,2025-07-23T12:00:00Z,open,USD +inv_001228,acct_0040,sub_0148,2025-08-24,2025-08-24T12:00:00Z,draft,USD +inv_001229,acct_0041,sub_0149,2025-09-25,2025-09-25T12:00:00Z,void,USD 
+inv_001230,acct_0042,sub_0150,2025-10-26,2025-10-26T12:00:00Z,failed,USD +inv_001231,acct_0043,sub_0151,2025-01-27,2025-01-27T12:00:00Z,paid,USD +inv_001232,acct_0044,sub_0152,2025-03-28,2025-03-28T12:00:00Z,open,USD +inv_001233,acct_0045,sub_0153,2025-03-01,2025-03-01T12:00:00Z,draft,USD +inv_001234,acct_0046,sub_0154,2025-04-02,2025-04-02T12:00:00Z,void,USD +inv_001235,acct_0047,sub_0155,2025-05-03,2025-05-03T12:00:00Z,failed,USD +inv_001236,acct_0048,sub_0156,2025-06-04,2025-06-04T12:00:00Z,paid,USD +inv_001237,acct_0049,sub_0157,2025-07-05,2025-07-05T12:00:00Z,open,USD +inv_001238,acct_0050,sub_0158,2025-08-06,2025-08-06T12:00:00Z,draft,USD +inv_001239,acct_0051,sub_0159,2025-09-07,2025-09-07T12:00:00Z,void,USD +inv_001240,acct_0052,sub_0160,2025-10-08,2025-10-08T12:00:00Z,failed,USD +inv_001241,acct_0053,sub_0161,2025-01-09,2025-01-09T12:00:00Z,paid,USD +inv_001242,acct_0054,sub_0162,2025-03-10,2025-03-10T12:00:00Z,open,USD +inv_001243,acct_0055,sub_0163,2025-03-11,2025-03-11T12:00:00Z,draft,USD +inv_001244,acct_0056,sub_0164,2025-04-12,2025-04-12T12:00:00Z,void,USD +inv_001245,acct_0057,sub_0165,2025-05-13,2025-05-13T12:00:00Z,failed,USD +inv_001246,acct_0058,sub_0166,2025-06-14,2025-06-14T12:00:00Z,paid,USD +inv_001247,acct_0059,sub_0167,2025-07-15,2025-07-15T12:00:00Z,open,USD +inv_001248,acct_0060,sub_0168,2025-08-16,2025-08-16T12:00:00Z,draft,USD +inv_001249,acct_0061,sub_0169,2025-09-17,2025-09-17T12:00:00Z,void,USD +inv_001250,acct_0062,sub_0170,2025-10-18,2025-10-18T12:00:00Z,failed,USD +inv_001251,acct_0063,sub_0171,2025-01-19,2025-01-19T12:00:00Z,paid,USD +inv_001252,acct_0064,sub_0172,2025-03-20,2025-03-20T12:00:00Z,open,USD +inv_001253,acct_0065,sub_0173,2025-03-21,2025-03-21T12:00:00Z,draft,USD +inv_001254,acct_0066,sub_0174,2025-04-22,2025-04-22T12:00:00Z,void,USD +inv_001255,acct_0067,sub_0175,2025-05-23,2025-05-23T12:00:00Z,failed,USD +inv_001256,acct_0068,sub_0176,2025-06-24,2025-06-24T12:00:00Z,paid,USD 
+inv_001257,acct_0069,sub_0177,2025-07-25,2025-07-25T12:00:00Z,open,USD +inv_001258,acct_0070,sub_0178,2025-08-26,2025-08-26T12:00:00Z,draft,USD +inv_001259,acct_0071,sub_0179,2025-09-27,2025-09-27T12:00:00Z,void,USD +inv_001260,acct_0072,sub_0180,2025-10-28,2025-10-28T12:00:00Z,failed,USD +inv_001261,acct_0073,sub_0181,2025-01-01,2025-01-01T12:00:00Z,paid,USD +inv_001262,acct_0074,sub_0182,2025-03-02,2025-03-02T12:00:00Z,open,USD +inv_001263,acct_0075,sub_0183,2025-03-03,2025-03-03T12:00:00Z,draft,USD +inv_001264,acct_0076,sub_0184,2025-04-04,2025-04-04T12:00:00Z,void,USD +inv_001265,acct_0077,sub_0185,2025-05-05,2025-05-05T12:00:00Z,failed,USD +inv_001266,acct_0078,sub_0186,2025-06-06,2025-06-06T12:00:00Z,paid,USD +inv_001267,acct_0079,sub_0187,2025-07-07,2025-07-07T12:00:00Z,open,USD +inv_001268,acct_0080,sub_0188,2025-08-08,2025-08-08T12:00:00Z,draft,USD +inv_001269,acct_0081,sub_0189,2025-09-09,2025-09-09T12:00:00Z,void,USD +inv_001270,acct_0082,sub_0190,2025-10-10,2025-10-10T12:00:00Z,failed,USD +inv_001271,acct_0083,sub_0191,2025-01-11,2025-01-11T12:00:00Z,paid,USD +inv_001272,acct_0084,sub_0192,2025-03-12,2025-03-12T12:00:00Z,open,USD +inv_001273,acct_0085,sub_0193,2025-03-13,2025-03-13T12:00:00Z,draft,USD +inv_001274,acct_0086,sub_0194,2025-04-14,2025-04-14T12:00:00Z,void,USD +inv_001275,acct_0087,sub_0195,2025-05-15,2025-05-15T12:00:00Z,failed,USD +inv_001276,acct_0088,sub_0196,2025-06-16,2025-06-16T12:00:00Z,paid,USD +inv_001277,acct_0089,sub_0197,2025-07-17,2025-07-17T12:00:00Z,open,USD +inv_001278,acct_0090,sub_0198,2025-08-18,2025-08-18T12:00:00Z,draft,USD +inv_001279,acct_0091,sub_0199,2025-09-19,2025-09-19T12:00:00Z,void,USD +inv_001280,acct_0092,sub_0200,2025-10-20,2025-10-20T12:00:00Z,failed,USD +inv_001281,acct_0093,sub_0201,2025-01-21,2025-01-21T12:00:00Z,paid,USD +inv_001282,acct_0094,sub_0202,2025-03-22,2025-03-22T12:00:00Z,open,USD +inv_001283,acct_0095,sub_0203,2025-03-23,2025-03-23T12:00:00Z,draft,USD 
+inv_001284,acct_0096,sub_0204,2025-04-24,2025-04-24T12:00:00Z,void,USD +inv_001285,acct_0097,sub_0205,2025-05-25,2025-05-25T12:00:00Z,failed,USD +inv_001286,acct_0098,sub_0206,2025-06-26,2025-06-26T12:00:00Z,paid,USD +inv_001287,acct_0099,sub_0207,2025-07-27,2025-07-27T12:00:00Z,open,USD +inv_001288,acct_0100,sub_0208,2025-08-28,2025-08-28T12:00:00Z,draft,USD +inv_001289,acct_0101,sub_0209,2025-09-01,2025-09-01T12:00:00Z,void,USD +inv_001290,acct_0102,sub_0210,2025-10-02,2025-10-02T12:00:00Z,failed,USD +inv_001291,acct_0103,sub_0211,2025-01-03,2025-01-03T12:00:00Z,paid,USD +inv_001292,acct_0104,sub_0212,2025-03-04,2025-03-04T12:00:00Z,open,USD +inv_001293,acct_0105,sub_0213,2025-03-05,2025-03-05T12:00:00Z,draft,USD +inv_001294,acct_0106,sub_0214,2025-04-06,2025-04-06T12:00:00Z,void,USD +inv_001295,acct_0107,sub_0215,2025-05-07,2025-05-07T12:00:00Z,failed,USD +inv_001296,acct_0108,sub_0216,2025-06-08,2025-06-08T12:00:00Z,paid,USD +inv_001297,acct_0109,sub_0217,2025-07-09,2025-07-09T12:00:00Z,open,USD +inv_001298,acct_0110,sub_0218,2025-08-10,2025-08-10T12:00:00Z,draft,USD +inv_001299,acct_0111,sub_0219,2025-09-11,2025-09-11T12:00:00Z,void,USD +inv_001300,acct_0112,sub_0220,2025-10-12,2025-10-12T12:00:00Z,failed,USD +inv_001301,acct_0113,sub_0221,2025-01-13,2025-01-13T12:00:00Z,paid,USD +inv_001302,acct_0114,sub_0222,2025-03-14,2025-03-14T12:00:00Z,open,USD +inv_001303,acct_0115,sub_0223,2025-03-15,2025-03-15T12:00:00Z,draft,USD +inv_001304,acct_0116,sub_0224,2025-04-16,2025-04-16T12:00:00Z,void,USD +inv_001305,acct_0117,sub_0225,2025-05-17,2025-05-17T12:00:00Z,failed,USD +inv_001306,acct_0118,sub_0226,2025-06-18,2025-06-18T12:00:00Z,paid,USD +inv_001307,acct_0119,sub_0227,2025-07-19,2025-07-19T12:00:00Z,open,USD +inv_001308,acct_0120,sub_0228,2025-08-20,2025-08-20T12:00:00Z,draft,USD +inv_001309,acct_0121,sub_0229,2025-09-21,2025-09-21T12:00:00Z,void,USD +inv_001310,acct_0122,sub_0230,2025-10-22,2025-10-22T12:00:00Z,failed,USD 
+inv_001311,acct_0123,sub_0231,2025-01-23,2025-01-23T12:00:00Z,paid,USD +inv_001312,acct_0124,sub_0232,2025-03-24,2025-03-24T12:00:00Z,open,USD +inv_001313,acct_0125,sub_0233,2025-03-25,2025-03-25T12:00:00Z,draft,USD +inv_001314,acct_0126,sub_0234,2025-04-26,2025-04-26T12:00:00Z,void,USD +inv_001315,acct_0127,sub_0235,2025-05-27,2025-05-27T12:00:00Z,failed,USD +inv_001316,acct_0128,sub_0236,2025-06-28,2025-06-28T12:00:00Z,paid,USD +inv_001317,acct_0129,sub_0237,2025-07-01,2025-07-01T12:00:00Z,open,USD +inv_001318,acct_0130,sub_0238,2025-08-02,2025-08-02T12:00:00Z,draft,USD +inv_001319,acct_0131,sub_0239,2025-09-03,2025-09-03T12:00:00Z,void,USD +inv_001320,acct_0132,sub_0240,2025-10-04,2025-10-04T12:00:00Z,failed,USD +inv_001321,acct_0133,sub_0241,2025-01-05,2025-01-05T12:00:00Z,paid,USD +inv_001322,acct_0134,sub_0242,2025-03-06,2025-03-06T12:00:00Z,open,USD +inv_001323,acct_0135,sub_0243,2025-03-07,2025-03-07T12:00:00Z,draft,USD +inv_001324,acct_0136,sub_0244,2025-04-08,2025-04-08T12:00:00Z,void,USD +inv_001325,acct_0137,sub_0245,2025-05-09,2025-05-09T12:00:00Z,failed,USD +inv_001326,acct_0138,sub_0246,2025-06-10,2025-06-10T12:00:00Z,paid,USD +inv_001327,acct_0139,sub_0247,2025-07-11,2025-07-11T12:00:00Z,open,USD +inv_001328,acct_0140,sub_0248,2025-08-12,2025-08-12T12:00:00Z,draft,USD +inv_001329,acct_0141,sub_0249,2025-09-13,2025-09-13T12:00:00Z,void,USD +inv_001330,acct_0142,sub_0250,2025-10-14,2025-10-14T12:00:00Z,failed,USD +inv_001331,acct_0143,sub_0251,2025-01-15,2025-01-15T12:00:00Z,paid,USD +inv_001332,acct_0144,sub_0252,2025-03-16,2025-03-16T12:00:00Z,open,USD +inv_001333,acct_0145,sub_0253,2025-03-17,2025-03-17T12:00:00Z,draft,USD +inv_001334,acct_0146,sub_0254,2025-04-18,2025-04-18T12:00:00Z,void,USD +inv_001335,acct_0147,sub_0255,2025-05-19,2025-05-19T12:00:00Z,failed,USD +inv_001336,acct_0148,sub_0256,2025-06-20,2025-06-20T12:00:00Z,paid,USD +inv_001337,acct_0149,sub_0257,2025-07-21,2025-07-21T12:00:00Z,open,USD 
+inv_001338,acct_0150,sub_0258,2025-08-22,2025-08-22T12:00:00Z,draft,USD +inv_001339,acct_0151,sub_0259,2025-09-23,2025-09-23T12:00:00Z,void,USD +inv_001340,acct_0152,sub_0260,2025-10-24,2025-10-24T12:00:00Z,failed,USD +inv_001341,acct_0153,sub_0261,2025-01-25,2025-01-25T12:00:00Z,paid,USD +inv_001342,acct_0154,sub_0262,2025-03-26,2025-03-26T12:00:00Z,open,USD +inv_001343,acct_0155,sub_0263,2025-03-27,2025-03-27T12:00:00Z,draft,USD +inv_001344,acct_0156,sub_0264,2025-04-28,2025-04-28T12:00:00Z,void,USD +inv_001345,acct_0157,sub_0265,2025-05-01,2025-05-01T12:00:00Z,failed,USD +inv_001346,acct_0158,sub_0266,2025-06-02,2025-06-02T12:00:00Z,paid,USD +inv_001347,acct_0159,sub_0267,2025-07-03,2025-07-03T12:00:00Z,open,USD +inv_001348,acct_0160,sub_0268,2025-08-04,2025-08-04T12:00:00Z,draft,USD +inv_001349,acct_0161,sub_0269,2025-09-05,2025-09-05T12:00:00Z,void,USD +inv_001350,acct_0162,sub_0270,2025-10-06,2025-10-06T12:00:00Z,failed,USD +inv_001351,acct_0163,sub_0271,2025-01-07,2025-01-07T12:00:00Z,paid,USD +inv_001352,acct_0164,sub_0272,2025-03-08,2025-03-08T12:00:00Z,open,USD +inv_001353,acct_0165,sub_0273,2025-03-09,2025-03-09T12:00:00Z,draft,USD +inv_001354,acct_0166,sub_0274,2025-04-10,2025-04-10T12:00:00Z,void,USD +inv_001355,acct_0167,sub_0275,2025-05-11,2025-05-11T12:00:00Z,failed,USD +inv_001356,acct_0168,sub_0276,2025-06-12,2025-06-12T12:00:00Z,paid,USD +inv_001357,acct_0169,sub_0277,2025-07-13,2025-07-13T12:00:00Z,open,USD +inv_001358,acct_0170,sub_0278,2025-08-14,2025-08-14T12:00:00Z,draft,USD +inv_001359,acct_0171,sub_0279,2025-09-15,2025-09-15T12:00:00Z,void,USD +inv_001360,acct_0172,sub_0280,2025-10-16,2025-10-16T12:00:00Z,failed,USD +inv_001361,acct_0173,sub_0281,2025-01-17,2025-01-17T12:00:00Z,paid,USD +inv_001362,acct_0174,sub_0282,2025-03-18,2025-03-18T12:00:00Z,open,USD +inv_001363,acct_0175,sub_0283,2025-03-19,2025-03-19T12:00:00Z,draft,USD +inv_001364,acct_0176,sub_0284,2025-04-20,2025-04-20T12:00:00Z,void,USD 
+inv_001365,acct_0177,sub_0285,2025-05-21,2025-05-21T12:00:00Z,failed,USD +inv_001366,acct_0178,sub_0286,2025-06-22,2025-06-22T12:00:00Z,paid,USD +inv_001367,acct_0179,sub_0287,2025-07-23,2025-07-23T12:00:00Z,open,USD +inv_001368,acct_0180,sub_0288,2025-08-24,2025-08-24T12:00:00Z,draft,USD +inv_001369,acct_0181,sub_0289,2025-09-25,2025-09-25T12:00:00Z,void,USD +inv_001370,acct_0182,sub_0290,2025-10-26,2025-10-26T12:00:00Z,failed,USD +inv_001371,acct_0183,sub_0291,2025-01-27,2025-01-27T12:00:00Z,paid,USD +inv_001372,acct_0184,sub_0292,2025-03-28,2025-03-28T12:00:00Z,open,USD +inv_001373,acct_0185,sub_0293,2025-03-01,2025-03-01T12:00:00Z,draft,USD +inv_001374,acct_0186,sub_0294,2025-04-02,2025-04-02T12:00:00Z,void,USD +inv_001375,acct_0187,sub_0295,2025-05-03,2025-05-03T12:00:00Z,failed,USD +inv_001376,acct_0188,sub_0296,2025-06-04,2025-06-04T12:00:00Z,paid,USD +inv_001377,acct_0189,sub_0297,2025-07-05,2025-07-05T12:00:00Z,open,USD +inv_001378,acct_0190,sub_0298,2025-08-06,2025-08-06T12:00:00Z,draft,USD +inv_001379,acct_0191,sub_0299,2025-09-07,2025-09-07T12:00:00Z,void,USD +inv_001380,acct_0192,sub_0300,2025-10-08,2025-10-08T12:00:00Z,failed,USD +inv_001381,acct_0193,sub_0301,2025-01-09,2025-01-09T12:00:00Z,paid,USD +inv_001382,acct_0194,sub_0302,2025-03-10,2025-03-10T12:00:00Z,open,USD +inv_001383,acct_0195,sub_0303,2025-03-11,2025-03-11T12:00:00Z,draft,USD +inv_001384,acct_0196,sub_0304,2025-04-12,2025-04-12T12:00:00Z,void,USD +inv_001385,acct_0197,sub_0305,2025-05-13,2025-05-13T12:00:00Z,failed,USD +inv_001386,acct_0198,sub_0306,2025-06-14,2025-06-14T12:00:00Z,paid,USD +inv_001387,acct_0001,sub_0307,2025-07-15,2025-07-15T12:00:00Z,open,USD +inv_001388,acct_0002,sub_0308,2025-08-16,2025-08-16T12:00:00Z,draft,USD +inv_001389,acct_0003,sub_0309,2025-09-17,2025-09-17T12:00:00Z,void,USD +inv_001390,acct_0004,sub_0310,2025-10-18,2025-10-18T12:00:00Z,failed,USD +inv_001391,acct_0005,sub_0311,2025-01-19,2025-01-19T12:00:00Z,paid,USD 
+inv_001392,acct_0006,sub_0312,2025-03-20,2025-03-20T12:00:00Z,open,USD +inv_001393,acct_0007,sub_0313,2025-03-21,2025-03-21T12:00:00Z,draft,USD +inv_001394,acct_0008,sub_0314,2025-04-22,2025-04-22T12:00:00Z,void,USD +inv_001395,acct_0009,sub_0315,2025-05-23,2025-05-23T12:00:00Z,failed,USD +inv_001396,acct_0010,sub_0316,2025-06-24,2025-06-24T12:00:00Z,paid,USD +inv_001397,acct_0011,sub_0317,2025-07-25,2025-07-25T12:00:00Z,open,USD +inv_001398,acct_0012,sub_0318,2025-08-26,2025-08-26T12:00:00Z,draft,USD +inv_001399,acct_0013,sub_0319,2025-09-27,2025-09-27T12:00:00Z,void,USD +inv_001400,acct_0014,sub_0320,2025-10-28,2025-10-28T12:00:00Z,failed,USD +inv_001401,acct_0015,sub_0321,2025-01-01,2025-01-01T12:00:00Z,paid,USD +inv_001402,acct_0016,sub_0322,2025-03-02,2025-03-02T12:00:00Z,open,USD +inv_001403,acct_0017,sub_0323,2025-03-03,2025-03-03T12:00:00Z,draft,USD +inv_001404,acct_0018,sub_0324,2025-04-04,2025-04-04T12:00:00Z,void,USD +inv_001405,acct_0019,sub_0325,2025-05-05,2025-05-05T12:00:00Z,failed,USD +inv_001406,acct_0020,sub_0326,2025-06-06,2025-06-06T12:00:00Z,paid,USD +inv_001407,acct_0021,sub_0327,2025-07-07,2025-07-07T12:00:00Z,open,USD +inv_001408,acct_0022,sub_0328,2025-08-08,2025-08-08T12:00:00Z,draft,USD +inv_001409,acct_0023,sub_0329,2025-09-09,2025-09-09T12:00:00Z,void,USD +inv_001410,acct_0024,sub_0330,2025-10-10,2025-10-10T12:00:00Z,failed,USD +inv_001411,acct_0025,sub_0331,2025-01-11,2025-01-11T12:00:00Z,paid,USD +inv_001412,acct_0026,sub_0332,2025-03-12,2025-03-12T12:00:00Z,open,USD +inv_001413,acct_0027,sub_0333,2025-03-13,2025-03-13T12:00:00Z,draft,USD +inv_001414,acct_0028,sub_0334,2025-04-14,2025-04-14T12:00:00Z,void,USD +inv_001415,acct_0029,sub_0335,2025-05-15,2025-05-15T12:00:00Z,failed,USD +inv_001416,acct_0030,sub_0336,2025-06-16,2025-06-16T12:00:00Z,paid,USD +inv_001417,acct_0031,sub_0337,2025-07-17,2025-07-17T12:00:00Z,open,USD +inv_001418,acct_0032,sub_0338,2025-08-18,2025-08-18T12:00:00Z,draft,USD 
+inv_001419,acct_0033,sub_0339,2025-09-19,2025-09-19T12:00:00Z,void,USD +inv_001420,acct_0034,sub_0340,2025-10-20,2025-10-20T12:00:00Z,failed,USD +inv_001421,acct_0035,sub_0341,2025-01-21,2025-01-21T12:00:00Z,paid,USD +inv_001422,acct_0036,sub_0342,2025-03-22,2025-03-22T12:00:00Z,open,USD +inv_001423,acct_0037,sub_0343,2025-03-23,2025-03-23T12:00:00Z,draft,USD +inv_001424,acct_0038,sub_0344,2025-04-24,2025-04-24T12:00:00Z,void,USD +inv_001425,acct_0039,sub_0345,2025-05-25,2025-05-25T12:00:00Z,failed,USD +inv_001426,acct_0040,sub_0346,2025-06-26,2025-06-26T12:00:00Z,paid,USD +inv_001427,acct_0041,sub_0347,2025-07-27,2025-07-27T12:00:00Z,open,USD +inv_001428,acct_0042,sub_0348,2025-08-28,2025-08-28T12:00:00Z,draft,USD +inv_001429,acct_0043,sub_0349,2025-09-01,2025-09-01T12:00:00Z,void,USD +inv_001430,acct_0044,sub_0350,2025-10-02,2025-10-02T12:00:00Z,failed,USD +inv_001431,acct_0045,sub_0351,2025-01-03,2025-01-03T12:00:00Z,paid,USD +inv_001432,acct_0046,sub_0352,2025-03-04,2025-03-04T12:00:00Z,open,USD +inv_001433,acct_0047,sub_0353,2025-03-05,2025-03-05T12:00:00Z,draft,USD +inv_001434,acct_0048,sub_0354,2025-04-06,2025-04-06T12:00:00Z,void,USD +inv_001435,acct_0049,sub_0355,2025-05-07,2025-05-07T12:00:00Z,failed,USD +inv_001436,acct_0050,sub_0356,2025-06-08,2025-06-08T12:00:00Z,paid,USD +inv_001437,acct_0051,sub_0357,2025-07-09,2025-07-09T12:00:00Z,open,USD +inv_001438,acct_0052,sub_0358,2025-08-10,2025-08-10T12:00:00Z,draft,USD +inv_001439,acct_0053,sub_0359,2025-09-11,2025-09-11T12:00:00Z,void,USD +inv_001440,acct_0054,sub_0360,2025-10-12,2025-10-12T12:00:00Z,failed,USD +inv_001441,acct_0055,sub_0001,2025-01-13,2025-01-13T12:00:00Z,paid,USD +inv_001442,acct_0056,sub_0002,2025-03-14,2025-03-14T12:00:00Z,open,USD +inv_001443,acct_0057,sub_0003,2025-03-15,2025-03-15T12:00:00Z,draft,USD +inv_001444,acct_0058,sub_0004,2025-04-16,2025-04-16T12:00:00Z,void,USD +inv_001445,acct_0059,sub_0005,2025-05-17,2025-05-17T12:00:00Z,failed,USD 
+inv_001446,acct_0060,sub_0006,2025-06-18,2025-06-18T12:00:00Z,paid,USD +inv_001447,acct_0061,sub_0007,2025-07-19,2025-07-19T12:00:00Z,open,USD +inv_001448,acct_0062,sub_0008,2025-08-20,2025-08-20T12:00:00Z,draft,USD +inv_001449,acct_0063,sub_0009,2025-09-21,2025-09-21T12:00:00Z,void,USD +inv_001450,acct_0064,sub_0010,2025-10-22,2025-10-22T12:00:00Z,failed,USD +inv_001451,acct_0065,sub_0011,2025-01-23,2025-01-23T12:00:00Z,paid,USD +inv_001452,acct_0066,sub_0012,2025-03-24,2025-03-24T12:00:00Z,open,USD +inv_001453,acct_0067,sub_0013,2025-03-25,2025-03-25T12:00:00Z,draft,USD +inv_001454,acct_0068,sub_0014,2025-04-26,2025-04-26T12:00:00Z,void,USD +inv_001455,acct_0069,sub_0015,2025-05-27,2025-05-27T12:00:00Z,failed,USD +inv_001456,acct_0070,sub_0016,2025-06-28,2025-06-28T12:00:00Z,paid,USD +inv_001457,acct_0071,sub_0017,2025-07-01,2025-07-01T12:00:00Z,open,USD +inv_001458,acct_0072,sub_0018,2025-08-02,2025-08-02T12:00:00Z,draft,USD +inv_001459,acct_0073,sub_0019,2025-09-03,2025-09-03T12:00:00Z,void,USD +inv_001460,acct_0074,sub_0020,2025-10-04,2025-10-04T12:00:00Z,failed,USD +inv_001461,acct_0075,sub_0021,2025-01-05,2025-01-05T12:00:00Z,paid,USD +inv_001462,acct_0076,sub_0022,2025-03-06,2025-03-06T12:00:00Z,open,USD +inv_001463,acct_0077,sub_0023,2025-03-07,2025-03-07T12:00:00Z,draft,USD +inv_001464,acct_0078,sub_0024,2025-04-08,2025-04-08T12:00:00Z,void,USD +inv_001465,acct_0079,sub_0025,2025-05-09,2025-05-09T12:00:00Z,failed,USD +inv_001466,acct_0080,sub_0026,2025-06-10,2025-06-10T12:00:00Z,paid,USD +inv_001467,acct_0081,sub_0027,2025-07-11,2025-07-11T12:00:00Z,open,USD +inv_001468,acct_0082,sub_0028,2025-08-12,2025-08-12T12:00:00Z,draft,USD +inv_001469,acct_0083,sub_0029,2025-09-13,2025-09-13T12:00:00Z,void,USD +inv_001470,acct_0084,sub_0030,2025-10-14,2025-10-14T12:00:00Z,failed,USD +inv_001471,acct_0085,sub_0031,2025-01-15,2025-01-15T12:00:00Z,paid,USD +inv_001472,acct_0086,sub_0032,2025-03-16,2025-03-16T12:00:00Z,open,USD 
+inv_001473,acct_0087,sub_0033,2025-03-17,2025-03-17T12:00:00Z,draft,USD +inv_001474,acct_0088,sub_0034,2025-04-18,2025-04-18T12:00:00Z,void,USD +inv_001475,acct_0089,sub_0035,2025-05-19,2025-05-19T12:00:00Z,failed,USD +inv_001476,acct_0090,sub_0036,2025-06-20,2025-06-20T12:00:00Z,paid,USD +inv_001477,acct_0091,sub_0037,2025-07-21,2025-07-21T12:00:00Z,open,USD +inv_001478,acct_0092,sub_0038,2025-08-22,2025-08-22T12:00:00Z,draft,USD +inv_001479,acct_0093,sub_0039,2025-09-23,2025-09-23T12:00:00Z,void,USD +inv_001480,acct_0094,sub_0040,2025-10-24,2025-10-24T12:00:00Z,failed,USD +inv_001481,acct_0095,sub_0041,2025-01-25,2025-01-25T12:00:00Z,paid,USD +inv_001482,acct_0096,sub_0042,2025-03-26,2025-03-26T12:00:00Z,open,USD +inv_001483,acct_0097,sub_0043,2025-03-27,2025-03-27T12:00:00Z,draft,USD +inv_001484,acct_0098,sub_0044,2025-04-28,2025-04-28T12:00:00Z,void,USD +inv_001485,acct_0099,sub_0045,2025-05-01,2025-05-01T12:00:00Z,failed,USD +inv_001486,acct_0100,sub_0046,2025-06-02,2025-06-02T12:00:00Z,paid,USD +inv_001487,acct_0101,sub_0047,2025-07-03,2025-07-03T12:00:00Z,open,USD +inv_001488,acct_0102,sub_0048,2025-08-04,2025-08-04T12:00:00Z,draft,USD +inv_001489,acct_0103,sub_0049,2025-09-05,2025-09-05T12:00:00Z,void,USD +inv_001490,acct_0104,sub_0050,2025-10-06,2025-10-06T12:00:00Z,failed,USD +inv_001491,acct_0105,sub_0051,2025-01-07,2025-01-07T12:00:00Z,paid,USD +inv_001492,acct_0106,sub_0052,2025-03-08,2025-03-08T12:00:00Z,open,USD +inv_001493,acct_0107,sub_0053,2025-03-09,2025-03-09T12:00:00Z,draft,USD +inv_001494,acct_0108,sub_0054,2025-04-10,2025-04-10T12:00:00Z,void,USD +inv_001495,acct_0109,sub_0055,2025-05-11,2025-05-11T12:00:00Z,failed,USD +inv_001496,acct_0110,sub_0056,2025-06-12,2025-06-12T12:00:00Z,paid,USD +inv_001497,acct_0111,sub_0057,2025-07-13,2025-07-13T12:00:00Z,open,USD +inv_001498,acct_0112,sub_0058,2025-08-14,2025-08-14T12:00:00Z,draft,USD +inv_001499,acct_0113,sub_0059,2025-09-15,2025-09-15T12:00:00Z,void,USD 
+inv_001500,acct_0114,sub_0060,2025-10-16,2025-10-16T12:00:00Z,failed,USD +inv_001501,acct_0115,sub_0061,2025-01-17,2025-01-17T12:00:00Z,paid,USD +inv_001502,acct_0116,sub_0062,2025-03-18,2025-03-18T12:00:00Z,open,USD +inv_001503,acct_0117,sub_0063,2025-03-19,2025-03-19T12:00:00Z,draft,USD +inv_001504,acct_0118,sub_0064,2025-04-20,2025-04-20T12:00:00Z,void,USD +inv_001505,acct_0119,sub_0065,2025-05-21,2025-05-21T12:00:00Z,failed,USD +inv_001506,acct_0120,sub_0066,2025-06-22,2025-06-22T12:00:00Z,paid,USD +inv_001507,acct_0121,sub_0067,2025-07-23,2025-07-23T12:00:00Z,open,USD +inv_001508,acct_0122,sub_0068,2025-08-24,2025-08-24T12:00:00Z,draft,USD +inv_001509,acct_0123,sub_0069,2025-09-25,2025-09-25T12:00:00Z,void,USD +inv_001510,acct_0124,sub_0070,2025-10-26,2025-10-26T12:00:00Z,failed,USD +inv_001511,acct_0125,sub_0071,2025-01-27,2025-01-27T12:00:00Z,paid,USD +inv_001512,acct_0126,sub_0072,2025-03-28,2025-03-28T12:00:00Z,open,USD +inv_001513,acct_0127,sub_0073,2025-03-01,2025-03-01T12:00:00Z,draft,USD +inv_001514,acct_0128,sub_0074,2025-04-02,2025-04-02T12:00:00Z,void,USD +inv_001515,acct_0129,sub_0075,2025-05-03,2025-05-03T12:00:00Z,failed,USD +inv_001516,acct_0130,sub_0076,2025-06-04,2025-06-04T12:00:00Z,paid,USD +inv_001517,acct_0131,sub_0077,2025-07-05,2025-07-05T12:00:00Z,open,USD +inv_001518,acct_0132,sub_0078,2025-08-06,2025-08-06T12:00:00Z,draft,USD +inv_001519,acct_0133,sub_0079,2025-09-07,2025-09-07T12:00:00Z,void,USD +inv_001520,acct_0134,sub_0080,2025-10-08,2025-10-08T12:00:00Z,failed,USD +inv_001521,acct_0135,sub_0081,2025-01-09,2025-01-09T12:00:00Z,paid,USD +inv_001522,acct_0136,sub_0082,2025-03-10,2025-03-10T12:00:00Z,open,USD +inv_001523,acct_0137,sub_0083,2025-03-11,2025-03-11T12:00:00Z,draft,USD +inv_001524,acct_0138,sub_0084,2025-04-12,2025-04-12T12:00:00Z,void,USD +inv_001525,acct_0139,sub_0085,2025-05-13,2025-05-13T12:00:00Z,failed,USD +inv_001526,acct_0140,sub_0086,2025-06-14,2025-06-14T12:00:00Z,paid,USD 
+inv_001527,acct_0141,sub_0087,2025-07-15,2025-07-15T12:00:00Z,open,USD +inv_001528,acct_0142,sub_0088,2025-08-16,2025-08-16T12:00:00Z,draft,USD +inv_001529,acct_0143,sub_0089,2025-09-17,2025-09-17T12:00:00Z,void,USD +inv_001530,acct_0144,sub_0090,2025-10-18,2025-10-18T12:00:00Z,failed,USD +inv_001531,acct_0145,sub_0091,2025-01-19,2025-01-19T12:00:00Z,paid,USD +inv_001532,acct_0146,sub_0092,2025-03-20,2025-03-20T12:00:00Z,open,USD +inv_001533,acct_0147,sub_0093,2025-03-21,2025-03-21T12:00:00Z,draft,USD +inv_001534,acct_0148,sub_0094,2025-04-22,2025-04-22T12:00:00Z,void,USD +inv_001535,acct_0149,sub_0095,2025-05-23,2025-05-23T12:00:00Z,failed,USD +inv_001536,acct_0150,sub_0096,2025-06-24,2025-06-24T12:00:00Z,paid,USD +inv_001537,acct_0151,sub_0097,2025-07-25,2025-07-25T12:00:00Z,open,USD +inv_001538,acct_0152,sub_0098,2025-08-26,2025-08-26T12:00:00Z,draft,USD +inv_001539,acct_0153,sub_0099,2025-09-27,2025-09-27T12:00:00Z,void,USD +inv_001540,acct_0154,sub_0100,2025-10-28,2025-10-28T12:00:00Z,failed,USD +inv_001541,acct_0155,sub_0101,2025-01-01,2025-01-01T12:00:00Z,paid,USD +inv_001542,acct_0156,sub_0102,2025-03-02,2025-03-02T12:00:00Z,open,USD +inv_001543,acct_0157,sub_0103,2025-03-03,2025-03-03T12:00:00Z,draft,USD +inv_001544,acct_0158,sub_0104,2025-04-04,2025-04-04T12:00:00Z,void,USD +inv_001545,acct_0159,sub_0105,2025-05-05,2025-05-05T12:00:00Z,failed,USD +inv_001546,acct_0160,sub_0106,2025-06-06,2025-06-06T12:00:00Z,paid,USD +inv_001547,acct_0161,sub_0107,2025-07-07,2025-07-07T12:00:00Z,open,USD +inv_001548,acct_0162,sub_0108,2025-08-08,2025-08-08T12:00:00Z,draft,USD +inv_001549,acct_0163,sub_0109,2025-09-09,2025-09-09T12:00:00Z,void,USD +inv_001550,acct_0164,sub_0110,2025-10-10,2025-10-10T12:00:00Z,failed,USD +inv_001551,acct_0165,sub_0111,2025-01-11,2025-01-11T12:00:00Z,paid,USD +inv_001552,acct_0166,sub_0112,2025-03-12,2025-03-12T12:00:00Z,open,USD +inv_001553,acct_0167,sub_0113,2025-03-13,2025-03-13T12:00:00Z,draft,USD 
+inv_001554,acct_0168,sub_0114,2025-04-14,2025-04-14T12:00:00Z,void,USD +inv_001555,acct_0169,sub_0115,2025-05-15,2025-05-15T12:00:00Z,failed,USD +inv_001556,acct_0170,sub_0116,2025-06-16,2025-06-16T12:00:00Z,paid,USD +inv_001557,acct_0171,sub_0117,2025-07-17,2025-07-17T12:00:00Z,open,USD +inv_001558,acct_0172,sub_0118,2025-08-18,2025-08-18T12:00:00Z,draft,USD +inv_001559,acct_0173,sub_0119,2025-09-19,2025-09-19T12:00:00Z,void,USD +inv_001560,acct_0174,sub_0120,2025-10-20,2025-10-20T12:00:00Z,failed,USD +inv_001561,acct_0175,sub_0121,2025-01-21,2025-01-21T12:00:00Z,paid,USD +inv_001562,acct_0176,sub_0122,2025-03-22,2025-03-22T12:00:00Z,open,USD +inv_001563,acct_0177,sub_0123,2025-03-23,2025-03-23T12:00:00Z,draft,USD +inv_001564,acct_0178,sub_0124,2025-04-24,2025-04-24T12:00:00Z,void,USD +inv_001565,acct_0179,sub_0125,2025-05-25,2025-05-25T12:00:00Z,failed,USD +inv_001566,acct_0180,sub_0126,2025-06-26,2025-06-26T12:00:00Z,paid,USD +inv_001567,acct_0181,sub_0127,2025-07-27,2025-07-27T12:00:00Z,open,USD +inv_001568,acct_0182,sub_0128,2025-08-28,2025-08-28T12:00:00Z,draft,USD +inv_001569,acct_0183,sub_0129,2025-09-01,2025-09-01T12:00:00Z,void,USD +inv_001570,acct_0184,sub_0130,2025-10-02,2025-10-02T12:00:00Z,failed,USD +inv_001571,acct_0185,sub_0131,2025-01-03,2025-01-03T12:00:00Z,paid,USD +inv_001572,acct_0186,sub_0132,2025-03-04,2025-03-04T12:00:00Z,open,USD +inv_001573,acct_0187,sub_0133,2025-03-05,2025-03-05T12:00:00Z,draft,USD +inv_001574,acct_0188,sub_0134,2025-04-06,2025-04-06T12:00:00Z,void,USD +inv_001575,acct_0189,sub_0135,2025-05-07,2025-05-07T12:00:00Z,failed,USD +inv_001576,acct_0190,sub_0136,2025-06-08,2025-06-08T12:00:00Z,paid,USD +inv_001577,acct_0191,sub_0137,2025-07-09,2025-07-09T12:00:00Z,open,USD +inv_001578,acct_0192,sub_0138,2025-08-10,2025-08-10T12:00:00Z,draft,USD +inv_001579,acct_0193,sub_0139,2025-09-11,2025-09-11T12:00:00Z,void,USD +inv_001580,acct_0194,sub_0140,2025-10-12,2025-10-12T12:00:00Z,failed,USD 
+inv_001581,acct_0195,sub_0141,2025-01-13,2025-01-13T12:00:00Z,paid,USD +inv_001582,acct_0196,sub_0142,2025-03-14,2025-03-14T12:00:00Z,open,USD +inv_001583,acct_0197,sub_0143,2025-03-15,2025-03-15T12:00:00Z,draft,USD +inv_001584,acct_0198,sub_0144,2025-04-16,2025-04-16T12:00:00Z,void,USD +inv_001585,acct_0001,sub_0145,2025-05-17,2025-05-17T12:00:00Z,failed,USD +inv_001586,acct_0002,sub_0146,2025-06-18,2025-06-18T12:00:00Z,paid,USD +inv_001587,acct_0003,sub_0147,2025-07-19,2025-07-19T12:00:00Z,open,USD +inv_001588,acct_0004,sub_0148,2025-08-20,2025-08-20T12:00:00Z,draft,USD +inv_001589,acct_0005,sub_0149,2025-09-21,2025-09-21T12:00:00Z,void,USD +inv_001590,acct_0006,sub_0150,2025-10-22,2025-10-22T12:00:00Z,failed,USD +inv_001591,acct_0007,sub_0151,2025-01-23,2025-01-23T12:00:00Z,paid,USD +inv_001592,acct_0008,sub_0152,2025-03-24,2025-03-24T12:00:00Z,open,USD +inv_001593,acct_0009,sub_0153,2025-03-25,2025-03-25T12:00:00Z,draft,USD +inv_001594,acct_0010,sub_0154,2025-04-26,2025-04-26T12:00:00Z,void,USD +inv_001595,acct_0011,sub_0155,2025-05-27,2025-05-27T12:00:00Z,failed,USD +inv_001596,acct_0012,sub_0156,2025-06-28,2025-06-28T12:00:00Z,paid,USD +inv_001597,acct_0013,sub_0157,2025-07-01,2025-07-01T12:00:00Z,open,USD +inv_001598,acct_0014,sub_0158,2025-08-02,2025-08-02T12:00:00Z,draft,USD +inv_001599,acct_0015,sub_0159,2025-09-03,2025-09-03T12:00:00Z,void,USD +inv_001600,acct_0016,sub_0160,2025-10-04,2025-10-04T12:00:00Z,failed,USD +inv_001601,acct_0017,sub_0161,2025-01-05,2025-01-05T12:00:00Z,paid,USD +inv_001602,acct_0018,sub_0162,2025-03-06,2025-03-06T12:00:00Z,open,USD +inv_001603,acct_0019,sub_0163,2025-03-07,2025-03-07T12:00:00Z,draft,USD +inv_001604,acct_0020,sub_0164,2025-04-08,2025-04-08T12:00:00Z,void,USD +inv_001605,acct_0021,sub_0165,2025-05-09,2025-05-09T12:00:00Z,failed,USD +inv_001606,acct_0022,sub_0166,2025-06-10,2025-06-10T12:00:00Z,paid,USD +inv_001607,acct_0023,sub_0167,2025-07-11,2025-07-11T12:00:00Z,open,USD 
+inv_001608,acct_0024,sub_0168,2025-08-12,2025-08-12T12:00:00Z,draft,USD +inv_001609,acct_0025,sub_0169,2025-09-13,2025-09-13T12:00:00Z,void,USD +inv_001610,acct_0026,sub_0170,2025-10-14,2025-10-14T12:00:00Z,failed,USD +inv_001611,acct_0027,sub_0171,2025-01-15,2025-01-15T12:00:00Z,paid,USD +inv_001612,acct_0028,sub_0172,2025-03-16,2025-03-16T12:00:00Z,open,USD +inv_001613,acct_0029,sub_0173,2025-03-17,2025-03-17T12:00:00Z,draft,USD +inv_001614,acct_0030,sub_0174,2025-04-18,2025-04-18T12:00:00Z,void,USD +inv_001615,acct_0031,sub_0175,2025-05-19,2025-05-19T12:00:00Z,failed,USD +inv_001616,acct_0032,sub_0176,2025-06-20,2025-06-20T12:00:00Z,paid,USD +inv_001617,acct_0033,sub_0177,2025-07-21,2025-07-21T12:00:00Z,open,USD +inv_001618,acct_0034,sub_0178,2025-08-22,2025-08-22T12:00:00Z,draft,USD +inv_001619,acct_0035,sub_0179,2025-09-23,2025-09-23T12:00:00Z,void,USD +inv_001620,acct_0036,sub_0180,2025-10-24,2025-10-24T12:00:00Z,failed,USD +inv_001621,acct_0037,sub_0181,2025-01-25,2025-01-25T12:00:00Z,paid,USD +inv_001622,acct_0038,sub_0182,2025-03-26,2025-03-26T12:00:00Z,open,USD +inv_001623,acct_0039,sub_0183,2025-03-27,2025-03-27T12:00:00Z,draft,USD +inv_001624,acct_0040,sub_0184,2025-04-28,2025-04-28T12:00:00Z,void,USD +inv_001625,acct_0041,sub_0185,2025-05-01,2025-05-01T12:00:00Z,failed,USD +inv_001626,acct_0042,sub_0186,2025-06-02,2025-06-02T12:00:00Z,paid,USD +inv_001627,acct_0043,sub_0187,2025-07-03,2025-07-03T12:00:00Z,open,USD +inv_001628,acct_0044,sub_0188,2025-08-04,2025-08-04T12:00:00Z,draft,USD +inv_001629,acct_0045,sub_0189,2025-09-05,2025-09-05T12:00:00Z,void,USD +inv_001630,acct_0046,sub_0190,2025-10-06,2025-10-06T12:00:00Z,failed,USD +inv_001631,acct_0047,sub_0191,2025-01-07,2025-01-07T12:00:00Z,paid,USD +inv_001632,acct_0048,sub_0192,2025-03-08,2025-03-08T12:00:00Z,open,USD +inv_001633,acct_0049,sub_0193,2025-03-09,2025-03-09T12:00:00Z,draft,USD +inv_001634,acct_0050,sub_0194,2025-04-10,2025-04-10T12:00:00Z,void,USD 
+inv_001635,acct_0051,sub_0195,2025-05-11,2025-05-11T12:00:00Z,failed,USD +inv_001636,acct_0052,sub_0196,2025-06-12,2025-06-12T12:00:00Z,paid,USD +inv_001637,acct_0053,sub_0197,2025-07-13,2025-07-13T12:00:00Z,open,USD +inv_001638,acct_0054,sub_0198,2025-08-14,2025-08-14T12:00:00Z,draft,USD +inv_001639,acct_0055,sub_0199,2025-09-15,2025-09-15T12:00:00Z,void,USD +inv_001640,acct_0056,sub_0200,2025-10-16,2025-10-16T12:00:00Z,failed,USD +inv_001641,acct_0057,sub_0201,2025-01-17,2025-01-17T12:00:00Z,paid,USD +inv_001642,acct_0058,sub_0202,2025-03-18,2025-03-18T12:00:00Z,open,USD +inv_001643,acct_0059,sub_0203,2025-03-19,2025-03-19T12:00:00Z,draft,USD +inv_001644,acct_0060,sub_0204,2025-04-20,2025-04-20T12:00:00Z,void,USD +inv_001645,acct_0061,sub_0205,2025-05-21,2025-05-21T12:00:00Z,failed,USD +inv_001646,acct_0062,sub_0206,2025-06-22,2025-06-22T12:00:00Z,paid,USD +inv_001647,acct_0063,sub_0207,2025-07-23,2025-07-23T12:00:00Z,open,USD +inv_001648,acct_0064,sub_0208,2025-08-24,2025-08-24T12:00:00Z,draft,USD +inv_001649,acct_0065,sub_0209,2025-09-25,2025-09-25T12:00:00Z,void,USD +inv_001650,acct_0066,sub_0210,2025-10-26,2025-10-26T12:00:00Z,failed,USD +inv_001651,acct_0067,sub_0211,2025-01-27,2025-01-27T12:00:00Z,paid,USD +inv_001652,acct_0068,sub_0212,2025-03-28,2025-03-28T12:00:00Z,open,USD +inv_001653,acct_0069,sub_0213,2025-03-01,2025-03-01T12:00:00Z,draft,USD +inv_001654,acct_0070,sub_0214,2025-04-02,2025-04-02T12:00:00Z,void,USD +inv_001655,acct_0071,sub_0215,2025-05-03,2025-05-03T12:00:00Z,failed,USD +inv_001656,acct_0072,sub_0216,2025-06-04,2025-06-04T12:00:00Z,paid,USD +inv_001657,acct_0073,sub_0217,2025-07-05,2025-07-05T12:00:00Z,open,USD +inv_001658,acct_0074,sub_0218,2025-08-06,2025-08-06T12:00:00Z,draft,USD +inv_001659,acct_0075,sub_0219,2025-09-07,2025-09-07T12:00:00Z,void,USD +inv_001660,acct_0076,sub_0220,2025-10-08,2025-10-08T12:00:00Z,failed,USD +inv_001661,acct_0077,sub_0221,2025-01-09,2025-01-09T12:00:00Z,paid,USD 
+inv_001662,acct_0078,sub_0222,2025-03-10,2025-03-10T12:00:00Z,open,USD +inv_001663,acct_0079,sub_0223,2025-03-11,2025-03-11T12:00:00Z,draft,USD +inv_001664,acct_0080,sub_0224,2025-04-12,2025-04-12T12:00:00Z,void,USD +inv_001665,acct_0081,sub_0225,2025-05-13,2025-05-13T12:00:00Z,failed,USD +inv_001666,acct_0082,sub_0226,2025-06-14,2025-06-14T12:00:00Z,paid,USD +inv_001667,acct_0083,sub_0227,2025-07-15,2025-07-15T12:00:00Z,open,USD +inv_001668,acct_0084,sub_0228,2025-08-16,2025-08-16T12:00:00Z,draft,USD +inv_001669,acct_0085,sub_0229,2025-09-17,2025-09-17T12:00:00Z,void,USD +inv_001670,acct_0086,sub_0230,2025-10-18,2025-10-18T12:00:00Z,failed,USD +inv_001671,acct_0087,sub_0231,2025-01-19,2025-01-19T12:00:00Z,paid,USD +inv_001672,acct_0088,sub_0232,2025-03-20,2025-03-20T12:00:00Z,open,USD +inv_001673,acct_0089,sub_0233,2025-03-21,2025-03-21T12:00:00Z,draft,USD +inv_001674,acct_0090,sub_0234,2025-04-22,2025-04-22T12:00:00Z,void,USD +inv_001675,acct_0091,sub_0235,2025-05-23,2025-05-23T12:00:00Z,failed,USD +inv_001676,acct_0092,sub_0236,2025-06-24,2025-06-24T12:00:00Z,paid,USD +inv_001677,acct_0093,sub_0237,2025-07-25,2025-07-25T12:00:00Z,open,USD +inv_001678,acct_0094,sub_0238,2025-08-26,2025-08-26T12:00:00Z,draft,USD +inv_001679,acct_0095,sub_0239,2025-09-27,2025-09-27T12:00:00Z,void,USD +inv_001680,acct_0096,sub_0240,2025-10-28,2025-10-28T12:00:00Z,failed,USD +inv_001681,acct_0097,sub_0241,2025-01-01,2025-01-01T12:00:00Z,paid,USD +inv_001682,acct_0098,sub_0242,2025-03-02,2025-03-02T12:00:00Z,open,USD +inv_001683,acct_0099,sub_0243,2025-03-03,2025-03-03T12:00:00Z,draft,USD +inv_001684,acct_0100,sub_0244,2025-04-04,2025-04-04T12:00:00Z,void,USD +inv_001685,acct_0101,sub_0245,2025-05-05,2025-05-05T12:00:00Z,failed,USD +inv_001686,acct_0102,sub_0246,2025-06-06,2025-06-06T12:00:00Z,paid,USD +inv_001687,acct_0103,sub_0247,2025-07-07,2025-07-07T12:00:00Z,open,USD +inv_001688,acct_0104,sub_0248,2025-08-08,2025-08-08T12:00:00Z,draft,USD 
+inv_001689,acct_0105,sub_0249,2025-09-09,2025-09-09T12:00:00Z,void,USD +inv_001690,acct_0106,sub_0250,2025-10-10,2025-10-10T12:00:00Z,failed,USD +inv_001691,acct_0107,sub_0251,2025-01-11,2025-01-11T12:00:00Z,paid,USD +inv_001692,acct_0108,sub_0252,2025-03-12,2025-03-12T12:00:00Z,open,USD +inv_001693,acct_0109,sub_0253,2025-03-13,2025-03-13T12:00:00Z,draft,USD +inv_001694,acct_0110,sub_0254,2025-04-14,2025-04-14T12:00:00Z,void,USD +inv_001695,acct_0111,sub_0255,2025-05-15,2025-05-15T12:00:00Z,failed,USD +inv_001696,acct_0112,sub_0256,2025-06-16,2025-06-16T12:00:00Z,paid,USD +inv_001697,acct_0113,sub_0257,2025-07-17,2025-07-17T12:00:00Z,open,USD +inv_001698,acct_0114,sub_0258,2025-08-18,2025-08-18T12:00:00Z,draft,USD +inv_001699,acct_0115,sub_0259,2025-09-19,2025-09-19T12:00:00Z,void,USD +inv_001700,acct_0116,sub_0260,2025-10-20,2025-10-20T12:00:00Z,failed,USD +inv_001701,acct_0117,sub_0261,2025-01-21,2025-01-21T12:00:00Z,paid,USD +inv_001702,acct_0118,sub_0262,2025-03-22,2025-03-22T12:00:00Z,open,USD +inv_001703,acct_0119,sub_0263,2025-03-23,2025-03-23T12:00:00Z,draft,USD +inv_001704,acct_0120,sub_0264,2025-04-24,2025-04-24T12:00:00Z,void,USD +inv_001705,acct_0121,sub_0265,2025-05-25,2025-05-25T12:00:00Z,failed,USD +inv_001706,acct_0122,sub_0266,2025-06-26,2025-06-26T12:00:00Z,paid,USD +inv_001707,acct_0123,sub_0267,2025-07-27,2025-07-27T12:00:00Z,open,USD +inv_001708,acct_0124,sub_0268,2025-08-28,2025-08-28T12:00:00Z,draft,USD +inv_001709,acct_0125,sub_0269,2025-09-01,2025-09-01T12:00:00Z,void,USD +inv_001710,acct_0126,sub_0270,2025-10-02,2025-10-02T12:00:00Z,failed,USD +inv_001711,acct_0127,sub_0271,2025-01-03,2025-01-03T12:00:00Z,paid,USD +inv_001712,acct_0128,sub_0272,2025-03-04,2025-03-04T12:00:00Z,open,USD +inv_001713,acct_0129,sub_0273,2025-03-05,2025-03-05T12:00:00Z,draft,USD +inv_001714,acct_0130,sub_0274,2025-04-06,2025-04-06T12:00:00Z,void,USD +inv_001715,acct_0131,sub_0275,2025-05-07,2025-05-07T12:00:00Z,failed,USD 
+inv_001716,acct_0132,sub_0276,2025-06-08,2025-06-08T12:00:00Z,paid,USD +inv_001717,acct_0133,sub_0277,2025-07-09,2025-07-09T12:00:00Z,open,USD +inv_001718,acct_0134,sub_0278,2025-08-10,2025-08-10T12:00:00Z,draft,USD +inv_001719,acct_0135,sub_0279,2025-09-11,2025-09-11T12:00:00Z,void,USD +inv_001720,acct_0136,sub_0280,2025-10-12,2025-10-12T12:00:00Z,failed,USD +inv_001721,acct_0137,sub_0281,2025-01-13,2025-01-13T12:00:00Z,paid,USD +inv_001722,acct_0138,sub_0282,2025-03-14,2025-03-14T12:00:00Z,open,USD +inv_001723,acct_0139,sub_0283,2025-03-15,2025-03-15T12:00:00Z,draft,USD +inv_001724,acct_0140,sub_0284,2025-04-16,2025-04-16T12:00:00Z,void,USD +inv_001725,acct_0141,sub_0285,2025-05-17,2025-05-17T12:00:00Z,failed,USD +inv_001726,acct_0142,sub_0286,2025-06-18,2025-06-18T12:00:00Z,paid,USD +inv_001727,acct_0143,sub_0287,2025-07-19,2025-07-19T12:00:00Z,open,USD +inv_001728,acct_0144,sub_0288,2025-08-20,2025-08-20T12:00:00Z,draft,USD +inv_001729,acct_0145,sub_0289,2025-09-21,2025-09-21T12:00:00Z,void,USD +inv_001730,acct_0146,sub_0290,2025-10-22,2025-10-22T12:00:00Z,failed,USD +inv_001731,acct_0147,sub_0291,2025-01-23,2025-01-23T12:00:00Z,paid,USD +inv_001732,acct_0148,sub_0292,2025-03-24,2025-03-24T12:00:00Z,open,USD +inv_001733,acct_0149,sub_0293,2025-03-25,2025-03-25T12:00:00Z,draft,USD +inv_001734,acct_0150,sub_0294,2025-04-26,2025-04-26T12:00:00Z,void,USD +inv_001735,acct_0151,sub_0295,2025-05-27,2025-05-27T12:00:00Z,failed,USD +inv_001736,acct_0152,sub_0296,2025-06-28,2025-06-28T12:00:00Z,paid,USD +inv_001737,acct_0153,sub_0297,2025-07-01,2025-07-01T12:00:00Z,open,USD +inv_001738,acct_0154,sub_0298,2025-08-02,2025-08-02T12:00:00Z,draft,USD +inv_001739,acct_0155,sub_0299,2025-09-03,2025-09-03T12:00:00Z,void,USD +inv_001740,acct_0156,sub_0300,2025-10-04,2025-10-04T12:00:00Z,failed,USD +inv_001741,acct_0157,sub_0301,2025-01-05,2025-01-05T12:00:00Z,paid,USD +inv_001742,acct_0158,sub_0302,2025-03-06,2025-03-06T12:00:00Z,open,USD 
+inv_001743,acct_0159,sub_0303,2025-03-07,2025-03-07T12:00:00Z,draft,USD +inv_001744,acct_0160,sub_0304,2025-04-08,2025-04-08T12:00:00Z,void,USD +inv_001745,acct_0161,sub_0305,2025-05-09,2025-05-09T12:00:00Z,failed,USD +inv_001746,acct_0162,sub_0306,2025-06-10,2025-06-10T12:00:00Z,paid,USD +inv_001747,acct_0163,sub_0307,2025-07-11,2025-07-11T12:00:00Z,open,USD +inv_001748,acct_0164,sub_0308,2025-08-12,2025-08-12T12:00:00Z,draft,USD +inv_001749,acct_0165,sub_0309,2025-09-13,2025-09-13T12:00:00Z,void,USD +inv_001750,acct_0166,sub_0310,2025-10-14,2025-10-14T12:00:00Z,failed,USD +inv_001751,acct_0167,sub_0311,2025-01-15,2025-01-15T12:00:00Z,paid,USD +inv_001752,acct_0168,sub_0312,2025-03-16,2025-03-16T12:00:00Z,open,USD +inv_001753,acct_0169,sub_0313,2025-03-17,2025-03-17T12:00:00Z,draft,USD +inv_001754,acct_0170,sub_0314,2025-04-18,2025-04-18T12:00:00Z,void,USD +inv_001755,acct_0171,sub_0315,2025-05-19,2025-05-19T12:00:00Z,failed,USD +inv_001756,acct_0172,sub_0316,2025-06-20,2025-06-20T12:00:00Z,paid,USD +inv_001757,acct_0173,sub_0317,2025-07-21,2025-07-21T12:00:00Z,open,USD +inv_001758,acct_0174,sub_0318,2025-08-22,2025-08-22T12:00:00Z,draft,USD +inv_001759,acct_0175,sub_0319,2025-09-23,2025-09-23T12:00:00Z,void,USD +inv_001760,acct_0176,sub_0320,2025-10-24,2025-10-24T12:00:00Z,failed,USD +inv_001761,acct_0177,sub_0321,2025-01-25,2025-01-25T12:00:00Z,paid,USD +inv_001762,acct_0178,sub_0322,2025-03-26,2025-03-26T12:00:00Z,open,USD +inv_001763,acct_0179,sub_0323,2025-03-27,2025-03-27T12:00:00Z,draft,USD +inv_001764,acct_0180,sub_0324,2025-04-28,2025-04-28T12:00:00Z,void,USD +inv_001765,acct_0181,sub_0325,2025-05-01,2025-05-01T12:00:00Z,failed,USD +inv_001766,acct_0182,sub_0326,2025-06-02,2025-06-02T12:00:00Z,paid,USD +inv_001767,acct_0183,sub_0327,2025-07-03,2025-07-03T12:00:00Z,open,USD +inv_001768,acct_0184,sub_0328,2025-08-04,2025-08-04T12:00:00Z,draft,USD +inv_001769,acct_0185,sub_0329,2025-09-05,2025-09-05T12:00:00Z,void,USD 
+inv_001770,acct_0186,sub_0330,2025-10-06,2025-10-06T12:00:00Z,failed,USD +inv_001771,acct_0187,sub_0331,2025-01-07,2025-01-07T12:00:00Z,paid,USD +inv_001772,acct_0188,sub_0332,2025-03-08,2025-03-08T12:00:00Z,open,USD +inv_001773,acct_0189,sub_0333,2025-03-09,2025-03-09T12:00:00Z,draft,USD +inv_001774,acct_0190,sub_0334,2025-04-10,2025-04-10T12:00:00Z,void,USD +inv_001775,acct_0191,sub_0335,2025-05-11,2025-05-11T12:00:00Z,failed,USD +inv_001776,acct_0192,sub_0336,2025-06-12,2025-06-12T12:00:00Z,paid,USD +inv_001777,acct_0193,sub_0337,2025-07-13,2025-07-13T12:00:00Z,open,USD +inv_001778,acct_0194,sub_0338,2025-08-14,2025-08-14T12:00:00Z,draft,USD +inv_001779,acct_0195,sub_0339,2025-09-15,2025-09-15T12:00:00Z,void,USD +inv_001780,acct_0196,sub_0340,2025-10-16,2025-10-16T12:00:00Z,failed,USD +inv_001781,acct_0197,sub_0341,2025-01-17,2025-01-17T12:00:00Z,paid,USD +inv_001782,acct_0198,sub_0342,2025-03-18,2025-03-18T12:00:00Z,open,USD +inv_001783,acct_0001,sub_0343,2025-03-19,2025-03-19T12:00:00Z,draft,USD +inv_001784,acct_0002,sub_0344,2025-04-20,2025-04-20T12:00:00Z,void,USD +inv_001785,acct_0003,sub_0345,2025-05-21,2025-05-21T12:00:00Z,failed,USD +inv_001786,acct_0004,sub_0346,2025-06-22,2025-06-22T12:00:00Z,paid,USD +inv_001787,acct_0005,sub_0347,2025-07-23,2025-07-23T12:00:00Z,open,USD +inv_001788,acct_0006,sub_0348,2025-08-24,2025-08-24T12:00:00Z,draft,USD +inv_001789,acct_0007,sub_0349,2025-09-25,2025-09-25T12:00:00Z,void,USD +inv_001790,acct_0008,sub_0350,2025-10-26,2025-10-26T12:00:00Z,failed,USD +inv_001791,acct_0009,sub_0351,2025-01-27,2025-01-27T12:00:00Z,paid,USD +inv_001792,acct_0010,sub_0352,2025-03-28,2025-03-28T12:00:00Z,open,USD +inv_001793,acct_0011,sub_0353,2025-03-01,2025-03-01T12:00:00Z,draft,USD +inv_001794,acct_0012,sub_0354,2025-04-02,2025-04-02T12:00:00Z,void,USD +inv_001795,acct_0013,sub_0355,2025-05-03,2025-05-03T12:00:00Z,failed,USD +inv_001796,acct_0014,sub_0356,2025-06-04,2025-06-04T12:00:00Z,paid,USD 
+inv_001797,acct_0015,sub_0357,2025-07-05,2025-07-05T12:00:00Z,open,USD +inv_001798,acct_0016,sub_0358,2025-08-06,2025-08-06T12:00:00Z,draft,USD +inv_001799,acct_0017,sub_0359,2025-09-07,2025-09-07T12:00:00Z,void,USD +inv_001800,acct_0018,sub_0360,2025-10-08,2025-10-08T12:00:00Z,failed,USD +inv_001801,acct_0019,sub_0001,2025-01-09,2025-01-09T12:00:00Z,paid,USD +inv_001802,acct_0020,sub_0002,2025-03-10,2025-03-10T12:00:00Z,open,USD +inv_001803,acct_0021,sub_0003,2025-03-11,2025-03-11T12:00:00Z,draft,USD +inv_001804,acct_0022,sub_0004,2025-04-12,2025-04-12T12:00:00Z,void,USD +inv_001805,acct_0023,sub_0005,2025-05-13,2025-05-13T12:00:00Z,failed,USD +inv_001806,acct_0024,sub_0006,2025-06-14,2025-06-14T12:00:00Z,paid,USD +inv_001807,acct_0025,sub_0007,2025-07-15,2025-07-15T12:00:00Z,open,USD +inv_001808,acct_0026,sub_0008,2025-08-16,2025-08-16T12:00:00Z,draft,USD +inv_001809,acct_0027,sub_0009,2025-09-17,2025-09-17T12:00:00Z,void,USD +inv_001810,acct_0028,sub_0010,2025-10-18,2025-10-18T12:00:00Z,failed,USD +inv_001811,acct_0029,sub_0011,2025-01-19,2025-01-19T12:00:00Z,paid,USD +inv_001812,acct_0030,sub_0012,2025-03-20,2025-03-20T12:00:00Z,open,USD +inv_001813,acct_0031,sub_0013,2025-03-21,2025-03-21T12:00:00Z,draft,USD +inv_001814,acct_0032,sub_0014,2025-04-22,2025-04-22T12:00:00Z,void,USD +inv_001815,acct_0033,sub_0015,2025-05-23,2025-05-23T12:00:00Z,failed,USD +inv_001816,acct_0034,sub_0016,2025-06-24,2025-06-24T12:00:00Z,paid,USD +inv_001817,acct_0035,sub_0017,2025-07-25,2025-07-25T12:00:00Z,open,USD +inv_001818,acct_0036,sub_0018,2025-08-26,2025-08-26T12:00:00Z,draft,USD +inv_001819,acct_0037,sub_0019,2025-09-27,2025-09-27T12:00:00Z,void,USD +inv_001820,acct_0038,sub_0020,2025-10-28,2025-10-28T12:00:00Z,failed,USD +inv_001821,acct_0039,sub_0021,2025-01-01,2025-01-01T12:00:00Z,paid,USD +inv_001822,acct_0040,sub_0022,2025-03-02,2025-03-02T12:00:00Z,open,USD +inv_001823,acct_0041,sub_0023,2025-03-03,2025-03-03T12:00:00Z,draft,USD 
+inv_001824,acct_0042,sub_0024,2025-04-04,2025-04-04T12:00:00Z,void,USD +inv_001825,acct_0043,sub_0025,2025-05-05,2025-05-05T12:00:00Z,failed,USD +inv_001826,acct_0044,sub_0026,2025-06-06,2025-06-06T12:00:00Z,paid,USD +inv_001827,acct_0045,sub_0027,2025-07-07,2025-07-07T12:00:00Z,open,USD +inv_001828,acct_0046,sub_0028,2025-08-08,2025-08-08T12:00:00Z,draft,USD +inv_001829,acct_0047,sub_0029,2025-09-09,2025-09-09T12:00:00Z,void,USD +inv_001830,acct_0048,sub_0030,2025-10-10,2025-10-10T12:00:00Z,failed,USD +inv_001831,acct_0049,sub_0031,2025-01-11,2025-01-11T12:00:00Z,paid,USD +inv_001832,acct_0050,sub_0032,2025-03-12,2025-03-12T12:00:00Z,open,USD +inv_001833,acct_0051,sub_0033,2025-03-13,2025-03-13T12:00:00Z,draft,USD +inv_001834,acct_0052,sub_0034,2025-04-14,2025-04-14T12:00:00Z,void,USD +inv_001835,acct_0053,sub_0035,2025-05-15,2025-05-15T12:00:00Z,failed,USD +inv_001836,acct_0054,sub_0036,2025-06-16,2025-06-16T12:00:00Z,paid,USD +inv_001837,acct_0055,sub_0037,2025-07-17,2025-07-17T12:00:00Z,open,USD +inv_001838,acct_0056,sub_0038,2025-08-18,2025-08-18T12:00:00Z,draft,USD +inv_001839,acct_0057,sub_0039,2025-09-19,2025-09-19T12:00:00Z,void,USD +inv_001840,acct_0058,sub_0040,2025-10-20,2025-10-20T12:00:00Z,failed,USD +inv_001841,acct_0059,sub_0041,2025-01-21,2025-01-21T12:00:00Z,paid,USD +inv_001842,acct_0060,sub_0042,2025-03-22,2025-03-22T12:00:00Z,open,USD +inv_001843,acct_0061,sub_0043,2025-03-23,2025-03-23T12:00:00Z,draft,USD +inv_001844,acct_0062,sub_0044,2025-04-24,2025-04-24T12:00:00Z,void,USD +inv_001845,acct_0063,sub_0045,2025-05-25,2025-05-25T12:00:00Z,failed,USD +inv_001846,acct_0064,sub_0046,2025-06-26,2025-06-26T12:00:00Z,paid,USD +inv_001847,acct_0065,sub_0047,2025-07-27,2025-07-27T12:00:00Z,open,USD +inv_001848,acct_0066,sub_0048,2025-08-28,2025-08-28T12:00:00Z,draft,USD +inv_001849,acct_0067,sub_0049,2025-09-01,2025-09-01T12:00:00Z,void,USD +inv_001850,acct_0068,sub_0050,2025-10-02,2025-10-02T12:00:00Z,failed,USD 
+inv_001851,acct_0069,sub_0051,2025-01-03,2025-01-03T12:00:00Z,paid,USD +inv_001852,acct_0070,sub_0052,2025-03-04,2025-03-04T12:00:00Z,open,USD +inv_001853,acct_0071,sub_0053,2025-03-05,2025-03-05T12:00:00Z,draft,USD +inv_001854,acct_0072,sub_0054,2025-04-06,2025-04-06T12:00:00Z,void,USD +inv_001855,acct_0073,sub_0055,2025-05-07,2025-05-07T12:00:00Z,failed,USD +inv_001856,acct_0074,sub_0056,2025-06-08,2025-06-08T12:00:00Z,paid,USD +inv_001857,acct_0075,sub_0057,2025-07-09,2025-07-09T12:00:00Z,open,USD +inv_001858,acct_0076,sub_0058,2025-08-10,2025-08-10T12:00:00Z,draft,USD +inv_001859,acct_0077,sub_0059,2025-09-11,2025-09-11T12:00:00Z,void,USD +inv_001860,acct_0078,sub_0060,2025-10-12,2025-10-12T12:00:00Z,failed,USD +inv_001861,acct_0079,sub_0061,2025-01-13,2025-01-13T12:00:00Z,paid,USD +inv_001862,acct_0080,sub_0062,2025-03-14,2025-03-14T12:00:00Z,open,USD +inv_001863,acct_0081,sub_0063,2025-03-15,2025-03-15T12:00:00Z,draft,USD +inv_001864,acct_0082,sub_0064,2025-04-16,2025-04-16T12:00:00Z,void,USD +inv_001865,acct_0083,sub_0065,2025-05-17,2025-05-17T12:00:00Z,failed,USD +inv_001866,acct_0084,sub_0066,2025-06-18,2025-06-18T12:00:00Z,paid,USD +inv_001867,acct_0085,sub_0067,2025-07-19,2025-07-19T12:00:00Z,open,USD +inv_001868,acct_0086,sub_0068,2025-08-20,2025-08-20T12:00:00Z,draft,USD +inv_001869,acct_0087,sub_0069,2025-09-21,2025-09-21T12:00:00Z,void,USD +inv_001870,acct_0088,sub_0070,2025-10-22,2025-10-22T12:00:00Z,failed,USD +inv_001871,acct_0089,sub_0071,2025-01-23,2025-01-23T12:00:00Z,paid,USD +inv_001872,acct_0090,sub_0072,2025-03-24,2025-03-24T12:00:00Z,open,USD +inv_001873,acct_0091,sub_0073,2025-03-25,2025-03-25T12:00:00Z,draft,USD +inv_001874,acct_0092,sub_0074,2025-04-26,2025-04-26T12:00:00Z,void,USD +inv_001875,acct_0093,sub_0075,2025-05-27,2025-05-27T12:00:00Z,failed,USD +inv_001876,acct_0094,sub_0076,2025-06-28,2025-06-28T12:00:00Z,paid,USD +inv_001877,acct_0095,sub_0077,2025-07-01,2025-07-01T12:00:00Z,open,USD 
+inv_001878,acct_0096,sub_0078,2025-08-02,2025-08-02T12:00:00Z,draft,USD +inv_001879,acct_0097,sub_0079,2025-09-03,2025-09-03T12:00:00Z,void,USD +inv_001880,acct_0098,sub_0080,2025-10-04,2025-10-04T12:00:00Z,failed,USD +inv_001881,acct_0099,sub_0081,2025-01-05,2025-01-05T12:00:00Z,paid,USD +inv_001882,acct_0100,sub_0082,2025-03-06,2025-03-06T12:00:00Z,open,USD +inv_001883,acct_0101,sub_0083,2025-03-07,2025-03-07T12:00:00Z,draft,USD +inv_001884,acct_0102,sub_0084,2025-04-08,2025-04-08T12:00:00Z,void,USD +inv_001885,acct_0103,sub_0085,2025-05-09,2025-05-09T12:00:00Z,failed,USD +inv_001886,acct_0104,sub_0086,2025-06-10,2025-06-10T12:00:00Z,paid,USD +inv_001887,acct_0105,sub_0087,2025-07-11,2025-07-11T12:00:00Z,open,USD +inv_001888,acct_0106,sub_0088,2025-08-12,2025-08-12T12:00:00Z,draft,USD +inv_001889,acct_0107,sub_0089,2025-09-13,2025-09-13T12:00:00Z,void,USD +inv_001890,acct_0108,sub_0090,2025-10-14,2025-10-14T12:00:00Z,failed,USD +inv_001891,acct_0109,sub_0091,2025-01-15,2025-01-15T12:00:00Z,paid,USD +inv_001892,acct_0110,sub_0092,2025-03-16,2025-03-16T12:00:00Z,open,USD +inv_001893,acct_0111,sub_0093,2025-03-17,2025-03-17T12:00:00Z,draft,USD +inv_001894,acct_0112,sub_0094,2025-04-18,2025-04-18T12:00:00Z,void,USD +inv_001895,acct_0113,sub_0095,2025-05-19,2025-05-19T12:00:00Z,failed,USD +inv_001896,acct_0114,sub_0096,2025-06-20,2025-06-20T12:00:00Z,paid,USD +inv_001897,acct_0115,sub_0097,2025-07-21,2025-07-21T12:00:00Z,open,USD +inv_001898,acct_0116,sub_0098,2025-08-22,2025-08-22T12:00:00Z,draft,USD +inv_001899,acct_0117,sub_0099,2025-09-23,2025-09-23T12:00:00Z,void,USD +inv_001900,acct_0118,sub_0100,2025-10-24,2025-10-24T12:00:00Z,failed,USD +inv_001901,acct_0119,sub_0101,2025-01-25,2025-01-25T12:00:00Z,paid,USD +inv_001902,acct_0120,sub_0102,2025-03-26,2025-03-26T12:00:00Z,open,USD +inv_001903,acct_0121,sub_0103,2025-03-27,2025-03-27T12:00:00Z,draft,USD +inv_001904,acct_0122,sub_0104,2025-04-28,2025-04-28T12:00:00Z,void,USD 
+inv_001905,acct_0123,sub_0105,2025-05-01,2025-05-01T12:00:00Z,failed,USD +inv_001906,acct_0124,sub_0106,2025-06-02,2025-06-02T12:00:00Z,paid,USD +inv_001907,acct_0125,sub_0107,2025-07-03,2025-07-03T12:00:00Z,open,USD +inv_001908,acct_0126,sub_0108,2025-08-04,2025-08-04T12:00:00Z,draft,USD +inv_001909,acct_0127,sub_0109,2025-09-05,2025-09-05T12:00:00Z,void,USD +inv_001910,acct_0128,sub_0110,2025-10-06,2025-10-06T12:00:00Z,failed,USD +inv_001911,acct_0129,sub_0111,2025-01-07,2025-01-07T12:00:00Z,paid,USD +inv_001912,acct_0130,sub_0112,2025-03-08,2025-03-08T12:00:00Z,open,USD +inv_001913,acct_0131,sub_0113,2025-03-09,2025-03-09T12:00:00Z,draft,USD +inv_001914,acct_0132,sub_0114,2025-04-10,2025-04-10T12:00:00Z,void,USD +inv_001915,acct_0133,sub_0115,2025-05-11,2025-05-11T12:00:00Z,failed,USD +inv_001916,acct_0134,sub_0116,2025-06-12,2025-06-12T12:00:00Z,paid,USD +inv_001917,acct_0135,sub_0117,2025-07-13,2025-07-13T12:00:00Z,open,USD +inv_001918,acct_0136,sub_0118,2025-08-14,2025-08-14T12:00:00Z,draft,USD +inv_001919,acct_0137,sub_0119,2025-09-15,2025-09-15T12:00:00Z,void,USD +inv_001920,acct_0138,sub_0120,2025-10-16,2025-10-16T12:00:00Z,failed,USD +inv_001921,acct_0139,sub_0121,2025-01-17,2025-01-17T12:00:00Z,paid,USD +inv_001922,acct_0140,sub_0122,2025-03-18,2025-03-18T12:00:00Z,open,USD +inv_001923,acct_0141,sub_0123,2025-03-19,2025-03-19T12:00:00Z,draft,USD +inv_001924,acct_0142,sub_0124,2025-04-20,2025-04-20T12:00:00Z,void,USD +inv_001925,acct_0143,sub_0125,2025-05-21,2025-05-21T12:00:00Z,failed,USD +inv_001926,acct_0144,sub_0126,2025-06-22,2025-06-22T12:00:00Z,paid,USD +inv_001927,acct_0145,sub_0127,2025-07-23,2025-07-23T12:00:00Z,open,USD +inv_001928,acct_0146,sub_0128,2025-08-24,2025-08-24T12:00:00Z,draft,USD +inv_001929,acct_0147,sub_0129,2025-09-25,2025-09-25T12:00:00Z,void,USD +inv_001930,acct_0148,sub_0130,2025-10-26,2025-10-26T12:00:00Z,failed,USD +inv_001931,acct_0149,sub_0131,2025-01-27,2025-01-27T12:00:00Z,paid,USD 
+inv_001932,acct_0150,sub_0132,2025-03-28,2025-03-28T12:00:00Z,open,USD +inv_001933,acct_0151,sub_0133,2025-03-01,2025-03-01T12:00:00Z,draft,USD +inv_001934,acct_0152,sub_0134,2025-04-02,2025-04-02T12:00:00Z,void,USD +inv_001935,acct_0153,sub_0135,2025-05-03,2025-05-03T12:00:00Z,failed,USD +inv_001936,acct_0154,sub_0136,2025-06-04,2025-06-04T12:00:00Z,paid,USD +inv_001937,acct_0155,sub_0137,2025-07-05,2025-07-05T12:00:00Z,open,USD +inv_001938,acct_0156,sub_0138,2025-08-06,2025-08-06T12:00:00Z,draft,USD +inv_001939,acct_0157,sub_0139,2025-09-07,2025-09-07T12:00:00Z,void,USD +inv_001940,acct_0158,sub_0140,2025-10-08,2025-10-08T12:00:00Z,failed,USD +inv_001941,acct_0159,sub_0141,2025-01-09,2025-01-09T12:00:00Z,paid,USD +inv_001942,acct_0160,sub_0142,2025-03-10,2025-03-10T12:00:00Z,open,USD +inv_001943,acct_0161,sub_0143,2025-03-11,2025-03-11T12:00:00Z,draft,USD +inv_001944,acct_0162,sub_0144,2025-04-12,2025-04-12T12:00:00Z,void,USD +inv_001945,acct_0163,sub_0145,2025-05-13,2025-05-13T12:00:00Z,failed,USD +inv_001946,acct_0164,sub_0146,2025-06-14,2025-06-14T12:00:00Z,paid,USD +inv_001947,acct_0165,sub_0147,2025-07-15,2025-07-15T12:00:00Z,open,USD +inv_001948,acct_0166,sub_0148,2025-08-16,2025-08-16T12:00:00Z,draft,USD +inv_001949,acct_0167,sub_0149,2025-09-17,2025-09-17T12:00:00Z,void,USD +inv_001950,acct_0168,sub_0150,2025-10-18,2025-10-18T12:00:00Z,failed,USD +inv_001951,acct_0169,sub_0151,2025-01-19,2025-01-19T12:00:00Z,paid,USD +inv_001952,acct_0170,sub_0152,2025-03-20,2025-03-20T12:00:00Z,open,USD +inv_001953,acct_0171,sub_0153,2025-03-21,2025-03-21T12:00:00Z,draft,USD +inv_001954,acct_0172,sub_0154,2025-04-22,2025-04-22T12:00:00Z,void,USD +inv_001955,acct_0173,sub_0155,2025-05-23,2025-05-23T12:00:00Z,failed,USD +inv_001956,acct_0174,sub_0156,2025-06-24,2025-06-24T12:00:00Z,paid,USD +inv_001957,acct_0175,sub_0157,2025-07-25,2025-07-25T12:00:00Z,open,USD +inv_001958,acct_0176,sub_0158,2025-08-26,2025-08-26T12:00:00Z,draft,USD 
+inv_001959,acct_0177,sub_0159,2025-09-27,2025-09-27T12:00:00Z,void,USD +inv_001960,acct_0178,sub_0160,2025-10-28,2025-10-28T12:00:00Z,failed,USD +inv_001961,acct_0179,sub_0161,2025-01-01,2025-01-01T12:00:00Z,paid,USD +inv_001962,acct_0180,sub_0162,2025-03-02,2025-03-02T12:00:00Z,open,USD +inv_001963,acct_0181,sub_0163,2025-03-03,2025-03-03T12:00:00Z,draft,USD +inv_001964,acct_0182,sub_0164,2025-04-04,2025-04-04T12:00:00Z,void,USD +inv_001965,acct_0183,sub_0165,2025-05-05,2025-05-05T12:00:00Z,failed,USD +inv_001966,acct_0184,sub_0166,2025-06-06,2025-06-06T12:00:00Z,paid,USD +inv_001967,acct_0185,sub_0167,2025-07-07,2025-07-07T12:00:00Z,open,USD +inv_001968,acct_0186,sub_0168,2025-08-08,2025-08-08T12:00:00Z,draft,USD +inv_001969,acct_0187,sub_0169,2025-09-09,2025-09-09T12:00:00Z,void,USD +inv_001970,acct_0188,sub_0170,2025-10-10,2025-10-10T12:00:00Z,failed,USD +inv_001971,acct_0189,sub_0171,2025-01-11,2025-01-11T12:00:00Z,paid,USD +inv_001972,acct_0190,sub_0172,2025-03-12,2025-03-12T12:00:00Z,open,USD +inv_001973,acct_0191,sub_0173,2025-03-13,2025-03-13T12:00:00Z,draft,USD +inv_001974,acct_0192,sub_0174,2025-04-14,2025-04-14T12:00:00Z,void,USD +inv_001975,acct_0193,sub_0175,2025-05-15,2025-05-15T12:00:00Z,failed,USD +inv_001976,acct_0194,sub_0176,2025-06-16,2025-06-16T12:00:00Z,paid,USD +inv_001977,acct_0195,sub_0177,2025-07-17,2025-07-17T12:00:00Z,open,USD +inv_001978,acct_0196,sub_0178,2025-08-18,2025-08-18T12:00:00Z,draft,USD +inv_001979,acct_0197,sub_0179,2025-09-19,2025-09-19T12:00:00Z,void,USD +inv_001980,acct_0198,sub_0180,2025-10-20,2025-10-20T12:00:00Z,failed,USD +inv_001981,acct_0001,sub_0181,2025-01-21,2025-01-21T12:00:00Z,paid,USD +inv_001982,acct_0002,sub_0182,2025-03-22,2025-03-22T12:00:00Z,open,USD +inv_001983,acct_0003,sub_0183,2025-03-23,2025-03-23T12:00:00Z,draft,USD +inv_001984,acct_0004,sub_0184,2025-04-24,2025-04-24T12:00:00Z,void,USD +inv_001985,acct_0005,sub_0185,2025-05-25,2025-05-25T12:00:00Z,failed,USD 
+inv_001986,acct_0006,sub_0186,2025-06-26,2025-06-26T12:00:00Z,paid,USD +inv_001987,acct_0007,sub_0187,2025-07-27,2025-07-27T12:00:00Z,open,USD +inv_001988,acct_0008,sub_0188,2025-08-28,2025-08-28T12:00:00Z,draft,USD +inv_001989,acct_0009,sub_0189,2025-09-01,2025-09-01T12:00:00Z,void,USD +inv_001990,acct_0010,sub_0190,2025-10-02,2025-10-02T12:00:00Z,failed,USD +inv_001991,acct_0011,sub_0191,2025-01-03,2025-01-03T12:00:00Z,paid,USD +inv_001992,acct_0012,sub_0192,2025-03-04,2025-03-04T12:00:00Z,open,USD +inv_001993,acct_0013,sub_0193,2025-03-05,2025-03-05T12:00:00Z,draft,USD +inv_001994,acct_0014,sub_0194,2025-04-06,2025-04-06T12:00:00Z,void,USD +inv_001995,acct_0015,sub_0195,2025-05-07,2025-05-07T12:00:00Z,failed,USD +inv_001996,acct_0016,sub_0196,2025-06-08,2025-06-08T12:00:00Z,paid,USD +inv_001997,acct_0017,sub_0197,2025-07-09,2025-07-09T12:00:00Z,open,USD +inv_001998,acct_0018,sub_0198,2025-08-10,2025-08-10T12:00:00Z,draft,USD +inv_001999,acct_0019,sub_0199,2025-09-11,2025-09-11T12:00:00Z,void,USD +inv_002000,acct_0020,sub_0200,2025-10-12,2025-10-12T12:00:00Z,failed,USD +inv_002001,acct_0021,sub_0201,2025-01-13,2025-01-13T12:00:00Z,paid,USD +inv_002002,acct_0022,sub_0202,2025-03-14,2025-03-14T12:00:00Z,open,USD +inv_002003,acct_0023,sub_0203,2025-03-15,2025-03-15T12:00:00Z,draft,USD +inv_002004,acct_0024,sub_0204,2025-04-16,2025-04-16T12:00:00Z,void,USD +inv_002005,acct_0025,sub_0205,2025-05-17,2025-05-17T12:00:00Z,failed,USD +inv_002006,acct_0026,sub_0206,2025-06-18,2025-06-18T12:00:00Z,paid,USD +inv_002007,acct_0027,sub_0207,2025-07-19,2025-07-19T12:00:00Z,open,USD +inv_002008,acct_0028,sub_0208,2025-08-20,2025-08-20T12:00:00Z,draft,USD +inv_002009,acct_0029,sub_0209,2025-09-21,2025-09-21T12:00:00Z,void,USD +inv_002010,acct_0030,sub_0210,2025-10-22,2025-10-22T12:00:00Z,failed,USD +inv_002011,acct_0031,sub_0211,2025-01-23,2025-01-23T12:00:00Z,paid,USD +inv_002012,acct_0032,sub_0212,2025-03-24,2025-03-24T12:00:00Z,open,USD 
+inv_002013,acct_0033,sub_0213,2025-03-25,2025-03-25T12:00:00Z,draft,USD +inv_002014,acct_0034,sub_0214,2025-04-26,2025-04-26T12:00:00Z,void,USD +inv_002015,acct_0035,sub_0215,2025-05-27,2025-05-27T12:00:00Z,failed,USD +inv_002016,acct_0036,sub_0216,2025-06-28,2025-06-28T12:00:00Z,paid,USD +inv_002017,acct_0037,sub_0217,2025-07-01,2025-07-01T12:00:00Z,open,USD +inv_002018,acct_0038,sub_0218,2025-08-02,2025-08-02T12:00:00Z,draft,USD +inv_002019,acct_0039,sub_0219,2025-09-03,2025-09-03T12:00:00Z,void,USD +inv_002020,acct_0040,sub_0220,2025-10-04,2025-10-04T12:00:00Z,failed,USD +inv_002021,acct_0041,sub_0221,2025-01-05,2025-01-05T12:00:00Z,paid,USD +inv_002022,acct_0042,sub_0222,2025-03-06,2025-03-06T12:00:00Z,open,USD +inv_002023,acct_0043,sub_0223,2025-03-07,2025-03-07T12:00:00Z,draft,USD +inv_002024,acct_0044,sub_0224,2025-04-08,2025-04-08T12:00:00Z,void,USD +inv_002025,acct_0045,sub_0225,2025-05-09,2025-05-09T12:00:00Z,failed,USD +inv_002026,acct_0046,sub_0226,2025-06-10,2025-06-10T12:00:00Z,paid,USD +inv_002027,acct_0047,sub_0227,2025-07-11,2025-07-11T12:00:00Z,open,USD +inv_002028,acct_0048,sub_0228,2025-08-12,2025-08-12T12:00:00Z,draft,USD +inv_002029,acct_0049,sub_0229,2025-09-13,2025-09-13T12:00:00Z,void,USD +inv_002030,acct_0050,sub_0230,2025-10-14,2025-10-14T12:00:00Z,failed,USD +inv_002031,acct_0051,sub_0231,2025-01-15,2025-01-15T12:00:00Z,paid,USD +inv_002032,acct_0052,sub_0232,2025-03-16,2025-03-16T12:00:00Z,open,USD +inv_002033,acct_0053,sub_0233,2025-03-17,2025-03-17T12:00:00Z,draft,USD +inv_002034,acct_0054,sub_0234,2025-04-18,2025-04-18T12:00:00Z,void,USD +inv_002035,acct_0055,sub_0235,2025-05-19,2025-05-19T12:00:00Z,failed,USD +inv_002036,acct_0056,sub_0236,2025-06-20,2025-06-20T12:00:00Z,paid,USD +inv_002037,acct_0057,sub_0237,2025-07-21,2025-07-21T12:00:00Z,open,USD +inv_002038,acct_0058,sub_0238,2025-08-22,2025-08-22T12:00:00Z,draft,USD +inv_002039,acct_0059,sub_0239,2025-09-23,2025-09-23T12:00:00Z,void,USD 
+inv_002040,acct_0060,sub_0240,2025-10-24,2025-10-24T12:00:00Z,failed,USD +inv_002041,acct_0061,sub_0241,2025-01-25,2025-01-25T12:00:00Z,paid,USD +inv_002042,acct_0062,sub_0242,2025-03-26,2025-03-26T12:00:00Z,open,USD +inv_002043,acct_0063,sub_0243,2025-03-27,2025-03-27T12:00:00Z,draft,USD +inv_002044,acct_0064,sub_0244,2025-04-28,2025-04-28T12:00:00Z,void,USD +inv_002045,acct_0065,sub_0245,2025-05-01,2025-05-01T12:00:00Z,failed,USD +inv_002046,acct_0066,sub_0246,2025-06-02,2025-06-02T12:00:00Z,paid,USD +inv_002047,acct_0067,sub_0247,2025-07-03,2025-07-03T12:00:00Z,open,USD +inv_002048,acct_0068,sub_0248,2025-08-04,2025-08-04T12:00:00Z,draft,USD +inv_002049,acct_0069,sub_0249,2025-09-05,2025-09-05T12:00:00Z,void,USD +inv_002050,acct_0070,sub_0250,2025-10-06,2025-10-06T12:00:00Z,failed,USD +inv_002051,acct_0071,sub_0251,2025-01-07,2025-01-07T12:00:00Z,paid,USD +inv_002052,acct_0072,sub_0252,2025-03-08,2025-03-08T12:00:00Z,open,USD +inv_002053,acct_0073,sub_0253,2025-03-09,2025-03-09T12:00:00Z,draft,USD +inv_002054,acct_0074,sub_0254,2025-04-10,2025-04-10T12:00:00Z,void,USD +inv_002055,acct_0075,sub_0255,2025-05-11,2025-05-11T12:00:00Z,failed,USD +inv_002056,acct_0076,sub_0256,2025-06-12,2025-06-12T12:00:00Z,paid,USD +inv_002057,acct_0077,sub_0257,2025-07-13,2025-07-13T12:00:00Z,open,USD +inv_002058,acct_0078,sub_0258,2025-08-14,2025-08-14T12:00:00Z,draft,USD +inv_002059,acct_0079,sub_0259,2025-09-15,2025-09-15T12:00:00Z,void,USD +inv_002060,acct_0080,sub_0260,2025-10-16,2025-10-16T12:00:00Z,failed,USD +inv_002061,acct_0081,sub_0261,2025-01-17,2025-01-17T12:00:00Z,paid,USD +inv_002062,acct_0082,sub_0262,2025-03-18,2025-03-18T12:00:00Z,open,USD +inv_002063,acct_0083,sub_0263,2025-03-19,2025-03-19T12:00:00Z,draft,USD +inv_002064,acct_0084,sub_0264,2025-04-20,2025-04-20T12:00:00Z,void,USD +inv_002065,acct_0085,sub_0265,2025-05-21,2025-05-21T12:00:00Z,failed,USD +inv_002066,acct_0086,sub_0266,2025-06-22,2025-06-22T12:00:00Z,paid,USD 
+inv_002067,acct_0087,sub_0267,2025-07-23,2025-07-23T12:00:00Z,open,USD +inv_002068,acct_0088,sub_0268,2025-08-24,2025-08-24T12:00:00Z,draft,USD +inv_002069,acct_0089,sub_0269,2025-09-25,2025-09-25T12:00:00Z,void,USD +inv_002070,acct_0090,sub_0270,2025-10-26,2025-10-26T12:00:00Z,failed,USD +inv_002071,acct_0091,sub_0271,2025-01-27,2025-01-27T12:00:00Z,paid,USD +inv_002072,acct_0092,sub_0272,2025-03-28,2025-03-28T12:00:00Z,open,USD +inv_002073,acct_0093,sub_0273,2025-03-01,2025-03-01T12:00:00Z,draft,USD +inv_002074,acct_0094,sub_0274,2025-04-02,2025-04-02T12:00:00Z,void,USD +inv_002075,acct_0095,sub_0275,2025-05-03,2025-05-03T12:00:00Z,failed,USD +inv_002076,acct_0096,sub_0276,2025-06-04,2025-06-04T12:00:00Z,paid,USD +inv_002077,acct_0097,sub_0277,2025-07-05,2025-07-05T12:00:00Z,open,USD +inv_002078,acct_0098,sub_0278,2025-08-06,2025-08-06T12:00:00Z,draft,USD +inv_002079,acct_0099,sub_0279,2025-09-07,2025-09-07T12:00:00Z,void,USD +inv_002080,acct_0100,sub_0280,2025-10-08,2025-10-08T12:00:00Z,failed,USD +inv_002081,acct_0101,sub_0281,2025-01-09,2025-01-09T12:00:00Z,paid,USD +inv_002082,acct_0102,sub_0282,2025-03-10,2025-03-10T12:00:00Z,open,USD +inv_002083,acct_0103,sub_0283,2025-03-11,2025-03-11T12:00:00Z,draft,USD +inv_002084,acct_0104,sub_0284,2025-04-12,2025-04-12T12:00:00Z,void,USD +inv_002085,acct_0105,sub_0285,2025-05-13,2025-05-13T12:00:00Z,failed,USD +inv_002086,acct_0106,sub_0286,2025-06-14,2025-06-14T12:00:00Z,paid,USD +inv_002087,acct_0107,sub_0287,2025-07-15,2025-07-15T12:00:00Z,open,USD +inv_002088,acct_0108,sub_0288,2025-08-16,2025-08-16T12:00:00Z,draft,USD +inv_002089,acct_0109,sub_0289,2025-09-17,2025-09-17T12:00:00Z,void,USD +inv_002090,acct_0110,sub_0290,2025-10-18,2025-10-18T12:00:00Z,failed,USD +inv_002091,acct_0111,sub_0291,2025-01-19,2025-01-19T12:00:00Z,paid,USD +inv_002092,acct_0112,sub_0292,2025-03-20,2025-03-20T12:00:00Z,open,USD +inv_002093,acct_0113,sub_0293,2025-03-21,2025-03-21T12:00:00Z,draft,USD 
+inv_002094,acct_0114,sub_0294,2025-04-22,2025-04-22T12:00:00Z,void,USD +inv_002095,acct_0115,sub_0295,2025-05-23,2025-05-23T12:00:00Z,failed,USD +inv_002096,acct_0116,sub_0296,2025-06-24,2025-06-24T12:00:00Z,paid,USD +inv_002097,acct_0117,sub_0297,2025-07-25,2025-07-25T12:00:00Z,open,USD +inv_002098,acct_0118,sub_0298,2025-08-26,2025-08-26T12:00:00Z,draft,USD +inv_002099,acct_0119,sub_0299,2025-09-27,2025-09-27T12:00:00Z,void,USD +inv_002100,acct_0120,sub_0300,2025-10-28,2025-10-28T12:00:00Z,failed,USD +inv_002101,acct_0121,sub_0301,2025-01-01,2025-01-01T12:00:00Z,paid,USD +inv_002102,acct_0122,sub_0302,2025-03-02,2025-03-02T12:00:00Z,open,USD +inv_002103,acct_0123,sub_0303,2025-03-03,2025-03-03T12:00:00Z,draft,USD +inv_002104,acct_0124,sub_0304,2025-04-04,2025-04-04T12:00:00Z,void,USD +inv_002105,acct_0125,sub_0305,2025-05-05,2025-05-05T12:00:00Z,failed,USD +inv_002106,acct_0126,sub_0306,2025-06-06,2025-06-06T12:00:00Z,paid,USD +inv_002107,acct_0127,sub_0307,2025-07-07,2025-07-07T12:00:00Z,open,USD +inv_002108,acct_0128,sub_0308,2025-08-08,2025-08-08T12:00:00Z,draft,USD +inv_002109,acct_0129,sub_0309,2025-09-09,2025-09-09T12:00:00Z,void,USD +inv_002110,acct_0130,sub_0310,2025-10-10,2025-10-10T12:00:00Z,failed,USD +inv_002111,acct_0131,sub_0311,2025-01-11,2025-01-11T12:00:00Z,paid,USD +inv_002112,acct_0132,sub_0312,2025-03-12,2025-03-12T12:00:00Z,open,USD +inv_002113,acct_0133,sub_0313,2025-03-13,2025-03-13T12:00:00Z,draft,USD +inv_002114,acct_0134,sub_0314,2025-04-14,2025-04-14T12:00:00Z,void,USD +inv_002115,acct_0135,sub_0315,2025-05-15,2025-05-15T12:00:00Z,failed,USD +inv_002116,acct_0136,sub_0316,2025-06-16,2025-06-16T12:00:00Z,paid,USD +inv_002117,acct_0137,sub_0317,2025-07-17,2025-07-17T12:00:00Z,open,USD +inv_002118,acct_0138,sub_0318,2025-08-18,2025-08-18T12:00:00Z,draft,USD +inv_002119,acct_0139,sub_0319,2025-09-19,2025-09-19T12:00:00Z,void,USD +inv_002120,acct_0140,sub_0320,2025-10-20,2025-10-20T12:00:00Z,failed,USD 
+inv_002121,acct_0141,sub_0321,2025-01-21,2025-01-21T12:00:00Z,paid,USD +inv_002122,acct_0142,sub_0322,2025-03-22,2025-03-22T12:00:00Z,open,USD +inv_002123,acct_0143,sub_0323,2025-03-23,2025-03-23T12:00:00Z,draft,USD +inv_002124,acct_0144,sub_0324,2025-04-24,2025-04-24T12:00:00Z,void,USD +inv_002125,acct_0145,sub_0325,2025-05-25,2025-05-25T12:00:00Z,failed,USD +inv_002126,acct_0146,sub_0326,2025-06-26,2025-06-26T12:00:00Z,paid,USD +inv_002127,acct_0147,sub_0327,2025-07-27,2025-07-27T12:00:00Z,open,USD +inv_002128,acct_0148,sub_0328,2025-08-28,2025-08-28T12:00:00Z,draft,USD +inv_002129,acct_0149,sub_0329,2025-09-01,2025-09-01T12:00:00Z,void,USD +inv_002130,acct_0150,sub_0330,2025-10-02,2025-10-02T12:00:00Z,failed,USD +inv_002131,acct_0151,sub_0331,2025-01-03,2025-01-03T12:00:00Z,paid,USD +inv_002132,acct_0152,sub_0332,2025-03-04,2025-03-04T12:00:00Z,open,USD +inv_002133,acct_0153,sub_0333,2025-03-05,2025-03-05T12:00:00Z,draft,USD +inv_002134,acct_0154,sub_0334,2025-04-06,2025-04-06T12:00:00Z,void,USD +inv_002135,acct_0155,sub_0335,2025-05-07,2025-05-07T12:00:00Z,failed,USD +inv_002136,acct_0156,sub_0336,2025-06-08,2025-06-08T12:00:00Z,paid,USD +inv_002137,acct_0157,sub_0337,2025-07-09,2025-07-09T12:00:00Z,open,USD +inv_002138,acct_0158,sub_0338,2025-08-10,2025-08-10T12:00:00Z,draft,USD +inv_002139,acct_0159,sub_0339,2025-09-11,2025-09-11T12:00:00Z,void,USD +inv_002140,acct_0160,sub_0340,2025-10-12,2025-10-12T12:00:00Z,failed,USD +inv_002141,acct_0161,sub_0341,2025-01-13,2025-01-13T12:00:00Z,paid,USD +inv_002142,acct_0162,sub_0342,2025-03-14,2025-03-14T12:00:00Z,open,USD +inv_002143,acct_0163,sub_0343,2025-03-15,2025-03-15T12:00:00Z,draft,USD +inv_002144,acct_0164,sub_0344,2025-04-16,2025-04-16T12:00:00Z,void,USD +inv_002145,acct_0165,sub_0345,2025-05-17,2025-05-17T12:00:00Z,failed,USD +inv_002146,acct_0166,sub_0346,2025-06-18,2025-06-18T12:00:00Z,paid,USD +inv_002147,acct_0167,sub_0347,2025-07-19,2025-07-19T12:00:00Z,open,USD 
+inv_002148,acct_0168,sub_0348,2025-08-20,2025-08-20T12:00:00Z,draft,USD +inv_002149,acct_0169,sub_0349,2025-09-21,2025-09-21T12:00:00Z,void,USD +inv_002150,acct_0170,sub_0350,2025-10-22,2025-10-22T12:00:00Z,failed,USD +inv_002151,acct_0171,sub_0351,2025-01-23,2025-01-23T12:00:00Z,paid,USD +inv_002152,acct_0172,sub_0352,2025-03-24,2025-03-24T12:00:00Z,open,USD +inv_002153,acct_0173,sub_0353,2025-03-25,2025-03-25T12:00:00Z,draft,USD +inv_002154,acct_0174,sub_0354,2025-04-26,2025-04-26T12:00:00Z,void,USD +inv_002155,acct_0175,sub_0355,2025-05-27,2025-05-27T12:00:00Z,failed,USD +inv_002156,acct_0176,sub_0356,2025-06-28,2025-06-28T12:00:00Z,paid,USD +inv_002157,acct_0177,sub_0357,2025-07-01,2025-07-01T12:00:00Z,open,USD +inv_002158,acct_0178,sub_0358,2025-08-02,2025-08-02T12:00:00Z,draft,USD +inv_002159,acct_0179,sub_0359,2025-09-03,2025-09-03T12:00:00Z,void,USD +inv_002160,acct_0180,sub_0360,2025-10-04,2025-10-04T12:00:00Z,failed,USD +inv_002161,acct_0181,sub_0001,2025-01-05,2025-01-05T12:00:00Z,paid,USD +inv_002162,acct_0182,sub_0002,2025-03-06,2025-03-06T12:00:00Z,open,USD +inv_002163,acct_0183,sub_0003,2025-03-07,2025-03-07T12:00:00Z,draft,USD +inv_002164,acct_0184,sub_0004,2025-04-08,2025-04-08T12:00:00Z,void,USD +inv_002165,acct_0185,sub_0005,2025-05-09,2025-05-09T12:00:00Z,failed,USD +inv_002166,acct_0186,sub_0006,2025-06-10,2025-06-10T12:00:00Z,paid,USD +inv_002167,acct_0187,sub_0007,2025-07-11,2025-07-11T12:00:00Z,open,USD +inv_002168,acct_0188,sub_0008,2025-08-12,2025-08-12T12:00:00Z,draft,USD +inv_002169,acct_0189,sub_0009,2025-09-13,2025-09-13T12:00:00Z,void,USD +inv_002170,acct_0190,sub_0010,2025-10-14,2025-10-14T12:00:00Z,failed,USD +inv_002171,acct_0191,sub_0011,2025-01-15,2025-01-15T12:00:00Z,paid,USD +inv_002172,acct_0192,sub_0012,2025-03-16,2025-03-16T12:00:00Z,open,USD +inv_002173,acct_0193,sub_0013,2025-03-17,2025-03-17T12:00:00Z,draft,USD +inv_002174,acct_0194,sub_0014,2025-04-18,2025-04-18T12:00:00Z,void,USD 
+inv_002175,acct_0195,sub_0015,2025-05-19,2025-05-19T12:00:00Z,failed,USD +inv_002176,acct_0196,sub_0016,2025-06-20,2025-06-20T12:00:00Z,paid,USD +inv_002177,acct_0197,sub_0017,2025-07-21,2025-07-21T12:00:00Z,open,USD +inv_002178,acct_0198,sub_0018,2025-08-22,2025-08-22T12:00:00Z,draft,USD +inv_002179,acct_0001,sub_0019,2025-09-23,2025-09-23T12:00:00Z,void,USD +inv_002180,acct_0002,sub_0020,2025-10-24,2025-10-24T12:00:00Z,failed,USD +inv_002181,acct_0003,sub_0021,2025-01-25,2025-01-25T12:00:00Z,paid,USD +inv_002182,acct_0004,sub_0022,2025-03-26,2025-03-26T12:00:00Z,open,USD +inv_002183,acct_0005,sub_0023,2025-03-27,2025-03-27T12:00:00Z,draft,USD +inv_002184,acct_0006,sub_0024,2025-04-28,2025-04-28T12:00:00Z,void,USD +inv_002185,acct_0007,sub_0025,2025-05-01,2025-05-01T12:00:00Z,failed,USD +inv_002186,acct_0008,sub_0026,2025-06-02,2025-06-02T12:00:00Z,paid,USD +inv_002187,acct_0009,sub_0027,2025-07-03,2025-07-03T12:00:00Z,open,USD +inv_002188,acct_0010,sub_0028,2025-08-04,2025-08-04T12:00:00Z,draft,USD +inv_002189,acct_0011,sub_0029,2025-09-05,2025-09-05T12:00:00Z,void,USD +inv_002190,acct_0012,sub_0030,2025-10-06,2025-10-06T12:00:00Z,failed,USD +inv_002191,acct_0013,sub_0031,2025-01-07,2025-01-07T12:00:00Z,paid,USD +inv_002192,acct_0014,sub_0032,2025-03-08,2025-03-08T12:00:00Z,open,USD +inv_002193,acct_0015,sub_0033,2025-03-09,2025-03-09T12:00:00Z,draft,USD +inv_002194,acct_0016,sub_0034,2025-04-10,2025-04-10T12:00:00Z,void,USD +inv_002195,acct_0017,sub_0035,2025-05-11,2025-05-11T12:00:00Z,failed,USD +inv_002196,acct_0018,sub_0036,2025-06-12,2025-06-12T12:00:00Z,paid,USD +inv_002197,acct_0019,sub_0037,2025-07-13,2025-07-13T12:00:00Z,open,USD +inv_002198,acct_0020,sub_0038,2025-08-14,2025-08-14T12:00:00Z,draft,USD +inv_002199,acct_0021,sub_0039,2025-09-15,2025-09-15T12:00:00Z,void,USD +inv_002200,acct_0022,sub_0040,2025-10-16,2025-10-16T12:00:00Z,failed,USD +inv_002201,acct_0023,sub_0041,2025-01-17,2025-01-17T12:00:00Z,paid,USD 
+inv_002202,acct_0024,sub_0042,2025-03-18,2025-03-18T12:00:00Z,open,USD +inv_002203,acct_0025,sub_0043,2025-03-19,2025-03-19T12:00:00Z,draft,USD +inv_002204,acct_0026,sub_0044,2025-04-20,2025-04-20T12:00:00Z,void,USD +inv_002205,acct_0027,sub_0045,2025-05-21,2025-05-21T12:00:00Z,failed,USD +inv_002206,acct_0028,sub_0046,2025-06-22,2025-06-22T12:00:00Z,paid,USD +inv_002207,acct_0029,sub_0047,2025-07-23,2025-07-23T12:00:00Z,open,USD +inv_002208,acct_0030,sub_0048,2025-08-24,2025-08-24T12:00:00Z,draft,USD +inv_002209,acct_0031,sub_0049,2025-09-25,2025-09-25T12:00:00Z,void,USD +inv_002210,acct_0032,sub_0050,2025-10-26,2025-10-26T12:00:00Z,failed,USD +inv_002211,acct_0033,sub_0051,2025-01-27,2025-01-27T12:00:00Z,paid,USD +inv_002212,acct_0034,sub_0052,2025-03-28,2025-03-28T12:00:00Z,open,USD +inv_002213,acct_0035,sub_0053,2025-03-01,2025-03-01T12:00:00Z,draft,USD +inv_002214,acct_0036,sub_0054,2025-04-02,2025-04-02T12:00:00Z,void,USD +inv_002215,acct_0037,sub_0055,2025-05-03,2025-05-03T12:00:00Z,failed,USD +inv_002216,acct_0038,sub_0056,2025-06-04,2025-06-04T12:00:00Z,paid,USD +inv_002217,acct_0039,sub_0057,2025-07-05,2025-07-05T12:00:00Z,open,USD +inv_002218,acct_0040,sub_0058,2025-08-06,2025-08-06T12:00:00Z,draft,USD +inv_002219,acct_0041,sub_0059,2025-09-07,2025-09-07T12:00:00Z,void,USD +inv_002220,acct_0042,sub_0060,2025-10-08,2025-10-08T12:00:00Z,failed,USD +inv_002221,acct_0043,sub_0061,2025-01-09,2025-01-09T12:00:00Z,paid,USD +inv_002222,acct_0044,sub_0062,2025-03-10,2025-03-10T12:00:00Z,open,USD +inv_002223,acct_0045,sub_0063,2025-03-11,2025-03-11T12:00:00Z,draft,USD +inv_002224,acct_0046,sub_0064,2025-04-12,2025-04-12T12:00:00Z,void,USD +inv_002225,acct_0047,sub_0065,2025-05-13,2025-05-13T12:00:00Z,failed,USD +inv_002226,acct_0048,sub_0066,2025-06-14,2025-06-14T12:00:00Z,paid,USD +inv_002227,acct_0049,sub_0067,2025-07-15,2025-07-15T12:00:00Z,open,USD +inv_002228,acct_0050,sub_0068,2025-08-16,2025-08-16T12:00:00Z,draft,USD 
+inv_002229,acct_0051,sub_0069,2025-09-17,2025-09-17T12:00:00Z,void,USD +inv_002230,acct_0052,sub_0070,2025-10-18,2025-10-18T12:00:00Z,failed,USD +inv_002231,acct_0053,sub_0071,2025-01-19,2025-01-19T12:00:00Z,paid,USD +inv_002232,acct_0054,sub_0072,2025-03-20,2025-03-20T12:00:00Z,open,USD +inv_002233,acct_0055,sub_0073,2025-03-21,2025-03-21T12:00:00Z,draft,USD +inv_002234,acct_0056,sub_0074,2025-04-22,2025-04-22T12:00:00Z,void,USD +inv_002235,acct_0057,sub_0075,2025-05-23,2025-05-23T12:00:00Z,failed,USD +inv_002236,acct_0058,sub_0076,2025-06-24,2025-06-24T12:00:00Z,paid,USD +inv_002237,acct_0059,sub_0077,2025-07-25,2025-07-25T12:00:00Z,open,USD +inv_002238,acct_0060,sub_0078,2025-08-26,2025-08-26T12:00:00Z,draft,USD +inv_002239,acct_0061,sub_0079,2025-09-27,2025-09-27T12:00:00Z,void,USD +inv_002240,acct_0062,sub_0080,2025-10-28,2025-10-28T12:00:00Z,failed,USD +inv_002241,acct_0063,sub_0081,2025-01-01,2025-01-01T12:00:00Z,paid,USD +inv_002242,acct_0064,sub_0082,2025-03-02,2025-03-02T12:00:00Z,open,USD +inv_002243,acct_0065,sub_0083,2025-03-03,2025-03-03T12:00:00Z,draft,USD +inv_002244,acct_0066,sub_0084,2025-04-04,2025-04-04T12:00:00Z,void,USD +inv_002245,acct_0067,sub_0085,2025-05-05,2025-05-05T12:00:00Z,failed,USD +inv_002246,acct_0068,sub_0086,2025-06-06,2025-06-06T12:00:00Z,paid,USD +inv_002247,acct_0069,sub_0087,2025-07-07,2025-07-07T12:00:00Z,open,USD +inv_002248,acct_0070,sub_0088,2025-08-08,2025-08-08T12:00:00Z,draft,USD +inv_002249,acct_0071,sub_0089,2025-09-09,2025-09-09T12:00:00Z,void,USD +inv_002250,acct_0072,sub_0090,2025-10-10,2025-10-10T12:00:00Z,failed,USD +inv_002251,acct_0073,sub_0091,2025-01-11,2025-01-11T12:00:00Z,paid,USD +inv_002252,acct_0074,sub_0092,2025-03-12,2025-03-12T12:00:00Z,open,USD +inv_002253,acct_0075,sub_0093,2025-03-13,2025-03-13T12:00:00Z,draft,USD +inv_002254,acct_0076,sub_0094,2025-04-14,2025-04-14T12:00:00Z,void,USD +inv_002255,acct_0077,sub_0095,2025-05-15,2025-05-15T12:00:00Z,failed,USD 
+inv_002256,acct_0078,sub_0096,2025-06-16,2025-06-16T12:00:00Z,paid,USD +inv_002257,acct_0079,sub_0097,2025-07-17,2025-07-17T12:00:00Z,open,USD +inv_002258,acct_0080,sub_0098,2025-08-18,2025-08-18T12:00:00Z,draft,USD +inv_002259,acct_0081,sub_0099,2025-09-19,2025-09-19T12:00:00Z,void,USD +inv_002260,acct_0082,sub_0100,2025-10-20,2025-10-20T12:00:00Z,failed,USD +inv_002261,acct_0083,sub_0101,2025-01-21,2025-01-21T12:00:00Z,paid,USD +inv_002262,acct_0084,sub_0102,2025-03-22,2025-03-22T12:00:00Z,open,USD +inv_002263,acct_0085,sub_0103,2025-03-23,2025-03-23T12:00:00Z,draft,USD +inv_002264,acct_0086,sub_0104,2025-04-24,2025-04-24T12:00:00Z,void,USD +inv_002265,acct_0087,sub_0105,2025-05-25,2025-05-25T12:00:00Z,failed,USD +inv_002266,acct_0088,sub_0106,2025-06-26,2025-06-26T12:00:00Z,paid,USD +inv_002267,acct_0089,sub_0107,2025-07-27,2025-07-27T12:00:00Z,open,USD +inv_002268,acct_0090,sub_0108,2025-08-28,2025-08-28T12:00:00Z,draft,USD +inv_002269,acct_0091,sub_0109,2025-09-01,2025-09-01T12:00:00Z,void,USD +inv_002270,acct_0092,sub_0110,2025-10-02,2025-10-02T12:00:00Z,failed,USD +inv_002271,acct_0093,sub_0111,2025-01-03,2025-01-03T12:00:00Z,paid,USD +inv_002272,acct_0094,sub_0112,2025-03-04,2025-03-04T12:00:00Z,open,USD +inv_002273,acct_0095,sub_0113,2025-03-05,2025-03-05T12:00:00Z,draft,USD +inv_002274,acct_0096,sub_0114,2025-04-06,2025-04-06T12:00:00Z,void,USD +inv_002275,acct_0097,sub_0115,2025-05-07,2025-05-07T12:00:00Z,failed,USD +inv_002276,acct_0098,sub_0116,2025-06-08,2025-06-08T12:00:00Z,paid,USD +inv_002277,acct_0099,sub_0117,2025-07-09,2025-07-09T12:00:00Z,open,USD +inv_002278,acct_0100,sub_0118,2025-08-10,2025-08-10T12:00:00Z,draft,USD +inv_002279,acct_0101,sub_0119,2025-09-11,2025-09-11T12:00:00Z,void,USD +inv_002280,acct_0102,sub_0120,2025-10-12,2025-10-12T12:00:00Z,failed,USD +inv_002281,acct_0103,sub_0121,2025-01-13,2025-01-13T12:00:00Z,paid,USD +inv_002282,acct_0104,sub_0122,2025-03-14,2025-03-14T12:00:00Z,open,USD 
+inv_002283,acct_0105,sub_0123,2025-03-15,2025-03-15T12:00:00Z,draft,USD +inv_002284,acct_0106,sub_0124,2025-04-16,2025-04-16T12:00:00Z,void,USD +inv_002285,acct_0107,sub_0125,2025-05-17,2025-05-17T12:00:00Z,failed,USD +inv_002286,acct_0108,sub_0126,2025-06-18,2025-06-18T12:00:00Z,paid,USD +inv_002287,acct_0109,sub_0127,2025-07-19,2025-07-19T12:00:00Z,open,USD +inv_002288,acct_0110,sub_0128,2025-08-20,2025-08-20T12:00:00Z,draft,USD +inv_002289,acct_0111,sub_0129,2025-09-21,2025-09-21T12:00:00Z,void,USD +inv_002290,acct_0112,sub_0130,2025-10-22,2025-10-22T12:00:00Z,failed,USD +inv_002291,acct_0113,sub_0131,2025-01-23,2025-01-23T12:00:00Z,paid,USD +inv_002292,acct_0114,sub_0132,2025-03-24,2025-03-24T12:00:00Z,open,USD +inv_002293,acct_0115,sub_0133,2025-03-25,2025-03-25T12:00:00Z,draft,USD +inv_002294,acct_0116,sub_0134,2025-04-26,2025-04-26T12:00:00Z,void,USD +inv_002295,acct_0117,sub_0135,2025-05-27,2025-05-27T12:00:00Z,failed,USD +inv_002296,acct_0118,sub_0136,2025-06-28,2025-06-28T12:00:00Z,paid,USD +inv_002297,acct_0119,sub_0137,2025-07-01,2025-07-01T12:00:00Z,open,USD +inv_002298,acct_0120,sub_0138,2025-08-02,2025-08-02T12:00:00Z,draft,USD +inv_002299,acct_0121,sub_0139,2025-09-03,2025-09-03T12:00:00Z,void,USD +inv_002300,acct_0122,sub_0140,2025-10-04,2025-10-04T12:00:00Z,failed,USD +inv_002301,acct_0123,sub_0141,2025-01-05,2025-01-05T12:00:00Z,paid,USD +inv_002302,acct_0124,sub_0142,2025-03-06,2025-03-06T12:00:00Z,open,USD +inv_002303,acct_0125,sub_0143,2025-03-07,2025-03-07T12:00:00Z,draft,USD +inv_002304,acct_0126,sub_0144,2025-04-08,2025-04-08T12:00:00Z,void,USD +inv_002305,acct_0127,sub_0145,2025-05-09,2025-05-09T12:00:00Z,failed,USD +inv_002306,acct_0128,sub_0146,2025-06-10,2025-06-10T12:00:00Z,paid,USD +inv_002307,acct_0129,sub_0147,2025-07-11,2025-07-11T12:00:00Z,open,USD +inv_002308,acct_0130,sub_0148,2025-08-12,2025-08-12T12:00:00Z,draft,USD +inv_002309,acct_0131,sub_0149,2025-09-13,2025-09-13T12:00:00Z,void,USD 
+inv_002310,acct_0132,sub_0150,2025-10-14,2025-10-14T12:00:00Z,failed,USD +inv_002311,acct_0133,sub_0151,2025-01-15,2025-01-15T12:00:00Z,paid,USD +inv_002312,acct_0134,sub_0152,2025-03-16,2025-03-16T12:00:00Z,open,USD +inv_002313,acct_0135,sub_0153,2025-03-17,2025-03-17T12:00:00Z,draft,USD +inv_002314,acct_0136,sub_0154,2025-04-18,2025-04-18T12:00:00Z,void,USD +inv_002315,acct_0137,sub_0155,2025-05-19,2025-05-19T12:00:00Z,failed,USD +inv_002316,acct_0138,sub_0156,2025-06-20,2025-06-20T12:00:00Z,paid,USD +inv_002317,acct_0139,sub_0157,2025-07-21,2025-07-21T12:00:00Z,open,USD +inv_002318,acct_0140,sub_0158,2025-08-22,2025-08-22T12:00:00Z,draft,USD +inv_002319,acct_0141,sub_0159,2025-09-23,2025-09-23T12:00:00Z,void,USD +inv_002320,acct_0142,sub_0160,2025-10-24,2025-10-24T12:00:00Z,failed,USD +inv_002321,acct_0143,sub_0161,2025-01-25,2025-01-25T12:00:00Z,paid,USD +inv_002322,acct_0144,sub_0162,2025-03-26,2025-03-26T12:00:00Z,open,USD +inv_002323,acct_0145,sub_0163,2025-03-27,2025-03-27T12:00:00Z,draft,USD +inv_002324,acct_0146,sub_0164,2025-04-28,2025-04-28T12:00:00Z,void,USD +inv_002325,acct_0147,sub_0165,2025-05-01,2025-05-01T12:00:00Z,failed,USD +inv_002326,acct_0148,sub_0166,2025-06-02,2025-06-02T12:00:00Z,paid,USD +inv_002327,acct_0149,sub_0167,2025-07-03,2025-07-03T12:00:00Z,open,USD +inv_002328,acct_0150,sub_0168,2025-08-04,2025-08-04T12:00:00Z,draft,USD +inv_002329,acct_0151,sub_0169,2025-09-05,2025-09-05T12:00:00Z,void,USD +inv_002330,acct_0152,sub_0170,2025-10-06,2025-10-06T12:00:00Z,failed,USD +inv_002331,acct_0153,sub_0171,2025-01-07,2025-01-07T12:00:00Z,paid,USD +inv_002332,acct_0154,sub_0172,2025-03-08,2025-03-08T12:00:00Z,open,USD +inv_002333,acct_0155,sub_0173,2025-03-09,2025-03-09T12:00:00Z,draft,USD +inv_002334,acct_0156,sub_0174,2025-04-10,2025-04-10T12:00:00Z,void,USD +inv_002335,acct_0157,sub_0175,2025-05-11,2025-05-11T12:00:00Z,failed,USD +inv_002336,acct_0158,sub_0176,2025-06-12,2025-06-12T12:00:00Z,paid,USD 
+inv_002337,acct_0159,sub_0177,2025-07-13,2025-07-13T12:00:00Z,open,USD +inv_002338,acct_0160,sub_0178,2025-08-14,2025-08-14T12:00:00Z,draft,USD +inv_002339,acct_0161,sub_0179,2025-09-15,2025-09-15T12:00:00Z,void,USD +inv_002340,acct_0162,sub_0180,2025-10-16,2025-10-16T12:00:00Z,failed,USD +inv_002341,acct_0163,sub_0181,2025-01-17,2025-01-17T12:00:00Z,paid,USD +inv_002342,acct_0164,sub_0182,2025-03-18,2025-03-18T12:00:00Z,open,USD +inv_002343,acct_0165,sub_0183,2025-03-19,2025-03-19T12:00:00Z,draft,USD +inv_002344,acct_0166,sub_0184,2025-04-20,2025-04-20T12:00:00Z,void,USD +inv_002345,acct_0167,sub_0185,2025-05-21,2025-05-21T12:00:00Z,failed,USD +inv_002346,acct_0168,sub_0186,2025-06-22,2025-06-22T12:00:00Z,paid,USD +inv_002347,acct_0169,sub_0187,2025-07-23,2025-07-23T12:00:00Z,open,USD +inv_002348,acct_0170,sub_0188,2025-08-24,2025-08-24T12:00:00Z,draft,USD +inv_002349,acct_0171,sub_0189,2025-09-25,2025-09-25T12:00:00Z,void,USD +inv_002350,acct_0172,sub_0190,2025-10-26,2025-10-26T12:00:00Z,failed,USD +inv_002351,acct_0173,sub_0191,2025-01-27,2025-01-27T12:00:00Z,paid,USD +inv_002352,acct_0174,sub_0192,2025-03-28,2025-03-28T12:00:00Z,open,USD +inv_002353,acct_0175,sub_0193,2025-03-01,2025-03-01T12:00:00Z,draft,USD +inv_002354,acct_0176,sub_0194,2025-04-02,2025-04-02T12:00:00Z,void,USD +inv_002355,acct_0177,sub_0195,2025-05-03,2025-05-03T12:00:00Z,failed,USD +inv_002356,acct_0178,sub_0196,2025-06-04,2025-06-04T12:00:00Z,paid,USD +inv_002357,acct_0179,sub_0197,2025-07-05,2025-07-05T12:00:00Z,open,USD +inv_002358,acct_0180,sub_0198,2025-08-06,2025-08-06T12:00:00Z,draft,USD +inv_002359,acct_0181,sub_0199,2025-09-07,2025-09-07T12:00:00Z,void,USD +inv_002360,acct_0182,sub_0200,2025-10-08,2025-10-08T12:00:00Z,failed,USD +inv_002361,acct_0183,sub_0201,2025-01-09,2025-01-09T12:00:00Z,paid,USD +inv_002362,acct_0184,sub_0202,2025-03-10,2025-03-10T12:00:00Z,open,USD +inv_002363,acct_0185,sub_0203,2025-03-11,2025-03-11T12:00:00Z,draft,USD 
+inv_002364,acct_0186,sub_0204,2025-04-12,2025-04-12T12:00:00Z,void,USD +inv_002365,acct_0187,sub_0205,2025-05-13,2025-05-13T12:00:00Z,failed,USD +inv_002366,acct_0188,sub_0206,2025-06-14,2025-06-14T12:00:00Z,paid,USD +inv_002367,acct_0189,sub_0207,2025-07-15,2025-07-15T12:00:00Z,open,USD +inv_002368,acct_0190,sub_0208,2025-08-16,2025-08-16T12:00:00Z,draft,USD +inv_002369,acct_0191,sub_0209,2025-09-17,2025-09-17T12:00:00Z,void,USD +inv_002370,acct_0192,sub_0210,2025-10-18,2025-10-18T12:00:00Z,failed,USD +inv_002371,acct_0193,sub_0211,2025-01-19,2025-01-19T12:00:00Z,paid,USD +inv_002372,acct_0194,sub_0212,2025-03-20,2025-03-20T12:00:00Z,open,USD +inv_002373,acct_0195,sub_0213,2025-03-21,2025-03-21T12:00:00Z,draft,USD +inv_002374,acct_0196,sub_0214,2025-04-22,2025-04-22T12:00:00Z,void,USD +inv_002375,acct_0197,sub_0215,2025-05-23,2025-05-23T12:00:00Z,failed,USD +inv_002376,acct_0198,sub_0216,2025-06-24,2025-06-24T12:00:00Z,paid,USD +inv_002377,acct_0001,sub_0217,2025-07-25,2025-07-25T12:00:00Z,open,USD +inv_002378,acct_0002,sub_0218,2025-08-26,2025-08-26T12:00:00Z,draft,USD +inv_002379,acct_0003,sub_0219,2025-09-27,2025-09-27T12:00:00Z,void,USD +inv_002380,acct_0004,sub_0220,2025-10-28,2025-10-28T12:00:00Z,failed,USD +inv_002381,acct_0005,sub_0221,2025-01-01,2025-01-01T12:00:00Z,paid,USD +inv_002382,acct_0006,sub_0222,2025-03-02,2025-03-02T12:00:00Z,open,USD +inv_002383,acct_0007,sub_0223,2025-03-03,2025-03-03T12:00:00Z,draft,USD +inv_002384,acct_0008,sub_0224,2025-04-04,2025-04-04T12:00:00Z,void,USD +inv_002385,acct_0009,sub_0225,2025-05-05,2025-05-05T12:00:00Z,failed,USD +inv_002386,acct_0010,sub_0226,2025-06-06,2025-06-06T12:00:00Z,paid,USD +inv_002387,acct_0011,sub_0227,2025-07-07,2025-07-07T12:00:00Z,open,USD +inv_002388,acct_0012,sub_0228,2025-08-08,2025-08-08T12:00:00Z,draft,USD +inv_002389,acct_0013,sub_0229,2025-09-09,2025-09-09T12:00:00Z,void,USD +inv_002390,acct_0014,sub_0230,2025-10-10,2025-10-10T12:00:00Z,failed,USD 
+inv_002391,acct_0015,sub_0231,2025-01-11,2025-01-11T12:00:00Z,paid,USD +inv_002392,acct_0016,sub_0232,2025-03-12,2025-03-12T12:00:00Z,open,USD +inv_002393,acct_0017,sub_0233,2025-03-13,2025-03-13T12:00:00Z,draft,USD +inv_002394,acct_0018,sub_0234,2025-04-14,2025-04-14T12:00:00Z,void,USD +inv_002395,acct_0019,sub_0235,2025-05-15,2025-05-15T12:00:00Z,failed,USD +inv_002396,acct_0020,sub_0236,2025-06-16,2025-06-16T12:00:00Z,paid,USD +inv_002397,acct_0021,sub_0237,2025-07-17,2025-07-17T12:00:00Z,open,USD +inv_002398,acct_0022,sub_0238,2025-08-18,2025-08-18T12:00:00Z,draft,USD +inv_002399,acct_0023,sub_0239,2025-09-19,2025-09-19T12:00:00Z,void,USD +inv_002400,acct_0024,sub_0240,2025-10-20,2025-10-20T12:00:00Z,failed,USD +inv_002401,acct_0025,sub_0241,2025-01-21,2025-01-21T12:00:00Z,paid,USD +inv_002402,acct_0026,sub_0242,2025-03-22,2025-03-22T12:00:00Z,open,USD +inv_002403,acct_0027,sub_0243,2025-03-23,2025-03-23T12:00:00Z,draft,USD +inv_002404,acct_0028,sub_0244,2025-04-24,2025-04-24T12:00:00Z,void,USD +inv_002405,acct_0029,sub_0245,2025-05-25,2025-05-25T12:00:00Z,failed,USD +inv_002406,acct_0030,sub_0246,2025-06-26,2025-06-26T12:00:00Z,paid,USD +inv_002407,acct_0031,sub_0247,2025-07-27,2025-07-27T12:00:00Z,open,USD +inv_002408,acct_0032,sub_0248,2025-08-28,2025-08-28T12:00:00Z,draft,USD +inv_002409,acct_0033,sub_0249,2025-09-01,2025-09-01T12:00:00Z,void,USD +inv_002410,acct_0034,sub_0250,2025-10-02,2025-10-02T12:00:00Z,failed,USD +inv_002411,acct_0035,sub_0251,2025-01-03,2025-01-03T12:00:00Z,paid,USD +inv_002412,acct_0036,sub_0252,2025-03-04,2025-03-04T12:00:00Z,open,USD +inv_002413,acct_0037,sub_0253,2025-03-05,2025-03-05T12:00:00Z,draft,USD +inv_002414,acct_0038,sub_0254,2025-04-06,2025-04-06T12:00:00Z,void,USD +inv_002415,acct_0039,sub_0255,2025-05-07,2025-05-07T12:00:00Z,failed,USD +inv_002416,acct_0040,sub_0256,2025-06-08,2025-06-08T12:00:00Z,paid,USD +inv_002417,acct_0041,sub_0257,2025-07-09,2025-07-09T12:00:00Z,open,USD 
+inv_002418,acct_0042,sub_0258,2025-08-10,2025-08-10T12:00:00Z,draft,USD +inv_002419,acct_0043,sub_0259,2025-09-11,2025-09-11T12:00:00Z,void,USD +inv_002420,acct_0044,sub_0260,2025-10-12,2025-10-12T12:00:00Z,failed,USD +inv_002421,acct_0045,sub_0261,2025-01-13,2025-01-13T12:00:00Z,paid,USD +inv_002422,acct_0046,sub_0262,2025-03-14,2025-03-14T12:00:00Z,open,USD +inv_002423,acct_0047,sub_0263,2025-03-15,2025-03-15T12:00:00Z,draft,USD +inv_002424,acct_0048,sub_0264,2025-04-16,2025-04-16T12:00:00Z,void,USD +inv_002425,acct_0049,sub_0265,2025-05-17,2025-05-17T12:00:00Z,failed,USD +inv_002426,acct_0050,sub_0266,2025-06-18,2025-06-18T12:00:00Z,paid,USD +inv_002427,acct_0051,sub_0267,2025-07-19,2025-07-19T12:00:00Z,open,USD +inv_002428,acct_0052,sub_0268,2025-08-20,2025-08-20T12:00:00Z,draft,USD +inv_002429,acct_0053,sub_0269,2025-09-21,2025-09-21T12:00:00Z,void,USD +inv_002430,acct_0054,sub_0270,2025-10-22,2025-10-22T12:00:00Z,failed,USD +inv_002431,acct_0055,sub_0271,2025-01-23,2025-01-23T12:00:00Z,paid,USD +inv_002432,acct_0056,sub_0272,2025-03-24,2025-03-24T12:00:00Z,open,USD +inv_002433,acct_0057,sub_0273,2025-03-25,2025-03-25T12:00:00Z,draft,USD +inv_002434,acct_0058,sub_0274,2025-04-26,2025-04-26T12:00:00Z,void,USD +inv_002435,acct_0059,sub_0275,2025-05-27,2025-05-27T12:00:00Z,failed,USD +inv_002436,acct_0060,sub_0276,2025-06-28,2025-06-28T12:00:00Z,paid,USD +inv_002437,acct_0061,sub_0277,2025-07-01,2025-07-01T12:00:00Z,open,USD +inv_002438,acct_0062,sub_0278,2025-08-02,2025-08-02T12:00:00Z,draft,USD +inv_002439,acct_0063,sub_0279,2025-09-03,2025-09-03T12:00:00Z,void,USD +inv_002440,acct_0064,sub_0280,2025-10-04,2025-10-04T12:00:00Z,failed,USD +inv_002441,acct_0065,sub_0281,2025-01-05,2025-01-05T12:00:00Z,paid,USD +inv_002442,acct_0066,sub_0282,2025-03-06,2025-03-06T12:00:00Z,open,USD +inv_002443,acct_0067,sub_0283,2025-03-07,2025-03-07T12:00:00Z,draft,USD +inv_002444,acct_0068,sub_0284,2025-04-08,2025-04-08T12:00:00Z,void,USD 
+inv_002445,acct_0069,sub_0285,2025-05-09,2025-05-09T12:00:00Z,failed,USD +inv_002446,acct_0070,sub_0286,2025-06-10,2025-06-10T12:00:00Z,paid,USD +inv_002447,acct_0071,sub_0287,2025-07-11,2025-07-11T12:00:00Z,open,USD +inv_002448,acct_0072,sub_0288,2025-08-12,2025-08-12T12:00:00Z,draft,USD +inv_002449,acct_0073,sub_0289,2025-09-13,2025-09-13T12:00:00Z,void,USD +inv_002450,acct_0074,sub_0290,2025-10-14,2025-10-14T12:00:00Z,failed,USD +inv_002451,acct_0075,sub_0291,2025-01-15,2025-01-15T12:00:00Z,paid,USD +inv_002452,acct_0076,sub_0292,2025-03-16,2025-03-16T12:00:00Z,open,USD +inv_002453,acct_0077,sub_0293,2025-03-17,2025-03-17T12:00:00Z,draft,USD +inv_002454,acct_0078,sub_0294,2025-04-18,2025-04-18T12:00:00Z,void,USD +inv_002455,acct_0079,sub_0295,2025-05-19,2025-05-19T12:00:00Z,failed,USD +inv_002456,acct_0080,sub_0296,2025-06-20,2025-06-20T12:00:00Z,paid,USD +inv_002457,acct_0081,sub_0297,2025-07-21,2025-07-21T12:00:00Z,open,USD +inv_002458,acct_0082,sub_0298,2025-08-22,2025-08-22T12:00:00Z,draft,USD +inv_002459,acct_0083,sub_0299,2025-09-23,2025-09-23T12:00:00Z,void,USD +inv_002460,acct_0084,sub_0300,2025-10-24,2025-10-24T12:00:00Z,failed,USD +inv_002461,acct_0085,sub_0301,2025-01-25,2025-01-25T12:00:00Z,paid,USD +inv_002462,acct_0086,sub_0302,2025-03-26,2025-03-26T12:00:00Z,open,USD +inv_002463,acct_0087,sub_0303,2025-03-27,2025-03-27T12:00:00Z,draft,USD +inv_002464,acct_0088,sub_0304,2025-04-28,2025-04-28T12:00:00Z,void,USD +inv_002465,acct_0089,sub_0305,2025-05-01,2025-05-01T12:00:00Z,failed,USD +inv_002466,acct_0090,sub_0306,2025-06-02,2025-06-02T12:00:00Z,paid,USD +inv_002467,acct_0091,sub_0307,2025-07-03,2025-07-03T12:00:00Z,open,USD +inv_002468,acct_0092,sub_0308,2025-08-04,2025-08-04T12:00:00Z,draft,USD +inv_002469,acct_0093,sub_0309,2025-09-05,2025-09-05T12:00:00Z,void,USD +inv_002470,acct_0094,sub_0310,2025-10-06,2025-10-06T12:00:00Z,failed,USD +inv_002471,acct_0095,sub_0311,2025-01-07,2025-01-07T12:00:00Z,paid,USD 
+inv_002472,acct_0096,sub_0312,2025-03-08,2025-03-08T12:00:00Z,open,USD +inv_002473,acct_0097,sub_0313,2025-03-09,2025-03-09T12:00:00Z,draft,USD +inv_002474,acct_0098,sub_0314,2025-04-10,2025-04-10T12:00:00Z,void,USD +inv_002475,acct_0099,sub_0315,2025-05-11,2025-05-11T12:00:00Z,failed,USD +inv_002476,acct_0100,sub_0316,2025-06-12,2025-06-12T12:00:00Z,paid,USD +inv_002477,acct_0101,sub_0317,2025-07-13,2025-07-13T12:00:00Z,open,USD +inv_002478,acct_0102,sub_0318,2025-08-14,2025-08-14T12:00:00Z,draft,USD +inv_002479,acct_0103,sub_0319,2025-09-15,2025-09-15T12:00:00Z,void,USD +inv_002480,acct_0104,sub_0320,2025-10-16,2025-10-16T12:00:00Z,failed,USD +inv_002481,acct_0105,sub_0321,2025-01-17,2025-01-17T12:00:00Z,paid,USD +inv_002482,acct_0106,sub_0322,2025-03-18,2025-03-18T12:00:00Z,open,USD +inv_002483,acct_0107,sub_0323,2025-03-19,2025-03-19T12:00:00Z,draft,USD +inv_002484,acct_0108,sub_0324,2025-04-20,2025-04-20T12:00:00Z,void,USD +inv_002485,acct_0109,sub_0325,2025-05-21,2025-05-21T12:00:00Z,failed,USD +inv_002486,acct_0110,sub_0326,2025-06-22,2025-06-22T12:00:00Z,paid,USD +inv_002487,acct_0111,sub_0327,2025-07-23,2025-07-23T12:00:00Z,open,USD +inv_002488,acct_0112,sub_0328,2025-08-24,2025-08-24T12:00:00Z,draft,USD +inv_002489,acct_0113,sub_0329,2025-09-25,2025-09-25T12:00:00Z,void,USD +inv_002490,acct_0114,sub_0330,2025-10-26,2025-10-26T12:00:00Z,failed,USD +inv_002491,acct_0115,sub_0331,2025-01-27,2025-01-27T12:00:00Z,paid,USD +inv_002492,acct_0116,sub_0332,2025-03-28,2025-03-28T12:00:00Z,open,USD +inv_002493,acct_0117,sub_0333,2025-03-01,2025-03-01T12:00:00Z,draft,USD +inv_002494,acct_0118,sub_0334,2025-04-02,2025-04-02T12:00:00Z,void,USD +inv_002495,acct_0119,sub_0335,2025-05-03,2025-05-03T12:00:00Z,failed,USD +inv_002496,acct_0120,sub_0336,2025-06-04,2025-06-04T12:00:00Z,paid,USD +inv_002497,acct_0121,sub_0337,2025-07-05,2025-07-05T12:00:00Z,open,USD +inv_002498,acct_0122,sub_0338,2025-08-06,2025-08-06T12:00:00Z,draft,USD 
+inv_002499,acct_0123,sub_0339,2025-09-07,2025-09-07T12:00:00Z,void,USD +inv_002500,acct_0124,sub_0340,2025-10-08,2025-10-08T12:00:00Z,failed,USD +inv_002501,acct_0125,sub_0341,2025-01-09,2025-01-09T12:00:00Z,paid,USD +inv_002502,acct_0126,sub_0342,2025-03-10,2025-03-10T12:00:00Z,open,USD +inv_002503,acct_0127,sub_0343,2025-03-11,2025-03-11T12:00:00Z,draft,USD +inv_002504,acct_0128,sub_0344,2025-04-12,2025-04-12T12:00:00Z,void,USD +inv_002505,acct_0129,sub_0345,2025-05-13,2025-05-13T12:00:00Z,failed,USD +inv_002506,acct_0130,sub_0346,2025-06-14,2025-06-14T12:00:00Z,paid,USD +inv_002507,acct_0131,sub_0347,2025-07-15,2025-07-15T12:00:00Z,open,USD +inv_002508,acct_0132,sub_0348,2025-08-16,2025-08-16T12:00:00Z,draft,USD +inv_002509,acct_0133,sub_0349,2025-09-17,2025-09-17T12:00:00Z,void,USD +inv_002510,acct_0134,sub_0350,2025-10-18,2025-10-18T12:00:00Z,failed,USD +inv_002511,acct_0135,sub_0351,2025-01-19,2025-01-19T12:00:00Z,paid,USD +inv_002512,acct_0136,sub_0352,2025-03-20,2025-03-20T12:00:00Z,open,USD +inv_002513,acct_0137,sub_0353,2025-03-21,2025-03-21T12:00:00Z,draft,USD +inv_002514,acct_0138,sub_0354,2025-04-22,2025-04-22T12:00:00Z,void,USD +inv_002515,acct_0139,sub_0355,2025-05-23,2025-05-23T12:00:00Z,failed,USD +inv_002516,acct_0140,sub_0356,2025-06-24,2025-06-24T12:00:00Z,paid,USD +inv_002517,acct_0141,sub_0357,2025-07-25,2025-07-25T12:00:00Z,open,USD +inv_002518,acct_0142,sub_0358,2025-08-26,2025-08-26T12:00:00Z,draft,USD +inv_002519,acct_0143,sub_0359,2025-09-27,2025-09-27T12:00:00Z,void,USD +inv_002520,acct_0144,sub_0360,2025-10-28,2025-10-28T12:00:00Z,failed,USD +inv_002521,acct_0145,sub_0001,2025-01-01,2025-01-01T12:00:00Z,paid,USD +inv_002522,acct_0146,sub_0002,2025-03-02,2025-03-02T12:00:00Z,open,USD +inv_002523,acct_0147,sub_0003,2025-03-03,2025-03-03T12:00:00Z,draft,USD +inv_002524,acct_0148,sub_0004,2025-04-04,2025-04-04T12:00:00Z,void,USD +inv_002525,acct_0149,sub_0005,2025-05-05,2025-05-05T12:00:00Z,failed,USD 
+inv_002526,acct_0150,sub_0006,2025-06-06,2025-06-06T12:00:00Z,paid,USD +inv_002527,acct_0151,sub_0007,2025-07-07,2025-07-07T12:00:00Z,open,USD +inv_002528,acct_0152,sub_0008,2025-08-08,2025-08-08T12:00:00Z,draft,USD +inv_002529,acct_0153,sub_0009,2025-09-09,2025-09-09T12:00:00Z,void,USD +inv_002530,acct_0154,sub_0010,2025-10-10,2025-10-10T12:00:00Z,failed,USD +inv_002531,acct_0155,sub_0011,2025-01-11,2025-01-11T12:00:00Z,paid,USD +inv_002532,acct_0156,sub_0012,2025-03-12,2025-03-12T12:00:00Z,open,USD +inv_002533,acct_0157,sub_0013,2025-03-13,2025-03-13T12:00:00Z,draft,USD +inv_002534,acct_0158,sub_0014,2025-04-14,2025-04-14T12:00:00Z,void,USD +inv_002535,acct_0159,sub_0015,2025-05-15,2025-05-15T12:00:00Z,failed,USD +inv_002536,acct_0160,sub_0016,2025-06-16,2025-06-16T12:00:00Z,paid,USD +inv_002537,acct_0161,sub_0017,2025-07-17,2025-07-17T12:00:00Z,open,USD +inv_002538,acct_0162,sub_0018,2025-08-18,2025-08-18T12:00:00Z,draft,USD +inv_002539,acct_0163,sub_0019,2025-09-19,2025-09-19T12:00:00Z,void,USD +inv_002540,acct_0164,sub_0020,2025-10-20,2025-10-20T12:00:00Z,failed,USD +inv_002541,acct_0165,sub_0021,2025-01-21,2025-01-21T12:00:00Z,paid,USD +inv_002542,acct_0166,sub_0022,2025-03-22,2025-03-22T12:00:00Z,open,USD +inv_002543,acct_0167,sub_0023,2025-03-23,2025-03-23T12:00:00Z,draft,USD +inv_002544,acct_0168,sub_0024,2025-04-24,2025-04-24T12:00:00Z,void,USD +inv_002545,acct_0169,sub_0025,2025-05-25,2025-05-25T12:00:00Z,failed,USD +inv_002546,acct_0170,sub_0026,2025-06-26,2025-06-26T12:00:00Z,paid,USD +inv_002547,acct_0171,sub_0027,2025-07-27,2025-07-27T12:00:00Z,open,USD +inv_002548,acct_0172,sub_0028,2025-08-28,2025-08-28T12:00:00Z,draft,USD +inv_002549,acct_0173,sub_0029,2025-09-01,2025-09-01T12:00:00Z,void,USD +inv_002550,acct_0174,sub_0030,2025-10-02,2025-10-02T12:00:00Z,failed,USD +inv_002551,acct_0175,sub_0031,2025-01-03,2025-01-03T12:00:00Z,paid,USD +inv_002552,acct_0176,sub_0032,2025-03-04,2025-03-04T12:00:00Z,open,USD 
+inv_002553,acct_0177,sub_0033,2025-03-05,2025-03-05T12:00:00Z,draft,USD +inv_002554,acct_0178,sub_0034,2025-04-06,2025-04-06T12:00:00Z,void,USD +inv_002555,acct_0179,sub_0035,2025-05-07,2025-05-07T12:00:00Z,failed,USD +inv_002556,acct_0180,sub_0036,2025-06-08,2025-06-08T12:00:00Z,paid,USD +inv_002557,acct_0181,sub_0037,2025-07-09,2025-07-09T12:00:00Z,open,USD +inv_002558,acct_0182,sub_0038,2025-08-10,2025-08-10T12:00:00Z,draft,USD +inv_002559,acct_0183,sub_0039,2025-09-11,2025-09-11T12:00:00Z,void,USD +inv_002560,acct_0184,sub_0040,2025-10-12,2025-10-12T12:00:00Z,failed,USD +inv_002561,acct_0185,sub_0041,2025-01-13,2025-01-13T12:00:00Z,paid,USD +inv_002562,acct_0186,sub_0042,2025-03-14,2025-03-14T12:00:00Z,open,USD +inv_002563,acct_0187,sub_0043,2025-03-15,2025-03-15T12:00:00Z,draft,USD +inv_002564,acct_0188,sub_0044,2025-04-16,2025-04-16T12:00:00Z,void,USD +inv_002565,acct_0189,sub_0045,2025-05-17,2025-05-17T12:00:00Z,failed,USD +inv_002566,acct_0190,sub_0046,2025-06-18,2025-06-18T12:00:00Z,paid,USD +inv_002567,acct_0191,sub_0047,2025-07-19,2025-07-19T12:00:00Z,open,USD +inv_002568,acct_0192,sub_0048,2025-08-20,2025-08-20T12:00:00Z,draft,USD +inv_002569,acct_0193,sub_0049,2025-09-21,2025-09-21T12:00:00Z,void,USD +inv_002570,acct_0194,sub_0050,2025-10-22,2025-10-22T12:00:00Z,failed,USD +inv_002571,acct_0195,sub_0051,2025-01-23,2025-01-23T12:00:00Z,paid,USD +inv_002572,acct_0196,sub_0052,2025-03-24,2025-03-24T12:00:00Z,open,USD +inv_002573,acct_0197,sub_0053,2025-03-25,2025-03-25T12:00:00Z,draft,USD +inv_002574,acct_0198,sub_0054,2025-04-26,2025-04-26T12:00:00Z,void,USD +inv_002575,acct_0001,sub_0055,2025-05-27,2025-05-27T12:00:00Z,failed,USD +inv_002576,acct_0002,sub_0056,2025-06-28,2025-06-28T12:00:00Z,paid,USD +inv_002577,acct_0003,sub_0057,2025-07-01,2025-07-01T12:00:00Z,open,USD +inv_002578,acct_0004,sub_0058,2025-08-02,2025-08-02T12:00:00Z,draft,USD +inv_002579,acct_0005,sub_0059,2025-09-03,2025-09-03T12:00:00Z,void,USD 
+inv_002580,acct_0006,sub_0060,2025-10-04,2025-10-04T12:00:00Z,failed,USD +inv_002581,acct_0007,sub_0061,2025-01-05,2025-01-05T12:00:00Z,paid,USD +inv_002582,acct_0008,sub_0062,2025-03-06,2025-03-06T12:00:00Z,open,USD +inv_002583,acct_0009,sub_0063,2025-03-07,2025-03-07T12:00:00Z,draft,USD +inv_002584,acct_0010,sub_0064,2025-04-08,2025-04-08T12:00:00Z,void,USD +inv_002585,acct_0011,sub_0065,2025-05-09,2025-05-09T12:00:00Z,failed,USD +inv_002586,acct_0012,sub_0066,2025-06-10,2025-06-10T12:00:00Z,paid,USD +inv_002587,acct_0013,sub_0067,2025-07-11,2025-07-11T12:00:00Z,open,USD +inv_002588,acct_0014,sub_0068,2025-08-12,2025-08-12T12:00:00Z,draft,USD +inv_002589,acct_0015,sub_0069,2025-09-13,2025-09-13T12:00:00Z,void,USD +inv_002590,acct_0016,sub_0070,2025-10-14,2025-10-14T12:00:00Z,failed,USD +inv_002591,acct_0017,sub_0071,2025-01-15,2025-01-15T12:00:00Z,paid,USD +inv_002592,acct_0018,sub_0072,2025-03-16,2025-03-16T12:00:00Z,open,USD +inv_002593,acct_0019,sub_0073,2025-03-17,2025-03-17T12:00:00Z,draft,USD +inv_002594,acct_0020,sub_0074,2025-04-18,2025-04-18T12:00:00Z,void,USD +inv_002595,acct_0021,sub_0075,2025-05-19,2025-05-19T12:00:00Z,failed,USD +inv_002596,acct_0022,sub_0076,2025-06-20,2025-06-20T12:00:00Z,paid,USD +inv_002597,acct_0023,sub_0077,2025-07-21,2025-07-21T12:00:00Z,open,USD +inv_002598,acct_0024,sub_0078,2025-08-22,2025-08-22T12:00:00Z,draft,USD +inv_002599,acct_0025,sub_0079,2025-09-23,2025-09-23T12:00:00Z,void,USD +inv_002600,acct_0026,sub_0080,2025-10-24,2025-10-24T12:00:00Z,failed,USD +inv_002601,acct_0027,sub_0081,2025-01-25,2025-01-25T12:00:00Z,paid,USD +inv_002602,acct_0028,sub_0082,2025-03-26,2025-03-26T12:00:00Z,open,USD +inv_002603,acct_0029,sub_0083,2025-03-27,2025-03-27T12:00:00Z,draft,USD +inv_002604,acct_0030,sub_0084,2025-04-28,2025-04-28T12:00:00Z,void,USD +inv_002605,acct_0031,sub_0085,2025-05-01,2025-05-01T12:00:00Z,failed,USD +inv_002606,acct_0032,sub_0086,2025-06-02,2025-06-02T12:00:00Z,paid,USD 
+inv_002607,acct_0033,sub_0087,2025-07-03,2025-07-03T12:00:00Z,open,USD +inv_002608,acct_0034,sub_0088,2025-08-04,2025-08-04T12:00:00Z,draft,USD +inv_002609,acct_0035,sub_0089,2025-09-05,2025-09-05T12:00:00Z,void,USD +inv_002610,acct_0036,sub_0090,2025-10-06,2025-10-06T12:00:00Z,failed,USD +inv_002611,acct_0037,sub_0091,2025-01-07,2025-01-07T12:00:00Z,paid,USD +inv_002612,acct_0038,sub_0092,2025-03-08,2025-03-08T12:00:00Z,open,USD +inv_002613,acct_0039,sub_0093,2025-03-09,2025-03-09T12:00:00Z,draft,USD +inv_002614,acct_0040,sub_0094,2025-04-10,2025-04-10T12:00:00Z,void,USD +inv_002615,acct_0041,sub_0095,2025-05-11,2025-05-11T12:00:00Z,failed,USD +inv_002616,acct_0042,sub_0096,2025-06-12,2025-06-12T12:00:00Z,paid,USD +inv_002617,acct_0043,sub_0097,2025-07-13,2025-07-13T12:00:00Z,open,USD +inv_002618,acct_0044,sub_0098,2025-08-14,2025-08-14T12:00:00Z,draft,USD +inv_002619,acct_0045,sub_0099,2025-09-15,2025-09-15T12:00:00Z,void,USD +inv_002620,acct_0046,sub_0100,2025-10-16,2025-10-16T12:00:00Z,failed,USD +inv_002621,acct_0047,sub_0101,2025-01-17,2025-01-17T12:00:00Z,paid,USD +inv_002622,acct_0048,sub_0102,2025-03-18,2025-03-18T12:00:00Z,open,USD +inv_002623,acct_0049,sub_0103,2025-03-19,2025-03-19T12:00:00Z,draft,USD +inv_002624,acct_0050,sub_0104,2025-04-20,2025-04-20T12:00:00Z,void,USD +inv_002625,acct_0051,sub_0105,2025-05-21,2025-05-21T12:00:00Z,failed,USD +inv_002626,acct_0052,sub_0106,2025-06-22,2025-06-22T12:00:00Z,paid,USD +inv_002627,acct_0053,sub_0107,2025-07-23,2025-07-23T12:00:00Z,open,USD +inv_002628,acct_0054,sub_0108,2025-08-24,2025-08-24T12:00:00Z,draft,USD +inv_002629,acct_0055,sub_0109,2025-09-25,2025-09-25T12:00:00Z,void,USD +inv_002630,acct_0056,sub_0110,2025-10-26,2025-10-26T12:00:00Z,failed,USD +inv_002631,acct_0057,sub_0111,2025-01-27,2025-01-27T12:00:00Z,paid,USD +inv_002632,acct_0058,sub_0112,2025-03-28,2025-03-28T12:00:00Z,open,USD +inv_002633,acct_0059,sub_0113,2025-03-01,2025-03-01T12:00:00Z,draft,USD 
+inv_002634,acct_0060,sub_0114,2025-04-02,2025-04-02T12:00:00Z,void,USD +inv_002635,acct_0061,sub_0115,2025-05-03,2025-05-03T12:00:00Z,failed,USD +inv_002636,acct_0062,sub_0116,2025-06-04,2025-06-04T12:00:00Z,paid,USD +inv_002637,acct_0063,sub_0117,2025-07-05,2025-07-05T12:00:00Z,open,USD +inv_002638,acct_0064,sub_0118,2025-08-06,2025-08-06T12:00:00Z,draft,USD +inv_002639,acct_0065,sub_0119,2025-09-07,2025-09-07T12:00:00Z,void,USD +inv_002640,acct_0066,sub_0120,2025-10-08,2025-10-08T12:00:00Z,failed,USD +inv_002641,acct_0067,sub_0121,2025-01-09,2025-01-09T12:00:00Z,paid,USD +inv_002642,acct_0068,sub_0122,2025-03-10,2025-03-10T12:00:00Z,open,USD +inv_002643,acct_0069,sub_0123,2025-03-11,2025-03-11T12:00:00Z,draft,USD +inv_002644,acct_0070,sub_0124,2025-04-12,2025-04-12T12:00:00Z,void,USD +inv_002645,acct_0071,sub_0125,2025-05-13,2025-05-13T12:00:00Z,failed,USD +inv_002646,acct_0072,sub_0126,2025-06-14,2025-06-14T12:00:00Z,paid,USD +inv_002647,acct_0073,sub_0127,2025-07-15,2025-07-15T12:00:00Z,open,USD +inv_002648,acct_0074,sub_0128,2025-08-16,2025-08-16T12:00:00Z,draft,USD +inv_002649,acct_0075,sub_0129,2025-09-17,2025-09-17T12:00:00Z,void,USD +inv_002650,acct_0076,sub_0130,2025-10-18,2025-10-18T12:00:00Z,failed,USD +inv_002651,acct_0077,sub_0131,2025-01-19,2025-01-19T12:00:00Z,paid,USD +inv_002652,acct_0078,sub_0132,2025-03-20,2025-03-20T12:00:00Z,open,USD +inv_002653,acct_0079,sub_0133,2025-03-21,2025-03-21T12:00:00Z,draft,USD +inv_002654,acct_0080,sub_0134,2025-04-22,2025-04-22T12:00:00Z,void,USD +inv_002655,acct_0081,sub_0135,2025-05-23,2025-05-23T12:00:00Z,failed,USD +inv_002656,acct_0082,sub_0136,2025-06-24,2025-06-24T12:00:00Z,paid,USD +inv_002657,acct_0083,sub_0137,2025-07-25,2025-07-25T12:00:00Z,open,USD +inv_002658,acct_0084,sub_0138,2025-08-26,2025-08-26T12:00:00Z,draft,USD +inv_002659,acct_0085,sub_0139,2025-09-27,2025-09-27T12:00:00Z,void,USD +inv_002660,acct_0086,sub_0140,2025-10-28,2025-10-28T12:00:00Z,failed,USD 
+inv_002661,acct_0087,sub_0141,2025-01-01,2025-01-01T12:00:00Z,paid,USD +inv_002662,acct_0088,sub_0142,2025-03-02,2025-03-02T12:00:00Z,open,USD +inv_002663,acct_0089,sub_0143,2025-03-03,2025-03-03T12:00:00Z,draft,USD +inv_002664,acct_0090,sub_0144,2025-04-04,2025-04-04T12:00:00Z,void,USD +inv_002665,acct_0091,sub_0145,2025-05-05,2025-05-05T12:00:00Z,failed,USD +inv_002666,acct_0092,sub_0146,2025-06-06,2025-06-06T12:00:00Z,paid,USD +inv_002667,acct_0093,sub_0147,2025-07-07,2025-07-07T12:00:00Z,open,USD +inv_002668,acct_0094,sub_0148,2025-08-08,2025-08-08T12:00:00Z,draft,USD +inv_002669,acct_0095,sub_0149,2025-09-09,2025-09-09T12:00:00Z,void,USD +inv_002670,acct_0096,sub_0150,2025-10-10,2025-10-10T12:00:00Z,failed,USD +inv_002671,acct_0097,sub_0151,2025-01-11,2025-01-11T12:00:00Z,paid,USD +inv_002672,acct_0098,sub_0152,2025-03-12,2025-03-12T12:00:00Z,open,USD +inv_002673,acct_0099,sub_0153,2025-03-13,2025-03-13T12:00:00Z,draft,USD +inv_002674,acct_0100,sub_0154,2025-04-14,2025-04-14T12:00:00Z,void,USD +inv_002675,acct_0101,sub_0155,2025-05-15,2025-05-15T12:00:00Z,failed,USD +inv_002676,acct_0102,sub_0156,2025-06-16,2025-06-16T12:00:00Z,paid,USD +inv_002677,acct_0103,sub_0157,2025-07-17,2025-07-17T12:00:00Z,open,USD +inv_002678,acct_0104,sub_0158,2025-08-18,2025-08-18T12:00:00Z,draft,USD +inv_002679,acct_0105,sub_0159,2025-09-19,2025-09-19T12:00:00Z,void,USD +inv_002680,acct_0106,sub_0160,2025-10-20,2025-10-20T12:00:00Z,failed,USD +inv_002681,acct_0107,sub_0161,2025-01-21,2025-01-21T12:00:00Z,paid,USD +inv_002682,acct_0108,sub_0162,2025-03-22,2025-03-22T12:00:00Z,open,USD +inv_002683,acct_0109,sub_0163,2025-03-23,2025-03-23T12:00:00Z,draft,USD +inv_002684,acct_0110,sub_0164,2025-04-24,2025-04-24T12:00:00Z,void,USD +inv_002685,acct_0111,sub_0165,2025-05-25,2025-05-25T12:00:00Z,failed,USD +inv_002686,acct_0112,sub_0166,2025-06-26,2025-06-26T12:00:00Z,paid,USD +inv_002687,acct_0113,sub_0167,2025-07-27,2025-07-27T12:00:00Z,open,USD 
+inv_002688,acct_0114,sub_0168,2025-08-28,2025-08-28T12:00:00Z,draft,USD +inv_002689,acct_0115,sub_0169,2025-09-01,2025-09-01T12:00:00Z,void,USD +inv_002690,acct_0116,sub_0170,2025-10-02,2025-10-02T12:00:00Z,failed,USD +inv_002691,acct_0117,sub_0171,2025-01-03,2025-01-03T12:00:00Z,paid,USD +inv_002692,acct_0118,sub_0172,2025-03-04,2025-03-04T12:00:00Z,open,USD +inv_002693,acct_0119,sub_0173,2025-03-05,2025-03-05T12:00:00Z,draft,USD +inv_002694,acct_0120,sub_0174,2025-04-06,2025-04-06T12:00:00Z,void,USD +inv_002695,acct_0121,sub_0175,2025-05-07,2025-05-07T12:00:00Z,failed,USD +inv_002696,acct_0122,sub_0176,2025-06-08,2025-06-08T12:00:00Z,paid,USD +inv_002697,acct_0123,sub_0177,2025-07-09,2025-07-09T12:00:00Z,open,USD +inv_002698,acct_0124,sub_0178,2025-08-10,2025-08-10T12:00:00Z,draft,USD +inv_002699,acct_0125,sub_0179,2025-09-11,2025-09-11T12:00:00Z,void,USD +inv_002700,acct_0126,sub_0180,2025-10-12,2025-10-12T12:00:00Z,failed,USD +inv_002701,acct_0127,sub_0181,2025-01-13,2025-01-13T12:00:00Z,paid,USD +inv_002702,acct_0128,sub_0182,2025-03-14,2025-03-14T12:00:00Z,open,USD +inv_002703,acct_0129,sub_0183,2025-03-15,2025-03-15T12:00:00Z,draft,USD +inv_002704,acct_0130,sub_0184,2025-04-16,2025-04-16T12:00:00Z,void,USD +inv_002705,acct_0131,sub_0185,2025-05-17,2025-05-17T12:00:00Z,failed,USD +inv_002706,acct_0132,sub_0186,2025-06-18,2025-06-18T12:00:00Z,paid,USD +inv_002707,acct_0133,sub_0187,2025-07-19,2025-07-19T12:00:00Z,open,USD +inv_002708,acct_0134,sub_0188,2025-08-20,2025-08-20T12:00:00Z,draft,USD +inv_002709,acct_0135,sub_0189,2025-09-21,2025-09-21T12:00:00Z,void,USD +inv_002710,acct_0136,sub_0190,2025-10-22,2025-10-22T12:00:00Z,failed,USD +inv_002711,acct_0137,sub_0191,2025-01-23,2025-01-23T12:00:00Z,paid,USD +inv_002712,acct_0138,sub_0192,2025-03-24,2025-03-24T12:00:00Z,open,USD +inv_002713,acct_0139,sub_0193,2025-03-25,2025-03-25T12:00:00Z,draft,USD +inv_002714,acct_0140,sub_0194,2025-04-26,2025-04-26T12:00:00Z,void,USD 
+inv_002715,acct_0141,sub_0195,2025-05-27,2025-05-27T12:00:00Z,failed,USD +inv_002716,acct_0142,sub_0196,2025-06-28,2025-06-28T12:00:00Z,paid,USD +inv_002717,acct_0143,sub_0197,2025-07-01,2025-07-01T12:00:00Z,open,USD +inv_002718,acct_0144,sub_0198,2025-08-02,2025-08-02T12:00:00Z,draft,USD +inv_002719,acct_0145,sub_0199,2025-09-03,2025-09-03T12:00:00Z,void,USD +inv_002720,acct_0146,sub_0200,2025-10-04,2025-10-04T12:00:00Z,failed,USD +inv_002721,acct_0147,sub_0201,2025-01-05,2025-01-05T12:00:00Z,paid,USD +inv_002722,acct_0148,sub_0202,2025-03-06,2025-03-06T12:00:00Z,open,USD +inv_002723,acct_0149,sub_0203,2025-03-07,2025-03-07T12:00:00Z,draft,USD +inv_002724,acct_0150,sub_0204,2025-04-08,2025-04-08T12:00:00Z,void,USD +inv_002725,acct_0151,sub_0205,2025-05-09,2025-05-09T12:00:00Z,failed,USD +inv_002726,acct_0152,sub_0206,2025-06-10,2025-06-10T12:00:00Z,paid,USD +inv_002727,acct_0153,sub_0207,2025-07-11,2025-07-11T12:00:00Z,open,USD +inv_002728,acct_0154,sub_0208,2025-08-12,2025-08-12T12:00:00Z,draft,USD +inv_002729,acct_0155,sub_0209,2025-09-13,2025-09-13T12:00:00Z,void,USD +inv_002730,acct_0156,sub_0210,2025-10-14,2025-10-14T12:00:00Z,failed,USD +inv_002731,acct_0157,sub_0211,2025-01-15,2025-01-15T12:00:00Z,paid,USD +inv_002732,acct_0158,sub_0212,2025-03-16,2025-03-16T12:00:00Z,open,USD +inv_002733,acct_0159,sub_0213,2025-03-17,2025-03-17T12:00:00Z,draft,USD +inv_002734,acct_0160,sub_0214,2025-04-18,2025-04-18T12:00:00Z,void,USD +inv_002735,acct_0161,sub_0215,2025-05-19,2025-05-19T12:00:00Z,failed,USD +inv_002736,acct_0162,sub_0216,2025-06-20,2025-06-20T12:00:00Z,paid,USD +inv_002737,acct_0163,sub_0217,2025-07-21,2025-07-21T12:00:00Z,open,USD +inv_002738,acct_0164,sub_0218,2025-08-22,2025-08-22T12:00:00Z,draft,USD +inv_002739,acct_0165,sub_0219,2025-09-23,2025-09-23T12:00:00Z,void,USD +inv_002740,acct_0166,sub_0220,2025-10-24,2025-10-24T12:00:00Z,failed,USD +inv_002741,acct_0167,sub_0221,2025-01-25,2025-01-25T12:00:00Z,paid,USD 
+inv_002742,acct_0168,sub_0222,2025-03-26,2025-03-26T12:00:00Z,open,USD +inv_002743,acct_0169,sub_0223,2025-03-27,2025-03-27T12:00:00Z,draft,USD +inv_002744,acct_0170,sub_0224,2025-04-28,2025-04-28T12:00:00Z,void,USD +inv_002745,acct_0171,sub_0225,2025-05-01,2025-05-01T12:00:00Z,failed,USD +inv_002746,acct_0172,sub_0226,2025-06-02,2025-06-02T12:00:00Z,paid,USD +inv_002747,acct_0173,sub_0227,2025-07-03,2025-07-03T12:00:00Z,open,USD +inv_002748,acct_0174,sub_0228,2025-08-04,2025-08-04T12:00:00Z,draft,USD +inv_002749,acct_0175,sub_0229,2025-09-05,2025-09-05T12:00:00Z,void,USD +inv_002750,acct_0176,sub_0230,2025-10-06,2025-10-06T12:00:00Z,failed,USD +inv_002751,acct_0177,sub_0231,2025-01-07,2025-01-07T12:00:00Z,paid,USD +inv_002752,acct_0178,sub_0232,2025-03-08,2025-03-08T12:00:00Z,open,USD +inv_002753,acct_0179,sub_0233,2025-03-09,2025-03-09T12:00:00Z,draft,USD +inv_002754,acct_0180,sub_0234,2025-04-10,2025-04-10T12:00:00Z,void,USD +inv_002755,acct_0181,sub_0235,2025-05-11,2025-05-11T12:00:00Z,failed,USD +inv_002756,acct_0182,sub_0236,2025-06-12,2025-06-12T12:00:00Z,paid,USD +inv_002757,acct_0183,sub_0237,2025-07-13,2025-07-13T12:00:00Z,open,USD +inv_002758,acct_0184,sub_0238,2025-08-14,2025-08-14T12:00:00Z,draft,USD +inv_002759,acct_0185,sub_0239,2025-09-15,2025-09-15T12:00:00Z,void,USD +inv_002760,acct_0186,sub_0240,2025-10-16,2025-10-16T12:00:00Z,failed,USD +inv_002761,acct_0187,sub_0241,2025-01-17,2025-01-17T12:00:00Z,paid,USD +inv_002762,acct_0188,sub_0242,2025-03-18,2025-03-18T12:00:00Z,open,USD +inv_002763,acct_0189,sub_0243,2025-03-19,2025-03-19T12:00:00Z,draft,USD +inv_002764,acct_0190,sub_0244,2025-04-20,2025-04-20T12:00:00Z,void,USD +inv_002765,acct_0191,sub_0245,2025-05-21,2025-05-21T12:00:00Z,failed,USD +inv_002766,acct_0192,sub_0246,2025-06-22,2025-06-22T12:00:00Z,paid,USD +inv_002767,acct_0193,sub_0247,2025-07-23,2025-07-23T12:00:00Z,open,USD +inv_002768,acct_0194,sub_0248,2025-08-24,2025-08-24T12:00:00Z,draft,USD 
+inv_002769,acct_0195,sub_0249,2025-09-25,2025-09-25T12:00:00Z,void,USD +inv_002770,acct_0196,sub_0250,2025-10-26,2025-10-26T12:00:00Z,failed,USD +inv_002771,acct_0197,sub_0251,2025-01-27,2025-01-27T12:00:00Z,paid,USD +inv_002772,acct_0198,sub_0252,2025-03-28,2025-03-28T12:00:00Z,open,USD +inv_002773,acct_0001,sub_0253,2025-03-01,2025-03-01T12:00:00Z,draft,USD +inv_002774,acct_0002,sub_0254,2025-04-02,2025-04-02T12:00:00Z,void,USD +inv_002775,acct_0003,sub_0255,2025-05-03,2025-05-03T12:00:00Z,failed,USD +inv_002776,acct_0004,sub_0256,2025-06-04,2025-06-04T12:00:00Z,paid,USD +inv_002777,acct_0005,sub_0257,2025-07-05,2025-07-05T12:00:00Z,open,USD +inv_002778,acct_0006,sub_0258,2025-08-06,2025-08-06T12:00:00Z,draft,USD +inv_002779,acct_0007,sub_0259,2025-09-07,2025-09-07T12:00:00Z,void,USD +inv_002780,acct_0008,sub_0260,2025-10-08,2025-10-08T12:00:00Z,failed,USD +inv_002781,acct_0009,sub_0261,2025-01-09,2025-01-09T12:00:00Z,paid,USD +inv_002782,acct_0010,sub_0262,2025-03-10,2025-03-10T12:00:00Z,open,USD +inv_002783,acct_0011,sub_0263,2025-03-11,2025-03-11T12:00:00Z,draft,USD +inv_002784,acct_0012,sub_0264,2025-04-12,2025-04-12T12:00:00Z,void,USD +inv_002785,acct_0013,sub_0265,2025-05-13,2025-05-13T12:00:00Z,failed,USD +inv_002786,acct_0014,sub_0266,2025-06-14,2025-06-14T12:00:00Z,paid,USD +inv_002787,acct_0015,sub_0267,2025-07-15,2025-07-15T12:00:00Z,open,USD +inv_002788,acct_0016,sub_0268,2025-08-16,2025-08-16T12:00:00Z,draft,USD +inv_002789,acct_0017,sub_0269,2025-09-17,2025-09-17T12:00:00Z,void,USD +inv_002790,acct_0018,sub_0270,2025-10-18,2025-10-18T12:00:00Z,failed,USD +inv_002791,acct_0019,sub_0271,2025-01-19,2025-01-19T12:00:00Z,paid,USD +inv_002792,acct_0020,sub_0272,2025-03-20,2025-03-20T12:00:00Z,open,USD +inv_002793,acct_0021,sub_0273,2025-03-21,2025-03-21T12:00:00Z,draft,USD +inv_002794,acct_0022,sub_0274,2025-04-22,2025-04-22T12:00:00Z,void,USD +inv_002795,acct_0023,sub_0275,2025-05-23,2025-05-23T12:00:00Z,failed,USD 
+inv_002796,acct_0024,sub_0276,2025-06-24,2025-06-24T12:00:00Z,paid,USD +inv_002797,acct_0025,sub_0277,2025-07-25,2025-07-25T12:00:00Z,open,USD +inv_002798,acct_0026,sub_0278,2025-08-26,2025-08-26T12:00:00Z,draft,USD +inv_002799,acct_0027,sub_0279,2025-09-27,2025-09-27T12:00:00Z,void,USD +inv_002800,acct_0028,sub_0280,2025-10-28,2025-10-28T12:00:00Z,failed,USD +inv_002801,acct_0029,sub_0281,2025-01-01,2025-01-01T12:00:00Z,paid,USD +inv_002802,acct_0030,sub_0282,2025-03-02,2025-03-02T12:00:00Z,open,USD +inv_002803,acct_0031,sub_0283,2025-03-03,2025-03-03T12:00:00Z,draft,USD +inv_002804,acct_0032,sub_0284,2025-04-04,2025-04-04T12:00:00Z,void,USD +inv_002805,acct_0033,sub_0285,2025-05-05,2025-05-05T12:00:00Z,failed,USD +inv_002806,acct_0034,sub_0286,2025-06-06,2025-06-06T12:00:00Z,paid,USD +inv_002807,acct_0035,sub_0287,2025-07-07,2025-07-07T12:00:00Z,open,USD +inv_002808,acct_0036,sub_0288,2025-08-08,2025-08-08T12:00:00Z,draft,USD +inv_002809,acct_0037,sub_0289,2025-09-09,2025-09-09T12:00:00Z,void,USD +inv_002810,acct_0038,sub_0290,2025-10-10,2025-10-10T12:00:00Z,failed,USD +inv_002811,acct_0039,sub_0291,2025-01-11,2025-01-11T12:00:00Z,paid,USD +inv_002812,acct_0040,sub_0292,2025-03-12,2025-03-12T12:00:00Z,open,USD +inv_002813,acct_0041,sub_0293,2025-03-13,2025-03-13T12:00:00Z,draft,USD +inv_002814,acct_0042,sub_0294,2025-04-14,2025-04-14T12:00:00Z,void,USD +inv_002815,acct_0043,sub_0295,2025-05-15,2025-05-15T12:00:00Z,failed,USD +inv_002816,acct_0044,sub_0296,2025-06-16,2025-06-16T12:00:00Z,paid,USD +inv_002817,acct_0045,sub_0297,2025-07-17,2025-07-17T12:00:00Z,open,USD +inv_002818,acct_0046,sub_0298,2025-08-18,2025-08-18T12:00:00Z,draft,USD +inv_002819,acct_0047,sub_0299,2025-09-19,2025-09-19T12:00:00Z,void,USD +inv_002820,acct_0048,sub_0300,2025-10-20,2025-10-20T12:00:00Z,failed,USD +inv_002821,acct_0049,sub_0301,2025-01-21,2025-01-21T12:00:00Z,paid,USD +inv_002822,acct_0050,sub_0302,2025-03-22,2025-03-22T12:00:00Z,open,USD 
+inv_002823,acct_0051,sub_0303,2025-03-23,2025-03-23T12:00:00Z,draft,USD +inv_002824,acct_0052,sub_0304,2025-04-24,2025-04-24T12:00:00Z,void,USD +inv_002825,acct_0053,sub_0305,2025-05-25,2025-05-25T12:00:00Z,failed,USD +inv_002826,acct_0054,sub_0306,2025-06-26,2025-06-26T12:00:00Z,paid,USD +inv_002827,acct_0055,sub_0307,2025-07-27,2025-07-27T12:00:00Z,open,USD +inv_002828,acct_0056,sub_0308,2025-08-28,2025-08-28T12:00:00Z,draft,USD +inv_002829,acct_0057,sub_0309,2025-09-01,2025-09-01T12:00:00Z,void,USD +inv_002830,acct_0058,sub_0310,2025-10-02,2025-10-02T12:00:00Z,failed,USD +inv_002831,acct_0059,sub_0311,2025-01-03,2025-01-03T12:00:00Z,paid,USD +inv_002832,acct_0060,sub_0312,2025-03-04,2025-03-04T12:00:00Z,open,USD +inv_002833,acct_0061,sub_0313,2025-03-05,2025-03-05T12:00:00Z,draft,USD +inv_002834,acct_0062,sub_0314,2025-04-06,2025-04-06T12:00:00Z,void,USD +inv_002835,acct_0063,sub_0315,2025-05-07,2025-05-07T12:00:00Z,failed,USD +inv_002836,acct_0064,sub_0316,2025-06-08,2025-06-08T12:00:00Z,paid,USD +inv_002837,acct_0065,sub_0317,2025-07-09,2025-07-09T12:00:00Z,open,USD +inv_002838,acct_0066,sub_0318,2025-08-10,2025-08-10T12:00:00Z,draft,USD +inv_002839,acct_0067,sub_0319,2025-09-11,2025-09-11T12:00:00Z,void,USD +inv_002840,acct_0068,sub_0320,2025-10-12,2025-10-12T12:00:00Z,failed,USD +inv_002841,acct_0069,sub_0321,2025-01-13,2025-01-13T12:00:00Z,paid,USD +inv_002842,acct_0070,sub_0322,2025-03-14,2025-03-14T12:00:00Z,open,USD +inv_002843,acct_0071,sub_0323,2025-03-15,2025-03-15T12:00:00Z,draft,USD +inv_002844,acct_0072,sub_0324,2025-04-16,2025-04-16T12:00:00Z,void,USD +inv_002845,acct_0073,sub_0325,2025-05-17,2025-05-17T12:00:00Z,failed,USD +inv_002846,acct_0074,sub_0326,2025-06-18,2025-06-18T12:00:00Z,paid,USD +inv_002847,acct_0075,sub_0327,2025-07-19,2025-07-19T12:00:00Z,open,USD +inv_002848,acct_0076,sub_0328,2025-08-20,2025-08-20T12:00:00Z,draft,USD +inv_002849,acct_0077,sub_0329,2025-09-21,2025-09-21T12:00:00Z,void,USD 
+inv_002850,acct_0078,sub_0330,2025-10-22,2025-10-22T12:00:00Z,failed,USD +inv_002851,acct_0079,sub_0331,2025-01-23,2025-01-23T12:00:00Z,paid,USD +inv_002852,acct_0080,sub_0332,2025-03-24,2025-03-24T12:00:00Z,open,USD +inv_002853,acct_0081,sub_0333,2025-03-25,2025-03-25T12:00:00Z,draft,USD +inv_002854,acct_0082,sub_0334,2025-04-26,2025-04-26T12:00:00Z,void,USD +inv_002855,acct_0083,sub_0335,2025-05-27,2025-05-27T12:00:00Z,failed,USD +inv_002856,acct_0084,sub_0336,2025-06-28,2025-06-28T12:00:00Z,paid,USD +inv_002857,acct_0085,sub_0337,2025-07-01,2025-07-01T12:00:00Z,open,USD +inv_002858,acct_0086,sub_0338,2025-08-02,2025-08-02T12:00:00Z,draft,USD +inv_002859,acct_0087,sub_0339,2025-09-03,2025-09-03T12:00:00Z,void,USD +inv_002860,acct_0088,sub_0340,2025-10-04,2025-10-04T12:00:00Z,failed,USD +inv_002861,acct_0089,sub_0341,2025-01-05,2025-01-05T12:00:00Z,paid,USD +inv_002862,acct_0090,sub_0342,2025-03-06,2025-03-06T12:00:00Z,open,USD +inv_002863,acct_0091,sub_0343,2025-03-07,2025-03-07T12:00:00Z,draft,USD +inv_002864,acct_0092,sub_0344,2025-04-08,2025-04-08T12:00:00Z,void,USD +inv_002865,acct_0093,sub_0345,2025-05-09,2025-05-09T12:00:00Z,failed,USD +inv_002866,acct_0094,sub_0346,2025-06-10,2025-06-10T12:00:00Z,paid,USD +inv_002867,acct_0095,sub_0347,2025-07-11,2025-07-11T12:00:00Z,open,USD +inv_002868,acct_0096,sub_0348,2025-08-12,2025-08-12T12:00:00Z,draft,USD +inv_002869,acct_0097,sub_0349,2025-09-13,2025-09-13T12:00:00Z,void,USD +inv_002870,acct_0098,sub_0350,2025-10-14,2025-10-14T12:00:00Z,failed,USD +inv_002871,acct_0099,sub_0351,2025-01-15,2025-01-15T12:00:00Z,paid,USD +inv_002872,acct_0100,sub_0352,2025-03-16,2025-03-16T12:00:00Z,open,USD +inv_002873,acct_0101,sub_0353,2025-03-17,2025-03-17T12:00:00Z,draft,USD +inv_002874,acct_0102,sub_0354,2025-04-18,2025-04-18T12:00:00Z,void,USD +inv_002875,acct_0103,sub_0355,2025-05-19,2025-05-19T12:00:00Z,failed,USD +inv_002876,acct_0104,sub_0356,2025-06-20,2025-06-20T12:00:00Z,paid,USD 
+inv_002877,acct_0105,sub_0357,2025-07-21,2025-07-21T12:00:00Z,open,USD +inv_002878,acct_0106,sub_0358,2025-08-22,2025-08-22T12:00:00Z,draft,USD +inv_002879,acct_0107,sub_0359,2025-09-23,2025-09-23T12:00:00Z,void,USD +inv_002880,acct_0108,sub_0360,2025-10-24,2025-10-24T12:00:00Z,failed,USD +inv_002881,acct_0109,sub_0001,2025-01-25,2025-01-25T12:00:00Z,paid,USD +inv_002882,acct_0110,sub_0002,2025-03-26,2025-03-26T12:00:00Z,open,USD +inv_002883,acct_0111,sub_0003,2025-03-27,2025-03-27T12:00:00Z,draft,USD +inv_002884,acct_0112,sub_0004,2025-04-28,2025-04-28T12:00:00Z,void,USD +inv_002885,acct_0113,sub_0005,2025-05-01,2025-05-01T12:00:00Z,failed,USD +inv_002886,acct_0114,sub_0006,2025-06-02,2025-06-02T12:00:00Z,paid,USD +inv_002887,acct_0115,sub_0007,2025-07-03,2025-07-03T12:00:00Z,open,USD +inv_002888,acct_0116,sub_0008,2025-08-04,2025-08-04T12:00:00Z,draft,USD +inv_002889,acct_0117,sub_0009,2025-09-05,2025-09-05T12:00:00Z,void,USD +inv_002890,acct_0118,sub_0010,2025-10-06,2025-10-06T12:00:00Z,failed,USD +inv_002891,acct_0119,sub_0011,2025-01-07,2025-01-07T12:00:00Z,paid,USD +inv_002892,acct_0120,sub_0012,2025-03-08,2025-03-08T12:00:00Z,open,USD +inv_002893,acct_0121,sub_0013,2025-03-09,2025-03-09T12:00:00Z,draft,USD +inv_002894,acct_0122,sub_0014,2025-04-10,2025-04-10T12:00:00Z,void,USD +inv_002895,acct_0123,sub_0015,2025-05-11,2025-05-11T12:00:00Z,failed,USD +inv_002896,acct_0124,sub_0016,2025-06-12,2025-06-12T12:00:00Z,paid,USD +inv_002897,acct_0125,sub_0017,2025-07-13,2025-07-13T12:00:00Z,open,USD +inv_002898,acct_0126,sub_0018,2025-08-14,2025-08-14T12:00:00Z,draft,USD +inv_002899,acct_0127,sub_0019,2025-09-15,2025-09-15T12:00:00Z,void,USD +inv_002900,acct_0128,sub_0020,2025-10-16,2025-10-16T12:00:00Z,failed,USD +inv_002901,acct_0129,sub_0021,2025-01-17,2025-01-17T12:00:00Z,paid,USD +inv_002902,acct_0130,sub_0022,2025-03-18,2025-03-18T12:00:00Z,open,USD +inv_002903,acct_0131,sub_0023,2025-03-19,2025-03-19T12:00:00Z,draft,USD 
+inv_002904,acct_0132,sub_0024,2025-04-20,2025-04-20T12:00:00Z,void,USD +inv_002905,acct_0133,sub_0025,2025-05-21,2025-05-21T12:00:00Z,failed,USD +inv_002906,acct_0134,sub_0026,2025-06-22,2025-06-22T12:00:00Z,paid,USD +inv_002907,acct_0135,sub_0027,2025-07-23,2025-07-23T12:00:00Z,open,USD +inv_002908,acct_0136,sub_0028,2025-08-24,2025-08-24T12:00:00Z,draft,USD +inv_002909,acct_0137,sub_0029,2025-09-25,2025-09-25T12:00:00Z,void,USD +inv_002910,acct_0138,sub_0030,2025-10-26,2025-10-26T12:00:00Z,failed,USD +inv_002911,acct_0139,sub_0031,2025-01-27,2025-01-27T12:00:00Z,paid,USD +inv_002912,acct_0140,sub_0032,2025-03-28,2025-03-28T12:00:00Z,open,USD +inv_002913,acct_0141,sub_0033,2025-03-01,2025-03-01T12:00:00Z,draft,USD +inv_002914,acct_0142,sub_0034,2025-04-02,2025-04-02T12:00:00Z,void,USD +inv_002915,acct_0143,sub_0035,2025-05-03,2025-05-03T12:00:00Z,failed,USD +inv_002916,acct_0144,sub_0036,2025-06-04,2025-06-04T12:00:00Z,paid,USD +inv_002917,acct_0145,sub_0037,2025-07-05,2025-07-05T12:00:00Z,open,USD +inv_002918,acct_0146,sub_0038,2025-08-06,2025-08-06T12:00:00Z,draft,USD +inv_002919,acct_0147,sub_0039,2025-09-07,2025-09-07T12:00:00Z,void,USD +inv_002920,acct_0148,sub_0040,2025-10-08,2025-10-08T12:00:00Z,failed,USD +inv_002921,acct_0149,sub_0041,2025-01-09,2025-01-09T12:00:00Z,paid,USD +inv_002922,acct_0150,sub_0042,2025-03-10,2025-03-10T12:00:00Z,open,USD +inv_002923,acct_0151,sub_0043,2025-03-11,2025-03-11T12:00:00Z,draft,USD +inv_002924,acct_0152,sub_0044,2025-04-12,2025-04-12T12:00:00Z,void,USD +inv_002925,acct_0153,sub_0045,2025-05-13,2025-05-13T12:00:00Z,failed,USD +inv_002926,acct_0154,sub_0046,2025-06-14,2025-06-14T12:00:00Z,paid,USD +inv_002927,acct_0155,sub_0047,2025-07-15,2025-07-15T12:00:00Z,open,USD +inv_002928,acct_0156,sub_0048,2025-08-16,2025-08-16T12:00:00Z,draft,USD +inv_002929,acct_0157,sub_0049,2025-09-17,2025-09-17T12:00:00Z,void,USD +inv_002930,acct_0158,sub_0050,2025-10-18,2025-10-18T12:00:00Z,failed,USD 
+inv_002931,acct_0159,sub_0051,2025-01-19,2025-01-19T12:00:00Z,paid,USD +inv_002932,acct_0160,sub_0052,2025-03-20,2025-03-20T12:00:00Z,open,USD +inv_002933,acct_0161,sub_0053,2025-03-21,2025-03-21T12:00:00Z,draft,USD +inv_002934,acct_0162,sub_0054,2025-04-22,2025-04-22T12:00:00Z,void,USD +inv_002935,acct_0163,sub_0055,2025-05-23,2025-05-23T12:00:00Z,failed,USD +inv_002936,acct_0164,sub_0056,2025-06-24,2025-06-24T12:00:00Z,paid,USD +inv_002937,acct_0165,sub_0057,2025-07-25,2025-07-25T12:00:00Z,open,USD +inv_002938,acct_0166,sub_0058,2025-08-26,2025-08-26T12:00:00Z,draft,USD +inv_002939,acct_0167,sub_0059,2025-09-27,2025-09-27T12:00:00Z,void,USD +inv_002940,acct_0168,sub_0060,2025-10-28,2025-10-28T12:00:00Z,failed,USD +inv_002941,acct_0169,sub_0061,2025-01-01,2025-01-01T12:00:00Z,paid,USD +inv_002942,acct_0170,sub_0062,2025-03-02,2025-03-02T12:00:00Z,open,USD +inv_002943,acct_0171,sub_0063,2025-03-03,2025-03-03T12:00:00Z,draft,USD +inv_002944,acct_0172,sub_0064,2025-04-04,2025-04-04T12:00:00Z,void,USD +inv_002945,acct_0173,sub_0065,2025-05-05,2025-05-05T12:00:00Z,failed,USD +inv_002946,acct_0174,sub_0066,2025-06-06,2025-06-06T12:00:00Z,paid,USD +inv_002947,acct_0175,sub_0067,2025-07-07,2025-07-07T12:00:00Z,open,USD +inv_002948,acct_0176,sub_0068,2025-08-08,2025-08-08T12:00:00Z,draft,USD +inv_002949,acct_0177,sub_0069,2025-09-09,2025-09-09T12:00:00Z,void,USD +inv_002950,acct_0178,sub_0070,2025-10-10,2025-10-10T12:00:00Z,failed,USD +inv_002951,acct_0179,sub_0071,2025-01-11,2025-01-11T12:00:00Z,paid,USD +inv_002952,acct_0180,sub_0072,2025-03-12,2025-03-12T12:00:00Z,open,USD +inv_002953,acct_0181,sub_0073,2025-03-13,2025-03-13T12:00:00Z,draft,USD +inv_002954,acct_0182,sub_0074,2025-04-14,2025-04-14T12:00:00Z,void,USD +inv_002955,acct_0183,sub_0075,2025-05-15,2025-05-15T12:00:00Z,failed,USD +inv_002956,acct_0184,sub_0076,2025-06-16,2025-06-16T12:00:00Z,paid,USD +inv_002957,acct_0185,sub_0077,2025-07-17,2025-07-17T12:00:00Z,open,USD 
+inv_002958,acct_0186,sub_0078,2025-08-18,2025-08-18T12:00:00Z,draft,USD +inv_002959,acct_0187,sub_0079,2025-09-19,2025-09-19T12:00:00Z,void,USD +inv_002960,acct_0188,sub_0080,2025-10-20,2025-10-20T12:00:00Z,failed,USD +inv_002961,acct_0189,sub_0081,2025-01-21,2025-01-21T12:00:00Z,paid,USD +inv_002962,acct_0190,sub_0082,2025-03-22,2025-03-22T12:00:00Z,open,USD +inv_002963,acct_0191,sub_0083,2025-03-23,2025-03-23T12:00:00Z,draft,USD +inv_002964,acct_0192,sub_0084,2025-04-24,2025-04-24T12:00:00Z,void,USD +inv_002965,acct_0193,sub_0085,2025-05-25,2025-05-25T12:00:00Z,failed,USD +inv_002966,acct_0194,sub_0086,2025-06-26,2025-06-26T12:00:00Z,paid,USD +inv_002967,acct_0195,sub_0087,2025-07-27,2025-07-27T12:00:00Z,open,USD +inv_002968,acct_0196,sub_0088,2025-08-28,2025-08-28T12:00:00Z,draft,USD +inv_002969,acct_0197,sub_0089,2025-09-01,2025-09-01T12:00:00Z,void,USD +inv_002970,acct_0198,sub_0090,2025-10-02,2025-10-02T12:00:00Z,failed,USD +inv_002971,acct_0001,sub_0091,2025-01-03,2025-01-03T12:00:00Z,paid,USD +inv_002972,acct_0002,sub_0092,2025-03-04,2025-03-04T12:00:00Z,open,USD +inv_002973,acct_0003,sub_0093,2025-03-05,2025-03-05T12:00:00Z,draft,USD +inv_002974,acct_0004,sub_0094,2025-04-06,2025-04-06T12:00:00Z,void,USD +inv_002975,acct_0005,sub_0095,2025-05-07,2025-05-07T12:00:00Z,failed,USD +inv_002976,acct_0006,sub_0096,2025-06-08,2025-06-08T12:00:00Z,paid,USD +inv_002977,acct_0007,sub_0097,2025-07-09,2025-07-09T12:00:00Z,open,USD +inv_002978,acct_0008,sub_0098,2025-08-10,2025-08-10T12:00:00Z,draft,USD +inv_002979,acct_0009,sub_0099,2025-09-11,2025-09-11T12:00:00Z,void,USD +inv_002980,acct_0010,sub_0100,2025-10-12,2025-10-12T12:00:00Z,failed,USD +inv_002981,acct_0011,sub_0101,2025-01-13,2025-01-13T12:00:00Z,paid,USD +inv_002982,acct_0012,sub_0102,2025-03-14,2025-03-14T12:00:00Z,open,USD +inv_002983,acct_0013,sub_0103,2025-03-15,2025-03-15T12:00:00Z,draft,USD +inv_002984,acct_0014,sub_0104,2025-04-16,2025-04-16T12:00:00Z,void,USD 
+inv_002985,acct_0015,sub_0105,2025-05-17,2025-05-17T12:00:00Z,failed,USD +inv_002986,acct_0016,sub_0106,2025-06-18,2025-06-18T12:00:00Z,paid,USD +inv_002987,acct_0017,sub_0107,2025-07-19,2025-07-19T12:00:00Z,open,USD +inv_002988,acct_0018,sub_0108,2025-08-20,2025-08-20T12:00:00Z,draft,USD +inv_002989,acct_0019,sub_0109,2025-09-21,2025-09-21T12:00:00Z,void,USD +inv_002990,acct_0020,sub_0110,2025-10-22,2025-10-22T12:00:00Z,failed,USD +inv_002991,acct_0021,sub_0111,2025-01-23,2025-01-23T12:00:00Z,paid,USD +inv_002992,acct_0022,sub_0112,2025-03-24,2025-03-24T12:00:00Z,open,USD +inv_002993,acct_0023,sub_0113,2025-03-25,2025-03-25T12:00:00Z,draft,USD +inv_002994,acct_0024,sub_0114,2025-04-26,2025-04-26T12:00:00Z,void,USD +inv_002995,acct_0025,sub_0115,2025-05-27,2025-05-27T12:00:00Z,failed,USD +inv_002996,acct_0026,sub_0116,2025-06-28,2025-06-28T12:00:00Z,paid,USD +inv_002997,acct_0027,sub_0117,2025-07-01,2025-07-01T12:00:00Z,open,USD +inv_002998,acct_0028,sub_0118,2025-08-02,2025-08-02T12:00:00Z,draft,USD +inv_002999,acct_0029,sub_0119,2025-09-03,2025-09-03T12:00:00Z,void,USD +inv_003000,acct_0030,sub_0120,2025-10-04,2025-10-04T12:00:00Z,failed,USD diff --git a/packages/cli/assets/demo/orbit/raw-sources/warehouse/plans.csv b/packages/cli/assets/demo/orbit/raw-sources/warehouse/plans.csv new file mode 100644 index 00000000..d2ff67b8 --- /dev/null +++ b/packages/cli/assets/demo/orbit/raw-sources/warehouse/plans.csv @@ -0,0 +1,5 @@ +plan_id,plan_code,plan_name,canonical_plan_code,is_retired,retired_at +plan_001,starter,Starter,starter,false,2099-12-31T00:00:00Z +plan_002,growth,Growth,growth,false,2099-12-31T00:00:00Z +plan_003,enterprise,Enterprise,enterprise,false,2099-12-31T00:00:00Z +plan_004,pro_plus,Pro Plus,growth,true,2025-10-01T00:00:00Z diff --git a/packages/cli/assets/demo/orbit/raw-sources/warehouse/purchase_requests.csv b/packages/cli/assets/demo/orbit/raw-sources/warehouse/purchase_requests.csv new file mode 100644 index 00000000..fe1a6b4d --- /dev/null +++ 
b/packages/cli/assets/demo/orbit/raw-sources/warehouse/purchase_requests.csv @@ -0,0 +1,5201 @@ +purchase_request_id,account_id,requester_user_id,created_at,status,amount_cents,supplier_id +pr_000001,acct_0010,user_000001,2026-03-23T11:00:00Z,approved,25100,supplier_0001 +pr_000002,acct_0011,user_000002,2026-03-24T11:00:00Z,approved,25200,supplier_0002 +pr_000003,acct_0012,user_000003,2026-03-25T11:00:00Z,submitted,25300,supplier_0003 +pr_000004,acct_0013,user_000004,2026-03-26T11:00:00Z,approved,25400,supplier_0004 +pr_000005,acct_0014,user_000005,2026-03-27T11:00:00Z,approved,25500,supplier_0005 +pr_000006,acct_0015,user_000006,2026-03-28T11:00:00Z,submitted,25600,supplier_0006 +pr_000007,acct_0016,user_000007,2026-03-29T11:00:00Z,approved,25700,supplier_0007 +pr_000008,acct_0017,user_000008,2026-03-23T11:00:00Z,approved,25800,supplier_0008 +pr_000009,acct_0018,user_000009,2026-03-24T11:00:00Z,submitted,25900,supplier_0009 +pr_000010,acct_0019,user_000010,2026-03-25T11:00:00Z,approved,26000,supplier_0010 +pr_000011,acct_0020,user_000011,2026-03-26T11:00:00Z,approved,26100,supplier_0011 +pr_000012,acct_0021,user_000012,2026-03-27T11:00:00Z,submitted,26200,supplier_0012 +pr_000013,acct_0022,user_000013,2026-03-28T11:00:00Z,approved,26300,supplier_0013 +pr_000014,acct_0023,user_000014,2026-03-29T11:00:00Z,approved,26400,supplier_0014 +pr_000015,acct_0024,user_000015,2026-03-23T11:00:00Z,submitted,26500,supplier_0015 +pr_000016,acct_0025,user_000016,2026-03-24T11:00:00Z,approved,26600,supplier_0016 +pr_000017,acct_0026,user_000017,2026-03-25T11:00:00Z,approved,26700,supplier_0017 +pr_000018,acct_0027,user_000018,2026-03-26T11:00:00Z,submitted,26800,supplier_0018 +pr_000019,acct_0028,user_000019,2026-03-27T11:00:00Z,approved,26900,supplier_0019 +pr_000020,acct_0029,user_000020,2026-03-28T11:00:00Z,approved,27000,supplier_0020 +pr_000021,acct_0030,user_000021,2026-03-29T11:00:00Z,submitted,27100,supplier_0021 
+pr_000022,acct_0031,user_000022,2026-03-23T11:00:00Z,approved,27200,supplier_0022 +pr_000023,acct_0032,user_000023,2026-03-24T11:00:00Z,approved,27300,supplier_0023 +pr_000024,acct_0033,user_000024,2026-03-25T11:00:00Z,submitted,27400,supplier_0024 +pr_000025,acct_0034,user_000025,2026-03-26T11:00:00Z,approved,27500,supplier_0025 +pr_000026,acct_0035,user_000026,2026-03-27T11:00:00Z,approved,27600,supplier_0026 +pr_000027,acct_0036,user_000027,2026-03-28T11:00:00Z,submitted,27700,supplier_0027 +pr_000028,acct_0037,user_000028,2026-03-29T11:00:00Z,approved,27800,supplier_0028 +pr_000029,acct_0038,user_000029,2026-03-23T11:00:00Z,approved,27900,supplier_0029 +pr_000030,acct_0039,user_000030,2026-03-24T11:00:00Z,submitted,28000,supplier_0030 +pr_000031,acct_0040,user_000031,2026-03-25T11:00:00Z,approved,28100,supplier_0031 +pr_000032,acct_0041,user_000032,2026-03-26T11:00:00Z,approved,28200,supplier_0032 +pr_000033,acct_0042,user_000033,2026-03-27T11:00:00Z,submitted,28300,supplier_0033 +pr_000034,acct_0043,user_000034,2026-03-28T11:00:00Z,approved,28400,supplier_0034 +pr_000035,acct_0044,user_000035,2026-03-29T11:00:00Z,approved,28500,supplier_0035 +pr_000036,acct_0045,user_000036,2026-03-23T11:00:00Z,submitted,28600,supplier_0036 +pr_000037,acct_0046,user_000037,2026-03-24T11:00:00Z,approved,28700,supplier_0037 +pr_000038,acct_0047,user_000038,2026-03-25T11:00:00Z,approved,28800,supplier_0038 +pr_000039,acct_0048,user_000039,2026-03-26T11:00:00Z,submitted,28900,supplier_0039 +pr_000040,acct_0049,user_000040,2026-03-27T11:00:00Z,approved,29000,supplier_0040 +pr_000041,acct_0050,user_000041,2026-03-28T11:00:00Z,approved,29100,supplier_0041 +pr_000042,acct_0010,user_000042,2026-03-29T11:00:00Z,submitted,29200,supplier_0042 +pr_000043,acct_0011,user_000043,2026-03-23T11:00:00Z,approved,29300,supplier_0043 +pr_000044,acct_0012,user_000044,2026-03-24T11:00:00Z,approved,29400,supplier_0044 +pr_000045,acct_0013,user_000045,2026-03-25T11:00:00Z,submitted,29500,supplier_0045 
+pr_000046,acct_0014,user_000046,2026-03-26T11:00:00Z,approved,29600,supplier_0046 +pr_000047,acct_0015,user_000047,2026-03-27T11:00:00Z,approved,29700,supplier_0047 +pr_000048,acct_0016,user_000048,2026-03-28T11:00:00Z,submitted,29800,supplier_0048 +pr_000049,acct_0017,user_000049,2026-03-29T11:00:00Z,approved,29900,supplier_0049 +pr_000050,acct_0018,user_000050,2026-03-23T11:00:00Z,approved,30000,supplier_0050 +pr_000051,acct_0019,user_000051,2026-03-24T11:00:00Z,submitted,30100,supplier_0051 +pr_000052,acct_0020,user_000052,2026-03-25T11:00:00Z,approved,30200,supplier_0052 +pr_000053,acct_0021,user_000053,2026-03-26T11:00:00Z,approved,30300,supplier_0053 +pr_000054,acct_0022,user_000054,2026-03-27T11:00:00Z,submitted,30400,supplier_0054 +pr_000055,acct_0023,user_000055,2026-03-28T11:00:00Z,approved,30500,supplier_0055 +pr_000056,acct_0024,user_000056,2026-03-29T11:00:00Z,approved,30600,supplier_0056 +pr_000057,acct_0025,user_000057,2026-03-23T11:00:00Z,submitted,30700,supplier_0057 +pr_000058,acct_0026,user_000058,2026-03-24T11:00:00Z,approved,30800,supplier_0058 +pr_000059,acct_0027,user_000059,2026-03-25T11:00:00Z,approved,30900,supplier_0059 +pr_000060,acct_0028,user_000060,2026-03-26T11:00:00Z,submitted,31000,supplier_0060 +pr_000061,acct_0029,user_000061,2026-03-27T11:00:00Z,approved,31100,supplier_0061 +pr_000062,acct_0030,user_000062,2026-03-28T11:00:00Z,approved,31200,supplier_0062 +pr_000063,acct_0031,user_000063,2026-03-29T11:00:00Z,submitted,31300,supplier_0063 +pr_000064,acct_0032,user_000064,2026-03-23T11:00:00Z,approved,31400,supplier_0064 +pr_000065,acct_0033,user_000065,2026-03-24T11:00:00Z,approved,31500,supplier_0065 +pr_000066,acct_0034,user_000066,2026-03-25T11:00:00Z,submitted,31600,supplier_0066 +pr_000067,acct_0035,user_000067,2026-03-26T11:00:00Z,approved,31700,supplier_0067 +pr_000068,acct_0036,user_000068,2026-03-27T11:00:00Z,approved,31800,supplier_0068 +pr_000069,acct_0037,user_000069,2026-03-28T11:00:00Z,submitted,31900,supplier_0069 
+pr_000070,acct_0038,user_000070,2026-03-29T11:00:00Z,approved,32000,supplier_0070 +pr_000071,acct_0039,user_000071,2026-03-23T11:00:00Z,approved,32100,supplier_0071 +pr_000072,acct_0040,user_000072,2026-03-24T11:00:00Z,submitted,32200,supplier_0072 +pr_000073,acct_0041,user_000073,2026-03-25T11:00:00Z,approved,32300,supplier_0073 +pr_000074,acct_0042,user_000074,2026-03-26T11:00:00Z,approved,32400,supplier_0074 +pr_000075,acct_0043,user_000075,2026-03-27T11:00:00Z,submitted,32500,supplier_0075 +pr_000076,acct_0044,user_000076,2026-03-28T11:00:00Z,approved,32600,supplier_0076 +pr_000077,acct_0045,user_000077,2026-03-29T11:00:00Z,approved,32700,supplier_0077 +pr_000078,acct_0046,user_000078,2026-03-23T11:00:00Z,submitted,32800,supplier_0078 +pr_000079,acct_0047,user_000079,2026-03-24T11:00:00Z,approved,32900,supplier_0079 +pr_000080,acct_0048,user_000080,2026-03-25T11:00:00Z,approved,33000,supplier_0080 +pr_000081,acct_0049,user_000081,2026-03-26T11:00:00Z,submitted,33100,supplier_0081 +pr_000082,acct_0050,user_000082,2026-03-27T11:00:00Z,approved,33200,supplier_0082 +pr_000083,acct_0010,user_000083,2026-03-28T11:00:00Z,approved,33300,supplier_0083 +pr_000084,acct_0011,user_000084,2026-03-29T11:00:00Z,submitted,33400,supplier_0084 +pr_000085,acct_0012,user_000085,2026-03-23T11:00:00Z,approved,33500,supplier_0085 +pr_000086,acct_0013,user_000086,2026-03-24T11:00:00Z,approved,33600,supplier_0086 +pr_000087,acct_0014,user_000087,2026-03-25T11:00:00Z,submitted,33700,supplier_0087 +pr_000088,acct_0015,user_000088,2026-03-26T11:00:00Z,approved,33800,supplier_0088 +pr_000089,acct_0016,user_000089,2026-03-27T11:00:00Z,approved,33900,supplier_0089 +pr_000090,acct_0017,user_000090,2026-03-28T11:00:00Z,submitted,34000,supplier_0090 +pr_000091,acct_0018,user_000091,2026-03-29T11:00:00Z,approved,34100,supplier_0091 +pr_000092,acct_0019,user_000092,2026-03-23T11:00:00Z,approved,34200,supplier_0092 +pr_000093,acct_0020,user_000093,2026-03-24T11:00:00Z,submitted,34300,supplier_0093 
+pr_000094,acct_0021,user_000094,2026-03-25T11:00:00Z,approved,34400,supplier_0094 +pr_000095,acct_0022,user_000095,2026-03-26T11:00:00Z,approved,34500,supplier_0095 +pr_000096,acct_0023,user_000096,2026-03-27T11:00:00Z,submitted,34600,supplier_0096 +pr_000097,acct_0024,user_000097,2026-03-28T11:00:00Z,approved,34700,supplier_0097 +pr_000098,acct_0025,user_000098,2026-03-29T11:00:00Z,approved,34800,supplier_0098 +pr_000099,acct_0026,user_000099,2026-03-23T11:00:00Z,submitted,34900,supplier_0099 +pr_000100,acct_0027,user_000100,2026-03-24T11:00:00Z,approved,35000,supplier_0100 +pr_000101,acct_0028,user_000101,2026-03-25T11:00:00Z,approved,35100,supplier_0101 +pr_000102,acct_0029,user_000102,2026-03-26T11:00:00Z,submitted,35200,supplier_0102 +pr_000103,acct_0030,user_000103,2026-03-27T11:00:00Z,approved,35300,supplier_0103 +pr_000104,acct_0031,user_000104,2026-03-28T11:00:00Z,approved,35400,supplier_0104 +pr_000105,acct_0032,user_000105,2026-03-29T11:00:00Z,submitted,35500,supplier_0105 +pr_000106,acct_0033,user_000106,2026-03-23T11:00:00Z,approved,35600,supplier_0106 +pr_000107,acct_0034,user_000107,2026-03-24T11:00:00Z,approved,35700,supplier_0107 +pr_000108,acct_0035,user_000108,2026-03-25T11:00:00Z,submitted,35800,supplier_0108 +pr_000109,acct_0036,user_000109,2026-03-26T11:00:00Z,approved,35900,supplier_0109 +pr_000110,acct_0037,user_000110,2026-03-27T11:00:00Z,approved,36000,supplier_0110 +pr_000111,acct_0038,user_000111,2026-03-28T11:00:00Z,submitted,36100,supplier_0111 +pr_000112,acct_0039,user_000112,2026-03-29T11:00:00Z,approved,36200,supplier_0112 +pr_000113,acct_0040,user_000113,2026-03-23T11:00:00Z,approved,36300,supplier_0113 +pr_000114,acct_0041,user_000114,2026-03-24T11:00:00Z,submitted,36400,supplier_0114 +pr_000115,acct_0042,user_000115,2026-03-25T11:00:00Z,approved,36500,supplier_0115 +pr_000116,acct_0043,user_000116,2026-03-26T11:00:00Z,approved,36600,supplier_0116 +pr_000117,acct_0044,user_000117,2026-03-27T11:00:00Z,submitted,36700,supplier_0117 
+pr_000118,acct_0045,user_000118,2026-03-28T11:00:00Z,approved,36800,supplier_0118 +pr_000119,acct_0046,user_000119,2026-03-29T11:00:00Z,approved,36900,supplier_0119 +pr_000120,acct_0047,user_000120,2026-03-23T11:00:00Z,submitted,37000,supplier_0120 +pr_000121,acct_0048,user_000121,2026-03-24T11:00:00Z,approved,37100,supplier_0121 +pr_000122,acct_0049,user_000122,2026-03-25T11:00:00Z,approved,37200,supplier_0122 +pr_000123,acct_0050,user_000123,2026-03-26T11:00:00Z,submitted,37300,supplier_0123 +pr_000124,acct_0010,user_000124,2026-03-27T11:00:00Z,approved,37400,supplier_0124 +pr_000125,acct_0011,user_000125,2026-03-28T11:00:00Z,approved,37500,supplier_0125 +pr_000126,acct_0012,user_000126,2026-03-29T11:00:00Z,submitted,37600,supplier_0126 +pr_000127,acct_0013,user_000127,2026-03-23T11:00:00Z,approved,37700,supplier_0127 +pr_000128,acct_0014,user_000128,2026-03-24T11:00:00Z,approved,37800,supplier_0128 +pr_000129,acct_0015,user_000129,2026-03-25T11:00:00Z,submitted,37900,supplier_0129 +pr_000130,acct_0016,user_000130,2026-03-26T11:00:00Z,approved,38000,supplier_0130 +pr_000131,acct_0017,user_000131,2026-03-27T11:00:00Z,approved,38100,supplier_0131 +pr_000132,acct_0018,user_000132,2026-03-28T11:00:00Z,submitted,38200,supplier_0132 +pr_000133,acct_0019,user_000133,2026-03-29T11:00:00Z,approved,38300,supplier_0133 +pr_000134,acct_0020,user_000134,2026-03-23T11:00:00Z,approved,38400,supplier_0134 +pr_000135,acct_0021,user_000135,2026-03-24T11:00:00Z,submitted,38500,supplier_0135 +pr_000136,acct_0022,user_000136,2026-03-25T11:00:00Z,approved,38600,supplier_0136 +pr_000137,acct_0023,user_000137,2026-03-26T11:00:00Z,approved,38700,supplier_0137 +pr_000138,acct_0024,user_000138,2026-03-27T11:00:00Z,submitted,38800,supplier_0138 +pr_000139,acct_0025,user_000139,2026-03-28T11:00:00Z,approved,38900,supplier_0139 +pr_000140,acct_0026,user_000140,2026-03-29T11:00:00Z,approved,39000,supplier_0140 +pr_000141,acct_0027,user_000141,2026-03-23T11:00:00Z,submitted,39100,supplier_0141 
+pr_000142,acct_0028,user_000142,2026-03-24T11:00:00Z,approved,39200,supplier_0142 +pr_000143,acct_0029,user_000143,2026-03-25T11:00:00Z,approved,39300,supplier_0143 +pr_000144,acct_0030,user_000144,2026-03-26T11:00:00Z,submitted,39400,supplier_0144 +pr_000145,acct_0031,user_000145,2026-03-27T11:00:00Z,approved,39500,supplier_0145 +pr_000146,acct_0032,user_000146,2026-03-28T11:00:00Z,approved,39600,supplier_0146 +pr_000147,acct_0033,user_000147,2026-03-29T11:00:00Z,submitted,39700,supplier_0147 +pr_000148,acct_0034,user_000148,2026-03-23T11:00:00Z,approved,39800,supplier_0148 +pr_000149,acct_0035,user_000149,2026-03-24T11:00:00Z,approved,39900,supplier_0149 +pr_000150,acct_0036,user_000150,2026-03-25T11:00:00Z,submitted,40000,supplier_0150 +pr_000151,acct_0037,user_000151,2026-03-26T11:00:00Z,approved,40100,supplier_0151 +pr_000152,acct_0038,user_000152,2026-03-27T11:00:00Z,approved,40200,supplier_0152 +pr_000153,acct_0039,user_000153,2026-03-28T11:00:00Z,submitted,40300,supplier_0153 +pr_000154,acct_0040,user_000154,2026-03-29T11:00:00Z,approved,40400,supplier_0154 +pr_000155,acct_0041,user_000155,2026-03-23T11:00:00Z,approved,40500,supplier_0155 +pr_000156,acct_0042,user_000156,2026-03-24T11:00:00Z,submitted,40600,supplier_0156 +pr_000157,acct_0043,user_000157,2026-03-25T11:00:00Z,approved,40700,supplier_0157 +pr_000158,acct_0044,user_000158,2026-03-26T11:00:00Z,approved,40800,supplier_0158 +pr_000159,acct_0045,user_000159,2026-03-27T11:00:00Z,submitted,40900,supplier_0159 +pr_000160,acct_0046,user_000160,2026-03-28T11:00:00Z,approved,41000,supplier_0160 +pr_000161,acct_0047,user_000161,2026-03-29T11:00:00Z,approved,41100,supplier_0161 +pr_000162,acct_0048,user_000162,2026-03-23T11:00:00Z,submitted,41200,supplier_0162 +pr_000163,acct_0049,user_000163,2026-03-24T11:00:00Z,approved,41300,supplier_0163 +pr_000164,acct_0050,user_000164,2026-03-25T11:00:00Z,approved,41400,supplier_0164 +pr_000165,acct_0010,user_000165,2026-03-26T11:00:00Z,submitted,41500,supplier_0165 
+pr_000166,acct_0011,user_000166,2026-03-27T11:00:00Z,approved,41600,supplier_0166 +pr_000167,acct_0012,user_000167,2026-03-28T11:00:00Z,approved,41700,supplier_0167 +pr_000168,acct_0013,user_000168,2026-03-29T11:00:00Z,submitted,41800,supplier_0168 +pr_000169,acct_0014,user_000169,2026-03-23T11:00:00Z,approved,41900,supplier_0169 +pr_000170,acct_0015,user_000170,2026-03-24T11:00:00Z,approved,42000,supplier_0170 +pr_000171,acct_0016,user_000171,2026-03-25T11:00:00Z,submitted,42100,supplier_0171 +pr_000172,acct_0017,user_000172,2026-03-26T11:00:00Z,approved,42200,supplier_0172 +pr_000173,acct_0018,user_000173,2026-03-27T11:00:00Z,approved,42300,supplier_0173 +pr_000174,acct_0019,user_000174,2026-03-28T11:00:00Z,submitted,42400,supplier_0174 +pr_000175,acct_0020,user_000175,2026-03-29T11:00:00Z,approved,42500,supplier_0175 +pr_000176,acct_0021,user_000176,2026-03-23T11:00:00Z,approved,42600,supplier_0176 +pr_000177,acct_0022,user_000177,2026-03-24T11:00:00Z,submitted,42700,supplier_0177 +pr_000178,acct_0023,user_000178,2026-03-25T11:00:00Z,approved,42800,supplier_0178 +pr_000179,acct_0024,user_000179,2026-03-26T11:00:00Z,approved,42900,supplier_0179 +pr_000180,acct_0025,user_000180,2026-03-27T11:00:00Z,submitted,43000,supplier_0180 +pr_000181,acct_0026,user_000181,2026-03-28T11:00:00Z,approved,43100,supplier_0181 +pr_000182,acct_0027,user_000182,2026-03-29T11:00:00Z,approved,43200,supplier_0182 +pr_000183,acct_0028,user_000183,2026-03-23T11:00:00Z,submitted,43300,supplier_0183 +pr_000184,acct_0029,user_000184,2026-03-24T11:00:00Z,approved,43400,supplier_0184 +pr_000185,acct_0030,user_000185,2026-03-25T11:00:00Z,approved,43500,supplier_0185 +pr_000186,acct_0031,user_000186,2026-03-26T11:00:00Z,submitted,43600,supplier_0186 +pr_000187,acct_0032,user_000187,2026-03-27T11:00:00Z,approved,43700,supplier_0187 +pr_000188,acct_0033,user_000188,2026-03-28T11:00:00Z,approved,43800,supplier_0188 +pr_000189,acct_0034,user_000189,2026-03-29T11:00:00Z,submitted,43900,supplier_0189 
+pr_000190,acct_0035,user_000190,2026-03-23T11:00:00Z,approved,44000,supplier_0190 +pr_000191,acct_0036,user_000191,2026-03-24T11:00:00Z,approved,44100,supplier_0191 +pr_000192,acct_0037,user_000192,2026-03-25T11:00:00Z,submitted,44200,supplier_0192 +pr_000193,acct_0038,user_000193,2026-03-26T11:00:00Z,approved,44300,supplier_0193 +pr_000194,acct_0039,user_000194,2026-03-27T11:00:00Z,approved,44400,supplier_0194 +pr_000195,acct_0040,user_000195,2026-03-28T11:00:00Z,submitted,44500,supplier_0195 +pr_000196,acct_0041,user_000196,2026-03-29T11:00:00Z,approved,44600,supplier_0196 +pr_000197,acct_0042,user_000197,2026-03-23T11:00:00Z,approved,44700,supplier_0197 +pr_000198,acct_0043,user_000198,2026-03-24T11:00:00Z,submitted,44800,supplier_0198 +pr_000199,acct_0044,user_000199,2026-03-25T11:00:00Z,approved,44900,supplier_0199 +pr_000200,acct_0045,user_000200,2026-03-26T11:00:00Z,approved,45000,supplier_0200 +pr_000201,acct_0046,user_000201,2026-03-27T11:00:00Z,submitted,45100,supplier_0201 +pr_000202,acct_0047,user_000202,2026-03-28T11:00:00Z,approved,45200,supplier_0202 +pr_000203,acct_0048,user_000203,2026-03-29T11:00:00Z,approved,45300,supplier_0203 +pr_000204,acct_0049,user_000204,2026-03-23T11:00:00Z,submitted,45400,supplier_0204 +pr_000205,acct_0050,user_000205,2026-03-24T11:00:00Z,approved,45500,supplier_0205 +pr_000206,acct_0010,user_000206,2026-03-25T11:00:00Z,approved,45600,supplier_0206 +pr_000207,acct_0011,user_000207,2026-03-26T11:00:00Z,submitted,45700,supplier_0207 +pr_000208,acct_0012,user_000208,2026-03-27T11:00:00Z,approved,45800,supplier_0208 +pr_000209,acct_0013,user_000209,2026-03-28T11:00:00Z,approved,45900,supplier_0209 +pr_000210,acct_0014,user_000210,2026-03-29T11:00:00Z,submitted,46000,supplier_0210 +pr_000211,acct_0015,user_000211,2026-03-23T11:00:00Z,approved,46100,supplier_0211 +pr_000212,acct_0016,user_000212,2026-03-24T11:00:00Z,approved,46200,supplier_0212 +pr_000213,acct_0017,user_000213,2026-03-25T11:00:00Z,submitted,46300,supplier_0213 
+pr_000214,acct_0018,user_000214,2026-03-26T11:00:00Z,approved,46400,supplier_0214 +pr_000215,acct_0019,user_000215,2026-03-27T11:00:00Z,approved,46500,supplier_0215 +pr_000216,acct_0020,user_000216,2026-03-28T11:00:00Z,submitted,46600,supplier_0216 +pr_000217,acct_0021,user_000217,2026-03-29T11:00:00Z,approved,46700,supplier_0217 +pr_000218,acct_0022,user_000218,2026-03-23T11:00:00Z,approved,46800,supplier_0218 +pr_000219,acct_0023,user_000219,2026-03-24T11:00:00Z,submitted,46900,supplier_0219 +pr_000220,acct_0024,user_000220,2026-03-25T11:00:00Z,approved,47000,supplier_0220 +pr_000221,acct_0025,user_000221,2026-03-26T11:00:00Z,approved,47100,supplier_0221 +pr_000222,acct_0026,user_000222,2026-03-27T11:00:00Z,submitted,47200,supplier_0222 +pr_000223,acct_0027,user_000223,2026-03-28T11:00:00Z,approved,47300,supplier_0223 +pr_000224,acct_0028,user_000224,2026-03-29T11:00:00Z,approved,47400,supplier_0224 +pr_000225,acct_0029,user_000225,2026-03-23T11:00:00Z,submitted,47500,supplier_0225 +pr_000226,acct_0030,user_000226,2026-03-24T11:00:00Z,approved,47600,supplier_0226 +pr_000227,acct_0031,user_000227,2026-03-25T11:00:00Z,approved,47700,supplier_0227 +pr_000228,acct_0032,user_000228,2026-03-26T11:00:00Z,submitted,47800,supplier_0228 +pr_000229,acct_0033,user_000229,2026-03-27T11:00:00Z,approved,47900,supplier_0229 +pr_000230,acct_0034,user_000230,2026-03-28T11:00:00Z,approved,48000,supplier_0230 +pr_000231,acct_0035,user_000231,2026-03-29T11:00:00Z,submitted,48100,supplier_0231 +pr_000232,acct_0036,user_000232,2026-03-23T11:00:00Z,approved,48200,supplier_0232 +pr_000233,acct_0037,user_000233,2026-03-24T11:00:00Z,approved,48300,supplier_0233 +pr_000234,acct_0038,user_000234,2026-03-25T11:00:00Z,submitted,48400,supplier_0234 +pr_000235,acct_0039,user_000235,2026-03-26T11:00:00Z,approved,48500,supplier_0235 +pr_000236,acct_0040,user_000236,2026-03-27T11:00:00Z,approved,48600,supplier_0236 +pr_000237,acct_0041,user_000237,2026-03-28T11:00:00Z,submitted,48700,supplier_0237 
+pr_000238,acct_0042,user_000238,2026-03-29T11:00:00Z,approved,48800,supplier_0238 +pr_000239,acct_0043,user_000239,2026-03-23T11:00:00Z,approved,48900,supplier_0239 +pr_000240,acct_0044,user_000240,2026-03-24T11:00:00Z,submitted,49000,supplier_0240 +pr_000241,acct_0045,user_000241,2026-03-25T11:00:00Z,approved,49100,supplier_0241 +pr_000242,acct_0046,user_000242,2026-03-26T11:00:00Z,approved,49200,supplier_0242 +pr_000243,acct_0047,user_000243,2026-03-27T11:00:00Z,submitted,49300,supplier_0243 +pr_000244,acct_0048,user_000244,2026-03-28T11:00:00Z,approved,49400,supplier_0244 +pr_000245,acct_0049,user_000245,2026-03-29T11:00:00Z,approved,49500,supplier_0245 +pr_000246,acct_0050,user_000246,2026-03-23T11:00:00Z,submitted,49600,supplier_0246 +pr_000247,acct_0010,user_000247,2026-03-24T11:00:00Z,approved,49700,supplier_0247 +pr_000248,acct_0011,user_000248,2026-03-25T11:00:00Z,approved,49800,supplier_0248 +pr_000249,acct_0012,user_000249,2026-03-26T11:00:00Z,submitted,49900,supplier_0249 +pr_000250,acct_0013,user_000250,2026-03-27T11:00:00Z,approved,50000,supplier_0250 +pr_000251,acct_0014,user_000251,2026-03-28T11:00:00Z,approved,50100,supplier_0251 +pr_000252,acct_0015,user_000252,2026-03-29T11:00:00Z,submitted,50200,supplier_0252 +pr_000253,acct_0016,user_000253,2026-03-23T11:00:00Z,approved,50300,supplier_0253 +pr_000254,acct_0017,user_000254,2026-03-24T11:00:00Z,approved,50400,supplier_0254 +pr_000255,acct_0018,user_000255,2026-03-25T11:00:00Z,submitted,50500,supplier_0255 +pr_000256,acct_0019,user_000256,2026-03-26T11:00:00Z,approved,50600,supplier_0256 +pr_000257,acct_0020,user_000257,2026-03-27T11:00:00Z,approved,50700,supplier_0257 +pr_000258,acct_0021,user_000258,2026-03-28T11:00:00Z,submitted,50800,supplier_0258 +pr_000259,acct_0022,user_000259,2026-03-29T11:00:00Z,approved,50900,supplier_0259 +pr_000260,acct_0023,user_000260,2026-03-23T11:00:00Z,approved,51000,supplier_0260 +pr_000261,acct_0024,user_000261,2026-03-24T11:00:00Z,submitted,51100,supplier_0261 
+pr_000262,acct_0025,user_000262,2026-03-25T11:00:00Z,approved,51200,supplier_0262 +pr_000263,acct_0026,user_000263,2026-03-26T11:00:00Z,approved,51300,supplier_0263 +pr_000264,acct_0027,user_000264,2026-03-27T11:00:00Z,submitted,51400,supplier_0264 +pr_000265,acct_0028,user_000265,2026-03-28T11:00:00Z,approved,51500,supplier_0265 +pr_000266,acct_0029,user_000266,2026-03-29T11:00:00Z,approved,51600,supplier_0266 +pr_000267,acct_0030,user_000267,2026-03-23T11:00:00Z,submitted,51700,supplier_0267 +pr_000268,acct_0031,user_000268,2026-03-24T11:00:00Z,approved,51800,supplier_0268 +pr_000269,acct_0032,user_000269,2026-03-25T11:00:00Z,approved,51900,supplier_0269 +pr_000270,acct_0033,user_000270,2026-03-26T11:00:00Z,submitted,52000,supplier_0270 +pr_000271,acct_0034,user_000271,2026-03-27T11:00:00Z,approved,52100,supplier_0271 +pr_000272,acct_0035,user_000272,2026-03-28T11:00:00Z,approved,52200,supplier_0272 +pr_000273,acct_0036,user_000273,2026-03-29T11:00:00Z,submitted,52300,supplier_0273 +pr_000274,acct_0037,user_000274,2026-03-23T11:00:00Z,approved,52400,supplier_0274 +pr_000275,acct_0038,user_000275,2026-03-24T11:00:00Z,approved,52500,supplier_0275 +pr_000276,acct_0039,user_000276,2026-03-25T11:00:00Z,submitted,52600,supplier_0276 +pr_000277,acct_0040,user_000277,2026-03-26T11:00:00Z,approved,52700,supplier_0277 +pr_000278,acct_0041,user_000278,2026-03-27T11:00:00Z,approved,52800,supplier_0278 +pr_000279,acct_0042,user_000279,2026-03-28T11:00:00Z,submitted,52900,supplier_0279 +pr_000280,acct_0043,user_000280,2026-03-29T11:00:00Z,approved,53000,supplier_0280 +pr_000281,acct_0044,user_000281,2026-03-23T11:00:00Z,approved,53100,supplier_0281 +pr_000282,acct_0045,user_000282,2026-03-24T11:00:00Z,submitted,53200,supplier_0282 +pr_000283,acct_0046,user_000283,2026-03-25T11:00:00Z,approved,53300,supplier_0283 +pr_000284,acct_0047,user_000284,2026-03-26T11:00:00Z,approved,53400,supplier_0284 +pr_000285,acct_0048,user_000285,2026-03-27T11:00:00Z,submitted,53500,supplier_0285 
+pr_000286,acct_0049,user_000286,2026-03-28T11:00:00Z,approved,53600,supplier_0286 +pr_000287,acct_0001,user_000287,2026-02-07T11:00:00Z,submitted,95000,supplier_0287 +pr_000288,acct_0002,user_000288,2026-02-08T11:00:00Z,approved,96000,supplier_0288 +pr_000289,acct_0003,user_000289,2026-02-09T11:00:00Z,rejected,97000,supplier_0289 +pr_000290,acct_0004,user_000290,2026-02-10T11:00:00Z,cancelled,98000,supplier_0290 +pr_000291,acct_0005,user_000291,2026-02-11T11:00:00Z,draft,99000,supplier_0291 +pr_000292,acct_0006,user_000292,2026-02-12T11:00:00Z,submitted,100000,supplier_0292 +pr_000293,acct_0007,user_000293,2026-02-13T11:00:00Z,approved,101000,supplier_0293 +pr_000294,acct_0008,user_000294,2026-02-14T11:00:00Z,rejected,102000,supplier_0294 +pr_000295,acct_0009,user_000295,2026-02-15T11:00:00Z,cancelled,103000,supplier_0295 +pr_000296,acct_0010,user_000296,2026-02-16T11:00:00Z,draft,104000,supplier_0296 +pr_000297,acct_0011,user_000297,2026-02-17T11:00:00Z,submitted,105000,supplier_0297 +pr_000298,acct_0012,user_000298,2026-02-18T11:00:00Z,approved,106000,supplier_0298 +pr_000299,acct_0013,user_000299,2026-02-19T11:00:00Z,rejected,107000,supplier_0299 +pr_000300,acct_0014,user_000300,2026-02-20T11:00:00Z,cancelled,108000,supplier_0300 +pr_000301,acct_0015,user_000301,2026-02-21T11:00:00Z,draft,109000,supplier_0301 +pr_000302,acct_0016,user_000302,2026-02-22T11:00:00Z,submitted,110000,supplier_0302 +pr_000303,acct_0017,user_000303,2026-02-23T11:00:00Z,approved,10000,supplier_0303 +pr_000304,acct_0018,user_000304,2026-02-24T11:00:00Z,rejected,11000,supplier_0304 +pr_000305,acct_0019,user_000305,2026-02-25T11:00:00Z,cancelled,12000,supplier_0305 +pr_000306,acct_0020,user_000306,2026-02-26T11:00:00Z,draft,13000,supplier_0306 +pr_000307,acct_0021,user_000307,2026-02-27T11:00:00Z,submitted,14000,supplier_0307 +pr_000308,acct_0022,user_000308,2026-02-28T11:00:00Z,approved,15000,supplier_0308 +pr_000309,acct_0023,user_000309,2026-02-01T11:00:00Z,rejected,16000,supplier_0309 
+pr_000310,acct_0024,user_000310,2026-02-02T11:00:00Z,cancelled,17000,supplier_0310 +pr_000311,acct_0025,user_000311,2026-02-03T11:00:00Z,draft,18000,supplier_0311 +pr_000312,acct_0026,user_000312,2026-02-04T11:00:00Z,submitted,19000,supplier_0312 +pr_000313,acct_0027,user_000313,2026-02-05T11:00:00Z,approved,20000,supplier_0313 +pr_000314,acct_0028,user_000314,2026-02-06T11:00:00Z,rejected,21000,supplier_0314 +pr_000315,acct_0029,user_000315,2026-02-07T11:00:00Z,cancelled,22000,supplier_0315 +pr_000316,acct_0030,user_000316,2026-02-08T11:00:00Z,draft,23000,supplier_0316 +pr_000317,acct_0031,user_000317,2026-02-09T11:00:00Z,submitted,24000,supplier_0317 +pr_000318,acct_0032,user_000318,2026-02-10T11:00:00Z,approved,25000,supplier_0318 +pr_000319,acct_0033,user_000319,2026-02-11T11:00:00Z,rejected,26000,supplier_0319 +pr_000320,acct_0034,user_000320,2026-02-12T11:00:00Z,cancelled,27000,supplier_0320 +pr_000321,acct_0035,user_000321,2026-02-13T11:00:00Z,draft,28000,supplier_0321 +pr_000322,acct_0036,user_000322,2026-02-14T11:00:00Z,submitted,29000,supplier_0322 +pr_000323,acct_0037,user_000323,2026-02-15T11:00:00Z,approved,30000,supplier_0323 +pr_000324,acct_0038,user_000324,2026-02-16T11:00:00Z,rejected,31000,supplier_0324 +pr_000325,acct_0039,user_000325,2026-02-17T11:00:00Z,cancelled,32000,supplier_0325 +pr_000326,acct_0040,user_000326,2026-02-18T11:00:00Z,draft,33000,supplier_0326 +pr_000327,acct_0041,user_000327,2026-02-19T11:00:00Z,submitted,34000,supplier_0327 +pr_000328,acct_0042,user_000328,2026-02-20T11:00:00Z,approved,35000,supplier_0328 +pr_000329,acct_0043,user_000329,2026-02-21T11:00:00Z,rejected,36000,supplier_0329 +pr_000330,acct_0044,user_000330,2026-02-22T11:00:00Z,cancelled,37000,supplier_0330 +pr_000331,acct_0045,user_000331,2026-02-23T11:00:00Z,draft,38000,supplier_0331 +pr_000332,acct_0046,user_000332,2026-02-24T11:00:00Z,submitted,39000,supplier_0332 +pr_000333,acct_0047,user_000333,2026-02-25T11:00:00Z,approved,40000,supplier_0333 
+pr_000334,acct_0048,user_000334,2026-02-26T11:00:00Z,rejected,41000,supplier_0334 +pr_000335,acct_0049,user_000335,2026-02-27T11:00:00Z,cancelled,42000,supplier_0335 +pr_000336,acct_0050,user_000336,2026-02-28T11:00:00Z,draft,43000,supplier_0336 +pr_000337,acct_0051,user_000337,2026-02-01T11:00:00Z,submitted,44000,supplier_0337 +pr_000338,acct_0052,user_000338,2026-02-02T11:00:00Z,approved,45000,supplier_0338 +pr_000339,acct_0053,user_000339,2026-02-03T11:00:00Z,rejected,46000,supplier_0339 +pr_000340,acct_0054,user_000340,2026-02-04T11:00:00Z,cancelled,47000,supplier_0340 +pr_000341,acct_0055,user_000341,2026-02-05T11:00:00Z,draft,48000,supplier_0341 +pr_000342,acct_0056,user_000342,2026-02-06T11:00:00Z,submitted,49000,supplier_0342 +pr_000343,acct_0057,user_000343,2026-02-07T11:00:00Z,approved,50000,supplier_0343 +pr_000344,acct_0058,user_000344,2026-02-08T11:00:00Z,rejected,51000,supplier_0344 +pr_000345,acct_0059,user_000345,2026-02-09T11:00:00Z,cancelled,52000,supplier_0345 +pr_000346,acct_0060,user_000346,2026-02-10T11:00:00Z,draft,53000,supplier_0346 +pr_000347,acct_0061,user_000347,2026-02-11T11:00:00Z,submitted,54000,supplier_0347 +pr_000348,acct_0062,user_000348,2026-02-12T11:00:00Z,approved,55000,supplier_0348 +pr_000349,acct_0063,user_000349,2026-02-13T11:00:00Z,rejected,56000,supplier_0349 +pr_000350,acct_0064,user_000350,2026-02-14T11:00:00Z,cancelled,57000,supplier_0350 +pr_000351,acct_0065,user_000351,2026-02-15T11:00:00Z,draft,58000,supplier_0351 +pr_000352,acct_0066,user_000352,2026-02-16T11:00:00Z,submitted,59000,supplier_0352 +pr_000353,acct_0067,user_000353,2026-02-17T11:00:00Z,approved,60000,supplier_0353 +pr_000354,acct_0068,user_000354,2026-02-18T11:00:00Z,rejected,61000,supplier_0354 +pr_000355,acct_0069,user_000355,2026-02-19T11:00:00Z,cancelled,62000,supplier_0355 +pr_000356,acct_0070,user_000356,2026-02-20T11:00:00Z,draft,63000,supplier_0356 +pr_000357,acct_0071,user_000357,2026-02-21T11:00:00Z,submitted,64000,supplier_0357 
+pr_000358,acct_0072,user_000358,2026-02-22T11:00:00Z,approved,65000,supplier_0358 +pr_000359,acct_0073,user_000359,2026-02-23T11:00:00Z,rejected,66000,supplier_0359 +pr_000360,acct_0074,user_000360,2026-02-24T11:00:00Z,cancelled,67000,supplier_0360 +pr_000361,acct_0075,user_000361,2026-02-25T11:00:00Z,draft,68000,supplier_0361 +pr_000362,acct_0076,user_000362,2026-02-26T11:00:00Z,submitted,69000,supplier_0362 +pr_000363,acct_0077,user_000363,2026-02-27T11:00:00Z,approved,70000,supplier_0363 +pr_000364,acct_0078,user_000364,2026-02-28T11:00:00Z,rejected,71000,supplier_0364 +pr_000365,acct_0079,user_000365,2026-02-01T11:00:00Z,cancelled,72000,supplier_0365 +pr_000366,acct_0080,user_000366,2026-02-02T11:00:00Z,draft,73000,supplier_0366 +pr_000367,acct_0081,user_000367,2026-02-03T11:00:00Z,submitted,74000,supplier_0367 +pr_000368,acct_0082,user_000368,2026-02-04T11:00:00Z,approved,75000,supplier_0368 +pr_000369,acct_0083,user_000369,2026-02-05T11:00:00Z,rejected,76000,supplier_0369 +pr_000370,acct_0084,user_000370,2026-02-06T11:00:00Z,cancelled,77000,supplier_0370 +pr_000371,acct_0085,user_000371,2026-02-07T11:00:00Z,draft,78000,supplier_0371 +pr_000372,acct_0086,user_000372,2026-02-08T11:00:00Z,submitted,79000,supplier_0372 +pr_000373,acct_0087,user_000373,2026-02-09T11:00:00Z,approved,80000,supplier_0373 +pr_000374,acct_0088,user_000374,2026-02-10T11:00:00Z,rejected,81000,supplier_0374 +pr_000375,acct_0089,user_000375,2026-02-11T11:00:00Z,cancelled,82000,supplier_0375 +pr_000376,acct_0090,user_000376,2026-02-12T11:00:00Z,draft,83000,supplier_0376 +pr_000377,acct_0091,user_000377,2026-02-13T11:00:00Z,submitted,84000,supplier_0377 +pr_000378,acct_0092,user_000378,2026-02-14T11:00:00Z,approved,85000,supplier_0378 +pr_000379,acct_0093,user_000379,2026-02-15T11:00:00Z,rejected,86000,supplier_0379 +pr_000380,acct_0094,user_000380,2026-02-16T11:00:00Z,cancelled,87000,supplier_0380 +pr_000381,acct_0095,user_000381,2026-02-17T11:00:00Z,draft,88000,supplier_0381 
+pr_000382,acct_0096,user_000382,2026-02-18T11:00:00Z,submitted,89000,supplier_0382 +pr_000383,acct_0097,user_000383,2026-02-19T11:00:00Z,approved,90000,supplier_0383 +pr_000384,acct_0098,user_000384,2026-02-20T11:00:00Z,rejected,91000,supplier_0384 +pr_000385,acct_0099,user_000385,2026-02-21T11:00:00Z,cancelled,92000,supplier_0385 +pr_000386,acct_0100,user_000386,2026-02-22T11:00:00Z,draft,93000,supplier_0386 +pr_000387,acct_0101,user_000387,2026-02-23T11:00:00Z,submitted,94000,supplier_0387 +pr_000388,acct_0102,user_000388,2026-02-24T11:00:00Z,approved,95000,supplier_0388 +pr_000389,acct_0103,user_000389,2026-02-25T11:00:00Z,rejected,96000,supplier_0389 +pr_000390,acct_0104,user_000390,2026-02-26T11:00:00Z,cancelled,97000,supplier_0390 +pr_000391,acct_0105,user_000391,2026-02-27T11:00:00Z,draft,98000,supplier_0391 +pr_000392,acct_0106,user_000392,2026-02-28T11:00:00Z,submitted,99000,supplier_0392 +pr_000393,acct_0107,user_000393,2026-02-01T11:00:00Z,approved,100000,supplier_0393 +pr_000394,acct_0108,user_000394,2026-02-02T11:00:00Z,rejected,101000,supplier_0394 +pr_000395,acct_0109,user_000395,2026-02-03T11:00:00Z,cancelled,102000,supplier_0395 +pr_000396,acct_0110,user_000396,2026-02-04T11:00:00Z,draft,103000,supplier_0396 +pr_000397,acct_0111,user_000397,2026-02-05T11:00:00Z,submitted,104000,supplier_0397 +pr_000398,acct_0112,user_000398,2026-02-06T11:00:00Z,approved,105000,supplier_0398 +pr_000399,acct_0113,user_000399,2026-02-07T11:00:00Z,rejected,106000,supplier_0399 +pr_000400,acct_0114,user_000400,2026-02-08T11:00:00Z,cancelled,107000,supplier_0400 +pr_000401,acct_0115,user_000401,2026-02-09T11:00:00Z,draft,108000,supplier_0401 +pr_000402,acct_0116,user_000402,2026-02-10T11:00:00Z,submitted,109000,supplier_0402 +pr_000403,acct_0117,user_000403,2026-02-11T11:00:00Z,approved,110000,supplier_0403 +pr_000404,acct_0118,user_000404,2026-02-12T11:00:00Z,rejected,10000,supplier_0404 
+pr_000405,acct_0119,user_000405,2026-02-13T11:00:00Z,cancelled,11000,supplier_0405 +pr_000406,acct_0120,user_000406,2026-02-14T11:00:00Z,draft,12000,supplier_0406 +pr_000407,acct_0121,user_000407,2026-02-15T11:00:00Z,submitted,13000,supplier_0407 +pr_000408,acct_0122,user_000408,2026-02-16T11:00:00Z,approved,14000,supplier_0408 +pr_000409,acct_0123,user_000409,2026-02-17T11:00:00Z,rejected,15000,supplier_0409 +pr_000410,acct_0124,user_000410,2026-02-18T11:00:00Z,cancelled,16000,supplier_0410 +pr_000411,acct_0125,user_000411,2026-02-19T11:00:00Z,draft,17000,supplier_0411 +pr_000412,acct_0126,user_000412,2026-02-20T11:00:00Z,submitted,18000,supplier_0412 +pr_000413,acct_0127,user_000413,2026-02-21T11:00:00Z,approved,19000,supplier_0413 +pr_000414,acct_0128,user_000414,2026-02-22T11:00:00Z,rejected,20000,supplier_0414 +pr_000415,acct_0129,user_000415,2026-02-23T11:00:00Z,cancelled,21000,supplier_0415 +pr_000416,acct_0130,user_000416,2026-02-24T11:00:00Z,draft,22000,supplier_0416 +pr_000417,acct_0131,user_000417,2026-02-25T11:00:00Z,submitted,23000,supplier_0417 +pr_000418,acct_0132,user_000418,2026-02-26T11:00:00Z,approved,24000,supplier_0418 +pr_000419,acct_0133,user_000419,2026-02-27T11:00:00Z,rejected,25000,supplier_0419 +pr_000420,acct_0134,user_000420,2026-02-28T11:00:00Z,cancelled,26000,supplier_0420 +pr_000421,acct_0135,user_000421,2026-02-01T11:00:00Z,draft,27000,supplier_0421 +pr_000422,acct_0136,user_000422,2026-02-02T11:00:00Z,submitted,28000,supplier_0422 +pr_000423,acct_0137,user_000423,2026-02-03T11:00:00Z,approved,29000,supplier_0423 +pr_000424,acct_0138,user_000424,2026-02-04T11:00:00Z,rejected,30000,supplier_0424 +pr_000425,acct_0139,user_000425,2026-02-05T11:00:00Z,cancelled,31000,supplier_0425 +pr_000426,acct_0140,user_000426,2026-02-06T11:00:00Z,draft,32000,supplier_0426 +pr_000427,acct_0141,user_000427,2026-02-07T11:00:00Z,submitted,33000,supplier_0427 +pr_000428,acct_0142,user_000428,2026-02-08T11:00:00Z,approved,34000,supplier_0428 
+pr_000429,acct_0143,user_000429,2026-02-09T11:00:00Z,rejected,35000,supplier_0429 +pr_000430,acct_0144,user_000430,2026-02-10T11:00:00Z,cancelled,36000,supplier_0430 +pr_000431,acct_0145,user_000431,2026-02-11T11:00:00Z,draft,37000,supplier_0431 +pr_000432,acct_0146,user_000432,2026-02-12T11:00:00Z,submitted,38000,supplier_0432 +pr_000433,acct_0147,user_000433,2026-02-13T11:00:00Z,approved,39000,supplier_0433 +pr_000434,acct_0148,user_000434,2026-02-14T11:00:00Z,rejected,40000,supplier_0434 +pr_000435,acct_0149,user_000435,2026-02-15T11:00:00Z,cancelled,41000,supplier_0435 +pr_000436,acct_0150,user_000436,2026-02-16T11:00:00Z,draft,42000,supplier_0436 +pr_000437,acct_0151,user_000437,2026-02-17T11:00:00Z,submitted,43000,supplier_0437 +pr_000438,acct_0152,user_000438,2026-02-18T11:00:00Z,approved,44000,supplier_0438 +pr_000439,acct_0153,user_000439,2026-02-19T11:00:00Z,rejected,45000,supplier_0439 +pr_000440,acct_0154,user_000440,2026-02-20T11:00:00Z,cancelled,46000,supplier_0440 +pr_000441,acct_0155,user_000441,2026-02-21T11:00:00Z,draft,47000,supplier_0441 +pr_000442,acct_0156,user_000442,2026-02-22T11:00:00Z,submitted,48000,supplier_0442 +pr_000443,acct_0157,user_000443,2026-02-23T11:00:00Z,approved,49000,supplier_0443 +pr_000444,acct_0158,user_000444,2026-02-24T11:00:00Z,rejected,50000,supplier_0444 +pr_000445,acct_0159,user_000445,2026-02-25T11:00:00Z,cancelled,51000,supplier_0445 +pr_000446,acct_0160,user_000446,2026-02-26T11:00:00Z,draft,52000,supplier_0446 +pr_000447,acct_0161,user_000447,2026-02-27T11:00:00Z,submitted,53000,supplier_0447 +pr_000448,acct_0162,user_000448,2026-02-28T11:00:00Z,approved,54000,supplier_0448 +pr_000449,acct_0163,user_000449,2026-02-01T11:00:00Z,rejected,55000,supplier_0449 +pr_000450,acct_0164,user_000450,2026-02-02T11:00:00Z,cancelled,56000,supplier_0450 +pr_000451,acct_0165,user_000451,2026-02-03T11:00:00Z,draft,57000,supplier_0451 +pr_000452,acct_0166,user_000452,2026-02-04T11:00:00Z,submitted,58000,supplier_0452 
+pr_000453,acct_0167,user_000453,2026-02-05T11:00:00Z,approved,59000,supplier_0453 +pr_000454,acct_0168,user_000454,2026-02-06T11:00:00Z,rejected,60000,supplier_0454 +pr_000455,acct_0169,user_000455,2026-02-07T11:00:00Z,cancelled,61000,supplier_0455 +pr_000456,acct_0170,user_000456,2026-02-08T11:00:00Z,draft,62000,supplier_0456 +pr_000457,acct_0171,user_000457,2026-02-09T11:00:00Z,submitted,63000,supplier_0457 +pr_000458,acct_0172,user_000458,2026-02-10T11:00:00Z,approved,64000,supplier_0458 +pr_000459,acct_0173,user_000459,2026-02-11T11:00:00Z,rejected,65000,supplier_0459 +pr_000460,acct_0174,user_000460,2026-02-12T11:00:00Z,cancelled,66000,supplier_0460 +pr_000461,acct_0175,user_000461,2026-02-13T11:00:00Z,draft,67000,supplier_0461 +pr_000462,acct_0176,user_000462,2026-02-14T11:00:00Z,submitted,68000,supplier_0462 +pr_000463,acct_0177,user_000463,2026-02-15T11:00:00Z,approved,69000,supplier_0463 +pr_000464,acct_0178,user_000464,2026-02-16T11:00:00Z,rejected,70000,supplier_0464 +pr_000465,acct_0179,user_000465,2026-02-17T11:00:00Z,cancelled,71000,supplier_0465 +pr_000466,acct_0180,user_000466,2026-02-18T11:00:00Z,draft,72000,supplier_0466 +pr_000467,acct_0181,user_000467,2026-02-19T11:00:00Z,submitted,73000,supplier_0467 +pr_000468,acct_0182,user_000468,2026-02-20T11:00:00Z,approved,74000,supplier_0468 +pr_000469,acct_0183,user_000469,2026-02-21T11:00:00Z,rejected,75000,supplier_0469 +pr_000470,acct_0184,user_000470,2026-02-22T11:00:00Z,cancelled,76000,supplier_0470 +pr_000471,acct_0185,user_000471,2026-02-23T11:00:00Z,draft,77000,supplier_0471 +pr_000472,acct_0186,user_000472,2026-02-24T11:00:00Z,submitted,78000,supplier_0472 +pr_000473,acct_0187,user_000473,2026-02-25T11:00:00Z,approved,79000,supplier_0473 +pr_000474,acct_0188,user_000474,2026-02-26T11:00:00Z,rejected,80000,supplier_0474 +pr_000475,acct_0189,user_000475,2026-02-27T11:00:00Z,cancelled,81000,supplier_0475 +pr_000476,acct_0190,user_000476,2026-02-28T11:00:00Z,draft,82000,supplier_0476 
+pr_000477,acct_0001,user_000477,2026-02-01T11:00:00Z,submitted,83000,supplier_0477 +pr_000478,acct_0002,user_000478,2026-02-02T11:00:00Z,approved,84000,supplier_0478 +pr_000479,acct_0003,user_000479,2026-02-03T11:00:00Z,rejected,85000,supplier_0479 +pr_000480,acct_0004,user_000480,2026-02-04T11:00:00Z,cancelled,86000,supplier_0480 +pr_000481,acct_0005,user_000481,2026-02-05T11:00:00Z,draft,87000,supplier_0481 +pr_000482,acct_0006,user_000482,2026-02-06T11:00:00Z,submitted,88000,supplier_0482 +pr_000483,acct_0007,user_000483,2026-02-07T11:00:00Z,approved,89000,supplier_0483 +pr_000484,acct_0008,user_000484,2026-02-08T11:00:00Z,rejected,90000,supplier_0484 +pr_000485,acct_0009,user_000485,2026-02-09T11:00:00Z,cancelled,91000,supplier_0485 +pr_000486,acct_0010,user_000486,2026-02-10T11:00:00Z,draft,92000,supplier_0486 +pr_000487,acct_0011,user_000487,2026-02-11T11:00:00Z,submitted,93000,supplier_0487 +pr_000488,acct_0012,user_000488,2026-02-12T11:00:00Z,approved,94000,supplier_0488 +pr_000489,acct_0013,user_000489,2026-02-13T11:00:00Z,rejected,95000,supplier_0489 +pr_000490,acct_0014,user_000490,2026-02-14T11:00:00Z,cancelled,96000,supplier_0490 +pr_000491,acct_0015,user_000491,2026-02-15T11:00:00Z,draft,97000,supplier_0491 +pr_000492,acct_0016,user_000492,2026-02-16T11:00:00Z,submitted,98000,supplier_0492 +pr_000493,acct_0017,user_000493,2026-02-17T11:00:00Z,approved,99000,supplier_0493 +pr_000494,acct_0018,user_000494,2026-02-18T11:00:00Z,rejected,100000,supplier_0494 +pr_000495,acct_0019,user_000495,2026-02-19T11:00:00Z,cancelled,101000,supplier_0495 +pr_000496,acct_0020,user_000496,2026-02-20T11:00:00Z,draft,102000,supplier_0496 +pr_000497,acct_0021,user_000497,2026-02-21T11:00:00Z,submitted,103000,supplier_0497 +pr_000498,acct_0022,user_000498,2026-02-22T11:00:00Z,approved,104000,supplier_0498 +pr_000499,acct_0023,user_000499,2026-02-23T11:00:00Z,rejected,105000,supplier_0499 +pr_000500,acct_0024,user_000500,2026-02-24T11:00:00Z,cancelled,106000,supplier_0500 
+pr_000501,acct_0025,user_000501,2026-02-25T11:00:00Z,draft,107000,supplier_0501 +pr_000502,acct_0026,user_000502,2026-02-26T11:00:00Z,submitted,108000,supplier_0502 +pr_000503,acct_0027,user_000503,2026-02-27T11:00:00Z,approved,109000,supplier_0503 +pr_000504,acct_0028,user_000504,2026-02-28T11:00:00Z,rejected,110000,supplier_0504 +pr_000505,acct_0029,user_000505,2026-02-01T11:00:00Z,cancelled,10000,supplier_0505 +pr_000506,acct_0030,user_000506,2026-02-02T11:00:00Z,draft,11000,supplier_0506 +pr_000507,acct_0031,user_000507,2026-02-03T11:00:00Z,submitted,12000,supplier_0507 +pr_000508,acct_0032,user_000508,2026-02-04T11:00:00Z,approved,13000,supplier_0508 +pr_000509,acct_0033,user_000509,2026-02-05T11:00:00Z,rejected,14000,supplier_0509 +pr_000510,acct_0034,user_000510,2026-02-06T11:00:00Z,cancelled,15000,supplier_0510 +pr_000511,acct_0035,user_000511,2026-02-07T11:00:00Z,draft,16000,supplier_0511 +pr_000512,acct_0036,user_000512,2026-02-08T11:00:00Z,submitted,17000,supplier_0512 +pr_000513,acct_0037,user_000513,2026-02-09T11:00:00Z,approved,18000,supplier_0513 +pr_000514,acct_0038,user_000514,2026-02-10T11:00:00Z,rejected,19000,supplier_0514 +pr_000515,acct_0039,user_000515,2026-02-11T11:00:00Z,cancelled,20000,supplier_0515 +pr_000516,acct_0040,user_000516,2026-02-12T11:00:00Z,draft,21000,supplier_0516 +pr_000517,acct_0041,user_000517,2026-02-13T11:00:00Z,submitted,22000,supplier_0517 +pr_000518,acct_0042,user_000518,2026-02-14T11:00:00Z,approved,23000,supplier_0518 +pr_000519,acct_0043,user_000519,2026-02-15T11:00:00Z,rejected,24000,supplier_0519 +pr_000520,acct_0044,user_000520,2026-02-16T11:00:00Z,cancelled,25000,supplier_0520 +pr_000521,acct_0045,user_000521,2026-02-17T11:00:00Z,draft,26000,supplier_0521 +pr_000522,acct_0046,user_000522,2026-02-18T11:00:00Z,submitted,27000,supplier_0522 +pr_000523,acct_0047,user_000523,2026-02-19T11:00:00Z,approved,28000,supplier_0523 +pr_000524,acct_0048,user_000524,2026-02-20T11:00:00Z,rejected,29000,supplier_0524 
+pr_000525,acct_0049,user_000525,2026-02-21T11:00:00Z,cancelled,30000,supplier_0525 +pr_000526,acct_0050,user_000526,2026-02-22T11:00:00Z,draft,31000,supplier_0526 +pr_000527,acct_0051,user_000527,2026-02-23T11:00:00Z,submitted,32000,supplier_0527 +pr_000528,acct_0052,user_000528,2026-02-24T11:00:00Z,approved,33000,supplier_0528 +pr_000529,acct_0053,user_000529,2026-02-25T11:00:00Z,rejected,34000,supplier_0529 +pr_000530,acct_0054,user_000530,2026-02-26T11:00:00Z,cancelled,35000,supplier_0530 +pr_000531,acct_0055,user_000531,2026-02-27T11:00:00Z,draft,36000,supplier_0531 +pr_000532,acct_0056,user_000532,2026-02-28T11:00:00Z,submitted,37000,supplier_0532 +pr_000533,acct_0057,user_000533,2026-02-01T11:00:00Z,approved,38000,supplier_0533 +pr_000534,acct_0058,user_000534,2026-02-02T11:00:00Z,rejected,39000,supplier_0534 +pr_000535,acct_0059,user_000535,2026-02-03T11:00:00Z,cancelled,40000,supplier_0535 +pr_000536,acct_0060,user_000536,2026-02-04T11:00:00Z,draft,41000,supplier_0536 +pr_000537,acct_0061,user_000537,2026-02-05T11:00:00Z,submitted,42000,supplier_0537 +pr_000538,acct_0062,user_000538,2026-02-06T11:00:00Z,approved,43000,supplier_0538 +pr_000539,acct_0063,user_000539,2026-02-07T11:00:00Z,rejected,44000,supplier_0539 +pr_000540,acct_0064,user_000540,2026-02-08T11:00:00Z,cancelled,45000,supplier_0540 +pr_000541,acct_0065,user_000541,2026-02-09T11:00:00Z,draft,46000,supplier_0541 +pr_000542,acct_0066,user_000542,2026-02-10T11:00:00Z,submitted,47000,supplier_0542 +pr_000543,acct_0067,user_000543,2026-02-11T11:00:00Z,approved,48000,supplier_0543 +pr_000544,acct_0068,user_000544,2026-02-12T11:00:00Z,rejected,49000,supplier_0544 +pr_000545,acct_0069,user_000545,2026-02-13T11:00:00Z,cancelled,50000,supplier_0545 +pr_000546,acct_0070,user_000546,2026-02-14T11:00:00Z,draft,51000,supplier_0546 +pr_000547,acct_0071,user_000547,2026-02-15T11:00:00Z,submitted,52000,supplier_0547 +pr_000548,acct_0072,user_000548,2026-02-16T11:00:00Z,approved,53000,supplier_0548 
+pr_000549,acct_0073,user_000549,2026-02-17T11:00:00Z,rejected,54000,supplier_0549 +pr_000550,acct_0074,user_000550,2026-02-18T11:00:00Z,cancelled,55000,supplier_0550 +pr_000551,acct_0075,user_000551,2026-02-19T11:00:00Z,draft,56000,supplier_0551 +pr_000552,acct_0076,user_000552,2026-02-20T11:00:00Z,submitted,57000,supplier_0552 +pr_000553,acct_0077,user_000553,2026-02-21T11:00:00Z,approved,58000,supplier_0553 +pr_000554,acct_0078,user_000554,2026-02-22T11:00:00Z,rejected,59000,supplier_0554 +pr_000555,acct_0079,user_000555,2026-02-23T11:00:00Z,cancelled,60000,supplier_0555 +pr_000556,acct_0080,user_000556,2026-02-24T11:00:00Z,draft,61000,supplier_0556 +pr_000557,acct_0081,user_000557,2026-02-25T11:00:00Z,submitted,62000,supplier_0557 +pr_000558,acct_0082,user_000558,2026-02-26T11:00:00Z,approved,63000,supplier_0558 +pr_000559,acct_0083,user_000559,2026-02-27T11:00:00Z,rejected,64000,supplier_0559 +pr_000560,acct_0084,user_000560,2026-02-28T11:00:00Z,cancelled,65000,supplier_0560 +pr_000561,acct_0085,user_000561,2026-02-01T11:00:00Z,draft,66000,supplier_0561 +pr_000562,acct_0086,user_000562,2026-02-02T11:00:00Z,submitted,67000,supplier_0562 +pr_000563,acct_0087,user_000563,2026-02-03T11:00:00Z,approved,68000,supplier_0563 +pr_000564,acct_0088,user_000564,2026-02-04T11:00:00Z,rejected,69000,supplier_0564 +pr_000565,acct_0089,user_000565,2026-02-05T11:00:00Z,cancelled,70000,supplier_0565 +pr_000566,acct_0090,user_000566,2026-02-06T11:00:00Z,draft,71000,supplier_0566 +pr_000567,acct_0091,user_000567,2026-02-07T11:00:00Z,submitted,72000,supplier_0567 +pr_000568,acct_0092,user_000568,2026-02-08T11:00:00Z,approved,73000,supplier_0568 +pr_000569,acct_0093,user_000569,2026-02-09T11:00:00Z,rejected,74000,supplier_0569 +pr_000570,acct_0094,user_000570,2026-02-10T11:00:00Z,cancelled,75000,supplier_0570 +pr_000571,acct_0095,user_000571,2026-02-11T11:00:00Z,draft,76000,supplier_0571 +pr_000572,acct_0096,user_000572,2026-02-12T11:00:00Z,submitted,77000,supplier_0572 
+pr_000573,acct_0097,user_000573,2026-02-13T11:00:00Z,approved,78000,supplier_0573 +pr_000574,acct_0098,user_000574,2026-02-14T11:00:00Z,rejected,79000,supplier_0574 +pr_000575,acct_0099,user_000575,2026-02-15T11:00:00Z,cancelled,80000,supplier_0575 +pr_000576,acct_0100,user_000576,2026-02-16T11:00:00Z,draft,81000,supplier_0576 +pr_000577,acct_0101,user_000577,2026-02-17T11:00:00Z,submitted,82000,supplier_0577 +pr_000578,acct_0102,user_000578,2026-02-18T11:00:00Z,approved,83000,supplier_0578 +pr_000579,acct_0103,user_000579,2026-02-19T11:00:00Z,rejected,84000,supplier_0579 +pr_000580,acct_0104,user_000580,2026-02-20T11:00:00Z,cancelled,85000,supplier_0580 +pr_000581,acct_0105,user_000581,2026-02-21T11:00:00Z,draft,86000,supplier_0581 +pr_000582,acct_0106,user_000582,2026-02-22T11:00:00Z,submitted,87000,supplier_0582 +pr_000583,acct_0107,user_000583,2026-02-23T11:00:00Z,approved,88000,supplier_0583 +pr_000584,acct_0108,user_000584,2026-02-24T11:00:00Z,rejected,89000,supplier_0584 +pr_000585,acct_0109,user_000585,2026-02-25T11:00:00Z,cancelled,90000,supplier_0585 +pr_000586,acct_0110,user_000586,2026-02-26T11:00:00Z,draft,91000,supplier_0586 +pr_000587,acct_0111,user_000587,2026-02-27T11:00:00Z,submitted,92000,supplier_0587 +pr_000588,acct_0112,user_000588,2026-02-28T11:00:00Z,approved,93000,supplier_0588 +pr_000589,acct_0113,user_000589,2026-02-01T11:00:00Z,rejected,94000,supplier_0589 +pr_000590,acct_0114,user_000590,2026-02-02T11:00:00Z,cancelled,95000,supplier_0590 +pr_000591,acct_0115,user_000591,2026-02-03T11:00:00Z,draft,96000,supplier_0591 +pr_000592,acct_0116,user_000592,2026-02-04T11:00:00Z,submitted,97000,supplier_0592 +pr_000593,acct_0117,user_000593,2026-02-05T11:00:00Z,approved,98000,supplier_0593 +pr_000594,acct_0118,user_000594,2026-02-06T11:00:00Z,rejected,99000,supplier_0594 +pr_000595,acct_0119,user_000595,2026-02-07T11:00:00Z,cancelled,100000,supplier_0595 +pr_000596,acct_0120,user_000596,2026-02-08T11:00:00Z,draft,101000,supplier_0596 
+pr_000597,acct_0121,user_000597,2026-02-09T11:00:00Z,submitted,102000,supplier_0597 +pr_000598,acct_0122,user_000598,2026-02-10T11:00:00Z,approved,103000,supplier_0598 +pr_000599,acct_0123,user_000599,2026-02-11T11:00:00Z,rejected,104000,supplier_0599 +pr_000600,acct_0124,user_000600,2026-02-12T11:00:00Z,cancelled,105000,supplier_0600 +pr_000601,acct_0125,user_000601,2026-02-13T11:00:00Z,draft,106000,supplier_0601 +pr_000602,acct_0126,user_000602,2026-02-14T11:00:00Z,submitted,107000,supplier_0602 +pr_000603,acct_0127,user_000603,2026-02-15T11:00:00Z,approved,108000,supplier_0603 +pr_000604,acct_0128,user_000604,2026-02-16T11:00:00Z,rejected,109000,supplier_0604 +pr_000605,acct_0129,user_000605,2026-02-17T11:00:00Z,cancelled,110000,supplier_0605 +pr_000606,acct_0130,user_000606,2026-02-18T11:00:00Z,draft,10000,supplier_0606 +pr_000607,acct_0131,user_000607,2026-02-19T11:00:00Z,submitted,11000,supplier_0607 +pr_000608,acct_0132,user_000608,2026-02-20T11:00:00Z,approved,12000,supplier_0608 +pr_000609,acct_0133,user_000609,2026-02-21T11:00:00Z,rejected,13000,supplier_0609 +pr_000610,acct_0134,user_000610,2026-02-22T11:00:00Z,cancelled,14000,supplier_0610 +pr_000611,acct_0135,user_000611,2026-02-23T11:00:00Z,draft,15000,supplier_0611 +pr_000612,acct_0136,user_000612,2026-02-24T11:00:00Z,submitted,16000,supplier_0612 +pr_000613,acct_0137,user_000613,2026-02-25T11:00:00Z,approved,17000,supplier_0613 +pr_000614,acct_0138,user_000614,2026-02-26T11:00:00Z,rejected,18000,supplier_0614 +pr_000615,acct_0139,user_000615,2026-02-27T11:00:00Z,cancelled,19000,supplier_0615 +pr_000616,acct_0140,user_000616,2026-02-28T11:00:00Z,draft,20000,supplier_0616 +pr_000617,acct_0141,user_000617,2026-02-01T11:00:00Z,submitted,21000,supplier_0617 +pr_000618,acct_0142,user_000618,2026-02-02T11:00:00Z,approved,22000,supplier_0618 +pr_000619,acct_0143,user_000619,2026-02-03T11:00:00Z,rejected,23000,supplier_0619 +pr_000620,acct_0144,user_000620,2026-02-04T11:00:00Z,cancelled,24000,supplier_0620 
+pr_000621,acct_0145,user_000621,2026-02-05T11:00:00Z,draft,25000,supplier_0621 +pr_000622,acct_0146,user_000622,2026-02-06T11:00:00Z,submitted,26000,supplier_0622 +pr_000623,acct_0147,user_000623,2026-02-07T11:00:00Z,approved,27000,supplier_0623 +pr_000624,acct_0148,user_000624,2026-02-08T11:00:00Z,rejected,28000,supplier_0624 +pr_000625,acct_0149,user_000625,2026-02-09T11:00:00Z,cancelled,29000,supplier_0625 +pr_000626,acct_0150,user_000626,2026-02-10T11:00:00Z,draft,30000,supplier_0626 +pr_000627,acct_0151,user_000627,2026-02-11T11:00:00Z,submitted,31000,supplier_0627 +pr_000628,acct_0152,user_000628,2026-02-12T11:00:00Z,approved,32000,supplier_0628 +pr_000629,acct_0153,user_000629,2026-02-13T11:00:00Z,rejected,33000,supplier_0629 +pr_000630,acct_0154,user_000630,2026-02-14T11:00:00Z,cancelled,34000,supplier_0630 +pr_000631,acct_0155,user_000631,2026-02-15T11:00:00Z,draft,35000,supplier_0631 +pr_000632,acct_0156,user_000632,2026-02-16T11:00:00Z,submitted,36000,supplier_0632 +pr_000633,acct_0157,user_000633,2026-02-17T11:00:00Z,approved,37000,supplier_0633 +pr_000634,acct_0158,user_000634,2026-02-18T11:00:00Z,rejected,38000,supplier_0634 +pr_000635,acct_0159,user_000635,2026-02-19T11:00:00Z,cancelled,39000,supplier_0635 +pr_000636,acct_0160,user_000636,2026-02-20T11:00:00Z,draft,40000,supplier_0636 +pr_000637,acct_0161,user_000637,2026-02-21T11:00:00Z,submitted,41000,supplier_0637 +pr_000638,acct_0162,user_000638,2026-02-22T11:00:00Z,approved,42000,supplier_0638 +pr_000639,acct_0163,user_000639,2026-02-23T11:00:00Z,rejected,43000,supplier_0639 +pr_000640,acct_0164,user_000640,2026-02-24T11:00:00Z,cancelled,44000,supplier_0640 +pr_000641,acct_0165,user_000641,2026-02-25T11:00:00Z,draft,45000,supplier_0641 +pr_000642,acct_0166,user_000642,2026-02-26T11:00:00Z,submitted,46000,supplier_0642 +pr_000643,acct_0167,user_000643,2026-02-27T11:00:00Z,approved,47000,supplier_0643 +pr_000644,acct_0168,user_000644,2026-02-28T11:00:00Z,rejected,48000,supplier_0644 
+pr_000645,acct_0169,user_000645,2026-02-01T11:00:00Z,cancelled,49000,supplier_0645 +pr_000646,acct_0170,user_000646,2026-02-02T11:00:00Z,draft,50000,supplier_0646 +pr_000647,acct_0171,user_000647,2026-02-03T11:00:00Z,submitted,51000,supplier_0647 +pr_000648,acct_0172,user_000648,2026-02-04T11:00:00Z,approved,52000,supplier_0648 +pr_000649,acct_0173,user_000649,2026-02-05T11:00:00Z,rejected,53000,supplier_0649 +pr_000650,acct_0174,user_000650,2026-02-06T11:00:00Z,cancelled,54000,supplier_0650 +pr_000651,acct_0175,user_000651,2026-02-07T11:00:00Z,draft,55000,supplier_0651 +pr_000652,acct_0176,user_000652,2026-02-08T11:00:00Z,submitted,56000,supplier_0652 +pr_000653,acct_0177,user_000653,2026-02-09T11:00:00Z,approved,57000,supplier_0653 +pr_000654,acct_0178,user_000654,2026-02-10T11:00:00Z,rejected,58000,supplier_0654 +pr_000655,acct_0179,user_000655,2026-02-11T11:00:00Z,cancelled,59000,supplier_0655 +pr_000656,acct_0180,user_000656,2026-02-12T11:00:00Z,draft,60000,supplier_0656 +pr_000657,acct_0181,user_000657,2026-02-13T11:00:00Z,submitted,61000,supplier_0657 +pr_000658,acct_0182,user_000658,2026-02-14T11:00:00Z,approved,62000,supplier_0658 +pr_000659,acct_0183,user_000659,2026-02-15T11:00:00Z,rejected,63000,supplier_0659 +pr_000660,acct_0184,user_000660,2026-02-16T11:00:00Z,cancelled,64000,supplier_0660 +pr_000661,acct_0185,user_000661,2026-02-17T11:00:00Z,draft,65000,supplier_0661 +pr_000662,acct_0186,user_000662,2026-02-18T11:00:00Z,submitted,66000,supplier_0662 +pr_000663,acct_0187,user_000663,2026-02-19T11:00:00Z,approved,67000,supplier_0663 +pr_000664,acct_0188,user_000664,2026-02-20T11:00:00Z,rejected,68000,supplier_0664 +pr_000665,acct_0189,user_000665,2026-02-21T11:00:00Z,cancelled,69000,supplier_0665 +pr_000666,acct_0190,user_000666,2026-02-22T11:00:00Z,draft,70000,supplier_0666 +pr_000667,acct_0001,user_000667,2026-02-23T11:00:00Z,submitted,71000,supplier_0667 +pr_000668,acct_0002,user_000668,2026-02-24T11:00:00Z,approved,72000,supplier_0668 
+pr_000669,acct_0003,user_000669,2026-02-25T11:00:00Z,rejected,73000,supplier_0669 +pr_000670,acct_0004,user_000670,2026-02-26T11:00:00Z,cancelled,74000,supplier_0670 +pr_000671,acct_0005,user_000671,2026-02-27T11:00:00Z,draft,75000,supplier_0671 +pr_000672,acct_0006,user_000672,2026-02-28T11:00:00Z,submitted,76000,supplier_0672 +pr_000673,acct_0007,user_000673,2026-02-01T11:00:00Z,approved,77000,supplier_0673 +pr_000674,acct_0008,user_000674,2026-02-02T11:00:00Z,rejected,78000,supplier_0674 +pr_000675,acct_0009,user_000675,2026-02-03T11:00:00Z,cancelled,79000,supplier_0675 +pr_000676,acct_0010,user_000676,2026-02-04T11:00:00Z,draft,80000,supplier_0676 +pr_000677,acct_0011,user_000677,2026-02-05T11:00:00Z,submitted,81000,supplier_0677 +pr_000678,acct_0012,user_000678,2026-02-06T11:00:00Z,approved,82000,supplier_0678 +pr_000679,acct_0013,user_000679,2026-02-07T11:00:00Z,rejected,83000,supplier_0679 +pr_000680,acct_0014,user_000680,2026-02-08T11:00:00Z,cancelled,84000,supplier_0680 +pr_000681,acct_0015,user_000681,2026-02-09T11:00:00Z,draft,85000,supplier_0681 +pr_000682,acct_0016,user_000682,2026-02-10T11:00:00Z,submitted,86000,supplier_0682 +pr_000683,acct_0017,user_000683,2026-02-11T11:00:00Z,approved,87000,supplier_0683 +pr_000684,acct_0018,user_000684,2026-02-12T11:00:00Z,rejected,88000,supplier_0684 +pr_000685,acct_0019,user_000685,2026-02-13T11:00:00Z,cancelled,89000,supplier_0685 +pr_000686,acct_0020,user_000686,2026-02-14T11:00:00Z,draft,90000,supplier_0686 +pr_000687,acct_0021,user_000687,2026-02-15T11:00:00Z,submitted,91000,supplier_0687 +pr_000688,acct_0022,user_000688,2026-02-16T11:00:00Z,approved,92000,supplier_0688 +pr_000689,acct_0023,user_000689,2026-02-17T11:00:00Z,rejected,93000,supplier_0689 +pr_000690,acct_0024,user_000690,2026-02-18T11:00:00Z,cancelled,94000,supplier_0690 +pr_000691,acct_0025,user_000691,2026-02-19T11:00:00Z,draft,95000,supplier_0691 +pr_000692,acct_0026,user_000692,2026-02-20T11:00:00Z,submitted,96000,supplier_0692 
+pr_000693,acct_0027,user_000693,2026-02-21T11:00:00Z,approved,97000,supplier_0693 +pr_000694,acct_0028,user_000694,2026-02-22T11:00:00Z,rejected,98000,supplier_0694 +pr_000695,acct_0029,user_000695,2026-02-23T11:00:00Z,cancelled,99000,supplier_0695 +pr_000696,acct_0030,user_000696,2026-02-24T11:00:00Z,draft,100000,supplier_0696 +pr_000697,acct_0031,user_000697,2026-02-25T11:00:00Z,submitted,101000,supplier_0697 +pr_000698,acct_0032,user_000698,2026-02-26T11:00:00Z,approved,102000,supplier_0698 +pr_000699,acct_0033,user_000699,2026-02-27T11:00:00Z,rejected,103000,supplier_0699 +pr_000700,acct_0034,user_000700,2026-02-28T11:00:00Z,cancelled,104000,supplier_0700 +pr_000701,acct_0035,user_000701,2026-02-01T11:00:00Z,draft,105000,supplier_0701 +pr_000702,acct_0036,user_000702,2026-02-02T11:00:00Z,submitted,106000,supplier_0702 +pr_000703,acct_0037,user_000703,2026-02-03T11:00:00Z,approved,107000,supplier_0703 +pr_000704,acct_0038,user_000704,2026-02-04T11:00:00Z,rejected,108000,supplier_0704 +pr_000705,acct_0039,user_000705,2026-02-05T11:00:00Z,cancelled,109000,supplier_0705 +pr_000706,acct_0040,user_000706,2026-02-06T11:00:00Z,draft,110000,supplier_0706 +pr_000707,acct_0041,user_000707,2026-02-07T11:00:00Z,submitted,10000,supplier_0707 +pr_000708,acct_0042,user_000708,2026-02-08T11:00:00Z,approved,11000,supplier_0708 +pr_000709,acct_0043,user_000709,2026-02-09T11:00:00Z,rejected,12000,supplier_0709 +pr_000710,acct_0044,user_000710,2026-02-10T11:00:00Z,cancelled,13000,supplier_0710 +pr_000711,acct_0045,user_000711,2026-02-11T11:00:00Z,draft,14000,supplier_0711 +pr_000712,acct_0046,user_000712,2026-02-12T11:00:00Z,submitted,15000,supplier_0712 +pr_000713,acct_0047,user_000713,2026-02-13T11:00:00Z,approved,16000,supplier_0713 +pr_000714,acct_0048,user_000714,2026-02-14T11:00:00Z,rejected,17000,supplier_0714 +pr_000715,acct_0049,user_000715,2026-02-15T11:00:00Z,cancelled,18000,supplier_0715 +pr_000716,acct_0050,user_000716,2026-02-16T11:00:00Z,draft,19000,supplier_0716 
+pr_000717,acct_0051,user_000717,2026-02-17T11:00:00Z,submitted,20000,supplier_0717 +pr_000718,acct_0052,user_000718,2026-02-18T11:00:00Z,approved,21000,supplier_0718 +pr_000719,acct_0053,user_000719,2026-02-19T11:00:00Z,rejected,22000,supplier_0719 +pr_000720,acct_0054,user_000720,2026-02-20T11:00:00Z,cancelled,23000,supplier_0720 +pr_000721,acct_0055,user_000721,2026-02-21T11:00:00Z,draft,24000,supplier_0721 +pr_000722,acct_0056,user_000722,2026-02-22T11:00:00Z,submitted,25000,supplier_0722 +pr_000723,acct_0057,user_000723,2026-02-23T11:00:00Z,approved,26000,supplier_0723 +pr_000724,acct_0058,user_000724,2026-02-24T11:00:00Z,rejected,27000,supplier_0724 +pr_000725,acct_0059,user_000725,2026-02-25T11:00:00Z,cancelled,28000,supplier_0725 +pr_000726,acct_0060,user_000726,2026-02-26T11:00:00Z,draft,29000,supplier_0726 +pr_000727,acct_0061,user_000727,2026-02-27T11:00:00Z,submitted,30000,supplier_0727 +pr_000728,acct_0062,user_000728,2026-02-28T11:00:00Z,approved,31000,supplier_0728 +pr_000729,acct_0063,user_000729,2026-02-01T11:00:00Z,rejected,32000,supplier_0729 +pr_000730,acct_0064,user_000730,2026-02-02T11:00:00Z,cancelled,33000,supplier_0730 +pr_000731,acct_0065,user_000731,2026-02-03T11:00:00Z,draft,34000,supplier_0731 +pr_000732,acct_0066,user_000732,2026-02-04T11:00:00Z,submitted,35000,supplier_0732 +pr_000733,acct_0067,user_000733,2026-02-05T11:00:00Z,approved,36000,supplier_0733 +pr_000734,acct_0068,user_000734,2026-02-06T11:00:00Z,rejected,37000,supplier_0734 +pr_000735,acct_0069,user_000735,2026-02-07T11:00:00Z,cancelled,38000,supplier_0735 +pr_000736,acct_0070,user_000736,2026-02-08T11:00:00Z,draft,39000,supplier_0736 +pr_000737,acct_0071,user_000737,2026-02-09T11:00:00Z,submitted,40000,supplier_0737 +pr_000738,acct_0072,user_000738,2026-02-10T11:00:00Z,approved,41000,supplier_0738 +pr_000739,acct_0073,user_000739,2026-02-11T11:00:00Z,rejected,42000,supplier_0739 +pr_000740,acct_0074,user_000740,2026-02-12T11:00:00Z,cancelled,43000,supplier_0740 
+pr_000741,acct_0075,user_000741,2026-02-13T11:00:00Z,draft,44000,supplier_0741 +pr_000742,acct_0076,user_000742,2026-02-14T11:00:00Z,submitted,45000,supplier_0742 +pr_000743,acct_0077,user_000743,2026-02-15T11:00:00Z,approved,46000,supplier_0743 +pr_000744,acct_0078,user_000744,2026-02-16T11:00:00Z,rejected,47000,supplier_0744 +pr_000745,acct_0079,user_000745,2026-02-17T11:00:00Z,cancelled,48000,supplier_0745 +pr_000746,acct_0080,user_000746,2026-02-18T11:00:00Z,draft,49000,supplier_0746 +pr_000747,acct_0081,user_000747,2026-02-19T11:00:00Z,submitted,50000,supplier_0747 +pr_000748,acct_0082,user_000748,2026-02-20T11:00:00Z,approved,51000,supplier_0748 +pr_000749,acct_0083,user_000749,2026-02-21T11:00:00Z,rejected,52000,supplier_0749 +pr_000750,acct_0084,user_000750,2026-02-22T11:00:00Z,cancelled,53000,supplier_0750 +pr_000751,acct_0085,user_000751,2026-02-23T11:00:00Z,draft,54000,supplier_0751 +pr_000752,acct_0086,user_000752,2026-02-24T11:00:00Z,submitted,55000,supplier_0752 +pr_000753,acct_0087,user_000753,2026-02-25T11:00:00Z,approved,56000,supplier_0753 +pr_000754,acct_0088,user_000754,2026-02-26T11:00:00Z,rejected,57000,supplier_0754 +pr_000755,acct_0089,user_000755,2026-02-27T11:00:00Z,cancelled,58000,supplier_0755 +pr_000756,acct_0090,user_000756,2026-02-28T11:00:00Z,draft,59000,supplier_0756 +pr_000757,acct_0091,user_000757,2026-02-01T11:00:00Z,submitted,60000,supplier_0757 +pr_000758,acct_0092,user_000758,2026-02-02T11:00:00Z,approved,61000,supplier_0758 +pr_000759,acct_0093,user_000759,2026-02-03T11:00:00Z,rejected,62000,supplier_0759 +pr_000760,acct_0094,user_000760,2026-02-04T11:00:00Z,cancelled,63000,supplier_0760 +pr_000761,acct_0095,user_000761,2026-02-05T11:00:00Z,draft,64000,supplier_0761 +pr_000762,acct_0096,user_000762,2026-02-06T11:00:00Z,submitted,65000,supplier_0762 +pr_000763,acct_0097,user_000763,2026-02-07T11:00:00Z,approved,66000,supplier_0763 +pr_000764,acct_0098,user_000764,2026-02-08T11:00:00Z,rejected,67000,supplier_0764 
+pr_000765,acct_0099,user_000765,2026-02-09T11:00:00Z,cancelled,68000,supplier_0765 +pr_000766,acct_0100,user_000766,2026-02-10T11:00:00Z,draft,69000,supplier_0766 +pr_000767,acct_0101,user_000767,2026-02-11T11:00:00Z,submitted,70000,supplier_0767 +pr_000768,acct_0102,user_000768,2026-02-12T11:00:00Z,approved,71000,supplier_0768 +pr_000769,acct_0103,user_000769,2026-02-13T11:00:00Z,rejected,72000,supplier_0769 +pr_000770,acct_0104,user_000770,2026-02-14T11:00:00Z,cancelled,73000,supplier_0770 +pr_000771,acct_0105,user_000771,2026-02-15T11:00:00Z,draft,74000,supplier_0771 +pr_000772,acct_0106,user_000772,2026-02-16T11:00:00Z,submitted,75000,supplier_0772 +pr_000773,acct_0107,user_000773,2026-02-17T11:00:00Z,approved,76000,supplier_0773 +pr_000774,acct_0108,user_000774,2026-02-18T11:00:00Z,rejected,77000,supplier_0774 +pr_000775,acct_0109,user_000775,2026-02-19T11:00:00Z,cancelled,78000,supplier_0775 +pr_000776,acct_0110,user_000776,2026-02-20T11:00:00Z,draft,79000,supplier_0776 +pr_000777,acct_0111,user_000777,2026-02-21T11:00:00Z,submitted,80000,supplier_0777 +pr_000778,acct_0112,user_000778,2026-02-22T11:00:00Z,approved,81000,supplier_0778 +pr_000779,acct_0113,user_000779,2026-02-23T11:00:00Z,rejected,82000,supplier_0779 +pr_000780,acct_0114,user_000780,2026-02-24T11:00:00Z,cancelled,83000,supplier_0780 +pr_000781,acct_0115,user_000781,2026-02-25T11:00:00Z,draft,84000,supplier_0781 +pr_000782,acct_0116,user_000782,2026-02-26T11:00:00Z,submitted,85000,supplier_0782 +pr_000783,acct_0117,user_000783,2026-02-27T11:00:00Z,approved,86000,supplier_0783 +pr_000784,acct_0118,user_000784,2026-02-28T11:00:00Z,rejected,87000,supplier_0784 +pr_000785,acct_0119,user_000785,2026-02-01T11:00:00Z,cancelled,88000,supplier_0785 +pr_000786,acct_0120,user_000786,2026-02-02T11:00:00Z,draft,89000,supplier_0786 +pr_000787,acct_0121,user_000787,2026-02-03T11:00:00Z,submitted,90000,supplier_0787 +pr_000788,acct_0122,user_000788,2026-02-04T11:00:00Z,approved,91000,supplier_0788 
+pr_000789,acct_0123,user_000789,2026-02-05T11:00:00Z,rejected,92000,supplier_0789 +pr_000790,acct_0124,user_000790,2026-02-06T11:00:00Z,cancelled,93000,supplier_0790 +pr_000791,acct_0125,user_000791,2026-02-07T11:00:00Z,draft,94000,supplier_0791 +pr_000792,acct_0126,user_000792,2026-02-08T11:00:00Z,submitted,95000,supplier_0792 +pr_000793,acct_0127,user_000793,2026-02-09T11:00:00Z,approved,96000,supplier_0793 +pr_000794,acct_0128,user_000794,2026-02-10T11:00:00Z,rejected,97000,supplier_0794 +pr_000795,acct_0129,user_000795,2026-02-11T11:00:00Z,cancelled,98000,supplier_0795 +pr_000796,acct_0130,user_000796,2026-02-12T11:00:00Z,draft,99000,supplier_0796 +pr_000797,acct_0131,user_000797,2026-02-13T11:00:00Z,submitted,100000,supplier_0797 +pr_000798,acct_0132,user_000798,2026-02-14T11:00:00Z,approved,101000,supplier_0798 +pr_000799,acct_0133,user_000799,2026-02-15T11:00:00Z,rejected,102000,supplier_0799 +pr_000800,acct_0134,user_000800,2026-02-16T11:00:00Z,cancelled,103000,supplier_0800 +pr_000801,acct_0135,user_000801,2026-02-17T11:00:00Z,draft,104000,supplier_0801 +pr_000802,acct_0136,user_000802,2026-02-18T11:00:00Z,submitted,105000,supplier_0802 +pr_000803,acct_0137,user_000803,2026-02-19T11:00:00Z,approved,106000,supplier_0803 +pr_000804,acct_0138,user_000804,2026-02-20T11:00:00Z,rejected,107000,supplier_0804 +pr_000805,acct_0139,user_000805,2026-02-21T11:00:00Z,cancelled,108000,supplier_0805 +pr_000806,acct_0140,user_000806,2026-02-22T11:00:00Z,draft,109000,supplier_0806 +pr_000807,acct_0141,user_000807,2026-02-23T11:00:00Z,submitted,110000,supplier_0807 +pr_000808,acct_0142,user_000808,2026-02-24T11:00:00Z,approved,10000,supplier_0808 +pr_000809,acct_0143,user_000809,2026-02-25T11:00:00Z,rejected,11000,supplier_0809 +pr_000810,acct_0144,user_000810,2026-02-26T11:00:00Z,cancelled,12000,supplier_0810 +pr_000811,acct_0145,user_000811,2026-02-27T11:00:00Z,draft,13000,supplier_0811 +pr_000812,acct_0146,user_000812,2026-02-28T11:00:00Z,submitted,14000,supplier_0812 
+pr_000813,acct_0147,user_000813,2026-02-01T11:00:00Z,approved,15000,supplier_0813 +pr_000814,acct_0148,user_000814,2026-02-02T11:00:00Z,rejected,16000,supplier_0814 +pr_000815,acct_0149,user_000815,2026-02-03T11:00:00Z,cancelled,17000,supplier_0815 +pr_000816,acct_0150,user_000816,2026-02-04T11:00:00Z,draft,18000,supplier_0816 +pr_000817,acct_0151,user_000817,2026-02-05T11:00:00Z,submitted,19000,supplier_0817 +pr_000818,acct_0152,user_000818,2026-02-06T11:00:00Z,approved,20000,supplier_0818 +pr_000819,acct_0153,user_000819,2026-02-07T11:00:00Z,rejected,21000,supplier_0819 +pr_000820,acct_0154,user_000820,2026-02-08T11:00:00Z,cancelled,22000,supplier_0820 +pr_000821,acct_0155,user_000821,2026-02-09T11:00:00Z,draft,23000,supplier_0821 +pr_000822,acct_0156,user_000822,2026-02-10T11:00:00Z,submitted,24000,supplier_0822 +pr_000823,acct_0157,user_000823,2026-02-11T11:00:00Z,approved,25000,supplier_0823 +pr_000824,acct_0158,user_000824,2026-02-12T11:00:00Z,rejected,26000,supplier_0824 +pr_000825,acct_0159,user_000825,2026-02-13T11:00:00Z,cancelled,27000,supplier_0825 +pr_000826,acct_0160,user_000826,2026-02-14T11:00:00Z,draft,28000,supplier_0826 +pr_000827,acct_0161,user_000827,2026-02-15T11:00:00Z,submitted,29000,supplier_0827 +pr_000828,acct_0162,user_000828,2026-02-16T11:00:00Z,approved,30000,supplier_0828 +pr_000829,acct_0163,user_000829,2026-02-17T11:00:00Z,rejected,31000,supplier_0829 +pr_000830,acct_0164,user_000830,2026-02-18T11:00:00Z,cancelled,32000,supplier_0830 +pr_000831,acct_0165,user_000831,2026-02-19T11:00:00Z,draft,33000,supplier_0831 +pr_000832,acct_0166,user_000832,2026-02-20T11:00:00Z,submitted,34000,supplier_0832 +pr_000833,acct_0167,user_000833,2026-02-21T11:00:00Z,approved,35000,supplier_0833 +pr_000834,acct_0168,user_000834,2026-02-22T11:00:00Z,rejected,36000,supplier_0834 +pr_000835,acct_0169,user_000835,2026-02-23T11:00:00Z,cancelled,37000,supplier_0835 +pr_000836,acct_0170,user_000836,2026-02-24T11:00:00Z,draft,38000,supplier_0836 
+pr_000837,acct_0171,user_000837,2026-02-25T11:00:00Z,submitted,39000,supplier_0837 +pr_000838,acct_0172,user_000838,2026-02-26T11:00:00Z,approved,40000,supplier_0838 +pr_000839,acct_0173,user_000839,2026-02-27T11:00:00Z,rejected,41000,supplier_0839 +pr_000840,acct_0174,user_000840,2026-02-28T11:00:00Z,cancelled,42000,supplier_0840 +pr_000841,acct_0175,user_000841,2026-02-01T11:00:00Z,draft,43000,supplier_0841 +pr_000842,acct_0176,user_000842,2026-02-02T11:00:00Z,submitted,44000,supplier_0842 +pr_000843,acct_0177,user_000843,2026-02-03T11:00:00Z,approved,45000,supplier_0843 +pr_000844,acct_0178,user_000844,2026-02-04T11:00:00Z,rejected,46000,supplier_0844 +pr_000845,acct_0179,user_000845,2026-02-05T11:00:00Z,cancelled,47000,supplier_0845 +pr_000846,acct_0180,user_000846,2026-02-06T11:00:00Z,draft,48000,supplier_0846 +pr_000847,acct_0181,user_000847,2026-02-07T11:00:00Z,submitted,49000,supplier_0847 +pr_000848,acct_0182,user_000848,2026-02-08T11:00:00Z,approved,50000,supplier_0848 +pr_000849,acct_0183,user_000849,2026-02-09T11:00:00Z,rejected,51000,supplier_0849 +pr_000850,acct_0184,user_000850,2026-02-10T11:00:00Z,cancelled,52000,supplier_0850 +pr_000851,acct_0185,user_000851,2026-02-11T11:00:00Z,draft,53000,supplier_0851 +pr_000852,acct_0186,user_000852,2026-02-12T11:00:00Z,submitted,54000,supplier_0852 +pr_000853,acct_0187,user_000853,2026-02-13T11:00:00Z,approved,55000,supplier_0853 +pr_000854,acct_0188,user_000854,2026-02-14T11:00:00Z,rejected,56000,supplier_0854 +pr_000855,acct_0189,user_000855,2026-02-15T11:00:00Z,cancelled,57000,supplier_0855 +pr_000856,acct_0190,user_000856,2026-02-16T11:00:00Z,draft,58000,supplier_0856 +pr_000857,acct_0001,user_000857,2026-02-17T11:00:00Z,submitted,59000,supplier_0857 +pr_000858,acct_0002,user_000858,2026-02-18T11:00:00Z,approved,60000,supplier_0858 +pr_000859,acct_0003,user_000859,2026-02-19T11:00:00Z,rejected,61000,supplier_0859 +pr_000860,acct_0004,user_000860,2026-02-20T11:00:00Z,cancelled,62000,supplier_0860 
+pr_000861,acct_0005,user_000861,2026-02-21T11:00:00Z,draft,63000,supplier_0861 +pr_000862,acct_0006,user_000862,2026-02-22T11:00:00Z,submitted,64000,supplier_0862 +pr_000863,acct_0007,user_000863,2026-02-23T11:00:00Z,approved,65000,supplier_0863 +pr_000864,acct_0008,user_000864,2026-02-24T11:00:00Z,rejected,66000,supplier_0864 +pr_000865,acct_0009,user_000865,2026-02-25T11:00:00Z,cancelled,67000,supplier_0865 +pr_000866,acct_0010,user_000866,2026-02-26T11:00:00Z,draft,68000,supplier_0866 +pr_000867,acct_0011,user_000867,2026-02-27T11:00:00Z,submitted,69000,supplier_0867 +pr_000868,acct_0012,user_000868,2026-02-28T11:00:00Z,approved,70000,supplier_0868 +pr_000869,acct_0013,user_000869,2026-02-01T11:00:00Z,rejected,71000,supplier_0869 +pr_000870,acct_0014,user_000870,2026-02-02T11:00:00Z,cancelled,72000,supplier_0870 +pr_000871,acct_0015,user_000871,2026-02-03T11:00:00Z,draft,73000,supplier_0871 +pr_000872,acct_0016,user_000872,2026-02-04T11:00:00Z,submitted,74000,supplier_0872 +pr_000873,acct_0017,user_000873,2026-02-05T11:00:00Z,approved,75000,supplier_0873 +pr_000874,acct_0018,user_000874,2026-02-06T11:00:00Z,rejected,76000,supplier_0874 +pr_000875,acct_0019,user_000875,2026-02-07T11:00:00Z,cancelled,77000,supplier_0875 +pr_000876,acct_0020,user_000876,2026-02-08T11:00:00Z,draft,78000,supplier_0876 +pr_000877,acct_0021,user_000877,2026-02-09T11:00:00Z,submitted,79000,supplier_0877 +pr_000878,acct_0022,user_000878,2026-02-10T11:00:00Z,approved,80000,supplier_0878 +pr_000879,acct_0023,user_000879,2026-02-11T11:00:00Z,rejected,81000,supplier_0879 +pr_000880,acct_0024,user_000880,2026-02-12T11:00:00Z,cancelled,82000,supplier_0880 +pr_000881,acct_0025,user_000881,2026-02-13T11:00:00Z,draft,83000,supplier_0881 +pr_000882,acct_0026,user_000882,2026-02-14T11:00:00Z,submitted,84000,supplier_0882 +pr_000883,acct_0027,user_000883,2026-02-15T11:00:00Z,approved,85000,supplier_0883 +pr_000884,acct_0028,user_000884,2026-02-16T11:00:00Z,rejected,86000,supplier_0884 
+pr_000885,acct_0029,user_000885,2026-02-17T11:00:00Z,cancelled,87000,supplier_0885 +pr_000886,acct_0030,user_000886,2026-02-18T11:00:00Z,draft,88000,supplier_0886 +pr_000887,acct_0031,user_000887,2026-02-19T11:00:00Z,submitted,89000,supplier_0887 +pr_000888,acct_0032,user_000888,2026-02-20T11:00:00Z,approved,90000,supplier_0888 +pr_000889,acct_0033,user_000889,2026-02-21T11:00:00Z,rejected,91000,supplier_0889 +pr_000890,acct_0034,user_000890,2026-02-22T11:00:00Z,cancelled,92000,supplier_0890 +pr_000891,acct_0035,user_000891,2026-02-23T11:00:00Z,draft,93000,supplier_0891 +pr_000892,acct_0036,user_000892,2026-02-24T11:00:00Z,submitted,94000,supplier_0892 +pr_000893,acct_0037,user_000893,2026-02-25T11:00:00Z,approved,95000,supplier_0893 +pr_000894,acct_0038,user_000894,2026-02-26T11:00:00Z,rejected,96000,supplier_0894 +pr_000895,acct_0039,user_000895,2026-02-27T11:00:00Z,cancelled,97000,supplier_0895 +pr_000896,acct_0040,user_000896,2026-02-28T11:00:00Z,draft,98000,supplier_0896 +pr_000897,acct_0041,user_000897,2026-02-01T11:00:00Z,submitted,99000,supplier_0897 +pr_000898,acct_0042,user_000898,2026-02-02T11:00:00Z,approved,100000,supplier_0898 +pr_000899,acct_0043,user_000899,2026-02-03T11:00:00Z,rejected,101000,supplier_0899 +pr_000900,acct_0044,user_000900,2026-02-04T11:00:00Z,cancelled,102000,supplier_0900 +pr_000901,acct_0045,user_000901,2026-02-05T11:00:00Z,draft,103000,supplier_0001 +pr_000902,acct_0046,user_000902,2026-02-06T11:00:00Z,submitted,104000,supplier_0002 +pr_000903,acct_0047,user_000903,2026-02-07T11:00:00Z,approved,105000,supplier_0003 +pr_000904,acct_0048,user_000904,2026-02-08T11:00:00Z,rejected,106000,supplier_0004 +pr_000905,acct_0049,user_000905,2026-02-09T11:00:00Z,cancelled,107000,supplier_0005 +pr_000906,acct_0050,user_000906,2026-02-10T11:00:00Z,draft,108000,supplier_0006 +pr_000907,acct_0051,user_000907,2026-02-11T11:00:00Z,submitted,109000,supplier_0007 +pr_000908,acct_0052,user_000908,2026-02-12T11:00:00Z,approved,110000,supplier_0008 
+pr_000909,acct_0053,user_000909,2026-02-13T11:00:00Z,rejected,10000,supplier_0009 +pr_000910,acct_0054,user_000910,2026-02-14T11:00:00Z,cancelled,11000,supplier_0010 +pr_000911,acct_0055,user_000911,2026-02-15T11:00:00Z,draft,12000,supplier_0011 +pr_000912,acct_0056,user_000912,2026-02-16T11:00:00Z,submitted,13000,supplier_0012 +pr_000913,acct_0057,user_000913,2026-02-17T11:00:00Z,approved,14000,supplier_0013 +pr_000914,acct_0058,user_000914,2026-02-18T11:00:00Z,rejected,15000,supplier_0014 +pr_000915,acct_0059,user_000915,2026-02-19T11:00:00Z,cancelled,16000,supplier_0015 +pr_000916,acct_0060,user_000916,2026-02-20T11:00:00Z,draft,17000,supplier_0016 +pr_000917,acct_0061,user_000917,2026-02-21T11:00:00Z,submitted,18000,supplier_0017 +pr_000918,acct_0062,user_000918,2026-02-22T11:00:00Z,approved,19000,supplier_0018 +pr_000919,acct_0063,user_000919,2026-02-23T11:00:00Z,rejected,20000,supplier_0019 +pr_000920,acct_0064,user_000920,2026-02-24T11:00:00Z,cancelled,21000,supplier_0020 +pr_000921,acct_0065,user_000921,2026-02-25T11:00:00Z,draft,22000,supplier_0021 +pr_000922,acct_0066,user_000922,2026-02-26T11:00:00Z,submitted,23000,supplier_0022 +pr_000923,acct_0067,user_000923,2026-02-27T11:00:00Z,approved,24000,supplier_0023 +pr_000924,acct_0068,user_000924,2026-02-28T11:00:00Z,rejected,25000,supplier_0024 +pr_000925,acct_0069,user_000925,2026-02-01T11:00:00Z,cancelled,26000,supplier_0025 +pr_000926,acct_0070,user_000926,2026-02-02T11:00:00Z,draft,27000,supplier_0026 +pr_000927,acct_0071,user_000927,2026-02-03T11:00:00Z,submitted,28000,supplier_0027 +pr_000928,acct_0072,user_000928,2026-02-04T11:00:00Z,approved,29000,supplier_0028 +pr_000929,acct_0073,user_000929,2026-02-05T11:00:00Z,rejected,30000,supplier_0029 +pr_000930,acct_0074,user_000930,2026-02-06T11:00:00Z,cancelled,31000,supplier_0030 +pr_000931,acct_0075,user_000931,2026-02-07T11:00:00Z,draft,32000,supplier_0031 +pr_000932,acct_0076,user_000932,2026-02-08T11:00:00Z,submitted,33000,supplier_0032 
+pr_000933,acct_0077,user_000933,2026-02-09T11:00:00Z,approved,34000,supplier_0033 +pr_000934,acct_0078,user_000934,2026-02-10T11:00:00Z,rejected,35000,supplier_0034 +pr_000935,acct_0079,user_000935,2026-02-11T11:00:00Z,cancelled,36000,supplier_0035 +pr_000936,acct_0080,user_000936,2026-02-12T11:00:00Z,draft,37000,supplier_0036 +pr_000937,acct_0081,user_000937,2026-02-13T11:00:00Z,submitted,38000,supplier_0037 +pr_000938,acct_0082,user_000938,2026-02-14T11:00:00Z,approved,39000,supplier_0038 +pr_000939,acct_0083,user_000939,2026-02-15T11:00:00Z,rejected,40000,supplier_0039 +pr_000940,acct_0084,user_000940,2026-02-16T11:00:00Z,cancelled,41000,supplier_0040 +pr_000941,acct_0085,user_000941,2026-02-17T11:00:00Z,draft,42000,supplier_0041 +pr_000942,acct_0086,user_000942,2026-02-18T11:00:00Z,submitted,43000,supplier_0042 +pr_000943,acct_0087,user_000943,2026-02-19T11:00:00Z,approved,44000,supplier_0043 +pr_000944,acct_0088,user_000944,2026-02-20T11:00:00Z,rejected,45000,supplier_0044 +pr_000945,acct_0089,user_000945,2026-02-21T11:00:00Z,cancelled,46000,supplier_0045 +pr_000946,acct_0090,user_000946,2026-02-22T11:00:00Z,draft,47000,supplier_0046 +pr_000947,acct_0091,user_000947,2026-02-23T11:00:00Z,submitted,48000,supplier_0047 +pr_000948,acct_0092,user_000948,2026-02-24T11:00:00Z,approved,49000,supplier_0048 +pr_000949,acct_0093,user_000949,2026-02-25T11:00:00Z,rejected,50000,supplier_0049 +pr_000950,acct_0094,user_000950,2026-02-26T11:00:00Z,cancelled,51000,supplier_0050 +pr_000951,acct_0095,user_000951,2026-02-27T11:00:00Z,draft,52000,supplier_0051 +pr_000952,acct_0096,user_000952,2026-02-28T11:00:00Z,submitted,53000,supplier_0052 +pr_000953,acct_0097,user_000953,2026-02-01T11:00:00Z,approved,54000,supplier_0053 +pr_000954,acct_0098,user_000954,2026-02-02T11:00:00Z,rejected,55000,supplier_0054 +pr_000955,acct_0099,user_000955,2026-02-03T11:00:00Z,cancelled,56000,supplier_0055 +pr_000956,acct_0100,user_000956,2026-02-04T11:00:00Z,draft,57000,supplier_0056 
+pr_000957,acct_0101,user_000957,2026-02-05T11:00:00Z,submitted,58000,supplier_0057 +pr_000958,acct_0102,user_000958,2026-02-06T11:00:00Z,approved,59000,supplier_0058 +pr_000959,acct_0103,user_000959,2026-02-07T11:00:00Z,rejected,60000,supplier_0059 +pr_000960,acct_0104,user_000960,2026-02-08T11:00:00Z,cancelled,61000,supplier_0060 +pr_000961,acct_0105,user_000961,2026-02-09T11:00:00Z,draft,62000,supplier_0061 +pr_000962,acct_0106,user_000962,2026-02-10T11:00:00Z,submitted,63000,supplier_0062 +pr_000963,acct_0107,user_000963,2026-02-11T11:00:00Z,approved,64000,supplier_0063 +pr_000964,acct_0108,user_000964,2026-02-12T11:00:00Z,rejected,65000,supplier_0064 +pr_000965,acct_0109,user_000965,2026-02-13T11:00:00Z,cancelled,66000,supplier_0065 +pr_000966,acct_0110,user_000966,2026-02-14T11:00:00Z,draft,67000,supplier_0066 +pr_000967,acct_0111,user_000967,2026-02-15T11:00:00Z,submitted,68000,supplier_0067 +pr_000968,acct_0112,user_000968,2026-02-16T11:00:00Z,approved,69000,supplier_0068 +pr_000969,acct_0113,user_000969,2026-02-17T11:00:00Z,rejected,70000,supplier_0069 +pr_000970,acct_0114,user_000970,2026-02-18T11:00:00Z,cancelled,71000,supplier_0070 +pr_000971,acct_0115,user_000971,2026-02-19T11:00:00Z,draft,72000,supplier_0071 +pr_000972,acct_0116,user_000972,2026-02-20T11:00:00Z,submitted,73000,supplier_0072 +pr_000973,acct_0117,user_000973,2026-02-21T11:00:00Z,approved,74000,supplier_0073 +pr_000974,acct_0118,user_000974,2026-02-22T11:00:00Z,rejected,75000,supplier_0074 +pr_000975,acct_0119,user_000975,2026-02-23T11:00:00Z,cancelled,76000,supplier_0075 +pr_000976,acct_0120,user_000976,2026-02-24T11:00:00Z,draft,77000,supplier_0076 +pr_000977,acct_0121,user_000977,2026-02-25T11:00:00Z,submitted,78000,supplier_0077 +pr_000978,acct_0122,user_000978,2026-02-26T11:00:00Z,approved,79000,supplier_0078 +pr_000979,acct_0123,user_000979,2026-02-27T11:00:00Z,rejected,80000,supplier_0079 +pr_000980,acct_0124,user_000980,2026-02-28T11:00:00Z,cancelled,81000,supplier_0080 
+pr_000981,acct_0125,user_000981,2026-02-01T11:00:00Z,draft,82000,supplier_0081 +pr_000982,acct_0126,user_000982,2026-02-02T11:00:00Z,submitted,83000,supplier_0082 +pr_000983,acct_0127,user_000983,2026-02-03T11:00:00Z,approved,84000,supplier_0083 +pr_000984,acct_0128,user_000984,2026-02-04T11:00:00Z,rejected,85000,supplier_0084 +pr_000985,acct_0129,user_000985,2026-02-05T11:00:00Z,cancelled,86000,supplier_0085 +pr_000986,acct_0130,user_000986,2026-02-06T11:00:00Z,draft,87000,supplier_0086 +pr_000987,acct_0131,user_000987,2026-02-07T11:00:00Z,submitted,88000,supplier_0087 +pr_000988,acct_0132,user_000988,2026-02-08T11:00:00Z,approved,89000,supplier_0088 +pr_000989,acct_0133,user_000989,2026-02-09T11:00:00Z,rejected,90000,supplier_0089 +pr_000990,acct_0134,user_000990,2026-02-10T11:00:00Z,cancelled,91000,supplier_0090 +pr_000991,acct_0135,user_000991,2026-02-11T11:00:00Z,draft,92000,supplier_0091 +pr_000992,acct_0136,user_000992,2026-02-12T11:00:00Z,submitted,93000,supplier_0092 +pr_000993,acct_0137,user_000993,2026-02-13T11:00:00Z,approved,94000,supplier_0093 +pr_000994,acct_0138,user_000994,2026-02-14T11:00:00Z,rejected,95000,supplier_0094 +pr_000995,acct_0139,user_000995,2026-02-15T11:00:00Z,cancelled,96000,supplier_0095 +pr_000996,acct_0140,user_000996,2026-02-16T11:00:00Z,draft,97000,supplier_0096 +pr_000997,acct_0141,user_000997,2026-02-17T11:00:00Z,submitted,98000,supplier_0097 +pr_000998,acct_0142,user_000998,2026-02-18T11:00:00Z,approved,99000,supplier_0098 +pr_000999,acct_0143,user_000999,2026-02-19T11:00:00Z,rejected,100000,supplier_0099 +pr_001000,acct_0144,user_001000,2026-02-20T11:00:00Z,cancelled,101000,supplier_0100 +pr_001001,acct_0145,user_001001,2026-02-21T11:00:00Z,draft,102000,supplier_0101 +pr_001002,acct_0146,user_001002,2026-02-22T11:00:00Z,submitted,103000,supplier_0102 +pr_001003,acct_0147,user_001003,2026-02-23T11:00:00Z,approved,104000,supplier_0103 +pr_001004,acct_0148,user_001004,2026-02-24T11:00:00Z,rejected,105000,supplier_0104 
+pr_001005,acct_0149,user_001005,2026-02-25T11:00:00Z,cancelled,106000,supplier_0105 +pr_001006,acct_0150,user_001006,2026-02-26T11:00:00Z,draft,107000,supplier_0106 +pr_001007,acct_0151,user_001007,2026-02-27T11:00:00Z,submitted,108000,supplier_0107 +pr_001008,acct_0152,user_001008,2026-02-28T11:00:00Z,approved,109000,supplier_0108 +pr_001009,acct_0153,user_001009,2026-02-01T11:00:00Z,rejected,110000,supplier_0109 +pr_001010,acct_0154,user_001010,2026-02-02T11:00:00Z,cancelled,10000,supplier_0110 +pr_001011,acct_0155,user_001011,2026-02-03T11:00:00Z,draft,11000,supplier_0111 +pr_001012,acct_0156,user_001012,2026-02-04T11:00:00Z,submitted,12000,supplier_0112 +pr_001013,acct_0157,user_001013,2026-02-05T11:00:00Z,approved,13000,supplier_0113 +pr_001014,acct_0158,user_001014,2026-02-06T11:00:00Z,rejected,14000,supplier_0114 +pr_001015,acct_0159,user_001015,2026-02-07T11:00:00Z,cancelled,15000,supplier_0115 +pr_001016,acct_0160,user_001016,2026-02-08T11:00:00Z,draft,16000,supplier_0116 +pr_001017,acct_0161,user_001017,2026-02-09T11:00:00Z,submitted,17000,supplier_0117 +pr_001018,acct_0162,user_001018,2026-02-10T11:00:00Z,approved,18000,supplier_0118 +pr_001019,acct_0163,user_001019,2026-02-11T11:00:00Z,rejected,19000,supplier_0119 +pr_001020,acct_0164,user_001020,2026-02-12T11:00:00Z,cancelled,20000,supplier_0120 +pr_001021,acct_0165,user_001021,2026-02-13T11:00:00Z,draft,21000,supplier_0121 +pr_001022,acct_0166,user_001022,2026-02-14T11:00:00Z,submitted,22000,supplier_0122 +pr_001023,acct_0167,user_001023,2026-02-15T11:00:00Z,approved,23000,supplier_0123 +pr_001024,acct_0168,user_001024,2026-02-16T11:00:00Z,rejected,24000,supplier_0124 +pr_001025,acct_0169,user_001025,2026-02-17T11:00:00Z,cancelled,25000,supplier_0125 +pr_001026,acct_0170,user_001026,2026-02-18T11:00:00Z,draft,26000,supplier_0126 +pr_001027,acct_0171,user_001027,2026-02-19T11:00:00Z,submitted,27000,supplier_0127 +pr_001028,acct_0172,user_001028,2026-02-20T11:00:00Z,approved,28000,supplier_0128 
+pr_001029,acct_0173,user_001029,2026-02-21T11:00:00Z,rejected,29000,supplier_0129 +pr_001030,acct_0174,user_001030,2026-02-22T11:00:00Z,cancelled,30000,supplier_0130 +pr_001031,acct_0175,user_001031,2026-02-23T11:00:00Z,draft,31000,supplier_0131 +pr_001032,acct_0176,user_001032,2026-02-24T11:00:00Z,submitted,32000,supplier_0132 +pr_001033,acct_0177,user_001033,2026-02-25T11:00:00Z,approved,33000,supplier_0133 +pr_001034,acct_0178,user_001034,2026-02-26T11:00:00Z,rejected,34000,supplier_0134 +pr_001035,acct_0179,user_001035,2026-02-27T11:00:00Z,cancelled,35000,supplier_0135 +pr_001036,acct_0180,user_001036,2026-02-28T11:00:00Z,draft,36000,supplier_0136 +pr_001037,acct_0181,user_001037,2026-02-01T11:00:00Z,submitted,37000,supplier_0137 +pr_001038,acct_0182,user_001038,2026-02-02T11:00:00Z,approved,38000,supplier_0138 +pr_001039,acct_0183,user_001039,2026-02-03T11:00:00Z,rejected,39000,supplier_0139 +pr_001040,acct_0184,user_001040,2026-02-04T11:00:00Z,cancelled,40000,supplier_0140 +pr_001041,acct_0185,user_001041,2026-02-05T11:00:00Z,draft,41000,supplier_0141 +pr_001042,acct_0186,user_001042,2026-02-06T11:00:00Z,submitted,42000,supplier_0142 +pr_001043,acct_0187,user_001043,2026-02-07T11:00:00Z,approved,43000,supplier_0143 +pr_001044,acct_0188,user_001044,2026-02-08T11:00:00Z,rejected,44000,supplier_0144 +pr_001045,acct_0189,user_001045,2026-02-09T11:00:00Z,cancelled,45000,supplier_0145 +pr_001046,acct_0190,user_001046,2026-02-10T11:00:00Z,draft,46000,supplier_0146 +pr_001047,acct_0001,user_001047,2026-02-11T11:00:00Z,submitted,47000,supplier_0147 +pr_001048,acct_0002,user_001048,2026-02-12T11:00:00Z,approved,48000,supplier_0148 +pr_001049,acct_0003,user_001049,2026-02-13T11:00:00Z,rejected,49000,supplier_0149 +pr_001050,acct_0004,user_001050,2026-02-14T11:00:00Z,cancelled,50000,supplier_0150 +pr_001051,acct_0005,user_001051,2026-02-15T11:00:00Z,draft,51000,supplier_0151 +pr_001052,acct_0006,user_001052,2026-02-16T11:00:00Z,submitted,52000,supplier_0152 
+pr_001053,acct_0007,user_001053,2026-02-17T11:00:00Z,approved,53000,supplier_0153 +pr_001054,acct_0008,user_001054,2026-02-18T11:00:00Z,rejected,54000,supplier_0154 +pr_001055,acct_0009,user_001055,2026-02-19T11:00:00Z,cancelled,55000,supplier_0155 +pr_001056,acct_0010,user_001056,2026-02-20T11:00:00Z,draft,56000,supplier_0156 +pr_001057,acct_0011,user_001057,2026-02-21T11:00:00Z,submitted,57000,supplier_0157 +pr_001058,acct_0012,user_001058,2026-02-22T11:00:00Z,approved,58000,supplier_0158 +pr_001059,acct_0013,user_001059,2026-02-23T11:00:00Z,rejected,59000,supplier_0159 +pr_001060,acct_0014,user_001060,2026-02-24T11:00:00Z,cancelled,60000,supplier_0160 +pr_001061,acct_0015,user_001061,2026-02-25T11:00:00Z,draft,61000,supplier_0161 +pr_001062,acct_0016,user_001062,2026-02-26T11:00:00Z,submitted,62000,supplier_0162 +pr_001063,acct_0017,user_001063,2026-02-27T11:00:00Z,approved,63000,supplier_0163 +pr_001064,acct_0018,user_001064,2026-02-28T11:00:00Z,rejected,64000,supplier_0164 +pr_001065,acct_0019,user_001065,2026-02-01T11:00:00Z,cancelled,65000,supplier_0165 +pr_001066,acct_0020,user_001066,2026-02-02T11:00:00Z,draft,66000,supplier_0166 +pr_001067,acct_0021,user_001067,2026-02-03T11:00:00Z,submitted,67000,supplier_0167 +pr_001068,acct_0022,user_001068,2026-02-04T11:00:00Z,approved,68000,supplier_0168 +pr_001069,acct_0023,user_001069,2026-02-05T11:00:00Z,rejected,69000,supplier_0169 +pr_001070,acct_0024,user_001070,2026-02-06T11:00:00Z,cancelled,70000,supplier_0170 +pr_001071,acct_0025,user_001071,2026-02-07T11:00:00Z,draft,71000,supplier_0171 +pr_001072,acct_0026,user_001072,2026-02-08T11:00:00Z,submitted,72000,supplier_0172 +pr_001073,acct_0027,user_001073,2026-02-09T11:00:00Z,approved,73000,supplier_0173 +pr_001074,acct_0028,user_001074,2026-02-10T11:00:00Z,rejected,74000,supplier_0174 +pr_001075,acct_0029,user_001075,2026-02-11T11:00:00Z,cancelled,75000,supplier_0175 +pr_001076,acct_0030,user_001076,2026-02-12T11:00:00Z,draft,76000,supplier_0176 
+pr_001077,acct_0031,user_001077,2026-02-13T11:00:00Z,submitted,77000,supplier_0177 +pr_001078,acct_0032,user_001078,2026-02-14T11:00:00Z,approved,78000,supplier_0178 +pr_001079,acct_0033,user_001079,2026-02-15T11:00:00Z,rejected,79000,supplier_0179 +pr_001080,acct_0034,user_001080,2026-02-16T11:00:00Z,cancelled,80000,supplier_0180 +pr_001081,acct_0035,user_001081,2026-02-17T11:00:00Z,draft,81000,supplier_0181 +pr_001082,acct_0036,user_001082,2026-02-18T11:00:00Z,submitted,82000,supplier_0182 +pr_001083,acct_0037,user_001083,2026-02-19T11:00:00Z,approved,83000,supplier_0183 +pr_001084,acct_0038,user_001084,2026-02-20T11:00:00Z,rejected,84000,supplier_0184 +pr_001085,acct_0039,user_001085,2026-02-21T11:00:00Z,cancelled,85000,supplier_0185 +pr_001086,acct_0040,user_001086,2026-02-22T11:00:00Z,draft,86000,supplier_0186 +pr_001087,acct_0041,user_001087,2026-02-23T11:00:00Z,submitted,87000,supplier_0187 +pr_001088,acct_0042,user_001088,2026-02-24T11:00:00Z,approved,88000,supplier_0188 +pr_001089,acct_0043,user_001089,2026-02-25T11:00:00Z,rejected,89000,supplier_0189 +pr_001090,acct_0044,user_001090,2026-02-26T11:00:00Z,cancelled,90000,supplier_0190 +pr_001091,acct_0045,user_001091,2026-02-27T11:00:00Z,draft,91000,supplier_0191 +pr_001092,acct_0046,user_001092,2026-02-28T11:00:00Z,submitted,92000,supplier_0192 +pr_001093,acct_0047,user_001093,2026-02-01T11:00:00Z,approved,93000,supplier_0193 +pr_001094,acct_0048,user_001094,2026-02-02T11:00:00Z,rejected,94000,supplier_0194 +pr_001095,acct_0049,user_001095,2026-02-03T11:00:00Z,cancelled,95000,supplier_0195 +pr_001096,acct_0050,user_001096,2026-02-04T11:00:00Z,draft,96000,supplier_0196 +pr_001097,acct_0051,user_001097,2026-02-05T11:00:00Z,submitted,97000,supplier_0197 +pr_001098,acct_0052,user_001098,2026-02-06T11:00:00Z,approved,98000,supplier_0198 +pr_001099,acct_0053,user_001099,2026-02-07T11:00:00Z,rejected,99000,supplier_0199 +pr_001100,acct_0054,user_001100,2026-02-08T11:00:00Z,cancelled,100000,supplier_0200 
+pr_001101,acct_0010,user_000001,2026-02-09T11:00:00Z,draft,101000,supplier_0201 +pr_001102,acct_0011,user_000002,2026-02-10T11:00:00Z,submitted,102000,supplier_0202 +pr_001103,acct_0012,user_000003,2026-02-11T11:00:00Z,approved,103000,supplier_0203 +pr_001104,acct_0013,user_000004,2026-02-12T11:00:00Z,rejected,104000,supplier_0204 +pr_001105,acct_0014,user_000005,2026-02-13T11:00:00Z,cancelled,105000,supplier_0205 +pr_001106,acct_0015,user_000006,2026-02-14T11:00:00Z,draft,106000,supplier_0206 +pr_001107,acct_0016,user_000007,2026-02-15T11:00:00Z,submitted,107000,supplier_0207 +pr_001108,acct_0017,user_000008,2026-02-16T11:00:00Z,approved,108000,supplier_0208 +pr_001109,acct_0018,user_000009,2026-02-17T11:00:00Z,rejected,109000,supplier_0209 +pr_001110,acct_0019,user_000010,2026-02-18T11:00:00Z,cancelled,110000,supplier_0210 +pr_001111,acct_0020,user_000011,2026-02-19T11:00:00Z,draft,10000,supplier_0211 +pr_001112,acct_0021,user_000012,2026-02-20T11:00:00Z,submitted,11000,supplier_0212 +pr_001113,acct_0022,user_000013,2026-02-21T11:00:00Z,approved,12000,supplier_0213 +pr_001114,acct_0023,user_000014,2026-02-22T11:00:00Z,rejected,13000,supplier_0214 +pr_001115,acct_0024,user_000015,2026-02-23T11:00:00Z,cancelled,14000,supplier_0215 +pr_001116,acct_0025,user_000016,2026-02-24T11:00:00Z,draft,15000,supplier_0216 +pr_001117,acct_0026,user_000017,2026-02-25T11:00:00Z,submitted,16000,supplier_0217 +pr_001118,acct_0027,user_000018,2026-02-26T11:00:00Z,approved,17000,supplier_0218 +pr_001119,acct_0028,user_000019,2026-02-27T11:00:00Z,rejected,18000,supplier_0219 +pr_001120,acct_0029,user_000020,2026-02-28T11:00:00Z,cancelled,19000,supplier_0220 +pr_001121,acct_0030,user_000021,2026-02-01T11:00:00Z,draft,20000,supplier_0221 +pr_001122,acct_0031,user_000022,2026-02-02T11:00:00Z,submitted,21000,supplier_0222 +pr_001123,acct_0032,user_000023,2026-02-03T11:00:00Z,approved,22000,supplier_0223 +pr_001124,acct_0033,user_000024,2026-02-04T11:00:00Z,rejected,23000,supplier_0224 
+pr_001125,acct_0034,user_000025,2026-02-05T11:00:00Z,cancelled,24000,supplier_0225 +pr_001126,acct_0035,user_000026,2026-02-06T11:00:00Z,draft,25000,supplier_0226 +pr_001127,acct_0036,user_000027,2026-02-07T11:00:00Z,submitted,26000,supplier_0227 +pr_001128,acct_0037,user_000028,2026-02-08T11:00:00Z,approved,27000,supplier_0228 +pr_001129,acct_0038,user_000029,2026-02-09T11:00:00Z,rejected,28000,supplier_0229 +pr_001130,acct_0039,user_000030,2026-02-10T11:00:00Z,cancelled,29000,supplier_0230 +pr_001131,acct_0040,user_000031,2026-02-11T11:00:00Z,draft,30000,supplier_0231 +pr_001132,acct_0041,user_000032,2026-02-12T11:00:00Z,submitted,31000,supplier_0232 +pr_001133,acct_0042,user_000033,2026-02-13T11:00:00Z,approved,32000,supplier_0233 +pr_001134,acct_0043,user_000034,2026-02-14T11:00:00Z,rejected,33000,supplier_0234 +pr_001135,acct_0044,user_000035,2026-02-15T11:00:00Z,cancelled,34000,supplier_0235 +pr_001136,acct_0045,user_000036,2026-02-16T11:00:00Z,draft,35000,supplier_0236 +pr_001137,acct_0046,user_000037,2026-02-17T11:00:00Z,submitted,36000,supplier_0237 +pr_001138,acct_0047,user_000038,2026-02-18T11:00:00Z,approved,37000,supplier_0238 +pr_001139,acct_0048,user_000039,2026-02-19T11:00:00Z,rejected,38000,supplier_0239 +pr_001140,acct_0049,user_000040,2026-02-20T11:00:00Z,cancelled,39000,supplier_0240 +pr_001141,acct_0050,user_000041,2026-02-21T11:00:00Z,draft,40000,supplier_0241 +pr_001142,acct_0010,user_000042,2026-02-22T11:00:00Z,submitted,41000,supplier_0242 +pr_001143,acct_0011,user_000043,2026-02-23T11:00:00Z,approved,42000,supplier_0243 +pr_001144,acct_0012,user_000044,2026-02-24T11:00:00Z,rejected,43000,supplier_0244 +pr_001145,acct_0013,user_000045,2026-02-25T11:00:00Z,cancelled,44000,supplier_0245 +pr_001146,acct_0014,user_000046,2026-02-26T11:00:00Z,draft,45000,supplier_0246 +pr_001147,acct_0015,user_000047,2026-02-27T11:00:00Z,submitted,46000,supplier_0247 +pr_001148,acct_0016,user_000048,2026-02-28T11:00:00Z,approved,47000,supplier_0248 
+pr_001149,acct_0017,user_000049,2026-02-01T11:00:00Z,rejected,48000,supplier_0249 +pr_001150,acct_0018,user_000050,2026-02-02T11:00:00Z,cancelled,49000,supplier_0250 +pr_001151,acct_0019,user_000051,2026-02-03T11:00:00Z,draft,50000,supplier_0251 +pr_001152,acct_0020,user_000052,2026-02-04T11:00:00Z,submitted,51000,supplier_0252 +pr_001153,acct_0021,user_000053,2026-02-05T11:00:00Z,approved,52000,supplier_0253 +pr_001154,acct_0022,user_000054,2026-02-06T11:00:00Z,rejected,53000,supplier_0254 +pr_001155,acct_0023,user_000055,2026-02-07T11:00:00Z,cancelled,54000,supplier_0255 +pr_001156,acct_0024,user_000056,2026-02-08T11:00:00Z,draft,55000,supplier_0256 +pr_001157,acct_0025,user_000057,2026-02-09T11:00:00Z,submitted,56000,supplier_0257 +pr_001158,acct_0026,user_000058,2026-02-10T11:00:00Z,approved,57000,supplier_0258 +pr_001159,acct_0027,user_000059,2026-02-11T11:00:00Z,rejected,58000,supplier_0259 +pr_001160,acct_0028,user_000060,2026-02-12T11:00:00Z,cancelled,59000,supplier_0260 +pr_001161,acct_0029,user_000061,2026-02-13T11:00:00Z,draft,60000,supplier_0261 +pr_001162,acct_0030,user_000062,2026-02-14T11:00:00Z,submitted,61000,supplier_0262 +pr_001163,acct_0031,user_000063,2026-02-15T11:00:00Z,approved,62000,supplier_0263 +pr_001164,acct_0032,user_000064,2026-02-16T11:00:00Z,rejected,63000,supplier_0264 +pr_001165,acct_0033,user_000065,2026-02-17T11:00:00Z,cancelled,64000,supplier_0265 +pr_001166,acct_0034,user_000066,2026-02-18T11:00:00Z,draft,65000,supplier_0266 +pr_001167,acct_0035,user_000067,2026-02-19T11:00:00Z,submitted,66000,supplier_0267 +pr_001168,acct_0036,user_000068,2026-02-20T11:00:00Z,approved,67000,supplier_0268 +pr_001169,acct_0037,user_000069,2026-02-21T11:00:00Z,rejected,68000,supplier_0269 +pr_001170,acct_0038,user_000070,2026-02-22T11:00:00Z,cancelled,69000,supplier_0270 +pr_001171,acct_0039,user_000071,2026-02-23T11:00:00Z,draft,70000,supplier_0271 +pr_001172,acct_0040,user_000072,2026-02-24T11:00:00Z,submitted,71000,supplier_0272 
+pr_001173,acct_0041,user_000073,2026-02-25T11:00:00Z,approved,72000,supplier_0273 +pr_001174,acct_0042,user_000074,2026-02-26T11:00:00Z,rejected,73000,supplier_0274 +pr_001175,acct_0043,user_000075,2026-02-27T11:00:00Z,cancelled,74000,supplier_0275 +pr_001176,acct_0044,user_000076,2026-02-28T11:00:00Z,draft,75000,supplier_0276 +pr_001177,acct_0045,user_000077,2026-02-01T11:00:00Z,submitted,76000,supplier_0277 +pr_001178,acct_0046,user_000078,2026-02-02T11:00:00Z,approved,77000,supplier_0278 +pr_001179,acct_0047,user_000079,2026-02-03T11:00:00Z,rejected,78000,supplier_0279 +pr_001180,acct_0048,user_000080,2026-02-04T11:00:00Z,cancelled,79000,supplier_0280 +pr_001181,acct_0049,user_000081,2026-02-05T11:00:00Z,draft,80000,supplier_0281 +pr_001182,acct_0050,user_000082,2026-02-06T11:00:00Z,submitted,81000,supplier_0282 +pr_001183,acct_0010,user_000083,2026-02-07T11:00:00Z,approved,82000,supplier_0283 +pr_001184,acct_0011,user_000084,2026-02-08T11:00:00Z,rejected,83000,supplier_0284 +pr_001185,acct_0012,user_000085,2026-02-09T11:00:00Z,cancelled,84000,supplier_0285 +pr_001186,acct_0013,user_000086,2026-02-10T11:00:00Z,draft,85000,supplier_0286 +pr_001187,acct_0014,user_000087,2026-02-11T11:00:00Z,submitted,86000,supplier_0287 +pr_001188,acct_0015,user_000088,2026-02-12T11:00:00Z,approved,87000,supplier_0288 +pr_001189,acct_0016,user_000089,2026-02-13T11:00:00Z,rejected,88000,supplier_0289 +pr_001190,acct_0017,user_000090,2026-02-14T11:00:00Z,cancelled,89000,supplier_0290 +pr_001191,acct_0018,user_000091,2026-02-15T11:00:00Z,draft,90000,supplier_0291 +pr_001192,acct_0019,user_000092,2026-02-16T11:00:00Z,submitted,91000,supplier_0292 +pr_001193,acct_0020,user_000093,2026-02-17T11:00:00Z,approved,92000,supplier_0293 +pr_001194,acct_0021,user_000094,2026-02-18T11:00:00Z,rejected,93000,supplier_0294 +pr_001195,acct_0022,user_000095,2026-02-19T11:00:00Z,cancelled,94000,supplier_0295 +pr_001196,acct_0023,user_000096,2026-02-20T11:00:00Z,draft,95000,supplier_0296 
+pr_001197,acct_0024,user_000097,2026-02-21T11:00:00Z,submitted,96000,supplier_0297 +pr_001198,acct_0025,user_000098,2026-02-22T11:00:00Z,approved,97000,supplier_0298 +pr_001199,acct_0026,user_000099,2026-02-23T11:00:00Z,rejected,98000,supplier_0299 +pr_001200,acct_0027,user_000100,2026-02-24T11:00:00Z,cancelled,99000,supplier_0300 +pr_001201,acct_0028,user_000101,2026-02-25T11:00:00Z,draft,100000,supplier_0301 +pr_001202,acct_0029,user_000102,2026-02-26T11:00:00Z,submitted,101000,supplier_0302 +pr_001203,acct_0030,user_000103,2026-02-27T11:00:00Z,approved,102000,supplier_0303 +pr_001204,acct_0031,user_000104,2026-02-28T11:00:00Z,rejected,103000,supplier_0304 +pr_001205,acct_0032,user_000105,2026-02-01T11:00:00Z,cancelled,104000,supplier_0305 +pr_001206,acct_0033,user_000106,2026-02-02T11:00:00Z,draft,105000,supplier_0306 +pr_001207,acct_0034,user_000107,2026-02-03T11:00:00Z,submitted,106000,supplier_0307 +pr_001208,acct_0035,user_000108,2026-02-04T11:00:00Z,approved,107000,supplier_0308 +pr_001209,acct_0036,user_000109,2026-02-05T11:00:00Z,rejected,108000,supplier_0309 +pr_001210,acct_0037,user_000110,2026-02-06T11:00:00Z,cancelled,109000,supplier_0310 +pr_001211,acct_0038,user_000111,2026-02-07T11:00:00Z,draft,110000,supplier_0311 +pr_001212,acct_0039,user_000112,2026-02-08T11:00:00Z,submitted,10000,supplier_0312 +pr_001213,acct_0040,user_000113,2026-02-09T11:00:00Z,approved,11000,supplier_0313 +pr_001214,acct_0041,user_000114,2026-02-10T11:00:00Z,rejected,12000,supplier_0314 +pr_001215,acct_0042,user_000115,2026-02-11T11:00:00Z,cancelled,13000,supplier_0315 +pr_001216,acct_0043,user_000116,2026-02-12T11:00:00Z,draft,14000,supplier_0316 +pr_001217,acct_0044,user_000117,2026-02-13T11:00:00Z,submitted,15000,supplier_0317 +pr_001218,acct_0045,user_000118,2026-02-14T11:00:00Z,approved,16000,supplier_0318 +pr_001219,acct_0046,user_000119,2026-02-15T11:00:00Z,rejected,17000,supplier_0319 
+pr_001220,acct_0047,user_000120,2026-02-16T11:00:00Z,cancelled,18000,supplier_0320 +pr_001221,acct_0048,user_000121,2026-02-17T11:00:00Z,draft,19000,supplier_0321 +pr_001222,acct_0049,user_000122,2026-02-18T11:00:00Z,submitted,20000,supplier_0322 +pr_001223,acct_0050,user_000123,2026-02-19T11:00:00Z,approved,21000,supplier_0323 +pr_001224,acct_0010,user_000124,2026-02-20T11:00:00Z,rejected,22000,supplier_0324 +pr_001225,acct_0011,user_000125,2026-02-21T11:00:00Z,cancelled,23000,supplier_0325 +pr_001226,acct_0012,user_000126,2026-02-22T11:00:00Z,draft,24000,supplier_0326 +pr_001227,acct_0013,user_000127,2026-02-23T11:00:00Z,submitted,25000,supplier_0327 +pr_001228,acct_0014,user_000128,2026-02-24T11:00:00Z,approved,26000,supplier_0328 +pr_001229,acct_0015,user_000129,2026-02-25T11:00:00Z,rejected,27000,supplier_0329 +pr_001230,acct_0016,user_000130,2026-02-26T11:00:00Z,cancelled,28000,supplier_0330 +pr_001231,acct_0017,user_000131,2026-02-27T11:00:00Z,draft,29000,supplier_0331 +pr_001232,acct_0018,user_000132,2026-02-28T11:00:00Z,submitted,30000,supplier_0332 +pr_001233,acct_0019,user_000133,2026-02-01T11:00:00Z,approved,31000,supplier_0333 +pr_001234,acct_0020,user_000134,2026-02-02T11:00:00Z,rejected,32000,supplier_0334 +pr_001235,acct_0021,user_000135,2026-02-03T11:00:00Z,cancelled,33000,supplier_0335 +pr_001236,acct_0022,user_000136,2026-02-04T11:00:00Z,draft,34000,supplier_0336 +pr_001237,acct_0023,user_000137,2026-02-05T11:00:00Z,submitted,35000,supplier_0337 +pr_001238,acct_0024,user_000138,2026-02-06T11:00:00Z,approved,36000,supplier_0338 +pr_001239,acct_0025,user_000139,2026-02-07T11:00:00Z,rejected,37000,supplier_0339 +pr_001240,acct_0026,user_000140,2026-02-08T11:00:00Z,cancelled,38000,supplier_0340 +pr_001241,acct_0027,user_000141,2026-02-09T11:00:00Z,draft,39000,supplier_0341 +pr_001242,acct_0028,user_000142,2026-02-10T11:00:00Z,submitted,40000,supplier_0342 +pr_001243,acct_0029,user_000143,2026-02-11T11:00:00Z,approved,41000,supplier_0343 
+pr_001244,acct_0030,user_000144,2026-02-12T11:00:00Z,rejected,42000,supplier_0344 +pr_001245,acct_0031,user_000145,2026-02-13T11:00:00Z,cancelled,43000,supplier_0345 +pr_001246,acct_0032,user_000146,2026-02-14T11:00:00Z,draft,44000,supplier_0346 +pr_001247,acct_0033,user_000147,2026-02-15T11:00:00Z,submitted,45000,supplier_0347 +pr_001248,acct_0034,user_000148,2026-02-16T11:00:00Z,approved,46000,supplier_0348 +pr_001249,acct_0035,user_000149,2026-02-17T11:00:00Z,rejected,47000,supplier_0349 +pr_001250,acct_0036,user_000150,2026-02-18T11:00:00Z,cancelled,48000,supplier_0350 +pr_001251,acct_0037,user_000151,2026-02-19T11:00:00Z,draft,49000,supplier_0351 +pr_001252,acct_0038,user_000152,2026-02-20T11:00:00Z,submitted,50000,supplier_0352 +pr_001253,acct_0039,user_000153,2026-02-21T11:00:00Z,approved,51000,supplier_0353 +pr_001254,acct_0040,user_000154,2026-02-22T11:00:00Z,rejected,52000,supplier_0354 +pr_001255,acct_0041,user_000155,2026-02-23T11:00:00Z,cancelled,53000,supplier_0355 +pr_001256,acct_0042,user_000156,2026-02-24T11:00:00Z,draft,54000,supplier_0356 +pr_001257,acct_0043,user_000157,2026-02-25T11:00:00Z,submitted,55000,supplier_0357 +pr_001258,acct_0044,user_000158,2026-02-26T11:00:00Z,approved,56000,supplier_0358 +pr_001259,acct_0045,user_000159,2026-02-27T11:00:00Z,rejected,57000,supplier_0359 +pr_001260,acct_0046,user_000160,2026-02-28T11:00:00Z,cancelled,58000,supplier_0360 +pr_001261,acct_0047,user_000161,2026-02-01T11:00:00Z,draft,59000,supplier_0361 +pr_001262,acct_0048,user_000162,2026-02-02T11:00:00Z,submitted,60000,supplier_0362 +pr_001263,acct_0049,user_000163,2026-02-03T11:00:00Z,approved,61000,supplier_0363 +pr_001264,acct_0050,user_000164,2026-02-04T11:00:00Z,rejected,62000,supplier_0364 +pr_001265,acct_0010,user_000165,2026-02-05T11:00:00Z,cancelled,63000,supplier_0365 +pr_001266,acct_0011,user_000166,2026-02-06T11:00:00Z,draft,64000,supplier_0366 +pr_001267,acct_0012,user_000167,2026-02-07T11:00:00Z,submitted,65000,supplier_0367 
+pr_001268,acct_0013,user_000168,2026-02-08T11:00:00Z,approved,66000,supplier_0368 +pr_001269,acct_0014,user_000169,2026-02-09T11:00:00Z,rejected,67000,supplier_0369 +pr_001270,acct_0015,user_000170,2026-02-10T11:00:00Z,cancelled,68000,supplier_0370 +pr_001271,acct_0016,user_000171,2026-02-11T11:00:00Z,draft,69000,supplier_0371 +pr_001272,acct_0017,user_000172,2026-02-12T11:00:00Z,submitted,70000,supplier_0372 +pr_001273,acct_0018,user_000173,2026-02-13T11:00:00Z,approved,71000,supplier_0373 +pr_001274,acct_0019,user_000174,2026-02-14T11:00:00Z,rejected,72000,supplier_0374 +pr_001275,acct_0020,user_000175,2026-02-15T11:00:00Z,cancelled,73000,supplier_0375 +pr_001276,acct_0021,user_000176,2026-02-16T11:00:00Z,draft,74000,supplier_0376 +pr_001277,acct_0022,user_000177,2026-02-17T11:00:00Z,submitted,75000,supplier_0377 +pr_001278,acct_0023,user_000178,2026-02-18T11:00:00Z,approved,76000,supplier_0378 +pr_001279,acct_0024,user_000179,2026-02-19T11:00:00Z,rejected,77000,supplier_0379 +pr_001280,acct_0025,user_000180,2026-02-20T11:00:00Z,cancelled,78000,supplier_0380 +pr_001281,acct_0026,user_000181,2026-02-21T11:00:00Z,draft,79000,supplier_0381 +pr_001282,acct_0027,user_000182,2026-02-22T11:00:00Z,submitted,80000,supplier_0382 +pr_001283,acct_0028,user_000183,2026-02-23T11:00:00Z,approved,81000,supplier_0383 +pr_001284,acct_0029,user_000184,2026-02-24T11:00:00Z,rejected,82000,supplier_0384 +pr_001285,acct_0030,user_000185,2026-02-25T11:00:00Z,cancelled,83000,supplier_0385 +pr_001286,acct_0031,user_000186,2026-02-26T11:00:00Z,draft,84000,supplier_0386 +pr_001287,acct_0032,user_000187,2026-02-27T11:00:00Z,submitted,85000,supplier_0387 +pr_001288,acct_0033,user_000188,2026-02-28T11:00:00Z,approved,86000,supplier_0388 +pr_001289,acct_0034,user_000189,2026-02-01T11:00:00Z,rejected,87000,supplier_0389 +pr_001290,acct_0035,user_000190,2026-02-02T11:00:00Z,cancelled,88000,supplier_0390 +pr_001291,acct_0036,user_000191,2026-02-03T11:00:00Z,draft,89000,supplier_0391 
+pr_001292,acct_0037,user_000192,2026-02-04T11:00:00Z,submitted,90000,supplier_0392 +pr_001293,acct_0038,user_000193,2026-02-05T11:00:00Z,approved,91000,supplier_0393 +pr_001294,acct_0039,user_000194,2026-02-06T11:00:00Z,rejected,92000,supplier_0394 +pr_001295,acct_0040,user_000195,2026-02-07T11:00:00Z,cancelled,93000,supplier_0395 +pr_001296,acct_0041,user_000196,2026-02-08T11:00:00Z,draft,94000,supplier_0396 +pr_001297,acct_0042,user_000197,2026-02-09T11:00:00Z,submitted,95000,supplier_0397 +pr_001298,acct_0043,user_000198,2026-02-10T11:00:00Z,approved,96000,supplier_0398 +pr_001299,acct_0044,user_000199,2026-02-11T11:00:00Z,rejected,97000,supplier_0399 +pr_001300,acct_0045,user_000200,2026-02-12T11:00:00Z,cancelled,98000,supplier_0400 +pr_001301,acct_0046,user_000201,2026-02-13T11:00:00Z,draft,99000,supplier_0401 +pr_001302,acct_0047,user_000202,2026-02-14T11:00:00Z,submitted,100000,supplier_0402 +pr_001303,acct_0048,user_000203,2026-02-15T11:00:00Z,approved,101000,supplier_0403 +pr_001304,acct_0049,user_000204,2026-02-16T11:00:00Z,rejected,102000,supplier_0404 +pr_001305,acct_0050,user_000205,2026-02-17T11:00:00Z,cancelled,103000,supplier_0405 +pr_001306,acct_0010,user_000206,2026-02-18T11:00:00Z,draft,104000,supplier_0406 +pr_001307,acct_0011,user_000207,2026-02-19T11:00:00Z,submitted,105000,supplier_0407 +pr_001308,acct_0012,user_000208,2026-02-20T11:00:00Z,approved,106000,supplier_0408 +pr_001309,acct_0013,user_000209,2026-02-21T11:00:00Z,rejected,107000,supplier_0409 +pr_001310,acct_0014,user_000210,2026-02-22T11:00:00Z,cancelled,108000,supplier_0410 +pr_001311,acct_0015,user_000211,2026-02-23T11:00:00Z,draft,109000,supplier_0411 +pr_001312,acct_0016,user_000212,2026-02-24T11:00:00Z,submitted,110000,supplier_0412 +pr_001313,acct_0017,user_000213,2026-02-25T11:00:00Z,approved,10000,supplier_0413 +pr_001314,acct_0018,user_000214,2026-02-26T11:00:00Z,rejected,11000,supplier_0414 
+pr_001315,acct_0019,user_000215,2026-02-27T11:00:00Z,cancelled,12000,supplier_0415 +pr_001316,acct_0020,user_000216,2026-02-28T11:00:00Z,draft,13000,supplier_0416 +pr_001317,acct_0021,user_000217,2026-02-01T11:00:00Z,submitted,14000,supplier_0417 +pr_001318,acct_0022,user_000218,2026-02-02T11:00:00Z,approved,15000,supplier_0418 +pr_001319,acct_0023,user_000219,2026-02-03T11:00:00Z,rejected,16000,supplier_0419 +pr_001320,acct_0024,user_000220,2026-02-04T11:00:00Z,cancelled,17000,supplier_0420 +pr_001321,acct_0025,user_000221,2026-02-05T11:00:00Z,draft,18000,supplier_0421 +pr_001322,acct_0026,user_000222,2026-02-06T11:00:00Z,submitted,19000,supplier_0422 +pr_001323,acct_0027,user_000223,2026-02-07T11:00:00Z,approved,20000,supplier_0423 +pr_001324,acct_0028,user_000224,2026-02-08T11:00:00Z,rejected,21000,supplier_0424 +pr_001325,acct_0029,user_000225,2026-02-09T11:00:00Z,cancelled,22000,supplier_0425 +pr_001326,acct_0030,user_000226,2026-02-10T11:00:00Z,draft,23000,supplier_0426 +pr_001327,acct_0031,user_000227,2026-02-11T11:00:00Z,submitted,24000,supplier_0427 +pr_001328,acct_0032,user_000228,2026-02-12T11:00:00Z,approved,25000,supplier_0428 +pr_001329,acct_0033,user_000229,2026-02-13T11:00:00Z,rejected,26000,supplier_0429 +pr_001330,acct_0034,user_000230,2026-02-14T11:00:00Z,cancelled,27000,supplier_0430 +pr_001331,acct_0035,user_000231,2026-02-15T11:00:00Z,draft,28000,supplier_0431 +pr_001332,acct_0036,user_000232,2026-02-16T11:00:00Z,submitted,29000,supplier_0432 +pr_001333,acct_0037,user_000233,2026-02-17T11:00:00Z,approved,30000,supplier_0433 +pr_001334,acct_0038,user_000234,2026-02-18T11:00:00Z,rejected,31000,supplier_0434 +pr_001335,acct_0039,user_000235,2026-02-19T11:00:00Z,cancelled,32000,supplier_0435 +pr_001336,acct_0040,user_000236,2026-02-20T11:00:00Z,draft,33000,supplier_0436 +pr_001337,acct_0041,user_000237,2026-02-21T11:00:00Z,submitted,34000,supplier_0437 +pr_001338,acct_0042,user_000238,2026-02-22T11:00:00Z,approved,35000,supplier_0438 
+pr_001339,acct_0043,user_000239,2026-02-23T11:00:00Z,rejected,36000,supplier_0439 +pr_001340,acct_0044,user_000240,2026-02-24T11:00:00Z,cancelled,37000,supplier_0440 +pr_001341,acct_0045,user_000241,2026-02-25T11:00:00Z,draft,38000,supplier_0441 +pr_001342,acct_0046,user_000242,2026-02-26T11:00:00Z,submitted,39000,supplier_0442 +pr_001343,acct_0047,user_000243,2026-02-27T11:00:00Z,approved,40000,supplier_0443 +pr_001344,acct_0048,user_000244,2026-02-28T11:00:00Z,rejected,41000,supplier_0444 +pr_001345,acct_0049,user_000245,2026-02-01T11:00:00Z,cancelled,42000,supplier_0445 +pr_001346,acct_0050,user_000246,2026-02-02T11:00:00Z,draft,43000,supplier_0446 +pr_001347,acct_0010,user_000247,2026-02-03T11:00:00Z,submitted,44000,supplier_0447 +pr_001348,acct_0011,user_000248,2026-02-04T11:00:00Z,approved,45000,supplier_0448 +pr_001349,acct_0012,user_000249,2026-02-05T11:00:00Z,rejected,46000,supplier_0449 +pr_001350,acct_0013,user_000250,2026-02-06T11:00:00Z,cancelled,47000,supplier_0450 +pr_001351,acct_0014,user_000251,2026-02-07T11:00:00Z,draft,48000,supplier_0451 +pr_001352,acct_0015,user_000252,2026-02-08T11:00:00Z,submitted,49000,supplier_0452 +pr_001353,acct_0016,user_000253,2026-02-09T11:00:00Z,approved,50000,supplier_0453 +pr_001354,acct_0017,user_000254,2026-02-10T11:00:00Z,rejected,51000,supplier_0454 +pr_001355,acct_0018,user_000255,2026-02-11T11:00:00Z,cancelled,52000,supplier_0455 +pr_001356,acct_0019,user_000256,2026-02-12T11:00:00Z,draft,53000,supplier_0456 +pr_001357,acct_0020,user_000257,2026-02-13T11:00:00Z,submitted,54000,supplier_0457 +pr_001358,acct_0021,user_000258,2026-02-14T11:00:00Z,approved,55000,supplier_0458 +pr_001359,acct_0022,user_000259,2026-02-15T11:00:00Z,rejected,56000,supplier_0459 +pr_001360,acct_0023,user_000260,2026-02-16T11:00:00Z,cancelled,57000,supplier_0460 +pr_001361,acct_0024,user_000261,2026-02-17T11:00:00Z,draft,58000,supplier_0461 +pr_001362,acct_0025,user_000262,2026-02-18T11:00:00Z,submitted,59000,supplier_0462 
+pr_001363,acct_0026,user_000263,2026-02-19T11:00:00Z,approved,60000,supplier_0463 +pr_001364,acct_0027,user_000264,2026-02-20T11:00:00Z,rejected,61000,supplier_0464 +pr_001365,acct_0028,user_000265,2026-02-21T11:00:00Z,cancelled,62000,supplier_0465 +pr_001366,acct_0029,user_000266,2026-02-22T11:00:00Z,draft,63000,supplier_0466 +pr_001367,acct_0030,user_000267,2026-02-23T11:00:00Z,submitted,64000,supplier_0467 +pr_001368,acct_0031,user_000268,2026-02-24T11:00:00Z,approved,65000,supplier_0468 +pr_001369,acct_0032,user_000269,2026-02-25T11:00:00Z,rejected,66000,supplier_0469 +pr_001370,acct_0033,user_000270,2026-02-26T11:00:00Z,cancelled,67000,supplier_0470 +pr_001371,acct_0034,user_000271,2026-02-27T11:00:00Z,draft,68000,supplier_0471 +pr_001372,acct_0035,user_000272,2026-02-28T11:00:00Z,submitted,69000,supplier_0472 +pr_001373,acct_0036,user_000273,2026-02-01T11:00:00Z,approved,70000,supplier_0473 +pr_001374,acct_0037,user_000274,2026-02-02T11:00:00Z,rejected,71000,supplier_0474 +pr_001375,acct_0038,user_000275,2026-02-03T11:00:00Z,cancelled,72000,supplier_0475 +pr_001376,acct_0039,user_000276,2026-02-04T11:00:00Z,draft,73000,supplier_0476 +pr_001377,acct_0040,user_000277,2026-02-05T11:00:00Z,submitted,74000,supplier_0477 +pr_001378,acct_0041,user_000278,2026-02-06T11:00:00Z,approved,75000,supplier_0478 +pr_001379,acct_0042,user_000279,2026-02-07T11:00:00Z,rejected,76000,supplier_0479 +pr_001380,acct_0043,user_000280,2026-02-08T11:00:00Z,cancelled,77000,supplier_0480 +pr_001381,acct_0044,user_000281,2026-02-09T11:00:00Z,draft,78000,supplier_0481 +pr_001382,acct_0045,user_000282,2026-02-10T11:00:00Z,submitted,79000,supplier_0482 +pr_001383,acct_0046,user_000283,2026-02-11T11:00:00Z,approved,80000,supplier_0483 +pr_001384,acct_0047,user_000284,2026-02-12T11:00:00Z,rejected,81000,supplier_0484 +pr_001385,acct_0048,user_000285,2026-02-13T11:00:00Z,cancelled,82000,supplier_0485 +pr_001386,acct_0049,user_000286,2026-02-14T11:00:00Z,draft,83000,supplier_0486 
+pr_001387,acct_0001,user_000287,2026-02-15T11:00:00Z,submitted,84000,supplier_0487 +pr_001388,acct_0002,user_000288,2026-02-16T11:00:00Z,approved,85000,supplier_0488 +pr_001389,acct_0003,user_000289,2026-02-17T11:00:00Z,rejected,86000,supplier_0489 +pr_001390,acct_0004,user_000290,2026-02-18T11:00:00Z,cancelled,87000,supplier_0490 +pr_001391,acct_0005,user_000291,2026-02-19T11:00:00Z,draft,88000,supplier_0491 +pr_001392,acct_0006,user_000292,2026-02-20T11:00:00Z,submitted,89000,supplier_0492 +pr_001393,acct_0007,user_000293,2026-02-21T11:00:00Z,approved,90000,supplier_0493 +pr_001394,acct_0008,user_000294,2026-02-22T11:00:00Z,rejected,91000,supplier_0494 +pr_001395,acct_0009,user_000295,2026-02-23T11:00:00Z,cancelled,92000,supplier_0495 +pr_001396,acct_0010,user_000296,2026-02-24T11:00:00Z,draft,93000,supplier_0496 +pr_001397,acct_0011,user_000297,2026-02-25T11:00:00Z,submitted,94000,supplier_0497 +pr_001398,acct_0012,user_000298,2026-02-26T11:00:00Z,approved,95000,supplier_0498 +pr_001399,acct_0013,user_000299,2026-02-27T11:00:00Z,rejected,96000,supplier_0499 +pr_001400,acct_0014,user_000300,2026-02-28T11:00:00Z,cancelled,97000,supplier_0500 +pr_001401,acct_0015,user_000301,2026-02-01T11:00:00Z,draft,98000,supplier_0501 +pr_001402,acct_0016,user_000302,2026-02-02T11:00:00Z,submitted,99000,supplier_0502 +pr_001403,acct_0017,user_000303,2026-02-03T11:00:00Z,approved,100000,supplier_0503 +pr_001404,acct_0018,user_000304,2026-02-04T11:00:00Z,rejected,101000,supplier_0504 +pr_001405,acct_0019,user_000305,2026-02-05T11:00:00Z,cancelled,102000,supplier_0505 +pr_001406,acct_0020,user_000306,2026-02-06T11:00:00Z,draft,103000,supplier_0506 +pr_001407,acct_0021,user_000307,2026-02-07T11:00:00Z,submitted,104000,supplier_0507 +pr_001408,acct_0022,user_000308,2026-02-08T11:00:00Z,approved,105000,supplier_0508 +pr_001409,acct_0023,user_000309,2026-02-09T11:00:00Z,rejected,106000,supplier_0509 +pr_001410,acct_0024,user_000310,2026-02-10T11:00:00Z,cancelled,107000,supplier_0510 
+pr_001411,acct_0025,user_000311,2026-02-11T11:00:00Z,draft,108000,supplier_0511 +pr_001412,acct_0026,user_000312,2026-02-12T11:00:00Z,submitted,109000,supplier_0512 +pr_001413,acct_0027,user_000313,2026-02-13T11:00:00Z,approved,110000,supplier_0513 +pr_001414,acct_0028,user_000314,2026-02-14T11:00:00Z,rejected,10000,supplier_0514 +pr_001415,acct_0029,user_000315,2026-02-15T11:00:00Z,cancelled,11000,supplier_0515 +pr_001416,acct_0030,user_000316,2026-02-16T11:00:00Z,draft,12000,supplier_0516 +pr_001417,acct_0031,user_000317,2026-02-17T11:00:00Z,submitted,13000,supplier_0517 +pr_001418,acct_0032,user_000318,2026-02-18T11:00:00Z,approved,14000,supplier_0518 +pr_001419,acct_0033,user_000319,2026-02-19T11:00:00Z,rejected,15000,supplier_0519 +pr_001420,acct_0034,user_000320,2026-02-20T11:00:00Z,cancelled,16000,supplier_0520 +pr_001421,acct_0035,user_000321,2026-02-21T11:00:00Z,draft,17000,supplier_0521 +pr_001422,acct_0036,user_000322,2026-02-22T11:00:00Z,submitted,18000,supplier_0522 +pr_001423,acct_0037,user_000323,2026-02-23T11:00:00Z,approved,19000,supplier_0523 +pr_001424,acct_0038,user_000324,2026-02-24T11:00:00Z,rejected,20000,supplier_0524 +pr_001425,acct_0039,user_000325,2026-02-25T11:00:00Z,cancelled,21000,supplier_0525 +pr_001426,acct_0040,user_000326,2026-02-26T11:00:00Z,draft,22000,supplier_0526 +pr_001427,acct_0041,user_000327,2026-02-27T11:00:00Z,submitted,23000,supplier_0527 +pr_001428,acct_0042,user_000328,2026-02-28T11:00:00Z,approved,24000,supplier_0528 +pr_001429,acct_0043,user_000329,2026-02-01T11:00:00Z,rejected,25000,supplier_0529 +pr_001430,acct_0044,user_000330,2026-02-02T11:00:00Z,cancelled,26000,supplier_0530 +pr_001431,acct_0045,user_000331,2026-02-03T11:00:00Z,draft,27000,supplier_0531 +pr_001432,acct_0046,user_000332,2026-02-04T11:00:00Z,submitted,28000,supplier_0532 +pr_001433,acct_0047,user_000333,2026-02-05T11:00:00Z,approved,29000,supplier_0533 +pr_001434,acct_0048,user_000334,2026-02-06T11:00:00Z,rejected,30000,supplier_0534 
+pr_001435,acct_0049,user_000335,2026-02-07T11:00:00Z,cancelled,31000,supplier_0535 +pr_001436,acct_0050,user_000336,2026-02-08T11:00:00Z,draft,32000,supplier_0536 +pr_001437,acct_0051,user_000337,2026-02-09T11:00:00Z,submitted,33000,supplier_0537 +pr_001438,acct_0052,user_000338,2026-02-10T11:00:00Z,approved,34000,supplier_0538 +pr_001439,acct_0053,user_000339,2026-02-11T11:00:00Z,rejected,35000,supplier_0539 +pr_001440,acct_0054,user_000340,2026-02-12T11:00:00Z,cancelled,36000,supplier_0540 +pr_001441,acct_0055,user_000341,2026-02-13T11:00:00Z,draft,37000,supplier_0541 +pr_001442,acct_0056,user_000342,2026-02-14T11:00:00Z,submitted,38000,supplier_0542 +pr_001443,acct_0057,user_000343,2026-02-15T11:00:00Z,approved,39000,supplier_0543 +pr_001444,acct_0058,user_000344,2026-02-16T11:00:00Z,rejected,40000,supplier_0544 +pr_001445,acct_0059,user_000345,2026-02-17T11:00:00Z,cancelled,41000,supplier_0545 +pr_001446,acct_0060,user_000346,2026-02-18T11:00:00Z,draft,42000,supplier_0546 +pr_001447,acct_0061,user_000347,2026-02-19T11:00:00Z,submitted,43000,supplier_0547 +pr_001448,acct_0062,user_000348,2026-02-20T11:00:00Z,approved,44000,supplier_0548 +pr_001449,acct_0063,user_000349,2026-02-21T11:00:00Z,rejected,45000,supplier_0549 +pr_001450,acct_0064,user_000350,2026-02-22T11:00:00Z,cancelled,46000,supplier_0550 +pr_001451,acct_0065,user_000351,2026-02-23T11:00:00Z,draft,47000,supplier_0551 +pr_001452,acct_0066,user_000352,2026-02-24T11:00:00Z,submitted,48000,supplier_0552 +pr_001453,acct_0067,user_000353,2026-02-25T11:00:00Z,approved,49000,supplier_0553 +pr_001454,acct_0068,user_000354,2026-02-26T11:00:00Z,rejected,50000,supplier_0554 +pr_001455,acct_0069,user_000355,2026-02-27T11:00:00Z,cancelled,51000,supplier_0555 +pr_001456,acct_0070,user_000356,2026-02-28T11:00:00Z,draft,52000,supplier_0556 +pr_001457,acct_0071,user_000357,2026-02-01T11:00:00Z,submitted,53000,supplier_0557 +pr_001458,acct_0072,user_000358,2026-02-02T11:00:00Z,approved,54000,supplier_0558 
+pr_001459,acct_0073,user_000359,2026-02-03T11:00:00Z,rejected,55000,supplier_0559 +pr_001460,acct_0074,user_000360,2026-02-04T11:00:00Z,cancelled,56000,supplier_0560 +pr_001461,acct_0075,user_000361,2026-02-05T11:00:00Z,draft,57000,supplier_0561 +pr_001462,acct_0076,user_000362,2026-02-06T11:00:00Z,submitted,58000,supplier_0562 +pr_001463,acct_0077,user_000363,2026-02-07T11:00:00Z,approved,59000,supplier_0563 +pr_001464,acct_0078,user_000364,2026-02-08T11:00:00Z,rejected,60000,supplier_0564 +pr_001465,acct_0079,user_000365,2026-02-09T11:00:00Z,cancelled,61000,supplier_0565 +pr_001466,acct_0080,user_000366,2026-02-10T11:00:00Z,draft,62000,supplier_0566 +pr_001467,acct_0081,user_000367,2026-02-11T11:00:00Z,submitted,63000,supplier_0567 +pr_001468,acct_0082,user_000368,2026-02-12T11:00:00Z,approved,64000,supplier_0568 +pr_001469,acct_0083,user_000369,2026-02-13T11:00:00Z,rejected,65000,supplier_0569 +pr_001470,acct_0084,user_000370,2026-02-14T11:00:00Z,cancelled,66000,supplier_0570 +pr_001471,acct_0085,user_000371,2026-02-15T11:00:00Z,draft,67000,supplier_0571 +pr_001472,acct_0086,user_000372,2026-02-16T11:00:00Z,submitted,68000,supplier_0572 +pr_001473,acct_0087,user_000373,2026-02-17T11:00:00Z,approved,69000,supplier_0573 +pr_001474,acct_0088,user_000374,2026-02-18T11:00:00Z,rejected,70000,supplier_0574 +pr_001475,acct_0089,user_000375,2026-02-19T11:00:00Z,cancelled,71000,supplier_0575 +pr_001476,acct_0090,user_000376,2026-02-20T11:00:00Z,draft,72000,supplier_0576 +pr_001477,acct_0091,user_000377,2026-02-21T11:00:00Z,submitted,73000,supplier_0577 +pr_001478,acct_0092,user_000378,2026-02-22T11:00:00Z,approved,74000,supplier_0578 +pr_001479,acct_0093,user_000379,2026-02-23T11:00:00Z,rejected,75000,supplier_0579 +pr_001480,acct_0094,user_000380,2026-02-24T11:00:00Z,cancelled,76000,supplier_0580 +pr_001481,acct_0095,user_000381,2026-02-25T11:00:00Z,draft,77000,supplier_0581 +pr_001482,acct_0096,user_000382,2026-02-26T11:00:00Z,submitted,78000,supplier_0582 
+pr_001483,acct_0097,user_000383,2026-02-27T11:00:00Z,approved,79000,supplier_0583 +pr_001484,acct_0098,user_000384,2026-02-28T11:00:00Z,rejected,80000,supplier_0584 +pr_001485,acct_0099,user_000385,2026-02-01T11:00:00Z,cancelled,81000,supplier_0585 +pr_001486,acct_0100,user_000386,2026-02-02T11:00:00Z,draft,82000,supplier_0586 +pr_001487,acct_0101,user_000387,2026-02-03T11:00:00Z,submitted,83000,supplier_0587 +pr_001488,acct_0102,user_000388,2026-02-04T11:00:00Z,approved,84000,supplier_0588 +pr_001489,acct_0103,user_000389,2026-02-05T11:00:00Z,rejected,85000,supplier_0589 +pr_001490,acct_0104,user_000390,2026-02-06T11:00:00Z,cancelled,86000,supplier_0590 +pr_001491,acct_0105,user_000391,2026-02-07T11:00:00Z,draft,87000,supplier_0591 +pr_001492,acct_0106,user_000392,2026-02-08T11:00:00Z,submitted,88000,supplier_0592 +pr_001493,acct_0107,user_000393,2026-02-09T11:00:00Z,approved,89000,supplier_0593 +pr_001494,acct_0108,user_000394,2026-02-10T11:00:00Z,rejected,90000,supplier_0594 +pr_001495,acct_0109,user_000395,2026-02-11T11:00:00Z,cancelled,91000,supplier_0595 +pr_001496,acct_0110,user_000396,2026-02-12T11:00:00Z,draft,92000,supplier_0596 +pr_001497,acct_0111,user_000397,2026-02-13T11:00:00Z,submitted,93000,supplier_0597 +pr_001498,acct_0112,user_000398,2026-02-14T11:00:00Z,approved,94000,supplier_0598 +pr_001499,acct_0113,user_000399,2026-02-15T11:00:00Z,rejected,95000,supplier_0599 +pr_001500,acct_0114,user_000400,2026-02-16T11:00:00Z,cancelled,96000,supplier_0600 +pr_001501,acct_0115,user_000401,2026-02-17T11:00:00Z,draft,97000,supplier_0601 +pr_001502,acct_0116,user_000402,2026-02-18T11:00:00Z,submitted,98000,supplier_0602 +pr_001503,acct_0117,user_000403,2026-02-19T11:00:00Z,approved,99000,supplier_0603 +pr_001504,acct_0118,user_000404,2026-02-20T11:00:00Z,rejected,100000,supplier_0604 +pr_001505,acct_0119,user_000405,2026-02-21T11:00:00Z,cancelled,101000,supplier_0605 +pr_001506,acct_0120,user_000406,2026-02-22T11:00:00Z,draft,102000,supplier_0606 
+pr_001507,acct_0121,user_000407,2026-02-23T11:00:00Z,submitted,103000,supplier_0607 +pr_001508,acct_0122,user_000408,2026-02-24T11:00:00Z,approved,104000,supplier_0608 +pr_001509,acct_0123,user_000409,2026-02-25T11:00:00Z,rejected,105000,supplier_0609 +pr_001510,acct_0124,user_000410,2026-02-26T11:00:00Z,cancelled,106000,supplier_0610 +pr_001511,acct_0125,user_000411,2026-02-27T11:00:00Z,draft,107000,supplier_0611 +pr_001512,acct_0126,user_000412,2026-02-28T11:00:00Z,submitted,108000,supplier_0612 +pr_001513,acct_0127,user_000413,2026-02-01T11:00:00Z,approved,109000,supplier_0613 +pr_001514,acct_0128,user_000414,2026-02-02T11:00:00Z,rejected,110000,supplier_0614 +pr_001515,acct_0129,user_000415,2026-02-03T11:00:00Z,cancelled,10000,supplier_0615 +pr_001516,acct_0130,user_000416,2026-02-04T11:00:00Z,draft,11000,supplier_0616 +pr_001517,acct_0131,user_000417,2026-02-05T11:00:00Z,submitted,12000,supplier_0617 +pr_001518,acct_0132,user_000418,2026-02-06T11:00:00Z,approved,13000,supplier_0618 +pr_001519,acct_0133,user_000419,2026-02-07T11:00:00Z,rejected,14000,supplier_0619 +pr_001520,acct_0134,user_000420,2026-02-08T11:00:00Z,cancelled,15000,supplier_0620 +pr_001521,acct_0135,user_000421,2026-02-09T11:00:00Z,draft,16000,supplier_0621 +pr_001522,acct_0136,user_000422,2026-02-10T11:00:00Z,submitted,17000,supplier_0622 +pr_001523,acct_0137,user_000423,2026-02-11T11:00:00Z,approved,18000,supplier_0623 +pr_001524,acct_0138,user_000424,2026-02-12T11:00:00Z,rejected,19000,supplier_0624 +pr_001525,acct_0139,user_000425,2026-02-13T11:00:00Z,cancelled,20000,supplier_0625 +pr_001526,acct_0140,user_000426,2026-02-14T11:00:00Z,draft,21000,supplier_0626 +pr_001527,acct_0141,user_000427,2026-02-15T11:00:00Z,submitted,22000,supplier_0627 +pr_001528,acct_0142,user_000428,2026-02-16T11:00:00Z,approved,23000,supplier_0628 +pr_001529,acct_0143,user_000429,2026-02-17T11:00:00Z,rejected,24000,supplier_0629 +pr_001530,acct_0144,user_000430,2026-02-18T11:00:00Z,cancelled,25000,supplier_0630 
+pr_001531,acct_0145,user_000431,2026-02-19T11:00:00Z,draft,26000,supplier_0631 +pr_001532,acct_0146,user_000432,2026-02-20T11:00:00Z,submitted,27000,supplier_0632 +pr_001533,acct_0147,user_000433,2026-02-21T11:00:00Z,approved,28000,supplier_0633 +pr_001534,acct_0148,user_000434,2026-02-22T11:00:00Z,rejected,29000,supplier_0634 +pr_001535,acct_0149,user_000435,2026-02-23T11:00:00Z,cancelled,30000,supplier_0635 +pr_001536,acct_0150,user_000436,2026-02-24T11:00:00Z,draft,31000,supplier_0636 +pr_001537,acct_0151,user_000437,2026-02-25T11:00:00Z,submitted,32000,supplier_0637 +pr_001538,acct_0152,user_000438,2026-02-26T11:00:00Z,approved,33000,supplier_0638 +pr_001539,acct_0153,user_000439,2026-02-27T11:00:00Z,rejected,34000,supplier_0639 +pr_001540,acct_0154,user_000440,2026-02-28T11:00:00Z,cancelled,35000,supplier_0640 +pr_001541,acct_0155,user_000441,2026-02-01T11:00:00Z,draft,36000,supplier_0641 +pr_001542,acct_0156,user_000442,2026-02-02T11:00:00Z,submitted,37000,supplier_0642 +pr_001543,acct_0157,user_000443,2026-02-03T11:00:00Z,approved,38000,supplier_0643 +pr_001544,acct_0158,user_000444,2026-02-04T11:00:00Z,rejected,39000,supplier_0644 +pr_001545,acct_0159,user_000445,2026-02-05T11:00:00Z,cancelled,40000,supplier_0645 +pr_001546,acct_0160,user_000446,2026-02-06T11:00:00Z,draft,41000,supplier_0646 +pr_001547,acct_0161,user_000447,2026-02-07T11:00:00Z,submitted,42000,supplier_0647 +pr_001548,acct_0162,user_000448,2026-02-08T11:00:00Z,approved,43000,supplier_0648 +pr_001549,acct_0163,user_000449,2026-02-09T11:00:00Z,rejected,44000,supplier_0649 +pr_001550,acct_0164,user_000450,2026-02-10T11:00:00Z,cancelled,45000,supplier_0650 +pr_001551,acct_0165,user_000451,2026-02-11T11:00:00Z,draft,46000,supplier_0651 +pr_001552,acct_0166,user_000452,2026-02-12T11:00:00Z,submitted,47000,supplier_0652 +pr_001553,acct_0167,user_000453,2026-02-13T11:00:00Z,approved,48000,supplier_0653 +pr_001554,acct_0168,user_000454,2026-02-14T11:00:00Z,rejected,49000,supplier_0654 
+pr_001555,acct_0169,user_000455,2026-02-15T11:00:00Z,cancelled,50000,supplier_0655 +pr_001556,acct_0170,user_000456,2026-02-16T11:00:00Z,draft,51000,supplier_0656 +pr_001557,acct_0171,user_000457,2026-02-17T11:00:00Z,submitted,52000,supplier_0657 +pr_001558,acct_0172,user_000458,2026-02-18T11:00:00Z,approved,53000,supplier_0658 +pr_001559,acct_0173,user_000459,2026-02-19T11:00:00Z,rejected,54000,supplier_0659 +pr_001560,acct_0174,user_000460,2026-02-20T11:00:00Z,cancelled,55000,supplier_0660 +pr_001561,acct_0175,user_000461,2026-02-21T11:00:00Z,draft,56000,supplier_0661 +pr_001562,acct_0176,user_000462,2026-02-22T11:00:00Z,submitted,57000,supplier_0662 +pr_001563,acct_0177,user_000463,2026-02-23T11:00:00Z,approved,58000,supplier_0663 +pr_001564,acct_0178,user_000464,2026-02-24T11:00:00Z,rejected,59000,supplier_0664 +pr_001565,acct_0179,user_000465,2026-02-25T11:00:00Z,cancelled,60000,supplier_0665 +pr_001566,acct_0180,user_000466,2026-02-26T11:00:00Z,draft,61000,supplier_0666 +pr_001567,acct_0181,user_000467,2026-02-27T11:00:00Z,submitted,62000,supplier_0667 +pr_001568,acct_0182,user_000468,2026-02-28T11:00:00Z,approved,63000,supplier_0668 +pr_001569,acct_0183,user_000469,2026-02-01T11:00:00Z,rejected,64000,supplier_0669 +pr_001570,acct_0184,user_000470,2026-02-02T11:00:00Z,cancelled,65000,supplier_0670 +pr_001571,acct_0185,user_000471,2026-02-03T11:00:00Z,draft,66000,supplier_0671 +pr_001572,acct_0186,user_000472,2026-02-04T11:00:00Z,submitted,67000,supplier_0672 +pr_001573,acct_0187,user_000473,2026-02-05T11:00:00Z,approved,68000,supplier_0673 +pr_001574,acct_0188,user_000474,2026-02-06T11:00:00Z,rejected,69000,supplier_0674 +pr_001575,acct_0189,user_000475,2026-02-07T11:00:00Z,cancelled,70000,supplier_0675 +pr_001576,acct_0190,user_000476,2026-02-08T11:00:00Z,draft,71000,supplier_0676 +pr_001577,acct_0001,user_000477,2026-02-09T11:00:00Z,submitted,72000,supplier_0677 +pr_001578,acct_0002,user_000478,2026-02-10T11:00:00Z,approved,73000,supplier_0678 
+pr_001579,acct_0003,user_000479,2026-02-11T11:00:00Z,rejected,74000,supplier_0679 +pr_001580,acct_0004,user_000480,2026-02-12T11:00:00Z,cancelled,75000,supplier_0680 +pr_001581,acct_0005,user_000481,2026-02-13T11:00:00Z,draft,76000,supplier_0681 +pr_001582,acct_0006,user_000482,2026-02-14T11:00:00Z,submitted,77000,supplier_0682 +pr_001583,acct_0007,user_000483,2026-02-15T11:00:00Z,approved,78000,supplier_0683 +pr_001584,acct_0008,user_000484,2026-02-16T11:00:00Z,rejected,79000,supplier_0684 +pr_001585,acct_0009,user_000485,2026-02-17T11:00:00Z,cancelled,80000,supplier_0685 +pr_001586,acct_0010,user_000486,2026-02-18T11:00:00Z,draft,81000,supplier_0686 +pr_001587,acct_0011,user_000487,2026-02-19T11:00:00Z,submitted,82000,supplier_0687 +pr_001588,acct_0012,user_000488,2026-02-20T11:00:00Z,approved,83000,supplier_0688 +pr_001589,acct_0013,user_000489,2026-02-21T11:00:00Z,rejected,84000,supplier_0689 +pr_001590,acct_0014,user_000490,2026-02-22T11:00:00Z,cancelled,85000,supplier_0690 +pr_001591,acct_0015,user_000491,2026-02-23T11:00:00Z,draft,86000,supplier_0691 +pr_001592,acct_0016,user_000492,2026-02-24T11:00:00Z,submitted,87000,supplier_0692 +pr_001593,acct_0017,user_000493,2026-02-25T11:00:00Z,approved,88000,supplier_0693 +pr_001594,acct_0018,user_000494,2026-02-26T11:00:00Z,rejected,89000,supplier_0694 +pr_001595,acct_0019,user_000495,2026-02-27T11:00:00Z,cancelled,90000,supplier_0695 +pr_001596,acct_0020,user_000496,2026-02-28T11:00:00Z,draft,91000,supplier_0696 +pr_001597,acct_0021,user_000497,2026-02-01T11:00:00Z,submitted,92000,supplier_0697 +pr_001598,acct_0022,user_000498,2026-02-02T11:00:00Z,approved,93000,supplier_0698 +pr_001599,acct_0023,user_000499,2026-02-03T11:00:00Z,rejected,94000,supplier_0699 +pr_001600,acct_0024,user_000500,2026-02-04T11:00:00Z,cancelled,95000,supplier_0700 +pr_001601,acct_0025,user_000501,2026-02-05T11:00:00Z,draft,96000,supplier_0701 +pr_001602,acct_0026,user_000502,2026-02-06T11:00:00Z,submitted,97000,supplier_0702 
+pr_001603,acct_0027,user_000503,2026-02-07T11:00:00Z,approved,98000,supplier_0703 +pr_001604,acct_0028,user_000504,2026-02-08T11:00:00Z,rejected,99000,supplier_0704 +pr_001605,acct_0029,user_000505,2026-02-09T11:00:00Z,cancelled,100000,supplier_0705 +pr_001606,acct_0030,user_000506,2026-02-10T11:00:00Z,draft,101000,supplier_0706 +pr_001607,acct_0031,user_000507,2026-02-11T11:00:00Z,submitted,102000,supplier_0707 +pr_001608,acct_0032,user_000508,2026-02-12T11:00:00Z,approved,103000,supplier_0708 +pr_001609,acct_0033,user_000509,2026-02-13T11:00:00Z,rejected,104000,supplier_0709 +pr_001610,acct_0034,user_000510,2026-02-14T11:00:00Z,cancelled,105000,supplier_0710 +pr_001611,acct_0035,user_000511,2026-02-15T11:00:00Z,draft,106000,supplier_0711 +pr_001612,acct_0036,user_000512,2026-02-16T11:00:00Z,submitted,107000,supplier_0712 +pr_001613,acct_0037,user_000513,2026-02-17T11:00:00Z,approved,108000,supplier_0713 +pr_001614,acct_0038,user_000514,2026-02-18T11:00:00Z,rejected,109000,supplier_0714 +pr_001615,acct_0039,user_000515,2026-02-19T11:00:00Z,cancelled,110000,supplier_0715 +pr_001616,acct_0040,user_000516,2026-02-20T11:00:00Z,draft,10000,supplier_0716 +pr_001617,acct_0041,user_000517,2026-02-21T11:00:00Z,submitted,11000,supplier_0717 +pr_001618,acct_0042,user_000518,2026-02-22T11:00:00Z,approved,12000,supplier_0718 +pr_001619,acct_0043,user_000519,2026-02-23T11:00:00Z,rejected,13000,supplier_0719 +pr_001620,acct_0044,user_000520,2026-02-24T11:00:00Z,cancelled,14000,supplier_0720 +pr_001621,acct_0045,user_000521,2026-02-25T11:00:00Z,draft,15000,supplier_0721 +pr_001622,acct_0046,user_000522,2026-02-26T11:00:00Z,submitted,16000,supplier_0722 +pr_001623,acct_0047,user_000523,2026-02-27T11:00:00Z,approved,17000,supplier_0723 +pr_001624,acct_0048,user_000524,2026-02-28T11:00:00Z,rejected,18000,supplier_0724 +pr_001625,acct_0049,user_000525,2026-02-01T11:00:00Z,cancelled,19000,supplier_0725 +pr_001626,acct_0050,user_000526,2026-02-02T11:00:00Z,draft,20000,supplier_0726 
+pr_001627,acct_0051,user_000527,2026-02-03T11:00:00Z,submitted,21000,supplier_0727 +pr_001628,acct_0052,user_000528,2026-02-04T11:00:00Z,approved,22000,supplier_0728 +pr_001629,acct_0053,user_000529,2026-02-05T11:00:00Z,rejected,23000,supplier_0729 +pr_001630,acct_0054,user_000530,2026-02-06T11:00:00Z,cancelled,24000,supplier_0730 +pr_001631,acct_0055,user_000531,2026-02-07T11:00:00Z,draft,25000,supplier_0731 +pr_001632,acct_0056,user_000532,2026-02-08T11:00:00Z,submitted,26000,supplier_0732 +pr_001633,acct_0057,user_000533,2026-02-09T11:00:00Z,approved,27000,supplier_0733 +pr_001634,acct_0058,user_000534,2026-02-10T11:00:00Z,rejected,28000,supplier_0734 +pr_001635,acct_0059,user_000535,2026-02-11T11:00:00Z,cancelled,29000,supplier_0735 +pr_001636,acct_0060,user_000536,2026-02-12T11:00:00Z,draft,30000,supplier_0736 +pr_001637,acct_0061,user_000537,2026-02-13T11:00:00Z,submitted,31000,supplier_0737 +pr_001638,acct_0062,user_000538,2026-02-14T11:00:00Z,approved,32000,supplier_0738 +pr_001639,acct_0063,user_000539,2026-02-15T11:00:00Z,rejected,33000,supplier_0739 +pr_001640,acct_0064,user_000540,2026-02-16T11:00:00Z,cancelled,34000,supplier_0740 +pr_001641,acct_0065,user_000541,2026-02-17T11:00:00Z,draft,35000,supplier_0741 +pr_001642,acct_0066,user_000542,2026-02-18T11:00:00Z,submitted,36000,supplier_0742 +pr_001643,acct_0067,user_000543,2026-02-19T11:00:00Z,approved,37000,supplier_0743 +pr_001644,acct_0068,user_000544,2026-02-20T11:00:00Z,rejected,38000,supplier_0744 +pr_001645,acct_0069,user_000545,2026-02-21T11:00:00Z,cancelled,39000,supplier_0745 +pr_001646,acct_0070,user_000546,2026-02-22T11:00:00Z,draft,40000,supplier_0746 +pr_001647,acct_0071,user_000547,2026-02-23T11:00:00Z,submitted,41000,supplier_0747 +pr_001648,acct_0072,user_000548,2026-02-24T11:00:00Z,approved,42000,supplier_0748 +pr_001649,acct_0073,user_000549,2026-02-25T11:00:00Z,rejected,43000,supplier_0749 +pr_001650,acct_0074,user_000550,2026-02-26T11:00:00Z,cancelled,44000,supplier_0750 
+pr_001651,acct_0075,user_000551,2026-02-27T11:00:00Z,draft,45000,supplier_0751 +pr_001652,acct_0076,user_000552,2026-02-28T11:00:00Z,submitted,46000,supplier_0752 +pr_001653,acct_0077,user_000553,2026-02-01T11:00:00Z,approved,47000,supplier_0753 +pr_001654,acct_0078,user_000554,2026-02-02T11:00:00Z,rejected,48000,supplier_0754 +pr_001655,acct_0079,user_000555,2026-02-03T11:00:00Z,cancelled,49000,supplier_0755 +pr_001656,acct_0080,user_000556,2026-02-04T11:00:00Z,draft,50000,supplier_0756 +pr_001657,acct_0081,user_000557,2026-02-05T11:00:00Z,submitted,51000,supplier_0757 +pr_001658,acct_0082,user_000558,2026-02-06T11:00:00Z,approved,52000,supplier_0758 +pr_001659,acct_0083,user_000559,2026-02-07T11:00:00Z,rejected,53000,supplier_0759 +pr_001660,acct_0084,user_000560,2026-02-08T11:00:00Z,cancelled,54000,supplier_0760 +pr_001661,acct_0085,user_000561,2026-02-09T11:00:00Z,draft,55000,supplier_0761 +pr_001662,acct_0086,user_000562,2026-02-10T11:00:00Z,submitted,56000,supplier_0762 +pr_001663,acct_0087,user_000563,2026-02-11T11:00:00Z,approved,57000,supplier_0763 +pr_001664,acct_0088,user_000564,2026-02-12T11:00:00Z,rejected,58000,supplier_0764 +pr_001665,acct_0089,user_000565,2026-02-13T11:00:00Z,cancelled,59000,supplier_0765 +pr_001666,acct_0090,user_000566,2026-02-14T11:00:00Z,draft,60000,supplier_0766 +pr_001667,acct_0091,user_000567,2026-02-15T11:00:00Z,submitted,61000,supplier_0767 +pr_001668,acct_0092,user_000568,2026-02-16T11:00:00Z,approved,62000,supplier_0768 +pr_001669,acct_0093,user_000569,2026-02-17T11:00:00Z,rejected,63000,supplier_0769 +pr_001670,acct_0094,user_000570,2026-02-18T11:00:00Z,cancelled,64000,supplier_0770 +pr_001671,acct_0095,user_000571,2026-02-19T11:00:00Z,draft,65000,supplier_0771 +pr_001672,acct_0096,user_000572,2026-02-20T11:00:00Z,submitted,66000,supplier_0772 +pr_001673,acct_0097,user_000573,2026-02-21T11:00:00Z,approved,67000,supplier_0773 +pr_001674,acct_0098,user_000574,2026-02-22T11:00:00Z,rejected,68000,supplier_0774 
+pr_001675,acct_0099,user_000575,2026-02-23T11:00:00Z,cancelled,69000,supplier_0775 +pr_001676,acct_0100,user_000576,2026-02-24T11:00:00Z,draft,70000,supplier_0776 +pr_001677,acct_0101,user_000577,2026-02-25T11:00:00Z,submitted,71000,supplier_0777 +pr_001678,acct_0102,user_000578,2026-02-26T11:00:00Z,approved,72000,supplier_0778 +pr_001679,acct_0103,user_000579,2026-02-27T11:00:00Z,rejected,73000,supplier_0779 +pr_001680,acct_0104,user_000580,2026-02-28T11:00:00Z,cancelled,74000,supplier_0780 +pr_001681,acct_0105,user_000581,2026-02-01T11:00:00Z,draft,75000,supplier_0781 +pr_001682,acct_0106,user_000582,2026-02-02T11:00:00Z,submitted,76000,supplier_0782 +pr_001683,acct_0107,user_000583,2026-02-03T11:00:00Z,approved,77000,supplier_0783 +pr_001684,acct_0108,user_000584,2026-02-04T11:00:00Z,rejected,78000,supplier_0784 +pr_001685,acct_0109,user_000585,2026-02-05T11:00:00Z,cancelled,79000,supplier_0785 +pr_001686,acct_0110,user_000586,2026-02-06T11:00:00Z,draft,80000,supplier_0786 +pr_001687,acct_0111,user_000587,2026-02-07T11:00:00Z,submitted,81000,supplier_0787 +pr_001688,acct_0112,user_000588,2026-02-08T11:00:00Z,approved,82000,supplier_0788 +pr_001689,acct_0113,user_000589,2026-02-09T11:00:00Z,rejected,83000,supplier_0789 +pr_001690,acct_0114,user_000590,2026-02-10T11:00:00Z,cancelled,84000,supplier_0790 +pr_001691,acct_0115,user_000591,2026-02-11T11:00:00Z,draft,85000,supplier_0791 +pr_001692,acct_0116,user_000592,2026-02-12T11:00:00Z,submitted,86000,supplier_0792 +pr_001693,acct_0117,user_000593,2026-02-13T11:00:00Z,approved,87000,supplier_0793 +pr_001694,acct_0118,user_000594,2026-02-14T11:00:00Z,rejected,88000,supplier_0794 +pr_001695,acct_0119,user_000595,2026-02-15T11:00:00Z,cancelled,89000,supplier_0795 +pr_001696,acct_0120,user_000596,2026-02-16T11:00:00Z,draft,90000,supplier_0796 +pr_001697,acct_0121,user_000597,2026-02-17T11:00:00Z,submitted,91000,supplier_0797 +pr_001698,acct_0122,user_000598,2026-02-18T11:00:00Z,approved,92000,supplier_0798 
+pr_001699,acct_0123,user_000599,2026-02-19T11:00:00Z,rejected,93000,supplier_0799 +pr_001700,acct_0124,user_000600,2026-02-20T11:00:00Z,cancelled,94000,supplier_0800 +pr_001701,acct_0125,user_000601,2026-02-21T11:00:00Z,draft,95000,supplier_0801 +pr_001702,acct_0126,user_000602,2026-02-22T11:00:00Z,submitted,96000,supplier_0802 +pr_001703,acct_0127,user_000603,2026-02-23T11:00:00Z,approved,97000,supplier_0803 +pr_001704,acct_0128,user_000604,2026-02-24T11:00:00Z,rejected,98000,supplier_0804 +pr_001705,acct_0129,user_000605,2026-02-25T11:00:00Z,cancelled,99000,supplier_0805 +pr_001706,acct_0130,user_000606,2026-02-26T11:00:00Z,draft,100000,supplier_0806 +pr_001707,acct_0131,user_000607,2026-02-27T11:00:00Z,submitted,101000,supplier_0807 +pr_001708,acct_0132,user_000608,2026-02-28T11:00:00Z,approved,102000,supplier_0808 +pr_001709,acct_0133,user_000609,2026-02-01T11:00:00Z,rejected,103000,supplier_0809 +pr_001710,acct_0134,user_000610,2026-02-02T11:00:00Z,cancelled,104000,supplier_0810 +pr_001711,acct_0135,user_000611,2026-02-03T11:00:00Z,draft,105000,supplier_0811 +pr_001712,acct_0136,user_000612,2026-02-04T11:00:00Z,submitted,106000,supplier_0812 +pr_001713,acct_0137,user_000613,2026-02-05T11:00:00Z,approved,107000,supplier_0813 +pr_001714,acct_0138,user_000614,2026-02-06T11:00:00Z,rejected,108000,supplier_0814 +pr_001715,acct_0139,user_000615,2026-02-07T11:00:00Z,cancelled,109000,supplier_0815 +pr_001716,acct_0140,user_000616,2026-02-08T11:00:00Z,draft,110000,supplier_0816 +pr_001717,acct_0141,user_000617,2026-02-09T11:00:00Z,submitted,10000,supplier_0817 +pr_001718,acct_0142,user_000618,2026-02-10T11:00:00Z,approved,11000,supplier_0818 +pr_001719,acct_0143,user_000619,2026-02-11T11:00:00Z,rejected,12000,supplier_0819 +pr_001720,acct_0144,user_000620,2026-02-12T11:00:00Z,cancelled,13000,supplier_0820 +pr_001721,acct_0145,user_000621,2026-02-13T11:00:00Z,draft,14000,supplier_0821 +pr_001722,acct_0146,user_000622,2026-02-14T11:00:00Z,submitted,15000,supplier_0822 
+pr_001723,acct_0147,user_000623,2026-02-15T11:00:00Z,approved,16000,supplier_0823 +pr_001724,acct_0148,user_000624,2026-02-16T11:00:00Z,rejected,17000,supplier_0824 +pr_001725,acct_0149,user_000625,2026-02-17T11:00:00Z,cancelled,18000,supplier_0825 +pr_001726,acct_0150,user_000626,2026-02-18T11:00:00Z,draft,19000,supplier_0826 +pr_001727,acct_0151,user_000627,2026-02-19T11:00:00Z,submitted,20000,supplier_0827 +pr_001728,acct_0152,user_000628,2026-02-20T11:00:00Z,approved,21000,supplier_0828 +pr_001729,acct_0153,user_000629,2026-02-21T11:00:00Z,rejected,22000,supplier_0829 +pr_001730,acct_0154,user_000630,2026-02-22T11:00:00Z,cancelled,23000,supplier_0830 +pr_001731,acct_0155,user_000631,2026-02-23T11:00:00Z,draft,24000,supplier_0831 +pr_001732,acct_0156,user_000632,2026-02-24T11:00:00Z,submitted,25000,supplier_0832 +pr_001733,acct_0157,user_000633,2026-02-25T11:00:00Z,approved,26000,supplier_0833 +pr_001734,acct_0158,user_000634,2026-02-26T11:00:00Z,rejected,27000,supplier_0834 +pr_001735,acct_0159,user_000635,2026-02-27T11:00:00Z,cancelled,28000,supplier_0835 +pr_001736,acct_0160,user_000636,2026-02-28T11:00:00Z,draft,29000,supplier_0836 +pr_001737,acct_0161,user_000637,2026-02-01T11:00:00Z,submitted,30000,supplier_0837 +pr_001738,acct_0162,user_000638,2026-02-02T11:00:00Z,approved,31000,supplier_0838 +pr_001739,acct_0163,user_000639,2026-02-03T11:00:00Z,rejected,32000,supplier_0839 +pr_001740,acct_0164,user_000640,2026-02-04T11:00:00Z,cancelled,33000,supplier_0840 +pr_001741,acct_0165,user_000641,2026-02-05T11:00:00Z,draft,34000,supplier_0841 +pr_001742,acct_0166,user_000642,2026-02-06T11:00:00Z,submitted,35000,supplier_0842 +pr_001743,acct_0167,user_000643,2026-02-07T11:00:00Z,approved,36000,supplier_0843 +pr_001744,acct_0168,user_000644,2026-02-08T11:00:00Z,rejected,37000,supplier_0844 +pr_001745,acct_0169,user_000645,2026-02-09T11:00:00Z,cancelled,38000,supplier_0845 +pr_001746,acct_0170,user_000646,2026-02-10T11:00:00Z,draft,39000,supplier_0846 
+pr_001747,acct_0171,user_000647,2026-02-11T11:00:00Z,submitted,40000,supplier_0847 +pr_001748,acct_0172,user_000648,2026-02-12T11:00:00Z,approved,41000,supplier_0848 +pr_001749,acct_0173,user_000649,2026-02-13T11:00:00Z,rejected,42000,supplier_0849 +pr_001750,acct_0174,user_000650,2026-02-14T11:00:00Z,cancelled,43000,supplier_0850 +pr_001751,acct_0175,user_000651,2026-02-15T11:00:00Z,draft,44000,supplier_0851 +pr_001752,acct_0176,user_000652,2026-02-16T11:00:00Z,submitted,45000,supplier_0852 +pr_001753,acct_0177,user_000653,2026-02-17T11:00:00Z,approved,46000,supplier_0853 +pr_001754,acct_0178,user_000654,2026-02-18T11:00:00Z,rejected,47000,supplier_0854 +pr_001755,acct_0179,user_000655,2026-02-19T11:00:00Z,cancelled,48000,supplier_0855 +pr_001756,acct_0180,user_000656,2026-02-20T11:00:00Z,draft,49000,supplier_0856 +pr_001757,acct_0181,user_000657,2026-02-21T11:00:00Z,submitted,50000,supplier_0857 +pr_001758,acct_0182,user_000658,2026-02-22T11:00:00Z,approved,51000,supplier_0858 +pr_001759,acct_0183,user_000659,2026-02-23T11:00:00Z,rejected,52000,supplier_0859 +pr_001760,acct_0184,user_000660,2026-02-24T11:00:00Z,cancelled,53000,supplier_0860 +pr_001761,acct_0185,user_000661,2026-02-25T11:00:00Z,draft,54000,supplier_0861 +pr_001762,acct_0186,user_000662,2026-02-26T11:00:00Z,submitted,55000,supplier_0862 +pr_001763,acct_0187,user_000663,2026-02-27T11:00:00Z,approved,56000,supplier_0863 +pr_001764,acct_0188,user_000664,2026-02-28T11:00:00Z,rejected,57000,supplier_0864 +pr_001765,acct_0189,user_000665,2026-02-01T11:00:00Z,cancelled,58000,supplier_0865 +pr_001766,acct_0190,user_000666,2026-02-02T11:00:00Z,draft,59000,supplier_0866 +pr_001767,acct_0001,user_000667,2026-02-03T11:00:00Z,submitted,60000,supplier_0867 +pr_001768,acct_0002,user_000668,2026-02-04T11:00:00Z,approved,61000,supplier_0868 +pr_001769,acct_0003,user_000669,2026-02-05T11:00:00Z,rejected,62000,supplier_0869 +pr_001770,acct_0004,user_000670,2026-02-06T11:00:00Z,cancelled,63000,supplier_0870 
+pr_001771,acct_0005,user_000671,2026-02-07T11:00:00Z,draft,64000,supplier_0871 +pr_001772,acct_0006,user_000672,2026-02-08T11:00:00Z,submitted,65000,supplier_0872 +pr_001773,acct_0007,user_000673,2026-02-09T11:00:00Z,approved,66000,supplier_0873 +pr_001774,acct_0008,user_000674,2026-02-10T11:00:00Z,rejected,67000,supplier_0874 +pr_001775,acct_0009,user_000675,2026-02-11T11:00:00Z,cancelled,68000,supplier_0875 +pr_001776,acct_0010,user_000676,2026-02-12T11:00:00Z,draft,69000,supplier_0876 +pr_001777,acct_0011,user_000677,2026-02-13T11:00:00Z,submitted,70000,supplier_0877 +pr_001778,acct_0012,user_000678,2026-02-14T11:00:00Z,approved,71000,supplier_0878 +pr_001779,acct_0013,user_000679,2026-02-15T11:00:00Z,rejected,72000,supplier_0879 +pr_001780,acct_0014,user_000680,2026-02-16T11:00:00Z,cancelled,73000,supplier_0880 +pr_001781,acct_0015,user_000681,2026-02-17T11:00:00Z,draft,74000,supplier_0881 +pr_001782,acct_0016,user_000682,2026-02-18T11:00:00Z,submitted,75000,supplier_0882 +pr_001783,acct_0017,user_000683,2026-02-19T11:00:00Z,approved,76000,supplier_0883 +pr_001784,acct_0018,user_000684,2026-02-20T11:00:00Z,rejected,77000,supplier_0884 +pr_001785,acct_0019,user_000685,2026-02-21T11:00:00Z,cancelled,78000,supplier_0885 +pr_001786,acct_0020,user_000686,2026-02-22T11:00:00Z,draft,79000,supplier_0886 +pr_001787,acct_0021,user_000687,2026-02-23T11:00:00Z,submitted,80000,supplier_0887 +pr_001788,acct_0022,user_000688,2026-02-24T11:00:00Z,approved,81000,supplier_0888 +pr_001789,acct_0023,user_000689,2026-02-25T11:00:00Z,rejected,82000,supplier_0889 +pr_001790,acct_0024,user_000690,2026-02-26T11:00:00Z,cancelled,83000,supplier_0890 +pr_001791,acct_0025,user_000691,2026-02-27T11:00:00Z,draft,84000,supplier_0891 +pr_001792,acct_0026,user_000692,2026-02-28T11:00:00Z,submitted,85000,supplier_0892 +pr_001793,acct_0027,user_000693,2026-02-01T11:00:00Z,approved,86000,supplier_0893 +pr_001794,acct_0028,user_000694,2026-02-02T11:00:00Z,rejected,87000,supplier_0894 
+pr_001795,acct_0029,user_000695,2026-02-03T11:00:00Z,cancelled,88000,supplier_0895 +pr_001796,acct_0030,user_000696,2026-02-04T11:00:00Z,draft,89000,supplier_0896 +pr_001797,acct_0031,user_000697,2026-02-05T11:00:00Z,submitted,90000,supplier_0897 +pr_001798,acct_0032,user_000698,2026-02-06T11:00:00Z,approved,91000,supplier_0898 +pr_001799,acct_0033,user_000699,2026-02-07T11:00:00Z,rejected,92000,supplier_0899 +pr_001800,acct_0034,user_000700,2026-02-08T11:00:00Z,cancelled,93000,supplier_0900 +pr_001801,acct_0035,user_000701,2026-02-09T11:00:00Z,draft,94000,supplier_0001 +pr_001802,acct_0036,user_000702,2026-02-10T11:00:00Z,submitted,95000,supplier_0002 +pr_001803,acct_0037,user_000703,2026-02-11T11:00:00Z,approved,96000,supplier_0003 +pr_001804,acct_0038,user_000704,2026-02-12T11:00:00Z,rejected,97000,supplier_0004 +pr_001805,acct_0039,user_000705,2026-02-13T11:00:00Z,cancelled,98000,supplier_0005 +pr_001806,acct_0040,user_000706,2026-02-14T11:00:00Z,draft,99000,supplier_0006 +pr_001807,acct_0041,user_000707,2026-02-15T11:00:00Z,submitted,100000,supplier_0007 +pr_001808,acct_0042,user_000708,2026-02-16T11:00:00Z,approved,101000,supplier_0008 +pr_001809,acct_0043,user_000709,2026-02-17T11:00:00Z,rejected,102000,supplier_0009 +pr_001810,acct_0044,user_000710,2026-02-18T11:00:00Z,cancelled,103000,supplier_0010 +pr_001811,acct_0045,user_000711,2026-02-19T11:00:00Z,draft,104000,supplier_0011 +pr_001812,acct_0046,user_000712,2026-02-20T11:00:00Z,submitted,105000,supplier_0012 +pr_001813,acct_0047,user_000713,2026-02-21T11:00:00Z,approved,106000,supplier_0013 +pr_001814,acct_0048,user_000714,2026-02-22T11:00:00Z,rejected,107000,supplier_0014 +pr_001815,acct_0049,user_000715,2026-02-23T11:00:00Z,cancelled,108000,supplier_0015 +pr_001816,acct_0050,user_000716,2026-02-24T11:00:00Z,draft,109000,supplier_0016 +pr_001817,acct_0051,user_000717,2026-02-25T11:00:00Z,submitted,110000,supplier_0017 +pr_001818,acct_0052,user_000718,2026-02-26T11:00:00Z,approved,10000,supplier_0018 
+pr_001819,acct_0053,user_000719,2026-02-27T11:00:00Z,rejected,11000,supplier_0019 +pr_001820,acct_0054,user_000720,2026-02-28T11:00:00Z,cancelled,12000,supplier_0020 +pr_001821,acct_0055,user_000721,2026-02-01T11:00:00Z,draft,13000,supplier_0021 +pr_001822,acct_0056,user_000722,2026-02-02T11:00:00Z,submitted,14000,supplier_0022 +pr_001823,acct_0057,user_000723,2026-02-03T11:00:00Z,approved,15000,supplier_0023 +pr_001824,acct_0058,user_000724,2026-02-04T11:00:00Z,rejected,16000,supplier_0024 +pr_001825,acct_0059,user_000725,2026-02-05T11:00:00Z,cancelled,17000,supplier_0025 +pr_001826,acct_0060,user_000726,2026-02-06T11:00:00Z,draft,18000,supplier_0026 +pr_001827,acct_0061,user_000727,2026-02-07T11:00:00Z,submitted,19000,supplier_0027 +pr_001828,acct_0062,user_000728,2026-02-08T11:00:00Z,approved,20000,supplier_0028 +pr_001829,acct_0063,user_000729,2026-02-09T11:00:00Z,rejected,21000,supplier_0029 +pr_001830,acct_0064,user_000730,2026-02-10T11:00:00Z,cancelled,22000,supplier_0030 +pr_001831,acct_0065,user_000731,2026-02-11T11:00:00Z,draft,23000,supplier_0031 +pr_001832,acct_0066,user_000732,2026-02-12T11:00:00Z,submitted,24000,supplier_0032 +pr_001833,acct_0067,user_000733,2026-02-13T11:00:00Z,approved,25000,supplier_0033 +pr_001834,acct_0068,user_000734,2026-02-14T11:00:00Z,rejected,26000,supplier_0034 +pr_001835,acct_0069,user_000735,2026-02-15T11:00:00Z,cancelled,27000,supplier_0035 +pr_001836,acct_0070,user_000736,2026-02-16T11:00:00Z,draft,28000,supplier_0036 +pr_001837,acct_0071,user_000737,2026-02-17T11:00:00Z,submitted,29000,supplier_0037 +pr_001838,acct_0072,user_000738,2026-02-18T11:00:00Z,approved,30000,supplier_0038 +pr_001839,acct_0073,user_000739,2026-02-19T11:00:00Z,rejected,31000,supplier_0039 +pr_001840,acct_0074,user_000740,2026-02-20T11:00:00Z,cancelled,32000,supplier_0040 +pr_001841,acct_0075,user_000741,2026-02-21T11:00:00Z,draft,33000,supplier_0041 +pr_001842,acct_0076,user_000742,2026-02-22T11:00:00Z,submitted,34000,supplier_0042 
+pr_001843,acct_0077,user_000743,2026-02-23T11:00:00Z,approved,35000,supplier_0043 +pr_001844,acct_0078,user_000744,2026-02-24T11:00:00Z,rejected,36000,supplier_0044 +pr_001845,acct_0079,user_000745,2026-02-25T11:00:00Z,cancelled,37000,supplier_0045 +pr_001846,acct_0080,user_000746,2026-02-26T11:00:00Z,draft,38000,supplier_0046 +pr_001847,acct_0081,user_000747,2026-02-27T11:00:00Z,submitted,39000,supplier_0047 +pr_001848,acct_0082,user_000748,2026-02-28T11:00:00Z,approved,40000,supplier_0048 +pr_001849,acct_0083,user_000749,2026-02-01T11:00:00Z,rejected,41000,supplier_0049 +pr_001850,acct_0084,user_000750,2026-02-02T11:00:00Z,cancelled,42000,supplier_0050 +pr_001851,acct_0085,user_000751,2026-02-03T11:00:00Z,draft,43000,supplier_0051 +pr_001852,acct_0086,user_000752,2026-02-04T11:00:00Z,submitted,44000,supplier_0052 +pr_001853,acct_0087,user_000753,2026-02-05T11:00:00Z,approved,45000,supplier_0053 +pr_001854,acct_0088,user_000754,2026-02-06T11:00:00Z,rejected,46000,supplier_0054 +pr_001855,acct_0089,user_000755,2026-02-07T11:00:00Z,cancelled,47000,supplier_0055 +pr_001856,acct_0090,user_000756,2026-02-08T11:00:00Z,draft,48000,supplier_0056 +pr_001857,acct_0091,user_000757,2026-02-09T11:00:00Z,submitted,49000,supplier_0057 +pr_001858,acct_0092,user_000758,2026-02-10T11:00:00Z,approved,50000,supplier_0058 +pr_001859,acct_0093,user_000759,2026-02-11T11:00:00Z,rejected,51000,supplier_0059 +pr_001860,acct_0094,user_000760,2026-02-12T11:00:00Z,cancelled,52000,supplier_0060 +pr_001861,acct_0095,user_000761,2026-02-13T11:00:00Z,draft,53000,supplier_0061 +pr_001862,acct_0096,user_000762,2026-02-14T11:00:00Z,submitted,54000,supplier_0062 +pr_001863,acct_0097,user_000763,2026-02-15T11:00:00Z,approved,55000,supplier_0063 +pr_001864,acct_0098,user_000764,2026-02-16T11:00:00Z,rejected,56000,supplier_0064 +pr_001865,acct_0099,user_000765,2026-02-17T11:00:00Z,cancelled,57000,supplier_0065 +pr_001866,acct_0100,user_000766,2026-02-18T11:00:00Z,draft,58000,supplier_0066 
+pr_001867,acct_0101,user_000767,2026-02-19T11:00:00Z,submitted,59000,supplier_0067 +pr_001868,acct_0102,user_000768,2026-02-20T11:00:00Z,approved,60000,supplier_0068 +pr_001869,acct_0103,user_000769,2026-02-21T11:00:00Z,rejected,61000,supplier_0069 +pr_001870,acct_0104,user_000770,2026-02-22T11:00:00Z,cancelled,62000,supplier_0070 +pr_001871,acct_0105,user_000771,2026-02-23T11:00:00Z,draft,63000,supplier_0071 +pr_001872,acct_0106,user_000772,2026-02-24T11:00:00Z,submitted,64000,supplier_0072 +pr_001873,acct_0107,user_000773,2026-02-25T11:00:00Z,approved,65000,supplier_0073 +pr_001874,acct_0108,user_000774,2026-02-26T11:00:00Z,rejected,66000,supplier_0074 +pr_001875,acct_0109,user_000775,2026-02-27T11:00:00Z,cancelled,67000,supplier_0075 +pr_001876,acct_0110,user_000776,2026-02-28T11:00:00Z,draft,68000,supplier_0076 +pr_001877,acct_0111,user_000777,2026-02-01T11:00:00Z,submitted,69000,supplier_0077 +pr_001878,acct_0112,user_000778,2026-02-02T11:00:00Z,approved,70000,supplier_0078 +pr_001879,acct_0113,user_000779,2026-02-03T11:00:00Z,rejected,71000,supplier_0079 +pr_001880,acct_0114,user_000780,2026-02-04T11:00:00Z,cancelled,72000,supplier_0080 +pr_001881,acct_0115,user_000781,2026-02-05T11:00:00Z,draft,73000,supplier_0081 +pr_001882,acct_0116,user_000782,2026-02-06T11:00:00Z,submitted,74000,supplier_0082 +pr_001883,acct_0117,user_000783,2026-02-07T11:00:00Z,approved,75000,supplier_0083 +pr_001884,acct_0118,user_000784,2026-02-08T11:00:00Z,rejected,76000,supplier_0084 +pr_001885,acct_0119,user_000785,2026-02-09T11:00:00Z,cancelled,77000,supplier_0085 +pr_001886,acct_0120,user_000786,2026-02-10T11:00:00Z,draft,78000,supplier_0086 +pr_001887,acct_0121,user_000787,2026-02-11T11:00:00Z,submitted,79000,supplier_0087 +pr_001888,acct_0122,user_000788,2026-02-12T11:00:00Z,approved,80000,supplier_0088 +pr_001889,acct_0123,user_000789,2026-02-13T11:00:00Z,rejected,81000,supplier_0089 +pr_001890,acct_0124,user_000790,2026-02-14T11:00:00Z,cancelled,82000,supplier_0090 
+pr_001891,acct_0125,user_000791,2026-02-15T11:00:00Z,draft,83000,supplier_0091 +pr_001892,acct_0126,user_000792,2026-02-16T11:00:00Z,submitted,84000,supplier_0092 +pr_001893,acct_0127,user_000793,2026-02-17T11:00:00Z,approved,85000,supplier_0093 +pr_001894,acct_0128,user_000794,2026-02-18T11:00:00Z,rejected,86000,supplier_0094 +pr_001895,acct_0129,user_000795,2026-02-19T11:00:00Z,cancelled,87000,supplier_0095 +pr_001896,acct_0130,user_000796,2026-02-20T11:00:00Z,draft,88000,supplier_0096 +pr_001897,acct_0131,user_000797,2026-02-21T11:00:00Z,submitted,89000,supplier_0097 +pr_001898,acct_0132,user_000798,2026-02-22T11:00:00Z,approved,90000,supplier_0098 +pr_001899,acct_0133,user_000799,2026-02-23T11:00:00Z,rejected,91000,supplier_0099 +pr_001900,acct_0134,user_000800,2026-02-24T11:00:00Z,cancelled,92000,supplier_0100 +pr_001901,acct_0135,user_000801,2026-02-25T11:00:00Z,draft,93000,supplier_0101 +pr_001902,acct_0136,user_000802,2026-02-26T11:00:00Z,submitted,94000,supplier_0102 +pr_001903,acct_0137,user_000803,2026-02-27T11:00:00Z,approved,95000,supplier_0103 +pr_001904,acct_0138,user_000804,2026-02-28T11:00:00Z,rejected,96000,supplier_0104 +pr_001905,acct_0139,user_000805,2026-02-01T11:00:00Z,cancelled,97000,supplier_0105 +pr_001906,acct_0140,user_000806,2026-02-02T11:00:00Z,draft,98000,supplier_0106 +pr_001907,acct_0141,user_000807,2026-02-03T11:00:00Z,submitted,99000,supplier_0107 +pr_001908,acct_0142,user_000808,2026-02-04T11:00:00Z,approved,100000,supplier_0108 +pr_001909,acct_0143,user_000809,2026-02-05T11:00:00Z,rejected,101000,supplier_0109 +pr_001910,acct_0144,user_000810,2026-02-06T11:00:00Z,cancelled,102000,supplier_0110 +pr_001911,acct_0145,user_000811,2026-02-07T11:00:00Z,draft,103000,supplier_0111 +pr_001912,acct_0146,user_000812,2026-02-08T11:00:00Z,submitted,104000,supplier_0112 +pr_001913,acct_0147,user_000813,2026-02-09T11:00:00Z,approved,105000,supplier_0113 +pr_001914,acct_0148,user_000814,2026-02-10T11:00:00Z,rejected,106000,supplier_0114 
+pr_001915,acct_0149,user_000815,2026-02-11T11:00:00Z,cancelled,107000,supplier_0115 +pr_001916,acct_0150,user_000816,2026-02-12T11:00:00Z,draft,108000,supplier_0116 +pr_001917,acct_0151,user_000817,2026-02-13T11:00:00Z,submitted,109000,supplier_0117 +pr_001918,acct_0152,user_000818,2026-02-14T11:00:00Z,approved,110000,supplier_0118 +pr_001919,acct_0153,user_000819,2026-02-15T11:00:00Z,rejected,10000,supplier_0119 +pr_001920,acct_0154,user_000820,2026-02-16T11:00:00Z,cancelled,11000,supplier_0120 +pr_001921,acct_0155,user_000821,2026-02-17T11:00:00Z,draft,12000,supplier_0121 +pr_001922,acct_0156,user_000822,2026-02-18T11:00:00Z,submitted,13000,supplier_0122 +pr_001923,acct_0157,user_000823,2026-02-19T11:00:00Z,approved,14000,supplier_0123 +pr_001924,acct_0158,user_000824,2026-02-20T11:00:00Z,rejected,15000,supplier_0124 +pr_001925,acct_0159,user_000825,2026-02-21T11:00:00Z,cancelled,16000,supplier_0125 +pr_001926,acct_0160,user_000826,2026-02-22T11:00:00Z,draft,17000,supplier_0126 +pr_001927,acct_0161,user_000827,2026-02-23T11:00:00Z,submitted,18000,supplier_0127 +pr_001928,acct_0162,user_000828,2026-02-24T11:00:00Z,approved,19000,supplier_0128 +pr_001929,acct_0163,user_000829,2026-02-25T11:00:00Z,rejected,20000,supplier_0129 +pr_001930,acct_0164,user_000830,2026-02-26T11:00:00Z,cancelled,21000,supplier_0130 +pr_001931,acct_0165,user_000831,2026-02-27T11:00:00Z,draft,22000,supplier_0131 +pr_001932,acct_0166,user_000832,2026-02-28T11:00:00Z,submitted,23000,supplier_0132 +pr_001933,acct_0167,user_000833,2026-02-01T11:00:00Z,approved,24000,supplier_0133 +pr_001934,acct_0168,user_000834,2026-02-02T11:00:00Z,rejected,25000,supplier_0134 +pr_001935,acct_0169,user_000835,2026-02-03T11:00:00Z,cancelled,26000,supplier_0135 +pr_001936,acct_0170,user_000836,2026-02-04T11:00:00Z,draft,27000,supplier_0136 +pr_001937,acct_0171,user_000837,2026-02-05T11:00:00Z,submitted,28000,supplier_0137 +pr_001938,acct_0172,user_000838,2026-02-06T11:00:00Z,approved,29000,supplier_0138 
+pr_001939,acct_0173,user_000839,2026-02-07T11:00:00Z,rejected,30000,supplier_0139 +pr_001940,acct_0174,user_000840,2026-02-08T11:00:00Z,cancelled,31000,supplier_0140 +pr_001941,acct_0175,user_000841,2026-02-09T11:00:00Z,draft,32000,supplier_0141 +pr_001942,acct_0176,user_000842,2026-02-10T11:00:00Z,submitted,33000,supplier_0142 +pr_001943,acct_0177,user_000843,2026-02-11T11:00:00Z,approved,34000,supplier_0143 +pr_001944,acct_0178,user_000844,2026-02-12T11:00:00Z,rejected,35000,supplier_0144 +pr_001945,acct_0179,user_000845,2026-02-13T11:00:00Z,cancelled,36000,supplier_0145 +pr_001946,acct_0180,user_000846,2026-02-14T11:00:00Z,draft,37000,supplier_0146 +pr_001947,acct_0181,user_000847,2026-02-15T11:00:00Z,submitted,38000,supplier_0147 +pr_001948,acct_0182,user_000848,2026-02-16T11:00:00Z,approved,39000,supplier_0148 +pr_001949,acct_0183,user_000849,2026-02-17T11:00:00Z,rejected,40000,supplier_0149 +pr_001950,acct_0184,user_000850,2026-02-18T11:00:00Z,cancelled,41000,supplier_0150 +pr_001951,acct_0185,user_000851,2026-02-19T11:00:00Z,draft,42000,supplier_0151 +pr_001952,acct_0186,user_000852,2026-02-20T11:00:00Z,submitted,43000,supplier_0152 +pr_001953,acct_0187,user_000853,2026-02-21T11:00:00Z,approved,44000,supplier_0153 +pr_001954,acct_0188,user_000854,2026-02-22T11:00:00Z,rejected,45000,supplier_0154 +pr_001955,acct_0189,user_000855,2026-02-23T11:00:00Z,cancelled,46000,supplier_0155 +pr_001956,acct_0190,user_000856,2026-02-24T11:00:00Z,draft,47000,supplier_0156 +pr_001957,acct_0001,user_000857,2026-02-25T11:00:00Z,submitted,48000,supplier_0157 +pr_001958,acct_0002,user_000858,2026-02-26T11:00:00Z,approved,49000,supplier_0158 +pr_001959,acct_0003,user_000859,2026-02-27T11:00:00Z,rejected,50000,supplier_0159 +pr_001960,acct_0004,user_000860,2026-02-28T11:00:00Z,cancelled,51000,supplier_0160 +pr_001961,acct_0005,user_000861,2026-02-01T11:00:00Z,draft,52000,supplier_0161 +pr_001962,acct_0006,user_000862,2026-02-02T11:00:00Z,submitted,53000,supplier_0162 
+pr_001963,acct_0007,user_000863,2026-02-03T11:00:00Z,approved,54000,supplier_0163 +pr_001964,acct_0008,user_000864,2026-02-04T11:00:00Z,rejected,55000,supplier_0164 +pr_001965,acct_0009,user_000865,2026-02-05T11:00:00Z,cancelled,56000,supplier_0165 +pr_001966,acct_0010,user_000866,2026-02-06T11:00:00Z,draft,57000,supplier_0166 +pr_001967,acct_0011,user_000867,2026-02-07T11:00:00Z,submitted,58000,supplier_0167 +pr_001968,acct_0012,user_000868,2026-02-08T11:00:00Z,approved,59000,supplier_0168 +pr_001969,acct_0013,user_000869,2026-02-09T11:00:00Z,rejected,60000,supplier_0169 +pr_001970,acct_0014,user_000870,2026-02-10T11:00:00Z,cancelled,61000,supplier_0170 +pr_001971,acct_0015,user_000871,2026-02-11T11:00:00Z,draft,62000,supplier_0171 +pr_001972,acct_0016,user_000872,2026-02-12T11:00:00Z,submitted,63000,supplier_0172 +pr_001973,acct_0017,user_000873,2026-02-13T11:00:00Z,approved,64000,supplier_0173 +pr_001974,acct_0018,user_000874,2026-02-14T11:00:00Z,rejected,65000,supplier_0174 +pr_001975,acct_0019,user_000875,2026-02-15T11:00:00Z,cancelled,66000,supplier_0175 +pr_001976,acct_0020,user_000876,2026-02-16T11:00:00Z,draft,67000,supplier_0176 +pr_001977,acct_0021,user_000877,2026-02-17T11:00:00Z,submitted,68000,supplier_0177 +pr_001978,acct_0022,user_000878,2026-02-18T11:00:00Z,approved,69000,supplier_0178 +pr_001979,acct_0023,user_000879,2026-02-19T11:00:00Z,rejected,70000,supplier_0179 +pr_001980,acct_0024,user_000880,2026-02-20T11:00:00Z,cancelled,71000,supplier_0180 +pr_001981,acct_0025,user_000881,2026-02-21T11:00:00Z,draft,72000,supplier_0181 +pr_001982,acct_0026,user_000882,2026-02-22T11:00:00Z,submitted,73000,supplier_0182 +pr_001983,acct_0027,user_000883,2026-02-23T11:00:00Z,approved,74000,supplier_0183 +pr_001984,acct_0028,user_000884,2026-02-24T11:00:00Z,rejected,75000,supplier_0184 +pr_001985,acct_0029,user_000885,2026-02-25T11:00:00Z,cancelled,76000,supplier_0185 +pr_001986,acct_0030,user_000886,2026-02-26T11:00:00Z,draft,77000,supplier_0186 
+pr_001987,acct_0031,user_000887,2026-02-27T11:00:00Z,submitted,78000,supplier_0187 +pr_001988,acct_0032,user_000888,2026-02-28T11:00:00Z,approved,79000,supplier_0188 +pr_001989,acct_0033,user_000889,2026-02-01T11:00:00Z,rejected,80000,supplier_0189 +pr_001990,acct_0034,user_000890,2026-02-02T11:00:00Z,cancelled,81000,supplier_0190 +pr_001991,acct_0035,user_000891,2026-02-03T11:00:00Z,draft,82000,supplier_0191 +pr_001992,acct_0036,user_000892,2026-02-04T11:00:00Z,submitted,83000,supplier_0192 +pr_001993,acct_0037,user_000893,2026-02-05T11:00:00Z,approved,84000,supplier_0193 +pr_001994,acct_0038,user_000894,2026-02-06T11:00:00Z,rejected,85000,supplier_0194 +pr_001995,acct_0039,user_000895,2026-02-07T11:00:00Z,cancelled,86000,supplier_0195 +pr_001996,acct_0040,user_000896,2026-02-08T11:00:00Z,draft,87000,supplier_0196 +pr_001997,acct_0041,user_000897,2026-02-09T11:00:00Z,submitted,88000,supplier_0197 +pr_001998,acct_0042,user_000898,2026-02-10T11:00:00Z,approved,89000,supplier_0198 +pr_001999,acct_0043,user_000899,2026-02-11T11:00:00Z,rejected,90000,supplier_0199 +pr_002000,acct_0044,user_000900,2026-02-12T11:00:00Z,cancelled,91000,supplier_0200 +pr_002001,acct_0045,user_000901,2026-02-13T11:00:00Z,draft,92000,supplier_0201 +pr_002002,acct_0046,user_000902,2026-02-14T11:00:00Z,submitted,93000,supplier_0202 +pr_002003,acct_0047,user_000903,2026-02-15T11:00:00Z,approved,94000,supplier_0203 +pr_002004,acct_0048,user_000904,2026-02-16T11:00:00Z,rejected,95000,supplier_0204 +pr_002005,acct_0049,user_000905,2026-02-17T11:00:00Z,cancelled,96000,supplier_0205 +pr_002006,acct_0050,user_000906,2026-02-18T11:00:00Z,draft,97000,supplier_0206 +pr_002007,acct_0051,user_000907,2026-02-19T11:00:00Z,submitted,98000,supplier_0207 +pr_002008,acct_0052,user_000908,2026-02-20T11:00:00Z,approved,99000,supplier_0208 +pr_002009,acct_0053,user_000909,2026-02-21T11:00:00Z,rejected,100000,supplier_0209 +pr_002010,acct_0054,user_000910,2026-02-22T11:00:00Z,cancelled,101000,supplier_0210 
+pr_002011,acct_0055,user_000911,2026-02-23T11:00:00Z,draft,102000,supplier_0211 +pr_002012,acct_0056,user_000912,2026-02-24T11:00:00Z,submitted,103000,supplier_0212 +pr_002013,acct_0057,user_000913,2026-02-25T11:00:00Z,approved,104000,supplier_0213 +pr_002014,acct_0058,user_000914,2026-02-26T11:00:00Z,rejected,105000,supplier_0214 +pr_002015,acct_0059,user_000915,2026-02-27T11:00:00Z,cancelled,106000,supplier_0215 +pr_002016,acct_0060,user_000916,2026-02-28T11:00:00Z,draft,107000,supplier_0216 +pr_002017,acct_0061,user_000917,2026-02-01T11:00:00Z,submitted,108000,supplier_0217 +pr_002018,acct_0062,user_000918,2026-02-02T11:00:00Z,approved,109000,supplier_0218 +pr_002019,acct_0063,user_000919,2026-02-03T11:00:00Z,rejected,110000,supplier_0219 +pr_002020,acct_0064,user_000920,2026-02-04T11:00:00Z,cancelled,10000,supplier_0220 +pr_002021,acct_0065,user_000921,2026-02-05T11:00:00Z,draft,11000,supplier_0221 +pr_002022,acct_0066,user_000922,2026-02-06T11:00:00Z,submitted,12000,supplier_0222 +pr_002023,acct_0067,user_000923,2026-02-07T11:00:00Z,approved,13000,supplier_0223 +pr_002024,acct_0068,user_000924,2026-02-08T11:00:00Z,rejected,14000,supplier_0224 +pr_002025,acct_0069,user_000925,2026-02-09T11:00:00Z,cancelled,15000,supplier_0225 +pr_002026,acct_0070,user_000926,2026-02-10T11:00:00Z,draft,16000,supplier_0226 +pr_002027,acct_0071,user_000927,2026-02-11T11:00:00Z,submitted,17000,supplier_0227 +pr_002028,acct_0072,user_000928,2026-02-12T11:00:00Z,approved,18000,supplier_0228 +pr_002029,acct_0073,user_000929,2026-02-13T11:00:00Z,rejected,19000,supplier_0229 +pr_002030,acct_0074,user_000930,2026-02-14T11:00:00Z,cancelled,20000,supplier_0230 +pr_002031,acct_0075,user_000931,2026-02-15T11:00:00Z,draft,21000,supplier_0231 +pr_002032,acct_0076,user_000932,2026-02-16T11:00:00Z,submitted,22000,supplier_0232 +pr_002033,acct_0077,user_000933,2026-02-17T11:00:00Z,approved,23000,supplier_0233 +pr_002034,acct_0078,user_000934,2026-02-18T11:00:00Z,rejected,24000,supplier_0234 
+pr_002035,acct_0079,user_000935,2026-02-19T11:00:00Z,cancelled,25000,supplier_0235 +pr_002036,acct_0080,user_000936,2026-02-20T11:00:00Z,draft,26000,supplier_0236 +pr_002037,acct_0081,user_000937,2026-02-21T11:00:00Z,submitted,27000,supplier_0237 +pr_002038,acct_0082,user_000938,2026-02-22T11:00:00Z,approved,28000,supplier_0238 +pr_002039,acct_0083,user_000939,2026-02-23T11:00:00Z,rejected,29000,supplier_0239 +pr_002040,acct_0084,user_000940,2026-02-24T11:00:00Z,cancelled,30000,supplier_0240 +pr_002041,acct_0085,user_000941,2026-02-25T11:00:00Z,draft,31000,supplier_0241 +pr_002042,acct_0086,user_000942,2026-02-26T11:00:00Z,submitted,32000,supplier_0242 +pr_002043,acct_0087,user_000943,2026-02-27T11:00:00Z,approved,33000,supplier_0243 +pr_002044,acct_0088,user_000944,2026-02-28T11:00:00Z,rejected,34000,supplier_0244 +pr_002045,acct_0089,user_000945,2026-02-01T11:00:00Z,cancelled,35000,supplier_0245 +pr_002046,acct_0090,user_000946,2026-02-02T11:00:00Z,draft,36000,supplier_0246 +pr_002047,acct_0091,user_000947,2026-02-03T11:00:00Z,submitted,37000,supplier_0247 +pr_002048,acct_0092,user_000948,2026-02-04T11:00:00Z,approved,38000,supplier_0248 +pr_002049,acct_0093,user_000949,2026-02-05T11:00:00Z,rejected,39000,supplier_0249 +pr_002050,acct_0094,user_000950,2026-02-06T11:00:00Z,cancelled,40000,supplier_0250 +pr_002051,acct_0095,user_000951,2026-02-07T11:00:00Z,draft,41000,supplier_0251 +pr_002052,acct_0096,user_000952,2026-02-08T11:00:00Z,submitted,42000,supplier_0252 +pr_002053,acct_0097,user_000953,2026-02-09T11:00:00Z,approved,43000,supplier_0253 +pr_002054,acct_0098,user_000954,2026-02-10T11:00:00Z,rejected,44000,supplier_0254 +pr_002055,acct_0099,user_000955,2026-02-11T11:00:00Z,cancelled,45000,supplier_0255 +pr_002056,acct_0100,user_000956,2026-02-12T11:00:00Z,draft,46000,supplier_0256 +pr_002057,acct_0101,user_000957,2026-02-13T11:00:00Z,submitted,47000,supplier_0257 +pr_002058,acct_0102,user_000958,2026-02-14T11:00:00Z,approved,48000,supplier_0258 
+pr_002059,acct_0103,user_000959,2026-02-15T11:00:00Z,rejected,49000,supplier_0259 +pr_002060,acct_0104,user_000960,2026-02-16T11:00:00Z,cancelled,50000,supplier_0260 +pr_002061,acct_0105,user_000961,2026-02-17T11:00:00Z,draft,51000,supplier_0261 +pr_002062,acct_0106,user_000962,2026-02-18T11:00:00Z,submitted,52000,supplier_0262 +pr_002063,acct_0107,user_000963,2026-02-19T11:00:00Z,approved,53000,supplier_0263 +pr_002064,acct_0108,user_000964,2026-02-20T11:00:00Z,rejected,54000,supplier_0264 +pr_002065,acct_0109,user_000965,2026-02-21T11:00:00Z,cancelled,55000,supplier_0265 +pr_002066,acct_0110,user_000966,2026-02-22T11:00:00Z,draft,56000,supplier_0266 +pr_002067,acct_0111,user_000967,2026-02-23T11:00:00Z,submitted,57000,supplier_0267 +pr_002068,acct_0112,user_000968,2026-02-24T11:00:00Z,approved,58000,supplier_0268 +pr_002069,acct_0113,user_000969,2026-02-25T11:00:00Z,rejected,59000,supplier_0269 +pr_002070,acct_0114,user_000970,2026-02-26T11:00:00Z,cancelled,60000,supplier_0270 +pr_002071,acct_0115,user_000971,2026-02-27T11:00:00Z,draft,61000,supplier_0271 +pr_002072,acct_0116,user_000972,2026-02-28T11:00:00Z,submitted,62000,supplier_0272 +pr_002073,acct_0117,user_000973,2026-02-01T11:00:00Z,approved,63000,supplier_0273 +pr_002074,acct_0118,user_000974,2026-02-02T11:00:00Z,rejected,64000,supplier_0274 +pr_002075,acct_0119,user_000975,2026-02-03T11:00:00Z,cancelled,65000,supplier_0275 +pr_002076,acct_0120,user_000976,2026-02-04T11:00:00Z,draft,66000,supplier_0276 +pr_002077,acct_0121,user_000977,2026-02-05T11:00:00Z,submitted,67000,supplier_0277 +pr_002078,acct_0122,user_000978,2026-02-06T11:00:00Z,approved,68000,supplier_0278 +pr_002079,acct_0123,user_000979,2026-02-07T11:00:00Z,rejected,69000,supplier_0279 +pr_002080,acct_0124,user_000980,2026-02-08T11:00:00Z,cancelled,70000,supplier_0280 +pr_002081,acct_0125,user_000981,2026-02-09T11:00:00Z,draft,71000,supplier_0281 +pr_002082,acct_0126,user_000982,2026-02-10T11:00:00Z,submitted,72000,supplier_0282 
+pr_002083,acct_0127,user_000983,2026-02-11T11:00:00Z,approved,73000,supplier_0283 +pr_002084,acct_0128,user_000984,2026-02-12T11:00:00Z,rejected,74000,supplier_0284 +pr_002085,acct_0129,user_000985,2026-02-13T11:00:00Z,cancelled,75000,supplier_0285 +pr_002086,acct_0130,user_000986,2026-02-14T11:00:00Z,draft,76000,supplier_0286 +pr_002087,acct_0131,user_000987,2026-02-15T11:00:00Z,submitted,77000,supplier_0287 +pr_002088,acct_0132,user_000988,2026-02-16T11:00:00Z,approved,78000,supplier_0288 +pr_002089,acct_0133,user_000989,2026-02-17T11:00:00Z,rejected,79000,supplier_0289 +pr_002090,acct_0134,user_000990,2026-02-18T11:00:00Z,cancelled,80000,supplier_0290 +pr_002091,acct_0135,user_000991,2026-02-19T11:00:00Z,draft,81000,supplier_0291 +pr_002092,acct_0136,user_000992,2026-02-20T11:00:00Z,submitted,82000,supplier_0292 +pr_002093,acct_0137,user_000993,2026-02-21T11:00:00Z,approved,83000,supplier_0293 +pr_002094,acct_0138,user_000994,2026-02-22T11:00:00Z,rejected,84000,supplier_0294 +pr_002095,acct_0139,user_000995,2026-02-23T11:00:00Z,cancelled,85000,supplier_0295 +pr_002096,acct_0140,user_000996,2026-02-24T11:00:00Z,draft,86000,supplier_0296 +pr_002097,acct_0141,user_000997,2026-02-25T11:00:00Z,submitted,87000,supplier_0297 +pr_002098,acct_0142,user_000998,2026-02-26T11:00:00Z,approved,88000,supplier_0298 +pr_002099,acct_0143,user_000999,2026-02-27T11:00:00Z,rejected,89000,supplier_0299 +pr_002100,acct_0144,user_001000,2026-02-28T11:00:00Z,cancelled,90000,supplier_0300 +pr_002101,acct_0145,user_001001,2026-02-01T11:00:00Z,draft,91000,supplier_0301 +pr_002102,acct_0146,user_001002,2026-02-02T11:00:00Z,submitted,92000,supplier_0302 +pr_002103,acct_0147,user_001003,2026-02-03T11:00:00Z,approved,93000,supplier_0303 +pr_002104,acct_0148,user_001004,2026-02-04T11:00:00Z,rejected,94000,supplier_0304 +pr_002105,acct_0149,user_001005,2026-02-05T11:00:00Z,cancelled,95000,supplier_0305 +pr_002106,acct_0150,user_001006,2026-02-06T11:00:00Z,draft,96000,supplier_0306 
+pr_002107,acct_0151,user_001007,2026-02-07T11:00:00Z,submitted,97000,supplier_0307 +pr_002108,acct_0152,user_001008,2026-02-08T11:00:00Z,approved,98000,supplier_0308 +pr_002109,acct_0153,user_001009,2026-02-09T11:00:00Z,rejected,99000,supplier_0309 +pr_002110,acct_0154,user_001010,2026-02-10T11:00:00Z,cancelled,100000,supplier_0310 +pr_002111,acct_0155,user_001011,2026-02-11T11:00:00Z,draft,101000,supplier_0311 +pr_002112,acct_0156,user_001012,2026-02-12T11:00:00Z,submitted,102000,supplier_0312 +pr_002113,acct_0157,user_001013,2026-02-13T11:00:00Z,approved,103000,supplier_0313 +pr_002114,acct_0158,user_001014,2026-02-14T11:00:00Z,rejected,104000,supplier_0314 +pr_002115,acct_0159,user_001015,2026-02-15T11:00:00Z,cancelled,105000,supplier_0315 +pr_002116,acct_0160,user_001016,2026-02-16T11:00:00Z,draft,106000,supplier_0316 +pr_002117,acct_0161,user_001017,2026-02-17T11:00:00Z,submitted,107000,supplier_0317 +pr_002118,acct_0162,user_001018,2026-02-18T11:00:00Z,approved,108000,supplier_0318 +pr_002119,acct_0163,user_001019,2026-02-19T11:00:00Z,rejected,109000,supplier_0319 +pr_002120,acct_0164,user_001020,2026-02-20T11:00:00Z,cancelled,110000,supplier_0320 +pr_002121,acct_0165,user_001021,2026-02-21T11:00:00Z,draft,10000,supplier_0321 +pr_002122,acct_0166,user_001022,2026-02-22T11:00:00Z,submitted,11000,supplier_0322 +pr_002123,acct_0167,user_001023,2026-02-23T11:00:00Z,approved,12000,supplier_0323 +pr_002124,acct_0168,user_001024,2026-02-24T11:00:00Z,rejected,13000,supplier_0324 +pr_002125,acct_0169,user_001025,2026-02-25T11:00:00Z,cancelled,14000,supplier_0325 +pr_002126,acct_0170,user_001026,2026-02-26T11:00:00Z,draft,15000,supplier_0326 +pr_002127,acct_0171,user_001027,2026-02-27T11:00:00Z,submitted,16000,supplier_0327 +pr_002128,acct_0172,user_001028,2026-02-28T11:00:00Z,approved,17000,supplier_0328 +pr_002129,acct_0173,user_001029,2026-02-01T11:00:00Z,rejected,18000,supplier_0329 
+pr_002130,acct_0174,user_001030,2026-02-02T11:00:00Z,cancelled,19000,supplier_0330 +pr_002131,acct_0175,user_001031,2026-02-03T11:00:00Z,draft,20000,supplier_0331 +pr_002132,acct_0176,user_001032,2026-02-04T11:00:00Z,submitted,21000,supplier_0332 +pr_002133,acct_0177,user_001033,2026-02-05T11:00:00Z,approved,22000,supplier_0333 +pr_002134,acct_0178,user_001034,2026-02-06T11:00:00Z,rejected,23000,supplier_0334 +pr_002135,acct_0179,user_001035,2026-02-07T11:00:00Z,cancelled,24000,supplier_0335 +pr_002136,acct_0180,user_001036,2026-02-08T11:00:00Z,draft,25000,supplier_0336 +pr_002137,acct_0181,user_001037,2026-02-09T11:00:00Z,submitted,26000,supplier_0337 +pr_002138,acct_0182,user_001038,2026-02-10T11:00:00Z,approved,27000,supplier_0338 +pr_002139,acct_0183,user_001039,2026-02-11T11:00:00Z,rejected,28000,supplier_0339 +pr_002140,acct_0184,user_001040,2026-02-12T11:00:00Z,cancelled,29000,supplier_0340 +pr_002141,acct_0185,user_001041,2026-02-13T11:00:00Z,draft,30000,supplier_0341 +pr_002142,acct_0186,user_001042,2026-02-14T11:00:00Z,submitted,31000,supplier_0342 +pr_002143,acct_0187,user_001043,2026-02-15T11:00:00Z,approved,32000,supplier_0343 +pr_002144,acct_0188,user_001044,2026-02-16T11:00:00Z,rejected,33000,supplier_0344 +pr_002145,acct_0189,user_001045,2026-02-17T11:00:00Z,cancelled,34000,supplier_0345 +pr_002146,acct_0190,user_001046,2026-02-18T11:00:00Z,draft,35000,supplier_0346 +pr_002147,acct_0001,user_001047,2026-02-19T11:00:00Z,submitted,36000,supplier_0347 +pr_002148,acct_0002,user_001048,2026-02-20T11:00:00Z,approved,37000,supplier_0348 +pr_002149,acct_0003,user_001049,2026-02-21T11:00:00Z,rejected,38000,supplier_0349 +pr_002150,acct_0004,user_001050,2026-02-22T11:00:00Z,cancelled,39000,supplier_0350 +pr_002151,acct_0005,user_001051,2026-02-23T11:00:00Z,draft,40000,supplier_0351 +pr_002152,acct_0006,user_001052,2026-02-24T11:00:00Z,submitted,41000,supplier_0352 +pr_002153,acct_0007,user_001053,2026-02-25T11:00:00Z,approved,42000,supplier_0353 
+pr_002154,acct_0008,user_001054,2026-02-26T11:00:00Z,rejected,43000,supplier_0354 +pr_002155,acct_0009,user_001055,2026-02-27T11:00:00Z,cancelled,44000,supplier_0355 +pr_002156,acct_0010,user_001056,2026-02-28T11:00:00Z,draft,45000,supplier_0356 +pr_002157,acct_0011,user_001057,2026-02-01T11:00:00Z,submitted,46000,supplier_0357 +pr_002158,acct_0012,user_001058,2026-02-02T11:00:00Z,approved,47000,supplier_0358 +pr_002159,acct_0013,user_001059,2026-02-03T11:00:00Z,rejected,48000,supplier_0359 +pr_002160,acct_0014,user_001060,2026-02-04T11:00:00Z,cancelled,49000,supplier_0360 +pr_002161,acct_0015,user_001061,2026-02-05T11:00:00Z,draft,50000,supplier_0361 +pr_002162,acct_0016,user_001062,2026-02-06T11:00:00Z,submitted,51000,supplier_0362 +pr_002163,acct_0017,user_001063,2026-02-07T11:00:00Z,approved,52000,supplier_0363 +pr_002164,acct_0018,user_001064,2026-02-08T11:00:00Z,rejected,53000,supplier_0364 +pr_002165,acct_0019,user_001065,2026-02-09T11:00:00Z,cancelled,54000,supplier_0365 +pr_002166,acct_0020,user_001066,2026-02-10T11:00:00Z,draft,55000,supplier_0366 +pr_002167,acct_0021,user_001067,2026-02-11T11:00:00Z,submitted,56000,supplier_0367 +pr_002168,acct_0022,user_001068,2026-02-12T11:00:00Z,approved,57000,supplier_0368 +pr_002169,acct_0023,user_001069,2026-02-13T11:00:00Z,rejected,58000,supplier_0369 +pr_002170,acct_0024,user_001070,2026-02-14T11:00:00Z,cancelled,59000,supplier_0370 +pr_002171,acct_0025,user_001071,2026-02-15T11:00:00Z,draft,60000,supplier_0371 +pr_002172,acct_0026,user_001072,2026-02-16T11:00:00Z,submitted,61000,supplier_0372 +pr_002173,acct_0027,user_001073,2026-02-17T11:00:00Z,approved,62000,supplier_0373 +pr_002174,acct_0028,user_001074,2026-02-18T11:00:00Z,rejected,63000,supplier_0374 +pr_002175,acct_0029,user_001075,2026-02-19T11:00:00Z,cancelled,64000,supplier_0375 +pr_002176,acct_0030,user_001076,2026-02-20T11:00:00Z,draft,65000,supplier_0376 +pr_002177,acct_0031,user_001077,2026-02-21T11:00:00Z,submitted,66000,supplier_0377 
+pr_002178,acct_0032,user_001078,2026-02-22T11:00:00Z,approved,67000,supplier_0378 +pr_002179,acct_0033,user_001079,2026-02-23T11:00:00Z,rejected,68000,supplier_0379 +pr_002180,acct_0034,user_001080,2026-02-24T11:00:00Z,cancelled,69000,supplier_0380 +pr_002181,acct_0035,user_001081,2026-02-25T11:00:00Z,draft,70000,supplier_0381 +pr_002182,acct_0036,user_001082,2026-02-26T11:00:00Z,submitted,71000,supplier_0382 +pr_002183,acct_0037,user_001083,2026-02-27T11:00:00Z,approved,72000,supplier_0383 +pr_002184,acct_0038,user_001084,2026-02-28T11:00:00Z,rejected,73000,supplier_0384 +pr_002185,acct_0039,user_001085,2026-02-01T11:00:00Z,cancelled,74000,supplier_0385 +pr_002186,acct_0040,user_001086,2026-02-02T11:00:00Z,draft,75000,supplier_0386 +pr_002187,acct_0041,user_001087,2026-02-03T11:00:00Z,submitted,76000,supplier_0387 +pr_002188,acct_0042,user_001088,2026-02-04T11:00:00Z,approved,77000,supplier_0388 +pr_002189,acct_0043,user_001089,2026-02-05T11:00:00Z,rejected,78000,supplier_0389 +pr_002190,acct_0044,user_001090,2026-02-06T11:00:00Z,cancelled,79000,supplier_0390 +pr_002191,acct_0045,user_001091,2026-02-07T11:00:00Z,draft,80000,supplier_0391 +pr_002192,acct_0046,user_001092,2026-02-08T11:00:00Z,submitted,81000,supplier_0392 +pr_002193,acct_0047,user_001093,2026-02-09T11:00:00Z,approved,82000,supplier_0393 +pr_002194,acct_0048,user_001094,2026-02-10T11:00:00Z,rejected,83000,supplier_0394 +pr_002195,acct_0049,user_001095,2026-02-11T11:00:00Z,cancelled,84000,supplier_0395 +pr_002196,acct_0050,user_001096,2026-02-12T11:00:00Z,draft,85000,supplier_0396 +pr_002197,acct_0051,user_001097,2026-02-13T11:00:00Z,submitted,86000,supplier_0397 +pr_002198,acct_0052,user_001098,2026-02-14T11:00:00Z,approved,87000,supplier_0398 +pr_002199,acct_0053,user_001099,2026-02-15T11:00:00Z,rejected,88000,supplier_0399 +pr_002200,acct_0054,user_001100,2026-02-16T11:00:00Z,cancelled,89000,supplier_0400 +pr_002201,acct_0010,user_000001,2026-02-17T11:00:00Z,draft,90000,supplier_0401 
+pr_002202,acct_0011,user_000002,2026-02-18T11:00:00Z,submitted,91000,supplier_0402 +pr_002203,acct_0012,user_000003,2026-02-19T11:00:00Z,approved,92000,supplier_0403 +pr_002204,acct_0013,user_000004,2026-02-20T11:00:00Z,rejected,93000,supplier_0404 +pr_002205,acct_0014,user_000005,2026-02-21T11:00:00Z,cancelled,94000,supplier_0405 +pr_002206,acct_0015,user_000006,2026-02-22T11:00:00Z,draft,95000,supplier_0406 +pr_002207,acct_0016,user_000007,2026-02-23T11:00:00Z,submitted,96000,supplier_0407 +pr_002208,acct_0017,user_000008,2026-02-24T11:00:00Z,approved,97000,supplier_0408 +pr_002209,acct_0018,user_000009,2026-02-25T11:00:00Z,rejected,98000,supplier_0409 +pr_002210,acct_0019,user_000010,2026-02-26T11:00:00Z,cancelled,99000,supplier_0410 +pr_002211,acct_0020,user_000011,2026-02-27T11:00:00Z,draft,100000,supplier_0411 +pr_002212,acct_0021,user_000012,2026-02-28T11:00:00Z,submitted,101000,supplier_0412 +pr_002213,acct_0022,user_000013,2026-02-01T11:00:00Z,approved,102000,supplier_0413 +pr_002214,acct_0023,user_000014,2026-02-02T11:00:00Z,rejected,103000,supplier_0414 +pr_002215,acct_0024,user_000015,2026-02-03T11:00:00Z,cancelled,104000,supplier_0415 +pr_002216,acct_0025,user_000016,2026-02-04T11:00:00Z,draft,105000,supplier_0416 +pr_002217,acct_0026,user_000017,2026-02-05T11:00:00Z,submitted,106000,supplier_0417 +pr_002218,acct_0027,user_000018,2026-02-06T11:00:00Z,approved,107000,supplier_0418 +pr_002219,acct_0028,user_000019,2026-02-07T11:00:00Z,rejected,108000,supplier_0419 +pr_002220,acct_0029,user_000020,2026-02-08T11:00:00Z,cancelled,109000,supplier_0420 +pr_002221,acct_0030,user_000021,2026-02-09T11:00:00Z,draft,110000,supplier_0421 +pr_002222,acct_0031,user_000022,2026-02-10T11:00:00Z,submitted,10000,supplier_0422 +pr_002223,acct_0032,user_000023,2026-02-11T11:00:00Z,approved,11000,supplier_0423 +pr_002224,acct_0033,user_000024,2026-02-12T11:00:00Z,rejected,12000,supplier_0424 
+pr_002225,acct_0034,user_000025,2026-02-13T11:00:00Z,cancelled,13000,supplier_0425 +pr_002226,acct_0035,user_000026,2026-02-14T11:00:00Z,draft,14000,supplier_0426 +pr_002227,acct_0036,user_000027,2026-02-15T11:00:00Z,submitted,15000,supplier_0427 +pr_002228,acct_0037,user_000028,2026-02-16T11:00:00Z,approved,16000,supplier_0428 +pr_002229,acct_0038,user_000029,2026-02-17T11:00:00Z,rejected,17000,supplier_0429 +pr_002230,acct_0039,user_000030,2026-02-18T11:00:00Z,cancelled,18000,supplier_0430 +pr_002231,acct_0040,user_000031,2026-02-19T11:00:00Z,draft,19000,supplier_0431 +pr_002232,acct_0041,user_000032,2026-02-20T11:00:00Z,submitted,20000,supplier_0432 +pr_002233,acct_0042,user_000033,2026-02-21T11:00:00Z,approved,21000,supplier_0433 +pr_002234,acct_0043,user_000034,2026-02-22T11:00:00Z,rejected,22000,supplier_0434 +pr_002235,acct_0044,user_000035,2026-02-23T11:00:00Z,cancelled,23000,supplier_0435 +pr_002236,acct_0045,user_000036,2026-02-24T11:00:00Z,draft,24000,supplier_0436 +pr_002237,acct_0046,user_000037,2026-02-25T11:00:00Z,submitted,25000,supplier_0437 +pr_002238,acct_0047,user_000038,2026-02-26T11:00:00Z,approved,26000,supplier_0438 +pr_002239,acct_0048,user_000039,2026-02-27T11:00:00Z,rejected,27000,supplier_0439 +pr_002240,acct_0049,user_000040,2026-02-28T11:00:00Z,cancelled,28000,supplier_0440 +pr_002241,acct_0050,user_000041,2026-02-01T11:00:00Z,draft,29000,supplier_0441 +pr_002242,acct_0010,user_000042,2026-02-02T11:00:00Z,submitted,30000,supplier_0442 +pr_002243,acct_0011,user_000043,2026-02-03T11:00:00Z,approved,31000,supplier_0443 +pr_002244,acct_0012,user_000044,2026-02-04T11:00:00Z,rejected,32000,supplier_0444 +pr_002245,acct_0013,user_000045,2026-02-05T11:00:00Z,cancelled,33000,supplier_0445 +pr_002246,acct_0014,user_000046,2026-02-06T11:00:00Z,draft,34000,supplier_0446 +pr_002247,acct_0015,user_000047,2026-02-07T11:00:00Z,submitted,35000,supplier_0447 +pr_002248,acct_0016,user_000048,2026-02-08T11:00:00Z,approved,36000,supplier_0448 
+pr_002249,acct_0017,user_000049,2026-02-09T11:00:00Z,rejected,37000,supplier_0449 +pr_002250,acct_0018,user_000050,2026-02-10T11:00:00Z,cancelled,38000,supplier_0450 +pr_002251,acct_0019,user_000051,2026-02-11T11:00:00Z,draft,39000,supplier_0451 +pr_002252,acct_0020,user_000052,2026-02-12T11:00:00Z,submitted,40000,supplier_0452 +pr_002253,acct_0021,user_000053,2026-02-13T11:00:00Z,approved,41000,supplier_0453 +pr_002254,acct_0022,user_000054,2026-02-14T11:00:00Z,rejected,42000,supplier_0454 +pr_002255,acct_0023,user_000055,2026-02-15T11:00:00Z,cancelled,43000,supplier_0455 +pr_002256,acct_0024,user_000056,2026-02-16T11:00:00Z,draft,44000,supplier_0456 +pr_002257,acct_0025,user_000057,2026-02-17T11:00:00Z,submitted,45000,supplier_0457 +pr_002258,acct_0026,user_000058,2026-02-18T11:00:00Z,approved,46000,supplier_0458 +pr_002259,acct_0027,user_000059,2026-02-19T11:00:00Z,rejected,47000,supplier_0459 +pr_002260,acct_0028,user_000060,2026-02-20T11:00:00Z,cancelled,48000,supplier_0460 +pr_002261,acct_0029,user_000061,2026-02-21T11:00:00Z,draft,49000,supplier_0461 +pr_002262,acct_0030,user_000062,2026-02-22T11:00:00Z,submitted,50000,supplier_0462 +pr_002263,acct_0031,user_000063,2026-02-23T11:00:00Z,approved,51000,supplier_0463 +pr_002264,acct_0032,user_000064,2026-02-24T11:00:00Z,rejected,52000,supplier_0464 +pr_002265,acct_0033,user_000065,2026-02-25T11:00:00Z,cancelled,53000,supplier_0465 +pr_002266,acct_0034,user_000066,2026-02-26T11:00:00Z,draft,54000,supplier_0466 +pr_002267,acct_0035,user_000067,2026-02-27T11:00:00Z,submitted,55000,supplier_0467 +pr_002268,acct_0036,user_000068,2026-02-28T11:00:00Z,approved,56000,supplier_0468 +pr_002269,acct_0037,user_000069,2026-02-01T11:00:00Z,rejected,57000,supplier_0469 +pr_002270,acct_0038,user_000070,2026-02-02T11:00:00Z,cancelled,58000,supplier_0470 +pr_002271,acct_0039,user_000071,2026-02-03T11:00:00Z,draft,59000,supplier_0471 +pr_002272,acct_0040,user_000072,2026-02-04T11:00:00Z,submitted,60000,supplier_0472 
+pr_002273,acct_0041,user_000073,2026-02-05T11:00:00Z,approved,61000,supplier_0473 +pr_002274,acct_0042,user_000074,2026-02-06T11:00:00Z,rejected,62000,supplier_0474 +pr_002275,acct_0043,user_000075,2026-02-07T11:00:00Z,cancelled,63000,supplier_0475 +pr_002276,acct_0044,user_000076,2026-02-08T11:00:00Z,draft,64000,supplier_0476 +pr_002277,acct_0045,user_000077,2026-02-09T11:00:00Z,submitted,65000,supplier_0477 +pr_002278,acct_0046,user_000078,2026-02-10T11:00:00Z,approved,66000,supplier_0478 +pr_002279,acct_0047,user_000079,2026-02-11T11:00:00Z,rejected,67000,supplier_0479 +pr_002280,acct_0048,user_000080,2026-02-12T11:00:00Z,cancelled,68000,supplier_0480 +pr_002281,acct_0049,user_000081,2026-02-13T11:00:00Z,draft,69000,supplier_0481 +pr_002282,acct_0050,user_000082,2026-02-14T11:00:00Z,submitted,70000,supplier_0482 +pr_002283,acct_0010,user_000083,2026-02-15T11:00:00Z,approved,71000,supplier_0483 +pr_002284,acct_0011,user_000084,2026-02-16T11:00:00Z,rejected,72000,supplier_0484 +pr_002285,acct_0012,user_000085,2026-02-17T11:00:00Z,cancelled,73000,supplier_0485 +pr_002286,acct_0013,user_000086,2026-02-18T11:00:00Z,draft,74000,supplier_0486 +pr_002287,acct_0014,user_000087,2026-02-19T11:00:00Z,submitted,75000,supplier_0487 +pr_002288,acct_0015,user_000088,2026-02-20T11:00:00Z,approved,76000,supplier_0488 +pr_002289,acct_0016,user_000089,2026-02-21T11:00:00Z,rejected,77000,supplier_0489 +pr_002290,acct_0017,user_000090,2026-02-22T11:00:00Z,cancelled,78000,supplier_0490 +pr_002291,acct_0018,user_000091,2026-02-23T11:00:00Z,draft,79000,supplier_0491 +pr_002292,acct_0019,user_000092,2026-02-24T11:00:00Z,submitted,80000,supplier_0492 +pr_002293,acct_0020,user_000093,2026-02-25T11:00:00Z,approved,81000,supplier_0493 +pr_002294,acct_0021,user_000094,2026-02-26T11:00:00Z,rejected,82000,supplier_0494 +pr_002295,acct_0022,user_000095,2026-02-27T11:00:00Z,cancelled,83000,supplier_0495 +pr_002296,acct_0023,user_000096,2026-02-28T11:00:00Z,draft,84000,supplier_0496 
+pr_002297,acct_0024,user_000097,2026-02-01T11:00:00Z,submitted,85000,supplier_0497 +pr_002298,acct_0025,user_000098,2026-02-02T11:00:00Z,approved,86000,supplier_0498 +pr_002299,acct_0026,user_000099,2026-02-03T11:00:00Z,rejected,87000,supplier_0499 +pr_002300,acct_0027,user_000100,2026-02-04T11:00:00Z,cancelled,88000,supplier_0500 +pr_002301,acct_0028,user_000101,2026-02-05T11:00:00Z,draft,89000,supplier_0501 +pr_002302,acct_0029,user_000102,2026-02-06T11:00:00Z,submitted,90000,supplier_0502 +pr_002303,acct_0030,user_000103,2026-02-07T11:00:00Z,approved,91000,supplier_0503 +pr_002304,acct_0031,user_000104,2026-02-08T11:00:00Z,rejected,92000,supplier_0504 +pr_002305,acct_0032,user_000105,2026-02-09T11:00:00Z,cancelled,93000,supplier_0505 +pr_002306,acct_0033,user_000106,2026-02-10T11:00:00Z,draft,94000,supplier_0506 +pr_002307,acct_0034,user_000107,2026-02-11T11:00:00Z,submitted,95000,supplier_0507 +pr_002308,acct_0035,user_000108,2026-02-12T11:00:00Z,approved,96000,supplier_0508 +pr_002309,acct_0036,user_000109,2026-02-13T11:00:00Z,rejected,97000,supplier_0509 +pr_002310,acct_0037,user_000110,2026-02-14T11:00:00Z,cancelled,98000,supplier_0510 +pr_002311,acct_0038,user_000111,2026-02-15T11:00:00Z,draft,99000,supplier_0511 +pr_002312,acct_0039,user_000112,2026-02-16T11:00:00Z,submitted,100000,supplier_0512 +pr_002313,acct_0040,user_000113,2026-02-17T11:00:00Z,approved,101000,supplier_0513 +pr_002314,acct_0041,user_000114,2026-02-18T11:00:00Z,rejected,102000,supplier_0514 +pr_002315,acct_0042,user_000115,2026-02-19T11:00:00Z,cancelled,103000,supplier_0515 +pr_002316,acct_0043,user_000116,2026-02-20T11:00:00Z,draft,104000,supplier_0516 +pr_002317,acct_0044,user_000117,2026-02-21T11:00:00Z,submitted,105000,supplier_0517 +pr_002318,acct_0045,user_000118,2026-02-22T11:00:00Z,approved,106000,supplier_0518 +pr_002319,acct_0046,user_000119,2026-02-23T11:00:00Z,rejected,107000,supplier_0519 +pr_002320,acct_0047,user_000120,2026-02-24T11:00:00Z,cancelled,108000,supplier_0520 
+pr_002321,acct_0048,user_000121,2026-02-25T11:00:00Z,draft,109000,supplier_0521 +pr_002322,acct_0049,user_000122,2026-02-26T11:00:00Z,submitted,110000,supplier_0522 +pr_002323,acct_0050,user_000123,2026-02-27T11:00:00Z,approved,10000,supplier_0523 +pr_002324,acct_0010,user_000124,2026-02-28T11:00:00Z,rejected,11000,supplier_0524 +pr_002325,acct_0011,user_000125,2026-02-01T11:00:00Z,cancelled,12000,supplier_0525 +pr_002326,acct_0012,user_000126,2026-02-02T11:00:00Z,draft,13000,supplier_0526 +pr_002327,acct_0013,user_000127,2026-02-03T11:00:00Z,submitted,14000,supplier_0527 +pr_002328,acct_0014,user_000128,2026-02-04T11:00:00Z,approved,15000,supplier_0528 +pr_002329,acct_0015,user_000129,2026-02-05T11:00:00Z,rejected,16000,supplier_0529 +pr_002330,acct_0016,user_000130,2026-02-06T11:00:00Z,cancelled,17000,supplier_0530 +pr_002331,acct_0017,user_000131,2026-02-07T11:00:00Z,draft,18000,supplier_0531 +pr_002332,acct_0018,user_000132,2026-02-08T11:00:00Z,submitted,19000,supplier_0532 +pr_002333,acct_0019,user_000133,2026-02-09T11:00:00Z,approved,20000,supplier_0533 +pr_002334,acct_0020,user_000134,2026-02-10T11:00:00Z,rejected,21000,supplier_0534 +pr_002335,acct_0021,user_000135,2026-02-11T11:00:00Z,cancelled,22000,supplier_0535 +pr_002336,acct_0022,user_000136,2026-02-12T11:00:00Z,draft,23000,supplier_0536 +pr_002337,acct_0023,user_000137,2026-02-13T11:00:00Z,submitted,24000,supplier_0537 +pr_002338,acct_0024,user_000138,2026-02-14T11:00:00Z,approved,25000,supplier_0538 +pr_002339,acct_0025,user_000139,2026-02-15T11:00:00Z,rejected,26000,supplier_0539 +pr_002340,acct_0026,user_000140,2026-02-16T11:00:00Z,cancelled,27000,supplier_0540 +pr_002341,acct_0027,user_000141,2026-02-17T11:00:00Z,draft,28000,supplier_0541 +pr_002342,acct_0028,user_000142,2026-02-18T11:00:00Z,submitted,29000,supplier_0542 +pr_002343,acct_0029,user_000143,2026-02-19T11:00:00Z,approved,30000,supplier_0543 +pr_002344,acct_0030,user_000144,2026-02-20T11:00:00Z,rejected,31000,supplier_0544 
+pr_002345,acct_0031,user_000145,2026-02-21T11:00:00Z,cancelled,32000,supplier_0545 +pr_002346,acct_0032,user_000146,2026-02-22T11:00:00Z,draft,33000,supplier_0546 +pr_002347,acct_0033,user_000147,2026-02-23T11:00:00Z,submitted,34000,supplier_0547 +pr_002348,acct_0034,user_000148,2026-02-24T11:00:00Z,approved,35000,supplier_0548 +pr_002349,acct_0035,user_000149,2026-02-25T11:00:00Z,rejected,36000,supplier_0549 +pr_002350,acct_0036,user_000150,2026-02-26T11:00:00Z,cancelled,37000,supplier_0550 +pr_002351,acct_0037,user_000151,2026-02-27T11:00:00Z,draft,38000,supplier_0551 +pr_002352,acct_0038,user_000152,2026-02-28T11:00:00Z,submitted,39000,supplier_0552 +pr_002353,acct_0039,user_000153,2026-02-01T11:00:00Z,approved,40000,supplier_0553 +pr_002354,acct_0040,user_000154,2026-02-02T11:00:00Z,rejected,41000,supplier_0554 +pr_002355,acct_0041,user_000155,2026-02-03T11:00:00Z,cancelled,42000,supplier_0555 +pr_002356,acct_0042,user_000156,2026-02-04T11:00:00Z,draft,43000,supplier_0556 +pr_002357,acct_0043,user_000157,2026-02-05T11:00:00Z,submitted,44000,supplier_0557 +pr_002358,acct_0044,user_000158,2026-02-06T11:00:00Z,approved,45000,supplier_0558 +pr_002359,acct_0045,user_000159,2026-02-07T11:00:00Z,rejected,46000,supplier_0559 +pr_002360,acct_0046,user_000160,2026-02-08T11:00:00Z,cancelled,47000,supplier_0560 +pr_002361,acct_0047,user_000161,2026-02-09T11:00:00Z,draft,48000,supplier_0561 +pr_002362,acct_0048,user_000162,2026-02-10T11:00:00Z,submitted,49000,supplier_0562 +pr_002363,acct_0049,user_000163,2026-02-11T11:00:00Z,approved,50000,supplier_0563 +pr_002364,acct_0050,user_000164,2026-02-12T11:00:00Z,rejected,51000,supplier_0564 +pr_002365,acct_0010,user_000165,2026-02-13T11:00:00Z,cancelled,52000,supplier_0565 +pr_002366,acct_0011,user_000166,2026-02-14T11:00:00Z,draft,53000,supplier_0566 +pr_002367,acct_0012,user_000167,2026-02-15T11:00:00Z,submitted,54000,supplier_0567 +pr_002368,acct_0013,user_000168,2026-02-16T11:00:00Z,approved,55000,supplier_0568 
+pr_002369,acct_0014,user_000169,2026-02-17T11:00:00Z,rejected,56000,supplier_0569 +pr_002370,acct_0015,user_000170,2026-02-18T11:00:00Z,cancelled,57000,supplier_0570 +pr_002371,acct_0016,user_000171,2026-02-19T11:00:00Z,draft,58000,supplier_0571 +pr_002372,acct_0017,user_000172,2026-02-20T11:00:00Z,submitted,59000,supplier_0572 +pr_002373,acct_0018,user_000173,2026-02-21T11:00:00Z,approved,60000,supplier_0573 +pr_002374,acct_0019,user_000174,2026-02-22T11:00:00Z,rejected,61000,supplier_0574 +pr_002375,acct_0020,user_000175,2026-02-23T11:00:00Z,cancelled,62000,supplier_0575 +pr_002376,acct_0021,user_000176,2026-02-24T11:00:00Z,draft,63000,supplier_0576 +pr_002377,acct_0022,user_000177,2026-02-25T11:00:00Z,submitted,64000,supplier_0577 +pr_002378,acct_0023,user_000178,2026-02-26T11:00:00Z,approved,65000,supplier_0578 +pr_002379,acct_0024,user_000179,2026-02-27T11:00:00Z,rejected,66000,supplier_0579 +pr_002380,acct_0025,user_000180,2026-02-28T11:00:00Z,cancelled,67000,supplier_0580 +pr_002381,acct_0026,user_000181,2026-02-01T11:00:00Z,draft,68000,supplier_0581 +pr_002382,acct_0027,user_000182,2026-02-02T11:00:00Z,submitted,69000,supplier_0582 +pr_002383,acct_0028,user_000183,2026-02-03T11:00:00Z,approved,70000,supplier_0583 +pr_002384,acct_0029,user_000184,2026-02-04T11:00:00Z,rejected,71000,supplier_0584 +pr_002385,acct_0030,user_000185,2026-02-05T11:00:00Z,cancelled,72000,supplier_0585 +pr_002386,acct_0031,user_000186,2026-02-06T11:00:00Z,draft,73000,supplier_0586 +pr_002387,acct_0032,user_000187,2026-02-07T11:00:00Z,submitted,74000,supplier_0587 +pr_002388,acct_0033,user_000188,2026-02-08T11:00:00Z,approved,75000,supplier_0588 +pr_002389,acct_0034,user_000189,2026-02-09T11:00:00Z,rejected,76000,supplier_0589 +pr_002390,acct_0035,user_000190,2026-02-10T11:00:00Z,cancelled,77000,supplier_0590 +pr_002391,acct_0036,user_000191,2026-02-11T11:00:00Z,draft,78000,supplier_0591 +pr_002392,acct_0037,user_000192,2026-02-12T11:00:00Z,submitted,79000,supplier_0592 
+pr_002393,acct_0038,user_000193,2026-02-13T11:00:00Z,approved,80000,supplier_0593 +pr_002394,acct_0039,user_000194,2026-02-14T11:00:00Z,rejected,81000,supplier_0594 +pr_002395,acct_0040,user_000195,2026-02-15T11:00:00Z,cancelled,82000,supplier_0595 +pr_002396,acct_0041,user_000196,2026-02-16T11:00:00Z,draft,83000,supplier_0596 +pr_002397,acct_0042,user_000197,2026-02-17T11:00:00Z,submitted,84000,supplier_0597 +pr_002398,acct_0043,user_000198,2026-02-18T11:00:00Z,approved,85000,supplier_0598 +pr_002399,acct_0044,user_000199,2026-02-19T11:00:00Z,rejected,86000,supplier_0599 +pr_002400,acct_0045,user_000200,2026-02-20T11:00:00Z,cancelled,87000,supplier_0600 +pr_002401,acct_0046,user_000201,2026-02-21T11:00:00Z,draft,88000,supplier_0601 +pr_002402,acct_0047,user_000202,2026-02-22T11:00:00Z,submitted,89000,supplier_0602 +pr_002403,acct_0048,user_000203,2026-02-23T11:00:00Z,approved,90000,supplier_0603 +pr_002404,acct_0049,user_000204,2026-02-24T11:00:00Z,rejected,91000,supplier_0604 +pr_002405,acct_0050,user_000205,2026-02-25T11:00:00Z,cancelled,92000,supplier_0605 +pr_002406,acct_0010,user_000206,2026-02-26T11:00:00Z,draft,93000,supplier_0606 +pr_002407,acct_0011,user_000207,2026-02-27T11:00:00Z,submitted,94000,supplier_0607 +pr_002408,acct_0012,user_000208,2026-02-28T11:00:00Z,approved,95000,supplier_0608 +pr_002409,acct_0013,user_000209,2026-02-01T11:00:00Z,rejected,96000,supplier_0609 +pr_002410,acct_0014,user_000210,2026-02-02T11:00:00Z,cancelled,97000,supplier_0610 +pr_002411,acct_0015,user_000211,2026-02-03T11:00:00Z,draft,98000,supplier_0611 +pr_002412,acct_0016,user_000212,2026-02-04T11:00:00Z,submitted,99000,supplier_0612 +pr_002413,acct_0017,user_000213,2026-02-05T11:00:00Z,approved,100000,supplier_0613 +pr_002414,acct_0018,user_000214,2026-02-06T11:00:00Z,rejected,101000,supplier_0614 +pr_002415,acct_0019,user_000215,2026-02-07T11:00:00Z,cancelled,102000,supplier_0615 +pr_002416,acct_0020,user_000216,2026-02-08T11:00:00Z,draft,103000,supplier_0616 
+pr_002417,acct_0021,user_000217,2026-02-09T11:00:00Z,submitted,104000,supplier_0617 +pr_002418,acct_0022,user_000218,2026-02-10T11:00:00Z,approved,105000,supplier_0618 +pr_002419,acct_0023,user_000219,2026-02-11T11:00:00Z,rejected,106000,supplier_0619 +pr_002420,acct_0024,user_000220,2026-02-12T11:00:00Z,cancelled,107000,supplier_0620 +pr_002421,acct_0025,user_000221,2026-02-13T11:00:00Z,draft,108000,supplier_0621 +pr_002422,acct_0026,user_000222,2026-02-14T11:00:00Z,submitted,109000,supplier_0622 +pr_002423,acct_0027,user_000223,2026-02-15T11:00:00Z,approved,110000,supplier_0623 +pr_002424,acct_0028,user_000224,2026-02-16T11:00:00Z,rejected,10000,supplier_0624 +pr_002425,acct_0029,user_000225,2026-02-17T11:00:00Z,cancelled,11000,supplier_0625 +pr_002426,acct_0030,user_000226,2026-02-18T11:00:00Z,draft,12000,supplier_0626 +pr_002427,acct_0031,user_000227,2026-02-19T11:00:00Z,submitted,13000,supplier_0627 +pr_002428,acct_0032,user_000228,2026-02-20T11:00:00Z,approved,14000,supplier_0628 +pr_002429,acct_0033,user_000229,2026-02-21T11:00:00Z,rejected,15000,supplier_0629 +pr_002430,acct_0034,user_000230,2026-02-22T11:00:00Z,cancelled,16000,supplier_0630 +pr_002431,acct_0035,user_000231,2026-02-23T11:00:00Z,draft,17000,supplier_0631 +pr_002432,acct_0036,user_000232,2026-02-24T11:00:00Z,submitted,18000,supplier_0632 +pr_002433,acct_0037,user_000233,2026-02-25T11:00:00Z,approved,19000,supplier_0633 +pr_002434,acct_0038,user_000234,2026-02-26T11:00:00Z,rejected,20000,supplier_0634 +pr_002435,acct_0039,user_000235,2026-02-27T11:00:00Z,cancelled,21000,supplier_0635 +pr_002436,acct_0040,user_000236,2026-02-28T11:00:00Z,draft,22000,supplier_0636 +pr_002437,acct_0041,user_000237,2026-02-01T11:00:00Z,submitted,23000,supplier_0637 +pr_002438,acct_0042,user_000238,2026-02-02T11:00:00Z,approved,24000,supplier_0638 +pr_002439,acct_0043,user_000239,2026-02-03T11:00:00Z,rejected,25000,supplier_0639 +pr_002440,acct_0044,user_000240,2026-02-04T11:00:00Z,cancelled,26000,supplier_0640 
+pr_002441,acct_0045,user_000241,2026-02-05T11:00:00Z,draft,27000,supplier_0641 +pr_002442,acct_0046,user_000242,2026-02-06T11:00:00Z,submitted,28000,supplier_0642 +pr_002443,acct_0047,user_000243,2026-02-07T11:00:00Z,approved,29000,supplier_0643 +pr_002444,acct_0048,user_000244,2026-02-08T11:00:00Z,rejected,30000,supplier_0644 +pr_002445,acct_0049,user_000245,2026-02-09T11:00:00Z,cancelled,31000,supplier_0645 +pr_002446,acct_0050,user_000246,2026-02-10T11:00:00Z,draft,32000,supplier_0646 +pr_002447,acct_0010,user_000247,2026-02-11T11:00:00Z,submitted,33000,supplier_0647 +pr_002448,acct_0011,user_000248,2026-02-12T11:00:00Z,approved,34000,supplier_0648 +pr_002449,acct_0012,user_000249,2026-02-13T11:00:00Z,rejected,35000,supplier_0649 +pr_002450,acct_0013,user_000250,2026-02-14T11:00:00Z,cancelled,36000,supplier_0650 +pr_002451,acct_0014,user_000251,2026-02-15T11:00:00Z,draft,37000,supplier_0651 +pr_002452,acct_0015,user_000252,2026-02-16T11:00:00Z,submitted,38000,supplier_0652 +pr_002453,acct_0016,user_000253,2026-02-17T11:00:00Z,approved,39000,supplier_0653 +pr_002454,acct_0017,user_000254,2026-02-18T11:00:00Z,rejected,40000,supplier_0654 +pr_002455,acct_0018,user_000255,2026-02-19T11:00:00Z,cancelled,41000,supplier_0655 +pr_002456,acct_0019,user_000256,2026-02-20T11:00:00Z,draft,42000,supplier_0656 +pr_002457,acct_0020,user_000257,2026-02-21T11:00:00Z,submitted,43000,supplier_0657 +pr_002458,acct_0021,user_000258,2026-02-22T11:00:00Z,approved,44000,supplier_0658 +pr_002459,acct_0022,user_000259,2026-02-23T11:00:00Z,rejected,45000,supplier_0659 +pr_002460,acct_0023,user_000260,2026-02-24T11:00:00Z,cancelled,46000,supplier_0660 +pr_002461,acct_0024,user_000261,2026-02-25T11:00:00Z,draft,47000,supplier_0661 +pr_002462,acct_0025,user_000262,2026-02-26T11:00:00Z,submitted,48000,supplier_0662 +pr_002463,acct_0026,user_000263,2026-02-27T11:00:00Z,approved,49000,supplier_0663 +pr_002464,acct_0027,user_000264,2026-02-28T11:00:00Z,rejected,50000,supplier_0664 
+pr_002465,acct_0028,user_000265,2026-02-01T11:00:00Z,cancelled,51000,supplier_0665 +pr_002466,acct_0029,user_000266,2026-02-02T11:00:00Z,draft,52000,supplier_0666 +pr_002467,acct_0030,user_000267,2026-02-03T11:00:00Z,submitted,53000,supplier_0667 +pr_002468,acct_0031,user_000268,2026-02-04T11:00:00Z,approved,54000,supplier_0668 +pr_002469,acct_0032,user_000269,2026-02-05T11:00:00Z,rejected,55000,supplier_0669 +pr_002470,acct_0033,user_000270,2026-02-06T11:00:00Z,cancelled,56000,supplier_0670 +pr_002471,acct_0034,user_000271,2026-02-07T11:00:00Z,draft,57000,supplier_0671 +pr_002472,acct_0035,user_000272,2026-02-08T11:00:00Z,submitted,58000,supplier_0672 +pr_002473,acct_0036,user_000273,2026-02-09T11:00:00Z,approved,59000,supplier_0673 +pr_002474,acct_0037,user_000274,2026-02-10T11:00:00Z,rejected,60000,supplier_0674 +pr_002475,acct_0038,user_000275,2026-02-11T11:00:00Z,cancelled,61000,supplier_0675 +pr_002476,acct_0039,user_000276,2026-02-12T11:00:00Z,draft,62000,supplier_0676 +pr_002477,acct_0040,user_000277,2026-02-13T11:00:00Z,submitted,63000,supplier_0677 +pr_002478,acct_0041,user_000278,2026-02-14T11:00:00Z,approved,64000,supplier_0678 +pr_002479,acct_0042,user_000279,2026-02-15T11:00:00Z,rejected,65000,supplier_0679 +pr_002480,acct_0043,user_000280,2026-02-16T11:00:00Z,cancelled,66000,supplier_0680 +pr_002481,acct_0044,user_000281,2026-02-17T11:00:00Z,draft,67000,supplier_0681 +pr_002482,acct_0045,user_000282,2026-02-18T11:00:00Z,submitted,68000,supplier_0682 +pr_002483,acct_0046,user_000283,2026-02-19T11:00:00Z,approved,69000,supplier_0683 +pr_002484,acct_0047,user_000284,2026-02-20T11:00:00Z,rejected,70000,supplier_0684 +pr_002485,acct_0048,user_000285,2026-02-21T11:00:00Z,cancelled,71000,supplier_0685 +pr_002486,acct_0049,user_000286,2026-02-22T11:00:00Z,draft,72000,supplier_0686 +pr_002487,acct_0001,user_000287,2026-02-23T11:00:00Z,submitted,73000,supplier_0687 +pr_002488,acct_0002,user_000288,2026-02-24T11:00:00Z,approved,74000,supplier_0688 
+pr_002489,acct_0003,user_000289,2026-02-25T11:00:00Z,rejected,75000,supplier_0689 +pr_002490,acct_0004,user_000290,2026-02-26T11:00:00Z,cancelled,76000,supplier_0690 +pr_002491,acct_0005,user_000291,2026-02-27T11:00:00Z,draft,77000,supplier_0691 +pr_002492,acct_0006,user_000292,2026-02-28T11:00:00Z,submitted,78000,supplier_0692 +pr_002493,acct_0007,user_000293,2026-02-01T11:00:00Z,approved,79000,supplier_0693 +pr_002494,acct_0008,user_000294,2026-02-02T11:00:00Z,rejected,80000,supplier_0694 +pr_002495,acct_0009,user_000295,2026-02-03T11:00:00Z,cancelled,81000,supplier_0695 +pr_002496,acct_0010,user_000296,2026-02-04T11:00:00Z,draft,82000,supplier_0696 +pr_002497,acct_0011,user_000297,2026-02-05T11:00:00Z,submitted,83000,supplier_0697 +pr_002498,acct_0012,user_000298,2026-02-06T11:00:00Z,approved,84000,supplier_0698 +pr_002499,acct_0013,user_000299,2026-02-07T11:00:00Z,rejected,85000,supplier_0699 +pr_002500,acct_0014,user_000300,2026-02-08T11:00:00Z,cancelled,86000,supplier_0700 +pr_002501,acct_0015,user_000301,2026-02-09T11:00:00Z,draft,87000,supplier_0701 +pr_002502,acct_0016,user_000302,2026-02-10T11:00:00Z,submitted,88000,supplier_0702 +pr_002503,acct_0017,user_000303,2026-02-11T11:00:00Z,approved,89000,supplier_0703 +pr_002504,acct_0018,user_000304,2026-02-12T11:00:00Z,rejected,90000,supplier_0704 +pr_002505,acct_0019,user_000305,2026-02-13T11:00:00Z,cancelled,91000,supplier_0705 +pr_002506,acct_0020,user_000306,2026-02-14T11:00:00Z,draft,92000,supplier_0706 +pr_002507,acct_0021,user_000307,2026-02-15T11:00:00Z,submitted,93000,supplier_0707 +pr_002508,acct_0022,user_000308,2026-02-16T11:00:00Z,approved,94000,supplier_0708 +pr_002509,acct_0023,user_000309,2026-02-17T11:00:00Z,rejected,95000,supplier_0709 +pr_002510,acct_0024,user_000310,2026-02-18T11:00:00Z,cancelled,96000,supplier_0710 +pr_002511,acct_0025,user_000311,2026-02-19T11:00:00Z,draft,97000,supplier_0711 +pr_002512,acct_0026,user_000312,2026-02-20T11:00:00Z,submitted,98000,supplier_0712 
+pr_002513,acct_0027,user_000313,2026-02-21T11:00:00Z,approved,99000,supplier_0713 +pr_002514,acct_0028,user_000314,2026-02-22T11:00:00Z,rejected,100000,supplier_0714 +pr_002515,acct_0029,user_000315,2026-02-23T11:00:00Z,cancelled,101000,supplier_0715 +pr_002516,acct_0030,user_000316,2026-02-24T11:00:00Z,draft,102000,supplier_0716 +pr_002517,acct_0031,user_000317,2026-02-25T11:00:00Z,submitted,103000,supplier_0717 +pr_002518,acct_0032,user_000318,2026-02-26T11:00:00Z,approved,104000,supplier_0718 +pr_002519,acct_0033,user_000319,2026-02-27T11:00:00Z,rejected,105000,supplier_0719 +pr_002520,acct_0034,user_000320,2026-02-28T11:00:00Z,cancelled,106000,supplier_0720 +pr_002521,acct_0035,user_000321,2026-02-01T11:00:00Z,draft,107000,supplier_0721 +pr_002522,acct_0036,user_000322,2026-02-02T11:00:00Z,submitted,108000,supplier_0722 +pr_002523,acct_0037,user_000323,2026-02-03T11:00:00Z,approved,109000,supplier_0723 +pr_002524,acct_0038,user_000324,2026-02-04T11:00:00Z,rejected,110000,supplier_0724 +pr_002525,acct_0039,user_000325,2026-02-05T11:00:00Z,cancelled,10000,supplier_0725 +pr_002526,acct_0040,user_000326,2026-02-06T11:00:00Z,draft,11000,supplier_0726 +pr_002527,acct_0041,user_000327,2026-02-07T11:00:00Z,submitted,12000,supplier_0727 +pr_002528,acct_0042,user_000328,2026-02-08T11:00:00Z,approved,13000,supplier_0728 +pr_002529,acct_0043,user_000329,2026-02-09T11:00:00Z,rejected,14000,supplier_0729 +pr_002530,acct_0044,user_000330,2026-02-10T11:00:00Z,cancelled,15000,supplier_0730 +pr_002531,acct_0045,user_000331,2026-02-11T11:00:00Z,draft,16000,supplier_0731 +pr_002532,acct_0046,user_000332,2026-02-12T11:00:00Z,submitted,17000,supplier_0732 +pr_002533,acct_0047,user_000333,2026-02-13T11:00:00Z,approved,18000,supplier_0733 +pr_002534,acct_0048,user_000334,2026-02-14T11:00:00Z,rejected,19000,supplier_0734 +pr_002535,acct_0049,user_000335,2026-02-15T11:00:00Z,cancelled,20000,supplier_0735 +pr_002536,acct_0050,user_000336,2026-02-16T11:00:00Z,draft,21000,supplier_0736 
+pr_002537,acct_0051,user_000337,2026-02-17T11:00:00Z,submitted,22000,supplier_0737 +pr_002538,acct_0052,user_000338,2026-02-18T11:00:00Z,approved,23000,supplier_0738 +pr_002539,acct_0053,user_000339,2026-02-19T11:00:00Z,rejected,24000,supplier_0739 +pr_002540,acct_0054,user_000340,2026-02-20T11:00:00Z,cancelled,25000,supplier_0740 +pr_002541,acct_0055,user_000341,2026-02-21T11:00:00Z,draft,26000,supplier_0741 +pr_002542,acct_0056,user_000342,2026-02-22T11:00:00Z,submitted,27000,supplier_0742 +pr_002543,acct_0057,user_000343,2026-02-23T11:00:00Z,approved,28000,supplier_0743 +pr_002544,acct_0058,user_000344,2026-02-24T11:00:00Z,rejected,29000,supplier_0744 +pr_002545,acct_0059,user_000345,2026-02-25T11:00:00Z,cancelled,30000,supplier_0745 +pr_002546,acct_0060,user_000346,2026-02-26T11:00:00Z,draft,31000,supplier_0746 +pr_002547,acct_0061,user_000347,2026-02-27T11:00:00Z,submitted,32000,supplier_0747 +pr_002548,acct_0062,user_000348,2026-02-28T11:00:00Z,approved,33000,supplier_0748 +pr_002549,acct_0063,user_000349,2026-02-01T11:00:00Z,rejected,34000,supplier_0749 +pr_002550,acct_0064,user_000350,2026-02-02T11:00:00Z,cancelled,35000,supplier_0750 +pr_002551,acct_0065,user_000351,2026-02-03T11:00:00Z,draft,36000,supplier_0751 +pr_002552,acct_0066,user_000352,2026-02-04T11:00:00Z,submitted,37000,supplier_0752 +pr_002553,acct_0067,user_000353,2026-02-05T11:00:00Z,approved,38000,supplier_0753 +pr_002554,acct_0068,user_000354,2026-02-06T11:00:00Z,rejected,39000,supplier_0754 +pr_002555,acct_0069,user_000355,2026-02-07T11:00:00Z,cancelled,40000,supplier_0755 +pr_002556,acct_0070,user_000356,2026-02-08T11:00:00Z,draft,41000,supplier_0756 +pr_002557,acct_0071,user_000357,2026-02-09T11:00:00Z,submitted,42000,supplier_0757 +pr_002558,acct_0072,user_000358,2026-02-10T11:00:00Z,approved,43000,supplier_0758 +pr_002559,acct_0073,user_000359,2026-02-11T11:00:00Z,rejected,44000,supplier_0759 +pr_002560,acct_0074,user_000360,2026-02-12T11:00:00Z,cancelled,45000,supplier_0760 
+pr_002561,acct_0075,user_000361,2026-02-13T11:00:00Z,draft,46000,supplier_0761 +pr_002562,acct_0076,user_000362,2026-02-14T11:00:00Z,submitted,47000,supplier_0762 +pr_002563,acct_0077,user_000363,2026-02-15T11:00:00Z,approved,48000,supplier_0763 +pr_002564,acct_0078,user_000364,2026-02-16T11:00:00Z,rejected,49000,supplier_0764 +pr_002565,acct_0079,user_000365,2026-02-17T11:00:00Z,cancelled,50000,supplier_0765 +pr_002566,acct_0080,user_000366,2026-02-18T11:00:00Z,draft,51000,supplier_0766 +pr_002567,acct_0081,user_000367,2026-02-19T11:00:00Z,submitted,52000,supplier_0767 +pr_002568,acct_0082,user_000368,2026-02-20T11:00:00Z,approved,53000,supplier_0768 +pr_002569,acct_0083,user_000369,2026-02-21T11:00:00Z,rejected,54000,supplier_0769 +pr_002570,acct_0084,user_000370,2026-02-22T11:00:00Z,cancelled,55000,supplier_0770 +pr_002571,acct_0085,user_000371,2026-02-23T11:00:00Z,draft,56000,supplier_0771 +pr_002572,acct_0086,user_000372,2026-02-24T11:00:00Z,submitted,57000,supplier_0772 +pr_002573,acct_0087,user_000373,2026-02-25T11:00:00Z,approved,58000,supplier_0773 +pr_002574,acct_0088,user_000374,2026-02-26T11:00:00Z,rejected,59000,supplier_0774 +pr_002575,acct_0089,user_000375,2026-02-27T11:00:00Z,cancelled,60000,supplier_0775 +pr_002576,acct_0090,user_000376,2026-02-28T11:00:00Z,draft,61000,supplier_0776 +pr_002577,acct_0091,user_000377,2026-02-01T11:00:00Z,submitted,62000,supplier_0777 +pr_002578,acct_0092,user_000378,2026-02-02T11:00:00Z,approved,63000,supplier_0778 +pr_002579,acct_0093,user_000379,2026-02-03T11:00:00Z,rejected,64000,supplier_0779 +pr_002580,acct_0094,user_000380,2026-02-04T11:00:00Z,cancelled,65000,supplier_0780 +pr_002581,acct_0095,user_000381,2026-02-05T11:00:00Z,draft,66000,supplier_0781 +pr_002582,acct_0096,user_000382,2026-02-06T11:00:00Z,submitted,67000,supplier_0782 +pr_002583,acct_0097,user_000383,2026-02-07T11:00:00Z,approved,68000,supplier_0783 +pr_002584,acct_0098,user_000384,2026-02-08T11:00:00Z,rejected,69000,supplier_0784 
+pr_002585,acct_0099,user_000385,2026-02-09T11:00:00Z,cancelled,70000,supplier_0785 +pr_002586,acct_0100,user_000386,2026-02-10T11:00:00Z,draft,71000,supplier_0786 +pr_002587,acct_0101,user_000387,2026-02-11T11:00:00Z,submitted,72000,supplier_0787 +pr_002588,acct_0102,user_000388,2026-02-12T11:00:00Z,approved,73000,supplier_0788 +pr_002589,acct_0103,user_000389,2026-02-13T11:00:00Z,rejected,74000,supplier_0789 +pr_002590,acct_0104,user_000390,2026-02-14T11:00:00Z,cancelled,75000,supplier_0790 +pr_002591,acct_0105,user_000391,2026-02-15T11:00:00Z,draft,76000,supplier_0791 +pr_002592,acct_0106,user_000392,2026-02-16T11:00:00Z,submitted,77000,supplier_0792 +pr_002593,acct_0107,user_000393,2026-02-17T11:00:00Z,approved,78000,supplier_0793 +pr_002594,acct_0108,user_000394,2026-02-18T11:00:00Z,rejected,79000,supplier_0794 +pr_002595,acct_0109,user_000395,2026-02-19T11:00:00Z,cancelled,80000,supplier_0795 +pr_002596,acct_0110,user_000396,2026-02-20T11:00:00Z,draft,81000,supplier_0796 +pr_002597,acct_0111,user_000397,2026-02-21T11:00:00Z,submitted,82000,supplier_0797 +pr_002598,acct_0112,user_000398,2026-02-22T11:00:00Z,approved,83000,supplier_0798 +pr_002599,acct_0113,user_000399,2026-02-23T11:00:00Z,rejected,84000,supplier_0799 +pr_002600,acct_0114,user_000400,2026-02-24T11:00:00Z,cancelled,85000,supplier_0800 +pr_002601,acct_0115,user_000401,2026-02-25T11:00:00Z,draft,86000,supplier_0801 +pr_002602,acct_0116,user_000402,2026-02-26T11:00:00Z,submitted,87000,supplier_0802 +pr_002603,acct_0117,user_000403,2026-02-27T11:00:00Z,approved,88000,supplier_0803 +pr_002604,acct_0118,user_000404,2026-02-28T11:00:00Z,rejected,89000,supplier_0804 +pr_002605,acct_0119,user_000405,2026-02-01T11:00:00Z,cancelled,90000,supplier_0805 +pr_002606,acct_0120,user_000406,2026-02-02T11:00:00Z,draft,91000,supplier_0806 +pr_002607,acct_0121,user_000407,2026-02-03T11:00:00Z,submitted,92000,supplier_0807 +pr_002608,acct_0122,user_000408,2026-02-04T11:00:00Z,approved,93000,supplier_0808 
+pr_002609,acct_0123,user_000409,2026-02-05T11:00:00Z,rejected,94000,supplier_0809 +pr_002610,acct_0124,user_000410,2026-02-06T11:00:00Z,cancelled,95000,supplier_0810 +pr_002611,acct_0125,user_000411,2026-02-07T11:00:00Z,draft,96000,supplier_0811 +pr_002612,acct_0126,user_000412,2026-02-08T11:00:00Z,submitted,97000,supplier_0812 +pr_002613,acct_0127,user_000413,2026-02-09T11:00:00Z,approved,98000,supplier_0813 +pr_002614,acct_0128,user_000414,2026-02-10T11:00:00Z,rejected,99000,supplier_0814 +pr_002615,acct_0129,user_000415,2026-02-11T11:00:00Z,cancelled,100000,supplier_0815 +pr_002616,acct_0130,user_000416,2026-02-12T11:00:00Z,draft,101000,supplier_0816 +pr_002617,acct_0131,user_000417,2026-02-13T11:00:00Z,submitted,102000,supplier_0817 +pr_002618,acct_0132,user_000418,2026-02-14T11:00:00Z,approved,103000,supplier_0818 +pr_002619,acct_0133,user_000419,2026-02-15T11:00:00Z,rejected,104000,supplier_0819 +pr_002620,acct_0134,user_000420,2026-02-16T11:00:00Z,cancelled,105000,supplier_0820 +pr_002621,acct_0135,user_000421,2026-02-17T11:00:00Z,draft,106000,supplier_0821 +pr_002622,acct_0136,user_000422,2026-02-18T11:00:00Z,submitted,107000,supplier_0822 +pr_002623,acct_0137,user_000423,2026-02-19T11:00:00Z,approved,108000,supplier_0823 +pr_002624,acct_0138,user_000424,2026-02-20T11:00:00Z,rejected,109000,supplier_0824 +pr_002625,acct_0139,user_000425,2026-02-21T11:00:00Z,cancelled,110000,supplier_0825 +pr_002626,acct_0140,user_000426,2026-02-22T11:00:00Z,draft,10000,supplier_0826 +pr_002627,acct_0141,user_000427,2026-02-23T11:00:00Z,submitted,11000,supplier_0827 +pr_002628,acct_0142,user_000428,2026-02-24T11:00:00Z,approved,12000,supplier_0828 +pr_002629,acct_0143,user_000429,2026-02-25T11:00:00Z,rejected,13000,supplier_0829 +pr_002630,acct_0144,user_000430,2026-02-26T11:00:00Z,cancelled,14000,supplier_0830 +pr_002631,acct_0145,user_000431,2026-02-27T11:00:00Z,draft,15000,supplier_0831 +pr_002632,acct_0146,user_000432,2026-02-28T11:00:00Z,submitted,16000,supplier_0832 
+pr_002633,acct_0147,user_000433,2026-02-01T11:00:00Z,approved,17000,supplier_0833 +pr_002634,acct_0148,user_000434,2026-02-02T11:00:00Z,rejected,18000,supplier_0834 +pr_002635,acct_0149,user_000435,2026-02-03T11:00:00Z,cancelled,19000,supplier_0835 +pr_002636,acct_0150,user_000436,2026-02-04T11:00:00Z,draft,20000,supplier_0836 +pr_002637,acct_0151,user_000437,2026-02-05T11:00:00Z,submitted,21000,supplier_0837 +pr_002638,acct_0152,user_000438,2026-02-06T11:00:00Z,approved,22000,supplier_0838 +pr_002639,acct_0153,user_000439,2026-02-07T11:00:00Z,rejected,23000,supplier_0839 +pr_002640,acct_0154,user_000440,2026-02-08T11:00:00Z,cancelled,24000,supplier_0840 +pr_002641,acct_0155,user_000441,2026-02-09T11:00:00Z,draft,25000,supplier_0841 +pr_002642,acct_0156,user_000442,2026-02-10T11:00:00Z,submitted,26000,supplier_0842 +pr_002643,acct_0157,user_000443,2026-02-11T11:00:00Z,approved,27000,supplier_0843 +pr_002644,acct_0158,user_000444,2026-02-12T11:00:00Z,rejected,28000,supplier_0844 +pr_002645,acct_0159,user_000445,2026-02-13T11:00:00Z,cancelled,29000,supplier_0845 +pr_002646,acct_0160,user_000446,2026-02-14T11:00:00Z,draft,30000,supplier_0846 +pr_002647,acct_0161,user_000447,2026-02-15T11:00:00Z,submitted,31000,supplier_0847 +pr_002648,acct_0162,user_000448,2026-02-16T11:00:00Z,approved,32000,supplier_0848 +pr_002649,acct_0163,user_000449,2026-02-17T11:00:00Z,rejected,33000,supplier_0849 +pr_002650,acct_0164,user_000450,2026-02-18T11:00:00Z,cancelled,34000,supplier_0850 +pr_002651,acct_0165,user_000451,2026-02-19T11:00:00Z,draft,35000,supplier_0851 +pr_002652,acct_0166,user_000452,2026-02-20T11:00:00Z,submitted,36000,supplier_0852 +pr_002653,acct_0167,user_000453,2026-02-21T11:00:00Z,approved,37000,supplier_0853 +pr_002654,acct_0168,user_000454,2026-02-22T11:00:00Z,rejected,38000,supplier_0854 +pr_002655,acct_0169,user_000455,2026-02-23T11:00:00Z,cancelled,39000,supplier_0855 +pr_002656,acct_0170,user_000456,2026-02-24T11:00:00Z,draft,40000,supplier_0856 
+pr_002657,acct_0171,user_000457,2026-02-25T11:00:00Z,submitted,41000,supplier_0857 +pr_002658,acct_0172,user_000458,2026-02-26T11:00:00Z,approved,42000,supplier_0858 +pr_002659,acct_0173,user_000459,2026-02-27T11:00:00Z,rejected,43000,supplier_0859 +pr_002660,acct_0174,user_000460,2026-02-28T11:00:00Z,cancelled,44000,supplier_0860 +pr_002661,acct_0175,user_000461,2026-02-01T11:00:00Z,draft,45000,supplier_0861 +pr_002662,acct_0176,user_000462,2026-02-02T11:00:00Z,submitted,46000,supplier_0862 +pr_002663,acct_0177,user_000463,2026-02-03T11:00:00Z,approved,47000,supplier_0863 +pr_002664,acct_0178,user_000464,2026-02-04T11:00:00Z,rejected,48000,supplier_0864 +pr_002665,acct_0179,user_000465,2026-02-05T11:00:00Z,cancelled,49000,supplier_0865 +pr_002666,acct_0180,user_000466,2026-02-06T11:00:00Z,draft,50000,supplier_0866 +pr_002667,acct_0181,user_000467,2026-02-07T11:00:00Z,submitted,51000,supplier_0867 +pr_002668,acct_0182,user_000468,2026-02-08T11:00:00Z,approved,52000,supplier_0868 +pr_002669,acct_0183,user_000469,2026-02-09T11:00:00Z,rejected,53000,supplier_0869 +pr_002670,acct_0184,user_000470,2026-02-10T11:00:00Z,cancelled,54000,supplier_0870 +pr_002671,acct_0185,user_000471,2026-02-11T11:00:00Z,draft,55000,supplier_0871 +pr_002672,acct_0186,user_000472,2026-02-12T11:00:00Z,submitted,56000,supplier_0872 +pr_002673,acct_0187,user_000473,2026-02-13T11:00:00Z,approved,57000,supplier_0873 +pr_002674,acct_0188,user_000474,2026-02-14T11:00:00Z,rejected,58000,supplier_0874 +pr_002675,acct_0189,user_000475,2026-02-15T11:00:00Z,cancelled,59000,supplier_0875 +pr_002676,acct_0190,user_000476,2026-02-16T11:00:00Z,draft,60000,supplier_0876 +pr_002677,acct_0001,user_000477,2026-02-17T11:00:00Z,submitted,61000,supplier_0877 +pr_002678,acct_0002,user_000478,2026-02-18T11:00:00Z,approved,62000,supplier_0878 +pr_002679,acct_0003,user_000479,2026-02-19T11:00:00Z,rejected,63000,supplier_0879 +pr_002680,acct_0004,user_000480,2026-02-20T11:00:00Z,cancelled,64000,supplier_0880 
+pr_002681,acct_0005,user_000481,2026-02-21T11:00:00Z,draft,65000,supplier_0881 +pr_002682,acct_0006,user_000482,2026-02-22T11:00:00Z,submitted,66000,supplier_0882 +pr_002683,acct_0007,user_000483,2026-02-23T11:00:00Z,approved,67000,supplier_0883 +pr_002684,acct_0008,user_000484,2026-02-24T11:00:00Z,rejected,68000,supplier_0884 +pr_002685,acct_0009,user_000485,2026-02-25T11:00:00Z,cancelled,69000,supplier_0885 +pr_002686,acct_0010,user_000486,2026-02-26T11:00:00Z,draft,70000,supplier_0886 +pr_002687,acct_0011,user_000487,2026-02-27T11:00:00Z,submitted,71000,supplier_0887 +pr_002688,acct_0012,user_000488,2026-02-28T11:00:00Z,approved,72000,supplier_0888 +pr_002689,acct_0013,user_000489,2026-02-01T11:00:00Z,rejected,73000,supplier_0889 +pr_002690,acct_0014,user_000490,2026-02-02T11:00:00Z,cancelled,74000,supplier_0890 +pr_002691,acct_0015,user_000491,2026-02-03T11:00:00Z,draft,75000,supplier_0891 +pr_002692,acct_0016,user_000492,2026-02-04T11:00:00Z,submitted,76000,supplier_0892 +pr_002693,acct_0017,user_000493,2026-02-05T11:00:00Z,approved,77000,supplier_0893 +pr_002694,acct_0018,user_000494,2026-02-06T11:00:00Z,rejected,78000,supplier_0894 +pr_002695,acct_0019,user_000495,2026-02-07T11:00:00Z,cancelled,79000,supplier_0895 +pr_002696,acct_0020,user_000496,2026-02-08T11:00:00Z,draft,80000,supplier_0896 +pr_002697,acct_0021,user_000497,2026-02-09T11:00:00Z,submitted,81000,supplier_0897 +pr_002698,acct_0022,user_000498,2026-02-10T11:00:00Z,approved,82000,supplier_0898 +pr_002699,acct_0023,user_000499,2026-02-11T11:00:00Z,rejected,83000,supplier_0899 +pr_002700,acct_0024,user_000500,2026-02-12T11:00:00Z,cancelled,84000,supplier_0900 +pr_002701,acct_0025,user_000501,2026-02-13T11:00:00Z,draft,85000,supplier_0001 +pr_002702,acct_0026,user_000502,2026-02-14T11:00:00Z,submitted,86000,supplier_0002 +pr_002703,acct_0027,user_000503,2026-02-15T11:00:00Z,approved,87000,supplier_0003 +pr_002704,acct_0028,user_000504,2026-02-16T11:00:00Z,rejected,88000,supplier_0004 
+pr_002705,acct_0029,user_000505,2026-02-17T11:00:00Z,cancelled,89000,supplier_0005 +pr_002706,acct_0030,user_000506,2026-02-18T11:00:00Z,draft,90000,supplier_0006 +pr_002707,acct_0031,user_000507,2026-02-19T11:00:00Z,submitted,91000,supplier_0007 +pr_002708,acct_0032,user_000508,2026-02-20T11:00:00Z,approved,92000,supplier_0008 +pr_002709,acct_0033,user_000509,2026-02-21T11:00:00Z,rejected,93000,supplier_0009 +pr_002710,acct_0034,user_000510,2026-02-22T11:00:00Z,cancelled,94000,supplier_0010 +pr_002711,acct_0035,user_000511,2026-02-23T11:00:00Z,draft,95000,supplier_0011 +pr_002712,acct_0036,user_000512,2026-02-24T11:00:00Z,submitted,96000,supplier_0012 +pr_002713,acct_0037,user_000513,2026-02-25T11:00:00Z,approved,97000,supplier_0013 +pr_002714,acct_0038,user_000514,2026-02-26T11:00:00Z,rejected,98000,supplier_0014 +pr_002715,acct_0039,user_000515,2026-02-27T11:00:00Z,cancelled,99000,supplier_0015 +pr_002716,acct_0040,user_000516,2026-02-28T11:00:00Z,draft,100000,supplier_0016 +pr_002717,acct_0041,user_000517,2026-02-01T11:00:00Z,submitted,101000,supplier_0017 +pr_002718,acct_0042,user_000518,2026-02-02T11:00:00Z,approved,102000,supplier_0018 +pr_002719,acct_0043,user_000519,2026-02-03T11:00:00Z,rejected,103000,supplier_0019 +pr_002720,acct_0044,user_000520,2026-02-04T11:00:00Z,cancelled,104000,supplier_0020 +pr_002721,acct_0045,user_000521,2026-02-05T11:00:00Z,draft,105000,supplier_0021 +pr_002722,acct_0046,user_000522,2026-02-06T11:00:00Z,submitted,106000,supplier_0022 +pr_002723,acct_0047,user_000523,2026-02-07T11:00:00Z,approved,107000,supplier_0023 +pr_002724,acct_0048,user_000524,2026-02-08T11:00:00Z,rejected,108000,supplier_0024 +pr_002725,acct_0049,user_000525,2026-02-09T11:00:00Z,cancelled,109000,supplier_0025 +pr_002726,acct_0050,user_000526,2026-02-10T11:00:00Z,draft,110000,supplier_0026 +pr_002727,acct_0051,user_000527,2026-02-11T11:00:00Z,submitted,10000,supplier_0027 +pr_002728,acct_0052,user_000528,2026-02-12T11:00:00Z,approved,11000,supplier_0028 
+pr_002729,acct_0053,user_000529,2026-02-13T11:00:00Z,rejected,12000,supplier_0029 +pr_002730,acct_0054,user_000530,2026-02-14T11:00:00Z,cancelled,13000,supplier_0030 +pr_002731,acct_0055,user_000531,2026-02-15T11:00:00Z,draft,14000,supplier_0031 +pr_002732,acct_0056,user_000532,2026-02-16T11:00:00Z,submitted,15000,supplier_0032 +pr_002733,acct_0057,user_000533,2026-02-17T11:00:00Z,approved,16000,supplier_0033 +pr_002734,acct_0058,user_000534,2026-02-18T11:00:00Z,rejected,17000,supplier_0034 +pr_002735,acct_0059,user_000535,2026-02-19T11:00:00Z,cancelled,18000,supplier_0035 +pr_002736,acct_0060,user_000536,2026-02-20T11:00:00Z,draft,19000,supplier_0036 +pr_002737,acct_0061,user_000537,2026-02-21T11:00:00Z,submitted,20000,supplier_0037 +pr_002738,acct_0062,user_000538,2026-02-22T11:00:00Z,approved,21000,supplier_0038 +pr_002739,acct_0063,user_000539,2026-02-23T11:00:00Z,rejected,22000,supplier_0039 +pr_002740,acct_0064,user_000540,2026-02-24T11:00:00Z,cancelled,23000,supplier_0040 +pr_002741,acct_0065,user_000541,2026-02-25T11:00:00Z,draft,24000,supplier_0041 +pr_002742,acct_0066,user_000542,2026-02-26T11:00:00Z,submitted,25000,supplier_0042 +pr_002743,acct_0067,user_000543,2026-02-27T11:00:00Z,approved,26000,supplier_0043 +pr_002744,acct_0068,user_000544,2026-02-28T11:00:00Z,rejected,27000,supplier_0044 +pr_002745,acct_0069,user_000545,2026-02-01T11:00:00Z,cancelled,28000,supplier_0045 +pr_002746,acct_0070,user_000546,2026-02-02T11:00:00Z,draft,29000,supplier_0046 +pr_002747,acct_0071,user_000547,2026-02-03T11:00:00Z,submitted,30000,supplier_0047 +pr_002748,acct_0072,user_000548,2026-02-04T11:00:00Z,approved,31000,supplier_0048 +pr_002749,acct_0073,user_000549,2026-02-05T11:00:00Z,rejected,32000,supplier_0049 +pr_002750,acct_0074,user_000550,2026-02-06T11:00:00Z,cancelled,33000,supplier_0050 +pr_002751,acct_0075,user_000551,2026-02-07T11:00:00Z,draft,34000,supplier_0051 +pr_002752,acct_0076,user_000552,2026-02-08T11:00:00Z,submitted,35000,supplier_0052 
+pr_002753,acct_0077,user_000553,2026-02-09T11:00:00Z,approved,36000,supplier_0053 +pr_002754,acct_0078,user_000554,2026-02-10T11:00:00Z,rejected,37000,supplier_0054 +pr_002755,acct_0079,user_000555,2026-02-11T11:00:00Z,cancelled,38000,supplier_0055 +pr_002756,acct_0080,user_000556,2026-02-12T11:00:00Z,draft,39000,supplier_0056 +pr_002757,acct_0081,user_000557,2026-02-13T11:00:00Z,submitted,40000,supplier_0057 +pr_002758,acct_0082,user_000558,2026-02-14T11:00:00Z,approved,41000,supplier_0058 +pr_002759,acct_0083,user_000559,2026-02-15T11:00:00Z,rejected,42000,supplier_0059 +pr_002760,acct_0084,user_000560,2026-02-16T11:00:00Z,cancelled,43000,supplier_0060 +pr_002761,acct_0085,user_000561,2026-02-17T11:00:00Z,draft,44000,supplier_0061 +pr_002762,acct_0086,user_000562,2026-02-18T11:00:00Z,submitted,45000,supplier_0062 +pr_002763,acct_0087,user_000563,2026-02-19T11:00:00Z,approved,46000,supplier_0063 +pr_002764,acct_0088,user_000564,2026-02-20T11:00:00Z,rejected,47000,supplier_0064 +pr_002765,acct_0089,user_000565,2026-02-21T11:00:00Z,cancelled,48000,supplier_0065 +pr_002766,acct_0090,user_000566,2026-02-22T11:00:00Z,draft,49000,supplier_0066 +pr_002767,acct_0091,user_000567,2026-02-23T11:00:00Z,submitted,50000,supplier_0067 +pr_002768,acct_0092,user_000568,2026-02-24T11:00:00Z,approved,51000,supplier_0068 +pr_002769,acct_0093,user_000569,2026-02-25T11:00:00Z,rejected,52000,supplier_0069 +pr_002770,acct_0094,user_000570,2026-02-26T11:00:00Z,cancelled,53000,supplier_0070 +pr_002771,acct_0095,user_000571,2026-02-27T11:00:00Z,draft,54000,supplier_0071 +pr_002772,acct_0096,user_000572,2026-02-28T11:00:00Z,submitted,55000,supplier_0072 +pr_002773,acct_0097,user_000573,2026-02-01T11:00:00Z,approved,56000,supplier_0073 +pr_002774,acct_0098,user_000574,2026-02-02T11:00:00Z,rejected,57000,supplier_0074 +pr_002775,acct_0099,user_000575,2026-02-03T11:00:00Z,cancelled,58000,supplier_0075 +pr_002776,acct_0100,user_000576,2026-02-04T11:00:00Z,draft,59000,supplier_0076 
+pr_002777,acct_0101,user_000577,2026-02-05T11:00:00Z,submitted,60000,supplier_0077 +pr_002778,acct_0102,user_000578,2026-02-06T11:00:00Z,approved,61000,supplier_0078 +pr_002779,acct_0103,user_000579,2026-02-07T11:00:00Z,rejected,62000,supplier_0079 +pr_002780,acct_0104,user_000580,2026-02-08T11:00:00Z,cancelled,63000,supplier_0080 +pr_002781,acct_0105,user_000581,2026-02-09T11:00:00Z,draft,64000,supplier_0081 +pr_002782,acct_0106,user_000582,2026-02-10T11:00:00Z,submitted,65000,supplier_0082 +pr_002783,acct_0107,user_000583,2026-02-11T11:00:00Z,approved,66000,supplier_0083 +pr_002784,acct_0108,user_000584,2026-02-12T11:00:00Z,rejected,67000,supplier_0084 +pr_002785,acct_0109,user_000585,2026-02-13T11:00:00Z,cancelled,68000,supplier_0085 +pr_002786,acct_0110,user_000586,2026-02-14T11:00:00Z,draft,69000,supplier_0086 +pr_002787,acct_0111,user_000587,2026-02-15T11:00:00Z,submitted,70000,supplier_0087 +pr_002788,acct_0112,user_000588,2026-02-16T11:00:00Z,approved,71000,supplier_0088 +pr_002789,acct_0113,user_000589,2026-02-17T11:00:00Z,rejected,72000,supplier_0089 +pr_002790,acct_0114,user_000590,2026-02-18T11:00:00Z,cancelled,73000,supplier_0090 +pr_002791,acct_0115,user_000591,2026-02-19T11:00:00Z,draft,74000,supplier_0091 +pr_002792,acct_0116,user_000592,2026-02-20T11:00:00Z,submitted,75000,supplier_0092 +pr_002793,acct_0117,user_000593,2026-02-21T11:00:00Z,approved,76000,supplier_0093 +pr_002794,acct_0118,user_000594,2026-02-22T11:00:00Z,rejected,77000,supplier_0094 +pr_002795,acct_0119,user_000595,2026-02-23T11:00:00Z,cancelled,78000,supplier_0095 +pr_002796,acct_0120,user_000596,2026-02-24T11:00:00Z,draft,79000,supplier_0096 +pr_002797,acct_0121,user_000597,2026-02-25T11:00:00Z,submitted,80000,supplier_0097 +pr_002798,acct_0122,user_000598,2026-02-26T11:00:00Z,approved,81000,supplier_0098 +pr_002799,acct_0123,user_000599,2026-02-27T11:00:00Z,rejected,82000,supplier_0099 +pr_002800,acct_0124,user_000600,2026-02-28T11:00:00Z,cancelled,83000,supplier_0100 
+pr_002801,acct_0125,user_000601,2026-02-01T11:00:00Z,draft,84000,supplier_0101 +pr_002802,acct_0126,user_000602,2026-02-02T11:00:00Z,submitted,85000,supplier_0102 +pr_002803,acct_0127,user_000603,2026-02-03T11:00:00Z,approved,86000,supplier_0103 +pr_002804,acct_0128,user_000604,2026-02-04T11:00:00Z,rejected,87000,supplier_0104 +pr_002805,acct_0129,user_000605,2026-02-05T11:00:00Z,cancelled,88000,supplier_0105 +pr_002806,acct_0130,user_000606,2026-02-06T11:00:00Z,draft,89000,supplier_0106 +pr_002807,acct_0131,user_000607,2026-02-07T11:00:00Z,submitted,90000,supplier_0107 +pr_002808,acct_0132,user_000608,2026-02-08T11:00:00Z,approved,91000,supplier_0108 +pr_002809,acct_0133,user_000609,2026-02-09T11:00:00Z,rejected,92000,supplier_0109 +pr_002810,acct_0134,user_000610,2026-02-10T11:00:00Z,cancelled,93000,supplier_0110 +pr_002811,acct_0135,user_000611,2026-02-11T11:00:00Z,draft,94000,supplier_0111 +pr_002812,acct_0136,user_000612,2026-02-12T11:00:00Z,submitted,95000,supplier_0112 +pr_002813,acct_0137,user_000613,2026-02-13T11:00:00Z,approved,96000,supplier_0113 +pr_002814,acct_0138,user_000614,2026-02-14T11:00:00Z,rejected,97000,supplier_0114 +pr_002815,acct_0139,user_000615,2026-02-15T11:00:00Z,cancelled,98000,supplier_0115 +pr_002816,acct_0140,user_000616,2026-02-16T11:00:00Z,draft,99000,supplier_0116 +pr_002817,acct_0141,user_000617,2026-02-17T11:00:00Z,submitted,100000,supplier_0117 +pr_002818,acct_0142,user_000618,2026-02-18T11:00:00Z,approved,101000,supplier_0118 +pr_002819,acct_0143,user_000619,2026-02-19T11:00:00Z,rejected,102000,supplier_0119 +pr_002820,acct_0144,user_000620,2026-02-20T11:00:00Z,cancelled,103000,supplier_0120 +pr_002821,acct_0145,user_000621,2026-02-21T11:00:00Z,draft,104000,supplier_0121 +pr_002822,acct_0146,user_000622,2026-02-22T11:00:00Z,submitted,105000,supplier_0122 +pr_002823,acct_0147,user_000623,2026-02-23T11:00:00Z,approved,106000,supplier_0123 +pr_002824,acct_0148,user_000624,2026-02-24T11:00:00Z,rejected,107000,supplier_0124 
+pr_002825,acct_0149,user_000625,2026-02-25T11:00:00Z,cancelled,108000,supplier_0125 +pr_002826,acct_0150,user_000626,2026-02-26T11:00:00Z,draft,109000,supplier_0126 +pr_002827,acct_0151,user_000627,2026-02-27T11:00:00Z,submitted,110000,supplier_0127 +pr_002828,acct_0152,user_000628,2026-02-28T11:00:00Z,approved,10000,supplier_0128 +pr_002829,acct_0153,user_000629,2026-02-01T11:00:00Z,rejected,11000,supplier_0129 +pr_002830,acct_0154,user_000630,2026-02-02T11:00:00Z,cancelled,12000,supplier_0130 +pr_002831,acct_0155,user_000631,2026-02-03T11:00:00Z,draft,13000,supplier_0131 +pr_002832,acct_0156,user_000632,2026-02-04T11:00:00Z,submitted,14000,supplier_0132 +pr_002833,acct_0157,user_000633,2026-02-05T11:00:00Z,approved,15000,supplier_0133 +pr_002834,acct_0158,user_000634,2026-02-06T11:00:00Z,rejected,16000,supplier_0134 +pr_002835,acct_0159,user_000635,2026-02-07T11:00:00Z,cancelled,17000,supplier_0135 +pr_002836,acct_0160,user_000636,2026-02-08T11:00:00Z,draft,18000,supplier_0136 +pr_002837,acct_0161,user_000637,2026-02-09T11:00:00Z,submitted,19000,supplier_0137 +pr_002838,acct_0162,user_000638,2026-02-10T11:00:00Z,approved,20000,supplier_0138 +pr_002839,acct_0163,user_000639,2026-02-11T11:00:00Z,rejected,21000,supplier_0139 +pr_002840,acct_0164,user_000640,2026-02-12T11:00:00Z,cancelled,22000,supplier_0140 +pr_002841,acct_0165,user_000641,2026-02-13T11:00:00Z,draft,23000,supplier_0141 +pr_002842,acct_0166,user_000642,2026-02-14T11:00:00Z,submitted,24000,supplier_0142 +pr_002843,acct_0167,user_000643,2026-02-15T11:00:00Z,approved,25000,supplier_0143 +pr_002844,acct_0168,user_000644,2026-02-16T11:00:00Z,rejected,26000,supplier_0144 +pr_002845,acct_0169,user_000645,2026-02-17T11:00:00Z,cancelled,27000,supplier_0145 +pr_002846,acct_0170,user_000646,2026-02-18T11:00:00Z,draft,28000,supplier_0146 +pr_002847,acct_0171,user_000647,2026-02-19T11:00:00Z,submitted,29000,supplier_0147 +pr_002848,acct_0172,user_000648,2026-02-20T11:00:00Z,approved,30000,supplier_0148 
+pr_002849,acct_0173,user_000649,2026-02-21T11:00:00Z,rejected,31000,supplier_0149 +pr_002850,acct_0174,user_000650,2026-02-22T11:00:00Z,cancelled,32000,supplier_0150 +pr_002851,acct_0175,user_000651,2026-02-23T11:00:00Z,draft,33000,supplier_0151 +pr_002852,acct_0176,user_000652,2026-02-24T11:00:00Z,submitted,34000,supplier_0152 +pr_002853,acct_0177,user_000653,2026-02-25T11:00:00Z,approved,35000,supplier_0153 +pr_002854,acct_0178,user_000654,2026-02-26T11:00:00Z,rejected,36000,supplier_0154 +pr_002855,acct_0179,user_000655,2026-02-27T11:00:00Z,cancelled,37000,supplier_0155 +pr_002856,acct_0180,user_000656,2026-02-28T11:00:00Z,draft,38000,supplier_0156 +pr_002857,acct_0181,user_000657,2026-02-01T11:00:00Z,submitted,39000,supplier_0157 +pr_002858,acct_0182,user_000658,2026-02-02T11:00:00Z,approved,40000,supplier_0158 +pr_002859,acct_0183,user_000659,2026-02-03T11:00:00Z,rejected,41000,supplier_0159 +pr_002860,acct_0184,user_000660,2026-02-04T11:00:00Z,cancelled,42000,supplier_0160 +pr_002861,acct_0185,user_000661,2026-02-05T11:00:00Z,draft,43000,supplier_0161 +pr_002862,acct_0186,user_000662,2026-02-06T11:00:00Z,submitted,44000,supplier_0162 +pr_002863,acct_0187,user_000663,2026-02-07T11:00:00Z,approved,45000,supplier_0163 +pr_002864,acct_0188,user_000664,2026-02-08T11:00:00Z,rejected,46000,supplier_0164 +pr_002865,acct_0189,user_000665,2026-02-09T11:00:00Z,cancelled,47000,supplier_0165 +pr_002866,acct_0190,user_000666,2026-02-10T11:00:00Z,draft,48000,supplier_0166 +pr_002867,acct_0001,user_000667,2026-02-11T11:00:00Z,submitted,49000,supplier_0167 +pr_002868,acct_0002,user_000668,2026-02-12T11:00:00Z,approved,50000,supplier_0168 +pr_002869,acct_0003,user_000669,2026-02-13T11:00:00Z,rejected,51000,supplier_0169 +pr_002870,acct_0004,user_000670,2026-02-14T11:00:00Z,cancelled,52000,supplier_0170 +pr_002871,acct_0005,user_000671,2026-02-15T11:00:00Z,draft,53000,supplier_0171 +pr_002872,acct_0006,user_000672,2026-02-16T11:00:00Z,submitted,54000,supplier_0172 
+pr_002873,acct_0007,user_000673,2026-02-17T11:00:00Z,approved,55000,supplier_0173 +pr_002874,acct_0008,user_000674,2026-02-18T11:00:00Z,rejected,56000,supplier_0174 +pr_002875,acct_0009,user_000675,2026-02-19T11:00:00Z,cancelled,57000,supplier_0175 +pr_002876,acct_0010,user_000676,2026-02-20T11:00:00Z,draft,58000,supplier_0176 +pr_002877,acct_0011,user_000677,2026-02-21T11:00:00Z,submitted,59000,supplier_0177 +pr_002878,acct_0012,user_000678,2026-02-22T11:00:00Z,approved,60000,supplier_0178 +pr_002879,acct_0013,user_000679,2026-02-23T11:00:00Z,rejected,61000,supplier_0179 +pr_002880,acct_0014,user_000680,2026-02-24T11:00:00Z,cancelled,62000,supplier_0180 +pr_002881,acct_0015,user_000681,2026-02-25T11:00:00Z,draft,63000,supplier_0181 +pr_002882,acct_0016,user_000682,2026-02-26T11:00:00Z,submitted,64000,supplier_0182 +pr_002883,acct_0017,user_000683,2026-02-27T11:00:00Z,approved,65000,supplier_0183 +pr_002884,acct_0018,user_000684,2026-02-28T11:00:00Z,rejected,66000,supplier_0184 +pr_002885,acct_0019,user_000685,2026-02-01T11:00:00Z,cancelled,67000,supplier_0185 +pr_002886,acct_0020,user_000686,2026-02-02T11:00:00Z,draft,68000,supplier_0186 +pr_002887,acct_0021,user_000687,2026-02-03T11:00:00Z,submitted,69000,supplier_0187 +pr_002888,acct_0022,user_000688,2026-02-04T11:00:00Z,approved,70000,supplier_0188 +pr_002889,acct_0023,user_000689,2026-02-05T11:00:00Z,rejected,71000,supplier_0189 +pr_002890,acct_0024,user_000690,2026-02-06T11:00:00Z,cancelled,72000,supplier_0190 +pr_002891,acct_0025,user_000691,2026-02-07T11:00:00Z,draft,73000,supplier_0191 +pr_002892,acct_0026,user_000692,2026-02-08T11:00:00Z,submitted,74000,supplier_0192 +pr_002893,acct_0027,user_000693,2026-02-09T11:00:00Z,approved,75000,supplier_0193 +pr_002894,acct_0028,user_000694,2026-02-10T11:00:00Z,rejected,76000,supplier_0194 +pr_002895,acct_0029,user_000695,2026-02-11T11:00:00Z,cancelled,77000,supplier_0195 +pr_002896,acct_0030,user_000696,2026-02-12T11:00:00Z,draft,78000,supplier_0196 
+pr_002897,acct_0031,user_000697,2026-02-13T11:00:00Z,submitted,79000,supplier_0197 +pr_002898,acct_0032,user_000698,2026-02-14T11:00:00Z,approved,80000,supplier_0198 +pr_002899,acct_0033,user_000699,2026-02-15T11:00:00Z,rejected,81000,supplier_0199 +pr_002900,acct_0034,user_000700,2026-02-16T11:00:00Z,cancelled,82000,supplier_0200 +pr_002901,acct_0035,user_000701,2026-02-17T11:00:00Z,draft,83000,supplier_0201 +pr_002902,acct_0036,user_000702,2026-02-18T11:00:00Z,submitted,84000,supplier_0202 +pr_002903,acct_0037,user_000703,2026-02-19T11:00:00Z,approved,85000,supplier_0203 +pr_002904,acct_0038,user_000704,2026-02-20T11:00:00Z,rejected,86000,supplier_0204 +pr_002905,acct_0039,user_000705,2026-02-21T11:00:00Z,cancelled,87000,supplier_0205 +pr_002906,acct_0040,user_000706,2026-02-22T11:00:00Z,draft,88000,supplier_0206 +pr_002907,acct_0041,user_000707,2026-02-23T11:00:00Z,submitted,89000,supplier_0207 +pr_002908,acct_0042,user_000708,2026-02-24T11:00:00Z,approved,90000,supplier_0208 +pr_002909,acct_0043,user_000709,2026-02-25T11:00:00Z,rejected,91000,supplier_0209 +pr_002910,acct_0044,user_000710,2026-02-26T11:00:00Z,cancelled,92000,supplier_0210 +pr_002911,acct_0045,user_000711,2026-02-27T11:00:00Z,draft,93000,supplier_0211 +pr_002912,acct_0046,user_000712,2026-02-28T11:00:00Z,submitted,94000,supplier_0212 +pr_002913,acct_0047,user_000713,2026-02-01T11:00:00Z,approved,95000,supplier_0213 +pr_002914,acct_0048,user_000714,2026-02-02T11:00:00Z,rejected,96000,supplier_0214 +pr_002915,acct_0049,user_000715,2026-02-03T11:00:00Z,cancelled,97000,supplier_0215 +pr_002916,acct_0050,user_000716,2026-02-04T11:00:00Z,draft,98000,supplier_0216 +pr_002917,acct_0051,user_000717,2026-02-05T11:00:00Z,submitted,99000,supplier_0217 +pr_002918,acct_0052,user_000718,2026-02-06T11:00:00Z,approved,100000,supplier_0218 +pr_002919,acct_0053,user_000719,2026-02-07T11:00:00Z,rejected,101000,supplier_0219 +pr_002920,acct_0054,user_000720,2026-02-08T11:00:00Z,cancelled,102000,supplier_0220 
+pr_002921,acct_0055,user_000721,2026-02-09T11:00:00Z,draft,103000,supplier_0221 +pr_002922,acct_0056,user_000722,2026-02-10T11:00:00Z,submitted,104000,supplier_0222 +pr_002923,acct_0057,user_000723,2026-02-11T11:00:00Z,approved,105000,supplier_0223 +pr_002924,acct_0058,user_000724,2026-02-12T11:00:00Z,rejected,106000,supplier_0224 +pr_002925,acct_0059,user_000725,2026-02-13T11:00:00Z,cancelled,107000,supplier_0225 +pr_002926,acct_0060,user_000726,2026-02-14T11:00:00Z,draft,108000,supplier_0226 +pr_002927,acct_0061,user_000727,2026-02-15T11:00:00Z,submitted,109000,supplier_0227 +pr_002928,acct_0062,user_000728,2026-02-16T11:00:00Z,approved,110000,supplier_0228 +pr_002929,acct_0063,user_000729,2026-02-17T11:00:00Z,rejected,10000,supplier_0229 +pr_002930,acct_0064,user_000730,2026-02-18T11:00:00Z,cancelled,11000,supplier_0230 +pr_002931,acct_0065,user_000731,2026-02-19T11:00:00Z,draft,12000,supplier_0231 +pr_002932,acct_0066,user_000732,2026-02-20T11:00:00Z,submitted,13000,supplier_0232 +pr_002933,acct_0067,user_000733,2026-02-21T11:00:00Z,approved,14000,supplier_0233 +pr_002934,acct_0068,user_000734,2026-02-22T11:00:00Z,rejected,15000,supplier_0234 +pr_002935,acct_0069,user_000735,2026-02-23T11:00:00Z,cancelled,16000,supplier_0235 +pr_002936,acct_0070,user_000736,2026-02-24T11:00:00Z,draft,17000,supplier_0236 +pr_002937,acct_0071,user_000737,2026-02-25T11:00:00Z,submitted,18000,supplier_0237 +pr_002938,acct_0072,user_000738,2026-02-26T11:00:00Z,approved,19000,supplier_0238 +pr_002939,acct_0073,user_000739,2026-02-27T11:00:00Z,rejected,20000,supplier_0239 +pr_002940,acct_0074,user_000740,2026-02-28T11:00:00Z,cancelled,21000,supplier_0240 +pr_002941,acct_0075,user_000741,2026-02-01T11:00:00Z,draft,22000,supplier_0241 +pr_002942,acct_0076,user_000742,2026-02-02T11:00:00Z,submitted,23000,supplier_0242 +pr_002943,acct_0077,user_000743,2026-02-03T11:00:00Z,approved,24000,supplier_0243 +pr_002944,acct_0078,user_000744,2026-02-04T11:00:00Z,rejected,25000,supplier_0244 
+pr_002945,acct_0079,user_000745,2026-02-05T11:00:00Z,cancelled,26000,supplier_0245 +pr_002946,acct_0080,user_000746,2026-02-06T11:00:00Z,draft,27000,supplier_0246 +pr_002947,acct_0081,user_000747,2026-02-07T11:00:00Z,submitted,28000,supplier_0247 +pr_002948,acct_0082,user_000748,2026-02-08T11:00:00Z,approved,29000,supplier_0248 +pr_002949,acct_0083,user_000749,2026-02-09T11:00:00Z,rejected,30000,supplier_0249 +pr_002950,acct_0084,user_000750,2026-02-10T11:00:00Z,cancelled,31000,supplier_0250 +pr_002951,acct_0085,user_000751,2026-02-11T11:00:00Z,draft,32000,supplier_0251 +pr_002952,acct_0086,user_000752,2026-02-12T11:00:00Z,submitted,33000,supplier_0252 +pr_002953,acct_0087,user_000753,2026-02-13T11:00:00Z,approved,34000,supplier_0253 +pr_002954,acct_0088,user_000754,2026-02-14T11:00:00Z,rejected,35000,supplier_0254 +pr_002955,acct_0089,user_000755,2026-02-15T11:00:00Z,cancelled,36000,supplier_0255 +pr_002956,acct_0090,user_000756,2026-02-16T11:00:00Z,draft,37000,supplier_0256 +pr_002957,acct_0091,user_000757,2026-02-17T11:00:00Z,submitted,38000,supplier_0257 +pr_002958,acct_0092,user_000758,2026-02-18T11:00:00Z,approved,39000,supplier_0258 +pr_002959,acct_0093,user_000759,2026-02-19T11:00:00Z,rejected,40000,supplier_0259 +pr_002960,acct_0094,user_000760,2026-02-20T11:00:00Z,cancelled,41000,supplier_0260 +pr_002961,acct_0095,user_000761,2026-02-21T11:00:00Z,draft,42000,supplier_0261 +pr_002962,acct_0096,user_000762,2026-02-22T11:00:00Z,submitted,43000,supplier_0262 +pr_002963,acct_0097,user_000763,2026-02-23T11:00:00Z,approved,44000,supplier_0263 +pr_002964,acct_0098,user_000764,2026-02-24T11:00:00Z,rejected,45000,supplier_0264 +pr_002965,acct_0099,user_000765,2026-02-25T11:00:00Z,cancelled,46000,supplier_0265 +pr_002966,acct_0100,user_000766,2026-02-26T11:00:00Z,draft,47000,supplier_0266 +pr_002967,acct_0101,user_000767,2026-02-27T11:00:00Z,submitted,48000,supplier_0267 +pr_002968,acct_0102,user_000768,2026-02-28T11:00:00Z,approved,49000,supplier_0268 
+pr_002969,acct_0103,user_000769,2026-02-01T11:00:00Z,rejected,50000,supplier_0269 +pr_002970,acct_0104,user_000770,2026-02-02T11:00:00Z,cancelled,51000,supplier_0270 +pr_002971,acct_0105,user_000771,2026-02-03T11:00:00Z,draft,52000,supplier_0271 +pr_002972,acct_0106,user_000772,2026-02-04T11:00:00Z,submitted,53000,supplier_0272 +pr_002973,acct_0107,user_000773,2026-02-05T11:00:00Z,approved,54000,supplier_0273 +pr_002974,acct_0108,user_000774,2026-02-06T11:00:00Z,rejected,55000,supplier_0274 +pr_002975,acct_0109,user_000775,2026-02-07T11:00:00Z,cancelled,56000,supplier_0275 +pr_002976,acct_0110,user_000776,2026-02-08T11:00:00Z,draft,57000,supplier_0276 +pr_002977,acct_0111,user_000777,2026-02-09T11:00:00Z,submitted,58000,supplier_0277 +pr_002978,acct_0112,user_000778,2026-02-10T11:00:00Z,approved,59000,supplier_0278 +pr_002979,acct_0113,user_000779,2026-02-11T11:00:00Z,rejected,60000,supplier_0279 +pr_002980,acct_0114,user_000780,2026-02-12T11:00:00Z,cancelled,61000,supplier_0280 +pr_002981,acct_0115,user_000781,2026-02-13T11:00:00Z,draft,62000,supplier_0281 +pr_002982,acct_0116,user_000782,2026-02-14T11:00:00Z,submitted,63000,supplier_0282 +pr_002983,acct_0117,user_000783,2026-02-15T11:00:00Z,approved,64000,supplier_0283 +pr_002984,acct_0118,user_000784,2026-02-16T11:00:00Z,rejected,65000,supplier_0284 +pr_002985,acct_0119,user_000785,2026-02-17T11:00:00Z,cancelled,66000,supplier_0285 +pr_002986,acct_0120,user_000786,2026-02-18T11:00:00Z,draft,67000,supplier_0286 +pr_002987,acct_0121,user_000787,2026-02-19T11:00:00Z,submitted,68000,supplier_0287 +pr_002988,acct_0122,user_000788,2026-02-20T11:00:00Z,approved,69000,supplier_0288 +pr_002989,acct_0123,user_000789,2026-02-21T11:00:00Z,rejected,70000,supplier_0289 +pr_002990,acct_0124,user_000790,2026-02-22T11:00:00Z,cancelled,71000,supplier_0290 +pr_002991,acct_0125,user_000791,2026-02-23T11:00:00Z,draft,72000,supplier_0291 +pr_002992,acct_0126,user_000792,2026-02-24T11:00:00Z,submitted,73000,supplier_0292 
+pr_002993,acct_0127,user_000793,2026-02-25T11:00:00Z,approved,74000,supplier_0293 +pr_002994,acct_0128,user_000794,2026-02-26T11:00:00Z,rejected,75000,supplier_0294 +pr_002995,acct_0129,user_000795,2026-02-27T11:00:00Z,cancelled,76000,supplier_0295 +pr_002996,acct_0130,user_000796,2026-02-28T11:00:00Z,draft,77000,supplier_0296 +pr_002997,acct_0131,user_000797,2026-02-01T11:00:00Z,submitted,78000,supplier_0297 +pr_002998,acct_0132,user_000798,2026-02-02T11:00:00Z,approved,79000,supplier_0298 +pr_002999,acct_0133,user_000799,2026-02-03T11:00:00Z,rejected,80000,supplier_0299 +pr_003000,acct_0134,user_000800,2026-02-04T11:00:00Z,cancelled,81000,supplier_0300 +pr_003001,acct_0135,user_000801,2026-02-05T11:00:00Z,draft,82000,supplier_0301 +pr_003002,acct_0136,user_000802,2026-02-06T11:00:00Z,submitted,83000,supplier_0302 +pr_003003,acct_0137,user_000803,2026-02-07T11:00:00Z,approved,84000,supplier_0303 +pr_003004,acct_0138,user_000804,2026-02-08T11:00:00Z,rejected,85000,supplier_0304 +pr_003005,acct_0139,user_000805,2026-02-09T11:00:00Z,cancelled,86000,supplier_0305 +pr_003006,acct_0140,user_000806,2026-02-10T11:00:00Z,draft,87000,supplier_0306 +pr_003007,acct_0141,user_000807,2026-02-11T11:00:00Z,submitted,88000,supplier_0307 +pr_003008,acct_0142,user_000808,2026-02-12T11:00:00Z,approved,89000,supplier_0308 +pr_003009,acct_0143,user_000809,2026-02-13T11:00:00Z,rejected,90000,supplier_0309 +pr_003010,acct_0144,user_000810,2026-02-14T11:00:00Z,cancelled,91000,supplier_0310 +pr_003011,acct_0145,user_000811,2026-02-15T11:00:00Z,draft,92000,supplier_0311 +pr_003012,acct_0146,user_000812,2026-02-16T11:00:00Z,submitted,93000,supplier_0312 +pr_003013,acct_0147,user_000813,2026-02-17T11:00:00Z,approved,94000,supplier_0313 +pr_003014,acct_0148,user_000814,2026-02-18T11:00:00Z,rejected,95000,supplier_0314 +pr_003015,acct_0149,user_000815,2026-02-19T11:00:00Z,cancelled,96000,supplier_0315 +pr_003016,acct_0150,user_000816,2026-02-20T11:00:00Z,draft,97000,supplier_0316 
+pr_003017,acct_0151,user_000817,2026-02-21T11:00:00Z,submitted,98000,supplier_0317 +pr_003018,acct_0152,user_000818,2026-02-22T11:00:00Z,approved,99000,supplier_0318 +pr_003019,acct_0153,user_000819,2026-02-23T11:00:00Z,rejected,100000,supplier_0319 +pr_003020,acct_0154,user_000820,2026-02-24T11:00:00Z,cancelled,101000,supplier_0320 +pr_003021,acct_0155,user_000821,2026-02-25T11:00:00Z,draft,102000,supplier_0321 +pr_003022,acct_0156,user_000822,2026-02-26T11:00:00Z,submitted,103000,supplier_0322 +pr_003023,acct_0157,user_000823,2026-02-27T11:00:00Z,approved,104000,supplier_0323 +pr_003024,acct_0158,user_000824,2026-02-28T11:00:00Z,rejected,105000,supplier_0324 +pr_003025,acct_0159,user_000825,2026-02-01T11:00:00Z,cancelled,106000,supplier_0325 +pr_003026,acct_0160,user_000826,2026-02-02T11:00:00Z,draft,107000,supplier_0326 +pr_003027,acct_0161,user_000827,2026-02-03T11:00:00Z,submitted,108000,supplier_0327 +pr_003028,acct_0162,user_000828,2026-02-04T11:00:00Z,approved,109000,supplier_0328 +pr_003029,acct_0163,user_000829,2026-02-05T11:00:00Z,rejected,110000,supplier_0329 +pr_003030,acct_0164,user_000830,2026-02-06T11:00:00Z,cancelled,10000,supplier_0330 +pr_003031,acct_0165,user_000831,2026-02-07T11:00:00Z,draft,11000,supplier_0331 +pr_003032,acct_0166,user_000832,2026-02-08T11:00:00Z,submitted,12000,supplier_0332 +pr_003033,acct_0167,user_000833,2026-02-09T11:00:00Z,approved,13000,supplier_0333 +pr_003034,acct_0168,user_000834,2026-02-10T11:00:00Z,rejected,14000,supplier_0334 +pr_003035,acct_0169,user_000835,2026-02-11T11:00:00Z,cancelled,15000,supplier_0335 +pr_003036,acct_0170,user_000836,2026-02-12T11:00:00Z,draft,16000,supplier_0336 +pr_003037,acct_0171,user_000837,2026-02-13T11:00:00Z,submitted,17000,supplier_0337 +pr_003038,acct_0172,user_000838,2026-02-14T11:00:00Z,approved,18000,supplier_0338 +pr_003039,acct_0173,user_000839,2026-02-15T11:00:00Z,rejected,19000,supplier_0339 
+pr_003040,acct_0174,user_000840,2026-02-16T11:00:00Z,cancelled,20000,supplier_0340 +pr_003041,acct_0175,user_000841,2026-02-17T11:00:00Z,draft,21000,supplier_0341 +pr_003042,acct_0176,user_000842,2026-02-18T11:00:00Z,submitted,22000,supplier_0342 +pr_003043,acct_0177,user_000843,2026-02-19T11:00:00Z,approved,23000,supplier_0343 +pr_003044,acct_0178,user_000844,2026-02-20T11:00:00Z,rejected,24000,supplier_0344 +pr_003045,acct_0179,user_000845,2026-02-21T11:00:00Z,cancelled,25000,supplier_0345 +pr_003046,acct_0180,user_000846,2026-02-22T11:00:00Z,draft,26000,supplier_0346 +pr_003047,acct_0181,user_000847,2026-02-23T11:00:00Z,submitted,27000,supplier_0347 +pr_003048,acct_0182,user_000848,2026-02-24T11:00:00Z,approved,28000,supplier_0348 +pr_003049,acct_0183,user_000849,2026-02-25T11:00:00Z,rejected,29000,supplier_0349 +pr_003050,acct_0184,user_000850,2026-02-26T11:00:00Z,cancelled,30000,supplier_0350 +pr_003051,acct_0185,user_000851,2026-02-27T11:00:00Z,draft,31000,supplier_0351 +pr_003052,acct_0186,user_000852,2026-02-28T11:00:00Z,submitted,32000,supplier_0352 +pr_003053,acct_0187,user_000853,2026-02-01T11:00:00Z,approved,33000,supplier_0353 +pr_003054,acct_0188,user_000854,2026-02-02T11:00:00Z,rejected,34000,supplier_0354 +pr_003055,acct_0189,user_000855,2026-02-03T11:00:00Z,cancelled,35000,supplier_0355 +pr_003056,acct_0190,user_000856,2026-02-04T11:00:00Z,draft,36000,supplier_0356 +pr_003057,acct_0001,user_000857,2026-02-05T11:00:00Z,submitted,37000,supplier_0357 +pr_003058,acct_0002,user_000858,2026-02-06T11:00:00Z,approved,38000,supplier_0358 +pr_003059,acct_0003,user_000859,2026-02-07T11:00:00Z,rejected,39000,supplier_0359 +pr_003060,acct_0004,user_000860,2026-02-08T11:00:00Z,cancelled,40000,supplier_0360 +pr_003061,acct_0005,user_000861,2026-02-09T11:00:00Z,draft,41000,supplier_0361 +pr_003062,acct_0006,user_000862,2026-02-10T11:00:00Z,submitted,42000,supplier_0362 +pr_003063,acct_0007,user_000863,2026-02-11T11:00:00Z,approved,43000,supplier_0363 
+pr_003064,acct_0008,user_000864,2026-02-12T11:00:00Z,rejected,44000,supplier_0364 +pr_003065,acct_0009,user_000865,2026-02-13T11:00:00Z,cancelled,45000,supplier_0365 +pr_003066,acct_0010,user_000866,2026-02-14T11:00:00Z,draft,46000,supplier_0366 +pr_003067,acct_0011,user_000867,2026-02-15T11:00:00Z,submitted,47000,supplier_0367 +pr_003068,acct_0012,user_000868,2026-02-16T11:00:00Z,approved,48000,supplier_0368 +pr_003069,acct_0013,user_000869,2026-02-17T11:00:00Z,rejected,49000,supplier_0369 +pr_003070,acct_0014,user_000870,2026-02-18T11:00:00Z,cancelled,50000,supplier_0370 +pr_003071,acct_0015,user_000871,2026-02-19T11:00:00Z,draft,51000,supplier_0371 +pr_003072,acct_0016,user_000872,2026-02-20T11:00:00Z,submitted,52000,supplier_0372 +pr_003073,acct_0017,user_000873,2026-02-21T11:00:00Z,approved,53000,supplier_0373 +pr_003074,acct_0018,user_000874,2026-02-22T11:00:00Z,rejected,54000,supplier_0374 +pr_003075,acct_0019,user_000875,2026-02-23T11:00:00Z,cancelled,55000,supplier_0375 +pr_003076,acct_0020,user_000876,2026-02-24T11:00:00Z,draft,56000,supplier_0376 +pr_003077,acct_0021,user_000877,2026-02-25T11:00:00Z,submitted,57000,supplier_0377 +pr_003078,acct_0022,user_000878,2026-02-26T11:00:00Z,approved,58000,supplier_0378 +pr_003079,acct_0023,user_000879,2026-02-27T11:00:00Z,rejected,59000,supplier_0379 +pr_003080,acct_0024,user_000880,2026-02-28T11:00:00Z,cancelled,60000,supplier_0380 +pr_003081,acct_0025,user_000881,2026-02-01T11:00:00Z,draft,61000,supplier_0381 +pr_003082,acct_0026,user_000882,2026-02-02T11:00:00Z,submitted,62000,supplier_0382 +pr_003083,acct_0027,user_000883,2026-02-03T11:00:00Z,approved,63000,supplier_0383 +pr_003084,acct_0028,user_000884,2026-02-04T11:00:00Z,rejected,64000,supplier_0384 +pr_003085,acct_0029,user_000885,2026-02-05T11:00:00Z,cancelled,65000,supplier_0385 +pr_003086,acct_0030,user_000886,2026-02-06T11:00:00Z,draft,66000,supplier_0386 +pr_003087,acct_0031,user_000887,2026-02-07T11:00:00Z,submitted,67000,supplier_0387 
+pr_003088,acct_0032,user_000888,2026-02-08T11:00:00Z,approved,68000,supplier_0388 +pr_003089,acct_0033,user_000889,2026-02-09T11:00:00Z,rejected,69000,supplier_0389 +pr_003090,acct_0034,user_000890,2026-02-10T11:00:00Z,cancelled,70000,supplier_0390 +pr_003091,acct_0035,user_000891,2026-02-11T11:00:00Z,draft,71000,supplier_0391 +pr_003092,acct_0036,user_000892,2026-02-12T11:00:00Z,submitted,72000,supplier_0392 +pr_003093,acct_0037,user_000893,2026-02-13T11:00:00Z,approved,73000,supplier_0393 +pr_003094,acct_0038,user_000894,2026-02-14T11:00:00Z,rejected,74000,supplier_0394 +pr_003095,acct_0039,user_000895,2026-02-15T11:00:00Z,cancelled,75000,supplier_0395 +pr_003096,acct_0040,user_000896,2026-02-16T11:00:00Z,draft,76000,supplier_0396 +pr_003097,acct_0041,user_000897,2026-02-17T11:00:00Z,submitted,77000,supplier_0397 +pr_003098,acct_0042,user_000898,2026-02-18T11:00:00Z,approved,78000,supplier_0398 +pr_003099,acct_0043,user_000899,2026-02-19T11:00:00Z,rejected,79000,supplier_0399 +pr_003100,acct_0044,user_000900,2026-02-20T11:00:00Z,cancelled,80000,supplier_0400 +pr_003101,acct_0045,user_000901,2026-02-21T11:00:00Z,draft,81000,supplier_0401 +pr_003102,acct_0046,user_000902,2026-02-22T11:00:00Z,submitted,82000,supplier_0402 +pr_003103,acct_0047,user_000903,2026-02-23T11:00:00Z,approved,83000,supplier_0403 +pr_003104,acct_0048,user_000904,2026-02-24T11:00:00Z,rejected,84000,supplier_0404 +pr_003105,acct_0049,user_000905,2026-02-25T11:00:00Z,cancelled,85000,supplier_0405 +pr_003106,acct_0050,user_000906,2026-02-26T11:00:00Z,draft,86000,supplier_0406 +pr_003107,acct_0051,user_000907,2026-02-27T11:00:00Z,submitted,87000,supplier_0407 +pr_003108,acct_0052,user_000908,2026-02-28T11:00:00Z,approved,88000,supplier_0408 +pr_003109,acct_0053,user_000909,2026-02-01T11:00:00Z,rejected,89000,supplier_0409 +pr_003110,acct_0054,user_000910,2026-02-02T11:00:00Z,cancelled,90000,supplier_0410 +pr_003111,acct_0055,user_000911,2026-02-03T11:00:00Z,draft,91000,supplier_0411 
+pr_003112,acct_0056,user_000912,2026-02-04T11:00:00Z,submitted,92000,supplier_0412 +pr_003113,acct_0057,user_000913,2026-02-05T11:00:00Z,approved,93000,supplier_0413 +pr_003114,acct_0058,user_000914,2026-02-06T11:00:00Z,rejected,94000,supplier_0414 +pr_003115,acct_0059,user_000915,2026-02-07T11:00:00Z,cancelled,95000,supplier_0415 +pr_003116,acct_0060,user_000916,2026-02-08T11:00:00Z,draft,96000,supplier_0416 +pr_003117,acct_0061,user_000917,2026-02-09T11:00:00Z,submitted,97000,supplier_0417 +pr_003118,acct_0062,user_000918,2026-02-10T11:00:00Z,approved,98000,supplier_0418 +pr_003119,acct_0063,user_000919,2026-02-11T11:00:00Z,rejected,99000,supplier_0419 +pr_003120,acct_0064,user_000920,2026-02-12T11:00:00Z,cancelled,100000,supplier_0420 +pr_003121,acct_0065,user_000921,2026-02-13T11:00:00Z,draft,101000,supplier_0421 +pr_003122,acct_0066,user_000922,2026-02-14T11:00:00Z,submitted,102000,supplier_0422 +pr_003123,acct_0067,user_000923,2026-02-15T11:00:00Z,approved,103000,supplier_0423 +pr_003124,acct_0068,user_000924,2026-02-16T11:00:00Z,rejected,104000,supplier_0424 +pr_003125,acct_0069,user_000925,2026-02-17T11:00:00Z,cancelled,105000,supplier_0425 +pr_003126,acct_0070,user_000926,2026-02-18T11:00:00Z,draft,106000,supplier_0426 +pr_003127,acct_0071,user_000927,2026-02-19T11:00:00Z,submitted,107000,supplier_0427 +pr_003128,acct_0072,user_000928,2026-02-20T11:00:00Z,approved,108000,supplier_0428 +pr_003129,acct_0073,user_000929,2026-02-21T11:00:00Z,rejected,109000,supplier_0429 +pr_003130,acct_0074,user_000930,2026-02-22T11:00:00Z,cancelled,110000,supplier_0430 +pr_003131,acct_0075,user_000931,2026-02-23T11:00:00Z,draft,10000,supplier_0431 +pr_003132,acct_0076,user_000932,2026-02-24T11:00:00Z,submitted,11000,supplier_0432 +pr_003133,acct_0077,user_000933,2026-02-25T11:00:00Z,approved,12000,supplier_0433 +pr_003134,acct_0078,user_000934,2026-02-26T11:00:00Z,rejected,13000,supplier_0434 
+pr_003135,acct_0079,user_000935,2026-02-27T11:00:00Z,cancelled,14000,supplier_0435 +pr_003136,acct_0080,user_000936,2026-02-28T11:00:00Z,draft,15000,supplier_0436 +pr_003137,acct_0081,user_000937,2026-02-01T11:00:00Z,submitted,16000,supplier_0437 +pr_003138,acct_0082,user_000938,2026-02-02T11:00:00Z,approved,17000,supplier_0438 +pr_003139,acct_0083,user_000939,2026-02-03T11:00:00Z,rejected,18000,supplier_0439 +pr_003140,acct_0084,user_000940,2026-02-04T11:00:00Z,cancelled,19000,supplier_0440 +pr_003141,acct_0085,user_000941,2026-02-05T11:00:00Z,draft,20000,supplier_0441 +pr_003142,acct_0086,user_000942,2026-02-06T11:00:00Z,submitted,21000,supplier_0442 +pr_003143,acct_0087,user_000943,2026-02-07T11:00:00Z,approved,22000,supplier_0443 +pr_003144,acct_0088,user_000944,2026-02-08T11:00:00Z,rejected,23000,supplier_0444 +pr_003145,acct_0089,user_000945,2026-02-09T11:00:00Z,cancelled,24000,supplier_0445 +pr_003146,acct_0090,user_000946,2026-02-10T11:00:00Z,draft,25000,supplier_0446 +pr_003147,acct_0091,user_000947,2026-02-11T11:00:00Z,submitted,26000,supplier_0447 +pr_003148,acct_0092,user_000948,2026-02-12T11:00:00Z,approved,27000,supplier_0448 +pr_003149,acct_0093,user_000949,2026-02-13T11:00:00Z,rejected,28000,supplier_0449 +pr_003150,acct_0094,user_000950,2026-02-14T11:00:00Z,cancelled,29000,supplier_0450 +pr_003151,acct_0095,user_000951,2026-02-15T11:00:00Z,draft,30000,supplier_0451 +pr_003152,acct_0096,user_000952,2026-02-16T11:00:00Z,submitted,31000,supplier_0452 +pr_003153,acct_0097,user_000953,2026-02-17T11:00:00Z,approved,32000,supplier_0453 +pr_003154,acct_0098,user_000954,2026-02-18T11:00:00Z,rejected,33000,supplier_0454 +pr_003155,acct_0099,user_000955,2026-02-19T11:00:00Z,cancelled,34000,supplier_0455 +pr_003156,acct_0100,user_000956,2026-02-20T11:00:00Z,draft,35000,supplier_0456 +pr_003157,acct_0101,user_000957,2026-02-21T11:00:00Z,submitted,36000,supplier_0457 +pr_003158,acct_0102,user_000958,2026-02-22T11:00:00Z,approved,37000,supplier_0458 
+pr_003159,acct_0103,user_000959,2026-02-23T11:00:00Z,rejected,38000,supplier_0459 +pr_003160,acct_0104,user_000960,2026-02-24T11:00:00Z,cancelled,39000,supplier_0460 +pr_003161,acct_0105,user_000961,2026-02-25T11:00:00Z,draft,40000,supplier_0461 +pr_003162,acct_0106,user_000962,2026-02-26T11:00:00Z,submitted,41000,supplier_0462 +pr_003163,acct_0107,user_000963,2026-02-27T11:00:00Z,approved,42000,supplier_0463 +pr_003164,acct_0108,user_000964,2026-02-28T11:00:00Z,rejected,43000,supplier_0464 +pr_003165,acct_0109,user_000965,2026-02-01T11:00:00Z,cancelled,44000,supplier_0465 +pr_003166,acct_0110,user_000966,2026-02-02T11:00:00Z,draft,45000,supplier_0466 +pr_003167,acct_0111,user_000967,2026-02-03T11:00:00Z,submitted,46000,supplier_0467 +pr_003168,acct_0112,user_000968,2026-02-04T11:00:00Z,approved,47000,supplier_0468 +pr_003169,acct_0113,user_000969,2026-02-05T11:00:00Z,rejected,48000,supplier_0469 +pr_003170,acct_0114,user_000970,2026-02-06T11:00:00Z,cancelled,49000,supplier_0470 +pr_003171,acct_0115,user_000971,2026-02-07T11:00:00Z,draft,50000,supplier_0471 +pr_003172,acct_0116,user_000972,2026-02-08T11:00:00Z,submitted,51000,supplier_0472 +pr_003173,acct_0117,user_000973,2026-02-09T11:00:00Z,approved,52000,supplier_0473 +pr_003174,acct_0118,user_000974,2026-02-10T11:00:00Z,rejected,53000,supplier_0474 +pr_003175,acct_0119,user_000975,2026-02-11T11:00:00Z,cancelled,54000,supplier_0475 +pr_003176,acct_0120,user_000976,2026-02-12T11:00:00Z,draft,55000,supplier_0476 +pr_003177,acct_0121,user_000977,2026-02-13T11:00:00Z,submitted,56000,supplier_0477 +pr_003178,acct_0122,user_000978,2026-02-14T11:00:00Z,approved,57000,supplier_0478 +pr_003179,acct_0123,user_000979,2026-02-15T11:00:00Z,rejected,58000,supplier_0479 +pr_003180,acct_0124,user_000980,2026-02-16T11:00:00Z,cancelled,59000,supplier_0480 +pr_003181,acct_0125,user_000981,2026-02-17T11:00:00Z,draft,60000,supplier_0481 +pr_003182,acct_0126,user_000982,2026-02-18T11:00:00Z,submitted,61000,supplier_0482 
+pr_003183,acct_0127,user_000983,2026-02-19T11:00:00Z,approved,62000,supplier_0483 +pr_003184,acct_0128,user_000984,2026-02-20T11:00:00Z,rejected,63000,supplier_0484 +pr_003185,acct_0129,user_000985,2026-02-21T11:00:00Z,cancelled,64000,supplier_0485 +pr_003186,acct_0130,user_000986,2026-02-22T11:00:00Z,draft,65000,supplier_0486 +pr_003187,acct_0131,user_000987,2026-02-23T11:00:00Z,submitted,66000,supplier_0487 +pr_003188,acct_0132,user_000988,2026-02-24T11:00:00Z,approved,67000,supplier_0488 +pr_003189,acct_0133,user_000989,2026-02-25T11:00:00Z,rejected,68000,supplier_0489 +pr_003190,acct_0134,user_000990,2026-02-26T11:00:00Z,cancelled,69000,supplier_0490 +pr_003191,acct_0135,user_000991,2026-02-27T11:00:00Z,draft,70000,supplier_0491 +pr_003192,acct_0136,user_000992,2026-02-28T11:00:00Z,submitted,71000,supplier_0492 +pr_003193,acct_0137,user_000993,2026-02-01T11:00:00Z,approved,72000,supplier_0493 +pr_003194,acct_0138,user_000994,2026-02-02T11:00:00Z,rejected,73000,supplier_0494 +pr_003195,acct_0139,user_000995,2026-02-03T11:00:00Z,cancelled,74000,supplier_0495 +pr_003196,acct_0140,user_000996,2026-02-04T11:00:00Z,draft,75000,supplier_0496 +pr_003197,acct_0141,user_000997,2026-02-05T11:00:00Z,submitted,76000,supplier_0497 +pr_003198,acct_0142,user_000998,2026-02-06T11:00:00Z,approved,77000,supplier_0498 +pr_003199,acct_0143,user_000999,2026-02-07T11:00:00Z,rejected,78000,supplier_0499 +pr_003200,acct_0144,user_001000,2026-02-08T11:00:00Z,cancelled,79000,supplier_0500 +pr_003201,acct_0145,user_001001,2026-02-09T11:00:00Z,draft,80000,supplier_0501 +pr_003202,acct_0146,user_001002,2026-02-10T11:00:00Z,submitted,81000,supplier_0502 +pr_003203,acct_0147,user_001003,2026-02-11T11:00:00Z,approved,82000,supplier_0503 +pr_003204,acct_0148,user_001004,2026-02-12T11:00:00Z,rejected,83000,supplier_0504 +pr_003205,acct_0149,user_001005,2026-02-13T11:00:00Z,cancelled,84000,supplier_0505 +pr_003206,acct_0150,user_001006,2026-02-14T11:00:00Z,draft,85000,supplier_0506 
+pr_003207,acct_0151,user_001007,2026-02-15T11:00:00Z,submitted,86000,supplier_0507 +pr_003208,acct_0152,user_001008,2026-02-16T11:00:00Z,approved,87000,supplier_0508 +pr_003209,acct_0153,user_001009,2026-02-17T11:00:00Z,rejected,88000,supplier_0509 +pr_003210,acct_0154,user_001010,2026-02-18T11:00:00Z,cancelled,89000,supplier_0510 +pr_003211,acct_0155,user_001011,2026-02-19T11:00:00Z,draft,90000,supplier_0511 +pr_003212,acct_0156,user_001012,2026-02-20T11:00:00Z,submitted,91000,supplier_0512 +pr_003213,acct_0157,user_001013,2026-02-21T11:00:00Z,approved,92000,supplier_0513 +pr_003214,acct_0158,user_001014,2026-02-22T11:00:00Z,rejected,93000,supplier_0514 +pr_003215,acct_0159,user_001015,2026-02-23T11:00:00Z,cancelled,94000,supplier_0515 +pr_003216,acct_0160,user_001016,2026-02-24T11:00:00Z,draft,95000,supplier_0516 +pr_003217,acct_0161,user_001017,2026-02-25T11:00:00Z,submitted,96000,supplier_0517 +pr_003218,acct_0162,user_001018,2026-02-26T11:00:00Z,approved,97000,supplier_0518 +pr_003219,acct_0163,user_001019,2026-02-27T11:00:00Z,rejected,98000,supplier_0519 +pr_003220,acct_0164,user_001020,2026-02-28T11:00:00Z,cancelled,99000,supplier_0520 +pr_003221,acct_0165,user_001021,2026-02-01T11:00:00Z,draft,100000,supplier_0521 +pr_003222,acct_0166,user_001022,2026-02-02T11:00:00Z,submitted,101000,supplier_0522 +pr_003223,acct_0167,user_001023,2026-02-03T11:00:00Z,approved,102000,supplier_0523 +pr_003224,acct_0168,user_001024,2026-02-04T11:00:00Z,rejected,103000,supplier_0524 +pr_003225,acct_0169,user_001025,2026-02-05T11:00:00Z,cancelled,104000,supplier_0525 +pr_003226,acct_0170,user_001026,2026-02-06T11:00:00Z,draft,105000,supplier_0526 +pr_003227,acct_0171,user_001027,2026-02-07T11:00:00Z,submitted,106000,supplier_0527 +pr_003228,acct_0172,user_001028,2026-02-08T11:00:00Z,approved,107000,supplier_0528 +pr_003229,acct_0173,user_001029,2026-02-09T11:00:00Z,rejected,108000,supplier_0529 +pr_003230,acct_0174,user_001030,2026-02-10T11:00:00Z,cancelled,109000,supplier_0530 
+pr_003231,acct_0175,user_001031,2026-02-11T11:00:00Z,draft,110000,supplier_0531 +pr_003232,acct_0176,user_001032,2026-02-12T11:00:00Z,submitted,10000,supplier_0532 +pr_003233,acct_0177,user_001033,2026-02-13T11:00:00Z,approved,11000,supplier_0533 +pr_003234,acct_0178,user_001034,2026-02-14T11:00:00Z,rejected,12000,supplier_0534 +pr_003235,acct_0179,user_001035,2026-02-15T11:00:00Z,cancelled,13000,supplier_0535 +pr_003236,acct_0180,user_001036,2026-02-16T11:00:00Z,draft,14000,supplier_0536 +pr_003237,acct_0181,user_001037,2026-02-17T11:00:00Z,submitted,15000,supplier_0537 +pr_003238,acct_0182,user_001038,2026-02-18T11:00:00Z,approved,16000,supplier_0538 +pr_003239,acct_0183,user_001039,2026-02-19T11:00:00Z,rejected,17000,supplier_0539 +pr_003240,acct_0184,user_001040,2026-02-20T11:00:00Z,cancelled,18000,supplier_0540 +pr_003241,acct_0185,user_001041,2026-02-21T11:00:00Z,draft,19000,supplier_0541 +pr_003242,acct_0186,user_001042,2026-02-22T11:00:00Z,submitted,20000,supplier_0542 +pr_003243,acct_0187,user_001043,2026-02-23T11:00:00Z,approved,21000,supplier_0543 +pr_003244,acct_0188,user_001044,2026-02-24T11:00:00Z,rejected,22000,supplier_0544 +pr_003245,acct_0189,user_001045,2026-02-25T11:00:00Z,cancelled,23000,supplier_0545 +pr_003246,acct_0190,user_001046,2026-02-26T11:00:00Z,draft,24000,supplier_0546 +pr_003247,acct_0001,user_001047,2026-02-27T11:00:00Z,submitted,25000,supplier_0547 +pr_003248,acct_0002,user_001048,2026-02-28T11:00:00Z,approved,26000,supplier_0548 +pr_003249,acct_0003,user_001049,2026-02-01T11:00:00Z,rejected,27000,supplier_0549 +pr_003250,acct_0004,user_001050,2026-02-02T11:00:00Z,cancelled,28000,supplier_0550 +pr_003251,acct_0005,user_001051,2026-02-03T11:00:00Z,draft,29000,supplier_0551 +pr_003252,acct_0006,user_001052,2026-02-04T11:00:00Z,submitted,30000,supplier_0552 +pr_003253,acct_0007,user_001053,2026-02-05T11:00:00Z,approved,31000,supplier_0553 +pr_003254,acct_0008,user_001054,2026-02-06T11:00:00Z,rejected,32000,supplier_0554 
+pr_003255,acct_0009,user_001055,2026-02-07T11:00:00Z,cancelled,33000,supplier_0555 +pr_003256,acct_0010,user_001056,2026-02-08T11:00:00Z,draft,34000,supplier_0556 +pr_003257,acct_0011,user_001057,2026-02-09T11:00:00Z,submitted,35000,supplier_0557 +pr_003258,acct_0012,user_001058,2026-02-10T11:00:00Z,approved,36000,supplier_0558 +pr_003259,acct_0013,user_001059,2026-02-11T11:00:00Z,rejected,37000,supplier_0559 +pr_003260,acct_0014,user_001060,2026-02-12T11:00:00Z,cancelled,38000,supplier_0560 +pr_003261,acct_0015,user_001061,2026-02-13T11:00:00Z,draft,39000,supplier_0561 +pr_003262,acct_0016,user_001062,2026-02-14T11:00:00Z,submitted,40000,supplier_0562 +pr_003263,acct_0017,user_001063,2026-02-15T11:00:00Z,approved,41000,supplier_0563 +pr_003264,acct_0018,user_001064,2026-02-16T11:00:00Z,rejected,42000,supplier_0564 +pr_003265,acct_0019,user_001065,2026-02-17T11:00:00Z,cancelled,43000,supplier_0565 +pr_003266,acct_0020,user_001066,2026-02-18T11:00:00Z,draft,44000,supplier_0566 +pr_003267,acct_0021,user_001067,2026-02-19T11:00:00Z,submitted,45000,supplier_0567 +pr_003268,acct_0022,user_001068,2026-02-20T11:00:00Z,approved,46000,supplier_0568 +pr_003269,acct_0023,user_001069,2026-02-21T11:00:00Z,rejected,47000,supplier_0569 +pr_003270,acct_0024,user_001070,2026-02-22T11:00:00Z,cancelled,48000,supplier_0570 +pr_003271,acct_0025,user_001071,2026-02-23T11:00:00Z,draft,49000,supplier_0571 +pr_003272,acct_0026,user_001072,2026-02-24T11:00:00Z,submitted,50000,supplier_0572 +pr_003273,acct_0027,user_001073,2026-02-25T11:00:00Z,approved,51000,supplier_0573 +pr_003274,acct_0028,user_001074,2026-02-26T11:00:00Z,rejected,52000,supplier_0574 +pr_003275,acct_0029,user_001075,2026-02-27T11:00:00Z,cancelled,53000,supplier_0575 +pr_003276,acct_0030,user_001076,2026-02-28T11:00:00Z,draft,54000,supplier_0576 +pr_003277,acct_0031,user_001077,2026-02-01T11:00:00Z,submitted,55000,supplier_0577 +pr_003278,acct_0032,user_001078,2026-02-02T11:00:00Z,approved,56000,supplier_0578 
+pr_003279,acct_0033,user_001079,2026-02-03T11:00:00Z,rejected,57000,supplier_0579 +pr_003280,acct_0034,user_001080,2026-02-04T11:00:00Z,cancelled,58000,supplier_0580 +pr_003281,acct_0035,user_001081,2026-02-05T11:00:00Z,draft,59000,supplier_0581 +pr_003282,acct_0036,user_001082,2026-02-06T11:00:00Z,submitted,60000,supplier_0582 +pr_003283,acct_0037,user_001083,2026-02-07T11:00:00Z,approved,61000,supplier_0583 +pr_003284,acct_0038,user_001084,2026-02-08T11:00:00Z,rejected,62000,supplier_0584 +pr_003285,acct_0039,user_001085,2026-02-09T11:00:00Z,cancelled,63000,supplier_0585 +pr_003286,acct_0040,user_001086,2026-02-10T11:00:00Z,draft,64000,supplier_0586 +pr_003287,acct_0041,user_001087,2026-02-11T11:00:00Z,submitted,65000,supplier_0587 +pr_003288,acct_0042,user_001088,2026-02-12T11:00:00Z,approved,66000,supplier_0588 +pr_003289,acct_0043,user_001089,2026-02-13T11:00:00Z,rejected,67000,supplier_0589 +pr_003290,acct_0044,user_001090,2026-02-14T11:00:00Z,cancelled,68000,supplier_0590 +pr_003291,acct_0045,user_001091,2026-02-15T11:00:00Z,draft,69000,supplier_0591 +pr_003292,acct_0046,user_001092,2026-02-16T11:00:00Z,submitted,70000,supplier_0592 +pr_003293,acct_0047,user_001093,2026-02-17T11:00:00Z,approved,71000,supplier_0593 +pr_003294,acct_0048,user_001094,2026-02-18T11:00:00Z,rejected,72000,supplier_0594 +pr_003295,acct_0049,user_001095,2026-02-19T11:00:00Z,cancelled,73000,supplier_0595 +pr_003296,acct_0050,user_001096,2026-02-20T11:00:00Z,draft,74000,supplier_0596 +pr_003297,acct_0051,user_001097,2026-02-21T11:00:00Z,submitted,75000,supplier_0597 +pr_003298,acct_0052,user_001098,2026-02-22T11:00:00Z,approved,76000,supplier_0598 +pr_003299,acct_0053,user_001099,2026-02-23T11:00:00Z,rejected,77000,supplier_0599 +pr_003300,acct_0054,user_001100,2026-02-24T11:00:00Z,cancelled,78000,supplier_0600 +pr_003301,acct_0010,user_000001,2026-02-25T11:00:00Z,draft,79000,supplier_0601 +pr_003302,acct_0011,user_000002,2026-02-26T11:00:00Z,submitted,80000,supplier_0602 
+pr_003303,acct_0012,user_000003,2026-02-27T11:00:00Z,approved,81000,supplier_0603 +pr_003304,acct_0013,user_000004,2026-02-28T11:00:00Z,rejected,82000,supplier_0604 +pr_003305,acct_0014,user_000005,2026-02-01T11:00:00Z,cancelled,83000,supplier_0605 +pr_003306,acct_0015,user_000006,2026-02-02T11:00:00Z,draft,84000,supplier_0606 +pr_003307,acct_0016,user_000007,2026-02-03T11:00:00Z,submitted,85000,supplier_0607 +pr_003308,acct_0017,user_000008,2026-02-04T11:00:00Z,approved,86000,supplier_0608 +pr_003309,acct_0018,user_000009,2026-02-05T11:00:00Z,rejected,87000,supplier_0609 +pr_003310,acct_0019,user_000010,2026-02-06T11:00:00Z,cancelled,88000,supplier_0610 +pr_003311,acct_0020,user_000011,2026-02-07T11:00:00Z,draft,89000,supplier_0611 +pr_003312,acct_0021,user_000012,2026-02-08T11:00:00Z,submitted,90000,supplier_0612 +pr_003313,acct_0022,user_000013,2026-02-09T11:00:00Z,approved,91000,supplier_0613 +pr_003314,acct_0023,user_000014,2026-02-10T11:00:00Z,rejected,92000,supplier_0614 +pr_003315,acct_0024,user_000015,2026-02-11T11:00:00Z,cancelled,93000,supplier_0615 +pr_003316,acct_0025,user_000016,2026-02-12T11:00:00Z,draft,94000,supplier_0616 +pr_003317,acct_0026,user_000017,2026-02-13T11:00:00Z,submitted,95000,supplier_0617 +pr_003318,acct_0027,user_000018,2026-02-14T11:00:00Z,approved,96000,supplier_0618 +pr_003319,acct_0028,user_000019,2026-02-15T11:00:00Z,rejected,97000,supplier_0619 +pr_003320,acct_0029,user_000020,2026-02-16T11:00:00Z,cancelled,98000,supplier_0620 +pr_003321,acct_0030,user_000021,2026-02-17T11:00:00Z,draft,99000,supplier_0621 +pr_003322,acct_0031,user_000022,2026-02-18T11:00:00Z,submitted,100000,supplier_0622 +pr_003323,acct_0032,user_000023,2026-02-19T11:00:00Z,approved,101000,supplier_0623 +pr_003324,acct_0033,user_000024,2026-02-20T11:00:00Z,rejected,102000,supplier_0624 +pr_003325,acct_0034,user_000025,2026-02-21T11:00:00Z,cancelled,103000,supplier_0625 +pr_003326,acct_0035,user_000026,2026-02-22T11:00:00Z,draft,104000,supplier_0626 
+pr_003327,acct_0036,user_000027,2026-02-23T11:00:00Z,submitted,105000,supplier_0627 +pr_003328,acct_0037,user_000028,2026-02-24T11:00:00Z,approved,106000,supplier_0628 +pr_003329,acct_0038,user_000029,2026-02-25T11:00:00Z,rejected,107000,supplier_0629 +pr_003330,acct_0039,user_000030,2026-02-26T11:00:00Z,cancelled,108000,supplier_0630 +pr_003331,acct_0040,user_000031,2026-02-27T11:00:00Z,draft,109000,supplier_0631 +pr_003332,acct_0041,user_000032,2026-02-28T11:00:00Z,submitted,110000,supplier_0632 +pr_003333,acct_0042,user_000033,2026-02-01T11:00:00Z,approved,10000,supplier_0633 +pr_003334,acct_0043,user_000034,2026-02-02T11:00:00Z,rejected,11000,supplier_0634 +pr_003335,acct_0044,user_000035,2026-02-03T11:00:00Z,cancelled,12000,supplier_0635 +pr_003336,acct_0045,user_000036,2026-02-04T11:00:00Z,draft,13000,supplier_0636 +pr_003337,acct_0046,user_000037,2026-02-05T11:00:00Z,submitted,14000,supplier_0637 +pr_003338,acct_0047,user_000038,2026-02-06T11:00:00Z,approved,15000,supplier_0638 +pr_003339,acct_0048,user_000039,2026-02-07T11:00:00Z,rejected,16000,supplier_0639 +pr_003340,acct_0049,user_000040,2026-02-08T11:00:00Z,cancelled,17000,supplier_0640 +pr_003341,acct_0050,user_000041,2026-02-09T11:00:00Z,draft,18000,supplier_0641 +pr_003342,acct_0010,user_000042,2026-02-10T11:00:00Z,submitted,19000,supplier_0642 +pr_003343,acct_0011,user_000043,2026-02-11T11:00:00Z,approved,20000,supplier_0643 +pr_003344,acct_0012,user_000044,2026-02-12T11:00:00Z,rejected,21000,supplier_0644 +pr_003345,acct_0013,user_000045,2026-02-13T11:00:00Z,cancelled,22000,supplier_0645 +pr_003346,acct_0014,user_000046,2026-02-14T11:00:00Z,draft,23000,supplier_0646 +pr_003347,acct_0015,user_000047,2026-02-15T11:00:00Z,submitted,24000,supplier_0647 +pr_003348,acct_0016,user_000048,2026-02-16T11:00:00Z,approved,25000,supplier_0648 +pr_003349,acct_0017,user_000049,2026-02-17T11:00:00Z,rejected,26000,supplier_0649 +pr_003350,acct_0018,user_000050,2026-02-18T11:00:00Z,cancelled,27000,supplier_0650 
+pr_003351,acct_0019,user_000051,2026-02-19T11:00:00Z,draft,28000,supplier_0651 +pr_003352,acct_0020,user_000052,2026-02-20T11:00:00Z,submitted,29000,supplier_0652 +pr_003353,acct_0021,user_000053,2026-02-21T11:00:00Z,approved,30000,supplier_0653 +pr_003354,acct_0022,user_000054,2026-02-22T11:00:00Z,rejected,31000,supplier_0654 +pr_003355,acct_0023,user_000055,2026-02-23T11:00:00Z,cancelled,32000,supplier_0655 +pr_003356,acct_0024,user_000056,2026-02-24T11:00:00Z,draft,33000,supplier_0656 +pr_003357,acct_0025,user_000057,2026-02-25T11:00:00Z,submitted,34000,supplier_0657 +pr_003358,acct_0026,user_000058,2026-02-26T11:00:00Z,approved,35000,supplier_0658 +pr_003359,acct_0027,user_000059,2026-02-27T11:00:00Z,rejected,36000,supplier_0659 +pr_003360,acct_0028,user_000060,2026-02-28T11:00:00Z,cancelled,37000,supplier_0660 +pr_003361,acct_0029,user_000061,2026-02-01T11:00:00Z,draft,38000,supplier_0661 +pr_003362,acct_0030,user_000062,2026-02-02T11:00:00Z,submitted,39000,supplier_0662 +pr_003363,acct_0031,user_000063,2026-02-03T11:00:00Z,approved,40000,supplier_0663 +pr_003364,acct_0032,user_000064,2026-02-04T11:00:00Z,rejected,41000,supplier_0664 +pr_003365,acct_0033,user_000065,2026-02-05T11:00:00Z,cancelled,42000,supplier_0665 +pr_003366,acct_0034,user_000066,2026-02-06T11:00:00Z,draft,43000,supplier_0666 +pr_003367,acct_0035,user_000067,2026-02-07T11:00:00Z,submitted,44000,supplier_0667 +pr_003368,acct_0036,user_000068,2026-02-08T11:00:00Z,approved,45000,supplier_0668 +pr_003369,acct_0037,user_000069,2026-02-09T11:00:00Z,rejected,46000,supplier_0669 +pr_003370,acct_0038,user_000070,2026-02-10T11:00:00Z,cancelled,47000,supplier_0670 +pr_003371,acct_0039,user_000071,2026-02-11T11:00:00Z,draft,48000,supplier_0671 +pr_003372,acct_0040,user_000072,2026-02-12T11:00:00Z,submitted,49000,supplier_0672 +pr_003373,acct_0041,user_000073,2026-02-13T11:00:00Z,approved,50000,supplier_0673 +pr_003374,acct_0042,user_000074,2026-02-14T11:00:00Z,rejected,51000,supplier_0674 
+pr_003375,acct_0043,user_000075,2026-02-15T11:00:00Z,cancelled,52000,supplier_0675 +pr_003376,acct_0044,user_000076,2026-02-16T11:00:00Z,draft,53000,supplier_0676 +pr_003377,acct_0045,user_000077,2026-02-17T11:00:00Z,submitted,54000,supplier_0677 +pr_003378,acct_0046,user_000078,2026-02-18T11:00:00Z,approved,55000,supplier_0678 +pr_003379,acct_0047,user_000079,2026-02-19T11:00:00Z,rejected,56000,supplier_0679 +pr_003380,acct_0048,user_000080,2026-02-20T11:00:00Z,cancelled,57000,supplier_0680 +pr_003381,acct_0049,user_000081,2026-02-21T11:00:00Z,draft,58000,supplier_0681 +pr_003382,acct_0050,user_000082,2026-02-22T11:00:00Z,submitted,59000,supplier_0682 +pr_003383,acct_0010,user_000083,2026-02-23T11:00:00Z,approved,60000,supplier_0683 +pr_003384,acct_0011,user_000084,2026-02-24T11:00:00Z,rejected,61000,supplier_0684 +pr_003385,acct_0012,user_000085,2026-02-25T11:00:00Z,cancelled,62000,supplier_0685 +pr_003386,acct_0013,user_000086,2026-02-26T11:00:00Z,draft,63000,supplier_0686 +pr_003387,acct_0014,user_000087,2026-02-27T11:00:00Z,submitted,64000,supplier_0687 +pr_003388,acct_0015,user_000088,2026-02-28T11:00:00Z,approved,65000,supplier_0688 +pr_003389,acct_0016,user_000089,2026-02-01T11:00:00Z,rejected,66000,supplier_0689 +pr_003390,acct_0017,user_000090,2026-02-02T11:00:00Z,cancelled,67000,supplier_0690 +pr_003391,acct_0018,user_000091,2026-02-03T11:00:00Z,draft,68000,supplier_0691 +pr_003392,acct_0019,user_000092,2026-02-04T11:00:00Z,submitted,69000,supplier_0692 +pr_003393,acct_0020,user_000093,2026-02-05T11:00:00Z,approved,70000,supplier_0693 +pr_003394,acct_0021,user_000094,2026-02-06T11:00:00Z,rejected,71000,supplier_0694 +pr_003395,acct_0022,user_000095,2026-02-07T11:00:00Z,cancelled,72000,supplier_0695 +pr_003396,acct_0023,user_000096,2026-02-08T11:00:00Z,draft,73000,supplier_0696 +pr_003397,acct_0024,user_000097,2026-02-09T11:00:00Z,submitted,74000,supplier_0697 +pr_003398,acct_0025,user_000098,2026-02-10T11:00:00Z,approved,75000,supplier_0698 
+pr_003399,acct_0026,user_000099,2026-02-11T11:00:00Z,rejected,76000,supplier_0699 +pr_003400,acct_0027,user_000100,2026-02-12T11:00:00Z,cancelled,77000,supplier_0700 +pr_003401,acct_0028,user_000101,2026-02-13T11:00:00Z,draft,78000,supplier_0701 +pr_003402,acct_0029,user_000102,2026-02-14T11:00:00Z,submitted,79000,supplier_0702 +pr_003403,acct_0030,user_000103,2026-02-15T11:00:00Z,approved,80000,supplier_0703 +pr_003404,acct_0031,user_000104,2026-02-16T11:00:00Z,rejected,81000,supplier_0704 +pr_003405,acct_0032,user_000105,2026-02-17T11:00:00Z,cancelled,82000,supplier_0705 +pr_003406,acct_0033,user_000106,2026-02-18T11:00:00Z,draft,83000,supplier_0706 +pr_003407,acct_0034,user_000107,2026-02-19T11:00:00Z,submitted,84000,supplier_0707 +pr_003408,acct_0035,user_000108,2026-02-20T11:00:00Z,approved,85000,supplier_0708 +pr_003409,acct_0036,user_000109,2026-02-21T11:00:00Z,rejected,86000,supplier_0709 +pr_003410,acct_0037,user_000110,2026-02-22T11:00:00Z,cancelled,87000,supplier_0710 +pr_003411,acct_0038,user_000111,2026-02-23T11:00:00Z,draft,88000,supplier_0711 +pr_003412,acct_0039,user_000112,2026-02-24T11:00:00Z,submitted,89000,supplier_0712 +pr_003413,acct_0040,user_000113,2026-02-25T11:00:00Z,approved,90000,supplier_0713 +pr_003414,acct_0041,user_000114,2026-02-26T11:00:00Z,rejected,91000,supplier_0714 +pr_003415,acct_0042,user_000115,2026-02-27T11:00:00Z,cancelled,92000,supplier_0715 +pr_003416,acct_0043,user_000116,2026-02-28T11:00:00Z,draft,93000,supplier_0716 +pr_003417,acct_0044,user_000117,2026-02-01T11:00:00Z,submitted,94000,supplier_0717 +pr_003418,acct_0045,user_000118,2026-02-02T11:00:00Z,approved,95000,supplier_0718 +pr_003419,acct_0046,user_000119,2026-02-03T11:00:00Z,rejected,96000,supplier_0719 +pr_003420,acct_0047,user_000120,2026-02-04T11:00:00Z,cancelled,97000,supplier_0720 +pr_003421,acct_0048,user_000121,2026-02-05T11:00:00Z,draft,98000,supplier_0721 +pr_003422,acct_0049,user_000122,2026-02-06T11:00:00Z,submitted,99000,supplier_0722 
+pr_003423,acct_0050,user_000123,2026-02-07T11:00:00Z,approved,100000,supplier_0723 +pr_003424,acct_0010,user_000124,2026-02-08T11:00:00Z,rejected,101000,supplier_0724 +pr_003425,acct_0011,user_000125,2026-02-09T11:00:00Z,cancelled,102000,supplier_0725 +pr_003426,acct_0012,user_000126,2026-02-10T11:00:00Z,draft,103000,supplier_0726 +pr_003427,acct_0013,user_000127,2026-02-11T11:00:00Z,submitted,104000,supplier_0727 +pr_003428,acct_0014,user_000128,2026-02-12T11:00:00Z,approved,105000,supplier_0728 +pr_003429,acct_0015,user_000129,2026-02-13T11:00:00Z,rejected,106000,supplier_0729 +pr_003430,acct_0016,user_000130,2026-02-14T11:00:00Z,cancelled,107000,supplier_0730 +pr_003431,acct_0017,user_000131,2026-02-15T11:00:00Z,draft,108000,supplier_0731 +pr_003432,acct_0018,user_000132,2026-02-16T11:00:00Z,submitted,109000,supplier_0732 +pr_003433,acct_0019,user_000133,2026-02-17T11:00:00Z,approved,110000,supplier_0733 +pr_003434,acct_0020,user_000134,2026-02-18T11:00:00Z,rejected,10000,supplier_0734 +pr_003435,acct_0021,user_000135,2026-02-19T11:00:00Z,cancelled,11000,supplier_0735 +pr_003436,acct_0022,user_000136,2026-02-20T11:00:00Z,draft,12000,supplier_0736 +pr_003437,acct_0023,user_000137,2026-02-21T11:00:00Z,submitted,13000,supplier_0737 +pr_003438,acct_0024,user_000138,2026-02-22T11:00:00Z,approved,14000,supplier_0738 +pr_003439,acct_0025,user_000139,2026-02-23T11:00:00Z,rejected,15000,supplier_0739 +pr_003440,acct_0026,user_000140,2026-02-24T11:00:00Z,cancelled,16000,supplier_0740 +pr_003441,acct_0027,user_000141,2026-02-25T11:00:00Z,draft,17000,supplier_0741 +pr_003442,acct_0028,user_000142,2026-02-26T11:00:00Z,submitted,18000,supplier_0742 +pr_003443,acct_0029,user_000143,2026-02-27T11:00:00Z,approved,19000,supplier_0743 +pr_003444,acct_0030,user_000144,2026-02-28T11:00:00Z,rejected,20000,supplier_0744 +pr_003445,acct_0031,user_000145,2026-02-01T11:00:00Z,cancelled,21000,supplier_0745 +pr_003446,acct_0032,user_000146,2026-02-02T11:00:00Z,draft,22000,supplier_0746 
+pr_003447,acct_0033,user_000147,2026-02-03T11:00:00Z,submitted,23000,supplier_0747 +pr_003448,acct_0034,user_000148,2026-02-04T11:00:00Z,approved,24000,supplier_0748 +pr_003449,acct_0035,user_000149,2026-02-05T11:00:00Z,rejected,25000,supplier_0749 +pr_003450,acct_0036,user_000150,2026-02-06T11:00:00Z,cancelled,26000,supplier_0750 +pr_003451,acct_0037,user_000151,2026-02-07T11:00:00Z,draft,27000,supplier_0751 +pr_003452,acct_0038,user_000152,2026-02-08T11:00:00Z,submitted,28000,supplier_0752 +pr_003453,acct_0039,user_000153,2026-02-09T11:00:00Z,approved,29000,supplier_0753 +pr_003454,acct_0040,user_000154,2026-02-10T11:00:00Z,rejected,30000,supplier_0754 +pr_003455,acct_0041,user_000155,2026-02-11T11:00:00Z,cancelled,31000,supplier_0755 +pr_003456,acct_0042,user_000156,2026-02-12T11:00:00Z,draft,32000,supplier_0756 +pr_003457,acct_0043,user_000157,2026-02-13T11:00:00Z,submitted,33000,supplier_0757 +pr_003458,acct_0044,user_000158,2026-02-14T11:00:00Z,approved,34000,supplier_0758 +pr_003459,acct_0045,user_000159,2026-02-15T11:00:00Z,rejected,35000,supplier_0759 +pr_003460,acct_0046,user_000160,2026-02-16T11:00:00Z,cancelled,36000,supplier_0760 +pr_003461,acct_0047,user_000161,2026-02-17T11:00:00Z,draft,37000,supplier_0761 +pr_003462,acct_0048,user_000162,2026-02-18T11:00:00Z,submitted,38000,supplier_0762 +pr_003463,acct_0049,user_000163,2026-02-19T11:00:00Z,approved,39000,supplier_0763 +pr_003464,acct_0050,user_000164,2026-02-20T11:00:00Z,rejected,40000,supplier_0764 +pr_003465,acct_0010,user_000165,2026-02-21T11:00:00Z,cancelled,41000,supplier_0765 +pr_003466,acct_0011,user_000166,2026-02-22T11:00:00Z,draft,42000,supplier_0766 +pr_003467,acct_0012,user_000167,2026-02-23T11:00:00Z,submitted,43000,supplier_0767 +pr_003468,acct_0013,user_000168,2026-02-24T11:00:00Z,approved,44000,supplier_0768 +pr_003469,acct_0014,user_000169,2026-02-25T11:00:00Z,rejected,45000,supplier_0769 +pr_003470,acct_0015,user_000170,2026-02-26T11:00:00Z,cancelled,46000,supplier_0770 
+pr_003471,acct_0016,user_000171,2026-02-27T11:00:00Z,draft,47000,supplier_0771 +pr_003472,acct_0017,user_000172,2026-02-28T11:00:00Z,submitted,48000,supplier_0772 +pr_003473,acct_0018,user_000173,2026-02-01T11:00:00Z,approved,49000,supplier_0773 +pr_003474,acct_0019,user_000174,2026-02-02T11:00:00Z,rejected,50000,supplier_0774 +pr_003475,acct_0020,user_000175,2026-02-03T11:00:00Z,cancelled,51000,supplier_0775 +pr_003476,acct_0021,user_000176,2026-02-04T11:00:00Z,draft,52000,supplier_0776 +pr_003477,acct_0022,user_000177,2026-02-05T11:00:00Z,submitted,53000,supplier_0777 +pr_003478,acct_0023,user_000178,2026-02-06T11:00:00Z,approved,54000,supplier_0778 +pr_003479,acct_0024,user_000179,2026-02-07T11:00:00Z,rejected,55000,supplier_0779 +pr_003480,acct_0025,user_000180,2026-02-08T11:00:00Z,cancelled,56000,supplier_0780 +pr_003481,acct_0026,user_000181,2026-02-09T11:00:00Z,draft,57000,supplier_0781 +pr_003482,acct_0027,user_000182,2026-02-10T11:00:00Z,submitted,58000,supplier_0782 +pr_003483,acct_0028,user_000183,2026-02-11T11:00:00Z,approved,59000,supplier_0783 +pr_003484,acct_0029,user_000184,2026-02-12T11:00:00Z,rejected,60000,supplier_0784 +pr_003485,acct_0030,user_000185,2026-02-13T11:00:00Z,cancelled,61000,supplier_0785 +pr_003486,acct_0031,user_000186,2026-02-14T11:00:00Z,draft,62000,supplier_0786 +pr_003487,acct_0032,user_000187,2026-02-15T11:00:00Z,submitted,63000,supplier_0787 +pr_003488,acct_0033,user_000188,2026-02-16T11:00:00Z,approved,64000,supplier_0788 +pr_003489,acct_0034,user_000189,2026-02-17T11:00:00Z,rejected,65000,supplier_0789 +pr_003490,acct_0035,user_000190,2026-02-18T11:00:00Z,cancelled,66000,supplier_0790 +pr_003491,acct_0036,user_000191,2026-02-19T11:00:00Z,draft,67000,supplier_0791 +pr_003492,acct_0037,user_000192,2026-02-20T11:00:00Z,submitted,68000,supplier_0792 +pr_003493,acct_0038,user_000193,2026-02-21T11:00:00Z,approved,69000,supplier_0793 +pr_003494,acct_0039,user_000194,2026-02-22T11:00:00Z,rejected,70000,supplier_0794 
+pr_003495,acct_0040,user_000195,2026-02-23T11:00:00Z,cancelled,71000,supplier_0795 +pr_003496,acct_0041,user_000196,2026-02-24T11:00:00Z,draft,72000,supplier_0796 +pr_003497,acct_0042,user_000197,2026-02-25T11:00:00Z,submitted,73000,supplier_0797 +pr_003498,acct_0043,user_000198,2026-02-26T11:00:00Z,approved,74000,supplier_0798 +pr_003499,acct_0044,user_000199,2026-02-27T11:00:00Z,rejected,75000,supplier_0799 +pr_003500,acct_0045,user_000200,2026-02-28T11:00:00Z,cancelled,76000,supplier_0800 +pr_003501,acct_0046,user_000201,2026-02-01T11:00:00Z,draft,77000,supplier_0801 +pr_003502,acct_0047,user_000202,2026-02-02T11:00:00Z,submitted,78000,supplier_0802 +pr_003503,acct_0048,user_000203,2026-02-03T11:00:00Z,approved,79000,supplier_0803 +pr_003504,acct_0049,user_000204,2026-02-04T11:00:00Z,rejected,80000,supplier_0804 +pr_003505,acct_0050,user_000205,2026-02-05T11:00:00Z,cancelled,81000,supplier_0805 +pr_003506,acct_0010,user_000206,2026-02-06T11:00:00Z,draft,82000,supplier_0806 +pr_003507,acct_0011,user_000207,2026-02-07T11:00:00Z,submitted,83000,supplier_0807 +pr_003508,acct_0012,user_000208,2026-02-08T11:00:00Z,approved,84000,supplier_0808 +pr_003509,acct_0013,user_000209,2026-02-09T11:00:00Z,rejected,85000,supplier_0809 +pr_003510,acct_0014,user_000210,2026-02-10T11:00:00Z,cancelled,86000,supplier_0810 +pr_003511,acct_0015,user_000211,2026-02-11T11:00:00Z,draft,87000,supplier_0811 +pr_003512,acct_0016,user_000212,2026-02-12T11:00:00Z,submitted,88000,supplier_0812 +pr_003513,acct_0017,user_000213,2026-02-13T11:00:00Z,approved,89000,supplier_0813 +pr_003514,acct_0018,user_000214,2026-02-14T11:00:00Z,rejected,90000,supplier_0814 +pr_003515,acct_0019,user_000215,2026-02-15T11:00:00Z,cancelled,91000,supplier_0815 +pr_003516,acct_0020,user_000216,2026-02-16T11:00:00Z,draft,92000,supplier_0816 +pr_003517,acct_0021,user_000217,2026-02-17T11:00:00Z,submitted,93000,supplier_0817 +pr_003518,acct_0022,user_000218,2026-02-18T11:00:00Z,approved,94000,supplier_0818 
+pr_003519,acct_0023,user_000219,2026-02-19T11:00:00Z,rejected,95000,supplier_0819 +pr_003520,acct_0024,user_000220,2026-02-20T11:00:00Z,cancelled,96000,supplier_0820 +pr_003521,acct_0025,user_000221,2026-02-21T11:00:00Z,draft,97000,supplier_0821 +pr_003522,acct_0026,user_000222,2026-02-22T11:00:00Z,submitted,98000,supplier_0822 +pr_003523,acct_0027,user_000223,2026-02-23T11:00:00Z,approved,99000,supplier_0823 +pr_003524,acct_0028,user_000224,2026-02-24T11:00:00Z,rejected,100000,supplier_0824 +pr_003525,acct_0029,user_000225,2026-02-25T11:00:00Z,cancelled,101000,supplier_0825 +pr_003526,acct_0030,user_000226,2026-02-26T11:00:00Z,draft,102000,supplier_0826 +pr_003527,acct_0031,user_000227,2026-02-27T11:00:00Z,submitted,103000,supplier_0827 +pr_003528,acct_0032,user_000228,2026-02-28T11:00:00Z,approved,104000,supplier_0828 +pr_003529,acct_0033,user_000229,2026-02-01T11:00:00Z,rejected,105000,supplier_0829 +pr_003530,acct_0034,user_000230,2026-02-02T11:00:00Z,cancelled,106000,supplier_0830 +pr_003531,acct_0035,user_000231,2026-02-03T11:00:00Z,draft,107000,supplier_0831 +pr_003532,acct_0036,user_000232,2026-02-04T11:00:00Z,submitted,108000,supplier_0832 +pr_003533,acct_0037,user_000233,2026-02-05T11:00:00Z,approved,109000,supplier_0833 +pr_003534,acct_0038,user_000234,2026-02-06T11:00:00Z,rejected,110000,supplier_0834 +pr_003535,acct_0039,user_000235,2026-02-07T11:00:00Z,cancelled,10000,supplier_0835 +pr_003536,acct_0040,user_000236,2026-02-08T11:00:00Z,draft,11000,supplier_0836 +pr_003537,acct_0041,user_000237,2026-02-09T11:00:00Z,submitted,12000,supplier_0837 +pr_003538,acct_0042,user_000238,2026-02-10T11:00:00Z,approved,13000,supplier_0838 +pr_003539,acct_0043,user_000239,2026-02-11T11:00:00Z,rejected,14000,supplier_0839 +pr_003540,acct_0044,user_000240,2026-02-12T11:00:00Z,cancelled,15000,supplier_0840 +pr_003541,acct_0045,user_000241,2026-02-13T11:00:00Z,draft,16000,supplier_0841 +pr_003542,acct_0046,user_000242,2026-02-14T11:00:00Z,submitted,17000,supplier_0842 
+pr_003543,acct_0047,user_000243,2026-02-15T11:00:00Z,approved,18000,supplier_0843 +pr_003544,acct_0048,user_000244,2026-02-16T11:00:00Z,rejected,19000,supplier_0844 +pr_003545,acct_0049,user_000245,2026-02-17T11:00:00Z,cancelled,20000,supplier_0845 +pr_003546,acct_0050,user_000246,2026-02-18T11:00:00Z,draft,21000,supplier_0846 +pr_003547,acct_0010,user_000247,2026-02-19T11:00:00Z,submitted,22000,supplier_0847 +pr_003548,acct_0011,user_000248,2026-02-20T11:00:00Z,approved,23000,supplier_0848 +pr_003549,acct_0012,user_000249,2026-02-21T11:00:00Z,rejected,24000,supplier_0849 +pr_003550,acct_0013,user_000250,2026-02-22T11:00:00Z,cancelled,25000,supplier_0850 +pr_003551,acct_0014,user_000251,2026-02-23T11:00:00Z,draft,26000,supplier_0851 +pr_003552,acct_0015,user_000252,2026-02-24T11:00:00Z,submitted,27000,supplier_0852 +pr_003553,acct_0016,user_000253,2026-02-25T11:00:00Z,approved,28000,supplier_0853 +pr_003554,acct_0017,user_000254,2026-02-26T11:00:00Z,rejected,29000,supplier_0854 +pr_003555,acct_0018,user_000255,2026-02-27T11:00:00Z,cancelled,30000,supplier_0855 +pr_003556,acct_0019,user_000256,2026-02-28T11:00:00Z,draft,31000,supplier_0856 +pr_003557,acct_0020,user_000257,2026-02-01T11:00:00Z,submitted,32000,supplier_0857 +pr_003558,acct_0021,user_000258,2026-02-02T11:00:00Z,approved,33000,supplier_0858 +pr_003559,acct_0022,user_000259,2026-02-03T11:00:00Z,rejected,34000,supplier_0859 +pr_003560,acct_0023,user_000260,2026-02-04T11:00:00Z,cancelled,35000,supplier_0860 +pr_003561,acct_0024,user_000261,2026-02-05T11:00:00Z,draft,36000,supplier_0861 +pr_003562,acct_0025,user_000262,2026-02-06T11:00:00Z,submitted,37000,supplier_0862 +pr_003563,acct_0026,user_000263,2026-02-07T11:00:00Z,approved,38000,supplier_0863 +pr_003564,acct_0027,user_000264,2026-02-08T11:00:00Z,rejected,39000,supplier_0864 +pr_003565,acct_0028,user_000265,2026-02-09T11:00:00Z,cancelled,40000,supplier_0865 +pr_003566,acct_0029,user_000266,2026-02-10T11:00:00Z,draft,41000,supplier_0866 
+pr_003567,acct_0030,user_000267,2026-02-11T11:00:00Z,submitted,42000,supplier_0867 +pr_003568,acct_0031,user_000268,2026-02-12T11:00:00Z,approved,43000,supplier_0868 +pr_003569,acct_0032,user_000269,2026-02-13T11:00:00Z,rejected,44000,supplier_0869 +pr_003570,acct_0033,user_000270,2026-02-14T11:00:00Z,cancelled,45000,supplier_0870 +pr_003571,acct_0034,user_000271,2026-02-15T11:00:00Z,draft,46000,supplier_0871 +pr_003572,acct_0035,user_000272,2026-02-16T11:00:00Z,submitted,47000,supplier_0872 +pr_003573,acct_0036,user_000273,2026-02-17T11:00:00Z,approved,48000,supplier_0873 +pr_003574,acct_0037,user_000274,2026-02-18T11:00:00Z,rejected,49000,supplier_0874 +pr_003575,acct_0038,user_000275,2026-02-19T11:00:00Z,cancelled,50000,supplier_0875 +pr_003576,acct_0039,user_000276,2026-02-20T11:00:00Z,draft,51000,supplier_0876 +pr_003577,acct_0040,user_000277,2026-02-21T11:00:00Z,submitted,52000,supplier_0877 +pr_003578,acct_0041,user_000278,2026-02-22T11:00:00Z,approved,53000,supplier_0878 +pr_003579,acct_0042,user_000279,2026-02-23T11:00:00Z,rejected,54000,supplier_0879 +pr_003580,acct_0043,user_000280,2026-02-24T11:00:00Z,cancelled,55000,supplier_0880 +pr_003581,acct_0044,user_000281,2026-02-25T11:00:00Z,draft,56000,supplier_0881 +pr_003582,acct_0045,user_000282,2026-02-26T11:00:00Z,submitted,57000,supplier_0882 +pr_003583,acct_0046,user_000283,2026-02-27T11:00:00Z,approved,58000,supplier_0883 +pr_003584,acct_0047,user_000284,2026-02-28T11:00:00Z,rejected,59000,supplier_0884 +pr_003585,acct_0048,user_000285,2026-02-01T11:00:00Z,cancelled,60000,supplier_0885 +pr_003586,acct_0049,user_000286,2026-02-02T11:00:00Z,draft,61000,supplier_0886 +pr_003587,acct_0001,user_000287,2026-02-03T11:00:00Z,submitted,62000,supplier_0887 +pr_003588,acct_0002,user_000288,2026-02-04T11:00:00Z,approved,63000,supplier_0888 +pr_003589,acct_0003,user_000289,2026-02-05T11:00:00Z,rejected,64000,supplier_0889 +pr_003590,acct_0004,user_000290,2026-02-06T11:00:00Z,cancelled,65000,supplier_0890 
+pr_003591,acct_0005,user_000291,2026-02-07T11:00:00Z,draft,66000,supplier_0891 +pr_003592,acct_0006,user_000292,2026-02-08T11:00:00Z,submitted,67000,supplier_0892 +pr_003593,acct_0007,user_000293,2026-02-09T11:00:00Z,approved,68000,supplier_0893 +pr_003594,acct_0008,user_000294,2026-02-10T11:00:00Z,rejected,69000,supplier_0894 +pr_003595,acct_0009,user_000295,2026-02-11T11:00:00Z,cancelled,70000,supplier_0895 +pr_003596,acct_0010,user_000296,2026-02-12T11:00:00Z,draft,71000,supplier_0896 +pr_003597,acct_0011,user_000297,2026-02-13T11:00:00Z,submitted,72000,supplier_0897 +pr_003598,acct_0012,user_000298,2026-02-14T11:00:00Z,approved,73000,supplier_0898 +pr_003599,acct_0013,user_000299,2026-02-15T11:00:00Z,rejected,74000,supplier_0899 +pr_003600,acct_0014,user_000300,2026-02-16T11:00:00Z,cancelled,75000,supplier_0900 +pr_003601,acct_0015,user_000301,2026-02-17T11:00:00Z,draft,76000,supplier_0001 +pr_003602,acct_0016,user_000302,2026-02-18T11:00:00Z,submitted,77000,supplier_0002 +pr_003603,acct_0017,user_000303,2026-02-19T11:00:00Z,approved,78000,supplier_0003 +pr_003604,acct_0018,user_000304,2026-02-20T11:00:00Z,rejected,79000,supplier_0004 +pr_003605,acct_0019,user_000305,2026-02-21T11:00:00Z,cancelled,80000,supplier_0005 +pr_003606,acct_0020,user_000306,2026-02-22T11:00:00Z,draft,81000,supplier_0006 +pr_003607,acct_0021,user_000307,2026-02-23T11:00:00Z,submitted,82000,supplier_0007 +pr_003608,acct_0022,user_000308,2026-02-24T11:00:00Z,approved,83000,supplier_0008 +pr_003609,acct_0023,user_000309,2026-02-25T11:00:00Z,rejected,84000,supplier_0009 +pr_003610,acct_0024,user_000310,2026-02-26T11:00:00Z,cancelled,85000,supplier_0010 +pr_003611,acct_0025,user_000311,2026-02-27T11:00:00Z,draft,86000,supplier_0011 +pr_003612,acct_0026,user_000312,2026-02-28T11:00:00Z,submitted,87000,supplier_0012 +pr_003613,acct_0027,user_000313,2026-02-01T11:00:00Z,approved,88000,supplier_0013 +pr_003614,acct_0028,user_000314,2026-02-02T11:00:00Z,rejected,89000,supplier_0014 
+pr_003615,acct_0029,user_000315,2026-02-03T11:00:00Z,cancelled,90000,supplier_0015 +pr_003616,acct_0030,user_000316,2026-02-04T11:00:00Z,draft,91000,supplier_0016 +pr_003617,acct_0031,user_000317,2026-02-05T11:00:00Z,submitted,92000,supplier_0017 +pr_003618,acct_0032,user_000318,2026-02-06T11:00:00Z,approved,93000,supplier_0018 +pr_003619,acct_0033,user_000319,2026-02-07T11:00:00Z,rejected,94000,supplier_0019 +pr_003620,acct_0034,user_000320,2026-02-08T11:00:00Z,cancelled,95000,supplier_0020 +pr_003621,acct_0035,user_000321,2026-02-09T11:00:00Z,draft,96000,supplier_0021 +pr_003622,acct_0036,user_000322,2026-02-10T11:00:00Z,submitted,97000,supplier_0022 +pr_003623,acct_0037,user_000323,2026-02-11T11:00:00Z,approved,98000,supplier_0023 +pr_003624,acct_0038,user_000324,2026-02-12T11:00:00Z,rejected,99000,supplier_0024 +pr_003625,acct_0039,user_000325,2026-02-13T11:00:00Z,cancelled,100000,supplier_0025 +pr_003626,acct_0040,user_000326,2026-02-14T11:00:00Z,draft,101000,supplier_0026 +pr_003627,acct_0041,user_000327,2026-02-15T11:00:00Z,submitted,102000,supplier_0027 +pr_003628,acct_0042,user_000328,2026-02-16T11:00:00Z,approved,103000,supplier_0028 +pr_003629,acct_0043,user_000329,2026-02-17T11:00:00Z,rejected,104000,supplier_0029 +pr_003630,acct_0044,user_000330,2026-02-18T11:00:00Z,cancelled,105000,supplier_0030 +pr_003631,acct_0045,user_000331,2026-02-19T11:00:00Z,draft,106000,supplier_0031 +pr_003632,acct_0046,user_000332,2026-02-20T11:00:00Z,submitted,107000,supplier_0032 +pr_003633,acct_0047,user_000333,2026-02-21T11:00:00Z,approved,108000,supplier_0033 +pr_003634,acct_0048,user_000334,2026-02-22T11:00:00Z,rejected,109000,supplier_0034 +pr_003635,acct_0049,user_000335,2026-02-23T11:00:00Z,cancelled,110000,supplier_0035 +pr_003636,acct_0050,user_000336,2026-02-24T11:00:00Z,draft,10000,supplier_0036 +pr_003637,acct_0051,user_000337,2026-02-25T11:00:00Z,submitted,11000,supplier_0037 +pr_003638,acct_0052,user_000338,2026-02-26T11:00:00Z,approved,12000,supplier_0038 
+pr_003639,acct_0053,user_000339,2026-02-27T11:00:00Z,rejected,13000,supplier_0039 +pr_003640,acct_0054,user_000340,2026-02-28T11:00:00Z,cancelled,14000,supplier_0040 +pr_003641,acct_0055,user_000341,2026-02-01T11:00:00Z,draft,15000,supplier_0041 +pr_003642,acct_0056,user_000342,2026-02-02T11:00:00Z,submitted,16000,supplier_0042 +pr_003643,acct_0057,user_000343,2026-02-03T11:00:00Z,approved,17000,supplier_0043 +pr_003644,acct_0058,user_000344,2026-02-04T11:00:00Z,rejected,18000,supplier_0044 +pr_003645,acct_0059,user_000345,2026-02-05T11:00:00Z,cancelled,19000,supplier_0045 +pr_003646,acct_0060,user_000346,2026-02-06T11:00:00Z,draft,20000,supplier_0046 +pr_003647,acct_0061,user_000347,2026-02-07T11:00:00Z,submitted,21000,supplier_0047 +pr_003648,acct_0062,user_000348,2026-02-08T11:00:00Z,approved,22000,supplier_0048 +pr_003649,acct_0063,user_000349,2026-02-09T11:00:00Z,rejected,23000,supplier_0049 +pr_003650,acct_0064,user_000350,2026-02-10T11:00:00Z,cancelled,24000,supplier_0050 +pr_003651,acct_0065,user_000351,2026-02-11T11:00:00Z,draft,25000,supplier_0051 +pr_003652,acct_0066,user_000352,2026-02-12T11:00:00Z,submitted,26000,supplier_0052 +pr_003653,acct_0067,user_000353,2026-02-13T11:00:00Z,approved,27000,supplier_0053 +pr_003654,acct_0068,user_000354,2026-02-14T11:00:00Z,rejected,28000,supplier_0054 +pr_003655,acct_0069,user_000355,2026-02-15T11:00:00Z,cancelled,29000,supplier_0055 +pr_003656,acct_0070,user_000356,2026-02-16T11:00:00Z,draft,30000,supplier_0056 +pr_003657,acct_0071,user_000357,2026-02-17T11:00:00Z,submitted,31000,supplier_0057 +pr_003658,acct_0072,user_000358,2026-02-18T11:00:00Z,approved,32000,supplier_0058 +pr_003659,acct_0073,user_000359,2026-02-19T11:00:00Z,rejected,33000,supplier_0059 +pr_003660,acct_0074,user_000360,2026-02-20T11:00:00Z,cancelled,34000,supplier_0060 +pr_003661,acct_0075,user_000361,2026-02-21T11:00:00Z,draft,35000,supplier_0061 +pr_003662,acct_0076,user_000362,2026-02-22T11:00:00Z,submitted,36000,supplier_0062 
+pr_003663,acct_0077,user_000363,2026-02-23T11:00:00Z,approved,37000,supplier_0063 +pr_003664,acct_0078,user_000364,2026-02-24T11:00:00Z,rejected,38000,supplier_0064 +pr_003665,acct_0079,user_000365,2026-02-25T11:00:00Z,cancelled,39000,supplier_0065 +pr_003666,acct_0080,user_000366,2026-02-26T11:00:00Z,draft,40000,supplier_0066 +pr_003667,acct_0081,user_000367,2026-02-27T11:00:00Z,submitted,41000,supplier_0067 +pr_003668,acct_0082,user_000368,2026-02-28T11:00:00Z,approved,42000,supplier_0068 +pr_003669,acct_0083,user_000369,2026-02-01T11:00:00Z,rejected,43000,supplier_0069 +pr_003670,acct_0084,user_000370,2026-02-02T11:00:00Z,cancelled,44000,supplier_0070 +pr_003671,acct_0085,user_000371,2026-02-03T11:00:00Z,draft,45000,supplier_0071 +pr_003672,acct_0086,user_000372,2026-02-04T11:00:00Z,submitted,46000,supplier_0072 +pr_003673,acct_0087,user_000373,2026-02-05T11:00:00Z,approved,47000,supplier_0073 +pr_003674,acct_0088,user_000374,2026-02-06T11:00:00Z,rejected,48000,supplier_0074 +pr_003675,acct_0089,user_000375,2026-02-07T11:00:00Z,cancelled,49000,supplier_0075 +pr_003676,acct_0090,user_000376,2026-02-08T11:00:00Z,draft,50000,supplier_0076 +pr_003677,acct_0091,user_000377,2026-02-09T11:00:00Z,submitted,51000,supplier_0077 +pr_003678,acct_0092,user_000378,2026-02-10T11:00:00Z,approved,52000,supplier_0078 +pr_003679,acct_0093,user_000379,2026-02-11T11:00:00Z,rejected,53000,supplier_0079 +pr_003680,acct_0094,user_000380,2026-02-12T11:00:00Z,cancelled,54000,supplier_0080 +pr_003681,acct_0095,user_000381,2026-02-13T11:00:00Z,draft,55000,supplier_0081 +pr_003682,acct_0096,user_000382,2026-02-14T11:00:00Z,submitted,56000,supplier_0082 +pr_003683,acct_0097,user_000383,2026-02-15T11:00:00Z,approved,57000,supplier_0083 +pr_003684,acct_0098,user_000384,2026-02-16T11:00:00Z,rejected,58000,supplier_0084 +pr_003685,acct_0099,user_000385,2026-02-17T11:00:00Z,cancelled,59000,supplier_0085 +pr_003686,acct_0100,user_000386,2026-02-18T11:00:00Z,draft,60000,supplier_0086 
+pr_003687,acct_0101,user_000387,2026-02-19T11:00:00Z,submitted,61000,supplier_0087 +pr_003688,acct_0102,user_000388,2026-02-20T11:00:00Z,approved,62000,supplier_0088 +pr_003689,acct_0103,user_000389,2026-02-21T11:00:00Z,rejected,63000,supplier_0089 +pr_003690,acct_0104,user_000390,2026-02-22T11:00:00Z,cancelled,64000,supplier_0090 +pr_003691,acct_0105,user_000391,2026-02-23T11:00:00Z,draft,65000,supplier_0091 +pr_003692,acct_0106,user_000392,2026-02-24T11:00:00Z,submitted,66000,supplier_0092 +pr_003693,acct_0107,user_000393,2026-02-25T11:00:00Z,approved,67000,supplier_0093 +pr_003694,acct_0108,user_000394,2026-02-26T11:00:00Z,rejected,68000,supplier_0094 +pr_003695,acct_0109,user_000395,2026-02-27T11:00:00Z,cancelled,69000,supplier_0095 +pr_003696,acct_0110,user_000396,2026-02-28T11:00:00Z,draft,70000,supplier_0096 +pr_003697,acct_0111,user_000397,2026-02-01T11:00:00Z,submitted,71000,supplier_0097 +pr_003698,acct_0112,user_000398,2026-02-02T11:00:00Z,approved,72000,supplier_0098 +pr_003699,acct_0113,user_000399,2026-02-03T11:00:00Z,rejected,73000,supplier_0099 +pr_003700,acct_0114,user_000400,2026-02-04T11:00:00Z,cancelled,74000,supplier_0100 +pr_003701,acct_0115,user_000401,2026-02-05T11:00:00Z,draft,75000,supplier_0101 +pr_003702,acct_0116,user_000402,2026-02-06T11:00:00Z,submitted,76000,supplier_0102 +pr_003703,acct_0117,user_000403,2026-02-07T11:00:00Z,approved,77000,supplier_0103 +pr_003704,acct_0118,user_000404,2026-02-08T11:00:00Z,rejected,78000,supplier_0104 +pr_003705,acct_0119,user_000405,2026-02-09T11:00:00Z,cancelled,79000,supplier_0105 +pr_003706,acct_0120,user_000406,2026-02-10T11:00:00Z,draft,80000,supplier_0106 +pr_003707,acct_0121,user_000407,2026-02-11T11:00:00Z,submitted,81000,supplier_0107 +pr_003708,acct_0122,user_000408,2026-02-12T11:00:00Z,approved,82000,supplier_0108 +pr_003709,acct_0123,user_000409,2026-02-13T11:00:00Z,rejected,83000,supplier_0109 +pr_003710,acct_0124,user_000410,2026-02-14T11:00:00Z,cancelled,84000,supplier_0110 
+pr_003711,acct_0125,user_000411,2026-02-15T11:00:00Z,draft,85000,supplier_0111 +pr_003712,acct_0126,user_000412,2026-02-16T11:00:00Z,submitted,86000,supplier_0112 +pr_003713,acct_0127,user_000413,2026-02-17T11:00:00Z,approved,87000,supplier_0113 +pr_003714,acct_0128,user_000414,2026-02-18T11:00:00Z,rejected,88000,supplier_0114 +pr_003715,acct_0129,user_000415,2026-02-19T11:00:00Z,cancelled,89000,supplier_0115 +pr_003716,acct_0130,user_000416,2026-02-20T11:00:00Z,draft,90000,supplier_0116 +pr_003717,acct_0131,user_000417,2026-02-21T11:00:00Z,submitted,91000,supplier_0117 +pr_003718,acct_0132,user_000418,2026-02-22T11:00:00Z,approved,92000,supplier_0118 +pr_003719,acct_0133,user_000419,2026-02-23T11:00:00Z,rejected,93000,supplier_0119 +pr_003720,acct_0134,user_000420,2026-02-24T11:00:00Z,cancelled,94000,supplier_0120 +pr_003721,acct_0135,user_000421,2026-02-25T11:00:00Z,draft,95000,supplier_0121 +pr_003722,acct_0136,user_000422,2026-02-26T11:00:00Z,submitted,96000,supplier_0122 +pr_003723,acct_0137,user_000423,2026-02-27T11:00:00Z,approved,97000,supplier_0123 +pr_003724,acct_0138,user_000424,2026-02-28T11:00:00Z,rejected,98000,supplier_0124 +pr_003725,acct_0139,user_000425,2026-02-01T11:00:00Z,cancelled,99000,supplier_0125 +pr_003726,acct_0140,user_000426,2026-02-02T11:00:00Z,draft,100000,supplier_0126 +pr_003727,acct_0141,user_000427,2026-02-03T11:00:00Z,submitted,101000,supplier_0127 +pr_003728,acct_0142,user_000428,2026-02-04T11:00:00Z,approved,102000,supplier_0128 +pr_003729,acct_0143,user_000429,2026-02-05T11:00:00Z,rejected,103000,supplier_0129 +pr_003730,acct_0144,user_000430,2026-02-06T11:00:00Z,cancelled,104000,supplier_0130 +pr_003731,acct_0145,user_000431,2026-02-07T11:00:00Z,draft,105000,supplier_0131 +pr_003732,acct_0146,user_000432,2026-02-08T11:00:00Z,submitted,106000,supplier_0132 +pr_003733,acct_0147,user_000433,2026-02-09T11:00:00Z,approved,107000,supplier_0133 +pr_003734,acct_0148,user_000434,2026-02-10T11:00:00Z,rejected,108000,supplier_0134 
+pr_003735,acct_0149,user_000435,2026-02-11T11:00:00Z,cancelled,109000,supplier_0135 +pr_003736,acct_0150,user_000436,2026-02-12T11:00:00Z,draft,110000,supplier_0136 +pr_003737,acct_0151,user_000437,2026-02-13T11:00:00Z,submitted,10000,supplier_0137 +pr_003738,acct_0152,user_000438,2026-02-14T11:00:00Z,approved,11000,supplier_0138 +pr_003739,acct_0153,user_000439,2026-02-15T11:00:00Z,rejected,12000,supplier_0139 +pr_003740,acct_0154,user_000440,2026-02-16T11:00:00Z,cancelled,13000,supplier_0140 +pr_003741,acct_0155,user_000441,2026-02-17T11:00:00Z,draft,14000,supplier_0141 +pr_003742,acct_0156,user_000442,2026-02-18T11:00:00Z,submitted,15000,supplier_0142 +pr_003743,acct_0157,user_000443,2026-02-19T11:00:00Z,approved,16000,supplier_0143 +pr_003744,acct_0158,user_000444,2026-02-20T11:00:00Z,rejected,17000,supplier_0144 +pr_003745,acct_0159,user_000445,2026-02-21T11:00:00Z,cancelled,18000,supplier_0145 +pr_003746,acct_0160,user_000446,2026-02-22T11:00:00Z,draft,19000,supplier_0146 +pr_003747,acct_0161,user_000447,2026-02-23T11:00:00Z,submitted,20000,supplier_0147 +pr_003748,acct_0162,user_000448,2026-02-24T11:00:00Z,approved,21000,supplier_0148 +pr_003749,acct_0163,user_000449,2026-02-25T11:00:00Z,rejected,22000,supplier_0149 +pr_003750,acct_0164,user_000450,2026-02-26T11:00:00Z,cancelled,23000,supplier_0150 +pr_003751,acct_0165,user_000451,2026-02-27T11:00:00Z,draft,24000,supplier_0151 +pr_003752,acct_0166,user_000452,2026-02-28T11:00:00Z,submitted,25000,supplier_0152 +pr_003753,acct_0167,user_000453,2026-02-01T11:00:00Z,approved,26000,supplier_0153 +pr_003754,acct_0168,user_000454,2026-02-02T11:00:00Z,rejected,27000,supplier_0154 +pr_003755,acct_0169,user_000455,2026-02-03T11:00:00Z,cancelled,28000,supplier_0155 +pr_003756,acct_0170,user_000456,2026-02-04T11:00:00Z,draft,29000,supplier_0156 +pr_003757,acct_0171,user_000457,2026-02-05T11:00:00Z,submitted,30000,supplier_0157 +pr_003758,acct_0172,user_000458,2026-02-06T11:00:00Z,approved,31000,supplier_0158 
+pr_003759,acct_0173,user_000459,2026-02-07T11:00:00Z,rejected,32000,supplier_0159 +pr_003760,acct_0174,user_000460,2026-02-08T11:00:00Z,cancelled,33000,supplier_0160 +pr_003761,acct_0175,user_000461,2026-02-09T11:00:00Z,draft,34000,supplier_0161 +pr_003762,acct_0176,user_000462,2026-02-10T11:00:00Z,submitted,35000,supplier_0162 +pr_003763,acct_0177,user_000463,2026-02-11T11:00:00Z,approved,36000,supplier_0163 +pr_003764,acct_0178,user_000464,2026-02-12T11:00:00Z,rejected,37000,supplier_0164 +pr_003765,acct_0179,user_000465,2026-02-13T11:00:00Z,cancelled,38000,supplier_0165 +pr_003766,acct_0180,user_000466,2026-02-14T11:00:00Z,draft,39000,supplier_0166 +pr_003767,acct_0181,user_000467,2026-02-15T11:00:00Z,submitted,40000,supplier_0167 +pr_003768,acct_0182,user_000468,2026-02-16T11:00:00Z,approved,41000,supplier_0168 +pr_003769,acct_0183,user_000469,2026-02-17T11:00:00Z,rejected,42000,supplier_0169 +pr_003770,acct_0184,user_000470,2026-02-18T11:00:00Z,cancelled,43000,supplier_0170 +pr_003771,acct_0185,user_000471,2026-02-19T11:00:00Z,draft,44000,supplier_0171 +pr_003772,acct_0186,user_000472,2026-02-20T11:00:00Z,submitted,45000,supplier_0172 +pr_003773,acct_0187,user_000473,2026-02-21T11:00:00Z,approved,46000,supplier_0173 +pr_003774,acct_0188,user_000474,2026-02-22T11:00:00Z,rejected,47000,supplier_0174 +pr_003775,acct_0189,user_000475,2026-02-23T11:00:00Z,cancelled,48000,supplier_0175 +pr_003776,acct_0190,user_000476,2026-02-24T11:00:00Z,draft,49000,supplier_0176 +pr_003777,acct_0001,user_000477,2026-02-25T11:00:00Z,submitted,50000,supplier_0177 +pr_003778,acct_0002,user_000478,2026-02-26T11:00:00Z,approved,51000,supplier_0178 +pr_003779,acct_0003,user_000479,2026-02-27T11:00:00Z,rejected,52000,supplier_0179 +pr_003780,acct_0004,user_000480,2026-02-28T11:00:00Z,cancelled,53000,supplier_0180 +pr_003781,acct_0005,user_000481,2026-02-01T11:00:00Z,draft,54000,supplier_0181 +pr_003782,acct_0006,user_000482,2026-02-02T11:00:00Z,submitted,55000,supplier_0182 
+pr_003783,acct_0007,user_000483,2026-02-03T11:00:00Z,approved,56000,supplier_0183 +pr_003784,acct_0008,user_000484,2026-02-04T11:00:00Z,rejected,57000,supplier_0184 +pr_003785,acct_0009,user_000485,2026-02-05T11:00:00Z,cancelled,58000,supplier_0185 +pr_003786,acct_0010,user_000486,2026-02-06T11:00:00Z,draft,59000,supplier_0186 +pr_003787,acct_0011,user_000487,2026-02-07T11:00:00Z,submitted,60000,supplier_0187 +pr_003788,acct_0012,user_000488,2026-02-08T11:00:00Z,approved,61000,supplier_0188 +pr_003789,acct_0013,user_000489,2026-02-09T11:00:00Z,rejected,62000,supplier_0189 +pr_003790,acct_0014,user_000490,2026-02-10T11:00:00Z,cancelled,63000,supplier_0190 +pr_003791,acct_0015,user_000491,2026-02-11T11:00:00Z,draft,64000,supplier_0191 +pr_003792,acct_0016,user_000492,2026-02-12T11:00:00Z,submitted,65000,supplier_0192 +pr_003793,acct_0017,user_000493,2026-02-13T11:00:00Z,approved,66000,supplier_0193 +pr_003794,acct_0018,user_000494,2026-02-14T11:00:00Z,rejected,67000,supplier_0194 +pr_003795,acct_0019,user_000495,2026-02-15T11:00:00Z,cancelled,68000,supplier_0195 +pr_003796,acct_0020,user_000496,2026-02-16T11:00:00Z,draft,69000,supplier_0196 +pr_003797,acct_0021,user_000497,2026-02-17T11:00:00Z,submitted,70000,supplier_0197 +pr_003798,acct_0022,user_000498,2026-02-18T11:00:00Z,approved,71000,supplier_0198 +pr_003799,acct_0023,user_000499,2026-02-19T11:00:00Z,rejected,72000,supplier_0199 +pr_003800,acct_0024,user_000500,2026-02-20T11:00:00Z,cancelled,73000,supplier_0200 +pr_003801,acct_0025,user_000501,2026-02-21T11:00:00Z,draft,74000,supplier_0201 +pr_003802,acct_0026,user_000502,2026-02-22T11:00:00Z,submitted,75000,supplier_0202 +pr_003803,acct_0027,user_000503,2026-02-23T11:00:00Z,approved,76000,supplier_0203 +pr_003804,acct_0028,user_000504,2026-02-24T11:00:00Z,rejected,77000,supplier_0204 +pr_003805,acct_0029,user_000505,2026-02-25T11:00:00Z,cancelled,78000,supplier_0205 +pr_003806,acct_0030,user_000506,2026-02-26T11:00:00Z,draft,79000,supplier_0206 
+pr_003807,acct_0031,user_000507,2026-02-27T11:00:00Z,submitted,80000,supplier_0207 +pr_003808,acct_0032,user_000508,2026-02-28T11:00:00Z,approved,81000,supplier_0208 +pr_003809,acct_0033,user_000509,2026-02-01T11:00:00Z,rejected,82000,supplier_0209 +pr_003810,acct_0034,user_000510,2026-02-02T11:00:00Z,cancelled,83000,supplier_0210 +pr_003811,acct_0035,user_000511,2026-02-03T11:00:00Z,draft,84000,supplier_0211 +pr_003812,acct_0036,user_000512,2026-02-04T11:00:00Z,submitted,85000,supplier_0212 +pr_003813,acct_0037,user_000513,2026-02-05T11:00:00Z,approved,86000,supplier_0213 +pr_003814,acct_0038,user_000514,2026-02-06T11:00:00Z,rejected,87000,supplier_0214 +pr_003815,acct_0039,user_000515,2026-02-07T11:00:00Z,cancelled,88000,supplier_0215 +pr_003816,acct_0040,user_000516,2026-02-08T11:00:00Z,draft,89000,supplier_0216 +pr_003817,acct_0041,user_000517,2026-02-09T11:00:00Z,submitted,90000,supplier_0217 +pr_003818,acct_0042,user_000518,2026-02-10T11:00:00Z,approved,91000,supplier_0218 +pr_003819,acct_0043,user_000519,2026-02-11T11:00:00Z,rejected,92000,supplier_0219 +pr_003820,acct_0044,user_000520,2026-02-12T11:00:00Z,cancelled,93000,supplier_0220 +pr_003821,acct_0045,user_000521,2026-02-13T11:00:00Z,draft,94000,supplier_0221 +pr_003822,acct_0046,user_000522,2026-02-14T11:00:00Z,submitted,95000,supplier_0222 +pr_003823,acct_0047,user_000523,2026-02-15T11:00:00Z,approved,96000,supplier_0223 +pr_003824,acct_0048,user_000524,2026-02-16T11:00:00Z,rejected,97000,supplier_0224 +pr_003825,acct_0049,user_000525,2026-02-17T11:00:00Z,cancelled,98000,supplier_0225 +pr_003826,acct_0050,user_000526,2026-02-18T11:00:00Z,draft,99000,supplier_0226 +pr_003827,acct_0051,user_000527,2026-02-19T11:00:00Z,submitted,100000,supplier_0227 +pr_003828,acct_0052,user_000528,2026-02-20T11:00:00Z,approved,101000,supplier_0228 +pr_003829,acct_0053,user_000529,2026-02-21T11:00:00Z,rejected,102000,supplier_0229 +pr_003830,acct_0054,user_000530,2026-02-22T11:00:00Z,cancelled,103000,supplier_0230 
+pr_003831,acct_0055,user_000531,2026-02-23T11:00:00Z,draft,104000,supplier_0231 +pr_003832,acct_0056,user_000532,2026-02-24T11:00:00Z,submitted,105000,supplier_0232 +pr_003833,acct_0057,user_000533,2026-02-25T11:00:00Z,approved,106000,supplier_0233 +pr_003834,acct_0058,user_000534,2026-02-26T11:00:00Z,rejected,107000,supplier_0234 +pr_003835,acct_0059,user_000535,2026-02-27T11:00:00Z,cancelled,108000,supplier_0235 +pr_003836,acct_0060,user_000536,2026-02-28T11:00:00Z,draft,109000,supplier_0236 +pr_003837,acct_0061,user_000537,2026-02-01T11:00:00Z,submitted,110000,supplier_0237 +pr_003838,acct_0062,user_000538,2026-02-02T11:00:00Z,approved,10000,supplier_0238 +pr_003839,acct_0063,user_000539,2026-02-03T11:00:00Z,rejected,11000,supplier_0239 +pr_003840,acct_0064,user_000540,2026-02-04T11:00:00Z,cancelled,12000,supplier_0240 +pr_003841,acct_0065,user_000541,2026-02-05T11:00:00Z,draft,13000,supplier_0241 +pr_003842,acct_0066,user_000542,2026-02-06T11:00:00Z,submitted,14000,supplier_0242 +pr_003843,acct_0067,user_000543,2026-02-07T11:00:00Z,approved,15000,supplier_0243 +pr_003844,acct_0068,user_000544,2026-02-08T11:00:00Z,rejected,16000,supplier_0244 +pr_003845,acct_0069,user_000545,2026-02-09T11:00:00Z,cancelled,17000,supplier_0245 +pr_003846,acct_0070,user_000546,2026-02-10T11:00:00Z,draft,18000,supplier_0246 +pr_003847,acct_0071,user_000547,2026-02-11T11:00:00Z,submitted,19000,supplier_0247 +pr_003848,acct_0072,user_000548,2026-02-12T11:00:00Z,approved,20000,supplier_0248 +pr_003849,acct_0073,user_000549,2026-02-13T11:00:00Z,rejected,21000,supplier_0249 +pr_003850,acct_0074,user_000550,2026-02-14T11:00:00Z,cancelled,22000,supplier_0250 +pr_003851,acct_0075,user_000551,2026-02-15T11:00:00Z,draft,23000,supplier_0251 +pr_003852,acct_0076,user_000552,2026-02-16T11:00:00Z,submitted,24000,supplier_0252 +pr_003853,acct_0077,user_000553,2026-02-17T11:00:00Z,approved,25000,supplier_0253 +pr_003854,acct_0078,user_000554,2026-02-18T11:00:00Z,rejected,26000,supplier_0254 
+pr_003855,acct_0079,user_000555,2026-02-19T11:00:00Z,cancelled,27000,supplier_0255 +pr_003856,acct_0080,user_000556,2026-02-20T11:00:00Z,draft,28000,supplier_0256 +pr_003857,acct_0081,user_000557,2026-02-21T11:00:00Z,submitted,29000,supplier_0257 +pr_003858,acct_0082,user_000558,2026-02-22T11:00:00Z,approved,30000,supplier_0258 +pr_003859,acct_0083,user_000559,2026-02-23T11:00:00Z,rejected,31000,supplier_0259 +pr_003860,acct_0084,user_000560,2026-02-24T11:00:00Z,cancelled,32000,supplier_0260 +pr_003861,acct_0085,user_000561,2026-02-25T11:00:00Z,draft,33000,supplier_0261 +pr_003862,acct_0086,user_000562,2026-02-26T11:00:00Z,submitted,34000,supplier_0262 +pr_003863,acct_0087,user_000563,2026-02-27T11:00:00Z,approved,35000,supplier_0263 +pr_003864,acct_0088,user_000564,2026-02-28T11:00:00Z,rejected,36000,supplier_0264 +pr_003865,acct_0089,user_000565,2026-02-01T11:00:00Z,cancelled,37000,supplier_0265 +pr_003866,acct_0090,user_000566,2026-02-02T11:00:00Z,draft,38000,supplier_0266 +pr_003867,acct_0091,user_000567,2026-02-03T11:00:00Z,submitted,39000,supplier_0267 +pr_003868,acct_0092,user_000568,2026-02-04T11:00:00Z,approved,40000,supplier_0268 +pr_003869,acct_0093,user_000569,2026-02-05T11:00:00Z,rejected,41000,supplier_0269 +pr_003870,acct_0094,user_000570,2026-02-06T11:00:00Z,cancelled,42000,supplier_0270 +pr_003871,acct_0095,user_000571,2026-02-07T11:00:00Z,draft,43000,supplier_0271 +pr_003872,acct_0096,user_000572,2026-02-08T11:00:00Z,submitted,44000,supplier_0272 +pr_003873,acct_0097,user_000573,2026-02-09T11:00:00Z,approved,45000,supplier_0273 +pr_003874,acct_0098,user_000574,2026-02-10T11:00:00Z,rejected,46000,supplier_0274 +pr_003875,acct_0099,user_000575,2026-02-11T11:00:00Z,cancelled,47000,supplier_0275 +pr_003876,acct_0100,user_000576,2026-02-12T11:00:00Z,draft,48000,supplier_0276 +pr_003877,acct_0101,user_000577,2026-02-13T11:00:00Z,submitted,49000,supplier_0277 +pr_003878,acct_0102,user_000578,2026-02-14T11:00:00Z,approved,50000,supplier_0278 
+pr_003879,acct_0103,user_000579,2026-02-15T11:00:00Z,rejected,51000,supplier_0279 +pr_003880,acct_0104,user_000580,2026-02-16T11:00:00Z,cancelled,52000,supplier_0280 +pr_003881,acct_0105,user_000581,2026-02-17T11:00:00Z,draft,53000,supplier_0281 +pr_003882,acct_0106,user_000582,2026-02-18T11:00:00Z,submitted,54000,supplier_0282 +pr_003883,acct_0107,user_000583,2026-02-19T11:00:00Z,approved,55000,supplier_0283 +pr_003884,acct_0108,user_000584,2026-02-20T11:00:00Z,rejected,56000,supplier_0284 +pr_003885,acct_0109,user_000585,2026-02-21T11:00:00Z,cancelled,57000,supplier_0285 +pr_003886,acct_0110,user_000586,2026-02-22T11:00:00Z,draft,58000,supplier_0286 +pr_003887,acct_0111,user_000587,2026-02-23T11:00:00Z,submitted,59000,supplier_0287 +pr_003888,acct_0112,user_000588,2026-02-24T11:00:00Z,approved,60000,supplier_0288 +pr_003889,acct_0113,user_000589,2026-02-25T11:00:00Z,rejected,61000,supplier_0289 +pr_003890,acct_0114,user_000590,2026-02-26T11:00:00Z,cancelled,62000,supplier_0290 +pr_003891,acct_0115,user_000591,2026-02-27T11:00:00Z,draft,63000,supplier_0291 +pr_003892,acct_0116,user_000592,2026-02-28T11:00:00Z,submitted,64000,supplier_0292 +pr_003893,acct_0117,user_000593,2026-02-01T11:00:00Z,approved,65000,supplier_0293 +pr_003894,acct_0118,user_000594,2026-02-02T11:00:00Z,rejected,66000,supplier_0294 +pr_003895,acct_0119,user_000595,2026-02-03T11:00:00Z,cancelled,67000,supplier_0295 +pr_003896,acct_0120,user_000596,2026-02-04T11:00:00Z,draft,68000,supplier_0296 +pr_003897,acct_0121,user_000597,2026-02-05T11:00:00Z,submitted,69000,supplier_0297 +pr_003898,acct_0122,user_000598,2026-02-06T11:00:00Z,approved,70000,supplier_0298 +pr_003899,acct_0123,user_000599,2026-02-07T11:00:00Z,rejected,71000,supplier_0299 +pr_003900,acct_0124,user_000600,2026-02-08T11:00:00Z,cancelled,72000,supplier_0300 +pr_003901,acct_0125,user_000601,2026-02-09T11:00:00Z,draft,73000,supplier_0301 +pr_003902,acct_0126,user_000602,2026-02-10T11:00:00Z,submitted,74000,supplier_0302 
+pr_003903,acct_0127,user_000603,2026-02-11T11:00:00Z,approved,75000,supplier_0303 +pr_003904,acct_0128,user_000604,2026-02-12T11:00:00Z,rejected,76000,supplier_0304 +pr_003905,acct_0129,user_000605,2026-02-13T11:00:00Z,cancelled,77000,supplier_0305 +pr_003906,acct_0130,user_000606,2026-02-14T11:00:00Z,draft,78000,supplier_0306 +pr_003907,acct_0131,user_000607,2026-02-15T11:00:00Z,submitted,79000,supplier_0307 +pr_003908,acct_0132,user_000608,2026-02-16T11:00:00Z,approved,80000,supplier_0308 +pr_003909,acct_0133,user_000609,2026-02-17T11:00:00Z,rejected,81000,supplier_0309 +pr_003910,acct_0134,user_000610,2026-02-18T11:00:00Z,cancelled,82000,supplier_0310 +pr_003911,acct_0135,user_000611,2026-02-19T11:00:00Z,draft,83000,supplier_0311 +pr_003912,acct_0136,user_000612,2026-02-20T11:00:00Z,submitted,84000,supplier_0312 +pr_003913,acct_0137,user_000613,2026-02-21T11:00:00Z,approved,85000,supplier_0313 +pr_003914,acct_0138,user_000614,2026-02-22T11:00:00Z,rejected,86000,supplier_0314 +pr_003915,acct_0139,user_000615,2026-02-23T11:00:00Z,cancelled,87000,supplier_0315 +pr_003916,acct_0140,user_000616,2026-02-24T11:00:00Z,draft,88000,supplier_0316 +pr_003917,acct_0141,user_000617,2026-02-25T11:00:00Z,submitted,89000,supplier_0317 +pr_003918,acct_0142,user_000618,2026-02-26T11:00:00Z,approved,90000,supplier_0318 +pr_003919,acct_0143,user_000619,2026-02-27T11:00:00Z,rejected,91000,supplier_0319 +pr_003920,acct_0144,user_000620,2026-02-28T11:00:00Z,cancelled,92000,supplier_0320 +pr_003921,acct_0145,user_000621,2026-02-01T11:00:00Z,draft,93000,supplier_0321 +pr_003922,acct_0146,user_000622,2026-02-02T11:00:00Z,submitted,94000,supplier_0322 +pr_003923,acct_0147,user_000623,2026-02-03T11:00:00Z,approved,95000,supplier_0323 +pr_003924,acct_0148,user_000624,2026-02-04T11:00:00Z,rejected,96000,supplier_0324 +pr_003925,acct_0149,user_000625,2026-02-05T11:00:00Z,cancelled,97000,supplier_0325 +pr_003926,acct_0150,user_000626,2026-02-06T11:00:00Z,draft,98000,supplier_0326 
+pr_003927,acct_0151,user_000627,2026-02-07T11:00:00Z,submitted,99000,supplier_0327 +pr_003928,acct_0152,user_000628,2026-02-08T11:00:00Z,approved,100000,supplier_0328 +pr_003929,acct_0153,user_000629,2026-02-09T11:00:00Z,rejected,101000,supplier_0329 +pr_003930,acct_0154,user_000630,2026-02-10T11:00:00Z,cancelled,102000,supplier_0330 +pr_003931,acct_0155,user_000631,2026-02-11T11:00:00Z,draft,103000,supplier_0331 +pr_003932,acct_0156,user_000632,2026-02-12T11:00:00Z,submitted,104000,supplier_0332 +pr_003933,acct_0157,user_000633,2026-02-13T11:00:00Z,approved,105000,supplier_0333 +pr_003934,acct_0158,user_000634,2026-02-14T11:00:00Z,rejected,106000,supplier_0334 +pr_003935,acct_0159,user_000635,2026-02-15T11:00:00Z,cancelled,107000,supplier_0335 +pr_003936,acct_0160,user_000636,2026-02-16T11:00:00Z,draft,108000,supplier_0336 +pr_003937,acct_0161,user_000637,2026-02-17T11:00:00Z,submitted,109000,supplier_0337 +pr_003938,acct_0162,user_000638,2026-02-18T11:00:00Z,approved,110000,supplier_0338 +pr_003939,acct_0163,user_000639,2026-02-19T11:00:00Z,rejected,10000,supplier_0339 +pr_003940,acct_0164,user_000640,2026-02-20T11:00:00Z,cancelled,11000,supplier_0340 +pr_003941,acct_0165,user_000641,2026-02-21T11:00:00Z,draft,12000,supplier_0341 +pr_003942,acct_0166,user_000642,2026-02-22T11:00:00Z,submitted,13000,supplier_0342 +pr_003943,acct_0167,user_000643,2026-02-23T11:00:00Z,approved,14000,supplier_0343 +pr_003944,acct_0168,user_000644,2026-02-24T11:00:00Z,rejected,15000,supplier_0344 +pr_003945,acct_0169,user_000645,2026-02-25T11:00:00Z,cancelled,16000,supplier_0345 +pr_003946,acct_0170,user_000646,2026-02-26T11:00:00Z,draft,17000,supplier_0346 +pr_003947,acct_0171,user_000647,2026-02-27T11:00:00Z,submitted,18000,supplier_0347 +pr_003948,acct_0172,user_000648,2026-02-28T11:00:00Z,approved,19000,supplier_0348 +pr_003949,acct_0173,user_000649,2026-02-01T11:00:00Z,rejected,20000,supplier_0349 
+pr_003950,acct_0174,user_000650,2026-02-02T11:00:00Z,cancelled,21000,supplier_0350 +pr_003951,acct_0175,user_000651,2026-02-03T11:00:00Z,draft,22000,supplier_0351 +pr_003952,acct_0176,user_000652,2026-02-04T11:00:00Z,submitted,23000,supplier_0352 +pr_003953,acct_0177,user_000653,2026-02-05T11:00:00Z,approved,24000,supplier_0353 +pr_003954,acct_0178,user_000654,2026-02-06T11:00:00Z,rejected,25000,supplier_0354 +pr_003955,acct_0179,user_000655,2026-02-07T11:00:00Z,cancelled,26000,supplier_0355 +pr_003956,acct_0180,user_000656,2026-02-08T11:00:00Z,draft,27000,supplier_0356 +pr_003957,acct_0181,user_000657,2026-02-09T11:00:00Z,submitted,28000,supplier_0357 +pr_003958,acct_0182,user_000658,2026-02-10T11:00:00Z,approved,29000,supplier_0358 +pr_003959,acct_0183,user_000659,2026-02-11T11:00:00Z,rejected,30000,supplier_0359 +pr_003960,acct_0184,user_000660,2026-02-12T11:00:00Z,cancelled,31000,supplier_0360 +pr_003961,acct_0185,user_000661,2026-02-13T11:00:00Z,draft,32000,supplier_0361 +pr_003962,acct_0186,user_000662,2026-02-14T11:00:00Z,submitted,33000,supplier_0362 +pr_003963,acct_0187,user_000663,2026-02-15T11:00:00Z,approved,34000,supplier_0363 +pr_003964,acct_0188,user_000664,2026-02-16T11:00:00Z,rejected,35000,supplier_0364 +pr_003965,acct_0189,user_000665,2026-02-17T11:00:00Z,cancelled,36000,supplier_0365 +pr_003966,acct_0190,user_000666,2026-02-18T11:00:00Z,draft,37000,supplier_0366 +pr_003967,acct_0001,user_000667,2026-02-19T11:00:00Z,submitted,38000,supplier_0367 +pr_003968,acct_0002,user_000668,2026-02-20T11:00:00Z,approved,39000,supplier_0368 +pr_003969,acct_0003,user_000669,2026-02-21T11:00:00Z,rejected,40000,supplier_0369 +pr_003970,acct_0004,user_000670,2026-02-22T11:00:00Z,cancelled,41000,supplier_0370 +pr_003971,acct_0005,user_000671,2026-02-23T11:00:00Z,draft,42000,supplier_0371 +pr_003972,acct_0006,user_000672,2026-02-24T11:00:00Z,submitted,43000,supplier_0372 +pr_003973,acct_0007,user_000673,2026-02-25T11:00:00Z,approved,44000,supplier_0373 
+pr_003974,acct_0008,user_000674,2026-02-26T11:00:00Z,rejected,45000,supplier_0374 +pr_003975,acct_0009,user_000675,2026-02-27T11:00:00Z,cancelled,46000,supplier_0375 +pr_003976,acct_0010,user_000676,2026-02-28T11:00:00Z,draft,47000,supplier_0376 +pr_003977,acct_0011,user_000677,2026-02-01T11:00:00Z,submitted,48000,supplier_0377 +pr_003978,acct_0012,user_000678,2026-02-02T11:00:00Z,approved,49000,supplier_0378 +pr_003979,acct_0013,user_000679,2026-02-03T11:00:00Z,rejected,50000,supplier_0379 +pr_003980,acct_0014,user_000680,2026-02-04T11:00:00Z,cancelled,51000,supplier_0380 +pr_003981,acct_0015,user_000681,2026-02-05T11:00:00Z,draft,52000,supplier_0381 +pr_003982,acct_0016,user_000682,2026-02-06T11:00:00Z,submitted,53000,supplier_0382 +pr_003983,acct_0017,user_000683,2026-02-07T11:00:00Z,approved,54000,supplier_0383 +pr_003984,acct_0018,user_000684,2026-02-08T11:00:00Z,rejected,55000,supplier_0384 +pr_003985,acct_0019,user_000685,2026-02-09T11:00:00Z,cancelled,56000,supplier_0385 +pr_003986,acct_0020,user_000686,2026-02-10T11:00:00Z,draft,57000,supplier_0386 +pr_003987,acct_0021,user_000687,2026-02-11T11:00:00Z,submitted,58000,supplier_0387 +pr_003988,acct_0022,user_000688,2026-02-12T11:00:00Z,approved,59000,supplier_0388 +pr_003989,acct_0023,user_000689,2026-02-13T11:00:00Z,rejected,60000,supplier_0389 +pr_003990,acct_0024,user_000690,2026-02-14T11:00:00Z,cancelled,61000,supplier_0390 +pr_003991,acct_0025,user_000691,2026-02-15T11:00:00Z,draft,62000,supplier_0391 +pr_003992,acct_0026,user_000692,2026-02-16T11:00:00Z,submitted,63000,supplier_0392 +pr_003993,acct_0027,user_000693,2026-02-17T11:00:00Z,approved,64000,supplier_0393 +pr_003994,acct_0028,user_000694,2026-02-18T11:00:00Z,rejected,65000,supplier_0394 +pr_003995,acct_0029,user_000695,2026-02-19T11:00:00Z,cancelled,66000,supplier_0395 +pr_003996,acct_0030,user_000696,2026-02-20T11:00:00Z,draft,67000,supplier_0396 +pr_003997,acct_0031,user_000697,2026-02-21T11:00:00Z,submitted,68000,supplier_0397 
+pr_003998,acct_0032,user_000698,2026-02-22T11:00:00Z,approved,69000,supplier_0398 +pr_003999,acct_0033,user_000699,2026-02-23T11:00:00Z,rejected,70000,supplier_0399 +pr_004000,acct_0034,user_000700,2026-02-24T11:00:00Z,cancelled,71000,supplier_0400 +pr_004001,acct_0035,user_000701,2026-02-25T11:00:00Z,draft,72000,supplier_0401 +pr_004002,acct_0036,user_000702,2026-02-26T11:00:00Z,submitted,73000,supplier_0402 +pr_004003,acct_0037,user_000703,2026-02-27T11:00:00Z,approved,74000,supplier_0403 +pr_004004,acct_0038,user_000704,2026-02-28T11:00:00Z,rejected,75000,supplier_0404 +pr_004005,acct_0039,user_000705,2026-02-01T11:00:00Z,cancelled,76000,supplier_0405 +pr_004006,acct_0040,user_000706,2026-02-02T11:00:00Z,draft,77000,supplier_0406 +pr_004007,acct_0041,user_000707,2026-02-03T11:00:00Z,submitted,78000,supplier_0407 +pr_004008,acct_0042,user_000708,2026-02-04T11:00:00Z,approved,79000,supplier_0408 +pr_004009,acct_0043,user_000709,2026-02-05T11:00:00Z,rejected,80000,supplier_0409 +pr_004010,acct_0044,user_000710,2026-02-06T11:00:00Z,cancelled,81000,supplier_0410 +pr_004011,acct_0045,user_000711,2026-02-07T11:00:00Z,draft,82000,supplier_0411 +pr_004012,acct_0046,user_000712,2026-02-08T11:00:00Z,submitted,83000,supplier_0412 +pr_004013,acct_0047,user_000713,2026-02-09T11:00:00Z,approved,84000,supplier_0413 +pr_004014,acct_0048,user_000714,2026-02-10T11:00:00Z,rejected,85000,supplier_0414 +pr_004015,acct_0049,user_000715,2026-02-11T11:00:00Z,cancelled,86000,supplier_0415 +pr_004016,acct_0050,user_000716,2026-02-12T11:00:00Z,draft,87000,supplier_0416 +pr_004017,acct_0051,user_000717,2026-02-13T11:00:00Z,submitted,88000,supplier_0417 +pr_004018,acct_0052,user_000718,2026-02-14T11:00:00Z,approved,89000,supplier_0418 +pr_004019,acct_0053,user_000719,2026-02-15T11:00:00Z,rejected,90000,supplier_0419 +pr_004020,acct_0054,user_000720,2026-02-16T11:00:00Z,cancelled,91000,supplier_0420 +pr_004021,acct_0055,user_000721,2026-02-17T11:00:00Z,draft,92000,supplier_0421 
+pr_004022,acct_0056,user_000722,2026-02-18T11:00:00Z,submitted,93000,supplier_0422 +pr_004023,acct_0057,user_000723,2026-02-19T11:00:00Z,approved,94000,supplier_0423 +pr_004024,acct_0058,user_000724,2026-02-20T11:00:00Z,rejected,95000,supplier_0424 +pr_004025,acct_0059,user_000725,2026-02-21T11:00:00Z,cancelled,96000,supplier_0425 +pr_004026,acct_0060,user_000726,2026-02-22T11:00:00Z,draft,97000,supplier_0426 +pr_004027,acct_0061,user_000727,2026-02-23T11:00:00Z,submitted,98000,supplier_0427 +pr_004028,acct_0062,user_000728,2026-02-24T11:00:00Z,approved,99000,supplier_0428 +pr_004029,acct_0063,user_000729,2026-02-25T11:00:00Z,rejected,100000,supplier_0429 +pr_004030,acct_0064,user_000730,2026-02-26T11:00:00Z,cancelled,101000,supplier_0430 +pr_004031,acct_0065,user_000731,2026-02-27T11:00:00Z,draft,102000,supplier_0431 +pr_004032,acct_0066,user_000732,2026-02-28T11:00:00Z,submitted,103000,supplier_0432 +pr_004033,acct_0067,user_000733,2026-02-01T11:00:00Z,approved,104000,supplier_0433 +pr_004034,acct_0068,user_000734,2026-02-02T11:00:00Z,rejected,105000,supplier_0434 +pr_004035,acct_0069,user_000735,2026-02-03T11:00:00Z,cancelled,106000,supplier_0435 +pr_004036,acct_0070,user_000736,2026-02-04T11:00:00Z,draft,107000,supplier_0436 +pr_004037,acct_0071,user_000737,2026-02-05T11:00:00Z,submitted,108000,supplier_0437 +pr_004038,acct_0072,user_000738,2026-02-06T11:00:00Z,approved,109000,supplier_0438 +pr_004039,acct_0073,user_000739,2026-02-07T11:00:00Z,rejected,110000,supplier_0439 +pr_004040,acct_0074,user_000740,2026-02-08T11:00:00Z,cancelled,10000,supplier_0440 +pr_004041,acct_0075,user_000741,2026-02-09T11:00:00Z,draft,11000,supplier_0441 +pr_004042,acct_0076,user_000742,2026-02-10T11:00:00Z,submitted,12000,supplier_0442 +pr_004043,acct_0077,user_000743,2026-02-11T11:00:00Z,approved,13000,supplier_0443 +pr_004044,acct_0078,user_000744,2026-02-12T11:00:00Z,rejected,14000,supplier_0444 
+pr_004045,acct_0079,user_000745,2026-02-13T11:00:00Z,cancelled,15000,supplier_0445 +pr_004046,acct_0080,user_000746,2026-02-14T11:00:00Z,draft,16000,supplier_0446 +pr_004047,acct_0081,user_000747,2026-02-15T11:00:00Z,submitted,17000,supplier_0447 +pr_004048,acct_0082,user_000748,2026-02-16T11:00:00Z,approved,18000,supplier_0448 +pr_004049,acct_0083,user_000749,2026-02-17T11:00:00Z,rejected,19000,supplier_0449 +pr_004050,acct_0084,user_000750,2026-02-18T11:00:00Z,cancelled,20000,supplier_0450 +pr_004051,acct_0085,user_000751,2026-02-19T11:00:00Z,draft,21000,supplier_0451 +pr_004052,acct_0086,user_000752,2026-02-20T11:00:00Z,submitted,22000,supplier_0452 +pr_004053,acct_0087,user_000753,2026-02-21T11:00:00Z,approved,23000,supplier_0453 +pr_004054,acct_0088,user_000754,2026-02-22T11:00:00Z,rejected,24000,supplier_0454 +pr_004055,acct_0089,user_000755,2026-02-23T11:00:00Z,cancelled,25000,supplier_0455 +pr_004056,acct_0090,user_000756,2026-02-24T11:00:00Z,draft,26000,supplier_0456 +pr_004057,acct_0091,user_000757,2026-02-25T11:00:00Z,submitted,27000,supplier_0457 +pr_004058,acct_0092,user_000758,2026-02-26T11:00:00Z,approved,28000,supplier_0458 +pr_004059,acct_0093,user_000759,2026-02-27T11:00:00Z,rejected,29000,supplier_0459 +pr_004060,acct_0094,user_000760,2026-02-28T11:00:00Z,cancelled,30000,supplier_0460 +pr_004061,acct_0095,user_000761,2026-02-01T11:00:00Z,draft,31000,supplier_0461 +pr_004062,acct_0096,user_000762,2026-02-02T11:00:00Z,submitted,32000,supplier_0462 +pr_004063,acct_0097,user_000763,2026-02-03T11:00:00Z,approved,33000,supplier_0463 +pr_004064,acct_0098,user_000764,2026-02-04T11:00:00Z,rejected,34000,supplier_0464 +pr_004065,acct_0099,user_000765,2026-02-05T11:00:00Z,cancelled,35000,supplier_0465 +pr_004066,acct_0100,user_000766,2026-02-06T11:00:00Z,draft,36000,supplier_0466 +pr_004067,acct_0101,user_000767,2026-02-07T11:00:00Z,submitted,37000,supplier_0467 +pr_004068,acct_0102,user_000768,2026-02-08T11:00:00Z,approved,38000,supplier_0468 
+pr_004069,acct_0103,user_000769,2026-02-09T11:00:00Z,rejected,39000,supplier_0469 +pr_004070,acct_0104,user_000770,2026-02-10T11:00:00Z,cancelled,40000,supplier_0470 +pr_004071,acct_0105,user_000771,2026-02-11T11:00:00Z,draft,41000,supplier_0471 +pr_004072,acct_0106,user_000772,2026-02-12T11:00:00Z,submitted,42000,supplier_0472 +pr_004073,acct_0107,user_000773,2026-02-13T11:00:00Z,approved,43000,supplier_0473 +pr_004074,acct_0108,user_000774,2026-02-14T11:00:00Z,rejected,44000,supplier_0474 +pr_004075,acct_0109,user_000775,2026-02-15T11:00:00Z,cancelled,45000,supplier_0475 +pr_004076,acct_0110,user_000776,2026-02-16T11:00:00Z,draft,46000,supplier_0476 +pr_004077,acct_0111,user_000777,2026-02-17T11:00:00Z,submitted,47000,supplier_0477 +pr_004078,acct_0112,user_000778,2026-02-18T11:00:00Z,approved,48000,supplier_0478 +pr_004079,acct_0113,user_000779,2026-02-19T11:00:00Z,rejected,49000,supplier_0479 +pr_004080,acct_0114,user_000780,2026-02-20T11:00:00Z,cancelled,50000,supplier_0480 +pr_004081,acct_0115,user_000781,2026-02-21T11:00:00Z,draft,51000,supplier_0481 +pr_004082,acct_0116,user_000782,2026-02-22T11:00:00Z,submitted,52000,supplier_0482 +pr_004083,acct_0117,user_000783,2026-02-23T11:00:00Z,approved,53000,supplier_0483 +pr_004084,acct_0118,user_000784,2026-02-24T11:00:00Z,rejected,54000,supplier_0484 +pr_004085,acct_0119,user_000785,2026-02-25T11:00:00Z,cancelled,55000,supplier_0485 +pr_004086,acct_0120,user_000786,2026-02-26T11:00:00Z,draft,56000,supplier_0486 +pr_004087,acct_0121,user_000787,2026-02-27T11:00:00Z,submitted,57000,supplier_0487 +pr_004088,acct_0122,user_000788,2026-02-28T11:00:00Z,approved,58000,supplier_0488 +pr_004089,acct_0123,user_000789,2026-02-01T11:00:00Z,rejected,59000,supplier_0489 +pr_004090,acct_0124,user_000790,2026-02-02T11:00:00Z,cancelled,60000,supplier_0490 +pr_004091,acct_0125,user_000791,2026-02-03T11:00:00Z,draft,61000,supplier_0491 +pr_004092,acct_0126,user_000792,2026-02-04T11:00:00Z,submitted,62000,supplier_0492 
+pr_004093,acct_0127,user_000793,2026-02-05T11:00:00Z,approved,63000,supplier_0493 +pr_004094,acct_0128,user_000794,2026-02-06T11:00:00Z,rejected,64000,supplier_0494 +pr_004095,acct_0129,user_000795,2026-02-07T11:00:00Z,cancelled,65000,supplier_0495 +pr_004096,acct_0130,user_000796,2026-02-08T11:00:00Z,draft,66000,supplier_0496 +pr_004097,acct_0131,user_000797,2026-02-09T11:00:00Z,submitted,67000,supplier_0497 +pr_004098,acct_0132,user_000798,2026-02-10T11:00:00Z,approved,68000,supplier_0498 +pr_004099,acct_0133,user_000799,2026-02-11T11:00:00Z,rejected,69000,supplier_0499 +pr_004100,acct_0134,user_000800,2026-02-12T11:00:00Z,cancelled,70000,supplier_0500 +pr_004101,acct_0135,user_000801,2026-02-13T11:00:00Z,draft,71000,supplier_0501 +pr_004102,acct_0136,user_000802,2026-02-14T11:00:00Z,submitted,72000,supplier_0502 +pr_004103,acct_0137,user_000803,2026-02-15T11:00:00Z,approved,73000,supplier_0503 +pr_004104,acct_0138,user_000804,2026-02-16T11:00:00Z,rejected,74000,supplier_0504 +pr_004105,acct_0139,user_000805,2026-02-17T11:00:00Z,cancelled,75000,supplier_0505 +pr_004106,acct_0140,user_000806,2026-02-18T11:00:00Z,draft,76000,supplier_0506 +pr_004107,acct_0141,user_000807,2026-02-19T11:00:00Z,submitted,77000,supplier_0507 +pr_004108,acct_0142,user_000808,2026-02-20T11:00:00Z,approved,78000,supplier_0508 +pr_004109,acct_0143,user_000809,2026-02-21T11:00:00Z,rejected,79000,supplier_0509 +pr_004110,acct_0144,user_000810,2026-02-22T11:00:00Z,cancelled,80000,supplier_0510 +pr_004111,acct_0145,user_000811,2026-02-23T11:00:00Z,draft,81000,supplier_0511 +pr_004112,acct_0146,user_000812,2026-02-24T11:00:00Z,submitted,82000,supplier_0512 +pr_004113,acct_0147,user_000813,2026-02-25T11:00:00Z,approved,83000,supplier_0513 +pr_004114,acct_0148,user_000814,2026-02-26T11:00:00Z,rejected,84000,supplier_0514 +pr_004115,acct_0149,user_000815,2026-02-27T11:00:00Z,cancelled,85000,supplier_0515 +pr_004116,acct_0150,user_000816,2026-02-28T11:00:00Z,draft,86000,supplier_0516 
+pr_004117,acct_0151,user_000817,2026-02-01T11:00:00Z,submitted,87000,supplier_0517 +pr_004118,acct_0152,user_000818,2026-02-02T11:00:00Z,approved,88000,supplier_0518 +pr_004119,acct_0153,user_000819,2026-02-03T11:00:00Z,rejected,89000,supplier_0519 +pr_004120,acct_0154,user_000820,2026-02-04T11:00:00Z,cancelled,90000,supplier_0520 +pr_004121,acct_0155,user_000821,2026-02-05T11:00:00Z,draft,91000,supplier_0521 +pr_004122,acct_0156,user_000822,2026-02-06T11:00:00Z,submitted,92000,supplier_0522 +pr_004123,acct_0157,user_000823,2026-02-07T11:00:00Z,approved,93000,supplier_0523 +pr_004124,acct_0158,user_000824,2026-02-08T11:00:00Z,rejected,94000,supplier_0524 +pr_004125,acct_0159,user_000825,2026-02-09T11:00:00Z,cancelled,95000,supplier_0525 +pr_004126,acct_0160,user_000826,2026-02-10T11:00:00Z,draft,96000,supplier_0526 +pr_004127,acct_0161,user_000827,2026-02-11T11:00:00Z,submitted,97000,supplier_0527 +pr_004128,acct_0162,user_000828,2026-02-12T11:00:00Z,approved,98000,supplier_0528 +pr_004129,acct_0163,user_000829,2026-02-13T11:00:00Z,rejected,99000,supplier_0529 +pr_004130,acct_0164,user_000830,2026-02-14T11:00:00Z,cancelled,100000,supplier_0530 +pr_004131,acct_0165,user_000831,2026-02-15T11:00:00Z,draft,101000,supplier_0531 +pr_004132,acct_0166,user_000832,2026-02-16T11:00:00Z,submitted,102000,supplier_0532 +pr_004133,acct_0167,user_000833,2026-02-17T11:00:00Z,approved,103000,supplier_0533 +pr_004134,acct_0168,user_000834,2026-02-18T11:00:00Z,rejected,104000,supplier_0534 +pr_004135,acct_0169,user_000835,2026-02-19T11:00:00Z,cancelled,105000,supplier_0535 +pr_004136,acct_0170,user_000836,2026-02-20T11:00:00Z,draft,106000,supplier_0536 +pr_004137,acct_0171,user_000837,2026-02-21T11:00:00Z,submitted,107000,supplier_0537 +pr_004138,acct_0172,user_000838,2026-02-22T11:00:00Z,approved,108000,supplier_0538 +pr_004139,acct_0173,user_000839,2026-02-23T11:00:00Z,rejected,109000,supplier_0539 
+pr_004140,acct_0174,user_000840,2026-02-24T11:00:00Z,cancelled,110000,supplier_0540 +pr_004141,acct_0175,user_000841,2026-02-25T11:00:00Z,draft,10000,supplier_0541 +pr_004142,acct_0176,user_000842,2026-02-26T11:00:00Z,submitted,11000,supplier_0542 +pr_004143,acct_0177,user_000843,2026-02-27T11:00:00Z,approved,12000,supplier_0543 +pr_004144,acct_0178,user_000844,2026-02-28T11:00:00Z,rejected,13000,supplier_0544 +pr_004145,acct_0179,user_000845,2026-02-01T11:00:00Z,cancelled,14000,supplier_0545 +pr_004146,acct_0180,user_000846,2026-02-02T11:00:00Z,draft,15000,supplier_0546 +pr_004147,acct_0181,user_000847,2026-02-03T11:00:00Z,submitted,16000,supplier_0547 +pr_004148,acct_0182,user_000848,2026-02-04T11:00:00Z,approved,17000,supplier_0548 +pr_004149,acct_0183,user_000849,2026-02-05T11:00:00Z,rejected,18000,supplier_0549 +pr_004150,acct_0184,user_000850,2026-02-06T11:00:00Z,cancelled,19000,supplier_0550 +pr_004151,acct_0185,user_000851,2026-02-07T11:00:00Z,draft,20000,supplier_0551 +pr_004152,acct_0186,user_000852,2026-02-08T11:00:00Z,submitted,21000,supplier_0552 +pr_004153,acct_0187,user_000853,2026-02-09T11:00:00Z,approved,22000,supplier_0553 +pr_004154,acct_0188,user_000854,2026-02-10T11:00:00Z,rejected,23000,supplier_0554 +pr_004155,acct_0189,user_000855,2026-02-11T11:00:00Z,cancelled,24000,supplier_0555 +pr_004156,acct_0190,user_000856,2026-02-12T11:00:00Z,draft,25000,supplier_0556 +pr_004157,acct_0001,user_000857,2026-02-13T11:00:00Z,submitted,26000,supplier_0557 +pr_004158,acct_0002,user_000858,2026-02-14T11:00:00Z,approved,27000,supplier_0558 +pr_004159,acct_0003,user_000859,2026-02-15T11:00:00Z,rejected,28000,supplier_0559 +pr_004160,acct_0004,user_000860,2026-02-16T11:00:00Z,cancelled,29000,supplier_0560 +pr_004161,acct_0005,user_000861,2026-02-17T11:00:00Z,draft,30000,supplier_0561 +pr_004162,acct_0006,user_000862,2026-02-18T11:00:00Z,submitted,31000,supplier_0562 +pr_004163,acct_0007,user_000863,2026-02-19T11:00:00Z,approved,32000,supplier_0563 
+pr_004164,acct_0008,user_000864,2026-02-20T11:00:00Z,rejected,33000,supplier_0564 +pr_004165,acct_0009,user_000865,2026-02-21T11:00:00Z,cancelled,34000,supplier_0565 +pr_004166,acct_0010,user_000866,2026-02-22T11:00:00Z,draft,35000,supplier_0566 +pr_004167,acct_0011,user_000867,2026-02-23T11:00:00Z,submitted,36000,supplier_0567 +pr_004168,acct_0012,user_000868,2026-02-24T11:00:00Z,approved,37000,supplier_0568 +pr_004169,acct_0013,user_000869,2026-02-25T11:00:00Z,rejected,38000,supplier_0569 +pr_004170,acct_0014,user_000870,2026-02-26T11:00:00Z,cancelled,39000,supplier_0570 +pr_004171,acct_0015,user_000871,2026-02-27T11:00:00Z,draft,40000,supplier_0571 +pr_004172,acct_0016,user_000872,2026-02-28T11:00:00Z,submitted,41000,supplier_0572 +pr_004173,acct_0017,user_000873,2026-02-01T11:00:00Z,approved,42000,supplier_0573 +pr_004174,acct_0018,user_000874,2026-02-02T11:00:00Z,rejected,43000,supplier_0574 +pr_004175,acct_0019,user_000875,2026-02-03T11:00:00Z,cancelled,44000,supplier_0575 +pr_004176,acct_0020,user_000876,2026-02-04T11:00:00Z,draft,45000,supplier_0576 +pr_004177,acct_0021,user_000877,2026-02-05T11:00:00Z,submitted,46000,supplier_0577 +pr_004178,acct_0022,user_000878,2026-02-06T11:00:00Z,approved,47000,supplier_0578 +pr_004179,acct_0023,user_000879,2026-02-07T11:00:00Z,rejected,48000,supplier_0579 +pr_004180,acct_0024,user_000880,2026-02-08T11:00:00Z,cancelled,49000,supplier_0580 +pr_004181,acct_0025,user_000881,2026-02-09T11:00:00Z,draft,50000,supplier_0581 +pr_004182,acct_0026,user_000882,2026-02-10T11:00:00Z,submitted,51000,supplier_0582 +pr_004183,acct_0027,user_000883,2026-02-11T11:00:00Z,approved,52000,supplier_0583 +pr_004184,acct_0028,user_000884,2026-02-12T11:00:00Z,rejected,53000,supplier_0584 +pr_004185,acct_0029,user_000885,2026-02-13T11:00:00Z,cancelled,54000,supplier_0585 +pr_004186,acct_0030,user_000886,2026-02-14T11:00:00Z,draft,55000,supplier_0586 +pr_004187,acct_0031,user_000887,2026-02-15T11:00:00Z,submitted,56000,supplier_0587 
+pr_004188,acct_0032,user_000888,2026-02-16T11:00:00Z,approved,57000,supplier_0588 +pr_004189,acct_0033,user_000889,2026-02-17T11:00:00Z,rejected,58000,supplier_0589 +pr_004190,acct_0034,user_000890,2026-02-18T11:00:00Z,cancelled,59000,supplier_0590 +pr_004191,acct_0035,user_000891,2026-02-19T11:00:00Z,draft,60000,supplier_0591 +pr_004192,acct_0036,user_000892,2026-02-20T11:00:00Z,submitted,61000,supplier_0592 +pr_004193,acct_0037,user_000893,2026-02-21T11:00:00Z,approved,62000,supplier_0593 +pr_004194,acct_0038,user_000894,2026-02-22T11:00:00Z,rejected,63000,supplier_0594 +pr_004195,acct_0039,user_000895,2026-02-23T11:00:00Z,cancelled,64000,supplier_0595 +pr_004196,acct_0040,user_000896,2026-02-24T11:00:00Z,draft,65000,supplier_0596 +pr_004197,acct_0041,user_000897,2026-02-25T11:00:00Z,submitted,66000,supplier_0597 +pr_004198,acct_0042,user_000898,2026-02-26T11:00:00Z,approved,67000,supplier_0598 +pr_004199,acct_0043,user_000899,2026-02-27T11:00:00Z,rejected,68000,supplier_0599 +pr_004200,acct_0044,user_000900,2026-02-28T11:00:00Z,cancelled,69000,supplier_0600 +pr_004201,acct_0045,user_000901,2026-02-01T11:00:00Z,draft,70000,supplier_0601 +pr_004202,acct_0046,user_000902,2026-02-02T11:00:00Z,submitted,71000,supplier_0602 +pr_004203,acct_0047,user_000903,2026-02-03T11:00:00Z,approved,72000,supplier_0603 +pr_004204,acct_0048,user_000904,2026-02-04T11:00:00Z,rejected,73000,supplier_0604 +pr_004205,acct_0049,user_000905,2026-02-05T11:00:00Z,cancelled,74000,supplier_0605 +pr_004206,acct_0050,user_000906,2026-02-06T11:00:00Z,draft,75000,supplier_0606 +pr_004207,acct_0051,user_000907,2026-02-07T11:00:00Z,submitted,76000,supplier_0607 +pr_004208,acct_0052,user_000908,2026-02-08T11:00:00Z,approved,77000,supplier_0608 +pr_004209,acct_0053,user_000909,2026-02-09T11:00:00Z,rejected,78000,supplier_0609 +pr_004210,acct_0054,user_000910,2026-02-10T11:00:00Z,cancelled,79000,supplier_0610 +pr_004211,acct_0055,user_000911,2026-02-11T11:00:00Z,draft,80000,supplier_0611 
+pr_004212,acct_0056,user_000912,2026-02-12T11:00:00Z,submitted,81000,supplier_0612 +pr_004213,acct_0057,user_000913,2026-02-13T11:00:00Z,approved,82000,supplier_0613 +pr_004214,acct_0058,user_000914,2026-02-14T11:00:00Z,rejected,83000,supplier_0614 +pr_004215,acct_0059,user_000915,2026-02-15T11:00:00Z,cancelled,84000,supplier_0615 +pr_004216,acct_0060,user_000916,2026-02-16T11:00:00Z,draft,85000,supplier_0616 +pr_004217,acct_0061,user_000917,2026-02-17T11:00:00Z,submitted,86000,supplier_0617 +pr_004218,acct_0062,user_000918,2026-02-18T11:00:00Z,approved,87000,supplier_0618 +pr_004219,acct_0063,user_000919,2026-02-19T11:00:00Z,rejected,88000,supplier_0619 +pr_004220,acct_0064,user_000920,2026-02-20T11:00:00Z,cancelled,89000,supplier_0620 +pr_004221,acct_0065,user_000921,2026-02-21T11:00:00Z,draft,90000,supplier_0621 +pr_004222,acct_0066,user_000922,2026-02-22T11:00:00Z,submitted,91000,supplier_0622 +pr_004223,acct_0067,user_000923,2026-02-23T11:00:00Z,approved,92000,supplier_0623 +pr_004224,acct_0068,user_000924,2026-02-24T11:00:00Z,rejected,93000,supplier_0624 +pr_004225,acct_0069,user_000925,2026-02-25T11:00:00Z,cancelled,94000,supplier_0625 +pr_004226,acct_0070,user_000926,2026-02-26T11:00:00Z,draft,95000,supplier_0626 +pr_004227,acct_0071,user_000927,2026-02-27T11:00:00Z,submitted,96000,supplier_0627 +pr_004228,acct_0072,user_000928,2026-02-28T11:00:00Z,approved,97000,supplier_0628 +pr_004229,acct_0073,user_000929,2026-02-01T11:00:00Z,rejected,98000,supplier_0629 +pr_004230,acct_0074,user_000930,2026-02-02T11:00:00Z,cancelled,99000,supplier_0630 +pr_004231,acct_0075,user_000931,2026-02-03T11:00:00Z,draft,100000,supplier_0631 +pr_004232,acct_0076,user_000932,2026-02-04T11:00:00Z,submitted,101000,supplier_0632 +pr_004233,acct_0077,user_000933,2026-02-05T11:00:00Z,approved,102000,supplier_0633 +pr_004234,acct_0078,user_000934,2026-02-06T11:00:00Z,rejected,103000,supplier_0634 +pr_004235,acct_0079,user_000935,2026-02-07T11:00:00Z,cancelled,104000,supplier_0635 
+pr_004236,acct_0080,user_000936,2026-02-08T11:00:00Z,draft,105000,supplier_0636 +pr_004237,acct_0081,user_000937,2026-02-09T11:00:00Z,submitted,106000,supplier_0637 +pr_004238,acct_0082,user_000938,2026-02-10T11:00:00Z,approved,107000,supplier_0638 +pr_004239,acct_0083,user_000939,2026-02-11T11:00:00Z,rejected,108000,supplier_0639 +pr_004240,acct_0084,user_000940,2026-02-12T11:00:00Z,cancelled,109000,supplier_0640 +pr_004241,acct_0085,user_000941,2026-02-13T11:00:00Z,draft,110000,supplier_0641 +pr_004242,acct_0086,user_000942,2026-02-14T11:00:00Z,submitted,10000,supplier_0642 +pr_004243,acct_0087,user_000943,2026-02-15T11:00:00Z,approved,11000,supplier_0643 +pr_004244,acct_0088,user_000944,2026-02-16T11:00:00Z,rejected,12000,supplier_0644 +pr_004245,acct_0089,user_000945,2026-02-17T11:00:00Z,cancelled,13000,supplier_0645 +pr_004246,acct_0090,user_000946,2026-02-18T11:00:00Z,draft,14000,supplier_0646 +pr_004247,acct_0091,user_000947,2026-02-19T11:00:00Z,submitted,15000,supplier_0647 +pr_004248,acct_0092,user_000948,2026-02-20T11:00:00Z,approved,16000,supplier_0648 +pr_004249,acct_0093,user_000949,2026-02-21T11:00:00Z,rejected,17000,supplier_0649 +pr_004250,acct_0094,user_000950,2026-02-22T11:00:00Z,cancelled,18000,supplier_0650 +pr_004251,acct_0095,user_000951,2026-02-23T11:00:00Z,draft,19000,supplier_0651 +pr_004252,acct_0096,user_000952,2026-02-24T11:00:00Z,submitted,20000,supplier_0652 +pr_004253,acct_0097,user_000953,2026-02-25T11:00:00Z,approved,21000,supplier_0653 +pr_004254,acct_0098,user_000954,2026-02-26T11:00:00Z,rejected,22000,supplier_0654 +pr_004255,acct_0099,user_000955,2026-02-27T11:00:00Z,cancelled,23000,supplier_0655 +pr_004256,acct_0100,user_000956,2026-02-28T11:00:00Z,draft,24000,supplier_0656 +pr_004257,acct_0101,user_000957,2026-02-01T11:00:00Z,submitted,25000,supplier_0657 +pr_004258,acct_0102,user_000958,2026-02-02T11:00:00Z,approved,26000,supplier_0658 +pr_004259,acct_0103,user_000959,2026-02-03T11:00:00Z,rejected,27000,supplier_0659 
+pr_004260,acct_0104,user_000960,2026-02-04T11:00:00Z,cancelled,28000,supplier_0660 +pr_004261,acct_0105,user_000961,2026-02-05T11:00:00Z,draft,29000,supplier_0661 +pr_004262,acct_0106,user_000962,2026-02-06T11:00:00Z,submitted,30000,supplier_0662 +pr_004263,acct_0107,user_000963,2026-02-07T11:00:00Z,approved,31000,supplier_0663 +pr_004264,acct_0108,user_000964,2026-02-08T11:00:00Z,rejected,32000,supplier_0664 +pr_004265,acct_0109,user_000965,2026-02-09T11:00:00Z,cancelled,33000,supplier_0665 +pr_004266,acct_0110,user_000966,2026-02-10T11:00:00Z,draft,34000,supplier_0666 +pr_004267,acct_0111,user_000967,2026-02-11T11:00:00Z,submitted,35000,supplier_0667 +pr_004268,acct_0112,user_000968,2026-02-12T11:00:00Z,approved,36000,supplier_0668 +pr_004269,acct_0113,user_000969,2026-02-13T11:00:00Z,rejected,37000,supplier_0669 +pr_004270,acct_0114,user_000970,2026-02-14T11:00:00Z,cancelled,38000,supplier_0670 +pr_004271,acct_0115,user_000971,2026-02-15T11:00:00Z,draft,39000,supplier_0671 +pr_004272,acct_0116,user_000972,2026-02-16T11:00:00Z,submitted,40000,supplier_0672 +pr_004273,acct_0117,user_000973,2026-02-17T11:00:00Z,approved,41000,supplier_0673 +pr_004274,acct_0118,user_000974,2026-02-18T11:00:00Z,rejected,42000,supplier_0674 +pr_004275,acct_0119,user_000975,2026-02-19T11:00:00Z,cancelled,43000,supplier_0675 +pr_004276,acct_0120,user_000976,2026-02-20T11:00:00Z,draft,44000,supplier_0676 +pr_004277,acct_0121,user_000977,2026-02-21T11:00:00Z,submitted,45000,supplier_0677 +pr_004278,acct_0122,user_000978,2026-02-22T11:00:00Z,approved,46000,supplier_0678 +pr_004279,acct_0123,user_000979,2026-02-23T11:00:00Z,rejected,47000,supplier_0679 +pr_004280,acct_0124,user_000980,2026-02-24T11:00:00Z,cancelled,48000,supplier_0680 +pr_004281,acct_0125,user_000981,2026-02-25T11:00:00Z,draft,49000,supplier_0681 +pr_004282,acct_0126,user_000982,2026-02-26T11:00:00Z,submitted,50000,supplier_0682 +pr_004283,acct_0127,user_000983,2026-02-27T11:00:00Z,approved,51000,supplier_0683 
+pr_004284,acct_0128,user_000984,2026-02-28T11:00:00Z,rejected,52000,supplier_0684 +pr_004285,acct_0129,user_000985,2026-02-01T11:00:00Z,cancelled,53000,supplier_0685 +pr_004286,acct_0130,user_000986,2026-02-02T11:00:00Z,draft,54000,supplier_0686 +pr_004287,acct_0131,user_000987,2026-02-03T11:00:00Z,submitted,55000,supplier_0687 +pr_004288,acct_0132,user_000988,2026-02-04T11:00:00Z,approved,56000,supplier_0688 +pr_004289,acct_0133,user_000989,2026-02-05T11:00:00Z,rejected,57000,supplier_0689 +pr_004290,acct_0134,user_000990,2026-02-06T11:00:00Z,cancelled,58000,supplier_0690 +pr_004291,acct_0135,user_000991,2026-02-07T11:00:00Z,draft,59000,supplier_0691 +pr_004292,acct_0136,user_000992,2026-02-08T11:00:00Z,submitted,60000,supplier_0692 +pr_004293,acct_0137,user_000993,2026-02-09T11:00:00Z,approved,61000,supplier_0693 +pr_004294,acct_0138,user_000994,2026-02-10T11:00:00Z,rejected,62000,supplier_0694 +pr_004295,acct_0139,user_000995,2026-02-11T11:00:00Z,cancelled,63000,supplier_0695 +pr_004296,acct_0140,user_000996,2026-02-12T11:00:00Z,draft,64000,supplier_0696 +pr_004297,acct_0141,user_000997,2026-02-13T11:00:00Z,submitted,65000,supplier_0697 +pr_004298,acct_0142,user_000998,2026-02-14T11:00:00Z,approved,66000,supplier_0698 +pr_004299,acct_0143,user_000999,2026-02-15T11:00:00Z,rejected,67000,supplier_0699 +pr_004300,acct_0144,user_001000,2026-02-16T11:00:00Z,cancelled,68000,supplier_0700 +pr_004301,acct_0145,user_001001,2026-02-17T11:00:00Z,draft,69000,supplier_0701 +pr_004302,acct_0146,user_001002,2026-02-18T11:00:00Z,submitted,70000,supplier_0702 +pr_004303,acct_0147,user_001003,2026-02-19T11:00:00Z,approved,71000,supplier_0703 +pr_004304,acct_0148,user_001004,2026-02-20T11:00:00Z,rejected,72000,supplier_0704 +pr_004305,acct_0149,user_001005,2026-02-21T11:00:00Z,cancelled,73000,supplier_0705 +pr_004306,acct_0150,user_001006,2026-02-22T11:00:00Z,draft,74000,supplier_0706 +pr_004307,acct_0151,user_001007,2026-02-23T11:00:00Z,submitted,75000,supplier_0707 
+pr_004308,acct_0152,user_001008,2026-02-24T11:00:00Z,approved,76000,supplier_0708 +pr_004309,acct_0153,user_001009,2026-02-25T11:00:00Z,rejected,77000,supplier_0709 +pr_004310,acct_0154,user_001010,2026-02-26T11:00:00Z,cancelled,78000,supplier_0710 +pr_004311,acct_0155,user_001011,2026-02-27T11:00:00Z,draft,79000,supplier_0711 +pr_004312,acct_0156,user_001012,2026-02-28T11:00:00Z,submitted,80000,supplier_0712 +pr_004313,acct_0157,user_001013,2026-02-01T11:00:00Z,approved,81000,supplier_0713 +pr_004314,acct_0158,user_001014,2026-02-02T11:00:00Z,rejected,82000,supplier_0714 +pr_004315,acct_0159,user_001015,2026-02-03T11:00:00Z,cancelled,83000,supplier_0715 +pr_004316,acct_0160,user_001016,2026-02-04T11:00:00Z,draft,84000,supplier_0716 +pr_004317,acct_0161,user_001017,2026-02-05T11:00:00Z,submitted,85000,supplier_0717 +pr_004318,acct_0162,user_001018,2026-02-06T11:00:00Z,approved,86000,supplier_0718 +pr_004319,acct_0163,user_001019,2026-02-07T11:00:00Z,rejected,87000,supplier_0719 +pr_004320,acct_0164,user_001020,2026-02-08T11:00:00Z,cancelled,88000,supplier_0720 +pr_004321,acct_0165,user_001021,2026-02-09T11:00:00Z,draft,89000,supplier_0721 +pr_004322,acct_0166,user_001022,2026-02-10T11:00:00Z,submitted,90000,supplier_0722 +pr_004323,acct_0167,user_001023,2026-02-11T11:00:00Z,approved,91000,supplier_0723 +pr_004324,acct_0168,user_001024,2026-02-12T11:00:00Z,rejected,92000,supplier_0724 +pr_004325,acct_0169,user_001025,2026-02-13T11:00:00Z,cancelled,93000,supplier_0725 +pr_004326,acct_0170,user_001026,2026-02-14T11:00:00Z,draft,94000,supplier_0726 +pr_004327,acct_0171,user_001027,2026-02-15T11:00:00Z,submitted,95000,supplier_0727 +pr_004328,acct_0172,user_001028,2026-02-16T11:00:00Z,approved,96000,supplier_0728 +pr_004329,acct_0173,user_001029,2026-02-17T11:00:00Z,rejected,97000,supplier_0729 +pr_004330,acct_0174,user_001030,2026-02-18T11:00:00Z,cancelled,98000,supplier_0730 +pr_004331,acct_0175,user_001031,2026-02-19T11:00:00Z,draft,99000,supplier_0731 
+pr_004332,acct_0176,user_001032,2026-02-20T11:00:00Z,submitted,100000,supplier_0732 +pr_004333,acct_0177,user_001033,2026-02-21T11:00:00Z,approved,101000,supplier_0733 +pr_004334,acct_0178,user_001034,2026-02-22T11:00:00Z,rejected,102000,supplier_0734 +pr_004335,acct_0179,user_001035,2026-02-23T11:00:00Z,cancelled,103000,supplier_0735 +pr_004336,acct_0180,user_001036,2026-02-24T11:00:00Z,draft,104000,supplier_0736 +pr_004337,acct_0181,user_001037,2026-02-25T11:00:00Z,submitted,105000,supplier_0737 +pr_004338,acct_0182,user_001038,2026-02-26T11:00:00Z,approved,106000,supplier_0738 +pr_004339,acct_0183,user_001039,2026-02-27T11:00:00Z,rejected,107000,supplier_0739 +pr_004340,acct_0184,user_001040,2026-02-28T11:00:00Z,cancelled,108000,supplier_0740 +pr_004341,acct_0185,user_001041,2026-02-01T11:00:00Z,draft,109000,supplier_0741 +pr_004342,acct_0186,user_001042,2026-02-02T11:00:00Z,submitted,110000,supplier_0742 +pr_004343,acct_0187,user_001043,2026-02-03T11:00:00Z,approved,10000,supplier_0743 +pr_004344,acct_0188,user_001044,2026-02-04T11:00:00Z,rejected,11000,supplier_0744 +pr_004345,acct_0189,user_001045,2026-02-05T11:00:00Z,cancelled,12000,supplier_0745 +pr_004346,acct_0190,user_001046,2026-02-06T11:00:00Z,draft,13000,supplier_0746 +pr_004347,acct_0001,user_001047,2026-02-07T11:00:00Z,submitted,14000,supplier_0747 +pr_004348,acct_0002,user_001048,2026-02-08T11:00:00Z,approved,15000,supplier_0748 +pr_004349,acct_0003,user_001049,2026-02-09T11:00:00Z,rejected,16000,supplier_0749 +pr_004350,acct_0004,user_001050,2026-02-10T11:00:00Z,cancelled,17000,supplier_0750 +pr_004351,acct_0005,user_001051,2026-02-11T11:00:00Z,draft,18000,supplier_0751 +pr_004352,acct_0006,user_001052,2026-02-12T11:00:00Z,submitted,19000,supplier_0752 +pr_004353,acct_0007,user_001053,2026-02-13T11:00:00Z,approved,20000,supplier_0753 +pr_004354,acct_0008,user_001054,2026-02-14T11:00:00Z,rejected,21000,supplier_0754 
+pr_004355,acct_0009,user_001055,2026-02-15T11:00:00Z,cancelled,22000,supplier_0755 +pr_004356,acct_0010,user_001056,2026-02-16T11:00:00Z,draft,23000,supplier_0756 +pr_004357,acct_0011,user_001057,2026-02-17T11:00:00Z,submitted,24000,supplier_0757 +pr_004358,acct_0012,user_001058,2026-02-18T11:00:00Z,approved,25000,supplier_0758 +pr_004359,acct_0013,user_001059,2026-02-19T11:00:00Z,rejected,26000,supplier_0759 +pr_004360,acct_0014,user_001060,2026-02-20T11:00:00Z,cancelled,27000,supplier_0760 +pr_004361,acct_0015,user_001061,2026-02-21T11:00:00Z,draft,28000,supplier_0761 +pr_004362,acct_0016,user_001062,2026-02-22T11:00:00Z,submitted,29000,supplier_0762 +pr_004363,acct_0017,user_001063,2026-02-23T11:00:00Z,approved,30000,supplier_0763 +pr_004364,acct_0018,user_001064,2026-02-24T11:00:00Z,rejected,31000,supplier_0764 +pr_004365,acct_0019,user_001065,2026-02-25T11:00:00Z,cancelled,32000,supplier_0765 +pr_004366,acct_0020,user_001066,2026-02-26T11:00:00Z,draft,33000,supplier_0766 +pr_004367,acct_0021,user_001067,2026-02-27T11:00:00Z,submitted,34000,supplier_0767 +pr_004368,acct_0022,user_001068,2026-02-28T11:00:00Z,approved,35000,supplier_0768 +pr_004369,acct_0023,user_001069,2026-02-01T11:00:00Z,rejected,36000,supplier_0769 +pr_004370,acct_0024,user_001070,2026-02-02T11:00:00Z,cancelled,37000,supplier_0770 +pr_004371,acct_0025,user_001071,2026-02-03T11:00:00Z,draft,38000,supplier_0771 +pr_004372,acct_0026,user_001072,2026-02-04T11:00:00Z,submitted,39000,supplier_0772 +pr_004373,acct_0027,user_001073,2026-02-05T11:00:00Z,approved,40000,supplier_0773 +pr_004374,acct_0028,user_001074,2026-02-06T11:00:00Z,rejected,41000,supplier_0774 +pr_004375,acct_0029,user_001075,2026-02-07T11:00:00Z,cancelled,42000,supplier_0775 +pr_004376,acct_0030,user_001076,2026-02-08T11:00:00Z,draft,43000,supplier_0776 +pr_004377,acct_0031,user_001077,2026-02-09T11:00:00Z,submitted,44000,supplier_0777 +pr_004378,acct_0032,user_001078,2026-02-10T11:00:00Z,approved,45000,supplier_0778 
+pr_004379,acct_0033,user_001079,2026-02-11T11:00:00Z,rejected,46000,supplier_0779 +pr_004380,acct_0034,user_001080,2026-02-12T11:00:00Z,cancelled,47000,supplier_0780 +pr_004381,acct_0035,user_001081,2026-02-13T11:00:00Z,draft,48000,supplier_0781 +pr_004382,acct_0036,user_001082,2026-02-14T11:00:00Z,submitted,49000,supplier_0782 +pr_004383,acct_0037,user_001083,2026-02-15T11:00:00Z,approved,50000,supplier_0783 +pr_004384,acct_0038,user_001084,2026-02-16T11:00:00Z,rejected,51000,supplier_0784 +pr_004385,acct_0039,user_001085,2026-02-17T11:00:00Z,cancelled,52000,supplier_0785 +pr_004386,acct_0040,user_001086,2026-02-18T11:00:00Z,draft,53000,supplier_0786 +pr_004387,acct_0041,user_001087,2026-02-19T11:00:00Z,submitted,54000,supplier_0787 +pr_004388,acct_0042,user_001088,2026-02-20T11:00:00Z,approved,55000,supplier_0788 +pr_004389,acct_0043,user_001089,2026-02-21T11:00:00Z,rejected,56000,supplier_0789 +pr_004390,acct_0044,user_001090,2026-02-22T11:00:00Z,cancelled,57000,supplier_0790 +pr_004391,acct_0045,user_001091,2026-02-23T11:00:00Z,draft,58000,supplier_0791 +pr_004392,acct_0046,user_001092,2026-02-24T11:00:00Z,submitted,59000,supplier_0792 +pr_004393,acct_0047,user_001093,2026-02-25T11:00:00Z,approved,60000,supplier_0793 +pr_004394,acct_0048,user_001094,2026-02-26T11:00:00Z,rejected,61000,supplier_0794 +pr_004395,acct_0049,user_001095,2026-02-27T11:00:00Z,cancelled,62000,supplier_0795 +pr_004396,acct_0050,user_001096,2026-02-28T11:00:00Z,draft,63000,supplier_0796 +pr_004397,acct_0051,user_001097,2026-02-01T11:00:00Z,submitted,64000,supplier_0797 +pr_004398,acct_0052,user_001098,2026-02-02T11:00:00Z,approved,65000,supplier_0798 +pr_004399,acct_0053,user_001099,2026-02-03T11:00:00Z,rejected,66000,supplier_0799 +pr_004400,acct_0054,user_001100,2026-02-04T11:00:00Z,cancelled,67000,supplier_0800 +pr_004401,acct_0010,user_000001,2026-02-05T11:00:00Z,draft,68000,supplier_0801 +pr_004402,acct_0011,user_000002,2026-02-06T11:00:00Z,submitted,69000,supplier_0802 
+pr_004403,acct_0012,user_000003,2026-02-07T11:00:00Z,approved,70000,supplier_0803 +pr_004404,acct_0013,user_000004,2026-02-08T11:00:00Z,rejected,71000,supplier_0804 +pr_004405,acct_0014,user_000005,2026-02-09T11:00:00Z,cancelled,72000,supplier_0805 +pr_004406,acct_0015,user_000006,2026-02-10T11:00:00Z,draft,73000,supplier_0806 +pr_004407,acct_0016,user_000007,2026-02-11T11:00:00Z,submitted,74000,supplier_0807 +pr_004408,acct_0017,user_000008,2026-02-12T11:00:00Z,approved,75000,supplier_0808 +pr_004409,acct_0018,user_000009,2026-02-13T11:00:00Z,rejected,76000,supplier_0809 +pr_004410,acct_0019,user_000010,2026-02-14T11:00:00Z,cancelled,77000,supplier_0810 +pr_004411,acct_0020,user_000011,2026-02-15T11:00:00Z,draft,78000,supplier_0811 +pr_004412,acct_0021,user_000012,2026-02-16T11:00:00Z,submitted,79000,supplier_0812 +pr_004413,acct_0022,user_000013,2026-02-17T11:00:00Z,approved,80000,supplier_0813 +pr_004414,acct_0023,user_000014,2026-02-18T11:00:00Z,rejected,81000,supplier_0814 +pr_004415,acct_0024,user_000015,2026-02-19T11:00:00Z,cancelled,82000,supplier_0815 +pr_004416,acct_0025,user_000016,2026-02-20T11:00:00Z,draft,83000,supplier_0816 +pr_004417,acct_0026,user_000017,2026-02-21T11:00:00Z,submitted,84000,supplier_0817 +pr_004418,acct_0027,user_000018,2026-02-22T11:00:00Z,approved,85000,supplier_0818 +pr_004419,acct_0028,user_000019,2026-02-23T11:00:00Z,rejected,86000,supplier_0819 +pr_004420,acct_0029,user_000020,2026-02-24T11:00:00Z,cancelled,87000,supplier_0820 +pr_004421,acct_0030,user_000021,2026-02-25T11:00:00Z,draft,88000,supplier_0821 +pr_004422,acct_0031,user_000022,2026-02-26T11:00:00Z,submitted,89000,supplier_0822 +pr_004423,acct_0032,user_000023,2026-02-27T11:00:00Z,approved,90000,supplier_0823 +pr_004424,acct_0033,user_000024,2026-02-28T11:00:00Z,rejected,91000,supplier_0824 +pr_004425,acct_0034,user_000025,2026-02-01T11:00:00Z,cancelled,92000,supplier_0825 +pr_004426,acct_0035,user_000026,2026-02-02T11:00:00Z,draft,93000,supplier_0826 
+pr_004427,acct_0036,user_000027,2026-02-03T11:00:00Z,submitted,94000,supplier_0827 +pr_004428,acct_0037,user_000028,2026-02-04T11:00:00Z,approved,95000,supplier_0828 +pr_004429,acct_0038,user_000029,2026-02-05T11:00:00Z,rejected,96000,supplier_0829 +pr_004430,acct_0039,user_000030,2026-02-06T11:00:00Z,cancelled,97000,supplier_0830 +pr_004431,acct_0040,user_000031,2026-02-07T11:00:00Z,draft,98000,supplier_0831 +pr_004432,acct_0041,user_000032,2026-02-08T11:00:00Z,submitted,99000,supplier_0832 +pr_004433,acct_0042,user_000033,2026-02-09T11:00:00Z,approved,100000,supplier_0833 +pr_004434,acct_0043,user_000034,2026-02-10T11:00:00Z,rejected,101000,supplier_0834 +pr_004435,acct_0044,user_000035,2026-02-11T11:00:00Z,cancelled,102000,supplier_0835 +pr_004436,acct_0045,user_000036,2026-02-12T11:00:00Z,draft,103000,supplier_0836 +pr_004437,acct_0046,user_000037,2026-02-13T11:00:00Z,submitted,104000,supplier_0837 +pr_004438,acct_0047,user_000038,2026-02-14T11:00:00Z,approved,105000,supplier_0838 +pr_004439,acct_0048,user_000039,2026-02-15T11:00:00Z,rejected,106000,supplier_0839 +pr_004440,acct_0049,user_000040,2026-02-16T11:00:00Z,cancelled,107000,supplier_0840 +pr_004441,acct_0050,user_000041,2026-02-17T11:00:00Z,draft,108000,supplier_0841 +pr_004442,acct_0010,user_000042,2026-02-18T11:00:00Z,submitted,109000,supplier_0842 +pr_004443,acct_0011,user_000043,2026-02-19T11:00:00Z,approved,110000,supplier_0843 +pr_004444,acct_0012,user_000044,2026-02-20T11:00:00Z,rejected,10000,supplier_0844 +pr_004445,acct_0013,user_000045,2026-02-21T11:00:00Z,cancelled,11000,supplier_0845 +pr_004446,acct_0014,user_000046,2026-02-22T11:00:00Z,draft,12000,supplier_0846 +pr_004447,acct_0015,user_000047,2026-02-23T11:00:00Z,submitted,13000,supplier_0847 +pr_004448,acct_0016,user_000048,2026-02-24T11:00:00Z,approved,14000,supplier_0848 +pr_004449,acct_0017,user_000049,2026-02-25T11:00:00Z,rejected,15000,supplier_0849 
+pr_004450,acct_0018,user_000050,2026-02-26T11:00:00Z,cancelled,16000,supplier_0850 +pr_004451,acct_0019,user_000051,2026-02-27T11:00:00Z,draft,17000,supplier_0851 +pr_004452,acct_0020,user_000052,2026-02-28T11:00:00Z,submitted,18000,supplier_0852 +pr_004453,acct_0021,user_000053,2026-02-01T11:00:00Z,approved,19000,supplier_0853 +pr_004454,acct_0022,user_000054,2026-02-02T11:00:00Z,rejected,20000,supplier_0854 +pr_004455,acct_0023,user_000055,2026-02-03T11:00:00Z,cancelled,21000,supplier_0855 +pr_004456,acct_0024,user_000056,2026-02-04T11:00:00Z,draft,22000,supplier_0856 +pr_004457,acct_0025,user_000057,2026-02-05T11:00:00Z,submitted,23000,supplier_0857 +pr_004458,acct_0026,user_000058,2026-02-06T11:00:00Z,approved,24000,supplier_0858 +pr_004459,acct_0027,user_000059,2026-02-07T11:00:00Z,rejected,25000,supplier_0859 +pr_004460,acct_0028,user_000060,2026-02-08T11:00:00Z,cancelled,26000,supplier_0860 +pr_004461,acct_0029,user_000061,2026-02-09T11:00:00Z,draft,27000,supplier_0861 +pr_004462,acct_0030,user_000062,2026-02-10T11:00:00Z,submitted,28000,supplier_0862 +pr_004463,acct_0031,user_000063,2026-02-11T11:00:00Z,approved,29000,supplier_0863 +pr_004464,acct_0032,user_000064,2026-02-12T11:00:00Z,rejected,30000,supplier_0864 +pr_004465,acct_0033,user_000065,2026-02-13T11:00:00Z,cancelled,31000,supplier_0865 +pr_004466,acct_0034,user_000066,2026-02-14T11:00:00Z,draft,32000,supplier_0866 +pr_004467,acct_0035,user_000067,2026-02-15T11:00:00Z,submitted,33000,supplier_0867 +pr_004468,acct_0036,user_000068,2026-02-16T11:00:00Z,approved,34000,supplier_0868 +pr_004469,acct_0037,user_000069,2026-02-17T11:00:00Z,rejected,35000,supplier_0869 +pr_004470,acct_0038,user_000070,2026-02-18T11:00:00Z,cancelled,36000,supplier_0870 +pr_004471,acct_0039,user_000071,2026-02-19T11:00:00Z,draft,37000,supplier_0871 +pr_004472,acct_0040,user_000072,2026-02-20T11:00:00Z,submitted,38000,supplier_0872 +pr_004473,acct_0041,user_000073,2026-02-21T11:00:00Z,approved,39000,supplier_0873 
+pr_004474,acct_0042,user_000074,2026-02-22T11:00:00Z,rejected,40000,supplier_0874 +pr_004475,acct_0043,user_000075,2026-02-23T11:00:00Z,cancelled,41000,supplier_0875 +pr_004476,acct_0044,user_000076,2026-02-24T11:00:00Z,draft,42000,supplier_0876 +pr_004477,acct_0045,user_000077,2026-02-25T11:00:00Z,submitted,43000,supplier_0877 +pr_004478,acct_0046,user_000078,2026-02-26T11:00:00Z,approved,44000,supplier_0878 +pr_004479,acct_0047,user_000079,2026-02-27T11:00:00Z,rejected,45000,supplier_0879 +pr_004480,acct_0048,user_000080,2026-02-28T11:00:00Z,cancelled,46000,supplier_0880 +pr_004481,acct_0049,user_000081,2026-02-01T11:00:00Z,draft,47000,supplier_0881 +pr_004482,acct_0050,user_000082,2026-02-02T11:00:00Z,submitted,48000,supplier_0882 +pr_004483,acct_0010,user_000083,2026-02-03T11:00:00Z,approved,49000,supplier_0883 +pr_004484,acct_0011,user_000084,2026-02-04T11:00:00Z,rejected,50000,supplier_0884 +pr_004485,acct_0012,user_000085,2026-02-05T11:00:00Z,cancelled,51000,supplier_0885 +pr_004486,acct_0013,user_000086,2026-02-06T11:00:00Z,draft,52000,supplier_0886 +pr_004487,acct_0014,user_000087,2026-02-07T11:00:00Z,submitted,53000,supplier_0887 +pr_004488,acct_0015,user_000088,2026-02-08T11:00:00Z,approved,54000,supplier_0888 +pr_004489,acct_0016,user_000089,2026-02-09T11:00:00Z,rejected,55000,supplier_0889 +pr_004490,acct_0017,user_000090,2026-02-10T11:00:00Z,cancelled,56000,supplier_0890 +pr_004491,acct_0018,user_000091,2026-02-11T11:00:00Z,draft,57000,supplier_0891 +pr_004492,acct_0019,user_000092,2026-02-12T11:00:00Z,submitted,58000,supplier_0892 +pr_004493,acct_0020,user_000093,2026-02-13T11:00:00Z,approved,59000,supplier_0893 +pr_004494,acct_0021,user_000094,2026-02-14T11:00:00Z,rejected,60000,supplier_0894 +pr_004495,acct_0022,user_000095,2026-02-15T11:00:00Z,cancelled,61000,supplier_0895 +pr_004496,acct_0023,user_000096,2026-02-16T11:00:00Z,draft,62000,supplier_0896 +pr_004497,acct_0024,user_000097,2026-02-17T11:00:00Z,submitted,63000,supplier_0897 
+pr_004498,acct_0025,user_000098,2026-02-18T11:00:00Z,approved,64000,supplier_0898 +pr_004499,acct_0026,user_000099,2026-02-19T11:00:00Z,rejected,65000,supplier_0899 +pr_004500,acct_0027,user_000100,2026-02-20T11:00:00Z,cancelled,66000,supplier_0900 +pr_004501,acct_0028,user_000101,2026-02-21T11:00:00Z,draft,67000,supplier_0001 +pr_004502,acct_0029,user_000102,2026-02-22T11:00:00Z,submitted,68000,supplier_0002 +pr_004503,acct_0030,user_000103,2026-02-23T11:00:00Z,approved,69000,supplier_0003 +pr_004504,acct_0031,user_000104,2026-02-24T11:00:00Z,rejected,70000,supplier_0004 +pr_004505,acct_0032,user_000105,2026-02-25T11:00:00Z,cancelled,71000,supplier_0005 +pr_004506,acct_0033,user_000106,2026-02-26T11:00:00Z,draft,72000,supplier_0006 +pr_004507,acct_0034,user_000107,2026-02-27T11:00:00Z,submitted,73000,supplier_0007 +pr_004508,acct_0035,user_000108,2026-02-28T11:00:00Z,approved,74000,supplier_0008 +pr_004509,acct_0036,user_000109,2026-02-01T11:00:00Z,rejected,75000,supplier_0009 +pr_004510,acct_0037,user_000110,2026-02-02T11:00:00Z,cancelled,76000,supplier_0010 +pr_004511,acct_0038,user_000111,2026-02-03T11:00:00Z,draft,77000,supplier_0011 +pr_004512,acct_0039,user_000112,2026-02-04T11:00:00Z,submitted,78000,supplier_0012 +pr_004513,acct_0040,user_000113,2026-02-05T11:00:00Z,approved,79000,supplier_0013 +pr_004514,acct_0041,user_000114,2026-02-06T11:00:00Z,rejected,80000,supplier_0014 +pr_004515,acct_0042,user_000115,2026-02-07T11:00:00Z,cancelled,81000,supplier_0015 +pr_004516,acct_0043,user_000116,2026-02-08T11:00:00Z,draft,82000,supplier_0016 +pr_004517,acct_0044,user_000117,2026-02-09T11:00:00Z,submitted,83000,supplier_0017 +pr_004518,acct_0045,user_000118,2026-02-10T11:00:00Z,approved,84000,supplier_0018 +pr_004519,acct_0046,user_000119,2026-02-11T11:00:00Z,rejected,85000,supplier_0019 +pr_004520,acct_0047,user_000120,2026-02-12T11:00:00Z,cancelled,86000,supplier_0020 +pr_004521,acct_0048,user_000121,2026-02-13T11:00:00Z,draft,87000,supplier_0021 
+pr_004522,acct_0049,user_000122,2026-02-14T11:00:00Z,submitted,88000,supplier_0022 +pr_004523,acct_0050,user_000123,2026-02-15T11:00:00Z,approved,89000,supplier_0023 +pr_004524,acct_0010,user_000124,2026-02-16T11:00:00Z,rejected,90000,supplier_0024 +pr_004525,acct_0011,user_000125,2026-02-17T11:00:00Z,cancelled,91000,supplier_0025 +pr_004526,acct_0012,user_000126,2026-02-18T11:00:00Z,draft,92000,supplier_0026 +pr_004527,acct_0013,user_000127,2026-02-19T11:00:00Z,submitted,93000,supplier_0027 +pr_004528,acct_0014,user_000128,2026-02-20T11:00:00Z,approved,94000,supplier_0028 +pr_004529,acct_0015,user_000129,2026-02-21T11:00:00Z,rejected,95000,supplier_0029 +pr_004530,acct_0016,user_000130,2026-02-22T11:00:00Z,cancelled,96000,supplier_0030 +pr_004531,acct_0017,user_000131,2026-02-23T11:00:00Z,draft,97000,supplier_0031 +pr_004532,acct_0018,user_000132,2026-02-24T11:00:00Z,submitted,98000,supplier_0032 +pr_004533,acct_0019,user_000133,2026-02-25T11:00:00Z,approved,99000,supplier_0033 +pr_004534,acct_0020,user_000134,2026-02-26T11:00:00Z,rejected,100000,supplier_0034 +pr_004535,acct_0021,user_000135,2026-02-27T11:00:00Z,cancelled,101000,supplier_0035 +pr_004536,acct_0022,user_000136,2026-02-28T11:00:00Z,draft,102000,supplier_0036 +pr_004537,acct_0023,user_000137,2026-02-01T11:00:00Z,submitted,103000,supplier_0037 +pr_004538,acct_0024,user_000138,2026-02-02T11:00:00Z,approved,104000,supplier_0038 +pr_004539,acct_0025,user_000139,2026-02-03T11:00:00Z,rejected,105000,supplier_0039 +pr_004540,acct_0026,user_000140,2026-02-04T11:00:00Z,cancelled,106000,supplier_0040 +pr_004541,acct_0027,user_000141,2026-02-05T11:00:00Z,draft,107000,supplier_0041 +pr_004542,acct_0028,user_000142,2026-02-06T11:00:00Z,submitted,108000,supplier_0042 +pr_004543,acct_0029,user_000143,2026-02-07T11:00:00Z,approved,109000,supplier_0043 +pr_004544,acct_0030,user_000144,2026-02-08T11:00:00Z,rejected,110000,supplier_0044 
+pr_004545,acct_0031,user_000145,2026-02-09T11:00:00Z,cancelled,10000,supplier_0045 +pr_004546,acct_0032,user_000146,2026-02-10T11:00:00Z,draft,11000,supplier_0046 +pr_004547,acct_0033,user_000147,2026-02-11T11:00:00Z,submitted,12000,supplier_0047 +pr_004548,acct_0034,user_000148,2026-02-12T11:00:00Z,approved,13000,supplier_0048 +pr_004549,acct_0035,user_000149,2026-02-13T11:00:00Z,rejected,14000,supplier_0049 +pr_004550,acct_0036,user_000150,2026-02-14T11:00:00Z,cancelled,15000,supplier_0050 +pr_004551,acct_0037,user_000151,2026-02-15T11:00:00Z,draft,16000,supplier_0051 +pr_004552,acct_0038,user_000152,2026-02-16T11:00:00Z,submitted,17000,supplier_0052 +pr_004553,acct_0039,user_000153,2026-02-17T11:00:00Z,approved,18000,supplier_0053 +pr_004554,acct_0040,user_000154,2026-02-18T11:00:00Z,rejected,19000,supplier_0054 +pr_004555,acct_0041,user_000155,2026-02-19T11:00:00Z,cancelled,20000,supplier_0055 +pr_004556,acct_0042,user_000156,2026-02-20T11:00:00Z,draft,21000,supplier_0056 +pr_004557,acct_0043,user_000157,2026-02-21T11:00:00Z,submitted,22000,supplier_0057 +pr_004558,acct_0044,user_000158,2026-02-22T11:00:00Z,approved,23000,supplier_0058 +pr_004559,acct_0045,user_000159,2026-02-23T11:00:00Z,rejected,24000,supplier_0059 +pr_004560,acct_0046,user_000160,2026-02-24T11:00:00Z,cancelled,25000,supplier_0060 +pr_004561,acct_0047,user_000161,2026-02-25T11:00:00Z,draft,26000,supplier_0061 +pr_004562,acct_0048,user_000162,2026-02-26T11:00:00Z,submitted,27000,supplier_0062 +pr_004563,acct_0049,user_000163,2026-02-27T11:00:00Z,approved,28000,supplier_0063 +pr_004564,acct_0050,user_000164,2026-02-28T11:00:00Z,rejected,29000,supplier_0064 +pr_004565,acct_0010,user_000165,2026-02-01T11:00:00Z,cancelled,30000,supplier_0065 +pr_004566,acct_0011,user_000166,2026-02-02T11:00:00Z,draft,31000,supplier_0066 +pr_004567,acct_0012,user_000167,2026-02-03T11:00:00Z,submitted,32000,supplier_0067 +pr_004568,acct_0013,user_000168,2026-02-04T11:00:00Z,approved,33000,supplier_0068 
+pr_004569,acct_0014,user_000169,2026-02-05T11:00:00Z,rejected,34000,supplier_0069 +pr_004570,acct_0015,user_000170,2026-02-06T11:00:00Z,cancelled,35000,supplier_0070 +pr_004571,acct_0016,user_000171,2026-02-07T11:00:00Z,draft,36000,supplier_0071 +pr_004572,acct_0017,user_000172,2026-02-08T11:00:00Z,submitted,37000,supplier_0072 +pr_004573,acct_0018,user_000173,2026-02-09T11:00:00Z,approved,38000,supplier_0073 +pr_004574,acct_0019,user_000174,2026-02-10T11:00:00Z,rejected,39000,supplier_0074 +pr_004575,acct_0020,user_000175,2026-02-11T11:00:00Z,cancelled,40000,supplier_0075 +pr_004576,acct_0021,user_000176,2026-02-12T11:00:00Z,draft,41000,supplier_0076 +pr_004577,acct_0022,user_000177,2026-02-13T11:00:00Z,submitted,42000,supplier_0077 +pr_004578,acct_0023,user_000178,2026-02-14T11:00:00Z,approved,43000,supplier_0078 +pr_004579,acct_0024,user_000179,2026-02-15T11:00:00Z,rejected,44000,supplier_0079 +pr_004580,acct_0025,user_000180,2026-02-16T11:00:00Z,cancelled,45000,supplier_0080 +pr_004581,acct_0026,user_000181,2026-02-17T11:00:00Z,draft,46000,supplier_0081 +pr_004582,acct_0027,user_000182,2026-02-18T11:00:00Z,submitted,47000,supplier_0082 +pr_004583,acct_0028,user_000183,2026-02-19T11:00:00Z,approved,48000,supplier_0083 +pr_004584,acct_0029,user_000184,2026-02-20T11:00:00Z,rejected,49000,supplier_0084 +pr_004585,acct_0030,user_000185,2026-02-21T11:00:00Z,cancelled,50000,supplier_0085 +pr_004586,acct_0031,user_000186,2026-02-22T11:00:00Z,draft,51000,supplier_0086 +pr_004587,acct_0032,user_000187,2026-02-23T11:00:00Z,submitted,52000,supplier_0087 +pr_004588,acct_0033,user_000188,2026-02-24T11:00:00Z,approved,53000,supplier_0088 +pr_004589,acct_0034,user_000189,2026-02-25T11:00:00Z,rejected,54000,supplier_0089 +pr_004590,acct_0035,user_000190,2026-02-26T11:00:00Z,cancelled,55000,supplier_0090 +pr_004591,acct_0036,user_000191,2026-02-27T11:00:00Z,draft,56000,supplier_0091 +pr_004592,acct_0037,user_000192,2026-02-28T11:00:00Z,submitted,57000,supplier_0092 
+pr_004593,acct_0038,user_000193,2026-02-01T11:00:00Z,approved,58000,supplier_0093 +pr_004594,acct_0039,user_000194,2026-02-02T11:00:00Z,rejected,59000,supplier_0094 +pr_004595,acct_0040,user_000195,2026-02-03T11:00:00Z,cancelled,60000,supplier_0095 +pr_004596,acct_0041,user_000196,2026-02-04T11:00:00Z,draft,61000,supplier_0096 +pr_004597,acct_0042,user_000197,2026-02-05T11:00:00Z,submitted,62000,supplier_0097 +pr_004598,acct_0043,user_000198,2026-02-06T11:00:00Z,approved,63000,supplier_0098 +pr_004599,acct_0044,user_000199,2026-02-07T11:00:00Z,rejected,64000,supplier_0099 +pr_004600,acct_0045,user_000200,2026-02-08T11:00:00Z,cancelled,65000,supplier_0100 +pr_004601,acct_0046,user_000201,2026-02-09T11:00:00Z,draft,66000,supplier_0101 +pr_004602,acct_0047,user_000202,2026-02-10T11:00:00Z,submitted,67000,supplier_0102 +pr_004603,acct_0048,user_000203,2026-02-11T11:00:00Z,approved,68000,supplier_0103 +pr_004604,acct_0049,user_000204,2026-02-12T11:00:00Z,rejected,69000,supplier_0104 +pr_004605,acct_0050,user_000205,2026-02-13T11:00:00Z,cancelled,70000,supplier_0105 +pr_004606,acct_0010,user_000206,2026-02-14T11:00:00Z,draft,71000,supplier_0106 +pr_004607,acct_0011,user_000207,2026-02-15T11:00:00Z,submitted,72000,supplier_0107 +pr_004608,acct_0012,user_000208,2026-02-16T11:00:00Z,approved,73000,supplier_0108 +pr_004609,acct_0013,user_000209,2026-02-17T11:00:00Z,rejected,74000,supplier_0109 +pr_004610,acct_0014,user_000210,2026-02-18T11:00:00Z,cancelled,75000,supplier_0110 +pr_004611,acct_0015,user_000211,2026-02-19T11:00:00Z,draft,76000,supplier_0111 +pr_004612,acct_0016,user_000212,2026-02-20T11:00:00Z,submitted,77000,supplier_0112 +pr_004613,acct_0017,user_000213,2026-02-21T11:00:00Z,approved,78000,supplier_0113 +pr_004614,acct_0018,user_000214,2026-02-22T11:00:00Z,rejected,79000,supplier_0114 +pr_004615,acct_0019,user_000215,2026-02-23T11:00:00Z,cancelled,80000,supplier_0115 +pr_004616,acct_0020,user_000216,2026-02-24T11:00:00Z,draft,81000,supplier_0116 
+pr_004617,acct_0021,user_000217,2026-02-25T11:00:00Z,submitted,82000,supplier_0117 +pr_004618,acct_0022,user_000218,2026-02-26T11:00:00Z,approved,83000,supplier_0118 +pr_004619,acct_0023,user_000219,2026-02-27T11:00:00Z,rejected,84000,supplier_0119 +pr_004620,acct_0024,user_000220,2026-02-28T11:00:00Z,cancelled,85000,supplier_0120 +pr_004621,acct_0025,user_000221,2026-02-01T11:00:00Z,draft,86000,supplier_0121 +pr_004622,acct_0026,user_000222,2026-02-02T11:00:00Z,submitted,87000,supplier_0122 +pr_004623,acct_0027,user_000223,2026-02-03T11:00:00Z,approved,88000,supplier_0123 +pr_004624,acct_0028,user_000224,2026-02-04T11:00:00Z,rejected,89000,supplier_0124 +pr_004625,acct_0029,user_000225,2026-02-05T11:00:00Z,cancelled,90000,supplier_0125 +pr_004626,acct_0030,user_000226,2026-02-06T11:00:00Z,draft,91000,supplier_0126 +pr_004627,acct_0031,user_000227,2026-02-07T11:00:00Z,submitted,92000,supplier_0127 +pr_004628,acct_0032,user_000228,2026-02-08T11:00:00Z,approved,93000,supplier_0128 +pr_004629,acct_0033,user_000229,2026-02-09T11:00:00Z,rejected,94000,supplier_0129 +pr_004630,acct_0034,user_000230,2026-02-10T11:00:00Z,cancelled,95000,supplier_0130 +pr_004631,acct_0035,user_000231,2026-02-11T11:00:00Z,draft,96000,supplier_0131 +pr_004632,acct_0036,user_000232,2026-02-12T11:00:00Z,submitted,97000,supplier_0132 +pr_004633,acct_0037,user_000233,2026-02-13T11:00:00Z,approved,98000,supplier_0133 +pr_004634,acct_0038,user_000234,2026-02-14T11:00:00Z,rejected,99000,supplier_0134 +pr_004635,acct_0039,user_000235,2026-02-15T11:00:00Z,cancelled,100000,supplier_0135 +pr_004636,acct_0040,user_000236,2026-02-16T11:00:00Z,draft,101000,supplier_0136 +pr_004637,acct_0041,user_000237,2026-02-17T11:00:00Z,submitted,102000,supplier_0137 +pr_004638,acct_0042,user_000238,2026-02-18T11:00:00Z,approved,103000,supplier_0138 +pr_004639,acct_0043,user_000239,2026-02-19T11:00:00Z,rejected,104000,supplier_0139 +pr_004640,acct_0044,user_000240,2026-02-20T11:00:00Z,cancelled,105000,supplier_0140 
+pr_004641,acct_0045,user_000241,2026-02-21T11:00:00Z,draft,106000,supplier_0141 +pr_004642,acct_0046,user_000242,2026-02-22T11:00:00Z,submitted,107000,supplier_0142 +pr_004643,acct_0047,user_000243,2026-02-23T11:00:00Z,approved,108000,supplier_0143 +pr_004644,acct_0048,user_000244,2026-02-24T11:00:00Z,rejected,109000,supplier_0144 +pr_004645,acct_0049,user_000245,2026-02-25T11:00:00Z,cancelled,110000,supplier_0145 +pr_004646,acct_0050,user_000246,2026-02-26T11:00:00Z,draft,10000,supplier_0146 +pr_004647,acct_0010,user_000247,2026-02-27T11:00:00Z,submitted,11000,supplier_0147 +pr_004648,acct_0011,user_000248,2026-02-28T11:00:00Z,approved,12000,supplier_0148 +pr_004649,acct_0012,user_000249,2026-02-01T11:00:00Z,rejected,13000,supplier_0149 +pr_004650,acct_0013,user_000250,2026-02-02T11:00:00Z,cancelled,14000,supplier_0150 +pr_004651,acct_0014,user_000251,2026-02-03T11:00:00Z,draft,15000,supplier_0151 +pr_004652,acct_0015,user_000252,2026-02-04T11:00:00Z,submitted,16000,supplier_0152 +pr_004653,acct_0016,user_000253,2026-02-05T11:00:00Z,approved,17000,supplier_0153 +pr_004654,acct_0017,user_000254,2026-02-06T11:00:00Z,rejected,18000,supplier_0154 +pr_004655,acct_0018,user_000255,2026-02-07T11:00:00Z,cancelled,19000,supplier_0155 +pr_004656,acct_0019,user_000256,2026-02-08T11:00:00Z,draft,20000,supplier_0156 +pr_004657,acct_0020,user_000257,2026-02-09T11:00:00Z,submitted,21000,supplier_0157 +pr_004658,acct_0021,user_000258,2026-02-10T11:00:00Z,approved,22000,supplier_0158 +pr_004659,acct_0022,user_000259,2026-02-11T11:00:00Z,rejected,23000,supplier_0159 +pr_004660,acct_0023,user_000260,2026-02-12T11:00:00Z,cancelled,24000,supplier_0160 +pr_004661,acct_0024,user_000261,2026-02-13T11:00:00Z,draft,25000,supplier_0161 +pr_004662,acct_0025,user_000262,2026-02-14T11:00:00Z,submitted,26000,supplier_0162 +pr_004663,acct_0026,user_000263,2026-02-15T11:00:00Z,approved,27000,supplier_0163 +pr_004664,acct_0027,user_000264,2026-02-16T11:00:00Z,rejected,28000,supplier_0164 
+pr_004665,acct_0028,user_000265,2026-02-17T11:00:00Z,cancelled,29000,supplier_0165 +pr_004666,acct_0029,user_000266,2026-02-18T11:00:00Z,draft,30000,supplier_0166 +pr_004667,acct_0030,user_000267,2026-02-19T11:00:00Z,submitted,31000,supplier_0167 +pr_004668,acct_0031,user_000268,2026-02-20T11:00:00Z,approved,32000,supplier_0168 +pr_004669,acct_0032,user_000269,2026-02-21T11:00:00Z,rejected,33000,supplier_0169 +pr_004670,acct_0033,user_000270,2026-02-22T11:00:00Z,cancelled,34000,supplier_0170 +pr_004671,acct_0034,user_000271,2026-02-23T11:00:00Z,draft,35000,supplier_0171 +pr_004672,acct_0035,user_000272,2026-02-24T11:00:00Z,submitted,36000,supplier_0172 +pr_004673,acct_0036,user_000273,2026-02-25T11:00:00Z,approved,37000,supplier_0173 +pr_004674,acct_0037,user_000274,2026-02-26T11:00:00Z,rejected,38000,supplier_0174 +pr_004675,acct_0038,user_000275,2026-02-27T11:00:00Z,cancelled,39000,supplier_0175 +pr_004676,acct_0039,user_000276,2026-02-28T11:00:00Z,draft,40000,supplier_0176 +pr_004677,acct_0040,user_000277,2026-02-01T11:00:00Z,submitted,41000,supplier_0177 +pr_004678,acct_0041,user_000278,2026-02-02T11:00:00Z,approved,42000,supplier_0178 +pr_004679,acct_0042,user_000279,2026-02-03T11:00:00Z,rejected,43000,supplier_0179 +pr_004680,acct_0043,user_000280,2026-02-04T11:00:00Z,cancelled,44000,supplier_0180 +pr_004681,acct_0044,user_000281,2026-02-05T11:00:00Z,draft,45000,supplier_0181 +pr_004682,acct_0045,user_000282,2026-02-06T11:00:00Z,submitted,46000,supplier_0182 +pr_004683,acct_0046,user_000283,2026-02-07T11:00:00Z,approved,47000,supplier_0183 +pr_004684,acct_0047,user_000284,2026-02-08T11:00:00Z,rejected,48000,supplier_0184 +pr_004685,acct_0048,user_000285,2026-02-09T11:00:00Z,cancelled,49000,supplier_0185 +pr_004686,acct_0049,user_000286,2026-02-10T11:00:00Z,draft,50000,supplier_0186 +pr_004687,acct_0001,user_000287,2026-02-11T11:00:00Z,submitted,51000,supplier_0187 +pr_004688,acct_0002,user_000288,2026-02-12T11:00:00Z,approved,52000,supplier_0188 
+pr_004689,acct_0003,user_000289,2026-02-13T11:00:00Z,rejected,53000,supplier_0189 +pr_004690,acct_0004,user_000290,2026-02-14T11:00:00Z,cancelled,54000,supplier_0190 +pr_004691,acct_0005,user_000291,2026-02-15T11:00:00Z,draft,55000,supplier_0191 +pr_004692,acct_0006,user_000292,2026-02-16T11:00:00Z,submitted,56000,supplier_0192 +pr_004693,acct_0007,user_000293,2026-02-17T11:00:00Z,approved,57000,supplier_0193 +pr_004694,acct_0008,user_000294,2026-02-18T11:00:00Z,rejected,58000,supplier_0194 +pr_004695,acct_0009,user_000295,2026-02-19T11:00:00Z,cancelled,59000,supplier_0195 +pr_004696,acct_0010,user_000296,2026-02-20T11:00:00Z,draft,60000,supplier_0196 +pr_004697,acct_0011,user_000297,2026-02-21T11:00:00Z,submitted,61000,supplier_0197 +pr_004698,acct_0012,user_000298,2026-02-22T11:00:00Z,approved,62000,supplier_0198 +pr_004699,acct_0013,user_000299,2026-02-23T11:00:00Z,rejected,63000,supplier_0199 +pr_004700,acct_0014,user_000300,2026-02-24T11:00:00Z,cancelled,64000,supplier_0200 +pr_004701,acct_0015,user_000301,2026-02-25T11:00:00Z,draft,65000,supplier_0201 +pr_004702,acct_0016,user_000302,2026-02-26T11:00:00Z,submitted,66000,supplier_0202 +pr_004703,acct_0017,user_000303,2026-02-27T11:00:00Z,approved,67000,supplier_0203 +pr_004704,acct_0018,user_000304,2026-02-28T11:00:00Z,rejected,68000,supplier_0204 +pr_004705,acct_0019,user_000305,2026-02-01T11:00:00Z,cancelled,69000,supplier_0205 +pr_004706,acct_0020,user_000306,2026-02-02T11:00:00Z,draft,70000,supplier_0206 +pr_004707,acct_0021,user_000307,2026-02-03T11:00:00Z,submitted,71000,supplier_0207 +pr_004708,acct_0022,user_000308,2026-02-04T11:00:00Z,approved,72000,supplier_0208 +pr_004709,acct_0023,user_000309,2026-02-05T11:00:00Z,rejected,73000,supplier_0209 +pr_004710,acct_0024,user_000310,2026-02-06T11:00:00Z,cancelled,74000,supplier_0210 +pr_004711,acct_0025,user_000311,2026-02-07T11:00:00Z,draft,75000,supplier_0211 +pr_004712,acct_0026,user_000312,2026-02-08T11:00:00Z,submitted,76000,supplier_0212 
+pr_004713,acct_0027,user_000313,2026-02-09T11:00:00Z,approved,77000,supplier_0213 +pr_004714,acct_0028,user_000314,2026-02-10T11:00:00Z,rejected,78000,supplier_0214 +pr_004715,acct_0029,user_000315,2026-02-11T11:00:00Z,cancelled,79000,supplier_0215 +pr_004716,acct_0030,user_000316,2026-02-12T11:00:00Z,draft,80000,supplier_0216 +pr_004717,acct_0031,user_000317,2026-02-13T11:00:00Z,submitted,81000,supplier_0217 +pr_004718,acct_0032,user_000318,2026-02-14T11:00:00Z,approved,82000,supplier_0218 +pr_004719,acct_0033,user_000319,2026-02-15T11:00:00Z,rejected,83000,supplier_0219 +pr_004720,acct_0034,user_000320,2026-02-16T11:00:00Z,cancelled,84000,supplier_0220 +pr_004721,acct_0035,user_000321,2026-02-17T11:00:00Z,draft,85000,supplier_0221 +pr_004722,acct_0036,user_000322,2026-02-18T11:00:00Z,submitted,86000,supplier_0222 +pr_004723,acct_0037,user_000323,2026-02-19T11:00:00Z,approved,87000,supplier_0223 +pr_004724,acct_0038,user_000324,2026-02-20T11:00:00Z,rejected,88000,supplier_0224 +pr_004725,acct_0039,user_000325,2026-02-21T11:00:00Z,cancelled,89000,supplier_0225 +pr_004726,acct_0040,user_000326,2026-02-22T11:00:00Z,draft,90000,supplier_0226 +pr_004727,acct_0041,user_000327,2026-02-23T11:00:00Z,submitted,91000,supplier_0227 +pr_004728,acct_0042,user_000328,2026-02-24T11:00:00Z,approved,92000,supplier_0228 +pr_004729,acct_0043,user_000329,2026-02-25T11:00:00Z,rejected,93000,supplier_0229 +pr_004730,acct_0044,user_000330,2026-02-26T11:00:00Z,cancelled,94000,supplier_0230 +pr_004731,acct_0045,user_000331,2026-02-27T11:00:00Z,draft,95000,supplier_0231 +pr_004732,acct_0046,user_000332,2026-02-28T11:00:00Z,submitted,96000,supplier_0232 +pr_004733,acct_0047,user_000333,2026-02-01T11:00:00Z,approved,97000,supplier_0233 +pr_004734,acct_0048,user_000334,2026-02-02T11:00:00Z,rejected,98000,supplier_0234 +pr_004735,acct_0049,user_000335,2026-02-03T11:00:00Z,cancelled,99000,supplier_0235 +pr_004736,acct_0050,user_000336,2026-02-04T11:00:00Z,draft,100000,supplier_0236 
+pr_004737,acct_0051,user_000337,2026-02-05T11:00:00Z,submitted,101000,supplier_0237 +pr_004738,acct_0052,user_000338,2026-02-06T11:00:00Z,approved,102000,supplier_0238 +pr_004739,acct_0053,user_000339,2026-02-07T11:00:00Z,rejected,103000,supplier_0239 +pr_004740,acct_0054,user_000340,2026-02-08T11:00:00Z,cancelled,104000,supplier_0240 +pr_004741,acct_0055,user_000341,2026-02-09T11:00:00Z,draft,105000,supplier_0241 +pr_004742,acct_0056,user_000342,2026-02-10T11:00:00Z,submitted,106000,supplier_0242 +pr_004743,acct_0057,user_000343,2026-02-11T11:00:00Z,approved,107000,supplier_0243 +pr_004744,acct_0058,user_000344,2026-02-12T11:00:00Z,rejected,108000,supplier_0244 +pr_004745,acct_0059,user_000345,2026-02-13T11:00:00Z,cancelled,109000,supplier_0245 +pr_004746,acct_0060,user_000346,2026-02-14T11:00:00Z,draft,110000,supplier_0246 +pr_004747,acct_0061,user_000347,2026-02-15T11:00:00Z,submitted,10000,supplier_0247 +pr_004748,acct_0062,user_000348,2026-02-16T11:00:00Z,approved,11000,supplier_0248 +pr_004749,acct_0063,user_000349,2026-02-17T11:00:00Z,rejected,12000,supplier_0249 +pr_004750,acct_0064,user_000350,2026-02-18T11:00:00Z,cancelled,13000,supplier_0250 +pr_004751,acct_0065,user_000351,2026-02-19T11:00:00Z,draft,14000,supplier_0251 +pr_004752,acct_0066,user_000352,2026-02-20T11:00:00Z,submitted,15000,supplier_0252 +pr_004753,acct_0067,user_000353,2026-02-21T11:00:00Z,approved,16000,supplier_0253 +pr_004754,acct_0068,user_000354,2026-02-22T11:00:00Z,rejected,17000,supplier_0254 +pr_004755,acct_0069,user_000355,2026-02-23T11:00:00Z,cancelled,18000,supplier_0255 +pr_004756,acct_0070,user_000356,2026-02-24T11:00:00Z,draft,19000,supplier_0256 +pr_004757,acct_0071,user_000357,2026-02-25T11:00:00Z,submitted,20000,supplier_0257 +pr_004758,acct_0072,user_000358,2026-02-26T11:00:00Z,approved,21000,supplier_0258 +pr_004759,acct_0073,user_000359,2026-02-27T11:00:00Z,rejected,22000,supplier_0259 +pr_004760,acct_0074,user_000360,2026-02-28T11:00:00Z,cancelled,23000,supplier_0260 
+pr_004761,acct_0075,user_000361,2026-02-01T11:00:00Z,draft,24000,supplier_0261 +pr_004762,acct_0076,user_000362,2026-02-02T11:00:00Z,submitted,25000,supplier_0262 +pr_004763,acct_0077,user_000363,2026-02-03T11:00:00Z,approved,26000,supplier_0263 +pr_004764,acct_0078,user_000364,2026-02-04T11:00:00Z,rejected,27000,supplier_0264 +pr_004765,acct_0079,user_000365,2026-02-05T11:00:00Z,cancelled,28000,supplier_0265 +pr_004766,acct_0080,user_000366,2026-02-06T11:00:00Z,draft,29000,supplier_0266 +pr_004767,acct_0081,user_000367,2026-02-07T11:00:00Z,submitted,30000,supplier_0267 +pr_004768,acct_0082,user_000368,2026-02-08T11:00:00Z,approved,31000,supplier_0268 +pr_004769,acct_0083,user_000369,2026-02-09T11:00:00Z,rejected,32000,supplier_0269 +pr_004770,acct_0084,user_000370,2026-02-10T11:00:00Z,cancelled,33000,supplier_0270 +pr_004771,acct_0085,user_000371,2026-02-11T11:00:00Z,draft,34000,supplier_0271 +pr_004772,acct_0086,user_000372,2026-02-12T11:00:00Z,submitted,35000,supplier_0272 +pr_004773,acct_0087,user_000373,2026-02-13T11:00:00Z,approved,36000,supplier_0273 +pr_004774,acct_0088,user_000374,2026-02-14T11:00:00Z,rejected,37000,supplier_0274 +pr_004775,acct_0089,user_000375,2026-02-15T11:00:00Z,cancelled,38000,supplier_0275 +pr_004776,acct_0090,user_000376,2026-02-16T11:00:00Z,draft,39000,supplier_0276 +pr_004777,acct_0091,user_000377,2026-02-17T11:00:00Z,submitted,40000,supplier_0277 +pr_004778,acct_0092,user_000378,2026-02-18T11:00:00Z,approved,41000,supplier_0278 +pr_004779,acct_0093,user_000379,2026-02-19T11:00:00Z,rejected,42000,supplier_0279 +pr_004780,acct_0094,user_000380,2026-02-20T11:00:00Z,cancelled,43000,supplier_0280 +pr_004781,acct_0095,user_000381,2026-02-21T11:00:00Z,draft,44000,supplier_0281 +pr_004782,acct_0096,user_000382,2026-02-22T11:00:00Z,submitted,45000,supplier_0282 +pr_004783,acct_0097,user_000383,2026-02-23T11:00:00Z,approved,46000,supplier_0283 +pr_004784,acct_0098,user_000384,2026-02-24T11:00:00Z,rejected,47000,supplier_0284 
+pr_004785,acct_0099,user_000385,2026-02-25T11:00:00Z,cancelled,48000,supplier_0285 +pr_004786,acct_0100,user_000386,2026-02-26T11:00:00Z,draft,49000,supplier_0286 +pr_004787,acct_0101,user_000387,2026-02-27T11:00:00Z,submitted,50000,supplier_0287 +pr_004788,acct_0102,user_000388,2026-02-28T11:00:00Z,approved,51000,supplier_0288 +pr_004789,acct_0103,user_000389,2026-02-01T11:00:00Z,rejected,52000,supplier_0289 +pr_004790,acct_0104,user_000390,2026-02-02T11:00:00Z,cancelled,53000,supplier_0290 +pr_004791,acct_0105,user_000391,2026-02-03T11:00:00Z,draft,54000,supplier_0291 +pr_004792,acct_0106,user_000392,2026-02-04T11:00:00Z,submitted,55000,supplier_0292 +pr_004793,acct_0107,user_000393,2026-02-05T11:00:00Z,approved,56000,supplier_0293 +pr_004794,acct_0108,user_000394,2026-02-06T11:00:00Z,rejected,57000,supplier_0294 +pr_004795,acct_0109,user_000395,2026-02-07T11:00:00Z,cancelled,58000,supplier_0295 +pr_004796,acct_0110,user_000396,2026-02-08T11:00:00Z,draft,59000,supplier_0296 +pr_004797,acct_0111,user_000397,2026-02-09T11:00:00Z,submitted,60000,supplier_0297 +pr_004798,acct_0112,user_000398,2026-02-10T11:00:00Z,approved,61000,supplier_0298 +pr_004799,acct_0113,user_000399,2026-02-11T11:00:00Z,rejected,62000,supplier_0299 +pr_004800,acct_0114,user_000400,2026-02-12T11:00:00Z,cancelled,63000,supplier_0300 +pr_004801,acct_0115,user_000401,2026-02-13T11:00:00Z,draft,64000,supplier_0301 +pr_004802,acct_0116,user_000402,2026-02-14T11:00:00Z,submitted,65000,supplier_0302 +pr_004803,acct_0117,user_000403,2026-02-15T11:00:00Z,approved,66000,supplier_0303 +pr_004804,acct_0118,user_000404,2026-02-16T11:00:00Z,rejected,67000,supplier_0304 +pr_004805,acct_0119,user_000405,2026-02-17T11:00:00Z,cancelled,68000,supplier_0305 +pr_004806,acct_0120,user_000406,2026-02-18T11:00:00Z,draft,69000,supplier_0306 +pr_004807,acct_0121,user_000407,2026-02-19T11:00:00Z,submitted,70000,supplier_0307 +pr_004808,acct_0122,user_000408,2026-02-20T11:00:00Z,approved,71000,supplier_0308 
+pr_004809,acct_0123,user_000409,2026-02-21T11:00:00Z,rejected,72000,supplier_0309 +pr_004810,acct_0124,user_000410,2026-02-22T11:00:00Z,cancelled,73000,supplier_0310 +pr_004811,acct_0125,user_000411,2026-02-23T11:00:00Z,draft,74000,supplier_0311 +pr_004812,acct_0126,user_000412,2026-02-24T11:00:00Z,submitted,75000,supplier_0312 +pr_004813,acct_0127,user_000413,2026-02-25T11:00:00Z,approved,76000,supplier_0313 +pr_004814,acct_0128,user_000414,2026-02-26T11:00:00Z,rejected,77000,supplier_0314 +pr_004815,acct_0129,user_000415,2026-02-27T11:00:00Z,cancelled,78000,supplier_0315 +pr_004816,acct_0130,user_000416,2026-02-28T11:00:00Z,draft,79000,supplier_0316 +pr_004817,acct_0131,user_000417,2026-02-01T11:00:00Z,submitted,80000,supplier_0317 +pr_004818,acct_0132,user_000418,2026-02-02T11:00:00Z,approved,81000,supplier_0318 +pr_004819,acct_0133,user_000419,2026-02-03T11:00:00Z,rejected,82000,supplier_0319 +pr_004820,acct_0134,user_000420,2026-02-04T11:00:00Z,cancelled,83000,supplier_0320 +pr_004821,acct_0135,user_000421,2026-02-05T11:00:00Z,draft,84000,supplier_0321 +pr_004822,acct_0136,user_000422,2026-02-06T11:00:00Z,submitted,85000,supplier_0322 +pr_004823,acct_0137,user_000423,2026-02-07T11:00:00Z,approved,86000,supplier_0323 +pr_004824,acct_0138,user_000424,2026-02-08T11:00:00Z,rejected,87000,supplier_0324 +pr_004825,acct_0139,user_000425,2026-02-09T11:00:00Z,cancelled,88000,supplier_0325 +pr_004826,acct_0140,user_000426,2026-02-10T11:00:00Z,draft,89000,supplier_0326 +pr_004827,acct_0141,user_000427,2026-02-11T11:00:00Z,submitted,90000,supplier_0327 +pr_004828,acct_0142,user_000428,2026-02-12T11:00:00Z,approved,91000,supplier_0328 +pr_004829,acct_0143,user_000429,2026-02-13T11:00:00Z,rejected,92000,supplier_0329 +pr_004830,acct_0144,user_000430,2026-02-14T11:00:00Z,cancelled,93000,supplier_0330 +pr_004831,acct_0145,user_000431,2026-02-15T11:00:00Z,draft,94000,supplier_0331 +pr_004832,acct_0146,user_000432,2026-02-16T11:00:00Z,submitted,95000,supplier_0332 
+pr_004833,acct_0147,user_000433,2026-02-17T11:00:00Z,approved,96000,supplier_0333 +pr_004834,acct_0148,user_000434,2026-02-18T11:00:00Z,rejected,97000,supplier_0334 +pr_004835,acct_0149,user_000435,2026-02-19T11:00:00Z,cancelled,98000,supplier_0335 +pr_004836,acct_0150,user_000436,2026-02-20T11:00:00Z,draft,99000,supplier_0336 +pr_004837,acct_0151,user_000437,2026-02-21T11:00:00Z,submitted,100000,supplier_0337 +pr_004838,acct_0152,user_000438,2026-02-22T11:00:00Z,approved,101000,supplier_0338 +pr_004839,acct_0153,user_000439,2026-02-23T11:00:00Z,rejected,102000,supplier_0339 +pr_004840,acct_0154,user_000440,2026-02-24T11:00:00Z,cancelled,103000,supplier_0340 +pr_004841,acct_0155,user_000441,2026-02-25T11:00:00Z,draft,104000,supplier_0341 +pr_004842,acct_0156,user_000442,2026-02-26T11:00:00Z,submitted,105000,supplier_0342 +pr_004843,acct_0157,user_000443,2026-02-27T11:00:00Z,approved,106000,supplier_0343 +pr_004844,acct_0158,user_000444,2026-02-28T11:00:00Z,rejected,107000,supplier_0344 +pr_004845,acct_0159,user_000445,2026-02-01T11:00:00Z,cancelled,108000,supplier_0345 +pr_004846,acct_0160,user_000446,2026-02-02T11:00:00Z,draft,109000,supplier_0346 +pr_004847,acct_0161,user_000447,2026-02-03T11:00:00Z,submitted,110000,supplier_0347 +pr_004848,acct_0162,user_000448,2026-02-04T11:00:00Z,approved,10000,supplier_0348 +pr_004849,acct_0163,user_000449,2026-02-05T11:00:00Z,rejected,11000,supplier_0349 +pr_004850,acct_0164,user_000450,2026-02-06T11:00:00Z,cancelled,12000,supplier_0350 +pr_004851,acct_0165,user_000451,2026-02-07T11:00:00Z,draft,13000,supplier_0351 +pr_004852,acct_0166,user_000452,2026-02-08T11:00:00Z,submitted,14000,supplier_0352 +pr_004853,acct_0167,user_000453,2026-02-09T11:00:00Z,approved,15000,supplier_0353 +pr_004854,acct_0168,user_000454,2026-02-10T11:00:00Z,rejected,16000,supplier_0354 +pr_004855,acct_0169,user_000455,2026-02-11T11:00:00Z,cancelled,17000,supplier_0355 +pr_004856,acct_0170,user_000456,2026-02-12T11:00:00Z,draft,18000,supplier_0356 
+pr_004857,acct_0171,user_000457,2026-02-13T11:00:00Z,submitted,19000,supplier_0357 +pr_004858,acct_0172,user_000458,2026-02-14T11:00:00Z,approved,20000,supplier_0358 +pr_004859,acct_0173,user_000459,2026-02-15T11:00:00Z,rejected,21000,supplier_0359 +pr_004860,acct_0174,user_000460,2026-02-16T11:00:00Z,cancelled,22000,supplier_0360 +pr_004861,acct_0175,user_000461,2026-02-17T11:00:00Z,draft,23000,supplier_0361 +pr_004862,acct_0176,user_000462,2026-02-18T11:00:00Z,submitted,24000,supplier_0362 +pr_004863,acct_0177,user_000463,2026-02-19T11:00:00Z,approved,25000,supplier_0363 +pr_004864,acct_0178,user_000464,2026-02-20T11:00:00Z,rejected,26000,supplier_0364 +pr_004865,acct_0179,user_000465,2026-02-21T11:00:00Z,cancelled,27000,supplier_0365 +pr_004866,acct_0180,user_000466,2026-02-22T11:00:00Z,draft,28000,supplier_0366 +pr_004867,acct_0181,user_000467,2026-02-23T11:00:00Z,submitted,29000,supplier_0367 +pr_004868,acct_0182,user_000468,2026-02-24T11:00:00Z,approved,30000,supplier_0368 +pr_004869,acct_0183,user_000469,2026-02-25T11:00:00Z,rejected,31000,supplier_0369 +pr_004870,acct_0184,user_000470,2026-02-26T11:00:00Z,cancelled,32000,supplier_0370 +pr_004871,acct_0185,user_000471,2026-02-27T11:00:00Z,draft,33000,supplier_0371 +pr_004872,acct_0186,user_000472,2026-02-28T11:00:00Z,submitted,34000,supplier_0372 +pr_004873,acct_0187,user_000473,2026-02-01T11:00:00Z,approved,35000,supplier_0373 +pr_004874,acct_0188,user_000474,2026-02-02T11:00:00Z,rejected,36000,supplier_0374 +pr_004875,acct_0189,user_000475,2026-02-03T11:00:00Z,cancelled,37000,supplier_0375 +pr_004876,acct_0190,user_000476,2026-02-04T11:00:00Z,draft,38000,supplier_0376 +pr_004877,acct_0001,user_000477,2026-02-05T11:00:00Z,submitted,39000,supplier_0377 +pr_004878,acct_0002,user_000478,2026-02-06T11:00:00Z,approved,40000,supplier_0378 +pr_004879,acct_0003,user_000479,2026-02-07T11:00:00Z,rejected,41000,supplier_0379 +pr_004880,acct_0004,user_000480,2026-02-08T11:00:00Z,cancelled,42000,supplier_0380 
+pr_004881,acct_0005,user_000481,2026-02-09T11:00:00Z,draft,43000,supplier_0381 +pr_004882,acct_0006,user_000482,2026-02-10T11:00:00Z,submitted,44000,supplier_0382 +pr_004883,acct_0007,user_000483,2026-02-11T11:00:00Z,approved,45000,supplier_0383 +pr_004884,acct_0008,user_000484,2026-02-12T11:00:00Z,rejected,46000,supplier_0384 +pr_004885,acct_0009,user_000485,2026-02-13T11:00:00Z,cancelled,47000,supplier_0385 +pr_004886,acct_0010,user_000486,2026-02-14T11:00:00Z,draft,48000,supplier_0386 +pr_004887,acct_0011,user_000487,2026-02-15T11:00:00Z,submitted,49000,supplier_0387 +pr_004888,acct_0012,user_000488,2026-02-16T11:00:00Z,approved,50000,supplier_0388 +pr_004889,acct_0013,user_000489,2026-02-17T11:00:00Z,rejected,51000,supplier_0389 +pr_004890,acct_0014,user_000490,2026-02-18T11:00:00Z,cancelled,52000,supplier_0390 +pr_004891,acct_0015,user_000491,2026-02-19T11:00:00Z,draft,53000,supplier_0391 +pr_004892,acct_0016,user_000492,2026-02-20T11:00:00Z,submitted,54000,supplier_0392 +pr_004893,acct_0017,user_000493,2026-02-21T11:00:00Z,approved,55000,supplier_0393 +pr_004894,acct_0018,user_000494,2026-02-22T11:00:00Z,rejected,56000,supplier_0394 +pr_004895,acct_0019,user_000495,2026-02-23T11:00:00Z,cancelled,57000,supplier_0395 +pr_004896,acct_0020,user_000496,2026-02-24T11:00:00Z,draft,58000,supplier_0396 +pr_004897,acct_0021,user_000497,2026-02-25T11:00:00Z,submitted,59000,supplier_0397 +pr_004898,acct_0022,user_000498,2026-02-26T11:00:00Z,approved,60000,supplier_0398 +pr_004899,acct_0023,user_000499,2026-02-27T11:00:00Z,rejected,61000,supplier_0399 +pr_004900,acct_0024,user_000500,2026-02-28T11:00:00Z,cancelled,62000,supplier_0400 +pr_004901,acct_0025,user_000501,2026-02-01T11:00:00Z,draft,63000,supplier_0401 +pr_004902,acct_0026,user_000502,2026-02-02T11:00:00Z,submitted,64000,supplier_0402 +pr_004903,acct_0027,user_000503,2026-02-03T11:00:00Z,approved,65000,supplier_0403 +pr_004904,acct_0028,user_000504,2026-02-04T11:00:00Z,rejected,66000,supplier_0404 
+pr_004905,acct_0029,user_000505,2026-02-05T11:00:00Z,cancelled,67000,supplier_0405 +pr_004906,acct_0030,user_000506,2026-02-06T11:00:00Z,draft,68000,supplier_0406 +pr_004907,acct_0031,user_000507,2026-02-07T11:00:00Z,submitted,69000,supplier_0407 +pr_004908,acct_0032,user_000508,2026-02-08T11:00:00Z,approved,70000,supplier_0408 +pr_004909,acct_0033,user_000509,2026-02-09T11:00:00Z,rejected,71000,supplier_0409 +pr_004910,acct_0034,user_000510,2026-02-10T11:00:00Z,cancelled,72000,supplier_0410 +pr_004911,acct_0035,user_000511,2026-02-11T11:00:00Z,draft,73000,supplier_0411 +pr_004912,acct_0036,user_000512,2026-02-12T11:00:00Z,submitted,74000,supplier_0412 +pr_004913,acct_0037,user_000513,2026-02-13T11:00:00Z,approved,75000,supplier_0413 +pr_004914,acct_0038,user_000514,2026-02-14T11:00:00Z,rejected,76000,supplier_0414 +pr_004915,acct_0039,user_000515,2026-02-15T11:00:00Z,cancelled,77000,supplier_0415 +pr_004916,acct_0040,user_000516,2026-02-16T11:00:00Z,draft,78000,supplier_0416 +pr_004917,acct_0041,user_000517,2026-02-17T11:00:00Z,submitted,79000,supplier_0417 +pr_004918,acct_0042,user_000518,2026-02-18T11:00:00Z,approved,80000,supplier_0418 +pr_004919,acct_0043,user_000519,2026-02-19T11:00:00Z,rejected,81000,supplier_0419 +pr_004920,acct_0044,user_000520,2026-02-20T11:00:00Z,cancelled,82000,supplier_0420 +pr_004921,acct_0045,user_000521,2026-02-21T11:00:00Z,draft,83000,supplier_0421 +pr_004922,acct_0046,user_000522,2026-02-22T11:00:00Z,submitted,84000,supplier_0422 +pr_004923,acct_0047,user_000523,2026-02-23T11:00:00Z,approved,85000,supplier_0423 +pr_004924,acct_0048,user_000524,2026-02-24T11:00:00Z,rejected,86000,supplier_0424 +pr_004925,acct_0049,user_000525,2026-02-25T11:00:00Z,cancelled,87000,supplier_0425 +pr_004926,acct_0050,user_000526,2026-02-26T11:00:00Z,draft,88000,supplier_0426 +pr_004927,acct_0051,user_000527,2026-02-27T11:00:00Z,submitted,89000,supplier_0427 +pr_004928,acct_0052,user_000528,2026-02-28T11:00:00Z,approved,90000,supplier_0428 
+pr_004929,acct_0053,user_000529,2026-02-01T11:00:00Z,rejected,91000,supplier_0429 +pr_004930,acct_0054,user_000530,2026-02-02T11:00:00Z,cancelled,92000,supplier_0430 +pr_004931,acct_0055,user_000531,2026-02-03T11:00:00Z,draft,93000,supplier_0431 +pr_004932,acct_0056,user_000532,2026-02-04T11:00:00Z,submitted,94000,supplier_0432 +pr_004933,acct_0057,user_000533,2026-02-05T11:00:00Z,approved,95000,supplier_0433 +pr_004934,acct_0058,user_000534,2026-02-06T11:00:00Z,rejected,96000,supplier_0434 +pr_004935,acct_0059,user_000535,2026-02-07T11:00:00Z,cancelled,97000,supplier_0435 +pr_004936,acct_0060,user_000536,2026-02-08T11:00:00Z,draft,98000,supplier_0436 +pr_004937,acct_0061,user_000537,2026-02-09T11:00:00Z,submitted,99000,supplier_0437 +pr_004938,acct_0062,user_000538,2026-02-10T11:00:00Z,approved,100000,supplier_0438 +pr_004939,acct_0063,user_000539,2026-02-11T11:00:00Z,rejected,101000,supplier_0439 +pr_004940,acct_0064,user_000540,2026-02-12T11:00:00Z,cancelled,102000,supplier_0440 +pr_004941,acct_0065,user_000541,2026-02-13T11:00:00Z,draft,103000,supplier_0441 +pr_004942,acct_0066,user_000542,2026-02-14T11:00:00Z,submitted,104000,supplier_0442 +pr_004943,acct_0067,user_000543,2026-02-15T11:00:00Z,approved,105000,supplier_0443 +pr_004944,acct_0068,user_000544,2026-02-16T11:00:00Z,rejected,106000,supplier_0444 +pr_004945,acct_0069,user_000545,2026-02-17T11:00:00Z,cancelled,107000,supplier_0445 +pr_004946,acct_0070,user_000546,2026-02-18T11:00:00Z,draft,108000,supplier_0446 +pr_004947,acct_0071,user_000547,2026-02-19T11:00:00Z,submitted,109000,supplier_0447 +pr_004948,acct_0072,user_000548,2026-02-20T11:00:00Z,approved,110000,supplier_0448 +pr_004949,acct_0073,user_000549,2026-02-21T11:00:00Z,rejected,10000,supplier_0449 +pr_004950,acct_0074,user_000550,2026-02-22T11:00:00Z,cancelled,11000,supplier_0450 +pr_004951,acct_0075,user_000551,2026-02-23T11:00:00Z,draft,12000,supplier_0451 +pr_004952,acct_0076,user_000552,2026-02-24T11:00:00Z,submitted,13000,supplier_0452 
+pr_004953,acct_0077,user_000553,2026-02-25T11:00:00Z,approved,14000,supplier_0453 +pr_004954,acct_0078,user_000554,2026-02-26T11:00:00Z,rejected,15000,supplier_0454 +pr_004955,acct_0079,user_000555,2026-02-27T11:00:00Z,cancelled,16000,supplier_0455 +pr_004956,acct_0080,user_000556,2026-02-28T11:00:00Z,draft,17000,supplier_0456 +pr_004957,acct_0081,user_000557,2026-02-01T11:00:00Z,submitted,18000,supplier_0457 +pr_004958,acct_0082,user_000558,2026-02-02T11:00:00Z,approved,19000,supplier_0458 +pr_004959,acct_0083,user_000559,2026-02-03T11:00:00Z,rejected,20000,supplier_0459 +pr_004960,acct_0084,user_000560,2026-02-04T11:00:00Z,cancelled,21000,supplier_0460 +pr_004961,acct_0085,user_000561,2026-02-05T11:00:00Z,draft,22000,supplier_0461 +pr_004962,acct_0086,user_000562,2026-02-06T11:00:00Z,submitted,23000,supplier_0462 +pr_004963,acct_0087,user_000563,2026-02-07T11:00:00Z,approved,24000,supplier_0463 +pr_004964,acct_0088,user_000564,2026-02-08T11:00:00Z,rejected,25000,supplier_0464 +pr_004965,acct_0089,user_000565,2026-02-09T11:00:00Z,cancelled,26000,supplier_0465 +pr_004966,acct_0090,user_000566,2026-02-10T11:00:00Z,draft,27000,supplier_0466 +pr_004967,acct_0091,user_000567,2026-02-11T11:00:00Z,submitted,28000,supplier_0467 +pr_004968,acct_0092,user_000568,2026-02-12T11:00:00Z,approved,29000,supplier_0468 +pr_004969,acct_0093,user_000569,2026-02-13T11:00:00Z,rejected,30000,supplier_0469 +pr_004970,acct_0094,user_000570,2026-02-14T11:00:00Z,cancelled,31000,supplier_0470 +pr_004971,acct_0095,user_000571,2026-02-15T11:00:00Z,draft,32000,supplier_0471 +pr_004972,acct_0096,user_000572,2026-02-16T11:00:00Z,submitted,33000,supplier_0472 +pr_004973,acct_0097,user_000573,2026-02-17T11:00:00Z,approved,34000,supplier_0473 +pr_004974,acct_0098,user_000574,2026-02-18T11:00:00Z,rejected,35000,supplier_0474 +pr_004975,acct_0099,user_000575,2026-02-19T11:00:00Z,cancelled,36000,supplier_0475 +pr_004976,acct_0100,user_000576,2026-02-20T11:00:00Z,draft,37000,supplier_0476 
+pr_004977,acct_0101,user_000577,2026-02-21T11:00:00Z,submitted,38000,supplier_0477 +pr_004978,acct_0102,user_000578,2026-02-22T11:00:00Z,approved,39000,supplier_0478 +pr_004979,acct_0103,user_000579,2026-02-23T11:00:00Z,rejected,40000,supplier_0479 +pr_004980,acct_0104,user_000580,2026-02-24T11:00:00Z,cancelled,41000,supplier_0480 +pr_004981,acct_0105,user_000581,2026-02-25T11:00:00Z,draft,42000,supplier_0481 +pr_004982,acct_0106,user_000582,2026-02-26T11:00:00Z,submitted,43000,supplier_0482 +pr_004983,acct_0107,user_000583,2026-02-27T11:00:00Z,approved,44000,supplier_0483 +pr_004984,acct_0108,user_000584,2026-02-28T11:00:00Z,rejected,45000,supplier_0484 +pr_004985,acct_0109,user_000585,2026-02-01T11:00:00Z,cancelled,46000,supplier_0485 +pr_004986,acct_0110,user_000586,2026-02-02T11:00:00Z,draft,47000,supplier_0486 +pr_004987,acct_0111,user_000587,2026-02-03T11:00:00Z,submitted,48000,supplier_0487 +pr_004988,acct_0112,user_000588,2026-02-04T11:00:00Z,approved,49000,supplier_0488 +pr_004989,acct_0113,user_000589,2026-02-05T11:00:00Z,rejected,50000,supplier_0489 +pr_004990,acct_0114,user_000590,2026-02-06T11:00:00Z,cancelled,51000,supplier_0490 +pr_004991,acct_0115,user_000591,2026-02-07T11:00:00Z,draft,52000,supplier_0491 +pr_004992,acct_0116,user_000592,2026-02-08T11:00:00Z,submitted,53000,supplier_0492 +pr_004993,acct_0117,user_000593,2026-02-09T11:00:00Z,approved,54000,supplier_0493 +pr_004994,acct_0118,user_000594,2026-02-10T11:00:00Z,rejected,55000,supplier_0494 +pr_004995,acct_0119,user_000595,2026-02-11T11:00:00Z,cancelled,56000,supplier_0495 +pr_004996,acct_0120,user_000596,2026-02-12T11:00:00Z,draft,57000,supplier_0496 +pr_004997,acct_0121,user_000597,2026-02-13T11:00:00Z,submitted,58000,supplier_0497 +pr_004998,acct_0122,user_000598,2026-02-14T11:00:00Z,approved,59000,supplier_0498 +pr_004999,acct_0123,user_000599,2026-02-15T11:00:00Z,rejected,60000,supplier_0499 +pr_005000,acct_0124,user_000600,2026-02-16T11:00:00Z,cancelled,61000,supplier_0500 
+pr_005001,acct_0125,user_000601,2026-02-17T11:00:00Z,draft,62000,supplier_0501 +pr_005002,acct_0126,user_000602,2026-02-18T11:00:00Z,submitted,63000,supplier_0502 +pr_005003,acct_0127,user_000603,2026-02-19T11:00:00Z,approved,64000,supplier_0503 +pr_005004,acct_0128,user_000604,2026-02-20T11:00:00Z,rejected,65000,supplier_0504 +pr_005005,acct_0129,user_000605,2026-02-21T11:00:00Z,cancelled,66000,supplier_0505 +pr_005006,acct_0130,user_000606,2026-02-22T11:00:00Z,draft,67000,supplier_0506 +pr_005007,acct_0131,user_000607,2026-02-23T11:00:00Z,submitted,68000,supplier_0507 +pr_005008,acct_0132,user_000608,2026-02-24T11:00:00Z,approved,69000,supplier_0508 +pr_005009,acct_0133,user_000609,2026-02-25T11:00:00Z,rejected,70000,supplier_0509 +pr_005010,acct_0134,user_000610,2026-02-26T11:00:00Z,cancelled,71000,supplier_0510 +pr_005011,acct_0135,user_000611,2026-02-27T11:00:00Z,draft,72000,supplier_0511 +pr_005012,acct_0136,user_000612,2026-02-28T11:00:00Z,submitted,73000,supplier_0512 +pr_005013,acct_0137,user_000613,2026-02-01T11:00:00Z,approved,74000,supplier_0513 +pr_005014,acct_0138,user_000614,2026-02-02T11:00:00Z,rejected,75000,supplier_0514 +pr_005015,acct_0139,user_000615,2026-02-03T11:00:00Z,cancelled,76000,supplier_0515 +pr_005016,acct_0140,user_000616,2026-02-04T11:00:00Z,draft,77000,supplier_0516 +pr_005017,acct_0141,user_000617,2026-02-05T11:00:00Z,submitted,78000,supplier_0517 +pr_005018,acct_0142,user_000618,2026-02-06T11:00:00Z,approved,79000,supplier_0518 +pr_005019,acct_0143,user_000619,2026-02-07T11:00:00Z,rejected,80000,supplier_0519 +pr_005020,acct_0144,user_000620,2026-02-08T11:00:00Z,cancelled,81000,supplier_0520 +pr_005021,acct_0145,user_000621,2026-02-09T11:00:00Z,draft,82000,supplier_0521 +pr_005022,acct_0146,user_000622,2026-02-10T11:00:00Z,submitted,83000,supplier_0522 +pr_005023,acct_0147,user_000623,2026-02-11T11:00:00Z,approved,84000,supplier_0523 +pr_005024,acct_0148,user_000624,2026-02-12T11:00:00Z,rejected,85000,supplier_0524 
+pr_005025,acct_0149,user_000625,2026-02-13T11:00:00Z,cancelled,86000,supplier_0525 +pr_005026,acct_0150,user_000626,2026-02-14T11:00:00Z,draft,87000,supplier_0526 +pr_005027,acct_0151,user_000627,2026-02-15T11:00:00Z,submitted,88000,supplier_0527 +pr_005028,acct_0152,user_000628,2026-02-16T11:00:00Z,approved,89000,supplier_0528 +pr_005029,acct_0153,user_000629,2026-02-17T11:00:00Z,rejected,90000,supplier_0529 +pr_005030,acct_0154,user_000630,2026-02-18T11:00:00Z,cancelled,91000,supplier_0530 +pr_005031,acct_0155,user_000631,2026-02-19T11:00:00Z,draft,92000,supplier_0531 +pr_005032,acct_0156,user_000632,2026-02-20T11:00:00Z,submitted,93000,supplier_0532 +pr_005033,acct_0157,user_000633,2026-02-21T11:00:00Z,approved,94000,supplier_0533 +pr_005034,acct_0158,user_000634,2026-02-22T11:00:00Z,rejected,95000,supplier_0534 +pr_005035,acct_0159,user_000635,2026-02-23T11:00:00Z,cancelled,96000,supplier_0535 +pr_005036,acct_0160,user_000636,2026-02-24T11:00:00Z,draft,97000,supplier_0536 +pr_005037,acct_0161,user_000637,2026-02-25T11:00:00Z,submitted,98000,supplier_0537 +pr_005038,acct_0162,user_000638,2026-02-26T11:00:00Z,approved,99000,supplier_0538 +pr_005039,acct_0163,user_000639,2026-02-27T11:00:00Z,rejected,100000,supplier_0539 +pr_005040,acct_0164,user_000640,2026-02-28T11:00:00Z,cancelled,101000,supplier_0540 +pr_005041,acct_0165,user_000641,2026-02-01T11:00:00Z,draft,102000,supplier_0541 +pr_005042,acct_0166,user_000642,2026-02-02T11:00:00Z,submitted,103000,supplier_0542 +pr_005043,acct_0167,user_000643,2026-02-03T11:00:00Z,approved,104000,supplier_0543 +pr_005044,acct_0168,user_000644,2026-02-04T11:00:00Z,rejected,105000,supplier_0544 +pr_005045,acct_0169,user_000645,2026-02-05T11:00:00Z,cancelled,106000,supplier_0545 +pr_005046,acct_0170,user_000646,2026-02-06T11:00:00Z,draft,107000,supplier_0546 +pr_005047,acct_0171,user_000647,2026-02-07T11:00:00Z,submitted,108000,supplier_0547 +pr_005048,acct_0172,user_000648,2026-02-08T11:00:00Z,approved,109000,supplier_0548 
+pr_005049,acct_0173,user_000649,2026-02-09T11:00:00Z,rejected,110000,supplier_0549 +pr_005050,acct_0174,user_000650,2026-02-10T11:00:00Z,cancelled,10000,supplier_0550 +pr_005051,acct_0175,user_000651,2026-02-11T11:00:00Z,draft,11000,supplier_0551 +pr_005052,acct_0176,user_000652,2026-02-12T11:00:00Z,submitted,12000,supplier_0552 +pr_005053,acct_0177,user_000653,2026-02-13T11:00:00Z,approved,13000,supplier_0553 +pr_005054,acct_0178,user_000654,2026-02-14T11:00:00Z,rejected,14000,supplier_0554 +pr_005055,acct_0179,user_000655,2026-02-15T11:00:00Z,cancelled,15000,supplier_0555 +pr_005056,acct_0180,user_000656,2026-02-16T11:00:00Z,draft,16000,supplier_0556 +pr_005057,acct_0181,user_000657,2026-02-17T11:00:00Z,submitted,17000,supplier_0557 +pr_005058,acct_0182,user_000658,2026-02-18T11:00:00Z,approved,18000,supplier_0558 +pr_005059,acct_0183,user_000659,2026-02-19T11:00:00Z,rejected,19000,supplier_0559 +pr_005060,acct_0184,user_000660,2026-02-20T11:00:00Z,cancelled,20000,supplier_0560 +pr_005061,acct_0185,user_000661,2026-02-21T11:00:00Z,draft,21000,supplier_0561 +pr_005062,acct_0186,user_000662,2026-02-22T11:00:00Z,submitted,22000,supplier_0562 +pr_005063,acct_0187,user_000663,2026-02-23T11:00:00Z,approved,23000,supplier_0563 +pr_005064,acct_0188,user_000664,2026-02-24T11:00:00Z,rejected,24000,supplier_0564 +pr_005065,acct_0189,user_000665,2026-02-25T11:00:00Z,cancelled,25000,supplier_0565 +pr_005066,acct_0190,user_000666,2026-02-26T11:00:00Z,draft,26000,supplier_0566 +pr_005067,acct_0001,user_000667,2026-02-27T11:00:00Z,submitted,27000,supplier_0567 +pr_005068,acct_0002,user_000668,2026-02-28T11:00:00Z,approved,28000,supplier_0568 +pr_005069,acct_0003,user_000669,2026-02-01T11:00:00Z,rejected,29000,supplier_0569 +pr_005070,acct_0004,user_000670,2026-02-02T11:00:00Z,cancelled,30000,supplier_0570 +pr_005071,acct_0005,user_000671,2026-02-03T11:00:00Z,draft,31000,supplier_0571 +pr_005072,acct_0006,user_000672,2026-02-04T11:00:00Z,submitted,32000,supplier_0572 
+pr_005073,acct_0007,user_000673,2026-02-05T11:00:00Z,approved,33000,supplier_0573 +pr_005074,acct_0008,user_000674,2026-02-06T11:00:00Z,rejected,34000,supplier_0574 +pr_005075,acct_0009,user_000675,2026-02-07T11:00:00Z,cancelled,35000,supplier_0575 +pr_005076,acct_0010,user_000676,2026-02-08T11:00:00Z,draft,36000,supplier_0576 +pr_005077,acct_0011,user_000677,2026-02-09T11:00:00Z,submitted,37000,supplier_0577 +pr_005078,acct_0012,user_000678,2026-02-10T11:00:00Z,approved,38000,supplier_0578 +pr_005079,acct_0013,user_000679,2026-02-11T11:00:00Z,rejected,39000,supplier_0579 +pr_005080,acct_0014,user_000680,2026-02-12T11:00:00Z,cancelled,40000,supplier_0580 +pr_005081,acct_0015,user_000681,2026-02-13T11:00:00Z,draft,41000,supplier_0581 +pr_005082,acct_0016,user_000682,2026-02-14T11:00:00Z,submitted,42000,supplier_0582 +pr_005083,acct_0017,user_000683,2026-02-15T11:00:00Z,approved,43000,supplier_0583 +pr_005084,acct_0018,user_000684,2026-02-16T11:00:00Z,rejected,44000,supplier_0584 +pr_005085,acct_0019,user_000685,2026-02-17T11:00:00Z,cancelled,45000,supplier_0585 +pr_005086,acct_0020,user_000686,2026-02-18T11:00:00Z,draft,46000,supplier_0586 +pr_005087,acct_0021,user_000687,2026-02-19T11:00:00Z,submitted,47000,supplier_0587 +pr_005088,acct_0022,user_000688,2026-02-20T11:00:00Z,approved,48000,supplier_0588 +pr_005089,acct_0023,user_000689,2026-02-21T11:00:00Z,rejected,49000,supplier_0589 +pr_005090,acct_0024,user_000690,2026-02-22T11:00:00Z,cancelled,50000,supplier_0590 +pr_005091,acct_0025,user_000691,2026-02-23T11:00:00Z,draft,51000,supplier_0591 +pr_005092,acct_0026,user_000692,2026-02-24T11:00:00Z,submitted,52000,supplier_0592 +pr_005093,acct_0027,user_000693,2026-02-25T11:00:00Z,approved,53000,supplier_0593 +pr_005094,acct_0028,user_000694,2026-02-26T11:00:00Z,rejected,54000,supplier_0594 +pr_005095,acct_0029,user_000695,2026-02-27T11:00:00Z,cancelled,55000,supplier_0595 +pr_005096,acct_0030,user_000696,2026-02-28T11:00:00Z,draft,56000,supplier_0596 
+pr_005097,acct_0031,user_000697,2026-02-01T11:00:00Z,submitted,57000,supplier_0597 +pr_005098,acct_0032,user_000698,2026-02-02T11:00:00Z,approved,58000,supplier_0598 +pr_005099,acct_0033,user_000699,2026-02-03T11:00:00Z,rejected,59000,supplier_0599 +pr_005100,acct_0034,user_000700,2026-02-04T11:00:00Z,cancelled,60000,supplier_0600 +pr_005101,acct_0035,user_000701,2026-02-05T11:00:00Z,draft,61000,supplier_0601 +pr_005102,acct_0036,user_000702,2026-02-06T11:00:00Z,submitted,62000,supplier_0602 +pr_005103,acct_0037,user_000703,2026-02-07T11:00:00Z,approved,63000,supplier_0603 +pr_005104,acct_0038,user_000704,2026-02-08T11:00:00Z,rejected,64000,supplier_0604 +pr_005105,acct_0039,user_000705,2026-02-09T11:00:00Z,cancelled,65000,supplier_0605 +pr_005106,acct_0040,user_000706,2026-02-10T11:00:00Z,draft,66000,supplier_0606 +pr_005107,acct_0041,user_000707,2026-02-11T11:00:00Z,submitted,67000,supplier_0607 +pr_005108,acct_0042,user_000708,2026-02-12T11:00:00Z,approved,68000,supplier_0608 +pr_005109,acct_0043,user_000709,2026-02-13T11:00:00Z,rejected,69000,supplier_0609 +pr_005110,acct_0044,user_000710,2026-02-14T11:00:00Z,cancelled,70000,supplier_0610 +pr_005111,acct_0045,user_000711,2026-02-15T11:00:00Z,draft,71000,supplier_0611 +pr_005112,acct_0046,user_000712,2026-02-16T11:00:00Z,submitted,72000,supplier_0612 +pr_005113,acct_0047,user_000713,2026-02-17T11:00:00Z,approved,73000,supplier_0613 +pr_005114,acct_0048,user_000714,2026-02-18T11:00:00Z,rejected,74000,supplier_0614 +pr_005115,acct_0049,user_000715,2026-02-19T11:00:00Z,cancelled,75000,supplier_0615 +pr_005116,acct_0050,user_000716,2026-02-20T11:00:00Z,draft,76000,supplier_0616 +pr_005117,acct_0051,user_000717,2026-02-21T11:00:00Z,submitted,77000,supplier_0617 +pr_005118,acct_0052,user_000718,2026-02-22T11:00:00Z,approved,78000,supplier_0618 +pr_005119,acct_0053,user_000719,2026-02-23T11:00:00Z,rejected,79000,supplier_0619 +pr_005120,acct_0054,user_000720,2026-02-24T11:00:00Z,cancelled,80000,supplier_0620 
+pr_005121,acct_0055,user_000721,2026-02-25T11:00:00Z,draft,81000,supplier_0621 +pr_005122,acct_0056,user_000722,2026-02-26T11:00:00Z,submitted,82000,supplier_0622 +pr_005123,acct_0057,user_000723,2026-02-27T11:00:00Z,approved,83000,supplier_0623 +pr_005124,acct_0058,user_000724,2026-02-28T11:00:00Z,rejected,84000,supplier_0624 +pr_005125,acct_0059,user_000725,2026-02-01T11:00:00Z,cancelled,85000,supplier_0625 +pr_005126,acct_0060,user_000726,2026-02-02T11:00:00Z,draft,86000,supplier_0626 +pr_005127,acct_0061,user_000727,2026-02-03T11:00:00Z,submitted,87000,supplier_0627 +pr_005128,acct_0062,user_000728,2026-02-04T11:00:00Z,approved,88000,supplier_0628 +pr_005129,acct_0063,user_000729,2026-02-05T11:00:00Z,rejected,89000,supplier_0629 +pr_005130,acct_0064,user_000730,2026-02-06T11:00:00Z,cancelled,90000,supplier_0630 +pr_005131,acct_0065,user_000731,2026-02-07T11:00:00Z,draft,91000,supplier_0631 +pr_005132,acct_0066,user_000732,2026-02-08T11:00:00Z,submitted,92000,supplier_0632 +pr_005133,acct_0067,user_000733,2026-02-09T11:00:00Z,approved,93000,supplier_0633 +pr_005134,acct_0068,user_000734,2026-02-10T11:00:00Z,rejected,94000,supplier_0634 +pr_005135,acct_0069,user_000735,2026-02-11T11:00:00Z,cancelled,95000,supplier_0635 +pr_005136,acct_0070,user_000736,2026-02-12T11:00:00Z,draft,96000,supplier_0636 +pr_005137,acct_0071,user_000737,2026-02-13T11:00:00Z,submitted,97000,supplier_0637 +pr_005138,acct_0072,user_000738,2026-02-14T11:00:00Z,approved,98000,supplier_0638 +pr_005139,acct_0073,user_000739,2026-02-15T11:00:00Z,rejected,99000,supplier_0639 +pr_005140,acct_0074,user_000740,2026-02-16T11:00:00Z,cancelled,100000,supplier_0640 +pr_005141,acct_0075,user_000741,2026-02-17T11:00:00Z,draft,101000,supplier_0641 +pr_005142,acct_0076,user_000742,2026-02-18T11:00:00Z,submitted,102000,supplier_0642 +pr_005143,acct_0077,user_000743,2026-02-19T11:00:00Z,approved,103000,supplier_0643 +pr_005144,acct_0078,user_000744,2026-02-20T11:00:00Z,rejected,104000,supplier_0644 
+pr_005145,acct_0079,user_000745,2026-02-21T11:00:00Z,cancelled,105000,supplier_0645 +pr_005146,acct_0080,user_000746,2026-02-22T11:00:00Z,draft,106000,supplier_0646 +pr_005147,acct_0081,user_000747,2026-02-23T11:00:00Z,submitted,107000,supplier_0647 +pr_005148,acct_0082,user_000748,2026-02-24T11:00:00Z,approved,108000,supplier_0648 +pr_005149,acct_0083,user_000749,2026-02-25T11:00:00Z,rejected,109000,supplier_0649 +pr_005150,acct_0084,user_000750,2026-02-26T11:00:00Z,cancelled,110000,supplier_0650 +pr_005151,acct_0085,user_000751,2026-02-27T11:00:00Z,draft,10000,supplier_0651 +pr_005152,acct_0086,user_000752,2026-02-28T11:00:00Z,submitted,11000,supplier_0652 +pr_005153,acct_0087,user_000753,2026-02-01T11:00:00Z,approved,12000,supplier_0653 +pr_005154,acct_0088,user_000754,2026-02-02T11:00:00Z,rejected,13000,supplier_0654 +pr_005155,acct_0089,user_000755,2026-02-03T11:00:00Z,cancelled,14000,supplier_0655 +pr_005156,acct_0090,user_000756,2026-02-04T11:00:00Z,draft,15000,supplier_0656 +pr_005157,acct_0091,user_000757,2026-02-05T11:00:00Z,submitted,16000,supplier_0657 +pr_005158,acct_0092,user_000758,2026-02-06T11:00:00Z,approved,17000,supplier_0658 +pr_005159,acct_0093,user_000759,2026-02-07T11:00:00Z,rejected,18000,supplier_0659 +pr_005160,acct_0094,user_000760,2026-02-08T11:00:00Z,cancelled,19000,supplier_0660 +pr_005161,acct_0095,user_000761,2026-02-09T11:00:00Z,draft,20000,supplier_0661 +pr_005162,acct_0096,user_000762,2026-02-10T11:00:00Z,submitted,21000,supplier_0662 +pr_005163,acct_0097,user_000763,2026-02-11T11:00:00Z,approved,22000,supplier_0663 +pr_005164,acct_0098,user_000764,2026-02-12T11:00:00Z,rejected,23000,supplier_0664 +pr_005165,acct_0099,user_000765,2026-02-13T11:00:00Z,cancelled,24000,supplier_0665 +pr_005166,acct_0100,user_000766,2026-02-14T11:00:00Z,draft,25000,supplier_0666 +pr_005167,acct_0101,user_000767,2026-02-15T11:00:00Z,submitted,26000,supplier_0667 +pr_005168,acct_0102,user_000768,2026-02-16T11:00:00Z,approved,27000,supplier_0668 
+pr_005169,acct_0103,user_000769,2026-02-17T11:00:00Z,rejected,28000,supplier_0669 +pr_005170,acct_0104,user_000770,2026-02-18T11:00:00Z,cancelled,29000,supplier_0670 +pr_005171,acct_0105,user_000771,2026-02-19T11:00:00Z,draft,30000,supplier_0671 +pr_005172,acct_0106,user_000772,2026-02-20T11:00:00Z,submitted,31000,supplier_0672 +pr_005173,acct_0107,user_000773,2026-02-21T11:00:00Z,approved,32000,supplier_0673 +pr_005174,acct_0108,user_000774,2026-02-22T11:00:00Z,rejected,33000,supplier_0674 +pr_005175,acct_0109,user_000775,2026-02-23T11:00:00Z,cancelled,34000,supplier_0675 +pr_005176,acct_0110,user_000776,2026-02-24T11:00:00Z,draft,35000,supplier_0676 +pr_005177,acct_0111,user_000777,2026-02-25T11:00:00Z,submitted,36000,supplier_0677 +pr_005178,acct_0112,user_000778,2026-02-26T11:00:00Z,approved,37000,supplier_0678 +pr_005179,acct_0113,user_000779,2026-02-27T11:00:00Z,rejected,38000,supplier_0679 +pr_005180,acct_0114,user_000780,2026-02-28T11:00:00Z,cancelled,39000,supplier_0680 +pr_005181,acct_0115,user_000781,2026-02-01T11:00:00Z,draft,40000,supplier_0681 +pr_005182,acct_0116,user_000782,2026-02-02T11:00:00Z,submitted,41000,supplier_0682 +pr_005183,acct_0117,user_000783,2026-02-03T11:00:00Z,approved,42000,supplier_0683 +pr_005184,acct_0118,user_000784,2026-02-04T11:00:00Z,rejected,43000,supplier_0684 +pr_005185,acct_0119,user_000785,2026-02-05T11:00:00Z,cancelled,44000,supplier_0685 +pr_005186,acct_0120,user_000786,2026-02-06T11:00:00Z,draft,45000,supplier_0686 +pr_005187,acct_0121,user_000787,2026-02-07T11:00:00Z,submitted,46000,supplier_0687 +pr_005188,acct_0122,user_000788,2026-02-08T11:00:00Z,approved,47000,supplier_0688 +pr_005189,acct_0123,user_000789,2026-02-09T11:00:00Z,rejected,48000,supplier_0689 +pr_005190,acct_0124,user_000790,2026-02-10T11:00:00Z,cancelled,49000,supplier_0690 +pr_005191,acct_0125,user_000791,2026-02-11T11:00:00Z,draft,50000,supplier_0691 +pr_005192,acct_0126,user_000792,2026-02-12T11:00:00Z,submitted,51000,supplier_0692 
+pr_005193,acct_0127,user_000793,2026-02-13T11:00:00Z,approved,52000,supplier_0693 +pr_005194,acct_0128,user_000794,2026-02-14T11:00:00Z,rejected,53000,supplier_0694 +pr_005195,acct_0129,user_000795,2026-02-15T11:00:00Z,cancelled,54000,supplier_0695 +pr_005196,acct_0130,user_000796,2026-02-16T11:00:00Z,draft,55000,supplier_0696 +pr_005197,acct_0131,user_000797,2026-02-17T11:00:00Z,submitted,56000,supplier_0697 +pr_005198,acct_0132,user_000798,2026-02-18T11:00:00Z,approved,57000,supplier_0698 +pr_005199,acct_0133,user_000799,2026-02-19T11:00:00Z,rejected,58000,supplier_0699 +pr_005200,acct_0134,user_000800,2026-02-20T11:00:00Z,cancelled,59000,supplier_0700 diff --git a/packages/cli/assets/demo/orbit/raw-sources/warehouse/support_tickets.csv b/packages/cli/assets/demo/orbit/raw-sources/warehouse/support_tickets.csv new file mode 100644 index 00000000..75719ddd --- /dev/null +++ b/packages/cli/assets/demo/orbit/raw-sources/warehouse/support_tickets.csv @@ -0,0 +1,521 @@ +support_ticket_id,account_id,requester_user_id,severity,category,status,created_at,resolved_at,owner_user_id +ticket_0001,acct_0001,user_000287,critical,approval_routing,open,2026-03-10T17:00:00Z,2099-12-31T00:00:00Z,user_000901 +ticket_0002,acct_0002,user_000288,high,approval_routing,open,2026-03-11T17:00:00Z,2099-12-31T00:00:00Z,user_000902 +ticket_0003,acct_0003,user_000289,critical,approval_routing,open,2026-03-12T17:00:00Z,2099-12-31T00:00:00Z,user_000903 +ticket_0004,acct_0004,user_000290,high,approval_routing,open,2026-03-13T17:00:00Z,2099-12-31T00:00:00Z,user_000904 +ticket_0005,acct_0005,user_000291,critical,approval_routing,open,2026-03-14T17:00:00Z,2099-12-31T00:00:00Z,user_000905 +ticket_0006,acct_0006,user_000292,high,approval_routing,open,2026-03-15T17:00:00Z,2099-12-31T00:00:00Z,user_000906 +ticket_0007,acct_0007,user_000293,critical,approval_routing,open,2026-03-16T17:00:00Z,2099-12-31T00:00:00Z,user_000907 
+ticket_0008,acct_0008,user_000294,high,approval_routing,open,2026-03-17T17:00:00Z,2099-12-31T00:00:00Z,user_000908 +ticket_0009,acct_0009,user_000295,critical,approval_routing,open,2026-03-18T17:00:00Z,2099-12-31T00:00:00Z,user_000909 +ticket_0010,acct_0010,user_000010,medium,supplier_onboarding,pending,2026-02-10T17:00:00Z,2026-02-28T17:00:00Z,user_000909 +ticket_0011,acct_0011,user_000011,high,billing,solved,2026-02-11T17:00:00Z,2026-02-28T17:00:00Z,user_000910 +ticket_0012,acct_0012,user_000012,critical,permissions,closed,2026-02-12T17:00:00Z,2026-02-28T17:00:00Z,user_000911 +ticket_0013,acct_0013,user_000013,low,approval_routing,open,2026-02-13T17:00:00Z,2026-02-28T17:00:00Z,user_000912 +ticket_0014,acct_0014,user_000014,medium,supplier_onboarding,pending,2026-02-14T17:00:00Z,2026-02-28T17:00:00Z,user_000913 +ticket_0015,acct_0015,user_000015,high,billing,solved,2026-02-15T17:00:00Z,2026-02-28T17:00:00Z,user_000914 +ticket_0016,acct_0016,user_000016,critical,permissions,closed,2026-02-16T17:00:00Z,2026-02-28T17:00:00Z,user_000915 +ticket_0017,acct_0017,user_000017,low,approval_routing,open,2026-02-17T17:00:00Z,2026-02-28T17:00:00Z,user_000916 +ticket_0018,acct_0018,user_000018,medium,supplier_onboarding,pending,2026-02-18T17:00:00Z,2026-02-28T17:00:00Z,user_000917 +ticket_0019,acct_0019,user_000019,high,billing,solved,2026-02-19T17:00:00Z,2026-02-28T17:00:00Z,user_000918 +ticket_0020,acct_0020,user_000020,critical,permissions,closed,2026-02-20T17:00:00Z,2026-02-28T17:00:00Z,user_000919 +ticket_0021,acct_0021,user_000021,low,approval_routing,open,2026-02-21T17:00:00Z,2026-02-28T17:00:00Z,user_000920 +ticket_0022,acct_0022,user_000022,medium,supplier_onboarding,pending,2026-02-22T17:00:00Z,2026-02-28T17:00:00Z,user_000921 +ticket_0023,acct_0023,user_000023,high,billing,solved,2026-02-23T17:00:00Z,2026-02-28T17:00:00Z,user_000922 +ticket_0024,acct_0024,user_000024,critical,permissions,closed,2026-02-24T17:00:00Z,2026-02-28T17:00:00Z,user_000923 
+ticket_0025,acct_0025,user_000025,low,approval_routing,open,2026-02-25T17:00:00Z,2026-02-28T17:00:00Z,user_000924 +ticket_0026,acct_0026,user_000026,medium,supplier_onboarding,pending,2026-02-26T17:00:00Z,2026-02-28T17:00:00Z,user_000925 +ticket_0027,acct_0027,user_000027,high,billing,solved,2026-02-27T17:00:00Z,2026-02-28T17:00:00Z,user_000926 +ticket_0028,acct_0028,user_000028,critical,permissions,closed,2026-02-28T17:00:00Z,2026-02-28T17:00:00Z,user_000927 +ticket_0029,acct_0029,user_000029,low,approval_routing,open,2026-02-01T17:00:00Z,2026-02-28T17:00:00Z,user_000928 +ticket_0030,acct_0030,user_000030,medium,supplier_onboarding,pending,2026-02-02T17:00:00Z,2026-02-28T17:00:00Z,user_000929 +ticket_0031,acct_0031,user_000031,high,billing,solved,2026-02-03T17:00:00Z,2026-02-28T17:00:00Z,user_000930 +ticket_0032,acct_0032,user_000032,critical,permissions,closed,2026-02-04T17:00:00Z,2026-02-28T17:00:00Z,user_000931 +ticket_0033,acct_0033,user_000033,low,approval_routing,open,2026-02-05T17:00:00Z,2026-02-28T17:00:00Z,user_000932 +ticket_0034,acct_0034,user_000034,medium,supplier_onboarding,pending,2026-02-06T17:00:00Z,2026-02-28T17:00:00Z,user_000933 +ticket_0035,acct_0035,user_000035,high,billing,solved,2026-02-07T17:00:00Z,2026-02-28T17:00:00Z,user_000934 +ticket_0036,acct_0036,user_000036,critical,permissions,closed,2026-02-08T17:00:00Z,2026-02-28T17:00:00Z,user_000935 +ticket_0037,acct_0037,user_000037,low,approval_routing,open,2026-02-09T17:00:00Z,2026-02-28T17:00:00Z,user_000936 +ticket_0038,acct_0038,user_000038,medium,supplier_onboarding,pending,2026-02-10T17:00:00Z,2026-02-28T17:00:00Z,user_000937 +ticket_0039,acct_0039,user_000039,high,billing,solved,2026-02-11T17:00:00Z,2026-02-28T17:00:00Z,user_000938 +ticket_0040,acct_0040,user_000040,critical,permissions,closed,2026-02-12T17:00:00Z,2026-02-28T17:00:00Z,user_000939 +ticket_0041,acct_0041,user_000041,low,approval_routing,open,2026-02-13T17:00:00Z,2026-02-28T17:00:00Z,user_000940 
+ticket_0042,acct_0042,user_000042,medium,supplier_onboarding,pending,2026-02-14T17:00:00Z,2026-02-28T17:00:00Z,user_000941 +ticket_0043,acct_0043,user_000043,high,billing,solved,2026-02-15T17:00:00Z,2026-02-28T17:00:00Z,user_000942 +ticket_0044,acct_0044,user_000044,critical,permissions,closed,2026-02-16T17:00:00Z,2026-02-28T17:00:00Z,user_000943 +ticket_0045,acct_0045,user_000045,low,approval_routing,open,2026-02-17T17:00:00Z,2026-02-28T17:00:00Z,user_000944 +ticket_0046,acct_0046,user_000046,medium,supplier_onboarding,pending,2026-02-18T17:00:00Z,2026-02-28T17:00:00Z,user_000945 +ticket_0047,acct_0047,user_000047,high,billing,solved,2026-02-19T17:00:00Z,2026-02-28T17:00:00Z,user_000946 +ticket_0048,acct_0048,user_000048,critical,permissions,closed,2026-02-20T17:00:00Z,2026-02-28T17:00:00Z,user_000947 +ticket_0049,acct_0049,user_000049,low,approval_routing,open,2026-02-21T17:00:00Z,2026-02-28T17:00:00Z,user_000948 +ticket_0050,acct_0050,user_000050,medium,supplier_onboarding,pending,2026-02-22T17:00:00Z,2026-02-28T17:00:00Z,user_000949 +ticket_0051,acct_0051,user_000051,high,billing,solved,2026-02-23T17:00:00Z,2026-02-28T17:00:00Z,user_000950 +ticket_0052,acct_0052,user_000052,critical,permissions,closed,2026-02-24T17:00:00Z,2026-02-28T17:00:00Z,user_000951 +ticket_0053,acct_0053,user_000053,low,approval_routing,open,2026-02-25T17:00:00Z,2026-02-28T17:00:00Z,user_000952 +ticket_0054,acct_0054,user_000054,medium,supplier_onboarding,pending,2026-02-26T17:00:00Z,2026-02-28T17:00:00Z,user_000953 +ticket_0055,acct_0055,user_000055,high,billing,solved,2026-02-27T17:00:00Z,2026-02-28T17:00:00Z,user_000954 +ticket_0056,acct_0056,user_000056,critical,permissions,closed,2026-02-28T17:00:00Z,2026-02-28T17:00:00Z,user_000955 +ticket_0057,acct_0057,user_000057,low,approval_routing,open,2026-02-01T17:00:00Z,2026-02-28T17:00:00Z,user_000956 +ticket_0058,acct_0058,user_000058,medium,supplier_onboarding,pending,2026-02-02T17:00:00Z,2026-02-28T17:00:00Z,user_000957 
+ticket_0059,acct_0059,user_000059,high,billing,solved,2026-02-03T17:00:00Z,2026-02-28T17:00:00Z,user_000958 +ticket_0060,acct_0060,user_000060,critical,permissions,closed,2026-02-04T17:00:00Z,2026-02-28T17:00:00Z,user_000959 +ticket_0061,acct_0061,user_000061,low,approval_routing,open,2026-02-05T17:00:00Z,2026-02-28T17:00:00Z,user_000960 +ticket_0062,acct_0062,user_000062,medium,supplier_onboarding,pending,2026-02-06T17:00:00Z,2026-02-28T17:00:00Z,user_000961 +ticket_0063,acct_0063,user_000063,high,billing,solved,2026-02-07T17:00:00Z,2026-02-28T17:00:00Z,user_000962 +ticket_0064,acct_0064,user_000064,critical,permissions,closed,2026-02-08T17:00:00Z,2026-02-28T17:00:00Z,user_000963 +ticket_0065,acct_0065,user_000065,low,approval_routing,open,2026-02-09T17:00:00Z,2026-02-28T17:00:00Z,user_000964 +ticket_0066,acct_0066,user_000066,medium,supplier_onboarding,pending,2026-02-10T17:00:00Z,2026-02-28T17:00:00Z,user_000965 +ticket_0067,acct_0067,user_000067,high,billing,solved,2026-02-11T17:00:00Z,2026-02-28T17:00:00Z,user_000966 +ticket_0068,acct_0068,user_000068,critical,permissions,closed,2026-02-12T17:00:00Z,2026-02-28T17:00:00Z,user_000967 +ticket_0069,acct_0069,user_000069,low,approval_routing,open,2026-02-13T17:00:00Z,2026-02-28T17:00:00Z,user_000968 +ticket_0070,acct_0070,user_000070,medium,supplier_onboarding,pending,2026-02-14T17:00:00Z,2026-02-28T17:00:00Z,user_000969 +ticket_0071,acct_0071,user_000071,high,billing,solved,2026-02-15T17:00:00Z,2026-02-28T17:00:00Z,user_000970 +ticket_0072,acct_0072,user_000072,critical,permissions,closed,2026-02-16T17:00:00Z,2026-02-28T17:00:00Z,user_000971 +ticket_0073,acct_0073,user_000073,low,approval_routing,open,2026-02-17T17:00:00Z,2026-02-28T17:00:00Z,user_000972 +ticket_0074,acct_0074,user_000074,medium,supplier_onboarding,pending,2026-02-18T17:00:00Z,2026-02-28T17:00:00Z,user_000973 +ticket_0075,acct_0075,user_000075,high,billing,solved,2026-02-19T17:00:00Z,2026-02-28T17:00:00Z,user_000974 
+ticket_0076,acct_0076,user_000076,critical,permissions,closed,2026-02-20T17:00:00Z,2026-02-28T17:00:00Z,user_000975 +ticket_0077,acct_0077,user_000077,low,approval_routing,open,2026-02-21T17:00:00Z,2026-02-28T17:00:00Z,user_000976 +ticket_0078,acct_0078,user_000078,medium,supplier_onboarding,pending,2026-02-22T17:00:00Z,2026-02-28T17:00:00Z,user_000977 +ticket_0079,acct_0079,user_000079,high,billing,solved,2026-02-23T17:00:00Z,2026-02-28T17:00:00Z,user_000978 +ticket_0080,acct_0080,user_000080,critical,permissions,closed,2026-02-24T17:00:00Z,2026-02-28T17:00:00Z,user_000979 +ticket_0081,acct_0081,user_000081,low,approval_routing,open,2026-02-25T17:00:00Z,2026-02-28T17:00:00Z,user_000980 +ticket_0082,acct_0082,user_000082,medium,supplier_onboarding,pending,2026-02-26T17:00:00Z,2026-02-28T17:00:00Z,user_000981 +ticket_0083,acct_0083,user_000083,high,billing,solved,2026-02-27T17:00:00Z,2026-02-28T17:00:00Z,user_000982 +ticket_0084,acct_0084,user_000084,critical,permissions,closed,2026-02-28T17:00:00Z,2026-02-28T17:00:00Z,user_000983 +ticket_0085,acct_0085,user_000085,low,approval_routing,open,2026-02-01T17:00:00Z,2026-02-28T17:00:00Z,user_000984 +ticket_0086,acct_0086,user_000086,medium,supplier_onboarding,pending,2026-02-02T17:00:00Z,2026-02-28T17:00:00Z,user_000985 +ticket_0087,acct_0087,user_000087,high,billing,solved,2026-02-03T17:00:00Z,2026-02-28T17:00:00Z,user_000986 +ticket_0088,acct_0088,user_000088,critical,permissions,closed,2026-02-04T17:00:00Z,2026-02-28T17:00:00Z,user_000987 +ticket_0089,acct_0089,user_000089,low,approval_routing,open,2026-02-05T17:00:00Z,2026-02-28T17:00:00Z,user_000988 +ticket_0090,acct_0090,user_000090,medium,supplier_onboarding,pending,2026-02-06T17:00:00Z,2026-02-28T17:00:00Z,user_000989 +ticket_0091,acct_0091,user_000091,high,billing,solved,2026-02-07T17:00:00Z,2026-02-28T17:00:00Z,user_000990 +ticket_0092,acct_0092,user_000092,critical,permissions,closed,2026-02-08T17:00:00Z,2026-02-28T17:00:00Z,user_000991 
+ticket_0093,acct_0093,user_000093,low,approval_routing,open,2026-02-09T17:00:00Z,2026-02-28T17:00:00Z,user_000992 +ticket_0094,acct_0094,user_000094,medium,supplier_onboarding,pending,2026-02-10T17:00:00Z,2026-02-28T17:00:00Z,user_000993 +ticket_0095,acct_0095,user_000095,high,billing,solved,2026-02-11T17:00:00Z,2026-02-28T17:00:00Z,user_000994 +ticket_0096,acct_0096,user_000096,critical,permissions,closed,2026-02-12T17:00:00Z,2026-02-28T17:00:00Z,user_000995 +ticket_0097,acct_0097,user_000097,low,approval_routing,open,2026-02-13T17:00:00Z,2026-02-28T17:00:00Z,user_000996 +ticket_0098,acct_0098,user_000098,medium,supplier_onboarding,pending,2026-02-14T17:00:00Z,2026-02-28T17:00:00Z,user_000997 +ticket_0099,acct_0099,user_000099,high,billing,solved,2026-02-15T17:00:00Z,2026-02-28T17:00:00Z,user_000998 +ticket_0100,acct_0100,user_000100,critical,permissions,closed,2026-02-16T17:00:00Z,2026-02-28T17:00:00Z,user_000999 +ticket_0101,acct_0101,user_000101,low,approval_routing,open,2026-02-17T17:00:00Z,2026-02-28T17:00:00Z,user_001000 +ticket_0102,acct_0102,user_000102,medium,supplier_onboarding,pending,2026-02-18T17:00:00Z,2026-02-28T17:00:00Z,user_001001 +ticket_0103,acct_0103,user_000103,high,billing,solved,2026-02-19T17:00:00Z,2026-02-28T17:00:00Z,user_001002 +ticket_0104,acct_0104,user_000104,critical,permissions,closed,2026-02-20T17:00:00Z,2026-02-28T17:00:00Z,user_001003 +ticket_0105,acct_0105,user_000105,low,approval_routing,open,2026-02-21T17:00:00Z,2026-02-28T17:00:00Z,user_001004 +ticket_0106,acct_0106,user_000106,medium,supplier_onboarding,pending,2026-02-22T17:00:00Z,2026-02-28T17:00:00Z,user_001005 +ticket_0107,acct_0107,user_000107,high,billing,solved,2026-02-23T17:00:00Z,2026-02-28T17:00:00Z,user_001006 +ticket_0108,acct_0108,user_000108,critical,permissions,closed,2026-02-24T17:00:00Z,2026-02-28T17:00:00Z,user_001007 +ticket_0109,acct_0109,user_000109,low,approval_routing,open,2026-02-25T17:00:00Z,2026-02-28T17:00:00Z,user_001008 
+ticket_0110,acct_0110,user_000110,medium,supplier_onboarding,pending,2026-02-26T17:00:00Z,2026-02-28T17:00:00Z,user_001009 +ticket_0111,acct_0111,user_000111,high,billing,solved,2026-02-27T17:00:00Z,2026-02-28T17:00:00Z,user_001010 +ticket_0112,acct_0112,user_000112,critical,permissions,closed,2026-02-28T17:00:00Z,2026-02-28T17:00:00Z,user_001011 +ticket_0113,acct_0113,user_000113,low,approval_routing,open,2026-02-01T17:00:00Z,2026-02-28T17:00:00Z,user_001012 +ticket_0114,acct_0114,user_000114,medium,supplier_onboarding,pending,2026-02-02T17:00:00Z,2026-02-28T17:00:00Z,user_001013 +ticket_0115,acct_0115,user_000115,high,billing,solved,2026-02-03T17:00:00Z,2026-02-28T17:00:00Z,user_001014 +ticket_0116,acct_0116,user_000116,critical,permissions,closed,2026-02-04T17:00:00Z,2026-02-28T17:00:00Z,user_001015 +ticket_0117,acct_0117,user_000117,low,approval_routing,open,2026-02-05T17:00:00Z,2026-02-28T17:00:00Z,user_001016 +ticket_0118,acct_0118,user_000118,medium,supplier_onboarding,pending,2026-02-06T17:00:00Z,2026-02-28T17:00:00Z,user_001017 +ticket_0119,acct_0119,user_000119,high,billing,solved,2026-02-07T17:00:00Z,2026-02-28T17:00:00Z,user_001018 +ticket_0120,acct_0120,user_000120,critical,permissions,closed,2026-02-08T17:00:00Z,2026-02-28T17:00:00Z,user_001019 +ticket_0121,acct_0121,user_000121,low,approval_routing,open,2026-02-09T17:00:00Z,2026-02-28T17:00:00Z,user_001020 +ticket_0122,acct_0122,user_000122,medium,supplier_onboarding,pending,2026-02-10T17:00:00Z,2026-02-28T17:00:00Z,user_001021 +ticket_0123,acct_0123,user_000123,high,billing,solved,2026-02-11T17:00:00Z,2026-02-28T17:00:00Z,user_001022 +ticket_0124,acct_0124,user_000124,critical,permissions,closed,2026-02-12T17:00:00Z,2026-02-28T17:00:00Z,user_001023 +ticket_0125,acct_0125,user_000125,low,approval_routing,open,2026-02-13T17:00:00Z,2026-02-28T17:00:00Z,user_001024 +ticket_0126,acct_0126,user_000126,medium,supplier_onboarding,pending,2026-02-14T17:00:00Z,2026-02-28T17:00:00Z,user_001025 
+ticket_0127,acct_0127,user_000127,high,billing,solved,2026-02-15T17:00:00Z,2026-02-28T17:00:00Z,user_001026 +ticket_0128,acct_0128,user_000128,critical,permissions,closed,2026-02-16T17:00:00Z,2026-02-28T17:00:00Z,user_001027 +ticket_0129,acct_0129,user_000129,low,approval_routing,open,2026-02-17T17:00:00Z,2026-02-28T17:00:00Z,user_001028 +ticket_0130,acct_0130,user_000130,medium,supplier_onboarding,pending,2026-02-18T17:00:00Z,2026-02-28T17:00:00Z,user_001029 +ticket_0131,acct_0131,user_000131,high,billing,solved,2026-02-19T17:00:00Z,2026-02-28T17:00:00Z,user_001030 +ticket_0132,acct_0132,user_000132,critical,permissions,closed,2026-02-20T17:00:00Z,2026-02-28T17:00:00Z,user_001031 +ticket_0133,acct_0133,user_000133,low,approval_routing,open,2026-02-21T17:00:00Z,2026-02-28T17:00:00Z,user_001032 +ticket_0134,acct_0134,user_000134,medium,supplier_onboarding,pending,2026-02-22T17:00:00Z,2026-02-28T17:00:00Z,user_001033 +ticket_0135,acct_0135,user_000135,high,billing,solved,2026-02-23T17:00:00Z,2026-02-28T17:00:00Z,user_001034 +ticket_0136,acct_0136,user_000136,critical,permissions,closed,2026-02-24T17:00:00Z,2026-02-28T17:00:00Z,user_001035 +ticket_0137,acct_0137,user_000137,low,approval_routing,open,2026-02-25T17:00:00Z,2026-02-28T17:00:00Z,user_001036 +ticket_0138,acct_0138,user_000138,medium,supplier_onboarding,pending,2026-02-26T17:00:00Z,2026-02-28T17:00:00Z,user_001037 +ticket_0139,acct_0139,user_000139,high,billing,solved,2026-02-27T17:00:00Z,2026-02-28T17:00:00Z,user_001038 +ticket_0140,acct_0140,user_000140,critical,permissions,closed,2026-02-28T17:00:00Z,2026-02-28T17:00:00Z,user_001039 +ticket_0141,acct_0141,user_000141,low,approval_routing,open,2026-02-01T17:00:00Z,2026-02-28T17:00:00Z,user_001040 +ticket_0142,acct_0142,user_000142,medium,supplier_onboarding,pending,2026-02-02T17:00:00Z,2026-02-28T17:00:00Z,user_001041 +ticket_0143,acct_0143,user_000143,high,billing,solved,2026-02-03T17:00:00Z,2026-02-28T17:00:00Z,user_001042 
+ticket_0144,acct_0144,user_000144,critical,permissions,closed,2026-02-04T17:00:00Z,2026-02-28T17:00:00Z,user_001043 +ticket_0145,acct_0145,user_000145,low,approval_routing,open,2026-02-05T17:00:00Z,2026-02-28T17:00:00Z,user_001044 +ticket_0146,acct_0146,user_000146,medium,supplier_onboarding,pending,2026-02-06T17:00:00Z,2026-02-28T17:00:00Z,user_001045 +ticket_0147,acct_0147,user_000147,high,billing,solved,2026-02-07T17:00:00Z,2026-02-28T17:00:00Z,user_001046 +ticket_0148,acct_0148,user_000148,critical,permissions,closed,2026-02-08T17:00:00Z,2026-02-28T17:00:00Z,user_001047 +ticket_0149,acct_0149,user_000149,low,approval_routing,open,2026-02-09T17:00:00Z,2026-02-28T17:00:00Z,user_001048 +ticket_0150,acct_0150,user_000150,medium,supplier_onboarding,pending,2026-02-10T17:00:00Z,2026-02-28T17:00:00Z,user_001049 +ticket_0151,acct_0151,user_000151,high,billing,solved,2026-02-11T17:00:00Z,2026-02-28T17:00:00Z,user_001050 +ticket_0152,acct_0152,user_000152,critical,permissions,closed,2026-02-12T17:00:00Z,2026-02-28T17:00:00Z,user_001051 +ticket_0153,acct_0153,user_000153,low,approval_routing,open,2026-02-13T17:00:00Z,2026-02-28T17:00:00Z,user_001052 +ticket_0154,acct_0154,user_000154,medium,supplier_onboarding,pending,2026-02-14T17:00:00Z,2026-02-28T17:00:00Z,user_001053 +ticket_0155,acct_0155,user_000155,high,billing,solved,2026-02-15T17:00:00Z,2026-02-28T17:00:00Z,user_001054 +ticket_0156,acct_0156,user_000156,critical,permissions,closed,2026-02-16T17:00:00Z,2026-02-28T17:00:00Z,user_001055 +ticket_0157,acct_0157,user_000157,low,approval_routing,open,2026-02-17T17:00:00Z,2026-02-28T17:00:00Z,user_001056 +ticket_0158,acct_0158,user_000158,medium,supplier_onboarding,pending,2026-02-18T17:00:00Z,2026-02-28T17:00:00Z,user_001057 +ticket_0159,acct_0159,user_000159,high,billing,solved,2026-02-19T17:00:00Z,2026-02-28T17:00:00Z,user_001058 +ticket_0160,acct_0160,user_000160,critical,permissions,closed,2026-02-20T17:00:00Z,2026-02-28T17:00:00Z,user_001059 
+ticket_0161,acct_0161,user_000161,low,approval_routing,open,2026-02-21T17:00:00Z,2026-02-28T17:00:00Z,user_001060 +ticket_0162,acct_0162,user_000162,medium,supplier_onboarding,pending,2026-02-22T17:00:00Z,2026-02-28T17:00:00Z,user_001061 +ticket_0163,acct_0163,user_000163,high,billing,solved,2026-02-23T17:00:00Z,2026-02-28T17:00:00Z,user_001062 +ticket_0164,acct_0164,user_000164,critical,permissions,closed,2026-02-24T17:00:00Z,2026-02-28T17:00:00Z,user_001063 +ticket_0165,acct_0165,user_000165,low,approval_routing,open,2026-02-25T17:00:00Z,2026-02-28T17:00:00Z,user_001064 +ticket_0166,acct_0166,user_000166,medium,supplier_onboarding,pending,2026-02-26T17:00:00Z,2026-02-28T17:00:00Z,user_001065 +ticket_0167,acct_0167,user_000167,high,billing,solved,2026-02-27T17:00:00Z,2026-02-28T17:00:00Z,user_001066 +ticket_0168,acct_0168,user_000168,critical,permissions,closed,2026-02-28T17:00:00Z,2026-02-28T17:00:00Z,user_001067 +ticket_0169,acct_0169,user_000169,low,approval_routing,open,2026-02-01T17:00:00Z,2026-02-28T17:00:00Z,user_001068 +ticket_0170,acct_0170,user_000170,medium,supplier_onboarding,pending,2026-02-02T17:00:00Z,2026-02-28T17:00:00Z,user_001069 +ticket_0171,acct_0171,user_000171,high,billing,solved,2026-02-03T17:00:00Z,2026-02-28T17:00:00Z,user_001070 +ticket_0172,acct_0172,user_000172,critical,permissions,closed,2026-02-04T17:00:00Z,2026-02-28T17:00:00Z,user_001071 +ticket_0173,acct_0173,user_000173,low,approval_routing,open,2026-02-05T17:00:00Z,2026-02-28T17:00:00Z,user_001072 +ticket_0174,acct_0174,user_000174,medium,supplier_onboarding,pending,2026-02-06T17:00:00Z,2026-02-28T17:00:00Z,user_001073 +ticket_0175,acct_0175,user_000175,high,billing,solved,2026-02-07T17:00:00Z,2026-02-28T17:00:00Z,user_001074 +ticket_0176,acct_0176,user_000176,critical,permissions,closed,2026-02-08T17:00:00Z,2026-02-28T17:00:00Z,user_001075 +ticket_0177,acct_0177,user_000177,low,approval_routing,open,2026-02-09T17:00:00Z,2026-02-28T17:00:00Z,user_001076 
+ticket_0178,acct_0178,user_000178,medium,supplier_onboarding,pending,2026-02-10T17:00:00Z,2026-02-28T17:00:00Z,user_001077 +ticket_0179,acct_0179,user_000179,high,billing,solved,2026-02-11T17:00:00Z,2026-02-28T17:00:00Z,user_001078 +ticket_0180,acct_0180,user_000180,critical,permissions,closed,2026-02-12T17:00:00Z,2026-02-28T17:00:00Z,user_001079 +ticket_0181,acct_0181,user_000181,low,approval_routing,open,2026-02-13T17:00:00Z,2026-02-28T17:00:00Z,user_001080 +ticket_0182,acct_0182,user_000182,medium,supplier_onboarding,pending,2026-02-14T17:00:00Z,2026-02-28T17:00:00Z,user_001081 +ticket_0183,acct_0183,user_000183,high,billing,solved,2026-02-15T17:00:00Z,2026-02-28T17:00:00Z,user_001082 +ticket_0184,acct_0184,user_000184,critical,permissions,closed,2026-02-16T17:00:00Z,2026-02-28T17:00:00Z,user_001083 +ticket_0185,acct_0185,user_000185,low,approval_routing,open,2026-02-17T17:00:00Z,2026-02-28T17:00:00Z,user_001084 +ticket_0186,acct_0186,user_000186,medium,supplier_onboarding,pending,2026-02-18T17:00:00Z,2026-02-28T17:00:00Z,user_001085 +ticket_0187,acct_0187,user_000187,high,billing,solved,2026-02-19T17:00:00Z,2026-02-28T17:00:00Z,user_001086 +ticket_0188,acct_0188,user_000188,critical,permissions,closed,2026-02-20T17:00:00Z,2026-02-28T17:00:00Z,user_001087 +ticket_0189,acct_0189,user_000189,low,approval_routing,open,2026-02-21T17:00:00Z,2026-02-28T17:00:00Z,user_001088 +ticket_0190,acct_0190,user_000190,medium,supplier_onboarding,pending,2026-02-22T17:00:00Z,2026-02-28T17:00:00Z,user_001089 +ticket_0191,acct_0191,user_000191,high,billing,solved,2026-02-23T17:00:00Z,2026-02-28T17:00:00Z,user_001090 +ticket_0192,acct_0192,user_000192,critical,permissions,closed,2026-02-24T17:00:00Z,2026-02-28T17:00:00Z,user_001091 +ticket_0193,acct_0193,user_000193,low,approval_routing,open,2026-02-25T17:00:00Z,2026-02-28T17:00:00Z,user_001092 +ticket_0194,acct_0194,user_000194,medium,supplier_onboarding,pending,2026-02-26T17:00:00Z,2026-02-28T17:00:00Z,user_001093 
+ticket_0195,acct_0195,user_000195,high,billing,solved,2026-02-27T17:00:00Z,2026-02-28T17:00:00Z,user_001094 +ticket_0196,acct_0196,user_000196,critical,permissions,closed,2026-02-28T17:00:00Z,2026-02-28T17:00:00Z,user_001095 +ticket_0197,acct_0197,user_000197,low,approval_routing,open,2026-02-01T17:00:00Z,2026-02-28T17:00:00Z,user_001096 +ticket_0198,acct_0198,user_000198,medium,supplier_onboarding,pending,2026-02-02T17:00:00Z,2026-02-28T17:00:00Z,user_001097 +ticket_0199,acct_0199,user_000199,high,billing,solved,2026-02-03T17:00:00Z,2026-02-28T17:00:00Z,user_001098 +ticket_0200,acct_0010,user_000200,critical,permissions,closed,2026-02-04T17:00:00Z,2026-02-28T17:00:00Z,user_001099 +ticket_0201,acct_0011,user_000201,low,approval_routing,open,2026-02-05T17:00:00Z,2026-02-28T17:00:00Z,user_000900 +ticket_0202,acct_0012,user_000202,medium,supplier_onboarding,pending,2026-02-06T17:00:00Z,2026-02-28T17:00:00Z,user_000901 +ticket_0203,acct_0013,user_000203,high,billing,solved,2026-02-07T17:00:00Z,2026-02-28T17:00:00Z,user_000902 +ticket_0204,acct_0014,user_000204,critical,permissions,closed,2026-02-08T17:00:00Z,2026-02-28T17:00:00Z,user_000903 +ticket_0205,acct_0015,user_000205,low,approval_routing,open,2026-02-09T17:00:00Z,2026-02-28T17:00:00Z,user_000904 +ticket_0206,acct_0016,user_000206,medium,supplier_onboarding,pending,2026-02-10T17:00:00Z,2026-02-28T17:00:00Z,user_000905 +ticket_0207,acct_0017,user_000207,high,billing,solved,2026-02-11T17:00:00Z,2026-02-28T17:00:00Z,user_000906 +ticket_0208,acct_0018,user_000208,critical,permissions,closed,2026-02-12T17:00:00Z,2026-02-28T17:00:00Z,user_000907 +ticket_0209,acct_0019,user_000209,low,approval_routing,open,2026-02-13T17:00:00Z,2026-02-28T17:00:00Z,user_000908 +ticket_0210,acct_0020,user_000210,medium,supplier_onboarding,pending,2026-02-14T17:00:00Z,2026-02-28T17:00:00Z,user_000909 +ticket_0211,acct_0021,user_000211,high,billing,solved,2026-02-15T17:00:00Z,2026-02-28T17:00:00Z,user_000910 
+ticket_0212,acct_0022,user_000212,critical,permissions,closed,2026-02-16T17:00:00Z,2026-02-28T17:00:00Z,user_000911 +ticket_0213,acct_0023,user_000213,low,approval_routing,open,2026-02-17T17:00:00Z,2026-02-28T17:00:00Z,user_000912 +ticket_0214,acct_0024,user_000214,medium,supplier_onboarding,pending,2026-02-18T17:00:00Z,2026-02-28T17:00:00Z,user_000913 +ticket_0215,acct_0025,user_000215,high,billing,solved,2026-02-19T17:00:00Z,2026-02-28T17:00:00Z,user_000914 +ticket_0216,acct_0026,user_000216,critical,permissions,closed,2026-02-20T17:00:00Z,2026-02-28T17:00:00Z,user_000915 +ticket_0217,acct_0027,user_000217,low,approval_routing,open,2026-02-21T17:00:00Z,2026-02-28T17:00:00Z,user_000916 +ticket_0218,acct_0028,user_000218,medium,supplier_onboarding,pending,2026-02-22T17:00:00Z,2026-02-28T17:00:00Z,user_000917 +ticket_0219,acct_0029,user_000219,high,billing,solved,2026-02-23T17:00:00Z,2026-02-28T17:00:00Z,user_000918 +ticket_0220,acct_0030,user_000220,critical,permissions,closed,2026-02-24T17:00:00Z,2026-02-28T17:00:00Z,user_000919 +ticket_0221,acct_0031,user_000221,low,approval_routing,open,2026-02-25T17:00:00Z,2026-02-28T17:00:00Z,user_000920 +ticket_0222,acct_0032,user_000222,medium,supplier_onboarding,pending,2026-02-26T17:00:00Z,2026-02-28T17:00:00Z,user_000921 +ticket_0223,acct_0033,user_000223,high,billing,solved,2026-02-27T17:00:00Z,2026-02-28T17:00:00Z,user_000922 +ticket_0224,acct_0034,user_000224,critical,permissions,closed,2026-02-28T17:00:00Z,2026-02-28T17:00:00Z,user_000923 +ticket_0225,acct_0035,user_000225,low,approval_routing,open,2026-02-01T17:00:00Z,2026-02-28T17:00:00Z,user_000924 +ticket_0226,acct_0036,user_000226,medium,supplier_onboarding,pending,2026-02-02T17:00:00Z,2026-02-28T17:00:00Z,user_000925 +ticket_0227,acct_0037,user_000227,high,billing,solved,2026-02-03T17:00:00Z,2026-02-28T17:00:00Z,user_000926 +ticket_0228,acct_0038,user_000228,critical,permissions,closed,2026-02-04T17:00:00Z,2026-02-28T17:00:00Z,user_000927 
+ticket_0229,acct_0039,user_000229,low,approval_routing,open,2026-02-05T17:00:00Z,2026-02-28T17:00:00Z,user_000928 +ticket_0230,acct_0040,user_000230,medium,supplier_onboarding,pending,2026-02-06T17:00:00Z,2026-02-28T17:00:00Z,user_000929 +ticket_0231,acct_0041,user_000231,high,billing,solved,2026-02-07T17:00:00Z,2026-02-28T17:00:00Z,user_000930 +ticket_0232,acct_0042,user_000232,critical,permissions,closed,2026-02-08T17:00:00Z,2026-02-28T17:00:00Z,user_000931 +ticket_0233,acct_0043,user_000233,low,approval_routing,open,2026-02-09T17:00:00Z,2026-02-28T17:00:00Z,user_000932 +ticket_0234,acct_0044,user_000234,medium,supplier_onboarding,pending,2026-02-10T17:00:00Z,2026-02-28T17:00:00Z,user_000933 +ticket_0235,acct_0045,user_000235,high,billing,solved,2026-02-11T17:00:00Z,2026-02-28T17:00:00Z,user_000934 +ticket_0236,acct_0046,user_000236,critical,permissions,closed,2026-02-12T17:00:00Z,2026-02-28T17:00:00Z,user_000935 +ticket_0237,acct_0047,user_000237,low,approval_routing,open,2026-02-13T17:00:00Z,2026-02-28T17:00:00Z,user_000936 +ticket_0238,acct_0048,user_000238,medium,supplier_onboarding,pending,2026-02-14T17:00:00Z,2026-02-28T17:00:00Z,user_000937 +ticket_0239,acct_0049,user_000239,high,billing,solved,2026-02-15T17:00:00Z,2026-02-28T17:00:00Z,user_000938 +ticket_0240,acct_0050,user_000240,critical,permissions,closed,2026-02-16T17:00:00Z,2026-02-28T17:00:00Z,user_000939 +ticket_0241,acct_0051,user_000241,low,approval_routing,open,2026-02-17T17:00:00Z,2026-02-28T17:00:00Z,user_000940 +ticket_0242,acct_0052,user_000242,medium,supplier_onboarding,pending,2026-02-18T17:00:00Z,2026-02-28T17:00:00Z,user_000941 +ticket_0243,acct_0053,user_000243,high,billing,solved,2026-02-19T17:00:00Z,2026-02-28T17:00:00Z,user_000942 +ticket_0244,acct_0054,user_000244,critical,permissions,closed,2026-02-20T17:00:00Z,2026-02-28T17:00:00Z,user_000943 +ticket_0245,acct_0055,user_000245,low,approval_routing,open,2026-02-21T17:00:00Z,2026-02-28T17:00:00Z,user_000944 
+ticket_0246,acct_0056,user_000246,medium,supplier_onboarding,pending,2026-02-22T17:00:00Z,2026-02-28T17:00:00Z,user_000945 +ticket_0247,acct_0057,user_000247,high,billing,solved,2026-02-23T17:00:00Z,2026-02-28T17:00:00Z,user_000946 +ticket_0248,acct_0058,user_000248,critical,permissions,closed,2026-02-24T17:00:00Z,2026-02-28T17:00:00Z,user_000947 +ticket_0249,acct_0059,user_000249,low,approval_routing,open,2026-02-25T17:00:00Z,2026-02-28T17:00:00Z,user_000948 +ticket_0250,acct_0060,user_000250,medium,supplier_onboarding,pending,2026-02-26T17:00:00Z,2026-02-28T17:00:00Z,user_000949 +ticket_0251,acct_0061,user_000251,high,billing,solved,2026-02-27T17:00:00Z,2026-02-28T17:00:00Z,user_000950 +ticket_0252,acct_0062,user_000252,critical,permissions,closed,2026-02-28T17:00:00Z,2026-02-28T17:00:00Z,user_000951 +ticket_0253,acct_0063,user_000253,low,approval_routing,open,2026-02-01T17:00:00Z,2026-02-28T17:00:00Z,user_000952 +ticket_0254,acct_0064,user_000254,medium,supplier_onboarding,pending,2026-02-02T17:00:00Z,2026-02-28T17:00:00Z,user_000953 +ticket_0255,acct_0065,user_000255,high,billing,solved,2026-02-03T17:00:00Z,2026-02-28T17:00:00Z,user_000954 +ticket_0256,acct_0066,user_000256,critical,permissions,closed,2026-02-04T17:00:00Z,2026-02-28T17:00:00Z,user_000955 +ticket_0257,acct_0067,user_000257,low,approval_routing,open,2026-02-05T17:00:00Z,2026-02-28T17:00:00Z,user_000956 +ticket_0258,acct_0068,user_000258,medium,supplier_onboarding,pending,2026-02-06T17:00:00Z,2026-02-28T17:00:00Z,user_000957 +ticket_0259,acct_0069,user_000259,high,billing,solved,2026-02-07T17:00:00Z,2026-02-28T17:00:00Z,user_000958 +ticket_0260,acct_0070,user_000260,critical,permissions,closed,2026-02-08T17:00:00Z,2026-02-28T17:00:00Z,user_000959 +ticket_0261,acct_0071,user_000261,low,approval_routing,open,2026-02-09T17:00:00Z,2026-02-28T17:00:00Z,user_000960 +ticket_0262,acct_0072,user_000262,medium,supplier_onboarding,pending,2026-02-10T17:00:00Z,2026-02-28T17:00:00Z,user_000961 
+ticket_0263,acct_0073,user_000263,high,billing,solved,2026-02-11T17:00:00Z,2026-02-28T17:00:00Z,user_000962 +ticket_0264,acct_0074,user_000264,critical,permissions,closed,2026-02-12T17:00:00Z,2026-02-28T17:00:00Z,user_000963 +ticket_0265,acct_0075,user_000265,low,approval_routing,open,2026-02-13T17:00:00Z,2026-02-28T17:00:00Z,user_000964 +ticket_0266,acct_0076,user_000266,medium,supplier_onboarding,pending,2026-02-14T17:00:00Z,2026-02-28T17:00:00Z,user_000965 +ticket_0267,acct_0077,user_000267,high,billing,solved,2026-02-15T17:00:00Z,2026-02-28T17:00:00Z,user_000966 +ticket_0268,acct_0078,user_000268,critical,permissions,closed,2026-02-16T17:00:00Z,2026-02-28T17:00:00Z,user_000967 +ticket_0269,acct_0079,user_000269,low,approval_routing,open,2026-02-17T17:00:00Z,2026-02-28T17:00:00Z,user_000968 +ticket_0270,acct_0080,user_000270,medium,supplier_onboarding,pending,2026-02-18T17:00:00Z,2026-02-28T17:00:00Z,user_000969 +ticket_0271,acct_0081,user_000271,high,billing,solved,2026-02-19T17:00:00Z,2026-02-28T17:00:00Z,user_000970 +ticket_0272,acct_0082,user_000272,critical,permissions,closed,2026-02-20T17:00:00Z,2026-02-28T17:00:00Z,user_000971 +ticket_0273,acct_0083,user_000273,low,approval_routing,open,2026-02-21T17:00:00Z,2026-02-28T17:00:00Z,user_000972 +ticket_0274,acct_0084,user_000274,medium,supplier_onboarding,pending,2026-02-22T17:00:00Z,2026-02-28T17:00:00Z,user_000973 +ticket_0275,acct_0085,user_000275,high,billing,solved,2026-02-23T17:00:00Z,2026-02-28T17:00:00Z,user_000974 +ticket_0276,acct_0086,user_000276,critical,permissions,closed,2026-02-24T17:00:00Z,2026-02-28T17:00:00Z,user_000975 +ticket_0277,acct_0087,user_000277,low,approval_routing,open,2026-02-25T17:00:00Z,2026-02-28T17:00:00Z,user_000976 +ticket_0278,acct_0088,user_000278,medium,supplier_onboarding,pending,2026-02-26T17:00:00Z,2026-02-28T17:00:00Z,user_000977 +ticket_0279,acct_0089,user_000279,high,billing,solved,2026-02-27T17:00:00Z,2026-02-28T17:00:00Z,user_000978 
+ticket_0280,acct_0090,user_000280,critical,permissions,closed,2026-02-28T17:00:00Z,2026-02-28T17:00:00Z,user_000979 +ticket_0281,acct_0091,user_000281,low,approval_routing,open,2026-02-01T17:00:00Z,2026-02-28T17:00:00Z,user_000980 +ticket_0282,acct_0092,user_000282,medium,supplier_onboarding,pending,2026-02-02T17:00:00Z,2026-02-28T17:00:00Z,user_000981 +ticket_0283,acct_0093,user_000283,high,billing,solved,2026-02-03T17:00:00Z,2026-02-28T17:00:00Z,user_000982 +ticket_0284,acct_0094,user_000284,critical,permissions,closed,2026-02-04T17:00:00Z,2026-02-28T17:00:00Z,user_000983 +ticket_0285,acct_0095,user_000285,low,approval_routing,open,2026-02-05T17:00:00Z,2026-02-28T17:00:00Z,user_000984 +ticket_0286,acct_0096,user_000286,medium,supplier_onboarding,pending,2026-02-06T17:00:00Z,2026-02-28T17:00:00Z,user_000985 +ticket_0287,acct_0097,user_000287,high,billing,solved,2026-02-07T17:00:00Z,2026-02-28T17:00:00Z,user_000986 +ticket_0288,acct_0098,user_000288,critical,permissions,closed,2026-02-08T17:00:00Z,2026-02-28T17:00:00Z,user_000987 +ticket_0289,acct_0099,user_000289,low,approval_routing,open,2026-02-09T17:00:00Z,2026-02-28T17:00:00Z,user_000988 +ticket_0290,acct_0100,user_000290,medium,supplier_onboarding,pending,2026-02-10T17:00:00Z,2026-02-28T17:00:00Z,user_000989 +ticket_0291,acct_0101,user_000291,high,billing,solved,2026-02-11T17:00:00Z,2026-02-28T17:00:00Z,user_000990 +ticket_0292,acct_0102,user_000292,critical,permissions,closed,2026-02-12T17:00:00Z,2026-02-28T17:00:00Z,user_000991 +ticket_0293,acct_0103,user_000293,low,approval_routing,open,2026-02-13T17:00:00Z,2026-02-28T17:00:00Z,user_000992 +ticket_0294,acct_0104,user_000294,medium,supplier_onboarding,pending,2026-02-14T17:00:00Z,2026-02-28T17:00:00Z,user_000993 +ticket_0295,acct_0105,user_000295,high,billing,solved,2026-02-15T17:00:00Z,2026-02-28T17:00:00Z,user_000994 +ticket_0296,acct_0106,user_000296,critical,permissions,closed,2026-02-16T17:00:00Z,2026-02-28T17:00:00Z,user_000995 
+ticket_0297,acct_0107,user_000297,low,approval_routing,open,2026-02-17T17:00:00Z,2026-02-28T17:00:00Z,user_000996 +ticket_0298,acct_0108,user_000298,medium,supplier_onboarding,pending,2026-02-18T17:00:00Z,2026-02-28T17:00:00Z,user_000997 +ticket_0299,acct_0109,user_000299,high,billing,solved,2026-02-19T17:00:00Z,2026-02-28T17:00:00Z,user_000998 +ticket_0300,acct_0110,user_000300,critical,permissions,closed,2026-02-20T17:00:00Z,2026-02-28T17:00:00Z,user_000999 +ticket_0301,acct_0111,user_000301,low,approval_routing,open,2026-02-21T17:00:00Z,2026-02-28T17:00:00Z,user_001000 +ticket_0302,acct_0112,user_000302,medium,supplier_onboarding,pending,2026-02-22T17:00:00Z,2026-02-28T17:00:00Z,user_001001 +ticket_0303,acct_0113,user_000303,high,billing,solved,2026-02-23T17:00:00Z,2026-02-28T17:00:00Z,user_001002 +ticket_0304,acct_0114,user_000304,critical,permissions,closed,2026-02-24T17:00:00Z,2026-02-28T17:00:00Z,user_001003 +ticket_0305,acct_0115,user_000305,low,approval_routing,open,2026-02-25T17:00:00Z,2026-02-28T17:00:00Z,user_001004 +ticket_0306,acct_0116,user_000306,medium,supplier_onboarding,pending,2026-02-26T17:00:00Z,2026-02-28T17:00:00Z,user_001005 +ticket_0307,acct_0117,user_000307,high,billing,solved,2026-02-27T17:00:00Z,2026-02-28T17:00:00Z,user_001006 +ticket_0308,acct_0118,user_000308,critical,permissions,closed,2026-02-28T17:00:00Z,2026-02-28T17:00:00Z,user_001007 +ticket_0309,acct_0119,user_000309,low,approval_routing,open,2026-02-01T17:00:00Z,2026-02-28T17:00:00Z,user_001008 +ticket_0310,acct_0120,user_000310,medium,supplier_onboarding,pending,2026-02-02T17:00:00Z,2026-02-28T17:00:00Z,user_001009 +ticket_0311,acct_0121,user_000311,high,billing,solved,2026-02-03T17:00:00Z,2026-02-28T17:00:00Z,user_001010 +ticket_0312,acct_0122,user_000312,critical,permissions,closed,2026-02-04T17:00:00Z,2026-02-28T17:00:00Z,user_001011 +ticket_0313,acct_0123,user_000313,low,approval_routing,open,2026-02-05T17:00:00Z,2026-02-28T17:00:00Z,user_001012 
+ticket_0314,acct_0124,user_000314,medium,supplier_onboarding,pending,2026-02-06T17:00:00Z,2026-02-28T17:00:00Z,user_001013 +ticket_0315,acct_0125,user_000315,high,billing,solved,2026-02-07T17:00:00Z,2026-02-28T17:00:00Z,user_001014 +ticket_0316,acct_0126,user_000316,critical,permissions,closed,2026-02-08T17:00:00Z,2026-02-28T17:00:00Z,user_001015 +ticket_0317,acct_0127,user_000317,low,approval_routing,open,2026-02-09T17:00:00Z,2026-02-28T17:00:00Z,user_001016 +ticket_0318,acct_0128,user_000318,medium,supplier_onboarding,pending,2026-02-10T17:00:00Z,2026-02-28T17:00:00Z,user_001017 +ticket_0319,acct_0129,user_000319,high,billing,solved,2026-02-11T17:00:00Z,2026-02-28T17:00:00Z,user_001018 +ticket_0320,acct_0130,user_000320,critical,permissions,closed,2026-02-12T17:00:00Z,2026-02-28T17:00:00Z,user_001019 +ticket_0321,acct_0131,user_000321,low,approval_routing,open,2026-02-13T17:00:00Z,2026-02-28T17:00:00Z,user_001020 +ticket_0322,acct_0132,user_000322,medium,supplier_onboarding,pending,2026-02-14T17:00:00Z,2026-02-28T17:00:00Z,user_001021 +ticket_0323,acct_0133,user_000323,high,billing,solved,2026-02-15T17:00:00Z,2026-02-28T17:00:00Z,user_001022 +ticket_0324,acct_0134,user_000324,critical,permissions,closed,2026-02-16T17:00:00Z,2026-02-28T17:00:00Z,user_001023 +ticket_0325,acct_0135,user_000325,low,approval_routing,open,2026-02-17T17:00:00Z,2026-02-28T17:00:00Z,user_001024 +ticket_0326,acct_0136,user_000326,medium,supplier_onboarding,pending,2026-02-18T17:00:00Z,2026-02-28T17:00:00Z,user_001025 +ticket_0327,acct_0137,user_000327,high,billing,solved,2026-02-19T17:00:00Z,2026-02-28T17:00:00Z,user_001026 +ticket_0328,acct_0138,user_000328,critical,permissions,closed,2026-02-20T17:00:00Z,2026-02-28T17:00:00Z,user_001027 +ticket_0329,acct_0139,user_000329,low,approval_routing,open,2026-02-21T17:00:00Z,2026-02-28T17:00:00Z,user_001028 +ticket_0330,acct_0140,user_000330,medium,supplier_onboarding,pending,2026-02-22T17:00:00Z,2026-02-28T17:00:00Z,user_001029 
+ticket_0331,acct_0141,user_000331,high,billing,solved,2026-02-23T17:00:00Z,2026-02-28T17:00:00Z,user_001030 +ticket_0332,acct_0142,user_000332,critical,permissions,closed,2026-02-24T17:00:00Z,2026-02-28T17:00:00Z,user_001031 +ticket_0333,acct_0143,user_000333,low,approval_routing,open,2026-02-25T17:00:00Z,2026-02-28T17:00:00Z,user_001032 +ticket_0334,acct_0144,user_000334,medium,supplier_onboarding,pending,2026-02-26T17:00:00Z,2026-02-28T17:00:00Z,user_001033 +ticket_0335,acct_0145,user_000335,high,billing,solved,2026-02-27T17:00:00Z,2026-02-28T17:00:00Z,user_001034 +ticket_0336,acct_0146,user_000336,critical,permissions,closed,2026-02-28T17:00:00Z,2026-02-28T17:00:00Z,user_001035 +ticket_0337,acct_0147,user_000337,low,approval_routing,open,2026-02-01T17:00:00Z,2026-02-28T17:00:00Z,user_001036 +ticket_0338,acct_0148,user_000338,medium,supplier_onboarding,pending,2026-02-02T17:00:00Z,2026-02-28T17:00:00Z,user_001037 +ticket_0339,acct_0149,user_000339,high,billing,solved,2026-02-03T17:00:00Z,2026-02-28T17:00:00Z,user_001038 +ticket_0340,acct_0150,user_000340,critical,permissions,closed,2026-02-04T17:00:00Z,2026-02-28T17:00:00Z,user_001039 +ticket_0341,acct_0151,user_000341,low,approval_routing,open,2026-02-05T17:00:00Z,2026-02-28T17:00:00Z,user_001040 +ticket_0342,acct_0152,user_000342,medium,supplier_onboarding,pending,2026-02-06T17:00:00Z,2026-02-28T17:00:00Z,user_001041 +ticket_0343,acct_0153,user_000343,high,billing,solved,2026-02-07T17:00:00Z,2026-02-28T17:00:00Z,user_001042 +ticket_0344,acct_0154,user_000344,critical,permissions,closed,2026-02-08T17:00:00Z,2026-02-28T17:00:00Z,user_001043 +ticket_0345,acct_0155,user_000345,low,approval_routing,open,2026-02-09T17:00:00Z,2026-02-28T17:00:00Z,user_001044 +ticket_0346,acct_0156,user_000346,medium,supplier_onboarding,pending,2026-02-10T17:00:00Z,2026-02-28T17:00:00Z,user_001045 +ticket_0347,acct_0157,user_000347,high,billing,solved,2026-02-11T17:00:00Z,2026-02-28T17:00:00Z,user_001046 
+ticket_0348,acct_0158,user_000348,critical,permissions,closed,2026-02-12T17:00:00Z,2026-02-28T17:00:00Z,user_001047 +ticket_0349,acct_0159,user_000349,low,approval_routing,open,2026-02-13T17:00:00Z,2026-02-28T17:00:00Z,user_001048 +ticket_0350,acct_0160,user_000350,medium,supplier_onboarding,pending,2026-02-14T17:00:00Z,2026-02-28T17:00:00Z,user_001049 +ticket_0351,acct_0161,user_000351,high,billing,solved,2026-02-15T17:00:00Z,2026-02-28T17:00:00Z,user_001050 +ticket_0352,acct_0162,user_000352,critical,permissions,closed,2026-02-16T17:00:00Z,2026-02-28T17:00:00Z,user_001051 +ticket_0353,acct_0163,user_000353,low,approval_routing,open,2026-02-17T17:00:00Z,2026-02-28T17:00:00Z,user_001052 +ticket_0354,acct_0164,user_000354,medium,supplier_onboarding,pending,2026-02-18T17:00:00Z,2026-02-28T17:00:00Z,user_001053 +ticket_0355,acct_0165,user_000355,high,billing,solved,2026-02-19T17:00:00Z,2026-02-28T17:00:00Z,user_001054 +ticket_0356,acct_0166,user_000356,critical,permissions,closed,2026-02-20T17:00:00Z,2026-02-28T17:00:00Z,user_001055 +ticket_0357,acct_0167,user_000357,low,approval_routing,open,2026-02-21T17:00:00Z,2026-02-28T17:00:00Z,user_001056 +ticket_0358,acct_0168,user_000358,medium,supplier_onboarding,pending,2026-02-22T17:00:00Z,2026-02-28T17:00:00Z,user_001057 +ticket_0359,acct_0169,user_000359,high,billing,solved,2026-02-23T17:00:00Z,2026-02-28T17:00:00Z,user_001058 +ticket_0360,acct_0170,user_000360,critical,permissions,closed,2026-02-24T17:00:00Z,2026-02-28T17:00:00Z,user_001059 +ticket_0361,acct_0171,user_000361,low,approval_routing,open,2026-02-25T17:00:00Z,2026-02-28T17:00:00Z,user_001060 +ticket_0362,acct_0172,user_000362,medium,supplier_onboarding,pending,2026-02-26T17:00:00Z,2026-02-28T17:00:00Z,user_001061 +ticket_0363,acct_0173,user_000363,high,billing,solved,2026-02-27T17:00:00Z,2026-02-28T17:00:00Z,user_001062 +ticket_0364,acct_0174,user_000364,critical,permissions,closed,2026-02-28T17:00:00Z,2026-02-28T17:00:00Z,user_001063 
+ticket_0365,acct_0175,user_000365,low,approval_routing,open,2026-02-01T17:00:00Z,2026-02-28T17:00:00Z,user_001064 +ticket_0366,acct_0176,user_000366,medium,supplier_onboarding,pending,2026-02-02T17:00:00Z,2026-02-28T17:00:00Z,user_001065 +ticket_0367,acct_0177,user_000367,high,billing,solved,2026-02-03T17:00:00Z,2026-02-28T17:00:00Z,user_001066 +ticket_0368,acct_0178,user_000368,critical,permissions,closed,2026-02-04T17:00:00Z,2026-02-28T17:00:00Z,user_001067 +ticket_0369,acct_0179,user_000369,low,approval_routing,open,2026-02-05T17:00:00Z,2026-02-28T17:00:00Z,user_001068 +ticket_0370,acct_0180,user_000370,medium,supplier_onboarding,pending,2026-02-06T17:00:00Z,2026-02-28T17:00:00Z,user_001069 +ticket_0371,acct_0181,user_000371,high,billing,solved,2026-02-07T17:00:00Z,2026-02-28T17:00:00Z,user_001070 +ticket_0372,acct_0182,user_000372,critical,permissions,closed,2026-02-08T17:00:00Z,2026-02-28T17:00:00Z,user_001071 +ticket_0373,acct_0183,user_000373,low,approval_routing,open,2026-02-09T17:00:00Z,2026-02-28T17:00:00Z,user_001072 +ticket_0374,acct_0184,user_000374,medium,supplier_onboarding,pending,2026-02-10T17:00:00Z,2026-02-28T17:00:00Z,user_001073 +ticket_0375,acct_0185,user_000375,high,billing,solved,2026-02-11T17:00:00Z,2026-02-28T17:00:00Z,user_001074 +ticket_0376,acct_0186,user_000376,critical,permissions,closed,2026-02-12T17:00:00Z,2026-02-28T17:00:00Z,user_001075 +ticket_0377,acct_0187,user_000377,low,approval_routing,open,2026-02-13T17:00:00Z,2026-02-28T17:00:00Z,user_001076 +ticket_0378,acct_0188,user_000378,medium,supplier_onboarding,pending,2026-02-14T17:00:00Z,2026-02-28T17:00:00Z,user_001077 +ticket_0379,acct_0189,user_000379,high,billing,solved,2026-02-15T17:00:00Z,2026-02-28T17:00:00Z,user_001078 +ticket_0380,acct_0190,user_000380,critical,permissions,closed,2026-02-16T17:00:00Z,2026-02-28T17:00:00Z,user_001079 +ticket_0381,acct_0191,user_000381,low,approval_routing,open,2026-02-17T17:00:00Z,2026-02-28T17:00:00Z,user_001080 
+ticket_0382,acct_0192,user_000382,medium,supplier_onboarding,pending,2026-02-18T17:00:00Z,2026-02-28T17:00:00Z,user_001081 +ticket_0383,acct_0193,user_000383,high,billing,solved,2026-02-19T17:00:00Z,2026-02-28T17:00:00Z,user_001082 +ticket_0384,acct_0194,user_000384,critical,permissions,closed,2026-02-20T17:00:00Z,2026-02-28T17:00:00Z,user_001083 +ticket_0385,acct_0195,user_000385,low,approval_routing,open,2026-02-21T17:00:00Z,2026-02-28T17:00:00Z,user_001084 +ticket_0386,acct_0196,user_000386,medium,supplier_onboarding,pending,2026-02-22T17:00:00Z,2026-02-28T17:00:00Z,user_001085 +ticket_0387,acct_0197,user_000387,high,billing,solved,2026-02-23T17:00:00Z,2026-02-28T17:00:00Z,user_001086 +ticket_0388,acct_0198,user_000388,critical,permissions,closed,2026-02-24T17:00:00Z,2026-02-28T17:00:00Z,user_001087 +ticket_0389,acct_0199,user_000389,low,approval_routing,open,2026-02-25T17:00:00Z,2026-02-28T17:00:00Z,user_001088 +ticket_0390,acct_0010,user_000390,medium,supplier_onboarding,pending,2026-02-26T17:00:00Z,2026-02-28T17:00:00Z,user_001089 +ticket_0391,acct_0011,user_000391,high,billing,solved,2026-02-27T17:00:00Z,2026-02-28T17:00:00Z,user_001090 +ticket_0392,acct_0012,user_000392,critical,permissions,closed,2026-02-28T17:00:00Z,2026-02-28T17:00:00Z,user_001091 +ticket_0393,acct_0013,user_000393,low,approval_routing,open,2026-02-01T17:00:00Z,2026-02-28T17:00:00Z,user_001092 +ticket_0394,acct_0014,user_000394,medium,supplier_onboarding,pending,2026-02-02T17:00:00Z,2026-02-28T17:00:00Z,user_001093 +ticket_0395,acct_0015,user_000395,high,billing,solved,2026-02-03T17:00:00Z,2026-02-28T17:00:00Z,user_001094 +ticket_0396,acct_0016,user_000396,critical,permissions,closed,2026-02-04T17:00:00Z,2026-02-28T17:00:00Z,user_001095 +ticket_0397,acct_0017,user_000397,low,approval_routing,open,2026-02-05T17:00:00Z,2026-02-28T17:00:00Z,user_001096 +ticket_0398,acct_0018,user_000398,medium,supplier_onboarding,pending,2026-02-06T17:00:00Z,2026-02-28T17:00:00Z,user_001097 
+ticket_0399,acct_0019,user_000399,high,billing,solved,2026-02-07T17:00:00Z,2026-02-28T17:00:00Z,user_001098 +ticket_0400,acct_0020,user_000400,critical,permissions,closed,2026-02-08T17:00:00Z,2026-02-28T17:00:00Z,user_001099 +ticket_0401,acct_0021,user_000401,low,approval_routing,open,2026-02-09T17:00:00Z,2026-02-28T17:00:00Z,user_000900 +ticket_0402,acct_0022,user_000402,medium,supplier_onboarding,pending,2026-02-10T17:00:00Z,2026-02-28T17:00:00Z,user_000901 +ticket_0403,acct_0023,user_000403,high,billing,solved,2026-02-11T17:00:00Z,2026-02-28T17:00:00Z,user_000902 +ticket_0404,acct_0024,user_000404,critical,permissions,closed,2026-02-12T17:00:00Z,2026-02-28T17:00:00Z,user_000903 +ticket_0405,acct_0025,user_000405,low,approval_routing,open,2026-02-13T17:00:00Z,2026-02-28T17:00:00Z,user_000904 +ticket_0406,acct_0026,user_000406,medium,supplier_onboarding,pending,2026-02-14T17:00:00Z,2026-02-28T17:00:00Z,user_000905 +ticket_0407,acct_0027,user_000407,high,billing,solved,2026-02-15T17:00:00Z,2026-02-28T17:00:00Z,user_000906 +ticket_0408,acct_0028,user_000408,critical,permissions,closed,2026-02-16T17:00:00Z,2026-02-28T17:00:00Z,user_000907 +ticket_0409,acct_0029,user_000409,low,approval_routing,open,2026-02-17T17:00:00Z,2026-02-28T17:00:00Z,user_000908 +ticket_0410,acct_0030,user_000410,medium,supplier_onboarding,pending,2026-02-18T17:00:00Z,2026-02-28T17:00:00Z,user_000909 +ticket_0411,acct_0031,user_000411,high,billing,solved,2026-02-19T17:00:00Z,2026-02-28T17:00:00Z,user_000910 +ticket_0412,acct_0032,user_000412,critical,permissions,closed,2026-02-20T17:00:00Z,2026-02-28T17:00:00Z,user_000911 +ticket_0413,acct_0033,user_000413,low,approval_routing,open,2026-02-21T17:00:00Z,2026-02-28T17:00:00Z,user_000912 +ticket_0414,acct_0034,user_000414,medium,supplier_onboarding,pending,2026-02-22T17:00:00Z,2026-02-28T17:00:00Z,user_000913 +ticket_0415,acct_0035,user_000415,high,billing,solved,2026-02-23T17:00:00Z,2026-02-28T17:00:00Z,user_000914 
+ticket_0416,acct_0036,user_000416,critical,permissions,closed,2026-02-24T17:00:00Z,2026-02-28T17:00:00Z,user_000915 +ticket_0417,acct_0037,user_000417,low,approval_routing,open,2026-02-25T17:00:00Z,2026-02-28T17:00:00Z,user_000916 +ticket_0418,acct_0038,user_000418,medium,supplier_onboarding,pending,2026-02-26T17:00:00Z,2026-02-28T17:00:00Z,user_000917 +ticket_0419,acct_0039,user_000419,high,billing,solved,2026-02-27T17:00:00Z,2026-02-28T17:00:00Z,user_000918 +ticket_0420,acct_0040,user_000420,critical,permissions,closed,2026-02-28T17:00:00Z,2026-02-28T17:00:00Z,user_000919 +ticket_0421,acct_0041,user_000421,low,approval_routing,open,2026-02-01T17:00:00Z,2026-02-28T17:00:00Z,user_000920 +ticket_0422,acct_0042,user_000422,medium,supplier_onboarding,pending,2026-02-02T17:00:00Z,2026-02-28T17:00:00Z,user_000921 +ticket_0423,acct_0043,user_000423,high,billing,solved,2026-02-03T17:00:00Z,2026-02-28T17:00:00Z,user_000922 +ticket_0424,acct_0044,user_000424,critical,permissions,closed,2026-02-04T17:00:00Z,2026-02-28T17:00:00Z,user_000923 +ticket_0425,acct_0045,user_000425,low,approval_routing,open,2026-02-05T17:00:00Z,2026-02-28T17:00:00Z,user_000924 +ticket_0426,acct_0046,user_000426,medium,supplier_onboarding,pending,2026-02-06T17:00:00Z,2026-02-28T17:00:00Z,user_000925 +ticket_0427,acct_0047,user_000427,high,billing,solved,2026-02-07T17:00:00Z,2026-02-28T17:00:00Z,user_000926 +ticket_0428,acct_0048,user_000428,critical,permissions,closed,2026-02-08T17:00:00Z,2026-02-28T17:00:00Z,user_000927 +ticket_0429,acct_0049,user_000429,low,approval_routing,open,2026-02-09T17:00:00Z,2026-02-28T17:00:00Z,user_000928 +ticket_0430,acct_0050,user_000430,medium,supplier_onboarding,pending,2026-02-10T17:00:00Z,2026-02-28T17:00:00Z,user_000929 +ticket_0431,acct_0051,user_000431,high,billing,solved,2026-02-11T17:00:00Z,2026-02-28T17:00:00Z,user_000930 +ticket_0432,acct_0052,user_000432,critical,permissions,closed,2026-02-12T17:00:00Z,2026-02-28T17:00:00Z,user_000931 
+ticket_0433,acct_0053,user_000433,low,approval_routing,open,2026-02-13T17:00:00Z,2026-02-28T17:00:00Z,user_000932 +ticket_0434,acct_0054,user_000434,medium,supplier_onboarding,pending,2026-02-14T17:00:00Z,2026-02-28T17:00:00Z,user_000933 +ticket_0435,acct_0055,user_000435,high,billing,solved,2026-02-15T17:00:00Z,2026-02-28T17:00:00Z,user_000934 +ticket_0436,acct_0056,user_000436,critical,permissions,closed,2026-02-16T17:00:00Z,2026-02-28T17:00:00Z,user_000935 +ticket_0437,acct_0057,user_000437,low,approval_routing,open,2026-02-17T17:00:00Z,2026-02-28T17:00:00Z,user_000936 +ticket_0438,acct_0058,user_000438,medium,supplier_onboarding,pending,2026-02-18T17:00:00Z,2026-02-28T17:00:00Z,user_000937 +ticket_0439,acct_0059,user_000439,high,billing,solved,2026-02-19T17:00:00Z,2026-02-28T17:00:00Z,user_000938 +ticket_0440,acct_0060,user_000440,critical,permissions,closed,2026-02-20T17:00:00Z,2026-02-28T17:00:00Z,user_000939 +ticket_0441,acct_0061,user_000441,low,approval_routing,open,2026-02-21T17:00:00Z,2026-02-28T17:00:00Z,user_000940 +ticket_0442,acct_0062,user_000442,medium,supplier_onboarding,pending,2026-02-22T17:00:00Z,2026-02-28T17:00:00Z,user_000941 +ticket_0443,acct_0063,user_000443,high,billing,solved,2026-02-23T17:00:00Z,2026-02-28T17:00:00Z,user_000942 +ticket_0444,acct_0064,user_000444,critical,permissions,closed,2026-02-24T17:00:00Z,2026-02-28T17:00:00Z,user_000943 +ticket_0445,acct_0065,user_000445,low,approval_routing,open,2026-02-25T17:00:00Z,2026-02-28T17:00:00Z,user_000944 +ticket_0446,acct_0066,user_000446,medium,supplier_onboarding,pending,2026-02-26T17:00:00Z,2026-02-28T17:00:00Z,user_000945 +ticket_0447,acct_0067,user_000447,high,billing,solved,2026-02-27T17:00:00Z,2026-02-28T17:00:00Z,user_000946 +ticket_0448,acct_0068,user_000448,critical,permissions,closed,2026-02-28T17:00:00Z,2026-02-28T17:00:00Z,user_000947 +ticket_0449,acct_0069,user_000449,low,approval_routing,open,2026-02-01T17:00:00Z,2026-02-28T17:00:00Z,user_000948 
+ticket_0450,acct_0070,user_000450,medium,supplier_onboarding,pending,2026-02-02T17:00:00Z,2026-02-28T17:00:00Z,user_000949 +ticket_0451,acct_0071,user_000451,high,billing,solved,2026-02-03T17:00:00Z,2026-02-28T17:00:00Z,user_000950 +ticket_0452,acct_0072,user_000452,critical,permissions,closed,2026-02-04T17:00:00Z,2026-02-28T17:00:00Z,user_000951 +ticket_0453,acct_0073,user_000453,low,approval_routing,open,2026-02-05T17:00:00Z,2026-02-28T17:00:00Z,user_000952 +ticket_0454,acct_0074,user_000454,medium,supplier_onboarding,pending,2026-02-06T17:00:00Z,2026-02-28T17:00:00Z,user_000953 +ticket_0455,acct_0075,user_000455,high,billing,solved,2026-02-07T17:00:00Z,2026-02-28T17:00:00Z,user_000954 +ticket_0456,acct_0076,user_000456,critical,permissions,closed,2026-02-08T17:00:00Z,2026-02-28T17:00:00Z,user_000955 +ticket_0457,acct_0077,user_000457,low,approval_routing,open,2026-02-09T17:00:00Z,2026-02-28T17:00:00Z,user_000956 +ticket_0458,acct_0078,user_000458,medium,supplier_onboarding,pending,2026-02-10T17:00:00Z,2026-02-28T17:00:00Z,user_000957 +ticket_0459,acct_0079,user_000459,high,billing,solved,2026-02-11T17:00:00Z,2026-02-28T17:00:00Z,user_000958 +ticket_0460,acct_0080,user_000460,critical,permissions,closed,2026-02-12T17:00:00Z,2026-02-28T17:00:00Z,user_000959 +ticket_0461,acct_0081,user_000461,low,approval_routing,open,2026-02-13T17:00:00Z,2026-02-28T17:00:00Z,user_000960 +ticket_0462,acct_0082,user_000462,medium,supplier_onboarding,pending,2026-02-14T17:00:00Z,2026-02-28T17:00:00Z,user_000961 +ticket_0463,acct_0083,user_000463,high,billing,solved,2026-02-15T17:00:00Z,2026-02-28T17:00:00Z,user_000962 +ticket_0464,acct_0084,user_000464,critical,permissions,closed,2026-02-16T17:00:00Z,2026-02-28T17:00:00Z,user_000963 +ticket_0465,acct_0085,user_000465,low,approval_routing,open,2026-02-17T17:00:00Z,2026-02-28T17:00:00Z,user_000964 +ticket_0466,acct_0086,user_000466,medium,supplier_onboarding,pending,2026-02-18T17:00:00Z,2026-02-28T17:00:00Z,user_000965 
+ticket_0467,acct_0087,user_000467,high,billing,solved,2026-02-19T17:00:00Z,2026-02-28T17:00:00Z,user_000966 +ticket_0468,acct_0088,user_000468,critical,permissions,closed,2026-02-20T17:00:00Z,2026-02-28T17:00:00Z,user_000967 +ticket_0469,acct_0089,user_000469,low,approval_routing,open,2026-02-21T17:00:00Z,2026-02-28T17:00:00Z,user_000968 +ticket_0470,acct_0090,user_000470,medium,supplier_onboarding,pending,2026-02-22T17:00:00Z,2026-02-28T17:00:00Z,user_000969 +ticket_0471,acct_0091,user_000471,high,billing,solved,2026-02-23T17:00:00Z,2026-02-28T17:00:00Z,user_000970 +ticket_0472,acct_0092,user_000472,critical,permissions,closed,2026-02-24T17:00:00Z,2026-02-28T17:00:00Z,user_000971 +ticket_0473,acct_0093,user_000473,low,approval_routing,open,2026-02-25T17:00:00Z,2026-02-28T17:00:00Z,user_000972 +ticket_0474,acct_0094,user_000474,medium,supplier_onboarding,pending,2026-02-26T17:00:00Z,2026-02-28T17:00:00Z,user_000973 +ticket_0475,acct_0095,user_000475,high,billing,solved,2026-02-27T17:00:00Z,2026-02-28T17:00:00Z,user_000974 +ticket_0476,acct_0096,user_000476,critical,permissions,closed,2026-02-28T17:00:00Z,2026-02-28T17:00:00Z,user_000975 +ticket_0477,acct_0097,user_000477,low,approval_routing,open,2026-02-01T17:00:00Z,2026-02-28T17:00:00Z,user_000976 +ticket_0478,acct_0098,user_000478,medium,supplier_onboarding,pending,2026-02-02T17:00:00Z,2026-02-28T17:00:00Z,user_000977 +ticket_0479,acct_0099,user_000479,high,billing,solved,2026-02-03T17:00:00Z,2026-02-28T17:00:00Z,user_000978 +ticket_0480,acct_0100,user_000480,critical,permissions,closed,2026-02-04T17:00:00Z,2026-02-28T17:00:00Z,user_000979 +ticket_0481,acct_0101,user_000481,low,approval_routing,open,2026-02-05T17:00:00Z,2026-02-28T17:00:00Z,user_000980 +ticket_0482,acct_0102,user_000482,medium,supplier_onboarding,pending,2026-02-06T17:00:00Z,2026-02-28T17:00:00Z,user_000981 +ticket_0483,acct_0103,user_000483,high,billing,solved,2026-02-07T17:00:00Z,2026-02-28T17:00:00Z,user_000982 
+ticket_0484,acct_0104,user_000484,critical,permissions,closed,2026-02-08T17:00:00Z,2026-02-28T17:00:00Z,user_000983 +ticket_0485,acct_0105,user_000485,low,approval_routing,open,2026-02-09T17:00:00Z,2026-02-28T17:00:00Z,user_000984 +ticket_0486,acct_0106,user_000486,medium,supplier_onboarding,pending,2026-02-10T17:00:00Z,2026-02-28T17:00:00Z,user_000985 +ticket_0487,acct_0107,user_000487,high,billing,solved,2026-02-11T17:00:00Z,2026-02-28T17:00:00Z,user_000986 +ticket_0488,acct_0108,user_000488,critical,permissions,closed,2026-02-12T17:00:00Z,2026-02-28T17:00:00Z,user_000987 +ticket_0489,acct_0109,user_000489,low,approval_routing,open,2026-02-13T17:00:00Z,2026-02-28T17:00:00Z,user_000988 +ticket_0490,acct_0110,user_000490,medium,supplier_onboarding,pending,2026-02-14T17:00:00Z,2026-02-28T17:00:00Z,user_000989 +ticket_0491,acct_0111,user_000491,high,billing,solved,2026-02-15T17:00:00Z,2026-02-28T17:00:00Z,user_000990 +ticket_0492,acct_0112,user_000492,critical,permissions,closed,2026-02-16T17:00:00Z,2026-02-28T17:00:00Z,user_000991 +ticket_0493,acct_0113,user_000493,low,approval_routing,open,2026-02-17T17:00:00Z,2026-02-28T17:00:00Z,user_000992 +ticket_0494,acct_0114,user_000494,medium,supplier_onboarding,pending,2026-02-18T17:00:00Z,2026-02-28T17:00:00Z,user_000993 +ticket_0495,acct_0115,user_000495,high,billing,solved,2026-02-19T17:00:00Z,2026-02-28T17:00:00Z,user_000994 +ticket_0496,acct_0116,user_000496,critical,permissions,closed,2026-02-20T17:00:00Z,2026-02-28T17:00:00Z,user_000995 +ticket_0497,acct_0117,user_000497,low,approval_routing,open,2026-02-21T17:00:00Z,2026-02-28T17:00:00Z,user_000996 +ticket_0498,acct_0118,user_000498,medium,supplier_onboarding,pending,2026-02-22T17:00:00Z,2026-02-28T17:00:00Z,user_000997 +ticket_0499,acct_0119,user_000499,high,billing,solved,2026-02-23T17:00:00Z,2026-02-28T17:00:00Z,user_000998 +ticket_0500,acct_0120,user_000500,critical,permissions,closed,2026-02-24T17:00:00Z,2026-02-28T17:00:00Z,user_000999 
+ticket_0501,acct_0121,user_000501,low,approval_routing,open,2026-02-25T17:00:00Z,2026-02-28T17:00:00Z,user_001000 +ticket_0502,acct_0122,user_000502,medium,supplier_onboarding,pending,2026-02-26T17:00:00Z,2026-02-28T17:00:00Z,user_001001 +ticket_0503,acct_0123,user_000503,high,billing,solved,2026-02-27T17:00:00Z,2026-02-28T17:00:00Z,user_001002 +ticket_0504,acct_0124,user_000504,critical,permissions,closed,2026-02-28T17:00:00Z,2026-02-28T17:00:00Z,user_001003 +ticket_0505,acct_0125,user_000505,low,approval_routing,open,2026-02-01T17:00:00Z,2026-02-28T17:00:00Z,user_001004 +ticket_0506,acct_0126,user_000506,medium,supplier_onboarding,pending,2026-02-02T17:00:00Z,2026-02-28T17:00:00Z,user_001005 +ticket_0507,acct_0127,user_000507,high,billing,solved,2026-02-03T17:00:00Z,2026-02-28T17:00:00Z,user_001006 +ticket_0508,acct_0128,user_000508,critical,permissions,closed,2026-02-04T17:00:00Z,2026-02-28T17:00:00Z,user_001007 +ticket_0509,acct_0129,user_000509,low,approval_routing,open,2026-02-05T17:00:00Z,2026-02-28T17:00:00Z,user_001008 +ticket_0510,acct_0130,user_000510,medium,supplier_onboarding,pending,2026-02-06T17:00:00Z,2026-02-28T17:00:00Z,user_001009 +ticket_0511,acct_0131,user_000511,high,billing,solved,2026-02-07T17:00:00Z,2026-02-28T17:00:00Z,user_001010 +ticket_0512,acct_0132,user_000512,critical,permissions,closed,2026-02-08T17:00:00Z,2026-02-28T17:00:00Z,user_001011 +ticket_0513,acct_0133,user_000513,low,approval_routing,open,2026-02-09T17:00:00Z,2026-02-28T17:00:00Z,user_001012 +ticket_0514,acct_0134,user_000514,medium,supplier_onboarding,pending,2026-02-10T17:00:00Z,2026-02-28T17:00:00Z,user_001013 +ticket_0515,acct_0135,user_000515,high,billing,solved,2026-02-11T17:00:00Z,2026-02-28T17:00:00Z,user_001014 +ticket_0516,acct_0136,user_000516,critical,permissions,closed,2026-02-12T17:00:00Z,2026-02-28T17:00:00Z,user_001015 +ticket_0517,acct_0137,user_000517,low,approval_routing,open,2026-02-13T17:00:00Z,2026-02-28T17:00:00Z,user_001016 
+ticket_0518,acct_0138,user_000518,medium,supplier_onboarding,pending,2026-02-14T17:00:00Z,2026-02-28T17:00:00Z,user_001017 +ticket_0519,acct_0139,user_000519,high,billing,solved,2026-02-15T17:00:00Z,2026-02-28T17:00:00Z,user_001018 +ticket_0520,acct_0140,user_000520,critical,permissions,closed,2026-02-16T17:00:00Z,2026-02-28T17:00:00Z,user_001019 diff --git a/packages/cli/assets/demo/orbit/raw-sources/warehouse/users.csv b/packages/cli/assets/demo/orbit/raw-sources/warehouse/users.csv new file mode 100644 index 00000000..55160667 --- /dev/null +++ b/packages/cli/assets/demo/orbit/raw-sources/warehouse/users.csv @@ -0,0 +1,1261 @@ +user_id,account_id,email,role,is_requester,is_internal,is_test,created_at,slack_user_id,looker_user_id,notion_user_id,drive_owner_id +user_000001,acct_0010,user-000001@customer-010.example.com,requester,true,false,false,2025-01-01T00:00:00Z,U000001,looker_000001,notion_000001,drive_000001 +user_000002,acct_0011,user-000002@customer-011.example.com,approver,true,false,false,2025-02-02T00:00:00Z,U000002,looker_000002,notion_000002,drive_000002 +user_000003,acct_0012,user-000003@customer-012.example.com,admin,true,false,false,2025-03-03T00:00:00Z,U000003,looker_000003,notion_000003,drive_000003 +user_000004,acct_0013,user-000004@customer-013.example.com,finance,true,false,false,2025-04-04T00:00:00Z,U000004,looker_000004,notion_000004,drive_000004 +user_000005,acct_0014,user-000005@customer-014.example.com,requester,true,false,false,2025-05-05T00:00:00Z,U000005,looker_000005,notion_000005,drive_000005 +user_000006,acct_0015,user-000006@customer-015.example.com,approver,true,false,false,2025-06-06T00:00:00Z,U000006,looker_000006,notion_000006,drive_000006 +user_000007,acct_0016,user-000007@customer-016.example.com,admin,true,false,false,2025-07-07T00:00:00Z,U000007,looker_000007,notion_000007,drive_000007 
+user_000008,acct_0017,user-000008@customer-017.example.com,finance,true,false,false,2025-08-08T00:00:00Z,U000008,looker_000008,notion_000008,drive_000008 +user_000009,acct_0018,user-000009@customer-018.example.com,requester,true,false,false,2025-09-09T00:00:00Z,U000009,looker_000009,notion_000009,drive_000009 +user_000010,acct_0019,user-000010@customer-019.example.com,approver,true,false,false,2025-10-10T00:00:00Z,U000010,looker_000010,notion_000010,drive_000010 +user_000011,acct_0020,user-000011@customer-020.example.com,admin,true,false,false,2025-11-11T00:00:00Z,U000011,looker_000011,notion_000011,drive_000011 +user_000012,acct_0021,user-000012@customer-021.example.com,finance,true,false,false,2025-12-12T00:00:00Z,U000012,looker_000012,notion_000012,drive_000012 +user_000013,acct_0022,user-000013@customer-022.example.com,requester,true,false,false,2025-01-13T00:00:00Z,U000013,looker_000013,notion_000013,drive_000013 +user_000014,acct_0023,user-000014@customer-023.example.com,approver,true,false,false,2025-02-14T00:00:00Z,U000014,looker_000014,notion_000014,drive_000014 +user_000015,acct_0024,user-000015@customer-024.example.com,admin,true,false,false,2025-03-15T00:00:00Z,U000015,looker_000015,notion_000015,drive_000015 +user_000016,acct_0025,user-000016@customer-025.example.com,finance,true,false,false,2025-04-16T00:00:00Z,U000016,looker_000016,notion_000016,drive_000016 +user_000017,acct_0026,user-000017@customer-026.example.com,requester,true,false,false,2025-05-17T00:00:00Z,U000017,looker_000017,notion_000017,drive_000017 +user_000018,acct_0027,user-000018@customer-027.example.com,approver,true,false,false,2025-06-18T00:00:00Z,U000018,looker_000018,notion_000018,drive_000018 +user_000019,acct_0028,user-000019@customer-028.example.com,admin,true,false,false,2025-07-19T00:00:00Z,U000019,looker_000019,notion_000019,drive_000019 
+user_000020,acct_0029,user-000020@customer-029.example.com,finance,true,false,false,2025-08-20T00:00:00Z,U000020,looker_000020,notion_000020,drive_000020 +user_000021,acct_0030,user-000021@customer-030.example.com,requester,true,false,false,2025-09-21T00:00:00Z,U000021,looker_000021,notion_000021,drive_000021 +user_000022,acct_0031,user-000022@customer-031.example.com,approver,true,false,false,2025-10-22T00:00:00Z,U000022,looker_000022,notion_000022,drive_000022 +user_000023,acct_0032,user-000023@customer-032.example.com,admin,true,false,false,2025-11-23T00:00:00Z,U000023,looker_000023,notion_000023,drive_000023 +user_000024,acct_0033,user-000024@customer-033.example.com,finance,true,false,false,2025-12-24T00:00:00Z,U000024,looker_000024,notion_000024,drive_000024 +user_000025,acct_0034,user-000025@customer-034.example.com,requester,true,false,false,2025-01-25T00:00:00Z,U000025,looker_000025,notion_000025,drive_000025 +user_000026,acct_0035,user-000026@customer-035.example.com,approver,true,false,false,2025-02-26T00:00:00Z,U000026,looker_000026,notion_000026,drive_000026 +user_000027,acct_0036,user-000027@customer-036.example.com,admin,true,false,false,2025-03-27T00:00:00Z,U000027,looker_000027,notion_000027,drive_000027 +user_000028,acct_0037,user-000028@customer-037.example.com,finance,true,false,false,2025-04-28T00:00:00Z,U000028,looker_000028,notion_000028,drive_000028 +user_000029,acct_0038,user-000029@customer-038.example.com,requester,true,false,false,2025-05-01T00:00:00Z,U000029,looker_000029,notion_000029,drive_000029 +user_000030,acct_0039,user-000030@customer-039.example.com,approver,true,false,false,2025-06-02T00:00:00Z,U000030,looker_000030,notion_000030,drive_000030 +user_000031,acct_0040,user-000031@customer-040.example.com,admin,true,false,false,2025-07-03T00:00:00Z,U000031,looker_000031,notion_000031,drive_000031 
+user_000032,acct_0041,user-000032@customer-041.example.com,finance,true,false,false,2025-08-04T00:00:00Z,U000032,looker_000032,notion_000032,drive_000032 +user_000033,acct_0042,user-000033@customer-042.example.com,requester,true,false,false,2025-09-05T00:00:00Z,U000033,looker_000033,notion_000033,drive_000033 +user_000034,acct_0043,user-000034@customer-043.example.com,approver,true,false,false,2025-10-06T00:00:00Z,U000034,looker_000034,notion_000034,drive_000034 +user_000035,acct_0044,user-000035@customer-044.example.com,admin,true,false,false,2025-11-07T00:00:00Z,U000035,looker_000035,notion_000035,drive_000035 +user_000036,acct_0045,user-000036@customer-045.example.com,finance,true,false,false,2025-12-08T00:00:00Z,U000036,looker_000036,notion_000036,drive_000036 +user_000037,acct_0046,user-000037@customer-046.example.com,requester,true,false,false,2025-01-09T00:00:00Z,U000037,looker_000037,notion_000037,drive_000037 +user_000038,acct_0047,user-000038@customer-047.example.com,approver,true,false,false,2025-02-10T00:00:00Z,U000038,looker_000038,notion_000038,drive_000038 +user_000039,acct_0048,user-000039@customer-048.example.com,admin,true,false,false,2025-03-11T00:00:00Z,U000039,looker_000039,notion_000039,drive_000039 +user_000040,acct_0049,user-000040@customer-049.example.com,finance,true,false,false,2025-04-12T00:00:00Z,U000040,looker_000040,notion_000040,drive_000040 +user_000041,acct_0050,user-000041@customer-050.example.com,requester,true,false,false,2025-05-13T00:00:00Z,U000041,looker_000041,notion_000041,drive_000041 +user_000042,acct_0010,user-000042@customer-010.example.com,approver,true,false,false,2025-06-14T00:00:00Z,U000042,looker_000042,notion_000042,drive_000042 +user_000043,acct_0011,user-000043@customer-011.example.com,admin,true,false,false,2025-07-15T00:00:00Z,U000043,looker_000043,notion_000043,drive_000043 
+user_000044,acct_0012,user-000044@customer-012.example.com,finance,true,false,false,2025-08-16T00:00:00Z,U000044,looker_000044,notion_000044,drive_000044 +user_000045,acct_0013,user-000045@customer-013.example.com,requester,true,false,false,2025-09-17T00:00:00Z,U000045,looker_000045,notion_000045,drive_000045 +user_000046,acct_0014,user-000046@customer-014.example.com,approver,true,false,false,2025-10-18T00:00:00Z,U000046,looker_000046,notion_000046,drive_000046 +user_000047,acct_0015,user-000047@customer-015.example.com,admin,true,false,false,2025-11-19T00:00:00Z,U000047,looker_000047,notion_000047,drive_000047 +user_000048,acct_0016,user-000048@customer-016.example.com,finance,true,false,false,2025-12-20T00:00:00Z,U000048,looker_000048,notion_000048,drive_000048 +user_000049,acct_0017,user-000049@customer-017.example.com,requester,true,false,false,2025-01-21T00:00:00Z,U000049,looker_000049,notion_000049,drive_000049 +user_000050,acct_0018,user-000050@customer-018.example.com,approver,true,false,false,2025-02-22T00:00:00Z,U000050,looker_000050,notion_000050,drive_000050 +user_000051,acct_0019,user-000051@customer-019.example.com,admin,true,false,false,2025-03-23T00:00:00Z,U000051,looker_000051,notion_000051,drive_000051 +user_000052,acct_0020,user-000052@customer-020.example.com,finance,true,false,false,2025-04-24T00:00:00Z,U000052,looker_000052,notion_000052,drive_000052 +user_000053,acct_0021,user-000053@customer-021.example.com,requester,true,false,false,2025-05-25T00:00:00Z,U000053,looker_000053,notion_000053,drive_000053 +user_000054,acct_0022,user-000054@customer-022.example.com,approver,true,false,false,2025-06-26T00:00:00Z,U000054,looker_000054,notion_000054,drive_000054 +user_000055,acct_0023,user-000055@customer-023.example.com,admin,true,false,false,2025-07-27T00:00:00Z,U000055,looker_000055,notion_000055,drive_000055 
+user_000056,acct_0024,user-000056@customer-024.example.com,finance,true,false,false,2025-08-28T00:00:00Z,U000056,looker_000056,notion_000056,drive_000056 +user_000057,acct_0025,user-000057@customer-025.example.com,requester,true,false,false,2025-09-01T00:00:00Z,U000057,looker_000057,notion_000057,drive_000057 +user_000058,acct_0026,user-000058@customer-026.example.com,approver,true,false,false,2025-10-02T00:00:00Z,U000058,looker_000058,notion_000058,drive_000058 +user_000059,acct_0027,user-000059@customer-027.example.com,admin,true,false,false,2025-11-03T00:00:00Z,U000059,looker_000059,notion_000059,drive_000059 +user_000060,acct_0028,user-000060@customer-028.example.com,finance,true,false,false,2025-12-04T00:00:00Z,U000060,looker_000060,notion_000060,drive_000060 +user_000061,acct_0029,user-000061@customer-029.example.com,requester,true,false,false,2025-01-05T00:00:00Z,U000061,looker_000061,notion_000061,drive_000061 +user_000062,acct_0030,user-000062@customer-030.example.com,approver,true,false,false,2025-02-06T00:00:00Z,U000062,looker_000062,notion_000062,drive_000062 +user_000063,acct_0031,user-000063@customer-031.example.com,admin,true,false,false,2025-03-07T00:00:00Z,U000063,looker_000063,notion_000063,drive_000063 +user_000064,acct_0032,user-000064@customer-032.example.com,finance,true,false,false,2025-04-08T00:00:00Z,U000064,looker_000064,notion_000064,drive_000064 +user_000065,acct_0033,user-000065@customer-033.example.com,requester,true,false,false,2025-05-09T00:00:00Z,U000065,looker_000065,notion_000065,drive_000065 +user_000066,acct_0034,user-000066@customer-034.example.com,approver,true,false,false,2025-06-10T00:00:00Z,U000066,looker_000066,notion_000066,drive_000066 +user_000067,acct_0035,user-000067@customer-035.example.com,admin,true,false,false,2025-07-11T00:00:00Z,U000067,looker_000067,notion_000067,drive_000067 
+user_000068,acct_0036,user-000068@customer-036.example.com,finance,true,false,false,2025-08-12T00:00:00Z,U000068,looker_000068,notion_000068,drive_000068 +user_000069,acct_0037,user-000069@customer-037.example.com,requester,true,false,false,2025-09-13T00:00:00Z,U000069,looker_000069,notion_000069,drive_000069 +user_000070,acct_0038,user-000070@customer-038.example.com,approver,true,false,false,2025-10-14T00:00:00Z,U000070,looker_000070,notion_000070,drive_000070 +user_000071,acct_0039,user-000071@customer-039.example.com,admin,true,false,false,2025-11-15T00:00:00Z,U000071,looker_000071,notion_000071,drive_000071 +user_000072,acct_0040,user-000072@customer-040.example.com,finance,true,false,false,2025-12-16T00:00:00Z,U000072,looker_000072,notion_000072,drive_000072 +user_000073,acct_0041,user-000073@customer-041.example.com,requester,true,false,false,2025-01-17T00:00:00Z,U000073,looker_000073,notion_000073,drive_000073 +user_000074,acct_0042,user-000074@customer-042.example.com,approver,true,false,false,2025-02-18T00:00:00Z,U000074,looker_000074,notion_000074,drive_000074 +user_000075,acct_0043,user-000075@customer-043.example.com,admin,true,false,false,2025-03-19T00:00:00Z,U000075,looker_000075,notion_000075,drive_000075 +user_000076,acct_0044,user-000076@customer-044.example.com,finance,true,false,false,2025-04-20T00:00:00Z,U000076,looker_000076,notion_000076,drive_000076 +user_000077,acct_0045,user-000077@customer-045.example.com,requester,true,false,false,2025-05-21T00:00:00Z,U000077,looker_000077,notion_000077,drive_000077 +user_000078,acct_0046,user-000078@customer-046.example.com,approver,true,false,false,2025-06-22T00:00:00Z,U000078,looker_000078,notion_000078,drive_000078 +user_000079,acct_0047,user-000079@customer-047.example.com,admin,true,false,false,2025-07-23T00:00:00Z,U000079,looker_000079,notion_000079,drive_000079 
+user_000080,acct_0048,user-000080@customer-048.example.com,finance,true,false,false,2025-08-24T00:00:00Z,U000080,looker_000080,notion_000080,drive_000080 +user_000081,acct_0049,user-000081@customer-049.example.com,requester,true,false,false,2025-09-25T00:00:00Z,U000081,looker_000081,notion_000081,drive_000081 +user_000082,acct_0050,user-000082@customer-050.example.com,approver,true,false,false,2025-10-26T00:00:00Z,U000082,looker_000082,notion_000082,drive_000082 +user_000083,acct_0010,user-000083@customer-010.example.com,admin,true,false,false,2025-11-27T00:00:00Z,U000083,looker_000083,notion_000083,drive_000083 +user_000084,acct_0011,user-000084@customer-011.example.com,finance,true,false,false,2025-12-28T00:00:00Z,U000084,looker_000084,notion_000084,drive_000084 +user_000085,acct_0012,user-000085@customer-012.example.com,requester,true,false,false,2025-01-01T00:00:00Z,U000085,looker_000085,notion_000085,drive_000085 +user_000086,acct_0013,user-000086@customer-013.example.com,approver,true,false,false,2025-02-02T00:00:00Z,U000086,looker_000086,notion_000086,drive_000086 +user_000087,acct_0014,user-000087@customer-014.example.com,admin,true,false,false,2025-03-03T00:00:00Z,U000087,looker_000087,notion_000087,drive_000087 +user_000088,acct_0015,user-000088@customer-015.example.com,finance,true,false,false,2025-04-04T00:00:00Z,U000088,looker_000088,notion_000088,drive_000088 +user_000089,acct_0016,user-000089@customer-016.example.com,requester,true,false,false,2025-05-05T00:00:00Z,U000089,looker_000089,notion_000089,drive_000089 +user_000090,acct_0017,user-000090@customer-017.example.com,approver,true,false,false,2025-06-06T00:00:00Z,U000090,looker_000090,notion_000090,drive_000090 +user_000091,acct_0018,user-000091@customer-018.example.com,admin,true,false,false,2025-07-07T00:00:00Z,U000091,looker_000091,notion_000091,drive_000091 
+user_000092,acct_0019,user-000092@customer-019.example.com,finance,true,false,false,2025-08-08T00:00:00Z,U000092,looker_000092,notion_000092,drive_000092 +user_000093,acct_0020,user-000093@customer-020.example.com,requester,true,false,false,2025-09-09T00:00:00Z,U000093,looker_000093,notion_000093,drive_000093 +user_000094,acct_0021,user-000094@customer-021.example.com,approver,true,false,false,2025-10-10T00:00:00Z,U000094,looker_000094,notion_000094,drive_000094 +user_000095,acct_0022,user-000095@customer-022.example.com,admin,true,false,false,2025-11-11T00:00:00Z,U000095,looker_000095,notion_000095,drive_000095 +user_000096,acct_0023,user-000096@customer-023.example.com,finance,true,false,false,2025-12-12T00:00:00Z,U000096,looker_000096,notion_000096,drive_000096 +user_000097,acct_0024,user-000097@customer-024.example.com,requester,true,false,false,2025-01-13T00:00:00Z,U000097,looker_000097,notion_000097,drive_000097 +user_000098,acct_0025,user-000098@customer-025.example.com,approver,true,false,false,2025-02-14T00:00:00Z,U000098,looker_000098,notion_000098,drive_000098 +user_000099,acct_0026,user-000099@customer-026.example.com,admin,true,false,false,2025-03-15T00:00:00Z,U000099,looker_000099,notion_000099,drive_000099 +user_000100,acct_0027,user-000100@customer-027.example.com,finance,true,false,false,2025-04-16T00:00:00Z,U000100,looker_000100,notion_000100,drive_000100 +user_000101,acct_0028,user-000101@customer-028.example.com,requester,true,false,false,2025-05-17T00:00:00Z,U000101,looker_000101,notion_000101,drive_000101 +user_000102,acct_0029,user-000102@customer-029.example.com,approver,true,false,false,2025-06-18T00:00:00Z,U000102,looker_000102,notion_000102,drive_000102 +user_000103,acct_0030,user-000103@customer-030.example.com,admin,true,false,false,2025-07-19T00:00:00Z,U000103,looker_000103,notion_000103,drive_000103 
+user_000104,acct_0031,user-000104@customer-031.example.com,finance,true,false,false,2025-08-20T00:00:00Z,U000104,looker_000104,notion_000104,drive_000104 +user_000105,acct_0032,user-000105@customer-032.example.com,requester,true,false,false,2025-09-21T00:00:00Z,U000105,looker_000105,notion_000105,drive_000105 +user_000106,acct_0033,user-000106@customer-033.example.com,approver,true,false,false,2025-10-22T00:00:00Z,U000106,looker_000106,notion_000106,drive_000106 +user_000107,acct_0034,user-000107@customer-034.example.com,admin,true,false,false,2025-11-23T00:00:00Z,U000107,looker_000107,notion_000107,drive_000107 +user_000108,acct_0035,user-000108@customer-035.example.com,finance,true,false,false,2025-12-24T00:00:00Z,U000108,looker_000108,notion_000108,drive_000108 +user_000109,acct_0036,user-000109@customer-036.example.com,requester,true,false,false,2025-01-25T00:00:00Z,U000109,looker_000109,notion_000109,drive_000109 +user_000110,acct_0037,user-000110@customer-037.example.com,approver,true,false,false,2025-02-26T00:00:00Z,U000110,looker_000110,notion_000110,drive_000110 +user_000111,acct_0038,user-000111@customer-038.example.com,admin,true,false,false,2025-03-27T00:00:00Z,U000111,looker_000111,notion_000111,drive_000111 +user_000112,acct_0039,user-000112@customer-039.example.com,finance,true,false,false,2025-04-28T00:00:00Z,U000112,looker_000112,notion_000112,drive_000112 +user_000113,acct_0040,user-000113@customer-040.example.com,requester,true,false,false,2025-05-01T00:00:00Z,U000113,looker_000113,notion_000113,drive_000113 +user_000114,acct_0041,user-000114@customer-041.example.com,approver,true,false,false,2025-06-02T00:00:00Z,U000114,looker_000114,notion_000114,drive_000114 +user_000115,acct_0042,user-000115@customer-042.example.com,admin,true,false,false,2025-07-03T00:00:00Z,U000115,looker_000115,notion_000115,drive_000115 
+user_000116,acct_0043,user-000116@customer-043.example.com,finance,true,false,false,2025-08-04T00:00:00Z,U000116,looker_000116,notion_000116,drive_000116 +user_000117,acct_0044,user-000117@customer-044.example.com,requester,true,false,false,2025-09-05T00:00:00Z,U000117,looker_000117,notion_000117,drive_000117 +user_000118,acct_0045,user-000118@customer-045.example.com,approver,true,false,false,2025-10-06T00:00:00Z,U000118,looker_000118,notion_000118,drive_000118 +user_000119,acct_0046,user-000119@customer-046.example.com,admin,true,false,false,2025-11-07T00:00:00Z,U000119,looker_000119,notion_000119,drive_000119 +user_000120,acct_0047,user-000120@customer-047.example.com,finance,true,false,false,2025-12-08T00:00:00Z,U000120,looker_000120,notion_000120,drive_000120 +user_000121,acct_0048,user-000121@customer-048.example.com,requester,true,false,false,2025-01-09T00:00:00Z,U000121,looker_000121,notion_000121,drive_000121 +user_000122,acct_0049,user-000122@customer-049.example.com,approver,true,false,false,2025-02-10T00:00:00Z,U000122,looker_000122,notion_000122,drive_000122 +user_000123,acct_0050,user-000123@customer-050.example.com,admin,true,false,false,2025-03-11T00:00:00Z,U000123,looker_000123,notion_000123,drive_000123 +user_000124,acct_0010,user-000124@customer-010.example.com,finance,true,false,false,2025-04-12T00:00:00Z,U000124,looker_000124,notion_000124,drive_000124 +user_000125,acct_0011,user-000125@customer-011.example.com,requester,true,false,false,2025-05-13T00:00:00Z,U000125,looker_000125,notion_000125,drive_000125 +user_000126,acct_0012,user-000126@customer-012.example.com,approver,true,false,false,2025-06-14T00:00:00Z,U000126,looker_000126,notion_000126,drive_000126 +user_000127,acct_0013,user-000127@customer-013.example.com,admin,true,false,false,2025-07-15T00:00:00Z,U000127,looker_000127,notion_000127,drive_000127 
+user_000128,acct_0014,user-000128@customer-014.example.com,finance,true,false,false,2025-08-16T00:00:00Z,U000128,looker_000128,notion_000128,drive_000128 +user_000129,acct_0015,user-000129@customer-015.example.com,requester,true,false,false,2025-09-17T00:00:00Z,U000129,looker_000129,notion_000129,drive_000129 +user_000130,acct_0016,user-000130@customer-016.example.com,approver,true,false,false,2025-10-18T00:00:00Z,U000130,looker_000130,notion_000130,drive_000130 +user_000131,acct_0017,user-000131@customer-017.example.com,admin,true,false,false,2025-11-19T00:00:00Z,U000131,looker_000131,notion_000131,drive_000131 +user_000132,acct_0018,user-000132@customer-018.example.com,finance,true,false,false,2025-12-20T00:00:00Z,U000132,looker_000132,notion_000132,drive_000132 +user_000133,acct_0019,user-000133@customer-019.example.com,requester,true,false,false,2025-01-21T00:00:00Z,U000133,looker_000133,notion_000133,drive_000133 +user_000134,acct_0020,user-000134@customer-020.example.com,approver,true,false,false,2025-02-22T00:00:00Z,U000134,looker_000134,notion_000134,drive_000134 +user_000135,acct_0021,user-000135@customer-021.example.com,admin,true,false,false,2025-03-23T00:00:00Z,U000135,looker_000135,notion_000135,drive_000135 +user_000136,acct_0022,user-000136@customer-022.example.com,finance,true,false,false,2025-04-24T00:00:00Z,U000136,looker_000136,notion_000136,drive_000136 +user_000137,acct_0023,user-000137@customer-023.example.com,requester,true,false,false,2025-05-25T00:00:00Z,U000137,looker_000137,notion_000137,drive_000137 +user_000138,acct_0024,user-000138@customer-024.example.com,approver,true,false,false,2025-06-26T00:00:00Z,U000138,looker_000138,notion_000138,drive_000138 +user_000139,acct_0025,user-000139@customer-025.example.com,admin,true,false,false,2025-07-27T00:00:00Z,U000139,looker_000139,notion_000139,drive_000139 
+user_000140,acct_0026,user-000140@customer-026.example.com,finance,true,false,false,2025-08-28T00:00:00Z,U000140,looker_000140,notion_000140,drive_000140 +user_000141,acct_0027,user-000141@customer-027.example.com,requester,true,false,false,2025-09-01T00:00:00Z,U000141,looker_000141,notion_000141,drive_000141 +user_000142,acct_0028,user-000142@customer-028.example.com,approver,true,false,false,2025-10-02T00:00:00Z,U000142,looker_000142,notion_000142,drive_000142 +user_000143,acct_0029,user-000143@customer-029.example.com,admin,true,false,false,2025-11-03T00:00:00Z,U000143,looker_000143,notion_000143,drive_000143 +user_000144,acct_0030,user-000144@customer-030.example.com,finance,true,false,false,2025-12-04T00:00:00Z,U000144,looker_000144,notion_000144,drive_000144 +user_000145,acct_0031,user-000145@customer-031.example.com,requester,true,false,false,2025-01-05T00:00:00Z,U000145,looker_000145,notion_000145,drive_000145 +user_000146,acct_0032,user-000146@customer-032.example.com,approver,true,false,false,2025-02-06T00:00:00Z,U000146,looker_000146,notion_000146,drive_000146 +user_000147,acct_0033,user-000147@customer-033.example.com,admin,true,false,false,2025-03-07T00:00:00Z,U000147,looker_000147,notion_000147,drive_000147 +user_000148,acct_0034,user-000148@customer-034.example.com,finance,true,false,false,2025-04-08T00:00:00Z,U000148,looker_000148,notion_000148,drive_000148 +user_000149,acct_0035,user-000149@customer-035.example.com,requester,true,false,false,2025-05-09T00:00:00Z,U000149,looker_000149,notion_000149,drive_000149 +user_000150,acct_0036,user-000150@customer-036.example.com,approver,true,false,false,2025-06-10T00:00:00Z,U000150,looker_000150,notion_000150,drive_000150 +user_000151,acct_0037,user-000151@customer-037.example.com,admin,true,false,false,2025-07-11T00:00:00Z,U000151,looker_000151,notion_000151,drive_000151 
+user_000152,acct_0038,user-000152@customer-038.example.com,finance,true,false,false,2025-08-12T00:00:00Z,U000152,looker_000152,notion_000152,drive_000152 +user_000153,acct_0039,user-000153@customer-039.example.com,requester,true,false,false,2025-09-13T00:00:00Z,U000153,looker_000153,notion_000153,drive_000153 +user_000154,acct_0040,user-000154@customer-040.example.com,approver,true,false,false,2025-10-14T00:00:00Z,U000154,looker_000154,notion_000154,drive_000154 +user_000155,acct_0041,user-000155@customer-041.example.com,admin,true,false,false,2025-11-15T00:00:00Z,U000155,looker_000155,notion_000155,drive_000155 +user_000156,acct_0042,user-000156@customer-042.example.com,finance,true,false,false,2025-12-16T00:00:00Z,U000156,looker_000156,notion_000156,drive_000156 +user_000157,acct_0043,user-000157@customer-043.example.com,requester,true,false,false,2025-01-17T00:00:00Z,U000157,looker_000157,notion_000157,drive_000157 +user_000158,acct_0044,user-000158@customer-044.example.com,approver,true,false,false,2025-02-18T00:00:00Z,U000158,looker_000158,notion_000158,drive_000158 +user_000159,acct_0045,user-000159@customer-045.example.com,admin,true,false,false,2025-03-19T00:00:00Z,U000159,looker_000159,notion_000159,drive_000159 +user_000160,acct_0046,user-000160@customer-046.example.com,finance,true,false,false,2025-04-20T00:00:00Z,U000160,looker_000160,notion_000160,drive_000160 +user_000161,acct_0047,user-000161@customer-047.example.com,requester,true,false,false,2025-05-21T00:00:00Z,U000161,looker_000161,notion_000161,drive_000161 +user_000162,acct_0048,user-000162@customer-048.example.com,approver,true,false,false,2025-06-22T00:00:00Z,U000162,looker_000162,notion_000162,drive_000162 +user_000163,acct_0049,user-000163@customer-049.example.com,admin,true,false,false,2025-07-23T00:00:00Z,U000163,looker_000163,notion_000163,drive_000163 
+user_000164,acct_0050,user-000164@customer-050.example.com,finance,true,false,false,2025-08-24T00:00:00Z,U000164,looker_000164,notion_000164,drive_000164 +user_000165,acct_0010,user-000165@customer-010.example.com,requester,true,false,false,2025-09-25T00:00:00Z,U000165,looker_000165,notion_000165,drive_000165 +user_000166,acct_0011,user-000166@customer-011.example.com,approver,true,false,false,2025-10-26T00:00:00Z,U000166,looker_000166,notion_000166,drive_000166 +user_000167,acct_0012,user-000167@customer-012.example.com,admin,true,false,false,2025-11-27T00:00:00Z,U000167,looker_000167,notion_000167,drive_000167 +user_000168,acct_0013,user-000168@customer-013.example.com,finance,true,false,false,2025-12-28T00:00:00Z,U000168,looker_000168,notion_000168,drive_000168 +user_000169,acct_0014,user-000169@customer-014.example.com,requester,true,false,false,2025-01-01T00:00:00Z,U000169,looker_000169,notion_000169,drive_000169 +user_000170,acct_0015,user-000170@customer-015.example.com,approver,true,false,false,2025-02-02T00:00:00Z,U000170,looker_000170,notion_000170,drive_000170 +user_000171,acct_0016,user-000171@customer-016.example.com,admin,true,false,false,2025-03-03T00:00:00Z,U000171,looker_000171,notion_000171,drive_000171 +user_000172,acct_0017,user-000172@customer-017.example.com,finance,true,false,false,2025-04-04T00:00:00Z,U000172,looker_000172,notion_000172,drive_000172 +user_000173,acct_0018,user-000173@customer-018.example.com,requester,true,false,false,2025-05-05T00:00:00Z,U000173,looker_000173,notion_000173,drive_000173 +user_000174,acct_0019,user-000174@customer-019.example.com,approver,true,false,false,2025-06-06T00:00:00Z,U000174,looker_000174,notion_000174,drive_000174 +user_000175,acct_0020,user-000175@customer-020.example.com,admin,true,false,false,2025-07-07T00:00:00Z,U000175,looker_000175,notion_000175,drive_000175 
+user_000176,acct_0021,user-000176@customer-021.example.com,finance,true,false,false,2025-08-08T00:00:00Z,U000176,looker_000176,notion_000176,drive_000176 +user_000177,acct_0022,user-000177@customer-022.example.com,requester,true,false,false,2025-09-09T00:00:00Z,U000177,looker_000177,notion_000177,drive_000177 +user_000178,acct_0023,user-000178@customer-023.example.com,approver,true,false,false,2025-10-10T00:00:00Z,U000178,looker_000178,notion_000178,drive_000178 +user_000179,acct_0024,user-000179@customer-024.example.com,admin,true,false,false,2025-11-11T00:00:00Z,U000179,looker_000179,notion_000179,drive_000179 +user_000180,acct_0025,user-000180@customer-025.example.com,finance,true,false,false,2025-12-12T00:00:00Z,U000180,looker_000180,notion_000180,drive_000180 +user_000181,acct_0026,user-000181@customer-026.example.com,requester,true,false,false,2025-01-13T00:00:00Z,U000181,looker_000181,notion_000181,drive_000181 +user_000182,acct_0027,user-000182@customer-027.example.com,approver,true,false,false,2025-02-14T00:00:00Z,U000182,looker_000182,notion_000182,drive_000182 +user_000183,acct_0028,user-000183@customer-028.example.com,admin,true,false,false,2025-03-15T00:00:00Z,U000183,looker_000183,notion_000183,drive_000183 +user_000184,acct_0029,user-000184@customer-029.example.com,finance,true,false,false,2025-04-16T00:00:00Z,U000184,looker_000184,notion_000184,drive_000184 +user_000185,acct_0030,user-000185@customer-030.example.com,requester,true,false,false,2025-05-17T00:00:00Z,U000185,looker_000185,notion_000185,drive_000185 +user_000186,acct_0031,user-000186@customer-031.example.com,approver,true,false,false,2025-06-18T00:00:00Z,U000186,looker_000186,notion_000186,drive_000186 +user_000187,acct_0032,user-000187@customer-032.example.com,admin,true,false,false,2025-07-19T00:00:00Z,U000187,looker_000187,notion_000187,drive_000187 
+user_000188,acct_0033,user-000188@customer-033.example.com,finance,true,false,false,2025-08-20T00:00:00Z,U000188,looker_000188,notion_000188,drive_000188 +user_000189,acct_0034,user-000189@customer-034.example.com,requester,true,false,false,2025-09-21T00:00:00Z,U000189,looker_000189,notion_000189,drive_000189 +user_000190,acct_0035,user-000190@customer-035.example.com,approver,true,false,false,2025-10-22T00:00:00Z,U000190,looker_000190,notion_000190,drive_000190 +user_000191,acct_0036,user-000191@customer-036.example.com,admin,true,false,false,2025-11-23T00:00:00Z,U000191,looker_000191,notion_000191,drive_000191 +user_000192,acct_0037,user-000192@customer-037.example.com,finance,true,false,false,2025-12-24T00:00:00Z,U000192,looker_000192,notion_000192,drive_000192 +user_000193,acct_0038,user-000193@customer-038.example.com,requester,true,false,false,2025-01-25T00:00:00Z,U000193,looker_000193,notion_000193,drive_000193 +user_000194,acct_0039,user-000194@customer-039.example.com,approver,true,false,false,2025-02-26T00:00:00Z,U000194,looker_000194,notion_000194,drive_000194 +user_000195,acct_0040,user-000195@customer-040.example.com,admin,true,false,false,2025-03-27T00:00:00Z,U000195,looker_000195,notion_000195,drive_000195 +user_000196,acct_0041,user-000196@customer-041.example.com,finance,true,false,false,2025-04-28T00:00:00Z,U000196,looker_000196,notion_000196,drive_000196 +user_000197,acct_0042,user-000197@customer-042.example.com,requester,true,false,false,2025-05-01T00:00:00Z,U000197,looker_000197,notion_000197,drive_000197 +user_000198,acct_0043,user-000198@customer-043.example.com,approver,true,false,false,2025-06-02T00:00:00Z,U000198,looker_000198,notion_000198,drive_000198 +user_000199,acct_0044,user-000199@customer-044.example.com,admin,true,false,false,2025-07-03T00:00:00Z,U000199,looker_000199,notion_000199,drive_000199 
+user_000200,acct_0045,user-000200@customer-045.example.com,finance,true,false,false,2025-08-04T00:00:00Z,U000200,looker_000200,notion_000200,drive_000200 +user_000201,acct_0046,user-000201@customer-046.example.com,requester,true,false,false,2025-09-05T00:00:00Z,U000201,looker_000201,notion_000201,drive_000201 +user_000202,acct_0047,user-000202@customer-047.example.com,approver,true,false,false,2025-10-06T00:00:00Z,U000202,looker_000202,notion_000202,drive_000202 +user_000203,acct_0048,user-000203@customer-048.example.com,admin,true,false,false,2025-11-07T00:00:00Z,U000203,looker_000203,notion_000203,drive_000203 +user_000204,acct_0049,user-000204@customer-049.example.com,finance,true,false,false,2025-12-08T00:00:00Z,U000204,looker_000204,notion_000204,drive_000204 +user_000205,acct_0050,user-000205@customer-050.example.com,requester,true,false,false,2025-01-09T00:00:00Z,U000205,looker_000205,notion_000205,drive_000205 +user_000206,acct_0010,user-000206@customer-010.example.com,approver,true,false,false,2025-02-10T00:00:00Z,U000206,looker_000206,notion_000206,drive_000206 +user_000207,acct_0011,user-000207@customer-011.example.com,admin,true,false,false,2025-03-11T00:00:00Z,U000207,looker_000207,notion_000207,drive_000207 +user_000208,acct_0012,user-000208@customer-012.example.com,finance,true,false,false,2025-04-12T00:00:00Z,U000208,looker_000208,notion_000208,drive_000208 +user_000209,acct_0013,user-000209@customer-013.example.com,requester,true,false,false,2025-05-13T00:00:00Z,U000209,looker_000209,notion_000209,drive_000209 +user_000210,acct_0014,user-000210@customer-014.example.com,approver,true,false,false,2025-06-14T00:00:00Z,U000210,looker_000210,notion_000210,drive_000210 +user_000211,acct_0015,user-000211@customer-015.example.com,admin,true,false,false,2025-07-15T00:00:00Z,U000211,looker_000211,notion_000211,drive_000211 
+user_000212,acct_0016,user-000212@customer-016.example.com,finance,true,false,false,2025-08-16T00:00:00Z,U000212,looker_000212,notion_000212,drive_000212 +user_000213,acct_0017,user-000213@customer-017.example.com,requester,true,false,false,2025-09-17T00:00:00Z,U000213,looker_000213,notion_000213,drive_000213 +user_000214,acct_0018,user-000214@customer-018.example.com,approver,true,false,false,2025-10-18T00:00:00Z,U000214,looker_000214,notion_000214,drive_000214 +user_000215,acct_0019,user-000215@customer-019.example.com,admin,true,false,false,2025-11-19T00:00:00Z,U000215,looker_000215,notion_000215,drive_000215 +user_000216,acct_0020,user-000216@customer-020.example.com,finance,true,false,false,2025-12-20T00:00:00Z,U000216,looker_000216,notion_000216,drive_000216 +user_000217,acct_0021,user-000217@customer-021.example.com,requester,true,false,false,2025-01-21T00:00:00Z,U000217,looker_000217,notion_000217,drive_000217 +user_000218,acct_0022,user-000218@customer-022.example.com,approver,true,false,false,2025-02-22T00:00:00Z,U000218,looker_000218,notion_000218,drive_000218 +user_000219,acct_0023,user-000219@customer-023.example.com,admin,true,false,false,2025-03-23T00:00:00Z,U000219,looker_000219,notion_000219,drive_000219 +user_000220,acct_0024,user-000220@customer-024.example.com,finance,true,false,false,2025-04-24T00:00:00Z,U000220,looker_000220,notion_000220,drive_000220 +user_000221,acct_0025,user-000221@customer-025.example.com,requester,true,false,false,2025-05-25T00:00:00Z,U000221,looker_000221,notion_000221,drive_000221 +user_000222,acct_0026,user-000222@customer-026.example.com,approver,true,false,false,2025-06-26T00:00:00Z,U000222,looker_000222,notion_000222,drive_000222 +user_000223,acct_0027,user-000223@customer-027.example.com,admin,true,false,false,2025-07-27T00:00:00Z,U000223,looker_000223,notion_000223,drive_000223 
+user_000224,acct_0028,user-000224@customer-028.example.com,finance,true,false,false,2025-08-28T00:00:00Z,U000224,looker_000224,notion_000224,drive_000224 +user_000225,acct_0029,user-000225@customer-029.example.com,requester,true,false,false,2025-09-01T00:00:00Z,U000225,looker_000225,notion_000225,drive_000225 +user_000226,acct_0030,user-000226@customer-030.example.com,approver,true,false,false,2025-10-02T00:00:00Z,U000226,looker_000226,notion_000226,drive_000226 +user_000227,acct_0031,user-000227@customer-031.example.com,admin,true,false,false,2025-11-03T00:00:00Z,U000227,looker_000227,notion_000227,drive_000227 +user_000228,acct_0032,user-000228@customer-032.example.com,finance,true,false,false,2025-12-04T00:00:00Z,U000228,looker_000228,notion_000228,drive_000228 +user_000229,acct_0033,user-000229@customer-033.example.com,requester,true,false,false,2025-01-05T00:00:00Z,U000229,looker_000229,notion_000229,drive_000229 +user_000230,acct_0034,user-000230@customer-034.example.com,approver,true,false,false,2025-02-06T00:00:00Z,U000230,looker_000230,notion_000230,drive_000230 +user_000231,acct_0035,user-000231@customer-035.example.com,admin,true,false,false,2025-03-07T00:00:00Z,U000231,looker_000231,notion_000231,drive_000231 +user_000232,acct_0036,user-000232@customer-036.example.com,finance,true,false,false,2025-04-08T00:00:00Z,U000232,looker_000232,notion_000232,drive_000232 +user_000233,acct_0037,user-000233@customer-037.example.com,requester,true,false,false,2025-05-09T00:00:00Z,U000233,looker_000233,notion_000233,drive_000233 +user_000234,acct_0038,user-000234@customer-038.example.com,approver,true,false,false,2025-06-10T00:00:00Z,U000234,looker_000234,notion_000234,drive_000234 +user_000235,acct_0039,user-000235@customer-039.example.com,admin,true,false,false,2025-07-11T00:00:00Z,U000235,looker_000235,notion_000235,drive_000235 
+user_000236,acct_0040,user-000236@customer-040.example.com,finance,true,false,false,2025-08-12T00:00:00Z,U000236,looker_000236,notion_000236,drive_000236 +user_000237,acct_0041,user-000237@customer-041.example.com,requester,true,false,false,2025-09-13T00:00:00Z,U000237,looker_000237,notion_000237,drive_000237 +user_000238,acct_0042,user-000238@customer-042.example.com,approver,true,false,false,2025-10-14T00:00:00Z,U000238,looker_000238,notion_000238,drive_000238 +user_000239,acct_0043,user-000239@customer-043.example.com,admin,true,false,false,2025-11-15T00:00:00Z,U000239,looker_000239,notion_000239,drive_000239 +user_000240,acct_0044,user-000240@customer-044.example.com,finance,true,false,false,2025-12-16T00:00:00Z,U000240,looker_000240,notion_000240,drive_000240 +user_000241,acct_0045,user-000241@customer-045.example.com,requester,true,false,false,2025-01-17T00:00:00Z,U000241,looker_000241,notion_000241,drive_000241 +user_000242,acct_0046,user-000242@customer-046.example.com,approver,true,false,false,2025-02-18T00:00:00Z,U000242,looker_000242,notion_000242,drive_000242 +user_000243,acct_0047,user-000243@customer-047.example.com,admin,true,false,false,2025-03-19T00:00:00Z,U000243,looker_000243,notion_000243,drive_000243 +user_000244,acct_0048,user-000244@customer-048.example.com,finance,true,false,false,2025-04-20T00:00:00Z,U000244,looker_000244,notion_000244,drive_000244 +user_000245,acct_0049,user-000245@customer-049.example.com,requester,true,false,false,2025-05-21T00:00:00Z,U000245,looker_000245,notion_000245,drive_000245 +user_000246,acct_0050,user-000246@customer-050.example.com,approver,true,false,false,2025-06-22T00:00:00Z,U000246,looker_000246,notion_000246,drive_000246 +user_000247,acct_0010,user-000247@customer-010.example.com,admin,true,false,false,2025-07-23T00:00:00Z,U000247,looker_000247,notion_000247,drive_000247 
+user_000248,acct_0011,user-000248@customer-011.example.com,finance,true,false,false,2025-08-24T00:00:00Z,U000248,looker_000248,notion_000248,drive_000248 +user_000249,acct_0012,user-000249@customer-012.example.com,requester,true,false,false,2025-09-25T00:00:00Z,U000249,looker_000249,notion_000249,drive_000249 +user_000250,acct_0013,user-000250@customer-013.example.com,approver,true,false,false,2025-10-26T00:00:00Z,U000250,looker_000250,notion_000250,drive_000250 +user_000251,acct_0014,user-000251@customer-014.example.com,admin,true,false,false,2025-11-27T00:00:00Z,U000251,looker_000251,notion_000251,drive_000251 +user_000252,acct_0015,user-000252@customer-015.example.com,finance,true,false,false,2025-12-28T00:00:00Z,U000252,looker_000252,notion_000252,drive_000252 +user_000253,acct_0016,user-000253@customer-016.example.com,requester,true,false,false,2025-01-01T00:00:00Z,U000253,looker_000253,notion_000253,drive_000253 +user_000254,acct_0017,user-000254@customer-017.example.com,approver,true,false,false,2025-02-02T00:00:00Z,U000254,looker_000254,notion_000254,drive_000254 +user_000255,acct_0018,user-000255@customer-018.example.com,admin,true,false,false,2025-03-03T00:00:00Z,U000255,looker_000255,notion_000255,drive_000255 +user_000256,acct_0019,user-000256@customer-019.example.com,finance,true,false,false,2025-04-04T00:00:00Z,U000256,looker_000256,notion_000256,drive_000256 +user_000257,acct_0020,user-000257@customer-020.example.com,requester,true,false,false,2025-05-05T00:00:00Z,U000257,looker_000257,notion_000257,drive_000257 +user_000258,acct_0021,user-000258@customer-021.example.com,approver,true,false,false,2025-06-06T00:00:00Z,U000258,looker_000258,notion_000258,drive_000258 +user_000259,acct_0022,user-000259@customer-022.example.com,admin,true,false,false,2025-07-07T00:00:00Z,U000259,looker_000259,notion_000259,drive_000259 
+user_000260,acct_0023,user-000260@customer-023.example.com,finance,true,false,false,2025-08-08T00:00:00Z,U000260,looker_000260,notion_000260,drive_000260 +user_000261,acct_0024,user-000261@customer-024.example.com,requester,true,false,false,2025-09-09T00:00:00Z,U000261,looker_000261,notion_000261,drive_000261 +user_000262,acct_0025,user-000262@customer-025.example.com,approver,true,false,false,2025-10-10T00:00:00Z,U000262,looker_000262,notion_000262,drive_000262 +user_000263,acct_0026,user-000263@customer-026.example.com,admin,true,false,false,2025-11-11T00:00:00Z,U000263,looker_000263,notion_000263,drive_000263 +user_000264,acct_0027,user-000264@customer-027.example.com,finance,true,false,false,2025-12-12T00:00:00Z,U000264,looker_000264,notion_000264,drive_000264 +user_000265,acct_0028,user-000265@customer-028.example.com,requester,true,false,false,2025-01-13T00:00:00Z,U000265,looker_000265,notion_000265,drive_000265 +user_000266,acct_0029,user-000266@customer-029.example.com,approver,true,false,false,2025-02-14T00:00:00Z,U000266,looker_000266,notion_000266,drive_000266 +user_000267,acct_0030,user-000267@customer-030.example.com,admin,true,false,false,2025-03-15T00:00:00Z,U000267,looker_000267,notion_000267,drive_000267 +user_000268,acct_0031,user-000268@customer-031.example.com,finance,true,false,false,2025-04-16T00:00:00Z,U000268,looker_000268,notion_000268,drive_000268 +user_000269,acct_0032,user-000269@customer-032.example.com,requester,true,false,false,2025-05-17T00:00:00Z,U000269,looker_000269,notion_000269,drive_000269 +user_000270,acct_0033,user-000270@customer-033.example.com,approver,true,false,false,2025-06-18T00:00:00Z,U000270,looker_000270,notion_000270,drive_000270 +user_000271,acct_0034,user-000271@customer-034.example.com,admin,true,false,false,2025-07-19T00:00:00Z,U000271,looker_000271,notion_000271,drive_000271 
+user_000272,acct_0035,user-000272@customer-035.example.com,finance,true,false,false,2025-08-20T00:00:00Z,U000272,looker_000272,notion_000272,drive_000272 +user_000273,acct_0036,user-000273@customer-036.example.com,requester,true,false,false,2025-09-21T00:00:00Z,U000273,looker_000273,notion_000273,drive_000273 +user_000274,acct_0037,user-000274@customer-037.example.com,approver,true,false,false,2025-10-22T00:00:00Z,U000274,looker_000274,notion_000274,drive_000274 +user_000275,acct_0038,user-000275@customer-038.example.com,admin,true,false,false,2025-11-23T00:00:00Z,U000275,looker_000275,notion_000275,drive_000275 +user_000276,acct_0039,user-000276@customer-039.example.com,finance,true,false,false,2025-12-24T00:00:00Z,U000276,looker_000276,notion_000276,drive_000276 +user_000277,acct_0040,user-000277@customer-040.example.com,requester,true,false,false,2025-01-25T00:00:00Z,U000277,looker_000277,notion_000277,drive_000277 +user_000278,acct_0041,user-000278@customer-041.example.com,approver,true,false,false,2025-02-26T00:00:00Z,U000278,looker_000278,notion_000278,drive_000278 +user_000279,acct_0042,user-000279@customer-042.example.com,admin,true,false,false,2025-03-27T00:00:00Z,U000279,looker_000279,notion_000279,drive_000279 +user_000280,acct_0043,user-000280@customer-043.example.com,finance,true,false,false,2025-04-28T00:00:00Z,U000280,looker_000280,notion_000280,drive_000280 +user_000281,acct_0044,user-000281@customer-044.example.com,requester,true,false,false,2025-05-01T00:00:00Z,U000281,looker_000281,notion_000281,drive_000281 +user_000282,acct_0045,user-000282@customer-045.example.com,approver,true,false,false,2025-06-02T00:00:00Z,U000282,looker_000282,notion_000282,drive_000282 +user_000283,acct_0046,user-000283@customer-046.example.com,admin,true,false,false,2025-07-03T00:00:00Z,U000283,looker_000283,notion_000283,drive_000283 
+user_000284,acct_0047,user-000284@customer-047.example.com,finance,true,false,false,2025-08-04T00:00:00Z,U000284,looker_000284,notion_000284,drive_000284 +user_000285,acct_0048,user-000285@customer-048.example.com,requester,true,false,false,2025-09-05T00:00:00Z,U000285,looker_000285,notion_000285,drive_000285 +user_000286,acct_0049,user-000286@customer-049.example.com,approver,true,false,false,2025-10-06T00:00:00Z,U000286,looker_000286,notion_000286,drive_000286 +user_000287,acct_0001,user-000287@customer-001.example.com,admin,true,false,false,2025-11-07T00:00:00Z,U000287,looker_000287,notion_000287,drive_000287 +user_000288,acct_0002,user-000288@customer-002.example.com,finance,true,false,false,2025-12-08T00:00:00Z,U000288,looker_000288,notion_000288,drive_000288 +user_000289,acct_0003,user-000289@customer-003.example.com,requester,true,false,false,2025-01-09T00:00:00Z,U000289,looker_000289,notion_000289,drive_000289 +user_000290,acct_0004,user-000290@customer-004.example.com,approver,false,false,false,2025-02-10T00:00:00Z,U000290,looker_000290,notion_000290,drive_000290 +user_000291,acct_0005,user-000291@customer-005.example.com,admin,true,false,false,2025-03-11T00:00:00Z,U000291,looker_000291,notion_000291,drive_000291 +user_000292,acct_0006,user-000292@customer-006.example.com,finance,true,false,false,2025-04-12T00:00:00Z,U000292,looker_000292,notion_000292,drive_000292 +user_000293,acct_0007,user-000293@customer-007.example.com,requester,true,false,false,2025-05-13T00:00:00Z,U000293,looker_000293,notion_000293,drive_000293 +user_000294,acct_0008,user-000294@customer-008.example.com,approver,true,false,false,2025-06-14T00:00:00Z,U000294,looker_000294,notion_000294,drive_000294 +user_000295,acct_0009,user-000295@customer-009.example.com,admin,false,false,false,2025-07-15T00:00:00Z,U000295,looker_000295,notion_000295,drive_000295 
+user_000296,acct_0010,user-000296@customer-010.example.com,finance,true,false,false,2025-08-16T00:00:00Z,U000296,looker_000296,notion_000296,drive_000296 +user_000297,acct_0011,user-000297@customer-011.example.com,requester,true,false,false,2025-09-17T00:00:00Z,U000297,looker_000297,notion_000297,drive_000297 +user_000298,acct_0012,user-000298@customer-012.example.com,approver,true,false,false,2025-10-18T00:00:00Z,U000298,looker_000298,notion_000298,drive_000298 +user_000299,acct_0013,user-000299@customer-013.example.com,admin,true,false,false,2025-11-19T00:00:00Z,U000299,looker_000299,notion_000299,drive_000299 +user_000300,acct_0014,user-000300@customer-014.example.com,finance,false,false,false,2025-12-20T00:00:00Z,U000300,looker_000300,notion_000300,drive_000300 +user_000301,acct_0015,user-000301@customer-015.example.com,requester,true,false,false,2025-01-21T00:00:00Z,U000301,looker_000301,notion_000301,drive_000301 +user_000302,acct_0016,user-000302@customer-016.example.com,approver,true,false,false,2025-02-22T00:00:00Z,U000302,looker_000302,notion_000302,drive_000302 +user_000303,acct_0017,user-000303@customer-017.example.com,admin,true,false,false,2025-03-23T00:00:00Z,U000303,looker_000303,notion_000303,drive_000303 +user_000304,acct_0018,user-000304@customer-018.example.com,finance,true,false,false,2025-04-24T00:00:00Z,U000304,looker_000304,notion_000304,drive_000304 +user_000305,acct_0019,user-000305@customer-019.example.com,requester,false,false,false,2025-05-25T00:00:00Z,U000305,looker_000305,notion_000305,drive_000305 +user_000306,acct_0020,user-000306@customer-020.example.com,approver,true,false,false,2025-06-26T00:00:00Z,U000306,looker_000306,notion_000306,drive_000306 +user_000307,acct_0021,user-000307@customer-021.example.com,admin,true,false,false,2025-07-27T00:00:00Z,U000307,looker_000307,notion_000307,drive_000307 
+user_000308,acct_0022,user-000308@customer-022.example.com,finance,true,false,false,2025-08-28T00:00:00Z,U000308,looker_000308,notion_000308,drive_000308 +user_000309,acct_0023,user-000309@customer-023.example.com,requester,true,false,false,2025-09-01T00:00:00Z,U000309,looker_000309,notion_000309,drive_000309 +user_000310,acct_0024,user-000310@customer-024.example.com,approver,false,false,false,2025-10-02T00:00:00Z,U000310,looker_000310,notion_000310,drive_000310 +user_000311,acct_0025,user-000311@customer-025.example.com,admin,true,false,false,2025-11-03T00:00:00Z,U000311,looker_000311,notion_000311,drive_000311 +user_000312,acct_0026,user-000312@customer-026.example.com,finance,true,false,false,2025-12-04T00:00:00Z,U000312,looker_000312,notion_000312,drive_000312 +user_000313,acct_0027,user-000313@customer-027.example.com,requester,true,false,false,2025-01-05T00:00:00Z,U000313,looker_000313,notion_000313,drive_000313 +user_000314,acct_0028,user-000314@customer-028.example.com,approver,true,false,false,2025-02-06T00:00:00Z,U000314,looker_000314,notion_000314,drive_000314 +user_000315,acct_0029,user-000315@customer-029.example.com,admin,false,false,false,2025-03-07T00:00:00Z,U000315,looker_000315,notion_000315,drive_000315 +user_000316,acct_0030,user-000316@customer-030.example.com,finance,true,false,false,2025-04-08T00:00:00Z,U000316,looker_000316,notion_000316,drive_000316 +user_000317,acct_0031,user-000317@customer-031.example.com,requester,true,false,false,2025-05-09T00:00:00Z,U000317,looker_000317,notion_000317,drive_000317 +user_000318,acct_0032,user-000318@customer-032.example.com,approver,true,false,false,2025-06-10T00:00:00Z,U000318,looker_000318,notion_000318,drive_000318 +user_000319,acct_0033,user-000319@customer-033.example.com,admin,true,false,false,2025-07-11T00:00:00Z,U000319,looker_000319,notion_000319,drive_000319 
+user_000320,acct_0034,user-000320@customer-034.example.com,finance,false,false,false,2025-08-12T00:00:00Z,U000320,looker_000320,notion_000320,drive_000320 +user_000321,acct_0035,user-000321@customer-035.example.com,requester,true,false,false,2025-09-13T00:00:00Z,U000321,looker_000321,notion_000321,drive_000321 +user_000322,acct_0036,user-000322@customer-036.example.com,approver,true,false,false,2025-10-14T00:00:00Z,U000322,looker_000322,notion_000322,drive_000322 +user_000323,acct_0037,user-000323@customer-037.example.com,admin,true,false,false,2025-11-15T00:00:00Z,U000323,looker_000323,notion_000323,drive_000323 +user_000324,acct_0038,user-000324@customer-038.example.com,finance,true,false,false,2025-12-16T00:00:00Z,U000324,looker_000324,notion_000324,drive_000324 +user_000325,acct_0039,user-000325@customer-039.example.com,requester,false,false,false,2025-01-17T00:00:00Z,U000325,looker_000325,notion_000325,drive_000325 +user_000326,acct_0040,user-000326@customer-040.example.com,approver,true,false,false,2025-02-18T00:00:00Z,U000326,looker_000326,notion_000326,drive_000326 +user_000327,acct_0041,user-000327@customer-041.example.com,admin,true,false,false,2025-03-19T00:00:00Z,U000327,looker_000327,notion_000327,drive_000327 +user_000328,acct_0042,user-000328@customer-042.example.com,finance,true,false,false,2025-04-20T00:00:00Z,U000328,looker_000328,notion_000328,drive_000328 +user_000329,acct_0043,user-000329@customer-043.example.com,requester,true,false,false,2025-05-21T00:00:00Z,U000329,looker_000329,notion_000329,drive_000329 +user_000330,acct_0044,user-000330@customer-044.example.com,approver,false,false,false,2025-06-22T00:00:00Z,U000330,looker_000330,notion_000330,drive_000330 +user_000331,acct_0045,user-000331@customer-045.example.com,admin,true,false,false,2025-07-23T00:00:00Z,U000331,looker_000331,notion_000331,drive_000331 
+user_000332,acct_0046,user-000332@customer-046.example.com,finance,true,false,false,2025-08-24T00:00:00Z,U000332,looker_000332,notion_000332,drive_000332 +user_000333,acct_0047,user-000333@customer-047.example.com,requester,true,false,false,2025-09-25T00:00:00Z,U000333,looker_000333,notion_000333,drive_000333 +user_000334,acct_0048,user-000334@customer-048.example.com,approver,true,false,false,2025-10-26T00:00:00Z,U000334,looker_000334,notion_000334,drive_000334 +user_000335,acct_0049,user-000335@customer-049.example.com,admin,false,false,false,2025-11-27T00:00:00Z,U000335,looker_000335,notion_000335,drive_000335 +user_000336,acct_0050,user-000336@customer-050.example.com,finance,true,false,false,2025-12-28T00:00:00Z,U000336,looker_000336,notion_000336,drive_000336 +user_000337,acct_0051,user-000337@customer-051.example.com,requester,true,false,false,2025-01-01T00:00:00Z,U000337,looker_000337,notion_000337,drive_000337 +user_000338,acct_0052,user-000338@customer-052.example.com,approver,true,false,false,2025-02-02T00:00:00Z,U000338,looker_000338,notion_000338,drive_000338 +user_000339,acct_0053,user-000339@customer-053.example.com,admin,true,false,false,2025-03-03T00:00:00Z,U000339,looker_000339,notion_000339,drive_000339 +user_000340,acct_0054,user-000340@customer-054.example.com,finance,false,false,false,2025-04-04T00:00:00Z,U000340,looker_000340,notion_000340,drive_000340 +user_000341,acct_0055,user-000341@customer-055.example.com,requester,true,false,false,2025-05-05T00:00:00Z,U000341,looker_000341,notion_000341,drive_000341 +user_000342,acct_0056,user-000342@customer-056.example.com,approver,true,false,false,2025-06-06T00:00:00Z,U000342,looker_000342,notion_000342,drive_000342 +user_000343,acct_0057,user-000343@customer-057.example.com,admin,true,false,false,2025-07-07T00:00:00Z,U000343,looker_000343,notion_000343,drive_000343 
+user_000344,acct_0058,user-000344@customer-058.example.com,finance,true,false,false,2025-08-08T00:00:00Z,U000344,looker_000344,notion_000344,drive_000344 +user_000345,acct_0059,user-000345@customer-059.example.com,requester,false,false,false,2025-09-09T00:00:00Z,U000345,looker_000345,notion_000345,drive_000345 +user_000346,acct_0060,user-000346@customer-060.example.com,approver,true,false,false,2025-10-10T00:00:00Z,U000346,looker_000346,notion_000346,drive_000346 +user_000347,acct_0061,user-000347@customer-061.example.com,admin,true,false,false,2025-11-11T00:00:00Z,U000347,looker_000347,notion_000347,drive_000347 +user_000348,acct_0062,user-000348@customer-062.example.com,finance,true,false,false,2025-12-12T00:00:00Z,U000348,looker_000348,notion_000348,drive_000348 +user_000349,acct_0063,user-000349@customer-063.example.com,requester,true,false,false,2025-01-13T00:00:00Z,U000349,looker_000349,notion_000349,drive_000349 +user_000350,acct_0064,user-000350@customer-064.example.com,approver,false,false,false,2025-02-14T00:00:00Z,U000350,looker_000350,notion_000350,drive_000350 +user_000351,acct_0065,user-000351@customer-065.example.com,admin,true,false,false,2025-03-15T00:00:00Z,U000351,looker_000351,notion_000351,drive_000351 +user_000352,acct_0066,user-000352@customer-066.example.com,finance,true,false,false,2025-04-16T00:00:00Z,U000352,looker_000352,notion_000352,drive_000352 +user_000353,acct_0067,user-000353@customer-067.example.com,requester,true,false,false,2025-05-17T00:00:00Z,U000353,looker_000353,notion_000353,drive_000353 +user_000354,acct_0068,user-000354@customer-068.example.com,approver,true,false,false,2025-06-18T00:00:00Z,U000354,looker_000354,notion_000354,drive_000354 +user_000355,acct_0069,user-000355@customer-069.example.com,admin,false,false,false,2025-07-19T00:00:00Z,U000355,looker_000355,notion_000355,drive_000355 
+user_000356,acct_0070,user-000356@customer-070.example.com,finance,true,false,false,2025-08-20T00:00:00Z,U000356,looker_000356,notion_000356,drive_000356 +user_000357,acct_0071,user-000357@customer-071.example.com,requester,true,false,false,2025-09-21T00:00:00Z,U000357,looker_000357,notion_000357,drive_000357 +user_000358,acct_0072,user-000358@customer-072.example.com,approver,true,false,false,2025-10-22T00:00:00Z,U000358,looker_000358,notion_000358,drive_000358 +user_000359,acct_0073,user-000359@customer-073.example.com,admin,true,false,false,2025-11-23T00:00:00Z,U000359,looker_000359,notion_000359,drive_000359 +user_000360,acct_0074,user-000360@customer-074.example.com,finance,false,false,false,2025-12-24T00:00:00Z,U000360,looker_000360,notion_000360,drive_000360 +user_000361,acct_0075,user-000361@customer-075.example.com,requester,true,false,false,2025-01-25T00:00:00Z,U000361,looker_000361,notion_000361,drive_000361 +user_000362,acct_0076,user-000362@customer-076.example.com,approver,true,false,false,2025-02-26T00:00:00Z,U000362,looker_000362,notion_000362,drive_000362 +user_000363,acct_0077,user-000363@customer-077.example.com,admin,true,false,false,2025-03-27T00:00:00Z,U000363,looker_000363,notion_000363,drive_000363 +user_000364,acct_0078,user-000364@customer-078.example.com,finance,true,false,false,2025-04-28T00:00:00Z,U000364,looker_000364,notion_000364,drive_000364 +user_000365,acct_0079,user-000365@customer-079.example.com,requester,false,false,false,2025-05-01T00:00:00Z,U000365,looker_000365,notion_000365,drive_000365 +user_000366,acct_0080,user-000366@customer-080.example.com,approver,true,false,false,2025-06-02T00:00:00Z,U000366,looker_000366,notion_000366,drive_000366 +user_000367,acct_0081,user-000367@customer-081.example.com,admin,true,false,false,2025-07-03T00:00:00Z,U000367,looker_000367,notion_000367,drive_000367 
+user_000368,acct_0082,user-000368@customer-082.example.com,finance,true,false,false,2025-08-04T00:00:00Z,U000368,looker_000368,notion_000368,drive_000368 +user_000369,acct_0083,user-000369@customer-083.example.com,requester,true,false,false,2025-09-05T00:00:00Z,U000369,looker_000369,notion_000369,drive_000369 +user_000370,acct_0084,user-000370@customer-084.example.com,approver,false,false,false,2025-10-06T00:00:00Z,U000370,looker_000370,notion_000370,drive_000370 +user_000371,acct_0085,user-000371@customer-085.example.com,admin,true,false,false,2025-11-07T00:00:00Z,U000371,looker_000371,notion_000371,drive_000371 +user_000372,acct_0086,user-000372@customer-086.example.com,finance,true,false,false,2025-12-08T00:00:00Z,U000372,looker_000372,notion_000372,drive_000372 +user_000373,acct_0087,user-000373@customer-087.example.com,requester,true,false,false,2025-01-09T00:00:00Z,U000373,looker_000373,notion_000373,drive_000373 +user_000374,acct_0088,user-000374@customer-088.example.com,approver,true,false,false,2025-02-10T00:00:00Z,U000374,looker_000374,notion_000374,drive_000374 +user_000375,acct_0089,user-000375@customer-089.example.com,admin,false,false,false,2025-03-11T00:00:00Z,U000375,looker_000375,notion_000375,drive_000375 +user_000376,acct_0090,user-000376@customer-090.example.com,finance,true,false,false,2025-04-12T00:00:00Z,U000376,looker_000376,notion_000376,drive_000376 +user_000377,acct_0091,user-000377@customer-091.example.com,requester,true,false,false,2025-05-13T00:00:00Z,U000377,looker_000377,notion_000377,drive_000377 +user_000378,acct_0092,user-000378@customer-092.example.com,approver,true,false,false,2025-06-14T00:00:00Z,U000378,looker_000378,notion_000378,drive_000378 +user_000379,acct_0093,user-000379@customer-093.example.com,admin,true,false,false,2025-07-15T00:00:00Z,U000379,looker_000379,notion_000379,drive_000379 
+user_000380,acct_0094,user-000380@customer-094.example.com,finance,false,false,false,2025-08-16T00:00:00Z,U000380,looker_000380,notion_000380,drive_000380 +user_000381,acct_0095,user-000381@customer-095.example.com,requester,true,false,false,2025-09-17T00:00:00Z,U000381,looker_000381,notion_000381,drive_000381 +user_000382,acct_0096,user-000382@customer-096.example.com,approver,true,false,false,2025-10-18T00:00:00Z,U000382,looker_000382,notion_000382,drive_000382 +user_000383,acct_0097,user-000383@customer-097.example.com,admin,true,false,false,2025-11-19T00:00:00Z,U000383,looker_000383,notion_000383,drive_000383 +user_000384,acct_0098,user-000384@customer-098.example.com,finance,true,false,false,2025-12-20T00:00:00Z,U000384,looker_000384,notion_000384,drive_000384 +user_000385,acct_0099,user-000385@customer-099.example.com,requester,false,false,false,2025-01-21T00:00:00Z,U000385,looker_000385,notion_000385,drive_000385 +user_000386,acct_0100,user-000386@customer-100.example.com,approver,true,false,false,2025-02-22T00:00:00Z,U000386,looker_000386,notion_000386,drive_000386 +user_000387,acct_0101,user-000387@customer-101.example.com,admin,true,false,false,2025-03-23T00:00:00Z,U000387,looker_000387,notion_000387,drive_000387 +user_000388,acct_0102,user-000388@customer-102.example.com,finance,true,false,false,2025-04-24T00:00:00Z,U000388,looker_000388,notion_000388,drive_000388 +user_000389,acct_0103,user-000389@customer-103.example.com,requester,true,false,false,2025-05-25T00:00:00Z,U000389,looker_000389,notion_000389,drive_000389 +user_000390,acct_0104,user-000390@customer-104.example.com,approver,false,false,false,2025-06-26T00:00:00Z,U000390,looker_000390,notion_000390,drive_000390 +user_000391,acct_0105,user-000391@customer-105.example.com,admin,true,false,false,2025-07-27T00:00:00Z,U000391,looker_000391,notion_000391,drive_000391 
+user_000392,acct_0106,user-000392@customer-106.example.com,finance,true,false,false,2025-08-28T00:00:00Z,U000392,looker_000392,notion_000392,drive_000392 +user_000393,acct_0107,user-000393@customer-107.example.com,requester,true,false,false,2025-09-01T00:00:00Z,U000393,looker_000393,notion_000393,drive_000393 +user_000394,acct_0108,user-000394@customer-108.example.com,approver,true,false,false,2025-10-02T00:00:00Z,U000394,looker_000394,notion_000394,drive_000394 +user_000395,acct_0109,user-000395@customer-109.example.com,admin,false,false,false,2025-11-03T00:00:00Z,U000395,looker_000395,notion_000395,drive_000395 +user_000396,acct_0110,user-000396@customer-110.example.com,finance,true,false,false,2025-12-04T00:00:00Z,U000396,looker_000396,notion_000396,drive_000396 +user_000397,acct_0111,user-000397@customer-111.example.com,requester,true,false,false,2025-01-05T00:00:00Z,U000397,looker_000397,notion_000397,drive_000397 +user_000398,acct_0112,user-000398@customer-112.example.com,approver,true,false,false,2025-02-06T00:00:00Z,U000398,looker_000398,notion_000398,drive_000398 +user_000399,acct_0113,user-000399@customer-113.example.com,admin,true,false,false,2025-03-07T00:00:00Z,U000399,looker_000399,notion_000399,drive_000399 +user_000400,acct_0114,user-000400@customer-114.example.com,finance,false,false,false,2025-04-08T00:00:00Z,U000400,looker_000400,notion_000400,drive_000400 +user_000401,acct_0115,user-000401@customer-115.example.com,requester,true,false,false,2025-05-09T00:00:00Z,U000401,looker_000401,notion_000401,drive_000401 +user_000402,acct_0116,user-000402@customer-116.example.com,approver,true,false,false,2025-06-10T00:00:00Z,U000402,looker_000402,notion_000402,drive_000402 +user_000403,acct_0117,user-000403@customer-117.example.com,admin,true,false,false,2025-07-11T00:00:00Z,U000403,looker_000403,notion_000403,drive_000403 
+user_000404,acct_0118,user-000404@customer-118.example.com,finance,true,false,false,2025-08-12T00:00:00Z,U000404,looker_000404,notion_000404,drive_000404 +user_000405,acct_0119,user-000405@customer-119.example.com,requester,false,false,false,2025-09-13T00:00:00Z,U000405,looker_000405,notion_000405,drive_000405 +user_000406,acct_0120,user-000406@customer-120.example.com,approver,true,false,false,2025-10-14T00:00:00Z,U000406,looker_000406,notion_000406,drive_000406 +user_000407,acct_0121,user-000407@customer-121.example.com,admin,true,false,false,2025-11-15T00:00:00Z,U000407,looker_000407,notion_000407,drive_000407 +user_000408,acct_0122,user-000408@customer-122.example.com,finance,true,false,false,2025-12-16T00:00:00Z,U000408,looker_000408,notion_000408,drive_000408 +user_000409,acct_0123,user-000409@customer-123.example.com,requester,true,false,false,2025-01-17T00:00:00Z,U000409,looker_000409,notion_000409,drive_000409 +user_000410,acct_0124,user-000410@customer-124.example.com,approver,false,false,false,2025-02-18T00:00:00Z,U000410,looker_000410,notion_000410,drive_000410 +user_000411,acct_0125,user-000411@customer-125.example.com,admin,true,false,false,2025-03-19T00:00:00Z,U000411,looker_000411,notion_000411,drive_000411 +user_000412,acct_0126,user-000412@customer-126.example.com,finance,true,false,false,2025-04-20T00:00:00Z,U000412,looker_000412,notion_000412,drive_000412 +user_000413,acct_0127,user-000413@customer-127.example.com,requester,true,false,false,2025-05-21T00:00:00Z,U000413,looker_000413,notion_000413,drive_000413 +user_000414,acct_0128,user-000414@customer-128.example.com,approver,true,false,false,2025-06-22T00:00:00Z,U000414,looker_000414,notion_000414,drive_000414 +user_000415,acct_0129,user-000415@customer-129.example.com,admin,false,false,false,2025-07-23T00:00:00Z,U000415,looker_000415,notion_000415,drive_000415 
+user_000416,acct_0130,user-000416@customer-130.example.com,finance,true,false,false,2025-08-24T00:00:00Z,U000416,looker_000416,notion_000416,drive_000416 +user_000417,acct_0131,user-000417@customer-131.example.com,requester,true,false,false,2025-09-25T00:00:00Z,U000417,looker_000417,notion_000417,drive_000417 +user_000418,acct_0132,user-000418@customer-132.example.com,approver,true,false,false,2025-10-26T00:00:00Z,U000418,looker_000418,notion_000418,drive_000418 +user_000419,acct_0133,user-000419@customer-133.example.com,admin,true,false,false,2025-11-27T00:00:00Z,U000419,looker_000419,notion_000419,drive_000419 +user_000420,acct_0134,user-000420@customer-134.example.com,finance,false,false,false,2025-12-28T00:00:00Z,U000420,looker_000420,notion_000420,drive_000420 +user_000421,acct_0135,user-000421@customer-135.example.com,requester,true,false,false,2025-01-01T00:00:00Z,U000421,looker_000421,notion_000421,drive_000421 +user_000422,acct_0136,user-000422@customer-136.example.com,approver,true,false,false,2025-02-02T00:00:00Z,U000422,looker_000422,notion_000422,drive_000422 +user_000423,acct_0137,user-000423@customer-137.example.com,admin,true,false,false,2025-03-03T00:00:00Z,U000423,looker_000423,notion_000423,drive_000423 +user_000424,acct_0138,user-000424@customer-138.example.com,finance,true,false,false,2025-04-04T00:00:00Z,U000424,looker_000424,notion_000424,drive_000424 +user_000425,acct_0139,user-000425@customer-139.example.com,requester,false,false,false,2025-05-05T00:00:00Z,U000425,looker_000425,notion_000425,drive_000425 +user_000426,acct_0140,user-000426@customer-140.example.com,approver,true,false,false,2025-06-06T00:00:00Z,U000426,looker_000426,notion_000426,drive_000426 +user_000427,acct_0141,user-000427@customer-141.example.com,admin,true,false,false,2025-07-07T00:00:00Z,U000427,looker_000427,notion_000427,drive_000427 
+user_000428,acct_0142,user-000428@customer-142.example.com,finance,true,false,false,2025-08-08T00:00:00Z,U000428,looker_000428,notion_000428,drive_000428 +user_000429,acct_0143,user-000429@customer-143.example.com,requester,true,false,false,2025-09-09T00:00:00Z,U000429,looker_000429,notion_000429,drive_000429 +user_000430,acct_0144,user-000430@customer-144.example.com,approver,false,false,false,2025-10-10T00:00:00Z,U000430,looker_000430,notion_000430,drive_000430 +user_000431,acct_0145,user-000431@customer-145.example.com,admin,true,false,false,2025-11-11T00:00:00Z,U000431,looker_000431,notion_000431,drive_000431 +user_000432,acct_0146,user-000432@customer-146.example.com,finance,true,false,false,2025-12-12T00:00:00Z,U000432,looker_000432,notion_000432,drive_000432 +user_000433,acct_0147,user-000433@customer-147.example.com,requester,true,false,false,2025-01-13T00:00:00Z,U000433,looker_000433,notion_000433,drive_000433 +user_000434,acct_0148,user-000434@customer-148.example.com,approver,true,false,false,2025-02-14T00:00:00Z,U000434,looker_000434,notion_000434,drive_000434 +user_000435,acct_0149,user-000435@customer-149.example.com,admin,false,false,false,2025-03-15T00:00:00Z,U000435,looker_000435,notion_000435,drive_000435 +user_000436,acct_0150,user-000436@customer-150.example.com,finance,true,false,false,2025-04-16T00:00:00Z,U000436,looker_000436,notion_000436,drive_000436 +user_000437,acct_0151,user-000437@customer-151.example.com,requester,true,false,false,2025-05-17T00:00:00Z,U000437,looker_000437,notion_000437,drive_000437 +user_000438,acct_0152,user-000438@customer-152.example.com,approver,true,false,false,2025-06-18T00:00:00Z,U000438,looker_000438,notion_000438,drive_000438 +user_000439,acct_0153,user-000439@customer-153.example.com,admin,true,false,false,2025-07-19T00:00:00Z,U000439,looker_000439,notion_000439,drive_000439 
+user_000440,acct_0154,user-000440@customer-154.example.com,finance,false,false,false,2025-08-20T00:00:00Z,U000440,looker_000440,notion_000440,drive_000440 +user_000441,acct_0155,user-000441@customer-155.example.com,requester,true,false,false,2025-09-21T00:00:00Z,U000441,looker_000441,notion_000441,drive_000441 +user_000442,acct_0156,user-000442@customer-156.example.com,approver,true,false,false,2025-10-22T00:00:00Z,U000442,looker_000442,notion_000442,drive_000442 +user_000443,acct_0157,user-000443@customer-157.example.com,admin,true,false,false,2025-11-23T00:00:00Z,U000443,looker_000443,notion_000443,drive_000443 +user_000444,acct_0158,user-000444@customer-158.example.com,finance,true,false,false,2025-12-24T00:00:00Z,U000444,looker_000444,notion_000444,drive_000444 +user_000445,acct_0159,user-000445@customer-159.example.com,requester,false,false,false,2025-01-25T00:00:00Z,U000445,looker_000445,notion_000445,drive_000445 +user_000446,acct_0160,user-000446@customer-160.example.com,approver,true,false,false,2025-02-26T00:00:00Z,U000446,looker_000446,notion_000446,drive_000446 +user_000447,acct_0161,user-000447@customer-161.example.com,admin,true,false,false,2025-03-27T00:00:00Z,U000447,looker_000447,notion_000447,drive_000447 +user_000448,acct_0162,user-000448@customer-162.example.com,finance,true,false,false,2025-04-28T00:00:00Z,U000448,looker_000448,notion_000448,drive_000448 +user_000449,acct_0163,user-000449@customer-163.example.com,requester,true,false,false,2025-05-01T00:00:00Z,U000449,looker_000449,notion_000449,drive_000449 +user_000450,acct_0164,user-000450@customer-164.example.com,approver,false,false,false,2025-06-02T00:00:00Z,U000450,looker_000450,notion_000450,drive_000450 +user_000451,acct_0165,user-000451@customer-165.example.com,admin,true,false,false,2025-07-03T00:00:00Z,U000451,looker_000451,notion_000451,drive_000451 
+user_000452,acct_0166,user-000452@customer-166.example.com,finance,true,false,false,2025-08-04T00:00:00Z,U000452,looker_000452,notion_000452,drive_000452 +user_000453,acct_0167,user-000453@customer-167.example.com,requester,true,false,false,2025-09-05T00:00:00Z,U000453,looker_000453,notion_000453,drive_000453 +user_000454,acct_0168,user-000454@customer-168.example.com,approver,true,false,false,2025-10-06T00:00:00Z,U000454,looker_000454,notion_000454,drive_000454 +user_000455,acct_0169,user-000455@customer-169.example.com,admin,false,false,false,2025-11-07T00:00:00Z,U000455,looker_000455,notion_000455,drive_000455 +user_000456,acct_0170,user-000456@customer-170.example.com,finance,true,false,false,2025-12-08T00:00:00Z,U000456,looker_000456,notion_000456,drive_000456 +user_000457,acct_0171,user-000457@customer-171.example.com,requester,true,false,false,2025-01-09T00:00:00Z,U000457,looker_000457,notion_000457,drive_000457 +user_000458,acct_0172,user-000458@customer-172.example.com,approver,true,false,false,2025-02-10T00:00:00Z,U000458,looker_000458,notion_000458,drive_000458 +user_000459,acct_0173,user-000459@customer-173.example.com,admin,true,false,false,2025-03-11T00:00:00Z,U000459,looker_000459,notion_000459,drive_000459 +user_000460,acct_0174,user-000460@customer-174.example.com,finance,false,false,false,2025-04-12T00:00:00Z,U000460,looker_000460,notion_000460,drive_000460 +user_000461,acct_0175,user-000461@customer-175.example.com,requester,true,false,false,2025-05-13T00:00:00Z,U000461,looker_000461,notion_000461,drive_000461 +user_000462,acct_0176,user-000462@customer-176.example.com,approver,true,false,false,2025-06-14T00:00:00Z,U000462,looker_000462,notion_000462,drive_000462 +user_000463,acct_0177,user-000463@customer-177.example.com,admin,true,false,false,2025-07-15T00:00:00Z,U000463,looker_000463,notion_000463,drive_000463 
+user_000464,acct_0178,user-000464@customer-178.example.com,finance,true,false,false,2025-08-16T00:00:00Z,U000464,looker_000464,notion_000464,drive_000464 +user_000465,acct_0179,user-000465@customer-179.example.com,requester,false,false,false,2025-09-17T00:00:00Z,U000465,looker_000465,notion_000465,drive_000465 +user_000466,acct_0180,user-000466@customer-180.example.com,approver,true,false,false,2025-10-18T00:00:00Z,U000466,looker_000466,notion_000466,drive_000466 +user_000467,acct_0181,user-000467@customer-181.example.com,admin,true,false,false,2025-11-19T00:00:00Z,U000467,looker_000467,notion_000467,drive_000467 +user_000468,acct_0182,user-000468@customer-182.example.com,finance,true,false,false,2025-12-20T00:00:00Z,U000468,looker_000468,notion_000468,drive_000468 +user_000469,acct_0183,user-000469@customer-183.example.com,requester,true,false,false,2025-01-21T00:00:00Z,U000469,looker_000469,notion_000469,drive_000469 +user_000470,acct_0184,user-000470@customer-184.example.com,approver,false,false,false,2025-02-22T00:00:00Z,U000470,looker_000470,notion_000470,drive_000470 +user_000471,acct_0185,user-000471@customer-185.example.com,admin,true,false,false,2025-03-23T00:00:00Z,U000471,looker_000471,notion_000471,drive_000471 +user_000472,acct_0186,user-000472@customer-186.example.com,finance,true,false,false,2025-04-24T00:00:00Z,U000472,looker_000472,notion_000472,drive_000472 +user_000473,acct_0187,user-000473@customer-187.example.com,requester,true,false,false,2025-05-25T00:00:00Z,U000473,looker_000473,notion_000473,drive_000473 +user_000474,acct_0188,user-000474@customer-188.example.com,approver,true,false,false,2025-06-26T00:00:00Z,U000474,looker_000474,notion_000474,drive_000474 +user_000475,acct_0189,user-000475@customer-189.example.com,admin,false,false,false,2025-07-27T00:00:00Z,U000475,looker_000475,notion_000475,drive_000475 
+user_000476,acct_0190,user-000476@customer-190.example.com,finance,true,false,false,2025-08-28T00:00:00Z,U000476,looker_000476,notion_000476,drive_000476 +user_000477,acct_0001,user-000477@customer-001.example.com,requester,true,false,false,2025-09-01T00:00:00Z,U000477,looker_000477,notion_000477,drive_000477 +user_000478,acct_0002,user-000478@customer-002.example.com,approver,true,false,false,2025-10-02T00:00:00Z,U000478,looker_000478,notion_000478,drive_000478 +user_000479,acct_0003,user-000479@customer-003.example.com,admin,true,false,false,2025-11-03T00:00:00Z,U000479,looker_000479,notion_000479,drive_000479 +user_000480,acct_0004,user-000480@customer-004.example.com,finance,false,false,false,2025-12-04T00:00:00Z,U000480,looker_000480,notion_000480,drive_000480 +user_000481,acct_0005,user-000481@customer-005.example.com,requester,true,false,false,2025-01-05T00:00:00Z,U000481,looker_000481,notion_000481,drive_000481 +user_000482,acct_0006,user-000482@customer-006.example.com,approver,true,false,false,2025-02-06T00:00:00Z,U000482,looker_000482,notion_000482,drive_000482 +user_000483,acct_0007,user-000483@customer-007.example.com,admin,true,false,false,2025-03-07T00:00:00Z,U000483,looker_000483,notion_000483,drive_000483 +user_000484,acct_0008,user-000484@customer-008.example.com,finance,true,false,false,2025-04-08T00:00:00Z,U000484,looker_000484,notion_000484,drive_000484 +user_000485,acct_0009,user-000485@customer-009.example.com,requester,false,false,false,2025-05-09T00:00:00Z,U000485,looker_000485,notion_000485,drive_000485 +user_000486,acct_0010,user-000486@customer-010.example.com,approver,true,false,false,2025-06-10T00:00:00Z,U000486,looker_000486,notion_000486,drive_000486 +user_000487,acct_0011,user-000487@customer-011.example.com,admin,true,false,false,2025-07-11T00:00:00Z,U000487,looker_000487,notion_000487,drive_000487 
+user_000488,acct_0012,user-000488@customer-012.example.com,finance,true,false,false,2025-08-12T00:00:00Z,U000488,looker_000488,notion_000488,drive_000488 +user_000489,acct_0013,user-000489@customer-013.example.com,requester,true,false,false,2025-09-13T00:00:00Z,U000489,looker_000489,notion_000489,drive_000489 +user_000490,acct_0014,user-000490@customer-014.example.com,approver,false,false,false,2025-10-14T00:00:00Z,U000490,looker_000490,notion_000490,drive_000490 +user_000491,acct_0015,user-000491@customer-015.example.com,admin,true,false,false,2025-11-15T00:00:00Z,U000491,looker_000491,notion_000491,drive_000491 +user_000492,acct_0016,user-000492@customer-016.example.com,finance,true,false,false,2025-12-16T00:00:00Z,U000492,looker_000492,notion_000492,drive_000492 +user_000493,acct_0017,user-000493@customer-017.example.com,requester,true,false,false,2025-01-17T00:00:00Z,U000493,looker_000493,notion_000493,drive_000493 +user_000494,acct_0018,user-000494@customer-018.example.com,approver,true,false,false,2025-02-18T00:00:00Z,U000494,looker_000494,notion_000494,drive_000494 +user_000495,acct_0019,user-000495@customer-019.example.com,admin,false,false,false,2025-03-19T00:00:00Z,U000495,looker_000495,notion_000495,drive_000495 +user_000496,acct_0020,user-000496@customer-020.example.com,finance,true,false,false,2025-04-20T00:00:00Z,U000496,looker_000496,notion_000496,drive_000496 +user_000497,acct_0021,user-000497@customer-021.example.com,requester,true,false,false,2025-05-21T00:00:00Z,U000497,looker_000497,notion_000497,drive_000497 +user_000498,acct_0022,user-000498@customer-022.example.com,approver,true,false,false,2025-06-22T00:00:00Z,U000498,looker_000498,notion_000498,drive_000498 +user_000499,acct_0023,user-000499@customer-023.example.com,admin,true,false,false,2025-07-23T00:00:00Z,U000499,looker_000499,notion_000499,drive_000499 
+user_000500,acct_0024,user-000500@customer-024.example.com,finance,false,false,false,2025-08-24T00:00:00Z,U000500,looker_000500,notion_000500,drive_000500 +user_000501,acct_0025,user-000501@customer-025.example.com,requester,true,false,false,2025-09-25T00:00:00Z,U000501,looker_000501,notion_000501,drive_000501 +user_000502,acct_0026,user-000502@customer-026.example.com,approver,true,false,false,2025-10-26T00:00:00Z,U000502,looker_000502,notion_000502,drive_000502 +user_000503,acct_0027,user-000503@customer-027.example.com,admin,true,false,false,2025-11-27T00:00:00Z,U000503,looker_000503,notion_000503,drive_000503 +user_000504,acct_0028,user-000504@customer-028.example.com,finance,true,false,false,2025-12-28T00:00:00Z,U000504,looker_000504,notion_000504,drive_000504 +user_000505,acct_0029,user-000505@customer-029.example.com,requester,false,false,false,2025-01-01T00:00:00Z,U000505,looker_000505,notion_000505,drive_000505 +user_000506,acct_0030,user-000506@customer-030.example.com,approver,true,false,false,2025-02-02T00:00:00Z,U000506,looker_000506,notion_000506,drive_000506 +user_000507,acct_0031,user-000507@customer-031.example.com,admin,true,false,false,2025-03-03T00:00:00Z,U000507,looker_000507,notion_000507,drive_000507 +user_000508,acct_0032,user-000508@customer-032.example.com,finance,true,false,false,2025-04-04T00:00:00Z,U000508,looker_000508,notion_000508,drive_000508 +user_000509,acct_0033,user-000509@customer-033.example.com,requester,true,false,false,2025-05-05T00:00:00Z,U000509,looker_000509,notion_000509,drive_000509 +user_000510,acct_0034,user-000510@customer-034.example.com,approver,false,false,false,2025-06-06T00:00:00Z,U000510,looker_000510,notion_000510,drive_000510 +user_000511,acct_0035,user-000511@customer-035.example.com,admin,true,false,false,2025-07-07T00:00:00Z,U000511,looker_000511,notion_000511,drive_000511 
+user_000512,acct_0036,user-000512@customer-036.example.com,finance,true,false,false,2025-08-08T00:00:00Z,U000512,looker_000512,notion_000512,drive_000512 +user_000513,acct_0037,user-000513@customer-037.example.com,requester,true,false,false,2025-09-09T00:00:00Z,U000513,looker_000513,notion_000513,drive_000513 +user_000514,acct_0038,user-000514@customer-038.example.com,approver,true,false,false,2025-10-10T00:00:00Z,U000514,looker_000514,notion_000514,drive_000514 +user_000515,acct_0039,user-000515@customer-039.example.com,admin,false,false,false,2025-11-11T00:00:00Z,U000515,looker_000515,notion_000515,drive_000515 +user_000516,acct_0040,user-000516@customer-040.example.com,finance,true,false,false,2025-12-12T00:00:00Z,U000516,looker_000516,notion_000516,drive_000516 +user_000517,acct_0041,user-000517@customer-041.example.com,requester,true,false,false,2025-01-13T00:00:00Z,U000517,looker_000517,notion_000517,drive_000517 +user_000518,acct_0042,user-000518@customer-042.example.com,approver,true,false,false,2025-02-14T00:00:00Z,U000518,looker_000518,notion_000518,drive_000518 +user_000519,acct_0043,user-000519@customer-043.example.com,admin,true,false,false,2025-03-15T00:00:00Z,U000519,looker_000519,notion_000519,drive_000519 +user_000520,acct_0044,user-000520@customer-044.example.com,finance,false,false,false,2025-04-16T00:00:00Z,U000520,looker_000520,notion_000520,drive_000520 +user_000521,acct_0045,user-000521@customer-045.example.com,requester,true,false,false,2025-05-17T00:00:00Z,U000521,looker_000521,notion_000521,drive_000521 +user_000522,acct_0046,user-000522@customer-046.example.com,approver,true,false,false,2025-06-18T00:00:00Z,U000522,looker_000522,notion_000522,drive_000522 +user_000523,acct_0047,user-000523@customer-047.example.com,admin,true,false,false,2025-07-19T00:00:00Z,U000523,looker_000523,notion_000523,drive_000523 
+user_000524,acct_0048,user-000524@customer-048.example.com,finance,true,false,false,2025-08-20T00:00:00Z,U000524,looker_000524,notion_000524,drive_000524 +user_000525,acct_0049,user-000525@customer-049.example.com,requester,false,false,false,2025-09-21T00:00:00Z,U000525,looker_000525,notion_000525,drive_000525 +user_000526,acct_0050,user-000526@customer-050.example.com,approver,true,false,false,2025-10-22T00:00:00Z,U000526,looker_000526,notion_000526,drive_000526 +user_000527,acct_0051,user-000527@customer-051.example.com,admin,true,false,false,2025-11-23T00:00:00Z,U000527,looker_000527,notion_000527,drive_000527 +user_000528,acct_0052,user-000528@customer-052.example.com,finance,true,false,false,2025-12-24T00:00:00Z,U000528,looker_000528,notion_000528,drive_000528 +user_000529,acct_0053,user-000529@customer-053.example.com,requester,true,false,false,2025-01-25T00:00:00Z,U000529,looker_000529,notion_000529,drive_000529 +user_000530,acct_0054,user-000530@customer-054.example.com,approver,false,false,false,2025-02-26T00:00:00Z,U000530,looker_000530,notion_000530,drive_000530 +user_000531,acct_0055,user-000531@customer-055.example.com,admin,true,false,false,2025-03-27T00:00:00Z,U000531,looker_000531,notion_000531,drive_000531 +user_000532,acct_0056,user-000532@customer-056.example.com,finance,true,false,false,2025-04-28T00:00:00Z,U000532,looker_000532,notion_000532,drive_000532 +user_000533,acct_0057,user-000533@customer-057.example.com,requester,true,false,false,2025-05-01T00:00:00Z,U000533,looker_000533,notion_000533,drive_000533 +user_000534,acct_0058,user-000534@customer-058.example.com,approver,true,false,false,2025-06-02T00:00:00Z,U000534,looker_000534,notion_000534,drive_000534 +user_000535,acct_0059,user-000535@customer-059.example.com,admin,false,false,false,2025-07-03T00:00:00Z,U000535,looker_000535,notion_000535,drive_000535 
+user_000536,acct_0060,user-000536@customer-060.example.com,finance,true,false,false,2025-08-04T00:00:00Z,U000536,looker_000536,notion_000536,drive_000536 +user_000537,acct_0061,user-000537@customer-061.example.com,requester,true,false,false,2025-09-05T00:00:00Z,U000537,looker_000537,notion_000537,drive_000537 +user_000538,acct_0062,user-000538@customer-062.example.com,approver,true,false,false,2025-10-06T00:00:00Z,U000538,looker_000538,notion_000538,drive_000538 +user_000539,acct_0063,user-000539@customer-063.example.com,admin,true,false,false,2025-11-07T00:00:00Z,U000539,looker_000539,notion_000539,drive_000539 +user_000540,acct_0064,user-000540@customer-064.example.com,finance,false,false,false,2025-12-08T00:00:00Z,U000540,looker_000540,notion_000540,drive_000540 +user_000541,acct_0065,user-000541@customer-065.example.com,requester,true,false,false,2025-01-09T00:00:00Z,U000541,looker_000541,notion_000541,drive_000541 +user_000542,acct_0066,user-000542@customer-066.example.com,approver,true,false,false,2025-02-10T00:00:00Z,U000542,looker_000542,notion_000542,drive_000542 +user_000543,acct_0067,user-000543@customer-067.example.com,admin,true,false,false,2025-03-11T00:00:00Z,U000543,looker_000543,notion_000543,drive_000543 +user_000544,acct_0068,user-000544@customer-068.example.com,finance,true,false,false,2025-04-12T00:00:00Z,U000544,looker_000544,notion_000544,drive_000544 +user_000545,acct_0069,user-000545@customer-069.example.com,requester,false,false,false,2025-05-13T00:00:00Z,U000545,looker_000545,notion_000545,drive_000545 +user_000546,acct_0070,user-000546@customer-070.example.com,approver,true,false,false,2025-06-14T00:00:00Z,U000546,looker_000546,notion_000546,drive_000546 +user_000547,acct_0071,user-000547@customer-071.example.com,admin,true,false,false,2025-07-15T00:00:00Z,U000547,looker_000547,notion_000547,drive_000547 
+user_000548,acct_0072,user-000548@customer-072.example.com,finance,true,false,false,2025-08-16T00:00:00Z,U000548,looker_000548,notion_000548,drive_000548 +user_000549,acct_0073,user-000549@customer-073.example.com,requester,true,false,false,2025-09-17T00:00:00Z,U000549,looker_000549,notion_000549,drive_000549 +user_000550,acct_0074,user-000550@customer-074.example.com,approver,false,false,false,2025-10-18T00:00:00Z,U000550,looker_000550,notion_000550,drive_000550 +user_000551,acct_0075,user-000551@customer-075.example.com,admin,true,false,false,2025-11-19T00:00:00Z,U000551,looker_000551,notion_000551,drive_000551 +user_000552,acct_0076,user-000552@customer-076.example.com,finance,true,false,false,2025-12-20T00:00:00Z,U000552,looker_000552,notion_000552,drive_000552 +user_000553,acct_0077,user-000553@customer-077.example.com,requester,true,false,false,2025-01-21T00:00:00Z,U000553,looker_000553,notion_000553,drive_000553 +user_000554,acct_0078,user-000554@customer-078.example.com,approver,true,false,false,2025-02-22T00:00:00Z,U000554,looker_000554,notion_000554,drive_000554 +user_000555,acct_0079,user-000555@customer-079.example.com,admin,false,false,false,2025-03-23T00:00:00Z,U000555,looker_000555,notion_000555,drive_000555 +user_000556,acct_0080,user-000556@customer-080.example.com,finance,true,false,false,2025-04-24T00:00:00Z,U000556,looker_000556,notion_000556,drive_000556 +user_000557,acct_0081,user-000557@customer-081.example.com,requester,true,false,false,2025-05-25T00:00:00Z,U000557,looker_000557,notion_000557,drive_000557 +user_000558,acct_0082,user-000558@customer-082.example.com,approver,true,false,false,2025-06-26T00:00:00Z,U000558,looker_000558,notion_000558,drive_000558 +user_000559,acct_0083,user-000559@customer-083.example.com,admin,true,false,false,2025-07-27T00:00:00Z,U000559,looker_000559,notion_000559,drive_000559 
+user_000560,acct_0084,user-000560@customer-084.example.com,finance,false,false,false,2025-08-28T00:00:00Z,U000560,looker_000560,notion_000560,drive_000560 +user_000561,acct_0085,user-000561@customer-085.example.com,requester,true,false,false,2025-09-01T00:00:00Z,U000561,looker_000561,notion_000561,drive_000561 +user_000562,acct_0086,user-000562@customer-086.example.com,approver,true,false,false,2025-10-02T00:00:00Z,U000562,looker_000562,notion_000562,drive_000562 +user_000563,acct_0087,user-000563@customer-087.example.com,admin,true,false,false,2025-11-03T00:00:00Z,U000563,looker_000563,notion_000563,drive_000563 +user_000564,acct_0088,user-000564@customer-088.example.com,finance,true,false,false,2025-12-04T00:00:00Z,U000564,looker_000564,notion_000564,drive_000564 +user_000565,acct_0089,user-000565@customer-089.example.com,requester,false,false,false,2025-01-05T00:00:00Z,U000565,looker_000565,notion_000565,drive_000565 +user_000566,acct_0090,user-000566@customer-090.example.com,approver,true,false,false,2025-02-06T00:00:00Z,U000566,looker_000566,notion_000566,drive_000566 +user_000567,acct_0091,user-000567@customer-091.example.com,admin,true,false,false,2025-03-07T00:00:00Z,U000567,looker_000567,notion_000567,drive_000567 +user_000568,acct_0092,user-000568@customer-092.example.com,finance,true,false,false,2025-04-08T00:00:00Z,U000568,looker_000568,notion_000568,drive_000568 +user_000569,acct_0093,user-000569@customer-093.example.com,requester,true,false,false,2025-05-09T00:00:00Z,U000569,looker_000569,notion_000569,drive_000569 +user_000570,acct_0094,user-000570@customer-094.example.com,approver,false,false,false,2025-06-10T00:00:00Z,U000570,looker_000570,notion_000570,drive_000570 +user_000571,acct_0095,user-000571@customer-095.example.com,admin,true,false,false,2025-07-11T00:00:00Z,U000571,looker_000571,notion_000571,drive_000571 
+user_000572,acct_0096,user-000572@customer-096.example.com,finance,true,false,false,2025-08-12T00:00:00Z,U000572,looker_000572,notion_000572,drive_000572 +user_000573,acct_0097,user-000573@customer-097.example.com,requester,true,false,false,2025-09-13T00:00:00Z,U000573,looker_000573,notion_000573,drive_000573 +user_000574,acct_0098,user-000574@customer-098.example.com,approver,true,false,false,2025-10-14T00:00:00Z,U000574,looker_000574,notion_000574,drive_000574 +user_000575,acct_0099,user-000575@customer-099.example.com,admin,false,false,false,2025-11-15T00:00:00Z,U000575,looker_000575,notion_000575,drive_000575 +user_000576,acct_0100,user-000576@customer-100.example.com,finance,true,false,false,2025-12-16T00:00:00Z,U000576,looker_000576,notion_000576,drive_000576 +user_000577,acct_0101,user-000577@customer-101.example.com,requester,true,false,false,2025-01-17T00:00:00Z,U000577,looker_000577,notion_000577,drive_000577 +user_000578,acct_0102,user-000578@customer-102.example.com,approver,true,false,false,2025-02-18T00:00:00Z,U000578,looker_000578,notion_000578,drive_000578 +user_000579,acct_0103,user-000579@customer-103.example.com,admin,true,false,false,2025-03-19T00:00:00Z,U000579,looker_000579,notion_000579,drive_000579 +user_000580,acct_0104,user-000580@customer-104.example.com,finance,false,false,false,2025-04-20T00:00:00Z,U000580,looker_000580,notion_000580,drive_000580 +user_000581,acct_0105,user-000581@customer-105.example.com,requester,true,false,false,2025-05-21T00:00:00Z,U000581,looker_000581,notion_000581,drive_000581 +user_000582,acct_0106,user-000582@customer-106.example.com,approver,true,false,false,2025-06-22T00:00:00Z,U000582,looker_000582,notion_000582,drive_000582 +user_000583,acct_0107,user-000583@customer-107.example.com,admin,true,false,false,2025-07-23T00:00:00Z,U000583,looker_000583,notion_000583,drive_000583 
+user_000584,acct_0108,user-000584@customer-108.example.com,finance,true,false,false,2025-08-24T00:00:00Z,U000584,looker_000584,notion_000584,drive_000584 +user_000585,acct_0109,user-000585@customer-109.example.com,requester,false,false,false,2025-09-25T00:00:00Z,U000585,looker_000585,notion_000585,drive_000585 +user_000586,acct_0110,user-000586@customer-110.example.com,approver,true,false,false,2025-10-26T00:00:00Z,U000586,looker_000586,notion_000586,drive_000586 +user_000587,acct_0111,user-000587@customer-111.example.com,admin,true,false,false,2025-11-27T00:00:00Z,U000587,looker_000587,notion_000587,drive_000587 +user_000588,acct_0112,user-000588@customer-112.example.com,finance,true,false,false,2025-12-28T00:00:00Z,U000588,looker_000588,notion_000588,drive_000588 +user_000589,acct_0113,user-000589@customer-113.example.com,requester,true,false,false,2025-01-01T00:00:00Z,U000589,looker_000589,notion_000589,drive_000589 +user_000590,acct_0114,user-000590@customer-114.example.com,approver,false,false,false,2025-02-02T00:00:00Z,U000590,looker_000590,notion_000590,drive_000590 +user_000591,acct_0115,user-000591@customer-115.example.com,admin,true,false,false,2025-03-03T00:00:00Z,U000591,looker_000591,notion_000591,drive_000591 +user_000592,acct_0116,user-000592@customer-116.example.com,finance,true,false,false,2025-04-04T00:00:00Z,U000592,looker_000592,notion_000592,drive_000592 +user_000593,acct_0117,user-000593@customer-117.example.com,requester,true,false,false,2025-05-05T00:00:00Z,U000593,looker_000593,notion_000593,drive_000593 +user_000594,acct_0118,user-000594@customer-118.example.com,approver,true,false,false,2025-06-06T00:00:00Z,U000594,looker_000594,notion_000594,drive_000594 +user_000595,acct_0119,user-000595@customer-119.example.com,admin,false,false,false,2025-07-07T00:00:00Z,U000595,looker_000595,notion_000595,drive_000595 
+user_000596,acct_0120,user-000596@customer-120.example.com,finance,true,false,false,2025-08-08T00:00:00Z,U000596,looker_000596,notion_000596,drive_000596 +user_000597,acct_0121,user-000597@customer-121.example.com,requester,true,false,false,2025-09-09T00:00:00Z,U000597,looker_000597,notion_000597,drive_000597 +user_000598,acct_0122,user-000598@customer-122.example.com,approver,true,false,false,2025-10-10T00:00:00Z,U000598,looker_000598,notion_000598,drive_000598 +user_000599,acct_0123,user-000599@customer-123.example.com,admin,true,false,false,2025-11-11T00:00:00Z,U000599,looker_000599,notion_000599,drive_000599 +user_000600,acct_0124,user-000600@customer-124.example.com,finance,false,false,false,2025-12-12T00:00:00Z,U000600,looker_000600,notion_000600,drive_000600 +user_000601,acct_0125,user-000601@customer-125.example.com,requester,true,false,false,2025-01-13T00:00:00Z,U000601,looker_000601,notion_000601,drive_000601 +user_000602,acct_0126,user-000602@customer-126.example.com,approver,true,false,false,2025-02-14T00:00:00Z,U000602,looker_000602,notion_000602,drive_000602 +user_000603,acct_0127,user-000603@customer-127.example.com,admin,true,false,false,2025-03-15T00:00:00Z,U000603,looker_000603,notion_000603,drive_000603 +user_000604,acct_0128,user-000604@customer-128.example.com,finance,true,false,false,2025-04-16T00:00:00Z,U000604,looker_000604,notion_000604,drive_000604 +user_000605,acct_0129,user-000605@customer-129.example.com,requester,false,false,false,2025-05-17T00:00:00Z,U000605,looker_000605,notion_000605,drive_000605 +user_000606,acct_0130,user-000606@customer-130.example.com,approver,true,false,false,2025-06-18T00:00:00Z,U000606,looker_000606,notion_000606,drive_000606 +user_000607,acct_0131,user-000607@customer-131.example.com,admin,true,false,false,2025-07-19T00:00:00Z,U000607,looker_000607,notion_000607,drive_000607 
+user_000608,acct_0132,user-000608@customer-132.example.com,finance,true,false,false,2025-08-20T00:00:00Z,U000608,looker_000608,notion_000608,drive_000608 +user_000609,acct_0133,user-000609@customer-133.example.com,requester,true,false,false,2025-09-21T00:00:00Z,U000609,looker_000609,notion_000609,drive_000609 +user_000610,acct_0134,user-000610@customer-134.example.com,approver,false,false,false,2025-10-22T00:00:00Z,U000610,looker_000610,notion_000610,drive_000610 +user_000611,acct_0135,user-000611@customer-135.example.com,admin,true,false,false,2025-11-23T00:00:00Z,U000611,looker_000611,notion_000611,drive_000611 +user_000612,acct_0136,user-000612@customer-136.example.com,finance,true,false,false,2025-12-24T00:00:00Z,U000612,looker_000612,notion_000612,drive_000612 +user_000613,acct_0137,user-000613@customer-137.example.com,requester,true,false,false,2025-01-25T00:00:00Z,U000613,looker_000613,notion_000613,drive_000613 +user_000614,acct_0138,user-000614@customer-138.example.com,approver,true,false,false,2025-02-26T00:00:00Z,U000614,looker_000614,notion_000614,drive_000614 +user_000615,acct_0139,user-000615@customer-139.example.com,admin,false,false,false,2025-03-27T00:00:00Z,U000615,looker_000615,notion_000615,drive_000615 +user_000616,acct_0140,user-000616@customer-140.example.com,finance,true,false,false,2025-04-28T00:00:00Z,U000616,looker_000616,notion_000616,drive_000616 +user_000617,acct_0141,user-000617@customer-141.example.com,requester,true,false,false,2025-05-01T00:00:00Z,U000617,looker_000617,notion_000617,drive_000617 +user_000618,acct_0142,user-000618@customer-142.example.com,approver,true,false,false,2025-06-02T00:00:00Z,U000618,looker_000618,notion_000618,drive_000618 +user_000619,acct_0143,user-000619@customer-143.example.com,admin,true,false,false,2025-07-03T00:00:00Z,U000619,looker_000619,notion_000619,drive_000619 
+user_000620,acct_0144,user-000620@customer-144.example.com,finance,false,false,false,2025-08-04T00:00:00Z,U000620,looker_000620,notion_000620,drive_000620 +user_000621,acct_0145,user-000621@customer-145.example.com,requester,true,false,false,2025-09-05T00:00:00Z,U000621,looker_000621,notion_000621,drive_000621 +user_000622,acct_0146,user-000622@customer-146.example.com,approver,true,false,false,2025-10-06T00:00:00Z,U000622,looker_000622,notion_000622,drive_000622 +user_000623,acct_0147,user-000623@customer-147.example.com,admin,true,false,false,2025-11-07T00:00:00Z,U000623,looker_000623,notion_000623,drive_000623 +user_000624,acct_0148,user-000624@customer-148.example.com,finance,true,false,false,2025-12-08T00:00:00Z,U000624,looker_000624,notion_000624,drive_000624 +user_000625,acct_0149,user-000625@customer-149.example.com,requester,false,false,false,2025-01-09T00:00:00Z,U000625,looker_000625,notion_000625,drive_000625 +user_000626,acct_0150,user-000626@customer-150.example.com,approver,true,false,false,2025-02-10T00:00:00Z,U000626,looker_000626,notion_000626,drive_000626 +user_000627,acct_0151,user-000627@customer-151.example.com,admin,true,false,false,2025-03-11T00:00:00Z,U000627,looker_000627,notion_000627,drive_000627 +user_000628,acct_0152,user-000628@customer-152.example.com,finance,true,false,false,2025-04-12T00:00:00Z,U000628,looker_000628,notion_000628,drive_000628 +user_000629,acct_0153,user-000629@customer-153.example.com,requester,true,false,false,2025-05-13T00:00:00Z,U000629,looker_000629,notion_000629,drive_000629 +user_000630,acct_0154,user-000630@customer-154.example.com,approver,false,false,false,2025-06-14T00:00:00Z,U000630,looker_000630,notion_000630,drive_000630 +user_000631,acct_0155,user-000631@customer-155.example.com,admin,true,false,false,2025-07-15T00:00:00Z,U000631,looker_000631,notion_000631,drive_000631 
+user_000632,acct_0156,user-000632@customer-156.example.com,finance,true,false,false,2025-08-16T00:00:00Z,U000632,looker_000632,notion_000632,drive_000632 +user_000633,acct_0157,user-000633@customer-157.example.com,requester,true,false,false,2025-09-17T00:00:00Z,U000633,looker_000633,notion_000633,drive_000633 +user_000634,acct_0158,user-000634@customer-158.example.com,approver,true,false,false,2025-10-18T00:00:00Z,U000634,looker_000634,notion_000634,drive_000634 +user_000635,acct_0159,user-000635@customer-159.example.com,admin,false,false,false,2025-11-19T00:00:00Z,U000635,looker_000635,notion_000635,drive_000635 +user_000636,acct_0160,user-000636@customer-160.example.com,finance,true,false,false,2025-12-20T00:00:00Z,U000636,looker_000636,notion_000636,drive_000636 +user_000637,acct_0161,user-000637@customer-161.example.com,requester,true,false,false,2025-01-21T00:00:00Z,U000637,looker_000637,notion_000637,drive_000637 +user_000638,acct_0162,user-000638@customer-162.example.com,approver,true,false,false,2025-02-22T00:00:00Z,U000638,looker_000638,notion_000638,drive_000638 +user_000639,acct_0163,user-000639@customer-163.example.com,admin,true,false,false,2025-03-23T00:00:00Z,U000639,looker_000639,notion_000639,drive_000639 +user_000640,acct_0164,user-000640@customer-164.example.com,finance,false,false,false,2025-04-24T00:00:00Z,U000640,looker_000640,notion_000640,drive_000640 +user_000641,acct_0165,user-000641@customer-165.example.com,requester,true,false,false,2025-05-25T00:00:00Z,U000641,looker_000641,notion_000641,drive_000641 +user_000642,acct_0166,user-000642@customer-166.example.com,approver,true,false,false,2025-06-26T00:00:00Z,U000642,looker_000642,notion_000642,drive_000642 +user_000643,acct_0167,user-000643@customer-167.example.com,admin,true,false,false,2025-07-27T00:00:00Z,U000643,looker_000643,notion_000643,drive_000643 
+user_000644,acct_0168,user-000644@customer-168.example.com,finance,true,false,false,2025-08-28T00:00:00Z,U000644,looker_000644,notion_000644,drive_000644 +user_000645,acct_0169,user-000645@customer-169.example.com,requester,false,false,false,2025-09-01T00:00:00Z,U000645,looker_000645,notion_000645,drive_000645 +user_000646,acct_0170,user-000646@customer-170.example.com,approver,true,false,false,2025-10-02T00:00:00Z,U000646,looker_000646,notion_000646,drive_000646 +user_000647,acct_0171,user-000647@customer-171.example.com,admin,true,false,false,2025-11-03T00:00:00Z,U000647,looker_000647,notion_000647,drive_000647 +user_000648,acct_0172,user-000648@customer-172.example.com,finance,true,false,false,2025-12-04T00:00:00Z,U000648,looker_000648,notion_000648,drive_000648 +user_000649,acct_0173,user-000649@customer-173.example.com,requester,true,false,false,2025-01-05T00:00:00Z,U000649,looker_000649,notion_000649,drive_000649 +user_000650,acct_0174,user-000650@customer-174.example.com,approver,false,false,false,2025-02-06T00:00:00Z,U000650,looker_000650,notion_000650,drive_000650 +user_000651,acct_0175,user-000651@customer-175.example.com,admin,true,false,false,2025-03-07T00:00:00Z,U000651,looker_000651,notion_000651,drive_000651 +user_000652,acct_0176,user-000652@customer-176.example.com,finance,true,false,false,2025-04-08T00:00:00Z,U000652,looker_000652,notion_000652,drive_000652 +user_000653,acct_0177,user-000653@customer-177.example.com,requester,true,false,false,2025-05-09T00:00:00Z,U000653,looker_000653,notion_000653,drive_000653 +user_000654,acct_0178,user-000654@customer-178.example.com,approver,true,false,false,2025-06-10T00:00:00Z,U000654,looker_000654,notion_000654,drive_000654 +user_000655,acct_0179,user-000655@customer-179.example.com,admin,false,false,false,2025-07-11T00:00:00Z,U000655,looker_000655,notion_000655,drive_000655 
+user_000656,acct_0180,user-000656@customer-180.example.com,finance,true,false,false,2025-08-12T00:00:00Z,U000656,looker_000656,notion_000656,drive_000656 +user_000657,acct_0181,user-000657@customer-181.example.com,requester,true,false,false,2025-09-13T00:00:00Z,U000657,looker_000657,notion_000657,drive_000657 +user_000658,acct_0182,user-000658@customer-182.example.com,approver,true,false,false,2025-10-14T00:00:00Z,U000658,looker_000658,notion_000658,drive_000658 +user_000659,acct_0183,user-000659@customer-183.example.com,admin,true,false,false,2025-11-15T00:00:00Z,U000659,looker_000659,notion_000659,drive_000659 +user_000660,acct_0184,user-000660@customer-184.example.com,finance,false,false,false,2025-12-16T00:00:00Z,U000660,looker_000660,notion_000660,drive_000660 +user_000661,acct_0185,user-000661@customer-185.example.com,requester,true,false,false,2025-01-17T00:00:00Z,U000661,looker_000661,notion_000661,drive_000661 +user_000662,acct_0186,user-000662@customer-186.example.com,approver,true,false,false,2025-02-18T00:00:00Z,U000662,looker_000662,notion_000662,drive_000662 +user_000663,acct_0187,user-000663@customer-187.example.com,admin,true,false,false,2025-03-19T00:00:00Z,U000663,looker_000663,notion_000663,drive_000663 +user_000664,acct_0188,user-000664@customer-188.example.com,finance,true,false,false,2025-04-20T00:00:00Z,U000664,looker_000664,notion_000664,drive_000664 +user_000665,acct_0189,user-000665@customer-189.example.com,requester,false,false,false,2025-05-21T00:00:00Z,U000665,looker_000665,notion_000665,drive_000665 +user_000666,acct_0190,user-000666@customer-190.example.com,approver,true,false,false,2025-06-22T00:00:00Z,U000666,looker_000666,notion_000666,drive_000666 +user_000667,acct_0001,user-000667@customer-001.example.com,admin,true,false,false,2025-07-23T00:00:00Z,U000667,looker_000667,notion_000667,drive_000667 
+user_000668,acct_0002,user-000668@customer-002.example.com,finance,true,false,false,2025-08-24T00:00:00Z,U000668,looker_000668,notion_000668,drive_000668 +user_000669,acct_0003,user-000669@customer-003.example.com,requester,true,false,false,2025-09-25T00:00:00Z,U000669,looker_000669,notion_000669,drive_000669 +user_000670,acct_0004,user-000670@customer-004.example.com,approver,false,false,false,2025-10-26T00:00:00Z,U000670,looker_000670,notion_000670,drive_000670 +user_000671,acct_0005,user-000671@customer-005.example.com,admin,true,false,false,2025-11-27T00:00:00Z,U000671,looker_000671,notion_000671,drive_000671 +user_000672,acct_0006,user-000672@customer-006.example.com,finance,true,false,false,2025-12-28T00:00:00Z,U000672,looker_000672,notion_000672,drive_000672 +user_000673,acct_0007,user-000673@customer-007.example.com,requester,true,false,false,2025-01-01T00:00:00Z,U000673,looker_000673,notion_000673,drive_000673 +user_000674,acct_0008,user-000674@customer-008.example.com,approver,true,false,false,2025-02-02T00:00:00Z,U000674,looker_000674,notion_000674,drive_000674 +user_000675,acct_0009,user-000675@customer-009.example.com,admin,false,false,false,2025-03-03T00:00:00Z,U000675,looker_000675,notion_000675,drive_000675 +user_000676,acct_0010,user-000676@customer-010.example.com,finance,true,false,false,2025-04-04T00:00:00Z,U000676,looker_000676,notion_000676,drive_000676 +user_000677,acct_0011,user-000677@customer-011.example.com,requester,true,false,false,2025-05-05T00:00:00Z,U000677,looker_000677,notion_000677,drive_000677 +user_000678,acct_0012,user-000678@customer-012.example.com,approver,true,false,false,2025-06-06T00:00:00Z,U000678,looker_000678,notion_000678,drive_000678 +user_000679,acct_0013,user-000679@customer-013.example.com,admin,true,false,false,2025-07-07T00:00:00Z,U000679,looker_000679,notion_000679,drive_000679 
+user_000680,acct_0014,user-000680@customer-014.example.com,finance,false,false,false,2025-08-08T00:00:00Z,U000680,looker_000680,notion_000680,drive_000680 +user_000681,acct_0015,user-000681@customer-015.example.com,requester,true,false,false,2025-09-09T00:00:00Z,U000681,looker_000681,notion_000681,drive_000681 +user_000682,acct_0016,user-000682@customer-016.example.com,approver,true,false,false,2025-10-10T00:00:00Z,U000682,looker_000682,notion_000682,drive_000682 +user_000683,acct_0017,user-000683@customer-017.example.com,admin,true,false,false,2025-11-11T00:00:00Z,U000683,looker_000683,notion_000683,drive_000683 +user_000684,acct_0018,user-000684@customer-018.example.com,finance,true,false,false,2025-12-12T00:00:00Z,U000684,looker_000684,notion_000684,drive_000684 +user_000685,acct_0019,user-000685@customer-019.example.com,requester,false,false,false,2025-01-13T00:00:00Z,U000685,looker_000685,notion_000685,drive_000685 +user_000686,acct_0020,user-000686@customer-020.example.com,approver,true,false,false,2025-02-14T00:00:00Z,U000686,looker_000686,notion_000686,drive_000686 +user_000687,acct_0021,user-000687@customer-021.example.com,admin,true,false,false,2025-03-15T00:00:00Z,U000687,looker_000687,notion_000687,drive_000687 +user_000688,acct_0022,user-000688@customer-022.example.com,finance,true,false,false,2025-04-16T00:00:00Z,U000688,looker_000688,notion_000688,drive_000688 +user_000689,acct_0023,user-000689@customer-023.example.com,requester,true,false,false,2025-05-17T00:00:00Z,U000689,looker_000689,notion_000689,drive_000689 +user_000690,acct_0024,user-000690@customer-024.example.com,approver,false,false,false,2025-06-18T00:00:00Z,U000690,looker_000690,notion_000690,drive_000690 +user_000691,acct_0025,user-000691@customer-025.example.com,admin,true,false,false,2025-07-19T00:00:00Z,U000691,looker_000691,notion_000691,drive_000691 
+user_000692,acct_0026,user-000692@customer-026.example.com,finance,true,false,false,2025-08-20T00:00:00Z,U000692,looker_000692,notion_000692,drive_000692 +user_000693,acct_0027,user-000693@customer-027.example.com,requester,true,false,false,2025-09-21T00:00:00Z,U000693,looker_000693,notion_000693,drive_000693 +user_000694,acct_0028,user-000694@customer-028.example.com,approver,true,false,false,2025-10-22T00:00:00Z,U000694,looker_000694,notion_000694,drive_000694 +user_000695,acct_0029,user-000695@customer-029.example.com,admin,false,false,false,2025-11-23T00:00:00Z,U000695,looker_000695,notion_000695,drive_000695 +user_000696,acct_0030,user-000696@customer-030.example.com,finance,true,false,false,2025-12-24T00:00:00Z,U000696,looker_000696,notion_000696,drive_000696 +user_000697,acct_0031,user-000697@customer-031.example.com,requester,true,false,false,2025-01-25T00:00:00Z,U000697,looker_000697,notion_000697,drive_000697 +user_000698,acct_0032,user-000698@customer-032.example.com,approver,true,false,false,2025-02-26T00:00:00Z,U000698,looker_000698,notion_000698,drive_000698 +user_000699,acct_0033,user-000699@customer-033.example.com,admin,true,false,false,2025-03-27T00:00:00Z,U000699,looker_000699,notion_000699,drive_000699 +user_000700,acct_0034,user-000700@customer-034.example.com,finance,false,false,false,2025-04-28T00:00:00Z,U000700,looker_000700,notion_000700,drive_000700 +user_000701,acct_0035,user-000701@customer-035.example.com,requester,true,false,false,2025-05-01T00:00:00Z,U000701,looker_000701,notion_000701,drive_000701 +user_000702,acct_0036,user-000702@customer-036.example.com,approver,true,false,false,2025-06-02T00:00:00Z,U000702,looker_000702,notion_000702,drive_000702 +user_000703,acct_0037,user-000703@customer-037.example.com,admin,true,false,false,2025-07-03T00:00:00Z,U000703,looker_000703,notion_000703,drive_000703 
+user_000704,acct_0038,user-000704@customer-038.example.com,finance,true,false,false,2025-08-04T00:00:00Z,U000704,looker_000704,notion_000704,drive_000704 +user_000705,acct_0039,user-000705@customer-039.example.com,requester,false,false,false,2025-09-05T00:00:00Z,U000705,looker_000705,notion_000705,drive_000705 +user_000706,acct_0040,user-000706@customer-040.example.com,approver,true,false,false,2025-10-06T00:00:00Z,U000706,looker_000706,notion_000706,drive_000706 +user_000707,acct_0041,user-000707@customer-041.example.com,admin,true,false,false,2025-11-07T00:00:00Z,U000707,looker_000707,notion_000707,drive_000707 +user_000708,acct_0042,user-000708@customer-042.example.com,finance,true,false,false,2025-12-08T00:00:00Z,U000708,looker_000708,notion_000708,drive_000708 +user_000709,acct_0043,user-000709@customer-043.example.com,requester,true,false,false,2025-01-09T00:00:00Z,U000709,looker_000709,notion_000709,drive_000709 +user_000710,acct_0044,user-000710@customer-044.example.com,approver,false,false,false,2025-02-10T00:00:00Z,U000710,looker_000710,notion_000710,drive_000710 +user_000711,acct_0045,user-000711@customer-045.example.com,admin,true,false,false,2025-03-11T00:00:00Z,U000711,looker_000711,notion_000711,drive_000711 +user_000712,acct_0046,user-000712@customer-046.example.com,finance,true,false,false,2025-04-12T00:00:00Z,U000712,looker_000712,notion_000712,drive_000712 +user_000713,acct_0047,user-000713@customer-047.example.com,requester,true,false,false,2025-05-13T00:00:00Z,U000713,looker_000713,notion_000713,drive_000713 +user_000714,acct_0048,user-000714@customer-048.example.com,approver,true,false,false,2025-06-14T00:00:00Z,U000714,looker_000714,notion_000714,drive_000714 +user_000715,acct_0049,user-000715@customer-049.example.com,admin,false,false,false,2025-07-15T00:00:00Z,U000715,looker_000715,notion_000715,drive_000715 
+user_000716,acct_0050,user-000716@customer-050.example.com,finance,true,false,false,2025-08-16T00:00:00Z,U000716,looker_000716,notion_000716,drive_000716 +user_000717,acct_0051,user-000717@customer-051.example.com,requester,true,false,false,2025-09-17T00:00:00Z,U000717,looker_000717,notion_000717,drive_000717 +user_000718,acct_0052,user-000718@customer-052.example.com,approver,true,false,false,2025-10-18T00:00:00Z,U000718,looker_000718,notion_000718,drive_000718 +user_000719,acct_0053,user-000719@customer-053.example.com,admin,true,false,false,2025-11-19T00:00:00Z,U000719,looker_000719,notion_000719,drive_000719 +user_000720,acct_0054,user-000720@customer-054.example.com,finance,false,false,false,2025-12-20T00:00:00Z,U000720,looker_000720,notion_000720,drive_000720 +user_000721,acct_0055,user-000721@customer-055.example.com,requester,true,false,false,2025-01-21T00:00:00Z,U000721,looker_000721,notion_000721,drive_000721 +user_000722,acct_0056,user-000722@customer-056.example.com,approver,true,false,false,2025-02-22T00:00:00Z,U000722,looker_000722,notion_000722,drive_000722 +user_000723,acct_0057,user-000723@customer-057.example.com,admin,true,false,false,2025-03-23T00:00:00Z,U000723,looker_000723,notion_000723,drive_000723 +user_000724,acct_0058,user-000724@customer-058.example.com,finance,true,false,false,2025-04-24T00:00:00Z,U000724,looker_000724,notion_000724,drive_000724 +user_000725,acct_0059,user-000725@customer-059.example.com,requester,false,false,false,2025-05-25T00:00:00Z,U000725,looker_000725,notion_000725,drive_000725 +user_000726,acct_0060,user-000726@customer-060.example.com,approver,true,false,false,2025-06-26T00:00:00Z,U000726,looker_000726,notion_000726,drive_000726 +user_000727,acct_0061,user-000727@customer-061.example.com,admin,true,false,false,2025-07-27T00:00:00Z,U000727,looker_000727,notion_000727,drive_000727 
+user_000728,acct_0062,user-000728@customer-062.example.com,finance,true,false,false,2025-08-28T00:00:00Z,U000728,looker_000728,notion_000728,drive_000728 +user_000729,acct_0063,user-000729@customer-063.example.com,requester,true,false,false,2025-09-01T00:00:00Z,U000729,looker_000729,notion_000729,drive_000729 +user_000730,acct_0064,user-000730@customer-064.example.com,approver,false,false,false,2025-10-02T00:00:00Z,U000730,looker_000730,notion_000730,drive_000730 +user_000731,acct_0065,user-000731@customer-065.example.com,admin,true,false,false,2025-11-03T00:00:00Z,U000731,looker_000731,notion_000731,drive_000731 +user_000732,acct_0066,user-000732@customer-066.example.com,finance,true,false,false,2025-12-04T00:00:00Z,U000732,looker_000732,notion_000732,drive_000732 +user_000733,acct_0067,user-000733@customer-067.example.com,requester,true,false,false,2025-01-05T00:00:00Z,U000733,looker_000733,notion_000733,drive_000733 +user_000734,acct_0068,user-000734@customer-068.example.com,approver,true,false,false,2025-02-06T00:00:00Z,U000734,looker_000734,notion_000734,drive_000734 +user_000735,acct_0069,user-000735@customer-069.example.com,admin,false,false,false,2025-03-07T00:00:00Z,U000735,looker_000735,notion_000735,drive_000735 +user_000736,acct_0070,user-000736@customer-070.example.com,finance,true,false,false,2025-04-08T00:00:00Z,U000736,looker_000736,notion_000736,drive_000736 +user_000737,acct_0071,user-000737@customer-071.example.com,requester,true,false,false,2025-05-09T00:00:00Z,U000737,looker_000737,notion_000737,drive_000737 +user_000738,acct_0072,user-000738@customer-072.example.com,approver,true,false,false,2025-06-10T00:00:00Z,U000738,looker_000738,notion_000738,drive_000738 +user_000739,acct_0073,user-000739@customer-073.example.com,admin,true,false,false,2025-07-11T00:00:00Z,U000739,looker_000739,notion_000739,drive_000739 
+user_000740,acct_0074,user-000740@customer-074.example.com,finance,false,false,false,2025-08-12T00:00:00Z,U000740,looker_000740,notion_000740,drive_000740 +user_000741,acct_0075,user-000741@customer-075.example.com,requester,true,false,false,2025-09-13T00:00:00Z,U000741,looker_000741,notion_000741,drive_000741 +user_000742,acct_0076,user-000742@customer-076.example.com,approver,true,false,false,2025-10-14T00:00:00Z,U000742,looker_000742,notion_000742,drive_000742 +user_000743,acct_0077,user-000743@customer-077.example.com,admin,true,false,false,2025-11-15T00:00:00Z,U000743,looker_000743,notion_000743,drive_000743 +user_000744,acct_0078,user-000744@customer-078.example.com,finance,true,false,false,2025-12-16T00:00:00Z,U000744,looker_000744,notion_000744,drive_000744 +user_000745,acct_0079,user-000745@customer-079.example.com,requester,false,false,false,2025-01-17T00:00:00Z,U000745,looker_000745,notion_000745,drive_000745 +user_000746,acct_0080,user-000746@customer-080.example.com,approver,true,false,false,2025-02-18T00:00:00Z,U000746,looker_000746,notion_000746,drive_000746 +user_000747,acct_0081,user-000747@customer-081.example.com,admin,true,false,false,2025-03-19T00:00:00Z,U000747,looker_000747,notion_000747,drive_000747 +user_000748,acct_0082,user-000748@customer-082.example.com,finance,true,false,false,2025-04-20T00:00:00Z,U000748,looker_000748,notion_000748,drive_000748 +user_000749,acct_0083,user-000749@customer-083.example.com,requester,true,false,false,2025-05-21T00:00:00Z,U000749,looker_000749,notion_000749,drive_000749 +user_000750,acct_0084,user-000750@customer-084.example.com,approver,false,false,false,2025-06-22T00:00:00Z,U000750,looker_000750,notion_000750,drive_000750 +user_000751,acct_0085,user-000751@customer-085.example.com,admin,true,false,false,2025-07-23T00:00:00Z,U000751,looker_000751,notion_000751,drive_000751 
+user_000752,acct_0086,user-000752@customer-086.example.com,finance,true,false,false,2025-08-24T00:00:00Z,U000752,looker_000752,notion_000752,drive_000752 +user_000753,acct_0087,user-000753@customer-087.example.com,requester,true,false,false,2025-09-25T00:00:00Z,U000753,looker_000753,notion_000753,drive_000753 +user_000754,acct_0088,user-000754@customer-088.example.com,approver,true,false,false,2025-10-26T00:00:00Z,U000754,looker_000754,notion_000754,drive_000754 +user_000755,acct_0089,user-000755@customer-089.example.com,admin,false,false,false,2025-11-27T00:00:00Z,U000755,looker_000755,notion_000755,drive_000755 +user_000756,acct_0090,user-000756@customer-090.example.com,finance,true,false,false,2025-12-28T00:00:00Z,U000756,looker_000756,notion_000756,drive_000756 +user_000757,acct_0091,user-000757@customer-091.example.com,requester,true,false,false,2025-01-01T00:00:00Z,U000757,looker_000757,notion_000757,drive_000757 +user_000758,acct_0092,user-000758@customer-092.example.com,approver,true,false,false,2025-02-02T00:00:00Z,U000758,looker_000758,notion_000758,drive_000758 +user_000759,acct_0093,user-000759@customer-093.example.com,admin,true,false,false,2025-03-03T00:00:00Z,U000759,looker_000759,notion_000759,drive_000759 +user_000760,acct_0094,user-000760@customer-094.example.com,finance,false,false,false,2025-04-04T00:00:00Z,U000760,looker_000760,notion_000760,drive_000760 +user_000761,acct_0095,user-000761@customer-095.example.com,requester,true,false,false,2025-05-05T00:00:00Z,U000761,looker_000761,notion_000761,drive_000761 +user_000762,acct_0096,user-000762@customer-096.example.com,approver,true,false,false,2025-06-06T00:00:00Z,U000762,looker_000762,notion_000762,drive_000762 +user_000763,acct_0097,user-000763@customer-097.example.com,admin,true,false,false,2025-07-07T00:00:00Z,U000763,looker_000763,notion_000763,drive_000763 
+user_000764,acct_0098,user-000764@customer-098.example.com,finance,true,false,false,2025-08-08T00:00:00Z,U000764,looker_000764,notion_000764,drive_000764 +user_000765,acct_0099,user-000765@customer-099.example.com,requester,false,false,false,2025-09-09T00:00:00Z,U000765,looker_000765,notion_000765,drive_000765 +user_000766,acct_0100,user-000766@customer-100.example.com,approver,true,false,false,2025-10-10T00:00:00Z,U000766,looker_000766,notion_000766,drive_000766 +user_000767,acct_0101,user-000767@customer-101.example.com,admin,true,false,false,2025-11-11T00:00:00Z,U000767,looker_000767,notion_000767,drive_000767 +user_000768,acct_0102,user-000768@customer-102.example.com,finance,true,false,false,2025-12-12T00:00:00Z,U000768,looker_000768,notion_000768,drive_000768 +user_000769,acct_0103,user-000769@customer-103.example.com,requester,true,false,false,2025-01-13T00:00:00Z,U000769,looker_000769,notion_000769,drive_000769 +user_000770,acct_0104,user-000770@customer-104.example.com,approver,false,false,false,2025-02-14T00:00:00Z,U000770,looker_000770,notion_000770,drive_000770 +user_000771,acct_0105,user-000771@customer-105.example.com,admin,true,false,false,2025-03-15T00:00:00Z,U000771,looker_000771,notion_000771,drive_000771 +user_000772,acct_0106,user-000772@customer-106.example.com,finance,true,false,false,2025-04-16T00:00:00Z,U000772,looker_000772,notion_000772,drive_000772 +user_000773,acct_0107,user-000773@customer-107.example.com,requester,true,false,false,2025-05-17T00:00:00Z,U000773,looker_000773,notion_000773,drive_000773 +user_000774,acct_0108,user-000774@customer-108.example.com,approver,true,false,false,2025-06-18T00:00:00Z,U000774,looker_000774,notion_000774,drive_000774 +user_000775,acct_0109,user-000775@customer-109.example.com,admin,false,false,false,2025-07-19T00:00:00Z,U000775,looker_000775,notion_000775,drive_000775 
+user_000776,acct_0110,user-000776@customer-110.example.com,finance,true,false,false,2025-08-20T00:00:00Z,U000776,looker_000776,notion_000776,drive_000776 +user_000777,acct_0111,user-000777@customer-111.example.com,requester,true,false,false,2025-09-21T00:00:00Z,U000777,looker_000777,notion_000777,drive_000777 +user_000778,acct_0112,user-000778@customer-112.example.com,approver,true,false,false,2025-10-22T00:00:00Z,U000778,looker_000778,notion_000778,drive_000778 +user_000779,acct_0113,user-000779@customer-113.example.com,admin,true,false,false,2025-11-23T00:00:00Z,U000779,looker_000779,notion_000779,drive_000779 +user_000780,acct_0114,user-000780@customer-114.example.com,finance,false,false,false,2025-12-24T00:00:00Z,U000780,looker_000780,notion_000780,drive_000780 +user_000781,acct_0115,user-000781@customer-115.example.com,requester,true,false,false,2025-01-25T00:00:00Z,U000781,looker_000781,notion_000781,drive_000781 +user_000782,acct_0116,user-000782@customer-116.example.com,approver,true,false,false,2025-02-26T00:00:00Z,U000782,looker_000782,notion_000782,drive_000782 +user_000783,acct_0117,user-000783@customer-117.example.com,admin,true,false,false,2025-03-27T00:00:00Z,U000783,looker_000783,notion_000783,drive_000783 +user_000784,acct_0118,user-000784@customer-118.example.com,finance,true,false,false,2025-04-28T00:00:00Z,U000784,looker_000784,notion_000784,drive_000784 +user_000785,acct_0119,user-000785@customer-119.example.com,requester,false,false,false,2025-05-01T00:00:00Z,U000785,looker_000785,notion_000785,drive_000785 +user_000786,acct_0120,user-000786@customer-120.example.com,approver,true,false,false,2025-06-02T00:00:00Z,U000786,looker_000786,notion_000786,drive_000786 +user_000787,acct_0121,user-000787@customer-121.example.com,admin,true,false,false,2025-07-03T00:00:00Z,U000787,looker_000787,notion_000787,drive_000787 
+user_000788,acct_0122,user-000788@customer-122.example.com,finance,true,false,false,2025-08-04T00:00:00Z,U000788,looker_000788,notion_000788,drive_000788 +user_000789,acct_0123,user-000789@customer-123.example.com,requester,true,false,false,2025-09-05T00:00:00Z,U000789,looker_000789,notion_000789,drive_000789 +user_000790,acct_0124,user-000790@customer-124.example.com,approver,false,false,false,2025-10-06T00:00:00Z,U000790,looker_000790,notion_000790,drive_000790 +user_000791,acct_0125,user-000791@customer-125.example.com,admin,true,false,false,2025-11-07T00:00:00Z,U000791,looker_000791,notion_000791,drive_000791 +user_000792,acct_0126,user-000792@customer-126.example.com,finance,true,false,false,2025-12-08T00:00:00Z,U000792,looker_000792,notion_000792,drive_000792 +user_000793,acct_0127,user-000793@customer-127.example.com,requester,true,false,false,2025-01-09T00:00:00Z,U000793,looker_000793,notion_000793,drive_000793 +user_000794,acct_0128,user-000794@customer-128.example.com,approver,true,false,false,2025-02-10T00:00:00Z,U000794,looker_000794,notion_000794,drive_000794 +user_000795,acct_0129,user-000795@customer-129.example.com,admin,false,false,false,2025-03-11T00:00:00Z,U000795,looker_000795,notion_000795,drive_000795 +user_000796,acct_0130,user-000796@customer-130.example.com,finance,true,false,false,2025-04-12T00:00:00Z,U000796,looker_000796,notion_000796,drive_000796 +user_000797,acct_0131,user-000797@customer-131.example.com,requester,true,false,false,2025-05-13T00:00:00Z,U000797,looker_000797,notion_000797,drive_000797 +user_000798,acct_0132,user-000798@customer-132.example.com,approver,true,false,false,2025-06-14T00:00:00Z,U000798,looker_000798,notion_000798,drive_000798 +user_000799,acct_0133,user-000799@customer-133.example.com,admin,true,false,false,2025-07-15T00:00:00Z,U000799,looker_000799,notion_000799,drive_000799 
+user_000800,acct_0134,user-000800@customer-134.example.com,finance,false,false,false,2025-08-16T00:00:00Z,U000800,looker_000800,notion_000800,drive_000800 +user_000801,acct_0135,user-000801@customer-135.example.com,requester,true,false,false,2025-09-17T00:00:00Z,U000801,looker_000801,notion_000801,drive_000801 +user_000802,acct_0136,user-000802@customer-136.example.com,approver,true,false,false,2025-10-18T00:00:00Z,U000802,looker_000802,notion_000802,drive_000802 +user_000803,acct_0137,user-000803@customer-137.example.com,admin,true,false,false,2025-11-19T00:00:00Z,U000803,looker_000803,notion_000803,drive_000803 +user_000804,acct_0138,user-000804@customer-138.example.com,finance,true,false,false,2025-12-20T00:00:00Z,U000804,looker_000804,notion_000804,drive_000804 +user_000805,acct_0139,user-000805@customer-139.example.com,requester,false,false,false,2025-01-21T00:00:00Z,U000805,looker_000805,notion_000805,drive_000805 +user_000806,acct_0140,user-000806@customer-140.example.com,approver,true,false,false,2025-02-22T00:00:00Z,U000806,looker_000806,notion_000806,drive_000806 +user_000807,acct_0141,user-000807@customer-141.example.com,admin,true,false,false,2025-03-23T00:00:00Z,U000807,looker_000807,notion_000807,drive_000807 +user_000808,acct_0142,user-000808@customer-142.example.com,finance,true,false,false,2025-04-24T00:00:00Z,U000808,looker_000808,notion_000808,drive_000808 +user_000809,acct_0143,user-000809@customer-143.example.com,requester,true,false,false,2025-05-25T00:00:00Z,U000809,looker_000809,notion_000809,drive_000809 +user_000810,acct_0144,user-000810@customer-144.example.com,approver,false,false,false,2025-06-26T00:00:00Z,U000810,looker_000810,notion_000810,drive_000810 +user_000811,acct_0145,user-000811@customer-145.example.com,admin,true,false,false,2025-07-27T00:00:00Z,U000811,looker_000811,notion_000811,drive_000811 
+user_000812,acct_0146,user-000812@customer-146.example.com,finance,true,false,false,2025-08-28T00:00:00Z,U000812,looker_000812,notion_000812,drive_000812 +user_000813,acct_0147,user-000813@customer-147.example.com,requester,true,false,false,2025-09-01T00:00:00Z,U000813,looker_000813,notion_000813,drive_000813 +user_000814,acct_0148,user-000814@customer-148.example.com,approver,true,false,false,2025-10-02T00:00:00Z,U000814,looker_000814,notion_000814,drive_000814 +user_000815,acct_0149,user-000815@customer-149.example.com,admin,false,false,false,2025-11-03T00:00:00Z,U000815,looker_000815,notion_000815,drive_000815 +user_000816,acct_0150,user-000816@customer-150.example.com,finance,true,false,false,2025-12-04T00:00:00Z,U000816,looker_000816,notion_000816,drive_000816 +user_000817,acct_0151,user-000817@customer-151.example.com,requester,true,false,false,2025-01-05T00:00:00Z,U000817,looker_000817,notion_000817,drive_000817 +user_000818,acct_0152,user-000818@customer-152.example.com,approver,true,false,false,2025-02-06T00:00:00Z,U000818,looker_000818,notion_000818,drive_000818 +user_000819,acct_0153,user-000819@customer-153.example.com,admin,true,false,false,2025-03-07T00:00:00Z,U000819,looker_000819,notion_000819,drive_000819 +user_000820,acct_0154,user-000820@customer-154.example.com,finance,false,false,false,2025-04-08T00:00:00Z,U000820,looker_000820,notion_000820,drive_000820 +user_000821,acct_0155,user-000821@customer-155.example.com,requester,true,false,false,2025-05-09T00:00:00Z,U000821,looker_000821,notion_000821,drive_000821 +user_000822,acct_0156,user-000822@customer-156.example.com,approver,true,false,false,2025-06-10T00:00:00Z,U000822,looker_000822,notion_000822,drive_000822 +user_000823,acct_0157,user-000823@customer-157.example.com,admin,true,false,false,2025-07-11T00:00:00Z,U000823,looker_000823,notion_000823,drive_000823 
+user_000824,acct_0158,user-000824@customer-158.example.com,finance,true,false,false,2025-08-12T00:00:00Z,U000824,looker_000824,notion_000824,drive_000824 +user_000825,acct_0159,user-000825@customer-159.example.com,requester,false,false,false,2025-09-13T00:00:00Z,U000825,looker_000825,notion_000825,drive_000825 +user_000826,acct_0160,user-000826@customer-160.example.com,approver,true,false,false,2025-10-14T00:00:00Z,U000826,looker_000826,notion_000826,drive_000826 +user_000827,acct_0161,user-000827@customer-161.example.com,admin,true,false,false,2025-11-15T00:00:00Z,U000827,looker_000827,notion_000827,drive_000827 +user_000828,acct_0162,user-000828@customer-162.example.com,finance,true,false,false,2025-12-16T00:00:00Z,U000828,looker_000828,notion_000828,drive_000828 +user_000829,acct_0163,user-000829@customer-163.example.com,requester,true,false,false,2025-01-17T00:00:00Z,U000829,looker_000829,notion_000829,drive_000829 +user_000830,acct_0164,user-000830@customer-164.example.com,approver,false,false,false,2025-02-18T00:00:00Z,U000830,looker_000830,notion_000830,drive_000830 +user_000831,acct_0165,user-000831@customer-165.example.com,admin,true,false,false,2025-03-19T00:00:00Z,U000831,looker_000831,notion_000831,drive_000831 +user_000832,acct_0166,user-000832@customer-166.example.com,finance,true,false,false,2025-04-20T00:00:00Z,U000832,looker_000832,notion_000832,drive_000832 +user_000833,acct_0167,user-000833@customer-167.example.com,requester,true,false,false,2025-05-21T00:00:00Z,U000833,looker_000833,notion_000833,drive_000833 +user_000834,acct_0168,user-000834@customer-168.example.com,approver,true,false,false,2025-06-22T00:00:00Z,U000834,looker_000834,notion_000834,drive_000834 +user_000835,acct_0169,user-000835@customer-169.example.com,admin,false,false,false,2025-07-23T00:00:00Z,U000835,looker_000835,notion_000835,drive_000835 
+user_000836,acct_0170,user-000836@customer-170.example.com,finance,true,false,false,2025-08-24T00:00:00Z,U000836,looker_000836,notion_000836,drive_000836 +user_000837,acct_0171,user-000837@customer-171.example.com,requester,true,false,false,2025-09-25T00:00:00Z,U000837,looker_000837,notion_000837,drive_000837 +user_000838,acct_0172,user-000838@customer-172.example.com,approver,true,false,false,2025-10-26T00:00:00Z,U000838,looker_000838,notion_000838,drive_000838 +user_000839,acct_0173,user-000839@customer-173.example.com,admin,true,false,false,2025-11-27T00:00:00Z,U000839,looker_000839,notion_000839,drive_000839 +user_000840,acct_0174,user-000840@customer-174.example.com,finance,false,false,false,2025-12-28T00:00:00Z,U000840,looker_000840,notion_000840,drive_000840 +user_000841,acct_0175,user-000841@customer-175.example.com,requester,true,false,false,2025-01-01T00:00:00Z,U000841,looker_000841,notion_000841,drive_000841 +user_000842,acct_0176,user-000842@customer-176.example.com,approver,true,false,false,2025-02-02T00:00:00Z,U000842,looker_000842,notion_000842,drive_000842 +user_000843,acct_0177,user-000843@customer-177.example.com,admin,true,false,false,2025-03-03T00:00:00Z,U000843,looker_000843,notion_000843,drive_000843 +user_000844,acct_0178,user-000844@customer-178.example.com,finance,true,false,false,2025-04-04T00:00:00Z,U000844,looker_000844,notion_000844,drive_000844 +user_000845,acct_0179,user-000845@customer-179.example.com,requester,false,false,false,2025-05-05T00:00:00Z,U000845,looker_000845,notion_000845,drive_000845 +user_000846,acct_0180,user-000846@customer-180.example.com,approver,true,false,false,2025-06-06T00:00:00Z,U000846,looker_000846,notion_000846,drive_000846 +user_000847,acct_0181,user-000847@customer-181.example.com,admin,true,false,false,2025-07-07T00:00:00Z,U000847,looker_000847,notion_000847,drive_000847 
+user_000848,acct_0182,user-000848@customer-182.example.com,finance,true,false,false,2025-08-08T00:00:00Z,U000848,looker_000848,notion_000848,drive_000848 +user_000849,acct_0183,user-000849@customer-183.example.com,requester,true,false,false,2025-09-09T00:00:00Z,U000849,looker_000849,notion_000849,drive_000849 +user_000850,acct_0184,user-000850@customer-184.example.com,approver,false,false,false,2025-10-10T00:00:00Z,U000850,looker_000850,notion_000850,drive_000850 +user_000851,acct_0185,user-000851@customer-185.example.com,admin,true,false,false,2025-11-11T00:00:00Z,U000851,looker_000851,notion_000851,drive_000851 +user_000852,acct_0186,user-000852@customer-186.example.com,finance,true,false,false,2025-12-12T00:00:00Z,U000852,looker_000852,notion_000852,drive_000852 +user_000853,acct_0187,user-000853@customer-187.example.com,requester,true,false,false,2025-01-13T00:00:00Z,U000853,looker_000853,notion_000853,drive_000853 +user_000854,acct_0188,user-000854@customer-188.example.com,approver,true,false,false,2025-02-14T00:00:00Z,U000854,looker_000854,notion_000854,drive_000854 +user_000855,acct_0189,user-000855@customer-189.example.com,admin,false,false,false,2025-03-15T00:00:00Z,U000855,looker_000855,notion_000855,drive_000855 +user_000856,acct_0190,user-000856@customer-190.example.com,finance,true,false,false,2025-04-16T00:00:00Z,U000856,looker_000856,notion_000856,drive_000856 +user_000857,acct_0001,user-000857@customer-001.example.com,requester,true,false,false,2025-05-17T00:00:00Z,U000857,looker_000857,notion_000857,drive_000857 +user_000858,acct_0002,user-000858@customer-002.example.com,approver,true,false,false,2025-06-18T00:00:00Z,U000858,looker_000858,notion_000858,drive_000858 +user_000859,acct_0003,user-000859@customer-003.example.com,admin,true,false,false,2025-07-19T00:00:00Z,U000859,looker_000859,notion_000859,drive_000859 
+user_000860,acct_0004,user-000860@customer-004.example.com,finance,false,false,false,2025-08-20T00:00:00Z,U000860,looker_000860,notion_000860,drive_000860 +user_000861,acct_0005,user-000861@customer-005.example.com,requester,true,false,false,2025-09-21T00:00:00Z,U000861,looker_000861,notion_000861,drive_000861 +user_000862,acct_0006,user-000862@customer-006.example.com,approver,true,false,false,2025-10-22T00:00:00Z,U000862,looker_000862,notion_000862,drive_000862 +user_000863,acct_0007,user-000863@customer-007.example.com,admin,true,false,false,2025-11-23T00:00:00Z,U000863,looker_000863,notion_000863,drive_000863 +user_000864,acct_0008,user-000864@customer-008.example.com,finance,true,false,false,2025-12-24T00:00:00Z,U000864,looker_000864,notion_000864,drive_000864 +user_000865,acct_0009,user-000865@customer-009.example.com,requester,false,false,false,2025-01-25T00:00:00Z,U000865,looker_000865,notion_000865,drive_000865 +user_000866,acct_0010,user-000866@customer-010.example.com,approver,true,false,false,2025-02-26T00:00:00Z,U000866,looker_000866,notion_000866,drive_000866 +user_000867,acct_0011,user-000867@customer-011.example.com,admin,true,false,false,2025-03-27T00:00:00Z,U000867,looker_000867,notion_000867,drive_000867 +user_000868,acct_0012,user-000868@customer-012.example.com,finance,true,false,false,2025-04-28T00:00:00Z,U000868,looker_000868,notion_000868,drive_000868 +user_000869,acct_0013,user-000869@customer-013.example.com,requester,true,false,false,2025-05-01T00:00:00Z,U000869,looker_000869,notion_000869,drive_000869 +user_000870,acct_0014,user-000870@customer-014.example.com,approver,false,false,false,2025-06-02T00:00:00Z,U000870,looker_000870,notion_000870,drive_000870 +user_000871,acct_0015,user-000871@customer-015.example.com,admin,true,false,false,2025-07-03T00:00:00Z,U000871,looker_000871,notion_000871,drive_000871 
+user_000872,acct_0016,user-000872@customer-016.example.com,finance,true,false,false,2025-08-04T00:00:00Z,U000872,looker_000872,notion_000872,drive_000872 +user_000873,acct_0017,user-000873@customer-017.example.com,requester,true,false,false,2025-09-05T00:00:00Z,U000873,looker_000873,notion_000873,drive_000873 +user_000874,acct_0018,user-000874@customer-018.example.com,approver,true,false,false,2025-10-06T00:00:00Z,U000874,looker_000874,notion_000874,drive_000874 +user_000875,acct_0019,user-000875@customer-019.example.com,admin,false,false,false,2025-11-07T00:00:00Z,U000875,looker_000875,notion_000875,drive_000875 +user_000876,acct_0020,user-000876@customer-020.example.com,finance,true,false,false,2025-12-08T00:00:00Z,U000876,looker_000876,notion_000876,drive_000876 +user_000877,acct_0021,user-000877@customer-021.example.com,requester,true,false,false,2025-01-09T00:00:00Z,U000877,looker_000877,notion_000877,drive_000877 +user_000878,acct_0022,user-000878@customer-022.example.com,approver,true,false,false,2025-02-10T00:00:00Z,U000878,looker_000878,notion_000878,drive_000878 +user_000879,acct_0023,user-000879@customer-023.example.com,admin,true,false,false,2025-03-11T00:00:00Z,U000879,looker_000879,notion_000879,drive_000879 +user_000880,acct_0024,user-000880@customer-024.example.com,finance,false,false,false,2025-04-12T00:00:00Z,U000880,looker_000880,notion_000880,drive_000880 +user_000881,acct_0025,user-000881@customer-025.example.com,requester,true,false,false,2025-05-13T00:00:00Z,U000881,looker_000881,notion_000881,drive_000881 +user_000882,acct_0026,user-000882@customer-026.example.com,approver,true,false,false,2025-06-14T00:00:00Z,U000882,looker_000882,notion_000882,drive_000882 +user_000883,acct_0027,user-000883@customer-027.example.com,admin,true,false,false,2025-07-15T00:00:00Z,U000883,looker_000883,notion_000883,drive_000883 
+user_000884,acct_0028,user-000884@customer-028.example.com,finance,true,false,false,2025-08-16T00:00:00Z,U000884,looker_000884,notion_000884,drive_000884 +user_000885,acct_0029,user-000885@customer-029.example.com,requester,false,false,false,2025-09-17T00:00:00Z,U000885,looker_000885,notion_000885,drive_000885 +user_000886,acct_0030,user-000886@customer-030.example.com,approver,true,false,false,2025-10-18T00:00:00Z,U000886,looker_000886,notion_000886,drive_000886 +user_000887,acct_0031,user-000887@customer-031.example.com,admin,true,false,false,2025-11-19T00:00:00Z,U000887,looker_000887,notion_000887,drive_000887 +user_000888,acct_0032,user-000888@customer-032.example.com,finance,true,false,false,2025-12-20T00:00:00Z,U000888,looker_000888,notion_000888,drive_000888 +user_000889,acct_0033,user-000889@customer-033.example.com,requester,true,false,false,2025-01-21T00:00:00Z,U000889,looker_000889,notion_000889,drive_000889 +user_000890,acct_0034,user-000890@customer-034.example.com,approver,false,false,false,2025-02-22T00:00:00Z,U000890,looker_000890,notion_000890,drive_000890 +user_000891,acct_0035,user-000891@customer-035.example.com,admin,true,false,false,2025-03-23T00:00:00Z,U000891,looker_000891,notion_000891,drive_000891 +user_000892,acct_0036,user-000892@customer-036.example.com,finance,true,false,false,2025-04-24T00:00:00Z,U000892,looker_000892,notion_000892,drive_000892 +user_000893,acct_0037,user-000893@customer-037.example.com,requester,true,false,false,2025-05-25T00:00:00Z,U000893,looker_000893,notion_000893,drive_000893 +user_000894,acct_0038,user-000894@customer-038.example.com,approver,true,false,false,2025-06-26T00:00:00Z,U000894,looker_000894,notion_000894,drive_000894 +user_000895,acct_0039,user-000895@customer-039.example.com,admin,false,false,false,2025-07-27T00:00:00Z,U000895,looker_000895,notion_000895,drive_000895 
+user_000896,acct_0040,user-000896@customer-040.example.com,finance,true,false,false,2025-08-28T00:00:00Z,U000896,looker_000896,notion_000896,drive_000896 +user_000897,acct_0041,user-000897@customer-041.example.com,requester,true,false,false,2025-09-01T00:00:00Z,U000897,looker_000897,notion_000897,drive_000897 +user_000898,acct_0042,user-000898@customer-042.example.com,approver,true,false,false,2025-10-02T00:00:00Z,U000898,looker_000898,notion_000898,drive_000898 +user_000899,acct_0043,user-000899@customer-043.example.com,admin,true,false,false,2025-11-03T00:00:00Z,U000899,looker_000899,notion_000899,drive_000899 +user_000900,acct_0044,user-000900@customer-044.example.com,finance,false,false,false,2025-12-04T00:00:00Z,U000900,looker_000900,notion_000900,drive_000900 +user_000901,acct_0045,user-000901@customer-045.example.com,requester,true,false,false,2025-01-05T00:00:00Z,U000901,looker_000901,notion_000901,drive_000901 +user_000902,acct_0046,user-000902@customer-046.example.com,approver,true,false,false,2025-02-06T00:00:00Z,U000902,looker_000902,notion_000902,drive_000902 +user_000903,acct_0047,user-000903@customer-047.example.com,admin,true,false,false,2025-03-07T00:00:00Z,U000903,looker_000903,notion_000903,drive_000903 +user_000904,acct_0048,user-000904@customer-048.example.com,finance,true,false,false,2025-04-08T00:00:00Z,U000904,looker_000904,notion_000904,drive_000904 +user_000905,acct_0049,user-000905@customer-049.example.com,requester,false,false,false,2025-05-09T00:00:00Z,U000905,looker_000905,notion_000905,drive_000905 +user_000906,acct_0050,user-000906@customer-050.example.com,approver,true,false,false,2025-06-10T00:00:00Z,U000906,looker_000906,notion_000906,drive_000906 +user_000907,acct_0051,user-000907@customer-051.example.com,admin,true,false,false,2025-07-11T00:00:00Z,U000907,looker_000907,notion_000907,drive_000907 
+user_000908,acct_0052,user-000908@customer-052.example.com,finance,true,false,false,2025-08-12T00:00:00Z,U000908,looker_000908,notion_000908,drive_000908 +user_000909,acct_0053,user-000909@customer-053.example.com,requester,true,false,false,2025-09-13T00:00:00Z,U000909,looker_000909,notion_000909,drive_000909 +user_000910,acct_0054,user-000910@customer-054.example.com,approver,false,false,false,2025-10-14T00:00:00Z,U000910,looker_000910,notion_000910,drive_000910 +user_000911,acct_0055,user-000911@customer-055.example.com,admin,true,false,false,2025-11-15T00:00:00Z,U000911,looker_000911,notion_000911,drive_000911 +user_000912,acct_0056,user-000912@customer-056.example.com,finance,true,false,false,2025-12-16T00:00:00Z,U000912,looker_000912,notion_000912,drive_000912 +user_000913,acct_0057,user-000913@customer-057.example.com,requester,true,false,false,2025-01-17T00:00:00Z,U000913,looker_000913,notion_000913,drive_000913 +user_000914,acct_0058,user-000914@customer-058.example.com,approver,true,false,false,2025-02-18T00:00:00Z,U000914,looker_000914,notion_000914,drive_000914 +user_000915,acct_0059,user-000915@customer-059.example.com,admin,false,false,false,2025-03-19T00:00:00Z,U000915,looker_000915,notion_000915,drive_000915 +user_000916,acct_0060,user-000916@customer-060.example.com,finance,true,false,false,2025-04-20T00:00:00Z,U000916,looker_000916,notion_000916,drive_000916 +user_000917,acct_0061,user-000917@customer-061.example.com,requester,true,false,false,2025-05-21T00:00:00Z,U000917,looker_000917,notion_000917,drive_000917 +user_000918,acct_0062,user-000918@customer-062.example.com,approver,true,false,false,2025-06-22T00:00:00Z,U000918,looker_000918,notion_000918,drive_000918 +user_000919,acct_0063,user-000919@customer-063.example.com,admin,true,false,false,2025-07-23T00:00:00Z,U000919,looker_000919,notion_000919,drive_000919 
+user_000920,acct_0064,user-000920@customer-064.example.com,finance,false,false,false,2025-08-24T00:00:00Z,U000920,looker_000920,notion_000920,drive_000920 +user_000921,acct_0065,user-000921@customer-065.example.com,requester,true,false,false,2025-09-25T00:00:00Z,U000921,looker_000921,notion_000921,drive_000921 +user_000922,acct_0066,user-000922@customer-066.example.com,approver,true,false,false,2025-10-26T00:00:00Z,U000922,looker_000922,notion_000922,drive_000922 +user_000923,acct_0067,user-000923@customer-067.example.com,admin,true,false,false,2025-11-27T00:00:00Z,U000923,looker_000923,notion_000923,drive_000923 +user_000924,acct_0068,user-000924@customer-068.example.com,finance,true,false,false,2025-12-28T00:00:00Z,U000924,looker_000924,notion_000924,drive_000924 +user_000925,acct_0069,user-000925@customer-069.example.com,requester,false,false,false,2025-01-01T00:00:00Z,U000925,looker_000925,notion_000925,drive_000925 +user_000926,acct_0070,user-000926@customer-070.example.com,approver,true,false,false,2025-02-02T00:00:00Z,U000926,looker_000926,notion_000926,drive_000926 +user_000927,acct_0071,user-000927@customer-071.example.com,admin,true,false,false,2025-03-03T00:00:00Z,U000927,looker_000927,notion_000927,drive_000927 +user_000928,acct_0072,user-000928@customer-072.example.com,finance,true,false,false,2025-04-04T00:00:00Z,U000928,looker_000928,notion_000928,drive_000928 +user_000929,acct_0073,user-000929@customer-073.example.com,requester,true,false,false,2025-05-05T00:00:00Z,U000929,looker_000929,notion_000929,drive_000929 +user_000930,acct_0074,user-000930@customer-074.example.com,approver,false,false,false,2025-06-06T00:00:00Z,U000930,looker_000930,notion_000930,drive_000930 +user_000931,acct_0075,user-000931@customer-075.example.com,admin,true,false,false,2025-07-07T00:00:00Z,U000931,looker_000931,notion_000931,drive_000931 
+user_000932,acct_0076,user-000932@customer-076.example.com,finance,true,false,false,2025-08-08T00:00:00Z,U000932,looker_000932,notion_000932,drive_000932 +user_000933,acct_0077,user-000933@customer-077.example.com,requester,true,false,false,2025-09-09T00:00:00Z,U000933,looker_000933,notion_000933,drive_000933 +user_000934,acct_0078,user-000934@customer-078.example.com,approver,true,false,false,2025-10-10T00:00:00Z,U000934,looker_000934,notion_000934,drive_000934 +user_000935,acct_0079,user-000935@customer-079.example.com,admin,false,false,false,2025-11-11T00:00:00Z,U000935,looker_000935,notion_000935,drive_000935 +user_000936,acct_0080,user-000936@customer-080.example.com,finance,true,false,false,2025-12-12T00:00:00Z,U000936,looker_000936,notion_000936,drive_000936 +user_000937,acct_0081,user-000937@customer-081.example.com,requester,true,false,false,2025-01-13T00:00:00Z,U000937,looker_000937,notion_000937,drive_000937 +user_000938,acct_0082,user-000938@customer-082.example.com,approver,true,false,false,2025-02-14T00:00:00Z,U000938,looker_000938,notion_000938,drive_000938 +user_000939,acct_0083,user-000939@customer-083.example.com,admin,true,false,false,2025-03-15T00:00:00Z,U000939,looker_000939,notion_000939,drive_000939 +user_000940,acct_0084,user-000940@customer-084.example.com,finance,false,false,false,2025-04-16T00:00:00Z,U000940,looker_000940,notion_000940,drive_000940 +user_000941,acct_0085,user-000941@customer-085.example.com,requester,true,false,false,2025-05-17T00:00:00Z,U000941,looker_000941,notion_000941,drive_000941 +user_000942,acct_0086,user-000942@customer-086.example.com,approver,true,false,false,2025-06-18T00:00:00Z,U000942,looker_000942,notion_000942,drive_000942 +user_000943,acct_0087,user-000943@customer-087.example.com,admin,true,false,false,2025-07-19T00:00:00Z,U000943,looker_000943,notion_000943,drive_000943 
+user_000944,acct_0088,user-000944@customer-088.example.com,finance,true,false,false,2025-08-20T00:00:00Z,U000944,looker_000944,notion_000944,drive_000944 +user_000945,acct_0089,user-000945@customer-089.example.com,requester,false,false,false,2025-09-21T00:00:00Z,U000945,looker_000945,notion_000945,drive_000945 +user_000946,acct_0090,user-000946@customer-090.example.com,approver,true,false,false,2025-10-22T00:00:00Z,U000946,looker_000946,notion_000946,drive_000946 +user_000947,acct_0091,user-000947@customer-091.example.com,admin,true,false,false,2025-11-23T00:00:00Z,U000947,looker_000947,notion_000947,drive_000947 +user_000948,acct_0092,user-000948@customer-092.example.com,finance,true,false,false,2025-12-24T00:00:00Z,U000948,looker_000948,notion_000948,drive_000948 +user_000949,acct_0093,user-000949@customer-093.example.com,requester,true,false,false,2025-01-25T00:00:00Z,U000949,looker_000949,notion_000949,drive_000949 +user_000950,acct_0094,user-000950@customer-094.example.com,approver,false,false,false,2025-02-26T00:00:00Z,U000950,looker_000950,notion_000950,drive_000950 +user_000951,acct_0095,user-000951@customer-095.example.com,admin,true,false,false,2025-03-27T00:00:00Z,U000951,looker_000951,notion_000951,drive_000951 +user_000952,acct_0096,user-000952@customer-096.example.com,finance,true,false,false,2025-04-28T00:00:00Z,U000952,looker_000952,notion_000952,drive_000952 +user_000953,acct_0097,user-000953@customer-097.example.com,requester,true,false,false,2025-05-01T00:00:00Z,U000953,looker_000953,notion_000953,drive_000953 +user_000954,acct_0098,user-000954@customer-098.example.com,approver,true,false,false,2025-06-02T00:00:00Z,U000954,looker_000954,notion_000954,drive_000954 +user_000955,acct_0099,user-000955@customer-099.example.com,admin,false,false,false,2025-07-03T00:00:00Z,U000955,looker_000955,notion_000955,drive_000955 
+user_000956,acct_0100,user-000956@customer-100.example.com,finance,true,false,false,2025-08-04T00:00:00Z,U000956,looker_000956,notion_000956,drive_000956 +user_000957,acct_0101,user-000957@customer-101.example.com,requester,true,false,false,2025-09-05T00:00:00Z,U000957,looker_000957,notion_000957,drive_000957 +user_000958,acct_0102,user-000958@customer-102.example.com,approver,true,false,false,2025-10-06T00:00:00Z,U000958,looker_000958,notion_000958,drive_000958 +user_000959,acct_0103,user-000959@customer-103.example.com,admin,true,false,false,2025-11-07T00:00:00Z,U000959,looker_000959,notion_000959,drive_000959 +user_000960,acct_0104,user-000960@customer-104.example.com,finance,false,false,false,2025-12-08T00:00:00Z,U000960,looker_000960,notion_000960,drive_000960 +user_000961,acct_0105,user-000961@customer-105.example.com,requester,true,false,false,2025-01-09T00:00:00Z,U000961,looker_000961,notion_000961,drive_000961 +user_000962,acct_0106,user-000962@customer-106.example.com,approver,true,false,false,2025-02-10T00:00:00Z,U000962,looker_000962,notion_000962,drive_000962 +user_000963,acct_0107,user-000963@customer-107.example.com,admin,true,false,false,2025-03-11T00:00:00Z,U000963,looker_000963,notion_000963,drive_000963 +user_000964,acct_0108,user-000964@customer-108.example.com,finance,true,false,false,2025-04-12T00:00:00Z,U000964,looker_000964,notion_000964,drive_000964 +user_000965,acct_0109,user-000965@customer-109.example.com,requester,false,false,false,2025-05-13T00:00:00Z,U000965,looker_000965,notion_000965,drive_000965 +user_000966,acct_0110,user-000966@customer-110.example.com,approver,true,false,false,2025-06-14T00:00:00Z,U000966,looker_000966,notion_000966,drive_000966 +user_000967,acct_0111,user-000967@customer-111.example.com,admin,true,false,false,2025-07-15T00:00:00Z,U000967,looker_000967,notion_000967,drive_000967 
+user_000968,acct_0112,user-000968@customer-112.example.com,finance,true,false,false,2025-08-16T00:00:00Z,U000968,looker_000968,notion_000968,drive_000968 +user_000969,acct_0113,user-000969@customer-113.example.com,requester,true,false,false,2025-09-17T00:00:00Z,U000969,looker_000969,notion_000969,drive_000969 +user_000970,acct_0114,user-000970@customer-114.example.com,approver,false,false,false,2025-10-18T00:00:00Z,U000970,looker_000970,notion_000970,drive_000970 +user_000971,acct_0115,user-000971@customer-115.example.com,admin,true,false,false,2025-11-19T00:00:00Z,U000971,looker_000971,notion_000971,drive_000971 +user_000972,acct_0116,user-000972@customer-116.example.com,finance,true,false,false,2025-12-20T00:00:00Z,U000972,looker_000972,notion_000972,drive_000972 +user_000973,acct_0117,user-000973@customer-117.example.com,requester,true,false,false,2025-01-21T00:00:00Z,U000973,looker_000973,notion_000973,drive_000973 +user_000974,acct_0118,user-000974@customer-118.example.com,approver,true,false,false,2025-02-22T00:00:00Z,U000974,looker_000974,notion_000974,drive_000974 +user_000975,acct_0119,user-000975@customer-119.example.com,admin,false,false,false,2025-03-23T00:00:00Z,U000975,looker_000975,notion_000975,drive_000975 +user_000976,acct_0120,user-000976@customer-120.example.com,finance,true,false,false,2025-04-24T00:00:00Z,U000976,looker_000976,notion_000976,drive_000976 +user_000977,acct_0121,user-000977@customer-121.example.com,requester,true,false,false,2025-05-25T00:00:00Z,U000977,looker_000977,notion_000977,drive_000977 +user_000978,acct_0122,user-000978@customer-122.example.com,approver,true,false,false,2025-06-26T00:00:00Z,U000978,looker_000978,notion_000978,drive_000978 +user_000979,acct_0123,user-000979@customer-123.example.com,admin,true,false,false,2025-07-27T00:00:00Z,U000979,looker_000979,notion_000979,drive_000979 
+user_000980,acct_0124,user-000980@customer-124.example.com,finance,false,false,false,2025-08-28T00:00:00Z,U000980,looker_000980,notion_000980,drive_000980 +user_000981,acct_0125,user-000981@customer-125.example.com,requester,true,false,false,2025-09-01T00:00:00Z,U000981,looker_000981,notion_000981,drive_000981 +user_000982,acct_0126,user-000982@customer-126.example.com,approver,true,false,false,2025-10-02T00:00:00Z,U000982,looker_000982,notion_000982,drive_000982 +user_000983,acct_0127,user-000983@customer-127.example.com,admin,true,false,false,2025-11-03T00:00:00Z,U000983,looker_000983,notion_000983,drive_000983 +user_000984,acct_0128,user-000984@customer-128.example.com,finance,true,false,false,2025-12-04T00:00:00Z,U000984,looker_000984,notion_000984,drive_000984 +user_000985,acct_0129,user-000985@customer-129.example.com,requester,false,false,false,2025-01-05T00:00:00Z,U000985,looker_000985,notion_000985,drive_000985 +user_000986,acct_0130,user-000986@customer-130.example.com,approver,true,false,false,2025-02-06T00:00:00Z,U000986,looker_000986,notion_000986,drive_000986 +user_000987,acct_0131,user-000987@customer-131.example.com,admin,true,false,false,2025-03-07T00:00:00Z,U000987,looker_000987,notion_000987,drive_000987 +user_000988,acct_0132,user-000988@customer-132.example.com,finance,true,false,false,2025-04-08T00:00:00Z,U000988,looker_000988,notion_000988,drive_000988 +user_000989,acct_0133,user-000989@customer-133.example.com,requester,true,false,false,2025-05-09T00:00:00Z,U000989,looker_000989,notion_000989,drive_000989 +user_000990,acct_0134,user-000990@customer-134.example.com,approver,false,false,false,2025-06-10T00:00:00Z,U000990,looker_000990,notion_000990,drive_000990 +user_000991,acct_0135,user-000991@customer-135.example.com,admin,true,false,false,2025-07-11T00:00:00Z,U000991,looker_000991,notion_000991,drive_000991 
+user_000992,acct_0136,user-000992@customer-136.example.com,finance,true,false,false,2025-08-12T00:00:00Z,U000992,looker_000992,notion_000992,drive_000992 +user_000993,acct_0137,user-000993@customer-137.example.com,requester,true,false,false,2025-09-13T00:00:00Z,U000993,looker_000993,notion_000993,drive_000993 +user_000994,acct_0138,user-000994@customer-138.example.com,approver,true,false,false,2025-10-14T00:00:00Z,U000994,looker_000994,notion_000994,drive_000994 +user_000995,acct_0139,user-000995@customer-139.example.com,admin,false,false,false,2025-11-15T00:00:00Z,U000995,looker_000995,notion_000995,drive_000995 +user_000996,acct_0140,user-000996@customer-140.example.com,finance,true,false,false,2025-12-16T00:00:00Z,U000996,looker_000996,notion_000996,drive_000996 +user_000997,acct_0141,user-000997@customer-141.example.com,requester,true,false,false,2025-01-17T00:00:00Z,U000997,looker_000997,notion_000997,drive_000997 +user_000998,acct_0142,user-000998@customer-142.example.com,approver,true,false,false,2025-02-18T00:00:00Z,U000998,looker_000998,notion_000998,drive_000998 +user_000999,acct_0143,user-000999@customer-143.example.com,admin,true,false,false,2025-03-19T00:00:00Z,U000999,looker_000999,notion_000999,drive_000999 +user_001000,acct_0144,user-001000@customer-144.example.com,finance,false,false,false,2025-04-20T00:00:00Z,U001000,looker_001000,notion_001000,drive_001000 +user_001001,acct_0145,user-001001@customer-145.example.com,requester,true,false,false,2025-05-21T00:00:00Z,U001001,looker_001001,notion_001001,drive_001001 +user_001002,acct_0146,user-001002@customer-146.example.com,approver,true,false,false,2025-06-22T00:00:00Z,U001002,looker_001002,notion_001002,drive_001002 +user_001003,acct_0147,user-001003@customer-147.example.com,admin,true,false,false,2025-07-23T00:00:00Z,U001003,looker_001003,notion_001003,drive_001003 
+user_001004,acct_0148,user-001004@customer-148.example.com,finance,true,false,false,2025-08-24T00:00:00Z,U001004,looker_001004,notion_001004,drive_001004 +user_001005,acct_0149,user-001005@customer-149.example.com,requester,false,false,false,2025-09-25T00:00:00Z,U001005,looker_001005,notion_001005,drive_001005 +user_001006,acct_0150,user-001006@customer-150.example.com,approver,true,false,false,2025-10-26T00:00:00Z,U001006,looker_001006,notion_001006,drive_001006 +user_001007,acct_0151,user-001007@customer-151.example.com,admin,true,false,false,2025-11-27T00:00:00Z,U001007,looker_001007,notion_001007,drive_001007 +user_001008,acct_0152,user-001008@customer-152.example.com,finance,true,false,false,2025-12-28T00:00:00Z,U001008,looker_001008,notion_001008,drive_001008 +user_001009,acct_0153,user-001009@customer-153.example.com,requester,true,false,false,2025-01-01T00:00:00Z,U001009,looker_001009,notion_001009,drive_001009 +user_001010,acct_0154,user-001010@customer-154.example.com,approver,false,false,false,2025-02-02T00:00:00Z,U001010,looker_001010,notion_001010,drive_001010 +user_001011,acct_0155,user-001011@customer-155.example.com,admin,true,false,false,2025-03-03T00:00:00Z,U001011,looker_001011,notion_001011,drive_001011 +user_001012,acct_0156,user-001012@customer-156.example.com,finance,true,false,false,2025-04-04T00:00:00Z,U001012,looker_001012,notion_001012,drive_001012 +user_001013,acct_0157,user-001013@customer-157.example.com,requester,true,false,false,2025-05-05T00:00:00Z,U001013,looker_001013,notion_001013,drive_001013 +user_001014,acct_0158,user-001014@customer-158.example.com,approver,true,false,false,2025-06-06T00:00:00Z,U001014,looker_001014,notion_001014,drive_001014 +user_001015,acct_0159,user-001015@customer-159.example.com,admin,false,false,false,2025-07-07T00:00:00Z,U001015,looker_001015,notion_001015,drive_001015 
+user_001016,acct_0160,user-001016@customer-160.example.com,finance,true,false,false,2025-08-08T00:00:00Z,U001016,looker_001016,notion_001016,drive_001016 +user_001017,acct_0161,user-001017@customer-161.example.com,requester,true,false,false,2025-09-09T00:00:00Z,U001017,looker_001017,notion_001017,drive_001017 +user_001018,acct_0162,user-001018@customer-162.example.com,approver,true,false,false,2025-10-10T00:00:00Z,U001018,looker_001018,notion_001018,drive_001018 +user_001019,acct_0163,user-001019@customer-163.example.com,admin,true,false,false,2025-11-11T00:00:00Z,U001019,looker_001019,notion_001019,drive_001019 +user_001020,acct_0164,user-001020@customer-164.example.com,finance,false,false,false,2025-12-12T00:00:00Z,U001020,looker_001020,notion_001020,drive_001020 +user_001021,acct_0165,user-001021@customer-165.example.com,requester,true,false,false,2025-01-13T00:00:00Z,U001021,looker_001021,notion_001021,drive_001021 +user_001022,acct_0166,user-001022@customer-166.example.com,approver,true,false,false,2025-02-14T00:00:00Z,U001022,looker_001022,notion_001022,drive_001022 +user_001023,acct_0167,user-001023@customer-167.example.com,admin,true,false,false,2025-03-15T00:00:00Z,U001023,looker_001023,notion_001023,drive_001023 +user_001024,acct_0168,user-001024@customer-168.example.com,finance,true,false,false,2025-04-16T00:00:00Z,U001024,looker_001024,notion_001024,drive_001024 +user_001025,acct_0169,user-001025@customer-169.example.com,requester,false,false,false,2025-05-17T00:00:00Z,U001025,looker_001025,notion_001025,drive_001025 +user_001026,acct_0170,user-001026@customer-170.example.com,approver,true,false,false,2025-06-18T00:00:00Z,U001026,looker_001026,notion_001026,drive_001026 +user_001027,acct_0171,user-001027@customer-171.example.com,admin,true,false,false,2025-07-19T00:00:00Z,U001027,looker_001027,notion_001027,drive_001027 
+user_001028,acct_0172,user-001028@customer-172.example.com,finance,true,false,false,2025-08-20T00:00:00Z,U001028,looker_001028,notion_001028,drive_001028 +user_001029,acct_0173,user-001029@customer-173.example.com,requester,true,false,false,2025-09-21T00:00:00Z,U001029,looker_001029,notion_001029,drive_001029 +user_001030,acct_0174,user-001030@customer-174.example.com,approver,false,false,false,2025-10-22T00:00:00Z,U001030,looker_001030,notion_001030,drive_001030 +user_001031,acct_0175,user-001031@customer-175.example.com,admin,true,false,false,2025-11-23T00:00:00Z,U001031,looker_001031,notion_001031,drive_001031 +user_001032,acct_0176,user-001032@customer-176.example.com,finance,true,false,false,2025-12-24T00:00:00Z,U001032,looker_001032,notion_001032,drive_001032 +user_001033,acct_0177,user-001033@customer-177.example.com,requester,true,false,false,2025-01-25T00:00:00Z,U001033,looker_001033,notion_001033,drive_001033 +user_001034,acct_0178,user-001034@customer-178.example.com,approver,true,false,false,2025-02-26T00:00:00Z,U001034,looker_001034,notion_001034,drive_001034 +user_001035,acct_0179,user-001035@customer-179.example.com,admin,false,false,false,2025-03-27T00:00:00Z,U001035,looker_001035,notion_001035,drive_001035 +user_001036,acct_0180,user-001036@customer-180.example.com,finance,true,false,false,2025-04-28T00:00:00Z,U001036,looker_001036,notion_001036,drive_001036 +user_001037,acct_0181,user-001037@customer-181.example.com,requester,true,false,false,2025-05-01T00:00:00Z,U001037,looker_001037,notion_001037,drive_001037 +user_001038,acct_0182,user-001038@customer-182.example.com,approver,true,false,false,2025-06-02T00:00:00Z,U001038,looker_001038,notion_001038,drive_001038 +user_001039,acct_0183,user-001039@customer-183.example.com,admin,true,false,false,2025-07-03T00:00:00Z,U001039,looker_001039,notion_001039,drive_001039 
+user_001040,acct_0184,user-001040@customer-184.example.com,finance,false,false,false,2025-08-04T00:00:00Z,U001040,looker_001040,notion_001040,drive_001040 +user_001041,acct_0185,user-001041@customer-185.example.com,requester,true,false,false,2025-09-05T00:00:00Z,U001041,looker_001041,notion_001041,drive_001041 +user_001042,acct_0186,user-001042@customer-186.example.com,approver,true,false,false,2025-10-06T00:00:00Z,U001042,looker_001042,notion_001042,drive_001042 +user_001043,acct_0187,user-001043@customer-187.example.com,admin,true,false,false,2025-11-07T00:00:00Z,U001043,looker_001043,notion_001043,drive_001043 +user_001044,acct_0188,user-001044@customer-188.example.com,finance,true,false,false,2025-12-08T00:00:00Z,U001044,looker_001044,notion_001044,drive_001044 +user_001045,acct_0189,user-001045@customer-189.example.com,requester,false,false,false,2025-01-09T00:00:00Z,U001045,looker_001045,notion_001045,drive_001045 +user_001046,acct_0190,user-001046@customer-190.example.com,approver,true,false,false,2025-02-10T00:00:00Z,U001046,looker_001046,notion_001046,drive_001046 +user_001047,acct_0001,user-001047@customer-001.example.com,admin,true,false,false,2025-03-11T00:00:00Z,U001047,looker_001047,notion_001047,drive_001047 +user_001048,acct_0002,user-001048@customer-002.example.com,finance,true,false,false,2025-04-12T00:00:00Z,U001048,looker_001048,notion_001048,drive_001048 +user_001049,acct_0003,user-001049@customer-003.example.com,requester,true,false,false,2025-05-13T00:00:00Z,U001049,looker_001049,notion_001049,drive_001049 +user_001050,acct_0004,user-001050@customer-004.example.com,approver,false,false,false,2025-06-14T00:00:00Z,U001050,looker_001050,notion_001050,drive_001050 +user_001051,acct_0005,user-001051@customer-005.example.com,admin,true,false,false,2025-07-15T00:00:00Z,U001051,looker_001051,notion_001051,drive_001051 
+user_001052,acct_0006,user-001052@customer-006.example.com,finance,true,false,false,2025-08-16T00:00:00Z,U001052,looker_001052,notion_001052,drive_001052 +user_001053,acct_0007,user-001053@customer-007.example.com,requester,true,false,false,2025-09-17T00:00:00Z,U001053,looker_001053,notion_001053,drive_001053 +user_001054,acct_0008,user-001054@customer-008.example.com,approver,true,false,false,2025-10-18T00:00:00Z,U001054,looker_001054,notion_001054,drive_001054 +user_001055,acct_0009,user-001055@customer-009.example.com,admin,false,false,false,2025-11-19T00:00:00Z,U001055,looker_001055,notion_001055,drive_001055 +user_001056,acct_0010,user-001056@customer-010.example.com,finance,true,false,false,2025-12-20T00:00:00Z,U001056,looker_001056,notion_001056,drive_001056 +user_001057,acct_0011,user-001057@customer-011.example.com,requester,true,false,false,2025-01-21T00:00:00Z,U001057,looker_001057,notion_001057,drive_001057 +user_001058,acct_0012,user-001058@customer-012.example.com,approver,true,false,false,2025-02-22T00:00:00Z,U001058,looker_001058,notion_001058,drive_001058 +user_001059,acct_0013,user-001059@customer-013.example.com,admin,true,false,false,2025-03-23T00:00:00Z,U001059,looker_001059,notion_001059,drive_001059 +user_001060,acct_0014,user-001060@customer-014.example.com,finance,false,false,false,2025-04-24T00:00:00Z,U001060,looker_001060,notion_001060,drive_001060 +user_001061,acct_0015,user-001061@customer-015.example.com,requester,true,false,false,2025-05-25T00:00:00Z,U001061,looker_001061,notion_001061,drive_001061 +user_001062,acct_0016,user-001062@customer-016.example.com,approver,true,false,false,2025-06-26T00:00:00Z,U001062,looker_001062,notion_001062,drive_001062 +user_001063,acct_0017,user-001063@customer-017.example.com,admin,true,false,false,2025-07-27T00:00:00Z,U001063,looker_001063,notion_001063,drive_001063 
+user_001064,acct_0018,user-001064@customer-018.example.com,finance,true,false,false,2025-08-28T00:00:00Z,U001064,looker_001064,notion_001064,drive_001064 +user_001065,acct_0019,user-001065@customer-019.example.com,requester,false,false,false,2025-09-01T00:00:00Z,U001065,looker_001065,notion_001065,drive_001065 +user_001066,acct_0020,user-001066@customer-020.example.com,approver,true,false,false,2025-10-02T00:00:00Z,U001066,looker_001066,notion_001066,drive_001066 +user_001067,acct_0021,user-001067@customer-021.example.com,admin,true,false,false,2025-11-03T00:00:00Z,U001067,looker_001067,notion_001067,drive_001067 +user_001068,acct_0022,user-001068@customer-022.example.com,finance,true,false,false,2025-12-04T00:00:00Z,U001068,looker_001068,notion_001068,drive_001068 +user_001069,acct_0023,user-001069@customer-023.example.com,requester,true,false,false,2025-01-05T00:00:00Z,U001069,looker_001069,notion_001069,drive_001069 +user_001070,acct_0024,user-001070@customer-024.example.com,approver,false,false,false,2025-02-06T00:00:00Z,U001070,looker_001070,notion_001070,drive_001070 +user_001071,acct_0025,user-001071@customer-025.example.com,admin,true,false,false,2025-03-07T00:00:00Z,U001071,looker_001071,notion_001071,drive_001071 +user_001072,acct_0026,user-001072@customer-026.example.com,finance,true,false,false,2025-04-08T00:00:00Z,U001072,looker_001072,notion_001072,drive_001072 +user_001073,acct_0027,user-001073@customer-027.example.com,requester,true,false,false,2025-05-09T00:00:00Z,U001073,looker_001073,notion_001073,drive_001073 +user_001074,acct_0028,user-001074@customer-028.example.com,approver,true,false,false,2025-06-10T00:00:00Z,U001074,looker_001074,notion_001074,drive_001074 +user_001075,acct_0029,user-001075@customer-029.example.com,admin,false,false,false,2025-07-11T00:00:00Z,U001075,looker_001075,notion_001075,drive_001075 
+user_001076,acct_0030,user-001076@customer-030.example.com,finance,true,false,false,2025-08-12T00:00:00Z,U001076,looker_001076,notion_001076,drive_001076 +user_001077,acct_0031,user-001077@customer-031.example.com,requester,true,false,false,2025-09-13T00:00:00Z,U001077,looker_001077,notion_001077,drive_001077 +user_001078,acct_0032,user-001078@customer-032.example.com,approver,true,false,false,2025-10-14T00:00:00Z,U001078,looker_001078,notion_001078,drive_001078 +user_001079,acct_0033,user-001079@customer-033.example.com,admin,true,false,false,2025-11-15T00:00:00Z,U001079,looker_001079,notion_001079,drive_001079 +user_001080,acct_0034,user-001080@customer-034.example.com,finance,false,false,false,2025-12-16T00:00:00Z,U001080,looker_001080,notion_001080,drive_001080 +user_001081,acct_0035,user-001081@customer-035.example.com,requester,true,false,false,2025-01-17T00:00:00Z,U001081,looker_001081,notion_001081,drive_001081 +user_001082,acct_0036,user-001082@customer-036.example.com,approver,true,false,false,2025-02-18T00:00:00Z,U001082,looker_001082,notion_001082,drive_001082 +user_001083,acct_0037,user-001083@customer-037.example.com,admin,true,false,false,2025-03-19T00:00:00Z,U001083,looker_001083,notion_001083,drive_001083 +user_001084,acct_0038,user-001084@customer-038.example.com,finance,true,false,false,2025-04-20T00:00:00Z,U001084,looker_001084,notion_001084,drive_001084 +user_001085,acct_0039,user-001085@customer-039.example.com,requester,false,false,false,2025-05-21T00:00:00Z,U001085,looker_001085,notion_001085,drive_001085 +user_001086,acct_0040,user-001086@customer-040.example.com,approver,true,false,false,2025-06-22T00:00:00Z,U001086,looker_001086,notion_001086,drive_001086 +user_001087,acct_0041,user-001087@customer-041.example.com,admin,true,false,false,2025-07-23T00:00:00Z,U001087,looker_001087,notion_001087,drive_001087 
+user_001088,acct_0042,user-001088@customer-042.example.com,finance,true,false,false,2025-08-24T00:00:00Z,U001088,looker_001088,notion_001088,drive_001088 +user_001089,acct_0043,user-001089@customer-043.example.com,requester,true,false,false,2025-09-25T00:00:00Z,U001089,looker_001089,notion_001089,drive_001089 +user_001090,acct_0044,user-001090@customer-044.example.com,approver,false,false,false,2025-10-26T00:00:00Z,U001090,looker_001090,notion_001090,drive_001090 +user_001091,acct_0045,user-001091@customer-045.example.com,admin,true,false,false,2025-11-27T00:00:00Z,U001091,looker_001091,notion_001091,drive_001091 +user_001092,acct_0046,user-001092@customer-046.example.com,finance,true,false,false,2025-12-28T00:00:00Z,U001092,looker_001092,notion_001092,drive_001092 +user_001093,acct_0047,user-001093@customer-047.example.com,requester,true,false,false,2025-01-01T00:00:00Z,U001093,looker_001093,notion_001093,drive_001093 +user_001094,acct_0048,user-001094@customer-048.example.com,approver,true,false,false,2025-02-02T00:00:00Z,U001094,looker_001094,notion_001094,drive_001094 +user_001095,acct_0049,user-001095@customer-049.example.com,admin,false,false,false,2025-03-03T00:00:00Z,U001095,looker_001095,notion_001095,drive_001095 +user_001096,acct_0050,user-001096@customer-050.example.com,finance,true,false,false,2025-04-04T00:00:00Z,U001096,looker_001096,notion_001096,drive_001096 +user_001097,acct_0051,user-001097@customer-051.example.com,requester,true,false,false,2025-05-05T00:00:00Z,U001097,looker_001097,notion_001097,drive_001097 +user_001098,acct_0052,user-001098@customer-052.example.com,approver,true,false,false,2025-06-06T00:00:00Z,U001098,looker_001098,notion_001098,drive_001098 +user_001099,acct_0053,user-001099@customer-053.example.com,admin,true,false,false,2025-07-07T00:00:00Z,U001099,looker_001099,notion_001099,drive_001099 
+user_001100,acct_0054,user-001100@customer-054.example.com,finance,false,false,false,2025-08-08T00:00:00Z,U001100,looker_001100,notion_001100,drive_001100 +user_001101,acct_0055,user-001101@customer-055.example.com,requester,true,false,false,2025-09-09T00:00:00Z,U001101,looker_001101,notion_001101,drive_001101 +user_001102,acct_0056,user-001102@customer-056.example.com,approver,true,false,false,2025-10-10T00:00:00Z,U001102,looker_001102,notion_001102,drive_001102 +user_001103,acct_0057,user-001103@customer-057.example.com,admin,true,false,false,2025-11-11T00:00:00Z,U001103,looker_001103,notion_001103,drive_001103 +user_001104,acct_0058,user-001104@customer-058.example.com,finance,true,false,false,2025-12-12T00:00:00Z,U001104,looker_001104,notion_001104,drive_001104 +user_001105,acct_0059,user-001105@customer-059.example.com,requester,false,false,false,2025-01-13T00:00:00Z,U001105,looker_001105,notion_001105,drive_001105 +user_001106,acct_0060,user-001106@customer-060.example.com,approver,true,false,false,2025-02-14T00:00:00Z,U001106,looker_001106,notion_001106,drive_001106 +user_001107,acct_0061,user-001107@customer-061.example.com,admin,true,false,false,2025-03-15T00:00:00Z,U001107,looker_001107,notion_001107,drive_001107 +user_001108,acct_0062,user-001108@customer-062.example.com,finance,true,false,false,2025-04-16T00:00:00Z,U001108,looker_001108,notion_001108,drive_001108 +user_001109,acct_0063,user-001109@customer-063.example.com,requester,true,false,false,2025-05-17T00:00:00Z,U001109,looker_001109,notion_001109,drive_001109 +user_001110,acct_0064,user-001110@customer-064.example.com,approver,false,false,false,2025-06-18T00:00:00Z,U001110,looker_001110,notion_001110,drive_001110 +user_001111,acct_0065,user-001111@customer-065.example.com,admin,true,false,false,2025-07-19T00:00:00Z,U001111,looker_001111,notion_001111,drive_001111 
+user_001112,acct_0066,user-001112@customer-066.example.com,finance,true,false,false,2025-08-20T00:00:00Z,U001112,looker_001112,notion_001112,drive_001112 +user_001113,acct_0067,user-001113@customer-067.example.com,requester,true,false,false,2025-09-21T00:00:00Z,U001113,looker_001113,notion_001113,drive_001113 +user_001114,acct_0068,user-001114@customer-068.example.com,approver,true,false,false,2025-10-22T00:00:00Z,U001114,looker_001114,notion_001114,drive_001114 +user_001115,acct_0069,user-001115@customer-069.example.com,admin,false,false,false,2025-11-23T00:00:00Z,U001115,looker_001115,notion_001115,drive_001115 +user_001116,acct_0070,user-001116@customer-070.example.com,finance,true,false,false,2025-12-24T00:00:00Z,U001116,looker_001116,notion_001116,drive_001116 +user_001117,acct_0071,user-001117@customer-071.example.com,requester,true,false,false,2025-01-25T00:00:00Z,U001117,looker_001117,notion_001117,drive_001117 +user_001118,acct_0072,user-001118@customer-072.example.com,approver,true,false,false,2025-02-26T00:00:00Z,U001118,looker_001118,notion_001118,drive_001118 +user_001119,acct_0073,user-001119@customer-073.example.com,admin,true,false,false,2025-03-27T00:00:00Z,U001119,looker_001119,notion_001119,drive_001119 +user_001120,acct_0074,user-001120@customer-074.example.com,finance,false,false,false,2025-04-28T00:00:00Z,U001120,looker_001120,notion_001120,drive_001120 +user_001121,acct_0075,user-001121@customer-075.example.com,requester,true,false,false,2025-05-01T00:00:00Z,U001121,looker_001121,notion_001121,drive_001121 +user_001122,acct_0076,user-001122@customer-076.example.com,approver,true,false,false,2025-06-02T00:00:00Z,U001122,looker_001122,notion_001122,drive_001122 +user_001123,acct_0077,user-001123@customer-077.example.com,admin,true,false,false,2025-07-03T00:00:00Z,U001123,looker_001123,notion_001123,drive_001123 
+user_001124,acct_0078,user-001124@customer-078.example.com,finance,true,false,false,2025-08-04T00:00:00Z,U001124,looker_001124,notion_001124,drive_001124 +user_001125,acct_0079,user-001125@customer-079.example.com,requester,false,false,false,2025-09-05T00:00:00Z,U001125,looker_001125,notion_001125,drive_001125 +user_001126,acct_0080,user-001126@customer-080.example.com,approver,true,false,false,2025-10-06T00:00:00Z,U001126,looker_001126,notion_001126,drive_001126 +user_001127,acct_0081,user-001127@customer-081.example.com,admin,true,false,false,2025-11-07T00:00:00Z,U001127,looker_001127,notion_001127,drive_001127 +user_001128,acct_0082,user-001128@customer-082.example.com,finance,true,false,false,2025-12-08T00:00:00Z,U001128,looker_001128,notion_001128,drive_001128 +user_001129,acct_0083,user-001129@customer-083.example.com,requester,true,false,false,2025-01-09T00:00:00Z,U001129,looker_001129,notion_001129,drive_001129 +user_001130,acct_0084,user-001130@customer-084.example.com,approver,false,false,false,2025-02-10T00:00:00Z,U001130,looker_001130,notion_001130,drive_001130 +user_001131,acct_0085,user-001131@customer-085.example.com,admin,true,false,false,2025-03-11T00:00:00Z,U001131,looker_001131,notion_001131,drive_001131 +user_001132,acct_0086,user-001132@customer-086.example.com,finance,true,false,false,2025-04-12T00:00:00Z,U001132,looker_001132,notion_001132,drive_001132 +user_001133,acct_0087,user-001133@customer-087.example.com,requester,true,false,false,2025-05-13T00:00:00Z,U001133,looker_001133,notion_001133,drive_001133 +user_001134,acct_0088,user-001134@customer-088.example.com,approver,true,false,false,2025-06-14T00:00:00Z,U001134,looker_001134,notion_001134,drive_001134 +user_001135,acct_0089,user-001135@customer-089.example.com,admin,false,false,false,2025-07-15T00:00:00Z,U001135,looker_001135,notion_001135,drive_001135 
+user_001136,acct_0090,user-001136@customer-090.example.com,finance,true,false,false,2025-08-16T00:00:00Z,U001136,looker_001136,notion_001136,drive_001136 +user_001137,acct_0091,user-001137@customer-091.example.com,requester,true,false,false,2025-09-17T00:00:00Z,U001137,looker_001137,notion_001137,drive_001137 +user_001138,acct_0092,user-001138@customer-092.example.com,approver,true,false,false,2025-10-18T00:00:00Z,U001138,looker_001138,notion_001138,drive_001138 +user_001139,acct_0093,user-001139@customer-093.example.com,admin,true,false,false,2025-11-19T00:00:00Z,U001139,looker_001139,notion_001139,drive_001139 +user_001140,acct_0094,user-001140@customer-094.example.com,finance,false,false,false,2025-12-20T00:00:00Z,U001140,looker_001140,notion_001140,drive_001140 +user_001141,acct_0095,user-001141@customer-095.example.com,requester,true,false,false,2025-01-21T00:00:00Z,U001141,looker_001141,notion_001141,drive_001141 +user_001142,acct_0096,user-001142@customer-096.example.com,approver,true,false,false,2025-02-22T00:00:00Z,U001142,looker_001142,notion_001142,drive_001142 +user_001143,acct_0097,user-001143@customer-097.example.com,admin,true,false,false,2025-03-23T00:00:00Z,U001143,looker_001143,notion_001143,drive_001143 +user_001144,acct_0098,user-001144@customer-098.example.com,finance,true,false,false,2025-04-24T00:00:00Z,U001144,looker_001144,notion_001144,drive_001144 +user_001145,acct_0099,user-001145@customer-099.example.com,requester,false,false,false,2025-05-25T00:00:00Z,U001145,looker_001145,notion_001145,drive_001145 +user_001146,acct_0100,user-001146@customer-100.example.com,approver,true,false,false,2025-06-26T00:00:00Z,U001146,looker_001146,notion_001146,drive_001146 +user_001147,acct_0101,user-001147@customer-101.example.com,admin,true,false,false,2025-07-27T00:00:00Z,U001147,looker_001147,notion_001147,drive_001147 
+user_001148,acct_0102,user-001148@customer-102.example.com,finance,true,false,false,2025-08-28T00:00:00Z,U001148,looker_001148,notion_001148,drive_001148 +user_001149,acct_0103,user-001149@customer-103.example.com,requester,true,false,false,2025-09-01T00:00:00Z,U001149,looker_001149,notion_001149,drive_001149 +user_001150,acct_0104,user-001150@customer-104.example.com,approver,false,false,false,2025-10-02T00:00:00Z,U001150,looker_001150,notion_001150,drive_001150 +user_001151,acct_0105,user-001151@customer-105.example.com,admin,true,false,false,2025-11-03T00:00:00Z,U001151,looker_001151,notion_001151,drive_001151 +user_001152,acct_0106,user-001152@customer-106.example.com,finance,true,false,false,2025-12-04T00:00:00Z,U001152,looker_001152,notion_001152,drive_001152 +user_001153,acct_0107,user-001153@customer-107.example.com,requester,true,false,false,2025-01-05T00:00:00Z,U001153,looker_001153,notion_001153,drive_001153 +user_001154,acct_0108,user-001154@customer-108.example.com,approver,true,false,false,2025-02-06T00:00:00Z,U001154,looker_001154,notion_001154,drive_001154 +user_001155,acct_0109,user-001155@customer-109.example.com,admin,false,false,false,2025-03-07T00:00:00Z,U001155,looker_001155,notion_001155,drive_001155 +user_001156,acct_0110,user-001156@customer-110.example.com,finance,true,false,false,2025-04-08T00:00:00Z,U001156,looker_001156,notion_001156,drive_001156 +user_001157,acct_0111,user-001157@customer-111.example.com,requester,true,false,false,2025-05-09T00:00:00Z,U001157,looker_001157,notion_001157,drive_001157 +user_001158,acct_0112,user-001158@customer-112.example.com,approver,true,false,false,2025-06-10T00:00:00Z,U001158,looker_001158,notion_001158,drive_001158 +user_001159,acct_0113,user-001159@customer-113.example.com,admin,true,false,false,2025-07-11T00:00:00Z,U001159,looker_001159,notion_001159,drive_001159 
+user_001160,acct_0114,user-001160@customer-114.example.com,finance,false,false,false,2025-08-12T00:00:00Z,U001160,looker_001160,notion_001160,drive_001160 +user_001161,acct_0115,user-001161@customer-115.example.com,requester,true,false,false,2025-09-13T00:00:00Z,U001161,looker_001161,notion_001161,drive_001161 +user_001162,acct_0116,user-001162@customer-116.example.com,approver,true,false,false,2025-10-14T00:00:00Z,U001162,looker_001162,notion_001162,drive_001162 +user_001163,acct_0117,user-001163@customer-117.example.com,admin,true,false,false,2025-11-15T00:00:00Z,U001163,looker_001163,notion_001163,drive_001163 +user_001164,acct_0118,user-001164@customer-118.example.com,finance,true,false,false,2025-12-16T00:00:00Z,U001164,looker_001164,notion_001164,drive_001164 +user_001165,acct_0119,user-001165@customer-119.example.com,requester,false,false,false,2025-01-17T00:00:00Z,U001165,looker_001165,notion_001165,drive_001165 +user_001166,acct_0120,user-001166@customer-120.example.com,approver,true,false,false,2025-02-18T00:00:00Z,U001166,looker_001166,notion_001166,drive_001166 +user_001167,acct_0121,user-001167@customer-121.example.com,admin,true,false,false,2025-03-19T00:00:00Z,U001167,looker_001167,notion_001167,drive_001167 +user_001168,acct_0122,user-001168@customer-122.example.com,finance,true,false,false,2025-04-20T00:00:00Z,U001168,looker_001168,notion_001168,drive_001168 +user_001169,acct_0123,user-001169@customer-123.example.com,requester,true,false,false,2025-05-21T00:00:00Z,U001169,looker_001169,notion_001169,drive_001169 +user_001170,acct_0124,user-001170@customer-124.example.com,approver,false,false,false,2025-06-22T00:00:00Z,U001170,looker_001170,notion_001170,drive_001170 +user_001171,acct_0125,user-001171@customer-125.example.com,admin,true,false,false,2025-07-23T00:00:00Z,U001171,looker_001171,notion_001171,drive_001171 
+user_001172,acct_0126,user-001172@customer-126.example.com,finance,true,false,false,2025-08-24T00:00:00Z,U001172,looker_001172,notion_001172,drive_001172 +user_001173,acct_0127,user-001173@customer-127.example.com,requester,true,false,false,2025-09-25T00:00:00Z,U001173,looker_001173,notion_001173,drive_001173 +user_001174,acct_0128,user-001174@customer-128.example.com,approver,true,false,false,2025-10-26T00:00:00Z,U001174,looker_001174,notion_001174,drive_001174 +user_001175,acct_0129,user-001175@customer-129.example.com,admin,false,false,false,2025-11-27T00:00:00Z,U001175,looker_001175,notion_001175,drive_001175 +user_001176,acct_0130,user-001176@customer-130.example.com,finance,true,false,false,2025-12-28T00:00:00Z,U001176,looker_001176,notion_001176,drive_001176 +user_001177,acct_0131,user-001177@customer-131.example.com,requester,true,false,false,2025-01-01T00:00:00Z,U001177,looker_001177,notion_001177,drive_001177 +user_001178,acct_0132,user-001178@customer-132.example.com,approver,true,false,false,2025-02-02T00:00:00Z,U001178,looker_001178,notion_001178,drive_001178 +user_001179,acct_0133,user-001179@customer-133.example.com,admin,true,false,false,2025-03-03T00:00:00Z,U001179,looker_001179,notion_001179,drive_001179 +user_001180,acct_0134,user-001180@customer-134.example.com,finance,false,false,false,2025-04-04T00:00:00Z,U001180,looker_001180,notion_001180,drive_001180 +user_001181,acct_0135,user-001181@customer-135.example.com,requester,true,false,false,2025-05-05T00:00:00Z,U001181,looker_001181,notion_001181,drive_001181 +user_001182,acct_0136,user-001182@customer-136.example.com,approver,true,false,false,2025-06-06T00:00:00Z,U001182,looker_001182,notion_001182,drive_001182 +user_001183,acct_0137,user-001183@customer-137.example.com,admin,true,false,false,2025-07-07T00:00:00Z,U001183,looker_001183,notion_001183,drive_001183 
+user_001184,acct_0138,user-001184@customer-138.example.com,finance,true,false,false,2025-08-08T00:00:00Z,U001184,looker_001184,notion_001184,drive_001184 +user_001185,acct_0139,user-001185@customer-139.example.com,requester,false,false,false,2025-09-09T00:00:00Z,U001185,looker_001185,notion_001185,drive_001185 +user_001186,acct_0140,user-001186@customer-140.example.com,approver,true,false,false,2025-10-10T00:00:00Z,U001186,looker_001186,notion_001186,drive_001186 +user_001187,acct_0141,user-001187@customer-141.example.com,admin,true,false,false,2025-11-11T00:00:00Z,U001187,looker_001187,notion_001187,drive_001187 +user_001188,acct_0142,user-001188@customer-142.example.com,finance,true,false,false,2025-12-12T00:00:00Z,U001188,looker_001188,notion_001188,drive_001188 +user_001189,acct_0143,user-001189@customer-143.example.com,requester,true,false,false,2025-01-13T00:00:00Z,U001189,looker_001189,notion_001189,drive_001189 +user_001190,acct_0144,user-001190@customer-144.example.com,approver,false,false,false,2025-02-14T00:00:00Z,U001190,looker_001190,notion_001190,drive_001190 +user_001191,acct_0145,user-001191@customer-145.example.com,admin,true,false,false,2025-03-15T00:00:00Z,U001191,looker_001191,notion_001191,drive_001191 +user_001192,acct_0146,user-001192@customer-146.example.com,finance,true,false,false,2025-04-16T00:00:00Z,U001192,looker_001192,notion_001192,drive_001192 +user_001193,acct_0147,user-001193@customer-147.example.com,requester,true,false,false,2025-05-17T00:00:00Z,U001193,looker_001193,notion_001193,drive_001193 +user_001194,acct_0148,user-001194@customer-148.example.com,approver,true,false,false,2025-06-18T00:00:00Z,U001194,looker_001194,notion_001194,drive_001194 +user_001195,acct_0149,user-001195@customer-149.example.com,admin,false,false,false,2025-07-19T00:00:00Z,U001195,looker_001195,notion_001195,drive_001195 
+user_001196,acct_0150,user-001196@customer-150.example.com,finance,true,false,false,2025-08-20T00:00:00Z,U001196,looker_001196,notion_001196,drive_001196 +user_001197,acct_0151,user-001197@customer-151.example.com,requester,true,false,false,2025-09-21T00:00:00Z,U001197,looker_001197,notion_001197,drive_001197 +user_001198,acct_0152,user-001198@customer-152.example.com,approver,true,false,false,2025-10-22T00:00:00Z,U001198,looker_001198,notion_001198,drive_001198 +user_001199,acct_0153,user-001199@customer-153.example.com,admin,true,false,false,2025-11-23T00:00:00Z,U001199,looker_001199,notion_001199,drive_001199 +user_001200,acct_0154,user-001200@customer-154.example.com,finance,false,false,false,2025-12-24T00:00:00Z,U001200,looker_001200,notion_001200,drive_001200 +user_001201,acct_0201,user-001201@customer-201.example.com,requester,false,true,false,2025-01-25T00:00:00Z,U001201,looker_001201,notion_001201,drive_001201 +user_001202,acct_0202,user-001202@customer-202.example.com,approver,false,true,false,2025-02-26T00:00:00Z,U001202,looker_001202,notion_001202,drive_001202 +user_001203,acct_0203,user-001203@customer-203.example.com,admin,false,true,false,2025-03-27T00:00:00Z,U001203,looker_001203,notion_001203,drive_001203 +user_001204,acct_0204,user-001204@customer-204.example.com,finance,false,true,false,2025-04-28T00:00:00Z,U001204,looker_001204,notion_001204,drive_001204 +user_001205,acct_0205,user-001205@customer-205.example.com,requester,false,true,false,2025-05-01T00:00:00Z,U001205,looker_001205,notion_001205,drive_001205 +user_001206,acct_0201,user-001206@customer-201.example.com,approver,false,true,false,2025-06-02T00:00:00Z,U001206,looker_001206,notion_001206,drive_001206 +user_001207,acct_0202,user-001207@customer-202.example.com,admin,false,true,false,2025-07-03T00:00:00Z,U001207,looker_001207,notion_001207,drive_001207 
+user_001208,acct_0203,user-001208@customer-203.example.com,finance,false,true,false,2025-08-04T00:00:00Z,U001208,looker_001208,notion_001208,drive_001208 +user_001209,acct_0204,user-001209@customer-204.example.com,requester,false,true,false,2025-09-05T00:00:00Z,U001209,looker_001209,notion_001209,drive_001209 +user_001210,acct_0205,user-001210@customer-205.example.com,approver,false,true,false,2025-10-06T00:00:00Z,U001210,looker_001210,notion_001210,drive_001210 +user_001211,acct_0201,user-001211@customer-201.example.com,admin,false,true,false,2025-11-07T00:00:00Z,U001211,looker_001211,notion_001211,drive_001211 +user_001212,acct_0202,user-001212@customer-202.example.com,finance,false,true,false,2025-12-08T00:00:00Z,U001212,looker_001212,notion_001212,drive_001212 +user_001213,acct_0203,user-001213@customer-203.example.com,requester,false,true,false,2025-01-09T00:00:00Z,U001213,looker_001213,notion_001213,drive_001213 +user_001214,acct_0204,user-001214@customer-204.example.com,approver,false,true,false,2025-02-10T00:00:00Z,U001214,looker_001214,notion_001214,drive_001214 +user_001215,acct_0205,user-001215@customer-205.example.com,admin,false,true,false,2025-03-11T00:00:00Z,U001215,looker_001215,notion_001215,drive_001215 +user_001216,acct_0201,user-001216@customer-201.example.com,finance,false,true,false,2025-04-12T00:00:00Z,U001216,looker_001216,notion_001216,drive_001216 +user_001217,acct_0202,user-001217@customer-202.example.com,requester,false,true,false,2025-05-13T00:00:00Z,U001217,looker_001217,notion_001217,drive_001217 +user_001218,acct_0203,user-001218@customer-203.example.com,approver,false,true,false,2025-06-14T00:00:00Z,U001218,looker_001218,notion_001218,drive_001218 +user_001219,acct_0204,user-001219@customer-204.example.com,admin,false,true,false,2025-07-15T00:00:00Z,U001219,looker_001219,notion_001219,drive_001219 
+user_001220,acct_0205,user-001220@customer-205.example.com,finance,false,true,false,2025-08-16T00:00:00Z,U001220,looker_001220,notion_001220,drive_001220 +user_001221,acct_0201,user-001221@customer-201.example.com,requester,false,true,false,2025-09-17T00:00:00Z,U001221,looker_001221,notion_001221,drive_001221 +user_001222,acct_0202,user-001222@customer-202.example.com,approver,false,true,false,2025-10-18T00:00:00Z,U001222,looker_001222,notion_001222,drive_001222 +user_001223,acct_0203,user-001223@customer-203.example.com,admin,false,true,false,2025-11-19T00:00:00Z,U001223,looker_001223,notion_001223,drive_001223 +user_001224,acct_0204,user-001224@customer-204.example.com,finance,false,true,false,2025-12-20T00:00:00Z,U001224,looker_001224,notion_001224,drive_001224 +user_001225,acct_0205,user-001225@customer-205.example.com,requester,false,true,false,2025-01-21T00:00:00Z,U001225,looker_001225,notion_001225,drive_001225 +user_001226,acct_0201,user-001226@customer-201.example.com,approver,false,true,false,2025-02-22T00:00:00Z,U001226,looker_001226,notion_001226,drive_001226 +user_001227,acct_0202,user-001227@customer-202.example.com,admin,false,true,false,2025-03-23T00:00:00Z,U001227,looker_001227,notion_001227,drive_001227 +user_001228,acct_0203,user-001228@customer-203.example.com,finance,false,true,false,2025-04-24T00:00:00Z,U001228,looker_001228,notion_001228,drive_001228 +user_001229,acct_0204,user-001229@customer-204.example.com,requester,false,true,false,2025-05-25T00:00:00Z,U001229,looker_001229,notion_001229,drive_001229 +user_001230,acct_0205,user-001230@customer-205.example.com,approver,false,true,false,2025-06-26T00:00:00Z,U001230,looker_001230,notion_001230,drive_001230 +user_001231,acct_0206,user-001231@customer-206.example.com,admin,false,false,true,2025-07-27T00:00:00Z,U001231,looker_001231,notion_001231,drive_001231 
+user_001232,acct_0207,user-001232@customer-207.example.com,finance,false,false,true,2025-08-28T00:00:00Z,U001232,looker_001232,notion_001232,drive_001232 +user_001233,acct_0208,user-001233@customer-208.example.com,requester,false,false,true,2025-09-01T00:00:00Z,U001233,looker_001233,notion_001233,drive_001233 +user_001234,acct_0209,user-001234@customer-209.example.com,approver,false,false,true,2025-10-02T00:00:00Z,U001234,looker_001234,notion_001234,drive_001234 +user_001235,acct_0210,user-001235@customer-210.example.com,admin,false,false,true,2025-11-03T00:00:00Z,U001235,looker_001235,notion_001235,drive_001235 +user_001236,acct_0206,user-001236@customer-206.example.com,finance,false,false,true,2025-12-04T00:00:00Z,U001236,looker_001236,notion_001236,drive_001236 +user_001237,acct_0207,user-001237@customer-207.example.com,requester,false,false,true,2025-01-05T00:00:00Z,U001237,looker_001237,notion_001237,drive_001237 +user_001238,acct_0208,user-001238@customer-208.example.com,approver,false,false,true,2025-02-06T00:00:00Z,U001238,looker_001238,notion_001238,drive_001238 +user_001239,acct_0209,user-001239@customer-209.example.com,admin,false,false,true,2025-03-07T00:00:00Z,U001239,looker_001239,notion_001239,drive_001239 +user_001240,acct_0210,user-001240@customer-210.example.com,finance,false,false,true,2025-04-08T00:00:00Z,U001240,looker_001240,notion_001240,drive_001240 +user_001241,acct_0206,user-001241@customer-206.example.com,requester,false,false,true,2025-05-09T00:00:00Z,U001241,looker_001241,notion_001241,drive_001241 +user_001242,acct_0207,user-001242@customer-207.example.com,approver,false,false,true,2025-06-10T00:00:00Z,U001242,looker_001242,notion_001242,drive_001242 +user_001243,acct_0208,user-001243@customer-208.example.com,admin,false,false,true,2025-07-11T00:00:00Z,U001243,looker_001243,notion_001243,drive_001243 
+user_001244,acct_0209,user-001244@customer-209.example.com,finance,false,false,true,2025-08-12T00:00:00Z,U001244,looker_001244,notion_001244,drive_001244 +user_001245,acct_0210,user-001245@customer-210.example.com,requester,false,false,true,2025-09-13T00:00:00Z,U001245,looker_001245,notion_001245,drive_001245 +user_001246,acct_0206,user-001246@customer-206.example.com,approver,false,false,true,2025-10-14T00:00:00Z,U001246,looker_001246,notion_001246,drive_001246 +user_001247,acct_0207,user-001247@customer-207.example.com,admin,false,false,true,2025-11-15T00:00:00Z,U001247,looker_001247,notion_001247,drive_001247 +user_001248,acct_0208,user-001248@customer-208.example.com,finance,false,false,true,2025-12-16T00:00:00Z,U001248,looker_001248,notion_001248,drive_001248 +user_001249,acct_0209,user-001249@customer-209.example.com,requester,false,false,true,2025-01-17T00:00:00Z,U001249,looker_001249,notion_001249,drive_001249 +user_001250,acct_0210,user-001250@customer-210.example.com,approver,false,false,true,2025-02-18T00:00:00Z,U001250,looker_001250,notion_001250,drive_001250 +user_001251,acct_0206,user-001251@customer-206.example.com,admin,false,false,true,2025-03-19T00:00:00Z,U001251,looker_001251,notion_001251,drive_001251 +user_001252,acct_0207,user-001252@customer-207.example.com,finance,false,false,true,2025-04-20T00:00:00Z,U001252,looker_001252,notion_001252,drive_001252 +user_001253,acct_0208,user-001253@customer-208.example.com,requester,false,false,true,2025-05-21T00:00:00Z,U001253,looker_001253,notion_001253,drive_001253 +user_001254,acct_0209,user-001254@customer-209.example.com,approver,false,false,true,2025-06-22T00:00:00Z,U001254,looker_001254,notion_001254,drive_001254 +user_001255,acct_0210,user-001255@customer-210.example.com,admin,false,false,true,2025-07-23T00:00:00Z,U001255,looker_001255,notion_001255,drive_001255 
+user_001256,acct_0206,user-001256@customer-206.example.com,finance,false,false,true,2025-08-24T00:00:00Z,U001256,looker_001256,notion_001256,drive_001256 +user_001257,acct_0207,user-001257@customer-207.example.com,requester,false,false,true,2025-09-25T00:00:00Z,U001257,looker_001257,notion_001257,drive_001257 +user_001258,acct_0208,user-001258@customer-208.example.com,approver,false,false,true,2025-10-26T00:00:00Z,U001258,looker_001258,notion_001258,drive_001258 +user_001259,acct_0209,user-001259@customer-209.example.com,admin,false,false,true,2025-11-27T00:00:00Z,U001259,looker_001259,notion_001259,drive_001259 +user_001260,acct_0210,user-001260@customer-210.example.com,finance,false,false,true,2025-12-28T00:00:00Z,U001260,looker_001260,notion_001260,drive_001260 diff --git a/packages/cli/assets/demo/orbit/replay.memory-flow.v1.json b/packages/cli/assets/demo/orbit/replay.memory-flow.v1.json new file mode 100644 index 00000000..af4c1aa9 --- /dev/null +++ b/packages/cli/assets/demo/orbit/replay.memory-flow.v1.json @@ -0,0 +1,707 @@ +{ + "memoryFlowReplaySchemaVersion": 1, + "replay": { + "runId": "demo-seeded-orbit", + "connectionId": "orbit_demo", + "adapter": "live-database", + "status": "done", + "sourceDir": null, + "syncId": "demo-seeded-sync", + "reportId": "demo-seeded-report", + "reportPath": "reports/seeded-demo-report.json", + "errors": [], + "metadata": { + "schemaVersion": 1, + "mode": "seeded", + "origin": "packaged", + "timing": "prebuilt", + "capturedAt": "2026-05-06T00:00:00.000Z", + "sourceReportId": "demo-seeded-report", + "sourceReportPath": "reports/seeded-demo-report.json", + "fallbackReason": null + }, + "events": [ + { + "type": "source_acquired", + "adapter": "live-database", + "trigger": "demo_seeded", + "fileCount": 8 + }, + { + "type": "source_acquired", + "adapter": "dbt_descriptions", + "trigger": "demo_seeded", + "fileCount": 6 + }, + { + "type": "source_acquired", + "adapter": "looker", + "trigger": "demo_seeded", + "fileCount": 7 + 
}, + { + "type": "source_acquired", + "adapter": "notion", + "trigger": "demo_seeded", + "fileCount": 8 + }, + { + "type": "scope_detected", + "fingerprint": "sqlite:orbit-demo" + }, + { + "type": "raw_snapshot_written", + "syncId": "demo-seeded-sync", + "rawFileCount": 29 + }, + { + "type": "diff_computed", + "added": 29, + "modified": 0, + "deleted": 0, + "unchanged": 0 + }, + { + "type": "chunks_planned", + "chunkCount": 5, + "workUnitCount": 5, + "evictionCount": 0 + }, + { + "type": "work_unit_started", + "unitKey": "revenue-and-contracts", + "skills": [ + "knowledge_capture", + "sl_capture" + ], + "stepBudget": 40 + }, + { + "type": "candidate_action", + "unitKey": "revenue-and-contracts", + "target": "wiki", + "action": "created", + "key": "knowledge/global/arr-contract-first.md" + }, + { + "type": "candidate_action", + "unitKey": "revenue-and-contracts", + "target": "wiki", + "action": "created", + "key": "knowledge/global/revenue-gross-to-net.md" + }, + { + "type": "candidate_action", + "unitKey": "revenue-and-contracts", + "target": "wiki", + "action": "created", + "key": "knowledge/global/discount-expiration.md" + }, + { + "type": "candidate_action", + "unitKey": "revenue-and-contracts", + "target": "sl", + "action": "created", + "key": "orbit_demo.contracts" + }, + { + "type": "candidate_action", + "unitKey": "revenue-and-contracts", + "target": "sl", + "action": "created", + "key": "orbit_demo.invoices" + }, + { + "type": "candidate_action", + "unitKey": "revenue-and-contracts", + "target": "sl", + "action": "created", + "key": "orbit_demo.arr_movements" + }, + { + "type": "work_unit_finished", + "unitKey": "revenue-and-contracts", + "status": "success" + }, + { + "type": "work_unit_started", + "unitKey": "retention-and-segments", + "skills": [ + "knowledge_capture", + "sl_capture" + ], + "stepBudget": 40 + }, + { + "type": "candidate_action", + "unitKey": "retention-and-segments", + "target": "wiki", + "action": "created", + "key": 
"knowledge/global/nrr-retention.md" + }, + { + "type": "candidate_action", + "unitKey": "retention-and-segments", + "target": "wiki", + "action": "created", + "key": "knowledge/global/segment-classification.md" + }, + { + "type": "candidate_action", + "unitKey": "retention-and-segments", + "target": "sl", + "action": "created", + "key": "orbit_demo.accounts" + }, + { + "type": "work_unit_finished", + "unitKey": "retention-and-segments", + "status": "success" + }, + { + "type": "work_unit_started", + "unitKey": "procurement-and-activation", + "skills": [ + "knowledge_capture", + "sl_capture" + ], + "stepBudget": 40 + }, + { + "type": "candidate_action", + "unitKey": "procurement-and-activation", + "target": "wiki", + "action": "created", + "key": "knowledge/global/activation-policy.md" + }, + { + "type": "candidate_action", + "unitKey": "procurement-and-activation", + "target": "wiki", + "action": "created", + "key": "knowledge/global/procurement-workflows.md" + }, + { + "type": "candidate_action", + "unitKey": "procurement-and-activation", + "target": "sl", + "action": "created", + "key": "orbit_demo.purchase_requests" + }, + { + "type": "work_unit_finished", + "unitKey": "procurement-and-activation", + "status": "success" + }, + { + "type": "work_unit_started", + "unitKey": "support-and-health", + "skills": [ + "knowledge_capture", + "sl_capture" + ], + "stepBudget": 40 + }, + { + "type": "candidate_action", + "unitKey": "support-and-health", + "target": "wiki", + "action": "created", + "key": "knowledge/global/customer-health-scoring.md" + }, + { + "type": "candidate_action", + "unitKey": "support-and-health", + "target": "wiki", + "action": "created", + "key": "knowledge/global/support-escalation.md" + }, + { + "type": "candidate_action", + "unitKey": "support-and-health", + "target": "sl", + "action": "created", + "key": "orbit_demo.support_tickets" + }, + { + "type": "work_unit_finished", + "unitKey": "support-and-health", + "status": "success" + }, + { + 
"type": "work_unit_started", + "unitKey": "governance-and-exclusions", + "skills": [ + "knowledge_capture" + ], + "stepBudget": 40 + }, + { + "type": "candidate_action", + "unitKey": "governance-and-exclusions", + "target": "wiki", + "action": "created", + "key": "knowledge/global/internal-test-exclusion.md" + }, + { + "type": "work_unit_finished", + "unitKey": "governance-and-exclusions", + "status": "success" + }, + { + "type": "reconciliation_finished", + "conflictCount": 0, + "fallbackCount": 0 + }, + { + "type": "saved", + "commitSha": "demo-seeded", + "wikiCount": 10, + "slCount": 6 + }, + { + "type": "provenance_recorded", + "rowCount": 23 + }, + { + "type": "report_created", + "runId": "demo-seeded-orbit", + "reportPath": "reports/seeded-demo-report.json" + } + ], + "plannedWorkUnits": [ + { + "unitKey": "revenue-and-contracts", + "rawFiles": [ + "contracts", + "invoices", + "arr_movements" + ], + "peerFileCount": 3, + "dependencyCount": 3 + }, + { + "unitKey": "retention-and-segments", + "rawFiles": [ + "accounts", + "plans" + ], + "peerFileCount": 2, + "dependencyCount": 2 + }, + { + "unitKey": "procurement-and-activation", + "rawFiles": [ + "purchase_requests", + "users" + ], + "peerFileCount": 2, + "dependencyCount": 2 + }, + { + "unitKey": "support-and-health", + "rawFiles": [ + "support_tickets" + ], + "peerFileCount": 1, + "dependencyCount": 1 + }, + { + "unitKey": "governance-and-exclusions", + "rawFiles": [ + "notion/export/pages/analyst-onboarding.md" + ], + "peerFileCount": 1, + "dependencyCount": 0 + } + ], + "details": { + "actions": [ + { + "unitKey": "revenue-and-contracts", + "target": "wiki", + "action": "created", + "key": "knowledge/global/arr-contract-first.md", + "summary": "ARR follows contract precedence with cancellation and discount caveats.", + "rawFiles": [ + "contracts", + "arr_movements", + "raw-sources/notion/arr-and-contract-reporting-notes.md" + ], + "status": "success" + }, + { + "unitKey": "revenue-and-contracts", + 
"target": "wiki", + "action": "created", + "key": "knowledge/global/revenue-gross-to-net.md", + "summary": "Invoice, refund, and revenue dashboard evidence reconcile gross to net revenue.", + "rawFiles": [ + "invoices", + "raw-sources/bi/revenue_exec.dashboard.lookml" + ], + "status": "success" + }, + { + "unitKey": "revenue-and-contracts", + "target": "wiki", + "action": "created", + "key": "knowledge/global/discount-expiration.md", + "summary": "Discount expiration is separated from organic contraction for retention reporting.", + "rawFiles": [ + "contracts", + "arr_movements" + ], + "status": "success" + }, + { + "unitKey": "revenue-and-contracts", + "target": "sl", + "action": "created", + "key": "orbit_demo.contracts", + "summary": "Contract grain with active ARR measures and account joins.", + "rawFiles": [ + "contracts", + "raw-sources/dbt/schema.yml" + ], + "status": "success" + }, + { + "unitKey": "revenue-and-contracts", + "target": "sl", + "action": "created", + "key": "orbit_demo.invoices", + "summary": "Invoice status measures tied to gross and net revenue reporting.", + "rawFiles": [ + "invoices", + "raw-sources/bi/revenue_daily.view.lkml" + ], + "status": "success" + }, + { + "unitKey": "revenue-and-contracts", + "target": "sl", + "action": "created", + "key": "orbit_demo.arr_movements", + "summary": "ARR movement ledger for expansion, contraction, churn, and NRR.", + "rawFiles": [ + "arr_movements", + "raw-sources/bi/account_retention.view.lkml" + ], + "status": "success" + }, + { + "unitKey": "retention-and-segments", + "target": "wiki", + "action": "created", + "key": "knowledge/global/nrr-retention.md", + "summary": "NRR uses parent-account rollups and quarterly ARR movement windows.", + "rawFiles": [ + "accounts", + "arr_movements", + "raw-sources/notion/retention-and-nrr-definition-notes.md" + ], + "status": "success" + }, + { + "unitKey": "retention-and-segments", + "target": "wiki", + "action": "created", + "key": 
"knowledge/global/segment-classification.md", + "summary": "Segment labels come from plan mapping and sales-ops policy notes.", + "rawFiles": [ + "accounts", + "plans", + "raw-sources/notion/sales-ops-segmentation-guide.md" + ], + "status": "success" + }, + { + "unitKey": "retention-and-segments", + "target": "sl", + "action": "created", + "key": "orbit_demo.accounts", + "summary": "Account dimensions with lifecycle, segment, and internal-test exclusions.", + "rawFiles": [ + "accounts", + "plans" + ], + "status": "success" + }, + { + "unitKey": "procurement-and-activation", + "target": "wiki", + "action": "created", + "key": "knowledge/global/activation-policy.md", + "summary": "Activation policy changed on January 15, 2026 and is encoded for agents.", + "rawFiles": [ + "purchase_requests", + "users", + "raw-sources/notion/activation-policy-decision-record.md" + ], + "status": "success" + }, + { + "unitKey": "procurement-and-activation", + "target": "wiki", + "action": "created", + "key": "knowledge/global/procurement-workflows.md", + "summary": "Procurement requester activity and approval events explain product usage.", + "rawFiles": [ + "purchase_requests", + "raw-sources/bi/procurement_activity.view.lkml" + ], + "status": "success" + }, + { + "unitKey": "procurement-and-activation", + "target": "sl", + "action": "created", + "key": "orbit_demo.purchase_requests", + "summary": "Procurement request facts with requester and approval-state measures.", + "rawFiles": [ + "purchase_requests" + ], + "status": "success" + }, + { + "unitKey": "support-and-health", + "target": "wiki", + "action": "created", + "key": "knowledge/global/customer-health-scoring.md", + "summary": "Customer health combines support severity, ARR exposure, and product usage.", + "rawFiles": [ + "support_tickets", + "raw-sources/notion/customer-health-playbook.md" + ], + "status": "success" + }, + { + "unitKey": "support-and-health", + "target": "wiki", + "action": "created", + "key": 
"knowledge/global/support-escalation.md", + "summary": "Escalation tiers map ticket severity to SLA expectations.", + "rawFiles": [ + "support_tickets", + "raw-sources/notion/support-escalation-runbook.md" + ], + "status": "success" + }, + { + "unitKey": "support-and-health", + "target": "sl", + "action": "created", + "key": "orbit_demo.support_tickets", + "summary": "Support ticket facts with severity, status, and resolution-hour measures.", + "rawFiles": [ + "support_tickets" + ], + "status": "success" + }, + { + "unitKey": "governance-and-exclusions", + "target": "wiki", + "action": "created", + "key": "knowledge/global/internal-test-exclusion.md", + "summary": "Canonical metrics exclude internal and test accounts across source families.", + "rawFiles": [ + "raw-sources/notion/analyst-onboarding.md" + ], + "status": "success" + } + ], + "provenance": [ + { + "rawPath": "contracts", + "artifactKind": "wiki", + "artifactKey": "knowledge/global/arr-contract-first.md", + "actionType": "wiki_written" + }, + { + "rawPath": "raw-sources/notion/arr-and-contract-reporting-notes.md", + "artifactKind": "wiki", + "artifactKey": "knowledge/global/arr-contract-first.md", + "actionType": "wiki_written" + }, + { + "rawPath": "invoices", + "artifactKind": "wiki", + "artifactKey": "knowledge/global/revenue-gross-to-net.md", + "actionType": "wiki_written" + }, + { + "rawPath": "raw-sources/notion/revenue-reporting-policy.md", + "artifactKind": "wiki", + "artifactKey": "knowledge/global/revenue-gross-to-net.md", + "actionType": "wiki_written" + }, + { + "rawPath": "arr_movements", + "artifactKind": "wiki", + "artifactKey": "knowledge/global/discount-expiration.md", + "actionType": "wiki_written" + }, + { + "rawPath": "arr_movements", + "artifactKind": "wiki", + "artifactKey": "knowledge/global/nrr-retention.md", + "actionType": "wiki_written" + }, + { + "rawPath": "raw-sources/notion/retention-and-nrr-definition-notes.md", + "artifactKind": "wiki", + "artifactKey": 
"knowledge/global/nrr-retention.md", + "actionType": "wiki_written" + }, + { + "rawPath": "raw-sources/bi/account_retention.view.lkml", + "artifactKind": "wiki", + "artifactKey": "knowledge/global/nrr-retention.md", + "actionType": "wiki_written" + }, + { + "rawPath": "plans", + "artifactKind": "wiki", + "artifactKey": "knowledge/global/segment-classification.md", + "actionType": "wiki_written" + }, + { + "rawPath": "raw-sources/notion/sales-ops-segmentation-guide.md", + "artifactKind": "wiki", + "artifactKey": "knowledge/global/segment-classification.md", + "actionType": "wiki_written" + }, + { + "rawPath": "raw-sources/notion/activation-policy-decision-record.md", + "artifactKind": "wiki", + "artifactKey": "knowledge/global/activation-policy.md", + "actionType": "wiki_written" + }, + { + "rawPath": "purchase_requests", + "artifactKind": "wiki", + "artifactKey": "knowledge/global/procurement-workflows.md", + "actionType": "wiki_written" + }, + { + "rawPath": "raw-sources/notion/customer-health-playbook.md", + "artifactKind": "wiki", + "artifactKey": "knowledge/global/customer-health-scoring.md", + "actionType": "wiki_written" + }, + { + "rawPath": "support_tickets", + "artifactKind": "wiki", + "artifactKey": "knowledge/global/customer-health-scoring.md", + "actionType": "wiki_written" + }, + { + "rawPath": "raw-sources/notion/support-escalation-runbook.md", + "artifactKind": "wiki", + "artifactKey": "knowledge/global/support-escalation.md", + "actionType": "wiki_written" + }, + { + "rawPath": "raw-sources/notion/analyst-onboarding.md", + "artifactKind": "wiki", + "artifactKey": "knowledge/global/internal-test-exclusion.md", + "actionType": "wiki_written" + }, + { + "rawPath": "accounts", + "artifactKind": "sl", + "artifactKey": "orbit_demo.accounts", + "actionType": "sl_written" + }, + { + "rawPath": "raw-sources/dbt/schema.yml", + "artifactKind": "sl", + "artifactKey": "orbit_demo.accounts", + "actionType": "sl_written" + }, + { + "rawPath": "contracts", + 
"artifactKind": "sl", + "artifactKey": "orbit_demo.contracts", + "actionType": "sl_written" + }, + { + "rawPath": "invoices", + "artifactKind": "sl", + "artifactKey": "orbit_demo.invoices", + "actionType": "sl_written" + }, + { + "rawPath": "arr_movements", + "artifactKind": "sl", + "artifactKey": "orbit_demo.arr_movements", + "actionType": "sl_written" + }, + { + "rawPath": "purchase_requests", + "artifactKind": "sl", + "artifactKey": "orbit_demo.purchase_requests", + "actionType": "sl_written" + }, + { + "rawPath": "support_tickets", + "artifactKind": "sl", + "artifactKey": "orbit_demo.support_tickets", + "actionType": "sl_written" + } + ], + "transcripts": [ + { + "unitKey": "revenue-and-contracts", + "path": "transcripts/revenue-and-contracts.jsonl", + "toolCallCount": 5, + "errorCount": 0, + "toolNames": [ + "wiki_write", + "sl_write_source" + ] + }, + { + "unitKey": "retention-and-segments", + "path": "transcripts/retention-and-segments.jsonl", + "toolCallCount": 5, + "errorCount": 0, + "toolNames": [ + "wiki_write", + "sl_write_source" + ] + }, + { + "unitKey": "procurement-and-activation", + "path": "transcripts/procurement-and-activation.jsonl", + "toolCallCount": 5, + "errorCount": 0, + "toolNames": [ + "wiki_write", + "sl_write_source" + ] + }, + { + "unitKey": "support-and-health", + "path": "transcripts/support-and-health.jsonl", + "toolCallCount": 5, + "errorCount": 0, + "toolNames": [ + "wiki_write", + "sl_write_source" + ] + }, + { + "unitKey": "governance-and-exclusions", + "path": "transcripts/governance-and-exclusions.jsonl", + "toolCallCount": 2, + "errorCount": 0, + "toolNames": [ + "wiki_write" + ] + } + ] + } + } +} diff --git a/packages/cli/assets/demo/orbit/reports/seeded-demo-report.json b/packages/cli/assets/demo/orbit/reports/seeded-demo-report.json new file mode 100644 index 00000000..8185cebb --- /dev/null +++ b/packages/cli/assets/demo/orbit/reports/seeded-demo-report.json @@ -0,0 +1,40 @@ +{ + "id": "demo-seeded-report", + "runId": 
"demo-seeded-orbit", + "connectionId": "orbit_demo", + "mode": "seeded", + "status": "complete", + "createdAt": "2026-05-06T00:00:00.000Z", + "summary": { + "sources": { + "warehouse": { + "tables": 8, + "rows": 11234 + }, + "dbt": { + "models": 3, + "sources": 8 + }, + "bi": { + "explores": 5, + "dashboards": 2, + "views": 5 + }, + "notion": { + "pages": 8 + } + }, + "generated": { + "semanticLayerSources": 6, + "knowledgePages": 10, + "provenanceLinks": 23 + }, + "metadata": { + "mode": "seeded", + "origin": "packaged", + "llmCalls": 0, + "timing": "prebuilt", + "source": "packaged-orbit-demo" + } + } +} diff --git a/packages/cli/assets/demo/orbit/semantic-layer/orbit_demo/accounts.yaml b/packages/cli/assets/demo/orbit/semantic-layer/orbit_demo/accounts.yaml new file mode 100644 index 00000000..a9dc698f --- /dev/null +++ b/packages/cli/assets/demo/orbit/semantic-layer/orbit_demo/accounts.yaml @@ -0,0 +1,44 @@ +name: accounts +table: accounts +description: Customer accounts with industry, region, lifecycle, and internal/test flags. 
+grain: + - account_id +columns: + - name: account_id + type: string + - name: parent_account_id + type: string + - name: account_name + type: string + - name: domain + type: string + - name: industry + type: string + - name: sales_region + type: string + - name: size_band + type: string + - name: lifecycle_status + type: string + - name: is_internal + type: boolean + - name: is_test + type: boolean + - name: created_at + type: time +joins: + - to: contracts + "on": "account_id = contracts.account_id" + relationship: one_to_many + - to: purchase_requests + "on": "account_id = purchase_requests.account_id" + relationship: one_to_many +measures: + - name: account_count + expr: "count(distinct account_id)" + - name: enterprise_count + expr: "count(distinct account_id)" + filter: "size_band = 'enterprise'" +segments: + - name: external_only + expr: "coalesce(is_internal, 0) = 0 AND coalesce(is_test, 0) = 0" diff --git a/packages/cli/assets/demo/orbit/semantic-layer/orbit_demo/arr_movements.yaml b/packages/cli/assets/demo/orbit/semantic-layer/orbit_demo/arr_movements.yaml new file mode 100644 index 00000000..cfe4d7fb --- /dev/null +++ b/packages/cli/assets/demo/orbit/semantic-layer/orbit_demo/arr_movements.yaml @@ -0,0 +1,38 @@ +name: arr_movements +table: arr_movements +description: ARR movement ledger for expansion, contraction, churn, and reactivation analysis. 
+grain: + - arr_movement_id +columns: + - name: arr_movement_id + type: string + - name: account_id + type: string + - name: parent_account_id + type: string + - name: contract_id + type: string + - name: movement_date + type: time + - name: movement_type + type: string + - name: movement_reason + type: string + - name: arr_delta_cents + type: number + - name: starting_arr_cents + type: number + - name: ending_arr_cents + type: number +joins: + - to: accounts + "on": "account_id = accounts.account_id" + relationship: many_to_one +measures: + - name: movement_count + expr: "count(*)" + - name: net_arr_delta + expr: "sum(arr_delta_cents) / 100.0" +segments: + - name: external_only + expr: "coalesce(is_internal, 0) = 0 AND coalesce(is_test, 0) = 0" diff --git a/packages/cli/assets/demo/orbit/semantic-layer/orbit_demo/contracts.yaml b/packages/cli/assets/demo/orbit/semantic-layer/orbit_demo/contracts.yaml new file mode 100644 index 00000000..cf6c4c7c --- /dev/null +++ b/packages/cli/assets/demo/orbit/semantic-layer/orbit_demo/contracts.yaml @@ -0,0 +1,39 @@ +name: contracts +table: contracts +description: Subscription contracts with ARR, plan, renewal, and status details. 
+grain: + - contract_id +columns: + - name: contract_id + type: string + - name: account_id + type: string + - name: parent_account_id + type: string + - name: plan_id + type: string + - name: contract_arr_cents + type: number + - name: booked_arr_cents + type: number + - name: start_date + type: time + - name: end_date + type: time + - name: status + type: string + - name: renewal_type + type: string +joins: + - to: accounts + "on": "account_id = accounts.account_id" + relationship: many_to_one +measures: + - name: contract_count + expr: "count(distinct contract_id)" + - name: total_arr + expr: "sum(contract_arr_cents) / 100.0" + filter: "status = 'active'" +segments: + - name: external_only + expr: "coalesce(is_internal, 0) = 0 AND coalesce(is_test, 0) = 0" diff --git a/packages/cli/assets/demo/orbit/semantic-layer/orbit_demo/invoices.yaml b/packages/cli/assets/demo/orbit/semantic-layer/orbit_demo/invoices.yaml new file mode 100644 index 00000000..178c6bad --- /dev/null +++ b/packages/cli/assets/demo/orbit/semantic-layer/orbit_demo/invoices.yaml @@ -0,0 +1,33 @@ +name: invoices +table: invoices +description: Billing invoices with payment status and revenue-recognition dates. 
+grain: + - invoice_id +columns: + - name: invoice_id + type: string + - name: account_id + type: string + - name: subscription_id + type: string + - name: invoice_date + type: time + - name: paid_at + type: time + - name: status + type: string + - name: currency + type: string +joins: + - to: accounts + "on": "account_id = accounts.account_id" + relationship: many_to_one +measures: + - name: invoice_count + expr: "count(*)" + - name: paid_invoice_count + expr: "count(*)" + filter: "status = 'paid'" +segments: + - name: external_only + expr: "coalesce(is_internal, 0) = 0 AND coalesce(is_test, 0) = 0" diff --git a/packages/cli/assets/demo/orbit/semantic-layer/orbit_demo/purchase_requests.yaml b/packages/cli/assets/demo/orbit/semantic-layer/orbit_demo/purchase_requests.yaml new file mode 100644 index 00000000..db9df059 --- /dev/null +++ b/packages/cli/assets/demo/orbit/semantic-layer/orbit_demo/purchase_requests.yaml @@ -0,0 +1,33 @@ +name: purchase_requests +table: purchase_requests +description: Procurement workflow requests with requester, status, supplier, and spend fields. 
+grain: + - purchase_request_id +columns: + - name: purchase_request_id + type: string + - name: account_id + type: string + - name: requester_user_id + type: string + - name: created_at + type: time + - name: status + type: string + - name: amount_cents + type: number + - name: supplier_id + type: string +joins: + - to: accounts + "on": "account_id = accounts.account_id" + relationship: many_to_one +measures: + - name: request_count + expr: "count(*)" + - name: approved_spend + expr: "sum(amount_cents) / 100.0" + filter: "status = 'approved'" +segments: + - name: external_only + expr: "coalesce(is_internal, 0) = 0 AND coalesce(is_test, 0) = 0" diff --git a/packages/cli/assets/demo/orbit/semantic-layer/orbit_demo/support_tickets.yaml b/packages/cli/assets/demo/orbit/semantic-layer/orbit_demo/support_tickets.yaml new file mode 100644 index 00000000..ddbc97e7 --- /dev/null +++ b/packages/cli/assets/demo/orbit/semantic-layer/orbit_demo/support_tickets.yaml @@ -0,0 +1,37 @@ +name: support_tickets +table: support_tickets +description: Customer support tickets with severity, category, status, and resolution tracking. 
+grain: + - support_ticket_id +columns: + - name: support_ticket_id + type: string + - name: account_id + type: string + - name: requester_user_id + type: string + - name: severity + type: string + - name: category + type: string + - name: status + type: string + - name: created_at + type: time + - name: resolved_at + type: time + - name: owner_user_id + type: string +joins: + - to: accounts + "on": "account_id = accounts.account_id" + relationship: many_to_one +measures: + - name: ticket_count + expr: "count(*)" + - name: open_ticket_count + expr: "count(*)" + filter: "status != 'resolved'" +segments: + - name: external_only + expr: "coalesce(is_internal, 0) = 0 AND coalesce(is_test, 0) = 0" diff --git a/packages/cli/package.json b/packages/cli/package.json new file mode 100644 index 00000000..498c7305 --- /dev/null +++ b/packages/cli/package.json @@ -0,0 +1,72 @@ +{ + "name": "@klo/cli", + "version": "0.0.0-private", + "description": "CLI wrapper for klo context packages", + "private": true, + "type": "module", + "engines": { + "node": ">=22.0.0" + }, + "bin": { + "klo": "./dist/bin.js" + }, + "main": "dist/index.js", + "types": "dist/index.d.ts", + "exports": { + ".": { + "types": "./dist/index.d.ts", + "import": "./dist/index.js", + "default": "./dist/index.js" + }, + "./package.json": "./package.json" + }, + "files": [ + "dist", + "assets" + ], + "scripts": { + "assets:demo": "node scripts/build-demo-assets.mjs", + "build": "node -e \"fs.rmSync('dist', { recursive: true, force: true })\" && tsc -p tsconfig.json && node ../../scripts/prepare-cli-bin.mjs", + "smoke": "vitest run src/standalone-smoke.test.ts src/example-smoke.test.ts --testTimeout 30000", + "test": "vitest run", + "type-check": "tsc -p tsconfig.json --noEmit" + }, + "dependencies": { + "@clack/prompts": "1.3.0", + "@commander-js/extra-typings": "14.0.0", + "@klo/connector-bigquery": "workspace:*", + "@klo/connector-clickhouse": "workspace:*", + "@klo/connector-mysql": "workspace:*", + 
"@klo/connector-postgres": "workspace:*", + "@klo/connector-posthog": "workspace:*", + "@klo/connector-snowflake": "workspace:*", + "@klo/connector-sqlite": "workspace:*", + "@klo/connector-sqlserver": "workspace:*", + "@klo/context": "workspace:*", + "@klo/llm": "workspace:*", + "@modelcontextprotocol/sdk": "^1.27.1", + "commander": "14.0.3", + "ink": "^7.0.1", + "react": "^19.2.5", + "zod": "^4.4.3" + }, + "devDependencies": { + "@types/better-sqlite3": "^7.6.13", + "@types/node": "^24.3.0", + "@types/react": "^19.2.14", + "better-sqlite3": "^12.6.2", + "ink-testing-library": "^4.0.0", + "typescript": "^5.9.3", + "vitest": "^4.0.18" + }, + "license": "Apache-2.0", + "repository": { + "type": "git", + "url": "git+https://github.com/kaelio/ktx.git", + "directory": "packages/cli" + }, + "bugs": { + "url": "https://github.com/kaelio/ktx/issues" + }, + "homepage": "https://github.com/kaelio/ktx#readme" +} diff --git a/packages/cli/scripts/build-demo-assets.mjs b/packages/cli/scripts/build-demo-assets.mjs new file mode 100644 index 00000000..036ac79b --- /dev/null +++ b/packages/cli/scripts/build-demo-assets.mjs @@ -0,0 +1,954 @@ +import { constants as fsConstants } from 'node:fs'; +import { access, copyFile, mkdir, readFile, rm, stat, writeFile } from 'node:fs/promises'; +import { dirname, join, resolve } from 'node:path'; +import { fileURLToPath } from 'node:url'; +import Database from 'better-sqlite3'; + +const packageRoot = dirname(dirname(fileURLToPath(import.meta.url))); +const repoRoot = resolve(packageRoot, '../..'); +const defaultDemoSource = resolve(repoRoot, '../../../orbit-demo-source'); +const sourceRoot = resolve(process.env.KLO_DEMO_SOURCE_DIR ?? 
defaultDemoSource); +const assetDir = join(packageRoot, 'assets/demo/orbit'); +const dbPath = join(assetDir, 'demo.db'); +const exampleDbtProjectDir = ['dbt', `${'kae'}lio_demo`].join('/'); +const packagedDemoSource = 'packaged-orbit-demo'; + +const warehouseTables = [ + 'accounts', + 'contracts', + 'users', + 'invoices', + 'arr_movements', + 'support_tickets', + 'purchase_requests', + 'plans', +]; + +const copyFiles = [ + [`${exampleDbtProjectDir}/dbt_project.yml`, 'raw-sources/dbt/dbt_project.yml'], + [`${exampleDbtProjectDir}/models/sources.yml`, 'raw-sources/dbt/sources.yml'], + [`${exampleDbtProjectDir}/models/schema.yml`, 'raw-sources/dbt/schema.yml'], + [`${exampleDbtProjectDir}/models/marts/mart_revenue_daily.sql`, 'raw-sources/dbt/models/marts/mart_revenue_daily.sql'], + [`${exampleDbtProjectDir}/models/marts/mart_arr_daily.sql`, 'raw-sources/dbt/models/marts/mart_arr_daily.sql'], + [ + `${exampleDbtProjectDir}/models/marts/mart_customer_health.sql`, + 'raw-sources/dbt/models/marts/mart_customer_health.sql', + ], + ['views/account_retention.view.lkml', 'raw-sources/bi/account_retention.view.lkml'], + ['views/arr_daily.view.lkml', 'raw-sources/bi/arr_daily.view.lkml'], + ['views/customer_health.view.lkml', 'raw-sources/bi/customer_health.view.lkml'], + ['views/procurement_activity.view.lkml', 'raw-sources/bi/procurement_activity.view.lkml'], + ['views/revenue_daily.view.lkml', 'raw-sources/bi/revenue_daily.view.lkml'], + ['dashboards/revenue_exec.dashboard.lookml', 'raw-sources/bi/revenue_exec.dashboard.lookml'], + ['dashboards/retention_exec_q1.dashboard.lookml', 'raw-sources/bi/retention_exec_q1.dashboard.lookml'], + ['notion/export/pages/revenue-reporting-policy.md', 'raw-sources/notion/revenue-reporting-policy.md'], + ['notion/export/pages/sales-ops-segmentation-guide.md', 'raw-sources/notion/sales-ops-segmentation-guide.md'], + ['notion/export/pages/customer-health-playbook.md', 'raw-sources/notion/customer-health-playbook.md'], + 
['notion/export/pages/support-escalation-runbook.md', 'raw-sources/notion/support-escalation-runbook.md'], + [ + 'notion/export/pages/arr-and-contract-reporting-notes.md', + 'raw-sources/notion/arr-and-contract-reporting-notes.md', + ], + [ + 'notion/export/pages/activation-policy-decision-record.md', + 'raw-sources/notion/activation-policy-decision-record.md', + ], + [ + 'notion/export/pages/retention-and-nrr-definition-notes.md', + 'raw-sources/notion/retention-and-nrr-definition-notes.md', + ], + ['notion/export/pages/analyst-onboarding.md', 'raw-sources/notion/analyst-onboarding.md'], +]; + +const semanticLayerTables = [ + 'accounts', + 'contracts', + 'invoices', + 'arr_movements', + 'purchase_requests', + 'support_tickets', +]; + +const semanticLayerDescriptions = { + accounts: 'Customer accounts with industry, region, lifecycle, and internal/test flags.', + contracts: 'Subscription contracts with ARR, plan, renewal, and status details.', + invoices: 'Billing invoices with payment status and revenue-recognition dates.', + arr_movements: 'ARR movement ledger for expansion, contraction, churn, and reactivation analysis.', + purchase_requests: 'Procurement workflow requests with requester, status, supplier, and spend fields.', + support_tickets: 'Customer support tickets with severity, category, status, and resolution tracking.', +}; + +const semanticLayerMeasures = { + accounts: [ + { name: 'account_count', expr: 'count(distinct account_id)' }, + { name: 'enterprise_count', expr: 'count(distinct account_id)', filter: "size_band = 'enterprise'" }, + ], + contracts: [ + { name: 'contract_count', expr: 'count(distinct contract_id)' }, + { name: 'total_arr', expr: 'sum(contract_arr_cents) / 100.0', filter: "status = 'active'" }, + ], + invoices: [ + { name: 'invoice_count', expr: 'count(*)' }, + { name: 'paid_invoice_count', expr: 'count(*)', filter: "status = 'paid'" }, + ], + arr_movements: [ + { name: 'movement_count', expr: 'count(*)' }, + { name: 
'net_arr_delta', expr: 'sum(arr_delta_cents) / 100.0' }, + ], + purchase_requests: [ + { name: 'request_count', expr: 'count(*)' }, + { name: 'approved_spend', expr: 'sum(amount_cents) / 100.0', filter: "status = 'approved'" }, + ], + support_tickets: [ + { name: 'ticket_count', expr: 'count(*)' }, + { name: 'open_ticket_count', expr: 'count(*)', filter: "status != 'resolved'" }, + ], +}; + +const knowledgePages = [ + { + file: 'arr-contract-first.md', + summary: 'ARR uses contract-first precedence before subscription-derived revenue.', + tags: ['finance', 'arr', 'revenue'], + refs: [], + slRefs: ['orbit_demo.contracts', 'orbit_demo.arr_movements'], + body: [ + 'ARR is calculated from active recurring contract ARR before falling back to subscription-derived revenue.', + 'Do not double-count subscription MRR when an active contract row covers the same account and period.', + 'Exclude cancelled contracts ending before the metric date, future-starting contracts, internal accounts, and test accounts.', + ], + }, + { + file: 'revenue-gross-to-net.md', + summary: 'Gross-to-net revenue reconciles paid invoices, credits, and refunds.', + tags: ['finance', 'revenue'], + refs: ['arr-contract-first'], + slRefs: ['orbit_demo.invoices'], + body: [ + 'Gross revenue starts from paid invoice activity. 
Net revenue subtracts credits and successful refunds in the month they are recorded.', + 'Exclude unpaid, void, draft, failed, internal, and test-account invoice activity from canonical revenue reporting.', + 'February 2026 has an elevated refund event captured in the source notes and revenue dashboard.', + ], + }, + { + file: 'discount-expiration.md', + summary: 'Discount expirations are tracked separately from organic contraction.', + tags: ['finance', 'retention'], + refs: ['arr-contract-first', 'nrr-retention'], + slRefs: ['orbit_demo.contracts', 'orbit_demo.arr_movements'], + body: [ + 'Discount expiration events identify pricing changes when negotiated discounts end.', + 'Track these separately from organic contraction so board reporting can split pricing-driven and usage-driven changes.', + 'Use movement_reason on arr_movements when separating discount expiration from churn or seat-reduction events.', + ], + }, + { + file: 'nrr-retention.md', + summary: 'NRR is calculated at parent-account grain by calendar quarter.', + tags: ['analytics', 'retention', 'nrr'], + refs: ['arr-contract-first'], + slRefs: ['orbit_demo.arr_movements', 'orbit_demo.accounts'], + body: [ + 'Net Revenue Retention uses parent-account rollups by calendar quarter.', + 'The formula is starting ARR plus expansion minus contraction and churn, divided by starting ARR.', + 'Exclude parent accounts with zero starting ARR, new business, reactivations, and internal/test accounts from the denominator.', + ], + }, + { + file: 'segment-classification.md', + summary: 'Account segments derive from plan normalization and effective-dated mapping.', + tags: ['sales-ops', 'segmentation'], + refs: [], + slRefs: ['orbit_demo.accounts', 'orbit_demo.contracts'], + body: [ + 'Account segment labels combine plan_code, canonical_plan_code, and size_band fields.', + 'Historical plan code pro_plus maps to growth for current segment analysis.', + 'Use the mapping active at the metric date when segment definitions 
change over time.', + ], + }, + { + file: 'activation-policy.md', + summary: 'Account activation policy changed on January 15, 2026.', + tags: ['growth', 'activation', 'policy'], + refs: [], + slRefs: ['orbit_demo.accounts', 'orbit_demo.purchase_requests'], + body: [ + 'Before January 15, 2026, activation meant first requester login.', + 'On and after January 15, 2026, activation requires an approved purchase request and at least three activated requesters.', + 'Always separate pre-policy and post-policy cohorts when comparing activation rates.', + ], + }, + { + file: 'procurement-workflows.md', + summary: 'Procurement workflow activity measures active requesters and qualifying actions.', + tags: ['product', 'procurement'], + refs: ['activation-policy'], + slRefs: ['orbit_demo.purchase_requests'], + body: [ + 'Weekly active requesters counts distinct non-internal requesters with a qualifying procurement action in the calendar week.', + 'Qualifying actions include purchase request creation, approval decisions, supplier invites, and purchase-order creation.', + 'Purchase-request comments and short sessions are excluded from the canonical requester activity metric.', + ], + }, + { + file: 'customer-health-scoring.md', + summary: 'Customer health combines support severity and procurement activity.', + tags: ['customer-success', 'health', 'churn-risk'], + refs: ['nrr-retention'], + slRefs: ['orbit_demo.support_tickets', 'orbit_demo.purchase_requests', 'orbit_demo.accounts'], + body: [ + 'High-risk accounts have multiple recent high-severity tickets or no recent procurement activity on growth and enterprise plans.', + 'Medium risk captures partial support pressure or a material month-over-month decline in procurement activity.', + 'Internal and test accounts are excluded from customer health scoring.', + ], + }, + { + file: 'support-escalation.md', + summary: 'Support escalation tiers map ticket severity to SLA targets.', + tags: ['support', 'sla'], + refs: 
['customer-health-scoring'], + slRefs: ['orbit_demo.support_tickets'], + body: [ + 'Critical support tickets require immediate response and on-call escalation.', + 'High severity tickets should receive first response within four business hours.', + 'Resolution time is measured from created_at to resolved_at and only applies to resolved tickets.', + ], + }, + { + file: 'internal-test-exclusion.md', + summary: 'Canonical metrics exclude internal and test accounts and users.', + tags: ['data-quality', 'governance'], + refs: [], + slRefs: ['orbit_demo.accounts'], + body: [ + 'All canonical customer metrics exclude rows marked as internal or test fixtures.', + 'This exclusion applies at both account and user grain when joining procurement, support, and revenue activity.', + 'If a metric unexpectedly increases, check whether new internal or test accounts were created without proper flags.', + ], + }, +]; + +const provenanceLinks = [ + ['wiki', 'knowledge/global/arr-contract-first.md', 'warehouse', 'contracts', 'describes', 1], + [ + 'wiki', + 'knowledge/global/arr-contract-first.md', + 'notion', + 'raw-sources/notion/arr-and-contract-reporting-notes.md', + 'derived_from', + 0.95, + ], + ['wiki', 'knowledge/global/revenue-gross-to-net.md', 'warehouse', 'invoices', 'describes', 1], + [ + 'wiki', + 'knowledge/global/revenue-gross-to-net.md', + 'notion', + 'raw-sources/notion/revenue-reporting-policy.md', + 'derived_from', + 0.95, + ], + ['wiki', 'knowledge/global/discount-expiration.md', 'warehouse', 'arr_movements', 'describes', 1], + ['wiki', 'knowledge/global/nrr-retention.md', 'warehouse', 'arr_movements', 'describes', 1], + [ + 'wiki', + 'knowledge/global/nrr-retention.md', + 'notion', + 'raw-sources/notion/retention-and-nrr-definition-notes.md', + 'derived_from', + 0.95, + ], + ['wiki', 'knowledge/global/nrr-retention.md', 'bi', 'raw-sources/bi/account_retention.view.lkml', 'derived_from', 0.85], + ['wiki', 'knowledge/global/segment-classification.md', 'warehouse', 
/**
 * Report whether a filesystem entry exists at `path`.
 *
 * Probes with `access(..., F_OK)`, so only existence is checked, not
 * read permission. Any failure of the probe is reported as `false`.
 *
 * @param {string} path - Location to probe.
 * @returns {Promise<boolean>} `true` when the probe succeeds, otherwise `false`.
 */
async function pathExists(path) {
  return access(path, fsConstants.F_OK).then(
    () => true,
    () => false,
  );
}
/**
 * Split a single CSV record into its field values.
 *
 * Commas inside double-quoted sections are kept as data, and a doubled quote
 * (`""`) inside a quoted section yields one literal quote character.
 *
 * @param {string} line - One CSV record (may contain line breaks inside quotes).
 * @returns {string[]} The field values, always at least one element.
 */
function parseCsvLine(line) {
  const values = [];
  let current = '';
  let quoted = false;
  for (let index = 0; index < line.length; index += 1) {
    const char = line[index];
    if (char === '"' && quoted && line[index + 1] === '"') {
      // Escaped quote inside a quoted field: emit one quote, skip its twin.
      current += '"';
      index += 1;
    } else if (char === '"') {
      quoted = !quoted;
    } else if (char === ',' && !quoted) {
      values.push(current);
      current = '';
    } else {
      current += char;
    }
  }
  values.push(current);
  return values;
}

/**
 * Split CSV text into records on LF / CRLF line breaks that sit outside
 * double quotes, so quoted fields may span several physical lines.
 *
 * @param {string} text - CSV text without a trailing line break.
 * @returns {string[]} The records, always at least one element.
 */
function splitCsvRecords(text) {
  const records = [];
  let start = 0;
  let quoted = false;
  for (let index = 0; index < text.length; index += 1) {
    const char = text[index];
    if (char === '"') {
      // An escaped `""` toggles twice, leaving the quoted state unchanged.
      quoted = !quoted;
    } else if (char === '\n' && !quoted) {
      // Drop the CR of a CRLF pair, but never reach back into the previous record.
      const end = index > start && text[index - 1] === '\r' ? index - 1 : index;
      records.push(text.slice(start, end));
      start = index + 1;
    }
  }
  records.push(text.slice(start));
  return records;
}

/**
 * Parse CSV text into a header row and data rows.
 *
 * A leading UTF-8 byte-order mark is removed so it cannot leak into the first
 * header name, and line breaks inside quoted fields are kept as field data
 * instead of splitting the record. Trailing whitespace of the whole input is
 * ignored. Empty input yields a single empty header and no rows.
 *
 * @param {string} raw - Full CSV file contents.
 * @returns {{ headers: string[], rows: string[][] }} Parsed header and rows.
 */
function parseCsv(raw) {
  const text = raw.charCodeAt(0) === 0xfeff ? raw.slice(1) : raw;
  const [headerLine = '', ...recordLines] = splitCsvRecords(text.trimEnd());
  return {
    headers: parseCsvLine(headerLine),
    rows: recordLines.map((line) => parseCsvLine(line)),
  };
}

/**
 * Wrap `value` in double quotes, doubling any embedded double quote.
 *
 * @param {string} value - Identifier text to quote.
 * @returns {string} The quoted identifier.
 */
function quoteIdentifier(value) {
  return `"${value.replace(/"/g, '""')}"`;
}

/**
 * Guess a column type from naming conventions in the column name.
 *
 * @param {string} column - Column name.
 * @returns {'boolean' | 'time' | 'number' | 'string'} `boolean` for an `is_`
 *   prefix, `time` for `_at` / `_date` suffixes, `number` for `_cents` /
 *   `_count` suffixes, otherwise `string`.
 */
function inferColumnType(column) {
  if (column.startsWith('is_')) {
    return 'boolean';
  }
  // `retired_at` needs no special case: it already matches the `_at` suffix.
  if (column.endsWith('_at') || column.endsWith('_date')) {
    return 'time';
  }
  if (column.endsWith('_cents') || column.endsWith('_count')) {
    return 'number';
  }
  return 'string';
}
['refs:', ...page.refs.map((ref) => ` - ${ref}`)] : ['refs: []']; + const slRefs = page.slRefs.map((ref) => ` - ${ref}`).join('\n'); + return [ + '---', + `summary: ${page.summary}`, + 'tags:', + ...page.tags.map((tag) => ` - ${tag}`), + ...refs, + 'sl_refs:', + slRefs, + 'usage_mode: auto', + '---', + '', + page.body.join('\n\n'), + '', + ].join('\n'); +} + +function renderMeasure(measure) { + const lines = [` - name: ${measure.name}`, ` expr: ${JSON.stringify(measure.expr)}`]; + if (measure.filter) { + lines.push(` filter: ${JSON.stringify(measure.filter)}`); + } + return lines.join('\n'); +} + +async function renderSemanticLayerSource(table) { + const raw = await readFile(join(sourceRoot, 'database/seeds', `${table}.csv`), 'utf-8'); + const { headers } = parseCsv(raw); + const primaryKey = headers[0]; + const joins = + table === 'accounts' + ? [ + ' - to: contracts', + ' "on": "account_id = contracts.account_id"', + ' relationship: one_to_many', + ' - to: purchase_requests', + ' "on": "account_id = purchase_requests.account_id"', + ' relationship: one_to_many', + ] + : [' - to: accounts', ' "on": "account_id = accounts.account_id"', ' relationship: many_to_one']; + + return [ + `name: ${table}`, + `table: ${table}`, + `description: ${semanticLayerDescriptions[table]}`, + 'grain:', + ` - ${primaryKey}`, + 'columns:', + ...headers.flatMap((header) => [` - name: ${header}`, ` type: ${inferColumnType(header)}`]), + 'joins:', + ...joins, + 'measures:', + ...semanticLayerMeasures[table].map(renderMeasure), + 'segments:', + ' - name: external_only', + ' expr: "coalesce(is_internal, 0) = 0 AND coalesce(is_test, 0) = 0"', + '', + ].join('\n'); +} + +async function writeWarehouse(db, rowCounts) { + for (const table of warehouseTables) { + const sourceCsv = join(sourceRoot, 'database/seeds', `${table}.csv`); + const raw = await readFile(sourceCsv, 'utf-8'); + const { headers, rows } = parseCsv(raw); + const columnsSql = headers.map((header) => `${quoteIdentifier(header)} 
/**
 * Resolve `relativePath` under the asset directory and make sure its parent
 * directory exists.
 *
 * @param {string} relativePath - Path relative to `assetDir`.
 * @returns {Promise<string>} The resolved destination path.
 */
async function prepareAssetDestination(relativePath) {
  const target = join(assetDir, relativePath);
  await mkdir(dirname(target), { recursive: true });
  return target;
}

/**
 * Copy every `[from, to]` pair listed in `copyFiles` from the demo source
 * tree into the asset directory, one file at a time and in list order.
 *
 * @returns {Promise<void>}
 */
async function copyCuratedSourceFiles() {
  for (const pair of copyFiles) {
    const target = await prepareAssetDestination(pair[1]);
    await copyFile(join(sourceRoot, pair[0]), target);
  }
}

/**
 * Write `value` as two-space-indented JSON followed by a newline.
 *
 * @param {string} relativePath - Destination relative to the asset directory.
 * @param {unknown} value - Value to serialize.
 * @returns {Promise<void>}
 */
async function writeJson(relativePath, value) {
  const target = await prepareAssetDestination(relativePath);
  // Serialize only after the directory exists, matching the original ordering.
  const payload = `${JSON.stringify(value, null, 2)}\n`;
  await writeFile(target, payload, 'utf-8');
}

/**
 * Write `value` verbatim as UTF-8 text.
 *
 * @param {string} relativePath - Destination relative to the asset directory.
 * @param {string} value - File contents.
 * @returns {Promise<void>}
 */
async function writeText(relativePath, value) {
  const target = await prepareAssetDestination(relativePath);
  await writeFile(target, value, 'utf-8');
}
key: 'knowledge/global/discount-expiration.md', + summary: 'Discount expiration is separated from organic contraction for retention reporting.', + rawFiles: ['contracts', 'arr_movements'], + status: 'success', + }, + { + unitKey: 'revenue-and-contracts', + target: 'sl', + action: 'created', + key: 'orbit_demo.contracts', + summary: 'Contract grain with active ARR measures and account joins.', + rawFiles: ['contracts', 'raw-sources/dbt/schema.yml'], + status: 'success', + }, + { + unitKey: 'revenue-and-contracts', + target: 'sl', + action: 'created', + key: 'orbit_demo.invoices', + summary: 'Invoice status measures tied to gross and net revenue reporting.', + rawFiles: ['invoices', 'raw-sources/bi/revenue_daily.view.lkml'], + status: 'success', + }, + { + unitKey: 'revenue-and-contracts', + target: 'sl', + action: 'created', + key: 'orbit_demo.arr_movements', + summary: 'ARR movement ledger for expansion, contraction, churn, and NRR.', + rawFiles: ['arr_movements', 'raw-sources/bi/account_retention.view.lkml'], + status: 'success', + }, + { + unitKey: 'retention-and-segments', + target: 'wiki', + action: 'created', + key: 'knowledge/global/nrr-retention.md', + summary: 'NRR uses parent-account rollups and quarterly ARR movement windows.', + rawFiles: ['accounts', 'arr_movements', 'raw-sources/notion/retention-and-nrr-definition-notes.md'], + status: 'success', + }, + { + unitKey: 'retention-and-segments', + target: 'wiki', + action: 'created', + key: 'knowledge/global/segment-classification.md', + summary: 'Segment labels come from plan mapping and sales-ops policy notes.', + rawFiles: ['accounts', 'plans', 'raw-sources/notion/sales-ops-segmentation-guide.md'], + status: 'success', + }, + { + unitKey: 'retention-and-segments', + target: 'sl', + action: 'created', + key: 'orbit_demo.accounts', + summary: 'Account dimensions with lifecycle, segment, and internal-test exclusions.', + rawFiles: ['accounts', 'plans'], + status: 'success', + }, + { + unitKey: 
'procurement-and-activation', + target: 'wiki', + action: 'created', + key: 'knowledge/global/activation-policy.md', + summary: 'Activation policy changed on January 15, 2026 and is encoded for agents.', + rawFiles: ['purchase_requests', 'users', 'raw-sources/notion/activation-policy-decision-record.md'], + status: 'success', + }, + { + unitKey: 'procurement-and-activation', + target: 'wiki', + action: 'created', + key: 'knowledge/global/procurement-workflows.md', + summary: 'Procurement requester activity and approval events explain product usage.', + rawFiles: ['purchase_requests', 'raw-sources/bi/procurement_activity.view.lkml'], + status: 'success', + }, + { + unitKey: 'procurement-and-activation', + target: 'sl', + action: 'created', + key: 'orbit_demo.purchase_requests', + summary: 'Procurement request facts with requester and approval-state measures.', + rawFiles: ['purchase_requests'], + status: 'success', + }, + { + unitKey: 'support-and-health', + target: 'wiki', + action: 'created', + key: 'knowledge/global/customer-health-scoring.md', + summary: 'Customer health combines support severity, ARR exposure, and product usage.', + rawFiles: ['support_tickets', 'raw-sources/notion/customer-health-playbook.md'], + status: 'success', + }, + { + unitKey: 'support-and-health', + target: 'wiki', + action: 'created', + key: 'knowledge/global/support-escalation.md', + summary: 'Escalation tiers map ticket severity to SLA expectations.', + rawFiles: ['support_tickets', 'raw-sources/notion/support-escalation-runbook.md'], + status: 'success', + }, + { + unitKey: 'support-and-health', + target: 'sl', + action: 'created', + key: 'orbit_demo.support_tickets', + summary: 'Support ticket facts with severity, status, and resolution-hour measures.', + rawFiles: ['support_tickets'], + status: 'success', + }, + { + unitKey: 'governance-and-exclusions', + target: 'wiki', + action: 'created', + key: 'knowledge/global/internal-test-exclusion.md', + summary: 'Canonical metrics 
exclude internal and test accounts across source families.', + rawFiles: ['raw-sources/notion/analyst-onboarding.md'], + status: 'success', + }, + ]; +} + +function buildReplay(provenance, transcripts) { + return { + memoryFlowReplaySchemaVersion: 1, + replay: { + runId: 'demo-seeded-orbit', + connectionId: 'orbit_demo', + adapter: 'live-database', + status: 'done', + sourceDir: null, + syncId: 'demo-seeded-sync', + reportId: 'demo-seeded-report', + reportPath: 'reports/seeded-demo-report.json', + errors: [], + metadata: { + schemaVersion: 1, + mode: 'seeded', + origin: 'packaged', + timing: 'prebuilt', + capturedAt: '2026-05-06T00:00:00.000Z', + sourceReportId: 'demo-seeded-report', + sourceReportPath: 'reports/seeded-demo-report.json', + fallbackReason: null, + }, + events: [ + { type: 'source_acquired', adapter: 'live-database', trigger: 'demo_seeded', fileCount: 8 }, + { type: 'source_acquired', adapter: 'dbt_descriptions', trigger: 'demo_seeded', fileCount: 6 }, + { type: 'source_acquired', adapter: 'looker', trigger: 'demo_seeded', fileCount: 7 }, + { type: 'source_acquired', adapter: 'notion', trigger: 'demo_seeded', fileCount: 8 }, + { type: 'scope_detected', fingerprint: 'sqlite:orbit-demo' }, + { type: 'raw_snapshot_written', syncId: 'demo-seeded-sync', rawFileCount: 29 }, + { type: 'diff_computed', added: 29, modified: 0, deleted: 0, unchanged: 0 }, + { type: 'chunks_planned', chunkCount: 5, workUnitCount: 5, evictionCount: 0 }, + { type: 'work_unit_started', unitKey: 'revenue-and-contracts', skills: ['knowledge_capture', 'sl_capture'], stepBudget: 40 }, + { + type: 'candidate_action', + unitKey: 'revenue-and-contracts', + target: 'wiki', + action: 'created', + key: 'knowledge/global/arr-contract-first.md', + }, + { + type: 'candidate_action', + unitKey: 'revenue-and-contracts', + target: 'wiki', + action: 'created', + key: 'knowledge/global/revenue-gross-to-net.md', + }, + { + type: 'candidate_action', + unitKey: 'revenue-and-contracts', + target: 
'wiki', + action: 'created', + key: 'knowledge/global/discount-expiration.md', + }, + { + type: 'candidate_action', + unitKey: 'revenue-and-contracts', + target: 'sl', + action: 'created', + key: 'orbit_demo.contracts', + }, + { + type: 'candidate_action', + unitKey: 'revenue-and-contracts', + target: 'sl', + action: 'created', + key: 'orbit_demo.invoices', + }, + { + type: 'candidate_action', + unitKey: 'revenue-and-contracts', + target: 'sl', + action: 'created', + key: 'orbit_demo.arr_movements', + }, + { type: 'work_unit_finished', unitKey: 'revenue-and-contracts', status: 'success' }, + { type: 'work_unit_started', unitKey: 'retention-and-segments', skills: ['knowledge_capture', 'sl_capture'], stepBudget: 40 }, + { + type: 'candidate_action', + unitKey: 'retention-and-segments', + target: 'wiki', + action: 'created', + key: 'knowledge/global/nrr-retention.md', + }, + { + type: 'candidate_action', + unitKey: 'retention-and-segments', + target: 'wiki', + action: 'created', + key: 'knowledge/global/segment-classification.md', + }, + { + type: 'candidate_action', + unitKey: 'retention-and-segments', + target: 'sl', + action: 'created', + key: 'orbit_demo.accounts', + }, + { type: 'work_unit_finished', unitKey: 'retention-and-segments', status: 'success' }, + { + type: 'work_unit_started', + unitKey: 'procurement-and-activation', + skills: ['knowledge_capture', 'sl_capture'], + stepBudget: 40, + }, + { + type: 'candidate_action', + unitKey: 'procurement-and-activation', + target: 'wiki', + action: 'created', + key: 'knowledge/global/activation-policy.md', + }, + { + type: 'candidate_action', + unitKey: 'procurement-and-activation', + target: 'wiki', + action: 'created', + key: 'knowledge/global/procurement-workflows.md', + }, + { + type: 'candidate_action', + unitKey: 'procurement-and-activation', + target: 'sl', + action: 'created', + key: 'orbit_demo.purchase_requests', + }, + { type: 'work_unit_finished', unitKey: 'procurement-and-activation', status: 'success' 
}, + { type: 'work_unit_started', unitKey: 'support-and-health', skills: ['knowledge_capture', 'sl_capture'], stepBudget: 40 }, + { + type: 'candidate_action', + unitKey: 'support-and-health', + target: 'wiki', + action: 'created', + key: 'knowledge/global/customer-health-scoring.md', + }, + { + type: 'candidate_action', + unitKey: 'support-and-health', + target: 'wiki', + action: 'created', + key: 'knowledge/global/support-escalation.md', + }, + { + type: 'candidate_action', + unitKey: 'support-and-health', + target: 'sl', + action: 'created', + key: 'orbit_demo.support_tickets', + }, + { type: 'work_unit_finished', unitKey: 'support-and-health', status: 'success' }, + { type: 'work_unit_started', unitKey: 'governance-and-exclusions', skills: ['knowledge_capture'], stepBudget: 40 }, + { + type: 'candidate_action', + unitKey: 'governance-and-exclusions', + target: 'wiki', + action: 'created', + key: 'knowledge/global/internal-test-exclusion.md', + }, + { type: 'work_unit_finished', unitKey: 'governance-and-exclusions', status: 'success' }, + { type: 'reconciliation_finished', conflictCount: 0, fallbackCount: 0 }, + { type: 'saved', commitSha: 'demo-seeded', wikiCount: 10, slCount: 6 }, + { type: 'provenance_recorded', rowCount: provenance.length }, + { type: 'report_created', runId: 'demo-seeded-orbit', reportPath: 'reports/seeded-demo-report.json' }, + ], + plannedWorkUnits: [ + { + unitKey: 'revenue-and-contracts', + rawFiles: ['contracts', 'invoices', 'arr_movements'], + peerFileCount: 3, + dependencyCount: 3, + }, + { + unitKey: 'retention-and-segments', + rawFiles: ['accounts', 'plans'], + peerFileCount: 2, + dependencyCount: 2, + }, + { + unitKey: 'procurement-and-activation', + rawFiles: ['purchase_requests', 'users'], + peerFileCount: 2, + dependencyCount: 2, + }, + { unitKey: 'support-and-health', rawFiles: ['support_tickets'], peerFileCount: 1, dependencyCount: 1 }, + { + unitKey: 'governance-and-exclusions', + rawFiles: 
['notion/export/pages/analyst-onboarding.md'], + peerFileCount: 1, + dependencyCount: 0, + }, + ], + details: { + actions: buildActions(), + provenance, + transcripts, + }, + }, + }; +} + +async function writeGeneratedContext(rowCounts) { + for (const page of knowledgePages) { + await writeText(join('knowledge/global', page.file), renderKnowledgePage(page)); + } + + for (const table of semanticLayerTables) { + await writeText(join('semantic-layer/orbit_demo', `${table}.yaml`), await renderSemanticLayerSource(table)); + } + + const provenance = provenanceLinks.map((link) => ({ + rawPath: link.sourcePath, + artifactKind: link.artifactKind, + artifactKey: link.artifactKey, + actionType: link.artifactKind === 'sl' ? 'sl_written' : 'wiki_written', + })); + const transcripts = [ + 'revenue-and-contracts', + 'retention-and-segments', + 'procurement-and-activation', + 'support-and-health', + 'governance-and-exclusions', + ].map((unitKey) => ({ + unitKey, + path: `transcripts/${unitKey}.jsonl`, + toolCallCount: unitKey === 'governance-and-exclusions' ? 2 : 5, + errorCount: 0, + toolNames: unitKey === 'governance-and-exclusions' ? 
['wiki_write'] : ['wiki_write', 'sl_write_source'], + })); + + await writeJson('links/provenance.json', provenanceLinks); + await writeJson('reports/seeded-demo-report.json', { + id: 'demo-seeded-report', + runId: 'demo-seeded-orbit', + connectionId: 'orbit_demo', + mode: 'seeded', + status: 'complete', + createdAt: '2026-05-06T00:00:00.000Z', + summary: { + sources: { + warehouse: { tables: 8, rows: Object.values(rowCounts).reduce((sum, count) => sum + count, 0) }, + dbt: { models: 3, sources: 8 }, + bi: { explores: 5, dashboards: 2, views: 5 }, + notion: { pages: 8 }, + }, + generated: { + semanticLayerSources: 6, + knowledgePages: 10, + provenanceLinks: provenanceLinks.length, + }, + metadata: { + mode: 'seeded', + origin: 'packaged', + llmCalls: 0, + timing: 'prebuilt', + source: packagedDemoSource, + }, + }, + }); + await writeJson('manifest.json', { + demoAssetSchemaVersion: 2, + name: 'orbit', + displayName: 'Orbit Demo', + mode: 'seeded', + sqliteDatabase: 'demo.db', + replay: 'replay.memory-flow.v1.json', + report: 'reports/seeded-demo-report.json', + source: packagedDemoSource, + sources: { + warehouse: { label: 'Warehouse', path: 'demo.db', tables: 8, rowCounts }, + dbt: { label: 'dbt', path: 'raw-sources/dbt', models: 3, sourceTables: 8 }, + bi: { label: 'BI', path: 'raw-sources/bi', explores: 5, dashboards: 2 }, + notion: { label: 'Notion', path: 'raw-sources/notion', pages: 8 }, + }, + generated: { + semanticLayer: { path: 'semantic-layer/orbit_demo', sourceCount: 6 }, + knowledge: { path: 'knowledge/global', pageCount: 10 }, + links: { path: 'links', linkCount: provenanceLinks.length }, + }, + }); + await writeJson('replay.memory-flow.v1.json', buildReplay(provenance, transcripts)); +} + +await assertReadable(join(sourceRoot, 'database/seeds/accounts.csv'), `${packagedDemoSource} seed data`); +await assertReadable(join(sourceRoot, `${exampleDbtProjectDir}/models/schema.yml`), `${packagedDemoSource} dbt schema`); +await assertReadable(join(sourceRoot, 
/**
 * Build an in-memory IO pair for tests.
 *
 * `io.stdout.write` / `io.stderr.write` append each chunk to a private buffer
 * and return the buffer's new contents; `stdout()` / `stderr()` read the
 * buffers back.
 */
function makeIo() {
  const captured = { stdout: '', stderr: '' };
  const sinkFor = (stream: 'stdout' | 'stderr') => ({
    write: (chunk: string) => (captured[stream] += chunk),
  });
  return {
    io: { stdout: sinkFor('stdout'), stderr: sinkFor('stderr') },
    stdout: () => captured.stdout,
    stderr: () => captured.stderr,
  };
}
afterEach(async () => { + await rm(tempDir, { recursive: true, force: true }); + }); + + it('writes JSON success and error envelopes without color or spinners', () => { + const successIo = makeIo(); + const errorIo = makeIo(); + + writeAgentJson(successIo.io, { ok: true }); + writeAgentJsonError(errorIo.io, 'missing source', { code: 'NOT_FOUND' }); + + expect(JSON.parse(successIo.stdout())).toEqual({ ok: true }); + expect(successIo.stderr()).toBe(''); + expect(JSON.parse(errorIo.stderr())).toEqual({ + ok: false, + error: { message: 'missing source', code: 'NOT_FOUND' }, + }); + expect(errorIo.stdout()).toBe(''); + }); + + it('reads JSON query files as objects', async () => { + const path = join(tempDir, 'query.json'); + await writeFile(path, '{"measures":["revenue"],"limit":50}', 'utf-8'); + + await expect(readAgentJsonFile(path)).resolves.toEqual({ measures: ['revenue'], limit: 50 }); + }); + + it('rejects non-object JSON query files', async () => { + const path = join(tempDir, 'query.json'); + await writeFile(path, '["revenue"]', 'utf-8'); + + await expect(readAgentJsonFile(path)).rejects.toThrow('must contain a JSON object'); + }); + + it('requires positive row limits and enforces the agent cap', () => { + expect(parseAgentMaxRows(100)).toBe(100); + expect(() => parseAgentMaxRows(undefined)).toThrow('maxRows is required'); + expect(() => parseAgentMaxRows(0)).toThrow('positive integer'); + expect(() => parseAgentMaxRows(KLO_AGENT_MAX_ROWS_CAP + 1)).toThrow(String(KLO_AGENT_MAX_ROWS_CAP)); + }); + + it('constructs local context ports with semantic compute and query executor', async () => { + const project = { + projectDir: tempDir, + configPath: join(tempDir, 'klo.yaml'), + config: { project: 'revenue', connections: {} }, + coreConfig: {}, + git: {}, + fileStore: {}, + } as never; + const ports = { knowledge: {}, semanticLayer: {} } as never; + const semanticLayerCompute = { query: vi.fn(), validateSources: vi.fn(), generateSources: vi.fn() }; + const 
queryExecutor = { execute: vi.fn() }; + const loadProject = vi.fn(async () => project); + const createContextTools = vi.fn(() => ports); + + await expect( + createKloAgentRuntime( + { projectDir: tempDir, enableSemanticCompute: true, enableQueryExecution: true }, + { + loadProject, + createContextTools, + createSemanticLayerCompute: () => semanticLayerCompute, + createQueryExecutor: () => queryExecutor, + }, + ), + ).resolves.toMatchObject({ project, ports, queryExecutor }); + + expect(loadProject).toHaveBeenCalledWith({ projectDir: tempDir }); + expect(createContextTools).toHaveBeenCalledWith(project, { + semanticLayerCompute, + queryExecutor, + }); + }); +}); diff --git a/packages/cli/src/agent-runtime.ts b/packages/cli/src/agent-runtime.ts new file mode 100644 index 00000000..a9cc5cce --- /dev/null +++ b/packages/cli/src/agent-runtime.ts @@ -0,0 +1,81 @@ +import { readFile } from 'node:fs/promises'; +import { createDefaultLocalQueryExecutor, type KloSqlQueryExecutorPort } from '@klo/context/connections'; +import { createPythonSemanticLayerComputePort, type KloSemanticLayerComputePort } from '@klo/context/daemon'; +import { createLocalProjectMcpContextPorts, type KloMcpContextPorts } from '@klo/context/mcp'; +import { type KloLocalProject, loadKloProject } from '@klo/context/project'; +import type { KloCliIo } from './cli-runtime.js'; + +export const KLO_AGENT_MAX_ROWS_CAP = 1000; + +export interface KloAgentRuntimeOptions { + projectDir: string; + enableSemanticCompute: boolean; + enableQueryExecution: boolean; +} + +export interface KloAgentRuntime { + project: KloLocalProject; + ports: KloMcpContextPorts; + semanticLayerCompute?: KloSemanticLayerComputePort; + queryExecutor?: KloSqlQueryExecutorPort; +} + +export interface KloAgentRuntimeDeps { + loadProject?: typeof loadKloProject; + createContextTools?: typeof createLocalProjectMcpContextPorts; + createSemanticLayerCompute?: () => KloSemanticLayerComputePort; + createQueryExecutor?: () => 
KloSqlQueryExecutorPort; +} +/** Writes `value` to stdout as two-space-indented JSON followed by a newline. */ +export function writeAgentJson(io: KloCliIo, value: unknown): void { + io.stdout.write(`${JSON.stringify(value, null, 2)}\n`); +} +/** Writes `{ ok: false, error: { message, ...detail } }` to stderr as indented JSON; `detail` keys are spread after `message`. */ +export function writeAgentJsonError( + io: KloCliIo, + message: string, + detail: Record<string, unknown> = {}, +): void { + io.stderr.write(`${JSON.stringify({ ok: false, error: { message, ...detail } }, null, 2)}\n`); +} +/** Reads `path` as UTF-8 JSON and resolves to the parsed value when it is a non-null, non-array object; throws otherwise (invalid JSON surfaces as the JSON.parse error). */ +export async function readAgentJsonFile(path: string): Promise<Record<string, unknown>> { + const parsed = JSON.parse(await readFile(path, 'utf-8')) as unknown; + if (!parsed || typeof parsed !== 'object' || Array.isArray(parsed)) { + throw new Error(`${path} must contain a JSON object.`); + } + return parsed as Record<string, unknown>; +} +/** Returns `value` when it is an integer from 1 to KLO_AGENT_MAX_ROWS_CAP; throws for undefined, non-integers, non-positive values, and values above the cap. */ +export function parseAgentMaxRows(value: number | undefined): number { + if (!Number.isInteger(value) || value === undefined || value <= 0) { + throw new Error('maxRows is required and must be a positive integer.'); + } + if (value > KLO_AGENT_MAX_ROWS_CAP) { + throw new Error(`maxRows must be less than or equal to ${KLO_AGENT_MAX_ROWS_CAP}.`); + } + return value; +} +/** Loads the project and builds its context ports. The semantic-layer compute port and the query executor are created only when the matching `enable*` option is true; each `deps` factory overrides the default implementation. */ +export async function createKloAgentRuntime( + options: KloAgentRuntimeOptions, + deps: KloAgentRuntimeDeps = {}, +): Promise<KloAgentRuntime> { + const project = await (deps.loadProject ?? loadKloProject)({ projectDir: options.projectDir }); + const semanticLayerCompute = options.enableSemanticCompute + ? (deps.createSemanticLayerCompute ?? createPythonSemanticLayerComputePort)() + : undefined; + const queryExecutor = options.enableQueryExecution + ? (deps.createQueryExecutor ?? createDefaultLocalQueryExecutor)() + : undefined; + const ports = (deps.createContextTools ?? createLocalProjectMcpContextPorts)(project, { + ...(semanticLayerCompute ? { semanticLayerCompute } : {}), + ...(queryExecutor ? { queryExecutor } : {}), + }); + return { + project, + ports, + ...(semanticLayerCompute ? { semanticLayerCompute } : {}), + ...(queryExecutor ? 
{ queryExecutor } : {}), + }; +} diff --git a/packages/cli/src/agent-search-readiness.test.ts b/packages/cli/src/agent-search-readiness.test.ts new file mode 100644 index 00000000..e8699892 --- /dev/null +++ b/packages/cli/src/agent-search-readiness.test.ts @@ -0,0 +1,51 @@ +import { describe, expect, it } from 'vitest'; +import { + isMissingProjectConfigError, + missingConnectionSlSearchReadiness, + missingProjectSlSearchReadiness, + noConnectionsSlSearchReadiness, + noIndexedSourcesSlSearchReadiness, +} from './agent-search-readiness.js'; + +describe('agent semantic-layer search readiness guidance', () => { + it('formats missing project guidance with exact recovery commands', () => { + expect(missingProjectSlSearchReadiness('/tmp/klo-search', 'gross revenue')).toEqual({ + code: 'agent_sl_search_missing_project', + message: 'Semantic-layer search needs an initialized KLO project at /tmp/klo-search.', + nextSteps: [ + 'klo demo', + 'klo setup --project-dir /tmp/klo-search', + 'klo ingest ', + 'klo agent sl list --json --query "gross revenue" --project-dir /tmp/klo-search', + ], + }); + }); + + it('formats no-connection and no-index guidance without hiding the project path', () => { + expect(noConnectionsSlSearchReadiness('/tmp/klo-search', 'revenue')).toMatchObject({ + code: 'agent_sl_search_no_connections', + message: 'Semantic-layer search found no configured connections in /tmp/klo-search.', + }); + expect(noIndexedSourcesSlSearchReadiness('/tmp/klo-search', 'orders')).toMatchObject({ + code: 'agent_sl_search_no_indexed_sources', + message: 'Semantic-layer search found no indexed semantic-layer sources in /tmp/klo-search.', + }); + }); + + it('formats unknown connection guidance', () => { + expect(missingConnectionSlSearchReadiness('/tmp/klo-search', 'warehouse', 'revenue')).toMatchObject({ + code: 'agent_sl_search_unknown_connection', + message: 'Semantic-layer search connection "warehouse" is not configured in /tmp/klo-search.', + }); + }); + + it('detects 
missing klo.yaml read errors', () => { + const error = Object.assign(new Error('ENOENT: no such file or directory'), { + code: 'ENOENT', + path: '/tmp/klo-search/klo.yaml', + }); + + expect(isMissingProjectConfigError(error)).toBe(true); + expect(isMissingProjectConfigError(new Error('other'))).toBe(false); + }); +}); diff --git a/packages/cli/src/agent-search-readiness.ts b/packages/cli/src/agent-search-readiness.ts new file mode 100644 index 00000000..2e019b73 --- /dev/null +++ b/packages/cli/src/agent-search-readiness.ts @@ -0,0 +1,94 @@ +export type KloAgentSlSearchReadinessCode = + | 'agent_sl_search_missing_project' + | 'agent_sl_search_no_connections' + | 'agent_sl_search_unknown_connection' + | 'agent_sl_search_no_indexed_sources'; + +export interface KloAgentSlSearchReadinessDetail { + code: KloAgentSlSearchReadinessCode; + message: string; + nextSteps: string[]; +} + +function queryForCommand(query: string | undefined): string { + const trimmed = query?.trim(); + return trimmed && trimmed.length > 0 ? 
trimmed : 'revenue'; +} + +function projectSearchCommand(projectDir: string, query: string | undefined): string { + return `klo agent sl list --json --query ${JSON.stringify(queryForCommand(query))} --project-dir ${projectDir}`; +} + +function baseNextSteps(projectDir: string, query: string | undefined): string[] { + return [ + 'klo demo', + `klo setup --project-dir ${projectDir}`, + 'klo ingest ', + projectSearchCommand(projectDir, query), + ]; +} + +export function missingProjectSlSearchReadiness( + projectDir: string, + query: string | undefined, +): KloAgentSlSearchReadinessDetail { + return { + code: 'agent_sl_search_missing_project', + message: `Semantic-layer search needs an initialized KLO project at ${projectDir}.`, + nextSteps: baseNextSteps(projectDir, query), + }; +} + +export function noConnectionsSlSearchReadiness( + projectDir: string, + query: string | undefined, +): KloAgentSlSearchReadinessDetail { + return { + code: 'agent_sl_search_no_connections', + message: `Semantic-layer search found no configured connections in ${projectDir}.`, + nextSteps: baseNextSteps(projectDir, query), + }; +} + +export function missingConnectionSlSearchReadiness( + projectDir: string, + connectionId: string, + query: string | undefined, +): KloAgentSlSearchReadinessDetail { + return { + code: 'agent_sl_search_unknown_connection', + message: `Semantic-layer search connection "${connectionId}" is not configured in ${projectDir}.`, + nextSteps: baseNextSteps(projectDir, query), + }; +} + +export function noIndexedSourcesSlSearchReadiness( + projectDir: string, + query: string | undefined, +): KloAgentSlSearchReadinessDetail { + return { + code: 'agent_sl_search_no_indexed_sources', + message: `Semantic-layer search found no indexed semantic-layer sources in ${projectDir}.`, + nextSteps: baseNextSteps(projectDir, query), + }; +} + +function errorCode(error: unknown): string | undefined { + if (typeof error !== 'object' || error === null || !('code' in error)) { + return 
undefined; + } + const code = (error as { code?: unknown }).code; + return typeof code === 'string' ? code : undefined; +} + +function errorPath(error: unknown): string | undefined { + if (typeof error !== 'object' || error === null || !('path' in error)) { + return undefined; + } + const path = (error as { path?: unknown }).path; + return typeof path === 'string' ? path : undefined; +} + +export function isMissingProjectConfigError(error: unknown): boolean { + return errorCode(error) === 'ENOENT' && (errorPath(error)?.endsWith('klo.yaml') ?? false); +} diff --git a/packages/cli/src/agent.test.ts b/packages/cli/src/agent.test.ts new file mode 100644 index 00000000..981a5f4c --- /dev/null +++ b/packages/cli/src/agent.test.ts @@ -0,0 +1,393 @@ +import { mkdtemp, rm, writeFile } from 'node:fs/promises'; +import { tmpdir } from 'node:os'; +import { join } from 'node:path'; +import { buildDefaultKloProjectConfig } from '@klo/context/project'; +import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'; +import { runKloAgent } from './agent.js'; +import type { KloAgentRuntime } from './agent-runtime.js'; + +function makeIo() { + let stdout = ''; + let stderr = ''; + return { + io: { + stdout: { write: (chunk: string) => (stdout += chunk) }, + stderr: { write: (chunk: string) => (stderr += chunk) }, + }, + stdout: () => stdout, + stderr: () => stderr, + }; +} + +function runtime(overrides: Record = {}): KloAgentRuntime { + const config = buildDefaultKloProjectConfig('revenue'); + return { + project: { + projectDir: '/tmp/revenue', + configPath: '/tmp/revenue/klo.yaml', + config: { + ...config, + connections: { + warehouse: { driver: 'sqlite', path: 'warehouse.sqlite', readonly: true as const }, + }, + }, + coreConfig: {} as KloAgentRuntime['project']['coreConfig'], + git: {} as KloAgentRuntime['project']['git'], + fileStore: {} as KloAgentRuntime['project']['fileStore'], + }, + ports: { + connections: { list: vi.fn(async () => [{ id: 'warehouse', name: 
'warehouse', connectionType: 'sqlite' }]) }, + semanticLayer: { + listSources: vi.fn(async () => ({ + sources: [ + { + connectionId: 'warehouse', + connectionName: 'warehouse', + name: 'orders', + columnCount: 2, + measureCount: 1, + joinCount: 0, + }, + ], + totalSources: 1, + })), + readSource: vi.fn(async () => ({ sourceName: 'orders', yaml: 'name: orders\n' })), + writeSource: vi.fn(async () => ({ success: true, sourceName: 'orders' })), + validate: vi.fn(async () => ({ success: true, errors: [], warnings: [] })), + query: vi.fn(async () => ({ sql: 'select 1', headers: ['x'], rows: [[1]], totalRows: 1, plan: {} })), + }, + knowledge: { + search: vi.fn(async () => ({ + results: [ + { + key: 'page-1', + path: 'knowledge/global/page-1.md', + scope: 'GLOBAL' as const, + summary: 'Revenue logic', + score: 0.9, + matchReasons: ['lexical' as const], + }, + ], + totalFound: 1, + })), + read: vi.fn(async () => ({ + key: 'page-1', + scope: 'GLOBAL' as const, + summary: 'Revenue logic', + content: 'Use net revenue.', + })), + write: vi.fn(async () => ({ success: true, key: 'page-1', action: 'created' as const })), + }, + }, + queryExecutor: { + execute: vi.fn(async () => ({ headers: ['x'], rows: [[1]], totalRows: 1, command: 'SELECT', rowCount: 1 })), + }, + ...overrides, + }; +} + +function runtimeWithoutConnections(): KloAgentRuntime { + const base = runtime(); + return { + ...base, + project: { + ...base.project, + config: { + ...base.project.config, + connections: {}, + }, + }, + ports: { + ...base.ports, + semanticLayer: { + ...base.ports.semanticLayer!, + listSources: vi.fn(async () => ({ sources: [], totalSources: 0 })), + }, + }, + }; +} + +describe('runKloAgent', () => { + let tempDir: string; + + beforeEach(async () => { + tempDir = await mkdtemp(join(tmpdir(), 'klo-agent-')); + }); + + afterEach(async () => { + await rm(tempDir, { recursive: true, force: true }); + }); + + it('prints tool discovery with every stable command', async () => { + const io = 
makeIo(); + + await expect(runKloAgent({ command: 'tools', projectDir: tempDir, json: true }, io.io)).resolves.toBe(0); + + const body = JSON.parse(io.stdout()); + expect(body.projectDir).toBe(tempDir); + expect(body.tools.map((tool: { name: string }) => tool.name)).toEqual([ + 'context', + 'sl.list', + 'sl.read', + 'sl.query', + 'wiki.search', + 'wiki.read', + 'sql.execute', + ]); + expect(io.stderr()).toBe(''); + }); + + it('prints project context from setup status, connections, and SL summaries', async () => { + const io = makeIo(); + const createRuntime = vi.fn(async () => runtime()); + const readSetupStatus = vi.fn(async () => ({ project: { path: tempDir, ready: true }, agents: [] })); + + await expect( + runKloAgent({ command: 'context', projectDir: tempDir, json: true }, io.io, { createRuntime, readSetupStatus }), + ).resolves.toBe(0); + + expect(JSON.parse(io.stdout())).toMatchObject({ + projectDir: tempDir, + status: { project: { ready: true } }, + connections: [{ id: 'warehouse' }], + semanticLayer: { totalSources: 1 }, + }); + }); + + it('dispatches SL list, SL read, wiki search, and wiki read through local ports', async () => { + for (const args of [ + { command: 'sl-list' as const, projectDir: tempDir, json: true as const, connectionId: 'warehouse' }, + { + command: 'sl-read' as const, + projectDir: tempDir, + json: true as const, + connectionId: 'warehouse', + sourceName: 'orders', + }, + { command: 'wiki-search' as const, projectDir: tempDir, json: true as const, query: 'revenue', limit: 10 }, + { command: 'wiki-read' as const, projectDir: tempDir, json: true as const, pageId: 'page-1' }, + ]) { + const io = makeIo(); + await expect(runKloAgent(args, io.io, { createRuntime: async () => runtime() })).resolves.toBe(0); + expect(JSON.parse(io.stdout())).toBeTruthy(); + expect(io.stderr()).toBe(''); + } + }); + + it('prints wiki hybrid search metadata from the hidden agent wiki search command', async () => { + const fakeRuntime = runtime(); + const 
knowledge = fakeRuntime.ports.knowledge; + if (!knowledge) { + throw new Error('Expected runtime knowledge port'); + } + fakeRuntime.ports.knowledge = { + ...knowledge, + search: vi.fn(async () => ({ + results: [ + { + key: 'metrics/revenue', + path: 'knowledge/global/metrics/revenue.md', + scope: 'GLOBAL' as const, + summary: 'Revenue metric definition', + score: 0.02459016393442623, + matchReasons: ['lexical' as const, 'token' as const], + }, + ], + totalFound: 1, + })), + }; + const io = makeIo(); + + await expect( + runKloAgent({ command: 'wiki-search', projectDir: tempDir, json: true, query: 'paid order', limit: 5 }, io.io, { + createRuntime: async () => fakeRuntime, + }), + ).resolves.toBe(0); + + expect(JSON.parse(io.stdout())).toEqual({ + results: [ + expect.objectContaining({ + key: 'metrics/revenue', + path: 'knowledge/global/metrics/revenue.md', + matchReasons: ['lexical', 'token'], + }), + ], + totalFound: 1, + }); + }); + + it('executes SL queries from a JSON query file', async () => { + const queryFile = join(tempDir, 'sl-query.json'); + const io = makeIo(); + await writeFile(queryFile, '{"measures":["total_revenue"],"dimensions":[]}', 'utf-8'); + + await expect( + runKloAgent( + { + command: 'sl-query', + projectDir: tempDir, + json: true, + connectionId: 'warehouse', + queryFile, + execute: true, + maxRows: 100, + }, + io.io, + { createRuntime: async () => runtime() }, + ), + ).resolves.toBe(0); + + expect(JSON.parse(io.stdout())).toMatchObject({ sql: 'select 1', rows: [[1]] }); + }); + + it('executes read-only SQL from a SQL file with an explicit row limit', async () => { + const sqlFile = join(tempDir, 'query.sql'); + const fakeRuntime = runtime(); + const io = makeIo(); + await writeFile(sqlFile, 'select 1', 'utf-8'); + + await expect( + runKloAgent( + { + command: 'sql-execute', + projectDir: tempDir, + json: true, + connectionId: 'warehouse', + sqlFile, + maxRows: 100, + }, + io.io, + { createRuntime: async () => fakeRuntime as never }, + ), + 
).resolves.toBe(0); + + expect(fakeRuntime.queryExecutor?.execute).toHaveBeenCalledWith({ + connectionId: 'warehouse', + projectDir: '/tmp/revenue', + connection: { driver: 'sqlite', path: 'warehouse.sqlite', readonly: true }, + sql: 'select 1', + maxRows: 100, + }); + }); + + it('prints guided JSON when semantic-layer search runs outside a project', async () => { + const io = makeIo(); + const missingProjectError = Object.assign(new Error('ENOENT: no such file or directory'), { + code: 'ENOENT', + path: join(tempDir, 'klo.yaml'), + }); + + await expect( + runKloAgent( + { command: 'sl-list', projectDir: tempDir, json: true, query: 'gross revenue' }, + io.io, + { createRuntime: vi.fn(async () => Promise.reject(missingProjectError)) }, + ), + ).resolves.toBe(1); + + expect(JSON.parse(io.stderr())).toEqual({ + ok: false, + error: { + code: 'agent_sl_search_missing_project', + message: `Semantic-layer search needs an initialized KLO project at ${tempDir}.`, + nextSteps: [ + 'klo demo', + `klo setup --project-dir ${tempDir}`, + 'klo ingest ', + `klo agent sl list --json --query "gross revenue" --project-dir ${tempDir}`, + ], + }, + }); + expect(io.stdout()).toBe(''); + }); + + it('prints guided JSON when semantic-layer search has no configured connections', async () => { + const io = makeIo(); + + await expect( + runKloAgent( + { command: 'sl-list', projectDir: tempDir, json: true, query: 'revenue' }, + io.io, + { createRuntime: async () => runtimeWithoutConnections() }, + ), + ).resolves.toBe(1); + + expect(JSON.parse(io.stderr())).toMatchObject({ + ok: false, + error: { + code: 'agent_sl_search_no_connections', + message: `Semantic-layer search found no configured connections in ${tempDir}.`, + nextSteps: [ + 'klo demo', + `klo setup --project-dir ${tempDir}`, + 'klo ingest ', + `klo agent sl list --json --query "revenue" --project-dir ${tempDir}`, + ], + }, + }); + }); + + it('prints guided JSON when semantic-layer search asks for an unknown connection', async () => 
{ + const io = makeIo(); + + await expect( + runKloAgent( + { command: 'sl-list', projectDir: tempDir, json: true, connectionId: 'missing', query: 'revenue' }, + io.io, + { createRuntime: async () => runtime() }, + ), + ).resolves.toBe(1); + + expect(JSON.parse(io.stderr())).toMatchObject({ + ok: false, + error: { + code: 'agent_sl_search_unknown_connection', + message: `Semantic-layer search connection "missing" is not configured in ${tempDir}.`, + }, + }); + }); + + it('prints guided JSON when semantic-layer search has no indexed sources', async () => { + const fakeRuntime = runtime(); + const semanticLayer = fakeRuntime.ports.semanticLayer!; + fakeRuntime.ports.semanticLayer = { + ...semanticLayer, + listSources: vi.fn(async () => ({ sources: [], totalSources: 0 })), + }; + const io = makeIo(); + + await expect( + runKloAgent( + { command: 'sl-list', projectDir: tempDir, json: true, connectionId: 'warehouse', query: 'revenue' }, + io.io, + { createRuntime: async () => fakeRuntime }, + ), + ).resolves.toBe(1); + + expect(JSON.parse(io.stderr())).toMatchObject({ + ok: false, + error: { + code: 'agent_sl_search_no_indexed_sources', + message: `Semantic-layer search found no indexed semantic-layer sources in ${tempDir}.`, + }, + }); + }); + + it('returns JSON errors when required ports or records are missing', async () => { + const io = makeIo(); + + await expect( + runKloAgent({ command: 'wiki-read', projectDir: tempDir, json: true, pageId: 'missing' }, io.io, { + createRuntime: async () => + runtime({ + ports: { knowledge: { read: vi.fn(async () => null) } }, + }) as never, + }), + ).resolves.toBe(1); + + expect(JSON.parse(io.stderr())).toMatchObject({ + ok: false, + error: { message: expect.stringContaining('missing') }, + }); + }); +}); diff --git a/packages/cli/src/agent.ts b/packages/cli/src/agent.ts new file mode 100644 index 00000000..072e7901 --- /dev/null +++ b/packages/cli/src/agent.ts @@ -0,0 +1,214 @@ +import { readFile } from 'node:fs/promises'; 
+import type { KloCliIo } from './cli-runtime.js'; +import { + createKloAgentRuntime, + parseAgentMaxRows, + readAgentJsonFile, + writeAgentJson, + writeAgentJsonError, + type KloAgentRuntime, + type KloAgentRuntimeDeps, +} from './agent-runtime.js'; +import { + isMissingProjectConfigError, + missingConnectionSlSearchReadiness, + missingProjectSlSearchReadiness, + noConnectionsSlSearchReadiness, + noIndexedSourcesSlSearchReadiness, + type KloAgentSlSearchReadinessDetail, +} from './agent-search-readiness.js'; +import { readKloSetupStatus, type KloSetupStatus } from './setup.js'; + +export type KloAgentArgs = + | { command: 'tools'; projectDir: string; json: true } + | { command: 'context'; projectDir: string; json: true } + | { command: 'sl-list'; projectDir: string; json: true; connectionId?: string; query?: string } + | { command: 'sl-read'; projectDir: string; json: true; connectionId?: string; sourceName: string } + | { + command: 'sl-query'; + projectDir: string; + json: true; + connectionId: string; + queryFile: string; + execute: boolean; + maxRows?: number; + } + | { command: 'wiki-search'; projectDir: string; json: true; query: string; limit: number } + | { command: 'wiki-read'; projectDir: string; json: true; pageId: string } + | { command: 'sql-execute'; projectDir: string; json: true; connectionId: string; sqlFile: string; maxRows?: number }; + +export interface KloAgentDeps extends KloAgentRuntimeDeps { + createRuntime?: (options: { + projectDir: string; + enableSemanticCompute: boolean; + enableQueryExecution: boolean; + }) => Promise; + readSetupStatus?: ( + projectDir: string, + ) => Promise; +} + +const AGENT_TOOLS = [ + { name: 'context', command: 'klo agent context --json' }, + { name: 'sl.list', command: 'klo agent sl list --json [--connection-id ] [--query ]' }, + { name: 'sl.read', command: 'klo agent sl read --json [--connection-id ]' }, + { + name: 'sl.query', + command: 'klo agent sl query --json --connection-id --query-file --execute 
--max-rows 100', + }, + { name: 'wiki.search', command: 'klo agent wiki search --json [--limit 10]' }, + { name: 'wiki.read', command: 'klo agent wiki read --json' }, + { + name: 'sql.execute', + command: 'klo agent sql execute --json --connection-id --sql-file --max-rows 100', + }, +] as const; +/** Reports a semantic-layer search readiness problem through writeAgentJsonError, forwarding its `code` and `nextSteps`. */ +function writeAgentSlSearchReadinessError(io: KloCliIo, detail: KloAgentSlSearchReadinessDetail): void { + writeAgentJsonError(io, detail.message, { code: detail.code, nextSteps: detail.nextSteps }); +} +/** Builds the runtime for one command. Semantic compute is enabled only for `sl-query`; query execution for `sql-execute` or `sl-query` with `execute`. Uses `deps.createRuntime` when supplied, otherwise createKloAgentRuntime. */ +async function runtimeFor(args: KloAgentArgs, deps: KloAgentDeps): Promise<KloAgentRuntime> { + const needsSemanticCompute = args.command === 'sl-query'; + const needsQueryExecution = args.command === 'sql-execute' || (args.command === 'sl-query' && args.execute); + return deps.createRuntime + ? deps.createRuntime({ + projectDir: args.projectDir, + enableSemanticCompute: needsSemanticCompute, + enableQueryExecution: needsQueryExecution, + }) + : createKloAgentRuntime( + { + projectDir: args.projectDir, + enableSemanticCompute: needsSemanticCompute, + enableQueryExecution: needsQueryExecution, + }, + deps, + ); +} +/** Returns `requested` when given, else the project's only configured connection id; throws when the project has zero or several connections. */ +function connectionIdForSource(runtime: KloAgentRuntime, requested: string | undefined): string { + if (requested) return requested; + const ids = Object.keys(runtime.project.config.connections ?? {}); + if (ids.length === 1) return ids[0] as string; + throw new Error('Use --connection-id when the project has zero or multiple connections.'); +} +/** Runs one agent command and resolves to a process exit code: 0 on success, 1 on failure (failures are reported as JSON on stderr). */ +export async function runKloAgent(args: KloAgentArgs, io: KloCliIo, deps: KloAgentDeps = {}): Promise<number> { + try { + if (args.command === 'tools') { + writeAgentJson(io, { projectDir: args.projectDir, tools: AGENT_TOOLS }); + return 0; + } + + const runtime = await runtimeFor(args, deps); + + if (args.command === 'context') { + const [status, connections, semanticLayer] = await Promise.all([ + (deps.readSetupStatus ?? readKloSetupStatus)(args.projectDir), + runtime.ports.connections?.list() ?? 
[], + runtime.ports.semanticLayer?.listSources({}) ?? { sources: [], totalSources: 0 }, + ]); + writeAgentJson(io, { projectDir: args.projectDir, status, connections, semanticLayer, tools: AGENT_TOOLS }); + return 0; + } + + if (args.command === 'sl-list') { + const semanticLayer = runtime.ports.semanticLayer; + if (!semanticLayer) throw new Error('Semantic-layer tools are not available for this project.'); + if (args.query) { + const connectionIds = Object.keys(runtime.project.config.connections ?? {}); + if (args.connectionId && !runtime.project.config.connections[args.connectionId]) { + writeAgentSlSearchReadinessError( + io, + missingConnectionSlSearchReadiness(args.projectDir, args.connectionId, args.query), + ); + return 1; + } + if (connectionIds.length === 0) { + writeAgentSlSearchReadinessError(io, noConnectionsSlSearchReadiness(args.projectDir, args.query)); + return 1; + } + } + + const listed = await semanticLayer.listSources({ connectionId: args.connectionId, query: args.query }); + if (args.query && listed.sources.length === 0) { + const allSources = await semanticLayer.listSources({ connectionId: args.connectionId }); + if (allSources.totalSources === 0) { + writeAgentSlSearchReadinessError(io, noIndexedSourcesSlSearchReadiness(args.projectDir, args.query)); + return 1; + } + } + + writeAgentJson(io, listed); + return 0; + } + + if (args.command === 'sl-read') { + const semanticLayer = runtime.ports.semanticLayer; + if (!semanticLayer) throw new Error('Semantic-layer tools are not available for this project.'); + const source = await semanticLayer.readSource({ + connectionId: connectionIdForSource(runtime, args.connectionId), + sourceName: args.sourceName, + }); + if (!source) throw new Error(`Semantic-layer source "${args.sourceName}" was not found.`); + writeAgentJson(io, source); + return 0; + } + + if (args.command === 'sl-query') { + const semanticLayer = runtime.ports.semanticLayer; + if (!semanticLayer) throw new Error('Semantic-layer tools 
are not available for this project.'); + const query = await readAgentJsonFile(args.queryFile); + const maxRows = args.execute ? parseAgentMaxRows(args.maxRows) : args.maxRows; + writeAgentJson( + io, + await semanticLayer.query({ + connectionId: args.connectionId, + query: { ...query, ...(maxRows !== undefined ? { limit: maxRows } : {}) } as never, + }), + ); + return 0; + } + + if (args.command === 'wiki-search') { + const knowledge = runtime.ports.knowledge; + if (!knowledge) throw new Error('Wiki tools are not available for this project.'); + writeAgentJson(io, await knowledge.search({ userId: 'agent', query: args.query, limit: args.limit })); + return 0; + } + + if (args.command === 'wiki-read') { + const knowledge = runtime.ports.knowledge; + if (!knowledge) throw new Error('Wiki tools are not available for this project.'); + const page = await knowledge.read({ userId: 'agent', key: args.pageId }); + if (!page) throw new Error(`Wiki page "${args.pageId}" was not found.`); + writeAgentJson(io, page); + return 0; + } + + const queryExecutor = runtime.queryExecutor; + if (!queryExecutor) throw new Error('SQL execution is not available for this project.'); + const connection = runtime.project.config.connections[args.connectionId]; + if (!connection) throw new Error(`Connection "${args.connectionId}" was not found.`); + const maxRows = parseAgentMaxRows(args.maxRows); + writeAgentJson( + io, + await queryExecutor.execute({ + connectionId: args.connectionId, + projectDir: runtime.project.projectDir, + connection, + sql: await readFile(args.sqlFile, 'utf-8'), + maxRows, + }), + ); + return 0; + } catch (error) { + if (args.command === 'sl-list' && args.query && isMissingProjectConfigError(error)) { + writeAgentSlSearchReadinessError(io, missingProjectSlSearchReadiness(args.projectDir, args.query)); + return 1; + } + writeAgentJsonError(io, error instanceof Error ? 
error.message : String(error)); + return 1; + } +} diff --git a/packages/cli/src/bin.ts b/packages/cli/src/bin.ts new file mode 100644 index 00000000..11a4dbd1 --- /dev/null +++ b/packages/cli/src/bin.ts @@ -0,0 +1,9 @@ +#!/usr/bin/env node + +import { installStartupProfileReporter, profileMark, profileSpan } from './startup-profile.js'; + +installStartupProfileReporter(); +profileMark('bin:entry'); +const { runKloCli } = await profileSpan('import ./cli-runtime.js', () => import('./cli-runtime.js')); +profileMark('bin:runKloCli'); +process.exitCode = await runKloCli(process.argv.slice(2)); diff --git a/packages/cli/src/clack.ts b/packages/cli/src/clack.ts new file mode 100644 index 00000000..668ccee6 --- /dev/null +++ b/packages/cli/src/clack.ts @@ -0,0 +1,11 @@ +import { spinner } from '@clack/prompts'; + +export interface KloCliSpinner { + start(message: string): void; + stop(message: string): void; + error(message: string): void; +} + +export function createClackSpinner(): KloCliSpinner { + return spinner(); +} diff --git a/packages/cli/src/cli-program.ts b/packages/cli/src/cli-program.ts new file mode 100644 index 00000000..74ae4f16 --- /dev/null +++ b/packages/cli/src/cli-program.ts @@ -0,0 +1,268 @@ +import { Command, InvalidArgumentError } from '@commander-js/extra-typings'; +import type { KloCliDeps, KloCliIo, KloCliPackageInfo } from './cli-runtime.js'; +import { registerAgentCommands } from './commands/agent-commands.js'; +import { registerConnectionCommands } from './commands/connection-commands.js'; +import { registerWikiCommands } from './commands/knowledge-commands.js'; +import { registerPublicIngestCommands } from './commands/public-ingest-commands.js'; +import { registerServeCommands } from './commands/serve-commands.js'; +import { registerSetupCommands } from './commands/setup-commands.js'; +import { registerSlCommands } from './commands/sl-commands.js'; +import { registerStatusCommands } from './commands/status-commands.js'; +import { 
registerDevCommands } from './dev.js'; +import { findNearestKloProjectDir, resolveKloProjectDir } from './project-resolver.js'; +import { profileMark, profileSpan } from './startup-profile.js'; + +profileMark('module:cli-program'); + +export interface KloCliCommandContext { + io: KloCliIo; + deps: KloCliDeps; + setExitCode: (code: number) => void; + runInit: (args: { projectDir: string; projectName?: string; force: boolean }, io: KloCliIo) => Promise; + writeDebug?: (command: string, commandContext: CommandWithGlobalOptions) => void; +} + +export interface OutputModeOptions { + plain?: boolean; + json?: boolean; + viz?: boolean; + input?: boolean; +} + +interface KloCommanderProgramOptions { + runInit: (args: { projectDir: string; projectName?: string; force: boolean }, io: KloCliIo) => Promise; +} + +type CommanderExitLike = { exitCode: number; code: string; message: string }; + +interface KloGlobalOptionValues { + projectDir?: string; + debug?: boolean; +} + +export interface CommandWithGlobalOptions { + opts: () => object; + optsWithGlobals?: () => object; +} +/** Type guard: true for non-null objects carrying a numeric `exitCode` and a string `code`. */ +function isCommanderExit(error: unknown): error is CommanderExitLike { + return ( + typeof error === 'object' && + error !== null && + 'exitCode' in error && + typeof (error as { exitCode: unknown }).exitCode === 'number' && + 'code' in error && + typeof (error as { code: unknown }).code === 'string' + ); +} +/** Accumulator for repeatable options: returns a new array with `value` appended to `previous`. */ +export function collectOption(value: string, previous: string[] = []): string[] { + return [...previous, value]; +} +/** Option parser for integers >= 1; throws InvalidArgumentError for anything else. */ +export function parsePositiveIntegerOption(value: string): number { + const parsed = Number(value); + if (!Number.isInteger(parsed) || parsed < 1) { + throw new InvalidArgumentError('must be a positive integer'); + } + return parsed; +} +/** Option parser for integers >= 0; throws InvalidArgumentError for anything else. Blank input is rejected up front because Number('') and Number(' ') both evaluate to 0. */ +export function parseNonNegativeIntegerOption(value: string): number { + const parsed = value.trim() === '' ? Number.NaN : Number(value); + if (!Number.isInteger(parsed) || parsed < 0) { + throw new InvalidArgumentError('must be a non-negative integer'); + } + return parsed; +} + 
+/**
+ * Option parser that accepts exactly the strings `true` and `false`.
+ *
+ * @throws InvalidArgumentError for any other input (the match is case-sensitive).
+ */
+export function parseBooleanStringOption(value: string): boolean {
+  if (value === 'true') {
+    return true;
+  }
+  if (value === 'false') {
+    return false;
+  }
+  throw new InvalidArgumentError('must be true or false');
+}
+
+/**
+ * Option parser for connection ids: one alphanumeric character followed by any number of
+ * alphanumerics, underscores, or hyphens.
+ *
+ * @returns `value` unchanged when it matches.
+ * @throws InvalidArgumentError when it does not.
+ */
+export function parseSafeConnectionIdOption(value: string): string {
+  if (!/^[a-zA-Z0-9][a-zA-Z0-9_-]*$/.test(value)) {
+    throw new InvalidArgumentError(`Unsafe connection id: ${value}`);
+  }
+  return value;
+}
+
+/**
+ * Splits an assignment at its first `=`.
+ *
+ * @returns The text before the first `=` as `key` and everything after it as `value`
+ *   (later `=` characters stay inside `value`).
+ * @throws InvalidArgumentError when there is no `=`, the key is empty, or nothing follows the `=`.
+ */
+export function parseNonEmptyAssignmentOption(value: string): { key: string; value: string } {
+  const separatorIndex = value.indexOf('=');
+  if (separatorIndex <= 0 || separatorIndex === value.length - 1) {
+    throw new InvalidArgumentError('must be a non-empty = assignment');
+  }
+  return {
+    key: value.slice(0, separatorIndex),
+    value: value.slice(separatorIndex + 1),
+  };
+}
+
+/**
+ * Reads the global `projectDir` and `debug` options from a command, preferring
+ * `optsWithGlobals()` when the command provides it. Values of an unexpected type are dropped.
+ */
+function optionsWithGlobals(command: CommandWithGlobalOptions): KloGlobalOptionValues {
+  const options = command.optsWithGlobals ? command.optsWithGlobals() : command.opts();
+  const values = options as { projectDir?: unknown; debug?: unknown };
+  return {
+    projectDir: typeof values.projectDir === 'string' ? values.projectDir : undefined,
+    debug: typeof values.debug === 'boolean' ? values.debug : undefined,
+  };
+}
+
+/** Resolves the project directory for a command, passing any explicit `--project-dir` value. */
+export function resolveCommandProjectDir(command: CommandWithGlobalOptions): string {
+  return resolveKloProjectDir({ explicitProjectDir: optionsWithGlobals(command).projectDir });
+}
+
+/**
+ * Returns the explicitly requested project directory: the `--project-dir` value if given,
+ * otherwise `KLO_PROJECT_DIR`, otherwise `undefined`.
+ */
+export function resolveCommandProjectDirOverride(command: CommandWithGlobalOptions): string | undefined {
+  return optionsWithGlobals(command).projectDir ?? process.env.KLO_PROJECT_DIR;
+}
+
+/**
+ * Builds the root `klo` program with its global options, help configuration, and output
+ * routed to `io`. `exitOverride()` makes Commander throw instead of exiting the process.
+ */
+function createBaseProgram(info: KloCliPackageInfo, io: KloCliIo): Command {
+  return (
+    new Command()
+      .name('klo')
+      .description('Standalone KLO developer CLI')
+      // The `<path>` placeholder is what makes this a value-taking option. Without it Commander
+      // registers a boolean flag and optionsWithGlobals() would never observe a string.
+      .option('--project-dir <path>', 'KLO project directory (default: KLO_PROJECT_DIR, nearest klo.yaml, or cwd)')
+      .option('--debug', 'Enable diagnostic logging to stderr')
+      .version(`${info.name} ${info.version}`, '-v, --version', 'Show CLI version')
+      .helpOption('-h, --help', 'Show this help text')
+      .configureHelp({ showGlobalOptions: true })
+      .addHelpText(
+        'after',
+        '\nAdvanced:\n klo dev Low-level diagnostics, scans, adapter commands, and mapping tools.\n',
+      )
+      .showHelpAfterError()
+      .exitOverride()
+      .configureOutput({
+        writeOut: (chunk) => io.stdout.write(chunk),
+        writeErr: (chunk) => io.stderr.write(chunk),
+        outputError: (chunk, write) => write(chunk),
+      })
+  );
+}
+
+/** Writes the resolved project directory and dispatched command to stderr when `--debug` is set. */
+function writeDebug(io: KloCliIo, commandContext: CommandWithGlobalOptions, command: string): void {
+  const global = optionsWithGlobals(commandContext);
+  if (global.debug !== true) {
+    return;
+  }
+  io.stderr.write(`[debug] projectDir=${resolveCommandProjectDir(commandContext)}\n`);
+  io.stderr.write(`[debug] dispatch=${command}\n`);
+}
+
+/** Renders a thrown value for stderr: the message of an `Error`, or `String(error)` otherwise. */
+function formatCliError(error: unknown): string {
+  return error instanceof Error ? error.message : String(error);
+}
+
+/**
+ * Handles `klo` invoked with no arguments.
+ *
+ * When a project is already discoverable (a nearest project directory is found from the cwd,
+ * or `KLO_PROJECT_DIR` is set) this prints help and returns 0. Otherwise it runs the setup
+ * runner with its automatic defaults and returns that runner's result.
+ */
+async function runBareInteractiveCommand(
+  program: Command,
+  io: KloCliIo,
+  context: KloCliCommandContext,
+): Promise<number> {
+  const nearestProjectDir = findNearestKloProjectDir(process.cwd());
+  const envProjectDir = process.env.KLO_PROJECT_DIR;
+
+  if (nearestProjectDir || envProjectDir) {
+    program.outputHelp();
+    return 0;
+  }
+
+  // Resolve the runner only on the path that uses it, so printing help neither pays for nor
+  // depends on loading ./setup.js.
+  const runner = context.deps.setup ?? (await import('./setup.js')).runKloSetup;
+  return await runner(
+    {
+      command: 'run',
+      projectDir: resolveKloProjectDir(),
+      mode: 'auto',
+      agents: false,
+      agentScope: 'project',
+      agentInstallMode: 'cli',
+      skipAgents: false,
+      inputMode: 'auto',
+      yes: false,
+      skipLlm: false,
+      skipEmbeddings: false,
+      databaseSchemas: [],
+      skipDatabases: false,
+      skipSources: false,
+    },
+    io,
+  );
+}
+
+/**
+ * Builds the full command tree, then either handles the bare invocation or parses `argv`.
+ *
+ * @returns The process exit code: the value recorded through `setExitCode`, or 0/1 for the
+ *   bare-invocation and error paths.
+ */
+export async function runCommanderKloCli(
+  argv: string[],
+  io: KloCliIo,
+  deps: KloCliDeps,
+  info: KloCliPackageInfo,
+  options: KloCommanderProgramOptions,
+): Promise<number> {
+  profileMark('commander:entry');
+  let exitCode = 0;
+  const program = createBaseProgram(info, io);
+  profileMark('commander:base-program');
+  const context: KloCliCommandContext = {
+    io,
+    deps,
+    setExitCode: (code: number) => {
+      exitCode = code;
+    },
+    runInit: options.runInit,
+    writeDebug: (command: string, commandContext: CommandWithGlobalOptions) => {
+      writeDebug(io, commandContext, command);
+    },
+  };
+
+  registerSetupCommands(program, context);
+  profileMark('commander:register-setup');
+
+  registerConnectionCommands(program, context);
+  profileMark('commander:register-connection');
+
+  registerPublicIngestCommands(program, context);
+  profileMark('commander:register-public-ingest');
+
+  registerWikiCommands(program, context);
+  profileMark('commander:register-wiki');
+
+  registerSlCommands(program, context);
+  profileMark('commander:register-sl');
+
+  registerServeCommands(program, context);
+  profileMark('commander:register-serve');
+
+  registerStatusCommands(program, context);
+  profileMark('commander:register-status');
+
+  registerAgentCommands(program, context);
+  profileMark('commander:register-agent');
+
+  registerDevCommands(program, context);
+  profileMark('commander:register-dev');
+
+  if (argv.length === 0) {
+    if (io.stdout.isTTY === true) {
+      try {
+        return await 
runBareInteractiveCommand(program, io, context); + } catch (error) { + io.stderr.write(`${formatCliError(error)}\n`); + return 1; + } + } + program.outputHelp(); + return 0; + } + + try { + await profileSpan('commander:parseAsync', () => program.parseAsync(argv, { from: 'user' })); + } catch (error) { + if (isCommanderExit(error)) { + return error.exitCode === 0 ? 0 : 1; + } + io.stderr.write(`${formatCliError(error)}\n`); + return 1; + } + + return exitCode; +} diff --git a/packages/cli/src/cli-runtime.ts b/packages/cli/src/cli-runtime.ts new file mode 100644 index 00000000..993fcf31 --- /dev/null +++ b/packages/cli/src/cli-runtime.ts @@ -0,0 +1,89 @@ +import type { KloConnectionMetabaseSetupArgs } from './commands/connection-metabase-setup.js'; +import type { KloConnectionNotionArgs } from './commands/connection-notion.js'; +import type { KloAgentArgs } from './agent.js'; +import type { KloConnectionArgs } from './connection.js'; +import type { KloDemoArgs } from './demo.js'; +import type { KloDoctorArgs } from './doctor.js'; +import type { KloIngestArgs } from './ingest.js'; +import type { KloKnowledgeArgs } from './knowledge.js'; +import type { KloPublicIngestArgs } from './public-ingest.js'; +import type { KloScanArgs } from './scan.js'; +import type { KloServeArgs } from './serve.js'; +import type { KloSetupArgs } from './setup.js'; +import type { KloSlArgs } from './sl.js'; +import { profileMark, profileSpan } from './startup-profile.js'; + +profileMark('module:cli-runtime'); + +export interface KloCliPackageInfo { + name: '@klo/cli'; + version: '0.0.0-private'; + contextPackageName: '@klo/context'; +} + +export interface KloCliIo { + stdout: { isTTY?: boolean; write(chunk: string): void }; + stderr: { write(chunk: string): void }; +} + +export interface KloCliDeps { + serveStdio?: (args: KloServeArgs) => Promise; + setup?: (args: KloSetupArgs, io: KloCliIo) => Promise; + agent?: (args: KloAgentArgs, io: KloCliIo) => Promise; + connection?: (args: 
KloConnectionArgs, io: KloCliIo) => Promise; + connectionNotion?: (args: KloConnectionNotionArgs, io: KloCliIo) => Promise; + connectionMetabaseSetup?: (args: KloConnectionMetabaseSetupArgs, io: KloCliIo) => Promise; + demo?: (args: KloDemoArgs, io: KloCliIo) => Promise; + doctor?: (args: KloDoctorArgs, io: KloCliIo) => Promise; + ingest?: (args: KloIngestArgs, io: KloCliIo) => Promise; + publicIngest?: (args: KloPublicIngestArgs, io: KloCliIo) => Promise; + scan?: (args: KloScanArgs, io: KloCliIo) => Promise; + knowledge?: (args: KloKnowledgeArgs, io: KloCliIo) => Promise; + sl?: (args: KloSlArgs, io: KloCliIo) => Promise; +} + +export function getKloCliPackageInfo(): KloCliPackageInfo { + return { + name: '@klo/cli', + version: '0.0.0-private', + contextPackageName: '@klo/context', + }; +} + +async function runInit( + args: { projectDir: string; projectName?: string; force: boolean }, + io: KloCliIo, +): Promise { + const { initKloProject } = await import('@klo/context/project'); + const result = await initKloProject({ + projectDir: args.projectDir, + projectName: args.projectName, + force: args.force, + }); + + io.stdout.write(`Initialized KLO project at ${result.projectDir}\n`); + io.stdout.write(`Config: ${result.configPath}\n`); + io.stdout.write(`Commit: ${result.commitHash ?? 
'none'}\n`); + return 0; +} + +export async function runInitForCommander( + args: { projectDir: string; projectName?: string; force: boolean }, + io: KloCliIo, +): Promise { + return await runInit(args, io); +} + +export async function runKloCli( + argv = process.argv.slice(2), + io: KloCliIo = process, + deps: KloCliDeps = {}, +): Promise { + const info = getKloCliPackageInfo(); + profileMark('runtime:runKloCli'); + const { runCommanderKloCli } = await profileSpan('import ./cli-program.js', () => import('./cli-program.js')); + + return await runCommanderKloCli(argv, io, deps, info, { + runInit: runInitForCommander, + }); +} diff --git a/packages/cli/src/command-schemas.ts b/packages/cli/src/command-schemas.ts new file mode 100644 index 00000000..0e251b96 --- /dev/null +++ b/packages/cli/src/command-schemas.ts @@ -0,0 +1,85 @@ +import { z } from 'zod'; + +const projectDirSchema = z.string().min(1); +const safeConnectionIdSchema = z.string().regex(/^[a-zA-Z0-9][a-zA-Z0-9_-]*$/, 'Unsafe connection id'); +const stringArraySchema = z.array(z.string()); + +export const connectionAddCommandSchema = z.object({ + command: z.literal('add'), + projectDir: projectDirSchema, + driver: z.string().min(1), + connectionId: safeConnectionIdSchema, + url: z.string().optional(), + schemas: stringArraySchema, + readonly: z.boolean(), + force: z.boolean(), + allowLiteralCredentials: z.boolean(), + notion: z + .object({ + authTokenRef: z.string().min(1), + crawlMode: z.enum(['all_accessible', 'selected_roots']), + rootPageIds: stringArraySchema, + rootDatabaseIds: stringArraySchema, + rootDataSourceIds: stringArraySchema, + maxPagesPerRun: z.number().int().positive().optional(), + maxKnowledgeCreatesPerRun: z.number().int().nonnegative().optional(), + maxKnowledgeUpdatesPerRun: z.number().int().nonnegative().optional(), + }) + .optional(), +}); + +export const wikiWriteCommandSchema = z.object({ + command: z.literal('write'), + projectDir: projectDirSchema, + key: z.string().min(1), + 
scope: z.enum(['GLOBAL', 'USER']), + userId: z.string().min(1), + summary: z.string().min(1), + content: z.string().min(1), + tags: stringArraySchema, + refs: stringArraySchema, + slRefs: stringArraySchema, +}); + +const orderBySchema = z.union([ + z.string().min(1), + z.object({ + field: z.string().min(1), + direction: z.enum(['asc', 'desc']).optional(), + }), +]); + +export const slQueryCommandSchema = z.object({ + command: z.literal('query'), + projectDir: projectDirSchema, + connectionId: z.string().min(1).optional(), + query: z.object({ + measures: z.array(z.string().min(1)).min(1), + dimensions: stringArraySchema, + filters: stringArraySchema.optional(), + segments: stringArraySchema.optional(), + order_by: z.array(orderBySchema).optional(), + limit: z.number().int().positive().optional(), + include_empty: z.literal(true).optional(), + }), + format: z.enum(['json', 'sql']), + execute: z.boolean(), + maxRows: z.number().int().positive().optional(), +}); + +export const publicIngestRunCommandSchema = z.object({ + command: z.literal('run'), + projectDir: projectDirSchema, + targetConnectionId: safeConnectionIdSchema.optional(), + all: z.boolean(), + json: z.boolean(), + inputMode: z.enum(['auto', 'disabled']), +}); + +export const publicIngestReadCommandSchema = z.object({ + command: z.enum(['status', 'watch']), + projectDir: projectDirSchema, + runId: z.string().min(1).optional(), + json: z.boolean(), + inputMode: z.enum(['auto', 'disabled']), +}); diff --git a/packages/cli/src/commands/agent-commands.ts b/packages/cli/src/commands/agent-commands.ts new file mode 100644 index 00000000..7f47da50 --- /dev/null +++ b/packages/cli/src/commands/agent-commands.ts @@ -0,0 +1,137 @@ +import { Option, type Command } from '@commander-js/extra-typings'; +import type { KloAgentArgs } from '../agent.js'; +import type { KloCliCommandContext } from '../cli-program.js'; +import { parsePositiveIntegerOption, resolveCommandProjectDir } from '../cli-program.js'; + +async function 
runAgent(context: KloCliCommandContext, args: KloAgentArgs): Promise { + const runner = context.deps.agent ?? (await import('../agent.js')).runKloAgent; + context.setExitCode(await runner(args, context.io)); +} + +function jsonOption(): Option { + return new Option('--json', 'Print JSON output').makeOptionMandatory(); +} + +export function registerAgentCommands(program: Command, context: KloCliCommandContext): void { + const agent = program + .command('agent', { hidden: true }) + .description('Machine-readable KLO commands for coding agents') + .showHelpAfterError(); + + agent.hook('preAction', (_thisCommand, actionCommand) => { + context.writeDebug?.('agent', actionCommand); + }); + + agent + .command('tools') + .description('Print available agent-facing KLO tools') + .addOption(jsonOption()) + .action(async (_options, command) => { + await runAgent(context, { command: 'tools', projectDir: resolveCommandProjectDir(command), json: true }); + }); + + agent + .command('context') + .description('Print project context for agent planning') + .addOption(jsonOption()) + .action(async (_options, command) => { + await runAgent(context, { command: 'context', projectDir: resolveCommandProjectDir(command), json: true }); + }); + + const sl = agent.command('sl').description('Semantic-layer agent commands'); + sl.command('list') + .description('List semantic-layer sources') + .addOption(jsonOption()) + .option('--connection-id ', 'Filter by connection id') + .option('--query ', 'Search source names and descriptions') + .action(async (options: { connectionId?: string; query?: string }, command) => { + await runAgent(context, { + command: 'sl-list', + projectDir: resolveCommandProjectDir(command), + json: true, + ...(options.connectionId ? { connectionId: options.connectionId } : {}), + ...(options.query ? 
{ query: options.query } : {}), + }); + }); + sl.command('read') + .description('Read one semantic-layer source') + .argument('') + .addOption(jsonOption()) + .option('--connection-id ', 'Connection id containing the source') + .action(async (sourceName: string, options: { connectionId?: string }, command) => { + await runAgent(context, { + command: 'sl-read', + projectDir: resolveCommandProjectDir(command), + json: true, + sourceName, + ...(options.connectionId ? { connectionId: options.connectionId } : {}), + }); + }); + sl.command('query') + .description('Run a semantic-layer query JSON file') + .addOption(jsonOption()) + .requiredOption('--connection-id ', 'Connection id for execution') + .requiredOption('--query-file ', 'JSON semantic-layer query file') + .option('--execute', 'Execute the compiled query against the connection', false) + .option('--max-rows ', 'Maximum rows to return when executing', parsePositiveIntegerOption) + .action( + async ( + options: { connectionId: string; queryFile: string; execute: boolean; maxRows?: number }, + command, + ) => { + await runAgent(context, { + command: 'sl-query', + projectDir: resolveCommandProjectDir(command), + json: true, + connectionId: options.connectionId, + queryFile: options.queryFile, + execute: options.execute, + ...(options.maxRows !== undefined ? 
{ maxRows: options.maxRows } : {}), + }); + }, + ); + + const wiki = agent.command('wiki').description('KLO wiki agent commands'); + wiki + .command('search') + .description('Search KLO wiki pages') + .argument('') + .addOption(jsonOption()) + .option('--limit ', 'Maximum search results', parsePositiveIntegerOption, 10) + .action(async (query: string, options: { limit: number }, command) => { + await runAgent(context, { + command: 'wiki-search', + projectDir: resolveCommandProjectDir(command), + json: true, + query, + limit: options.limit, + }); + }); + wiki + .command('read') + .description('Read one KLO wiki page') + .argument('') + .addOption(jsonOption()) + .action(async (pageId: string, _options, command) => { + await runAgent(context, { command: 'wiki-read', projectDir: resolveCommandProjectDir(command), json: true, pageId }); + }); + + const sql = agent.command('sql').description('Safe SQL execution commands'); + sql + .command('execute') + .description('Execute read-only SQL with a row limit') + .addOption(jsonOption()) + .requiredOption('--connection-id ', 'Connection id for execution') + .requiredOption('--sql-file ', 'SQL file to execute') + .requiredOption('--max-rows ', 'Maximum rows to return', parsePositiveIntegerOption) + .action(async (options: { connectionId: string; sqlFile: string; maxRows: number }, command) => { + await runAgent(context, { + command: 'sql-execute', + projectDir: resolveCommandProjectDir(command), + json: true, + connectionId: options.connectionId, + sqlFile: options.sqlFile, + maxRows: options.maxRows, + }); + }); +} diff --git a/packages/cli/src/commands/completion-commands.ts b/packages/cli/src/commands/completion-commands.ts new file mode 100644 index 00000000..234ed0e7 --- /dev/null +++ b/packages/cli/src/commands/completion-commands.ts @@ -0,0 +1,47 @@ +import type { CommandUnknownOpts } from '@commander-js/extra-typings'; +import type { KloCliCommandContext } from '../cli-program.js'; +import { completeCommanderInput, 
installZshCompletion, zshCompletionScript } from '../completion.js'; + +export function registerCompletionCommands( + program: CommandUnknownOpts, + context: KloCliCommandContext, + completionRoot: CommandUnknownOpts = program, +): void { + program + .command('completion') + .description('Generate shell completion scripts') + .command('zsh') + .description('Generate zsh completion script') + .option('--install', 'Install zsh completion into ~/.zfunc and update ~/.zshrc', false) + .action(async (options: { install?: boolean }) => { + if (options.install === true) { + const result = await installZshCompletion(); + context.io.stdout.write(`Installed zsh completion: ${result.completionPath}\n`); + context.io.stdout.write(`Updated zsh config: ${result.zshrcPath}\n`); + context.io.stdout.write('Restart your shell or run: source ~/.zshrc\n'); + context.setExitCode(0); + return; + } + context.io.stdout.write(zshCompletionScript()); + context.setExitCode(0); + }); + + program + .command('__complete', { hidden: true }) + .description('Internal shell completion endpoint') + .requiredOption('--shell ', 'Shell requesting completions') + .requiredOption('--position ', 'Current shell word position', (value) => Number(value)) + .argument('[words...]', 'Current shell words') + .allowUnknownOption() + .allowExcessArguments() + .action((words: string[], options: { shell: string; position: number }) => { + if (options.shell !== 'zsh') { + context.setExitCode(1); + return; + } + for (const completion of completeCommanderInput(completionRoot, { position: options.position, words })) { + context.io.stdout.write(`${completion}\n`); + } + context.setExitCode(0); + }); +} diff --git a/packages/cli/src/commands/connection-commands.ts b/packages/cli/src/commands/connection-commands.ts new file mode 100644 index 00000000..f1330118 --- /dev/null +++ b/packages/cli/src/commands/connection-commands.ts @@ -0,0 +1,346 @@ +import { type Command, InvalidArgumentError, Option } from 
'@commander-js/extra-typings'; +import { + collectOption, + type KloCliCommandContext, + parseBooleanStringOption, + parseNonEmptyAssignmentOption, + parseNonNegativeIntegerOption, + parsePositiveIntegerOption, + parseSafeConnectionIdOption, + resolveCommandProjectDir, +} from '../cli-program.js'; +import { connectionAddCommandSchema } from '../command-schemas.js'; +import type { KloConnectionArgs } from '../connection.js'; +import { profileMark } from '../startup-profile.js'; +import type { KloConnectionMappingArgs } from './connection-mapping.js'; +import { registerConnectionMetabaseCommands } from './connection-metabase-commands.js'; +import { registerConnectionNotionCommands } from './connection-notion-commands.js'; + +profileMark('module:commands/connection-commands'); + +const CRAWL_MODE_CHOICES = ['all_accessible', 'selected_roots'] as const; +const SYNC_MODE_CHOICES = ['ALL', 'ONLY', 'EXCEPT'] as const; + +function parseCsvIds(value: string): number[] { + return value + .split(',') + .filter(Boolean) + .map((item) => parsePositiveIntegerOption(item)); +} + +function parseCsvStrings(value: string): string[] { + return value + .split(',') + .map((item) => item.trim()) + .filter(Boolean); +} + +function parseMappingFieldOption(value: string): 'databaseMappings' | 'connectionMappings' { + if (value === 'databaseMappings' || value === 'connectionMappings') { + return value; + } + throw new InvalidArgumentError('must be databaseMappings or connectionMappings'); +} + +async function runConnectionArgs(context: KloCliCommandContext, args: KloConnectionArgs): Promise { + const runner = context.deps.connection ?? 
(await import('../connection.js')).runKloConnection; + context.setExitCode(await runner(args, context.io)); +} + +async function runMappingArgs(context: KloCliCommandContext, args: KloConnectionMappingArgs): Promise { + const { runKloConnectionMapping } = await import('./connection-mapping.js'); + context.setExitCode(await runKloConnectionMapping(args, context.io)); +} + +export function registerConnectionCommands(program: Command, context: KloCliCommandContext, commandName = 'connection'): void { + const connection = program + .command(commandName) + .description('Add, list, test, and map data sources') + .showHelpAfterError() + .addHelpText( + 'after', + '\nProject directory defaults to KLO_PROJECT_DIR when set, otherwise the nearest klo.yaml or current working directory.\n', + ); + connection.hook('preAction', (_thisCommand, actionCommand) => { + context.writeDebug?.(commandName, actionCommand); + }); + + connection + .command('list') + .description('List configured connections') + .action(async (_options: unknown, command) => { + await runConnectionArgs(context, { command: 'list', projectDir: resolveCommandProjectDir(command) }); + }); + + connection + .command('test') + .description('Test a configured connection') + .argument('', 'KLO connection id') + .action(async (connectionId: string, _options: unknown, command) => { + await runConnectionArgs(context, { + command: 'test', + projectDir: resolveCommandProjectDir(command), + connectionId, + }); + }); + + connection + .command('add') + .description('Add or replace a configured connection') + .argument('', 'Connection driver') + .argument('', 'KLO connection id') + .option('--url ', 'Connection URL, env:NAME, or file:/path reference') + .option('--schema ', 'Schema to include; repeatable', collectOption, []) + .option('--readonly', 'Mark the connection as read-only', false) + .option('--force', 'Replace an existing connection', false) + .option('--allow-literal-credentials', 'Allow writing a literal credential 
URL to klo.yaml', false) + .addOption(new Option('--token-env ', 'Environment variable containing Notion auth token').conflicts('tokenFile')) + .addOption(new Option('--token-file ', 'File containing Notion auth token').conflicts('tokenEnv')) + .addOption( + new Option('--crawl-mode ', 'Notion crawl mode: all_accessible or selected_roots') + .choices(CRAWL_MODE_CHOICES) + .default('selected_roots'), + ) + .option('--root-page-id ', 'Root page to crawl; repeatable', collectOption, []) + .option('--root-database-id ', 'Root database to crawl; repeatable', collectOption, []) + .option('--root-data-source-id ', 'Root data source to crawl; repeatable', collectOption, []) + .option('--max-pages ', 'Maximum pages per run', parsePositiveIntegerOption) + .option('--max-knowledge-creates ', 'Maximum knowledge creates per run', parseNonNegativeIntegerOption) + .option('--max-knowledge-updates ', 'Maximum knowledge updates per run', parseNonNegativeIntegerOption) + .action(async (driver: string, connectionId: string, options, command) => { + const notion = + driver === 'notion' + ? { + authTokenRef: options.tokenEnv + ? `env:${options.tokenEnv}` + : options.tokenFile + ? 
`file:${options.tokenFile}` + : '', + crawlMode: options.crawlMode, + rootPageIds: options.rootPageId, + rootDatabaseIds: options.rootDatabaseId, + rootDataSourceIds: options.rootDataSourceId, + maxPagesPerRun: options.maxPages, + maxKnowledgeCreatesPerRun: options.maxKnowledgeCreates, + maxKnowledgeUpdatesPerRun: options.maxKnowledgeUpdates, + } + : undefined; + + if (driver === 'notion' && !notion?.authTokenRef) { + throw new Error('connection add notion requires --token-env NAME or --token-file PATH'); + } + if ( + driver === 'notion' && + notion?.crawlMode === 'selected_roots' && + notion.rootPageIds.length + notion.rootDatabaseIds.length + notion.rootDataSourceIds.length === 0 + ) { + throw new Error('connection add notion selected_roots requires at least one root id'); + } + + const args = connectionAddCommandSchema.parse({ + command: 'add', + projectDir: resolveCommandProjectDir(command), + driver, + connectionId, + url: options.url, + schemas: options.schema.filter(Boolean), + readonly: options.readonly === true, + force: options.force === true, + allowLiteralCredentials: options.allowLiteralCredentials === true, + notion, + }); + + await runConnectionArgs(context, args); + }); + + connection + .command('remove') + .description('Remove a configured connection from klo.yaml') + .argument('', 'KLO connection id') + .option('--force', 'Remove without prompting', false) + .option('--no-input', 'Disable interactive terminal input') + .action(async (connectionId: string, options: { force?: boolean; input?: boolean }, command) => { + await runConnectionArgs(context, { + command: 'remove', + projectDir: resolveCommandProjectDir(command), + connectionId, + force: options.force === true, + ...(options.input === false ? 
{ inputMode: 'disabled' } : {}), + }); + }); + + connection + .command('map') + .description('Refresh and validate BI-to-warehouse mappings') + .argument('', 'Source BI connection id') + .option('--json', 'Print JSON output', false) + .action(async (sourceConnectionId: string, options: { json?: boolean }, command) => { + await runConnectionArgs(context, { + command: 'map', + projectDir: resolveCommandProjectDir(command), + sourceConnectionId, + json: options.json === true, + }); + }); + + registerConnectionMappingCommands(connection, context); + registerConnectionMetabaseCommands(connection, context); + registerConnectionNotionCommands(connection, context); +} + +export function registerConnectionMappingCommands(connection: Command, context: KloCliCommandContext): void { + const mapping = connection + .command('mapping') + .description('Manage Metabase warehouse mappings') + .showHelpAfterError() + .addHelpText( + 'after', + '\nProject directory defaults to KLO_PROJECT_DIR when set, otherwise the current working directory.\n', + ); + + mapping + .command('list') + .description('List Metabase database mappings') + .argument('', 'Metabase connection id') + .option('--json', 'Print JSON output where supported', false) + .action(async (connectionId: string, options: { json?: boolean }, command) => { + await runMappingArgs(context, { + command: 'list', + projectDir: resolveCommandProjectDir(command), + connectionId, + json: options.json === true, + }); + }); + + mapping + .command('set') + .description('Set a Metabase or Looker warehouse mapping') + .argument('', 'Source connection id', parseSafeConnectionIdOption) + .argument('', 'Mapping field', parseMappingFieldOption) + .argument('', 'Mapping assignment such as 1=prod-warehouse', parseNonEmptyAssignmentOption) + .action( + async ( + connectionId: string, + field: 'databaseMappings' | 'connectionMappings', + assignment: { key: string; value: string }, + _options: unknown, + command, + ) => { + await 
runMappingArgs(context, { + command: 'set', + projectDir: resolveCommandProjectDir(command), + connectionId, + field, + key: assignment.key, + value: assignment.value, + }); + }, + ); + + mapping + .command('apply-bulk') + .description('Apply mappings from JSON') + .argument('', 'Metabase connection id') + .requiredOption('--file ', 'JSON mapping file') + .action(async (connectionId: string, options: { file: string }, command) => { + await runMappingArgs(context, { + command: 'apply-bulk', + projectDir: resolveCommandProjectDir(command), + connectionId, + filePath: options.file, + }); + }); + + mapping + .command('set-sync-enabled') + .description('Enable or disable sync for one Metabase database') + .argument('', 'Metabase connection id') + .argument('', 'Metabase database id', parsePositiveIntegerOption) + .requiredOption('--enabled ', 'true or false', parseBooleanStringOption) + .action( + async (connectionId: string, metabaseDatabaseId: number, options: { enabled: boolean }, command) => { + await runMappingArgs(context, { + command: 'set-sync-enabled', + projectDir: resolveCommandProjectDir(command), + connectionId, + metabaseDatabaseId, + enabled: options.enabled, + }); + }, + ); + + const syncState = mapping.command('sync-state').description('Manage Metabase sync-state selection'); + syncState + .command('get') + .description('Read sync-state selection') + .argument('', 'Metabase connection id') + .option('--json', 'Print JSON output where supported', false) + .action(async (connectionId: string, options: { json?: boolean }, command) => { + await runMappingArgs(context, { + command: 'sync-state-get', + projectDir: resolveCommandProjectDir(command), + connectionId, + json: options.json === true, + }); + }); + + syncState + .command('set') + .description('Write sync-state selection') + .argument('', 'Metabase connection id') + .addOption(new Option('--mode ', 'ALL, ONLY, or EXCEPT').choices(SYNC_MODE_CHOICES).makeOptionMandatory()) + .option('--collections ', 
'Comma-separated collection ids', parseCsvIds, []) + .option('--items ', 'Comma-separated item ids', parseCsvIds, []) + .option('--tag-names ', 'Comma-separated tag names', parseCsvStrings, []) + .action(async (connectionId: string, options, command) => { + await runMappingArgs(context, { + command: 'sync-state-set', + projectDir: resolveCommandProjectDir(command), + connectionId, + syncMode: options.mode, + collectionIds: options.collections, + itemIds: options.items, + tagNames: options.tagNames, + }); + }); + + mapping + .command('refresh') + .description('Refresh Metabase database mappings') + .argument('', 'Metabase connection id') + .option('--auto-accept', 'Accept refresh changes without prompting', false) + .action(async (connectionId: string, options: { autoAccept?: boolean }, command) => { + await runMappingArgs(context, { + command: 'refresh', + projectDir: resolveCommandProjectDir(command), + connectionId, + autoAccept: options.autoAccept === true, + }); + }); + + mapping + .command('validate') + .description('Validate Metabase database mappings') + .argument('', 'Metabase connection id') + .action(async (connectionId: string, _options: unknown, command) => { + await runMappingArgs(context, { + command: 'validate', + projectDir: resolveCommandProjectDir(command), + connectionId, + }); + }); + + mapping + .command('clear') + .description('Clear Metabase database mappings') + .argument('', 'Metabase connection id') + .argument('[metabaseDatabaseId]', 'Metabase database id', parsePositiveIntegerOption) + .action(async (connectionId: string, metabaseDatabaseId: number | undefined, _options: unknown, command) => { + await runMappingArgs(context, { + command: 'clear', + projectDir: resolveCommandProjectDir(command), + connectionId, + ...(metabaseDatabaseId ? 
{ metabaseDatabaseId } : {}), + }); + }); +} diff --git a/packages/cli/src/commands/connection-mapping.test.ts b/packages/cli/src/commands/connection-mapping.test.ts new file mode 100644 index 00000000..1ef5d1c5 --- /dev/null +++ b/packages/cli/src/commands/connection-mapping.test.ts @@ -0,0 +1,329 @@ +import { mkdtemp, rm } from 'node:fs/promises'; +import { tmpdir } from 'node:os'; +import { join } from 'node:path'; +import { LocalMetabaseSourceStateReader } from '@klo/context/ingest'; +import { initKloProject, loadKloProject, serializeKloProjectConfig } from '@klo/context/project'; +import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'; +import { runKloConnectionMapping } from './connection-mapping.js'; + +function makeIo() { + let stdout = ''; + let stderr = ''; + return { + io: { + stdout: { + write: (chunk: string) => { + stdout += chunk; + }, + }, + stderr: { + write: (chunk: string) => { + stderr += chunk; + }, + }, + }, + stdout: () => stdout, + stderr: () => stderr, + }; +} + +describe('runKloConnectionMapping', () => { + let tempDir: string; + let projectDir: string; + + beforeEach(async () => { + tempDir = await mkdtemp(join(tmpdir(), 'klo-cli-metabase-mapping-')); + projectDir = join(tempDir, 'project'); + await initKloProject({ projectDir, projectName: 'mapping' }); + const project = await loadKloProject({ projectDir }); + await project.fileStore.writeFile( + 'klo.yaml', + serializeKloProjectConfig({ + ...project.config, + connections: { + 'prod-metabase': { + driver: 'metabase', + api_url: 'https://metabase.example.com', + api_key_ref: 'env:METABASE_API_KEY', // pragma: allowlist secret + }, + 'prod-warehouse': { + driver: 'postgres', + url: 'env:WAREHOUSE_URL', + readonly: true, + }, + }, + }), + 'klo', + 'klo@example.com', + 'Seed Metabase mapping test connections', + ); + }); + + async function replaceConnections(connections: Record) { + const project = await loadKloProject({ projectDir }); + await project.fileStore.writeFile( + 
'klo.yaml', + serializeKloProjectConfig({ + ...project.config, + connections, + }), + 'klo', + 'klo@example.com', + 'Replace mapping test connections', + ); + } + + afterEach(async () => { + await rm(tempDir, { recursive: true, force: true }); + }); + + it('sets, lists, disables, and clears local Metabase mappings', async () => { + const io = makeIo(); + await expect( + runKloConnectionMapping( + { + command: 'set', + projectDir, + connectionId: 'prod-metabase', + field: 'databaseMappings', + key: '1', + value: 'prod-warehouse', + }, + io.io, + ), + ).resolves.toBe(0); + + const listIo = makeIo(); + await expect( + runKloConnectionMapping({ command: 'list', projectDir, connectionId: 'prod-metabase', json: false }, listIo.io), + ).resolves.toBe(0); + expect(listIo.stdout()).toContain('1 -> prod-warehouse'); + expect(listIo.stdout()).toContain('unhydrated'); + + await expect( + runKloConnectionMapping( + { + command: 'set-sync-enabled', + projectDir, + connectionId: 'prod-metabase', + metabaseDatabaseId: 1, + enabled: false, + }, + makeIo().io, + ), + ).resolves.toBe(0); + + await expect( + runKloConnectionMapping( + { + command: 'clear', + projectDir, + connectionId: 'prod-metabase', + metabaseDatabaseId: 1, + }, + makeIo().io, + ), + ).resolves.toBe(0); + }); + + it('lists Metabase yaml mapping bootstrap rows before any SQLite command writes', async () => { + const projectDir = await mkdtemp(join(tmpdir(), 'klo-cli-yaml-mapping-')); + await initKloProject({ projectDir, projectName: 'yaml-mapping' }); + const project = await loadKloProject({ projectDir }); + await project.fileStore.writeFile( + 'klo.yaml', + serializeKloProjectConfig({ + ...project.config, + connections: { + 'prod-metabase': { + driver: 'metabase', + mappings: { + databaseMappings: { '1': 'prod-warehouse' }, + syncEnabled: { '1': true }, + }, + }, + 'prod-warehouse': { driver: 'postgres', url: 'postgresql://readonly@db.test/analytics' }, + }, + }), + 'klo', + 'klo@example.com', + 'Seed yaml 
mappings', + ); + const io = makeIo(); + + await expect( + runKloConnectionMapping( + { command: 'list', projectDir, connectionId: 'prod-metabase', json: false }, + io.io, + ), + ).resolves.toBe(0); + + expect(io.stdout()).toContain('1 -> prod-warehouse'); + expect(io.stdout()).toContain('source: klo.yaml'); + }); + + it('refreshes Metabase discovery metadata through the injected runtime client', async () => { + const client = { + getDatabases: vi.fn().mockResolvedValue([ + { + id: 1, + name: 'Analytics', + engine: 'postgres', + details: { host: 'pg.internal', dbname: 'analytics' }, + is_sample: false, + }, + ]), + cleanup: vi.fn(), + }; + const io = makeIo(); + + await expect( + runKloConnectionMapping( + { + command: 'refresh', + projectDir, + connectionId: 'prod-metabase', + autoAccept: true, + }, + io.io, + { + createMetabaseClient: async () => client as never, + }, + ), + ).resolves.toBe(0); + + expect(io.stdout()).toContain('Discovery: 1 database'); + expect(client.cleanup).toHaveBeenCalledTimes(1); + const store = new LocalMetabaseSourceStateReader({ dbPath: join(projectDir, '.klo', 'db.sqlite') }); + await expect(store.listDatabaseMappings('prod-metabase')).resolves.toMatchObject([ + { metabaseDatabaseId: 1, metabaseDatabaseName: 'Analytics', source: 'refresh' }, + ]); + }); + + it('sets and lists Looker connection mappings', async () => { + await replaceConnections({ + 'prod-looker': { + driver: 'looker', + base_url: 'https://looker.example.test', + client_id: 'id', + }, + 'prod-warehouse': { + driver: 'postgres', + url: 'postgresql://readonly@db.example.test/analytics', + }, + }); + const io = makeIo(); + + await expect( + runKloConnectionMapping( + { + command: 'set', + projectDir, + connectionId: 'prod-looker', + field: 'connectionMappings', + key: 'analytics', + value: 'prod-warehouse', + }, + io.io, + ), + ).resolves.toBe(0); + await expect( + runKloConnectionMapping({ command: 'list', projectDir, connectionId: 'prod-looker', json: false }, io.io), + 
).resolves.toBe(0); + + expect(io.stdout()).toContain('analytics -> prod-warehouse'); + }); + + it('keeps driver-specific mapping field validation in the runner', async () => { + await replaceConnections({ + 'prod-looker': { driver: 'looker', base_url: 'https://looker.example.com' }, + warehouse: { driver: 'postgres', url: 'env:WAREHOUSE_URL' }, + }); + + const io = makeIo(); + await expect( + runKloConnectionMapping( + { + command: 'set', + projectDir, + connectionId: 'prod-looker', + field: 'databaseMappings', + key: '1', + value: 'warehouse', + }, + io.io, + ), + ).resolves.toBe(1); + + expect(io.stderr()).toContain('Looker mapping set requires connectionMappings'); + }); + + it('refreshes Looker mapping metadata and reports drift', async () => { + await replaceConnections({ + 'prod-looker': { + driver: 'looker', + base_url: 'https://looker.example.test', + client_id: 'id', + }, + 'prod-warehouse': { + driver: 'postgres', + url: 'postgresql://readonly@db.example.test/analytics', + }, + }); + const io = makeIo(); + + await expect( + runKloConnectionMapping( + { command: 'refresh', projectDir, connectionId: 'prod-looker', autoAccept: true }, + io.io, + { + createLookerClient: async () => ({ + listLookerConnections: async () => [ + { + name: 'analytics', + host: 'db.example.test', + database: 'analytics', + schema: null, + dialect: 'postgres', + }, + ], + cleanup: async () => {}, + }), + }, + ), + ).resolves.toBe(0); + + expect(io.stdout()).toContain('Discovery: 1 connection'); + expect(io.stdout()).toContain('Unmapped discovered: 1'); + }); + + it('validates Looker mappings through the canonical local warehouse descriptor', async () => { + const projectDir = await mkdtemp(join(tmpdir(), 'klo-cli-descriptor-validation-')); + await initKloProject({ projectDir, projectName: 'descriptor-validation' }); + const project = await loadKloProject({ projectDir }); + await project.fileStore.writeFile( + 'klo.yaml', + serializeKloProjectConfig({ + ...project.config, + 
connections: { + 'prod-looker': { + driver: 'looker', + mappings: { connectionMappings: { analytics: 'prod-warehouse' } }, + }, + 'prod-warehouse': { driver: 'postgresql', url: 'postgresql://readonly@db.test/analytics' }, + }, + }), + 'klo', + 'klo@example.com', + 'Seed descriptor validation', + ); + const io = makeIo(); + + await expect( + runKloConnectionMapping({ command: 'validate', projectDir, connectionId: 'prod-looker' }, io.io), + ).resolves.toBe(0); + + expect(io.stdout()).toContain('Mapping validation passed: prod-looker'); + expect(io.stderr()).toBe(''); + }); +}); diff --git a/packages/cli/src/commands/connection-mapping.ts b/packages/cli/src/commands/connection-mapping.ts new file mode 100644 index 00000000..dcf84a7d --- /dev/null +++ b/packages/cli/src/commands/connection-mapping.ts @@ -0,0 +1,426 @@ +import { readFile } from 'node:fs/promises'; +import { localConnectionToWarehouseDescriptor } from '@klo/context/connections'; +import { + DEFAULT_METABASE_CLIENT_CONFIG, + DefaultLookerConnectionClientFactory, + DefaultMetabaseConnectionClientFactory, + LocalLookerRuntimeStore, + LocalMetabaseSourceStateReader, + computeLookerMappingDrift, + computeMetabaseMappingDrift, + discoverLookerConnections, + discoverMetabaseDatabases, + lookerCredentialsFromLocalConnection, + metabaseRuntimeConfigFromLocalConnection, + seedLocalMappingStateFromKloYaml, + validateLookerMappings, + validateMappingPhysicalMatch, + type LookerMappingClient, + type MetabaseRuntimeClient, + type MetabaseSyncMode, +} from '@klo/context/ingest'; +import { type KloLocalProject, kloLocalStateDbPath, loadKloProject } from '@klo/context/project'; +import type { KloCliIo } from '../index.js'; +import { profileMark } from '../startup-profile.js'; + +profileMark('module:commands/connection-mapping'); + +export type KloConnectionMappingArgs = + | { command: 'list'; projectDir: string; connectionId: string; json: boolean } + | { + command: 'set'; + projectDir: string; + connectionId: string; + 
field: 'databaseMappings' | 'connectionMappings'; + key: string; + value: string; + } + | { command: 'apply-bulk'; projectDir: string; connectionId: string; filePath: string } + | { + command: 'set-sync-enabled'; + projectDir: string; + connectionId: string; + metabaseDatabaseId: number; + enabled: boolean; + } + | { command: 'sync-state-get'; projectDir: string; connectionId: string; json: boolean } + | { + command: 'sync-state-set'; + projectDir: string; + connectionId: string; + syncMode: MetabaseSyncMode; + collectionIds: number[]; + itemIds: number[]; + tagNames: string[]; + } + | { command: 'refresh'; projectDir: string; connectionId: string; autoAccept: boolean } + | { command: 'validate'; projectDir: string; connectionId: string } + | { command: 'clear'; projectDir: string; connectionId: string; metabaseDatabaseId?: number; mappingKey?: string }; + +interface KloConnectionMappingDeps { + createMetabaseClient?: ( + project: KloLocalProject, + connectionId: string, + ) => Promise>; + createLookerClient?: ( + project: KloLocalProject, + connectionId: string, + ) => Promise & { cleanup?(): Promise }>; +} + +interface MetabaseBulkMappingPayload { + databaseMappings?: Record; + syncEnabled?: Record; + syncMode?: MetabaseSyncMode; + selections?: { collections?: number[]; items?: number[] }; + defaultTagNames?: string[]; +} + +function parseId(value: string, label: string): number { + const parsed = Number(value); + if (!Number.isInteger(parsed) || parsed < 1) { + throw new Error(`${label} must be a positive integer`); + } + return parsed; +} + +async function createDefaultMetabaseClient( + project: KloLocalProject, + connectionId: string, +): Promise> { + const factory = new DefaultMetabaseConnectionClientFactory( + (metabaseConnectionId) => + metabaseRuntimeConfigFromLocalConnection(metabaseConnectionId, project.config.connections[metabaseConnectionId]), + DEFAULT_METABASE_CLIENT_CONFIG, + ); + return factory.createClient(connectionId); +} + +async function 
createDefaultLookerClient( + project: KloLocalProject, + connectionId: string, +): Promise & { cleanup?(): Promise }> { + const factory = new DefaultLookerConnectionClientFactory({ + async resolve(lookerConnectionId) { + return lookerCredentialsFromLocalConnection(lookerConnectionId, project.config.connections[lookerConnectionId]); + }, + }); + return factory.createClient(connectionId) as unknown as Pick & { + cleanup?(): Promise; + }; +} + +function isLookerConnection(project: KloLocalProject, connectionId: string): boolean { + return String(project.config.connections[connectionId]?.driver ?? '').toLowerCase() === 'looker'; +} + +function assertLookerConnection(project: KloLocalProject, connectionId: string): void { + if (!isLookerConnection(project, connectionId)) { + throw new Error(`Connection "${connectionId}" is not a Looker connection`); + } +} + +function assertMetabaseConnection(project: KloLocalProject, connectionId: string): void { + const connection = project.config.connections[connectionId]; + if (!connection || String(connection.driver).toLowerCase() !== 'metabase') { + throw new Error(`Connection "${connectionId}" is not a Metabase connection`); + } +} + +function assertTargetConnection(project: KloLocalProject, connectionId: string): void { + if (!project.config.connections[connectionId]) { + throw new Error(`Target connection "${connectionId}" does not exist`); + } +} + +function targetPhysicalInfo(project: KloLocalProject, connectionId: string) { + const descriptor = localConnectionToWarehouseDescriptor(connectionId, project.config.connections[connectionId]); + if (!descriptor) { + return { connection_type: 'UNKNOWN' }; + } + return { + connection_type: descriptor.connection_type, + host: descriptor.host ?? null, + database: descriptor.database ?? null, + account: descriptor.account ?? null, + project_id: descriptor.project_id ?? null, + dataset_id: descriptor.dataset_id ?? 
null, + ...descriptor.connection_params, + }; +} + +function renderMapping( + row: Awaited>[number], +): string { + const name = row.metabaseDatabaseName ?? 'unhydrated'; + const target = row.targetConnectionId ?? '[unmapped]'; + return `${row.metabaseDatabaseId} -> ${target} (${name}, sync: ${row.syncEnabled ? 'on' : 'off'}, source: ${ + row.source + })`; +} + +function renderLookerMapping(row: Awaited>[number]): string { + const target = row.kloConnectionId ?? '[unmapped]'; + const metadata = [row.lookerDialect, row.lookerHost, row.lookerDatabase].filter(Boolean).join(', '); + return `${row.lookerConnectionName} -> ${target}${metadata ? ` (${metadata}, source: ${row.source})` : ` (source: ${row.source})`}`; +} + +export async function runKloConnectionMapping( + args: KloConnectionMappingArgs, + io: KloCliIo = process, + deps: KloConnectionMappingDeps = {}, +): Promise { + try { + const project = await loadKloProject({ projectDir: args.projectDir }); + await seedLocalMappingStateFromKloYaml(project, args.connectionId); + if (isLookerConnection(project, args.connectionId)) { + assertLookerConnection(project, args.connectionId); + const store = new LocalLookerRuntimeStore({ dbPath: kloLocalStateDbPath(project) }); + + if (args.command === 'list') { + const rows = await store.listConnectionMappings(args.connectionId); + io.stdout.write(args.json ? 
`${JSON.stringify(rows, null, 2)}\n` : `${rows.map(renderLookerMapping).join('\n')}\n`); + return 0; + } + + if (args.command === 'set') { + if (args.field !== 'connectionMappings') { + throw new Error('Looker mapping set requires connectionMappings ='); + } + assertTargetConnection(project, args.value); + await store.upsertConnectionMapping({ + lookerConnectionId: args.connectionId, + lookerConnectionName: args.key, + kloConnectionId: args.value, + source: 'cli', + }); + io.stdout.write(`Set connectionMappings.${args.key} = ${args.value}\n`); + return 0; + } + + if (args.command === 'refresh') { + const client = await (deps.createLookerClient ?? createDefaultLookerClient)(project, args.connectionId); + try { + const discovered = await discoverLookerConnections(client); + const drift = computeLookerMappingDrift({ + storedMappings: await store.readMappings(args.connectionId), + discovered, + }); + if (args.autoAccept) { + await store.refreshDiscoveredConnections({ lookerConnectionId: args.connectionId, discovered }); + } + io.stdout.write(`Discovery: ${discovered.length} ${discovered.length === 1 ? 
'connection' : 'connections'}\n`); + io.stdout.write(`Unmapped discovered: ${drift.unmappedDiscovered.length}\n`); + io.stdout.write(`Stale mappings: ${drift.staleMappings.length}\n`); + return 0; + } finally { + await client.cleanup?.(); + } + } + + if (args.command === 'validate') { + const knownKloConnectionIds = new Set(Object.keys(project.config.connections)); + const knownConnectionTypes = new Map( + Object.entries(project.config.connections).map(([id, _config]) => [id, targetPhysicalInfo(project, id).connection_type]), + ); + const validation = validateLookerMappings({ + mappings: await store.readMappings(args.connectionId), + knownKloConnectionIds, + knownConnectionTypes, + }); + if (!validation.ok) { + for (const error of validation.errors) { + io.stderr.write(`${error.key}: ${error.reason}\n`); + } + return 1; + } + io.stdout.write(`Mapping validation passed: ${args.connectionId}\n`); + return 0; + } + + if (args.command === 'clear') { + await store.clearConnectionMappings({ + lookerConnectionId: args.connectionId, + lookerConnectionName: args.mappingKey ?? (args.metabaseDatabaseId ? String(args.metabaseDatabaseId) : undefined), + }); + io.stdout.write( + args.mappingKey + ? `Cleared connectionMappings.${args.mappingKey}\n` + : `Cleared mappings for ${args.connectionId}\n`, + ); + return 0; + } + + throw new Error(`Looker connection mapping does not support ${args.command}`); + } + + assertMetabaseConnection(project, args.connectionId); + const store = new LocalMetabaseSourceStateReader({ dbPath: kloLocalStateDbPath(project) }); + + if (args.command === 'list') { + const rows = await store.listDatabaseMappings(args.connectionId); + io.stdout.write(args.json ? 
`${JSON.stringify(rows, null, 2)}\n` : `${rows.map(renderMapping).join('\n')}\n`); + return 0; + } + + if (args.command === 'set') { + assertTargetConnection(project, args.value); + await store.upsertDatabaseMapping({ + connectionId: args.connectionId, + metabaseDatabaseId: parseId(args.key, 'metabaseDatabaseId'), + targetConnectionId: args.value, + syncEnabled: true, + source: 'cli', + }); + io.stdout.write(`Set databaseMappings.${args.key} = ${args.value}\n`); + return 0; + } + + if (args.command === 'apply-bulk') { + const payload = JSON.parse(await readFile(args.filePath, 'utf8')) as MetabaseBulkMappingPayload; + const existingState = await store.getSourceState(args.connectionId); + const existingRows = await store.listDatabaseMappings(args.connectionId); + const existingById = new Map(existingRows.map((row) => [row.metabaseDatabaseId, row])); + const databaseMappings = payload.databaseMappings ?? {}; + for (const targetConnectionId of Object.values(databaseMappings)) { + if (targetConnectionId) { + assertTargetConnection(project, targetConnectionId); + } + } + const mappingIds = new Set([ + ...existingRows.map((row) => row.metabaseDatabaseId), + ...Object.keys(databaseMappings).map((id) => parseId(id, 'metabaseDatabaseId')), + ...Object.keys(payload.syncEnabled ?? {}).map((id) => parseId(id, 'metabaseDatabaseId')), + ]); + await store.replaceSourceState({ + connectionId: args.connectionId, + syncMode: payload.syncMode ?? existingState.syncMode, + defaultTagNames: payload.defaultTagNames ?? existingState.defaultTagNames, + selections: + payload.selections === undefined + ? existingState.selections + : [ + ...(payload.selections.collections ?? []).map((id) => ({ + selectionType: 'collection' as const, + metabaseObjectId: id, + })), + ...(payload.selections.items ?? 
[]).map((id) => ({ + selectionType: 'item' as const, + metabaseObjectId: id, + })), + ], + mappings: [...mappingIds] + .sort((a, b) => a - b) + .map((id) => { + const existing = existingById.get(id); + return { + metabaseDatabaseId: id, + metabaseDatabaseName: existing?.metabaseDatabaseName ?? null, + metabaseEngine: existing?.metabaseEngine ?? null, + metabaseHost: existing?.metabaseHost ?? null, + metabaseDbName: existing?.metabaseDbName ?? null, + targetConnectionId: databaseMappings[String(id)] ?? existing?.targetConnectionId ?? null, + syncEnabled: payload.syncEnabled?.[String(id)] ?? existing?.syncEnabled ?? false, + source: 'cli', + }; + }), + }); + io.stdout.write(`Applied bulk mappings for ${args.connectionId}\n`); + return 0; + } + + if (args.command === 'set-sync-enabled') { + await store.setMappingSyncEnabled({ + connectionId: args.connectionId, + metabaseDatabaseId: args.metabaseDatabaseId, + syncEnabled: args.enabled, + }); + io.stdout.write(`Set syncEnabled.${args.metabaseDatabaseId} = ${args.enabled}\n`); + return 0; + } + + if (args.command === 'sync-state-get') { + const state = await store.getSourceState(args.connectionId); + const payload = { + syncMode: state.syncMode, + selections: state.selections, + defaultTagNames: state.defaultTagNames, + }; + io.stdout.write(args.json ? `${JSON.stringify(payload, null, 2)}\n` : `${payload.syncMode}\n`); + return 0; + } + + if (args.command === 'sync-state-set') { + await store.setSyncState({ + connectionId: args.connectionId, + syncMode: args.syncMode, + defaultTagNames: args.tagNames, + selections: [ + ...args.collectionIds.map((id) => ({ selectionType: 'collection' as const, metabaseObjectId: id })), + ...args.itemIds.map((id) => ({ selectionType: 'item' as const, metabaseObjectId: id })), + ], + }); + io.stdout.write(`Set sync state for ${args.connectionId}\n`); + return 0; + } + + if (args.command === 'refresh') { + const client = await (deps.createMetabaseClient ?? 
createDefaultMetabaseClient)(project, args.connectionId); + try { + const discovered = await discoverMetabaseDatabases(client); + const existing = Object.fromEntries( + (await store.listDatabaseMappings(args.connectionId)).map((row) => [ + String(row.metabaseDatabaseId), + row.targetConnectionId, + ]), + ); + const drift = computeMetabaseMappingDrift({ currentMappings: existing, discovered }); + if (args.autoAccept) { + await store.refreshDiscoveredDatabases({ connectionId: args.connectionId, discovered }); + } + io.stdout.write(`Discovery: ${discovered.length} ${discovered.length === 1 ? 'database' : 'databases'}\n`); + io.stdout.write(`Unmapped discovered: ${drift.unmappedDiscovered.length}\n`); + io.stdout.write(`Stale mappings: ${drift.staleMappings.length}\n`); + return 0; + } finally { + await client.cleanup(); + } + } + + if (args.command === 'validate') { + const rows = await store.listDatabaseMappings(args.connectionId); + const failures = rows.flatMap((row) => { + if (!row.targetConnectionId) { + return []; + } + const reason = validateMappingPhysicalMatch( + { metabaseEngine: row.metabaseEngine, metabaseDbName: row.metabaseDbName, metabaseHost: row.metabaseHost }, + project.config.connections[row.targetConnectionId] + ? targetPhysicalInfo(project, row.targetConnectionId) + : { connection_type: 'UNKNOWN' }, + ); + return reason ? [`${row.metabaseDatabaseId}: ${reason}`] : []; + }); + if (failures.length > 0) { + for (const failure of failures) { + io.stderr.write(`${failure}\n`); + } + return 1; + } + io.stdout.write(`Mapping validation passed: ${args.connectionId}\n`); + return 0; + } + + const metabaseDatabaseId = args.metabaseDatabaseId ?? (args.mappingKey ? parseId(args.mappingKey, 'metabaseDatabaseId') : undefined); + await store.clearDatabaseMappings({ connectionId: args.connectionId, metabaseDatabaseId }); + io.stdout.write( + metabaseDatabaseId + ? 
`Cleared databaseMappings.${metabaseDatabaseId}\n` + : `Cleared mappings for ${args.connectionId}\n`, + ); + return 0; + } catch (error) { + io.stderr.write(`${error instanceof Error ? error.message : String(error)}\n`); + return 1; + } +} diff --git a/packages/cli/src/commands/connection-metabase-commands.ts b/packages/cli/src/commands/connection-metabase-commands.ts new file mode 100644 index 00000000..236ef780 --- /dev/null +++ b/packages/cli/src/commands/connection-metabase-commands.ts @@ -0,0 +1,132 @@ +import { type Command, Option } from '@commander-js/extra-typings'; + +import { + type KloCliCommandContext, + parseNonEmptyAssignmentOption, + parsePositiveIntegerOption, + parseSafeConnectionIdOption, + resolveCommandProjectDir, +} from '../cli-program.js'; +import { + type KloConnectionMetabaseSetupArgs, + type MetabaseSetupMappingAssignment, + type MetabaseSetupSyncMode, + runKloConnectionMetabaseSetup, +} from './connection-metabase-setup.js'; + +const SYNC_MODE_CHOICES = ['ALL', 'ONLY', 'EXCEPT'] as const satisfies readonly MetabaseSetupSyncMode[]; + +interface ConnectionMetabaseSetupOptions { + id?: string; + url?: string; + apiKey?: string; + mintApiKey?: boolean; + username?: string; + password?: string; + map: MetabaseSetupMappingAssignment[]; + sync: number[]; + syncMode: MetabaseSetupSyncMode; + runIngest?: boolean; + yes?: boolean; + input?: boolean; +} + +function collectPositiveIntegerOption(value: string, previous: number[] = []): number[] { + return [...previous, parsePositiveIntegerOption(value)]; +} + +function parseMappingAssignment(value: string): MetabaseSetupMappingAssignment { + const assignment = parseNonEmptyAssignmentOption(value); + return { + metabaseDatabaseId: parsePositiveIntegerOption(assignment.key), + targetConnectionId: parseSafeConnectionIdOption(assignment.value), + }; +} + +function collectMappingOption( + value: string, + previous: MetabaseSetupMappingAssignment[] = [], +): MetabaseSetupMappingAssignment[] { + return 
[...previous, parseMappingAssignment(value)]; +} + +async function runMetabaseSetupArgs( + context: KloCliCommandContext, + args: KloConnectionMetabaseSetupArgs, +): Promise { + const runner = context.deps.connectionMetabaseSetup ?? runKloConnectionMetabaseSetup; + context.setExitCode(await runner(args, context.io)); +} + +export function registerConnectionMetabaseCommands(connection: Command, context: KloCliCommandContext): void { + const metabase = connection + .command('metabase') + .description('Configure Metabase connections') + .showHelpAfterError() + .addHelpText( + 'after', + '\nProject directory defaults to KLO_PROJECT_DIR when set, otherwise the current working directory.\n', + ); + + metabase.action(() => { + metabase.outputHelp(); + context.setExitCode(0); + }); + + metabase + .command('setup') + .description('Guided setup for a Metabase connection') + .option('--id ', 'KLO connection id to write', parseSafeConnectionIdOption) + .option('--url ', 'Metabase API URL') + .addOption(new Option('--api-key ', 'Metabase API key').conflicts('mintApiKey')) + .option('--mint-api-key', 'Mint a Metabase API key with credentials', false) + .option('--username ', 'Metabase admin username for API-key minting') + .option('--password ', 'Metabase admin password for API-key minting') + .addHelpText( + 'after', + '\nGuided equivalent of:\n' + + ' klo connection mapping refresh --auto-accept\n' + + ' klo connection mapping set databaseMappings =\n' + + ' klo connection mapping set-sync-enabled --enabled true\n' + + ' klo ingest \n', + ) + .option( + '--map ', + 'Assign a Metabase database id to a warehouse connection; repeatable', + collectMappingOption, + [], + ) + .option( + '--sync ', + 'Enable Metabase sync for a discovered database; repeatable', + collectPositiveIntegerOption, + [], + ) + .addOption( + new Option('--sync-mode ', 'Metabase sync selection mode') + .choices(SYNC_MODE_CHOICES) + .default('ALL' satisfies MetabaseSetupSyncMode), + ) + 
.option('--run-ingest', 'Run ingest after setup', false) + .option('--yes', 'Confirm and apply setup changes without prompting', false) + .option('--no-input', 'Disable interactive terminal input') + .showHelpAfterError() + .action(async (options: ConnectionMetabaseSetupOptions, command) => { + await runMetabaseSetupArgs(context, { + command: 'setup', + projectDir: resolveCommandProjectDir(command), + connectionId: options.id, + url: options.url, + apiKey: options.apiKey, + mintApiKey: options.mintApiKey === true, + metabaseUsername: options.username, + metabasePassword: options.password, + mappings: options.map, + syncEnabledDatabaseIds: options.sync, + syncMode: options.syncMode ?? 'ALL', + runIngest: options.runIngest === true, + yes: options.yes === true, + inputMode: options.input === false ? 'disabled' : 'auto', + }); + }); +} diff --git a/packages/cli/src/commands/connection-metabase-setup.test.ts b/packages/cli/src/commands/connection-metabase-setup.test.ts new file mode 100644 index 00000000..e1dd1063 --- /dev/null +++ b/packages/cli/src/commands/connection-metabase-setup.test.ts @@ -0,0 +1,1136 @@ +import { mkdtemp, readFile, rm } from 'node:fs/promises'; +import { tmpdir } from 'node:os'; +import { join } from 'node:path'; +import { LocalMetabaseSourceStateReader } from '@klo/context/ingest'; +import { initKloProject, kloLocalStateDbPath, loadKloProject, serializeKloProjectConfig } from '@klo/context/project'; +import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'; + +import { runKloConnectionMetabaseSetup } from './connection-metabase-setup.js'; + +const CANCEL_PROMPT = Symbol('cancel'); + +function createTestMetabaseSetupPromptAdapter(options: { + selects?: Array; + multiselects?: Array | typeof CANCEL_PROMPT>; + texts?: Array; + passwords?: Array; + confirms?: Array; + events?: string[]; +}) { + const selects = [...(options.selects ?? [])]; + const multiselects = [...(options.multiselects ?? 
[])]; + const texts = [...(options.texts ?? [])]; + const passwords = [...(options.passwords ?? [])]; + const confirms = [...(options.confirms ?? [])]; + const events = options.events ?? []; + + const cancelWithError = () => { + throw new Error('Setup cancelled.'); + }; + + return { + intro(title?: string): void { + events.push(`intro:${title ?? ''}`); + }, + outro(message?: string): void { + events.push(`outro:${message ?? ''}`); + }, + note(message: string, title: string): void { + events.push(`note:${title}:${message}`); + }, + log: { + info(message: string): void { + events.push(`log.info:${message}`); + }, + step(message: string): void { + events.push(`log.step:${message}`); + }, + success(message: string): void { + events.push(`log.success:${message}`); + }, + warn(message: string): void { + events.push(`log.warn:${message}`); + }, + error(message: string): void { + events.push(`log.error:${message}`); + }, + }, + spinner() { + return { + start(message: string): void { + events.push(`spinner.start:${message}`); + }, + stop(message: string): void { + events.push(`spinner.stop:${message}`); + }, + error(message: string): void { + events.push(`spinner.error:${message}`); + }, + }; + }, + async select(): Promise { + const next = selects.shift(); + if (next === CANCEL_PROMPT) { + cancelWithError(); + } + return next as T; + }, + async multiselect(options?: { message: string }): Promise { + events.push(`multiselect:${options?.message ?? ''}`); + const next = multiselects.shift(); + if (next === CANCEL_PROMPT) { + cancelWithError(); + } + return (next ?? []) as Value[]; + }, + async text(): Promise { + const next = texts.shift(); + if (next === CANCEL_PROMPT) { + cancelWithError(); + } + return (next ?? '').toString(); + }, + async password(): Promise { + const next = passwords.shift(); + if (next === CANCEL_PROMPT) { + cancelWithError(); + } + return (next ?? 
'').toString(); + }, + async confirm(): Promise { + const next = confirms.shift(); + if (next === CANCEL_PROMPT) { + cancelWithError(); + } + return next === true; + }, + cancel(): void { + return; + }, + }; +} + +function makeIo(options: { isTTY?: boolean; stdinIsTTY?: boolean } = {}) { + let stdout = ''; + let stderr = ''; + return { + io: { + stdin: { + isTTY: options.stdinIsTTY, + }, + stdout: { + isTTY: options.isTTY, + write: (chunk: string) => { + stdout += chunk; + }, + }, + stderr: { + write: (chunk: string) => { + stderr += chunk; + }, + }, + }, + stdout: () => stdout, + stderr: () => stderr, + }; +} + +describe('runKloConnectionMetabaseSetup', () => { + const fakeMetabaseCredential = 'mb_example'; + const existingMetabaseCredential = 'mb_existing'; + const fakeAdminCredential = 'pw'; + + let tempDir: string; + let projectDir: string; + + beforeEach(async () => { + tempDir = await mkdtemp(join(tmpdir(), 'klo-cli-metabase-setup-')); + projectDir = join(tempDir, 'project'); + await initKloProject({ projectDir, projectName: 'metabase-setup' }); + }); + + afterEach(async () => { + await rm(tempDir, { recursive: true, force: true }); + }); + + async function writeConnections(connections: Record) { + const project = await loadKloProject({ projectDir }); + await project.fileStore.writeFile( + 'klo.yaml', + serializeKloProjectConfig({ + ...project.config, + connections, + }), + 'klo', + 'klo@example.com', + 'Seed Metabase setup test connections', + ); + } + + function makeMetabaseClient(options: { + testConnectionSuccess: boolean; + databases: Array<{ + id: number; + name: string; + engine: string; + details?: { host?: string; dbname?: string }; + is_sample?: boolean; + }>; + }) { + return { + testConnection: vi.fn().mockResolvedValue({ success: options.testConnectionSuccess }), + getDatabases: vi.fn().mockResolvedValue(options.databases), + cleanup: vi.fn().mockResolvedValue(undefined), + }; + } + + it('covers the headless happy path', async () => { + await 
writeConnections({ + orbit: { + driver: 'postgres', + url: 'postgresql://readonly@pg.internal/analytics', + readonly: true, + }, + }); + + const metabaseClient = makeMetabaseClient({ + testConnectionSuccess: true, + databases: [ + { + id: 2, + name: 'Analytics', + engine: 'postgres', + details: { host: 'pg.internal', dbname: 'analytics' }, + is_sample: false, + }, + ], + }); + const io = makeIo(); + + await expect( + runKloConnectionMetabaseSetup( + { + command: 'setup', + projectDir, + connectionId: 'metabase', + url: 'http://metabase.example.test:3000', + apiKey: fakeMetabaseCredential, + mintApiKey: false, + mappings: [{ metabaseDatabaseId: 2, targetConnectionId: 'orbit' }], + syncEnabledDatabaseIds: [2], + syncMode: 'ALL', + runIngest: false, + yes: true, + inputMode: 'disabled', + }, + io.io, + { createMetabaseClient: async () => metabaseClient as never }, + ), + ).resolves.toBe(0); + + expect(io.stdout()).toContain('Connection: metabase'); + expect(io.stdout()).toContain('Discovered 1 database'); + expect(io.stdout()).toContain(`klo ingest metabase --project-dir ${projectDir}`); + expect(io.stdout()).not.toContain('mb_example'); + expect(io.stderr()).not.toContain('mb_example'); + + const config = await readFile(join(projectDir, 'klo.yaml'), 'utf-8'); + expect(config).toContain('driver: metabase'); + expect(config).toContain('api_url: http://metabase.example.test:3000'); + expect(config).toContain('api_key: mb_example'); + + const updatedProject = await loadKloProject({ projectDir }); + const store = new LocalMetabaseSourceStateReader({ dbPath: kloLocalStateDbPath(updatedProject) }); + await expect(store.listDatabaseMappings('metabase')).resolves.toMatchObject([ + { + metabaseDatabaseId: 2, + metabaseDatabaseName: 'Analytics', + targetConnectionId: 'orbit', + syncEnabled: true, + }, + ]); + }); + + it('auto-maps and enables sync in --no-input --yes when deterministic', async () => { + await writeConnections({ + orbit: { + driver: 'postgres', + url: 
'postgresql://readonly@pg.internal/analytics', + readonly: true, + }, + }); + + const metabaseClient = makeMetabaseClient({ + testConnectionSuccess: true, + databases: [ + { + id: 2, + name: 'Analytics', + engine: 'postgres', + details: { host: 'pg.internal', dbname: 'analytics' }, + is_sample: false, + }, + ], + }); + const io = makeIo(); + + await expect( + runKloConnectionMetabaseSetup( + { + command: 'setup', + projectDir, + connectionId: 'metabase', + url: 'http://metabase.example.test:3000', + apiKey: fakeMetabaseCredential, + mintApiKey: false, + mappings: [], + syncEnabledDatabaseIds: [], + syncMode: 'ALL', + runIngest: false, + yes: true, + inputMode: 'disabled', + }, + io.io, + { createMetabaseClient: async () => metabaseClient as never }, + ), + ).resolves.toBe(0); + + const updatedProject = await loadKloProject({ projectDir }); + const store = new LocalMetabaseSourceStateReader({ dbPath: kloLocalStateDbPath(updatedProject) }); + await expect(store.listDatabaseMappings('metabase')).resolves.toMatchObject([ + { metabaseDatabaseId: 2, targetConnectionId: 'orbit', syncEnabled: true }, + ]); + }); + + it('fails in --no-input when mapping/sync are missing and --yes is false', async () => { + await writeConnections({ + orbit: { driver: 'postgres', url: 'postgresql://readonly@pg.internal/analytics' }, + }); + + const metabaseClient = makeMetabaseClient({ + testConnectionSuccess: true, + databases: [{ id: 2, name: 'Analytics', engine: 'postgres', is_sample: false }], + }); + const io = makeIo(); + + await expect( + runKloConnectionMetabaseSetup( + { + command: 'setup', + projectDir, + connectionId: 'metabase', + url: 'http://metabase.example.test:3000', + apiKey: fakeMetabaseCredential, + mintApiKey: false, + mappings: [], + syncEnabledDatabaseIds: [], + syncMode: 'ALL', + runIngest: false, + yes: false, + inputMode: 'disabled', + }, + io.io, + { createMetabaseClient: async () => metabaseClient as never }, + ), + ).resolves.toBe(1); + + 
expect(io.stderr()).toMatch(/--map/i); + expect(io.stderr()).toMatch(/--sync/i); + }); + + it('enables sync for explicitly mapped databases in --no-input --yes when --sync is omitted', async () => { + await writeConnections({ + orbit: { driver: 'postgres', url: 'postgresql://readonly@pg.internal/analytics' }, + }); + + const metabaseClient = makeMetabaseClient({ + testConnectionSuccess: true, + databases: [{ id: 2, name: 'Analytics', engine: 'postgres', is_sample: false }], + }); + const io = makeIo(); + + await expect( + runKloConnectionMetabaseSetup( + { + command: 'setup', + projectDir, + connectionId: 'metabase', + url: 'http://metabase.example.test:3000', + apiKey: fakeMetabaseCredential, + mintApiKey: false, + mappings: [{ metabaseDatabaseId: 2, targetConnectionId: 'orbit' }], + syncEnabledDatabaseIds: [], + syncMode: 'ALL', + runIngest: false, + yes: true, + inputMode: 'disabled', + }, + io.io, + { createMetabaseClient: async () => metabaseClient as never }, + ), + ).resolves.toBe(0); + + const updatedProject = await loadKloProject({ projectDir }); + const store = new LocalMetabaseSourceStateReader({ dbPath: kloLocalStateDbPath(updatedProject) }); + await expect(store.listDatabaseMappings('metabase')).resolves.toMatchObject([ + { metabaseDatabaseId: 2, targetConnectionId: 'orbit', syncEnabled: true }, + ]); + }); + + it('fails in no-input mode when the Metabase URL is missing', async () => { + await writeConnections({ + orbit: { driver: 'postgres', url: 'postgresql://readonly@pg.internal/analytics' }, + }); + const io = makeIo(); + + await expect( + runKloConnectionMetabaseSetup( + { + command: 'setup', + projectDir, + connectionId: 'metabase', + apiKey: fakeMetabaseCredential, + mintApiKey: false, + mappings: [], + syncEnabledDatabaseIds: [], + syncMode: 'ALL', + runIngest: false, + yes: true, + inputMode: 'disabled', + }, + io.io, + ), + ).resolves.toBe(1); + + expect(io.stderr()).toContain('missing Metabase URL'); + }); + + it('fails in no-input mode when 
the Metabase API key is missing', async () => { + await writeConnections({ + orbit: { driver: 'postgres', url: 'postgresql://readonly@pg.internal/analytics' }, + }); + const io = makeIo(); + + await expect( + runKloConnectionMetabaseSetup( + { + command: 'setup', + projectDir, + connectionId: 'metabase', + url: 'http://metabase.example.test:3000', + mintApiKey: false, + mappings: [], + syncEnabledDatabaseIds: [], + syncMode: 'ALL', + runIngest: false, + yes: true, + inputMode: 'disabled', + }, + io.io, + ), + ).resolves.toBe(1); + + expect(io.stderr()).toContain('missing Metabase API key'); + }); + + it('names missing minting flags before rejecting minting', async () => { + await writeConnections({ + orbit: { driver: 'postgres', url: 'postgresql://readonly@pg.internal/analytics' }, + }); + + const missingUsernameIo = makeIo(); + await expect( + runKloConnectionMetabaseSetup( + { + command: 'setup', + projectDir, + connectionId: 'metabase', + url: 'http://metabase.example.test:3000', + mintApiKey: true, + metabasePassword: fakeAdminCredential, + mappings: [], + syncEnabledDatabaseIds: [], + syncMode: 'ALL', + runIngest: false, + yes: true, + inputMode: 'disabled', + }, + missingUsernameIo.io, + ), + ).resolves.toBe(1); + expect(missingUsernameIo.stderr()).toContain('--username'); + + const missingPasswordIo = makeIo(); + await expect( + runKloConnectionMetabaseSetup( + { + command: 'setup', + projectDir, + connectionId: 'metabase', + url: 'http://metabase.example.test:3000', + mintApiKey: true, + metabaseUsername: 'user', + mappings: [], + syncEnabledDatabaseIds: [], + syncMode: 'ALL', + runIngest: false, + yes: true, + inputMode: 'disabled', + }, + missingPasswordIo.io, + ), + ).resolves.toBe(1); + expect(missingPasswordIo.stderr()).toContain('--password'); + + const mintedMetabaseCredential = 'mb_minted'; + const metabaseClient = makeMetabaseClient({ + testConnectionSuccess: true, + databases: [ + { + id: 2, + name: 'Analytics', + engine: 'postgres', + details: { 
host: 'pg.internal', dbname: 'analytics' }, + is_sample: false, + }, + ], + }); + const createMetabaseClient = vi.fn(async () => metabaseClient as never); + const mintMetabaseApiKey = vi.fn(async () => mintedMetabaseCredential); + const mintingIo = makeIo(); + + await expect( + runKloConnectionMetabaseSetup( + { + command: 'setup', + projectDir, + connectionId: 'metabase', + url: 'http://metabase.example.test:3000', + mintApiKey: true, + metabaseUsername: 'user', + metabasePassword: fakeAdminCredential, + mappings: [{ metabaseDatabaseId: 2, targetConnectionId: 'orbit' }], + syncEnabledDatabaseIds: [2], + syncMode: 'ALL', + runIngest: false, + yes: true, + inputMode: 'disabled', + }, + mintingIo.io, + { createMetabaseClient, mintMetabaseApiKey }, + ), + ).resolves.toBe(0); + + expect(mintMetabaseApiKey).toHaveBeenCalledTimes(1); + expect(mintMetabaseApiKey).toHaveBeenCalledWith( + expect.objectContaining({ + url: 'http://metabase.example.test:3000', + username: 'user', + password: fakeAdminCredential, + }), + expect.anything(), + ); + + expect(createMetabaseClient).toHaveBeenCalledTimes(1); + expect(mintingIo.stdout()).not.toContain(mintedMetabaseCredential); + expect(mintingIo.stderr()).not.toContain(mintedMetabaseCredential); + expect(mintingIo.stdout()).not.toContain(fakeAdminCredential); + expect(mintingIo.stderr()).not.toContain(fakeAdminCredential); + + const config = await readFile(join(projectDir, 'klo.yaml'), 'utf-8'); + expect(config).toContain('driver: metabase'); + expect(config).toContain('api_url: http://metabase.example.test:3000'); + expect(config).toContain(`api_key: ${mintedMetabaseCredential}`); + }); + + it('requires at least one warehouse connection', async () => { + await writeConnections({}); + const io = makeIo(); + + await expect( + runKloConnectionMetabaseSetup( + { + command: 'setup', + projectDir, + connectionId: 'metabase', + url: 'http://metabase.example.test:3000', + apiKey: fakeMetabaseCredential, + mintApiKey: false, + mappings: [], 
+ syncEnabledDatabaseIds: [], + syncMode: 'ALL', + runIngest: false, + yes: true, + inputMode: 'disabled', + }, + io.io, + ), + ).resolves.toBe(1); + + expect(io.stderr()).toContain('Add a warehouse connection first'); + }); + + it('fails in --no-input --yes when a deterministic warehouse mapping cannot be derived', async () => { + await writeConnections({ + orbit: { driver: 'postgres', url: 'postgresql://readonly@pg.internal/analytics' }, + warehouse2: { driver: 'postgres', url: 'postgresql://readonly@pg.internal/analytics' }, + }); + const metabaseClient = makeMetabaseClient({ + testConnectionSuccess: true, + databases: [ + { + id: 2, + name: 'Analytics', + engine: 'postgres', + details: { host: 'pg.internal', dbname: 'analytics' }, + is_sample: false, + }, + ], + }); + const io = makeIo(); + + await expect( + runKloConnectionMetabaseSetup( + { + command: 'setup', + projectDir, + connectionId: 'metabase', + url: 'http://metabase.example.test:3000', + apiKey: fakeMetabaseCredential, + mintApiKey: false, + mappings: [], + syncEnabledDatabaseIds: [], + syncMode: 'ALL', + runIngest: false, + yes: true, + inputMode: 'disabled', + }, + io.io, + { createMetabaseClient: async () => metabaseClient as never }, + ), + ).resolves.toBe(1); + + expect(io.stderr()).toMatch(/--map/i); + expect(io.stderr()).toMatch(/--sync/i); + }); + + it('auto-enables sync in --no-input --yes from explicit mappings even when multiple databases are discovered', async () => { + await writeConnections({ + orbit: { driver: 'postgres', url: 'postgresql://readonly@pg.internal/analytics' }, + }); + const metabaseClient = makeMetabaseClient({ + testConnectionSuccess: true, + databases: [ + { + id: 1, + name: 'Analytics', + engine: 'postgres', + details: { host: 'pg.internal', dbname: 'analytics' }, + is_sample: false, + }, + { + id: 2, + name: 'Finance', + engine: 'postgres', + details: { host: 'pg.internal', dbname: 'finance' }, + is_sample: false, + }, + ], + }); + const io = makeIo(); + + await 
expect( + runKloConnectionMetabaseSetup( + { + command: 'setup', + projectDir, + connectionId: 'metabase', + url: 'http://metabase.example.test:3000', + apiKey: fakeMetabaseCredential, + mintApiKey: false, + mappings: [{ metabaseDatabaseId: 1, targetConnectionId: 'orbit' }], + syncEnabledDatabaseIds: [], + syncMode: 'ALL', + runIngest: false, + yes: true, + inputMode: 'disabled', + }, + io.io, + { createMetabaseClient: async () => metabaseClient as never }, + ), + ).resolves.toBe(0); + + const updatedProject = await loadKloProject({ projectDir }); + const store = new LocalMetabaseSourceStateReader({ dbPath: kloLocalStateDbPath(updatedProject) }); + await expect(store.listDatabaseMappings('metabase')).resolves.toMatchObject([ + { metabaseDatabaseId: 1, targetConnectionId: 'orbit', syncEnabled: true }, + { metabaseDatabaseId: 2, targetConnectionId: null, syncEnabled: false }, + ]); + }); + + it('suggests updating api_key or using minting when authentication fails', async () => { + await writeConnections({ + orbit: { driver: 'postgres', url: 'postgresql://readonly@pg.internal/analytics' }, + }); + const metabaseClient = makeMetabaseClient({ testConnectionSuccess: false, databases: [] }); + const io = makeIo(); + + await expect( + runKloConnectionMetabaseSetup( + { + command: 'setup', + projectDir, + connectionId: 'metabase', + url: 'http://metabase.example.test:3000', + apiKey: fakeMetabaseCredential, + mintApiKey: false, + mappings: [], + syncEnabledDatabaseIds: [], + syncMode: 'ALL', + runIngest: false, + yes: true, + inputMode: 'disabled', + }, + io.io, + { createMetabaseClient: async () => metabaseClient as never }, + ), + ).resolves.toBe(1); + + expect(io.stderr()).toContain('connections.metabase.api_key'); + expect(io.stderr()).toContain('--mint-api-key'); + expect(io.stderr()).not.toContain('mb_example'); + }); + + it('fails when Metabase returns no usable databases', async () => { + await writeConnections({ + orbit: { driver: 'postgres', url: 
'postgresql://readonly@pg.internal/analytics' }, + }); + const metabaseClient = makeMetabaseClient({ + testConnectionSuccess: true, + databases: [{ id: 1, name: 'Sample', engine: 'h2', is_sample: true }], + }); + const io = makeIo(); + + await expect( + runKloConnectionMetabaseSetup( + { + command: 'setup', + projectDir, + connectionId: 'metabase', + url: 'http://metabase.example.test:3000', + apiKey: fakeMetabaseCredential, + mintApiKey: false, + mappings: [], + syncEnabledDatabaseIds: [], + syncMode: 'ALL', + runIngest: false, + yes: true, + inputMode: 'disabled', + }, + io.io, + { createMetabaseClient: async () => metabaseClient as never }, + ), + ).resolves.toBe(1); + + expect(io.stderr()).toContain('no usable databases'); + }); + + it('preserves setup writes when --run-ingest fails and reports the debug command', async () => { + await writeConnections({ + orbit: { + driver: 'postgres', + url: 'postgresql://readonly@pg.internal/analytics', + readonly: true, + }, + }); + + const metabaseClient = makeMetabaseClient({ + testConnectionSuccess: true, + databases: [ + { + id: 2, + name: 'Analytics', + engine: 'postgres', + details: { host: 'pg.internal', dbname: 'analytics' }, + is_sample: false, + }, + ], + }); + const io = makeIo(); + + await expect( + runKloConnectionMetabaseSetup( + { + command: 'setup', + projectDir, + connectionId: 'metabase', + url: 'http://metabase.example.test:3000', + apiKey: fakeMetabaseCredential, + mintApiKey: false, + mappings: [{ metabaseDatabaseId: 2, targetConnectionId: 'orbit' }], + syncEnabledDatabaseIds: [2], + syncMode: 'ALL', + runIngest: true, + yes: true, + inputMode: 'disabled', + }, + io.io, + { + createMetabaseClient: async () => metabaseClient as never, + runPublicIngest: vi.fn(async () => 1), + }, + ), + ).resolves.toBe(1); + + const config = await readFile(join(projectDir, 'klo.yaml'), 'utf-8'); + expect(config).toContain('driver: metabase'); + expect(io.stderr()).toContain(`klo ingest metabase --project-dir 
${projectDir}`); + + const updatedProject = await loadKloProject({ projectDir }); + const store = new LocalMetabaseSourceStateReader({ dbPath: kloLocalStateDbPath(updatedProject) }); + await expect(store.listDatabaseMappings('metabase')).resolves.toMatchObject([ + { metabaseDatabaseId: 2, targetConnectionId: 'orbit' }, + ]); + }); + + it('reuses existing connection id and values when --id, --url, and --api-key are omitted', async () => { + await writeConnections({ + 'prod-metabase': { + driver: 'metabase', + api_url: 'http://metabase.example.test:3000', + api_key: existingMetabaseCredential, + }, + orbit: { driver: 'postgres', url: 'postgresql://readonly@pg.internal/analytics' }, + }); + + const metabaseClient = makeMetabaseClient({ + testConnectionSuccess: true, + databases: [{ id: 2, name: 'Analytics', engine: 'postgres', is_sample: false }], + }); + const io = makeIo(); + + await expect( + runKloConnectionMetabaseSetup( + { + command: 'setup', + projectDir, + mintApiKey: false, + mappings: [{ metabaseDatabaseId: 2, targetConnectionId: 'orbit' }], + syncEnabledDatabaseIds: [2], + syncMode: 'ALL', + runIngest: false, + yes: true, + inputMode: 'disabled', + }, + io.io, + { createMetabaseClient: async () => metabaseClient as never }, + ), + ).resolves.toBe(0); + + expect(io.stdout()).toContain('Connection: prod-metabase'); + expect(io.stdout()).not.toContain('mb_existing'); + expect(io.stderr()).not.toContain('mb_existing'); + }); + + it('covers interactive happy path when URL/key/mapping/sync are missing but deterministic', async () => { + await writeConnections({ + orbit: { + driver: 'postgres', + url: 'postgresql://readonly@pg.internal/analytics', + readonly: true, + }, + }); + + const metabaseClient = makeMetabaseClient({ + testConnectionSuccess: true, + databases: [ + { + id: 2, + name: 'Analytics', + engine: 'postgres', + details: { host: 'pg.internal', dbname: 'analytics' }, + is_sample: false, + }, + ], + }); + const io = makeIo({ isTTY: true, stdinIsTTY: 
true }); + const interactiveMetabaseCredential = 'mb_interactive_fixture'; + + await expect( + runKloConnectionMetabaseSetup( + { + command: 'setup', + projectDir, + mintApiKey: false, + mappings: [], + syncEnabledDatabaseIds: [], + syncMode: 'ALL', + runIngest: false, + yes: false, + inputMode: 'auto', + }, + io.io, + { + createMetabaseClient: async () => metabaseClient as never, + prompts: createTestMetabaseSetupPromptAdapter({ + texts: ['http://metabase.example.test:3000'], + selects: ['paste'], + passwords: [interactiveMetabaseCredential], + confirms: [true], + }), + }, + ), + ).resolves.toBe(0); + + const config = await readFile(join(projectDir, 'klo.yaml'), 'utf-8'); + expect(config).toContain('driver: metabase'); + expect(config).toContain('api_url: http://metabase.example.test:3000'); + expect(config).toContain(`api_key: ${interactiveMetabaseCredential}`); + + const updatedProject = await loadKloProject({ projectDir }); + const store = new LocalMetabaseSourceStateReader({ dbPath: kloLocalStateDbPath(updatedProject) }); + await expect(store.listDatabaseMappings('metabase')).resolves.toMatchObject([ + { + metabaseDatabaseId: 2, + targetConnectionId: 'orbit', + syncEnabled: true, + }, + ]); + + expect(io.stdout()).not.toContain(interactiveMetabaseCredential); + expect(io.stderr()).not.toContain(interactiveMetabaseCredential); + }); + + it('guides interactive setup for multiple databases and warehouses', async () => { + await writeConnections({ + orbit: { driver: 'postgres', url: 'postgresql://readonly@pg.internal/analytics', readonly: true }, + warehouse2: { driver: 'postgres', url: 'postgresql://readonly@pg.internal/finance', readonly: true }, + }); + + const metabaseClient = makeMetabaseClient({ + testConnectionSuccess: true, + databases: [ + { + id: 2, + name: 'Analytics', + engine: 'postgres', + details: { host: 'pg.internal', dbname: 'analytics' }, + is_sample: false, + }, + { + id: 3, + name: 'Finance', + engine: 'postgres', + details: { host: 
'pg.internal', dbname: 'finance' }, + is_sample: false, + }, + ], + }); + const io = makeIo({ isTTY: true, stdinIsTTY: true }); + const interactiveMetabaseCredential = 'mb_interactive_multi'; + const events: string[] = []; + + await expect( + runKloConnectionMetabaseSetup( + { + command: 'setup', + projectDir, + mintApiKey: false, + mappings: [], + syncEnabledDatabaseIds: [], + syncMode: 'ALL', + runIngest: false, + yes: false, + inputMode: 'auto', + }, + io.io, + { + createMetabaseClient: async () => metabaseClient as never, + prompts: createTestMetabaseSetupPromptAdapter({ + texts: ['http://metabase.example.test:3000'], + selects: ['paste', 'orbit', 'warehouse2'], + passwords: [interactiveMetabaseCredential], + multiselects: [[2, 3], [2]], + confirms: [true], + events, + }), + }, + ), + ).resolves.toBe(0); + + const updatedProject = await loadKloProject({ projectDir }); + const store = new LocalMetabaseSourceStateReader({ dbPath: kloLocalStateDbPath(updatedProject) }); + await expect(store.listDatabaseMappings('metabase')).resolves.toMatchObject([ + { metabaseDatabaseId: 2, targetConnectionId: 'orbit', syncEnabled: true }, + { metabaseDatabaseId: 3, targetConnectionId: 'warehouse2', syncEnabled: false }, + ]); + + expect(io.stdout()).not.toContain(interactiveMetabaseCredential); + expect(io.stderr()).not.toContain(interactiveMetabaseCredential); + expect(events).toContain( + 'multiselect:Select Metabase databases to configure\nUse Up/Down to move, Space to select or unselect, Enter to confirm, Escape to go back, or Ctrl+C to exit.', + ); + expect(events).toContain( + 'multiselect:Enable sync for which databases?\nUse Up/Down to move, Space to select or unselect, Enter to confirm, Escape to go back, or Ctrl+C to exit.', + ); + }); + + it('emits guided progress via the interaction toolkit in interactive mode', async () => { + await writeConnections({ + orbit: { driver: 'postgres', url: 'postgresql://readonly@pg.internal/analytics', readonly: true }, + }); + + const 
metabaseClient = makeMetabaseClient({ + testConnectionSuccess: true, + databases: [ + { + id: 2, + name: 'Analytics', + engine: 'postgres', + details: { host: 'pg.internal', dbname: 'analytics' }, + is_sample: false, + }, + ], + }); + const io = makeIo({ isTTY: true, stdinIsTTY: true }); + const interactiveMetabaseCredential = 'mb_interaction_toolkit'; + const events: string[] = []; + + await expect( + runKloConnectionMetabaseSetup( + { + command: 'setup', + projectDir, + mintApiKey: false, + mappings: [], + syncEnabledDatabaseIds: [], + syncMode: 'ALL', + runIngest: false, + yes: false, + inputMode: 'auto', + }, + io.io, + { + createMetabaseClient: async () => metabaseClient as never, + prompts: createTestMetabaseSetupPromptAdapter({ + events, + texts: ['http://metabase.example.test:3000'], + selects: ['paste'], + passwords: [interactiveMetabaseCredential], + confirms: [true], + }), + }, + ), + ).resolves.toBe(0); + + expect(events).toContain('intro:KLO Metabase setup'); + expect(events.some((event) => event.startsWith('spinner.start:Testing Metabase connection'))).toBe(true); + expect(events.some((event) => event.startsWith('spinner.stop:Metabase reachable'))).toBe(true); + expect(events.some((event) => event.startsWith('spinner.start:Discovering Metabase databases'))).toBe(true); + expect(events.some((event) => event.startsWith('log.success:Discovered 1 database'))).toBe(true); + expect(events.some((event) => event.startsWith('note:Summary:'))).toBe(true); + expect(events).toContain('outro:Metabase setup complete'); + + expect(events.join('\n')).not.toContain(interactiveMetabaseCredential); + expect(io.stdout()).not.toContain(interactiveMetabaseCredential); + expect(io.stderr()).not.toContain(interactiveMetabaseCredential); + }); + + it('fails in --no-input when multiple Metabase connections exist and --id is omitted', async () => { + await writeConnections({ + metabase1: { + driver: 'metabase', + api_url: 'http://metabase.example.test:3000', + api_key: 
existingMetabaseCredential, + }, + metabase2: { + driver: 'metabase', + api_url: 'http://metabase.example.test:3000', + api_key: existingMetabaseCredential, + }, + orbit: { driver: 'postgres', url: 'postgresql://readonly@pg.internal/analytics' }, + }); + const io = makeIo(); + + await expect( + runKloConnectionMetabaseSetup( + { + command: 'setup', + projectDir, + mintApiKey: false, + mappings: [], + syncEnabledDatabaseIds: [], + syncMode: 'ALL', + runIngest: false, + yes: true, + inputMode: 'disabled', + }, + io.io, + ), + ).resolves.toBe(1); + + expect(io.stderr()).toMatch(/--id/i); + }); + + it('treats prompt cancellation as a clean exit without writes', async () => { + await writeConnections({ + orbit: { + driver: 'postgres', + url: 'postgresql://readonly@pg.internal/analytics', + readonly: true, + }, + }); + + const beforeConfig = await readFile(join(projectDir, 'klo.yaml'), 'utf-8'); + const metabaseClient = makeMetabaseClient({ + testConnectionSuccess: true, + databases: [ + { + id: 2, + name: 'Analytics', + engine: 'postgres', + details: { host: 'pg.internal', dbname: 'analytics' }, + is_sample: false, + }, + ], + }); + const io = makeIo({ isTTY: true, stdinIsTTY: true }); + const cancelMetabaseCredential = 'mb_cancel_fixture'; + + await expect( + runKloConnectionMetabaseSetup( + { + command: 'setup', + projectDir, + mintApiKey: false, + mappings: [], + syncEnabledDatabaseIds: [], + syncMode: 'ALL', + runIngest: false, + yes: false, + inputMode: 'auto', + }, + io.io, + { + createMetabaseClient: async () => metabaseClient as never, + prompts: createTestMetabaseSetupPromptAdapter({ + texts: ['http://metabase.example.test:3000'], + selects: ['paste'], + passwords: [cancelMetabaseCredential], + confirms: [CANCEL_PROMPT], + }), + }, + ), + ).resolves.toBe(1); + + expect(io.stderr()).toContain('Setup cancelled.'); + expect(io.stderr()).not.toContain(cancelMetabaseCredential); + + const afterConfig = await readFile(join(projectDir, 'klo.yaml'), 'utf-8'); + 
expect(afterConfig).toBe(beforeConfig); + + const updatedProject = await loadKloProject({ projectDir }); + const store = new LocalMetabaseSourceStateReader({ dbPath: kloLocalStateDbPath(updatedProject) }); + await expect(store.listDatabaseMappings('metabase')).resolves.toEqual([]); + }); +}); diff --git a/packages/cli/src/commands/connection-metabase-setup.ts b/packages/cli/src/commands/connection-metabase-setup.ts new file mode 100644 index 00000000..62f1d086 --- /dev/null +++ b/packages/cli/src/commands/connection-metabase-setup.ts @@ -0,0 +1,782 @@ +import type { Option as ClackOption } from '@clack/prompts'; +import { + cancel, + confirm, + intro, + isCancel, + log, + multiselect, + note, + outro, + password, + select, + text, +} from '@clack/prompts'; +import { localConnectionToWarehouseDescriptor } from '@klo/context/connections'; +import { + DEFAULT_METABASE_CLIENT_CONFIG, + DefaultMetabaseConnectionClientFactory, + LocalMetabaseSourceStateReader, + MetabaseClient, + type MetabaseDatabase, + type MetabaseRuntimeClient, + type MetabaseSyncMode, + metabaseRuntimeConfigFromLocalConnection, + validateMappingPhysicalMatch, +} from '@klo/context/ingest'; +import { + type KloLocalProject, + type KloProjectConnectionConfig, + kloLocalStateDbPath, + loadKloProject, + serializeKloProjectConfig, +} from '@klo/context/project'; + +import { createClackSpinner, type KloCliSpinner } from '../clack.js'; +import type { KloCliIo } from '../cli-runtime.js'; +import { withMenuOptionsSpacing, withMultiselectNavigation } from '../prompt-navigation.js'; +import { type KloPublicIngestArgs, runKloPublicIngest } from '../public-ingest.js'; + +export type KloMetabaseSetupInputMode = 'auto' | 'disabled'; + +export type MetabaseSetupSyncMode = MetabaseSyncMode; + +type MetabaseSetupPromptOption = ClackOption; + +export interface MetabaseSetupLogger { + info(message: string): void; + step(message: string): void; + success(message: string): void; + warn(message: string): void; + 
error(message: string): void; +} + +export interface MetabaseSetupPromptAdapter { + intro(title?: string): void; + outro(message?: string): void; + note(message: string, title: string): void; + log: MetabaseSetupLogger; + spinner(): KloCliSpinner; + select(options: { message: string; options: Array> }): Promise; + multiselect(options: { + message: string; + options: Array>; + initialValues?: Value[]; + required?: boolean; + maxItems?: number; + }): Promise; + text(options: { message: string; placeholder?: string }): Promise; + password(options: { message: string }): Promise; + confirm(options: { message: string; initialValue?: boolean }): Promise; + cancel(message: string): void; +} + +type KloMetabaseSetupInteractiveIo = KloCliIo & { + stdin?: { isTTY?: boolean }; +}; + +export interface MetabaseSetupMappingAssignment { + metabaseDatabaseId: number; + targetConnectionId: string; +} + +export interface MintMetabaseApiKeyArgs { + url: string; + username: string; + password: string; +} + +export type MintMetabaseApiKey = (args: MintMetabaseApiKeyArgs, io: KloCliIo) => Promise; + +export interface KloConnectionMetabaseSetupArgs { + command: 'setup'; + projectDir: string; + connectionId?: string; + url?: string; + apiKey?: string; + mintApiKey: boolean; + metabaseUsername?: string; + metabasePassword?: string; + mappings: MetabaseSetupMappingAssignment[]; + syncEnabledDatabaseIds: number[]; + syncMode: MetabaseSetupSyncMode; + runIngest: boolean; + yes: boolean; + inputMode: KloMetabaseSetupInputMode; +} + +export interface KloConnectionMetabaseSetupDeps { + createMetabaseClient?: ( + project: KloLocalProject, + connectionId: string, + ) => Promise>; + mintMetabaseApiKey?: MintMetabaseApiKey; + prompts?: MetabaseSetupPromptAdapter; + runPublicIngest?: (args: Extract, io: KloCliIo) => Promise; +} + +function isMetabaseConnection(connection: KloProjectConnectionConfig | undefined): boolean { + return ( + String(connection?.driver ?? 
'') + .trim() + .toLowerCase() === 'metabase' + ); +} + +function stringField(value: unknown): string | undefined { + return typeof value === 'string' && value.trim().length > 0 ? value.trim() : undefined; +} + +function uniqueSorted(values: number[]): number[] { + return [...new Set(values)].sort((a, b) => a - b); +} + +function resolveMetabaseUrl(connection: KloProjectConnectionConfig | undefined): string | undefined { + return stringField(connection?.api_url) ?? stringField(connection?.apiUrl) ?? stringField(connection?.url); +} + +function resolveLiteralMetabaseApiKey(connection: KloProjectConnectionConfig | undefined): string | undefined { + return stringField(connection?.api_key) ?? stringField(connection?.apiKey); +} + +function listMetabaseConnectionIds(project: KloLocalProject): string[] { + return Object.entries(project.config.connections) + .filter(([_connectionId, connection]) => isMetabaseConnection(connection)) + .map(([connectionId]) => connectionId) + .sort(); +} + +function listWarehouseConnectionIds(project: KloLocalProject): string[] { + return Object.entries(project.config.connections) + .filter(([connectionId, connection]) => localConnectionToWarehouseDescriptor(connectionId, connection) != null) + .map(([connectionId]) => connectionId) + .sort(); +} + +function redactSecrets(message: string, secrets: string[]): string { + let result = message; + for (const secret of secrets) { + if (!secret) { + continue; + } + result = result.split(secret).join('[redacted]'); + } + return result; +} + +async function createDefaultMetabaseClient( + project: KloLocalProject, + connectionId: string, +): Promise> { + const factory = new DefaultMetabaseConnectionClientFactory( + (metabaseConnectionId) => + metabaseRuntimeConfigFromLocalConnection(metabaseConnectionId, project.config.connections[metabaseConnectionId]), + DEFAULT_METABASE_CLIENT_CONFIG, + ); + return factory.createClient(connectionId); +} + +async function defaultMintMetabaseApiKey(args: 
MintMetabaseApiKeyArgs): Promise { + const loginClient = new MetabaseClient({ apiUrl: args.url, apiKey: '' }, DEFAULT_METABASE_CLIENT_CONFIG); + const sessionId = await loginClient.createSession(args.username, args.password); + const sessionClient = new MetabaseClient( + { apiUrl: args.url, apiKey: sessionId, authHeaderName: 'X-Metabase-Session' }, + DEFAULT_METABASE_CLIENT_CONFIG, + ); + const groups = await sessionClient.getPermissionGroups(); + const adminGroup = groups.find((group) => group.name === 'Administrators'); + + if (!adminGroup) { + throw new Error('Metabase Administrators group was not found; create an API key manually and pass --api-key'); + } + + const mintedKey = await sessionClient.createApiKey({ + groupId: adminGroup.id, + name: `KLO CLI ${new Date().toISOString()}`, + }); + const trimmedKey = stringField(mintedKey); + if (!trimmedKey) { + throw new Error('Metabase API key minting returned an empty key'); + } + return trimmedKey; +} + +function ensureNotCancelled(value: T | symbol, prompts: Pick): T { + if (isCancel(value)) { + prompts.cancel('Setup cancelled.'); + throw new Error('Setup cancelled.'); + } + return value as T; +} + +export function createClackMetabaseSetupPromptAdapter(): MetabaseSetupPromptAdapter { + return { + intro(title?: string): void { + intro(title); + }, + outro(message?: string): void { + outro(message); + }, + note(message: string, title: string): void { + note(message, title); + }, + log: { + info(message: string): void { + log.info(message); + }, + step(message: string): void { + log.step(message); + }, + success(message: string): void { + log.success(message); + }, + warn(message: string): void { + log.warn(message); + }, + error(message: string): void { + log.error(message); + }, + }, + spinner(): KloCliSpinner { + return createClackSpinner(); + }, + async select(options: { + message: string; + options: Array>; + }): Promise { + return ensureNotCancelled(await select(withMenuOptionsSpacing(options)), this); + }, + 
async multiselect(options: { + message: string; + options: Array>; + initialValues?: Value[]; + required?: boolean; + maxItems?: number; + }): Promise { + return ensureNotCancelled(await multiselect(withMenuOptionsSpacing(options)), this); + }, + async text(options: { message: string; placeholder?: string }): Promise { + return ensureNotCancelled(await text(options), this); + }, + async password(options: { message: string }): Promise { + return ensureNotCancelled(await password(options), this); + }, + async confirm(options: { message: string; initialValue?: boolean }): Promise { + return ensureNotCancelled(await confirm(options), this); + }, + cancel(message: string): void { + cancel(message); + }, + }; +} + /** True only when args.inputMode is not 'disabled' and both io.stdin and io.stdout report isTTY === true; a missing io.stdin yields false. */ +function isInteractiveMetabaseSetupIo( + args: Pick, + io: KloMetabaseSetupInteractiveIo, +): boolean { + return args.inputMode !== 'disabled' && io.stdin?.isTTY === true && io.stdout.isTTY === true; +} + /** Drops databases flagged is_sample === true and projects the rest to { id, name, engine, host, dbName }. engine falls back to 'unknown', host and dbName to null, whenever stringField() returns null or undefined. */ +function normalizeDiscoveredDatabases(databases: MetabaseDatabase[]): Array<{ + id: number; + name: string; + engine: string; + host: string | null; + dbName: string | null; +}> { + return databases + .filter((database) => database.is_sample !== true) + .map((database) => ({ + id: database.id, + name: database.name, + engine: stringField(database.engine) ?? 'unknown', + host: stringField(database.details?.host) ?? null, + dbName: stringField(database.details?.dbname) ?? null, + })); +} + /** Describes the physical target behind a KLO connection id using localConnectionToWarehouseDescriptor(); returns { connection_type: 'UNKNOWN' } when no descriptor is produced. The result is what validateMappingPhysicalMatch() receives as its second argument elsewhere in this file. */ +function targetPhysicalInfo(project: KloLocalProject, connectionId: string) { + const descriptor = localConnectionToWarehouseDescriptor(connectionId, project.config.connections[connectionId]); + if (!descriptor) { + return { connection_type: 'UNKNOWN' }; + } + return { + connection_type: descriptor.connection_type, + host: descriptor.host ?? null, + database: descriptor.database ?? null, + account: descriptor.account ?? null, + project_id: descriptor.project_id ?? null, + dataset_id: descriptor.dataset_id ?? 
null, + /* spread last, so any key present in connection_params overrides the explicit fields listed before it */ ...descriptor.connection_params, + }; +} + /** Shows a 'Summary' note through options.prompts.note() listing the connection id, the URL, one "metabaseDatabaseId -> targetConnectionId" line per mapping and one line per sync-enabled database id; an empty list is rendered as "(none)". */ +function noteMetabaseSetupSummary(options: { + prompts: MetabaseSetupPromptAdapter; + connectionId: string; + url: string; + mappings: MetabaseSetupMappingAssignment[]; + syncEnabledDatabaseIds: number[]; +}): void { + const mappingLines = options.mappings + .map((mapping) => ` ${mapping.metabaseDatabaseId} -> ${mapping.targetConnectionId}`) + .join('\n'); + const syncLines = options.syncEnabledDatabaseIds.map((id) => ` ${id}`).join('\n'); + + options.prompts.note( + [ + `Connection: ${options.connectionId}`, + `URL: ${options.url}`, + '', + 'Mappings:', + mappingLines || ' (none)', + '', + 'Sync enabled:', + syncLines || ' (none)', + ].join('\n'), + 'Summary', + ); +} + /** Runs the Metabase connection setup flow and resolves to an exit code: 0 on success, 1 on failure. Errors thrown inside the flow are caught, their message is passed through redactSecrets() with the API key and password in use, and the result is written to io.stderr. `deps` lets callers substitute the prompt adapter, the API-key minter, the Metabase client factory and the ingest runner. */ +export async function runKloConnectionMetabaseSetup( + args: KloConnectionMetabaseSetupArgs, + io: KloCliIo, + deps: KloConnectionMetabaseSetupDeps = {}, +): Promise { + /* declared outside the try block so the catch handler can redact whichever secrets were in play when the error occurred */ let apiKeyForRedaction = args.apiKey; + let passwordForRedaction = args.metabasePassword; + const interactiveIo = io as KloMetabaseSetupInteractiveIo; + const isInteractive = isInteractiveMetabaseSetupIo(args, interactiveIo); + const prompts = deps.prompts ?? (isInteractive ? 
createClackMetabaseSetupPromptAdapter() : undefined); + + try { + if (isInteractive && prompts) { + prompts.intro('KLO Metabase setup'); + } + + const project = await loadKloProject({ projectDir: args.projectDir }); + const existingMetabaseConnectionIds = listMetabaseConnectionIds(project); + let connectionId: string; + + if (args.connectionId) { + connectionId = args.connectionId; + } else if (existingMetabaseConnectionIds.length === 1) { + const onlyMetabaseConnectionId = existingMetabaseConnectionIds[0]; + if (!onlyMetabaseConnectionId) { + throw new Error('No Metabase connection id was resolved'); + } + connectionId = onlyMetabaseConnectionId; + } else if (existingMetabaseConnectionIds.length > 1) { + if (!isInteractive || !prompts) { + throw new Error( + `Multiple Metabase connections found (${existingMetabaseConnectionIds.join(', ')}); select one with --id`, + ); + } + connectionId = await prompts.select({ + message: 'Select the Metabase connection to configure', + options: existingMetabaseConnectionIds.map((id) => ({ value: id, label: id })), + }); + } else { + connectionId = 'metabase'; + } + + const existingConnection = project.config.connections[connectionId]; + const warehouseConnectionIds = listWarehouseConnectionIds(project); + + if (warehouseConnectionIds.length === 0) { + throw new Error('Add a warehouse connection first'); + } + + let url = args.url ?? resolveMetabaseUrl(existingConnection); + let apiKey = args.apiKey ?? 
resolveLiteralMetabaseApiKey(existingConnection); + apiKeyForRedaction = apiKey; + + if (!url && isInteractive && prompts) { + url = stringField( + await prompts.text({ + message: 'Metabase API URL', + placeholder: 'http://localhost:3000', + }), + ); + } + + if (args.inputMode === 'disabled' && !url) { + throw new Error('missing Metabase URL'); + } + + if (!args.apiKey && !args.mintApiKey && apiKey && isInteractive && prompts && !args.yes) { + const reuse = await prompts.confirm({ + message: `Reuse the existing Metabase API key from connections.${connectionId}?`, + initialValue: true, + }); + if (!reuse) { + apiKey = undefined; + apiKeyForRedaction = undefined; + } + } + + if (args.mintApiKey) { + let username = stringField(args.metabaseUsername); + let metabasePassword = stringField(args.metabasePassword); + + if (isInteractive && prompts) { + if (!username) { + username = stringField(await prompts.text({ message: 'Metabase admin username' })); + } + if (!metabasePassword) { + metabasePassword = stringField(await prompts.password({ message: 'Metabase admin password' })); + } + } + + if (!username) { + throw new Error('--mint-api-key requires --username'); + } + if (!metabasePassword) { + throw new Error('--mint-api-key requires --password'); + } + if (!url) { + throw new Error('Metabase URL is required (use --url)'); + } + + passwordForRedaction = metabasePassword; + apiKey = await (deps.mintMetabaseApiKey ?? 
defaultMintMetabaseApiKey)( + { url, username, password: metabasePassword }, + io, + ); + apiKeyForRedaction = apiKey; + } + + if (!apiKey && isInteractive && prompts) { + const credentialMode = await prompts.select({ + message: 'Metabase credentials', + options: [ + { value: 'paste', label: 'Paste API key' }, + { value: 'mint', label: 'Mint API key' }, + ], + }); + + if (credentialMode === 'paste') { + apiKey = stringField(await prompts.password({ message: 'Metabase API key' })); + apiKeyForRedaction = apiKey; + } else { + const username = stringField(await prompts.text({ message: 'Metabase admin username' })); + const metabasePassword = stringField(await prompts.password({ message: 'Metabase admin password' })); + if (!username) { + throw new Error('Metabase username is required'); + } + if (!metabasePassword) { + throw new Error('Metabase password is required'); + } + if (!url) { + throw new Error('Metabase URL is required (use --url)'); + } + + passwordForRedaction = metabasePassword; + apiKey = await (deps.mintMetabaseApiKey ?? defaultMintMetabaseApiKey)( + { url, username, password: metabasePassword }, + io, + ); + apiKeyForRedaction = apiKey; + } + } + + if (args.inputMode === 'disabled' && !apiKey) { + throw new Error('missing Metabase API key'); + } + + if (!url) { + throw new Error('Metabase URL is required (use --url)'); + } + if (!apiKey) { + throw new Error('Metabase API key is required (use --api-key)'); + } + + const transientConnectionConfig: KloProjectConnectionConfig = { + ...(existingConnection ?? 
{}), + driver: 'metabase', + api_url: url, + api_key: apiKey, + }; + const configWithTransient = { + ...project.config, + connections: { + ...project.config.connections, + [connectionId]: transientConnectionConfig, + }, + }; + const discoveryProject: KloLocalProject = { ...project, config: configWithTransient }; + /* Fail before any Metabase call: every entry of args.mappings must target a connection that exists in the merged config. */ + for (const mapping of args.mappings) { + if (!configWithTransient.connections[mapping.targetConnectionId]) { + throw new Error(`Target connection "${mapping.targetConnectionId}" does not exist`); + } + } + /* The client is created from discoveryProject, i.e. the in-memory config that already carries the new api_url and api_key; klo.yaml itself is only written further down in this function. */ + const client = await (deps.createMetabaseClient ?? createDefaultMetabaseClient)(discoveryProject, connectionId); + try { + const authSpinner = isInteractive && prompts ? prompts.spinner() : undefined; + authSpinner?.start('Testing Metabase connection'); + const testResult = await client.testConnection(); + if (!testResult.success) { + authSpinner?.error('Metabase authentication failed'); + throw new Error( + `Metabase authentication failed. Replace connections.${connectionId}.api_key or use --mint-api-key.`, + ); + } + authSpinner?.stop('Metabase reachable'); + /* NOTE(review): if client.testConnection() rejects instead of resolving with success === false, authSpinner is neither stopped nor errored here; only the outer finally and catch run - confirm that is acceptable for the spinner. The same applies to discoverySpinner when client.getDatabases() rejects. */ + const discoverySpinner = isInteractive && prompts ? prompts.spinner() : undefined; + discoverySpinner?.start('Discovering Metabase databases'); + const discovered = normalizeDiscoveredDatabases(await client.getDatabases()); + discoverySpinner?.stop(`Discovered ${discovered.length} ${discovered.length === 1 ? 'database' : 'databases'}`); + /* NOTE(review): the text just passed to discoverySpinner.stop() is logged a second time through prompts.log.success() below - confirm the duplicate is intended. */ if (isInteractive && prompts) { + prompts.log.success( + `Discovered ${discovered.length} ${discovered.length === 1 ? 
'database' : 'databases'}`, + ); + } + if (discovered.length === 0) { + throw new Error('Metabase auth worked but no usable databases were returned'); + } + + let resolvedMappings = args.mappings; + let resolvedSyncEnabledDatabaseIds = args.syncEnabledDatabaseIds; + + if (resolvedSyncEnabledDatabaseIds.length === 0 && args.yes && resolvedMappings.length > 0) { + resolvedSyncEnabledDatabaseIds = uniqueSorted(resolvedMappings.map((mapping) => mapping.metabaseDatabaseId)); + } + + if (resolvedMappings.length === 0 && resolvedSyncEnabledDatabaseIds.length === 0) { + const onlyDiscoveredDatabase = discovered.length === 1 ? discovered[0] : undefined; + const compatibleWarehouses = onlyDiscoveredDatabase + ? warehouseConnectionIds.filter((warehouseConnectionId) => { + const mismatchReason = validateMappingPhysicalMatch( + { + metabaseEngine: onlyDiscoveredDatabase.engine, + metabaseDbName: onlyDiscoveredDatabase.dbName, + metabaseHost: onlyDiscoveredDatabase.host, + }, + targetPhysicalInfo(project, warehouseConnectionId), + ); + return !mismatchReason; + }) + : []; + const onlyWarehouseConnectionId = compatibleWarehouses[0]; + + if (onlyDiscoveredDatabase && compatibleWarehouses.length === 1 && onlyWarehouseConnectionId) { + if (args.yes) { + resolvedMappings = [ + { metabaseDatabaseId: onlyDiscoveredDatabase.id, targetConnectionId: onlyWarehouseConnectionId }, + ]; + resolvedSyncEnabledDatabaseIds = [onlyDiscoveredDatabase.id]; + } else if (isInteractive && prompts) { + const proposedMappings = [ + { metabaseDatabaseId: onlyDiscoveredDatabase.id, targetConnectionId: onlyWarehouseConnectionId }, + ]; + const proposedSyncEnabledDatabaseIds = [onlyDiscoveredDatabase.id]; + noteMetabaseSetupSummary({ + prompts, + connectionId, + url, + mappings: proposedMappings, + syncEnabledDatabaseIds: proposedSyncEnabledDatabaseIds, + }); + const confirmed = await prompts.confirm({ + message: `Map Metabase database "${onlyDiscoveredDatabase.name}" (${onlyDiscoveredDatabase.id}) to 
"${onlyWarehouseConnectionId}" and enable sync?`, + initialValue: true, + }); + if (!confirmed) { + prompts.cancel('Setup cancelled.'); + throw new Error('Setup cancelled.'); + } + resolvedMappings = proposedMappings; + resolvedSyncEnabledDatabaseIds = proposedSyncEnabledDatabaseIds; + } else { + throw new Error('Metabase mapping/sync is required in --no-input mode; pass --map and --sync'); + } + } else if (isInteractive && prompts) { + const selectedDatabaseIds = await prompts.multiselect({ + message: withMultiselectNavigation('Select Metabase databases to configure'), + options: discovered.map((database) => ({ + value: database.id, + label: `${database.id}: ${database.name}`, + hint: [database.engine, database.host, database.dbName].filter(Boolean).join(' • '), + })), + required: true, + }); + + resolvedMappings = []; + for (const databaseId of selectedDatabaseIds) { + const database = discovered.find((candidate) => candidate.id === databaseId); + if (!database) { + throw new Error(`Selected database id ${databaseId} was not discovered`); + } + + const existingMapping = args.mappings.find((mapping) => mapping.metabaseDatabaseId === databaseId); + if (existingMapping) { + resolvedMappings.push(existingMapping); + continue; + } + + const targetConnectionId = await prompts.select({ + message: `Map Metabase database ${database.id} ("${database.name}") to which KLO connection?`, + options: warehouseConnectionIds.map((warehouseId) => ({ value: warehouseId, label: warehouseId })), + }); + resolvedMappings.push({ metabaseDatabaseId: databaseId, targetConnectionId }); + } + + const syncIds = await prompts.multiselect({ + message: withMultiselectNavigation('Enable sync for which databases?'), + options: selectedDatabaseIds.map((id) => ({ value: id, label: String(id) })), + initialValues: selectedDatabaseIds, + required: true, + }); + resolvedSyncEnabledDatabaseIds = uniqueSorted(syncIds); + + if (!args.yes) { + noteMetabaseSetupSummary({ + prompts, + connectionId, + url, + 
mappings: resolvedMappings, + syncEnabledDatabaseIds: resolvedSyncEnabledDatabaseIds, + }); + const confirmed = await prompts.confirm({ + message: 'Write changes to klo.yaml and enable sync?', + initialValue: true, + }); + if (!confirmed) { + prompts.cancel('Setup cancelled.'); + throw new Error('Setup cancelled.'); + } + } + } else if (args.inputMode === 'disabled') { + throw new Error('Metabase mapping/sync is required in --no-input mode; pass --map and --sync'); + } + } + + if ( + args.inputMode === 'disabled' && + resolvedMappings.length > 0 && + resolvedSyncEnabledDatabaseIds.length === 0 + ) { + throw new Error('Metabase sync selection is required in --no-input mode; pass --sync '); + } + + const discoveredIds = new Set(discovered.map((database) => database.id)); + for (const mapping of resolvedMappings) { + if (!discoveredIds.has(mapping.metabaseDatabaseId)) { + throw new Error(`Mapped database id ${mapping.metabaseDatabaseId} was not discovered`); + } + } + for (const syncId of resolvedSyncEnabledDatabaseIds) { + if (!discoveredIds.has(syncId)) { + throw new Error(`Sync database id ${syncId} was not discovered`); + } + } + + await project.fileStore.writeFile( + 'klo.yaml', + serializeKloProjectConfig(configWithTransient), + 'klo', + 'klo@example.com', + `Setup Metabase connection ${connectionId}`, + ); + + const updatedProject = await loadKloProject({ projectDir: args.projectDir }); + const store = new LocalMetabaseSourceStateReader({ dbPath: kloLocalStateDbPath(updatedProject) }); + + await store.refreshDiscoveredDatabases({ connectionId, discovered }); + + for (const mapping of resolvedMappings) { + await store.upsertDatabaseMapping({ + connectionId, + metabaseDatabaseId: mapping.metabaseDatabaseId, + targetConnectionId: mapping.targetConnectionId, + syncEnabled: false, + source: 'cli', + }); + } + + for (const metabaseDatabaseId of resolvedSyncEnabledDatabaseIds) { + await store.setMappingSyncEnabled({ + connectionId, + metabaseDatabaseId, + syncEnabled: 
true, + }); + } + /* Only syncMode is replaced: defaultTagNames and selections are read back from the stored state and written again unchanged. */ + const existingSyncState = await store.getSourceState(connectionId); + await store.setSyncState({ + connectionId, + syncMode: args.syncMode, + defaultTagNames: existingSyncState.defaultTagNames, + selections: existingSyncState.selections, + }); + + const unhydrated = await store.getUnhydratedSyncEnabledMappingIds(connectionId); + if (unhydrated.length > 0) { + io.stderr.write( + `Sync-enabled mappings are missing discovery metadata; run klo connection mapping refresh ${connectionId} --auto-accept\n`, + ); + return 1; + } + /* Checks every row returned by store.listDatabaseMappings() that has a target connection; rows whose target is absent from the reloaded config are compared against { connection_type: 'UNKNOWN' }. NOTE(review): this validation runs after klo.yaml and the mapping/sync state have already been written, so returning 1 here leaves those writes in place - confirm that is intended. */ + const rows = await store.listDatabaseMappings(connectionId); + const physicalFailures = rows.flatMap((row) => { + if (!row.targetConnectionId) { + return []; + } + const reason = validateMappingPhysicalMatch( + { metabaseEngine: row.metabaseEngine, metabaseDbName: row.metabaseDbName, metabaseHost: row.metabaseHost }, + updatedProject.config.connections[row.targetConnectionId] + ? targetPhysicalInfo(updatedProject, row.targetConnectionId) + : { connection_type: 'UNKNOWN' }, + ); + return reason ? [`${row.metabaseDatabaseId}: ${reason}`] : []; + }); + if (physicalFailures.length > 0) { + for (const failure of physicalFailures) { + io.stderr.write(`${failure}\n`); + } + return 1; + } + + io.stdout.write(`Connection: ${connectionId}\n`); + io.stdout.write(`Discovered ${discovered.length} ${discovered.length === 1 ? 'database' : 'databases'}\n`); + io.stdout.write(`Next: klo ingest ${connectionId} --project-dir ${args.projectDir}\n`); + /* Optional follow-up: run ingest for this connection with input disabled, using deps.runPublicIngest when supplied. */ + if (args.runIngest) { + const ingestRunner = deps.runPublicIngest ?? 
runKloPublicIngest; + const exitCode = await ingestRunner( + { + command: 'run', + projectDir: args.projectDir, + targetConnectionId: connectionId, + all: false, + json: false, + inputMode: 'disabled', + }, + io, + ); + if (exitCode !== 0) { + io.stderr.write(`Ingest failed; re-run: klo ingest ${connectionId} --project-dir ${args.projectDir}\n`); + return 1; + } + } + + if (isInteractive && prompts) { + prompts.outro('Metabase setup complete'); + } + + return 0; + } finally { + await client.cleanup(); + } + } catch (error) { + /* Every thrown error ends up here: the message is redacted for the known secrets, written to stderr, and reported as exit code 1. */ const message = error instanceof Error ? error.message : String(error); + io.stderr.write( + `${redactSecrets(message, [apiKeyForRedaction ?? '', passwordForRedaction ?? '', args.apiKey ?? ''])}\n`, + ); + return 1; + } +} diff --git a/packages/cli/src/commands/connection-notion-commands.ts b/packages/cli/src/commands/connection-notion-commands.ts new file mode 100644 index 00000000..fc3ccb32 --- /dev/null +++ b/packages/cli/src/commands/connection-notion-commands.ts @@ -0,0 +1,92 @@ +import { type Command, InvalidArgumentError } from '@commander-js/extra-typings'; +import { collectOption, type KloCliCommandContext, resolveCommandProjectDir } from '../cli-program.js'; +import type { KloConnectionNotionArgs } from './connection-notion.js'; + /** Options object handed to the `pick` action. buildPickArgs() treats `input === false` as the non-interactive case (presumably set by the --no-input flag as parsed by commander); rootPageId holds the collected --root-page-id values. */ +interface NotionPickOptions { + input?: boolean; + rootPageId: string[]; +} + /** Argument parser for connection ids: accepts an ASCII letter or digit followed by any number of letters, digits, '_' or '-', and returns the value unchanged; anything else raises InvalidArgumentError. */ +function parseSafeConnectionId(value: string): string { + if (!/^[a-zA-Z0-9][a-zA-Z0-9_-]*$/.test(value)) { + throw new InvalidArgumentError(`Unsafe connection id: ${value}`); + } + return value; +} + /** Removes duplicates while keeping the first occurrence of each value at its original relative position. */ +function uniqueInOrder(values: string[]): string[] { + const seen = new Set(); + const result: string[] = []; + for (const value of values) { + if (!seen.has(value)) { + seen.add(value); + result.push(value); + } + } + return result; +} + /** Strict page-id normalizer: trims the value, strips hyphens, requires exactly 32 hex digits and returns the lowercase 8-4-4-4-12 hyphenated form; throws Error for anything else. */ +function normalizeNotionPageId(value: string): string { + const trimmed = value.trim(); + const compact = trimmed.includes('-') ? 
trimmed.replace(/-/g, '') : trimmed; + if (!/^[0-9a-fA-F]{32}$/.test(compact)) { + throw new Error(`Invalid Notion page UUID: ${value}`); + } + const lower = compact.toLowerCase(); + return `${lower.slice(0, 8)}-${lower.slice(8, 12)}-${lower.slice(12, 16)}-${lower.slice(16, 20)}-${lower.slice(20)}`; +} + /** Translates parsed CLI options into KloConnectionNotionArgs. Unless options.input is exactly false the result is the interactive pick request, and options.rootPageId is not consulted; otherwise the root page ids are normalized, de-duplicated in order, and must be non-empty. */ +function buildPickArgs(connectionId: string, projectDir: string, options: NotionPickOptions): KloConnectionNotionArgs { + if (options.input !== false) { + return { + command: 'pick', + projectDir, + connectionId, + mode: 'interactive', + }; + } + /* Non-interactive path: normalizeNotionPageId throws on the first malformed id, before de-duplication runs. */ + const rootPageIds = uniqueInOrder(options.rootPageId.map(normalizeNotionPageId)); + if (rootPageIds.length === 0) { + throw new Error('connection notion pick --no-input requires at least one --root-page-id'); + } + return { + command: 'pick', + projectDir, + connectionId, + mode: 'non-interactive', + rootPageIds, + }; +} + /** Runs the Notion command with context.deps.connectionNotion when provided, otherwise with runKloConnectionNotion loaded through a dynamic import, and passes the awaited result to context.setExitCode(). */ +async function runConnectionNotionArgs(context: KloCliCommandContext, args: KloConnectionNotionArgs): Promise { + const runner = context.deps.connectionNotion ?? 
(await import('./connection-notion.js')).runKloConnectionNotion; + context.setExitCode(await runner(args, context.io)); +} + /** Registers the `notion` command under the given parent command together with its `pick` subcommand. The action on `notion` itself prints help and sets exit code 0; `pick` builds its arguments with buildPickArgs() and hands them to runConnectionNotionArgs(). */ +export function registerConnectionNotionCommands(connect: Command, context: KloCliCommandContext): void { + const notion = connect + .command('notion') + .description('Configure Notion source selection') + .showHelpAfterError() + .addHelpText( + 'after', + '\nProject directory defaults to KLO_PROJECT_DIR when set, otherwise the current working directory.\n', + ); + /* The action attached to `notion` itself only prints the command help and reports success. */ + notion.action(() => { + notion.outputHelp(); + context.setExitCode(0); + }); + /* NOTE(review): the argument name passed to .argument() is an empty string and the --root-page-id flag has no value placeholder here; presumably angle-bracket placeholders were lost in extraction - verify against the upstream file. */ + notion + .command('pick') + .description('Pick Notion root pages for a configured Notion connection') + .argument('', 'Notion connection id', parseSafeConnectionId) + .option('--no-input', 'Disable interactive terminal input') + .option('--root-page-id ', 'Root page UUID to crawl; repeatable with --no-input', collectOption, []) + .showHelpAfterError() + .action(async (connectionId: string, options: NotionPickOptions, command) => { + await runConnectionNotionArgs(context, buildPickArgs(connectionId, resolveCommandProjectDir(command), options)); + }); +} diff --git a/packages/cli/src/commands/connection-notion-tree.test.ts b/packages/cli/src/commands/connection-notion-tree.test.ts new file mode 100644 index 00000000..ed1126d4 --- /dev/null +++ b/packages/cli/src/commands/connection-notion-tree.test.ts @@ -0,0 +1,283 @@ +import { describe, expect, it } from 'vitest'; +import { + buildInitialState, + buildPickerTree, + canToggle, + clearExpiredTransientHint, + filterTree, + flattenSelection, + moveCursor, + reducer, + selectAllVisible, + selectNone, + toggleChecked, + TRANSIENT_HINT_DURATION_MS, + visibleNodeIds, + type NotionPickerPageInput, +} from './connection-notion-tree.js'; + /* Fixed page ids shared by the fixture builder and the assertions in this test file. */ +const IDS = { + engineering: '11111111-1111-1111-1111-111111111111', + architecture: '22222222-2222-2222-2222-222222222222', + onboarding: '33333333-3333-3333-3333-333333333333', + marketing: 
'44444444-4444-4444-4444-444444444444', + journal: '55555555-5555-5555-5555-555555555555', + orphan: '66666666-6666-6666-6666-666666666666', + duplicate: '77777777-7777-7777-7777-777777777777', + cycleA: '88888888-8888-8888-8888-888888888888', + cycleB: '99999999-9999-9999-9999-999999999999', +}; + +function pages(): NotionPickerPageInput[] { + return [ + { id: IDS.marketing, title: 'Marketing', archived: false, parentId: null }, + { id: IDS.onboarding, title: 'Onboarding', archived: false, parentId: IDS.engineering }, + { id: IDS.engineering, title: 'Engineering Docs', archived: false, parentId: null }, + { id: IDS.architecture, title: 'Architecture', archived: false, parentId: IDS.engineering }, + { id: IDS.journal, title: 'Daily journal', archived: true, parentId: IDS.marketing }, + { id: IDS.orphan, title: '', archived: false, parentId: 'aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa' }, + { id: IDS.duplicate, title: 'Original duplicate', archived: false, parentId: null }, + { id: IDS.duplicate, title: 'Ignored duplicate', archived: true, parentId: IDS.marketing }, + { id: IDS.cycleA, title: 'Cycle A', archived: false, parentId: IDS.cycleB }, + { id: IDS.cycleB, title: 'Cycle B', archived: false, parentId: IDS.cycleA }, + ]; +} + +describe('buildPickerTree', () => { + it('deduplicates pages, sorts siblings, preserves archived flags, roots orphans, and breaks cycles', () => { + const tree = buildPickerTree(pages()); + const byId = new Map(tree.map((node) => [node.id, node])); + + expect(tree.map((node) => node.title)).toEqual([ + 'Cycle A', + 'Cycle B', + 'Engineering Docs', + 'Architecture', + 'Onboarding', + 'Marketing', + 'Daily journal', + 'Original duplicate', + 'Untitled', + ]); + expect(byId.get(IDS.engineering)?.childIds).toEqual([IDS.architecture, IDS.onboarding]); + expect(byId.get(IDS.architecture)).toMatchObject({ + depth: 1, + parentId: IDS.engineering, + path: 'Engineering Docs / Architecture', + }); + expect(byId.get(IDS.journal)).toMatchObject({ + 
archived: true, + depth: 1, + path: 'Marketing / Daily journal', + }); + expect(byId.get(IDS.orphan)).toMatchObject({ + title: 'Untitled', + parentId: null, + depth: 0, + path: 'Untitled', + }); + expect(byId.get(IDS.duplicate)).toMatchObject({ + title: 'Original duplicate', + archived: false, + parentId: null, + }); + expect(byId.get(IDS.cycleA)?.parentId).toBeNull(); + expect(byId.get(IDS.cycleB)?.parentId).toBe(IDS.cycleA); + }); +}); + +describe('selection invariants', () => { + it('checking a parent locks descendants and keeps checked ids minimal', () => { + const state = buildInitialState({ + tree: buildPickerTree(pages()), + existingRootPageIds: [], + currentCrawlMode: 'selected_roots', + }); + + const checkedParent = toggleChecked(state, IDS.engineering, 1000); + expect([...checkedParent.checked]).toEqual([IDS.engineering]); + expect(canToggle(IDS.architecture, checkedParent)).toEqual({ + ok: false, + reason: "Locked by 'Engineering Docs' - uncheck parent first", + }); + + const lockedChildAttempt = toggleChecked(checkedParent, IDS.architecture, 2000); + expect([...lockedChildAttempt.checked]).toEqual([IDS.engineering]); + expect(lockedChildAttempt.transientHint).toEqual({ + text: "Locked by 'Engineering Docs' - uncheck parent first", + expiresAt: 4500, + }); + + const uncheckedParent = toggleChecked(lockedChildAttempt, IDS.engineering, 3000); + expect([...uncheckedParent.checked]).toEqual([]); + expect(canToggle(IDS.architecture, uncheckedParent)).toEqual({ ok: true }); + }); + + it('normalizes stored roots, reports stale roots, expands checked ancestors, and flattens descendants', () => { + const state = buildInitialState({ + tree: buildPickerTree(pages()), + existingRootPageIds: [ + IDS.engineering.replaceAll('-', ''), + IDS.architecture, + 'aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa', + ], + currentCrawlMode: 'selected_roots', + }); + + expect([...state.checked]).toEqual([IDS.engineering]); + expect([...state.expanded]).toEqual([]); + 
expect(state.cursorId).toBe(IDS.cycleA); + expect(state.preLoadWarnings).toEqual(['1 stored root_page_ids no longer visible']); + expect(flattenSelection(new Set([IDS.engineering, IDS.architecture]), state.byId)).toEqual([IDS.engineering]); + }); +}); + +describe('search and cursor movement', () => { + it('filters by title and path while deriving auto-expanded ancestors', () => { + const state = buildInitialState({ + tree: buildPickerTree(pages()), + existingRootPageIds: [], + currentCrawlMode: 'selected_roots', + }); + const searching = { + ...state, + search: { editing: false, query: 'architecture' }, + }; + + expect(filterTree(searching)).toEqual({ + visibleIds: new Set([IDS.engineering, IDS.architecture]), + autoExpand: new Set([IDS.engineering]), + }); + expect(visibleNodeIds(searching)).toEqual([IDS.engineering, IDS.architecture]); + }); + + it('moves the cursor through visible nodes and implements left/right tree semantics', () => { + const state = buildInitialState({ + tree: buildPickerTree(pages()), + existingRootPageIds: [], + currentCrawlMode: 'selected_roots', + }); + + const atEngineering = { + ...state, + cursorId: IDS.engineering, + expanded: new Set([IDS.engineering]), + }; + expect(moveCursor(atEngineering, 'down').cursorId).toBe(IDS.architecture); + expect(moveCursor({ ...atEngineering, cursorId: IDS.architecture }, 'up').cursorId).toBe(IDS.engineering); + expect(moveCursor(atEngineering, 'right').cursorId).toBe(IDS.architecture); + expect(moveCursor({ ...atEngineering, cursorId: IDS.architecture }, 'left').cursorId).toBe(IDS.engineering); + expect([...moveCursor(atEngineering, 'left').expanded]).toEqual([]); + expect([...moveCursor({ ...state, cursorId: IDS.marketing }, 'right').expanded]).toContain(IDS.marketing); + }); +}); + +describe('bulk actions and reducer effects', () => { + it('selects only matching visible roots under search and clears selection', () => { + const state = buildInitialState({ + tree: buildPickerTree(pages()), + 
existingRootPageIds: [IDS.marketing], + currentCrawlMode: 'selected_roots', + }); + const searching = { + ...state, + search: { editing: false, query: 'architecture' }, + }; + + const selected = selectAllVisible(searching); + expect(flattenSelection(selected.checked, selected.byId)).toEqual([IDS.architecture, IDS.marketing]); + expect([...selectNone(selected).checked]).toEqual([]); + }); + + it('returns save immediately for selected_roots and requires confirmation for all_accessible', () => { + const selectedRoots = toggleChecked( + buildInitialState({ + tree: buildPickerTree(pages()), + existingRootPageIds: [], + currentCrawlMode: 'selected_roots', + }), + IDS.marketing, + 1000, + ); + expect(reducer(selectedRoots, 'save-request')).toEqual({ + next: selectedRoots, + effect: 'save', + }); + + const allAccessible = { + ...selectedRoots, + currentCrawlMode: 'all_accessible' as const, + }; + const confirm = reducer(allAccessible, 'save-request'); + expect(confirm).toEqual({ + next: { ...allAccessible, pendingConfirm: 'mode-switch' }, + effect: null, + }); + expect(reducer(confirm.next, 'save-cancel')).toEqual({ + next: { ...allAccessible, pendingConfirm: null }, + effect: null, + }); + expect(reducer(confirm.next, 'save-confirm')).toEqual({ + next: { ...allAccessible, pendingConfirm: null }, + effect: 'save', + }); + }); + + it('blocks empty saves, updates search state, and quits without saving', () => { + const state = buildInitialState({ + tree: buildPickerTree(pages()), + existingRootPageIds: [], + currentCrawlMode: 'selected_roots', + }); + + const blockedSave = reducer(state, 'save-request', 9000); + expect(blockedSave).toEqual({ + next: { + ...state, + transientHint: { + text: 'Select at least one page or press q to quit', + expiresAt: 9000 + TRANSIENT_HINT_DURATION_MS, + }, + }, + effect: null, + }); + expect( + reducer( + reducer(reducer(state, 'search-start').next, { type: 'search-input', value: 'a' }).next, + 'search-submit', + ).next.search, + ).toEqual({ 
editing: false, query: 'a' }); + expect(reducer(state, 'quit')).toEqual({ + next: state, + effect: 'quit-without-save', + }); + }); + + it('clears transient hints only when their expiry time has passed', () => { + const state = buildInitialState({ + tree: buildPickerTree(pages()), + existingRootPageIds: [], + currentCrawlMode: 'selected_roots', + }); + const withHint = { + ...state, + transientHint: { + text: 'Select at least one page or press q to quit', + expiresAt: 11500, + }, + }; + + expect(clearExpiredTransientHint(withHint, 11499)).toBe(withHint); + expect(clearExpiredTransientHint(withHint, 11500)).toEqual({ + ...withHint, + transientHint: null, + }); + expect(reducer(withHint, 'clear-transient-hint', 11501)).toEqual({ + next: { + ...withHint, + transientHint: null, + }, + effect: null, + }); + }); +}); diff --git a/packages/cli/src/commands/connection-notion-tree.ts b/packages/cli/src/commands/connection-notion-tree.ts new file mode 100644 index 00000000..57a8f335 --- /dev/null +++ b/packages/cli/src/commands/connection-notion-tree.ts @@ -0,0 +1,529 @@ +/** Input page record accepted by buildPickerTree(); only id is mandatory. */ export interface NotionPickerPageInput { + id: string; + title?: string | null; + archived?: boolean; + parentId?: string | null; +} + /** Node of the built picker tree. The test fixtures for this module show path as ancestor titles joined with ' / ' (for example 'Engineering Docs / Architecture') and depth 0 for root nodes. */ +interface NotionPickerNode { + id: string; + title: string; + archived: boolean; + parentId: string | null; + depth: number; + childIds: string[]; + path: string; +} + /** Complete picker state. transientHint.expiresAt is compared against a caller-supplied `now` by clearExpiredTransientHint(). NOTE(review): Map and Set appear here without type arguments - presumably lost in extraction; verify against the upstream file. */ +export interface PickerState { + tree: NotionPickerNode[]; + byId: Map; + expanded: Set; + checked: Set; + cursorId: string; + search: { editing: boolean; query: string }; + pendingConfirm: 'mode-switch' | null; + preLoadWarnings: string[]; + transientHint: { text: string; expiresAt: number } | null; + currentCrawlMode: 'all_accessible' | 'selected_roots'; +} + /** Commands accepted by the reducer: plain string literals, plus one object form ('search-input') that carries the typed text. */ +export type PickerCommand = + | 'cursor-up' + | 'cursor-down' + | 'cursor-left' + | 'cursor-right' + | 'expand' + | 'collapse' + | 'expand-all' + | 'collapse-all' + | 'toggle-check' + | 'select-all-visible' + | 'select-none' + | 
'clear-transient-hint' + | 'search-start' + | 'search-cancel' + | 'search-submit' + | 'search-backspace' + | { type: 'search-input'; value: string } + | 'save-request' + | 'save-confirm' + | 'save-cancel' + | 'quit'; + /** Effect value a reducer step can report; null means no effect. */ +type PickerEffect = null | 'save' | 'quit-without-save'; + /** Node shape without depth or path; presumably the intermediate form buildPickerTree() stores while it links parents - confirm against the rest of that function. */ +interface MutableNode { + id: string; + title: string; + archived: boolean; + parentId: string | null; + childIds: string[]; +} + /** Lifetime of a transient hint in milliseconds; transientHint() adds it to `now` to compute expiresAt. */ +export const TRANSIENT_HINT_DURATION_MS = 2500; + /** Title comparator used by sortedNodeIds(): 'en' locale, base sensitivity, numeric ordering enabled. */ +const collator = new Intl.Collator('en', { sensitivity: 'base', numeric: true }); + /** Lenient page-id normalizer: when the trimmed, hyphen-stripped value is exactly 32 hex digits it returns the lowercase 8-4-4-4-12 form; otherwise it returns the trimmed input as is and never throws. */ +function normalizePageId(value: string): string { + const trimmed = value.trim(); + const compact = trimmed.replace(/-/g, ''); + if (/^[0-9a-fA-F]{32}$/.test(compact)) { + const lower = compact.toLowerCase(); + return `${lower.slice(0, 8)}-${lower.slice(8, 12)}-${lower.slice(12, 16)}-${lower.slice( + 16, + 20, + )}-${lower.slice(20)}`; + } + return trimmed; +} + /** Returns the trimmed title, or 'Untitled' when the value is null, undefined, or blank after trimming. */ +function titleValue(value: string | null | undefined): string { + const trimmed = value?.trim() ?? ''; + return trimmed.length > 0 ? trimmed : 'Untitled'; +} + /** Returns a sorted copy of ids: ordered by node title through the collator, with ties broken by comparing the ids themselves; an id missing from `nodes` is compared as an empty title. */ +function sortedNodeIds(ids: string[], nodes: Map): string[] { + return [...ids].sort((leftId, rightId) => { + const left = nodes.get(leftId); + const right = nodes.get(rightId); + const byTitle = collator.compare(left?.title ?? '', right?.title ?? ''); + return byTitle === 0 ? 
/**
 * Returns the ids of all ancestors of `nodeId`, nearest parent first.
 *
 * @param nodeId - Id of the node whose parent chain is walked.
 * @param byId - Lookup of every known node by id.
 * @returns Ancestor ids ordered from direct parent up to the root; empty when
 *   the node is unknown or has no parent.
 */
function ancestorsOf(nodeId: string, byId: Map<string, NotionPickerNode>): string[] {
  const ancestors: string[] = [];
  // Seeding the guard with the node itself guarantees that corrupt parent
  // links (a self-parent or a cycle) can never report a node as its own
  // ancestor, which would make it look "locked by itself".
  const seen = new Set<string>([nodeId]);
  let parentId = byId.get(nodeId)?.parentId ?? null;
  while (parentId && !seen.has(parentId)) {
    ancestors.push(parentId);
    seen.add(parentId);
    parentId = byId.get(parentId)?.parentId ?? null;
  }
  return ancestors;
}
/**
 * Builds the picker's display tree from flat Notion search results.
 *
 * Steps:
 *  1. Normalize ids and de-duplicate pages (the first occurrence of an id wins).
 *  2. Re-root pages whose parent is missing, is the page itself, or lies on a
 *     parent cycle that contains the page.
 *  3. Link children to parents and sort siblings with `sortedNodeIds`.
 *  4. Emit nodes depth-first, recording `depth` and a ' / '-joined title `path`.
 *
 * @param searchResults - Pages as returned by discovery; may contain duplicates
 *   and dangling or cyclic parent references.
 * @returns Nodes in depth-first display order.
 */
export function buildPickerTree(searchResults: NotionPickerPageInput[]): NotionPickerNode[] {
  const nodes = new Map<string, MutableNode>();
  for (const result of searchResults) {
    const id = normalizePageId(result.id);
    if (nodes.has(id)) {
      continue;
    }
    nodes.set(id, {
      id,
      title: titleValue(result.title),
      archived: result.archived === true,
      parentId: result.parentId ? normalizePageId(result.parentId) : null,
      childIds: [],
    });
  }

  for (const node of nodes.values()) {
    if (!node.parentId || node.parentId === node.id || !nodes.has(node.parentId)) {
      node.parentId = null;
      continue;
    }

    // Walk the parent chain until it ends or starts repeating.
    const seen = new Set<string>([node.id]);
    let cursor: string | null = node.parentId;
    while (cursor && !seen.has(cursor)) {
      seen.add(cursor);
      cursor = nodes.get(cursor)?.parentId ?? null;
    }
    // Only re-root the node when the walk came back to the node itself, i.e.
    // the node is a member of the cycle. A node that merely hangs below a
    // cycle keeps its parent; the cycle is broken when one of its own members
    // is processed by this same loop.
    if (cursor === node.id) {
      node.parentId = null;
    }
  }

  for (const node of nodes.values()) {
    if (node.parentId) {
      nodes.get(node.parentId)?.childIds.push(node.id);
    }
  }
  for (const node of nodes.values()) {
    node.childIds = sortedNodeIds(node.childIds, nodes);
  }

  const roots = sortedNodeIds(
    [...nodes.values()].filter((node) => node.parentId === null).map((node) => node.id),
    nodes,
  );
  const tree: NotionPickerNode[] = [];

  function visit(nodeId: string, depth: number, pathPrefix: string[]): void {
    const raw = nodes.get(nodeId);
    if (!raw) {
      return;
    }
    // Titles from the root down to this node; reused as the children's prefix.
    const trail = [...pathPrefix, raw.title];
    tree.push({
      id: raw.id,
      title: raw.title,
      archived: raw.archived,
      parentId: raw.parentId,
      depth,
      childIds: raw.childIds,
      path: trail.join(' / '),
    });
    for (const childId of raw.childIds) {
      visit(childId, depth + 1, trail);
    }
  }

  for (const rootId of roots) {
    visit(rootId, 0, []);
  }

  return tree;
}
/**
 * Flips the checked state of `nodeId`.
 *
 * When `canToggle` refuses, the selection is left alone and the refusal reason
 * is surfaced as a transient hint stamped with `now`. Otherwise a checked node
 * becomes unchecked, and an unchecked node becomes checked while every one of
 * its descendants is unchecked. A successful toggle clears any transient hint.
 */
export function toggleChecked(state: PickerState, nodeId: string, now = Date.now()): PickerState {
  const verdict = canToggle(nodeId, state);
  if (!verdict.ok) {
    const hint = transientHint(verdict.reason, now);
    return cloneState(state, { transientHint: hint });
  }

  const nextChecked = new Set(state.checked);
  // `delete` reports whether the id was present, i.e. whether it was checked.
  const wasChecked = nextChecked.delete(nodeId);
  if (!wasChecked) {
    nextChecked.add(nodeId);
    descendantsOf(nodeId, state.byId).forEach((descendantId) => {
      nextChecked.delete(descendantId);
    });
  }
  return cloneState(state, { checked: nextChecked, transientHint: null });
}
/**
 * Checks every candidate page that is not already covered by another one.
 *
 * Candidates are the search matches when a query is present, otherwise the
 * currently visible rows. A candidate is checked only if none of its ancestors
 * is a candidate and none is already checked; checking it unchecks all of its
 * descendants. The result is passed through `flattenSelection` and any
 * transient hint is cleared.
 */
export function selectAllVisible(state: PickerState): PickerState {
  const { byId } = state;
  const searching = state.search.query.trim().length > 0;
  const pool = searching ? matchingIds(state) : new Set(visibleNodeIds(state));
  const selection = new Set(state.checked);

  state.tree
    .filter((node) => pool.has(node.id))
    .forEach((node) => {
      const coveredByCandidate = ancestorsOf(node.id, byId).some((ancestorId) => pool.has(ancestorId));
      if (coveredByCandidate || isAncestorChecked(node.id, selection, byId)) {
        return;
      }
      selection.add(node.id);
      descendantsOf(node.id, byId).forEach((descendantId) => {
        selection.delete(descendantId);
      });
    });

  const minimal = new Set(flattenSelection(selection, byId));
  return cloneState(state, { checked: minimal, transientHint: null });
}
/**
 * Moves the cursor (or folds/unfolds the focused node) for an arrow key.
 *
 * - `left`: collapse the focused node if it has children and is expanded,
 *   otherwise jump to its parent (no-op on a root).
 * - `right`: no-op on a leaf; expand the node if it is closed; otherwise jump
 *   to its first child that is currently visible.
 * - `up` / `down`: step through the visible rows, clamped at both ends. If the
 *   cursor is not among the visible rows it snaps to the first one.
 *
 * @returns The same `state` object when nothing changes, otherwise a new state.
 */
export function moveCursor(state: PickerState, dir: 'up' | 'down' | 'left' | 'right'): PickerState {
  const node = state.byId.get(state.cursorId);
  if (!node) {
    return state;
  }

  if (dir === 'left') {
    if (node.childIds.length > 0 && state.expanded.has(node.id)) {
      return setExpanded(state, node.id, false);
    }
    return node.parentId ? cloneState(state, { cursorId: node.parentId }) : state;
  }

  if (dir === 'right') {
    if (node.childIds.length === 0) {
      return state;
    }
    const { visibleIds, autoExpand } = filterTree(state);
    // A node counts as open when the user expanded it or the search filter
    // auto-expanded it to reveal a match.
    const open = state.expanded.has(node.id) || autoExpand.has(node.id);
    if (!open) {
      return setExpanded(state, node.id, true);
    }
    // Never park the cursor on a child hidden by the search filter: the row
    // would not be rendered, yet toggles would still apply to it.
    const firstVisibleChild = node.childIds.find((childId) => visibleIds.has(childId));
    return firstVisibleChild ? cloneState(state, { cursorId: firstVisibleChild }) : state;
  }

  const ids = visibleNodeIds(state);
  const index = ids.indexOf(state.cursorId);
  if (index === -1) {
    return ids[0] ? cloneState(state, { cursorId: ids[0] }) : state;
  }
  const nextIndex = dir === 'up' ? Math.max(0, index - 1) : Math.min(ids.length - 1, index + 1);
  return cloneState(state, { cursorId: ids[nextIndex] ?? state.cursorId });
}
/**
 * Pure state transition for the Notion page picker.
 *
 * @param state - Current picker state (never mutated).
 * @param cmd - Command derived from user input or from the hint-expiry timer.
 * @param now - Millisecond timestamp used to stamp and expire transient hints.
 * @returns The next state plus an effect for the caller to perform:
 *   `'save'`, `'quit-without-save'`, or `null` for none.
 */
export function reducer(
  state: PickerState,
  cmd: PickerCommand,
  now = Date.now(),
): { next: PickerState; effect: PickerEffect } {
  // Hint expiry is timer-driven, not user-driven, so it must be honoured even
  // while the confirmation prompt is showing. If it were ignored there, the
  // hint would outlive its timer and stay on screen after the prompt closes.
  if (cmd === 'clear-transient-hint') {
    return { next: clearExpiredTransientHint(state, now), effect: null };
  }

  // While confirming, only confirm / cancel / quit are meaningful.
  if (state.pendingConfirm) {
    if (cmd === 'save-confirm') {
      return { next: cloneState(state, { pendingConfirm: null }), effect: 'save' };
    }
    if (cmd === 'save-cancel') {
      return { next: cloneState(state, { pendingConfirm: null }), effect: null };
    }
    if (cmd === 'quit') {
      return { next: state, effect: 'quit-without-save' };
    }
    return { next: state, effect: null };
  }

  switch (cmd) {
    case 'cursor-up':
      return { next: moveCursor(state, 'up'), effect: null };
    case 'cursor-down':
      return { next: moveCursor(state, 'down'), effect: null };
    case 'cursor-left':
      return { next: moveCursor(state, 'left'), effect: null };
    case 'cursor-right':
      return { next: moveCursor(state, 'right'), effect: null };
    case 'expand':
      return { next: setExpanded(state, state.cursorId, 'toggle'), effect: null };
    case 'collapse':
      return { next: setExpanded(state, state.cursorId, false), effect: null };
    case 'expand-all':
      return {
        next: cloneState(state, {
          expanded: new Set(state.tree.filter((node) => node.childIds.length > 0).map((node) => node.id)),
        }),
        effect: null,
      };
    case 'collapse-all':
      return { next: cloneState(state, { expanded: new Set<string>() }), effect: null };
    case 'toggle-check':
      return { next: toggleChecked(state, state.cursorId, now), effect: null };
    case 'select-all-visible':
      return { next: selectAllVisible(state), effect: null };
    case 'select-none':
      return { next: selectNone(state), effect: null };
    case 'search-start':
      return { next: cloneState(state, { search: { ...state.search, editing: true } }), effect: null };
    case 'search-cancel':
      return { next: cloneState(state, { search: { editing: false, query: '' } }), effect: null };
    case 'search-submit':
      return { next: cloneState(state, { search: { ...state.search, editing: false } }), effect: null };
    case 'search-backspace':
      return {
        next: cloneState(state, { search: { ...state.search, query: state.search.query.slice(0, -1) } }),
        effect: null,
      };
    case 'save-request':
      if (state.checked.size === 0) {
        return {
          next: cloneState(state, {
            transientHint: transientHint('Select at least one page or press q to quit', now),
          }),
          effect: null,
        };
      }
      // Saving a selection implies leaving all_accessible mode; ask first.
      if (state.currentCrawlMode === 'all_accessible') {
        return { next: cloneState(state, { pendingConfirm: 'mode-switch' }), effect: null };
      }
      return { next: state, effect: 'save' };
    case 'save-confirm':
      return { next: state, effect: 'save' };
    case 'save-cancel':
      return { next: state, effect: null };
    case 'quit':
      return { next: state, effect: 'quit-without-save' };
    default:
      // Only the `{ type: 'search-input' }` command remains: append its text.
      return {
        next: cloneState(state, { search: { ...state.search, query: state.search.query + cmd.value } }),
        effect: null,
      };
  }
}
/**
 * Flattens a rendered frame onto a single line by turning every newline into
 * a space, so assertions are insensitive to terminal wrapping. A missing frame
 * yields the empty string.
 */
function normalizeFrameWrap(frame: string | undefined): string {
  if (frame == null) {
    return '';
  }
  return frame.split('\n').join(' ');
}
expect(notionPickerCommandForInkInput('', { return: true }, state().search, 'mode-switch')).toBe('save-confirm'); + expect(notionPickerCommandForInkInput('n', {}, state().search, 'mode-switch')).toBe('save-cancel'); + }); +}); + +describe('window helpers', () => { + it('centers the selected row and returns the visible slice', () => { + expect(windowOffset(20, 10, 5)).toBe(8); + expect(windowItems(['a', 'b', 'c', 'd', 'e'], 3, 3)).toEqual({ items: ['c', 'd', 'e'], offset: 2 }); + }); + + it('clamps picker width to the design rule', () => { + expect(resolveNotionPickerWidth(200)).toBe(120); + expect(resolveNotionPickerWidth(100)).toBe(96); + expect(resolveNotionPickerWidth(50)).toBe(60); + expect(resolveNotionPickerWidth(undefined)).toBe(96); + }); +}); + +describe('NotionPickerApp', () => { + it('renders spec banners, row glyphs, search visibility, and hint text', () => { + const initialState = { + ...state('all_accessible'), + preLoadWarnings: ['1 stored root_page_ids no longer visible'], + }; + const { lastFrame } = renderInkTest( + , + ); + + const frame = lastFrame() ?? ''; + expect(frame).toContain('Notion pages visible to integration "Design Workspace"'); + expect(frame).toContain('5000-page cap reached - some pages not shown'); + expect(frame).toContain('1 stored root_page_ids no longer visible - they will be removed if you save'); + expect(frame).toContain('▸ [ ] Engineering Docs ▸ (1)'); + expect(frame).toContain(' [ ] Marketing'); + expect(frame).not.toContain('Search ready: -'); + expect(frame).toContain('space toggle · enter expand · / search · a all · n none · s save & exit · q quit'); + }); + + it('renders partial discovery warnings without stale-root save suffix', () => { + const initialState = { + ...state(), + preLoadWarnings: ['Notion search stopped early: rate limit after first page'], + }; + const { lastFrame } = renderInkTest( + , + ); + + const frame = lastFrame() ?? 
''; + expect(frame).toContain('Notion search stopped early: rate limit after first page'); + expect(frame).not.toContain( + 'Notion search stopped early: rate limit after first page - they will be removed if you save', + ); + }); + + it('renders checked parents and locked descendants with the locked design glyphs', () => { + const initialState = { + ...state(), + checked: new Set([IDS.engineering]), + expanded: new Set([IDS.engineering]), + }; + const { lastFrame } = renderInkTest( + , + ); + + const frame = lastFrame() ?? ''; + expect(frame).toContain('▸ [×] Engineering Docs ▾'); + expect(frame).toContain(' [~] Architecture'); + }); + + it('supports keyboard selection, all_accessible confirmation, and save callback', async () => { + const onExit = vi.fn(); + const { stdin, lastFrame } = renderInkTest( + , + ); + + stdin.write(' '); + await waitForInkInput(); + expect(lastFrame()).toContain('[×] Engineering Docs'); + + stdin.write('s'); + await waitForInkInput(); + expect(normalizeFrameWrap(lastFrame())).toContain( + 'Save will switch crawl_mode all_accessible -> selected_roots and limit ingest to 1 selected page. 
[y] confirm [esc] back', + ); + + stdin.write('y'); + await waitForInkInput(); + expect(onExit).toHaveBeenCalledWith({ kind: 'save', rootPageIds: [IDS.engineering] }); + }); + + it('removes transient hints after their expiry time', async () => { + vi.useFakeTimers(); + const onExit = vi.fn(); + const { stdin, lastFrame } = renderInkTest( + , + ); + + await act(async () => { + stdin.write('s'); + await vi.advanceTimersByTimeAsync(10); + }); + expect(lastFrame()).toContain('Select at least one page or press q to quit'); + + await act(async () => { + await vi.advanceTimersByTimeAsync(2500); + }); + expect(lastFrame()).not.toContain('Select at least one page or press q to quit'); + expect(onExit).not.toHaveBeenCalled(); + }); + + it('renders row-window overflow indicators when the visible list is clipped', async () => { + const onExit = vi.fn(); + const initialState = buildInitialState({ + tree: buildPickerTree(manyPages()), + existingRootPageIds: [], + currentCrawlMode: 'selected_roots', + }); + initialState.expanded = new Set([IDS.engineering]); + const { stdin, lastFrame } = renderInkTest( + , + ); + + expect(lastFrame()).toContain('↓ 4 more'); + + stdin.write('\u001B[B'); + stdin.write('\u001B[B'); + stdin.write('\u001B[B'); + stdin.write('\u001B[B'); + await waitForInkInput(); + + const frame = lastFrame() ?? 
''; + expect(frame).toContain('↑ '); + expect(frame).toContain('↓ '); + expect(onExit).not.toHaveBeenCalled(); + }); + + it('returns quit without saving', async () => { + const onExit = vi.fn(); + const { stdin } = renderInkTest( + , + ); + + stdin.write('q'); + await waitForInkInput(); + expect(onExit).toHaveBeenCalledWith({ kind: 'quit' }); + }); +}); + +describe('renderNotionPickerTui', () => { + it('returns the app result from the Ink runtime', async () => { + const io = { + stdin: { isTTY: true, setRawMode: vi.fn() }, + stdout: { isTTY: true, columns: 100, rows: 24, write: vi.fn() }, + stderr: { write: vi.fn() }, + }; + const renderInk = vi.fn((_tree: ReactNode, _options: NotionPickerInkRenderOptions) => fakeInkInstance()); + + await expect( + renderNotionPickerTui( + { + initialState: state(), + connectionId: 'notion-main', + workspaceLabel: 'Design Workspace', + cappedAtCount: null, + currentCrawlMode: 'selected_roots', + }, + io, + { renderInk }, + ), + ).resolves.toEqual({ kind: 'quit' }); + expect(renderInk).toHaveBeenCalledOnce(); + }); + + it('sanitizes render errors and tells the user to use no-input mode', async () => { + expect(sanitizeNotionPickerTuiError(new Error('token=secret https://api.notion.com/v1/search'))).toBe( + '[redacted] [redacted-url]', + ); + }); + + it('falls back to quit with a scripted-mode hint when Ink cannot initialize', async () => { + let stderr = ''; + const io = { + stdin: { isTTY: false, setRawMode: vi.fn() }, + stdout: { isTTY: false, columns: 100, rows: 24, write: vi.fn() }, + stderr: { + write(chunk: string) { + stderr += chunk; + }, + }, + }; + + await expect( + renderNotionPickerTui( + { + initialState: state(), + connectionId: 'notion-main', + workspaceLabel: 'Design Workspace', + cappedAtCount: null, + currentCrawlMode: 'selected_roots', + }, + io, + { + renderInk: vi.fn(() => { + throw new Error('token=secret'); + }), + }, + ), + ).resolves.toEqual({ kind: 'quit' }); + expect(stderr).toContain('Use --no-input 
--root-page-id for scripted mode'); + expect(stderr).not.toContain('secret'); + }); +}); diff --git a/packages/cli/src/commands/connection-notion-tui.tsx b/packages/cli/src/commands/connection-notion-tui.tsx new file mode 100644 index 00000000..a624d0c7 --- /dev/null +++ b/packages/cli/src/commands/connection-notion-tui.tsx @@ -0,0 +1,338 @@ +/* @jsxImportSource react */ +import { Box, Text, render as renderInkRuntime, useApp, useInput } from 'ink'; +import React, { type ReactNode, useEffect, useMemo, useRef, useState } from 'react'; +import { + filterTree, + flattenSelection, + isAncestorChecked, + reducer, + visibleNodeIds, + type PickerCommand, + type PickerState, +} from './connection-notion-tree.js'; +import type { KloCliIo } from '../index.js'; + +const COLOR_THEME = { + text: 'white', + muted: 'gray', + active: 'cyan', + warning: 'yellow', +} as const; + +const NO_COLOR_THEME = { + text: 'white', + muted: 'white', + active: 'white', + warning: 'white', +} as const; + +type NotionPickerTheme = Record; + +export interface NotionPickerTuiIo extends KloCliIo { + stdin?: { isTTY?: boolean; setRawMode?(value: boolean): void }; + stdout: KloCliIo['stdout'] & { isTTY?: boolean; columns?: number; rows?: number }; +} + +interface InkKey { + leftArrow?: boolean; + rightArrow?: boolean; + upArrow?: boolean; + downArrow?: boolean; + return?: boolean; + escape?: boolean; + ctrl?: boolean; + backspace?: boolean; + delete?: boolean; +} + +export type PickerRenderResult = { kind: 'save'; rootPageIds: string[] } | { kind: 'quit' }; + +export interface PickerRenderInput { + initialState: PickerState; + connectionId: string; + workspaceLabel: string; + cappedAtCount: number | null; + currentCrawlMode: 'all_accessible' | 'selected_roots'; +} + +interface NotionPickerAppProps extends PickerRenderInput { + terminalRows?: number; + terminalWidth?: number; + env?: NodeJS.ProcessEnv; + onExit(result: PickerRenderResult): void; +} + +export interface NotionPickerInkInstance { + 
/**
 * Computes the picker's render width from the terminal's column count.
 *
 * Four columns are reserved as margin and the result is clamped to the range
 * 60..120. An unknown column count is treated as 100 columns.
 */
export function resolveNotionPickerWidth(columns: number | undefined): number {
  const MIN_WIDTH = 60;
  const MAX_WIDTH = 120;
  const usable = (columns == null ? 100 : columns) - 4;
  if (usable > MAX_WIDTH) {
    return MAX_WIDTH;
  }
  if (usable < MIN_WIDTH) {
    return MIN_WIDTH;
  }
  return usable;
}
/**
 * Translates one Ink input event into a picker command.
 *
 * @param input - Text delivered by Ink for the key press.
 * @param key - Modifier / special-key flags for the key press.
 * @param search - Current search box state; decides between browse and
 *   search-editing key maps.
 * @param pendingConfirm - Non-null while the confirmation prompt is showing.
 * @returns The command to feed to the reducer, or `null` to ignore the key.
 */
export function notionPickerCommandForInkInput(
  input: string,
  key: InkKey,
  search: PickerState['search'],
  pendingConfirm: PickerState['pendingConfirm'],
): PickerCommand | null {
  // Ctrl-C must quit in every mode: the picker is rendered with
  // `exitOnCtrlC: false`, so Ink will not exit on its own. Previously the
  // search-editing map typed a literal "c" into the query instead.
  if (key.ctrl === true && input === 'c') return 'quit';

  if (pendingConfirm) {
    if (input === 'y' || key.return) return 'save-confirm';
    if (input === 'n' || key.escape) return 'save-cancel';
    return null;
  }

  if (search.editing) {
    if (key.escape) return 'search-cancel';
    if (key.return) return 'search-submit';
    if (key.backspace || key.delete) return 'search-backspace';
    if (key.downArrow) return 'cursor-down';
    if (key.upArrow) return 'cursor-up';
    // Ctrl chords are shortcuts, not text; do not insert their letter.
    if (key.ctrl === true) return null;
    // Accept exactly one printable character. Counting code points rather
    // than UTF-16 units lets a single astral character (e.g. an emoji) through.
    if (Array.from(input).length === 1 && input >= ' ' && input !== '\u007f') {
      return { type: 'search-input', value: input };
    }
    return null;
  }

  if (key.upArrow) return 'cursor-up';
  if (key.downArrow) return 'cursor-down';
  if (key.leftArrow) return 'cursor-left';
  if (key.rightArrow) return 'cursor-right';
  if (key.return) return 'expand';
  if (input === ' ') return 'toggle-check';
  if (input === '/') return 'search-start';
  if (input === 'a') return 'select-all-visible';
  if (input === 'n') return 'select-none';
  if (input === 's') return 'save-request';
  if (input === 'q' || key.escape) return 'quit';
  return null;
}
24) - reservedRows)); + const rows = windowItems(visibleIds, selectedIndex, visibleRows); + const hiddenAbove = rows.offset; + const hiddenBelow = Math.max(0, visibleIds.length - rows.offset - rows.items.length); + const searchMatchCount = filterTree(state).visibleIds.size; + const width = resolveNotionPickerWidth(props.terminalWidth); + const showSearch = state.search.editing || state.search.query.trim().length > 0; + const selectedCount = flattenSelection(state.checked, state.byId).length; + + stateRef.current = state; + + useEffect(() => { + const hint = state.transientHint; + if (!hint) { + return; + } + + const clearHint = () => { + setState((current) => { + const { next } = reducer(current, 'clear-transient-hint'); + stateRef.current = next; + return next; + }); + }; + const delay = hint.expiresAt - Date.now(); + if (delay <= 0) { + clearHint(); + return; + } + + const timeout = setTimeout(clearHint, delay); + + return () => clearTimeout(timeout); + }, [state.transientHint?.expiresAt]); + + useInput((input, key) => { + const command = notionPickerCommandForInkInput(input, key, stateRef.current.search, stateRef.current.pendingConfirm); + if (!command) { + return; + } + const { next, effect } = reducer(stateRef.current, command); + stateRef.current = next; + setState(next); + if (effect === 'save') { + props.onExit({ kind: 'save', rootPageIds: flattenSelection(next.checked, next.byId) }); + app.exit(); + return; + } + if (effect === 'quit-without-save') { + props.onExit({ kind: 'quit' }); + app.exit(); + } + }); + + return ( + + Notion pages visible to integration "{props.workspaceLabel}" + {props.cappedAtCount ? {props.cappedAtCount}-page cap reached - some pages not shown : null} + {state.preLoadWarnings.map((warning) => ( + + {staleWarningText(warning)} + + ))} + {showSearch ? ( + + / {state.search.query} + {state.search.editing ? '█' : ''} ({searchMatchCount} matches) + + ) : null} + + {hiddenAbove > 0 ? 
/**
 * Default Ink renderer used by `renderNotionPickerTui` when the caller does not
 * inject its own `renderInk`.
 *
 * Forwards `tree` to `renderInkRuntime`, copying every render option through
 * unchanged. The only adaptation is type-level: the loosely typed io streams
 * are cast to the Node stream types that the runtime signature expects.
 *
 * @param tree    React node to mount.
 * @param options Streams and Ink flags supplied by the caller.
 * @returns The runtime's instance, viewed as a `NotionPickerInkInstance`.
 */
function renderInk(tree: ReactNode, options: NotionPickerInkRenderOptions): NotionPickerInkInstance {
  const { stdin, stdout, stderr, exitOnCtrlC, patchConsole, maxFps, alternateScreen } = options;
  const mounted = renderInkRuntime(tree, {
    stdin: stdin as NodeJS.ReadStream | undefined,
    stdout: stdout as NodeJS.WriteStream,
    stderr: stderr as NodeJS.WriteStream,
    exitOnCtrlC,
    patchConsole,
    maxFps,
    alternateScreen,
  });
  return mounted as NotionPickerInkInstance;
}
/**
 * Builds an in-memory stand-in for the CLI io object.
 *
 * `io.stdout.write` and `io.stderr.write` append each chunk to a private string
 * buffer; the `stdout()` and `stderr()` accessors return everything written to
 * the matching channel so far.
 */
function makeIo() {
  const captured = { out: '', err: '' };
  const sinkFor = (channel: 'out' | 'err') => ({
    write: (chunk: string): void => {
      captured[channel] += chunk;
    },
  });

  return {
    io: {
      stdout: sinkFor('out'),
      stderr: sinkFor('err'),
    },
    stdout: () => captured.out,
    stderr: () => captured.err,
  };
}
/**
 * Creates a mocked `NotionPickerApi` that serves `pages` in at most two batches.
 *
 * - `search` returns the first two pages by default and, when more exist,
 *   advertises the cursor `'page-2'`; called with that cursor it returns the
 *   remaining pages and reports no further results.
 * - `retrieveBotUser` always resolves to a bot named `'Notion bot'` in the
 *   workspace `'Design Workspace'`.
 *
 * Both members are `vi.fn` mocks, so tests can assert on call counts.
 */
function fakeNotionApi(pages: FakeNotionSearchPage[]): NotionPickerApi {
  return {
    search: vi.fn(async (_filterValue, startCursor) => {
      const hasSecondBatch = pages.length > 2;
      const servingSecondBatch = startCursor === 'page-2';
      return servingSecondBatch
        ? { results: pages.slice(2), hasMore: false, nextCursor: null }
        : {
            results: pages.slice(0, 2),
            hasMore: hasSecondBatch,
            nextCursor: hasSecondBatch ? 'page-2' : null,
          };
    }),
    retrieveBotUser: vi.fn(async () => {
      const bot = { workspace_name: 'Design Workspace' };
      return { name: 'Notion bot', bot };
    }),
  };
}
+ + expect(loadProject).not.toHaveBeenCalled(); + expect(io.stderr()).toContain('Unsafe connection id: ../evil'); + }); + + it('writes selected root_page_ids while preserving every other Notion connection field', async () => { + const projectDir = join(tempDir, 'project'); + const initialized = await initKloProject({ projectDir, projectName: 'warehouse' }); + await writeProjectConfig(projectDir, { + ...initialized.config, + connections: { + 'notion-main': { + driver: 'notion', + auth_token_ref: 'env:NOTION_TOKEN', + crawl_mode: 'all_accessible', + root_page_ids: ['99999999-9999-9999-9999-999999999999'], + root_database_ids: ['database-1'], + root_data_source_ids: ['data-source-1'], + max_pages_per_run: 12, + max_knowledge_creates_per_run: 2, + max_knowledge_updates_per_run: 7, + last_successful_cursor: '{"phase":"all_accessible_pages","cursor":"cursor-1"}', + unknown_future_field: 'keep-me', + }, + }, + }); + const io = makeIo(); + + await expect( + runKloConnectionNotion( + { + command: 'pick', + projectDir, + connectionId: 'notion-main', + mode: 'non-interactive', + rootPageIds: [ + '11111111-2222-3333-4444-555555555555', + '66666666-7777-8888-9999-aaaaaaaaaaaa', + ], + }, + io.io, + ), + ).resolves.toBe(0); + + const yaml = await readFile(join(projectDir, 'klo.yaml'), 'utf-8'); + expect(yaml).toContain('crawl_mode: selected_roots'); + expect(yaml).toContain('root_page_ids:'); + expect(yaml).toContain('11111111-2222-3333-4444-555555555555'); + expect(yaml).toContain('66666666-7777-8888-9999-aaaaaaaaaaaa'); + expect(yaml).toContain('root_database_ids:'); + expect(yaml).toContain('database-1'); + expect(yaml).toContain('root_data_source_ids:'); + expect(yaml).toContain('data-source-1'); + expect(yaml).toContain('last_successful_cursor: \'{"phase":"all_accessible_pages","cursor":"cursor-1"}\''); + expect(yaml).toContain('unknown_future_field: keep-me'); + expect(io.stdout()).toContain('Connection: notion-main'); + expect(io.stdout()).toContain('rootPageIds: 2'); + 
// Exercises the three guard failures of applyNotionPickerWriteback against a
// project whose only connection ("warehouse") uses the postgres driver.
it('rejects empty writeback, missing connections, and non-Notion connections', async () => {
  const projectDir = join(tempDir, 'project');
  const initialized = await initKloProject({ projectDir, projectName: 'warehouse' });
  await writeProjectConfig(projectDir, {
    ...initialized.config,
    connections: {
      warehouse: {
        driver: 'postgres',
        url: 'env:DATABASE_URL',
        readonly: true,
      },
    },
  });
  const project = await loadKloProject({ projectDir });

  // An empty selection is rejected first, even though "warehouse" is also not a Notion connection.
  await expect(applyNotionPickerWriteback(project, 'warehouse', [])).rejects.toThrow(
    'connection notion pick requires at least one root page id',
  );
  // Unknown connection id.
  await expect(
    applyNotionPickerWriteback(project, 'missing', ['11111111-2222-3333-4444-555555555555']),
  ).rejects.toThrow('Connection "missing" not found');
  // Existing connection with a non-Notion driver.
  await expect(
    applyNotionPickerWriteback(project, 'warehouse', ['11111111-2222-3333-4444-555555555555']),
  ).rejects.toThrow('Connection "warehouse" is not a Notion connection');
});

// Checks the mapping from a raw Notion search result to the picker's page input.
it('extracts picker page inputs from Notion search results', () => {
  // A page nested under another page keeps its title and reports the parent page id.
  expect(notionPickerPageFromSearchResult(notionPage(PAGE_IDS.architecture, 'Architecture', PAGE_IDS.engineering)))
    .toEqual({
      id: PAGE_IDS.architecture,
      title: 'Architecture',
      archived: false,
      parentId: PAGE_IDS.engineering,
    });

  // A compact (dash-less) id is normalized to dashed form, missing title
  // properties yield 'Untitled', and a workspace-level parent yields parentId null.
  expect(
    notionPickerPageFromSearchResult({
      object: 'page',
      id: PAGE_IDS.engineering.replaceAll('-', ''),
      archived: true,
      parent: { type: 'workspace', workspace: true },
      properties: {},
    }),
  ).toEqual({
    id: PAGE_IDS.engineering,
    title: 'Untitled',
    archived: true,
    parentId: null,
  });
});
// A search failure after at least one successful page must not discard the
// pages already collected; it is surfaced as a warning instead.
it('keeps partial discovery results when Notion search fails after at least one page', async () => {
  const api: NotionPickerApi = {
    // First call succeeds and advertises more results; the second call rejects.
    search: vi
      .fn()
      .mockResolvedValueOnce({
        results: [notionPage(PAGE_IDS.engineering, 'Engineering')],
        hasMore: true,
        nextCursor: 'cursor-2',
      })
      .mockRejectedValueOnce(new Error('rate limit after first page')),
    retrieveBotUser: vi.fn(async () => ({ name: 'Notion bot' })),
  };

  // The early stop is reported through `warnings`, not as a cap (cappedAtCount stays null).
  await expect(discoverNotionPickerPages(api)).resolves.toEqual({
    pages: [{ id: PAGE_IDS.engineering, title: 'Engineering', archived: false, parentId: null }],
    cappedAtCount: null,
    warnings: ['Notion search stopped early: rate limit after first page'],
  });
});

// The label prefers the bot's workspace name and falls back to the connection
// id when the bot lookup throws.
it('uses the Notion workspace name when available and falls back to the connection id', async () => {
  // fakeNotionApi's bot reports workspace_name 'Design Workspace'.
  await expect(resolveNotionWorkspaceLabel(fakeNotionApi([]), 'notion-main')).resolves.toBe('Design Workspace');
  await expect(
    resolveNotionWorkspaceLabel(
      {
        search: vi.fn(),
        retrieveBotUser: vi.fn(async () => {
          throw new Error('users.me unavailable');
        }),
      },
      'notion-main',
    ),
  ).resolves.toBe('notion-main');
});
'all_accessible', + root_page_ids: [PAGE_IDS.stale], + root_database_ids: ['database-1'], + root_data_source_ids: ['data-source-1'], + max_pages_per_run: 12, + max_knowledge_creates_per_run: 2, + max_knowledge_updates_per_run: 7, + last_successful_cursor: null, + }, + }, + }); + const api = fakeNotionApi([ + notionPage(PAGE_IDS.engineering, 'Engineering'), + notionPage(PAGE_IDS.architecture, 'Architecture', PAGE_IDS.engineering), + ]); + const renderPicker = vi.fn(async (input): Promise => { + expect(input.connectionId).toBe('notion-main'); + expect(input.workspaceLabel).toBe('Design Workspace'); + expect(input.currentCrawlMode).toBe('all_accessible'); + expect(input.cappedAtCount).toBeNull(); + expect(input.initialState.preLoadWarnings).toEqual(['1 stored root_page_ids no longer visible']); + return { kind: 'save', rootPageIds: [PAGE_IDS.engineering] }; + }); + const io = makeIo(); + + await expect( + runKloConnectionNotion( + { + command: 'pick', + projectDir, + connectionId: 'notion-main', + mode: 'interactive', + }, + io.io, + { + env: { NOTION_TOKEN: 'ntn_test_token' }, + createNotionApi: vi.fn(() => api), + renderPicker, + }, + ), + ).resolves.toBe(0); + + const yaml = await readFile(join(projectDir, 'klo.yaml'), 'utf-8'); + expect(yaml).toContain('crawl_mode: selected_roots'); + expect(yaml).toContain(PAGE_IDS.engineering); + expect(yaml).not.toContain(PAGE_IDS.stale); + expect(io.stderr()).toContain('1 stored root_page_ids no longer visible'); + expect(io.stdout()).toContain('Connection: notion-main'); + expect(io.stdout()).toContain('rootPageIds: 1'); + }); + + it('passes partial-discovery warnings into the TUI banner state', async () => { + const projectDir = join(tempDir, 'project'); + const initialized = await initKloProject({ projectDir, projectName: 'warehouse' }); + await writeProjectConfig(projectDir, { + ...initialized.config, + connections: { + 'notion-main': { + driver: 'notion', + auth_token_ref: 'env:NOTION_TOKEN', + crawl_mode: 
'selected_roots', + root_page_ids: [PAGE_IDS.engineering], + root_database_ids: [], + root_data_source_ids: [], + max_pages_per_run: 12, + max_knowledge_creates_per_run: 2, + max_knowledge_updates_per_run: 7, + last_successful_cursor: null, + }, + }, + }); + const api: NotionPickerApi = { + search: vi + .fn() + .mockResolvedValueOnce({ + results: [notionPage(PAGE_IDS.engineering, 'Engineering')], + hasMore: true, + nextCursor: 'cursor-2', + }) + .mockRejectedValueOnce(new Error('rate limit after first page')), + retrieveBotUser: vi.fn(async () => ({ name: 'Notion bot', bot: { workspace_name: 'Design Workspace' } })), + }; + let renderInput: PickerRenderInput | undefined; + const renderPicker = vi.fn(async (input: PickerRenderInput): Promise => { + renderInput = input; + return { kind: 'quit' }; + }); + const io = makeIo(); + + await expect( + runKloConnectionNotion( + { + command: 'pick', + projectDir, + connectionId: 'notion-main', + mode: 'interactive', + }, + io.io, + { + env: { NOTION_TOKEN: 'ntn_test_token' }, + createNotionApi: vi.fn(() => api), + renderPicker, + }, + ), + ).resolves.toBe(0); + + expect(renderPicker).toHaveBeenCalledOnce(); + if (!renderInput) { + throw new Error('renderPicker was not called'); + } + expect(renderInput.initialState.preLoadWarnings).toEqual(['Notion search stopped early: rate limit after first page']); + expect(renderInput.initialState.tree.map((node) => node.title)).toEqual(['Engineering']); + expect(io.stderr()).toContain('Notion search stopped early: rate limit after first page'); + expect(io.stdout()).toContain('No changes saved.'); + }); + + it('quits interactive mode without writing when the TUI returns quit', async () => { + const projectDir = join(tempDir, 'project'); + const initialized = await initKloProject({ projectDir, projectName: 'warehouse' }); + await writeProjectConfig(projectDir, { + ...initialized.config, + connections: { + 'notion-main': { + driver: 'notion', + auth_token_ref: 'env:NOTION_TOKEN', + 
crawl_mode: 'selected_roots', + root_page_ids: [PAGE_IDS.engineering], + root_database_ids: [], + root_data_source_ids: [], + max_pages_per_run: 12, + max_knowledge_creates_per_run: 2, + max_knowledge_updates_per_run: 7, + last_successful_cursor: null, + }, + }, + }); + const before = await readFile(join(projectDir, 'klo.yaml'), 'utf-8'); + const io = makeIo(); + + await expect( + runKloConnectionNotion( + { + command: 'pick', + projectDir, + connectionId: 'notion-main', + mode: 'interactive', + }, + io.io, + { + env: { NOTION_TOKEN: 'ntn_test_token' }, + createNotionApi: vi.fn(() => fakeNotionApi([notionPage(PAGE_IDS.engineering, 'Engineering')])), + renderPicker: vi.fn(async (): Promise => ({ kind: 'quit' })), + }, + ), + ).resolves.toBe(0); + + await expect(readFile(join(projectDir, 'klo.yaml'), 'utf-8')).resolves.toBe(before); + expect(io.stdout()).toContain('No changes saved.'); + }); +}); diff --git a/packages/cli/src/commands/connection-notion.ts b/packages/cli/src/commands/connection-notion.ts new file mode 100644 index 00000000..91c6a294 --- /dev/null +++ b/packages/cli/src/commands/connection-notion.ts @@ -0,0 +1,278 @@ +import { parseNotionConnectionConfig, resolveNotionAuthToken } from '@klo/context/connections'; +import { type NotionApi, type NotionBotInfo, NotionClient } from '@klo/context/ingest'; +import { + type KloLocalProject, + type KloProjectConnectionConfig, + loadKloProject, + serializeKloProjectConfig, +} from '@klo/context/project'; +import type { KloCliIo } from '../index.js'; +import { profileMark } from '../startup-profile.js'; +import { buildInitialState, buildPickerTree, type NotionPickerPageInput } from './connection-notion-tree.js'; +import { + type NotionPickerTuiIo, + type PickerRenderInput, + type PickerRenderResult, + renderNotionPickerTui, +} from './connection-notion-tui.js'; + +profileMark('module:commands/connection-notion'); + +export type KloConnectionNotionArgs = + | { + command: 'pick'; + projectDir: string; + 
/**
 * Guards against connection ids that are not plain identifiers.
 *
 * An id is accepted when it starts with an ASCII letter or digit and continues
 * with letters, digits, `_` or `-` only.
 *
 * @throws Error `Unsafe connection id: <id>` for anything else.
 */
function assertSafeConnectionId(connectionId: string): void {
  const isSafe = /^[a-zA-Z0-9][a-zA-Z0-9_-]*$/.test(connectionId);
  if (isSafe) {
    return;
  }
  throw new Error(`Unsafe connection id: ${connectionId}`);
}

/**
 * Converts a Notion page id to canonical lowercase dashed form (8-4-4-4-12).
 *
 * Surrounding whitespace is trimmed and every `-` is removed before validation,
 * so both compact and dashed ids are accepted; what remains must be exactly
 * 32 hexadecimal characters.
 *
 * @param value Page id as supplied by the user or the Notion API.
 * @returns The id as lowercase `xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx`.
 * @throws Error `Invalid Notion page UUID: <value>` (original, untrimmed input) otherwise.
 */
export function normalizeNotionPageId(value: string): string {
  const hexOnly = value.trim().split('-').join('');
  const isThirtyTwoHexChars = hexOnly.length === 32 && !/[^0-9a-fA-F]/.test(hexOnly);
  if (!isThirtyTwoHexChars) {
    throw new Error(`Invalid Notion page UUID: ${value}`);
  }

  const digits = hexOnly.toLowerCase();
  // Slice boundaries of the five UUID groups.
  const boundaries = [0, 8, 12, 16, 20, 32];
  const groups: string[] = [];
  for (let index = 0; index + 1 < boundaries.length; index += 1) {
    groups.push(digits.slice(boundaries[index], boundaries[index + 1]));
  }
  return groups.join('-');
}

/**
 * Narrows an unknown value to a plain key/value record.
 *
 * @returns `value` itself when it is a non-null, non-array object; otherwise `null`.
 */
function recordValue(value: unknown): Record<string, unknown> | null {
  if (value === null || typeof value !== 'object' || Array.isArray(value)) {
    return null;
  }
  return value as Record<string, unknown>;
}
/**
 * Reads the parent page id of a Notion page object.
 *
 * @returns The normalized parent id when `page.parent` is a record of type
 *          `'page_id'` carrying a string `page_id`; `null` for every other
 *          parent shape (for example a workspace-level parent).
 * @throws Error from `normalizeNotionPageId` when the parent id is not a valid UUID.
 */
function extractParentPageId(page: Record<string, unknown>): string | null {
  const parent = recordValue(page.parent);
  const hasPageParent = parent !== null && parent.type === 'page_id' && typeof parent.page_id === 'string';
  return hasPageParent ? normalizeNotionPageId(parent.page_id as string) : null;
}

/**
 * Maps one raw Notion search result onto the picker's page input shape.
 *
 * @param result Raw page object from the Notion search response.
 * @returns `id` (normalized), `title` (via `extractTitleFromNotionPage`),
 *          `archived` (true only when the raw flag is exactly `true`) and
 *          `parentId` (normalized parent page id or `null`).
 * @throws Error `Notion page search result is missing id` when `result.id` is
 *         not a string, or the `normalizeNotionPageId` error for malformed ids.
 */
export function notionPickerPageFromSearchResult(result: Record<string, unknown>): NotionPickerPageInput {
  const rawId = result.id;
  if (typeof rawId !== 'string') {
    throw new Error('Notion page search result is missing id');
  }

  const id = normalizeNotionPageId(rawId);
  const title = extractTitleFromNotionPage(result);
  const archived = result.archived === true;
  const parentId = extractParentPageId(result);
  return { id, title, archived, parentId };
}
/**
 * Chooses the label handed to the picker as `workspaceLabel`.
 *
 * Preference order: the bot's trimmed `workspace_name`, then the bot user's
 * trimmed `name`, then `connectionId`. Blank or non-string values are skipped.
 * If `retrieveBotUser` fails for any reason, `connectionId` is returned.
 */
export async function resolveNotionWorkspaceLabel(api: NotionPickerApi, connectionId: string): Promise<string> {
  const trimmedText = (candidate: unknown): string => (typeof candidate === 'string' ? candidate.trim() : '');
  try {
    const botInfo = (await api.retrieveBotUser()) as NotionBotInfo;
    return trimmedText(botInfo.bot?.workspace_name) || trimmedText(botInfo.name) || connectionId;
  } catch {
    return connectionId;
  }
}

/**
 * Looks up a connection in the project config and requires the Notion driver.
 *
 * @throws Error `Connection "<id>" not found` when the id is absent.
 * @throws Error `Connection "<id>" is not a Notion connection` for other drivers.
 */
function notionConnection(project: KloLocalProject, connectionId: string): KloProjectConnectionConfig {
  const candidate = project.config.connections[connectionId];
  if (candidate && candidate.driver === 'notion') {
    return candidate;
  }
  throw new Error(
    candidate
      ? `Connection "${connectionId}" is not a Notion connection`
      : `Connection "${connectionId}" not found`,
  );
}

/**
 * Persists a root-page selection for a Notion connection into `klo.yaml`.
 *
 * The connection keeps every existing field; only `crawl_mode` (forced to
 * `'selected_roots'`) and `root_page_ids` are overwritten. The empty-selection
 * check runs before the connection lookup.
 *
 * @throws Error when `rootPageIds` is empty, or when the connection is missing
 *         or not a Notion connection.
 */
export async function applyNotionPickerWriteback(
  project: KloLocalProject,
  connectionId: string,
  rootPageIds: string[],
): Promise<void> {
  const selectedCount = rootPageIds.length;
  if (selectedCount === 0) {
    throw new Error('connection notion pick requires at least one root page id');
  }

  const updatedConnection = {
    ...notionConnection(project, connectionId),
    crawl_mode: 'selected_roots',
    root_page_ids: rootPageIds,
  };
  const updatedConfig = {
    ...project.config,
    connections: { ...project.config.connections, [connectionId]: updatedConnection },
  };

  // NOTE(review): the trailing arguments look like author name, author email and
  // a change message — confirm against the fileStore.writeFile signature.
  const changeMessage = `Pick Notion roots: ${connectionId} (${selectedCount} pages)`;
  await project.fileStore.writeFile(
    'klo.yaml',
    serializeKloProjectConfig(updatedConfig),
    'klo',
    'klo@example.com',
    changeMessage,
  );
}

/** Prints the three-line summary emitted after a successful writeback. */
function writePickSummary(io: KloCliIo, connectionId: string, rootPageCount: number): void {
  io.stdout.write(`Connection: ${connectionId}\n`);
  io.stdout.write(`rootPageIds: ${rootPageCount}\n`);
  io.stdout.write('crawlMode: selected_roots\n');
}

/**
 * Runs the interactive flow: resolve the auth token, discover pages, echo
 * pre-load warnings to stderr, render the picker, and report the outcome.
 *
 * @returns The selected root page ids, or `null` when the picker was quit.
 */
async function pickRootPageIdsInteractively(
  project: KloLocalProject,
  connectionId: string,
  io: KloCliIo,
  deps: KloConnectionNotionDeps,
): Promise<string[] | null> {
  const notion = parseNotionConnectionConfig(notionConnection(project, connectionId));
  const authToken = await resolveNotionAuthToken(notion.auth_token_ref, { env: deps.env });
  const api = deps.createNotionApi ? deps.createNotionApi(authToken) : new NotionClient(authToken);

  const discovery = await discoverNotionPickerPages(api);
  const initialState = buildInitialState({
    tree: buildPickerTree(discovery.pages),
    existingRootPageIds: notion.root_page_ids,
    currentCrawlMode: notion.crawl_mode,
  });

  // Discovery warnings come first, followed by the ones computed for the initial state.
  const preLoadWarnings = [...discovery.warnings, ...initialState.preLoadWarnings];
  const renderState = preLoadWarnings.length > 0 ? { ...initialState, preLoadWarnings } : initialState;
  preLoadWarnings.forEach((warning) => {
    io.stderr.write(`${warning}\n`);
  });

  const workspaceLabel = await resolveNotionWorkspaceLabel(api, connectionId);
  const renderPicker = deps.renderPicker ?? renderNotionPickerTui;
  const outcome = await renderPicker(
    {
      initialState: renderState,
      connectionId,
      workspaceLabel,
      cappedAtCount: discovery.cappedAtCount,
      currentCrawlMode: notion.crawl_mode,
    },
    io as NotionPickerTuiIo,
  );
  return outcome.kind === 'quit' ? null : outcome.rootPageIds;
}

/**
 * Entry point for `connection notion pick`.
 *
 * Validates the connection id, loads the project, obtains the root page ids
 * (from the picker in interactive mode, from `args.rootPageIds` otherwise) and
 * writes them back. Quitting the picker prints `No changes saved.` and still
 * counts as success.
 *
 * @returns `0` on success or quit; `1` after writing the error message to
 *          `io.stderr` when any step throws.
 */
export async function runKloConnectionNotion(
  args: KloConnectionNotionArgs,
  io: KloCliIo = process,
  deps: KloConnectionNotionDeps = {},
): Promise<number> {
  try {
    assertSafeConnectionId(args.connectionId);
    const loadProject = deps.loadProject ?? loadKloProject;
    const project = await loadProject({ projectDir: args.projectDir });

    const rootPageIds =
      args.mode === 'interactive'
        ? await pickRootPageIdsInteractively(project, args.connectionId, io, deps)
        : args.rootPageIds;
    if (rootPageIds === null) {
      io.stdout.write('No changes saved.\n');
      return 0;
    }

    await applyNotionPickerWriteback(project, args.connectionId, rootPageIds);
    writePickSummary(io, args.connectionId, rootPageIds.length);
    return 0;
  } catch (error) {
    io.stderr.write(`${error instanceof Error ? error.message : String(error)}\n`);
    return 1;
  }
}
/**
 * Output mode for the visual demo commands.
 * `--json` wins over `--plain`; with neither flag the mode is `'viz'`.
 */
function demoOutputMode(options: { plain?: boolean; json?: boolean }): KloDemoOutputMode {
  return options.json === true ? 'json' : options.plain === true ? 'plain' : 'viz';
}

/** Output mode for `demo doctor`: `'json'` only when `--json` is set, otherwise `'plain'`. */
function demoDoctorOutputMode(options: { json?: boolean }): 'plain' | 'json' {
  if (options.json === true) {
    return 'json';
  }
  return 'plain';
}

/**
 * Output mode for `demo inspect`: `'json'` only when `--json` is set, otherwise
 * `'plain'`. The `plain` flag is accepted but does not change the result.
 */
function demoInspectOutputMode(options: { plain?: boolean; json?: boolean }): KloDemoOutputMode {
  return options.json === true ? 'json' : 'plain';
}

/**
 * Translates the `input` flag into demo args.
 * Only an explicit `false` yields `{ inputMode: 'disabled' }`; `true` and
 * `undefined` yield an empty object.
 */
function demoInputMode(options: { input?: boolean }): { inputMode?: KloDemoInputMode } {
  if (options.input === false) {
    return { inputMode: 'disabled' };
  }
  return {};
}
/** Minimal view of a command: where an option value came from, plus its parent link. */
type CommandOptionSourceReader = {
  getOptionValueSource?: (name: string) => string | undefined;
  parent?: unknown;
};

/**
 * Finds the nearest ancestor of `command` that reports a source for `key`.
 *
 * Walks the `parent` chain starting at the direct parent and returns the first
 * defined result of `getOptionValueSource(key)`; `undefined` when no ancestor
 * reports one.
 */
function inheritedOptionSource(command: CommandOptionSourceReader, key: string): string | undefined {
  type Ancestor = CommandOptionSourceReader & { opts?: () => Record<string, unknown> };
  for (
    let ancestor = command.parent as Ancestor | undefined;
    ancestor;
    ancestor = ancestor.parent as Ancestor | undefined
  ) {
    const source = ancestor.getOptionValueSource?.(key);
    if (source !== undefined) {
      return source;
    }
  }
  return undefined;
}

/**
 * Returns the entries of `options` that should override inherited values.
 *
 * An entry is dropped when:
 * - its value is `undefined`;
 * - it is `input: true` while `inherited.input` is `false`, so an inherited
 *   `--no-input` survives;
 * - it is `mode`, the command reports the value's source as `'default'`, an
 *   inherited `mode` exists with a different value, and the nearest ancestor
 *   reporting a source for `mode` says `'cli'`.
 */
function definedOptions(
  options: Record<string, unknown>,
  inherited: Record<string, unknown> = {},
  command?: CommandOptionSourceReader,
): Record<string, unknown> {
  const yieldsToInheritedNoInput = (key: string, value: unknown): boolean =>
    key === 'input' && value === true && inherited.input === false;

  const yieldsToInheritedCliMode = (key: string, value: unknown): boolean => {
    if (key !== 'mode' || command == null) {
      return false;
    }
    if (command.getOptionValueSource?.(key) !== 'default') {
      return false;
    }
    const inheritedValue = inherited[key];
    if (inheritedValue === undefined || inheritedValue === value) {
      return false;
    }
    return inheritedOptionSource(command, key) === 'cli';
  };

  const keptEntries = Object.entries(options).filter(
    ([key, value]) =>
      value !== undefined && !yieldsToInheritedNoInput(key, value) && !yieldsToInheritedCliMode(key, value),
  );
  return Object.fromEntries(keptEntries);
}

/**
 * Merges a command's options with those of all its ancestors.
 *
 * Ancestor options are layered root-first, so nearer ancestors win. On top of
 * that come `optsWithGlobals()` (when the command provides it) and finally
 * `opts()`, each filtered through `definedOptions` so that defaults do not
 * clobber an inherited `--no-input` or an inherited CLI-supplied `mode`.
 *
 * @param command Command-like object exposing `opts` and optionally
 *                `optsWithGlobals` and `parent`.
 * @returns The merged option bag, cast to `T`.
 */
export function resolveDemoCommandOptions<T>(command: {
  opts: () => T;
  optsWithGlobals?: () => T;
  parent?: unknown;
}): T {
  type OptionHolder = { opts?: () => Record<string, unknown>; parent?: unknown };

  // Collect ancestors nearest-first, then flip so the root is applied first.
  const ancestors: OptionHolder[] = [];
  for (
    let holder = command.parent as OptionHolder | undefined;
    holder;
    holder = holder.parent as OptionHolder | undefined
  ) {
    ancestors.push(holder);
  }
  ancestors.reverse();

  const inherited: Record<string, unknown> = Object.assign(
    {},
    ...ancestors.map((holder) => definedOptions(holder.opts?.() ?? {})),
  );

  let base: Record<string, unknown> = inherited;
  if (command.optsWithGlobals) {
    const globalsLayer = definedOptions(command.optsWithGlobals() as Record<string, unknown>, inherited, command);
    base = { ...inherited, ...globalsLayer };
  }

  const ownLayer = definedOptions(command.opts() as Record<string, unknown>, base, command);
  return { ...base, ...ownLayer } as T;
}
input') + .action(async (_options, command: { opts: () => { projectDir?: string; force?: boolean; input?: boolean } }) => { + const options = resolveDemoCommandOptions(command); + await runDemoArgs(context, { + command: 'init', + projectDir: demoProjectDir(options, command), + force: options.force === true, + ...demoInputMode(options), + }); + }); + + demo + .command('reset') + .description('Reset the packaged demo project') + .option('--project-dir ', 'Demo project directory') + .option('--force', 'Recreate the demo project without prompting', false) + .option('--no-input', 'Disable interactive terminal input') + .action(async (_options, command: { opts: () => { projectDir?: string; force?: boolean; input?: boolean } }) => { + const options = resolveDemoCommandOptions(command); + await runDemoArgs(context, { + command: 'reset', + projectDir: demoProjectDir(options, command), + force: options.force === true, + ...demoInputMode(options), + }); + }); + + demo + .command('replay') + .description('Replay the packaged demo memory-flow') + .option('--project-dir ', 'Demo project directory') + .addOption(new Option('--plain', 'Print plain text output instead of the visual demo').conflicts('json')) + .addOption(new Option('--json', 'Print JSON output').conflicts('plain')) + .option('--no-input', 'Disable interactive terminal input') + .action(async (_options, command: { opts: () => DemoOptions }) => { + const options = resolveDemoCommandOptions(command); + await runDemoArgs(context, { + command: 'replay', + projectDir: demoProjectDir(options, command), + outputMode: demoOutputMode(options), + ...demoInputMode(options), + }); + }); + + demo + .command('scan') + .description('Run the packaged demo scan') + .option('--project-dir ', 'Demo project directory') + .option('--no-input', 'Disable interactive terminal input') + .action(async (_options, command: { opts: () => { projectDir?: string; input?: boolean } }) => { + const options = resolveDemoCommandOptions(command); + 
await runDemoArgs(context, { + command: 'scan', + projectDir: demoProjectDir(options, command), + ...demoInputMode(options), + }); + }); + + demo + .command('inspect') + .description('Inspect packaged demo outputs') + .option('--project-dir ', 'Demo project directory') + .addOption(new Option('--plain', 'Print plain text output').conflicts('json')) + .addOption(new Option('--json', 'Print JSON output').conflicts('plain')) + .option('--no-input', 'Disable interactive terminal input') + .action(async (_options, command: { opts: () => DemoOptions }) => { + const options = resolveDemoCommandOptions(command); + await runDemoArgs(context, { + command: 'inspect', + projectDir: demoProjectDir(options, command), + outputMode: demoInspectOutputMode(options), + ...demoInputMode(options), + }); + }); + + demo + .command('doctor') + .description('Check packaged demo readiness') + .option('--project-dir ', 'Demo project directory') + .addOption(new Option('--plain', 'Print plain text output').conflicts('json')) + .addOption(new Option('--json', 'Print JSON output').conflicts('plain')) + .option('--no-input', 'Disable interactive terminal input') + .action(async (_options, command: { opts: () => DemoOptions }) => { + const options = resolveDemoCommandOptions(command); + await runDemoArgs(context, { + command: 'doctor', + projectDir: demoProjectDir(options, command), + outputMode: demoDoctorOutputMode(options), + ...demoInputMode(options), + }); + }); + + demo + .command('ingest') + .description('Run packaged demo ingest') + .addOption( + new Option('--mode ', 'Demo ingest mode: full or seeded') + .choices(['full', 'seeded']) + .default('full'), + ) + .option('--project-dir ', 'Demo project directory') + .addOption(new Option('--plain', 'Print plain text output instead of the visual demo').conflicts('json')) + .addOption(new Option('--json', 'Print JSON output').conflicts('plain')) + .option('--no-input', 'Disable interactive terminal input') + .action(async (_options, command: { 
opts: () => { mode: KloDemoMode } & DemoOptions }) => { + const options = resolveDemoCommandOptions(command); + await runDemoArgs(context, { + command: 'ingest', + mode: options.mode, + projectDir: demoProjectDir(options, command), + outputMode: demoOutputMode(options), + ...demoInputMode(options), + }); + }); +} diff --git a/packages/cli/src/commands/doctor-commands.ts b/packages/cli/src/commands/doctor-commands.ts new file mode 100644 index 00000000..f2cb9a61 --- /dev/null +++ b/packages/cli/src/commands/doctor-commands.ts @@ -0,0 +1,53 @@ +import type { Command } from '@commander-js/extra-typings'; +import { type CommandWithGlobalOptions, type KloCliCommandContext, resolveCommandProjectDir } from '../cli-program.js'; +import type { KloDoctorArgs } from '../doctor.js'; +import { profileMark } from '../startup-profile.js'; + +profileMark('module:commands/doctor-commands'); + +function outputMode(options: { json?: boolean }): 'plain' | 'json' { + return options.json === true ? 'json' : 'plain'; +} + +function inputMode(options: { input?: boolean }): { inputMode?: 'disabled' } { + return options.input === false ? { inputMode: 'disabled' } : {}; +} + +async function runDoctorArgs(context: KloCliCommandContext, args: KloDoctorArgs): Promise { + const runner = context.deps.doctor ?? 
(await import('../doctor.js')).runKloDoctor; + context.setExitCode(await runner(args, context.io)); +} + +export function registerDoctorCommands(program: Command, context: KloCliCommandContext): void { + const doctor = program + .command('doctor') + .description('Check KLO setup, project, and demo readiness') + .option('--json', 'Print JSON output', false) + .option('--no-input', 'Disable interactive terminal input') + .action(async (options: { json?: boolean; input?: boolean }, command) => { + await runDoctorArgs(context, { + command: 'project', + projectDir: resolveCommandProjectDir(command), + outputMode: outputMode(options), + ...inputMode(options), + }); + }); + + doctor + .command('setup') + .description('Check KLO install, build, and local runtime readiness') + .option('--json', 'Print JSON output', false) + .option('--no-input', 'Disable interactive terminal input') + .action( + async ( + _options: { json?: boolean; input?: boolean }, + command: CommandWithGlobalOptions, + ) => { + const options = (command.optsWithGlobals ? 
command.optsWithGlobals() : command.opts()) as { + json?: boolean; + input?: boolean; + }; + await runDoctorArgs(context, { command: 'setup', outputMode: outputMode(options), ...inputMode(options) }); + }, + ); +} diff --git a/packages/cli/src/commands/ingest-commands.ts b/packages/cli/src/commands/ingest-commands.ts new file mode 100644 index 00000000..90a50ab2 --- /dev/null +++ b/packages/cli/src/commands/ingest-commands.ts @@ -0,0 +1,171 @@ +import { resolve } from 'node:path'; +import { type Command, Option } from '@commander-js/extra-typings'; +import { type KloCliCommandContext, type OutputModeOptions, resolveCommandProjectDir } from '../cli-program.js'; +import type { KloCliDeps, KloCliIo } from '../index.js'; +import type { KloIngestArgs, KloIngestOutputMode } from '../ingest.js'; +import { profileMark } from '../startup-profile.js'; + +profileMark('module:commands/ingest-commands'); + +interface IngestCommandOptions { + runIngestWithProgress: ( + args: KloIngestArgs, + io: KloCliIo, + deps: KloCliDeps, + defaultRunIngest: (args: KloIngestArgs, io: KloCliIo) => Promise, + ) => Promise; +} + +function outputMode(options: OutputModeOptions): KloIngestOutputMode { + if (options.json === true) { + return 'json'; + } + if (options.viz === true) { + return 'viz'; + } + return 'plain'; +} + +function watchOutputMode(options: OutputModeOptions): KloIngestOutputMode { + if (options.json === true) { + return 'json'; + } + if (options.plain === true) { + return 'plain'; + } + return 'viz'; +} + +function inputMode(options: OutputModeOptions): Pick { + return options.input === false ? 
{ inputMode: 'disabled' } : {}; +} + +async function runIngestArgs( + context: KloCliCommandContext, + args: KloIngestArgs, + options: IngestCommandOptions, +): Promise { + const { runKloIngest } = await import('../ingest.js'); + context.setExitCode(await options.runIngestWithProgress(args, context.io, context.deps, runKloIngest)); +} + +export function registerIngestCommands( + program: Command, + context: KloCliCommandContext, + commandOptions: IngestCommandOptions, +): void { + const ingest = program + .command('ingest') + .description('Run or inspect local ingest memory-flow output') + .showHelpAfterError(); + + ingest.hook('preAction', (_thisCommand, actionCommand) => { + context.writeDebug?.('ingest', actionCommand); + }); + + ingest + .command('run') + .description('Run local ingest for one configured connection and source adapter') + .requiredOption('--connection-id ', 'KLO connection id') + .requiredOption('--adapter ', 'Ingest source adapter name') + .option('--source-dir ', 'Directory containing source files') + .option('--database-introspection-url ', 'Daemon URL for live-database introspection') + .option('--debug-llm-request-file ', 'Write sanitized LLM request structure to a JSONL file') + .option('--report-file ', 'Unsupported for ingest run; use ingest status/watch instead') + .addOption(new Option('--plain', 'Print plain text output').conflicts(['json', 'viz'])) + .addOption(new Option('--json', 'Print JSON output').conflicts(['plain', 'viz'])) + .addOption(new Option('--viz', 'Render memory-flow TUI output').conflicts(['plain', 'json'])) + .option('--no-input', 'Disable interactive terminal input for visualization') + .action(async (options, command) => { + if (options.reportFile) { + throw new Error('--report-file is only supported for ingest status/watch'); + } + await runIngestArgs( + context, + { + command: 'run', + projectDir: resolveCommandProjectDir(command), + connectionId: options.connectionId, + adapter: options.adapter, + sourceDir: 
options.sourceDir ? resolve(options.sourceDir) : undefined, + databaseIntrospectionUrl: options.databaseIntrospectionUrl || undefined, + ...(options.debugLlmRequestFile ? { debugLlmRequestFile: resolve(options.debugLlmRequestFile) } : {}), + outputMode: outputMode(options), + ...inputMode(options), + }, + commandOptions, + ); + }); + + ingest + .command('status') + .description('Print status for the latest or selected stored local ingest run or report file') + .argument('[runId]', 'Local ingest run id, report id, run id, or job id') + .option('--report-file ', 'Bundle ingest report JSON file to render') + .addOption(new Option('--plain', 'Print plain text output').conflicts(['json', 'viz'])) + .addOption(new Option('--json', 'Print JSON output').conflicts(['plain', 'viz'])) + .addOption(new Option('--viz', 'Render memory-flow TUI output').conflicts(['plain', 'json'])) + .option('--no-input', 'Disable interactive terminal input for visualization') + .action(async (runId: string | undefined, options, command) => { + await runIngestArgs( + context, + { + command: 'status', + projectDir: resolveCommandProjectDir(command), + ...(runId ? { runId } : {}), + ...(options.reportFile ? 
{ reportFile: resolve(options.reportFile) } : {}), + outputMode: outputMode(options), + ...inputMode(options), + }, + commandOptions, + ); + }); + + ingest + .command('watch') + .description('Open the latest or selected stored ingest visual report') + .argument('[runId]', 'Local ingest run id, report id, run id, or job id') + .option('--report-file ', 'Bundle ingest report JSON file to render') + .addOption(new Option('--plain', 'Print plain text output').conflicts(['json', 'viz'])) + .addOption(new Option('--json', 'Print JSON output').conflicts(['plain', 'viz'])) + .addOption(new Option('--viz', 'Render memory-flow TUI output').conflicts(['plain', 'json'])) + .option('--no-input', 'Disable interactive terminal input for visualization') + .action(async (runId: string | undefined, options, command) => { + await runIngestArgs( + context, + { + command: 'watch', + projectDir: resolveCommandProjectDir(command), + ...(runId ? { runId } : {}), + ...(options.reportFile ? { reportFile: resolve(options.reportFile) } : {}), + outputMode: watchOutputMode(options), + ...inputMode(options), + }, + commandOptions, + ); + }); + + ingest + .command('replay') + .description('Replay a stored ingest run or bundle report through memory-flow output') + .argument('', 'Local ingest run id, report id, run id, or job id') + .option('--report-file ', 'Bundle ingest report JSON file to render') + .addOption(new Option('--plain', 'Print plain text output').conflicts(['json', 'viz'])) + .addOption(new Option('--json', 'Print JSON output').conflicts(['plain', 'viz'])) + .addOption(new Option('--viz', 'Render memory-flow TUI output').conflicts(['plain', 'json'])) + .option('--no-input', 'Disable interactive terminal input for visualization') + .action(async (runId: string, options, command) => { + await runIngestArgs( + context, + { + command: 'replay', + projectDir: resolveCommandProjectDir(command), + runId, + ...(options.reportFile ? 
{ reportFile: resolve(options.reportFile) } : {}), + outputMode: outputMode(options), + ...inputMode(options), + }, + commandOptions, + ); + }); +} diff --git a/packages/cli/src/commands/knowledge-commands.ts b/packages/cli/src/commands/knowledge-commands.ts new file mode 100644 index 00000000..5ef9f58f --- /dev/null +++ b/packages/cli/src/commands/knowledge-commands.ts @@ -0,0 +1,90 @@ +import { type Command, Option } from '@commander-js/extra-typings'; +import { collectOption, type KloCliCommandContext, resolveCommandProjectDir } from '../cli-program.js'; +import { wikiWriteCommandSchema } from '../command-schemas.js'; +import type { KloKnowledgeArgs } from '../knowledge.js'; +import { profileMark } from '../startup-profile.js'; + +profileMark('module:commands/knowledge-commands'); + +async function runKnowledgeArgs(context: KloCliCommandContext, args: KloKnowledgeArgs): Promise { + const runner = context.deps.knowledge ?? (await import('../knowledge.js')).runKloKnowledge; + context.setExitCode(await runner(args, context.io)); +} + +export function registerWikiCommands(program: Command, context: KloCliCommandContext): void { + const wiki = program + .command('wiki') + .description('List, read, search, or write local wiki pages') + .showHelpAfterError() + .addHelpText( + 'after', + '\nProject directory defaults to KLO_PROJECT_DIR when set, otherwise the current working directory.\n', + ); + + wiki + .command('list') + .description('List local wiki pages') + .option('--user-id ', 'Local user id', 'local') + .action(async (options: { userId: string }, command) => { + await runKnowledgeArgs(context, { + command: 'list', + projectDir: resolveCommandProjectDir(command), + userId: options.userId, + }); + }); + + wiki + .command('read') + .description('Read one local wiki page') + .argument('', 'Wiki page key') + .option('--user-id ', 'Local user id', 'local') + .action(async (key: string, options: { userId: string }, command) => { + await runKnowledgeArgs(context, { + 
command: 'read', + projectDir: resolveCommandProjectDir(command), + key, + userId: options.userId, + }); + }); + + wiki + .command('search') + .description('Search local wiki pages') + .argument('', 'Search query') + .option('--user-id ', 'Local user id', 'local') + .action(async (query: string, options: { userId: string }, command) => { + await runKnowledgeArgs(context, { + command: 'search', + projectDir: resolveCommandProjectDir(command), + query, + userId: options.userId, + }); + }); + + wiki + .command('write') + .description('Write one local wiki page') + .argument('', 'Wiki page key') + .option('--user-id ', 'Local user id', 'local') + .addOption(new Option('--scope ', 'global or user').choices(['global', 'user']).default('global')) + .requiredOption('--summary

', 'Wiki summary') + .requiredOption('--content ', 'Wiki content') + .option('--tag ', 'Wiki tag; repeatable', collectOption, []) + .option('--ref ', 'Wiki ref; repeatable', collectOption, []) + .option('--sl-ref ', 'Semantic-layer ref; repeatable', collectOption, []) + .action(async (key: string, options, command) => { + const args = wikiWriteCommandSchema.parse({ + command: 'write', + projectDir: resolveCommandProjectDir(command), + key, + scope: options.scope === 'user' ? 'USER' : 'GLOBAL', + userId: options.userId, + summary: options.summary, + content: options.content, + tags: options.tag, + refs: options.ref, + slRefs: options.slRef, + }); + await runKnowledgeArgs(context, args); + }); +} diff --git a/packages/cli/src/commands/public-ingest-commands.ts b/packages/cli/src/commands/public-ingest-commands.ts new file mode 100644 index 00000000..1bc138a2 --- /dev/null +++ b/packages/cli/src/commands/public-ingest-commands.ts @@ -0,0 +1,109 @@ +import { InvalidArgumentError, type Command } from '@commander-js/extra-typings'; +import { type KloCliCommandContext, resolveCommandProjectDir } from '../cli-program.js'; +import { publicIngestReadCommandSchema, publicIngestRunCommandSchema } from '../command-schemas.js'; +import type { KloPublicIngestArgs, KloPublicIngestInputMode } from '../public-ingest.js'; +import { profileMark } from '../startup-profile.js'; + +profileMark('module:commands/public-ingest-commands'); + +interface PublicIngestOptions { + all?: boolean; + json?: boolean; + input?: boolean; +} + +function inputMode(options: { input?: boolean }): KloPublicIngestInputMode { + return options.input === false ? 'disabled' : 'auto'; +} + +async function runPublicIngestArgs(context: KloCliCommandContext, args: KloPublicIngestArgs): Promise { + const runner = context.deps.publicIngest ?? 
(await import('../public-ingest.js')).runKloPublicIngest; + context.setExitCode(await runner(args, context.io)); +} + +function parsePublicIngestConnectionId(value: string): string { + if (value === 'run') { + throw new InvalidArgumentError('run is reserved; use klo dev ingest run for low-level adapter syntax'); + } + return value; +} + +export function registerPublicIngestCommands(program: Command, context: KloCliCommandContext): void { + const ingest = program + .command('ingest') + .description('Build and refresh KLO context from configured sources') + .usage('[options] [connectionId]') + .argument('[connectionId]', 'Connection id to ingest', parsePublicIngestConnectionId) + .option('--all', 'Ingest every eligible configured source', false) + .option('--json', 'Print JSON output', false) + .option('--no-input', 'Disable interactive terminal input') + .addHelpText( + 'after', + [ + '', + 'Examples:', + ' klo ingest [options]', + ' klo ingest --all [options]', + ' klo ingest status [runId] [options]', + ' klo ingest watch [runId] [options]', + '', + 'Project directory defaults to KLO_PROJECT_DIR when set, otherwise the current working directory.', + '', + ].join('\n'), + ) + .showHelpAfterError() + .hook('preAction', (_thisCommand, actionCommand) => { + context.writeDebug?.('ingest', actionCommand); + }) + .action(async (connectionId: string | undefined, _options: PublicIngestOptions, command) => { + const options = command.opts(); + if (options.all === true && connectionId) { + throw new Error('klo ingest accepts either --all or , not both'); + } + const args = publicIngestRunCommandSchema.parse({ + command: 'run', + projectDir: resolveCommandProjectDir(command), + ...(connectionId ? 
{ targetConnectionId: connectionId } : {}), + all: options.all === true, + json: options.json === true, + inputMode: inputMode(options), + }); + await runPublicIngestArgs(context, args); + }); + + ingest + .command('status') + .description('Print status for the latest or selected public ingest run') + .argument('[runId]', 'Public ingest run id') + .option('--json', 'Print JSON output', false) + .option('--no-input', 'Disable interactive terminal input') + .action(async (runId: string | undefined, _options: PublicIngestOptions, command) => { + const options = (command.optsWithGlobals ? command.optsWithGlobals() : command.opts()) as PublicIngestOptions; + const args = publicIngestReadCommandSchema.parse({ + command: 'status', + projectDir: resolveCommandProjectDir(command), + ...(runId ? { runId } : {}), + json: options.json === true, + inputMode: inputMode(options), + }); + await runPublicIngestArgs(context, args); + }); + + ingest + .command('watch') + .description('Open the latest or selected public ingest visual report') + .argument('[runId]', 'Public ingest run id') + .option('--json', 'Print JSON output instead of the visual report', false) + .option('--no-input', 'Disable interactive terminal input') + .action(async (runId: string | undefined, _options: PublicIngestOptions, command) => { + const options = (command.optsWithGlobals ? command.optsWithGlobals() : command.opts()) as PublicIngestOptions; + const args = publicIngestReadCommandSchema.parse({ + command: 'watch', + projectDir: resolveCommandProjectDir(command), + ...(runId ? 
{ runId } : {}), + json: options.json === true, + inputMode: inputMode(options), + }); + await runPublicIngestArgs(context, args); + }); +} diff --git a/packages/cli/src/commands/scan-commands.ts b/packages/cli/src/commands/scan-commands.ts new file mode 100644 index 00000000..c67511d4 --- /dev/null +++ b/packages/cli/src/commands/scan-commands.ts @@ -0,0 +1,353 @@ +import { type Command, InvalidArgumentError, Option } from '@commander-js/extra-typings'; +import { type KloCliCommandContext, parsePositiveIntegerOption, resolveCommandProjectDir } from '../cli-program.js'; +import type { KloScanArgs } from '../scan.js'; +import { profileMark } from '../startup-profile.js'; + +profileMark('module:commands/scan-commands'); + +async function runScanArgs(context: KloCliCommandContext, args: KloScanArgs): Promise { + const runner = context.deps.scan ?? (await import('../scan.js')).runKloScan; + context.setExitCode(await runner(args, context.io)); +} + +type KloScanModeOption = Extract['mode']; + +function parseScanModeOption(value: string): KloScanModeOption { + if (value === 'structural' || value === 'enriched' || value === 'relationships') { + return value; + } + throw new InvalidArgumentError('Allowed choices are structural, enriched, relationships'); +} + +type KloRelationshipStatusOption = Extract['status']; +type KloRelationshipFeedbackDecisionOption = Extract['decision']; + +function parseRelationshipStatusOption(value: string): KloRelationshipStatusOption { + if (value === 'accepted' || value === 'review' || value === 'rejected' || value === 'skipped' || value === 'all') { + return value; + } + throw new InvalidArgumentError('Allowed choices are accepted, review, rejected, skipped, all'); +} + +function parseRelationshipFeedbackDecisionOption(value: string): KloRelationshipFeedbackDecisionOption { + if (value === 'accepted' || value === 'rejected' || value === 'all') { + return value; + } + throw new InvalidArgumentError('Allowed choices are accepted, rejected, 
all'); +} + +/** + * Commander argument parser that rejects blank option values. + * + * @param value Raw option value. + * @returns The value unchanged (it is validated, not trimmed). + * @throws InvalidArgumentError when the value is empty or whitespace-only. + */ +function parseNonEmptyOption(value: string): string { + if (value.trim().length === 0) { + throw new InvalidArgumentError('must not be empty'); + } + return value; +} + +/** + * Parse a relationship calibration score threshold given on the command line. + * + * `Number('')` and `Number('   ')` both evaluate to 0, so blank input is rejected + * explicitly instead of being silently accepted as a threshold of 0. + * + * @param value Raw option value. + * @returns The threshold as a finite number in the closed range 0 through 1. + * @throws InvalidArgumentError when the value is blank, non-numeric, or out of range. + */ +function parseRelationshipCalibrationThreshold(value: string): number { + // Blank strings coerce to 0 under Number(); map them to NaN so they fail the range check. + const parsed = value.trim().length === 0 ? Number.NaN : Number(value); + if (Number.isFinite(parsed) && parsed >= 0 && parsed <= 1) { + return parsed; + } + throw new InvalidArgumentError('Allowed range is 0 through 1'); +} + +/** + * Translate the `accept` / `reject` review options into relationship-decision arguments. + * + * The candidate id is taken from whichever of `accept` or `reject` is set; the + * reviewer defaults to 'klo' and the note defaults to null. + * + * @returns The decision arguments, or null when neither `accept` nor `reject` is set. + * @throws Error when both `accept` and `reject` are set. + */ +function relationshipDecisionArgs(options: { + accept?: string; + reject?: string; + reviewer?: string; + note?: string; + json?: boolean; +}): Pick< + Extract, + 'candidateId' | 'decision' | 'reviewer' | 'note' | 'json' +> | null { + const decisionCount = [options.accept !== undefined, options.reject !== undefined].filter(Boolean).length; + if (decisionCount > 1) { + throw new Error('Only one relationship review decision option can be used: --accept and --reject conflict'); + } + if (options.accept !== undefined) { + return { + candidateId: options.accept, + decision: 'accepted', + reviewer: options.reviewer ?? 'klo', + note: options.note ?? null, + json: options.json === true, + }; + } + if (options.reject !== undefined) { + return { + candidateId: options.reject, + decision: 'rejected', + reviewer: options.reviewer ?? 'klo', + note: options.note ?? 
null, + json: options.json === true, + }; + } + return null; +} + +function collectRelationshipCandidateOption(value: string, previous: string[]): string[] { + return [...previous, parseNonEmptyOption(value)]; +} + +export function registerScanCommands(program: Command, context: KloCliCommandContext): void { + const scan = program + .command('scan') + .description('Run or inspect standalone connection scans') + .argument('[connectionId]', 'KLO connection id to scan') + .option( + '--mode ', + 'Scan mode: structural, enriched, relationships (default: structural)', + parseScanModeOption, + ) + .option('--dry-run', 'Run without writing scan results', false) + .option('--database-introspection-url ', 'Daemon URL for live-database introspection') + .showHelpAfterError() + .addHelpText( + 'after', + '\nProject directory defaults to KLO_PROJECT_DIR when set, otherwise the current working directory.\n', + ) + .hook('preAction', (_thisCommand, actionCommand) => { + context.writeDebug?.('scan', actionCommand); + }) + .action(async (connectionId: string | undefined, options, command) => { + if (!connectionId) { + scan.outputHelp(); + context.io.stderr.write('klo dev scan requires or a subcommand\n'); + context.setExitCode(1); + return; + } + const mode = options.mode ?? 
'structural'; + await runScanArgs(context, { + command: 'run', + projectDir: resolveCommandProjectDir(command), + connectionId, + mode, + detectRelationships: mode === 'relationships', + dryRun: options.dryRun === true, + databaseIntrospectionUrl: options.databaseIntrospectionUrl, + }); + }); + + scan + .command('status') + .description('Print status for a local scan run') + .argument('', 'Local scan run id') + .addHelpText( + 'after', + '\n--project-dir is inherited from `klo dev scan` (default: KLO_PROJECT_DIR or current working directory).\n', + ) + .action(async (runId: string, _options: unknown, command) => { + await runScanArgs(context, { + command: 'status', + projectDir: resolveCommandProjectDir(command), + runId, + }); + }); + + scan + .command('report') + .description('Print a local scan report') + .argument('', 'Local scan run id') + .option('--json', 'Print the raw scan report JSON', false) + .addHelpText( + 'after', + '\n--project-dir is inherited from `klo dev scan` (default: KLO_PROJECT_DIR or current working directory).\n', + ) + .action(async (runId: string, options, command) => { + await runScanArgs(context, { + command: 'report', + projectDir: resolveCommandProjectDir(command), + runId, + json: options.json === true, + }); + }); + + scan + .command('relationships') + .description('Print relationship artifacts for a local scan run') + .argument('', 'Local scan run id') + .option( + '--status ', + 'Relationship status: accepted, review, rejected, skipped, all', + parseRelationshipStatusOption, + 'review', + ) + .option('--limit ', 'Maximum relationships to print per status', parsePositiveIntegerOption, 25) + .addOption( + new Option('--accept ', 'Record a reviewer accepted decision for a relationship candidate') + .argParser(parseNonEmptyOption) + .conflicts('reject'), + ) + .addOption( + new Option('--reject ', 'Record a reviewer rejected decision for a relationship candidate') + .argParser(parseNonEmptyOption) + .conflicts('accept'), + ) + 
.option('--note ', 'Attach a note when recording a relationship review decision') + .option('--reviewer ', 'Reviewer name for a relationship review decision') + .option('--json', 'Print relationship artifacts as JSON', false) + .addHelpText( + 'after', + '\n--project-dir is inherited from `klo dev scan` (default: KLO_PROJECT_DIR or current working directory).\n', + ) + .action(async (runId: string, options, command) => { + const decision = relationshipDecisionArgs(options); + if (decision) { + await runScanArgs(context, { + command: 'relationshipDecision', + projectDir: resolveCommandProjectDir(command), + runId, + candidateId: decision.candidateId, + decision: decision.decision, + reviewer: decision.reviewer, + note: decision.note, + json: decision.json, + }); + return; + } + await runScanArgs(context, { + command: 'relationships', + projectDir: resolveCommandProjectDir(command), + runId, + status: options.status, + json: options.json === true, + limit: options.limit, + }); + }); + + scan + .command('relationship-apply') + .description('Apply accepted relationship review decisions as manual manifest joins') + .argument('', 'Local scan run id') + .option('--all-accepted', 'Apply all accepted relationship review decisions for the scan run', false) + .option( + '--candidate ', + 'Apply one accepted relationship review decision', + collectRelationshipCandidateOption, + [], + ) + .option('--dry-run', 'Preview relationships that would be written without rewriting manifest shards', false) + .option('--json', 'Print the apply result as JSON', false) + .addHelpText( + 'after', + '\n--project-dir is inherited from `klo dev scan` (default: KLO_PROJECT_DIR or current working directory).\n', + ) + .action(async (runId: string, options, command) => { + const parentOptions = command.parent?.opts() as { dryRun?: boolean } | undefined; + await runScanArgs(context, { + command: 'relationshipApply', + projectDir: resolveCommandProjectDir(command), + runId, + applyAllAccepted: 
options.allAccepted === true, + candidateIds: options.candidate, + dryRun: options.dryRun === true || parentOptions?.dryRun === true, + json: options.json === true, + }); + }); + + scan + .command('relationship-feedback') + .description('Export persisted relationship review decisions as calibration labels') + .option('--connection ', 'Only export labels for one KLO connection') + .option( + '--decision ', + 'Relationship feedback decision: accepted, rejected, all', + parseRelationshipFeedbackDecisionOption, + 'all', + ) + .addOption(new Option('--json', 'Print the export as JSON').default(false).conflicts('jsonl')) + .addOption(new Option('--jsonl', 'Print labels as newline-delimited JSON').default(false).conflicts('json')) + .addHelpText( + 'after', + '\n--project-dir is inherited from `klo dev scan` (default: KLO_PROJECT_DIR or current working directory).\n', + ) + .action(async (options, command) => { + await runScanArgs(context, { + command: 'relationshipFeedback', + projectDir: resolveCommandProjectDir(command), + connectionId: options.connection ?? 
null, + decision: options.decision, + json: options.json === true, + jsonl: options.jsonl === true, + }); + }); + + scan + .command('relationship-calibration') + .description('Summarize relationship feedback labels against current score thresholds') + .option('--connection ', 'Only calibrate labels for one KLO connection') + .option( + '--decision ', + 'Relationship feedback decision: accepted, rejected, all', + parseRelationshipFeedbackDecisionOption, + 'all', + ) + .option( + '--accept-threshold ', + 'Score threshold treated as predicted accepted', + parseRelationshipCalibrationThreshold, + 0.85, + ) + .option( + '--review-threshold ', + 'Score threshold treated as predicted review', + parseRelationshipCalibrationThreshold, + 0.55, + ) + .option('--json', 'Print the calibration report as JSON', false) + .addHelpText( + 'after', + '\n--project-dir is inherited from `klo dev scan` (default: KLO_PROJECT_DIR or current working directory).\n', + ) + .action(async (options, command) => { + await runScanArgs(context, { + command: 'relationshipCalibration', + projectDir: resolveCommandProjectDir(command), + connectionId: options.connection ?? 
null, + decision: options.decision, + acceptThreshold: options.acceptThreshold, + reviewThreshold: options.reviewThreshold, + json: options.json === true, + }); + }); + + scan + .command('relationship-thresholds') + .description('Evaluate relationship feedback labels for offline threshold advice') + .option('--connection ', 'Only evaluate labels for one KLO connection') + .option( + '--min-total-labels ', + 'Minimum scored labels before advice can be ready', + parsePositiveIntegerOption, + 20, + ) + .option( + '--min-accepted-labels ', + 'Minimum accepted labels before advice can be ready', + parsePositiveIntegerOption, + 5, + ) + .option( + '--min-rejected-labels ', + 'Minimum rejected labels before advice can be ready', + parsePositiveIntegerOption, + 5, + ) + .option('--json', 'Print the threshold advice report as JSON', false) + .addHelpText( + 'after', + '\n--project-dir is inherited from `klo dev scan` (default: KLO_PROJECT_DIR or current working directory).\n', + ) + .action(async (options, command) => { + await runScanArgs(context, { + command: 'relationshipThresholds', + projectDir: resolveCommandProjectDir(command), + connectionId: options.connection ?? 
null, + minTotalLabels: options.minTotalLabels, + minAcceptedLabels: options.minAcceptedLabels, + minRejectedLabels: options.minRejectedLabels, + json: options.json === true, + }); + }); +} diff --git a/packages/cli/src/commands/serve-commands.ts b/packages/cli/src/commands/serve-commands.ts new file mode 100644 index 00000000..bf26d31a --- /dev/null +++ b/packages/cli/src/commands/serve-commands.ts @@ -0,0 +1,47 @@ +import { type Command, InvalidArgumentError } from '@commander-js/extra-typings'; +import { type KloCliCommandContext, resolveCommandProjectDir } from '../cli-program.js'; +import type { KloServeArgs } from '../serve.js'; +import { profileMark } from '../startup-profile.js'; + +profileMark('module:commands/serve-commands'); + +function parseMcp(value: string): 'stdio' { + if (value === 'stdio') { + return 'stdio'; + } + throw new InvalidArgumentError('Only stdio is supported in this phase'); +} + +export function registerServeCommands(program: Command, context: KloCliCommandContext): void { + program + .command('serve') + .description('Run standalone KLO services such as MCP stdio') + .requiredOption('--mcp ', 'MCP transport mode', parseMcp) + .option('--user-id ', 'Local user id', 'local') + .option('--semantic-compute', 'Enable semantic-layer compute', false) + .option('--semantic-compute-url ', 'HTTP semantic-layer compute URL') + .option('--database-introspection-url ', 'Daemon URL for live-database introspection') + .option('--execute-queries', 'Allow semantic-layer query execution', false) + .option('--memory-capture', 'Enable memory capture', false) + .option('--memory-model ', 'Memory capture model') + .showHelpAfterError() + .action(async (options, command): Promise => { + const semanticCompute = options.semanticCompute === true || Boolean(options.semanticComputeUrl); + if (options.executeQueries === true && !semanticCompute) { + throw new Error('--execute-queries requires --semantic-compute'); + } + const args: KloServeArgs = { + mcp: 
options.mcp, + projectDir: resolveCommandProjectDir(command), + userId: options.userId, + semanticCompute, + semanticComputeUrl: options.semanticComputeUrl, + databaseIntrospectionUrl: options.databaseIntrospectionUrl, + executeQueries: options.executeQueries === true, + memoryCapture: options.memoryCapture === true, + memoryModel: options.memoryModel, + }; + const runner = context.deps.serveStdio ?? (await import('../serve.js')).runKloServeStdio; + context.setExitCode(await runner(args)); + }); +} diff --git a/packages/cli/src/commands/setup-commands.ts b/packages/cli/src/commands/setup-commands.ts new file mode 100644 index 00000000..5d7c0aad --- /dev/null +++ b/packages/cli/src/commands/setup-commands.ts @@ -0,0 +1,517 @@ +import { type Command, InvalidArgumentError, Option } from '@commander-js/extra-typings'; +import type { KloCliCommandContext } from '../cli-program.js'; +import { resolveCommandProjectDir } from '../cli-program.js'; +import type { KloSetupDatabaseDriver } from '../setup-databases.js'; +import type { KloSetupSourceType } from '../setup-sources.js'; +import { registerDemoCommands } from './demo-commands.js'; + +async function runSetupArgs( + context: KloCliCommandContext, + args: Parameters>[0], +) { + const runner = context.deps.setup ?? 
/**
 * Commander argument parser for options that must be a strictly positive integer.
 *
 * The value must consist solely of base-10 digits (surrounding whitespace and a
 * leading `+` are tolerated). `Number.parseInt` alone is not used because it
 * silently truncates inputs such as `12abc` or `1.5`.
 *
 * @param value Raw option value from the command line.
 * @returns The parsed integer, always greater than zero.
 * @throws InvalidArgumentError when the value is not a positive safe integer, so
 *   Commander reports it as an invalid option argument like the sibling parsers.
 */
function positiveInteger(value: string): number {
  const trimmed = value.trim();
  const parsed = /^\+?\d+$/.test(trimmed) ? Number(trimmed) : Number.NaN;
  if (!Number.isSafeInteger(parsed) || parsed <= 0) {
    throw new InvalidArgumentError(`Expected a positive integer, received ${value}`);
  }
  return parsed;
}

/**
 * Commander argument parser for `--embedding-backend`.
 *
 * @throws InvalidArgumentError when the value is not a supported backend.
 */
function embeddingBackend(value: string): 'openai' | 'sentence-transformers' {
  if (value === 'openai' || value === 'sentence-transformers') {
    return value;
  }
  throw new InvalidArgumentError(`invalid choice '${value}'`);
}

/**
 * Commander argument parser for `--database`.
 *
 * @throws InvalidArgumentError when the value is not one of the listed drivers.
 */
function databaseDriver(value: string): KloSetupDatabaseDriver {
  if (
    value === 'sqlite' ||
    value === 'postgres' ||
    value === 'mysql' ||
    value === 'clickhouse' ||
    value === 'sqlserver' ||
    value === 'bigquery' ||
    value === 'snowflake'
  ) {
    return value;
  }
  throw new InvalidArgumentError(`invalid choice '${value}'`);
}

/**
 * Commander argument parser for `--source`.
 *
 * @throws InvalidArgumentError when the value is not one of the listed source types.
 */
function sourceType(value: string): KloSetupSourceType {
  if (
    value === 'dbt' ||
    value === 'metricflow' ||
    value === 'metabase' ||
    value === 'looker' ||
    value === 'lookml' ||
    value === 'notion'
  ) {
    return value;
  }
  throw new InvalidArgumentError(`invalid choice '${value}'`);
}

/**
 * Commander argument parser for `--agent-scope`.
 *
 * @throws InvalidArgumentError when the value is neither `project` nor `global`.
 */
function agentScope(value: string): 'project' | 'global' {
  if (value === 'project' || value === 'global') {
    return value;
  }
  throw new InvalidArgumentError(`invalid choice '${value}'`);
}

/**
 * Commander argument parser for `--agent-install-mode`.
 *
 * @throws InvalidArgumentError when the value is not `cli`, `mcp`, or `both`.
 */
function agentInstallMode(value: string): 'cli' | 'mcp' | 'both' {
  if (value === 'cli' || value === 'mcp' || value === 'both') {
    return value;
  }
  throw new InvalidArgumentError(`invalid choice '${value}'`);
}

/**
 * Commander argument parser for positive integer ids (used by `--metabase-database-id`).
 *
 * Kept as a separate name for existing call sites; it shares one implementation
 * with `positiveInteger` so the two cannot drift apart again.
 *
 * @throws InvalidArgumentError when the value is not a positive integer.
 */
function positiveNumber(value: string): number {
  return positiveInteger(value);
}
/**
 * Decide whether `klo setup` should open with its interactive entry menu.
 *
 * The menu is shown only when none of the checked setup options was supplied:
 * any non-empty repeatable option, or any listed scalar option whose value
 * source is not the default, suppresses it.
 *
 * NOTE(review): `agentScope` and `agentInstallMode` are not part of the checked
 * names, so passing only `--agent-scope` / `--agent-install-mode` still shows the
 * menu. Confirm whether that is intentional.
 *
 * @param options Parsed `klo setup` options.
 * @param command The Commander command, used to look up each option's value source.
 * @returns `true` when the entry menu should be shown.
 */
function shouldShowSetupEntryMenu(
  options: {
    new?: boolean;
    existing?: boolean;
    agents?: boolean;
    target?: string;
    global?: boolean;
    project?: boolean;
    skipAgents?: boolean;
    yes?: boolean;
    input?: boolean;
    anthropicApiKeyEnv?: string;
    anthropicApiKeyFile?: string;
    anthropicModel?: string;
    skipLlm?: boolean;
    embeddingBackend?: string;
    embeddingApiKeyEnv?: string;
    embeddingApiKeyFile?: string;
    skipEmbeddings?: boolean;
    database?: KloSetupDatabaseDriver[];
    databaseConnectionId?: string[];
    newDatabaseConnectionId?: string;
    databaseUrl?: string;
    databaseSchema?: string[];
    enableHistoricSql?: boolean;
    disableHistoricSql?: boolean;
    historicSqlWindowDays?: number;
    historicSqlMinCalls?: number;
    historicSqlServiceAccountPattern?: string[];
    historicSqlRedactionPattern?: string[];
    skipDatabases?: boolean;
    source?: KloSetupSourceType;
    sourceConnectionId?: string;
    sourcePath?: string;
    sourceGitUrl?: string;
    sourceBranch?: string;
    sourceSubpath?: string;
    sourceAuthTokenRef?: string;
    sourceUrl?: string;
    sourceApiKeyRef?: string;
    sourceClientId?: string;
    sourceClientSecretRef?: string;
    sourceWarehouseConnectionId?: string;
    sourceProjectName?: string;
    sourceProfilesPath?: string;
    sourceTarget?: string;
    metabaseDatabaseId?: number;
    notionCrawlMode?: string;
    notionRootPageId?: string[];
    skipInitialSourceIngest?: boolean;
    skipSources?: boolean;
  },
  command: Command,
): boolean {
  // Repeatable options default to an empty array, so "was it used" is a length check.
  const repeatableValues = [
    options.database,
    options.databaseConnectionId,
    options.databaseSchema,
    options.historicSqlServiceAccountPattern,
    options.historicSqlRedactionPattern,
    options.notionRootPageId,
  ];
  const anyRepeatableUsed = repeatableValues.some((values) => (values?.length ?? 0) > 0);
  if (anyRepeatableUsed) {
    return false;
  }

  // Scalar and boolean options are detected through Commander's option value source.
  const scalarOptionNames = [
    'new',
    'existing',
    'agents',
    'target',
    'global',
    'project',
    'skipAgents',
    'yes',
    'input',
    'anthropicApiKeyEnv',
    'anthropicApiKeyFile',
    'anthropicModel',
    'skipLlm',
    'embeddingBackend',
    'embeddingApiKeyEnv',
    'embeddingApiKeyFile',
    'skipEmbeddings',
    'newDatabaseConnectionId',
    'databaseUrl',
    'enableHistoricSql',
    'disableHistoricSql',
    'historicSqlWindowDays',
    'historicSqlMinCalls',
    'skipDatabases',
    'source',
    'sourceConnectionId',
    'sourcePath',
    'sourceGitUrl',
    'sourceBranch',
    'sourceSubpath',
    'sourceAuthTokenRef',
    'sourceUrl',
    'sourceApiKeyRef',
    'sourceClientId',
    'sourceClientSecretRef',
    'sourceWarehouseConnectionId',
    'sourceProjectName',
    'sourceProfilesPath',
    'sourceTarget',
    'metabaseDatabaseId',
    'notionCrawlMode',
    'skipInitialSourceIngest',
    'skipSources',
  ];
  for (const optionName of scalarOptionNames) {
    if (optionWasSpecified(command, optionName)) {
      return false;
    }
  }
  return true;
}
+ ) + .addOption(new Option('--agent-scope ', 'Agent install scope').argParser(agentScope).default('project')) + .option('--project', 'Install agent integration into the project scope', false) + .option('--global', 'Install agent integration into the global target scope', false) + .addOption( + new Option('--agent-install-mode ', 'Agent install mode').argParser(agentInstallMode).default('cli'), + ) + .option('--skip-agents', 'Leave agent integration incomplete for now', false) + .option('--yes', 'Accept safe defaults in non-interactive setup', false) + .option('--no-input', 'Disable interactive terminal input') + .option('--anthropic-api-key-env ', 'Environment variable containing the Anthropic API key') + .option('--anthropic-api-key-file ', 'File containing the Anthropic API key') + .option('--anthropic-model ', 'Anthropic model ID to validate and save') + .addOption(new Option('--skip-llm', 'Leave LLM setup incomplete for now').hideHelp().default(false)) + .addOption(new Option('--embedding-backend ', 'Embedding backend').argParser(embeddingBackend)) + .option('--embedding-api-key-env ', 'Environment variable containing the embedding provider API key') + .option('--embedding-api-key-file ', 'File containing the embedding provider API key') + .addOption(new Option('--skip-embeddings', 'Leave embedding setup incomplete for now').hideHelp().default(false)) + .option( + '--database ', + 'Database driver to configure; repeatable', + (value, previous: KloSetupDatabaseDriver[]) => { + return [...previous, databaseDriver(value)]; + }, + [] as KloSetupDatabaseDriver[], + ) + .option( + '--database-connection-id ', + 'Existing selected connection id or new connection id', + (value, previous: string[]) => [...previous, value], + [], + ) + .option('--new-database-connection-id ', 'Connection id for one new database connection', (value) => { + if (!/^[a-zA-Z0-9][a-zA-Z0-9_-]*$/.test(value)) { + throw new InvalidArgumentError(`Unsafe connection id: ${value}`); + } + return 
value; + }) + .option('--database-url ', 'URL, env:NAME, or file:/path for one new URL-style database connection') + .option( + '--database-schema ', + 'Database schema to include; repeatable', + (value, previous: string[]) => [...previous, value], + [], + ) + .option('--enable-historic-sql', 'Enable Historic SQL when the selected database supports it', false) + .option('--disable-historic-sql', 'Disable Historic SQL for the selected database', false) + .option('--historic-sql-window-days ', 'Historic SQL query-history window', positiveInteger) + .option( + '--historic-sql-min-calls ', + 'Postgres Historic SQL pg_stat_statements minimum calls floor', + positiveInteger, + ) + .option( + '--historic-sql-service-account-pattern ', + 'Historic SQL service-account regex; repeatable', + (value, previous: string[]) => [...previous, value], + [], + ) + .option( + '--historic-sql-redaction-pattern ', + 'Historic SQL SQL-literal redaction regex; repeatable', + (value, previous: string[]) => [...previous, value], + [], + ) + .option('--skip-databases', 'Leave database setup incomplete; KLO cannot work until a primary source is added', false) + .addOption(new Option('--source ', 'Source connector type').argParser(sourceType)) + .option('--source-connection-id ', 'Connection id for source setup') + .option('--source-path ', 'Local source path for dbt, MetricFlow, or LookML') + .option('--source-git-url ', 'Git URL for dbt, MetricFlow, or LookML') + .option('--source-branch ', 'Git branch for source setup') + .option('--source-subpath ', 'Repo subpath for source setup') + .option('--source-auth-token-ref ', 'env: or file: credential ref for source repo auth') + .option('--source-url ', 'Source service URL for Metabase or Looker') + .option('--source-api-key-ref ', 'env: or file: API key ref for Metabase or Notion') + .option('--source-client-id ', 'Looker client id') + .option('--source-client-secret-ref ', 'env: or file: Looker client secret ref') + 
.option('--source-warehouse-connection-id ', 'Mapped warehouse connection id') + .option('--source-project-name ', 'dbt project name override') + .option('--source-profiles-path ', 'dbt profiles path') + .option('--source-target ', 'dbt target or source-specific mapping target') + .option('--metabase-database-id ', 'Metabase database id to map', positiveNumber) + .addOption( + new Option('--notion-crawl-mode ', 'Notion crawl mode').choices(['all_accessible', 'selected_roots']), + ) + .option( + '--notion-root-page-id ', + 'Notion root page id; repeatable', + (value, previous: string[]) => [...previous, value], + [], + ) + .option('--skip-initial-source-ingest', 'Validate source setup without building source context during setup', false) + .option('--skip-sources', 'Mark optional source setup complete with no sources', false) + .showHelpAfterError(); + + setup.hook('preAction', (_thisCommand, actionCommand) => { + context.writeDebug?.('setup', actionCommand); + }); + + setup.action(async (options, command) => { + if (options.anthropicApiKeyEnv && options.anthropicApiKeyFile) { + context.io.stderr.write( + 'Choose only one Anthropic credential source: --anthropic-api-key-env or --anthropic-api-key-file.\n', + ); + context.setExitCode(1); + return; + } + if (options.embeddingApiKeyEnv && options.embeddingApiKeyFile) { + context.io.stderr.write( + 'Choose only one embedding credential source: --embedding-api-key-env or --embedding-api-key-file.\n', + ); + context.setExitCode(1); + return; + } + if (options.enableHistoricSql && options.disableHistoricSql) { + context.io.stderr.write( + 'Choose only one Historic SQL action: --enable-historic-sql or --disable-historic-sql.\n', + ); + context.setExitCode(1); + return; + } + if (options.sourcePath && options.sourceGitUrl) { + context.io.stderr.write('Choose only one source location: --source-path or --source-git-url.\n'); + context.setExitCode(1); + return; + } + if (options.skipSources && options.source) { + 
context.io.stderr.write('Choose either --source or --skip-sources.\n'); + context.setExitCode(1); + return; + } + + const mode = options.new ? 'new' : options.existing ? 'existing' : 'auto'; + const resolvedAgentScope = options.global ? 'global' : options.agentScope; + await runSetupArgs(context, { + command: 'run', + projectDir: resolveCommandProjectDir(command), + mode, + agents: options.agents === true, + ...(options.target ? { target: options.target } : {}), + agentScope: resolvedAgentScope, + agentInstallMode: options.agentInstallMode, + skipAgents: options.skipAgents === true, + inputMode: options.input === false ? 'disabled' : 'auto', + yes: options.yes === true, + ...(options.anthropicApiKeyEnv ? { anthropicApiKeyEnv: options.anthropicApiKeyEnv } : {}), + ...(options.anthropicApiKeyFile ? { anthropicApiKeyFile: options.anthropicApiKeyFile } : {}), + ...(options.anthropicModel ? { anthropicModel: options.anthropicModel } : {}), + skipLlm: options.skipLlm === true, + ...(options.embeddingBackend ? { embeddingBackend: options.embeddingBackend } : {}), + ...(options.embeddingApiKeyEnv ? { embeddingApiKeyEnv: options.embeddingApiKeyEnv } : {}), + ...(options.embeddingApiKeyFile ? { embeddingApiKeyFile: options.embeddingApiKeyFile } : {}), + skipEmbeddings: options.skipEmbeddings === true, + ...(options.database.length > 0 ? { databaseDrivers: options.database } : {}), + ...(options.databaseConnectionId.length > 0 ? { databaseConnectionIds: options.databaseConnectionId } : {}), + ...(options.newDatabaseConnectionId ? { databaseConnectionId: options.newDatabaseConnectionId } : {}), + ...(options.databaseUrl ? { databaseUrl: options.databaseUrl } : {}), + databaseSchemas: options.databaseSchema, + ...(options.enableHistoricSql ? { enableHistoricSql: true } : {}), + ...(options.disableHistoricSql ? { disableHistoricSql: true } : {}), + ...(options.historicSqlWindowDays !== undefined ? 
{ historicSqlWindowDays: options.historicSqlWindowDays } : {}), + ...(options.historicSqlMinCalls !== undefined ? { historicSqlMinCalls: options.historicSqlMinCalls } : {}), + ...(options.historicSqlServiceAccountPattern.length > 0 + ? { historicSqlServiceAccountPatterns: options.historicSqlServiceAccountPattern } + : {}), + ...(options.historicSqlRedactionPattern.length > 0 + ? { historicSqlRedactionPatterns: options.historicSqlRedactionPattern } + : {}), + skipDatabases: options.skipDatabases === true, + ...(options.source ? { source: options.source } : {}), + ...(options.sourceConnectionId ? { sourceConnectionId: options.sourceConnectionId } : {}), + ...(options.sourcePath ? { sourcePath: options.sourcePath } : {}), + ...(options.sourceGitUrl ? { sourceGitUrl: options.sourceGitUrl } : {}), + ...(options.sourceBranch ? { sourceBranch: options.sourceBranch } : {}), + ...(options.sourceSubpath ? { sourceSubpath: options.sourceSubpath } : {}), + ...(options.sourceAuthTokenRef ? { sourceAuthTokenRef: options.sourceAuthTokenRef } : {}), + ...(options.sourceUrl ? { sourceUrl: options.sourceUrl } : {}), + ...(options.sourceApiKeyRef ? { sourceApiKeyRef: options.sourceApiKeyRef } : {}), + ...(options.sourceClientId ? { sourceClientId: options.sourceClientId } : {}), + ...(options.sourceClientSecretRef ? { sourceClientSecretRef: options.sourceClientSecretRef } : {}), + ...(options.sourceWarehouseConnectionId + ? { sourceWarehouseConnectionId: options.sourceWarehouseConnectionId } + : {}), + ...(options.sourceProjectName ? { sourceProjectName: options.sourceProjectName } : {}), + ...(options.sourceProfilesPath ? { sourceProfilesPath: options.sourceProfilesPath } : {}), + ...(options.sourceTarget ? { sourceTarget: options.sourceTarget } : {}), + ...(options.metabaseDatabaseId !== undefined ? { metabaseDatabaseId: options.metabaseDatabaseId } : {}), + ...(options.notionCrawlMode ? { notionCrawlMode: options.notionCrawlMode } : {}), + ...(options.notionRootPageId.length > 0 ? 
{ notionRootPageIds: options.notionRootPageId } : {}), + runInitialSourceIngest: false, + skipSources: options.skipSources === true, + showEntryMenu: shouldShowSetupEntryMenu(options, command), + }); + }); + + registerDemoCommands(setup, context, { description: 'Run the packaged KLO demo from setup' }); + + const setupContext = setup.command('context').description('Build, inspect, and recover setup-managed KLO context'); + + function setupContextInputMode(command: { + optsWithGlobals?: () => unknown; + opts?: () => unknown; + }): 'auto' | 'disabled' { + const options = command.optsWithGlobals?.() as { input?: boolean } | undefined; + return options?.input === false ? 'disabled' : 'auto'; + } + + setupContext + .command('build') + .description('Build agent-ready KLO context for setup') + .option('--no-input', 'Disable interactive terminal input') + .action(async (options: { input?: boolean }, command) => { + await runSetupArgs(context, { + command: 'context-build', + projectDir: resolveCommandProjectDir(command), + inputMode: options.input === false ? 'disabled' : setupContextInputMode(command), + }); + }); + + setupContext + .command('watch') + .description('Watch a setup-managed context build') + .argument('[runId]', 'Setup context build run id') + .option('--no-input', 'Disable interactive terminal input') + .action(async (runId: string | undefined, options: { input?: boolean }, command) => { + await runSetupArgs(context, { + command: 'context-watch', + projectDir: resolveCommandProjectDir(command), + ...(runId ? { runId } : {}), + inputMode: options.input === false ? 
'disabled' : setupContextInputMode(command), + }); + }); + + setupContext + .command('status') + .description('Print setup-managed context build status') + .argument('[runId]', 'Setup context build run id') + .option('--json', 'Print JSON output', false) + .action(async (runId: string | undefined, options: { json?: boolean }, command) => { + await runSetupArgs(context, { + command: 'context-status', + projectDir: resolveCommandProjectDir(command), + ...(runId ? { runId } : {}), + json: options.json === true, + }); + }); + + setupContext + .command('stop') + .description('Request a pause for a setup-managed context build') + .argument('[runId]', 'Setup context build run id') + .option('--force', 'Request the pause without an interactive confirmation', false) + .action(async (runId: string | undefined, _options: { force?: boolean }, command) => { + await runSetupArgs(context, { + command: 'context-stop', + projectDir: resolveCommandProjectDir(command), + ...(runId ? { runId } : {}), + }); + }); + + setup + .command('remove') + .description('Remove setup-managed local integrations') + .option('--agents', 'Remove setup-managed agent integration files', false) + .action(async (options: { agents?: boolean }, command) => { + const parentOptions = command.parent?.opts() as { agents?: boolean } | undefined; + if (options.agents !== true && parentOptions?.agents !== true) { + context.io.stderr.write('Choose what to remove: --agents.\n'); + context.setExitCode(1); + return; + } + await runSetupArgs(context, { + command: 'remove-agents', + projectDir: resolveCommandProjectDir(command), + }); + }); + + setup + .command('status') + .description('Show setup readiness for the resolved KLO project') + .option('--json', 'Print JSON output', false) + .action(async (options: { json?: boolean }, command) => { + await runSetupArgs(context, { + command: 'status', + projectDir: resolveCommandProjectDir(command), + json: options.json === true, + }); + }); +} diff --git 
/**
 * Parse one `--order-by` value of the form `field` or `field:asc` / `field:desc`.
 *
 * Inputs with more than one `:` are rejected. Previously everything after the
 * second segment was silently dropped, so `revenue::desc` parsed as plain
 * `revenue` and `revenue:desc:junk` was accepted.
 *
 * @param value Raw option value from the command line.
 * @returns The bare field name when no direction is given, otherwise an object
 *   with the field and its `asc`/`desc` direction.
 * @throws InvalidArgumentError when the field is empty, the direction is not
 *   `asc` or `desc`, or the value has extra `:`-separated segments.
 */
function parseOrderBy(value: string): string | { field: string; direction?: string } {
  const [field, direction, ...extraSegments] = value.split(':');
  if (!field) {
    throw new InvalidArgumentError('requires a field');
  }
  if (extraSegments.length > 0) {
    throw new InvalidArgumentError('expected a field, optionally suffixed with :asc or :desc');
  }
  if (!direction) {
    return field;
  }
  if (direction !== 'asc' && direction !== 'desc') {
    throw new InvalidArgumentError('direction must be asc or desc');
  }
  return { field, direction };
}
(await import('../sl.js')).runKloSl; + context.setExitCode(await runner(args, context.io)); +} + +export function registerSlCommands(program: Command, context: KloCliCommandContext, commandName = 'sl'): void { + const sl = program + .command(commandName) + .description('List, read, validate, query, or write local semantic-layer sources') + .showHelpAfterError() + .addHelpText( + 'after', + '\nProject directory defaults to KLO_PROJECT_DIR when set, otherwise the current working directory.\n', + ); + + sl.command('list') + .description('List semantic-layer sources') + .option('--connection-id ', 'KLO connection id') + .addOption( + new Option('--output ', 'Output mode: pretty (default in TTY), plain (TSV), or json').choices([ + 'pretty', + 'plain', + 'json', + ]), + ) + .option('--json', 'Shortcut for --output=json (overrides --output)', false) + .action(async (options: { connectionId?: string; output?: 'pretty' | 'plain' | 'json'; json?: boolean }, command) => { + await runSlArgs(context, { + command: 'list', + projectDir: resolveCommandProjectDir(command), + connectionId: options.connectionId, + output: options.output, + json: options.json, + }); + }); + + sl.command('read') + .description('Read a semantic-layer source') + .argument('', 'Semantic-layer source name') + .requiredOption('--connection-id ', 'KLO connection id') + .action(async (sourceName: string, options: { connectionId: string }, command) => { + await runSlArgs(context, { + command: 'read', + projectDir: resolveCommandProjectDir(command), + connectionId: options.connectionId, + sourceName, + }); + }); + + sl.command('validate') + .description('Validate a semantic-layer source') + .argument('', 'Semantic-layer source name') + .requiredOption('--connection-id ', 'KLO connection id') + .action(async (sourceName: string, options: { connectionId: string }, command) => { + await runSlArgs(context, { + command: 'validate', + projectDir: resolveCommandProjectDir(command), + connectionId: 
options.connectionId, + sourceName, + }); + }); + + sl.command('write') + .description('Write a semantic-layer source') + .argument('', 'Semantic-layer source name') + .requiredOption('--connection-id ', 'KLO connection id') + .requiredOption('--yaml ', 'Semantic-layer source YAML') + .action(async (sourceName: string, options: { connectionId: string; yaml: string }, command) => { + await runSlArgs(context, { + command: 'write', + projectDir: resolveCommandProjectDir(command), + connectionId: options.connectionId, + sourceName, + yaml: options.yaml, + }); + }); + + sl.command('query') + .description('Compile or execute a semantic-layer query') + .option('--connection-id ', 'KLO connection id') + .option('--measure ', 'Measure to query; repeatable', collectOption, []) + .option('--dimension ', 'Dimension to include; repeatable', collectOption, []) + .option('--filter ', 'Filter expression; repeatable', collectOption, []) + .option('--segment ', 'Segment to include; repeatable', collectOption, []) + .option('--order-by ', 'Order field, optionally suffixed with :asc or :desc', collectOrderBy, []) + .option('--limit ', 'Query limit', parsePositiveIntegerOption) + .option('--include-empty', 'Include empty rows', false) + .addOption(new Option('--format ', 'json or sql').choices(['json', 'sql']).default('json')) + .option('--execute', 'Execute the compiled query', false) + .option('--max-rows ', 'Maximum rows to return when executing', parsePositiveIntegerOption) + .action(async (options, command) => { + if (options.measure.length === 0) { + throw new Error('sl query requires at least one --measure'); + } + const args = slQueryCommandSchema.parse({ + command: 'query', + projectDir: resolveCommandProjectDir(command), + connectionId: options.connectionId, + query: { + measures: options.measure, + dimensions: options.dimension, + ...(options.filter.length > 0 ? { filters: options.filter } : {}), + ...(options.segment.length > 0 ? 
{ segments: options.segment } : {}), + ...(options.orderBy.length > 0 ? { order_by: options.orderBy } : {}), + ...(options.limit !== undefined ? { limit: options.limit } : {}), + ...(options.includeEmpty === true ? { include_empty: true } : {}), + }, + format: options.format, + execute: options.execute === true, + ...(options.maxRows !== undefined ? { maxRows: options.maxRows } : {}), + }); + await runSlArgs(context, args); + }); +} diff --git a/packages/cli/src/commands/status-commands.ts b/packages/cli/src/commands/status-commands.ts new file mode 100644 index 00000000..4eb08798 --- /dev/null +++ b/packages/cli/src/commands/status-commands.ts @@ -0,0 +1,23 @@ +import type { Command } from '@commander-js/extra-typings'; +import type { KloCliCommandContext } from '../cli-program.js'; +import { resolveCommandProjectDir } from '../cli-program.js'; + +export function registerStatusCommands(program: Command, context: KloCliCommandContext): void { + program + .command('status') + .description('Show current KLO project setup status') + .option('--json', 'Print JSON output', false) + .action(async (options: { json?: boolean }, command) => { + const runner = context.deps.setup ?? 
(await import('../setup.js')).runKloSetup; + context.setExitCode( + await runner( + { + command: 'status', + projectDir: resolveCommandProjectDir(command), + json: options.json === true, + }, + context.io, + ), + ); + }); +} diff --git a/packages/cli/src/completion.ts b/packages/cli/src/completion.ts new file mode 100644 index 00000000..70f03675 --- /dev/null +++ b/packages/cli/src/completion.ts @@ -0,0 +1,353 @@ +import { mkdir, readFile, writeFile } from 'node:fs/promises'; +import { homedir } from 'node:os'; +import { dirname, join } from 'node:path'; +import type { CommandUnknownOpts, Option } from '@commander-js/extra-typings'; + +export interface CompletionRequest { + position: number; + words: string[]; +} + +interface CompletionCandidate { + value: string; + description?: string; +} + +interface CommandWithHiddenFlag extends CommandUnknownOpts { + _hidden?: boolean; +} + +interface ResolveState { + command: CommandUnknownOpts; + pendingOption?: Option; + positionalIndex: number; +} + +export interface ZshCompletionInstallResult { + completionPath: string; + zshrcPath: string; +} + +const KLO_COMPLETION_BLOCK_START = '# >>> klo completion >>>'; +const KLO_COMPLETION_BLOCK_END = '# <<< klo completion <<<'; +const KLO_COMPLETION_BLOCK_PATTERN = new RegExp( + `\\n?${escapeRegExp(KLO_COMPLETION_BLOCK_START)}[\\s\\S]*?${escapeRegExp(KLO_COMPLETION_BLOCK_END)}\\n?`, + 'g', +); + +export function zshCompletionScript(): string { + const zshWords = '$' + '{words[@]}'; + const zshCompletionCapture = [ + '$', + `{(@f)$("${'$'}{klo_completion_command[@]}" dev __complete --shell zsh --position "$CURRENT" -- "${zshWords}" 2>/dev/null)}`, + ].join(''); + const zshCompletionsCount = '$' + '{#completions[@]}'; + const zshCompletionCommand = '$' + '(eval "print -r -- $' + '{KLO_COMPLETION_COMMAND:-klo}")'; + + return [ + '#compdef klo', + '', + '_klo() {', + ' local -a completions', + ' local -a klo_completion_command', + ` 
/**
 * Trim the shell's word list to the words up to and including the cursor word.
 *
 * @param words All words on the command line as reported by the shell.
 * @param position Count of words to keep from the start of the list.
 * @returns The first `position` words, or `words` itself (same array) when
 *   `position` is not an integer of at least 1.
 */
function completionWordsForPosition(words: string[], position: number): string[] {
  const hasUsablePosition = Number.isInteger(position) && position >= 1;
  return hasUsablePosition ? words.slice(0, position) : words;
}
/**
 * Walk the already-typed tokens to find which command the cursor is in, whether
 * the last token is an option still waiting for its value, and how many
 * positional arguments have been consumed for the current command.
 *
 * @param program Root command to start resolving from.
 * @param tokens Tokens typed before the word being completed.
 * @returns The resolved command, the option awaiting a value (if any), and the
 *   positional-argument index within that command.
 */
function resolveCommandState(program: CommandUnknownOpts, tokens: string[]): ResolveState {
  let command = program;
  let positionalIndex = 0;
  let pendingOption: Option | undefined;
  let positionalOnly = false;

  for (let index = 0; index < tokens.length; index += 1) {
    const token = tokens[index];

    // A bare `--` ends option parsing; later dash-prefixed tokens are not options.
    if (token === '--') {
      positionalOnly = true;
      continue;
    }

    if (!positionalOnly && token.startsWith('-')) {
      const option = findOption(command, optionNameFromToken(token));
      // `--opt=value` carries its own value, so only the space-separated form is handled here.
      if (option && !token.includes('=') && optionTakesValue(option)) {
        if (index === tokens.length - 1) {
          // Last typed token: the word being completed is this option's value.
          // This is the only place pendingOption is set, and the loop ends right
          // after, so no iteration ever starts with it set.
          pendingOption = option;
        } else if (option.required || !tokens[index + 1]?.startsWith('-')) {
          // Skip the option's value. An optional value is skipped only when the
          // next token does not look like another option.
          index += 1;
        }
      }
      continue;
    }

    const child = findVisibleSubcommand(command, token);
    if (child) {
      // Descend into the subcommand; positional counting restarts for it.
      command = child;
      positionalIndex = 0;
      continue;
    }

    positionalIndex += 1;
  }

  return { command, pendingOption, positionalIndex };
}
visibleSubcommands(command: CommandUnknownOpts): CommandUnknownOpts[] { + return command.commands.filter((subcommand) => (subcommand as CommandWithHiddenFlag)._hidden !== true); +} + +function findVisibleSubcommand(command: CommandUnknownOpts, name: string): CommandUnknownOpts | undefined { + return visibleSubcommands(command).find( + (subcommand) => subcommand.name() === name || subcommand.aliases().includes(name), + ); +} + +function visibleOptions(command: CommandUnknownOpts): Option[] { + const options: Option[] = []; + const seen = new Set(); + for (const current of commandChain(command)) { + for (const option of current.options) { + if (option.hidden) { + continue; + } + const key = option.long ?? option.short ?? option.flags; + if (seen.has(key)) { + continue; + } + seen.add(key); + options.push(option); + } + } + return options; +} + +function commandChain(command: CommandUnknownOpts): CommandUnknownOpts[] { + const chain: CommandUnknownOpts[] = []; + let current: CommandUnknownOpts | null = command; + while (current) { + chain.unshift(current); + current = current.parent; + } + return chain; +} + +function findOption(command: CommandUnknownOpts, name: string): Option | undefined { + return visibleOptions(command).find((option) => option.long === name || option.short === name); +} + +function optionTakesValue(option: Option): boolean { + return option.required || option.optional; +} + +function optionNameFromToken(token: string): string { + return token.split('=', 1)[0] ?? 
token; +} + +function splitOptionValueToken( + token: string, +): { optionName: string; optionPrefix: string; valuePrefix: string } | null { + const separatorIndex = token.indexOf('='); + if (!token.startsWith('-') || separatorIndex < 0) { + return null; + } + return { + optionName: token.slice(0, separatorIndex), + optionPrefix: token.slice(0, separatorIndex + 1), + valuePrefix: token.slice(separatorIndex + 1), + }; +} + +function commandCandidate(command: CommandUnknownOpts): CompletionCandidate { + return { + value: command.name(), + description: command.summary() || command.description(), + }; +} + +function optionCandidate(option: Option): CompletionCandidate { + return { + value: option.long ?? option.short ?? option.flags, + description: option.description, + }; +} + +function choiceCandidates( + choices: readonly string[] | undefined, + prefix: string, + completionPrefix = '', +): CompletionCandidate[] { + return (choices ?? []) + .filter((choice) => choice.startsWith(prefix)) + .map((choice) => ({ value: `${completionPrefix}${choice}` })); +} + +function formatZshCandidate(candidate: CompletionCandidate): string { + if (!candidate.description) { + return escapeZshCompletion(candidate.value); + } + return `${escapeZshCompletion(candidate.value)}:${escapeZshDescription(candidate.description)}`; +} + +function escapeZshCompletion(value: string): string { + return value.replace(/\\/g, '\\\\').replace(/:/g, '\\:'); +} + +function escapeZshDescription(value: string): string { + return value.replace(/\s+/g, ' ').replace(/\\/g, '\\\\').replace(/:/g, '\\:').trim(); +} + +async function readOptionalTextFile(path: string): Promise { + try { + return await readFile(path, 'utf-8'); + } catch (error) { + if (isNodeError(error) && error.code === 'ENOENT') { + return ''; + } + throw error; + } +} + +function updateZshrcCompletionBlock(contents: string): string { + const withoutManagedBlock = contents.replace(KLO_COMPLETION_BLOCK_PATTERN, normalizeTrailingNewline); + const 
hasCompinit = /^.*\bcompinit\b.*$/m.test(withoutManagedBlock); + const block = zshrcCompletionBlock({ includeCompinit: !hasCompinit }); + + if (!hasCompinit) { + return appendBlock(withoutManagedBlock, block); + } + + const compinitMatch = /^.*\bcompinit\b.*$/m.exec(withoutManagedBlock); + if (!compinitMatch || compinitMatch.index === undefined) { + return appendBlock(withoutManagedBlock, block); + } + + return [ + withoutManagedBlock.slice(0, compinitMatch.index), + block, + '\n', + withoutManagedBlock.slice(compinitMatch.index), + ].join(''); +} + +function zshrcCompletionBlock(options: { includeCompinit: boolean }): string { + return [ + KLO_COMPLETION_BLOCK_START, + '_klo_completion_command() {', + ' local dir="$PWD"', + ' while [[ "$dir" != "/" ]]; do', + ` if [[ -f "$dir/package.json" ]] && command grep -q '"name": "klo-workspace"' "$dir/package.json" 2>/dev/null; then`, + ' print -r -- "node $dir/scripts/run-klo.mjs --"', + ' return', + ' fi', + ' dir="' + '$' + '{dir:h}"', + ' done', + ' print -r -- "klo"', + '}', + "export KLO_COMPLETION_COMMAND='$(_klo_completion_command)'", + 'setopt complete_aliases', + 'fpath=("$HOME/.zfunc" $fpath)', + ...(options.includeCompinit ? ['autoload -Uz compinit', 'compinit'] : []), + KLO_COMPLETION_BLOCK_END, + ].join('\n'); +} + +function appendBlock(contents: string, block: string): string { + if (!contents.trim()) { + return `${block}\n`; + } + return `${contents.replace(/\s*$/, '\n\n')}${block}\n`; +} + +function normalizeTrailingNewline(match: string): string { + return match.startsWith('\n') || match.endsWith('\n') ? 
'\n' : ''; +} + +function escapeRegExp(value: string): string { + return value.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'); +} + +function isNodeError(error: unknown): error is NodeJS.ErrnoException { + return error instanceof Error && 'code' in error; +} diff --git a/packages/cli/src/connection.test.ts b/packages/cli/src/connection.test.ts new file mode 100644 index 00000000..94f07b86 --- /dev/null +++ b/packages/cli/src/connection.test.ts @@ -0,0 +1,649 @@ +import { mkdir, mkdtemp, readFile, rm, writeFile } from 'node:fs/promises'; +import { tmpdir } from 'node:os'; +import { join } from 'node:path'; +import { initKloProject, parseKloProjectConfig } from '@klo/context/project'; +import type { KloConnectionDriver, KloScanConnector, KloSchemaSnapshot } from '@klo/context/scan'; +import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'; +import { runKloConnection } from './connection.js'; +import { runKloCli, type KloCliIo } from './index.js'; + +function makeIo(options: { stdoutIsTty?: boolean; stdinIsTty?: boolean } = {}) { + let stdout = ''; + let stderr = ''; + return { + io: { + stdin: { + isTTY: options.stdinIsTty, + }, + stdout: { + isTTY: options.stdoutIsTty, + write: (chunk: string) => { + stdout += chunk; + }, + }, + stderr: { + write: (chunk: string) => { + stderr += chunk; + }, + }, + }, + stdout: () => stdout, + stderr: () => stderr, + }; +} + +function snapshotFor(driver: KloConnectionDriver, tableNames: string[]): KloSchemaSnapshot { + return { + connectionId: 'warehouse', + driver, + extractedAt: '2026-04-29T00:00:00.000Z', + scope: {}, + metadata: {}, + tables: tableNames.map((name) => ({ + catalog: null, + db: null, + name, + kind: 'table', + comment: null, + estimatedRows: null, + columns: [], + foreignKeys: [], + })), + }; +} + +function nativeConnector(driver: KloConnectionDriver, tableNames: string[]) { + const introspect = vi.fn(async () => snapshotFor(driver, tableNames)); + const cleanup = vi.fn(async () => undefined); + const 
connector: KloScanConnector = { + id: `${driver}:warehouse`, + driver, + capabilities: { + structuralIntrospection: true, + tableSampling: false, + columnSampling: false, + columnStats: false, + readOnlySql: false, + nestedAnalysis: false, + eventStreamDiscovery: false, + formalForeignKeys: false, + estimatedRowCounts: false, + }, + introspect, + cleanup, + }; + return { connector, introspect, cleanup }; +} + +describe('runKloConnection', () => { + let tempDir: string; + + beforeEach(async () => { + tempDir = await mkdtemp(join(tmpdir(), 'klo-cli-connection-')); + }); + + afterEach(async () => { + await rm(tempDir, { recursive: true, force: true }); + }); + + it('adds and lists env-referenced connections without resolving secrets', async () => { + const projectDir = join(tempDir, 'project'); + await initKloProject({ projectDir, projectName: 'warehouse' }); + const io = makeIo(); + + await expect( + runKloConnection( + { + command: 'add', + projectDir, + driver: 'postgres', + connectionId: 'warehouse', + url: 'env:DATABASE_URL', + schemas: ['public'], + readonly: true, + force: false, + allowLiteralCredentials: false, + }, + io.io, + ), + ).resolves.toBe(0); + + expect(io.stdout()).toContain('Connection: warehouse'); + await expect(readFile(join(projectDir, 'klo.yaml'), 'utf-8')).resolves.toContain('url: env:DATABASE_URL'); + + const listIo = makeIo(); + await expect(runKloConnection({ command: 'list', projectDir }, listIo.io)).resolves.toBe(0); + expect(listIo.stdout()).toContain('warehouse'); + expect(listIo.stdout()).toContain('postgres'); + }); + + it('removes a configured connection from klo.yaml without deleting local artifacts when forced', async () => { + const projectDir = join(tempDir, 'project'); + await initKloProject({ projectDir, projectName: 'warehouse' }); + await runKloConnection( + { + command: 'add', + projectDir, + driver: 'sqlite', + connectionId: 'warehouse', + url: undefined, + schemas: [], + readonly: true, + force: false, + 
allowLiteralCredentials: false, + }, + makeIo().io, + ); + const artifactPath = join(projectDir, '.klo', 'artifacts', 'warehouse.txt'); + await mkdir(join(projectDir, '.klo', 'artifacts'), { recursive: true }); + await writeFile(artifactPath, 'keep me', 'utf-8'); + + const io = makeIo(); + + await expect( + runKloConnection( + { + command: 'remove', + projectDir, + connectionId: 'warehouse', + force: true, + inputMode: 'disabled', + }, + io.io, + ), + ).resolves.toBe(0); + + const parsed = parseKloProjectConfig(await readFile(join(projectDir, 'klo.yaml'), 'utf-8')); + expect(parsed.connections.warehouse).toBeUndefined(); + await expect(readFile(artifactPath, 'utf-8')).resolves.toBe('keep me'); + expect(io.stdout()).toContain('Connection removed from klo.yaml.'); + expect(io.stdout()).toContain( + 'Ingested artifacts from this connection remain in .klo/. Run klo dev artifacts to inspect.', + ); + expect(io.stderr()).toBe(''); + }); + + it('requires --force when removing in non-interactive mode', async () => { + const projectDir = join(tempDir, 'project'); + await initKloProject({ projectDir, projectName: 'warehouse' }); + await runKloConnection( + { + command: 'add', + projectDir, + driver: 'sqlite', + connectionId: 'warehouse', + url: undefined, + schemas: [], + readonly: true, + force: false, + allowLiteralCredentials: false, + }, + makeIo().io, + ); + const io = makeIo(); + + await expect( + runKloConnection( + { + command: 'remove', + projectDir, + connectionId: 'warehouse', + force: false, + inputMode: 'disabled', + }, + io.io, + ), + ).resolves.toBe(1); + + expect(io.stderr()).toContain('connection remove warehouse requires --force when input is disabled or not interactive'); + }); + + it('returns a clear error when removing an unknown connection', async () => { + const projectDir = join(tempDir, 'project'); + await initKloProject({ projectDir, projectName: 'warehouse' }); + const io = makeIo(); + + await expect( + runKloConnection( + { + command: 'remove', + 
projectDir, + connectionId: 'missing', + force: true, + inputMode: 'disabled', + }, + io.io, + ), + ).resolves.toBe(1); + + expect(io.stderr()).toContain('Connection "missing" is not configured in klo.yaml'); + }); + + it('asks for confirmation before removing in an interactive terminal', async () => { + const projectDir = join(tempDir, 'project'); + await initKloProject({ projectDir, projectName: 'warehouse' }); + await runKloConnection( + { + command: 'add', + projectDir, + driver: 'sqlite', + connectionId: 'warehouse', + url: undefined, + schemas: [], + readonly: true, + force: false, + allowLiteralCredentials: false, + }, + makeIo().io, + ); + const io = makeIo({ stdoutIsTty: true, stdinIsTty: true }); + const prompts = { + confirm: vi.fn(async () => true), + cancel: vi.fn(), + }; + + await expect( + runKloConnection( + { + command: 'remove', + projectDir, + connectionId: 'warehouse', + force: false, + }, + io.io, + { prompts }, + ), + ).resolves.toBe(0); + + expect(prompts.confirm).toHaveBeenCalledWith({ + message: 'Remove connection "warehouse" from klo.yaml? 
Ingested artifacts will remain in .klo/.', + initialValue: false, + }); + }); + + it('runs public connect map as refresh, validate, and list over the low-level mapping runner', async () => { + const io = makeIo(); + const runMapping = vi.fn(async (argv: string[], mappingIo: KloCliIo) => { + if (argv[0] === 'refresh') { + mappingIo.stdout.write('Discovery: 1 database\n'); + mappingIo.stdout.write('Unmapped discovered: 1\n'); + mappingIo.stdout.write('Stale mappings: 0\n'); + return 0; + } + if (argv[0] === 'validate') { + mappingIo.stdout.write('Mapping validation passed: prod-metabase\n'); + return 0; + } + if (argv[0] === 'list') { + mappingIo.stdout.write('1 -> [unmapped] (Analytics, sync: on, source: refresh)\n'); + return 0; + } + return 1; + }); + + await expect( + runKloConnection( + { command: 'map', projectDir: '/tmp/project', sourceConnectionId: 'prod-metabase', json: false }, + io.io, + { runMapping }, + ), + ).resolves.toBe(0); + + expect(runMapping).toHaveBeenNthCalledWith( + 1, + ['refresh', 'prod-metabase', '--auto-accept', '--project-dir', '/tmp/project'], + expect.any(Object), + ); + expect(runMapping).toHaveBeenNthCalledWith( + 2, + ['validate', 'prod-metabase', '--project-dir', '/tmp/project'], + expect.any(Object), + ); + expect(runMapping).toHaveBeenNthCalledWith( + 3, + ['list', 'prod-metabase', '--project-dir', '/tmp/project'], + expect.any(Object), + ); + expect(io.stdout()).toContain('Mapping: prod-metabase'); + expect(io.stdout()).toContain('Discovery: 1 database'); + expect(io.stdout()).toContain('Mappings:'); + expect(io.stdout()).toContain('1 -> [unmapped]'); + expect(io.stdout()).toContain('Next:'); + expect(io.stdout()).toContain('klo ingest prod-metabase'); + expect(io.stdout()).toContain('klo dev mapping'); + expect(io.stderr()).toBe(''); + }); + + it('prints stable JSON for public connect map without leaking low-level stdout', async () => { + const io = makeIo(); + const runMapping = vi.fn(async (argv: string[], mappingIo: KloCliIo) 
=> { + if (argv[0] === 'refresh') { + mappingIo.stdout.write('Discovery: 1 connection\nUnmapped discovered: 0\nStale mappings: 0\n'); + return 0; + } + if (argv[0] === 'validate') { + mappingIo.stdout.write('Mapping validation passed: prod-looker\n'); + return 0; + } + if (argv[0] === 'list') { + expect(argv).toContain('--json'); + mappingIo.stdout.write( + `${JSON.stringify( + [ + { + lookerConnectionName: 'analytics', + kloConnectionId: 'prod-warehouse', + source: 'klo.yaml', + }, + ], + null, + 2, + )}\n`, + ); + return 0; + } + return 1; + }); + + await expect( + runKloConnection( + { command: 'map', projectDir: '/tmp/project', sourceConnectionId: 'prod-looker', json: true }, + io.io, + { runMapping }, + ), + ).resolves.toBe(0); + + const parsed = JSON.parse(io.stdout()) as { + connectionId: string; + refresh: { ok: boolean; output: string[] }; + validation: { ok: boolean; output: string[] }; + mappings: Array<{ lookerConnectionName: string; kloConnectionId: string; source: string }>; + }; + expect(parsed).toEqual({ + connectionId: 'prod-looker', + refresh: { + ok: true, + output: ['Discovery: 1 connection', 'Unmapped discovered: 0', 'Stale mappings: 0'], + }, + validation: { + ok: true, + output: ['Mapping validation passed: prod-looker'], + }, + mappings: [ + { + lookerConnectionName: 'analytics', + kloConnectionId: 'prod-warehouse', + source: 'klo.yaml', + }, + ], + }); + expect(io.stderr()).toBe(''); + }); + + it('returns the refresh failure when public connect map cannot discover source metadata', async () => { + const io = makeIo(); + const runMapping = vi.fn(async (argv: string[], mappingIo: KloCliIo) => { + if (argv[0] === 'refresh') { + mappingIo.stderr.write('Metabase API key is not configured\n'); + return 1; + } + return 0; + }); + + await expect( + runKloConnection( + { command: 'map', projectDir: '/tmp/project', sourceConnectionId: 'prod-metabase', json: false }, + io.io, + { runMapping }, + ), + ).resolves.toBe(1); + + 
expect(runMapping).toHaveBeenCalledTimes(1); + expect(io.stdout()).toBe(''); + expect(io.stderr()).toContain('Metabase API key is not configured'); + }); + + it('rejects literal credential URLs unless explicitly allowed', async () => { + const projectDir = join(tempDir, 'project'); + await initKloProject({ projectDir, projectName: 'warehouse' }); + const io = makeIo(); + + await expect( + runKloConnection( + { + command: 'add', + projectDir, + driver: 'postgres', + connectionId: 'warehouse', + url: 'postgres://localhost:5432/warehouse', + schemas: [], + readonly: true, + force: false, + allowLiteralCredentials: false, + }, + io.io, + ), + ).resolves.toBe(1); + + expect(io.stderr()).toContain('Literal credential URLs require --allow-literal-credentials'); + }); + + it('warns before writing explicitly allowed literal credential URLs without echoing the URL', async () => { + const projectDir = join(tempDir, 'project'); + await initKloProject({ projectDir, projectName: 'warehouse' }); + const io = makeIo(); + const literalUrl = 'postgres://localhost:5432/warehouse'; + + await expect( + runKloConnection( + { + command: 'add', + projectDir, + driver: 'postgres', + connectionId: 'warehouse', + url: literalUrl, + schemas: ['public'], + readonly: true, + force: false, + allowLiteralCredentials: true, + }, + io.io, + ), + ).resolves.toBe(0); + + expect(io.stderr()).toContain( + 'Warning: writing a literal credential URL to klo.yaml for connection "warehouse". 
Prefer env:NAME or file:/path references.', + ); + expect(io.stderr()).not.toContain(literalUrl); + await expect(readFile(join(projectDir, 'klo.yaml'), 'utf-8')).resolves.toContain(literalUrl); + }); + + it('adds a Notion connection without writing token values', async () => { + const projectDir = join(tempDir, 'project'); + await initKloProject({ projectDir, projectName: 'warehouse' }); + const io = makeIo(); + + await expect( + runKloConnection( + { + command: 'add', + projectDir, + driver: 'notion', + connectionId: 'notion-main', + url: undefined, + schemas: [], + readonly: false, + force: false, + allowLiteralCredentials: false, + notion: { + authTokenRef: 'env:NOTION_AUTH_TOKEN', + crawlMode: 'all_accessible', + rootPageIds: [], + rootDatabaseIds: [], + rootDataSourceIds: [], + maxPagesPerRun: 50, + maxKnowledgeCreatesPerRun: 4, + maxKnowledgeUpdatesPerRun: 12, + }, + }, + io.io, + ), + ).resolves.toBe(0); + + const yaml = await readFile(join(projectDir, 'klo.yaml'), 'utf-8'); + expect(yaml).toContain('driver: notion'); + expect(yaml).toContain('auth_token_ref: env:NOTION_AUTH_TOKEN'); + expect(yaml).toContain('crawl_mode: all_accessible'); + expect(yaml).toContain('max_pages_per_run: 50'); + expect(yaml).not.toContain('ntn_'); + expect(io.stdout()).toContain('Connection: notion-main'); + expect(io.stdout()).toContain('Driver: notion'); + }); + + it('runs connection notion pick --no-input through the public connection entrypoint', async () => { + const projectDir = join(tempDir, 'project'); + await initKloProject({ projectDir, projectName: 'warehouse' }); + await runKloConnection( + { + command: 'add', + projectDir, + driver: 'notion', + connectionId: 'notion-main', + url: undefined, + schemas: [], + readonly: false, + force: false, + allowLiteralCredentials: false, + notion: { + authTokenRef: 'env:NOTION_AUTH_TOKEN', + crawlMode: 'all_accessible', + rootPageIds: [], + rootDatabaseIds: ['database-1'], + rootDataSourceIds: ['data-source-1'], + maxPagesPerRun: 
50, + maxKnowledgeCreatesPerRun: 4, + maxKnowledgeUpdatesPerRun: 12, + }, + }, + makeIo().io, + ); + const io = makeIo(); + + await expect( + runKloCli( + [ + 'connection', + 'notion', + 'pick', + 'notion-main', + '--project-dir', + projectDir, + '--no-input', + '--root-page-id', + '11111111222233334444555555555555', + ], + io.io, + ), + ).resolves.toBe(0); + + const yaml = await readFile(join(projectDir, 'klo.yaml'), 'utf-8'); + expect(yaml).toContain('crawl_mode: selected_roots'); + expect(yaml).toContain('11111111-2222-3333-4444-555555555555'); + expect(yaml).toContain('database-1'); + expect(yaml).toContain('data-source-1'); + expect(io.stdout()).toContain('Connection: notion-main'); + }); + + it('tests a configured connection through the native scan connector', async () => { + const projectDir = join(tempDir, 'project'); + await initKloProject({ projectDir, projectName: 'warehouse' }); + await runKloConnection( + { + command: 'add', + projectDir, + driver: 'sqlite', + connectionId: 'warehouse', + url: undefined, + schemas: [], + readonly: true, + force: false, + allowLiteralCredentials: false, + }, + makeIo().io, + ); + const { connector, introspect, cleanup } = nativeConnector('sqlite', ['customers', 'orders']); + const createScanConnector = vi.fn(async () => connector); + const io = makeIo(); + + await expect( + runKloConnection({ command: 'test', projectDir, connectionId: 'warehouse' }, io.io, { + createScanConnector, + }), + ).resolves.toBe(0); + + expect(createScanConnector).toHaveBeenCalledWith(expect.objectContaining({ projectDir }), 'warehouse'); + expect(introspect).toHaveBeenCalledWith( + { + connectionId: 'warehouse', + driver: 'sqlite', + mode: 'structural', + dryRun: true, + detectRelationships: false, + }, + { runId: 'connection-test-warehouse' }, + ); + expect(cleanup).toHaveBeenCalledTimes(1); + expect(io.stdout()).toContain('Connection test passed: warehouse'); + expect(io.stdout()).toContain('Driver: sqlite'); + 
expect(io.stdout()).toContain('Tables: 2'); + }); + + it('cleans up the native scan connector when connection testing fails', async () => { + const projectDir = join(tempDir, 'project'); + await initKloProject({ projectDir, projectName: 'warehouse' }); + await runKloConnection( + { + command: 'add', + projectDir, + driver: 'sqlite', + connectionId: 'warehouse', + url: undefined, + schemas: [], + readonly: true, + force: false, + allowLiteralCredentials: false, + }, + makeIo().io, + ); + const cleanup = vi.fn(async () => undefined); + const connector: KloScanConnector = { + id: 'sqlite:warehouse', + driver: 'sqlite', + capabilities: { + structuralIntrospection: true, + tableSampling: false, + columnSampling: false, + columnStats: false, + readOnlySql: false, + nestedAnalysis: false, + eventStreamDiscovery: false, + formalForeignKeys: false, + estimatedRowCounts: false, + }, + introspect: vi.fn(async () => { + throw new Error('database file is unreadable'); + }), + cleanup, + }; + const io = makeIo(); + + await expect( + runKloConnection({ command: 'test', projectDir, connectionId: 'warehouse' }, io.io, { + createScanConnector: vi.fn(async () => connector), + }), + ).resolves.toBe(1); + + expect(cleanup).toHaveBeenCalledTimes(1); + expect(io.stderr()).toContain('database file is unreadable'); + }); +}); diff --git a/packages/cli/src/connection.ts b/packages/cli/src/connection.ts new file mode 100644 index 00000000..421c3230 --- /dev/null +++ b/packages/cli/src/connection.ts @@ -0,0 +1,415 @@ +import { cancel, confirm, isCancel } from '@clack/prompts'; +import { type KloLocalProject, loadKloProject, serializeKloProjectConfig } from '@klo/context/project'; +import type { KloScanConnector } from '@klo/context/scan'; +import type { KloConnectionMappingArgs } from './commands/connection-mapping.js'; +import type { KloCliIo } from './index.js'; +import { createKloCliScanConnector } from './local-scan-connectors.js'; +import { profileMark } from './startup-profile.js'; + 
+profileMark('module:connection'); + +interface KloNotionConnectionCliConfig { + authTokenRef: string; + crawlMode: 'all_accessible' | 'selected_roots'; + rootPageIds: string[]; + rootDatabaseIds: string[]; + rootDataSourceIds: string[]; + maxPagesPerRun?: number; + maxKnowledgeCreatesPerRun?: number; + maxKnowledgeUpdatesPerRun?: number; +} + +type KloConnectionInputMode = 'disabled'; + +export type KloConnectionArgs = + | { command: 'list'; projectDir: string } + | { + command: 'add'; + projectDir: string; + driver: string; + connectionId: string; + url?: string; + schemas: string[]; + readonly: boolean; + force: boolean; + allowLiteralCredentials: boolean; + notion?: KloNotionConnectionCliConfig; + } + | { command: 'test'; projectDir: string; connectionId: string } + | { + command: 'remove'; + projectDir: string; + connectionId: string; + force: boolean; + inputMode?: KloConnectionInputMode; + } + | { + command: 'map'; + projectDir: string; + sourceConnectionId: string; + json: boolean; + }; + +interface KloConnectionPromptAdapter { + confirm(options: { message: string; initialValue?: boolean }): Promise; + cancel(message: string): void; +} + +interface KloConnectionIo extends KloCliIo { + stdin?: { isTTY?: boolean }; +} + +interface KloConnectionDeps { + createScanConnector?: typeof createKloCliScanConnector; + runMapping?: (argv: string[], io: KloCliIo) => Promise; + prompts?: KloConnectionPromptAdapter; +} + +function assertSafeConnectionId(connectionId: string): void { + if (!/^[a-zA-Z0-9][a-zA-Z0-9_-]*$/.test(connectionId)) { + throw new Error(`Unsafe connection id: ${connectionId}`); + } +} + +function isCredentialReference(value: string): boolean { + return value.startsWith('env:') || value.startsWith('file:'); +} + +function literalCredentialWarning(connectionId: string): string { + return `Warning: writing a literal credential URL to klo.yaml for connection "${connectionId}". 
Prefer env:NAME or file:/path references.`; +} + +function createClackConnectionPromptAdapter(): KloConnectionPromptAdapter { + return { + async confirm(options: { message: string; initialValue?: boolean }): Promise { + const value = await confirm(options); + return isCancel(value) ? false : value; + }, + cancel(message: string): void { + cancel(message); + }, + }; +} + +function isInteractiveConnectionIo( + args: Extract, + io: KloConnectionIo, +): boolean { + return args.inputMode !== 'disabled' && io.stdin?.isTTY === true && io.stdout.isTTY === true; +} + +async function cleanupConnector(connector: KloScanConnector | null): Promise { + if (connector?.cleanup) { + await connector.cleanup(); + } +} + +async function testNativeConnection( + project: KloLocalProject, + connectionId: string, + createScanConnector: typeof createKloCliScanConnector, +): Promise<{ driver: string; tableCount: number }> { + let connector: KloScanConnector | null = null; + try { + connector = await createScanConnector(project, connectionId); + const snapshot = await connector.introspect( + { + connectionId, + driver: connector.driver, + mode: 'structural', + dryRun: true, + detectRelationships: false, + }, + { runId: `connection-test-${connectionId}` }, + ); + return { + driver: connector.driver, + tableCount: snapshot.tables.length, + }; + } finally { + await cleanupConnector(connector); + } +} + +interface BufferedIo extends KloCliIo { + stdoutText(): string; + stderrText(): string; +} + +function createBufferedIo(): BufferedIo { + let stdout = ''; + let stderr = ''; + return { + stdout: { + write(chunk: string) { + stdout += chunk; + }, + }, + stderr: { + write(chunk: string) { + stderr += chunk; + }, + }, + stdoutText() { + return stdout; + }, + stderrText() { + return stderr; + }, + }; +} + +function splitOutputLines(output: string): string[] { + return output + .split('\n') + .map((line) => line.trim()) + .filter(Boolean); +} + +async function runLowLevelMapping( + args: 
KloConnectionMappingArgs, + argv: string[], + io: KloCliIo, + deps: KloConnectionDeps, +): Promise { + if (deps.runMapping) { + return await deps.runMapping(argv, io); + } + + const { runKloConnectionMapping } = await import('./commands/connection-mapping.js'); + return await runKloConnectionMapping(args, io); +} + +function parseMappingListJson(output: string): unknown[] { + const trimmed = output.trim(); + if (!trimmed) { + return []; + } + const parsed = JSON.parse(trimmed) as unknown; + return Array.isArray(parsed) ? parsed : []; +} + +async function runPublicConnectionMap( + args: Extract, + io: KloCliIo, + deps: KloConnectionDeps, +): Promise { + const refreshIo = createBufferedIo(); + const refreshArgs: KloConnectionMappingArgs = { + command: 'refresh', + projectDir: args.projectDir, + connectionId: args.sourceConnectionId, + autoAccept: true, + }; + const refreshCode = await runLowLevelMapping( + refreshArgs, + ['refresh', args.sourceConnectionId, '--auto-accept', '--project-dir', args.projectDir], + refreshIo, + deps, + ); + if (refreshCode !== 0) { + io.stderr.write( + refreshIo.stderrText() || + refreshIo.stdoutText() || + `Failed to refresh mapping metadata for ${args.sourceConnectionId}\n`, + ); + return refreshCode; + } + + const validationIo = createBufferedIo(); + const validationArgs: KloConnectionMappingArgs = { + command: 'validate', + projectDir: args.projectDir, + connectionId: args.sourceConnectionId, + }; + const validationCode = await runLowLevelMapping( + validationArgs, + ['validate', args.sourceConnectionId, '--project-dir', args.projectDir], + validationIo, + deps, + ); + if (validationCode !== 0) { + io.stderr.write( + validationIo.stderrText() || validationIo.stdoutText() || `Mapping validation failed for ${args.sourceConnectionId}\n`, + ); + return validationCode; + } + + const listIo = createBufferedIo(); + const listArgv = ['list', args.sourceConnectionId, '--project-dir', args.projectDir]; + const listArgs: KloConnectionMappingArgs 
/**
 * Entry point for the `klo connection` command family.
 *
 * Dispatches on `args.command`:
 * - `map`    – delegates to `runPublicConnectionMap` (no project load here).
 * - `list`   – prints an ID/DRIVER table of the connections in the project config.
 * - `add`    – validates the id and credential URL, then writes the new
 *              connection into `klo.yaml`.
 * - `remove` – deletes the connection from `klo.yaml` after `--force` or an
 *              interactive confirmation.
 * - anything else – runs a connection test via `testNativeConnection`.
 *
 * @param args Parsed command arguments.
 * @param io   Output streams; defaults to the current process.
 * @param deps Optional overrides (prompt adapter, scan-connector factory).
 * @returns 0 on success; 1 when an error was thrown (its message is written
 *          to `io.stderr`) or when a removal was not confirmed.
 */
export async function runKloConnection(
  args: KloConnectionArgs,
  io: KloConnectionIo = process,
  deps: KloConnectionDeps = {},
): Promise<number> {
  try {
    if (args.command === 'map') {
      return await runPublicConnectionMap(args, io, deps);
    }

    const project = await loadKloProject({ projectDir: args.projectDir });

    if (args.command === 'list') {
      const entries = Object.entries(project.config.connections).sort(([a], [b]) => a.localeCompare(b));
      if (entries.length === 0) {
        io.stdout.write(
          'No connections configured. Run `klo connection add <id> --driver <driver>` to add one.\n',
        );
        return 0;
      }
      // Column widths are driven by the longest value, never narrower than the header.
      const idWidth = Math.max('ID'.length, ...entries.map(([id]) => id.length));
      const driverWidth = Math.max(
        'DRIVER'.length,
        ...entries.map(([, c]) => (c.driver ?? 'unknown').length),
      );
      io.stdout.write(`${'ID'.padEnd(idWidth)} ${'DRIVER'.padEnd(driverWidth)}\n`);
      for (const [id, connection] of entries) {
        io.stdout.write(`${id.padEnd(idWidth)} ${(connection.driver ?? 'unknown').padEnd(driverWidth)}\n`);
      }
      return 0;
    }

    if (args.command === 'add') {
      assertSafeConnectionId(args.connectionId);
      // A URL that is not a credential reference embeds the secret itself and
      // must be opted into explicitly.
      const hasLiteralCredentialUrl = !!args.url && !isCredentialReference(args.url);
      if (hasLiteralCredentialUrl && !args.allowLiteralCredentials) {
        throw new Error('Literal credential URLs require --allow-literal-credentials');
      }
      if (hasLiteralCredentialUrl) {
        io.stderr.write(`${literalCredentialWarning(args.connectionId)}\n`);
      }
      if (project.config.connections[args.connectionId] && !args.force) {
        throw new Error(`Connection "${args.connectionId}" already exists; pass --force to replace it`);
      }
      const connectionConfig =
        args.driver === 'notion' && args.notion
          ? {
              driver: 'notion',
              auth_token_ref: args.notion.authTokenRef,
              crawl_mode: args.notion.crawlMode,
              root_page_ids: args.notion.rootPageIds,
              root_database_ids: args.notion.rootDatabaseIds,
              root_data_source_ids: args.notion.rootDataSourceIds,
              // Optional limits are only serialized when the caller supplied them.
              ...(args.notion.maxPagesPerRun !== undefined ? { max_pages_per_run: args.notion.maxPagesPerRun } : {}),
              ...(args.notion.maxKnowledgeCreatesPerRun !== undefined
                ? { max_knowledge_creates_per_run: args.notion.maxKnowledgeCreatesPerRun }
                : {}),
              ...(args.notion.maxKnowledgeUpdatesPerRun !== undefined
                ? { max_knowledge_updates_per_run: args.notion.maxKnowledgeUpdatesPerRun }
                : {}),
            }
          : {
              driver: args.driver,
              ...(args.url ? { url: args.url } : {}),
              ...(args.schemas.length > 0 ? { schemas: args.schemas } : {}),
              readonly: args.readonly,
            };
      const nextConfig = {
        ...project.config,
        connections: {
          ...project.config.connections,
          [args.connectionId]: connectionConfig,
        },
      };
      await project.fileStore.writeFile(
        'klo.yaml',
        serializeKloProjectConfig(nextConfig),
        'klo',
        'klo@example.com',
        `Update KLO connection: ${args.connectionId}`,
      );
      io.stdout.write(`Connection: ${args.connectionId}\n`);
      io.stdout.write(`Driver: ${args.driver}\n`);
      return 0;
    }

    if (args.command === 'remove') {
      if (!project.config.connections[args.connectionId]) {
        throw new Error(`Connection "${args.connectionId}" is not configured in klo.yaml`);
      }

      if (!args.force) {
        if (!isInteractiveConnectionIo(args, io)) {
          throw new Error(
            `connection remove ${args.connectionId} requires --force when input is disabled or not interactive`,
          );
        }

        const prompts = deps.prompts ?? createClackConnectionPromptAdapter();
        const confirmed = await prompts.confirm({
          message: `Remove connection "${args.connectionId}" from klo.yaml? Ingested artifacts will remain in .klo/.`,
          initialValue: false,
        });
        // Only an explicit `true` authorizes the removal. A plain falsy check
        // would let a truthy non-boolean answer (for example a cancel
        // sentinel from the prompt library) fall through to the delete.
        if (confirmed !== true) {
          prompts.cancel('Connection removal cancelled.');
          return 1;
        }
      }

      const { [args.connectionId]: _removedConnection, ...connections } = project.config.connections;
      const nextConfig = {
        ...project.config,
        connections,
      };
      await project.fileStore.writeFile(
        'klo.yaml',
        serializeKloProjectConfig(nextConfig),
        'klo',
        'klo@example.com',
        `Remove KLO connection: ${args.connectionId}`,
      );
      io.stdout.write('Connection removed from klo.yaml.\n');
      io.stdout.write('Ingested artifacts from this connection remain in .klo/. Run klo dev artifacts to inspect.\n');
      return 0;
    }

    // Remaining command: test the connection.
    const result = await testNativeConnection(
      project,
      args.connectionId,
      deps.createScanConnector ?? createKloCliScanConnector,
    );
    io.stdout.write(`Connection test passed: ${args.connectionId}\n`);
    io.stdout.write(`Driver: ${result.driver}\n`);
    io.stdout.write(`Tables: ${result.tableCount}\n`);
    return 0;
  } catch (error) {
    io.stderr.write(`${error instanceof Error ? error.message : String(error)}\n`);
    return 1;
  }
}
/**
 * Builds a target result in which the step matching `operation` is marked
 * failed, the other of scan/source-ingest is skipped, and memory-update is
 * reported as not run.
 */
function failedResult(
  connectionId: string,
  driver: string,
  operation: 'scan' | 'source-ingest',
): KloPublicIngestTargetResult {
  const scanFailed = operation === 'scan';
  const ingestFailed = operation === 'source-ingest';
  return {
    connectionId,
    driver,
    steps: [
      { operation: 'scan', status: scanFailed ? 'failed' : 'skipped', detail: `${connectionId} failed at scan.` },
      { operation: 'source-ingest', status: ingestFailed ? 'failed' : 'skipped' },
      { operation: 'enrich', status: 'skipped' },
      { operation: 'memory-update', status: 'not-run' },
    ],
  };
}

// Each suite below is a table of [test title, input, expected result].

describe('extractProgressMessage', () => {
  const cases: Array<[string, string, string | null]> = [
    ['extracts percentage and message from scan progress', '\r[45%] Scanning tables...', '[45%] Scanning tables...'],
    ['extracts from permanent progress lines', '[100%] Done\n', '[100%] Done'],
    ['returns null for non-progress output', 'KLO scan completed\n', null],
  ];

  for (const [title, chunk, expected] of cases) {
    it(title, () => {
      expect(extractProgressMessage(chunk)).toBe(expected);
    });
  }
});

describe('parseScanSummary', () => {
  const cases: Array<[string, string, string | null]> = [
    [
      'extracts table count from scan output',
      'Semantic layer comparison found 5 changes across 42 tables',
      '42 tables',
    ],
    // The summary label is always plural, even for a single table.
    ['handles singular form', 'found 1 change across 1 table', '1 tables'],
    ['returns null when no match', 'No changes detected', null],
  ];

  for (const [title, output, expected] of cases) {
    it(title, () => {
      expect(parseScanSummary(output)).toBe(expected);
    });
  }
});

describe('parseIngestSummary', () => {
  const cases: Array<[string, string, string | null]> = [
    [
      'extracts work units and saved memory',
      'Work units: 5\nSaved memory: 3 wiki, 2 SL',
      '5 work units · 3 wiki, 2 SL',
    ],
    ['extracts work units alone when no saved memory', 'Work units: 5\nStatus: done', '5 work units'],
    ['extracts saved memory alone when no work units', 'Saved memory: 3 wiki, 2 SL', '3 wiki, 2 SL'],
    ['returns null when no match', 'Status: done', null],
  ];

  for (const [title, output, expected] of cases) {
    it(title, () => {
      expect(parseIngestSummary(output)).toBe(expected);
    });
  }
});
describe('initViewState', () => {
  it('partitions targets into primary and context sources', () => {
    const { primarySources, contextSources, frame } = initViewState([
      { connectionId: 'warehouse', driver: 'postgres', operation: 'scan', debugCommand: '', steps: ['scan'] },
      {
        connectionId: 'dbt-main',
        driver: 'dbt',
        operation: 'source-ingest',
        adapter: 'dbt',
        debugCommand: '',
        steps: ['source-ingest', 'memory-update'],
      },
    ]);

    // Scan targets land in the primary group, source-ingest targets in the context group.
    expect(primarySources).toHaveLength(1);
    expect(primarySources[0].target.connectionId).toBe('warehouse');
    expect(contextSources).toHaveLength(1);
    expect(contextSources[0].target.connectionId).toBe('dbt-main');
    expect(frame).toBe(0);
  });
});

describe('renderContextBuildView', () => {
  it('renders all-queued state', () => {
    const state = initViewState([
      { connectionId: 'warehouse', driver: 'postgres', operation: 'scan', debugCommand: '', steps: ['scan'] },
      {
        connectionId: 'dbt-main',
        driver: 'dbt',
        operation: 'source-ingest',
        adapter: 'dbt',
        debugCommand: '',
        steps: ['source-ingest', 'memory-update'],
      },
    ]);

    const output = renderContextBuildView(state, { styled: false });
    const expectedFragments = [
      'Building KLO context',
      'Primary sources:',
      'warehouse',
      'queued',
      'Context sources:',
      'dbt-main',
    ];
    for (const fragment of expectedFragments) {
      expect(output).toContain(fragment);
    }
  });

  it('renders completed state with summary', () => {
    const state = initViewState([
      { connectionId: 'warehouse', driver: 'postgres', operation: 'scan', debugCommand: '', steps: ['scan'] },
    ]);
    const [warehouse] = state.primarySources;
    warehouse.status = 'done';
    warehouse.elapsedMs = 72000;
    warehouse.summaryText = '42 tables';

    const output = renderContextBuildView(state, { styled: false });
    expect(output).toContain('42 tables');
    expect(output).toContain('1m12s');
  });

  it('renders failed state', () => {
    const state = initViewState([
      { connectionId: 'warehouse', driver: 'postgres', operation: 'scan', debugCommand: '', steps: ['scan'] },
    ]);
    const [warehouse] = state.primarySources;
    warehouse.status = 'failed';

    const output = renderContextBuildView(state, { styled: false });
    expect(output).toContain('✗');
    expect(output).toContain('failed');
  });

  it('omits empty groups', () => {
    const state = initViewState([
      {
        connectionId: 'dbt-main',
        driver: 'dbt',
        operation: 'source-ingest',
        adapter: 'dbt',
        debugCommand: '',
        steps: ['source-ingest', 'memory-update'],
      },
    ]);

    const output = renderContextBuildView(state, { styled: false });
    expect(output).not.toContain('Primary sources:');
    expect(output).toContain('Context sources:');
  });
});
describe('runContextBuild', () => {
  it('executes scan targets before source-ingest targets', async () => {
    const io = makeIo();
    // Declared with the dbt connection first to prove ordering is by operation, not by config order.
    const project = projectWithConnections({
      dbt_main: { driver: 'dbt' },
      warehouse: { driver: 'postgres' },
    });
    const callOrder: string[] = [];
    const executeTarget = vi.fn(async (target) => {
      callOrder.push(target.connectionId);
      return successResult(target.connectionId, target.driver, target.operation);
    });

    const result = await runContextBuild(
      project,
      { projectDir: '/tmp/project', inputMode: 'disabled' },
      io.io,
      { executeTarget, now: () => 1000 },
    );

    expect(result).toEqual({ exitCode: 0, detached: false });
    expect(callOrder).toEqual(['warehouse', 'dbt_main']);
  });

  it('returns exit code 1 when any target fails', async () => {
    const io = makeIo();
    const project = projectWithConnections({
      warehouse: { driver: 'postgres' },
    });
    const executeTarget = vi.fn(async (target) => failedResult(target.connectionId, target.driver, target.operation));

    const result = await runContextBuild(
      project,
      { projectDir: '/tmp/project', inputMode: 'disabled' },
      io.io,
      { executeTarget, now: () => 1000 },
    );

    expect(result).toEqual({ exitCode: 1, detached: false });
  });

  it('renders final view for non-TTY output', async () => {
    const io = makeIo();
    const project = projectWithConnections({
      warehouse: { driver: 'postgres' },
      dbt_main: { driver: 'dbt' },
    });
    const executeTarget = vi.fn(async (target) => successResult(target.connectionId, target.driver, target.operation));

    await runContextBuild(
      project,
      { projectDir: '/tmp/project', inputMode: 'disabled' },
      io.io,
      { executeTarget, now: () => 1000 },
    );

    const output = io.stdout();
    expect(output).toContain('Building KLO context');
    expect(output).toContain('Primary sources:');
    expect(output).toContain('warehouse');
    expect(output).toContain('Context sources:');
    expect(output).toContain('dbt_main');
  });

  it('passes scan mode and detect relationships through to target execution', async () => {
    const io = makeIo();
    const project = projectWithConnections({ warehouse: { driver: 'postgres' } });
    const executeTarget = vi.fn(async (target) => successResult(target.connectionId, target.driver, target.operation));

    await runContextBuild(
      project,
      { projectDir: '/tmp/project', inputMode: 'disabled', scanMode: 'enriched', detectRelationships: true },
      io.io,
      { executeTarget, now: () => 1000 },
    );

    expect(executeTarget).toHaveBeenCalledWith(
      expect.objectContaining({ connectionId: 'warehouse', operation: 'scan' }),
      expect.objectContaining({ scanMode: 'enriched', detectRelationships: true }),
      expect.anything(),
      {},
    );
  });

  it('exits immediately with paused message when d is pressed', async () => {
    // process.exit is replaced with a throwing stub so the detach path can be
    // observed without terminating the test runner.
    const mockExit = vi.spyOn(process, 'exit').mockImplementation(() => {
      throw new Error('process.exit');
    });
    // Restore in `finally`: if any assertion below fails, a still-mocked
    // process.exit would otherwise leak into every test that runs afterwards.
    try {
      const io = makeIo();
      const project = projectWithConnections({
        warehouse: { driver: 'postgres' },
        dbt_main: { driver: 'dbt' },
      });
      let triggerDetach: (() => void) | null = null;
      const executeTarget = vi.fn(async (target) => {
        // Simulate the user pressing "d" while the first target is running.
        if (target.connectionId === 'warehouse') triggerDetach?.();
        return successResult(target.connectionId, target.driver, target.operation);
      });

      await expect(
        runContextBuild(
          project,
          { projectDir: '/tmp/project', inputMode: 'disabled' },
          io.io,
          {
            executeTarget,
            now: () => 1000,
            setupKeystroke: (onDetach) => {
              triggerDetach = onDetach;
              return () => {};
            },
          },
        ),
      ).rejects.toThrow('process.exit');

      expect(mockExit).toHaveBeenCalledWith(0);
      expect(io.stdout()).toContain('Context build continuing in the background.');
      expect(io.stdout()).toContain('Resume: klo setup --project-dir /tmp/project');
    } finally {
      mockExit.mockRestore();
    }
  });
});
/** Everything the renderer needs to draw one frame of the build view. */
export interface ContextBuildViewState {
  /** Targets whose operation is `scan`. */
  primarySources: ContextBuildTargetState[];
  /** Targets whose operation is `source-ingest`. */
  contextSources: ContextBuildTargetState[];
  /** Animation frame counter used to pick the spinner glyph. */
  frame: number;
}

/** Caller-supplied options for a context build run. */
export interface ContextBuildArgs {
  projectDir: string;
  inputMode: 'auto' | 'disabled';
  scanMode?: 'structural' | 'enriched';
  detectRelationships?: boolean;
}

/** Outcome of a context build run. */
export interface ContextBuildResult {
  exitCode: number;
  detached: boolean;
}

/** Injection points, mainly for tests. */
export interface ContextBuildDeps {
  executeTarget?: typeof executePublicIngestTarget;
  now?: () => number;
  /** Installs key handlers; returns a cleanup function, or null when none were installed. */
  setupKeystroke?: (onDetach: () => void, onCtrlC: () => void) => (() => void) | null;
  onDetach?: () => void;
}

// --- Rendering ---

/** Wraps `text` in an SGR escape pair: `open` switches the attribute on, `close` switches it off. */
function wrapSgr(text: string, open: number, close: number): string {
  return `${ESC}[${open}m${text}${ESC}[${close}m`;
}

function green(text: string): string {
  return wrapSgr(text, 32, 39);
}

function red(text: string): string {
  return wrapSgr(text, 31, 39);
}

function cyan(text: string): string {
  return wrapSgr(text, 36, 39);
}

function dim(text: string): string {
  return wrapSgr(text, 2, 22);
}

/**
 * Picks the glyph for a target status and, when `styled`, colours it:
 * done → green ✓, failed → red ✗, running → cyan spinner frame, otherwise dim ·.
 */
function statusIcon(status: ContextBuildTargetState['status'], frame: number, styled: boolean): string {
  let glyph: string;
  let colorize: (text: string) => string;

  if (status === 'done') {
    glyph = '✓';
    colorize = green;
  } else if (status === 'failed') {
    glyph = '✗';
    colorize = red;
  } else if (status === 'running') {
    glyph = SPINNER_FRAMES[frame % SPINNER_FRAMES.length] ?? '⠋';
    colorize = cyan;
  } else {
    glyph = '·';
    colorize = dim;
  }

  return styled ? colorize(glyph) : glyph;
}

/** Text shown to the right of a target's name, depending on its status. */
function targetDetail(target: ContextBuildTargetState, styled: boolean): string {
  switch (target.status) {
    case 'done': {
      // Optional summary followed by the elapsed time.
      const duration = formatDuration(target.elapsedMs);
      return target.summaryText ? [target.summaryText, duration].join(' · ') : duration;
    }
    case 'failed':
      return styled ? red('failed') : 'failed';
    case 'running': {
      // Latest progress line if one was captured, else a generic label.
      const fallback = target.target.operation === 'scan' ? 'scanning...' : 'ingesting...';
      return target.detailLine ?? fallback;
    }
    default:
      return styled ? dim('queued') : 'queued';
  }
}
/** Width of the connection-id column: longest id (at least 12) plus 2 of padding. */
function columnWidth(state: ContextBuildViewState): number {
  const idLengths = [...state.primarySources, ...state.contextSources].map(
    (entry) => entry.target.connectionId.length,
  );
  return Math.max(12, ...idLengths) + 2;
}

/** One row: status icon, padded connection id, detail text. */
function renderTargetLine(target: ContextBuildTargetState, frame: number, styled: boolean, width: number): string {
  const icon = statusIcon(target.status, frame, styled);
  const name = target.target.connectionId.padEnd(width);
  const detail = targetDetail(target, styled);
  return ` ${icon} ${name} ${detail}`;
}

/** A blank line, a labelled heading and one row per target; nothing at all for an empty group. */
function renderTargetGroup(
  label: string,
  targets: ContextBuildTargetState[],
  frame: number,
  styled: boolean,
  width: number,
): string[] {
  if (targets.length === 0) {
    return [];
  }
  const rows = targets.map((entry) => renderTargetLine(entry, frame, styled, width));
  return ['', ` ${label}:`, ...rows];
}

/** Command line the user can run to get back into setup. */
function resumeCommand(projectDir?: string): string {
  if (!projectDir) {
    return 'klo setup';
  }
  return `klo setup --project-dir ${projectDir}`;
}

/**
 * Renders the whole build view as a single string ending in a newline.
 *
 * @param state   Current per-target state and spinner frame.
 * @param options `styled` (default true) enables ANSI colours; `showHint`
 *                appends the detach/resume hint while any target is still
 *                queued or running; `projectDir` is echoed in that hint.
 */
export function renderContextBuildView(
  state: ContextBuildViewState,
  options: { styled?: boolean; showHint?: boolean; projectDir?: string } = {},
): string {
  const styled = options.styled ?? true;
  const width = columnWidth(state);

  const lines: string[] = ['', 'Building KLO context', '─────────────────────'];
  lines.push(...renderTargetGroup('Primary sources', state.primarySources, state.frame, styled, width));
  lines.push(...renderTargetGroup('Context sources', state.contextSources, state.frame, styled, width));
  lines.push('');

  const stillWorking = [...state.primarySources, ...state.contextSources].some(
    (entry) => entry.status === 'running' || entry.status === 'queued',
  );
  if (options.showHint && stillWorking) {
    const hint = ` d to detach · ${resumeCommand(options.projectDir)} to resume`;
    lines.push(styled ? dim(hint) : hint, '');
  }

  return `${lines.join('\n')}\n`;
}
// --- IO Capture ---

// Matches the "erase to end of line" sequence (ESC [ K). ESC is regex-escaped
// before being embedded in the pattern source.
const ESCAPED_ESC_SOURCE = ESC.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
const ESC_K_RE = new RegExp(`${ESCAPED_ESC_SOURCE}\\[K`, 'g');

/**
 * Returns a normalized `[NN%] message` string when `chunk` is a progress
 * line, or null otherwise. A leading carriage return, erase-line sequences
 * and a trailing newline are stripped before matching.
 */
export function extractProgressMessage(chunk: string): string | null {
  const text = chunk.replace(/^\r/, '').replace(ESC_K_RE, '').replace(/\n$/, '').trim();
  const found = /^\[(\d+)%\]\s*(.+)$/.exec(text);
  if (found === null) {
    return null;
  }
  const [, percent, message] = found;
  return `[${percent}%] ${message}`;
}

/** Pulls the table count out of "<n> change(s) across <m> table(s)"; the label is always "tables". */
export function parseScanSummary(output: string): string | null {
  const found = /(\d+) changes? across (\d+) tables?/.exec(output);
  if (found === null) {
    return null;
  }
  return `${found[2]} tables`;
}

/** Joins the "Work units" count and the "Saved memory" text with " · "; null when neither is present. */
export function parseIngestSummary(output: string): string | null {
  const workUnits = /Work units: (\d+)/.exec(output);
  const savedMemory = /Saved memory: (.+)/.exec(output);

  const pieces: string[] = [];
  if (workUnits) {
    pieces.push(`${workUnits[1]} work units`);
  }
  if (savedMemory) {
    pieces.push(savedMemory[1]);
  }
  return pieces.length === 0 ? null : pieces.join(' · ');
}

interface CapturedIo {
  io: KloCliIo;
  captured(): string;
}

/**
 * Builds an io pair that accumulates everything written to stdout and stderr
 * in one buffer and forwards recognized progress lines from stdout to
 * `onProgress`.
 */
function createCaptureIo(onProgress: (message: string) => void, isTTY: boolean): CapturedIo {
  let buffer = '';

  const stdout = {
    isTTY,
    write(chunk: string) {
      buffer += chunk;
      const progress = extractProgressMessage(chunk);
      if (progress) {
        onProgress(progress);
      }
    },
  };
  const stderr = {
    write(chunk: string) {
      buffer += chunk;
    },
  };

  return {
    io: { stdout, stderr },
    captured: () => buffer,
  };
}

// --- Repaint ---

/**
 * Returns a painter that overwrites its previous output: it moves the cursor
 * up by the number of newlines written last time, writes the new content and
 * then erases whatever is left below.
 */
function createRepainter(io: KloCliIo) {
  let previousNewlines = 0;

  function paint(content: string) {
    if (previousNewlines > 0) {
      io.stdout.write(`${ESC}[${previousNewlines}A\r`);
    }
    io.stdout.write(content);
    io.stdout.write(`${ESC}[J`);
    // Splitting on "\n" yields one more piece than there are newlines.
    previousNewlines = content.split('\n').length - 1;
  }

  return { paint };
}
// --- Background build ---

/** Returns `process.argv[1]` when it ends in `.js`, `.ts` or `.mjs`; null otherwise. */
function resolveKloEntryScript(): string | null {
  const script = process.argv[1];
  if (!script) {
    return null;
  }
  const isScript = ['.js', '.ts', '.mjs'].some((extension) => script.endsWith(extension));
  return isScript ? script : null;
}

/**
 * Starts a detached `setup context build --no-input` child for `projectDir`,
 * with stdout and stderr redirected to `.klo/setup/context-build.log`.
 *
 * @returns The log path, or null when no entry script could be resolved.
 */
function spawnBackgroundBuild(projectDir: string): { logPath: string } | null {
  const entryScript = resolveKloEntryScript();
  if (entryScript === null) {
    return null;
  }

  const absoluteDir = resolve(projectDir);
  const setupDir = join(absoluteDir, '.klo', 'setup');
  mkdirSync(setupDir, { recursive: true });

  const logPath = join(setupDir, 'context-build.log');
  const logFd = openSync(logPath, 'w');

  const childArgs = [entryScript, 'setup', 'context', 'build', '--project-dir', absoluteDir, '--no-input'];
  const child = spawn(process.execPath, childArgs, {
    detached: true,
    stdio: ['ignore', logFd, logFd],
  });
  // Let the parent exit without waiting for the child.
  child.unref();

  return { logPath };
}

// --- Keystroke handling ---

/**
 * Puts stdin into raw mode and listens for "d"/"D" (detach) and Ctrl+C.
 *
 * @returns A cleanup function that undoes the setup, or null when stdin is
 *          not a TTY or cannot switch to raw mode.
 */
function defaultSetupKeystroke(onDetach: () => void, onCtrlC: () => void): (() => void) | null {
  const stdin = process.stdin;
  const canUseRawMode = stdin.isTTY && typeof stdin.setRawMode === 'function';
  if (!canUseRawMode) {
    return null;
  }

  stdin.setRawMode(true);
  stdin.resume();

  const handleKey = (data: Buffer) => {
    const key = data.toString();
    if (key === 'd' || key === 'D') {
      onDetach();
      return;
    }
    if (key === '\x03') {
      onCtrlC();
    }
  };
  stdin.on('data', handleKey);

  return () => {
    stdin.off('data', handleKey);
    if (typeof stdin.setRawMode === 'function') {
      stdin.setRawMode(false);
    }
    stdin.pause();
  };
}

// --- Orchestration ---

/** Initial (queued) state for a single plan target. */
function makeTargetState(target: KloPublicIngestPlanTarget): ContextBuildTargetState {
  return {
    target,
    status: 'queued',
    detailLine: null,
    summaryText: null,
    startedAt: null,
    elapsedMs: 0,
  };
}

/**
 * Splits plan targets into the two display groups, preserving input order:
 * `scan` targets become primary sources and `source-ingest` targets become
 * context sources. Targets with any other operation are left out.
 */
export function initViewState(targets: KloPublicIngestPlanTarget[]): ContextBuildViewState {
  const primarySources: ContextBuildTargetState[] = [];
  const contextSources: ContextBuildTargetState[] = [];

  for (const target of targets) {
    if (target.operation === 'scan') {
      primarySources.push(makeTargetState(target));
    } else if (target.operation === 'source-ingest') {
      contextSources.push(makeTargetState(target));
    }
  }

  return { primarySources, contextSources, frame: 0 };
}
/**
 * Runs every target of the project's ingest plan sequentially (scan targets
 * first, then source-ingest targets) while rendering a live status view.
 *
 * On a TTY the view is repainted in place and animated; otherwise only the
 * final, unstyled view is written once. Each target's own output is captured
 * rather than shown, and is mined for progress lines and a one-line summary.
 *
 * @param project Project whose connections are planned and executed.
 * @param args    Project directory plus input/scan options forwarded to each target.
 * @param io      Streams used for the view itself.
 * @param deps    Optional overrides for target execution, the clock and key handling.
 * @returns `exitCode` 1 when any step of any target failed, else 0.
 *          `detached` is true only when a detach was requested and
 *          `process.exit` did not terminate the process (e.g. it is stubbed).
 */
export async function runContextBuild(
  project: KloPublicIngestProject,
  args: ContextBuildArgs,
  io: KloCliIo,
  deps: ContextBuildDeps = {},
): Promise<ContextBuildResult> {
  const plan = buildPublicIngestPlan(project, { projectDir: args.projectDir, all: true });
  const state = initViewState(plan.targets);
  const isTTY = io.stdout.isTTY === true;
  const nowFn = deps.now ?? (() => Date.now());

  // Without a TTY there is no repainter, so every intermediate paint() is a no-op.
  const repainter = isTTY ? createRepainter(io) : null;
  const viewOpts = { styled: true, projectDir: args.projectDir };
  const paint = (hint: boolean) => repainter?.paint(renderContextBuildView(state, { ...viewOpts, showHint: hint }));
  paint(true);

  let spinnerInterval: ReturnType<typeof setInterval> | null = null;
  if (repainter) {
    // Advance the spinner and refresh elapsed time for running targets.
    spinnerInterval = setInterval(() => {
      state.frame++;
      for (const t of [...state.primarySources, ...state.contextSources]) {
        if (t.status === 'running' && t.startedAt !== null) {
          t.elapsedMs = nowFn() - t.startedAt;
        }
      }
      paint(true);
    }, 140);
  }

  const orderedTargets = [...state.primarySources, ...state.contextSources];
  const execTarget = deps.executeTarget ?? executePublicIngestTarget;

  let detached = false;
  let cleanupKeystroke: (() => void) | null = null;

  if (isTTY || deps.setupKeystroke) {
    const cleanup = () => {
      if (spinnerInterval) clearInterval(spinnerInterval);
      cleanupKeystroke?.();
    };
    cleanupKeystroke = (deps.setupKeystroke ?? defaultSetupKeystroke)(
      () => {
        cleanup();
        // Record the detach before exiting. Previously this flag was never
        // set, which made the `detached` guards below unreachable; with it
        // set, the loop stops cleanly if process.exit returns.
        detached = true;
        deps.onDetach?.();
        const bg = spawnBackgroundBuild(args.projectDir);
        io.stdout.write('\n\nContext build continuing in the background.\n');
        if (bg) io.stdout.write(`Log: ${bg.logPath}\n`);
        io.stdout.write(`Status: klo setup context status --project-dir ${resolve(args.projectDir)}\n`);
        io.stdout.write(`Resume: ${resumeCommand(args.projectDir)}\n`);
        process.exit(0);
      },
      () => {
        cleanup();
        io.stdout.write('\n\nContext build stopped. Nothing is running in the background.\n');
        io.stdout.write(`Resume: ${resumeCommand(args.projectDir)}\n`);
        // 130 is the conventional exit status for termination by SIGINT.
        process.exit(130);
      },
    );
  }

  const runArgs: Extract<KloPublicIngestArgs, { command: 'run' }> = {
    command: 'run',
    projectDir: args.projectDir,
    all: true,
    json: false,
    inputMode: args.inputMode,
    scanMode: args.scanMode,
    detectRelationships: args.detectRelationships,
  };

  let hasFailure = false;

  try {
    for (const targetState of orderedTargets) {
      if (detached) break;

      targetState.status = 'running';
      targetState.startedAt = nowFn();
      paint(true);

      // The target writes into a capture buffer; progress lines update the
      // detail column of the view as they arrive.
      const capture = createCaptureIo(
        (message) => {
          targetState.detailLine = message;
          paint(true);
        },
        false,
      );

      const result = await execTarget(targetState.target, runArgs, capture.io, {});

      targetState.elapsedMs = nowFn() - (targetState.startedAt ?? nowFn());
      const failed = result.steps.some((s) => s.status === 'failed');
      targetState.status = failed ? 'failed' : 'done';
      targetState.detailLine = null;
      if (!failed) {
        targetState.summaryText =
          targetState.target.operation === 'scan'
            ? parseScanSummary(capture.captured())
            : parseIngestSummary(capture.captured());
      }
      if (failed) hasFailure = true;

      paint(true);
    }
  } finally {
    // Always stop the spinner and release stdin, even if a target threw.
    if (spinnerInterval) clearInterval(spinnerInterval);
    cleanupKeystroke?.();
  }

  if (detached) {
    return { exitCode: 0, detached: true };
  }

  if (!repainter) {
    io.stdout.write(renderContextBuildView(state, { styled: false }));
  } else {
    paint(false);
  }

  return { exitCode: hasFailure ? 1 : 0, detached: false };
}
readPackagedJson<{ + demoAssetSchemaVersion: number; + mode: string; + source: string; + sources: { + warehouse: { tables: number; rowCounts: Record }; + dbt: { models: number; sourceTables: number }; + bi: { explores: number; dashboards: number }; + notion: { pages: number }; + }; + name: string; + displayName: string; + generated: { + semanticLayer: { path: string; sourceCount: number }; + knowledge: { pageCount: number }; + links: { linkCount: number }; + }; + }>('manifest.json'); + + expect(manifest).toMatchObject({ + demoAssetSchemaVersion: 2, + name: 'orbit', + displayName: 'Orbit Demo', + mode: 'seeded', + source: packagedDemoSource, + }); + expect(manifest.sources.warehouse.tables).toBeGreaterThanOrEqual(5); + expect(manifest.sources.warehouse.tables).toBeLessThanOrEqual(10); + expect(Object.keys(manifest.sources.warehouse.rowCounts).sort()).toEqual([ + 'accounts', + 'arr_movements', + 'contracts', + 'invoices', + 'plans', + 'purchase_requests', + 'support_tickets', + 'users', + ]); + expect(manifest.sources.dbt.models).toBeGreaterThanOrEqual(3); + expect(manifest.sources.dbt.models).toBeLessThanOrEqual(6); + expect(manifest.sources.bi.explores).toBeGreaterThanOrEqual(2); + expect(manifest.sources.bi.dashboards).toBeGreaterThanOrEqual(2); + expect(manifest.sources.notion.pages).toBeGreaterThanOrEqual(5); + expect(manifest.generated.semanticLayer.sourceCount).toBeGreaterThanOrEqual(5); + expect(manifest.generated.knowledge.pageCount).toBeGreaterThanOrEqual(10); + expect(manifest.generated.links.linkCount).toBeGreaterThanOrEqual(10); + + const dbStat = await stat(packagedDemoAssetPath('demo.db')); + expect(dbStat.size).toBeGreaterThan(0); + expect(dbStat.size).toBeLessThan(10 * 1024 * 1024); + + await expect(access(packagedDemoAssetPath('raw-sources/warehouse/accounts.csv'))).resolves.toBeUndefined(); + await expect(access(packagedDemoAssetPath('raw-sources/dbt/schema.yml'))).resolves.toBeUndefined(); + await 
expect(access(packagedDemoAssetPath('raw-sources/bi/revenue_exec.dashboard.lookml'))).resolves.toBeUndefined(); + await expect(access(packagedDemoAssetPath('raw-sources/notion/revenue-reporting-policy.md'))).resolves.toBeUndefined(); + expect(manifest.generated.semanticLayer.path).toBe('semantic-layer/orbit_demo'); + + await expect(access(packagedDemoAssetPath('semantic-layer/orbit_demo/accounts.yaml'))).resolves.toBeUndefined(); + await expect(access(packagedDemoAssetPath('knowledge/global/arr-contract-first.md'))).resolves.toBeUndefined(); + await expect(access(packagedDemoAssetPath('links/provenance.json'))).resolves.toBeUndefined(); + await expect(access(packagedDemoAssetPath('reports/seeded-demo-report.json'))).resolves.toBeUndefined(); + }); + + it('initializes a flat demo project without writing literal credentials', async () => { + const result = await ensureDemoProject({ projectDir, force: false }); + + expect(result.projectDir).toBe(projectDir); + await expect(access(join(projectDir, 'demo.db'))).resolves.toBeUndefined(); + await expect(access(join(projectDir, 'state.sqlite'))).resolves.toBeUndefined(); + await expect(access(join(projectDir, 'reports'))).resolves.toBeUndefined(); + await expect(access(join(projectDir, 'semantic-layer'))).resolves.toBeUndefined(); + await expect(access(join(projectDir, 'knowledge'))).resolves.toBeUndefined(); + await expect(access(join(projectDir, 'replays', 'replay.memory-flow.v1.json'))).resolves.toBeUndefined(); + await expect(access(join(projectDir, 'raw-sources'))).resolves.toBeUndefined(); + await expect(access(join(projectDir, '_schema'))).rejects.toMatchObject({ code: 'ENOENT' }); + + const config = await readFile(join(projectDir, 'klo.yaml'), 'utf-8'); + expect(config).toContain('backend: anthropic'); + expect(config).toContain('api_key: env:ANTHROPIC_API_KEY'); + expect(config).not.toContain('sk-ant-'); + }); + + it('rejects an existing demo project unless force is set', async () => { + await ensureDemoProject({ 
projectDir, force: false }); + await expect(ensureDemoProject({ projectDir, force: false })).rejects.toThrow('Demo project already exists'); + await expect(ensureDemoProject({ projectDir, force: true })).resolves.toMatchObject({ projectDir }); + }); + + it('loads packaged and copied demo replays', async () => { + const packaged = await loadPackagedDemoReplay(); + expect(packaged.runId).toBe('demo-seeded-orbit'); + expect(packaged.connectionId).toBe('orbit_demo'); + expect(packaged.metadata?.mode).toBe('seeded'); + + await ensureDemoProject({ projectDir, force: false }); + const copied = await loadProjectDemoReplay(projectDir); + expect(copied).toEqual(packaged); + }); + + it('loads the latest local replay before the packaged replay', async () => { + await ensureDemoProject({ projectDir, force: false }); + await writeDemoReplay( + projectDir, + { + metadata: { + schemaVersion: 1, + mode: 'full', + origin: 'captured', + timing: 'captured', + capturedAt: '2026-05-01T10:00:03.000Z', + sourceReportId: null, + sourceReportPath: 'raw-sources/orbit_demo/live-database/sync/scan-report.json', + fallbackReason: null, + }, + runId: 'demo-full-run', + connectionId: 'orbit_demo', + adapter: 'live-database', + status: 'done', + sourceDir: null, + syncId: 'sync', + reportPath: 'raw-sources/orbit_demo/live-database/sync/scan-report.json', + errors: [], + events: [{ type: 'report_created', runId: 'scan-run' }], + plannedWorkUnits: [], + details: { actions: [], provenance: [], transcripts: [] }, + }, + { label: 'full' }, + ); + + await expect(loadProjectDemoReplay(projectDir)).resolves.toMatchObject({ + runId: 'demo-full-run', + metadata: { mode: 'full', origin: 'captured' }, + }); + }); + + it('reports missing, ready, and corrupted demo project state', async () => { + await expect(inspectDemoProjectState(projectDir)).resolves.toEqual({ + status: 'missing', + projectDir, + missing: ['klo.yaml', 'demo.db', 'state.sqlite', 'replays/replay.memory-flow.v1.json'], + }); + + await 
ensureDemoProject({ projectDir, force: false }); + await expect(inspectDemoProjectState(projectDir)).resolves.toEqual({ + status: 'ready', + projectDir, + missing: [], + }); + + await rm(join(projectDir, 'demo.db'), { force: true }); + await expect(inspectDemoProjectState(projectDir)).resolves.toEqual({ + status: 'corrupt', + projectDir, + missing: ['demo.db'], + }); + }); + + it('requires explicit force for demo reset and recreates packaged assets', async () => { + await ensureDemoProject({ projectDir, force: false }); + await rm(join(projectDir, 'demo.db'), { force: true }); + + await expect(resetDemoProject({ projectDir, force: false })).rejects.toThrow( + `klo setup demo reset is destructive; pass --force to recreate ${projectDir}`, + ); + + await expect(resetDemoProject({ projectDir, force: true })).resolves.toMatchObject({ projectDir }); + await expect(access(join(projectDir, 'demo.db'))).resolves.toBeUndefined(); + await expect(inspectDemoProjectState(projectDir)).resolves.toMatchObject({ status: 'ready' }); + }); + + it('preserves a user-edited klo.yaml across reset --force', async () => { + await ensureDemoProject({ projectDir, force: false }); + const customConfig = [ + 'project: klo-demo-orbit', + 'connections:', + ` ${DEMO_CONNECTION_ID}:`, + ' driver: sqlite', + ` path: ${JSON.stringify(join(projectDir, 'demo.db'))}`, + ' readonly: true', + 'storage:', + ' state: sqlite', + ' search: sqlite-fts5', + ' git:', + ' auto_commit: true', + ' author: klo ', + 'llm:', + ' provider:', + ' backend: vertex', + ' vertex:', + ' project: example-gcp-project', + ' location: us-east5', + ' models:', + ' default: claude-sonnet-4-6', + 'ingest:', + ' adapters:', + ` - ${DEMO_ADAPTER}`, + ' embeddings:', + ' backend: none', + ' dimensions: 8', + ' workUnits:', + ' stepBudget: 40', + ' maxConcurrency: 1', + ' failureMode: continue', + '', + ].join('\n'); + await writeFile(join(projectDir, 'klo.yaml'), customConfig, 'utf-8'); + + await resetDemoProject({ projectDir, force: 
true }); + + const preserved = await readFile(join(projectDir, 'klo.yaml'), 'utf-8'); + expect(preserved).toBe(customConfig); + expect(preserved).toContain('backend: vertex'); + expect(preserved).not.toContain('backend: anthropic'); + await expect(inspectDemoProjectState(projectDir)).resolves.toMatchObject({ status: 'ready' }); + }); + + it('still writes the default klo.yaml on reset when none exists', async () => { + await resetDemoProject({ projectDir, force: true }); + const config = await readFile(join(projectDir, 'klo.yaml'), 'utf-8'); + expect(config).toContain('backend: anthropic'); + }); +}); diff --git a/packages/cli/src/demo-assets.ts b/packages/cli/src/demo-assets.ts new file mode 100644 index 00000000..ee4867c1 --- /dev/null +++ b/packages/cli/src/demo-assets.ts @@ -0,0 +1,281 @@ +import { constants as fsConstants } from 'node:fs'; +import { access, copyFile, cp, mkdir, readFile, rm, writeFile } from 'node:fs/promises'; +import { tmpdir } from 'node:os'; +import { dirname, join, resolve } from 'node:path'; +import { fileURLToPath } from 'node:url'; +import { randomBytes } from 'node:crypto'; +import type { MemoryFlowReplayInput } from '@klo/context/ingest/memory-flow'; +import { loadDemoReplayFile, loadLatestDemoReplay } from './demo-replay-store.js'; + +interface DemoProjectResult { + projectDir: string; + configPath: string; + databasePath: string; + replayPath: string; +} + +interface EnsureDemoProjectOptions { + projectDir: string; + force: boolean; +} + +type DemoProjectStateStatus = 'missing' | 'ready' | 'corrupt'; + +interface DemoProjectState { + status: DemoProjectStateStatus; + projectDir: string; + missing: string[]; +} + +export const DEMO_CONNECTION_ID = 'orbit_demo'; +export const DEMO_ADAPTER = 'live-database'; +export const DEMO_REPLAY_FILE = 'replay.memory-flow.v1.json'; +export const DEMO_FULL_JOB_ID = 'demo-full-ingest'; + +const REQUIRED_BASE_PROJECT_PATHS = [ + 'klo.yaml', + 'demo.db', + 'state.sqlite', + join('replays', 
/**
 * Classifies a demo project directory by which required base files are present.
 *
 * Every entry of `REQUIRED_BASE_PROJECT_PATHS` is probed relative to the
 * resolved project directory:
 * - `missing`: none of the required paths exist;
 * - `corrupt`: some, but not all, of the required paths exist;
 * - `ready`:   all required paths exist.
 *
 * @param projectDir Directory to inspect; resolved to an absolute path first.
 * @returns The status, the resolved directory, and the missing relative paths
 *          in the same order as `REQUIRED_BASE_PROJECT_PATHS`.
 */
export async function inspectDemoProjectState(projectDir: string): Promise<DemoProjectState> {
  const root = resolve(projectDir);

  // The probes are independent of each other, so run them concurrently.
  // Promise.all keeps result order aligned with REQUIRED_BASE_PROJECT_PATHS.
  const presence = await Promise.all(
    REQUIRED_BASE_PROJECT_PATHS.map((relativePath) => exists(join(root, relativePath))),
  );
  const missing: string[] = REQUIRED_BASE_PROJECT_PATHS.filter((_, index) => !presence[index]);

  let status: DemoProjectStateStatus;
  if (missing.length === 0) {
    status = 'ready';
  } else if (missing.length === REQUIRED_BASE_PROJECT_PATHS.length) {
    status = 'missing';
  } else {
    status = 'corrupt';
  }

  return { status, projectDir: root, missing };
}
/**
 * Reads an existing config file so that it can be restored after a reset.
 *
 * @param configPath Path of the config file to read.
 * @returns The UTF-8 contents of the file, or `null` when the file (or a
 *          directory on its path) does not exist.
 * @throws Any other read failure (for example EACCES or EISDIR). The caller
 *         deletes the project directory right after this call, so reporting an
 *         unreadable config as "absent" would silently discard it.
 */
async function readExistingConfig(configPath: string): Promise<string | null> {
  try {
    return await readFile(configPath, 'utf-8');
  } catch (error) {
    const code = (error as NodeJS.ErrnoException | null)?.code;
    // Only "nothing is there" counts as "no config to preserve".
    if (code === 'ENOENT' || code === 'ENOTDIR') {
      return null;
    }
    throw error;
  }
}
/**
 * Creates the base demo project in `options.projectDir`.
 *
 * Steps, in order:
 * 1. Reject when `klo.yaml` already exists and `force` is not set.
 * 2. Verify the packaged base assets are present before touching the target.
 * 3. With `force`, recursively remove the target directory.
 * 4. Create the project directory and its standard sub-directories.
 * 5. Copy `demo.db`, `manifest.json` and the packaged replay, make sure
 *    `state.sqlite` exists, and write the default `klo.yaml`.
 *
 * @param options.projectDir Target directory; resolved to an absolute path.
 * @param options.force      When true, any existing directory is deleted first.
 * @returns Absolute paths of the project directory, config, database and replay.
 * @throws Error containing "Demo project already exists" when a config is
 *         present and `force` is false (callers match on this text).
 * @throws Error when the packaged base assets are incomplete, or when `force`
 *         is requested for the filesystem root.
 */
export async function ensureDemoProject(options: EnsureDemoProjectOptions): Promise<DemoProjectResult> {
  const projectDir = resolve(options.projectDir);
  const configPath = join(projectDir, 'klo.yaml');
  if (!options.force && (await exists(configPath))) {
    throw new Error(`Demo project already exists at ${projectDir}; pass --force to recreate it`);
  }

  await assertPackagedBaseAssetsPresent();

  if (options.force) {
    // A path that is its own parent is the filesystem root; never wipe it.
    if (dirname(projectDir) === projectDir) {
      throw new Error(`Refusing to recreate a demo project at the filesystem root (${projectDir})`);
    }
    await rm(projectDir, { recursive: true, force: true });
  }

  await mkdir(projectDir, { recursive: true });
  for (const relativeDir of ['reports', 'semantic-layer', 'knowledge', 'replays', 'raw-sources', 'links']) {
    await mkdir(join(projectDir, relativeDir), { recursive: true });
  }

  const databasePath = join(projectDir, 'demo.db');
  await copyFile(join(assetDir(), 'demo.db'), databasePath);
  // Append mode with empty content creates the file if needed without truncating it.
  await writeFile(join(projectDir, 'state.sqlite'), '', { flag: 'a' });
  await copyFile(join(assetDir(), 'manifest.json'), join(projectDir, 'manifest.json'));
  const replayPath = await copyPackagedReplay(projectDir);
  await writeFile(configPath, demoConfig(databasePath), 'utf-8');

  return { projectDir, configPath, databasePath, replayPath };
}
/**
 * Ensures a demo project exists and copies the packaged seeded artifacts
 * (semantic layer, knowledge, raw sources, links, reports) into it.
 *
 * When `force` is false and the project already has a `klo.yaml`, the existing
 * project is reused as-is and only the seeded directories are copied over it.
 * Otherwise the base project is (re)created through `ensureDemoProject`.
 *
 * @param options.projectDir Target directory; resolved to an absolute path.
 * @param options.force      When true, the project is recreated from scratch.
 * @returns Absolute paths of the project directory, config, database and replay.
 * @throws Error when the packaged seeded assets are incomplete, or whatever
 *         `ensureDemoProject` throws while creating the base project.
 */
export async function ensureSeededDemoProject(options: EnsureDemoProjectOptions): Promise<DemoProjectResult> {
  await assertPackagedSeededAssetsPresent();
  const projectDir = resolve(options.projectDir);
  const configPath = join(projectDir, 'klo.yaml');

  // Same condition ensureDemoProject uses to reject an existing project.
  // Checking it here avoids matching on that function's error message text.
  const reuseExisting = !options.force && (await exists(configPath));

  const result: DemoProjectResult = reuseExisting
    ? {
        projectDir,
        configPath,
        databasePath: join(projectDir, 'demo.db'),
        replayPath: join(projectDir, 'replays', DEMO_REPLAY_FILE),
      }
    : await ensureDemoProject(options);

  await copySeededAssetDirectories(result.projectDir);
  return result;
}
/**
 * Loads the replay to show for a demo project.
 *
 * The latest replay found by `loadLatestDemoReplay` wins. When there is none,
 * the packaged replay is copied into `<projectDir>/replays/` if that file is
 * not there yet, and the packaged replay is returned.
 *
 * @param projectDir Demo project directory.
 * @returns The latest local replay, or the packaged replay as a fallback.
 */
export async function loadProjectDemoReplay(projectDir: string): Promise<MemoryFlowReplayInput> {
  const latest = await loadLatestDemoReplay(projectDir);
  if (latest) {
    return latest;
  }

  const root = resolve(projectDir);
  const replayPath = join(root, 'replays', DEMO_REPLAY_FILE);
  if (!(await exists(replayPath))) {
    // Re-seed the project's copy so later runs find a replay on disk.
    await mkdir(dirname(replayPath), { recursive: true });
    await copyPackagedReplay(root);
  }
  return loadPackagedDemoReplay();
}
connectionId: 'orbit_demo', sourceName: 'orbit_demo.accounts' }], + }, + ], + failedWorkUnits: [], + reconciliationSkipped: false, + conflictsResolved: [], + evictionsApplied: [], + unmappedFallbacks: [], + evictionInputs: [], + unresolvedCards: [], + supersededBy: null, + overrideOf: null, + provenanceRows: [ + { + rawPath: 'accounts.schema.json', + artifactKind: 'wiki', + artifactKey: 'knowledge/accounts.md', + actionType: 'wiki_written', + }, + { + rawPath: 'accounts.schema.json', + artifactKind: 'sl', + artifactKey: 'orbit_demo.accounts', + actionType: 'source_created', + }, + ], + toolTranscripts: [], + }, + }; +} + +describe('full demo helpers', () => { + let tempDir: string; + let projectDir: string; + + beforeEach(async () => { + tempDir = await mkdtemp(join(tmpdir(), 'klo-demo-full-')); + projectDir = join(tempDir, 'demo'); + await ensureDemoProject({ projectDir, force: false }); + }); + + afterEach(async () => { + await rm(tempDir, { recursive: true, force: true }); + }); + + it('fails full mode with exact Anthropic env guidance when the key is missing', async () => { + const project = await import('@klo/context/project').then((mod) => mod.loadKloProject({ projectDir })); + + expect(() => assertFullDemoCredentials(project, {})).toThrow( + 'klo setup demo --mode full needs ANTHROPIC_API_KEY. 
Export ANTHROPIC_API_KEY and rerun `klo setup demo --mode full --no-input`, or run `klo setup demo --mode seeded --no-input` without credentials.', + ); + }); + + it('respects an existing gateway provider project for full mode', async () => { + await writeFile( + join(projectDir, 'klo.yaml'), + [ + 'project: klo-demo-orbit', + 'connections:', + ' orbit_demo:', + ' driver: sqlite', + ` path: ${JSON.stringify(join(projectDir, 'demo.db'))}`, + 'llm:', + ' provider:', + ' backend: gateway', + ' models:', + ' default: anthropic/claude-sonnet-4-6', + 'ingest:', + ' adapters:', + ' - live-database', + ' embeddings:', + ' backend: none', + '', + ].join('\n'), + 'utf-8', + ); + const project = await import('@klo/context/project').then((mod) => mod.loadKloProject({ projectDir })); + + expect(() => assertFullDemoCredentials(project, {})).not.toThrow(); + expect(fullDemoCredentialStatus(project, {})).toEqual({ status: 'ready' }); + }); + + it('reports full-demo credential status without throwing', async () => { + const project = await import('@klo/context/project').then((mod) => mod.loadKloProject({ projectDir })); + + expect(fullDemoCredentialStatus(project, {})).toEqual({ status: 'missing-anthropic-key' }); + expect(fullDemoCredentialStatus(project, { ANTHROPIC_API_KEY: 'sk-ant-test' })).toEqual({ status: 'ready' }); // pragma: allowlist secret + + await writeFile( + join(projectDir, 'klo.yaml'), + [ + 'project: klo-demo-orbit', + 'connections:', + ' orbit_demo:', + ' driver: sqlite', + ` path: ${JSON.stringify(join(projectDir, 'demo.db'))}`, + 'ingest:', + ' adapters:', + ' - live-database', + '', + ].join('\n'), + 'utf-8', + ); + const disabledProject = await import('@klo/context/project').then((mod) => mod.loadKloProject({ projectDir })); + expect(fullDemoCredentialStatus(disabledProject, {})).toEqual({ status: 'unsupported-provider', provider: 'none' }); + }); + + it('runs scan first and then full ingest with the canonical demo connection', async () => { + const report = 
fakeFullReport(); + const runLocalScan = vi.fn().mockResolvedValue({ + report: { + runId: 'scan-run', + connectionId: DEMO_CONNECTION_ID, + driver: 'sqlite', + mode: 'structural', + syncId: 'sync-scan', + diffSummary: { tablesAdded: 7, tablesModified: 0, tablesDeleted: 0, tablesUnchanged: 0 }, + artifactPaths: { rawSourcesDir: 'raw-sources/orbit_demo/live-database/sync-scan', manifestShards: [], reportPath: 'scan-report.json' }, + }, + }); + const runLocalIngest = vi.fn(async (options: RunLocalIngestOptions): Promise => { + expect(options.adapter).toBe(DEMO_ADAPTER); + expect(options.connectionId).toBe(DEMO_CONNECTION_ID); + expect(options.jobId).toBe(DEMO_FULL_JOB_ID); + expect(options.memoryFlow?.snapshot()).toMatchObject({ runId: DEMO_FULL_JOB_ID, status: 'running' }); + options.memoryFlow?.emit({ type: 'source_acquired', adapter: DEMO_ADAPTER, trigger: 'demo_full', fileCount: 7 }); + return { result: { ok: true } as never, report }; + }); + const snapshots: unknown[] = []; + + const result = await runDemoFull({ + projectDir, + env: { ANTHROPIC_API_KEY: 'sk-ant-test' }, // pragma: allowlist secret + runLocalScan, + runLocalIngest, + onMemoryFlowChange: (snapshot) => snapshots.push(snapshot), + }); + + expect(runLocalScan).toHaveBeenCalledTimes(1); + expect(runLocalIngest).toHaveBeenCalledTimes(1); + expect(result.report).toBe(report); + expect(result.replay.runId).toBe('run-full'); + expect(snapshots).toHaveLength(1); + }); + + it('builds replay and plain summary from the full report', () => { + const report = fakeFullReport(); + const replay = buildFullDemoReplay(report); + const summary = formatFullDemoSummary(report); + + expect(replay).toMatchObject({ + runId: 'run-full', + connectionId: DEMO_CONNECTION_ID, + adapter: DEMO_ADAPTER, + status: 'done', + }); + expect(summary).toContain('Full demo ingest: done'); + expect(summary).toContain('Saved memory: 1 wiki, 1 semantic layer'); + expect(summary).toContain('Provenance rows: 2'); + 
expect(summary).toContain('Next: klo setup demo inspect'); + expect(summary).toContain('Shows the files, semantic-layer sources, and memory KLO just produced.'); + expect(summary).toContain('Next: klo setup demo replay'); + expect(summary).toContain('Replays the same visual story without calling the LLM again.'); + expect(summary).not.toContain('--viz'); + }); +}); diff --git a/packages/cli/src/demo-full.ts b/packages/cli/src/demo-full.ts new file mode 100644 index 00000000..3d6beff8 --- /dev/null +++ b/packages/cli/src/demo-full.ts @@ -0,0 +1,213 @@ +import { resolveKloConfigReference } from '@klo/context/core'; +import { + createMemoryFlowLiveBuffer, + ingestReportToMemoryFlowReplay, + runLocalIngest, + type IngestReportSnapshot, + type LocalIngestResult, + type MemoryFlowReplayInput, + type RunLocalIngestOptions, +} from '@klo/context/ingest'; +import { loadKloProject, type KloLocalProject } from '@klo/context/project'; +import { runLocalScan, type LocalScanRunResult } from '@klo/context/scan'; +import { DEMO_ADAPTER, DEMO_CONNECTION_ID, DEMO_FULL_JOB_ID, ensureDemoProject } from './demo-assets.js'; +import { runDemoScan } from './demo-scan.js'; +import { createKloCliLocalIngestAdapters } from './local-adapters.js'; +import { formatNextStepLines } from './next-steps.js'; + +interface DemoFullOptions { + projectDir: string; + env?: NodeJS.ProcessEnv; + runLocalScan?: typeof runLocalScan; + runLocalIngest?: typeof runLocalIngest; + onMemoryFlowChange?: (snapshot: MemoryFlowReplayInput) => void; +} + +export interface DemoFullResult { + project: KloLocalProject; + scan: LocalScanRunResult; + ingest: LocalIngestResult; + report: IngestReportSnapshot; + replay: MemoryFlowReplayInput; +} + +type FullDemoCredentialStatus = + | { status: 'ready' } + | { status: 'missing-anthropic-key' } + | { status: 'unsupported-provider'; provider: string }; + +async function ensureDemoProjectForReuse(projectDir: string): Promise { + await ensureDemoProject({ projectDir, force: false 
/**
 * Throws an actionable error unless the project can run the full demo.
 *
 * The decision is taken from `fullDemoCredentialStatus`, so this function and
 * the non-throwing status helper cannot disagree.
 *
 * @param project Loaded KLO project whose LLM provider config is checked.
 * @param env     Environment used to resolve credential references
 *                (defaults to `process.env`).
 * @throws Error with remediation guidance when the provider is unsupported or
 *         the Anthropic API key cannot be resolved.
 */
export function assertFullDemoCredentials(project: KloLocalProject, env: NodeJS.ProcessEnv = process.env): void {
  const status = fullDemoCredentialStatus(project, env);

  switch (status.status) {
    case 'ready':
      return;
    case 'unsupported-provider':
      throw new Error(
        'klo setup demo --mode full requires llm.provider.backend: anthropic, vertex, or gateway. Run `klo setup demo init --force --no-input` to recreate the demo config, or run `klo setup demo --mode seeded --no-input` without credentials.',
      );
    case 'missing-anthropic-key':
      throw new Error(
        'klo setup demo --mode full needs ANTHROPIC_API_KEY. Export ANTHROPIC_API_KEY and rerun `klo setup demo --mode full --no-input`, or run `klo setup demo --mode seeded --no-input` without credentials.',
      );
    default: {
      // Exhaustiveness guard: a status added later must not pass silently.
      const unhandled: never = status;
      throw new Error(`Unhandled full demo credential status: ${JSON.stringify(unhandled)}`);
    }
  }
}
// Adapter-name prefixes removed from work-unit keys before display.
// Order matters: the first prefix that matches is the only one removed.
const ADAPTER_PREFIXES = [
  'live_database_',
  'metabase_',
  'looker_',
  'lookml_',
  'metricflow_',
  'notion_',
  'historic_sql_',
  'dbt_descriptions_',
];

/**
 * Turns a work-unit key into a space-separated label.
 *
 * Hyphens are first normalised to underscores, then at most one leading
 * adapter prefix is dropped, and finally underscores become spaces.
 * NOTE(review): no caller is visible in this part of the file; confirm it is still used.
 *
 * @param unitKey Raw work-unit key, e.g. `live-database-accounts`.
 * @returns The label, e.g. `accounts`.
 */
function humanizeUnitKeyForReport(unitKey: string): string {
  const normalized = unitKey.split('-').join('_');
  const matchedPrefix = ADAPTER_PREFIXES.find((candidate) => normalized.startsWith(candidate));
  const withoutAdapter = matchedPrefix === undefined ? normalized : normalized.slice(matchedPrefix.length);
  return withoutAdapter.split('_').join(' ');
}
'' : 's'} resolved` : 'no conflicts'}`); + } + lines.push(''); + + if (counts.slCount > 0 || counts.wikiCount > 0) { + lines.push(' KLO created:'); + if (counts.slCount > 0) lines.push(` 📊 ${counts.slCount} query definition${counts.slCount === 1 ? '' : 's'} — so agents can write accurate SQL for your data`); + if (counts.wikiCount > 0) lines.push(` 📝 ${counts.wikiCount} knowledge page${counts.wikiCount === 1 ? '' : 's'} — so agents understand your business context`); + lines.push(''); + } + + const memoryFlow = report.body.memoryFlow; + if (memoryFlow) { + for (const detail of memoryFlow.details.actions) { + if (!detail.summary) continue; + const icon = detail.target === 'sl' ? '📊' : '📝'; + lines.push(` ${icon} ${detail.summary}`); + } + } + + lines.push(''); + lines.push(' What to do next:'); + lines.push(...formatNextStepLines()); + lines.push(''); + lines.push(` Your KLO project files are at: ${projectDir}`); + lines.push(''); + + return lines.join('\n'); +} diff --git a/packages/cli/src/demo-interaction.test.ts b/packages/cli/src/demo-interaction.test.ts new file mode 100644 index 00000000..6c1f6828 --- /dev/null +++ b/packages/cli/src/demo-interaction.test.ts @@ -0,0 +1,127 @@ +import { mkdtemp, rm } from 'node:fs/promises'; +import { tmpdir } from 'node:os'; +import { join } from 'node:path'; +import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'; +import { ensureDemoProject } from './demo-assets.js'; +import { + chooseDemoProjectForInteractiveRun, + createTestDemoPromptAdapter, + resolveFullCredentialDecision, +} from './demo-interaction.js'; + +function io(isTTY: boolean) { + return { + stdin: { isTTY }, + stdout: { isTTY, write: vi.fn() }, + stderr: { write: vi.fn() }, + }; +} + +describe('demo interaction decisions', () => { + let tempDir: string; + + beforeEach(async () => { + tempDir = await mkdtemp(join(tmpdir(), 'klo-demo-interaction-')); + }); + + afterEach(async () => { + await rm(tempDir, { recursive: true, force: true }); + }); 
+ + it('reuses a valid project without prompting in no-input mode', async () => { + await ensureDemoProject({ projectDir: tempDir, force: false }); + + await expect( + chooseDemoProjectForInteractiveRun({ + projectDir: tempDir, + inputMode: 'disabled', + io: io(false), + prompts: createTestDemoPromptAdapter({ choices: [] }), + }), + ).resolves.toEqual({ action: 'use', projectDir: tempDir, reset: false }); + }); + + it('fails corrupted projects in no-input mode with reset guidance', async () => { + await ensureDemoProject({ projectDir: tempDir, force: false }); + await rm(join(tempDir, 'demo.db'), { force: true }); + + await expect( + chooseDemoProjectForInteractiveRun({ + projectDir: tempDir, + inputMode: 'disabled', + io: io(false), + prompts: createTestDemoPromptAdapter({ choices: [] }), + }), + ).rejects.toThrow( + `Demo project is not ready at ${tempDir}: missing demo.db. Run klo setup demo reset --project-dir ${tempDir} --force --no-input`, + ); + }); + + it('lets interactive users reset a corrupted project', async () => { + await ensureDemoProject({ projectDir: tempDir, force: false }); + await rm(join(tempDir, 'demo.db'), { force: true }); + + await expect( + chooseDemoProjectForInteractiveRun({ + projectDir: tempDir, + io: io(true), + prompts: createTestDemoPromptAdapter({ choices: ['reset'], confirms: [true] }), + }), + ).resolves.toEqual({ action: 'use', projectDir: tempDir, reset: true }); + }); + + it('lets interactive users choose another project directory', async () => { + await ensureDemoProject({ projectDir: tempDir, force: false }); + const otherDir = join(tempDir, 'other-demo'); + + await expect( + chooseDemoProjectForInteractiveRun({ + projectDir: tempDir, + io: io(true), + prompts: createTestDemoPromptAdapter({ choices: ['other'], texts: [otherDir] }), + }), + ).resolves.toEqual({ action: 'use', projectDir: otherDir, reset: false }); + }); + + it('uses a pasted Anthropic key only for the returned process env', async () => { + // pragma: 
allowlist secret + const prompts = createTestDemoPromptAdapter({ choices: ['process_key'], passwords: ['sk-ant-process'] }); + + await expect( + resolveFullCredentialDecision({ + needsAnthropicKey: true, + inputMode: 'auto', + io: io(true), + env: {}, + prompts, + }), + ).resolves.toEqual({ + action: 'full', + env: { ANTHROPIC_API_KEY: 'sk-ant-process' }, // pragma: allowlist secret + }); + }); + + it('lets interactive users explicitly choose seeded mode when the key is missing', async () => { + await expect( + resolveFullCredentialDecision({ + needsAnthropicKey: true, + inputMode: 'auto', + io: io(true), + env: {}, + prompts: createTestDemoPromptAdapter({ choices: ['seeded'] }), + }), + ).resolves.toEqual({ action: 'run-mode', mode: 'seeded' }); + }); + + it('does not prompt when input is disabled', async () => { + await expect( + resolveFullCredentialDecision({ + needsAnthropicKey: true, + inputMode: 'disabled', + io: io(false), + env: {}, + prompts: createTestDemoPromptAdapter({ choices: ['seeded'] }), + }), + ).resolves.toEqual({ action: 'full', env: {} }); + }); +}); diff --git a/packages/cli/src/demo-interaction.ts b/packages/cli/src/demo-interaction.ts new file mode 100644 index 00000000..b8e05f60 --- /dev/null +++ b/packages/cli/src/demo-interaction.ts @@ -0,0 +1,202 @@ +import { cancel, confirm, isCancel, password, select, text } from '@clack/prompts'; +import type { Option as ClackOption } from '@clack/prompts'; +import { resolve } from 'node:path'; +import { inspectDemoProjectState } from './demo-assets.js'; +import type { KloDemoInputMode } from './demo.js'; +import { withMenuOptionsSpacing } from './prompt-navigation.js'; + +type DemoPromptOption = ClackOption; + +export interface DemoPromptAdapter { + select(options: { message: string; options: Array> }): Promise; + confirm(options: { message: string; initialValue?: boolean }): Promise; + password(options: { message: string }): Promise; + text(options: { message: string; placeholder?: string }): 
/**
 * Builds the prompt adapter backed by `@clack/prompts`.
 *
 * Every prompt result goes through `ensureNotCancelled`, which calls the
 * adapter's `cancel` and throws when the user aborted the prompt.
 *
 * The methods close over the `adapter` constant instead of using `this`, so
 * they keep working when destructured or passed around as plain callbacks.
 *
 * NOTE(review): the generic signatures below are reconstructed from usage
 * (the select value type is a string union) — confirm against the
 * `DemoPromptAdapter` interface and the `DemoPromptOption` alias.
 *
 * @returns A `DemoPromptAdapter` that prompts on the real terminal.
 */
export function createClackDemoPromptAdapter(): DemoPromptAdapter {
  const adapter: DemoPromptAdapter = {
    async select<T extends string>(options: { message: string; options: Array<DemoPromptOption<T>> }): Promise<T> {
      return ensureNotCancelled(await select(withMenuOptionsSpacing(options)), adapter);
    },
    async confirm(options: { message: string; initialValue?: boolean }): Promise<boolean> {
      return ensureNotCancelled(await confirm(options), adapter);
    },
    async password(options: { message: string }): Promise<string> {
      return ensureNotCancelled(await password(options), adapter);
    },
    async text(options: { message: string; placeholder?: string }): Promise<string> {
      return ensureNotCancelled(await text(options), adapter);
    },
    cancel(message: string): void {
      cancel(message);
    },
  };
  return adapter;
}
[])]; + + return { + async select(): Promise { + return choices.shift() as T; + }, + async confirm(): Promise { + return confirms.shift() ?? false; + }, + async password(): Promise { + return passwords.shift() ?? ''; + }, + async text(): Promise { + return texts.shift() ?? ''; + }, + cancel(): void { + return; + }, + }; +} + +export async function chooseDemoProjectForInteractiveRun(options: { + projectDir: string; + inputMode?: KloDemoInputMode; + io: DemoInteractiveIo; + prompts?: DemoPromptAdapter; +}): Promise { + const prompts = options.prompts ?? createClackDemoPromptAdapter(); + const projectDir = resolve(options.projectDir); + const state = await inspectDemoProjectState(projectDir); + + if (!isInteractive(options.inputMode, options.io)) { + if (state.status === 'corrupt') { + throw new Error( + `Demo project is not ready at ${projectDir}: missing ${state.missing.join(', ')}. Run klo setup demo reset --project-dir ${projectDir} --force --no-input`, + ); + } + return { action: 'use', projectDir, reset: false }; + } + + if (state.status === 'missing') { + return { action: 'use', projectDir, reset: false }; + } + + const choices = + state.status === 'ready' + ? [ + { value: 'reuse', label: 'Reuse existing demo project' }, + { value: 'reset', label: 'Reset demo project' }, + { value: 'other', label: 'Choose another directory' }, + { value: 'cancel', label: 'Cancel' }, + ] + : [ + { value: 'reset', label: 'Reset corrupted demo project', hint: `Missing ${state.missing.join(', ')}` }, + { value: 'other', label: 'Choose another directory' }, + { value: 'cancel', label: 'Cancel' }, + ]; + + const choice = await prompts.select({ + message: state.status === 'ready' ? 
`Demo project exists at ${projectDir}` : `Demo project is not ready at ${projectDir}`, + options: choices, + }); + + if (choice === 'cancel') { + prompts.cancel('Demo cancelled.'); + return { action: 'cancel' }; + } + + if (choice === 'other') { + const nextProjectDir = await prompts.text({ + message: 'Demo project directory', + placeholder: projectDir, + }); + return { action: 'use', projectDir: resolve(nextProjectDir), reset: false }; + } + + if (choice === 'reset') { + const confirmed = await prompts.confirm({ + message: `Recreate ${projectDir}? Existing demo artifacts under that directory will be removed.`, + initialValue: false, + }); + return confirmed ? { action: 'use', projectDir, reset: true } : { action: 'cancel' }; + } + + return { action: 'use', projectDir, reset: false }; +} + +export async function resolveFullCredentialDecision(options: { + needsAnthropicKey: boolean; + inputMode?: KloDemoInputMode; + io: DemoInteractiveIo; + env: NodeJS.ProcessEnv; + prompts?: DemoPromptAdapter; +}): Promise { + const env = cloneEnv(options.env); + if (!options.needsAnthropicKey || env.ANTHROPIC_API_KEY) { + return { action: 'full', env }; + } + + if (!isInteractive(options.inputMode, options.io)) { + return { action: 'full', env }; + } + + const prompts = options.prompts ?? 
createClackDemoPromptAdapter(); + const choice = await prompts.select({ + message: 'Anthropic credentials are missing for the full demo', + options: [ + { value: 'process_key', label: 'Enter key for this process only' }, + { value: 'seeded', label: 'Run pre-seeded demo without LLM' }, + { value: 'replay', label: 'Run packaged replay' }, + { value: 'cancel', label: 'Cancel' }, + ], + }); + + if (choice === 'cancel') { + prompts.cancel('Demo cancelled.'); + return { action: 'cancel' }; + } + + if (choice === 'seeded' || choice === 'replay') { + return { action: 'run-mode', mode: choice }; + } + + const key = await prompts.password({ message: 'ANTHROPIC_API_KEY' }); + return { action: 'full', env: { ...env, ANTHROPIC_API_KEY: key } }; +} diff --git a/packages/cli/src/demo-metrics.test.ts b/packages/cli/src/demo-metrics.test.ts new file mode 100644 index 00000000..c7fd11f7 --- /dev/null +++ b/packages/cli/src/demo-metrics.test.ts @@ -0,0 +1,137 @@ +import type { MemoryFlowEvent, MemoryFlowReplayInput } from '@klo/context/ingest/memory-flow'; +import { describe, expect, it } from 'vitest'; +import { + buildDemoMetrics, + formatCost, + formatDuration, + formatEta, + formatTokens, + formatTokensPerSec, + progressBar, +} from './demo-metrics.js'; + +function snapshot(events: MemoryFlowEvent[], overrides: Partial = {}): MemoryFlowReplayInput { + return { + runId: 'run-1', + connectionId: 'orbit_demo', + adapter: 'live-database', + status: 'running', + sourceDir: null, + syncId: 'sync-1', + errors: [], + events, + plannedWorkUnits: [], + details: { actions: [], provenance: [], transcripts: [] }, + ...overrides, + }; +} + +describe('buildDemoMetrics', () => { + it('estimates elapsed, agent steps, tool calls, and cost from event stream', () => { + const start = Date.UTC(2026, 0, 1, 0, 0, 0); + const input = snapshot( + [ + { type: 'source_acquired', adapter: 'live-database', trigger: 'demo_full', fileCount: 5, emittedAt: new Date(start).toISOString() }, + { type: 
'work_unit_started', unitKey: 'orders', skills: [], stepBudget: 40, emittedAt: new Date(start + 1000).toISOString() }, + { type: 'work_unit_step', unitKey: 'orders', stepIndex: 6, stepBudget: 40, emittedAt: new Date(start + 6000).toISOString() }, + ], + { + plannedWorkUnits: [ + { unitKey: 'orders', rawFiles: [], peerFileCount: 0, dependencyCount: 0 }, + { unitKey: 'customers', rawFiles: [], peerFileCount: 0, dependencyCount: 0 }, + ], + details: { + actions: [], + provenance: [], + transcripts: [{ unitKey: 'orders', path: '/tmp/orders.jsonl', toolCallCount: 3, errorCount: 0, toolNames: ['x'] }], + }, + }, + ); + + const metrics = buildDemoMetrics(input, { now: () => start + 10_000 }); + + expect(metrics.elapsedMs).toBe(10_000); + expect(metrics.agentSteps).toBe(6); + expect(metrics.agentStepBudget).toBe(40); + expect(metrics.toolCalls).toBe(3); + expect(metrics.workUnitsTotal).toBe(2); + expect(metrics.estimatedTokens).toBeGreaterThan(0); + expect(metrics.estimatedCostUsd).toBeGreaterThan(0); + expect(metrics.isCostEstimated).toBe(true); + }); + + it('returns null ETA before the first work unit completes', () => { + const input = snapshot([{ type: 'source_acquired', adapter: 'live-database', trigger: 'x', fileCount: 1 }]); + const metrics = buildDemoMetrics(input, { now: () => Date.now() }); + expect(metrics.etaMs).toBeNull(); + }); + + it('extrapolates ETA from completed/total ratio when at least one unit finishes', () => { + const start = Date.UTC(2026, 0, 1); + const input = snapshot( + [ + { type: 'source_acquired', adapter: 'a', trigger: 't', fileCount: 1, emittedAt: new Date(start).toISOString() }, + { type: 'work_unit_started', unitKey: 'a', skills: [], stepBudget: 10, emittedAt: new Date(start + 1000).toISOString() }, + { type: 'work_unit_finished', unitKey: 'a', status: 'success', emittedAt: new Date(start + 5000).toISOString() }, + ], + { + plannedWorkUnits: [ + { unitKey: 'a', rawFiles: [], peerFileCount: 0, dependencyCount: 0 }, + { unitKey: 'b', 
rawFiles: [], peerFileCount: 0, dependencyCount: 0 }, + { unitKey: 'c', rawFiles: [], peerFileCount: 0, dependencyCount: 0 }, + ], + }, + ); + + const metrics = buildDemoMetrics(input, { now: () => start + 6_000 }); + expect(metrics.etaMs).toBe(12_000); + }); + + it('reports ETA=0 when the run is finished', () => { + const input = snapshot([], { status: 'done' }); + const metrics = buildDemoMetrics(input, { now: () => Date.now() }); + expect(metrics.etaMs).toBe(0); + }); +}); + +describe('format helpers', () => { + it('formats duration in s/m/h cascades', () => { + expect(formatDuration(5_000)).toBe('5s'); + expect(formatDuration(95_000)).toBe('1m35s'); + expect(formatDuration(3_700_000)).toBe('1h01m'); + expect(formatDuration(-1)).toBe('--'); + }); + + it('formats ETA as estimating before any data and as duration once running', () => { + expect(formatEta(null, 'running')).toBe('estimating...'); + expect(formatEta(8_000, 'running')).toBe('8s'); + expect(formatEta(8_000, 'done')).toBe('done'); + }); + + it('formats cost with sub-cent guard', () => { + expect(formatCost(0)).toBe('$0.000'); + expect(formatCost(0.0005)).toBe('<$0.001'); + expect(formatCost(0.012)).toBe('$0.012'); + expect(formatCost(2.5)).toBe('$2.50'); + }); + + it('formats token counts with K/M abbreviations', () => { + expect(formatTokens(0)).toBe('0'); + expect(formatTokens(450)).toBe('450'); + expect(formatTokens(2_300)).toBe('2.3K'); + expect(formatTokens(1_500_000)).toBe('1.50M'); + }); + + it('formats tokens per second', () => { + expect(formatTokensPerSec(0)).toBe('0/s'); + expect(formatTokensPerSec(450)).toBe('450/s'); + expect(formatTokensPerSec(2300)).toBe('2.3K/s'); + }); + + it('renders a deterministic progress bar with hash and dash characters', () => { + expect(progressBar(0, 10)).toBe('----------'); + expect(progressBar(0.5, 10)).toBe('#####-----'); + expect(progressBar(1, 10)).toBe('##########'); + expect(progressBar(1.4, 10)).toBe('##########'); + }); +}); diff --git 
import type { MemoryFlowEvent, MemoryFlowReplayInput } from '@klo/context/ingest/memory-flow';

// Heuristic defaults for turning observed agent activity into a token and cost estimate.
const DEFAULT_INPUT_TOKENS_PER_STEP = 4500;
const DEFAULT_OUTPUT_TOKENS_PER_STEP = 700;
const DEFAULT_INPUT_PRICE_PER_MTOK_USD = 3;
const DEFAULT_OUTPUT_PRICE_PER_MTOK_USD = 15;

/** Optional overrides for the token and price heuristics. */
interface DemoMetricsTuning {
  inputTokensPerStep?: number;
  outputTokensPerStep?: number;
  inputPricePerMTokUsd?: number;
  outputPricePerMTokUsd?: number;
}

/** Metrics derived from one memory-flow snapshot; token and cost figures are estimates. */
interface DemoMetricsSnapshot {
  elapsedMs: number;
  etaMs: number | null;
  agentSteps: number;
  agentStepBudget: number;
  toolCalls: number;
  workUnitsStarted: number;
  workUnitsFinished: number;
  workUnitsTotal: number;
  estimatedInputTokens: number;
  estimatedOutputTokens: number;
  estimatedTokens: number;
  estimatedCostUsd: number;
  tokensPerSec: number;
  status: MemoryFlowReplayInput['status'];
  isCostEstimated: boolean;
}

/** Returns the events of one type, narrowed to that variant. */
function eventsOf<T extends MemoryFlowEvent['type']>(
  events: MemoryFlowEvent[],
  type: T,
): Array<Extract<MemoryFlowEvent, { type: T }>> {
  return events.filter((event): event is Extract<MemoryFlowEvent, { type: T }> => event.type === type);
}

/**
 * Highest step index seen on any `work_unit_step` event and the largest step
 * budget seen on step or start events; both are 0 when there are none.
 */
function maxAgentStep(events: MemoryFlowEvent[]): { step: number; budget: number } {
  // Single pass instead of Math.max(...spread): spreading a very large event
  // stream into call arguments can exceed the engine's argument limit.
  let step = 0;
  let budget = 0;
  for (const event of events) {
    if (event.type === 'work_unit_step') {
      step = Math.max(step, event.stepIndex);
      budget = Math.max(budget, event.stepBudget);
    } else if (event.type === 'work_unit_started') {
      budget = Math.max(budget, event.stepBudget);
    }
  }
  return { step, budget };
}

/** Sums `toolCallCount` over all transcripts. */
function totalToolCalls(input: MemoryFlowReplayInput): number {
  return input.details.transcripts.reduce((total, transcript) => total + transcript.toolCallCount, 0);
}

/**
 * Counts started and finished work units and picks a total: the planned units,
 * else the latest `chunks_planned` count, else whatever has been observed.
 */
function workUnitProgress(input: MemoryFlowReplayInput): { started: number; finished: number; total: number } {
  const started = eventsOf(input.events, 'work_unit_started').length;
  const finished = eventsOf(input.events, 'work_unit_finished').length;
  const planned = input.plannedWorkUnits.length;
  const planEvent = eventsOf(input.events, 'chunks_planned').at(-1);
  const total = planned || planEvent?.workUnitCount || started || finished || 0;
  return { started, finished, total };
}

/** Milliseconds between the earliest parseable `emittedAt` and `nowMs`; 0 when no event is stamped. */
function elapsedMsFromEvents(events: MemoryFlowEvent[], nowMs: number): number {
  let first = Number.POSITIVE_INFINITY;
  for (const event of events) {
    if (!event.emittedAt) continue;
    const stamp = Date.parse(event.emittedAt);
    if (Number.isFinite(stamp) && stamp < first) first = stamp;
  }
  if (first === Number.POSITIVE_INFINITY) return 0;
  return Math.max(0, nowMs - first);
}

/**
 * Linear ETA: average time per finished unit times the units remaining.
 * Returns 0 when the run is not `running`, and null while there is nothing to
 * extrapolate from.
 */
function estimateEtaMs(
  elapsedMs: number,
  finished: number,
  total: number,
  status: MemoryFlowReplayInput['status'],
): number | null {
  if (status !== 'running') return 0;
  if (total === 0 || finished === 0 || elapsedMs === 0) return null;
  const perUnit = elapsedMs / finished;
  const remaining = Math.max(0, total - finished);
  return Math.round(perUnit * remaining);
}

/**
 * Builds the metrics snapshot for a memory-flow run.
 *
 * @param input Snapshot of the run (events, planned work units, transcripts).
 * @param options.now Clock override returning epoch milliseconds; defaults to `Date.now`.
 * @param options.tuning Overrides for the per-step token and per-MTok price heuristics.
 * @returns Elapsed/ETA, step and tool-call counts, work-unit progress, and an
 *   estimated token count and cost (`isCostEstimated` is always true).
 */
export function buildDemoMetrics(
  input: MemoryFlowReplayInput,
  options: { now?: () => number; tuning?: DemoMetricsTuning } = {},
): DemoMetricsSnapshot {
  const tuning = options.tuning ?? {};
  const inputTokensPerStep = tuning.inputTokensPerStep ?? DEFAULT_INPUT_TOKENS_PER_STEP;
  const outputTokensPerStep = tuning.outputTokensPerStep ?? DEFAULT_OUTPUT_TOKENS_PER_STEP;
  const inputPrice = tuning.inputPricePerMTokUsd ?? DEFAULT_INPUT_PRICE_PER_MTOK_USD;
  const outputPrice = tuning.outputPricePerMTokUsd ?? DEFAULT_OUTPUT_PRICE_PER_MTOK_USD;
  const nowMs = (options.now ?? Date.now)();
  const elapsedMs = elapsedMsFromEvents(input.events, nowMs);

  const { step, budget } = maxAgentStep(input.events);
  const toolCalls = totalToolCalls(input);
  const progress = workUnitProgress(input);
  // The estimate is driven by the largest available activity signal; a
  // finished unit counts as four steps.
  const stepDriver = Math.max(step, toolCalls, progress.finished * 4);

  const inputTokens = stepDriver * inputTokensPerStep;
  const outputTokens = stepDriver * outputTokensPerStep;
  const totalTokens = inputTokens + outputTokens;
  const cost = (inputTokens / 1_000_000) * inputPrice + (outputTokens / 1_000_000) * outputPrice;

  const elapsedSec = elapsedMs / 1000;
  const tokensPerSec = elapsedSec > 0 ? totalTokens / elapsedSec : 0;

  return {
    elapsedMs,
    etaMs: estimateEtaMs(elapsedMs, progress.finished, progress.total, input.status),
    agentSteps: step,
    agentStepBudget: budget,
    toolCalls,
    workUnitsStarted: progress.started,
    workUnitsFinished: progress.finished,
    workUnitsTotal: progress.total,
    estimatedInputTokens: inputTokens,
    estimatedOutputTokens: outputTokens,
    estimatedTokens: totalTokens,
    estimatedCostUsd: cost,
    tokensPerSec,
    status: input.status,
    isCostEstimated: true,
  };
}

/** Formats milliseconds as `5s`, `1m35s` or `1h01m`; `--` for negative or non-finite input. */
export function formatDuration(ms: number): string {
  if (!Number.isFinite(ms) || ms < 0) return '--';
  const totalSec = Math.round(ms / 1000);
  if (totalSec < 60) return `${totalSec}s`;
  const min = Math.floor(totalSec / 60);
  const sec = totalSec % 60;
  if (min < 60) return `${min}m${sec.toString().padStart(2, '0')}s`;
  const hr = Math.floor(min / 60);
  return `${hr}h${(min % 60).toString().padStart(2, '0')}m`;
}

/** `done` once the run is not running, `estimating...` while the ETA is unknown, else a duration. */
export function formatEta(ms: number | null, status: MemoryFlowReplayInput['status']): string {
  if (status !== 'running') return 'done';
  if (ms === null) return 'estimating...';
  return formatDuration(ms);
}

/** Formats USD with three decimals below one dollar and two from one dollar up. */
export function formatCost(usd: number): string {
  if (!Number.isFinite(usd) || usd <= 0) return '$0.000';
  if (usd < 0.001) return '<$0.001';
  if (usd < 1) {
    const milli = usd.toFixed(3);
    // Values that round up to a full dollar use the two-decimal form.
    if (milli !== '1.000') return `$${milli}`;
  }
  return `$${usd.toFixed(2)}`;
}

/** Formats a token count as a whole number, `2.3K` or `1.50M`. */
export function formatTokens(n: number): string {
  if (!Number.isFinite(n) || n <= 0) return '0';
  const whole = Math.round(n);
  if (whole < 1000) return `${whole}`;
  if (n < 1_000_000) {
    const thousands = (n / 1000).toFixed(1);
    // Move up a unit instead of printing "1000.0K".
    if (thousands !== '1000.0') return `${thousands}K`;
  }
  return `${(n / 1_000_000).toFixed(2)}M`;
}

/** Formats a token rate as `450/s` or `2.3K/s`. */
export function formatTokensPerSec(n: number): string {
  if (!Number.isFinite(n) || n <= 0) return '0/s';
  const whole = Math.round(n);
  // Decide on the rounded value so 999.6 prints as "1.0K/s", not "1000/s".
  if (whole < 1000) return `${whole}/s`;
  return `${(n / 1000).toFixed(1)}K/s`;
}

const PROGRESS_BAR_WIDTH = 12;

/**
 * Renders a `#`/`-` bar of `width` cells for a ratio clamped to [0, 1].
 * A NaN ratio renders as empty progress; a negative or non-finite width
 * renders as an empty string.
 */
export function progressBar(ratio: number, width: number = PROGRESS_BAR_WIDTH): string {
  const cells = Number.isFinite(width) ? Math.max(0, Math.floor(width)) : 0;
  // NaN (for example 0 / 0 before any work is planned) would otherwise
  // propagate into repeat() and produce a zero-length bar.
  const clamped = Number.isNaN(ratio) ? 0 : Math.max(0, Math.min(1, ratio));
  const filled = Math.round(clamped * cells);
  return `${'#'.repeat(filled)}${'-'.repeat(cells - filled)}`;
}
+ }); + + it('formats diff_computed as a comma-separated breakdown', () => { + expect( + formatMemoryFlowEventLine({ + type: 'diff_computed', + added: 3, + modified: 1, + deleted: 0, + unchanged: 4, + }), + ).toBe('[diff] Tables: +3 new, ~1 changed, =4 unchanged'); + }); + + it('formats diff_computed as "no changes" when every counter is zero', () => { + expect( + formatMemoryFlowEventLine({ + type: 'diff_computed', + added: 0, + modified: 0, + deleted: 0, + unchanged: 0, + }), + ).toBe('[diff] Tables: no changes'); + }); + + it('formats chunks_planned without removals as a single readable sentence', () => { + expect( + formatMemoryFlowEventLine({ + type: 'chunks_planned', + chunkCount: 7, + workUnitCount: 5, + evictionCount: 0, + }), + ).toBe('[plan] Grouped 5 tables into 7 business areas'); + }); + + it('formats chunks_planned with removals when evictions are non-zero', () => { + expect( + formatMemoryFlowEventLine({ + type: 'chunks_planned', + chunkCount: 7, + workUnitCount: 5, + evictionCount: 2, + }), + ).toBe('[plan] Grouped 5 tables into 7 business areas (2 removals)'); + }); + + it('formats work_unit_started in human terms', () => { + expect( + formatMemoryFlowEventLine({ + type: 'work_unit_started', + unitKey: 'revenue-policy', + skills: ['sl_expert', 'wiki_writer'], + stepBudget: 40, + }), + ).toBe('[analyze] Reviewing "revenue-policy" - budget 40 agent steps'); + }); + + it('suppresses noisy work_unit_step events', () => { + expect( + formatMemoryFlowEventLine({ + type: 'work_unit_step', + unitKey: 'revenue-policy', + stepIndex: 3, + stepBudget: 40, + }), + ).toBeNull(); + }); + + it('formats candidate_action with friendly target and arrow', () => { + expect( + formatMemoryFlowEventLine({ + type: 'candidate_action', + unitKey: 'revenue-policy', + target: 'sl', + action: 'created', + key: 'warehouse.revenue', + }), + ).toBe('[draft] revenue-policy -> semantic-layer: created warehouse.revenue'); + }); + + it('formats work_unit_finished with status-aware 
tag', () => { + expect( + formatMemoryFlowEventLine({ + type: 'work_unit_finished', + unitKey: 'revenue-policy', + status: 'success', + }), + ).toBe('[done] revenue-policy reviewed'); + + expect( + formatMemoryFlowEventLine({ + type: 'work_unit_finished', + unitKey: 'revenue-policy', + status: 'failed', + reason: 'budget exhausted', + }), + ).toBe('[fail] revenue-policy needs attention - budget exhausted'); + }); + + it('formats reconciliation_finished with friendly counter wording', () => { + expect( + formatMemoryFlowEventLine({ + type: 'reconciliation_finished', + conflictCount: 0, + fallbackCount: 0, + }), + ).toBe('[validate] Reconciled drafts - no conflicts, nothing flagged for review'); + + expect( + formatMemoryFlowEventLine({ + type: 'reconciliation_finished', + conflictCount: 2, + fallbackCount: 1, + }), + ).toBe('[validate] Reconciled drafts - 2 conflicts, 1 item flagged for review'); + }); + + it('formats saved with optional shortened commit sha and pluralized memory count', () => { + expect( + formatMemoryFlowEventLine({ + type: 'saved', + commitSha: 'abc1234567890', // pragma: allowlist secret + wikiCount: 2, + slCount: 5, + }), + ).toBe('[memory] Saved 7 memories (2 wiki, 5 semantic-layer) - commit abc1234'); + + expect( + formatMemoryFlowEventLine({ + type: 'saved', + commitSha: null, + wikiCount: 0, + slCount: 1, + }), + ).toBe('[memory] Saved 1 memory (0 wiki, 1 semantic-layer)'); + }); + + it('formats report_created with run id', () => { + expect( + formatMemoryFlowEventLine({ + type: 'report_created', + runId: 'run-xyz', + }), + ).toBe('[report] Run report ready: run-xyz'); + }); +}); + +describe('createPlainProgressEmitter', () => { + it('writes one line per new event and never re-emits prior events', () => { + const written: string[] = []; + const io = { + stdout: { write: (chunk: string) => written.push(chunk), isTTY: false }, + stderr: { write: () => undefined }, + }; + const emit = createPlainProgressEmitter(io); + + emit( + snapshot([ + { 
import type { MemoryFlowEvent, MemoryFlowReplayInput } from '@klo/context/ingest/memory-flow';
import type { KloDemoIo } from './demo.js';

/** `"1 file"` / `"2 files"`; `many` overrides the default `${one}s` plural. */
function plural(n: number, one: string, many = `${one}s`): string {
  return `${n} ${n === 1 ? one : many}`;
}

/** Comma-separated list of the non-zero diff counters, or `no changes` when all are zero. */
function formatDiff(added: number, modified: number, deleted: number, unchanged: number): string {
  const parts: string[] = [];
  if (added > 0) parts.push(`+${added} new`);
  if (modified > 0) parts.push(`~${modified} changed`);
  if (deleted > 0) parts.push(`-${deleted} removed`);
  if (unchanged > 0) parts.push(`=${unchanged} unchanged`);
  return parts.length > 0 ? parts.join(', ') : 'no changes';
}

/**
 * Renders one memory-flow event as a single tagged progress line.
 *
 * @returns The line without a trailing newline, or null for events that are
 *   deliberately not shown (`work_unit_step`) or whose type is not recognised.
 */
export function formatMemoryFlowEventLine(event: MemoryFlowEvent): string | null {
  switch (event.type) {
    case 'source_acquired':
      return `[connect] Connected ${event.adapter} - ${plural(event.fileCount, 'database file')} (${event.trigger})`;
    case 'scope_detected':
      return event.fingerprint
        ? `[scope] Scope locked: ${event.fingerprint}`
        : '[scope] Reviewing the whole warehouse (no scope filter)';
    case 'raw_snapshot_written':
      return `[snapshot] Captured snapshot ${event.syncId} - ${plural(event.rawFileCount, 'file')}`;
    case 'diff_computed':
      return `[diff] Tables: ${formatDiff(event.added, event.modified, event.deleted, event.unchanged)}`;
    case 'chunks_planned':
      return event.evictionCount > 0
        ? `[plan] Grouped ${plural(event.workUnitCount, 'table')} into ${plural(event.chunkCount, 'business area')} (${plural(event.evictionCount, 'removal')})`
        : `[plan] Grouped ${plural(event.workUnitCount, 'table')} into ${plural(event.chunkCount, 'business area')}`;
    case 'stage_skipped':
      return `[skip] ${event.stage} skipped: ${event.reason}`;
    case 'work_unit_started':
      return `[analyze] Reviewing "${event.unitKey}" - budget ${plural(event.stepBudget, 'agent step')}`;
    case 'work_unit_step':
      // Per-step events are too noisy for line-oriented output.
      return null;
    case 'candidate_action': {
      const target = event.target === 'sl' ? 'semantic-layer' : 'wiki';
      return `[draft] ${event.unitKey} -> ${target}: ${event.action} ${event.key}`;
    }
    case 'work_unit_finished':
      if (event.status === 'success') {
        return `[done] ${event.unitKey} reviewed`;
      }
      return `[fail] ${event.unitKey} needs attention${event.reason ? ` - ${event.reason}` : ''}`;
    case 'reconciliation_finished': {
      const conflicts = event.conflictCount === 0 ? 'no conflicts' : plural(event.conflictCount, 'conflict');
      const fallbacks = event.fallbackCount === 0 ? 'nothing flagged for review' : `${plural(event.fallbackCount, 'item')} flagged for review`;
      return `[validate] Reconciled drafts - ${conflicts}, ${fallbacks}`;
    }
    case 'saved': {
      const total = event.wikiCount + event.slCount;
      const commit = event.commitSha ? ` - commit ${event.commitSha.slice(0, 7)}` : '';
      return `[memory] Saved ${plural(total, 'memory', 'memories')} (${event.wikiCount} wiki, ${event.slCount} semantic-layer)${commit}`;
    }
    case 'provenance_recorded':
      return `[trace] Recorded provenance for ${plural(event.rowCount, 'row')}`;
    case 'report_created':
      return `[report] Run report ready: ${event.runId}`;
    default:
      // An event type this build does not know would otherwise fall through as
      // `undefined` and be printed literally by the emitter.
      return null;
  }
}

/**
 * Creates an emitter that writes one line to `io.stdout` per event not yet
 * printed. It tracks how many events of the snapshot it has consumed, so
 * passing a growing snapshot repeatedly never re-prints earlier events.
 */
export function createPlainProgressEmitter(io: KloDemoIo): (snapshot: MemoryFlowReplayInput) => void {
  let printed = 0;
  return (snapshot) => {
    while (printed < snapshot.events.length) {
      const event = snapshot.events[printed++];
      if (!event) continue;
      const line = formatMemoryFlowEventLine(event);
      if (line !== null) {
        io.stdout.write(`${line}\n`);
      }
    }
  };
}
it } from 'vitest'; +import { DEMO_LATEST_REPLAY_FILE, loadLatestDemoReplay, writeDemoReplay } from './demo-replay-store.js'; + +function replay(overrides: Partial = {}): MemoryFlowReplayInput { + return { + metadata: { + schemaVersion: 1, + mode: 'full', + origin: 'captured', + timing: 'captured', + capturedAt: '2026-05-01T10:00:03.000Z', + sourceReportId: 'report-1', + sourceReportPath: 'report-1', + fallbackReason: null, + }, + runId: 'run-1', + connectionId: 'orbit_demo', + adapter: 'live-database', + status: 'done', + sourceDir: null, + syncId: 'sync-1', + reportId: 'report-1', + reportPath: 'report-1', + errors: [], + events: [{ type: 'report_created', runId: 'run-1', reportPath: 'report-1', emittedAt: '2026-05-01T10:00:03.000Z' }], + plannedWorkUnits: [], + details: { actions: [], provenance: [], transcripts: [] }, + ...overrides, + }; +} + +describe('demo replay store', () => { + it('writes a versioned replay file and updates latest', async () => { + const projectDir = await mkdtemp(join(tmpdir(), 'klo-demo-replay-store-')); + + const saved = await writeDemoReplay(projectDir, replay(), { label: 'full' }); + + expect(saved.replayPath).toMatch(/replays[/\\]full-run-1.memory-flow.v1.json$/); + expect(saved.latestReplayPath).toBe(join(projectDir, 'replays', DEMO_LATEST_REPLAY_FILE)); + expect(await loadLatestDemoReplay(projectDir)).toMatchObject({ + runId: 'run-1', + metadata: { mode: 'full', origin: 'captured', timing: 'captured' }, + }); + + const wrapper = JSON.parse(await readFile(saved.latestReplayPath, 'utf-8')) as { + memoryFlowReplaySchemaVersion?: number; + }; + expect(wrapper.memoryFlowReplaySchemaVersion).toBe(1); + }); + + it('returns null when no latest local replay exists', async () => { + const projectDir = await mkdtemp(join(tmpdir(), 'klo-demo-replay-store-empty-')); + + await expect(loadLatestDemoReplay(projectDir)).resolves.toBeNull(); + }); +}); diff --git a/packages/cli/src/demo-replay-store.ts b/packages/cli/src/demo-replay-store.ts new 
import { constants as fsConstants } from 'node:fs';
import { access, copyFile, mkdir, readFile, writeFile } from 'node:fs/promises';
import { join, resolve } from 'node:path';
import { parseMemoryFlowReplayInput, type MemoryFlowReplayInput } from '@klo/context/ingest/memory-flow';

/** On-disk envelope: a schema version marker plus the replay payload. */
interface StoredMemoryFlowReplayFile {
  memoryFlowReplaySchemaVersion: 1;
  replay: unknown;
}

/** Paths written by `writeDemoReplay`. */
interface SavedDemoReplay {
  replayPath: string;
  latestReplayPath: string;
}

export const DEMO_LATEST_REPLAY_FILE = 'latest.memory-flow.v1.json';

/** True when `path` is accessible; any `access` failure is reported as false. */
async function exists(path: string): Promise<boolean> {
  try {
    await access(path, fsConstants.F_OK);
    return true;
  } catch {
    return false;
  }
}

/**
 * Reduces a value to `[a-zA-Z0-9._-]`, collapsing other runs to `-` and
 * trimming leading/trailing dashes; falls back to `replay` when nothing is left.
 */
function safeReplayName(value: string): string {
  return value.replace(/[^a-zA-Z0-9._-]+/g, '-').replace(/^-+|-+$/g, '') || 'replay';
}

/** File name of the form `<label>-<runId>.memory-flow.v1.json`, both parts sanitised. */
function demoReplayFileName(input: MemoryFlowReplayInput, label: string): string {
  return `${safeReplayName(label)}-${safeReplayName(input.runId)}.memory-flow.v1.json`;
}

/** Wraps a replay in the versioned envelope. */
function wrapReplay(input: MemoryFlowReplayInput): StoredMemoryFlowReplayFile {
  return { memoryFlowReplaySchemaVersion: 1, replay: input };
}

/**
 * Reads and validates a stored replay file.
 *
 * @throws Error when the file is not a version-1 envelope (including JSON that
 *   is not an object, such as `null`). Read and JSON-syntax errors propagate
 *   unchanged, as do errors from `parseMemoryFlowReplayInput`.
 */
export async function loadDemoReplayFile(path: string): Promise<MemoryFlowReplayInput> {
  const parsed: unknown = JSON.parse(await readFile(path, 'utf-8'));
  // Check the shape before dereferencing: JSON.parse can legitimately return
  // null or a primitive, which would otherwise fail with an opaque TypeError.
  if (
    typeof parsed !== 'object' ||
    parsed === null ||
    (parsed as Partial<StoredMemoryFlowReplayFile>).memoryFlowReplaySchemaVersion !== 1
  ) {
    throw new Error(`Unsupported demo replay schema version in ${path}`);
  }
  return parseMemoryFlowReplayInput((parsed as StoredMemoryFlowReplayFile).replay);
}

/** Loads `<projectDir>/replays/latest.memory-flow.v1.json`, or returns null when it does not exist. */
export async function loadLatestDemoReplay(projectDir: string): Promise<MemoryFlowReplayInput | null> {
  const latestPath = join(resolve(projectDir), 'replays', DEMO_LATEST_REPLAY_FILE);
  if (!(await exists(latestPath))) {
    return null;
  }
  return loadDemoReplayFile(latestPath);
}

/**
 * Writes the replay under `<projectDir>/replays/` as a labelled, run-specific
 * file and copies it over the `latest` file.
 *
 * @returns The paths of both files.
 */
export async function writeDemoReplay(
  projectDir: string,
  input: MemoryFlowReplayInput,
  options: { label: 'full' | 'deterministic' | 'seeded' },
): Promise<SavedDemoReplay> {
  const replayDir = join(resolve(projectDir), 'replays');
  await mkdir(replayDir, { recursive: true });
  const replayPath = join(replayDir, demoReplayFileName(input, options.label));
  const latestReplayPath = join(replayDir, DEMO_LATEST_REPLAY_FILE);
  const body = `${JSON.stringify(wrapReplay(input), null, 2)}\n`;
  await writeFile(replayPath, body, 'utf-8');
  await copyFile(replayPath, latestReplayPath);
  return { replayPath, latestReplayPath };
}
IngestReportSnapshot, type MemoryFlowReplayInput } from '@klo/context/ingest'; +import { loadKloProject, type KloLocalProject } from '@klo/context/project'; +import { runLocalScan, type KloScanReport, type LocalScanRunResult } from '@klo/context/scan'; +import { DEMO_ADAPTER, DEMO_CONNECTION_ID, DEMO_FULL_JOB_ID, ensureDemoProject } from './demo-assets.js'; +import { loadLatestDemoReplay } from './demo-replay-store.js'; +import { createKloCliLocalIngestAdapters } from './local-adapters.js'; + +interface DemoScanOptions { + projectDir: string; + jobId?: string; + now?: () => Date; + runLocalScan?: typeof runLocalScan; +} + +interface DemoScanResult { + project: KloLocalProject; + result: LocalScanRunResult; +} + +interface DemoInspectSummary { + projectDir: string; + scanReport: KloScanReport | null; + fullReport: IngestReportSnapshot | null; + semanticLayerFileCount: number; + knowledgeFileCount: number; + replayFileCount: number; + latestReplay: MemoryFlowReplayInput | null; +} + +interface DemoInspectDeps { + findFullReport?: (project: KloLocalProject) => Promise; +} + +async function ensureDemoProjectForReuse(projectDir: string): Promise { + await ensureDemoProject({ projectDir, force: false }).catch((error) => { + if (error instanceof Error && error.message.includes('Demo project already exists')) { + return; + } + throw error; + }); +} + +async function loadReadyDemoProject(projectDir: string): Promise { + try { + return await loadKloProject({ projectDir }); + } catch (error) { + const reason = error instanceof Error ? error.message : String(error); + throw new Error( + `Demo project is not ready at ${projectDir}: ${reason}. 
Run klo setup demo init --project-dir ${projectDir} --force --no-input to recreate it.`, + ); + } +} + +function reportDiff(report: KloScanReport): string { + return `+${report.diffSummary.tablesAdded}/~${report.diffSummary.tablesModified}/-${report.diffSummary.tablesDeleted}/=${report.diffSummary.tablesUnchanged}`; +} + +function jsonReport(raw: string, path: string): KloScanReport { + try { + return JSON.parse(raw) as KloScanReport; + } catch (error) { + const reason = error instanceof Error ? error.message : String(error); + throw new Error(`Invalid demo scan report at ${path}: ${reason}`); + } +} + +async function countFiles(project: KloLocalProject, root: string, predicate: (path: string) => boolean): Promise { + const { files } = await project.fileStore.listFiles(root, true); + return files.filter(predicate).length; +} + +async function findFullDemoReport(project: KloLocalProject): Promise { + return getLocalIngestStatus(project, DEMO_FULL_JOB_ID); +} + +function savedCounts(report: IngestReportSnapshot): { wikiCount: number; slCount: number } { + const actions = report.body.workUnits.flatMap((workUnit) => workUnit.actions); + return { + wikiCount: actions.filter((action) => action.target === 'wiki').length, + slCount: actions.filter((action) => action.target === 'sl').length, + }; +} + +export async function runDemoScan(options: DemoScanOptions): Promise { + await ensureDemoProjectForReuse(options.projectDir); + const project = await loadReadyDemoProject(options.projectDir); + const executeScan = options.runLocalScan ?? runLocalScan; + const result = await executeScan({ + project, + connectionId: DEMO_CONNECTION_ID, + mode: 'structural', + trigger: 'cli', + jobId: options.jobId ?? 
'demo-scan', + now: options.now, + adapters: createKloCliLocalIngestAdapters(project), + }); + + return { project, result }; +} + +export async function findLatestDemoScanReport(projectDir: string): Promise { + const project = await loadReadyDemoProject(projectDir); + const root = `raw-sources/${DEMO_CONNECTION_ID}/${DEMO_ADAPTER}`; + const { files } = await project.fileStore.listFiles(root, true); + const latest = files + .filter((path) => path.endsWith('/scan-report.json')) + .sort() + .at(-1); + if (!latest) { + return null; + } + + const reportPath = `${root}/${latest}`; + const report = await project.fileStore.readFile(reportPath); + return jsonReport(report.content, reportPath); +} + +export async function inspectDemoProject( + projectDir: string, + projectOverride?: KloLocalProject, + deps: DemoInspectDeps = {}, +): Promise { + const project = projectOverride ?? (await loadReadyDemoProject(projectDir)); + const scanReport = await findLatestDemoScanReport(project.projectDir); + const fullReport = await (deps.findFullReport ?? 
findFullDemoReport)(project); + const semanticLayerFileCount = await countFiles( + project, + `semantic-layer/${DEMO_CONNECTION_ID}`, + (path) => path.endsWith('.yaml') || path.endsWith('.yml'), + ); + const knowledgeFileCount = await countFiles(project, 'knowledge', (path) => path.endsWith('.md')); + const replayFileCount = await countFiles(project, 'replays', (path) => path.endsWith('.json')); + const latestReplay = await loadLatestDemoReplay(project.projectDir); + + return { + projectDir: project.projectDir, + scanReport, + fullReport, + semanticLayerFileCount, + knowledgeFileCount, + replayFileCount, + latestReplay, + }; +} + +export function formatDemoScanSummary(report: KloScanReport): string { + return [ + 'Demo scan: done', + `Connection: ${report.connectionId}`, + `Driver: ${report.driver}`, + `Mode: ${report.mode}`, + `Tables: ${reportDiff(report)}`, + `Semantic-layer artifacts: ${report.artifactPaths.manifestShards.length}`, + `Report: ${report.artifactPaths.reportPath ?? 'none'}`, + 'Next: klo setup demo inspect', + ' Shows the files and semantic-layer draft created from the database scan.', + '', + ].join('\n'); +} + +function replayLine(replay: MemoryFlowReplayInput | null): string { + if (!replay?.metadata) { + return 'Latest replay: packaged demo replay'; + } + return `Latest replay: ${replay.metadata.mode} (${replay.metadata.origin}, ${replay.metadata.timing})`; +} + +export function formatDemoInspect(summary: DemoInspectSummary): string { + const report = summary.scanReport; + const fullReport = summary.fullReport; + const fullCounts = fullReport ? savedCounts(fullReport) : null; + const scanLines = report + ? [ + 'Scan artifacts: yes', + `Connection: ${report.connectionId}`, + `Driver: ${report.driver}`, + `Tables: ${reportDiff(report)}`, + `Report: ${report.artifactPaths.reportPath ?? 'none'}`, + ] + : ['Scan artifacts: none']; + + const memoryLines = fullReport + ? 
[ + 'Memory synthesis: ran', + `Full report: ${fullReport.id}`, + `Full run: ${fullReport.runId}`, + `Saved memory: ${fullCounts?.wikiCount ?? 0} wiki, ${fullCounts?.slCount ?? 0} semantic layer`, + `Provenance rows: ${fullReport.body.provenanceRows.length}`, + ] + : [report ? 'Memory synthesis: full mode not run' : 'Memory synthesis: not run']; + const next = fullReport + ? [ + `Next: klo ingest watch ${fullReport.runId} --project-dir ${summary.projectDir}`, + ' Opens the captured run timeline and lets you inspect what happened.', + 'Next: klo setup demo replay', + ' Replays the same visual story without calling the LLM again.', + ] + : report + ? [ + 'Next: klo setup demo --mode full', + ' Runs the full AI-backed pass with your LLM provider.', + 'Next: klo setup demo replay', + ' Replays the packaged visual story without calling the LLM.', + ] + : [ + 'Next: klo setup demo --no-input', + ' Runs the pre-seeded demo without calling the LLM.', + 'Next: klo setup demo --mode full', + ' Runs the full AI-backed pass with your LLM provider.', + ]; + + return [ + `Demo project: ${summary.projectDir}`, + ...scanLines, + `Semantic-layer files: ${summary.semanticLayerFileCount}`, + `Knowledge files: ${summary.knowledgeFileCount}`, + `Replay files: ${summary.replayFileCount}`, + replayLine(summary.latestReplay), + ...memoryLines, + ...next, + '', + ].join('\n'); +} diff --git a/packages/cli/src/demo-seeded-inspect.test.ts b/packages/cli/src/demo-seeded-inspect.test.ts new file mode 100644 index 00000000..3e0fc3ee --- /dev/null +++ b/packages/cli/src/demo-seeded-inspect.test.ts @@ -0,0 +1,123 @@ +import { access, readFile, rm } from 'node:fs/promises'; +import { tmpdir } from 'node:os'; +import { join } from 'node:path'; +import { afterEach, describe, expect, it } from 'vitest'; +import { runDemoSeeded } from './demo-seeded.js'; +import { formatSeededInspect, inspectSeededProject } from './demo-seeded-inspect.js'; +import { KLO_NEXT_STEP_COMMANDS } from './next-steps.js'; + 
+describe('seeded demo inspect contract', () => {
+  const projectDir = join(tmpdir(), `klo-demo-seeded-inspect-${process.pid}`);
+
+  afterEach(async () => {
+    await rm(projectDir, { recursive: true, force: true });
+  });
+
+  it('reports the PRD source inventory, generated outputs, status, metadata, and next commands', async () => {
+    await runDemoSeeded({ projectDir });
+    const inspect = await inspectSeededProject(projectDir);
+
+    expect(inspect).toMatchObject({
+      projectDir,
+      mode: 'seeded',
+      status: { status: 'ready', missing: [] },
+      modeMetadata: {
+        mode: 'seeded',
+        source: 'packaged demo project',
+        generatedContext: 'prebuilt from bundled assets',
+        llmCalls: 'none',
+        origin: 'packaged',
+        timing: 'prebuilt',
+        sourceReportId: 'demo-seeded-report',
+        sourceReportPath: 'reports/seeded-demo-report.json',
+      },
+      sourceBundle: {
+        warehouse: {
+          label: 'Warehouse',
+          path: 'demo.db',
+          tableCount: 8,
+          totalRows: 11234,
+          rowCounts: {
+            accounts: 210,
+            arr_movements: 720,
+            contracts: 320,
+            invoices: 3000,
+            plans: 4,
+            purchase_requests: 5200,
+            support_tickets: 520,
+            users: 1260,
+          },
+        },
+        dbt: { label: 'dbt', path: 'raw-sources/dbt', modelCount: 3, sourceTableCount: 8 },
+        bi: { label: 'BI', path: 'raw-sources/bi', exploreCount: 5, dashboardCount: 2 },
+        notion: { label: 'Notion', path: 'raw-sources/notion', pageCount: 8 },
+      },
+      generatedOutputs: {
+        semanticLayer: { path: 'semantic-layer/orbit_demo', manifestSourceCount: 6, fileCount: 6 },
+        knowledge: { path: 'knowledge/global', manifestPageCount: 10, fileCount: 10 },
+        links: { path: 'links/provenance.json', manifestLinkCount: 23, linkCount: 23 },
+        reports: { primaryPath: 'reports/seeded-demo-report.json', fileCount: 1 },
+        replays: { primaryPath: 'replays/replay.memory-flow.v1.json', latestPath: 'replays/latest.memory-flow.v1.json' },
+      },
+      nextCommands: KLO_NEXT_STEP_COMMANDS,
+    });
+
+    expect(inspect.generatedOutputs.replays.fileCount).toBeGreaterThanOrEqual(3);
+    await expect(access(join(projectDir, inspect.generatedOutputs.reports.primaryPath))).resolves.toBeUndefined();
+    await expect(access(join(projectDir, inspect.generatedOutputs.replays.primaryPath))).resolves.toBeUndefined();
+    await expect(access(join(projectDir, inspect.generatedOutputs.replays.latestPath))).resolves.toBeUndefined();
+  });
+
+  it('formats seeded inspect from the normalized contract', async () => {
+    await runDemoSeeded({ projectDir });
+    const output = formatSeededInspect(await inspectSeededProject(projectDir));
+
+    expect(output).toContain(`Demo project: ${projectDir}`);
+    expect(output).toContain('Status: ready');
+    expect(output).toContain('Mode: seeded (pre-seeded demo project)');
+    expect(output).toContain('Source: packaged demo project');
+    expect(output).toContain('Generated context: prebuilt from bundled assets');
+    expect(output).toContain('LLM calls: none');
+    expect(output).toContain('Warehouse: 8 tables, 11,234 rows');
+    expect(output).toContain('Rows: accounts 210, arr_movements 720, contracts 320, invoices 3000');
+    expect(output).toContain('dbt: 3 models, 8 source tables');
+    expect(output).toContain('BI: 5 explores, 2 dashboards');
+    expect(output).toContain('Notion: 8 pages');
+    expect(output).toContain('Semantic-layer sources: 6 manifest, 6 files');
+    expect(output).toContain('Knowledge pages: 10 manifest, 10 files');
+    expect(output).toContain('Evidence links: 23 manifest, 23 links');
+    expect(output).toContain('Report: reports/seeded-demo-report.json');
+    expect(output).toContain('Replay: replays/replay.memory-flow.v1.json');
+    expect(output).toContain('Latest replay: seeded (packaged, prebuilt)');
+    expect(output).toContain(' $ klo agent tools --json');
+    expect(output).toContain(' $ klo agent context --json');
+    expect(output).toContain(' $ klo serve --mcp stdio --user-id local');
+    expect(output.indexOf('klo agent tools --json')).toBeLessThan(
+      output.indexOf('klo serve --mcp stdio --user-id local'),
+    );
+    expect(output).not.toContain('klo ask');
+    expect(output).not.toContain('deterministic mode');
+  });
+
+  it('reports missing seeded paths without reading stale counts as ready', async () => {
+    await runDemoSeeded({ projectDir });
+    await rm(join(projectDir, 'links', 'provenance.json'));
+
+    const inspect = await inspectSeededProject(projectDir);
+
+    expect(inspect.status).toEqual({ status: 'corrupt', missing: ['links/provenance.json'] });
+    expect(formatSeededInspect(inspect)).toContain('Status: corrupt');
+    expect(formatSeededInspect(inspect)).toContain('Missing: links/provenance.json');
+  });
+
+  it('keeps provenance link counts tied to the project file', async () => {
+    await runDemoSeeded({ projectDir });
+
+    const inspect = await inspectSeededProject(projectDir);
+    const raw = await readFile(join(projectDir, 'links', 'provenance.json'), 'utf-8');
+    const links = JSON.parse(raw) as unknown[];
+
+    expect(inspect.generatedOutputs.links.linkCount).toBe(links.length);
+    expect(inspect.generatedOutputs.links.linkCount).toBe(23);
+  });
+});
diff --git a/packages/cli/src/demo-seeded-inspect.ts b/packages/cli/src/demo-seeded-inspect.ts
new file mode 100644
index 00000000..ea9ebe79
--- /dev/null
+++ b/packages/cli/src/demo-seeded-inspect.ts
@@ -0,0 +1,330 @@
+import { constants as fsConstants } from 'node:fs';
+import { access, readFile, readdir } from 'node:fs/promises';
+import { join, resolve } from 'node:path';
+import type { MemoryFlowReplayInput } from '@klo/context/ingest/memory-flow';
+import { loadPackagedDemoReplay } from './demo-assets.js';
+import { DEMO_LATEST_REPLAY_FILE, loadLatestDemoReplay } from './demo-replay-store.js';
+import { KLO_NEXT_STEP_COMMANDS, KLO_NEXT_STEP_COMMAND_WIDTH } from './next-steps.js';
+
+/** 'missing' when no required path exists, 'corrupt' when only some are absent, 'ready' when all are present. */
+type SeededInspectReadiness = 'missing' | 'ready' | 'corrupt';
+
+/** Shape expected of the project's `manifest.json`; the parsed file is cast to this type without validation. */
+export interface DemoSeededManifest {
+  demoAssetSchemaVersion: number;
+  name: string;
+  displayName: string;
+  mode: string;
+  source?: string;
+  sources: {
+    warehouse: { label: string; path?: string; tables: number; rowCounts: Record<string, number> };
+    dbt: { label: string; path?: string; models: number; sourceTables: number };
+    bi: { label: string; path?: string; explores: number; dashboards: number };
+    notion: { label: string; path?: string; pages: number };
+  };
+  generated: {
+    semanticLayer: { path?: string; sourceCount: number };
+    knowledge: { path?: string; pageCount: number };
+    links: { path?: string; linkCount: number };
+  };
+}
+
+/** Normalized inspect contract returned by `inspectSeededProject` and rendered by `formatSeededInspect`. */
+export interface SeededInspectSummary {
+  projectDir: string;
+  mode: 'seeded';
+  manifest: DemoSeededManifest;
+  status: { status: SeededInspectReadiness; missing: string[] };
+  sourceBundle: {
+    warehouse: {
+      label: string;
+      path: string;
+      tableCount: number;
+      rowCounts: Record<string, number>;
+      totalRows: number;
+    };
+    dbt: { label: string; path: string; modelCount: number; sourceTableCount: number };
+    bi: { label: string; path: string; exploreCount: number; dashboardCount: number };
+    notion: { label: string; path: string; pageCount: number };
+  };
+  generatedOutputs: {
+    semanticLayer: { path: string; manifestSourceCount: number; fileCount: number };
+    knowledge: { path: string; manifestPageCount: number; fileCount: number };
+    links: { path: string; manifestLinkCount: number; linkCount: number };
+    reports: { primaryPath: string; fileCount: number };
+    replays: { primaryPath: string; latestPath: string; fileCount: number };
+  };
+  modeMetadata: {
+    mode: 'seeded';
+    source: 'packaged demo project';
+    generatedContext: 'prebuilt from bundled assets';
+    llmCalls: 'none';
+    origin: string;
+    timing: string;
+    sourceReportId: string | null;
+    sourceReportPath: string | null;
+  };
+  nextCommands: Array<{ command: string; description: string }>;
+  latestReplay: MemoryFlowReplayInput | null;
+}
+
+/** Project-relative paths whose presence is checked to decide readiness. */
+const REQUIRED_SEEDED_PROJECT_PATHS = [
+  'klo.yaml',
+  'demo.db',
+  'state.sqlite',
+  'manifest.json',
+  join('replays', 'replay.memory-flow.v1.json'),
+  join('raw-sources', 'warehouse', 'accounts.csv'),
+  join('raw-sources', 'dbt', 'schema.yml'),
+  join('raw-sources', 'bi', 'revenue_exec.dashboard.lookml'),
+  join('raw-sources', 'notion', 'revenue-reporting-policy.md'),
+  join('semantic-layer', 'orbit_demo', 'accounts.yaml'),
+  join('knowledge', 'global', 'arr-contract-first.md'),
+  join('links', 'provenance.json'),
+  join('reports', 'seeded-demo-report.json'),
+] as const;
+
+/** Resolves to true when `access(path, F_OK)` succeeds and to false when it rejects for any reason. */
+async function exists(path: string): Promise<boolean> {
+  try {
+    await access(path, fsConstants.F_OK);
+    return true;
+  } catch {
+    return false;
+  }
+}
+
+/** Reads and JSON-parses `<projectDir>/manifest.json`; read and parse errors propagate to the caller. */
+async function loadSeededManifest(projectDir: string): Promise<DemoSeededManifest> {
+  const raw = await readFile(join(projectDir, 'manifest.json'), 'utf-8');
+  return JSON.parse(raw) as DemoSeededManifest;
+}
+
+/** Recursively lists entries under `dir`, keeping names that end in `ext` when given, sorted; failures yield []. */
+async function listFilesInDir(dir: string, ext?: string): Promise<string[]> {
+  try {
+    const entries = await readdir(dir, { recursive: true });
+    return entries
+      .filter((entry): entry is string => typeof entry === 'string')
+      .filter((entry) => !ext || entry.endsWith(ext))
+      .sort();
+  } catch {
+    return [];
+  }
+}
+
+/** Checks every required path and classifies the project as missing, corrupt, or ready. */
+async function inspectSeededProjectStatus(projectDir: string): Promise<{ status: SeededInspectReadiness; missing: string[] }> {
+  const missing: string[] = [];
+  for (const relativePath of REQUIRED_SEEDED_PROJECT_PATHS) {
+    if (!(await exists(join(projectDir, relativePath)))) {
+      missing.push(relativePath);
+    }
+  }
+
+  if (missing.length === REQUIRED_SEEDED_PROJECT_PATHS.length) {
+    return { status: 'missing', missing };
+  }
+  if (missing.length > 0) {
+    return { status: 'corrupt', missing };
+  }
+  return { status: 'ready', missing: [] };
+}
+
+/**
+ * Counts the entries in `<projectDir>/links/provenance.json`.
+ *
+ * @returns the array length, or 0 when the file is unreadable, is not valid JSON, or does not hold a JSON array.
+ */
+async function loadLinksCount(projectDir: string): Promise<number> {
+  try {
+    const raw = await readFile(join(projectDir, 'links', 'provenance.json'), 'utf-8');
+    const links: unknown = JSON.parse(raw);
+    return Array.isArray(links) ? links.length : 0;
+  } catch {
+    return 0;
+  }
+}
+
+/** Prefers the project's latest replay; otherwise loads the packaged replay, or null when that load fails. */
+async function loadSeededReplay(projectDir: string): Promise<MemoryFlowReplayInput | null> {
+  const latest = await loadLatestDemoReplay(projectDir);
+  if (latest) {
+    return latest;
+  }
+
+  try {
+    return await loadPackagedDemoReplay();
+  } catch {
+    return null;
+  }
+}
+
+/** Maps manifest source counts into the summary shape, sorting row counts by table name and summing them. */
+function sourceBundleFromManifest(manifest: DemoSeededManifest): SeededInspectSummary['sourceBundle'] {
+  const warehouse = manifest.sources.warehouse;
+  const rowCounts = Object.fromEntries(Object.entries(warehouse.rowCounts).sort(([a], [b]) => a.localeCompare(b)));
+  const totalRows = Object.values(rowCounts).reduce((total, count) => total + count, 0);
+
+  return {
+    warehouse: {
+      label: warehouse.label,
+      path: warehouse.path ?? 'demo.db',
+      tableCount: warehouse.tables,
+      rowCounts,
+      totalRows,
+    },
+    dbt: {
+      label: manifest.sources.dbt.label,
+      path: manifest.sources.dbt.path ?? 'raw-sources/dbt',
+      modelCount: manifest.sources.dbt.models,
+      sourceTableCount: manifest.sources.dbt.sourceTables,
+    },
+    bi: {
+      label: manifest.sources.bi.label,
+      path: manifest.sources.bi.path ?? 'raw-sources/bi',
+      exploreCount: manifest.sources.bi.explores,
+      dashboardCount: manifest.sources.bi.dashboards,
+    },
+    notion: {
+      label: manifest.sources.notion.label,
+      path: manifest.sources.notion.path ?? 'raw-sources/notion',
+      pageCount: manifest.sources.notion.pages,
+    },
+  };
+}
+
+/** Returns a fresh array copy of the shared next-step command list. */
+function nextCommands(): SeededInspectSummary['nextCommands'] {
+  return [...KLO_NEXT_STEP_COMMANDS];
+}
+
+/** Builds seeded-mode metadata, taking origin, timing, and report fields from the replay when present. */
+function modeMetadataFromReplay(replay: MemoryFlowReplayInput | null): SeededInspectSummary['modeMetadata'] {
+  return {
+    mode: 'seeded',
+    source: 'packaged demo project',
+    generatedContext: 'prebuilt from bundled assets',
+    llmCalls: 'none',
+    origin: replay?.metadata?.origin ?? 'packaged',
+    timing: replay?.metadata?.timing ?? 'prebuilt',
+    sourceReportId: replay?.metadata?.sourceReportId ?? 'demo-seeded-report',
+    sourceReportPath: replay?.metadata?.sourceReportPath ?? 'reports/seeded-demo-report.json',
+  };
+}
+
+/**
+ * Gathers the seeded demo inspect summary for a project directory.
+ *
+ * The manifest is loaded first, so a missing or invalid `manifest.json` rejects before any status is computed.
+ *
+ * @param projectDir project directory; resolved to an absolute path before use.
+ */
+export async function inspectSeededProject(projectDir: string): Promise<SeededInspectSummary> {
+  const root = resolve(projectDir);
+  const manifest = await loadSeededManifest(root);
+  const latestReplay = await loadSeededReplay(root);
+  const semanticLayerPath = manifest.generated.semanticLayer.path ?? 'semantic-layer/orbit_demo';
+  const knowledgePath = manifest.generated.knowledge.path ?? 'knowledge/global';
+  const linksPath = join(manifest.generated.links.path ?? 'links', 'provenance.json');
+  const reportFiles = await listFilesInDir(join(root, 'reports'), '.json');
+  const replayFiles = await listFilesInDir(join(root, 'replays'), '.json');
+
+  return {
+    projectDir: root,
+    mode: 'seeded',
+    manifest,
+    status: await inspectSeededProjectStatus(root),
+    sourceBundle: sourceBundleFromManifest(manifest),
+    generatedOutputs: {
+      semanticLayer: {
+        path: semanticLayerPath,
+        manifestSourceCount: manifest.generated.semanticLayer.sourceCount,
+        fileCount: (await listFilesInDir(join(root, semanticLayerPath), '.yaml')).length,
+      },
+      knowledge: {
+        path: knowledgePath,
+        manifestPageCount: manifest.generated.knowledge.pageCount,
+        fileCount: (await listFilesInDir(join(root, knowledgePath), '.md')).length,
+      },
+      links: {
+        path: linksPath,
+        manifestLinkCount: manifest.generated.links.linkCount,
+        linkCount: await loadLinksCount(root),
+      },
+      reports: {
+        primaryPath: reportFiles[0] ? join('reports', reportFiles[0]) : 'reports/seeded-demo-report.json',
+        fileCount: reportFiles.length,
+      },
+      replays: {
+        primaryPath: join('replays', 'replay.memory-flow.v1.json'),
+        latestPath: join('replays', DEMO_LATEST_REPLAY_FILE),
+        fileCount: replayFiles.length,
+      },
+    },
+    modeMetadata: modeMetadataFromReplay(latestReplay),
+    nextCommands: nextCommands(),
+    latestReplay,
+  };
+}
+
+/** Joins `name count` pairs with ', ' in the order `Object.entries` yields them. */
+function rowCountPreview(rowCounts: Record<string, number>): string {
+  return Object.entries(rowCounts)
+    .map(([name, count]) => `${name} ${count}`)
+    .join(', ');
+}
+
+/** Describes the latest replay from its metadata, or from the mode metadata when the replay carries none. */
+function replayLine(summary: SeededInspectSummary): string {
+  const metadata = summary.latestReplay?.metadata ?? summary.modeMetadata;
+  return `Latest replay: ${metadata.mode} (${metadata.origin}, ${metadata.timing})`;
+}
+
+/**
+ * Renders the inspect summary as newline-joined text.
+ *
+ * The total row count is grouped with the fixed 'en-US' locale so the output does not vary with the host locale.
+ */
+export function formatSeededInspect(summary: SeededInspectSummary): string {
+  const source = summary.sourceBundle;
+  const generated = summary.generatedOutputs;
+  const lines = [`Demo project: ${summary.projectDir}`, `Status: ${summary.status.status}`];
+
+  if (summary.status.missing.length > 0) {
+    lines.push(`Missing: ${summary.status.missing.join(', ')}`);
+  }
+
+  lines.push(
+    `Mode: seeded (pre-seeded demo project)`,
+    `Source: ${summary.modeMetadata.source}`,
+    `Generated context: ${summary.modeMetadata.generatedContext}`,
+    `LLM calls: ${summary.modeMetadata.llmCalls}`,
+    '',
+    'Source bundle:',
+    ` Warehouse: ${source.warehouse.tableCount} tables, ${source.warehouse.totalRows.toLocaleString('en-US')} rows`,
+    ` Rows: ${rowCountPreview(source.warehouse.rowCounts)}`,
+    ` dbt: ${source.dbt.modelCount} models, ${source.dbt.sourceTableCount} source tables`,
+    ` BI: ${source.bi.exploreCount} explores, ${source.bi.dashboardCount} dashboards`,
+    ` Notion: ${source.notion.pageCount} pages`,
+    '',
+    'Generated context:',
+    ` Semantic-layer sources: ${generated.semanticLayer.manifestSourceCount} manifest, ${generated.semanticLayer.fileCount} files`,
+    ` Knowledge pages: ${generated.knowledge.manifestPageCount} manifest, ${generated.knowledge.fileCount} files`,
+    ` Evidence links: ${generated.links.manifestLinkCount} manifest, ${generated.links.linkCount} links`,
+    '',
+    `Report: ${generated.reports.primaryPath}`,
+    `Replay: ${generated.replays.primaryPath}`,
+    replayLine(summary),
+    '',
+    'What to do next:',
+  );
+
+  for (const command of summary.nextCommands) {
+    lines.push(` $ ${command.command.padEnd(KLO_NEXT_STEP_COMMAND_WIDTH)} ${command.description}`);
+  }
+
+  lines.push('', `Your KLO project files are at: ${summary.projectDir}`, '');
+  return lines.join('\n');
+}
diff --git a/packages/cli/src/demo-seeded.test.ts b/packages/cli/src/demo-seeded.test.ts
new file mode 100644
index 00000000..a4004bb7
--- /dev/null
+++ b/packages/cli/src/demo-seeded.test.ts
@@ -0,0 +1,117 @@
+import { access, readFile, rm } from 'node:fs/promises';
+import { tmpdir } from 'node:os';
+import { join } from 'node:path';
+import { afterEach, describe, expect, it } from 'vitest';
+import { ensureSeededDemoProject } from './demo-assets.js';
+import { runDemoSeeded } from './demo-seeded.js';
+
+describe('demo seeded mode', () => {
+  const projectDir = join(tmpdir(), `klo-demo-seeded-${process.pid}`);
+
+  afterEach(async () => {
+    await rm(projectDir, { recursive: true, force: true });
+  });
+
+  it('hydrates a complete seeded project with all asset directories', async () => {
+    const result = await ensureSeededDemoProject({ projectDir, force: false });
+
+    expect(result.projectDir).toBe(projectDir);
+    await expect(access(join(projectDir, 'demo.db'))).resolves.toBeUndefined();
+    await expect(access(join(projectDir, 'klo.yaml'))).resolves.toBeUndefined();
+    await expect(access(join(projectDir, 'manifest.json'))).resolves.toBeUndefined();
+    await expect(access(join(projectDir, 'semantic-layer/orbit_demo/accounts.yaml'))).resolves.toBeUndefined();
+    await expect(access(join(projectDir, 'knowledge/global/arr-contract-first.md'))).resolves.toBeUndefined();
+    await expect(access(join(projectDir, 'raw-sources/dbt/schema.yml'))).resolves.toBeUndefined();
+    await expect(access(join(projectDir, 'raw-sources/bi/revenue_exec.dashboard.lookml'))).resolves.toBeUndefined();
+    await expect(access(join(projectDir, 'raw-sources/notion/revenue-reporting-policy.md'))).resolves.toBeUndefined();
+    await expect(access(join(projectDir, 'links/provenance.json'))).resolves.toBeUndefined();
+    await expect(access(join(projectDir, 'reports/seeded-demo-report.json'))).resolves.toBeUndefined();
+  });
+
+  it('does not load or call any LLM provider in seeded mode', async () => {
+    const result = await runDemoSeeded({ projectDir });
+
+    expect(result.replay.metadata?.mode).toBe('seeded');
+    expect(result.replay.metadata?.timing).toBe('prebuilt');
+    expect(result.inspect.mode).toBe('seeded');
+
+    const config = await readFile(join(projectDir, 'klo.yaml'), 'utf-8');
+    expect(config).toContain('api_key: env:ANTHROPIC_API_KEY');
+    expect(config).not.toContain('sk-ant-');
+  });
+
+  it('creates the project under /tmp by default', async () => {
+    const result = await runDemoSeeded({ projectDir });
+    expect(result.projectDir).toBe(projectDir);
+  });
+
+  it('replay metadata identifies mode honestly', async () => {
+    const result = await runDemoSeeded({ projectDir });
+
+    expect(result.replay.metadata).toMatchObject({
+      mode: 'seeded',
+      origin: 'packaged',
+      timing: 'prebuilt',
+    });
+    expect(result.replay.runId).toBe('demo-seeded-orbit');
+  });
+
+  it('packaged seeded replay is honest and shows every source family', async () => {
+    const result = await runDemoSeeded({ projectDir });
+    const sourceEvents = result.replay.events.filter((event) => event.type === 'source_acquired');
+    const adapters = sourceEvents.map((event) => event.adapter).sort();
+
+    expect(result.replay.metadata).toMatchObject({
+      mode: 'seeded',
+      origin: 'packaged',
+      timing: 'prebuilt',
+      sourceReportPath: 'reports/seeded-demo-report.json',
+    });
+    expect(adapters).toEqual(['dbt_descriptions', 'live-database', 'looker', 'notion']);
+    expect(result.replay.events).not.toContainEqual(
+      expect.objectContaining({ type: 'stage_skipped', reason: expect.stringContaining('deterministic') }),
+    );
+    expect(JSON.stringify(result.replay)).not.toContain('LLM ran');
+  });
+
+  it('seeded animation shows all demo source families', async () => {
+    const result = await runDemoSeeded({ projectDir });
+    const adapters = result.replay.events
+      .filter((e) => e.type === 'source_acquired')
+      .map((e) => (e as { adapter: string }).adapter);
+
+    expect(adapters).toContain('live-database');
+    expect(adapters).toContain('dbt_descriptions');
+    expect(adapters).toContain('looker');
+    expect(adapters).toContain('notion');
+  });
+
+  it('SL YAML validates correctly', async () => {
+    await ensureSeededDemoProject({ projectDir, force: false });
+    const slYaml = await readFile(join(projectDir, 'semantic-layer/orbit_demo/accounts.yaml'), 'utf-8');
+    expect(slYaml).toContain('name: accounts');
+    expect(slYaml).toContain('grain:');
+    expect(slYaml).toContain('columns:');
+    expect(slYaml).toContain('measures:');
+    expect(slYaml).toContain('joins:');
+  });
+
+  it('wiki pages have valid frontmatter', async () => {
+    await ensureSeededDemoProject({ projectDir, force: false });
+    const wiki = await readFile(join(projectDir, 'knowledge/global/arr-contract-first.md'), 'utf-8');
+    expect(wiki).toContain('---');
+    expect(wiki).toContain('summary:');
+    expect(wiki).toContain('tags:');
+    expect(wiki).toContain('sl_refs:');
+    expect(wiki).toContain('usage_mode: auto');
+  });
+
+  it('links are searchable through provenance file', async () => {
+    await ensureSeededDemoProject({ projectDir, force: false });
+    const raw = await readFile(join(projectDir, 'links/provenance.json'), 'utf-8');
+    const links = JSON.parse(raw) as Array<{ id: string; artifactKind: string }>;
+    expect(links.length).toBe(23);
+    expect(links.some((l) => l.artifactKind === 'wiki')).toBe(true);
+    expect(links.some((l) => l.artifactKind === 'sl')).toBe(true);
+  });
+});
diff --git a/packages/cli/src/demo-seeded.ts b/packages/cli/src/demo-seeded.ts
new file mode 100644
index 00000000..e14e36f1
--- /dev/null
+++ b/packages/cli/src/demo-seeded.ts
@@ -0,0 +1,47 @@
+import type { MemoryFlowReplayInput } from '@klo/context/ingest/memory-flow';
+import {
+  ensureSeededDemoProject,
+  loadPackagedDemoReplay,
+} from './demo-assets.js';
+import { writeDemoReplay } from './demo-replay-store.js';
+import { inspectSeededProject, type SeededInspectSummary } from './demo-seeded-inspect.js';
+
+export {
+  formatSeededInspect,
+  inspectSeededProject,
+  type DemoSeededManifest,
+  type SeededInspectSummary,
+} from './demo-seeded-inspect.js';
+
+/** Result of `runDemoSeeded`: the project directory, the replay that was stored, and the inspect summary. */
+export interface DemoSeededResult {
+  projectDir: string;
+  replay: MemoryFlowReplayInput;
+  inspect: SeededInspectSummary;
+}
+
+/**
+ * Ensures the seeded demo project, stores the packaged replay with `sourceDir` set to it, and inspects the result.
+ *
+ * @param options.projectDir directory handed to `ensureSeededDemoProject` with `force: false`.
+ */
+export async function runDemoSeeded(options: {
+  projectDir: string;
+}): Promise<DemoSeededResult> {
+  const result = await ensureSeededDemoProject({ projectDir: options.projectDir, force: false });
+
+  const replay = await loadPackagedDemoReplay();
+  const replayWithDir: MemoryFlowReplayInput = {
+    ...replay,
+    sourceDir: result.projectDir,
+  };
+
+  await writeDemoReplay(result.projectDir, replayWithDir, { label: 'seeded' });
+  const inspect = await inspectSeededProject(result.projectDir);
+
+  return {
+    projectDir: result.projectDir,
+    replay: replayWithDir,
+    inspect,
+  };
+}
diff --git a/packages/cli/src/demo.test.ts b/packages/cli/src/demo.test.ts
new file mode 100644
index 00000000..012b67ef
--- /dev/null
+++ b/packages/cli/src/demo.test.ts
@@ -0,0 +1,751 @@
+import { mkdtemp, readFile, rm } from 'node:fs/promises';
+import { tmpdir } from 'node:os';
+import { join } from 'node:path';
+import type { IngestReportSnapshot, MemoryFlowReplayInput } from '@klo/context/ingest';
+import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest';
+import { runKloDemo } from './demo.js';
+import { DEMO_FULL_JOB_ID, defaultDemoProjectDir, ensureDemoProject } from './demo-assets.js';
+import type { DemoFullResult } from './demo-full.js'; +import { createTestDemoPromptAdapter } from './demo-interaction.js'; +import type { renderMemoryFlowTui } from './memory-flow-tui.js'; +import { KLO_NEXT_STEP_COMMANDS } from './next-steps.js'; +import { resetVizFallbackWarningsForTest } from './viz-fallback.js'; + +function makeIo(options: { isTTY?: boolean; columns?: number; rawMode?: boolean } = {}) { + let stdout = ''; + let stderr = ''; + return { + io: { + stdin: { + isTTY: options.isTTY ?? false, + ...(options.rawMode === false ? {} : { setRawMode: vi.fn() }), + }, + stdout: { + isTTY: options.isTTY ?? false, + columns: options.columns ?? 140, + write: (chunk: string) => { + stdout += chunk; + }, + }, + stderr: { + write: (chunk: string) => { + stderr += chunk; + }, + }, + }, + stdout: () => stdout, + stderr: () => stderr, + }; +} + +function fakeFullResult(projectDir: string): DemoFullResult { + const report: IngestReportSnapshot = { + id: 'report-full', + runId: 'run-full', + jobId: DEMO_FULL_JOB_ID, + connectionId: 'orbit_demo', + sourceKey: 'live-database', + createdAt: '2026-05-01T00:00:00.000Z', + body: { + syncId: 'sync-full', + diffSummary: { added: 7, modified: 0, deleted: 0, unchanged: 0 }, + commitSha: null, + workUnits: [ + { + unitKey: 'accounts', + rawFiles: ['accounts.schema.json'], + status: 'success', + actions: [ + { target: 'wiki', type: 'created', key: 'knowledge/accounts.md', detail: 'account lifecycle context' }, + { target: 'sl', type: 'created', key: 'orbit_demo.accounts', detail: 'accounts semantic source' }, + ], + touchedSlSources: [{ connectionId: 'orbit_demo', sourceName: 'orbit_demo.accounts' }], + }, + ], + failedWorkUnits: [], + reconciliationSkipped: false, + conflictsResolved: [], + evictionsApplied: [], + unmappedFallbacks: [], + evictionInputs: [], + unresolvedCards: [], + supersededBy: null, + overrideOf: null, + provenanceRows: [ + { + rawPath: 'accounts.schema.json', + artifactKind: 'wiki', + artifactKey: 
'knowledge/accounts.md', + actionType: 'wiki_written', + }, + ], + toolTranscripts: [], + }, + }; + + return { + project: { projectDir } as never, + scan: { report: { runId: 'scan-run' } } as never, + ingest: { result: { ok: true }, report } as never, + report, + replay: { + runId: 'run-full', + connectionId: 'orbit_demo', + adapter: 'live-database', + status: 'done', + sourceDir: `${projectDir}/raw-sources/orbit_demo/live-database/sync-full`, + syncId: 'sync-full', + errors: [], + events: [ + { type: 'source_acquired', adapter: 'live-database', trigger: 'demo_full', fileCount: 7 }, + { type: 'saved', commitSha: null, wikiCount: 1, slCount: 1 }, + { type: 'provenance_recorded', rowCount: 1 }, + { type: 'report_created', runId: 'run-full', reportPath: 'report-full' }, + ], + plannedWorkUnits: [], + details: { actions: [], provenance: [], transcripts: [] }, + }, + }; +} + +describe('runKloDemo', () => { + let tempDir: string; + + beforeEach(async () => { + resetVizFallbackWarningsForTest(); + tempDir = await mkdtemp(join(tmpdir(), 'klo-demo-command-')); + }); + + afterEach(async () => { + await rm(tempDir, { recursive: true, force: true }); + }); + + it('initializes the demo project', async () => { + const io = makeIo(); + await expect( + runKloDemo({ command: 'init', projectDir: tempDir, force: false, inputMode: 'disabled' }, io.io), + ).resolves.toBe(0); + + expect(io.stdout()).toContain(`Demo project: ${tempDir}`); + expect(io.stdout()).toContain('Config:'); + expect(io.stdout()).toContain('Replay:'); + expect(io.stderr()).toBe(''); + }); + + it('renders the packaged replay in no-input viz mode', async () => { + const io = makeIo({ isTTY: true }); + await expect( + runKloDemo( + { command: 'replay', projectDir: tempDir, outputMode: 'viz', inputMode: 'disabled' }, + io.io, + { env: { ...process.env, TERM: 'xterm-256color' } }, + ), + ).resolves.toBe(0); + + expect(io.stdout()).toContain('KLO memory flow Warehouse + dbt + BI + Docs done'); + 
expect(io.stdout()).toContain('Saved 16 memories'); + expect(io.stderr()).toBe(''); + }); + + it('routes interactive packaged replay viz through the stored TUI renderer', async () => { + const io = makeIo({ isTTY: true }); + const renderStoredMemoryFlow = vi.fn(async () => true); + + await expect( + runKloDemo( + { command: 'replay', projectDir: tempDir, outputMode: 'viz' }, + io.io, + { env: { ...process.env, TERM: 'xterm-256color' }, renderStoredMemoryFlow }, + ), + ).resolves.toBe(0); + + expect(renderStoredMemoryFlow).toHaveBeenCalledTimes(1); + expect(renderStoredMemoryFlow.mock.calls[0]?.[0]).toMatchObject({ + runId: 'demo-seeded-orbit', + connectionId: 'orbit_demo', + adapter: 'live-database', + }); + expect(renderStoredMemoryFlow.mock.calls[0]?.[2]).toEqual({ speedMultiplier: 0.125 }); + expect(io.stdout()).toContain('KLO finished ingesting your data'); + expect(io.stderr()).toBe(''); + }); + + it('routes interactive seeded demo viz through the stored TUI renderer at eighth speed', async () => { + const io = makeIo({ isTTY: true }); + const renderStoredMemoryFlow = vi.fn(async () => true); + + await expect( + runKloDemo( + { command: 'seeded', projectDir: tempDir, outputMode: 'viz' }, + io.io, + { env: { ...process.env, TERM: 'xterm-256color' }, renderStoredMemoryFlow }, + ), + ).resolves.toBe(0); + + expect(renderStoredMemoryFlow).toHaveBeenCalledTimes(1); + expect(renderStoredMemoryFlow.mock.calls[0]?.[2]).toEqual({ speedMultiplier: 0.125 }); + expect(io.stdout()).toContain('KLO finished ingesting your data'); + expect(io.stderr()).toBe(''); + }); + + it('falls back to plain replay output when interactive replay viz lacks stdin raw mode', async () => { + const io = makeIo({ isTTY: true, rawMode: false }); + const renderStoredMemoryFlow = vi.fn(async (_input: MemoryFlowReplayInput, _io: unknown) => true); + + await expect( + runKloDemo( + { command: 'replay', projectDir: tempDir, outputMode: 'viz' }, + io.io, + { env: { ...process.env, TERM: 
'xterm-256color' }, renderStoredMemoryFlow }, + ), + ).resolves.toBe(0); + + expect(renderStoredMemoryFlow).not.toHaveBeenCalled(); + expect(io.stdout()).toContain('Memory-flow summary: done'); + expect(io.stdout()).toContain('Connection: orbit_demo'); + expect(io.stdout()).toContain('klo sl list'); + expect(io.stdout()).toContain('klo wiki list'); + expect(io.stdout()).toContain('klo serve --mcp stdio --user-id local'); + expect(io.stdout()).not.toContain('KLO memory flow'); + expect(io.stderr()).toContain( + 'Visualization requested but stdin raw mode is unavailable; printing plain output.', + ); + }); + + it('degrades default visual demo replay to a plain memory-flow summary when stdout is redirected', async () => { + const testIo = makeIo({ isTTY: false }); + + await expect( + runKloDemo({ command: 'replay', projectDir: tempDir, outputMode: 'viz', inputMode: 'disabled' }, testIo.io), + ).resolves.toBe(0); + + expect(testIo.stdout()).toContain('Memory-flow summary: done'); + expect(testIo.stdout()).toContain('Connection: orbit_demo'); + expect(testIo.stdout()).toContain('klo sl list'); + expect(testIo.stdout()).toContain('klo wiki list'); + expect(testIo.stdout()).toContain('klo serve --mcp stdio --user-id local'); + expect(testIo.stdout()).not.toContain('KLO memory flow'); + expect(testIo.stderr()).toContain( + 'Visualization requested but stdout is not an interactive terminal; printing plain output.', + ); + }); + + it('prints JSON replay output when requested', async () => { + const io = makeIo(); + await expect( + runKloDemo({ command: 'replay', projectDir: tempDir, outputMode: 'json', inputMode: 'disabled' }, io.io), + ).resolves.toBe(0); + + expect(JSON.parse(io.stdout())).toMatchObject({ runId: 'demo-seeded-orbit', connectionId: 'orbit_demo' }); + expect(io.stderr()).toBe(''); + }); + + it('runs the packaged SQLite demo scan', async () => { + const io = makeIo(); + await expect(runKloDemo({ command: 'scan', projectDir: tempDir, inputMode: 'disabled' }, 
io.io)).resolves.toBe(0); + + expect(io.stdout()).toContain('Demo scan: done'); + expect(io.stdout()).toContain('Connection: orbit_demo'); + expect(io.stdout()).toContain('Driver: sqlite'); + expect(io.stdout()).toContain('Report: raw-sources/orbit_demo/live-database/'); + expect(io.stderr()).toBe(''); + }); + + it('runs seeded mode with pre-seeded assets and inspect summary', async () => { + const io = makeIo({ isTTY: true }); + await expect( + runKloDemo( + { command: 'seeded', projectDir: tempDir, outputMode: 'plain', inputMode: 'disabled' }, + io.io, + { env: { ...process.env, TERM: 'xterm-256color' } }, + ), + ).resolves.toBe(0); + + expect(io.stdout()).toContain('Mode: seeded'); + expect(io.stdout()).toContain('LLM calls: none'); + expect(io.stdout()).toContain('Semantic-layer sources:'); + expect(io.stdout()).toContain('Knowledge pages:'); + expect(io.stderr()).toBe(''); + }); + + it('uses seeded mode as the default demo and creates a temp project when no project-dir is supplied', async () => { + const io = makeIo(); + + await expect( + runKloDemo( + { command: 'seeded', projectDir: defaultDemoProjectDir(), outputMode: 'plain', inputMode: 'disabled' }, + io.io, + ), + ).resolves.toBe(0); + + expect(io.stdout()).toContain('Mode: seeded'); + expect(io.stdout()).toContain('Source: packaged demo project'); + expect(io.stdout()).toContain('Generated context: prebuilt from bundled assets'); + expect(io.stdout()).toContain('LLM calls: none'); + expect(io.stdout()).toContain('Your KLO project files are at:'); + expect(io.stdout()).toContain(join(tmpdir(), 'klo-demo-')); + expect(io.stdout()).toContain('klo serve --mcp stdio'); + expect(io.stdout()).not.toContain(['klo', 'mcp'].join(' ')); + expect(io.stdout()).not.toContain('deterministic'); + }); + + it('degrades default visual seeded demo to plain output when TERM is dumb', async () => { + const testIo = makeIo({ isTTY: true, columns: 120 }); + + await expect( + runKloDemo( + { command: 'seeded', projectDir: 
tempDir, outputMode: 'viz', inputMode: 'disabled' }, + testIo.io, + { env: { ...process.env, TERM: 'dumb' } }, + ), + ).resolves.toBe(0); + + expect(testIo.stdout()).toContain('Mode: seeded'); + expect(testIo.stdout()).toContain('LLM calls: none'); + expect(testIo.stderr()).toContain( + 'Visualization requested but TERM=dumb does not support the visual renderer; printing plain output.', + ); + }); + + it('prints demo inspect as plain text and JSON', async () => { + const seededIo = makeIo(); + await expect( + runKloDemo({ command: 'seeded', projectDir: tempDir, outputMode: 'plain', inputMode: 'disabled' }, seededIo.io), + ).resolves.toBe(0); + + const plainIo = makeIo(); + await expect( + runKloDemo({ command: 'inspect', projectDir: tempDir, outputMode: 'plain', inputMode: 'disabled' }, plainIo.io), + ).resolves.toBe(0); + expect(plainIo.stdout()).toContain('Mode: seeded'); + expect(plainIo.stdout()).toContain('Semantic-layer sources:'); + + const jsonIo = makeIo(); + await expect( + runKloDemo({ command: 'inspect', projectDir: tempDir, outputMode: 'json', inputMode: 'disabled' }, jsonIo.io), + ).resolves.toBe(0); + const parsed = JSON.parse(jsonIo.stdout()); + expect(parsed).toMatchObject({ + projectDir: tempDir, + mode: 'seeded', + status: { status: 'ready', missing: [] }, + sourceBundle: { + warehouse: { tableCount: 8, totalRows: 11234 }, + dbt: { modelCount: 3, sourceTableCount: 8 }, + bi: { exploreCount: 5, dashboardCount: 2 }, + notion: { pageCount: 8 }, + }, + generatedOutputs: { + semanticLayer: { manifestSourceCount: 6, fileCount: 6 }, + knowledge: { manifestPageCount: 10, fileCount: 10 }, + links: { manifestLinkCount: 23, linkCount: 23 }, + reports: { primaryPath: 'reports/seeded-demo-report.json', fileCount: 1 }, + }, + modeMetadata: { + mode: 'seeded', + source: 'packaged demo project', + generatedContext: 'prebuilt from bundled assets', + llmCalls: 'none', + }, + nextCommands: KLO_NEXT_STEP_COMMANDS, + }); + 
expect(parsed.generatedOutputs.replays.fileCount).toBeGreaterThanOrEqual(3); + expect(jsonIo.stderr()).toBe(''); + }); + + it('routes top-level full mode and prints memory-flow plus final summary', async () => { + const testIo = makeIo({ isTTY: true }); + const runFullDemo = vi.fn().mockResolvedValue(fakeFullResult(tempDir)); + await ensureDemoProject({ projectDir: tempDir, force: false }); + + await expect( + runKloDemo({ command: 'full', projectDir: tempDir, outputMode: 'viz', inputMode: 'disabled' }, testIo.io, { + env: {}, + runFullDemo, + }), + ).resolves.toBe(0); + + expect(runFullDemo).toHaveBeenCalledWith( + expect.objectContaining({ + projectDir: tempDir, + env: {}, + onMemoryFlowChange: expect.any(Function), + }), + ); + expect(testIo.stdout()).toContain('KLO memory flow orbit_demo/live-database done'); + expect(testIo.stdout()).toContain('Full demo ingest: done'); + expect(testIo.stdout()).toContain('Next: klo setup demo inspect'); + expect(testIo.stdout()).toContain('Shows the files, semantic-layer sources, and memory KLO just produced.'); + }); + + it('streams live memory-flow snapshots for full demo viz and then prints final summary', async () => { + const testIo = makeIo({ isTTY: true, columns: 120 }); + const liveSession = { + update: vi.fn(), + close: vi.fn(), + isClosed: vi.fn(() => false), + }; + const startLiveMemoryFlow = vi.fn(async (_input: MemoryFlowReplayInput, _io: unknown) => liveSession); + const runFullDemo = vi.fn( + async (options: { projectDir: string; onMemoryFlowChange?: (snapshot: MemoryFlowReplayInput) => void }) => { + options.onMemoryFlowChange?.({ + ...fakeFullResult(tempDir).replay, + status: 'running', + events: [{ type: 'source_acquired', adapter: 'live-database', trigger: 'demo_full', fileCount: 7 }], + }); + return fakeFullResult(tempDir); + }, + ); + await ensureDemoProject({ projectDir: tempDir, force: false }); + + await expect( + runKloDemo({ command: 'full', projectDir: tempDir, outputMode: 'viz' }, testIo.io, { + 
env: { ANTHROPIC_API_KEY: 'sk-ant-test' }, // pragma: allowlist secret + prompts: createTestDemoPromptAdapter({ choices: ['reuse'] }), + runFullDemo, + startLiveMemoryFlow, + }), + ).resolves.toBe(0); + + expect(startLiveMemoryFlow).toHaveBeenCalledTimes(1); + expect(liveSession.update).toHaveBeenCalledTimes(1); + expect(liveSession.close).toHaveBeenCalledTimes(1); + expect(testIo.stdout()).not.toContain('Memory-flow summary: done'); + expect(testIo.stdout()).toContain('KLO finished ingesting your data'); + expect(testIo.stdout()).toContain('klo sl list'); + expect(testIo.stdout()).toContain('klo wiki list'); + expect(testIo.stdout()).toContain('klo serve --mcp stdio --user-id local'); + expect(testIo.stdout()).not.toContain(['klo', 'ask'].join(' ')); + expect(testIo.stdout()).not.toContain(['klo', 'mcp'].join(' ')); + }); + + it('uses plain progress for full demo viz when stdin raw mode is unavailable', async () => { + const testIo = makeIo({ isTTY: true, rawMode: false, columns: 120 }); + const liveSession = { + update: vi.fn(), + close: vi.fn(), + isClosed: vi.fn(() => false), + }; + const startLiveMemoryFlow = vi.fn(async (_input: MemoryFlowReplayInput, _io: unknown) => liveSession); + const runFullDemo = vi.fn( + async (options: { projectDir: string; onMemoryFlowChange?: (snapshot: MemoryFlowReplayInput) => void }) => { + options.onMemoryFlowChange?.({ + ...fakeFullResult(tempDir).replay, + status: 'running', + events: [{ type: 'source_acquired', adapter: 'live-database', trigger: 'demo_full', fileCount: 7 }], + }); + return fakeFullResult(tempDir); + }, + ); + await ensureDemoProject({ projectDir: tempDir, force: false }); + + await expect( + runKloDemo({ command: 'full', projectDir: tempDir, outputMode: 'viz' }, testIo.io, { + env: { ANTHROPIC_API_KEY: 'sk-ant-test' }, // pragma: allowlist secret + prompts: createTestDemoPromptAdapter({ choices: ['reuse'] }), + runFullDemo, + startLiveMemoryFlow, + }), + ).resolves.toBe(0); + + 
expect(startLiveMemoryFlow).not.toHaveBeenCalled(); + expect(runFullDemo).toHaveBeenCalledWith( + expect.objectContaining({ + onMemoryFlowChange: expect.any(Function), + }), + ); + expect(testIo.stdout()).toContain('[connect] Connected live-database - 7 database files (demo_full)'); + expect(testIo.stdout()).toContain('Full demo ingest: done'); + expect(testIo.stdout()).not.toContain('KLO memory flow'); + expect(testIo.stderr()).toContain( + 'Visualization requested but stdin raw mode is unavailable; printing plain output.', + ); + }); + + it('streams plain-text progress lines for full demo when no live TUI is active', async () => { + const testIo = makeIo(); + const runFullDemo = vi.fn( + async (options: { projectDir: string; onMemoryFlowChange?: (snapshot: MemoryFlowReplayInput) => void }) => { + const baseSnapshot = fakeFullResult(tempDir).replay; + options.onMemoryFlowChange?.({ + ...baseSnapshot, + status: 'running', + events: [{ type: 'source_acquired', adapter: 'live-database', trigger: 'manual_resync', fileCount: 7 }], + }); + options.onMemoryFlowChange?.({ + ...baseSnapshot, + status: 'running', + events: [ + { type: 'source_acquired', adapter: 'live-database', trigger: 'manual_resync', fileCount: 7 }, + { type: 'diff_computed', added: 0, modified: 0, deleted: 0, unchanged: 7 }, + ], + }); + return fakeFullResult(tempDir); + }, + ); + await ensureDemoProject({ projectDir: tempDir, force: false }); + + await expect( + runKloDemo( + { command: 'full', projectDir: tempDir, outputMode: 'plain', inputMode: 'disabled' }, + testIo.io, + { env: { ANTHROPIC_API_KEY: 'sk-ant-test' }, runFullDemo }, // pragma: allowlist secret + ), + ).resolves.toBe(0); + + const stdout = testIo.stdout(); + expect(stdout).toContain('[connect] Connected live-database - 7 database files (manual_resync)'); + expect(stdout).toContain('[diff] Tables: =7 unchanged'); + expect(stdout).toContain('Full demo ingest: done'); + }); + + it('skips plain progress lines for json output mode', async 
() => {
+    const testIo = makeIo();
+    const runFullDemo = vi.fn(
+      async (options: { projectDir: string; onMemoryFlowChange?: (snapshot: MemoryFlowReplayInput) => void }) => {
+        expect(options.onMemoryFlowChange).toBeUndefined();
+        return fakeFullResult(tempDir);
+      },
+    );
+    await ensureDemoProject({ projectDir: tempDir, force: false });
+
+    await expect(
+      runKloDemo(
+        { command: 'full', projectDir: tempDir, outputMode: 'json', inputMode: 'disabled' },
+        testIo.io,
+        { env: { ANTHROPIC_API_KEY: 'sk-ant-test' }, runFullDemo }, // pragma: allowlist secret
+      ),
+    ).resolves.toBe(0);
+    expect(testIo.stdout()).not.toContain('[connect]');
+    expect(testIo.stdout()).not.toContain('[snapshot]');
+  });
+
+  it('routes demo ingest full mode', async () => {
+    const testIo = makeIo();
+    const runFullDemo = vi.fn().mockResolvedValue(fakeFullResult(tempDir));
+    await ensureDemoProject({ projectDir: tempDir, force: false });
+
+    await expect(
+      runKloDemo(
+        { command: 'ingest', mode: 'full', projectDir: tempDir, outputMode: 'plain', inputMode: 'disabled' },
+        testIo.io,
+        { env: {}, runFullDemo },
+      ),
+    ).resolves.toBe(0);
+
+    expect(testIo.stdout()).toContain('Full demo ingest: done');
+  });
+
+  it('saves full-demo replay output for the next demo replay command', async () => {
+    // Uses the suite-level tempDir (fresh per test) so afterEach removes it; a shadowing mkdtemp here leaked a directory.
+    await ensureDemoProject({ projectDir: tempDir, force: false });
+    const io = makeIo();
+
+    await expect(
+      runKloDemo(
+        { command: 'full', projectDir: tempDir, outputMode: 'plain', inputMode: 'disabled' },
+        io.io,
+        {
+          env: { ANTHROPIC_API_KEY: 'sk-ant-test' }, // pragma: allowlist secret
+          runFullDemo: vi.fn(async () => fakeFullResult(tempDir)),
+        },
+      ),
+    ).resolves.toBe(0);
+
+    const replayIo = makeIo();
+    await expect(
+      runKloDemo({ command: 'replay', projectDir: tempDir, outputMode: 'json', inputMode: 'disabled' }, replayIo.io),
+    ).resolves.toBe(0);
+    expect(JSON.parse(replayIo.stdout())).toMatchObject({
+      runId:
'run-full', + metadata: { mode: 'full', origin: 'captured' }, + }); + }); + + it('routes demo ingest seeded mode through the seeded path', async () => { + const testIo = makeIo(); + + await expect( + runKloDemo( + { command: 'ingest', mode: 'seeded', projectDir: tempDir, outputMode: 'plain', inputMode: 'disabled' }, + testIo.io, + ), + ).resolves.toBe(0); + + expect(testIo.stdout()).toContain('Mode: seeded'); + expect(testIo.stdout()).toContain('LLM calls: none'); + }); + + it('routes demo doctor through the doctor module', async () => { + const testIo = makeIo(); + const runDoctor = vi.fn().mockResolvedValue(0); + + await expect( + runKloDemo( + { + command: 'doctor', + projectDir: tempDir, + outputMode: 'plain', + inputMode: 'disabled', + }, + testIo.io, + { runDoctor }, + ), + ).resolves.toBe(0); + + expect(runDoctor).toHaveBeenCalledWith( + { + command: 'demo', + projectDir: tempDir, + outputMode: 'plain', + inputMode: 'disabled', + }, + testIo.io, + ); + }); + + it('resets the demo project only when force is explicit', async () => { + await ensureDemoProject({ projectDir: tempDir, force: false }); + await rm(join(tempDir, 'demo.db'), { force: true }); + + const rejected = makeIo(); + await expect( + runKloDemo({ command: 'reset', projectDir: tempDir, force: false, inputMode: 'disabled' }, rejected.io), + ).resolves.toBe(1); + expect(rejected.stderr()).toContain(`klo setup demo reset is destructive; pass --force to recreate ${tempDir}`); + + const accepted = makeIo(); + await expect( + runKloDemo({ command: 'reset', projectDir: tempDir, force: true, inputMode: 'disabled' }, accepted.io), + ).resolves.toBe(0); + expect(accepted.stdout()).toContain(`Demo project reset: ${tempDir}`); + }); + + it('rehydrates seeded assets after reset --force', async () => { + const resetIo = makeIo(); + await expect( + runKloDemo({ command: 'reset', projectDir: tempDir, force: true, inputMode: 'disabled' }, resetIo.io), + ).resolves.toBe(0); + + const seededIo = makeIo(); + await 
expect( + runKloDemo( + { command: 'seeded', projectDir: tempDir, outputMode: 'plain', inputMode: 'disabled' }, + seededIo.io, + ), + ).resolves.toBe(0); + + expect(seededIo.stdout()).toContain('Status: ready'); + expect(seededIo.stdout()).toContain('Semantic-layer sources: 6 manifest, 6 files'); + expect(seededIo.stdout()).toContain('Knowledge pages: 10 manifest, 10 files'); + expect(seededIo.stdout()).not.toContain('Status: corrupt'); + expect(seededIo.stdout()).not.toContain('Semantic-layer sources: 6 manifest, 0 files'); + }); + + it('fails corrupted demo projects in no-input mode with reset guidance', async () => { + await ensureDemoProject({ projectDir: tempDir, force: false }); + await rm(join(tempDir, 'demo.db'), { force: true }); + const testIo = makeIo(); + + await expect( + runKloDemo({ command: 'replay', projectDir: tempDir, outputMode: 'plain', inputMode: 'disabled' }, testIo.io), + ).resolves.toBe(1); + + expect(testIo.stderr()).toContain(`Demo project is not ready at ${tempDir}: missing demo.db`); + expect(testIo.stderr()).toContain(`klo setup demo reset --project-dir ${tempDir} --force --no-input`); + }); + + it('uses a process-local Anthropic key from the interactive prompt', async () => { + const testIo = makeIo({ isTTY: true, columns: 120 }); + const runFullDemo = vi.fn().mockResolvedValue(fakeFullResult(tempDir)); + await ensureDemoProject({ projectDir: tempDir, force: false }); + + await expect( + runKloDemo( + { command: 'full', projectDir: tempDir, outputMode: 'plain' }, + testIo.io, + { + env: {}, + prompts: createTestDemoPromptAdapter({ + choices: ['reuse', 'process_key'], + passwords: ['sk-ant-process'], // pragma: allowlist secret + }), + runFullDemo, + }, + ), + ).resolves.toBe(0); + + expect(runFullDemo).toHaveBeenCalledWith( + expect.objectContaining({ + projectDir: tempDir, + env: { ANTHROPIC_API_KEY: 'sk-ant-process' }, // pragma: allowlist secret + onMemoryFlowChange: expect.any(Function), + }), + ); + expect(await 
readFile(join(tempDir, 'klo.yaml'), 'utf-8')).toContain('api_key: env:ANTHROPIC_API_KEY'); + }); + + it('routes an interactive missing-key choice to seeded mode', async () => { + const testIo = makeIo({ isTTY: true, columns: 120 }); + const runFullDemo = vi.fn(); + await ensureDemoProject({ projectDir: tempDir, force: false }); + + await expect( + runKloDemo( + { command: 'full', projectDir: tempDir, outputMode: 'plain' }, + testIo.io, + { + env: {}, + prompts: createTestDemoPromptAdapter({ choices: ['reuse', 'seeded'] }), + runFullDemo, + }, + ), + ).resolves.toBe(0); + + expect(runFullDemo).not.toHaveBeenCalled(); + expect(testIo.stdout()).toContain('Mode: seeded'); + expect(testIo.stdout()).toContain('LLM calls: none'); + expect(testIo.stdout()).not.toContain('deterministic'); + }); + + it('routes missing full-mode credentials to seeded when the interactive user chooses the no-LLM demo', async () => { + const testIo = makeIo({ isTTY: true }); + + await expect( + runKloDemo( + { command: 'full', projectDir: tempDir, outputMode: 'plain' }, + testIo.io, + { + env: { ...process.env, ANTHROPIC_API_KEY: '' }, + prompts: createTestDemoPromptAdapter({ choices: ['seeded'] }), + }, + ), + ).resolves.toBe(0); + + expect(testIo.stdout()).toContain('Mode: seeded'); + expect(testIo.stdout()).toContain('LLM calls: none'); + expect(testIo.stdout()).not.toContain('deterministic'); + }); + + it('routes an interactive missing-key choice to replay mode', async () => { + const testIo = makeIo({ isTTY: true, columns: 120 }); + const runFullDemo = vi.fn(); + await ensureDemoProject({ projectDir: tempDir, force: false }); + + await expect( + runKloDemo( + { command: 'full', projectDir: tempDir, outputMode: 'viz' }, + testIo.io, + { + env: {}, + prompts: createTestDemoPromptAdapter({ choices: ['reuse', 'replay'] }), + runFullDemo, + }, + ), + ).resolves.toBe(0); + + expect(runFullDemo).not.toHaveBeenCalled(); + expect(testIo.stdout()).toContain('KLO memory flow'); + 
expect(testIo.stdout()).toContain('done'); + }); +}); diff --git a/packages/cli/src/demo.ts b/packages/cli/src/demo.ts new file mode 100644 index 00000000..f2096971 --- /dev/null +++ b/packages/cli/src/demo.ts @@ -0,0 +1,544 @@ +import { + buildMemoryFlowViewModel, + formatMemoryFlowFinalSummary, + renderMemoryFlowReplay, + type MemoryFlowReplayInput, +} from '@klo/context/ingest/memory-flow'; +import { resolveKloConfigReference } from '@klo/context/core'; +import { loadKloProject } from '@klo/context/project'; +import { + DEMO_ADAPTER, + DEMO_CONNECTION_ID, + DEMO_FULL_JOB_ID, + ensureDemoProject, + loadProjectDemoReplay, + resetDemoProject, +} from './demo-assets.js'; +import { writeDemoReplay } from './demo-replay-store.js'; +import { + formatDemoInspect, + formatDemoScanSummary, + inspectDemoProject, + runDemoScan, +} from './demo-scan.js'; +import { + formatSeededInspect, + inspectSeededProject, + runDemoSeeded, +} from './demo-seeded.js'; +import { buildFullDemoReplay, formatCleanDemoSummary, formatFullDemoSummary, fullDemoCredentialStatus, runDemoFull } from './demo-full.js'; +import { createPlainProgressEmitter } from './demo-progress.js'; +import { + chooseDemoProjectForInteractiveRun, + createClackDemoPromptAdapter, + resolveFullCredentialDecision, + type DemoPromptAdapter, +} from './demo-interaction.js'; +import type { KloDoctorArgs } from './doctor.js'; +import { + renderMemoryFlowTui, + startLiveMemoryFlowTui, + type KloMemoryFlowTuiIo, + type MemoryFlowTuiLiveSession, +} from './memory-flow-tui.js'; +import { + rendererUnavailableVizFallback, + resolveVizFallback, + warnVizFallbackOnce, +} from './viz-fallback.js'; +import { profileMark } from './startup-profile.js'; +import { formatNextStepLines } from './next-steps.js'; + +profileMark('module:demo'); + +export type KloDemoOutputMode = 'plain' | 'json' | 'viz'; +export type KloDemoInputMode = 'auto' | 'disabled'; +export type KloDemoMode = 'full' | 'seeded'; + +export type KloDemoArgs = + | { 
command: 'init'; projectDir: string; force: boolean; inputMode?: KloDemoInputMode }
+  | { command: 'reset'; projectDir: string; force: boolean; inputMode?: KloDemoInputMode }
+  | { command: 'replay'; projectDir: string; outputMode: KloDemoOutputMode; inputMode?: KloDemoInputMode }
+  | { command: 'scan'; projectDir: string; inputMode?: KloDemoInputMode }
+  | { command: 'inspect'; projectDir: string; outputMode: KloDemoOutputMode; inputMode?: KloDemoInputMode }
+  // NOTE(review): `Exclude` had lost its type arguments; 'viz' exclusion is inferred — confirm against KloDoctorArgs.
+  | { command: 'doctor'; projectDir: string; outputMode: Exclude<KloDemoOutputMode, 'viz'>; inputMode?: KloDemoInputMode }
+  | { command: 'seeded'; projectDir: string; outputMode: KloDemoOutputMode; inputMode?: KloDemoInputMode }
+  | { command: 'full'; projectDir: string; outputMode: KloDemoOutputMode; inputMode?: KloDemoInputMode }
+  | {
+      command: 'ingest';
+      mode: KloDemoMode;
+      projectDir: string;
+      outputMode: KloDemoOutputMode;
+      inputMode?: KloDemoInputMode;
+    };
+
+/** Minimal stdin/stdout/stderr surface the demo commands read from and write to. */
+export interface KloDemoIo {
+  stdin?: KloMemoryFlowTuiIo['stdin'];
+  stdout: { isTTY?: boolean; columns?: number; write(chunk: string): void };
+  stderr: { write(chunk: string): void };
+}
+
+/** Optional overrides for the collaborators used by the demo commands (e.g. in tests). */
+interface KloDemoDeps {
+  runFullDemo?: typeof runDemoFull;
+  // Resolves to a numeric exit code.
+  runDoctor?: (args: KloDoctorArgs, io: KloDemoIo) => Promise<number>;
+  renderStoredMemoryFlow?: typeof renderMemoryFlowTui;
+  startLiveMemoryFlow?: typeof startLiveMemoryFlowTui;
+  env?: NodeJS.ProcessEnv;
+  prompts?: DemoPromptAdapter;
+}
+
+// Prefixes stripped from unit keys by humanizeUnitKeyPlain; the first match in this order wins.
+const ADAPTER_PREFIXES = ['live_database_', 'metabase_', 'looker_', 'lookml_', 'metricflow_', 'notion_', 'historic_sql_', 'dbt_descriptions_'];
+// Passed as `speedMultiplier` to the stored memory-flow TUI renderer.
+const DEMO_TUI_SPEED_MULTIPLIER = 0.125;
+
+/**
+ * Turns a work-unit key into a space-separated label: hyphens are normalised to
+ * underscores, the first matching adapter prefix (if any) is dropped, and the
+ * remaining underscores become spaces.
+ */
+function humanizeUnitKeyPlain(unitKey: string): string {
+  const normalized = unitKey.replace(/-/g, '_');
+  const matchedPrefix = ADAPTER_PREFIXES.find((prefix) => normalized.startsWith(prefix));
+  const withoutPrefix = matchedPrefix === undefined ? normalized : normalized.slice(matchedPrefix.length);
+  return withoutPrefix.replace(/_/g, ' ');
+}
+
+function formatReplaySummary(input: MemoryFlowReplayInput): string {
+  let slCount = 0;
+  let wikiCount = 0;
+  let chunkCount = 0;
+  const
unitResults: Array<{ unitKey: string; artifacts: Array<{ icon: string; text: string; hasSummary: boolean }> }> = [];
+  let currentUnit: { unitKey: string; artifacts: Array<{ icon: string; text: string; hasSummary: boolean }> } | null = null;
+  let conflictCount = 0;
+  // Plural suffix shared by every count rendered below.
+  const plural = (count: number): string => (count === 1 ? '' : 's');
+
+  // Single pass over the event stream: tally artifacts and group them by the work unit in progress.
+  for (const event of input.events) {
+    switch (event.type) {
+      case 'chunks_planned':
+        chunkCount = event.chunkCount;
+        break;
+      case 'work_unit_started':
+        currentUnit = { unitKey: event.unitKey, artifacts: [] };
+        break;
+      case 'candidate_action': {
+        if (event.target === 'sl') slCount++;
+        else wikiCount++;
+        const detail = input.details.actions.find((a) => a.key === event.key && a.unitKey === event.unitKey);
+        const icon = event.target === 'sl' ? '📊' : '📝';
+        // Fallback label: last dotted segment of the key with separators turned into spaces.
+        const name = event.key.split('.').pop()?.replace(/[_-]/g, ' ') ?? event.key;
+        // Actions seen outside a started work unit are counted but not listed.
+        currentUnit?.artifacts.push({ icon, text: detail?.summary ?? name, hasSummary: !!detail?.summary });
+        break;
+      }
+      case 'work_unit_finished':
+        // Only units that report completion are kept for the listing.
+        if (currentUnit) {
+          unitResults.push(currentUnit);
+          currentUnit = null;
+        }
+        break;
+      case 'reconciliation_finished':
+        conflictCount = event.conflictCount;
+        break;
+    }
+  }
+
+  const lines: string[] = ['', '★ KLO finished ingesting your data', ''];
+
+  if (chunkCount > 0) {
+    lines.push(` ✓ Analyzed ${chunkCount} business area${plural(chunkCount)}`);
+  }
+
+  const reconciliation = conflictCount > 0 ? `${conflictCount} conflict${plural(conflictCount)} resolved` : 'no conflicts';
+  lines.push(` ✓ Reconciled — ${reconciliation}`);
+  lines.push('');
+
+  if (slCount > 0 || wikiCount > 0) {
+    lines.push(' KLO created:');
+    if (slCount > 0) lines.push(` 📊 ${slCount} query definition${plural(slCount)} — so agents can write accurate SQL for your data`);
+    if (wikiCount > 0) lines.push(` 📝 ${wikiCount} knowledge page${plural(wikiCount)} — so agents understand your business context`);
+    lines.push('');
+  }
+
+  // Only artifacts that carry a summary from the replay details are listed individually.
+  for (const unit of unitResults) {
+    for (const artifact of unit.artifacts) {
+      if (artifact.hasSummary) {
+        lines.push(` ${artifact.icon} ${artifact.text}`);
+      }
+    }
+  }
+
+  lines.push('');
+  lines.push(' What to do next:');
+  lines.push(...formatNextStepLines());
+  if (input.sourceDir) {
+    lines.push('');
+    lines.push(` Your KLO project files are at: ${input.sourceDir}`);
+  }
+  lines.push('');
+
+  return lines.join('\n');
+}
+
+/** Plain-text variant: the memory-flow final summary followed by the next-step commands. */
+function formatPlainReplaySummary(input: MemoryFlowReplayInput): string {
+  const summary = formatMemoryFlowFinalSummary(input).trimEnd();
+  return [summary, '', 'What to do next:', ...formatNextStepLines(), ''].join('\n');
+}
+
+/**
+ * Writes a replay to stdout in the requested mode: pretty-printed JSON, the
+ * plain summary, or (for any other mode) the rendered memory-flow view.
+ */
+function writeReplay(input: MemoryFlowReplayInput, outputMode: KloDemoOutputMode, io: KloDemoIo): void {
+  switch (outputMode) {
+    case 'json':
+      io.stdout.write(`${JSON.stringify(input, null, 2)}\n`);
+      return;
+    case 'plain':
+      io.stdout.write(formatPlainReplaySummary(input));
+      return;
+    default: {
+      const view = buildMemoryFlowViewModel(input);
+      // Prefer the injected stream's width; fall back to the real terminal.
+      const terminalWidth = io.stdout.columns ?? process.stdout.columns;
+      io.stdout.write(renderMemoryFlowReplay(view, { terminalWidth }));
+    }
+  }
+}
+
+async function writeStoredReplay(
+  input: MemoryFlowReplayInput,
+  outputMode: KloDemoOutputMode,
+  inputMode: KloDemoArgs['inputMode'],
+  io: KloDemoIo,
+  deps: KloDemoDeps,
+  env: NodeJS.ProcessEnv,
+): Promise<void> {
+  const resolvedOutputMode = effectiveDemoOutputMode(outputMode, io, env, {
+    requireInput: inputMode !== 'disabled',
+  });
+  if (resolvedOutputMode !== 'viz') {
+    writeReplay(input, resolvedOutputMode, io);
+    return;
+  }
+
+  if (inputMode !== 'disabled') {
+    const renderStoredMemoryFlow = deps.renderStoredMemoryFlow ??
renderMemoryFlowTui;
+    if (
+      isTuiCapableDemoIo(io) &&
+      (await renderStoredMemoryFlow(input, io, { speedMultiplier: DEMO_TUI_SPEED_MULTIPLIER }))
+    ) {
+      io.stdout.write(formatReplaySummary(input));
+      return;
+    }
+  }
+
+  writeReplay(input, resolvedOutputMode, io);
+}
+
+/**
+ * Writes a demo inspect summary: pretty-printed JSON for `json`, otherwise the
+ * text produced by `formatDemoInspect`.
+ */
+function writeInspect(
+  // NOTE(review): type arguments were missing in this copy; inferred from the formatDemoInspect call — confirm.
+  summary: Awaited<ReturnType<typeof inspectDemoProject>>,
+  outputMode: KloDemoOutputMode,
+  io: KloDemoIo,
+): void {
+  if (outputMode === 'json') {
+    io.stdout.write(`${JSON.stringify(summary, null, 2)}\n`);
+    return;
+  }
+
+  io.stdout.write(formatDemoInspect(summary));
+}
+
+/**
+ * Writes the outcome of a full demo run.
+ *
+ * - `json`: `{ report, replay }` as pretty-printed JSON.
+ * - `viz` after a live TUI session: only the clean summary.
+ * - `viz` without a live session: the rendered replay, a blank line, the
+ *   memory-flow final summary, then the full-demo summary.
+ * - `plain`: the full-demo summary.
+ *
+ * @param options.liveWasRendered Whether a live TUI already showed the flow.
+ * @param options.projectDir Passed to the clean summary; defaults to ''.
+ */
+function writeFullDemo(
+  // NOTE(review): type arguments were missing in this copy; inferred from KloDemoDeps.runFullDemo — confirm.
+  result: Awaited<ReturnType<typeof runDemoFull>>,
+  outputMode: KloDemoOutputMode,
+  io: KloDemoIo,
+  options: { liveWasRendered?: boolean; projectDir?: string } = {},
+): void {
+  if (outputMode === 'json') {
+    io.stdout.write(`${JSON.stringify({ report: result.report, replay: result.replay }, null, 2)}\n`);
+    return;
+  }
+
+  if (outputMode === 'viz') {
+    if (options.liveWasRendered === true) {
+      io.stdout.write(formatCleanDemoSummary(result.report, options.projectDir ?? ''));
+      return;
+    }
+
+    // Build the replay once and reuse it for both the rendered view and the final summary.
+    const replay = buildFullDemoReplay(result.report);
+    writeReplay(replay, outputMode, io);
+    io.stdout.write('\n');
+    io.stdout.write(formatMemoryFlowFinalSummary(replay));
+  }
+
+  io.stdout.write(formatFullDemoSummary(result.report));
+}
+
+// NOTE(review): type arguments were missing in this copy; inferred to match writeFullDemo — confirm.
+function replayWithFullMetadata(result: Awaited<ReturnType<typeof runDemoFull>>): MemoryFlowReplayInput {
+  if (result.replay.metadata) {
+    return result.replay;
+  }
+
+  return {
+    ...result.replay,
+    metadata: {
+      schemaVersion: 1,
+      mode: 'full',
+      origin: 'captured',
+      timing: 'captured',
+      capturedAt: result.report.createdAt,
+      sourceReportId: result.report.id,
+      sourceReportPath: result.report.id,
+      fallbackReason: null,
+    },
+    reportId: result.replay.reportId ?? result.report.id,
+    reportPath: result.replay.reportPath ??
result.report.id, + }; +} + +function pickMemoryFlowProgress( + liveSession: MemoryFlowTuiLiveSession | null, + outputMode: KloDemoOutputMode, + io: KloDemoIo, +): ((snapshot: MemoryFlowReplayInput) => void) | undefined { + if (liveSession) { + return (snapshot: MemoryFlowReplayInput) => { + if (!liveSession.isClosed()) { + liveSession.update(snapshot); + } + }; + } + if (outputMode === 'json') { + return undefined; + } + return createPlainProgressEmitter(io); +} + +function isTuiCapableDemoIo(io: KloDemoIo): io is KloDemoIo & KloMemoryFlowTuiIo { + return ( + io.stdin?.isTTY === true && + io.stdout.isTTY === true && + typeof io.stdin.setRawMode === 'function' && + typeof io.stdout.write === 'function' + ); +} + +interface EffectiveDemoOutputModeOptions { + requireInput?: boolean; +} + +function effectiveDemoOutputMode( + outputMode: KloDemoOutputMode, + io: KloDemoIo, + env: NodeJS.ProcessEnv, + options: EffectiveDemoOutputModeOptions = {}, +): KloDemoOutputMode { + if (outputMode !== 'viz') { + return outputMode; + } + + const fallback = resolveVizFallback(io, env, { requireInput: options.requireInput ?? 
false }); + if (!fallback.shouldDegrade) { + return outputMode; + } + + warnVizFallbackOnce(io, fallback); + return 'plain'; +} + +function initialFullDemoMemoryFlowInput(projectDir: string): MemoryFlowReplayInput { + return { + runId: DEMO_FULL_JOB_ID, + connectionId: DEMO_CONNECTION_ID, + adapter: DEMO_ADAPTER, + status: 'running', + sourceDir: `${projectDir}/raw-sources/${DEMO_CONNECTION_ID}/${DEMO_ADAPTER}`, + syncId: 'pending', + errors: [], + events: [], + plannedWorkUnits: [], + details: { actions: [], provenance: [], transcripts: [] }, + }; +} + +async function ensureDemoProjectForCommand(projectDir: string): Promise { + await ensureDemoProject({ projectDir, force: false }).catch((error) => { + if (error instanceof Error && error.message.includes('Demo project already exists')) { + return null; + } + throw error; + }); +} + +async function prepareProjectForDemoCommand(args: KloDemoArgs, io: KloDemoIo, deps: KloDemoDeps): Promise { + if (args.command === 'init' || args.command === 'reset' || args.command === 'doctor') { + return args.projectDir; + } + + const prompts = deps.prompts ?? 
createClackDemoPromptAdapter(); + const decision = await chooseDemoProjectForInteractiveRun({ + projectDir: args.projectDir, + inputMode: args.inputMode, + io, + prompts, + }); + + if (decision.action === 'cancel') { + return null; + } + + if (decision.reset) { + await resetDemoProject({ projectDir: decision.projectDir, force: true }); + } + + return decision.projectDir; +} + +async function runReplayDemo( + projectDir: string, + outputMode: KloDemoOutputMode, + inputMode: KloDemoArgs['inputMode'], + io: KloDemoIo, + deps: KloDemoDeps, + env: NodeJS.ProcessEnv = process.env, +): Promise { + await ensureDemoProjectForCommand(projectDir); + await writeStoredReplay(await loadProjectDemoReplay(projectDir), outputMode, inputMode, io, deps, env); + return 0; +} + +async function runSeededDemo( + projectDir: string, + outputMode: KloDemoOutputMode, + inputMode: KloDemoArgs['inputMode'], + io: KloDemoIo, + deps: KloDemoDeps, + env: NodeJS.ProcessEnv = process.env, +): Promise { + const result = await runDemoSeeded({ projectDir }); + const resolvedOutputMode = effectiveDemoOutputMode(outputMode, io, env, { + requireInput: inputMode !== 'disabled', + }); + + if (resolvedOutputMode === 'json') { + io.stdout.write(`${JSON.stringify({ replay: result.replay, inspect: result.inspect }, null, 2)}\n`); + return 0; + } + + if (resolvedOutputMode === 'viz') { + await writeStoredReplay(result.replay, resolvedOutputMode, inputMode, io, deps, env); + } else { + writeReplay(result.replay, resolvedOutputMode, io); + io.stdout.write('\n'); + io.stdout.write(formatSeededInspect(result.inspect)); + } + return 0; +} + +export async function runKloDemo(args: KloDemoArgs, io: KloDemoIo = process, deps: KloDemoDeps = {}): Promise { + try { + if (args.command === 'init') { + const result = await ensureDemoProject({ projectDir: args.projectDir, force: args.force }); + io.stdout.write(`Demo project: ${result.projectDir}\n`); + io.stdout.write(`Config: ${result.configPath}\n`); + 
io.stdout.write(`Database: ${result.databasePath}\n`); + io.stdout.write(`Replay: ${result.replayPath}\n`); + io.stdout.write('Next: klo setup demo --no-input\n'); + io.stdout.write(' Runs the pre-seeded demo without calling the LLM.\n'); + return 0; + } + + if (args.command === 'reset') { + const result = await resetDemoProject({ projectDir: args.projectDir, force: args.force }); + io.stdout.write(`Demo project reset: ${result.projectDir}\n`); + io.stdout.write(`Config: ${result.configPath}\n`); + io.stdout.write(`Database: ${result.databasePath}\n`); + io.stdout.write(`Replay: ${result.replayPath}\n`); + io.stdout.write('Next: klo setup demo --mode full\n'); + io.stdout.write(' Runs the full AI-backed pass with your LLM provider.\n'); + return 0; + } + + const preparedProjectDir = await prepareProjectForDemoCommand(args, io, deps); + if (preparedProjectDir === null) { + return 1; + } + const env = deps.env ?? process.env; + + if (args.command === 'scan') { + const { result } = await runDemoScan({ projectDir: preparedProjectDir }); + io.stdout.write(formatDemoScanSummary(result.report)); + return 0; + } + + if (args.command === 'seeded' || (args.command === 'ingest' && args.mode === 'seeded')) { + return await runSeededDemo(preparedProjectDir, args.outputMode, args.inputMode, io, deps, env); + } + + if (args.command === 'full' || (args.command === 'ingest' && args.mode === 'full')) { + const executeFullDemo = deps.runFullDemo ?? 
runDemoFull; + await ensureDemoProjectForCommand(preparedProjectDir); + const project = await loadKloProject({ projectDir: preparedProjectDir }); + const credentialStatus = fullDemoCredentialStatus(project, env); + const credentialDecision = await resolveFullCredentialDecision({ + needsAnthropicKey: + credentialStatus.status === 'missing-anthropic-key' && + project.config.llm.provider.backend === 'anthropic' && + !resolveKloConfigReference(project.config.llm.provider.anthropic?.api_key, env), + inputMode: args.inputMode, + io, + env, + prompts: deps.prompts ?? createClackDemoPromptAdapter(), + }); + + if (credentialDecision.action === 'cancel') { + return 1; + } + + if (credentialDecision.action === 'run-mode') { + return credentialDecision.mode === 'seeded' + ? await runSeededDemo(preparedProjectDir, args.outputMode, args.inputMode, io, deps, env) + : await runReplayDemo(preparedProjectDir, args.outputMode, args.inputMode, io, deps, env); + } + + let liveSession: MemoryFlowTuiLiveSession | null = null; + let liveWasRendered = false; + const startLiveMemoryFlow = deps.startLiveMemoryFlow ?? startLiveMemoryFlowTui; + let fullOutputMode = effectiveDemoOutputMode(args.outputMode, io, env, { + requireInput: args.inputMode !== 'disabled', + }); + const shouldUseLiveViz = fullOutputMode === 'viz' && args.inputMode !== 'disabled'; + + if (shouldUseLiveViz && isTuiCapableDemoIo(io)) { + liveSession = await startLiveMemoryFlow(initialFullDemoMemoryFlowInput(preparedProjectDir), io); + liveWasRendered = liveSession !== null; + } else if (shouldUseLiveViz) { + warnVizFallbackOnce(io, rendererUnavailableVizFallback()); + fullOutputMode = 'plain'; + } + + const onMemoryFlowChange = pickMemoryFlowProgress(liveSession, fullOutputMode, io); + const result = await executeFullDemo({ + projectDir: preparedProjectDir, + env: credentialDecision.env, + ...(onMemoryFlowChange ? 
{ onMemoryFlowChange } : {}), + }); + await writeDemoReplay(preparedProjectDir, replayWithFullMetadata(result), { label: 'full' }); + liveSession?.close(); + writeFullDemo(result, fullOutputMode, io, { liveWasRendered, projectDir: preparedProjectDir }); + if (fullOutputMode !== 'json' && !liveWasRendered) { + io.stdout.write(formatDemoInspect(await inspectDemoProject(preparedProjectDir))); + } + return 0; + } + + if (args.command === 'inspect') { + const seededInspect = await inspectSeededProject(preparedProjectDir).catch(() => null); + if (seededInspect?.mode === 'seeded') { + if (args.outputMode === 'json') { + io.stdout.write(`${JSON.stringify(seededInspect, null, 2)}\n`); + } else { + io.stdout.write(formatSeededInspect(seededInspect)); + } + return 0; + } + writeInspect(await inspectDemoProject(preparedProjectDir), args.outputMode, io); + return 0; + } + + if (args.command === 'doctor') { + const { runKloDoctor } = await import('./doctor.js'); + const executeDoctor = deps.runDoctor ?? runKloDoctor; + return await executeDoctor( + { + command: 'demo', + projectDir: args.projectDir, + outputMode: args.outputMode, + ...(args.inputMode ? { inputMode: args.inputMode } : {}), + }, + io, + ); + } + + return await runReplayDemo(preparedProjectDir, args.outputMode, args.inputMode, io, deps, env); + } catch (error) { + io.stderr.write(`${error instanceof Error ? 
error.message : String(error)}\n`); + return 1; + } +} diff --git a/packages/cli/src/dev.test.ts b/packages/cli/src/dev.test.ts new file mode 100644 index 00000000..c1473b3d --- /dev/null +++ b/packages/cli/src/dev.test.ts @@ -0,0 +1,670 @@ +import { describe, expect, it, vi } from 'vitest'; +import { runKloCli } from './index.js'; + +function makeIo() { + let stdout = ''; + let stderr = ''; + return { + io: { + stdout: { + write: (chunk: string) => { + stdout += chunk; + }, + }, + stderr: { + write: (chunk: string) => { + stderr += chunk; + }, + }, + }, + stdout: () => stdout, + stderr: () => stderr, + }; +} + +describe('dev Commander tree', () => { + it('prints visible dev help with only supported low-level command groups', async () => { + const testIo = makeIo(); + + await expect(runKloCli(['dev', '--help'], testIo.io)).resolves.toBe(0); + + expect(testIo.stdout()).toContain('Usage: klo dev [options] [command]'); + for (const command of ['init', 'doctor', 'scan', 'ingest', 'mapping']) { + expect(testIo.stdout()).toContain(command); + } + for (const removed of [ + 'knowledge', + 'model', + 'replay', + 'report', + 'status', + 'artifacts', + 'config', + 'tools', + 'daemon', + ]) { + expect(testIo.stdout()).not.toContain(`${removed} `); + } + expect(testIo.stderr()).toBe(''); + }); + + it('keeps dev callable while hiding it from root command rows', async () => { + const testIo = makeIo(); + + await expect(runKloCli(['--help'], testIo.io)).resolves.toBe(0); + + expect(testIo.stdout()).toContain('Advanced:'); + expect(testIo.stdout()).toContain('klo dev'); + expect(testIo.stdout()).not.toContain('dev Low-level diagnostics'); + expect(testIo.stderr()).toBe(''); + }); + + it('keeps project scaffolding under dev init', async () => { + const { mkdtemp, readFile, rm } = await import('node:fs/promises'); + const { tmpdir } = await import('node:os'); + const { join } = await import('node:path'); + const tempDir = await mkdtemp(join(tmpdir(), 'klo-dev-init-')); + const 
projectDir = join(tempDir, 'warehouse'); + const testIo = makeIo(); + + try { + await expect(runKloCli(['dev', 'init', projectDir, '--name', 'warehouse'], testIo.io)).resolves.toBe(0); + + expect(testIo.stdout()).toContain(`Initialized KLO project at ${projectDir}`); + await expect(readFile(join(projectDir, 'klo.yaml'), 'utf-8')).resolves.toContain('project: warehouse'); + expect(testIo.stderr()).toBe(''); + } finally { + await rm(tempDir, { recursive: true, force: true }); + } + }); + + it('uses global project-dir for dev init when the positional directory is omitted', async () => { + const { mkdtemp, rm } = await import('node:fs/promises'); + const { tmpdir } = await import('node:os'); + const { join } = await import('node:path'); + const tempDir = await mkdtemp(join(tmpdir(), 'klo-dev-init-global-')); + const projectDir = join(tempDir, 'global-init'); + const testIo = makeIo(); + + try { + await expect( + runKloCli(['--project-dir', projectDir, 'dev', 'init', '--name', 'global-init'], testIo.io), + ).resolves.toBe(0); + + expect(testIo.stdout()).toContain(`Initialized KLO project at ${projectDir}`); + expect(testIo.stderr()).toBe(''); + } finally { + await rm(tempDir, { recursive: true, force: true }); + } + }); + + it('rejects removed dev command groups', async () => { + for (const argv of [ + ['dev', 'knowledge', 'list'], + ['dev', 'model', 'list'], + ['dev', 'artifacts'], + ]) { + const testIo = makeIo(); + + await expect(runKloCli(argv, testIo.io)).resolves.toBe(1); + + expect(testIo.stderr()).toMatch(/unknown command|error:/); + } + }); + + it.each([ + { + argv: ['dev', 'doctor', '--help'], + expected: ['Usage: klo dev doctor', '--json', '--no-input'], + }, + { + argv: ['dev', 'scan', '--help'], + expected: [ + 'Usage: klo dev scan', + '--mode ', + 'structural', + 'relationships', + '--dry-run', + 'status', + 'report', + 'relationships', + 'relationship-apply', + 'relationship-feedback', + 'relationship-calibration', + 'relationship-thresholds', + ], + }, + 
{ + argv: ['dev', 'scan', 'report', '--help'], + expected: ['Usage: klo dev scan report [options] ', '', '--json'], + }, + { + argv: ['dev', 'scan', 'relationships', '--help'], + expected: [ + 'Usage: klo dev scan relationships [options] ', + '--status ', + '--limit ', + '--accept ', + '--reject ', + '--note ', + '--reviewer ', + '--json', + ], + }, + { + argv: ['dev', 'scan', 'relationship-apply', '--help'], + expected: [ + 'Usage: klo dev scan relationship-apply [options] ', + '--all-accepted', + '--candidate ', + '--dry-run', + ], + }, + { + argv: ['dev', 'scan', 'relationship-thresholds', '--help'], + expected: [ + 'Usage: klo dev scan relationship-thresholds [options]', + '--connection ', + '--min-total-labels ', + '--min-accepted-labels ', + '--min-rejected-labels ', + '--json', + ], + }, + { + argv: ['dev', 'scan', 'relationship-feedback', '--help'], + expected: [ + 'Usage: klo dev scan relationship-feedback [options]', + '--connection ', + '--decision ', + '--json', + '--jsonl', + ], + }, + { + argv: ['dev', 'scan', 'relationship-calibration', '--help'], + expected: [ + 'Usage: klo dev scan relationship-calibration [options]', + '--connection ', + '--decision ', + '--accept-threshold ', + '--review-threshold ', + '--json', + ], + }, + { + argv: ['dev', 'ingest', 'run', '--help'], + expected: ['Usage: klo dev ingest run [options]', '--connection-id ', '--adapter '], + }, + { + argv: ['dev', 'mapping', 'sync-state', 'set', '--help'], + expected: ['Usage: klo dev mapping sync-state set [options] ', '--mode '], + }, + ])('prints generated nested help for $argv', async ({ argv, expected }) => { + const io = makeIo(); + const doctor = vi.fn(async () => 0); + const ingest = vi.fn(async () => 0); + const scan = vi.fn(async () => 0); + + await expect(runKloCli(argv, io.io, { doctor, ingest, scan })).resolves.toBe(0); + + for (const text of expected) { + expect(io.stdout()).toContain(text); + } + expect(io.stderr()).toBe(''); + expect(doctor).not.toHaveBeenCalled(); 
+ expect(ingest).not.toHaveBeenCalled(); + expect(scan).not.toHaveBeenCalled(); + }); + + it('dispatches dev scan through Commander with injected dependencies', async () => { + const scanIo = makeIo(); + const scan = vi.fn(async () => 0); + + await expect( + runKloCli(['dev', 'scan', 'warehouse', '--project-dir', '/tmp/project', '--dry-run'], scanIo.io, { scan }), + ).resolves.toBe(0); + + expect(scan).toHaveBeenCalledWith( + { + command: 'run', + projectDir: '/tmp/project', + connectionId: 'warehouse', + mode: 'structural', + detectRelationships: false, + dryRun: true, + databaseIntrospectionUrl: undefined, + }, + scanIo.io, + ); + expect(scanIo.stderr()).toBe(''); + }); + + it('dispatches dev scan --mode relationships through Commander', async () => { + const io = makeIo(); + const scan = vi.fn(async () => 0); + + await expect( + runKloCli(['dev', 'scan', 'warehouse', '--project-dir', '/tmp/project', '--mode', 'relationships'], io.io, { + scan, + }), + ).resolves.toBe(0); + + expect(scan).toHaveBeenCalledWith( + { + command: 'run', + projectDir: '/tmp/project', + connectionId: 'warehouse', + mode: 'relationships', + detectRelationships: true, + dryRun: false, + databaseIntrospectionUrl: undefined, + }, + io.io, + ); + expect(io.stderr()).toBe(''); + }); + + it.each(['--enrich', '--detect-relationships'])('rejects removed scan shorthand option %s', async (option) => { + const io = makeIo(); + const scan = vi.fn(async () => 0); + + await expect(runKloCli(['dev', 'scan', 'warehouse', option], io.io, { scan })).resolves.toBe(1); + + expect(scan).not.toHaveBeenCalled(); + expect(io.stderr()).toContain(`unknown option '${option}'`); + }); + + it('rejects dev scan without a connection id or subcommand', async () => { + const io = makeIo(); + const scan = vi.fn(async () => 0); + + await expect(runKloCli(['dev', 'scan', '--dry-run'], io.io, { scan })).resolves.toBe(1); + + expect(scan).not.toHaveBeenCalled(); + expect(io.stdout()).toContain('Usage: klo dev scan'); + 
expect(io.stderr()).toContain('klo dev scan requires or a subcommand'); + }); + + it('rejects invalid scan modes before dispatch', async () => { + const io = makeIo(); + const scan = vi.fn(async () => 0); + + await expect(runKloCli(['dev', 'scan', 'warehouse', '--mode', 'deep'], io.io, { scan })).resolves.toBe(1); + + expect(scan).not.toHaveBeenCalled(); + expect(io.stderr()).toContain("argument 'deep' is invalid"); + expect(io.stderr()).toContain('Allowed choices are structural, enriched, relationships'); + }); + + it('prints dev scan subcommand help with the canonical command name', async () => { + const io = makeIo(); + const scan = vi.fn(async () => 0); + + await expect(runKloCli(['dev', 'scan', 'report', '--help'], io.io, { scan })).resolves.toBe(0); + + expect(io.stdout()).toContain('--project-dir is inherited from `klo dev scan`'); + expect(io.stdout()).not.toContain('--project-dir is inherited from `klo scan`'); + expect(scan).not.toHaveBeenCalled(); + }); + + it('dispatches dev scan report in human and json modes', async () => { + const humanIo = makeIo(); + const jsonIo = makeIo(); + const scan = vi.fn(async () => 0); + + await expect( + runKloCli(['dev', 'scan', 'report', 'scan-run-1', '--project-dir', '/tmp/project'], humanIo.io, { scan }), + ).resolves.toBe(0); + await expect( + runKloCli(['dev', 'scan', 'report', 'scan-run-2', '--project-dir', '/tmp/project', '--json'], jsonIo.io, { + scan, + }), + ).resolves.toBe(0); + + expect(scan).toHaveBeenNthCalledWith( + 1, + { command: 'report', projectDir: '/tmp/project', runId: 'scan-run-1', json: false }, + humanIo.io, + ); + expect(scan).toHaveBeenNthCalledWith( + 2, + { command: 'report', projectDir: '/tmp/project', runId: 'scan-run-2', json: true }, + jsonIo.io, + ); + }); + + it('dispatches dev scan relationships with filters through Commander', async () => { + const io = makeIo(); + const scan = vi.fn(async () => 0); + + await expect( + runKloCli( + [ + 'dev', + 'scan', + 'relationships', + 
'scan-run-review', + '--project-dir', + '/tmp/project', + '--status', + 'rejected', + '--limit', + '5', + '--json', + ], + io.io, + { scan }, + ), + ).resolves.toBe(0); + + expect(scan).toHaveBeenCalledWith( + { + command: 'relationships', + projectDir: '/tmp/project', + runId: 'scan-run-review', + status: 'rejected', + json: true, + limit: 5, + }, + io.io, + ); + expect(io.stderr()).toBe(''); + }); + + it('dispatches dev scan relationship decision recording through Commander', async () => { + const io = makeIo(); + const scan = vi.fn(async () => 0); + + await expect( + runKloCli( + [ + 'dev', + 'scan', + 'relationships', + 'scan-run-review', + '--project-dir', + '/tmp/project', + '--accept', + 'orders:orders.customer_id->customers:customers.id', + '--reviewer', + 'Andrey', + '--note', + 'Looks right', + '--json', + ], + io.io, + { scan }, + ), + ).resolves.toBe(0); + + expect(scan).toHaveBeenCalledWith( + { + command: 'relationshipDecision', + projectDir: '/tmp/project', + runId: 'scan-run-review', + candidateId: 'orders:orders.customer_id->customers:customers.id', + decision: 'accepted', + reviewer: 'Andrey', + note: 'Looks right', + json: true, + }, + io.io, + ); + expect(io.stderr()).toBe(''); + }); + + it.each(['--accept', '--reject'])('rejects empty relationship decision candidate ids for %s', async (option) => { + const io = makeIo(); + const scan = vi.fn(async () => 0); + + await expect( + runKloCli(['dev', 'scan', 'relationships', 'scan-run-review', option, ''], io.io, { scan }), + ).resolves.toBe(1); + + expect(scan).not.toHaveBeenCalled(); + expect(io.stderr()).toContain('must not be empty'); + }); + + it('rejects relationship feedback JSON and JSONL output together', async () => { + const io = makeIo(); + const scan = vi.fn(async () => 0); + + await expect( + runKloCli(['dev', 'scan', 'relationship-feedback', '--json', '--jsonl'], io.io, { scan }), + ).resolves.toBe(1); + + expect(scan).not.toHaveBeenCalled(); + 
expect(io.stderr()).toMatch(/conflict|cannot be used/i); + }); + + it('dispatches relationship apply command args', async () => { + const io = makeIo(); + const scan = vi.fn(async () => 0); + + await expect( + runKloCli( + [ + 'dev', + 'scan', + 'relationship-apply', + 'scan-run-a', + '--project-dir', + '/tmp/project', + '--candidate', + 'orders:orders.customer_id->customers:customers.id', + '--dry-run', + '--json', + ], + io.io, + { scan }, + ), + ).resolves.toBe(0); + + expect(scan).toHaveBeenCalledWith( + { + command: 'relationshipApply', + projectDir: '/tmp/project', + runId: 'scan-run-a', + applyAllAccepted: false, + candidateIds: ['orders:orders.customer_id->customers:customers.id'], + dryRun: true, + json: true, + }, + io.io, + ); + }); + + it('dispatches scan relationship feedback command with filters and JSONL output', async () => { + const io = makeIo(); + const scan = vi.fn(async () => 0); + + await expect( + runKloCli( + [ + 'dev', + 'scan', + 'relationship-feedback', + '--project-dir', + '/tmp/project', + '--connection', + 'warehouse', + '--decision', + 'accepted', + '--jsonl', + ], + io.io, + { scan }, + ), + ).resolves.toBe(0); + + expect(scan).toHaveBeenCalledWith( + { + command: 'relationshipFeedback', + projectDir: '/tmp/project', + connectionId: 'warehouse', + decision: 'accepted', + json: false, + jsonl: true, + }, + io.io, + ); + }); + + it('dispatches scan relationship calibration command with thresholds', async () => { + const io = makeIo(); + const scan = vi.fn(async () => 0); + + await expect( + runKloCli( + [ + 'dev', + 'scan', + 'relationship-calibration', + '--project-dir', + '/tmp/project', + '--connection', + 'warehouse', + '--decision', + 'rejected', + '--accept-threshold', + '0.9', + '--review-threshold', + '0.5', + '--json', + ], + io.io, + { scan }, + ), + ).resolves.toBe(0); + + expect(scan).toHaveBeenCalledWith( + { + command: 'relationshipCalibration', + projectDir: '/tmp/project', + connectionId: 'warehouse', + decision: 
'rejected', + acceptThreshold: 0.9, + reviewThreshold: 0.5, + json: true, + }, + io.io, + ); + }); + + it('dispatches relationship threshold advice command args', async () => { + const io = makeIo(); + const scan = vi.fn(async () => 0); + + await expect( + runKloCli( + [ + 'dev', + 'scan', + 'relationship-thresholds', + '--project-dir', + '/tmp/project', + '--connection', + 'warehouse', + '--min-total-labels', + '12', + '--min-accepted-labels', + '4', + '--min-rejected-labels', + '3', + '--json', + ], + io.io, + { scan }, + ), + ).resolves.toBe(0); + + expect(scan).toHaveBeenCalledWith( + { + command: 'relationshipThresholds', + projectDir: '/tmp/project', + connectionId: 'warehouse', + minTotalLabels: 12, + minAcceptedLabels: 4, + minRejectedLabels: 3, + json: true, + }, + io.io, + ); + }); + + it('rejects invalid relationship calibration thresholds before dispatch', async () => { + const io = makeIo(); + const scan = vi.fn(async () => 0); + + await expect( + runKloCli(['dev', 'scan', 'relationship-calibration', '--accept-threshold', '1.5'], io.io, { scan }), + ).resolves.toBe(1); + + expect(scan).not.toHaveBeenCalled(); + expect(io.stderr()).toContain('Allowed range is 0 through 1'); + }); + + it('rejects relationship accept and reject options together before dispatch', async () => { + const io = makeIo(); + const scan = vi.fn(async () => 0); + + await expect( + runKloCli( + [ + 'dev', + 'scan', + 'relationships', + 'scan-run-review', + '--accept', + 'orders:orders.customer_id->customers:customers.id', + '--reject', + 'orders:orders.customer_id->customers:customers.id', + ], + io.io, + { scan }, + ), + ).resolves.toBe(1); + + expect(scan).not.toHaveBeenCalled(); + expect(io.stderr()).toMatch(/conflict|cannot be used/i); + }); + + it('dispatches dev ingest run through the low-level ingest Commander registration', async () => { + const io = makeIo(); + const ingest = vi.fn(async () => 0); + + await expect( + runKloCli( + [ + 'dev', + 'ingest', + 'run', + 
'--connection-id', + 'warehouse', + '--adapter', + 'metabase', + '--project-dir', + '/tmp/project', + '--json', + ], + io.io, + { ingest }, + ), + ).resolves.toBe(0); + + expect(ingest).toHaveBeenCalledWith( + { + command: 'run', + projectDir: '/tmp/project', + connectionId: 'warehouse', + adapter: 'metabase', + sourceDir: undefined, + databaseIntrospectionUrl: undefined, + outputMode: 'json', + }, + io.io, + ); + expect(io.stderr()).toBe(''); + }); +}); diff --git a/packages/cli/src/dev.ts b/packages/cli/src/dev.ts new file mode 100644 index 00000000..97bb8a5d --- /dev/null +++ b/packages/cli/src/dev.ts @@ -0,0 +1,61 @@ +import { resolve } from 'node:path'; +import type { Command } from '@commander-js/extra-typings'; +import { type CommandWithGlobalOptions, type KloCliCommandContext, resolveCommandProjectDir } from './cli-program.js'; +import { registerCompletionCommands } from './commands/completion-commands.js'; +import { registerConnectionMappingCommands } from './commands/connection-commands.js'; +import { registerDoctorCommands } from './commands/doctor-commands.js'; +import { registerIngestCommands } from './commands/ingest-commands.js'; +import { registerScanCommands } from './commands/scan-commands.js'; +import { profileMark } from './startup-profile.js'; + +profileMark('module:dev'); + +export function registerDevCommands(program: Command, context: KloCliCommandContext): void { + const dev = program + .command('dev', { hidden: true }) + .description('Low-level diagnostics, scans, adapter commands, and mapping tools') + .showHelpAfterError(); + + dev.hook('preAction', (_thisCommand, actionCommand) => { + context.writeDebug?.('dev', actionCommand); + }); + + dev.action(() => { + dev.outputHelp(); + context.setExitCode(0); + }); + + dev + .command('init') + .description('Initialize a Git-backed KLO project directory for maintenance scripts') + .argument('[directory]', 'Project directory') + .option('--name ', 'Project name written to klo.yaml') + 
.option('--force', 'Rewrite klo.yaml and scaffold files in an existing project', false) + .action( + async ( + projectDir: string | undefined, + commandOptions: { name?: string; force?: boolean }, + command: CommandWithGlobalOptions, + ) => { + context.setExitCode( + await context.runInit( + { + projectDir: projectDir ? resolve(projectDir) : resolveCommandProjectDir(command), + ...(commandOptions.name ? { projectName: commandOptions.name } : {}), + force: commandOptions.force === true, + }, + context.io, + ), + ); + }, + ); + + registerDoctorCommands(dev, context); + registerScanCommands(dev, context); + registerIngestCommands(dev, context, { + runIngestWithProgress: async (ingestArgs, ingestIo, ingestDeps, defaultRunIngest) => + await (ingestDeps.ingest ?? defaultRunIngest)(ingestArgs, ingestIo), + }); + registerConnectionMappingCommands(dev, context); + registerCompletionCommands(dev, context, program); +} diff --git a/packages/cli/src/doctor.test.ts b/packages/cli/src/doctor.test.ts new file mode 100644 index 00000000..e447d7e5 --- /dev/null +++ b/packages/cli/src/doctor.test.ts @@ -0,0 +1,460 @@ +import { mkdtemp, rm, writeFile } from 'node:fs/promises'; +import { tmpdir } from 'node:os'; +import { join } from 'node:path'; +import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'; +import type { KloEmbeddingConfig, KloEmbeddingHealthCheckOptions, KloEmbeddingHealthCheckResult } from '@klo/llm'; +import { + formatDoctorReport, + runKloDoctor, + runSetupDoctorChecks, + type DoctorCheck, +} from './doctor.js'; + +function makeIo() { + let stdout = ''; + let stderr = ''; + return { + io: { + stdout: { + write: (chunk: string) => { + stdout += chunk; + }, + }, + stderr: { + write: (chunk: string) => { + stderr += chunk; + }, + }, + }, + stdout: () => stdout, + stderr: () => stderr, + }; +} + +type EmbeddingHealthCheck = ( + config: KloEmbeddingConfig, + options?: KloEmbeddingHealthCheckOptions, +) => Promise; + +async function 
writeProjectConfig(projectDir: string, embeddingLines: string[]): Promise { + await writeFile( + join(projectDir, 'klo.yaml'), + [ + 'project: warehouse', + 'connections:', + ' warehouse:', + ' driver: sqlite', + ' path: ./warehouse.db', + 'ingest:', + ' adapters:', + ' - live-database', + ' embeddings:', + ...embeddingLines.map((line) => ` ${line}`), + '', + ].join('\n'), + 'utf-8', + ); +} + +describe('formatDoctorReport', () => { + it('prints exact fixes for failing setup checks', () => { + const checks: DoctorCheck[] = [ + { id: 'node', label: 'Node 22+', status: 'pass', detail: 'v22.16.0 ABI 127' }, + { + id: 'native-sqlite', + label: 'Native SQLite', + status: 'fail', + detail: 'Cannot load better-sqlite3', + fix: 'Run: pnpm run native:rebuild', + }, + ]; + + expect(formatDoctorReport({ title: 'KLO setup doctor', checks })).toBe( + [ + 'KLO setup doctor', + 'PASS Node 22+: v22.16.0 ABI 127', + 'FAIL Native SQLite: Cannot load better-sqlite3', + ' Fix: Run: pnpm run native:rebuild', + '', + ].join('\n'), + ); + }); +}); + +describe('runSetupDoctorChecks', () => { + it('returns pass checks when injected commands and file checks succeed', async () => { + const checks = await runSetupDoctorChecks({ + env: { PATH: '/bin' }, + workspaceRoot: '/workspace/klo', + execText: async (command, args) => { + if (command === 'pnpm' && args[0] === '--version') return '10.28.0'; + if (command === 'corepack' && args[0] === '--version') return '0.32.0'; + if (command === 'uv' && args[0] === '--version') return 'uv 0.9.5'; + if (command === process.execPath && args.includes('--version')) return '@klo/cli 0.0.0-private'; + throw new Error(`${command} ${args.join(' ')}`); + }, + pathExists: async () => true, + importBetterSqlite3: async () => ({ default: function Database() {} }), + }); + + expect(checks.map((check) => [check.id, check.status])).toEqual([ + ['node', 'pass'], + ['pnpm', 'pass'], + ['corepack', 'pass'], + ['uv', 'pass'], + ['native-sqlite', 'pass'], + 
['package-build', 'pass'], + ['workspace-cli', 'pass'], + ]); + }); + + it('returns exact fixes when setup checks fail', async () => { + const checks = await runSetupDoctorChecks({ + env: {}, + workspaceRoot: '/workspace/klo', + execText: async (command) => { + throw new Error(`${command} not found`); + }, + pathExists: async () => false, + importBetterSqlite3: async () => { + throw new Error('Cannot find module better-sqlite3'); + }, + }); + + expect(checks).toContainEqual({ + id: 'pnpm', + label: 'pnpm 10.20+', + status: 'fail', + detail: 'pnpm not found', + fix: 'Run: corepack enable && corepack prepare pnpm@10.28.0 --activate', + }); + expect(checks).toContainEqual({ + id: 'package-build', + label: 'TypeScript package build', + status: 'fail', + detail: 'Missing packages/cli/dist/bin.js', + fix: 'Run: pnpm run build', + }); + }); + + it('treats missing corepack as a warning so setup doctor can still pass', async () => { + const checks = await runSetupDoctorChecks({ + env: { PATH: '/bin' }, + workspaceRoot: '/workspace/klo', + execText: async (command, args) => { + if (command === 'pnpm' && args[0] === '--version') return '10.28.0'; + if (command === 'corepack' && args[0] === '--version') throw new Error('spawn corepack ENOENT'); + if (command === 'uv' && args[0] === '--version') return 'uv 0.9.5'; + if (command === process.execPath && args.includes('--version')) return '@klo/cli 0.0.0-private'; + throw new Error(`${command} ${args.join(' ')}`); + }, + pathExists: async () => true, + importBetterSqlite3: async () => ({ default: function Database() {} }), + }); + const testIo = makeIo(); + + await expect( + runKloDoctor({ command: 'setup', outputMode: 'plain', inputMode: 'disabled' }, testIo.io, { + runSetupChecks: async () => checks, + }), + ).resolves.toBe(0); + + expect(checks).toContainEqual({ + id: 'corepack', + label: 'Corepack', + status: 'warn', + detail: 'spawn corepack ENOENT', + fix: 'Run: corepack enable', + }); + 
expect(testIo.stdout()).toContain('WARN Corepack: spawn corepack ENOENT'); + expect(testIo.stderr()).toBe(''); + }); +}); + +describe('runKloDoctor', () => { + let tempDir: string; + + /* Each test gets a fresh temp directory, removed again in afterEach. */ beforeEach(async () => { + tempDir = await mkdtemp(join(tmpdir(), 'klo-doctor-')); + }); + + afterEach(async () => { + await rm(tempDir, { recursive: true, force: true }); + }); + + /* A single fail-status check must produce exit code 1 while the report still goes to stdout and stderr stays empty. */ it('prints setup report and exits nonzero when a check fails', async () => { + const testIo = makeIo(); + + await expect( + runKloDoctor( + { command: 'setup', outputMode: 'plain', inputMode: 'disabled' }, + testIo.io, + { + runSetupChecks: async () => [ + { id: 'node', label: 'Node 22+', status: 'pass', detail: 'v22.16.0 ABI 127' }, + { + id: 'package-build', + label: 'TypeScript package build', + status: 'fail', + detail: 'Missing packages/cli/dist/bin.js', + fix: 'Run: pnpm run build', + }, + ], + }, + ), + ).resolves.toBe(1); + + expect(testIo.stdout()).toContain('KLO setup doctor'); + expect(testIo.stdout()).toContain('FAIL TypeScript package build: Missing packages/cli/dist/bin.js'); + expect(testIo.stdout()).toContain('Fix: Run: pnpm run build'); + expect(testIo.stderr()).toBe(''); + }); + + /* In json mode stdout must be parseable JSON holding the title and the checks array. */ it('prints JSON setup report', async () => { + const testIo = makeIo(); + + await expect( + runKloDoctor( + { command: 'setup', outputMode: 'json', inputMode: 'disabled' }, + testIo.io, + { + runSetupChecks: async () => [ + { id: 'node', label: 'Node 22+', status: 'pass', detail: 'v22.16.0 ABI 127' }, + ], + }, + ), + ).resolves.toBe(0); + + expect(JSON.parse(testIo.stdout())).toEqual({ + title: 'KLO setup doctor', + checks: [{ id: 'node', label: 'Node 22+', status: 'pass', detail: 'v22.16.0 ABI 127' }], + }); + }); + + it('runs project checks against a valid klo.yaml', async () => { + await writeFile( + join(tempDir, 'klo.yaml'), + [ + 'project: warehouse', + 'connections:', + ' warehouse:', + ' driver: sqlite', + ' path: ./warehouse.db', + 'ingest:', + ' adapters:', + ' - live-database', + '', + ].join('\n'),
+ 'utf-8', + ); + const testIo = makeIo(); + + await expect( + runKloDoctor( + { command: 'project', projectDir: tempDir, outputMode: 'plain', inputMode: 'disabled' }, + testIo.io, + { + runSetupChecks: async () => [ + { id: 'node', label: 'Node 22+', status: 'pass', detail: 'v22.16.0 ABI 127' }, + ], + }, + ), + ).resolves.toBe(0); + + expect(testIo.stdout()).toContain('KLO project doctor'); + expect(testIo.stdout()).toContain('PASS Project config: warehouse'); + expect(testIo.stdout()).toContain('PASS Connections: 1 configured'); + }); + + it('includes Postgres historic-SQL readiness in project doctor output', async () => { + await writeFile( + join(tempDir, 'klo.yaml'), + [ + 'project: warehouse', + 'connections:', + ' warehouse:', + ' driver: postgres', + ' url: env:WAREHOUSE_DATABASE_URL', + ' readonly: true', + ' historicSql:', + ' enabled: true', + ' dialect: postgres', + 'ingest:', + ' adapters:', + ' - live-database', + ' - historic-sql', + '', + ].join('\n'), + 'utf-8', + ); + const testIo = makeIo(); + /* The historic-SQL checks are replaced by a stub returning one warn check, so the test does not run the real probe. */ const runHistoricSqlDoctorChecks = vi.fn(async () => [ + { + id: 'historic-sql-postgres-warehouse', + label: 'Postgres Historic SQL (warehouse)', + status: 'warn' as const, + detail: + 'pg_stat_statements ready (PostgreSQL 16.4) with warnings: pg_stat_statements.max is 1000; set it to at least 5000 to reduce query-template eviction churn', + fix: `Update the Postgres parameter group or config, then rerun \`klo dev doctor --project-dir ${tempDir}\``, + }, + ]); + + await expect( + runKloDoctor( + { command: 'project', projectDir: tempDir, outputMode: 'plain', inputMode: 'disabled' }, + testIo.io, + { + runSetupChecks: async () => [ + { id: 'node', label: 'Node 22+', status: 'pass', detail: 'v22.16.0 ABI 127' }, + ], + runHistoricSqlDoctorChecks, + }, + ), + ).resolves.toBe(0); + + expect(runHistoricSqlDoctorChecks).toHaveBeenCalledTimes(1); + expect(testIo.stdout()).toContain('WARN Postgres Historic SQL (warehouse): pg_stat_statements ready'); +
expect(testIo.stdout()).toContain('Fix: Update the Postgres parameter group or config'); + }); + + /* The deterministic backend is reported as a warning, so the exit code stays 0. */ it('warns when semantic-search embeddings are not configured', async () => { + await writeProjectConfig(tempDir, ['backend: deterministic', 'model: deterministic', 'dimensions: 8']); + const testIo = makeIo(); + + await expect( + runKloDoctor( + { command: 'project', projectDir: tempDir, outputMode: 'plain', inputMode: 'disabled' }, + testIo.io, + { + runSetupChecks: async () => [ + { id: 'node', label: 'Node 22+', status: 'pass', detail: 'v22.16.0 ABI 127' }, + ], + }, + ), + ).resolves.toBe(0); + + expect(testIo.stdout()).toContain('WARN Semantic search embeddings: ingest.embeddings.backend is deterministic.'); + expect(testIo.stdout()).toContain( + 'Semantic lane will be skipped; lexical, dictionary, and token lanes remain available.', + ); + expect(testIo.stdout()).toContain( + `Fix: Run: klo setup --project-dir ${tempDir} --no-input`, + ); + }); + + /* An explicit embeddingProbeTimeoutMs must reach the injected health check unchanged. */ it('probes configured semantic-search embeddings for project doctor', async () => { + await writeProjectConfig(tempDir, [ + 'backend: sentence-transformers', + 'model: all-MiniLM-L6-v2', + 'dimensions: 384', + 'sentenceTransformers:', + ' base_url: http://127.0.0.1:8765', + " pathPrefix: ''", + ]); + const healthCheck = vi.fn(async () => ({ ok: true })); + const testIo = makeIo(); + + await expect( + runKloDoctor( + { command: 'project', projectDir: tempDir, outputMode: 'plain', inputMode: 'disabled' }, + testIo.io, + { + runSetupChecks: async () => [ + { id: 'node', label: 'Node 22+', status: 'pass', detail: 'v22.16.0 ABI 127' }, + ], + embeddingHealthCheck: healthCheck, + embeddingProbeTimeoutMs: 1234, + }, + ), + ).resolves.toBe(0); + + expect(healthCheck).toHaveBeenCalledWith( + { + backend: 'sentence-transformers', + model: 'all-MiniLM-L6-v2', + dimensions: 384, + sentenceTransformers: { baseURL: 'http://127.0.0.1:8765', pathPrefix: '' }, + }, + { text: 'KLO semantic search doctor probe', timeoutMs: 1234 }, + );
+ expect(testIo.stdout()).toContain( + 'PASS Semantic search embeddings: sentence-transformers/all-MiniLM-L6-v2 (384d) probe succeeded', + ); + }); + + /* No embeddingProbeTimeoutMs is injected, so the 120_000 ms default for the sentence-transformers backend is expected. */ it('allows local sentence-transformers semantic-search probes enough time for cold start', async () => { + await writeProjectConfig(tempDir, [ + 'backend: sentence-transformers', + 'model: all-MiniLM-L6-v2', + 'dimensions: 384', + 'sentenceTransformers:', + ' base_url: http://127.0.0.1:8765', + " pathPrefix: ''", + ]); + const healthCheck = vi.fn(async () => ({ ok: true })); + const testIo = makeIo(); + + await expect( + runKloDoctor( + { command: 'project', projectDir: tempDir, outputMode: 'plain', inputMode: 'disabled' }, + testIo.io, + { + runSetupChecks: async () => [ + { id: 'node', label: 'Node 22+', status: 'pass', detail: 'v22.16.0 ABI 127' }, + ], + embeddingHealthCheck: healthCheck, + }, + ), + ).resolves.toBe(0); + + expect(healthCheck).toHaveBeenCalledWith( + expect.objectContaining({ + backend: 'sentence-transformers', + model: 'all-MiniLM-L6-v2', + dimensions: 384, + }), + { text: 'KLO semantic search doctor probe', timeoutMs: 120_000 }, + ); + }); + + /* A failed probe degrades to a warning, so the exit code is still 0. */ it('reports unhealthy semantic-search embeddings as a warning in JSON output', async () => { + await writeProjectConfig(tempDir, [ + 'backend: sentence-transformers', + 'model: all-MiniLM-L6-v2', + 'dimensions: 384', + 'sentenceTransformers:', + ' base_url: http://127.0.0.1:8765', + " pathPrefix: ''", + ]); + const healthCheck = vi.fn(async () => ({ + ok: false, + message: 'connect ECONNREFUSED 127.0.0.1:8765', + })); + const testIo = makeIo(); + + await expect( + runKloDoctor( + { command: 'project', projectDir: tempDir, outputMode: 'json', inputMode: 'disabled' }, + testIo.io, + { + runSetupChecks: async () => [ + { id: 'node', label: 'Node 22+', status: 'pass', detail: 'v22.16.0 ABI 127' }, + ], + embeddingHealthCheck: healthCheck, + }, + ), + ).resolves.toBe(0); + + const report = JSON.parse(testIo.stdout()) as { + checks: Array<{ id: string; label:
string; status: string; detail: string; fix?: string }>; + }; + expect(report.checks).toContainEqual({ + id: 'semantic-search-embeddings', + label: 'Semantic search embeddings', + status: 'warn', + detail: + 'sentence-transformers/all-MiniLM-L6-v2 (384d) probe failed: connect ECONNREFUSED 127.0.0.1:8765. Semantic lane will be skipped; lexical, dictionary, and token lanes remain available.', + fix: `Run: klo setup --project-dir ${tempDir} --no-input`, + }); + }); +}); diff --git a/packages/cli/src/doctor.ts b/packages/cli/src/doctor.ts new file mode 100644 index 00000000..417f5f43 --- /dev/null +++ b/packages/cli/src/doctor.ts @@ -0,0 +1,488 @@ +import { execFile } from 'node:child_process'; +import { constants as fsConstants } from 'node:fs'; +import { access } from 'node:fs/promises'; +import { join, resolve } from 'node:path'; +import { fileURLToPath } from 'node:url'; +import { promisify } from 'node:util'; +import type { KloLocalProject, KloProjectEmbeddingConfig } from '@klo/context/project'; +import type { KloEmbeddingConfig, KloEmbeddingHealthCheckOptions, KloEmbeddingHealthCheckResult } from '@klo/llm'; +import type { HistoricSqlDoctorDeps } from './historic-sql-doctor.js'; + +const execFileAsync = promisify(execFile); + +/** Outcome of one check; only 'fail' makes runKloDoctor return a nonzero exit code. */ type DoctorStatus = 'pass' | 'warn' | 'fail'; +type KloDoctorOutputMode = 'plain' | 'json'; +type KloDoctorInputMode = 'auto' | 'disabled'; + +/** One diagnostic result; `fix` is the optional remediation hint printed under the check. */ export interface DoctorCheck { + id: string; + label: string; + status: DoctorStatus; + detail: string; + fix?: string; +} + +/** A titled list of checks, as rendered by formatDoctorReport or serialized in json mode. */ interface DoctorReport { + title: string; + checks: DoctorCheck[]; +} + +/** Arguments per doctor command; 'project' and 'demo' additionally need the project directory. */ export type KloDoctorArgs = + | { command: 'setup'; outputMode: KloDoctorOutputMode; inputMode?: KloDoctorInputMode } + | { command: 'project'; projectDir: string; outputMode: KloDoctorOutputMode; inputMode?: KloDoctorInputMode } + | { command: 'demo'; projectDir: string; outputMode: KloDoctorOutputMode; inputMode?: KloDoctorInputMode }; + +interface KloDoctorIo { + stdout: { write(chunk: string): void
}; + stderr: { write(chunk: string): void }; +} + +/** Injectable probes for runSetupDoctorChecks; any field left unset falls back to the real implementation. */ interface SetupDoctorDeps { + env?: NodeJS.ProcessEnv; + workspaceRoot?: string; + execText?: (command: string, args: string[], options?: { cwd?: string; env?: NodeJS.ProcessEnv }) => Promise<string>; + pathExists?: (path: string) => Promise<boolean>; + importBetterSqlite3?: () => Promise<unknown>; +} + +/** Signature of the embedding probe, matching runKloEmbeddingHealthCheck from @klo/llm. */ type EmbeddingHealthCheck = ( + config: KloEmbeddingConfig, + options?: KloEmbeddingHealthCheckOptions, +) => Promise<KloEmbeddingHealthCheckResult>; + +interface SemanticSearchDoctorDeps { + env?: NodeJS.ProcessEnv; + embeddingHealthCheck?: EmbeddingHealthCheck; + embeddingProbeTimeoutMs?: number; +} + +interface KloDoctorDeps extends SemanticSearchDoctorDeps, HistoricSqlDoctorDeps { + runSetupChecks?: () => Promise<DoctorCheck[]>; + runHistoricSqlDoctorChecks?: (project: KloLocalProject, deps: HistoricSqlDoctorDeps) => Promise<DoctorCheck[]>; +} + +/** Resolves the workspace root as three directories above this module's URL. */ function workspaceRootDir(): string { + return resolve(fileURLToPath(new URL('../../../', import.meta.url))); +} + +/** Runs a command via execFile and resolves with its stdout and stderr concatenated and trimmed. */ async function defaultExecText( + command: string, + args: string[], + options: { cwd?: string; env?: NodeJS.ProcessEnv } = {}, +): Promise<string> { + const result = await execFileAsync(command, args, { + cwd: options.cwd, + env: options.env, + encoding: 'utf8', + maxBuffer: 1024 * 1024, + }); + return `${result.stdout}${result.stderr}`.trim(); +} + +/** Resolves true when access(path, F_OK) succeeds and false on any error. */ async function defaultPathExists(path: string): Promise<boolean> { + try { + await access(path, fsConstants.F_OK); + return true; + } catch { + return false; + } +} + +/** Returns the first line of a non-blank Error message, otherwise String(error). */ function failureMessage(error: unknown): string { + if (error instanceof Error && error.message.trim().length > 0) { + return error.message.trim().split('\n')[0] ??
error.message.trim(); + } + return String(error); +} + +/** Extracts the first major.minor.patch triple found in value; returns [] when none is present. */ function parseVersion(value: string): number[] { + const match = value.match(/(\d+)\.(\d+)\.(\d+)/); + if (!match) { + return []; + } + return [Number(match[1]), Number(match[2]), Number(match[3])]; +} + +/** Compares the parsed version against minimum component by component; an unparseable value is never at least the minimum. */ function versionAtLeast(value: string, minimum: [number, number, number]): boolean { + const parsed = parseVersion(value); + if (parsed.length !== 3) { + return false; + } + for (let index = 0; index < minimum.length; index += 1) { + if (parsed[index] > minimum[index]) return true; + if (parsed[index] < minimum[index]) return false; + } + return true; +} + +/** Builds a DoctorCheck, omitting the fix key entirely when no (or an empty) fix is given. */ function check(status: DoctorStatus, id: string, label: string, detail: string, fix?: string): DoctorCheck { + return fix ? { id, label, status, detail, fix } : { id, label, status, detail }; +} + +const SEMANTIC_SEARCH_HEALTH_TEXT = 'KLO semantic search doctor probe'; +const SEMANTIC_SEARCH_HEALTH_TIMEOUT_MS = 5_000; +const SEMANTIC_SEARCH_LOCAL_HEALTH_TIMEOUT_MS = 120_000; + +/** Returns the remediation hint for an embedding problem; only the openai backend gets a dedicated message. */ function semanticEmbeddingSetupFix(projectDir: string, backend: KloProjectEmbeddingConfig['backend']): string { + if (backend === 'openai') { + return `Set OPENAI_API_KEY or rerun: klo setup --project-dir ${projectDir} --embedding-backend openai --no-input`; + } + return `Run: klo setup --project-dir ${projectDir} --no-input`; +} + +/** Formats a config as "backend/model (Nd)", substituting a placeholder when the model is missing or blank. */ function embeddingConfigLabel(config: KloProjectEmbeddingConfig | KloEmbeddingConfig): string { + const model = config.model?.trim() || 'model not configured'; + return `${config.backend}/${model} (${config.dimensions}d)`; +} + +/** Appends the standard note that the semantic lane is skipped to the given reason. */ function semanticLaneFallbackDetail(reason: string): string { + return `${reason}.
Semantic lane will be skipped; lexical, dictionary, and token lanes remain available.`; +} + +/** Default probe: lazily imports @klo/llm and delegates to runKloEmbeddingHealthCheck. */ async function defaultEmbeddingHealthCheck( + config: KloEmbeddingConfig, + options?: KloEmbeddingHealthCheckOptions, +): Promise<KloEmbeddingHealthCheckResult> { + const { runKloEmbeddingHealthCheck } = await import('@klo/llm'); + return runKloEmbeddingHealthCheck(config, options); +} + +/** Produces the single semantic-search-embeddings check; every outcome other than a healthy probe is a warning, never a failure. */ async function runSemanticSearchEmbeddingCheck( + config: KloProjectEmbeddingConfig, + projectDir: string, + deps: SemanticSearchDoctorDeps = {}, +): Promise<DoctorCheck> { + /* The none and deterministic backends are reported without running a probe. */ if (config.backend === 'none' || config.backend === 'deterministic') { + return check( + 'warn', + 'semantic-search-embeddings', + 'Semantic search embeddings', + semanticLaneFallbackDetail(`ingest.embeddings.backend is ${config.backend}`), + semanticEmbeddingSetupFix(projectDir, config.backend), + ); + } + + try { + const { resolveLocalKloEmbeddingConfig } = await import('@klo/context'); + const resolved = resolveLocalKloEmbeddingConfig(config, deps.env ?? process.env); + if (!resolved) { + return check( + 'warn', + 'semantic-search-embeddings', + 'Semantic search embeddings', + semanticLaneFallbackDetail(`No runtime embedding config resolved for ${embeddingConfigLabel(config)}`), + semanticEmbeddingSetupFix(projectDir, config.backend), + ); + } + + const healthCheck = deps.embeddingHealthCheck ?? defaultEmbeddingHealthCheck; + /* An injected timeout wins; otherwise sentence-transformers gets the longer local timeout. */ const timeoutMs = + deps.embeddingProbeTimeoutMs ?? + (resolved.backend === 'sentence-transformers' + ?
SEMANTIC_SEARCH_LOCAL_HEALTH_TIMEOUT_MS + : SEMANTIC_SEARCH_HEALTH_TIMEOUT_MS); + const health = await healthCheck(resolved, { + text: SEMANTIC_SEARCH_HEALTH_TEXT, + timeoutMs, + }); + if (health.ok) { + return check( + 'pass', + 'semantic-search-embeddings', + 'Semantic search embeddings', + `${embeddingConfigLabel(resolved)} probe succeeded`, + ); + } + + return check( + 'warn', + 'semantic-search-embeddings', + 'Semantic search embeddings', + semanticLaneFallbackDetail(`${embeddingConfigLabel(resolved)} probe failed: ${health.message}`), + semanticEmbeddingSetupFix(projectDir, config.backend), + ); + } catch (error) { + /* Import, resolution and probe errors are all downgraded to a warning. */ return check( + 'warn', + 'semantic-search-embeddings', + 'Semantic search embeddings', + semanticLaneFallbackDetail(`${embeddingConfigLabel(config)} probe failed: ${failureMessage(error)}`), + semanticEmbeddingSetupFix(projectDir, config.backend), + ); + } +} + +/** Runs the environment checks (Node, pnpm, Corepack, uv, better-sqlite3, CLI build, CLI launch) in that order and returns one DoctorCheck for each. */ export async function runSetupDoctorChecks(deps: SetupDoctorDeps = {}): Promise<DoctorCheck[]> { + const env = deps.env ?? process.env; + const root = deps.workspaceRoot ?? workspaceRootDir(); + const execText = deps.execText ?? defaultExecText; + const pathExists = deps.pathExists ?? defaultPathExists; + const importBetterSqlite3 = deps.importBetterSqlite3 ?? (() => import('better-sqlite3')); + const checks: DoctorCheck[] = []; + + /* The Node check reads the running process directly; it is not injectable. */ const nodeDetail = `${process.version} ABI ${process.versions.modules}`; + checks.push( + versionAtLeast(process.version, [22, 0, 0]) + ? check('pass', 'node', 'Node 22+', nodeDetail) + : check('fail', 'node', 'Node 22+', nodeDetail, 'Install Node 22 or newer, then rerun `pnpm run setup:dev`'), + ); + + try { + const pnpmVersion = await execText('pnpm', ['--version'], { cwd: root, env }); + checks.push( + versionAtLeast(pnpmVersion, [10, 20, 0]) + ?
check('pass', 'pnpm', 'pnpm 10.20+', pnpmVersion) + : check( + 'fail', + 'pnpm', + 'pnpm 10.20+', + pnpmVersion, + 'Run: corepack enable && corepack prepare pnpm@10.28.0 --activate', + ), + ); + } catch (error) { + checks.push( + check( + 'fail', + 'pnpm', + 'pnpm 10.20+', + failureMessage(error), + 'Run: corepack enable && corepack prepare pnpm@10.28.0 --activate', + ), + ); + } + + /* A missing corepack is only a warning; it does not fail the doctor run. */ try { + const corepackVersion = await execText('corepack', ['--version'], { cwd: root, env }); + checks.push(check('pass', 'corepack', 'Corepack', corepackVersion)); + } catch (error) { + checks.push(check('warn', 'corepack', 'Corepack', failureMessage(error), 'Run: corepack enable')); + } + + try { + const uvVersion = await execText('uv', ['--version'], { cwd: root, env }); + checks.push(check('pass', 'uv', 'uv', uvVersion)); + } catch (error) { + checks.push(check('fail', 'uv', 'uv', failureMessage(error), 'Install uv, then rerun `pnpm run setup:dev`')); + } + + try { + await importBetterSqlite3(); + checks.push(check('pass', 'native-sqlite', 'Native SQLite', 'better-sqlite3 loaded')); + } catch (error) { + checks.push( + check('fail', 'native-sqlite', 'Native SQLite', failureMessage(error), 'Run: pnpm run native:rebuild'), + ); + } + + const cliBin = join(root, 'packages/cli/dist/bin.js'); + if (await pathExists(cliBin)) { + checks.push(check('pass', 'package-build', 'TypeScript package build', 'packages/cli/dist/bin.js exists')); + } else { + checks.push( + check( + 'fail', + 'package-build', + 'TypeScript package build', + 'Missing packages/cli/dist/bin.js', + 'Run: pnpm run build', + ), + ); + } + + /* The CLI is launched even when the package-build check failed, so a missing build is reported by both checks. */ try { + const output = await execText(process.execPath, [cliBin, '--version'], { cwd: root, env }); + checks.push(check('pass', 'workspace-cli', 'Workspace-local CLI', output)); + } catch (error) { + checks.push( + check( + 'fail', + 'workspace-cli', + 'Workspace-local CLI', + failureMessage(error), + 'Run: pnpm run build && pnpm run klo -- --version', + ), + ); + } + + return checks;
+} + +/** Loads klo.yaml from projectDir and reports config, connections, storage, LLM provider, embeddings and historic-SQL checks. NOTE(review): the try block covers every step, so an error thrown by a later check is also reported under the project-config id. */ async function runProjectChecks(projectDir: string, deps: KloDoctorDeps = {}): Promise<DoctorCheck[]> { + const { loadKloProject } = await import('@klo/context/project'); + const checks: DoctorCheck[] = []; + try { + const project = await loadKloProject({ projectDir }); + checks.push(check('pass', 'project-config', 'Project config', project.config.project)); + const connectionCount = Object.keys(project.config.connections).length; + checks.push( + connectionCount > 0 + ? check('pass', 'connections', 'Connections', `${connectionCount} configured`) + : check( + 'warn', + 'connections', + 'Connections', + '0 configured', + 'Add a connection to klo.yaml or run `klo setup demo init`', + ), + ); + checks.push(check('pass', 'storage', 'Storage', `${project.config.storage.state}/${project.config.storage.search}`)); + checks.push(check('pass', 'llm-provider', 'LLM provider', project.config.llm.provider.backend)); + checks.push(await runSemanticSearchEmbeddingCheck(project.config.ingest.embeddings, projectDir, deps)); + /* The historic-SQL module is only imported when no override is injected. */ const runHistoricSqlDoctorChecks = + deps.runHistoricSqlDoctorChecks ?? (await import('./historic-sql-doctor.js')).runPostgresHistoricSqlDoctorChecks; + checks.push(...(await runHistoricSqlDoctorChecks(project, deps))); + } catch (error) { + checks.push( + check( + 'fail', + 'project-config', + 'Project config', + failureMessage(error), + /* NOTE(review): the placeholder after --name was lost in extraction; <project-name> is a reconstruction — confirm against upstream. */ `Run: klo init ${projectDir} --name <project-name>`, + ), + ); + } + return checks; +} + +/** Verifies that the demo project's required files exist and that its configuration matches the demo setup. */ async function runDemoProjectChecks(projectDir: string, deps: KloDoctorDeps = {}): Promise<DoctorCheck[]> { + const env = deps.env ??
process.env; + const { DEMO_CONNECTION_ID, DEMO_REPLAY_FILE } = await import('./demo-assets.js'); + const { loadKloProject } = await import('@klo/context/project'); + const checks: DoctorCheck[] = []; + /* Each entry is [check id, label, path relative to projectDir]. */ const requiredPaths = [ + ['demo-config', 'Demo config', 'klo.yaml'], + ['demo-database', 'Demo dataset', 'demo.db'], + ['demo-state', 'Demo state database', 'state.sqlite'], + ['demo-replay', 'Demo replay', join('replays', DEMO_REPLAY_FILE)], + ['demo-raw-sources', 'Demo raw sources directory', 'raw-sources'], + ['demo-semantic-layer', 'Demo semantic-layer directory', 'semantic-layer'], + ['demo-knowledge', 'Demo knowledge directory', 'knowledge'], + ] as const; + + /* Uses defaultPathExists directly, so these checks always probe the real filesystem. */ for (const [id, label, relativePath] of requiredPaths) { + const absolutePath = join(projectDir, relativePath); + checks.push( + (await defaultPathExists(absolutePath)) + ? check('pass', id, label, relativePath) + : check( + 'fail', + id, + label, + `Missing ${relativePath}`, + `Run: klo setup demo init --project-dir ${projectDir} --force --no-input`, + ), + ); + } + + try { + const project = await loadKloProject({ projectDir }); + const connection = project.config.connections[DEMO_CONNECTION_ID]; + checks.push( + connection?.driver === 'sqlite' + ? check('pass', 'demo-connection', 'Demo connection', `${DEMO_CONNECTION_ID} uses sqlite`) + : check( + 'fail', + 'demo-connection', + 'Demo connection', + `${DEMO_CONNECTION_ID} is missing or is not sqlite`, + `Run: klo setup demo init --project-dir ${projectDir} --force --no-input`, + ), + ); + const provider = project.config.llm.provider.backend; + checks.push( + provider === 'anthropic' || provider === 'vertex' || provider === 'gateway' + ?
check('pass', 'demo-llm-provider', 'Demo LLM provider', provider) + : check( + 'fail', + 'demo-llm-provider', + 'Demo LLM provider', + provider, + `Run: klo setup demo init --project-dir ${projectDir} --force --no-input`, + ), + ); + /* Only the anthropic provider is checked for a key; every other provider takes the else branch and passes under the same check id. */ if (provider === 'anthropic' && !env.ANTHROPIC_API_KEY) { + checks.push( + check( + 'warn', + 'anthropic-credentials', + 'Anthropic credentials', + 'ANTHROPIC_API_KEY is not set', + 'Export ANTHROPIC_API_KEY to run `klo setup demo --mode full --no-input`', + ), + ); + } else { + checks.push(check('pass', 'anthropic-credentials', 'Anthropic credentials', 'Configured for current provider')); + } + checks.push(await runSemanticSearchEmbeddingCheck(project.config.ingest.embeddings, projectDir, deps)); + const runHistoricSqlDoctorChecks = + deps.runHistoricSqlDoctorChecks ?? (await import('./historic-sql-doctor.js')).runPostgresHistoricSqlDoctorChecks; + checks.push(...(await runHistoricSqlDoctorChecks(project, deps))); + } catch (error) { + checks.push( + check( + 'fail', + 'demo-config-parse', + 'Demo config parse', + failureMessage(error), + `Run: klo setup demo init --project-dir ${projectDir} --force --no-input`, + ), + ); + } + + return checks; +} + +/** Renders the title, then one "STATUS label: detail" line per check with an optional Fix line, and ends with a trailing newline. */ export function formatDoctorReport(report: DoctorReport): string { + const lines = [report.title]; + for (const item of report.checks) { + lines.push(`${item.status.toUpperCase()} ${item.label}: ${item.detail}`); + if (item.fix) { + lines.push(` Fix: ${item.fix}`); + } + } + lines.push(''); + return lines.join('\n'); +} + +/** True when at least one check has status 'fail'; warnings do not count. */ function hasFailures(report: DoctorReport): boolean { + return report.checks.some((item) => item.status === 'fail'); +} + +/** Writes the report to io.stdout, as 2-space-indented JSON in json mode and as plain text otherwise. */ function writeReport(report: DoctorReport, outputMode: KloDoctorOutputMode, io: KloDoctorIo): void { + if (outputMode === 'json') { + io.stdout.write(`${JSON.stringify(report, null, 2)}\n`); + return; + } + io.stdout.write(formatDoctorReport(report)); +} + +/** Runs the doctor for the given command, writes the report, and resolves to the exit code. */ export async function runKloDoctor( + args: KloDoctorArgs, + io: KloDoctorIo = process, +
deps: KloDoctorDeps = {}, +): Promise<number> { /* Resolves to the exit code: 1 when any check failed or an error was thrown, otherwise 0. */ + try { + /* Setup checks run for every command and are placed ahead of the command-specific checks. */ const runSetupChecks = deps.runSetupChecks ?? (() => runSetupDoctorChecks()); + const setupChecks = await runSetupChecks(); + const report: DoctorReport = + args.command === 'setup' + ? { title: 'KLO setup doctor', checks: setupChecks } + : args.command === 'demo' + ? { + title: 'KLO demo doctor', + checks: [...setupChecks, ...(await runDemoProjectChecks(args.projectDir, deps))], + } + : { + title: 'KLO project doctor', + checks: [...setupChecks, ...(await runProjectChecks(args.projectDir, deps))], + }; + + writeReport(report, args.outputMode, io); + return hasFailures(report) ? 1 : 0; + } catch (error) { + /* Unexpected errors go to stderr as a single message line. */ io.stderr.write(`${error instanceof Error ? error.message : String(error)}\n`); + return 1; + } +} diff --git a/packages/cli/src/example-smoke.test.ts b/packages/cli/src/example-smoke.test.ts new file mode 100644 index 00000000..0ea974bb --- /dev/null +++ b/packages/cli/src/example-smoke.test.ts @@ -0,0 +1,252 @@ +import { execFile } from 'node:child_process'; +import { cp, mkdtemp, rm } from 'node:fs/promises'; +import { tmpdir } from 'node:os'; +import { join, resolve } from 'node:path'; +import { promisify } from 'node:util'; +import { Client } from '@modelcontextprotocol/sdk/client/index.js'; +import { StdioClientTransport } from '@modelcontextprotocol/sdk/client/stdio.js'; +import { afterEach, beforeEach, describe, expect, it } from 'vitest'; + +const execFileAsync = promisify(execFile); +/* Both paths are resolved from process.cwd(), so the suite has to be started from the directory that contains dist/. */ const CLI_BIN = resolve(process.cwd(), 'dist/bin.js'); +const EXAMPLE_DIR = resolve(process.cwd(), '../../examples/local-warehouse'); + +interface CliResult { + code: number; + stdout: string; + stderr: string; +} + +interface ExecFailure extends Error { + code?: number; + stdout?: string; + stderr?: string; +} + +/** Type guard: an Error that carries at least one of the stdout, stderr or code properties. */ function isExecFailure(error: unknown): error is ExecFailure { + return error instanceof Error && ('stdout' in error || 'stderr' in error || 'code' in error); +} + +async function runBuiltCli(args: string[]):
Promise<CliResult> { + /* Runs the built CLI with the current Node binary and a 20 s timeout; exec failures are returned as a CliResult instead of thrown. */ try { + const result = await execFileAsync(process.execPath, [CLI_BIN, ...args], { + encoding: 'utf8', + timeout: 20_000, + }); + return { + code: 0, + stdout: result.stdout, + stderr: result.stderr, + }; + } catch (error) { + if (!isExecFailure(error)) { + throw error; + } + return { + /* A non-numeric code is reported as exit code 1. */ code: typeof error.code === 'number' ? error.code : 1, + stdout: error.stdout ?? '', + stderr: error.stderr ?? error.message, + }; + } +} + +/** Asserts that the tool result has structuredContent and returns it cast to T. */ function structuredContent<T>(result: unknown): T { + const content = (result as { structuredContent?: unknown }).structuredContent; + expect(content).toBeDefined(); + return content as T; +} + +/** Parses CLI stdout as JSON and casts it to T without validation. */ function parseJsonOutput<T>(stdout: string): T { + return JSON.parse(stdout) as T; +} + +/** Recursively copies EXAMPLE_DIR into tempDir/local-warehouse and returns the path of the copy. */ async function copyExampleProject(tempDir: string): Promise<string> { + const projectDir = join(tempDir, 'local-warehouse'); + await cp(EXAMPLE_DIR, projectDir, { recursive: true }); + return projectDir; +} + +describe('standalone local warehouse example', () => { + let tempDir: string; + + beforeEach(async () => { + tempDir = await mkdtemp(join(tmpdir(), 'klo-example-smoke-')); + }); + + afterEach(async () => { + await rm(tempDir, { recursive: true, force: true }); + }); + + it('runs local CLI commands against the copied example project', async () => { + const projectDir = await copyExampleProject(tempDir); + const sourceDir = join(projectDir, 'source'); + + const knowledgeList = await runBuiltCli(['agent', 'wiki', 'search', 'revenue', '--json', '--project-dir', projectDir]); + expect(knowledgeList).toMatchObject({ code: 0, stderr: '' }); + expect(parseJsonOutput<{ results: Array<{ key: string; summary: string }> }>(knowledgeList.stdout).results).toContainEqual( + expect.objectContaining({ key: 'revenue', summary: 'Paid order value after refunds' }), + ); + + const knowledgeRead = await runBuiltCli(['agent', 'wiki', 'read', 'revenue', '--json', '--project-dir', projectDir]); + expect(knowledgeRead).toMatchObject({ code: 0, stderr: '' }); + expect(parseJsonOutput<{
content: string }>(knowledgeRead.stdout).content).toContain( + 'Revenue is paid order amount after refund adjustments.', + ); + + const slList = await runBuiltCli(['agent', 'sl', 'list', '--json', '--project-dir', projectDir, '--connection-id', 'warehouse']); + expect(slList).toMatchObject({ code: 0, stderr: '' }); + expect(parseJsonOutput<{ sources: Array<{ connectionId: string; name: string; columnCount: number }> }>(slList.stdout).sources).toContainEqual( + expect.objectContaining({ connectionId: 'warehouse', name: 'orders', columnCount: 3 }), + ); + + const slRead = await runBuiltCli([ + 'agent', + 'sl', + 'read', + 'orders', + '--json', + '--connection-id', + 'warehouse', + '--project-dir', + projectDir, + ]); + expect(slRead).toMatchObject({ code: 0, stderr: '' }); + expect(parseJsonOutput<{ yaml: string }>(slRead.stdout).yaml).toContain('name: orders'); + + /* Ingest is expected to exit 1 here; the asserted stderr says an LLM backend or an injected agentRunner is required. */ const ingest = await runBuiltCli([ + 'dev', + 'ingest', + 'run', + '--project-dir', + projectDir, + '--connection-id', + 'warehouse', + '--adapter', + 'fake', + '--source-dir', + sourceDir, + ]); + expect(ingest).toMatchObject({ code: 1, stdout: '' }); + expect(ingest.stderr).toContain( + 'klo dev ingest run requires llm.provider.backend: anthropic, vertex, or gateway, or an injected agentRunner', + ); + }, 30_000); + + it('serves local wiki and semantic-layer MCP tools against the copied example project', async () => { + const projectDir = await copyExampleProject(tempDir); + + /* The built CLI is spawned as an MCP stdio server and driven through the SDK client. */ const client = new Client({ name: 'klo-example-client', version: '0.0.0' }); + const transport = new StdioClientTransport({ + command: process.execPath, + args: [CLI_BIN, 'serve', '--mcp', 'stdio', '--project-dir', projectDir, '--user-id', 'example-user'], + stderr: 'pipe', + }); + + try { + await client.connect(transport); + + const knowledgeSearch = structuredContent<{ + results: Array<{ key: string; summary: string; score: number }>; + totalFound: number; + }>( + await client.callTool({ + name: 'knowledge_search', + arguments:
{ query: 'refund', limit: 5 }, + }), + ); + expect(knowledgeSearch.totalFound).toBe(1); + expect(knowledgeSearch.results[0]).toMatchObject({ + key: 'revenue', + summary: 'Paid order value after refunds', + }); + + const knowledgeRead = structuredContent<{ key: string; summary: string; content: string; scope: string }>( + await client.callTool({ name: 'knowledge_read', arguments: { key: 'revenue' } }), + ); + expect(knowledgeRead).toMatchObject({ + key: 'revenue', + summary: 'Paid order value after refunds', + scope: 'GLOBAL', + }); + expect(knowledgeRead.content).toContain('Revenue is paid order amount after refund adjustments.'); + + /* The server was started with --project-dir pointing at the temp copy, so this write targets the copy. */ const knowledgeWrite = structuredContent<{ success: boolean; key: string; action: string }>( + await client.callTool({ + name: 'knowledge_write', + arguments: { + key: 'gross_margin', + summary: 'Revenue after direct costs', + content: 'Gross margin subtracts direct order costs from revenue.', + tags: ['finance'], + sl_refs: ['warehouse.orders'], + }, + }), + ); + expect(knowledgeWrite).toEqual({ success: true, key: 'gross_margin', action: 'created' }); + + const slList = structuredContent<{ + sources: Array<{ + connectionId: string; + name: string; + description?: string; + columnCount: number; + measureCount: number; + joinCount: number; + }>; + totalSources: number; + }>(await client.callTool({ name: 'sl_list_sources', arguments: { connectionId: 'warehouse' } })); + expect(slList.totalSources).toBe(1); + expect(slList.sources[0]).toMatchObject({ + connectionId: 'warehouse', + name: 'orders', + description: 'Orders placed through the storefront.', + columnCount: 3, + measureCount: 2, + joinCount: 0, + }); + + const slRead = structuredContent<{ sourceName: string; yaml: string }>( + await client.callTool({ + name: 'sl_read_source', + arguments: { connectionId: 'warehouse', sourceName: 'orders' }, + }), + ); + expect(slRead.sourceName).toBe('orders'); + expect(slRead.yaml).toContain('name: orders'); +
expect(slRead.yaml).toContain('total_revenue'); + + const slWrite = structuredContent<{ success: boolean; sourceName: string }>( + await client.callTool({ + name: 'sl_write_source', + arguments: { + connectionId: 'warehouse', + sourceName: 'customers', + source: { + name: 'customers', + table: 'public.customers', + grain: ['id'], + columns: [{ name: 'id', type: 'number' }], + joins: [], + measures: [], + }, + }, + }), + ); + expect(slWrite).toMatchObject({ success: true, sourceName: 'customers' }); + + const slValidate = structuredContent<{ success: boolean; errors: string[]; warnings: string[] }>( + await client.callTool({ + name: 'sl_validate', + arguments: { connectionId: 'warehouse', names: ['orders', 'customers'] }, + }), + ); + expect(slValidate.success).toBe(true); + expect(slValidate.errors).toEqual([]); + expect(slValidate.warnings).toContain( + 'Local stdio validation checks YAML shape only; Python semantic validation is not configured.', + ); + } finally { + /* Closing the client in finally keeps a failed assertion from leaving the connection open. */ await client.close(); + } + }, 30_000); +}); diff --git a/packages/cli/src/historic-sql-doctor.test.ts b/packages/cli/src/historic-sql-doctor.test.ts new file mode 100644 index 00000000..1eaf4c59 --- /dev/null +++ b/packages/cli/src/historic-sql-doctor.test.ts @@ -0,0 +1,173 @@ +import { buildDefaultKloProjectConfig, type KloProjectConnectionConfig } from '@klo/context/project'; +import { HistoricSqlExtensionMissingError } from '@klo/context/ingest'; +import { describe, expect, it, vi } from 'vitest'; +import { + runPostgresHistoricSqlDoctorChecks, + type HistoricSqlDoctorProject, + type PostgresHistoricSqlDoctorProbe, +} from './historic-sql-doctor.js'; + +/** Builds a project fixture from the default 'warehouse' config with the given connections and the live-database and historic-sql adapters. */ function projectWithConnections(connections: Record<string, KloProjectConnectionConfig>): HistoricSqlDoctorProject { + return { + projectDir: '/tmp/klo-project', + config: { + ...buildDefaultKloProjectConfig('warehouse'), + connections, + ingest: { + ...buildDefaultKloProjectConfig('warehouse').ingest, + adapters: ['live-database', 'historic-sql'], + }, + }, + }; +} +
+describe('runPostgresHistoricSqlDoctorChecks', () => { + it('passes when no Postgres historic-SQL connections are enabled', async () => { + const checks = await runPostgresHistoricSqlDoctorChecks( + projectWithConnections({ + warehouse: { driver: 'sqlite', path: './warehouse.db', readonly: true }, + }), + { + postgresHistoricSqlProbe: vi.fn(), + }, + ); + + expect(checks).toEqual([ + { + id: 'historic-sql-postgres', + label: 'Postgres Historic SQL', + status: 'pass', + detail: 'No enabled Postgres historic-SQL connections', + }, + ]); + }); + + it('passes when the PGSS probe succeeds without warnings', async () => { + const probe = vi.fn(async () => ({ + pgServerVersion: 'PostgreSQL 16.4', + warnings: [], + })); + + const checks = await runPostgresHistoricSqlDoctorChecks( + projectWithConnections({ + warehouse: { + driver: 'postgres', + url: 'env:WAREHOUSE_DATABASE_URL', + readonly: true, + historicSql: { enabled: true, dialect: 'postgres' }, + }, + }), + { postgresHistoricSqlProbe: probe }, + ); + + expect(probe).toHaveBeenCalledWith({ + projectDir: '/tmp/klo-project', + connectionId: 'warehouse', + connection: { + driver: 'postgres', + url: 'env:WAREHOUSE_DATABASE_URL', + readonly: true, + historicSql: { enabled: true, dialect: 'postgres' }, + }, + env: process.env, + }); + expect(checks).toEqual([ + { + id: 'historic-sql-postgres-warehouse', + label: 'Postgres Historic SQL (warehouse)', + status: 'pass', + detail: 'pg_stat_statements ready (PostgreSQL 16.4)', + }, + ]); + }); + + it('warns when the PGSS probe succeeds with operational warnings', async () => { + const checks = await runPostgresHistoricSqlDoctorChecks( + projectWithConnections({ + warehouse: { + driver: 'postgres', + url: 'env:WAREHOUSE_DATABASE_URL', + readonly: true, + historicSql: { enabled: true, dialect: 'postgres' }, + }, + }), + { + postgresHistoricSqlProbe: async () => ({ + pgServerVersion: 'PostgreSQL 16.4', + warnings: [ + 'pg_stat_statements.max is 1000; set it to at least 5000 to 
reduce query-template eviction churn', + ], + }), + }, + ); + + expect(checks).toEqual([ + { + id: 'historic-sql-postgres-warehouse', + label: 'Postgres Historic SQL (warehouse)', + status: 'warn', + detail: + 'pg_stat_statements ready (PostgreSQL 16.4) with warnings: pg_stat_statements.max is 1000; set it to at least 5000 to reduce query-template eviction churn', + fix: 'Update the Postgres parameter group or config, then rerun `klo dev doctor --project-dir /tmp/klo-project`', + }, + ]); + }); + + it('fails when a connection has postgres historic SQL but is not a Postgres driver', async () => { + const checks = await runPostgresHistoricSqlDoctorChecks( + projectWithConnections({ + warehouse: { + driver: 'mysql', + url: 'env:WAREHOUSE_DATABASE_URL', + readonly: true, + historicSql: { enabled: true, dialect: 'postgres' }, + }, + }), + { + postgresHistoricSqlProbe: vi.fn(), + }, + ); + + expect(checks).toEqual([ + { + id: 'historic-sql-postgres-warehouse', + label: 'Postgres Historic SQL (warehouse)', + status: 'fail', + detail: 'connections.warehouse.historicSql.dialect is postgres but driver is mysql', + fix: 'Set connections.warehouse.driver to postgres or disable historicSql for this connection', + }, + ]); + }); + + it('maps PGSS capability errors to actionable failures', async () => { + const checks = await runPostgresHistoricSqlDoctorChecks( + projectWithConnections({ + warehouse: { + driver: 'postgres', + url: 'env:WAREHOUSE_DATABASE_URL', + readonly: true, + historicSql: { enabled: true, dialect: 'postgres' }, + }, + }), + { + postgresHistoricSqlProbe: async () => { + throw new HistoricSqlExtensionMissingError({ + dialect: 'postgres', + message: 'pg_stat_statements extension is not installed in the connection database.', + remediation: 'Run CREATE EXTENSION pg_stat_statements; against the connection database.', + }); + }, + }, + ); + + expect(checks).toEqual([ + { + id: 'historic-sql-postgres-warehouse', + label: 'Postgres Historic SQL (warehouse)', + 
status: 'fail', + detail: 'pg_stat_statements extension is not installed in the connection database.', + fix: 'Run CREATE EXTENSION pg_stat_statements; against the connection database.', + }, + ]); + }); +}); diff --git a/packages/cli/src/historic-sql-doctor.ts b/packages/cli/src/historic-sql-doctor.ts new file mode 100644 index 00000000..e734abe5 --- /dev/null +++ b/packages/cli/src/historic-sql-doctor.ts @@ -0,0 +1,160 @@ +import type { KloProjectConfig, KloProjectConnectionConfig } from '@klo/context/project'; +import type { DoctorCheck } from './doctor.js'; + +export interface HistoricSqlDoctorProject { + projectDir: string; + config: Pick; +} + +export interface PostgresHistoricSqlDoctorProbeInput { + projectDir: string; + connectionId: string; + connection: KloProjectConnectionConfig; + env: NodeJS.ProcessEnv; +} + +export interface PostgresHistoricSqlDoctorProbeResult { + pgServerVersion: string; + warnings: string[]; +} + +export type PostgresHistoricSqlDoctorProbe = ( + input: PostgresHistoricSqlDoctorProbeInput, +) => Promise; + +export interface HistoricSqlDoctorDeps { + env?: NodeJS.ProcessEnv; + postgresHistoricSqlProbe?: PostgresHistoricSqlDoctorProbe; +} + +function check(status: DoctorCheck['status'], id: string, label: string, detail: string, fix?: string): DoctorCheck { + return fix ? { id, label, status, detail, fix } : { id, label, status, detail }; +} + +function historicSqlRecord(connection: KloProjectConnectionConfig): Record | null { + const historicSql = connection.historicSql; + return historicSql && typeof historicSql === 'object' && !Array.isArray(historicSql) + ? 
(historicSql as Record) + : null; +} + +function isEnabledPostgresHistoricSql(connection: KloProjectConnectionConfig): boolean { + const historicSql = historicSqlRecord(connection); + return historicSql?.enabled === true && historicSql.dialect === 'postgres'; +} + +function isPostgresDriver(connection: KloProjectConnectionConfig): boolean { + const driver = String(connection.driver ?? '').toLowerCase(); + return driver === 'postgres' || driver === 'postgresql'; +} + +function checkId(connectionId: string): string { + return `historic-sql-postgres-${connectionId.replace(/[^a-z0-9_-]+/gi, '-')}`; +} + +function capabilityFailureFix(error: unknown, connectionId: string, projectDir: string): string { + if (error instanceof Error && error.name === 'HistoricSqlExtensionMissingError' && 'remediation' in error) { + return String(error.remediation); + } + if (error instanceof Error && error.name === 'HistoricSqlGrantsMissingError' && 'remediation' in error) { + return String(error.remediation); + } + if (error instanceof Error && error.name === 'HistoricSqlVersionUnsupportedError') { + return 'Use PostgreSQL 14 or newer, or disable historicSql for this connection'; + } + return `Fix connections.${connectionId} Postgres settings, then rerun \`klo dev doctor --project-dir ${projectDir}\``; +} + +function failureDetail(error: unknown): string { + if (error instanceof Error && error.message.trim().length > 0) { + return error.message.trim().split('\n')[0] ?? error.message.trim(); + } + return String(error); +} + +async function defaultPostgresHistoricSqlProbe( + input: PostgresHistoricSqlDoctorProbeInput, +): Promise { + const [{ PostgresPgssQueryHistoryReader }, { KloPostgresHistoricSqlQueryClient, isKloPostgresConnectionConfig }] = + await Promise.all([import('@klo/context/ingest'), import('@klo/connector-postgres')]); + + if (!isKloPostgresConnectionConfig(input.connection)) { + throw new Error(`Native PostgreSQL connector cannot run driver "${input.connection.driver ?? 
'unknown'}"`); + } + + const client = new KloPostgresHistoricSqlQueryClient({ + connectionId: input.connectionId, + connection: input.connection, + env: input.env, + }); + try { + return await new PostgresPgssQueryHistoryReader().probe(client); + } finally { + await client.cleanup(); + } +} + +export async function runPostgresHistoricSqlDoctorChecks( + project: HistoricSqlDoctorProject, + deps: HistoricSqlDoctorDeps = {}, +): Promise { + const targets = Object.entries(project.config.connections) + .filter(([, connection]) => isEnabledPostgresHistoricSql(connection)) + .sort(([left], [right]) => left.localeCompare(right)); + + if (targets.length === 0) { + return [ + check('pass', 'historic-sql-postgres', 'Postgres Historic SQL', 'No enabled Postgres historic-SQL connections'), + ]; + } + + const probe = deps.postgresHistoricSqlProbe ?? defaultPostgresHistoricSqlProbe; + const env = deps.env ?? process.env; + const checks: DoctorCheck[] = []; + for (const [connectionId, connection] of targets) { + const label = `Postgres Historic SQL (${connectionId})`; + if (!isPostgresDriver(connection)) { + checks.push( + check( + 'fail', + checkId(connectionId), + label, + `connections.${connectionId}.historicSql.dialect is postgres but driver is ${String(connection.driver)}`, + `Set connections.${connectionId}.driver to postgres or disable historicSql for this connection`, + ), + ); + continue; + } + + try { + const result = await probe({ projectDir: project.projectDir, connectionId, connection, env }); + if (result.warnings.length > 0) { + checks.push( + check( + 'warn', + checkId(connectionId), + label, + `pg_stat_statements ready (${result.pgServerVersion}) with warnings: ${result.warnings.join('; ')}`, + `Update the Postgres parameter group or config, then rerun \`klo dev doctor --project-dir ${project.projectDir}\``, + ), + ); + } else { + checks.push( + check('pass', checkId(connectionId), label, `pg_stat_statements ready (${result.pgServerVersion})`), + ); + } + } catch 
(error) { + checks.push( + check( + 'fail', + checkId(connectionId), + label, + failureDetail(error), + capabilityFailureFix(error, connectionId, project.projectDir), + ), + ); + } + } + + return checks; +} diff --git a/packages/cli/src/index.test.ts b/packages/cli/src/index.test.ts new file mode 100644 index 00000000..6df3811d --- /dev/null +++ b/packages/cli/src/index.test.ts @@ -0,0 +1,1977 @@ +import { mkdtemp, readFile, rm, writeFile } from 'node:fs/promises'; +import { createRequire } from 'node:module'; +import { tmpdir } from 'node:os'; +import { join } from 'node:path'; +import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'; + +import { + getKloCliPackageInfo, + rendererUnavailableVizFallback, + renderMemoryFlowTui, + resolveVizFallback, + runKloCli, + sanitizeMemoryFlowTuiError, + startLiveMemoryFlowTui, + warnVizFallbackOnce, +} from './index.js'; + +const require = createRequire(import.meta.url); + +function makeIo(options: { stdoutIsTty?: boolean } = {}) { + let stdout = ''; + let stderr = ''; + return { + io: { + stdout: { + isTTY: options.stdoutIsTty, + write: (chunk: string) => { + stdout += chunk; + }, + }, + stderr: { + write: (chunk: string) => { + stderr += chunk; + }, + }, + }, + stdout: () => stdout, + stderr: () => stderr, + }; +} + +describe('getKloCliPackageInfo', () => { + it('identifies the CLI package and its context dependency', () => { + expect(getKloCliPackageInfo()).toEqual({ + name: '@klo/cli', + version: '0.0.0-private', + contextPackageName: '@klo/context', + }); + }); + + it('exports package metadata for package managers and runtime diagnostics', () => { + const packageJson = require('@klo/cli/package.json') as { name: string; version: string }; + + expect(packageJson).toMatchObject({ + name: '@klo/cli', + version: '0.0.0-private', + }); + }); +}); + +describe('memory-flow renderer exports', () => { + it('exports runtime-agnostic renderer entry points for hosted terminal clients', () => { + 
expect(renderMemoryFlowTui).toBeTypeOf('function'); + expect(startLiveMemoryFlowTui).toBeTypeOf('function'); + expect(sanitizeMemoryFlowTuiError('token=abc123')).toBe('[redacted]'); + }); + + it('exports shared visualization fallback helpers for hosted terminal clients', () => { + const fallback = resolveVizFallback({ stdout: { isTTY: true }, stderr: { write: vi.fn() } }, { TERM: 'dumb' }); + + expect(fallback).toEqual({ + shouldDegrade: true, + reason: 'term-dumb', + message: 'TERM=dumb does not support the visual renderer', + }); + expect(rendererUnavailableVizFallback()).toEqual({ + shouldDegrade: true, + reason: 'renderer-unavailable', + message: 'the terminal renderer is unavailable', + }); + expect(warnVizFallbackOnce).toBeTypeOf('function'); + }); +}); + +describe('runKloCli', () => { + let tempDir: string; + + beforeEach(async () => { + tempDir = await mkdtemp(join(tmpdir(), 'klo-cli-')); + }); + + afterEach(async () => { + await rm(tempDir, { recursive: true, force: true }); + }); + + it('prints version information', async () => { + const testIo = makeIo(); + + await expect(runKloCli(['--version'], testIo.io)).resolves.toBe(0); + + expect(testIo.stdout()).toBe('@klo/cli 0.0.0-private\n'); + expect(testIo.stderr()).toBe(''); + }); + + it('prints the May 6 public command surface in root help', async () => { + const testIo = makeIo(); + + await expect(runKloCli(['--help'], testIo.io)).resolves.toBe(0); + + expect(testIo.stdout()).toContain('Usage: klo [options] [command]'); + for (const command of ['setup', 'connection', 'ingest', 'wiki', 'sl', 'serve', 'status']) { + expect(testIo.stdout()).toContain(`${command}`); + } + for (const removed of ['demo', 'init', 'connect', 'scan', 'ask', 'knowledge', 'agent', 'completion']) { + expect(testIo.stdout()).not.toContain(`${removed} [`); + expect(testIo.stdout()).not.toContain(`${removed} `); + } + expect(testIo.stdout()).toContain('--project-dir '); + expect(testIo.stdout()).toContain('KLO_PROJECT_DIR'); + 
expect(testIo.stdout()).toContain('--debug'); + expect(testIo.stdout()).not.toContain('--' + 'verbose'); + expect(testIo.stdout()).toContain('Advanced:'); + expect(testIo.stdout()).toContain('klo dev'); + expect(testIo.stderr()).toBe(''); + }); + + it('exposes demo under setup help instead of root help', async () => { + const testIo = makeIo(); + + await expect(runKloCli(['setup', '--help'], testIo.io)).resolves.toBe(0); + + expect(testIo.stdout()).toContain('Usage: klo setup [options] [command]'); + expect(testIo.stdout()).toContain('demo'); + expect(testIo.stdout()).toContain('Run the packaged KLO demo from setup'); + expect(testIo.stdout()).not.toContain('--skip-llm'); + expect(testIo.stdout()).not.toContain('--skip-embeddings'); + expect(testIo.stdout()).not.toContain('--embedding-model'); + expect(testIo.stdout()).not.toContain('--embedding-dimensions'); + expect(testIo.stdout()).not.toContain('--embedding-base-url'); + expect(testIo.stderr()).toBe(''); + }); + + it('prints help for bare klo outside a TTY', async () => { + const setup = vi.fn(async () => 0); + const testIo = makeIo({ stdoutIsTty: false }); + + await expect(runKloCli([], testIo.io, { setup })).resolves.toBe(0); + + expect(testIo.stdout()).toContain('Usage: klo [options] [command]'); + expect(setup).not.toHaveBeenCalled(); + expect(testIo.stderr()).toBe(''); + }); + + it('starts setup for bare klo in a TTY when no project is discoverable', async () => { + const { mkdtemp, realpath, rm } = await import('node:fs/promises'); + const { tmpdir } = await import('node:os'); + const { join } = await import('node:path'); + const originalCwd = process.cwd(); + const tempDir = await mkdtemp(join(tmpdir(), 'klo-bare-setup-')); + const setup = vi.fn(async () => 0); + const testIo = makeIo({ stdoutIsTty: true }); + const previousProjectDir = process.env.KLO_PROJECT_DIR; + const expectedProjectDir = await realpath(tempDir); + + try { + delete process.env.KLO_PROJECT_DIR; + process.chdir(tempDir); + + await 
expect(runKloCli([], testIo.io, { setup })).resolves.toBe(0); + + expect(setup).toHaveBeenCalledWith( + { + command: 'run', + projectDir: expectedProjectDir, + mode: 'auto', + agents: false, + agentScope: 'project', + agentInstallMode: 'cli', + skipAgents: false, + inputMode: 'auto', + yes: false, + skipLlm: false, + skipEmbeddings: false, + databaseSchemas: [], + skipDatabases: false, + skipSources: false, + }, + testIo.io, + ); + expect(testIo.stdout()).not.toContain('Usage: klo [options] [command]'); + expect(testIo.stderr()).toBe(''); + } finally { + process.chdir(originalCwd); + if (previousProjectDir === undefined) { + delete process.env.KLO_PROJECT_DIR; + } else { + process.env.KLO_PROJECT_DIR = previousProjectDir; + } + await rm(tempDir, { recursive: true, force: true }); + } + }); + + it('prints help without project status for bare klo in a TTY when a project is discoverable', async () => { + const { mkdtemp, realpath, rm, writeFile } = await import('node:fs/promises'); + const { tmpdir } = await import('node:os'); + const { join } = await import('node:path'); + const originalCwd = process.cwd(); + const previousProjectDir = process.env.KLO_PROJECT_DIR; + const tempDir = await mkdtemp(join(tmpdir(), 'klo-bare-existing-')); + const setup = vi.fn(async () => 0); + const testIo = makeIo({ stdoutIsTty: true }); + const expectedProjectDir = await realpath(tempDir); + + try { + delete process.env.KLO_PROJECT_DIR; + await writeFile(join(tempDir, 'klo.yaml'), 'project: revenue\nconnections: {}\n', 'utf-8'); + process.chdir(tempDir); + + await expect(runKloCli([], testIo.io, { setup })).resolves.toBe(0); + + expect(testIo.stdout()).toContain('Usage: klo [options] [command]'); + expect(testIo.stdout()).not.toContain(`Project: ${expectedProjectDir}`); + expect(setup).not.toHaveBeenCalled(); + } finally { + process.chdir(originalCwd); + if (previousProjectDir === undefined) { + delete process.env.KLO_PROJECT_DIR; + } else { + process.env.KLO_PROJECT_DIR = 
previousProjectDir; + } + await rm(tempDir, { recursive: true, force: true }); + } + }); + + it('does not invoke status for bare klo in a TTY when status would fail', async () => { + const setup = vi.fn(async () => { + throw new Error('Unsupported ingest.llm: use top-level llm.provider, llm.models, and ingest.workUnits'); + }); + const testIo = makeIo({ stdoutIsTty: true }); + const previousProjectDir = process.env.KLO_PROJECT_DIR; + + try { + process.env.KLO_PROJECT_DIR = tempDir; + + await expect(runKloCli([], testIo.io, { setup })).resolves.toBe(0); + + expect(testIo.stdout()).toContain('Usage: klo [options] [command]'); + expect(setup).not.toHaveBeenCalled(); + expect(testIo.stderr()).toBe(''); + } finally { + if (previousProjectDir === undefined) { + delete process.env.KLO_PROJECT_DIR; + } else { + process.env.KLO_PROJECT_DIR = previousProjectDir; + } + } + }); + + it('rejects removed verbose global option through Commander', async () => { + const testIo = makeIo(); + const removedVerboseOption = '--' + 'verbose'; + + await expect(runKloCli([removedVerboseOption, 'connection', 'list'], testIo.io)).resolves.toBe(1); + + expect(testIo.stderr()).toContain(`unknown option '${removedVerboseOption}'`); + expect(testIo.stdout()).toBe(''); + }); + + it('prints a zsh completion function', async () => { + const testIo = makeIo(); + const zshWords = '$' + '{words[@]}'; + + await expect(runKloCli(['dev', 'completion', 'zsh'], testIo.io)).resolves.toBe(0); + + expect(testIo.stdout()).toContain('#compdef klo'); + expect(testIo.stdout()).toContain('KLO_COMPLETION_COMMAND:-klo'); + expect(testIo.stdout()).toContain(`dev __complete --shell zsh --position "$CURRENT" -- "${zshWords}"`); + expect(testIo.stdout()).toContain('compdef _klo klo'); + expect(testIo.stderr()).toBe(''); + }); + + it('installs zsh completions into the user zsh config directory', async () => { + const testIo = makeIo(); + const previousHome = process.env.HOME; + const previousZdotdir = process.env.ZDOTDIR; 
+ const tempHome = await mkdtemp(join(tmpdir(), 'klo-completion-home-')); + + try { + process.env.HOME = tempHome; + delete process.env.ZDOTDIR; + + await expect(runKloCli(['dev', 'completion', 'zsh', '--install'], testIo.io)).resolves.toBe(0); + + const completionFile = await readFile(join(tempHome, '.zfunc', '_klo'), 'utf-8'); + const zshrc = await readFile(join(tempHome, '.zshrc'), 'utf-8'); + expect(completionFile).toContain('#compdef klo'); + expect(zshrc).toContain('# >>> klo completion >>>'); + expect(zshrc).toContain('_klo_completion_command()'); + expect(zshrc).toContain('"name": "klo-workspace"'); + expect(zshrc).toContain('scripts/run-klo.mjs'); + expect(zshrc).toContain("export KLO_COMPLETION_COMMAND='$(_klo_completion_command)'"); + expect(zshrc).toContain('setopt complete_aliases'); + expect(zshrc).toContain('fpath=("$HOME/.zfunc" $fpath)'); + expect(zshrc).toContain('autoload -Uz compinit'); + expect(zshrc).toContain('compinit'); + expect(testIo.stdout()).toContain('Installed zsh completion:'); + expect(testIo.stdout()).toContain('Restart your shell or run: source ~/.zshrc'); + expect(testIo.stderr()).toBe(''); + } finally { + if (previousHome === undefined) { + delete process.env.HOME; + } else { + process.env.HOME = previousHome; + } + if (previousZdotdir === undefined) { + delete process.env.ZDOTDIR; + } else { + process.env.ZDOTDIR = previousZdotdir; + } + await rm(tempHome, { recursive: true, force: true }); + } + }); + + it('updates zsh completion install block idempotently before existing compinit', async () => { + const firstIo = makeIo(); + const secondIo = makeIo(); + const previousHome = process.env.HOME; + const previousZdotdir = process.env.ZDOTDIR; + const tempHome = await mkdtemp(join(tmpdir(), 'klo-completion-home-')); + + try { + process.env.HOME = tempHome; + delete process.env.ZDOTDIR; + await writeFile(join(tempHome, '.zshrc'), 'export EDITOR=vim\nautoload -Uz compinit\ncompinit\n', 'utf-8'); + + await expect(runKloCli(['dev', 
'completion', 'zsh', '--install'], firstIo.io)).resolves.toBe(0); + await expect(runKloCli(['dev', 'completion', 'zsh', '--install'], secondIo.io)).resolves.toBe(0); + + const zshrc = await readFile(join(tempHome, '.zshrc'), 'utf-8'); + expect(zshrc.match(/# >>> klo completion >>>/g)).toHaveLength(1); + expect(zshrc.indexOf('fpath=("$HOME/.zfunc" $fpath)')).toBeLessThan(zshrc.indexOf('autoload -Uz compinit')); + expect(zshrc.match(/_klo_completion_command\(\)/g)).toHaveLength(1); + expect(zshrc.match(/^compinit$/gm)).toHaveLength(1); + expect(secondIo.stdout()).toContain('Updated zsh config:'); + expect(firstIo.stderr()).toBe(''); + expect(secondIo.stderr()).toBe(''); + } finally { + if (previousHome === undefined) { + delete process.env.HOME; + } else { + process.env.HOME = previousHome; + } + if (previousZdotdir === undefined) { + delete process.env.ZDOTDIR; + } else { + process.env.ZDOTDIR = previousZdotdir; + } + await rm(tempHome, { recursive: true, force: true }); + } + }); + + it('completes root and nested Commander command names', async () => { + const rootIo = makeIo(); + const connectionIo = makeIo(); + + await expect( + runKloCli(['dev', '__complete', '--shell', 'zsh', '--position', '2', '--', 'klo', 'co'], rootIo.io), + ).resolves.toBe(0); + await expect( + runKloCli( + ['dev', '__complete', '--shell', 'zsh', '--position', '3', '--', 'klo', 'connection', 'm'], + connectionIo.io, + ), + ).resolves.toBe(0); + + expect(rootIo.stdout()).toContain('connection:Add, list, test, and map data sources'); + expect(rootIo.stdout()).not.toContain('__complete'); + expect(connectionIo.stdout()).toContain('map:Refresh and validate BI-to-warehouse mappings'); + expect(connectionIo.stdout()).toContain('mapping:Manage Metabase warehouse mappings'); + expect(rootIo.stderr()).toBe(''); + expect(connectionIo.stderr()).toBe(''); + }); + + it('completes options and Commander choices', async () => { + const optionIo = makeIo(); + const choiceIo = makeIo(); + + await expect( + 
runKloCli( + ['dev', '__complete', '--shell', 'zsh', '--position', '4', '--', 'klo', 'connection', 'add', '--cr'], + optionIo.io, + ), + ).resolves.toBe(0); + await expect( + runKloCli( + [ + 'dev', + '__complete', + '--shell', + 'zsh', + '--position', + '7', + '--', + 'klo', + 'connection', + 'add', + 'notion', + 'docs', + '--crawl-mode', + '', + ], + choiceIo.io, + ), + ).resolves.toBe(0); + + expect(optionIo.stdout()).toContain('--crawl-mode:Notion crawl mode'); + expect(choiceIo.stdout()).toContain('all_accessible'); + expect(choiceIo.stdout()).toContain('selected_roots'); + expect(optionIo.stderr()).toBe(''); + expect(choiceIo.stderr()).toBe(''); + }); + + it('dispatches serve stdio commands', async () => { + const testIo = makeIo(); + const serveStdio = vi.fn().mockResolvedValue(0); + + await expect( + runKloCli(['--project-dir', tempDir, 'serve', '--mcp', 'stdio', '--user-id', 'agent'], testIo.io, { + serveStdio, + }), + ).resolves.toBe(0); + + expect(serveStdio).toHaveBeenCalledWith({ + mcp: 'stdio', + projectDir: tempDir, + userId: 'agent', + semanticCompute: false, + semanticComputeUrl: undefined, + executeQueries: false, + memoryCapture: false, + memoryModel: undefined, + }); + }); + + it('routes public ingest through the public ingest parser', async () => { + const testIo = makeIo(); + const ingest = vi.fn().mockResolvedValue(0); + + await expect( + runKloCli(['--project-dir', '/tmp/project', 'ingest', 'warehouse'], testIo.io, { publicIngest: ingest }), + ).resolves.toBe(0); + + expect(ingest).toHaveBeenCalledWith( + { + command: 'run', + projectDir: '/tmp/project', + targetConnectionId: 'warehouse', + all: false, + json: false, + inputMode: 'auto', + }, + testIo.io, + ); + }); + + it('prints public ingest watch help from Commander', async () => { + const testIo = makeIo(); + const publicIngest = vi.fn(async () => 0); + const lowLevelIngest = vi.fn(async () => 0); + + await expect( + runKloCli(['ingest', 'watch', '--help'], testIo.io, { publicIngest, 
ingest: lowLevelIngest }), + ).resolves.toBe(0); + + expect(testIo.stdout()).toContain('Usage: klo ingest watch [options] [runId]'); + expect(testIo.stdout()).toContain('[runId]'); + expect(testIo.stdout()).toContain('--project-dir '); + expect(testIo.stdout()).toContain('--json'); + expect(testIo.stdout()).toContain('--no-input'); + expect(testIo.stderr()).toBe(''); + expect(publicIngest).not.toHaveBeenCalled(); + expect(lowLevelIngest).not.toHaveBeenCalled(); + }); + + it('dispatches public ingest status and watch through Commander', async () => { + const statusIo = makeIo(); + const watchIo = makeIo(); + const publicIngest = vi.fn(async () => 0); + + await expect( + runKloCli(['--project-dir', tempDir, 'ingest', 'status', 'run-1', '--json', '--no-input'], statusIo.io, { + publicIngest, + }), + ).resolves.toBe(0); + await expect( + runKloCli(['--project-dir', tempDir, 'ingest', 'watch', '--no-input'], watchIo.io, { + publicIngest, + }), + ).resolves.toBe(0); + + expect(publicIngest).toHaveBeenNthCalledWith( + 1, + { + command: 'status', + projectDir: tempDir, + runId: 'run-1', + json: true, + inputMode: 'disabled', + }, + statusIo.io, + ); + expect(publicIngest).toHaveBeenNthCalledWith( + 2, + { + command: 'watch', + projectDir: tempDir, + json: false, + inputMode: 'disabled', + }, + watchIo.io, + ); + expect(statusIo.stderr()).toBe(''); + expect(watchIo.stderr()).toBe(''); + }); + + it('rejects standalone demo commands', async () => { + const testIo = makeIo(); + const demo = vi.fn().mockResolvedValue(0); + + await expect(runKloCli(['demo', '--mode', 'replay', '--no-input'], testIo.io, { demo })).resolves.toBe(1); + + expect(testIo.stderr()).toMatch(/unknown command|error:/i); + expect(demo).not.toHaveBeenCalled(); + }); + + it('dispatches setup demo commands', async () => { + const testIo = makeIo(); + const demo = vi.fn().mockResolvedValue(0); + + await expect( + runKloCli(['--project-dir', tempDir, 'setup', 'demo', '--mode', 'replay', '--no-input'], 
testIo.io, { demo }), + ).resolves.toBe(0); + + expect(demo).toHaveBeenCalledWith( + { + command: 'replay', + projectDir: tempDir, + outputMode: 'viz', + inputMode: 'disabled', + }, + testIo.io, + ); + + demo.mockClear(); + await expect( + runKloCli(['--project-dir', tempDir, 'setup', 'demo', '--mode', 'seeded', '--no-input'], testIo.io, { + demo, + }), + ).resolves.toBe(0); + expect(demo).toHaveBeenCalledWith( + { + command: 'seeded', + projectDir: tempDir, + outputMode: 'viz', + inputMode: 'disabled', + }, + testIo.io, + ); + + demo.mockClear(); + await expect( + runKloCli(['--project-dir', tempDir, 'setup', '--no-input', 'demo', '--mode', 'seeded'], testIo.io, { + demo, + }), + ).resolves.toBe(0); + expect(demo).toHaveBeenCalledWith( + { + command: 'seeded', + projectDir: tempDir, + outputMode: 'viz', + inputMode: 'disabled', + }, + testIo.io, + ); + + demo.mockClear(); + await expect( + runKloCli(['--project-dir', tempDir, 'setup', 'demo', 'inspect', '--no-input'], testIo.io, { demo }), + ).resolves.toBe(0); + expect(demo).toHaveBeenCalledWith( + { + command: 'inspect', + projectDir: tempDir, + outputMode: 'plain', + inputMode: 'disabled', + }, + testIo.io, + ); + }); + + it('dispatches demo ingest argv', async () => { + const testIo = makeIo(); + const demo = vi.fn().mockResolvedValue(0); + + await expect( + runKloCli(['--project-dir', tempDir, 'setup', 'demo', 'ingest', '--mode', 'full', '--no-input'], testIo.io, { + demo, + }), + ).resolves.toBe(0); + + expect(demo).toHaveBeenCalledWith( + { + command: 'ingest', + mode: 'full', + projectDir: tempDir, + outputMode: 'viz', + inputMode: 'disabled', + }, + testIo.io, + ); + + demo.mockClear(); + await expect( + runKloCli(['--project-dir', tempDir, 'setup', '--no-input', 'demo', 'ingest', '--mode', 'seeded'], testIo.io, { + demo, + }), + ).resolves.toBe(0); + + expect(demo).toHaveBeenCalledWith( + { + command: 'ingest', + mode: 'seeded', + projectDir: tempDir, + outputMode: 'viz', + inputMode: 'disabled', + }, + 
testIo.io, + ); + + demo.mockClear(); + await expect( + runKloCli( + ['--project-dir', tempDir, 'setup', 'demo', 'ingest', '--mode', 'full', '--no-input', '--plain'], + testIo.io, + { + demo, + }, + ), + ).resolves.toBe(0); + + expect(demo).toHaveBeenCalledWith( + { + command: 'ingest', + mode: 'full', + projectDir: tempDir, + outputMode: 'plain', + inputMode: 'disabled', + }, + testIo.io, + ); + }); + + it('prints public ingest help without invoking ingest execution', async () => { + const testIo = makeIo(); + const publicIngest = vi.fn(); + const lowLevelIngest = vi.fn(); + + await expect(runKloCli(['ingest', '--help'], testIo.io, { publicIngest, ingest: lowLevelIngest })).resolves.toBe(0); + + expect(testIo.stdout()).toContain('Usage: klo ingest [options] [connectionId]'); + expect(testIo.stdout()).toContain('Build and refresh KLO context from configured sources'); + expect(testIo.stdout()).toContain('status'); + expect(testIo.stdout()).toContain('watch'); + expect(testIo.stdout()).toContain('klo ingest --all [options]'); + expect(testIo.stdout()).toContain('klo ingest status [runId] [options]'); + expect(testIo.stdout()).toContain('klo ingest watch [runId] [options]'); + expect(testIo.stdout()).not.toContain('klo ingest replay [options]'); + expect(testIo.stdout()).toContain('--no-input'); + expect(testIo.stdout()).not.toContain('--adapter'); + expect(testIo.stderr()).toBe(''); + expect(publicIngest).not.toHaveBeenCalled(); + expect(lowLevelIngest).not.toHaveBeenCalled(); + }); + + it('reserves public ingest run while keeping dev ingest run available', async () => { + const publicRunIo = makeIo(); + const publicHelpIo = makeIo(); + const devRunIo = makeIo(); + const publicIngest = vi.fn(async () => 0); + const lowLevelIngest = vi.fn(async () => 0); + + await expect(runKloCli(['ingest', 'run'], publicRunIo.io, { publicIngest, ingest: lowLevelIngest })).resolves.toBe( + 1, + ); + expect(publicRunIo.stderr()).toMatch(/invalid argument|reserved|run/i); + 
expect(publicIngest).not.toHaveBeenCalled(); + + await expect( + runKloCli(['ingest', 'run', '--help'], publicHelpIo.io, { publicIngest, ingest: lowLevelIngest }), + ).resolves.toBe(0); + expect(publicHelpIo.stdout()).toContain('Usage: klo ingest [options] [connectionId]'); + expect(publicHelpIo.stdout()).not.toContain('Usage: klo ingest ' + 'run'); + + await expect( + runKloCli(['dev', 'ingest', 'run', '--connection-id', 'warehouse', '--adapter', 'metabase'], devRunIo.io, { + publicIngest, + ingest: lowLevelIngest, + }), + ).resolves.toBe(0); + expect(lowLevelIngest).toHaveBeenCalledWith( + expect.objectContaining({ command: 'run', connectionId: 'warehouse', adapter: 'metabase' }), + expect.anything(), + ); + }); + + it('dispatches dev doctor and ingest parser cases through Commander', async () => { + const doctor = vi.fn(async () => 0); + const ingest = vi.fn(async () => 0); + const doctorIo = makeIo(); + const ingestRunIo = makeIo(); + const ingestReplayHelpIo = makeIo(); + + await expect(runKloCli(['dev', 'doctor', 'setup', '--json', '--no-input'], doctorIo.io, { doctor })).resolves.toBe( + 0, + ); + await expect( + runKloCli( + [ + 'dev', + 'ingest', + 'run', + '--project-dir', + tempDir, + '--connection-id', + 'warehouse', + '--adapter', + 'fake', + '--source-dir', + tempDir, + '--debug-llm-request-file', + `${tempDir}/debug.jsonl`, + '--json', + '--no-input', + ], + ingestRunIo.io, + { ingest }, + ), + ).resolves.toBe(0); + await expect(runKloCli(['dev', 'ingest', 'replay', '--help'], ingestReplayHelpIo.io, { ingest })).resolves.toBe(0); + + expect(doctor).toHaveBeenCalledWith({ command: 'setup', outputMode: 'json', inputMode: 'disabled' }, doctorIo.io); + expect(ingest).toHaveBeenCalledWith( + { + command: 'run', + projectDir: tempDir, + connectionId: 'warehouse', + adapter: 'fake', + sourceDir: tempDir, + databaseIntrospectionUrl: undefined, + debugLlmRequestFile: `${tempDir}/debug.jsonl`, + outputMode: 'json', + inputMode: 'disabled', + }, + 
ingestRunIo.io, + ); + expect(ingestReplayHelpIo.stdout()).toContain('Usage: klo dev ingest replay [options] '); + expect(ingestReplayHelpIo.stdout()).toContain(''); + expect(doctorIo.stderr()).toBe(''); + expect(ingestRunIo.stderr()).toBe(''); + expect(ingestReplayHelpIo.stderr()).toBe(''); + }); + + it('dispatches public connection through the existing connection implementation', async () => { + const testIo = makeIo(); + const connection = vi.fn(async () => 0); + + await expect(runKloCli(['--project-dir', tempDir, 'connection', 'list'], testIo.io, { connection })).resolves.toBe( + 0, + ); + + expect(connection).toHaveBeenCalledWith({ command: 'list', projectDir: tempDir }, testIo.io); + expect(testIo.stderr()).toBe(''); + }); + + it('dispatches setup status and top-level status through the setup runner', async () => { + const setup = vi.fn(async () => 0); + const setupIo = makeIo(); + const statusIo = makeIo(); + + await expect( + runKloCli(['--project-dir', tempDir, 'setup', 'status', '--json'], setupIo.io, { setup }), + ).resolves.toBe(0); + await expect(runKloCli(['--project-dir', tempDir, 'status', '--json'], statusIo.io, { setup })).resolves.toBe(0); + + expect(setup).toHaveBeenNthCalledWith(1, { command: 'status', projectDir: tempDir, json: true }, setupIo.io); + expect(setup).toHaveBeenNthCalledWith(2, { command: 'status', projectDir: tempDir, json: true }, statusIo.io); + }); + + it('dispatches setup context recovery commands through the setup runner', async () => { + const setup = vi.fn(async () => 0); + const buildIo = makeIo(); + const watchIo = makeIo(); + const statusIo = makeIo(); + const stopIo = makeIo(); + + await expect(runKloCli(['--project-dir', tempDir, 'setup', 'context', 'build'], buildIo.io, { setup })).resolves.toBe( + 0, + ); + await expect( + runKloCli(['--project-dir', tempDir, 'setup', 'context', 'watch', 'setup-context-local-1'], watchIo.io, { + setup, + }), + ).resolves.toBe(0); + await expect( + runKloCli(['--project-dir', 
tempDir, 'setup', 'context', 'status', 'setup-context-local-1', '--json'], statusIo.io, { + setup, + }), + ).resolves.toBe(0); + await expect( + runKloCli(['--project-dir', tempDir, 'setup', 'context', 'stop', 'setup-context-local-1'], stopIo.io, { + setup, + }), + ).resolves.toBe(0); + + expect(setup).toHaveBeenNthCalledWith( + 1, + { command: 'context-build', projectDir: tempDir, inputMode: 'auto' }, + buildIo.io, + ); + expect(setup).toHaveBeenNthCalledWith( + 2, + { command: 'context-watch', projectDir: tempDir, runId: 'setup-context-local-1', inputMode: 'auto' }, + watchIo.io, + ); + expect(setup).toHaveBeenNthCalledWith( + 3, + { command: 'context-status', projectDir: tempDir, runId: 'setup-context-local-1', json: true }, + statusIo.io, + ); + expect(setup).toHaveBeenNthCalledWith( + 4, + { command: 'context-stop', projectDir: tempDir, runId: 'setup-context-local-1' }, + stopIo.io, + ); + }); + + it('dispatches Anthropic setup flags to the setup runner', async () => { + const setup = vi.fn(async () => 0); + const setupIo = makeIo(); + + await expect( + runKloCli( + [ + '--project-dir', + tempDir, + 'setup', + '--no-input', + '--anthropic-api-key-env', + 'ANTHROPIC_API_KEY', + '--anthropic-model', + 'claude-sonnet-4-6', + ], + setupIo.io, + { setup }, + ), + ).resolves.toBe(0); + + expect(setup).toHaveBeenCalledWith( + expect.objectContaining({ + command: 'run', + projectDir: tempDir, + inputMode: 'disabled', + anthropicApiKeyEnv: 'ANTHROPIC_API_KEY', + anthropicModel: 'claude-sonnet-4-6', + skipLlm: false, + }), + setupIo.io, + ); + }); + + it('rejects conflicting Anthropic credential setup flags', async () => { + const setup = vi.fn(async () => 0); + const setupIo = makeIo(); + + await expect( + runKloCli( + [ + '--project-dir', + tempDir, + 'setup', + '--anthropic-api-key-env', + 'ANTHROPIC_API_KEY', + '--anthropic-api-key-file', + '/tmp/anthropic-key', + ], + setupIo.io, + { setup }, + ), + ).resolves.toBe(1); + + expect(setup).not.toHaveBeenCalled(); + 
expect(setupIo.stderr()).toContain('Choose only one Anthropic credential source'); + }); + + it('dispatches embedding setup flags to the setup runner', async () => { + const setup = vi.fn(async () => 0); + const setupIo = makeIo(); + + await expect( + runKloCli( + [ + '--project-dir', + tempDir, + 'setup', + '--no-input', + '--skip-llm', + '--embedding-backend', + 'openai', + '--embedding-api-key-env', + 'OPENAI_API_KEY', + ], + setupIo.io, + { setup }, + ), + ).resolves.toBe(0); + + expect(setup).toHaveBeenCalledWith( + expect.objectContaining({ + command: 'run', + projectDir: tempDir, + inputMode: 'disabled', + skipLlm: true, + embeddingBackend: 'openai', + embeddingApiKeyEnv: 'OPENAI_API_KEY', + skipEmbeddings: false, + }), + setupIo.io, + ); + }); + + it('dispatches database setup flags to the setup runner', async () => { + const setup = vi.fn(async () => 0); + const setupIo = makeIo(); + + await expect( + runKloCli( + [ + 'setup', + '--project-dir', + '/tmp/project', + '--no-input', + '--yes', + '--skip-llm', + '--skip-embeddings', + '--database', + 'postgres', + '--new-database-connection-id', + 'warehouse', + '--database-url', + 'env:DATABASE_URL', + '--database-schema', + 'public', + '--enable-historic-sql', + '--historic-sql-window-days', + '30', + '--historic-sql-min-calls', + '12', + ], + setupIo.io, + { setup }, + ), + ).resolves.toBe(0); + + expect(setup).toHaveBeenCalledWith( + expect.objectContaining({ + command: 'run', + projectDir: '/tmp/project', + inputMode: 'disabled', + yes: true, + skipLlm: true, + skipEmbeddings: true, + databaseDrivers: ['postgres'], + databaseConnectionId: 'warehouse', + databaseUrl: 'env:DATABASE_URL', + databaseSchemas: ['public'], + enableHistoricSql: true, + historicSqlWindowDays: 30, + historicSqlMinCalls: 12, + skipDatabases: false, + }), + setupIo.io, + ); + }); + + it('dispatches setup source flags', async () => { + const setup = vi.fn(async () => 0); + const testIo = makeIo(); + + await expect( + runKloCli( + [ + 
'--project-dir', + tempDir, + 'setup', + '--no-input', + '--source', + 'metabase', + '--source-connection-id', + 'prod_metabase', + '--source-url', + 'https://metabase.example.com', + '--source-api-key-ref', + 'env:METABASE_API_KEY', + '--source-warehouse-connection-id', + 'warehouse', + '--metabase-database-id', + '1', + '--skip-llm', + '--skip-embeddings', + '--skip-databases', + ], + testIo.io, + { setup }, + ), + ).resolves.toBe(0); + + expect(setup).toHaveBeenCalledWith( + expect.objectContaining({ + command: 'run', + projectDir: tempDir, + source: 'metabase', + sourceConnectionId: 'prod_metabase', + sourceUrl: 'https://metabase.example.com', + sourceApiKeyRef: 'env:METABASE_API_KEY', + sourceWarehouseConnectionId: 'warehouse', + metabaseDatabaseId: 1, + }), + testIo.io, + ); + }); + + it('dispatches setup agent flags and removal', async () => { + const setup = vi.fn(async () => 0); + const setupIo = makeIo(); + const removeIo = makeIo(); + + await expect( + runKloCli( + [ + '--project-dir', + tempDir, + 'setup', + '--agents', + '--target', + 'codex', + '--project', + '--agent-install-mode', + 'both', + '--no-input', + '--yes', + ], + setupIo.io, + { setup }, + ), + ).resolves.toBe(0); + await expect( + runKloCli(['--project-dir', tempDir, 'setup', 'remove', '--agents'], removeIo.io, { setup }), + ).resolves.toBe(0); + + expect(setup).toHaveBeenNthCalledWith( + 1, + expect.objectContaining({ + command: 'run', + agents: true, + target: 'codex', + agentScope: 'project', + agentInstallMode: 'both', + inputMode: 'disabled', + yes: true, + }), + setupIo.io, + ); + expect(setup).toHaveBeenNthCalledWith(2, { command: 'remove-agents', projectDir: tempDir }, removeIo.io); + }); + + it('rejects source-path with source-git-url', async () => { + const setup = vi.fn(async () => 0); + const testIo = makeIo(); + + await expect( + runKloCli( + [ + '--project-dir', + tempDir, + 'setup', + '--no-input', + '--source', + 'dbt', + '--source-path', + '/repo/dbt', + 
'--source-git-url', + 'https://github.com/acme/dbt.git', + ], + testIo.io, + { setup }, + ), + ).resolves.toBe(1); + + expect(setup).not.toHaveBeenCalled(); + expect(testIo.stderr()).toContain('Choose only one source location'); + }); + + it('rejects deterministic as a setup embedding backend', async () => { + const setup = vi.fn(async () => 0); + const setupIo = makeIo(); + + await expect( + runKloCli(['--project-dir', tempDir, 'setup', '--embedding-backend', 'deterministic'], setupIo.io, { setup }), + ).resolves.toBe(1); + + expect(setup).not.toHaveBeenCalled(); + expect(setupIo.stderr()).toContain("invalid choice 'deterministic'"); + }); + + it('rejects gateway as a setup embedding backend', async () => { + const setup = vi.fn(async () => 0); + const setupIo = makeIo(); + + await expect( + runKloCli(['--project-dir', tempDir, 'setup', '--embedding-backend', 'gateway'], setupIo.io, { setup }), + ).resolves.toBe(1); + + expect(setup).not.toHaveBeenCalled(); + expect(setupIo.stderr()).toContain("invalid choice 'gateway'"); + }); + + it('rejects conflicting embedding credential setup flags', async () => { + const setup = vi.fn(async () => 0); + const setupIo = makeIo(); + + await expect( + runKloCli( + [ + '--project-dir', + tempDir, + 'setup', + '--embedding-backend', + 'openai', + '--embedding-api-key-env', + 'OPENAI_API_KEY', + '--embedding-api-key-file', + '/tmp/openai-key', + ], + setupIo.io, + { setup }, + ), + ).resolves.toBe(1); + + expect(setup).not.toHaveBeenCalled(); + expect(setupIo.stderr()).toContain('Choose only one embedding credential source'); + }); + + it('rejects conflicting Historic SQL setup flags', async () => { + const setup = vi.fn(async () => 0); + const setupIo = makeIo(); + + await expect( + runKloCli(['--project-dir', tempDir, 'setup', '--enable-historic-sql', '--disable-historic-sql'], setupIo.io, { + setup, + }), + ).resolves.toBe(1); + + expect(setup).not.toHaveBeenCalled(); + expect(setupIo.stderr()).toContain('Choose only one 
Historic SQL action'); + }); + + it('registers hidden agent help and tools discovery without showing agent in root help', async () => { + const helpIo = makeIo(); + const toolsIo = makeIo(); + const agent = vi.fn(async () => 0); + + await expect(runKloCli(['agent', '--help'], helpIo.io, { agent })).resolves.toBe(0); + await expect( + runKloCli(['--project-dir', tempDir, 'agent', 'tools', '--json'], toolsIo.io, { agent }), + ).resolves.toBe(0); + + expect(helpIo.stdout()).toContain('Usage: klo agent'); + expect(toolsIo.stderr()).toBe(''); + expect(agent).toHaveBeenCalledWith({ command: 'tools', projectDir: tempDir, json: true }, toolsIo.io); + }); + + it('dispatches full hidden agent commands without exposing agent in root help', async () => { + const agent = vi.fn(async () => 0); + const cases = [ + { + argv: ['--project-dir', tempDir, 'agent', 'context', '--json'], + args: { command: 'context', projectDir: tempDir, json: true }, + }, + { + argv: [ + '--project-dir', + tempDir, + 'agent', + 'sl', + 'list', + '--json', + '--connection-id', + 'warehouse', + '--query', + 'orders', + ], + args: { command: 'sl-list', projectDir: tempDir, json: true, connectionId: 'warehouse', query: 'orders' }, + }, + { + argv: ['--project-dir', tempDir, 'agent', 'sl', 'read', 'orders', '--json', '--connection-id', 'warehouse'], + args: { command: 'sl-read', projectDir: tempDir, json: true, sourceName: 'orders', connectionId: 'warehouse' }, + }, + { + argv: [ + '--project-dir', + tempDir, + 'agent', + 'sl', + 'query', + '--json', + '--connection-id', + 'warehouse', + '--query-file', + '/tmp/query.json', + '--execute', + '--max-rows', + '100', + ], + args: { + command: 'sl-query', + projectDir: tempDir, + json: true, + connectionId: 'warehouse', + queryFile: '/tmp/query.json', + execute: true, + maxRows: 100, + }, + }, + { + argv: ['--project-dir', tempDir, 'agent', 'wiki', 'search', 'revenue', '--json', '--limit', '5'], + args: { command: 'wiki-search', projectDir: tempDir, json: true, 
query: 'revenue', limit: 5 }, + }, + { + argv: ['--project-dir', tempDir, 'agent', 'wiki', 'read', 'page-1', '--json'], + args: { command: 'wiki-read', projectDir: tempDir, json: true, pageId: 'page-1' }, + }, + { + argv: [ + '--project-dir', + tempDir, + 'agent', + 'sql', + 'execute', + '--json', + '--connection-id', + 'warehouse', + '--sql-file', + '/tmp/query.sql', + '--max-rows', + '100', + ], + args: { + command: 'sql-execute', + projectDir: tempDir, + json: true, + connectionId: 'warehouse', + sqlFile: '/tmp/query.sql', + maxRows: 100, + }, + }, + ]; + + for (const entry of cases) { + const io = makeIo(); + await expect(runKloCli(entry.argv, io.io, { agent })).resolves.toBe(0); + expect(agent).toHaveBeenLastCalledWith(entry.args, io.io); + expect(io.stderr()).toBe(''); + } + + const helpIo = makeIo(); + await expect(runKloCli(['--help'], helpIo.io, { agent })).resolves.toBe(0); + expect(helpIo.stdout()).not.toContain('agent '); + }); + + it('prints semantic-layer hybrid search metadata from the hidden agent sl list command', async () => { + const agent = vi.fn(async (args, io) => { + expect(args).toEqual({ + command: 'sl-list', + projectDir: tempDir, + json: true, + connectionId: 'warehouse', + query: 'paid', + }); + io.stdout.write( + `${JSON.stringify( + { + sources: [ + { + connectionId: 'warehouse', + connectionName: 'warehouse', + name: 'orders', + columnCount: 2, + measureCount: 1, + joinCount: 0, + score: 0.03278688524590164, + matchReasons: ['dictionary'], + dictionaryMatches: [{ column: 'status', values: ['paid'] }], + }, + ], + totalSources: 1, + }, + null, + 2, + )}\n`, + ); + return 0; + }); + const io = makeIo(); + + await expect( + runKloCli( + ['--project-dir', tempDir, 'agent', 'sl', 'list', '--json', '--connection-id', 'warehouse', '--query', 'paid'], + io.io, + { agent }, + ), + ).resolves.toBe(0); + + expect(JSON.parse(io.stdout())).toEqual({ + sources: [ + expect.objectContaining({ + connectionId: 'warehouse', + name: 'orders', + 
matchReasons: ['dictionary'], + dictionaryMatches: [{ column: 'status', values: ['paid'] }], + }), + ], + totalSources: 1, + }); + }); + + it('prints wiki hybrid search metadata from the hidden agent wiki search command', async () => { + const agent = vi.fn(async (args, io) => { + expect(args).toEqual({ + command: 'wiki-search', + projectDir: tempDir, + json: true, + query: 'paid order', + limit: 5, + }); + io.stdout.write( + `${JSON.stringify( + { + results: [ + { + key: 'metrics/revenue', + path: 'knowledge/global/metrics/revenue.md', + scope: 'GLOBAL', + summary: 'Revenue metric definition', + score: 0.02459016393442623, + matchReasons: ['lexical', 'token'], + }, + ], + totalFound: 1, + }, + null, + 2, + )}\n`, + ); + return 0; + }); + const io = makeIo(); + + await expect( + runKloCli(['--project-dir', tempDir, 'agent', 'wiki', 'search', 'paid order', '--json', '--limit', '5'], io.io, { + agent, + }), + ).resolves.toBe(0); + + expect(JSON.parse(io.stdout())).toEqual({ + results: [ + expect.objectContaining({ + key: 'metrics/revenue', + path: 'knowledge/global/metrics/revenue.md', + matchReasons: ['lexical', 'token'], + }), + ], + totalFound: 1, + }); + }); + + it('dispatches public connection subcommands through the existing connection implementation', async () => { + const tempDir = await mkdtemp(join(tmpdir(), 'klo-connection-dispatch-')); + const connection = vi.fn(async () => 0); + + await expect( + runKloCli(['--project-dir', tempDir, 'connection', 'list'], makeIo().io, { connection }), + ).resolves.toBe(0); + + const removeIo = makeIo(); + await expect( + runKloCli(['--project-dir', tempDir, 'connection', 'remove', 'warehouse', '--force', '--no-input'], removeIo.io, { + connection, + }), + ).resolves.toBe(0); + + const mapIo = makeIo(); + await expect( + runKloCli(['--project-dir', tempDir, 'connection', 'map', 'prod-metabase', '--json'], mapIo.io, { + connection, + }), + ).resolves.toBe(0); + + expect(connection).toHaveBeenNthCalledWith(1, { command: 
'list', projectDir: tempDir }, expect.anything()); + expect(connection).toHaveBeenNthCalledWith( + 2, + { + command: 'remove', + projectDir: tempDir, + connectionId: 'warehouse', + force: true, + inputMode: 'disabled', + }, + expect.anything(), + ); + expect(connection).toHaveBeenNthCalledWith( + 3, + { + command: 'map', + projectDir: tempDir, + sourceConnectionId: 'prod-metabase', + json: true, + }, + expect.anything(), + ); + + await rm(tempDir, { recursive: true, force: true }); + }); + + it('prints help for connection metabase setup', async () => { + const helpIo = makeIo(); + + await expect(runKloCli(['connection', 'metabase', 'setup', '--help'], helpIo.io)).resolves.toBe(0); + + expect(helpIo.stdout()).toContain('Usage: klo connection metabase setup'); + for (const option of [ + '--id ', + '--url ', + '--api-key ', + '--username ', + '--password ', + '--mint-api-key', + '--map ', + '--sync ', + '--sync-mode ', + '--run-ingest', + '--yes', + '--no-input', + ]) { + expect(helpIo.stdout()).toContain(option); + } + expect(helpIo.stdout()).toContain('Guided equivalent of:'); + for (const line of [ + 'klo connection mapping refresh --auto-accept', + 'klo connection mapping set databaseMappings =', + 'klo connection mapping set-sync-enabled --enabled true', + 'klo ingest ', + ]) { + expect(helpIo.stdout()).toContain(line); + } + expect(helpIo.stderr()).toBe(''); + }); + + it('dispatches connection metabase setup through Commander', async () => { + const connectionMetabaseSetup = vi.fn(async () => 0); + const fakeMetabaseCredential = 'mb_example'; + const setupIo = makeIo(); + + await expect( + runKloCli( + [ + 'connection', + 'metabase', + 'setup', + '--project-dir', + tempDir, + '--id', + 'metabase', + '--url', + 'http://metabase.example.test:3000', + '--api-key', + 'mb_example', + '--map', + '2=orbit', + '--sync', + '2', + '--yes', + '--no-input', + ], + setupIo.io, + { connectionMetabaseSetup }, + ), + ).resolves.toBe(0); + + 
expect(connectionMetabaseSetup).toHaveBeenCalledWith( + { + command: 'setup', + projectDir: tempDir, + connectionId: 'metabase', + url: 'http://metabase.example.test:3000', + apiKey: fakeMetabaseCredential, + mintApiKey: false, + mappings: [{ metabaseDatabaseId: 2, targetConnectionId: 'orbit' }], + syncEnabledDatabaseIds: [2], + syncMode: 'ALL', + runIngest: false, + yes: true, + inputMode: 'disabled', + }, + expect.anything(), + ); + expect(setupIo.stderr()).toBe(''); + }); + + it('validates connection metabase setup option values before runner dispatch', async () => { + const connectionMetabaseSetup = vi.fn(async () => 0); + + for (const argv of [ + [ + 'connection', + 'metabase', + 'setup', + '--project-dir', + tempDir, + '--url', + 'http://metabase.example.test:3000', + '--api-key', + 'mb_example', + '--map', + 'nope=orbit', + ], + [ + 'connection', + 'metabase', + 'setup', + '--project-dir', + tempDir, + '--url', + 'http://metabase.example.test:3000', + '--api-key', + 'mb_example', + '--map', + '2=../orbit', + ], + [ + 'connection', + 'metabase', + 'setup', + '--project-dir', + tempDir, + '--url', + 'http://metabase.example.test:3000', + '--api-key', + 'mb_example', + '--sync', + 'nope', + ], + [ + 'connection', + 'metabase', + 'setup', + '--project-dir', + tempDir, + '--url', + 'http://metabase.example.test:3000', + '--api-key', + 'mb_example', + '--sync-mode', + 'BAD', + ], + [ + 'connection', + 'metabase', + 'setup', + '--project-dir', + tempDir, + '--url', + 'http://metabase.example.test:3000', + '--api-key', + 'mb_example', + '--mint-api-key', + '--api-key', + 'also_bad', + ], + ]) { + const testIo = makeIo(); + await expect(runKloCli(argv, testIo.io, { connectionMetabaseSetup })).resolves.toBe(1); + expect(testIo.stderr()).toMatch(/map|sync|sync-mode|conflict|cannot be used|invalid|integer|choices/i); + } + + expect(connectionMetabaseSetup).not.toHaveBeenCalled(); + }); + + it('rejects commands removed from the May 6 root surface', async () => { + for 
(const argv of [ + ['init'], + ['connect', 'list'], + ['scan', 'warehouse'], + ['knowledge', 'list'], + ['ask', 'What sources are connected?'], + ]) { + const testIo = makeIo(); + + await expect(runKloCli(argv, testIo.io)).resolves.toBe(1); + + expect(testIo.stderr()).toMatch(/unknown command|error:/); + } + }); + + it('dispatches connection add options through Commander', async () => { + const testIo = makeIo(); + const connection = vi.fn(async () => 0); + + await expect( + runKloCli( + [ + 'connection', + 'add', + 'notion', + 'notion-main', + '--project-dir', + tempDir, + '--token-env', + 'NOTION_AUTH_TOKEN', + '--crawl-mode', + 'selected_roots', + '--root-page-id', + 'page-1', + '--root-database-id', + 'database-1', + '--max-pages', + '80', + ], + testIo.io, + { connection }, + ), + ).resolves.toBe(0); + + expect(connection).toHaveBeenCalledWith( + { + command: 'add', + projectDir: tempDir, + driver: 'notion', + connectionId: 'notion-main', + url: undefined, + schemas: [], + readonly: false, + force: false, + allowLiteralCredentials: false, + notion: { + authTokenRef: 'env:NOTION_AUTH_TOKEN', + crawlMode: 'selected_roots', + rootPageIds: ['page-1'], + rootDatabaseIds: ['database-1'], + rootDataSourceIds: [], + maxPagesPerRun: 80, + maxKnowledgeCreatesPerRun: undefined, + maxKnowledgeUpdatesPerRun: undefined, + }, + }, + testIo.io, + ); + expect(testIo.stderr()).toBe(''); + }); + + it('prints generated connection notion pick help without invoking execution', async () => { + const helpCases = [ + ['connection', 'notion', '--help'], + ['connection', 'notion', 'pick', '--help'], + ['connection', 'notion', 'pick', 'notion-main', '--help'], + ]; + + for (const argv of helpCases) { + const testIo = makeIo(); + const connectionNotion = vi.fn(async () => 0); + + await expect(runKloCli(argv, testIo.io, { connectionNotion })).resolves.toBe(0); + + expect(testIo.stdout()).toContain('Usage: klo connection notion'); + expect(testIo.stdout()).toContain('pick'); + 
expect(testIo.stderr()).toBe(''); + expect(connectionNotion).not.toHaveBeenCalled(); + } + }); + + it('dispatches connection notion pick through Commander', async () => { + const testIo = makeIo(); + const connectionNotion = vi.fn(async () => 0); + + await expect( + runKloCli( + [ + '--project-dir', + tempDir, + 'connection', + 'notion', + 'pick', + 'notion-main', + '--no-input', + '--root-page-id', + '11111111222233334444555555555555', + '--root-page-id', + '11111111-2222-3333-4444-555555555555', + ], + testIo.io, + { connectionNotion }, + ), + ).resolves.toBe(0); + + expect(connectionNotion).toHaveBeenCalledWith( + { + command: 'pick', + projectDir: tempDir, + connectionId: 'notion-main', + mode: 'non-interactive', + rootPageIds: ['11111111-2222-3333-4444-555555555555'], + }, + testIo.io, + ); + expect(testIo.stderr()).toBe(''); + }); + + it('ignores connection notion pick root page flags in interactive mode', async () => { + const testIo = makeIo(); + const connectionNotion = vi.fn(async () => 0); + + await expect( + runKloCli(['connection', 'notion', 'pick', 'notion-main', '--root-page-id', 'not-a-uuid'], testIo.io, { + connectionNotion, + }), + ).resolves.toBe(0); + + expect(connectionNotion).toHaveBeenCalledWith( + { + command: 'pick', + projectDir: expect.any(String), + connectionId: 'notion-main', + mode: 'interactive', + }, + testIo.io, + ); + expect(testIo.stderr()).toBe(''); + }); + + it('rejects connection notion pick no-input mode without root page ids', async () => { + const testIo = makeIo(); + const connectionNotion = vi.fn(async () => 0); + + await expect( + runKloCli(['connection', 'notion', 'pick', 'notion-main', '--no-input'], testIo.io, { connectionNotion }), + ).resolves.toBe(1); + + expect(connectionNotion).not.toHaveBeenCalled(); + expect(testIo.stderr()).toContain('connection notion pick --no-input requires at least one --root-page-id'); + }); + + it('writes basic debug dispatch information when --debug is set', async () => { + const testIo 
= makeIo(); + const connection = vi.fn().mockResolvedValue(0); + + await expect( + runKloCli(['--project-dir', tempDir, '--debug', 'connection', 'list'], testIo.io, { connection }), + ).resolves.toBe(0); + + expect(testIo.stderr()).toContain(`[debug] projectDir=${tempDir}`); + expect(testIo.stderr()).toContain('[debug] dispatch=connection'); + }); + + it('routes low-level scan through klo dev with top-level project-dir', async () => { + const testIo = makeIo(); + const scan = vi.fn().mockResolvedValue(0); + + await expect(runKloCli(['--project-dir', tempDir, 'dev', 'scan', 'warehouse'], testIo.io, { scan })).resolves.toBe( + 0, + ); + + expect(scan).toHaveBeenCalledWith( + { + command: 'run', + projectDir: tempDir, + connectionId: 'warehouse', + mode: 'structural', + detectRelationships: false, + dryRun: false, + databaseIntrospectionUrl: undefined, + }, + testIo.io, + ); + }); + + it('dispatches serve public command options through Commander', async () => { + const serveIo = makeIo(); + const serveStdio = vi.fn(async () => 0); + + await expect( + runKloCli( + [ + 'serve', + '--mcp', + 'stdio', + '--project-dir', + tempDir, + '--semantic-compute-url', + 'http://127.0.0.1:18080', + '--execute-queries', + '--memory-capture', + '--memory-model', + 'openai/gpt-5.2', + ], + serveIo.io, + { serveStdio }, + ), + ).resolves.toBe(0); + + expect(serveStdio).toHaveBeenCalledWith({ + mcp: 'stdio', + projectDir: tempDir, + userId: 'local', + semanticCompute: true, + semanticComputeUrl: 'http://127.0.0.1:18080', + databaseIntrospectionUrl: undefined, + executeQueries: true, + memoryCapture: true, + memoryModel: 'openai/gpt-5.2', + }); + expect(serveIo.stderr()).toBe(''); + }); + + it('prints dev help for bare dev commands', async () => { + const testIo = makeIo(); + + await expect(runKloCli(['dev'], testIo.io)).resolves.toBe(0); + + expect(testIo.stdout()).toContain('Usage: klo dev [options] [command]'); + expect(testIo.stdout()).toContain('Low-level diagnostics'); + 
expect(testIo.stdout()).toContain('scan'); + expect(testIo.stdout()).toContain('ingest'); + expect(testIo.stdout()).toContain('mapping'); + expect(testIo.stdout()).not.toContain('model'); + expect(testIo.stdout()).not.toContain('knowledge'); + expect(testIo.stderr()).toBe(''); + }); + + it('prints dev command help without invoking low-level execution', async () => { + for (const [command, expected] of [ + ['scan', ['Usage: klo dev scan', '--dry-run', 'status', 'report']], + ['ingest', ['Usage: klo dev ingest', 'run', 'replay']], + ['mapping', ['Usage: klo dev mapping', 'sync-state', 'validate']], + ] as const) { + const testIo = makeIo(); + const scan = vi.fn().mockResolvedValue(0); + const sl = vi.fn().mockResolvedValue(0); + + await expect(runKloCli(['dev', command, '--help'], testIo.io, { scan, sl })).resolves.toBe(0); + + for (const text of expected) { + expect(testIo.stdout()).toContain(text); + } + expect(testIo.stderr()).toBe(''); + expect(scan).not.toHaveBeenCalled(); + expect(sl).not.toHaveBeenCalled(); + } + }); + + it('prints dev scan subcommand help without invoking scan execution', async () => { + const testIo = makeIo(); + const scan = vi.fn().mockResolvedValue(0); + + await expect(runKloCli(['dev', 'scan', 'report', '--help'], testIo.io, { scan })).resolves.toBe(0); + + expect(testIo.stdout()).toContain('Usage: klo dev scan report [options] '); + expect(testIo.stderr()).toBe(''); + expect(scan).not.toHaveBeenCalled(); + }); + + it('rejects removed reserved dev subcommands', async () => { + const testIo = makeIo(); + + await expect(runKloCli(['dev', 'artifacts'], testIo.io)).resolves.toBe(1); + + expect(testIo.stderr()).toMatch(/unknown command|error:/); + }); + + it('rejects mutually exclusive output modes before invoking runners', async () => { + const ingest = vi.fn(async () => 0); + const demo = vi.fn(async () => 0); + + for (const argv of [ + ['dev', 'ingest', 'run', '--connection-id', 'warehouse', '--adapter', 'fake', '--json', '--plain'], + 
['dev', 'ingest', 'status', 'run-1', '--json', '--viz'], + ['setup', 'demo', '--json', '--plain'], + ['setup', 'demo', 'replay', '--json', '--plain'], + ]) { + const testIo = makeIo(); + await expect(runKloCli(argv, testIo.io, { ingest, demo })).resolves.toBe(1); + expect(testIo.stderr()).toMatch(/conflict|cannot be used/i); + } + + expect(ingest).not.toHaveBeenCalled(); + expect(demo).not.toHaveBeenCalled(); + }); + + it('rejects mutually exclusive credential and scan mode options before invoking runners', async () => { + const connection = vi.fn(async () => 0); + const scan = vi.fn(async () => 0); + + const tokenIo = makeIo(); + await expect( + runKloCli( + [ + 'connection', + 'add', + 'notion', + 'notion-main', + '--token-env', + 'NOTION_TOKEN', + '--token-file', + '/tmp/notion-token', + '--root-page-id', + '11111111111111111111111111111111', + ], + tokenIo.io, + { connection }, + ), + ).resolves.toBe(1); + expect(tokenIo.stderr()).toMatch(/conflict|cannot be used/i); + + expect(connection).not.toHaveBeenCalled(); + expect(scan).not.toHaveBeenCalled(); + }); + + it('validates connection mapping set syntax before runner domain validation', async () => { + const badFieldIo = makeIo(); + await expect( + runKloCli(['connection', 'mapping', 'set', 'prod-metabase', 'invalidMappings', '1=warehouse'], badFieldIo.io), + ).resolves.toBe(1); + expect(badFieldIo.stderr()).toContain('databaseMappings or connectionMappings'); + + for (const assignment of ['missing-equals', '=warehouse', '1=']) { + const testIo = makeIo(); + await expect( + runKloCli(['connection', 'mapping', 'set', 'prod-metabase', 'databaseMappings', assignment], testIo.io), + ).resolves.toBe(1); + expect(testIo.stderr()).toContain('non-empty ='); + } + }); + + it('does not expose root init after setup owns project creation', async () => { + const testIo = makeIo(); + + await expect(runKloCli(['init'], testIo.io)).resolves.toBe(1); + + expect(testIo.stderr()).toContain("error: unknown command 'init'"); + }); 
+ + it('returns an error code for unknown commands', async () => { + const testIo = makeIo(); + + await expect(runKloCli(['unknown'], testIo.io)).resolves.toBe(1); + + expect(testIo.stderr()).toContain("error: unknown command 'unknown'"); + }); +}); diff --git a/packages/cli/src/index.ts b/packages/cli/src/index.ts new file mode 100644 index 00000000..03a3afec --- /dev/null +++ b/packages/cli/src/index.ts @@ -0,0 +1,53 @@ +import { profileMark } from './startup-profile.js'; + +export { + getKloCliPackageInfo, + runInitForCommander, + runKloCli, + type KloCliDeps, + type KloCliIo, + type KloCliPackageInfo, +} from './cli-runtime.js'; +export { runKloAgent, type KloAgentArgs } from './agent.js'; +export { + KLO_AGENT_MAX_ROWS_CAP, + createKloAgentRuntime, + parseAgentMaxRows, + readAgentJsonFile, + writeAgentJson, + writeAgentJsonError, + type KloAgentRuntime, + type KloAgentRuntimeDeps, +} from './agent-runtime.js'; +export { runKloSetup, type KloSetupArgs, type KloSetupStatus } from './setup.js'; +export type { + KloSetupDatabaseDriver, + KloSetupDatabasesArgs, + KloSetupDatabasesDeps, + KloSetupDatabasesResult, +} from './setup-databases.js'; +export { runKloSetupDatabasesStep } from './setup-databases.js'; +export type { + KloSetupEmbeddingBackend, + KloSetupEmbeddingsArgs, + KloSetupEmbeddingsDeps, + KloSetupEmbeddingsResult, +} from './setup-embeddings.js'; +export { runKloSetupEmbeddingsStep } from './setup-embeddings.js'; +export type { + KloSetupSourcesArgs, + KloSetupSourcesDeps, + KloSetupSourcesPromptAdapter, + KloSetupSourcesResult, + KloSetupSourceType, +} from './setup-sources.js'; +export { runKloSetupSourcesStep } from './setup-sources.js'; +export type { KloMemoryFlowTuiIo, MemoryFlowTuiLiveSession } from './memory-flow-tui.js'; +export { + renderMemoryFlowTui, + sanitizeMemoryFlowTuiError, + startLiveMemoryFlowTui, +} from './memory-flow-tui.js'; +export { rendererUnavailableVizFallback, resolveVizFallback, warnVizFallbackOnce } from 
'./viz-fallback.js'; + +profileMark('module:index'); diff --git a/packages/cli/src/ingest-report-file.test.ts b/packages/cli/src/ingest-report-file.test.ts new file mode 100644 index 00000000..65b4b680 --- /dev/null +++ b/packages/cli/src/ingest-report-file.test.ts @@ -0,0 +1,78 @@ +import { mkdtemp, rm, writeFile } from 'node:fs/promises'; +import { tmpdir } from 'node:os'; +import { join } from 'node:path'; +import { afterEach, beforeEach, describe, expect, it } from 'vitest'; +import { readIngestReportSnapshotFile } from './ingest-report-file.js'; + +function reportSnapshot() { + return { + id: 'report-1', + runId: 'run-1', + jobId: 'job-1', + connectionId: 'warehouse', + sourceKey: 'metabase', + createdAt: '2026-04-30T12:00:00.000Z', + body: { + syncId: 'sync-1', + diffSummary: { added: 1, modified: 0, deleted: 0, unchanged: 0 }, + commitSha: null, + workUnits: [], + failedWorkUnits: [], + reconciliationSkipped: true, + conflictsResolved: [], + evictionsApplied: [], + unmappedFallbacks: [], + evictionInputs: [], + unresolvedCards: [], + supersededBy: null, + overrideOf: null, + provenanceRows: [], + toolTranscripts: [], + }, + }; +} + +describe('readIngestReportSnapshotFile', () => { + let tempDir: string; + + beforeEach(async () => { + tempDir = await mkdtemp(join(tmpdir(), 'klo-report-file-')); + }); + + afterEach(async () => { + await rm(tempDir, { recursive: true, force: true }); + }); + + it('reads and parses an ingest report JSON file', async () => { + const reportPath = join(tempDir, 'report.json'); + await writeFile(reportPath, `${JSON.stringify(reportSnapshot(), null, 2)}\n`, 'utf-8'); + + const report = await readIngestReportSnapshotFile(reportPath); + + expect(report).toMatchObject({ + id: 'report-1', + runId: 'run-1', + jobId: 'job-1', + connectionId: 'warehouse', + sourceKey: 'metabase', + }); + }); + + it('reports invalid JSON with the file path', async () => { + const reportPath = join(tempDir, 'invalid.json'); + await writeFile(reportPath, '{not 
json', 'utf-8'); + + await expect(readIngestReportSnapshotFile(reportPath)).rejects.toThrow( + `Invalid JSON in ingest report file ${reportPath}`, + ); + }); + + it('reports schema failures with the file path', async () => { + const reportPath = join(tempDir, 'wrong-shape.json'); + await writeFile(reportPath, JSON.stringify({ id: 'report-1' }), 'utf-8'); + + await expect(readIngestReportSnapshotFile(reportPath)).rejects.toThrow( + `Invalid ingest report file ${reportPath}`, + ); + }); +}); diff --git a/packages/cli/src/ingest-report-file.ts b/packages/cli/src/ingest-report-file.ts new file mode 100644 index 00000000..50627d56 --- /dev/null +++ b/packages/cli/src/ingest-report-file.ts @@ -0,0 +1,20 @@ +import { readFile } from 'node:fs/promises'; +import { parseIngestReportSnapshot, type IngestReportSnapshot } from '@klo/context/ingest'; + +export async function readIngestReportSnapshotFile(reportFile: string): Promise<IngestReportSnapshot> { + const raw = await readFile(reportFile, 'utf-8'); + let parsed: unknown; + try { + parsed = JSON.parse(raw); + } catch (error) { + const message = error instanceof Error ? error.message : String(error); + throw new Error(`Invalid JSON in ingest report file ${reportFile}: ${message}`); + } + + try { + return parseIngestReportSnapshot(parsed); + } catch (error) { + const message = error instanceof Error ?
error.message : String(error); + throw new Error(`Invalid ingest report file ${reportFile}: ${message}`); + } +} diff --git a/packages/cli/src/ingest.test.ts b/packages/cli/src/ingest.test.ts new file mode 100644 index 00000000..ca4f14b1 --- /dev/null +++ b/packages/cli/src/ingest.test.ts @@ -0,0 +1,2275 @@ +import { EventEmitter } from 'node:events'; +import { access, mkdir, mkdtemp, readFile, rm, writeFile } from 'node:fs/promises'; +import { tmpdir } from 'node:os'; +import { join } from 'node:path'; +import { AgentRunnerService, type RunLoopParams } from '@klo/context/agent'; +import { + LocalLookerRuntimeStore, + LocalMetabaseSourceStateReader, + MetabaseSourceAdapter, + getLocalIngestStatus, + type ChunkResult, + type FetchContext, + type IngestReportSnapshot, + type LocalIngestResult, + type LocalMetabaseFanoutProgress, + type MemoryFlowEventSink, + type MemoryFlowReplayInput, + type MetabaseCard, + type MetabaseCardSummary, + type MetabaseClientFactory, + type MetabaseRuntimeClient, + type RunLocalIngestOptions, + type SourceAdapter, + type SqliteBundleIngestStore, +} from '@klo/context/ingest'; +import { initKloProject, kloLocalStateDbPath, loadKloProject } from '@klo/context/project'; +import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'; +import { type KloIngestArgs, runKloIngest } from './ingest.js'; +import { resetVizFallbackWarningsForTest } from './viz-fallback.js'; + +function makeIo( + options: { + isTTY?: boolean; + stdinIsTTY?: boolean; + columns?: number; + rawMode?: boolean; + keypresses?: { name?: string; ctrl?: boolean }[]; + } = {}, +) { + let stdout = ''; + let stderr = ''; + type TestKey = { name?: string; ctrl?: boolean }; + + class TestStdin extends EventEmitter { + isTTY = options.stdinIsTTY ?? false; + isRaw = false; + + setRawMode = + options.rawMode === false + ? 
undefined + : (value: boolean): void => { + this.isRaw = value; + }; + + resume(): void { + return undefined; + } + + pause(): void { + return undefined; + } + + override on(eventName: string | symbol, listener: (chunk: string, key: TestKey) => void): this { + const result = super.on(eventName, listener); + if (eventName === 'keypress') { + for (const key of options.keypresses ?? []) { + queueMicrotask(() => listener('', key)); + } + } + return result; + } + + override off(eventName: string | symbol, listener: (chunk: string, key: TestKey) => void): this { + return super.off(eventName, listener); + } + + override removeListener(eventName: string | symbol, listener: (chunk: string, key: TestKey) => void): this { + return super.removeListener(eventName, listener); + } + } + + const stdin = new TestStdin(); + + return { + io: { + stdin, + stdout: { + isTTY: options.isTTY, + columns: options.columns, + write: (chunk: string) => { + stdout += chunk; + }, + }, + stderr: { + write: (chunk: string) => { + stderr += chunk; + }, + }, + }, + stdout: () => stdout, + stderr: () => stderr, + }; +} + +async function writeWarehouseConfig(projectDir: string): Promise<void> { + await writeFile( + join(projectDir, 'klo.yaml'), + [ + 'project: warehouse', + 'connections:', + ' prod-metabase:', + ' driver: metabase', + ' warehouse_a:', + ' driver: postgres', + 'ingest:', + ' adapters:', + ' - fake', + '', + ].join('\n'), + 'utf-8', + ); +} + +async function writeMetabaseConfig(projectDir: string): Promise<void> { + await writeFile( + join(projectDir, 'klo.yaml'), + [ + 'project: warehouse', + 'connections:', + ' warehouse:', + ' driver: postgres', + 'ingest:', + ' adapters:', + ' - metabase', + ' embeddings:', + ' backend: deterministic', + '', + ].join('\n'), + 'utf-8', + ); +} + +function bundleReportSnapshot(): IngestReportSnapshot { + return { + id: 'report-1', + runId: 'run-1', + jobId: 'job-1', + connectionId: 'warehouse', + sourceKey: 'metabase', + createdAt: '2026-04-30T12:00:00.000Z', +
body: { + syncId: 'sync-1', + diffSummary: { added: 2, modified: 0, deleted: 0, unchanged: 0 }, + commitSha: 'abc12345', + workUnits: [ + { + unitKey: 'cards', + rawFiles: ['cards/1.json', 'cards/2.json'], + status: 'success', + actions: [ + { target: 'wiki', type: 'created', key: 'knowledge/global/revenue.md', detail: 'Revenue overview' }, + { target: 'sl', type: 'updated', key: 'warehouse.orders', detail: 'Added order amount measure' }, + ], + touchedSlSources: [{ connectionId: 'warehouse', sourceName: 'warehouse.orders' }], + }, + ], + failedWorkUnits: [], + reconciliationSkipped: false, + conflictsResolved: [], + evictionsApplied: [], + unmappedFallbacks: [], + evictionInputs: [], + unresolvedCards: [], + supersededBy: null, + overrideOf: null, + provenanceRows: [ + { + rawPath: 'cards/1.json', + artifactKind: 'wiki', + artifactKey: 'knowledge/global/revenue.md', + actionType: 'wiki_written', + }, + { + rawPath: 'cards/2.json', + artifactKind: 'sl', + artifactKey: 'warehouse.orders', + actionType: 'measure_added', + }, + ], + toolTranscripts: [ + { + unitKey: 'cards', + path: 'tool-transcripts/cards.jsonl', + toolCallCount: 4, + errorCount: 0, + toolNames: ['ingest_triage', 'knowledge_capture', 'sl_capture'], + }, + ], + }, + }; +} + +function completedLocalBundleRun(input: RunLocalIngestOptions, jobId: string): LocalIngestResult { + const nextReport = localFakeBundleReport(jobId, { + id: 'report-live-1', + runId: 'run-live-1', + connectionId: input.connectionId, + sourceKey: input.adapter, + }); + return { + result: { + jobId, + runId: nextReport.runId, + syncId: nextReport.body.syncId, + diffSummary: nextReport.body.diffSummary, + workUnitCount: nextReport.body.workUnits.length, + failedWorkUnits: nextReport.body.failedWorkUnits, + artifactsWritten: nextReport.body.provenanceRows.length, + commitSha: nextReport.body.commitSha, + }, + report: nextReport, + }; +} + +class CliLookerSlWritingAgentRunner extends AgentRunnerService { + override runLoop = 
vi.fn(async (params: RunLoopParams) => { + if ( + params.telemetryTags?.operationName === 'ingest-bundle-wu' && + params.telemetryTags?.unitKey === 'looker-explore-ecommerce-orders' + ) { + const slWrite = params.toolSet.sl_write_source; + if (!slWrite?.execute) { + throw new Error('sl_write_source tool was not available to the Looker WorkUnit'); + } + const result = await slWrite.execute( + { + connectionId: 'prod-warehouse', + sourceName: 'looker__ecommerce__orders', + source: { + name: 'looker__ecommerce__orders', + table: 'public.orders', + grain: ['id'], + columns: [ + { name: 'id', type: 'number' }, + { name: 'revenue', type: 'number' }, + ], + measures: [{ name: 'total_revenue', expr: 'sum(revenue)' }], + }, + }, + { toolCallId: 'cli-looker-sl-write', messages: [] }, + ); + if (!result.structured.success) { + throw new Error(result.markdown); + } + } + return { stopReason: 'natural' as const }; + }); + + constructor() { + super({ llmProvider: { getModel: () => ({}) as never } as never }); + } +} + +class CliMetabaseAgentRunner extends AgentRunnerService { + override runLoop = vi.fn(async () => ({ stopReason: 'natural' as const })); + + constructor() { + super({ llmProvider: { getModel: () => ({}) as never } as never }); + } +} + +class CliMetabaseSourceAdapter implements SourceAdapter { + readonly source = 'metabase'; + readonly skillNames: string[] = []; + readonly fetchCalls: Array<{ metabaseConnectionId: string; metabaseDatabaseId: number; connectionId: string }> = []; + private readonly databaseByStagedDir = new Map<string, number>(); + + detect(): Promise<boolean> { + return Promise.resolve(true); + } + + async fetch(pullConfig: unknown, stagedDir: string, ctx: FetchContext): Promise<void> { + const config = pullConfig as { metabaseConnectionId: string; metabaseDatabaseId: number }; + this.fetchCalls.push({ + metabaseConnectionId: config.metabaseConnectionId, + metabaseDatabaseId: config.metabaseDatabaseId, + connectionId: ctx.connectionId, + }); +
this.databaseByStagedDir.set(stagedDir, config.metabaseDatabaseId); + await mkdir(join(stagedDir, 'cards'), { recursive: true }); + await mkdir(join(stagedDir, 'databases'), { recursive: true }); + await writeFile( + join(stagedDir, 'cards', `${config.metabaseDatabaseId}.json`), + JSON.stringify({ connectionId: ctx.connectionId, databaseId: config.metabaseDatabaseId }), + 'utf-8', + ); + await writeFile( + join(stagedDir, 'databases', `${config.metabaseDatabaseId}.json`), + JSON.stringify({ metabaseConnectionId: config.metabaseConnectionId }), + 'utf-8', + ); + } + + async chunk(stagedDir: string): Promise<ChunkResult> { + const databaseId = this.databaseByStagedDir.get(stagedDir); + if (!databaseId) { + throw new Error(`Missing Metabase database id for staged dir ${stagedDir}`); + } + return { + workUnits: [ + { + unitKey: `metabase-db-${databaseId}`, + rawFiles: [`cards/${databaseId}.json`], + peerFileIndex: [], + dependencyPaths: [`databases/${databaseId}.json`], + }, + ], + }; + } +} + +const SYNC_MODE_METABASE_CARDS: MetabaseCard[] = [ + { + id: 101, + name: 'Collection 12 Revenue', + description: null, + type: 'question', + query_type: 'native', + database_id: 1, + collection_id: 12, + archived: false, + result_metadata: [], + dataset_query: { type: 'native', database: 1, native: { query: 'select 101 as id' } }, + parameters: [], + dashboard_count: 0, + }, + { + id: 102, + name: 'Collection 12 Margin', + description: null, + type: 'question', + query_type: 'native', + database_id: 1, + collection_id: 12, + archived: false, + result_metadata: [], + dataset_query: { type: 'native', database: 1, native: { query: 'select 102 as id' } }, + parameters: [], + dashboard_count: 0, + }, + { + id: 103, + name: 'Collection 13 Pipeline', + description: null, + type: 'question', + query_type: 'native', + database_id: 1, + collection_id: 13, + archived: false, + result_metadata: [], + dataset_query: { type: 'native', database: 1, native: { query: 'select 103 as id' } }, + parameters:
[], + dashboard_count: 0, + }, +]; + +function metabaseCardSummary(card: MetabaseCard): MetabaseCardSummary { + return { + id: card.id, + name: card.name, + archived: card.archived, + database_id: card.database_id, + collection_id: card.collection_id, + }; +} + +function createSyncModeMetabaseClient(): MetabaseRuntimeClient { + const cardsById = new Map(SYNC_MODE_METABASE_CARDS.map((card) => [card.id, card])); + return { + testConnection: async () => ({ success: true }), + getCurrentUser: async () => ({ id: 1, email: 'local@example.test' }), + getDatabases: async () => [{ id: 1, name: 'Warehouse A', engine: 'postgres' }], + getDatabase: async (id) => ({ id, name: 'Warehouse A', engine: 'postgres' }), + getCollectionTree: async () => [ + { id: 12, name: 'Selected Collection', parent_id: 'root', children: [] }, + { id: 13, name: 'Other Collection', parent_id: 'root', children: [] }, + ], + getCollection: async (id) => ({ + id, + name: id === 12 ? 'Selected Collection' : 'Other Collection', + parent_id: 'root', + children: [], + }), + getCollectionItems: async (collectionId) => + SYNC_MODE_METABASE_CARDS.filter((card) => card.collection_id === collectionId).map((card) => ({ + id: card.id, + model: 'card', + name: card.name, + collection_id: card.collection_id, + database_id: card.database_id, + })), + getCard: async (id) => { + const card = cardsById.get(id); + if (!card) { + throw new Error(`unexpected card ${id}`); + } + return card; + }, + getAllCards: async () => SYNC_MODE_METABASE_CARDS.map(metabaseCardSummary), + convertMbqlToNative: async () => ({ query: 'select 1' }), + getNativeSql: (card) => card.dataset_query?.native?.query ?? null, + getTemplateTags: () => ({}), + getCardSql: async (card) => card.dataset_query?.native?.query ?? null, + getResolvedSql: async (card) => ({ + resolvedSql: card.dataset_query?.native?.query ?? 
`select ${card.id} as id`, + templateTags: [], + resolutionStatus: 'resolved', + }), + cleanup: async () => undefined, + }; +} + +class StaticMetabaseClientFactory implements MetabaseClientFactory { + constructor(private readonly client: MetabaseRuntimeClient) {} + + createClient(): MetabaseRuntimeClient { + return this.client; + } +} + +type SyncModeCase = { + name: string; + syncMode: 'ALL' | 'ONLY' | 'EXCEPT'; + selections: Array<{ selectionType: 'collection' | 'item'; metabaseObjectId: number }>; + expectedRawFiles: string[]; + expectedWorkUnitKeys: string[]; +}; + +async function runPublicMetabaseSyncModeCase(tempDir: string, input: SyncModeCase): Promise<void> { + const projectDir = join(tempDir, `metabase-sync-mode-${input.name}`); + await initKloProject({ projectDir, projectName: `metabase-sync-mode-${input.name}` }); + await writeFile( + join(projectDir, 'klo.yaml'), + [ + `project: metabase-sync-mode-${input.name}`, + 'connections:', + ' prod-metabase:', + ' driver: metabase', + ' api_url: https://metabase.example.test', + ' api_key: literal-test-key', + ' warehouse_a:', + ' driver: postgres', + ' url: postgresql://readonly@db.example.test/warehouse_a', + 'ingest:', + ' adapters:', + ' - metabase', + ' embeddings:', + ' backend: deterministic', + '', + ].join('\n'), + 'utf-8', + ); + + const project = await loadKloProject({ projectDir }); + const store = new LocalMetabaseSourceStateReader({ dbPath: kloLocalStateDbPath(project) }); + await store.replaceSourceState({ + connectionId: 'prod-metabase', + syncMode: input.syncMode, + defaultTagNames: ['sync-mode-smoke'], + selections: input.selections, + mappings: [ + { + metabaseDatabaseId: 1, + metabaseDatabaseName: 'Warehouse A', + metabaseEngine: 'postgres', + metabaseHost: 'db.example.test', + metabaseDbName: 'warehouse_a', + targetConnectionId: 'warehouse_a', + syncEnabled: true, + source: 'refresh', + }, + ], + }); + + const adapter = new MetabaseSourceAdapter({ + clientFactory: new
StaticMetabaseClientFactory(createSyncModeMetabaseClient()), + sourceStateReader: store, + }); + const jobId = `metabase-sync-mode-${input.name}-child`; + const io = makeIo(); + + await expect( + runKloIngest( + { + command: 'run', + projectDir, + connectionId: 'prod-metabase', + adapter: 'metabase', + outputMode: 'plain', + }, + io.io, + { + createAdapters: vi.fn(() => [adapter]), + jobIdFactory: () => jobId, + localIngestOptions: { + agentRunner: new CliMetabaseAgentRunner(), + }, + }, + ), + ).resolves.toBe(0); + + expect(io.stderr()).toBe(''); + expect(io.stdout()).toContain('Metabase fan-out: all_succeeded'); + expect(io.stdout()).toContain(`target=warehouse_a database=1 status=done job=${jobId}`); + + const report = await getLocalIngestStatus(project, jobId); + expect(report).not.toBeNull(); + expect(report?.body.workUnits.map((wu) => wu.unitKey).sort()).toEqual(input.expectedWorkUnitKeys); + expect(report?.body.workUnits.flatMap((wu) => wu.rawFiles).sort()).toEqual(input.expectedRawFiles); +} + +function makeCliLookerRuntimeClient() { + const lookerModels = { + source: 'looker', + fetchedAt: '2026-05-05T00:00:00.000Z', + models: [{ name: 'ecommerce', label: 'Ecommerce', explores: [{ name: 'orders', label: 'Orders' }] }], + }; + const lookerExplore = { + source: 'looker', + modelName: 'ecommerce', + exploreName: 'orders', + label: 'Orders', + description: null, + connectionName: 'analytics', + viewName: 'orders', + rawSqlTableName: 'public.orders', + fields: { + dimensions: [{ name: 'orders.id', label: null, type: null, sql: null, description: null }], + measures: [{ name: 'orders.revenue', label: null, type: null, sql: null, description: null }], + }, + joins: [ + { + name: 'users', + type: 'left_outer', + relationship: 'many_to_one', + rawSqlTableName: 'public.users', + sqlOn: '${orders.user_id} = ${users.id}', + from: null, + targetTable: null, + }, + ], + targetWarehouseConnectionId: null, + targetTable: null, + }; + + return { + listLookerConnections: 
vi.fn().mockResolvedValue([ + { + name: 'analytics', + host: 'db.example.test', + database: 'analytics', + schema: null, + dialect: 'postgres', + }, + ]), + listDashboards: vi.fn().mockResolvedValue([{ id: '10', updatedAt: '2026-05-05T08:00:00.000Z' }]), + getDashboard: vi.fn().mockResolvedValue({ + lookerId: '10', + title: 'Revenue Overview', + description: 'Revenue dashboard', + folderId: '7', + ownerId: '3', + updatedAt: '2026-05-05T08:00:00.000Z', + tiles: [{ id: '100', title: 'Revenue', lookId: null, query: { model: 'ecommerce', view: 'orders' } }], + }), + listLooks: vi.fn().mockResolvedValue([{ id: '20', updatedAt: '2026-05-05T08:10:00.000Z' }]), + getLook: vi.fn().mockResolvedValue({ + lookerId: '20', + title: 'Revenue Look', + description: null, + folderId: '7', + ownerId: '3', + updatedAt: '2026-05-05T08:10:00.000Z', + query: { model: 'ecommerce', view: 'orders', fields: ['orders.revenue'] }, + }), + listFolders: vi.fn().mockResolvedValue({ folders: [{ id: '7', name: 'Shared', parentId: null, path: ['Shared'] }] }), + listUsers: vi.fn().mockResolvedValue([{ id: '3', displayName: 'Ada Lovelace', email: 'ada@example.test' }]), + listGroups: vi.fn().mockResolvedValue([{ id: '4', name: 'Analysts' }]), + listLookmlModels: vi.fn().mockResolvedValue(lookerModels), + getExplore: vi.fn().mockResolvedValue(lookerExplore), + getSignals: vi.fn().mockResolvedValue({ + dashboardUsage: [{ contentId: '10', queryCount30d: 12, uniqueUsers30d: 3, lastRunAt: null, topUsers: ['3'] }], + lookUsage: [{ contentId: '20', queryCount30d: 4, uniqueUsers30d: 2, lastRunAt: null, topUsers: ['3'] }], + scheduledPlans: [ + { contentId: '10', contentType: 'dashboard', isScheduled: true, scheduleCount: 1, recipientCount: 4 }, + ], + favorites: [{ contentId: '10', contentType: 'dashboard', favoriteCount: 2 }], + }), + cleanup: vi.fn().mockResolvedValue(undefined), + }; +} + +function makeCliLookerParser() { + return { + parse: vi.fn().mockResolvedValue({ + 'ecommerce.orders': { + ok: true, 
+ catalog: null, + schema: 'public', + name: 'orders', + canonical_table: 'public.orders', + }, + 'ecommerce.orders.users': { + ok: true, + catalog: null, + schema: 'public', + name: 'users', + canonical_table: 'public.users', + }, + }), + }; +} + +function localFakeBundleReport(jobId: string, overrides: Partial<IngestReportSnapshot> = {}): IngestReportSnapshot { + const report = bundleReportSnapshot(); + return { + ...report, + id: `report-${jobId}`, + runId: `run-${jobId}`, + jobId, + connectionId: 'warehouse', + sourceKey: 'fake', + ...overrides, + body: { + ...report.body, + syncId: 'sync-live-1', + ...(overrides.body ?? {}), + }, + }; +} + +async function localBundleStore(projectDir: string, ids: [string, string]): Promise<SqliteBundleIngestStore> { + const { SqliteBundleIngestStore } = await import('@klo/context/ingest'); + const project = await loadKloProject({ projectDir }); + return new SqliteBundleIngestStore({ + dbPath: kloLocalStateDbPath(project), + idFactory: (() => { + let index = 0; + return () => ids[index++] ?? `generated-${index}`; + })(), + }); +} + +async function persistLocalBundleReport(projectDir: string, report = bundleReportSnapshot()): Promise<void> { + const store = await localBundleStore(projectDir, [report.runId, report.id]); + const run = await store.create({ + jobId: report.jobId, + connectionId: report.connectionId, + sourceKey: report.sourceKey, + syncId: report.body.syncId, + trigger: 'manual_resync', + }); + await store.markCompleted(run.id, report.body.diffSummary); + await store.create({ + runId: run.id, + jobId: report.jobId, + connectionId: report.connectionId, + sourceKey: report.sourceKey, + body: report.body, + }); +} + +async function writeBundleReportFile(tempDir: string, report = bundleReportSnapshot()): Promise<string> { + const reportFile = join(tempDir, 'bundle-report.json'); + await writeFile(reportFile, `${JSON.stringify(report, null, 2)}\n`, 'utf-8'); + return reportFile; +} + +function emitLiveLocalMemoryFlow(memoryFlow: MemoryFlowEventSink | undefined): void { +
memoryFlow?.emit({ type: 'source_acquired', adapter: 'fake', trigger: 'manual_resync', fileCount: 1 }); + memoryFlow?.update({ syncId: 'sync-live-1' }); + memoryFlow?.emit({ type: 'raw_snapshot_written', syncId: 'sync-live-1', rawFileCount: 1 }); + memoryFlow?.emit({ type: 'diff_computed', added: 1, modified: 0, deleted: 0, unchanged: 0 }); + memoryFlow?.update({ + plannedWorkUnits: [ + { + unitKey: 'fake-orders', + rawFiles: ['orders/orders.json'], + peerFileCount: 0, + dependencyCount: 0, + }, + ], + }); + memoryFlow?.emit({ type: 'chunks_planned', chunkCount: 1, workUnitCount: 1, evictionCount: 0 }); + memoryFlow?.emit({ type: 'report_created', runId: 'live-viz-run' }); + memoryFlow?.finish('done'); +} + +describe('runKloIngest', () => { + let tempDir: string; + let originalTerm: string | undefined; + + beforeEach(async () => { + resetVizFallbackWarningsForTest(); + originalTerm = process.env.TERM; + process.env.TERM = 'xterm-256color'; + tempDir = await mkdtemp(join(tmpdir(), 'klo-cli-ingest-')); + }); + + afterEach(async () => { + if (originalTerm === undefined) { + delete process.env.TERM; + } else { + process.env.TERM = originalTerm; + } + await rm(tempDir, { recursive: true, force: true }); + }); + + it('runs local ingest and reads status', async () => { + const projectDir = join(tempDir, 'project'); + await initKloProject({ projectDir, projectName: 'warehouse' }); + await writeWarehouseConfig(projectDir); + const sourceDir = join(tempDir, 'source'); + await mkdir(join(sourceDir, 'orders'), { recursive: true }); + await writeFile(join(sourceDir, 'orders', 'orders.json'), '{"name":"orders"}\n', 'utf-8'); + const runLocal = vi.fn(async (input: RunLocalIngestOptions): Promise<LocalIngestResult> => { + const result = completedLocalBundleRun(input, 'cli-local-run-1'); + await persistLocalBundleReport(projectDir, result.report); + return result; + }); + + const runIo = makeIo(); + await expect( + runKloIngest( + { + command: 'run', + projectDir, + connectionId: 'warehouse', +
adapter: 'fake', + sourceDir, + outputMode: 'plain', + }, + runIo.io, + { + runLocalIngest: runLocal, + jobIdFactory: () => 'cli-local-run-1', + }, + ), + ).resolves.toBe(0); + + expect(runIo.stdout()).toContain('Report: report-live-1'); + expect(runIo.stdout()).toContain('Run: run-live-1'); + expect(runIo.stdout()).toContain('Job: cli-local-run-1'); + expect(runIo.stdout()).toContain('Status: done'); + expect(runIo.stdout()).toContain('Diff: +2/~0/-0/=0'); + expect(runIo.stdout()).toContain('Saved memory: 1 wiki, 1 SL'); + + const statusIo = makeIo(); + await expect( + runKloIngest({ command: 'status', projectDir, runId: 'cli-local-run-1', outputMode: 'plain' }, statusIo.io), + ).resolves.toBe(0); + + expect(statusIo.stdout()).toContain('Report: report-live-1'); + expect(statusIo.stdout()).toContain('Run: run-live-1'); + expect(statusIo.stdout()).toContain('Job: cli-local-run-1'); + expect(statusIo.stdout()).toContain('Status: done'); + expect(statusIo.stdout()).toContain('Diff: +2/~0/-0/=0'); + expect(statusIo.stderr()).toBe(''); + }); + + it('routes metabase scheduled pulls to the fan-out runner and prints child summaries', async () => { + const projectDir = join(tempDir, 'project'); + await initKloProject({ projectDir, projectName: 'warehouse' }); + await writeMetabaseConfig(projectDir); + const io = makeIo(); + const report = localFakeBundleReport('metabase-child-1', { + id: 'report-metabase-child-1', + runId: 'run-a', + jobId: 'metabase-child-1', + connectionId: 'warehouse_a', + sourceKey: 'metabase', + }); + + await expect( + runKloIngest( + { + command: 'run', + projectDir, + connectionId: 'prod-metabase', + adapter: 'metabase', + outputMode: 'plain', + }, + io.io, + { + runLocalMetabaseIngest: async () => ({ + metabaseConnectionId: 'prod-metabase', + status: 'all_succeeded', + totals: { workUnits: 2, failedWorkUnits: 0 }, + children: [ + { + jobId: 'metabase-child-1', + metabaseConnectionId: 'prod-metabase', + metabaseDatabaseId: 1, + targetConnectionId: 
'warehouse_a', + result: { + jobId: 'metabase-child-1', + runId: 'run-a', + syncId: 'sync-a', + diffSummary: { added: 0, modified: 0, deleted: 0, unchanged: 0 }, + workUnitCount: 1, + failedWorkUnits: [], + artifactsWritten: 0, + commitSha: null, + }, + report, + }, + ], + }), + }, + ), + ).resolves.toBe(0); + + expect(io.stdout()).toContain('Metabase fan-out: all_succeeded'); + expect(io.stdout()).toContain('warehouse_a'); + expect(io.stdout()).toContain('metabase-child-1'); + expect(io.stderr()).toBe(''); + }); + + it('prints Metabase fan-out progress before the final summary', async () => { + const projectDir = join(tempDir, 'project'); + await initKloProject({ projectDir, projectName: 'warehouse' }); + await writeMetabaseConfig(projectDir); + const io = makeIo(); + const report = localFakeBundleReport('metabase-child-1', { + id: 'report-metabase-child-1', + runId: 'run-a', + jobId: 'metabase-child-1', + connectionId: 'warehouse_a', + sourceKey: 'metabase', + }); + + await expect( + runKloIngest( + { + command: 'run', + projectDir, + connectionId: 'prod-metabase', + adapter: 'metabase', + outputMode: 'plain', + }, + io.io, + { + runLocalMetabaseIngest: async (input) => { + const progress = (input as { progress?: LocalMetabaseFanoutProgress }).progress; + progress?.onMetabaseFanoutPlanned?.({ + metabaseConnectionId: 'prod-metabase', + children: [{ metabaseDatabaseId: 1, targetConnectionId: 'warehouse_a' }], + }); + progress?.onMetabaseChildStarted?.({ + metabaseConnectionId: 'prod-metabase', + metabaseDatabaseId: 1, + targetConnectionId: 'warehouse_a', + jobId: 'metabase-child-1', + }); + progress?.onMetabaseChildCompleted?.({ + metabaseConnectionId: 'prod-metabase', + metabaseDatabaseId: 1, + targetConnectionId: 'warehouse_a', + jobId: 'metabase-child-1', + status: 'done', + }); + return { + metabaseConnectionId: 'prod-metabase', + status: 'all_succeeded', + totals: { workUnits: 2, failedWorkUnits: 0 }, + children: [ + { + jobId: 'metabase-child-1', + 
metabaseConnectionId: 'prod-metabase', + metabaseDatabaseId: 1, + targetConnectionId: 'warehouse_a', + result: { + jobId: 'metabase-child-1', + runId: 'run-a', + syncId: 'sync-a', + diffSummary: { added: 0, modified: 0, deleted: 0, unchanged: 0 }, + workUnitCount: 1, + failedWorkUnits: [], + artifactsWritten: 0, + commitSha: null, + }, + report, + }, + ], + }; + }, + }, + ), + ).resolves.toBe(0); + + expect(io.stdout()).toContain('Metabase ingest: prod-metabase'); + expect(io.stdout()).toContain('Targets: 1 mapped database'); + expect(io.stdout()).toContain('- database=1 target=warehouse_a status=running job=metabase-child-1'); + expect(io.stdout()).toContain('- database=1 target=warehouse_a status=done job=metabase-child-1'); + expect(io.stdout()).toContain('Metabase fan-out: all_succeeded'); + expect(io.stderr()).toBe(''); + }); + + it('runs Metabase scheduled ingest through the public CLI command path with real fan-out', async () => { + const projectDir = join(tempDir, 'metabase-cli-project'); + await initKloProject({ projectDir, projectName: 'metabase-cli' }); + await writeFile( + join(projectDir, 'klo.yaml'), + [ + 'project: metabase-cli', + 'connections:', + ' prod-metabase:', + ' driver: metabase', + ' api_url: https://metabase.example.test', + ' api_key: literal-test-key', + ' warehouse_a:', + ' driver: postgres', + ' url: postgresql://readonly@db.example.test/warehouse_a', + ' warehouse_b:', + ' driver: postgres', + ' url: postgresql://readonly@db.example.test/warehouse_b', + 'ingest:', + ' adapters:', + ' - metabase', + ' embeddings:', + ' backend: deterministic', + '', + ].join('\n'), + 'utf-8', + ); + const project = await loadKloProject({ projectDir }); + const store = new LocalMetabaseSourceStateReader({ dbPath: kloLocalStateDbPath(project) }); + await store.replaceSourceState({ + connectionId: 'prod-metabase', + syncMode: 'ALL', + defaultTagNames: ['klo'], + selections: [], + mappings: [ + { + metabaseDatabaseId: 1, + metabaseDatabaseName: 'Warehouse 
A', + metabaseEngine: 'postgres', + metabaseHost: 'db.example.test', + metabaseDbName: 'warehouse_a', + targetConnectionId: 'warehouse_a', + syncEnabled: true, + source: 'refresh', + }, + { + metabaseDatabaseId: 2, + metabaseDatabaseName: 'Warehouse B', + metabaseEngine: 'postgres', + metabaseHost: 'db.example.test', + metabaseDbName: 'warehouse_b', + targetConnectionId: 'warehouse_b', + syncEnabled: true, + source: 'refresh', + }, + ], + }); + const adapter = new CliMetabaseSourceAdapter(); + const agentRunner = new CliMetabaseAgentRunner(); + const childJobIds = ['metabase-child-1', 'metabase-child-2']; + const io = makeIo(); + + await expect( + runKloIngest( + { + command: 'run', + projectDir, + connectionId: 'prod-metabase', + adapter: 'metabase', + outputMode: 'plain', + }, + io.io, + { + createAdapters: vi.fn(() => [adapter]), + jobIdFactory: () => childJobIds.shift() ?? 'metabase-child-extra', + localIngestOptions: { + agentRunner, + }, + }, + ), + ).resolves.toBe(0); + + expect(io.stderr()).toBe(''); + expect(io.stdout()).toContain('Metabase fan-out: all_succeeded'); + expect(io.stdout()).toContain('Source: prod-metabase'); + expect(io.stdout()).toContain('Children: 2'); + expect(io.stdout()).toContain('target=warehouse_a database=1 status=done job=metabase-child-1'); + expect(io.stdout()).toContain('target=warehouse_b database=2 status=done job=metabase-child-2'); + expect(adapter.fetchCalls).toEqual([ + { metabaseConnectionId: 'prod-metabase', metabaseDatabaseId: 1, connectionId: 'warehouse_a' }, + { metabaseConnectionId: 'prod-metabase', metabaseDatabaseId: 2, connectionId: 'warehouse_b' }, + ]); + + const statusIo = makeIo(); + await expect( + runKloIngest( + { command: 'status', projectDir, runId: 'metabase-child-1', outputMode: 'plain' }, + statusIo.io, + ), + ).resolves.toBe(0); + expect(statusIo.stdout()).toContain('Job: metabase-child-1'); + expect(statusIo.stdout()).toContain('Adapter: metabase'); + expect(statusIo.stdout()).toContain('Connection: 
warehouse_a'); + expect(statusIo.stderr()).toBe(''); + }); + + it('runs public Metabase CLI scheduled ingest for ALL, ONLY, and EXCEPT sync modes', async () => { + await runPublicMetabaseSyncModeCase(tempDir, { + name: 'all', + syncMode: 'ALL', + selections: [], + expectedWorkUnitKeys: ['metabase-col-12', 'metabase-col-13'], + expectedRawFiles: [ + 'cards/101.json', + 'cards/102.json', + 'cards/103.json', + 'collections/12.json', + 'collections/13.json', + ], + }); + + await runPublicMetabaseSyncModeCase(tempDir, { + name: 'only', + syncMode: 'ONLY', + selections: [{ selectionType: 'collection', metabaseObjectId: 12 }], + expectedWorkUnitKeys: ['metabase-col-12'], + expectedRawFiles: ['cards/101.json', 'cards/102.json', 'collections/12.json'], + }); + + await runPublicMetabaseSyncModeCase(tempDir, { + name: 'except', + syncMode: 'EXCEPT', + selections: [{ selectionType: 'item', metabaseObjectId: 102 }], + expectedWorkUnitKeys: ['metabase-col-12', 'metabase-col-13'], + expectedRawFiles: ['cards/101.json', 'cards/103.json', 'collections/12.json', 'collections/13.json'], + }); + }); + + it('prints metabase fan-out JSON results', async () => { + const projectDir = join(tempDir, 'project'); + await initKloProject({ projectDir, projectName: 'warehouse' }); + await writeMetabaseConfig(projectDir); + const io = makeIo(); + + await expect( + runKloIngest( + { + command: 'run', + projectDir, + connectionId: 'prod-metabase', + adapter: 'metabase', + outputMode: 'json', + }, + io.io, + { + runLocalMetabaseIngest: async () => ({ + metabaseConnectionId: 'prod-metabase', + status: 'all_succeeded', + totals: { workUnits: 0, failedWorkUnits: 0 }, + children: [], + }), + }, + ), + ).resolves.toBe(0); + + expect(JSON.parse(io.stdout())).toMatchObject({ + metabaseConnectionId: 'prod-metabase', + status: 'all_succeeded', + children: [], + }); + expect(io.stderr()).toBe(''); + }); + + it('rejects source-dir uploads through the metabase fan-out route', async () => { + const projectDir = 
join(tempDir, 'project'); + await initKloProject({ projectDir, projectName: 'warehouse' }); + await writeMetabaseConfig(projectDir); + const io = makeIo(); + + await expect( + runKloIngest( + { + command: 'run', + projectDir, + adapter: 'metabase', + connectionId: 'prod-metabase', + sourceDir: projectDir, + outputMode: 'plain', + }, + io.io, + { + runLocalMetabaseIngest: async () => { + throw new Error('fan-out should not be called'); + }, + }, + ), + ).resolves.toBe(1); + + expect(io.stderr()).toContain('source-dir uploads are not supported for the Metabase fan-out adapter'); + expect(io.stderr()).not.toContain('klo dev ingest run requires llm.provider.backend'); + expect(io.stdout()).toBe(''); + }); + + /* Runs the fake adapter against a one-file source directory with runLocalIngest stubbed via completedLocalBundleRun; expects exit code 0, empty stderr, and Report/Job/Diff lines on stdout. */ it('prints previous run and diff summary for local ingest results', async () => { + const projectDir = join(tempDir, 'project'); + await initKloProject({ projectDir, projectName: 'warehouse' }); + await writeWarehouseConfig(projectDir); + const sourceDir = join(tempDir, 'source'); + await mkdir(join(sourceDir, 'orders'), { recursive: true }); + await writeFile(join(sourceDir, 'orders', 'orders.json'), '{"name":"orders"}\n', 'utf-8'); + const runLocal = vi.fn(async (input: RunLocalIngestOptions) => completedLocalBundleRun(input, 'local-job-1')); + + const io = makeIo(); + await expect( + runKloIngest( + { + command: 'run', + projectDir, + connectionId: 'warehouse', + adapter: 'fake', + sourceDir, + outputMode: 'plain', + }, + io.io, + { + runLocalIngest: runLocal, + jobIdFactory: () => 'local-job-1', + }, + ), + ).resolves.toBe(0); + + expect(io.stderr()).toBe(''); + expect(io.stdout()).toContain('Report: report-live-1\n'); + expect(io.stdout()).toContain('Job: local-job-1\n'); + expect(io.stdout()).toContain('Diff: +2/~0/-0/=0\n'); + }); + + /* Supplies debugLlmRequestFile in the args and asserts that the stubbed runLocalIngest receives the same path under the llmDebugRequestFile key. */ it('passes the debug LLM request file to local ingest runs', async () => { + const projectDir = join(tempDir, 'project'); + await initKloProject({ projectDir, projectName: 'warehouse' }); + await 
writeWarehouseConfig(projectDir); + const runLocalIngest = vi.fn(async (input: RunLocalIngestOptions) => + completedLocalBundleRun(input, 'job-debug'), + ); + const io = makeIo(); + const debugFile = join(projectDir, '.klo', 'llm-debug.jsonl'); + + const exitCode = await runKloIngest( + { + command: 'run', + projectDir, + connectionId: 'warehouse', + adapter: 'fake', + outputMode: 'plain', + debugLlmRequestFile: debugFile, + }, + io.io, + { runLocalIngest }, + ); + + expect(exitCode).toBe(0); + expect(runLocalIngest).toHaveBeenCalledWith(expect.objectContaining({ llmDebugRequestFile: debugFile })); + }); + + /* Injects a createAdapters spy and asserts it is called with an object containing projectDir plus { databaseIntrospectionUrl }, and that runLocalIngest receives the adapters the spy returned. */ it('passes daemon database introspection URL to default local ingest adapters', async () => { + const projectDir = join(tempDir, 'project'); + await initKloProject({ projectDir, projectName: 'warehouse' }); + await writeWarehouseConfig(projectDir); + const sourceDir = join(tempDir, 'source'); + await mkdir(join(sourceDir, 'orders'), { recursive: true }); + await writeFile(join(sourceDir, 'orders', 'orders.json'), '{"name":"orders"}\n', 'utf-8'); + const createdAdapters: SourceAdapter[] = [ + { source: 'fake', skillNames: [], detect: async () => true, chunk: async () => ({ workUnits: [] }) }, + ]; + const createAdapters = vi.fn(() => createdAdapters as never); + const runLocal = vi.fn(async (input: RunLocalIngestOptions) => + completedLocalBundleRun(input, input.jobId ?? 
'local-job-1'), + ); + const io = makeIo(); + + await expect( + runKloIngest( + { + command: 'run', + projectDir, + connectionId: 'warehouse', + adapter: 'fake', + sourceDir, + databaseIntrospectionUrl: 'http://127.0.0.1:8765', + outputMode: 'plain', + } satisfies KloIngestArgs, + io.io, + { + createAdapters, + runLocalIngest: runLocal, + jobIdFactory: () => 'local-job-1', + }, + ), + ).resolves.toBe(0); + + expect(createAdapters).toHaveBeenCalledWith(expect.objectContaining({ projectDir }), { + databaseIntrospectionUrl: 'http://127.0.0.1:8765', + }); + expect(runLocal).toHaveBeenCalledWith( + expect.objectContaining({ + adapters: createdAdapters, + adapter: 'fake', + connectionId: 'warehouse', + }), + ); + }); + + /* Overwrites klo.yaml with a postgres connection that enables historicSql, then asserts createAdapters is called with { historicSqlConnectionId: 'warehouse' } and that runLocalIngest receives the adapters the spy returned. */ it('passes the target connection id when constructing local historic-sql adapters', async () => { + const projectDir = join(tempDir, 'historic-sql-project'); + await initKloProject({ projectDir, projectName: 'historic-sql-project' }); + await writeFile( + join(projectDir, 'klo.yaml'), + [ + 'project: historic-sql-project', + 'connections:', + ' warehouse:', + ' driver: postgres', + ' url: env:WAREHOUSE_DATABASE_URL', + ' historicSql:', + ' enabled: true', + ' dialect: postgres', + ' minCalls: 2', + 'ingest:', + ' adapters:', + ' - historic-sql', + '', + ].join('\n'), + 'utf-8', + ); + const createdAdapters: SourceAdapter[] = [ + { source: 'historic-sql', skillNames: [], detect: async () => true, chunk: async () => ({ workUnits: [] }) }, + ]; + const createAdapters = vi.fn(() => createdAdapters as never); + const runLocal = vi.fn(async (input: RunLocalIngestOptions) => + completedLocalBundleRun(input, input.jobId ?? 
'local-historic-job'), + ); + const io = makeIo(); + + await expect( + runKloIngest( + { + command: 'run', + projectDir, + connectionId: 'warehouse', + adapter: 'historic-sql', + outputMode: 'plain', + }, + io.io, + { + createAdapters, + runLocalIngest: runLocal, + jobIdFactory: () => 'local-historic-job', + }, + ), + ).resolves.toBe(0); + + expect(createAdapters).toHaveBeenCalledWith(expect.objectContaining({ projectDir }), { + historicSqlConnectionId: 'warehouse', + }); + expect(runLocal).toHaveBeenCalledWith( + expect.objectContaining({ + adapters: createdAdapters, + adapter: 'historic-sql', + connectionId: 'warehouse', + }), + ); + }); + + /* Supplies localIngestOptions (agentRunner, pullConfigOptions); asserts createAdapters receives { looker: { parser } } built from pullConfigOptions, while runLocalIngest receives agentRunner and pullConfigOptions as given. */ it('passes local Looker pull-config options and agent runner into scheduled ingest for Looker scheduled ingest', async () => { + const projectDir = join(tempDir, 'project'); + await initKloProject({ projectDir, projectName: 'warehouse' }); + await writeWarehouseConfig(projectDir); + const pullConfigOptions = { + looker: { + parser: { parse: vi.fn() }, + }, + }; + const agentRunner = { runLoop: vi.fn() } as never; + const createdAdapters: SourceAdapter[] = [ + { source: 'fake', skillNames: [], detect: async () => true, chunk: async () => ({ workUnits: [] }) }, + ]; + const createAdapters = vi.fn(() => createdAdapters as never); + const runLocal = vi.fn(async (input: RunLocalIngestOptions) => + completedLocalBundleRun(input, input.jobId ?? 
'local-job-1'), + ); + const io = makeIo(); + + await expect( + runKloIngest( + { + command: 'run', + projectDir, + connectionId: 'warehouse', + adapter: 'fake', + outputMode: 'plain', + } satisfies KloIngestArgs, + io.io, + { + createAdapters, + runLocalIngest: runLocal, + jobIdFactory: () => 'local-job-1', + localIngestOptions: { + agentRunner, + pullConfigOptions, + }, + }, + ), + ).resolves.toBe(0); + + expect(createAdapters).toHaveBeenCalledWith(expect.objectContaining({ projectDir }), { + looker: { + parser: pullConfigOptions.looker.parser, + }, + }); + expect(runLocal).toHaveBeenCalledWith( + expect.objectContaining({ + agentRunner, + pullConfigOptions, + }), + ); + }); + + /* Looker run with no runLocalIngest stub: seeds LocalLookerRuntimeStore cursors and a connection mapping, injects a fake runtime client, parser, and agent runner, then checks the plain summary, the parser input, a single cleanup call, the written semantic-layer YAML, and a follow-up status command. */ it('runs Looker scheduled ingest through the public CLI command path', async () => { + const projectDir = join(tempDir, 'looker-project'); + await initKloProject({ projectDir, projectName: 'looker-cli' }); + await writeFile( + join(projectDir, 'klo.yaml'), + [ + 'project: looker-cli', + 'connections:', + ' prod-looker:', + ' driver: looker', + ' base_url: https://looker.example.test', + ' client_id: client', + ' prod-warehouse:', + ' driver: postgres', + ' url: postgresql://readonly@db.example.test/analytics', + 'ingest:', + ' adapters:', + ' - looker', + ' embeddings:', + ' backend: deterministic', + '', + ].join('\n'), + 'utf-8', + ); + const project = await loadKloProject({ projectDir }); + const store = new LocalLookerRuntimeStore({ dbPath: kloLocalStateDbPath(project) }); + await store.setCursors('prod-looker', { + dashboardsLastSyncedAt: null, + looksLastSyncedAt: null, + }); + await store.upsertConnectionMapping({ + lookerConnectionId: 'prod-looker', + lookerConnectionName: 'analytics', + kloConnectionId: 'prod-warehouse', + source: 'cli', + }); + const runtimeClient = makeCliLookerRuntimeClient(); + const parser = makeCliLookerParser(); + const agentRunner = new CliLookerSlWritingAgentRunner(); + const io = makeIo(); + + await expect( + runKloIngest( + { + command: 'run', + 
projectDir, + connectionId: 'prod-looker', + adapter: 'looker', + outputMode: 'plain', + }, + io.io, + { + jobIdFactory: () => 'cli-looker-job', + localIngestOptions: { + agentRunner, + pullConfigOptions: { + looker: { + client: runtimeClient, + runtimeClient, + parser, + }, + }, + }, + }, + ), + ).resolves.toBe(0); + + expect(io.stderr()).toBe(''); + expect(io.stdout()).toContain('Job: cli-looker-job'); + expect(io.stdout()).toContain('Adapter: looker'); + expect(io.stdout()).toContain('Connection: prod-looker'); + expect(io.stdout()).toContain('Status: done'); + expect(io.stdout()).toContain('Saved memory: 0 wiki, 1 SL'); + expect(parser.parse).toHaveBeenCalledWith( + expect.arrayContaining([ + expect.objectContaining({ key: 'ecommerce.orders', sql_table_name: 'public.orders', dialect: 'postgres' }), + expect.objectContaining({ key: 'ecommerce.orders.users', sql_table_name: 'public.users', dialect: 'postgres' }), + ]), + ); + expect(runtimeClient.cleanup).toHaveBeenCalledTimes(1); + const slPath = join(projectDir, 'semantic-layer', 'prod-warehouse', 'looker__ecommerce__orders.yaml'); + await access(slPath); + await expect(readFile(slPath, 'utf-8')).resolves.toContain('table: public.orders'); + + /* Second invocation: reads the finished job back through the status command, using a fresh IO capture. */ const statusIo = makeIo(); + await expect( + runKloIngest( + { command: 'status', projectDir, runId: 'cli-looker-job', outputMode: 'plain' }, + statusIo.io, + ), + ).resolves.toBe(0); + expect(statusIo.stdout()).toContain('Job: cli-looker-job'); + expect(statusIo.stdout()).toContain('Adapter: looker'); + expect(statusIo.stderr()).toBe(''); + }); + + /* The runLocalIngest stub emits memory-flow events through input.memoryFlow and startLiveMemoryFlow resolves to null; expects clear-screen sequences and more than one 'KLO memory flow' frame on stdout, with empty stderr. */ it('renders live memory-flow frames for run --viz when stdout is interactive', async () => { + const projectDir = join(tempDir, 'project'); + await initKloProject({ projectDir, projectName: 'warehouse' }); + await writeWarehouseConfig(projectDir); + const sourceDir = join(tempDir, 'source'); + await mkdir(join(sourceDir, 'orders'), { recursive: true }); + await writeFile(join(sourceDir, 'orders', 'orders.json'), 
'{"name":"orders"}\n', 'utf-8'); + + /* Stub for runLocalIngest: replays a fixed sequence of memory-flow events and updates through the optional input.memoryFlow sink, marks it finished with 'done', then returns completedLocalBundleRun's result. No explicit return type: it is inferred from completedLocalBundleRun (a bare `Promise` annotation lacks its required type argument). */ const runLocal = vi.fn(async (input: RunLocalIngestOptions) => { + input.memoryFlow?.emit({ type: 'source_acquired', adapter: 'fake', trigger: 'manual_resync', fileCount: 1 }); + input.memoryFlow?.update({ syncId: 'sync-live-1' }); + input.memoryFlow?.emit({ type: 'raw_snapshot_written', syncId: 'sync-live-1', rawFileCount: 1 }); + input.memoryFlow?.emit({ type: 'diff_computed', added: 1, modified: 0, deleted: 0, unchanged: 0 }); + input.memoryFlow?.update({ + plannedWorkUnits: [ + { + unitKey: 'fake-orders', + rawFiles: ['orders/orders.json'], + peerFileCount: 0, + dependencyCount: 0, + }, + ], + }); + input.memoryFlow?.emit({ type: 'chunks_planned', chunkCount: 1, workUnitCount: 1, evictionCount: 0 }); + input.memoryFlow?.emit({ type: 'report_created', runId: 'live-viz-run' }); + input.memoryFlow?.finish('done'); + + return completedLocalBundleRun(input, 'live-viz-run'); + }); + const io = makeIo({ isTTY: true, stdinIsTTY: true, columns: 120 }); + /* Resolving to null means no live session object is handed back to the command. */ const startLiveMemoryFlow = vi.fn(async (_input: MemoryFlowReplayInput, _io: unknown) => null); + + await expect( + runKloIngest( + { + command: 'run', + projectDir, + connectionId: 'warehouse', + adapter: 'fake', + sourceDir, + outputMode: 'viz', + }, + io.io, + { + runLocalIngest: runLocal, + startLiveMemoryFlow, + jobIdFactory: () => 'live-viz-run', + now: () => new Date('2026-04-30T14:00:00.000Z'), + }, + ), + ).resolves.toBe(0); + + expect(runLocal).toHaveBeenCalledWith(expect.objectContaining({ memoryFlow: expect.any(Object) })); + expect(io.stdout()).toContain('\u001b[2J\u001b[H'); + expect((io.stdout().match(/KLO memory flow/g) ?? 
[]).length).toBeGreaterThan(1); + expect(io.stdout()).toContain('KLO memory flow warehouse/fake done'); + expect(io.stdout()).toContain('fake-orders'); + expect(io.stderr()).toBe(''); + }); + + /* startLiveMemoryFlow resolves to a session object; expects it to be started once with status 'running', updated, and closed once, and that no clear-screen sequence or 'KLO memory flow' text reaches stdout. */ it('uses the TUI live session for run --viz when stdin and stdout are interactive', async () => { + const projectDir = join(tempDir, 'project'); + await initKloProject({ projectDir, projectName: 'warehouse' }); + await writeWarehouseConfig(projectDir); + const sourceDir = join(tempDir, 'source'); + await mkdir(join(sourceDir, 'orders'), { recursive: true }); + await writeFile(join(sourceDir, 'orders', 'orders.json'), '{"name":"orders"}\n', 'utf-8'); + + /* No explicit return type: it is inferred from completedLocalBundleRun (a bare `Promise` annotation lacks its required type argument). */ const runLocal = vi.fn(async (input: RunLocalIngestOptions) => { + emitLiveLocalMemoryFlow(input.memoryFlow); + return completedLocalBundleRun(input, 'live-viz-run'); + }); + const liveSession = { + update: vi.fn(), + close: vi.fn(), + isClosed: vi.fn(() => false), + }; + const startLiveMemoryFlow = vi.fn(async (_input: MemoryFlowReplayInput, _io: unknown) => liveSession); + const io = makeIo({ isTTY: true, stdinIsTTY: true, columns: 120 }); + + await expect( + runKloIngest( + { + command: 'run', + projectDir, + connectionId: 'warehouse', + adapter: 'fake', + sourceDir, + outputMode: 'viz', + }, + io.io, + { + runLocalIngest: runLocal, + startLiveMemoryFlow, + jobIdFactory: () => 'live-viz-run', + now: () => new Date('2026-04-30T14:00:00.000Z'), + }, + ), + ).resolves.toBe(0); + + expect(startLiveMemoryFlow).toHaveBeenCalledTimes(1); + expect(startLiveMemoryFlow.mock.calls[0]?.[0]).toMatchObject({ + runId: 'live-viz-run', + connectionId: 'warehouse', + adapter: 'fake', + status: 'running', + }); + expect(liveSession.update).toHaveBeenCalled(); + expect(liveSession.close).toHaveBeenCalledTimes(1); + expect(io.stdout()).not.toContain('\u001b[2J\u001b[H'); + expect(io.stdout()).not.toContain('KLO memory flow'); + expect(io.stderr()).toBe(''); + }); + + /* With a live session in place, expects the session to be closed once and a 'Memory-flow summary: done' block plus the connection line to be printed to stdout. */ it('prints a final plain summary after live viz completes', async () 
=> { + const projectDir = join(tempDir, 'project'); + await initKloProject({ projectDir, projectName: 'warehouse' }); + await writeWarehouseConfig(projectDir); + const io = makeIo({ isTTY: true, stdinIsTTY: true, columns: 120 }); + const liveSession = { + update: vi.fn(), + close: vi.fn(), + isClosed: vi.fn(() => false), + }; + const startLiveMemoryFlow = vi.fn(async (_input: MemoryFlowReplayInput, _io: unknown) => liveSession); + const runLocal = vi.fn(async (input: RunLocalIngestOptions) => { + emitLiveLocalMemoryFlow(input.memoryFlow); + return completedLocalBundleRun(input, 'live-summary'); + }); + + await expect( + runKloIngest( + { + command: 'run', + projectDir, + connectionId: 'warehouse', + adapter: 'fake', + outputMode: 'viz', + }, + io.io, + { runLocalIngest: runLocal, startLiveMemoryFlow }, + ), + ).resolves.toBe(0); + + expect(liveSession.close).toHaveBeenCalledTimes(1); + expect(io.stdout()).toContain('Memory-flow summary: done'); + expect(io.stdout()).toContain('Connection: warehouse'); + }); + + /* startLiveMemoryFlow resolves to null; expects one start attempt followed by text rendering on stdout (clear-screen sequence plus the final 'done' header). */ it('falls back to text live rendering when the TUI live session is unavailable', async () => { + const projectDir = join(tempDir, 'project'); + await initKloProject({ projectDir, projectName: 'warehouse' }); + await writeWarehouseConfig(projectDir); + const sourceDir = join(tempDir, 'source'); + await mkdir(join(sourceDir, 'orders'), { recursive: true }); + await writeFile(join(sourceDir, 'orders', 'orders.json'), '{"name":"orders"}\n', 'utf-8'); + + /* No explicit return type: it is inferred from completedLocalBundleRun (a bare `Promise` annotation lacks its required type argument). */ const runLocal = vi.fn(async (input: RunLocalIngestOptions) => { + emitLiveLocalMemoryFlow(input.memoryFlow); + return completedLocalBundleRun(input, 'live-viz-run'); + }); + const startLiveMemoryFlow = vi.fn(async (_input: MemoryFlowReplayInput, _io: unknown) => null); + const io = makeIo({ isTTY: true, stdinIsTTY: true, columns: 120 }); + + await expect( + runKloIngest( + { + command: 'run', + projectDir, + connectionId: 'warehouse', + adapter: 'fake', + sourceDir, + outputMode: 'viz', + }, + io.io, + 
{ + runLocalIngest: runLocal, + startLiveMemoryFlow, + jobIdFactory: () => 'live-viz-run', + }, + ), + ).resolves.toBe(0); + + expect(startLiveMemoryFlow).toHaveBeenCalledTimes(1); + expect(io.stdout()).toContain('\u001b[2J\u001b[H'); + expect(io.stdout()).toContain('KLO memory flow warehouse/fake done'); + }); + + /* The startLiveMemoryFlow stub writes a redacted warning to the stderr it is given and resolves to null; expects that warning on stderr and text rendering (header plus clear-screen sequence) on stdout. */ it('falls back to text live rendering when TUI startup fails with a redacted warning', async () => { + const projectDir = join(tempDir, 'project'); + await initKloProject({ projectDir, projectName: 'warehouse' }); + await writeWarehouseConfig(projectDir); + const sourceDir = join(tempDir, 'source'); + await mkdir(join(sourceDir, 'orders'), { recursive: true }); + await writeFile(join(sourceDir, 'orders', 'orders.json'), '{"name":"orders"}\n', 'utf-8'); + + /* No explicit return type: it is inferred from completedLocalBundleRun (a bare `Promise` annotation lacks its required type argument). */ const runLocal = vi.fn(async (input: RunLocalIngestOptions) => { + emitLiveLocalMemoryFlow(input.memoryFlow); + return completedLocalBundleRun(input, 'live-viz-run'); + }); + const startLiveMemoryFlow = vi.fn( + async (_input: MemoryFlowReplayInput, ioArg: { stderr: { write(chunk: string): void } }) => { + ioArg.stderr.write('TUI visualization unavailable: Failed [redacted-url] [redacted]; using text renderer.\n'); + return null; + }, + ); + const io = makeIo({ isTTY: true, stdinIsTTY: true, columns: 120 }); + + await expect( + runKloIngest( + { + command: 'run', + projectDir, + connectionId: 'warehouse', + adapter: 'fake', + sourceDir, + outputMode: 'viz', + }, + io.io, + { + runLocalIngest: runLocal, + startLiveMemoryFlow, + jobIdFactory: () => 'live-viz-run', + }, + ), + ).resolves.toBe(0); + + expect(io.stderr()).toContain('TUI visualization unavailable: Failed [redacted-url] [redacted]'); + expect(io.stdout()).toContain('KLO memory flow warehouse/fake done'); + expect(io.stdout()).toContain('\u001b[2J\u001b[H'); + }); + + /* With inputMode 'disabled', expects startLiveMemoryFlow not to be called, runLocalIngest to receive no memoryFlow, and the memory-flow header still printed to stdout. */ it('does not start live TUI when run --viz disables input', async () => { + const projectDir = join(tempDir, 'project'); + await initKloProject({ projectDir, 
projectName: 'warehouse' }); + await writeWarehouseConfig(projectDir); + const sourceDir = join(tempDir, 'source'); + await mkdir(join(sourceDir, 'orders'), { recursive: true }); + await writeFile(join(sourceDir, 'orders', 'orders.json'), '{"name":"orders"}\n', 'utf-8'); + + /* No explicit return type: it is inferred from completedLocalBundleRun (a bare `Promise` annotation lacks its required type argument). */ const runLocal = vi.fn(async (input: RunLocalIngestOptions) => { + return completedLocalBundleRun(input, 'no-input-live-viz-run'); + }); + /* This spy would hand back a session if called; the assertions below require that it never is. */ const startLiveMemoryFlow = vi.fn(async (_input: MemoryFlowReplayInput, _io: unknown) => ({ + update: vi.fn(), + close: vi.fn(), + isClosed: vi.fn(() => false), + })); + const io = makeIo({ isTTY: true, stdinIsTTY: true, columns: 120 }); + + await expect( + runKloIngest( + { + command: 'run', + projectDir, + connectionId: 'warehouse', + adapter: 'fake', + sourceDir, + outputMode: 'viz', + inputMode: 'disabled', + }, + io.io, + { runLocalIngest: runLocal, startLiveMemoryFlow }, + ), + ).resolves.toBe(0); + + expect(startLiveMemoryFlow).not.toHaveBeenCalled(); + expect(runLocal).toHaveBeenCalledWith(expect.not.objectContaining({ memoryFlow: expect.anything() })); + expect(io.stdout()).toContain('KLO memory flow warehouse/fake done'); + }); + + /* outputMode 'plain' on a TTY: expects runLocalIngest to receive no memoryFlow and stdout to show the Job line without any 'KLO memory flow' text. */ it('does not attach a live memory-flow sink for plain run output', async () => { + const projectDir = join(tempDir, 'project'); + await initKloProject({ projectDir, projectName: 'warehouse' }); + await writeWarehouseConfig(projectDir); + const sourceDir = join(tempDir, 'source'); + await mkdir(join(sourceDir, 'orders'), { recursive: true }); + await writeFile(join(sourceDir, 'orders', 'orders.json'), '{"name":"orders"}\n', 'utf-8'); + + const runLocal = vi.fn(async (input: RunLocalIngestOptions) => completedLocalBundleRun(input, 'plain-run')); + const io = makeIo({ isTTY: true }); + + await expect( + runKloIngest( + { + command: 'run', + projectDir, + connectionId: 'warehouse', + adapter: 'fake', + sourceDir, + outputMode: 'plain', + }, + io.io, + { runLocalIngest: runLocal }, + ), + ).resolves.toBe(0); + + 
expect(runLocal).toHaveBeenCalledWith(expect.not.objectContaining({ memoryFlow: expect.anything() })); + expect(io.stdout()).toContain('Job: plain-run'); + expect(io.stdout()).not.toContain('KLO memory flow'); + }); + + /* outputMode 'viz' with isTTY false: expects plain Job output, no 'KLO memory flow' text, and a stderr notice that stdout is not an interactive terminal. */ it('falls back to plain run output for run --viz when stdout is not interactive', async () => { + const projectDir = join(tempDir, 'project'); + await initKloProject({ projectDir, projectName: 'warehouse' }); + await writeWarehouseConfig(projectDir); + const sourceDir = join(tempDir, 'source'); + await mkdir(join(sourceDir, 'orders'), { recursive: true }); + await writeFile(join(sourceDir, 'orders', 'orders.json'), '{"name":"orders"}\n', 'utf-8'); + + const io = makeIo({ isTTY: false }); + const runLocal = vi.fn(async (input: RunLocalIngestOptions) => completedLocalBundleRun(input, 'non-tty-viz-run')); + await expect( + runKloIngest( + { + command: 'run', + projectDir, + connectionId: 'warehouse', + adapter: 'fake', + sourceDir, + outputMode: 'viz', + }, + io.io, + { + runLocalIngest: runLocal, + jobIdFactory: () => 'non-tty-viz-run', + }, + ), + ).resolves.toBe(0); + + expect(io.stdout()).toContain('Job: non-tty-viz-run'); + expect(io.stdout()).not.toContain('KLO memory flow'); + expect(io.stderr()).toContain( + 'Visualization requested but stdout is not an interactive terminal; printing plain output.', + ); + }); + + /* outputMode 'viz' with rawMode false on the fake IO: expects no live session to be started, no memoryFlow on the runLocalIngest input, plain Job output, and a stderr notice about stdin raw mode. */ it('falls back to plain run output for run --viz when stdin raw mode is unavailable', async () => { + const projectDir = join(tempDir, 'project'); + await initKloProject({ projectDir, projectName: 'warehouse' }); + await writeWarehouseConfig(projectDir); + const sourceDir = join(tempDir, 'source'); + await mkdir(join(sourceDir, 'orders'), { recursive: true }); + await writeFile(join(sourceDir, 'orders', 'orders.json'), '{"name":"orders"}\n', 'utf-8'); + + const io = makeIo({ isTTY: true, stdinIsTTY: true, rawMode: false, columns: 120 }); + const runLocal = vi.fn(async (input: RunLocalIngestOptions) => completedLocalBundleRun(input, 
'raw-missing-viz-run')); + const startLiveMemoryFlow = vi.fn(async (_input: MemoryFlowReplayInput, _io: unknown) => ({ + update: vi.fn(), + close: vi.fn(), + isClosed: vi.fn(() => false), + })); + + await expect( + runKloIngest( + { + command: 'run', + projectDir, + connectionId: 'warehouse', + adapter: 'fake', + sourceDir, + outputMode: 'viz', + }, + io.io, + { + runLocalIngest: runLocal, + startLiveMemoryFlow, + jobIdFactory: () => 'raw-missing-viz-run', + }, + ), + ).resolves.toBe(0); + + expect(startLiveMemoryFlow).not.toHaveBeenCalled(); + expect(runLocal).toHaveBeenCalledWith(expect.not.objectContaining({ memoryFlow: expect.anything() })); + expect(io.stdout()).toContain('Job: raw-missing-viz-run'); + expect(io.stdout()).not.toContain('KLO memory flow'); + expect(io.stderr()).toContain( + 'Visualization requested but stdin raw mode is unavailable; printing plain output.', + ); + }); + + /* Status lookup for a run id that was never persisted: expects exit code 1 and a not-found message on stderr. */ it('returns an error code for missing status', async () => { + const projectDir = join(tempDir, 'project'); + await initKloProject({ projectDir, projectName: 'warehouse' }); + const io = makeIo(); + + await expect( + runKloIngest({ command: 'status', projectDir, runId: 'missing-run', outputMode: 'plain' }, io.io), + ).resolves.toBe(1); + + expect(io.stderr()).toContain('Local ingest run or report "missing-run" was not found'); + }); + + /* Persists two reports (older-run, then newer-run) and runs status without a runId; expects the output to describe newer-run. */ it('uses the latest local ingest report when status has no run id', async () => { + const projectDir = join(tempDir, 'project'); + await initKloProject({ projectDir, projectName: 'warehouse' }); + await writeWarehouseConfig(projectDir); + await persistLocalBundleReport(projectDir, localFakeBundleReport('older-run')); + await persistLocalBundleReport(projectDir, localFakeBundleReport('newer-run')); + const io = makeIo(); + + await expect(runKloIngest({ command: 'status', projectDir, outputMode: 'plain' }, io.io)).resolves.toBe(0); + + expect(io.stdout()).toContain('Run: run-newer-run'); + expect(io.stdout()).toContain('Job: newer-run'); + 
expect(io.stderr()).toBe(''); + }); + + /* watch without a runId, in viz mode with input disabled: expects the persisted report to be rendered with the memory-flow header and its Run line, and empty stderr. */ it('renders the latest local ingest report through watch when run id is omitted', async () => { + const projectDir = join(tempDir, 'project'); + await initKloProject({ projectDir, projectName: 'warehouse' }); + await writeWarehouseConfig(projectDir); + await persistLocalBundleReport(projectDir, localFakeBundleReport('watch-latest')); + const io = makeIo({ isTTY: true }); + + await expect( + runKloIngest({ command: 'watch', projectDir, outputMode: 'viz', inputMode: 'disabled' }, io.io), + ).resolves.toBe(0); + + expect(io.stdout()).toContain('KLO memory flow warehouse/fake done'); + expect(io.stdout()).toContain('Run: run-watch-latest'); + expect(io.stderr()).toBe(''); + }); + + /* Replays a report file produced by writeBundleReportFile in viz mode with input disabled; expects the memory-flow header, the summary and commit lines, and the SOURCE/ACTIONS/SAVED labels on stdout. */ it('renders report-file replay through the memory-flow TUI', async () => { + const projectDir = join(tempDir, 'project'); + await initKloProject({ projectDir, projectName: 'warehouse' }); + const reportFile = await writeBundleReportFile(tempDir); + const io = makeIo({ isTTY: true }); + + await expect( + runKloIngest( + { + command: 'replay', + projectDir, + runId: 'job-1', + reportFile, + outputMode: 'viz', + inputMode: 'disabled', + }, + io.io, + ), + ).resolves.toBe(0); + + expect(io.stdout()).toContain('KLO memory flow warehouse/metabase done'); + expect(io.stdout()).toContain('Saved 2 memories from 2 raw files'); + expect(io.stdout()).toContain('Commit: abc12345 Run: run-1 Report: report-1'); + expect(io.stdout()).toContain('SOURCE'); + expect(io.stdout()).toContain('ACTIONS'); + expect(io.stdout()).toContain('SAVED'); + expect(io.stderr()).toBe(''); + }); + + /* status with reportFile and outputMode 'json': expects stdout to parse as JSON carrying the report's id, runId, jobId, connectionId, and sourceKey. */ it('prints report-file JSON without looking up local ingest status', async () => { + const projectDir = join(tempDir, 'project'); + await initKloProject({ projectDir, projectName: 'warehouse' }); + const reportFile = await writeBundleReportFile(tempDir); + const io = makeIo(); + + await expect( + runKloIngest({ command: 'status', projectDir, runId: 'report-1', reportFile, outputMode: 'json' 
}, io.io), + ).resolves.toBe(0); + + const parsed = JSON.parse(io.stdout()); + expect(parsed).toMatchObject({ + id: 'report-1', + runId: 'run-1', + jobId: 'job-1', + connectionId: 'warehouse', + sourceKey: 'metabase', + }); + expect(io.stderr()).toBe(''); + }); + + /* Interactive replay of a report file: expects renderStoredMemoryFlow to be called once with the report's identifiers and nothing written to stdout or stderr. */ it('routes interactive report-file replay through the stored TUI renderer', async () => { + const projectDir = join(tempDir, 'project'); + await initKloProject({ projectDir, projectName: 'warehouse' }); + const reportFile = await writeBundleReportFile(tempDir); + const io = makeIo({ isTTY: true, stdinIsTTY: true, columns: 120 }); + const renderStoredMemoryFlow = vi.fn(async (_input: MemoryFlowReplayInput, _io: unknown) => true); + + await expect( + runKloIngest( + { + command: 'replay', + projectDir, + runId: 'run-1', + reportFile, + outputMode: 'viz', + }, + io.io, + { renderStoredMemoryFlow }, + ), + ).resolves.toBe(0); + + expect(renderStoredMemoryFlow).toHaveBeenCalledTimes(1); + expect(renderStoredMemoryFlow.mock.calls[0]?.[0]).toMatchObject({ + runId: 'run-1', + reportId: 'report-1', + connectionId: 'warehouse', + adapter: 'metabase', + }); + expect(io.stdout()).toBe(''); + expect(io.stderr()).toBe(''); + }); + + /* Replay with a runId that matches none of the report's ids: expects exit code 1, a mismatch message listing the accepted ids, and empty stdout. */ it('rejects report-file replay when the requested id does not match the report', async () => { + const projectDir = join(tempDir, 'project'); + await initKloProject({ projectDir, projectName: 'warehouse' }); + const reportFile = await writeBundleReportFile(tempDir); + const io = makeIo(); + + await expect( + runKloIngest({ command: 'replay', projectDir, runId: 'unrelated-id', reportFile, outputMode: 'plain' }, io.io), + ).resolves.toBe(1); + + expect(io.stderr()).toContain( + `Report file ${reportFile} does not match ingest replay id "unrelated-id"; expected one of report-1, run-1, job-1`, + ); + expect(io.stdout()).toBe(''); + }); + + /* status --viz with input disabled for a persisted report: expects the memory-flow header, the SOURCE/CHUNKS/WORKUNITS labels, and the saved-memories summary on stdout. */ it('renders memory-flow snapshot for status --viz when stdout is interactive', async () => { + const projectDir = join(tempDir, 'project'); + await 
initKloProject({ projectDir, projectName: 'warehouse' }); + await writeWarehouseConfig(projectDir); + const sourceDir = join(tempDir, 'source'); + await mkdir(join(sourceDir, 'orders'), { recursive: true }); + await writeFile(join(sourceDir, 'orders', 'orders.json'), '{"name":"orders"}\n', 'utf-8'); + + await persistLocalBundleReport(projectDir, localFakeBundleReport('viz-run-1')); + + const io = makeIo({ isTTY: true }); + await expect( + runKloIngest( + { command: 'status', projectDir, runId: 'viz-run-1', outputMode: 'viz', inputMode: 'disabled' }, + io.io, + ), + ).resolves.toBe(0); + + expect(io.stdout()).toContain('KLO memory flow warehouse/fake done'); + expect(io.stdout()).toContain('SOURCE'); + expect(io.stdout()).toContain('CHUNKS'); + expect(io.stdout()).toContain('WORKUNITS'); + expect(io.stdout()).toContain('Saved 2 memories from 2 raw files'); + expect(io.stderr()).toBe(''); + }); + + /* Interactive status --viz for a persisted report: expects renderStoredMemoryFlow to be called once with the stored run's identifiers and nothing written directly to stdout or stderr. */ it('uses the TUI renderer for stored status --viz when stdin and stdout are interactive', async () => { + const projectDir = join(tempDir, 'project'); + await initKloProject({ projectDir, projectName: 'warehouse' }); + await writeWarehouseConfig(projectDir); + const sourceDir = join(tempDir, 'source'); + await mkdir(join(sourceDir, 'orders'), { recursive: true }); + await writeFile(join(sourceDir, 'orders', 'orders.json'), '{"name":"orders"}\n', 'utf-8'); + + await persistLocalBundleReport(projectDir, localFakeBundleReport('tui-viz-run')); + + const io = makeIo({ isTTY: true, stdinIsTTY: true, columns: 120 }); + const renderStoredMemoryFlow = vi.fn(async (_input: MemoryFlowReplayInput, _io: unknown) => true); + + await expect( + runKloIngest( + { + command: 'status', + projectDir, + runId: 'tui-viz-run', + outputMode: 'viz', + }, + io.io, + { renderStoredMemoryFlow }, + ), + ).resolves.toBe(0); + + expect(renderStoredMemoryFlow).toHaveBeenCalledTimes(1); + expect(renderStoredMemoryFlow.mock.calls[0]?.[0]).toMatchObject({ + runId: 'run-tui-viz-run', + connectionId: 
'warehouse', + adapter: 'fake', + }); + expect(io.stdout()).toBe(''); + expect(io.stderr()).toBe(''); + }); + + /* renderStoredMemoryFlow resolves to false; expects it to be called once and the text snapshot header to be printed instead. The fake IO is created with keypresses: [{ name: 'q' }]. */ it('falls back to the text renderer when TUI declines stored status --viz', async () => { + const projectDir = join(tempDir, 'project'); + await initKloProject({ projectDir, projectName: 'warehouse' }); + await writeWarehouseConfig(projectDir); + const sourceDir = join(tempDir, 'source'); + await mkdir(join(sourceDir, 'orders'), { recursive: true }); + await writeFile(join(sourceDir, 'orders', 'orders.json'), '{"name":"orders"}\n', 'utf-8'); + + await persistLocalBundleReport(projectDir, localFakeBundleReport('tui-fallback-run')); + + const io = makeIo({ isTTY: true, stdinIsTTY: true, columns: 120, keypresses: [{ name: 'q' }] }); + const renderStoredMemoryFlow = vi.fn(async (_input: MemoryFlowReplayInput, _io: unknown) => false); + + await expect( + runKloIngest( + { + command: 'status', + projectDir, + runId: 'tui-fallback-run', + outputMode: 'viz', + }, + io.io, + { renderStoredMemoryFlow }, + ), + ).resolves.toBe(0); + + expect(renderStoredMemoryFlow).toHaveBeenCalledTimes(1); + expect(io.stdout()).toContain('KLO memory flow warehouse/fake done'); + }); + + /* replay --viz with inputMode 'disabled': expects renderStoredMemoryFlow not to be called and the text snapshot header on stdout. */ it('does not use TUI for stored --viz when input is disabled', async () => { + const projectDir = join(tempDir, 'project'); + await initKloProject({ projectDir, projectName: 'warehouse' }); + await writeWarehouseConfig(projectDir); + const sourceDir = join(tempDir, 'source'); + await mkdir(join(sourceDir, 'orders'), { recursive: true }); + await writeFile(join(sourceDir, 'orders', 'orders.json'), '{"name":"orders"}\n', 'utf-8'); + + await persistLocalBundleReport(projectDir, localFakeBundleReport('tui-no-input-run')); + + const io = makeIo({ isTTY: true, stdinIsTTY: true, columns: 120 }); + const renderStoredMemoryFlow = vi.fn(async (_input: MemoryFlowReplayInput, _io: unknown) => true); + + await expect( + runKloIngest( + { + command: 'replay', + projectDir, + runId: 'tui-no-input-run', + 
outputMode: 'viz', + inputMode: 'disabled', + }, + io.io, + { renderStoredMemoryFlow }, + ), + ).resolves.toBe(0); + + expect(renderStoredMemoryFlow).not.toHaveBeenCalled(); + expect(io.stdout()).toContain('KLO memory flow warehouse/fake done'); + }); + + it('falls back to plain status for stored --viz when stdin raw mode is unavailable', async () => { + const projectDir = join(tempDir, 'project'); + await initKloProject({ projectDir, projectName: 'warehouse' }); + await writeWarehouseConfig(projectDir); + const sourceDir = join(tempDir, 'source'); + await mkdir(join(sourceDir, 'orders'), { recursive: true }); + await writeFile(join(sourceDir, 'orders', 'orders.json'), '{"name":"orders"}\n', 'utf-8'); + + await persistLocalBundleReport(projectDir, localFakeBundleReport('raw-missing-stored-viz-run')); + + const io = makeIo({ isTTY: true, stdinIsTTY: true, rawMode: false, columns: 120 }); + const renderStoredMemoryFlow = vi.fn(async (_input: MemoryFlowReplayInput, _io: unknown) => true); + + await expect( + runKloIngest( + { + command: 'replay', + projectDir, + runId: 'raw-missing-stored-viz-run', + outputMode: 'viz', + }, + io.io, + { renderStoredMemoryFlow }, + ), + ).resolves.toBe(0); + + expect(renderStoredMemoryFlow).not.toHaveBeenCalled(); + expect(io.stdout()).toContain('Run: run-raw-missing-stored-viz-run'); + expect(io.stdout()).toContain('Job: raw-missing-stored-viz-run'); + expect(io.stdout()).not.toContain('KLO memory flow'); + expect(io.stderr()).toContain( + 'Visualization requested but stdin raw mode is unavailable; printing plain output.', + ); + }); + + it('keeps stored --viz snapshot-only when input is disabled', async () => { + const projectDir = join(tempDir, 'project'); + await initKloProject({ projectDir, projectName: 'warehouse' }); + await writeWarehouseConfig(projectDir); + const sourceDir = join(tempDir, 'source'); + await mkdir(join(sourceDir, 'orders'), { recursive: true }); + await writeFile(join(sourceDir, 'orders', 'orders.json'), 
'{"name":"orders"}\n', 'utf-8'); + + await persistLocalBundleReport(projectDir, localFakeBundleReport('no-input-viz-run')); + + const io = makeIo({ isTTY: true, columns: 120 }); + await expect( + runKloIngest( + { + command: 'replay', + projectDir, + runId: 'no-input-viz-run', + outputMode: 'viz', + inputMode: 'disabled', + }, + io.io, + ), + ).resolves.toBe(0); + + expect(io.stdout()).toContain('KLO memory flow warehouse/fake done'); + expect(io.stdout()).not.toContain('\u001b[2J\u001b[H'); + expect(io.stderr()).toBe(''); + }); + + it('keeps disabled-input stored --viz snapshot output even when stdin raw mode is unavailable', async () => { + const projectDir = join(tempDir, 'project'); + await initKloProject({ projectDir, projectName: 'warehouse' }); + await writeWarehouseConfig(projectDir); + const sourceDir = join(tempDir, 'source'); + await mkdir(join(sourceDir, 'orders'), { recursive: true }); + await writeFile(join(sourceDir, 'orders', 'orders.json'), '{"name":"orders"}\n', 'utf-8'); + + await persistLocalBundleReport(projectDir, localFakeBundleReport('disabled-raw-missing-viz-run')); + + const io = makeIo({ isTTY: true, stdinIsTTY: true, rawMode: false, columns: 120 }); + await expect( + runKloIngest( + { + command: 'replay', + projectDir, + runId: 'disabled-raw-missing-viz-run', + outputMode: 'viz', + inputMode: 'disabled', + }, + io.io, + ), + ).resolves.toBe(0); + + expect(io.stdout()).toContain('KLO memory flow warehouse/fake done'); + expect(io.stdout()).not.toContain('\u001b[2J\u001b[H'); + expect(io.stderr()).toBe(''); + }); + + it('degrades stored --viz snapshots to plain status when stdout is redirected even when input is disabled', async () => { + const projectDir = join(tempDir, 'project'); + await initKloProject({ projectDir, projectName: 'warehouse' }); + await writeWarehouseConfig(projectDir); + const sourceDir = join(tempDir, 'source'); + await mkdir(join(sourceDir, 'orders'), { recursive: true }); + await writeFile(join(sourceDir, 'orders', 
'orders.json'), '{"name":"orders"}\n', 'utf-8'); + + await persistLocalBundleReport(projectDir, localFakeBundleReport('redirected-no-input-viz-run')); + + const io = makeIo({ isTTY: false }); + await expect( + runKloIngest( + { + command: 'replay', + projectDir, + runId: 'redirected-no-input-viz-run', + outputMode: 'viz', + inputMode: 'disabled', + }, + io.io, + ), + ).resolves.toBe(0); + + expect(io.stdout()).toContain('Run: run-redirected-no-input-viz-run'); + expect(io.stdout()).toContain('Job: redirected-no-input-viz-run'); + expect(io.stdout()).not.toContain('KLO memory flow'); + expect(io.stderr()).toContain( + 'Visualization requested but stdout is not an interactive terminal; printing plain output.', + ); + }); + + it('degrades ingest replay --viz to plain status when TERM is dumb', async () => { + const projectDir = join(tempDir, 'project'); + await initKloProject({ projectDir, projectName: 'warehouse' }); + await writeWarehouseConfig(projectDir); + const sourceDir = join(tempDir, 'source'); + await mkdir(join(sourceDir, 'orders'), { recursive: true }); + await writeFile(join(sourceDir, 'orders', 'orders.json'), '{"name":"orders"}\n', 'utf-8'); + + await persistLocalBundleReport(projectDir, localFakeBundleReport('dumb-terminal-viz-run')); + + const io = makeIo({ isTTY: true }); + await expect( + runKloIngest( + { command: 'replay', projectDir, runId: 'dumb-terminal-viz-run', outputMode: 'viz' }, + io.io, + { env: { ...process.env, TERM: 'dumb' } }, + ), + ).resolves.toBe(0); + + expect(io.stdout()).toContain('Run: run-dumb-terminal-viz-run'); + expect(io.stdout()).toContain('Job: dumb-terminal-viz-run'); + expect(io.stdout()).not.toContain('KLO memory flow'); + expect(io.stderr()).toContain( + 'Visualization requested but TERM=dumb does not support the visual renderer; printing plain output.', + ); + }); + + it('falls back to plain status for --viz when stdout is not interactive', async () => { + const projectDir = join(tempDir, 'project'); + await 
initKloProject({ projectDir, projectName: 'warehouse' }); + await writeWarehouseConfig(projectDir); + const sourceDir = join(tempDir, 'source'); + await mkdir(join(sourceDir, 'orders'), { recursive: true }); + await writeFile(join(sourceDir, 'orders', 'orders.json'), '{"name":"orders"}\n', 'utf-8'); + + await persistLocalBundleReport(projectDir, localFakeBundleReport('viz-run-2')); + + const io = makeIo({ isTTY: false }); + await expect( + runKloIngest({ command: 'replay', projectDir, runId: 'viz-run-2', outputMode: 'viz' }, io.io), + ).resolves.toBe(0); + + expect(io.stdout()).toContain('Run: run-viz-run-2'); + expect(io.stdout()).toContain('Job: viz-run-2'); + expect(io.stdout()).not.toContain('KLO memory flow'); + expect(io.stderr()).toContain( + 'Visualization requested but stdout is not an interactive terminal; printing plain output.', + ); + }); + + it('prints JSON for status --json', async () => { + const projectDir = join(tempDir, 'project'); + await initKloProject({ projectDir, projectName: 'warehouse' }); + await writeWarehouseConfig(projectDir); + const sourceDir = join(tempDir, 'source'); + await mkdir(join(sourceDir, 'orders'), { recursive: true }); + await writeFile(join(sourceDir, 'orders', 'orders.json'), '{"name":"orders"}\n', 'utf-8'); + + await persistLocalBundleReport(projectDir, localFakeBundleReport('json-run-1')); + + const io = makeIo(); + await expect( + runKloIngest({ command: 'status', projectDir, runId: 'json-run-1', outputMode: 'json' }, io.io), + ).resolves.toBe(0); + + expect(JSON.parse(io.stdout())).toMatchObject({ + runId: 'run-json-run-1', + jobId: 'json-run-1', + sourceKey: 'fake', + connectionId: 'warehouse', + }); + expect(io.stderr()).toBe(''); + }); +}); diff --git a/packages/cli/src/ingest.ts b/packages/cli/src/ingest.ts new file mode 100644 index 00000000..15f56e52 --- /dev/null +++ b/packages/cli/src/ingest.ts @@ -0,0 +1,425 @@ +import { + buildMemoryFlowViewModel, + createMemoryFlowLiveBuffer, + 
formatMemoryFlowFinalSummary, + getLatestLocalIngestStatus, + getLocalIngestStatus, + type IngestReportSnapshot, + ingestReportToMemoryFlowReplay, + type LocalMetabaseFanoutResult, + type LocalMetabaseFanoutProgress, + type MemoryFlowReplayInput, + type RunLocalIngestOptions, + renderMemoryFlowReplay, + runLocalIngest, + runLocalMetabaseIngest, +} from '@klo/context/ingest'; +import { loadKloProject } from '@klo/context/project'; +import { readIngestReportSnapshotFile } from './ingest-report-file.js'; +import { createKloCliLocalIngestAdapters } from './local-adapters.js'; +import { type KloMemoryFlowStdin, renderMemoryFlowInteractively } from './memory-flow-interactive.js'; +import { + type KloMemoryFlowTuiIo, + type MemoryFlowTuiLiveSession, + renderMemoryFlowTui, + startLiveMemoryFlowTui, +} from './memory-flow-tui.js'; +import { resolveVizFallback, warnVizFallbackOnce } from './viz-fallback.js'; +import { profileMark } from './startup-profile.js'; + +profileMark('module:ingest'); + +export type KloIngestOutputMode = 'plain' | 'json' | 'viz'; +type KloIngestInputMode = 'auto' | 'disabled'; + +export type KloIngestArgs = + | { + command: 'run'; + projectDir: string; + connectionId: string; + adapter: string; + sourceDir?: string; + databaseIntrospectionUrl?: string; + debugLlmRequestFile?: string; + outputMode: KloIngestOutputMode; + inputMode?: KloIngestInputMode; + } + | { + command: 'status' | 'replay' | 'watch'; + projectDir: string; + runId?: string; + reportFile?: string; + outputMode: KloIngestOutputMode; + inputMode?: KloIngestInputMode; + }; + +interface KloIngestIo { + stdin?: KloMemoryFlowStdin; + stdout: { isTTY?: boolean; columns?: number; write(chunk: string): void }; + stderr: { write(chunk: string): void }; +} + +interface KloIngestDeps { + jobIdFactory?: () => string; + now?: () => Date; + createAdapters?: typeof createKloCliLocalIngestAdapters; + runLocalIngest?: typeof runLocalIngest; + runLocalMetabaseIngest?: typeof runLocalMetabaseIngest; + 
readReportFile?: typeof readIngestReportSnapshotFile; + renderStoredMemoryFlow?: typeof renderMemoryFlowTui; + startLiveMemoryFlow?: typeof startLiveMemoryFlowTui; + env?: NodeJS.ProcessEnv; + localIngestOptions?: Pick< + RunLocalIngestOptions, + | 'agentRunner' + | 'llmProvider' + | 'memoryModel' + | 'semanticLayerCompute' + | 'queryExecutor' + | 'logger' + | 'pullConfigOptions' + >; +} + +function reportStatus(report: IngestReportSnapshot): 'done' | 'error' { + return report.body.failedWorkUnits.length > 0 ? 'error' : 'done'; +} + +function reportActionCounts(report: IngestReportSnapshot): { wikiCount: number; slCount: number } { + const actions = report.body.workUnits.flatMap((workUnit) => workUnit.actions); + return { + wikiCount: actions.filter((action) => action.target === 'wiki').length, + slCount: actions.filter((action) => action.target === 'sl').length, + }; +} + +function writeReportStatus(report: IngestReportSnapshot, io: KloIngestIo): void { + const counts = reportActionCounts(report); + io.stdout.write(`Report: ${report.id}\n`); + io.stdout.write(`Run: ${report.runId}\n`); + io.stdout.write(`Job: ${report.jobId}\n`); + io.stdout.write(`Status: ${reportStatus(report)}\n`); + io.stdout.write(`Adapter: ${report.sourceKey}\n`); + io.stdout.write(`Connection: ${report.connectionId}\n`); + io.stdout.write(`Sync: ${report.body.syncId}\n`); + io.stdout.write( + `Diff: +${report.body.diffSummary.added}/~${report.body.diffSummary.modified}/-${report.body.diffSummary.deleted}/=${report.body.diffSummary.unchanged}\n`, + ); + io.stdout.write(`Work units: ${report.body.workUnits.length}\n`); + io.stdout.write(`Saved memory: ${counts.wikiCount} wiki, ${counts.slCount} SL\n`); + io.stdout.write(`Provenance rows: ${report.body.provenanceRows.length}\n`); +} + +function writeMetabaseFanoutStatus(result: LocalMetabaseFanoutResult, io: KloIngestIo): void { + io.stdout.write(`Metabase fan-out: ${result.status}\n`); + io.stdout.write(`Source: 
${result.metabaseConnectionId}\n`); + io.stdout.write(`Children: ${result.children.length}\n`); + if (result.totals) { + io.stdout.write(`Work units: ${result.totals.workUnits}\n`); + io.stdout.write(`Failed work units: ${result.totals.failedWorkUnits}\n`); + } + for (const child of result.children) { + const status = reportStatus(child.report); + io.stdout.write( + `- target=${child.targetConnectionId} database=${child.metabaseDatabaseId} status=${status} job=${child.jobId}\n`, + ); + } +} + +function pluralize(count: number, singular: string, plural = `${singular}s`): string { + return `${count} ${count === 1 ? singular : plural}`; +} + +function createMetabaseFanoutProgress( + connectionId: string, + io: KloIngestIo, +): LocalMetabaseFanoutProgress { + io.stdout.write(`Metabase ingest: ${connectionId}\n`); + io.stdout.write('Checking mappings and scheduled-pull targets...\n'); + return { + onMetabaseFanoutPlanned(event) { + io.stdout.write(`Targets: ${pluralize(event.children.length, 'mapped database')}\n`); + for (const child of event.children) { + io.stdout.write(`- database=${child.metabaseDatabaseId} target=${child.targetConnectionId} status=queued\n`); + } + }, + onMetabaseChildStarted(event) { + io.stdout.write( + `- database=${event.metabaseDatabaseId} target=${event.targetConnectionId} status=running job=${event.jobId}\n`, + ); + }, + onMetabaseChildCompleted(event) { + io.stdout.write( + `- database=${event.metabaseDatabaseId} target=${event.targetConnectionId} status=${event.status} job=${event.jobId}\n`, + ); + }, + }; +} + +function writeReportJson(report: IngestReportSnapshot, io: KloIngestIo): void { + io.stdout.write(`${JSON.stringify(report, null, 2)}\n`); +} + +function assertReportMatchesReplayId(report: IngestReportSnapshot, requestedId: string, reportFile: string): void { + const validIds = [report.id, report.runId, report.jobId]; + if (!validIds.includes(requestedId)) { + throw new Error( + `Report file ${reportFile} does not match ingest 
replay id "${requestedId}"; expected one of ${validIds.join( + ', ', + )}`, + ); + } +} + +/** Loads the stored report for `runId`, or the latest stored report when `runId` is omitted. NOTE(review): the nullish part of the return type is inferred from the caller's `!report` guard — confirm against getLocalIngestStatus. */ async function readStoredIngestReport( + project: Awaited<ReturnType<typeof loadKloProject>>, + runId: string | undefined, +): Promise<IngestReportSnapshot | null | undefined> { + return runId ? await getLocalIngestStatus(project, runId) : await getLatestLocalIngestStatus(project); +} + +function isInteractiveTerminal(io: KloIngestIo): boolean { + return io.stdout.isTTY === true; +} + +function terminalWidth(io: KloIngestIo): number | undefined { + return io.stdout.columns ?? process.stdout.columns; +} + +function isTuiCapableIo(io: KloIngestIo): io is KloIngestIo & KloMemoryFlowTuiIo { + return ( + io.stdin?.isTTY === true && + io.stdout.isTTY === true && + typeof io.stdin.on === 'function' && + typeof io.stdin.setRawMode === 'function' && + typeof io.stdout.write === 'function' + ); +} + +interface EffectiveIngestOutputModeOptions { + requireInput?: boolean; +} + +function effectiveIngestOutputMode( + outputMode: KloIngestOutputMode, + io: KloIngestIo, + env: NodeJS.ProcessEnv, + options: EffectiveIngestOutputModeOptions = {}, +): KloIngestOutputMode { + if (outputMode !== 'viz') { + return outputMode; + } + + const fallback = resolveVizFallback(io, env, { requireInput: options.requireInput ?? false }); + if (!fallback.shouldDegrade) { + return outputMode; + } + + warnVizFallbackOnce(io, fallback); + return 'plain'; +} + +function writeMemoryFlowInput(input: MemoryFlowReplayInput, io: KloIngestIo, options: { clear?: boolean } = {}): void { + if (options.clear) { + io.stdout.write('\u001b[2J\u001b[H'); + } + const view = buildMemoryFlowViewModel(input); + io.stdout.write(renderMemoryFlowReplay(view, { terminalWidth: terminalWidth(io) })); +} + +/** Builds the initial `running` memory-flow snapshot for a `run` command (no events yet, `pending` sync id). */ function initialRunMemoryFlowInput( + args: Extract<KloIngestArgs, { command: 'run' }>, + runId: string, +): MemoryFlowReplayInput { + return { + runId, + connectionId: args.connectionId, + adapter: args.adapter, + status: 'running', + sourceDir: args.sourceDir ??
null, + syncId: 'pending', + errors: [], + events: [], + plannedWorkUnits: [], + details: { actions: [], provenance: [], transcripts: [] }, + }; +} + +/** Writes one report in the requested mode: JSON verbatim; for `viz`, the TUI renderer, then the interactive text renderer, then a static snapshot; otherwise plain status lines. `viz` may first be downgraded to plain by effectiveIngestOutputMode. */ async function writeReportRecord( + report: IngestReportSnapshot, + outputMode: KloIngestOutputMode, + io: KloIngestIo, + options: { + interactive?: boolean; + renderStoredMemoryFlow?: typeof renderMemoryFlowTui; + env?: NodeJS.ProcessEnv; + } = {}, +): Promise<void> { + if (outputMode === 'json') { + writeReportJson(report, io); + return; + } + + const resolvedOutputMode = effectiveIngestOutputMode(outputMode, io, options.env ?? process.env, { + requireInput: options.interactive === true, + }); + + if (resolvedOutputMode === 'viz') { + const input = ingestReportToMemoryFlowReplay(report, { provenanceRowCount: report.body.provenanceRows.length }); + if (options.interactive === true) { + if (io.stdin?.isTTY === true) { + const renderStoredMemoryFlow = options.renderStoredMemoryFlow ?? renderMemoryFlowTui; + if (isTuiCapableIo(io) && (await renderStoredMemoryFlow(input, io))) { + return; + } + + await renderMemoryFlowInteractively(input, io); + return; + } + + writeMemoryFlowInput(input, io); + return; + } + + writeMemoryFlowInput(input, io); + return; + } + + writeReportStatus(report, io); +} + +/** Entry point for the ingest commands (`run`, `status`, `replay`, `watch`); resolves to 0 on success, or to 1 after writing the error message to `io.stderr`. */ export async function runKloIngest( + args: KloIngestArgs, + io: KloIngestIo = process, + deps: KloIngestDeps = {}, +): Promise<number> { + try { + const project = await loadKloProject({ projectDir: args.projectDir }); + const env = deps.env ?? process.env; + if (args.command === 'run') { + const createAdapters = deps.createAdapters ?? createKloCliLocalIngestAdapters; + const executeLocalIngest = deps.runLocalIngest ?? runLocalIngest; + const localIngestOptions = deps.localIngestOptions ?? {}; + const adapterOptions = { + ...(localIngestOptions.pullConfigOptions ?? {}), + ...(args.databaseIntrospectionUrl ? { databaseIntrospectionUrl: args.databaseIntrospectionUrl } : {}), + ...(args.adapter === 'historic-sql' ?
{ historicSqlConnectionId: args.connectionId } : {}), + }; + if (args.adapter === 'metabase' && args.sourceDir) { + throw new Error('source-dir uploads are not supported for the Metabase fan-out adapter'); + } + if (args.adapter === 'metabase') { + const executeMetabaseFanout = deps.runLocalMetabaseIngest ?? runLocalMetabaseIngest; + const progress = + args.outputMode === 'json' ? undefined : createMetabaseFanoutProgress(args.connectionId, io); + const result = await executeMetabaseFanout({ + project, + adapters: createAdapters(project, adapterOptions), + metabaseConnectionId: args.connectionId, + ...localIngestOptions, + trigger: 'manual_resync', + jobIdFactory: deps.jobIdFactory, + ...(progress ? { progress } : {}), + }); + if (args.outputMode === 'json') { + io.stdout.write(`${JSON.stringify(result, null, 2)}\n`); + } else { + writeMetabaseFanoutStatus(result, io); + } + return 0; + } + + const jobId = deps.jobIdFactory?.(); + let liveTui: MemoryFlowTuiLiveSession | null = null; + const runOutputMode = effectiveIngestOutputMode(args.outputMode, io, env, { + requireInput: (args.inputMode ?? 'auto') === 'auto', + }); + const shouldUseLiveViz = + runOutputMode === 'viz' && (args.inputMode ?? 'auto') === 'auto' && isInteractiveTerminal(io); + const initialMemoryFlow = shouldUseLiveViz ? initialRunMemoryFlowInput(args, jobId ?? 'pending') : undefined; + let latestMemoryFlowSnapshot: MemoryFlowReplayInput | null = initialMemoryFlow ?? null; + + if (initialMemoryFlow && isTuiCapableIo(io)) { + const startLiveMemoryFlow = deps.startLiveMemoryFlow ?? startLiveMemoryFlowTui; + liveTui = await startLiveMemoryFlow(initialMemoryFlow, io); + } + + const memoryFlow = initialMemoryFlow + ? 
createMemoryFlowLiveBuffer(initialMemoryFlow, { + onChange: (snapshot) => { + latestMemoryFlowSnapshot = snapshot; + if (liveTui && !liveTui.isClosed()) { + liveTui.update(snapshot); + return; + } + if (!liveTui) { + writeMemoryFlowInput(snapshot, io, { clear: true }); + } + }, + }) + : undefined; + + try { + const result = await executeLocalIngest({ + project, + adapters: createAdapters(project, adapterOptions), + adapter: args.adapter, + connectionId: args.connectionId, + sourceDir: args.sourceDir, + trigger: 'manual_resync', + jobId, + ...localIngestOptions, + ...(args.debugLlmRequestFile ? { llmDebugRequestFile: args.debugLlmRequestFile } : {}), + ...(memoryFlow ? { memoryFlow } : {}), + }); + if (memoryFlow) { + latestMemoryFlowSnapshot = memoryFlow.snapshot(); + liveTui?.close(); + liveTui = null; + io.stdout.write(formatMemoryFlowFinalSummary(latestMemoryFlowSnapshot)); + return 0; + } + await writeReportRecord(result.report, runOutputMode, io, { + interactive: (args.inputMode ?? 'auto') === 'auto', + renderStoredMemoryFlow: deps.renderStoredMemoryFlow, + env, + }); + return 0; + } finally { + liveTui?.close(); + } + } + + if (args.reportFile) { + const readReportFile = deps.readReportFile ?? readIngestReportSnapshotFile; + const report = await readReportFile(args.reportFile); + if (args.runId) { + assertReportMatchesReplayId(report, args.runId, args.reportFile); + } + await writeReportRecord(report, args.outputMode, io, { + interactive: (args.inputMode ?? 'auto') === 'auto', + renderStoredMemoryFlow: deps.renderStoredMemoryFlow, + env, + }); + return 0; + } + + const report = await readStoredIngestReport(project, args.runId); + if (!report) { + throw new Error( + args.runId + ? `Local ingest run or report "${args.runId}" was not found` + : 'No local ingest reports were found. Run `klo ingest --all` first.', + ); + } + await writeReportRecord(report, args.outputMode, io, { + interactive: (args.inputMode ?? 
'auto') === 'auto', + renderStoredMemoryFlow: deps.renderStoredMemoryFlow, + env, + }); + return 0; + } catch (error) { + io.stderr.write(`${error instanceof Error ? error.message : String(error)}\n`); + return 1; + } +} diff --git a/packages/cli/src/io/mode.test.ts b/packages/cli/src/io/mode.test.ts new file mode 100644 index 00000000..06bb5d63 --- /dev/null +++ b/packages/cli/src/io/mode.test.ts @@ -0,0 +1,60 @@ +import { describe, expect, it } from 'vitest'; +import type { KloCliIo } from '../cli-runtime.js'; +import { resolveOutputMode } from './mode.js'; + +function ioWith(isTTY: boolean | undefined): KloCliIo { + return { + stdout: { isTTY, write: () => {} }, + stderr: { write: () => {} }, + }; +} + +describe('resolveOutputMode', () => { + it('uses explicit value when provided', () => { + expect(resolveOutputMode({ explicit: 'pretty', io: ioWith(false), env: {} })).toBe('pretty'); + expect(resolveOutputMode({ explicit: 'plain', io: ioWith(true), env: {} })).toBe('plain'); + expect(resolveOutputMode({ explicit: 'json', io: ioWith(true), env: {} })).toBe('json'); + }); + + it('json:true takes precedence over explicit value', () => { + expect(resolveOutputMode({ explicit: 'pretty', json: true, io: ioWith(true), env: {} })).toBe('json'); + }); + + it('throws on unknown explicit value', () => { + expect(() => resolveOutputMode({ explicit: 'fancy', io: ioWith(true), env: {} })).toThrow(/Invalid --output/); + }); + + it('honors KLO_OUTPUT env var when no explicit value', () => { + expect(resolveOutputMode({ io: ioWith(true), env: { KLO_OUTPUT: 'plain' } })).toBe('plain'); + expect(resolveOutputMode({ io: ioWith(false), env: { KLO_OUTPUT: 'pretty' } })).toBe('pretty'); + expect(resolveOutputMode({ io: ioWith(false), env: { KLO_OUTPUT: 'json' } })).toBe('json'); + }); + + it('throws on unknown KLO_OUTPUT', () => { + expect(() => resolveOutputMode({ io: ioWith(true), env: { KLO_OUTPUT: 'fancy' } })).toThrow(/Invalid KLO_OUTPUT/); + }); + + it('returns plain when CI is 
set to a truthy value', () => { + expect(resolveOutputMode({ io: ioWith(true), env: { CI: 'true' } })).toBe('plain'); + expect(resolveOutputMode({ io: ioWith(true), env: { CI: '1' } })).toBe('plain'); + }); + + it('ignores CI when set to a falsy value', () => { + expect(resolveOutputMode({ io: ioWith(true), env: { CI: '' } })).toBe('pretty'); + expect(resolveOutputMode({ io: ioWith(true), env: { CI: '0' } })).toBe('pretty'); + expect(resolveOutputMode({ io: ioWith(true), env: { CI: 'false' } })).toBe('pretty'); + }); + + it('returns pretty when stdout is a TTY and CI is not set', () => { + expect(resolveOutputMode({ io: ioWith(true), env: {} })).toBe('pretty'); + }); + + it('returns plain when stdout is not a TTY', () => { + expect(resolveOutputMode({ io: ioWith(false), env: {} })).toBe('plain'); + expect(resolveOutputMode({ io: ioWith(undefined), env: {} })).toBe('plain'); + }); + + it('explicit value beats KLO_OUTPUT env var', () => { + expect(resolveOutputMode({ explicit: 'json', io: ioWith(true), env: { KLO_OUTPUT: 'plain' } })).toBe('json'); + }); +}); diff --git a/packages/cli/src/io/mode.ts b/packages/cli/src/io/mode.ts new file mode 100644 index 00000000..e25e32a0 --- /dev/null +++ b/packages/cli/src/io/mode.ts @@ -0,0 +1,40 @@ +import type { KloCliIo } from '../cli-runtime.js'; + +export type KloOutputMode = 'pretty' | 'plain' | 'json'; + +/** Accepted mode names; typed as a set of `string` so unvalidated user input can be passed to `has()`. */ const MODES: ReadonlySet<string> = new Set(['pretty', 'plain', 'json']); + +export interface ResolveOutputModeArgs { + explicit?: string; + json?: boolean; + io: KloCliIo; + env?: NodeJS.ProcessEnv; +} + +/** Picks the output mode by precedence: `json` flag, explicit value, `KLO_OUTPUT`, a truthy `CI` (plain), TTY stdout (pretty), else plain. Throws on an unrecognized explicit or `KLO_OUTPUT` value. */ export function resolveOutputMode(args: ResolveOutputModeArgs): KloOutputMode { + if (args.json === true) { + return 'json'; + } + if (args.explicit !== undefined) { + if (!MODES.has(args.explicit)) { + throw new Error(`Invalid --output value: ${args.explicit}. Expected one of pretty, plain, json.`); + } + return args.explicit as KloOutputMode; + } + const env = args.env ??
process.env; + const envMode = env.KLO_OUTPUT; + if (envMode !== undefined && envMode !== '') { + if (!MODES.has(envMode)) { + throw new Error(`Invalid KLO_OUTPUT value: ${envMode}. Expected one of pretty, plain, json.`); + } + return envMode as KloOutputMode; + } + const ci = env.CI; + if (ci !== undefined && ci !== '' && ci !== '0' && ci !== 'false') { + return 'plain'; + } + if (args.io.stdout.isTTY === true) { + return 'pretty'; + } + return 'plain'; +} diff --git a/packages/cli/src/io/print-list.test.ts b/packages/cli/src/io/print-list.test.ts new file mode 100644 index 00000000..8d42d32a --- /dev/null +++ b/packages/cli/src/io/print-list.test.ts @@ -0,0 +1,171 @@ +import { describe, expect, it } from 'vitest'; +import type { KloCliIo } from '../cli-runtime.js'; +import { printList, type PrintListColumn } from './print-list.js'; +import { SYMBOLS } from './symbols.js'; + +function recorder(): { io: KloCliIo; out: () => string; err: () => string } { + let stdout = ''; + let stderr = ''; + return { + io: { + stdout: { write: (chunk: string) => { stdout += chunk; } }, + stderr: { write: (chunk: string) => { stderr += chunk; } }, + }, + out: () => stdout, + err: () => stderr, + }; +} + +interface SlRow { + connectionId: string; + name: string; + columnCount: number; + measureCount: number; + joinCount: number; + description?: string; +} + +/** Column spec passed to printList by the tests in this file. */ const SL_COLUMNS: ReadonlyArray<PrintListColumn<SlRow>> = [ + { key: 'connectionId', label: 'CONNECTION', plain: '' }, + { key: 'name', label: 'NAME', plain: '' }, + { key: 'columnCount', label: 'COLS', plain: 'columns=', dim: true }, + { key: 'measureCount', label: 'MEASURES', plain: 'measures=', dim: true }, + { key: 'joinCount', label: 'JOINS', plain: 'joins=', dim: true }, + { key: 'description', label: 'DESCRIPTION', plain: false, optional: true, dim: true }, +]; + +const ORDERS: SlRow = { connectionId: 'warehouse', name: 'orders', columnCount: 5, measureCount: 3, joinCount: 1 }; +const USERS: SlRow = { connectionId: 'warehouse', name: 'users',
columnCount: 8, measureCount: 2, joinCount: 2, description: 'User profile + auth' }; + +describe('printList — plain mode', () => { + it('emits one tab-separated row per item, skipping plain:false columns', () => { + const r = recorder(); + printList({ + rows: [ORDERS, USERS], + columns: SL_COLUMNS, + mode: 'plain', + command: 'sl list', + emptyMessage: 'No sources', + io: r.io, + }); + expect(r.out()).toBe( + 'warehouse\torders\tcolumns=5\tmeasures=3\tjoins=1\n' + + 'warehouse\tusers\tcolumns=8\tmeasures=2\tjoins=2\n', + ); + }); + + it('emits nothing on empty list (preserves current sl list zero-row behavior)', () => { + const r = recorder(); + printList({ + rows: [], + columns: SL_COLUMNS, + mode: 'plain', + command: 'sl list', + emptyMessage: 'No sources', + io: r.io, + }); + expect(r.out()).toBe(''); + }); +}); + +describe('printList — json mode', () => { + it('emits the envelope with kind=list, data.items, and meta.command', () => { + const r = recorder(); + printList({ + rows: [ORDERS, USERS], + columns: SL_COLUMNS, + mode: 'json', + command: 'sl list', + emptyMessage: 'No sources', + io: r.io, + }); + const written = r.out(); + expect(written.endsWith('\n')).toBe(true); + const parsed = JSON.parse(written); + expect(parsed).toEqual({ + kind: 'list', + data: { items: [ORDERS, USERS] }, + meta: { command: 'sl list' }, + }); + }); + + it('emits an empty items array when no rows', () => { + const r = recorder(); + printList({ + rows: [], + columns: SL_COLUMNS, + mode: 'json', + command: 'sl list', + emptyMessage: 'No sources', + io: r.io, + }); + expect(JSON.parse(r.out())).toEqual({ + kind: 'list', + data: { items: [] }, + meta: { command: 'sl list' }, + }); + }); +}); + +/** Removes ANSI styling sequences from `s` so assertions can compare visible text only. */ function stripAnsi(s: string): string { + // Matches ESC [ ... m sequences emitted by node:util.styleText; ESC is written as \u001b so the byte is explicit and bare "[1m" text is left alone.
+ return s.replace(/\u001b\[[0-9;]*m/g, ''); +} + +describe('printList — pretty mode', () => { + it('renders a Clack-style header, grouped rows, and footer', () => { + const r = recorder(); + printList({ + rows: [ORDERS, USERS], + columns: SL_COLUMNS, + groupBy: 'connectionId', + mode: 'pretty', + command: 'sl list', + emptyMessage: 'No sources', + io: r.io, + }); + const out = stripAnsi(r.out()); + expect(out).toContain(`${SYMBOLS.barStart} sl list`); + expect(out).toContain(`${SYMBOLS.group} warehouse`); + expect(out).toContain('(2 sources)'); + expect(out).toMatch(new RegExp(`${escapeRegExp(SYMBOLS.item)} orders\\s+5 cols ${escapeRegExp(SYMBOLS.middot)} 3 measures ${escapeRegExp(SYMBOLS.middot)} 1 join\\b`)); + expect(out).toMatch(new RegExp(`${escapeRegExp(SYMBOLS.item)} users\\s+8 cols ${escapeRegExp(SYMBOLS.middot)} 2 measures ${escapeRegExp(SYMBOLS.middot)} 2 joins\\b`)); + expect(out).toContain(`${SYMBOLS.emDash} User profile + auth`); + expect(out).toContain(`${SYMBOLS.barEnd} 2 sources`); + }); + + it('renders an empty-state message when no rows', () => { + const r = recorder(); + printList({ + rows: [], + columns: SL_COLUMNS, + groupBy: 'connectionId', + mode: 'pretty', + command: 'sl list', + emptyMessage: 'No semantic-layer sources found in /tmp/proj', + io: r.io, + }); + const out = stripAnsi(r.out()); + expect(out).toContain(`${SYMBOLS.barStart} sl list`); + expect(out).toContain(`${SYMBOLS.barEnd} No semantic-layer sources found in /tmp/proj`); + }); + + it('singularizes the footer when there is one row', () => { + const r = recorder(); + printList({ + rows: [ORDERS], + columns: SL_COLUMNS, + groupBy: 'connectionId', + mode: 'pretty', + command: 'sl list', + emptyMessage: 'No sources', + io: r.io, + }); + const out = stripAnsi(r.out()); + expect(out).toContain(`${SYMBOLS.barEnd} 1 source`); + }); +}); + +function escapeRegExp(s: string): string { + return s.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'); +} diff --git a/packages/cli/src/io/print-list.ts
b/packages/cli/src/io/print-list.ts new file mode 100644 index 00000000..f0b10f48 --- /dev/null +++ b/packages/cli/src/io/print-list.ts @@ -0,0 +1,164 @@ +import type { KloCliIo } from '../cli-runtime.js'; +import type { KloOutputMode } from './mode.js'; +import { bold, dim, SYMBOLS } from './symbols.js'; + +/** Describes one column of a list; `Row` is the row object type whose string keys may be used as `key`. */ export interface PrintListColumn<Row> { + key: keyof Row & string; + label?: string; + /** + * Plain-mode rendering control. + * - `string` (including `''`): emit `${plain}${value}` as a tab-separated cell. + * - `false`: omit this column entirely in plain mode. + * - `undefined`: same as `''`. + */ + plain?: string | false; + /** Skip this column when the row's value is null / undefined / empty string. */ + optional?: boolean; + /** Pretty-mode hint: render this column dim. */ + dim?: boolean; +} + +/** Input to printList: the rows, their column spec, and where/how to write them. */ export interface PrintListArgs<Row> { + rows: ReadonlyArray<Row>; + columns: ReadonlyArray<PrintListColumn<Row>>; + groupBy?: keyof Row & string; + emptyMessage: string; + command: string; + mode: KloOutputMode; + io: KloCliIo; +} + +/** Writes `args.rows` to `args.io.stdout` using the renderer selected by `args.mode`. */ export function printList<Row>(args: PrintListArgs<Row>): void { + switch (args.mode) { + case 'json': + printListJson(args); + return; + case 'plain': + printListPlain(args); + return; + case 'pretty': + printListPretty(args); + return; + } +} + +function isEmpty(value: unknown): boolean { + return value === undefined || value === null || value === ''; +} + +/** Plain mode: one tab-separated line per row; emits nothing for an empty list. */ function printListPlain<Row>(args: PrintListArgs<Row>): void { + for (const row of args.rows) { + const cells: string[] = []; + for (const col of args.columns) { + if (col.plain === false) continue; + const value = row[col.key]; + if (col.optional && isEmpty(value)) continue; + const prefix = col.plain ?? ''; + cells.push(`${prefix}${value === undefined || value === null ?
'' : String(value)}`); + } + args.io.stdout.write(`${cells.join('\t')}\n`); + } +} + +/** JSON mode: a `{ kind, data.items, meta.command }` envelope, pretty-printed with a trailing newline. */ function printListJson<Row>(args: PrintListArgs<Row>): void { + const envelope = { + kind: 'list', + data: { items: args.rows }, + meta: { command: args.command }, + }; + args.io.stdout.write(`${JSON.stringify(envelope, null, 2)}\n`); +} + +function pluralize(count: number, singular: string): string { + return `${count} ${count === 1 ? singular : `${singular}s`}`; +} + +function metricCell(label: string, count: number): string { + // "5 cols", "3 measures", "1 join" / "2 joins" + // The label in PrintListColumn is uppercase; pretty mode lowercases it. + const word = label.toLowerCase(); + return `${count} ${count === 1 ? singularize(word) : word}`; +} + +function singularize(word: string): string { + if (word === 'joins') return 'join'; + if (word === 'measures') return 'measure'; + if (word === 'cols') return 'col'; + if (word.endsWith('s')) return word.slice(0, -1); + return word; +} + +/** Buckets rows by the stringified value of `key` (missing values bucket under `''`), keeping first-seen group order. */ function groupRows<Row>( + rows: ReadonlyArray<Row>, + key: keyof Row & string, +): Map<string, Row[]> { + const groups = new Map<string, Row[]>(); + for (const row of rows) { + const value = String(row[key] ?? ''); + const bucket = groups.get(value); + if (bucket) { + bucket.push(row); + } else { + groups.set(value, [row]); + } + } + return groups; +} + +function printListPretty<Row>(args: PrintListArgs<Row>): void { + const { io, command, rows, columns, groupBy, emptyMessage } = args; + + io.stdout.write(`${SYMBOLS.barStart} ${command}\n`); + io.stdout.write(`${SYMBOLS.bar}\n`); + + if (rows.length === 0) { + io.stdout.write(`${SYMBOLS.barEnd} ${emptyMessage}\n`); + return; + } + + // Identify role of each column.
+ // - First non-grouped, non-metric, non-optional column = "name" column (bolded) + // - Columns with a `plain` prefix = metric columns (rendered as "N word") + // - optional columns = trailing suffix (em-dash + value), only when value is present + const nameCol = columns.find( + (c) => c.key !== groupBy && !c.plain && !c.optional && c.plain !== false, + ); + const metricCols = columns.filter((c) => typeof c.plain === 'string' && c.plain.length > 0); + const optionalCols = columns.filter((c) => c.optional === true); + + const buckets = groupBy ? groupRows(rows, groupBy) : new Map([['', [...rows]]]); + + const nameWidth = nameCol + ? Math.max(...rows.map((r) => String(r[nameCol.key] ?? '').length)) + : 0; + + for (const [groupValue, groupRowList] of buckets) { + if (groupBy) { + io.stdout.write( + `${SYMBOLS.bar} ${SYMBOLS.group} ${bold(groupValue)} ${dim(`(${pluralize(groupRowList.length, 'source')})`)}\n`, + ); + } + for (const row of groupRowList) { + const segments: string[] = []; + if (nameCol) { + segments.push(String(row[nameCol.key] ?? '').padEnd(nameWidth)); + } + const metrics = metricCols + .map((c) => metricCell(c.label ?? c.key, Number(row[c.key] ?? 0))) + .join(` ${SYMBOLS.middot} `); + if (metrics.length > 0) segments.push(dim(metrics)); + const optionalSuffix = optionalCols + .map((c) => row[c.key]) + .filter((v) => !isEmpty(v)) + .map((v) => `${SYMBOLS.emDash} ${dim(String(v))}`) + .join(' '); + if (optionalSuffix.length > 0) segments.push(optionalSuffix); + + const indent = groupBy ? 
' ' : ' '; + io.stdout.write(`${SYMBOLS.bar}${indent}${SYMBOLS.item} ${segments.join(' ')}\n`); + } + } + + io.stdout.write(`${SYMBOLS.bar}\n`); + io.stdout.write(`${SYMBOLS.barEnd} ${pluralize(rows.length, 'source')}\n`); +} diff --git a/packages/cli/src/io/symbols.ts b/packages/cli/src/io/symbols.ts new file mode 100644 index 00000000..8fa88aa4 --- /dev/null +++ b/packages/cli/src/io/symbols.ts @@ -0,0 +1,37 @@ +import { styleText } from 'node:util'; + +function detectUnicodeSupport(env: NodeJS.ProcessEnv = process.env): boolean { + if (process.platform !== 'win32') { + return env.TERM !== 'linux'; + } + return ( + Boolean(env.WT_SESSION) || + env.TERM_PROGRAM === 'vscode' || + env.TERM === 'xterm-256color' || + env.TERM === 'alacritty' + ); +} + +const unicode = detectUnicodeSupport(); + +export const SYMBOLS = { + bar: unicode ? '│' : '|', + barStart: unicode ? '◇' : 'o', + barEnd: unicode ? '└' : '—', + group: unicode ? '●' : '*', + item: unicode ? '◆' : '*', + middot: unicode ? '·' : '-', + emDash: unicode ? 
'—' : '--', +} as const; + +export function dim(text: string): string { + return styleText('dim', text); +} + +export function bold(text: string): string { + return styleText('bold', text); +} + +export function gray(text: string): string { + return styleText('gray', text); +} diff --git a/packages/cli/src/knowledge.test.ts b/packages/cli/src/knowledge.test.ts new file mode 100644 index 00000000..ed4ac587 --- /dev/null +++ b/packages/cli/src/knowledge.test.ts @@ -0,0 +1,95 @@ +import { mkdtemp, rm } from 'node:fs/promises'; +import { tmpdir } from 'node:os'; +import { join } from 'node:path'; +import { initKloProject } from '@klo/context/project'; +import { afterEach, beforeEach, describe, expect, it } from 'vitest'; +import { runKloKnowledge } from './knowledge.js'; + +function makeIo() { + let stdout = ''; + let stderr = ''; + return { + io: { + stdout: { + write: (chunk: string) => { + stdout += chunk; + }, + }, + stderr: { + write: (chunk: string) => { + stderr += chunk; + }, + }, + }, + stdout: () => stdout, + stderr: () => stderr, + }; +} + +describe('runKloKnowledge', () => { + let tempDir: string; + + beforeEach(async () => { + tempDir = await mkdtemp(join(tmpdir(), 'klo-cli-knowledge-')); + }); + + afterEach(async () => { + await rm(tempDir, { recursive: true, force: true }); + }); + + it('writes, reads, lists, and searches knowledge pages', async () => { + const projectDir = join(tempDir, 'project'); + await initKloProject({ projectDir, projectName: 'warehouse' }); + + const writeIo = makeIo(); + await expect( + runKloKnowledge( + { + command: 'write', + projectDir, + key: 'metrics/revenue', + scope: 'GLOBAL', + userId: 'local', + summary: 'Revenue', + content: 'Revenue is paid order value.', + tags: ['finance'], + refs: [], + slRefs: ['orders'], + }, + writeIo.io, + ), + ).resolves.toBe(0); + expect(writeIo.stdout()).toContain('Wrote knowledge/global/metrics/revenue.md'); + + const readIo = makeIo(); + await expect( + runKloKnowledge({ command: 'read', 
projectDir, key: 'metrics/revenue', userId: 'local' }, readIo.io), + ).resolves.toBe(0); + expect(readIo.stdout()).toContain('# metrics/revenue'); + expect(readIo.stdout()).toContain('Revenue is paid order value.'); + + const listIo = makeIo(); + await expect(runKloKnowledge({ command: 'list', projectDir, userId: 'local' }, listIo.io)).resolves.toBe(0); + expect(listIo.stdout()).toContain('GLOBAL\tmetrics/revenue\tRevenue'); + + const searchIo = makeIo(); + await expect( + runKloKnowledge({ command: 'search', projectDir, query: 'paid order', userId: 'local' }, searchIo.io), + ).resolves.toBe(0); + expect(searchIo.stdout()).toContain('metrics/revenue'); + }); + + it('explains empty search results for a project without wiki pages', async () => { + const projectDir = join(tempDir, 'empty-project'); + await initKloProject({ projectDir, projectName: 'warehouse' }); + + const searchIo = makeIo(); + await expect( + runKloKnowledge({ command: 'search', projectDir, query: 'revenue', userId: 'local' }, searchIo.io), + ).resolves.toBe(0); + + expect(searchIo.stdout()).toBe(''); + expect(searchIo.stderr()).toContain('No local wiki pages found'); + expect(searchIo.stderr()).toContain('klo wiki write'); + }); +}); diff --git a/packages/cli/src/knowledge.ts b/packages/cli/src/knowledge.ts new file mode 100644 index 00000000..ccbb09da --- /dev/null +++ b/packages/cli/src/knowledge.ts @@ -0,0 +1,90 @@ +import { loadKloProject } from '@klo/context/project'; +import { + type LocalKnowledgeScope, + listLocalKnowledgePages, + readLocalKnowledgePage, + searchLocalKnowledgePages, + writeLocalKnowledgePage, +} from '@klo/context/wiki'; + +export type KloKnowledgeArgs = + | { command: 'list'; projectDir: string; userId: string } + | { command: 'read'; projectDir: string; key: string; userId: string } + | { command: 'search'; projectDir: string; query: string; userId: string } + | { + command: 'write'; + projectDir: string; + key: string; + scope: LocalKnowledgeScope; + userId: string; + 
summary: string; + content: string; + tags: string[]; + refs: string[]; + slRefs: string[]; + }; + +interface KloKnowledgeIo { + stdout: { write(chunk: string): void }; + stderr: { write(chunk: string): void }; +} + +export async function runKloKnowledge(args: KloKnowledgeArgs, io: KloKnowledgeIo = process): Promise { + try { + const project = await loadKloProject({ projectDir: args.projectDir }); + if (args.command === 'list') { + const pages = await listLocalKnowledgePages(project, { userId: args.userId }); + for (const page of pages) { + io.stdout.write(`${page.scope}\t${page.key}\t${page.summary}\n`); + } + return 0; + } + if (args.command === 'read') { + const page = await readLocalKnowledgePage(project, { key: args.key, userId: args.userId }); + if (!page) { + throw new Error(`Knowledge page "${args.key}" was not found`); + } + io.stdout.write(`# ${page.key}\n\n`); + io.stdout.write(`Scope: ${page.scope}\n`); + io.stdout.write(`Summary: ${page.summary}\n\n`); + io.stdout.write(`${page.content}\n`); + return 0; + } + if (args.command === 'search') { + const results = await searchLocalKnowledgePages(project, { query: args.query, userId: args.userId }); + if (results.length === 0) { + const pages = await listLocalKnowledgePages(project, { userId: args.userId }); + if (pages.length === 0) { + io.stderr.write( + `No local wiki pages found in ${project.projectDir}. Create one with \`klo wiki write --summary --content \` or run ingest.\n`, + ); + } else { + io.stderr.write( + `No local wiki pages matched "${args.query}". 
Run \`klo wiki list\` to inspect available pages.\n`, + ); + } + return 0; + } + for (const result of results) { + io.stdout.write(`${result.score}\t${result.scope}\t${result.key}\t${result.summary}\n`); + } + return 0; + } + + const write = await writeLocalKnowledgePage(project, { + key: args.key, + scope: args.scope, + userId: args.userId, + summary: args.summary, + content: args.content, + tags: args.tags, + refs: args.refs, + slRefs: args.slRefs, + }); + io.stdout.write(`Wrote ${write.path}\n`); + return 0; + } catch (error) { + io.stderr.write(`${error instanceof Error ? error.message : String(error)}\n`); + return 1; + } +} diff --git a/packages/cli/src/local-adapters.ts b/packages/cli/src/local-adapters.ts new file mode 100644 index 00000000..3f6b08ec --- /dev/null +++ b/packages/cli/src/local-adapters.ts @@ -0,0 +1,173 @@ +import { join } from 'node:path'; +import { createBigQueryLiveDatabaseIntrospection, isKloBigQueryConnectionConfig } from '@klo/connector-bigquery'; +import { createClickHouseLiveDatabaseIntrospection, isKloClickHouseConnectionConfig } from '@klo/connector-clickhouse'; +import { createMysqlLiveDatabaseIntrospection, isKloMysqlConnectionConfig } from '@klo/connector-mysql'; +import { + createPostgresLiveDatabaseIntrospection, + isKloPostgresConnectionConfig, + type KloPostgresConnectionConfig, + KloPostgresHistoricSqlQueryClient, +} from '@klo/connector-postgres'; +import { createSqliteLiveDatabaseIntrospection, isKloSqliteConnectionConfig } from '@klo/connector-sqlite'; +import { createSqlServerLiveDatabaseIntrospection, isKloSqlServerConnectionConfig } from '@klo/connector-sqlserver'; +import { + createDaemonLiveDatabaseIntrospection, + createDefaultLocalIngestAdapters, + type DefaultLocalIngestAdaptersOptions, + type LiveDatabaseIntrospectionPort, + LiveDatabaseSourceAdapter, + type SourceAdapter, +} from '@klo/context/ingest'; +import type { KloLocalProject } from '@klo/context/project'; +import { createHttpSqlAnalysisPort } from 
'@klo/context/sql-analysis'; + +function hasSnowflakeDriver(connection: unknown): boolean { + return ( + typeof connection === 'object' && + connection !== null && + String((connection as { driver?: unknown }).driver ?? '').toLowerCase() === 'snowflake' + ); +} + +function createKloCliLiveDatabaseIntrospection( + project: KloLocalProject, + options: DefaultLocalIngestAdaptersOptions = {}, +): LiveDatabaseIntrospectionPort { + const daemon = createDaemonLiveDatabaseIntrospection({ + connections: project.config.connections, + ...options.databaseIntrospection, + ...(options.databaseIntrospectionUrl ? { baseUrl: options.databaseIntrospectionUrl } : {}), + }); + const sqlite = createSqliteLiveDatabaseIntrospection({ + projectDir: project.projectDir, + connections: project.config.connections, + }); + const mysql = createMysqlLiveDatabaseIntrospection({ + connections: project.config.connections, + }); + const postgres = createPostgresLiveDatabaseIntrospection({ + connections: project.config.connections, + }); + const clickhouse = createClickHouseLiveDatabaseIntrospection({ + connections: project.config.connections, + }); + const sqlserver = createSqlServerLiveDatabaseIntrospection({ + connections: project.config.connections, + }); + const bigquery = createBigQueryLiveDatabaseIntrospection({ + connections: project.config.connections, + }); + return { + async extractSchema(connectionId: string) { + const connection = project.config.connections[connectionId]; + if (isKloPostgresConnectionConfig(connection)) { + return postgres.extractSchema(connectionId); + } + if (isKloSqliteConnectionConfig(connection)) { + return sqlite.extractSchema(connectionId); + } + if (isKloMysqlConnectionConfig(connection)) { + return mysql.extractSchema(connectionId); + } + if (isKloClickHouseConnectionConfig(connection)) { + return clickhouse.extractSchema(connectionId); + } + if (isKloSqlServerConnectionConfig(connection)) { + return sqlserver.extractSchema(connectionId); + } + if 
(isKloBigQueryConnectionConfig(connection)) { + return bigquery.extractSchema(connectionId); + } + if (hasSnowflakeDriver(connection)) { + const { createSnowflakeLiveDatabaseIntrospection, isKloSnowflakeConnectionConfig } = await import( + '@klo/connector-snowflake' + ); + if (!isKloSnowflakeConnectionConfig(connection)) { + return daemon.extractSchema(connectionId); + } + const snowflake = createSnowflakeLiveDatabaseIntrospection({ + connections: project.config.connections, + }); + return snowflake.extractSchema(connectionId); + } + return daemon.extractSchema(connectionId); + }, + }; +} + +interface KloCliLocalIngestAdaptersOptions extends DefaultLocalIngestAdaptersOptions { + historicSqlConnectionId?: string; + sqlAnalysisUrl?: string; +} + +function isEnabledPostgresHistoricSqlConnection(connection: KloPostgresConnectionConfig | undefined): boolean { + if (!connection || !isKloPostgresConnectionConfig(connection)) { + return false; + } + const historicSql = + typeof connection.historicSql === 'object' && + connection.historicSql !== null && + !Array.isArray(connection.historicSql) + ? (connection.historicSql as Record) + : null; + return historicSql?.enabled === true && historicSql.dialect === 'postgres'; +} + +function createEphemeralPostgresHistoricSqlClient(project: KloLocalProject, connectionId: string) { + const connection = project.config.connections[connectionId] as KloPostgresConnectionConfig | undefined; + if (!isKloPostgresConnectionConfig(connection)) { + throw new Error( + `Historic SQL local ingest requires a Postgres connection, got ${String(connection?.driver ?? 
'unknown')}`, + ); + } + return { + async executeQuery(sql: string, params?: unknown[]) { + const client = new KloPostgresHistoricSqlQueryClient({ + connectionId, + connection, + }); + try { + return await client.executeQuery(sql, params); + } finally { + await client.cleanup(); + } + }, + }; +} + +function historicSqlOptionsForLocalRun(project: KloLocalProject, options: KloCliLocalIngestAdaptersOptions) { + const connectionId = options.historicSqlConnectionId; + if (!connectionId) { + return undefined; + } + const connection = project.config.connections[connectionId] as KloPostgresConnectionConfig | undefined; + if (!isEnabledPostgresHistoricSqlConnection(connection)) { + return undefined; + } + return { + sqlAnalysis: createHttpSqlAnalysisPort({ + baseUrl: + options.sqlAnalysisUrl ?? + process.env.KLO_SQL_ANALYSIS_URL ?? + process.env.KLO_DAEMON_URL ?? + 'http://127.0.0.1:8765', + }), + postgresQueryClient: createEphemeralPostgresHistoricSqlClient(project, connectionId), + postgresBaselineRootDir: join(project.projectDir, '.klo/cache/historic-sql'), + }; +} + +export function createKloCliLocalIngestAdapters( + project: KloLocalProject, + options: KloCliLocalIngestAdaptersOptions = {}, +): SourceAdapter[] { + const historicSql = historicSqlOptionsForLocalRun(project, options); + const base = createDefaultLocalIngestAdapters(project, { + ...options, + ...(historicSql ? { historicSql } : {}), + }); + const liveDatabase = new LiveDatabaseSourceAdapter({ + introspection: createKloCliLiveDatabaseIntrospection(project, options), + }); + return base.map((adapter) => (adapter.source === 'live-database' ? 
liveDatabase : adapter)); +} diff --git a/packages/cli/src/local-scan-connectors.test.ts b/packages/cli/src/local-scan-connectors.test.ts new file mode 100644 index 00000000..638ad3e9 --- /dev/null +++ b/packages/cli/src/local-scan-connectors.test.ts @@ -0,0 +1,163 @@ +import { mkdtemp, rm, writeFile } from 'node:fs/promises'; +import { tmpdir } from 'node:os'; +import { join } from 'node:path'; +import { initKloProject, loadKloProject } from '@klo/context/project'; +import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'; +import { createKloCliScanConnector } from './local-scan-connectors.js'; + +const bigQueryMock = vi.hoisted(() => ({ + constructorInputs: [] as Array<{ + connectionId: string; + connection: unknown; + maxBytesBilled?: number | string; + }>, +})); + +vi.mock('@klo/connector-bigquery', () => ({ + isKloBigQueryConnectionConfig: (connection: { driver?: unknown } | undefined) => + String(connection?.driver ?? '').toLowerCase() === 'bigquery', + KloBigQueryScanConnector: class { + readonly id: string; + readonly driver = 'bigquery'; + + constructor(options: { connectionId: string; connection: unknown; maxBytesBilled?: number | string }) { + bigQueryMock.constructorInputs.push(options); + this.id = `bigquery:${options.connectionId}`; + } + }, +})); + +describe('createKloCliScanConnector', () => { + let tempDir: string; + + beforeEach(async () => { + bigQueryMock.constructorInputs.length = 0; + tempDir = await mkdtemp(join(tmpdir(), 'klo-cli-scan-connector-')); + }); + + afterEach(async () => { + await rm(tempDir, { recursive: true, force: true }); + }); + + it('creates a native sqlite connector from standalone config', async () => { + await initKloProject({ projectDir: tempDir, projectName: 'warehouse' }); + await writeFile( + join(tempDir, 'klo.yaml'), + [ + 'project: warehouse', + 'connections:', + ' warehouse:', + ' driver: sqlite', + ' path: warehouse.db', + ' readonly: true', + '', + ].join('\n'), + 'utf-8', + ); + const project = 
await loadKloProject({ projectDir: tempDir }); + + const connector = await createKloCliScanConnector(project, 'warehouse'); + + expect(connector.id).toBe('sqlite:warehouse'); + expect(connector.driver).toBe('sqlite'); + }); + + it.each([ + ['maxBytesBilled', ' maxBytesBilled: 123456789', 123456789], + ['max_bytes_billed', ' max_bytes_billed: "987654321"', '987654321'], + ])('passes BigQuery %s from standalone config', async (_label, byteCapLine, expectedMaxBytesBilled) => { + await initKloProject({ projectDir: tempDir, projectName: 'warehouse' }); + await writeFile( + join(tempDir, 'klo.yaml'), + [ + 'project: warehouse', + 'connections:', + ' warehouse:', + ' driver: bigquery', + ' dataset_id: analytics', + ' readonly: true', + byteCapLine, + '', + ].join('\n'), + 'utf-8', + ); + const project = await loadKloProject({ projectDir: tempDir }); + + const connector = await createKloCliScanConnector(project, 'warehouse'); + + expect(connector.id).toBe('bigquery:warehouse'); + expect(connector.driver).toBe('bigquery'); + expect(bigQueryMock.constructorInputs).toEqual([ + expect.objectContaining({ + connectionId: 'warehouse', + maxBytesBilled: expectedMaxBytesBilled, + }), + ]); + }); + + it('does not create a standalone PostHog scan connector', async () => { + await initKloProject({ projectDir: tempDir, projectName: 'warehouse' }); + await writeFile( + join(tempDir, 'klo.yaml'), + [ + 'project: warehouse', + 'connections:', + ' product:', + ' driver: posthog', + ' api_key: phx_test', + ' project_id: "157881"', + ' readonly: true', + '', + ].join('\n'), + 'utf-8', + ); + const project = await loadKloProject({ projectDir: tempDir }); + + await expect(createKloCliScanConnector(project, 'product')).rejects.toThrow( + 'Connection "product" uses driver "posthog", which has no native standalone KLO scan connector', + ); + }); + + it('throws for structural daemon-only fallback configs', async () => { + await initKloProject({ projectDir: tempDir, projectName: 'warehouse' }); + 
await writeFile( + join(tempDir, 'klo.yaml'), + [ + 'project: warehouse', + 'connections:', + ' warehouse:', + ' driver: duckdb', + ' path: warehouse.duckdb', + '', + ].join('\n'), + 'utf-8', + ); + const project = await loadKloProject({ projectDir: tempDir }); + + await expect(createKloCliScanConnector(project, 'warehouse')).rejects.toThrow( + 'Connection "warehouse" uses driver "duckdb", which has no native standalone KLO scan connector', + ); + }); + + it('throws a clear error when the connection block has no driver field', async () => { + await initKloProject({ projectDir: tempDir, projectName: 'warehouse' }); + await writeFile( + join(tempDir, 'klo.yaml'), + [ + 'project: warehouse', + 'connections:', + ' warehouse:', + ' type: postgres', + ' url: postgresql://example/db', + ' readonly: true', + '', + ].join('\n'), + 'utf-8', + ); + const project = await loadKloProject({ projectDir: tempDir }); + + await expect(createKloCliScanConnector(project, 'warehouse')).rejects.toThrow( + 'Connection "warehouse" has no `driver` field in klo.yaml', + ); + }); +}); diff --git a/packages/cli/src/local-scan-connectors.ts b/packages/cli/src/local-scan-connectors.ts new file mode 100644 index 00000000..ca9c37f7 --- /dev/null +++ b/packages/cli/src/local-scan-connectors.ts @@ -0,0 +1,84 @@ +import type { KloLocalProject } from '@klo/context/project'; +import type { KloScanConnector } from '@klo/context/scan'; + +const SUPPORTED_DRIVERS = 'sqlite, postgres, mysql, clickhouse, sqlserver, bigquery, snowflake'; + +function bigQueryMaxBytesBilled( + connection: KloLocalProject['config']['connections'][string], +): number | string | undefined { + const raw = connection.maxBytesBilled ?? connection.max_bytes_billed; + if (typeof raw === 'number') { + return Number.isFinite(raw) && raw > 0 ? raw : undefined; + } + if (typeof raw === 'string') { + const trimmed = raw.trim(); + return trimmed.length > 0 ? 
trimmed : undefined;
+  }
+  return undefined;
+}
+
+/**
+ * Build the native, in-process scan connector for one connection in klo.yaml.
+ *
+ * The connector package for the matching driver is loaded lazily with a
+ * dynamic `import()`, so only the driver actually in use is loaded.
+ *
+ * @param project - Loaded local project; `config.connections` and `projectDir` are read.
+ * @param connectionId - Key of the connection inside `project.config.connections`.
+ * @returns The scan connector instance for the connection's driver.
+ * @throws Error when the connection is missing, has no `driver` field, uses a
+ *   recognised driver whose block fails that connector's config type guard, or
+ *   uses a driver with no native standalone connector.
+ */
+export async function createKloCliScanConnector(
+  project: KloLocalProject,
+  connectionId: string,
+): Promise<KloScanConnector> {
+  const connection = project.config.connections[connectionId];
+  if (!connection) {
+    throw new Error(`Connection "${connectionId}" is not configured in klo.yaml`);
+  }
+  // Driver matching is case-insensitive; a missing driver becomes ''.
+  const driver = String(connection.driver ?? '').toLowerCase();
+  if (!driver) {
+    throw new Error(
+      `Connection "${connectionId}" has no \`driver\` field in klo.yaml. Supported drivers: ${SUPPORTED_DRIVERS}.`,
+    );
+  }
+  if (driver === 'sqlite' || driver === 'sqlite3') {
+    const { KloSqliteScanConnector, isKloSqliteConnectionConfig } = await import('@klo/connector-sqlite');
+    if (isKloSqliteConnectionConfig(connection)) {
+      return new KloSqliteScanConnector({ connectionId, connection, projectDir: project.projectDir });
+    }
+  }
+  if (driver === 'postgres' || driver === 'postgresql') {
+    const { KloPostgresScanConnector, isKloPostgresConnectionConfig } = await import('@klo/connector-postgres');
+    if (isKloPostgresConnectionConfig(connection)) {
+      return new KloPostgresScanConnector({ connectionId, connection });
+    }
+  }
+  if (driver === 'mysql') {
+    const { KloMysqlScanConnector, isKloMysqlConnectionConfig } = await import('@klo/connector-mysql');
+    if (isKloMysqlConnectionConfig(connection)) {
+      return new KloMysqlScanConnector({ connectionId, connection });
+    }
+  }
+  if (driver === 'clickhouse') {
+    const { KloClickHouseScanConnector, isKloClickHouseConnectionConfig } = await import('@klo/connector-clickhouse');
+    if (isKloClickHouseConnectionConfig(connection)) {
+      return new KloClickHouseScanConnector({ connectionId, connection });
+    }
+  }
+  if (driver === 'sqlserver') {
+    const { KloSqlServerScanConnector, isKloSqlServerConnectionConfig } = await import('@klo/connector-sqlserver');
+    if (isKloSqlServerConnectionConfig(connection)) {
+      return new KloSqlServerScanConnector({ connectionId, connection });
+    }
+  }
+  if (driver === 'bigquery') {
+    const { KloBigQueryScanConnector, isKloBigQueryConnectionConfig } = await import('@klo/connector-bigquery');
+    if (isKloBigQueryConnectionConfig(connection)) {
+      const maxBytesBilled = bigQueryMaxBytesBilled(connection);
+      return new KloBigQueryScanConnector({
+        connectionId,
+        connection,
+        // Only forward the byte cap when one was configured.
+        ...(maxBytesBilled !== undefined ? { maxBytesBilled } : {}),
+      });
+    }
+  }
+  if (driver === 'snowflake') {
+    const { KloSnowflakeScanConnector, isKloSnowflakeConnectionConfig } = await import('@klo/connector-snowflake');
+    if (isKloSnowflakeConnectionConfig(connection)) {
+      return new KloSnowflakeScanConnector({ connectionId, connection });
+    }
+  }
+  // A recognised driver that reaches this point was rejected by its connector's
+  // config type guard. Report that as a configuration problem rather than
+  // claiming a driver listed as supported has no native connector.
+  const recognisedDrivers = [
+    'sqlite',
+    'sqlite3',
+    'postgres',
+    'postgresql',
+    'mysql',
+    'clickhouse',
+    'sqlserver',
+    'bigquery',
+    'snowflake',
+  ];
+  if (recognisedDrivers.includes(driver)) {
+    throw new Error(
+      `Connection "${connectionId}" uses driver "${driver}", but its klo.yaml block is not a valid configuration for the native ${driver} scan connector.`,
+    );
+  }
+  throw new Error(
+    `Connection "${connectionId}" uses driver "${driver}", which has no native standalone KLO scan connector. Supported drivers: ${SUPPORTED_DRIVERS}.`,
+  );
+}
diff --git a/packages/cli/src/memory-flow-hud.tsx b/packages/cli/src/memory-flow-hud.tsx
new file mode 100644
index 00000000..29b29170
--- /dev/null
+++ b/packages/cli/src/memory-flow-hud.tsx
@@ -0,0 +1,597 @@
+/* @jsxImportSource react */
+import type { MemoryFlowEvent, MemoryFlowReplayInput } from '@klo/context/ingest/memory-flow';
+import { Box, Text } from 'ink';
+import React, { type ReactNode } from 'react';
+import { buildDemoMetrics, formatCost, formatDuration } from './demo-metrics.js';
+import { formatNextStepLines } from './next-steps.js';
+import { profileMark } from './startup-profile.js';
+
+profileMark('module:memory-flow-hud');
+
+interface HudTheme {
+  text: string;
+  muted: string;
+  active: string;
+  complete: string;
+  warning: string;
+  failed: string;
+  border: string;
+}
+
+const SPINNER_FRAMES = ['⠋', '⠙', '⠹', '⠸', '⠼', '⠴', '⠦', '⠧', '⠇', '⠏'] as const;
+
+function spinner(frame: number): string {
+  return SPINNER_FRAMES[frame % SPINNER_FRAMES.length] ??
'⠋'; +} + +function counterValue(target: number, frame: number, framesToFill = 12): number { + if (target <= 0 || frame <= 0) return 0; + if (frame >= framesToFill) return target; + return Math.round((frame / framesToFill) * target); +} + +function hasWorkStarted(input: MemoryFlowReplayInput): boolean { + return input.events.some((e) => e.type === 'work_unit_started'); +} + +function isPrepopulatedDemoReplay(input: MemoryFlowReplayInput): boolean { + return input.metadata?.origin === 'packaged' || input.metadata?.timing === 'prebuilt'; +} + +function flowLine(width: number, frame: number, active: boolean): string { + if (!active) return '━'.repeat(width); + const pulse = ['░', '▒', '▓', '█', '█', '█', '▓', '▒', '░']; + const pw = pulse.length; + const chars: string[] = []; + const offset = (frame * 2) % (width + pw); + for (let i = 0; i < width; i += 1) { + const p = i - offset + pw; + chars.push(p >= 0 && p < pw ? (pulse[p] ?? '━') : '━'); + } + return chars.join(''); +} + +function brailleFlow(width: number, frame: number): string { + // Braille unicode: U+2800 + dot bitmask + // Dots: 1=0x01 2=0x02 3=0x04 4=0x08 5=0x10 6=0x20 7=0x40 8=0x80 + // Layout: col0=[1,2,3,7] col1=[4,5,6,8] + const chars: string[] = []; + for (let i = 0; i < width; i += 1) { + const density = (i + 1) / width; + const phase = (i * 3 + frame * 2) % 12; + let dots = 0; + + // Sparse diagonal streams on the left, dense on the right + // Each "stream" is a diagonal line of dots moving rightward + if ((phase + 0) % 4 < density * 4) dots |= 0x01; // dot 1 + if ((phase + 1) % 5 < density * 4) dots |= 0x08; // dot 4 + if ((phase + 2) % 4 < density * 3) dots |= 0x02; // dot 2 + if ((phase + 3) % 5 < density * 3) dots |= 0x10; // dot 5 + if ((phase + 4) % 4 < density * 2.5) dots |= 0x04; // dot 3 + if ((phase + 5) % 5 < density * 2.5) dots |= 0x20; // dot 6 + if ((phase + 1) % 6 < density * 2) dots |= 0x40; // dot 7 + if ((phase + 3) % 6 < density * 2) dots |= 0x80; // dot 8 + + 
chars.push(String.fromCharCode(0x2800 + dots));
+  }
+  return chars.join('');
+}
+
+/**
+ * Render a fixed-width, three-segment progress bar: finished cells (`█`),
+ * an animated pulse over the active cells, then queued cells (`░`).
+ *
+ * The result is always exactly `width` characters when `width >= 0`.
+ * While any work is active, one cell is reserved for it so the bar never
+ * looks complete before the work is done.
+ *
+ * @param finishedCount - Number of finished work units.
+ * @param activeCount - Number of work units currently running.
+ * @param totalCount - Total number of work units; `0` renders an all-queued bar.
+ * @param width - Bar width in character cells.
+ * @param frame - Animation frame counter that drives the pulse position.
+ * @returns The rendered bar string.
+ */
+function progressBarOverall(
+  finishedCount: number,
+  activeCount: number,
+  totalCount: number,
+  width: number,
+  frame: number,
+): string {
+  if (totalCount === 0) return '░'.repeat(width);
+
+  // Active work always gets at least one cell (when the bar has room for one).
+  const minActiveWidth = activeCount > 0 ? 1 : 0;
+
+  // Both segments are rounded independently, so without clamping their sum can
+  // exceed `width` (e.g. 9.5 -> 10 finished plus the 1-cell active minimum).
+  const finishedWidth = Math.max(
+    0,
+    Math.min(Math.round((finishedCount / totalCount) * width), width - minActiveWidth),
+  );
+  const activeWidth = Math.max(
+    0,
+    Math.min(
+      Math.max(minActiveWidth, Math.round((activeCount / totalCount) * width)),
+      width - finishedWidth,
+    ),
+  );
+  const queuedWidth = Math.max(0, width - finishedWidth - activeWidth);
+
+  const finished = '█'.repeat(finishedWidth);
+
+  // Sweep a short brightness pulse across the active segment; cells outside
+  // the pulse window fall back to the mid shade.
+  const pulse = ['░', '▒', '▓', '█', '▓', '▒'];
+  const pulseLen = pulse.length;
+  const offset = (frame * 2) % (activeWidth + pulseLen);
+  const activeChars: string[] = [];
+  for (let i = 0; i < activeWidth; i += 1) {
+    const p = i - offset + pulseLen;
+    activeChars.push(p >= 0 && p < pulseLen ? (pulse[p] ?? '▒') : '▒');
+  }
+
+  return finished + activeChars.join('') + '░'.repeat(queuedWidth);
+}
+
+function sparkleWipe(width: number, frame: number, row: number): string {
+  const chars: string[] = [];
+  const sweepPos = (frame * 2 + row * 6) % (width + 8);
+  const sparkles = ['✨', '✦', '✧', '·'];
+  for (let i = 0; i < width; i += 1) {
+    const dist = i - sweepPos;
+    if (dist < -6) {
+      const t = (i * 11 + row * 5 + frame * 3) % 10;
+      chars.push(t === 0 ? sparkles[0]! : t === 3 ? sparkles[1]! : t === 7 ? sparkles[2]! : ' ');
+    } else if (dist < -3) {
+      const t = (i + frame) % 3;
+      chars.push(t === 0 ? sparkles[1]! : t === 1 ? sparkles[2]! : sparkles[3]!);
+    } else if (dist <= 0) {
+      const gradient = ['░', '▒', '▓', '█'];
+      chars.push(gradient[Math.min(3, dist + 3)] ?? '█');
+    } else if (dist <= 2) {
+      chars.push(dist === 1 ? '▓' : '▒');
+    } else {
+      const noise = (i * 31 + row * 17 + frame * 3) % 5;
+      const messy = ['░', '▒', '▓', '▒', '░'];
+      chars.push(messy[noise] ??
'▒'); + } + } + return chars.join(''); +} + +function activityWave(width: number, frame: number, offset: number): string { + const heights = ['▁', '▂', '▃', '▄', '▅', '▆', '▇', '█']; + const chars: string[] = []; + for (let i = 0; i < width; i += 1) { + const wave = Math.sin(((i * 2 + frame + offset * 5) * Math.PI) / 6); + const idx = Math.round(((wave + 1) / 2) * (heights.length - 1)); + chars.push(heights[idx] ?? '▁'); + } + return chars.join(''); +} + +function topicName(key: string): string { + return (key.split('/').pop()?.replace(/\.md$/, '') ?? key).replace(/[_-]/g, ' '); +} + +function tableName(key: string): string { + return key.split('.').pop()?.replace(/[_-]/g, ' ') ?? key; +} + +function humanizeInsight(key: string, target: 'sl' | 'wiki', summary: string | undefined): string { + if (summary) return summary; + const name = target === 'sl' ? tableName(key) : topicName(key); + return target === 'sl' ? `Query definition: ${name}` : `Knowledge page: ${name}`; +} + +const ADAPTER_PREFIXES = ['live_database_', 'metabase_', 'looker_', 'lookml_', 'metricflow_', 'notion_', 'historic_sql_', 'dbt_descriptions_']; +const INTERNAL_DEMO_CONNECTION_ID = 'orbit_demo'; +const PUBLIC_DEMO_SOURCE_LABEL = 'Orbit Demo'; + +function humanizeUnitKey(unitKey: string): string { + let key = unitKey.replace(/-/g, '_'); + for (const prefix of ADAPTER_PREFIXES) { + if (key.startsWith(prefix)) { key = key.slice(prefix.length); break; } + } + return key.replace(/_/g, ' '); +} + +interface SourceInfo { + type: string; + name: string; + sourceCount: string; + itemNounPlural: string; + readingVerb: string; + ingestDescription: string; +} + +const ADAPTER_LABELS: Record = { + 'live-database': { type: 'Database', plural: 'tables', verb: 'Reading', description: 'Reading table schemas, understanding relationships, creating query definitions' }, + metricflow: { type: 'dbt project', plural: 'models', verb: 'Parsing', description: 'Parsing dbt models, extracting metric definitions, mapping 
dependencies' }, + looker: { type: 'Looker', plural: 'explores', verb: 'Analyzing', description: 'Analyzing explores, extracting dimensions and measures, mapping joins' }, + lookml: { type: 'LookML', plural: 'views', verb: 'Parsing', description: 'Parsing LookML views, extracting field definitions, mapping relationships' }, + metabase: { type: 'Metabase', plural: 'questions', verb: 'Analyzing', description: 'Analyzing saved questions, extracting query patterns, understanding dashboards' }, + notion: { type: 'Notion', plural: 'pages', verb: 'Reading', description: 'Reading pages, extracting structure, understanding your documentation' }, + 'historic-sql': { type: 'SQL history', plural: 'queries', verb: 'Analyzing', description: 'Analyzing query patterns, identifying common joins, learning access patterns' }, + 'dbt-descriptions': { type: 'dbt schema', plural: 'models', verb: 'Parsing', description: 'Parsing schema definitions, extracting descriptions, mapping lineage' }, + dbt_descriptions: { type: 'dbt', plural: 'models', verb: 'Parsing', description: 'Parsing schema definitions, extracting descriptions, mapping lineage' }, +}; + +function sourceDescription(input: MemoryFlowReplayInput): SourceInfo { + const adapter = input.adapter ?? 'source'; + const conn = input.connectionId ?? ''; + const sourceEvents = input.events.filter((e) => e.type === 'source_acquired') as Array<{ type: 'source_acquired'; adapter: string; fileCount: number }>; + const isDemoSource = conn === INTERNAL_DEMO_CONNECTION_ID || isPrepopulatedDemoReplay(input); + + if (isDemoSource && sourceEvents.length <= 1) { + const count = sourceEvents[0] ? 
String(sourceEvents[0].fileCount) : '?'; + return { + type: PUBLIC_DEMO_SOURCE_LABEL, + name: '', + sourceCount: count, + itemNounPlural: 'sources', + readingVerb: 'Ingesting', + ingestDescription: 'Ingesting warehouse, dbt, BI, and docs into a unified context layer', + }; + } + + if (sourceEvents.length > 1) { + const totalFiles = sourceEvents.reduce((sum, s) => sum + s.fileCount, 0); + const labels = [...new Set(sourceEvents.map((s) => ADAPTER_LABELS[s.adapter]?.type ?? s.adapter))]; + return { + type: labels.join(' + '), + name: conn, + sourceCount: String(totalFiles), + itemNounPlural: 'sources', + readingVerb: 'Ingesting', + ingestDescription: 'Ingesting warehouse, dbt, BI, and docs into a unified context layer', + }; + } + + const count = sourceEvents[0] ? String(sourceEvents[0].fileCount) : '?'; + const info = ADAPTER_LABELS[adapter] ?? { type: adapter, plural: 'sources', verb: 'Reading', description: 'Reading sources, understanding structure, creating definitions' }; + return { type: info.type, name: conn, sourceCount: count, itemNounPlural: info.plural, readingVerb: info.verb, ingestDescription: info.description }; +} + +function activeWorkUnit( + input: MemoryFlowReplayInput, +): { unitKey: string; stepIndex: number; stepBudget: number } | null { + const units = activeWorkUnits(input); + return units.at(-1) ?? 
null; +} + +function activeWorkUnits( + input: MemoryFlowReplayInput, +): Array<{ unitKey: string; stepIndex: number; stepBudget: number }> { + const finishedKeys = new Set(); + const unitMap = new Map(); + + for (const e of input.events) { + if (e.type === 'work_unit_started') { + unitMap.set(e.unitKey, { stepIndex: 0, stepBudget: e.stepBudget }); + } + if (e.type === 'work_unit_step') { + const existing = unitMap.get(e.unitKey); + if (existing) { + existing.stepIndex = e.stepIndex; + existing.stepBudget = e.stepBudget; + } + } + if (e.type === 'work_unit_finished') finishedKeys.add(e.unitKey); + } + + const result: Array<{ unitKey: string; stepIndex: number; stepBudget: number }> = []; + for (const [unitKey, data] of unitMap) { + if (!finishedKeys.has(unitKey)) result.push({ unitKey, ...data }); + } + return result; +} + +function queuedWorkUnits(input: MemoryFlowReplayInput): string[] { + const startedKeys = new Set(); + for (const e of input.events) { + if (e.type === 'work_unit_started') startedKeys.add(e.unitKey); + } + return input.plannedWorkUnits.filter((u) => !startedKeys.has(u.unitKey)).map((u) => u.unitKey); +} + +interface Insight { + icon: string; + text: string; + unitKey: string; + hasSummary: boolean; +} + +function buildInsights(input: MemoryFlowReplayInput): Insight[] { + return input.events + .filter((e) => e.type === 'candidate_action') + .map((e) => { + const ca = e as { unitKey: string; target: 'sl' | 'wiki'; key: string }; + const detail = input.details.actions.find((a) => a.key === ca.key && a.unitKey === ca.unitKey); + return { + icon: ca.target === 'sl' ? 
/**
 * Lists the work units that finished successfully, each with the number of
 * `candidate_action` events recorded for that unit anywhere in the event list.
 *
 * @param input Replay input whose `events` are scanned.
 * @returns One entry per successful `work_unit_finished` event, in event order.
 */
function finishedUnits(input: MemoryFlowReplayInput): Array<{ unitKey: string; artifactCount: number }> {
  // Count artifacts once up front rather than re-filtering the whole event list for
  // every finished unit; this runs on every render frame.
  const artifactsByUnit = new Map<string, number>();
  for (const e of input.events) {
    if (e.type === 'candidate_action') {
      artifactsByUnit.set(e.unitKey, (artifactsByUnit.get(e.unitKey) ?? 0) + 1);
    }
  }

  const units: Array<{ unitKey: string; artifactCount: number }> = [];
  for (const e of input.events) {
    if (e.type === 'work_unit_finished' && e.status === 'success') {
      units.push({ unitKey: e.unitKey, artifactCount: artifactsByUnit.get(e.unitKey) ?? 0 });
    }
  }
  return units;
}
{ now: props.now } : {}); + const workStarted = hasWorkStarted(props.input); + + const sourceEvents = props.input.events.filter((e) => e.type === 'source_acquired'); + const col1Content = sourceEvents.length > 1 || !src.name ? src.type : `${src.type} (${src.name})`; + + const innerWidth = Math.max(60, props.width - 6); + + const actives = activeWorkUnits(props.input); + const reconEvent = props.input.events.find((e) => e.type === 'reconciliation_finished'); + const allAnalyzed = isFlowing && actives.length === 0; + const isReconciling = allAnalyzed && !reconEvent && !isDone; + + const hLine = '─'.repeat(innerWidth); + + const elapsed = formatDuration(metrics.elapsedMs); + let eta = ''; + if (metrics.status === 'running' && metrics.etaMs !== null) eta = `~${formatDuration(metrics.etaMs)} left`; + else if (metrics.status !== 'running') eta = 'done'; + const cost = workStarted ? formatCost(metrics.estimatedCostUsd) : ''; + const statsParts = [`⏱ ${elapsed}`, eta, cost].filter(Boolean).join(' '); + const prepopulatedCostDisclaimer = + cost && isPrepopulatedDemoReplay(props.input) + ? 'Pre-run demo: $ shown is illustrative; no money is being spent now.' 
+ : null; + + return ( + + ╭{hLine}╮ + + + {col1Content} + — {src.sourceCount} {src.itemNounPlural} + + + + {statsParts} + + {prepopulatedCostDisclaimer && ( + + + {prepopulatedCostDisclaimer} + + )} + ╰{hLine}╯ + + ); +} + +export function ActivityFeed(props: { + input: MemoryFlowReplayInput; + theme: HudTheme; + frame: number; + width: number; + completionFrame: number; + showCompletion: boolean; + holdComplete: boolean; +}): ReactNode { + const actives = activeWorkUnits(props.input); + const queued = queuedWorkUnits(props.input); + const finished = finishedUnits(props.input); + const insights = buildInsights(props.input); + const src = sourceDescription(props.input); + const isDone = props.input.status === 'done'; + const isError = props.input.status === 'error'; + + const diffEvent = props.input.events.find((e) => e.type === 'diff_computed') as + | (MemoryFlowEvent & { added: number; modified: number; deleted: number; unchanged: number }) + | undefined; + const planEvent = props.input.events.find((e) => e.type === 'chunks_planned') as + | (MemoryFlowEvent & { chunkCount: number; workUnitCount: number }) + | undefined; + const reconEvent = props.input.events.find((e) => e.type === 'reconciliation_finished') as + | (MemoryFlowEvent & { conflictCount: number }) + | undefined; + const savedEvent = props.input.events.find((e) => e.type === 'saved'); + + const workStarted = hasWorkStarted(props.input); + const totalChunks = planEvent?.chunkCount ?? 0; + const finishedWithArtifacts = finished.filter((u) => u.artifactCount > 0); + const finishedAreas = totalChunks > 0 ? 
Math.min(finished.length, totalChunks) : finished.length; + const allWorkDone = workStarted && actives.length === 0 && queued.length === 0; + const isReconciling = allWorkDone && !reconEvent && !isDone && !isError; + const isSaving = reconEvent && !savedEvent && !isDone && !isError; + + const isIncremental = diffEvent && (diffEvent.modified > 0 || diffEvent.deleted > 0 || diffEvent.unchanged > 0); + + const barWidth = Math.min(40, props.width - 20); + + return ( + + {/* Phase 1: Connecting */} + {!diffEvent && !workStarted && ( + + {spinner(props.frame)} Connecting to {src.type.toLowerCase()}... + + )} + + {/* Phase 2: Connected */} + {diffEvent && ( + + ✓ Connected — found {src.sourceCount} {src.itemNounPlural} to ingest + + )} + + {/* Phase 2b: Diff (incremental runs only) */} + {diffEvent && isIncremental && ( + + ✓ Compared with last sync — only re-analyzing what changed + + )} + + {/* Phase 3: Planning */} + {diffEvent && !planEvent && !workStarted && ( + + {spinner(props.frame)} Grouping related {src.itemNounPlural} together for deeper analysis... + + )} + {planEvent && ( + + ✓ Grouped into {planEvent.chunkCount} business area{planEvent.chunkCount === 1 ? '' : 's'} + + )} + + {/* Phase 4: Ingesting */} + {workStarted && !allWorkDone && ( + + + {spinner(props.frame)} Ingesting — {finishedAreas}/{totalChunks || '?'} business area{totalChunks === 1 ? '' : 's'} done + + + {' '}{src.ingestDescription} + + {totalChunks > 0 && ( + + {' '} + {progressBarOverall(finishedAreas, actives.length, totalChunks, barWidth, props.frame)} + + )} + + )} + + {/* Results — what KLO has created */} + {insights.length > 0 && ( + + Created so far: + {insights.map((insight, idx) => ( + + {' '}{insight.icon} {insight.text} + + ))} + + )} + + {/* Phase 5: Finalizing */} + {isReconciling && ( + + {spinner(props.frame)} Deduplicating — removing overlaps between business areas and checking for conflicts... + + )} + {reconEvent && ( + + ✓ Deduplicated + {reconEvent.conflictCount > 0 + ? 
` — ${reconEvent.conflictCount} conflict${reconEvent.conflictCount === 1 ? '' : 's'} resolved` + : ' — no conflicts'} + + )} + + {/* Phase 6: Saving */} + {isSaving && ( + {spinner(props.frame)} Saving to context layer... + )} + {savedEvent && ( + ✓ Saved — your agents can now use the KLO context layer + )} + + {/* Phase 7: Completion */} + {props.showCompletion && (isDone || isError) && ( + + )} + + ); +} + +function CompletionSummary(props: { + input: MemoryFlowReplayInput; + theme: HudTheme; + frame: number; + holdComplete: boolean; +}): ReactNode { + const saved = [...props.input.events].reverse().find((e) => e.type === 'saved'); + const wikiCount = saved?.wikiCount ?? 0; + const slCount = saved?.slCount ?? 0; + const isError = props.input.status === 'error'; + + const sl = counterValue(slCount, props.frame); + const wiki = counterValue(wikiCount, props.frame); + + return ( + + {isError ? ( + + ✗ Something went wrong — review the errors above. + + ) : ( + <> + {'─'.repeat(60)} + + ★ KLO finished ingesting your data + + {(sl > 0 || wiki > 0) && ( + <> + + KLO created: + {sl > 0 && ( + + {' '}📊 {sl} query definition{sl === 1 ? '' : 's'} — so agents can write accurate SQL for your data + + )} + {wiki > 0 && ( + + {' '}📝 {wiki} knowledge page{wiki === 1 ? 
'' : 's'} — so agents understand your business context + + )} + + )} + + What to do next: + {formatNextStepLines().map((line) => ( + + {line} + + ))} + {props.holdComplete && ( + <> + + Press q to exit + + )} + + )} + + ); +} diff --git a/packages/cli/src/memory-flow-interactive.test.ts b/packages/cli/src/memory-flow-interactive.test.ts new file mode 100644 index 00000000..1ac30316 --- /dev/null +++ b/packages/cli/src/memory-flow-interactive.test.ts @@ -0,0 +1,125 @@ +import { EventEmitter } from 'node:events'; +import type { MemoryFlowReplayInput } from '@klo/context/ingest'; +import { describe, expect, it, vi } from 'vitest'; +import { memoryFlowCommandForKey, renderMemoryFlowInteractively } from './memory-flow-interactive.js'; + +class FakeStdin extends EventEmitter { + isTTY = true; + isRaw = false; + rawModes: boolean[] = []; + resume = vi.fn(); + pause = vi.fn(); + + setRawMode(value: boolean): void { + this.isRaw = value; + this.rawModes.push(value); + } +} + +function replay(): MemoryFlowReplayInput { + return { + runId: 'run-1', + connectionId: 'warehouse', + adapter: 'metricflow', + status: 'done', + sourceDir: '/tmp/source', + syncId: 'sync-1', + errors: [], + plannedWorkUnits: [ + { + unitKey: 'orders', + rawFiles: ['models/orders.yml'], + peerFileCount: 0, + dependencyCount: 1, + }, + { + unitKey: 'customers', + rawFiles: ['models/customers.yml'], + peerFileCount: 0, + dependencyCount: 0, + }, + ], + details: { actions: [], provenance: [], transcripts: [] }, + events: [ + { type: 'source_acquired', adapter: 'metricflow', trigger: 'manual_resync', fileCount: 2 }, + { type: 'scope_detected', fingerprint: null }, + { type: 'raw_snapshot_written', syncId: 'sync-1', rawFileCount: 2 }, + { type: 'diff_computed', added: 1, modified: 1, deleted: 0, unchanged: 0 }, + { type: 'chunks_planned', chunkCount: 2, workUnitCount: 2, evictionCount: 0 }, + { type: 'work_unit_started', unitKey: 'orders', skills: ['knowledge_capture'], stepBudget: 4 }, + { type: 
'work_unit_finished', unitKey: 'orders', status: 'success' }, + { type: 'work_unit_started', unitKey: 'customers', skills: ['knowledge_capture'], stepBudget: 4 }, + { type: 'work_unit_finished', unitKey: 'customers', status: 'failed', reason: 'validation reset' }, + { type: 'reconciliation_finished', conflictCount: 0, fallbackCount: 1 }, + { type: 'saved', commitSha: 'abc12345', wikiCount: 1, slCount: 1 }, + { type: 'provenance_recorded', rowCount: 2 }, + { type: 'report_created', runId: 'run-1', reportPath: 'report-1' }, + ], + }; +} + +describe('memoryFlowCommandForKey', () => { + it('maps supported terminal key names to memory-flow commands', () => { + const idleSearch = { editing: false, query: '', matchIndex: 0 }; + const editingSearch = { editing: true, query: 'c', matchIndex: 0 }; + + expect(memoryFlowCommandForKey('', idleSearch, { name: 'left' })).toBe('left'); + expect(memoryFlowCommandForKey('', idleSearch, { name: 'right' })).toBe('right'); + expect(memoryFlowCommandForKey('', idleSearch, { name: 'up' })).toBe('up'); + expect(memoryFlowCommandForKey('', idleSearch, { name: 'down' })).toBe('down'); + expect(memoryFlowCommandForKey('', idleSearch, { name: 'return' })).toBe('enter'); + expect(memoryFlowCommandForKey('', idleSearch, { name: 'tab' })).toBe('tab'); + expect(memoryFlowCommandForKey('', idleSearch, { name: 'f' })).toBe('filter'); + expect(memoryFlowCommandForKey('', idleSearch, { name: 'p' })).toBe('provenance'); + expect(memoryFlowCommandForKey('', idleSearch, { name: 't' })).toBe('transcript'); + expect(memoryFlowCommandForKey('', idleSearch, { name: 'q' })).toBe('quit'); + expect(memoryFlowCommandForKey('', idleSearch, { name: 'c', ctrl: true })).toBe('quit'); + expect(memoryFlowCommandForKey('/', { editing: false, query: '', matchIndex: 0 }, { name: '/' })).toBe( + 'search-start', + ); + expect(memoryFlowCommandForKey('c', { editing: true, query: '', matchIndex: 0 }, { name: 'c' })).toEqual({ + type: 'search-input', + value: 'c', + }); + 
expect(memoryFlowCommandForKey('', editingSearch, { name: 'backspace' })).toBe('search-backspace'); + expect(memoryFlowCommandForKey('', editingSearch, { name: 'return' })).toBe('search-submit'); + expect(memoryFlowCommandForKey('', editingSearch, { name: 'escape' })).toBe('search-clear'); + expect(memoryFlowCommandForKey('', idleSearch, { name: 'x' })).toBeNull(); + }); +}); + +describe('renderMemoryFlowInteractively', () => { + it('repaints on keypress and restores raw mode on quit', async () => { + let stdout = ''; + const stdin = new FakeStdin(); + const prepareKeypressEvents = vi.fn(); + + const promise = renderMemoryFlowInteractively( + replay(), + { + stdin, + stdout: { + isTTY: true, + columns: 120, + write: (chunk) => { + stdout += chunk; + }, + }, + }, + { prepareKeypressEvents }, + ); + + stdin.emit('keypress', '', { name: 'right' }); + stdin.emit('keypress', '', { name: 'tab' }); + stdin.emit('keypress', '', { name: 'q' }); + + await expect(promise).resolves.toBeUndefined(); + expect(prepareKeypressEvents).toHaveBeenCalledWith(stdin); + expect(stdin.rawModes).toEqual([true, false]); + expect(stdin.resume).toHaveBeenCalledTimes(1); + expect(stdin.pause).toHaveBeenCalledTimes(1); + expect(stdout).toContain('\u001b[2J\u001b[H'); + expect(stdout).toContain('[ACTIONS]'); + expect(stdout).toContain('Pane: trust'); + }); +}); diff --git a/packages/cli/src/memory-flow-interactive.ts b/packages/cli/src/memory-flow-interactive.ts new file mode 100644 index 00000000..f356e725 --- /dev/null +++ b/packages/cli/src/memory-flow-interactive.ts @@ -0,0 +1,143 @@ +import { emitKeypressEvents } from 'node:readline'; +import { + buildMemoryFlowViewModel, + createInitialMemoryFlowInteractionState, + reduceMemoryFlowInteractionState, + renderMemoryFlowInteractive, + type MemoryFlowInteractionCommand, + type MemoryFlowInteractionState, + type MemoryFlowReplayInput, +} from '@klo/context/ingest'; + +interface KloMemoryFlowKey { + name?: string; + ctrl?: boolean; +} + +export 
/**
 * Translates one terminal keypress into a memory-flow interaction command.
 *
 * @param chunk  Character data delivered with the keypress. Node's readline passes
 *               `undefined` here for escape sequences such as the arrow keys.
 * @param search Current search state; while `search.editing` is true, printable
 *               characters are routed into the query instead of acting as shortcuts.
 * @param key    Parsed key descriptor (`name`, `ctrl`).
 * @returns The command to dispatch, or `null` when the key has no meaning in the current mode.
 */
export function memoryFlowCommandForKey(
  chunk: string | undefined,
  search: MemoryFlowInteractionState['search'],
  key: KloMemoryFlowKey,
): MemoryFlowInteractionCommand | null {
  // Checked before anything else: the terminal is in raw mode, so Ctrl+C does not raise
  // SIGINT and this is the only way to interrupt, including while typing a query.
  if (key.ctrl === true && key.name === 'c') {
    return 'quit';
  }

  if (search.editing) {
    if (key.name === 'escape') return 'search-clear';
    if (key.name === 'return' || key.name === 'enter') return 'search-submit';
    if (key.name === 'backspace') return 'search-backspace';
    // `chunk` is undefined for escape sequences, so guard before reading `.length`.
    // Only single printable characters (no control codes, no DEL) extend the query.
    if (typeof chunk === 'string' && chunk.length === 1 && chunk >= ' ' && chunk !== '\u007f') {
      return { type: 'search-input', value: chunk };
    }
    return null;
  }

  // readline only names alphanumerics and special keys; punctuation such as "/" arrives
  // with `key.name` unset, so the raw character has to be checked as well.
  if (key.name === '/' || chunk === '/') return 'search-start';

  switch (key.name) {
    case 'left':
      return 'left';
    case 'right':
      return 'right';
    case 'up':
      return 'up';
    case 'down':
      return 'down';
    case 'return':
    case 'enter':
      return 'enter';
    case 'tab':
      return 'tab';
    case 'f':
      return 'filter';
    case 'p':
      return 'provenance';
    case 't':
      return 'transcript';
    case 'q':
    case 'escape':
      return 'quit';
    default:
      return null;
  }
}
/**
 * Renders the memory-flow view and, when stdin is a TTY, keeps it interactive
 * until the reducer reports `shouldQuit`.
 *
 * Without a TTY stdin the view is written once and the function returns.
 * With a TTY, stdin is switched to raw mode and resumed, a keypress listener is
 * attached, and the screen is repainted after every recognised key.
 *
 * @param input   Replay input to visualise.
 * @param io      stdin/stdout pair to read keys from and paint to.
 * @param options Optional override for how keypress events are enabled on stdin.
 * @returns Resolves when the user quits. Rejects if rendering or state reduction
 *          throws; in that case stdin is restored before the rejection is delivered.
 */
export async function renderMemoryFlowInteractively(
  input: MemoryFlowReplayInput,
  io: KloMemoryFlowInteractiveIo,
  options: RenderMemoryFlowInteractiveOptions = {},
): Promise<void> {
  const stdin = io.stdin;
  const view = buildMemoryFlowViewModel(input);

  if (stdin?.isTTY !== true) {
    io.stdout.write(
      renderMemoryFlowInteractive(view, createInitialMemoryFlowInteractionState(view), {
        terminalWidth: io.stdout.columns,
      }),
    );
    return;
  }

  let state = createInitialMemoryFlowInteractionState(view);
  // Remember the caller's raw-mode setting so it can be put back on exit.
  const previousRawMode = stdin.isRaw === true;

  return new Promise<void>((resolve, reject) => {
    let settled = false;

    const cleanup = (): void => {
      removeKeypressListener(stdin, handleKeypress);
      stdin.setRawMode?.(previousRawMode);
      stdin.pause?.();
    };

    // Single exit path, so the terminal is restored exactly once however the session ends.
    const finish = (error?: unknown): void => {
      if (settled) return;
      settled = true;
      cleanup();
      if (error === undefined) resolve();
      else reject(error);
    };

    const handleKeypress = (chunk: string, key: KloMemoryFlowKey): void => {
      try {
        const command = memoryFlowCommandForKey(chunk, state.search, key);
        if (!command) {
          return;
        }

        state = reduceMemoryFlowInteractionState(state, command, view);
        repaint(input, state, io);

        if (state.shouldQuit) {
          finish();
        }
      } catch (error) {
        // An exception escaping an event listener would otherwise leave stdin in raw
        // mode with the listener still attached.
        finish(error);
      }
    };

    try {
      (options.prepareKeypressEvents ?? defaultPrepareKeypressEvents)(stdin);
      stdin.setRawMode?.(true);
      stdin.resume?.();
      stdin.on('keypress', handleKeypress);
      repaint(input, state, io);
    } catch (error) {
      finish(error);
    }
  });
}
type: 'source_acquired', adapter: 'live-database', trigger: 'manual_resync', fileCount: 2 }, + { type: 'scope_detected', fingerprint: 'scope-1' }, + { type: 'raw_snapshot_written', syncId: 'sync-1', rawFileCount: 2 }, + { type: 'diff_computed', added: 1, modified: 1, deleted: 0, unchanged: 0 }, + { type: 'chunks_planned', chunkCount: 2, workUnitCount: 2, evictionCount: 0 }, + { type: 'work_unit_started', unitKey: 'orders', skills: ['knowledge_capture'], stepBudget: 40 }, + { type: 'candidate_action', unitKey: 'orders', target: 'wiki', action: 'created', key: 'knowledge/orders.md' }, + { type: 'work_unit_finished', unitKey: 'orders', status: 'success' }, + { type: 'work_unit_started', unitKey: 'customers', skills: ['sl_capture'], stepBudget: 40 }, + { type: 'candidate_action', unitKey: 'customers', target: 'sl', action: 'updated', key: 'orbit_demo.customers' }, + { type: 'work_unit_finished', unitKey: 'customers', status: 'success' }, + { type: 'reconciliation_finished', conflictCount: 0, fallbackCount: 0 }, + { type: 'saved', commitSha: 'commit-one', wikiCount: 1, slCount: 1 }, + { type: 'provenance_recorded', rowCount: 1 }, + { type: 'report_created', runId: 'run-1', reportPath: 'report-1' }, + ], + }; +} + +function runningReplayInput(): MemoryFlowReplayInput { + return { ...replayInput(), status: 'running', syncId: 'pending', reportId: undefined, reportPath: undefined, plannedWorkUnits: [], events: [{ type: 'source_acquired', adapter: 'live-database', trigger: 'manual_resync', fileCount: 1 }] }; +} + +function packagedReplayInput(overrides: Partial = {}): MemoryFlowReplayInput { + return { + ...replayInput(), + connectionId: 'orbit_demo', + metadata: { + schemaVersion: 1, + mode: 'seeded', + origin: 'packaged', + timing: 'prebuilt', + capturedAt: null, + sourceReportId: 'demo-seeded-report', + sourceReportPath: 'reports/seeded-demo-report.json', + fallbackReason: null, + }, + ...overrides, + }; +} + +function makeIo(): { io: KloMemoryFlowTuiIo; stderr: () => 
string } { + let stderr = ''; + return { io: { stdin: { isTTY: true, setRawMode: vi.fn() }, stdout: { isTTY: true, columns: 120, write: vi.fn() }, stderr: { write(chunk: string) { stderr += chunk; } } }, stderr: () => stderr }; +} + +function fakeInkInstance(): MemoryFlowInkInstance { + return { rerender: vi.fn(), unmount: vi.fn(), waitUntilExit: vi.fn(async () => undefined), clear: vi.fn() }; +} + +async function waitForInkInput(): Promise { await new Promise((r) => setTimeout(r, 10)); } + +function renderedAppProps(tree: ReactNode): Record { + expect(React.isValidElement(tree)).toBe(true); + return (tree as React.ReactElement>).props; +} + +describe('memoryFlowCommandForInkInput', () => { + it('maps input to commands', () => { + expect(memoryFlowCommandForInkInput('q', {})).toBe('quit'); + expect(memoryFlowCommandForInkInput('c', { ctrl: true })).toBe('quit'); + expect(memoryFlowCommandForInkInput('x', {})).toBeNull(); + }); +}); + +describe('sanitizeMemoryFlowTuiError', () => { + it('redacts credentials', () => { + expect(sanitizeMemoryFlowTuiError(new Error('postgres://x?api_key=y password=z'))).toBe('[redacted-url] [redacted]'); + }); +}); + +describe('MemoryFlowTuiApp', () => { + it('always shows the KLO logo', () => { + const { lastFrame } = renderInkTest(); + expect(lastFrame()).toContain('█████╔╝'); + }); + + it('shows persistent HUD with source and status terminology', () => { + const { lastFrame } = renderInkTest(); + const frame = lastFrame() ?? ''; + expect(frame).toContain('Database (warehouse)'); + expect(frame).toContain('2 tables'); + expect(frame).toContain('done'); + expect(frame).toContain('warehouse'); + expect(frame).toContain('╭'); + expect(frame).toContain('╰'); + }); + + it('hides the internal demo connection id before packaged replay source events are visible', () => { + const { lastFrame } = renderInkTest( + , + ); + const frame = lastFrame() ?? 
''; + expect(frame).toContain('Orbit Demo'); + expect(frame).not.toContain('orbit_demo'); + expect(frame).not.toContain('Database (orbit_demo)'); + }); + + it('keeps the packaged replay source label public while only one source event is visible', () => { + const { lastFrame } = renderInkTest( + , + ); + const frame = lastFrame() ?? ''; + expect(frame).toContain('Orbit Demo'); + expect(frame).not.toContain('orbit_demo'); + expect(frame).not.toContain('Database (orbit_demo)'); + }); + + it('shows a prepopulated data disclaimer for packaged demo replay cost estimates', () => { + const { lastFrame } = renderInkTest( + , + ); + const frame = lastFrame() ?? ''; + expect(frame).toContain('$'); + expect(frame).toContain('Pre-run demo: $ shown is illustrative; no money is being spent now.'); + expect(frame).not.toContain('orbit_demo'); + }); + + it('does not show the prepopulated data disclaimer for captured full replay cost estimates', () => { + const { lastFrame } = renderInkTest( + , + ); + expect(lastFrame()).not.toContain('Demo data is prepopulated'); + }); + + it('shows accumulated activity feed on completion', () => { + const { lastFrame } = renderInkTest(); + const frame = lastFrame() ?? 
''; + expect(frame).toContain('Connected — found 2 tables to ingest'); + expect(frame).toContain('Created so far:'); + expect(frame).toContain('order lifecycle'); + expect(frame).toContain('customer metrics'); + expect(frame).toContain('KLO finished ingesting your data'); + expect(frame).toContain('klo sl list'); + expect(frame).toContain('klo wiki list'); + expect(frame).toContain('klo serve --mcp stdio --user-id local'); + expect(frame).not.toContain(['klo', 'ask'].join(' ')); + expect(frame).not.toContain(['klo', 'mcp'].join(' ')); + }); + + it('handles quit while running', async () => { + const onExit = vi.fn(); + const { stdin } = renderInkTest(); + stdin.write('q'); + await waitForInkInput(); + expect(onExit).toHaveBeenCalledTimes(1); + }); + + it('shows active work unit with progress', () => { + const running: MemoryFlowReplayInput = { + ...runningReplayInput(), + events: [ + { type: 'source_acquired', adapter: 'live-database', trigger: 'manual_resync', fileCount: 1 }, + { type: 'diff_computed', added: 1, modified: 0, deleted: 0, unchanged: 0 }, + { type: 'chunks_planned', chunkCount: 1, workUnitCount: 1, evictionCount: 0 }, + { type: 'work_unit_started', unitKey: 'orders', skills: ['knowledge_capture'], stepBudget: 40 }, + ], + plannedWorkUnits: [{ unitKey: 'orders', rawFiles: ['orders'], peerFileCount: 0, dependencyCount: 1 }], + }; + const { lastFrame } = renderInkTest(); + const frame = lastFrame() ?? 
''; + expect(frame).toContain('Ingesting — 0/1 business area done'); + expect(frame).toContain('Reading table schemas, understanding relationships, creating query definitions'); + expect(frame).toContain('█████╔╝'); + }); + + it('describes multi-source ingestion as building the context layer', () => { + const running: MemoryFlowReplayInput = { + ...runningReplayInput(), + adapter: 'multi-source', + events: [ + { type: 'source_acquired', adapter: 'live-database', trigger: 'manual_resync', fileCount: 8 }, + { type: 'source_acquired', adapter: 'dbt-descriptions', trigger: 'manual_resync', fileCount: 3 }, + { type: 'diff_computed', added: 11, modified: 0, deleted: 0, unchanged: 0 }, + { type: 'chunks_planned', chunkCount: 1, workUnitCount: 1, evictionCount: 0 }, + { type: 'work_unit_started', unitKey: 'orders', skills: ['knowledge_capture'], stepBudget: 40 }, + ], + plannedWorkUnits: [{ unitKey: 'orders', rawFiles: ['orders'], peerFileCount: 0, dependencyCount: 1 }], + }; + + const { lastFrame } = renderInkTest(); + const frame = lastFrame() ?? 
''; + expect(frame).toContain('Ingesting warehouse, dbt, BI, and docs into a unified context layer'); + expect(frame).not.toContain('unified semantic layer'); + }); + + it('hides completion while running', () => { + const { lastFrame } = renderInkTest(); + expect(lastFrame()).not.toContain('KLO finished ingesting'); + }); +}); + +describe('startLiveMemoryFlowTui', () => { + it('starts and updates', async () => { + const { io } = makeIo(); + const instance = fakeInkInstance(); + const live = await startLiveMemoryFlowTui(runningReplayInput(), io, { renderInk: () => instance }); + expect(live).not.toBeNull(); + live?.update(replayInput()); + expect(instance.rerender).toHaveBeenCalledTimes(1); + live?.close(); + expect(instance.unmount).toHaveBeenCalledTimes(1); + }); + + it('redacts errors', async () => { + const { io, stderr } = makeIo(); + await expect(startLiveMemoryFlowTui(runningReplayInput(), io, { renderInk: () => { throw new Error('postgres://x?token=y'); } })).resolves.toBeNull(); + expect(stderr()).toContain('[redacted-url]'); + }); +}); + +describe('renderMemoryFlowTui', () => { + it('renders and returns true', async () => { + const { io } = makeIo(); + const instance = fakeInkInstance(); + await expect(renderMemoryFlowTui(replayInput(), io, { renderInk: () => instance })).resolves.toBe(true); + }); + + it('scales event timing with the speed multiplier while keeping animations normal speed', async () => { + const { io } = makeIo(); + const instance = fakeInkInstance(); + let renderedTree: ReactNode = null; + + await expect( + renderMemoryFlowTui(replayInput(), io, { + speedMultiplier: 0.125, + renderInk: (tree) => { + renderedTree = tree; + return instance; + }, + }), + ).resolves.toBe(true); + + expect(renderedAppProps(renderedTree)).toMatchObject({ + paceMsPerEvent: 1440, + frameMs: 140, + completionFrameMs: 80, + completionHoldMs: 1000, + }); + }); + + it('redacts errors', async () => { + const { io, stderr } = makeIo(); + await 
expect(renderMemoryFlowTui(replayInput(), io, { renderInk: () => { throw new Error('postgres://x?token=y'); } })).resolves.toBe(false); + expect(stderr()).toContain('[redacted-url]'); + }); +}); diff --git a/packages/cli/src/memory-flow-tui.tsx b/packages/cli/src/memory-flow-tui.tsx new file mode 100644 index 00000000..dd0186db --- /dev/null +++ b/packages/cli/src/memory-flow-tui.tsx @@ -0,0 +1,552 @@ +/* @jsxImportSource react */ +import { + buildMemoryFlowViewModel, + buildMemoryFlowVisualModel, + createInitialMemoryFlowInteractionState, + findMemoryFlowSearchMatches, + type MemoryFlowColumnId, + type MemoryFlowInteractionCommand, + type MemoryFlowInteractionState, + type MemoryFlowReplayInput, + type MemoryFlowViewModel, + reduceMemoryFlowInteractionState, + selectedMemoryFlowColumn, + selectedMemoryFlowDetails, +} from '@klo/context/ingest'; +import { Box, Text, render as renderInkRuntime, useApp, useInput } from 'ink'; +import React, { type ReactNode, useEffect, useMemo, useRef, useState } from 'react'; +import { buildDemoMetrics } from './demo-metrics.js'; +import { + ActivityFeed, + Hud, + Logo, +} from './memory-flow-hud.js'; +import { profileMark } from './startup-profile.js'; + +profileMark('module:memory-flow-tui'); + +const COLOR_THEME = { + text: 'white', + muted: 'gray', + active: 'cyan', + complete: 'green', + warning: 'yellow', + failed: 'red', + border: 'gray', +} as const; + +const NO_COLOR_THEME = { + text: 'white', + muted: 'white', + active: 'white', + complete: 'white', + warning: 'white', + failed: 'white', + border: 'white', +} as const; + +type MemoryFlowTuiTheme = Record; + +const STAGE_LABELS = { + source: 'CONNECT', + chunks: 'SNAPSHOT', + workUnits: 'PLAN', + actions: 'ANALYZE', + gates: 'VALIDATE', + saved: 'MEMORY', +} satisfies Record; + +export interface KloMemoryFlowTuiIo { + stdin?: { isTTY?: boolean; setRawMode?(value: boolean): void }; + stdout: { isTTY?: boolean; columns?: number; write(chunk: string): void }; + stderr: { 
write(chunk: string): void }; +} + +export interface MemoryFlowTuiLiveSession { + update(input: MemoryFlowReplayInput): void; + close(): void; + isClosed(): boolean; +} + +export interface MemoryFlowInkInstance { + rerender(tree: ReactNode): void; + unmount(): void; + waitUntilExit(): Promise; + clear?(): void; +} + +export interface MemoryFlowInkRenderOptions { + stdin?: KloMemoryFlowTuiIo['stdin']; + stdout: KloMemoryFlowTuiIo['stdout']; + stderr: KloMemoryFlowTuiIo['stderr']; + exitOnCtrlC: boolean; + patchConsole: boolean; + maxFps: number; + alternateScreen: boolean; +} + +interface RenderMemoryFlowTuiOptions { + renderInk?: (tree: ReactNode, options: MemoryFlowInkRenderOptions) => MemoryFlowInkInstance; + paceEvents?: boolean; + paceMsPerEvent?: number; + speedMultiplier?: number; +} + +interface StartLiveMemoryFlowTuiOptions { + renderInk?: (tree: ReactNode, options: MemoryFlowInkRenderOptions) => MemoryFlowInkInstance; +} + +interface RenderTreeOptions { + paceEvents?: boolean; + paceMsPerEvent?: number; + frameMs?: number; + completionFrameMs?: number; + completionHoldMs?: number; +} + +interface MemoryFlowTuiTiming { + paceMsPerEvent: number; + frameMs: number; + completionFrameMs: number; + completionHoldMs: number; +} + +const DEFAULT_TUI_TIMING = { + paceMsPerEvent: 180, + frameMs: 140, + completionFrameMs: 80, + completionHoldMs: 1000, +} satisfies MemoryFlowTuiTiming; + +interface InkKey { + leftArrow?: boolean; + rightArrow?: boolean; + upArrow?: boolean; + downArrow?: boolean; + return?: boolean; + escape?: boolean; + ctrl?: boolean; + shift?: boolean; + tab?: boolean; + backspace?: boolean; + delete?: boolean; +} + +interface MemoryFlowTuiAppProps { + input: MemoryFlowReplayInput; + terminalWidth?: number; + env?: NodeJS.ProcessEnv; + onExit(): void; + paceEvents?: boolean; + paceMsPerEvent?: number; + frameMs?: number; + completionFrameMs?: number; + completionHoldMs?: number; + showBoot?: boolean; +} + +function resolveMemoryFlowTuiTheme(env: 
/**
 * Convert an arbitrary thrown value into a message that is safe to print on stderr.
 *
 * Two redaction passes run in order:
 *  1. Anything shaped like `scheme://...` (up to the next whitespace) becomes
 *     `[redacted-url]`, since connection strings can embed credentials.
 *  2. `key=value` pairs whose key mentions `api key`, `password`, `token` or
 *     `secret` become `[redacted]` (key and value are both removed).
 *
 * @param error - The caught value; anything that is not an `Error` is stringified.
 * @returns The message with URLs and credential assignments masked.
 */
export function sanitizeMemoryFlowTuiError(error: unknown): string {
  const message = error instanceof Error ? error.message : String(error);
  return (
    message
      .replace(/[a-z][a-z0-9+.-]*:\/\/[^\s]+/gi, '[redacted-url]')
      // `_` is a word character, so a bare `\b` in front of the keyword never
      // matches inside `access_token=` or `client_secret=`. Allowing `\w*` on
      // both sides keeps compound key names from slipping through unredacted.
      .replace(/\b\w*(?:api[_-]?key|password|token|secret)\w*=\S+/gi, '[redacted]')
  );
}
/**
 * Rewrite internal pipeline jargon in demo copy into user-facing wording.
 *
 * @param value - Text that may contain internal terms (work units, chunks, candidates, ...).
 * @returns The text with every listed term replaced; unrelated text is untouched.
 */
function humanizeDemoText(value: string): string {
  // Rules run strictly top to bottom; each rule sees the previous rule's output.
  // Patterns without the `i` flag are intentionally case-sensitive.
  const rewrites: ReadonlyArray<readonly [RegExp, string]> = [
    [/\bWORKUNITS\b/g, 'PLAN'],
    [/\bWorkUnit\b/g, 'Table review'],
    [/\bwork units\b/gi, 'table reviews'],
    [/\bwork-unit\b/gi, 'table-review'],
    [/\bWUs\b/g, 'tables'],
    [/\bchunks\b/gi, 'table groups'],
    [/\bcandidates\b/gi, 'drafts'],
    [/\bcandidate\b/gi, 'draft'],
    [/\braw files\b/gi, 'database files'],
    [/\braw file\b/gi, 'database file'],
    [/\bSL\b/g, 'context layer'],
  ];

  let text = value;
  for (const [pattern, replacement] of rewrites) {
    text = text.replace(pattern, replacement);
  }
  return text;
}
searchStatusLine(props.view, props.state); + + return ( + + + Details / focus: {stageLabel(column.id)} Pane: {props.state.pane} Filter: {filterLabel(props.state.filter)} + + {searchLine && {searchLine}} + {details.map((detail, index) => ( + + - {detail} + + ))} + {rawFiles.map((rawFile) => ( + + - {rawFile} + + ))} + {props.view.completionLine && {humanizeDemoText(props.view.completionLine)}} + + ); +} + +function TrustIssues(props: { view: MemoryFlowViewModel; theme: MemoryFlowTuiTheme }): ReactNode { + if (props.view.trustIssues.length === 0) { + return null; + } + + return ( + + Validation notes + {props.view.trustIssues.slice(0, 4).map((issue) => ( + + {issue.severity === 'failed' ? 'FAILED' : 'WARNING'} {humanizeDemoText(issue.title)}:{' '} + {humanizeDemoText(issue.detail)} + + ))} + + ); +} + +export function MemoryFlowTuiApp(props: MemoryFlowTuiAppProps): ReactNode { + const app = useApp(); + const totalEvents = props.input.events.length; + const paceEnabled = props.paceEvents === true && totalEvents > 0; + const [pacedCount, setPacedCount] = useState(paceEnabled ? 0 : totalEvents); + + const pacedInput = useMemo(() => { + if (!paceEnabled || pacedCount >= totalEvents) { + return props.input; + } + return { + ...props.input, + status: 'running', + events: props.input.events.slice(0, pacedCount), + }; + }, [paceEnabled, pacedCount, totalEvents, props.input]); + + const pacedNow = useMemo<(() => number) | undefined>(() => { + if (!paceEnabled) return undefined; + const firstEvent = props.input.events[0]; + if (!firstEvent?.emittedAt) return undefined; + const firstEventMs = Date.parse(firstEvent.emittedAt); + if (!Number.isFinite(firstEventMs)) return undefined; + const stride = props.paceMsPerEvent ?? 
DEFAULT_TUI_TIMING.paceMsPerEvent; + return () => firstEventMs + pacedCount * stride; + }, [paceEnabled, pacedCount, props.input.events, props.paceMsPerEvent]); + + const view = useMemo(() => buildMemoryFlowViewModel(pacedInput), [pacedInput]); + const [state, setState] = useState(() => createInitialMemoryFlowInteractionState(view)); + const [frame, setFrame] = useState(0); + const [thoughtFrame, setThoughtFrame] = useState(0); + const [completionFrame, setCompletionFrame] = useState(0); + const [holdComplete, setHoldComplete] = useState(false); + const [userHasNavigated, setUserHasNavigated] = useState(false); + const lastEventCountRef = useRef(pacedInput.events.length); + const lastStatusRef = useRef(pacedInput.status); + const exitHandled = useRef(false); + const theme = resolveMemoryFlowTuiTheme(props.env); + + useEffect(() => { + if (!state.shouldQuit || exitHandled.current) { + return; + } + exitHandled.current = true; + props.onExit(); + app.exit(); + }, [app, props, state.shouldQuit]); + + useEffect(() => { + const timer = setInterval(() => { + setFrame((current) => current + 1); + setThoughtFrame((current) => current + 1); + }, props.frameMs ?? DEFAULT_TUI_TIMING.frameMs); + return () => clearInterval(timer); + }, [props.frameMs]); + + useEffect(() => { + if (lastEventCountRef.current !== pacedInput.events.length) { + lastEventCountRef.current = pacedInput.events.length; + setThoughtFrame(0); + } + }, [pacedInput.events.length]); + + useEffect(() => { + if (lastStatusRef.current !== pacedInput.status) { + lastStatusRef.current = pacedInput.status; + if (pacedInput.status === 'done' || pacedInput.status === 'error') { + setCompletionFrame(0); + } + } + }, [pacedInput.status]); + + useEffect(() => { + if (pacedInput.status !== 'done' && pacedInput.status !== 'error') return; + if (completionFrame >= 12) return; + const timer = setInterval( + () => setCompletionFrame((current) => Math.min(12, current + 1)), + props.completionFrameMs ?? 
DEFAULT_TUI_TIMING.completionFrameMs, + ); + return () => clearInterval(timer); + }, [pacedInput.status, completionFrame, props.completionFrameMs]); + + useEffect(() => { + if (completionFrame < 12) { + setHoldComplete(false); + return; + } + const timer = setTimeout( + () => setHoldComplete(true), + props.completionHoldMs ?? DEFAULT_TUI_TIMING.completionHoldMs, + ); + return () => clearTimeout(timer); + }, [completionFrame, props.completionHoldMs]); + + useEffect(() => { + if (!paceEnabled || pacedCount >= totalEvents) { + return; + } + const interval = props.paceMsPerEvent ?? DEFAULT_TUI_TIMING.paceMsPerEvent; + const timer = setInterval(() => { + setPacedCount((current) => Math.min(totalEvents, current + 1)); + }, interval); + return () => clearInterval(timer); + }, [paceEnabled, pacedCount, totalEvents, props.paceMsPerEvent]); + + useInput((input, key) => { + const command = memoryFlowCommandForInkInput(input, key, state.search); + if (!command) return; + if (command === 'quit' && isComplete && !holdComplete) return; + if (command !== 'quit') setUserHasNavigated(true); + setState((current) => reduceMemoryFlowInteractionState(current, command, view)); + }); + + const isComplete = pacedInput.status === 'done' || pacedInput.status === 'error'; + const completionMetrics = useMemo( + () => buildDemoMetrics(pacedInput, pacedNow ? { now: pacedNow } : {}), + [pacedInput, pacedNow], + ); + + const termWidth = props.terminalWidth ?? 
/**
 * Scale a base delay by a playback speed multiplier.
 *
 * The delay is divided by the multiplier, so values above 1 shorten it and
 * values below 1 lengthen it. The result is rounded to the nearest whole
 * millisecond and never drops below 20 ms.
 *
 * @param ms - Base delay in milliseconds.
 * @param speedMultiplier - Divisor applied to `ms`.
 * @returns The scaled delay in milliseconds, with a 20 ms floor.
 */
function scaleTiming(ms: number, speedMultiplier: number): number {
  const scaledMs = Math.round(ms / speedMultiplier);
  return Math.max(20, scaledMs);
}
/**
 * Replay a memory-flow run in the Ink TUI and wait until the TUI exits.
 *
 * @param input - Replay data to visualise.
 * @param io - Streams handed to Ink; `io.stderr` also receives the fallback notice.
 * @param options - Optional renderer override plus pacing/speed controls.
 * @returns `true` after the TUI exited normally; `false` when rendering threw,
 *          in which case a sanitized notice has been written to `io.stderr`.
 */
export async function renderMemoryFlowTui(
  input: MemoryFlowReplayInput,
  io: KloMemoryFlowTuiIo,
  options: RenderMemoryFlowTuiOptions = {},
): Promise<boolean> {
  let instance: MemoryFlowInkInstance | null = null;
  // Event pacing is on unless the caller explicitly passes `paceEvents: false`.
  const paceEvents = options.paceEvents !== false;
  const timing = resolveTiming(options);
  try {
    const onExit = (): void => {
      instance?.unmount();
    };
    instance = (options.renderInk ?? renderInk)(
      renderTree(input, io, onExit, { paceEvents, ...timing }),
      renderOptions(io),
    );
    await instance.waitUntilExit();
    instance.unmount();
    return true;
  } catch (error) {
    // The failure may have happened after Ink mounted (e.g. waitUntilExit
    // rejected). renderOptions enables the alternate screen, so tear the
    // instance down before writing the fallback notice.
    try {
      instance?.unmount();
    } catch {
      // Best effort: the instance may already be torn down or be the thing that threw.
    }
    io.stderr.write(`TUI visualization unavailable: ${sanitizeMemoryFlowTuiError(error)}; using text renderer.\n`);
    return false;
  }
}
renderInk)(renderTree(input, io, close), renderOptions(io)); + + return { + update(nextInput: MemoryFlowReplayInput): void { + if (closed) { + return; + } + instance?.rerender(renderTree(nextInput, io, close)); + }, + close, + isClosed(): boolean { + return closed; + }, + }; + } catch (error) { + io.stderr.write(`TUI visualization unavailable: ${sanitizeMemoryFlowTuiError(error)}; using text renderer.\n`); + return null; + } +} diff --git a/packages/cli/src/next-steps.test.ts b/packages/cli/src/next-steps.test.ts new file mode 100644 index 00000000..cb638277 --- /dev/null +++ b/packages/cli/src/next-steps.test.ts @@ -0,0 +1,129 @@ +import { describe, expect, it } from 'vitest'; +import { + KLO_CONTEXT_BUILD_COMMANDS, + KLO_NEXT_STEP_COMMANDS, + formatNextStepLines, + formatSetupNextStepLines, +} from './next-steps.js'; + +const command = (...parts: string[]) => parts.join(' '); + +describe('KLO demo next steps', () => { + it('uses supported context-build commands before agent usage', () => { + expect(KLO_CONTEXT_BUILD_COMMANDS).toEqual([ + { + command: 'klo setup context build', + description: 'Build agent-ready context from configured primary and context sources', + }, + { + command: 'klo status', + description: 'Check setup and context readiness', + }, + { + command: 'klo setup context status', + description: 'Check the setup-managed context build state', + }, + ]); + }); + + it('uses supported final public commands', () => { + expect(KLO_NEXT_STEP_COMMANDS).toEqual([ + { + command: 'klo agent context --json', + description: 'Verify the project context your agent can read', + }, + { + command: 'klo agent tools --json', + description: 'List direct CLI tools available to agents', + }, + { + command: 'klo sl list', + description: 'Inspect generated semantic-layer sources', + }, + { + command: 'klo wiki list', + description: 'Inspect generated wiki pages', + }, + { + command: 'klo serve --mcp stdio --user-id local', + description: 'Optional MCP server route for 
clients that require MCP', + }, + ]); + }); + + it('prefers the direct CLI route before MCP serving', () => { + const commands = KLO_NEXT_STEP_COMMANDS.map((step) => step.command); + + expect(commands.indexOf('klo agent context --json')).toBeLessThan( + commands.indexOf('klo serve --mcp stdio --user-id local'), + ); + expect(commands.indexOf('klo agent tools --json')).toBeLessThan( + commands.indexOf('klo serve --mcp stdio --user-id local'), + ); + }); + + it('explains what the next-step commands are for', () => { + const rendered = formatNextStepLines().join('\n'); + + expect(rendered).toContain('KLO context is ready for agents.'); + expect(rendered).toContain('Preferred route: CLI + Skills'); + expect(rendered).toContain('no MCP server is required'); + expect(rendered).toContain('Direct CLI checks:'); + expect(rendered).toContain('Optional MCP:'); + expect(rendered).not.toContain('Ask your agent to use KLO'); + }); + + it('does not advertise removed Commander migration commands', () => { + const rendered = formatNextStepLines().join('\n'); + + expect(rendered).toContain('klo agent tools --json'); + expect(rendered).toContain('klo agent context --json'); + expect(rendered).toContain('klo sl list'); + expect(rendered).toContain('klo wiki list'); + expect(rendered).toContain('klo serve --mcp stdio --user-id local'); + + for (const removed of [ + command('klo', 'ask'), + command('klo', 'mcp'), + command('klo', 'connect'), + command('klo', 'knowledge'), + command('dev', 'model'), + command('dev', 'knowledge'), + command('klo', 'ingest', 'run'), + command('klo', 'ingest', 'replay'), + ]) { + expect(rendered).not.toContain(removed); + } + }); + + it('keeps setup next steps focused on building context when the build is not ready', () => { + const rendered = formatSetupNextStepLines({ + setupReady: true, + hasContextTargets: true, + contextReady: false, + agentIntegrationReady: true, + }).join('\n'); + + expect(rendered).toContain('Build KLO context next.'); + 
expect(rendered).toContain('primary-source scans and context-source ingests'); + expect(rendered).toContain('klo setup context build'); + expect(rendered).toContain('klo status'); + expect(rendered).toContain('klo setup context status'); + expect(rendered).not.toContain('klo agent context --json'); + expect(rendered).not.toContain('klo serve --mcp'); + }); + + it('shows agent commands only after setup and context build are ready', () => { + const rendered = formatSetupNextStepLines({ + setupReady: true, + hasContextTargets: true, + contextReady: true, + agentIntegrationReady: true, + }).join('\n'); + + expect(rendered).toContain('KLO context is ready for agents.'); + expect(rendered).toContain('klo agent context --json'); + expect(rendered).toContain('klo serve --mcp stdio --user-id local'); + expect(rendered).not.toContain('Build KLO context next.'); + }); +}); diff --git a/packages/cli/src/next-steps.ts b/packages/cli/src/next-steps.ts new file mode 100644 index 00000000..e8ca05c7 --- /dev/null +++ b/packages/cli/src/next-steps.ts @@ -0,0 +1,104 @@ +export const KLO_CONTEXT_BUILD_COMMANDS = [ + { + command: 'klo setup context build', + description: 'Build agent-ready context from configured primary and context sources', + }, + { + command: 'klo status', + description: 'Check setup and context readiness', + }, + { + command: 'klo setup context status', + description: 'Check the setup-managed context build state', + }, +] as const; + +export const KLO_NEXT_STEP_DIRECT_COMMANDS = [ + { + command: 'klo agent context --json', + description: 'Verify the project context your agent can read', + }, + { + command: 'klo agent tools --json', + description: 'List direct CLI tools available to agents', + }, + { + command: 'klo sl list', + description: 'Inspect generated semantic-layer sources', + }, + { + command: 'klo wiki list', + description: 'Inspect generated wiki pages', + }, +] as const; + +export const KLO_NEXT_STEP_MCP_COMMANDS = [ + { + command: 'klo serve --mcp 
stdio --user-id local', + description: 'Optional MCP server route for clients that require MCP', + }, +] as const; + +export const KLO_NEXT_STEP_COMMANDS = [...KLO_NEXT_STEP_DIRECT_COMMANDS, ...KLO_NEXT_STEP_MCP_COMMANDS] as const; + +export const KLO_NEXT_STEP_COMMAND_WIDTH = Math.max( + ...[...KLO_CONTEXT_BUILD_COMMANDS, ...KLO_NEXT_STEP_COMMANDS].map((step) => step.command.length), +); + +export interface KloSetupNextStepState { + setupReady: boolean; + hasContextTargets: boolean; + contextReady: boolean; + agentIntegrationReady: boolean; +} + +function commandLines(commands: ReadonlyArray<{ command: string; description: string }>, indent: string): string[] { + return commands.map((step) => `${indent}$ ${step.command.padEnd(KLO_NEXT_STEP_COMMAND_WIDTH)} ${step.description}`); +} + +export function formatNextStepLines(indent = ' '): string[] { + return [ + `${indent}KLO context is ready for agents.`, + `${indent}Preferred route: CLI + Skills; installed rules call \`klo agent ...\` directly, so no MCP server is required.`, + `${indent}Direct CLI checks:`, + ...commandLines(KLO_NEXT_STEP_DIRECT_COMMANDS, indent), + `${indent}Optional MCP:`, + ...commandLines(KLO_NEXT_STEP_MCP_COMMANDS, indent), + ]; +} + +export function formatSetupNextStepLines(state: KloSetupNextStepState, indent = ' '): string[] { + if (!state.setupReady) { + return [ + `${indent}Finish setup first.`, + `${indent}$ ${'klo setup'.padEnd(KLO_NEXT_STEP_COMMAND_WIDTH)} Resume configuration and validation`, + `${indent}$ ${'klo status'.padEnd(KLO_NEXT_STEP_COMMAND_WIDTH)} Check which setup steps still need attention`, + ]; + } + + if (!state.hasContextTargets) { + return [ + `${indent}Connect data, then build context.`, + `${indent}$ ${'klo setup'.padEnd(KLO_NEXT_STEP_COMMAND_WIDTH)} Add primary or context sources`, + `${indent}$ ${'klo status'.padEnd(KLO_NEXT_STEP_COMMAND_WIDTH)} Check setup and context readiness`, + ]; + } + + if (!state.contextReady) { + return [ + `${indent}Build KLO context 
next.`, + `${indent}Preferred route: run the CLI build; it covers primary-source scans and context-source ingests.`, + ...commandLines(KLO_CONTEXT_BUILD_COMMANDS, indent), + ]; + } + + if (!state.agentIntegrationReady) { + return [ + `${indent}KLO context is built. Install agent rules when you want your coding agent to use it.`, + `${indent}$ ${'klo setup --agents'.padEnd(KLO_NEXT_STEP_COMMAND_WIDTH)} Install CLI-based agent rules`, + `${indent}$ ${'klo status'.padEnd(KLO_NEXT_STEP_COMMAND_WIDTH)} Check setup and context readiness`, + ]; + } + + return formatNextStepLines(indent); +} diff --git a/packages/cli/src/project-dir.test.ts b/packages/cli/src/project-dir.test.ts new file mode 100644 index 00000000..120221d5 --- /dev/null +++ b/packages/cli/src/project-dir.test.ts @@ -0,0 +1,172 @@ +import { afterEach, describe, expect, it, vi } from 'vitest'; +import { runKloCli, type KloCliDeps } from './index.js'; + +function makeIo() { + let stdout = ''; + let stderr = ''; + return { + io: { + stdout: { + write: (chunk: string) => { + stdout += chunk; + }, + }, + stderr: { + write: (chunk: string) => { + stderr += chunk; + }, + }, + }, + stdout: () => stdout, + stderr: () => stderr, + }; +} + +describe('project directory defaults', () => { + afterEach(() => { + delete process.env.KLO_PROJECT_DIR; + }); + + it('uses KLO_PROJECT_DIR when Commander-dispatched commands omit --project-dir', async () => { + process.env.KLO_PROJECT_DIR = '/tmp/klo-env-project'; + + const connection = vi.fn(async () => 0); + const demo = vi.fn(async () => 0); + const doctor = vi.fn(async () => 0); + const ingest = vi.fn(async () => 0); + const publicIngest = vi.fn(async () => 0); + const scan = vi.fn(async () => 0); + const serveStdio = vi.fn(async () => 0); + const setup = vi.fn(async () => 0); + const agent = vi.fn(async () => 0); + const deps: KloCliDeps = { agent, connection, demo, doctor, ingest, publicIngest, scan, serveStdio, setup }; + + const cases: Array<{ + argv: string[]; + spy: 
ReturnType; + expected: Record; + runnerType: 'cli' | 'serve'; + }> = [ + { + argv: ['connection', 'list'], + spy: connection, + expected: { command: 'list', projectDir: '/tmp/klo-env-project' }, + runnerType: 'cli', + }, + { + argv: ['setup', 'demo', 'scan', '--no-input'], + spy: demo, + expected: { command: 'scan', projectDir: '/tmp/klo-env-project' }, + runnerType: 'cli', + }, + { + argv: ['dev', 'doctor', '--no-input'], + spy: doctor, + expected: { command: 'project', projectDir: '/tmp/klo-env-project' }, + runnerType: 'cli', + }, + { + argv: ['ingest', 'status', 'run-1'], + spy: publicIngest, + expected: { command: 'status', projectDir: '/tmp/klo-env-project', runId: 'run-1' }, + runnerType: 'cli', + }, + { + argv: ['setup', 'status'], + spy: setup, + expected: { command: 'status', projectDir: '/tmp/klo-env-project' }, + runnerType: 'cli', + }, + { + argv: ['dev', 'scan', 'warehouse'], + spy: scan, + expected: { command: 'run', projectDir: '/tmp/klo-env-project', connectionId: 'warehouse' }, + runnerType: 'cli', + }, + { + argv: ['serve', '--mcp', 'stdio'], + spy: serveStdio, + expected: { mcp: 'stdio', projectDir: '/tmp/klo-env-project' }, + runnerType: 'serve', + }, + { + argv: ['agent', 'tools', '--json'], + spy: agent, + expected: { command: 'tools', projectDir: '/tmp/klo-env-project' }, + runnerType: 'cli', + }, + ]; + + for (const item of cases) { + const testIo = makeIo(); + await expect(runKloCli(item.argv, testIo.io, deps)).resolves.toBe(0); + if (item.runnerType === 'serve') { + expect(item.spy).toHaveBeenLastCalledWith(expect.objectContaining(item.expected)); + } else { + expect(item.spy).toHaveBeenLastCalledWith(expect.objectContaining(item.expected), testIo.io); + } + expect(testIo.stderr()).toBe(''); + } + }); + + it('lets explicit global --project-dir override KLO_PROJECT_DIR before and after nested commands', async () => { + process.env.KLO_PROJECT_DIR = '/tmp/klo-env-project'; + + const scan = vi.fn(async () => 0); + const publicIngest = 
vi.fn(async () => 0); + const scanIo = makeIo(); + const ingestIo = makeIo(); + + await expect( + runKloCli(['--project-dir', '/tmp/klo-explicit-project', 'dev', 'scan', 'warehouse'], scanIo.io, { scan }), + ).resolves.toBe(0); + await expect( + runKloCli(['ingest', 'status', 'run-1', '--project-dir=/tmp/klo-explicit-project'], ingestIo.io, { + publicIngest, + }), + ).resolves.toBe(0); + + expect(scan).toHaveBeenCalledWith( + expect.objectContaining({ command: 'run', projectDir: '/tmp/klo-explicit-project' }), + scanIo.io, + ); + expect(publicIngest).toHaveBeenCalledWith( + expect.objectContaining({ command: 'status', projectDir: '/tmp/klo-explicit-project' }), + ingestIo.io, + ); + expect(scanIo.stderr()).toBe(''); + expect(ingestIo.stderr()).toBe(''); + }); + + it('uses nearest ancestor containing klo.yaml when no explicit or environment project-dir exists', async () => { + const { mkdir, realpath, writeFile } = await import('node:fs/promises'); + const { mkdtemp, rm } = await import('node:fs/promises'); + const { tmpdir } = await import('node:os'); + const { join } = await import('node:path'); + + const originalCwd = process.cwd(); + const root = await mkdtemp(join(tmpdir(), 'klo-cli-nearest-project-')); + const projectDir = join(root, 'warehouse'); + const nestedDir = join(projectDir, 'nested', 'deeper'); + await mkdir(nestedDir, { recursive: true }); + await writeFile(join(projectDir, 'klo.yaml'), 'project: warehouse\n', 'utf-8'); + const expectedProjectDir = await realpath(projectDir); + + const scan = vi.fn(async () => 0); + const testIo = makeIo(); + + try { + process.chdir(nestedDir); + await expect(runKloCli(['dev', 'scan', 'warehouse'], testIo.io, { scan })).resolves.toBe(0); + } finally { + process.chdir(originalCwd); + await rm(root, { recursive: true, force: true }); + } + + expect(scan).toHaveBeenCalledWith( + expect.objectContaining({ command: 'run', projectDir: expectedProjectDir }), + testIo.io, + ); + expect(testIo.stderr()).toBe(''); + }); +}); 
/**
 * Turn an optional project directory into an absolute path.
 *
 * @param projectDir - Directory to use; when it is `undefined` or `null`, `fallback` is used.
 * @param fallback - Directory used when `projectDir` is absent. Defaults to `'.'`.
 * @returns The chosen directory passed through `path.resolve`.
 */
export function resolveProjectDir(projectDir?: string, fallback = '.'): string {
  const hasProjectDir = projectDir !== undefined && projectDir !== null;
  const target = hasProjectDir ? projectDir : fallback;
  return resolve(target);
}
'env-project' }, cwd: tempDir })).toBe( + resolve(tempDir, 'env-project'), + ); + }); + + it('uses the nearest ancestor containing klo.yaml', async () => { + const project = join(tempDir, 'warehouse'); + const nested = join(project, 'nested', 'deeper'); + await mkdir(nested, { recursive: true }); + await writeFile(join(project, 'klo.yaml'), 'project: warehouse\n', 'utf-8'); + + expect(resolveKloProjectDir({ env: {}, cwd: nested })).toBe(resolve(project)); + expect(findNearestKloProjectDir(nested)).toBe(resolve(project)); + }); + + it('falls back to the current directory when no project marker exists', () => { + expect(resolveKloProjectDir({ env: {}, cwd: tempDir })).toBe(resolve(tempDir)); + expect(findNearestKloProjectDir(tempDir)).toBeUndefined(); + }); + + it('rejects empty explicit and environment project directory values', () => { + expect(() => resolveKloProjectDir({ explicitProjectDir: ' ', cwd: tempDir })).toThrow( + '--project-dir requires a value', + ); + expect(() => resolveKloProjectDir({ env: { KLO_PROJECT_DIR: ' ' }, cwd: tempDir })).toThrow( + 'KLO_PROJECT_DIR must not be empty', + ); + }); +}); diff --git a/packages/cli/src/project-resolver.ts b/packages/cli/src/project-resolver.ts new file mode 100644 index 00000000..8d948e65 --- /dev/null +++ b/packages/cli/src/project-resolver.ts @@ -0,0 +1,56 @@ +import { existsSync } from 'node:fs'; +import { dirname, join, resolve } from 'node:path'; + +export interface KloProjectResolverOptions { + explicitProjectDir?: string; + env?: Partial>; + cwd?: string; +} + +function nonEmptyValue(value: string | undefined): string | undefined { + if (value === undefined) { + return undefined; + } + const trimmed = value.trim(); + return trimmed.length > 0 ? 
value : undefined; +} + +/** Walks upward from startDir (made absolute; defaults to process.cwd()) and returns the first directory for which existsSync reports a klo.yaml entry. Returns undefined once dirname no longer changes the path, i.e. the top of the tree was reached without a match. */ export function findNearestKloProjectDir(startDir = process.cwd()): string | undefined { + let current = resolve(startDir); + + while (true) { + if (existsSync(join(current, 'klo.yaml'))) { + return current; + } + + const parent = dirname(current); + if (parent === current) { + return undefined; + } + current = parent; + } +} + +/** Picks the project directory in priority order: explicitProjectDir, then KLO_PROJECT_DIR (read from options.env when that object is provided, otherwise from process.env), then the nearest ancestor of cwd that contains klo.yaml, then cwd itself. Explicit and environment values are resolved against cwd. Throws when an explicit or environment value is present but blank. */ export function resolveKloProjectDir(options: KloProjectResolverOptions = {}): string { + const cwd = options.cwd ?? process.cwd(); + + if (options.explicitProjectDir !== undefined) { + const explicit = nonEmptyValue(options.explicitProjectDir); + if (!explicit) { + throw new Error('--project-dir requires a value'); + } + return resolve(cwd, explicit); + } + + const rawEnvProjectDir = options.env ? options.env.KLO_PROJECT_DIR : process.env.KLO_PROJECT_DIR; + const envProjectDir = nonEmptyValue(rawEnvProjectDir); + if (rawEnvProjectDir !== undefined && envProjectDir === undefined) { + throw new Error('KLO_PROJECT_DIR must not be empty'); + } + if (envProjectDir !== undefined) { + return resolve(cwd, envProjectDir); + } + + const resolvedCwd = resolve(cwd); + return findNearestKloProjectDir(resolvedCwd) ??
resolvedCwd; +} diff --git a/packages/cli/src/prompt-navigation.test.ts b/packages/cli/src/prompt-navigation.test.ts new file mode 100644 index 00000000..8fbfc7fe --- /dev/null +++ b/packages/cli/src/prompt-navigation.test.ts @@ -0,0 +1,48 @@ +import { describe, expect, it } from 'vitest'; +import { withMenuOptionSpacing, withMultiselectNavigation, withTextInputNavigation } from './prompt-navigation.js'; + +/* Pins the exact strings produced by the prompt-navigation helpers: menu spacing, the multiselect hint, and the text-input hint with its blank separator. */ describe('prompt navigation helpers', () => { + it('leaves compact single-line menu prompts unchanged', () => { + expect(withMenuOptionSpacing('What do you want to do?')).toBe('What do you want to do?'); + }); + + it('adds a blank separator between multiline menu copy and the option list', () => { + expect(withMenuOptionSpacing('Which embedding option should KLO use?\n\nKLO uses embeddings for search.')).toBe( + 'Which embedding option should KLO use?\n\nKLO uses embeddings for search.\n', + ); + }); + + it('does not duplicate an existing option-list separator', () => { + expect(withMenuOptionSpacing('Question\n\nContext\n')).toBe('Question\n\nContext\n'); + }); + + it('keeps multiselect navigation copy multiline so menu renderers can separate it from options', () => { + expect(withMultiselectNavigation('Which sources?')).toBe( + 'Which sources?\nUse Up/Down to move, Space to select or unselect, Enter to confirm, Escape to go back, or Ctrl+C to exit.', + ); + }); + + it('adds a blank separator between text input helper copy and the editable value', () => { + expect( + withTextInputNavigation( + 'Name this PostgreSQL connection\nKLO will use this short name in commands and config. You can rename it now.', + ), + ).toBe( + 'Name this PostgreSQL connection\n\nKLO will use this short name in commands and config.
You can rename it now.\nPress Escape to go back.\n', + ); + }); + + it('adds a blank separator before compact text input values', () => { + expect(withTextInputNavigation('Project folder path')).toBe('Project folder path\nPress Escape to go back.\n'); + }); + + it('normalizes already hinted text input prompts without duplicating the hint', () => { + expect( + withTextInputNavigation( + 'Name this PostgreSQL connection\nKLO will use this short name in commands and config. You can rename it now.\nPress Escape to go back.', + ), + ).toBe( + 'Name this PostgreSQL connection\n\nKLO will use this short name in commands and config. You can rename it now.\nPress Escape to go back.\n', + ); + }); +}); diff --git a/packages/cli/src/prompt-navigation.ts b/packages/cli/src/prompt-navigation.ts new file mode 100644 index 00000000..d80f2f97 --- /dev/null +++ b/packages/cli/src/prompt-navigation.ts @@ -0,0 +1,45 @@ +/* Hint line that withMultiselectNavigation appends to multiselect prompts. */ const MULTISELECT_MENU_NAVIGATION_HINT = + 'Use Up/Down to move, Space to select or unselect, Enter to confirm, Escape to go back, or Ctrl+C to exit.'; +/* Hint line that withTextInputNavigation appends to text input prompts. */ const TEXT_INPUT_NAVIGATION_HINT = 'Press Escape to go back.'; + +/** Removes every newline character at the end of message. */ function removeTrailingBlankLines(message: string): string { + return message.replace(/\n+$/, ''); +} + +/** Strips trailing newlines, then makes sure a multiline message has an empty line between its first line (the title) and the rest. Single-line messages, and messages whose second line is already empty, are returned in their stripped form without further change. */ function withTextInputBodySpacing(message: string): string { + const normalized = removeTrailingBlankLines(message); + if (!normalized.includes('\n')) { + return normalized; + } + const [title, ...bodyLines] = normalized.split('\n'); + if (bodyLines[0] === '') { + return normalized; + } + return `${title}\n\n${bodyLines.join('\n')}`; +} + +/** Appends one newline to a multiline message that does not already end with a newline; single-line messages and messages already ending in a newline are returned unchanged. */ export function withMenuOptionSpacing(message: string): string { + if (!message.includes('\n') || message.endsWith('\n')) { + return message; + } + return `${message}\n`; +} + +/** Returns a shallow copy of options whose message has been passed through withMenuOptionSpacing. NOTE(review): no declaration of the type parameter T is visible in this copy of the patch - confirm against the original source. */ export function withMenuOptionsSpacing(options: T): T { + return { ...options, message: withMenuOptionSpacing(options.message) }; +} + +/** Appends the multiselect navigation hint on a new line unless the message already contains that hint. */ export function withMultiselectNavigation(message: string): string { + if
(message.includes(MULTISELECT_MENU_NAVIGATION_HINT)) { + return message; + } + return `${message}\n${MULTISELECT_MENU_NAVIGATION_HINT}`; +} + +/** Normalizes a text input prompt: drops trailing newlines, removes any line that exactly equals the text-input hint, applies the title and body spacing, then appends the hint followed by one newline. */ export function withTextInputNavigation(message: string): string { + const messageWithoutHint = removeTrailingBlankLines(message) + .split('\n') + .filter((line) => line !== TEXT_INPUT_NAVIGATION_HINT) + .join('\n'); + return `${withTextInputBodySpacing(messageWithoutHint)}\n${TEXT_INPUT_NAVIGATION_HINT}\n`; +} diff --git a/packages/cli/src/public-ingest.test.ts b/packages/cli/src/public-ingest.test.ts new file mode 100644 index 00000000..09aa4ad7 --- /dev/null +++ b/packages/cli/src/public-ingest.test.ts @@ -0,0 +1,292 @@ +import { buildDefaultKloProjectConfig, type KloProjectConfig } from '@klo/context/project'; +import { describe, expect, it, vi } from 'vitest'; +import { buildPublicIngestPlan, type KloPublicIngestProject, runKloPublicIngest } from './public-ingest.js'; + +/** Test double for the CLI io object: every write call is appended to an in-memory string, and the returned stdout() and stderr() accessors expose what was captured. The isTTY option is passed through to the stdout stub. */ function makeIo(options: { isTTY?: boolean } = {}) { + let stdout = ''; + let stderr = ''; + return { + io: { + stdout: { + isTTY: options.isTTY, + write: (chunk: string) => { + stdout += chunk; + }, + }, + stderr: { + write: (chunk: string) => { + stderr += chunk; + }, + }, + }, + stdout: () => stdout, + stderr: () => stderr, + }; +} + +/** Builds a project fixture with projectDir /tmp/project whose config is the result of buildDefaultKloProjectConfig for warehouse, with the connections map replaced by the given one. */ function projectWithConnections(connections: KloProjectConfig['connections']): KloPublicIngestProject { + return { + projectDir: '/tmp/project', + config: { + ...buildDefaultKloProjectConfig('warehouse'), + connections, + }, + }; +} + +/* Plan construction: target ordering, the required selection arguments, and rejection of unsupported drivers. */ describe('buildPublicIngestPlan', () => { + it('plans warehouse connections as scan targets and source connections as source ingest targets', () => { + const project = projectWithConnections({ + warehouse: { driver: 'postgres' }, + prod_metabase: { driver: 'metabase' }, + docs: { driver: 'notion' }, + }); + + expect(buildPublicIngestPlan(project, { projectDir: '/tmp/project', all: true })).toEqual({ + projectDir: '/tmp/project', + targets: [ + { + connectionId: 'warehouse', + driver:
'postgres', + operation: 'scan', + debugCommand: 'klo scan warehouse --debug', + steps: ['scan'], + }, + { + connectionId: 'docs', + driver: 'notion', + operation: 'source-ingest', + adapter: 'notion', + debugCommand: 'klo dev ingest run --connection-id docs --adapter notion --debug', + steps: ['source-ingest', 'memory-update'], + }, + { + connectionId: 'prod_metabase', + driver: 'metabase', + operation: 'source-ingest', + adapter: 'metabase', + debugCommand: 'klo dev ingest run --connection-id prod_metabase --adapter metabase --debug', + steps: ['source-ingest', 'memory-update'], + }, + ], + }); + }); + + it('rejects bare non-interactive ingest until the interactive confirmation slice exists', () => { + const project = projectWithConnections({ warehouse: { driver: 'postgres' } }); + + expect(() => buildPublicIngestPlan(project, { projectDir: '/tmp/project', all: false })).toThrow( + 'klo ingest requires or --all in this release', + ); + }); + + it('does not plan PostHog connections as CLI ingest targets', () => { + const project = projectWithConnections({ product: { driver: 'posthog' } }); + + expect(() => + buildPublicIngestPlan(project, { projectDir: '/tmp/project', targetConnectionId: 'product', all: false }), + ).toThrow('Connection "product" uses unsupported public ingest driver "posthog"'); + }); +}); + +/* Behaviour of runKloPublicIngest with injected loadProject, runScan and runIngest stubs: resolved exit codes, the arguments forwarded to each runner, and the rendered plain or JSON output. */ describe('runKloPublicIngest', () => { + it('runs all independent targets and reports partial failures', async () => { + const io = makeIo(); + const project = projectWithConnections({ + warehouse: { driver: 'postgres' }, + prod_metabase: { driver: 'metabase' }, + }); + const runScan = vi.fn(async () => 1); + const runIngest = vi.fn(async () => 0); + + await expect( + runKloPublicIngest( + { command: 'run', projectDir: '/tmp/project', all: true, json: false, inputMode: 'disabled' }, + io.io, + { + loadProject: vi.fn(async () => project), + runScan, + runIngest, + }, + ), + ).resolves.toBe(1); + + expect(runIngest).toHaveBeenCalledWith( + { + command: 'run',
+ projectDir: '/tmp/project', + connectionId: 'prod_metabase', + adapter: 'metabase', + outputMode: 'plain', + inputMode: 'disabled', + }, + expect.anything(), + ); + expect(runScan).toHaveBeenCalledWith( + { + command: 'run', + projectDir: '/tmp/project', + connectionId: 'warehouse', + mode: 'structural', + detectRelationships: false, + dryRun: false, + }, + expect.anything(), + ); + expect(io.stdout()).toContain('Ingest finished with partial failures'); + expect(io.stdout()).toContain('warehouse failed at scan.'); + expect(io.stdout()).toContain('Debug: klo scan warehouse --debug'); + }); + + it('can request enriched relationship scans for setup-managed context builds', async () => { + const io = makeIo(); + const project = projectWithConnections({ warehouse: { driver: 'postgres' } }); + const runScan = vi.fn(async () => 0); + + await expect( + runKloPublicIngest( + { + command: 'run', + projectDir: '/tmp/project', + all: true, + json: false, + inputMode: 'disabled', + scanMode: 'enriched', + detectRelationships: true, + }, + io.io, + { + loadProject: vi.fn(async () => project), + runScan, + }, + ), + ).resolves.toBe(0); + + expect(runScan).toHaveBeenCalledWith( + { + command: 'run', + projectDir: '/tmp/project', + connectionId: 'warehouse', + mode: 'enriched', + detectRelationships: true, + dryRun: false, + }, + io.io, + ); + }); + + it('prints stable JSON results', async () => { + const io = makeIo(); + const project = projectWithConnections({ warehouse: { driver: 'postgres' } }); + + await expect( + runKloPublicIngest( + { + command: 'run', + projectDir: '/tmp/project', + targetConnectionId: 'warehouse', + all: false, + json: true, + inputMode: 'disabled', + }, + io.io, + { + loadProject: vi.fn(async () => project), + runScan: vi.fn(async () => 0), + }, + ), + ).resolves.toBe(0); + + expect(JSON.parse(io.stdout())).toMatchObject({ + plan: { projectDir: '/tmp/project' }, + results: [{ connectionId: 'warehouse', driver: 'postgres' }], + }); + }); + + it('passes 
dbt source_dir from connection config to runKloIngest', async () => { + const runIngest = vi.fn(async () => 0); + const io = makeIo(); + + await expect( + runKloPublicIngest( + { + command: 'run', + projectDir: '/tmp/klo', + targetConnectionId: 'analytics_dbt', + all: false, + json: false, + inputMode: 'disabled', + }, + io.io, + { + loadProject: async () => + ({ + projectDir: '/tmp/klo', + config: { + connections: { + analytics_dbt: { + driver: 'dbt', + source_dir: '/repo/dbt', + }, + }, + }, + }) as never, + runIngest, + }, + ), + ).resolves.toBe(0); + + expect(runIngest).toHaveBeenCalledWith( + expect.objectContaining({ + command: 'run', + connectionId: 'analytics_dbt', + adapter: 'dbt', + sourceDir: '/repo/dbt', + }), + io.io, + ); + }); + + it('routes public status and watch to the ingest status renderer', async () => { + const runIngest = vi.fn(async () => 0); + const statusIo = makeIo(); + const watchIo = makeIo(); + + await expect( + runKloPublicIngest( + { command: 'status', projectDir: '/tmp/klo', json: false, inputMode: 'disabled' }, + statusIo.io, + { runIngest }, + ), + ).resolves.toBe(0); + await expect( + runKloPublicIngest( + { command: 'watch', projectDir: '/tmp/klo', runId: 'run-1', json: false, inputMode: 'auto' }, + watchIo.io, + { runIngest }, + ), + ).resolves.toBe(0); + + expect(runIngest).toHaveBeenNthCalledWith( + 1, + { + command: 'status', + projectDir: '/tmp/klo', + outputMode: 'plain', + inputMode: 'disabled', + }, + statusIo.io, + ); + expect(runIngest).toHaveBeenNthCalledWith( + 2, + { + command: 'watch', + projectDir: '/tmp/klo', + runId: 'run-1', + outputMode: 'viz', + inputMode: 'auto', + }, + watchIo.io, + ); + }); +}); diff --git a/packages/cli/src/public-ingest.ts b/packages/cli/src/public-ingest.ts new file mode 100644 index 00000000..d3aa6bc3 --- /dev/null +++ b/packages/cli/src/public-ingest.ts @@ -0,0 +1,315 @@ +import { type KloLocalProject, type KloProjectConnectionConfig, loadKloProject } from '@klo/context/project'; 
+import type { KloCliIo } from './index.js'; +import type { KloIngestArgs } from './ingest.js'; +import type { KloScanArgs } from './scan.js'; +import { profileMark } from './startup-profile.js'; + +/* Module-level side effect: profileMark is called as soon as this module is evaluated (presumably a startup timing marker - confirm in startup-profile). */ profileMark('module:public-ingest'); + +/** Names of the four steps reported for every ingest target. */ export type KloPublicIngestStepName = 'scan' | 'source-ingest' | 'enrich' | 'memory-update'; +/** Status values a reported step can take. */ export type KloPublicIngestStepStatus = 'done' | 'skipped' | 'failed' | 'not-run'; +/** Whether interactive input may be used (auto) or is turned off (disabled). */ export type KloPublicIngestInputMode = 'auto' | 'disabled'; + +/** Arguments of the public ingest command. The run variant selects one connection (targetConnectionId) or all of them and may override the scan mode and relationship detection; the status and watch variants optionally carry a run id. NOTE(review): scanMode reads as Extract with no visible type arguments in this copy of the patch - confirm against the original source. */ export type KloPublicIngestArgs = + | { + command: 'run'; + projectDir: string; + targetConnectionId?: string; + all: boolean; + json: boolean; + inputMode: KloPublicIngestInputMode; + scanMode?: Extract['mode']; + detectRelationships?: boolean; + } + | { + command: 'status' | 'watch'; + projectDir: string; + runId?: string; + json: boolean; + inputMode: KloPublicIngestInputMode; + }; + +/** One planned unit of work for a connection: either a warehouse scan or a source ingest, the steps it will go through, and the CLI command suggested for debugging it. */ export interface KloPublicIngestPlanTarget { + connectionId: string; + driver: string; + operation: 'scan' | 'source-ingest'; + adapter?: string; + sourceDir?: string; + debugCommand: string; + steps: KloPublicIngestStepName[]; +} + +/** The ordered list of targets to execute for a project directory. */ export interface KloPublicIngestPlan { + projectDir: string; + targets: KloPublicIngestPlanTarget[]; +} + +/** Outcome for one connection: an entry per step with its status and, where set, a failure detail and a debug command. */ export interface KloPublicIngestTargetResult { + connectionId: string; + driver: string; + steps: Array<{ + operation: KloPublicIngestStepName; + status: KloPublicIngestStepStatus; + detail?: string; + debugCommand?: string; + }>; +} + +/** Project shape consumed by this module. NOTE(review): reads as Pick with no visible type arguments in this copy of the patch; the code in this file only reads config.connections from it - confirm against the original source. */ export type KloPublicIngestProject = Pick; + +/** Optional replacements for project loading and for the scan and ingest runners; each is used in place of the real implementation when provided. NOTE(review): the Parameters and Promise type arguments are not visible in this copy of the patch. */ export interface KloPublicIngestDeps { + loadProject?: (options: Parameters[0]) => Promise; + runScan?: (args: KloScanArgs, io: KloCliIo) => Promise; + runIngest?: (args: KloIngestArgs, io: KloCliIo) => Promise; +} + +/* Maps a normalized connection driver to the source-ingest adapter name; local_metabase and local_looker share the adapter of their non-local counterpart. */ const sourceAdapterByDriver = new Map([ + ['metabase', 'metabase'], + ['local_metabase', 'metabase'], + ['looker', 'looker'], + ['local_looker', 'looker'], + ['notion', 'notion'], + ['metricflow', 'metricflow'], + ['dbt', 'dbt'], + ['lookml', 'lookml'], +]); + +/* Drivers that are planned as warehouse scans rather than source ingests. */ const warehouseDrivers =
new Set([ + 'sqlite', + 'postgres', + 'postgresql', + 'mysql', + 'clickhouse', + 'sqlserver', + 'bigquery', + 'snowflake', +]); + +/** Returns the connection driver as a trimmed, lower-cased string; a null or undefined driver yields the empty string. */ function normalizedDriver(connection: KloProjectConnectionConfig): string { + return String(connection.driver ?? '') + .trim() + .toLowerCase(); +} + +/** Reads source_dir, falling back to sourceDir, and returns it trimmed when it is a non-blank string; otherwise returns undefined. */ function sourceDirForConnection(connection: KloProjectConnectionConfig): string | undefined { + const value = connection.source_dir ?? connection.sourceDir; + return typeof value === 'string' && value.trim().length > 0 ? value.trim() : undefined; +} + +/** Builds the plan target for one connection. Drivers found in sourceAdapterByDriver become source-ingest targets with memory-update as a second step; drivers in warehouseDrivers become scan targets; any other driver throws an unsupported-driver error. The adapter lookup is checked first. */ function targetForConnection(connectionId: string, connection: KloProjectConnectionConfig): KloPublicIngestPlanTarget { + const driver = normalizedDriver(connection); + const adapter = sourceAdapterByDriver.get(driver); + const sourceDir = sourceDirForConnection(connection); + if (adapter) { + return { + connectionId, + driver, + operation: 'source-ingest', + adapter, + ...(sourceDir ? { sourceDir } : {}), + debugCommand: `klo dev ingest run --connection-id ${connectionId} --adapter ${adapter} --debug`, + steps: ['source-ingest', 'memory-update'], + }; + } + + if (warehouseDrivers.has(driver)) { + return { + connectionId, + driver, + operation: 'scan', + debugCommand: `klo scan ${connectionId} --debug`, + steps: ['scan'], + }; + } + + throw new Error(`Connection "${connectionId}" uses unsupported public ingest driver "${driver || 'unknown'}"`); +} + +/** Builds the ingest plan for either all configured connections or the single targetConnectionId. Connections are visited in localeCompare order of their ids, and the resulting targets are emitted with every scan before every source ingest. Throws when neither selector is given, when the requested connection is not configured, when there are no connections at all, or when a selected driver is unsupported. NOTE(review): the first error message reads requires or --all, which suggests a placeholder was lost in this copy of the patch - confirm against the original source. */ export function buildPublicIngestPlan( + project: KloPublicIngestProject, + args: { projectDir: string; targetConnectionId?: string; all: boolean }, +): KloPublicIngestPlan { + if (!args.all && !args.targetConnectionId) { + throw new Error('klo ingest requires or --all in this release'); + } + + const entries = Object.entries(project.config.connections).sort(([a], [b]) => a.localeCompare(b)); + const selected = args.all ?
entries : entries.filter(([connectionId]) => connectionId === args.targetConnectionId); + + if (!args.all && selected.length === 0) { + throw new Error(`Connection "${args.targetConnectionId}" is not configured in klo.yaml`); + } + if (selected.length === 0) { + throw new Error('No configured connections are eligible for ingest'); + } + + const targets = selected.map(([connectionId, connection]) => targetForConnection(connectionId, connection)); + return { + projectDir: args.projectDir, + targets: [...targets.filter((t) => t.operation === 'scan'), ...targets.filter((t) => t.operation === 'source-ingest')], + }; +} + +/** Produces the fixed four-entry step list (scan, source-ingest, enrich, memory-update). Steps listed on the target start as not-run and the others as skipped; enrich is always skipped. The debug command is attached to the scan step for scan targets and to both the source-ingest and memory-update steps for source-ingest targets. */ function defaultSteps(target: KloPublicIngestPlanTarget): KloPublicIngestTargetResult['steps'] { + return [ + { + operation: 'scan', + status: target.steps.includes('scan') ? 'not-run' : 'skipped', + ...(target.operation === 'scan' ? { debugCommand: target.debugCommand } : {}), + }, + { + operation: 'source-ingest', + status: target.steps.includes('source-ingest') ? 'not-run' : 'skipped', + ...(target.operation === 'source-ingest' ? { debugCommand: target.debugCommand } : {}), + }, + { operation: 'enrich', status: 'skipped' }, + { + operation: 'memory-update', + status: target.steps.includes('memory-update') ? 'not-run' : 'skipped', + ...(target.operation === 'source-ingest' ? { debugCommand: target.debugCommand } : {}), + }, + ]; +} + +/** Converts a target and its overall outcome into a result. Steps not listed on the target keep their default entry. On done, every listed step becomes done. On failed, the primary step (scan for scan targets, otherwise source-ingest) becomes failed with a detail message and any other listed step is reported as not-run. */ function markTargetResult(target: KloPublicIngestPlanTarget, status: 'done' | 'failed'): KloPublicIngestTargetResult { + const failedOperation = target.operation === 'scan' ?
'scan' : 'source-ingest'; + return { + connectionId: target.connectionId, + driver: target.driver, + steps: defaultSteps(target).map((step) => { + if (!target.steps.includes(step.operation)) { + return step; + } + if (status === 'done') { + return { ...step, status: 'done' }; + } + if (step.operation === failedOperation) { + return { ...step, status: 'failed', detail: `${target.connectionId} failed at ${failedOperation}.` }; + } + return { ...step, status: 'not-run' }; + }), + }; +} + +/** True when at least one step of the result has status failed. */ function resultFailed(result: KloPublicIngestTargetResult): boolean { + return result.steps.some((step) => step.status === 'failed'); +} + +/** Looks up the status recorded for the given step; falls back to not-run when the result has no entry for it. */ function stepStatus(result: KloPublicIngestTargetResult, operation: KloPublicIngestStepName): string { + return result.steps.find((step) => step.operation === operation)?.status ?? 'not-run'; +} + +/** Writes the human-readable summary to io.stdout: a headline that depends on whether any result failed, a header row, one padded row per connection, and - only when there are failures - a list with each failure detail and its debug command when one is set. */ function renderPlainResults(results: KloPublicIngestTargetResult[], io: KloCliIo): void { + const failures = results.filter(resultFailed); + io.stdout.write(failures.length > 0 ? 'Ingest finished with partial failures\n' : 'Ingest finished\n'); + io.stdout.write('\n'); + io.stdout.write('Source Scan Source ingest Enrich Memory update\n'); + for (const result of results) { + io.stdout.write( + `${result.connectionId.padEnd(14)} ${stepStatus(result, 'scan').padEnd(9)} ${stepStatus( + result, + 'source-ingest', + ).padEnd(14)} ${stepStatus(result, 'enrich').padEnd(8)} ${stepStatus(result, 'memory-update')}\n`, + ); + } + + if (failures.length === 0) { + return; + } + + io.stdout.write('\nFailed sources:\n'); + for (const result of failures) { + const failedStep = result.steps.find((step) => step.status === 'failed'); + if (!failedStep) { + continue; + } + io.stdout.write(` ${failedStep.detail ??
`${result.connectionId} failed.`}\n`); + if (failedStep.debugCommand) { + io.stdout.write(` Debug: ${failedStep.debugCommand}\n`); + } + } +} + +/** True when io carries a stdin object that reports isTTY and exposes a setRawMode function. The cast is needed because stdin is read through a structural type rather than from KloCliIo directly. */ function hasInteractiveInput(io: KloCliIo): boolean { + const stdin = (io as { stdin?: { isTTY?: boolean; setRawMode?: (value: boolean) => void } }).stdin; + return stdin?.isTTY === true && typeof stdin.setRawMode === 'function'; +} + +/** Chooses viz only when the input mode is auto, stdout is a TTY and stdin is interactive; otherwise plain. NOTE(review): the args type reads as Extract with no visible type arguments in this copy of the patch. */ function sourceIngestOutputMode(args: Extract, io: KloCliIo): 'plain' | 'viz' { + return args.inputMode === 'auto' && io.stdout.isTTY === true && hasInteractiveInput(io) ? 'viz' : 'plain'; +} + +/** Runs one plan target and returns its result. Scan targets call the scan runner with mode defaulting to structural, relationship detection defaulting to false and dryRun false. All other targets call the ingest runner with the target adapter (falling back to the driver), the optional source directory and the computed output mode. A zero exit code marks the target done; any other value marks it failed. The real runner module is imported dynamically before the deps override is consulted, and an exception thrown by a runner is not caught here. */ export async function executePublicIngestTarget( + target: KloPublicIngestPlanTarget, + args: Extract, + io: KloCliIo, + deps: KloPublicIngestDeps, +): Promise { + if (target.operation === 'scan') { + const { runKloScan } = await import('./scan.js'); + const exitCode = await (deps.runScan ?? runKloScan)( + { + command: 'run', + projectDir: args.projectDir, + connectionId: target.connectionId, + mode: args.scanMode ?? 'structural', + detectRelationships: args.detectRelationships ?? false, + dryRun: false, + }, + io, + ); + return markTargetResult(target, exitCode === 0 ? 'done' : 'failed'); + } + + const { runKloIngest } = await import('./ingest.js'); + const exitCode = await (deps.runIngest ?? runKloIngest)( + { + command: 'run', + projectDir: args.projectDir, + connectionId: target.connectionId, + adapter: target.adapter ?? target.driver, + ...(target.sourceDir ? { sourceDir: target.sourceDir } : {}), + outputMode: sourceIngestOutputMode(args, io), + inputMode: args.inputMode, + }, + io, + ); + return markTargetResult(target, exitCode === 0 ? 'done' : 'failed'); +} + +/** Entry point of the public ingest command. Status and watch are forwarded to the ingest runner, with output mode json when requested and otherwise viz for watch and plain for status. Run loads the project, builds the plan, executes the targets one after another, prints either a JSON document with plan and results or the plain summary, and resolves to 1 when any target failed and 0 otherwise. */ export async function runKloPublicIngest( + args: KloPublicIngestArgs, + io: KloCliIo, + deps: KloPublicIngestDeps = {}, +): Promise { + if (args.command !== 'run') { + const { runKloIngest } = await import('./ingest.js'); + return await (deps.runIngest ??
runKloIngest)( + { + command: args.command, + projectDir: args.projectDir, + ...(args.runId ? { runId: args.runId } : {}), + outputMode: args.json ? 'json' : args.command === 'watch' ? 'viz' : 'plain', + inputMode: args.inputMode, + }, + io, + ); + } + + const loadProject = deps.loadProject ?? loadKloProject; + const project = await loadProject({ projectDir: args.projectDir }); + const plan = buildPublicIngestPlan(project, args); + const results: KloPublicIngestTargetResult[] = []; + + for (const target of plan.targets) { + results.push(await executePublicIngestTarget(target, args, io, deps)); + } + + if (args.json) { + io.stdout.write(`${JSON.stringify({ plan, results }, null, 2)}\n`); + } else { + renderPlainResults(results, io); + } + + return results.some(resultFailed) ? 1 : 0; +} diff --git a/packages/cli/src/scan.test.ts b/packages/cli/src/scan.test.ts new file mode 100644 index 00000000..256f98ed --- /dev/null +++ b/packages/cli/src/scan.test.ts @@ -0,0 +1,2151 @@ +import { mkdtemp, readFile, rm, writeFile } from 'node:fs/promises'; +import { tmpdir } from 'node:os'; +import { join } from 'node:path'; +import { initKloProject } from '@klo/context/project'; +import type { + ApplyLocalScanRelationshipReviewDecisionsResult, + ExportLocalRelationshipFeedbackLabelsResult, + KloRelationshipFeedbackCalibrationReport, + KloRelationshipThresholdAdviceReport, + KloScanReport, + LocalScanRunResult, + LocalScanStatusResponse, + ReadLocalScanRelationshipArtifactsResult, + RunLocalScanOptions, + WriteLocalScanRelationshipReviewDecisionResult, +} from '@klo/context/scan'; +import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'; +import { createCliScanProgress, runKloScan } from './scan.js'; + +const sqlServerExtractSchema = vi.hoisted(() => + vi.fn(async (connectionId: string) => ({ + connectionId, + extractedAt: '2026-04-29T16:00:00.000Z', + metadata: { database: 'analytics' }, + tables: [ + { + catalog: 'analytics', + db: 'dbo', + name: 'orders', + 
columns: [{ name: 'id', type: 'int', nullable: false, primaryKey: true }], + foreignKeys: [], + }, + ], + })), +); +const createSqlServerLiveDatabaseIntrospection = vi.hoisted(() => + vi.fn(() => ({ extractSchema: sqlServerExtractSchema })), +); +const isKloSqlServerConnectionConfig = vi.hoisted(() => + vi.fn((connection: { driver?: string } | undefined) => connection?.driver === 'sqlserver'), +); +const KloSqlServerScanConnector = vi.hoisted( + () => + class { + readonly id: string; + readonly driver = 'sqlserver'; + + constructor(options: { connectionId: string }) { + this.id = `sqlserver:${options.connectionId}`; + } + }, +); +const bigQueryExtractSchema = vi.hoisted(() => + vi.fn(async (connectionId: string) => ({ + connectionId, + extractedAt: '2026-04-29T17:00:00.000Z', + metadata: { project_id: 'project-1', datasets: ['analytics'] }, + tables: [ + { + catalog: 'project-1', + db: 'analytics', + name: 'orders', + columns: [{ name: 'id', type: 'INT64', nullable: false, primaryKey: true }], + foreignKeys: [], + }, + ], + })), +); +const createBigQueryLiveDatabaseIntrospection = vi.hoisted(() => + vi.fn(() => ({ extractSchema: bigQueryExtractSchema })), +); +const isKloBigQueryConnectionConfig = vi.hoisted(() => + vi.fn((connection: { driver?: string } | undefined) => connection?.driver === 'bigquery'), +); +const KloBigQueryScanConnector = vi.hoisted( + () => + class { + readonly id: string; + readonly driver = 'bigquery'; + + constructor(options: { connectionId: string }) { + this.id = `bigquery:${options.connectionId}`; + } + }, +); +const snowflakeExtractSchema = vi.hoisted(() => + vi.fn(async (connectionId: string) => ({ + connectionId, + extractedAt: '2026-04-29T18:00:00.000Z', + metadata: { database: 'ANALYTICS', schemas: ['PUBLIC'] }, + tables: [ + { + catalog: 'ANALYTICS', + db: 'PUBLIC', + name: 'ORDERS', + columns: [{ name: 'ID', type: 'NUMBER', nullable: false, primaryKey: true }], + foreignKeys: [], + }, + ], + })), +); +const 
createSnowflakeLiveDatabaseIntrospection = vi.hoisted(() => + vi.fn(() => ({ extractSchema: snowflakeExtractSchema })), +); +const isKloSnowflakeConnectionConfig = vi.hoisted(() => + vi.fn((connection: { driver?: string } | undefined) => connection?.driver === 'snowflake'), +); +const KloSnowflakeScanConnector = vi.hoisted( + () => + class { + readonly id: string; + readonly driver = 'snowflake'; + + constructor(options: { connectionId: string }) { + this.id = `snowflake:${options.connectionId}`; + } + }, +); +const postgresExtractSchema = vi.hoisted(() => + vi.fn(async (connectionId: string) => ({ + connectionId, + extractedAt: '2026-04-29T12:00:00.000Z', + metadata: { database: 'analytics' }, + tables: [], + })), +); +const createPostgresLiveDatabaseIntrospection = vi.hoisted(() => + vi.fn(() => ({ extractSchema: postgresExtractSchema })), +); +const isKloPostgresConnectionConfig = vi.hoisted(() => + vi.fn((connection: { driver?: string } | undefined) => + ['postgres', 'postgresql'].includes(String(connection?.driver ?? 
'').toLowerCase()), + ), +); +const KloPostgresScanConnector = vi.hoisted( + () => + class { + readonly id: string; + readonly driver = 'postgres'; + + constructor(options: { connectionId: string }) { + this.id = `postgres:${options.connectionId}`; + } + }, +); + +vi.mock('@klo/connector-sqlserver', () => ({ + createSqlServerLiveDatabaseIntrospection, + isKloSqlServerConnectionConfig, + KloSqlServerScanConnector, +})); + +vi.mock('@klo/connector-bigquery', () => ({ + createBigQueryLiveDatabaseIntrospection, + isKloBigQueryConnectionConfig, + KloBigQueryScanConnector, +})); + +vi.mock('@klo/connector-snowflake', () => ({ + createSnowflakeLiveDatabaseIntrospection, + isKloSnowflakeConnectionConfig, + KloSnowflakeScanConnector, +})); + +vi.mock('@klo/connector-postgres', () => ({ + createPostgresLiveDatabaseIntrospection, + isKloPostgresConnectionConfig, + KloPostgresScanConnector, +})); + +function makeIo(options: { isTTY?: boolean } = {}) { + let stdout = ''; + let stderr = ''; + return { + io: { + stdout: { + isTTY: options.isTTY, + write: (chunk: string) => { + stdout += chunk; + }, + }, + stderr: { + write: (chunk: string) => { + stderr += chunk; + }, + }, + }, + stdout: () => stdout, + stderr: () => stderr, + }; +} + +const report: KloScanReport = { + connectionId: 'warehouse', + driver: 'postgres', + syncId: 'sync-1', + runId: 'scan-run-1', + trigger: 'cli', + mode: 'structural', + dryRun: false, + artifactPaths: { + rawSourcesDir: 'raw-sources/warehouse/live-database/sync-1', + reportPath: 'raw-sources/warehouse/live-database/sync-1/scan-report.json', + manifestShards: [], + enrichmentArtifacts: [], + }, + diffSummary: { + tablesAdded: 1, + tablesModified: 0, + tablesDeleted: 0, + tablesUnchanged: 0, + columnsAdded: 0, + columnsModified: 0, + columnsDeleted: 0, + }, + manifestShardsWritten: 0, + structuralSyncStats: { + tablesCreated: 0, + tablesUpdated: 0, + tablesDeleted: 0, + columnsCreated: 0, + columnsUpdated: 0, + columnsDeleted: 0, + }, + enrichment: { 
+ dataDictionary: 'skipped', + tableDescriptions: 'skipped', + columnDescriptions: 'skipped', + embeddings: 'skipped', + deterministicRelationships: 'skipped', + llmRelationshipValidation: 'skipped', + statisticalValidation: 'skipped', + }, + capabilityGaps: [], + warnings: [], + relationships: { accepted: 0, review: 0, rejected: 0, skipped: 0 }, + enrichmentState: { + resumedStages: [], + completedStages: [], + failedStages: [], + }, + createdAt: '2026-04-29T09:00:00.000Z', +}; + +const reportWithAttention: KloScanReport = { + ...report, + mode: 'relationships', + diffSummary: { + tablesAdded: 3, + tablesModified: 2, + tablesDeleted: 0, + tablesUnchanged: 13, + columnsAdded: 18, + columnsModified: 5, + columnsDeleted: 0, + }, + capabilityGaps: ['columnStats'], + warnings: [ + { + code: 'connector_capability_missing', + message: 'KLO scan connector is missing optional capability: columnStats', + recoverable: true, + metadata: { capability: 'columnStats' }, + }, + { + code: 'relationship_validation_failed', + message: 'Could not validate relationship orders.customer_id -> customers.id', + table: 'orders', + column: 'customer_id', + recoverable: true, + }, + ], + relationships: { accepted: 7, review: 3, rejected: 2, skipped: 4 }, + enrichmentState: { + resumedStages: ['relationships'], + completedStages: ['descriptions', 'relationships'], + failedStages: [], + }, + artifactPaths: { + ...report.artifactPaths, + manifestShards: ['raw-sources/warehouse/live-database/sync-1/_schema/shard-000.json'], + enrichmentArtifacts: ['raw-sources/warehouse/live-database/sync-1/_enrichment/relationships.json'], + }, +}; + +describe('runKloScan', () => { + let tempDir: string; + + beforeEach(async () => { + tempDir = await mkdtemp(join(tmpdir(), 'klo-cli-scan-')); + }); + + afterEach(async () => { + await rm(tempDir, { recursive: true, force: true }); + }); + + it('runs structural scans and prints a dev-friendly plain summary', async () => { + await initKloProject({ projectDir: 
tempDir, projectName: 'warehouse' }); + const runLocalScan = vi.fn( + async (_input: RunLocalScanOptions): Promise => ({ + runId: 'scan-run-1', + status: 'done', + done: true, + connectionId: 'warehouse', + mode: 'structural', + dryRun: false, + syncId: 'sync-1', + report, + }), + ); + const io = makeIo(); + + await expect( + runKloScan( + { + command: 'run', + projectDir: tempDir, + connectionId: 'warehouse', + mode: 'structural', + detectRelationships: false, + dryRun: false, + databaseIntrospectionUrl: 'http://127.0.0.1:8765', + }, + io.io, + { runLocalScan }, + ), + ).resolves.toBe(0); + + expect(runLocalScan).toHaveBeenCalledWith( + expect.objectContaining({ + connectionId: 'warehouse', + mode: 'structural', + databaseIntrospectionUrl: 'http://127.0.0.1:8765', + connector: undefined, + }), + ); + expect(io.stdout()).toContain('KLO scan completed\n'); + expect(io.stdout()).toContain('Run: scan-run-1'); + expect(io.stdout()).toContain('Mode: structural'); + expect(io.stdout()).toContain('What changed\n'); + expect(io.stdout()).toContain('New tables: 1\n'); + expect(io.stdout()).toContain('Changed tables: 0\n'); + expect(io.stdout()).toContain('Removed tables: 0\n'); + expect(io.stdout()).toContain('Unchanged tables: 0\n'); + expect(io.stdout()).toContain('Needs attention\n None\n'); + expect(io.stdout()).toContain('Artifacts\n'); + expect(io.stdout()).toContain('Report: raw-sources/warehouse/live-database/sync-1/scan-report.json'); + expect(io.stdout()).toContain('Next:\n'); + expect(io.stdout()).toContain('klo dev scan status --project-dir '); + expect(io.stdout()).toContain(' scan-run-1\n'); + expect(io.stdout()).toContain('klo dev scan report --project-dir '); + expect(io.stdout()).toContain(' scan-run-1\n'); + expect(io.stdout()).not.toContain('\u001b['); + expect(io.stdout()).not.toContain('✓'); + expect(io.stdout()).not.toContain('+1'); + expect(io.stdout()).not.toContain('/~'); + }); + + it('explains warnings, capability gaps, and relationships in human 
scan summaries', async () => { + await initKloProject({ projectDir: tempDir, projectName: 'warehouse' }); + const runLocalScan = vi.fn( + async (_input: RunLocalScanOptions): Promise => ({ + runId: 'scan-run-1', + status: 'done', + done: true, + connectionId: 'warehouse', + mode: 'relationships', + dryRun: false, + syncId: 'sync-1', + report: reportWithAttention, + }), + ); + const io = makeIo(); + + await expect( + runKloScan( + { + command: 'run', + projectDir: tempDir, + connectionId: 'warehouse', + mode: 'structural', + detectRelationships: false, + dryRun: false, + }, + io.io, + { runLocalScan }, + ), + ).resolves.toBe(0); + + expect(io.stdout()).toContain('Semantic layer comparison found 5 changes across 18 tables'); + expect(io.stdout()).toContain('New columns: 18'); + expect(io.stdout()).toContain('Changed columns: 5'); + expect(io.stdout()).toContain('Relationships\n'); + expect(io.stdout()).toContain('Accepted: 7'); + expect(io.stdout()).toContain('Review: 3'); + expect(io.stdout()).toContain('Rejected: 2'); + expect(io.stdout()).toContain('Skipped: 4'); + expect(io.stdout()).toContain('Needs attention\n'); + expect(io.stdout()).toContain('2 warnings'); + expect(io.stdout()).toContain('1 capability gap'); + expect(io.stdout()).toContain('columnStats is unavailable; relationship confidence may be lower.'); + expect(io.stdout()).toContain( + 'relationship_validation_failed: orders.customer_id: Could not validate relationship orders.customer_id -> customers.id', + ); + expect(io.stdout()).not.toContain('+3'); + expect(io.stdout()).not.toContain('~2'); + expect(io.stdout()).not.toContain('=13'); + }); + + it('prints review-only relationship summaries and validation capability warnings', async () => { + await initKloProject({ projectDir: tempDir, projectName: 'warehouse' }); + const reviewOnlyReport: KloScanReport = { + ...reportWithAttention, + capabilityGaps: [], + warnings: [ + { + code: 'connector_capability_missing', + message: 'KLO scan connector cannot 
run read-only SQL relationship validation', + recoverable: true, + metadata: { capability: 'readOnlySql' }, + }, + ], + relationships: { accepted: 0, review: 12, rejected: 44, skipped: 0 }, + }; + const runLocalScan = vi.fn( + async (_input: RunLocalScanOptions): Promise => ({ + runId: 'scan-run-review', + status: 'done', + done: true, + connectionId: 'warehouse', + mode: 'relationships', + dryRun: false, + syncId: 'sync-review', + report: reviewOnlyReport, + }), + ); + const io = makeIo(); + + await expect( + runKloScan( + { + command: 'run', + projectDir: tempDir, + connectionId: 'warehouse', + mode: 'structural', + detectRelationships: false, + dryRun: false, + }, + io.io, + { runLocalScan }, + ), + ).resolves.toBe(0); + + expect(io.stdout()).toContain('Relationships'); + expect(io.stdout()).toContain('Accepted: 0'); + expect(io.stdout()).toContain('Review: 12'); + expect(io.stdout()).toContain('Rejected: 44'); + expect(io.stdout()).toContain( + 'connector_capability_missing: KLO scan connector cannot run read-only SQL relationship validation', + ); + }); + + it('passes a scan progress port and prints TTY progress messages', async () => { + await initKloProject({ projectDir: tempDir, projectName: 'warehouse' }); + const runLocalScan = vi.fn(async (input: RunLocalScanOptions): Promise => { + await input.progress?.update(0.15, 'Inspecting database schema'); + await input.progress?.update(0.55, 'Semantic layer comparison found 5 changes across 18 tables'); + return { + runId: 'scan-run-1', + status: 'done', + done: true, + connectionId: 'warehouse', + mode: 'relationships', + dryRun: false, + syncId: 'sync-1', + report: reportWithAttention, + }; + }); + const io = makeIo({ isTTY: true }); + const previousCi = process.env.CI; + delete process.env.CI; + + try { + const exitCode = await runKloScan( + { + command: 'run', + projectDir: tempDir, + connectionId: 'warehouse', + mode: 'structural', + detectRelationships: false, + dryRun: false, + }, + io.io, + { 
runLocalScan }, + ); + expect({ exitCode, stderr: io.stderr() }).toEqual({ exitCode: 0, stderr: '' }); + } finally { + if (previousCi === undefined) { + delete process.env.CI; + } else { + process.env.CI = previousCi; + } + } + + expect(runLocalScan.mock.calls[0]?.[0].progress).toBeDefined(); + expect(io.stdout()).toContain('[15%] Inspecting database schema'); + expect(io.stdout()).toContain('[55%] Semantic layer comparison found 5 changes across 18 tables'); + }); + + it('updates transient TTY progress messages in place', async () => { + const io = makeIo({ isTTY: true }); + const previousCi = process.env.CI; + delete process.env.CI; + + try { + const progress = createCliScanProgress(io.io); + await progress.update(0.84, 'Generating descriptions 1/35 tables', { transient: true }); + await progress.update(0.85, 'Generating descriptions 2/35 tables', { transient: true }); + await progress.update(0.9, 'Building embeddings 1/4 batches'); + } finally { + if (previousCi === undefined) { + delete process.env.CI; + } else { + process.env.CI = previousCi; + } + } + + expect(io.stdout()).toContain('\r[84%] Generating descriptions 1/35 tables'); + expect(io.stdout()).toContain('\r[85%] Generating descriptions 2/35 tables'); + expect(io.stdout()).toContain('\n[90%] Building embeddings 1/4 batches\n'); + }); + + it('flushes transient TTY progress messages before printing scan failures', async () => { + await initKloProject({ projectDir: tempDir, projectName: 'warehouse' }); + const runLocalScan = vi.fn(async (input: RunLocalScanOptions): Promise => { + await input.progress?.update(0.42, 'Generating descriptions 3/35 tables', { transient: true }); + throw new Error('scan failed'); + }); + const io = makeIo({ isTTY: true }); + const previousCi = process.env.CI; + delete process.env.CI; + + try { + await expect( + runKloScan( + { + command: 'run', + projectDir: tempDir, + connectionId: 'warehouse', + mode: 'structural', + detectRelationships: false, + dryRun: false, + }, + io.io, 
+ { runLocalScan, createLocalIngestAdapters: () => [] }, + ), + ).resolves.toBe(1); + } finally { + if (previousCi === undefined) { + delete process.env.CI; + } else { + process.env.CI = previousCi; + } + } + + expect(io.stdout()).toContain('\r[42%] Generating descriptions 3/35 tables\u001b[K\n'); + expect(io.stderr()).toBe('scan failed\n'); + }); + + it('does not print live progress messages for non-TTY output', async () => { + await initKloProject({ projectDir: tempDir, projectName: 'warehouse' }); + const runLocalScan = vi.fn(async (input: RunLocalScanOptions): Promise => { + await input.progress?.update(0.15, 'Inspecting database schema'); + return { + runId: 'scan-run-1', + status: 'done', + done: true, + connectionId: 'warehouse', + mode: 'structural', + dryRun: false, + syncId: 'sync-1', + report, + }; + }); + const io = makeIo(); + + await expect( + runKloScan( + { + command: 'run', + projectDir: tempDir, + connectionId: 'warehouse', + mode: 'structural', + detectRelationships: false, + dryRun: false, + }, + io.io, + { runLocalScan }, + ), + ).resolves.toBe(0); + + expect(io.stdout()).not.toContain('[15%]'); + expect(io.stdout()).not.toContain('Inspecting database schema'); + }); + + it('uses terminal-aware visual styling only for TTY output', async () => { + await initKloProject({ projectDir: tempDir, projectName: 'warehouse' }); + const runLocalScan = vi.fn( + async (_input: RunLocalScanOptions): Promise => ({ + runId: 'scan-run-1', + status: 'done', + done: true, + connectionId: 'warehouse', + mode: 'structural', + dryRun: false, + syncId: 'sync-1', + report, + }), + ); + const io = makeIo({ isTTY: true }); + const previousNoColor = process.env.NO_COLOR; + const previousCi = process.env.CI; + const previousTerm = process.env.TERM; + delete process.env.NO_COLOR; + delete process.env.CI; + process.env.TERM = 'xterm-256color'; + + try { + await expect( + runKloScan( + { + command: 'run', + projectDir: tempDir, + connectionId: 'warehouse', + mode: 
'structural', + detectRelationships: false, + dryRun: false, + }, + io.io, + { runLocalScan }, + ), + ).resolves.toBe(0); + } finally { + if (previousNoColor === undefined) { + delete process.env.NO_COLOR; + } else { + process.env.NO_COLOR = previousNoColor; + } + if (previousCi === undefined) { + delete process.env.CI; + } else { + process.env.CI = previousCi; + } + if (previousTerm === undefined) { + delete process.env.TERM; + } else { + process.env.TERM = previousTerm; + } + } + + expect(io.stdout()).toContain('✓'); + expect(io.stdout()).toContain('KLO scan completed'); + expect(io.stdout()).toContain('\u001b['); + }); + + it('honors NO_COLOR for TTY scan summaries', async () => { + await initKloProject({ projectDir: tempDir, projectName: 'warehouse' }); + const runLocalScan = vi.fn( + async (_input: RunLocalScanOptions): Promise => ({ + runId: 'scan-run-1', + status: 'done', + done: true, + connectionId: 'warehouse', + mode: 'structural', + dryRun: false, + syncId: 'sync-1', + report, + }), + ); + const io = makeIo({ isTTY: true }); + const previousNoColor = process.env.NO_COLOR; + process.env.NO_COLOR = '1'; + + try { + await expect( + runKloScan( + { + command: 'run', + projectDir: tempDir, + connectionId: 'warehouse', + mode: 'structural', + detectRelationships: false, + dryRun: false, + }, + io.io, + { runLocalScan }, + ), + ).resolves.toBe(0); + } finally { + if (previousNoColor === undefined) { + delete process.env.NO_COLOR; + } else { + process.env.NO_COLOR = previousNoColor; + } + } + + expect(io.stdout()).toContain('KLO scan completed'); + expect(io.stdout()).not.toContain('\u001b['); + }); + + it('prints status and human report output by default', async () => { + await initKloProject({ projectDir: tempDir, projectName: 'warehouse' }); + const status: LocalScanStatusResponse = { + runId: 'scan-run-1', + status: 'done', + done: true, + connectionId: 'warehouse', + mode: 'structural', + dryRun: false, + syncId: 'sync-1', + progress: 1, + startedAt: 
'2026-04-29T09:00:00.000Z', + completedAt: '2026-04-29T09:00:01.000Z', + reportPath: 'raw-sources/warehouse/live-database/sync-1/scan-report.json', + warnings: [], + }; + const io = makeIo(); + + await expect( + runKloScan({ command: 'status', projectDir: tempDir, runId: 'scan-run-1' }, io.io, { + getLocalScanStatus: vi.fn().mockResolvedValue(status), + }), + ).resolves.toBe(0); + expect(io.stdout()).toContain('Run: scan-run-1'); + expect(io.stdout()).toContain('Status: done'); + + const reportIo = makeIo(); + await expect( + runKloScan({ command: 'report', projectDir: tempDir, runId: 'scan-run-1', json: false }, reportIo.io, { + getLocalScanReport: vi.fn().mockResolvedValue(report), + }), + ).resolves.toBe(0); + expect(reportIo.stdout()).toContain('KLO scan report\n'); + expect(reportIo.stdout()).toContain('Run: scan-run-1'); + expect(reportIo.stdout()).toContain('What changed\n'); + expect(() => JSON.parse(reportIo.stdout())).toThrow(); + }); + + it('prints raw report JSON when requested', async () => { + await initKloProject({ projectDir: tempDir, projectName: 'warehouse' }); + const reportIo = makeIo(); + + await expect( + runKloScan({ command: 'report', projectDir: tempDir, runId: 'scan-run-1', json: true }, reportIo.io, { + getLocalScanReport: vi.fn().mockResolvedValue(report), + }), + ).resolves.toBe(0); + + expect(JSON.parse(reportIo.stdout())).toMatchObject({ runId: 'scan-run-1', connectionId: 'warehouse' }); + }); + + it('prints review relationship artifacts in human form', async () => { + await initKloProject({ projectDir: tempDir, projectName: 'warehouse' }); + const reviewReport: KloScanReport = { + ...reportWithAttention, + runId: 'scan-run-review', + syncId: 'sync-review', + relationships: { accepted: 0, review: 1, rejected: 1, skipped: 0 }, + artifactPaths: { + ...reportWithAttention.artifactPaths, + reportPath: 'raw-sources/warehouse/live-database/sync-review/scan-report.json', + enrichmentArtifacts: [ + 
'raw-sources/warehouse/live-database/sync-review/enrichment/relationships.json', + 'raw-sources/warehouse/live-database/sync-review/enrichment/relationship-diagnostics.json', + 'raw-sources/warehouse/live-database/sync-review/enrichment/relationship-profile.json', + ], + }, + }; + const relationshipArtifacts: ReadLocalScanRelationshipArtifactsResult = { + runId: 'scan-run-review', + connectionId: 'warehouse', + syncId: 'sync-review', + report: reviewReport, + relationships: { + connectionId: 'warehouse', + accepted: [], + review: [ + { + id: 'orders:orders.customer_id->customers:customers.id', + status: 'review', + source: 'deterministic_name', + from: { + tableId: 'orders', + columnIds: ['orders.customer_id'], + table: { catalog: null, db: 'public', name: 'orders' }, + columns: ['customer_id'], + }, + to: { + tableId: 'customers', + columnIds: ['customers.id'], + table: { catalog: null, db: 'public', name: 'customers' }, + columns: ['id'], + }, + relationshipType: 'many_to_one', + confidence: 0.62, + pkScore: 0.91, + fkScore: 0.62, + score: 0.62, + evidence: { sources: ['table_suffix'] }, + validation: { status: 'unavailable' }, + graph: { reasons: ['validation_unavailable_review_only'] }, + reasons: ['validation_unavailable_review_only', 'fk_score_review'], + }, + ], + rejected: [ + { + id: 'orders:orders.note_id->notes:notes.id', + status: 'rejected', + source: 'deterministic_name', + from: { + tableId: 'orders', + columnIds: ['orders.note_id'], + table: { catalog: null, db: 'public', name: 'orders' }, + columns: ['note_id'], + }, + to: { + tableId: 'notes', + columnIds: ['notes.id'], + table: { catalog: null, db: 'public', name: 'notes' }, + columns: ['id'], + }, + relationshipType: 'many_to_one', + confidence: 0.2, + pkScore: 0.4, + fkScore: 0.2, + score: 0.2, + evidence: { sources: ['exact_column_match'] }, + validation: { status: 'failed' }, + graph: { reasons: ['low_source_coverage'] }, + reasons: ['low_source_coverage'], + }, + ], + skipped: [], + }, + 
diagnostics: { + connectionId: 'warehouse', + generatedAt: '2026-05-07T10:00:00.000Z', + summary: { accepted: 0, review: 1, rejected: 1, skipped: 0 }, + noAcceptedReason: 'relationship candidates require review before manifest writes', + candidateCountsBySource: { deterministic_name: 2 }, + validation: { available: false, sqlAvailable: false, queryCount: 0 }, + thresholds: { acceptThreshold: 0.85, reviewThreshold: 0.55 }, + policy: { + validationRequiredForManifest: true, + maxCandidatesPerColumn: 25, + profileSampleRows: 10000, + validationConcurrency: 4, + }, + warnings: [], + profileWarnings: [], + }, + profile: { + connectionId: 'warehouse', + driver: 'sqlite', + sqlAvailable: false, + tables: [], + columns: {}, + queryCount: 0, + warnings: ['KLO scan connector cannot run read-only SQL relationship validation'], + }, + paths: { + relationships: 'raw-sources/warehouse/live-database/sync-review/enrichment/relationships.json', + diagnostics: 'raw-sources/warehouse/live-database/sync-review/enrichment/relationship-diagnostics.json', + profile: 'raw-sources/warehouse/live-database/sync-review/enrichment/relationship-profile.json', + }, + }; + const readLocalScanRelationshipArtifacts = vi.fn(async () => relationshipArtifacts); + + const io = makeIo(); + await expect( + runKloScan( + { + command: 'relationships', + projectDir: tempDir, + runId: 'scan-run-review', + status: 'review', + json: false, + limit: 10, + }, + io.io, + { readLocalScanRelationshipArtifacts }, + ), + ).resolves.toBe(0); + + expect(readLocalScanRelationshipArtifacts).toHaveBeenCalledWith( + expect.objectContaining({ projectDir: tempDir }), + 'scan-run-review', + ); + + expect(io.stdout()).toContain('KLO relationship artifacts'); + expect(io.stdout()).toContain('Run: scan-run-review'); + expect(io.stdout()).toContain('Summary: accepted=0 review=1 rejected=1 skipped=0'); + expect(io.stdout()).toContain('Reason: relationship candidates require review before manifest writes'); + 
expect(io.stdout()).toContain('Review relationships (1)'); + expect(io.stdout()).toContain('orders.customer_id -> customers.id'); + expect(io.stdout()).toContain( + 'type=many_to_one source=deterministic_name confidence=0.62 pkScore=0.91 fkScore=0.62', + ); + expect(io.stdout()).toContain('reasons=validation_unavailable_review_only, fk_score_review'); + expect(io.stdout()).toContain('relationships.json'); + }); + + it('prints filtered relationship artifacts as JSON', async () => { + await initKloProject({ projectDir: tempDir, projectName: 'warehouse' }); + const jsonReport: KloScanReport = { + ...reportWithAttention, + runId: 'scan-run-json', + syncId: 'sync-json', + artifactPaths: { + ...reportWithAttention.artifactPaths, + reportPath: 'raw-sources/warehouse/live-database/sync-json/scan-report.json', + enrichmentArtifacts: ['raw-sources/warehouse/live-database/sync-json/enrichment/relationships.json'], + }, + }; + const relationshipArtifacts: ReadLocalScanRelationshipArtifactsResult = { + runId: 'scan-run-json', + connectionId: 'warehouse', + syncId: 'sync-json', + report: jsonReport, + relationships: { + connectionId: 'warehouse', + accepted: [], + review: [], + rejected: [], + skipped: [{ relationshipId: 'composite:orders', reason: 'composite_key_width_limit' }], + }, + diagnostics: null, + profile: null, + paths: { + relationships: 'raw-sources/warehouse/live-database/sync-json/enrichment/relationships.json', + diagnostics: null, + profile: null, + }, + }; + const readLocalScanRelationshipArtifacts = vi.fn(async () => relationshipArtifacts); + + const io = makeIo(); + await expect( + runKloScan( + { + command: 'relationships', + projectDir: tempDir, + runId: 'scan-run-json', + status: 'skipped', + json: true, + limit: 25, + }, + io.io, + { readLocalScanRelationshipArtifacts }, + ), + ).resolves.toBe(0); + + expect(JSON.parse(io.stdout())).toMatchObject({ + runId: 'scan-run-json', + connectionId: 'warehouse', + status: 'skipped', + relationships: { + accepted: 
[], + review: [], + rejected: [], + skipped: [{ relationshipId: 'composite:orders', reason: 'composite_key_width_limit' }], + }, + }); + }); + + it('records an accepted relationship review decision in human form', async () => { + await initKloProject({ projectDir: tempDir, projectName: 'warehouse' }); + const decisionResult: WriteLocalScanRelationshipReviewDecisionResult = { + path: 'raw-sources/warehouse/live-database/sync-review/enrichment/relationship-review-decisions.json', + decision: { + candidateId: 'orders:orders.customer_id->customers:customers.id', + decision: 'accepted', + previousStatus: 'review', + connectionId: 'warehouse', + runId: 'scan-run-review', + syncId: 'sync-review', + decidedAt: '2026-05-07T12:00:00.000Z', + reviewer: 'Andrey', + note: 'Looks right', + from: { + tableId: 'orders', + columnIds: ['orders.customer_id'], + table: { catalog: null, db: 'public', name: 'orders' }, + columns: ['customer_id'], + }, + to: { + tableId: 'customers', + columnIds: ['customers.id'], + table: { catalog: null, db: 'public', name: 'customers' }, + columns: ['id'], + }, + relationshipType: 'many_to_one', + source: 'deterministic_name', + score: 0.62, + confidence: 0.62, + pkScore: 0.91, + fkScore: 0.62, + reasons: ['fk_score_review'], + }, + artifact: { + connectionId: 'warehouse', + runId: 'scan-run-review', + syncId: 'sync-review', + generatedAt: '2026-05-07T12:00:00.000Z', + decisions: [], + }, + }; + const writeLocalScanRelationshipReviewDecision = vi.fn(async () => decisionResult); + + const io = makeIo(); + await expect( + runKloScan( + { + command: 'relationshipDecision', + projectDir: tempDir, + runId: 'scan-run-review', + candidateId: 'orders:orders.customer_id->customers:customers.id', + decision: 'accepted', + reviewer: 'Andrey', + note: 'Looks right', + json: false, + }, + io.io, + { writeLocalScanRelationshipReviewDecision }, + ), + ).resolves.toBe(0); + + expect(writeLocalScanRelationshipReviewDecision).toHaveBeenCalledWith( + 
expect.objectContaining({ projectDir: tempDir }), + { + runId: 'scan-run-review', + candidateId: 'orders:orders.customer_id->customers:customers.id', + decision: 'accepted', + reviewer: 'Andrey', + note: 'Looks right', + }, + ); + expect(io.stdout()).toContain('Recorded relationship decision'); + expect(io.stdout()).toContain('Decision: accepted'); + expect(io.stdout()).toContain('Candidate: orders:orders.customer_id->customers:customers.id'); + expect(io.stdout()).toContain('Previous status: review'); + expect(io.stdout()).toContain( + 'Path: raw-sources/warehouse/live-database/sync-review/enrichment/relationship-review-decisions.json', + ); + }); + + it('records a rejected relationship review decision as JSON', async () => { + await initKloProject({ projectDir: tempDir, projectName: 'warehouse' }); + const decisionResult: WriteLocalScanRelationshipReviewDecisionResult = { + path: 'raw-sources/warehouse/live-database/sync-review/enrichment/relationship-review-decisions.json', + decision: { + candidateId: 'orders:orders.customer_id->customers:customers.id', + decision: 'rejected', + previousStatus: 'review', + connectionId: 'warehouse', + runId: 'scan-run-review', + syncId: 'sync-review', + decidedAt: '2026-05-07T12:00:00.000Z', + reviewer: 'Andrey', + note: null, + from: { + tableId: 'orders', + columnIds: ['orders.customer_id'], + table: { catalog: null, db: 'public', name: 'orders' }, + columns: ['customer_id'], + }, + to: { + tableId: 'customers', + columnIds: ['customers.id'], + table: { catalog: null, db: 'public', name: 'customers' }, + columns: ['id'], + }, + relationshipType: 'many_to_one', + source: 'deterministic_name', + score: 0.62, + confidence: 0.62, + pkScore: 0.91, + fkScore: 0.62, + reasons: ['fk_score_review'], + }, + artifact: { + connectionId: 'warehouse', + runId: 'scan-run-review', + syncId: 'sync-review', + generatedAt: '2026-05-07T12:00:00.000Z', + decisions: [], + }, + }; + const writeLocalScanRelationshipReviewDecision = vi.fn(async () => 
decisionResult); + + const io = makeIo(); + await expect( + runKloScan( + { + command: 'relationshipDecision', + projectDir: tempDir, + runId: 'scan-run-review', + candidateId: 'orders:orders.customer_id->customers:customers.id', + decision: 'rejected', + reviewer: 'klo', + note: null, + json: true, + }, + io.io, + { writeLocalScanRelationshipReviewDecision }, + ), + ).resolves.toBe(0); + + expect(JSON.parse(io.stdout())).toMatchObject({ + path: 'raw-sources/warehouse/live-database/sync-review/enrichment/relationship-review-decisions.json', + decision: { + candidateId: 'orders:orders.customer_id->customers:customers.id', + decision: 'rejected', + previousStatus: 'review', + }, + }); + }); + + it('reports missing scan runs when recording relationship decisions', async () => { + await initKloProject({ projectDir: tempDir, projectName: 'warehouse' }); + const writeLocalScanRelationshipReviewDecision = vi.fn(async () => null); + + const io = makeIo(); + await expect( + runKloScan( + { + command: 'relationshipDecision', + projectDir: tempDir, + runId: 'missing-run', + candidateId: 'orders:orders.customer_id->customers:customers.id', + decision: 'accepted', + reviewer: 'klo', + note: null, + json: false, + }, + io.io, + { writeLocalScanRelationshipReviewDecision }, + ), + ).resolves.toBe(1); + + expect(io.stderr()).toContain('Scan run "missing-run" was not found'); + }); + + it('applies accepted relationship review decisions with human output', async () => { + await initKloProject({ projectDir: tempDir, projectName: 'warehouse' }); + const applyLocalScanRelationshipReviewDecisions = vi.fn( + async (): Promise => ({ + runId: 'scan-run-a', + connectionId: 'warehouse', + syncId: 'sync-a', + dryRun: true, + decisionsPath: 'raw-sources/warehouse/live-database/sync-a/enrichment/relationship-review-decisions.json', + selectedDecisions: 1, + appliedRelationships: 1, + relationships: [ + { + id: 'orders:orders.customer_id->customers:customers.id', + source: 'manual', + from: { + 
tableId: 'public.orders', + columnIds: ['public.orders.customer_id'], + table: { catalog: null, db: 'public', name: 'orders' }, + columns: ['customer_id'], + }, + to: { + tableId: 'public.customers', + columnIds: ['public.customers.id'], + table: { catalog: null, db: 'public', name: 'customers' }, + columns: ['id'], + }, + relationshipType: 'many_to_one', + confidence: 1, + isPrimaryKeyReference: true, + }, + ], + manifestShards: [], + manifestShardsWritten: 0, + }), + ); + + const io = makeIo(); + await expect( + runKloScan( + { + command: 'relationshipApply', + projectDir: tempDir, + runId: 'scan-run-a', + applyAllAccepted: true, + candidateIds: [], + dryRun: true, + json: false, + }, + io.io, + { applyLocalScanRelationshipReviewDecisions }, + ), + ).resolves.toBe(0); + + expect(applyLocalScanRelationshipReviewDecisions).toHaveBeenCalledWith( + expect.objectContaining({ projectDir: tempDir }), + { + runId: 'scan-run-a', + applyAllAccepted: true, + candidateIds: [], + dryRun: true, + }, + ); + expect(io.stdout()).toContain('Relationship review apply'); + expect(io.stdout()).toContain('Run: scan-run-a'); + expect(io.stdout()).toContain('Mode: dry-run'); + expect(io.stdout()).toContain('Applied: 1 manual relationship'); + expect(io.stdout()).toContain('Schema shards written: 0'); + }); + + it('prints relationship review apply JSON', async () => { + await initKloProject({ projectDir: tempDir, projectName: 'warehouse' }); + const applyResult: ApplyLocalScanRelationshipReviewDecisionsResult = { + runId: 'scan-run-a', + connectionId: 'warehouse', + syncId: 'sync-a', + dryRun: false, + decisionsPath: 'raw-sources/warehouse/live-database/sync-a/enrichment/relationship-review-decisions.json', + selectedDecisions: 1, + appliedRelationships: 1, + relationships: [], + manifestShards: ['semantic-layer/warehouse/_schema/public.yaml'], + manifestShardsWritten: 1, + }; + const applyLocalScanRelationshipReviewDecisions = vi.fn(async () => applyResult); + + const io = makeIo(); + 
await expect( + runKloScan( + { + command: 'relationshipApply', + projectDir: tempDir, + runId: 'scan-run-a', + applyAllAccepted: false, + candidateIds: ['orders:orders.customer_id->customers:customers.id'], + dryRun: false, + json: true, + }, + io.io, + { applyLocalScanRelationshipReviewDecisions }, + ), + ).resolves.toBe(0); + + expect(JSON.parse(io.stdout())).toEqual(applyResult); + expect(applyLocalScanRelationshipReviewDecisions).toHaveBeenCalledWith( + expect.objectContaining({ projectDir: tempDir }), + { + runId: 'scan-run-a', + applyAllAccepted: false, + candidateIds: ['orders:orders.customer_id->customers:customers.id'], + dryRun: false, + }, + ); + }); + + it('prints relationship feedback export summary in human form', async () => { + await initKloProject({ projectDir: tempDir, projectName: 'warehouse' }); + const feedback: ExportLocalRelationshipFeedbackLabelsResult = { + generatedAt: '2026-05-07T13:00:00.000Z', + filters: { connectionId: null, decision: 'all' }, + summary: { total: 2, accepted: 1, rejected: 1, connections: 1, runs: 1 }, + labels: [ + { + schemaVersion: 1, + candidateId: 'orders:orders.customer_id->customers:customers.id', + decision: 'accepted', + previousStatus: 'review', + connectionId: 'warehouse', + runId: 'scan-run-review', + syncId: 'sync-review', + decidedAt: '2026-05-07T12:00:00.000Z', + reviewer: 'Andrey', + note: 'Confirmed in warehouse docs', + relationshipType: 'many_to_one', + source: 'deterministic_name', + score: 0.62, + confidence: 0.62, + pkScore: 0.91, + fkScore: 0.62, + fromTable: 'public.orders', + fromColumns: ['customer_id'], + toTable: 'public.customers', + toColumns: ['id'], + reasons: ['fk_score_review'], + artifactPath: 'raw-sources/warehouse/live-database/sync-review/enrichment/relationship-review-decisions.json', + }, + { + schemaVersion: 1, + candidateId: 'orders:orders.note_id->notes:notes.id', + decision: 'rejected', + previousStatus: 'rejected', + connectionId: 'warehouse', + runId: 'scan-run-review', + 
syncId: 'sync-review', + decidedAt: '2026-05-07T12:05:00.000Z', + reviewer: 'Andrey', + note: null, + relationshipType: 'many_to_one', + source: 'deterministic_name', + score: 0.2, + confidence: 0.2, + pkScore: 0.4, + fkScore: 0.2, + fromTable: 'public.orders', + fromColumns: ['note_id'], + toTable: 'public.notes', + toColumns: ['id'], + reasons: ['low_source_coverage'], + artifactPath: 'raw-sources/warehouse/live-database/sync-review/enrichment/relationship-review-decisions.json', + }, + ], + warnings: [], + }; + const exportLocalRelationshipFeedbackLabels = vi.fn(async () => feedback); + + const io = makeIo(); + await expect( + runKloScan( + { + command: 'relationshipFeedback', + projectDir: tempDir, + connectionId: null, + decision: 'all', + json: false, + jsonl: false, + }, + io.io, + { exportLocalRelationshipFeedbackLabels }, + ), + ).resolves.toBe(0); + + expect(exportLocalRelationshipFeedbackLabels).toHaveBeenCalledWith( + expect.objectContaining({ projectDir: tempDir }), + { + connectionId: null, + decision: 'all', + }, + ); + expect(io.stdout()).toContain('KLO relationship feedback labels'); + expect(io.stdout()).toContain('Total: 2'); + expect(io.stdout()).toContain('Accepted: 1'); + expect(io.stdout()).toContain('Rejected: 1'); + expect(io.stdout()).toContain('orders.customer_id -> customers.id'); + expect(io.stdout()).toContain('decision=accepted previous=review score=0.62 reviewer=Andrey'); + }); + + it('prints relationship feedback labels as JSONL', async () => { + await initKloProject({ projectDir: tempDir, projectName: 'warehouse' }); + const feedback: ExportLocalRelationshipFeedbackLabelsResult = { + generatedAt: '2026-05-07T13:00:00.000Z', + filters: { connectionId: 'warehouse', decision: 'accepted' }, + summary: { total: 1, accepted: 1, rejected: 0, connections: 1, runs: 1 }, + labels: [ + { + schemaVersion: 1, + candidateId: 'orders:orders.customer_id->customers:customers.id', + decision: 'accepted', + previousStatus: 'review', + connectionId: 
'warehouse', + runId: 'scan-run-review', + syncId: 'sync-review', + decidedAt: '2026-05-07T12:00:00.000Z', + reviewer: 'klo', + note: null, + relationshipType: 'many_to_one', + source: 'deterministic_name', + score: 0.62, + confidence: 0.62, + pkScore: 0.91, + fkScore: 0.62, + fromTable: 'public.orders', + fromColumns: ['customer_id'], + toTable: 'public.customers', + toColumns: ['id'], + reasons: ['fk_score_review'], + artifactPath: 'raw-sources/warehouse/live-database/sync-review/enrichment/relationship-review-decisions.json', + }, + ], + warnings: [], + }; + const exportLocalRelationshipFeedbackLabels = vi.fn(async () => feedback); + const formatKloRelationshipFeedbackLabelsJsonl = vi.fn( + () => '{"candidateId":"orders:orders.customer_id->customers:customers.id"}\n', + ); + + const io = makeIo(); + await expect( + runKloScan( + { + command: 'relationshipFeedback', + projectDir: tempDir, + connectionId: 'warehouse', + decision: 'accepted', + json: false, + jsonl: true, + }, + io.io, + { exportLocalRelationshipFeedbackLabels, formatKloRelationshipFeedbackLabelsJsonl }, + ), + ).resolves.toBe(0); + + expect(exportLocalRelationshipFeedbackLabels).toHaveBeenCalledWith( + expect.objectContaining({ projectDir: tempDir }), + { + connectionId: 'warehouse', + decision: 'accepted', + }, + ); + expect(formatKloRelationshipFeedbackLabelsJsonl).toHaveBeenCalledWith(feedback); + expect(JSON.parse(io.stdout())).toEqual({ candidateId: 'orders:orders.customer_id->customers:customers.id' }); + }); + + it('prints relationship feedback export as JSON', async () => { + await initKloProject({ projectDir: tempDir, projectName: 'warehouse' }); + const feedback: ExportLocalRelationshipFeedbackLabelsResult = { + generatedAt: '2026-05-07T13:00:00.000Z', + filters: { connectionId: null, decision: 'rejected' }, + summary: { total: 0, accepted: 0, rejected: 0, connections: 0, runs: 0 }, + labels: [], + warnings: [], + }; + const exportLocalRelationshipFeedbackLabels = vi.fn(async () => 
feedback); + + const io = makeIo(); + await expect( + runKloScan( + { + command: 'relationshipFeedback', + projectDir: tempDir, + connectionId: null, + decision: 'rejected', + json: true, + jsonl: false, + }, + io.io, + { exportLocalRelationshipFeedbackLabels }, + ), + ).resolves.toBe(0); + + expect(JSON.parse(io.stdout())).toMatchObject({ + filters: { connectionId: null, decision: 'rejected' }, + summary: { total: 0, accepted: 0, rejected: 0 }, + labels: [], + }); + }); + + it('prints relationship feedback calibration as human output', async () => { + await initKloProject({ projectDir: tempDir, projectName: 'warehouse' }); + const calibration: KloRelationshipFeedbackCalibrationReport = { + generatedAt: '2026-05-07T13:00:00.000Z', + filters: { connectionId: null, decision: 'all' }, + thresholds: { accept: 0.85, review: 0.55 }, + summary: { + total: 2, + scored: 2, + unscored: 0, + acceptedLabels: 1, + rejectedLabels: 1, + predictedAccepted: 1, + predictedReview: 0, + predictedRejected: 1, + acceptedBandPrecision: 1, + rejectedBandPrecision: 1, + reviewBandAcceptedRate: null, + meanAcceptedScore: 0.91, + meanRejectedScore: 0.21, + }, + buckets: [ + { + label: '0.00-0.24', + minInclusive: 0, + maxInclusive: 0.24, + total: 1, + accepted: 0, + rejected: 1, + acceptanceRate: 0, + }, + { + label: '0.25-0.49', + minInclusive: 0.25, + maxInclusive: 0.49, + total: 0, + accepted: 0, + rejected: 0, + acceptanceRate: null, + }, + { + label: '0.50-0.74', + minInclusive: 0.5, + maxInclusive: 0.74, + total: 0, + accepted: 0, + rejected: 0, + acceptanceRate: null, + }, + { + label: '0.75-1.00', + minInclusive: 0.75, + maxInclusive: 1, + total: 1, + accepted: 1, + rejected: 0, + acceptanceRate: 1, + }, + ], + labels: [], + warnings: [], + }; + const calibrateLocalRelationshipFeedbackLabels = vi.fn(async () => calibration); + const formatKloRelationshipFeedbackCalibrationMarkdown = vi.fn( + () => 'KLO relationship feedback calibration\nTotal labels: 2\n', + ); + + const io = 
makeIo(); + await expect( + runKloScan( + { + command: 'relationshipCalibration', + projectDir: tempDir, + connectionId: null, + decision: 'all', + acceptThreshold: 0.85, + reviewThreshold: 0.55, + json: false, + }, + io.io, + { calibrateLocalRelationshipFeedbackLabels, formatKloRelationshipFeedbackCalibrationMarkdown }, + ), + ).resolves.toBe(0); + + expect(calibrateLocalRelationshipFeedbackLabels).toHaveBeenCalledWith( + expect.objectContaining({ projectDir: tempDir }), + { + connectionId: null, + decision: 'all', + acceptThreshold: 0.85, + reviewThreshold: 0.55, + }, + ); + expect(formatKloRelationshipFeedbackCalibrationMarkdown).toHaveBeenCalledWith(calibration); + expect(io.stdout()).toBe('KLO relationship feedback calibration\nTotal labels: 2\n'); + }); + + it('prints relationship feedback calibration as JSON', async () => { + await initKloProject({ projectDir: tempDir, projectName: 'warehouse' }); + const calibration: KloRelationshipFeedbackCalibrationReport = { + generatedAt: '2026-05-07T13:00:00.000Z', + filters: { connectionId: 'warehouse', decision: 'rejected' }, + thresholds: { accept: 0.9, review: 0.5 }, + summary: { + total: 0, + scored: 0, + unscored: 0, + acceptedLabels: 0, + rejectedLabels: 0, + predictedAccepted: 0, + predictedReview: 0, + predictedRejected: 0, + acceptedBandPrecision: null, + rejectedBandPrecision: null, + reviewBandAcceptedRate: null, + meanAcceptedScore: null, + meanRejectedScore: null, + }, + buckets: [], + labels: [], + warnings: [], + }; + const calibrateLocalRelationshipFeedbackLabels = vi.fn(async () => calibration); + + const io = makeIo(); + await expect( + runKloScan( + { + command: 'relationshipCalibration', + projectDir: tempDir, + connectionId: 'warehouse', + decision: 'rejected', + acceptThreshold: 0.9, + reviewThreshold: 0.5, + json: true, + }, + io.io, + { calibrateLocalRelationshipFeedbackLabels }, + ), + ).resolves.toBe(0); + + expect(JSON.parse(io.stdout())).toMatchObject({ + filters: { connectionId: 
'warehouse', decision: 'rejected' }, + thresholds: { accept: 0.9, review: 0.5 }, + summary: { total: 0, scored: 0 }, + }); + }); + + it('prints relationship threshold advice as human output', async () => { + await initKloProject({ projectDir: tempDir, projectName: 'warehouse' }); + const advice: KloRelationshipThresholdAdviceReport = { + generatedAt: '2026-05-07T14:00:00.000Z', + filters: { connectionId: null, decision: 'all' }, + status: 'ready', + gates: { + minTotalLabels: 4, + minAcceptedLabels: 2, + minRejectedLabels: 2, + minAcceptedBandPrecision: 0.9, + minAcceptedOrReviewRecall: 0.8, + minRejectedBandPrecision: 0.8, + }, + summary: { + totalLabels: 4, + scoredLabels: 4, + unscoredLabels: 0, + acceptedLabels: 2, + rejectedLabels: 2, + evaluatedCandidates: 2, + eligibleCandidates: 1, + }, + recommended: { + acceptThreshold: 0.9, + reviewThreshold: 0.55, + eligible: true, + predictedAccepted: 1, + predictedReview: 1, + predictedRejected: 2, + acceptedBandPrecision: 1, + acceptedRecall: 0.5, + acceptedOrReviewRecall: 1, + rejectedBandPrecision: 1, + rejectedRecall: 1, + falseAcceptedRejectedLabels: 0, + falseRejectedAcceptedLabels: 0, + }, + candidates: [], + reasons: [], + warnings: [], + }; + const adviseLocalRelationshipFeedbackThresholds = vi.fn(async () => advice); + const formatKloRelationshipThresholdAdviceMarkdown = vi.fn( + () => 'KLO relationship threshold advice\nRecommended: accept=0.90 review=0.55\n', + ); + + const io = makeIo(); + await expect( + runKloScan( + { + command: 'relationshipThresholds', + projectDir: tempDir, + connectionId: null, + minTotalLabels: 4, + minAcceptedLabels: 2, + minRejectedLabels: 2, + json: false, + }, + io.io, + { adviseLocalRelationshipFeedbackThresholds, formatKloRelationshipThresholdAdviceMarkdown }, + ), + ).resolves.toBe(0); + + expect(adviseLocalRelationshipFeedbackThresholds).toHaveBeenCalledWith( + expect.objectContaining({ projectDir: tempDir }), + { + connectionId: null, + minTotalLabels: 4, + 
minAcceptedLabels: 2, + minRejectedLabels: 2, + }, + ); + expect(formatKloRelationshipThresholdAdviceMarkdown).toHaveBeenCalledWith(advice); + expect(io.stdout()).toBe('KLO relationship threshold advice\nRecommended: accept=0.90 review=0.55\n'); + }); + + it('prints relationship threshold advice as JSON', async () => { + await initKloProject({ projectDir: tempDir, projectName: 'warehouse' }); + const advice: KloRelationshipThresholdAdviceReport = { + generatedAt: '2026-05-07T14:00:00.000Z', + filters: { connectionId: 'warehouse', decision: 'all' }, + status: 'insufficient_labels', + gates: { + minTotalLabels: 20, + minAcceptedLabels: 5, + minRejectedLabels: 5, + minAcceptedBandPrecision: 0.9, + minAcceptedOrReviewRecall: 0.8, + minRejectedBandPrecision: 0.8, + }, + summary: { + totalLabels: 0, + scoredLabels: 0, + unscoredLabels: 0, + acceptedLabels: 0, + rejectedLabels: 0, + evaluatedCandidates: 0, + eligibleCandidates: 0, + }, + recommended: null, + candidates: [], + reasons: ['Need at least 20 scored labels; found 0.'], + warnings: [], + }; + const adviseLocalRelationshipFeedbackThresholds = vi.fn(async () => advice); + + const io = makeIo(); + await expect( + runKloScan( + { + command: 'relationshipThresholds', + projectDir: tempDir, + connectionId: 'warehouse', + minTotalLabels: 20, + minAcceptedLabels: 5, + minRejectedLabels: 5, + json: true, + }, + io.io, + { adviseLocalRelationshipFeedbackThresholds }, + ), + ).resolves.toBe(0); + + expect(JSON.parse(io.stdout())).toMatchObject({ + filters: { connectionId: 'warehouse', decision: 'all' }, + status: 'insufficient_labels', + recommended: null, + }); + }); + + it('passes native CLI adapters into local scan runs for mysql configs', async () => { + const tempProject = await mkdtemp(join(tmpdir(), 'klo-scan-cli-native-')); + await initKloProject({ projectDir: tempProject, projectName: 'warehouse' }); + await writeFile( + join(tempProject, 'klo.yaml'), + [ + 'project: warehouse', + 'connections:', + ' warehouse:', 
+ ' driver: mysql', + ' url: env:MYSQL_URL', + ' readonly: true', + '', + ].join('\n'), + 'utf-8', + ); + const io = makeIo(); + const runLocalScan = vi.fn( + async (_input: RunLocalScanOptions): Promise => ({ + runId: 'scan-run-1', + status: 'done', + done: true, + connectionId: 'warehouse', + mode: 'structural', + dryRun: false, + syncId: 'sync-1', + report, + }), + ); + + await expect( + runKloScan( + { + command: 'run', + projectDir: tempProject, + connectionId: 'warehouse', + mode: 'structural', + detectRelationships: false, + dryRun: false, + }, + io.io, + { runLocalScan }, + ), + ).resolves.toBe(0); + + expect(runLocalScan).toHaveBeenCalledWith(expect.objectContaining({ adapters: expect.any(Array) })); + await rm(tempProject, { recursive: true, force: true }); + }); + + it('creates a native connector for standalone relationship scans', async () => { + const tempProject = await mkdtemp(join(tmpdir(), 'klo-scan-cli-relationships-')); + await initKloProject({ projectDir: tempProject, projectName: 'warehouse' }); + await writeFile( + join(tempProject, 'klo.yaml'), + [ + 'project: warehouse', + 'connections:', + ' warehouse:', + ' driver: sqlite', + ' path: warehouse.db', + ' readonly: true', + '', + ].join('\n'), + 'utf-8', + ); + const io = makeIo(); + const runLocalScan = vi.fn( + async (_input: RunLocalScanOptions): Promise => ({ + runId: 'scan-run-1', + status: 'done', + done: true, + connectionId: 'warehouse', + mode: 'relationships', + dryRun: false, + syncId: 'sync-1', + report: { ...report, mode: 'relationships' }, + }), + ); + + await expect( + runKloScan( + { + command: 'run', + projectDir: tempProject, + connectionId: 'warehouse', + mode: 'relationships', + detectRelationships: true, + dryRun: false, + }, + io.io, + { runLocalScan }, + ), + ).resolves.toBe(0); + + expect(runLocalScan).toHaveBeenCalledWith( + expect.objectContaining({ + mode: 'relationships', + detectRelationships: true, + connector: expect.objectContaining({ driver: 'sqlite' }), + }), 
+ ); + await rm(tempProject, { recursive: true, force: true }); + }); + + it('routes standalone postgres scans through the native connector before daemon fallback', async () => { + const tempProject = await mkdtemp(join(tmpdir(), 'klo-scan-cli-native-postgres-')); + await initKloProject({ projectDir: tempProject, projectName: 'warehouse' }); + await writeFile( + join(tempProject, 'klo.yaml'), + [ + 'project: warehouse', + 'connections:', + ' warehouse:', + ' driver: postgres', + ' host: db.example.test', + ' database: analytics', + ' username: reader', + ' password: env:POSTGRES_PASSWORD', + ' readonly: true', + '', + ].join('\n'), + 'utf-8', + ); + const io = makeIo(); + const runLocalScan = vi.fn( + async (_input: RunLocalScanOptions): Promise => ({ + runId: 'scan-run-1', + status: 'done', + done: true, + connectionId: 'warehouse', + mode: 'structural', + dryRun: false, + syncId: 'sync-1', + report, + }), + ); + + await expect( + runKloScan( + { + command: 'run', + projectDir: tempProject, + connectionId: 'warehouse', + mode: 'structural', + detectRelationships: false, + dryRun: false, + }, + io.io, + { runLocalScan }, + ), + ).resolves.toBe(0); + + expect(runLocalScan).toHaveBeenCalledWith(expect.objectContaining({ adapters: expect.any(Array) })); + const scanOptions = runLocalScan.mock.calls[0]?.[0]; + const liveDatabase = scanOptions?.adapters?.find((adapter) => adapter.source === 'live-database'); + if (!liveDatabase?.fetch) { + throw new Error('Expected scan adapters to include a fetch-capable live-database adapter'); + } + const stagedDir = join(tempProject, 'postgres-staged'); + await liveDatabase.fetch(undefined, stagedDir, { connectionId: 'warehouse', sourceKey: 'live-database' }); + expect(createPostgresLiveDatabaseIntrospection).toHaveBeenCalledWith({ connections: expect.any(Object) }); + expect(postgresExtractSchema).toHaveBeenCalledWith('warehouse'); + await expect(readFile(join(stagedDir, 'connection.json'), 'utf-8')).resolves.toContain( + 
'"connectionId": "warehouse"', + ); + await rm(tempProject, { recursive: true, force: true }); + }); + + it('passes native CLI adapters into local scan runs for clickhouse configs', async () => { + const tempProject = await mkdtemp(join(tmpdir(), 'klo-scan-cli-native-clickhouse-')); + await initKloProject({ projectDir: tempProject, projectName: 'warehouse' }); + await writeFile( + join(tempProject, 'klo.yaml'), + [ + 'project: warehouse', + 'connections:', + ' warehouse:', + ' driver: clickhouse', + ' host: env:CLICKHOUSE_HOST', + ' database: analytics', + ' username: reader', + ' password: env:CLICKHOUSE_PASSWORD', + ' readonly: true', + '', + ].join('\n'), + 'utf-8', + ); + const io = makeIo(); + const runLocalScan = vi.fn( + async (_input: RunLocalScanOptions): Promise => ({ + runId: 'scan-run-1', + status: 'done', + done: true, + connectionId: 'warehouse', + mode: 'structural', + dryRun: false, + syncId: 'sync-1', + report: { ...report, driver: 'clickhouse' }, + }), + ); + + await expect( + runKloScan( + { + command: 'run', + projectDir: tempProject, + connectionId: 'warehouse', + mode: 'structural', + detectRelationships: false, + dryRun: false, + }, + io.io, + { runLocalScan }, + ), + ).resolves.toBe(0); + + expect(runLocalScan).toHaveBeenCalledWith(expect.objectContaining({ adapters: expect.any(Array) })); + await rm(tempProject, { recursive: true, force: true }); + }); + + it('passes native CLI adapters into local scan runs for sqlserver configs', async () => { + const tempProject = await mkdtemp(join(tmpdir(), 'klo-scan-cli-native-sqlserver-')); + await initKloProject({ projectDir: tempProject, projectName: 'warehouse' }); + await writeFile( + join(tempProject, 'klo.yaml'), + [ + 'project: warehouse', + 'connections:', + ' warehouse:', + ' driver: sqlserver', + ' host: env:SQLSERVER_HOST', + ' database: analytics', + ' username: reader', + ' schema: dbo', + ' readonly: true', + '', + ].join('\n'), + 'utf-8', + ); + const io = makeIo(); + const runLocalScan 
= vi.fn( + async (_input: RunLocalScanOptions): Promise => ({ + runId: 'scan-run-1', + status: 'done', + done: true, + connectionId: 'warehouse', + mode: 'structural', + dryRun: false, + syncId: 'sync-1', + report: { ...report, driver: 'sqlserver' }, + }), + ); + + await expect( + runKloScan( + { + command: 'run', + projectDir: tempProject, + connectionId: 'warehouse', + mode: 'structural', + detectRelationships: false, + dryRun: false, + }, + io.io, + { runLocalScan }, + ), + ).resolves.toBe(0); + + expect(runLocalScan).toHaveBeenCalledWith(expect.objectContaining({ adapters: expect.any(Array) })); + const scanOptions = runLocalScan.mock.calls[0]?.[0]; + const liveDatabase = scanOptions?.adapters?.find((adapter) => adapter.source === 'live-database'); + if (!liveDatabase?.fetch) { + throw new Error('Expected scan adapters to include a fetch-capable live-database adapter'); + } + const stagedDir = join(tempProject, 'sqlserver-staged'); + await liveDatabase.fetch(undefined, stagedDir, { connectionId: 'warehouse', sourceKey: 'live-database' }); + expect(createSqlServerLiveDatabaseIntrospection).toHaveBeenCalledWith({ connections: expect.any(Object) }); + expect(sqlServerExtractSchema).toHaveBeenCalledWith('warehouse'); + await expect(readFile(join(stagedDir, 'connection.json'), 'utf-8')).resolves.toContain( + '"connectionId": "warehouse"', + ); + await rm(tempProject, { recursive: true, force: true }); + }); + + it('passes native CLI adapters into local scan runs for bigquery configs', async () => { + const tempProject = await mkdtemp(join(tmpdir(), 'klo-scan-cli-native-bigquery-')); + await initKloProject({ projectDir: tempProject, projectName: 'warehouse' }); + await writeFile( + join(tempProject, 'klo.yaml'), + [ + 'project: warehouse', + 'connections:', + ' warehouse:', + ' driver: bigquery', + ' dataset_id: analytics', + ' credentials_json: env:BIGQUERY_CREDENTIALS_JSON', + ' location: US', + ' readonly: true', + '', + ].join('\n'), + 'utf-8', + ); + const io = 
makeIo(); + const runLocalScan = vi.fn( + async (_input: RunLocalScanOptions): Promise => ({ + runId: 'scan-run-1', + status: 'done', + done: true, + connectionId: 'warehouse', + mode: 'structural', + dryRun: false, + syncId: 'sync-1', + report: { ...report, driver: 'bigquery' }, + }), + ); + + await expect( + runKloScan( + { + command: 'run', + projectDir: tempProject, + connectionId: 'warehouse', + mode: 'structural', + detectRelationships: false, + dryRun: false, + }, + io.io, + { runLocalScan }, + ), + ).resolves.toBe(0); + + expect(runLocalScan).toHaveBeenCalledWith(expect.objectContaining({ adapters: expect.any(Array) })); + const scanOptions = runLocalScan.mock.calls[0]?.[0]; + const liveDatabase = scanOptions?.adapters?.find((adapter) => adapter.source === 'live-database'); + if (!liveDatabase?.fetch) { + throw new Error('Expected scan adapters to include a fetch-capable live-database adapter'); + } + const stagedDir = join(tempProject, 'bigquery-staged'); + await liveDatabase.fetch(undefined, stagedDir, { connectionId: 'warehouse', sourceKey: 'live-database' }); + expect(createBigQueryLiveDatabaseIntrospection).toHaveBeenCalledWith({ connections: expect.any(Object) }); + expect(bigQueryExtractSchema).toHaveBeenCalledWith('warehouse'); + await expect(readFile(join(stagedDir, 'connection.json'), 'utf-8')).resolves.toContain( + '"connectionId": "warehouse"', + ); + await rm(tempProject, { recursive: true, force: true }); + }); + + it('passes native CLI adapters into local scan runs for snowflake configs', async () => { + const tempProject = await mkdtemp(join(tmpdir(), 'klo-scan-cli-native-snowflake-')); + await initKloProject({ projectDir: tempProject, projectName: 'warehouse' }); + await writeFile( + join(tempProject, 'klo.yaml'), + [ + 'project: warehouse', + 'connections:', + ' warehouse:', + ' driver: snowflake', + ' authMethod: password', + ' account: env:SNOWFLAKE_ACCOUNT', + ' warehouse: WH', + ' database: ANALYTICS', + ' schema_name: PUBLIC', + ' 
username: reader', + ' readonly: true', + '', + ].join('\n'), + 'utf-8', + ); + const io = makeIo(); + const runLocalScan = vi.fn( + async (_input: RunLocalScanOptions): Promise => ({ + runId: 'scan-run-1', + status: 'done', + done: true, + connectionId: 'warehouse', + mode: 'structural', + dryRun: false, + syncId: 'sync-1', + report: { ...report, driver: 'snowflake' }, + }), + ); + + await expect( + runKloScan( + { + command: 'run', + projectDir: tempProject, + connectionId: 'warehouse', + mode: 'structural', + detectRelationships: false, + dryRun: false, + }, + io.io, + { runLocalScan }, + ), + ).resolves.toBe(0); + + expect(runLocalScan).toHaveBeenCalledWith(expect.objectContaining({ adapters: expect.any(Array) })); + const scanOptions = runLocalScan.mock.calls[0]?.[0]; + const liveDatabase = scanOptions?.adapters?.find((adapter) => adapter.source === 'live-database'); + if (!liveDatabase?.fetch) { + throw new Error('Expected scan adapters to include a fetch-capable live-database adapter'); + } + const stagedDir = join(tempProject, 'snowflake-staged'); + await liveDatabase.fetch(undefined, stagedDir, { connectionId: 'warehouse', sourceKey: 'live-database' }); + expect(createSnowflakeLiveDatabaseIntrospection).toHaveBeenCalledWith({ connections: expect.any(Object) }); + expect(snowflakeExtractSchema).toHaveBeenCalledWith('warehouse'); + await expect(readFile(join(stagedDir, 'connection.json'), 'utf-8')).resolves.toContain( + '"connectionId": "warehouse"', + ); + await rm(tempProject, { recursive: true, force: true }); + }); +}); diff --git a/packages/cli/src/scan.ts b/packages/cli/src/scan.ts new file mode 100644 index 00000000..8008b72e --- /dev/null +++ b/packages/cli/src/scan.ts @@ -0,0 +1,737 @@ +import { loadKloProject } from '@klo/context/project'; +import { + type ApplyLocalScanRelationshipReviewDecisionsResult, + adviseLocalRelationshipFeedbackThresholds, + applyLocalScanRelationshipReviewDecisions, + calibrateLocalRelationshipFeedbackLabels, + type 
ExportLocalRelationshipFeedbackLabelsResult, + exportLocalRelationshipFeedbackLabels, + formatKloRelationshipFeedbackCalibrationMarkdown, + formatKloRelationshipFeedbackLabelsJsonl, + formatKloRelationshipThresholdAdviceMarkdown, + getLocalScanReport, + getLocalScanStatus, + type KloProgressPort, + type KloRelationshipArtifact, + type KloRelationshipArtifactEdge, + type KloRelationshipArtifactStatus, + type KloRelationshipDiagnosticsArtifact, + type KloRelationshipFeedbackCalibrationReport, + type KloRelationshipFeedbackDecisionFilter, + type KloRelationshipFeedbackLabel, + type KloRelationshipReviewDecisionValue, + type KloRelationshipThresholdAdviceReport, + type KloScanMode, + type KloScanReport, + type KloScanWarning, + type LocalScanStatusResponse, + readLocalScanRelationshipArtifacts, + runLocalScan, + type WriteLocalScanRelationshipReviewDecisionResult, + writeLocalScanRelationshipReviewDecision, +} from '@klo/context/scan'; +import type { KloCliIo } from './index.js'; +import { createKloCliLocalIngestAdapters } from './local-adapters.js'; +import { createKloCliScanConnector } from './local-scan-connectors.js'; +import { profileMark } from './startup-profile.js'; + +profileMark('module:scan'); + +export type KloScanArgs = + | { + command: 'run'; + projectDir: string; + connectionId: string; + mode: KloScanMode; + detectRelationships: boolean; + dryRun: boolean; + databaseIntrospectionUrl?: string; + } + | { command: 'status'; projectDir: string; runId: string } + | { command: 'report'; projectDir: string; runId: string; json: boolean } + | { + command: 'relationships'; + projectDir: string; + runId: string; + status: KloRelationshipArtifactStatus; + json: boolean; + limit: number; + } + | { + command: 'relationshipDecision'; + projectDir: string; + runId: string; + candidateId: string; + decision: KloRelationshipReviewDecisionValue; + reviewer: string; + note: string | null; + json: boolean; + } + | { + command: 'relationshipApply'; + projectDir: string; + 
runId: string; + applyAllAccepted: boolean; + candidateIds: string[]; + dryRun: boolean; + json: boolean; + } + | { + command: 'relationshipFeedback'; + projectDir: string; + connectionId: string | null; + decision: KloRelationshipFeedbackDecisionFilter; + json: boolean; + jsonl: boolean; + } + | { + command: 'relationshipCalibration'; + projectDir: string; + connectionId: string | null; + decision: KloRelationshipFeedbackDecisionFilter; + acceptThreshold: number; + reviewThreshold: number; + json: boolean; + } + | { + command: 'relationshipThresholds'; + projectDir: string; + connectionId: string | null; + minTotalLabels: number; + minAcceptedLabels: number; + minRejectedLabels: number; + json: boolean; + }; +/** Optional replacements for the scan operations this module imports; every field is typed as `typeof` the real function. NOTE(review): presumably the command runner falls back to the imported implementation when a field is omitted - confirm against runKloScan, which is not visible here. */ +interface KloScanDeps { + runLocalScan?: typeof runLocalScan; + createLocalIngestAdapters?: typeof createKloCliLocalIngestAdapters; + getLocalScanStatus?: typeof getLocalScanStatus; + getLocalScanReport?: typeof getLocalScanReport; + readLocalScanRelationshipArtifacts?: typeof readLocalScanRelationshipArtifacts; + writeLocalScanRelationshipReviewDecision?: typeof writeLocalScanRelationshipReviewDecision; + applyLocalScanRelationshipReviewDecisions?: typeof applyLocalScanRelationshipReviewDecisions; + exportLocalRelationshipFeedbackLabels?: typeof exportLocalRelationshipFeedbackLabels; + formatKloRelationshipFeedbackLabelsJsonl?: typeof formatKloRelationshipFeedbackLabelsJsonl; + calibrateLocalRelationshipFeedbackLabels?: typeof calibrateLocalRelationshipFeedbackLabels; + formatKloRelationshipFeedbackCalibrationMarkdown?: typeof formatKloRelationshipFeedbackCalibrationMarkdown; + adviseLocalRelationshipFeedbackThresholds?: typeof adviseLocalRelationshipFeedbackThresholds; + formatKloRelationshipThresholdAdviceMarkdown?: typeof formatKloRelationshipThresholdAdviceMarkdown; +} +/** Returns true only when io.stdout is a TTY, the NO_COLOR and CI environment variables are unset or empty, and TERM is not 'dumb'. */ +function shouldUseStyledOutput(io: KloCliIo): boolean { + return io.stdout.isTTY === true && !process.env.NO_COLOR && process.env.TERM !== 'dumb' && !process.env.CI; +} +/** Wraps text in the ANSI SGR sequences 32 (green foreground) and 39 (default foreground). */ +function
green(text: string): string { + return `\u001b[32m${text}\u001b[39m`; +} +/** Wraps text in the ANSI SGR sequences 2 (faint) and 22 (normal intensity). */ +function dim(text: string): string { + return `\u001b[2m${text}\u001b[22m`; +} +/** Returns value unchanged when it is non-empty and made only of letters, digits and the characters _ . / : @ = -; otherwise wraps it in single quotes, rewriting every embedded single quote as '\'' (close quote, escaped quote, reopen quote). The empty string does not match the pattern and therefore comes back as ''. */ +function quoteCliArg(value: string): string { + if (/^[A-Za-z0-9_./:@=-]+$/.test(value)) { + return value; + } + return `'${value.replaceAll("'", "'\\''")}'`; +} +/** Returns singular when count is exactly 1, otherwise pluralValue (which defaults to singular followed by 's'). */ +function plural(count: number, singular: string, pluralValue = `${singular}s`): string { + return count === 1 ? singular : pluralValue; +} +/** Sum of tablesAdded, tablesModified and tablesDeleted from report.diffSummary. */ +function tableChangeCount(report: KloScanReport): number { + return report.diffSummary.tablesAdded + report.diffSummary.tablesModified + report.diffSummary.tablesDeleted; +} +/** tableChangeCount(report) plus report.diffSummary.tablesUnchanged. */ +function totalTableCount(report: KloScanReport): number { + return tableChangeCount(report) + report.diffSummary.tablesUnchanged; +} +/** Writes the Run, Connection, Mode, Sync and Dry run (yes/no) lines of a report to io.stdout. */ +function writeScanIdentity(report: KloScanReport, io: KloCliIo): void { + io.stdout.write(`Run: ${report.runId}\n`); + io.stdout.write(`Connection: ${report.connectionId}\n`); + io.stdout.write(`Mode: ${report.mode}\n`); + io.stdout.write(`Sync: ${report.syncId}\n`); + io.stdout.write(`Dry run: ${report.dryRun ?
'yes' : 'no'}\n`); +} +/** Writes the 'What changed' section: one summary sentence, the four table counters and, only when at least one column counter is positive, the three column counters. */ +function writeWhatChanged(report: KloScanReport, io: KloCliIo): void { + const changedTables = tableChangeCount(report); + const totalTables = totalTableCount(report); + io.stdout.write('\nWhat changed\n'); + const tableNoun = plural(totalTables, 'table'); + const changeNoun = plural(changedTables, 'change'); + io.stdout.write( + ` Semantic layer comparison found ${changedTables} ${changeNoun} across ${totalTables} ${tableNoun}\n`, + ); + io.stdout.write(` New tables: ${report.diffSummary.tablesAdded}\n`); + io.stdout.write(` Changed tables: ${report.diffSummary.tablesModified}\n`); + io.stdout.write(` Removed tables: ${report.diffSummary.tablesDeleted}\n`); + io.stdout.write(` Unchanged tables: ${report.diffSummary.tablesUnchanged}\n`); + if ( + report.diffSummary.columnsAdded > 0 || + report.diffSummary.columnsModified > 0 || + report.diffSummary.columnsDeleted > 0 + ) { + io.stdout.write(` New columns: ${report.diffSummary.columnsAdded}\n`); + io.stdout.write(` Changed columns: ${report.diffSummary.columnsModified}\n`); + io.stdout.write(` Removed columns: ${report.diffSummary.columnsDeleted}\n`); + } +} +/** True when any of the accepted, review, rejected or skipped relationship counts is greater than zero. */ +function hasRelationshipResults(report: KloScanReport): boolean { + return ( + report.relationships.accepted > 0 || + report.relationships.review > 0 || + report.relationships.rejected > 0 || + report.relationships.skipped > 0 + ); +} +/** Writes the 'Relationships' section with its four counters; writes nothing at all when hasRelationshipResults(report) is false. */ +function writeRelationships(report: KloScanReport, io: KloCliIo): void { + if (!hasRelationshipResults(report)) { + return; + } + io.stdout.write('\nRelationships\n'); + io.stdout.write(` Accepted: ${report.relationships.accepted}\n`); + io.stdout.write(` Review: ${report.relationships.review}\n`); + io.stdout.write(` Rejected: ${report.relationships.rejected}\n`); + io.stdout.write(` Skipped: ${report.relationships.skipped}\n`); +} +/** Maps a capability-gap key to a user-facing sentence: dedicated wording for columnStats, tableSampling/columnSampling and readOnlySql, and a generic sentence for any other key. */ +function capabilityGapMessage(gap: string): string { + if (gap === 'columnStats') { + return 'columnStats is unavailable; relationship confidence may be lower.'; + } +
if (gap === 'tableSampling' || gap === 'columnSampling') { + return `${gap} is unavailable; descriptions may be less specific.`; + } + if (gap === 'readOnlySql') { + return 'readOnlySql is unavailable; relationship and validation checks may be limited.'; + } + return `${gap} is unavailable; scan results may be less complete.`; +} +/** Formats a warning as its code, an optional 'table: ' or 'table.column: ' location, and the message; the location is omitted when warning.table is falsy. */ +function warningLine(warning: KloScanWarning): string { + const location = warning.table ? `${warning.table}${warning.column ? `.${warning.column}` : ''}: ` : ''; + return `${warning.code}: ${location}${warning.message}`; +} +/** Writes the 'Needs attention' section: 'None' when there are neither warnings nor capability gaps; otherwise the warning count with at most the first 5 warnings (plus an overflow line pointing at the JSON report), followed by every capability gap. */ +function writeNeedsAttention(report: KloScanReport, io: KloCliIo): void { + io.stdout.write('\nNeeds attention\n'); + if (report.warnings.length === 0 && report.capabilityGaps.length === 0) { + io.stdout.write(' None\n'); + return; + } + if (report.warnings.length > 0) { + io.stdout.write(` ${report.warnings.length} ${plural(report.warnings.length, 'warning')}\n`); + for (const warning of report.warnings.slice(0, 5)) { + io.stdout.write(` - ${warningLine(warning)}\n`); + } + if (report.warnings.length > 5) { + io.stdout.write(` - ${report.warnings.length - 5} more warnings in the JSON report\n`); + } + } + if (report.capabilityGaps.length > 0) { + io.stdout.write(` ${report.capabilityGaps.length} capability ${plural(report.capabilityGaps.length, 'gap')}\n`); + for (const gap of report.capabilityGaps) { + io.stdout.write(` - ${capabilityGapMessage(gap)}\n`); + } + } +} +/** Writes the 'Artifacts' section: the report path and raw-sources directory ('none' when null or undefined), plus the schema-shard and enrichment-artifact counts when those lists are non-empty. */ +function writeArtifacts(report: KloScanReport, io: KloCliIo): void { + io.stdout.write('\nArtifacts\n'); + io.stdout.write(` Report: ${report.artifactPaths.reportPath ?? 'none'}\n`); + io.stdout.write(` Raw sources: ${report.artifactPaths.rawSourcesDir ??
'none'}\n`); + if (report.artifactPaths.manifestShards.length > 0) { + io.stdout.write(` Schema shards: ${report.artifactPaths.manifestShards.length}\n`); + } + if (report.artifactPaths.enrichmentArtifacts.length > 0) { + io.stdout.write(` Enrichment artifacts: ${report.artifactPaths.enrichmentArtifacts.length}\n`); + } +} +/** Writes the report body shared by the run summary and the report command, in fixed order: identity, what changed, relationships, needs attention, artifacts. */ +function writeHumanReportBody(report: KloScanReport, io: KloCliIo): void { + writeScanIdentity(report, io); + writeWhatChanged(report, io); + writeRelationships(report, io); + writeNeedsAttention(report, io); + writeArtifacts(report, io); +} +/** Writes the post-run summary: a completion header (prefixed with a green check mark and a space only when styled output is enabled), 'Status: done', the shared report body, and a 'Next:' block suggesting the scan status and scan report commands with projectDir passed through quoteCliArg. */ +function writeRunSummary(report: KloScanReport, projectDir: string, io: KloCliIo): void { + const styled = shouldUseStyledOutput(io); + io.stdout.write(`${styled ? green('✓') : ''}${styled ? ' ' : ''}KLO scan completed\n`); + io.stdout.write('Status: done\n'); + writeHumanReportBody(report, io); + const projectDirArg = quoteCliArg(projectDir); + io.stdout.write('\nNext:\n'); + const statusCommand = styled ? dim('klo dev scan status') : 'klo dev scan status'; + const reportCommand = styled ? dim('klo dev scan report') : 'klo dev scan report'; + io.stdout.write(` ${statusCommand} --project-dir ${projectDirArg} ${report.runId}\n`); + io.stdout.write(` ${reportCommand} --project-dir ${projectDirArg} ${report.runId}\n`); +} +/** Writes the 'KLO scan report' heading followed by the shared report body. */ +function writeReport(report: KloScanReport, io: KloCliIo): void { + io.stdout.write('KLO scan report\n'); + writeHumanReportBody(report, io); +} +/** Formats one side of an edge as 'table.column' when it has exactly one column, otherwise as 'table.(a,b,...)' with the columns joined by commas. */ +function formatRelationshipEndpoint(edge: KloRelationshipArtifactEdge, side: 'from' | 'to'): string { + const endpoint = edge[side]; + if (endpoint.columns.length === 1) { + return `${endpoint.table.name}.${endpoint.columns[0]}`; + } + return `${endpoint.table.name}.(${endpoint.columns.join(',')})`; +} +/** Returns 'n/a' for null, otherwise the value formatted with two decimal places. */ +function formatRelationshipScore(value: number | null): string { + return value === null ?
'n/a' : value.toFixed(2); +} +/** Maps a relationship status to its section heading; any status other than accepted, review or rejected yields 'Skipped relationships'. NOTE(review): the type arguments after Exclude look stripped in this copy of the patch - verify against the upstream file. */ +function relationshipStatusTitle(status: Exclude): string { + if (status === 'accepted') { + return 'Accepted relationships'; + } + if (status === 'review') { + return 'Review relationships'; + } + if (status === 'rejected') { + return 'Rejected relationships'; + } + return 'Skipped relationships'; +} +/** Returns relationships itself for status 'all'; otherwise a new object that keeps connectionId and only the bucket matching status, with the other three buckets replaced by empty arrays. */ +function filteredRelationshipArtifact( + relationships: KloRelationshipArtifact, + status: KloRelationshipArtifactStatus, +): KloRelationshipArtifact { + if (status === 'all') { + return relationships; + } + return { + connectionId: relationships.connectionId, + accepted: status === 'accepted' ? relationships.accepted : [], + review: status === 'review' ? relationships.review : [], + rejected: status === 'rejected' ? relationships.rejected : [], + skipped: status === 'skipped' ? relationships.skipped : [], + }; +} +/** Writes three lines for one edge: the 1-based index with the from and to endpoints; type, source, confidence, pkScore and fkScore; and the reasons joined by ', ' ('none' when the list is empty). */ +function writeRelationshipEdge(edge: KloRelationshipArtifactEdge, index: number, io: KloCliIo): void { + io.stdout.write( + ` ${index + 1}. ${formatRelationshipEndpoint(edge, 'from')} -> ${formatRelationshipEndpoint(edge, 'to')}\n`, + ); + io.stdout.write( + ` type=${edge.relationshipType} source=${edge.source} confidence=${edge.confidence.toFixed(2)} pkScore=${formatRelationshipScore(edge.pkScore)} fkScore=${formatRelationshipScore(edge.fkScore)}\n`, + ); + io.stdout.write(` reasons=${edge.reasons.length > 0 ? edge.reasons.join(', ') : 'none'}\n`); +} +/** Writes one titled group with its total count and at most limit entries. Skipped entries print relationshipId and reason; the other statuses print full edges and, when more than limit exist, a 'more not shown' hint. NOTE(review): the skipped branch returns without printing that hint when its list is truncated - confirm this is intended. The type arguments after Exclude also look stripped in this copy. */ +function writeRelationshipGroup( + status: Exclude, + relationships: KloRelationshipArtifact, + limit: number, + io: KloCliIo, +): void { + if (status === 'skipped') { + io.stdout.write(`\n${relationshipStatusTitle(status)} (${relationships.skipped.length})\n`); + relationships.skipped.slice(0, limit).forEach((item, index) => { + io.stdout.write(` ${index + 1}. ${item.relationshipId}\n`); + io.stdout.write(` reason=${item.reason}\n`); + }); + return; + } + + const edges = + status === 'accepted' + ? relationships.accepted + : status === 'review' + ?
relationships.review + : relationships.rejected; + io.stdout.write(`\n${relationshipStatusTitle(status)} (${edges.length})\n`); + edges.slice(0, limit).forEach((edge, index) => { + writeRelationshipEdge(edge, index, io); + }); + if (edges.length > limit) { + io.stdout.write(` ${edges.length - limit} more not shown; rerun with --limit ${edges.length}\n`); + } +} +/** Writes the relationship-artifact view: a header (run, connection, sync, per-bucket counts taken from input.summary, the diagnostics noAcceptedReason when present, and the artifact path) followed by one group per selected status rendered from input.relationships. Status 'all' expands to accepted, review, rejected, skipped in that order. NOTE(review): the element type of the statuses array looks stripped in this copy of the patch - verify against the upstream file. */ +function writeRelationshipArtifactSummary(input: { + runId: string; + connectionId: string; + syncId: string; + status: KloRelationshipArtifactStatus; + limit: number; + summary: KloRelationshipArtifact; + relationships: KloRelationshipArtifact; + diagnostics: KloRelationshipDiagnosticsArtifact | null; + relationshipsPath: string; + io: KloCliIo; +}): void { + input.io.stdout.write('KLO relationship artifacts\n'); + input.io.stdout.write(`Run: ${input.runId}\n`); + input.io.stdout.write(`Connection: ${input.connectionId}\n`); + input.io.stdout.write(`Sync: ${input.syncId}\n`); + input.io.stdout.write( + `Summary: accepted=${input.summary.accepted.length} review=${input.summary.review.length} rejected=${input.summary.rejected.length} skipped=${input.summary.skipped.length}\n`, + ); + if (input.diagnostics?.noAcceptedReason) { + input.io.stdout.write(`Reason: ${input.diagnostics.noAcceptedReason}\n`); + } + input.io.stdout.write(`Artifacts: ${input.relationshipsPath}\n`); + + const statuses: Array> = + input.status === 'all' ?
['accepted', 'review', 'rejected', 'skipped'] : [input.status]; + for (const status of statuses) { + writeRelationshipGroup(status, input.relationships, input.limit, input.io); + } +} +/** Writes a confirmation for a recorded review decision: the decision, candidate id, previous status, reviewer, the note when it is truthy, and the path of the written file. */ +function writeRelationshipDecisionResult(result: WriteLocalScanRelationshipReviewDecisionResult, io: KloCliIo): void { + io.stdout.write('Recorded relationship decision\n'); + io.stdout.write(`Decision: ${result.decision.decision}\n`); + io.stdout.write(`Candidate: ${result.decision.candidateId}\n`); + io.stdout.write(`Previous status: ${result.decision.previousStatus}\n`); + io.stdout.write(`Reviewer: ${result.decision.reviewer}\n`); + if (result.decision.note) { + io.stdout.write(`Note: ${result.decision.note}\n`); + } + io.stdout.write(`Path: ${result.path}\n`); +} +/** Writes the outcome of applying review decisions: run, connection, sync, mode (dry-run or write), selected-decision and applied-relationship counts, the number of schema shards written with their paths when any, and the decisions file path. Note that the 'Decisions:' label is emitted twice - first with the count, last with decisionsPath. */ +function writeRelationshipApplyResult(result: ApplyLocalScanRelationshipReviewDecisionsResult, io: KloCliIo): void { + io.stdout.write('Relationship review apply\n'); + io.stdout.write(`Run: ${result.runId}\n`); + io.stdout.write(`Connection: ${result.connectionId}\n`); + io.stdout.write(`Sync: ${result.syncId}\n`); + io.stdout.write(`Mode: ${result.dryRun ? 'dry-run' : 'write'}\n`); + io.stdout.write(`Decisions: ${result.selectedDecisions} ${plural(result.selectedDecisions, 'accepted decision')}\n`); + io.stdout.write( + `Applied: ${result.appliedRelationships} manual ${plural(result.appliedRelationships, 'relationship')}\n`, + ); + io.stdout.write(`Schema shards written: ${result.manifestShardsWritten}\n`); + if (result.manifestShards.length > 0) { + io.stdout.write('Schema shards:\n'); + for (const shard of result.manifestShards) { + io.stdout.write(` - ${shard}\n`); + } + } + io.stdout.write(`Decisions: ${result.decisionsPath}\n`); +} +/** Returns the sole column name when the list has exactly one element ('unknown' if that element is null or undefined); otherwise the names joined by commas inside parentheses. */ +function formatFeedbackColumns(columns: readonly string[]): string { + return columns.length === 1 ? (columns[0] ?? 'unknown') : `(${columns.join(',')})`; +} +/** Returns the segment after the last '.' in value, falling back to value itself. */ +function feedbackTableShortName(value: string): string { + return value.split('.').at(-1) ??
value;
+}
+
+/**
+ * Render one side of a feedback label as `<short table name>.<columns>`.
+ *
+ * @param label Feedback label holding both endpoints of the relationship.
+ * @param side `'from'` reads `fromTable`/`fromColumns`; `'to'` reads `toTable`/`toColumns`.
+ * @returns The endpoint built from `feedbackTableShortName` and `formatFeedbackColumns`.
+ */
+function feedbackEndpoint(label: KloRelationshipFeedbackLabel, side: 'from' | 'to'): string {
+  if (side === 'from') {
+    return `${feedbackTableShortName(label.fromTable)}.${formatFeedbackColumns(label.fromColumns)}`;
+  }
+  return `${feedbackTableShortName(label.toTable)}.${formatFeedbackColumns(label.toColumns)}`;
+}
+
+/**
+ * Print the human-readable summary of exported relationship feedback labels.
+ *
+ * Writes the generation time, the active filters and the summary counters, then at most
+ * 5 warnings and at most 25 labels. Whenever a list is truncated, a trailing line states
+ * how many entries were hidden and which flag prints the complete data.
+ *
+ * @param result Export result; `filters`, `summary`, `warnings` and `labels` are read.
+ * @param io Output sink; everything is written to `io.stdout`.
+ */
+function writeRelationshipFeedbackSummary(result: ExportLocalRelationshipFeedbackLabelsResult, io: KloCliIo): void {
+  io.stdout.write('KLO relationship feedback labels\n');
+  io.stdout.write(`Generated: ${result.generatedAt}\n`);
+  io.stdout.write(`Filter connection: ${result.filters.connectionId ?? 'all'}\n`);
+  io.stdout.write(`Filter decision: ${result.filters.decision}\n`);
+  io.stdout.write(`Total: ${result.summary.total}\n`);
+  io.stdout.write(`Accepted: ${result.summary.accepted}\n`);
+  io.stdout.write(`Rejected: ${result.summary.rejected}\n`);
+  io.stdout.write(`Connections: ${result.summary.connections}\n`);
+  io.stdout.write(`Runs: ${result.summary.runs}\n`);
+
+  if (result.warnings.length > 0) {
+    io.stdout.write('\nWarnings\n');
+    for (const warning of result.warnings.slice(0, 5)) {
+      io.stdout.write(` - ${warning.path}: ${warning.message}\n`);
+    }
+    // Mirror the label truncation notice below so hidden warnings are never dropped silently.
+    // The --json output serialises the whole result, warnings included.
+    if (result.warnings.length > 5) {
+      io.stdout.write(` ${result.warnings.length - 5} more warnings not shown; rerun with --json for the full list\n`);
+    }
+  }
+
+  // Nothing more to print when there are no labels; the "Labels" heading is skipped as well.
+  if (result.labels.length === 0) {
+    return;
+  }
+
+  io.stdout.write('\nLabels\n');
+  for (const label of result.labels.slice(0, 25)) {
+    io.stdout.write(` - ${feedbackEndpoint(label, 'from')} -> ${feedbackEndpoint(label, 'to')}\n`);
+    io.stdout.write(
+      ` decision=${label.decision} previous=${label.previousStatus} score=${formatRelationshipScore(label.score)} reviewer=${label.reviewer}\n`,
+    );
+  }
+  if (result.labels.length > 25) {
+    io.stdout.write(` ${result.labels.length - 25} more labels not shown; rerun with --jsonl for the full dataset\n`);
+  }
+}
+
+/** Mutable state shared between a CLI progress reporter and the phase reporters derived from it. */
+interface KloCliScanProgressState {
+  // Overall progress recorded so far.
+  progress: number;
+  // True while a transient line has been written without a terminating newline.
+  hasPendingTransient: boolean;
+}
+
+/** Options accepted by `KloCliScanProgress.update`. */
+interface KloCliScanProgressUpdateOptions {
+  // When true the message is written as a transient line (see `createCliScanProgress`).
+  transient?: boolean;
+}
+
+// NOTE(review): the type arguments of `Omit` look stripped in this copy of the patch; restore from the original file.
+interface KloCliScanProgress extends Omit { 
update(progress: number, message?: string, options?: KloCliScanProgressUpdateOptions): Promise;
+  flush(): void;
+}
+/**
+ * Create a progress reporter that prints `[NN%] message` lines to the CLI.
+ *
+ * Output is produced only when `io.stdout.isTTY` is exactly `true` and the `CI` environment
+ * variable is unset or empty; progress is still tracked in `state` when output is suppressed.
+ *
+ * NOTE(review): the printed percentage is derived from this call's `absoluteValue`, not from
+ * the monotonic `state.progress`, so a later message can show a lower number than an
+ * earlier one; confirm this is intended.
+ * NOTE(review): `startPhase` passes `phaseWeight` through unscaled by this reporter's own
+ * `weight`; if phases can be nested, confirm the weight is meant to be absolute.
+ *
+ * @param io CLI output sink; only `io.stdout` is used.
+ * @param state Shared mutable state; phase reporters created by `startPhase` reuse the same object.
+ * @param start Offset added to every reported value.
+ * @param weight Factor applied to the clamped value before adding `start`.
+ * @returns Reporter exposing `update`, `startPhase` and `flush`.
+ */
+export function createCliScanProgress(
+  io: KloCliIo,
+  state: KloCliScanProgressState = { progress: 0, hasPendingTransient: false },
+  start = 0,
+  weight = 1,
+): KloCliScanProgress {
+  const shouldWrite = io.stdout.isTTY === true && !process.env.CI;
+  const progress: KloCliScanProgress = {
+    async update(value: number, message?: string, options?: KloCliScanProgressUpdateOptions) {
+      const absoluteValue = start + Math.max(0, Math.min(1, value)) * weight; // clamp the phase-local value to [0, 1] first
+      state.progress = Math.max(state.progress, Math.min(1, absoluteValue)); // recorded progress never decreases and is capped at 1
+      if (!shouldWrite || !message) {
+        return;
+      }
+      const percent = Math.max(0, Math.min(100, Math.round(absoluteValue * 100)));
+      const line = `[${percent}%] ${message}`;
+      if (options?.transient === true) {
+        io.stdout.write(`\r${line}\u001b[K`); // carriage return + erase-to-end-of-line: redraws in place, no newline
+        state.hasPendingTransient = true;
+        return;
+      }
+      progress.flush(); // terminate any pending transient line before writing a permanent one
+      io.stdout.write(`${line}\n`);
+    },
+    startPhase(phaseWeight: number) {
+      return createCliScanProgress(io, state, state.progress, phaseWeight); // the phase starts at the progress recorded so far
+    },
+    flush() {
+      if (!shouldWrite || !state.hasPendingTransient) {
+        return;
+      }
+      io.stdout.write('\n');
+      state.hasPendingTransient = false;
+    },
+  };
+  return progress;
+}
+/**
+ * Print the status of one scan run as `Label: value` lines.
+ *
+ * @param status Status response; run id, status, connection, mode, sync, progress and report path are printed.
+ * @param io Output sink; everything is written to `io.stdout`.
+ */
+function writeStatus(status: LocalScanStatusResponse, io: KloCliIo): void {
+  io.stdout.write(`Run: ${status.runId}\n`);
+  io.stdout.write(`Status: ${status.status}\n`);
+  io.stdout.write(`Connection: ${status.connectionId}\n`);
+  io.stdout.write(`Mode: ${status.mode}\n`);
+  io.stdout.write(`Sync: ${status.syncId}\n`);
+  io.stdout.write(`Progress: ${status.progress}\n`);
+  io.stdout.write(`Report: ${status.reportPath ?? 
'none'}\n`);
+}
+/**
+ * Entry point of the scan command family.
+ *
+ * Loads the project from `args.projectDir` and dispatches on `args.command`:
+ * `status`, `report`, `relationships`, `relationshipDecision`, `relationshipApply`,
+ * `relationshipFeedback`, `relationshipCalibration` and `relationshipThresholds` each call
+ * one backend function (overridable through `deps`) and print either JSON or a
+ * human-readable rendering. Any other command falls through to running a local scan with
+ * CLI progress reporting.
+ *
+ * Errors thrown anywhere inside are caught at the end of the function: the message is
+ * written to `io.stderr` and the function resolves to 1 instead of rejecting.
+ *
+ * NOTE(review): `loadKloProject` and `createKloCliScanConnector` are called directly rather
+ * than through `deps`, unlike the other collaborators; confirm that is intentional.
+ *
+ * @param args Parsed scan arguments; `args.command` selects the branch.
+ * @param io Output streams, defaulting to `process`.
+ * @param deps Optional overrides for the backend functions; each falls back to the real implementation.
+ * @returns 0 on success, 1 when an error was reported.
+ */
+export async function runKloScan(args: KloScanArgs, io: KloCliIo = process, deps: KloScanDeps = {}): Promise { // NOTE(review): Promise type argument looks stripped in this copy of the patch
+  try {
+    const project = await loadKloProject({ projectDir: args.projectDir });
+    if (args.command === 'status') {
+      const status = await (deps.getLocalScanStatus ?? getLocalScanStatus)(project, args.runId);
+      if (!status) {
+        throw new Error(`Scan run "${args.runId}" was not found`);
+      }
+      writeStatus(status, io);
+      return 0;
+    }
+    if (args.command === 'report') {
+      const report = await (deps.getLocalScanReport ?? getLocalScanReport)(project, args.runId);
+      if (!report) {
+        throw new Error(`Scan report "${args.runId}" was not found`);
+      }
+      if (args.json) {
+        io.stdout.write(`${JSON.stringify(report, null, 2)}\n`);
+      } else {
+        writeReport(report, io);
+      }
+      return 0;
+    }
+    if (args.command === 'relationships') {
+      const result = await (deps.readLocalScanRelationshipArtifacts ?? readLocalScanRelationshipArtifacts)(
+        project,
+        args.runId,
+      );
+      if (!result) {
+        throw new Error(`Scan run "${args.runId}" was not found`);
+      }
+      const filtered = filteredRelationshipArtifact(result.relationships, args.status); // counts below still use the unfiltered artifact
+      if (args.json) {
+        io.stdout.write(
+          `${JSON.stringify(
+            {
+              runId: result.runId,
+              connectionId: result.connectionId,
+              syncId: result.syncId,
+              status: args.status,
+              paths: result.paths,
+              diagnostics: result.diagnostics,
+              summary: {
+                accepted: result.relationships.accepted.length,
+                review: result.relationships.review.length,
+                rejected: result.relationships.rejected.length,
+                skipped: result.relationships.skipped.length,
+              },
+              relationships: filtered,
+            },
+            null,
+            2,
+          )}\n`,
+        );
+      } else {
+        writeRelationshipArtifactSummary({
+          runId: result.runId,
+          connectionId: result.connectionId,
+          syncId: result.syncId,
+          status: args.status,
+          limit: args.limit,
+          summary: result.relationships,
+          relationships: filtered,
+          diagnostics: result.diagnostics,
+          relationshipsPath: result.paths.relationships,
+          io,
+        });
+      }
+      return 0;
+    }
+    if (args.command === 'relationshipDecision') {
+      const result = await (deps.writeLocalScanRelationshipReviewDecision ?? writeLocalScanRelationshipReviewDecision)(
+        project,
+        {
+          runId: args.runId,
+          candidateId: args.candidateId,
+          decision: args.decision,
+          reviewer: args.reviewer,
+          note: args.note,
+        },
+      );
+      if (!result) {
+        throw new Error(`Scan run "${args.runId}" was not found`);
+      }
+      if (args.json) {
+        io.stdout.write(`${JSON.stringify(result, null, 2)}\n`);
+      } else {
+        writeRelationshipDecisionResult(result, io);
+      }
+      return 0;
+    }
+    if (args.command === 'relationshipApply') {
+      const result = await (
+        deps.applyLocalScanRelationshipReviewDecisions ?? applyLocalScanRelationshipReviewDecisions
+      )(project, {
+        runId: args.runId,
+        applyAllAccepted: args.applyAllAccepted,
+        candidateIds: args.candidateIds,
+        dryRun: args.dryRun,
+      });
+      if (args.json) {
+        io.stdout.write(
+          `${JSON.stringify(result satisfies ApplyLocalScanRelationshipReviewDecisionsResult, null, 2)}\n`,
+        );
+      } else {
+        writeRelationshipApplyResult(result, io);
+      }
+      return 0;
+    }
+    if (args.command === 'relationshipFeedback') {
+      const result = await (deps.exportLocalRelationshipFeedbackLabels ?? exportLocalRelationshipFeedbackLabels)(
+        project,
+        {
+          connectionId: args.connectionId,
+          decision: args.decision,
+        },
+      );
+      if (args.jsonl) { // --jsonl takes precedence over --json when both are set
+        io.stdout.write(
+          (deps.formatKloRelationshipFeedbackLabelsJsonl ?? formatKloRelationshipFeedbackLabelsJsonl)(result),
+        );
+      } else if (args.json) {
+        io.stdout.write(`${JSON.stringify(result, null, 2)}\n`);
+      } else {
+        writeRelationshipFeedbackSummary(result, io);
+      }
+      return 0;
+    }
+    if (args.command === 'relationshipCalibration') {
+      const result = await (deps.calibrateLocalRelationshipFeedbackLabels ?? calibrateLocalRelationshipFeedbackLabels)(
+        project,
+        {
+          connectionId: args.connectionId,
+          decision: args.decision,
+          acceptThreshold: args.acceptThreshold,
+          reviewThreshold: args.reviewThreshold,
+        },
+      );
+      if (args.json) {
+        io.stdout.write(`${JSON.stringify(result satisfies KloRelationshipFeedbackCalibrationReport, null, 2)}\n`);
+      } else {
+        io.stdout.write(
+          (deps.formatKloRelationshipFeedbackCalibrationMarkdown ?? formatKloRelationshipFeedbackCalibrationMarkdown)(
+            result,
+          ),
+        );
+      }
+      return 0;
+    }
+    if (args.command === 'relationshipThresholds') {
+      const result = await (
+        deps.adviseLocalRelationshipFeedbackThresholds ?? adviseLocalRelationshipFeedbackThresholds
+      )(project, {
+        connectionId: args.connectionId,
+        minTotalLabels: args.minTotalLabels,
+        minAcceptedLabels: args.minAcceptedLabels,
+        minRejectedLabels: args.minRejectedLabels,
+      });
+      if (args.json) {
+        io.stdout.write(`${JSON.stringify(result satisfies KloRelationshipThresholdAdviceReport, null, 2)}\n`);
+      } else {
+        io.stdout.write(
+          (deps.formatKloRelationshipThresholdAdviceMarkdown ?? formatKloRelationshipThresholdAdviceMarkdown)(result),
+        );
+      }
+      return 0;
+    }
+
+    const connector =
+      args.mode !== 'structural' || args.detectRelationships
+        ? await createKloCliScanConnector(project, args.connectionId)
+        : undefined; // a purely structural scan without relationship detection runs without a connector
+    const progress = createCliScanProgress(io);
+    try {
+      const result = await (deps.runLocalScan ?? runLocalScan)({
+        project,
+        connectionId: args.connectionId,
+        mode: args.mode,
+        detectRelationships: args.detectRelationships,
+        dryRun: args.dryRun,
+        trigger: 'cli',
+        databaseIntrospectionUrl: args.databaseIntrospectionUrl,
+        connector,
+        adapters: (deps.createLocalIngestAdapters ?? 
createKloCliLocalIngestAdapters)(project, { + databaseIntrospectionUrl: args.databaseIntrospectionUrl, + }), + progress, + }); + progress.flush(); + writeRunSummary(result.report, args.projectDir, io); + } finally { + progress.flush(); + } + return 0; + } catch (error) { + io.stderr.write(`${error instanceof Error ? error.message : String(error)}\n`); + return 1; + } +} diff --git a/packages/cli/src/serve.test.ts b/packages/cli/src/serve.test.ts new file mode 100644 index 00000000..53637db5 --- /dev/null +++ b/packages/cli/src/serve.test.ts @@ -0,0 +1,431 @@ +import { mkdtemp, rm } from 'node:fs/promises'; +import { tmpdir } from 'node:os'; +import { join } from 'node:path'; +import type { SourceAdapter } from '@klo/context/ingest'; +import { initKloProject } from '@klo/context/project'; +import { describe, expect, it, vi } from 'vitest'; +import { runKloServeStdio } from './serve.js'; + +describe('runKloServeStdio', () => { + it('loads the project, creates local ports, and connects the server to stdio', async () => { + const connect = vi.fn().mockResolvedValue(undefined); + const project = { + projectDir: '/tmp/klo-project', + config: { + connections: {}, + llm: { + provider: { backend: 'gateway' }, + models: { default: 'anthropic/claude-sonnet' }, + }, + }, + } as never; + const loadProject = vi.fn().mockResolvedValue(project); + const contextTools = { connections: { list: vi.fn() } }; + const createContextTools = vi.fn().mockReturnValue(contextTools); + const createServer = vi.fn().mockReturnValue({ connect }); + const createTransport = vi.fn().mockReturnValue({ kind: 'stdio' }); + let stderr = ''; + + await expect( + runKloServeStdio( + { + mcp: 'stdio', + projectDir: '/tmp/klo-project', + userId: 'agent', + semanticCompute: false, + semanticComputeUrl: undefined, + databaseIntrospectionUrl: undefined, + executeQueries: false, + memoryCapture: false, + memoryModel: undefined, + }, + { + loadProject, + createContextTools, + createServer, + createTransport, + 
stderr: { write: (chunk: string) => (stderr += chunk) }, + }, + ), + ).resolves.toBe(0); + + expect(loadProject).toHaveBeenCalledWith({ projectDir: '/tmp/klo-project' }); + expect(createContextTools).toHaveBeenCalledWith( + project, + expect.objectContaining({ + localIngest: expect.objectContaining({ + adapters: expect.any(Array), + }), + localScan: expect.objectContaining({ + adapters: expect.any(Array), + }), + }), + ); + expect(createServer).toHaveBeenCalledWith({ + name: 'klo', + version: '0.0.0-private', + userContext: { userId: 'agent' }, + contextTools, + memoryCapture: undefined, + }); + expect(connect).toHaveBeenCalledWith({ kind: 'stdio' }); + expect(stderr).toContain('klo MCP server running on stdio for /tmp/klo-project'); + }); + + it('enables local ingest ports by default when serving stdio', async () => { + const project = { projectDir: '/tmp/klo-project', config: { connections: {} } } as never; + const connect = vi.fn().mockResolvedValue(undefined); + const createContextTools = vi.fn(() => ({ connections: { list: async () => [] } })); + + await expect( + runKloServeStdio( + { + mcp: 'stdio', + projectDir: '/tmp/klo-project', + userId: 'agent', + semanticCompute: false, + semanticComputeUrl: undefined, + databaseIntrospectionUrl: undefined, + executeQueries: false, + memoryCapture: false, + memoryModel: undefined, + }, + { + loadProject: async () => project, + createContextTools, + createServer: vi.fn(() => ({ connect }) as never), + createTransport: vi.fn(() => ({}) as never), + stderr: { write: vi.fn() }, + }, + ), + ).resolves.toBe(0); + + expect(createContextTools).toHaveBeenCalledWith( + project, + expect.objectContaining({ + localIngest: expect.objectContaining({ + adapters: expect.any(Array), + }), + localScan: expect.objectContaining({ + adapters: expect.any(Array), + }), + }), + ); + }); + + it('passes daemon database introspection URL to MCP local ingest adapters', async () => { + const project = { projectDir: '/tmp/klo-project', config: { 
connections: {} } } as never; + const connect = vi.fn().mockResolvedValue(undefined); + const createContextTools = vi.fn(() => ({ connections: { list: async () => [] } })); + const createdAdapters: SourceAdapter[] = []; + const createIngestAdapters = vi.fn(() => createdAdapters); + + await expect( + runKloServeStdio( + { + mcp: 'stdio', + projectDir: '/tmp/klo-project', + userId: 'agent', + semanticCompute: false, + semanticComputeUrl: undefined, + databaseIntrospectionUrl: 'http://127.0.0.1:8765', + executeQueries: false, + memoryCapture: false, + memoryModel: undefined, + }, + { + loadProject: async () => project, + createContextTools, + createIngestAdapters, + createServer: vi.fn(() => ({ connect }) as never), + createTransport: vi.fn(() => ({}) as never), + stderr: { write: vi.fn() }, + }, + ), + ).resolves.toBe(0); + + expect(createContextTools).toHaveBeenCalledWith( + project, + expect.objectContaining({ + localIngest: expect.objectContaining({ + adapters: expect.any(Array), + }), + localScan: expect.objectContaining({ + adapters: createdAdapters, + databaseIntrospectionUrl: 'http://127.0.0.1:8765', + }), + }), + ); + expect(createIngestAdapters).toHaveBeenCalledWith(project, { + databaseIntrospectionUrl: 'http://127.0.0.1:8765', + }); + }); + + it('uses CLI-native local ingest adapters for standalone scan tools', async () => { + const project = { projectDir: '/tmp/klo-project', config: { connections: {} } } as never; + const createContextTools = vi.fn(() => ({}) as never); + + await runKloServeStdio( + { + mcp: 'stdio', + projectDir: '/tmp/klo-project', + userId: 'local', + semanticCompute: false, + executeQueries: false, + memoryCapture: false, + }, + { + loadProject: vi.fn(async () => project), + createContextTools, + createServer: vi.fn(() => ({ connect: vi.fn(async () => undefined) }) as never), + createTransport: vi.fn(() => ({}) as never), + stderr: { write: vi.fn() }, + }, + ); + + expect(createContextTools).toHaveBeenCalledWith( + project, + 
expect.objectContaining({ + localIngest: expect.objectContaining({ adapters: expect.any(Array) }), + localScan: expect.objectContaining({ adapters: expect.any(Array) }), + }), + ); + }); + + it('passes semantic compute to local project ports when enabled', async () => { + const tempDir = await mkdtemp(join(tmpdir(), 'klo-cli-serve-')); + try { + const project = await initKloProject({ projectDir: tempDir, projectName: 'warehouse' }); + const createContextTools = vi.fn(() => ({ connections: { list: async () => [] } })); + const semanticLayerCompute = { query: vi.fn(), validateSources: vi.fn(), generateSources: vi.fn() }; + + await expect( + runKloServeStdio( + { + mcp: 'stdio', + projectDir: project.projectDir, + userId: 'local', + semanticCompute: true, + semanticComputeUrl: undefined, + databaseIntrospectionUrl: undefined, + executeQueries: false, + memoryCapture: false, + memoryModel: undefined, + }, + { + loadProject: async () => project, + createContextTools, + createSemanticLayerCompute: () => semanticLayerCompute, + createServer: vi.fn(() => ({ connect: vi.fn(async () => undefined) }) as never), + createTransport: vi.fn(() => ({}) as never), + stderr: { write: vi.fn() }, + }, + ), + ).resolves.toBe(0); + + expect(createContextTools).toHaveBeenCalledWith( + project, + expect.objectContaining({ + semanticLayerCompute, + localIngest: expect.objectContaining({ + adapters: expect.any(Array), + semanticLayerCompute, + }), + localScan: expect.objectContaining({ + adapters: expect.any(Array), + }), + }), + ); + } finally { + await rm(tempDir, { recursive: true, force: true }); + } + }); + + it('uses the HTTP semantic compute port when a daemon URL is provided', async () => { + const project = { projectDir: '/tmp/klo-project', config: { connections: {} } } as never; + const semanticLayerCompute = { query: vi.fn(), validateSources: vi.fn(), generateSources: vi.fn() }; + const createHttpSemanticLayerCompute = vi.fn(() => semanticLayerCompute); + const createContextTools 
= vi.fn(() => ({ connections: { list: async () => [] } })); + + await expect( + runKloServeStdio( + { + mcp: 'stdio', + projectDir: '/tmp/klo-project', + userId: 'agent', + semanticCompute: true, + semanticComputeUrl: 'http://127.0.0.1:8765', + databaseIntrospectionUrl: undefined, + executeQueries: false, + memoryCapture: false, + memoryModel: undefined, + }, + { + loadProject: async () => project, + createContextTools, + createHttpSemanticLayerCompute, + createServer: vi.fn(() => ({ connect: vi.fn(async () => undefined) }) as never), + createTransport: vi.fn(() => ({}) as never), + stderr: { write: vi.fn() }, + }, + ), + ).resolves.toBe(0); + + expect(createHttpSemanticLayerCompute).toHaveBeenCalledWith('http://127.0.0.1:8765'); + expect(createContextTools).toHaveBeenCalledWith( + project, + expect.objectContaining({ + semanticLayerCompute, + }), + ); + }); + + it('passes a query executor to local project ports only when query execution is enabled', async () => { + const project = { projectDir: '/tmp/klo-project', config: { connections: {} } } as never; + const connect = vi.fn().mockResolvedValue(undefined); + const createContextTools = vi.fn(() => ({ connections: { list: async () => [] } })); + const semanticLayerCompute = { query: vi.fn(), validateSources: vi.fn(), generateSources: vi.fn() }; + const queryExecutor = { execute: vi.fn() }; + + await expect( + runKloServeStdio( + { + mcp: 'stdio', + projectDir: '/tmp/klo-project', + userId: 'agent', + semanticCompute: true, + semanticComputeUrl: undefined, + databaseIntrospectionUrl: undefined, + executeQueries: true, + memoryCapture: false, + memoryModel: undefined, + }, + { + loadProject: async () => project, + createContextTools, + createSemanticLayerCompute: () => semanticLayerCompute, + createQueryExecutor: () => queryExecutor, + createServer: vi.fn(() => ({ connect }) as never), + createTransport: vi.fn(() => ({}) as never), + stderr: { write: vi.fn() }, + }, + ), + ).resolves.toBe(0); + + 
expect(createContextTools).toHaveBeenCalledWith( + project, + expect.objectContaining({ + semanticLayerCompute, + queryExecutor, + localIngest: expect.objectContaining({ + adapters: expect.any(Array), + semanticLayerCompute, + queryExecutor, + }), + localScan: expect.objectContaining({ + adapters: expect.any(Array), + }), + }), + ); + }); + + it('creates a local memory capture port when memory capture is enabled', async () => { + const project = { + projectDir: '/tmp/klo-project', + config: { + connections: {}, + llm: { + provider: { backend: 'gateway' }, + models: { default: 'anthropic/claude-sonnet' }, + }, + }, + } as never; + const connect = vi.fn().mockResolvedValue(undefined); + const contextTools = { connections: { list: vi.fn() } }; + const memoryCapture = { capture: vi.fn(), status: vi.fn() }; + const createContextTools = vi.fn().mockReturnValue(contextTools); + const createMemoryCapture = vi.fn().mockReturnValue(memoryCapture); + const createServer = vi.fn().mockReturnValue({ connect }); + + await expect( + runKloServeStdio( + { + mcp: 'stdio', + projectDir: '/tmp/klo-project', + userId: 'agent', + semanticCompute: false, + semanticComputeUrl: undefined, + databaseIntrospectionUrl: undefined, + executeQueries: false, + memoryCapture: true, + memoryModel: 'anthropic/claude-sonnet', + }, + { + loadProject: async () => project, + createContextTools, + createMemoryCapture, + createServer, + createTransport: vi.fn(() => ({}) as never), + stderr: { write: vi.fn() }, + }, + ), + ).resolves.toBe(0); + + expect(createMemoryCapture).toHaveBeenCalledWith(project, { + llmProvider: expect.objectContaining({ getModel: expect.any(Function) }), + semanticLayerCompute: undefined, + }); + expect(createServer).toHaveBeenCalledWith({ + name: 'klo', + version: '0.0.0-private', + userContext: { userId: 'agent' }, + contextTools, + memoryCapture, + }); + }); + + it('reuses semantic compute for local memory capture when enabled', async () => { + const project = { + projectDir: 
'/tmp/klo-project', + config: { + connections: {}, + llm: { + provider: { backend: 'gateway' }, + models: { default: 'openai/gpt' }, + }, + }, + } as never; + const semanticLayerCompute = { query: vi.fn(), validateSources: vi.fn(), generateSources: vi.fn() }; + const createMemoryCapture = vi.fn().mockReturnValue({ capture: vi.fn(), status: vi.fn() }); + + await expect( + runKloServeStdio( + { + mcp: 'stdio', + projectDir: '/tmp/klo-project', + userId: 'agent', + semanticCompute: true, + semanticComputeUrl: undefined, + databaseIntrospectionUrl: undefined, + executeQueries: false, + memoryCapture: true, + memoryModel: 'openai/gpt', + }, + { + loadProject: async () => project, + createContextTools: vi.fn(() => ({ connections: { list: async () => [] } })), + createSemanticLayerCompute: () => semanticLayerCompute, + createMemoryCapture, + createServer: vi.fn(() => ({ connect: vi.fn(async () => undefined) }) as never), + createTransport: vi.fn(() => ({}) as never), + stderr: { write: vi.fn() }, + }, + ), + ).resolves.toBe(0); + + expect(createMemoryCapture).toHaveBeenCalledWith(project, { + llmProvider: expect.objectContaining({ getModel: expect.any(Function) }), + semanticLayerCompute, + }); + }); +}); diff --git a/packages/cli/src/serve.ts b/packages/cli/src/serve.ts new file mode 100644 index 00000000..9e491598 --- /dev/null +++ b/packages/cli/src/serve.ts @@ -0,0 +1,119 @@ +import { createLocalKloLlmProviderFromConfig } from '@klo/context'; +import { createDefaultLocalQueryExecutor, type KloSqlQueryExecutorPort } from '@klo/context/connections'; +import { + createHttpSemanticLayerComputePort, + createPythonSemanticLayerComputePort, + type KloSemanticLayerComputePort, +} from '@klo/context/daemon'; +import { createDefaultLocalIngestAdapters, type LocalIngestMcpOptions } from '@klo/context/ingest'; +import { + createDefaultKloMcpServer, + createLocalProjectMcpContextPorts, + type KloMcpContextPorts, +} from '@klo/context/mcp'; +import { 
createLocalProjectMemoryCapture, type MemoryCaptureService } from '@klo/context/memory';
+import { type KloLocalProject, loadKloProject } from '@klo/context/project';
+import type { LocalScanMcpOptions } from '@klo/context/scan';
+import { StdioServerTransport } from '@modelcontextprotocol/sdk/server/stdio.js';
+import { createKloCliLocalIngestAdapters } from './local-adapters.js';
+import { createKloCliScanConnector } from './local-scan-connectors.js';
+import { profileMark } from './startup-profile.js';
+
+profileMark('module:serve');
+/**
+ * Arguments of the stdio serve command.
+ *
+ * NOTE(review): `memoryModel` is declared here but `runKloServeStdio` never reads it;
+ * confirm whether it should be forwarded to the LLM provider or the memory capture port.
+ */
+export interface KloServeArgs {
+  mcp: 'stdio';
+  projectDir: string;
+  userId: string;
+  semanticCompute: boolean;
+  semanticComputeUrl?: string;
+  databaseIntrospectionUrl?: string;
+  executeQueries: boolean;
+  memoryCapture: boolean;
+  memoryModel?: string;
+}
+/** Minimal output surface used by the serve command; only `stderr.write` is needed. */
+interface KloServeIo {
+  stderr: { write(chunk: string): void };
+}
+/** Options handed to the context-tools factory by `runKloServeStdio`. */
+interface LocalProjectContextToolOptions {
+  semanticLayerCompute?: KloSemanticLayerComputePort;
+  queryExecutor?: KloSqlQueryExecutorPort;
+  localIngest?: LocalIngestMcpOptions;
+  localScan?: LocalScanMcpOptions;
+}
+/**
+ * Injectable collaborators of `runKloServeStdio`; every member is optional and falls back
+ * to the real implementation when omitted.
+ */
+interface KloServeDeps {
+  loadProject?: typeof loadKloProject;
+  createContextTools?: (project: KloLocalProject, options?: LocalProjectContextToolOptions) => KloMcpContextPorts;
+  createSemanticLayerCompute?: () => KloSemanticLayerComputePort;
+  createHttpSemanticLayerCompute?: (baseUrl: string) => KloSemanticLayerComputePort;
+  createIngestAdapters?: typeof createDefaultLocalIngestAdapters; // typed after the default factory; the fallback actually used is createKloCliLocalIngestAdapters
+  createQueryExecutor?: () => KloSqlQueryExecutorPort;
+  createMemoryCapture?: typeof createLocalProjectMemoryCapture;
+  createServer?: typeof createDefaultKloMcpServer;
+  createTransport?: () => StdioServerTransport;
+  stderr?: KloServeIo['stderr'];
+}
+/**
+ * Start the KLO MCP server on stdio for one local project.
+ *
+ * Loads the project, then builds the optional ports according to `args`:
+ * - semantic compute only when `args.semanticCompute` is set, using the HTTP factory when
+ *   `args.semanticComputeUrl` is given and `createPythonSemanticLayerComputePort` otherwise;
+ * - a query executor only when `args.executeQueries` is set;
+ * - a memory capture service only when `args.memoryCapture` is set, sharing the semantic
+ *   compute port and an LLM provider built from `project.config.llm`.
+ * One set of ingest adapters is created and shared by the local-ingest and local-scan options.
+ * The remainder of the function creates the server, connects it to the transport, writes a
+ * start-up notice to stderr and resolves to 0.
+ *
+ * @param args Parsed serve arguments.
+ * @param deps Optional collaborator overrides.
+ * @returns 0 once the server is connected; failures propagate as rejections (no try/catch here).
+ */
+export async function runKloServeStdio(args: KloServeArgs, deps: KloServeDeps = {}): Promise { // NOTE(review): Promise type argument looks stripped in this copy of the patch
+  const loadProjectFn = deps.loadProject ?? loadKloProject;
+  const createContextToolsFn = deps.createContextTools ?? createLocalProjectMcpContextPorts;
+  const createServerFn = deps.createServer ?? createDefaultKloMcpServer;
+  const createTransportFn = deps.createTransport ?? (() => new StdioServerTransport());
+  const stderr = deps.stderr ?? process.stderr;
+
+  const project = await loadProjectFn({ projectDir: args.projectDir });
+  const semanticLayerCompute = args.semanticCompute
+    ? args.semanticComputeUrl
+      ? (deps.createHttpSemanticLayerCompute ?? ((baseUrl) => createHttpSemanticLayerComputePort({ baseUrl })))(
+          args.semanticComputeUrl,
+        )
+      : (deps.createSemanticLayerCompute ?? createPythonSemanticLayerComputePort)()
+    : undefined;
+  const queryExecutor = args.executeQueries
+    ? (deps.createQueryExecutor ?? createDefaultLocalQueryExecutor)()
+    : undefined;
+  const createIngestAdapters = deps.createIngestAdapters ?? createKloCliLocalIngestAdapters;
+  const localAdapters = createIngestAdapters(project, {
+    databaseIntrospectionUrl: args.databaseIntrospectionUrl,
+  });
+  const llmProvider = args.memoryCapture
+    ? (createLocalKloLlmProviderFromConfig(project.config.llm) ?? undefined)
+    : undefined;
+  const memoryCapture: MemoryCaptureService | undefined = args.memoryCapture
+    ? (deps.createMemoryCapture ?? createLocalProjectMemoryCapture)(project, {
+        llmProvider,
+        semanticLayerCompute,
+      })
+    : undefined;
+  const localIngest: LocalIngestMcpOptions = {
+    adapters: localAdapters,
+    ...(semanticLayerCompute ? { semanticLayerCompute } : {}), // optional keys are omitted entirely rather than set to undefined
+    ...(queryExecutor ? { queryExecutor } : {}),
+  };
+  const localScan: LocalScanMcpOptions = {
+    adapters: localAdapters,
+    databaseIntrospectionUrl: args.databaseIntrospectionUrl,
+    createConnector: (connectionId) => createKloCliScanConnector(project, connectionId),
+  };
+  const contextToolOptions: LocalProjectContextToolOptions = {
+    localIngest,
+    localScan,
+    ...(semanticLayerCompute ? { semanticLayerCompute } : {}),
+    ...(queryExecutor ? 
{ queryExecutor } : {}), + }; + const contextTools = createContextToolsFn(project, contextToolOptions); + const server = createServerFn({ + name: 'klo', + version: '0.0.0-private', + userContext: { userId: args.userId }, + contextTools, + memoryCapture, + }); + const transport = createTransportFn(); + await server.connect(transport); + stderr.write(`klo MCP server running on stdio for ${project.projectDir}\n`); + return 0; +} diff --git a/packages/cli/src/setup-agents.test.ts b/packages/cli/src/setup-agents.test.ts new file mode 100644 index 00000000..b20e1b07 --- /dev/null +++ b/packages/cli/src/setup-agents.test.ts @@ -0,0 +1,176 @@ +import { mkdir, mkdtemp, readFile, rm, stat, writeFile } from 'node:fs/promises'; +import { tmpdir } from 'node:os'; +import { join } from 'node:path'; +import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'; +import { + plannedKloAgentFiles, + readKloAgentInstallManifest, + removeKloAgentInstall, + runKloSetupAgentsStep, +} from './setup-agents.js'; + +function makeIo() { + let stdout = ''; + let stderr = ''; + return { + io: { + stdout: { write: (chunk: string) => (stdout += chunk) }, + stderr: { write: (chunk: string) => (stderr += chunk) }, + }, + stdout: () => stdout, + stderr: () => stderr, + }; +} + +describe('setup agents', () => { + let tempDir: string; + + beforeEach(async () => { + tempDir = await mkdtemp(join(tmpdir(), 'klo-setup-agents-')); + await mkdir(join(tempDir, '.klo', 'agents'), { recursive: true }); + await writeFile(join(tempDir, 'klo.yaml'), 'project: revenue\nconnections: {}\n', 'utf-8'); + }); + + afterEach(async () => { + await rm(tempDir, { recursive: true, force: true }); + }); + + it('plans project-scoped CLI and MCP files for every target', () => { + expect(plannedKloAgentFiles({ projectDir: tempDir, target: 'claude-code', scope: 'project', mode: 'both' })).toEqual([ + { kind: 'file', path: join(tempDir, '.claude/skills/klo/SKILL.md') }, + { kind: 'json-key', path: join(tempDir, 
'.mcp.json'), jsonPath: ['mcpServers', 'klo'] }, + ]); + expect(plannedKloAgentFiles({ projectDir: tempDir, target: 'codex', scope: 'project', mode: 'cli' })).toEqual([ + { kind: 'file', path: join(tempDir, '.agents/skills/klo/SKILL.md') }, + ]); + expect(plannedKloAgentFiles({ projectDir: tempDir, target: 'cursor', scope: 'project', mode: 'mcp' })).toEqual([ + { kind: 'json-key', path: join(tempDir, '.cursor/mcp.json'), jsonPath: ['mcpServers', 'klo'] }, + ]); + expect(plannedKloAgentFiles({ projectDir: tempDir, target: 'opencode', scope: 'project', mode: 'cli' })).toEqual([ + { kind: 'file', path: join(tempDir, '.opencode/commands/klo.md') }, + ]); + expect(plannedKloAgentFiles({ projectDir: tempDir, target: 'universal', scope: 'project', mode: 'both' })).toEqual([ + { kind: 'file', path: join(tempDir, '.agents/skills/klo/SKILL.md') }, + { kind: 'json-key', path: join(tempDir, '.agents/mcp/klo.json'), jsonPath: ['mcpServers', 'klo'] }, + ]); + }); + + it('installs target files, writes a manifest, and marks agents complete', async () => { + const io = makeIo(); + + await expect( + runKloSetupAgentsStep( + { + projectDir: tempDir, + inputMode: 'disabled', + yes: true, + agents: true, + target: 'universal', + scope: 'project', + mode: 'both', + skipAgents: false, + }, + io.io, + ), + ).resolves.toEqual({ + status: 'ready', + projectDir: tempDir, + installs: [{ target: 'universal', scope: 'project', mode: 'both' }], + }); + + await expect(stat(join(tempDir, '.agents/skills/klo/SKILL.md'))).resolves.toBeDefined(); + await expect(stat(join(tempDir, '.agents/mcp/klo.json'))).resolves.toBeDefined(); + const skill = await readFile(join(tempDir, '.agents/skills/klo/SKILL.md'), 'utf-8'); + expect(skill).toContain(`--project-dir ${tempDir}`); + expect(skill).toContain('must not print secrets'); + expect(skill).toContain('klo agent sql execute'); + expect(await readKloAgentInstallManifest(tempDir)).toMatchObject({ + version: 1, + projectDir: tempDir, + installs: [{ target: 
'universal', scope: 'project', mode: 'both' }], + }); + expect(await readFile(join(tempDir, 'klo.yaml'), 'utf-8')).toContain('agents'); + expect(io.stderr()).toBe(''); + }); + + it('removes only manifest-listed files and JSON keys', async () => { + const io = makeIo(); + await runKloSetupAgentsStep( + { + projectDir: tempDir, + inputMode: 'disabled', + yes: true, + agents: true, + target: 'claude-code', + scope: 'project', + mode: 'both', + skipAgents: false, + }, + io.io, + ); + await writeFile(join(tempDir, '.claude/skills/klo/keep.txt'), 'user file', 'utf-8'); + + await expect(removeKloAgentInstall(tempDir, io.io)).resolves.toBe(0); + + await expect(stat(join(tempDir, '.claude/skills/klo/SKILL.md'))).rejects.toThrow(); + await expect(stat(join(tempDir, '.claude/skills/klo/keep.txt'))).resolves.toBeDefined(); + await expect(readKloAgentInstallManifest(tempDir)).resolves.toEqual(null); + }); + + it('uses prompts in interactive mode and supports Back', async () => { + const io = makeIo(); + const prompts = { + select: vi.fn(async () => 'back'), + multiselect: vi.fn(async () => ['codex']), + cancel: vi.fn(), + }; + + await expect( + runKloSetupAgentsStep( + { + projectDir: tempDir, + inputMode: 'auto', + yes: false, + agents: true, + scope: 'project', + mode: 'cli', + skipAgents: false, + }, + io.io, + { prompts }, + ), + ).resolves.toEqual({ status: 'back', projectDir: tempDir }); + }); + + it('explains how to select multiple agent targets in interactive mode', async () => { + const io = makeIo(); + const prompts = { + select: vi.fn(async () => 'cli'), + multiselect: vi.fn(async () => ['back']), + cancel: vi.fn(), + }; + + await expect( + runKloSetupAgentsStep( + { + projectDir: tempDir, + inputMode: 'auto', + yes: false, + agents: true, + scope: 'project', + mode: 'cli', + skipAgents: false, + }, + io.io, + { prompts }, + ), + ).resolves.toEqual({ status: 'back', projectDir: tempDir }); + + expect(prompts.multiselect).toHaveBeenCalledWith( + 
expect.objectContaining({ + message: + 'Which agent targets should KLO install?\nUse Up/Down to move, Space to select or unselect, Enter to confirm, Escape to go back, or Ctrl+C to exit.', + }), + ); + }); +}); diff --git a/packages/cli/src/setup-agents.ts b/packages/cli/src/setup-agents.ts new file mode 100644 index 00000000..83c18bf9 --- /dev/null +++ b/packages/cli/src/setup-agents.ts @@ -0,0 +1,336 @@ +import { mkdir, readFile, rm, writeFile } from 'node:fs/promises'; +import { dirname, join, resolve } from 'node:path'; +import { cancel, isCancel, multiselect, select } from '@clack/prompts'; +import { loadKloProject, markKloSetupStepComplete, serializeKloProjectConfig } from '@klo/context/project'; +import type { KloCliIo } from './cli-runtime.js'; +import { withMenuOptionsSpacing, withMultiselectNavigation } from './prompt-navigation.js'; +import { withSetupInterruptConfirmation } from './setup-interrupt.js'; + +export type KloAgentTarget = 'claude-code' | 'codex' | 'cursor' | 'opencode' | 'universal'; +export type KloAgentScope = 'project' | 'global'; +export type KloAgentInstallMode = 'cli' | 'mcp' | 'both'; + +export interface KloSetupAgentsArgs { + projectDir: string; + inputMode: 'auto' | 'disabled'; + yes: boolean; + agents: boolean; + target?: KloAgentTarget; + scope: KloAgentScope; + mode: KloAgentInstallMode; + skipAgents: boolean; +} + +export type KloSetupAgentsResult = + | { + status: 'ready'; + projectDir: string; + installs: Array<{ target: KloAgentTarget; scope: KloAgentScope; mode: KloAgentInstallMode }>; + } + | { status: 'skipped'; projectDir: string } + | { status: 'back'; projectDir: string } + | { status: 'missing-input'; projectDir: string } + | { status: 'failed'; projectDir: string }; + +export interface KloAgentInstallManifest { + version: 1; + projectDir: string; + installedAt: string; + installs: Array<{ target: KloAgentTarget; scope: KloAgentScope; mode: KloAgentInstallMode }>; + entries: Array<{ kind: 'file'; path: string } | { 
/**
 * Returns the location of the agent install manifest for a project.
 *
 * The project directory is resolved to an absolute path first, then the fixed
 * `.klo/agents/install-manifest.json` suffix is appended.
 *
 * @param projectDir Project directory, absolute or relative to the current working directory.
 * @returns Absolute path of the manifest file (the file itself may not exist).
 */
export function agentInstallManifestPath(projectDir: string): string {
  const projectRoot = resolve(projectDir);
  const manifestFile = join(projectRoot, '.klo/agents/install-manifest.json');
  return manifestFile;
}
/**
 * Sets `value` at `jsonPath` inside the JSON document stored at `path`,
 * creating the file, its parent directories, and any missing intermediate
 * objects along the way.
 *
 * The target files (for example `.mcp.json` or `.cursor/mcp.json`) are shared
 * with the user's own configuration, so an existing file is only ever updated
 * in place:
 *
 * - a missing or empty file is treated as `{}`;
 * - a file that cannot be read for any reason other than "does not exist",
 *   that is not valid JSON, or whose top level is not an object makes this
 *   function throw *before* anything is written, so the user's content is
 *   never silently replaced.
 *
 * Intermediate segments that exist but are not plain objects are replaced
 * with an empty object, as before.
 *
 * @param path     JSON file to update.
 * @param jsonPath Non-empty list of object keys leading to the value.
 * @param value    Object stored at the final key (replaces any previous value).
 * @throws Error when `jsonPath` is empty, or when the existing file is
 *         unreadable, malformed, or not a JSON object.
 */
async function writeJsonKey(path: string, jsonPath: string[], value: Record<string, unknown>): Promise<void> {
  const leafKey = jsonPath.at(-1);
  if (leafKey === undefined) {
    throw new Error(`Cannot update ${path}: JSON path must contain at least one key.`);
  }

  let raw: string | undefined;
  try {
    raw = await readFile(path, 'utf-8');
  } catch (error) {
    // Only a missing file means "start from scratch"; permission or I/O errors must surface.
    if ((error as { code?: string } | null)?.code !== 'ENOENT') throw error;
  }

  let root: Record<string, unknown> = {};
  if (raw !== undefined && raw.trim().length > 0) {
    let parsed: unknown;
    try {
      parsed = JSON.parse(raw);
    } catch (error) {
      const reason = error instanceof Error ? error.message : String(error);
      throw new Error(`Cannot update ${path}: existing file is not valid JSON (${reason}). Fix or remove it and retry.`);
    }
    if (typeof parsed !== 'object' || parsed === null || Array.isArray(parsed)) {
      throw new Error(`Cannot update ${path}: expected a JSON object at the top level.`);
    }
    root = parsed as Record<string, unknown>;
  }

  let cursor = root;
  for (const segment of jsonPath.slice(0, -1)) {
    const next = cursor[segment];
    if (!next || typeof next !== 'object' || Array.isArray(next)) cursor[segment] = {};
    cursor = cursor[segment] as Record<string, unknown>;
  }
  cursor[leafKey] = value;

  await mkdir(dirname(path), { recursive: true });
  await writeFile(path, `${JSON.stringify(root, null, 2)}\n`, 'utf-8');
}
/**
 * Undoes a previous agent installation using the project's install manifest.
 *
 * Only artifacts listed in the manifest are touched: `file` entries are
 * deleted (missing files are ignored) and `json-key` entries have their key
 * removed from the containing JSON file (failures there are ignored). The
 * manifest itself is deleted last.
 *
 * @param projectDir Project whose manifest should be processed.
 * @param io         CLI output streams; a one-line summary is written to stdout.
 * @returns Always `0`, including when no manifest exists.
 */
export async function removeKloAgentInstall(projectDir: string, io: KloCliIo): Promise<number> {
  const recorded = await readKloAgentInstallManifest(projectDir);
  if (recorded === null) {
    io.stdout.write('No KLO agent installation manifest found.\n');
    return 0;
  }

  for (const item of recorded.entries) {
    switch (item.kind) {
      case 'file':
        await rm(item.path, { force: true });
        break;
      case 'json-key':
        try {
          await removeJsonKey(item.path, item.jsonPath);
        } catch {
          // Best effort: an unreadable or already-cleaned JSON file is not an error here.
        }
        break;
    }
  }

  const manifestFile = agentInstallManifestPath(projectDir);
  await rm(manifestFile, { force: true });
  io.stdout.write('Removed KLO agent integration files from manifest.\n');
  return 0;
}
/**
 * Runs the "agents" step of `klo setup`: decides which agent integrations to
 * install, writes their files / JSON keys, records them in the install
 * manifest, and marks the step complete in the project config.
 *
 * Flow:
 * 1. `skipAgents`, or non-interactive mode without `agents`, returns `skipped`.
 * 2. The install mode comes from `args.mode` when input is disabled, otherwise
 *    from a select prompt (which can also answer `skip` or `back`).
 * 3. Targets come from `args.target`, otherwise from a multiselect prompt;
 *    non-interactive mode without a target returns `missing-input`.
 * 4. Every target is planned before anything is written, so an unsupported
 *    target/scope combination fails the step without leaving files from other
 *    targets behind.
 * 5. If writing fails part-way, whatever was touched so far is still recorded
 *    in the manifest (best effort) so `removeKloAgentInstall` can clean it up.
 *
 * @param args Parsed setup arguments.
 * @param io   CLI output streams; errors go to stderr, progress to stdout.
 * @param deps Optional prompt adapter override (used by tests).
 * @returns A result whose `status` is `ready`, `skipped`, `back`,
 *          `missing-input`, or `failed`. This function does not throw for
 *          installation errors; they are reported as `failed`.
 */
export async function runKloSetupAgentsStep(
  args: KloSetupAgentsArgs,
  io: KloCliIo,
  deps: KloSetupAgentsDeps = {},
): Promise<KloSetupAgentsResult> {
  if (args.skipAgents) {
    io.stdout.write('Agent integration skipped.\n');
    return { status: 'skipped', projectDir: args.projectDir };
  }
  if (!args.agents && args.inputMode === 'disabled') {
    return { status: 'skipped', projectDir: args.projectDir };
  }

  const prompts = deps.prompts ?? createPromptAdapter();
  const mode =
    args.inputMode === 'disabled'
      ? args.mode
      : ((await prompts.select({
          message: 'How should agents use this KLO project?',
          options: [
            { value: 'cli', label: 'CLI tools and skills' },
            { value: 'mcp', label: 'MCP server config' },
            { value: 'both', label: 'Both' },
            { value: 'skip', label: 'Skip' },
            { value: 'back', label: 'Back' },
          ],
        })) as KloAgentInstallMode | 'skip' | 'back');
  if (mode === 'back') return { status: 'back', projectDir: args.projectDir };
  if (mode === 'skip') return { status: 'skipped', projectDir: args.projectDir };

  const targets =
    args.target !== undefined
      ? [args.target]
      : args.inputMode === 'disabled'
        ? []
        : ((await prompts.multiselect({
            message: withMultiselectNavigation('Which agent targets should KLO install?'),
            options: [
              { value: 'claude-code', label: 'Claude Code' },
              { value: 'codex', label: 'Codex' },
              { value: 'cursor', label: 'Cursor' },
              { value: 'opencode', label: 'OpenCode' },
              { value: 'universal', label: 'Universal .agents' },
              { value: 'back', label: 'Back' },
            ],
            required: true,
          })) as KloAgentTarget[]);
  if (targets.includes('back' as KloAgentTarget)) return { status: 'back', projectDir: args.projectDir };
  if (targets.length === 0) {
    io.stderr.write('Missing agent target: pass --target or use interactive setup.\n');
    return { status: 'missing-input', projectDir: args.projectDir };
  }

  const installs = targets.map((target) => ({ target, scope: args.scope, mode }));
  const completed: typeof installs = [];
  const touched: InstallEntry[] = [];
  let manifestWritten = false;

  // Merges the given installs/entries into whatever manifest already exists on disk.
  const recordManifest = async (recordedInstalls: typeof installs, recordedEntries: InstallEntry[]): Promise<void> => {
    const existing = await readKloAgentInstallManifest(args.projectDir);
    await writeManifest(args.projectDir, mergeManifest(args.projectDir, existing, recordedInstalls, recordedEntries));
    manifestWritten = true;
  };

  try {
    // Plan everything first: plannedKloAgentFiles throws for unsupported combinations,
    // and failing here means no file has been written yet.
    const plans = installs.map((install) => ({
      install,
      planned: plannedKloAgentFiles({ projectDir: args.projectDir, ...install }),
    }));

    for (const { install, planned } of plans) {
      // Track entries before writing so a failure half-way through a target is still recorded.
      touched.push(...planned);
      await installTarget({ projectDir: args.projectDir, ...install });
      completed.push(install);
    }

    await recordManifest(installs, touched);
    await markAgentsComplete(args.projectDir);
    io.stdout.write(`Agent integration installed for ${installs.map((install) => install.target).join(', ')}.\n`);
    return { status: 'ready', projectDir: args.projectDir, installs };
  } catch (error) {
    if (!manifestWritten && touched.length > 0) {
      try {
        await recordManifest(completed, touched);
      } catch {
        // Best effort: the original failure below is the one worth reporting.
      }
    }
    io.stderr.write(`${error instanceof Error ? error.message : String(error)}\n`);
    return { status: 'failed', projectDir: args.projectDir };
  }
}
/**
 * Test fixture: writes a scan report for `syncId` in which the scan mode is
 * `enriched` and table descriptions, column descriptions, and embeddings are
 * all marked `completed`.
 *
 * @param projectDir Temporary project directory used by the test.
 * @param syncId     Sync identifier passed through to `writeScanReport`; defaults to a fixed timestamp.
 */
async function writeReadyEnrichedScanReport(projectDir: string, syncId = '2026-05-09T10:00:00.000Z') {
  const fullyEnriched = {
    mode: 'enriched',
    tableDescriptions: 'completed',
    columnDescriptions: 'completed',
    embeddings: 'completed',
  };
  await writeScanReport(projectDir, syncId, fullyEnriched);
}
retryableFailedTargets: [], + commands: contextBuildCommands(tempDir, 'setup-context-local-abc123'), + }); + + const state = await readKloSetupContextState(tempDir); + expect(state).toMatchObject({ + runId: 'setup-context-local-abc123', + status: 'running', + primarySourceConnectionIds: ['warehouse'], + contextSourceConnectionIds: ['docs'], + commands: { + watch: `klo setup context watch setup-context-local-abc123 --project-dir ${tempDir}`, + status: `klo setup context status setup-context-local-abc123 --project-dir ${tempDir}`, + resume: `klo setup --project-dir ${tempDir}`, + }, + }); + expect(JSON.stringify(state)).not.toContain('DATABASE_URL'); + expect(JSON.stringify(state)).not.toContain('NOTION_TOKEN'); + }); + + it('runs setup context build, verifies readiness, and marks context complete', async () => { + await writeReadyProject(tempDir); + const io = makeIo(); + const runContextBuildMock = vi.fn(async () => ({ exitCode: 0, detached: false })); + const verifyContextReady = vi.fn(async () => ({ + ready: true, + agentContextReady: true, + semanticSearchReady: true, + details: ['warehouse: enriched scan complete', 'docs: memory update complete'], + })); + + await expect( + runKloSetupContextStep( + { projectDir: tempDir, inputMode: 'disabled' }, + io.io, + { + runIdFactory: () => 'setup-context-local-abc123', + now: () => new Date('2026-05-09T10:00:00.000Z'), + runContextBuild: runContextBuildMock, + verifyContextReady, + }, + ), + ).resolves.toEqual({ status: 'ready', projectDir: tempDir, runId: 'setup-context-local-abc123' }); + + expect(runContextBuildMock).toHaveBeenCalledWith( + expect.objectContaining({ projectDir: tempDir }), + expect.objectContaining({ + projectDir: tempDir, + inputMode: 'disabled', + scanMode: 'enriched', + detectRelationships: true, + }), + io.io, + expect.objectContaining({ onDetach: expect.any(Function) }), + ); + expect(verifyContextReady).toHaveBeenCalledWith(tempDir); + expect(await readFile(join(tempDir, 'klo.yaml'), 
'utf-8')).toContain(' - context'); + await expect(readKloSetupContextState(tempDir)).resolves.toMatchObject({ + runId: 'setup-context-local-abc123', + status: 'completed', + completedAt: '2026-05-09T10:00:00.000Z', + }); + expect(io.stdout()).toContain('KLO context is ready for agents.'); + }); + + it('marks context complete without prompting when initial source ingest already made agent context', async () => { + await writeReadyProject(tempDir); + await mkdir(join(tempDir, 'semantic-layer', 'dbt-main'), { recursive: true }); + await mkdir(join(tempDir, 'knowledge', 'global'), { recursive: true }); + await writeFile(join(tempDir, 'semantic-layer', 'dbt-main', 'mart_revenue_daily.yaml'), 'name: mart_revenue_daily\n'); + await writeFile(join(tempDir, 'knowledge', 'global', 'metrics.md'), '# Metrics\n'); + await writeReadyEnrichedScanReport(tempDir); + const io = makeIo(); + const runContextBuildMock = vi.fn(async () => ({ exitCode: 0, detached: false })); + + await expect( + runKloSetupContextStep( + { projectDir: tempDir, inputMode: 'auto' }, + io.io, + { + prompts: { + select: vi.fn(async () => { + throw new Error('setup should not prompt when context is already ready'); + }), + cancel: vi.fn(), + }, + runIdFactory: () => 'setup-context-local-existing', + now: () => new Date('2026-05-09T10:00:00.000Z'), + runContextBuild: runContextBuildMock, + }, + ), + ).resolves.toEqual({ status: 'ready', projectDir: tempDir, runId: 'setup-context-local-existing' }); + + expect(runContextBuildMock).not.toHaveBeenCalled(); + expect(await readFile(join(tempDir, 'klo.yaml'), 'utf-8')).toContain(' - context'); + await expect(readKloSetupContextState(tempDir)).resolves.toMatchObject({ + runId: 'setup-context-local-existing', + status: 'completed', + completedAt: '2026-05-09T10:00:00.000Z', + contextSourceConnectionIds: ['docs'], + }); + expect(io.stdout()).toContain('KLO context is ready for agents.'); + }); + + it('does not mark context ready until primary scans have completed 
description enrichment', async () => { + await writeReadyProject(tempDir); + await mkdir(join(tempDir, 'semantic-layer', 'dbt-main'), { recursive: true }); + await writeFile(join(tempDir, 'semantic-layer', 'dbt-main', 'mart_revenue_daily.yaml'), 'name: mart_revenue_daily\n'); + await writeScanReport(tempDir, '2026-05-09T09:59:00.000Z', { + mode: 'structural', + tableDescriptions: 'skipped', + columnDescriptions: 'skipped', + embeddings: 'skipped', + }); + const io = makeIo(); + const runContextBuildMock = vi.fn(async () => { + await writeReadyEnrichedScanReport(tempDir, '2026-05-09T10:00:00.000Z'); + return { exitCode: 0, detached: false }; + }); + + await expect( + runKloSetupContextStep( + { projectDir: tempDir, inputMode: 'disabled' }, + io.io, + { + runIdFactory: () => 'setup-context-local-enriched-scan', + now: () => new Date('2026-05-09T10:00:00.000Z'), + runContextBuild: runContextBuildMock, + }, + ), + ).resolves.toEqual({ status: 'ready', projectDir: tempDir, runId: 'setup-context-local-enriched-scan' }); + + expect(runContextBuildMock).toHaveBeenCalledOnce(); + expect(io.stdout()).not.toContain('Existing context artifacts were found from setup ingest.'); + }); + + it('does not treat schema-only scan shards as completed setup context', async () => { + await writeReadyProject(tempDir); + await mkdir(join(tempDir, 'semantic-layer', 'warehouse', '_schema'), { recursive: true }); + await writeFile(join(tempDir, 'semantic-layer', 'warehouse', '_schema', 'public.yaml'), 'tables: {}\n'); + const io = makeIo(); + const runContextBuildMock = vi.fn(async () => { + await mkdir(join(tempDir, 'knowledge', 'global'), { recursive: true }); + await writeFile(join(tempDir, 'knowledge', 'global', 'metrics.md'), '# Metrics\n'); + await writeReadyEnrichedScanReport(tempDir); + return { exitCode: 0, detached: false }; + }); + + await expect( + runKloSetupContextStep( + { projectDir: tempDir, inputMode: 'disabled' }, + io.io, + { + runIdFactory: () => 
'setup-context-local-schema-only', + now: () => new Date('2026-05-09T10:00:00.000Z'), + runContextBuild: runContextBuildMock, + }, + ), + ).resolves.toEqual({ status: 'ready', projectDir: tempDir, runId: 'setup-context-local-schema-only' }); + + expect(runContextBuildMock).toHaveBeenCalledOnce(); + expect(io.stdout()).not.toContain('Existing context artifacts were found from setup ingest.'); + }); + + it('refuses empty setup context builds', async () => { + await writeFile( + join(tempDir, 'klo.yaml'), + [ + 'project: revenue', + 'connections: {}', + 'llm:', + ' provider:', + ' backend: anthropic', + ' models:', + ' default: claude-sonnet-4-6', + 'ingest:', + ' embeddings:', + ' backend: openai', + ' model: text-embedding-3-small', + ' dimensions: 1536', + '', + ].join('\n'), + 'utf-8', + ); + const io = makeIo(); + + await expect( + runKloSetupContextStep( + { projectDir: tempDir, inputMode: 'disabled' }, + io.io, + { runIdFactory: () => 'setup-context-local-empty' }, + ), + ).resolves.toEqual({ status: 'failed', projectDir: tempDir }); + + expect(io.stderr()).toContain('No primary or context sources are configured for a KLO context build.'); + }); + + it('prints JSON setup context command status with watch and resume commands', async () => { + await mkdir(join(tempDir, '.klo', 'setup'), { recursive: true }); + await writeKloSetupContextState(tempDir, { + runId: 'setup-context-local-abc123', + status: 'detached', + startedAt: '2026-05-09T10:00:00.000Z', + updatedAt: '2026-05-09T10:01:00.000Z', + primarySourceConnectionIds: ['warehouse'], + contextSourceConnectionIds: ['docs'], + reportIds: [], + artifactPaths: [], + retryableFailedTargets: [], + commands: contextBuildCommands(tempDir, 'setup-context-local-abc123'), + }); + const io = makeIo(); + + await expect( + runKloSetupContextCommand( + { command: 'status', projectDir: tempDir, runId: 'setup-context-local-abc123', json: true }, + io.io, + ), + ).resolves.toBe(0); + + 
expect(JSON.parse(io.stdout())).toMatchObject({ + ready: false, + status: 'detached', + runId: 'setup-context-local-abc123', + watchCommand: `klo setup context watch setup-context-local-abc123 --project-dir ${tempDir}`, + statusCommand: `klo setup context status setup-context-local-abc123 --project-dir ${tempDir}`, + }); + }); + + it('runs direct build commands without asking for setup confirmation first', async () => { + await writeReadyProject(tempDir); + const io = makeIo(); + const runContextBuildMock = vi.fn(async () => ({ exitCode: 0, detached: false })); + + await expect( + runKloSetupContextCommand( + { command: 'build', projectDir: tempDir, inputMode: 'auto' }, + io.io, + { + prompts: { + select: vi.fn(async () => { + throw new Error('direct build should not prompt'); + }), + cancel: vi.fn(), + }, + runIdFactory: () => 'setup-context-local-direct', + runContextBuild: runContextBuildMock, + verifyContextReady: vi.fn(async () => ({ + ready: true, + agentContextReady: true, + semanticSearchReady: true, + details: [], + })), + }, + ), + ).resolves.toBe(0); + + expect(runContextBuildMock).toHaveBeenCalled(); + }); +}); diff --git a/packages/cli/src/setup-context.ts b/packages/cli/src/setup-context.ts new file mode 100644 index 00000000..da354f34 --- /dev/null +++ b/packages/cli/src/setup-context.ts @@ -0,0 +1,765 @@ +import { mkdirSync, writeFileSync } from 'node:fs'; +import { access, mkdir, readdir, readFile, writeFile } from 'node:fs/promises'; +import { join, resolve } from 'node:path'; +import { cancel, isCancel, select } from '@clack/prompts'; +import { + type KloLocalProject, + loadKloProject, + markKloSetupStepComplete, + serializeKloProjectConfig, +} from '@klo/context/project'; +import type { KloCliIo } from './cli-runtime.js'; +import { buildPublicIngestPlan } from './public-ingest.js'; +import { runContextBuild } from './context-build-view.js'; +import { withMenuOptionsSpacing } from './prompt-navigation.js'; +import { withSetupInterruptConfirmation 
} from './setup-interrupt.js'; + +export type KloSetupContextBuildStatus = + | 'not_started' + | 'running' + | 'detached' + | 'paused' + | 'completed' + | 'failed' + | 'interrupted' + | 'stale'; + +export interface KloSetupContextCommands { + build: string; + watch: string; + status: string; + stop: string; + resume: string; +} + +export interface KloSetupContextState { + runId?: string; + status: KloSetupContextBuildStatus; + startedAt?: string; + updatedAt?: string; + completedAt?: string; + primarySourceConnectionIds: string[]; + contextSourceConnectionIds: string[]; + reportIds: string[]; + artifactPaths: string[]; + retryableFailedTargets: string[]; + commands: KloSetupContextCommands; + failureReason?: string; +} + +export interface KloSetupContextStatusSummary { + ready: boolean; + status: KloSetupContextBuildStatus; + runId?: string; + watchCommand?: string; + statusCommand?: string; + retryCommand?: string; + detail?: string; +} + +export interface KloSetupContextReadiness { + ready: boolean; + agentContextReady: boolean; + semanticSearchReady: boolean; + details: string[]; + failedTargets?: string[]; +} + +export type KloSetupContextResult = + | { status: 'ready'; projectDir: string; runId: string } + | { status: 'skipped'; projectDir: string } + | { status: 'detached'; projectDir: string; runId: string } + | { status: 'paused'; projectDir: string; runId: string } + | { status: 'back'; projectDir: string } + | { status: 'missing-input'; projectDir: string } + | { status: 'failed'; projectDir: string }; + +export interface KloSetupContextStepArgs { + projectDir: string; + inputMode: 'auto' | 'disabled'; + forcePrompt?: boolean; + allowEmpty?: boolean; + prompt?: boolean; +} + +export type KloSetupContextCommandArgs = + | { command: 'build'; projectDir: string; inputMode: 'auto' | 'disabled' } + | { command: 'watch'; projectDir: string; runId?: string; inputMode: 'auto' | 'disabled' } + | { command: 'status'; projectDir: string; runId?: string; json: 
/**
 * Reports whether `path` can be accessed.
 *
 * Any rejection from `access` (missing path, permission error, …) is reported
 * as `false`; this function never rejects for those cases.
 *
 * @param path File or directory path to probe.
 * @returns `true` when `access(path)` succeeds, otherwise `false`.
 */
async function pathExists(path: string): Promise<boolean> {
  return access(path).then(
    () => true,
    () => false,
  );
}
` ${runId}` : ''; + return { + build: `klo setup context build --project-dir ${resolvedProjectDir}`, + watch: `klo setup context watch${runIdArg} --project-dir ${resolvedProjectDir}`, + status: `klo setup context status${runIdArg} --project-dir ${resolvedProjectDir}`, + stop: `klo setup context stop${runIdArg} --project-dir ${resolvedProjectDir}`, + resume: `klo setup --project-dir ${resolvedProjectDir}`, + }; +} + +function notStartedState(projectDir: string): KloSetupContextState { + return { + status: 'not_started', + primarySourceConnectionIds: [], + contextSourceConnectionIds: [], + reportIds: [], + artifactPaths: [], + retryableFailedTargets: [], + commands: contextBuildCommands(projectDir), + }; +} + +function normalizeState(projectDir: string, value: unknown): KloSetupContextState { + if (typeof value !== 'object' || value === null || Array.isArray(value)) { + return notStartedState(projectDir); + } + const record = value as Partial; + const status = record.status ?? 'not_started'; + const runId = typeof record.runId === 'string' && record.runId.length > 0 ? record.runId : undefined; + return { + ...(runId ? { runId } : {}), + status, + ...(typeof record.startedAt === 'string' ? { startedAt: record.startedAt } : {}), + ...(typeof record.updatedAt === 'string' ? { updatedAt: record.updatedAt } : {}), + ...(typeof record.completedAt === 'string' ? { completedAt: record.completedAt } : {}), + primarySourceConnectionIds: Array.isArray(record.primarySourceConnectionIds) + ? record.primarySourceConnectionIds.filter((item): item is string => typeof item === 'string') + : [], + contextSourceConnectionIds: Array.isArray(record.contextSourceConnectionIds) + ? record.contextSourceConnectionIds.filter((item): item is string => typeof item === 'string') + : [], + reportIds: Array.isArray(record.reportIds) + ? record.reportIds.filter((item): item is string => typeof item === 'string') + : [], + artifactPaths: Array.isArray(record.artifactPaths) + ? 
/**
 * Loads the persisted context-build state for a project.
 *
 * A missing state file yields the not-started state. A file that exists but
 * does not hold valid JSON (for example one truncated by an interrupted
 * write) is handled the same way instead of rejecting with a `SyntaxError`,
 * so callers such as the `status` command keep working.
 *
 * @param projectDir - Project directory whose state file should be read.
 * @returns The normalized state, or the not-started state when nothing usable is stored.
 * @throws Re-throws read failures other than the file being absent.
 */
export async function readKloSetupContextState(projectDir: string): Promise<KloSetupContextState> {
  const filePath = statePath(projectDir);
  if (!(await pathExists(filePath))) {
    return notStartedState(projectDir);
  }

  let raw: string;
  try {
    raw = await readFile(filePath, 'utf-8');
  } catch (error) {
    // The file can disappear between the existence check and the read.
    if (typeof error === 'object' && error !== null && (error as { code?: unknown }).code === 'ENOENT') {
      return notStartedState(projectDir);
    }
    throw error;
  }

  let parsed: unknown;
  try {
    parsed = JSON.parse(raw) as unknown;
  } catch {
    // Unparseable content carries no usable state; fall back to "not started".
    return notStartedState(projectDir);
  }
  return normalizeState(projectDir, parsed);
}
/**
 * Lists the capabilities that must be configured before a context build can run.
 *
 * Three checks are made, in this order: a language model backend with a
 * default model, a usable embeddings configuration, and scan enrichment.
 *
 * @param project - Loaded project whose config is inspected.
 * @returns One message per missing capability; empty when everything is ready.
 */
function missingCapabilities(project: KloLocalProject): string[] {
  const { llm, ingest, scan } = project.config;
  const { embeddings } = ingest;

  const modelsReady = llm.provider.backend !== 'none' && Boolean(llm.models.default);
  const embeddingsReady =
    embeddings.backend !== 'none' &&
    embeddings.backend !== 'deterministic' &&
    Boolean(embeddings.model) &&
    !(embeddings.dimensions <= 0);
  const enrichmentReady = scan.enrichment.mode !== 'none';

  const checks: Array<[boolean, string]> = [
    [modelsReady, 'Models are not ready.'],
    [embeddingsReady, 'Embeddings are not ready.'],
    [enrichmentReady, 'Scan enrichment is not configured.'],
  ];
  return checks.filter(([ready]) => !ready).map(([, message]) => message);
}
/**
 * Extracts the string members of an unknown value.
 *
 * @param value - Any value, typically a field read from parsed JSON.
 * @returns The string elements in their original order, or an empty array
 *   when `value` is not an array.
 */
function stringArrayValue(value: unknown): string[] {
  if (!Array.isArray(value)) {
    return [];
  }
  const strings: string[] = [];
  for (const item of value) {
    if (typeof item === 'string') {
      strings.push(item);
    }
  }
  return strings;
}
/**
 * Decides whether a scan report records a finished, enriched scan for a connection.
 *
 * The report must be an object for the given connection, in `enriched` mode,
 * and not a dry run. Table descriptions, column descriptions and embeddings
 * must all be marked `completed`, the completed stages must include both
 * `descriptions` and `embeddings`, and at least one manifest shard path must
 * be listed.
 *
 * @param report - Parsed scan report of unknown shape.
 * @param connectionId - Connection the report is expected to belong to.
 * @returns `true` only when every condition above holds.
 */
function scanReportHasCompletedDescriptionEnrichment(report: unknown, connectionId: string): boolean {
  if (!isRecord(report)) {
    return false;
  }

  const isRealEnrichedScan =
    report.connectionId === connectionId && report.mode === 'enriched' && report.dryRun !== true;
  if (!isRealEnrichedScan) {
    return false;
  }

  const { enrichment, enrichmentState, artifactPaths } = report;
  if (!isRecord(enrichment) || !isRecord(enrichmentState) || !isRecord(artifactPaths)) {
    return false;
  }

  const finishedStages = new Set(stringArrayValue(enrichmentState.completedStages));
  const outputsCompleted =
    enrichment.tableDescriptions === 'completed' &&
    enrichment.columnDescriptions === 'completed' &&
    enrichment.embeddings === 'completed';
  const stagesCompleted = finishedStages.has('descriptions') && finishedStages.has('embeddings');
  const hasManifestShards = stringArrayValue(artifactPaths.manifestShards).length > 0;

  return outputsCompleted && stagesCompleted && hasManifestShards;
}
/**
 * Reports on stderr why a context build cannot start yet.
 *
 * Output is a heading, a "Missing:" list with one line per item, and a closing
 * hint. Each piece is written as its own chunk.
 *
 * @param missing - Messages describing the capabilities that are not ready.
 * @param io - CLI I/O handle; only `stderr` is written to.
 */
function writeMissingCapabilities(missing: string[], io: KloCliIo): void {
  const chunks = [
    'KLO cannot build agent-ready context yet.\n\n',
    'Missing:\n',
    ...missing.map((item) => ` ${item}\n`),
    '\nFix this in setup before building context.\n',
  ];
  for (const chunk of chunks) {
    io.stderr.write(chunk);
  }
}
/**
 * Prints the success summary after a context build passes verification.
 *
 * Lists primary sources and context sources (or "none" for an empty group),
 * followed by the agent-context and semantic-search readiness flags.
 *
 * @param readiness - Verification result whose readiness flags are printed.
 * @param targets - Connection IDs that were part of the build.
 * @param io - CLI I/O handle; only `stdout` is written to.
 */
function writeSuccess(readiness: KloSetupContextReadiness, targets: KloSetupContextTargets, io: KloCliIo): void {
  const emit = (chunk: string): void => {
    io.stdout.write(chunk);
  };
  const readinessLabel = (flag: boolean | undefined): string => (flag ? 'ready' : 'not ready');
  const emitSources = (connectionIds: string[], outcome: string): void => {
    if (connectionIds.length === 0) {
      emit(' none\n');
      return;
    }
    connectionIds.forEach((connectionId) => emit(` ${connectionId}: ${outcome}\n`));
  };

  emit('\nKLO context is ready for agents.\n\n');
  emit('Primary sources:\n');
  emitSources(targets.primarySourceConnectionIds, 'enriched scan complete');
  emit('\nContext sources:\n');
  emitSources(targets.contextSourceConnectionIds, 'memory update complete');
  emit('\nVerification:\n');
  emit(` Agent context: ${readinessLabel(readiness.agentContextReady)}\n`);
  emit(` Semantic search: ${readinessLabel(readiness.semanticSearchReady)}\n`);
}
/**
 * Runs a context build and records its progress in the setup state file.
 *
 * Flow:
 *  1. Persist a `running` state with a fresh run ID.
 *  2. Invoke the build (enriched scan, relationship detection enabled).
 *  3. If the build detached, persist `detached` and return.
 *  4. If the build exited non-zero, persist `failed` with every target marked retryable.
 *  5. Verify agent readiness; on failure persist `failed` and print the details to stderr.
 *  6. Otherwise mark the `context` setup step complete, persist `completed`, and print the summary.
 *
 * Every timestamp, including the one written from the detach callback, comes
 * from `deps.now` when provided so the clock can be controlled by callers.
 *
 * @param args - Step arguments (project directory, input mode, ...).
 * @param io - CLI I/O handle passed to the build and used for messages.
 * @param deps - Optional overrides for the clock, run ID factory, build runner and verifier.
 * @param project - Loaded project handed to the build runner.
 * @param targets - Connection IDs taking part in the build.
 * @returns `detached`, `failed` or `ready`; the run ID is included except on failure.
 */
// The return type mirrors runKloSetupContextStep, which returns this function's result directly.
async function runBuild(
  args: KloSetupContextStepArgs,
  io: KloCliIo,
  deps: KloSetupContextDeps,
  project: KloLocalProject,
  targets: KloSetupContextTargets,
): Promise<Awaited<ReturnType<typeof runKloSetupContextStep>>> {
  const now = deps.now ?? (() => new Date());
  const runId = deps.runIdFactory?.() ?? runIdFactory();
  const startedAt = now().toISOString();
  const runningState: KloSetupContextState = {
    runId,
    status: 'running',
    startedAt,
    updatedAt: startedAt,
    primarySourceConnectionIds: targets.primarySourceConnectionIds,
    contextSourceConnectionIds: targets.contextSourceConnectionIds,
    reportIds: [],
    artifactPaths: [],
    retryableFailedTargets: [],
    commands: contextBuildCommands(args.projectDir, runId),
  };
  await writeKloSetupContextState(args.projectDir, runningState);

  const contextBuild = deps.runContextBuild ?? runContextBuild;
  const buildResult = await contextBuild(
    project,
    {
      projectDir: args.projectDir,
      inputMode: args.inputMode,
      scanMode: 'enriched',
      detectRelationships: true,
    },
    io,
    {
      // NOTE(review): this callback uses synchronous fs calls, presumably so the
      // state is on disk before the caller moves on -- confirm against runContextBuild.
      onDetach: () => {
        const resolvedDir = resolve(args.projectDir);
        mkdirSync(join(resolvedDir, '.klo', 'setup'), { recursive: true });
        const detachedState = normalizeState(resolvedDir, {
          ...runningState,
          status: 'detached',
          // Use the injected clock, like every other timestamp in this function.
          updatedAt: now().toISOString(),
        });
        writeFileSync(statePath(resolvedDir), `${JSON.stringify(detachedState, null, 2)}\n`, 'utf-8');
      },
    },
  );
  if (buildResult.detached) {
    const updatedAt = now().toISOString();
    await writeKloSetupContextState(args.projectDir, { ...runningState, status: 'detached', updatedAt });
    return { status: 'detached', projectDir: args.projectDir, runId };
  }
  if (buildResult.exitCode !== 0) {
    const updatedAt = now().toISOString();
    await writeKloSetupContextState(args.projectDir, {
      ...runningState,
      status: 'failed',
      updatedAt,
      // The build result does not say which target failed, so all of them are retryable.
      retryableFailedTargets: [...targets.primarySourceConnectionIds, ...targets.contextSourceConnectionIds],
      failureReason: 'Context build failed.',
    });
    return { status: 'failed', projectDir: args.projectDir };
  }

  const readiness = await (deps.verifyContextReady ?? defaultVerifyContextReady)(args.projectDir);
  if (!readiness.ready) {
    const updatedAt = now().toISOString();
    await writeKloSetupContextState(args.projectDir, {
      ...runningState,
      status: 'failed',
      updatedAt,
      retryableFailedTargets: readiness.failedTargets ?? [],
      failureReason: readiness.details.join(' '),
    });
    io.stderr.write('KLO context build did not pass agent-readiness verification.\n');
    for (const detail of readiness.details) {
      io.stderr.write(` ${detail}\n`);
    }
    return { status: 'failed', projectDir: args.projectDir };
  }

  await markContextComplete(project.projectDir);
  const completedAt = now().toISOString();
  await writeKloSetupContextState(args.projectDir, {
    ...runningState,
    status: 'completed',
    updatedAt: completedAt,
    completedAt,
    retryableFailedTargets: [],
  });
  writeSuccess(readiness, targets, io);
  return { status: 'ready', projectDir: args.projectDir, runId };
}
runIdFactory(); + await markContextComplete(args.projectDir); + await writeKloSetupContextState(args.projectDir, { + runId, + status: 'completed', + startedAt: completedAt, + updatedAt: completedAt, + completedAt, + primarySourceConnectionIds: targets.primarySourceConnectionIds, + contextSourceConnectionIds: targets.contextSourceConnectionIds, + reportIds: [], + artifactPaths: [], + retryableFailedTargets: [], + commands: contextBuildCommands(args.projectDir, runId), + }); + writeExistingContextSuccess(readiness, io); + return { status: 'ready', projectDir: args.projectDir, runId }; +} + +export async function runKloSetupContextStep( + args: KloSetupContextStepArgs, + io: KloCliIo, + deps: KloSetupContextDeps = {}, +): Promise { + try { + const project = await loadKloProject({ projectDir: args.projectDir }); + const existingState = await readKloSetupContextState(args.projectDir); + if (project.config.setup?.completed_steps.includes('context') === true && existingState.status === 'completed') { + return { status: 'ready', projectDir: args.projectDir, runId: existingState.runId ?? 'setup-context-completed' }; + } + + if ( + (existingState.status === 'running' || existingState.status === 'detached') && + args.inputMode !== 'disabled' + ) { + const prompts = deps.prompts ?? 
createPromptAdapter(); + const choice = await prompts.select({ + message: + 'A context build is running in the background.\n\n' + + 'You can wait for it to finish, check its status, or start a fresh build.', + options: [ + { value: 'status', label: 'Check status' }, + { value: 'rebuild', label: 'Start a fresh context build' }, + { value: 'back', label: 'Back' }, + ], + }); + if (choice === 'status') { + const commands = contextBuildCommands(args.projectDir, existingState.runId); + io.stdout.write(`\nRun: ${commands.status}\n`); + io.stdout.write(`Log: ${join(resolve(args.projectDir), '.klo', 'setup', 'context-build.log')}\n`); + return { status: 'detached', projectDir: args.projectDir, runId: existingState.runId ?? '' }; + } + if (choice === 'back') { + return { status: 'back', projectDir: args.projectDir }; + } + } + + const targets = listContextTargets(project); + if (targets.primarySourceConnectionIds.length === 0 && targets.contextSourceConnectionIds.length === 0) { + if (args.allowEmpty === true) { + return { status: 'skipped', projectDir: args.projectDir }; + } + io.stderr.write('No primary or context sources are configured for a KLO context build.\n'); + return { status: 'failed', projectDir: args.projectDir }; + } + + const missing = missingCapabilities(project); + if (missing.length > 0) { + writeMissingCapabilities(missing, io); + return { status: 'missing-input', projectDir: args.projectDir }; + } + + if (args.forcePrompt !== true && args.prompt !== false && deps.verifyContextReady === undefined) { + const existingContextResult = await completeExistingContext(args, io, deps, targets); + if (existingContextResult) { + return existingContextResult; + } + } + + if (args.inputMode !== 'disabled' && args.prompt !== false) { + const choice = await promptForBuild(deps.prompts ?? 
/**
 * Entry point for the `klo setup context <build|status|watch|stop>` commands.
 *
 * - `build` runs the context step without prompting and maps `ready`/`skipped` to exit code 0.
 * - `status` prints the stored state, as JSON when `args.json` is set.
 * - `watch` prints a heading followed by the stored state.
 * - `stop` marks the stored state as `paused` unless the build already completed.
 *
 * For the non-build commands a supplied run ID must match the stored one.
 * A state file that cannot be read is reported on stderr with exit code 1
 * rather than surfacing as an unhandled rejection.
 *
 * @param args - Parsed command arguments.
 * @param io - CLI I/O handle.
 * @param deps - Optional overrides; `deps.now` supplies the timestamp written by `stop`.
 * @returns Process exit code: 0 on success, 1 on failure.
 */
export async function runKloSetupContextCommand(
  args: KloSetupContextCommandArgs,
  io: KloCliIo,
  deps: KloSetupContextDeps = {},
): Promise<number> {
  if (args.command === 'build') {
    const result = await runKloSetupContextStep(
      { projectDir: args.projectDir, inputMode: args.inputMode, prompt: false },
      io,
      deps,
    );
    return result.status === 'ready' || result.status === 'skipped' ? 0 : 1;
  }

  let state: KloSetupContextState;
  try {
    state = await readKloSetupContextState(args.projectDir);
  } catch (error) {
    // Same error reporting as runKloSetupContextStep: message on stderr, non-zero exit.
    io.stderr.write(`${error instanceof Error ? error.message : String(error)}\n`);
    return 1;
  }
  if (!stateMatchesRunId(state, args.runId)) {
    io.stderr.write(`KLO setup context run "${args.runId}" was not found.\n`);
    return 1;
  }

  if (args.command === 'status') {
    if (args.json) {
      io.stdout.write(`${JSON.stringify(statusPayload(state), null, 2)}\n`);
    } else {
      writeContextStatus(state, io);
    }
    return 0;
  }

  if (args.command === 'watch') {
    io.stdout.write('KLO context build\n');
    writeContextStatus(state, io);
    return 0;
  }

  // Remaining command: stop. Use the injected clock so timestamps stay controllable.
  const now = deps.now ?? (() => new Date());
  const updatedAt = now().toISOString();
  const nextState: KloSetupContextState = {
    ...state,
    status: state.status === 'completed' ? 'completed' : 'paused',
    updatedAt,
  };
  await writeKloSetupContextState(args.projectDir, nextState);
  io.stdout.write(
    state.status === 'completed'
      ? 'KLO context build already completed.\n'
      : 'KLO context build pause requested. Resume with setup when ready.\n',
  );
  return 0;
}
/**
 * Builds the prompt text a free-text setup prompt is expected to show.
 *
 * Trailing newlines are dropped from the message. A single-line message is
 * followed directly by the Escape hint; for a multi-line message a blank line
 * is inserted between the first line (the title) and the rest.
 *
 * @param message - Raw prompt message.
 * @returns The message with the "Press Escape to go back." hint appended.
 */
function textInputPrompt(message: string): string {
  const escapeHint = 'Press Escape to go back.\n';
  const trimmed = message.replace(/\n+$/, '');
  const firstBreak = trimmed.indexOf('\n');
  if (firstBreak === -1) {
    return `${trimmed}\n${escapeHint}`;
  }
  const title = trimmed.slice(0, firstBreak);
  const body = trimmed.slice(firstBreak + 1);
  return `${title}\n\n${body}\n${escapeHint}`;
}
expect(result.status).toBe('back'); + expect(prompts.multiselect).toHaveBeenCalledWith({ + message: + 'Which primary sources should KLO connect to?\n' + + 'Use Up/Down to move, Space to select or unselect, Enter to confirm, Escape to go back, or Ctrl+C to exit.', + options: [ + { value: 'sqlite', label: 'SQLite' }, + { value: 'postgres', label: 'PostgreSQL' }, + { value: 'mysql', label: 'MySQL' }, + { value: 'clickhouse', label: 'ClickHouse' }, + { value: 'sqlserver', label: 'SQL Server' }, + { value: 'bigquery', label: 'BigQuery' }, + { value: 'snowflake', label: 'Snowflake' }, + ], + required: false, + }); + }); + + it('requires choosing a primary source after an empty interactive selection', async () => { + const io = makeIo(); + const prompts = makePromptAdapter({ + multiselectValues: [[], ['back']], + selectValues: ['choose'], + }); + + const result = await runKloSetupDatabasesStep( + { projectDir: tempDir, inputMode: 'auto', skipDatabases: false, databaseSchemas: [] }, + io.io, + { prompts }, + ); + + expect(result.status).toBe('back'); + expect(prompts.select).not.toHaveBeenCalled(); + expect(io.stdout()).toContain( + 'KLO cannot work without at least one primary source. 
Select a source or press Escape to go back.', + ); + expect(prompts.multiselect).toHaveBeenCalledTimes(2); + }); + + it('lets Back from connection method selection return to primary source selection when adding a new driver', async () => { + const prompts = makePromptAdapter({ + multiselectValues: [['postgres'], ['back']], + selectValues: ['back'], + }); + + const result = await runKloSetupDatabasesStep( + { projectDir: tempDir, inputMode: 'auto', skipDatabases: false, databaseSchemas: [] }, + makeIo().io, + { prompts }, + ); + + expect(result.status).toBe('back'); + expect(prompts.select).toHaveBeenCalledWith({ + message: 'How do you want to connect to PostgreSQL?', + options: [ + { value: 'fields', label: 'Enter connection details (host, port, database, user)' }, + { value: 'url', label: 'Paste a connection URL' }, + { value: 'back', label: 'Back' }, + ], + }); + expect(prompts.multiselect).toHaveBeenCalledTimes(2); + expect(vi.mocked(prompts.multiselect).mock.calls[1]?.[0].message).toBe( + 'Which primary sources should KLO connect to?\n' + + 'Use Up/Down to move, Space to select or unselect, Enter to confirm, Escape to go back, or Ctrl+C to exit.', + ); + }); + + it('lets Back leave database setup when the driver came from flags', async () => { + const prompts = makePromptAdapter({ selectValues: ['back'] }); + + const result = await runKloSetupDatabasesStep( + { + projectDir: tempDir, + inputMode: 'auto', + databaseDrivers: ['postgres'], + skipDatabases: false, + databaseSchemas: [], + }, + makeIo().io, + { prompts }, + ); + + expect(result.status).toBe('back'); + expect(prompts.multiselect).not.toHaveBeenCalled(); + expect(prompts.select).toHaveBeenCalledTimes(1); + }); + + it('labels existing database connections with the database type', async () => { + await writeFile( + join(tempDir, 'klo.yaml'), + [ + 'project: warehouse', + 'connections:', + ' warehouse:', + ' driver: postgres', + ' url: env:DATABASE_URL', + ' readonly: true', + '', + ].join('\n'), + 
'utf-8', + ); + const prompts = makePromptAdapter({ selectValues: ['back'] }); + + const result = await runKloSetupDatabasesStep( + { + projectDir: tempDir, + inputMode: 'auto', + databaseDrivers: ['postgres'], + skipDatabases: false, + databaseSchemas: [], + }, + makeIo().io, + { prompts }, + ); + + expect(result.status).toBe('back'); + expect(prompts.select).toHaveBeenCalledWith({ + message: 'Configure PostgreSQL', + options: [ + { value: 'existing:warehouse', label: 'Use existing PostgreSQL connection: warehouse' }, + { value: 'new', label: 'Add new PostgreSQL connection' }, + { value: 'back', label: 'Back' }, + ], + }); + }); + + it('uses a database-specific editable connection name for new interactive connections', async () => { + const io = makeIo(); + const prompts = makePromptAdapter({ + selectValues: ['url'], + textValues: ['', 'env:DATABASE_URL'], + }); + const testConnection = vi.fn(async () => 0); + const scanConnection = vi.fn(async () => 0); + + const result = await runKloSetupDatabasesStep( + { + projectDir: tempDir, + inputMode: 'auto', + databaseDrivers: ['postgres'], + databaseSchemas: [], + skipDatabases: false, + }, + io.io, + { prompts, testConnection, scanConnection }, + ); + + expect(result.status).toBe('ready'); + expect(prompts.text).toHaveBeenNthCalledWith(1, { + message: textInputPrompt(connectionNamePrompt('PostgreSQL')), + placeholder: 'postgres-warehouse', + initialValue: 'postgres-warehouse', + }); + expect(testConnection).toHaveBeenCalledWith(tempDir, 'postgres-warehouse', expect.anything()); + expect(scanConnection).toHaveBeenCalledWith(tempDir, 'postgres-warehouse', expect.anything()); + const config = parseKloProjectConfig(await readFile(join(tempDir, 'klo.yaml'), 'utf-8')); + expect(config.connections['postgres-warehouse']).toEqual({ + driver: 'postgres', + url: 'env:DATABASE_URL', + readonly: true, + }); + }); + + it('tells users Escape goes back in free-text connection prompts', async () => { + const prompts = 
makePromptAdapter({
+      selectValues: ['url'],
+      textValues: ['', 'env:DATABASE_URL'],
+    });
+
+    const result = await runKloSetupDatabasesStep(
+      {
+        projectDir: tempDir,
+        inputMode: 'auto',
+        databaseDrivers: ['postgres'],
+        databaseSchemas: [],
+        skipDatabases: false,
+      },
+      makeIo().io,
+      {
+        prompts,
+        testConnection: vi.fn(async () => 0),
+        scanConnection: vi.fn(async () => 0),
+      },
+    );
+
+    expect(result.status).toBe('ready');
+    expect(prompts.text).toHaveBeenNthCalledWith(1, { // first text prompt: connection name, pre-filled with 'postgres-warehouse'
+      message: textInputPrompt(connectionNamePrompt('PostgreSQL')),
+      placeholder: 'postgres-warehouse',
+      initialValue: 'postgres-warehouse',
+    });
+    expect(prompts.text).toHaveBeenNthCalledWith(2, { // second text prompt: the connection URL, with no placeholder/initialValue
+      message: textInputPrompt('PostgreSQL connection URL'),
+    });
+  });
+
+  it('uses clear setup prompts for every new database connection type', async () => { // Table-driven: one scripted run per driver, then every text prompt (and password prompt, when listed) is compared to the expected options.
+    const cases: Array<{
+      driver: KloSetupDatabaseDriver;
+      selectValues?: string[];
+      textValues: string[];
+      passwordValues?: string[];
+      expectedTextPrompts: Array<{ message: string; placeholder?: string; initialValue?: string }>;
+      expectedPasswordPrompts?: Array<{ message: string }>;
+    }> = [
+      {
+        driver: 'sqlite',
+        textValues: ['', './warehouse.sqlite'],
+        expectedTextPrompts: [
+          {
+            message: connectionNamePrompt('SQLite'),
+            placeholder: 'sqlite-local',
+            initialValue: 'sqlite-local',
+          },
+          {
+            message: 'SQLite database file\nEnter a relative or absolute path, for example ./warehouse.sqlite.',
+          },
+        ],
+      },
+      {
+        driver: 'postgres',
+        selectValues: ['url'],
+        textValues: ['', 'env:DATABASE_URL'],
+        expectedTextPrompts: [
+          {
+            message: connectionNamePrompt('PostgreSQL'),
+            placeholder: 'postgres-warehouse',
+            initialValue: 'postgres-warehouse',
+          },
+          {
+            message: 'PostgreSQL connection URL',
+          },
+        ],
+      },
+      {
+        driver: 'mysql',
+        selectValues: ['url'],
+        textValues: ['', 'env:MYSQL_DATABASE_URL'],
+        expectedTextPrompts: [
+          {
+            message: connectionNamePrompt('MySQL'),
+            placeholder: 'mysql-warehouse',
+            initialValue: 'mysql-warehouse',
+          },
+          {
+            message: 'MySQL connection URL',
+          },
+        ],
+      },
+      {
+        driver: 'clickhouse',
+        selectValues: ['url'],
+        textValues: ['', 'env:CLICKHOUSE_URL'],
+        expectedTextPrompts: [
+          {
+            message: connectionNamePrompt('ClickHouse'),
+            placeholder: 'clickhouse-warehouse',
+            initialValue: 'clickhouse-warehouse',
+          },
+          {
+            message: 'ClickHouse connection URL',
+          },
+        ],
+      },
+      {
+        driver: 'sqlserver',
+        selectValues: ['url'],
+        textValues: ['', 'env:SQLSERVER_DATABASE_URL'],
+        expectedTextPrompts: [
+          {
+            message: connectionNamePrompt('SQL Server'),
+            placeholder: 'sqlserver-warehouse',
+            initialValue: 'sqlserver-warehouse',
+          },
+          {
+            message: 'SQL Server connection URL',
+          },
+        ],
+      },
+      {
+        driver: 'bigquery',
+        selectValues: ['no'],
+        textValues: ['', 'analytics', '/path/to/service-account.json', ''],
+        expectedTextPrompts: [
+          {
+            message: connectionNamePrompt('BigQuery'),
+            placeholder: 'bigquery-warehouse',
+            initialValue: 'bigquery-warehouse',
+          },
+          {
+            message: 'BigQuery dataset\nFor example analytics.',
+          },
+          {
+            message: 'Path to service account JSON file',
+          },
+          {
+            message: 'BigQuery location\nPress Enter for US, or enter a location like EU.',
+            placeholder: 'US',
+            initialValue: 'US',
+          },
+        ],
+      },
+      {
+        driver: 'snowflake', // the only case that also lists expectedPasswordPrompts
+        selectValues: ['no'],
+        textValues: ['', 'env:SNOWFLAKE_ACCOUNT', 'ANALYTICS_WH', 'ANALYTICS', '', 'env:SNOWFLAKE_USER', ''],
+        passwordValues: ['env:SNOWFLAKE_PASSWORD'],
+        expectedTextPrompts: [
+          {
+            message: connectionNamePrompt('Snowflake'),
+            placeholder: 'snowflake-warehouse',
+            initialValue: 'snowflake-warehouse',
+          },
+          {
+            message: 'Snowflake account identifier',
+          },
+          {
+            message: 'Snowflake warehouse\nFor example ANALYTICS_WH.',
+          },
+          {
+            message: 'Snowflake database name',
+          },
+          {
+            message: 'Snowflake schema\nPress Enter for PUBLIC, or enter a schema name.',
+            placeholder: 'PUBLIC',
+            initialValue: 'PUBLIC',
+          },
+          {
+            message: 'Snowflake username',
+          },
+          {
+            message: 'Snowflake role (optional)\nPress Enter to skip.',
+          },
+        ],
+        expectedPasswordPrompts: [
+          {
+            message: 'Snowflake password',
+          },
+        ],
+      },
+    ];
+
+    for (const testCase of cases) { // a fresh prompt adapter is built per case, so recorded calls belong to that case only
+      const prompts = makePromptAdapter({
+        selectValues: testCase.selectValues ?? ['new'], // cases that define no selectValues are scripted with ['new']
+        textValues: testCase.textValues,
+        passwordValues: testCase.passwordValues,
+      });
+      const result = await runKloSetupDatabasesStep(
+        {
+          projectDir: tempDir,
+          inputMode: 'auto',
+          databaseDrivers: [testCase.driver],
+          databaseSchemas: [],
+          skipDatabases: false,
+        },
+        makeIo().io,
+        {
+          prompts,
+          testConnection: vi.fn(async () => 0),
+          scanConnection: vi.fn(async () => 0),
+        },
+      );
+
+      expect(result.status).toBe('ready');
+      expect(vi.mocked(prompts.text).mock.calls.map(([options]) => options)).toEqual(
+        testCase.expectedTextPrompts.map((expectedPrompt) => ({
+          ...expectedPrompt,
+          message: textInputPrompt(expectedPrompt.message), // expected messages are wrapped with textInputPrompt before comparison
+        })),
+      );
+      if (testCase.expectedPasswordPrompts) { // password prompts are only checked for cases that list them
+        expect(vi.mocked(prompts.password).mock.calls.map(([options]) => options)).toEqual(
+          testCase.expectedPasswordPrompts.map((expectedPrompt) => ({
+            ...expectedPrompt,
+            message: textInputPrompt(expectedPrompt.message),
+          })),
+        );
+      }
+    }
+  });
+
+  it('lets Back from connection method selection return to primary source selection', async () => { // Expects status 'back', two multiselect calls, and neither the connection test nor the scan to run.
+    const prompts = makePromptAdapter({
+      multiselectValues: [['postgres'], ['back']],
+      selectValues: ['back'],
+      textValues: [''],
+    });
+    const testConnection = vi.fn(async () => 0);
+    const scanConnection = vi.fn(async () => 0);
+
+    const result = await runKloSetupDatabasesStep(
+      { projectDir: tempDir, inputMode: 'auto', skipDatabases: false, databaseSchemas: [] },
+      makeIo().io,
+      { prompts, testConnection, scanConnection },
+    );
+
+    expect(result.status).toBe('back');
+    expect(prompts.select).toHaveBeenNthCalledWith(1, { // the connection-method menu is expected to offer fields / url / back
+      message: 'How do you want to connect to PostgreSQL?',
+      options: [
+        { value: 'fields', label: 'Enter connection details (host, port, database, user)' },
+        {
value: 'url', label: 'Paste a connection URL' },
+        { value: 'back', label: 'Back' },
+      ],
+    });
+    expect(prompts.multiselect).toHaveBeenCalledTimes(2);
+    expect(testConnection).not.toHaveBeenCalled();
+    expect(scanConnection).not.toHaveBeenCalled();
+  });
+
+  it('shows a configured primary source menu instead of the type checklist when a primary source exists', async () => { // Seeds klo.yaml with a configured 'warehouse' source; expects the select menu, no multiselect, and no test/scan calls.
+    await writeFile(
+      join(tempDir, 'klo.yaml'),
+      [
+        'project: warehouse',
+        'connections:', // NOTE(review): the nested YAML keys below all show a single leading space in this copy — indentation may have been collapsed; confirm against the original file
+        ' warehouse:',
+        ' driver: postgres',
+        ' url: env:DATABASE_URL',
+        ' readonly: true',
+        'setup:',
+        ' database_connection_ids:',
+        ' - warehouse',
+        ' completed_steps:',
+        ' - databases',
+        '',
+      ].join('\n'),
+      'utf-8',
+    );
+    const prompts = makePromptAdapter({ multiselectValues: [['back']], selectValues: ['continue'] });
+    const testConnection = vi.fn(async () => 0);
+    const scanConnection = vi.fn(async () => 0);
+
+    const result = await runKloSetupDatabasesStep(
+      { projectDir: tempDir, inputMode: 'auto', skipDatabases: false, databaseSchemas: [] },
+      makeIo().io,
+      { prompts, testConnection, scanConnection },
+    );
+
+    expect(result).toEqual({ status: 'ready', projectDir: tempDir, connectionIds: ['warehouse'] });
+    expect(prompts.multiselect).not.toHaveBeenCalled(); // the scripted multiselect answer must stay unused
+    expect(prompts.select).toHaveBeenCalledWith({
+      message: 'Primary sources already configured: warehouse\nWhat would you like to do?',
+      options: [
+        { value: 'add', label: 'Add another primary source' },
+        { value: 'continue', label: 'Continue setup' },
+        { value: 'back', label: 'Back' },
+      ],
+    });
+    expect(testConnection).not.toHaveBeenCalled();
+    expect(scanConnection).not.toHaveBeenCalled();
+  });
+
+  it('preserves existing primary source ids when adding another source from the configured menu', async () => { // Adds a MySQL source through the 'add' option; expects both ids in the result and in klo.yaml, with only the new connection tested.
+    await writeFile(
+      join(tempDir, 'klo.yaml'),
+      [
+        'project: warehouse',
+        'connections:',
+        ' warehouse:',
+        ' driver: postgres',
+        ' url: env:DATABASE_URL',
+        ' readonly: true',
+        'setup:',
+        ' database_connection_ids:',
+        ' - warehouse',
+        ' completed_steps:',
+        ' - databases',
+        '',
+      ].join('\n'),
+      'utf-8',
+    );
+    const prompts = makePromptAdapter({
+      selectValues: ['add', 'url', 'continue'],
+      multiselectValues: [['mysql']],
+      textValues: ['', 'env:MYSQL_DATABASE_URL'],
+    });
+    const testConnection = vi.fn(async () => 0);
+    const scanConnection = vi.fn(async () => 0);
+
+    const result = await runKloSetupDatabasesStep(
+      { projectDir: tempDir, inputMode: 'auto', skipDatabases: false, databaseSchemas: [] },
+      makeIo().io,
+      { prompts, testConnection, scanConnection },
+    );
+
+    expect(result).toEqual({
+      status: 'ready',
+      projectDir: tempDir,
+      connectionIds: ['warehouse', 'mysql-warehouse'], // existing id first, new id appended
+    });
+    expect(prompts.multiselect).toHaveBeenCalledTimes(1);
+    expect(prompts.select).toHaveBeenCalledWith({
+      message: 'Primary sources already configured: warehouse\nWhat would you like to do?',
+      options: [
+        { value: 'add', label: 'Add another primary source' },
+        { value: 'continue', label: 'Continue setup' },
+        { value: 'back', label: 'Back' },
+      ],
+    });
+    expect(testConnection).toHaveBeenCalledTimes(1); // the pre-existing 'warehouse' connection is not re-tested
+    expect(testConnection).toHaveBeenCalledWith(tempDir, 'mysql-warehouse', expect.anything());
+    const config = parseKloProjectConfig(await readFile(join(tempDir, 'klo.yaml'), 'utf-8'));
+    expect(config.setup?.database_connection_ids).toEqual(['warehouse', 'mysql-warehouse']);
+  });
+
+  it('lets users add another primary source after completing the first one', async () => { // Scripts two driver selections (postgres, mysql) and the select answers url / add / url / continue.
+    const prompts = makePromptAdapter({
+      multiselectValues: [['postgres'], ['mysql']],
+      selectValues: ['url', 'add', 'url', 'continue'],
+      textValues: ['', 'env:DATABASE_URL', '', 'env:MYSQL_DATABASE_URL'],
+    });
+    const testConnection = vi.fn(async () => 0);
+    const scanConnection = vi.fn(async () => 0);
+
+    const result = await runKloSetupDatabasesStep(
+      { projectDir: tempDir, inputMode: 'auto', skipDatabases: false, databaseSchemas: [] },
+      makeIo().io,
+      { prompts, testConnection,
scanConnection },
+    );
+
+    expect(result).toEqual({
+      status: 'ready',
+      projectDir: tempDir,
+      connectionIds: ['postgres-warehouse', 'mysql-warehouse'],
+    });
+    expect(prompts.multiselect).toHaveBeenCalledTimes(2);
+    expect(prompts.select).toHaveBeenCalledWith({ // the configured-source menu is expected to name the first source that was added
+      message: 'Primary sources already configured: postgres-warehouse\nWhat would you like to do?',
+      options: [
+        { value: 'add', label: 'Add another primary source' },
+        { value: 'continue', label: 'Continue setup' },
+        { value: 'back', label: 'Back' },
+      ],
+    });
+    const config = parseKloProjectConfig(await readFile(join(tempDir, 'klo.yaml'), 'utf-8'));
+    expect(config.setup?.database_connection_ids).toEqual(['postgres-warehouse', 'mysql-warehouse']);
+  });
+
+  it('returns to configured primary menu when submitting empty driver selection after adding a source', async () => { // Second multiselect answer is an empty selection; expects the configured-source menu again and no "cannot work without" message.
+    const io = makeIo();
+    const prompts = makePromptAdapter({
+      multiselectValues: [['postgres'], []],
+      selectValues: ['url', 'add', 'continue'],
+      textValues: ['', 'env:DATABASE_URL'],
+    });
+    const testConnection = vi.fn(async () => 0);
+    const scanConnection = vi.fn(async () => 0);
+
+    const result = await runKloSetupDatabasesStep(
+      { projectDir: tempDir, inputMode: 'auto', skipDatabases: false, databaseSchemas: [] },
+      io.io,
+      { prompts, testConnection, scanConnection },
+    );
+
+    expect(result).toEqual({
+      status: 'ready',
+      projectDir: tempDir,
+      connectionIds: ['postgres-warehouse'],
+    });
+    expect(prompts.multiselect).toHaveBeenCalledTimes(2);
+    expect(io.stdout()).not.toContain('KLO cannot work without at least one primary source');
+    expect(prompts.select).toHaveBeenNthCalledWith(2, {
+      message: 'Primary sources already configured: postgres-warehouse\nWhat would you like to do?',
+      options: [
+        { value: 'add', label: 'Add another primary source' },
+        { value: 'continue', label: 'Continue setup' },
+        { value: 'back', label: 'Back' },
+      ],
+    });
+  });
+
+  it('returns to configured primary menu when submitting empty driver selection with pre-existing source', async () => { // Same empty-selection case, but the source comes from a seeded klo.yaml and no testConnection/scanConnection mocks are passed.
+    await writeFile(
+      join(tempDir, 'klo.yaml'),
+      [
+        'project: warehouse',
+        'connections:', // NOTE(review): the nested YAML keys below all show a single leading space in this copy — indentation may have been collapsed; confirm against the original file
+        ' warehouse:',
+        ' driver: postgres',
+        ' url: env:DATABASE_URL',
+        ' readonly: true',
+        'setup:',
+        ' database_connection_ids:',
+        ' - warehouse',
+        ' completed_steps:',
+        ' - databases',
+        '',
+      ].join('\n'),
+      'utf-8',
+    );
+    const io = makeIo();
+    const prompts = makePromptAdapter({
+      multiselectValues: [[]],
+      selectValues: ['add', 'continue'],
+    });
+
+    const result = await runKloSetupDatabasesStep(
+      { projectDir: tempDir, inputMode: 'auto', skipDatabases: false, databaseSchemas: [] },
+      io.io,
+      { prompts },
+    );
+
+    expect(result).toEqual({ status: 'ready', projectDir: tempDir, connectionIds: ['warehouse'] });
+    expect(io.stdout()).not.toContain('KLO cannot work without at least one primary source');
+    expect(prompts.select).toHaveBeenNthCalledWith(2, {
+      message: 'Primary sources already configured: warehouse\nWhat would you like to do?',
+      options: [
+        { value: 'add', label: 'Add another primary source' },
+        { value: 'continue', label: 'Continue setup' },
+        { value: 'back', label: 'Back' },
+      ],
+    });
+  });
+
+  it('lets Escape from connection fields return to connection method selection', async () => { // Expects the connection-method select to be shown twice and the run to end 'ready'.
+    const prompts = makePromptAdapter({
+      selectValues: ['fields', 'url'],
+      textValues: ['', undefined, 'env:DATABASE_URL'], // presumably the undefined answer models Escape (per the test title) — confirm in makePromptAdapter
+    });
+    const testConnection = vi.fn(async () => 0);
+    const scanConnection = vi.fn(async () => 0);
+
+    const result = await runKloSetupDatabasesStep(
+      {
+        projectDir: tempDir,
+        inputMode: 'auto',
+        databaseDrivers: ['postgres'],
+        databaseSchemas: [],
+        skipDatabases: false,
+      },
+      makeIo().io,
+      { prompts, testConnection, scanConnection },
+    );
+
+    expect(result.status).toBe('ready');
+    expect(prompts.select).toHaveBeenCalledTimes(2);
+    expect(vi.mocked(prompts.select).mock.calls[0]?.[0].message).toBe('How do you want to connect to PostgreSQL?');
+
expect(vi.mocked(prompts.select).mock.calls[1]?.[0].message).toBe('How do you want to connect to PostgreSQL?');
+    expect(testConnection).toHaveBeenCalledWith(tempDir, 'postgres-warehouse', expect.anything());
+  });
+
+  it('explains where Back goes after missing PostgreSQL field input', async () => { // Expects the 2nd select call to offer 'retry' / 'back' under the missing-details message, and status 'back'.
+    const prompts = makePromptAdapter({
+      multiselectValues: [['postgres'], ['back']],
+      selectValues: ['fields', 'back'],
+      textValues: ['', 'db.example.com', '5432', ''],
+    });
+
+    const result = await runKloSetupDatabasesStep(
+      { projectDir: tempDir, inputMode: 'auto', skipDatabases: false, databaseSchemas: [] },
+      makeIo().io,
+      {
+        prompts,
+        testConnection: vi.fn(async () => 0),
+        scanConnection: vi.fn(async () => 0),
+      },
+    );
+
+    expect(result.status).toBe('back');
+    expect(prompts.select).toHaveBeenNthCalledWith(2, {
+      message:
+        'Some PostgreSQL connection details are missing.\n' +
+        'Continue entering details, or go back to primary source selection.',
+      options: [
+        { value: 'retry', label: 'Continue entering PostgreSQL details' },
+        { value: 'back', label: 'Back to primary source selection' },
+      ],
+    });
+  });
+
+  it('lets Escape from connection name return to primary source selection', async () => { // Only text answer is undefined; expects status 'back', two multiselect calls, and no select/test/scan calls.
+    const prompts = makePromptAdapter({
+      multiselectValues: [['postgres'], ['back']],
+      textValues: [undefined],
+    });
+    const testConnection = vi.fn(async () => 0);
+    const scanConnection = vi.fn(async () => 0);
+
+    const result = await runKloSetupDatabasesStep(
+      {
+        projectDir: tempDir,
+        inputMode: 'auto',
+        databaseSchemas: [],
+        skipDatabases: false,
+      },
+      makeIo().io,
+      { prompts, testConnection, scanConnection },
+    );
+
+    expect(result.status).toBe('back');
+    expect(prompts.multiselect).toHaveBeenCalledTimes(2);
+    expect(prompts.select).not.toHaveBeenCalled();
+    expect(testConnection).not.toHaveBeenCalled();
+    expect(scanConnection).not.toHaveBeenCalled();
+  });
+
+  it('builds a Postgres connection from individual fields and stores password in .klo/secrets', async () => { // Field-by-field entry; expects the password in klo.yaml to be a file: reference and the secret file to hold the typed value.
+    const io = makeIo();
+    const prompts = makePromptAdapter({
+      selectValues: ['fields'],
+      textValues: ['', 'db.example.com', '', 'analytics', 'readonly'], // presumably the blank third answer is the port, which the config assertion below expects as 5432 — confirm prompt order
+      passwordValues: ['s3cret'],
+    });
+    const testConnection = vi.fn(async () => 0);
+    const scanConnection = vi.fn(async () => 0);
+
+    const result = await runKloSetupDatabasesStep(
+      {
+        projectDir: tempDir,
+        inputMode: 'auto',
+        databaseDrivers: ['postgres'],
+        databaseSchemas: [],
+        skipDatabases: false,
+      },
+      io.io,
+      { prompts, testConnection, scanConnection },
+    );
+
+    expect(result.status).toBe('ready');
+    const config = parseKloProjectConfig(await readFile(join(tempDir, 'klo.yaml'), 'utf-8'));
+    const connection = config.connections['postgres-warehouse'];
+    expect(connection).toMatchObject({
+      driver: 'postgres',
+      host: 'db.example.com',
+      port: 5432,
+      database: 'analytics',
+      username: 'readonly',
+      readonly: true,
+    });
+    expect(connection.password).toMatch(/^file:/);
+    const secretPath = join(tempDir, '.klo/secrets/postgres-warehouse-password');
+    await expect(readFile(secretPath, 'utf-8')).resolves.toBe('s3cret\n'); // stored value is the typed password plus a trailing newline
+    if (process.platform !== 'win32') { // permission bits are only asserted off Windows
+      expect((await stat(secretPath)).mode & 0o777).toBe(0o600);
+    }
+  });
+
+  it('stores credential-bearing pasted URLs in .klo/secrets automatically', async () => { // A pasted URL that embeds user:password is expected to land in a secrets file, with klo.yaml holding a file: reference.
+    const io = makeIo();
+    const prompts = makePromptAdapter({
+      selectValues: ['url'],
+      textValues: ['', 'postgresql://myuser:s3cret@db.example.com:5432/analytics'], // pragma: allowlist secret
+    });
+    const testConnection = vi.fn(async () => 0);
+    const scanConnection = vi.fn(async () => 0);
+
+    const result = await runKloSetupDatabasesStep(
+      {
+        projectDir: tempDir,
+        inputMode: 'auto',
+        databaseDrivers: ['postgres'],
+        databaseSchemas: [],
+        skipDatabases: false,
+      },
+      io.io,
+      { prompts, testConnection, scanConnection },
+    );
+
+    expect(result.status).toBe('ready');
+    const config = parseKloProjectConfig(await readFile(join(tempDir, 'klo.yaml'),
'utf-8'));
+    const connection = config.connections['postgres-warehouse'];
+    expect(connection.url).toBe(`file:${resolve(tempDir, '.klo/secrets/postgres-warehouse-url')}`); // reference uses the resolved path of the secrets file
+    expect(connection.driver).toBe('postgres');
+    const secretContent = await readFile(join(tempDir, '.klo/secrets/postgres-warehouse-url'), 'utf-8');
+    expect(secretContent).toBe('postgresql://myuser:s3cret@db.example.com:5432/analytics\n'); // pragma: allowlist secret
+  });
+
+  it('summarizes connection test and structural scan output during setup', async () => { // The mocks write verbose command output; setup stdout is expected to contain a condensed summary and none of the raw progress, "What changed" or "Next:" lines.
+    const io = makeIo();
+    const prompts = makePromptAdapter({
+      selectValues: ['url'],
+      textValues: ['', 'env:DATABASE_URL'],
+    });
+    const testConnection = vi.fn(async (_projectDir: string, _connectionId: string, commandIo: KloCliIo) => { // writes sample connection-test output to the io it is given
+      commandIo.stdout.write('Connection test passed: postgres-warehouse\n');
+      commandIo.stdout.write('Driver: postgres\n');
+      commandIo.stdout.write('Tables: 2\n');
+      return 0;
+    });
+    const scanConnection = vi.fn(async (_projectDir: string, _connectionId: string, commandIo: KloCliIo) => { // writes a full sample scan report to the io it is given
+      commandIo.stdout.write('Scanning postgres-warehouse for context. Large primary sources can take a while.\n');
+      commandIo.stdout.write('[5%] Preparing scan\n');
+      commandIo.stdout.write('[15%] Inspecting database schema\n');
+      commandIo.stdout.write('[55%] Semantic layer comparison found 2 changes across 2 tables\n');
+      commandIo.stdout.write('[70%] Writing schema artifacts\n');
+      commandIo.stdout.write('[100%] Scan completed\n');
+      commandIo.stdout.write('✓ KLO scan completed\n');
+      commandIo.stdout.write('Status: done\n');
+      commandIo.stdout.write('Run: local-moywh3ky\n');
+      commandIo.stdout.write('Connection: postgres-warehouse\n');
+      commandIo.stdout.write('Mode: structural\n');
+      commandIo.stdout.write('Sync: 2026-05-09-221301-local-moywh3ky\n');
+      commandIo.stdout.write('Dry run: no\n\n');
+      commandIo.stdout.write('What changed\n');
+      commandIo.stdout.write(' Semantic layer comparison found 2 changes across 2 tables\n');
+      commandIo.stdout.write(' New tables: 2\n');
+      commandIo.stdout.write(' Changed tables: 0\n');
+      commandIo.stdout.write(' Removed tables: 0\n');
+      commandIo.stdout.write(' Unchanged tables: 0\n\n');
+      commandIo.stdout.write('Needs attention\n');
+      commandIo.stdout.write(' None\n\n');
+      commandIo.stdout.write('Artifacts\n');
+      commandIo.stdout.write(
+        ' Report: raw-sources/postgres-warehouse/live-database/2026-05-09-221301-local-moywh3ky/scan-report.json\n',
+      );
+      commandIo.stdout.write(' Raw sources: raw-sources/postgres-warehouse/live-database/2026-05-09-221301-local-moywh3ky\n');
+      commandIo.stdout.write(' Schema shards: 1\n\n');
+      commandIo.stdout.write('Next:\n');
+      commandIo.stdout.write(` klo dev scan status --project-dir ${tempDir} local-moywh3ky\n`);
+      return 0;
+    });
+
+    const result = await runKloSetupDatabasesStep(
+      {
+        projectDir: tempDir,
+        inputMode: 'auto',
+        databaseDrivers: ['postgres'],
+        databaseSchemas: [],
+        skipDatabases: false,
+      },
+      io.io,
+      { prompts, testConnection, scanConnection },
+    );
+
+    expect(result.status).toBe('ready');
+    expect(io.stdout()).toContain( // NOTE(review): runs of spaces inside these expected lines may have been collapsed in this copy — confirm against the original file
+      [
+        '◇ Testing postgres-warehouse',
+        '│ ✓ Connection test passed',
+        '│ Driver: PostgreSQL · Tables: 2',
+        '│',
+        '◇ Scanning postgres-warehouse',
+        '│ ✓ Structural scan completed',
+        '│ Changes: 2 new tables',
+        '│ Report: raw-sources/postgres-warehouse/live-database/.../scan-report.json',
+        '│',
+        '◇ Primary source ready',
+        '│ postgres-warehouse · PostgreSQL · structural scan complete',
+      ].join('\n'),
+    );
+    expect(io.stdout()).not.toContain('[5%] Preparing scan');
+    expect(io.stdout()).not.toContain('What changed');
+    expect(io.stdout()).not.toContain('Next:');
+  });
+
+  it('normalizes $ENV_VAR syntax to env: references in pasted URLs', async () => { // The pasted '$DATABASE_URL' is expected to be written to klo.yaml as 'env:DATABASE_URL'.
+    const io = makeIo();
+    const prompts = makePromptAdapter({
+      selectValues: ['url'],
+      textValues: ['', '$DATABASE_URL'],
+    });
+    const testConnection = vi.fn(async () => 0);
+    const scanConnection = vi.fn(async () => 0);
+
+    const result = await runKloSetupDatabasesStep(
+      {
+        projectDir: tempDir,
+        inputMode: 'auto',
+        databaseDrivers: ['postgres'],
+        databaseSchemas: [],
+        skipDatabases: false,
+      },
+      io.io,
+      { prompts, testConnection, scanConnection },
+    );
+
+    expect(result.status).toBe('ready');
+    const config = parseKloProjectConfig(await readFile(join(tempDir, 'klo.yaml'), 'utf-8'));
+    expect(config.connections['postgres-warehouse']).toMatchObject({
+      driver: 'postgres',
+      url: 'env:DATABASE_URL',
+      readonly: true,
+    });
+  });
+
+  it('adds one non-interactive Postgres URL connection, tests it, scans it, and marks databases complete', async () => { // No prompts adapter is passed; the connection id, URL and schemas all come from the args.
+    const io = makeIo();
+    const testConnection = vi.fn(async () => 0);
+    const scanConnection = vi.fn(async () => 0);
+
+    const result = await runKloSetupDatabasesStep(
+      {
+        projectDir: tempDir,
+        inputMode: 'auto',
+        databaseDrivers: ['postgres'],
+        databaseConnectionId: 'warehouse',
+        databaseUrl: 'env:DATABASE_URL',
+        databaseSchemas: ['public'],
+        skipDatabases: false,
+      },
+      io.io,
+      { testConnection, scanConnection },
+    );
+
+
expect(result.status).toBe('ready');
+    expect(testConnection).toHaveBeenCalledWith(tempDir, 'warehouse', expect.anything());
+    expect(scanConnection).toHaveBeenCalledWith(tempDir, 'warehouse', expect.anything());
+    const config = parseKloProjectConfig(await readFile(join(tempDir, 'klo.yaml'), 'utf-8'));
+    expect(config.connections.warehouse).toEqual({ // exact match: no keys beyond these four are expected on the connection
+      driver: 'postgres',
+      url: 'env:DATABASE_URL',
+      schemas: ['public'],
+      readonly: true,
+    });
+    expect(config.setup).toEqual({
+      database_connection_ids: ['warehouse'],
+      completed_steps: ['databases'],
+    });
+    expect(io.stdout()).toContain('Primary source ready');
+    expect(io.stdout()).not.toContain('DATABASE_URL=');
+  });
+
+  it('adds one non-interactive SQLite connection from --database-url without prompting', async () => { // inputMode 'disabled'; expects databaseUrl to be written as the sqlite `path` and no text prompt to be shown.
+    const io = makeIo();
+    const prompts = makePromptAdapter({});
+    const testConnection = vi.fn(async () => 0);
+    const scanConnection = vi.fn(async () => 0);
+
+    const result = await runKloSetupDatabasesStep(
+      {
+        projectDir: tempDir,
+        inputMode: 'disabled',
+        databaseDrivers: ['sqlite'],
+        databaseConnectionId: 'warehouse',
+        databaseUrl: './warehouse.sqlite',
+        databaseSchemas: [],
+        skipDatabases: false,
+      },
+      io.io,
+      { prompts, testConnection, scanConnection },
+    );
+
+    expect(result.status).toBe('ready');
+    expect(prompts.text).not.toHaveBeenCalled();
+    expect(testConnection).toHaveBeenCalledWith(tempDir, 'warehouse', expect.anything());
+    expect(scanConnection).toHaveBeenCalledWith(tempDir, 'warehouse', expect.anything());
+    const config = parseKloProjectConfig(await readFile(join(tempDir, 'klo.yaml'), 'utf-8'));
+    expect(config.connections.warehouse).toEqual({ // for sqlite the value is stored under `path`, not `url`
+      driver: 'sqlite',
+      path: './warehouse.sqlite',
+      readonly: true,
+    });
+    expect(config.setup).toEqual({
+      database_connection_ids: ['warehouse'],
+      completed_steps: ['databases'],
+    });
+  });
+
+  it('selects multiple existing connections and validates each before recording setup ids', async () => { // Seeds two existing connections and passes both ids; expects testConnection and scanConnection to be called twice each.
+    await writeFile(
+
join(tempDir, 'klo.yaml'),
+      [
+        'project: warehouse',
+        'connections:', // NOTE(review): the nested YAML keys below all show a single leading space in this copy — indentation may have been collapsed; confirm against the original file
+        ' warehouse:',
+        ' driver: postgres',
+        ' url: env:DATABASE_URL',
+        ' readonly: true',
+        ' analytics:',
+        ' driver: snowflake',
+        ' authMethod: password',
+        ' account: env:SNOWFLAKE_ACCOUNT',
+        ' warehouse: WH',
+        ' database: ANALYTICS',
+        ' schema_name: PUBLIC',
+        ' username: reader',
+        ' password: env:SNOWFLAKE_PASSWORD',
+        ' readonly: true',
+        '',
+      ].join('\n'),
+      'utf-8',
+    );
+    const io = makeIo();
+    const testConnection = vi.fn(async () => 0);
+    const scanConnection = vi.fn(async () => 0);
+
+    const result = await runKloSetupDatabasesStep(
+      {
+        projectDir: tempDir,
+        inputMode: 'disabled',
+        databaseConnectionIds: ['warehouse', 'analytics'], // existing ids are selected directly; no driver or URL args are given
+        databaseSchemas: [],
+        skipDatabases: false,
+      },
+      io.io,
+      { testConnection, scanConnection },
+    );
+
+    expect(result.status).toBe('ready');
+    expect(testConnection).toHaveBeenCalledTimes(2);
+    expect(scanConnection).toHaveBeenCalledTimes(2);
+    const config = parseKloProjectConfig(await readFile(join(tempDir, 'klo.yaml'), 'utf-8'));
+    expect(config.setup?.database_connection_ids).toEqual(['warehouse', 'analytics']);
+    expect(config.setup?.completed_steps).toContain('databases');
+  });
+
+  it('keeps the connection config but does not mark databases complete when scanning fails', async () => { // scanConnection resolves to 1; expects status 'failed', the connection still written, and 'databases' not marked complete.
+    const io = makeIo();
+    const result = await runKloSetupDatabasesStep(
+      {
+        projectDir: tempDir,
+        inputMode: 'disabled',
+        databaseDrivers: ['postgres'],
+        databaseConnectionId: 'warehouse',
+        databaseUrl: 'env:DATABASE_URL',
+        databaseSchemas: [],
+        skipDatabases: false,
+      },
+      io.io,
+      {
+        testConnection: vi.fn(async () => 0),
+        scanConnection: vi.fn(async () => 1), // non-zero result from the scan mock
+      },
+    );
+
+    expect(result.status).toBe('failed');
+    const config = parseKloProjectConfig(await readFile(join(tempDir, 'klo.yaml'), 'utf-8'));
+    expect(config.connections.warehouse).toMatchObject({ driver: 'postgres', url: 'env:DATABASE_URL' });
+    expect(config.setup?.completed_steps ??
[]).not.toContain('databases');
+    expect(io.stderr()).toContain('Structural scan failed for warehouse.');
+  });
+
+  it('writes Historic SQL config for supported Snowflake databases after validation succeeds', async () => { // Expects a historicSql block with windowDays and both pattern lists on the snowflake connection, and 'historic-sql' among the ingest adapters.
+    const io = makeIo();
+    const result = await runKloSetupDatabasesStep(
+      {
+        projectDir: tempDir,
+        inputMode: 'disabled',
+        databaseDrivers: ['snowflake'],
+        databaseConnectionId: 'snowflake',
+        databaseSchemas: [],
+        enableHistoricSql: true,
+        historicSqlWindowDays: 30,
+        historicSqlServiceAccountPatterns: ['^svc_'],
+        historicSqlRedactionPatterns: ['(?i)secret'],
+        skipDatabases: false,
+      },
+      io.io,
+      {
+        testConnection: vi.fn(async () => 0),
+        scanConnection: vi.fn(async () => 0),
+        prompts: makePromptAdapter({ // NOTE(review): a prompts adapter with scripted answers is supplied even though inputMode is 'disabled' — confirm how the step uses it here
+          textValues: ['env:SNOWFLAKE_ACCOUNT', 'WH', 'ANALYTICS', 'PUBLIC', 'reader', ''],
+          passwordValues: ['env:SNOWFLAKE_PASSWORD'],
+        }),
+      },
+    );
+
+    expect(result.status).toBe('ready');
+    const config = parseKloProjectConfig(await readFile(join(tempDir, 'klo.yaml'), 'utf-8'));
+    expect(config.connections.snowflake).toMatchObject({
+      driver: 'snowflake',
+      authMethod: 'password',
+      historicSql: {
+        enabled: true,
+        dialect: 'snowflake',
+        windowDays: 30,
+        serviceAccountUserPatterns: ['^svc_'],
+        redactionPatterns: ['(?i)secret'],
+      },
+    });
+    expect(config.ingest.adapters).toContain('historic-sql');
+  });
+
+  it('writes Postgres Historic SQL config with minCalls and ignores window/redaction output', async () => { // Passes windowDays and redaction patterns as well; they are expected to be absent from the Postgres historicSql block.
+    const io = makeIo();
+    const result = await runKloSetupDatabasesStep(
+      {
+        projectDir: tempDir,
+        inputMode: 'disabled',
+        databaseDrivers: ['postgres'],
+        databaseConnectionId: 'warehouse',
+        databaseUrl: 'env:DATABASE_URL',
+        databaseSchemas: ['public'],
+        enableHistoricSql: true,
+        historicSqlWindowDays: 30,
+        historicSqlMinCalls: 12,
+        historicSqlServiceAccountPatterns: ['^svc_'],
+        historicSqlRedactionPatterns: ['(?i)secret'],
+        skipDatabases: false,
+      },
+      io.io,
+      {
+        testConnection: vi.fn(async () => 0),
+
scanConnection: vi.fn(async () => 0),
+        historicSqlProbe: vi.fn(async () => ({ ok: true, lines: [' OK pg_stat_statements ready (PostgreSQL 16.4)'] })), // stubbed probe reporting success with one output line
+      },
+    );
+
+    expect(result.status).toBe('ready');
+    const config = parseKloProjectConfig(await readFile(join(tempDir, 'klo.yaml'), 'utf-8'));
+    expect(config.connections.warehouse).toMatchObject({
+      driver: 'postgres',
+      url: 'env:DATABASE_URL',
+      schemas: ['public'],
+      historicSql: {
+        enabled: true,
+        dialect: 'postgres',
+        minCalls: 12,
+        maxTemplatesPerRun: 5000, // not passed in the args of this test, yet expected in the written config
+        serviceAccountUserPatterns: ['^svc_'],
+      },
+    });
+    expect(config.connections.warehouse.historicSql).not.toHaveProperty('windowDays');
+    expect(config.connections.warehouse.historicSql).not.toHaveProperty('redactionPatterns');
+    expect(config.ingest.adapters).toContain('historic-sql');
+    expect(io.stdout()).toContain('Historic SQL probe...');
+    expect(io.stdout()).toContain('pg_stat_statements ready'); // the probe's output line is expected on stdout
+  });
+
+  it('writes Historic SQL config for supported existing database connections', async () => { // Existing BigQuery connection; expects historicSql with windowDays 45 and empty pattern lists.
+    await writeFile(
+      join(tempDir, 'klo.yaml'),
+      [
+        'project: warehouse',
+        'connections:', // NOTE(review): the nested YAML keys below all show a single leading space in this copy — indentation may have been collapsed; confirm against the original file
+        ' analytics:',
+        ' driver: bigquery',
+        ' dataset_id: analytics',
+        ' credentials_json: env:BIGQUERY_CREDENTIALS_JSON',
+        ' readonly: true',
+        '',
+      ].join('\n'),
+      'utf-8',
+    );
+    const io = makeIo();
+
+    const result = await runKloSetupDatabasesStep(
+      {
+        projectDir: tempDir,
+        inputMode: 'disabled',
+        databaseConnectionIds: ['analytics'],
+        databaseSchemas: [],
+        enableHistoricSql: true,
+        historicSqlWindowDays: 45,
+        skipDatabases: false,
+      },
+      io.io,
+      {
+        testConnection: vi.fn(async () => 0),
+        scanConnection: vi.fn(async () => 0),
+      },
+    );
+
+    expect(result.status).toBe('ready');
+    const config = parseKloProjectConfig(await readFile(join(tempDir, 'klo.yaml'), 'utf-8'));
+    expect(config.connections.analytics).toMatchObject({
+      historicSql: {
+        enabled: true,
+        dialect: 'bigquery',
+        windowDays: 45,
+        serviceAccountUserPatterns: [],
+
redactionPatterns: [],
+      },
+    });
+    expect(config.ingest.adapters).toContain('historic-sql');
+  });
+
+  it('enables Historic SQL on an existing Postgres connection', async () => { // Expects minCalls 8, maxTemplatesPerRun 5000 and an empty serviceAccountUserPatterns list on the existing connection.
+    await writeFile(
+      join(tempDir, 'klo.yaml'),
+      [
+        'project: warehouse',
+        'connections:', // NOTE(review): the nested YAML keys below all show a single leading space in this copy — indentation may have been collapsed; confirm against the original file
+        ' warehouse:',
+        ' driver: postgres',
+        ' url: env:DATABASE_URL',
+        ' readonly: true',
+        '',
+      ].join('\n'),
+      'utf-8',
+    );
+    const io = makeIo();
+
+    const result = await runKloSetupDatabasesStep(
+      {
+        projectDir: tempDir,
+        inputMode: 'disabled',
+        databaseConnectionIds: ['warehouse'],
+        databaseSchemas: [],
+        enableHistoricSql: true,
+        historicSqlMinCalls: 8,
+        skipDatabases: false,
+      },
+      io.io,
+      {
+        testConnection: vi.fn(async () => 0),
+        scanConnection: vi.fn(async () => 0),
+        historicSqlProbe: vi.fn(async () => ({ ok: true, lines: [' OK pg_stat_statements ready (PostgreSQL 16.4)'] })),
+      },
+    );
+
+    expect(result.status).toBe('ready');
+    const config = parseKloProjectConfig(await readFile(join(tempDir, 'klo.yaml'), 'utf-8'));
+    expect(config.connections.warehouse).toMatchObject({
+      historicSql: {
+        enabled: true,
+        dialect: 'postgres',
+        minCalls: 8,
+        maxTemplatesPerRun: 5000,
+        serviceAccountUserPatterns: [],
+      },
+    });
+  });
+
+  it('prints a non-blocking Postgres Historic SQL probe failure after connection test succeeds', async () => { // The probe mock returns ok: false with FAIL/Fix lines while the connection test and scan mocks succeed.
+    const io = makeIo();
+    const historicSqlProbe = vi.fn(async () => ({
+      ok: false,
+      lines: [
+        ' FAIL pg_stat_statements extension is not installed in the connection database',
+        ' Fix: Run (against this database): CREATE EXTENSION pg_stat_statements;',
+        " Fix: Ensure shared_preload_libraries includes 'pg_stat_statements'.",
+      ],
+    }));
+
+    const result = await runKloSetupDatabasesStep(
+      {
+        projectDir: tempDir,
+        inputMode: 'disabled',
+        databaseDrivers: ['postgres'],
+        databaseConnectionId: 'warehouse',
+        databaseUrl: 'env:DATABASE_URL',
+        databaseSchemas: [],
+        enableHistoricSql: true,
+        skipDatabases: false,
+      },
+      io.io,
+      {
+
testConnection: vi.fn(async () => 0),
+        scanConnection: vi.fn(async () => 0),
+        historicSqlProbe,
+      },
+    );
+
+    expect(result.status).toBe('ready'); // the step is still expected to finish as 'ready'
+    expect(historicSqlProbe).toHaveBeenCalledWith(
+      expect.objectContaining({
+        projectDir: tempDir,
+        connectionId: 'warehouse',
+        dialect: 'postgres',
+      }),
+    );
+    expect(io.stdout()).toContain('Historic SQL probe...');
+    expect(io.stdout()).toContain('pg_stat_statements extension is not installed');
+    expect(io.stdout()).toContain('Setup written; first ingest run will fail until fixed.');
+  });
+
+  it('does not run the Historic SQL probe when the regular connection test fails', async () => { // testConnection resolves to 1; expects status 'failed' and the probe mock never called.
+    const io = makeIo();
+    const historicSqlProbe = vi.fn(async () => ({ ok: true, lines: [] }));
+
+    const result = await runKloSetupDatabasesStep(
+      {
+        projectDir: tempDir,
+        inputMode: 'disabled',
+        databaseDrivers: ['postgres'],
+        databaseConnectionId: 'warehouse',
+        databaseUrl: 'env:DATABASE_URL',
+        databaseSchemas: [],
+        enableHistoricSql: true,
+        skipDatabases: false,
+      },
+      io.io,
+      {
+        testConnection: vi.fn(async () => 1),
+        scanConnection: vi.fn(async () => 0),
+        historicSqlProbe,
+      },
+    );
+
+    expect(result.status).toBe('failed');
+    expect(historicSqlProbe).not.toHaveBeenCalled();
+  });
+
+  it('returns missing input when non-interactive database flags are incomplete', async () => { // A driver is given but no connection id or URL, and no deps argument; expects 'missing-input' plus a stderr message.
+    const io = makeIo();
+
+    const result = await runKloSetupDatabasesStep(
+      {
+        projectDir: tempDir,
+        inputMode: 'disabled',
+        databaseDrivers: ['postgres'],
+        databaseSchemas: [],
+        skipDatabases: false,
+      },
+      io.io,
+    );
+
+    expect(result.status).toBe('missing-input');
+    expect(io.stderr()).toContain('Missing database connection id');
+  });
+
+  it('leaves setup incomplete when primary sources are skipped', async () => { // skipDatabases: true; the assertions that follow expect status 'skipped'.
+    const io = makeIo();
+
+    const result = await runKloSetupDatabasesStep(
+      { projectDir: tempDir, inputMode: 'disabled', databaseSchemas: [], skipDatabases: true },
+      io.io,
+    );
+
+
/** Database drivers the setup step can configure as a primary source. */
export type KloSetupDatabaseDriver =
  | 'sqlite'
  | 'postgres'
  | 'mysql'
  | 'clickhouse'
  | 'sqlserver'
  | 'bigquery'
  | 'snowflake';

/** Inputs of the "databases" setup step. */
export interface KloSetupDatabasesArgs {
  /** Project directory whose configuration is loaded and rewritten. */
  projectDir: string;
  /** `'disabled'` is the non-interactive mode: missing values are reported instead of prompted for. */
  inputMode: 'auto' | 'disabled';
  /** Drivers to configure; when present the driver selection prompt is skipped. */
  databaseDrivers?: KloSetupDatabaseDriver[];
  /** Ids of already-configured connections to validate and scan instead of creating new ones. */
  databaseConnectionIds?: string[];
  /** Id to use for a newly created connection. */
  databaseConnectionId?: string;
  /** Connection URL (or reference) for URL-based drivers; used as the file path for SQLite. */
  databaseUrl?: string;
  /** Schemas copied into the connection config when the list is non-empty. */
  databaseSchemas: string[];
  /** Turns Historic SQL on; rejected for drivers without a Historic SQL dialect. */
  enableHistoricSql?: boolean;
  /** Turns Historic SQL off; wins over `enableHistoricSql` when both are set. */
  disableHistoricSql?: boolean;
  /** Query-history window for non-Postgres dialects (defaults to 90 when omitted). */
  historicSqlWindowDays?: number;
  /** Minimum call count for the Postgres dialect (defaults to 5 when omitted). */
  historicSqlMinCalls?: number;
  /** Stored as `serviceAccountUserPatterns` (defaults to an empty list). */
  historicSqlServiceAccountPatterns?: string[];
  /** Stored as `redactionPatterns` for non-Postgres dialects (defaults to an empty list). */
  historicSqlRedactionPatterns?: string[];
  /** When true the step prints a notice and returns `skipped` without touching connections. */
  skipDatabases: boolean;
}

/** Outcome of the "databases" setup step. */
export type KloSetupDatabasesResult =
  | { status: 'ready'; projectDir: string; connectionIds: string[] }
  | { status: 'skipped'; projectDir: string }
  | { status: 'back'; projectDir: string }
  | { status: 'missing-input'; projectDir: string }
  | { status: 'failed'; projectDir: string };

/**
 * Prompt surface used by the step, injectable for tests.
 *
 * `multiselect` and `select` resolve to the chosen value(s); the default adapter
 * resolves them to the `'back'` sentinel on cancellation. `text` and `password`
 * resolve to `undefined` when the user cancels.
 */
export interface KloSetupDatabasesPromptAdapter {
  multiselect(options: {
    message: string;
    options: Array<{ value: string; label: string }>;
    required?: boolean;
  }): Promise<string[]>;
  select(options: { message: string; options: Array<{ value: string; label: string }> }): Promise<string>;
  text(options: { message: string; placeholder?: string; initialValue?: string }): Promise<string | undefined>;
  password(options: { message: string }): Promise<string | undefined>;
  cancel(message: string): void;
}

/** Input handed to a Historic SQL readiness probe. */
interface KloSetupHistoricSqlProbeInput {
  projectDir: string;
  connectionId: string;
  dialect: HistoricSqlDialect;
}

/** Probe verdict plus the lines to print for the user. */
interface KloSetupHistoricSqlProbeResult {
  ok: boolean;
  lines: string[];
}

type KloSetupHistoricSqlProbe = (input: KloSetupHistoricSqlProbeInput) => Promise<KloSetupHistoricSqlProbeResult>;

/** Injectable collaborators; every member falls back to the real implementation when omitted. */
export interface KloSetupDatabasesDeps {
  prompts?: KloSetupDatabasesPromptAdapter;
  /** Resolves to an exit code; anything other than 0 is treated as a failed connection test. */
  testConnection?: (projectDir: string, connectionId: string, io: KloCliIo) => Promise<number>;
  /** Resolves to an exit code; anything other than 0 is treated as a failed structural scan. */
  scanConnection?: (projectDir: string, connectionId: string, io: KloCliIo) => Promise<number>;
  historicSqlProbe?: KloSetupHistoricSqlProbe;
}

/** Driver choices in the order they are offered to the user. */
const DRIVER_OPTIONS: Array<{ value: KloSetupDatabaseDriver; label: string }> = [
  { value: 'sqlite', label: 'SQLite' },
  { value: 'postgres', label: 'PostgreSQL' },
  { value: 'mysql', label: 'MySQL' },
  { value: 'clickhouse', label: 'ClickHouse' },
  { value: 'sqlserver', label: 'SQL Server' },
  { value: 'bigquery', label: 'BigQuery' },
  { value: 'snowflake', label: 'Snowflake' },
];

/** Human-readable label per driver, derived from DRIVER_OPTIONS so the two never drift apart. */
const DRIVER_LABELS = Object.fromEntries(DRIVER_OPTIONS.map((option) => [option.value, option.label])) as Record<
  KloSetupDatabaseDriver,
  string
>;

/** Historic SQL dialect per driver; drivers without an entry do not support Historic SQL. */
const HISTORIC_SQL_DIALECT_BY_DRIVER: Partial<Record<KloSetupDatabaseDriver, HistoricSqlDialect>> = {
  snowflake: 'snowflake',
  bigquery: 'bigquery',
  postgres: 'postgres',
};
/** Suggested id for a new connection of each driver. */
const DEFAULT_CONNECTION_IDS: Record<KloSetupDatabaseDriver, string> = {
  sqlite: 'sqlite-local',
  postgres: 'postgres-warehouse',
  mysql: 'mysql-warehouse',
  clickhouse: 'clickhouse-warehouse',
  sqlserver: 'sqlserver-warehouse',
  bigquery: 'bigquery-warehouse',
  snowflake: 'snowflake-warehouse',
};

/** Drivers that are configured through a connection URL or host/port fields. */
type UrlDriverType = Extract<KloSetupDatabaseDriver, 'postgres' | 'mysql' | 'clickhouse' | 'sqlserver'>;

/** Default port offered for each URL-style driver (kept as a string because it pre-fills a text prompt). */
const DRIVER_CONNECTION_DEFAULTS: Record<UrlDriverType, { port: string }> = {
  postgres: { port: '5432' },
  mysql: { port: '3306' },
  clickhouse: { port: '8123' },
  sqlserver: { port: '1433' },
};

/** Returns the display label of a driver. */
function driverLabel(driver: KloSetupDatabaseDriver): string {
  return DRIVER_LABELS[driver];
}

/** Builds the two-line prompt asking the user to name a connection of the given kind. */
function connectionNamePrompt(label: string): string {
  return `Name this ${label} connection\nKLO will use this short name in commands and config. You can rename it now.`;
}

/**
 * Builds the retry/back menu shown when connection details are incomplete.
 *
 * @param label Display label of the driver being configured.
 * @param canReturnToDriverSelection Selects the wording of the "back" destination.
 */
function missingConnectionDetailsPrompt(
  label: string,
  canReturnToDriverSelection: boolean,
): { message: string; options: Array<{ value: string; label: string }> } {
  const backDestination = canReturnToDriverSelection ? 'primary source selection' : 'the previous setup step';
  return {
    message:
      `Some ${label} connection details are missing.\n` +
      `Continue entering details, or go back to ${backDestination}.`,
    options: [
      { value: 'retry', label: `Continue entering ${label} details` },
      { value: 'back', label: `Back to ${backDestination}` },
    ],
  };
}

/**
 * Creates the interactive prompt adapter backed by `@clack/prompts`.
 *
 * Every prompt runs inside `withSetupInterruptConfirmation`. A cancelled
 * `multiselect`/`select` prints "Setup cancelled." and resolves to the `'back'`
 * sentinel; a cancelled `text`/`password` resolves to `undefined`.
 */
function createPromptAdapter(): KloSetupDatabasesPromptAdapter {
  return {
    async multiselect(options) {
      const value = await withSetupInterruptConfirmation(() => multiselect(withMenuOptionsSpacing(options)));
      if (isCancel(value)) {
        cancel('Setup cancelled.');
        return ['back'];
      }
      return [...value] as string[];
    },
    async select(options) {
      const value = await withSetupInterruptConfirmation(() => select(withMenuOptionsSpacing(options)));
      if (isCancel(value)) {
        cancel('Setup cancelled.');
        return 'back';
      }
      return String(value);
    },
    async text(options) {
      const value = await withSetupInterruptConfirmation(() =>
        text({ ...options, message: withTextInputNavigation(options.message) }),
      );
      return isCancel(value) ? undefined : String(value);
    },
    async password(options) {
      const value = await withSetupInterruptConfirmation(() =>
        password({ ...options, message: withTextInputNavigation(options.message) }),
      );
      return isCancel(value) ? undefined : String(value);
    },
    cancel(message) {
      cancel(message);
    },
  };
}

/**
 * Maps a configured driver name onto a supported setup driver.
 *
 * Matching is case-insensitive and accepts the aliases `postgresql` and
 * `sqlite3`. Returns `null` for a missing or unsupported driver.
 */
function normalizeDriver(driver: string | undefined): KloSetupDatabaseDriver | null {
  const normalized = String(driver ?? '').toLowerCase();
  if (normalized === 'postgresql') return 'postgres';
  if (normalized === 'sqlite3') return 'sqlite';
  return DRIVER_OPTIONS.some((option) => option.value === normalized) ? (normalized as KloSetupDatabaseDriver) : null;
}

/** Drops blank entries and duplicates, keeping the first occurrence of each value. */
function unique(values: string[]): string[] {
  return [...new Set(values.filter((value) => value.trim().length > 0))];
}
/**
 * Translates a Historic SQL probe error into the lines shown to the user.
 *
 * Known error names get a tailored explanation and remediation hints; any
 * other value is reported with its message (or its string form).
 */
function historicSqlProbeFailureLines(error: unknown): string[] {
  if (error instanceof Error && error.name === 'HistoricSqlExtensionMissingError') {
    return [
      ' FAIL pg_stat_statements extension is not installed in the connection database',
      ' Fix: Run (against this database): CREATE EXTENSION pg_stat_statements;',
      " Fix: Ensure shared_preload_libraries includes 'pg_stat_statements'.",
    ];
  }
  if (error instanceof Error && error.name === 'HistoricSqlGrantsMissingError') {
    return [
      ' FAIL Postgres connection role lacks pg_read_all_stats',
      // The hint previously ended in "TO ;", an incomplete statement with no grantee.
      ' Fix: Run: GRANT pg_read_all_stats TO <connection_role>;',
    ];
  }
  if (error instanceof Error && error.name === 'HistoricSqlVersionUnsupportedError') {
    return [` FAIL ${error.message}`];
  }
  return [` FAIL Historic SQL probe failed: ${error instanceof Error ? error.message : String(error)}`];
}
string) => ` ! ${warning}`), + ], + }; + } catch (error) { + return { ok: false, lines: historicSqlProbeFailureLines(error) }; + } finally { + await client.cleanup(); + } +} + +function existingConnectionIdsByDriver( + connections: Record, + driver: KloSetupDatabaseDriver, +): string[] { + return Object.entries(connections) + .filter(([, connection]) => normalizeDriver(connection.driver) === driver) + .map(([connectionId]) => connectionId) + .sort((left, right) => left.localeCompare(right)); +} + +function configuredPrimaryConnectionIds( + connections: Record, + setupConnectionIds: string[] | undefined, +): string[] { + const configuredIds = + setupConnectionIds + ?.filter((connectionId) => normalizeDriver(connections[connectionId]?.driver) !== null) + .filter((connectionId, index, ids) => ids.indexOf(connectionId) === index) ?? []; + if (configuredIds.length > 0) { + return configuredIds; + } + return Object.entries(connections) + .filter(([, connection]) => normalizeDriver(connection.driver) !== null) + .map(([connectionId]) => connectionId) + .sort((left, right) => left.localeCompare(right)); +} + +function configuredPrimarySourcesPrompt(connectionIds: string[]): { + message: string; + options: Array<{ value: string; label: string }>; +} { + return { + message: `Primary sources already configured: ${connectionIds.join(', ')}\nWhat would you like to do?`, + options: [ + { value: 'add', label: 'Add another primary source' }, + { value: 'continue', label: 'Continue setup' }, + { value: 'back', label: 'Back' }, + ], + }; +} + +function pushUniqueConnectionId(connectionIds: string[], connectionId: string): void { + if (!connectionIds.includes(connectionId)) { + connectionIds.push(connectionId); + } +} + +function defaultConnectionIdForDriver( + connections: Record, + driver: KloSetupDatabaseDriver, +): string { + const base = DEFAULT_CONNECTION_IDS[driver]; + if (!connections[base]) { + return base; + } + let index = 2; + while (connections[`${base}-${index}`]) { + 
/**
 * Asks for one line of text.
 *
 * @returns `undefined` when the prompt was cancelled; otherwise the trimmed
 *   answer, falling back to `fallback` (or `''`) when the answer is blank.
 */
async function promptText(
  prompts: KloSetupDatabasesPromptAdapter,
  message: string,
  fallback?: string,
): Promise<string | undefined> {
  const value = await prompts.text({
    message: withTextInputNavigation(message),
    ...(fallback ? { placeholder: fallback, initialValue: fallback } : {}),
  });
  if (value === undefined) {
    return undefined;
  }
  return value.trim() || fallback || '';
}

/** True when the URL carries a userinfo part (`scheme://user[:password]@host`). */
function urlHasCredentials(url: string): boolean {
  return /:\/\/[^/\s]*@/.test(url);
}

/** Rewrites a shell-style `$NAME` into an `env:NAME` reference; other values pass through unchanged. */
function normalizeInputReference(value: string): string {
  if (value.startsWith('$') && /^\$[A-Z_][A-Z0-9_]*$/i.test(value)) {
    return `env:${value.slice(1)}`;
  }
  return value;
}

/** Like `normalizeInputReference`, but treats a plain value as a path and prefixes it with `file:`. */
function normalizeFileReference(value: string): string {
  const normalized = normalizeInputReference(value);
  if (normalized.startsWith('env:') || normalized.startsWith('file:')) {
    return normalized;
  }
  return `file:${normalized}`;
}

/**
 * Asks for a secret and returns a reference to it rather than the secret itself.
 *
 * @returns `'back'` when cancelled, `null` when left blank, the `env:`/`file:`
 *   reference when the user typed one, otherwise the reference returned by
 *   `writeProjectLocalSecretReference` for the entered value.
 */
async function promptCredential(input: {
  prompts: KloSetupDatabasesPromptAdapter;
  message: string;
  projectDir: string;
  connectionId: string;
  secretName: string;
}): Promise<string | 'back' | null> {
  const value = await input.prompts.password({
    message: withTextInputNavigation(input.message),
  });
  if (value === undefined) return 'back';
  if (!value.trim()) return null;

  const normalized = normalizeInputReference(value.trim());
  if (normalized.startsWith('env:') || normalized.startsWith('file:')) {
    return normalized;
  }

  return await writeProjectLocalSecretReference({
    projectDir: input.projectDir,
    fileName: `${input.connectionId}-${input.secretName}`,
    value: normalized,
  });
}

/** Parses a TCP port typed by the user; returns `null` unless it is a whole number in 1..65535. */
function parseTcpPort(value: string): number | null {
  const trimmed = value.trim();
  if (!/^\d+$/.test(trimmed)) return null;
  const port = Number(trimmed);
  return port >= 1 && port <= 65535 ? port : null;
}

/**
 * Collects host, port, database, username and password for a URL-style driver.
 *
 * @returns the connection config, `'back'` when any prompt was cancelled, or
 *   `null` when a required detail is missing or the port is not a valid TCP
 *   port (previously a non-numeric port was stored as `NaN`).
 */
async function buildFieldsConnectionConfig(input: {
  driver: UrlDriverType;
  connectionId: string;
  args: KloSetupDatabasesArgs;
  prompts: KloSetupDatabasesPromptAdapter;
}): Promise<KloProjectConnectionConfig | 'back' | null> {
  const label = driverLabel(input.driver);
  const defaults = DRIVER_CONNECTION_DEFAULTS[input.driver];

  const host = await promptText(input.prompts, `${label} host`, 'localhost');
  if (host === undefined) return 'back';
  if (!host) return null;

  const portStr = await promptText(input.prompts, `${label} port`, defaults.port);
  if (portStr === undefined) return 'back';
  const port = parseTcpPort(portStr || defaults.port);
  if (port === null) return null;

  const database = await promptText(input.prompts, `${label} database name`);
  if (database === undefined) return 'back';
  if (!database) return null;

  const username = await promptText(input.prompts, `${label} username`);
  if (username === undefined) return 'back';
  if (!username) return null;

  // The password is optional: a blank answer simply omits the field.
  let passwordRef: string | undefined;
  {
    const credentialResult = await promptCredential({
      prompts: input.prompts,
      message: `${label} password`,
      projectDir: input.args.projectDir,
      connectionId: input.connectionId,
      secretName: 'password', // pragma: allowlist secret
    });
    if (credentialResult === 'back') return 'back';
    if (credentialResult) passwordRef = credentialResult;
  }

  return {
    driver: input.driver,
    host,
    port,
    database,
    username,
    ...(passwordRef ? { password: passwordRef } : {}),
    ...(input.args.databaseSchemas.length > 0 ? { schemas: input.args.databaseSchemas } : {}),
    readonly: true,
  };
}
{ schemas: input.args.databaseSchemas } : {}), + readonly: true, + }; + } + + if (urlHasCredentials(url)) { + const ref = await writeProjectLocalSecretReference({ + projectDir: input.args.projectDir, + fileName: `${input.connectionId}-url`, + value: url, + }); + return { + driver: input.driver, + url: ref, + ...(input.args.databaseSchemas.length > 0 ? { schemas: input.args.databaseSchemas } : {}), + readonly: true, + }; + } + + return { + driver: input.driver, + url, + ...(input.args.databaseSchemas.length > 0 ? { schemas: input.args.databaseSchemas } : {}), + readonly: true, + }; +} + +async function buildUrlConnectionConfig(input: { + driver: UrlDriverType; + connectionId: string; + args: KloSetupDatabasesArgs; + prompts: KloSetupDatabasesPromptAdapter; +}): Promise { + if (input.args.inputMode === 'disabled' && !input.args.databaseUrl) return null; + + if (input.args.databaseUrl) { + const url = normalizeInputReference(input.args.databaseUrl); + if (urlHasCredentials(url)) { + const ref = await writeProjectLocalSecretReference({ + projectDir: input.args.projectDir, + fileName: `${input.connectionId}-url`, + value: url, + }); + return { + driver: input.driver, + url: ref, + ...(input.args.databaseSchemas.length > 0 ? { schemas: input.args.databaseSchemas } : {}), + readonly: true, + }; + } + return { + driver: input.driver, + url, + ...(input.args.databaseSchemas.length > 0 ? { schemas: input.args.databaseSchemas } : {}), + readonly: true, + }; + } + + const label = driverLabel(input.driver); + while (true) { + const choice = await input.prompts.select({ + message: `How do you want to connect to ${label}?`, + options: [ + { value: 'fields', label: 'Enter connection details (host, port, database, user)' }, + { value: 'url', label: 'Paste a connection URL' }, + { value: 'back', label: 'Back' }, + ], + }); + if (choice === 'back') return 'back'; + const result = + choice === 'url' ? 
/**
 * Builds the connection config for the chosen driver, prompting for whatever
 * the arguments do not supply.
 *
 * @returns the connection config, `'back'` when the user cancelled a prompt,
 *   or `null` when required details are missing. In non-interactive mode
 *   (`inputMode === 'disabled'`) nothing is prompted: BigQuery and Snowflake
 *   have no flag-based inputs in this function, so they yield `null`.
 * @throws Error for a driver this function does not know how to configure.
 */
async function buildConnectionConfig(input: {
  driver: KloSetupDatabaseDriver;
  connectionId: string;
  args: KloSetupDatabasesArgs;
  prompts: KloSetupDatabasesPromptAdapter;
}): Promise<KloProjectConnectionConfig | 'back' | null> {
  const { driver, args, prompts } = input;
  const nonInteractive = args.inputMode === 'disabled';

  if (driver === 'sqlite') {
    if (nonInteractive && !args.databaseUrl) return null;
    const path =
      args.databaseUrl ??
      (await promptText(
        prompts,
        'SQLite database file\nEnter a relative or absolute path, for example ./warehouse.sqlite.',
      ));
    if (path === undefined) return 'back';
    return path ? { driver: 'sqlite', path, readonly: true } : null;
  }

  if (driver === 'postgres' || driver === 'mysql' || driver === 'clickhouse' || driver === 'sqlserver') {
    return await buildUrlConnectionConfig({ driver, connectionId: input.connectionId, args, prompts });
  }

  if (driver === 'bigquery') {
    // Never prompt in non-interactive mode; report the details as missing instead.
    if (nonInteractive) return null;
    const datasetId = await promptText(prompts, 'BigQuery dataset\nFor example analytics.');
    if (datasetId === undefined) return 'back';
    const credentialsPath = await promptText(prompts, 'Path to service account JSON file');
    if (credentialsPath === undefined) return 'back';
    const location = await promptText(
      prompts,
      'BigQuery location\nPress Enter for US, or enter a location like EU.',
      'US',
    );
    if (location === undefined) return 'back';
    if (!datasetId || !credentialsPath) return null;
    return {
      driver: 'bigquery',
      dataset_id: datasetId,
      credentials_json: normalizeFileReference(credentialsPath),
      ...(location ? { location } : {}),
      readonly: true,
    };
  }

  if (driver === 'snowflake') {
    // Never prompt in non-interactive mode; report the details as missing instead.
    if (nonInteractive) return null;
    const account = await promptText(prompts, 'Snowflake account identifier');
    if (account === undefined) return 'back';
    const warehouse = await promptText(prompts, 'Snowflake warehouse\nFor example ANALYTICS_WH.');
    if (warehouse === undefined) return 'back';
    const database = await promptText(prompts, 'Snowflake database name');
    if (database === undefined) return 'back';
    const schemaName = await promptText(
      prompts,
      'Snowflake schema\nPress Enter for PUBLIC, or enter a schema name.',
      'PUBLIC',
    );
    if (schemaName === undefined) return 'back';
    const username = await promptText(prompts, 'Snowflake username');
    if (username === undefined) return 'back';
    const passwordRef = await promptCredential({
      prompts,
      message: 'Snowflake password',
      projectDir: args.projectDir,
      connectionId: input.connectionId,
      secretName: 'password', // pragma: allowlist secret
    });
    if (passwordRef === 'back') return 'back'; // pragma: allowlist secret
    const role = await promptText(prompts, 'Snowflake role (optional)\nPress Enter to skip.');
    if (role === undefined) return 'back';
    // Everything except the role is mandatory for password authentication.
    if (!account || !warehouse || !database || !schemaName || !username || !passwordRef) return null;
    return {
      driver: 'snowflake',
      authMethod: 'password',
      account,
      warehouse,
      database,
      schema_name: schemaName,
      username,
      password: passwordRef,
      ...(role ? { role } : {}),
      readonly: true,
    };
  }

  throw new Error(`Unsupported database driver: ${driver}`);
}
/** Default connection test: runs the `test` connection command and resolves to its exit code. */
async function defaultTestConnection(projectDir: string, connectionId: string, io: KloCliIo): Promise<number> {
  return await runKloConnection({ command: 'test', projectDir, connectionId }, io);
}

/** Default scan: runs a structural scan without relationship detection and resolves to its exit code. */
async function defaultScanConnection(projectDir: string, connectionId: string, io: KloCliIo): Promise<number> {
  return await runKloScan(
    {
      command: 'run',
      projectDir,
      connectionId,
      mode: 'structural',
      detectRelationships: false,
      dryRun: false,
    },
    io,
  );
}

/** A `KloCliIo` that records everything written to it instead of printing. */
interface BufferedCommandIo extends KloCliIo {
  stdoutText(): string;
  stderrText(): string;
}

/** Creates an in-memory IO whose stdout reports `isTTY: false`. */
function createBufferedCommandIo(): BufferedCommandIo {
  let stdout = '';
  let stderr = '';
  return {
    stdout: {
      isTTY: false,
      write(chunk: string) {
        stdout += chunk;
      },
    },
    stderr: {
      write(chunk: string) {
        stderr += chunk;
      },
    },
    stdoutText() {
      return stdout;
    },
    stderrText() {
      return stderr;
    },
  };
}

/** Replays buffered output onto the real IO: stdout first, then stderr; empty streams are skipped. */
function flushBufferedCommandOutput(io: KloCliIo, bufferedIo: BufferedCommandIo): void {
  const stdout = bufferedIo.stdoutText();
  const stderr = bufferedIo.stderrText();
  if (stdout.length > 0) {
    io.stdout.write(stdout);
  }
  if (stderr.length > 0) {
    io.stderr.write(stderr);
  }
}

/**
 * Extracts the value of the first `Label: value` line from command output.
 *
 * The label is matched literally and case-insensitively; the value is
 * trimmed. Returns `undefined` when no such line exists.
 */
function readOutputValue(output: string, label: string): string | undefined {
  const escapedLabel = label.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
  const match = new RegExp(`^\\s*${escapedLabel}:\\s*(.+?)\\s*$`, 'im').exec(output);
  return match?.[1]?.trim();
}

/**
 * Condenses scan output into a one-line change summary.
 *
 * Positive new/changed/removed table counts are joined with ` · `. Without
 * any, the text after "Semantic layer comparison found" is used, and
 * `'no table changes'` is the last resort.
 */
function summarizeScanChanges(output: string): string {
  // Formats one counter, or returns null when it is absent, non-numeric or not positive.
  const describeCount = (label: string, adjective: string): string | null => {
    const count = Number(readOutputValue(output, label) ?? NaN);
    if (!Number.isFinite(count) || count <= 0) return null;
    return `${count} ${adjective} ${count === 1 ? 'table' : 'tables'}`;
  };

  const parts = [
    describeCount('New tables', 'new'),
    describeCount('Changed tables', 'changed'),
    describeCount('Removed tables', 'removed'),
  ].filter((part): part is string => part !== null);
  if (parts.length > 0) {
    return parts.join(' · ');
  }

  const semanticComparison = /^\s*Semantic layer comparison found\s+(.+?)\s*$/im.exec(output)?.[1]?.trim();
  if (semanticComparison) {
    return semanticComparison;
  }

  return 'no table changes';
}

/**
 * Shortens a scan report path for display by replacing everything between
 * `/live-database/` and the file name with `...`. Paths without that marker,
 * or without a file name, are returned trimmed but otherwise unchanged.
 */
function shortenScanReportPath(path: string): string {
  const normalized = path.trim();
  const liveDatabaseMarker = '/live-database/';
  const markerIndex = normalized.indexOf(liveDatabaseMarker);
  if (markerIndex === -1) {
    return normalized;
  }
  const filename = normalized.split('/').at(-1);
  if (!filename) {
    return normalized;
  }
  return `${normalized.slice(0, markerIndex + liveDatabaseMarker.length)}.../${filename}`;
}

/** Prints a titled section: a header line, one line per entry, then a closing gutter line. */
function writeSetupSection(io: KloCliIo, title: string, lines: string[]): void {
  io.stdout.write(`◇ ${title}\n`);
  for (const line of lines) {
    io.stdout.write(`│ ${line}\n`);
  }
  io.stdout.write('│\n');
}
/** Adds `'historic-sql'` to the project's ingest adapters unless it is already listed. */
async function ensureHistoricSqlAdapterEnabled(projectDir: string): Promise<void> {
  const project = await loadKloProject({ projectDir });
  if (project.config.ingest.adapters.includes('historic-sql')) {
    return;
  }
  await writeFile(
    project.configPath,
    serializeKloProjectConfig({
      ...project.config,
      ingest: {
        ...project.config.ingest,
        adapters: [...project.config.ingest.adapters, 'historic-sql'],
      },
    }),
    'utf-8',
  );
}

/** Records the (de-duplicated) primary connection ids and marks the databases step complete. */
async function markDatabasesComplete(projectDir: string, connectionIds: string[]): Promise<void> {
  const project = await loadKloProject({ projectDir });
  const config = setKloSetupDatabaseConnectionIds(project.config, unique(connectionIds), { complete: true });
  await writeFile(project.configPath, serializeKloProjectConfig(config), 'utf-8');
}

/**
 * Runs the Historic SQL readiness probe for a connection, if applicable.
 *
 * Only connections whose stored `historicSql` block is enabled with the
 * `postgres` dialect are probed. The probe is advisory: its lines are
 * printed, and a failure only adds a warning. A probe that throws is treated
 * the same way instead of aborting setup after the config has been written.
 */
async function maybeRunHistoricSqlSetupProbe(input: {
  projectDir: string;
  connectionId: string;
  io: KloCliIo;
  deps: KloSetupDatabasesDeps;
}): Promise<void> {
  const project = await loadKloProject({ projectDir: input.projectDir });
  const connection = project.config.connections[input.connectionId];
  const historicSql = historicSqlConfigRecord(connection);
  if (historicSql?.enabled !== true || historicSql.dialect !== 'postgres') {
    return;
  }

  input.io.stdout.write('Historic SQL probe...\n');
  const probe = input.deps.historicSqlProbe ?? defaultHistoricSqlProbe;
  let result: KloSetupHistoricSqlProbeResult;
  try {
    result = await probe({
      projectDir: input.projectDir,
      connectionId: input.connectionId,
      dialect: 'postgres',
    });
  } catch (error) {
    // Convert an unexpected probe crash into the same advisory failure output.
    result = { ok: false, lines: historicSqlProbeFailureLines(error) };
  }
  for (const line of result.lines) {
    input.io.stdout.write(`${line}\n`);
  }
  if (!result.ok) {
    input.io.stdout.write('Setup written; first ingest run will fail until fixed.\n');
  }
}

/**
 * Applies the Historic SQL flags to an already-configured connection.
 *
 * Does nothing unless one of the enable/disable flags is set and the
 * connection exists with a supported driver. Resolves to `'back'` when the
 * user backs out of a prompt; otherwise the updated connection is written.
 */
async function applyHistoricSqlConfigToExistingConnection(input: {
  projectDir: string;
  connectionId: string;
  args: KloSetupDatabasesArgs;
  prompts: KloSetupDatabasesPromptAdapter;
}): Promise<'back' | void> {
  if (input.args.enableHistoricSql !== true && input.args.disableHistoricSql !== true) {
    return;
  }

  const project = await loadKloProject({ projectDir: input.projectDir });
  const existing = project.config.connections[input.connectionId];
  const driver = normalizeDriver(existing?.driver);
  if (!existing || !driver) {
    return;
  }

  const withHistoricSql = await maybeApplyHistoricSqlConfig({
    connection: existing,
    driver,
    args: input.args,
    prompts: input.prompts,
  });
  if (withHistoricSql === 'back') return 'back';
  await writeConnectionConfig({
    projectDir: input.projectDir,
    connectionId: input.connectionId,
    connection: withHistoricSql,
  });
}
driverLabel(configuredDriver) : undefined; + const testIo = createBufferedCommandIo(); + const testCode = await testConnection(input.projectDir, input.connectionId, testIo); + if (testCode !== 0) { + flushBufferedCommandOutput(input.io, testIo); + input.io.stderr.write(`Connection test failed for ${input.connectionId}.\n`); + return false; + } + const testOutput = testIo.stdoutText(); + const outputDriver = normalizeDriver(readOutputValue(testOutput, 'Driver')); + const driverDisplay = outputDriver ? driverLabel(outputDriver) : (configuredDriverLabel ?? 'Unknown driver'); + const tableCount = Number(readOutputValue(testOutput, 'Tables') ?? NaN); + const testLines = ['✓ Connection test passed']; + testLines.push(`Driver: ${driverDisplay}${Number.isFinite(tableCount) ? ` · Tables: ${tableCount}` : ''}`); + writeSetupSection(input.io, `Testing ${input.connectionId}`, testLines); + + await maybeRunHistoricSqlSetupProbe({ + projectDir: input.projectDir, + connectionId: input.connectionId, + io: input.io, + deps: input.deps, + }); + const scanIo = createBufferedCommandIo(); + const scanCode = await scanConnection(input.projectDir, input.connectionId, scanIo); + if (scanCode !== 0) { + flushBufferedCommandOutput(input.io, scanIo); + input.io.stderr.write(`Structural scan failed for ${input.connectionId}.\n`); + input.io.stderr.write(`Debug command: klo dev scan --project-dir ${input.projectDir} ${input.connectionId}\n`); + return false; + } + const scanOutput = scanIo.stdoutText(); + const reportPath = readOutputValue(scanOutput, 'Report'); + writeSetupSection( + input.io, + `Scanning ${input.connectionId}`, + [ + '✓ Structural scan completed', + `Changes: ${summarizeScanChanges(scanOutput)}`, + ...(reportPath ? 
[`Report: ${shortenScanReportPath(reportPath)}`] : []), + ], + ); + writeSetupSection(input.io, 'Primary source ready', [ + `${input.connectionId} · ${driverDisplay} · structural scan complete`, + ]); + return true; +} + +async function chooseDrivers( + args: KloSetupDatabasesArgs, + io: KloCliIo, + prompts: KloSetupDatabasesPromptAdapter, + options?: { hasPrimarySources?: boolean }, +): Promise { + if (args.databaseDrivers && args.databaseDrivers.length > 0) { + return [...new Set(args.databaseDrivers)]; + } + if (args.databaseConnectionIds && args.databaseConnectionIds.length > 0) { + return []; + } + if (args.inputMode === 'disabled') { + io.stderr.write( + 'KLO cannot work without a primary source. Pass --database or --database-connection-id, or pass --skip-databases to leave setup incomplete.\n', + ); + return 'missing-input'; + } + while (true) { + const choices = await prompts.multiselect({ + message: withMultiselectNavigation('Which primary sources should KLO connect to?'), + options: [...DRIVER_OPTIONS], + required: false, + }); + if (choices.includes('back')) { + return 'back'; + } + if (choices.length > 0) { + return choices as KloSetupDatabaseDriver[]; + } + + if (options?.hasPrimarySources) { + return 'back'; + } + + io.stdout.write('KLO cannot work without at least one primary source. 
/**
 * Decides which connection id to configure for a driver.
 *
 * An explicit `databaseConnectionId` always wins and is treated as a new
 * connection. Without it, non-interactive mode reports `'missing-input'`.
 * Interactively, the user names a new connection (pre-filled with a free
 * default id) or, when connections of this driver already exist, picks one
 * of them or adds a new one.
 *
 * @returns the chosen id and whether it is an existing or new connection,
 *   `'back'` when the user backs out, or `'missing-input'`.
 */
async function chooseConnectionIdForDriver(input: {
  driver: KloSetupDatabaseDriver;
  connections: Record<string, KloProjectConnectionConfig>;
  args: KloSetupDatabasesArgs;
  prompts: KloSetupDatabasesPromptAdapter;
}): Promise<{ kind: 'existing' | 'new'; connectionId: string } | 'back' | 'missing-input'> {
  if (input.args.databaseConnectionId) {
    return { kind: 'new', connectionId: input.args.databaseConnectionId };
  }
  if (input.args.inputMode === 'disabled') {
    // No id was supplied and prompting is not allowed.
    return 'missing-input';
  }

  const existingIds = existingConnectionIdsByDriver(input.connections, input.driver);
  const defaultId = defaultConnectionIdForDriver(input.connections, input.driver);
  const label = driverLabel(input.driver);

  // Asks for a name; undefined means the prompt was cancelled, a blank answer takes the default.
  const askForNewConnectionId = async (): Promise<string | undefined> => {
    const entered = await input.prompts.text({
      message: withTextInputNavigation(connectionNamePrompt(label)),
      placeholder: defaultId,
      initialValue: defaultId,
    });
    return entered === undefined ? undefined : (entered.trim() || defaultId);
  };

  if (existingIds.length === 0) {
    const connectionId = await askForNewConnectionId();
    if (connectionId === undefined) return 'back';
    return { kind: 'new', connectionId };
  }

  while (true) {
    const choice = await input.prompts.select({
      message: `Configure ${label}`,
      options: [
        ...existingIds.map((connectionId) => ({
          value: `existing:${connectionId}`,
          label: `Use existing ${label} connection: ${connectionId}`,
        })),
        { value: 'new', label: `Add new ${label} connection` },
        { value: 'back', label: 'Back' },
      ],
    });
    if (choice === 'back') return 'back';
    if (choice.startsWith('existing:')) return { kind: 'existing', connectionId: choice.slice('existing:'.length) };
    const connectionId = await askForNewConnectionId();
    // Cancelling the name prompt returns to the menu rather than leaving the step.
    if (connectionId === undefined) continue;
    return { kind: 'new', connectionId };
  }
}
createPromptAdapter(); + + if (args.databaseConnectionIds && args.databaseConnectionIds.length > 0) { + const selectedConnectionIds: string[] = []; + for (const connectionId of unique(args.databaseConnectionIds)) { + const historicSqlResult = await applyHistoricSqlConfigToExistingConnection({ + projectDir: args.projectDir, + connectionId, + args, + prompts, + }); + if (historicSqlResult === 'back') return { status: 'back', projectDir: args.projectDir }; + if (!(await validateAndScanConnection({ projectDir: args.projectDir, connectionId, io, deps }))) { + return { status: 'failed', projectDir: args.projectDir }; + } + selectedConnectionIds.push(connectionId); + } + await markDatabasesComplete(args.projectDir, selectedConnectionIds); + return { status: 'ready', projectDir: args.projectDir, connectionIds: selectedConnectionIds }; + } + + const canReturnToDriverSelection = args.databaseDrivers === undefined || args.databaseDrivers.length === 0; + const initialProject = await loadKloProject({ projectDir: args.projectDir }); + const selectedConnectionIds = + args.inputMode !== 'disabled' && canReturnToDriverSelection + ? 
configuredPrimaryConnectionIds(initialProject.config.connections, initialProject.config.setup?.database_connection_ids) + : []; + let showConfiguredPrimaryMenu = selectedConnectionIds.length > 0; + + while (true) { + if (showConfiguredPrimaryMenu) { + const action = await prompts.select(configuredPrimarySourcesPrompt(selectedConnectionIds)); + if (action === 'continue') { + await markDatabasesComplete(args.projectDir, selectedConnectionIds); + return { status: 'ready', projectDir: args.projectDir, connectionIds: selectedConnectionIds }; + } + if (action === 'back') { + return { status: 'back', projectDir: args.projectDir }; + } + } + showConfiguredPrimaryMenu = false; + + const drivers = await chooseDrivers(args, io, prompts, { hasPrimarySources: selectedConnectionIds.length > 0 }); + if (drivers === 'back') { + if (selectedConnectionIds.length > 0 && canReturnToDriverSelection && args.inputMode !== 'disabled') { + showConfiguredPrimaryMenu = true; + continue; + } + return { status: 'back', projectDir: args.projectDir }; + } + if (drivers === 'missing-input') return { status: 'missing-input', projectDir: args.projectDir }; + if (drivers.length === 0) { + await markDatabasesComplete(args.projectDir, []); + io.stdout.write('KLO cannot work without a primary source.\n'); + return { status: 'skipped', projectDir: args.projectDir }; + } + + let returnToDriverSelection = false; + + for (const driver of drivers) { + const project = await loadKloProject({ projectDir: args.projectDir }); + const connectionChoice = await chooseConnectionIdForDriver({ + driver, + connections: project.config.connections, + args, + prompts, + }); + if (connectionChoice === 'back') { + if (!canReturnToDriverSelection) return { status: 'back', projectDir: args.projectDir }; + returnToDriverSelection = true; + break; + } + if (connectionChoice === 'missing-input') { + io.stderr.write('Missing database connection id: pass --database-connection-id.\n'); + return { status: 'missing-input', 
projectDir: args.projectDir }; + } + + if (connectionChoice.kind === 'new') { + let connection = await buildConnectionConfig({ + driver, + connectionId: connectionChoice.connectionId, + args, + prompts, + }); + if (connection === 'back') { + if (!canReturnToDriverSelection) return { status: 'back', projectDir: args.projectDir }; + returnToDriverSelection = true; + break; + } + while (!connection && args.inputMode !== 'disabled') { + const label = driverLabel(driver); + const action = await prompts.select(missingConnectionDetailsPrompt(label, canReturnToDriverSelection)); + if (action === 'back') { + if (!canReturnToDriverSelection) return { status: 'back', projectDir: args.projectDir }; + returnToDriverSelection = true; + break; + } + connection = await buildConnectionConfig({ + driver, + connectionId: connectionChoice.connectionId, + args, + prompts, + }); + if (connection === 'back') { + if (!canReturnToDriverSelection) return { status: 'back', projectDir: args.projectDir }; + returnToDriverSelection = true; + break; + } + } + if (returnToDriverSelection) { + break; + } + if (connection === 'back') { + break; + } + if (!connection) { + io.stderr.write(`Missing connection details for ${driverLabel(driver)}.\n`); + return { status: 'missing-input', projectDir: args.projectDir }; + } + const withHistoricSql = await maybeApplyHistoricSqlConfig({ connection, driver, args, prompts }); + if (withHistoricSql === 'back') { + if (!canReturnToDriverSelection) return { status: 'back', projectDir: args.projectDir }; + returnToDriverSelection = true; + break; + } + await writeConnectionConfig({ + projectDir: args.projectDir, + connectionId: connectionChoice.connectionId, + connection: withHistoricSql, + }); + } else { + const existing = project.config.connections[connectionChoice.connectionId]; + const withHistoricSql = await maybeApplyHistoricSqlConfig({ connection: existing, driver, args, prompts }); + if (withHistoricSql === 'back') { + if (!canReturnToDriverSelection) 
return { status: 'back', projectDir: args.projectDir }; + returnToDriverSelection = true; + break; + } + await writeConnectionConfig({ + projectDir: args.projectDir, + connectionId: connectionChoice.connectionId, + connection: withHistoricSql, + }); + } + + let connectionSkipped = false; + while ( + !(await validateAndScanConnection({ + projectDir: args.projectDir, + connectionId: connectionChoice.connectionId, + io, + deps, + })) + ) { + if (args.inputMode === 'disabled') return { status: 'failed', projectDir: args.projectDir }; + const action = await prompts.select({ + message: `Primary source setup failed for ${connectionChoice.connectionId}`, + options: [ + { value: 'retry', label: 'Retry connection test' }, + { value: 're-enter', label: 'Re-enter connection details' }, + { value: 'skip', label: 'Skip this primary source' }, + { value: 'back', label: 'Back' }, + ], + }); + if (action === 'back') { + if (!canReturnToDriverSelection) return { status: 'back', projectDir: args.projectDir }; + returnToDriverSelection = true; + break; + } + if (action === 'skip') { + connectionSkipped = true; + break; + } + if (action === 're-enter') { + const connection = await buildConnectionConfig({ + driver, + connectionId: connectionChoice.connectionId, + args, + prompts, + }); + if (connection === 'back') { + if (!canReturnToDriverSelection) return { status: 'back', projectDir: args.projectDir }; + returnToDriverSelection = true; + break; + } + if (!connection) continue; + const withHistoricSql = await maybeApplyHistoricSqlConfig({ connection, driver, args, prompts }); + if (withHistoricSql === 'back') { + if (!canReturnToDriverSelection) return { status: 'back', projectDir: args.projectDir }; + returnToDriverSelection = true; + break; + } + await writeConnectionConfig({ + projectDir: args.projectDir, + connectionId: connectionChoice.connectionId, + connection: withHistoricSql, + }); + } + } + if (returnToDriverSelection) break; + if (connectionSkipped) continue; + + 
pushUniqueConnectionId(selectedConnectionIds, connectionChoice.connectionId); + } + + if (returnToDriverSelection) { + if (selectedConnectionIds.length > 0 && canReturnToDriverSelection && args.inputMode !== 'disabled') { + showConfiguredPrimaryMenu = true; + } + continue; + } + + if (selectedConnectionIds.length === 0) { + io.stderr.write('No primary source connections completed setup.\n'); + return { status: 'failed', projectDir: args.projectDir }; + } + + if (canReturnToDriverSelection && args.inputMode !== 'disabled') { + showConfiguredPrimaryMenu = true; + continue; + } + + await markDatabasesComplete(args.projectDir, selectedConnectionIds); + return { status: 'ready', projectDir: args.projectDir, connectionIds: selectedConnectionIds }; + } +} diff --git a/packages/cli/src/setup-embeddings.test.ts b/packages/cli/src/setup-embeddings.test.ts new file mode 100644 index 00000000..4cf6bdef --- /dev/null +++ b/packages/cli/src/setup-embeddings.test.ts @@ -0,0 +1,381 @@ +import { mkdir, mkdtemp, readFile, rm, writeFile } from 'node:fs/promises'; +import { tmpdir } from 'node:os'; +import { join } from 'node:path'; +import { initKloProject, parseKloProjectConfig } from '@klo/context/project'; +import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'; +import { type KloSetupEmbeddingsPromptAdapter, runKloSetupEmbeddingsStep } from './setup-embeddings.js'; + +const EMBEDDING_OPTION_PROMPT_MESSAGE = [ + 'Which embedding option should KLO use?', + '', + 'KLO uses embeddings for semantic search over semantic-layer sources, wiki context, schema metadata, ' + + 'and relationship evidence.', +].join('\n'); + +function makeIo() { + let stdout = ''; + let stderr = ''; + return { + io: { + stdout: { + isTTY: true, + write: (chunk: string) => { + stdout += chunk; + }, + }, + stderr: { + write: (chunk: string) => { + stderr += chunk; + }, + }, + }, + stdout: () => stdout, + stderr: () => stderr, + }; +} + +function makePromptAdapter(options: { + selectValues?: 
string[]; + passwordValue?: string; +}): KloSetupEmbeddingsPromptAdapter { + const selectValues = [...(options.selectValues ?? [])]; + return { + select: vi.fn(async () => selectValues.shift() ?? 'retry'), + password: vi.fn(async () => options.passwordValue ?? 'embedding-secret'), + cancel: vi.fn(), + }; +} + +describe('setup embeddings step', () => { + let tempDir: string; + + beforeEach(async () => { + tempDir = await mkdtemp(join(tmpdir(), 'klo-setup-embeddings-')); + await initKloProject({ projectDir: tempDir, projectName: 'warehouse' }); + }); + + afterEach(async () => { + await rm(tempDir, { recursive: true, force: true }); + }); + + it('explains why interactive users choose an embedding option before validating embeddings', async () => { + const io = makeIo(); + const healthCheck = vi.fn(async () => ({ ok: true as const })); + const prompts = makePromptAdapter({ selectValues: ['back'] }); + + const result = await runKloSetupEmbeddingsStep( + { + projectDir: tempDir, + inputMode: 'auto', + skipEmbeddings: false, + }, + io.io, + { prompts, env: {}, healthCheck }, + ); + + expect(result.status).toBe('back'); + expect(healthCheck).not.toHaveBeenCalled(); + expect(prompts.select).toHaveBeenCalledWith({ + message: EMBEDDING_OPTION_PROMPT_MESSAGE, + options: [ + { value: 'sentence-transformers', label: 'Local sentence-transformers embeddings' }, + { value: 'openai', label: 'OpenAI embeddings (recommended)' }, + { value: 'back', label: 'Back' }, + ], + }); + }); + + it('returns from the OpenAI credential prompt to embedding option selection when Back is selected', async () => { + const io = makeIo(); + const healthCheck = vi.fn(async () => ({ ok: true as const })); + const prompts = makePromptAdapter({ selectValues: ['openai', 'back', 'sentence-transformers'] }); + + const result = await runKloSetupEmbeddingsStep( + { + projectDir: tempDir, + inputMode: 'auto', + skipEmbeddings: false, + }, + io.io, + { prompts, env: {}, healthCheck }, + ); + + 
expect(result.status).toBe('ready'); + expect(healthCheck).toHaveBeenCalledTimes(1); + expect(healthCheck).toHaveBeenCalledWith({ + backend: 'sentence-transformers', + model: 'all-MiniLM-L6-v2', + dimensions: 384, + sentenceTransformers: { baseURL: 'http://127.0.0.1:8765', pathPrefix: '' }, + }); + expect(vi.mocked(prompts.select).mock.calls.map((call) => call[0].message)).toEqual([ + EMBEDDING_OPTION_PROMPT_MESSAGE, + 'How should KLO find your OpenAI embedding API key?', + EMBEDDING_OPTION_PROMPT_MESSAGE, + ]); + }); + + it('configures local sentence-transformers embeddings after interactive selection', async () => { + const io = makeIo(); + const healthCheck = vi.fn(async () => ({ ok: true as const })); + const prompts = makePromptAdapter({ selectValues: ['sentence-transformers'] }); + + const result = await runKloSetupEmbeddingsStep( + { + projectDir: tempDir, + inputMode: 'auto', + skipEmbeddings: false, + }, + io.io, + { prompts, env: {}, healthCheck }, + ); + + expect(result.status).toBe('ready'); + expect(healthCheck).toHaveBeenCalledWith({ + backend: 'sentence-transformers', + model: 'all-MiniLM-L6-v2', + dimensions: 384, + sentenceTransformers: { baseURL: 'http://127.0.0.1:8765', pathPrefix: '' }, + }); + const config = parseKloProjectConfig(await readFile(join(tempDir, 'klo.yaml'), 'utf-8')); + expect(config.ingest.embeddings).toMatchObject({ + backend: 'sentence-transformers', + model: 'all-MiniLM-L6-v2', + dimensions: 384, + sentenceTransformers: { base_url: 'http://127.0.0.1:8765', pathPrefix: '' }, + }); + expect(config.scan.enrichment.embeddings).toMatchObject(config.ingest.embeddings); + expect(config.setup?.completed_steps).toContain('embeddings'); + expect(io.stdout()).toContain( + 'Testing local sentence-transformers embeddings (all-MiniLM-L6-v2, 384 dimensions). 
First run may take up to 60 seconds.', + ); + expect(io.stdout()).toContain('Embeddings ready: yes'); + }); + + it('shows live progress while local sentence-transformers embeddings are being tested', async () => { + const io = makeIo(); + const prompts = makePromptAdapter({ selectValues: ['sentence-transformers'] }); + let resolveHealthCheck: ((result: { ok: true }) => void) | undefined; + const healthCheck = vi.fn( + () => + new Promise<{ ok: true }>((resolve) => { + resolveHealthCheck = resolve; + }), + ); + + const result = runKloSetupEmbeddingsStep( + { + projectDir: tempDir, + inputMode: 'auto', + skipEmbeddings: false, + }, + io.io, + { prompts, env: {}, healthCheck }, + ); + + await vi.waitFor(() => { + expect(io.stdout()).toContain( + '\r- Testing local sentence-transformers embeddings (all-MiniLM-L6-v2, 384 dimensions). First run may take up to 60 seconds.', + ); + }); + + expect(resolveHealthCheck).toBeDefined(); + resolveHealthCheck?.({ ok: true }); + await expect(result).resolves.toMatchObject({ status: 'ready' }); + }); + + it('uses default local sentence-transformers embeddings in non-interactive setup', async () => { + const io = makeIo(); + const healthCheck = vi.fn(async () => ({ ok: true as const })); + + const result = await runKloSetupEmbeddingsStep( + { + projectDir: tempDir, + inputMode: 'disabled', + skipEmbeddings: false, + }, + io.io, + { env: {}, healthCheck }, + ); + + expect(result.status).toBe('ready'); + expect(healthCheck).toHaveBeenCalledWith({ + backend: 'sentence-transformers', + model: 'all-MiniLM-L6-v2', + dimensions: 384, + sentenceTransformers: { baseURL: 'http://127.0.0.1:8765', pathPrefix: '' }, + }); + const config = parseKloProjectConfig(await readFile(join(tempDir, 'klo.yaml'), 'utf-8')); + expect(config.ingest.embeddings).toMatchObject({ + backend: 'sentence-transformers', + model: 'all-MiniLM-L6-v2', + dimensions: 384, + sentenceTransformers: { base_url: 'http://127.0.0.1:8765', pathPrefix: '' }, + }); + 
expect(config.scan.enrichment.embeddings).toMatchObject(config.ingest.embeddings); + expect(config.setup?.completed_steps).toContain('embeddings'); + }); + + it('does not persist embedding completion when the health check fails', async () => { + const io = makeIo(); + const result = await runKloSetupEmbeddingsStep( + { + projectDir: tempDir, + inputMode: 'disabled', + skipEmbeddings: false, + }, + io.io, + { + env: {}, + healthCheck: vi.fn(async () => ({ ok: false as const, message: '401 invalid api key [redacted]' })), + }, + ); + + expect(result.status).toBe('failed'); + const config = parseKloProjectConfig(await readFile(join(tempDir, 'klo.yaml'), 'utf-8')); + expect(config.setup?.completed_steps ?? []).not.toContain('embeddings'); + expect(config.ingest.embeddings.backend).toBe('deterministic'); + expect(io.stderr()).toContain('Local embedding health check failed: 401 invalid api key [redacted]'); + expect(io.stderr()).toContain('klo-daemon serve-http --host 127.0.0.1 --port 8765'); + expect(io.stderr()).not.toContain('skip for now'); + }); + + it('uses fixed OpenAI defaults and only asks for credentials when OpenAI is selected', async () => { + const io = makeIo(); + const healthCheck = vi.fn(async () => ({ ok: true as const })); + + const result = await runKloSetupEmbeddingsStep( + { + projectDir: tempDir, + inputMode: 'disabled', + embeddingBackend: 'openai', + embeddingApiKeyEnv: 'OPENAI_API_KEY', + skipEmbeddings: false, + }, + io.io, + { + env: { OPENAI_API_KEY: 'sk-openai-test' }, + healthCheck, + }, + ); + + expect(result.status).toBe('ready'); + expect(healthCheck).toHaveBeenCalledWith({ + backend: 'openai', + model: 'text-embedding-3-small', + dimensions: 1536, + openai: { apiKey: 'sk-openai-test' }, + }); + const config = parseKloProjectConfig(await readFile(join(tempDir, 'klo.yaml'), 'utf-8')); + expect(config.ingest.embeddings).toMatchObject({ + backend: 'openai', + model: 'text-embedding-3-small', + dimensions: 1536, + openai: { api_key: 
'env:OPENAI_API_KEY' }, + }); + expect(io.stdout()).not.toContain('sk-openai-test'); + }); + + it('can fall back to OpenAI after the default local daemon is unavailable', async () => { + const io = makeIo(); + const prompts = makePromptAdapter({ selectValues: ['sentence-transformers', 'openai', 'env'] }); + const healthCheck = vi + .fn() + .mockResolvedValueOnce({ ok: false as const, message: 'fetch failed' }) + .mockResolvedValueOnce({ ok: true as const }); + + const result = await runKloSetupEmbeddingsStep( + { projectDir: tempDir, inputMode: 'auto', skipEmbeddings: false }, + io.io, + { prompts, env: { OPENAI_API_KEY: 'sk-openai-test' }, healthCheck }, + ); + + expect(result.status).toBe('ready'); + expect(healthCheck).toHaveBeenNthCalledWith(1, { + backend: 'sentence-transformers', + model: 'all-MiniLM-L6-v2', + dimensions: 384, + sentenceTransformers: { baseURL: 'http://127.0.0.1:8765', pathPrefix: '' }, + }); + expect(healthCheck).toHaveBeenNthCalledWith(2, { + backend: 'openai', + model: 'text-embedding-3-small', + dimensions: 1536, + openai: { apiKey: 'sk-openai-test' }, + }); + expect(prompts.select).toHaveBeenCalledWith( + expect.objectContaining({ + message: 'Local embeddings are not reachable. 
Start the local KLO daemon, then retry.', + options: expect.arrayContaining([expect.objectContaining({ value: 'openai' })]), + }), + ); + expect(vi.mocked(prompts.select).mock.calls[1]?.[0].options).toEqual([ + { value: 'retry', label: 'Retry' }, + { value: 'openai', label: 'Use OpenAI embeddings' }, + { value: 'back', label: 'Back' }, + ]); + const config = parseKloProjectConfig(await readFile(join(tempDir, 'klo.yaml'), 'utf-8')); + expect(config.ingest.embeddings.backend).toBe('openai'); + }); + + it('leaves setup incomplete when skipped', async () => { + const result = await runKloSetupEmbeddingsStep( + { projectDir: tempDir, inputMode: 'disabled', skipEmbeddings: true }, + makeIo().io, + ); + + expect(result.status).toBe('skipped'); + const config = parseKloProjectConfig(await readFile(join(tempDir, 'klo.yaml'), 'utf-8')); + expect(config.setup?.completed_steps ?? []).not.toContain('embeddings'); + expect(config.ingest.embeddings.backend).toBe('deterministic'); + }); + + it('returns back without writing config when the local health check fails and Back is selected', async () => { + const prompts = makePromptAdapter({ selectValues: ['sentence-transformers', 'back'] }); + const result = await runKloSetupEmbeddingsStep( + { projectDir: tempDir, inputMode: 'auto', skipEmbeddings: false }, + makeIo().io, + { prompts, env: {}, healthCheck: vi.fn(async () => ({ ok: false as const, message: 'daemon unavailable' })) }, + ); + + expect(result.status).toBe('back'); + const config = parseKloProjectConfig(await readFile(join(tempDir, 'klo.yaml'), 'utf-8')); + expect(config.ingest.embeddings.backend).toBe('deterministic'); + }); + + it('preserves already completed embeddings setup when no embedding args request changes', async () => { + await mkdir(join(tempDir, '.klo'), { recursive: true }); + await initKloProject({ projectDir: tempDir, projectName: 'warehouse', force: true }); + await writeFile( + join(tempDir, 'klo.yaml'), + [ + 'project: warehouse', + 'setup:', + ' 
database_connection_ids: []', + ' completed_steps:', + ' - project', + ' - llm', + ' - embeddings', + 'connections: {}', + 'ingest:', + ' embeddings:', + ' backend: sentence-transformers', + ' model: all-MiniLM-L6-v2', + ' dimensions: 384', + ' sentenceTransformers:', + ' base_url: http://127.0.0.1:8765', + " pathPrefix: ''", + ].join('\n'), + 'utf-8', + ); + + const healthCheck = vi.fn(async () => ({ ok: true as const })); + await expect( + runKloSetupEmbeddingsStep({ projectDir: tempDir, inputMode: 'disabled', skipEmbeddings: false }, makeIo().io, { + env: { OPENAI_API_KEY: 'sk-openai-test' }, + healthCheck, + }), + ).resolves.toMatchObject({ status: 'ready' }); + expect(healthCheck).not.toHaveBeenCalled(); + }); +}); diff --git a/packages/cli/src/setup-embeddings.ts b/packages/cli/src/setup-embeddings.ts new file mode 100644 index 00000000..d6d380dc --- /dev/null +++ b/packages/cli/src/setup-embeddings.ts @@ -0,0 +1,485 @@ +import { writeFile } from 'node:fs/promises'; +import { cancel, isCancel, password, select } from '@clack/prompts'; +import { resolveKloConfigReference } from '@klo/context/core'; +import { + type KloProjectConfig, + type KloProjectEmbeddingConfig, + loadKloProject, + markKloSetupStepComplete, + serializeKloProjectConfig, +} from '@klo/context/project'; +import { type KloEmbeddingConfig, type KloEmbeddingHealthCheckResult, runKloEmbeddingHealthCheck } from '@klo/llm'; +import type { KloCliIo } from './cli-runtime.js'; +import { withMenuOptionsSpacing, withTextInputNavigation } from './prompt-navigation.js'; +import { withSetupInterruptConfirmation } from './setup-interrupt.js'; +import { envCredentialReference, writeProjectLocalSecretReference } from './setup-secrets.js'; + +export type KloSetupEmbeddingBackend = 'openai' | 'sentence-transformers'; + +export interface KloSetupEmbeddingsArgs { + projectDir: string; + inputMode: 'auto' | 'disabled'; + embeddingBackend?: KloSetupEmbeddingBackend; + embeddingApiKeyEnv?: string; + 
embeddingApiKeyFile?: string; + forcePrompt?: boolean; + showPromptInstructions?: boolean; + skipEmbeddings: boolean; +} + +export type KloSetupEmbeddingsResult = + | { status: 'ready'; projectDir: string } + | { status: 'skipped'; projectDir: string } + | { status: 'back'; projectDir: string } + | { status: 'missing-input'; projectDir: string } + | { status: 'failed'; projectDir: string }; + +export interface KloSetupEmbeddingsPromptAdapter { + select(options: { message: string; options: Array<{ value: string; label: string }> }): Promise; + password(options: { message: string }): Promise; + cancel(message: string): void; +} + +export interface KloSetupEmbeddingsDeps { + env?: NodeJS.ProcessEnv; + prompts?: KloSetupEmbeddingsPromptAdapter; + healthCheck?: (config: KloEmbeddingConfig) => Promise; +} + +type BackendChoice = KloSetupEmbeddingBackend | 'back'; + +const DEFAULTS: Record< + KloSetupEmbeddingBackend, + { model: string; dimensions: number; envName?: string; baseUrl?: string; pathPrefix?: string } +> = { + openai: { model: 'text-embedding-3-small', dimensions: 1536, envName: 'OPENAI_API_KEY' }, + 'sentence-transformers': { + model: 'all-MiniLM-L6-v2', + dimensions: 384, + baseUrl: 'http://127.0.0.1:8765', + pathPrefix: '', + }, +}; + +const LOCAL_EMBEDDING_BACKEND: KloSetupEmbeddingBackend = 'sentence-transformers'; +const LOCAL_EMBEDDING_DAEMON_COMMAND = 'klo-daemon serve-http --host 127.0.0.1 --port 8765'; +const LOCAL_EMBEDDING_DAEMON_DEV_COMMAND = + 'cd klo && source .venv/bin/activate && uv run klo-daemon serve-http --host 127.0.0.1 --port 8765'; +const EMBEDDING_OPTION_PROMPT_CONTEXT = + 'KLO uses embeddings for semantic search over semantic-layer sources, wiki context, schema metadata, ' + + 'and relationship evidence.'; +const LOCAL_EMBEDDING_HEALTH_TIMEOUT_MS = 120_000; +const HEALTH_CHECK_SPINNER_FRAMES = ['-', '\\', '|', '/'] as const; +const HEALTH_CHECK_SPINNER_INTERVAL_MS = 120; +const CLEAR_CURRENT_LINE = '\x1b[2K\r'; + +interface 
HealthCheckProgress { + succeed(message: string): void; + fail(message: string): void; +} + +/** Builds the default prompt adapter on top of the clack prompt functions. A cancelled select prints a cancellation notice and resolves to the back value; a cancelled password resolves to undefined. NOTE(review): password() applies withTextInputNavigation to the message, and chooseCredentialRef also wraps its message before calling it - confirm the helper tolerates being applied twice. */ function createPromptAdapter(): KloSetupEmbeddingsPromptAdapter { + return { + async select(options) { + const value = await withSetupInterruptConfirmation(() => select(withMenuOptionsSpacing(options))); + if (isCancel(value)) { + cancel('Setup cancelled.'); + return 'back'; + } + return value; + }, + async password(options) { + const value = await withSetupInterruptConfirmation(() => + password({ ...options, message: withTextInputNavigation(options.message) }), + ); + return isCancel(value) ? undefined : value; + }, + cancel(message) { + cancel(message); + }, + }; +} + +/** True when the embeddings step is recorded as completed and the configured backend is a real one (neither none nor deterministic) with a non-empty model name and positive dimensions. */ function hasCompletedEmbeddings(config: KloProjectConfig): boolean { + return ( + config.setup?.completed_steps.includes('embeddings') === true && + config.ingest.embeddings.backend !== 'none' && + config.ingest.embeddings.backend !== 'deterministic' && + typeof config.ingest.embeddings.model === 'string' && + config.ingest.embeddings.model.length > 0 && + config.ingest.embeddings.dimensions > 0 + ); +} + +/** Builds the embeddings section that is persisted to the project config. OpenAI stores an optional api_key reference (never the key value); any other backend stores the base URL and path prefix taken from DEFAULTS. */ function buildProjectEmbeddingConfig(input: { + backend: KloSetupEmbeddingBackend; + model: string; + dimensions: number; + credentialRef?: string; +}): KloProjectEmbeddingConfig { + if (input.backend === 'openai') { + return { + backend: 'openai', + model: input.model, + dimensions: input.dimensions, + openai: { + ...(input.credentialRef ? { api_key: input.credentialRef } : {}), + }, + }; + } + const defaults = DEFAULTS[input.backend]; + return { + backend: input.backend, + model: input.model, + dimensions: input.dimensions, + sentenceTransformers: { + base_url: defaults.baseUrl ?? '', + pathPrefix: defaults.pathPrefix ?? 
'', + }, + }; +} + +/** Builds the runtime config handed to the health check. OpenAI carries the resolved key value when one is present; any other backend carries the base URL and path prefix taken from DEFAULTS. */ function buildHealthConfig(input: { + backend: KloSetupEmbeddingBackend; + model: string; + dimensions: number; + credentialValue?: string; +}): KloEmbeddingConfig { + if (input.backend === 'openai') { + return { + backend: 'openai', + model: input.model, + dimensions: input.dimensions, + openai: { + ...(input.credentialValue ? { apiKey: input.credentialValue } : {}), + }, + }; + } + const defaults = DEFAULTS[input.backend]; + return { + backend: input.backend, + model: input.model, + dimensions: input.dimensions, + sentenceTransformers: { + baseURL: defaults.baseUrl ?? '', + pathPrefix: defaults.pathPrefix ?? '', + }, + }; +} + +/** Human-readable backend name used in prompt messages. */ function embeddingBackendDisplayName(backend: KloSetupEmbeddingBackend): string { + if (backend === 'openai') { + return 'OpenAI'; + } + return 'sentence-transformers'; +} + +/** Reloads the project, writes the given embeddings config under both ingest and scan.enrichment, marks the embeddings setup step complete, and saves the serialized config to the project config path. */ async function persistEmbeddingConfig(projectDir: string, embeddings: KloProjectEmbeddingConfig): Promise { + const project = await loadKloProject({ projectDir }); + const config = markKloSetupStepComplete( + { + ...project.config, + ingest: { + ...project.config.ingest, + embeddings, + }, + scan: { + ...project.config.scan, + enrichment: { + ...project.config.scan.enrichment, + embeddings, + }, + }, + }, + 'embeddings', + ); + await writeFile(project.configPath, serializeKloProjectConfig(config), 'utf-8'); +} + +/** Resolves the embedding API key. Order: args.embeddingApiKeyEnv, then args.embeddingApiKeyFile, then (interactive mode only) a prompt offering the default environment variable or a pasted key. Returns the config reference together with the resolved value, or a back / missing-input status. deps.env replaces process.env for lookups. */ async function chooseCredentialRef( + backend: Extract, + args: KloSetupEmbeddingsArgs, + io: KloCliIo, + deps: KloSetupEmbeddingsDeps, +): Promise<{ status: 'ready'; ref: string; value: string } | { status: 'back' | 'missing-input' }> { + const env = deps.env ?? 
process.env; + if (args.embeddingApiKeyEnv) { + const ref = envCredentialReference(args.embeddingApiKeyEnv); + const value = resolveKloConfigReference(ref, env); + if (!value) { + io.stderr.write(`Missing embedding API key: ${args.embeddingApiKeyEnv} is not set.\n`); + return { status: 'missing-input' }; + } + return { status: 'ready', ref, value }; + } + if (args.embeddingApiKeyFile) { + const ref = `file:${args.embeddingApiKeyFile}`; + let value: string | undefined; + try { + value = resolveKloConfigReference(ref, env); + } catch { + value = undefined; + } + if (!value) { + io.stderr.write(`Missing embedding API key file: ${args.embeddingApiKeyFile}\n`); + return { status: 'missing-input' }; + } + return { status: 'ready', ref, value }; + } + if (args.inputMode === 'disabled') { + io.stderr.write('Missing embedding API key: pass --embedding-api-key-env or --embedding-api-key-file.\n'); + return { status: 'missing-input' }; + } + + const defaultEnv = DEFAULTS[backend].envName ?? 'EMBEDDING_API_KEY'; + const prompts = deps.prompts ?? 
createPromptAdapter(); + const choice = await prompts.select({ + message: `How should KLO find your ${embeddingBackendDisplayName(backend)} embedding API key?`, + options: [ + { value: 'env', label: `Use ${defaultEnv} from the environment` }, + { value: 'paste', label: 'Paste a key and save it as a local secret file' }, + { value: 'back', label: 'Back' }, + ], + }); + if (choice === 'back') { + return { status: 'back' }; + } + /* Paste flow: store the key in a project-local secret file and reference that file from klo.yaml. */ if (choice === 'paste') { + io.stdout.write( + `${[ + `KLO will save the key in .klo/secrets/${backend}-api-key with local file permissions,`, + 'then write a file: reference in klo.yaml.', + ].join(' ')}\n`, + ); + const value = await prompts.password({ message: withTextInputNavigation(`${backend} embedding API key`) }); + /* undefined means the prompt was cancelled. */ if (value === undefined) { + return { status: 'back' }; + } + if (!value.trim()) { + return { status: 'missing-input' }; + } + /* Persist the trimmed key so the stored secret is exactly the value that is health-checked and returned below; pasted keys often carry stray whitespace or a trailing newline. */ const ref = await writeProjectLocalSecretReference({ + projectDir: args.projectDir, + fileName: `${backend}-api-key`, + value: value.trim(), + }); + return { status: 'ready', ref, value: value.trim() }; + } + + /* Any other choice uses the default environment variable, which must already be set. */ const ref = envCredentialReference(defaultEnv); + const value = resolveKloConfigReference(ref, env); + if (!value) { + io.stderr.write(`Missing embedding API key: ${defaultEnv} is not set.\n`); + return { status: 'missing-input' }; + } + return { status: 'ready', ref, value }; +} + +/** Picks the embedding backend: an explicit args.embeddingBackend wins, non-interactive mode falls back to the local backend, otherwise the user is prompted. */ async function chooseEmbeddingBackend( + args: KloSetupEmbeddingsArgs, + deps: KloSetupEmbeddingsDeps, +): Promise { + if (args.embeddingBackend) { + return args.embeddingBackend; + } + if (args.inputMode === 'disabled') { + return LOCAL_EMBEDDING_BACKEND; + } + const choice = await (deps.prompts ?? 
createPromptAdapter()).select({ + message: `Which embedding option should KLO use?\n\n${EMBEDDING_OPTION_PROMPT_CONTEXT}`, + options: [ + { value: 'sentence-transformers', label: 'Local sentence-transformers embeddings' }, + { value: 'openai', label: 'OpenAI embeddings (recommended)' }, + { value: 'back', label: 'Back' }, + ], + }); + /* Any value outside the three offered options is treated as back. */ if (choice === 'openai' || choice === 'sentence-transformers' || choice === 'back') { + return choice; + } + return 'back'; +} + +/** Formats the multi-line guidance written to stderr when the local embedding health check fails, including the daemon start commands. */ function localEmbeddingSetupMessage(message: string): string { + return [ + `Local embedding health check failed: ${message}`, + 'Local embeddings use the KLO Python daemon. KLO can call klo-daemon automatically when it is on PATH.', + `For repeated inference, start the HTTP daemon in another terminal with: ${LOCAL_EMBEDDING_DAEMON_COMMAND}`, + `From the KLO repo, use: ${LOCAL_EMBEDDING_DAEMON_DEV_COMMAND}`, + 'The first run may download the all-MiniLM-L6-v2 model, so it can take a minute.', + ].join('\n'); +} + +/** Asks what to do after a failed local health check. Any answer other than openai or back is treated as retry. */ async function promptAfterLocalEmbeddingFailure( + deps: KloSetupEmbeddingsDeps, +): Promise<'retry' | Extract | 'back'> { + const choice = await (deps.prompts ?? createPromptAdapter()).select({ + message: 'Local embeddings are not reachable. 
Start the local KLO daemon, then retry.', + options: [ + { value: 'retry', label: 'Retry' }, + { value: 'openai', label: 'Use OpenAI embeddings' }, + { value: 'back', label: 'Back' }, + ], + }); + if (choice === 'openai' || choice === 'back') { + return choice; + } + return 'retry'; +} + +/** Progress line shown while the health check runs; the local backend gets an extra first-run latency hint. NOTE(review): the hint says 60 seconds while LOCAL_EMBEDDING_HEALTH_TIMEOUT_MS is 120_000 - confirm the wording is intentional. */ function healthCheckStartText(backend: KloSetupEmbeddingBackend, model: string, dimensions: number): string { + if (backend === LOCAL_EMBEDDING_BACKEND) { + return [ + `Testing local sentence-transformers embeddings (${model}, ${dimensions} dimensions).`, + 'First run may take up to 60 seconds.', + ].join(' '); + } + return `Checking ${backend} embeddings (${model}, ${dimensions} dimensions).`; +} + +/** Starts progress output for a health check. Non-TTY stdout: prints the message once and returns no-op handlers. TTY stdout: redraws a spinner frame every HEALTH_CHECK_SPINNER_INTERVAL_MS until succeed or fail replaces the line with the final message; only the first succeed/fail call has any effect. */ function startHealthCheckProgress(io: KloCliIo, message: string): HealthCheckProgress { + if (io.stdout.isTTY !== true) { + io.stdout.write(`${message}\n`); + const noop = () => undefined; + return { + succeed: noop, + fail: noop, + }; + } + + let frameIndex = 0; + let stopped = false; + const writeFrame = () => { + io.stdout.write(`${CLEAR_CURRENT_LINE}${HEALTH_CHECK_SPINNER_FRAMES[frameIndex]} ${message}`); + }; + writeFrame(); + const interval = setInterval(() => { + frameIndex = (frameIndex + 1) % HEALTH_CHECK_SPINNER_FRAMES.length; + writeFrame(); + }, HEALTH_CHECK_SPINNER_INTERVAL_MS); + + /* Idempotent: clears the timer and overwrites the spinner line exactly once. */ const stop = (finalMessage: string) => { + if (stopped) { + return; + } + stopped = true; + clearInterval(interval); + io.stdout.write(`${CLEAR_CURRENT_LINE}${finalMessage}\n`); + }; + + return { + succeed(message) { + stop(message); + }, + fail(message) { + stop(message); + }, + }; +} + +/** Runs the embeddings setup step. Returns skipped when args.skipEmbeddings is set, and ready without a health check when embeddings are already configured and neither forcePrompt nor any embedding argument asks for a change. Otherwise it picks a backend, resolves credentials for OpenAI, runs the health check, and persists the config only on success. On failure, non-interactive mode returns failed, while the interactive local backend offers retry, OpenAI, or back. */ export async function runKloSetupEmbeddingsStep( + args: KloSetupEmbeddingsArgs, + io: KloCliIo, + deps: KloSetupEmbeddingsDeps = {}, +): Promise { + if (args.skipEmbeddings) { + io.stdout.write('Embeddings setup skipped.\n'); + return { status: 'skipped', projectDir: args.projectDir }; + } + + const project = await loadKloProject({ projectDir: args.projectDir }); + if ( + args.forcePrompt !== true && + 
hasCompletedEmbeddings(project.config) && + !args.embeddingBackend && + !args.embeddingApiKeyEnv && + !args.embeddingApiKeyFile + ) { + io.stdout.write(`Embeddings ready: yes (${project.config.ingest.embeddings.model})\n`); + return { status: 'ready', projectDir: args.projectDir }; + } + + const healthCheck = + deps.healthCheck ?? + ((config: KloEmbeddingConfig) => + runKloEmbeddingHealthCheck(config, { timeoutMs: LOCAL_EMBEDDING_HEALTH_TIMEOUT_MS })); + let selectedBackend: KloSetupEmbeddingBackend | undefined; + + while (true) { + if (!selectedBackend) { + const backend = await chooseEmbeddingBackend(args, deps); + if (backend === 'back') { + return { status: 'back', projectDir: args.projectDir }; + } + selectedBackend = backend; + } + + const defaults = DEFAULTS[selectedBackend]; + const model = defaults.model; + const dimensions = defaults.dimensions; + let credentialRef: string | undefined; + let credentialValue: string | undefined; + + if (selectedBackend === 'openai') { + const credential = await chooseCredentialRef(selectedBackend, args, io, deps); + if (credential.status === 'back' && !args.embeddingBackend && args.inputMode !== 'disabled') { + selectedBackend = undefined; + continue; + } + if (credential.status !== 'ready') { + return { status: credential.status, projectDir: args.projectDir }; + } + credentialRef = credential.ref; + credentialValue = credential.value; + } + + const healthConfig = buildHealthConfig({ + backend: selectedBackend, + model, + dimensions, + credentialValue, + }); + const progress = startHealthCheckProgress(io, healthCheckStartText(selectedBackend, model, dimensions)); + let health: KloEmbeddingHealthCheckResult; + try { + health = await healthCheck(healthConfig); + } catch (error) { + progress.fail('Embedding test failed'); + throw error; + } + if (health.ok) { + progress.succeed(`Embedding test passed (${model}, ${dimensions} dimensions)`); + await persistEmbeddingConfig( + args.projectDir, + buildProjectEmbeddingConfig({ + 
backend: selectedBackend, + model, + dimensions, + credentialRef, + }), + ); + io.stdout.write(`Embeddings ready: yes (${model}, ${dimensions} dimensions)\n`); + return { status: 'ready', projectDir: args.projectDir }; + } + + progress.fail('Embedding test failed'); + io.stderr.write( + selectedBackend === 'sentence-transformers' + ? `${localEmbeddingSetupMessage(health.message)}\n` + : `Embedding health check failed: ${health.message}\n`, + ); + if (args.inputMode === 'disabled') { + return { status: 'failed', projectDir: args.projectDir }; + } + if (selectedBackend !== 'sentence-transformers' && (args.embeddingApiKeyEnv || args.embeddingApiKeyFile)) { + return { status: 'failed', projectDir: args.projectDir }; + } + const nextAction = + selectedBackend === 'sentence-transformers' ? await promptAfterLocalEmbeddingFailure(deps) : 'retry'; + if (nextAction === 'back') { + return { status: 'back', projectDir: args.projectDir }; + } + if (nextAction === 'openai') { + selectedBackend = nextAction; + } + } +} diff --git a/packages/cli/src/setup-interrupt.test.ts b/packages/cli/src/setup-interrupt.test.ts new file mode 100644 index 00000000..5bc2fb90 --- /dev/null +++ b/packages/cli/src/setup-interrupt.test.ts @@ -0,0 +1,90 @@ +import { afterEach, describe, expect, it, vi } from 'vitest'; + +import { + KloSetupExitError, + withSetupInterruptConfirmation, + type SetupInterruptTracker, +} from './setup-interrupt.js'; + +const CANCEL = Symbol('cancel'); + +function makeTracker(ctrlCValues: boolean[]): SetupInterruptTracker { + return { + track: vi.fn((run) => run()), + wasCtrlC: vi.fn(() => ctrlCValues.shift() ?? 
false), + }; +} + +describe('setup interrupt confirmation', () => { + const originalIsTTY = process.stdin.isTTY; + + afterEach(() => { + Object.defineProperty(process.stdin, 'isTTY', { configurable: true, value: originalIsTTY }); + }); + + it('fails before opening a prompt when interactive setup has no tty', async () => { + Object.defineProperty(process.stdin, 'isTTY', { configurable: true, value: false }); + const prompt = vi.fn(async () => 'continued'); + + await expect(withSetupInterruptConfirmation(prompt)).rejects.toThrow( + 'Interactive setup requires a terminal. Re-run this command in a TTY, or pass --no-input with the required options.', + ); + + expect(prompt).not.toHaveBeenCalled(); + }); + + it('asks before exiting on Ctrl+C and reruns the active prompt when declined', async () => { + const prompt = vi.fn(async () => (prompt.mock.calls.length === 1 ? CANCEL : 'continued')); + const confirmExit = vi.fn(async () => false); + + await expect( + withSetupInterruptConfirmation(prompt, { + confirmExit, + isCancel: (value): value is symbol => value === CANCEL, + tracker: makeTracker([true]), + }), + ).resolves.toBe('continued'); + + expect(prompt).toHaveBeenCalledTimes(2); + expect(confirmExit).toHaveBeenCalledTimes(1); + }); + + it('exits immediately when the confirmation is accepted', async () => { + const prompt = vi.fn(async () => CANCEL); + + await expect( + withSetupInterruptConfirmation(prompt, { + confirmExit: vi.fn(async () => true), + isCancel: (value): value is symbol => value === CANCEL, + tracker: makeTracker([true]), + }), + ).rejects.toBeInstanceOf(KloSetupExitError); + }); + + it('keeps non-Ctrl+C cancellation available for Back and Escape flows', async () => { + const prompt = vi.fn(async () => CANCEL); + const confirmExit = vi.fn(async () => true); + + await expect( + withSetupInterruptConfirmation(prompt, { + confirmExit, + isCancel: (value): value is symbol => value === CANCEL, + tracker: makeTracker([false]), + }), + ).resolves.toBe(CANCEL); 
+ + expect(confirmExit).not.toHaveBeenCalled(); + }); + + it('exits immediately when Ctrl+C is pressed again at the confirmation prompt', async () => { + const prompt = vi.fn(async () => CANCEL); + + await expect( + withSetupInterruptConfirmation(prompt, { + confirmExit: vi.fn(async () => CANCEL), + isCancel: (value): value is symbol => value === CANCEL, + tracker: makeTracker([true]), + }), + ).rejects.toBeInstanceOf(KloSetupExitError); + }); +}); diff --git a/packages/cli/src/setup-interrupt.ts b/packages/cli/src/setup-interrupt.ts new file mode 100644 index 00000000..715f2e07 --- /dev/null +++ b/packages/cli/src/setup-interrupt.ts @@ -0,0 +1,90 @@ +import { stdin } from 'node:process'; +import type { Key } from 'node:readline'; +import { cancel, confirm, isCancel as isClackCancel } from '@clack/prompts'; + +export class KloSetupExitError extends Error { + constructor() { + super('KLO setup exit requested'); + this.name = 'KloSetupExitError'; + } +} + +export interface SetupInterruptTracker { + track(run: () => Promise): Promise; + wasCtrlC(): boolean; +} + +interface SetupInterruptOptions { + confirmExit?: () => Promise; + isCancel?: (value: unknown) => value is symbol; + tracker?: SetupInterruptTracker; +} + +const NON_INTERACTIVE_SETUP_MESSAGE = + 'Interactive setup requires a terminal. 
Re-run this command in a TTY, or pass --no-input with the required options.'; + +function createSetupInterruptTracker(input: NodeJS.ReadStream = stdin): SetupInterruptTracker { + let ctrlCPressed = false; + const onKeypress = (char: string | undefined, key: Key) => { + if (char === '\x03' || key.sequence === '\x03') { + ctrlCPressed = true; + } + }; + + return { + async track(run) { + ctrlCPressed = false; + input.on('keypress', onKeypress); + try { + return await run(); + } finally { + input.off('keypress', onKeypress); + } + }, + wasCtrlC() { + return ctrlCPressed; + }, + }; +} + +async function defaultConfirmExit(): Promise { + return await confirm({ + message: 'Exit setup wizard?', + active: 'Yes, exit', + inactive: 'No, continue setup', + initialValue: false, + }); +} + +export function isKloSetupExitError(error: unknown): error is KloSetupExitError { + return error instanceof KloSetupExitError; +} + +export async function withSetupInterruptConfirmation( + prompt: () => Promise, + options: SetupInterruptOptions = {}, +): Promise { + if (!options.tracker && stdin.isTTY !== true) { + throw new Error(NON_INTERACTIVE_SETUP_MESSAGE); + } + + const isCancel = options.isCancel ?? isClackCancel; + const tracker = options.tracker ?? createSetupInterruptTracker(); + const confirmExit = options.confirmExit ?? 
defaultConfirmExit; + + while (true) { + const value = await tracker.track(prompt); + if (!isCancel(value)) { + return value; + } + if (!tracker.wasCtrlC()) { + return value; + } + + const shouldExit = await confirmExit(); + if (isCancel(shouldExit) || shouldExit === true) { + cancel('Setup cancelled.'); + throw new KloSetupExitError(); + } + } +} diff --git a/packages/cli/src/setup-models.test.ts b/packages/cli/src/setup-models.test.ts new file mode 100644 index 00000000..34396966 --- /dev/null +++ b/packages/cli/src/setup-models.test.ts @@ -0,0 +1,679 @@ +import { mkdir, mkdtemp, readFile, rm, stat, writeFile } from 'node:fs/promises'; +import { tmpdir } from 'node:os'; +import { join } from 'node:path'; +import { initKloProject, parseKloProjectConfig } from '@klo/context/project'; +import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'; +import { + BUNDLED_ANTHROPIC_MODELS, + fetchAnthropicModels, + type KloSetupModelPromptAdapter, + runKloSetupAnthropicModelStep, +} from './setup-models.js'; + +function makeIo() { + let stdout = ''; + let stderr = ''; + return { + io: { + stdout: { + isTTY: true, + write: (chunk: string) => { + stdout += chunk; + }, + }, + stderr: { + write: (chunk: string) => { + stderr += chunk; + }, + }, + }, + stdout: () => stdout, + stderr: () => stderr, + }; +} + +function makePromptAdapter(options: { + selectValues?: string[]; + credentialChoice?: string; + modelChoice?: string; + textValues?: string[]; + passwordValue?: string; + passwordValues?: Array; +}): KloSetupModelPromptAdapter { + const selectValues = [...(options.selectValues ?? [])]; + const textValues = [...(options.textValues ?? [])]; + const passwordValues = [...(options.passwordValues ?? [])]; + return { + select: vi.fn(async ({ message }) => { + const nextValue = selectValues.shift(); + if (nextValue) { + return nextValue; + } + if (message.includes('Anthropic API key')) { + return options.credentialChoice ?? 'env'; + } + return options.modelChoice ?? 
'claude-sonnet-4-6'; + }), + text: vi.fn(async () => textValues.shift() ?? ''), + password: vi.fn(async () => (passwordValues.length > 0 ? passwordValues.shift() : options.passwordValue ?? 'sk-ant-pasted')), + cancel: vi.fn(), + }; +} + +describe('setup Anthropic model step', () => { + let tempDir: string; + + beforeEach(async () => { + tempDir = await mkdtemp(join(tmpdir(), 'klo-setup-models-')); + await initKloProject({ projectDir: tempDir, projectName: 'warehouse' }); + }); + + afterEach(async () => { + await rm(tempDir, { recursive: true, force: true }); + }); + + it('does not expose Claude Sonnet 4 or Claude Opus 4 as selectable Anthropic models', async () => { + const fetchModels = vi.fn( + async () => + new Response( + JSON.stringify({ + data: [ + { id: 'claude-sonnet-4', display_name: 'Claude Sonnet 4' }, + { id: 'claude-opus-4', display_name: 'Claude Opus 4' }, + { id: 'claude-sonnet-4-6', display_name: 'Claude Sonnet 4.6' }, + { id: 'claude-opus-4-6', display_name: 'Claude Opus 4.6' }, + { id: 'claude-haiku-4-5', display_name: 'Claude Haiku 4.5' }, + ], + }), + { status: 200 }, + ), + ); + + await expect(fetchAnthropicModels('sk-ant-test', fetchModels)).resolves.toEqual([ + { id: 'claude-sonnet-4-6', label: 'Claude Sonnet 4.6', recommended: true }, + { id: 'claude-opus-4-6', label: 'Claude Opus 4.6', recommended: false }, + { id: 'claude-haiku-4-5', label: 'Claude Haiku 4.5', recommended: false }, + ]); + expect(BUNDLED_ANTHROPIC_MODELS.map((model) => model.id)).not.toEqual( + expect.arrayContaining(['claude-sonnet-4', 'claude-opus-4']), + ); + }); + + it('filters Claude Sonnet 4 and Claude Opus 4 from Anthropic model prompt choices', async () => { + const prompts = makePromptAdapter({ selectValues: ['env', 'back', 'back'] }); + + await runKloSetupAnthropicModelStep( + { projectDir: tempDir, inputMode: 'auto', skipLlm: false }, + makeIo().io, + { + prompts, + env: { ANTHROPIC_API_KEY: 'sk-ant-test' }, + listModels: vi.fn(async () => [ + { id: 
'claude-sonnet-4', label: 'Claude Sonnet 4', recommended: true }, + { id: 'claude-opus-4', label: 'Claude Opus 4', recommended: false }, + { id: 'claude-sonnet-4-6', label: 'Claude Sonnet 4.6', recommended: true }, + { id: 'claude-opus-4-6', label: 'Claude Opus 4.6', recommended: false }, + { id: 'claude-haiku-4-5', label: 'Claude Haiku 4.5', recommended: false }, + ]), + }, + ); + + expect(prompts.select).toHaveBeenCalledWith( + expect.objectContaining({ + message: expect.stringContaining('Which Anthropic model should KLO use?'), + options: [ + { value: 'claude-sonnet-4-6', label: 'Claude Sonnet 4.6 (recommended)' }, + { value: 'claude-opus-4-6', label: 'Claude Opus 4.6' }, + { value: 'claude-haiku-4-5', label: 'Claude Haiku 4.5' }, + { value: 'manual', label: 'Enter a model ID manually' }, + { value: 'back', label: 'Back' }, + ], + }), + ); + }); + + it('configures env credentials, selected model, prompt caching, and llm completion state', async () => { + const io = makeIo(); + const result = await runKloSetupAnthropicModelStep( + { + projectDir: tempDir, + inputMode: 'disabled', + anthropicApiKeyEnv: 'ANTHROPIC_API_KEY', + anthropicModel: 'claude-sonnet-4-6', + skipLlm: false, + }, + io.io, + { + env: { ANTHROPIC_API_KEY: 'sk-ant-test' }, + healthCheck: vi.fn(async () => ({ ok: true as const })), + }, + ); + + expect(result.status).toBe('ready'); + const config = parseKloProjectConfig(await readFile(join(tempDir, 'klo.yaml'), 'utf-8')); + expect(config.llm).toMatchObject({ + provider: { + backend: 'anthropic', + anthropic: { api_key: 'env:ANTHROPIC_API_KEY' }, + }, + models: { default: 'claude-sonnet-4-6' }, + promptCaching: { enabled: true }, + }); + expect(config.scan.enrichment.mode).toBe('llm'); + expect(config.setup?.completed_steps).toContain('llm'); + expect(io.stdout()).toContain('LLM ready: yes'); + expect(io.stdout()).not.toContain('sk-ant-test'); + }); + + it('resolves --anthropic-api-key-file for health checks and stores a file reference', async () 
=> { + const io = makeIo(); + const secretPath = join(tempDir, 'anthropic-api-key'); + await writeFile(secretPath, 'sk-ant-file', 'utf-8'); + const healthCheck = vi.fn(async () => ({ ok: true as const })); + + const result = await runKloSetupAnthropicModelStep( + { + projectDir: tempDir, + inputMode: 'disabled', + anthropicApiKeyFile: secretPath, + anthropicModel: 'claude-sonnet-4-6', + skipLlm: false, + }, + io.io, + { env: {}, healthCheck }, + ); + + expect(result.status).toBe('ready'); + expect(healthCheck).toHaveBeenCalledWith( + expect.objectContaining({ + anthropic: { apiKey: 'sk-ant-file' }, + modelSlots: { default: 'claude-sonnet-4-6' }, + }), + ); + const config = parseKloProjectConfig(await readFile(join(tempDir, 'klo.yaml'), 'utf-8')); + expect(config.llm).toMatchObject({ + provider: { + backend: 'anthropic', + anthropic: { api_key: `file:${secretPath}` }, + }, + models: { default: 'claude-sonnet-4-6' }, + }); + expect(config.setup?.completed_steps).toContain('llm'); + expect(io.stdout()).not.toContain('sk-ant-file'); + }); + + it('returns missing-input when --anthropic-api-key-file points to a missing file', async () => { + const io = makeIo(); + const missingSecretPath = join(tempDir, 'missing-anthropic-api-key'); + const healthCheck = vi.fn(async () => ({ ok: true as const })); + + const result = await runKloSetupAnthropicModelStep( + { + projectDir: tempDir, + inputMode: 'disabled', + anthropicApiKeyFile: missingSecretPath, + anthropicModel: 'claude-sonnet-4-6', + skipLlm: false, + }, + io.io, + { env: {}, healthCheck }, + ); + + expect(result.status).toBe('missing-input'); + expect(healthCheck).not.toHaveBeenCalled(); + expect(io.stderr()).toContain(`Missing Anthropic API key file: ${missingSecretPath}`); + }); + + it('does not recommend skipping when non-interactive setup is missing an Anthropic credential source', async () => { + const io = makeIo(); + + const result = await runKloSetupAnthropicModelStep( + { projectDir: tempDir, inputMode: 
'disabled', skipLlm: false }, + io.io, + ); + + expect(result.status).toBe('missing-input'); + expect(io.stderr()).toContain( + 'Missing Anthropic API key: pass --anthropic-api-key-env or --anthropic-api-key-file.', + ); + expect(io.stderr()).not.toContain('--skip-llm'); + }); + + it('does not recommend skipping when non-interactive setup is missing an Anthropic model', async () => { + const io = makeIo(); + const healthCheck = vi.fn(async () => ({ ok: true as const })); + + const result = await runKloSetupAnthropicModelStep( + { + projectDir: tempDir, + inputMode: 'disabled', + anthropicApiKeyEnv: 'ANTHROPIC_API_KEY', + skipLlm: false, + }, + io.io, + { env: { ANTHROPIC_API_KEY: 'sk-ant-test' }, healthCheck }, + ); + + expect(result.status).toBe('missing-input'); + expect(healthCheck).not.toHaveBeenCalled(); + expect(io.stderr()).toContain('Missing Anthropic model: pass --anthropic-model.'); + expect(io.stderr()).not.toContain('--skip-llm'); + }); + + it('writes pasted keys to .klo/secrets and never prints the key', async () => { + const io = makeIo(); + const prompts = makePromptAdapter({ + credentialChoice: 'paste', + modelChoice: 'claude-sonnet-4-6', + passwordValue: 'sk-ant-pasted', + }); + + const result = await runKloSetupAnthropicModelStep( + { projectDir: tempDir, inputMode: 'auto', skipLlm: false }, + io.io, + { + prompts, + env: {}, + listModels: vi.fn(async () => [{ id: 'claude-sonnet-4-6', label: 'Claude Sonnet 4.6', recommended: true }]), + healthCheck: vi.fn(async () => ({ ok: true as const })), + }, + ); + + expect(result.status).toBe('ready'); + await expect(readFile(join(tempDir, '.klo/secrets/anthropic-api-key'), 'utf-8')).resolves.toBe('sk-ant-pasted\n'); + if (process.platform !== 'win32') { + expect((await stat(join(tempDir, '.klo/secrets/anthropic-api-key'))).mode & 0o777).toBe(0o600); + } + const yaml = await readFile(join(tempDir, 'klo.yaml'), 'utf-8'); + expect(yaml).toContain('api_key: file:'); + 
expect(yaml).not.toContain('sk-ant-pasted'); + expect(io.stdout()).not.toContain('sk-ant-pasted'); + }); + + it('opens pasted key entry directly and tells users Escape goes back', async () => { + const prompts = makePromptAdapter({ + selectValues: ['paste', 'claude-sonnet-4-6'], + passwordValue: 'sk-ant-pasted', + }); + + const result = await runKloSetupAnthropicModelStep( + { projectDir: tempDir, inputMode: 'auto', skipLlm: false }, + makeIo().io, + { + prompts, + env: {}, + listModels: vi.fn(async () => [{ id: 'claude-sonnet-4-6', label: 'Claude Sonnet 4.6', recommended: true }]), + healthCheck: vi.fn(async () => ({ ok: true as const })), + }, + ); + + expect(result.status).toBe('ready'); + expect(prompts.select).not.toHaveBeenCalledWith(expect.objectContaining({ message: 'Paste Anthropic API key now?' })); + expect(prompts.password).toHaveBeenCalledWith({ + message: 'Anthropic API key\nPress Escape to go back.\n', + }); + }); + + it('does not offer skipping while choosing an Anthropic credential source', async () => { + const prompts = makePromptAdapter({ credentialChoice: 'back' }); + + const result = await runKloSetupAnthropicModelStep( + { projectDir: tempDir, inputMode: 'auto', skipLlm: false }, + makeIo().io, + { prompts, env: {} }, + ); + + expect(result.status).toBe('back'); + expect(prompts.select).toHaveBeenCalledWith( + expect.objectContaining({ + message: expect.stringContaining('How should KLO find your Anthropic API key?'), + options: expect.not.arrayContaining([expect.objectContaining({ value: 'skip' })]), + }), + ); + }); + + it('explains why KLO asks for an Anthropic API key', async () => { + const io = makeIo(); + const prompts = makePromptAdapter({ credentialChoice: 'back' }); + const expectedPromptMessage = [ + 'How should KLO find your Anthropic API key?', + '', + [ + 'KLO uses the key to verify Anthropic model access now and to run ingest agents that turn schemas, SQL,', + 'BI metadata, and docs into semantic-layer sources and wiki context. 
klo.yaml stores an env: or file:', + 'reference, not the raw key.', + ].join(' '), + ].join('\n'); + + const result = await runKloSetupAnthropicModelStep( + { projectDir: tempDir, inputMode: 'auto', skipLlm: false }, + io.io, + { prompts, env: {} }, + ); + + expect(result.status).toBe('back'); + expect(prompts.select).toHaveBeenCalledWith( + expect.objectContaining({ + message: expectedPromptMessage, + }), + ); + expect(io.stdout()).not.toContain('KLO uses the key'); + }); + + it('does not offer skipping while choosing an Anthropic model', async () => { + const prompts = makePromptAdapter({ selectValues: ['env', 'back', 'back'] }); + + const result = await runKloSetupAnthropicModelStep( + { projectDir: tempDir, inputMode: 'auto', skipLlm: false }, + makeIo().io, + { + prompts, + env: { ANTHROPIC_API_KEY: 'sk-ant-test' }, + listModels: vi.fn(async () => [{ id: 'claude-sonnet-4-6', label: 'Claude Sonnet 4.6', recommended: true }]), + }, + ); + + expect(result.status).toBe('back'); + expect(prompts.select).toHaveBeenCalledWith( + expect.objectContaining({ + message: expect.stringContaining('Which Anthropic model should KLO use?'), + options: expect.not.arrayContaining([expect.objectContaining({ value: 'skip' })]), + }), + ); + }); + + it('explains why KLO asks for an Anthropic model', async () => { + const io = makeIo(); + const prompts = makePromptAdapter({ credentialChoice: 'env', modelChoice: 'claude-sonnet-4-6' }); + const expectedPromptMessage = [ + 'Which Anthropic model should KLO use?', + '', + [ + 'KLO uses this as the default model for ingest agents that turn schemas, SQL, BI metadata, and docs', + 'into semantic-layer sources and wiki context.', + ].join(' '), + ].join('\n'); + + const result = await runKloSetupAnthropicModelStep( + { projectDir: tempDir, inputMode: 'auto', skipLlm: false }, + io.io, + { + prompts, + env: { ANTHROPIC_API_KEY: 'sk-ant-test' }, + listModels: vi.fn(async () => [{ id: 'claude-sonnet-4-6', label: 'Claude Sonnet 4.6', 
recommended: true }]), + healthCheck: vi.fn(async () => ({ ok: true as const })), + }, + ); + + expect(result.status).toBe('ready'); + expect(prompts.select).toHaveBeenCalledWith( + expect.objectContaining({ + message: expectedPromptMessage, + }), + ); + expect(io.stdout()).not.toContain('KLO uses this as the default model'); + expect(io.stdout()).not.toContain('Setup verifies the selected model now'); + }); + + it('uses the bundled fallback registry when live discovery fails', async () => { + const io = makeIo(); + const prompts = makePromptAdapter({ credentialChoice: 'env', modelChoice: 'claude-sonnet-4-6' }); + + await expect( + runKloSetupAnthropicModelStep({ projectDir: tempDir, inputMode: 'auto', skipLlm: false }, io.io, { + prompts, + env: { ANTHROPIC_API_KEY: 'sk-ant-test' }, + listModels: vi.fn(async () => { + throw new Error('network unavailable'); + }), + healthCheck: vi.fn(async () => ({ ok: true as const })), + }), + ).resolves.toMatchObject({ status: 'ready' }); + + expect(io.stderr()).toContain('Could not fetch live Anthropic models. 
Showing bundled defaults.'); + }); + + it('shows bundled model choices when live discovery fails', async () => { + const io = makeIo(); + const prompts = makePromptAdapter({ selectValues: ['env', 'manual'], textValues: [''] }); + + const result = await runKloSetupAnthropicModelStep( + { projectDir: tempDir, inputMode: 'auto', skipLlm: false }, + io.io, + { + prompts, + env: { ANTHROPIC_API_KEY: 'sk-ant-test' }, + listModels: vi.fn(async () => { + throw new Error('network unavailable'); + }), + healthCheck: vi.fn(async () => ({ ok: true as const })), + }, + ); + + expect(result.status).toBe('missing-input'); + expect(BUNDLED_ANTHROPIC_MODELS.length).toBeGreaterThan(0); + expect(prompts.select).toHaveBeenCalledWith( + expect.objectContaining({ + message: expect.stringContaining('Which Anthropic model should KLO use?'), + options: expect.arrayContaining([ + { value: 'claude-sonnet-4-6', label: 'Claude Sonnet 4.6 (recommended)' }, + ]), + }), + ); + expect(prompts.text).toHaveBeenCalledWith( + expect.objectContaining({ + message: 'Anthropic model ID\nPress Escape to go back.\n', + placeholder: 'claude-sonnet-4-6', + }), + ); + }); + + it('reports invalid Anthropic API keys during live discovery instead of showing bundled defaults', async () => { + const io = makeIo(); + const prompts = makePromptAdapter({ selectValues: ['env', 'back'] }); + const fetchModels = vi.fn( + async () => new Response(JSON.stringify({ error: { message: 'invalid x-api-key' } }), { status: 401 }), + ); + const healthCheck = vi.fn(async () => ({ ok: true as const })); + + const result = await runKloSetupAnthropicModelStep( + { projectDir: tempDir, inputMode: 'auto', skipLlm: false }, + io.io, + { + prompts, + env: { ANTHROPIC_API_KEY: 'sk-ant-invalid' }, // pragma: allowlist secret + fetch: fetchModels, + healthCheck, + }, + ); + + expect(result.status).toBe('back'); + expect(fetchModels).toHaveBeenCalledTimes(1); + expect(healthCheck).not.toHaveBeenCalled(); + 
expect(io.stderr()).toContain('Anthropic API key is invalid or unauthorized'); + expect(io.stderr()).toContain('Choose a different credential source or Back.'); + expect(io.stderr()).not.toContain('Could not fetch live Anthropic models. Showing bundled defaults.'); + expect(io.stderr()).not.toContain('sk-ant-invalid'); + }); + + it('does not persist llm completion when the health check fails', async () => { + const io = makeIo(); + const result = await runKloSetupAnthropicModelStep( + { + projectDir: tempDir, + inputMode: 'disabled', + anthropicApiKeyEnv: 'ANTHROPIC_API_KEY', + anthropicModel: 'claude-sonnet-4-6', + skipLlm: false, + }, + io.io, + { + env: { ANTHROPIC_API_KEY: 'sk-ant-test' }, + healthCheck: vi.fn(async () => ({ ok: false as const, message: '401 invalid x-api-key [redacted]' })), + }, + ); + + expect(result.status).toBe('failed'); + const config = parseKloProjectConfig(await readFile(join(tempDir, 'klo.yaml'), 'utf-8')); + expect(config.setup?.completed_steps ?? []).not.toContain('llm'); + expect(io.stderr()).toContain('Anthropic model health check failed: 401 invalid x-api-key [redacted]'); + expect(io.stderr()).not.toContain('sk-ant-test'); + }); + + it('re-prompts after an interactive health-check failure and saves after retry success', async () => { + const io = makeIo(); + const prompts = makePromptAdapter({ + selectValues: ['env', 'claude-haiku-3-5', 'env', 'claude-sonnet-4-6'], + }); + const healthCheck = vi + .fn() + .mockResolvedValueOnce({ ok: false as const, message: 'model not found' }) + .mockResolvedValueOnce({ ok: true as const }); + + const result = await runKloSetupAnthropicModelStep( + { projectDir: tempDir, inputMode: 'auto', skipLlm: false }, + io.io, + { + prompts, + env: { ANTHROPIC_API_KEY: 'sk-ant-test' }, + listModels: vi.fn(async () => [ + { id: 'claude-haiku-3-5', label: 'Claude Haiku 3.5', recommended: false }, + { id: 'claude-sonnet-4-6', label: 'Claude Sonnet 4.6', recommended: true }, + ]), + healthCheck, + }, + ); + 
+ expect(result.status).toBe('ready'); + expect(healthCheck).toHaveBeenCalledTimes(2); + expect(prompts.select).toHaveBeenCalledTimes(4); + expect(io.stderr()).toContain('Anthropic model health check failed: model not found'); + expect(io.stderr()).toContain('Choose a different credential source or model, or Back.'); + const config = parseKloProjectConfig(await readFile(join(tempDir, 'klo.yaml'), 'utf-8')); + expect(config.llm.models.default).toBe('claude-sonnet-4-6'); + expect(config.setup?.completed_steps).toContain('llm'); + expect(io.stderr()).not.toContain('sk-ant-test'); + }); + + it('leaves setup incomplete when skipped', async () => { + const result = await runKloSetupAnthropicModelStep( + { projectDir: tempDir, inputMode: 'disabled', skipLlm: true }, + makeIo().io, + ); + + expect(result.status).toBe('skipped'); + const config = parseKloProjectConfig(await readFile(join(tempDir, 'klo.yaml'), 'utf-8')); + expect(config.setup?.completed_steps ?? []).not.toContain('llm'); + }); + + it('returns back without writing config when Back is selected', async () => { + const prompts = makePromptAdapter({ credentialChoice: 'back' }); + const result = await runKloSetupAnthropicModelStep( + { projectDir: tempDir, inputMode: 'auto', skipLlm: false }, + makeIo().io, + { prompts, env: {} }, + ); + + expect(result.status).toBe('back'); + const config = parseKloProjectConfig(await readFile(join(tempDir, 'klo.yaml'), 'utf-8')); + expect(config.llm.provider.backend).toBe('none'); + }); + + it('returns from model selection Back to credential selection instead of exiting setup', async () => { + const prompts = makePromptAdapter({ + selectValues: ['paste', 'back', 'back'], + passwordValue: 'sk-ant-pasted', + }); + + const result = await runKloSetupAnthropicModelStep( + { projectDir: tempDir, inputMode: 'auto', skipLlm: false }, + makeIo().io, + { + prompts, + env: {}, + listModels: vi.fn(async () => [{ id: 'claude-sonnet-4-6', label: 'Claude Sonnet 4.6', recommended: true }]), + 
healthCheck: vi.fn(async () => ({ ok: true as const })), + }, + ); + + expect(result.status).toBe('back'); + expect(prompts.select).toHaveBeenNthCalledWith( + 3, + expect.objectContaining({ + message: expect.stringContaining('How should KLO find your Anthropic API key?'), + }), + ); + const config = parseKloProjectConfig(await readFile(join(tempDir, 'klo.yaml'), 'utf-8')); + expect(config.llm.provider.backend).toBe('none'); + }); + + it('returns from pasted key entry Escape to credential selection and can use env credentials', async () => { + const prompts = makePromptAdapter({ + selectValues: ['paste', 'env', 'claude-sonnet-4-6'], + passwordValues: [undefined], + }); + + const result = await runKloSetupAnthropicModelStep( + { projectDir: tempDir, inputMode: 'auto', skipLlm: false }, + makeIo().io, + { + prompts, + env: { ANTHROPIC_API_KEY: 'sk-ant-env' }, // pragma: allowlist secret + listModels: vi.fn(async () => [{ id: 'claude-sonnet-4-6', label: 'Claude Sonnet 4.6', recommended: true }]), + healthCheck: vi.fn(async () => ({ ok: true as const })), + }, + ); + + expect(result.status).toBe('ready'); + expect(prompts.password).toHaveBeenCalledWith({ + message: 'Anthropic API key\nPress Escape to go back.\n', + }); + await expect(readFile(join(tempDir, '.klo/secrets/anthropic-api-key'), 'utf-8')).rejects.toMatchObject({ + code: 'ENOENT', + }); + const config = parseKloProjectConfig(await readFile(join(tempDir, 'klo.yaml'), 'utf-8')); + expect(config.llm.provider).toMatchObject({ + backend: 'anthropic', + anthropic: { api_key: 'env:ANTHROPIC_API_KEY' }, + }); + }); + + it('preserves already completed llm setup when no model args request changes', async () => { + await mkdir(join(tempDir, '.klo'), { recursive: true }); + await initKloProject({ projectDir: tempDir, projectName: 'warehouse', force: true }); + await writeFile( + join(tempDir, 'klo.yaml'), + [ + 'project: warehouse', + 'setup:', + ' database_connection_ids: []', + ' completed_steps:', + ' - project', + ' 
- llm', + 'connections: {}', + 'llm:', + ' provider:', + ' backend: anthropic', + ' anthropic:', + ' api_key: env:ANTHROPIC_API_KEY', + ' models:', + ' default: claude-sonnet-4-6', + 'ingest:', + ' embeddings:', + ' backend: deterministic', + ' model: deterministic', + ' dimensions: 8', + ].join('\n'), + 'utf-8', + ); + + const healthCheck = vi.fn(async () => ({ ok: true as const })); + await expect( + runKloSetupAnthropicModelStep({ projectDir: tempDir, inputMode: 'disabled', skipLlm: false }, makeIo().io, { + env: { ANTHROPIC_API_KEY: 'sk-ant-test' }, + healthCheck, + }), + ).resolves.toMatchObject({ status: 'ready' }); + expect(healthCheck).not.toHaveBeenCalled(); + }); +}); diff --git a/packages/cli/src/setup-models.ts b/packages/cli/src/setup-models.ts new file mode 100644 index 00000000..afb05e02 --- /dev/null +++ b/packages/cli/src/setup-models.ts @@ -0,0 +1,438 @@ +import { writeFile } from 'node:fs/promises'; +import { cancel, isCancel, password, select, text } from '@clack/prompts'; +import { resolveKloConfigReference } from '@klo/context/core'; +import { + type KloProjectConfig, + type KloProjectLlmConfig, + loadKloProject, + markKloSetupStepComplete, + serializeKloProjectConfig, +} from '@klo/context/project'; +import { type KloLlmConfig, type KloLlmHealthCheckResult, runKloLlmHealthCheck } from '@klo/llm'; +import type { KloCliIo } from './cli-runtime.js'; +import { withMenuOptionsSpacing, withTextInputNavigation } from './prompt-navigation.js'; +import { withSetupInterruptConfirmation } from './setup-interrupt.js'; +import { envCredentialReference, writeProjectLocalSecretReference } from './setup-secrets.js'; + +export interface KloSetupModelArgs { + projectDir: string; + inputMode: 'auto' | 'disabled'; + anthropicApiKeyEnv?: string; + anthropicApiKeyFile?: string; + anthropicModel?: string; + forcePrompt?: boolean; + showPromptInstructions?: boolean; + skipLlm: boolean; +} + +export type KloSetupModelResult = + | { status: 'ready'; projectDir: string 
} + | { status: 'skipped'; projectDir: string } + | { status: 'back'; projectDir: string } + | { status: 'missing-input'; projectDir: string } + | { status: 'failed'; projectDir: string }; + +export interface AnthropicModelChoice { + id: string; + label: string; + recommended: boolean; +} + +export interface KloSetupModelPromptAdapter { + select(options: { message: string; options: Array<{ value: string; label: string }> }): Promise; + text(options: { message: string; placeholder?: string }): Promise; + password(options: { message: string }): Promise; + cancel(message: string): void; +} + +export interface KloSetupModelDeps { + env?: NodeJS.ProcessEnv; + fetch?: typeof fetch; + prompts?: KloSetupModelPromptAdapter; + listModels?: (apiKey: string) => Promise; + healthCheck?: (config: KloLlmConfig) => Promise; +} + +export const BUNDLED_ANTHROPIC_MODEL_REGISTRY_VERSION = '2026-05-07'; + +export const BUNDLED_ANTHROPIC_MODELS: AnthropicModelChoice[] = [ + { id: 'claude-sonnet-4-6', label: 'Claude Sonnet 4.6', recommended: true }, + { id: 'claude-opus-4-6', label: 'Claude Opus 4.6', recommended: false }, + { id: 'claude-haiku-4-5', label: 'Claude Haiku 4.5', recommended: false }, +]; + +const HIDDEN_ANTHROPIC_MODEL_PATTERNS = [ + /^claude-sonnet-4$/i, + /^claude-opus-4$/i, + /^Claude Sonnet 4$/i, + /^Claude Opus 4$/i, +]; + +const ANTHROPIC_CREDENTIAL_PROMPT_CONTEXT = + 'KLO uses the key to verify Anthropic model access now and to run ingest agents that turn schemas, SQL, ' + + 'BI metadata, and docs into semantic-layer sources and wiki context. 
klo.yaml stores an env: or file: ' + + 'reference, not the raw key.'; + +const ANTHROPIC_MODEL_PROMPT_CONTEXT = + 'KLO uses this as the default model for ingest agents that turn schemas, SQL, BI metadata, and docs ' + + 'into semantic-layer sources and wiki context.'; + +type AnthropicModelDiscoveryErrorReason = 'authentication' | 'http' | 'empty-response'; + +export class AnthropicModelDiscoveryError extends Error { + constructor( + message: string, + public readonly reason: AnthropicModelDiscoveryErrorReason, + public readonly status?: number, + ) { + super(message); + this.name = 'AnthropicModelDiscoveryError'; + } +} + +function isAnthropicModelAuthenticationError(error: unknown): error is AnthropicModelDiscoveryError { + return error instanceof AnthropicModelDiscoveryError && error.reason === 'authentication'; +} + +function isSelectableAnthropicModel(model: AnthropicModelChoice): boolean { + return !HIDDEN_ANTHROPIC_MODEL_PATTERNS.some((pattern) => pattern.test(model.id) || pattern.test(model.label)); +} + +type ChooseModelResult = + | { status: 'ready'; model: string } + | { status: 'back' | 'missing-input' | 'invalid-credential' }; + +function createPromptAdapter(): KloSetupModelPromptAdapter { + return { + async select(options) { + const value = await withSetupInterruptConfirmation(() => select(withMenuOptionsSpacing(options))); + if (isCancel(value)) { + cancel('Setup cancelled.'); + return 'back'; + } + return value; + }, + async text(options) { + const value = await withSetupInterruptConfirmation(() => + text({ ...options, message: withTextInputNavigation(options.message) }), + ); + return isCancel(value) ? undefined : value; + }, + async password(options) { + const value = await withSetupInterruptConfirmation(() => + password({ ...options, message: withTextInputNavigation(options.message) }), + ); + return isCancel(value) ? 
undefined : value; + }, + cancel(message) { + cancel(message); + }, + }; +} + +export async function fetchAnthropicModels( + apiKey: string, + fetchFn: typeof fetch = fetch, +): Promise { + const response = await fetchFn('https://api.anthropic.com/v1/models?limit=1000', { + headers: { + 'anthropic-version': '2023-06-01', + 'x-api-key': apiKey, + }, + }); + if (!response.ok) { + if (response.status === 401 || response.status === 403) { + throw new AnthropicModelDiscoveryError( + `Anthropic model discovery failed with HTTP ${response.status}`, + 'authentication', + response.status, + ); + } + throw new AnthropicModelDiscoveryError( + `Anthropic model discovery failed with HTTP ${response.status}`, + 'http', + response.status, + ); + } + const body = (await response.json()) as { data?: Array<{ id?: unknown; display_name?: unknown; type?: unknown }> }; + const models = (body.data ?? []) + .map((item) => ({ + id: typeof item.id === 'string' ? item.id : '', + label: typeof item.display_name === 'string' ? item.display_name : typeof item.id === 'string' ? 
item.id : '', + recommended: false, + })) + .filter((item) => item.id.startsWith('claude-')) + .filter(isSelectableAnthropicModel); + if (models.length === 0) { + throw new AnthropicModelDiscoveryError('Anthropic model discovery returned no Claude models', 'empty-response'); + } + /* Flag the first model whose id contains sonnet as recommended; when none matches, findIndex yields -1 and Math.max falls back to index 0. */ + const recommendedIndex = models.findIndex((item) => item.id.includes('sonnet')); + return models.map((item, index) => ({ ...item, recommended: index === Math.max(recommendedIndex, 0) })); +} + +/** True when setup metadata lists the llm step as completed, the provider backend is anthropic, and a non-empty default model id is configured. */ +function hasCompletedLlm(config: KloProjectConfig): boolean { + return ( + config.setup?.completed_steps.includes('llm') === true && + config.llm.provider.backend === 'anthropic' && + typeof config.llm.models.default === 'string' && + config.llm.models.default.length > 0 + ); +} + +/** Builds the llm section to persist: an anthropic provider whose api_key is the credential reference, the existing model slots with default replaced by the chosen model, and the existing promptCaching settings with enabled forced to true. The provider object is rebuilt from scratch, so other fields of the existing provider are not carried over. */ +function buildProjectLlmConfig( + existing: KloProjectLlmConfig, + credentialRef: string, + model: string, +): KloProjectLlmConfig { + return { + provider: { + backend: 'anthropic', + anthropic: { api_key: credentialRef }, + }, + models: { ...existing.models, default: model }, + promptCaching: { ...(existing.promptCaching ?? {}), enabled: true }, + }; +} + +/** Builds the runtime config handed to the health check: it carries the resolved key value (not the config reference) and uses the candidate model as the default slot, with prompt caching enabled. */ +function buildHealthConfig(credentialValue: string, model: string): KloLlmConfig { + return { + backend: 'anthropic', + anthropic: { apiKey: credentialValue }, + modelSlots: { default: model }, + promptCaching: { enabled: true }, + }; +} + +/** Resolves the Anthropic credential. Order: the explicit env-variable argument, then the explicit key-file argument, then (only when input is not disabled) an interactive prompt. On success returns both the reference to store in config and the resolved key value. */ +async function chooseCredentialRef( + args: KloSetupModelArgs, + io: KloCliIo, + deps: KloSetupModelDeps, +): Promise<{ status: 'ready'; ref: string; value: string } | { status: 'back' | 'missing-input' }> { + const env = deps.env ??
process.env; + if (args.anthropicApiKeyEnv) { + const ref = envCredentialReference(args.anthropicApiKeyEnv); + const value = resolveKloConfigReference(ref, env); + if (!value) { + io.stderr.write(`Missing Anthropic API key: ${args.anthropicApiKeyEnv} is not set.\n`); + return { status: 'missing-input' }; + } + return { status: 'ready', ref, value }; + } + if (args.anthropicApiKeyFile) { + const ref = `file:${args.anthropicApiKeyFile}`; + let value: string | undefined; + try { + value = resolveKloConfigReference(ref, env); + } catch { + value = undefined; + } + if (!value) { + io.stderr.write(`Missing Anthropic API key file: ${args.anthropicApiKeyFile}\n`); + return { status: 'missing-input' }; + } + return { status: 'ready', ref, value }; + } + if (args.inputMode === 'disabled') { + io.stderr.write('Missing Anthropic API key: pass --anthropic-api-key-env or --anthropic-api-key-file.\n'); + return { status: 'missing-input' }; + } + + const prompts = deps.prompts ?? createPromptAdapter(); + if (args.showPromptInstructions !== false) { + io.stdout.write( + 'Use Up/Down to move, Enter to confirm the current selection, choose Back to return to the previous step, Ctrl+C to exit.\n', + ); + } + while (true) { + const choice = await prompts.select({ + message: `How should KLO find your Anthropic API key?\n\n${ANTHROPIC_CREDENTIAL_PROMPT_CONTEXT}`, + options: [ + { value: 'env', label: 'Use ANTHROPIC_API_KEY from the environment' }, + { value: 'paste', label: 'Paste a key and save it as a local secret file' }, + { value: 'back', label: 'Back' }, + ], + }); + if (choice === 'back') { + return { status: 'back' }; + } + if (choice === 'paste') { + io.stdout.write( + 'KLO will save the key in .klo/secrets/anthropic-api-key with local file permissions, then write a file: reference in klo.yaml.\n', + ); + const value = await prompts.password({ message: withTextInputNavigation('Anthropic API key') }); + if (value === undefined) { + continue; + } + if (!value.trim()) { + return { 
status: 'missing-input' }; + } + const ref = await writeProjectLocalSecretReference({ + projectDir: args.projectDir, + fileName: 'anthropic-api-key', + value: value.trim(), /* persist exactly the trimmed key that the health check validates, so pasted whitespace never reaches the secret file */ + }); + return { status: 'ready', ref, value: value.trim() }; + } + + /* Remaining choice is env: reference ANTHROPIC_API_KEY and require that it resolves to a value. */ + const ref = envCredentialReference('ANTHROPIC_API_KEY'); + const value = resolveKloConfigReference(ref, env); + if (!value) { + io.stderr.write('Missing Anthropic API key: ANTHROPIC_API_KEY is not set.\n'); + return { status: 'missing-input' }; + } + return { status: 'ready', ref, value }; + } +} + +/** Picks the model id. An explicit anthropicModel argument wins; with input disabled a missing argument is reported as missing-input. Otherwise models are listed through deps.listModels or fetchAnthropicModels: an authentication failure yields invalid-credential, and any other listing failure falls back to the bundled model list before prompting. */ +async function chooseModel( + args: KloSetupModelArgs, + credentialValue: string, + io: KloCliIo, + deps: KloSetupModelDeps, +): Promise { + if (args.anthropicModel) { + return { status: 'ready', model: args.anthropicModel }; + } + if (args.inputMode === 'disabled') { + io.stderr.write('Missing Anthropic model: pass --anthropic-model.\n'); + return { status: 'missing-input' }; + } + + let models: AnthropicModelChoice[]; + try { + models = deps.listModels + ? await deps.listModels(credentialValue) + : await fetchAnthropicModels(credentialValue, deps.fetch); + } catch (error) { + if (isAnthropicModelAuthenticationError(error)) { + const statusSuffix = error.status ? ` (HTTP ${error.status})` : ''; + io.stderr.write(`Anthropic API key is invalid or unauthorized${statusSuffix}. Check the key and try again.\n`); + return { status: 'invalid-credential' }; + } + io.stderr.write( + 'Could not fetch live Anthropic models. Showing bundled defaults. Setup will still test the selected model before saving it.\n', + ); + models = BUNDLED_ANTHROPIC_MODELS; + } + + const selectableModels = models.filter(isSelectableAnthropicModel); + const prompts = deps.prompts ?? createPromptAdapter(); + const modelOptions = [ + ...selectableModels.map((model) => ({ + value: model.id, + label: `${model.label || model.id}${model.recommended ?
' (recommended)' : ''}`, + })), + { value: 'manual', label: 'Enter a model ID manually' }, + { value: 'back', label: 'Back' }, + ]; + const choice = await prompts.select({ + message: `Which Anthropic model should KLO use?\n\n${ANTHROPIC_MODEL_PROMPT_CONTEXT}`, + options: modelOptions, + }); + if (choice === 'back') { + return { status: 'back' }; + } + if (choice === 'manual') { + const manual = await prompts.text({ + message: withTextInputNavigation('Anthropic model ID'), + placeholder: selectableModels.find((model) => model.recommended)?.id ?? selectableModels[0]?.id, + }); + if (manual === undefined) { + return { status: 'back' }; + } + return manual.trim() ? { status: 'ready', model: manual.trim() } : { status: 'missing-input' }; + } + return { status: 'ready', model: choice }; +} + +async function persistLlmConfig(projectDir: string, credentialRef: string, model: string): Promise { + const project = await loadKloProject({ projectDir }); + const config = markKloSetupStepComplete( + { + ...project.config, + llm: buildProjectLlmConfig(project.config.llm, credentialRef, model), + scan: { + ...project.config.scan, + enrichment: { + ...project.config.scan.enrichment, + mode: 'llm', + }, + }, + }, + 'llm', + ); + await writeFile(project.configPath, serializeKloProjectConfig(config), 'utf-8'); +} + +function buildInteractiveRetryArgs(args: KloSetupModelArgs): KloSetupModelArgs { + return { + projectDir: args.projectDir, + inputMode: args.inputMode, + ...(args.showPromptInstructions !== undefined ? 
{ showPromptInstructions: args.showPromptInstructions } : {}), + skipLlm: args.skipLlm, + }; +} + +export async function runKloSetupAnthropicModelStep( + args: KloSetupModelArgs, + io: KloCliIo, + deps: KloSetupModelDeps = {}, +): Promise { + if (args.skipLlm) { + io.stdout.write('LLM setup skipped.\n'); + return { status: 'skipped', projectDir: args.projectDir }; + } + + const project = await loadKloProject({ projectDir: args.projectDir }); + if ( + args.forcePrompt !== true && + hasCompletedLlm(project.config) && + !args.anthropicApiKeyEnv && + !args.anthropicApiKeyFile && + !args.anthropicModel + ) { + io.stdout.write(`LLM ready: yes (${project.config.llm.models.default})\n`); + return { status: 'ready', projectDir: args.projectDir }; + } + + const healthCheck = deps.healthCheck ?? ((config: KloLlmConfig) => runKloLlmHealthCheck(config)); + let attemptArgs = args; + + while (true) { + const credential = await chooseCredentialRef(attemptArgs, io, deps); + if (credential.status !== 'ready') { + return { status: credential.status, projectDir: args.projectDir }; + } + + const model = await chooseModel(attemptArgs, credential.value, io, deps); + if (model.status === 'invalid-credential') { + if (args.inputMode === 'disabled') { + return { status: 'failed', projectDir: args.projectDir }; + } + io.stderr.write('Choose a different credential source or Back.\n'); + attemptArgs = buildInteractiveRetryArgs(args); + continue; + } + if (model.status === 'back' && !attemptArgs.anthropicApiKeyEnv && !attemptArgs.anthropicApiKeyFile) { + attemptArgs = buildInteractiveRetryArgs(args); + continue; + } + if (model.status !== 'ready') { + return { status: model.status, projectDir: args.projectDir }; + } + + const health = await healthCheck(buildHealthConfig(credential.value, model.model)); + if (health.ok) { + await persistLlmConfig(args.projectDir, credential.ref, model.model); + io.stdout.write(`LLM ready: yes (${model.model})\n`); + return { status: 'ready', projectDir: 
args.projectDir }; + } + + io.stderr.write(`Anthropic model health check failed: ${health.message}\n`); + if (args.inputMode === 'disabled') { + return { status: 'failed', projectDir: args.projectDir }; + } + io.stderr.write('Choose a different credential source or model, or Back.\n'); + attemptArgs = buildInteractiveRetryArgs(args); + } +} diff --git a/packages/cli/src/setup-project.test.ts b/packages/cli/src/setup-project.test.ts new file mode 100644 index 00000000..ffaedb5c --- /dev/null +++ b/packages/cli/src/setup-project.test.ts @@ -0,0 +1,335 @@ +import { mkdir, mkdtemp, readFile, rm, stat, writeFile } from 'node:fs/promises'; +import { tmpdir } from 'node:os'; +import { join } from 'node:path'; +import { initKloProject, parseKloProjectConfig } from '@klo/context/project'; +import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'; +import { type KloSetupProjectPromptAdapter, runKloSetupProjectStep } from './setup-project.js'; + +function makeIo(options: { stdoutIsTty?: boolean } = {}) { + let stdout = ''; + let stderr = ''; + return { + io: { + stdout: { + isTTY: options.stdoutIsTty, + write: (chunk: string) => { + stdout += chunk; + }, + }, + stderr: { + write: (chunk: string) => { + stderr += chunk; + }, + }, + }, + stdout: () => stdout, + stderr: () => stderr, + }; +} + +function makePromptAdapter(options: { choice?: string; choices?: string[]; textValue?: string; textValues?: string[] }) { + const choices = [...(options.choices ?? (options.choice ? [options.choice] : []))]; + const textValues = [...(options.textValues ?? (options.textValue !== undefined ? [options.textValue] : []))]; + return { + select: vi.fn(async () => choices.shift() ?? 'exit'), + text: vi.fn(async () => textValues.shift() ?? 
''), + cancel: vi.fn(), + } satisfies KloSetupProjectPromptAdapter; +} + +describe('setup project step', () => { + let tempDir: string; + + beforeEach(async () => { + tempDir = await mkdtemp(join(tmpdir(), 'klo-setup-project-')); + }); + + afterEach(async () => { + await rm(tempDir, { recursive: true, force: true }); + }); + + it('creates a new project with --new and marks the project step complete', async () => { + const projectDir = join(tempDir, 'warehouse'); + const testIo = makeIo(); + + const result = await runKloSetupProjectStep( + { projectDir, mode: 'new', inputMode: 'disabled', yes: false }, + testIo.io, + ); + + expect(result.status).toBe('ready'); + expect(result.projectDir).toBe(projectDir); + const config = parseKloProjectConfig(await readFile(join(projectDir, 'klo.yaml'), 'utf-8')); + expect(config.setup?.completed_steps).toEqual(['project']); + await expect(stat(join(projectDir, '.git'))).resolves.toBeDefined(); + await expect(readFile(join(projectDir, '.klo/.gitignore'), 'utf-8')).resolves.toContain('secrets/'); + expect(testIo.stdout()).toContain(`Project: ${projectDir}`); + expect(testIo.stderr()).toBe(''); + }); + + it('loads an existing project with --existing and preserves existing setup metadata', async () => { + const projectDir = join(tempDir, 'warehouse'); + await initKloProject({ projectDir, projectName: 'warehouse' }); + await writeFile( + join(projectDir, 'klo.yaml'), + [ + 'project: warehouse', + 'setup:', + ' database_connection_ids:', + ' - warehouse', + ' completed_steps:', + ' - llm', + 'connections: {}', + ].join('\n'), + 'utf-8', + ); + + const result = await runKloSetupProjectStep( + { projectDir, mode: 'existing', inputMode: 'disabled', yes: false }, + makeIo().io, + ); + + expect(result.status).toBe('ready'); + const config = parseKloProjectConfig(await readFile(join(projectDir, 'klo.yaml'), 'utf-8')); + expect(config.setup).toEqual({ + database_connection_ids: ['warehouse'], + completed_steps: ['llm', 'project'], + }); + }); 
+ + it('creates a missing auto-mode project only when --yes is present in no-input mode', async () => { + const projectDir = join(tempDir, 'warehouse'); + const rejectedIo = makeIo(); + const acceptedIo = makeIo(); + + await expect( + runKloSetupProjectStep({ projectDir, mode: 'auto', inputMode: 'disabled', yes: false }, rejectedIo.io), + ).resolves.toMatchObject({ status: 'missing-input' }); + expect(rejectedIo.stderr()).toContain('Missing setup choice: pass --new or --yes'); + await expect(stat(join(projectDir, 'klo.yaml'))).rejects.toThrow(); + + await expect( + runKloSetupProjectStep({ projectDir, mode: 'auto', inputMode: 'disabled', yes: true }, acceptedIo.io), + ).resolves.toMatchObject({ status: 'ready', projectDir }); + await expect(stat(join(projectDir, 'klo.yaml'))).resolves.toBeDefined(); + }); + + it('fails --existing clearly when klo.yaml is missing', async () => { + const projectDir = join(tempDir, 'warehouse'); + const testIo = makeIo(); + + await expect( + runKloSetupProjectStep({ projectDir, mode: 'existing', inputMode: 'disabled', yes: false }, testIo.io), + ).resolves.toMatchObject({ status: 'missing-input' }); + + expect(testIo.stderr()).toContain(`No existing KLO project found at ${projectDir}`); + }); + + it('prompts to use the current directory and creates a project in interactive auto mode', async () => { + const projectDir = join(tempDir, 'warehouse'); + const prompts = makePromptAdapter({ choice: 'current' }); + const testIo = makeIo({ stdoutIsTty: true }); + + const result = await runKloSetupProjectStep( + { projectDir, mode: 'auto', inputMode: 'auto', yes: false }, + testIo.io, + { prompts }, + ); + + expect(result.status).toBe('ready'); + expect(result.projectDir).toBe(projectDir); + expect(prompts.select).toHaveBeenCalledWith( + expect.objectContaining({ + message: 'Which KLO project should setup use?', + options: [ + expect.objectContaining({ value: 'current', label: 'Use current directory' }), + expect.objectContaining({ value: 
'new', label: 'Create a new project folder' }), + expect.objectContaining({ value: 'exit', label: 'Exit' }), + ], + }), + ); + expect(prompts.text).not.toHaveBeenCalled(); + const config = parseKloProjectConfig(await readFile(join(projectDir, 'klo.yaml'), 'utf-8')); + expect(config.setup?.completed_steps).toEqual(['project']); + }); + + it('offers an absolute default destination for a new project folder', async () => { + const startDir = join(tempDir, 'start'); + const projectDir = join(startDir, 'klo-project'); + const prompts = makePromptAdapter({ choices: ['new', 'default', 'create'] }); + const testIo = makeIo({ stdoutIsTty: true }); + + const result = await runKloSetupProjectStep( + { projectDir: startDir, mode: 'auto', inputMode: 'auto', yes: false }, + testIo.io, + { prompts }, + ); + + expect(result.status).toBe('ready'); + expect(result.projectDir).toBe(projectDir); + expect(prompts.select).toHaveBeenNthCalledWith( + 2, + expect.objectContaining({ + message: 'Where should KLO create the project?', + options: [ + expect.objectContaining({ + value: 'default', + label: `Create the default project folder: ${projectDir}`, + }), + expect.objectContaining({ value: 'custom', label: 'Enter a custom path' }), + expect.objectContaining({ value: 'back', label: 'Back' }), + ], + }), + ); + expect(prompts.select).toHaveBeenNthCalledWith( + 3, + expect.objectContaining({ message: `Create KLO project at ${projectDir}?` }), + ); + expect(prompts.text).not.toHaveBeenCalled(); + expect(result.status === 'ready' ? 
result.project.config.project : '').toBe('klo-project'); + expect(testIo.stdout()).toContain(`KLO will create:\n ${projectDir}`); + await expect(stat(join(projectDir, 'klo.yaml'))).resolves.toBeDefined(); + }); + + it('prompts for a custom path and resolves it inside the current setup directory', async () => { + const startDir = join(tempDir, 'start'); + const projectDir = join(startDir, 'analytics-klo'); + const prompts = makePromptAdapter({ choices: ['new', 'custom', 'create'], textValue: 'analytics-klo' }); + + const result = await runKloSetupProjectStep( + { projectDir: startDir, mode: 'auto', inputMode: 'auto', yes: false }, + makeIo({ stdoutIsTty: true }).io, + { prompts }, + ); + + expect(result.status).toBe('ready'); + expect(result.projectDir).toBe(projectDir); + expect(prompts.text).toHaveBeenCalledWith( + expect.objectContaining({ + message: 'Project folder path\nPress Escape to go back.\n', + placeholder: './analytics-klo, ~/analytics-klo, or /Users/you/projects/analytics-klo', + }), + ); + await expect(stat(join(projectDir, 'klo.yaml'))).resolves.toBeDefined(); + }); + + it('expands a custom home-directory path before creating a new project', async () => { + const startDir = join(tempDir, 'start'); + const homeDir = join(tempDir, 'home'); + const projectDir = join(homeDir, 'analytics-klo'); + const prompts = makePromptAdapter({ choices: ['new', 'custom', 'create'], textValue: '~/analytics-klo' }); + + const result = await runKloSetupProjectStep( + { projectDir: startDir, mode: 'auto', inputMode: 'auto', yes: false }, + makeIo({ stdoutIsTty: true }).io, + { prompts, homeDir }, + ); + + expect(result.status).toBe('ready'); + expect(result.projectDir).toBe(projectDir); + await expect(stat(join(projectDir, 'klo.yaml'))).resolves.toBeDefined(); + }); + + it('confirms a custom new project path and lets Back return to the project choice', async () => { + const startDir = join(tempDir, 'start'); + const homeDir = join(tempDir, 'home'); + const customProjectDir 
= join(homeDir, 'analytics-klo'); + const prompts = makePromptAdapter({ + choices: ['new', 'custom', 'back', 'exit'], + textValue: '~/analytics-klo', + }); + + const result = await runKloSetupProjectStep( + { projectDir: startDir, mode: 'auto', inputMode: 'auto', yes: false }, + makeIo({ stdoutIsTty: true }).io, + { prompts, homeDir }, + ); + + expect(result.status).toBe('cancelled'); + expect(result.projectDir).toBe(startDir); + expect(prompts.select).toHaveBeenNthCalledWith( + 3, + expect.objectContaining({ + message: `Create KLO project at ${customProjectDir}?`, + options: [ + expect.objectContaining({ value: 'create', label: 'Create project' }), + expect.objectContaining({ value: 'choose-another', label: 'Choose another folder' }), + expect.objectContaining({ value: 'back', label: 'Back' }), + ], + }), + ); + expect(prompts.select).toHaveBeenNthCalledWith( + 4, + expect.objectContaining({ message: 'Which KLO project should setup use?' }), + ); + await expect(stat(join(customProjectDir, 'klo.yaml'))).rejects.toThrow(); + }); + + it('rejects an empty new folder path without creating a project in the process cwd', async () => { + const startDir = join(tempDir, 'start'); + const prompts = makePromptAdapter({ choices: ['new', 'custom'], textValue: ' ' }); + const initProject = vi.fn(async () => { + throw new Error('initProject should not run for an empty path'); + }); + const testIo = makeIo({ stdoutIsTty: true }); + + await expect( + runKloSetupProjectStep( + { projectDir: startDir, mode: 'auto', inputMode: 'auto', yes: false }, + testIo.io, + { prompts, initProject }, + ), + ).resolves.toMatchObject({ status: 'missing-input', projectDir: startDir }); + + expect(initProject).not.toHaveBeenCalled(); + expect(testIo.stderr()).toContain( + 'Enter a relative path like ./analytics-klo, a home path like ~/analytics-klo, or an absolute path.', + ); + }); + + it('confirms before creating KLO files inside an existing non-empty folder', async () => { + const startDir = 
join(tempDir, 'start'); + const projectDir = join(startDir, 'analytics-klo'); + await mkdir(projectDir, { recursive: true }); + await writeFile(join(projectDir, 'README.md'), 'Existing project notes\n', 'utf-8'); + const prompts = makePromptAdapter({ choices: ['new', 'custom', 'use-existing'], textValue: 'analytics-klo' }); + + const result = await runKloSetupProjectStep( + { projectDir: startDir, mode: 'auto', inputMode: 'auto', yes: false }, + makeIo({ stdoutIsTty: true }).io, + { prompts }, + ); + + expect(result.status).toBe('ready'); + expect(result.projectDir).toBe(projectDir); + expect(prompts.select).toHaveBeenNthCalledWith( + 3, + expect.objectContaining({ + message: `That folder already exists and is not empty: ${projectDir}`, + options: expect.arrayContaining([ + expect.objectContaining({ value: 'use-existing', label: 'Yes, create KLO files there' }), + expect.objectContaining({ value: 'choose-another', label: 'Choose another folder' }), + ]), + }), + ); + await expect(readFile(join(projectDir, 'README.md'), 'utf-8')).resolves.toBe('Existing project notes\n'); + await expect(stat(join(projectDir, 'klo.yaml'))).resolves.toBeDefined(); + }); + + it('prompts to exit and returns cancelled in interactive auto mode', async () => { + const projectDir = join(tempDir, 'warehouse'); + const prompts = makePromptAdapter({ choice: 'exit' }); + + await expect( + runKloSetupProjectStep( + { projectDir, mode: 'auto', inputMode: 'auto', yes: false }, + makeIo({ stdoutIsTty: true }).io, + { prompts }, + ), + ).resolves.toMatchObject({ status: 'cancelled', projectDir }); + + expect(prompts.cancel).toHaveBeenCalledWith('Setup cancelled.'); + expect(prompts.text).not.toHaveBeenCalled(); + await expect(stat(join(projectDir, 'klo.yaml'))).rejects.toThrow(); + }); +}); diff --git a/packages/cli/src/setup-project.ts b/packages/cli/src/setup-project.ts new file mode 100644 index 00000000..5cc87a88 --- /dev/null +++ b/packages/cli/src/setup-project.ts @@ -0,0 +1,365 @@ +import { 
existsSync } from 'node:fs'; +import { mkdir, readdir, readFile, stat, writeFile } from 'node:fs/promises'; +import { homedir } from 'node:os'; +import { basename, join, resolve } from 'node:path'; +import { cancel, isCancel, select, text } from '@clack/prompts'; +import { + initKloProject, + type KloLocalProject, + loadKloProject, + markKloSetupStepComplete, + mergeKloSetupGitignoreEntries, + serializeKloProjectConfig, +} from '@klo/context/project'; +import type { KloCliIo } from './cli-runtime.js'; +import { withMenuOptionsSpacing, withTextInputNavigation } from './prompt-navigation.js'; +import { withSetupInterruptConfirmation } from './setup-interrupt.js'; + +export type KloSetupProjectMode = 'auto' | 'new' | 'existing' | 'prompt-new'; +export type KloSetupInputMode = 'auto' | 'disabled'; + +export interface KloSetupProjectArgs { + projectDir: string; + mode: KloSetupProjectMode; + inputMode: KloSetupInputMode; + yes: boolean; + allowBack?: boolean; +} + +export type KloSetupProjectResult = + | { status: 'ready'; projectDir: string; project: KloLocalProject; confirmedCreation?: boolean } + | { status: 'back'; projectDir: string } + | { status: 'cancelled'; projectDir: string } + | { status: 'missing-input'; projectDir: string }; + +export interface KloSetupProjectPromptAdapter { + select(options: { message: string; options: Array<{ value: string; label: string }> }): Promise; + text(options: { message: string; placeholder?: string }): Promise; + cancel(message: string): void; +} + +export interface KloSetupProjectDeps { + prompts?: KloSetupProjectPromptAdapter; + initProject?: typeof initKloProject; + loadProject?: typeof loadKloProject; + homeDir?: string; +} + +type PromptProjectDirResult = + | { status: 'selected'; projectDir: string; confirmedCreation: boolean } + | { status: 'cancelled'; projectDir: string } + | { status: 'missing-input'; projectDir: string } + | { status: 'back'; projectDir: string }; + +const DEFAULT_NEW_PROJECT_FOLDER_NAME = 
'klo-project'; + +function createClackSetupProjectPromptAdapter(): KloSetupProjectPromptAdapter { + return { + async select(options) { + const value = await withSetupInterruptConfirmation(() => select(withMenuOptionsSpacing(options))); + if (isCancel(value)) { + cancel('Setup cancelled.'); + return 'exit'; + } + return value; + }, + async text(options) { + const value = await withSetupInterruptConfirmation(() => + text({ ...options, message: withTextInputNavigation(options.message) }), + ); + if (isCancel(value)) { + return undefined; + } + return value; + }, + cancel(message) { + cancel(message); + }, + }; +} + +function hasProjectConfig(projectDir: string): boolean { + return existsSync(join(projectDir, 'klo.yaml')); +} + +function resolveFromProjectDir(projectDir: string, input: string, homeDir: string): string { + if (input === '~') { + return resolve(homeDir); + } + if (input.startsWith('~/') || input.startsWith('~\\')) { + return resolve(homeDir, input.slice(2)); + } + return resolve(projectDir, input); +} + +async function existingFolderState( + projectDir: string, +): Promise<'missing' | 'empty-directory' | 'non-empty-directory' | 'not-directory'> { + try { + const projectDirStat = await stat(projectDir); + if (!projectDirStat.isDirectory()) { + return 'not-directory'; + } + return (await readdir(projectDir)).length === 0 ? 'empty-directory' : 'non-empty-directory'; + } catch (error) { + if (error && typeof error === 'object' && 'code' in error && error.code === 'ENOENT') { + return 'missing'; + } + throw error; + } +} + +async function normalizeSetupGitignore(projectDir: string): Promise { + const gitignorePath = join(projectDir, '.klo/.gitignore'); + await mkdir(join(projectDir, '.klo'), { recursive: true }); + const current = existsSync(gitignorePath) ? 
await readFile(gitignorePath, 'utf-8') : ''; + await writeFile(gitignorePath, mergeKloSetupGitignoreEntries(current), 'utf-8'); +} + +/** Marks the project setup step complete, writes the serialized config to project.configPath, normalizes the .klo/.gitignore file, and returns the project freshly reloaded from disk through loadKloProject. */ +async function persistProjectStep(project: KloLocalProject): Promise { + const config = markKloSetupStepComplete(project.config, 'project'); + await writeFile(project.configPath, serializeKloProjectConfig(config), 'utf-8'); + await normalizeSetupGitignore(project.projectDir); + return await loadKloProject({ projectDir: project.projectDir }); +} + +/** Initializes a new project through deps.initProject (initKloProject by default), naming it after the directory basename or klo-project when the basename is empty, then persists the project step. */ +async function createProject(projectDir: string, deps: KloSetupProjectDeps): Promise { + const initProject = deps.initProject ?? initKloProject; + const initialized = await initProject({ projectDir, projectName: basename(projectDir) || 'klo-project' }); + return await persistProjectStep(initialized); +} + +/** Loads an existing project through deps.loadProject (loadKloProject by default), then persists the project step on top of its current config. */ +async function loadExistingProject(projectDir: string, deps: KloSetupProjectDeps): Promise { + const loadProject = deps.loadProject ?? loadKloProject; + const project = await loadProject({ projectDir }); + return await persistProjectStep(project); +} + +/** Writes a single Project: line with the given directory to stdout. */ +function printProjectSummary(io: KloCliIo, projectDir: string): void { + io.stdout.write(`Project: ${projectDir}\n`); +} + +async function promptForNewProjectDir( + projectDir: string, + homeDir: string, + io: KloCliIo, + prompts: KloSetupProjectPromptAdapter, +): Promise { + const defaultProjectDir = join(projectDir, DEFAULT_NEW_PROJECT_FOLDER_NAME); + + while (true) { + io.stdout.write(`Relative paths are resolved from:\n ${projectDir}\n`); + io.stdout.write(`Home paths are resolved from:\n ${homeDir}\n`); + const destinationChoice = await prompts.select({ + message: 'Where should KLO create the project?', + options: [ + { value: 'default', label: `Create the default project folder: ${defaultProjectDir}` }, + { value: 'custom', label: 'Enter a custom path' }, + { value: 'back', label: 'Back' }, + ], + }); + + let selectedDir: string; + if (destinationChoice === 'back') { + return { status: 'back', projectDir }; + } + + if
(destinationChoice === 'default') { + selectedDir = defaultProjectDir; + } else if (destinationChoice === 'custom') { + const rawSelectedDir = await prompts.text({ + message: withTextInputNavigation('Project folder path'), + placeholder: './analytics-klo, ~/analytics-klo, or /Users/you/projects/analytics-klo', + }); + if (rawSelectedDir === undefined) { + continue; + } + const trimmedSelectedDir = rawSelectedDir.trim(); + if (trimmedSelectedDir.length === 0) { + io.stderr.write( + 'Enter a relative path like ./analytics-klo, a home path like ~/analytics-klo, or an absolute path.\n', + ); + return { status: 'missing-input', projectDir }; + } + selectedDir = resolveFromProjectDir(projectDir, trimmedSelectedDir, homeDir); + } else { + return { status: 'cancelled', projectDir }; + } + + const state = await existingFolderState(selectedDir); + let confirmedCreation = false; + if (state === 'not-directory') { + io.stderr.write(`Project folder path exists and is not a directory: ${selectedDir}\n`); + return { status: 'missing-input', projectDir }; + } + if (state === 'non-empty-directory') { + const existingAction = await prompts.select({ + message: `That folder already exists and is not empty: ${selectedDir}`, + options: [ + { value: 'use-existing', label: 'Yes, create KLO files there' }, + { value: 'choose-another', label: 'Choose another folder' }, + { value: 'back', label: 'Back' }, + ], + }); + if (existingAction === 'choose-another') { + continue; + } + if (existingAction === 'back') { + return { status: 'back', projectDir }; + } + if (existingAction !== 'use-existing') { + return { status: 'cancelled', projectDir }; + } + confirmedCreation = true; + } + + io.stdout.write(`KLO will create:\n ${selectedDir}\n`); + if (state !== 'non-empty-directory') { + const createAction = await prompts.select({ + message: `Create KLO project at ${selectedDir}?`, + options: [ + { value: 'create', label: 'Create project' }, + { value: 'choose-another', label: 'Choose another folder' 
/**
 * Decides which KLO project setup should use, then creates or loads it.
 *
 * Behavior by `args.mode`:
 * - `'existing'`: loads the project in `args.projectDir`; reports `missing-input` when no
 *   `klo.yaml` is there.
 * - `'new'`: creates the project in `args.projectDir` without prompting.
 * - `'prompt-new'`: asks for a destination folder; needs prompts to be possible.
 * - any other mode: uses the existing project if there is one; otherwise creates one directly
 *   (non-interactive with `args.yes`) or shows the "current directory / new folder" menu.
 *
 * Prompts are considered possible when input is not disabled and either stdout is a TTY or
 * `deps.prompts` is injected.
 *
 * @returns `ready` with the loaded project (plus `confirmedCreation` on the interactive paths),
 *   or `back` / `cancelled` / `missing-input` with a `projectDir`.
 */
// NOTE(review): the generic argument of the original return annotation is not visible in this
// view; `KloSetupProjectResult` follows the file's naming convention — confirm the type name.
export async function runKloSetupProjectStep(
  args: KloSetupProjectArgs,
  io: KloCliIo,
  deps: KloSetupProjectDeps = {},
): Promise<KloSetupProjectResult> {
  const projectDir = resolve(args.projectDir);
  const homeDir = deps.homeDir ?? homedir();
  const exists = hasProjectConfig(projectDir);

  const halt = <S extends 'back' | 'cancelled' | 'missing-input'>(status: S) => ({ status, projectDir });
  const missingInput = (message: string) => {
    io.stderr.write(message);
    return halt('missing-input');
  };
  const cannotPrompt = () => !io.stdout.isTTY && !deps.prompts;
  const useExisting = async () => {
    const project = await loadExistingProject(projectDir, deps);
    printProjectSummary(io, projectDir);
    return { status: 'ready' as const, projectDir, project };
  };
  const createAt = async (targetDir: string) => {
    const project = await createProject(targetDir, deps);
    printProjectSummary(io, targetDir);
    return { status: 'ready' as const, projectDir: targetDir, project };
  };

  switch (args.mode) {
    case 'existing': {
      if (!exists) {
        return missingInput(`No existing KLO project found at ${projectDir}. Pass --new to create it.\n`);
      }
      return await useExisting();
    }
    case 'new':
      return await createAt(projectDir);
    case 'prompt-new': {
      if (args.inputMode === 'disabled') {
        return missingInput(
          'Missing new project folder: pass --new --project-dir to create a project without prompts.\n',
        );
      }
      if (cannotPrompt()) {
        return missingInput(
          'Missing new project folder: pass --new --project-dir to create a project outside an interactive terminal.\n',
        );
      }
      const prompts = deps.prompts ?? createClackSetupProjectPromptAdapter();
      const selected = await promptForNewProjectDir(projectDir, homeDir, io, prompts);
      if (selected.status === 'back') {
        return halt(args.allowBack ? 'back' : 'cancelled');
      }
      if (selected.status !== 'selected') {
        return selected;
      }
      return { ...(await createAt(selected.projectDir)), confirmedCreation: selected.confirmedCreation };
    }
    default:
      break;
  }

  if (exists) {
    return await useExisting();
  }

  if (args.inputMode === 'disabled') {
    if (!args.yes) {
      return missingInput('Missing setup choice: pass --new or --yes to create a project in non-interactive setup.\n');
    }
    return await createAt(projectDir);
  }

  if (cannotPrompt()) {
    return missingInput(
      'Missing setup choice: pass --new or --yes to create a project outside an interactive terminal.\n',
    );
  }

  const prompts = deps.prompts ?? createClackSetupProjectPromptAdapter();
  io.stdout.write(
    'Use Up/Down to move, Enter to confirm the current selection, choose Back to return to the previous step, Ctrl+C to exit.\n',
  );

  // The last menu entry is "Back" when the caller allows it, otherwise "Exit".
  const closingOption = args.allowBack ? { value: 'back', label: 'Back' } : { value: 'exit', label: 'Exit' };

  for (;;) {
    const choice = await prompts.select({
      message: 'Which KLO project should setup use?',
      options: [
        { value: 'current', label: 'Use current directory' },
        { value: 'new', label: 'Create a new project folder' },
        closingOption,
      ],
    });

    if (choice === 'back') {
      return halt(args.allowBack ? 'back' : 'cancelled');
    }

    if (choice === 'exit') {
      // NOTE(review): the clack adapter's `select` already calls cancel('Setup cancelled.') when
      // it maps a cancelled prompt to 'exit', so this message may be printed twice — confirm.
      prompts.cancel('Setup cancelled.');
      return halt('cancelled');
    }

    if (choice === 'new') {
      const selected = await promptForNewProjectDir(projectDir, homeDir, io, prompts);
      if (selected.status === 'back') {
        // "Back" inside the folder picker returns to this menu rather than leaving the step.
        continue;
      }
      if (selected.status !== 'selected') {
        return selected;
      }
      return { ...(await createAt(selected.projectDir)), confirmedCreation: selected.confirmedCreation };
    }

    if (choice !== 'current') {
      prompts.cancel('Setup cancelled.');
      return halt('cancelled');
    }

    return { ...(await createAt(projectDir)), confirmedCreation: false };
  }
}
// Fixture: a status in which every section inspected by isKloSetupReady is ready.
const readyStatus: KloSetupStatus = {
  project: { path: '/tmp/revenue', ready: true },
  llm: { backend: 'anthropic', ready: true, model: 'claude-sonnet-4-6' },
  embeddings: { backend: 'openai', ready: true, model: 'text-embedding-3-small', dimensions: 1536 },
  databases: [{ connectionId: 'warehouse', ready: true }],
  sources: [],
  context: { ready: true, status: 'completed' },
  agents: [{ target: 'codex', scope: 'project', ready: true }],
};

describe('setup ready menu', () => {
  it('recognizes a ready setup only when required sections are ready', () => {
    // Each variant breaks exactly one section of the ready fixture.
    const embeddingsNotReady: KloSetupStatus = { ...readyStatus, embeddings: { ready: false } };
    const contextNotStarted: KloSetupStatus = { ...readyStatus, context: { ready: false, status: 'not_started' } };
    const withoutAgents: KloSetupStatus = { ...readyStatus, agents: [] };

    expect(isKloSetupReady(readyStatus)).toBe(true);
    expect(isKloSetupReady(embeddingsNotReady)).toBe(false);
    expect(isKloSetupReady(contextNotStarted)).toBe(false);
    expect(isKloSetupReady(withoutAgents)).toBe(false);
  });

  it('maps ready-project menu choices to setup sections', async () => {
    const promptStub = { select: vi.fn(async () => 'agents'), cancel: vi.fn() };

    const outcome = await runKloSetupReadyChangeMenu(readyStatus, { prompts: promptStub });

    expect(outcome).toEqual({ action: 'agents' });
    // The fixture has no project name, so the message falls back to the project path.
    expect(promptStub.select).toHaveBeenCalledWith({
      message: 'KLO is already set up for /tmp/revenue. What would you like to change?',
      options: [
        { value: 'models', label: 'Models' },
        { value: 'embeddings', label: 'Embeddings' },
        { value: 'databases', label: 'Primary sources' },
        { value: 'sources', label: 'Context sources' },
        { value: 'context', label: 'Rebuild KLO context' },
        { value: 'agents', label: 'Agent integration' },
        { value: 'exit', label: 'Exit' },
      ],
    });
  });
});
/** Sections the ready-project menu can send the user to, plus `'exit'`. */
export type KloSetupReadyAction = 'models' | 'embeddings' | 'databases' | 'sources' | 'context' | 'agents' | 'exit';

/** Minimal prompt surface the ready menu needs; injectable for tests. */
export interface KloSetupReadyMenuPromptAdapter {
  /** Shows a single-choice menu and resolves to the chosen option's `value`. */
  select(options: { message: string; options: Array<{ value: string; label: string }> }): Promise<string>;
  /** Reports a cancellation message to the user. */
  cancel(message: string): void;
}

/** Optional dependencies of the ready menu. */
export interface KloSetupReadyMenuDeps {
  /** Prompt adapter override; a clack-backed adapter is used when omitted. */
  prompts?: KloSetupReadyMenuPromptAdapter;
}

/**
 * Reports whether setup is complete.
 *
 * Project, LLM, embeddings and context must be ready; every listed database and every listed
 * context source must be ready (an empty list passes); at least one agent must be ready.
 */
export function isKloSetupReady(status: KloSetupStatus): boolean {
  const singleSectionsReady = status.project.ready && status.llm.ready && status.embeddings.ready;
  if (!singleSectionsReady) {
    return false;
  }
  if (!status.databases.every((database) => database.ready)) {
    return false;
  }
  if (!status.sources.every((source) => source.ready)) {
    return false;
  }
  if (!status.context.ready) {
    return false;
  }
  return status.agents.some((agent) => agent.ready);
}
/**
 * Builds the default, clack-backed prompt adapter for the ready menu. A cancelled menu calls
 * clack's `cancel` with "Setup cancelled." and is reported as the `'exit'` choice; any other
 * value is returned as a string.
 */
function createPromptAdapter(): KloSetupReadyMenuPromptAdapter {
  const pick: KloSetupReadyMenuPromptAdapter['select'] = async (options) => {
    const answer = await withSetupInterruptConfirmation(() => select(withMenuOptionsSpacing(options)));
    if (!isCancel(answer)) {
      return String(answer);
    }
    cancel('Setup cancelled.');
    return 'exit';
  };

  return {
    select: pick,
    cancel: (message) => {
      cancel(message);
    },
  };
}

/**
 * Asks which part of an already configured project the user wants to change.
 *
 * @param status Current setup status; the project's name (or its path when no name is set) is
 *   shown in the menu title.
 * @param deps Optional prompt adapter override.
 * @returns The chosen menu value as `action`. The value is cast, not validated.
 */
export async function runKloSetupReadyChangeMenu(
  status: KloSetupStatus,
  deps: KloSetupReadyMenuDeps = {},
): Promise<{ action: KloSetupReadyAction }> {
  const prompts = deps.prompts ?? createPromptAdapter();

  // Menu entries in display order: [value, label].
  const entries: Array<[KloSetupReadyAction, string]> = [
    ['models', 'Models'],
    ['embeddings', 'Embeddings'],
    ['databases', 'Primary sources'],
    ['sources', 'Context sources'],
    ['context', 'Rebuild KLO context'],
    ['agents', 'Agent integration'],
    ['exit', 'Exit'],
  ];
  const projectLabel = status.project.name ?? status.project.path;

  const chosen = await prompts.select({
    message: `KLO is already set up for ${projectLabel}. What would you like to change?`,
    options: entries.map(([value, label]) => ({ value, label })),
  });
  return { action: chosen as KloSetupReadyAction };
}
/** Formats a credential reference that points at an environment variable, e.g. `env:ANTHROPIC_API_KEY`. */
export function envCredentialReference(envName: string): string {
  return `env:${envName}`;
}

/** Options for `writeProjectLocalSecretReference`. */
export interface WriteProjectLocalSecretReferenceOptions {
  /** Project root; the secret is stored under `<projectDir>/.klo/secrets/`. */
  projectDir: string;
  /** Plain file name of the secret inside the secrets directory (no path segments). */
  fileName: string;
  /** Secret value; surrounding whitespace is trimmed and one trailing newline is appended. */
  value: string;
}

/**
 * Stores a secret in `<projectDir>/.klo/secrets/<fileName>` and returns a `file:` reference to it.
 *
 * The file is created (or truncated) and restricted to mode 0o600 before the secret is written,
 * so a pre-existing file with wider permissions never holds the new value while still readable
 * by others. On Windows the mode is not enforced and the `chmod` call is skipped.
 *
 * @returns `file:<absolute path of the secret file>`.
 * @throws Error when `fileName` does not name a direct child of the secrets directory (for
 *   example `../token`, `nested/token`, `.` or an empty string); filesystem errors are rethrown.
 */
export async function writeProjectLocalSecretReference(
  options: WriteProjectLocalSecretReferenceOptions,
): Promise<string> {
  const secretsDir = resolve(options.projectDir, '.klo/secrets');
  const secretPath = join(secretsDir, options.fileName);

  // Refuse names that would place the file anywhere but directly inside the secrets directory.
  if (resolve(secretPath, '..') !== secretsDir) {
    throw new Error(`Invalid secret file name: ${options.fileName}`);
  }

  await mkdir(secretsDir, { recursive: true });

  // `mode` only applies when the file is created, so first create/truncate the file and tighten
  // its permissions, and only then write the secret into it.
  await writeFile(secretPath, '', { encoding: 'utf-8', mode: 0o600 });
  if (process.platform !== 'win32') {
    await chmod(secretPath, 0o600);
  }
  await writeFile(secretPath, `${options.value.trim()}\n`, { encoding: 'utf-8', mode: 0o600 });

  return `file:${secretPath}`;
}
/**
 * Creates an in-memory CLI io pair for tests. `io.stdout` reports `isTTY: true`; everything
 * written to either stream is accumulated and can be read back through `stdout()` / `stderr()`.
 */
function makeIo() {
  const captured = { stdout: '', stderr: '' };
  const sinkFor = (stream: 'stdout' | 'stderr') => ({
    write: (chunk: string) => {
      captured[stream] += chunk;
    },
  });

  return {
    io: {
      stdout: { isTTY: true, ...sinkFor('stdout') },
      stderr: sinkFor('stderr'),
    },
    stdout: () => captured.stdout,
    stderr: () => captured.stderr,
  };
}

/**
 * Builds a scripted prompt adapter. Each call to `multiselect`, `select` or `text` consumes the
 * next queued answer; once a queue is empty they answer `[]`, `'skip'` and `''` respectively.
 * A queued `undefined` text answer is returned as `undefined`.
 */
// NOTE(review): the element type of `text` is not visible in this view; `string | undefined`
// matches the scripted values used by the tests below.
function prompts(values: {
  multiselect?: string[][];
  select?: string[];
  text?: Array<string | undefined>;
}): KloSetupSourcesPromptAdapter {
  // Copy the scripts so consuming answers never mutates the caller's arrays.
  const queued = {
    multiselect: [...(values.multiselect ?? [])],
    select: [...(values.select ?? [])],
    text: [...(values.text ?? [])],
  };

  return {
    multiselect: vi.fn(async () => queued.multiselect.shift() ?? []),
    select: vi.fn(async () => queued.select.shift() ?? 'skip'),
    text: vi.fn(async () => (queued.text.length === 0 ? '' : queued.text.shift())),
    cancel: vi.fn(),
    log: vi.fn(),
  };
}

/** Expected (undecorated) connection-name prompt for a source with the given display label. */
function connectionNamePrompt(label: string): string {
  const title = `Name this ${label} connection`;
  const hint = 'KLO will use this short name in commands and config. You can rename it now.';
  return `${title}\n${hint}`;
}

/**
 * Expected text-input prompt after the navigation hint is added: trailing newlines are dropped,
 * a blank line separates the first line from the rest of a multi-line message, and
 * "Press Escape to go back." is appended on its own line.
 */
function textInputPrompt(message: string): string {
  const trimmed = message.replace(/\n+$/, '');
  const firstBreak = trimmed.indexOf('\n');
  if (firstBreak === -1) {
    return `${trimmed}\nPress Escape to go back.\n`;
  }
  const title = trimmed.slice(0, firstBreak);
  const body = trimmed.slice(firstBreak + 1);
  return `${title}\n\n${body}\nPress Escape to go back.\n`;
}
[], + database_connection_ids: ['warehouse'], + }, + }), + 'utf-8', + ); + } + + async function addConnection(connectionId: string, connection: KloProjectConnectionConfig) { + const config = await readConfig(); + await writeFile( + join(projectDir, 'klo.yaml'), + serializeKloProjectConfig({ + ...config, + connections: { + ...config.connections, + [connectionId]: connection, + }, + }), + 'utf-8', + ); + } + + it('marks optional sources complete when skipped', async () => { + const io = makeIo(); + await expect( + runKloSetupSourcesStep( + { projectDir, inputMode: 'disabled', runInitialSourceIngest: false, skipSources: true }, + io.io, + ), + ).resolves.toEqual({ + status: 'skipped', + projectDir, + }); + + expect((await readConfig()).setup?.completed_steps).toContain('sources'); + expect(io.stdout()).toContain('Context source setup skipped.'); + }); + + it('writes a dbt local source connection after validation succeeds', async () => { + await addPrimarySource(); + const validateDbt = vi.fn(async () => ({ ok: true as const, detail: 'project=analytics schemas=2' })); + const runInitialIngest = vi.fn(async () => 0); + const io = makeIo(); + + await expect( + runKloSetupSourcesStep( + { + projectDir, + inputMode: 'disabled', + source: 'dbt', + sourceConnectionId: 'analytics_dbt', + sourcePath: '/repo/dbt', + sourceProjectName: 'analytics', + runInitialSourceIngest: true, + skipSources: false, + }, + io.io, + { validateDbt, runInitialIngest }, + ), + ).resolves.toEqual({ status: 'ready', projectDir, connectionIds: ['analytics_dbt'] }); + + const config = await readConfig(); + expect(config.connections.analytics_dbt).toMatchObject({ + driver: 'dbt', + source_dir: '/repo/dbt', + project_name: 'analytics', + }); + expect(config.setup?.completed_steps).toContain('sources'); + expect(runInitialIngest).toHaveBeenCalledWith(projectDir, 'analytics_dbt', io.io, { inputMode: 'disabled' }); + }); + + it('writes Metabase config and validates mapping through existing mapping path', 
async () => { + await addPrimarySource(); + const validateMetabase = vi.fn(async () => ({ ok: true as const, detail: 'user=admin@example.com' })); + const runMapping = vi.fn(async () => 0); + const io = makeIo(); + + await expect( + runKloSetupSourcesStep( + { + projectDir, + inputMode: 'disabled', + source: 'metabase', + sourceConnectionId: 'prod_metabase', + sourceUrl: 'https://metabase.example.com', + sourceApiKeyRef: 'env:METABASE_API_KEY', + sourceWarehouseConnectionId: 'warehouse', + metabaseDatabaseId: 1, + runInitialSourceIngest: false, + skipSources: false, + }, + io.io, + { validateMetabase, runMapping }, + ), + ).resolves.toEqual({ status: 'ready', projectDir, connectionIds: ['prod_metabase'] }); + + expect((await readConfig()).connections.prod_metabase).toMatchObject({ + driver: 'metabase', + api_url: 'https://metabase.example.com', + api_key_ref: 'env:METABASE_API_KEY', + mappings: { + databaseMappings: { '1': 'warehouse' }, + syncEnabled: { '1': true }, + syncMode: 'ONLY', + }, + }); + expect(runMapping).toHaveBeenCalledWith(projectDir, 'prod_metabase', io.io); + }); + + it('does not mark sources complete when validation fails', async () => { + await addPrimarySource(); + const io = makeIo(); + await expect( + runKloSetupSourcesStep( + { + projectDir, + inputMode: 'disabled', + source: 'lookml', + sourceConnectionId: 'looker_repo', + sourceGitUrl: 'https://github.com/acme/lookml.git', + runInitialSourceIngest: false, + skipSources: false, + }, + io.io, + { validateLookml: vi.fn(async () => ({ ok: false as const, message: 'No LookML files found' })) }, + ), + ).resolves.toEqual({ status: 'failed', projectDir }); + + expect((await readConfig()).setup?.completed_steps ?? 
[]).not.toContain('sources'); + expect(io.stderr()).toContain('No LookML files found'); + }); + + it('can go back from the interactive source checklist', async () => { + await addPrimarySource(); + const io = makeIo(); + const testPrompts = prompts({ multiselect: [['back']] }); + + await expect( + runKloSetupSourcesStep( + { projectDir, inputMode: 'auto', runInitialSourceIngest: false, skipSources: false }, + io.io, + { + prompts: testPrompts, + }, + ), + ).resolves.toEqual({ status: 'back', projectDir }); + + expect(testPrompts.multiselect).toHaveBeenCalledWith( + expect.objectContaining({ + message: + 'Which context sources should KLO ingest?\nUse Up/Down to move, Space to select or unselect, Enter to confirm, Escape to go back, or Ctrl+C to exit.', + }), + ); + const options = vi.mocked(testPrompts.multiselect).mock.calls[0]?.[0].options ?? []; + expect(options).toContainEqual({ value: 'notion', label: 'Notion' }); + expect(options).not.toContainEqual({ value: 'posthog', label: 'PostHog' }); + }); + + it('uses a source-specific editable connection name for new interactive connections', async () => { + await addPrimarySource(); + const validateDbt = vi.fn(async () => ({ ok: true as const, detail: 'project=analytics schemas=2' })); + const io = makeIo(); + const testPrompts = prompts({ + multiselect: [['dbt']], + select: ['path'], + text: ['dbt-main', '/repo/dbt', '', ''], + }); + + await expect( + runKloSetupSourcesStep( + { projectDir, inputMode: 'auto', runInitialSourceIngest: false, skipSources: false }, + io.io, + { + prompts: testPrompts, + validateDbt, + }, + ), + ).resolves.toEqual({ status: 'ready', projectDir, connectionIds: ['dbt-main'] }); + + expect(testPrompts.text).toHaveBeenNthCalledWith(1, { + message: textInputPrompt(connectionNamePrompt('dbt')), + placeholder: 'dbt-main', + initialValue: 'dbt-main', + }); + expect((await readConfig()).connections['dbt-main']).toMatchObject({ + driver: 'dbt', + source_dir: '/repo/dbt', + }); + }); + + it('skips 
token prompt for public repos when git connection test succeeds', async () => { + await addPrimarySource(); + const validateDbt = vi.fn(async () => ({ ok: true as const, detail: 'project=analytics schemas=2' })); + const testGitRepo = vi.fn(async () => ({ ok: true as const })); + const io = makeIo(); + const testPrompts = prompts({ + multiselect: [['dbt']], + select: ['git'], + text: ['dbt-main', 'https://github.com/acme-org/klo-dbt-demo', 'main', ''], + }); + + await expect( + runKloSetupSourcesStep( + { projectDir, inputMode: 'auto', runInitialSourceIngest: false, skipSources: false }, + io.io, + { + prompts: testPrompts, + validateDbt, + testGitRepo, + }, + ), + ).resolves.toEqual({ status: 'ready', projectDir, connectionIds: ['dbt-main'] }); + + expect(testGitRepo).toHaveBeenCalledWith({ repoUrl: 'https://github.com/acme-org/klo-dbt-demo' }); + expect(testPrompts.log).toHaveBeenCalledWith('Repository connected.'); + expect(testPrompts.text).toHaveBeenNthCalledWith(4, { + message: textInputPrompt( + [ + 'Folder containing dbt_project.yml (optional)', + 'Press Enter when dbt_project.yml is at the repo root.', + 'For monorepos, enter a relative path like analytics/dbt.', + ].join('\n'), + ), + placeholder: 'optional', + }); + expect(testPrompts.text).toHaveBeenCalledTimes(4); + }); + + it('prompts for token when git connection test fails', async () => { + await addPrimarySource(); + const validateDbt = vi.fn(async () => ({ ok: true as const, detail: 'project=analytics schemas=2' })); + const testGitRepo = vi.fn(async () => ({ ok: false as const, error: 'authentication required' })); + const io = makeIo(); + const testPrompts = prompts({ + multiselect: [['dbt']], + select: ['git'], + text: ['dbt-main', 'https://github.com/acme-org/private-repo', 'main', '', 'env:GITHUB_TOKEN'], + }); + + await expect( + runKloSetupSourcesStep( + { projectDir, inputMode: 'auto', runInitialSourceIngest: false, skipSources: false }, + io.io, + { + prompts: testPrompts, + validateDbt, 
+ testGitRepo, + }, + ), + ).resolves.toEqual({ status: 'ready', projectDir, connectionIds: ['dbt-main'] }); + + expect(testGitRepo).toHaveBeenCalledWith({ repoUrl: 'https://github.com/acme-org/private-repo' }); + expect(testPrompts.text).toHaveBeenNthCalledWith(5, { + message: textInputPrompt( + [ + 'This repo requires authentication.', + 'Generate a token at: https://github.com/settings/tokens/new', + 'Store it in an env var, then enter env:VARIABLE_NAME here (e.g. env:GITHUB_TOKEN).', + 'Or use file:/absolute/path if the token is stored in a file.', + 'Press Enter to skip and try without authentication anyway.', + ].join('\n'), + ), + placeholder: 'env:GITHUB_TOKEN', + }); + expect(testPrompts.text).toHaveBeenCalledTimes(5); + }); + + it('enables the dbt adapter when adding a dbt source connection', async () => { + await addPrimarySource(); + const validateDbt = vi.fn(async () => ({ ok: true as const, detail: 'project=analytics schemas=2' })); + + await expect( + runKloSetupSourcesStep( + { + projectDir, + inputMode: 'disabled', + source: 'dbt', + sourceConnectionId: 'dbt-main', + sourcePath: '/repo/dbt', + runInitialSourceIngest: false, + skipSources: false, + }, + makeIo().io, + { validateDbt }, + ), + ).resolves.toEqual({ status: 'ready', projectDir, connectionIds: ['dbt-main'] }); + + expect((await readConfig()).ingest.adapters).toContain('dbt'); + }); + + it('lets interactive setup retry or continue after initial source ingest fails', async () => { + await addPrimarySource(); + const validateDbt = vi.fn(async () => ({ ok: true as const, detail: 'project=analytics schemas=2' })); + const runInitialIngest = vi.fn(async () => 1); + const io = makeIo(); + const testPrompts = prompts({ + multiselect: [['dbt']], + select: ['path', 'continue', 'done'], + text: ['dbt-main', '/repo/dbt', '', ''], + }); + + await expect( + runKloSetupSourcesStep( + { projectDir, inputMode: 'auto', runInitialSourceIngest: true, skipSources: false }, + io.io, + { + prompts: 
testPrompts, + validateDbt, + runInitialIngest, + }, + ), + ).resolves.toEqual({ status: 'ready', projectDir, connectionIds: ['dbt-main'] }); + + expect(runInitialIngest).toHaveBeenCalledTimes(1); + expect((await readConfig()).connections['dbt-main']).toMatchObject({ driver: 'dbt', source_dir: '/repo/dbt' }); + expect(io.stdout()).toContain('Context source saved without a completed context build for dbt-main.'); + expect(io.stdout()).toContain('Run later: klo ingest dbt-main'); + }); + + it('retries initial source ingest from the failure menu', async () => { + await addPrimarySource(); + const validateDbt = vi.fn(async () => ({ ok: true as const, detail: 'project=analytics schemas=2' })); + const runInitialIngest = vi.fn(async () => (runInitialIngest.mock.calls.length === 1 ? 1 : 0)); + const testPrompts = prompts({ + multiselect: [['dbt']], + select: ['path', 'retry'], + text: ['dbt-main', '/repo/dbt', '', ''], + }); + + await expect( + runKloSetupSourcesStep( + { projectDir, inputMode: 'auto', runInitialSourceIngest: true, skipSources: false }, + makeIo().io, + { + prompts: testPrompts, + validateDbt, + runInitialIngest, + }, + ), + ).resolves.toEqual({ status: 'ready', projectDir, connectionIds: ['dbt-main'] }); + + expect(runInitialIngest).toHaveBeenCalledTimes(2); + }); + + it('offers existing context source connections before prompting for new details', async () => { + await addPrimarySource(); + await addConnection('dbt-main', { + driver: 'dbt', + source_dir: '/repo/existing-dbt', + project_name: 'analytics', + }); + const validateDbt = vi.fn(async () => ({ ok: true as const, detail: 'project=analytics schemas=2' })); + const testPrompts = prompts({ + multiselect: [['dbt']], + select: ['existing:dbt-main'], + text: [undefined], + }); + + await expect( + runKloSetupSourcesStep( + { projectDir, inputMode: 'auto', runInitialSourceIngest: false, skipSources: false }, + makeIo().io, + { + prompts: testPrompts, + validateDbt, + }, + ), + ).resolves.toEqual({ 
status: 'ready', projectDir, connectionIds: ['dbt-main'] }); + + expect(testPrompts.select).toHaveBeenCalledWith({ + message: 'Configure dbt', + options: [ + { value: 'existing:dbt-main', label: 'Use existing dbt connection: dbt-main' }, + { value: 'new', label: 'Add new dbt connection' }, + { value: 'back', label: 'Back' }, + ], + }); + expect(testPrompts.text).not.toHaveBeenCalled(); + expect(validateDbt).toHaveBeenCalledWith({ + driver: 'dbt', + source_dir: '/repo/existing-dbt', + project_name: 'analytics', + }); + expect((await readConfig()).connections['dbt-main']).toMatchObject({ + driver: 'dbt', + source_dir: '/repo/existing-dbt', + }); + }); + + it('offers existing connections for every context source type', async () => { + await addPrimarySource(); + const cases: Array<{ + source: KloSetupSourceType; + connectionId: string; + connection: KloProjectConnectionConfig; + deps: KloSetupSourcesDeps; + expectedLabel: string; + }> = [ + { + source: 'dbt', + connectionId: 'dbt-main', + connection: { driver: 'dbt', source_dir: '/repo/dbt', project_name: 'analytics' }, + deps: { validateDbt: vi.fn(async () => ({ ok: true as const, detail: 'project=analytics schemas=2' })) }, + expectedLabel: 'dbt', + }, + { + source: 'metricflow', + connectionId: 'metricflow-main', + connection: { driver: 'metricflow', metricflow: { repoUrl: 'file:///repo/metricflow' } }, + deps: { validateMetricflow: vi.fn(async () => ({ ok: true as const, detail: 'metrics=1' })) }, + expectedLabel: 'MetricFlow', + }, + { + source: 'metabase', + connectionId: 'metabase-main', + connection: { + driver: 'metabase', + api_url: 'https://metabase.example.com', + api_key_ref: 'env:METABASE_API_KEY', + mappings: { + databaseMappings: { '1': 'warehouse' }, + syncEnabled: { '1': true }, + syncMode: 'ONLY', + }, + }, + deps: { + validateMetabase: vi.fn(async () => ({ ok: true as const, detail: 'mapping validated' })), + runMapping: vi.fn(async () => 0), + }, + expectedLabel: 'Metabase', + }, + { + source: 
'looker', + connectionId: 'looker-main', + connection: { + driver: 'looker', + base_url: 'https://looker.example.com', + client_id: 'client-id', + client_secret_ref: 'env:LOOKER_CLIENT_SECRET', + mappings: { connectionMappings: { warehouse: 'warehouse' } }, + }, + deps: { + validateLooker: vi.fn(async () => ({ ok: true as const, detail: 'mapping refreshed' })), + runMapping: vi.fn(async () => 0), + }, + expectedLabel: 'Looker', + }, + { + source: 'lookml', + connectionId: 'lookml-main', + connection: { + driver: 'lookml', + repoUrl: 'file:///repo/lookml', + mappings: { expectedLookerConnectionName: null }, + }, + deps: { validateLookml: vi.fn(async () => ({ ok: true as const, detail: 'lookmlFiles=1' })) }, + expectedLabel: 'LookML', + }, + { + source: 'notion', + connectionId: 'notion-main', + connection: { + driver: 'notion', + auth_token_ref: 'env:NOTION_TOKEN', + crawl_mode: 'all_accessible', + root_page_ids: [], + root_database_ids: [], + root_data_source_ids: [], + }, + deps: { validateNotion: vi.fn(async () => ({ ok: true as const, detail: 'roots=0' })) }, + expectedLabel: 'Notion', + }, + ]; + + for (const testCase of cases) { + await addConnection(testCase.connectionId, testCase.connection); + const testPrompts = prompts({ + multiselect: [[testCase.source]], + select: [`existing:${testCase.connectionId}`], + text: [undefined], + }); + + await expect( + runKloSetupSourcesStep( + { projectDir, inputMode: 'auto', runInitialSourceIngest: false, skipSources: false }, + makeIo().io, + { + prompts: testPrompts, + ...testCase.deps, + }, + ), + ).resolves.toEqual({ status: 'ready', projectDir, connectionIds: [testCase.connectionId] }); + + expect(testPrompts.select).toHaveBeenCalledWith({ + message: `Configure ${testCase.expectedLabel}`, + options: [ + { + value: `existing:${testCase.connectionId}`, + label: `Use existing ${testCase.expectedLabel} connection: ${testCase.connectionId}`, + }, + { value: 'new', label: `Add new ${testCase.expectedLabel} connection` }, + 
{ value: 'back', label: 'Back' }, + ], + }); + expect(testPrompts.text).not.toHaveBeenCalled(); + } + }); + + it('lets Escape from dbt git URL return to source location selection', async () => { + await addPrimarySource(); + const validateDbt = vi.fn(async () => ({ ok: true as const, detail: 'project=analytics schemas=2' })); + const testPrompts = prompts({ + multiselect: [['dbt']], + select: ['git', 'path'], + text: ['dbt-main', undefined, '/repo/dbt', '', ''], + }); + + await expect( + runKloSetupSourcesStep( + { projectDir, inputMode: 'auto', runInitialSourceIngest: false, skipSources: false }, + makeIo().io, + { + prompts: testPrompts, + validateDbt, + }, + ), + ).resolves.toEqual({ status: 'ready', projectDir, connectionIds: ['dbt-main'] }); + + const selectMessages = vi.mocked(testPrompts.select).mock.calls.map(([options]) => options.message); + expect(selectMessages[0]).toBe('dbt source location'); + expect(selectMessages[1]).toBe('dbt source location'); + expect(selectMessages.at(-1)).toContain('Add another?'); + expect((await readConfig()).connections['dbt-main']).toMatchObject({ + driver: 'dbt', + source_dir: '/repo/dbt', + }); + }); + + it('lets Escape from source connection name return to context source selection', async () => { + await addPrimarySource(); + const validateDbt = vi.fn(async () => ({ ok: true as const, detail: 'project=analytics schemas=2' })); + const testPrompts = prompts({ + multiselect: [['dbt'], ['back']], + text: [undefined], + }); + + await expect( + runKloSetupSourcesStep( + { projectDir, inputMode: 'auto', runInitialSourceIngest: false, skipSources: false }, + makeIo().io, + { + prompts: testPrompts, + validateDbt, + }, + ), + ).resolves.toEqual({ status: 'back', projectDir }); + + expect(testPrompts.multiselect).toHaveBeenCalledTimes(2); + expect(validateDbt).not.toHaveBeenCalled(); + }); + + it('backs up one prompt inside every interactive context source connection', async () => { + await addPrimarySource(); + const cases: 
Array<{ + source: KloSetupSourceType; + select?: string[]; + text: Array; + deps: KloSetupSourcesDeps; + repeatedSelectMessage?: string; + repeatedTextMessage?: string; + }> = [ + { + source: 'dbt', + select: ['git', 'path'], + text: ['dbt-main', undefined, '/repo/dbt', '', ''], + deps: { validateDbt: vi.fn(async () => ({ ok: true as const, detail: 'project=analytics schemas=2' })) }, + repeatedSelectMessage: 'dbt source location', + }, + { + source: 'metricflow', + select: ['git', 'path'], + text: ['metricflow-main', undefined, '/repo/metricflow', ''], + deps: { validateMetricflow: vi.fn(async () => ({ ok: true as const, detail: 'metrics=1' })) }, + repeatedSelectMessage: 'metricflow source location', + }, + { + source: 'lookml', + select: ['git', 'path'], + text: ['lookml-main', undefined, '/repo/lookml', ''], + deps: { validateLookml: vi.fn(async () => ({ ok: true as const, detail: 'lookmlFiles=1' })) }, + repeatedSelectMessage: 'lookml source location', + }, + { + source: 'metabase', + text: [ + 'metabase-main', + 'https://old-metabase.example.com', + undefined, + 'https://metabase.example.com', + 'env:METABASE_API_KEY', + 'warehouse', + '1', + ], + deps: { + validateMetabase: vi.fn(async () => ({ ok: true as const, detail: 'mapping validated' })), + runMapping: vi.fn(async () => 0), + }, + repeatedTextMessage: textInputPrompt('Metabase URL'), + }, + { + source: 'looker', + text: [ + 'looker-main', + 'https://old-looker.example.com', + undefined, + 'https://looker.example.com', + 'client-id', + 'env:LOOKER_CLIENT_SECRET', + 'warehouse', + '', + ], + deps: { + validateLooker: vi.fn(async () => ({ ok: true as const, detail: 'mapping refreshed' })), + runMapping: vi.fn(async () => 0), + }, + repeatedTextMessage: textInputPrompt('Looker base URL'), + }, + { + source: 'notion', + select: ['back', 'all_accessible'], + text: ['notion-main', 'env:NOTION_TOKEN', 'env:NOTION_TOKEN'], + deps: { validateNotion: vi.fn(async () => ({ ok: true as const, detail: 'roots=0' })) 
}, + repeatedTextMessage: textInputPrompt('Notion token ref'), + }, + ]; + + for (const testCase of cases) { + const testPrompts = prompts({ + multiselect: [[testCase.source]], + select: testCase.select, + text: testCase.text, + }); + + await expect( + runKloSetupSourcesStep( + { projectDir, inputMode: 'auto', runInitialSourceIngest: false, skipSources: false }, + makeIo().io, + { + prompts: testPrompts, + ...testCase.deps, + }, + ), + ).resolves.toEqual({ status: 'ready', projectDir, connectionIds: [`${testCase.source}-main`] }); + + if (testCase.repeatedSelectMessage) { + expect( + vi + .mocked(testPrompts.select) + .mock.calls.map(([options]) => options.message) + .filter((message) => message === testCase.repeatedSelectMessage), + ).toHaveLength(2); + } + if (testCase.repeatedTextMessage) { + expect( + vi + .mocked(testPrompts.text) + .mock.calls.map(([options]) => options.message) + .filter((message) => message === testCase.repeatedTextMessage), + ).toHaveLength(2); + } + } + }); + + it('does not offer context sources until a primary source exists', async () => { + const io = makeIo(); + const testPrompts = prompts({ multiselect: [['notion']] }); + + await expect( + runKloSetupSourcesStep( + { projectDir, inputMode: 'auto', runInitialSourceIngest: false, skipSources: false }, + io.io, + { prompts: testPrompts }, + ), + ).resolves.toEqual({ status: 'skipped', projectDir }); + + expect(testPrompts.multiselect).not.toHaveBeenCalled(); + expect(io.stdout()).toContain('Connect a primary source before adding context sources.'); + expect((await readConfig()).setup?.completed_steps ?? 
[]).not.toContain('sources'); + }); +}); diff --git a/packages/cli/src/setup-sources.ts b/packages/cli/src/setup-sources.ts new file mode 100644 index 00000000..4682ef36 --- /dev/null +++ b/packages/cli/src/setup-sources.ts @@ -0,0 +1,1185 @@ +import { mkdtemp, readdir, readFile, writeFile } from 'node:fs/promises'; +import { tmpdir } from 'node:os'; +import { join, resolve } from 'node:path'; +import { fileURLToPath, pathToFileURL } from 'node:url'; +import { cancel, isCancel, log, multiselect, select, text } from '@clack/prompts'; +import { resolveNotionAuthToken } from '@klo/context/connections'; +import { resolveKloConfigReference } from '@klo/context/core'; +import { + cloneOrPull, + loadDbtSchemaFiles, + loadProjectInfo, + type NotionApi, + NotionClient, + parseLookmlStagedDir, + parseMetricflowFiles, + testRepoConnection, +} from '@klo/context/ingest'; +import { + type KloProjectConfig, + type KloProjectConnectionConfig, + loadKloProject, + markKloSetupStepComplete, + serializeKloProjectConfig, +} from '@klo/context/project'; +import type { KloCliIo } from './cli-runtime.js'; +import { runKloConnectionMapping } from './commands/connection-mapping.js'; +import { runKloConnection } from './connection.js'; +import { withMenuOptionsSpacing, withMultiselectNavigation, withTextInputNavigation } from './prompt-navigation.js'; +import { runKloPublicIngest } from './public-ingest.js'; +import { withSetupInterruptConfirmation } from './setup-interrupt.js'; + +export type KloSetupSourceType = 'dbt' | 'metricflow' | 'metabase' | 'looker' | 'lookml' | 'notion'; + +export interface KloSetupSourcesArgs { + projectDir: string; + inputMode: 'auto' | 'disabled'; + source?: KloSetupSourceType; + sourceConnectionId?: string; + sourcePath?: string; + sourceGitUrl?: string; + sourceBranch?: string; + sourceSubpath?: string; + sourceAuthTokenRef?: string; + sourceUrl?: string; + sourceApiKeyRef?: string; + sourceClientId?: string; + sourceClientSecretRef?: string; + 
sourceWarehouseConnectionId?: string; + sourceProjectName?: string; + sourceProfilesPath?: string; + sourceTarget?: string; + metabaseDatabaseId?: number; + notionCrawlMode?: 'all_accessible' | 'selected_roots'; + notionRootPageIds?: string[]; + runInitialSourceIngest: boolean; + skipSources: boolean; +} + +export type KloSetupSourcesResult = + | { status: 'ready'; projectDir: string; connectionIds: string[] } + | { status: 'skipped'; projectDir: string } + | { status: 'back'; projectDir: string } + | { status: 'missing-input'; projectDir: string } + | { status: 'failed'; projectDir: string }; + +export interface KloSetupSourcesPromptAdapter { + multiselect(options: { + message: string; + options: Array<{ value: string; label: string }>; + required?: boolean; + }): Promise; + select(options: { message: string; options: Array<{ value: string; label: string }> }): Promise; + text(options: { message: string; placeholder?: string; initialValue?: string }): Promise; + cancel(message: string): void; + log?(message: string): void; +} + +export type SourceValidationResult = { ok: true; detail?: string } | { ok: false; message: string }; + +export interface KloSetupSourcesDeps { + prompts?: KloSetupSourcesPromptAdapter; + testGitRepo?: (args: { repoUrl: string; authToken?: string | null }) => Promise<{ ok: true } | { ok: false; error: string }>; + validateDbt?: (connection: KloProjectConnectionConfig) => Promise; + validateMetricflow?: (connection: KloProjectConnectionConfig) => Promise; + validateMetabase?: (projectDir: string, connectionId: string) => Promise; + validateLooker?: (projectDir: string, connectionId: string) => Promise; + validateLookml?: (connection: KloProjectConnectionConfig) => Promise; + validateNotion?: (connection: KloProjectConnectionConfig) => Promise; + runMapping?: (projectDir: string, connectionId: string, io: KloCliIo) => Promise; + runInitialIngest?: ( + projectDir: string, + connectionId: string, + io: KloCliIo, + options: { inputMode: 
/**
 * Builds the default prompt adapter backed by `@clack/prompts`.
 *
 * Every prompt runs inside `withSetupInterruptConfirmation`. A cancelled
 * `multiselect` / `select` prints "Setup cancelled." and resolves to the
 * `'back'` sentinel (`['back']` for multiselect); a cancelled `text` prompt
 * resolves to `undefined` without printing anything.
 */
function createPromptAdapter(): KloSetupSourcesPromptAdapter {
  const adapter: KloSetupSourcesPromptAdapter = {
    multiselect: async (options) => {
      const picked = await withSetupInterruptConfirmation(() => multiselect(withMenuOptionsSpacing(options)));
      if (!isCancel(picked)) {
        return [...picked] as string[];
      }
      cancel('Setup cancelled.');
      return ['back'];
    },
    select: async (options) => {
      const picked = await withSetupInterruptConfirmation(() => select(withMenuOptionsSpacing(options)));
      if (!isCancel(picked)) {
        return String(picked);
      }
      cancel('Setup cancelled.');
      return 'back';
    },
    text: async (options) => {
      // NOTE(review): promptText() in this file already passes the message through
      // withTextInputNavigation before calling prompts.text, so with this default
      // adapter the helper is applied twice — confirm it is idempotent.
      const decoratedMessage = withTextInputNavigation(options.message);
      const entered = await withSetupInterruptConfirmation(() => text({ ...options, message: decoratedMessage }));
      if (isCancel(entered)) {
        return undefined;
      }
      return String(entered);
    },
    cancel: (message) => {
      cancel(message);
    },
    log: (message) => {
      log.info(message);
    },
  };
  return adapter;
}
/**
 * Normalizes and validates a credential reference.
 *
 * @param value - Raw reference as entered by the user or passed by flag.
 * @param label - Human-readable field name used in error messages.
 * @returns The trimmed reference.
 * @throws Error when the value is missing/blank, does not start with `env:` or
 *   `file:`, or has nothing after the scheme (a bare `env:` / `file:`).
 */
function credentialRef(value: string | undefined, label: string): string {
  const ref = value?.trim();
  if (!ref) {
    throw new Error(`Missing ${label}; use env:NAME or file:/absolute/path`);
  }
  const scheme = ['env:', 'file:'].find((prefix) => ref.startsWith(prefix));
  // A bare scheme names no variable or file, so reject it here with the same
  // guidance instead of letting it fail later when the reference is resolved.
  if (scheme === undefined || ref.slice(scheme.length).trim().length === 0) {
    throw new Error(`${label} must use env:NAME or file:/absolute/path`);
  }
  return ref;
}
/**
 * Reports whether the project already has a primary source.
 *
 * True when an id listed in `setup.database_connection_ids` is still present
 * in `config.connections`, or when any connection's `driver` (compared
 * case-insensitively) is one of `PRIMARY_SOURCE_DRIVERS`.
 */
function hasPrimarySource(config: KloProjectConfig): boolean {
  const { connections } = config;

  for (const recordedId of config.setup?.database_connection_ids ?? []) {
    if (Object.hasOwn(connections, recordedId)) {
      return true;
    }
  }

  for (const candidate of Object.values(connections)) {
    const driver = String(candidate.driver ?? '').toLowerCase();
    if (PRIMARY_SOURCE_DRIVERS.has(driver)) {
      return true;
    }
  }

  return false;
}
/**
 * Builds the Metabase connection config from setup arguments.
 *
 * @param args - Setup arguments; requires `sourceUrl`,
 *   `sourceWarehouseConnectionId`, `metabaseDatabaseId` and `sourceApiKeyRef`.
 * @returns A `metabase` connection that maps the given Metabase database id to
 *   the warehouse connection and enables sync for that database only.
 * @throws Error when a required argument is missing, when the database id is
 *   not a positive integer, or when the API key reference is rejected by
 *   `credentialRef`.
 */
function buildMetabaseConnection(args: KloSetupSourcesArgs): KloProjectConnectionConfig {
  if (!args.sourceUrl) {
    throw new Error('Missing Metabase URL: pass --source-url.');
  }
  if (!args.sourceWarehouseConnectionId) {
    throw new Error('Missing mapped warehouse: pass --source-warehouse-connection-id.');
  }
  const databaseId = args.metabaseDatabaseId;
  if (databaseId === undefined) {
    throw new Error('Missing Metabase database id: pass --metabase-database-id.');
  }
  // The interactive prompt feeds Number.parseInt output straight in, so NaN is a
  // real input here; report it as invalid rather than missing, and keep negative
  // or fractional ids from becoming mapping keys.
  if (!Number.isInteger(databaseId) || databaseId <= 0) {
    throw new Error(`Invalid Metabase database id: ${String(databaseId)}; expected a positive integer.`);
  }
  const databaseKey = String(databaseId);
  return {
    driver: 'metabase',
    api_url: args.sourceUrl,
    api_key_ref: credentialRef(args.sourceApiKeyRef, 'Metabase API key ref'),
    mappings: {
      databaseMappings: { [databaseKey]: args.sourceWarehouseConnectionId },
      syncEnabled: { [databaseKey]: true },
      syncMode: 'ONLY',
    },
  };
}
/**
 * Converts a `file:` repository URL into a filesystem path.
 *
 * @param repoUrl - A `file:` URL pointing at the repository root.
 * @param subpath - Optional folder inside the repository; ignored when empty.
 * @returns The repository root, joined with `subpath` when one is given.
 */
function sourcePathFromFileRepoUrl(repoUrl: string, subpath?: string): string {
  const repoRoot = fileURLToPath(repoUrl);
  if (!subpath) {
    return repoRoot;
  }
  return join(repoRoot, subpath);
}
null; +} + +async function collectYamlFilesRecursive(sourceRoot: string): Promise> { + const entries = await readdir(sourceRoot, { withFileTypes: true, recursive: true }); + const files: Array<{ content: string; path: string }> = []; + for (const entry of entries) { + if (!entry.isFile() || !/\.ya?ml$/i.test(entry.name)) { + continue; + } + const path = join(entry.parentPath, entry.name); + files.push({ path, content: await readFile(path, 'utf-8') }); + } + return files; +} + +async function defaultValidateDbt(connection: KloProjectConnectionConfig): Promise { + let sourceDir = stringField(connection.source_dir) ?? stringField(connection.sourceDir); + const repoUrl = stringField(connection.repo_url) ?? stringField(connection.repoUrl); + if (!sourceDir && repoUrl?.startsWith('file:')) { + sourceDir = sourcePathFromFileRepoUrl(repoUrl, stringField(connection.path)); + } + if (!sourceDir && repoUrl) { + const cacheDir = await mkdtemp(join(tmpdir(), 'klo-setup-dbt-')); + await cloneOrPull({ + repoUrl, + authToken: repoAuthToken(connection), + cacheDir, + branch: stringField(connection.branch) ?? 'main', + }); + sourceDir = stringField(connection.path) ? join(cacheDir, String(connection.path)) : cacheDir; + } + if (!sourceDir) { + return { ok: false, message: 'dbt setup requires --source-path or --source-git-url.' }; + } + const info = await loadProjectInfo(sourceDir); + const schemaFiles = await loadDbtSchemaFiles(sourceDir); + if (!info.projectName && typeof connection.project_name !== 'string') { + return { ok: false, message: 'dbt project metadata is missing project name.' }; + } + return { ok: true, detail: `project=${info.projectName ?? connection.project_name} schemas=${schemaFiles.length}` }; +} + +async function defaultValidateMetricflow(connection: KloProjectConnectionConfig): Promise { + const metricflow = isRecord(connection.metricflow) ? 
/**
 * Default Metabase validator: runs the connection `map` command in JSON mode
 * with stdout and stderr discarded, and turns its exit code into a result.
 *
 * @param projectDir - Project directory passed to the command.
 * @param connectionId - Id of the Metabase connection to map.
 * @returns `ok: true` when the command exits with 0, otherwise `ok: false`.
 */
async function defaultValidateMetabase(projectDir: string, connectionId: string): Promise<SourceValidationResult> {
  const silentIo = { stdout: { write() {} }, stderr: { write() {} } };
  const exitCode = await runKloConnection(
    { command: 'map', projectDir, sourceConnectionId: connectionId, json: true },
    silentIo,
  );
  if (exitCode !== 0) {
    return { ok: false, message: 'Metabase mapping validation failed' };
  }
  return { ok: true, detail: 'mapping validated' };
}
/**
 * Default Notion validator: resolves the token, fetches the bot user, and
 * retrieves every configured root page.
 *
 * @param connection - The Notion connection config to validate.
 * @returns `{ ok: true, detail: 'roots=N' }` when every call succeeds, or
 *   `{ ok: false, message }` when `auth_token_ref` is missing or blank.
 * @throws Whatever `resolveNotionAuthToken` or the Notion client calls throw.
 */
async function defaultValidateNotion(connection: KloProjectConnectionConfig): Promise<SourceValidationResult> {
  // A missing ref used to be stringified to the literal "undefined" and handed
  // to the resolver; report it as a validation failure instead.
  const tokenRef = stringField(connection.auth_token_ref);
  if (!tokenRef) {
    return { ok: false, message: 'Notion setup requires auth_token_ref (env:NAME or file:/absolute/path).' };
  }
  const token = await resolveNotionAuthToken(tokenRef);
  const client: NotionApi = new NotionClient(token);
  await client.retrieveBotUser();
  const roots = Array.isArray(connection.root_page_ids)
    ? connection.root_page_ids.filter((id): id is string => typeof id === 'string')
    : [];
  for (const root of roots) {
    await client.retrievePage(root);
  }
  return { ok: true, detail: `roots=${roots.length}` };
}
/**
 * Drives a wizard-style sequence of prompt steps with back-navigation.
 *
 * The step list is recomputed from the (mutated) state before every step, so
 * earlier answers can add or remove later steps.
 *
 * @param initialState - State object handed to every step; steps mutate it.
 * @param stepsForState - Produces the current step list for the given state.
 * @returns The state without `sourceLocation` once all steps are done, or
 *   `'back'` when the first step itself asks to go back.
 */
async function runSourcePromptSteps(
  initialState: SourcePromptState,
  stepsForState: (state: SourcePromptState) => SourcePromptStep[],
): Promise<KloSetupSourcesArgs | 'back'> {
  let cursor = 0;
  while (true) {
    const pipeline = stepsForState(initialState);
    if (cursor >= pipeline.length) {
      // Strip the prompt-only field before handing the arguments back.
      const { sourceLocation: _sourceLocation, ...sourceArgs } = initialState;
      return sourceArgs;
    }

    const outcome = await pipeline[cursor]?.(initialState);
    if (outcome !== 'back') {
      cursor += 1;
      continue;
    }
    if (cursor === 0) {
      return 'back';
    }
    cursor -= 1;
  }
}
/**
 * Returns the prompt step that asks for a connection name.
 *
 * @param args - Setup arguments; when `sourceConnectionId` is already set no
 *   step is produced.
 * @param source - Source type, used to label the prompt.
 * @param prompts - Prompt adapter used to ask the question.
 * @param defaultConnectionId - Pre-filled value, also used when the trimmed
 *   answer is empty.
 * @returns An empty list, or a single step that stores the chosen id on the
 *   state and yields `'back'` when the prompt resolves to `undefined`.
 */
function connectionIdPromptSteps(
  args: KloSetupSourcesArgs,
  source: KloSetupSourceType,
  prompts: KloSetupSourcesPromptAdapter,
  defaultConnectionId: string,
): SourcePromptStep[] {
  if (args.sourceConnectionId) {
    return [];
  }

  const askForConnectionId: SourcePromptStep = async (state) => {
    const answer = await promptText(prompts, {
      message: connectionNamePrompt(sourceLabel(source)),
      placeholder: defaultConnectionId,
      initialValue: defaultConnectionId,
    });
    if (answer === undefined) {
      return 'back';
    }
    const trimmed = answer.trim();
    state.sourceConnectionId = trimmed ? trimmed : defaultConnectionId;
    return 'next';
  };

  return [askForConnectionId];
}
+ ...(state.sourceLocation === 'path' + ? [ + async (currentState: SourcePromptState) => { + const sourcePath = await promptText(prompts, { message: `${source} local path` }); + if (sourcePath === undefined) return 'back'; + currentState.sourcePath = sourcePath; + return 'next'; + }, + ] + : []), + ...(state.sourceLocation === 'git' + ? [ + async (currentState: SourcePromptState) => { + const sourceGitUrl = await promptText(prompts, { message: `${source} git URL` }); + if (sourceGitUrl === undefined) return 'back'; + currentState.sourceGitUrl = sourceGitUrl; + return 'next'; + }, + async (currentState: SourcePromptState) => { + const branch = await promptText(prompts, { message: `${source} git branch`, initialValue: 'main' }); + if (branch === undefined) return 'back'; + currentState.sourceBranch = branch || 'main'; + return 'next'; + }, + ] + : []), + ...(state.sourceLocation + ? [ + async (currentState: SourcePromptState) => { + const subpath = await promptText(prompts, { + message: sourceSubpathPrompt(source), + placeholder: 'optional', + }); + if (subpath === undefined) return 'back'; + if (subpath) { + currentState.sourceSubpath = subpath; + } else { + delete currentState.sourceSubpath; + } + return 'next'; + }, + ] + : []), + ...(state.sourceLocation === 'git' + ? [ + async (currentState: SourcePromptState) => { + const result = await testGitRepo!({ repoUrl: currentState.sourceGitUrl! 
}); + if (result.ok) { + delete currentState.sourceAuthTokenRef; + prompts.log?.('Repository connected.'); + return 'next'; + } + const authRef = await promptText(prompts, { + message: gitAuthAfterFailurePrompt(source), + placeholder: 'env:GITHUB_TOKEN', + }); + if (authRef === undefined) return 'back'; + if (authRef) { + currentState.sourceAuthTokenRef = authRef; + } else { + delete currentState.sourceAuthTokenRef; + } + return 'next'; + }, + ] + : []), + ]); + } + + if (source === 'metabase') { + return await runSourcePromptSteps(initialState, () => [ + ...connectionSteps, + async (state) => { + const sourceUrl = await promptText(prompts, { message: 'Metabase URL' }); + if (sourceUrl === undefined) return 'back'; + state.sourceUrl = sourceUrl; + return 'next'; + }, + async (state) => { + const sourceApiKeyRef = await promptText(prompts, { + message: 'Metabase API key ref', + placeholder: 'env:METABASE_API_KEY', + }); + if (sourceApiKeyRef === undefined) return 'back'; + state.sourceApiKeyRef = sourceApiKeyRef; + return 'next'; + }, + async (state) => { + const sourceWarehouseConnectionId = await promptText(prompts, { message: 'Mapped warehouse connection id' }); + if (sourceWarehouseConnectionId === undefined) return 'back'; + state.sourceWarehouseConnectionId = sourceWarehouseConnectionId; + return 'next'; + }, + async (state) => { + const databaseId = await promptText(prompts, { message: 'Metabase database id' }); + if (databaseId === undefined) return 'back'; + state.metabaseDatabaseId = Number.parseInt(databaseId, 10); + return 'next'; + }, + ]); + } + + if (source === 'looker') { + return await runSourcePromptSteps(initialState, () => [ + ...connectionSteps, + async (state) => { + const sourceUrl = await promptText(prompts, { message: 'Looker base URL' }); + if (sourceUrl === undefined) return 'back'; + state.sourceUrl = sourceUrl; + return 'next'; + }, + async (state) => { + const sourceClientId = await promptText(prompts, { message: 'Looker client id' }); 
+ if (sourceClientId === undefined) return 'back'; + state.sourceClientId = sourceClientId; + return 'next'; + }, + async (state) => { + const sourceClientSecretRef = await promptText(prompts, { + message: 'Looker client secret ref', + placeholder: 'env:LOOKER_CLIENT_SECRET', + }); + if (sourceClientSecretRef === undefined) return 'back'; + state.sourceClientSecretRef = sourceClientSecretRef; + return 'next'; + }, + async (state) => { + const sourceWarehouseConnectionId = await promptText(prompts, { message: 'Mapped warehouse connection id' }); + if (sourceWarehouseConnectionId === undefined) return 'back'; + state.sourceWarehouseConnectionId = sourceWarehouseConnectionId; + return 'next'; + }, + async (state) => { + const lookerConnectionName = await promptText(prompts, { + message: 'Looker connection name', + placeholder: 'optional', + }); + if (lookerConnectionName === undefined) return 'back'; + if (lookerConnectionName) { + state.sourceTarget = lookerConnectionName; + } else { + delete state.sourceTarget; + } + return 'next'; + }, + ]); + } + + return await runSourcePromptSteps(initialState, (state) => [ + ...connectionSteps, + async (currentState) => { + const sourceApiKeyRef = await promptText(prompts, { + message: 'Notion token ref', + placeholder: 'env:NOTION_TOKEN', + }); + if (sourceApiKeyRef === undefined) return 'back'; + currentState.sourceApiKeyRef = sourceApiKeyRef; + return 'next'; + }, + async (currentState) => { + const crawlMode = await prompts.select({ + message: 'Notion crawl mode', + options: [ + { value: 'selected_roots', label: 'Selected roots' }, + { value: 'all_accessible', label: 'All accessible pages' }, + { value: 'back', label: 'Back' }, + ], + }); + if (crawlMode === 'back') return 'back'; + currentState.notionCrawlMode = crawlMode === 'all_accessible' ? 
'all_accessible' : 'selected_roots'; + if (currentState.notionCrawlMode === 'all_accessible') { + delete currentState.notionRootPageIds; + } + return 'next'; + }, + ...(state.notionCrawlMode === 'selected_roots' + ? [ + async (currentState: SourcePromptState) => { + const roots = await promptText(prompts, { + message: 'Notion root page ids', + placeholder: 'comma-separated ids', + }); + if (roots === undefined) return 'back'; + currentState.notionRootPageIds = roots + .split(',') + .map((root) => root.trim()) + .filter(Boolean); + return 'next'; + }, + ] + : []), + ]); +} + +function existingConnectionIdsBySource( + connections: Record, + source: KloSetupSourceType, +): string[] { + return Object.entries(connections) + .filter(([, connection]) => String(connection.driver ?? '').toLowerCase() === source) + .map(([connectionId]) => connectionId) + .sort((left, right) => left.localeCompare(right)); +} + +function defaultConnectionIdForSource( + connections: Record, + source: KloSetupSourceType, +): string { + const base = `${source}-main`; + if (!connections[base]) { + return base; + } + let index = 2; + while (connections[`${base}-${index}`]) { + index += 1; + } + return `${base}-${index}`; +} + +async function chooseInteractiveSourceConnection(input: { + args: KloSetupSourcesArgs; + source: KloSetupSourceType; + connections: Record; + prompts: KloSetupSourcesPromptAdapter; + testGitRepo?: KloSetupSourcesDeps['testGitRepo']; +}): Promise { + const existingIds = existingConnectionIdsBySource(input.connections, input.source); + const defaultConnectionId = defaultConnectionIdForSource(input.connections, input.source); + const label = sourceLabel(input.source); + + if (existingIds.length === 0) { + const sourceArgs = await promptForInteractiveSource(input.args, input.source, input.prompts, defaultConnectionId, input.testGitRepo); + return sourceArgs === 'back' ? 
'back' : { kind: 'new', args: sourceArgs };
+  }
+
+  while (true) {
+    const choice = await input.prompts.select({
+      message: `Configure ${label}`,
+      options: [
+        ...existingIds.map((connectionId) => ({
+          value: `existing:${connectionId}`,
+          label: `Use existing ${label} connection: ${connectionId}`,
+        })),
+        { value: 'new', label: `Add new ${label} connection` },
+        { value: 'back', label: 'Back' },
+      ],
+    });
+    if (choice === 'back') return 'back';
+    if (choice.startsWith('existing:')) {
+      const connectionId = choice.slice('existing:'.length);
+      const connection = input.connections[connectionId];
+      if (connection) {
+        return { kind: 'existing', connectionId, connection };
+      }
+      continue;
+    }
+    const sourceArgs = await promptForInteractiveSource(
+      input.args,
+      input.source,
+      input.prompts,
+      defaultConnectionId,
+      input.testGitRepo,
+    );
+    if (sourceArgs === 'back') {
+      continue;
+    }
+    return { kind: 'new', args: sourceArgs };
+  }
+}
+
+/** Dispatches to the connection builder for `source`; any unlisted source is built as Notion. */
+function buildConnection(source: KloSetupSourceType, args: KloSetupSourcesArgs): KloProjectConnectionConfig {
+  if (source === 'dbt') {
+    return buildDbtConnection(args);
+  }
+  if (source === 'metricflow') {
+    return buildMetricflowConnection(args);
+  }
+  if (source === 'metabase') {
+    return buildMetabaseConnection(args);
+  }
+  if (source === 'looker') {
+    return buildLookerConnection(args);
+  }
+  if (source === 'lookml') {
+    return buildLookmlConnection(args);
+  }
+  return buildNotionConnection(args);
+}
+
+/**
+ * Runs the validator for `source`, preferring the override in `deps` over the default.
+ *
+ * Metabase and Looker validators receive the project directory and connection id; the
+ * other validators receive the connection config itself.
+ */
+// NOTE(review): the type argument of this Promise was missing in this copy of the file;
+// the shape below is what runKloSetupSourcesStep reads from the result (`ok`, `message`)
+// - confirm against the validators' declared result type.
+async function validateSource(
+  source: KloSetupSourceType,
+  args: { projectDir: string; connectionId: string; connection: KloProjectConnectionConfig },
+  deps: KloSetupSourcesDeps,
+): Promise<{ ok: boolean; message?: string }> {
+  if (source === 'dbt') {
+    return await (deps.validateDbt ?? defaultValidateDbt)(args.connection);
+  }
+  if (source === 'metricflow') {
+    return await (deps.validateMetricflow ?? defaultValidateMetricflow)(args.connection);
+  }
+  if (source === 'metabase') {
+    return await (deps.validateMetabase ?? defaultValidateMetabase)(args.projectDir, args.connectionId);
+  }
+  if (source === 'looker') {
+    return await (deps.validateLooker ?? defaultValidateLooker)(args.projectDir, args.connectionId);
+  }
+  if (source === 'lookml') {
+    return await (deps.validateLookml ?? defaultValidateLookml)(args.connection);
+  }
+  return await (deps.validateNotion ?? defaultValidateNotion)(args.connection);
+}
+
+/**
+ * Setup step that configures context sources (dbt, MetricFlow, Metabase, Looker, LookML, Notion).
+ *
+ * Flow: honour `--skip-sources`; require a primary source; pick the sources (from
+ * `args.source` or a multiselect); for each one choose or create a connection, write it,
+ * validate it, run mapping for Metabase/Looker, and optionally run the initial ingest.
+ * A newly written connection is rolled back when validation, mapping, or ingest fails or
+ * the user backs out of ingest. In interactive mode the user may then add more sources.
+ *
+ * @param args Parsed setup arguments.
+ * @param io Output streams for user-facing messages.
+ * @param deps Optional overrides for prompts, validators, mapping, and the git probe.
+ * @returns A result whose `status` is 'ready' (with every connection id configured during
+ *   this call), 'skipped', 'failed', 'back', or 'missing-input'. Thrown errors are written
+ *   to `io.stderr` and reported as 'failed'.
+ */
+// NOTE(review): the type argument of this Promise was missing in this copy of the file;
+// the union below mirrors the return statements - confirm against the upstream result type.
+export async function runKloSetupSourcesStep(
+  args: KloSetupSourcesArgs,
+  io: KloCliIo,
+  deps: KloSetupSourcesDeps = {},
+): Promise<
+  | { status: 'ready'; projectDir: string; connectionIds: string[] }
+  | { status: 'skipped' | 'failed' | 'back' | 'missing-input'; projectDir: string }
+> {
+  try {
+    if (args.skipSources) {
+      await markSourcesComplete(args.projectDir);
+      io.stdout.write('Context source setup skipped.\n');
+      return { status: 'skipped', projectDir: args.projectDir };
+    }
+
+    const prompts = deps.prompts ?? createPromptAdapter();
+    const project = await loadKloProject({ projectDir: args.projectDir });
+    if (!hasPrimarySource(project.config)) {
+      const message = 'Connect a primary source before adding context sources.';
+      if (args.source) {
+        io.stderr.write(`${message}\n`);
+        return { status: 'failed', projectDir: args.projectDir };
+      }
+      if (args.inputMode !== 'disabled') {
+        io.stdout.write(`${message}\n`);
+        return { status: 'skipped', projectDir: args.projectDir };
+      }
+    }
+
+    // Collected across every pass of the selection loop: choosing "Add another" (or backing
+    // out to the selection menu) must not forget connections configured in earlier passes.
+    const readyConnectionIds: string[] = [];
+
+    while (true) {
+      const selected = args.source
+        ? [args.source]
+        : args.inputMode === 'disabled'
+          ? []
+          : await prompts.multiselect({
+              message: withMultiselectNavigation('Which context sources should KLO ingest?'),
+              options: [...SOURCE_OPTIONS],
+              required: false,
+            });
+      if (selected.includes('back')) {
+        return { status: 'back', projectDir: args.projectDir };
+      }
+      if (selected.length === 0) {
+        if (args.inputMode === 'disabled') {
+          io.stderr.write('Missing context source selection: pass --source or --skip-sources.\n');
+          return { status: 'missing-input', projectDir: args.projectDir };
+        }
+        await markSourcesComplete(args.projectDir);
+        if (readyConnectionIds.length > 0) {
+          // Nothing new was picked after "Add another"; the earlier connections still count.
+          return { status: 'ready', projectDir: args.projectDir, connectionIds: readyConnectionIds };
+        }
+        io.stdout.write('No context sources selected.\n');
+        return { status: 'skipped', projectDir: args.projectDir };
+      }
+
+      let returnToSourceSelection = false;
+      for (const source of selected as KloSetupSourceType[]) {
+        const sourceChoice = args.source
+          ? ({ kind: 'new', args } as const)
+          : await chooseInteractiveSourceConnection({
+              args,
+              source,
+              // Reloaded per source so connections written earlier in this run are visible.
+              connections: (await loadKloProject({ projectDir: args.projectDir })).config.connections,
+              prompts,
+              testGitRepo: deps.testGitRepo,
+            });
+        if (sourceChoice === 'back') {
+          if (args.source) {
+            return { status: 'back', projectDir: args.projectDir };
+          }
+          returnToSourceSelection = true;
+          break;
+        }
+        const connectionId =
+          sourceChoice.kind === 'existing'
+            ? sourceChoice.connectionId
+            : (sourceChoice.args.sourceConnectionId ?? `${source}-main`);
+        const connection =
+          sourceChoice.kind === 'existing' ? sourceChoice.connection : buildConnection(source, sourceChoice.args);
+        // Only newly written connections get a rollback handle.
+        const rollback =
+          sourceChoice.kind === 'existing'
+            ? undefined
+            : await writeSourceConnection(args.projectDir, connectionId, connection, sourceAdapter(source));
+        if (sourceChoice.kind === 'existing') {
+          await ensureSourceAdapterEnabled(args.projectDir, source);
+        }
+        const validation = await validateSource(source, { projectDir: args.projectDir, connectionId, connection }, deps);
+
+        if (!validation.ok) {
+          await rollback?.();
+          io.stderr.write(`${validation.message}\n`);
+          return { status: 'failed', projectDir: args.projectDir };
+        }
+        if (source === 'metabase' || source === 'looker') {
+          const mappingCode = await (deps.runMapping ?? defaultRunMapping)(args.projectDir, connectionId, io);
+          if (mappingCode !== 0) {
+            await rollback?.();
+            return { status: 'failed', projectDir: args.projectDir };
+          }
+        }
+        if (args.runInitialSourceIngest) {
+          const ingestResult = await runInitialSourceIngestWithRecovery({
+            args,
+            connectionId,
+            io,
+            prompts,
+            deps,
+          });
+          if (ingestResult === 'failed') {
+            await rollback?.();
+            return { status: 'failed', projectDir: args.projectDir };
+          }
+          if (ingestResult === 'back') {
+            await rollback?.();
+            if (args.source) {
+              return { status: 'back', projectDir: args.projectDir };
+            }
+            returnToSourceSelection = true;
+            break;
+          }
+        } else {
+          io.stdout.write(`Context source ${connectionId} saved. It will be built during the context build step.\n`);
+        }
+        // The same existing connection can be picked in more than one pass; list it once.
+        if (!readyConnectionIds.includes(connectionId)) {
+          readyConnectionIds.push(connectionId);
+        }
+      }
+
+      if (returnToSourceSelection) {
+        continue;
+      }
+
+      if (readyConnectionIds.length > 0 && !args.source && args.inputMode !== 'disabled') {
+        const addMore = await prompts.select({
+          message: `${readyConnectionIds.length} context source${readyConnectionIds.length > 1 ? 's' : ''} configured (${readyConnectionIds.join(', ')}). Add another?`,
+          options: [
+            { value: 'done', label: 'Done — continue to context build' },
+            { value: 'add', label: 'Add another context source' },
+          ],
+        });
+        if (addMore === 'add') {
+          continue;
+        }
+      }
+
+      await markSourcesComplete(args.projectDir);
+      return { status: 'ready', projectDir: args.projectDir, connectionIds: readyConnectionIds };
+    }
+  } catch (error) {
+    io.stderr.write(`${error instanceof Error ? error.message : String(error)}\n`);
+    return { status: 'failed', projectDir: args.projectDir };
+  }
+}
diff --git a/packages/cli/src/setup.test.ts b/packages/cli/src/setup.test.ts
new file mode 100644
index 00000000..d29e39f5
--- /dev/null
+++ b/packages/cli/src/setup.test.ts
@@ -0,0 +1,1502 @@
+import { mkdir, mkdtemp, readFile, rm, stat, writeFile } from 'node:fs/promises';
+import { tmpdir } from 'node:os';
+import { join } from 'node:path';
+import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest';
+
+import { contextBuildCommands, writeKloSetupContextState } from './setup-context.js';
+import { readKloSetupStatus, runKloSetup } from './setup.js';
+
+function makeIo() {
+  let stdout = '';
+  let stderr = '';
+  return {
+    io: {
+      stdout: {
+        write: (chunk: string) => {
+          stdout += chunk;
+        },
+      },
+      stderr: {
+        write: (chunk: string) => {
+          stderr += chunk;
+        },
+      },
+    },
+    stdout: () => stdout,
+    stderr: () => stderr,
+  };
+}
+
+describe('setup status', () => {
+  let tempDir: string;
+
+  beforeEach(async () => {
+    tempDir = await mkdtemp(join(tmpdir(), 'klo-setup-status-'));
+  });
+
+  afterEach(async () => {
+    await rm(tempDir, { recursive: true, force: true });
+  });
+
+  it('reports a missing project without creating files', async () => {
+    const status = await readKloSetupStatus(tempDir);
+
+    expect(status).toMatchObject({
+      project: { path: tempDir, ready: false },
+      llm: { ready: false },
+      embeddings: { ready: false },
+      databases: [],
+      sources: [],
+      context: { ready: false, status: 'not_started' },
+      agents: [],
+    });
+  });
+
+  
it('reports deterministic default embeddings as not setup-ready', async () => { + await mkdir(tempDir, { recursive: true }); + await writeFile( + join(tempDir, 'klo.yaml'), + [ + 'project: revenue', + 'llm:', + ' provider:', + ' backend: anthropic', + ' anthropic:', + ' api_key: env:ANTHROPIC_API_KEY', + ' models:', + ' default: claude-sonnet-4-6', + 'ingest:', + ' embeddings:', + ' backend: deterministic', + ' model: deterministic', + ' dimensions: 8', + 'connections: {}', + ].join('\n'), + 'utf-8', + ); + + await expect(readKloSetupStatus(tempDir)).resolves.toMatchObject({ + project: { path: tempDir, ready: true }, + llm: { backend: 'anthropic', ready: true, model: 'claude-sonnet-4-6' }, + embeddings: { backend: 'deterministic', ready: false, model: 'deterministic', dimensions: 8 }, + }); + }); + + it('uses setup database connection ids when present', async () => { + await writeFile( + join(tempDir, 'klo.yaml'), + [ + 'project: revenue', + 'setup:', + ' database_connection_ids:', + ' - warehouse', + ' - analytics', + ' completed_steps:', + ' - project', + ' - databases', + 'connections:', + ' warehouse:', + ' driver: postgres', + ' url: env:WAREHOUSE_URL', + 'ingest:', + ' embeddings:', + ' backend: openai', + ' model: text-embedding-3-small', + ' dimensions: 1536', + ' openai:', + ' api_key: env:OPENAI_API_KEY', + ].join('\n'), + 'utf-8', + ); + + await expect(readKloSetupStatus(tempDir)).resolves.toMatchObject({ + databases: [ + { connectionId: 'warehouse', ready: true }, + { connectionId: 'analytics', ready: false }, + ], + }); + }); + + it('reports selected databases as ready only after the database setup step is complete', async () => { + await writeFile( + join(tempDir, 'klo.yaml'), + [ + 'project: revenue', + 'setup:', + ' database_connection_ids:', + ' - warehouse', + ' completed_steps:', + ' - project', + 'connections:', + ' warehouse:', + ' driver: postgres', + ' url: env:DATABASE_URL', + ' readonly: true', + '', + ].join('\n'), + 'utf-8', + ); + + 
await expect(readKloSetupStatus(tempDir)).resolves.toMatchObject({ + databases: [{ connectionId: 'warehouse', ready: false }], + }); + + await writeFile( + join(tempDir, 'klo.yaml'), + [ + 'project: revenue', + 'setup:', + ' database_connection_ids:', + ' - warehouse', + ' completed_steps:', + ' - project', + ' - databases', + 'connections:', + ' warehouse:', + ' driver: postgres', + ' url: env:DATABASE_URL', + ' readonly: true', + '', + ].join('\n'), + 'utf-8', + ); + + await expect(readKloSetupStatus(tempDir)).resolves.toMatchObject({ + databases: [{ connectionId: 'warehouse', ready: true }], + }); + }); + + it('reports source status from configured source connections', async () => { + await writeFile( + join(tempDir, 'klo.yaml'), + [ + 'project: revenue', + 'setup:', + ' database_connection_ids: []', + ' completed_steps:', + ' - project', + ' - sources', + 'connections:', + ' docs:', + ' driver: notion', + ' auth_token_ref: env:NOTION_TOKEN', + ' crawl_mode: all_accessible', + ' warehouse:', + ' driver: postgres', + ' url: env:DATABASE_URL', + '', + ].join('\n'), + 'utf-8', + ); + + await expect(readKloSetupStatus(tempDir)).resolves.toMatchObject({ + sources: [{ connectionId: 'docs', type: 'notion', ready: true }], + }); + }); + + it('reports agent status from the install manifest', async () => { + await mkdir(join(tempDir, '.klo', 'agents'), { recursive: true }); + await writeFile(join(tempDir, 'klo.yaml'), 'project: revenue\nconnections: {}\n', 'utf-8'); + await writeFile( + join(tempDir, '.klo/agents/install-manifest.json'), + JSON.stringify( + { + version: 1, + projectDir: tempDir, + installedAt: '2026-05-07T00:00:00.000Z', + installs: [{ target: 'codex', scope: 'project', mode: 'cli' }], + entries: [], + }, + null, + 2, + ), + 'utf-8', + ); + + await expect(readKloSetupStatus(tempDir)).resolves.toMatchObject({ + agents: [{ target: 'codex', scope: 'project', ready: true }], + }); + }); + + it('reports setup-managed context build status and commands', async 
() => { + await writeFile( + join(tempDir, 'klo.yaml'), + [ + 'project: revenue', + 'setup:', + ' database_connection_ids:', + ' - warehouse', + ' completed_steps:', + ' - project', + ' - llm', + ' - embeddings', + ' - databases', + ' - sources', + 'connections:', + ' warehouse:', + ' driver: postgres', + ' url: env:DATABASE_URL', + 'llm:', + ' provider:', + ' backend: anthropic', + ' models:', + ' default: claude-sonnet-4-6', + 'ingest:', + ' embeddings:', + ' backend: openai', + ' model: text-embedding-3-small', + ' dimensions: 1536', + '', + ].join('\n'), + 'utf-8', + ); + await writeKloSetupContextState(tempDir, { + runId: 'setup-context-local-abc123', + status: 'running', + startedAt: '2026-05-09T10:00:00.000Z', + updatedAt: '2026-05-09T10:01:00.000Z', + primarySourceConnectionIds: ['warehouse'], + contextSourceConnectionIds: [], + reportIds: [], + artifactPaths: [], + retryableFailedTargets: [], + commands: contextBuildCommands(tempDir, 'setup-context-local-abc123'), + }); + + await expect(readKloSetupStatus(tempDir)).resolves.toMatchObject({ + context: { + ready: false, + status: 'running', + runId: 'setup-context-local-abc123', + watchCommand: `klo setup context watch setup-context-local-abc123 --project-dir ${tempDir}`, + statusCommand: `klo setup context status setup-context-local-abc123 --project-dir ${tempDir}`, + }, + }); + }); + + it('prints plain and JSON setup status', async () => { + const plainIo = makeIo(); + const jsonIo = makeIo(); + + await expect(runKloSetup({ command: 'status', projectDir: tempDir, json: false }, plainIo.io)).resolves.toBe(0); + await expect(runKloSetup({ command: 'status', projectDir: tempDir, json: true }, jsonIo.io)).resolves.toBe(0); + + expect(plainIo.stdout()).toContain(`No KLO project found at ${tempDir}.`); + expect(plainIo.stdout()).toContain('Check another project: klo --project-dir setup status'); + expect(plainIo.stdout()).toContain('Or from that folder: klo setup status'); + 
expect(plainIo.stdout()).toContain('Create a new KLO project here: klo setup'); + expect(plainIo.stdout()).not.toContain('Project ready: no'); + expect(JSON.parse(jsonIo.stdout())).toMatchObject({ project: { path: tempDir, ready: false } }); + expect(plainIo.stderr()).toBe(''); + expect(jsonIo.stderr()).toBe(''); + }); + + it('prints the readiness checklist for an existing project', async () => { + const testIo = makeIo(); + await writeFile(join(tempDir, 'klo.yaml'), 'project: revenue\nconnections: {}\n', 'utf-8'); + + await expect(runKloSetup({ command: 'status', projectDir: tempDir, json: false }, testIo.io)).resolves.toBe(0); + + expect(testIo.stdout()).toContain(`KLO project: ${tempDir}`); + expect(testIo.stdout()).toContain('Project ready: yes'); + expect(testIo.stdout()).toContain('LLM ready: no'); + expect(testIo.stdout()).toContain('KLO context built: no'); + expect(testIo.stdout()).not.toContain('No KLO project found.'); + expect(testIo.stderr()).toBe(''); + }); + + it('prints the setup shell intro for auto-created run mode', async () => { + const testIo = makeIo(); + + await expect( + runKloSetup( + { + command: 'run', + projectDir: tempDir, + mode: 'auto', + agents: false, + skipAgents: true, + inputMode: 'disabled', + yes: true, + skipLlm: true, + skipEmbeddings: true, + databaseSchemas: [], + skipDatabases: true, + skipSources: true, + }, + testIo.io, + ), + ).resolves.toBe(0); + + expect(testIo.stdout()).toContain('KLO setup'); + expect(testIo.stdout()).toContain(`Project: ${tempDir}`); + expect(testIo.stdout()).toContain('Project ready: yes'); + expect(testIo.stdout()).toContain('What you can do next:'); + expect(testIo.stdout()).toContain('Connect data, then build context.'); + expect(testIo.stdout()).toContain('klo setup'); + expect(testIo.stdout()).not.toContain('klo agent context --json'); + expect(testIo.stdout()).not.toContain('Optional MCP:'); + expect(testIo.stderr()).toBe(''); + }); + + it('shows demo near the bottom of the first setup 
intent menu before project creation', async () => { + const testIo = makeIo(); + const select = vi.fn(async (options: { options: Array<{ value: string; label: string }> }) => { + const labels = options.options.map((option) => option.label); + expect(labels).toEqual([ + 'Set up KLO for my data', + 'Check setup status', + 'Try KLO with packaged demo data', + 'Exit', + ]); + expect(labels.indexOf('Try KLO with packaged demo data')).toBe(labels.length - 2); + return 'exit'; + }); + const cancel = vi.fn(); + + await expect( + runKloSetup( + { + command: 'run', + projectDir: tempDir, + mode: 'auto', + agents: false, + skipAgents: false, + inputMode: 'auto', + yes: false, + skipLlm: false, + skipEmbeddings: false, + databaseSchemas: [], + skipDatabases: false, + skipSources: false, + showEntryMenu: true, + }, + testIo.io, + { entryMenuDeps: { prompts: { select, cancel } } }, + ), + ).resolves.toBe(0); + + expect(select).toHaveBeenCalledWith(expect.objectContaining({ message: 'What do you want to do?' 
})); + expect(cancel).toHaveBeenCalledWith('Setup cancelled.'); + }); + + it('shows agent connection only when the selected setup project exists', async () => { + const missingIo = makeIo(); + const existingIo = makeIo(); + const missingSelect = vi.fn(async (options: { options: Array<{ value: string; label: string }> }) => { + expect(options.options.map((option) => option.label)).not.toContain('Connect a coding agent to KLO'); + return 'exit'; + }); + const existingSelect = vi.fn(async (options: { options: Array<{ value: string; label: string }> }) => { + const labels = options.options.map((option) => option.label); + expect(labels).toEqual([ + 'Resume or change an existing setup', + 'Create a new KLO project', + 'Connect a coding agent to KLO', + 'Check setup status', + 'Try KLO with packaged demo data', + 'Exit', + ]); + return 'exit'; + }); + + await expect( + runKloSetup( + { + command: 'run', + projectDir: tempDir, + mode: 'auto', + agents: false, + skipAgents: false, + inputMode: 'auto', + yes: false, + skipLlm: false, + skipEmbeddings: false, + databaseSchemas: [], + skipDatabases: false, + skipSources: false, + showEntryMenu: true, + }, + missingIo.io, + { entryMenuDeps: { prompts: { select: missingSelect, cancel: vi.fn() } } }, + ), + ).resolves.toBe(0); + + await writeFile(join(tempDir, 'klo.yaml'), 'project: revenue\nconnections: {}\n', 'utf-8'); + + await expect( + runKloSetup( + { + command: 'run', + projectDir: tempDir, + mode: 'auto', + agents: false, + skipAgents: false, + inputMode: 'auto', + yes: false, + skipLlm: false, + skipEmbeddings: false, + databaseSchemas: [], + skipDatabases: false, + skipSources: false, + showEntryMenu: true, + }, + existingIo.io, + { entryMenuDeps: { prompts: { select: existingSelect, cancel: vi.fn() } } }, + ), + ).resolves.toBe(0); + + expect(missingSelect).toHaveBeenCalledTimes(1); + expect(existingSelect).toHaveBeenCalledTimes(1); + }); + + it('lets Back from project selection return to the first setup intent menu', 
async () => { + const entryChoices = ['setup', 'exit']; + const entryPrompts = { + select: vi.fn(async () => entryChoices.shift() ?? 'exit'), + cancel: vi.fn(), + }; + const projectPrompts = { + select: vi.fn(async () => 'back'), + text: vi.fn(), + cancel: vi.fn(), + }; + + await expect( + runKloSetup( + { + command: 'run', + projectDir: tempDir, + mode: 'auto', + agents: false, + skipAgents: true, + inputMode: 'auto', + yes: false, + skipLlm: true, + skipEmbeddings: true, + databaseSchemas: [], + skipDatabases: true, + skipSources: true, + showEntryMenu: true, + }, + makeIo().io, + { + entryMenuDeps: { prompts: entryPrompts }, + project: { prompts: projectPrompts }, + }, + ), + ).resolves.toBe(0); + + expect(projectPrompts.select).toHaveBeenCalledWith( + expect.objectContaining({ + message: 'Which KLO project should setup use?', + options: expect.arrayContaining([expect.objectContaining({ value: 'back', label: 'Back' })]), + }), + ); + expect(projectPrompts.select).toHaveBeenCalledWith( + expect.objectContaining({ + message: 'Which KLO project should setup use?', + options: expect.not.arrayContaining([expect.objectContaining({ value: 'exit', label: 'Exit' })]), + }), + ); + expect(entryPrompts.select).toHaveBeenCalledTimes(2); + expect(entryPrompts.cancel).toHaveBeenCalledWith('Setup cancelled.'); + expect(projectPrompts.cancel).not.toHaveBeenCalled(); + await expect(stat(join(tempDir, 'klo.yaml'))).rejects.toThrow(); + }); + + it('lets Back from new project creation return to the first setup intent menu', async () => { + const existingConfig = 'project: revenue\nconnections: {}\n'; + await writeFile(join(tempDir, 'klo.yaml'), existingConfig, 'utf-8'); + + const entryChoices = ['new-project', 'exit']; + const entryPrompts = { + select: vi.fn(async () => entryChoices.shift() ?? 
'exit'), + cancel: vi.fn(), + }; + const projectPrompts = { + select: vi.fn(async () => 'back'), + text: vi.fn(), + cancel: vi.fn(), + }; + + await expect( + runKloSetup( + { + command: 'run', + projectDir: tempDir, + mode: 'auto', + agents: false, + skipAgents: true, + inputMode: 'auto', + yes: false, + skipLlm: true, + skipEmbeddings: true, + databaseSchemas: [], + skipDatabases: true, + skipSources: true, + showEntryMenu: true, + }, + makeIo().io, + { + entryMenuDeps: { prompts: entryPrompts }, + project: { prompts: projectPrompts }, + }, + ), + ).resolves.toBe(0); + + expect(projectPrompts.select).toHaveBeenCalledWith( + expect.objectContaining({ + message: 'Where should KLO create the project?', + options: expect.arrayContaining([expect.objectContaining({ value: 'back', label: 'Back' })]), + }), + ); + expect(entryPrompts.select).toHaveBeenCalledTimes(2); + expect(entryPrompts.cancel).toHaveBeenCalledWith('Setup cancelled.'); + expect(projectPrompts.cancel).not.toHaveBeenCalled(); + await expect(readFile(join(tempDir, 'klo.yaml'), 'utf-8')).resolves.toBe(existingConfig); + }); + + it('creates a separate project when the existing setup menu chooses new project', async () => { + const existingProjectDir = join(tempDir, 'existing'); + const newProjectDir = join(tempDir, 'fresh'); + await mkdir(existingProjectDir, { recursive: true }); + const existingConfig = 'project: revenue\nconnections: {}\n'; + await writeFile(join(existingProjectDir, 'klo.yaml'), existingConfig, 'utf-8'); + + const projectChoices = ['custom', 'create']; + const projectPrompts = { + select: vi.fn(async () => projectChoices.shift() ?? 
'exit'), + text: vi.fn(async () => newProjectDir), + cancel: vi.fn(), + }; + const model = vi.fn(async (args: { projectDir: string }) => ({ + status: 'skipped' as const, + projectDir: args.projectDir, + })); + const embeddings = vi.fn(async (args: { projectDir: string }) => ({ + status: 'skipped' as const, + projectDir: args.projectDir, + })); + const databases = vi.fn(async (args: { projectDir: string }) => ({ + status: 'skipped' as const, + projectDir: args.projectDir, + })); + const sources = vi.fn(async (args: { projectDir: string }) => ({ + status: 'skipped' as const, + projectDir: args.projectDir, + })); + + await expect( + runKloSetup( + { + command: 'run', + projectDir: existingProjectDir, + mode: 'auto', + agents: false, + skipAgents: true, + inputMode: 'auto', + yes: false, + skipLlm: true, + skipEmbeddings: true, + databaseSchemas: [], + skipDatabases: true, + skipSources: true, + showEntryMenu: true, + }, + makeIo().io, + { + entryMenuDeps: { prompts: { select: vi.fn(async () => 'new-project'), cancel: vi.fn() } }, + project: { prompts: projectPrompts }, + model, + embeddings, + databases, + sources, + }, + ), + ).resolves.toBe(0); + + expect(projectPrompts.text).toHaveBeenCalledWith( + expect.objectContaining({ + message: 'Project folder path\nPress Escape to go back.\n', + placeholder: './analytics-klo, ~/analytics-klo, or /Users/you/projects/analytics-klo', + }), + ); + expect(projectPrompts.select).toHaveBeenCalledWith( + expect.objectContaining({ message: 'Where should KLO create the project?' 
}), + ); + await expect(stat(join(newProjectDir, 'klo.yaml'))).resolves.toBeDefined(); + await expect(readFile(join(existingProjectDir, 'klo.yaml'), 'utf-8')).resolves.toBe(existingConfig); + expect(model).toHaveBeenCalledWith(expect.objectContaining({ projectDir: newProjectDir }), expect.anything()); + expect(embeddings).toHaveBeenCalledWith(expect.objectContaining({ projectDir: newProjectDir }), expect.anything()); + expect(databases).toHaveBeenCalledWith(expect.objectContaining({ projectDir: newProjectDir }), expect.anything()); + expect(sources).toHaveBeenCalledWith(expect.objectContaining({ projectDir: newProjectDir }), expect.anything()); + }); + + it('does not print navigation instructions immediately after confirming new project creation', async () => { + const existingProjectDir = join(tempDir, 'existing'); + const newProjectDir = join(tempDir, 'fresh'); + await mkdir(existingProjectDir, { recursive: true }); + await writeFile(join(existingProjectDir, 'klo.yaml'), 'project: revenue\nconnections: {}\n', 'utf-8'); + + const projectChoices = ['custom', 'create']; + const projectPrompts = { + select: vi.fn(async () => projectChoices.shift() ?? 
'exit'), + text: vi.fn(async () => newProjectDir), + cancel: vi.fn(), + }; + const model = vi.fn(async (args: { projectDir: string; showPromptInstructions?: boolean }) => { + expect(args.showPromptInstructions).toBe(false); + return { status: 'skipped' as const, projectDir: args.projectDir }; + }); + const testIo = makeIo(); + + await expect( + runKloSetup( + { + command: 'run', + projectDir: existingProjectDir, + mode: 'auto', + agents: false, + skipAgents: true, + inputMode: 'auto', + yes: false, + skipLlm: false, + skipEmbeddings: true, + databaseSchemas: [], + skipDatabases: true, + skipSources: true, + showEntryMenu: true, + }, + testIo.io, + { + entryMenuDeps: { prompts: { select: vi.fn(async () => 'new-project'), cancel: vi.fn() } }, + project: { prompts: projectPrompts }, + model, + }, + ), + ).resolves.toBe(0); + + expect(testIo.stdout()).toContain(`Project: ${newProjectDir}\n`); + expect(testIo.stdout()).not.toContain( + 'Use Up/Down to move, Enter to confirm the current selection, choose Back to return to the previous step, Ctrl+C to exit.', + ); + }); + + it('runs the seeded demo when the first setup intent menu chooses packaged demo data', async () => { + const testIo = makeIo(); + const demo = vi.fn(async (_args: { projectDir: string }, _io: unknown) => 0); + + await expect( + runKloSetup( + { + command: 'run', + projectDir: tempDir, + mode: 'auto', + agents: false, + skipAgents: false, + inputMode: 'auto', + yes: false, + skipLlm: false, + skipEmbeddings: false, + databaseSchemas: [], + skipDatabases: false, + skipSources: false, + showEntryMenu: true, + }, + testIo.io, + { entryMenuDeps: { prompts: { select: vi.fn(async () => 'demo'), cancel: vi.fn() } }, demo }, + ), + ).resolves.toBe(0); + + expect(demo).toHaveBeenCalledWith( + expect.objectContaining({ + command: 'seeded', + outputMode: 'viz', + inputMode: 'auto', + }), + testIo.io, + ); + expect(demo.mock.calls[0]?.[0].projectDir).toMatch(/klo-demo-/); + }); + + it('creates a project through run 
mode when --new is selected', async () => { + const testIo = makeIo(); + + await expect( + runKloSetup( + { + command: 'run', + projectDir: tempDir, + mode: 'new', + agents: false, + skipAgents: true, + inputMode: 'disabled', + yes: false, + skipLlm: true, + skipEmbeddings: true, + databaseSchemas: [], + skipDatabases: true, + skipSources: true, + }, + testIo.io, + ), + ).resolves.toBe(0); + + await expect(stat(join(tempDir, 'klo.yaml'))).resolves.toBeDefined(); + expect(await readFile(join(tempDir, 'klo.yaml'), 'utf-8')).toContain('completed_steps:'); + expect(testIo.stdout()).toContain('KLO setup'); + expect(testIo.stdout()).toContain(`Project: ${tempDir}`); + expect(testIo.stdout()).toContain('Project ready: yes'); + expect(testIo.stderr()).toBe(''); + }); + + it('returns nonzero when project selection is missing in no-input mode even when optional sections are skipped', async () => { + const testIo = makeIo(); + + await expect( + runKloSetup( + { + command: 'run', + projectDir: tempDir, + mode: 'auto', + agents: false, + skipAgents: true, + inputMode: 'disabled', + yes: false, + skipLlm: true, + skipEmbeddings: true, + databaseSchemas: [], + skipDatabases: true, + skipSources: true, + }, + testIo.io, + ), + ).resolves.toBe(1); + + expect(testIo.stderr()).toContain('Missing setup choice'); + await expect(stat(join(tempDir, 'klo.yaml'))).rejects.toThrow(); + }); + + it('returns nonzero when project selection is missing in non-interactive setup', async () => { + const testIo = makeIo(); + + await expect( + runKloSetup( + { + command: 'run', + projectDir: tempDir, + mode: 'auto', + agents: false, + skipAgents: true, + inputMode: 'disabled', + yes: false, + skipLlm: false, + skipEmbeddings: true, + databaseSchemas: [], + skipDatabases: true, + skipSources: true, + }, + testIo.io, + ), + ).resolves.toBe(1); + + expect(testIo.stderr()).toContain('Missing setup choice'); + await expect(stat(join(tempDir, 'klo.yaml'))).rejects.toThrow(); + }); + + it('runs the Anthropic 
model step after project selection succeeds', async () => { + const testIo = makeIo(); + const model = vi.fn(async () => ({ status: 'ready' as const, projectDir: tempDir })); + + await expect( + runKloSetup( + { + command: 'run', + projectDir: tempDir, + mode: 'new', + agents: false, + skipAgents: true, + inputMode: 'disabled', + yes: false, + anthropicApiKeyEnv: 'ANTHROPIC_API_KEY', + anthropicModel: 'claude-sonnet-4-6', + skipLlm: false, + skipEmbeddings: true, + databaseSchemas: [], + skipDatabases: true, + skipSources: true, + }, + testIo.io, + { model }, + ), + ).resolves.toBe(0); + + expect(model).toHaveBeenCalledWith( + expect.objectContaining({ + projectDir: tempDir, + inputMode: 'disabled', + anthropicApiKeyEnv: 'ANTHROPIC_API_KEY', + anthropicModel: 'claude-sonnet-4-6', + skipLlm: false, + }), + testIo.io, + ); + }); + + it('runs the embedding setup step after the model step succeeds', async () => { + const testIo = makeIo(); + const model = vi.fn(async () => ({ status: 'ready' as const, projectDir: tempDir })); + const embeddings = vi.fn(async () => ({ status: 'ready' as const, projectDir: tempDir })); + + await expect( + runKloSetup( + { + command: 'run', + projectDir: tempDir, + mode: 'new', + agents: false, + skipAgents: true, + inputMode: 'disabled', + yes: false, + anthropicApiKeyEnv: 'ANTHROPIC_API_KEY', + anthropicModel: 'claude-sonnet-4-6', + skipLlm: false, + embeddingBackend: 'openai', + embeddingApiKeyEnv: 'OPENAI_API_KEY', + skipEmbeddings: false, + databaseSchemas: [], + skipDatabases: true, + skipSources: true, + }, + testIo.io, + { model, embeddings }, + ), + ).resolves.toBe(0); + + expect(embeddings).toHaveBeenCalledWith( + expect.objectContaining({ + projectDir: tempDir, + inputMode: 'disabled', + embeddingBackend: 'openai', + embeddingApiKeyEnv: 'OPENAI_API_KEY', + skipEmbeddings: false, + }), + testIo.io, + ); + }); + + it('lets Back from embedding setup return to the model step instead of exiting', async () => { + const testIo = 
makeIo(); + const modelResults = [ + { status: 'ready' as const, projectDir: tempDir }, + { status: 'back' as const, projectDir: tempDir }, + ]; + const model = vi.fn(async () => modelResults.shift() ?? { status: 'back' as const, projectDir: tempDir }); + const embeddings = vi.fn(async () => ({ status: 'back' as const, projectDir: tempDir })); + + await expect( + runKloSetup( + { + command: 'run', + projectDir: tempDir, + mode: 'new', + agents: false, + skipAgents: true, + inputMode: 'auto', + yes: false, + skipLlm: false, + skipEmbeddings: false, + databaseSchemas: [], + skipDatabases: true, + skipSources: true, + }, + testIo.io, + { model, embeddings }, + ), + ).resolves.toBe(0); + + expect(model).toHaveBeenCalledTimes(2); + expect(model).toHaveBeenNthCalledWith(2, expect.objectContaining({ forcePrompt: true }), testIo.io); + expect(embeddings).toHaveBeenCalledTimes(1); + }); + + it('lets Back from database selection return to embedding setup after an empty selection warning', async () => { + const testIo = makeIo(); + const modelResults = [ + { status: 'ready' as const, projectDir: tempDir }, + { status: 'back' as const, projectDir: tempDir }, + ]; + const model = vi.fn(async () => modelResults.shift() ?? { status: 'back' as const, projectDir: tempDir }); + const embeddingResults = [ + { status: 'ready' as const, projectDir: tempDir }, + { status: 'back' as const, projectDir: tempDir }, + ]; + const embeddings = vi.fn(async () => embeddingResults.shift() ?? { status: 'back' as const, projectDir: tempDir }); + const databaseMultiselectValues = [[], ['back']]; + const databasePrompts = { + multiselect: vi.fn(async () => databaseMultiselectValues.shift() ?? 
['back']), + select: vi.fn(async () => 'back'), + text: vi.fn(), + password: vi.fn(), + cancel: vi.fn(), + }; + + await expect( + runKloSetup( + { + command: 'run', + projectDir: tempDir, + mode: 'new', + agents: false, + skipAgents: true, + inputMode: 'auto', + yes: false, + skipLlm: false, + skipEmbeddings: false, + databaseSchemas: [], + skipDatabases: false, + skipSources: true, + }, + testIo.io, + { + model, + embeddings, + databasesDeps: { prompts: databasePrompts }, + }, + ), + ).resolves.toBe(0); + + expect(databasePrompts.select).not.toHaveBeenCalled(); + expect(testIo.stdout()).toContain( + 'KLO cannot work without at least one primary source. Select a source or press Escape to go back.', + ); + expect(embeddings).toHaveBeenCalledTimes(2); + expect(embeddings).toHaveBeenNthCalledWith(2, expect.objectContaining({ forcePrompt: true }), testIo.io); + expect(testIo.stderr()).not.toContain('No primary sources selected.'); + }); + + it('lets Back from the first setup step return to the entry menu instead of exiting', async () => { + await writeFile(join(tempDir, 'klo.yaml'), 'project: test\nconnections: {}\n', 'utf-8'); + const testIo = makeIo(); + + const entryChoices = ['setup', 'exit']; + const entryPrompts = { + select: vi.fn(async () => entryChoices.shift() ?? 
'exit'), + cancel: vi.fn(), + }; + const model = vi.fn(async () => ({ status: 'back' as const, projectDir: tempDir })); + + await expect( + runKloSetup( + { + command: 'run', + projectDir: tempDir, + mode: 'auto', + agents: false, + skipAgents: true, + inputMode: 'auto', + yes: false, + skipLlm: false, + skipEmbeddings: true, + databaseSchemas: [], + skipDatabases: true, + skipSources: true, + showEntryMenu: true, + }, + testIo.io, + { + entryMenuDeps: { prompts: entryPrompts }, + model, + }, + ), + ).resolves.toBe(0); + + expect(entryPrompts.select).toHaveBeenCalledTimes(2); + expect(entryPrompts.cancel).toHaveBeenCalledWith('Setup cancelled.'); + expect(model).toHaveBeenCalledTimes(1); + }); + + it('runs database setup after embeddings succeed', async () => { + const testIo = makeIo(); + const model = vi.fn(async () => ({ status: 'ready' as const, projectDir: tempDir })); + const embeddings = vi.fn(async () => ({ status: 'ready' as const, projectDir: tempDir })); + const databases = vi.fn(async () => ({ + status: 'ready' as const, + projectDir: tempDir, + connectionIds: ['warehouse'], + })); + + await expect( + runKloSetup( + { + command: 'run', + projectDir: tempDir, + mode: 'new', + agents: false, + skipAgents: true, + inputMode: 'disabled', + yes: false, + anthropicApiKeyEnv: 'ANTHROPIC_API_KEY', + anthropicModel: 'claude-sonnet-4-6', + skipLlm: false, + embeddingBackend: 'openai', + embeddingApiKeyEnv: 'OPENAI_API_KEY', + skipEmbeddings: false, + databaseDrivers: ['postgres'], + databaseConnectionId: 'warehouse', + databaseUrl: 'env:DATABASE_URL', + databaseSchemas: ['public'], + skipDatabases: false, + skipSources: true, + }, + testIo.io, + { model, embeddings, databases }, + ), + ).resolves.toBe(0); + + expect(databases).toHaveBeenCalledWith( + expect.objectContaining({ + projectDir: tempDir, + inputMode: 'disabled', + databaseDrivers: ['postgres'], + databaseConnectionId: 'warehouse', + databaseUrl: 'env:DATABASE_URL', + databaseSchemas: ['public'], + 
skipDatabases: false, + }), + testIo.io, + ); + }); + + it('runs sources after database setup', async () => { + const calls: string[] = []; + const io = makeIo(); + await writeFile(join(tempDir, 'klo.yaml'), ['project: revenue', 'connections: {}', ''].join('\n'), 'utf-8'); + + await expect( + runKloSetup( + { + command: 'run', + projectDir: tempDir, + mode: 'existing', + agents: false, + skipAgents: true, + inputMode: 'disabled', + yes: true, + skipLlm: true, + skipEmbeddings: true, + skipDatabases: true, + skipSources: true, + databaseSchemas: [], + }, + io.io, + { + model: async () => { + calls.push('model'); + return { status: 'skipped', projectDir: tempDir }; + }, + embeddings: async () => { + calls.push('embeddings'); + return { status: 'skipped', projectDir: tempDir }; + }, + databases: async () => { + calls.push('databases'); + return { status: 'skipped', projectDir: tempDir }; + }, + sources: async (args) => { + expect(args.runInitialSourceIngest).toBe(false); + calls.push('sources'); + return { status: 'skipped', projectDir: tempDir }; + }, + }, + ), + ).resolves.toBe(0); + + expect(calls).toEqual(['model', 'embeddings', 'databases', 'sources']); + }); + + it('runs context after sources and before agents in full setup', async () => { + const calls: string[] = []; + const io = makeIo(); + await writeFile(join(tempDir, 'klo.yaml'), ['project: revenue', 'connections: {}', ''].join('\n'), 'utf-8'); + + await expect( + runKloSetup( + { + command: 'run', + projectDir: tempDir, + mode: 'existing', + agents: false, + inputMode: 'disabled', + yes: true, + skipLlm: true, + skipEmbeddings: true, + skipDatabases: true, + skipSources: true, + skipAgents: false, + databaseSchemas: [], + }, + io.io, + { + model: async () => { + calls.push('model'); + return { status: 'skipped', projectDir: tempDir }; + }, + embeddings: async () => { + calls.push('embeddings'); + return { status: 'skipped', projectDir: tempDir }; + }, + databases: async () => { + calls.push('databases'); 
+ return { status: 'skipped', projectDir: tempDir }; + }, + sources: async () => { + calls.push('sources'); + return { status: 'skipped', projectDir: tempDir }; + }, + context: async () => { + calls.push('context'); + return { status: 'ready', projectDir: tempDir, runId: 'setup-context-local-test' }; + }, + agents: async () => { + calls.push('agents'); + return { + status: 'ready', + projectDir: tempDir, + installs: [{ target: 'codex', scope: 'project', mode: 'cli' }], + }; + }, + }, + ), + ).resolves.toBe(0); + + expect(calls).toEqual(['model', 'embeddings', 'databases', 'sources', 'context', 'agents']); + }); + + it('runs agent setup after context succeeds in --agents mode', async () => { + const calls: string[] = []; + const io = makeIo(); + await writeFile(join(tempDir, 'klo.yaml'), ['project: revenue', 'connections: {}', ''].join('\n'), 'utf-8'); + + await expect( + runKloSetup( + { + command: 'run', + projectDir: tempDir, + mode: 'existing', + agents: true, + target: 'codex', + agentScope: 'project', + agentInstallMode: 'cli', + inputMode: 'disabled', + yes: true, + skipLlm: true, + skipEmbeddings: true, + skipDatabases: true, + skipSources: true, + skipAgents: false, + databaseSchemas: [], + }, + io.io, + { + model: async () => ({ status: 'skipped', projectDir: tempDir }), + embeddings: async () => ({ status: 'skipped', projectDir: tempDir }), + databases: async () => ({ status: 'skipped', projectDir: tempDir }), + sources: async () => ({ status: 'skipped', projectDir: tempDir }), + context: async () => { + calls.push('context'); + return { status: 'ready', projectDir: tempDir, runId: 'setup-context-local-test' }; + }, + agents: async () => { + calls.push('agents'); + return { + status: 'ready', + projectDir: tempDir, + installs: [{ target: 'codex', scope: 'project', mode: 'cli' }], + }; + }, + }, + ), + ).resolves.toBe(0); + + expect(calls).toEqual(['context', 'agents']); + }); + + it('does not install agents when non-interactive --agents finds context 
incomplete', async () => { + const io = makeIo(); + const agents = vi.fn(async () => ({ + status: 'ready' as const, + projectDir: tempDir, + installs: [{ target: 'codex' as const, scope: 'project' as const, mode: 'cli' as const }], + })); + await writeFile(join(tempDir, 'klo.yaml'), ['project: revenue', 'connections: {}', ''].join('\n'), 'utf-8'); + + await expect( + runKloSetup( + { + command: 'run', + projectDir: tempDir, + mode: 'existing', + agents: true, + target: 'codex', + agentScope: 'project', + agentInstallMode: 'cli', + inputMode: 'disabled', + yes: true, + skipLlm: true, + skipEmbeddings: true, + skipDatabases: true, + skipSources: true, + skipAgents: false, + databaseSchemas: [], + }, + io.io, + { + context: async () => ({ status: 'skipped', projectDir: tempDir }), + agents, + }, + ), + ).resolves.toBe(1); + + expect(agents).not.toHaveBeenCalled(); + expect(io.stderr()).toContain('KLO context is not ready for agents.'); + }); + + it('does not install agents when full setup context build is detached', async () => { + const calls: string[] = []; + const io = makeIo(); + await writeFile(join(tempDir, 'klo.yaml'), ['project: revenue', 'connections: {}', ''].join('\n'), 'utf-8'); + + await expect( + runKloSetup( + { + command: 'run', + projectDir: tempDir, + mode: 'existing', + agents: false, + inputMode: 'disabled', + yes: true, + skipLlm: true, + skipEmbeddings: true, + skipDatabases: true, + skipSources: true, + skipAgents: false, + databaseSchemas: [], + }, + io.io, + { + context: async () => { + calls.push('context'); + return { status: 'detached', projectDir: tempDir, runId: 'setup-context-local-test' }; + }, + agents: async () => { + calls.push('agents'); + return { + status: 'ready', + projectDir: tempDir, + installs: [{ target: 'codex', scope: 'project', mode: 'cli' }], + }; + }, + }, + ), + ).resolves.toBe(0); + + expect(calls).toEqual(['context']); + }); + + it('routes a ready project menu selection to agent setup', async () => { + const calls: 
string[] = []; + const io = makeIo(); + await mkdir(join(tempDir, '.klo', 'agents'), { recursive: true }); + await writeFile( + join(tempDir, 'klo.yaml'), + [ + 'project: revenue', + 'setup:', + ' completed_steps:', + ' - project', + ' - llm', + ' - embeddings', + ' - sources', + ' - context', + ' - agents', + ' database_connection_ids: []', + 'connections: {}', + 'llm:', + ' provider:', + ' backend: anthropic', + ' models:', + ' default: claude-sonnet-4-6', + 'ingest:', + ' embeddings:', + ' backend: openai', + ' model: text-embedding-3-small', + ' dimensions: 1536', + '', + ].join('\n'), + 'utf-8', + ); + await writeFile( + join(tempDir, '.klo/agents/install-manifest.json'), + JSON.stringify( + { + version: 1, + projectDir: tempDir, + installedAt: '2026-05-07T00:00:00.000Z', + installs: [{ target: 'codex', scope: 'project', mode: 'cli' }], + entries: [], + }, + null, + 2, + ), + 'utf-8', + ); + await writeKloSetupContextState(tempDir, { + runId: 'setup-context-local-ready', + status: 'completed', + startedAt: '2026-05-09T10:00:00.000Z', + updatedAt: '2026-05-09T10:02:00.000Z', + completedAt: '2026-05-09T10:02:00.000Z', + primarySourceConnectionIds: [], + contextSourceConnectionIds: [], + reportIds: [], + artifactPaths: [], + retryableFailedTargets: [], + commands: contextBuildCommands(tempDir, 'setup-context-local-ready'), + }); + + await expect( + runKloSetup( + { + command: 'run', + projectDir: tempDir, + mode: 'existing', + agents: false, + inputMode: 'auto', + yes: false, + skipLlm: false, + skipEmbeddings: false, + skipDatabases: false, + skipSources: false, + skipAgents: false, + databaseSchemas: [], + }, + io.io, + { + readyMenuDeps: { prompts: { select: vi.fn(async () => 'agents'), cancel: vi.fn() } }, + model: async (args) => { + expect(args.skipLlm).toBe(true); + return { status: 'skipped', projectDir: tempDir }; + }, + embeddings: async (args) => { + expect(args.skipEmbeddings).toBe(true); + return { status: 'skipped', projectDir: tempDir }; + }, + 
databases: async (args) => { + expect(args.skipDatabases).toBe(true); + return { status: 'skipped', projectDir: tempDir }; + }, + sources: async (args) => { + expect(args.skipSources).toBe(true); + return { status: 'skipped', projectDir: tempDir }; + }, + agents: async () => { + calls.push('agents'); + return { + status: 'ready', + projectDir: tempDir, + installs: [{ target: 'codex', scope: 'project', mode: 'cli' }], + }; + }, + }, + ), + ).resolves.toBe(0); + + expect(calls).toEqual(['agents']); + }); + + it('runs only project resolution, context gate, and agent setup in --agents mode', async () => { + const io = makeIo(); + const context = vi.fn(async () => ({ status: 'ready' as const, projectDir: tempDir, runId: 'setup-context-local-test' })); + const agents = vi.fn(async () => ({ + status: 'ready' as const, + projectDir: tempDir, + installs: [{ target: 'universal' as const, scope: 'project' as const, mode: 'both' as const }], + })); + + await expect( + runKloSetup( + { + command: 'run', + projectDir: tempDir, + mode: 'new', + agents: true, + target: 'universal', + agentScope: 'project', + agentInstallMode: 'both', + inputMode: 'disabled', + yes: true, + skipLlm: false, + skipEmbeddings: false, + skipDatabases: false, + skipSources: false, + skipAgents: false, + databaseSchemas: [], + }, + io.io, + { + model: async () => { + throw new Error('model should not run'); + }, + context, + agents, + }, + ), + ).resolves.toBe(0); + + expect(context).toHaveBeenCalledTimes(1); + expect(agents).toHaveBeenCalledTimes(1); + }); + + it('removes agent integrations through setup remove command', async () => { + const io = makeIo(); + const removeAgents = vi.fn(async () => 0); + + await expect(runKloSetup({ command: 'remove-agents', projectDir: tempDir }, io.io, { removeAgents })).resolves.toBe( + 0, + ); + + expect(removeAgents).toHaveBeenCalledWith(tempDir, io.io); + }); + + it('does not run embedding setup when the model step fails', async () => { + const testIo = makeIo(); + 
const model = vi.fn(async () => ({ status: 'failed' as const, projectDir: tempDir })); + const embeddings = vi.fn(async () => ({ status: 'ready' as const, projectDir: tempDir })); + + await expect( + runKloSetup( + { + command: 'run', + projectDir: tempDir, + mode: 'new', + agents: false, + skipAgents: true, + inputMode: 'disabled', + yes: false, + anthropicApiKeyEnv: 'ANTHROPIC_API_KEY', + anthropicModel: 'claude-sonnet-4-6', + skipLlm: false, + skipEmbeddings: false, + databaseSchemas: [], + skipDatabases: true, + }, + testIo.io, + { model, embeddings }, + ), + ).resolves.toBe(1); + + expect(embeddings).not.toHaveBeenCalled(); + }); +}); diff --git a/packages/cli/src/setup.ts b/packages/cli/src/setup.ts new file mode 100644 index 00000000..9d23e4af --- /dev/null +++ b/packages/cli/src/setup.ts @@ -0,0 +1,713 @@ +import { existsSync } from 'node:fs'; +import { join, resolve } from 'node:path'; +import { cancel, isCancel, select } from '@clack/prompts'; +import { loadKloProject } from '@klo/context/project'; +import type { KloCliIo } from './cli-runtime.js'; +import type { KloDemoArgs } from './demo.js'; +import { defaultDemoProjectDir } from './demo-assets.js'; +import { formatSetupNextStepLines } from './next-steps.js'; +import { isKloSetupExitError, withSetupInterruptConfirmation } from './setup-interrupt.js'; +import { + type KloAgentInstallMode, + type KloAgentScope, + type KloAgentTarget, + type KloSetupAgentsDeps, + readKloAgentInstallManifest, + removeKloAgentInstall, + runKloSetupAgentsStep, +} from './setup-agents.js'; +import { + type KloSetupDatabaseDriver, + type KloSetupDatabasesDeps, + runKloSetupDatabasesStep, +} from './setup-databases.js'; +import { type KloSetupEmbeddingsDeps, runKloSetupEmbeddingsStep } from './setup-embeddings.js'; +import { type KloSetupModelDeps, runKloSetupAnthropicModelStep } from './setup-models.js'; +import { type KloSetupProjectDeps, runKloSetupProjectStep } from './setup-project.js'; +import { isKloSetupReady, type 
KloSetupReadyMenuDeps, runKloSetupReadyChangeMenu } from './setup-ready-menu.js'; +import { type KloSetupSourcesDeps, type KloSetupSourceType, runKloSetupSourcesStep } from './setup-sources.js'; +import { withMenuOptionsSpacing } from './prompt-navigation.js'; +import { + readKloSetupContextState, + runKloSetupContextCommand, + type KloSetupContextCommandArgs, + type KloSetupContextDeps, + type KloSetupContextResult, + runKloSetupContextStep, + setupContextStatusFromState, + type KloSetupContextStatusSummary, +} from './setup-context.js'; + +/** Readiness snapshot with one entry per setup area (project, LLM, embeddings, databases, context sources, context build, agent installs); this is the shape returned by readKloSetupStatus. */ export interface KloSetupStatus { + project: { path: string; ready: boolean; name?: string }; + llm: { backend?: string; ready: boolean; model?: string }; + embeddings: { backend?: string; ready: boolean; model?: string; dimensions?: number }; + databases: Array<{ connectionId: string; ready: boolean }>; + sources: Array<{ connectionId: string; type: string; ready: boolean }>; + context: KloSetupContextStatusSummary; + agents: Array<{ target: string; scope: string; ready: boolean }>; +} + +/** Arguments accepted by runKloSetup, discriminated by the command field; the run variant carries every per-step option and skip flag. */ export type KloSetupArgs = + | { + command: 'run'; + projectDir: string; + mode: 'auto' | 'new' | 'existing'; + agents: boolean; + target?: KloAgentTarget; + agentScope?: KloAgentScope; + agentInstallMode?: KloAgentInstallMode; + skipAgents?: boolean; + inputMode: 'auto' | 'disabled'; + yes: boolean; + anthropicApiKeyEnv?: string; + anthropicApiKeyFile?: string; + anthropicModel?: string; + skipLlm: boolean; + embeddingBackend?: 'openai' | 'sentence-transformers'; + embeddingApiKeyEnv?: string; + embeddingApiKeyFile?: string; + skipEmbeddings: boolean; + databaseDrivers?: KloSetupDatabaseDriver[]; + databaseConnectionIds?: string[]; + databaseConnectionId?: string; + databaseUrl?: string; + databaseSchemas: string[]; + enableHistoricSql?: boolean; + disableHistoricSql?: boolean; + historicSqlWindowDays?: number; + historicSqlMinCalls?: number; + historicSqlServiceAccountPatterns?: string[]; + historicSqlRedactionPatterns?: string[]; + 
skipDatabases: boolean; + source?: KloSetupSourceType; + sourceConnectionId?: string; + sourcePath?: string; + sourceGitUrl?: string; + sourceBranch?: string; + sourceSubpath?: string; + sourceAuthTokenRef?: string; + sourceUrl?: string; + sourceApiKeyRef?: string; + sourceClientId?: string; + sourceClientSecretRef?: string; + sourceWarehouseConnectionId?: string; + sourceProjectName?: string; + sourceProfilesPath?: string; + sourceTarget?: string; + metabaseDatabaseId?: number; + notionCrawlMode?: 'all_accessible' | 'selected_roots'; + notionRootPageIds?: string[]; + runInitialSourceIngest?: boolean; + skipSources?: boolean; + showEntryMenu?: boolean; + } + | { command: 'status'; projectDir: string; json: boolean } + | { command: 'context-build'; projectDir: string; inputMode: 'auto' | 'disabled' } + | { command: 'context-watch'; projectDir: string; runId?: string; inputMode: 'auto' | 'disabled' } + | { command: 'context-status'; projectDir: string; runId?: string; json: boolean } + | { command: 'context-stop'; projectDir: string; runId?: string } + | { command: 'remove-agents'; projectDir: string }; + +/** Optional injection points passed as the third argument of runKloSetup. Each field is optional; where this file reads one (for example removeAgents and demo) an omitted field falls back to the real implementation. NOTE(review): the step signatures below read Parameters[0] and Promise with no type arguments, which looks like generic arguments lost in extraction - confirm against the repository copy. */ export interface KloSetupDeps { + project?: KloSetupProjectDeps; + model?: ( + args: Parameters[0], + io: KloCliIo, + ) => Promise>>; + modelDeps?: KloSetupModelDeps; + embeddings?: ( + args: Parameters[0], + io: KloCliIo, + ) => Promise>>; + embeddingsDeps?: KloSetupEmbeddingsDeps; + databases?: ( + args: Parameters[0], + io: KloCliIo, + ) => Promise>>; + databasesDeps?: KloSetupDatabasesDeps; + sources?: ( + args: Parameters[0], + io: KloCliIo, + ) => Promise>>; + sourcesDeps?: KloSetupSourcesDeps; + agents?: ( + args: Parameters[0], + io: KloCliIo, + ) => Promise>>; + agentsDeps?: KloSetupAgentsDeps; + context?: (args: Parameters[0], io: KloCliIo) => Promise; + contextDeps?: KloSetupContextDeps; + removeAgents?: typeof removeKloAgentInstall; + readyMenuDeps?: KloSetupReadyMenuDeps; + entryMenuDeps?: KloSetupEntryMenuDeps; + demo?: (args: KloDemoArgs, io: 
KloCliIo) => Promise; +} + +/** Connection drivers that sourceConnections reports as context sources; membership is tested against the lower-cased driver name. */ const SOURCE_DRIVERS = new Set(['dbt', 'metricflow', 'metabase', 'looker', 'lookml', 'notion']); + +/** Values the setup entry menu can return. */ type KloSetupEntryAction = 'setup' | 'new-project' | 'agents' | 'status' | 'demo' | 'exit'; +type KloSetupFlowStep = 'models' | 'embeddings' | 'databases' | 'sources' | 'context' | 'agents'; +type KloSetupFlowStatus = + | 'ready' + | 'skipped' + | 'back' + | 'missing-input' + | 'failed' + | 'detached' + | 'paused' + | 'interrupted'; + +/** Prompt operations the entry menu needs: a single-choice select and a cancel notice. */ export interface KloSetupEntryMenuPromptAdapter { + select(options: { message: string; options: Array<{ value: string; label: string }> }): Promise; + cancel(message: string): void; +} + +/** Dependencies of the entry menu; prompts replaces the default adapter when provided. */ export interface KloSetupEntryMenuDeps { + prompts?: KloSetupEntryMenuPromptAdapter; +} + +/** Builds the default prompt adapter on top of the select and cancel functions imported from @clack/prompts. A cancelled select is reported as the exit choice; any other value is returned as a string. */ function createEntryMenuPromptAdapter(): KloSetupEntryMenuPromptAdapter { + return { + async select(options) { + const value = await withSetupInterruptConfirmation(() => select(withMenuOptionsSpacing(options))); + if (isCancel(value)) { + return 'exit'; + } + return String(value); + }, + cancel(message) { + cancel(message); + }, + }; +} + +/** Shows the entry menu and returns the chosen action. A ready project gets six options, including new-project and agents; otherwise only setup, status, demo and exit are offered. */ async function runKloSetupEntryMenu( + status: KloSetupStatus, + deps: KloSetupEntryMenuDeps = {}, +): Promise<{ action: KloSetupEntryAction }> { + const prompts = deps.prompts ?? createEntryMenuPromptAdapter(); + const options = status.project.ready + ? 
[ + { value: 'setup', label: 'Resume or change an existing setup' }, + { value: 'new-project', label: 'Create a new KLO project' }, + { value: 'agents', label: 'Connect a coding agent to KLO' }, + { value: 'status', label: 'Check setup status' }, + { value: 'demo', label: 'Try KLO with packaged demo data' }, + { value: 'exit', label: 'Exit' }, + ] + : [ + { value: 'setup', label: 'Set up KLO for my data' }, + { value: 'status', label: 'Check setup status' }, + { value: 'demo', label: 'Try KLO with packaged demo data' }, + { value: 'exit', label: 'Exit' }, + ]; + /* the adapter returns a plain string, so the result is cast rather than validated */ const action = (await prompts.select({ + message: 'What do you want to do?', + options, + })) as KloSetupEntryAction; + return { action }; +} + +/** Runs the seeded demo chosen from the entry menu. The demo uses defaultDemoProjectDir() rather than args.projectDir, forwards only args.inputMode, and imports ./demo.js lazily when no deps.demo override is given. Returns whatever the runner returns. */ async function runKloSetupDemoFromEntryMenu( + args: Extract, + io: KloCliIo, + deps: KloSetupDeps, +): Promise { + const runner = deps.demo ?? (await import('./demo.js')).runKloDemo; + return await runner( + { + command: 'seeded', + projectDir: defaultDemoProjectDir(), + outputMode: 'viz', + inputMode: args.inputMode, + }, + io, + ); +} + +/** True only when the backend is anthropic and a non-empty model name is set. */ function llmReady(status: KloSetupStatus['llm']): boolean { + return status.backend === 'anthropic' && typeof status.model === 'string' && status.model.length > 0; +} + +/** True when a backend other than none or deterministic is set together with a non-empty model name and a positive dimensions value. */ function embeddingsReady(status: KloSetupStatus['embeddings']): boolean { + return ( + status.backend !== undefined && + status.backend !== 'none' && + status.backend !== 'deterministic' && + typeof status.model === 'string' && + status.model.length > 0 && + typeof status.dimensions === 'number' && + status.dimensions > 0 + ); +} + +/** Lists the connections whose driver, compared case-insensitively, is in SOURCE_DRIVERS, as connectionId and lower-cased type pairs sorted by connection id. */ function sourceConnections(config: Awaited>['config']) { + return Object.entries(config.connections) + .filter(([, connection]) => SOURCE_DRIVERS.has(String(connection.driver ?? 
'').toLowerCase())) + .map(([connectionId, connection]) => ({ + connectionId, + type: String(connection.driver).toLowerCase(), + })) + .sort((left, right) => left.connectionId.localeCompare(right.connectionId)); +} + +/** Reads the setup status of the project at projectDir, which is resolved with path.resolve first. Without a klo.yaml file the project, LLM and embeddings entries are reported as not ready and the lists are empty, while the context entry is still derived from the stored context state. Otherwise the project config is loaded and each area is derived from it. */ export async function readKloSetupStatus(projectDir: string): Promise { + const resolvedProjectDir = resolve(projectDir); + if (!existsSync(join(resolvedProjectDir, 'klo.yaml'))) { + return { + project: { path: resolvedProjectDir, ready: false }, + llm: { ready: false }, + embeddings: { ready: false }, + databases: [], + sources: [], + context: setupContextStatusFromState(await readKloSetupContextState(resolvedProjectDir)), + agents: [], + }; + } + + const project = await loadKloProject({ projectDir: resolvedProjectDir }); + /* ready starts false and is filled in from the collected values on the next statement */ const llm = { + backend: project.config.llm.provider.backend, + ready: false, + model: project.config.llm.models.default, + }; + llm.ready = llmReady(llm); + + const embeddings = { + backend: project.config.ingest.embeddings.backend, + ready: false, + model: project.config.ingest.embeddings.model, + dimensions: project.config.ingest.embeddings.dimensions, + }; + embeddings.ready = embeddingsReady(embeddings); + + const completedSteps = project.config.setup?.completed_steps ?? []; + const contextState = await readKloSetupContextState(resolvedProjectDir); + /* NOTE(review): when setup.database_connection_ids is absent this falls back to every connection id, which also includes connections whose driver is in SOURCE_DRIVERS and are listed again under sources - confirm that is intended */ const databaseIds = project.config.setup?.database_connection_ids ?? Object.keys(project.config.connections); + const databasesComplete = completedSteps.includes('databases'); + const manifest = await readKloAgentInstallManifest(resolvedProjectDir); + /* every install listed in the manifest is reported as ready; a missing manifest yields an empty list */ const agents = + manifest?.installs.map((install) => ({ + target: install.target, + scope: install.scope, + ready: true, + })) ?? 
[]; + + return { + project: { path: resolvedProjectDir, ready: true, name: project.config.project }, + llm, + embeddings, + /* a database is ready only when the databases step is completed and its id still exists under connections */ databases: databaseIds.map((connectionId) => ({ + connectionId, + ready: databasesComplete && Object.hasOwn(project.config.connections, connectionId), + })), + /* all sources share one readiness flag: completion of the sources step */ sources: sourceConnections(project.config).map((source) => ({ + ...source, + ready: completedSteps.includes('sources'), + })), + context: setupContextStatusFromState(contextState, { completedStep: completedSteps.includes('context') }), + agents, + }; +} + +/** Maps a boolean to the words yes or no used in the status report. */ function formatReady(value: boolean): 'yes' | 'no' { + return value ? 'yes' : 'no'; +} + +/** Returns yes followed by the comma-separated ids in parentheses, or no for an empty list. */ function formatConnectionList(ids: string[]): string { + return ids.length > 0 ? `yes (${ids.join(', ')})` : 'no'; +} + +/** Describes the context build: yes when ready, no when not_started, otherwise the status with underscores replaced by spaces, followed by the run id in parentheses when one is present. */ function formatContextBuilt(status: KloSetupContextStatusSummary): string { + if (status.ready) { + return 'yes'; + } + if (status.status === 'not_started') { + return 'no'; + } + const runSuffix = status.runId ? ` (${status.runId})` : ''; + return `${status.status.replaceAll('_', ' ')}${runSuffix}`; +} + +/** Renders a status snapshot as human-readable text. When the project is not ready it returns hint lines about where to run setup instead of a report. Otherwise it returns one line per setup area, terminated by a newline, plus a Resume line while a context build is running with a watch command and a Retry line when the build failed and a detail is present. */ export function formatKloSetupStatus(status: KloSetupStatus): string { + if (!status.project.ready) { + return [ + `No KLO project found at ${status.project.path}.`, + '', + 'Check another project: klo --project-dir setup status', + 'Or from that folder: klo setup status', + 'Create a new KLO project here: klo setup', + '', + ].join('\n'); + } + + const lines = [ + `KLO project: ${status.project.path}`, + `Project ready: ${formatReady(status.project.ready)}`, + `LLM ready: ${formatReady(status.llm.ready)}${status.llm.model ? ` (${status.llm.model})` : ''}`, + `Embeddings ready: ${formatReady(status.embeddings.ready)}${ + status.embeddings.model ? 
` (${status.embeddings.model})` : '' + }`, + `Primary sources configured: ${formatConnectionList(status.databases.map((database) => database.connectionId))}`, + `Context sources configured: ${formatConnectionList(status.sources.map((source) => source.connectionId))}`, + `KLO context built: ${formatContextBuilt(status.context)}`, + `Agent integration ready: ${formatReady(status.agents.some((agent) => agent.ready))}${ + status.agents.length > 0 ? ` (${status.agents.map((agent) => `${agent.target}:${agent.scope}`).join(', ')})` : '' + }`, + ]; + if (!status.context.ready && status.context.watchCommand && status.context.status === 'running') { + lines.push(`Resume: ${status.context.watchCommand}`); + } + /* the Retry hint uses the stored retry command when there is one and otherwise a context build command for this project path */ if (!status.context.ready && status.context.status === 'failed' && status.context.detail) { + lines.push( + `Retry: ${status.context.retryCommand ?? `klo setup context build --project-dir ${status.project.path}`}`, + ); + } + + return `${lines.join('\n')}\n`; +} + +/** Overall readiness check: the project must be ready; with no databases or sources configured that alone is enough, otherwise the LLM, the embeddings and every database and source must be ready as well. */ function setupStatusReady(status: KloSetupStatus): boolean { + if (!status.project.ready) { + return false; + } + if (!setupHasContextTargets(status)) { + return true; + } + return ( + llmReady(status.llm) && + embeddingsReady(status.embeddings) && + status.databases.every((database) => database.ready) && + status.sources.every((source) => source.ready) + ); +} + +/** True when at least one database or source connection is listed in the status. */ function setupHasContextTargets(status: KloSetupStatus): boolean { + return status.databases.length > 0 || status.sources.length > 0; +} + +/** Returns the ready flag of the context entry. */ function setupContextReady(status: KloSetupStatus): boolean { + return status.context.ready; +} + +/** Writes to stderr that context is not ready for agents, followed by the context build command and the agent install command for the resolved project directory. */ function writeContextNotReadyForAgents(projectDir: string, io: KloCliIo): void { + io.stderr.write('KLO context is not ready for agents.\n\n'); + io.stderr.write(`Build context first:\n klo setup context build --project-dir ${resolve(projectDir)}\n\n`); + io.stderr.write(`Then install agent integration:\n klo setup --agents --project-dir ${resolve(projectDir)}\n`); +} + +/** Public entry point for the setup command: delegates to runKloSetupInner and resolves to its exit code. Errors recognised by isKloSetupExitError resolve to 0; any other error is rethrown. */ export async function runKloSetup(args: 
KloSetupArgs, io: KloCliIo, deps: KloSetupDeps = {}): Promise { + try { + return await runKloSetupInner(args, io, deps); + } catch (error) { + if (isKloSetupExitError(error)) { + return 0; + } + throw error; + } +} + +async function runKloSetupInner(args: KloSetupArgs, io: KloCliIo, deps: KloSetupDeps = {}): Promise { + if (args.command === 'remove-agents') { + return await (deps.removeAgents ?? removeKloAgentInstall)(args.projectDir, io); + } + + if ( + args.command === 'context-build' || + args.command === 'context-watch' || + args.command === 'context-status' || + args.command === 'context-stop' + ) { + const commandArgs: KloSetupContextCommandArgs = + args.command === 'context-build' + ? { command: 'build', projectDir: args.projectDir, inputMode: args.inputMode } + : args.command === 'context-watch' + ? { + command: 'watch', + projectDir: args.projectDir, + ...(args.runId ? { runId: args.runId } : {}), + inputMode: args.inputMode, + } + : args.command === 'context-status' + ? { + command: 'status', + projectDir: args.projectDir, + ...(args.runId ? { runId: args.runId } : {}), + json: args.json, + } + : { command: 'stop', projectDir: args.projectDir, ...(args.runId ? { runId: args.runId } : {}) }; + return await runKloSetupContextCommand(commandArgs, io, deps.contextDeps); + } + + if (args.command === 'status') { + const status = await readKloSetupStatus(args.projectDir); + io.stdout.write(args.json ? 
`${JSON.stringify(status, null, 2)}\n` : formatKloSetupStatus(status)); + return 0; + } + + io.stdout.write('KLO setup\n'); + let entryAction: KloSetupEntryAction | undefined; + let projectResult: Awaited>; + const canShowEntryMenu = + args.showEntryMenu === true && + args.inputMode !== 'disabled' && + !args.agents && + (io.stdout.isTTY === true || deps.entryMenuDeps?.prompts !== undefined); + + setupLoop: while (true) { + entryAction = undefined; + if (canShowEntryMenu) { + const status = await readKloSetupStatus(args.projectDir); + entryAction = (await runKloSetupEntryMenu(status, deps.entryMenuDeps)).action; + if (entryAction === 'exit') { + (deps.entryMenuDeps?.prompts ?? createEntryMenuPromptAdapter()).cancel('Setup cancelled.'); + return 0; + } + if (entryAction === 'status') { + io.stdout.write(formatKloSetupStatus(status)); + return 0; + } + if (entryAction === 'demo') { + return await runKloSetupDemoFromEntryMenu(args, io, deps); + } + } + + const projectMode = entryAction === 'new-project' ? 'prompt-new' : args.mode; + projectResult = await runKloSetupProjectStep( + { + projectDir: args.projectDir, + mode: projectMode, + inputMode: args.inputMode, + yes: args.yes, + allowBack: canShowEntryMenu, + }, + io, + deps.project, + ); + + if (projectResult.status === 'back') { + continue; + } + + if (projectResult.status !== 'ready') { + return projectResult.status === 'cancelled' ? 
0 : 1; + } + + const agentsRequested = args.agents || entryAction === 'agents'; + const currentStatus = await readKloSetupStatus(projectResult.projectDir); + let readyAction: string | undefined; + if (args.inputMode !== 'disabled' && !agentsRequested && isKloSetupReady(currentStatus)) { + readyAction = (await runKloSetupReadyChangeMenu(currentStatus, deps.readyMenuDeps)).action; + if (readyAction === 'exit') return 0; + } + + const runOnly = readyAction; + const shouldRunModels = !runOnly || runOnly === 'models'; + const shouldRunEmbeddings = !runOnly || runOnly === 'embeddings'; + const shouldRunDatabases = !runOnly || runOnly === 'databases'; + const shouldRunSources = !runOnly || runOnly === 'sources'; + const shouldRunContext = agentsRequested || !runOnly || runOnly === 'context'; + const shouldRunAgents = agentsRequested || !runOnly || runOnly === 'agents'; + const showPromptInstructions = projectResult.confirmedCreation !== true; + + const setupSteps: KloSetupFlowStep[] = agentsRequested + ? 
['context'] + : ['models', 'embeddings', 'databases', 'sources', 'context']; + if (shouldRunAgents && args.skipAgents !== true) { + setupSteps.push('agents'); + } + + const forcePromptSteps = new Set(); + const isNavigableSetupStep = (step: KloSetupFlowStep): boolean => { + if (step === 'models') return !args.skipLlm && shouldRunModels; + if (step === 'embeddings') return !args.skipEmbeddings && shouldRunEmbeddings; + if (step === 'databases') return !args.skipDatabases && shouldRunDatabases; + if (step === 'sources') return args.skipSources !== true && shouldRunSources; + if (step === 'context') return shouldRunContext; + return shouldRunAgents && args.skipAgents !== true; + }; + const previousNavigableStepIndex = (currentIndex: number): number => { + for (let index = currentIndex - 1; index >= 0; index -= 1) { + const previousStep = setupSteps[index]; + if (previousStep && isNavigableSetupStep(previousStep)) { + return index; + } + } + return -1; + }; + + for (let stepIndex = 0; stepIndex < setupSteps.length; ) { + const step = setupSteps[stepIndex]; + if (!step) break; + + let stepResult: { status: KloSetupFlowStatus }; + if (step === 'models') { + const modelRunner = + deps.model ?? ((modelArgs, modelIo) => runKloSetupAnthropicModelStep(modelArgs, modelIo, deps.modelDeps)); + stepResult = await modelRunner( + { + projectDir: projectResult.projectDir, + inputMode: args.inputMode, + ...(args.anthropicApiKeyEnv ? { anthropicApiKeyEnv: args.anthropicApiKeyEnv } : {}), + ...(args.anthropicApiKeyFile ? { anthropicApiKeyFile: args.anthropicApiKeyFile } : {}), + ...(args.anthropicModel ? { anthropicModel: args.anthropicModel } : {}), + forcePrompt: forcePromptSteps.has('models') || runOnly === 'models', + showPromptInstructions, + skipLlm: args.skipLlm || !shouldRunModels, + }, + io, + ); + } else if (step === 'embeddings') { + const embeddingsRunner = + deps.embeddings ?? 
+ ((embeddingArgs, embeddingIo) => runKloSetupEmbeddingsStep(embeddingArgs, embeddingIo, deps.embeddingsDeps)); + stepResult = await embeddingsRunner( + { + projectDir: projectResult.projectDir, + inputMode: args.inputMode, + ...(args.embeddingBackend ? { embeddingBackend: args.embeddingBackend } : {}), + ...(args.embeddingApiKeyEnv ? { embeddingApiKeyEnv: args.embeddingApiKeyEnv } : {}), + ...(args.embeddingApiKeyFile ? { embeddingApiKeyFile: args.embeddingApiKeyFile } : {}), + forcePrompt: forcePromptSteps.has('embeddings') || runOnly === 'embeddings', + showPromptInstructions, + skipEmbeddings: args.skipEmbeddings || !shouldRunEmbeddings, + }, + io, + ); + } else if (step === 'databases') { + const databasesRunner = + deps.databases ?? + ((databaseArgs, databaseIo) => runKloSetupDatabasesStep(databaseArgs, databaseIo, deps.databasesDeps)); + stepResult = await databasesRunner( + { + projectDir: projectResult.projectDir, + inputMode: args.inputMode, + ...(args.databaseDrivers ? { databaseDrivers: args.databaseDrivers } : {}), + ...(args.databaseConnectionIds ? { databaseConnectionIds: args.databaseConnectionIds } : {}), + ...(args.databaseConnectionId ? { databaseConnectionId: args.databaseConnectionId } : {}), + ...(args.databaseUrl ? { databaseUrl: args.databaseUrl } : {}), + databaseSchemas: args.databaseSchemas, + ...(args.enableHistoricSql !== undefined ? { enableHistoricSql: args.enableHistoricSql } : {}), + ...(args.disableHistoricSql !== undefined ? { disableHistoricSql: args.disableHistoricSql } : {}), + ...(args.historicSqlWindowDays !== undefined ? { historicSqlWindowDays: args.historicSqlWindowDays } : {}), + ...(args.historicSqlMinCalls !== undefined ? { historicSqlMinCalls: args.historicSqlMinCalls } : {}), + ...(args.historicSqlServiceAccountPatterns + ? { historicSqlServiceAccountPatterns: args.historicSqlServiceAccountPatterns } + : {}), + ...(args.historicSqlRedactionPatterns + ? 
{ historicSqlRedactionPatterns: args.historicSqlRedactionPatterns } + : {}), + skipDatabases: args.skipDatabases || !shouldRunDatabases, + }, + io, + ); + } else if (step === 'sources') { + const sourcesRunner = + deps.sources ?? ((sourceArgs, sourceIo) => runKloSetupSourcesStep(sourceArgs, sourceIo, deps.sourcesDeps)); + stepResult = await sourcesRunner( + { + projectDir: projectResult.projectDir, + inputMode: args.inputMode, + ...(args.source ? { source: args.source } : {}), + ...(args.sourceConnectionId ? { sourceConnectionId: args.sourceConnectionId } : {}), + ...(args.sourcePath ? { sourcePath: args.sourcePath } : {}), + ...(args.sourceGitUrl ? { sourceGitUrl: args.sourceGitUrl } : {}), + ...(args.sourceBranch ? { sourceBranch: args.sourceBranch } : {}), + ...(args.sourceSubpath ? { sourceSubpath: args.sourceSubpath } : {}), + ...(args.sourceAuthTokenRef ? { sourceAuthTokenRef: args.sourceAuthTokenRef } : {}), + ...(args.sourceUrl ? { sourceUrl: args.sourceUrl } : {}), + ...(args.sourceApiKeyRef ? { sourceApiKeyRef: args.sourceApiKeyRef } : {}), + ...(args.sourceClientId ? { sourceClientId: args.sourceClientId } : {}), + ...(args.sourceClientSecretRef ? { sourceClientSecretRef: args.sourceClientSecretRef } : {}), + ...(args.sourceWarehouseConnectionId ? { sourceWarehouseConnectionId: args.sourceWarehouseConnectionId } : {}), + ...(args.sourceProjectName ? { sourceProjectName: args.sourceProjectName } : {}), + ...(args.sourceProfilesPath ? { sourceProfilesPath: args.sourceProfilesPath } : {}), + ...(args.sourceTarget ? { sourceTarget: args.sourceTarget } : {}), + ...(args.metabaseDatabaseId !== undefined ? { metabaseDatabaseId: args.metabaseDatabaseId } : {}), + ...(args.notionCrawlMode ? { notionCrawlMode: args.notionCrawlMode } : {}), + ...(args.notionRootPageIds ? { notionRootPageIds: args.notionRootPageIds } : {}), + runInitialSourceIngest: args.runInitialSourceIngest ?? 
false, + skipSources: args.skipSources === true || !shouldRunSources, + }, + io, + ); + } else if (step === 'context') { + const contextRunner = + deps.context ?? + ((contextArgs, contextIo) => runKloSetupContextStep(contextArgs, contextIo, deps.contextDeps)); + stepResult = await contextRunner( + { + projectDir: projectResult.projectDir, + inputMode: args.inputMode, + forcePrompt: forcePromptSteps.has('context') || runOnly === 'context', + allowEmpty: true, + }, + io, + ); + } else { + const agentsRunner = + deps.agents ?? ((agentArgs, agentIo) => runKloSetupAgentsStep(agentArgs, agentIo, deps.agentsDeps)); + stepResult = await agentsRunner( + { + projectDir: projectResult.projectDir, + inputMode: args.inputMode, + yes: args.yes, + agents: true, + ...(args.target ? { target: args.target } : {}), + scope: args.agentScope ?? 'project', + mode: args.agentInstallMode ?? 'cli', + skipAgents: false, + }, + io, + ); + } + + if (stepResult.status === 'failed' || stepResult.status === 'missing-input') { + return 1; + } + if (stepResult.status === 'back') { + const previousIndex = previousNavigableStepIndex(stepIndex); + if (previousIndex < 0) { + if (canShowEntryMenu) { + continue setupLoop; + } + return 0; + } + const previousStep = setupSteps[previousIndex]; + if (previousStep) { + forcePromptSteps.add(previousStep); + } + stepIndex = previousIndex; + continue; + } + if (step === 'context' && stepResult.status !== 'ready') { + if (shouldRunAgents && args.skipAgents !== true) { + if (agentsRequested) { + writeContextNotReadyForAgents(projectResult.projectDir, io); + return args.inputMode === 'disabled' ? 
1 : 0; + } + return 0; + } + } + + forcePromptSteps.delete(step); + stepIndex += 1; + } + + break; + } + + const status = await readKloSetupStatus(projectResult.projectDir); + io.stdout.write(formatKloSetupStatus(status)); + io.stdout.write('\nWhat you can do next:\n'); + io.stdout.write( + `${formatSetupNextStepLines({ + setupReady: setupStatusReady(status), + hasContextTargets: setupHasContextTargets(status), + contextReady: setupContextReady(status), + agentIntegrationReady: status.agents.some((agent) => agent.ready), + }).join('\n')}\n`, + ); + return 0; +} diff --git a/packages/cli/src/sl.test.ts b/packages/cli/src/sl.test.ts new file mode 100644 index 00000000..8ee3e517 --- /dev/null +++ b/packages/cli/src/sl.test.ts @@ -0,0 +1,372 @@ +import { mkdtemp, rm, writeFile } from 'node:fs/promises'; +import { tmpdir } from 'node:os'; +import { join } from 'node:path'; +import Database from 'better-sqlite3'; +import { initKloProject } from '@klo/context/project'; +import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'; +import { runKloSl } from './sl.js'; + +const ORDERS_YAML = [ + 'name: orders', + 'table: public.orders', + 'grain:', + ' - order_id', + 'columns:', + ' - name: order_id', + ' type: string', + '', +].join('\n'); + +function makeIo() { + let stdout = ''; + let stderr = ''; + return { + io: { + stdout: { + write: (chunk: string) => { + stdout += chunk; + }, + }, + stderr: { + write: (chunk: string) => { + stderr += chunk; + }, + }, + }, + stdout: () => stdout, + stderr: () => stderr, + }; +} + +describe('runKloSl', () => { + let tempDir: string; + + beforeEach(async () => { + tempDir = await mkdtemp(join(tmpdir(), 'klo-cli-sl-')); + }); + + afterEach(async () => { + await rm(tempDir, { recursive: true, force: true }); + }); + + it('writes, validates, reads, and lists semantic-layer sources', async () => { + const projectDir = join(tempDir, 'project'); + await initKloProject({ projectDir, projectName: 'warehouse' }); + + const writeIo = 
makeIo(); + await expect( + runKloSl( + { + command: 'write', + projectDir, + connectionId: 'warehouse', + sourceName: 'orders', + yaml: ORDERS_YAML, + }, + writeIo.io, + ), + ).resolves.toBe(0); + expect(writeIo.stdout()).toContain('Wrote semantic-layer/warehouse/orders.yaml'); + + const validateIo = makeIo(); + await expect( + runKloSl({ command: 'validate', projectDir, connectionId: 'warehouse', sourceName: 'orders' }, validateIo.io), + ).resolves.toBe(0); + expect(validateIo.stdout()).toContain('Valid semantic-layer source: warehouse/orders'); + + const readIo = makeIo(); + await expect(runKloSl({ command: 'read', projectDir, connectionId: 'warehouse', sourceName: 'orders' }, readIo.io)) + .resolves.toBe(0); + expect(readIo.stdout()).toContain('name: orders'); + + const listIo = makeIo(); + await expect(runKloSl({ command: 'list', projectDir, connectionId: 'warehouse' }, listIo.io)).resolves.toBe(0); + expect(listIo.stdout()).toContain('warehouse\torders\tcolumns=1\tmeasures=0\tjoins=0'); + }); + + it('runs sl query and prints SQL output', async () => { + const projectDir = join(tempDir, 'project'); + const project = await initKloProject({ projectDir, projectName: 'warehouse' }); + project.config.connections.warehouse = { driver: 'postgres', readonly: true }; + await project.fileStore.writeFile( + 'semantic-layer/warehouse/orders.yaml', + `name: orders +table: public.orders +grain: [id] +columns: + - name: id + type: number +measures: + - name: order_count + expr: count(*) +joins: [] +`, + 'klo', + 'klo@example.com', + 'Add orders source', + ); + + const stdout = { write: vi.fn() }; + const stderr = { write: vi.fn() }; + const loadProject = vi.fn(async () => project); + const createSemanticLayerCompute = vi.fn(() => ({ + query: vi.fn(async () => ({ + sql: 'select count(*) as order_count from public.orders', + dialect: 'postgres', + columns: [{ name: 'orders.order_count' }], + plan: {}, + })), + validateSources: vi.fn(), + generateSources: vi.fn(), + })); + + 
await expect( + runKloSl( + { + command: 'query', + projectDir: '/tmp/project', + connectionId: 'warehouse', + query: { measures: ['orders.order_count'], dimensions: [] }, + format: 'sql', + execute: false, + }, + { stdout, stderr }, + { loadProject, createSemanticLayerCompute }, + ), + ).resolves.toBe(0); + + expect(stdout.write).toHaveBeenCalledWith('select count(*) as order_count from public.orders\n'); + expect(stderr.write).not.toHaveBeenCalled(); + }); + + it('executes sl query through the injected query executor', async () => { + const projectDir = join(tempDir, 'project'); + const project = await initKloProject({ projectDir, projectName: 'warehouse' }); + project.config.connections.warehouse = { driver: 'postgres', url: 'postgres://example/db', readonly: true }; + await project.fileStore.writeFile( + 'semantic-layer/warehouse/orders.yaml', + `name: orders +table: public.orders +grain: [id] +columns: + - name: id + type: number +measures: + - name: order_count + expr: count(*) +joins: [] +`, + 'klo', + 'klo@example.com', + 'Add orders source', + ); + + const stdout = { write: vi.fn() }; + const stderr = { write: vi.fn() }; + const loadProject = vi.fn(async () => project); + const queryExecutor = { + execute: vi.fn(async () => ({ + headers: ['orders.order_count'], + rows: [[4]], + totalRows: 1, + command: 'SELECT', + rowCount: 1, + })), + }; + const createSemanticLayerCompute = vi.fn(() => ({ + query: vi.fn(async () => ({ + sql: 'select count(*) as order_count from public.orders', + dialect: 'postgres', + columns: [{ name: 'orders.order_count' }], + plan: {}, + })), + validateSources: vi.fn(), + generateSources: vi.fn(), + })); + + await expect( + runKloSl( + { + command: 'query', + projectDir, + connectionId: 'warehouse', + query: { measures: ['orders.order_count'], dimensions: [] }, + format: 'json', + execute: true, + maxRows: 20, + }, + { stdout, stderr }, + { + loadProject, + createSemanticLayerCompute, + createQueryExecutor: () => queryExecutor, + }, + 
), + ).resolves.toBe(0); + + expect(queryExecutor.execute).toHaveBeenCalledWith( + expect.objectContaining({ + connectionId: 'warehouse', + maxRows: 20, + }), + ); + expect(JSON.parse(String(stdout.write.mock.calls[0][0]))).toMatchObject({ + rows: [[4]], + totalRows: 1, + plan: { + execution: { + mode: 'executed', + }, + }, + }); + expect(stderr.write).not.toHaveBeenCalled(); + }); + + it('executes sl query against a local SQLite connection through the default executor', async () => { + const projectDir = join(tempDir, 'project'); + const project = await initKloProject({ projectDir, projectName: 'warehouse' }); + const dbPath = join(projectDir, 'warehouse.db'); + const db = new Database(dbPath); + db.exec(` + CREATE TABLE orders ( + id INTEGER PRIMARY KEY, + status TEXT NOT NULL + ); + INSERT INTO orders (status) VALUES ('paid'), ('paid'), ('open'); + `); + db.close(); + + project.config.connections.warehouse = { driver: 'sqlite', path: 'warehouse.db', readonly: true }; + await writeFile( + join(projectDir, 'klo.yaml'), + [ + 'project: warehouse', + 'connections:', + ' warehouse:', + ' driver: sqlite', + ' path: warehouse.db', + ' readonly: true', + '', + ].join('\n'), + 'utf-8', + ); + await project.fileStore.writeFile( + 'semantic-layer/warehouse/orders.yaml', + `name: orders +table: orders +grain: [id] +columns: + - name: id + type: number + - name: status + type: string +measures: + - name: order_count + expr: count(*) +joins: [] +`, + 'klo', + 'klo@example.com', + 'Add orders source', + ); + + const stdout = { write: vi.fn() }; + const stderr = { write: vi.fn() }; + const createSemanticLayerCompute = vi.fn(() => ({ + query: vi.fn(async () => ({ + sql: 'select count(*) as order_count from orders', + dialect: 'sqlite', + columns: [{ name: 'orders.order_count' }], + plan: {}, + })), + validateSources: vi.fn(), + generateSources: vi.fn(), + })); + + const exitCode = await runKloSl( + { + command: 'query', + projectDir, + connectionId: 'warehouse', + query: { 
measures: ['orders.order_count'], dimensions: [] }, + format: 'json', + execute: true, + maxRows: 20, + }, + { stdout, stderr }, + { createSemanticLayerCompute }, + ); + + expect(stderr.write).not.toHaveBeenCalled(); + expect(exitCode).toBe(0); + expect(JSON.parse(String(stdout.write.mock.calls[0][0]))).toMatchObject({ + connectionId: 'warehouse', + dialect: 'sqlite', + rows: [[3]], + totalRows: 1, + plan: { + execution: { + mode: 'executed', + driver: 'sqlite', + maxRows: 20, + rowCount: 1, + }, + }, + }); + }); + + it('emits sl list as a JSON envelope when output=json', async () => { + const projectDir = join(tempDir, 'project'); + await initKloProject({ projectDir, projectName: 'warehouse' }); + + const writeIo = makeIo(); + await runKloSl( + { command: 'write', projectDir, connectionId: 'warehouse', sourceName: 'orders', yaml: ORDERS_YAML }, + writeIo.io, + ); + + const listIo = makeIo(); + const code = await runKloSl( + { command: 'list', projectDir, connectionId: 'warehouse', output: 'json' }, + listIo.io, + ); + expect(code).toBe(0); + + const parsed = JSON.parse(listIo.stdout()); + expect(parsed.kind).toBe('list'); + expect(parsed.meta).toEqual({ command: 'sl list' }); + expect(parsed.data.items).toHaveLength(1); + expect(parsed.data.items[0]).toMatchObject({ + connectionId: 'warehouse', + name: 'orders', + columnCount: 1, + measureCount: 0, + joinCount: 0, + }); + }); + + it('emits sl list with grouping and Clack-style framing when output=pretty', async () => { + const projectDir = join(tempDir, 'project'); + await initKloProject({ projectDir, projectName: 'warehouse' }); + + const writeIo = makeIo(); + await runKloSl( + { command: 'write', projectDir, connectionId: 'warehouse', sourceName: 'orders', yaml: ORDERS_YAML }, + writeIo.io, + ); + + const listIo = makeIo(); + const code = await runKloSl( + { command: 'list', projectDir, connectionId: 'warehouse', output: 'pretty' }, + listIo.io, + ); + expect(code).toBe(0); + + const stripAnsi = (s: string) => 
s.replace(/\[[0-9;]*m/g, ''); + const out = stripAnsi(listIo.stdout()); + expect(out).toContain('sl list'); + expect(out).toContain('warehouse'); + expect(out).toContain('orders'); + expect(out).toContain('1 source'); + }); +}); diff --git a/packages/cli/src/sl.ts b/packages/cli/src/sl.ts new file mode 100644 index 00000000..c7a0d2bf --- /dev/null +++ b/packages/cli/src/sl.ts @@ -0,0 +1,129 @@ +import { createDefaultLocalQueryExecutor, type KloSqlQueryExecutorPort } from '@klo/context/connections'; +import { createPythonSemanticLayerComputePort, type KloSemanticLayerComputePort } from '@klo/context/daemon'; +import { loadKloProject, type KloLocalProject } from '@klo/context/project'; +import { + compileLocalSlQuery, + listLocalSlSources, + readLocalSlSource, + validateLocalSlSource, + writeLocalSlSource, + type SemanticLayerQueryInput, +} from '@klo/context/sl'; +import { profileMark } from './startup-profile.js'; + +profileMark('module:sl'); + +type SlQueryFormat = 'json' | 'sql'; + +export type KloSlArgs = + | { command: 'list'; projectDir: string; connectionId?: string; output?: string; json?: boolean } + | { command: 'read'; projectDir: string; connectionId: string; sourceName: string } + | { command: 'validate'; projectDir: string; connectionId: string; sourceName: string } + | { command: 'write'; projectDir: string; connectionId: string; sourceName: string; yaml: string } + | { + command: 'query'; + projectDir: string; + connectionId?: string; + query: SemanticLayerQueryInput; + format: SlQueryFormat; + execute: boolean; + maxRows?: number; + }; + +interface KloSlIo { + stdout: { write(chunk: string): void }; + stderr: { write(chunk: string): void }; +} + +interface KloSlDeps { + loadProject?: typeof loadKloProject; + createSemanticLayerCompute?: () => KloSemanticLayerComputePort; + createQueryExecutor?: () => KloSqlQueryExecutorPort; +} + +export async function runKloSl(args: KloSlArgs, io: KloSlIo = process, deps: KloSlDeps = {}): Promise { + try { + const 
project = await (deps.loadProject ?? loadKloProject)({ projectDir: args.projectDir }); + if (args.command === 'list') { + const sources = await listLocalSlSources(project, { connectionId: args.connectionId }); + const { resolveOutputMode } = await import('./io/mode.js'); + const { printList } = await import('./io/print-list.js'); + const mode = resolveOutputMode({ explicit: args.output, json: args.json, io }); + printList({ + rows: sources, + columns: [ + { key: 'connectionId', label: 'CONNECTION', plain: '' }, + { key: 'name', label: 'NAME', plain: '' }, + { key: 'columnCount', label: 'COLS', plain: 'columns=', dim: true }, + { key: 'measureCount', label: 'MEASURES', plain: 'measures=', dim: true }, + { key: 'joinCount', label: 'JOINS', plain: 'joins=', dim: true }, + { key: 'description', label: 'DESCRIPTION', plain: false, optional: true, dim: true }, + ], + groupBy: 'connectionId', + emptyMessage: `No semantic-layer sources found in ${project.projectDir}`, + command: 'sl list', + mode, + io, + }); + return 0; + } + if (args.command === 'read') { + const source = await readLocalSlSource(project, { + connectionId: args.connectionId, + sourceName: args.sourceName, + }); + if (!source) { + throw new Error(`Semantic-layer source "${args.connectionId}/${args.sourceName}" was not found`); + } + io.stdout.write(source.yaml); + return 0; + } + if (args.command === 'validate') { + const source = await readLocalSlSource(project, { + connectionId: args.connectionId, + sourceName: args.sourceName, + }); + if (!source) { + throw new Error(`Semantic-layer source "${args.connectionId}/${args.sourceName}" was not found`); + } + const result = await validateLocalSlSource(source.yaml); + if (!result.valid) { + for (const error of result.errors) { + io.stderr.write(`${error}\n`); + } + return 1; + } + io.stdout.write(`Valid semantic-layer source: ${args.connectionId}/${args.sourceName}\n`); + return 0; + } + if (args.command === 'query') { + const compute = 
(deps.createSemanticLayerCompute ?? createPythonSemanticLayerComputePort)(); + const queryExecutor = args.execute ? (deps.createQueryExecutor ?? createDefaultLocalQueryExecutor)() : undefined; + const result = await compileLocalSlQuery(project as KloLocalProject, { + connectionId: args.connectionId, + query: args.query, + compute, + execute: args.execute, + maxRows: args.maxRows, + queryExecutor, + }); + if (args.format === 'sql') { + io.stdout.write(`${result.sql}\n`); + return 0; + } + io.stdout.write(`${JSON.stringify(result, null, 2)}\n`); + return 0; + } + + const write = await writeLocalSlSource(project, { + connectionId: args.connectionId, + sourceName: args.sourceName, + yaml: args.yaml, + }); + io.stdout.write(`Wrote ${write.path}\n`); + return 0; + } catch (error) { + io.stderr.write(`${error instanceof Error ? error.message : String(error)}\n`); + return 1; + } +} diff --git a/packages/cli/src/standalone-smoke.test.ts b/packages/cli/src/standalone-smoke.test.ts new file mode 100644 index 00000000..3beef5ed --- /dev/null +++ b/packages/cli/src/standalone-smoke.test.ts @@ -0,0 +1,926 @@ +import { execFile } from 'node:child_process'; +import { mkdir, mkdtemp, readFile, rm, writeFile } from 'node:fs/promises'; +import { tmpdir } from 'node:os'; +import { join, resolve } from 'node:path'; +import { promisify } from 'node:util'; +import { parseKloProjectConfig } from '@klo/context/project'; +import { Client } from '@modelcontextprotocol/sdk/client/index.js'; +import { StdioClientTransport } from '@modelcontextprotocol/sdk/client/stdio.js'; +import Database from 'better-sqlite3'; +import { afterEach, beforeEach, describe, expect, it } from 'vitest'; + +const execFileAsync = promisify(execFile); +const CLI_BIN = resolve(process.cwd(), 'dist/bin.js'); + +interface CliResult { + code: number; + stdout: string; + stderr: string; +} + +interface ExecFailure extends Error { + code?: number; + stdout?: string; + stderr?: string; +} + +function isExecFailure(error: 
unknown): error is ExecFailure {
  return error instanceof Error && ('stdout' in error || 'stderr' in error || 'code' in error);
}

/**
 * Runs the built CLI entry point (`CLI_BIN`) under the current Node executable.
 *
 * A process failure is converted into a result instead of a rejection, so tests can
 * assert on the exit code and captured streams. Errors that do not look like an
 * `execFile` failure are rethrown.
 *
 * @param args CLI arguments placed after the binary path.
 * @param options.env Environment for the child process; omitted when not provided.
 * @returns Exit code (1 when the failure carries no numeric code), stdout and stderr.
 */
async function runBuiltCli(args: string[], options: { env?: NodeJS.ProcessEnv } = {}): Promise<CliResult> {
  try {
    const result = await execFileAsync(process.execPath, [CLI_BIN, ...args], {
      encoding: 'utf8',
      timeout: 20_000,
      ...(options.env ? { env: options.env } : {}),
    });
    return {
      code: 0,
      stdout: result.stdout,
      stderr: result.stderr,
    };
  } catch (error) {
    if (!isExecFailure(error)) {
      throw error;
    }
    return {
      code: typeof error.code === 'number' ? error.code : 1,
      stdout: error.stdout ?? '',
      stderr: error.stderr ?? error.message,
    };
  }
}

/**
 * Extracts the run id from CLI output containing a line of the form `Run: <id>`.
 *
 * @throws Error when no such line is present.
 */
function getRunId(stdout: string): string {
  // Optional chaining keeps the capture-group access safe when nothing matched.
  const runId = stdout.match(/^Run: (.+)$/m)?.[1];
  if (!runId) {
    throw new Error(`Could not find run id in output:\n${stdout}`);
  }
  return runId;
}

/**
 * Returns the `structuredContent` field of a tool result, cast to `T`.
 * Fails the current test when the field is undefined.
 */
function structuredContent<T>(result: unknown): T {
  const content = (result as { structuredContent?: unknown }).structuredContent;
  expect(content).toBeDefined();
  return content as T;
}

/**
 * Overwrites `<projectDir>/klo.yaml` with a config declaring a `warehouse` postgres
 * connection and the `fake` ingest adapter.
 */
async function writeWarehouseConfig(projectDir: string): Promise<void> {
  // NOTE(review): the leading spaces inside these YAML literals look collapsed to a
  // single space; confirm the nesting indentation before relying on this fixture.
  await writeFile(
    join(projectDir, 'klo.yaml'),
    [
      'project: warehouse',
      'connections:',
      ' warehouse:',
      ' driver: postgres',
      'ingest:',
      ' adapters:',
      ' - fake',
      '',
    ].join('\n'),
    'utf-8',
  );
}

/** Creates `<sourceDir>/orders/orders.json` holding a single `orders` record. */
async function writeSourceFixture(sourceDir: string): Promise<void> {
  await mkdir(join(sourceDir, 'orders'), { recursive: true });
  await writeFile(join(sourceDir, 'orders', 'orders.json'), '{"name":"orders"}\n', 'utf-8');
}

/**
 * Creates a SQLite database at `dbPath` with `customers` and `orders` tables
 * (orders reference customers by foreign key) and two rows in each.
 * The handle is closed even when the setup script throws.
 */
function createSqliteWarehouse(dbPath: string): void {
  const db = new Database(dbPath);
  try {
    db.exec(`
      PRAGMA foreign_keys = ON;
      CREATE TABLE customers (
        id INTEGER PRIMARY KEY,
        name TEXT NOT NULL
      );
      CREATE TABLE orders (
        id INTEGER PRIMARY KEY,
        customer_id INTEGER NOT NULL,
        total NUMERIC,
        created_at TEXT,
        FOREIGN KEY(customer_id) REFERENCES customers(id)
      );
      INSERT INTO customers (id, name) VALUES (1, 'Ada'), (2, 'Grace');
      INSERT INTO orders (id, customer_id, total, created_at)
        VALUES (10, 1, 42.5, '2026-04-28'), (11, 2, 9.5, '2026-04-29');
    `);
  } finally {
    db.close();
  }
}

/**
 * Overwrites `<projectDir>/klo.yaml` with a config pointing the `warehouse`
 * connection at the SQLite file `dbPath` and enabling the `live-database` adapter.
 *
 * @param enrich When true, appends a `scan` section with deterministic enrichment
 *   and deterministic embeddings settings.
 */
async function writeSqliteScanConfig(projectDir: string, dbPath: string, enrich = false): Promise<void> {
  // NOTE(review): the leading spaces inside these YAML literals look collapsed to a
  // single space; confirm the nesting (in particular where `embeddings:` sits under
  // `scan:`) before relying on this fixture.
  const scanSection = enrich
    ? [
        'scan:',
        ' enrichment:',
        ' mode: deterministic',
        ' embeddings:',
        ' backend: deterministic',
        ' dimensions: 6',
      ]
    : [];
  await writeFile(
    join(projectDir, 'klo.yaml'),
    [
      'project: warehouse',
      'connections:',
      ' warehouse:',
      ' driver: sqlite',
      // JSON.stringify quotes the path before it is embedded in the YAML text.
      ` path: ${JSON.stringify(dbPath)}`,
      ' readonly: true',
      'ingest:',
      ' adapters:',
      ' - live-database',
      ...scanSection,
      '',
    ].join('\n'),
    'utf-8',
  );
}

/** Parses CLI stdout as JSON and casts the result to `T`. */
function parseJsonOutput<T>(stdout: string): T {
  return JSON.parse(stdout) as T;
}

/**
 * Runs `klo setup --new` for `projectDir` through the built CLI with prompts
 * disabled and the LLM, embeddings, databases, sources and agents steps skipped.
 */
async function runSetupNewProject(projectDir: string): Promise<CliResult> {
  return await runBuiltCli([
    'setup',
    '--project-dir',
    projectDir,
    '--new',
    '--no-input',
    '--yes',
    '--skip-llm',
    '--skip-embeddings',
    '--skip-databases',
    '--skip-sources',
    '--skip-agents',
  ]);
}

describe('standalone built klo CLI smoke', () => {
  let tempDir: string;

  beforeEach(async () => {
    tempDir = await mkdtemp(join(tmpdir(), 'klo-standalone-smoke-'));
  });

  afterEach(async () => {
    await rm(tempDir, { recursive: true, force: true });
  });

  it('reports missing local ingest LLM config through the built binary', async () => {
    const projectDir = join(tempDir, 'project');
    const sourceDir = join(tempDir, 'source');

    const init = await runSetupNewProject(projectDir);
    expect(init).toMatchObject({ code: 0, stderr: '' });
    expect(init.stdout).toContain(`Project: ${projectDir}`);

    await writeWarehouseConfig(projectDir);
    await writeSourceFixture(sourceDir);

    const run = await runBuiltCli([
      'dev',
      'ingest',
      'run',
      '--project-dir',
      projectDir,
      '--connection-id',
'warehouse', + '--adapter', + 'fake', + '--source-dir', + sourceDir, + ]); + expect(run).toMatchObject({ code: 1, stdout: '' }); + expect(run.stderr).toContain( + 'klo dev ingest run requires llm.provider.backend: anthropic, vertex, or gateway, or an injected agentRunner', + ); + }); + + it('runs the default pre-seeded demo without credentials', async () => { + const result = await runBuiltCli( + ['setup', 'demo', '--project-dir', join(tempDir, 'demo-project'), '--plain', '--no-input'], + { + env: { ...process.env, ANTHROPIC_API_KEY: '' }, + }, + ); + + expect(result).toMatchObject({ code: 0, stderr: '' }); + expect(result.stdout).toContain('Mode: seeded'); + expect(result.stdout).toContain('Source: packaged demo project'); + expect(result.stdout).toContain('LLM calls: none'); + expect(result.stdout).toContain('Warehouse:'); + expect(result.stdout).toContain('dbt:'); + expect(result.stdout).toContain('BI:'); + expect(result.stdout).toContain('Notion:'); + expect(result.stdout).toContain('Semantic-layer sources:'); + expect(result.stdout).toContain('Knowledge pages:'); + expect(result.stdout).toContain('klo serve --mcp stdio'); + expect(result.stdout).not.toContain(['--mode', 'deterministic'].join(' ')); + }); + + it('runs hybrid agent search against the seeded demo through the built binary', async () => { + const projectDir = join(tempDir, 'seeded-hybrid-search-project'); + + const seeded = await runBuiltCli(['setup', 'demo', '--project-dir', projectDir, '--plain', '--no-input'], { + env: { ...process.env, ANTHROPIC_API_KEY: '' }, + }); + expect(seeded).toMatchObject({ code: 0, stderr: '' }); + expect(seeded.stdout).toContain('Mode: seeded'); + + const wikiSearch = await runBuiltCli([ + 'agent', + 'wiki', + 'search', + 'ARR contract', + '--json', + '--limit', + '5', + '--project-dir', + projectDir, + ]); + expect(wikiSearch).toMatchObject({ code: 0, stderr: '' }); + const wikiJson = parseJsonOutput<{ + results: Array<{ key: string; score: number; matchReasons?: 
string[] }>; + totalFound: number; + }>(wikiSearch.stdout); + expect(wikiJson.totalFound).toBeGreaterThan(0); + expect(wikiJson.results.some((result) => result.matchReasons?.length)).toBe(true); + + const slSearch = await runBuiltCli([ + 'agent', + 'sl', + 'list', + '--json', + '--query', + 'ARR', + '--project-dir', + projectDir, + ]); + expect(slSearch).toMatchObject({ code: 0, stderr: '' }); + const slJson = parseJsonOutput<{ + sources: Array<{ connectionId: string; name: string; score?: number; matchReasons?: string[] }>; + totalSources: number; + }>(slSearch.stdout); + expect(slJson.totalSources).toBeGreaterThan(0); + expect(slJson.sources.some((source) => source.matchReasons?.length)).toBe(true); + }); + + it('prints guided JSON for agent semantic-layer search outside a project through the built binary', async () => { + const projectDir = join(tempDir, 'missing-search-project'); + await mkdir(projectDir, { recursive: true }); + + const result = await runBuiltCli([ + 'agent', + 'sl', + 'list', + '--json', + '--query', + 'revenue', + '--project-dir', + projectDir, + ]); + + expect(result.code).toBe(1); + expect(result.stdout).toBe(''); + const errorJson = parseJsonOutput<{ + ok: false; + error: { code: string; message: string; nextSteps: string[] }; + }>(result.stderr); + expect(errorJson).toEqual({ + ok: false, + error: { + code: 'agent_sl_search_missing_project', + message: `Semantic-layer search needs an initialized KLO project at ${projectDir}.`, + nextSteps: [ + 'klo demo', + `klo setup --project-dir ${projectDir}`, + 'klo ingest ', + `klo agent sl list --json --query "revenue" --project-dir ${projectDir}`, + ], + }, + }); + }); + + it('runs the pre-seeded demo and inspect without credentials', async () => { + const projectDir = join(tempDir, 'seeded-demo-project'); + + const seeded = await runBuiltCli(['setup', 'demo', '--mode', 'seeded', '--project-dir', projectDir, '--no-input']); + expect(seeded.code).toBe(0); + expect(seeded.stdout).toContain('Mode: 
seeded'); + expect(seeded.stdout).toContain('LLM calls: none'); + expect(seeded.stdout).toContain('Semantic-layer sources:'); + expect(seeded.stdout).toContain('Knowledge pages:'); + + const inspect = await runBuiltCli(['setup', 'demo', 'inspect', '--project-dir', projectDir, '--no-input']); + expect(inspect).toMatchObject({ code: 0, stderr: '' }); + expect(inspect.stdout).toContain('Mode: seeded'); + expect(inspect.stdout).toContain('Status: ready'); + expect(inspect.stdout).toContain('Warehouse: 8 tables, 11,234 rows'); + expect(inspect.stdout).toContain('Rows: accounts 210, arr_movements 720'); + expect(inspect.stdout).toContain('dbt: 3 models, 8 source tables'); + expect(inspect.stdout).toContain('BI: 5 explores, 2 dashboards'); + expect(inspect.stdout).toContain('Notion: 8 pages'); + expect(inspect.stdout).toContain('Semantic-layer sources:'); + expect(inspect.stdout).toContain('Knowledge pages:'); + expect(inspect.stdout).toContain('Evidence links:'); + expect(inspect.stdout).toContain('Report: reports/seeded-demo-report.json'); + expect(inspect.stdout).toContain('Replay: replays/replay.memory-flow.v1.json'); + expect(inspect.stdout).toContain('Latest replay: seeded (packaged, prebuilt)'); + expect(inspect.stdout).toContain('klo agent tools --json'); + expect(inspect.stdout).toContain('klo agent context --json'); + expect(inspect.stdout).not.toContain('klo ask "your question here"'); + expect(inspect.stdout).toContain('klo serve --mcp stdio'); + }); + + it('serves seeded demo wiki and semantic-layer context over stdio MCP', async () => { + const projectDir = join(tempDir, 'seeded-mcp-project'); + + const seeded = await runBuiltCli( + ['setup', 'demo', '--mode', 'seeded', '--project-dir', projectDir, '--plain', '--no-input'], + { + env: { ...process.env, ANTHROPIC_API_KEY: '' }, + }, + ); + expect(seeded).toMatchObject({ code: 0, stderr: '' }); + expect(seeded.stdout).toContain('Mode: seeded'); + + const client = new Client({ name: 
'klo-seeded-demo-smoke-client', version: '0.0.0' }); + const transport = new StdioClientTransport({ + command: process.execPath, + args: [CLI_BIN, 'serve', '--mcp', 'stdio', '--project-dir', projectDir, '--user-id', 'smoke-user'], + stderr: 'pipe', + }); + + try { + await client.connect(transport); + const toolNames = (await client.listTools()).tools.map((tool) => tool.name).sort(); + expect(toolNames).toEqual( + expect.arrayContaining(['knowledge_read', 'knowledge_search', 'sl_read_source', 'sl_validate']), + ); + + const knowledgeSearch = structuredContent<{ + results: Array<{ key: string; summary: string; score: number }>; + totalFound: number; + }>(await client.callTool({ name: 'knowledge_search', arguments: { query: 'ARR contract', limit: 5 } })); + expect(knowledgeSearch.totalFound).toBeGreaterThan(0); + expect(knowledgeSearch.results.map((result) => result.key)).toContain('arr-contract-first'); + + const knowledgeRead = structuredContent<{ + key: string; + summary: string; + content: string; + tags: string[]; + slRefs: string[]; + }>(await client.callTool({ name: 'knowledge_read', arguments: { key: 'arr-contract-first' } })); + expect(knowledgeRead.key).toBe('arr-contract-first'); + expect(knowledgeRead.summary).toContain('ARR'); + expect(knowledgeRead.content).toContain('contract'); + expect(knowledgeRead.slRefs).toContain('orbit_demo.contracts'); + + const slRead = structuredContent<{ sourceName: string; yaml: string }>( + await client.callTool({ + name: 'sl_read_source', + arguments: { connectionId: 'orbit_demo', sourceName: 'accounts' }, + }), + ); + expect(slRead.sourceName).toBe('accounts'); + expect(slRead.yaml).toContain('name: accounts'); + expect(slRead.yaml).toContain('measures:'); + + const slValidate = structuredContent<{ success: boolean; errors: string[]; warnings: string[] }>( + await client.callTool({ + name: 'sl_validate', + arguments: { connectionId: 'orbit_demo', names: ['accounts', 'contracts'] }, + }), + ); + 
expect(slValidate.success).toBe(true); + expect(slValidate.errors).toEqual([]); + } finally { + await client.close(); + } + }); + + it('runs doctor setup through the built binary', async () => { + const result = await runBuiltCli(['dev', 'doctor', 'setup', '--no-input']); + + expect(result.stdout).toContain('KLO setup doctor'); + expect(result.stdout).toContain('Node 22+'); + expect(result.stdout).toContain('Workspace-local CLI'); + expect(result.stderr).toBe(''); + expect([0, 1]).toContain(result.code); + }); + + it('reports missing Anthropic credentials for full demo through the built binary', async () => { + const projectDir = join(tempDir, 'full-demo-missing-key'); + + const result = await runBuiltCli(['setup', 'demo', '--mode', 'full', '--project-dir', projectDir, '--no-input'], { + env: { ...process.env, ANTHROPIC_API_KEY: '' }, + }); + + expect(result.code).toBe(1); + expect(result.stderr).toContain('klo setup demo --mode full needs ANTHROPIC_API_KEY'); + expect(result.stderr).toContain('klo setup demo --mode seeded --no-input'); + }); + + it('requires force for demo reset through the built binary', async () => { + const projectDir = join(tempDir, 'reset-demo-project'); + + const init = await runBuiltCli(['setup', 'demo', 'init', '--project-dir', projectDir, '--no-input']); + expect(init).toMatchObject({ code: 0, stderr: '' }); + + const withoutForce = await runBuiltCli(['setup', 'demo', 'reset', '--project-dir', projectDir, '--no-input']); + expect(withoutForce.code).toBe(1); + expect(withoutForce.stderr).toContain( + `klo setup demo reset is destructive; pass --force to recreate ${projectDir}`, + ); + + const withForce = await runBuiltCli([ + 'setup', + 'demo', + 'reset', + '--project-dir', + projectDir, + '--force', + '--no-input', + ]); + expect(withForce).toMatchObject({ code: 0, stderr: '' }); + expect(withForce.stdout).toContain(`Demo project reset: ${projectDir}`); + }); + + it('reports corrupted demo state with reset guidance through the built 
binary', async () => { + const projectDir = join(tempDir, 'corrupt-demo-project'); + + const init = await runBuiltCli(['setup', 'demo', 'init', '--project-dir', projectDir, '--no-input']); + expect(init).toMatchObject({ code: 0, stderr: '' }); + await rm(join(projectDir, 'demo.db'), { force: true }); + + const replay = await runBuiltCli(['setup', 'demo', '--mode', 'replay', '--project-dir', projectDir, '--no-input']); + expect(replay.code).toBe(1); + expect(replay.stderr).toContain(`Demo project is not ready at ${projectDir}: missing demo.db`); + expect(replay.stderr).toContain(`klo setup demo reset --project-dir ${projectDir} --force --no-input`); + }); + + it('runs demo doctor through the built binary', async () => { + const projectDir = join(tempDir, 'doctor-demo-project'); + + const init = await runBuiltCli(['setup', 'demo', 'init', '--project-dir', projectDir, '--no-input']); + expect(init).toMatchObject({ code: 0, stderr: '' }); + + const result = await runBuiltCli(['setup', 'demo', 'doctor', '--project-dir', projectDir, '--no-input']); + expect(result.stdout).toContain('KLO demo doctor'); + expect(result.stdout).toContain('Demo dataset'); + expect(result.stdout).toContain('Demo replay'); + expect(result.stdout).toContain('Demo LLM provider'); + expect(result.stderr).toBe(''); + expect([0, 1]).toContain(result.code); + }); + + it('runs demo ingest seeded mode through the built binary', async () => { + const projectDir = join(tempDir, 'seeded-ingest-alias'); + + const result = await runBuiltCli([ + 'setup', + 'demo', + 'ingest', + '--mode', + 'seeded', + '--project-dir', + projectDir, + '--no-input', + ]); + + expect(result.code).toBe(0); + expect(result.stdout).toContain('Mode: seeded'); + expect(result.stdout).toContain('LLM calls: none'); + }); + + it('runs structural and enriched scans through the built binary with manifest artifacts', async () => { + const projectDir = join(tempDir, 'scan-project'); + const init = await runSetupNewProject(projectDir); + 
expect(init).toMatchObject({ code: 0, stderr: '' }); + + const dbPath = join(projectDir, 'warehouse.db'); + createSqliteWarehouse(dbPath); + await writeSqliteScanConfig(projectDir, dbPath); + + const connectionTest = await runBuiltCli(['connection', 'test', 'warehouse', '--project-dir', projectDir]); + expect(connectionTest).toMatchObject({ code: 0, stderr: '' }); + expect(connectionTest.stdout).toContain('Connection test passed: warehouse'); + expect(connectionTest.stdout).toContain('Driver: sqlite'); + expect(connectionTest.stdout).toContain('Tables: 2'); + + const structural = await runBuiltCli(['dev', 'scan', 'warehouse', '--project-dir', projectDir]); + expect(structural).toMatchObject({ code: 0, stderr: '' }); + expect(structural.stdout).toContain('Status: done'); + expect(structural.stdout).toContain('Mode: structural'); + const structuralRunId = getRunId(structural.stdout); + + const structuralReportResult = await runBuiltCli([ + 'dev', + 'scan', + 'report', + '--json', + '--project-dir', + projectDir, + structuralRunId, + ]); + expect(structuralReportResult).toMatchObject({ code: 0, stderr: '' }); + const structuralReport = parseJsonOutput<{ + mode: string; + artifactPaths: { manifestShards: string[]; enrichmentArtifacts: string[] }; + manifestShardsWritten: number; + }>(structuralReportResult.stdout); + expect(structuralReport.mode).toBe('structural'); + expect(structuralReport.artifactPaths.manifestShards).toEqual(['semantic-layer/warehouse/_schema/public.yaml']); + expect(structuralReport.artifactPaths.enrichmentArtifacts).toEqual([]); + expect(structuralReport.manifestShardsWritten).toBe(1); + + const structuralManifest = await readFile( + join(projectDir, 'semantic-layer/warehouse/_schema/public.yaml'), + 'utf-8', + ); + expect(structuralManifest).toContain('customers:'); + expect(structuralManifest).toContain('orders:'); + expect(structuralManifest).toContain('source: formal'); + expect(structuralManifest).not.toContain('ai:'); + + const 
providerlessEnriched = await runBuiltCli([ + 'dev', + 'scan', + 'warehouse', + '--project-dir', + projectDir, + '--mode', + 'enriched', + ]); + expect(providerlessEnriched).toMatchObject({ code: 0, stderr: '' }); + expect(providerlessEnriched.stdout).toContain('Mode: enriched'); + expect(providerlessEnriched.stdout).toContain('Relationships'); + expect(providerlessEnriched.stdout).toContain('Accepted: 1'); + expect(providerlessEnriched.stdout).toContain('scan_enrichment_backend_not_configured'); + expect(providerlessEnriched.stdout).toContain('Enrichment artifacts: 3'); + const providerlessRunId = getRunId(providerlessEnriched.stdout); + + const providerlessReportResult = await runBuiltCli([ + 'dev', + 'scan', + 'report', + '--json', + '--project-dir', + projectDir, + providerlessRunId, + ]); + expect(providerlessReportResult).toMatchObject({ code: 0, stderr: '' }); + const providerlessReport = parseJsonOutput<{ + mode: string; + enrichment: { + tableDescriptions: string; + columnDescriptions: string; + embeddings: string; + deterministicRelationships: string; + statisticalValidation: string; + }; + relationships: { accepted: number; review: number; rejected: number; skipped: number }; + warnings: Array<{ code: string }>; + artifactPaths: { enrichmentArtifacts: string[]; manifestShards: string[] }; + }>(providerlessReportResult.stdout); + expect(providerlessReport.mode).toBe('enriched'); + expect(providerlessReport.enrichment).toMatchObject({ + tableDescriptions: 'skipped', + columnDescriptions: 'skipped', + embeddings: 'skipped', + deterministicRelationships: 'completed', + statisticalValidation: 'completed', + }); + expect(providerlessReport.relationships).toEqual({ accepted: 1, review: 0, rejected: 0, skipped: 0 }); + expect(providerlessReport.warnings).toEqual( + expect.arrayContaining([expect.objectContaining({ code: 'scan_enrichment_backend_not_configured' })]), + ); + expect(providerlessReport.artifactPaths.enrichmentArtifacts).toEqual( + 
expect.arrayContaining([ + expect.stringContaining('/enrichment/relationships.json'), + expect.stringContaining('/enrichment/relationship-profile.json'), + expect.stringContaining('/enrichment/relationship-diagnostics.json'), + ]), + ); + expect(providerlessReport.artifactPaths.manifestShards).toEqual(['semantic-layer/warehouse/_schema/public.yaml']); + + await writeSqliteScanConfig(projectDir, dbPath, true); + const enriched = await runBuiltCli(['dev', 'scan', 'warehouse', '--project-dir', projectDir, '--mode', 'enriched']); + expect(enriched).toMatchObject({ code: 0, stderr: '' }); + expect(enriched.stdout).toContain('Mode: enriched'); + const enrichedRunId = getRunId(enriched.stdout); + + const enrichedReportResult = await runBuiltCli([ + 'dev', + 'scan', + 'report', + '--json', + '--project-dir', + projectDir, + enrichedRunId, + ]); + expect(enrichedReportResult).toMatchObject({ code: 0, stderr: '' }); + const enrichedReport = parseJsonOutput<{ + mode: string; + enrichment: { tableDescriptions: string; columnDescriptions: string; embeddings: string }; + artifactPaths: { enrichmentArtifacts: string[]; manifestShards: string[] }; + }>(enrichedReportResult.stdout); + expect(enrichedReport.mode).toBe('enriched'); + expect(enrichedReport.enrichment).toMatchObject({ + tableDescriptions: 'completed', + columnDescriptions: 'completed', + embeddings: 'completed', + }); + expect(enrichedReport.artifactPaths.enrichmentArtifacts).toEqual( + expect.arrayContaining([ + expect.stringContaining('/enrichment/descriptions.json'), + expect.stringContaining('/enrichment/embeddings.json'), + expect.stringContaining('/enrichment/relationships.json'), + expect.stringContaining('/enrichment/relationship-profile.json'), + expect.stringContaining('/enrichment/relationship-diagnostics.json'), + ]), + ); + expect(enrichedReport.artifactPaths.manifestShards).toEqual(['semantic-layer/warehouse/_schema/public.yaml']); + + const enrichedManifest = await readFile(join(projectDir, 
'semantic-layer/warehouse/_schema/public.yaml'), 'utf-8'); + expect(enrichedManifest).toContain('Deterministic description'); + }, 30_000); + + it('parses gateway LLM config and OpenAI enrichment embeddings used by standalone scans without network calls', async () => { + const projectDir = join(tempDir, 'gateway-config-project'); + await mkdir(projectDir, { recursive: true }); + await writeFile( + join(projectDir, 'klo.yaml'), + [ + 'project: gateway-smoke', + 'llm:', + ' provider:', + ' backend: gateway', + ' gateway:', + ' api_key: env:AI_GATEWAY_API_KEY', // pragma: allowlist secret + ' models:', + ' default: env:KLO_SCAN_LLM_MODEL', + 'scan:', + ' enrichment:', + ' mode: llm', + ' embeddings:', + ' backend: openai', + ' model: env:KLO_SCAN_EMBEDDING_MODEL', + ' dimensions: 1536', + ' openai:', + ' api_key: env:OPENAI_API_KEY', // pragma: allowlist secret + ' batchSize: 16', + '', + ].join('\n'), + 'utf8', + ); + + const config = parseKloProjectConfig(await readFile(join(projectDir, 'klo.yaml'), 'utf8')); + expect(config.llm).toEqual({ + provider: { + backend: 'gateway', + gateway: { api_key: 'env:AI_GATEWAY_API_KEY' }, // pragma: allowlist secret + }, + models: { default: 'env:KLO_SCAN_LLM_MODEL' }, + }); + expect(config.scan.enrichment).toEqual({ + mode: 'llm', + embeddings: { + backend: 'openai', + model: 'env:KLO_SCAN_EMBEDDING_MODEL', + dimensions: 1536, + openai: { api_key: 'env:OPENAI_API_KEY' }, // pragma: allowlist secret + batchSize: 16, + }, + }); + }); + + it('adds a redacted Notion connection through the built binary', async () => { + const projectDir = join(tempDir, 'notion-project'); + const init = await runSetupNewProject(projectDir); + expect(init).toMatchObject({ code: 0, stderr: '' }); + + const add = await runBuiltCli([ + 'connection', + 'add', + 'notion', + 'notion-main', + '--project-dir', + projectDir, + '--token-env', + 'NOTION_AUTH_TOKEN', + '--crawl-mode', + 'all_accessible', + '--max-pages', + '5', + ]); + + expect(add).toMatchObject({ 
code: 0, stderr: '' }); + expect(add.stdout).toContain('Connection: notion-main'); + expect(add.stdout).toContain('Driver: notion'); + + const yaml = await readFile(join(projectDir, 'klo.yaml'), 'utf-8'); + expect(yaml).toContain('driver: notion'); + expect(yaml).toContain('auth_token_ref: env:NOTION_AUTH_TOKEN'); + expect(yaml).toContain('crawl_mode: all_accessible'); + expect(yaml).toContain('max_pages_per_run: 5'); + expect(yaml).not.toContain('ntn_'); + + const parsed = parseKloProjectConfig(yaml); + expect(parsed.connections['notion-main']).toMatchObject({ + driver: 'notion', + auth_token_ref: 'env:NOTION_AUTH_TOKEN', + crawl_mode: 'all_accessible', + }); + }); + + it('serves local ingest MCP tools over stdio from the built binary', async () => { + const projectDir = join(tempDir, 'project'); + + const init = await runSetupNewProject(projectDir); + expect(init).toMatchObject({ code: 0, stderr: '' }); + await writeWarehouseConfig(projectDir); + + const client = new Client({ name: 'klo-smoke-client', version: '0.0.0' }); + const transport = new StdioClientTransport({ + command: process.execPath, + args: [CLI_BIN, 'serve', '--mcp', 'stdio', '--project-dir', projectDir, '--user-id', 'smoke-user'], + stderr: 'pipe', + }); + + try { + await client.connect(transport); + const tools = await client.listTools(); + const toolNames = tools.tools.map((tool) => tool.name).sort(); + expect(toolNames).toEqual( + expect.arrayContaining([ + 'connection_list', + 'connection_test', + 'ingest_report', + 'ingest_replay', + 'ingest_status', + 'ingest_trigger', + 'knowledge_read', + 'knowledge_search', + 'knowledge_write', + 'scan_list_artifacts', + 'scan_read_artifact', + 'scan_report', + 'scan_status', + 'scan_trigger', + 'sl_list_sources', + 'sl_read_source', + 'sl_validate', + 'sl_write_source', + ]), + ); + + const connections = structuredContent<{ + connections: Array<{ id: string; name: string; connectionType: string }>; + }>(await client.callTool({ name: 'connection_list', 
arguments: {} })); + expect(connections).toEqual({ + connections: [{ id: 'warehouse', name: 'warehouse', connectionType: 'POSTGRESQL' }], + }); + + await expect(client.callTool({ name: 'ingest_status', arguments: { runId: 'missing-run' } })).resolves.toEqual({ + content: [{ type: 'text', text: 'Ingest run "missing-run" was not found.' }], + isError: true, + }); + + await expect(client.callTool({ name: 'ingest_report', arguments: { runId: 'missing-run' } })).resolves.toEqual({ + content: [{ type: 'text', text: 'Ingest report "missing-run" was not found.' }], + isError: true, + }); + + await expect(client.callTool({ name: 'ingest_replay', arguments: { runId: 'missing-run' } })).resolves.toEqual({ + content: [{ type: 'text', text: 'Ingest replay "missing-run" was not found.' }], + isError: true, + }); + } finally { + await client.close(); + } + }); + + it('serves scan execution and artifact inspection tools over stdio from the built binary', async () => { + const projectDir = join(tempDir, 'scan-mcp-project'); + const init = await runSetupNewProject(projectDir); + expect(init).toMatchObject({ code: 0, stderr: '' }); + + const dbPath = join(projectDir, 'warehouse.db'); + createSqliteWarehouse(dbPath); + await writeSqliteScanConfig(projectDir, dbPath); + + const client = new Client({ name: 'klo-scan-smoke-client', version: '0.0.0' }); + const transport = new StdioClientTransport({ + command: process.execPath, + args: [CLI_BIN, 'serve', '--mcp', 'stdio', '--project-dir', projectDir, '--user-id', 'smoke-user'], + stderr: 'pipe', + }); + + try { + await client.connect(transport); + + const connectionTest = structuredContent<{ + id: string; + connectionType: string; + ok: boolean; + tableCount: number | null; + }>(await client.callTool({ name: 'connection_test', arguments: { connectionId: 'warehouse' } })); + expect(connectionTest).toMatchObject({ + id: 'warehouse', + connectionType: 'SQLITE', + ok: true, + tableCount: 2, + }); + + const trigger = structuredContent<{ + 
runId: string; + status: 'done'; + done: true; + connectionId: string; + mode: string; + dryRun: boolean; + report: { + artifactPaths: { manifestShards: string[] }; + manifestShardsWritten: number; + }; + }>( + await client.callTool({ + name: 'scan_trigger', + arguments: { + connectionId: 'warehouse', + mode: 'structural', + detectRelationships: false, + dryRun: false, + }, + }), + ); + expect(trigger).toMatchObject({ + status: 'done', + done: true, + connectionId: 'warehouse', + mode: 'structural', + dryRun: false, + }); + expect(trigger.report.artifactPaths.manifestShards).toEqual(['semantic-layer/warehouse/_schema/public.yaml']); + expect(trigger.report.manifestShardsWritten).toBe(1); + + const status = structuredContent<{ + runId: string; + status: string; + done: boolean; + reportPath: string | null; + }>(await client.callTool({ name: 'scan_status', arguments: { runId: trigger.runId } })); + expect(status).toMatchObject({ + runId: trigger.runId, + status: 'done', + done: true, + }); + expect(status.reportPath).toContain('scan-report.json'); + + const artifacts = structuredContent<{ + runId: string; + artifacts: Array<{ path: string; type: string }>; + }>(await client.callTool({ name: 'scan_list_artifacts', arguments: { runId: trigger.runId } })); + expect(artifacts.artifacts).toEqual( + expect.arrayContaining([ + expect.objectContaining({ path: 'semantic-layer/warehouse/_schema/public.yaml', type: 'manifest_shard' }), + expect.objectContaining({ type: 'report' }), + expect.objectContaining({ type: 'raw_source' }), + ]), + ); + + const manifestArtifact = structuredContent<{ + runId: string; + path: string; + type: string; + content: string; + }>( + await client.callTool({ + name: 'scan_read_artifact', + arguments: { + runId: trigger.runId, + path: 'semantic-layer/warehouse/_schema/public.yaml', + }, + }), + ); + expect(manifestArtifact).toMatchObject({ + runId: trigger.runId, + path: 'semantic-layer/warehouse/_schema/public.yaml', + type: 'manifest_shard', + 
}); + expect(manifestArtifact.content).toContain('orders:'); + expect(manifestArtifact.content).toContain('source: formal'); + } finally { + await client.close(); + } + }); +}); diff --git a/packages/cli/src/startup-profile.ts b/packages/cli/src/startup-profile.ts new file mode 100644 index 00000000..e9616abb --- /dev/null +++ b/packages/cli/src/startup-profile.ts @@ -0,0 +1,54 @@ +const enabled = process.env.KLO_PROFILE_STARTUP === '1' || process.env.KLO_PROFILE_STARTUP === 'true'; +const processStart = performance.now() - process.uptime() * 1000; + +interface StartupProfileEvent { + label: string; + at: number; + duration?: number; +} + +const events: StartupProfileEvent[] = []; + +function now(): number { + return performance.now() - processStart; +} + +/** Records a point-in-time event at the current offset from processStart; does nothing unless KLO_PROFILE_STARTUP is '1' or 'true'. */ export function profileMark(label: string): void { + if (!enabled) { + return; + } + events.push({ label, at: now() }); +} + +/** Awaits run() and passes its result or rejection through unchanged. When profiling is enabled, the start offset and duration are recorded in the finally block, so a rejected run() is still measured. */ export async function profileSpan<T>(label: string, run: () => Promise<T>): Promise<T> { + if (!enabled) { + return await run(); + } + const start = now(); + try { + return await run(); + } finally { + events.push({ label, at: start, duration: now() - start }); + } +} + +/** Registers a one-shot 'beforeExit' listener that writes every recorded event and the total elapsed time to stderr; no-op when profiling is disabled. NOTE(review): Node does not emit 'beforeExit' on an explicit process.exit(), so no report is printed on that path; confirm this is intended. */ export function installStartupProfileReporter(): void { + if (!enabled) { + return; + } + + process.once('beforeExit', () => { + const total = now(); + process.stderr.write('\nKLO startup profile\n'); + for (const event of events) { + const elapsed = event.at.toFixed(1).padStart(7); + if (event.duration === undefined) { + process.stderr.write(`${elapsed} ms ${event.label}\n`); + } else { + const duration = event.duration.toFixed(1).padStart(7); + process.stderr.write(`${elapsed} ms ${duration} ms ${event.label}\n`); + } + } + process.stderr.write(`${total.toFixed(1).padStart(7)} ms total\n`); + }); +} diff --git a/packages/cli/src/viz-fallback.test.ts b/packages/cli/src/viz-fallback.test.ts new file mode 100644 index 00000000..f42eb440 --- /dev/null +++ b/packages/cli/src/viz-fallback.test.ts @@ -0,0 +1,120 @@ +import { beforeEach, 
describe, expect, it, vi } from 'vitest'; +import { + rendererUnavailableVizFallback, + resetVizFallbackWarningsForTest, + resolveVizFallback, + warnVizFallbackOnce, +} from './viz-fallback.js'; + +/** Builds a minimal fake stdin/stdout/stderr object for the fallback helpers. stdin.setRawMode is a vi.fn() stub unless rawMode is exactly false, in which case it is omitted; stderr.write is a vi.fn() spy. */ function io(options: { stdoutTty?: boolean; stdinTty?: boolean; rawMode?: boolean }) { + return { + stdin: { + isTTY: options.stdinTty, + ...(options.rawMode === false ? {} : { setRawMode: vi.fn() }), + }, + stdout: { isTTY: options.stdoutTty }, + stderr: { write: vi.fn() }, + }; +} + +describe('resolveVizFallback', () => { + beforeEach(() => { + resetVizFallbackWarningsForTest(); + }); + + it('degrades when stdout is not an interactive terminal', () => { + expect(resolveVizFallback(io({ stdoutTty: false }), { TERM: 'xterm-256color' })).toEqual({ + shouldDegrade: true, + reason: 'stdout-not-tty', + message: 'stdout is not an interactive terminal', + }); + }); + + it('degrades when TERM is dumb even if stdout is a TTY', () => { + expect(resolveVizFallback(io({ stdoutTty: true }), { TERM: 'dumb' })).toEqual({ + shouldDegrade: true, + reason: 'term-dumb', + message: 'TERM=dumb does not support the visual renderer', + }); + }); + + it('allows visualization for a normal TTY', () => { + expect(resolveVizFallback(io({ stdoutTty: true }), { TERM: 'xterm-256color' })).toEqual({ + shouldDegrade: false, + }); + }); + + it('allows snapshot visualization when interactive input is not required', () => { + expect( + resolveVizFallback( + io({ stdoutTty: true, stdinTty: false, rawMode: false }), + { TERM: 'xterm-256color' }, + { requireInput: false }, + ), + ).toEqual({ + shouldDegrade: false, + }); + }); + + it('degrades when interactive input is required but stdin is not a TTY', () => { + expect( + resolveVizFallback( + io({ stdoutTty: true, stdinTty: false }), + { TERM: 'xterm-256color' }, + { requireInput: true }, + ), + ).toEqual({ + shouldDegrade: true, + reason: 'stdin-not-tty', + message: 'stdin is not an interactive terminal', + }); + }); + + it('degrades when interactive input 
is required but stdin raw mode is unavailable', () => { + expect( + resolveVizFallback( + io({ stdoutTty: true, stdinTty: true, rawMode: false }), + { TERM: 'xterm-256color' }, + { requireInput: true }, + ), + ).toEqual({ + shouldDegrade: true, + reason: 'stdin-raw-mode-unavailable', + message: 'stdin raw mode is unavailable', + }); + }); + + it('warns only once per fallback reason', () => { + const testIo = io({ stdoutTty: false }); + const decision = resolveVizFallback(testIo, { TERM: 'xterm-256color' }); + + warnVizFallbackOnce(testIo, decision); + warnVizFallbackOnce(testIo, decision); + warnVizFallbackOnce(testIo, rendererUnavailableVizFallback()); + warnVizFallbackOnce(testIo, rendererUnavailableVizFallback()); + warnVizFallbackOnce(testIo, { + shouldDegrade: true, + reason: 'stdin-raw-mode-unavailable', + message: 'stdin raw mode is unavailable', + }); + warnVizFallbackOnce(testIo, { + shouldDegrade: true, + reason: 'stdin-raw-mode-unavailable', + message: 'stdin raw mode is unavailable', + }); + + expect(testIo.stderr.write).toHaveBeenCalledTimes(3); + expect(testIo.stderr.write).toHaveBeenNthCalledWith( + 1, + 'Visualization requested but stdout is not an interactive terminal; printing plain output.\n', + ); + expect(testIo.stderr.write).toHaveBeenNthCalledWith( + 2, + 'Visualization requested but the terminal renderer is unavailable; printing plain output.\n', + ); + expect(testIo.stderr.write).toHaveBeenNthCalledWith( + 3, + 'Visualization requested but stdin raw mode is unavailable; printing plain output.\n', + ); + }); +}); diff --git a/packages/cli/src/viz-fallback.ts b/packages/cli/src/viz-fallback.ts new file mode 100644 index 00000000..d00d0779 --- /dev/null +++ b/packages/cli/src/viz-fallback.ts @@ -0,0 +1,93 @@ +import { profileMark } from './startup-profile.js'; + +profileMark('module:viz-fallback'); + +type KloVizFallbackReason = + | 'stdout-not-tty' + | 'term-dumb' + | 'stdin-not-tty' + | 'stdin-raw-mode-unavailable' + | 
'renderer-unavailable'; + +interface KloVizFallbackIo { + stdin?: { isTTY?: boolean; setRawMode?(value: boolean): void }; + stdout: { isTTY?: boolean }; + stderr: { write(chunk: string): void }; +} + +interface KloVizFallbackOptions { + requireInput?: boolean; +} + +type KloVizFallbackDecision = + | { + shouldDegrade: false; + } + | { + shouldDegrade: true; + reason: KloVizFallbackReason; + message: string; + }; + +/* Reasons already warned about; module-level, so it persists until resetVizFallbackWarningsForTest clears it. */ const warnedFallbackReasons = new Set<KloVizFallbackReason>(); + +/** Decides whether the visual renderer must degrade to plain output. Checks run in order and the first failure wins: stdout must be a TTY; TERM must not be 'dumb' (case-insensitive); and, only when options.requireInput is true, stdin must be a TTY and expose setRawMode. */ export function resolveVizFallback( + io: KloVizFallbackIo, + env: NodeJS.ProcessEnv = process.env, + options: KloVizFallbackOptions = {}, +): KloVizFallbackDecision { + if (io.stdout.isTTY !== true) { + return { + shouldDegrade: true, + reason: 'stdout-not-tty', + message: 'stdout is not an interactive terminal', + }; + } + + if ((env.TERM ?? '').toLowerCase() === 'dumb') { + return { + shouldDegrade: true, + reason: 'term-dumb', + message: 'TERM=dumb does not support the visual renderer', + }; + } + + if (options.requireInput === true && io.stdin?.isTTY !== true) { + return { + shouldDegrade: true, + reason: 'stdin-not-tty', + message: 'stdin is not an interactive terminal', + }; + } + + if (options.requireInput === true && typeof io.stdin?.setRawMode !== 'function') { + return { + shouldDegrade: true, + reason: 'stdin-raw-mode-unavailable', + message: 'stdin raw mode is unavailable', + }; + } + + return { shouldDegrade: false }; +} + +/** Returns the fixed degrade decision with reason 'renderer-unavailable'. */ export function rendererUnavailableVizFallback(): KloVizFallbackDecision { + return { + shouldDegrade: true, + reason: 'renderer-unavailable', + message: 'the terminal renderer is unavailable', + }; +} + +/** Writes a single warning line to io.stderr the first time a given fallback reason is seen; non-degrading decisions and repeated reasons are ignored. */ export function warnVizFallbackOnce(io: KloVizFallbackIo, decision: KloVizFallbackDecision): void { + if (!decision.shouldDegrade || warnedFallbackReasons.has(decision.reason)) { + return; + } + + warnedFallbackReasons.add(decision.reason); + io.stderr.write(`Visualization requested but ${decision.message}; printing plain output.\n`); +} + +export function 
resetVizFallbackWarningsForTest(): void { + warnedFallbackReasons.clear(); +} diff --git a/packages/cli/tsconfig.json b/packages/cli/tsconfig.json new file mode 100644 index 00000000..40c250d6 --- /dev/null +++ b/packages/cli/tsconfig.json @@ -0,0 +1,10 @@ +{ + "extends": "../../tsconfig.base.json", + "compilerOptions": { + "outDir": "./dist", + "rootDir": "./src", + "jsx": "react-jsx" + }, + "include": ["src/**/*.ts", "src/**/*.tsx"], + "exclude": ["dist", "node_modules"] +} diff --git a/packages/cli/vitest.config.ts b/packages/cli/vitest.config.ts new file mode 100644 index 00000000..ca1a6b26 --- /dev/null +++ b/packages/cli/vitest.config.ts @@ -0,0 +1,15 @@ +import { defineConfig } from 'vitest/config'; + +export default defineConfig({ + oxc: { + jsx: { + runtime: 'automatic', + importSource: 'react', + }, + }, + test: { + root: '.', + include: ['src/**/*.test.ts', 'src/**/*.test.tsx'], + testTimeout: 30_000, + }, +}); diff --git a/packages/connector-bigquery/package.json b/packages/connector-bigquery/package.json new file mode 100644 index 00000000..975090c5 --- /dev/null +++ b/packages/connector-bigquery/package.json @@ -0,0 +1,47 @@ +{ + "name": "@klo/connector-bigquery", + "version": "0.0.0-private", + "description": "BigQuery connector package for KLO scan interfaces", + "private": true, + "type": "module", + "engines": { + "node": ">=22.0.0" + }, + "main": "dist/index.js", + "types": "dist/index.d.ts", + "exports": { + ".": { + "types": "./dist/index.d.ts", + "import": "./dist/index.js", + "default": "./dist/index.js" + }, + "./package.json": "./package.json" + }, + "files": [ + "dist" + ], + "scripts": { + "build": "tsc -p tsconfig.json", + "test": "vitest run", + "type-check": "tsc -p tsconfig.json --noEmit" + }, + "dependencies": { + "@google-cloud/bigquery": "^8.1.1", + "@klo/context": "workspace:*" + }, + "devDependencies": { + "@types/node": "^24.3.0", + "typescript": "^5.9.3", + "vitest": "^4.0.18" + }, + "license": "Apache-2.0", + "repository": { + 
"type": "git", + "url": "git+https://github.com/kaelio/ktx.git", + "directory": "packages/connector-bigquery" + }, + "bugs": { + "url": "https://github.com/kaelio/ktx/issues" + }, + "homepage": "https://github.com/kaelio/ktx#readme" +} diff --git a/packages/connector-bigquery/src/connector.test.ts b/packages/connector-bigquery/src/connector.test.ts new file mode 100644 index 00000000..a7ade331 --- /dev/null +++ b/packages/connector-bigquery/src/connector.test.ts @@ -0,0 +1,307 @@ +import { describe, expect, it, vi } from 'vitest'; +import { + bigQueryConnectionConfigFromConfig, + createBigQueryLiveDatabaseIntrospection, + isKloBigQueryConnectionConfig, + type KloBigQueryClient, + KloBigQueryScanConnector, + type KloBigQueryClientFactory, + type KloBigQueryDataset, + type KloBigQueryQueryJob, + type KloBigQueryTableRef, +} from './index.js'; + +function fakeClientFactory(): KloBigQueryClientFactory { + const queryResults = vi.fn(async (): ReturnType => [ + [{ id: 1, status: 'paid' }], + undefined, + { schema: { fields: [{ name: 'id', type: 'INT64' }, { name: 'status', type: 'STRING' }] } }, + ]); + const createQueryJob = vi.fn(async (input: { query: string }): ReturnType => { + if (input.query.includes('INFORMATION_SCHEMA.TABLE_CONSTRAINTS')) { + return [ + { + getQueryResults: async (): ReturnType => [ + [{ table_name: 'orders', column_name: 'id' }], + undefined, + { schema: { fields: [{ name: 'table_name', type: 'STRING' }, { name: 'column_name', type: 'STRING' }] } }, + ], + }, + ]; + } + if (input.query.includes('APPROX_COUNT_DISTINCT')) { + return [ + { + getQueryResults: async (): ReturnType => [ + [{ cardinality: 2 }], + undefined, + { schema: { fields: [{ name: 'cardinality', type: 'INT64' }] } }, + ], + }, + ]; + } + if (input.query.includes('SELECT DISTINCT CAST')) { + return [ + { + getQueryResults: async (): ReturnType => [ + [{ val: 'open' }, { val: 'paid' }], + undefined, + { schema: { fields: [{ name: 'val', type: 'STRING' }] } }, + ], + }, + ]; + } + 
if (input.query.includes('SELECT `status`')) { + return [ + { + getQueryResults: async (): ReturnType => [ + [{ status: 'paid' }], + undefined, + { schema: { fields: [{ name: 'status', type: 'STRING' }] } }, + ], + }, + ]; + } + return [{ getQueryResults: queryResults }]; + }); + const getTable = vi.fn(async (): ReturnType => [ + { + metadata: { + type: 'TABLE', + numRows: '12', + description: 'Orders table', + schema: { + fields: [ + { name: 'id', type: 'INT64', mode: 'REQUIRED', description: 'Order id' }, + { name: 'status', type: 'STRING', mode: 'NULLABLE' }, + { name: 'payload', type: 'RECORD', mode: 'NULLABLE' }, + ], + }, + }, + }, + ]); + const tableRef: KloBigQueryTableRef = { id: 'orders', get: getTable }; + return { + createClient: vi.fn(() => ({ + getDatasets: vi.fn(async (): ReturnType => [[{ id: 'analytics' }, { id: 'staging' }]]), + dataset: vi.fn( + (datasetId: string): KloBigQueryDataset => ({ + get: vi.fn(async () => [{ id: datasetId }]), + getTables: vi.fn(async (): ReturnType => [[tableRef]]), + }), + ), + createQueryJob, + })), + }; +} + +const connection = { + driver: 'bigquery', + dataset_id: 'analytics', + credentials_json: JSON.stringify({ project_id: 'project-1', client_email: 'reader@example.test' }), + location: 'US', + readonly: true, +}; + +describe('KloBigQueryScanConnector', () => { + it('resolves configuration safely', () => { + expect(isKloBigQueryConnectionConfig(connection)).toBe(true); + expect(isKloBigQueryConnectionConfig({ driver: 'mysql' })).toBe(false); + expect(bigQueryConnectionConfigFromConfig({ connectionId: 'warehouse', connection })).toMatchObject({ + projectId: 'project-1', + datasetIds: ['analytics'], + location: 'US', + }); + expect(() => + bigQueryConnectionConfigFromConfig({ + connectionId: 'warehouse', + connection: { ...connection, readonly: false }, + }), + ).toThrow('Native BigQuery connector requires connections.warehouse.readonly: true'); + }); + + it('introspects datasets, table metadata, primary keys, and 
normalized types', async () => { + const connector = new KloBigQueryScanConnector({ + connectionId: 'warehouse', + connection, + clientFactory: fakeClientFactory(), + now: () => new Date('2026-04-29T17:00:00.000Z'), + }); + + const snapshot = await connector.introspect( + { connectionId: 'warehouse', driver: 'bigquery' }, + { runId: 'scan-run-1' }, + ); + + expect(snapshot).toMatchObject({ + connectionId: 'warehouse', + driver: 'bigquery', + extractedAt: '2026-04-29T17:00:00.000Z', + scope: { catalogs: ['project-1'], datasets: ['analytics'] }, + metadata: { + project_id: 'project-1', + datasets: ['analytics'], + table_count: 1, + total_columns: 3, + }, + }); + expect(snapshot.tables[0]).toMatchObject({ + catalog: 'project-1', + db: 'analytics', + name: 'orders', + kind: 'table', + comment: 'Orders table', + estimatedRows: 12, + foreignKeys: [], + }); + expect(snapshot.tables[0]?.columns).toEqual([ + { + name: 'id', + nativeType: 'INT64', + normalizedType: 'BIGINT', + dimensionType: 'number', + nullable: false, + primaryKey: true, + comment: 'Order id', + }, + { + name: 'status', + nativeType: 'STRING', + normalizedType: 'VARCHAR', + dimensionType: 'string', + nullable: true, + primaryKey: false, + comment: null, + }, + { + name: 'payload', + nativeType: 'RECORD', + normalizedType: 'JSON', + dimensionType: 'string', + nullable: true, + primaryKey: false, + comment: null, + }, + ]); + }); + + it('runs samples, read-only SQL, distinct values, dataset listing, row counts, and cleanup', async () => { + const connector = new KloBigQueryScanConnector({ + connectionId: 'warehouse', + connection, + clientFactory: fakeClientFactory(), + }); + + await expect( + connector.sampleTable( + { + connectionId: 'warehouse', + table: { catalog: 'project-1', db: 'analytics', name: 'orders' }, + columns: ['id', 'status'], + limit: 1, + }, + { runId: 'scan-run-1' }, + ), + ).resolves.toEqual({ + headers: ['id', 'status'], + headerTypes: ['INT64', 'STRING'], + rows: [[1, 'paid']], + 
totalRows: 1, + }); + + await expect( + connector.sampleColumn( + { + connectionId: 'warehouse', + table: { catalog: 'project-1', db: 'analytics', name: 'orders' }, + column: 'status', + limit: 5, + }, + { runId: 'scan-run-1' }, + ), + ).resolves.toMatchObject({ values: ['paid'], nullCount: null, distinctCount: null }); + + await expect( + connector.executeReadOnly( + { connectionId: 'warehouse', sql: 'select id, status from `project-1`.`analytics`.`orders`', maxRows: 1 }, + { runId: 'scan-run-1' }, + ), + ).resolves.toMatchObject({ headers: ['id', 'status'], rows: [[1, 'paid']], totalRows: 1, rowCount: 1 }); + + await expect( + connector.executeReadOnly({ connectionId: 'warehouse', sql: 'delete from orders' }, { runId: 'scan-run-1' }), + ).rejects.toThrow('Only read-only SELECT/WITH queries can be executed locally'); + + await expect( + connector.getColumnDistinctValues( + { catalog: 'project-1', db: 'analytics', name: 'orders' }, + 'status', + { maxCardinality: 5, limit: 10, sampleSize: 100 }, + ), + ).resolves.toEqual({ values: ['open', 'paid'], cardinality: 2 }); + await expect(connector.getTableRowCount('orders')).resolves.toBe(12); + await expect(connector.listDatasets()).resolves.toEqual(['analytics', 'staging']); + await expect( + connector.columnStats( + { connectionId: 'warehouse', table: { catalog: 'project-1', db: 'analytics', name: 'orders' }, column: 'status' }, + { runId: 'scan-run-1' }, + ), + ).resolves.toBeNull(); + await connector.cleanup(); + }); + + it('applies maximumBytesBilled to read-only queries when configured', async () => { + const clientFactory = fakeClientFactory(); + const connector = new KloBigQueryScanConnector({ + connectionId: 'warehouse', + connection, + clientFactory, + maxBytesBilled: 123456789, + }); + + await expect( + connector.executeReadOnly( + { connectionId: 'warehouse', sql: 'select id, status from `project-1`.`analytics`.`orders`', maxRows: 1 }, + { runId: 'scan-run-1' }, + ), + ).resolves.toMatchObject({ rows: [[1, 
'paid']], rowCount: 1 }); + + const client = vi.mocked(clientFactory.createClient).mock.results[0]?.value as KloBigQueryClient; + expect(client.createQueryJob).toHaveBeenLastCalledWith( + expect.objectContaining({ + maximumBytesBilled: '123456789', + }), + ); + }); + + it('adapts native snapshots to live-database introspection snapshots', async () => { + const introspection = createBigQueryLiveDatabaseIntrospection({ + connections: { warehouse: connection }, + clientFactory: fakeClientFactory(), + now: () => new Date('2026-04-29T17:00:00.000Z'), + }); + + await expect(introspection.extractSchema('warehouse')).resolves.toMatchObject({ + connectionId: 'warehouse', + metadata: { project_id: 'project-1' }, + tables: expect.arrayContaining([ + expect.objectContaining({ + catalog: 'project-1', + db: 'analytics', + name: 'orders', + columns: expect.arrayContaining([ + { + name: 'id', + nativeType: 'INT64', + normalizedType: 'BIGINT', + dimensionType: 'number', + nullable: false, + primaryKey: true, + comment: 'Order id', + }, + ]), + }), + ]), + }); + }); +}); diff --git a/packages/connector-bigquery/src/connector.ts b/packages/connector-bigquery/src/connector.ts new file mode 100644 index 00000000..776528f4 --- /dev/null +++ b/packages/connector-bigquery/src/connector.ts @@ -0,0 +1,492 @@ +import { BigQuery, type TableField } from '@google-cloud/bigquery'; +import { assertReadOnlySql, limitSqlForExecution } from '@klo/context/connections'; +import { + createKloConnectorCapabilities, + type KloColumnSampleInput, + type KloColumnSampleResult, + type KloColumnStatsInput, + type KloColumnStatsResult, + type KloQueryResult, + type KloReadOnlyQueryInput, + type KloScanConnector, + type KloScanContext, + type KloScanInput, + type KloSchemaColumn, + type KloSchemaSnapshot, + type KloSchemaTable, + type KloTableRef, + type KloTableSampleInput, + type KloTableSampleResult, +} from '@klo/context/scan'; +import { readFileSync } from 'node:fs'; +import { homedir } from 'node:os'; 
+import { resolve } from 'node:path'; +import { KloBigQueryDialect } from './dialect.js'; + +export interface KloBigQueryConnectionConfig { + driver?: string; + dataset_id?: string; + dataset_ids?: string[]; + credentials_json?: string; + location?: string; + readonly?: boolean; + [key: string]: unknown; +} + +export interface KloBigQueryResolvedConnectionConfig { + projectId: string; + credentials: Record; + datasetIds: string[]; + location?: string; +} + +export interface KloBigQueryReadOnlyQueryInput extends KloReadOnlyQueryInput { + params?: Record; +} + +export interface KloBigQueryColumnDistinctValuesOptions { + maxCardinality: number; + limit: number; + sampleSize?: number; +} + +export interface KloBigQueryColumnDistinctValuesResult { + values: string[] | null; + cardinality: number; +} + +export interface KloBigQueryQueryJob { + getQueryResults(): Promise< + [Array>, unknown, { schema?: { fields?: TableField[] } }?, ...unknown[]] + >; +} + +export interface KloBigQueryTableRef { + id?: string; + get(): Promise< + [ + { + metadata: { + type?: string; + numRows?: string | number; + description?: string; + schema?: { fields?: TableField[] }; + }; + }, + ...unknown[], + ] + >; +} + +export interface KloBigQueryDataset { + get(): Promise; + getTables(): Promise<[KloBigQueryTableRef[], ...unknown[]]>; +} + +export interface KloBigQueryClient { + getDatasets(input?: { maxResults?: number }): Promise<[Array<{ id?: string }>, ...unknown[]]>; + dataset(datasetId: string): KloBigQueryDataset; + createQueryJob(input: { + query: string; + location?: string; + params?: Record; + maximumBytesBilled?: string; + jobTimeoutMs?: number; + }): Promise<[KloBigQueryQueryJob, ...unknown[]]>; +} + +export interface KloBigQueryClientFactory { + createClient(input: { projectId: string; credentials: Record }): KloBigQueryClient; +} + +export interface KloBigQueryScanConnectorOptions { + connectionId: string; + connection: KloBigQueryConnectionConfig | undefined; + clientFactory?: 
/**
 * Resolves an indirect configuration value.
 *
 * - `env:NAME` returns the value of `NAME` in `env`, or `''` when it is unset.
 * - `file:PATH` returns the trimmed UTF-8 contents of `PATH`; a leading `~` is
 *   replaced by the current user's home directory.
 * - Any other value is returned unchanged.
 *
 * @param value Raw configuration string, possibly prefixed with `env:` or `file:`.
 * @param env Environment map used for `env:` lookups.
 * @returns The resolved string.
 * @throws Whatever `readFileSync` throws when a `file:` path cannot be read.
 */
function resolveStringReference(value: string, env: NodeJS.ProcessEnv): string {
  if (value.startsWith('env:')) {
    return env[value.slice('env:'.length)] ?? '';
  }
  if (value.startsWith('file:')) {
    const rawPath = value.slice('file:'.length);
    // Drop the separator(s) that follow "~" before resolving: path.resolve()
    // treats an absolute segment such as "/creds.json" as a new root, which
    // would silently discard the home directory.
    const path = rawPath.startsWith('~')
      ? resolve(homedir(), rawPath.slice(1).replace(/^[\\/]+/, ''))
      : rawPath;
    return readFileSync(path, 'utf-8').trim();
  }
  return value;
}
/**
 * Coerces a loosely typed metadata or query value to a finite number.
 *
 * @param value Any value, typically a numeric string or number from BigQuery.
 * @returns The numeric value, or `null` when the value is absent (`null`,
 *   `undefined`, blank string) or does not convert to a finite number.
 */
function firstNumber(value: unknown): number | null {
  // Number(null) and Number('') both evaluate to 0, which would report a
  // missing value as a genuine zero; treat "no value" as null instead.
  if (value === null || value === undefined) {
    return null;
  }
  if (typeof value === 'string' && value.trim().length === 0) {
    return null;
  }
  const numberValue = Number(value);
  return Number.isFinite(numberValue) ? numberValue : null;
}
/**
 * Read-only scan connector for one BigQuery connection.
 *
 * The connection configuration is validated in the constructor; the BigQuery
 * client itself is created lazily on first use and cached until `cleanup()`.
 * Every method that takes a `connectionId` rejects ids other than the one the
 * connector was built for.
 */
export class KloBigQueryScanConnector implements KloScanConnector {
  readonly id: string;
  readonly driver = 'bigquery' as const;
  readonly capabilities = createKloConnectorCapabilities({
    tableSampling: true,
    columnSampling: true,
    columnStats: false,
    readOnlySql: true,
    nestedAnalysis: true,
    formalForeignKeys: false,
    estimatedRowCounts: true,
  });

  private readonly connectionId: string;
  private readonly resolved: KloBigQueryResolvedConnectionConfig;
  private readonly clientFactory: KloBigQueryClientFactory;
  private readonly now: () => Date;
  private readonly maxBytesBilled?: number | string;
  private readonly queryTimeoutMs?: number;
  private readonly dialect = new KloBigQueryDialect();
  private client: KloBigQueryClient | null = null;

  /**
   * @param options Connection id and config plus optional test seams
   *   (`clientFactory`, `env`, `now`) and query limits.
   * @throws Error when `bigQueryConnectionConfigFromConfig` rejects the
   *   connection configuration.
   */
  constructor(options: KloBigQueryScanConnectorOptions) {
    this.connectionId = options.connectionId;
    this.resolved = bigQueryConnectionConfigFromConfig({
      connectionId: options.connectionId,
      connection: options.connection,
      env: options.env,
    });
    this.clientFactory = options.clientFactory ?? new DefaultBigQueryClientFactory();
    this.now = options.now ?? (() => new Date());
    this.maxBytesBilled = options.maxBytesBilled;
    this.queryTimeoutMs = options.queryTimeoutMs;
    this.id = `bigquery:${options.connectionId}`;
  }

  /**
   * Lists one dataset and fetches every configured dataset.
   * Never throws: failures are reported through `success: false` and `error`.
   */
  async testConnection(): Promise<{ success: boolean; error?: string }> {
    try {
      const client = this.getClient();
      await client.getDatasets({ maxResults: 1 });
      for (const datasetId of this.resolved.datasetIds) {
        await client.dataset(datasetId).get();
      }
      return { success: true };
    } catch (error) {
      return { success: false, error: error instanceof Error ? error.message : String(error) };
    }
  }

  /** Builds a schema snapshot covering every configured dataset. */
  async introspect(input: KloScanInput, _ctx: KloScanContext): Promise<KloSchemaSnapshot> {
    this.assertConnection(input.connectionId);
    const tables: KloSchemaTable[] = [];
    for (const datasetId of this.resolved.datasetIds) {
      tables.push(...(await this.introspectDataset(datasetId)));
    }
    return {
      connectionId: this.connectionId,
      driver: 'bigquery',
      extractedAt: this.now().toISOString(),
      scope: { catalogs: [this.resolved.projectId], datasets: this.resolved.datasetIds },
      metadata: {
        project_id: this.resolved.projectId,
        datasets: this.resolved.datasetIds,
        table_count: tables.length,
        total_columns: tables.reduce((sum, table) => sum + table.columns.length, 0),
      },
      tables,
    };
  }

  /** Returns up to `input.limit` randomly ordered rows of the table. */
  async sampleTable(input: KloTableSampleInput, _ctx: KloScanContext): Promise<KloTableSampleResult> {
    this.assertConnection(input.connectionId);
    const result = await this.query(
      this.dialect.generateSampleQuery(this.qTableName(input.table), input.limit, input.columns),
    );
    return { headers: result.headers, headerTypes: result.headerTypes, rows: result.rows, totalRows: result.totalRows };
  }

  /**
   * Returns sampled non-null values of one column.
   * `nullCount` and `distinctCount` are always `null` for this connector.
   */
  async sampleColumn(input: KloColumnSampleInput, _ctx: KloScanContext): Promise<KloColumnSampleResult> {
    this.assertConnection(input.connectionId);
    const result = await this.query(
      this.dialect.generateColumnSampleQuery(this.qTableName(input.table), input.column, input.limit),
    );
    return {
      values: result.rows.filter((row) => row.length > 0 && row[0] !== null).map((row) => row[0]),
      nullCount: null,
      distinctCount: null,
    };
  }

  /** Column statistics are not supported (`capabilities.columnStats` is false); always resolves to `null`. */
  async columnStats(_input: KloColumnStatsInput, _ctx: KloScanContext): Promise<KloColumnStatsResult | null> {
    return null;
  }

  /**
   * Runs caller-supplied SQL after `assertReadOnlySql` and
   * `limitSqlForExecution` have validated and row-limited it.
   */
  async executeReadOnly(input: KloBigQueryReadOnlyQueryInput, _ctx: KloScanContext): Promise<KloQueryResult> {
    this.assertConnection(input.connectionId);
    const limitedSql = limitSqlForExecution(assertReadOnlySql(input.sql), input.maxRows);
    const prepared = this.dialect.prepareQuery(limitedSql, input.params);
    const result = await this.query(prepared.sql, prepared.params);
    return { ...result, rowCount: result.rows.length };
  }

  /**
   * Estimates a column's cardinality on a sample and, when it is small enough,
   * fetches the distinct values.
   *
   * @returns `null` when the cardinality cannot be read as a number;
   *   `values: []` for cardinality 0; `values: null` when the cardinality
   *   exceeds `options.maxCardinality`; otherwise the non-null distinct values
   *   as strings.
   */
  async getColumnDistinctValues(
    table: KloTableRef,
    columnName: string,
    options: KloBigQueryColumnDistinctValuesOptions,
  ): Promise<KloBigQueryColumnDistinctValuesResult | null> {
    const tableName = this.qTableName(table);
    const quotedColumn = this.dialect.quoteIdentifier(columnName);
    const cardinality = await this.singleNumber(
      this.dialect.generateCardinalitySampleQuery(tableName, quotedColumn, options.sampleSize ?? 10000),
      'cardinality',
    );
    if (cardinality === null) {
      return null;
    }
    if (cardinality === 0) {
      return { values: [], cardinality: 0 };
    }
    if (cardinality > options.maxCardinality) {
      return { values: null, cardinality };
    }
    const valueRows = await this.queryRaw<{ val: unknown }>(
      this.dialect.generateDistinctValuesQuery(tableName, quotedColumn, options.limit),
    );
    return { values: valueRows.filter((row) => row.val !== null).map((row) => String(row.val)), cardinality };
  }

  /**
   * Returns the `numRows` metadata value of one table, or 0 when the dataset
   * or table is unknown or the value is not numeric.
   *
   * @param tableName Table id within the dataset.
   * @param datasetId Dataset to look in; defaults to the first configured dataset.
   */
  async getTableRowCount(tableName: string, datasetId = this.resolved.datasetIds[0]): Promise<number> {
    if (!datasetId) {
      return 0;
    }
    // Fetch metadata for the requested table only; a full dataset introspection
    // would also run the primary-key query and one metadata call per table.
    const [tableRefs] = await this.getClient().dataset(datasetId).getTables();
    const tableRef = tableRefs.find((ref) => (ref.id || '') === tableName);
    if (!tableRef) {
      return 0;
    }
    const [table] = await tableRef.get();
    return firstNumber(table.metadata.numRows) ?? 0;
  }

  /** Formats a table reference as a quoted BigQuery table path. */
  qTableName(table: Pick<KloTableRef, 'name'> & Partial<Pick<KloTableRef, 'catalog' | 'db'>>): string {
    return this.dialect.formatTableName(table);
  }

  /** Quotes a single identifier using the BigQuery dialect. */
  quoteIdentifier(identifier: string): string {
    return this.dialect.quoteIdentifier(identifier);
  }

  /** Lists the ids of all datasets returned by the client, skipping entries without an id. */
  async listDatasets(): Promise<string[]> {
    const [datasets] = await this.getClient().getDatasets();
    return datasets.map((dataset) => dataset.id).filter((id): id is string => Boolean(id));
  }

  /** Drops the cached client reference; the next call creates a new client. */
  async cleanup(): Promise<void> {
    this.client = null;
  }

  /** Returns the cached client, creating it through the factory on first use. */
  private getClient(): KloBigQueryClient {
    if (!this.client) {
      this.client = this.clientFactory.createClient({
        projectId: this.resolved.projectId,
        credentials: this.resolved.credentials,
      });
    }
    return this.client;
  }

  /**
   * Runs one query job and converts the result to header/row form.
   * Optional job settings (location, params, byte cap, timeout) are only sent
   * when they are configured.
   */
  private async query(sql: string, params?: Record<string, unknown>): Promise<KloQueryResult> {
    const [job] = await this.getClient().createQueryJob({
      query: sql,
      ...(this.resolved.location ? { location: this.resolved.location } : {}),
      ...(params && Object.keys(params).length > 0 ? { params } : {}),
      ...(this.maxBytesBilled ? { maximumBytesBilled: String(this.maxBytesBilled) } : {}),
      ...(this.queryTimeoutMs ? { jobTimeoutMs: this.queryTimeoutMs } : {}),
    });
    const [rows, , response] = await job.getQueryResults();
    let headers = response?.schema?.fields?.map((field) => field.name || '') ?? [];
    const headerTypes = response?.schema?.fields?.map((field) => String(field.type || 'STRING')) ?? [];
    // Without schema information, fall back to the keys of the first row.
    if (headers.length === 0 && rows.length > 0) {
      headers = Object.keys(rows[0]!);
    }
    return {
      headers,
      headerTypes: headerTypes.length > 0 ? headerTypes : undefined,
      rows: rows.map((row) => headers.map((header) => normalizeValue(row[header]))),
      totalRows: rows.length,
      rowCount: rows.length,
    };
  }

  /** Runs a query and returns each row as an object keyed by header name. */
  private async queryRaw<T extends Record<string, unknown>>(
    sql: string,
    params?: Record<string, unknown>,
  ): Promise<T[]> {
    const result = await this.query(sql, params);
    return result.rows.map(
      (row) => Object.fromEntries(result.headers.map((header, index) => [header, row[index]])) as T,
    );
  }

  /** Reads `header` from the first result row as a number, or `null` when unavailable. */
  private async singleNumber(sql: string, header: string): Promise<number | null> {
    const rows = await this.queryRaw<Record<string, unknown>>(sql);
    return firstNumber(rows[0]?.[header]);
  }

  /** Loads table metadata and primary keys for every table of one dataset. */
  private async introspectDataset(datasetId: string): Promise<KloSchemaTable[]> {
    const dataset = this.getClient().dataset(datasetId);
    const [tableRefs] = await dataset.getTables();
    const primaryKeys = await this.primaryKeys(datasetId);
    const tables: KloSchemaTable[] = [];
    for (const tableRef of tableRefs) {
      const tableName = tableRef.id || '';
      const [table] = await tableRef.get();
      const fields = table.metadata.schema?.fields ?? [];
      tables.push({
        catalog: this.resolved.projectId,
        db: datasetId,
        name: tableName,
        kind: tableKind(table.metadata.type),
        comment: table.metadata.description || null,
        estimatedRows: firstNumber(table.metadata.numRows) ?? 0,
        columns: fields.map((field) => this.toSchemaColumn(tableName, field, primaryKeys)),
        foreignKeys: [],
      });
    }
    return tables;
  }

  /**
   * Reads primary-key columns of a dataset from INFORMATION_SCHEMA.
   *
   * @returns Map from table name to the set of its primary-key column names.
   */
  private async primaryKeys(datasetId: string): Promise<Map<string, Set<string>>> {
    // The project and dataset ids come from configuration: quote the view path
    // and bind the dataset id as a named parameter instead of splicing raw
    // text into the statement.
    const informationSchemaView = (view: string): string =>
      this.dialect.quoteIdentifier(`${this.resolved.projectId}.${datasetId}.INFORMATION_SCHEMA.${view}`);
    const rows = await this.queryRaw<{ table_name: string; column_name: string }>(
      'SELECT tc.table_name, kcu.column_name ' +
        `FROM ${informationSchemaView('TABLE_CONSTRAINTS')} tc ` +
        `JOIN ${informationSchemaView('KEY_COLUMN_USAGE')} kcu ` +
        'ON tc.constraint_name = kcu.constraint_name ' +
        'AND tc.table_schema = kcu.table_schema ' +
        'AND tc.table_name = kcu.table_name ' +
        "WHERE tc.constraint_type = 'PRIMARY KEY' " +
        'AND tc.table_schema = @dataset_id ' +
        "AND NOT REGEXP_CONTAINS(kcu.column_name, r'^(stacksync_record_id|sync_primary_key)_') " +
        'ORDER BY tc.table_name, kcu.ordinal_position',
      { dataset_id: datasetId },
    );
    const grouped = new Map<string, Set<string>>();
    for (const row of rows) {
      const columns = grouped.get(row.table_name) ?? new Set<string>();
      columns.add(row.column_name);
      grouped.set(row.table_name, columns);
    }
    return grouped;
  }

  /** Converts one BigQuery field definition to a schema column. */
  private toSchemaColumn(
    tableName: string,
    field: TableField,
    primaryKeys: Map<string, Set<string>>,
  ): KloSchemaColumn {
    const nativeType = String(field.type || 'STRING').toUpperCase();
    return {
      name: field.name || '',
      nativeType,
      normalizedType: this.dialect.mapDataType(nativeType),
      dimensionType: this.dialect.mapToDimensionType(nativeType),
      nullable: field.mode !== 'REQUIRED',
      primaryKey: primaryKeys.get(tableName)?.has(field.name || '') ?? false,
      comment: field.description || null,
    };
  }

  /** @throws Error when asked to operate on a connection other than its own. */
  private assertConnection(connectionId: string): void {
    if (connectionId !== this.connectionId) {
      throw new Error(`BigQuery connector ${this.connectionId} cannot scan connection ${connectionId}`);
    }
  }
}
10)).toBe( + "SELECT `status` FROM `p`.`d`.`orders` WHERE `status` IS NOT NULL AND TRIM(CAST(`status` AS STRING)) != '' ORDER BY RAND() LIMIT 10", + ); + expect(dialect.generateCardinalitySampleQuery('`p`.`d`.`orders`', '`status`', 100)).toContain( + 'SELECT APPROX_COUNT_DISTINCT(val) AS cardinality', + ); + expect(dialect.generateDistinctValuesQuery('`p`.`d`.`orders`', '`status`', 20)).toContain( + 'SELECT DISTINCT CAST(`status` AS STRING) AS val', + ); + }); + + it('rewrites colon parameters to BigQuery named parameters', () => { + expect(dialect.prepareQuery('SELECT * FROM orders WHERE id = :id AND id_2 = :id_2', { id: 1, id_2: 2 })).toEqual({ + sql: 'SELECT * FROM orders WHERE id = @id AND id_2 = @id_2', + params: { id: 1, id_2: 2 }, + }); + expect(dialect.prepareQuery('SELECT * FROM orders')).toEqual({ sql: 'SELECT * FROM orders', params: undefined }); + }); + + it('keeps unsupported statistics explicit', () => { + expect(dialect.generateColumnStatisticsQuery('analytics', 'orders')).toBeNull(); + }); +}); diff --git a/packages/connector-bigquery/src/dialect.ts b/packages/connector-bigquery/src/dialect.ts new file mode 100644 index 00000000..7b1058cc --- /dev/null +++ b/packages/connector-bigquery/src/dialect.ts @@ -0,0 +1,207 @@ +import type { KloSchemaDimensionType, KloTableRef } from '@klo/context/scan'; + +type BigQueryTableNameRef = Pick & Partial>; + +export class KloBigQueryDialect { + readonly type = 'bigquery'; + + private readonly typeMappings: Record = { + TIMESTAMP: 'time', + DATETIME: 'time', + DATE: 'time', + TIME: 'time', + INT64: 'number', + INTEGER: 'number', + FLOAT64: 'number', + FLOAT: 'number', + NUMERIC: 'number', + BIGNUMERIC: 'number', + STRING: 'string', + BYTES: 'string', + BOOL: 'boolean', + BOOLEAN: 'boolean', + }; + + quoteIdentifier(identifier: string): string { + return `\`${identifier.replace(/`/g, '\\`')}\``; + } + + formatTableName(table: BigQueryTableNameRef): string { + if (table.catalog && table.db) { + return 
`${this.quoteIdentifier(table.catalog)}.${this.quoteIdentifier(table.db)}.${this.quoteIdentifier(table.name)}`; + } + if (table.db) { + return `${this.quoteIdentifier(table.db)}.${this.quoteIdentifier(table.name)}`; + } + return this.quoteIdentifier(table.name); + } + + mapDataType(nativeType: string): string { + const fieldType = nativeType.toUpperCase().trim(); + if (fieldType === 'RECORD' || fieldType === 'STRUCT') { + return 'JSON'; + } + const typeMapping: Record = { + STRING: 'VARCHAR', + BYTES: 'VARBINARY', + INTEGER: 'BIGINT', + INT64: 'BIGINT', + FLOAT: 'DOUBLE', + FLOAT64: 'DOUBLE', + NUMERIC: 'DECIMAL', + BIGNUMERIC: 'DECIMAL', + BOOLEAN: 'BOOLEAN', + BOOL: 'BOOLEAN', + TIMESTAMP: 'TIMESTAMP', + DATE: 'DATE', + TIME: 'TIME', + DATETIME: 'DATETIME', + GEOGRAPHY: 'GEOGRAPHY', + JSON: 'JSON', + }; + return typeMapping[fieldType] || fieldType; + } + + mapToDimensionType(nativeType: string): KloSchemaDimensionType { + if (!nativeType) { + return 'string'; + } + const normalizedType = nativeType.toUpperCase().trim(); + if (this.typeMappings[normalizedType]) { + return this.typeMappings[normalizedType]; + } + if (normalizedType.includes('TIME') || normalizedType.includes('DATE')) { + return 'time'; + } + if (normalizedType.includes('INT') || normalizedType.includes('NUM') || normalizedType.includes('FLOAT')) { + return 'number'; + } + if (normalizedType.includes('BOOL')) { + return 'boolean'; + } + return 'string'; + } + + generateSampleQuery(tableName: string, limit: number, columns?: string[]): string { + const columnList = + columns && columns.length > 0 ? 
columns.map((column) => this.quoteIdentifier(column)).join(', ') : '*'; + return `SELECT ${columnList} FROM ${tableName} ORDER BY RAND() LIMIT ${limit}`; + } + + generateColumnSampleQuery(tableName: string, columnName: string, limit: number): string { + const quotedColumn = this.quoteIdentifier(columnName); + return `SELECT ${quotedColumn} FROM ${tableName} WHERE ${quotedColumn} IS NOT NULL AND TRIM(CAST(${quotedColumn} AS STRING)) != '' ORDER BY RAND() LIMIT ${limit}`; + } + + prepareQuery(sql: string, params?: Record): { sql: string; params?: Record } { + if (!params) { + return { sql, params: undefined }; + } + let processedSql = sql; + const processedParams: Record = {}; + for (const [key, value] of Object.entries(params)) { + processedSql = processedSql.replace(new RegExp(`:${key}\\b`, 'g'), `@${key}`); + processedParams[key] = value; + } + return { sql: processedSql, params: Object.keys(processedParams).length > 0 ? processedParams : undefined }; + } + + getRandomSampleFilter(samplePct: number): string { + if (samplePct <= 0 || samplePct >= 1) { + return ''; + } + return `RAND() < ${samplePct}`; + } + + getTableSampleClause(samplePct: number): string { + if (samplePct <= 0 || samplePct >= 1) { + return ''; + } + return `TABLESAMPLE SYSTEM (${samplePct * 100} PERCENT)`; + } + + getLimitOffsetClause(limit: number, offset?: number): string { + return offset !== undefined && offset > 0 ? 
`LIMIT ${limit} OFFSET ${offset}` : `LIMIT ${limit}`; + } + + getNullCountExpression(column: string): string { + return `COUNTIF(${column} IS NULL)`; + } + + getDistinctCountExpression(column: string): string { + return `APPROX_COUNT_DISTINCT(${column})`; + } + + generateCardinalitySampleQuery(tableName: string, columnName: string, sampleSize: number): string { + return ` + WITH sampled AS ( + SELECT ${columnName} AS val + FROM ${tableName} + WHERE ${columnName} IS NOT NULL + LIMIT ${sampleSize} + ) + SELECT APPROX_COUNT_DISTINCT(val) AS cardinality + FROM sampled + `; + } + + generateDistinctValuesQuery(tableName: string, columnName: string, limit: number): string { + return ` + SELECT DISTINCT CAST(${columnName} AS STRING) AS val + FROM ${tableName} + WHERE ${columnName} IS NOT NULL + ORDER BY val + LIMIT ${limit} + `; + } + + generateColumnStatisticsQuery(_schemaName: string, _tableName: string): string | null { + return null; + } + + generateRandomizedCardinalitySampleQuery(tableName: string, columnName: string, sampleSize: number): string { + return ` + WITH sampled AS ( + SELECT ${columnName} AS val + FROM ${tableName} + WHERE ${columnName} IS NOT NULL + ORDER BY RAND() + LIMIT ${sampleSize} + ) + SELECT APPROX_COUNT_DISTINCT(val) AS cardinality + FROM sampled + `; + } + + getTimeTruncExpression( + column: string, + granularity: 'day' | 'week' | 'month' | 'quarter' | 'year', + timezone?: string, + ): string { + const bigQueryGranularity = granularity.toUpperCase(); + if (timezone) { + return `DATE_TRUNC(DATETIME(${column}, '${timezone}'), ${bigQueryGranularity})`; + } + return `DATE_TRUNC(${column}, ${bigQueryGranularity})`; + } + + getCustomTimeTruncExpression(column: string, interval: string, origin?: string, timezone?: string): string { + const col = timezone ? 
`TIMESTAMP(DATETIME(${column}, '${timezone}'))` : column; // shift to the zone's wall clock, then cast back: TIMESTAMP_DIFF below only accepts TIMESTAMP operands, a bare DATETIME has no matching signature + const [rawAmount, rawUnit] = interval.split(' '); // interval is expected as "<amount> <unit>", e.g. "15 minute" + let diffUnit = rawUnit!.toUpperCase(); + let amount = Number(rawAmount); + let addUnit = diffUnit; + if (diffUnit === 'WEEK') { // weeks are rewritten as 7-day multiples so both the diff and the add use DAY + diffUnit = 'DAY'; + amount = amount * 7; + addUnit = 'DAY'; + } + const originExpr = origin ? `TIMESTAMP '${origin}'` : `TIMESTAMP '1970-01-01'`; // buckets are aligned to this origin (Unix epoch by default) + return `TIMESTAMP_ADD(${originExpr}, INTERVAL CAST(FLOOR(TIMESTAMP_DIFF(${col}, ${originExpr}, ${diffUnit}) / ${amount}) * ${amount} AS INT64) ${addUnit})`; + } + + parseIntervalToSql(interval: string): string { + const [amount, unit] = interval.split(' '); + return `INTERVAL ${amount} ${unit!.toUpperCase()}`; + } +} diff --git a/packages/connector-bigquery/src/index.ts b/packages/connector-bigquery/src/index.ts new file mode 100644 index 00000000..04aca98e --- /dev/null +++ b/packages/connector-bigquery/src/index.ts @@ -0,0 +1,18 @@ +export { KloBigQueryDialect } from './dialect.js'; +export { + bigQueryConnectionConfigFromConfig, + isKloBigQueryConnectionConfig, + KloBigQueryScanConnector, + type KloBigQueryClient, + type KloBigQueryClientFactory, + type KloBigQueryColumnDistinctValuesOptions, + type KloBigQueryColumnDistinctValuesResult, + type KloBigQueryConnectionConfig, + type KloBigQueryDataset, + type KloBigQueryQueryJob, + type KloBigQueryReadOnlyQueryInput, + type KloBigQueryResolvedConnectionConfig, + type KloBigQueryScanConnectorOptions, + type KloBigQueryTableRef, +} from './connector.js'; +export { createBigQueryLiveDatabaseIntrospection } from './live-database-introspection.js'; diff --git a/packages/connector-bigquery/src/live-database-introspection.ts b/packages/connector-bigquery/src/live-database-introspection.ts new file mode 100644 index 00000000..854923a2 --- /dev/null +++ b/packages/connector-bigquery/src/live-database-introspection.ts @@ -0,0 +1,34 @@ +import type { LiveDatabaseIntrospectionPort } from '@klo/context/ingest'; +import type { KloProjectConnectionConfig } 
from '@klo/context/project'; +import { + KloBigQueryScanConnector, + type KloBigQueryClientFactory, + type KloBigQueryConnectionConfig, +} from './connector.js'; + +interface CreateBigQueryLiveDatabaseIntrospectionOptions { + connections: Record; + clientFactory?: KloBigQueryClientFactory; + now?: () => Date; +} + +export function createBigQueryLiveDatabaseIntrospection( + options: CreateBigQueryLiveDatabaseIntrospectionOptions, +): LiveDatabaseIntrospectionPort { + return { + async extractSchema(connectionId: string) { + const connection = options.connections[connectionId] as KloBigQueryConnectionConfig | undefined; + const connector = new KloBigQueryScanConnector({ + connectionId, + connection, + clientFactory: options.clientFactory, + now: options.now, + }); + try { + return await connector.introspect({ connectionId, driver: 'bigquery' }, { runId: `bigquery-${connectionId}` }); + } finally { + await connector.cleanup(); + } + }, + }; +} diff --git a/packages/connector-bigquery/src/package-exports.test.ts b/packages/connector-bigquery/src/package-exports.test.ts new file mode 100644 index 00000000..80ba5c86 --- /dev/null +++ b/packages/connector-bigquery/src/package-exports.test.ts @@ -0,0 +1,11 @@ +import { describe, expect, it } from 'vitest'; +import * as connector from './index.js'; + +describe('@klo/connector-bigquery exports', () => { + it('exports public connector, dialect, and introspection APIs', () => { + expect(connector.KloBigQueryDialect).toBeTypeOf('function'); + expect(connector.KloBigQueryScanConnector).toBeTypeOf('function'); + expect(connector.bigQueryConnectionConfigFromConfig).toBeTypeOf('function'); + expect(connector.createBigQueryLiveDatabaseIntrospection).toBeTypeOf('function'); + }); +}); diff --git a/packages/connector-bigquery/tsconfig.json b/packages/connector-bigquery/tsconfig.json new file mode 100644 index 00000000..965e6978 --- /dev/null +++ b/packages/connector-bigquery/tsconfig.json @@ -0,0 +1,9 @@ +{ + "extends": 
"../../tsconfig.base.json", + "compilerOptions": { + "outDir": "./dist", + "rootDir": "./src" + }, + "include": ["src/**/*.ts"], + "exclude": ["dist", "node_modules"] +} diff --git a/packages/connector-clickhouse/package.json b/packages/connector-clickhouse/package.json new file mode 100644 index 00000000..d1fb64e5 --- /dev/null +++ b/packages/connector-clickhouse/package.json @@ -0,0 +1,47 @@ +{ + "name": "@klo/connector-clickhouse", + "version": "0.0.0-private", + "description": "ClickHouse connector package for KLO scan interfaces", + "private": true, + "type": "module", + "engines": { + "node": ">=22.0.0" + }, + "main": "dist/index.js", + "types": "dist/index.d.ts", + "exports": { + ".": { + "types": "./dist/index.d.ts", + "import": "./dist/index.js", + "default": "./dist/index.js" + }, + "./package.json": "./package.json" + }, + "files": [ + "dist" + ], + "scripts": { + "build": "tsc -p tsconfig.json", + "test": "vitest run", + "type-check": "tsc -p tsconfig.json --noEmit" + }, + "dependencies": { + "@clickhouse/client": "^1.18.2", + "@klo/context": "workspace:*" + }, + "devDependencies": { + "@types/node": "^24.3.0", + "typescript": "^5.9.3", + "vitest": "^4.0.18" + }, + "license": "Apache-2.0", + "repository": { + "type": "git", + "url": "git+https://github.com/kaelio/ktx.git", + "directory": "packages/connector-clickhouse" + }, + "bugs": { + "url": "https://github.com/kaelio/ktx/issues" + }, + "homepage": "https://github.com/kaelio/ktx#readme" +} diff --git a/packages/connector-clickhouse/src/connector.test.ts b/packages/connector-clickhouse/src/connector.test.ts new file mode 100644 index 00000000..dcb0c3bd --- /dev/null +++ b/packages/connector-clickhouse/src/connector.test.ts @@ -0,0 +1,296 @@ +import { describe, expect, it, vi } from 'vitest'; +import { + clickHouseClientConfigFromConfig, + createClickHouseLiveDatabaseIntrospection, + isKloClickHouseConnectionConfig, + KloClickHouseScanConnector, + type KloClickHouseClientFactory, +} from './index.js'; 
+ +function result(payload: T) { + return { + async json(): Promise { + return payload; + }, + }; +} + +function fakeClientFactory(): KloClickHouseClientFactory { + const query = vi.fn(async (input: { query: string; format: string; query_params?: Record }) => { + if (input.query.includes('FROM system.tables')) { + return result([ + { name: 'events', engine: 'MergeTree', comment: 'Event stream' }, + { name: 'event_summary', engine: 'View', comment: '' }, + ]); + } + if (input.query.includes('FROM system.columns')) { + return result([ + { table: 'events', name: 'id', type: 'UInt64', comment: 'PK', is_in_primary_key: 1 }, + { table: 'events', name: 'event_name', type: 'LowCardinality(String)', comment: '', is_in_primary_key: 0 }, + { table: 'event_summary', name: 'event_name', type: 'String', comment: '', is_in_primary_key: 0 }, + ]); + } + if (input.query.includes('FROM system.parts') && input.query.includes('GROUP BY table')) { + return result([{ table: 'events', row_count: '2' }]); + } + if (input.query.includes('SELECT `id`, `event_name` FROM `analytics`.`events` LIMIT 1')) { + return result({ + meta: [ + { name: 'id', type: 'UInt64' }, + { name: 'event_name', type: 'String' }, + ], + data: [[10, 'signup']], + rows: 1, + }); + } + if (input.query.includes('SELECT `event_name` FROM `analytics`.`events`')) { + return result({ + meta: [{ name: 'event_name', type: 'String' }], + data: [['signup'], ['purchase']], + rows: 2, + }); + } + if (input.query.includes('COUNT(DISTINCT val)')) { + return result({ + meta: [{ name: 'cardinality', type: 'UInt64' }], + data: [[2]], + rows: 1, + }); + } + if (input.query.includes('SELECT DISTINCT toString(`event_name`) AS val')) { + return result({ + meta: [{ name: 'val', type: 'String' }], + data: [['purchase'], ['signup']], + rows: 2, + }); + } + if (input.query.includes('sum(rows) AS count')) { + return result({ + meta: [{ name: 'count', type: 'UInt64' }], + data: [[2]], + rows: 1, + }); + } + if (input.query.includes('FROM 
system.databases')) { + return result([{ name: 'analytics' }, { name: 'warehouse' }]); + } + if (input.query.trim() === 'SELECT 1') { + return result({ meta: [{ name: '1', type: 'UInt8' }], data: [[1]], rows: 1 }); + } + if (input.query.includes('select * from (select id, event_name from analytics.events) as klo_query_result limit 1')) { + return result({ + meta: [ + { name: 'id', type: 'UInt64' }, + { name: 'event_name', type: 'String' }, + ], + data: [[10, 'signup']], + rows: 1, + }); + } + throw new Error(`Unexpected SQL: ${input.query}`); + }); + const close = vi.fn(async () => undefined); + return { + createClient: vi.fn(() => ({ query, close })), + }; +} + +describe('KloClickHouseScanConnector', () => { + it('resolves ClickHouse connection configuration safely', () => { + expect(isKloClickHouseConnectionConfig({ driver: 'clickhouse', host: 'localhost', database: 'analytics' })).toBe( + true, + ); + expect(isKloClickHouseConnectionConfig({ driver: 'mysql', host: 'localhost', database: 'analytics' })).toBe(false); + expect( + clickHouseClientConfigFromConfig({ + connectionId: 'warehouse', + connection: { + driver: 'clickhouse', + host: 'ch.example.test', + port: 9440, + database: 'analytics', + username: 'reader', + password: 'test-pass', // pragma: allowlist secret + ssl: true, + readonly: true, + }, + }), + ).toMatchObject({ + host: 'ch.example.test', + port: 9440, + database: 'analytics', + username: 'reader', + password: 'test-pass', // pragma: allowlist secret + ssl: true, + }); + expect(() => + clickHouseClientConfigFromConfig({ + connectionId: 'warehouse', + connection: { driver: 'clickhouse', host: 'ch.example.test', database: 'analytics', readonly: false }, + }), + ).toThrow('Native ClickHouse connector requires connections.warehouse.readonly: true'); + }); + + it('introspects schema, primary keys, comments, row counts, and views', async () => { + const connector = new KloClickHouseScanConnector({ + connectionId: 'warehouse', + connection: { + driver: 
'clickhouse', + host: 'ch.example.test', + database: 'analytics', + username: 'reader', + password: 'test-pass', // pragma: allowlist secret + readonly: true, + }, + clientFactory: fakeClientFactory(), + now: () => new Date('2026-04-29T14:00:00.000Z'), + }); + + const snapshot = await connector.introspect( + { connectionId: 'warehouse', driver: 'clickhouse' }, + { runId: 'scan-run-1' }, + ); + + expect(snapshot).toMatchObject({ + connectionId: 'warehouse', + driver: 'clickhouse', + extractedAt: '2026-04-29T14:00:00.000Z', + scope: { schemas: ['analytics'] }, + metadata: { + database: 'analytics', + host: 'ch.example.test', + table_count: 2, + total_columns: 3, + }, + }); + expect(snapshot.tables.map((table) => [table.name, table.kind, table.estimatedRows, table.comment])).toEqual([ + ['events', 'table', 2, 'Event stream'], + ['event_summary', 'view', null, null], + ]); + expect(snapshot.tables.find((table) => table.name === 'events')?.columns[0]).toMatchObject({ + name: 'id', + nativeType: 'UInt64', + normalizedType: 'UInt64', + dimensionType: 'number', + nullable: false, + primaryKey: true, + comment: 'PK', + }); + expect(snapshot.tables.find((table) => table.name === 'events')?.foreignKeys).toEqual([]); + }); + + it('runs samples, distinct values, read-only SQL, row count, schema list, and cleanup', async () => { + const clientFactory = fakeClientFactory(); + const connector = new KloClickHouseScanConnector({ + connectionId: 'warehouse', + connection: { + driver: 'clickhouse', + host: 'ch.example.test', + database: 'analytics', + username: 'reader', + password: 'test-pass', // pragma: allowlist secret + readonly: true, + }, + clientFactory, + }); + + await expect( + connector.sampleTable( + { + connectionId: 'warehouse', + table: { catalog: null, db: 'analytics', name: 'events' }, + columns: ['id', 'event_name'], + limit: 1, + }, + { runId: 'scan-run-1' }, + ), + ).resolves.toEqual({ headers: ['id', 'event_name'], rows: [[10, 'signup']], totalRows: 1 }); + + 
await expect( + connector.sampleColumn( + { connectionId: 'warehouse', table: { catalog: null, db: 'analytics', name: 'events' }, column: 'event_name', limit: 5 }, + { runId: 'scan-run-1' }, + ), + ).resolves.toMatchObject({ values: ['signup', 'purchase'], nullCount: null, distinctCount: null }); + + await expect( + connector.getColumnDistinctValues( + { catalog: null, db: 'analytics', name: 'events' }, + 'event_name', + { maxCardinality: 5, limit: 10, sampleSize: 100 }, + ), + ).resolves.toEqual({ values: ['purchase', 'signup'], cardinality: 2 }); + + await expect( + connector.executeReadOnly( + { connectionId: 'warehouse', sql: 'select id, event_name from analytics.events', maxRows: 1 }, + { runId: 'scan-run-1' }, + ), + ).resolves.toMatchObject({ headers: ['id', 'event_name'], rows: [[10, 'signup']], totalRows: 1, rowCount: 1 }); + + await expect( + connector.executeReadOnly({ connectionId: 'warehouse', sql: 'delete from events' }, { runId: 'scan-run-1' }), + ).rejects.toThrow('Only read-only SELECT/WITH queries can be executed locally'); + + await expect(connector.getTableRowCount('events')).resolves.toBe(2); + await expect(connector.listSchemas()).resolves.toEqual(['analytics', 'warehouse']); + await expect( + connector.columnStats( + { connectionId: 'warehouse', table: { catalog: null, db: 'analytics', name: 'events' }, column: 'event_name' }, + { runId: 'scan-run-1' }, + ), + ).resolves.toBeNull(); + + await connector.cleanup(); + }); + + it('adapts native ClickHouse snapshots to live-database introspection for local ingest', async () => { + const introspection = createClickHouseLiveDatabaseIntrospection({ + connections: { + warehouse: { + driver: 'clickhouse', + host: 'ch.example.test', + database: 'analytics', + username: 'reader', + password: 'test-pass', // pragma: allowlist secret + readonly: true, + }, + }, + clientFactory: fakeClientFactory(), + now: () => new Date('2026-04-29T14:00:00.000Z'), + }); + + const snapshot = await 
introspection.extractSchema('warehouse'); + + expect(snapshot).toMatchObject({ + connectionId: 'warehouse', + extractedAt: '2026-04-29T14:00:00.000Z', + }); + expect(snapshot.tables.find((table) => table.name === 'events')).toMatchObject({ + name: 'events', + catalog: null, + db: 'analytics', + columns: [ + { + name: 'id', + nativeType: 'UInt64', + normalizedType: 'UInt64', + dimensionType: 'number', + nullable: false, + primaryKey: true, + comment: 'PK', + }, + { + name: 'event_name', + nativeType: 'LowCardinality(String)', + normalizedType: 'LowCardinality(String)', + dimensionType: 'string', + nullable: false, + primaryKey: false, + comment: null, + }, + ], + foreignKeys: [], + }); + }); +}); diff --git a/packages/connector-clickhouse/src/connector.ts b/packages/connector-clickhouse/src/connector.ts new file mode 100644 index 00000000..b410e8ba --- /dev/null +++ b/packages/connector-clickhouse/src/connector.ts @@ -0,0 +1,525 @@ +import { createClient } from '@clickhouse/client'; +import { assertReadOnlySql, limitSqlForExecution } from '@klo/context/connections'; +import { + createKloConnectorCapabilities, + type KloColumnSampleInput, + type KloColumnSampleResult, + type KloColumnStatsInput, + type KloColumnStatsResult, + type KloQueryResult, + type KloReadOnlyQueryInput, + type KloScanConnector, + type KloScanContext, + type KloScanInput, + type KloSchemaColumn, + type KloSchemaSnapshot, + type KloSchemaTable, + type KloTableRef, + type KloTableSampleInput, + type KloTableSampleResult, +} from '@klo/context/scan'; +import { readFileSync } from 'node:fs'; +import { Agent as HttpsAgent } from 'node:https'; +import { homedir } from 'node:os'; +import { resolve } from 'node:path'; +import { KloClickHouseDialect } from './dialect.js'; + +export interface KloClickHouseConnectionConfig { + driver?: string; + host?: string; + port?: number; + database?: string; + username?: string; + user?: string; + password?: string; + url?: string; + ssl?: boolean; + readonly?: 
boolean; + [key: string]: unknown; +} + +export interface KloClickHouseResolvedClientConfig { + host: string; + port: number; + database: string; + username: string; + password?: string; + ssl: boolean; +} + +interface ClickHouseQueryInput { + query: string; + format: 'JSONCompact' | 'JSONEachRow'; + query_params?: Record; +} + +interface ClickHouseResultSet { + json(): Promise; +} + +export interface KloClickHouseClient { + query(input: ClickHouseQueryInput): Promise; + close(): Promise; +} + +export interface KloClickHouseClientFactory { + createClient(config: Parameters[0]): KloClickHouseClient; +} + +interface KloClickHouseResolvedEndpoint { + host: string; + port: number; + close?: () => Promise; +} + +export interface KloClickHouseEndpointResolver { + resolve(input: { + host: string; + port: number; + connection: KloClickHouseConnectionConfig; + }): Promise; +} + +export interface KloClickHouseScanConnectorOptions { + connectionId: string; + connection: KloClickHouseConnectionConfig | undefined; + clientFactory?: KloClickHouseClientFactory; + endpointResolver?: KloClickHouseEndpointResolver; + env?: NodeJS.ProcessEnv; + now?: () => Date; +} + +export interface KloClickHouseReadOnlyQueryInput extends KloReadOnlyQueryInput { + params?: Record; +} + +export interface KloClickHouseColumnDistinctValuesOptions { + maxCardinality: number; + limit: number; + sampleSize?: number; +} + +export interface KloClickHouseColumnDistinctValuesResult { + values: string[] | null; + cardinality: number; +} + +interface ClickHouseTableRow { + name: string; + engine: string; + comment: string; +} + +interface ClickHouseColumnRow { + table: string; + name: string; + type: string; + comment: string; + is_in_primary_key: number; +} + +interface ClickHouseRowCountRow { + table?: string; + row_count?: string | number; + count?: string | number; +} + +interface ClickHouseDatabaseRow { + name: string; +} + +interface ClickHouseCompactResponse { + meta?: Array<{ name: string; type: string 
}>; + data?: unknown[][]; + rows?: number; +} + +class DefaultClickHouseClientFactory implements KloClickHouseClientFactory { + createClient(config: Parameters[0]): KloClickHouseClient { + return createClient(config); + } +} + +function stringConfigValue( + connection: KloClickHouseConnectionConfig | undefined, + key: keyof KloClickHouseConnectionConfig, + env: NodeJS.ProcessEnv, +): string | undefined { + const value = connection?.[key]; + return typeof value === 'string' && value.trim().length > 0 ? resolveStringReference(value.trim(), env) : undefined; +} + +function resolveStringReference(value: string, env: NodeJS.ProcessEnv): string { // resolves "env:NAME" from env and "file:PATH" from disk; any other value is returned unchanged + if (value.startsWith('env:')) { + const envName = value.slice('env:'.length); + return env[envName] ?? ''; // an unset variable resolves to the empty string + } + if (value.startsWith('file:')) { + const rawPath = value.slice('file:'.length); + const path = rawPath.startsWith('~') ? resolve(homedir(), rawPath.slice(1).replace(/^[\\/]+/, '')) : rawPath; // drop the separator after "~": resolve() treats "/x" as absolute and would discard homedir() + return readFileSync(path, 'utf-8').trim(); // surrounding whitespace (e.g. a trailing newline) is not part of the value + } + return value; +} + +function maybeNumber(value: unknown): number | undefined { + return typeof value === 'number' && Number.isFinite(value) ? value : undefined; +} + +function parseClickHouseUrl(url: string): Partial { + const parsed = new URL(url); + return { + host: parsed.hostname, + port: parsed.port ? Number(parsed.port) : undefined, + database: parsed.pathname.replace(/^\/+/, '') || undefined, + username: parsed.username ? decodeURIComponent(parsed.username) : undefined, + password: parsed.password ? decodeURIComponent(parsed.password) : undefined, + ssl: parsed.protocol === 'https:' || parsed.searchParams.get('ssl') === 'true', + }; +} + +function tableKind(engine: string): KloSchemaTable['kind'] { + return engine === 'View' || engine === 'MaterializedView' ? 
'view' : 'table'; +} + +function isNullableClickHouseType(type: string): boolean { + return type.startsWith('Nullable(') || type.startsWith('LowCardinality(Nullable('); +} + +export function isKloClickHouseConnectionConfig(connection: KloClickHouseConnectionConfig | undefined): boolean { + return String(connection?.driver ?? '').toLowerCase() === 'clickhouse'; +} + +export function clickHouseClientConfigFromConfig(input: { + connectionId: string; + connection: KloClickHouseConnectionConfig | undefined; + env?: NodeJS.ProcessEnv; +}): KloClickHouseResolvedClientConfig { + if (!isKloClickHouseConnectionConfig(input.connection)) { + throw new Error(`Native ClickHouse connector cannot run driver "${input.connection?.driver ?? 'unknown'}"`); + } + if (input.connection?.readonly !== true) { + throw new Error(`Native ClickHouse connector requires connections.${input.connectionId}.readonly: true`); + } + + const env = input.env ?? process.env; + const referencedUrl = stringConfigValue(input.connection, 'url', env); + const urlConfig = referencedUrl ? parseClickHouseUrl(referencedUrl) : {}; + const merged: KloClickHouseConnectionConfig = { ...urlConfig, ...input.connection }; + const host = stringConfigValue(merged, 'host', env); + const database = stringConfigValue(merged, 'database', env) ?? 'default'; + const username = stringConfigValue(merged, 'username', env) ?? stringConfigValue(merged, 'user', env) ?? 'default'; + + if (!host) { + throw new Error(`Native ClickHouse connector requires connections.${input.connectionId}.host or url`); + } + + return { + host, + port: maybeNumber(merged.port) ?? 
8123, + database, + username, + password: stringConfigValue(merged, 'password', env), + ssl: merged.ssl === true, + }; +} + +export class KloClickHouseScanConnector implements KloScanConnector { + readonly id: string; + readonly driver = 'clickhouse' as const; + readonly capabilities = createKloConnectorCapabilities({ + tableSampling: true, + columnSampling: true, + columnStats: false, + readOnlySql: true, + nestedAnalysis: true, + formalForeignKeys: false, + estimatedRowCounts: true, + }); + + private readonly connectionId: string; + private readonly connection: KloClickHouseConnectionConfig; + private readonly clientConfig: KloClickHouseResolvedClientConfig; + private readonly clientFactory: KloClickHouseClientFactory; + private readonly endpointResolver?: KloClickHouseEndpointResolver; + private readonly now: () => Date; + private readonly dialect = new KloClickHouseDialect(); + private client: KloClickHouseClient | null = null; + private resolvedEndpoint: KloClickHouseResolvedEndpoint | null = null; + + constructor(options: KloClickHouseScanConnectorOptions) { + this.connectionId = options.connectionId; + this.connection = options.connection ?? {}; + this.clientConfig = clickHouseClientConfigFromConfig({ + connectionId: options.connectionId, + connection: options.connection, + env: options.env, + }); + this.clientFactory = options.clientFactory ?? new DefaultClickHouseClientFactory(); + this.endpointResolver = options.endpointResolver; + this.now = options.now ?? (() => new Date()); + this.id = `clickhouse:${options.connectionId}`; + } + + async testConnection(): Promise<{ success: boolean; error?: string }> { + try { + await this.query('SELECT 1'); + return { success: true }; + } catch (error) { + return { success: false, error: error instanceof Error ? 
error.message : String(error) }; + } + } + + async introspect(input: KloScanInput, _ctx: KloScanContext): Promise { + this.assertConnection(input.connectionId); + const database = this.clientConfig.database; + const tables = await this.queryEachRow( + ` + SELECT name, engine, comment + FROM system.tables + WHERE database = {database:String} + AND engine NOT IN ('Dictionary') + ORDER BY name + `, + { database }, + ); + const columns = await this.queryEachRow( + ` + SELECT table, name, type, comment, is_in_primary_key + FROM system.columns + WHERE database = {database:String} + ORDER BY table, position + `, + { database }, + ); + const rowCounts = await this.queryEachRow( + ` + SELECT table, sum(rows) AS row_count + FROM system.parts + WHERE database = {database:String} + AND active = 1 + GROUP BY table + `, + { database }, + ); + const columnsByTable = new Map(); + for (const column of columns) { + columnsByTable.set(column.table, [...(columnsByTable.get(column.table) ?? []), column]); + } + const rowCountByTable = new Map(rowCounts.map((row) => [String(row.table), Number(row.row_count ?? 0)])); + const schemaTables = tables.map((table) => + this.toSchemaTable(table, columnsByTable.get(table.name) ?? [], rowCountByTable.get(table.name) ?? 
0), + ); + + return { + connectionId: this.connectionId, + driver: 'clickhouse', + extractedAt: this.now().toISOString(), + scope: { schemas: [database] }, + metadata: { + database, + host: this.clientConfig.host, + table_count: schemaTables.length, + total_columns: schemaTables.reduce((sum, table) => sum + table.columns.length, 0), + }, + tables: schemaTables, + }; + } + + async sampleTable(input: KloTableSampleInput, _ctx: KloScanContext): Promise { + this.assertConnection(input.connectionId); + const result = await this.query( + this.dialect.generateSampleQuery(this.qTableName(input.table), input.limit, input.columns), + ); + return { headers: result.headers, rows: result.rows, totalRows: result.totalRows }; + } + + async sampleColumn(input: KloColumnSampleInput, _ctx: KloScanContext): Promise { + this.assertConnection(input.connectionId); + const result = await this.query( + this.dialect.generateColumnSampleQuery(this.qTableName(input.table), input.column, input.limit), + ); + const values = result.rows.filter((row) => row.length > 0 && row[0] !== null).map((row) => row[0]); + return { values, nullCount: null, distinctCount: null }; + } + + async columnStats(_input: KloColumnStatsInput, _ctx: KloScanContext): Promise { + return null; + } + + async executeReadOnly(input: KloClickHouseReadOnlyQueryInput, _ctx: KloScanContext): Promise { + this.assertConnection(input.connectionId); + const limitedSql = limitSqlForExecution(assertReadOnlySql(input.sql), input.maxRows); + const prepared = this.dialect.prepareQuery(limitedSql, input.params); + const result = await this.query(prepared.sql, prepared.params); + return { ...result, rowCount: result.rows.length }; + } + + async getColumnDistinctValues( + table: KloTableRef, + columnName: string, + options: KloClickHouseColumnDistinctValuesOptions, + ): Promise { + const sampleSize = options.sampleSize ?? 
10000; + const tableName = this.qTableName(table); + const quotedColumn = this.dialect.quoteIdentifier(columnName); + const cardinalityResult = await this.query( + this.dialect.generateCardinalitySampleQuery(tableName, quotedColumn, sampleSize), + ); + const cardinality = Number(cardinalityResult.rows[0]?.[0]); + if (Number.isNaN(cardinality)) { + return null; + } + if (cardinality === 0) { + return { values: [], cardinality: 0 }; + } + if (cardinality > options.maxCardinality) { + return { values: null, cardinality }; + } + const valuesResult = await this.query(this.dialect.generateDistinctValuesQuery(tableName, quotedColumn, options.limit)); + return { + values: valuesResult.rows.filter((row) => row[0] !== null).map((row) => String(row[0])), + cardinality, + }; + } + + async getTableRowCount(tableName: string): Promise { + const result = await this.query( + ` + SELECT sum(rows) AS count + FROM system.parts + WHERE database = {database:String} + AND table = {table:String} + AND active = 1 + `, + { database: this.clientConfig.database, table: tableName }, + ); + return Number(result.rows[0]?.[0] ?? 
0); + } + + qTableName(table: Pick & Partial>): string { + return this.dialect.formatTableName(table); + } + + quoteIdentifier(identifier: string): string { + return this.dialect.quoteIdentifier(identifier); + } + + async listSchemas(): Promise { + const rows = await this.queryEachRow( + ` + SELECT name + FROM system.databases + WHERE name NOT IN ('system', 'INFORMATION_SCHEMA', 'information_schema') + ORDER BY name + `, + ); + return rows.map((row) => row.name); + } + + async cleanup(): Promise { // closes the cached client and any resolved endpoint; safe to call when nothing was opened + const { client, resolvedEndpoint: endpoint } = this; + this.client = null; // detach both handles first so a failing close() cannot leave stale references behind + this.resolvedEndpoint = null; + try { + await client?.close(); + } finally { + await endpoint?.close?.(); // always release the endpoint, even when closing the client throws + } + } + + private toSchemaTable(table: ClickHouseTableRow, columns: ClickHouseColumnRow[], estimatedRows: number): KloSchemaTable { + const kind = tableKind(table.engine); + return { + catalog: null, + db: this.clientConfig.database, + name: table.name, + kind, + comment: table.comment || null, + estimatedRows: kind === 'view' ? null : estimatedRows, + columns: columns.map((column) => this.toSchemaColumn(column)), + foreignKeys: [], + }; + } + + private toSchemaColumn(column: ClickHouseColumnRow): KloSchemaColumn { + return { + name: column.name, + nativeType: column.type, + normalizedType: this.dialect.mapDataType(column.type), + dimensionType: this.dialect.mapToDimensionType(column.type), + nullable: isNullableClickHouseType(column.type), + primaryKey: column.is_in_primary_key === 1, + comment: column.comment || null, + }; + } + + private async clientForQuery(): Promise { + if (!this.client) { + const config = { ...this.clientConfig }; + if (this.endpointResolver) { + this.resolvedEndpoint = await this.endpointResolver.resolve({ + host: config.host, + port: config.port, + connection: this.connection, + }); + config.host = this.resolvedEndpoint.host; + config.port = this.resolvedEndpoint.port; + } + const protocol = config.ssl ? 
'https' : 'http'; + const isProxied = config.host !== this.clientConfig.host; + this.client = this.clientFactory.createClient({ + url: `${protocol}://${config.host}:${config.port}`, + username: config.username, + password: config.password ?? '', + database: config.database, + request_timeout: 30_000, + clickhouse_settings: { + output_format_json_quote_64bit_integers: 1, + }, + ...(isProxied && config.ssl + ? { + http_agent: new HttpsAgent({ + servername: this.clientConfig.host, + keepAlive: true, + }), + } + : {}), + }); + } + return this.client; + } + + private async queryEachRow(sql: string, params?: Record): Promise { + const client = await this.clientForQuery(); + const resultSet = await client.query({ + query: assertReadOnlySql(sql), + format: 'JSONEachRow', + ...(params ? { query_params: params } : {}), + }); + return (await resultSet.json()) as T[]; + } + + private async query(sql: string, params?: Record): Promise> { + const client = await this.clientForQuery(); + const resultSet = await client.query({ + query: assertReadOnlySql(sql), + format: 'JSONCompact', + ...(params ? { query_params: params } : {}), + }); + const response = (await resultSet.json()) as ClickHouseCompactResponse; + const meta = response.meta ?? []; + return { + headers: meta.map((field) => field.name), + headerTypes: meta.map((field) => field.type), + rows: response.data ?? [], + totalRows: response.rows ?? response.data?.length ?? 
0, + }; + } + + private assertConnection(connectionId: string): void { + if (connectionId !== this.connectionId) { + throw new Error(`KLO ClickHouse connector ${this.id} cannot serve connection ${connectionId}`); + } + } +} diff --git a/packages/connector-clickhouse/src/dialect.test.ts b/packages/connector-clickhouse/src/dialect.test.ts new file mode 100644 index 00000000..b6dd3485 --- /dev/null +++ b/packages/connector-clickhouse/src/dialect.test.ts @@ -0,0 +1,49 @@ +import { describe, expect, it } from 'vitest'; +import { KloClickHouseDialect } from './dialect.js'; + +describe('KloClickHouseDialect', () => { + const dialect = new KloClickHouseDialect(); + + it('quotes identifiers and formats database-qualified table names', () => { + expect(dialect.quoteIdentifier('events')).toBe('`events`'); + expect(dialect.quoteIdentifier('odd`name')).toBe('`odd``name`'); + expect(dialect.formatTableName({ catalog: null, db: 'analytics', name: 'events' })).toBe( + '`analytics`.`events`', + ); + expect(dialect.formatTableName({ catalog: null, db: null, name: 'events' })).toBe('`events`'); + }); + + it('maps nullable and low-cardinality ClickHouse types to KLO dimension types', () => { + expect(dialect.mapToDimensionType('Nullable(DateTime64(3))')).toBe('time'); + expect(dialect.mapToDimensionType('LowCardinality(Nullable(String))')).toBe('string'); + expect(dialect.mapToDimensionType('UInt64')).toBe('number'); + expect(dialect.mapToDimensionType('Decimal(18, 4)')).toBe('number'); + expect(dialect.mapToDimensionType('Bool')).toBe('boolean'); + expect(dialect.mapToDimensionType('IPv4')).toBe('string'); + expect(dialect.mapToDimensionType('')).toBe('string'); + }); + + it('builds sampling, distinct-value, pagination, and time SQL', () => { + expect(dialect.generateSampleQuery('`analytics`.`events`', 25, ['id', 'event_name'])).toBe( + 'SELECT `id`, `event_name` FROM `analytics`.`events` LIMIT 25', + ); + expect(dialect.generateColumnSampleQuery('`analytics`.`events`', 'event_name', 
/**
 * SQL-generation helpers for ClickHouse: identifier quoting, type classification,
 * sampling/cardinality queries, named-parameter rewriting and time bucketing.
 *
 * Methods whose parameters are called `tableName`/`columnName`/`column` splice the
 * value into the SQL text as-is unless the method quotes it itself
 * (`generateSampleQuery` and `generateColumnSampleQuery` quote column names).
 */
export class KloClickHouseDialect {
  readonly type = 'clickhouse';

  /** Lower-cased ClickHouse base type name → KLO dimension type. */
  private readonly typeMappings: Record<string, KloSchemaDimensionType> = {
    date: 'time',
    date32: 'time',
    datetime: 'time',
    datetime64: 'time',
    uint8: 'number',
    uint16: 'number',
    uint32: 'number',
    uint64: 'number',
    uint128: 'number',
    uint256: 'number',
    int8: 'number',
    int16: 'number',
    int32: 'number',
    int64: 'number',
    int128: 'number',
    int256: 'number',
    float32: 'number',
    float64: 'number',
    decimal: 'number',
    decimal32: 'number',
    decimal64: 'number',
    decimal128: 'number',
    decimal256: 'number',
    string: 'string',
    fixedstring: 'string',
    uuid: 'string',
    ipv4: 'string',
    ipv6: 'string',
    enum8: 'string',
    enum16: 'string',
    bool: 'boolean',
    boolean: 'boolean',
  };

  /** Wraps an identifier in backticks, doubling any embedded backtick. */
  quoteIdentifier(identifier: string): string {
    return `\`${identifier.replace(/`/g, '``')}\``;
  }

  /** Returns `` `db`.`name` `` when `table.db` is truthy, otherwise `` `name` ``. */
  formatTableName(table: ClickHouseTableNameRef): string {
    return table.db
      ? `${this.quoteIdentifier(table.db)}.${this.quoteIdentifier(table.name)}`
      : this.quoteIdentifier(table.name);
  }

  /** Returns the native type unchanged. */
  mapDataType(nativeType: string): string {
    return nativeType;
  }

  /**
   * Classifies a ClickHouse type as `time`, `number`, `boolean` or `string`.
   *
   * `Nullable(...)` / `LowCardinality(...)` wrappers are removed and type arguments
   * are dropped before the lookup. Unknown types fall back on a name-prefix check
   * and finally on `string`. The prefix check is anchored at the start of the name
   * so that names merely containing "int" (for example `Point`) are not reported
   * as numeric.
   */
  mapToDimensionType(nativeType: string): KloSchemaDimensionType {
    if (!nativeType) {
      return 'string';
    }

    let normalizedType = nativeType.toLowerCase().trim();
    normalizedType = this.unwrapClickHouseType(normalizedType, 'nullable');
    normalizedType = this.unwrapClickHouseType(normalizedType, 'lowcardinality');
    normalizedType = this.unwrapClickHouseType(normalizedType, 'nullable');
    const baseType = normalizedType.split('(')[0] ?? normalizedType;

    // Own-property check so names such as "constructor" never hit Object.prototype.
    const mapped = Object.prototype.hasOwnProperty.call(this.typeMappings, baseType)
      ? this.typeMappings[baseType]
      : undefined;
    if (mapped) {
      return mapped;
    }
    if (/^(date|time)/.test(baseType)) {
      return 'time';
    }
    if (/^(u?int|b?float|decimal)/.test(baseType)) {
      return 'number';
    }
    return 'string';
  }

  /** `SELECT <quoted columns or *> FROM <tableName> LIMIT <limit>`. */
  generateSampleQuery(tableName: string, limit: number, columns?: string[]): string {
    const columnList =
      columns && columns.length > 0 ? columns.map((column) => this.quoteIdentifier(column)).join(', ') : '*';
    return `SELECT ${columnList} FROM ${tableName} LIMIT ${limit}`;
  }

  /** Samples one column, skipping NULLs and values that are empty after trimming. */
  generateColumnSampleQuery(tableName: string, columnName: string, limit: number): string {
    const quotedColumn = this.quoteIdentifier(columnName);
    return `SELECT ${quotedColumn} FROM ${tableName} WHERE ${quotedColumn} IS NOT NULL AND trim(toString(${quotedColumn})) != '' LIMIT ${limit}`;
  }

  /**
   * Rewrites `:name` placeholders into ClickHouse typed placeholders `{name:Type}`.
   *
   * The type is inferred from the JavaScript value. Only parameters that actually
   * occur in the SQL are returned. The rewrite is a single pass over the original
   * text, so text inserted for one parameter is never rescanned for another; keys
   * are regex-escaped; and a `:name` directly preceded by another `:` (the `::`
   * cast operator) is left alone.
   *
   * @returns the input unchanged (with `params: undefined`) when no params are given.
   */
  prepareQuery(sql: string, params?: Record<string, unknown>): { sql: string; params?: Record<string, unknown> } {
    if (!params) {
      return { sql, params: undefined };
    }

    const queryParams: Record<string, unknown> = {};
    const keys = Object.keys(params).sort((a, b) => b.length - a.length);
    if (keys.length === 0) {
      return { sql, params: queryParams };
    }

    const alternatives = keys.map((key) => this.escapeRegExp(key)).join('|');
    const pattern = new RegExp(`(?<!:):(${alternatives})\\b`, 'g');
    const parameterizedQuery = sql.replace(pattern, (_match: string, key: string) => {
      queryParams[key] = params[key];
      return `{${key}:${this.inferClickHouseType(params[key])}}`;
    });

    return { sql: parameterizedQuery, params: queryParams };
  }

  /** Row filter keeping roughly `samplePct` of rows; empty string when `samplePct` is outside (0, 1). */
  getRandomSampleFilter(samplePct: number): string {
    if (samplePct <= 0 || samplePct >= 1) {
      return '';
    }
    return `rand() / 4294967295.0 < ${samplePct}`;
  }

  /** Always returns an empty string (no table-level sample clause is generated). */
  getTableSampleClause(_samplePct: number): string {
    return '';
  }

  /** `LIMIT n OFFSET m` when `offset` is a positive number, otherwise `LIMIT n`. */
  getLimitOffsetClause(limit: number, offset?: number): string {
    return offset !== undefined && offset > 0 ? `LIMIT ${limit} OFFSET ${offset}` : `LIMIT ${limit}`;
  }

  getNullCountExpression(column: string): string {
    return `countIf(${column} IS NULL)`;
  }

  getDistinctCountExpression(column: string): string {
    return `COUNT(DISTINCT ${column})`;
  }

  /** Distinct count over the first `sampleSize` non-NULL values of the column. */
  generateCardinalitySampleQuery(tableName: string, columnName: string, sampleSize: number): string {
    return `
      SELECT COUNT(DISTINCT val) AS cardinality
      FROM (
        SELECT ${columnName} AS val
        FROM ${tableName}
        WHERE ${columnName} IS NOT NULL
        LIMIT ${sampleSize}
      )
    `;
  }

  /** Up to `limit` distinct non-NULL values, stringified and ordered. */
  generateDistinctValuesQuery(tableName: string, columnName: string, limit: number): string {
    return `
      SELECT DISTINCT toString(${columnName}) AS val
      FROM ${tableName}
      WHERE ${columnName} IS NOT NULL
      ORDER BY val
      LIMIT ${limit}
    `;
  }

  /** Always returns `null`. */
  generateColumnStatisticsQuery(_schemaName: string, _tableName: string): string | null {
    return null;
  }

  /** Same as `generateCardinalitySampleQuery`, but the sampled rows are ordered by `rand()`. */
  generateRandomizedCardinalitySampleQuery(tableName: string, columnName: string, sampleSize: number): string {
    return `
      SELECT COUNT(DISTINCT val) AS cardinality
      FROM (
        SELECT ${columnName} AS val
        FROM ${tableName}
        WHERE ${columnName} IS NOT NULL
        ORDER BY rand()
        LIMIT ${sampleSize}
      )
    `;
  }

  /**
   * Truncates `column` to the start of the given calendar unit.
   * Weeks use mode `1`. The optional timezone is emitted as an escaped string literal.
   */
  getTimeTruncExpression(
    column: string,
    granularity: 'day' | 'week' | 'month' | 'quarter' | 'year',
    timezone?: string,
  ): string {
    const tz = timezone ? `, ${this.quoteStringLiteral(timezone)}` : '';
    switch (granularity) {
      case 'day':
        return `toStartOfDay(${column}${tz})`;
      case 'week':
        return `toStartOfWeek(${column}, 1${tz})`;
      case 'month':
        return `toStartOfMonth(${column}${tz})`;
      case 'quarter':
        return `toStartOfQuarter(${column}${tz})`;
      case 'year':
        return `toStartOfYear(${column}${tz})`;
    }
  }

  /**
   * Buckets `column` into fixed-size intervals counted from `origin`
   * (default `1970-01-01`).
   *
   * Month/quarter/year intervals use `dateDiff`/`dateAdd`; every other unit is
   * converted to seconds (unknown units are treated as days by `intervalToSeconds`).
   *
   * @param interval `"<amount> <unit>"`, e.g. `"15 minutes"`.
   * @throws Error when the interval has no unit or the amount is not a positive number.
   */
  getCustomTimeTruncExpression(column: string, interval: string, origin?: string, timezone?: string): string {
    const col = timezone ? `toTimezone(${column}, ${this.quoteStringLiteral(timezone)})` : column;
    const parts = this.splitInterval(interval);
    const amount = Number(parts.amount);
    if (!Number.isFinite(amount) || amount <= 0) {
      // A zero/NaN amount would otherwise be spliced into intDiv(...) as the divisor.
      throw new Error(`Invalid interval "${interval}": amount must be a positive number`);
    }
    const unit = parts.unit.toLowerCase();
    const originExpr = `toDateTime(${this.quoteStringLiteral(origin || '1970-01-01')})`;
    const calendarUnit = this.toClickHouseDateDiffUnit(unit);
    if (calendarUnit) {
      return `dateAdd(${calendarUnit}, intDiv(dateDiff(${calendarUnit}, ${originExpr}, ${col}), ${amount}) * ${amount}, ${originExpr})`;
    }
    const seconds = this.intervalToSeconds(amount, unit);
    return `addSeconds(${originExpr}, intDiv(toUInt64(dateDiff('second', ${originExpr}, ${col})), ${seconds}) * ${seconds})`;
  }

  /**
   * Converts `"<amount> <unit>"` into `INTERVAL <amount> <UNIT>`.
   *
   * @throws Error when the amount is not numeric or the unit is not alphabetic.
   */
  parseIntervalToSql(interval: string): string {
    const { amount, unit } = this.splitInterval(interval);
    if (!/^-?\d+(\.\d+)?$/.test(amount) || !/^[a-z_]+$/i.test(unit)) {
      throw new Error(`Invalid interval "${interval}": expected "<amount> <unit>"`);
    }
    return `INTERVAL ${amount} ${unit.toUpperCase()}`;
  }

  /** Splits an interval on runs of whitespace and requires both an amount and a unit. */
  private splitInterval(interval: string): { amount: string; unit: string } {
    const [amount, unit] = interval.trim().split(/\s+/);
    if (!amount || !unit) {
      throw new Error(`Invalid interval "${interval}": expected "<amount> <unit>"`);
    }
    return { amount, unit };
  }

  /** Renders a value as a single-quoted SQL string literal, escaping `\` and `'`. */
  private quoteStringLiteral(value: string): string {
    return `'${value.replace(/\\/g, '\\\\').replace(/'/g, "\\'")}'`;
  }

  /** Escapes regular-expression metacharacters so the value matches literally. */
  private escapeRegExp(value: string): string {
    return value.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
  }

  /** Strips one `wrapper(...)` layer when the whole value is wrapped in it. */
  private unwrapClickHouseType(value: string, wrapper: string): string {
    const prefix = `${wrapper}(`;
    return value.startsWith(prefix) && value.endsWith(')') ? value.slice(prefix.length, -1) : value;
  }

  /** Picks the placeholder type for a parameter value; anything unrecognised is `String`. */
  private inferClickHouseType(value: unknown): string {
    if (value === null || value === undefined) {
      return 'String';
    }
    if (typeof value === 'boolean') {
      return 'Bool';
    }
    if (typeof value === 'number') {
      return Number.isInteger(value) ? 'Int64' : 'Float64';
    }
    if (value instanceof Date) {
      return 'DateTime';
    }
    return 'String';
  }

  /** Quoted `dateDiff` unit for month/quarter/year intervals, `null` for every other unit. */
  private toClickHouseDateDiffUnit(unit: string): string | null {
    if (unit === 'month' || unit === 'months') {
      return "'month'";
    }
    if (unit === 'quarter' || unit === 'quarters') {
      return "'quarter'";
    }
    if (unit === 'year' || unit === 'years') {
      return "'year'";
    }
    return null;
  }

  /** Converts an amount of seconds/minutes/hours/days/weeks to seconds; unknown units count as days. */
  private intervalToSeconds(amount: number, unit: string): number {
    switch (unit) {
      case 'second':
      case 'seconds':
        return amount;
      case 'minute':
      case 'minutes':
        return amount * 60;
      case 'hour':
      case 'hours':
        return amount * 3600;
      case 'day':
      case 'days':
        return amount * 86400;
      case 'week':
      case 'weeks':
        return amount * 604800;
      default:
        return amount * 86400;
    }
  }
}
/**
 * Inputs for `createClickHouseLiveDatabaseIntrospection`.
 *
 * NOTE(review): the generic arguments of `connections` were not recoverable from
 * the reviewed text; a string-keyed record of project connection configs is
 * assumed from the imports and the lookup below — confirm.
 */
interface CreateClickHouseLiveDatabaseIntrospectionOptions {
  connections: Record<string, KloProjectConnectionConfig>;
  clientFactory?: KloClickHouseClientFactory;
  endpointResolver?: KloClickHouseEndpointResolver;
  now?: () => Date;
}

/**
 * Builds a live-database introspection port backed by the ClickHouse scan connector.
 *
 * Every `extractSchema` call looks up the connection by id, creates a fresh
 * connector, introspects with run id `clickhouse-<connectionId>`, and always
 * cleans the connector up afterwards — including when introspection rejects.
 */
export function createClickHouseLiveDatabaseIntrospection(
  options: CreateClickHouseLiveDatabaseIntrospectionOptions,
): LiveDatabaseIntrospectionPort {
  const extractSchema = async (connectionId: string) => {
    const scanner = new KloClickHouseScanConnector({
      connectionId,
      connection: options.connections[connectionId] as KloClickHouseConnectionConfig | undefined,
      clientFactory: options.clientFactory,
      endpointResolver: options.endpointResolver,
      now: options.now,
    });

    try {
      const scanInput = { connectionId, driver: 'clickhouse' as const };
      const scanContext = { runId: `clickhouse-${connectionId}` };
      return await scanner.introspect(scanInput, scanContext);
    } finally {
      await scanner.cleanup();
    }
  };

  return { extractSchema };
}
expect(connector.createClickHouseLiveDatabaseIntrospection).toBeTypeOf('function'); + }); +}); diff --git a/packages/connector-clickhouse/tsconfig.json b/packages/connector-clickhouse/tsconfig.json new file mode 100644 index 00000000..965e6978 --- /dev/null +++ b/packages/connector-clickhouse/tsconfig.json @@ -0,0 +1,9 @@ +{ + "extends": "../../tsconfig.base.json", + "compilerOptions": { + "outDir": "./dist", + "rootDir": "./src" + }, + "include": ["src/**/*.ts"], + "exclude": ["dist", "node_modules"] +} diff --git a/packages/connector-mysql/package.json b/packages/connector-mysql/package.json new file mode 100644 index 00000000..9ce7ab96 --- /dev/null +++ b/packages/connector-mysql/package.json @@ -0,0 +1,47 @@ +{ + "name": "@klo/connector-mysql", + "version": "0.0.0-private", + "description": "MySQL connector package for KLO scan interfaces", + "private": true, + "type": "module", + "engines": { + "node": ">=22.0.0" + }, + "main": "dist/index.js", + "types": "dist/index.d.ts", + "exports": { + ".": { + "types": "./dist/index.d.ts", + "import": "./dist/index.js", + "default": "./dist/index.js" + }, + "./package.json": "./package.json" + }, + "files": [ + "dist" + ], + "scripts": { + "build": "tsc -p tsconfig.json", + "test": "vitest run", + "type-check": "tsc -p tsconfig.json --noEmit" + }, + "dependencies": { + "@klo/context": "workspace:*", + "mysql2": "^3.18.1" + }, + "devDependencies": { + "@types/node": "^24.3.0", + "typescript": "^5.9.3", + "vitest": "^4.0.18" + }, + "license": "Apache-2.0", + "repository": { + "type": "git", + "url": "git+https://github.com/kaelio/ktx.git", + "directory": "packages/connector-mysql" + }, + "bugs": { + "url": "https://github.com/kaelio/ktx/issues" + }, + "homepage": "https://github.com/kaelio/ktx#readme" +} diff --git a/packages/connector-mysql/src/connector.test.ts b/packages/connector-mysql/src/connector.test.ts new file mode 100644 index 00000000..c9672411 --- /dev/null +++ b/packages/connector-mysql/src/connector.test.ts 
@@ -0,0 +1,292 @@ +import { describe, expect, it, vi } from 'vitest'; +import type { FieldPacket, RowDataPacket } from 'mysql2/promise'; +import { + createMysqlLiveDatabaseIntrospection, + isKloMysqlConnectionConfig, + KloMysqlScanConnector, + mysqlConnectionPoolConfigFromConfig, + type KloMysqlPoolFactory, +} from './index.js'; + +function mysqlResult(rows: Record[], fields: Array<{ name: string; type?: number }>): [RowDataPacket[], FieldPacket[]] { + return [rows as RowDataPacket[], fields as FieldPacket[]]; +} + +function fakePoolFactory(): KloMysqlPoolFactory { + const query = vi.fn(async (sql: string, params?: unknown): Promise<[RowDataPacket[], FieldPacket[]]> => { + if (sql.includes('INFORMATION_SCHEMA.TABLES')) { + return mysqlResult( + [ + { TABLE_NAME: 'customers', TABLE_TYPE: 'BASE TABLE', TABLE_COMMENT: 'Customer table', TABLE_ROWS: 2 }, + { TABLE_NAME: 'orders', TABLE_TYPE: 'BASE TABLE', TABLE_COMMENT: 'InnoDB free: 1 kB; Order table', TABLE_ROWS: 2 }, + { TABLE_NAME: 'order_summary', TABLE_TYPE: 'VIEW', TABLE_COMMENT: '', TABLE_ROWS: null }, + ], + [{ name: 'TABLE_NAME' }, { name: 'TABLE_TYPE' }, { name: 'TABLE_COMMENT' }, { name: 'TABLE_ROWS' }], + ); + } + if (sql.includes('INFORMATION_SCHEMA.COLUMNS')) { + return mysqlResult( + [ + { TABLE_NAME: 'customers', COLUMN_NAME: 'id', DATA_TYPE: 'int', IS_NULLABLE: 'NO', COLUMN_COMMENT: 'PK' }, + { TABLE_NAME: 'customers', COLUMN_NAME: 'name', DATA_TYPE: 'varchar', IS_NULLABLE: 'NO', COLUMN_COMMENT: '' }, + { TABLE_NAME: 'orders', COLUMN_NAME: 'id', DATA_TYPE: 'int', IS_NULLABLE: 'NO', COLUMN_COMMENT: '' }, + { TABLE_NAME: 'orders', COLUMN_NAME: 'customer_id', DATA_TYPE: 'int', IS_NULLABLE: 'NO', COLUMN_COMMENT: '' }, + { TABLE_NAME: 'orders', COLUMN_NAME: 'status', DATA_TYPE: 'varchar', IS_NULLABLE: 'YES', COLUMN_COMMENT: '' }, + { TABLE_NAME: 'order_summary', COLUMN_NAME: 'status', DATA_TYPE: 'varchar', IS_NULLABLE: 'YES', COLUMN_COMMENT: '' }, + ], + [{ name: 'TABLE_NAME' }, { name: 'COLUMN_NAME' }, { 
name: 'DATA_TYPE' }, { name: 'IS_NULLABLE' }], + ); + } + if (sql.includes('INFORMATION_SCHEMA.KEY_COLUMN_USAGE') && sql.includes("CONSTRAINT_NAME = 'PRIMARY'")) { + return mysqlResult([{ TABLE_NAME: 'customers', COLUMN_NAME: 'id' }, { TABLE_NAME: 'orders', COLUMN_NAME: 'id' }], []); + } + if (sql.includes('INFORMATION_SCHEMA.KEY_COLUMN_USAGE') && sql.includes('REFERENCED_TABLE_NAME IS NOT NULL')) { + return mysqlResult( + [ + { + TABLE_NAME: 'orders', + COLUMN_NAME: 'customer_id', + REFERENCED_TABLE_NAME: 'customers', + REFERENCED_COLUMN_NAME: 'id', + CONSTRAINT_NAME: 'orders_customer_id_fk', + }, + ], + [], + ); + } + if (sql.includes('SELECT `id`, `status` FROM `analytics`.`orders` LIMIT 1')) { + return mysqlResult([{ id: 10, status: 'paid' }], [{ name: 'id', type: 3 }, { name: 'status', type: 253 }]); + } + if (sql.includes('select * from (select id, status from analytics.orders) as klo_query_result limit 1')) { + return mysqlResult([{ id: 10, status: 'paid' }], [{ name: 'id', type: 3 }, { name: 'status', type: 253 }]); + } + if (sql.includes('SELECT `status` FROM `analytics`.`orders`')) { + return mysqlResult([{ status: 'paid' }, { status: 'open' }], [{ name: 'status', type: 253 }]); + } + if (sql.includes('COUNT(DISTINCT val)')) { + return mysqlResult([{ cardinality: 2 }], [{ name: 'cardinality', type: 8 }]); + } + if (sql.includes('SELECT DISTINCT CAST(`status` AS CHAR) AS val')) { + return mysqlResult([{ val: 'open' }, { val: 'paid' }], [{ name: 'val', type: 253 }]); + } + if (sql.includes('COUNT(*) AS count')) { + return mysqlResult([{ count: 2 }], [{ name: 'count', type: 8 }]); + } + if (sql.includes('INFORMATION_SCHEMA.SCHEMATA')) { + return mysqlResult([{ SCHEMA_NAME: 'analytics' }, { SCHEMA_NAME: 'warehouse' }], [{ name: 'SCHEMA_NAME' }]); + } + if (sql.trim() === 'SELECT 1') { + return mysqlResult([{ '1': 1 }], [{ name: '1', type: 8 }]); + } + throw new Error(`Unexpected SQL: ${sql} params=${JSON.stringify(params)}`); + }); + const release = vi.fn(); 
+ const end = vi.fn(async () => undefined); + return { + createPool: vi.fn(() => ({ + getConnection: vi.fn(async () => ({ query, release })), + end, + })), + }; +} + +describe('KloMysqlScanConnector', () => { + it('resolves MySQL connection configuration safely', () => { + expect(isKloMysqlConnectionConfig({ driver: 'mysql', host: 'localhost', database: 'analytics', readonly: true })).toBe(true); + expect(isKloMysqlConnectionConfig({ driver: 'postgres', host: 'localhost', database: 'analytics' })).toBe(false); + expect( + mysqlConnectionPoolConfigFromConfig({ + connectionId: 'warehouse', + connection: { + driver: 'mysql', + host: 'db.example.test', + port: 3307, + database: 'analytics', + username: 'reader', + password: 'secret', // pragma: allowlist secret + ssl: true, + readonly: true, + }, + }), + ).toMatchObject({ + host: 'db.example.test', + port: 3307, + database: 'analytics', + user: 'reader', + password: 'secret', // pragma: allowlist secret + ssl: { rejectUnauthorized: false }, + }); + expect(() => + mysqlConnectionPoolConfigFromConfig({ + connectionId: 'warehouse', + connection: { driver: 'mysql', host: 'db.example.test', database: 'analytics', readonly: false }, + }), + ).toThrow('Native MySQL connector requires connections.warehouse.readonly: true'); + }); + + it('introspects schema, primary keys, comments, row counts, views, and foreign keys', async () => { + const connector = new KloMysqlScanConnector({ + connectionId: 'warehouse', + connection: { + driver: 'mysql', + host: 'db.example.test', + database: 'analytics', + username: 'reader', + password: 'secret', // pragma: allowlist secret + readonly: true, + }, + poolFactory: fakePoolFactory(), + now: () => new Date('2026-04-29T12:00:00.000Z'), + }); + + const snapshot = await connector.introspect( + { connectionId: 'warehouse', driver: 'mysql' }, + { runId: 'scan-run-1' }, + ); + + expect(snapshot).toMatchObject({ + connectionId: 'warehouse', + driver: 'mysql', + extractedAt: 
'2026-04-29T12:00:00.000Z', + scope: { schemas: ['analytics'] }, + metadata: { + database: 'analytics', + host: 'db.example.test', + table_count: 3, + total_columns: 6, + }, + }); + expect(snapshot.tables.map((table) => [table.name, table.kind, table.estimatedRows, table.comment])).toEqual([ + ['customers', 'table', 2, 'Customer table'], + ['orders', 'table', 2, 'Order table'], + ['order_summary', 'view', null, null], + ]); + expect(snapshot.tables.find((table) => table.name === 'customers')?.columns[0]).toMatchObject({ + name: 'id', + nativeType: 'int', + normalizedType: 'int', + dimensionType: 'number', + nullable: false, + primaryKey: true, + comment: 'PK', + }); + expect(snapshot.tables.find((table) => table.name === 'orders')?.foreignKeys).toEqual([ + { + fromColumn: 'customer_id', + toCatalog: null, + toDb: 'analytics', + toTable: 'customers', + toColumn: 'id', + constraintName: 'orders_customer_id_fk', + }, + ]); + }); + + it('runs samples, distinct values, read-only SQL, row count, schema list, and cleanup', async () => { + const poolFactory = fakePoolFactory(); + const connector = new KloMysqlScanConnector({ + connectionId: 'warehouse', + connection: { + driver: 'mysql', + host: 'db.example.test', + database: 'analytics', + username: 'reader', + password: 'secret', // pragma: allowlist secret + readonly: true, + }, + poolFactory, + }); + + await expect( + connector.sampleTable( + { connectionId: 'warehouse', table: { catalog: null, db: 'analytics', name: 'orders' }, columns: ['id', 'status'], limit: 1 }, + { runId: 'scan-run-1' }, + ), + ).resolves.toEqual({ headers: ['id', 'status'], rows: [[10, 'paid']], totalRows: 1 }); + + await expect( + connector.sampleColumn( + { connectionId: 'warehouse', table: { catalog: null, db: 'analytics', name: 'orders' }, column: 'status', limit: 5 }, + { runId: 'scan-run-1' }, + ), + ).resolves.toMatchObject({ values: ['paid', 'open'], nullCount: null, distinctCount: null }); + + await expect( + 
connector.getColumnDistinctValues( + { catalog: null, db: 'analytics', name: 'orders' }, + 'status', + { maxCardinality: 5, limit: 10, sampleSize: 100 }, + ), + ).resolves.toEqual({ values: ['open', 'paid'], cardinality: 2 }); + + await expect( + connector.executeReadOnly( + { connectionId: 'warehouse', sql: 'select id, status from analytics.orders', maxRows: 1 }, + { runId: 'scan-run-1' }, + ), + ).resolves.toMatchObject({ headers: ['id', 'status'], rows: [[10, 'paid']], totalRows: 1, rowCount: 1 }); + + await expect( + connector.executeReadOnly({ connectionId: 'warehouse', sql: 'delete from orders' }, { runId: 'scan-run-1' }), + ).rejects.toThrow('Only read-only SELECT/WITH queries can be executed locally'); + + await expect(connector.getTableRowCount('orders')).resolves.toBe(2); + await expect(connector.listSchemas()).resolves.toEqual(['analytics', 'warehouse']); + await expect(connector.columnStats( + { connectionId: 'warehouse', table: { catalog: null, db: 'analytics', name: 'orders' }, column: 'status' }, + { runId: 'scan-run-1' }, + )).resolves.toBeNull(); + + await connector.cleanup(); + }); + + it('adapts native MySQL snapshots to live-database introspection for local ingest', async () => { + const introspection = createMysqlLiveDatabaseIntrospection({ + connections: { + warehouse: { + driver: 'mysql', + host: 'db.example.test', + database: 'analytics', + username: 'reader', + password: 'secret', // pragma: allowlist secret + readonly: true, + }, + }, + poolFactory: fakePoolFactory(), + now: () => new Date('2026-04-29T12:00:00.000Z'), + }); + + const snapshot = await introspection.extractSchema('warehouse'); + + expect(snapshot).toMatchObject({ + connectionId: 'warehouse', + extractedAt: '2026-04-29T12:00:00.000Z', + }); + expect(snapshot.tables.find((table) => table.name === 'customers')).toMatchObject({ + name: 'customers', + catalog: null, + db: 'analytics', + columns: [ + { + name: 'id', + nativeType: 'int', + normalizedType: 'int', + dimensionType: 
'number', + nullable: false, + primaryKey: true, + comment: 'PK', + }, + { + name: 'name', + nativeType: 'varchar', + normalizedType: 'varchar', + dimensionType: 'string', + nullable: false, + primaryKey: false, + comment: null, + }, + ], + foreignKeys: [], + }); + }); +}); diff --git a/packages/connector-mysql/src/connector.ts b/packages/connector-mysql/src/connector.ts new file mode 100644 index 00000000..1adb701a --- /dev/null +++ b/packages/connector-mysql/src/connector.ts @@ -0,0 +1,578 @@ +import mysql, { type FieldPacket, type Pool, type RowDataPacket } from 'mysql2/promise'; +import { readFileSync } from 'node:fs'; +import { homedir } from 'node:os'; +import { resolve } from 'node:path'; +import { assertReadOnlySql, limitSqlForExecution } from '@klo/context/connections'; +import { + createKloConnectorCapabilities, + type KloColumnSampleInput, + type KloColumnSampleResult, + type KloColumnStatsInput, + type KloColumnStatsResult, + type KloQueryResult, + type KloReadOnlyQueryInput, + type KloScanConnector, + type KloScanContext, + type KloScanInput, + type KloSchemaColumn, + type KloSchemaForeignKey, + type KloSchemaSnapshot, + type KloSchemaTable, + type KloTableRef, + type KloTableSampleInput, + type KloTableSampleResult, +} from '@klo/context/scan'; +import { KloMysqlDialect } from './dialect.js'; + +export interface KloMysqlConnectionConfig { + driver?: string; + host?: string; + port?: number; + database?: string; + username?: string; + user?: string; + password?: string; + url?: string; + ssl?: boolean | { rejectUnauthorized?: boolean }; + readonly?: boolean; + [key: string]: unknown; +} + +export interface KloMysqlPoolConfig { + host: string; + port: number; + database: string; + user: string; + password?: string; + connectionLimit: number; + waitForConnections: true; + ssl?: { rejectUnauthorized: boolean }; +} + +interface KloMysqlConnection { + query(sql: string, params?: unknown): Promise<[RowDataPacket[], FieldPacket[]]>; + release(): void; +} + 
+interface KloMysqlPool { + getConnection(): Promise; + end(): Promise; +} + +export interface KloMysqlPoolFactory { + createPool(config: KloMysqlPoolConfig): KloMysqlPool; +} + +interface KloMysqlResolvedEndpoint { + host: string; + port: number; + close?: () => Promise; +} + +export interface KloMysqlEndpointResolver { + resolve(input: { host: string; port: number; connection: KloMysqlConnectionConfig }): Promise; +} + +export interface KloMysqlScanConnectorOptions { + connectionId: string; + connection: KloMysqlConnectionConfig | undefined; + poolFactory?: KloMysqlPoolFactory; + endpointResolver?: KloMysqlEndpointResolver; + env?: NodeJS.ProcessEnv; + now?: () => Date; +} + +export interface KloMysqlReadOnlyQueryInput extends KloReadOnlyQueryInput { + params?: Record | unknown[]; +} + +export interface KloMysqlColumnDistinctValuesOptions { + maxCardinality: number; + limit: number; + sampleSize?: number; +} + +export interface KloMysqlColumnDistinctValuesResult { + values: string[] | null; + cardinality: number; +} + +interface MysqlTableRow extends RowDataPacket { + TABLE_NAME: string; + TABLE_TYPE: string; + TABLE_COMMENT: string | null; + TABLE_ROWS: number | null; +} + +interface MysqlColumnRow extends RowDataPacket { + TABLE_NAME: string; + COLUMN_NAME: string; + DATA_TYPE: string; + IS_NULLABLE: string; + COLUMN_COMMENT: string | null; +} + +interface MysqlPrimaryKeyRow extends RowDataPacket { + TABLE_NAME: string; + COLUMN_NAME: string; +} + +interface MysqlForeignKeyRow extends RowDataPacket { + TABLE_NAME: string; + COLUMN_NAME: string; + REFERENCED_TABLE_NAME: string; + REFERENCED_COLUMN_NAME: string; + CONSTRAINT_NAME: string; +} + +interface MysqlSchemaRow extends RowDataPacket { + SCHEMA_NAME: string; +} + +interface MysqlCountRow extends RowDataPacket { + count?: unknown; + cardinality?: unknown; +} + +interface MysqlDistinctValueRow extends RowDataPacket { + val: unknown; +} + +class DefaultMysqlPoolFactory implements KloMysqlPoolFactory { + 
/**
 * Reads a string setting from the connection config and resolves `env:` / `file:`
 * references in it.
 *
 * @returns `undefined` when the value is missing, not a string, or blank.
 */
function stringConfigValue(
  connection: KloMysqlConnectionConfig | undefined,
  key: keyof KloMysqlConnectionConfig,
  env: NodeJS.ProcessEnv,
): string | undefined {
  const value = connection?.[key];
  if (typeof value !== 'string') {
    return undefined;
  }
  const trimmed = value.trim();
  return trimmed.length > 0 ? resolveStringReference(trimmed, env) : undefined;
}

/**
 * Resolves an indirect config value.
 *
 * - `env:NAME` → the value of `NAME` in `env`, or `''` when it is not set.
 * - `file:PATH` → the trimmed UTF-8 contents of `PATH`; a leading `~` is replaced
 *   by the current user's home directory.
 * - anything else is returned unchanged.
 *
 * @throws whatever `readFileSync` throws when the referenced file cannot be read.
 */
function resolveStringReference(value: string, env: NodeJS.ProcessEnv): string {
  if (value.startsWith('env:')) {
    const envName = value.slice('env:'.length);
    return env[envName] ?? '';
  }
  if (value.startsWith('file:')) {
    const rawPath = value.slice('file:'.length);
    // Drop the separator after "~" before resolving: path.resolve() treats a
    // segment starting with "/" as absolute and would discard the home directory,
    // turning "~/secret" into "/secret".
    const path = rawPath.startsWith('~')
      ? resolve(homedir(), rawPath.slice(1).replace(/^[\\/]+/, ''))
      : rawPath;
    return readFileSync(path, 'utf-8').trim();
  }
  return value;
}

/** Returns the value when it is a finite number, otherwise `undefined`. */
function maybeNumber(value: unknown): number | undefined {
  return typeof value === 'number' && Number.isFinite(value) ? value : undefined;
}

/**
 * Extracts connection settings from a MySQL URL.
 *
 * Username, password and database name are percent-decoded, and the brackets the
 * URL parser keeps around IPv6 hosts are removed. `ssl` is `true` only when the
 * `ssl` (or, failing that, `sslmode`) query parameter is `true` or `required`.
 *
 * NOTE(review): the return type's generic argument was not recoverable from the
 * reviewed text; `Partial<KloMysqlConnectionConfig>` is assumed — confirm.
 *
 * @throws TypeError when `url` cannot be parsed; URIError on malformed percent-escapes.
 */
function parseMysqlUrl(url: string): Partial<KloMysqlConnectionConfig> {
  const parsed = new URL(url);
  const sslParam = parsed.searchParams.get('ssl') ?? parsed.searchParams.get('sslmode');
  const database = parsed.pathname.replace(/^\/+/, '');
  return {
    host: parsed.hostname.replace(/^\[(.*)\]$/, '$1'),
    port: parsed.port ? Number(parsed.port) : undefined,
    database: database ? decodeURIComponent(database) : undefined,
    username: parsed.username ? decodeURIComponent(parsed.username) : undefined,
    password: parsed.password ? decodeURIComponent(parsed.password) : undefined,
    ssl: sslParam === 'true' || sslParam === 'required',
  };
}
/**
 * Normalises a table comment.
 *
 * Comments starting with `InnoDB free:` keep only the text after the first `;`
 * (or become `null` when there is none); empty results become `null`.
 */
function cleanMySqlTableComment(comment: string | null): string | null {
  if (!comment) {
    return null;
  }
  if (!comment.startsWith('InnoDB free:')) {
    return comment;
  }
  const separatorAt = comment.indexOf(';');
  if (separatorAt === -1) {
    return null;
  }
  const remainder = comment.slice(separatorAt + 1).trim();
  return remainder ? remainder : null;
}

/**
 * Buckets rows by their `TABLE_NAME`, preserving first-seen table order and row order.
 *
 * NOTE(review): the generic parameter list was not recoverable from the reviewed
 * text; the constraint below is the minimum this function needs — confirm.
 */
function groupByTable<T extends { TABLE_NAME: string }>(rows: T[]): Map<string, T[]> {
  const byTable = new Map<string, T[]>();
  rows.forEach((row) => {
    const bucket = byTable.get(row.TABLE_NAME);
    if (bucket) {
      bucket.push(row);
    } else {
      byTable.set(row.TABLE_NAME, [row]);
    }
  });
  return byTable;
}

/** Maps each table name to the set of its primary-key column names. */
function primaryKeyMap(rows: MysqlPrimaryKeyRow[]): Map<string, Set<string>> {
  const keysByTable = new Map<string, Set<string>>();
  rows.forEach(({ TABLE_NAME: tableName, COLUMN_NAME: columnName }) => {
    const known = keysByTable.get(tableName);
    if (known) {
      known.add(columnName);
    } else {
      keysByTable.set(tableName, new Set([columnName]));
    }
  });
  return keysByTable;
}

/** Converts query parameters to a positional array; a record contributes its values in key order. */
function queryParams(params: Record<string, unknown> | unknown[] | undefined): unknown[] | undefined {
  if (!params) {
    return undefined;
  }
  if (Array.isArray(params)) {
    return params;
  }
  return Object.values(params);
}

/** True when the connection's `driver`, compared case-insensitively, is `mysql`. */
export function isKloMysqlConnectionConfig(connection: KloMysqlConnectionConfig | undefined): boolean {
  const driver = String(connection?.driver ?? '');
  return driver.toLowerCase() === 'mysql';
}

/**
 * Builds the pool configuration for a MySQL connection.
 *
 * Settings parsed from `url` act as defaults that explicit connection fields
 * override. `host`, `database` and a user (`username`, then `user`) are required;
 * the port defaults to 3306.
 *
 * `ssl: true` yields `{ rejectUnauthorized: false }`, and an `ssl` object without
 * `rejectUnauthorized` also defaults it to `false`.
 * NOTE(review): that default means server certificates are not verified unless
 * the config sets `rejectUnauthorized: true` — confirm this is intended.
 *
 * @throws Error when the driver is not MySQL, when `readonly` is not exactly
 *   `true`, or when a required setting is missing.
 */
export function mysqlConnectionPoolConfigFromConfig(input: {
  connectionId: string;
  connection: KloMysqlConnectionConfig | undefined;
  env?: NodeJS.ProcessEnv;
}): KloMysqlPoolConfig {
  const { connectionId, connection } = input;

  if (!isKloMysqlConnectionConfig(connection)) {
    throw new Error(`Native MySQL connector cannot run driver "${connection?.driver ?? 'unknown'}"`);
  }
  if (connection?.readonly !== true) {
    throw new Error(`Native MySQL connector requires connections.${connectionId}.readonly: true`);
  }

  const env = input.env ?? process.env;
  const referencedUrl = stringConfigValue(connection, 'url', env);
  const fromUrl = referencedUrl ? parseMysqlUrl(referencedUrl) : {};
  const merged: KloMysqlConnectionConfig = { ...fromUrl, ...connection };

  const host = stringConfigValue(merged, 'host', env);
  const database = stringConfigValue(merged, 'database', env);
  const user = stringConfigValue(merged, 'username', env) ?? stringConfigValue(merged, 'user', env);

  if (!host) {
    throw new Error(`Native MySQL connector requires connections.${connectionId}.host or url`);
  }
  if (!database) {
    throw new Error(`Native MySQL connector requires connections.${connectionId}.database or url`);
  }
  if (!user) {
    throw new Error(`Native MySQL connector requires connections.${connectionId}.username, user, or url`);
  }

  let ssl: { rejectUnauthorized?: boolean } | undefined;
  if (merged.ssl === true) {
    ssl = { rejectUnauthorized: false };
  } else if (typeof merged.ssl === 'object') {
    ssl = merged.ssl;
  }

  const poolConfig: KloMysqlPoolConfig = {
    host,
    port: maybeNumber(merged.port) ?? 3306,
    database,
    user,
    password: stringConfigValue(merged, 'password', env),
    connectionLimit: 10,
    waitForConnections: true,
  };
  if (ssl) {
    poolConfig.ssl = { rejectUnauthorized: ssl.rejectUnauthorized ?? false };
  }
  return poolConfig;
}
AND TABLE_TYPE IN ('BASE TABLE', 'VIEW') + ORDER BY TABLE_NAME + `, + [database], + ); + const columns = await this.queryRaw( + ` + SELECT TABLE_NAME, COLUMN_NAME, DATA_TYPE, IS_NULLABLE, COLUMN_COMMENT + FROM INFORMATION_SCHEMA.COLUMNS + WHERE TABLE_SCHEMA = ? + ORDER BY TABLE_NAME, ORDINAL_POSITION + `, + [database], + ); + const primaryKeys = await this.queryRaw( + ` + SELECT TABLE_NAME, COLUMN_NAME + FROM INFORMATION_SCHEMA.KEY_COLUMN_USAGE + WHERE TABLE_SCHEMA = ? + AND CONSTRAINT_NAME = 'PRIMARY' + ORDER BY TABLE_NAME, ORDINAL_POSITION + `, + [database], + ); + const foreignKeys = await this.queryRaw( + ` + SELECT TABLE_NAME, COLUMN_NAME, REFERENCED_TABLE_NAME, REFERENCED_COLUMN_NAME, CONSTRAINT_NAME + FROM INFORMATION_SCHEMA.KEY_COLUMN_USAGE + WHERE TABLE_SCHEMA = ? + AND REFERENCED_TABLE_NAME IS NOT NULL + ORDER BY TABLE_NAME, COLUMN_NAME + `, + [database], + ); + + const columnsByTable = groupByTable(columns); + const primaryKeysByTable = primaryKeyMap(primaryKeys); + const foreignKeysByTable = groupByTable(foreignKeys); + const schemaTables = tables.map((table) => + this.toSchemaTable(table, columnsByTable.get(table.TABLE_NAME) ?? 
[], primaryKeysByTable, foreignKeysByTable), + ); + + return { + connectionId: this.connectionId, + driver: 'mysql', + extractedAt: this.now().toISOString(), + scope: { schemas: [database] }, + metadata: { + database, + host: this.poolConfig.host, + table_count: schemaTables.length, + total_columns: schemaTables.reduce((sum, table) => sum + table.columns.length, 0), + }, + tables: schemaTables, + }; + } + + async sampleTable(input: KloTableSampleInput, _ctx: KloScanContext): Promise { + this.assertConnection(input.connectionId); + const result = await this.query(this.dialect.generateSampleQuery(this.qTableName(input.table), input.limit, input.columns)); + return { headers: result.headers, rows: result.rows, totalRows: result.totalRows }; + } + + async sampleColumn(input: KloColumnSampleInput, _ctx: KloScanContext): Promise { + this.assertConnection(input.connectionId); + const result = await this.query( + this.dialect.generateColumnSampleQuery(this.qTableName(input.table), input.column, input.limit), + ); + const values = result.rows.filter((row) => row.length > 0 && row[0] !== null).map((row) => row[0]); + return { values, nullCount: null, distinctCount: null }; + } + + async columnStats(_input: KloColumnStatsInput, _ctx: KloScanContext): Promise { + return null; + } + + async executeReadOnly(input: KloMysqlReadOnlyQueryInput, _ctx: KloScanContext): Promise { + this.assertConnection(input.connectionId); + const limitedSql = limitSqlForExecution(assertReadOnlySql(input.sql), input.maxRows); + const prepared = Array.isArray(input.params) + ? { sql: limitedSql, params: input.params } + : this.dialect.prepareQuery(limitedSql, input.params); + const result = await this.query(prepared.sql, prepared.params); + return { ...result, rowCount: result.rows.length }; + } + + async getColumnDistinctValues( + table: KloTableRef, + columnName: string, + options: KloMysqlColumnDistinctValuesOptions, + ): Promise { + const sampleSize = options.sampleSize ?? 
10000; + const tableName = this.qTableName(table); + const quotedColumn = this.dialect.quoteIdentifier(columnName); + const cardinalityRows = await this.queryRaw( + this.dialect.generateCardinalitySampleQuery(tableName, quotedColumn, sampleSize), + ); + const cardinality = Number(cardinalityRows[0]?.cardinality); + if (Number.isNaN(cardinality)) { + return null; + } + if (cardinality === 0) { + return { values: [], cardinality: 0 }; + } + if (cardinality > options.maxCardinality) { + return { values: null, cardinality }; + } + const valuesRows = await this.queryRaw( + this.dialect.generateDistinctValuesQuery(tableName, quotedColumn, options.limit), + ); + return { + values: valuesRows.filter((row) => row.val !== null).map((row) => String(row.val)), + cardinality, + }; + } + + async getTableRowCount(tableName: string): Promise { + const rows = await this.queryRaw( + `SELECT COUNT(*) AS count FROM ${this.dialect.quoteIdentifier(tableName)}`, + ); + return Number(rows[0]?.count ?? 0); + } + + qTableName(table: Pick & Partial>): string { + return this.dialect.formatTableName(table); + } + + quoteIdentifier(identifier: string): string { + return this.dialect.quoteIdentifier(identifier); + } + + async listSchemas(): Promise { + const rows = await this.queryRaw(` + SELECT SCHEMA_NAME + FROM INFORMATION_SCHEMA.SCHEMATA + WHERE SCHEMA_NAME NOT IN ('information_schema', 'mysql', 'performance_schema', 'sys') + ORDER BY SCHEMA_NAME + `); + return rows.map((row) => row.SCHEMA_NAME); + } + + async cleanup(): Promise { + if (this.pool) { + await this.pool.end(); + this.pool = null; + } + if (this.resolvedEndpoint?.close) { + await this.resolvedEndpoint.close(); + this.resolvedEndpoint = null; + } + } + + private toSchemaTable( + table: MysqlTableRow, + columns: MysqlColumnRow[], + primaryKeysByTable: Map>, + foreignKeysByTable: Map, + ): KloSchemaTable { + const tableName = table.TABLE_NAME; + const kind = table.TABLE_TYPE === 'VIEW' ? 
'view' : 'table'; + const estimatedRows = kind === 'view' ? null : Number(table.TABLE_ROWS ?? 0); + return { + catalog: null, + db: this.poolConfig.database, + name: tableName, + kind, + comment: cleanMySqlTableComment(table.TABLE_COMMENT), + estimatedRows: Number.isFinite(estimatedRows) ? estimatedRows : null, + columns: columns.map((column) => this.toSchemaColumn(column, primaryKeysByTable.get(tableName) ?? new Set())), + foreignKeys: (foreignKeysByTable.get(tableName) ?? []).map((row) => this.toSchemaForeignKey(row)), + }; + } + + private toSchemaColumn(column: MysqlColumnRow, primaryKeys: Set): KloSchemaColumn { + return { + name: column.COLUMN_NAME, + nativeType: column.DATA_TYPE, + normalizedType: this.dialect.mapDataType(column.DATA_TYPE), + dimensionType: this.dialect.mapToDimensionType(column.DATA_TYPE), + nullable: column.IS_NULLABLE === 'YES', + primaryKey: primaryKeys.has(column.COLUMN_NAME), + comment: column.COLUMN_COMMENT || null, + }; + } + + private toSchemaForeignKey(row: MysqlForeignKeyRow): KloSchemaForeignKey { + return { + fromColumn: row.COLUMN_NAME, + toCatalog: null, + toDb: this.poolConfig.database, + toTable: row.REFERENCED_TABLE_NAME, + toColumn: row.REFERENCED_COLUMN_NAME, + constraintName: row.CONSTRAINT_NAME || null, + }; + } + + private async poolForQuery(): Promise { + if (!this.pool) { + const config = { ...this.poolConfig }; + if (this.endpointResolver) { + this.resolvedEndpoint = await this.endpointResolver.resolve({ + host: config.host, + port: config.port, + connection: this.connection, + }); + config.host = this.resolvedEndpoint.host; + config.port = this.resolvedEndpoint.port; + } + this.pool = this.poolFactory.createPool(config); + } + return this.pool; + } + + private async queryRaw(sql: string, params?: unknown): Promise { + const pool = await this.poolForQuery(); + const connection = await pool.getConnection(); + try { + const [rows] = await connection.query(sql, params); + return rows as T[]; + } finally { + 
connection.release(); + } + } + + private async query( + sql: string, + params?: Record | unknown[], + ): Promise> { + const pool = await this.poolForQuery(); + const connection = await pool.getConnection(); + try { + const [rows, fields] = await connection.query(assertReadOnlySql(sql), queryParams(params)); + const headers = fields.map((field) => field.name); + const headerTypes = fields.map((field) => String(field.type ?? 'unknown')); + return { + headers, + headerTypes, + rows: rows.map((row) => headers.map((header) => row[header])), + totalRows: rows.length, + }; + } finally { + connection.release(); + } + } + + private assertConnection(connectionId: string): void { + if (connectionId !== this.connectionId) { + throw new Error(`KLO MySQL connector ${this.id} cannot serve connection ${connectionId}`); + } + } +} diff --git a/packages/connector-mysql/src/dialect.test.ts b/packages/connector-mysql/src/dialect.test.ts new file mode 100644 index 00000000..f82dc59a --- /dev/null +++ b/packages/connector-mysql/src/dialect.test.ts @@ -0,0 +1,49 @@ +import { describe, expect, it } from 'vitest'; +import { KloMysqlDialect } from './dialect.js'; + +describe('KloMysqlDialect', () => { + const dialect = new KloMysqlDialect(); + + it('quotes identifiers and formats database-qualified table names', () => { + expect(dialect.quoteIdentifier('orders')).toBe('`orders`'); + expect(dialect.quoteIdentifier('odd`name')).toBe('`odd``name`'); + expect(dialect.formatTableName({ catalog: null, db: 'analytics', name: 'orders' })).toBe( + '`analytics`.`orders`', + ); + expect(dialect.formatTableName({ catalog: null, db: null, name: 'orders' })).toBe('`orders`'); + }); + + it('maps native MySQL types to KLO dimension types', () => { + expect(dialect.mapToDimensionType('tinyint(1)')).toBe('boolean'); + expect(dialect.mapToDimensionType('int')).toBe('number'); + expect(dialect.mapToDimensionType('decimal(10,2)')).toBe('number'); + expect(dialect.mapToDimensionType('timestamp')).toBe('time'); 
+ expect(dialect.mapToDimensionType('varchar(255)')).toBe('string'); + expect(dialect.mapToDimensionType('json')).toBe('string'); + expect(dialect.mapToDimensionType('')).toBe('string'); + }); + + it('builds sampling, distinct-value, pagination, and time SQL', () => { + expect(dialect.generateSampleQuery('`analytics`.`orders`', 25, ['id', 'status'])).toBe( + 'SELECT `id`, `status` FROM `analytics`.`orders` LIMIT 25', + ); + expect(dialect.generateColumnSampleQuery('`analytics`.`orders`', 'status', 10)).toBe( + "SELECT `status` FROM `analytics`.`orders` WHERE `status` IS NOT NULL AND TRIM(CAST(`status` AS CHAR)) != '' LIMIT 10", + ); + expect(dialect.generateDistinctValuesQuery('`analytics`.`orders`', '`status`', 5)).toContain( + 'SELECT DISTINCT CAST(`status` AS CHAR) AS val', + ); + expect(dialect.getLimitOffsetClause(10, 20)).toBe('LIMIT 10 OFFSET 20'); + expect(dialect.getTimeTruncExpression('created_at', 'month')).toBe("DATE_FORMAT(created_at, '%Y-%m-01')"); + }); + + it('prepares named parameters in deterministic SQL placeholder order', () => { + expect(dialect.prepareQuery('select * from orders where id = :id and status = :status', { + status: 'paid', + id: 10, + })).toEqual({ + sql: 'select * from orders where id = ? 
/**
 * SQL text generation and type mapping for MySQL.
 *
 * Every method is a pure string/lookup helper: nothing here opens a connection or
 * executes SQL. Methods that take a `tableName`/`columnName`/`column` string splice it
 * into the statement verbatim, so callers pass identifiers that are already quoted
 * (see `quoteIdentifier` / `formatTableName`) unless a method says it quotes for them.
 */
export class KloMysqlDialect {
  readonly type = 'mysql';

  /** Exact-match lookup from a lower-cased native type (length/precision removed) to a dimension type. */
  private readonly typeMappings: Record<string, KloSchemaDimensionType> = {
    datetime: 'time',
    timestamp: 'time',
    date: 'time',
    time: 'time',
    year: 'time',
    tinyint: 'number',
    smallint: 'number',
    mediumint: 'number',
    int: 'number',
    integer: 'number',
    bigint: 'number',
    decimal: 'number',
    numeric: 'number',
    float: 'number',
    double: 'number',
    real: 'number',
    varchar: 'string',
    char: 'string',
    text: 'string',
    tinytext: 'string',
    mediumtext: 'string',
    longtext: 'string',
    enum: 'string',
    set: 'string',
    json: 'string',
    bit: 'boolean',
    bool: 'boolean',
    boolean: 'boolean',
    // Spatial types whose names contain "int"; without explicit entries the substring
    // fallback in mapToDimensionType would classify them as numbers.
    point: 'string',
    multipoint: 'string',
  };

  /** Wraps an identifier in backticks, doubling any backtick it contains. */
  quoteIdentifier(identifier: string): string {
    return `\`${identifier.replace(/`/g, '``')}\``;
  }

  /** Returns the quoted table name, qualified with the quoted database name when `table.db` is set. */
  formatTableName(table: MysqlTableNameRef): string {
    return table.db
      ? `${this.quoteIdentifier(table.db)}.${this.quoteIdentifier(table.name)}`
      : this.quoteIdentifier(table.name);
  }

  /** Returns the native type unchanged; no normalization is applied for MySQL. */
  mapDataType(nativeType: string): string {
    return nativeType;
  }

  /**
   * Classifies a native column type as `time`, `number`, `boolean` or `string`.
   *
   * Resolution order: empty input -> `string`; anything containing `tinyint(1)` ->
   * `boolean`; exact lookup of the type name with its `(...)` suffix removed; substring
   * heuristics; finally `string`.
   */
  mapToDimensionType(nativeType: string): KloSchemaDimensionType {
    if (!nativeType) {
      return 'string';
    }
    const lower = nativeType.toLowerCase().trim();
    if (lower.includes('tinyint(1)')) {
      return 'boolean';
    }
    const parenAt = lower.indexOf('(');
    const normalized = parenAt === -1 ? lower : lower.slice(0, parenAt).trim();
    // Own-property check so that names such as "constructor" never resolve to a member
    // inherited from Object.prototype.
    if (Object.prototype.hasOwnProperty.call(this.typeMappings, normalized)) {
      const mapped = this.typeMappings[normalized];
      if (mapped) {
        return mapped;
      }
    }
    if (normalized.includes('time') || normalized.includes('date')) {
      return 'time';
    }
    if (
      normalized.includes('int') ||
      normalized.includes('num') ||
      normalized.includes('dec') ||
      normalized.includes('float') ||
      normalized.includes('double')
    ) {
      return 'number';
    }
    if (normalized.includes('bit') || normalized === 'bool' || normalized === 'boolean') {
      return 'boolean';
    }
    return 'string';
  }

  /**
   * Builds `SELECT <columns> FROM <tableName> LIMIT <limit>`.
   *
   * @param tableName Table reference inserted verbatim (expected to be quoted already).
   * @param limit Maximum number of rows.
   * @param columns Column names to quote and select; `*` when omitted or empty.
   */
  generateSampleQuery(tableName: string, limit: number, columns?: string[]): string {
    const columnList =
      columns && columns.length > 0 ? columns.map((column) => this.quoteIdentifier(column)).join(', ') : '*';
    return `SELECT ${columnList} FROM ${tableName} LIMIT ${limit}`;
  }

  /**
   * Builds a query sampling one column while skipping NULLs and values that are blank
   * once cast to CHAR and trimmed. `columnName` is quoted here; `tableName` is not.
   */
  generateColumnSampleQuery(tableName: string, columnName: string, limit: number): string {
    const quotedColumn = this.quoteIdentifier(columnName);
    return `SELECT ${quotedColumn} FROM ${tableName} WHERE ${quotedColumn} IS NOT NULL AND TRIM(CAST(${quotedColumn} AS CHAR)) != '' LIMIT ${limit}`;
  }

  /**
   * Rewrites `:name` placeholders into positional `?` placeholders.
   *
   * Values are collected in the order the placeholders appear in `sql`, so a name used
   * twice contributes its value twice. A placeholder whose name is not an own key of
   * `params` is left in the SQL untouched.
   *
   * @returns The rewritten SQL and the ordered values; `params` is undefined when no
   *   parameter object was supplied.
   */
  prepareQuery(sql: string, params?: Record<string, unknown>): { sql: string; params?: unknown[] } {
    if (!params) {
      return { sql, params: undefined };
    }
    const values: unknown[] = [];
    const parameterizedQuery = sql.replace(/:([A-Za-z_][A-Za-z0-9_]*)\b/g, (placeholder, key: string) => {
      // `in` would also match inherited members (":toString", ":constructor") and bind a
      // function as a query parameter, so only own keys count as parameters.
      if (!Object.prototype.hasOwnProperty.call(params, key)) {
        return placeholder;
      }
      values.push(params[key]);
      return '?';
    });
    return { sql: parameterizedQuery, params: values };
  }

  /** Returns `RAND() < pct` for a fraction strictly between 0 and 1, otherwise an empty string. */
  getRandomSampleFilter(samplePct: number): string {
    if (samplePct <= 0 || samplePct >= 1) {
      return '';
    }
    return `RAND() < ${samplePct}`;
  }

  /** Always returns an empty string: this dialect emits no table-sample clause. */
  getTableSampleClause(_samplePct: number): string {
    return '';
  }

  /** Builds `LIMIT n`, appending `OFFSET m` only when the offset is greater than zero. */
  getLimitOffsetClause(limit: number, offset?: number): string {
    return offset !== undefined && offset > 0 ? `LIMIT ${limit} OFFSET ${offset}` : `LIMIT ${limit}`;
  }

  /** Aggregate expression counting NULLs of `column` (inserted verbatim). */
  getNullCountExpression(column: string): string {
    return `SUM(CASE WHEN ${column} IS NULL THEN 1 ELSE 0 END)`;
  }

  /** Aggregate expression counting distinct values of `column` (inserted verbatim). */
  getDistinctCountExpression(column: string): string {
    return `COUNT(DISTINCT ${column})`;
  }

  /**
   * Counts distinct non-NULL values among at most `sampleSize` rows, returned as
   * `cardinality`. Both identifiers are inserted verbatim.
   */
  generateCardinalitySampleQuery(tableName: string, columnName: string, sampleSize: number): string {
    return `
      SELECT COUNT(DISTINCT val) AS cardinality
      FROM (
        SELECT ${columnName} AS val
        FROM ${tableName}
        WHERE ${columnName} IS NOT NULL
        LIMIT ${sampleSize}
      ) AS sampled
    `;
  }

  /**
   * Lists up to `limit` distinct non-NULL values cast to CHAR, ordered, as `val`.
   * Both identifiers are inserted verbatim.
   */
  generateDistinctValuesQuery(tableName: string, columnName: string, limit: number): string {
    return `
      SELECT DISTINCT CAST(${columnName} AS CHAR) AS val
      FROM ${tableName}
      WHERE ${columnName} IS NOT NULL
      ORDER BY val
      LIMIT ${limit}
    `;
  }

  /** Always returns null: this dialect provides no column-statistics query. */
  generateColumnStatisticsQuery(_schemaName: string, _tableName: string): string | null {
    return null;
  }

  /** Same as generateCardinalitySampleQuery, but the sampled rows are ordered by RAND() first. */
  generateRandomizedCardinalitySampleQuery(tableName: string, columnName: string, sampleSize: number): string {
    return `
      SELECT COUNT(DISTINCT val) AS cardinality
      FROM (
        SELECT ${columnName} AS val
        FROM ${tableName}
        WHERE ${columnName} IS NOT NULL
        ORDER BY RAND()
        LIMIT ${sampleSize}
      ) AS sampled
    `;
  }

  /**
   * Builds an expression truncating `column` to the start of the given period.
   *
   * @param column Column expression, inserted verbatim.
   * @param granularity Period to truncate to.
   * @param timezone When given, the column is first converted from `+00:00` with
   *   CONVERT_TZ; the value is emitted as an escaped SQL string literal.
   */
  getTimeTruncExpression(
    column: string,
    granularity: 'day' | 'week' | 'month' | 'quarter' | 'year',
    timezone?: string,
  ): string {
    const col = this.applyTimezone(column, timezone);
    switch (granularity) {
      case 'day':
        return `DATE(${col})`;
      case 'week':
        return `DATE(${col} - INTERVAL WEEKDAY(${col}) DAY)`;
      case 'month':
        return `DATE_FORMAT(${col}, '%Y-%m-01')`;
      case 'quarter':
        return `MAKEDATE(YEAR(${col}), 1) + INTERVAL (QUARTER(${col}) - 1) QUARTER`;
      case 'year':
        return `DATE_FORMAT(${col}, '%Y-01-01')`;
    }
  }

  /**
   * Builds an expression bucketing `column` into intervals of a custom size, counted
   * from `origin` (default `1970-01-01`).
   *
   * @param interval `"<amount> <unit>"`, for example `"15 minute"`.
   * @throws Error when `interval` is not of that form.
   */
  getCustomTimeTruncExpression(column: string, interval: string, origin?: string, timezone?: string): string {
    const col = this.applyTimezone(column, timezone);
    const { amount, unit } = this.parseInterval(interval);
    const originExpr = origin ? this.quoteStringLiteral(origin) : `'1970-01-01'`;
    return `DATE_ADD(${originExpr}, INTERVAL FLOOR(TIMESTAMPDIFF(${unit}, ${originExpr}, ${col}) / ${amount}) * ${amount} ${unit})`;
  }

  /**
   * Converts `"<amount> <unit>"` into `INTERVAL <amount> <UNIT>`.
   *
   * @throws Error when `interval` is not of that form.
   */
  parseIntervalToSql(interval: string): string {
    const { amount, unit } = this.parseInterval(interval);
    return `INTERVAL ${amount} ${unit}`;
  }

  /** Wraps `column` in CONVERT_TZ from `+00:00` when a timezone is given; otherwise returns it unchanged. */
  private applyTimezone(column: string, timezone?: string): string {
    return timezone ? `CONVERT_TZ(${column}, '+00:00', ${this.quoteStringLiteral(timezone)})` : column;
  }

  /** Emits a single-quoted SQL string literal, escaping backslashes and single quotes. */
  private quoteStringLiteral(value: string): string {
    return `'${value.replace(/\\/g, '\\\\').replace(/'/g, "''")}'`;
  }

  /**
   * Splits `"<amount> <unit>"` into a numeric amount and an upper-cased unit keyword.
   * Strict validation keeps arbitrary text out of the generated SQL and replaces the
   * bare TypeError previously raised when the unit was missing.
   */
  private parseInterval(interval: string): { amount: string; unit: string } {
    const match = /^\s*([+-]?\d+(?:\.\d+)?)\s+([A-Za-z_]+)\s*$/.exec(interval);
    const amount = match?.[1];
    const unit = match?.[2];
    if (!amount || !unit) {
      throw new Error(`Invalid interval "${interval}": expected "<amount> <unit>", for example "15 minute"`);
    }
    return { amount, unit: unit.toUpperCase() };
  }
}
/**
 * Adapts the MySQL scan connector to the live-database introspection port.
 *
 * Each `extractSchema` call builds a fresh connector for the requested connection id,
 * introspects it with a run id of `mysql-<connectionId>`, and always calls the
 * connector's `cleanup()` afterwards, whether introspection succeeded or threw.
 *
 * @param options Connection configurations keyed by id, plus optional overrides for
 *   the pool factory, endpoint resolver and clock.
 */
export function createMysqlLiveDatabaseIntrospection(
  options: CreateMysqlLiveDatabaseIntrospectionOptions,
): LiveDatabaseIntrospectionPort {
  const extractSchema = async (connectionId: string) => {
    const { connections, poolFactory, endpointResolver, now } = options;
    const connector = new KloMysqlScanConnector({
      connectionId,
      connection: connections[connectionId] as KloMysqlConnectionConfig | undefined,
      poolFactory,
      endpointResolver,
      now,
    });
    try {
      const snapshot = await connector.introspect(
        { connectionId, driver: 'mysql' },
        { runId: `mysql-${connectionId}` },
      );
      return snapshot;
    } finally {
      await connector.cleanup();
    }
  };
  return { extractSchema };
}
b/packages/connector-postgres/package.json new file mode 100644 index 00000000..f1a5a4eb --- /dev/null +++ b/packages/connector-postgres/package.json @@ -0,0 +1,48 @@ +{ + "name": "@klo/connector-postgres", + "version": "0.0.0-private", + "description": "PostgreSQL connector package for KLO scan interfaces", + "private": true, + "type": "module", + "engines": { + "node": ">=22.0.0" + }, + "main": "dist/index.js", + "types": "dist/index.d.ts", + "exports": { + ".": { + "types": "./dist/index.d.ts", + "import": "./dist/index.js", + "default": "./dist/index.js" + }, + "./package.json": "./package.json" + }, + "files": [ + "dist" + ], + "scripts": { + "build": "tsc -p tsconfig.json", + "test": "vitest run", + "type-check": "tsc -p tsconfig.json --noEmit" + }, + "dependencies": { + "@klo/context": "workspace:*", + "pg": "^8.19.0" + }, + "devDependencies": { + "@types/node": "^24.3.0", + "@types/pg": "^8.16.0", + "typescript": "^5.9.3", + "vitest": "^4.0.18" + }, + "license": "Apache-2.0", + "repository": { + "type": "git", + "url": "git+https://github.com/kaelio/ktx.git", + "directory": "packages/connector-postgres" + }, + "bugs": { + "url": "https://github.com/kaelio/ktx/issues" + }, + "homepage": "https://github.com/kaelio/ktx#readme" +} diff --git a/packages/connector-postgres/src/connector.test.ts b/packages/connector-postgres/src/connector.test.ts new file mode 100644 index 00000000..6132feae --- /dev/null +++ b/packages/connector-postgres/src/connector.test.ts @@ -0,0 +1,342 @@ +import { describe, expect, it, vi } from 'vitest'; +import { + createPostgresLiveDatabaseIntrospection, + isKloPostgresConnectionConfig, + KloPostgresScanConnector, + postgresPoolConfigFromConfig, + type KloPostgresPoolFactory, +} from './index.js'; + +interface FakeQueryResult { + rows: Record[]; + fields?: Array<{ name: string; dataTypeID: number }>; +} + +function fakePoolFactory(results: Map): KloPostgresPoolFactory { + const query = vi.fn(async (sql: string, params?: unknown[]) => { + 
const normalized = sql.replace(/\s+/g, ' ').trim(); + for (const [key, value] of results.entries()) { + if (normalized.includes(key)) { + return value; + } + } + throw new Error(`Unexpected SQL: ${normalized} params=${JSON.stringify(params ?? [])}`); + }); + return { + createPool() { + return { + async connect() { + return { + query, + release: vi.fn(), + }; + }, + end: vi.fn(async () => undefined), + }; + }, + }; +} + +function metadataResults(): Map { + return new Map([ + [ + 'FROM pg_catalog.pg_class c JOIN pg_catalog.pg_namespace n', + { + rows: [ + { table_name: 'customers', table_kind: 'r', row_count: '2', table_comment: 'Customers' }, + { table_name: 'orders', table_kind: 'r', row_count: '3', table_comment: null }, + { table_name: 'recent_orders', table_kind: 'v', row_count: '0', table_comment: 'Recent orders' }, + ], + }, + ], + [ + 'FROM pg_catalog.pg_attribute a JOIN pg_catalog.pg_class c', + { + rows: [ + { table_name: 'customers', column_name: 'id', data_type: 'integer', is_nullable: false, column_comment: null }, + { table_name: 'customers', column_name: 'name', data_type: 'text', is_nullable: false, column_comment: 'Name' }, + { table_name: 'orders', column_name: 'id', data_type: 'integer', is_nullable: false, column_comment: null }, + { table_name: 'orders', column_name: 'customer_id', data_type: 'integer', is_nullable: false, column_comment: null }, + { table_name: 'orders', column_name: 'status', data_type: 'text', is_nullable: true, column_comment: null }, + { table_name: 'recent_orders', column_name: 'id', data_type: 'integer', is_nullable: true, column_comment: null }, + ], + }, + ], + [ + "tc.constraint_type = 'FOREIGN KEY'", + { + rows: [ + { + table_name: 'orders', + column_name: 'customer_id', + foreign_table_schema: 'public', + foreign_table_name: 'customers', + foreign_column_name: 'id', + constraint_name: 'orders_customer_id_fkey', + }, + ], + }, + ], + [ + "tc.constraint_type = 'PRIMARY KEY'", + { + rows: [ + { table_name: 'customers', 
column_name: 'id' }, + { table_name: 'orders', column_name: 'id' }, + ], + }, + ], + ['SELECT "id" FROM "public"."orders" LIMIT 1', { rows: [{ id: 10 }], fields: [{ name: 'id', dataTypeID: 23 }] }], + [ + 'SELECT "status" FROM "public"."orders" WHERE "status" IS NOT NULL', + { rows: [{ status: 'paid' }, { status: 'open' }], fields: [{ name: 'status', dataTypeID: 25 }] }, + ], + ['COUNT(DISTINCT val) AS cardinality', { rows: [{ cardinality: '2' }] }], + ['SELECT DISTINCT "status"::text AS val', { rows: [{ val: 'open' }, { val: 'paid' }] }], + ['SELECT COUNT(*) AS count FROM "public"."orders"', { rows: [{ count: '3' }] }], + ['FROM pg_stats s', { rows: [{ column_name: 'status', estimated_cardinality: '2' }] }], + ['SELECT 1', { rows: [{ '?column?': 1 }], fields: [{ name: '?column?', dataTypeID: 23 }] }], + ['SELECT schema_name FROM information_schema.schemata', { rows: [{ schema_name: 'public' }] }], + ]); +} + +describe('KloPostgresScanConnector', () => { + it('resolves configuration safely', () => { + expect(isKloPostgresConnectionConfig({ driver: 'postgres', url: 'env:DATABASE_URL', readonly: true })).toBe(true); + expect(isKloPostgresConnectionConfig({ driver: 'postgresql', host: 'db', database: 'analytics' })).toBe(true); + expect(isKloPostgresConnectionConfig({ driver: 'mysql', host: 'db' })).toBe(false); + expect( + postgresPoolConfigFromConfig({ + connectionId: 'warehouse', + connection: { + driver: 'postgres', + host: 'db.example.test', + database: 'analytics', + username: 'reader', + password: 'test-password', // pragma: allowlist secret + schemas: ['analytics', 'public'], + readonly: true, + ssl: true, + rejectUnauthorized: false, + }, + }), + ).toMatchObject({ + host: 'db.example.test', + port: 5432, + database: 'analytics', + user: 'reader', + password: 'test-password', // pragma: allowlist secret + options: '-c search_path=analytics,public', + ssl: { rejectUnauthorized: false }, + }); + expect(() => + postgresPoolConfigFromConfig({ + connectionId: 
'warehouse', + connection: { driver: 'postgres', host: 'db.example.test', database: 'analytics', username: 'reader' }, + }), + ).toThrow('Native PostgreSQL connector requires connections.warehouse.readonly: true'); + }); + + it('introspects schemas, tables, views, primary keys, comments, row counts, and foreign keys', async () => { + const connector = new KloPostgresScanConnector({ + connectionId: 'warehouse', + connection: { + driver: 'postgres', + host: 'db.example.test', + database: 'analytics', + username: 'reader', + password: 'test-password', // pragma: allowlist secret + schema: 'public', + readonly: true, + }, + poolFactory: fakePoolFactory(metadataResults()), + now: () => new Date('2026-04-29T10:00:00.000Z'), + }); + + const snapshot = await connector.introspect( + { connectionId: 'warehouse', driver: 'postgres' }, + { runId: 'scan-run-1' }, + ); + + expect(snapshot).toMatchObject({ + connectionId: 'warehouse', + driver: 'postgres', + extractedAt: '2026-04-29T10:00:00.000Z', + scope: { schemas: ['public'] }, + metadata: { + database: 'analytics', + schemas: ['public'], + host: 'db.example.test', + table_count: 3, + total_columns: 6, + }, + }); + expect(snapshot.tables.map((table) => [table.db, table.name, table.kind, table.estimatedRows])).toEqual([ + ['public', 'customers', 'table', 2], + ['public', 'orders', 'table', 3], + ['public', 'recent_orders', 'view', null], + ]); + expect(snapshot.tables.find((table) => table.name === 'customers')?.columns[0]).toMatchObject({ + name: 'id', + nativeType: 'integer', + normalizedType: 'integer', + dimensionType: 'number', + nullable: false, + primaryKey: true, + }); + expect(snapshot.tables.find((table) => table.name === 'orders')?.foreignKeys).toEqual([ + { + fromColumn: 'customer_id', + toCatalog: null, + toDb: 'public', + toTable: 'customers', + toColumn: 'id', + constraintName: 'orders_customer_id_fkey', + }, + ]); + }); + + it('runs samples, distinct values, statistics, read-only SQL, and schema listing', async 
() => { + const connector = new KloPostgresScanConnector({ + connectionId: 'warehouse', + connection: { + driver: 'postgres', + host: 'db.example.test', + database: 'analytics', + username: 'reader', + password: 'test-password', // pragma: allowlist secret + schema: 'public', + readonly: true, + }, + poolFactory: fakePoolFactory(metadataResults()), + }); + + await expect( + connector.sampleTable( + { connectionId: 'warehouse', table: { catalog: null, db: 'public', name: 'orders' }, columns: ['id'], limit: 1 }, + { runId: 'scan-run-1' }, + ), + ).resolves.toEqual({ headers: ['id'], headerTypes: ['integer'], rows: [[10]], totalRows: 1 }); + + await expect( + connector.sampleColumn( + { connectionId: 'warehouse', table: { catalog: null, db: 'public', name: 'orders' }, column: 'status', limit: 5 }, + { runId: 'scan-run-1' }, + ), + ).resolves.toMatchObject({ values: ['paid', 'open'], nullCount: null, distinctCount: null }); + + await expect( + connector.getColumnDistinctValues( + { catalog: null, db: 'public', name: 'orders' }, + 'status', + { maxCardinality: 5, limit: 10, sampleSize: 100 }, + ), + ).resolves.toEqual({ values: ['open', 'paid'], cardinality: 2 }); + + await expect(connector.getColumnStatistics({ catalog: null, db: 'public', name: 'orders' })).resolves.toEqual({ + cardinalityByColumn: new Map([['status', 2]]), + }); + await expect(connector.getTableRowCount({ db: 'public', name: 'orders' })).resolves.toBe(3); + await expect(connector.listSchemas()).resolves.toEqual(['public']); + await expect(connector.testConnection()).resolves.toEqual({ success: true }); + + await expect( + connector.executeReadOnly({ connectionId: 'warehouse', sql: 'delete from orders' }, { runId: 'scan-run-1' }), + ).rejects.toThrow('Only read-only SELECT/WITH queries can be executed locally'); + }); + + it('adapts native PostgreSQL snapshots to live-database introspection for local ingest', async () => { + const introspection = createPostgresLiveDatabaseIntrospection({ + 
connections: { + warehouse: { + driver: 'postgres', + host: 'db.example.test', + database: 'analytics', + username: 'reader', + password: 'test-password', // pragma: allowlist secret + schema: 'public', + readonly: true, + }, + }, + poolFactory: fakePoolFactory(metadataResults()), + now: () => new Date('2026-04-29T10:00:00.000Z'), + }); + + const snapshot = await introspection.extractSchema('warehouse'); + + expect(snapshot).toMatchObject({ + connectionId: 'warehouse', + extractedAt: '2026-04-29T10:00:00.000Z', + }); + expect(snapshot.tables.find((table) => table.name === 'customers')).toMatchObject({ + name: 'customers', + catalog: null, + db: 'public', + columns: [ + { + name: 'id', + nativeType: 'integer', + normalizedType: 'integer', + dimensionType: 'number', + nullable: false, + primaryKey: true, + comment: null, + }, + { + name: 'name', + nativeType: 'text', + normalizedType: 'text', + dimensionType: 'string', + nullable: false, + primaryKey: false, + comment: 'Name', + }, + ], + foreignKeys: [], + }); + }); + + it('does not end the pool before introspection completes', async () => { + let endCalled = false; + const endAwarePoolFactory: KloPostgresPoolFactory = { + createPool() { + const inner = fakePoolFactory(metadataResults()).createPool({ + max: 1, + idleTimeoutMillis: 1, + connectionTimeoutMillis: 1, + }); + return { + async connect() { + if (endCalled) { + throw new Error('Cannot use a pool after calling end on the pool'); + } + return inner.connect(); + }, + async end() { + endCalled = true; + return inner.end(); + }, + }; + }, + }; + const introspection = createPostgresLiveDatabaseIntrospection({ + connections: { + warehouse: { + driver: 'postgres', + host: 'db.example.test', + database: 'analytics', + username: 'reader', + password: 'test-password', // pragma: allowlist secret + schema: 'public', + readonly: true, + }, + }, + poolFactory: endAwarePoolFactory, + now: () => new Date('2026-04-29T10:00:00.000Z'), + }); + + const snapshot = await 
/**
 * Maps PostgreSQL type OIDs (the `dataTypeID` reported for each result field)
 * to a readable type name. OIDs that are not listed here are rendered by the
 * caller as `oid:<n>`.
 */
const PG_OID_TYPE_MAP: Record<number, string> = {
  16: 'boolean',
  20: 'bigint',
  21: 'smallint',
  23: 'integer',
  25: 'text',
  114: 'json',
  700: 'real',
  701: 'double precision',
  1007: 'integer[]',
  1009: 'text[]',
  1016: 'bigint[]',
  1043: 'varchar',
  1082: 'date',
  1114: 'timestamp',
  1184: 'timestamptz',
  1700: 'numeric',
  2950: 'uuid',
  3802: 'jsonb',
};
connectionString?: string; + max: number; + idleTimeoutMillis: number; + connectionTimeoutMillis: number; + options?: string; + ssl?: { rejectUnauthorized: boolean }; +} + +interface KloPostgresQueryResult { + fields?: Array<{ name: string; dataTypeID: number }>; + rows: Record[]; +} + +interface KloPostgresClient { + query(sql: string, params?: unknown[]): Promise; + release(): void; +} + +interface KloPostgresPool { + connect(): Promise; + end(): Promise; +} + +export interface KloPostgresPoolFactory { + createPool(config: KloPostgresPoolConfig): KloPostgresPool; +} + +interface KloPostgresResolvedEndpoint { + host: string; + port: number; + close?: () => Promise; +} + +export interface KloPostgresEndpointResolver { + resolve(input: { + host: string; + port: number; + connection: KloPostgresConnectionConfig; + }): Promise; +} + +export interface KloPostgresScanConnectorOptions { + connectionId: string; + connection: KloPostgresConnectionConfig | undefined; + poolFactory?: KloPostgresPoolFactory; + endpointResolver?: KloPostgresEndpointResolver; + env?: NodeJS.ProcessEnv; + now?: () => Date; +} + +export interface KloPostgresReadOnlyQueryInput extends KloReadOnlyQueryInput { + params?: Record | unknown[]; +} + +export interface KloPostgresColumnDistinctValuesOptions { + maxCardinality: number; + limit: number; + sampleSize?: number; +} + +export interface KloPostgresColumnDistinctValuesResult { + values: string[] | null; + cardinality: number; +} + +export interface KloPostgresColumnStatisticsResult { + cardinalityByColumn: Map; +} + +export interface KloPostgresTableSampleResult extends KloTableSampleResult { + headerTypes?: string[]; +} + +type PostgresTableRef = Pick & Partial>; + +interface PostgresTableRow { + table_name: string; + table_kind: string; + row_count: unknown; + table_comment: string | null; +} + +interface PostgresColumnRow { + table_name: string; + column_name: string; + data_type: string; + is_nullable: boolean; + column_comment: string | 
/**
 * Converts a value returned by the database driver into a finite number.
 *
 * Only numbers, bigints and non-blank strings are considered. Everything else
 * (null, undefined, booleans, objects, blank strings) yields `null` instead of
 * being coerced: `Number(null)`, `Number('')` and `Number([])` are all `0`, and
 * `Number(true)` is `1`, which would otherwise turn a missing value into a
 * plausible-looking count.
 *
 * @param value Raw cell value, typically a number or a numeric string.
 * @returns The finite numeric value, or `null` when there is none.
 */
function finiteNumber(value: unknown): number | null {
  if (typeof value === 'string') {
    if (value.trim().length === 0) {
      return null;
    }
  } else if (typeof value !== 'number' && typeof value !== 'bigint') {
    return null;
  }
  const parsed = Number(value);
  return Number.isFinite(parsed) ? parsed : null;
}
/**
 * Resolves an indirect configuration value.
 *
 * - `env:NAME`  → the value of `env[NAME]`, or `''` when the variable is unset.
 * - `file:PATH` → the trimmed UTF-8 contents of PATH; a leading `~` is expanded
 *   to the current user's home directory.
 * - anything else is returned unchanged.
 *
 * @param value Literal value or `env:` / `file:` reference.
 * @param env Environment used for `env:` lookups.
 * @returns The resolved string.
 * @throws Whatever `readFileSync` throws when a referenced file cannot be read.
 */
function resolveStringReference(value: string, env: NodeJS.ProcessEnv): string {
  if (value.startsWith('env:')) {
    return env[value.slice('env:'.length)] ?? '';
  }
  if (value.startsWith('file:')) {
    const rawPath = value.slice('file:'.length);
    // Drop the separator(s) that follow `~` before resolving: path.resolve()
    // discards every earlier segment once it meets an absolute one, so
    // resolve(home, '/secret') would read `/secret` instead of `<home>/secret`.
    const path = rawPath.startsWith('~')
      ? resolve(homedir(), rawPath.slice(1).replace(/^[\\/]+/, ''))
      : rawPath;
    return readFileSync(path, 'utf-8').trim();
  }
  return value;
}
/**
 * Builds the pool configuration for a configured PostgreSQL connection.
 *
 * String fields (`url`, `host`, `database`, `username`/`user`, `password`,
 * and a string `port`) may be literals or `env:` / `file:` references. When a
 * `url` is present it is passed through as `connectionString`; otherwise the
 * discrete host/port/database/user/password fields are used.
 *
 * The `search_path` startup option always lists the configured schemas followed
 * by `public` when `public` is not already among them.
 *
 * @param input.connectionId Connection key, used in error messages.
 * @param input.connection Connection entry from configuration.
 * @param input.env Environment for `env:` references (defaults to `process.env`).
 * @returns Pool configuration with fixed pool size and timeouts.
 * @throws Error when the driver is not PostgreSQL, when `readonly` is not
 *   exactly `true`, or when host / database / user are missing and no `url`
 *   is configured.
 */
export function postgresPoolConfigFromConfig(input: {
  connectionId: string;
  connection: KloPostgresConnectionConfig | undefined;
  env?: NodeJS.ProcessEnv;
}): KloPostgresPoolConfig {
  if (!isKloPostgresConnectionConfig(input.connection)) {
    throw new Error(`Native PostgreSQL connector cannot run driver "${input.connection?.driver ?? 'unknown'}"`);
  }
  if (input.connection?.readonly !== true) {
    throw new Error(`Native PostgreSQL connector requires connections.${input.connectionId}.readonly: true`);
  }

  const env = input.env ?? process.env;
  const referencedUrl = stringConfigValue(input.connection, 'url', env);
  const urlConfig = referencedUrl ? parsePostgresUrl(referencedUrl) : {};
  // Explicit connection fields win over values parsed from the URL.
  const merged: KloPostgresConnectionConfig = { ...urlConfig, ...input.connection };
  const host = stringConfigValue(merged, 'host', env);
  const database = stringConfigValue(merged, 'database', env);
  const user = stringConfigValue(merged, 'username', env) ?? stringConfigValue(merged, 'user', env);
  const password = stringConfigValue(merged, 'password', env);

  if (!referencedUrl && !host) {
    throw new Error(`Native PostgreSQL connector requires connections.${input.connectionId}.host or url`);
  }
  if (!database && !referencedUrl) {
    throw new Error(`Native PostgreSQL connector requires connections.${input.connectionId}.database or url`);
  }
  if (!user && !referencedUrl) {
    throw new Error(`Native PostgreSQL connector requires connections.${input.connectionId}.username, user, or url`);
  }

  // A port may arrive as a string (quoted in YAML, or an env:/file: reference
  // like the other fields). Honour it instead of silently using 5432; fall
  // back to the default only when it is absent or not a positive integer.
  const configuredPort: unknown = merged.port;
  const referencedPort =
    typeof configuredPort === 'string' ? Number(stringConfigValue(merged, 'port', env)) : Number.NaN;
  const port =
    numberValue(configuredPort) ?? (Number.isInteger(referencedPort) && referencedPort > 0 ? referencedPort : 5432);

  const config: KloPostgresPoolConfig = {
    max: 10,
    idleTimeoutMillis: 30_000,
    connectionTimeoutMillis: 10_000,
    ...(referencedUrl ? { connectionString: referencedUrl } : { host, port, database, user, password }),
  };
  const searchPathSchemas = searchPathSchemasFromConnection(merged);
  if (searchPathSchemas.length > 0) {
    config.options = `-c search_path=${searchPathSchemas.join(',')}`;
  }
  if (merged.ssl) {
    // Certificate verification stays on unless it is explicitly disabled.
    config.ssl = { rejectUnauthorized: merged.rejectUnauthorized ?? true };
  }
  return config;
}
error.message : String(error) }; + } + } + + async introspect(input: KloScanInput, _ctx: KloScanContext): Promise { + this.assertConnection(input.connectionId); + const schemas = schemasFromConnection(this.connection); + const allTables: KloSchemaTable[] = []; + for (const schema of schemas) { + const tables = await this.loadSchemaTables(schema); + allTables.push(...tables); + } + return { + connectionId: this.connectionId, + driver: 'postgres', + extractedAt: this.now().toISOString(), + scope: { schemas }, + metadata: { + database: this.poolConfig.database ?? this.connection.database ?? null, + schemas, + host: this.poolConfig.host ?? this.connection.host ?? null, + table_count: allTables.length, + total_columns: allTables.reduce((sum, table) => sum + table.columns.length, 0), + }, + tables: allTables, + }; + } + + async sampleTable(input: KloTableSampleInput, _ctx: KloScanContext): Promise { + this.assertConnection(input.connectionId); + const result = await this.query(this.dialect.generateSampleQuery(this.qTableName(input.table), input.limit, input.columns)); + return { + headers: result.headers, + headerTypes: result.headerTypes, + rows: result.rows, + totalRows: result.totalRows, + }; + } + + async sampleColumn(input: KloColumnSampleInput, _ctx: KloScanContext): Promise { + this.assertConnection(input.connectionId); + const result = await this.query( + this.dialect.generateColumnSampleQuery(this.qTableName(input.table), input.column, input.limit), + ); + const values = result.rows.filter((row) => row.length > 0 && row[0] !== null).map((row) => row[0]); + return { values, nullCount: null, distinctCount: null }; + } + + async columnStats(input: KloColumnStatsInput, _ctx: KloScanContext): Promise { + const stats = await this.getColumnStatistics(input.table); + const value = stats?.cardinalityByColumn.get(input.column); + return value === undefined + ? 
null + : { min: null, max: null, average: null, nullCount: null, distinctCount: value }; + } + + async executeReadOnly(input: KloPostgresReadOnlyQueryInput, _ctx: KloScanContext): Promise { + this.assertConnection(input.connectionId); + const limitedSql = limitSqlForExecution(assertReadOnlySql(input.sql), input.maxRows); + const prepared = Array.isArray(input.params) + ? { sql: limitedSql, params: input.params } + : this.dialect.prepareQuery(limitedSql, input.params); + const result = await this.query(prepared.sql, prepared.params); + return { ...result, rowCount: result.rows.length }; + } + + async getColumnDistinctValues( + table: KloTableRef, + columnName: string, + options: KloPostgresColumnDistinctValuesOptions, + ): Promise { + const sampleSize = options.sampleSize ?? 10000; + const tableName = this.qTableName(table); + const quotedColumn = this.dialect.quoteIdentifier(columnName); + const cardinalityRows = await this.queryRaw( + this.dialect.generateCardinalitySampleQuery(tableName, quotedColumn, sampleSize), + ); + const cardinality = finiteNumber(cardinalityRows[0]?.cardinality); + if (cardinality === null) { + return null; + } + if (cardinality === 0) { + return { values: [], cardinality: 0 }; + } + if (cardinality > options.maxCardinality) { + return { values: null, cardinality }; + } + const valuesRows = await this.queryRaw( + this.dialect.generateDistinctValuesQuery(tableName, quotedColumn, options.limit), + ); + return { + values: valuesRows.filter((row) => row.val !== null).map((row) => String(row.val)), + cardinality, + }; + } + + async getColumnStatistics(table: KloTableRef): Promise { + const schema = table.db ?? schemasFromConnection(this.connection)[0] ?? 
/**
 * Releases everything this connector opened: the connection pool and, when an
 * endpoint resolver supplied one, the resolved endpoint's `close` hook.
 *
 * Both references are detached before any awaiting happens, so a repeated or
 * concurrent `cleanup()` never ends the same pool twice, and a later query
 * creates a fresh pool rather than reusing an ended one.
 *
 * @throws Re-throws a failure from `pool.end()` or `endpoint.close()`; the
 *   endpoint is still closed when ending the pool fails.
 */
async cleanup(): Promise<void> {
  const pool = this.pool;
  const endpoint = this.resolvedEndpoint;
  this.pool = null;
  this.resolvedEndpoint = null;
  try {
    if (pool) {
      await pool.end();
    }
  } finally {
    // Runs even when pool.end() rejected, so the endpoint is not leaked.
    if (endpoint?.close) {
      await endpoint.close();
    }
  }
}
ORDER BY c.relname + `, + [schema], + ); + const columns = await this.queryRaw( + ` + SELECT + c.relname AS table_name, + a.attname AS column_name, + format_type(a.atttypid, a.atttypmod) AS data_type, + NOT a.attnotnull AS is_nullable, + d.description AS column_comment + FROM pg_catalog.pg_attribute a + JOIN pg_catalog.pg_class c ON a.attrelid = c.oid + JOIN pg_catalog.pg_namespace n ON c.relnamespace = n.oid + LEFT JOIN pg_catalog.pg_description d + ON d.objoid = c.oid AND d.objsubid = a.attnum + WHERE n.nspname = $1 + AND c.relkind IN ('r', 'v') + AND a.attnum > 0 + AND NOT a.attisdropped + ORDER BY c.relname, a.attnum + `, + [schema], + ); + const primaryKeys = await this.queryRaw( + ` + SELECT tc.table_name, kcu.column_name + FROM information_schema.table_constraints tc + JOIN information_schema.key_column_usage kcu + ON tc.constraint_name = kcu.constraint_name + AND tc.table_schema = kcu.table_schema + WHERE tc.constraint_type = 'PRIMARY KEY' + AND tc.table_schema = $1 + ORDER BY tc.table_name, kcu.ordinal_position + `, + [schema], + ); + const foreignKeys = await this.queryRaw( + ` + SELECT + tc.table_name, + kcu.column_name, + ccu.table_schema AS foreign_table_schema, + ccu.table_name AS foreign_table_name, + ccu.column_name AS foreign_column_name, + tc.constraint_name + FROM information_schema.table_constraints AS tc + JOIN information_schema.key_column_usage AS kcu + ON tc.constraint_name = kcu.constraint_name + AND tc.table_schema = kcu.table_schema + JOIN information_schema.constraint_column_usage AS ccu + ON ccu.constraint_name = tc.constraint_name + AND ccu.table_schema = tc.table_schema + WHERE tc.constraint_type = 'FOREIGN KEY' + AND tc.table_schema = $1 + ORDER BY tc.table_name, kcu.column_name + `, + [schema], + ); + + const columnsByTable = groupByTable(columns); + const primaryKeysByTable = primaryKeyMap(primaryKeys); + const foreignKeysByTable = groupByTable(foreignKeys); + return tables.map((table) => + this.toSchemaTable( + schema, + table, + 
/**
 * Returns the lazily created connection pool, creating it on first use.
 *
 * When an endpoint resolver is configured, the host/port it returns replace
 * the configured ones and the resolved endpoint is remembered so `cleanup()`
 * can close it.
 *
 * NOTE(review): when the pool config carries a `connectionString`, confirm the
 * driver lets the overridden `host`/`port` take precedence over the values
 * embedded in the URL.
 *
 * @returns The shared pool for this connector.
 */
private async getPool(): Promise<KloPostgresPool> {
  if (this.pool) {
    return this.pool;
  }
  let config = { ...this.poolConfig };
  if (this.endpointResolver) {
    const endpoint = await this.endpointResolver.resolve({
      host: config.host ?? this.connection.host ?? 'localhost',
      port: config.port ?? numberValue(this.connection.port) ?? 5432,
      connection: this.connection,
    });
    // Resolution is asynchronous, so another caller may have created the pool
    // while this one was waiting. Keep that pool and release the endpoint
    // resolved here instead of overwriting (and leaking) the first one.
    if (this.pool) {
      await endpoint.close?.();
      return this.pool;
    }
    this.resolvedEndpoint = endpoint;
    config = { ...config, host: endpoint.host, port: endpoint.port };
  }
  this.pool = this.poolFactory.createPool(config);
  return this.pool;
}
expect(dialect.formatTableName({ catalog: null, db: null, name: 'orders' })).toBe('"orders"'); + }); + + it('maps native PostgreSQL types to KLO dimension types', () => { + expect(dialect.mapToDimensionType('timestamp with time zone')).toBe('time'); + expect(dialect.mapToDimensionType('numeric(12,2)')).toBe('number'); + expect(dialect.mapToDimensionType('uuid')).toBe('string'); + expect(dialect.mapToDimensionType('boolean')).toBe('boolean'); + expect(dialect.mapToDimensionType('jsonb')).toBe('string'); + }); + + it('generates sample, distinct-value, statistics, and time SQL', () => { + expect(dialect.generateSampleQuery('"public"."orders"', 5, ['id', 'status'])).toBe( + 'SELECT "id", "status" FROM "public"."orders" LIMIT 5', + ); + expect(dialect.generateColumnSampleQuery('"public"."orders"', 'status', 10)).toContain( + 'TRIM(CAST("status" AS TEXT)) != \'\'', + ); + expect(dialect.generateDistinctValuesQuery('"public"."orders"', '"status"', 20)).toContain( + 'SELECT DISTINCT "status"::text AS val', + ); + expect(dialect.generateColumnStatisticsQuery('public', 'orders')).toContain('FROM pg_stats s'); + expect(dialect.getTimeTruncExpression('"created_at"', 'month')).toBe('DATE_TRUNC(\'month\', "created_at")'); + }); + + it('prepares named parameters with PostgreSQL positional parameters', () => { + expect( + dialect.prepareQuery('select * from orders where id = :id and status = :status', { id: 1, status: 'paid' }), + ).toEqual({ + sql: 'select * from orders where id = $1 and status = $2', + params: [1, 'paid'], + }); + expect( + dialect.prepareQuery('select :Client_Name_10, :Client_Name_1', { + Client_Name_1: 'short', + Client_Name_10: 'long', + }), + ).toEqual({ + sql: 'select $2, $1', + params: ['short', 'long'], + }); + }); +}); diff --git a/packages/connector-postgres/src/dialect.ts b/packages/connector-postgres/src/dialect.ts new file mode 100644 index 00000000..4b9cdba8 --- /dev/null +++ b/packages/connector-postgres/src/dialect.ts @@ -0,0 +1,213 @@ +import 
/**
 * SQL dialect helper for PostgreSQL.
 *
 * Produces SQL fragments and whole statements as strings; it never talks to a
 * database itself. Unless a method says it quotes its input, table and column
 * arguments are interpolated verbatim, so callers must pass identifiers that
 * are already quoted (see `quoteIdentifier` / `formatTableName`).
 */
export class KloPostgresDialect {
  readonly type = 'postgresql';

  /** Exact-match lookup from a lower-cased native type name (without length/precision) to a dimension type. */
  private readonly typeMappings: Record<string, KloSchemaDimensionType> = {
    timestamp: 'time',
    'timestamp without time zone': 'time',
    'timestamp with time zone': 'time',
    timestamptz: 'time',
    datetime: 'time',
    date: 'time',
    time: 'time',
    integer: 'number',
    int: 'number',
    int2: 'number',
    int4: 'number',
    int8: 'number',
    bigint: 'number',
    smallint: 'number',
    decimal: 'number',
    numeric: 'number',
    float: 'number',
    float4: 'number',
    float8: 'number',
    'double precision': 'number',
    real: 'number',
    money: 'number',
    text: 'string',
    varchar: 'string',
    'character varying': 'string',
    char: 'string',
    character: 'string',
    uuid: 'string',
    json: 'string',
    jsonb: 'string',
    boolean: 'boolean',
    bool: 'boolean',
  };

  /** Wraps an identifier in double quotes, doubling any embedded double quote. */
  quoteIdentifier(identifier: string): string {
    return `"${identifier.replace(/"/g, '""')}"`;
  }

  /**
   * Formats a table reference as `"schema"."table"`, or just `"table"` when
   * `table.db` is empty. `table.catalog` is not used.
   */
  formatTableName(table: PostgresTableNameRef): string {
    return table.db
      ? `${this.quoteIdentifier(table.db)}.${this.quoteIdentifier(table.name)}`
      : this.quoteIdentifier(table.name);
  }

  /** Returns the native type unchanged. */
  mapDataType(nativeType: string): string {
    return nativeType;
  }

  /**
   * Maps a native type name to a dimension type.
   *
   * The name is lower-cased, trimmed, and cut at the first `(` so that
   * `numeric(12,2)` is looked up as `numeric`. An exact match in
   * `typeMappings` wins; otherwise substring heuristics are applied, and
   * anything unrecognised (including an empty name) is `'string'`.
   */
  mapToDimensionType(nativeType: string): KloSchemaDimensionType {
    if (!nativeType) {
      return 'string';
    }
    const lower = nativeType.toLowerCase().trim();
    const normalized = lower.includes('(') ? lower.split('(')[0]!.trim() : lower;
    // Own-property check: a plain index lookup would also hit inherited keys
    // such as "constructor" or "toString" and return a function.
    const mapped = Object.prototype.hasOwnProperty.call(this.typeMappings, normalized)
      ? this.typeMappings[normalized]
      : undefined;
    if (mapped) {
      return mapped;
    }
    if (normalized.includes('time') || normalized.includes('date')) {
      return 'time';
    }
    if (
      normalized.includes('int') ||
      normalized.includes('num') ||
      normalized.includes('dec') ||
      normalized.includes('float') ||
      normalized.includes('double')
    ) {
      return 'number';
    }
    if (normalized.includes('bool')) {
      return 'boolean';
    }
    return 'string';
  }

  /**
   * Builds `SELECT <columns> FROM <tableName> LIMIT <limit>`.
   * Column names are quoted here; `tableName` is used verbatim. With no
   * columns (or an empty list) `*` is selected.
   */
  generateSampleQuery(tableName: string, limit: number, columns?: string[]): string {
    const columnList =
      columns && columns.length > 0 ? columns.map((column) => this.quoteIdentifier(column)).join(', ') : '*';
    return `SELECT ${columnList} FROM ${tableName} LIMIT ${limit}`;
  }

  /**
   * Builds a query returning up to `limit` values of one column, skipping
   * NULLs and values whose text form is blank. `columnName` is quoted here;
   * `tableName` is used verbatim.
   */
  generateColumnSampleQuery(tableName: string, columnName: string, limit: number): string {
    const quotedColumn = this.quoteIdentifier(columnName);
    return `SELECT ${quotedColumn} FROM ${tableName} WHERE ${quotedColumn} IS NOT NULL AND TRIM(CAST(${quotedColumn} AS TEXT)) != '' LIMIT ${limit}`;
  }

  /**
   * Rewrites `:name` placeholders to PostgreSQL positional parameters.
   *
   * Positions follow the key order of `params` (`$1` is the first key), and
   * the returned `params` array holds the values in that same order. When
   * `params` is omitted the SQL is returned untouched with `params: undefined`.
   *
   * A `:name` that is itself preceded by a colon is left alone, so a cast such
   * as `col::text` is not mistaken for the parameter `text`.
   */
  prepareQuery(sql: string, params?: Record<string, unknown>): { sql: string; params?: unknown[] } {
    if (!params) {
      return { sql, params: undefined };
    }
    const paramNames = Object.keys(params);
    const values: unknown[] = new Array(paramNames.length);
    const paramIndexMap = new Map<string, number>();
    paramNames.forEach((name, index) => {
      paramIndexMap.set(name, index + 1);
      values[index] = params[name];
    });
    // Longest names first so a short name is never substituted inside a longer one.
    const sortedKeys = [...paramNames].sort((a, b) => b.length - a.length);
    let parameterizedQuery = sql;
    for (const name of sortedKeys) {
      // Escape regex metacharacters so the name is matched literally.
      const escapedName = name.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
      const placeholder = `$${paramIndexMap.get(name)}`;
      // A replacer function keeps "$1" from being read as a capture-group reference.
      parameterizedQuery = parameterizedQuery.replace(
        new RegExp(`(?<!:):${escapedName}\\b`, 'g'),
        () => placeholder,
      );
    }
    return { sql: parameterizedQuery, params: values };
  }

  /** Returns `RANDOM() < pct` for a fraction strictly between 0 and 1, otherwise an empty string. */
  getRandomSampleFilter(samplePct: number): string {
    if (samplePct <= 0 || samplePct >= 1) {
      return '';
    }
    return `RANDOM() < ${samplePct}`;
  }

  /**
   * Returns a `TABLESAMPLE SYSTEM (n)` clause for a fraction strictly between
   * 0 and 1 (converted to a percentage), otherwise an empty string.
   */
  getTableSampleClause(samplePct: number): string {
    if (samplePct <= 0 || samplePct >= 1) {
      return '';
    }
    return `TABLESAMPLE SYSTEM (${samplePct * 100})`;
  }

  /** Returns `LIMIT n`, adding `OFFSET m` only when a positive offset is given. */
  getLimitOffsetClause(limit: number, offset?: number): string {
    return offset !== undefined && offset > 0 ? `LIMIT ${limit} OFFSET ${offset}` : `LIMIT ${limit}`;
  }

  /** Aggregate expression counting rows where `column` is NULL. `column` is used verbatim. */
  getNullCountExpression(column: string): string {
    return `COUNT(*) FILTER (WHERE ${column} IS NULL)`;
  }

  /** Aggregate expression counting distinct values of `column`. `column` is used verbatim. */
  getDistinctCountExpression(column: string): string {
    return `COUNT(DISTINCT ${column})`;
  }

  /**
   * Query counting distinct non-NULL values among the first `sampleSize` rows
   * returned for the column. Both identifiers are used verbatim.
   */
  generateCardinalitySampleQuery(tableName: string, columnName: string, sampleSize: number): string {
    return `
      WITH sampled AS (
        SELECT ${columnName} AS val
        FROM ${tableName}
        WHERE ${columnName} IS NOT NULL
        LIMIT ${sampleSize}
      )
      SELECT COUNT(DISTINCT val) AS cardinality
      FROM sampled
    `;
  }

  /**
   * Query returning up to `limit` distinct non-NULL values of the column cast
   * to text, ordered by value. Both identifiers are used verbatim.
   */
  generateDistinctValuesQuery(tableName: string, columnName: string, limit: number): string {
    return `
      SELECT DISTINCT ${columnName}::text AS val
      FROM ${tableName}
      WHERE ${columnName} IS NOT NULL
      ORDER BY val
      LIMIT ${limit}
    `;
  }

  /**
   * Query reading per-column cardinality estimates from `pg_stats`.
   *
   * A positive `n_distinct` is used as-is; a negative one is scaled by the
   * table's `reltuples`. The schema and table names are embedded as string
   * literals with single quotes doubled.
   */
  generateColumnStatisticsQuery(schemaName: string, tableName: string): string | null {
    return `
      SELECT
        s.attname AS column_name,
        CASE
          WHEN s.n_distinct > 0 THEN s.n_distinct::bigint
          WHEN s.n_distinct < 0 THEN (-s.n_distinct * c.reltuples)::bigint
          ELSE NULL
        END AS estimated_cardinality
      FROM pg_stats s
      JOIN pg_class c ON c.relname = s.tablename
      JOIN pg_namespace n ON c.relnamespace = n.oid AND n.nspname = s.schemaname
      WHERE s.schemaname = '${schemaName.replace(/'/g, "''")}'
        AND s.tablename = '${tableName.replace(/'/g, "''")}'
        AND s.n_distinct IS NOT NULL
    `;
  }

  /**
   * Same as `generateCardinalitySampleQuery`, but the sampled rows are chosen
   * with `ORDER BY RANDOM()` before the limit is applied.
   */
  generateRandomizedCardinalitySampleQuery(tableName: string, columnName: string, sampleSize: number): string {
    return `
      WITH sampled AS (
        SELECT ${columnName} AS val
        FROM ${tableName}
        WHERE ${columnName} IS NOT NULL
        ORDER BY RANDOM()
        LIMIT ${sampleSize}
      )
      SELECT COUNT(DISTINCT val) AS cardinality
      FROM sampled
    `;
  }

  /**
   * `DATE_TRUNC` expression for the given granularity. When a timezone is
   * supplied the column is first shifted with `AT TIME ZONE` (the zone name is
   * embedded as a literal with single quotes doubled).
   */
  getTimeTruncExpression(
    column: string,
    granularity: 'day' | 'week' | 'month' | 'quarter' | 'year',
    timezone?: string,
  ): string {
    const col = timezone ? `(${column} AT TIME ZONE '${timezone.replace(/'/g, "''")}')` : column;
    return `DATE_TRUNC('${granularity}', ${col})`;
  }

  /**
   * Expression that buckets a timestamp into fixed-width intervals counted
   * from `origin` (default `1970-01-01`): origin + floor(elapsed / width) * width.
   * `interval`, `origin`, and `timezone` are embedded as literals with single
   * quotes doubled.
   */
  getCustomTimeTruncExpression(column: string, interval: string, origin?: string, timezone?: string): string {
    const col = timezone ? `(${column} AT TIME ZONE '${timezone.replace(/'/g, "''")}')` : column;
    const originExpr = origin ? `TIMESTAMP '${origin.replace(/'/g, "''")}'` : "TIMESTAMP '1970-01-01'";
    const intervalExpr = `INTERVAL '${interval.replace(/'/g, "''")}'`;
    return `${originExpr} + FLOOR(EXTRACT(EPOCH FROM (${col} - ${originExpr})) / EXTRACT(EPOCH FROM ${intervalExpr})) * ${intervalExpr}`;
  }

  /** Wraps an interval description in an `INTERVAL '...'` literal, doubling single quotes. */
  parseIntervalToSql(interval: string): string {
    return `INTERVAL '${interval.replace(/'/g, "''")}'`;
  }
}
/**
 * Query client that runs read-only SQL through a `KloPostgresScanConnector`
 * built from the same options, exposing only headers, rows, and the row total.
 */
export class KloPostgresHistoricSqlQueryClient implements KloPostgresQueryClient {
  private readonly connectionId: string;
  private readonly connector: KloPostgresScanConnector;

  constructor(options: KloPostgresHistoricSqlQueryClientOptions) {
    this.connectionId = options.connectionId;
    this.connector = new KloPostgresScanConnector(options);
  }

  /**
   * Forwards the statement and its positional parameters to the connector's
   * `executeReadOnly`, passing an empty placeholder as the scan context, and
   * returns the subset of the result this client's contract exposes.
   */
  async executeQuery(
    sql: string,
    params?: unknown[],
  ): Promise<{ headers: string[]; rows: unknown[][]; totalRows: number }> {
    const { headers, rows, totalRows } = await this.connector.executeReadOnly(
      {
        connectionId: this.connectionId,
        sql,
        params,
      },
      {} as never,
    );
    return { headers, rows, totalRows };
  }

  /** Delegates cleanup to the underlying connector. */
  async cleanup(): Promise<void> {
    await this.connector.cleanup();
  }
}
LiveDatabaseIntrospectionPort { + return { + async extractSchema(connectionId: string) { + const connection = options.connections[connectionId] as KloPostgresConnectionConfig | undefined; + const connector = new KloPostgresScanConnector({ + connectionId, + connection, + poolFactory: options.poolFactory, + endpointResolver: options.endpointResolver, + now: options.now, + }); + try { + return await connector.introspect({ connectionId, driver: 'postgres' }, { runId: `postgres-${connectionId}` }); + } finally { + await connector.cleanup(); + } + }, + }; +} diff --git a/packages/connector-postgres/src/package-exports.test.ts b/packages/connector-postgres/src/package-exports.test.ts new file mode 100644 index 00000000..66e86944 --- /dev/null +++ b/packages/connector-postgres/src/package-exports.test.ts @@ -0,0 +1,13 @@ +import { describe, expect, it } from 'vitest'; + +describe('@klo/connector-postgres package exports', () => { + it('exports the connector, dialect, and live-database adapter', async () => { + const connector = await import('./index.js'); + expect(connector.KloPostgresDialect).toBeTypeOf('function'); + expect(connector.KloPostgresScanConnector).toBeTypeOf('function'); + expect(connector.KloPostgresHistoricSqlQueryClient).toBeTypeOf('function'); + expect(connector.createPostgresLiveDatabaseIntrospection).toBeTypeOf('function'); + expect(connector.isKloPostgresConnectionConfig).toBeTypeOf('function'); + expect(connector.postgresPoolConfigFromConfig).toBeTypeOf('function'); + }); +}); diff --git a/packages/connector-postgres/tsconfig.json b/packages/connector-postgres/tsconfig.json new file mode 100644 index 00000000..965e6978 --- /dev/null +++ b/packages/connector-postgres/tsconfig.json @@ -0,0 +1,9 @@ +{ + "extends": "../../tsconfig.base.json", + "compilerOptions": { + "outDir": "./dist", + "rootDir": "./src" + }, + "include": ["src/**/*.ts"], + "exclude": ["dist", "node_modules"] +} diff --git a/packages/connector-posthog/package.json 
/**
 * Builds a minimal stand-in for a fetch `Response` carrying a JSON body.
 * `ok` is true for statuses in the 2xx range; `json()` resolves to the body
 * itself and `text()` to its JSON serialisation.
 */
function jsonResponse(body: unknown, status = 200): Response {
  const isSuccess = status >= 200 && status < 300;
  return {
    ok: isSuccess,
    status,
    json: async () => body,
    text: async () => JSON.stringify(body),
  } as Response;
}
KloPostHogFetch { + return vi.fn(async (_url: string, init?: RequestInit) => { + const body = JSON.parse(String(init?.body ?? '{}')) as { query?: { kind?: string; query?: string } }; + const sql = body.query?.query ?? ''; + if (sql) { + queries.push(sql); + } + if (body.query?.kind === 'DatabaseSchemaQuery') { + return jsonResponse({ + tables: { + events: { + id: 'events', + name: 'events', + type: 'posthog', + row_count: 42, + fields: { + uuid: { + name: 'uuid', + type: 'uuid', + hogql_value: 'uuid', + schema_valid: true, + table: 'events', + fields: null, + chain: null, + id: 'uuid', + }, + event: { + name: 'event', + type: 'string', + hogql_value: 'event', + schema_valid: true, + table: 'events', + fields: null, + chain: null, + id: 'event', + }, + timestamp: { + name: 'timestamp', + type: 'datetime', + hogql_value: 'timestamp', + schema_valid: true, + table: 'events', + fields: null, + chain: null, + id: 'timestamp', + }, + properties: { + name: 'properties', + type: 'json', + hogql_value: 'properties', + schema_valid: true, + table: 'events', + fields: null, + chain: null, + id: 'properties', + }, + virtual: { + name: 'virtual', + type: 'virtual_table', + hogql_value: 'virtual', + schema_valid: true, + table: null, + fields: null, + chain: null, + id: 'virtual', + }, + }, + }, + query_log: { + id: 'query_log', + name: 'query_log', + type: 'posthog', + row_count: 1, + fields: {}, + }, + }, + joins: [], + }); + } + if (sql.includes('SELECT * FROM person_distinct_ids LIMIT 0')) { + return jsonResponse({ + results: [], + columns: ['distinct_id', 'person_id'], + types: [ + ['distinct_id', 'String'], + ['person_id', 'UUID'], + ], + error: null, + hogql: sql, + }); + } + if (sql.includes('LIMIT 0')) { + return jsonResponse({ results: null, columns: null, types: null, error: 'Table not found', hogql: sql }); + } + if (sql.includes('SELECT 1 AS test')) { + return jsonResponse({ results: [[1]], columns: ['test'], types: [['test', 'Int64']], error: null, hogql: sql }); + 
} + if (sql.includes('count() AS cnt')) { + return jsonResponse({ results: [[42]], columns: ['cnt'], types: [['cnt', 'Int64']], error: null, hogql: sql }); + } + if (sql.includes('GROUP BY event')) { + return jsonResponse({ + results: [['$pageview', 9]], + columns: ['event', 'cnt'], + types: [ + ['event', 'String'], + ['cnt', 'Int64'], + ], + error: null, + hogql: sql, + }); + } + if (sql.includes('arrayJoin(JSONExtractKeys')) { + return jsonResponse({ + results: [['$browser', 7]], + columns: ['key', 'cnt'], + types: [ + ['key', 'String'], + ['cnt', 'Int64'], + ], + error: null, + hogql: sql, + }); + } + if (sql.includes('uniq(JSONExtractString') || sql.includes('uniq(val) AS cardinality')) { + return jsonResponse({ + results: [[2]], + columns: ['cardinality'], + types: [['cardinality', 'Int64']], + error: null, + hogql: sql, + }); + } + if (sql.includes('DISTINCT JSONExtractString') || sql.includes('SELECT DISTINCT toString(')) { + return jsonResponse({ + results: [['Chrome'], ['Safari']], + columns: ['value'], + types: [['value', 'String']], + error: null, + hogql: sql, + }); + } + return jsonResponse({ results: [['$pageview']], columns: ['event'], types: [['event', 'String']], error: null, hogql: sql }); + }) as KloPostHogFetch; +} + +const posthogApiKeyEnv = ['POSTHOG', 'API', 'KEY'].join('_'); +const fixtureToken = ['phx', 'fixture'].join('_'); +const env = { [posthogApiKeyEnv]: fixtureToken }; +const connection: KloPostHogConnectionConfig & { driver: string } = { + driver: 'posthog', + ['api_' + 'key']: `env:${posthogApiKeyEnv}`, + project_id: '157881', + region: 'us', + readonly: true, +}; + +describe('KloPostHogScanConnector', () => { + it('resolves configuration safely', () => { + expect(isKloPostHogConnectionConfig(connection)).toBe(true); + expect(isKloPostHogConnectionConfig({ driver: 'mysql' })).toBe(false); + const resolved = postHogConnectionConfigFromConfig({ + connectionId: 'product', + connection, + env, + }); + expect(resolved).toMatchObject({ 
projectId: '157881', baseUrl: 'https://us.posthog.com' }); + const tokenField = ['api', 'Key'].join('') as keyof typeof resolved; + expect(resolved[tokenField]).toBe(fixtureToken); + expect(() => + postHogConnectionConfigFromConfig({ + connectionId: 'product', + connection: { ...connection, readonly: false }, + }), + ).toThrow('Native PostHog connector requires connections.product.readonly: true'); + }); + + it('introspects schema metadata, hidden tables, descriptions, primary keys, and normalized types', async () => { + const connector = new KloPostHogScanConnector({ + connectionId: 'product', + connection, + env, + fetch: fakeFetch(), + sleep: async () => {}, + now: () => new Date('2026-04-29T19:00:00.000Z'), + }); + + const snapshot = await connector.introspect({ connectionId: 'product', driver: 'posthog' }, { runId: 'scan-run-1' }); + + expect(snapshot).toMatchObject({ + connectionId: 'product', + driver: 'posthog', + extractedAt: '2026-04-29T19:00:00.000Z', + scope: { catalogs: ['157881'] }, + metadata: { + project_id: '157881', + table_count: 2, + total_columns: 6, + }, + }); + expect(snapshot.tables.map((table) => table.name)).toEqual(['events', 'person_distinct_ids']); + expect(snapshot.tables[0]).toMatchObject({ + catalog: '157881', + db: null, + name: 'events', + kind: 'event_stream', + estimatedRows: 42, + comment: expect.stringContaining('PostHog event stream'), + foreignKeys: [], + }); + expect(snapshot.tables[0]?.columns).toEqual([ + { + name: 'uuid', + nativeType: 'UUID', + normalizedType: 'UUID', + dimensionType: 'string', + nullable: false, + primaryKey: true, + comment: 'Unique identifier for this specific event.', + }, + { + name: 'event', + nativeType: 'String', + normalizedType: 'VARCHAR', + dimensionType: 'string', + nullable: false, + primaryKey: false, + comment: expect.stringContaining('Event name'), + }, + { + name: 'timestamp', + nativeType: 'DateTime64', + normalizedType: 'TIMESTAMP', + dimensionType: 'time', + nullable: false, + 
primaryKey: false, + comment: expect.stringContaining('UTC timestamp'), + }, + { + name: 'properties', + nativeType: 'JSON', + normalizedType: 'JSON', + dimensionType: 'string', + nullable: true, + primaryKey: false, + comment: expect.stringContaining('JSON object'), + }, + ]); + }); + + it('runs samples, read-only SQL, event-stream discovery, row counts, and cleanup', async () => { + const queries: string[] = []; + const connector = new KloPostHogScanConnector({ + connectionId: 'product', + connection, + env, + fetch: fakeFetch(queries), + sleep: async () => {}, + }); + + await expect(connector.testConnection()).resolves.toEqual({ success: true }); + await expect( + connector.sampleTable( + { + connectionId: 'product', + table: { catalog: '157881', db: null, name: 'events' }, + columns: ['event'], + limit: 1, + }, + { runId: 'scan-run-1' }, + ), + ).resolves.toMatchObject({ headers: ['event'], rows: [['$pageview']], totalRows: 1 }); + await expect( + connector.sampleColumn( + { connectionId: 'product', table: { catalog: '157881', db: null, name: 'events' }, column: 'event', limit: 5 }, + { runId: 'scan-run-1' }, + ), + ).resolves.toEqual({ values: ['$pageview'], nullCount: null, distinctCount: null }); + await expect( + connector.executeReadOnly({ connectionId: 'product', sql: 'select event from events', maxRows: 1 }, { runId: 'scan-run-1' }), + ).resolves.toMatchObject({ headers: ['event'], rows: [['$pageview']], totalRows: 1, rowCount: 1 }); + await expect( + connector.executeReadOnly({ connectionId: 'product', sql: 'delete from events' }, { runId: 'scan-run-1' }), + ).rejects.toThrow('Only read-only SELECT/WITH queries can be executed locally'); + await expect(connector.getTableRowCount('events')).resolves.toBe(42); + await expect( + connector.getColumnDistinctValues({ catalog: '157881', db: null, name: 'events' }, 'properties.$browser', { + maxCardinality: 5, + limit: 10, + sampleSize: 100, + }), + ).resolves.toEqual({ values: ['Chrome', 'Safari'], 
cardinality: 2 }); + await expect( + connector.eventStreamDiscovery.listEventTypes( + { + connectionId: 'product', + table: { catalog: '157881', db: null, name: 'events' }, + eventColumn: 'event', + limit: 10, + minCount: 30, + lookbackDays: 14, + }, + { runId: 'scan-run-1' }, + ), + ).resolves.toEqual([{ value: '$pageview', count: 9 }]); + expect(queries.some((query) => query.includes('HAVING cnt >= 30'))).toBe(true); + expect(queries.some((query) => query.includes('INTERVAL 14 DAY'))).toBe(true); + + await expect( + connector.eventStreamDiscovery.listPropertyKeys( + { + connectionId: 'product', + table: { catalog: '157881', db: null, name: 'events' }, + jsonColumn: 'properties', + sampleSize: 1000, + limit: 10, + lookbackDays: 7, + }, + { runId: 'scan-run-1' }, + ), + ).resolves.toEqual([{ key: '$browser', count: 7 }]); + + await expect( + connector.eventStreamDiscovery.listPropertyValues( + { + connectionId: 'product', + table: { catalog: '157881', db: null, name: 'events' }, + jsonColumn: 'properties', + propertyKey: '$browser', + limit: 10, + maxCardinality: 1000, + lookbackDays: 30, + }, + { runId: 'scan-run-1' }, + ), + ).resolves.toEqual({ + values: ['Chrome', 'Safari'], + cardinality: 2, + }); + await expect( + connector.columnStats( + { connectionId: 'product', table: { catalog: '157881', db: null, name: 'events' }, column: 'event' }, + { runId: 'scan-run-1' }, + ), + ).resolves.toBeNull(); + await connector.cleanup(); + }); + + it('adapts native snapshots to live-database introspection snapshots', async () => { + const introspection = createPostHogLiveDatabaseIntrospection({ + connections: { product: connection }, + env, + fetch: fakeFetch(), + sleep: async () => {}, + now: () => new Date('2026-04-29T19:00:00.000Z'), + }); + + await expect(introspection.extractSchema('product')).resolves.toMatchObject({ + connectionId: 'product', + metadata: { project_id: '157881' }, + tables: expect.arrayContaining([ + expect.objectContaining({ + catalog: '157881', + 
db: null, + name: 'events', + columns: expect.arrayContaining([ + { + name: 'uuid', + nativeType: 'UUID', + normalizedType: 'UUID', + dimensionType: 'string', + nullable: false, + primaryKey: true, + comment: 'Unique identifier for this specific event.', + }, + ]), + }), + ]), + }); + }); +}); diff --git a/packages/connector-posthog/src/connector.ts b/packages/connector-posthog/src/connector.ts new file mode 100644 index 00000000..06a0cad3 --- /dev/null +++ b/packages/connector-posthog/src/connector.ts @@ -0,0 +1,609 @@ +import { readFileSync } from 'node:fs'; +import { homedir } from 'node:os'; +import { resolve } from 'node:path'; +import { assertReadOnlySql, limitSqlForExecution } from '@klo/context/connections'; +import { + createKloConnectorCapabilities, + type KloColumnSampleInput, + type KloColumnSampleResult, + type KloColumnStatsInput, + type KloColumnStatsResult, + type KloEventPropertyDiscovery, + type KloEventPropertyDiscoveryInput, + type KloEventPropertyValuesInput, + type KloEventPropertyValuesResult, + type KloEventStreamDiscoveryPort, + type KloEventTypeDiscovery, + type KloEventTypeDiscoveryInput, + type KloQueryResult, + type KloReadOnlyQueryInput, + type KloScanConnector, + type KloScanContext, + type KloScanInput, + type KloSchemaColumn, + type KloSchemaSnapshot, + type KloSchemaTable, + type KloTableRef, + type KloTableSampleInput, + type KloTableSampleResult, +} from '@klo/context/scan'; +import { KloPostHogDialect, type KloPostHogSampleColumnInfo } from './dialect.js'; +import { getKloPostHogColumnDescription, getKloPostHogTableDescription } from './schema-descriptions.js'; + +export interface KloPostHogConnectionConfig { + driver?: string; + api_key?: string; + apiKey?: string; + project_id?: string; + projectId?: string; + region?: 'us' | 'eu'; + host?: string; + readonly?: boolean; + [key: string]: unknown; +} + +export interface KloPostHogResolvedConnectionConfig { + apiKey: string; + projectId: string; + baseUrl: string; +} + +export 
type KloPostHogFetch = (url: string, init?: RequestInit) => Promise; + +export interface KloPostHogScanConnectorOptions { + connectionId: string; + connection: KloPostHogConnectionConfig | undefined; + env?: NodeJS.ProcessEnv; + fetch?: KloPostHogFetch; + sleep?: (ms: number) => Promise; + now?: () => Date; +} + +export interface KloPostHogReadOnlyQueryInput extends KloReadOnlyQueryInput { + params?: Record; +} + +export interface KloPostHogColumnDistinctValuesOptions { + maxCardinality: number; + limit: number; + sampleSize?: number; +} + +export interface KloPostHogColumnDistinctValuesResult { + values: string[] | null; + cardinality: number; +} + +interface PostHogSchemaField { + name: string; + type: string; + hogql_value: string; + schema_valid: boolean; + table: string | null; + fields: string[] | null; + chain: string[] | null; + id: string | null; +} + +interface PostHogSchemaTable { + id: string; + name: string; + type: string; + row_count: number | null; + fields: Record; +} + +interface PostHogSchemaResponse { + tables: Record; + joins: unknown[]; +} + +interface PostHogQueryResponse { + results: unknown[][] | null; + columns: string[] | null; + types: [string, string][] | null; + error: string | null; + hogql: string | null; +} + +const allowedTableTypes = new Set(['posthog', 'system']); +const excludedTables = new Set([ + 'query_log', + 'system.teams', + 'system.exports', + 'system.ingestion_warnings', + 'system.insight_variables', + 'system.data_warehouse_sources', + 'system.groups', + 'system.group_type_mappings', +]); +const hiddenTablesToProbe = ['person_distinct_ids', 'cohort_people', 'static_cohort_people']; + +export function isKloPostHogConnectionConfig(connection: KloPostHogConnectionConfig | undefined): boolean { + return String(connection?.driver ?? 
/**
 * Resolves a configuration string that may point at an indirect source.
 *
 * - `env:NAME` returns `env[NAME]`, or an empty string when the variable is unset.
 * - `file:PATH` returns the trimmed UTF-8 contents of the file at PATH. A PATH that starts
 *   with `~` is resolved against the current user's home directory.
 * - Any other value is returned unchanged.
 *
 * @param value - Raw configuration value.
 * @param env - Environment used for `env:` lookups.
 * @returns The resolved string.
 * @throws Whatever `readFileSync` throws when a `file:` target cannot be read.
 */
function resolveStringReference(value: string, env: NodeJS.ProcessEnv): string {
  if (value.startsWith('env:')) {
    return env[value.slice('env:'.length)] ?? '';
  }
  if (value.startsWith('file:')) {
    const rawPath = value.slice('file:'.length);
    // Drop the separator that follows "~" before resolving: path.resolve() treats a segment
    // that starts with "/" as absolute and would discard the home directory entirely,
    // turning "~/key.txt" into "/key.txt".
    const path = rawPath.startsWith('~') ? resolve(homedir(), rawPath.slice(1).replace(/^[\\/]+/, '')) : rawPath;
    return readFileSync(path, 'utf-8').trim();
  }
  return value;
}
'us'; + return { + apiKey, + projectId, + baseUrl: host ? host.replace(/\/$/, '') : region === 'eu' ? 'https://eu.posthog.com' : 'https://us.posthog.com', + }; +} + +export class KloPostHogScanConnector implements KloScanConnector { + readonly id: string; + readonly driver = 'posthog' as const; + readonly capabilities = createKloConnectorCapabilities({ + tableSampling: true, + columnSampling: true, + columnStats: false, + readOnlySql: true, + nestedAnalysis: true, + eventStreamDiscovery: true, + formalForeignKeys: false, + estimatedRowCounts: true, + }); + + readonly eventStreamDiscovery: KloEventStreamDiscoveryPort = { + listEventTypes: (input, ctx) => this.listEventTypes(input, ctx), + listPropertyKeys: (input, ctx) => this.listPropertyKeys(input, ctx), + listPropertyValues: (input, ctx) => this.listPropertyValues(input, ctx), + }; + + private readonly connectionId: string; + private readonly resolved: KloPostHogResolvedConnectionConfig; + private readonly fetchImpl: KloPostHogFetch; + private readonly sleep: (ms: number) => Promise; + private readonly now: () => Date; + private readonly dialect = new KloPostHogDialect(); + + constructor(options: KloPostHogScanConnectorOptions) { + this.connectionId = options.connectionId; + this.resolved = postHogConnectionConfigFromConfig({ + connectionId: options.connectionId, + connection: options.connection, + env: options.env, + }); + this.fetchImpl = options.fetch ?? fetch; + this.sleep = options.sleep ?? ((ms) => new Promise((resolveSleep) => setTimeout(resolveSleep, ms))); + this.now = options.now ?? (() => new Date()); + this.id = `posthog:${options.connectionId}`; + } + + async testConnection(): Promise<{ success: boolean; error?: string }> { + const response = await this.query('SELECT 1 AS test'); + return response.error ? 
/**
 * Builds a schema snapshot for this connection from PostHog's `DatabaseSchemaQuery`.
 *
 * Tables whose type is not in `allowedTableTypes`, or whose name is in `excludedTables`,
 * are dropped. Tables found by `discoverHiddenTables()` are appended, and the combined
 * list is sorted by name.
 *
 * @param input - Scan input; its `connectionId` must match this connector.
 * @param _ctx - Scan context (unused).
 * @returns The snapshot, including table and column counts in `metadata`.
 * @throws Error when `input.connectionId` belongs to a different connection.
 */
async introspect(input: KloScanInput, _ctx: KloScanContext): Promise<KloSchemaSnapshot> {
  this.assertConnection(input.connectionId);
  const schema = await this.makeRequest<PostHogSchemaResponse>('/query', { query: { kind: 'DatabaseSchemaQuery' } });

  const listedTables = Object.entries(schema.tables ?? {})
    .filter(([name, info]) => allowedTableTypes.has(info.type) && !excludedTables.has(name))
    .map(([name, info]) => this.toSchemaTable(name, info));
  const hiddenTables = await this.discoverHiddenTables();
  const tables: KloSchemaTable[] = [...listedTables, ...hiddenTables];
  tables.sort((a, b) => a.name.localeCompare(b.name));

  const projectId = this.resolved.projectId;
  const extractedAt = this.now().toISOString();
  let totalColumns = 0;
  for (const table of tables) {
    totalColumns += table.columns.length;
  }

  return {
    connectionId: this.connectionId,
    driver: 'posthog',
    extractedAt,
    scope: { catalogs: [projectId] },
    metadata: {
      project_id: projectId,
      table_count: tables.length,
      total_columns: totalColumns,
    },
    tables,
  };
}
/**
 * Estimates a column's cardinality on a sample and, when it is small enough, lists its
 * distinct values.
 *
 * @param table - Table reference; formatted through `qTableName`.
 * @param columnName - Column (or `properties.<key>` path) to inspect.
 * @param options - `maxCardinality` threshold, `limit` on returned values, and optional
 *   `sampleSize` (defaults to 10000 rows).
 * @returns `null` when either query reports an error, returns no rows, or yields a
 *   non-numeric cardinality; `{ values: [], cardinality: 0 }` for an empty column;
 *   `{ values: null, cardinality }` when the cardinality exceeds `maxCardinality`;
 *   otherwise the stringified distinct values with the sampled cardinality.
 */
async getColumnDistinctValues(
  table: KloTableRef,
  columnName: string,
  options: KloPostHogColumnDistinctValuesOptions,
): Promise<KloPostHogColumnDistinctValuesResult | null> {
  const sampleSize = options.sampleSize ?? 10000;
  const tableName = this.qTableName(table);
  const cardinalityResult = await this.query(
    this.dialect.generateCardinalitySampleQuery(tableName, columnName, sampleSize),
  );
  if (cardinalityResult.error || cardinalityResult.rows.length === 0) {
    return null;
  }
  const cardinality = Number(cardinalityResult.rows[0]?.[0]);
  if (!Number.isFinite(cardinality)) {
    return null;
  }
  if (cardinality === 0) {
    return { values: [], cardinality: 0 };
  }
  if (cardinality > options.maxCardinality) {
    return { values: null, cardinality };
  }
  const valuesResult = await this.query(this.dialect.generateDistinctValuesQuery(tableName, columnName, options.limit));
  if (valuesResult.error) {
    return null;
  }
  return {
    // `!= null` also rejects `undefined`, so an empty row cannot become the string "undefined".
    values: valuesResult.rows.filter((row) => row[0] != null).map((row) => String(row[0])),
    cardinality,
  };
}
`HAVING cnt >= ${minCount}` : ''; + const result = await this.query(` + SELECT ${eventColumn} AS event, count() as cnt + FROM ${tableName} + WHERE timestamp > now() - INTERVAL ${lookbackDays} DAY + GROUP BY event + ${havingClause} + ORDER BY cnt DESC + LIMIT ${limit} + `); + if (result.error) { + return []; + } + return result.rows + .filter((row) => row[0] != null && String(row[0]).trim() !== '') + .map((row) => ({ value: String(row[0]), count: Number(row[1]) })); + } + + private async listPropertyKeys( + input: KloEventPropertyDiscoveryInput, + _ctx: KloScanContext, + ): Promise { + this.assertConnection(input.connectionId); + const sampleSize = this.positiveInteger(input.sampleSize, 'sampleSize'); + const limit = this.positiveInteger(input.limit, 'limit'); + const lookbackDays = input.lookbackDays === undefined ? null : this.positiveInteger(input.lookbackDays, 'lookbackDays'); + const tableName = this.qTableName(input.table); + const jsonColumn = this.dialect.quoteIdentifier(input.jsonColumn); + const whereClause = lookbackDays === null ? '' : `WHERE timestamp > now() - INTERVAL ${lookbackDays} DAY`; + const result = await this.query(` + SELECT key, count() as cnt + FROM ( + SELECT arrayJoin(JSONExtractKeys(${jsonColumn})) AS key + FROM ${tableName} + ${whereClause} + LIMIT ${sampleSize} + ) + GROUP BY key + ORDER BY cnt DESC + LIMIT ${limit} + `); + if (result.error) { + return []; + } + return result.rows.map((row) => ({ key: String(row[0]), count: Number(row[1]) })); + } + + private async listPropertyValues( + input: KloEventPropertyValuesInput, + _ctx: KloScanContext, + ): Promise { + this.assertConnection(input.connectionId); + const limit = this.positiveInteger(input.limit, 'limit'); + const maxCardinality = this.positiveInteger(input.maxCardinality ?? 1000, 'maxCardinality'); + const lookbackDays = input.lookbackDays === undefined ? 
null : this.positiveInteger(input.lookbackDays, 'lookbackDays'); + const tableName = this.qTableName(input.table); + const jsonColumn = this.dialect.quoteIdentifier(input.jsonColumn); + const escapedKey = this.escapeHogQLString(input.propertyKey); + const timeFilter = lookbackDays === null ? '' : `WHERE timestamp > now() - INTERVAL ${lookbackDays} DAY`; + const cardinalityResult = await this.query(` + SELECT uniq(JSONExtractString(${jsonColumn}, '${escapedKey}')) as cardinality + FROM ${tableName} + ${timeFilter} + LIMIT 1000000 + `); + if (cardinalityResult.error || cardinalityResult.rows.length === 0) { + return null; + } + const cardinality = Number(cardinalityResult.rows[0]?.[0]); + if (!Number.isFinite(cardinality) || cardinality > maxCardinality) { + return null; + } + const valuesResult = await this.query(` + SELECT DISTINCT JSONExtractString(${jsonColumn}, '${escapedKey}') as value + FROM ${tableName} + WHERE JSONExtractString(${jsonColumn}, '${escapedKey}') IS NOT NULL + AND JSONExtractString(${jsonColumn}, '${escapedKey}') != '' + ${lookbackDays === null ? '' : `AND timestamp > now() - INTERVAL ${lookbackDays} DAY`} + ORDER BY value + LIMIT ${limit} + `); + if (valuesResult.error) { + return null; + } + const values = valuesResult.rows + .map((row) => (row[0] != null ? String(row[0]) : '')) + .filter((value) => { + const trimmed = value.trim(); + return trimmed !== '' && trimmed !== '[]' && trimmed !== '{}' && trimmed !== 'null'; + }); + return { values, cardinality }; + } + + async cleanup(): Promise {} + + qTableName(table: Pick): string { + return this.dialect.formatTableName(table); + } + + quoteIdentifier(identifier: string): string { + return this.dialect.quoteIdentifier(identifier); + } + + private toSchemaTable(tableName: string, tableInfo: PostHogSchemaTable): KloSchemaTable { + return { + catalog: this.resolved.projectId, + db: null, + name: tableName, + kind: tableName === 'events' ? 
/**
 * Probes each name in `hiddenTablesToProbe` with a zero-row `SELECT *` and returns a table
 * entry for every probe that did not report an error.
 *
 * Only column names come back from the probe, so every column is recorded as a nullable,
 * non-key `String` / `VARCHAR` column.
 *
 * NOTE(review): `query()` only surfaces errors carried in a successful response body. If
 * PostHog answers an unknown table with a non-2xx status, `makeRequest` throws and the
 * whole probe aborts instead of skipping that table — confirm against the API.
 *
 * @returns The tables that could be queried, in probe order.
 */
private async discoverHiddenTables(): Promise<KloSchemaTable[]> {
  const discovered: KloSchemaTable[] = [];
  for (const hiddenName of hiddenTablesToProbe) {
    const probe = await this.query(`SELECT * FROM ${hiddenName} LIMIT 0`);
    if (!probe.error) {
      const tableComment = getKloPostHogTableDescription(hiddenName) ?? null;
      const columns = probe.headers.map((columnName) => ({
        name: columnName,
        nativeType: 'String',
        normalizedType: 'VARCHAR',
        dimensionType: 'string',
        nullable: true,
        primaryKey: false,
        comment: getKloPostHogColumnDescription(hiddenName, columnName) ?? null,
      }));
      discovered.push({
        catalog: this.resolved.projectId,
        db: null,
        name: hiddenName,
        kind: 'table',
        comment: tableComment,
        estimatedRows: null,
        columns,
        foreignKeys: [],
      });
    }
  }
  return discovered;
}
/**
 * Decides whether a schema column is reported as nullable.
 *
 * @param tableName - Name of the table that owns the field.
 * @param fieldName - Name of the field.
 * @param fieldType - PostHog field type as returned by the schema query.
 * @returns `false` for the `uuid`, `event`, `timestamp` and `distinct_id` columns of the
 *   `events` table, and for any field whose lower-cased type is one of those four strings;
 *   `true` otherwise.
 */
private isNullableField(tableName: string, fieldName: string, fieldType: string): boolean {
  // The identifying columns of the events table are always reported as NOT NULL.
  if (tableName === 'events' && ['uuid', 'event', 'timestamp', 'distinct_id'].includes(fieldName)) {
    return false;
  }
  // NOTE(review): this list is compared against the field *type*, yet `timestamp` and
  // `distinct_id` look like column names rather than types — confirm whether `fieldName`
  // was intended here.
  return !['uuid', 'event', 'timestamp', 'distinct_id'].includes(fieldType.toLowerCase());
}
/**
 * POSTs a JSON body to a project-scoped PostHog API endpoint and returns the parsed JSON
 * response.
 *
 * Only HTTP 429 responses are retried: the wait time is derived from the error message
 * and the request is re-sent, up to `maxRetries` additional attempts. Every other non-2xx
 * status fails immediately, because re-sending the same request without any delay cannot
 * succeed for client errors and only adds load for server errors.
 *
 * @param endpoint - Path appended to `/api/projects/<projectId>`.
 * @param body - JSON-serializable request payload.
 * @param maxRetries - Maximum number of retries after a rate-limit response.
 * @returns The response body parsed as JSON.
 * @throws Error with the HTTP status and the parsed error message when the request fails.
 */
private async makeRequest<T>(endpoint: string, body: Record<string, unknown>, maxRetries = 3): Promise<T> {
  const url = `${this.resolved.baseUrl}/api/projects/${this.resolved.projectId}${endpoint}`;
  for (let attempt = 0; attempt <= maxRetries; attempt += 1) {
    const response = await this.fetchImpl(url, {
      method: 'POST',
      headers: {
        Authorization: `Bearer ${this.resolved.apiKey}`,
        'Content-Type': 'application/json',
      },
      body: JSON.stringify(body),
    });
    if (response.ok) {
      return response.json() as Promise<T>;
    }
    const errorText = await response.text();
    const errorMessage = this.parseErrorMessage(errorText);
    if (response.status === 429 && attempt < maxRetries) {
      // Rate limited with retries left: wait for the advertised window, then try again.
      await this.sleep(this.parseRateLimitWaitTime(errorMessage) * 1000);
      continue;
    }
    // Non-retryable status, or the retry budget is exhausted.
    throw new Error(`PostHog API error (${response.status}): ${errorMessage}`);
  }
  // Only reachable when maxRetries is negative and no request was attempted.
  throw new Error('PostHog API request failed after retries');
}
/**
 * Validates a numeric discovery option and returns it unchanged.
 *
 * Despite the method name, zero is accepted: the check rejects only non-integers
 * (including NaN and Infinity) and negative values, which matches the "non-negative
 * integer" wording of the error message.
 *
 * @param value - Candidate number to validate.
 * @param name - Option name interpolated into the error message.
 * @returns `value` when it is an integer greater than or equal to zero.
 * @throws Error when `value` is not an integer or is negative.
 */
private positiveInteger(value: number, name: string): number {
  if (!Number.isInteger(value) || value < 0) {
    throw new Error(`PostHog event-stream discovery requires ${name} to be a non-negative integer`);
  }
  return value;
}
/**
 * SQL helpers for PostHog's HogQL dialect: identifier quoting, type mapping, and
 * generation of sampling, cardinality and time-bucketing queries.
 *
 * Column names of the form `properties.<key>` or `person.properties.<key>` are treated as
 * JSON property paths and rendered as `JSONExtractString(<parent>, '<key>')`.
 */
export class KloPostHogDialect {
  readonly type = 'posthog';

  /** Lower-cased native type name -> dimension type. */
  private readonly typeMappings: Record<string, KloSchemaDimensionType> = {
    datetime64: 'time',
    datetime: 'time',
    date: 'time',
    int64: 'number',
    int32: 'number',
    int16: 'number',
    int8: 'number',
    uint64: 'number',
    uint32: 'number',
    uint16: 'number',
    uint8: 'number',
    float64: 'number',
    float32: 'number',
    decimal: 'number',
    integer: 'number',
    string: 'string',
    uuid: 'string',
    json: 'string',
    boolean: 'boolean',
    bool: 'boolean',
  };

  /**
   * Wraps an identifier in backticks.
   *
   * Backslashes are escaped before backticks; otherwise an identifier ending in `\` would
   * escape the closing backtick and leave the identifier unterminated.
   */
  quoteIdentifier(identifier: string): string {
    return `\`${identifier.replace(/\\/g, '\\\\').replace(/`/g, '\\`')}\``;
  }

  /** Returns the quoted table name; catalog and db are not part of the rendered name. */
  formatTableName(table: PostHogTableNameRef): string {
    return this.quoteIdentifier(table.name);
  }

  /**
   * Maps a native type to a normalized SQL type name.
   *
   * @returns The mapped name, or the cleaned upper-cased native type when no mapping exists.
   */
  mapDataType(nativeType: string): string {
    const cleanType = this.cleanType(nativeType);
    const typeMapping: Record<string, string> = {
      STRING: 'VARCHAR',
      UUID: 'UUID',
      INT64: 'BIGINT',
      INT32: 'INTEGER',
      INT16: 'SMALLINT',
      INT8: 'TINYINT',
      UINT64: 'BIGINT',
      UINT32: 'INTEGER',
      UINT16: 'SMALLINT',
      UINT8: 'TINYINT',
      FLOAT64: 'DOUBLE',
      FLOAT32: 'FLOAT',
      DATETIME64: 'TIMESTAMP',
      DATETIME: 'TIMESTAMP',
      DATE: 'DATE',
      JSON: 'JSON',
      ARRAY: 'JSON',
      BOOLEAN: 'BOOLEAN',
      BOOL: 'BOOLEAN',
    };
    return typeMapping[cleanType] ?? cleanType;
  }

  /**
   * Maps a native type to a dimension type: exact table lookup first, then substring
   * heuristics, defaulting to `'string'` (also for an empty input).
   */
  mapToDimensionType(nativeType: string): KloSchemaDimensionType {
    if (!nativeType) {
      return 'string';
    }
    const cleanType = this.cleanType(nativeType).toLowerCase();
    const mapped = this.typeMappings[cleanType];
    if (mapped) {
      return mapped;
    }
    if (cleanType.includes('date') || cleanType.includes('time')) {
      return 'time';
    }
    if (cleanType.includes('int') || cleanType.includes('float') || cleanType.includes('decimal') || cleanType.includes('num')) {
      return 'number';
    }
    if (cleanType === 'bool' || cleanType === 'boolean') {
      return 'boolean';
    }
    return 'string';
  }

  /**
   * Builds a random-order sample query.
   *
   * @param tableName - Already-quoted table name.
   * @param limit - Maximum number of rows.
   * @param columns - Columns to select; `*` when omitted or empty.
   */
  generateSampleQuery(tableName: string, limit: number, columns?: string[]): string {
    const columnList =
      columns && columns.length > 0 ? columns.map((column) => this.quoteIdentifier(column)).join(', ') : '*';
    return `SELECT ${columnList} FROM ${tableName} ORDER BY rand() LIMIT ${limit}`;
  }

  /**
   * Builds a random-order sample query from column metadata. Columns that have a
   * `parentColumnId` are rendered through the property-path expression and aliased to
   * their full name; the others are selected directly.
   */
  generateSampleQueryWithMetadata(tableName: string, limit: number, columnMetadata?: KloPostHogSampleColumnInfo[]): string {
    if (!columnMetadata || columnMetadata.length === 0) {
      return this.generateSampleQuery(tableName, limit);
    }
    const columnList = columnMetadata
      .map((column) => {
        if (!column.parentColumnId) {
          return this.quoteIdentifier(column.name);
        }
        const expression = this.formatColumnExpression(column.name);
        return `${expression} AS ${this.quoteIdentifier(column.name)}`;
      })
      .join(', ');
    return `SELECT ${columnList} FROM ${tableName} ORDER BY rand() LIMIT ${limit}`;
  }

  /** Builds a random-order sample of the non-null values of one column or property path. */
  generateColumnSampleQuery(tableName: string, columnName: string, limit: number): string {
    const colExpr = this.formatColumnExpression(columnName);
    return `SELECT ${colExpr} FROM ${tableName} WHERE ${colExpr} IS NOT NULL ORDER BY rand() LIMIT ${limit}`;
  }

  /**
   * Rewrites `:name` placeholders to HogQL `{name}` placeholders for every key in `params`.
   *
   * Keys are matched literally: regex metacharacters in a key are escaped, and the
   * replacement is produced by a function so that `$` sequences in a key are not
   * interpreted as replacement patterns.
   *
   * @returns The rewritten SQL and the params (or `undefined` when there are none).
   */
  prepareQuery(sql: string, params?: Record<string, unknown>): { sql: string; params?: Record<string, unknown> } {
    if (!params) {
      return { sql, params: undefined };
    }
    let processedSql = sql;
    const processedParams: Record<string, unknown> = {};
    for (const [key, value] of Object.entries(params)) {
      const literalKey = key.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
      // (?!\w) keeps ":event" from matching inside ":event_name".
      processedSql = processedSql.replace(new RegExp(`:${literalKey}(?!\\w)`, 'g'), () => `{${key}}`);
      processedParams[key] = value;
    }
    return {
      sql: processedSql,
      params: Object.keys(processedParams).length > 0 ? processedParams : undefined,
    };
  }

  /** Returns a `rand() < pct` predicate, or an empty string when pct is outside (0, 1). */
  getRandomSampleFilter(samplePct: number): string {
    if (samplePct <= 0 || samplePct >= 1) {
      return '';
    }
    return `rand() < ${samplePct}`;
  }

  /** Always returns an empty string; this dialect emits no table-level sample clause. */
  getTableSampleClause(_samplePct: number): string {
    return '';
  }

  /** Returns `LIMIT n`, with `OFFSET m` appended only when the offset is greater than zero. */
  getLimitOffsetClause(limit: number, offset?: number): string {
    return offset !== undefined && offset > 0 ? `LIMIT ${limit} OFFSET ${offset}` : `LIMIT ${limit}`;
  }

  /** Returns a `countIf(... IS NULL)` expression; `column` is interpolated as given. */
  getNullCountExpression(column: string): string {
    return `countIf(${column} IS NULL)`;
  }

  /** Returns a `uniq(...)` expression; `column` is interpolated as given. */
  getDistinctCountExpression(column: string): string {
    return `uniq(${column})`;
  }

  /** Builds a query counting distinct non-null values within the first `sampleSize` rows. */
  generateCardinalitySampleQuery(tableName: string, columnName: string, sampleSize: number): string {
    const colExpr = this.formatColumnExpression(columnName);
    return `
      SELECT uniq(val) AS cardinality
      FROM (
        SELECT ${colExpr} AS val
        FROM ${tableName}
        WHERE ${colExpr} IS NOT NULL
        LIMIT ${sampleSize}
      )
    `;
  }

  /** Builds a query listing up to `limit` distinct non-null values as strings, sorted. */
  generateDistinctValuesQuery(tableName: string, columnName: string, limit: number): string {
    const colExpr = this.formatColumnExpression(columnName);
    return `
      SELECT DISTINCT toString(${colExpr}) AS val
      FROM ${tableName}
      WHERE ${colExpr} IS NOT NULL
      ORDER BY val
      LIMIT ${limit}
    `;
  }

  /** Always returns `null`; this dialect generates no column-statistics query. */
  generateColumnStatisticsQuery(_schemaName: string, _tableName: string): string | null {
    return null;
  }

  /** Same as `generateCardinalitySampleQuery`, but the sampled rows are ordered by `rand()`. */
  generateRandomizedCardinalitySampleQuery(tableName: string, columnName: string, sampleSize: number): string {
    const colExpr = this.formatColumnExpression(columnName);
    return `
      SELECT uniq(val) AS cardinality
      FROM (
        SELECT ${colExpr} AS val
        FROM ${tableName}
        WHERE ${colExpr} IS NOT NULL
        ORDER BY rand()
        LIMIT ${sampleSize}
      )
    `;
  }

  /** Returns a `DATE_TRUNC` expression, converting to `timezone` first when one is given. */
  getTimeTruncExpression(
    column: string,
    granularity: 'day' | 'week' | 'month' | 'quarter' | 'year',
    timezone?: string,
  ): string {
    const col = timezone ? `toTimeZone(${column}, '${timezone}')` : column;
    return `DATE_TRUNC('${granularity}', ${col})`;
  }

  /**
   * Returns an expression that buckets `column` into fixed-width intervals counted from
   * `origin` (default `1970-01-01`).
   *
   * @param interval - `"<amount> <unit>"`, e.g. `"2 hour"` or `"2 hours"`; an unknown unit
   *   falls back to one day.
   */
  getCustomTimeTruncExpression(column: string, interval: string, origin?: string, timezone?: string): string {
    const col = timezone ? `toTimeZone(${column}, '${timezone}')` : column;
    // Tolerate surrounding or repeated whitespace between amount and unit.
    const [amount, unit] = interval.trim().split(/\s+/);
    const seconds = Number(amount) * this.getUnitSeconds(unit ?? 'day');
    const originExpr = origin ? `toDateTime('${origin}')` : `toDateTime('1970-01-01')`;
    return `${originExpr} + toIntervalSecond(intDiv(toUnixTimestamp(${col}) - toUnixTimestamp(${originExpr}), ${seconds}) * ${seconds})`;
  }

  /** Converts `"<amount> <unit>"` to `INTERVAL <amount> <UNIT>`, defaulting the unit to DAY. */
  parseIntervalToSql(interval: string): string {
    const [amount, unit] = interval.split(' ');
    return `INTERVAL ${amount} ${unit?.toUpperCase() ?? 'DAY'}`;
  }

  /**
   * Renders a column reference: property paths become `JSONExtractString(...)`, anything
   * else is quoted as an identifier. One leading and one trailing backtick are stripped
   * from the input first.
   */
  private formatColumnExpression(columnName: string): string {
    const rawName = columnName.replace(/^`|`$/g, '');
    const propertyMatch = rawName.match(/^(properties|person\.properties)\.(.+)$/);
    if (propertyMatch) {
      const [, parentCol, propertyKey] = propertyMatch;
      return `JSONExtractString(${parentCol}, '${this.escapeStringLiteral(propertyKey ?? '')}')`;
    }
    return this.quoteIdentifier(rawName);
  }

  /**
   * Escapes a value for use inside a single-quoted string literal. Backslashes are doubled
   * before quotes so that a trailing backslash cannot escape the closing quote.
   */
  private escapeStringLiteral(value: string): string {
    return value.replace(/\\/g, '\\\\').replace(/'/g, "''");
  }

  /**
   * Upper-cases a native type and reduces it to its base name: `Nullable(...)` and
   * `LowCardinality(...)` wrappers are removed, and parameterized `Array`, `DateTime64`
   * and `DateTime` types collapse to `ARRAY`, `DATETIME64` and `DATETIME`.
   */
  private cleanType(nativeType: string): string {
    let cleanType = nativeType.toUpperCase().trim();
    const wrapperPattern = /^(?:NULLABLE|LOWCARDINALITY)\((.+)\)$/;
    let wrapper = wrapperPattern.exec(cleanType);
    while (wrapper?.[1] !== undefined) {
      cleanType = wrapper[1].trim();
      wrapper = wrapperPattern.exec(cleanType);
    }
    if (cleanType.startsWith('ARRAY(')) {
      return 'ARRAY';
    }
    if (cleanType.startsWith('DATETIME64')) {
      return 'DATETIME64';
    }
    if (cleanType.startsWith('DATETIME(')) {
      return 'DATETIME';
    }
    return cleanType;
  }

  /**
   * Returns the length of a time unit in seconds (month = 30 days, quarter = 90 days,
   * year = 365 days). A plural unit is reduced to its singular form; an unknown unit
   * falls back to one day.
   */
  private getUnitSeconds(unit: string): number {
    const secondsByUnit: Record<string, number> = {
      second: 1,
      minute: 60,
      hour: 3600,
      day: 86400,
      week: 604800,
      month: 2592000,
      quarter: 7776000,
      year: 31536000,
    };
    const singular = unit.toLowerCase().replace(/s$/, '');
    return secondsByUnit[singular] ?? 86400;
  }
}
undefined; + const connector = new KloPostHogScanConnector({ + connectionId, + connection, + env: options.env, + fetch: options.fetch, + sleep: options.sleep, + now: options.now, + }); + try { + return await connector.introspect({ connectionId, driver: 'posthog' }, { runId: `posthog-${connectionId}` }); + } finally { + await connector.cleanup(); + } + }, + }; +} diff --git a/packages/connector-posthog/src/package-exports.test.ts b/packages/connector-posthog/src/package-exports.test.ts new file mode 100644 index 00000000..006dfb25 --- /dev/null +++ b/packages/connector-posthog/src/package-exports.test.ts @@ -0,0 +1,11 @@ +import { describe, expect, it } from 'vitest'; +import * as posthog from './index.js'; + +describe('@klo/connector-posthog package exports', () => { + it('exports the connector, dialect, descriptions, and live-database adapter', () => { + expect(posthog.KloPostHogDialect).toBeTypeOf('function'); + expect(posthog.KloPostHogScanConnector).toBeTypeOf('function'); + expect(posthog.createPostHogLiveDatabaseIntrospection).toBeTypeOf('function'); + expect(posthog.getKloPostHogPropertyDescription('$browser')).toBe('User browser name.'); + }); +}); diff --git a/packages/connector-posthog/src/schema-descriptions.ts b/packages/connector-posthog/src/schema-descriptions.ts new file mode 100644 index 00000000..6a15ad65 --- /dev/null +++ b/packages/connector-posthog/src/schema-descriptions.ts @@ -0,0 +1,99 @@ +const TABLE_DESCRIPTIONS: Record = { + events: + 'PostHog event stream containing all tracked user interactions. Each row represents a single event with properties, timestamp, and user identifier.', + persons: + 'PostHog persons table containing unique users, identifiers, and user properties for segmentation and cohort analysis.', + sessions: + 'PostHog sessions table grouping events into user sessions with duration, entry and exit URLs, and device details.', + groups: + 'PostHog groups table for B2B and team-based analytics. 
Contains group identifiers and group properties.', + person_distinct_ids: 'PostHog identity resolution table mapping distinct_ids to person_ids.', + cohort_people: 'PostHog dynamic cohort membership table.', + static_cohort_people: 'PostHog static cohort membership table.', + 'system.cohorts': 'PostHog cohort definitions table.', + 'system.feature_flags': 'PostHog feature flag definitions table.', + 'system.experiments': 'PostHog A/B test and experiment definitions table.', + 'system.surveys': 'PostHog survey definitions table.', + 'system.dashboards': 'PostHog dashboard metadata table.', + 'system.insights': 'PostHog saved insight and chart definitions table.', +}; + +const COLUMN_DESCRIPTIONS: Record = { + 'events.uuid': 'Unique identifier for this specific event.', + 'events.event': 'Event name such as $pageview, $autocapture, $identify, or a custom event.', + 'events.distinct_id': 'User identifier that links events to persons.', + 'events.timestamp': 'UTC timestamp when the event occurred.', + 'events.created_at': 'Timestamp when the event was ingested into PostHog.', + 'events.properties': 'JSON object containing event-specific properties.', + 'events.person_id': 'Internal PostHog person UUID.', + 'events.$session_id': 'Session identifier linking this event to sessions.', + 'persons.id': 'Internal PostHog person UUID.', + 'persons.distinct_id': 'Primary user identifier for joins with events.', + 'persons.properties': 'JSON object containing user properties.', + 'persons.created_at': 'Timestamp when this person was first seen in PostHog.', + 'persons.is_identified': 'Whether the person has been explicitly identified.', + 'sessions.session_id': 'Unique session identifier.', + 'sessions.distinct_id': 'User identifier for this session.', + 'sessions.$start_timestamp': 'Timestamp when the session started.', + 'sessions.$end_timestamp': 'Timestamp when the session ended.', + 'sessions.$session_duration': 'Total session duration in seconds.', + 'groups.index': 'Index 
identifying the configured PostHog group type.', + 'groups.key': 'Unique identifier for this group.', + 'groups.properties': 'JSON object containing group properties.', + 'groups.created_at': 'Timestamp when this group was first seen.', + 'person_distinct_ids.distinct_id': 'Device or browser identifier for a person.', + 'person_distinct_ids.person_id': 'Internal PostHog person UUID mapped to the distinct_id.', + 'cohort_people.person_id': 'Person UUID belonging to the cohort.', + 'cohort_people.cohort_id': 'Cohort identifier.', + 'static_cohort_people.person_id': 'Person UUID belonging to the static cohort.', + 'static_cohort_people.cohort_id': 'Static cohort identifier.', + 'system.cohorts.id': 'Unique cohort identifier.', + 'system.cohorts.name': 'Human-readable cohort name.', + 'system.feature_flags.id': 'Unique feature flag identifier.', + 'system.feature_flags.key': 'Feature flag key used in code.', + 'system.experiments.id': 'Unique experiment identifier.', + 'system.experiments.name': 'Experiment name.', + 'system.surveys.id': 'Unique survey identifier.', + 'system.surveys.name': 'Survey name.', + 'system.dashboards.id': 'Unique dashboard identifier.', + 'system.dashboards.name': 'Dashboard name.', + 'system.insights.id': 'Unique insight identifier.', + 'system.insights.name': 'Insight or chart name.', +}; + +const PROPERTY_DESCRIPTIONS: Record = { + $browser: 'User browser name.', + $browser_version: 'User browser version.', + $os: 'Operating system.', + $os_version: 'Operating system version.', + $device: 'Device name.', + $device_type: 'Device type.', + $current_url: 'Full URL of the current page.', + $pathname: 'Path portion of the current URL.', + $host: 'Hostname of the current page.', + $referrer: 'Referrer URL.', + $referring_domain: 'Referrer domain.', + $utm_source: 'UTM source parameter.', + $utm_medium: 'UTM medium parameter.', + $utm_campaign: 'UTM campaign parameter.', + $utm_content: 'UTM content parameter.', + $utm_term: 'UTM term 
/**
 * Returns the built-in description for a PostHog property key.
 *
 * @param propertyKey Property name to look up, e.g. `$browser`.
 * @returns The description string, or `null` when the key has no entry in
 *   `PROPERTY_DESCRIPTIONS`.
 */
export function getKloPostHogPropertyDescription(propertyKey: string): string | null {
  // Only consult the table's own entries. A bare index lookup on a plain object
  // also resolves inherited members, so keys that happen to match
  // Object.prototype names ("constructor", "toString", "__proto__", ...) would
  // return a function/object instead of a description string.
  if (!Object.prototype.hasOwnProperty.call(PROPERTY_DESCRIPTIONS, propertyKey)) {
    return null;
  }
  return PROPERTY_DESCRIPTIONS[propertyKey] ?? null;
}
"vitest run", + "type-check": "tsc -p tsconfig.json --noEmit" + }, + "dependencies": { + "@klo/context": "workspace:*", + "snowflake-sdk": "^2.3.4" + }, + "devDependencies": { + "@types/node": "^24.3.0", + "typescript": "^5.9.3", + "vitest": "^4.0.18" + }, + "license": "Apache-2.0", + "repository": { + "type": "git", + "url": "git+https://github.com/kaelio/ktx.git", + "directory": "packages/connector-snowflake" + }, + "bugs": { + "url": "https://github.com/kaelio/ktx/issues" + }, + "homepage": "https://github.com/kaelio/ktx#readme" +} diff --git a/packages/connector-snowflake/src/connector.test.ts b/packages/connector-snowflake/src/connector.test.ts new file mode 100644 index 00000000..62bd6944 --- /dev/null +++ b/packages/connector-snowflake/src/connector.test.ts @@ -0,0 +1,257 @@ +import { describe, expect, it, vi } from 'vitest'; +import { + createSnowflakeLiveDatabaseIntrospection, + isKloSnowflakeConnectionConfig, + KloSnowflakeScanConnector, + snowflakeConnectionConfigFromConfig, + type KloSnowflakeDriver, + type KloSnowflakeDriverFactory, +} from './index.js'; + +function fakeDriverFactory(): KloSnowflakeDriverFactory { + const driver: KloSnowflakeDriver = { + test: vi.fn(async () => ({ success: true })), + query: vi.fn(async (sql: string) => { + if (sql.includes('TABLE_CONSTRAINTS')) { + return { headers: ['TABLE_NAME', 'COLUMN_NAME'], rows: [['ORDERS', 'ID']], totalRows: 1, rowCount: 1 }; + } + if (sql.includes('SELECT "ID", "STATUS" FROM "ANALYTICS"."PUBLIC"."ORDERS"')) { + return { + headers: ['ID', 'STATUS'], + headerTypes: ['NUMBER', 'VARCHAR'], + rows: [[1, 'paid']], + totalRows: 1, + rowCount: 1, + }; + } + if (sql.includes('select * from (select ID, STATUS from ORDERS) as klo_query_result limit 1')) { + return { headers: ['ID', 'STATUS'], rows: [[1, 'paid']], totalRows: 1, rowCount: 1 }; + } + if (sql.includes('SELECT "STATUS" FROM "ANALYTICS"."PUBLIC"."ORDERS"')) { + return { headers: ['STATUS'], rows: [['paid'], ['open']], totalRows: 2, rowCount: 
2 }; + } + if (sql.includes('COUNT(DISTINCT val)')) { + return { headers: ['CARDINALITY'], rows: [[2]], totalRows: 1, rowCount: 1 }; + } + if (sql.includes('SELECT DISTINCT "STATUS"::VARCHAR AS val')) { + return { headers: ['VAL'], rows: [['open'], ['paid']], totalRows: 2, rowCount: 2 }; + } + throw new Error(`Unexpected SQL: ${sql}`); + }), + getSchemaMetadata: vi.fn(async () => [ + { + name: 'ORDERS', + catalog: 'ANALYTICS', + db: 'PUBLIC', + rowCount: 12, + comment: 'Orders', + columns: [ + { name: 'ID', type: 'NUMBER(38,0)', nullable: false, comment: 'Primary key' }, + { name: 'STATUS', type: 'VARCHAR', nullable: true, comment: null }, + ], + }, + { + name: 'ORDER_SUMMARY', + catalog: 'ANALYTICS', + db: 'PUBLIC', + rowCount: 3, + comment: null, + columns: [{ name: 'STATUS', type: 'VARCHAR', nullable: true, comment: null }], + }, + ]), + listSchemas: vi.fn(async () => ['PUBLIC', 'MART']), + cleanup: vi.fn(async () => undefined), + }; + return { createDriver: vi.fn(() => driver) }; +} + +describe('KloSnowflakeScanConnector', () => { + it('resolves Snowflake connection configuration safely', () => { + expect( + isKloSnowflakeConnectionConfig({ + driver: 'snowflake', + account: 'acct', + warehouse: 'WH', + database: 'ANALYTICS', + username: 'reader', + readonly: true, + }), + ).toBe(true); + expect(isKloSnowflakeConnectionConfig({ driver: 'bigquery' })).toBe(false); + expect( + snowflakeConnectionConfigFromConfig({ + connectionId: 'warehouse', + connection: { + driver: 'snowflake', + authMethod: 'password', + account: 'acct', + warehouse: 'WH', + database: 'ANALYTICS', + schema_name: 'PUBLIC', + username: 'reader', + password: 'fixture-pass', // pragma: allowlist secret + readonly: true, + }, + }), + ).toMatchObject({ + account: 'acct', + warehouse: 'WH', + database: 'ANALYTICS', + schemas: ['PUBLIC'], + username: 'reader', + authMethod: 'password', + }); + expect(() => + snowflakeConnectionConfigFromConfig({ + connectionId: 'warehouse', + connection: { driver: 
'snowflake', account: 'acct', readonly: false }, + }), + ).toThrow('Native Snowflake connector requires connections.warehouse.readonly: true'); + }); + + it('introspects schema, primary keys, comments, row counts, and dimensions', async () => { + const connector = new KloSnowflakeScanConnector({ + connectionId: 'warehouse', + connection: { + driver: 'snowflake', + authMethod: 'password', + account: 'acct', + warehouse: 'WH', + database: 'ANALYTICS', + schema_name: 'PUBLIC', + username: 'reader', + password: 'fixture-pass', // pragma: allowlist secret + readonly: true, + }, + driverFactory: fakeDriverFactory(), + now: () => new Date('2026-04-29T18:00:00.000Z'), + }); + + const snapshot = await connector.introspect( + { connectionId: 'warehouse', driver: 'snowflake' }, + { runId: 'scan-run-1' }, + ); + + expect(snapshot).toMatchObject({ + connectionId: 'warehouse', + driver: 'snowflake', + extractedAt: '2026-04-29T18:00:00.000Z', + scope: { catalogs: ['ANALYTICS'], schemas: ['PUBLIC'] }, + metadata: { + account: 'acct', + warehouse: 'WH', + database: 'ANALYTICS', + schemas: ['PUBLIC'], + table_count: 2, + total_columns: 3, + }, + }); + expect(snapshot.tables.find((table) => table.name === 'ORDERS')?.columns).toEqual([ + { + name: 'ID', + nativeType: 'NUMBER(38,0)', + normalizedType: 'NUMBER(38,0)', + dimensionType: 'number', + nullable: false, + primaryKey: true, + comment: 'Primary key', + }, + { + name: 'STATUS', + nativeType: 'VARCHAR', + normalizedType: 'VARCHAR', + dimensionType: 'string', + nullable: true, + primaryKey: false, + comment: null, + }, + ]); + }); + + it('supports read-only query, sampling, distinct values, row counts, schema listing, and cleanup', async () => { + const driverFactory = fakeDriverFactory(); + const connector = new KloSnowflakeScanConnector({ + connectionId: 'warehouse', + connection: { + driver: 'snowflake', + authMethod: 'password', + account: 'acct', + warehouse: 'WH', + database: 'ANALYTICS', + schema_name: 'PUBLIC', + username: 
'reader', + password: 'fixture-pass', // pragma: allowlist secret + readonly: true, + }, + driverFactory, + }); + + await expect( + connector.sampleTable( + { + connectionId: 'warehouse', + table: { catalog: 'ANALYTICS', db: 'PUBLIC', name: 'ORDERS' }, + limit: 1, + columns: ['ID', 'STATUS'], + }, + { runId: 'scan-run-1' }, + ), + ).resolves.toMatchObject({ headers: ['ID', 'STATUS'], rows: [[1, 'paid']], totalRows: 1 }); + await expect( + connector.executeReadOnly( + { connectionId: 'warehouse', sql: 'select ID, STATUS from ORDERS', maxRows: 1 }, + { runId: 'scan-run-1' }, + ), + ).resolves.toMatchObject({ headers: ['ID', 'STATUS'], rows: [[1, 'paid']], rowCount: 1 }); + await expect( + connector.sampleColumn( + { + connectionId: 'warehouse', + table: { catalog: 'ANALYTICS', db: 'PUBLIC', name: 'ORDERS' }, + column: 'STATUS', + limit: 2, + }, + { runId: 'scan-run-1' }, + ), + ).resolves.toEqual({ values: ['paid', 'open'], nullCount: null, distinctCount: null }); + await expect( + connector.getColumnDistinctValues({ catalog: 'ANALYTICS', db: 'PUBLIC', name: 'ORDERS' }, 'STATUS', { + maxCardinality: 10, + limit: 5, + }), + ).resolves.toEqual({ values: ['open', 'paid'], cardinality: 2 }); + await expect(connector.getTableRowCount('ORDERS')).resolves.toBe(12); + await expect(connector.listSchemas()).resolves.toEqual(['PUBLIC', 'MART']); + await connector.cleanup(); + const driver = (driverFactory.createDriver as ReturnType).mock.results[0]?.value as KloSnowflakeDriver; + expect(driver.cleanup).toHaveBeenCalledTimes(1); + }); + + it('converts a native snapshot into a live-database introspection snapshot', async () => { + const introspection = createSnowflakeLiveDatabaseIntrospection({ + connections: { + warehouse: { + driver: 'snowflake', + authMethod: 'password', + account: 'acct', + warehouse: 'WH', + database: 'ANALYTICS', + schema_name: 'PUBLIC', + username: 'reader', + password: 'fixture-pass', // pragma: allowlist secret + readonly: true, + }, + }, + 
driverFactory: fakeDriverFactory(), + now: () => new Date('2026-04-29T18:00:00.000Z'), + }); + + await expect(introspection.extractSchema('warehouse')).resolves.toMatchObject({ + connectionId: 'warehouse', + metadata: { database: 'ANALYTICS', schemas: ['PUBLIC'] }, + tables: expect.arrayContaining([ + expect.objectContaining({ catalog: 'ANALYTICS', db: 'PUBLIC', name: 'ORDERS' }), + ]), + }); + }); +}); diff --git a/packages/connector-snowflake/src/connector.ts b/packages/connector-snowflake/src/connector.ts new file mode 100644 index 00000000..95b68ebe --- /dev/null +++ b/packages/connector-snowflake/src/connector.ts @@ -0,0 +1,689 @@ +import { createPrivateKey } from 'node:crypto'; +import { readFileSync } from 'node:fs'; +import { homedir } from 'node:os'; +import { resolve } from 'node:path'; +import { assertReadOnlySql, limitSqlForExecution } from '@klo/context/connections'; +import { + createKloConnectorCapabilities, + type KloColumnSampleInput, + type KloColumnSampleResult, + type KloColumnStatsInput, + type KloColumnStatsResult, + type KloQueryResult, + type KloReadOnlyQueryInput, + type KloScanConnector, + type KloScanContext, + type KloScanInput, + type KloSchemaColumn, + type KloSchemaSnapshot, + type KloSchemaTable, + type KloTableRef, + type KloTableSampleInput, + type KloTableSampleResult, +} from '@klo/context/scan'; +import * as snowflake from 'snowflake-sdk'; +import { KloSnowflakeDialect } from './dialect.js'; + +export interface KloSnowflakeConnectionConfig { + driver?: string; + authMethod?: 'password' | 'rsa'; + account?: string; + warehouse?: string; + database?: string; + schema_name?: string; + schema_names?: string[]; + username?: string; + password?: string; + privateKey?: string; + passphrase?: string; + role?: string; + readonly?: boolean; + [key: string]: unknown; +} + +export interface KloSnowflakeResolvedConnectionConfig { + authMethod: 'password' | 'rsa'; + account: string; + warehouse: string; + database: string; + schemas: 
/**
 * Resolves a configuration string that may reference an external value.
 *
 * Supported forms:
 * - `env:NAME`  -> the value of `env[NAME]`, or `''` when the variable is not set.
 * - `file:PATH` -> the UTF-8 contents of the file with surrounding whitespace trimmed.
 *   A leading `~` in PATH is expanded to the current user's home directory.
 * - anything else -> returned unchanged.
 *
 * @param value Raw configuration value.
 * @param env Environment map used for `env:` references.
 * @returns The resolved string.
 * @throws Any error raised by `readFileSync` when a `file:` reference cannot be read.
 */
function resolveStringReference(value: string, env: NodeJS.ProcessEnv): string {
  if (value.startsWith('env:')) {
    return env[value.slice('env:'.length)] ?? '';
  }
  if (value.startsWith('file:')) {
    const rawPath = value.slice('file:'.length);
    // Drop the separator(s) that follow "~" before resolving: path.resolve()
    // treats a segment starting with "/" as absolute and would discard the home
    // directory, turning "~/key.pem" into "/key.pem".
    const path = rawPath.startsWith('~')
      ? resolve(homedir(), rawPath.slice(1).replace(/^[\\/]+/, ''))
      : rawPath;
    return readFileSync(path, 'utf-8').trim();
  }
  return value;
}
/**
 * Normalizes one cell value returned by the Snowflake SDK.
 *
 * - For columns whose type name contains one of `DATE_TYPES`, numbers and `Date`
 *   objects are converted to ISO-8601 strings. Values that do not represent a
 *   valid date are returned unchanged instead of throwing.
 * - Strings that look like a JSON object or array (after trimming) are parsed;
 *   when parsing fails the original string is returned.
 * - Everything else is returned as-is.
 *
 * @param value Raw cell value.
 * @param columnType Optional column type name reported for the cell's column.
 * @returns The normalized value.
 */
function normalizeSnowflakeValue(value: unknown, columnType?: string): unknown {
  if (columnType) {
    // Upper-case once instead of once per candidate type.
    const upperType = columnType.toUpperCase();
    if (DATE_TYPES.some((type) => upperType.includes(type))) {
      const asDate = typeof value === 'number' ? new Date(value) : value instanceof Date ? value : null;
      if (asDate) {
        // Date.prototype.toISOString() throws RangeError for an invalid date;
        // a single bad cell must not abort normalization of the whole result.
        return Number.isNaN(asDate.getTime()) ? value : asDate.toISOString();
      }
    }
  }
  if (typeof value === 'string') {
    const trimmed = value.trim();
    const looksLikeObject = trimmed.startsWith('{') && trimmed.endsWith('}');
    const looksLikeArray = trimmed.startsWith('[') && trimmed.endsWith(']');
    if (looksLikeObject || looksLikeArray) {
      try {
        return JSON.parse(trimmed) as unknown;
      } catch {
        // Not valid JSON after all: keep the original (untrimmed) string.
        return value;
      }
    }
  }
  return value;
}
'password'; + const account = stringConfigValue(input.connection, 'account', env); + const warehouse = stringConfigValue(input.connection, 'warehouse', env); + const database = stringConfigValue(input.connection, 'database', env); + const username = stringConfigValue(input.connection, 'username', env); + if (!account) { + throw new Error(`Native Snowflake connector requires connections.${input.connectionId}.account`); + } + if (!warehouse) { + throw new Error(`Native Snowflake connector requires connections.${input.connectionId}.warehouse`); + } + if (!database) { + throw new Error(`Native Snowflake connector requires connections.${input.connectionId}.database`); + } + if (!username) { + throw new Error(`Native Snowflake connector requires connections.${input.connectionId}.username`); + } + const resolved: KloSnowflakeResolvedConnectionConfig = { + authMethod, + account, + warehouse, + database, + schemas: schemaNames(input.connection!, env), + username, + }; + const role = stringConfigValue(input.connection, 'role', env); + if (role) { + resolved.role = role; + } + if (authMethod === 'rsa') { + resolved.privateKey = stringConfigValue(input.connection, 'privateKey', env); + const passphrase = stringConfigValue(input.connection, 'passphrase', env); + if (passphrase) { + resolved.passphrase = passphrase; + } + if (!resolved.privateKey) { + throw new Error(`Native Snowflake connector requires connections.${input.connectionId}.privateKey for RSA auth`); + } + } else { + resolved.password = stringConfigValue(input.connection, 'password', env); + if (!resolved.password) { + throw new Error(`Native Snowflake connector requires connections.${input.connectionId}.password`); + } + } + return resolved; +} + +class DefaultSnowflakeDriverFactory implements KloSnowflakeDriverFactory { + createDriver(input: { + resolved: KloSnowflakeResolvedConnectionConfig; + sdkOptionsProvider?: KloSnowflakeSdkOptionsProvider; + }): KloSnowflakeDriver { + return new 
/**
 * `KloSnowflakeDriver` implementation backed by `snowflake-sdk`.
 *
 * Every public operation opens its own connection, runs its statement(s), and
 * destroys the connection again. `close` callbacks returned by the optional SDK
 * options provider are collected and run by `cleanup()`.
 */
class SnowflakeSdkDriver implements KloSnowflakeDriver {
  /** `close` callbacks returned by the SDK options provider, drained by `cleanup()`. */
  private closeSdkOptions: Array<() => Promise<void>> = [];

  constructor(
    private readonly resolved: KloSnowflakeResolvedConnectionConfig,
    private readonly sdkOptionsProvider?: KloSnowflakeSdkOptionsProvider,
  ) {}

  /**
   * Checks connectivity by running `SELECT 1`, giving up after 60 seconds.
   *
   * @returns `{ success: true }` on success, otherwise `{ success: false, error }`.
   */
  async test(): Promise<{ success: boolean; error?: string }> {
    const timeoutMs = 60_000;
    let timer: ReturnType<typeof setTimeout> | undefined;
    const timedOut = new Promise<{ success: boolean; error: string }>((resolveTest) => {
      timer = setTimeout(
        () => resolveTest({ success: false, error: `Connection test timed out after ${timeoutMs / 1000}s` }),
        timeoutMs,
      );
    });
    try {
      return await Promise.race([this.runTest(), timedOut]);
    } finally {
      // Without this the pending timer keeps the event loop (and therefore the
      // process) alive for up to 60s after the test has already settled.
      clearTimeout(timer);
    }
  }

  /**
   * Runs a statement on a fresh connection.
   *
   * @param sql Statement text.
   * @param params Bind values; only used when an array is passed.
   * @returns The result with `totalRows`/`rowCount` set to the number of returned rows.
   *   Any failure (connect or execute) yields an empty result rather than a rejection.
   */
  async query(sql: string, params?: unknown): Promise<KloQueryResult> {
    let connection: snowflake.Connection | null = null;
    try {
      connection = await this.createConnection();
      const binds = Array.isArray(params) ? toSnowflakeBinds(params) : undefined;
      const result = await this.executeSnowflakeQuery(connection, sql, binds);
      return { ...result, totalRows: result.rows.length, rowCount: result.rows.length };
    } catch {
      // NOTE(review): errors are swallowed on purpose here and surface as an empty
      // result; callers in this file treat query() as best-effort. Kept as-is.
      return { headers: [], rows: [], totalRows: 0, rowCount: 0 };
    } finally {
      if (connection) {
        await this.releaseConnection(connection);
      }
    }
  }

  /**
   * Reads table and column metadata for one schema from INFORMATION_SCHEMA.
   *
   * @param schemaName Schema to inspect; defaults to the first configured schema or `PUBLIC`.
   * @returns One entry per table, each with its columns in ordinal order.
   */
  async getSchemaMetadata(schemaName = this.resolved.schemas[0] ?? 'PUBLIC'): Promise<KloSnowflakeRawTableMetadata[]> {
    const tablesResult = await this.query(
      `
        SELECT TABLE_NAME, TABLE_TYPE, COMMENT, ROW_COUNT
        FROM INFORMATION_SCHEMA.TABLES
        WHERE TABLE_SCHEMA = ? AND TABLE_CATALOG = ?
        ORDER BY TABLE_NAME
      `,
      [schemaName, this.resolved.database],
    );
    const columnsResult = await this.query(
      `
        SELECT TABLE_NAME, COLUMN_NAME, DATA_TYPE, IS_NULLABLE, COMMENT, ORDINAL_POSITION
        FROM INFORMATION_SCHEMA.COLUMNS
        WHERE TABLE_SCHEMA = ? AND TABLE_CATALOG = ?
        ORDER BY TABLE_NAME, ORDINAL_POSITION
      `,
      [schemaName, this.resolved.database],
    );
    // Group column rows by owning table so each table can pick up its columns below.
    const columnsByTable = new Map<string, KloSnowflakeRawColumnMetadata[]>();
    for (const row of columnsResult.rows) {
      const tableName = String(row[0]);
      const columns = columnsByTable.get(tableName) ?? [];
      columns.push({
        name: String(row[1]),
        type: String(row[2]),
        nullable: row[3] === 'YES',
        comment: row[4] ? String(row[4]) : null,
      });
      columnsByTable.set(tableName, columns);
    }
    return tablesResult.rows.map((row) => ({
      name: String(row[0]),
      catalog: this.resolved.database,
      db: schemaName,
      rowCount: firstNumber(row[3]) ?? 0,
      comment: row[2] ? String(row[2]) : null,
      columns: columnsByTable.get(String(row[0])) ?? [],
    }));
  }

  /** Lists schema names in the configured database, excluding `INFORMATION_SCHEMA`. */
  async listSchemas(): Promise<string[]> {
    const result = await this.query(`SHOW SCHEMAS IN DATABASE "${this.resolved.database}"`);
    // The schema name is read from the second column of the SHOW SCHEMAS output.
    return result.rows.map((row) => String(row[1])).filter((name) => name !== 'INFORMATION_SCHEMA');
  }

  /** Runs and forgets every `close` callback collected from the SDK options provider. */
  async cleanup(): Promise<void> {
    const closers = this.closeSdkOptions;
    this.closeSdkOptions = [];
    await Promise.all(closers.map((close) => close()));
  }

  /** Connects, runs `SELECT 1`, and reports the outcome without throwing. */
  private async runTest(): Promise<{ success: boolean; error?: string }> {
    let connection: snowflake.Connection | null = null;
    try {
      connection = await this.createConnection();
      await this.executeSnowflakeQuery(connection, 'SELECT 1');
      return { success: true };
    } catch (error) {
      return { success: false, error: error instanceof Error ? error.message : String(error) };
    } finally {
      if (connection) {
        await this.releaseConnection(connection);
      }
    }
  }

  /**
   * Opens a connection using the resolved config (optionally patched by the SDK
   * options provider) and applies role/warehouse/database/schema context.
   *
   * @throws The SDK connect error, or the error from setting the session context
   *   (in which case the connection is destroyed first).
   */
  private async createConnection(): Promise<snowflake.Connection> {
    const patch = await this.sdkOptionsProvider?.resolve({
      account: this.resolved.account,
      connection: { ...this.resolved, driver: 'snowflake', readonly: true },
    });
    if (patch?.close) {
      this.closeSdkOptions.push(patch.close);
    }
    const baseConfig: snowflake.ConnectionOptions = {
      account: this.resolved.account,
      username: this.resolved.username,
      warehouse: this.resolved.warehouse,
      database: this.resolved.database,
      schema: this.resolved.schemas[0] ?? 'PUBLIC',
      role: this.resolved.role,
      // Provider-supplied options override the defaults above.
      ...patch?.sdkOptions,
    };
    const connectionConfig: snowflake.ConnectionOptions =
      this.resolved.authMethod === 'rsa'
        ? { ...baseConfig, authenticator: 'SNOWFLAKE_JWT', privateKey: this.decryptPrivateKey() }
        : { ...baseConfig, password: this.resolved.password };
    const connection = snowflake.createConnection(connectionConfig);
    return new Promise<snowflake.Connection>((resolveConnection, rejectConnection) => {
      connection.connect((error, connected) => {
        if (error) {
          rejectConnection(error);
          return;
        }
        const resolvedConnection = connected ?? connection;
        this.setConnectionContext(resolvedConnection).then(
          () => resolveConnection(resolvedConnection),
          (contextError) => {
            // Do not leak a half-configured connection.
            resolvedConnection.destroy(() => undefined);
            rejectConnection(contextError);
          },
        );
      });
    });
  }

  /** Issues `USE ROLE` (when configured), `USE WAREHOUSE`, `USE DATABASE`, and `USE SCHEMA`. */
  private async setConnectionContext(connection: snowflake.Connection): Promise<void> {
    if (this.resolved.role) {
      await this.executeSnowflakeQuery(connection, `USE ROLE "${this.resolved.role}"`);
    }
    await this.executeSnowflakeQuery(connection, `USE WAREHOUSE "${this.resolved.warehouse}"`);
    await this.executeSnowflakeQuery(connection, `USE DATABASE "${this.resolved.database}"`);
    await this.executeSnowflakeQuery(connection, `USE SCHEMA "${this.resolved.schemas[0] ?? 'PUBLIC'}"`);
  }

  /**
   * Promise wrapper around `connection.execute`.
   *
   * @returns Column names, column types, and rows as arrays ordered like `headers`,
   *   with each cell passed through `normalizeSnowflakeValue`.
   */
  private async executeSnowflakeQuery(
    connection: snowflake.Connection,
    sqlText: string,
    binds?: snowflake.Binds,
  ): Promise<{ headers: string[]; headerTypes?: string[]; rows: unknown[][] }> {
    return new Promise((resolveQuery, rejectQuery) => {
      connection.execute({
        sqlText,
        binds,
        complete: (error, statement, rows) => {
          if (error) {
            rejectQuery(error);
            return;
          }
          const columns = statement.getColumns();
          const headers = columns ? columns.map((column) => column.getName()) : [];
          const headerTypes = columns ? columns.map((column) => column.getType()) : [];
          const normalizedRows = rows
            ? rows.map((row) => headers.map((header, index) => normalizeSnowflakeValue(row[header], headerTypes[index])))
            : [];
          resolveQuery({ headers, headerTypes, rows: normalizedRows });
        },
      });
    });
  }

  /**
   * Destroys a connection on a best-effort basis.
   *
   * Used from `finally` blocks: a teardown failure must not replace a result (or
   * a `{ success: false }` status) that has already been produced.
   */
  private async releaseConnection(connection: snowflake.Connection): Promise<void> {
    try {
      await this.destroyConnection(connection);
    } catch {
      // Nothing useful can be done with a failed destroy; the connection is abandoned.
    }
  }

  /** Promise wrapper around `connection.destroy`; rejects with the SDK error. */
  private destroyConnection(connection: snowflake.Connection): Promise<void> {
    return new Promise<void>((resolveDestroy, rejectDestroy) => {
      connection.destroy((error) => {
        if (error) {
          rejectDestroy(error);
          return;
        }
        resolveDestroy();
      });
    });
  }

  /**
   * Loads the configured PEM private key (decrypting it with the passphrase when
   * one is set) and re-exports it as PKCS#8 PEM.
   *
   * @throws Error when no private key is configured, or whatever `createPrivateKey` throws.
   */
  private decryptPrivateKey(): string {
    if (!this.resolved.privateKey) {
      throw new Error('Private key is required for RSA authentication');
    }
    const privateKeyObject = createPrivateKey({
      key: this.resolved.privateKey,
      format: 'pem',
      ...(this.resolved.passphrase ? { passphrase: this.resolved.passphrase } : {}),
    });
    return privateKeyObject.export({ format: 'pem', type: 'pkcs8' }) as string;
  }
}
(() => new Date()); + this.id = `snowflake:${options.connectionId}`; + } + + async testConnection(): Promise<{ success: boolean; error?: string }> { + return this.getDriver().test(); + } + + async introspect(input: KloScanInput, _ctx: KloScanContext): Promise { + this.assertConnection(input.connectionId); + const tables: KloSchemaTable[] = []; + for (const schemaName of this.resolved.schemas) { + const rawTables = await this.getDriver().getSchemaMetadata(schemaName); + const primaryKeys = await this.primaryKeys(rawTables.map((table) => table.name), schemaName); + tables.push(...rawTables.map((table) => this.toSchemaTable(table, primaryKeys))); + } + return { + connectionId: this.options.connectionId, + driver: 'snowflake', + extractedAt: this.now().toISOString(), + scope: { catalogs: [this.resolved.database], schemas: this.resolved.schemas }, + metadata: { + account: this.resolved.account, + warehouse: this.resolved.warehouse, + database: this.resolved.database, + schemas: this.resolved.schemas, + table_count: tables.length, + total_columns: tables.reduce((sum, table) => sum + table.columns.length, 0), + }, + tables, + }; + } + + async sampleTable(input: KloTableSampleInput, _ctx: KloScanContext): Promise { + this.assertConnection(input.connectionId); + const result = await this.getDriver().query( + this.dialect.generateSampleQuery(this.qTableName(input.table), input.limit, input.columns), + ); + return { headers: result.headers, rows: result.rows, totalRows: result.totalRows }; + } + + async sampleColumn(input: KloColumnSampleInput, _ctx: KloScanContext): Promise { + this.assertConnection(input.connectionId); + const result = await this.getDriver().query( + this.dialect.generateColumnSampleQuery(this.qTableName(input.table), input.column, input.limit), + ); + return { + values: result.rows.filter((row) => row.length > 0 && row[0] !== null).map((row) => row[0]), + nullCount: null, + distinctCount: null, + }; + } + + async columnStats(_input: KloColumnStatsInput, 
_ctx: KloScanContext): Promise { + return null; + } + + async executeReadOnly(input: KloSnowflakeReadOnlyQueryInput, _ctx: KloScanContext): Promise { + this.assertConnection(input.connectionId); + const limitedSql = limitSqlForExecution(assertReadOnlySql(input.sql), input.maxRows); + const prepared = this.dialect.prepareQuery(limitedSql, input.params); + return this.getDriver().query(prepared.sql, prepared.params); + } + + async getColumnDistinctValues( + table: KloTableRef, + columnName: string, + options: KloSnowflakeColumnDistinctValuesOptions, + ): Promise { + const tableName = this.qTableName(table); + const quotedColumn = this.dialect.quoteIdentifier(columnName); + const cardinality = await this.singleNumber( + this.dialect.generateCardinalitySampleQuery(tableName, quotedColumn, options.sampleSize ?? 10000), + 'CARDINALITY', + ); + if (cardinality === null) { + return null; + } + if (cardinality === 0) { + return { values: [], cardinality: 0 }; + } + if (cardinality > options.maxCardinality) { + return { values: null, cardinality }; + } + const valueRows = await this.queryRaw>( + this.dialect.generateDistinctValuesQuery(tableName, quotedColumn, options.limit), + ); + return { values: valueRows.map((row) => String(row.VAL ?? row.val)).filter((value) => value !== 'null'), cardinality }; + } + + async getTableRowCount(tableName: string, schemaName = this.resolved.schemas[0] ?? 'PUBLIC'): Promise { + const tables = await this.getDriver().getSchemaMetadata(schemaName); + return tables.find((table) => table.name === tableName)?.rowCount ?? 
0; + } + + qTableName(table: Pick & Partial>): string { + return this.dialect.formatTableName(table); + } + + quoteIdentifier(identifier: string): string { + return this.dialect.quoteIdentifier(identifier); + } + + listSchemas(): Promise { + return this.getDriver().listSchemas(); + } + + async cleanup(): Promise { + if (this.driverInstance) { + await this.driverInstance.cleanup(); + this.driverInstance = null; + } + } + + private getDriver(): KloSnowflakeDriver { + if (!this.driverInstance) { + this.driverInstance = this.driverFactory.createDriver({ + resolved: this.resolved, + sdkOptionsProvider: this.options.sdkOptionsProvider, + }); + } + return this.driverInstance; + } + + private async primaryKeys(tableNames: string[], schemaName: string): Promise>> { + if (tableNames.length === 0) { + return new Map(); + } + const result = await this.getDriver().query( + ` + SELECT tc.TABLE_NAME, kcu.COLUMN_NAME + FROM INFORMATION_SCHEMA.TABLE_CONSTRAINTS tc + JOIN INFORMATION_SCHEMA.KEY_COLUMN_USAGE kcu + ON tc.CONSTRAINT_NAME = kcu.CONSTRAINT_NAME + AND tc.TABLE_SCHEMA = kcu.TABLE_SCHEMA + AND tc.TABLE_CATALOG = kcu.TABLE_CATALOG + WHERE tc.CONSTRAINT_TYPE = 'PRIMARY KEY' + AND tc.TABLE_SCHEMA = ? + AND tc.TABLE_CATALOG = ? 
+ ORDER BY tc.TABLE_NAME, kcu.ORDINAL_POSITION + `, + [schemaName, this.resolved.database], + ); + const grouped = new Map>(); + for (const tableName of tableNames) { + grouped.set(tableName, new Set()); + } + for (const row of result.rows) { + const tableName = String(row[0]); + const columnName = String(row[1]); + grouped.get(tableName)?.add(columnName); + } + return grouped; + } + + private toSchemaTable(table: KloSnowflakeRawTableMetadata, primaryKeys: Map>): KloSchemaTable { + return { + catalog: table.catalog, + db: table.db, + name: table.name, + kind: 'table', + comment: table.comment, + estimatedRows: table.rowCount, + columns: table.columns.map((column) => this.toSchemaColumn(table.name, column, primaryKeys)), + foreignKeys: [], + }; + } + + private toSchemaColumn( + tableName: string, + column: KloSnowflakeRawColumnMetadata, + primaryKeys: Map>, + ): KloSchemaColumn { + return { + name: column.name, + nativeType: column.type, + normalizedType: this.dialect.mapDataType(column.type), + dimensionType: this.dialect.mapToDimensionType(column.type), + nullable: column.nullable, + primaryKey: primaryKeys.get(tableName)?.has(column.name) ?? false, + comment: column.comment, + }; + } + + private async queryRaw>(sql: string, params?: unknown): Promise { + const result = await this.getDriver().query(sql, params); + return result.rows.map((row) => Object.fromEntries(result.headers.map((header, index) => [header, row[index]])) as T); + } + + private async singleNumber(sql: string, header: string): Promise { + const rows = await this.queryRaw>(sql); + return firstNumber(rows[0]?.[header] ?? 
rows[0]?.[header.toLowerCase()]); + } + + private assertConnection(connectionId: string): void { + if (connectionId !== this.options.connectionId) { + throw new Error(`Snowflake connector ${this.options.connectionId} cannot scan connection ${connectionId}`); + } + } +} diff --git a/packages/connector-snowflake/src/dialect.test.ts b/packages/connector-snowflake/src/dialect.test.ts new file mode 100644 index 00000000..88168409 --- /dev/null +++ b/packages/connector-snowflake/src/dialect.test.ts @@ -0,0 +1,50 @@ +import { describe, expect, it } from 'vitest'; +import { KloSnowflakeDialect } from './dialect.js'; + +describe('KloSnowflakeDialect', () => { + const dialect = new KloSnowflakeDialect(); + + it('quotes identifiers and formats database.schema.table names', () => { + expect(dialect.quoteIdentifier('order"items')).toBe('"order""items"'); + expect(dialect.formatTableName({ catalog: 'ANALYTICS', db: 'PUBLIC', name: 'ORDERS' })).toBe( + '"ANALYTICS"."PUBLIC"."ORDERS"', + ); + expect(dialect.formatTableName({ db: 'PUBLIC', name: 'ORDERS' })).toBe('"PUBLIC"."ORDERS"'); + expect(dialect.formatTableName({ name: 'ORDERS' })).toBe('"ORDERS"'); + }); + + it('maps native Snowflake types to scan dimensions', () => { + expect(dialect.mapDataType('NUMBER(38,0)')).toBe('NUMBER(38,0)'); + expect(dialect.mapToDimensionType('TIMESTAMP_NTZ')).toBe('time'); + expect(dialect.mapToDimensionType('NUMBER(38,0)')).toBe('number'); + expect(dialect.mapToDimensionType('BOOLEAN')).toBe('boolean'); + expect(dialect.mapToDimensionType('VARIANT')).toBe('string'); + }); + + it('generates sampling and dictionary SQL', () => { + expect(dialect.generateSampleQuery('"PUBLIC"."ORDERS"', 5, ['ID', 'STATUS'])).toBe( + 'SELECT "ID", "STATUS" FROM "PUBLIC"."ORDERS" SAMPLE ROW (5 ROWS)', + ); + expect(dialect.generateColumnSampleQuery('"PUBLIC"."ORDERS"', 'STATUS', 10)).toBe( + 'SELECT "STATUS" FROM "PUBLIC"."ORDERS" WHERE "STATUS" IS NOT NULL AND TRIM(CAST("STATUS" AS STRING)) != \'\' LIMIT 10', + ); + 
expect(dialect.generateCardinalitySampleQuery('"PUBLIC"."ORDERS"', '"STATUS"', 100)).toContain( + 'SELECT COUNT(DISTINCT val) AS cardinality', + ); + expect(dialect.generateDistinctValuesQuery('"PUBLIC"."ORDERS"', '"STATUS"', 20)).toContain( + 'SELECT DISTINCT "STATUS"::VARCHAR AS val', + ); + }); + + it('passes Snowflake positional parameters as bind arrays', () => { + expect(dialect.prepareQuery('SELECT * FROM ORDERS WHERE ID = ? AND STATUS = ?', { id: 1, status: 'paid' })).toEqual({ + sql: 'SELECT * FROM ORDERS WHERE ID = ? AND STATUS = ?', + params: [1, 'paid'], + }); + expect(dialect.prepareQuery('SELECT * FROM ORDERS')).toEqual({ sql: 'SELECT * FROM ORDERS', params: undefined }); + }); + + it('keeps unsupported statistics explicit', () => { + expect(dialect.generateColumnStatisticsQuery('PUBLIC', 'ORDERS')).toBeNull(); + }); +}); diff --git a/packages/connector-snowflake/src/dialect.ts b/packages/connector-snowflake/src/dialect.ts new file mode 100644 index 00000000..96a52b36 --- /dev/null +++ b/packages/connector-snowflake/src/dialect.ts @@ -0,0 +1,187 @@ +import type { KloSchemaDimensionType, KloTableRef } from '@klo/context/scan'; + +type SnowflakeTableNameRef = Pick & Partial>; + +export class KloSnowflakeDialect { + readonly type = 'snowflake'; + + private readonly typeMappings: Record = { + TIMESTAMP_NTZ: 'time', + TIMESTAMP_LTZ: 'time', + TIMESTAMP_TZ: 'time', + TIMESTAMP: 'time', + DATE: 'time', + TIME: 'time', + NUMBER: 'number', + DECIMAL: 'number', + NUMERIC: 'number', + INT: 'number', + INTEGER: 'number', + BIGINT: 'number', + SMALLINT: 'number', + TINYINT: 'number', + BYTEINT: 'number', + FLOAT: 'number', + FLOAT4: 'number', + FLOAT8: 'number', + DOUBLE: 'number', + 'DOUBLE PRECISION': 'number', + REAL: 'number', + VARCHAR: 'string', + CHAR: 'string', + CHARACTER: 'string', + STRING: 'string', + TEXT: 'string', + BINARY: 'string', + VARBINARY: 'string', + BOOLEAN: 'boolean', + VARIANT: 'string', + OBJECT: 'string', + ARRAY: 'string', + }; + + 
quoteIdentifier(identifier: string): string { + return `"${identifier.replace(/"/g, '""')}"`; + } + + formatTableName(table: SnowflakeTableNameRef): string { + if (table.catalog && table.db) { + return `${this.quoteIdentifier(table.catalog)}.${this.quoteIdentifier(table.db)}.${this.quoteIdentifier(table.name)}`; + } + if (table.db) { + return `${this.quoteIdentifier(table.db)}.${this.quoteIdentifier(table.name)}`; + } + return this.quoteIdentifier(table.name); + } + + mapDataType(nativeType: string): string { + return nativeType; + } + + mapToDimensionType(nativeType: string): KloSchemaDimensionType { + if (!nativeType) { + return 'string'; + } + const upper = nativeType.toUpperCase().trim(); + const normalized = upper.includes('(') ? upper.split('(')[0]! : upper; + if (this.typeMappings[normalized]) { + return this.typeMappings[normalized]; + } + if (normalized.includes('TIME') || normalized.includes('DATE')) { + return 'time'; + } + if ( + normalized.includes('INT') || + normalized.includes('NUM') || + normalized.includes('DEC') || + normalized.includes('FLOAT') || + normalized.includes('DOUBLE') + ) { + return 'number'; + } + if (normalized.includes('BOOL')) { + return 'boolean'; + } + return 'string'; + } + + generateSampleQuery(tableName: string, limit: number, columns?: string[]): string { + const columnList = + columns && columns.length > 0 ? columns.map((column) => this.quoteIdentifier(column)).join(', ') : '*'; + return `SELECT ${columnList} FROM ${tableName} SAMPLE ROW (${limit} ROWS)`; + } + + generateColumnSampleQuery(tableName: string, columnName: string, limit: number): string { + const quotedColumn = this.quoteIdentifier(columnName); + return `SELECT ${quotedColumn} FROM ${tableName} WHERE ${quotedColumn} IS NOT NULL AND TRIM(CAST(${quotedColumn} AS STRING)) != '' LIMIT ${limit}`; + } + + prepareQuery(sql: string, params?: Record): { sql: string; params?: unknown[] } { + return { sql, params: params ? 
Object.values(params) : undefined }; + } + + getRandomSampleFilter(samplePct: number): string { + if (samplePct <= 0 || samplePct >= 1) { + return ''; + } + return `UNIFORM(0::FLOAT, 1::FLOAT, RANDOM()) < ${samplePct}`; + } + + getTableSampleClause(samplePct: number): string { + if (samplePct <= 0 || samplePct >= 1) { + return ''; + } + return `SAMPLE (${samplePct * 100})`; + } + + getLimitOffsetClause(limit: number, offset?: number): string { + return offset !== undefined && offset > 0 ? `LIMIT ${limit} OFFSET ${offset}` : `LIMIT ${limit}`; + } + + getNullCountExpression(column: string): string { + return `COUNT_IF(${column} IS NULL)`; + } + + getDistinctCountExpression(column: string): string { + return `APPROX_COUNT_DISTINCT(${column})`; + } + + generateCardinalitySampleQuery(tableName: string, columnName: string, sampleSize: number): string { + return ` + WITH sampled AS ( + SELECT ${columnName} AS val + FROM ${tableName} + WHERE ${columnName} IS NOT NULL + LIMIT ${sampleSize} + ) + SELECT COUNT(DISTINCT val) AS cardinality + FROM sampled + `; + } + + generateDistinctValuesQuery(tableName: string, columnName: string, limit: number): string { + return ` + SELECT DISTINCT ${columnName}::VARCHAR AS val + FROM ${tableName} + WHERE ${columnName} IS NOT NULL + ORDER BY val + LIMIT ${limit} + `; + } + + generateColumnStatisticsQuery(_schemaName: string, _tableName: string): string | null { + return null; + } + + generateRandomizedCardinalitySampleQuery(tableName: string, columnName: string, sampleSize: number): string { + return ` + WITH sampled AS ( + SELECT ${columnName} AS val + FROM ${tableName} SAMPLE ROW (${sampleSize} ROWS) + WHERE ${columnName} IS NOT NULL + ) + SELECT COUNT(DISTINCT val) AS cardinality + FROM sampled + `; + } + + getTimeTruncExpression( + column: string, + granularity: 'day' | 'week' | 'month' | 'quarter' | 'year', + timezone?: string, + ): string { + const target = timezone ? 
`CONVERT_TIMEZONE('UTC', '${timezone}', ${column})` : column; + return `DATE_TRUNC('${granularity}', ${target})`; + } + + getCustomTimeTruncExpression(column: string, interval: string, origin?: string, timezone?: string): string { + const target = timezone ? `CONVERT_TIMEZONE('UTC', '${timezone}', ${column})` : column; + const [amount, unit] = interval.split(' '); + const originExpr = origin ? `'${origin}'::TIMESTAMP` : `'1970-01-01'::TIMESTAMP`; + return `DATEADD(${unit}, FLOOR(DATEDIFF(${unit}, ${originExpr}, ${target}) / ${amount}) * ${amount}, ${originExpr})`; + } + + parseIntervalToSql(interval: string): string { + return `INTERVAL '${interval}'`; + } +} diff --git a/packages/connector-snowflake/src/index.ts b/packages/connector-snowflake/src/index.ts new file mode 100644 index 00000000..68534792 --- /dev/null +++ b/packages/connector-snowflake/src/index.ts @@ -0,0 +1,18 @@ +export { KloSnowflakeDialect } from './dialect.js'; +export { + isKloSnowflakeConnectionConfig, + KloSnowflakeScanConnector, + snowflakeConnectionConfigFromConfig, + type KloSnowflakeColumnDistinctValuesOptions, + type KloSnowflakeColumnDistinctValuesResult, + type KloSnowflakeConnectionConfig, + type KloSnowflakeDriver, + type KloSnowflakeDriverFactory, + type KloSnowflakeRawColumnMetadata, + type KloSnowflakeRawTableMetadata, + type KloSnowflakeReadOnlyQueryInput, + type KloSnowflakeResolvedConnectionConfig, + type KloSnowflakeScanConnectorOptions, + type KloSnowflakeSdkOptionsProvider, +} from './connector.js'; +export { createSnowflakeLiveDatabaseIntrospection } from './live-database-introspection.js'; diff --git a/packages/connector-snowflake/src/live-database-introspection.ts b/packages/connector-snowflake/src/live-database-introspection.ts new file mode 100644 index 00000000..03ca2aaf --- /dev/null +++ b/packages/connector-snowflake/src/live-database-introspection.ts @@ -0,0 +1,40 @@ +import type { LiveDatabaseIntrospectionPort } from '@klo/context/ingest'; +import type { 
KloProjectConnectionConfig } from '@klo/context/project'; +import { + KloSnowflakeScanConnector, + type KloSnowflakeConnectionConfig, + type KloSnowflakeDriverFactory, + type KloSnowflakeSdkOptionsProvider, +} from './connector.js'; + +interface CreateSnowflakeLiveDatabaseIntrospectionOptions { + connections: Record; + driverFactory?: KloSnowflakeDriverFactory; + sdkOptionsProvider?: KloSnowflakeSdkOptionsProvider; + now?: () => Date; +} + +export function createSnowflakeLiveDatabaseIntrospection( + options: CreateSnowflakeLiveDatabaseIntrospectionOptions, +): LiveDatabaseIntrospectionPort { + return { + async extractSchema(connectionId: string) { + const connection = options.connections[connectionId] as KloSnowflakeConnectionConfig | undefined; + const connector = new KloSnowflakeScanConnector({ + connectionId, + connection, + driverFactory: options.driverFactory, + sdkOptionsProvider: options.sdkOptionsProvider, + now: options.now, + }); + try { + return await connector.introspect( + { connectionId, driver: 'snowflake' }, + { runId: `snowflake-${connectionId}` }, + ); + } finally { + await connector.cleanup(); + } + }, + }; +} diff --git a/packages/connector-snowflake/src/package-exports.test.ts b/packages/connector-snowflake/src/package-exports.test.ts new file mode 100644 index 00000000..5653a1fc --- /dev/null +++ b/packages/connector-snowflake/src/package-exports.test.ts @@ -0,0 +1,11 @@ +import { describe, expect, it } from 'vitest'; +import * as connector from './index.js'; + +describe('@klo/connector-snowflake package exports', () => { + it('exports public connector, dialect, and introspection APIs', () => { + expect(connector.KloSnowflakeDialect).toBeTypeOf('function'); + expect(connector.KloSnowflakeScanConnector).toBeTypeOf('function'); + expect(connector.snowflakeConnectionConfigFromConfig).toBeTypeOf('function'); + expect(connector.createSnowflakeLiveDatabaseIntrospection).toBeTypeOf('function'); + }); +}); diff --git 
a/packages/connector-snowflake/tsconfig.json b/packages/connector-snowflake/tsconfig.json new file mode 100644 index 00000000..965e6978 --- /dev/null +++ b/packages/connector-snowflake/tsconfig.json @@ -0,0 +1,9 @@ +{ + "extends": "../../tsconfig.base.json", + "compilerOptions": { + "outDir": "./dist", + "rootDir": "./src" + }, + "include": ["src/**/*.ts"], + "exclude": ["dist", "node_modules"] +} diff --git a/packages/connector-sqlite/package.json b/packages/connector-sqlite/package.json new file mode 100644 index 00000000..dcb98155 --- /dev/null +++ b/packages/connector-sqlite/package.json @@ -0,0 +1,48 @@ +{ + "name": "@klo/connector-sqlite", + "version": "0.0.0-private", + "description": "SQLite connector package for KLO scan interfaces", + "private": true, + "type": "module", + "engines": { + "node": ">=22.0.0" + }, + "main": "dist/index.js", + "types": "dist/index.d.ts", + "exports": { + ".": { + "types": "./dist/index.d.ts", + "import": "./dist/index.js", + "default": "./dist/index.js" + }, + "./package.json": "./package.json" + }, + "files": [ + "dist" + ], + "scripts": { + "build": "tsc -p tsconfig.json", + "test": "vitest run", + "type-check": "tsc -p tsconfig.json --noEmit" + }, + "dependencies": { + "@klo/context": "workspace:*", + "better-sqlite3": "^12.6.2" + }, + "devDependencies": { + "@types/better-sqlite3": "^7.6.13", + "@types/node": "^24.3.0", + "typescript": "^5.9.3", + "vitest": "^4.0.18" + }, + "license": "Apache-2.0", + "repository": { + "type": "git", + "url": "git+https://github.com/kaelio/ktx.git", + "directory": "packages/connector-sqlite" + }, + "bugs": { + "url": "https://github.com/kaelio/ktx/issues" + }, + "homepage": "https://github.com/kaelio/ktx#readme" +} diff --git a/packages/connector-sqlite/src/connector.test.ts b/packages/connector-sqlite/src/connector.test.ts new file mode 100644 index 00000000..d19ee577 --- /dev/null +++ b/packages/connector-sqlite/src/connector.test.ts @@ -0,0 +1,255 @@ +import Database from 
'better-sqlite3'; +import { writeFileSync } from 'node:fs'; +import { mkdtemp, rm } from 'node:fs/promises'; +import { tmpdir } from 'node:os'; +import { join } from 'node:path'; +import { afterEach, beforeEach, describe, expect, it } from 'vitest'; +import { + createSqliteLiveDatabaseIntrospection, + isKloSqliteConnectionConfig, + KloSqliteScanConnector, + sqliteDatabasePathFromConfig, +} from './index.js'; + +describe('KloSqliteScanConnector', () => { + let tempDir: string; + let dbPath: string; + + beforeEach(async () => { + tempDir = await mkdtemp(join(tmpdir(), 'klo-connector-sqlite-')); + dbPath = join(tempDir, 'warehouse.db'); + const db = new Database(dbPath); + db.exec(` + PRAGMA foreign_keys = ON; + CREATE TABLE customers ( + id INTEGER PRIMARY KEY, + name TEXT NOT NULL, + tier TEXT + ); + CREATE TABLE orders ( + id INTEGER PRIMARY KEY, + customer_id INTEGER NOT NULL, + status TEXT, + total NUMERIC, + created_at TEXT, + FOREIGN KEY(customer_id) REFERENCES customers(id) + ); + CREATE VIEW recent_orders AS SELECT id, customer_id, status FROM orders; + INSERT INTO customers (id, name, tier) VALUES (1, 'Ada', 'enterprise'), (2, 'Grace', 'growth'); + INSERT INTO orders (id, customer_id, status, total, created_at) + VALUES (10, 1, 'paid', 42.5, '2026-04-28'), (11, 2, 'open', 9.5, '2026-04-29'); + `); + db.close(); + }); + + afterEach(async () => { + await rm(tempDir, { recursive: true, force: true }); + }); + + it('resolves SQLite path configuration safely', () => { + const originalDatabaseUrl = process.env.KLO_SQLITE_TEST_URL; + const pointerPath = join(tempDir, 'sqlite-path.txt'); + process.env.KLO_SQLITE_TEST_URL = `sqlite:${dbPath}`; + writeFileSync(pointerPath, dbPath, 'utf-8'); + + try { + expect(isKloSqliteConnectionConfig({ driver: 'sqlite', path: 'warehouse.db', readonly: true })).toBe(true); + expect(isKloSqliteConnectionConfig({ driver: 'postgres', url: 'env:DATABASE_URL', readonly: true })).toBe( + false, + ); + expect( + 
sqliteDatabasePathFromConfig({ + connectionId: 'warehouse', + projectDir: tempDir, + connection: { driver: 'sqlite', path: 'warehouse.db', readonly: true }, + }), + ).toBe(dbPath); + expect( + sqliteDatabasePathFromConfig({ + connectionId: 'warehouse', + projectDir: tempDir, + connection: { driver: 'sqlite', url: 'env:KLO_SQLITE_TEST_URL', readonly: true }, + }), + ).toBe(dbPath); + expect( + sqliteDatabasePathFromConfig({ + connectionId: 'warehouse', + projectDir: tempDir, + connection: { driver: 'sqlite', url: `file://${dbPath}`, readonly: true }, + }), + ).toBe(dbPath); + expect( + sqliteDatabasePathFromConfig({ + connectionId: 'warehouse', + projectDir: tempDir, + connection: { driver: 'sqlite', path: `file:${pointerPath}`, readonly: true }, + }), + ).toBe(dbPath); + expect(() => + sqliteDatabasePathFromConfig({ + connectionId: 'warehouse', + projectDir: tempDir, + connection: { driver: 'sqlite', path: 'warehouse.db', readonly: false }, + }), + ).toThrow('Native SQLite connector requires connections.warehouse.readonly: true'); + } finally { + if (originalDatabaseUrl === undefined) { + delete process.env.KLO_SQLITE_TEST_URL; + } else { + process.env.KLO_SQLITE_TEST_URL = originalDatabaseUrl; + } + } + }); + + it('introspects schema, primary keys, row counts, views, and foreign keys', async () => { + const connector = new KloSqliteScanConnector({ + connectionId: 'warehouse', + connection: { driver: 'sqlite', path: dbPath, readonly: true }, + now: () => new Date('2026-04-29T10:00:00.000Z'), + }); + + const snapshot = await connector.introspect( + { connectionId: 'warehouse', driver: 'sqlite' }, + { runId: 'scan-run-1' }, + ); + + expect(snapshot).toMatchObject({ + connectionId: 'warehouse', + driver: 'sqlite', + extractedAt: '2026-04-29T10:00:00.000Z', + metadata: { + file_path: dbPath, + table_count: 3, + total_columns: 11, + }, + }); + expect(snapshot.tables.map((table) => [table.name, table.kind, table.estimatedRows])).toEqual([ + ['customers', 'table', 2], + 
['orders', 'table', 2], + ['recent_orders', 'view', null], + ]); + expect(snapshot.tables.find((table) => table.name === 'customers')?.columns[0]).toMatchObject({ + name: 'id', + nativeType: 'INTEGER', + normalizedType: 'INTEGER', + dimensionType: 'number', + nullable: false, + primaryKey: true, + }); + expect(snapshot.tables.find((table) => table.name === 'orders')?.foreignKeys).toEqual([ + { + fromColumn: 'customer_id', + toCatalog: null, + toDb: null, + toTable: 'customers', + toColumn: 'id', + constraintName: null, + }, + ]); + }); + + it('runs samples, distinct values, statistics, and read-only SQL', async () => { + const connector = new KloSqliteScanConnector({ + connectionId: 'warehouse', + connection: { driver: 'sqlite', path: dbPath, readonly: true }, + }); + + await expect( + connector.sampleTable( + { connectionId: 'warehouse', table: { catalog: null, db: null, name: 'orders' }, columns: ['id'], limit: 1 }, + { runId: 'scan-run-1' }, + ), + ).resolves.toEqual({ headers: ['id'], rows: [[10]], totalRows: 1 }); + + await expect( + connector.sampleColumn( + { connectionId: 'warehouse', table: { catalog: null, db: null, name: 'orders' }, column: 'status', limit: 5 }, + { runId: 'scan-run-1' }, + ), + ).resolves.toMatchObject({ values: ['paid', 'open'], nullCount: null, distinctCount: null }); + + await expect( + connector.getColumnDistinctValues( + { catalog: null, db: null, name: 'orders' }, + 'status', + { maxCardinality: 5, limit: 10, sampleSize: 100 }, + ), + ).resolves.toEqual({ values: ['open', 'paid'], cardinality: 2 }); + + await expect( + connector.executeReadOnly( + { connectionId: 'warehouse', sql: 'select id, status from orders order by id', maxRows: 1 }, + { runId: 'scan-run-1' }, + ), + ).resolves.toEqual({ headers: ['id', 'status'], rows: [[10, 'paid']], totalRows: 1, rowCount: 1 }); + + await expect( + connector.executeReadOnly({ connectionId: 'warehouse', sql: 'delete from orders' }, { runId: 'scan-run-1' }), + ).rejects.toThrow('Only 
read-only SELECT/WITH queries can be executed locally'); + + await expect( + connector.columnStats( + { connectionId: 'warehouse', table: { catalog: null, db: null, name: 'orders' }, column: 'status' }, + { runId: 'scan-run-1' }, + ), + ).resolves.toBeNull(); + }); + + it('adapts native SQLite snapshots to live-database introspection for local ingest', async () => { + const introspection = createSqliteLiveDatabaseIntrospection({ + projectDir: tempDir, + connections: { + warehouse: { driver: 'sqlite', path: 'warehouse.db', readonly: true }, + }, + now: () => new Date('2026-04-29T10:00:00.000Z'), + }); + + const snapshot = await introspection.extractSchema('warehouse'); + + expect(snapshot).toMatchObject({ + connectionId: 'warehouse', + extractedAt: '2026-04-29T10:00:00.000Z', + }); + expect(snapshot.tables.find((table) => table.name === 'customers')).toMatchObject({ + name: 'customers', + catalog: null, + db: null, + columns: [ + { + name: 'id', + nativeType: 'INTEGER', + normalizedType: 'INTEGER', + dimensionType: 'number', + nullable: false, + primaryKey: true, + comment: null, + }, + { + name: 'name', + nativeType: 'TEXT', + normalizedType: 'TEXT', + dimensionType: 'string', + nullable: false, + primaryKey: false, + comment: null, + }, + { + name: 'tier', + nativeType: 'TEXT', + normalizedType: 'TEXT', + dimensionType: 'string', + nullable: true, + primaryKey: false, + comment: null, + }, + ], + foreignKeys: [], + }); + expect(snapshot.tables.find((table) => table.name === 'orders')).toMatchObject({ + name: 'orders', + catalog: null, + db: null, + foreignKeys: [{ fromColumn: 'customer_id', toTable: 'customers', toColumn: 'id' }], + }); + }); +}); diff --git a/packages/connector-sqlite/src/connector.ts b/packages/connector-sqlite/src/connector.ts new file mode 100644 index 00000000..aab3463d --- /dev/null +++ b/packages/connector-sqlite/src/connector.ts @@ -0,0 +1,371 @@ +import Database from 'better-sqlite3'; +import { existsSync, readFileSync, statSync } from 
'node:fs'; +import { homedir } from 'node:os'; +import { isAbsolute, resolve } from 'node:path'; +import { fileURLToPath } from 'node:url'; +import { assertReadOnlySql, limitSqlForExecution, normalizeQueryRows } from '@klo/context/connections'; +import { + createKloConnectorCapabilities, + type KloColumnSampleInput, + type KloColumnSampleResult, + type KloColumnStatsInput, + type KloColumnStatsResult, + type KloQueryResult, + type KloReadOnlyQueryInput, + type KloScanConnector, + type KloScanContext, + type KloScanInput, + type KloSchemaForeignKey, + type KloSchemaSnapshot, + type KloSchemaTable, + type KloTableRef, + type KloTableSampleInput, + type KloTableSampleResult, +} from '@klo/context/scan'; +import { KloSqliteDialect } from './dialect.js'; + +export interface KloSqliteConnectionConfig { + driver?: string; + path?: string; + url?: string; + file_path?: string; + readonly?: boolean; + [key: string]: unknown; +} + +export interface SqliteDatabasePathInput { + connectionId: string; + projectDir?: string; + connection: KloSqliteConnectionConfig | undefined; +} + +export interface KloSqliteScanConnectorOptions extends SqliteDatabasePathInput { + now?: () => Date; +} + +export interface KloSqliteReadOnlyQueryInput extends KloReadOnlyQueryInput { + params?: Record | unknown[]; +} + +export interface KloSqliteColumnDistinctValuesOptions { + maxCardinality: number; + limit: number; + sampleSize?: number; +} + +export interface KloSqliteColumnDistinctValuesResult { + values: string[] | null; + cardinality: number; +} + +interface SqliteMasterRow { + name: string; + type: 'table' | 'view'; +} + +interface SqliteTableInfoRow { + cid: number; + name: string; + type: string; + notnull: number; + dflt_value: unknown; + pk: number; +} + +interface SqliteForeignKeyRow { + id: number; + seq: number; + table: string; + from: string; + to: string; +} + +function stringConfigValue( + connection: KloSqliteConnectionConfig | undefined, + key: keyof KloSqliteConnectionConfig, +): 
string | undefined { + const value = connection?.[key]; + return typeof value === 'string' && value.trim().length > 0 ? resolveStringReference(key, value.trim()) : undefined; +} + +function resolveStringReference(key: keyof KloSqliteConnectionConfig, value: string): string { + if (value.startsWith('env:')) { + return process.env[value.slice('env:'.length)] ?? ''; + } + if (key !== 'url' && value.startsWith('file:')) { + const rawPath = value.slice('file:'.length); + // Drop the separator after "~": path.resolve() discards homedir() when the next segment is absolute ("/foo"). + const path = rawPath.startsWith('~') ? resolve(homedir(), rawPath.slice(1).replace(/^[\\/]+/, '')) : rawPath; + return readFileSync(path, 'utf-8').trim(); + } + return value; +} + +function sqlitePathFromUrl(url: string): string { + if (url.startsWith('file:')) { + return fileURLToPath(url); + } + if (url.startsWith('sqlite:')) { + const parsed = new URL(url); + return decodeURIComponent(parsed.pathname); + } + return url; +} + +function stripLeadingSqlComments(sql: string): string { + let index = 0; + while (index < sql.length) { + while (/\s/.test(sql[index] ?? '')) { + index += 1; + } + if (sql.startsWith('--', index)) { + const end = sql.indexOf('\n', index + 2); + index = end === -1 ? sql.length : end + 1; + continue; + } + if (sql.startsWith('/*', index)) { + const end = sql.indexOf('*/', index + 2); + if (end === -1) { + return sql.slice(index); + } + index = end + 2; + continue; + } + break; + } + return sql.slice(index); +} + +export function isKloSqliteConnectionConfig(connection: KloSqliteConnectionConfig | undefined): boolean { + const driver = String(connection?.driver ?? '').toLowerCase(); + return driver === 'sqlite' || driver === 'sqlite3'; +} + +export function sqliteDatabasePathFromConfig(input: SqliteDatabasePathInput): string { + if (!isKloSqliteConnectionConfig(input.connection)) { + throw new Error(`Native SQLite connector cannot run driver "${input.connection?.driver ??
'unknown'}"`); + } + if (input.connection?.readonly !== true) { + throw new Error(`Native SQLite connector requires connections.${input.connectionId}.readonly: true`); + } + const configuredPath = + stringConfigValue(input.connection, 'path') ?? + stringConfigValue(input.connection, 'file_path') ?? + sqlitePathFromUrl(stringConfigValue(input.connection, 'url') ?? ''); + if (!configuredPath) { + throw new Error(`Native SQLite connector requires connections.${input.connectionId}.path, file_path, or url`); + } + return isAbsolute(configuredPath) ? configuredPath : resolve(input.projectDir ?? process.cwd(), configuredPath); +} + +export class KloSqliteScanConnector implements KloScanConnector { + readonly id: string; + readonly driver = 'sqlite' as const; + readonly capabilities = createKloConnectorCapabilities({ + tableSampling: true, + columnSampling: true, + columnStats: false, + readOnlySql: true, + nestedAnalysis: false, + formalForeignKeys: true, + estimatedRowCounts: true, + }); + + private readonly connectionId: string; + private readonly dbPath: string; + private readonly now: () => Date; + private readonly dialect = new KloSqliteDialect(); + private db: Database.Database | null = null; + + constructor(options: KloSqliteScanConnectorOptions) { + this.connectionId = options.connectionId; + this.dbPath = sqliteDatabasePathFromConfig(options); + this.now = options.now ?? (() => new Date()); + this.id = `sqlite:${options.connectionId}`; + } + + async testConnection(): Promise<{ success: boolean; error?: string }> { + try { + if (!existsSync(this.dbPath) || !statSync(this.dbPath).isFile()) { + return { success: false, error: `File not found: ${this.dbPath}` }; + } + this.database().prepare('SELECT 1').get(); + return { success: true }; + } catch (error) { + return { success: false, error: error instanceof Error ? 
error.message : String(error) }; + } + } + + async introspect(input: KloScanInput, _ctx: KloScanContext): Promise { + this.assertConnection(input.connectionId); + const database = this.database(); + const rawTables = database + .prepare( + `SELECT name, type FROM sqlite_master WHERE type IN ('table', 'view') AND name NOT LIKE 'sqlite_%' ORDER BY name`, + ) + .all() as SqliteMasterRow[]; + const tables = rawTables.map((table) => this.readTable(database, table)); + const fileStats = existsSync(this.dbPath) ? statSync(this.dbPath) : null; + return { + connectionId: this.connectionId, + driver: 'sqlite', + extractedAt: this.now().toISOString(), + scope: {}, + metadata: { + file_path: this.dbPath, + file_size: fileStats ? fileStats.size : 0, + table_count: tables.length, + total_columns: tables.reduce((sum, table) => sum + table.columns.length, 0), + }, + tables, + }; + } + + async sampleTable(input: KloTableSampleInput, _ctx: KloScanContext): Promise { + this.assertConnection(input.connectionId); + const result = this.query(this.dialect.generateSampleQuery(this.qTableName(input.table), input.limit, input.columns)); + return { headers: result.headers, rows: result.rows, totalRows: result.totalRows }; + } + + async sampleColumn(input: KloColumnSampleInput, _ctx: KloScanContext): Promise { + this.assertConnection(input.connectionId); + const result = this.query( + this.dialect.generateColumnSampleQuery(this.qTableName(input.table), input.column, input.limit), + ); + const values = result.rows.filter((row) => row.length > 0 && row[0] !== null).map((row) => row[0]); + return { values, nullCount: null, distinctCount: null }; + } + + async columnStats(_input: KloColumnStatsInput, _ctx: KloScanContext): Promise { + return null; + } + + async executeReadOnly(input: KloSqliteReadOnlyQueryInput, _ctx: KloScanContext): Promise { + this.assertConnection(input.connectionId); + const result = this.query(limitSqlForExecution(stripLeadingSqlComments(input.sql), input.maxRows), 
input.params); + return { ...result, rowCount: result.rows.length }; + } + + async getColumnDistinctValues( + table: KloTableRef, + columnName: string, + options: KloSqliteColumnDistinctValuesOptions, + ): Promise { + const sampleSize = options.sampleSize ?? 10000; + const tableName = this.qTableName(table); + const quotedColumn = this.dialect.quoteIdentifier(columnName); + const cardinalityResult = this.query( + this.dialect.generateCardinalitySampleQuery(tableName, quotedColumn, sampleSize), + ); + if (cardinalityResult.rows.length === 0) { + return null; + } + const cardinality = Number(cardinalityResult.rows[0][0]); + if (Number.isNaN(cardinality)) { + return null; + } + if (cardinality === 0) { + return { values: [], cardinality: 0 }; + } + if (cardinality > options.maxCardinality) { + return { values: null, cardinality }; + } + const valuesResult = this.query(this.dialect.generateDistinctValuesQuery(tableName, quotedColumn, options.limit)); + return { + values: valuesResult.rows.filter((row) => row.length > 0 && row[0] !== null).map((row) => String(row[0])), + cardinality, + }; + } + + async getTableRowCount(tableName: string): Promise { + const result = this.query(`SELECT COUNT(*) AS count FROM ${this.dialect.quoteIdentifier(tableName)}`); + return Number(result.rows[0]?.[0] ?? 0); + } + + qTableName(table: Pick): string { + return this.dialect.formatTableName(table); + } + + quoteIdentifier(identifier: string): string { + return this.dialect.quoteIdentifier(identifier); + } + + async cleanup(): Promise { + if (this.db) { + this.db.close(); + this.db = null; + } + } + + private database(): Database.Database { + if (!this.db) { + this.db = new Database(this.dbPath, { readonly: true, fileMustExist: true }); + } + return this.db; + } + + private query(sql: string, params?: Record | unknown[]): Omit { + const statement = this.database().prepare(assertReadOnlySql(sql)); + const rows = (params ? 
statement.all(params) : statement.all()) as unknown[]; + return { + headers: statement.columns().map((column) => column.name), + rows: normalizeQueryRows(rows), + totalRows: rows.length, + }; + } + + private readTable(database: Database.Database, table: SqliteMasterRow): KloSchemaTable { + const columns = database + .prepare(`PRAGMA table_info(${this.dialect.quoteIdentifier(table.name)})`) + .all() as SqliteTableInfoRow[]; + const foreignKeys = database + .prepare(`PRAGMA foreign_key_list(${this.dialect.quoteIdentifier(table.name)})`) + .all() as SqliteForeignKeyRow[]; + const estimatedRows = + table.type === 'table' + ? Number( + ( + database + .prepare(`SELECT COUNT(*) AS count FROM ${this.dialect.quoteIdentifier(table.name)}`) + .get() as { count: unknown } + ).count, + ) + : null; + return { + catalog: null, + db: null, + name: table.name, + kind: table.type, + comment: null, + estimatedRows, + columns: columns.map((column) => ({ + name: column.name, + nativeType: column.type, + normalizedType: this.dialect.mapDataType(column.type), + dimensionType: this.dialect.mapToDimensionType(column.type), + nullable: column.notnull === 0 && column.pk === 0, + primaryKey: column.pk > 0, + comment: null, + })), + foreignKeys: this.mapForeignKeys(foreignKeys), + }; + } + + private mapForeignKeys(rows: SqliteForeignKeyRow[]): KloSchemaForeignKey[] { + return rows + .sort((a, b) => a.id - b.id || a.seq - b.seq) + .map((row) => ({ + fromColumn: row.from, + toCatalog: null, + toDb: null, + toTable: row.table, + toColumn: row.to, + constraintName: null, + })); + } + + private assertConnection(connectionId: string): void { + if (connectionId !== this.connectionId) { + throw new Error(`KLO SQLite connector ${this.id} cannot serve connection ${connectionId}`); + } + } +} diff --git a/packages/connector-sqlite/src/dialect.test.ts b/packages/connector-sqlite/src/dialect.test.ts new file mode 100644 index 00000000..b48f3e35 --- /dev/null +++ 
b/packages/connector-sqlite/src/dialect.test.ts @@ -0,0 +1,33 @@ +import { describe, expect, it } from 'vitest'; +import { KloSqliteDialect } from './dialect.js'; + +describe('KloSqliteDialect', () => { + const dialect = new KloSqliteDialect(); + + it('quotes identifiers and formats single-file SQLite table names', () => { + expect(dialect.quoteIdentifier('orders')).toBe('"orders"'); + expect(dialect.quoteIdentifier('weird"name')).toBe('"weird""name"'); + expect(dialect.formatTableName({ catalog: 'ignored', db: 'ignored', name: 'orders' })).toBe('"orders"'); + }); + + it('maps native SQLite types to KLO dimension types', () => { + expect(dialect.mapToDimensionType('INTEGER')).toBe('number'); + expect(dialect.mapToDimensionType('numeric(10,2)')).toBe('number'); + expect(dialect.mapToDimensionType('timestamp')).toBe('time'); + expect(dialect.mapToDimensionType('VARCHAR(255)')).toBe('string'); + expect(dialect.mapToDimensionType('bool')).toBe('boolean'); + expect(dialect.mapToDimensionType('')).toBe('string'); + }); + + it('builds sampling and distinct-value SQL without host-specific state', () => { + expect(dialect.generateSampleQuery('"orders"', 25, ['id', 'status'])).toBe( + 'SELECT "id", "status" FROM "orders" LIMIT 25', + ); + expect(dialect.generateColumnSampleQuery('"orders"', 'status', 10)).toBe( + 'SELECT "status" FROM "orders" WHERE "status" IS NOT NULL AND TRIM(CAST("status" AS TEXT)) != \'\' LIMIT 10', + ); + expect(dialect.generateDistinctValuesQuery('"orders"', '"status"', 5)).toContain( + 'SELECT DISTINCT CAST("status" AS TEXT) AS val', + ); + }); +}); diff --git a/packages/connector-sqlite/src/dialect.ts b/packages/connector-sqlite/src/dialect.ts new file mode 100644 index 00000000..928a3dea --- /dev/null +++ b/packages/connector-sqlite/src/dialect.ts @@ -0,0 +1,177 @@ +import type { KloSchemaDimensionType, KloTableRef } from '@klo/context/scan'; + +type SqliteTableNameRef = Pick & Partial>; + +export class KloSqliteDialect { + readonly type = 
'sqlite'; + + private readonly typeMappings: Record = { + DATETIME: 'time', + DATE: 'time', + TIMESTAMP: 'time', + TIME: 'time', + INTEGER: 'number', + INT: 'number', + REAL: 'number', + NUMERIC: 'number', + FLOAT: 'number', + DOUBLE: 'number', + TEXT: 'string', + VARCHAR: 'string', + CHAR: 'string', + BLOB: 'string', + BOOLEAN: 'boolean', + BOOL: 'boolean', + }; + + quoteIdentifier(identifier: string): string { + return `"${identifier.replace(/"/g, '""')}"`; + } + + formatTableName(table: SqliteTableNameRef): string { + return this.quoteIdentifier(table.name); + } + + mapDataType(nativeType: string): string { + return nativeType; + } + + mapToDimensionType(nativeType: string): KloSchemaDimensionType { // exact lookup in typeMappings first, then substring fallbacks; anything unrecognized is 'string' + if (!nativeType) { + return 'string'; + } + let normalized = nativeType.toUpperCase().trim(); + if (normalized.includes('(')) { + normalized = normalized.split('(')[0]; // drop a length/precision suffix such as "(10,2)" + } + if (this.typeMappings[normalized]) { + return this.typeMappings[normalized]; + } + if (normalized.includes('TIME') || normalized.includes('DATE')) { + return 'time'; + } + if ( + normalized.includes('INT') || + normalized.includes('NUM') || normalized.includes('DEC') || // DECIMAL/DEC declarations are numeric, not text + normalized.includes('REAL') || + normalized.includes('FLOAT') || + normalized.includes('DOUBLE') + ) { + return 'number'; + } + if (normalized.includes('BOOL')) { + return 'boolean'; + } + return 'string'; + } + + generateSampleQuery(tableName: string, limit: number, columns?: string[]): string { + const columnList = + columns && columns.length > 0 ? 
columns.map((column) => this.quoteIdentifier(column)).join(', ') : '*'; + return `SELECT ${columnList} FROM ${tableName} LIMIT ${limit}`; + } + + generateColumnSampleQuery(tableName: string, columnName: string, limit: number): string { + const quoted = this.quoteIdentifier(columnName); + return `SELECT ${quoted} FROM ${tableName} WHERE ${quoted} IS NOT NULL AND TRIM(CAST(${quoted} AS TEXT)) != '' LIMIT ${limit}`; + } + + prepareQuery(sql: string, params?: Record): { sql: string; params?: unknown } { + return params ? { sql, params } : { sql }; + } + + getRandomSampleFilter(samplePct: number): string { // samplePct is a fraction in (0, 1); values outside that range yield no filter + if (samplePct <= 0 || samplePct >= 1) { + return ''; + } + return `ABS(RANDOM() % 100) < ${Math.round(samplePct * 100)}`; // RANDOM() is a signed 64-bit integer, so the remainder may be negative; ABS() folds it into 0..99 + } + + getTableSampleClause(_samplePct: number): string { + return ''; + } + + getLimitOffsetClause(limit: number, offset?: number): string { + return offset !== undefined && offset > 0 ? `LIMIT ${limit} OFFSET ${offset}` : `LIMIT ${limit}`; + } + + getNullCountExpression(column: string): string { + return `SUM(CASE WHEN ${column} IS NULL THEN 1 ELSE 0 END)`; + } + + getDistinctCountExpression(column: string): string { + return `COUNT(DISTINCT ${column})`; + } + + generateCardinalitySampleQuery(tableName: string, columnName: string, sampleSize: number): string { + return ` + WITH sampled AS ( + SELECT ${columnName} AS val + FROM ${tableName} + WHERE ${columnName} IS NOT NULL + LIMIT ${sampleSize} + ) + SELECT COUNT(DISTINCT val) AS cardinality + FROM sampled + `; + } + + generateDistinctValuesQuery(tableName: string, columnName: string, limit: number): string { + return ` + SELECT DISTINCT CAST(${columnName} AS TEXT) AS val + FROM ${tableName} + WHERE ${columnName} IS NOT NULL + ORDER BY val + LIMIT ${limit} + `; + } + + generateColumnStatisticsQuery(_schemaName: string, _tableName: string): string | null { + return null; + } + + generateRandomizedCardinalitySampleQuery(tableName: string, columnName: string, sampleSize: number): string { + return ` 
+ WITH sampled AS ( + SELECT ${columnName} AS val + FROM ${tableName} + WHERE ${columnName} IS NOT NULL + ORDER BY RANDOM() + LIMIT ${sampleSize} + ) + SELECT COUNT(DISTINCT val) AS cardinality + FROM sampled + `; + } + + getTimeTruncExpression( + column: string, + granularity: 'day' | 'week' | 'month' | 'quarter' | 'year', + _timezone?: string, + ): string { + switch (granularity) { + case 'day': + return `DATE(${column})`; + case 'week': + return `DATE(${column}, 'weekday 0', '-6 days')`; + case 'month': + return `DATE(${column}, 'start of month')`; + case 'quarter': + return `DATE(${column}, 'start of month', '-' || ((CAST(STRFTIME('%m', ${column}) AS INTEGER) - 1) % 3) || ' months')`; + case 'year': + return `DATE(${column}, 'start of year')`; + } + } + + getCustomTimeTruncExpression(column: string, interval: string, origin?: string, _timezone?: string): string { // interval is "<amount> <unit>"; buckets are counted from origin (default 1970-01-01) + const [amount, unit] = interval.split(' '); + const originExpr = origin ? `julianday('${origin.replace(/'/g, "''")}')` : `julianday('1970-01-01')`; // double embedded quotes so origin stays a single SQL string literal + const unitDays = unit === 'day' ? 1 : unit === 'week' ? 
7 : 30; // units other than day/week are approximated as 30 days + const intervalDays = Number(amount) * unitDays; + return `DATE(${originExpr} + (CAST((julianday(${column}) - ${originExpr}) / ${intervalDays} AS INTEGER) * ${intervalDays}))`; // add the bucket offset back onto the same origin it was measured from + } + + parseIntervalToSql(interval: string): string { + return `'${interval}'`; + } +} diff --git a/packages/connector-sqlite/src/index.ts b/packages/connector-sqlite/src/index.ts new file mode 100644 index 00000000..037506e8 --- /dev/null +++ b/packages/connector-sqlite/src/index.ts @@ -0,0 +1,16 @@ +export { KloSqliteDialect } from './dialect.js'; +export { + isKloSqliteConnectionConfig, + KloSqliteScanConnector, + sqliteDatabasePathFromConfig, + type KloSqliteColumnDistinctValuesOptions, + type KloSqliteColumnDistinctValuesResult, + type KloSqliteConnectionConfig, + type KloSqliteReadOnlyQueryInput, + type KloSqliteScanConnectorOptions, + type SqliteDatabasePathInput, +} from './connector.js'; +export { + createSqliteLiveDatabaseIntrospection, + type CreateSqliteLiveDatabaseIntrospectionOptions, +} from './live-database-introspection.js'; diff --git a/packages/connector-sqlite/src/live-database-introspection.ts b/packages/connector-sqlite/src/live-database-introspection.ts new file mode 100644 index 00000000..e3546965 --- /dev/null +++ b/packages/connector-sqlite/src/live-database-introspection.ts @@ -0,0 +1,30 @@ +import type { LiveDatabaseIntrospectionPort } from '@klo/context/ingest'; +import type { KloProjectConnectionConfig } from '@klo/context/project'; +import { KloSqliteScanConnector, type KloSqliteConnectionConfig } from './connector.js'; + +export interface CreateSqliteLiveDatabaseIntrospectionOptions { + projectDir?: string; + connections: Record; + now?: () => Date; +} + +export function createSqliteLiveDatabaseIntrospection( + options: CreateSqliteLiveDatabaseIntrospectionOptions, +): LiveDatabaseIntrospectionPort { + return { + async extractSchema(connectionId: string) { + const connection = options.connections[connectionId] as KloSqliteConnectionConfig | 
undefined; + const connector = new KloSqliteScanConnector({ + connectionId, + connection, + projectDir: options.projectDir, + now: options.now, + }); + try { + return await connector.introspect({ connectionId, driver: 'sqlite' }, { runId: `sqlite-${connectionId}` }); + } finally { + await connector.cleanup(); + } + }, + }; +} diff --git a/packages/connector-sqlite/src/package-exports.test.ts b/packages/connector-sqlite/src/package-exports.test.ts new file mode 100644 index 00000000..0947ae3c --- /dev/null +++ b/packages/connector-sqlite/src/package-exports.test.ts @@ -0,0 +1,13 @@ +import { describe, expect, it } from 'vitest'; + +describe('@klo/connector-sqlite package exports', () => { + it('exports the native SQLite scan connector surface', async () => { + const connector = await import('./index.js'); + + expect(connector.KloSqliteDialect).toBeTypeOf('function'); + expect(connector.KloSqliteScanConnector).toBeTypeOf('function'); + expect(connector.createSqliteLiveDatabaseIntrospection).toBeTypeOf('function'); + expect(connector.isKloSqliteConnectionConfig).toBeTypeOf('function'); + expect(connector.sqliteDatabasePathFromConfig).toBeTypeOf('function'); + }); +}); diff --git a/packages/connector-sqlite/tsconfig.json b/packages/connector-sqlite/tsconfig.json new file mode 100644 index 00000000..965e6978 --- /dev/null +++ b/packages/connector-sqlite/tsconfig.json @@ -0,0 +1,9 @@ +{ + "extends": "../../tsconfig.base.json", + "compilerOptions": { + "outDir": "./dist", + "rootDir": "./src" + }, + "include": ["src/**/*.ts"], + "exclude": ["dist", "node_modules"] +} diff --git a/packages/connector-sqlserver/package.json b/packages/connector-sqlserver/package.json new file mode 100644 index 00000000..fea995ce --- /dev/null +++ b/packages/connector-sqlserver/package.json @@ -0,0 +1,48 @@ +{ + "name": "@klo/connector-sqlserver", + "version": "0.0.0-private", + "description": "SQL Server connector package for KLO scan interfaces", + "private": true, + "type": "module", + 
"engines": { + "node": ">=22.0.0" + }, + "main": "dist/index.js", + "types": "dist/index.d.ts", + "exports": { + ".": { + "types": "./dist/index.d.ts", + "import": "./dist/index.js", + "default": "./dist/index.js" + }, + "./package.json": "./package.json" + }, + "files": [ + "dist" + ], + "scripts": { + "build": "tsc -p tsconfig.json", + "test": "vitest run", + "type-check": "tsc -p tsconfig.json --noEmit" + }, + "dependencies": { + "@klo/context": "workspace:*", + "mssql": "^12.2.0" + }, + "devDependencies": { + "@types/mssql": "^9.1.8", + "@types/node": "^24.3.0", + "typescript": "^5.9.3", + "vitest": "^4.0.18" + }, + "license": "Apache-2.0", + "repository": { + "type": "git", + "url": "git+https://github.com/kaelio/ktx.git", + "directory": "packages/connector-sqlserver" + }, + "bugs": { + "url": "https://github.com/kaelio/ktx/issues" + }, + "homepage": "https://github.com/kaelio/ktx#readme" +} diff --git a/packages/connector-sqlserver/src/connector.test.ts b/packages/connector-sqlserver/src/connector.test.ts new file mode 100644 index 00000000..813a6f69 --- /dev/null +++ b/packages/connector-sqlserver/src/connector.test.ts @@ -0,0 +1,358 @@ +import { describe, expect, it, vi } from 'vitest'; +import { + createSqlServerLiveDatabaseIntrospection, + isKloSqlServerConnectionConfig, + KloSqlServerScanConnector, + sqlServerConnectionPoolConfigFromConfig, + type KloSqlServerPoolFactory, + type KloSqlServerQueryResult, +} from './index.js'; + +function recordset>( + rows: T[], + columnNames: string[], +): T[] & { columns: Record } { + const withColumns = rows as T[] & { columns: Record }; + withColumns.columns = Object.fromEntries(columnNames.map((name) => [name, { type: { declaration: 'nvarchar' } }])); + return withColumns; +} + +function result>(rows: T[], columnNames: string[]): KloSqlServerQueryResult { + return { recordset: recordset(rows, columnNames) }; +} + +function fakePoolFactory(): KloSqlServerPoolFactory { + const query = vi.fn(async (sql: string): Promise 
=> { + if (sql.includes('INFORMATION_SCHEMA.TABLES')) { + return result( + [ + { table_name: 'customers', table_type: 'BASE TABLE' }, + { table_name: 'orders', table_type: 'BASE TABLE' }, + { table_name: 'order_summary', table_type: 'VIEW' }, + ], + ['table_name', 'table_type'], + ); + } + if (sql.includes("ep.name = 'MS_Description'") && sql.includes('ep.minor_id = 0')) { + return result([{ table_name: 'customers', table_comment: 'Customer table' }], [ + 'table_name', + 'table_comment', + ]); + } + if (sql.includes("ep.name = 'MS_Description'") && sql.includes('ep.minor_id = c.column_id')) { + return result([{ table_name: 'customers', column_name: 'id', column_comment: 'PK' }], [ + 'table_name', + 'column_name', + 'column_comment', + ]); + } + if (sql.includes('INFORMATION_SCHEMA.COLUMNS')) { + return result( + [ + { table_name: 'customers', column_name: 'id', data_type: 'int', is_nullable: 'NO' }, + { table_name: 'customers', column_name: 'name', data_type: 'nvarchar', is_nullable: 'NO' }, + { table_name: 'orders', column_name: 'id', data_type: 'int', is_nullable: 'NO' }, + { table_name: 'orders', column_name: 'customer_id', data_type: 'int', is_nullable: 'NO' }, + { table_name: 'orders', column_name: 'status', data_type: 'nvarchar', is_nullable: 'YES' }, + { table_name: 'order_summary', column_name: 'status', data_type: 'nvarchar', is_nullable: 'YES' }, + ], + ['table_name', 'column_name', 'data_type', 'is_nullable'], + ); + } + if (sql.includes("CONSTRAINT_TYPE = 'PRIMARY KEY'")) { + return result( + [ + { table_name: 'customers', column_name: 'id' }, + { table_name: 'orders', column_name: 'id' }, + ], + ['table_name', 'column_name'], + ); + } + if (sql.includes('REFERENTIAL_CONSTRAINTS')) { + return result( + [ + { + table_name: 'orders', + column_name: 'customer_id', + referenced_table_schema: 'dbo', + referenced_table_name: 'customers', + referenced_column_name: 'id', + constraint_name: 'orders_customer_id_fk', + }, + ], + [ + 'table_name', + 'column_name', 
+ 'referenced_table_schema', + 'referenced_table_name', + 'referenced_column_name', + 'constraint_name', + ], + ); + } + if (sql.includes('sys.partitions') && sql.includes('GROUP BY t.name')) { + return result( + [ + { table_name: 'customers', row_count: 2 }, + { table_name: 'orders', row_count: 2 }, + ], + ['table_name', 'row_count'], + ); + } + if (sql.includes('SELECT TOP 1 [id], [status] FROM [dbo].[orders]')) { + return result([{ id: 10, status: 'paid' }], ['id', 'status']); + } + if (sql.includes('SELECT TOP 1 * FROM (select id, status from dbo.orders) AS klo_query_result')) { + return result([{ id: 10, status: 'paid' }], ['id', 'status']); + } + if (sql.includes('SELECT TOP 5 [status] FROM [dbo].[orders]')) { + return result([{ status: 'paid' }, { status: 'open' }], ['status']); + } + if (sql.includes('COUNT(DISTINCT val)')) { + return result([{ cardinality: 2 }], ['cardinality']); + } + if (sql.includes('SELECT TOP 10 val')) { + return result([{ val: 'open' }, { val: 'paid' }], ['val']); + } + if (sql.includes('SUM(p.rows) AS row_count') && sql.includes('t.name = @tableName')) { + return result([{ row_count: 2 }], ['row_count']); + } + if (sql.includes('SELECT s.name AS schema_name')) { + return result([{ schema_name: 'dbo' }, { schema_name: 'sales' }], ['schema_name']); + } + if (sql.trim() === 'SELECT 1') { + return result([{ ok: 1 }], ['ok']); + } + throw new Error(`Unexpected SQL: ${sql}`); + }); + const request: { input(name: string, value: unknown): typeof request; query: typeof query } = { + input: vi.fn((_key: string, _value: unknown) => request), + query, + }; + const close = vi.fn(async () => undefined); + return { + createPool: vi.fn(async () => ({ + request: () => request, + close, + })), + }; +} + +describe('KloSqlServerScanConnector', () => { + it('resolves SQL Server connection configuration safely', () => { + expect( + isKloSqlServerConnectionConfig({ + driver: 'sqlserver', + host: 'localhost', + database: 'analytics', + readonly: true, + 
}), + ).toBe(true); + expect(isKloSqlServerConnectionConfig({ driver: 'mysql', host: 'localhost', database: 'analytics' })).toBe(false); + expect( + sqlServerConnectionPoolConfigFromConfig({ + connectionId: 'warehouse', + connection: { + driver: 'sqlserver', + host: 'db.example.test', + port: 14330, + database: 'analytics', + username: 'reader', + trustServerCertificate: false, + readonly: true, + }, + }), + ).toMatchObject({ + server: 'db.example.test', + port: 14330, + database: 'analytics', + user: 'reader', + options: { encrypt: true, trustServerCertificate: false }, + }); + expect(() => + sqlServerConnectionPoolConfigFromConfig({ + connectionId: 'warehouse', + connection: { driver: 'sqlserver', host: 'db.example.test', database: 'analytics', readonly: false }, + }), + ).toThrow('Native SQL Server connector requires connections.warehouse.readonly: true'); + }); + + it('introspects schema, primary keys, comments, row counts, views, and foreign keys', async () => { + const connector = new KloSqlServerScanConnector({ + connectionId: 'warehouse', + connection: { + driver: 'sqlserver', + host: 'db.example.test', + database: 'analytics', + username: 'reader', + schema: 'dbo', + readonly: true, + }, + poolFactory: fakePoolFactory(), + now: () => new Date('2026-04-29T16:00:00.000Z'), + }); + + const snapshot = await connector.introspect( + { connectionId: 'warehouse', driver: 'sqlserver' }, + { runId: 'scan-run-1' }, + ); + + expect(snapshot).toMatchObject({ + connectionId: 'warehouse', + driver: 'sqlserver', + extractedAt: '2026-04-29T16:00:00.000Z', + scope: { catalogs: ['analytics'], schemas: ['dbo'] }, + metadata: { + database: 'analytics', + host: 'db.example.test', + schemas: ['dbo'], + table_count: 3, + total_columns: 6, + }, + }); + expect(snapshot.tables.map((table) => [table.name, table.kind, table.estimatedRows, table.comment])).toEqual([ + ['customers', 'table', 2, 'Customer table'], + ['orders', 'table', 2, null], + ['order_summary', 'view', null, null], + 
]); + expect(snapshot.tables.find((table) => table.name === 'customers')?.columns[0]).toMatchObject({ + name: 'id', + nativeType: 'int', + normalizedType: 'int', + dimensionType: 'number', + nullable: false, + primaryKey: true, + comment: 'PK', + }); + expect(snapshot.tables.find((table) => table.name === 'orders')?.foreignKeys).toEqual([ + { + fromColumn: 'customer_id', + toCatalog: 'analytics', + toDb: 'dbo', + toTable: 'customers', + toColumn: 'id', + constraintName: 'orders_customer_id_fk', + }, + ]); + }); + + it('runs samples, distinct values, read-only SQL, row count, schema list, and cleanup', async () => { + const poolFactory = fakePoolFactory(); + const connector = new KloSqlServerScanConnector({ + connectionId: 'warehouse', + connection: { + driver: 'sqlserver', + host: 'db.example.test', + database: 'analytics', + username: 'reader', + schema: 'dbo', + readonly: true, + }, + poolFactory, + }); + + await expect( + connector.sampleTable( + { + connectionId: 'warehouse', + table: { catalog: 'analytics', db: 'dbo', name: 'orders' }, + columns: ['id', 'status'], + limit: 1, + }, + { runId: 'scan-run-1' }, + ), + ).resolves.toEqual({ + headers: ['id', 'status'], + headerTypes: ['nvarchar', 'nvarchar'], + rows: [[10, 'paid']], + totalRows: 1, + }); + + await expect( + connector.sampleColumn( + { connectionId: 'warehouse', table: { catalog: 'analytics', db: 'dbo', name: 'orders' }, column: 'status', limit: 5 }, + { runId: 'scan-run-1' }, + ), + ).resolves.toMatchObject({ values: ['paid', 'open'], nullCount: null, distinctCount: null }); + + await expect( + connector.getColumnDistinctValues( + { catalog: 'analytics', db: 'dbo', name: 'orders' }, + 'status', + { maxCardinality: 5, limit: 10, sampleSize: 100 }, + ), + ).resolves.toEqual({ values: ['open', 'paid'], cardinality: 2 }); + + await expect( + connector.executeReadOnly( + { connectionId: 'warehouse', sql: 'select id, status from dbo.orders', maxRows: 1 }, + { runId: 'scan-run-1' }, + ), + 
).resolves.toMatchObject({ headers: ['id', 'status'], rows: [[10, 'paid']], totalRows: 1, rowCount: 1 }); + + await expect( + connector.executeReadOnly({ connectionId: 'warehouse', sql: 'delete from orders' }, { runId: 'scan-run-1' }), + ).rejects.toThrow('Only read-only SELECT/WITH queries can be executed locally'); + + await expect(connector.getTableRowCount('orders')).resolves.toBe(2); + await expect(connector.listSchemas()).resolves.toEqual(['dbo', 'sales']); + await expect( + connector.columnStats( + { connectionId: 'warehouse', table: { catalog: 'analytics', db: 'dbo', name: 'orders' }, column: 'status' }, + { runId: 'scan-run-1' }, + ), + ).resolves.toBeNull(); + + await connector.cleanup(); + }); + + it('adapts native SQL Server snapshots to live-database introspection for local ingest', async () => { + const introspection = createSqlServerLiveDatabaseIntrospection({ + connections: { + warehouse: { + driver: 'sqlserver', + host: 'db.example.test', + database: 'analytics', + username: 'reader', + schema: 'dbo', + readonly: true, + }, + }, + poolFactory: fakePoolFactory(), + now: () => new Date('2026-04-29T16:00:00.000Z'), + }); + + const snapshot = await introspection.extractSchema('warehouse'); + + expect(snapshot).toMatchObject({ + connectionId: 'warehouse', + extractedAt: '2026-04-29T16:00:00.000Z', + }); + expect(snapshot.tables.find((table) => table.name === 'customers')).toMatchObject({ + name: 'customers', + catalog: 'analytics', + db: 'dbo', + columns: [ + { + name: 'id', + nativeType: 'int', + normalizedType: 'int', + dimensionType: 'number', + nullable: false, + primaryKey: true, + comment: 'PK', + }, + { + name: 'name', + nativeType: 'nvarchar', + normalizedType: 'nvarchar', + dimensionType: 'string', + nullable: false, + primaryKey: false, + comment: null, + }, + ], + foreignKeys: [], + }); + }); +}); diff --git a/packages/connector-sqlserver/src/connector.ts b/packages/connector-sqlserver/src/connector.ts new file mode 100644 index 
00000000..ce5c491b --- /dev/null +++ b/packages/connector-sqlserver/src/connector.ts @@ -0,0 +1,701 @@ +import { assertReadOnlySql } from '@klo/context/connections'; +import { + createKloConnectorCapabilities, + type KloColumnSampleInput, + type KloColumnSampleResult, + type KloColumnStatsInput, + type KloColumnStatsResult, + type KloQueryResult, + type KloReadOnlyQueryInput, + type KloScanConnector, + type KloScanContext, + type KloScanInput, + type KloSchemaColumn, + type KloSchemaForeignKey, + type KloSchemaSnapshot, + type KloSchemaTable, + type KloTableRef, + type KloTableSampleInput, + type KloTableSampleResult, +} from '@klo/context/scan'; +import { readFileSync } from 'node:fs'; +import { homedir } from 'node:os'; +import { resolve } from 'node:path'; +import sql from 'mssql'; +import { KloSqlServerDialect } from './dialect.js'; + +export interface KloSqlServerConnectionConfig { + driver?: string; + host?: string; + port?: number; + database?: string; + username?: string; + user?: string; + password?: string; + url?: string; + schema?: string; + schemas?: string[]; + trustServerCertificate?: boolean; + readonly?: boolean; + [key: string]: unknown; +} + +export interface KloSqlServerPoolConfig { + server: string; + port: number; + database: string; + user: string; + password?: string; + options: { encrypt: true; trustServerCertificate: boolean }; + pool: { max: number; min: number; idleTimeoutMillis: number }; +} + +export interface KloSqlServerQueryResult { + recordset?: Array> & { columns?: Record }; +} + +interface KloSqlServerRequest { + input(name: string, value: unknown): KloSqlServerRequest; + query(query: string): Promise; +} + +export interface KloSqlServerPool { + request(): KloSqlServerRequest; + close(): Promise; +} + +export interface KloSqlServerPoolFactory { + createPool(config: KloSqlServerPoolConfig): Promise; +} + +interface KloSqlServerResolvedEndpoint { + host: string; + port: number; + close?: () => Promise; +} + +export interface 
KloSqlServerEndpointResolver { + resolve(input: { + host: string; + port: number; + connection: KloSqlServerConnectionConfig; + }): Promise; +} + +export interface KloSqlServerScanConnectorOptions { + connectionId: string; + connection: KloSqlServerConnectionConfig | undefined; + poolFactory?: KloSqlServerPoolFactory; + endpointResolver?: KloSqlServerEndpointResolver; + env?: NodeJS.ProcessEnv; + now?: () => Date; +} + +export interface KloSqlServerReadOnlyQueryInput extends KloReadOnlyQueryInput { + params?: Record; +} + +export interface KloSqlServerColumnDistinctValuesOptions { + maxCardinality: number; + limit: number; + sampleSize?: number; +} + +export interface KloSqlServerColumnDistinctValuesResult { + values: string[] | null; + cardinality: number; +} + +interface KloSqlServerTableSampleResult extends KloTableSampleResult { + headerTypes?: string[]; +} + +function sqlTypeDeclaration(type: unknown): string { + if (typeof type === 'function') { + try { + return sqlTypeDeclaration(type()); + } catch { + return 'unknown'; + } + } + if (typeof type === 'object' && type !== null && 'declaration' in type) { + const declaration = (type as { declaration?: unknown }).declaration; + return typeof declaration === 'string' ? declaration : 'unknown'; + } + return 'unknown'; +} + +function sqlRecordset( + rows: Array> | undefined, + columns: Record | undefined, +): NonNullable { + const recordset = [...(rows ?? [])] as NonNullable; + recordset.columns = Object.fromEntries( + Object.entries(columns ?? 
{}).map(([name, metadata]) => [ + name, + { type: { declaration: sqlTypeDeclaration(metadata.type) } }, + ]), + ); + return recordset; +} + +class DefaultSqlServerPoolFactory implements KloSqlServerPoolFactory { + async createPool(config: KloSqlServerPoolConfig): Promise { + const pool = await new sql.ConnectionPool(config as sql.config).connect(); + return { + request() { + const request = pool.request(); + return { + input(name: string, value: unknown) { + request.input(name, value); + return this; + }, + async query(query: string) { + const result = await request.query(query); + return { + recordset: sqlRecordset(result.recordset as Array> | undefined, result.recordset?.columns), + }; + }, + }; + }, + close: () => pool.close(), + }; + } +} + +function stringConfigValue( + connection: KloSqlServerConnectionConfig | undefined, + key: keyof KloSqlServerConnectionConfig, + env: NodeJS.ProcessEnv, +): string | undefined { + const value = connection?.[key]; + return typeof value === 'string' && value.trim().length > 0 ? resolveStringReference(value.trim(), env) : undefined; +} + +function resolveStringReference(value: string, env: NodeJS.ProcessEnv): string { // expands "env:NAME" and "file:PATH" references; any other value is returned unchanged + if (value.startsWith('env:')) { + return env[value.slice('env:'.length)] ?? ''; // an unset variable resolves to the empty string + } + if (value.startsWith('file:')) { + const rawPath = value.slice('file:'.length); + const path = rawPath.startsWith('~') ? resolve(homedir(), rawPath.slice(1).replace(/^[\\\/]+/, '')) : rawPath; // strip the separator after "~": resolve() treats "/x" as absolute and would discard homedir() + return readFileSync(path, 'utf-8').trim(); + } + return value; +} + +function parseSqlServerUrl(url: string): Partial { + const parsed = new URL(url); + return { + host: parsed.hostname, + port: parsed.port ? Number(parsed.port) : undefined, + database: parsed.pathname.replace(/^\/+/, '') || undefined, + username: parsed.username ? decodeURIComponent(parsed.username) : undefined, + password: parsed.password ? 
decodeURIComponent(parsed.password) : undefined, + trustServerCertificate: parsed.searchParams.get('trustServerCertificate') === 'true', + }; +} + +function maybeNumber(value: unknown): number | undefined { + return typeof value === 'number' && Number.isFinite(value) ? value : undefined; +} + +function schemaNames(connection: KloSqlServerConnectionConfig, env: NodeJS.ProcessEnv): string[] { // explicit non-blank `schemas` entries win; otherwise the single `schema` value, defaulting to 'dbo' + if (Array.isArray(connection.schemas) && connection.schemas.length > 0) { + return connection.schemas.filter((schema) => schema.trim().length > 0).map((schema) => resolveStringReference(schema.trim(), env)); // trim before resolving, as stringConfigValue does + } + return [stringConfigValue(connection, 'schema', env) ?? 'dbo']; +} + +function groupByTable(rows: T[]): Map { + const grouped = new Map(); + for (const row of rows) { + const values = grouped.get(row.table_name) ?? []; + values.push(row); + grouped.set(row.table_name, values); + } + return grouped; +} + +function firstNumber(value: unknown): number | null { + const numberValue = Number(value); + return Number.isFinite(numberValue) ? numberValue : null; +} + +function limitSqlForSqlServerExecution(sqlText: string, maxRows: number | undefined): string { // validates the SQL as read-only, drops trailing semicolons, and optionally wraps it in SELECT TOP n + const trimmed = assertReadOnlySql(sqlText).replace(/;+\s*$/, ''); + if (maxRows == null) { // only an absent limit means "unlimited"; 0 and NaN fall through to the validation below + return trimmed; + } + if (!Number.isInteger(maxRows) || maxRows <= 0) { + throw new Error('maxRows must be a positive integer.'); + } + return `SELECT TOP ${maxRows} * FROM (${trimmed}) AS klo_query_result`; +} + +export function isKloSqlServerConnectionConfig(connection: KloSqlServerConnectionConfig | undefined): boolean { + return String(connection?.driver ?? 
'').toLowerCase() === 'sqlserver'; +} + +export function sqlServerConnectionPoolConfigFromConfig(input: { + connectionId: string; + connection: KloSqlServerConnectionConfig | undefined; + env?: NodeJS.ProcessEnv; +}): KloSqlServerPoolConfig { + if (!isKloSqlServerConnectionConfig(input.connection)) { + throw new Error(`Native SQL Server connector cannot run driver "${input.connection?.driver ?? 'unknown'}"`); + } + if (input.connection?.readonly !== true) { + throw new Error(`Native SQL Server connector requires connections.${input.connectionId}.readonly: true`); + } + + const env = input.env ?? process.env; + const referencedUrl = stringConfigValue(input.connection, 'url', env); + const urlConfig = referencedUrl ? parseSqlServerUrl(referencedUrl) : {}; + const merged: KloSqlServerConnectionConfig = { ...urlConfig, ...input.connection }; + const server = stringConfigValue(merged, 'host', env); + const database = stringConfigValue(merged, 'database', env); + const user = stringConfigValue(merged, 'username', env) ?? stringConfigValue(merged, 'user', env); + + if (!server) { + throw new Error(`Native SQL Server connector requires connections.${input.connectionId}.host or url`); + } + if (!database) { + throw new Error(`Native SQL Server connector requires connections.${input.connectionId}.database or url`); + } + if (!user) { + throw new Error(`Native SQL Server connector requires connections.${input.connectionId}.username, user, or url`); + } + + return { + server, + port: maybeNumber(merged.port) ?? 1433, + database, + user, + password: stringConfigValue(merged, 'password', env), + options: { encrypt: true, trustServerCertificate: merged.trustServerCertificate ?? 
true }, + pool: { max: 10, min: 0, idleTimeoutMillis: 30000 }, + }; +} + +export class KloSqlServerScanConnector implements KloScanConnector { + readonly id: string; + readonly driver = 'sqlserver' as const; + readonly capabilities = createKloConnectorCapabilities({ + tableSampling: true, + columnSampling: true, + columnStats: false, + readOnlySql: true, + nestedAnalysis: false, + formalForeignKeys: true, + estimatedRowCounts: true, + }); + + private readonly connectionId: string; + private readonly connection: KloSqlServerConnectionConfig; + private readonly poolConfig: KloSqlServerPoolConfig; + private readonly schemas: string[]; + private readonly poolFactory: KloSqlServerPoolFactory; + private readonly endpointResolver?: KloSqlServerEndpointResolver; + private readonly now: () => Date; + private readonly dialect = new KloSqlServerDialect(); + private pool: KloSqlServerPool | null = null; + private resolvedEndpoint: KloSqlServerResolvedEndpoint | null = null; + + constructor(options: KloSqlServerScanConnectorOptions) { + this.connectionId = options.connectionId; + this.connection = options.connection ?? {}; + const env = options.env ?? process.env; + this.poolConfig = sqlServerConnectionPoolConfigFromConfig({ + connectionId: options.connectionId, + connection: options.connection, + env, + }); + this.schemas = schemaNames(this.connection, env); + this.poolFactory = options.poolFactory ?? new DefaultSqlServerPoolFactory(); + this.endpointResolver = options.endpointResolver; + this.now = options.now ?? (() => new Date()); + this.id = `sqlserver:${options.connectionId}`; + } + + async testConnection(): Promise<{ success: boolean; error?: string }> { + try { + await this.query('SELECT 1'); + return { success: true }; + } catch (error) { + return { success: false, error: error instanceof Error ? 
error.message : String(error) }; + } + } + + async introspect(input: KloScanInput, _ctx: KloScanContext): Promise { + this.assertConnection(input.connectionId); + const tables: KloSchemaTable[] = []; + for (const schemaName of this.schemas) { + tables.push(...(await this.introspectSchema(schemaName))); + } + return { + connectionId: this.connectionId, + driver: 'sqlserver', + extractedAt: this.now().toISOString(), + scope: { catalogs: [this.poolConfig.database], schemas: this.schemas }, + metadata: { + database: this.poolConfig.database, + schemas: this.schemas, + host: this.poolConfig.server, + table_count: tables.length, + total_columns: tables.reduce((sum, table) => sum + table.columns.length, 0), + }, + tables, + }; + } + + async sampleTable(input: KloTableSampleInput, _ctx: KloScanContext): Promise { + this.assertConnection(input.connectionId); + const result = await this.query(this.dialect.generateSampleQuery(this.qTableName(input.table), input.limit, input.columns)); + return { headers: result.headers, headerTypes: result.headerTypes, rows: result.rows, totalRows: result.totalRows }; + } + + async sampleColumn(input: KloColumnSampleInput, _ctx: KloScanContext): Promise { + this.assertConnection(input.connectionId); + const result = await this.query( + this.dialect.generateColumnSampleQuery(this.qTableName(input.table), input.column, input.limit), + ); + const values = result.rows.filter((row) => row.length > 0 && row[0] !== null).map((row) => row[0]); + return { values, nullCount: null, distinctCount: null }; + } + + async columnStats(_input: KloColumnStatsInput, _ctx: KloScanContext): Promise { + return null; + } + + async executeReadOnly(input: KloSqlServerReadOnlyQueryInput, _ctx: KloScanContext): Promise { + this.assertConnection(input.connectionId); + const limitedSql = limitSqlForSqlServerExecution(input.sql, input.maxRows); + const prepared = this.dialect.prepareQuery(limitedSql, input.params); + const result = await this.query(prepared.sql, 
prepared.params); + return { ...result, rowCount: result.rows.length }; + } + + async getColumnDistinctValues( + table: KloTableRef, + columnName: string, + options: KloSqlServerColumnDistinctValuesOptions, + ): Promise { + const tableName = this.qTableName(table); + const quotedColumn = this.dialect.quoteIdentifier(columnName); + const cardinalityRows = await this.queryRaw<{ cardinality: unknown }>( + this.dialect.generateCardinalitySampleQuery(tableName, quotedColumn, options.sampleSize ?? 10000), + ); + const cardinality = Number(cardinalityRows[0]?.cardinality); + if (Number.isNaN(cardinality)) { + return null; + } + if (cardinality === 0) { + return { values: [], cardinality: 0 }; + } + if (cardinality > options.maxCardinality) { + return { values: null, cardinality }; + } + const valuesRows = await this.queryRaw<{ val: unknown }>( + this.dialect.generateDistinctValuesQuery(tableName, quotedColumn, options.limit), + ); + return { values: valuesRows.filter((row) => row.val !== null).map((row) => String(row.val)), cardinality }; + } + + async getTableRowCount(tableName: string, schemaName = this.schemas[0] ?? 'dbo'): Promise { + const rows = await this.queryRaw<{ row_count: unknown }>( + ` + SELECT SUM(p.rows) AS row_count + FROM sys.tables t + INNER JOIN sys.partitions p ON t.object_id = p.object_id + INNER JOIN sys.schemas s ON t.schema_id = s.schema_id + WHERE s.name = @schemaName + AND t.name = @tableName + AND p.index_id IN (0, 1) + `, + { schemaName, tableName }, + ); + return firstNumber(rows[0]?.row_count) ?? 
0; + } + + qTableName(table: Pick & Partial>): string { + return this.dialect.formatTableName(table); + } + + quoteIdentifier(identifier: string): string { + return this.dialect.quoteIdentifier(identifier); + } + + async listSchemas(): Promise { + const rows = await this.queryRaw<{ schema_name: string }>(` + SELECT s.name AS schema_name + FROM sys.schemas s + WHERE s.name NOT IN ( + 'INFORMATION_SCHEMA', 'sys', 'guest', + 'db_owner', 'db_accessadmin', 'db_securityadmin', 'db_ddladmin', + 'db_backupoperator', 'db_datareader', 'db_datawriter', + 'db_denydatareader', 'db_denydatawriter' + ) + ORDER BY s.name + `); + return rows.map((row) => row.schema_name); + } + + async cleanup(): Promise { + if (this.pool) { + await this.pool.close(); + this.pool = null; + } + if (this.resolvedEndpoint?.close) { + await this.resolvedEndpoint.close(); + this.resolvedEndpoint = null; + } + } + + private async introspectSchema(schemaName: string): Promise { + const tables = await this.queryRaw<{ table_name: string; table_type: string }>( + ` + SELECT TABLE_NAME AS table_name, TABLE_TYPE AS table_type + FROM INFORMATION_SCHEMA.TABLES + WHERE TABLE_SCHEMA = @schemaName + AND TABLE_TYPE IN ('BASE TABLE', 'VIEW') + ORDER BY TABLE_NAME + `, + { schemaName }, + ); + const columns = await this.queryRaw<{ + table_name: string; + column_name: string; + data_type: string; + is_nullable: string; + }>( + ` + SELECT TABLE_NAME AS table_name, COLUMN_NAME AS column_name, DATA_TYPE AS data_type, IS_NULLABLE AS is_nullable + FROM INFORMATION_SCHEMA.COLUMNS + WHERE TABLE_SCHEMA = @schemaName + ORDER BY TABLE_NAME, ORDINAL_POSITION + `, + { schemaName }, + ); + const tableComments = await this.tableComments(schemaName); + const columnComments = await this.columnComments(schemaName); + const primaryKeys = await this.primaryKeys(schemaName); + const foreignKeys = await this.foreignKeys(schemaName); + const rowCounts = await this.rowCounts(schemaName); + const columnsByTable = groupByTable(columns); + 
const foreignKeysByTable = groupByTable(foreignKeys); + + return tables.map((table) => ({ + catalog: this.poolConfig.database, + db: schemaName, + name: table.table_name, + kind: table.table_type === 'VIEW' ? 'view' : 'table', + comment: tableComments.get(table.table_name) ?? null, + estimatedRows: table.table_type === 'VIEW' ? null : rowCounts.get(table.table_name) ?? 0, + columns: (columnsByTable.get(table.table_name) ?? []).map((column) => + this.toSchemaColumn(column, primaryKeys.get(table.table_name) ?? new Set(), columnComments), + ), + foreignKeys: (foreignKeysByTable.get(table.table_name) ?? []).map((row) => this.toSchemaForeignKey(row)), + })); + } + + private async tableComments(schemaName: string): Promise> { + const rows = await this.queryRaw<{ table_name: string; table_comment: string }>( + ` + SELECT o.name AS table_name, CAST(ep.value AS NVARCHAR(MAX)) AS table_comment + FROM sys.objects o + INNER JOIN sys.schemas s ON o.schema_id = s.schema_id + INNER JOIN sys.extended_properties ep ON ep.major_id = o.object_id + AND ep.minor_id = 0 + AND ep.name = 'MS_Description' + WHERE s.name = @schemaName + AND o.type IN ('U', 'V') + `, + { schemaName }, + ); + return new Map(rows.map((row) => [row.table_name, row.table_comment])); + } + + private async columnComments(schemaName: string): Promise> { + const rows = await this.queryRaw<{ table_name: string; column_name: string; column_comment: string }>( + ` + SELECT o.name AS table_name, c.name AS column_name, CAST(ep.value AS NVARCHAR(MAX)) AS column_comment + FROM sys.columns c + INNER JOIN sys.objects o ON c.object_id = o.object_id + INNER JOIN sys.schemas s ON o.schema_id = s.schema_id + INNER JOIN sys.extended_properties ep ON ep.major_id = c.object_id + AND ep.minor_id = c.column_id + AND ep.name = 'MS_Description' + WHERE s.name = @schemaName + AND o.type IN ('U', 'V') + `, + { schemaName }, + ); + return new Map(rows.map((row) => [`${row.table_name}.${row.column_name}`, row.column_comment])); + } + + 
private async primaryKeys(schemaName: string): Promise>> { + const rows = await this.queryRaw<{ table_name: string; column_name: string }>( + ` + SELECT tc.TABLE_NAME AS table_name, kcu.COLUMN_NAME AS column_name + FROM INFORMATION_SCHEMA.TABLE_CONSTRAINTS tc + JOIN INFORMATION_SCHEMA.KEY_COLUMN_USAGE kcu + ON tc.CONSTRAINT_NAME = kcu.CONSTRAINT_NAME + AND tc.TABLE_SCHEMA = kcu.TABLE_SCHEMA + WHERE tc.CONSTRAINT_TYPE = 'PRIMARY KEY' + AND tc.TABLE_SCHEMA = @schemaName + ORDER BY tc.TABLE_NAME, kcu.ORDINAL_POSITION + `, + { schemaName }, + ); + const grouped = new Map>(); + for (const row of rows) { + const columns = grouped.get(row.table_name) ?? new Set(); + columns.add(row.column_name); + grouped.set(row.table_name, columns); + } + return grouped; + } + + private async foreignKeys(schemaName: string): Promise< + Array<{ + table_name: string; + column_name: string; + referenced_table_schema: string; + referenced_table_name: string; + referenced_column_name: string; + constraint_name: string; + }> + > { + return this.queryRaw( + ` + SELECT + fk.TABLE_NAME AS table_name, + fk.COLUMN_NAME AS column_name, + pk.TABLE_SCHEMA AS referenced_table_schema, + pk.TABLE_NAME AS referenced_table_name, + pk.COLUMN_NAME AS referenced_column_name, + fk.CONSTRAINT_NAME AS constraint_name + FROM INFORMATION_SCHEMA.REFERENTIAL_CONSTRAINTS rc + JOIN INFORMATION_SCHEMA.KEY_COLUMN_USAGE fk + ON fk.CONSTRAINT_CATALOG = rc.CONSTRAINT_CATALOG + AND fk.CONSTRAINT_SCHEMA = rc.CONSTRAINT_SCHEMA + AND fk.CONSTRAINT_NAME = rc.CONSTRAINT_NAME + JOIN INFORMATION_SCHEMA.KEY_COLUMN_USAGE pk + ON pk.CONSTRAINT_CATALOG = rc.UNIQUE_CONSTRAINT_CATALOG + AND pk.CONSTRAINT_SCHEMA = rc.UNIQUE_CONSTRAINT_SCHEMA + AND pk.CONSTRAINT_NAME = rc.UNIQUE_CONSTRAINT_NAME + AND pk.ORDINAL_POSITION = fk.ORDINAL_POSITION + WHERE fk.TABLE_SCHEMA = @schemaName + ORDER BY fk.TABLE_NAME, fk.COLUMN_NAME + `, + { schemaName }, + ); + } + + private async rowCounts(schemaName: string): Promise> { + const rows = await 
this.queryRaw<{ table_name: string; row_count: unknown }>( + ` + SELECT t.name AS table_name, SUM(p.rows) AS row_count + FROM sys.tables t + INNER JOIN sys.partitions p ON t.object_id = p.object_id + INNER JOIN sys.schemas s ON t.schema_id = s.schema_id + WHERE s.name = @schemaName + AND p.index_id IN (0, 1) + GROUP BY t.name + `, + { schemaName }, + ); + return new Map(rows.map((row) => [row.table_name, firstNumber(row.row_count) ?? 0])); + } + + private toSchemaColumn( + column: { table_name: string; column_name: string; data_type: string; is_nullable: string }, + primaryKeys: Set, + comments: Map, + ): KloSchemaColumn { + return { + name: column.column_name, + nativeType: column.data_type, + normalizedType: this.dialect.mapDataType(column.data_type), + dimensionType: this.dialect.mapToDimensionType(column.data_type), + nullable: column.is_nullable === 'YES', + primaryKey: primaryKeys.has(column.column_name), + comment: comments.get(`${column.table_name}.${column.column_name}`) ?? null, + }; + } + + private toSchemaForeignKey(row: { + column_name: string; + referenced_table_schema: string; + referenced_table_name: string; + referenced_column_name: string; + constraint_name: string; + }): KloSchemaForeignKey { + return { + fromColumn: row.column_name, + toCatalog: this.poolConfig.database, + toDb: row.referenced_table_schema, + toTable: row.referenced_table_name, + toColumn: row.referenced_column_name, + constraintName: row.constraint_name || null, + }; + } + + private async poolForQuery(): Promise { + if (!this.pool) { + const config = { ...this.poolConfig }; + if (this.endpointResolver) { + this.resolvedEndpoint = await this.endpointResolver.resolve({ + host: config.server, + port: config.port, + connection: this.connection, + }); + config.server = this.resolvedEndpoint.host; + config.port = this.resolvedEndpoint.port; + } + this.pool = await this.poolFactory.createPool(config); + } + return this.pool; + } + + private async queryRaw>(query: string, params?: 
Record): Promise { + const pool = await this.poolForQuery(); + const request = pool.request(); + if (params) { + for (const [key, value] of Object.entries(params)) { + request.input(key, value); + } + } + const result = await request.query(query); + return (result.recordset ?? []) as T[]; + } + + private async query(query: string, params?: Record): Promise> { + const pool = await this.poolForQuery(); + const request = pool.request(); + if (params) { + for (const [key, value] of Object.entries(params)) { + request.input(key, value); + } + } + const result = await request.query(assertReadOnlySql(query)); + const recordset = result.recordset ?? []; + const columnMetadata = recordset.columns ?? {}; + const metadataHeaders = Object.keys(columnMetadata); + const headers = metadataHeaders.length > 0 ? metadataHeaders : Object.keys(recordset[0] ?? {}); + const headerTypes = headers.map((header) => columnMetadata[header]?.type?.declaration ?? 'unknown'); + return { + headers, + headerTypes, + rows: recordset.map((row) => headers.map((header) => row[header])), + totalRows: recordset.length, + }; + } + + private assertConnection(connectionId: string): void { + if (connectionId !== this.connectionId) { + throw new Error(`KLO SQL Server connector ${this.id} cannot serve connection ${connectionId}`); + } + } +} diff --git a/packages/connector-sqlserver/src/dialect.test.ts b/packages/connector-sqlserver/src/dialect.test.ts new file mode 100644 index 00000000..64db720c --- /dev/null +++ b/packages/connector-sqlserver/src/dialect.test.ts @@ -0,0 +1,49 @@ +import { describe, expect, it } from 'vitest'; +import { KloSqlServerDialect } from './dialect.js'; + +describe('KloSqlServerDialect', () => { + const dialect = new KloSqlServerDialect(); + + it('quotes identifiers and formats schema-qualified table names', () => { + expect(dialect.quoteIdentifier('events')).toBe('[events]'); + expect(dialect.quoteIdentifier('odd]name')).toBe('[odd]]name]'); + expect(dialect.formatTableName({ 
catalog: 'warehouse', db: 'dbo', name: 'events' })).toBe('[dbo].[events]'); + expect(dialect.formatTableName({ catalog: null, db: null, name: 'events' })).toBe('[events]'); + }); + + it('maps SQL Server types to KLO dimension types', () => { + expect(dialect.mapToDimensionType('datetime2')).toBe('time'); + expect(dialect.mapToDimensionType('decimal(18, 2)')).toBe('number'); + expect(dialect.mapToDimensionType('bigint')).toBe('number'); + expect(dialect.mapToDimensionType('bit')).toBe('boolean'); + expect(dialect.mapToDimensionType('uniqueidentifier')).toBe('string'); + expect(dialect.mapToDimensionType('')).toBe('string'); + }); + + it('builds sampling, distinct-value, pagination, and time SQL', () => { + expect(dialect.generateSampleQuery('[dbo].[events]', 25, ['id', 'event_name'])).toBe( + 'SELECT TOP 25 [id], [event_name] FROM [dbo].[events]', + ); + expect(dialect.generateColumnSampleQuery('[dbo].[events]', 'event_name', 10)).toBe( + "SELECT TOP 10 [event_name] FROM [dbo].[events] WHERE [event_name] IS NOT NULL AND LTRIM(RTRIM(CAST([event_name] AS NVARCHAR(MAX)))) != ''", + ); + expect(dialect.generateDistinctValuesQuery('[dbo].[events]', '[event_name]', 5)).toContain('SELECT TOP 5 val'); + expect(dialect.getTopClause(10)).toBe('TOP 10'); + expect(dialect.getLimitOffsetClause(10, 20)).toBe('OFFSET 20 ROWS FETCH NEXT 10 ROWS ONLY'); + expect(dialect.getTimeTruncExpression('created_at', 'month')).toBe( + 'DATEFROMPARTS(YEAR(created_at), MONTH(created_at), 1)', + ); + }); + + it('prepares named parameters using SQL Server @ parameters', () => { + expect( + dialect.prepareQuery('select * from events where id = :id and name = :name', { + id: 10, + name: 'signup', + }), + ).toEqual({ + sql: 'select * from events where id = @id and name = @name', + params: { id: 10, name: 'signup' }, + }); + }); +}); diff --git a/packages/connector-sqlserver/src/dialect.ts b/packages/connector-sqlserver/src/dialect.ts new file mode 100644 index 00000000..19f650b0 --- /dev/null +++ 
import type { KloSchemaDimensionType, KloTableRef } from '@klo/context/scan';

type SqlServerTableNameRef = Pick<KloTableRef, 'name'> & Partial<Pick<KloTableRef, 'catalog' | 'db'>>;

/**
 * SQL Server (T-SQL) dialect helpers: identifier quoting, type classification and
 * generation of the sampling / profiling / time-bucketing SQL fragments used by the
 * connector. All methods are pure string builders; nothing here talks to a database.
 */
export class KloSqlServerDialect {
  readonly type = 'sqlserver';

  /** Exact native type name (lower-case, without length/precision) -> dimension type. */
  private readonly typeMappings: Record<string, KloSchemaDimensionType> = {
    datetime: 'time',
    datetime2: 'time',
    date: 'time',
    time: 'time',
    datetimeoffset: 'time',
    smalldatetime: 'time',
    // In SQL Server `timestamp` is a synonym for `rowversion`: an 8-byte binary change
    // counter that has nothing to do with dates or times. It must be listed explicitly,
    // otherwise the substring fallback below would classify it as 'time'.
    timestamp: 'string',
    rowversion: 'string',
    int: 'number',
    bigint: 'number',
    smallint: 'number',
    tinyint: 'number',
    decimal: 'number',
    numeric: 'number',
    float: 'number',
    real: 'number',
    money: 'number',
    smallmoney: 'number',
    varchar: 'string',
    nvarchar: 'string',
    char: 'string',
    nchar: 'string',
    text: 'string',
    ntext: 'string',
    uniqueidentifier: 'string',
    xml: 'string',
    bit: 'boolean',
  };

  /** Wraps an identifier in brackets, doubling any `]` it contains. */
  quoteIdentifier(identifier: string): string {
    return `[${identifier.replace(/\]/g, ']]')}]`;
  }

  /**
   * Formats a table reference as `[schema].[table]`, or `[table]` when no schema (`db`)
   * is given. The catalog is intentionally not emitted.
   */
  formatTableName(table: SqlServerTableNameRef): string {
    return table.db
      ? `${this.quoteIdentifier(table.db)}.${this.quoteIdentifier(table.name)}`
      : this.quoteIdentifier(table.name);
  }

  /** Returns the native type unchanged; SQL Server types are not normalized further. */
  mapDataType(nativeType: string): string {
    return nativeType;
  }

  /**
   * Classifies a native SQL Server type as 'time', 'number', 'boolean' or 'string'.
   * Length/precision suffixes such as `(18, 2)` are ignored. Unknown or empty types
   * fall back to 'string'.
   */
  mapToDimensionType(nativeType: string): KloSchemaDimensionType {
    if (!nativeType) {
      return 'string';
    }
    const lower = nativeType.toLowerCase().trim();
    const normalized = (lower.includes('(') ? lower.split('(')[0]! : lower).trim();
    // Own-property check: a plain index lookup would also hit inherited keys such as
    // 'constructor' or 'tostring' and return a function instead of a dimension type.
    if (Object.prototype.hasOwnProperty.call(this.typeMappings, normalized)) {
      return this.typeMappings[normalized]!;
    }
    if (normalized.includes('time') || normalized.includes('date')) {
      return 'time';
    }
    if (
      normalized.includes('int') ||
      normalized.includes('num') ||
      normalized.includes('dec') ||
      normalized.includes('float') ||
      normalized.includes('money')
    ) {
      return 'number';
    }
    if (normalized.includes('bit')) {
      return 'boolean';
    }
    return 'string';
  }

  /**
   * Builds `SELECT TOP <limit> ...` over an already-quoted table name.
   * @param tableName Pre-formatted (quoted) table name.
   * @param columns   Unquoted column names; all columns (`*`) when omitted or empty.
   */
  generateSampleQuery(tableName: string, limit: number, columns?: string[]): string {
    const columnList =
      columns && columns.length > 0 ? columns.map((column) => this.quoteIdentifier(column)).join(', ') : '*';
    return `SELECT TOP ${limit} ${columnList} FROM ${tableName}`;
  }

  /** Samples non-null, non-blank values of a single (unquoted) column. */
  generateColumnSampleQuery(tableName: string, columnName: string, limit: number): string {
    const quotedColumn = this.quoteIdentifier(columnName);
    return `SELECT TOP ${limit} ${quotedColumn} FROM ${tableName} WHERE ${quotedColumn} IS NOT NULL AND LTRIM(RTRIM(CAST(${quotedColumn} AS NVARCHAR(MAX)))) != ''`;
  }

  /**
   * Rewrites `:name` placeholders to SQL Server `@name` parameters for every key in `params`.
   * The params object itself is returned untouched.
   */
  prepareQuery(sql: string, params?: Record<string, unknown>): { sql: string; params?: Record<string, unknown> } {
    if (!params) {
      return { sql, params: undefined };
    }
    let parameterizedQuery = sql;
    for (const key of Object.keys(params)) {
      // The key is escaped so regex metacharacters in a parameter name cannot change the
      // pattern. The lookbehind skips `::`, which T-SQL uses for CLR static calls such as
      // geography::Point, so the text after it is not rewritten as a parameter.
      const placeholder = new RegExp(`(?<!:):${this.escapeRegExp(key)}\\b`, 'g');
      // Function replacer: avoids `$`-sequences in the key being treated as replacement patterns.
      parameterizedQuery = parameterizedQuery.replace(placeholder, () => `@${key}`);
    }
    return { sql: parameterizedQuery, params };
  }

  /**
   * Row-level random filter keeping roughly `samplePct` (0..1 exclusive) of the rows.
   * Returns '' when the fraction is outside that open interval.
   */
  getRandomSampleFilter(samplePct: number): string {
    if (samplePct <= 0 || samplePct >= 1) {
      return '';
    }
    // The modulo is applied before ABS: ABS(CHECKSUM(...)) raises an arithmetic overflow
    // when CHECKSUM returns the minimum INT value, whereas ABS of a value in -99..99 cannot.
    return `ABS(CHECKSUM(NEWID()) % 100) < ${Math.round(samplePct * 100)}`;
  }

  /** `TABLESAMPLE (<n> PERCENT)` for a fraction in (0, 1); '' otherwise. */
  getTableSampleClause(samplePct: number): string {
    if (samplePct <= 0 || samplePct >= 1) {
      return '';
    }
    // Round away binary floating-point noise (0.07 * 100 === 7.000000000000001).
    const percent = Number((samplePct * 100).toFixed(6));
    return `TABLESAMPLE (${percent} PERCENT)`;
  }

  /**
   * `OFFSET ... FETCH NEXT ...` paging clause. Returns '' when no positive offset is given;
   * in that case the limit is expected to be applied through `getTopClause` instead.
   */
  getLimitOffsetClause(limit: number, offset?: number): string {
    return offset !== undefined && offset > 0 ? `OFFSET ${offset} ROWS FETCH NEXT ${limit} ROWS ONLY` : '';
  }

  /** `TOP <limit>` clause. */
  getTopClause(limit: number): string {
    return `TOP ${limit}`;
  }

  /** Aggregate expression counting NULLs of an already-quoted column expression. */
  getNullCountExpression(column: string): string {
    return `SUM(CASE WHEN ${column} IS NULL THEN 1 ELSE 0 END)`;
  }

  /** Aggregate expression counting distinct values of an already-quoted column expression. */
  getDistinctCountExpression(column: string): string {
    return `COUNT(DISTINCT ${column})`;
  }

  /**
   * Distinct-value count over the first `sampleSize` non-null rows.
   * `columnName` must already be quoted.
   */
  generateCardinalitySampleQuery(tableName: string, columnName: string, sampleSize: number): string {
    return `
      WITH sampled AS (
        SELECT TOP ${sampleSize} ${columnName} AS val
        FROM ${tableName}
        WHERE ${columnName} IS NOT NULL
      )
      SELECT COUNT(DISTINCT val) AS cardinality
      FROM sampled
    `;
  }

  /**
   * Up to `limit` distinct non-null values (cast to NVARCHAR), ordered by value.
   * `columnName` must already be quoted.
   */
  generateDistinctValuesQuery(tableName: string, columnName: string, limit: number): string {
    return `
      SELECT TOP ${limit} val
      FROM (
        SELECT DISTINCT CAST(${columnName} AS NVARCHAR(MAX)) AS val
        FROM ${tableName}
        WHERE ${columnName} IS NOT NULL
      ) AS distinct_vals
      ORDER BY val
    `;
  }

  /** Column statistics are not available for this dialect; always returns null. */
  generateColumnStatisticsQuery(_schemaName: string, _tableName: string): string | null {
    return null;
  }

  /** Same as `generateCardinalitySampleQuery`, but samples rows in random (`NEWID()`) order. */
  generateRandomizedCardinalitySampleQuery(tableName: string, columnName: string, sampleSize: number): string {
    return `
      WITH sampled AS (
        SELECT TOP ${sampleSize} ${columnName} AS val
        FROM ${tableName}
        WHERE ${columnName} IS NOT NULL
        ORDER BY NEWID()
      )
      SELECT COUNT(DISTINCT val) AS cardinality
      FROM sampled
    `;
  }

  /**
   * Expression truncating `column` to the start of the given calendar granularity,
   * optionally after converting from UTC to `timezone`.
   */
  getTimeTruncExpression(
    column: string,
    granularity: 'day' | 'week' | 'month' | 'quarter' | 'year',
    timezone?: string,
  ): string {
    const col = this.withTimezone(column, timezone);
    switch (granularity) {
      case 'day':
        return `CAST(${col} AS DATE)`;
      case 'week':
        return `DATEADD(WEEK, DATEDIFF(WEEK, 0, ${col}), 0)`;
      case 'month':
        return `DATEFROMPARTS(YEAR(${col}), MONTH(${col}), 1)`;
      case 'quarter':
        return `DATEFROMPARTS(YEAR(${col}), (DATEPART(QUARTER, ${col}) - 1) * 3 + 1, 1)`;
      case 'year':
        return `DATEFROMPARTS(YEAR(${col}), 1, 1)`;
    }
  }

  /**
   * Expression bucketing `column` into fixed-size intervals counted from `origin`
   * (default `1970-01-01`).
   *
   * @param interval `"<positive integer> <datepart>"`, e.g. `"15 minute"`.
   * @throws Error when `interval` does not have that shape. Amount and datepart are spliced
   *   into the SQL text, so anything else would produce invalid or injectable SQL.
   */
  getCustomTimeTruncExpression(column: string, interval: string, origin?: string, timezone?: string): string {
    const col = this.withTimezone(column, timezone);
    const parsed = /^\s*(\d+)\s+([A-Za-z_]+)\s*$/.exec(interval);
    if (!parsed || Number(parsed[1]) <= 0) {
      throw new Error(`Unsupported SQL Server interval "${interval}"; expected "<positive integer> <datepart>".`);
    }
    const amount = parsed[1]!;
    const unit = parsed[2]!;
    const originExpr = this.quoteLiteral(origin ? origin : '1970-01-01');
    return `DATEADD(${unit}, (DATEDIFF(${unit}, ${originExpr}, ${col}) / ${amount}) * ${amount}, ${originExpr})`;
  }

  /** Returns the interval as a single-quoted SQL string literal. */
  parseIntervalToSql(interval: string): string {
    return this.quoteLiteral(interval);
  }

  /** Applies `AT TIME ZONE 'UTC' AT TIME ZONE '<timezone>'` when a timezone is given. */
  private withTimezone(column: string, timezone?: string): string {
    return timezone ? `${column} AT TIME ZONE 'UTC' AT TIME ZONE ${this.quoteLiteral(timezone)}` : column;
  }

  /** Single-quotes a value as a T-SQL string literal, doubling embedded quotes. */
  private quoteLiteral(value: string): string {
    return `'${value.replace(/'/g, "''")}'`;
  }

  /** Escapes regular-expression metacharacters so `value` matches literally. */
  private escapeRegExp(value: string): string {
    return value.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
  }
}
file mode 100644 index 00000000..d74100b2 --- /dev/null +++ b/packages/connector-sqlserver/src/live-database-introspection.ts @@ -0,0 +1,40 @@ +import type { LiveDatabaseIntrospectionPort } from '@klo/context/ingest'; +import type { KloProjectConnectionConfig } from '@klo/context/project'; +import { + KloSqlServerScanConnector, + type KloSqlServerConnectionConfig, + type KloSqlServerEndpointResolver, + type KloSqlServerPoolFactory, +} from './connector.js'; + +interface CreateSqlServerLiveDatabaseIntrospectionOptions { + connections: Record; + poolFactory?: KloSqlServerPoolFactory; + endpointResolver?: KloSqlServerEndpointResolver; + now?: () => Date; +} + +export function createSqlServerLiveDatabaseIntrospection( + options: CreateSqlServerLiveDatabaseIntrospectionOptions, +): LiveDatabaseIntrospectionPort { + return { + async extractSchema(connectionId: string) { + const connection = options.connections[connectionId] as KloSqlServerConnectionConfig | undefined; + const connector = new KloSqlServerScanConnector({ + connectionId, + connection, + poolFactory: options.poolFactory, + endpointResolver: options.endpointResolver, + now: options.now, + }); + try { + return await connector.introspect( + { connectionId, driver: 'sqlserver' }, + { runId: `sqlserver-${connectionId}` }, + ); + } finally { + await connector.cleanup(); + } + }, + }; +} diff --git a/packages/connector-sqlserver/src/package-exports.test.ts b/packages/connector-sqlserver/src/package-exports.test.ts new file mode 100644 index 00000000..72d2cbbc --- /dev/null +++ b/packages/connector-sqlserver/src/package-exports.test.ts @@ -0,0 +1,12 @@ +import { describe, expect, it } from 'vitest'; + +describe('@klo/connector-sqlserver package exports', () => { + it('exports public connector APIs during package bootstrap', async () => { + const connector = await import('./index.js'); + + expect(connector.KloSqlServerDialect).toBeTypeOf('function'); + 
expect(connector.KloSqlServerScanConnector).toBeTypeOf('function'); + expect(connector.createSqlServerLiveDatabaseIntrospection).toBeTypeOf('function'); + expect(connector.sqlServerConnectionPoolConfigFromConfig).toBeTypeOf('function'); + }); +}); diff --git a/packages/connector-sqlserver/tsconfig.json b/packages/connector-sqlserver/tsconfig.json new file mode 100644 index 00000000..965e6978 --- /dev/null +++ b/packages/connector-sqlserver/tsconfig.json @@ -0,0 +1,9 @@ +{ + "extends": "../../tsconfig.base.json", + "compilerOptions": { + "outDir": "./dist", + "rootDir": "./src" + }, + "include": ["src/**/*.ts"], + "exclude": ["dist", "node_modules"] +} diff --git a/packages/context/package.json b/packages/context/package.json new file mode 100644 index 00000000..a4866027 --- /dev/null +++ b/packages/context/package.json @@ -0,0 +1,166 @@ +{ + "name": "@klo/context", + "version": "0.0.0-private", + "description": "Core context library for database agents", + "private": true, + "type": "module", + "engines": { + "node": ">=22.0.0" + }, + "main": "dist/index.js", + "types": "dist/index.d.ts", + "exports": { + ".": { + "types": "./dist/index.d.ts", + "import": "./dist/index.js", + "default": "./dist/index.js" + }, + "./agent": { + "types": "./dist/agent/index.d.ts", + "import": "./dist/agent/index.js", + "default": "./dist/agent/index.js" + }, + "./core": { + "types": "./dist/core/index.d.ts", + "import": "./dist/core/index.js", + "default": "./dist/core/index.js" + }, + "./connections": { + "types": "./dist/connections/index.d.ts", + "import": "./dist/connections/index.js", + "default": "./dist/connections/index.js" + }, + "./daemon": { + "types": "./dist/daemon/index.d.ts", + "import": "./dist/daemon/index.js", + "default": "./dist/daemon/index.js" + }, + "./ingest": { + "types": "./dist/ingest/index.d.ts", + "import": "./dist/ingest/index.js", + "default": "./dist/ingest/index.js" + }, + "./ingest/memory-flow": { + "types": "./dist/ingest/memory-flow/index.d.ts", + 
"import": "./dist/ingest/memory-flow/index.js", + "default": "./dist/ingest/memory-flow/index.js" + }, + "./ingest/metabase-mapping": { + "types": "./dist/ingest/metabase-mapping.d.ts", + "import": "./dist/ingest/metabase-mapping.js", + "default": "./dist/ingest/metabase-mapping.js" + }, + "./scan": { + "types": "./dist/scan/index.d.ts", + "import": "./dist/scan/index.js", + "default": "./dist/scan/index.js" + }, + "./search": { + "types": "./dist/search/index.d.ts", + "import": "./dist/search/index.js", + "default": "./dist/search/index.js" + }, + "./sql-analysis": { + "types": "./dist/sql-analysis/index.d.ts", + "import": "./dist/sql-analysis/index.js", + "default": "./dist/sql-analysis/index.js" + }, + "./memory": { + "types": "./dist/memory/index.d.ts", + "import": "./dist/memory/index.js", + "default": "./dist/memory/index.js" + }, + "./mcp": { + "types": "./dist/mcp/index.d.ts", + "import": "./dist/mcp/index.js", + "default": "./dist/mcp/index.js" + }, + "./project": { + "types": "./dist/project/index.d.ts", + "import": "./dist/project/index.js", + "default": "./dist/project/index.js" + }, + "./prompts": { + "types": "./dist/prompts/index.d.ts", + "import": "./dist/prompts/index.js", + "default": "./dist/prompts/index.js" + }, + "./skills": { + "types": "./dist/skills/index.d.ts", + "import": "./dist/skills/index.js", + "default": "./dist/skills/index.js" + }, + "./sl": { + "types": "./dist/sl/index.d.ts", + "import": "./dist/sl/index.js", + "default": "./dist/sl/index.js" + }, + "./sl/descriptions": { + "types": "./dist/sl/descriptions.d.ts", + "import": "./dist/sl/descriptions.js", + "default": "./dist/sl/descriptions.js" + }, + "./tools": { + "types": "./dist/tools/index.d.ts", + "import": "./dist/tools/index.js", + "default": "./dist/tools/index.js" + }, + "./wiki": { + "types": "./dist/wiki/index.d.ts", + "import": "./dist/wiki/index.js", + "default": "./dist/wiki/index.js" + }, + "./package.json": "./package.json" + }, + "files": [ + "dist", + 
"prompts", + "skills" + ], + "scripts": { + "build": "tsc -p tsconfig.json", + "relationships:benchmarks": "pnpm --silent run build && node scripts/relationship-benchmark-report.mjs", + "search:pglite-spike": "node scripts/pglite-hybrid-search-spike.mjs", + "search:pglite-owner-prototype": "node scripts/pglite-owner-process-prototype.mjs", + "search:pglite-sl-prototype": "node scripts/pglite-sl-search-prototype.mjs", + "test": "vitest run", + "type-check": "tsc -p tsconfig.json --noEmit" + }, + "dependencies": { + "@klo/llm": "workspace:*", + "@looker/sdk": "^26.6.1", + "@looker/sdk-node": "^26.6.1", + "@looker/sdk-rtl": "^21.6.5", + "@modelcontextprotocol/sdk": "^1.27.1", + "@notionhq/client": "^5.20.0", + "ai": "^6.0.168", + "better-sqlite3": "^12.6.2", + "handlebars": "^4.7.8", + "lookml-parser": "7.1.0", + "minimatch": "^10.2.4", + "p-limit": "^7.3.0", + "pg": "^8.19.0", + "simple-git": "3.32.2", + "yaml": "^2.8.2", + "zod": "^4.1.13" + }, + "devDependencies": { + "@electric-sql/pglite": "^0.4.5", + "@electric-sql/pglite-socket": "^0.1.5", + "@types/better-sqlite3": "^7.6.13", + "@types/node": "^24.3.0", + "@types/pg": "^8.16.0", + "typescript": "^5.9.3", + "vitest": "^4.0.18" + }, + "license": "Apache-2.0", + "repository": { + "type": "git", + "url": "git+https://github.com/kaelio/ktx.git", + "directory": "packages/context" + }, + "bugs": { + "url": "https://github.com/kaelio/ktx/issues" + }, + "homepage": "https://github.com/kaelio/ktx#readme" +} diff --git a/packages/context/prompts/memory_agent_backfill.md b/packages/context/prompts/memory_agent_backfill.md new file mode 100644 index 00000000..ee0f7ed4 --- /dev/null +++ b/packages/context/prompts/memory_agent_backfill.md @@ -0,0 +1,21 @@ + +You are backfilling knowledge from a historical chat transcript or archived SQL review. The content has already been researched by another user or process; you're running offline to extract what is durable enough to persist. + + + +Moderately conservative. 
Historical content is not directly steering current work, so spurious captures will surface in future chats and annoy users. But genuine patterns are worth saving — these backfills exist because the content is known to contain value. + +Capture only when the signal is unambiguous: a metric definition stated plainly, a reusable SQL pattern, a documented correction, a durable business rule. Skip casual chatter and ambiguous interpretations. + + + +1. Read the wiki and SL indexes to avoid creating duplicates. +2. If the content has wiki-style signal, load the `knowledge_capture` skill and follow its workflow. +3. If the content has SL-style signal, load the `sl` skill and follow its Part 3 workflow. +4. Prefer updating existing entries over creating new ones — backfills often duplicate existing knowledge. +5. When done, exit the loop. + + + +Wiki writes follow the session's scope selection (USER when user-scoped knowledge is enabled, GLOBAL otherwise). The `wiki_write` tool picks automatically — focus on capture judgment. + diff --git a/packages/context/prompts/memory_agent_bundle_ingest_reconcile.md b/packages/context/prompts/memory_agent_bundle_ingest_reconcile.md new file mode 100644 index 00000000..33c9709d --- /dev/null +++ b/packages/context/prompts/memory_agent_bundle_ingest_reconcile.md @@ -0,0 +1,27 @@ + +You are the reconciliation agent for a multi-file ingest bundle. Stage 3 WorkUnits have already run against this job's session worktree; your input is the deterministic Stage Index listing every write each WU made, plus an Eviction Set listing raw files present in the prior sync but absent in this one. Your job is to (a) decide what happens to each evicted artifact (remove vs retain with a deprecation marker), (b) sweep the Stage Index for any cross-WU conflicts the individual WUs missed, and (c) emit conflict + eviction records that the runner will fold into the final IngestReport. + + + +Parsimonious. 
Stage 3 WUs already loaded `ingest_triage` and handled conflicts they saw. Your sweep is the safety net for contradictions that are only visible when you can see the whole job at once — e.g. two WUs that each looked clean in isolation but collectively form a near-duplicate cluster. Do not redo work Stage 3 already did. + + + +1. Load `ingest_triage`, then `sl_capture` + `knowledge_capture`. +2. Call `stage_list()` for the full index of this job's writes. If it is empty AND you have no evictions, exit — the runner short-circuits this case but the skill still teaches you to bail fast. +3. If the system prompt includes ``, apply those pins before flagging a same-name or near-duplicate conflict. A pinned `canonicalArtifactKey` keeps the contested name when it is present in the Stage Index; competing variants keep or receive disambiguated names. +4. For each pair of WUs that wrote overlapping SL source names or wiki keys, call `stage_diff` to see the actual difference. If they're the same content, leave it. If they differ per `ingest_triage` rules, apply the correct resolution (rename + capture; election of canonical; silent replace for expression-only re-ingest change; or pinned canonical), then call `emit_conflict_resolution` with the artifact key and decision. +5. Call `eviction_list()` for deleted raw paths. For each eviction: if inbound refs are empty, remove the artifact (`sl_delete`, `wiki_remove`); if inbound refs exist, retain with a deprecation marker. Then call `emit_eviction_decision` for every removed or retained artifact. +6. If the Stage 4 sweep discovers a raw file whose only honest outcome is standalone SQL, wiki-only capture, or a human flag, call `emit_unmapped_fallback` with the raw path, reason, and fallback kind. +7. Use `read_raw_span` to zoom into specific raw files when you need to resolve what two contested measures actually compute. +8. Exit when you've processed every item. + + + +All wiki writes are GLOBAL (same as Stage 3). 
SL writes target the same session worktree Stage 3 used. + + + +- Do not overwrite a Stage 3 WU's resolution that already matches `ingest_triage` output — that's churn. +- Do not treat two SL sources with the same logical meaning but legitimately different domains (e.g. `finance.revenue` and `marketing.revenue`) as a conflict — that's by design. + diff --git a/packages/context/prompts/memory_agent_bundle_ingest_work_unit.md b/packages/context/prompts/memory_agent_bundle_ingest_work_unit.md new file mode 100644 index 00000000..2d296f5a --- /dev/null +++ b/packages/context/prompts/memory_agent_bundle_ingest_work_unit.md @@ -0,0 +1,28 @@ + +You are processing ONE WorkUnit of a multi-file ingest bundle. The WorkUnit gives you a slice of raw source files (LookML views, dbt/MetricFlow YAMLs, Metabase card JSONs, or similar) and you must translate that slice into KLO semantic-layer sources and/or knowledge wiki pages, in one pass. Prior WorkUnits in this same job may have already written SL sources and wiki pages; their writes are visible on the working branch and searchable via `wiki_sl_search`. + + + +Assertive. The bundle was explicitly submitted for ingest. Default to capturing everything the raw files declare that maps cleanly to KLO: one SL source per table/view, one wiki page per non-obvious business rule or alias. Do not abandon a WorkUnit because "some content overlaps with another WU"; use `ingest_triage` to reconcile, do not skip. + + + +1. Read this WorkUnit's section at the end of the user prompt. It lists your `rawFiles`, any unchanged `dependencyPaths` you may need to resolve references, the `peerFileIndex` (paths only; you CANNOT read them), the source's `skillNames`, and any `priorProvenance` rows telling you what earlier syncs produced from these files. +2. Load the per-source review skill first (e.g. `lookml_ingest`, `metricflow_ingest`, `dbt_ingest`), then `sl_capture` and `knowledge_capture`, and `ingest_triage` last. 
The triage skill tells you how to react when `wiki_sl_search` reveals that a prior WU already wrote something overlapping. +3. If the system prompt includes ``, read those pins before choosing artifact keys. A pin's `canonicalArtifactKey` is the preferred artifact for its `contestedKey`: prefer editing the pinned canonical artifact when it already exists or when this raw file clearly updates it. Do not create a duplicate contested artifact when a pin says another artifact is canonical; use a specific disambiguated key only when the raw file describes a genuinely different domain. +4. For each raw file: call `read_raw_file` (or `read_raw_span` for slicing large files) to load content. Before writing a new SL source or wiki page, call `wiki_sl_search` for each candidate name to find prior-WU writes; apply `ingest_triage` when you hit one, and apply any matching canonical pin before deciding whether to edit, rename, or skip. +5. When `priorProvenance` names an existing artifact for one of your raw files, prefer `sl_edit` over `sl_write` for that artifact: the re-ingest change rule says expression-only changes replace silently, grain/column/filter changes replace and flag. +6. When a raw file cannot map to normal SL and you use a fallback path, call `emit_unmapped_fallback` exactly once for that raw file and reason. Use `fallback: "sql_standalone"` for a standalone SQL source, `fallback: "wiki_only"` for documentation-only capture, and `fallback: "flagged"` when no reliable artifact can be written. +7. When you're done, exit the loop without further tool calls. + + + +All wiki writes go to the GLOBAL scope. Bundle ingests are not personal. The `wiki_write` tool selects scope automatically for this caller. + + + +- Do not read peer files; only files listed in `rawFiles` or `dependencyPaths` are accessible. `read_raw_file` will reject everything else. +- Do not invent measures/joins/rules not declared in the raw files. 
+- Do not duplicate an artifact that prior provenance says you already produced; update it. +- Do not silently accept a name collision with a prior WU's write when the formula differs. Trigger `ingest_triage`. + diff --git a/packages/context/prompts/memory_agent_external_ingest.md b/packages/context/prompts/memory_agent_external_ingest.md new file mode 100644 index 00000000..9a5ecc39 --- /dev/null +++ b/packages/context/prompts/memory_agent_external_ingest.md @@ -0,0 +1,28 @@ + +You are ingesting an external technical artifact (a LookML view, dbt model, schema description, business glossary, or other reference document) into KLO organizational memory. The user has explicitly submitted this content for bulk ingest. Assume it is intentional and worth capturing. + + + +Assertive. Unlike a chat turn, this content was deliberately submitted. Default to capturing. Err on the side of creating an SL source for every declared table/view and a wiki page for every non-obvious business rule, alias, or definition you find in the artifact. + +A single artifact typically produces multiple actions: one SL source per table/view, additional measures or joins per metric, and one wiki page per alias or convention. + + + +1. Review the wiki and SL indexes in the prompt. Prefer updating existing entries over creating duplicates. +2. Load the `sl` skill for SL-writes and `knowledge_capture` for wiki-writes. Both skills describe schema, decision rules, and editing patterns — follow them. +3. For each distinct element in the artifact (table/view, measure, dimension group, derived column, computed filter, business rule, alias): decide whether it belongs in the SL, in the wiki, or both. +4. Write SL sources first (so they have stable names), then wiki pages that reference them via `sl_refs`. +5. When the artifact mixes data definitions with business rules, capture BOTH — one in each store, linked. +6. When you're done, exit the loop without calling any more tools. 
+ + + +All wiki writes go to the GLOBAL scope — they will be visible to every user of this KLO project. Phrase wiki pages as objective business knowledge, not personal preference. The `wiki_write` tool handles scope selection automatically for external ingest. + + + +- Do not fabricate measures, joins, or rules that aren't in the artifact. +- Do not invent column names. If a type is unclear, omit it rather than guess. +- Do not mirror presentation hints (LookML `link:`, `map_layer_name:`, HTML formatting) into SL — those belong in wiki if anywhere. + diff --git a/packages/context/prompts/memory_agent_research.md b/packages/context/prompts/memory_agent_research.md new file mode 100644 index 00000000..f8a59a79 --- /dev/null +++ b/packages/context/prompts/memory_agent_research.md @@ -0,0 +1,30 @@ + +You capture durable knowledge from an analytics assistant's chat turn. The user just asked a question, the assistant answered, and you are running after the turn to decide what — if anything — is worth saving for future chats. + + + +Save the durable parts of a turn: +- A definition the user just stated or refined ("by X I mean…", "going forward, exclude Y", "treat Z as…"). +- A reusable SQL pattern the assistant derived (aggregate metric, derived view, multi-table join). +- A new join path between two existing SL sources. +- A computed dimension or named segment that would be useful in later queries. +- An organizational convention or alias the user surfaced. + +Skip: +- Pure clarifications and one-off lookups with no reusable structure. +- Trivial COUNT(*) / SELECT preview queries with no business filter. +- Restatements of patterns already captured (cite the existing entry instead). + + + +1. Read the wiki index and the SL sources index in the prompt below. +2. Identify durable knowledge OR reusable data patterns in the turn. +3. If the turn has wiki-style signal (preferences, definitions, conventions), load the `knowledge_capture` skill and follow its workflow. +4. 
If the turn has SL-style signal (reusable metric aggregations, new joins, derived dimensions), load the `sl` skill and follow its Part 3 (capture) workflow. +5. A single turn can produce BOTH a wiki page and an SL source — load both skills and author the edge once on the wiki via `sl_refs: [source_name]`. The reverse edge (wiki pages that cite the SL source) is derived by the reconciler; do not set `knowledge_refs:` on the SL side. +6. When you're done, exit the loop without calling any more tools. Do NOT emit a final text summary. + + + +Wiki writes go to the GLOBAL scope by default. Phrase as objective business knowledge, not personal preference. (Users who want personal-scoped knowledge can opt in by toggling `userScopedKnowledgeEnabled` in app settings; when enabled, `wiki_write` will route to USER scope automatically.) + diff --git a/packages/context/prompts/skills/light_extraction.md b/packages/context/prompts/skills/light_extraction.md new file mode 100644 index 00000000..5d62f5a4 --- /dev/null +++ b/packages/context/prompts/skills/light_extraction.md @@ -0,0 +1,40 @@ +# Light Context Extraction + +Extract up to the configured maximum number of durable knowledge candidates from one short evidence page. + +Capture only durable, reusable company knowledge: + +- definitions +- business rules +- policies +- workflows and processes +- source-of-truth conventions +- aliases and glossary terms +- customer or product assumptions that affect future analysis + +Skip meeting minutiae, raw task lists, project status updates, brainstorms without durable decisions, duplicate facts, transient announcements, and page summaries. + +Each candidate must cite at least one chunk id from the supplied chunk list. 
Return only JSON with this shape: + +```json +{ + "candidates": [ + { + "candidateKey": "stable-kebab-key", + "topic": "Topic name", + "assertion": "One durable assertion.", + "rationale": "Why the evidence supports this candidate.", + "evidenceChunkIds": ["00000000-0000-0000-0000-000000000000"], + "suggestedPageKey": "stable-page-key", + "actionHint": "create", + "durabilityScore": 3, + "authorityScore": 2, + "reuseScore": 3, + "noveltyScore": 2, + "riskScore": 0 + } + ] +} +``` + +Score fields are integers from 0 to 3. `actionHint` must be one of `create`, `update`, `merge`, `conflict`, or `skip`. diff --git a/packages/context/prompts/skills/page_triage_classifier.md b/packages/context/prompts/skills/page_triage_classifier.md new file mode 100644 index 00000000..c449b312 --- /dev/null +++ b/packages/context/prompts/skills/page_triage_classifier.md @@ -0,0 +1,102 @@ +# Page Triage Classifier + +Classify one staged evidence page into exactly one lane: + +- `skip` - the page is indexed evidence, but it is transient, repetitive, task-like, date-titled status reporting, or too weak to produce durable knowledge candidates. +- `light` - the page is short and contains one to three durable facts, reusable templates, scripts, playbooks, personas, or messaging frameworks that can be extracted in one pass without tool use. +- `full` - the page has substantial structure, several candidate topics, cross-page context, conflicts, source-of-truth nuance, or enough ambiguity to require the full WorkUnit agent. + +Use the page excerpt and structural signals as evidence. Structural signals can influence the decision but cannot replace reading the excerpt. + +Reusable templates and scripts are durable knowledge regardless of subject matter. Sales, marketing, customer-success, and operations pages are not transient merely because they contain messaging copy, outreach scripts, positioning notes, personas, or campaign language. 
Date-titled standups are still skip; named templates and scripts are not. + +Analytics evidence (BI tools like Looker, Metabase, Tableau) is durable knowledge of *how the organization defines its metrics and segments*. The `signals.objectType` tells you what you are looking at: + +- `looker_explore` (or any explore-like analytics surface) -> `full` by default. Explores enumerate dimensions, measures, and joins — these are the canonical schema-of-the-business and warrant the full WorkUnit agent so each measure can become a candidate. Skip only if the excerpt is empty or contains zero measures and zero descriptive text. +- `looker_dashboard` (or any named dashboard with tile queries, filters, calculated fields) -> `full` when it has multiple tiles or named metrics, `light` when one or two tiles with trivial fields, `skip` only when usage hints make it clear it is unused (e.g. `queryCount30d` and `uniqueUsers30d` are both zero) AND there are no calculated fields, filters, or named tiles worth extracting. +- `looker_look` (or any saved query) -> `light` when the query is a simple field listing, `full` when it has custom calculations, non-trivial filters, or aggregation expressions, `skip` only when usage is zero AND the query is a default field listing. + +Treat dashboard/Look filter values, saved aggregations, calculated fields, and named tiles as candidate metric/segment definitions — they are durable. Do **not** mark BI evidence as `skip` solely because it is "configuration" or "tied to a data model"; that is exactly the durable knowledge we want to capture. + +Historic SQL query-history evidence is durable when usage signals show a repeated pattern worth memory work. For `signals.objectType === "historic_sql_template"`: + +- If `propertyHints.executions_bucket=low AND distinct_users_bucket=solo`, return `skip`. A one-off query by one user is indexed evidence, but it is too weak to produce durable knowledge candidates. 
+- Else if `propertyHints.service_account_only=true AND below the frequency floor`, return `light`. Treat `executions_bucket=low` or `distinct_users_bucket=solo` as below the frequency floor for this rule. Service-account-only templates can preserve useful SQL evidence, but should not occupy a full WorkUnit unless other signals show shared human usage. +- Otherwise apply the standard full/light/skip logic to the page excerpt. Favor `full` for shared human usage with mid or high execution volume, especially when `tables_touched`, normalized SQL, and slot classifications define a reusable metric, segment, threshold, or operational query pattern. + +Historic-SQL synthetic signal examples: + +- skip low solo template: + +```json +{ + "objectType": "historic_sql_template", + "propertyHints": { + "executions_bucket": "low", + "distinct_users_bucket": "solo", + "error_rate_bucket": "ok", + "recency_bucket": "active", + "service_account_only": "false", + "slot_summary": "1 constant, 1 runtime" + } +} +``` + +-> `skip` + +- light service-account-only template: + +```json +{ + "objectType": "historic_sql_template", + "propertyHints": { + "executions_bucket": "high", + "distinct_users_bucket": "solo", + "error_rate_bucket": "ok", + "recency_bucket": "active", + "service_account_only": "true", + "slot_summary": "1 constant, 0 runtime" + } +} +``` + +-> `light` + +- full shared human template: + +```json +{ + "objectType": "historic_sql_template", + "propertyHints": { + "executions_bucket": "high", + "distinct_users_bucket": "team", + "error_rate_bucket": "ok", + "recency_bucket": "active", + "service_account_only": "false", + "slot_summary": "2 constant, 1 runtime" + } +} +``` + +-> `full` + +Examples: + +- `Cold Call Script` with reusable call flow, objection handling, or positioning language -> `light` when short, `full` when multi-section or ambiguous. 
+- `Updated Messaging For Everything` with reusable positioning or campaign messaging framework -> `light` when short, `full` when it contains several frameworks. +- `Messaging March sprint` with reusable messaging templates or playbook sections -> `light` or `full`. +- `2026-04-30 Daily Standup` containing status updates, blockers, and done/next lists -> `skip`. +- `Sales Pipeline` (looker_explore) listing dimensions and measures across opportunity, account, and contact joins -> `full`. +- `Marketing & Acquisition` (looker_dashboard) with tiles like "Cost per Lead", "MQL to SQL %", and saved filters -> `full`. +- An empty looker_explore stub with zero dimensions and zero measures -> `skip`. + +Return only JSON with this shape: + +```json +{ + "lane": "skip", + "reason": "short reason" +} +``` + +Valid lane values are `skip`, `light`, and `full`. diff --git a/packages/context/scripts/pglite-hybrid-search-spike.mjs b/packages/context/scripts/pglite-hybrid-search-spike.mjs new file mode 100644 index 00000000..98e419f7 --- /dev/null +++ b/packages/context/scripts/pglite-hybrid-search-spike.mjs @@ -0,0 +1,354 @@ +import { readdir, readFile, realpath, rm, stat, writeFile, mkdtemp } from 'node:fs/promises'; +import { createRequire } from 'node:module'; +import { tmpdir } from 'node:os'; +import { dirname, join, relative, resolve } from 'node:path'; +import { performance } from 'node:perf_hooks'; +import { fileURLToPath } from 'node:url'; + +const require = createRequire(import.meta.url); +const scriptDir = dirname(fileURLToPath(import.meta.url)); +const contextDir = resolve(scriptDir, '..'); +const kloRoot = resolve(contextDir, '../..'); +const docsDir = join(kloRoot, 'docs'); +const reportPath = join(docsDir, 'hybrid-search-pglite-spike.md'); + +async function timed(label, fn) { + const started = performance.now(); + const value = await fn(); + const durationMs = Number((performance.now() - started).toFixed(2)); + return { label, durationMs, value }; +} + +async 
function directoryBytes(path) { + const entry = await stat(path); + if (entry.isFile()) { + return entry.size; + } + + if (!entry.isDirectory()) { + return 0; + } + + const children = await readdir(path); + const childSizes = await Promise.all(children.map((child) => directoryBytes(join(path, child)))); + return childSizes.reduce((sum, size) => sum + size, 0); +} + +async function resolvePackageJson(packageName) { + let currentDir = dirname(require.resolve(packageName)); + + while (currentDir !== dirname(currentDir)) { + const packageJsonPath = join(currentDir, 'package.json'); + + try { + const packageJson = JSON.parse(await readFile(packageJsonPath, 'utf8')); + if (packageJson.name === packageName) { + return { packageJsonPath, packageJson }; + } + } catch (error) { + if (error?.code !== 'ENOENT') { + throw error; + } + } + + currentDir = dirname(currentDir); + } + + throw new Error(`Could not resolve package.json for ${packageName}`); +} + +async function packageInfo(packageName) { + const { packageJsonPath, packageJson } = await resolvePackageJson(packageName); + const packageDir = await realpath(dirname(packageJsonPath)); + return { + name: packageName, + version: packageJson.version, + path: relative(kloRoot, packageDir), + bytes: await directoryBytes(packageDir), + }; +} + +async function createDb(PGlite, vector, pg_trgm, dataDir) { + const db = await PGlite.create({ + dataDir, + extensions: { + vector, + pg_trgm, + }, + }); + + await db.exec(` + CREATE EXTENSION IF NOT EXISTS vector; + CREATE EXTENSION IF NOT EXISTS pg_trgm; + CREATE TABLE IF NOT EXISTS spike_documents ( + id TEXT PRIMARY KEY, + search_text TEXT NOT NULL, + metadata JSONB NOT NULL DEFAULT '{}'::jsonb, + embedding vector(3) NOT NULL + ); + CREATE INDEX IF NOT EXISTS spike_documents_fts_idx + ON spike_documents + USING GIN (to_tsvector('english', search_text)); + CREATE INDEX IF NOT EXISTS spike_documents_vector_idx + ON spike_documents + USING ivfflat (embedding vector_cosine_ops) + WITH 
(lists = 1); + CREATE TABLE IF NOT EXISTS spike_dictionary_values ( + connection_id TEXT NOT NULL, + source_name TEXT NOT NULL, + column_name TEXT NOT NULL, + value TEXT NOT NULL, + PRIMARY KEY (connection_id, source_name, column_name, value) + ); + CREATE INDEX IF NOT EXISTS spike_dictionary_values_trgm_idx + ON spike_dictionary_values + USING GIN (value gin_trgm_ops); + `); + + return db; +} + +async function seed(db) { + await db.query( + ` + INSERT INTO spike_documents (id, search_text, metadata, embedding) + VALUES + ($1, $2, $3::jsonb, $4::vector), + ($5, $6, $7::jsonb, $8::vector), + ($9, $10, $11::jsonb, $12::vector) + ON CONFLICT (id) DO UPDATE + SET search_text = EXCLUDED.search_text, + metadata = EXCLUDED.metadata, + embedding = EXCLUDED.embedding + `, + [ + 'warehouse/orders', + 'orders paid revenue refund status customer', + JSON.stringify({ connectionId: 'warehouse', sourceName: 'orders' }), + JSON.stringify([1, 0, 0]), + 'finance/orders', + 'orders finance bookings gross margin', + JSON.stringify({ connectionId: 'finance', sourceName: 'orders' }), + JSON.stringify([0.72, 0.28, 0]), + 'warehouse/customers', + 'customers accounts lifecycle region', + JSON.stringify({ connectionId: 'warehouse', sourceName: 'customers' }), + JSON.stringify([0, 1, 0]), + ], + ); + + await db.query(` + INSERT INTO spike_dictionary_values (connection_id, source_name, column_name, value) + VALUES + ('warehouse', 'orders', 'status', 'refunded'), + ('warehouse', 'orders', 'status', 'paid'), + ('warehouse', 'customers', 'region', 'emea') + ON CONFLICT DO NOTHING + `); +} + +async function closeDb(db) { + if (typeof db.close === 'function') { + await db.close(); + } +} + +async function main() { + const importTimer = await timed('dynamic import @electric-sql/pglite', async () => { + const [{ PGlite }, { vector }, { pg_trgm }] = await Promise.all([ + import('@electric-sql/pglite'), + import('@electric-sql/pglite/vector'), + import('@electric-sql/pglite/contrib/pg_trgm'), + ]); + 
return { PGlite, vector, pg_trgm }; + }); + + const { PGlite, vector, pg_trgm } = importTimer.value; + const tempDir = await mkdtemp(join(tmpdir(), 'klo-pglite-report-')); + const dataDir = join(tempDir, 'pgdata'); + + let db; + let reopened; + + try { + const createTimer = await timed('create persistent PGlite database and load extensions', async () => { + db = await createDb(PGlite, vector, pg_trgm, dataDir); + return true; + }); + + const seedTimer = await timed('seed hybrid search fixture', async () => seed(db)); + + const ftsTimer = await timed('Postgres FTS query', () => + db.query( + ` + SELECT id + FROM spike_documents + WHERE to_tsvector('english', search_text) @@ websearch_to_tsquery('english', $1) + ORDER BY ts_rank_cd(to_tsvector('english', search_text), websearch_to_tsquery('english', $1)) DESC, id ASC + LIMIT 1 + `, + ['paid orders'], + ), + ); + + const vectorTimer = await timed('pgvector cosine query', () => + db.query( + ` + SELECT id, 1 - (embedding <=> $1::vector) AS similarity + FROM spike_documents + ORDER BY embedding <=> $1::vector, id ASC + LIMIT 1 + `, + [JSON.stringify([1, 0, 0])], + ), + ); + + const trigramTimer = await timed('pg_trgm dictionary query', () => + db.query( + ` + SELECT connection_id || '/' || source_name AS id, value, similarity(value, $1) AS score + FROM spike_dictionary_values + WHERE similarity(value, $1) > 0 + ORDER BY score DESC, id ASC, value ASC + LIMIT 1 + `, + ['refund'], + ), + ); + + const sameInstanceTimer = await timed('same instance parallel reads', () => + Promise.all(Array.from({ length: 4 }, () => db.query('SELECT COUNT(*)::int AS count FROM spike_documents'))), + ); + + let secondOpenStatus = 'opened'; + let secondOpenMessage = 'Second direct opener executed SELECT 1.'; + let second; + try { + second = await createDb(PGlite, vector, pg_trgm, dataDir); + await second.query('SELECT 1'); + } catch (error) { + secondOpenStatus = 'blocked'; + secondOpenMessage = error instanceof Error ? 
error.message : String(error); + } finally { + if (second) { + await closeDb(second); + } + } + + await closeDb(db); + db = undefined; + + const reopenTimer = await timed('reopen persistent PGlite database', async () => { + reopened = await createDb(PGlite, vector, pg_trgm, dataDir); + return reopened.query('SELECT COUNT(*)::int AS count FROM spike_documents'); + }); + + const packages = await Promise.all([ + packageInfo('@electric-sql/pglite'), + packageInfo('@electric-sql/pglite-socket'), + ]); + + const result = { + generatedAt: new Date().toISOString(), + node: process.version, + packages, + timingsMs: { + import: importTimer.durationMs, + createAndExtensions: createTimer.durationMs, + seed: seedTimer.durationMs, + ftsQuery: ftsTimer.durationMs, + vectorQuery: vectorTimer.durationMs, + trigramQuery: trigramTimer.durationMs, + sameInstanceParallelReads: sameInstanceTimer.durationMs, + reopen: reopenTimer.durationMs, + }, + topResults: { + fts: ftsTimer.value.rows[0]?.id ?? null, + vector: vectorTimer.value.rows[0]?.id ?? null, + trigram: trigramTimer.value.rows[0]?.id ?? null, + persistedRowCount: reopenTimer.value.rows[0]?.count ?? null, + }, + concurrency: { + sameInstanceReadCounts: sameInstanceTimer.value.map((queryResult) => queryResult.rows[0]?.count ?? null), + secondDirectOpenStatus: secondOpenStatus, + secondDirectOpenMessage: secondOpenMessage, + }, + }; + + const totalPackageBytes = packages.reduce((sum, pkg) => sum + pkg.bytes, 0); + const recommendation = + secondOpenStatus === 'opened' + ? 'Prototype a PGlite backend behind an explicit owner process or socket before exposing CLI plus MCP concurrent access.' 
+ : 'Use a socket or owner-process architecture for any PGlite backend prototype because direct second opener access was blocked.'; + + const markdown = `# Hybrid Search PGlite Spike + +Generated: ${result.generatedAt} + +## Summary + +PGlite loaded in Node ${result.node}, enabled vector and pg_trgm extensions, executed Postgres FTS, pgvector cosine ranking, pg_trgm dictionary ranking, and reopened a persistent filesystem database. + +Recommendation: ${recommendation} + +## Package Footprint + +| Package | Version | Approx bytes | Resolved path | +| --- | --- | ---: | --- | +${packages.map((pkg) => `| \`${pkg.name}\` | \`${pkg.version}\` | ${pkg.bytes} | \`${pkg.path}\` |`).join('\n')} + +Total measured package bytes: ${totalPackageBytes} + +## Timings + +| Probe | Duration ms | +| --- | ---: | +${Object.entries(result.timingsMs) + .map(([name, ms]) => `| ${name} | ${ms} |`) + .join('\n')} + +## Search Feature Results + +| Probe | Top result | +| --- | --- | +| Postgres FTS | \`${result.topResults.fts}\` | +| pgvector cosine | \`${result.topResults.vector}\` | +| pg_trgm dictionary | \`${result.topResults.trigram}\` | +| Reopened persisted row count | \`${result.topResults.persistedRowCount}\` | + +## Concurrency Observation + +Same-instance parallel read counts: \`${result.concurrency.sameInstanceReadCounts.join(', ')}\` + +Second direct opener status: \`${result.concurrency.secondDirectOpenStatus}\` + +Second direct opener message: + +\`\`\`text +${result.concurrency.secondDirectOpenMessage} +\`\`\` + +## Decision + +The SQLite backend remains the production default. The next PGlite step, if approved, is an owner-process or socket-backed prototype that reuses the existing \`SearchBackendCapabilities\` and backend conformance helpers without changing the public CLI surface. 
+`; + + await writeFile(reportPath, markdown); + process.stdout.write(`Wrote ${relative(process.cwd(), reportPath)}\n`); + process.stdout.write(JSON.stringify(result, null, 2)); + process.stdout.write('\n'); + } finally { + if (db) { + await closeDb(db); + } + if (reopened) { + await closeDb(reopened); + } + await rm(tempDir, { recursive: true, force: true }); + } +} + +main().catch((error) => { + console.error(error); + process.exitCode = 1; +}); diff --git a/packages/context/scripts/pglite-owner-process-prototype.mjs b/packages/context/scripts/pglite-owner-process-prototype.mjs new file mode 100644 index 00000000..cef6bb52 --- /dev/null +++ b/packages/context/scripts/pglite-owner-process-prototype.mjs @@ -0,0 +1,317 @@ +import { mkdtemp, rm, writeFile } from 'node:fs/promises'; +import { createServer } from 'node:net'; +import { tmpdir } from 'node:os'; +import { dirname, join, resolve } from 'node:path'; +import { performance } from 'node:perf_hooks'; +import { fileURLToPath } from 'node:url'; +import { PGlite } from '@electric-sql/pglite'; +import { pg_trgm } from '@electric-sql/pglite/contrib/pg_trgm'; +import { vector } from '@electric-sql/pglite/vector'; +import { PGLiteSocketServer } from '@electric-sql/pglite-socket'; +import { Client } from 'pg'; + +const scriptDir = dirname(fileURLToPath(import.meta.url)); +const contextDir = resolve(scriptDir, '..'); +const kloRoot = resolve(contextDir, '../..'); +const reportPath = join(kloRoot, 'docs', 'hybrid-search-pglite-owner-process.md'); + +async function timed(label, fn) { + const started = performance.now(); + const value = await fn(); + return { + label, + durationMs: Number((performance.now() - started).toFixed(2)), + value, + }; +} + +async function allocatePort() { + const server = createServer(); + await new Promise((resolve) => server.listen(0, '127.0.0.1', resolve)); + const address = server.address(); + if (typeof address !== 'object' || address === null) { + throw new Error('Expected TCP server 
address while allocating a PGlite owner-process port.'); + } + await new Promise((resolve, reject) => { + server.close((error) => { + if (error) { + reject(error); + return; + } + resolve(); + }); + }); + return address.port; +} + +async function createOwner(dataDir, port) { + const db = await PGlite.create({ + dataDir, + extensions: { + vector, + pg_trgm, + }, + }); + + await db.exec(` + CREATE EXTENSION IF NOT EXISTS vector; + CREATE EXTENSION IF NOT EXISTS pg_trgm; + CREATE TABLE IF NOT EXISTS prototype_documents ( + id TEXT PRIMARY KEY, + search_text TEXT NOT NULL, + metadata JSONB NOT NULL DEFAULT '{}'::jsonb, + embedding vector(3) NOT NULL + ); + CREATE INDEX IF NOT EXISTS prototype_documents_fts_idx + ON prototype_documents + USING GIN (to_tsvector('english', search_text)); + CREATE INDEX IF NOT EXISTS prototype_documents_vector_idx + ON prototype_documents + USING ivfflat (embedding vector_cosine_ops) + WITH (lists = 1); + CREATE TABLE IF NOT EXISTS prototype_dictionary_values ( + connection_id TEXT NOT NULL, + source_name TEXT NOT NULL, + column_name TEXT NOT NULL, + value TEXT NOT NULL, + PRIMARY KEY (connection_id, source_name, column_name, value) + ); + CREATE INDEX IF NOT EXISTS prototype_dictionary_values_trgm_idx + ON prototype_dictionary_values + USING GIN (value gin_trgm_ops); + `); + + const server = new PGLiteSocketServer({ + db, + host: '127.0.0.1', + port, + maxConnections: 100, + }); + + await server.start(); + + return { + db, + server, + connectionConfig: { + host: '127.0.0.1', + port, + user: 'postgres', + database: 'postgres', + application_name: 'klo-pglite-owner-report', + connectionTimeoutMillis: 5_000, + }, + }; +} + +async function withClient(connectionConfig, fn) { + const client = new Client(connectionConfig); + await client.connect(); + try { + return await fn(client); + } finally { + await client.end(); + } +} + +async function seed(connectionConfig) { + await withClient(connectionConfig, async (client) => { + await client.query( 
+ ` + INSERT INTO prototype_documents (id, search_text, metadata, embedding) + VALUES + ($1, $2, $3::jsonb, $4::vector), + ($5, $6, $7::jsonb, $8::vector), + ($9, $10, $11::jsonb, $12::vector) + ON CONFLICT (id) DO UPDATE + SET search_text = EXCLUDED.search_text, + metadata = EXCLUDED.metadata, + embedding = EXCLUDED.embedding + `, + [ + 'warehouse/orders', + 'orders paid revenue refund status customer', + JSON.stringify({ connectionId: 'warehouse', sourceName: 'orders' }), + JSON.stringify([1, 0, 0]), + 'finance/orders', + 'orders finance bookings gross margin', + JSON.stringify({ connectionId: 'finance', sourceName: 'orders' }), + JSON.stringify([0.72, 0.28, 0]), + 'warehouse/customers', + 'customers accounts lifecycle region', + JSON.stringify({ connectionId: 'warehouse', sourceName: 'customers' }), + JSON.stringify([0, 1, 0]), + ], + ); + + await client.query(` + INSERT INTO prototype_dictionary_values (connection_id, source_name, column_name, value) + VALUES + ('warehouse', 'orders', 'status', 'refunded'), + ('warehouse', 'orders', 'status', 'paid'), + ('warehouse', 'customers', 'region', 'emea') + ON CONFLICT DO NOTHING + `); + }); +} + +async function queryTopResults(connectionConfig) { + return await withClient(connectionConfig, async (client) => { + const lexical = await client.query( + ` + SELECT id + FROM prototype_documents + WHERE to_tsvector('english', search_text) @@ websearch_to_tsquery('english', $1) + ORDER BY ts_rank_cd(to_tsvector('english', search_text), websearch_to_tsquery('english', $1)) DESC, id ASC + LIMIT 1 + `, + ['paid orders'], + ); + + const semantic = await client.query( + ` + SELECT id + FROM prototype_documents + ORDER BY embedding <=> $1::vector, id ASC + LIMIT 1 + `, + [JSON.stringify([1, 0, 0])], + ); + + const dictionary = await client.query( + ` + SELECT connection_id || '/' || source_name AS id + FROM prototype_dictionary_values + WHERE similarity(value, $1) > 0 + ORDER BY similarity(value, $1) DESC, id ASC, value ASC + LIMIT 
1 + `, + ['refund'], + ); + + return { + lexical: lexical.rows[0]?.id ?? '', + semantic: semantic.rows[0]?.id ?? '', + dictionary: dictionary.rows[0]?.id ?? '', + }; + }); +} + +async function concurrentReads(connectionConfig) { + const clients = await Promise.all( + Array.from({ length: 4 }, async () => { + const client = new Client(connectionConfig); + await client.connect(); + return client; + }), + ); + + try { + const results = await Promise.all( + clients.map((client) => client.query('SELECT COUNT(*)::int AS count FROM prototype_documents')), + ); + return results.map((result) => result.rows[0]?.count ?? null); + } finally { + await Promise.all(clients.map((client) => client.end().catch(() => undefined))); + } +} + +async function stopOwner(owner) { + await owner.server.stop(); + await owner.db.close(); +} + +async function main() { + const tempDir = await mkdtemp(join(tmpdir(), 'klo-pglite-owner-report-')); + const dataDir = join(tempDir, 'pgdata'); + const port = await allocatePort(); + + let owner; + + try { + const startTimer = await timed('startOwner', async () => await createOwner(dataDir, port)); + owner = startTimer.value; + + const seedTimer = await timed('seed', async () => await seed(owner.connectionConfig)); + const queryTimer = await timed('searchQueries', async () => await queryTopResults(owner.connectionConfig)); + const concurrentTimer = await timed('concurrentReads', async () => await concurrentReads(owner.connectionConfig)); + + await stopOwner(owner); + owner = undefined; + + const restartTimer = await timed('restartOwner', async () => await createOwner(dataDir, port)); + owner = restartTimer.value; + + const persisted = await withClient(owner.connectionConfig, async (client) => { + const result = await client.query('SELECT COUNT(*)::int AS count FROM prototype_documents'); + return result.rows[0]?.count ?? 
null; + }); + + const markdown = `# Hybrid Search PGlite Owner Process Prototype + +Generated: ${new Date().toISOString()} + +## Summary + +PGlite started behind one explicit owner process, enabled vector and pg_trgm extensions, served PostgreSQL clients through \`@electric-sql/pglite-socket\`, answered lexical, semantic, and dictionary probes, and preserved rows across owner restart. + +Recommendation: Keep SQLite as the production default. The next PGlite implementation step should be a private adapter prototype behind an explicit configuration flag, still guarded by backend conformance tests, before any CLI or MCP default changes. + +## Timings + +| Probe | Duration ms | +| --- | ---: | +| startOwner | ${startTimer.durationMs} | +| seed | ${seedTimer.durationMs} | +| searchQueries | ${queryTimer.durationMs} | +| concurrentReads | ${concurrentTimer.durationMs} | +| restartOwner | ${restartTimer.durationMs} | + +## Search Feature Results + +| Probe | Top result | +| --- | --- | +| Postgres FTS through socket | \`${queryTimer.value.lexical}\` | +| pgvector cosine through socket | \`${queryTimer.value.semantic}\` | +| pg_trgm dictionary through socket | \`${queryTimer.value.dictionary}\` | +| Reopened persisted row count | \`${persisted}\` | + +## Concurrency Observation + +Concurrent socket read counts: \`${concurrentTimer.value.join(', ')}\` + +## Decision + +The owner-process shape is viable for a prototype because it gives CLI and MCP callers a PostgreSQL protocol boundary without opening the same PGlite data directory from independent runtimes. This report is not a production adapter acceptance record. 
+`; + + await writeFile(reportPath, markdown); + console.log(`Wrote ${reportPath}`); + console.log( + JSON.stringify( + { + port, + timings: { + startOwner: startTimer.durationMs, + seed: seedTimer.durationMs, + searchQueries: queryTimer.durationMs, + concurrentReads: concurrentTimer.durationMs, + restartOwner: restartTimer.durationMs, + }, + topResults: queryTimer.value, + concurrentReads: concurrentTimer.value, + persisted, + }, + null, + 2, + ), + ); + } finally { + if (owner) { + await stopOwner(owner).catch(() => undefined); + } + await rm(tempDir, { recursive: true, force: true }); + } +} + +await main(); diff --git a/packages/context/scripts/pglite-sl-search-prototype.mjs b/packages/context/scripts/pglite-sl-search-prototype.mjs new file mode 100644 index 00000000..ec78adda --- /dev/null +++ b/packages/context/scripts/pglite-sl-search-prototype.mjs @@ -0,0 +1,263 @@ +import { mkdtemp, rm, writeFile } from 'node:fs/promises'; +import { createServer } from 'node:net'; +import { tmpdir } from 'node:os'; +import { dirname, join, resolve } from 'node:path'; +import { performance } from 'node:perf_hooks'; +import { fileURLToPath } from 'node:url'; +import { PGlite } from '@electric-sql/pglite'; +import { pg_trgm } from '@electric-sql/pglite/contrib/pg_trgm'; +import { vector } from '@electric-sql/pglite/vector'; +import { PGLiteSocketServer } from '@electric-sql/pglite-socket'; +import { Client } from 'pg'; + +const scriptDir = dirname(fileURLToPath(import.meta.url)); +const contextDir = resolve(scriptDir, '..'); +const kloRoot = resolve(contextDir, '../..'); +const reportPath = join(kloRoot, 'docs', 'hybrid-search-pglite-sl-adapter-prototype.md'); + +async function timed(label, fn) { + const started = performance.now(); + const value = await fn(); + return { + label, + durationMs: Number((performance.now() - started).toFixed(2)), + value, + }; +} + +async function allocatePort() { + const server = createServer(); + await new Promise((resolve) => server.listen(0, 
'127.0.0.1', resolve)); + const address = server.address(); + if (typeof address !== 'object' || address === null) { + throw new Error('Expected TCP server address while allocating a PGlite SL prototype port.'); + } + await new Promise((resolve, reject) => { + server.close((error) => { + if (error) { + reject(error); + return; + } + resolve(); + }); + }); + return address.port; +} + +async function createOwner(dataDir, port) { + const db = await PGlite.create({ + dataDir, + extensions: { vector, pg_trgm }, + }); + + await db.exec(` + CREATE EXTENSION IF NOT EXISTS vector; + CREATE EXTENSION IF NOT EXISTS pg_trgm; + CREATE TABLE prototype_sl_sources ( + connection_id TEXT NOT NULL, + source_name TEXT NOT NULL, + search_text TEXT NOT NULL, + embedding vector(3), + PRIMARY KEY (connection_id, source_name) + ); + CREATE INDEX prototype_sl_sources_fts_idx + ON prototype_sl_sources + USING GIN (to_tsvector('english', search_text)); + CREATE INDEX prototype_sl_sources_vector_idx + ON prototype_sl_sources + USING ivfflat (embedding vector_cosine_ops) + WITH (lists = 1); + CREATE TABLE prototype_sl_dictionary_values ( + connection_id TEXT NOT NULL, + source_name TEXT NOT NULL, + column_name TEXT NOT NULL, + value TEXT NOT NULL, + value_lower TEXT NOT NULL, + PRIMARY KEY (connection_id, source_name, column_name, value) + ); + CREATE INDEX prototype_sl_dictionary_values_trgm_idx + ON prototype_sl_dictionary_values + USING GIN (value gin_trgm_ops); + `); + + const server = new PGLiteSocketServer({ db, host: '127.0.0.1', port, maxConnections: 100 }); + await server.start(); + + return { + db, + server, + connectionConfig: { + host: '127.0.0.1', + port, + user: 'postgres', + database: 'postgres', + application_name: 'klo-pglite-sl-prototype-report', + connectionTimeoutMillis: 5_000, + }, + }; +} + +async function withClient(connectionConfig, fn) { + const client = new Client(connectionConfig); + await client.connect(); + try { + return await fn(client); + } finally { + await 
client.end(); + } +} + +async function seed(connectionConfig) { + await withClient(connectionConfig, async (client) => { + await client.query( + ` + INSERT INTO prototype_sl_sources (connection_id, source_name, search_text, embedding) + VALUES + ($1, $2, $3, $4::vector), + ($5, $6, $7, $8::vector), + ($9, $10, $11, $12::vector) + `, + [ + 'warehouse', + 'orders', + 'orders paid revenue refund status customer', + JSON.stringify([1, 0, 0]), + 'finance', + 'orders', + 'orders finance bookings gross margin', + JSON.stringify([0.72, 0.28, 0]), + 'warehouse', + 'customers', + 'customers accounts lifecycle region', + JSON.stringify([0, 1, 0]), + ], + ); + + await client.query(` + INSERT INTO prototype_sl_dictionary_values (connection_id, source_name, column_name, value, value_lower) + VALUES + ('warehouse', 'orders', 'status', 'refunded', 'refunded'), + ('warehouse', 'orders', 'status', 'paid', 'paid'), + ('warehouse', 'customers', 'region', 'emea', 'emea') + `); + }); +} + +async function queryTopResults(connectionConfig) { + return withClient(connectionConfig, async (client) => { + const lexical = await client.query( + ` + SELECT connection_id || '/' || source_name AS id + FROM prototype_sl_sources + WHERE to_tsvector('english', search_text) @@ websearch_to_tsquery('english', $1) + ORDER BY ts_rank_cd(to_tsvector('english', search_text), websearch_to_tsquery('english', $1)) DESC, id ASC + LIMIT 1 + `, + ['paid revenue'], + ); + + const semantic = await client.query( + ` + SELECT connection_id || '/' || source_name AS id + FROM prototype_sl_sources + ORDER BY embedding <=> $1::vector, id ASC + LIMIT 1 + `, + [JSON.stringify([1, 0, 0])], + ); + + const dictionary = await client.query( + ` + SELECT connection_id || '/' || source_name AS id + FROM prototype_sl_dictionary_values + WHERE similarity(value, $1) > 0 OR value_lower LIKE '%' || lower($1) || '%' + ORDER BY GREATEST(similarity(value, $1), CASE WHEN value_lower LIKE '%' || lower($1) || '%' THEN 0.75 ELSE 0 END) DESC, 
+ id ASC, + value ASC + LIMIT 1 + `, + ['refund'], + ); + + return { + lexical: lexical.rows[0]?.id ?? '', + semantic: semantic.rows[0]?.id ?? '', + dictionary: dictionary.rows[0]?.id ?? '', + }; + }); +} + +async function stopOwner(owner) { + await owner.server.stop(); + await owner.db.close(); +} + +async function main() { + const tempDir = await mkdtemp(join(tmpdir(), 'klo-pglite-sl-prototype-report-')); + const dataDir = join(tempDir, 'pgdata'); + const port = await allocatePort(); + let owner; + + try { + const startTimer = await timed('startOwner', async () => createOwner(dataDir, port)); + owner = startTimer.value; + const seedTimer = await timed('seedSemanticLayerIndex', async () => seed(owner.connectionConfig)); + const searchTimer = await timed('searchQueries', async () => queryTopResults(owner.connectionConfig)); + + const markdown = `# Hybrid Search PGlite Semantic-Layer Adapter Prototype + +Generated: ${new Date().toISOString()} + +## Summary + +PGlite served a semantic-layer-style search index through one owner process and PostgreSQL clients. The probe returned lexical, semantic, and dictionary top results through Postgres FTS, pgvector ordering, and pg_trgm matching. + +Recommendation: Keep SQLite as the production default. The PGlite semantic-layer adapter remains private and explicitly opt-in until a separate plan decides runtime dependencies, long-lived owner lifecycle, and CLI/MCP routing. 
+ +## Timings + +| Probe | Duration ms | +| --- | ---: | +| startOwner | ${startTimer.durationMs} | +| seedSemanticLayerIndex | ${seedTimer.durationMs} | +| searchQueries | ${searchTimer.durationMs} | + +## Search Feature Results + +| Probe | Top result | +| --- | --- | +| Postgres FTS through socket | \`${searchTimer.value.lexical}\` | +| pgvector cosine through socket | \`${searchTimer.value.semantic}\` | +| pg_trgm dictionary through socket | \`${searchTimer.value.dictionary}\` | + +## Decision + +The private adapter shape is viable for semantic-layer search prototypes. It is not a production backend acceptance record and does not change the default SQLite search path. +`; + + await writeFile(reportPath, markdown); + console.log(`Wrote ${reportPath}`); + console.log( + JSON.stringify( + { + port, + timings: { + startOwner: startTimer.durationMs, + seed: seedTimer.durationMs, + searchQueries: searchTimer.durationMs, + }, + topResults: searchTimer.value, + }, + null, + 2, + ), + ); + } finally { + if (owner) { + await stopOwner(owner).catch(() => undefined); + } + await rm(tempDir, { recursive: true, force: true }); + } +} + +await main(); diff --git a/packages/context/scripts/relationship-benchmark-report.mjs b/packages/context/scripts/relationship-benchmark-report.mjs new file mode 100644 index 00000000..0d1bf0f8 --- /dev/null +++ b/packages/context/scripts/relationship-benchmark-report.mjs @@ -0,0 +1,52 @@ +import { dirname, join, resolve } from 'node:path'; +import { fileURLToPath } from 'node:url'; +import { + KLO_RELATIONSHIP_BENCHMARK_MODES, + buildKloRelationshipBenchmarkReport, + currentKloRelationshipBenchmarkDetector, + formatKloRelationshipBenchmarkReportMarkdown, + kloRelationshipBenchmarkDetectorWithLlm, + loadKloRelationshipBenchmarkFixtures, + runKloRelationshipBenchmarkSuite, +} from '../dist/scan/index.js'; + +const scriptDir = dirname(fileURLToPath(import.meta.url)); +const packageRoot = resolve(scriptDir, '..'); +const fixtureRoot = 
join(packageRoot, 'test/fixtures/relationship-benchmarks'); + +async function buildDetector() { + const backend = process.env.KLO_BENCHMARK_LLM_BACKEND; + if (!backend || backend === 'none') { + return currentKloRelationshipBenchmarkDetector(); + } + if (backend !== 'vertex') { + throw new Error(`Unsupported KLO_BENCHMARK_LLM_BACKEND: ${backend}`); + } + const project = process.env.KLO_BENCHMARK_VERTEX_PROJECT; + const location = process.env.KLO_BENCHMARK_VERTEX_LOCATION; + const model = process.env.KLO_BENCHMARK_LLM_MODEL ?? 'claude-sonnet-4-6'; + if (!project || !location) { + throw new Error('KLO_BENCHMARK_VERTEX_PROJECT and KLO_BENCHMARK_VERTEX_LOCATION are required for vertex backend'); + } + const { createKloLlmProvider } = await import('@klo/llm'); + const provider = createKloLlmProvider({ + backend: 'vertex', + vertex: { project, location }, + modelSlots: { default: model }, + }); + return kloRelationshipBenchmarkDetectorWithLlm(provider); +} + +const fixtures = await loadKloRelationshipBenchmarkFixtures(fixtureRoot); +const detector = await buildDetector(); +const suite = await runKloRelationshipBenchmarkSuite({ + fixtures, + detector, +}); +const report = buildKloRelationshipBenchmarkReport({ + fixtures, + suite, + modes: KLO_RELATIONSHIP_BENCHMARK_MODES, +}); + +process.stdout.write(formatKloRelationshipBenchmarkReportMarkdown(report)); diff --git a/packages/context/skills/dbt_ingest/SKILL.md b/packages/context/skills/dbt_ingest/SKILL.md new file mode 100644 index 00000000..bc6190c3 --- /dev/null +++ b/packages/context/skills/dbt_ingest/SKILL.md @@ -0,0 +1,34 @@ +--- +name: dbt_ingest +description: Map dbt `schema.yml` / `properties.yml` models and sources into KLO semantic-layer overlays and column notes. Covers `sources:` vs `models:`, column `data_tests` (not_null, unique, accepted_values, relationships), and how bundle-time writes complement manifest backfill from git sync. 
Load when the WorkUnit's `skillNames` includes `dbt_ingest` or when raw files are dbt YAML under `models/` / `sources/`. +callers: [memory_agent] +--- + +# dbt → KLO (bundle ingest) + +Use this skill for **uploaded** dbt projects (`dbt_project.yml` at stage root, `models/**`, `sources/**`, `schema.yml`). There is **no** `fetch()` in v1 — scheduled `dbt parse` / `manifest.json` pulls are out of scope; host-provided dbt sync may still backfill structured test metadata into `_schema` on the next sync. + +## Mapping (models / sources → SL) + +| dbt | KLO | Notes | +|-----|--------|--------| +| `models:` entry with `columns:` | **Overlay** on the manifest table with the same name (after `wiki_sl_search` / `sl_describe_table`) | One SL source per physical table; model name may differ from DB name — resolve with `read_raw_file` + warehouse context. | +| `sources:` → `tables:` | Same as models; use `identifier` when present instead of logical `name`. | Schema + name must match how the connection sees tables. | +| Column `description` | `descriptions.user` or merged `descriptions` map on the column | Do not overwrite `dbt` description keys from sync. | +| `data_tests: not_null` / `unique` | Short hint in column `descriptions` or notes: “dbt: not null”, “dbt: unique” | Full structured metadata lands in manifest via **sync**; the skill keeps bundle-time SL text useful for the agent. | +| `accepted_values` | Add a **brief** line in the column description: allowed values (truncate long lists) | Also mention enum-like use in `wiki_sl_search` / filters. | +| `relationships` | Add or confirm `joins:` on the overlay **only** when `to` resolves to a real table via `read_raw_file` + `wiki_sl_search` / `sl_describe_table` | If the ref cannot be resolved, capture the intent in a wiki page instead. | + +## 1.1 test hints (descriptions / meta) + +When YAML shows `accepted_values` or `not_null`, add **short** hints into `columns[].descriptions` (e.g. 
under `user`) or freeform column notes so chat and validation see intent before the next git sync refreshes `constraints` / `enum_values` in `_schema`. Keep hints under a few words when possible. + +## Overlap with MetricFlow + +If the same bundle also has MetricFlow `semantic_models:` / `metrics:`, the **`metricflow_ingest`** skill owns semantic/metric shapes. This skill focuses on **raw dbt schema** YAML (`models`, `sources`, tests). If both apply, load `metricflow_ingest` first when the file is clearly MetricFlow; otherwise use `dbt_ingest` for `schema.yml` without semantic_models. + +## Do not + +- Do not run `dbt` CLI or assume `target/` / `manifest.json` exists in the upload. +- Do not invent joins from `relationships` tests if the target model/table is not found in SL or the warehouse. +- Do not read `peerFileIndex` paths — use `read_raw_file` only on `rawFiles` and `dependencyPaths` from the WorkUnit. diff --git a/packages/context/skills/historic_sql_curator/SKILL.md b/packages/context/skills/historic_sql_curator/SKILL.md new file mode 100644 index 00000000..615bf2ea --- /dev/null +++ b/packages/context/skills/historic_sql_curator/SKILL.md @@ -0,0 +1,153 @@ +--- +name: historic_sql_curator +description: Reconcile historic-SQL query knowledge pages by deduping collapsed intents, cross-linking categorical sub-clusters, and demoting stale low-signal pages. +callers: [memory_agent] +--- + +# Historic SQL Curator + +Use this skill during Stage 4 reconciliation for the `historic-sql` source. It runs after `historic_sql_ingest` has written query knowledge pages from full-tier template WorkUnits. The Stage 4 runner may use curator pagination, so treat the current prompt as one bounded page of work and finish every listed item you inspect. + +## Input Shape + +The reconciliation prompt normally exposes: + +- `# Stage Index` with WorkUnit keys, raw paths, and wiki or SL actions from Stage 3. +- `# Eviction Set` with deleted raw paths from retired templates. 
+- `# Curator Pass State` when curator pagination splits reconciliation into multiple passes. +- `# Source Reconciliation Notes` with run-level notes such as staged template count. + +Use tools instead of guessing: + +- `stage_list` shows every WorkUnit raw path and action. +- `stage_diff` compares two WorkUnits by written artifact overlap. +- `read_raw_span` reads staged `metadata.json`, `page.md`, `usage.json`, and `manifest.json` snippets when page content is not enough. +- `wiki_search`, `wiki_read`, and `wiki_write` inspect and update query knowledge pages. +- `emit_artifact_resolution` records merged or subsumed wiki pages for provenance. +- `eviction_list` and `emit_eviction_decision` handle deleted raw paths. + +## Required Workflow + +1. Read the `# Stage Index`, `# Eviction Set`, `# Curator Pass State`, and `# Source Reconciliation Notes` sections first. +2. Call `stage_list` when the prompt omits raw paths or when more than one WorkUnit wrote a `queries/...` page. +3. For each successful historic-SQL WorkUnit that wrote a wiki page, call `wiki_read` on that page before deciding whether to merge, cross-link, or demote it. +4. If the page body does not show fingerprint, sub-cluster, tables, or usage clearly enough, call `read_raw_span` on that WorkUnit's `metadata.json` and `usage.json` raw paths. +5. Build intent clusters using table overlap, representative SQL shape, page summaries, fingerprints, sub-cluster IDs, and usage. Same table is not enough to merge; the business intent must collapse. +6. Deduplicate collapsed intents by electing one canonical page, merging useful variant details into it with `wiki_write`, and recording each merged loser with `emit_artifact_resolution`. +7. Cross-link categorical sub-cluster pages that share the same base fingerprint but differ by `__cat_...` sub-cluster ID. +8. 
Demote pages whose underlying cluster has decayed below the floor in the most recent 3 windows, or in the current window plus eviction evidence showing the template retired. +9. For every deleted raw path in the Eviction Set that you inspect, call `eviction_list` and then `emit_eviction_decision`. + +## Canonical Page Election + +When two or more pages describe the same query intent, choose the canonical page with this order: + +1. The clearest human-readable intent summary. +2. The page with broader non-service-account usage. +3. The page covering more fingerprints or categorical variants of the same intent. +4. The page with the most recent successful usage. +5. Lexicographically first page key. + +After electing the canonical page: + +- Read every page that will be merged. +- Update the canonical page so it contains one "Historic SQL Variants" section with fingerprints, sub-cluster IDs, tables, usage summaries, and links to sibling page keys when retained. +- Keep `tags` including `historic-sql` and `query-pattern`. +- Preserve useful `sl_refs`; when replacing refs, include the union of cleanly matched SL refs from merged pages. +- For each merged loser, call `emit_artifact_resolution` with: + +```json +{ + "rawPath": "", + "artifactKind": "wiki", + "artifactKey": "", + "actionType": "merged", + "reason": "Historic-SQL query intent collapsed into ." +} +``` + +Use `actionType: "subsumed"` only when the loser page is a thin duplicate with no unique facts worth retaining in the canonical body. + +## Categorical Sub-Cluster Cross-Links + +A categorical sub-cluster normally has a staged ID like `__cat_` or page content that says `Sub-cluster: `. For sibling pages that share the same base fingerprint: + +1. Read all sibling pages visible in the current Stage Index or found through `wiki_search`. +2. Keep one page per meaningful category value. +3. 
Add or update a "Categorical Variants" section in each sibling page: + +```markdown +### Categorical Variants +- ``: [[queries/]] - +``` + +4. Use `wiki_write` with `refs` containing the sibling page keys so cross-links also live in frontmatter. +5. Do not merge categorical siblings only because they share a fingerprint. Merge them only when the category value no longer changes intent. + +## Demotion + +Demotion preserves history; it is not deletion. A page is demoted when evidence shows its underlying cluster has fallen below the historic-SQL floor: + +- `executions < 3`, or +- `distinct_users < 2`, or +- service-account-only usage below the frequency floor, or +- the template was evicted and no active sibling or replacement page supports the same intent. + +Require the low-signal state across the most recent 3 windows when page history is available. If only the current window is visible, demote only when eviction evidence confirms the raw template retired; otherwise add a caveat and leave the page active. + +Use `wiki_write` to express demotion with the current wiki frontmatter fields: + +- Add the `historic-sql-demoted` tag while preserving `historic-sql` and `query-pattern`. +- Prefix the summary with `Demoted historic-SQL pattern: ` unless it already begins with that phrase. +- Add a `### Demotion` section in the body with the last observed usage window, the floor that failed, and the raw path or fingerprint that supports the decision. + +When demoting because of an eviction, also call `emit_eviction_decision`: + +```json +{ + "rawPath": "", + "artifactKind": "wiki", + "artifactKey": "", + "action": "retained_deprecated", + "reason": "Historic-SQL template retired or decayed below the floor; page retained with historic-sql-demoted frontmatter tag." +} +``` + +## What To Write + +Use `wiki_write` for every page update. The tool supports `summary`, `content`, `tags`, `refs`, and `sl_refs` frontmatter fields. 
+ +Canonical pages should keep this body shape: + +````markdown +## +- Source: historic-sql +- Tables: +- Fingerprints: +- Usage: , , first seen , last seen + +### Representative SQL +```sql + +``` + +### Historic SQL Variants +- ``: + +### Categorical Variants +- ``: [[queries/]] - + +### Demotion +- Omit this section unless the page is demoted. +```` + +## Boundaries + +- Do not call `context_candidate_write`; historic-SQL Stage 3 writes query pages directly. +- Do not create new artifact types, stores, ports, or tables. +- Do not group low-tier templates that triage already filtered out. +- Do not merge pages on table overlap alone. +- Do not delete a query page solely because usage is low; demote it unless eviction rules and inbound-reference evidence make removal clearly safer. +- Do not copy unredacted sample `bound_sql`, user emails, account IDs, tokens, or free-text literal values into wiki or SL output. +- Do not edit SL unless the reconciliation prompt shows a concrete same-intent conflict or duplicate that requires an existing SL artifact resolution. +- Do not finish a curator pagination pass while a merged page, demoted page, or inspected eviction lacks the corresponding provenance call. diff --git a/packages/context/skills/historic_sql_ingest/SKILL.md b/packages/context/skills/historic_sql_ingest/SKILL.md new file mode 100644 index 00000000..f8650a99 --- /dev/null +++ b/packages/context/skills/historic_sql_ingest/SKILL.md @@ -0,0 +1,170 @@ +--- +name: historic_sql_ingest +description: Convert one full-tier historic-SQL template WorkUnit into a canonical query knowledge page, linked SL refs, and optional semantic-layer proposals. +callers: [memory_agent] +--- + +# Historic SQL Ingest + +Use this skill when the WorkUnit contains files under `raw-sources//historic-sql//templates//`. + +Read exactly one historic-SQL template WorkUnit. Each WorkUnit represents one staged template or categorical sub-cluster that already survived full-tier page triage.
It is not an intent cluster. + +## Input Shape + +The WorkUnit normally exposes: + +- `metadata.json` in `rawFiles`. +- `page.md` in `rawFiles`. +- `usage.json` in `dependencyPaths`. +- `manifest.json` in `dependencyPaths`. +- `peerFileIndex` containing sibling templates that you cannot read. + +`metadata.json` has the stable identity: + +```json +{ + "id": "fp_1", + "title": "snowflake - analytics.orders [fp_1]", + "path": "templates/fp_1/page.md", + "objectType": "historic_sql_template", + "lastEditedAt": null, + "properties": { + "fingerprint": "fp_1", + "sub_cluster_id": null, + "dialect": "snowflake", + "tables_touched": ["analytics.orders"], + "literal_slots": [ + { "position": 1, "type": "string", "classification": "constant" }, + { "position": 2, "type": "date", "classification": "runtime" } + ], + "triage_signals": { + "executions_bucket": "high", + "distinct_users_bucket": "team", + "error_rate_bucket": "ok", + "recency_bucket": "active", + "service_account_only": "false", + "slot_summary": "1 constant, 1 runtime" + } + } +} +``` + +`page.md` contains mechanically generated normalized SQL and touched tables: + +```text +# fp_1 + +## Normalized SQL +SELECT date_trunc(?, created_at), count(*) FROM analytics.orders WHERE status = ? AND created_at >= ? GROUP BY 1 + +## Tables touched +- analytics.orders +``` + +`usage.json` contains volatile stats, literal top values, and redacted samples. Use it for intent inference and usage summaries. Do not treat usage-only drift as a reason to group this template with siblings. + +## Required Workflow + +1. Read the WorkUnit section in the prompt first. +2. Call `read_raw_file` for `metadata.json`, `page.md`, `usage.json`, and `manifest.json`. +3. Confirm `metadata.objectType === "historic_sql_template"`. If it is not, call `emit_unmapped_fallback` with `reason: "parse_error"`, `fallback: "flagged"`, and the `metadata.json` raw path. +4. 
Extract `fingerprint`, `sub_cluster_id`, `dialect`, `tables_touched`, `literal_slots`, normalized SQL, usage stats, top literal values, and sample timestamps. +5. Infer one canonical query intent from this template only. Use table names, selected expressions, aggregations, joins, grouping, constant literal slots, and repeated successful samples. Runtime literal slots are parameters, not fixed business rules. +6. Build a short intent slug in kebab-case. Use `queries/` as the wiki key. +7. Search existing knowledge with `wiki_search` using the intent phrase and the primary table. Prefer updating an existing `queries/...` page when it is the same intent. +8. Discover touched tables with `sl_discover`. Add cleanly matched source names to `sl_refs`. If a table does not map cleanly, keep it in the page body and do not include it in `sl_refs`. +9. Write or update the query page with `wiki_write`. +10. Apply the SL proposal threshold below. If it passes and a useful generic measure, segment, join, or overlay is clear, update the semantic layer and run `sl_validate`. +11. Exit without reading peer files or grouping sibling templates. + +## Wiki Page Shape + +Use `wiki_write` for pages. Emit the spec frontmatter fields directly on the query page. + +Use this shape: + +```json +{ + "key": "queries/", + "summary": "", + "tags": ["historic-sql", "query-pattern"], + "sl_refs": [""], + "source": "historic-sql", + "intent": "", + "tables": [""], + "representative_sql": "", + "usage": { + "executions": 47812, + "distinct_users": 12, + "first_seen": "2026-02-01", + "last_seen": "2026-04-30", + "p50_runtime_ms": 320, + "p95_runtime_ms": 1180, + "error_rate": 0.0007 + }, + "fingerprints": [""], + "content": "## \n\n### Parameters\n- \n\n### When To Use\n- \n\n### Caveats\n- " +} +``` + +For Snowflake templates include `usage.rows_produced` when present in `usage.json`; for BigQuery v1 omit `usage.rows_produced`. 
+ +The `key: "queries/"` value writes to `knowledge/global/queries/.md` during external ingest because bundle ingests write global wiki pages. + +## Representative SQL Rules + +- Start from normalized SQL in `page.md`. +- For constant slots, use the dominant `usage.literal_slots[].top_values[0][0]` when it has definitional meaning. Quote string and date values in the representative SQL. +- For runtime slots, render named parameters such as `:start_date`, `:as_of`, `:status`, or `:threshold`. +- For categorical slots, document the known categories and write this WorkUnit's sub-cluster value when `sub_cluster_id` is present. +- Preserve the warehouse dialect named by `metadata.properties.dialect`. +- Do not copy sample bound_sql into the wiki unless it is visibly redacted and safer than the normalized SQL. Prefer normalized SQL plus parameter notes. + +## SL Proposal Threshold + +Only propose semantic-layer changes when all are true: + +1. This WorkUnit reached Stage 3 full tier. The runner normally guarantees this, but treat `executions_bucket=low` plus `distinct_users_bucket=solo` or `service_account_only=true` as a reason to write wiki only. +2. At least one `literal_slots[]` entry has `classification: "constant"` and the value has durable business meaning, such as a status, plan tier, channel, threshold, or fixed category. +3. Every table in `tables_touched` maps cleanly through `sl_discover` to an existing SL source. + +When the threshold passes: + +- Call `sl_read_source` before editing an existing source. +- Prefer adding a measure, segment, computed dimension, join, or manifest-backed overlay over creating a standalone SQL source. +- Use `sl_write_source` for a manifest-backed overlay only with `name:` plus additive fields such as `measures:`, `segments:`, `description:`, or `joins:`. Do not include `sql:`, `table:`, `grain:`, or `columns:` on manifest-backed overlays. +- Use `sl_edit_source` for targeted edits when the source file already exists. 
+- Run `sl_validate` after every SL write or edit. +- Keep runtime parameters as caller filters. Do not bake dates, user ids, ids, search strings, or other runtime slots into SL measures. + +When the threshold does not pass, write the wiki page and set `sl_refs` for any cleanly discovered touched tables. A wiki-only result is valid. + +## Intent Inference Guidance + +Prefer canonical intent names that describe the business question, not the SQL shape: + +- Good: `queries/monthly-paid-order-count` +- Good: `queries/enterprise-contract-renewal-risk` +- Good: `queries/support-ticket-first-response-time` +- Weak: `queries/fp-1` +- Weak: `queries/count-orders-group-by-date` + +Use the SQL shape to infer intent: + +- `COUNT`, `SUM`, `AVG`, `MIN`, `MAX`, `GROUP BY`, and date truncation usually indicate metrics or rollups. +- Constant slots often name segments, statuses, tiers, regions, or thresholds. +- Runtime slots usually represent time windows, selected entities, or caller filters. +- Repeated successful samples from multiple human users make the page more durable. +- High error rates, service-account-only use, or old `last_seen` values belong in caveats. + +## Boundaries + +- Do not group sibling templates. Stage 4 `historic_sql_curator` owns cross-template clustering and dedupe. +- Do not read paths listed only in `peerFileIndex`. +- Do not create or update `historic_sql_curator`. +- Do not call `context_candidate_write`; historic-SQL Stage 3 writes final wiki and optional SL artifacts directly. +- Do not invent joins, measures, or definitions that are not supported by the normalized SQL, touched tables, literal slots, or existing SL sources. +- Do not copy unredacted sample `bound_sql`, user emails, account ids, tokens, or free-text literal values into wiki or SL output. +- Do not write SL changes when any touched table lacks a clean SL mapping. +- Do not finish after only an SL write. 
Always write or update the query knowledge page first so the canonical SQL pattern is searchable. diff --git a/packages/context/skills/ingest_triage/SKILL.md b/packages/context/skills/ingest_triage/SKILL.md new file mode 100644 index 00000000..1ac3d108 --- /dev/null +++ b/packages/context/skills/ingest_triage/SKILL.md @@ -0,0 +1,77 @@ +--- +name: ingest_triage +description: Classify and resolve conflicts detected during bundle ingest (structural duplicates, definitional contradictions, near-duplicate clusters, re-ingest changes, evictions). +callers: [memory_agent] +--- + +# Ingest Triage — conflict classification and resolution + +This skill is loaded in two contexts: +- By a Stage 3 WorkUnit agent when a `wiki_search` or `sl_discover` call reveals that a prior WU (or a prior sync) already wrote something that overlaps with what the current WU is about to write. +- By the Stage 4 reconciliation agent for cross-WU sweeps and for eviction decisions. + +Apply the rules below before every write that could collide with an existing artifact. + +## Decision tree + +1. **Is this the same artifact I'm producing now, or a different one with the same name?** + Read both. If names match and content matches (modulo whitespace): no conflict — skip the write, the prior one stands. + +2. **If content differs, is it an expression-only change (e.g. a different `sql:` body for the same measure name, same grain, same columns)?** + Re-ingest change (expression-only): silently replace via `sl_edit_source`. No flag. + +3. **If the difference is structural — grain, columns, filter, join shape — is the current bundle the re-ingest of a previously-ingested bundle (i.e. `priorProvenance` has a row for this raw file and artifact)?** + Re-ingest change (semantic break): replace + flag. Record in the IngestReport's `conflicts_resolved` list with `flagged_for_human: true`. + +4.
**If there's no prior-sync row (both are from THIS job), check for same-ingest contradictions:** + + | Kind | Detection | Resolution | + |---|---|---| + | Structural duplicate | Same name, near-identical expression | Elect canonical by: (a) highest inbound-ref count from other sources; tiebreak: (b) lexicographically first unit key; (c) lexicographically first source name. Subsume losers into `-variants.md` wiki page. Do NOT flag unless ambiguous. | + | Near-duplicate cluster | Different names, overlapping shape (same table, similar formulas) | Same as structural; one canonical, others subsumed. Flag only if no canonical emerges. | + | Definitional contradiction | Same name, substantively different formulas (different aggregation, different filters, different columns) | **Rename + capture**: disambiguate ALL variants with suffix derived from the domain (`churn_risk_engagement_based`, `churn_risk_billing_based`) and write a unified wiki page listing every variant with provenance. The contested name does NOT land in the SL. **Always flag.** | + +5. **Eviction (Stage 4 only)**: for each entry in `eviction_list()`: + - `inbound_refs: []` → remove the artifact (`sl_delete` for SL sources, `wiki_remove` for wiki pages). + - `inbound_refs: [...]` → retain the artifact, set `deprecated: true` on SL sources (via `sl_edit_source`), write a wiki note "origin file removed in ; preserved because referenced by: …". Flag in the IngestReport so the user can plan migration. + +## Why same-ingest vs re-ingest differs + +Within ONE bundle there's no user signal telling us which duplicate wins — we capture all variants and flag. Across bundles, re-uploading IS the signal that the new state is intended — we replace silently for expression changes and flag for semantic breaks. 
+ +## Naming disambiguation hints + +When you rename to disambiguate, prefer domain suffixes that match the containing view/table/collection name: `customers.churn_risk_score` → `customers.churn_risk_engagement_based` (if the `customer_churn` view computes it from engagement); `billing.churn_risk_score` → `billing.churn_risk_billing_based`. Avoid numeric suffixes (`churn_risk_1`, `churn_risk_2`) — they disclose nothing. + +## Applying canonical pins + +When the Stage 4 system prompt includes a `` block, treat each pin as a prior user decision for that `contestedKey`. + +- If the pinned `canonicalArtifactKey` is present in the Stage Index or already exists in SL, keep it as the canonical artifact for that contested key. +- Disambiguate competing artifacts instead of using the contested name for them. +- Do not flag the pinned contested key solely because the variants disagree; the user has already chosen the canonical artifact. +- If the pinned artifact cannot be found and no current WU can recreate it, emit `emit_conflict_resolution` with `flaggedForHuman: true` and explain that the pin references a missing canonical artifact. + +When a pin applies cleanly, call `emit_conflict_resolution` with `kind: "definitional_contradiction"`, `artifactKey` set to the pinned `canonicalArtifactKey`, `detail` describing the pinned election, and `flaggedForHuman: false`. + +## What to write in the unified wiki page + +When you perform rename + capture, also write one page named `-definitions.md` under the wiki GLOBAL scope. Structure: + +- One heading per variant, referencing the disambiguated SL name. +- One paragraph per variant: what it computes, where it came from (raw file + line range), when to use it. +- A closing "Choosing between these" paragraph if the variants are legitimately domain-specific. + +Do not attempt to rank variants or pick a "best" — that's user-override territory. 
+ +## Silence rules + +Flag for human review when: +- You replaced an artifact for a re-ingest semantic break (step 3), or did rename + capture for a definitional contradiction (step 4, third table row). +- You performed an eviction retention (step 5, second bullet). +- An override constraint (from a Stage 4 re-run) conflicts with current inbound refs. + +Do NOT flag: +- Same-content duplicate skip (trivial). +- Structural duplicate with clear canonical election. +- Expression-only re-ingest replace. diff --git a/packages/context/skills/knowledge_capture/SKILL.md b/packages/context/skills/knowledge_capture/SKILL.md new file mode 100644 index 00000000..8265c950 --- /dev/null +++ b/packages/context/skills/knowledge_capture/SKILL.md @@ -0,0 +1,124 @@ +--- +name: knowledge_capture +description: KLO's knowledge base — wiki pages for durable, reusable business knowledge. Covers capture workflow for user preferences, metric definitions, organizational conventions, and cross-references between knowledge pages and semantic-layer sources. Loaded by the post-turn memory-agent only. The research agent reads wiki via `wiki_read`/`wiki_search` but does not write it. +callers: [memory_agent] +--- + +# Knowledge Capture + +## Role + +The knowledge base stores durable, reusable business knowledge for an analytics assistant. Each page is a self-contained rule, definition, or convention that answers "how should this concept be handled in this organization?" — written once and reused across chats. + +Scope selection is handled by the runtime: +- When user-scoped knowledge is enabled AND the caller is a chat turn, writes go to the user's **personal** scope. +- When the caller is an admin-driven ingest (`sourceType: 'external_ingest'`), writes go to the **global** scope. +- When user-scoped knowledge is disabled, all writes go to the global scope. + +The `wiki_write` tool picks the right scope based on the session. Capture logic does not need to choose — focus on whether the content is worth capturing at all.
+ +## What to capture + +Capture when the user or the ingested document expresses: +- A metric definition ("revenue means booked revenue after refunds"). +- A filter or convention that should always apply ("exclude test accounts when reporting ARR"). +- A mapping or alias ("mood_stress_sleep = Oxytocin protocol"). +- A domain rule that is not visible from column names alone ("status = 'T' means terminated, not 'terminated'"). +- A link or external system convention ("medplum_patient_id is the primary key in the EMR at https://emr.example/patients/{id}"). + +Do NOT capture: +- One-off requests ("answer under 100 words"). +- Temporary instructions scoped to the current chat. +- Ad-hoc formatting preferences. +- Information already present in the semantic layer (column names, join paths, measure formulas — those belong in SL). +- **Query results, snapshots, or time-bounded benchmark tables.** Numbers go stale; pasting "Oct 2025: 25%, Nov 2025: 19.9%, …" creates misinformation as soon as new data lands. Reference the SL source by name (`sl_refs`) and let future queries pull live data — the wiki captures the *rule* (definition, exclusion, segmentation), the SL source captures the *measure*, and `semantic_query` captures the *current values*. +- **Interpretive narrative tied to a specific snapshot** ("M1 retention degraded sharply from Dec 2025"). The observation is anchored to data that will move; the actionable convention (e.g., "always exclude in-progress cohorts") may be worth capturing on its own, but the snapshot-specific commentary is not. + +If nothing is worth capturing, respond without calling any tool. + +## Workflow + +1. Read the wiki index (provided in the prompt) and decide whether the turn introduces durable knowledge. +2. **Before writing**, search for related content so cross-references are accurate: + - `wiki_search` with the topic — find related wiki pages to populate `refs`. 
+ - `sl_discover` with the concept — if the page defines a metric (revenue, churn, retention, LTV, ARR, MRR, CAC, attribution, etc.), find matching SL sources or measures to populate `sl_refs`. If no matches, pass `sl_refs: []` so future readers know you checked. +3. If updating an existing page, `wiki_read` it first. The read result begins with `[scope: ... | tags: ... | refs: ... | sl_refs: ...]` showing current frontmatter. +4. `wiki_write` to create or update. Prefer merging into an existing page over creating a new one. +5. `wiki_remove` only when a page is truly obsolete — not to replace stale content (update it instead). + +## Keys, summaries, and content + +- **Keys** are short kebab-case topic identifiers: `leads-source-filter`, `revenue-definition`, `churn-calculation`. No namespacing, no prefixes. +- **Summary** is a one-line hook (≤200 chars) shown in the index. +- **Content** is concise markdown — actionable rules, not prose. + +``` +## [Topic Title] +- Rule or preference statement +- Another rule if applicable +``` + +Prefer fewer, richer pages over many thin ones. Each page covers one coherent topic thoroughly. If the new information relates to an existing page, update that page instead of fragmenting the knowledge. + +## Tags, refs, sl_refs + +The `wiki_write` tool accepts three array fields that go into the page frontmatter: + +- **`tags`**: 1–3 short lowercase topic tags (`["finance"]`, `["data-quality"]`). Call `wiki_list_tags` first to reuse existing tags for consistency. +- **`refs`**: keys of related wiki pages. Add when the new page materially depends on concepts from another (e.g., a churn definition that uses the paid-orders filter from a revenue definition). Don't add refs just because pages share a topic area. +- **`sl_refs`**: names of SL sources or measures the page relates to. Format: `"source_name"` or `"source_name.measure_name"`. Discover via `sl_discover` → inspect with `sl_read_source` → include the confirmed matches. 
+ +### Replace semantics + +All three fields use REPLACE semantics on update: + +- Omit the field → existing value is kept. +- Pass `[]` → field is cleared. +- Pass `[values]` → replaces existing with exactly those values (no merging). + +## Editing existing pages + +Two modes: + +- **Full content** — pass `content` to rewrite the whole page. Use when the page structure needs to change. +- **Targeted edits** — pass `replacements: [{ oldText, newText }]` to apply exact-string replacements. Use for small updates; preserves the rest of the page. + +When editing, read the page first so the edit matches exact whitespace and indentation. + +## Overriding an organization rule + +Organization (GLOBAL) pages are read-only from a user's personal-scope session. To override a global rule for a single user, write a personal page with the **same key**. At read time the USER page wins. + +## Worked example — capturing a metric with cross-references + +User says: "Going forward, the official refund rate is total refunded amount divided by total gross transaction amount." + +``` +wiki_list_tags() + → existing tags include "finance" +wiki_search({ query: "refund revenue paid orders" }) + → returns `revenue-definition` (related — defines paid-orders filter) +sl_discover({ query: "refund rate" }) + → returns fct_orders (score 0.08), fct_gaap_revenue (0.06) +sl_read_source({ sourceName: "fct_orders" }) + → confirms the amount_refunded_dollars and transaction_amount_dollars columns and the refund_rate_pct measure exist +wiki_write({ + key: "refund-rate-definition", + summary: "Refund rate = refunded amount / gross transaction amount", + content: "## Refund Rate\n- Definition: sum(amount_refunded_dollars) / sum(transaction_amount_dollars)\n- Source of truth: fct_orders\n- Related: see revenue-definition for paid-orders filter.", + tags: ["finance"], + refs: ["revenue-definition"], + sl_refs: ["fct_orders.refund_rate_pct", "fct_orders"] +}) +``` + +Search-then-write order matters.
Cross-references are part of the page's identity, not an afterthought. + +## Rules + +- Read existing pages before updating them. +- Prefer merging into an existing page over creating a new one. +- Prefer fewer, richer pages over many thin ones. +- Write content as clear, actionable rules — not narrative prose. +- Discover cross-references via search before writing, not after. +- If nothing is worth capturing, respond without calling any tool. diff --git a/packages/context/skills/live_database_ingest/SKILL.md b/packages/context/skills/live_database_ingest/SKILL.md new file mode 100644 index 00000000..9db52484 --- /dev/null +++ b/packages/context/skills/live_database_ingest/SKILL.md @@ -0,0 +1,58 @@ +--- +name: live_database_ingest +description: Capture semantic-layer and knowledge updates from a live database schema snapshot. +callers: [memory_agent] +--- + +# Live Database Ingest + +Use this skill when the ingest work unit contains raw files under +`raw-sources//live-database//`. + +## Workflow + +1. Read the table JSON file listed in the work unit. +2. Read `connection.json` to understand the snapshot metadata. +3. Read `foreign-keys.json` when the table has a foreign key or when joins are + needed for the semantic-layer source. +4. Create or update one semantic-layer source for the table with + `sl_write_source`. +5. Use the physical table name from the raw JSON as the source `table` field. +6. Preserve database comments as `descriptions.db` on tables and columns. +7. Add joins only when the foreign key index names both sides. +8. Write wiki pages only for durable business meaning that is present in table + or column comments. +9. Run `sl_validate` for the table source before the work unit completes. 
+ +## Source shape + +For a raw table with this shape: + +```json +{ + "name": "orders", + "db": "public", + "columns": [ + { "name": "id", "type": "integer", "nullable": false, "primaryKey": true } + ] +} +``` + +Write a semantic-layer source with this shape: + +```yaml +name: orders +table: public.orders +grain: id +columns: + - name: id + type: number +``` + +Use `string`, `number`, `time`, or `boolean` for column types. When a database +type is ambiguous, use `string`. + +## Boundaries + +The raw snapshot is structural evidence. Do not invent measures, segments, +business definitions, or joins that are not present in the snapshot files. diff --git a/packages/context/skills/looker_ingest/SKILL.md b/packages/context/skills/looker_ingest/SKILL.md new file mode 100644 index 00000000..ded805eb --- /dev/null +++ b/packages/context/skills/looker_ingest/SKILL.md @@ -0,0 +1,217 @@ +--- +name: looker_ingest +description: Extract durable KLO knowledge and semantic-layer contribution proposals from staged Looker runtime dashboard, Look, and explore JSON. Load for WorkUnits whose raw files are under explores/, dashboards/, or looks/. +callers: [memory_agent] +--- + +# Looker Runtime Ingest + +Looker runtime ingest turns API-staged dashboards, Looks, and explores into durable KLO memory. Runtime entities are evidence. They are not themselves the final knowledge shape. + +## Required Workflow + +1. Read every `rawFiles` entry for the WorkUnit. +2. Read relevant `dependencyPaths` before making a decision. For dashboard and Look WUs this usually includes the referenced explore JSON, signal files, `folders/tree.json`, and `users/.json`. +3. Treat `signals/*.json`, owners, folders, schedules, and favorites as prioritization or provenance context only. +4. Extract generalizable metric formulas, segment definitions, field semantics, and domain conventions. +5. 
Use `wiki_search`, `sl_discover`, and `sl_read_source` before writing so new content merges with existing memory instead of duplicating it. +6. Use `context_evidence_search` or `context_evidence_read` to obtain evidence chunk IDs for any wiki-bound knowledge candidate. +7. Use `context_candidate_write` for durable wiki-bound knowledge. Do not call `wiki_write` from a Looker WorkUnit; Stage 4 reconciliation promotes candidates and writes wiki pages. +8. Use `looker_query_to_sl` for each Look query or dashboard tile query that has a `query` object. +9. Write SL from Looker runtime evidence only through the staged warehouse target contract. For explores and inherited dashboard/Look queries, branch on `targetTable.ok`; when it is true, write on `targetWarehouseConnectionId` and use `targetTable.canonicalTable` as `source.table`. When it is false or missing, write wiki knowledge candidates and record `emit_unmapped_fallback` with the staged reason. +10. Run `sl_validate` after every SL write. If validation fails, fix the source or roll it back before the WorkUnit ends. + +## Explore WorkUnits + +Explore WUs have raw files like `explores/<modelName>/<exploreName>.json` and usually depend on `lookml_models.json`. + +Use the deterministic API-derived source key: + +```text +looker__<modelName>__<exploreName> +``` + +For example, `modelName: "b2b"` and `exploreName: "sales_pipeline"` map to `looker__b2b__sales_pipeline`.
+ +Mapped explore write shape: + +```json +{ + "connectionId": "22222222-2222-4222-8222-222222222222", + "sourceName": "looker__b2b__sales_pipeline", + "source": { + "name": "looker__b2b__sales_pipeline", + "table": "proj.dataset.opportunities", + "grain": ["opportunity_id"], + "columns": [ + { + "name": "opportunity_id", + "type": "string" + }, + { + "name": "arr", + "type": "number" + } + ], + "measures": [ + { + "name": "total_arr", + "expr": "sum(arr)" + } + ] + } +} +``` + +Every concrete value in that example must be backed by raw Looker field SQL, `source_tables` preflight, `source_columns`, or existing SL when applied to a real WorkUnit. If the evidence is not present, write wiki candidates and emit `emit_unmapped_fallback`. + +The staged explore file carries warehouse target fields populated before the WU starts: + +- `connectionName`: the Looker runtime connection name. +- `targetWarehouseConnectionId`: the resolved warehouse connection id, or `null` when the Looker connection is unmapped. +- `rawSqlTableName`: Looker's verbatim `sql_table_name`. Keep it as provenance only. +- `targetTable`: the parsed target-table union. Use this as the sole branch condition. + +When `targetTable.ok === true`, the explore has a complete KLO backing target. Before writing: + +1. Use `targetTable.catalog`, `targetTable.schema`, and `targetTable.name` for `source_tables` preflight matching through `sl_discover` or `sl_read_source`. +2. Use Looker field `sql`, labels, descriptions, and type metadata to derive source columns, measures, segments, joins, and grain. +3. Call `sl_write_source` or `sl_edit_source` with `connectionId: targetWarehouseConnectionId`. +4. Set `source.name` to the deterministic API-derived source key, for example `looker__b2b__sales_pipeline`. +5. Set `source.table` to `targetTable.canonicalTable`. +6. Run `sl_validate` after every SL write. + +The `table` field is `targetTable.canonicalTable`, not `rawSqlTableName`. 
Raw Looker values can contain aliases such as `schema.table AS x`, Looker templates such as `${TABLE}`, or derived-table SQL. Those raw forms do not compose safely with SL generation. `targetTable.canonicalTable` is the dialect-quoted identifier rebuilt by the parser. + +Use `targetTable.{catalog,schema,name}` only for source_tables preflight. Do not put those tuple fields separately into the SL source unless the SL schema already asks for them. + +When `targetTable.ok === false`, keep the WU wiki-only for SL purposes. Capture durable domain semantics with `context_candidate_write`, then emit a fallback with the EXACT structured `reason` code from `targetTable.reason`. Put any human-readable context in `detail`, NOT in `reason`: + +```json +{ + "rawPath": "explores/b2b/sales_pipeline.json", + "reason": "no_connection_mapping", + "detail": "Looker connection b2b_sandbox_bq is not mapped to a warehouse connection", + "fallback": "wiki_only" +} +``` + +Valid `reason` codes (use exactly one, no other strings allowed): `no_connection_mapping`, `looker_template_unresolved`, `derived_table_not_supported`, `no_physical_table`, `multiple_table_references`, `unsupported_dialect`, `parse_error`, `missing_target_table`. + +When `targetTable` is `null`, read the raw explore file again. If the target is still absent, emit the same fallback with `"reason": "missing_target_table"`. + +## Look And Dashboard WorkUnits + +Looks have raw files like `looks/.json`. Dashboards have raw files like `dashboards/.json`. Dashboard tiles with inline `query` objects follow the same decision rules as Looks. + +For each query: + +1. Call `looker_query_to_sl` with the query JSON, title, content type, and usage counts if available. +2. Read the proposal's `targetStatus`, `targetWarehouseConnectionId`, `targetTable`, `sourceTable`, and `canWriteStandaloneSource`. +3. 
If `canWriteStandaloneSource` is true, use `targetWarehouseConnectionId` for SL tools and `sourceTable` / `targetTable.canonicalTable` as the source table. Verify the proposal against the parent explore dependency and existing SL before writing. +4. If the proposal decision is `measure_added`, add or edit a measure only after verifying the expression against the explore field SQL or an existing source column. +5. If the proposal decision is `source_created`, create a source only when `canWriteStandaloneSource` is true and the filter is canonical. Use `source.table = targetTable.canonicalTable`. +6. If `targetStatus` is `unmapped`, `unparseable`, or `missing_target_table`, keep SL wiki-only for this query and call `emit_unmapped_fallback` with the proposal's target reason or status. +7. If the proposal decision is `wiki_only`, write a context candidate only when the Look or dashboard names a reusable business concept. + +## Capture Rules + +Write SL for: + +- reusable aggregations with clear formulas; +- reusable segment predicates that appear canonical; +- calculated dimensions that are stable and backed by raw Looker query evidence; +- joins or source relationships that are explicit in the explore JSON. + +Write wiki for: + +- metric definitions in dashboard or Look titles, descriptions, axis labels, and filter semantics; +- business meaning of an explore; +- concept aliases used by teams; +- caveats about multiple competing definitions. + +Skip: + +- point-in-time values and chart screenshots; +- dashboard layout, tile positions, colors, visualization types, and render settings; +- owner names, top users, recipient counts, favorite counts, schedules, and usage counts as narrative content; +- ad-hoc low-usage queries with no durable business semantics; +- simple saved views of fields with no metric, segment, or concept definition. 
+ +## Usage Signals + +Use usage only to prioritize: + +- zero or near-zero usage lowers priority and often means skip; +- high usage raises confidence that a metric or segment is canonical; +- schedules and favorites can break ties between otherwise similar candidates. + +When calling `context_candidate_write`, usage can affect scoring: + +- High usage (`queryCount30d >= 10` or `uniqueUsers30d >= 3`) can justify `authorityScore: 3` and `reuseScore: 3` when the evidence is otherwise durable. +- Zero recent usage should usually use `actionHint: "skip"` or lower `reuseScore` unless the content clearly defines a canonical business concept. +- Schedules and favorites can raise `reuseScore` by 1 when deciding between otherwise similar candidate scores. + +Never include the usage counts themselves in `assertion`, `rationale`, or eventual wiki prose. + +Never write usage numbers, owner names, folder names, top users, schedule counts, or recipients into wiki article prose. If attribution is needed, keep it in provenance through the normal ingest action trail. + +## Provenance And Cross-References + +When writing candidates from Looker evidence, cite chunk IDs from `context_evidence_search` or `context_evidence_read`. Stage 4 reconciliation writes wiki pages from promoted candidates and sets `sl_refs` when the source exists or was created in the run. + +When an SL action is written on `targetWarehouseConnectionId`, the runner records `targetConnectionId` on the action and syncs `knowledge_sl_refs` to the warehouse connection. The wiki article still belongs to the Looker run connection; the SL ref belongs to the warehouse. Do not rewrite the source name or connection id in wiki frontmatter by hand. Use normal SL tool calls and let Stage 4 reconcile refs from actions. 
+ +Use these source-key conventions: + +- API-derived explore source: `looker____` +- API-derived segment source: `looker____` +- File-adapter source, when present: `__` without the `looker__` prefix + +During Stage 4 reconciliation, when both `looker____` and `__` exist for the same connection, treat the unprefixed file-adapter source as canonical. Rewrite wiki `sl_refs` to the unprefixed source, remove the API-derived source if it was created in this run, and call `emit_artifact_resolution` with `actionType: "subsumed"`, `artifactKind: "sl"`, `artifactKey: "looker____"`, and the raw explore path that produced it. + +If a file-adapter source already exists and clearly subsumes the API-derived source, prefer the file-adapter source in `sl_refs` and mention the API entity only as evidence in the wiki content. + +## Examples + +Measure proposal from a Look: + +```json +{ + "title": "Open Pipeline ARR", + "query": { + "model": "b2b", + "view": "sales_pipeline", + "fields": ["opportunities.arr", "opportunities.stage"], + "filters": { "opportunities.stage": "open" } + } +} +``` + +Expected handling: + +- call `looker_query_to_sl`; +- verify `opportunities.arr` and `opportunities.stage` against the explore dependency and existing SL; +- add or update a measure only if the resulting expression validates; +- write wiki for the durable definition "open pipeline ARR" if it is not already captured; +- avoid mentioning query counts or users in wiki prose. + +Simple saved view: + +```json +{ + "title": "Accounts By Region", + "query": { + "model": "b2b", + "view": "accounts", + "fields": ["accounts.region", "accounts.segment"], + "filters": {} + } +} +``` + +Expected handling: + +- no SL write; +- wiki only if the title or description defines a reusable company concept; +- otherwise skip. 
diff --git a/packages/context/skills/lookml_ingest/SKILL.md b/packages/context/skills/lookml_ingest/SKILL.md new file mode 100644 index 00000000..fe6d753b --- /dev/null +++ b/packages/context/skills/lookml_ingest/SKILL.md @@ -0,0 +1,180 @@ +--- +name: lookml_ingest +description: Map a LookML view/model/explore into KLO semantic layer sources. Covers the LookML to KLO primitive table, provenance tagging, and three worked examples (overlay, standalone from derived_table, standalone with sql_always_where). Load when the turn contains `.lkml` content. +callers: [memory_agent] +--- + +# LookML to KLO Semantic Layer + +LookML views map to SL sources, `measure:` to measures, `explore: { join: }` to the join graph. This skill lays out the mapping and the three capture shapes. + +## Mapping table + +| LookML | KLO form | Notes | +|---|---|---| +| `view: X { sql_table_name: …; measure:/dimension:/join: }` | **Overlay** at `/X.yaml` with `measures`, `columns` (computed), `joins`, `segments` | Manifest-backed; inherit grain/columns | +| `view: X { derived_table: { sql: … } }` | **Standalone** with top-level `sql:`, explicit `grain:` + `columns:` | No manifest entry exists | +| `view: X { sql_always_where:

<predicate> }` | **Standalone** with `sql: SELECT * FROM <table> WHERE <predicate>

` | Enforcement, not opt-in | +| `explore: { join: Y { sql_on: …; relationship: … } }` | `joins:` entry `{ to: Y, on: " = Y.", relationship: … }` | On the overlay or standalone | +| `conditionally_filter` / `always_filter` | `segments: [{ name, expr }]` | Callers reference by name | +| Manifest entry | `_schema/*.yaml` | **Never edit** — auto-imported | + +Type map: `date`/`datetime`/`timestamp` → `time`; `yesno` → `boolean`; `number` → `number`; `string` → `string`. Ignore `drill_fields:` (UI only). + +## Decision rules + +LookML writes target the run connection directly. Unlike Looker runtime ingestion, the LookML adapter is configured on the warehouse KLO connection, so do not look for `targetWarehouseConnectionId` and do not route through a mapping array. + +Before any SL write, inspect the WorkUnit notes. + +If notes contain: + +```text +[LOOKML SL WRITES DISALLOWED] +reason: lookml_connection_mismatch +... +[/LOOKML SL WRITES DISALLOWED] +``` + +this is a hard gate. The model's declared Looker `connection:` does not match the warehouse connection's configured `expectedLookerConnectionName`. Continue wiki extraction and context candidates. Do not call `sl_write_source` or `sl_edit_source` for that WorkUnit. The runner also removes those write tools for this WorkUnit; treat the missing tools as expected. Preserve the mismatch reason in any `emit_unmapped_fallback` you create. + +When SL is allowed: + +- **Overlay** when the view is a thin wrapper over a manifest table (`sql_table_name:` matches a manifest entry). Do not repeat base columns or grain. +- **Standalone** when the view uses `derived_table:` or `sql_always_where:`. `sl_write_source` rejects overlays whose name has no manifest entry; that error points here. +- **Skip** a view with only `view:`, `sql_table_name:`, and bare `dimension:` entries (no `measure:`, `description:`, `derived_table:`, `sql_always_where:`, `join:`). The pre-filter already short-circuits those. 
+ +## Preflight: never guess column names + +LookML's `dimension_group: date { type: time; timeframes: [raw, date, week, month] }` expands at Looker-render time into `${view.date_raw}`, `${view.date_date}`, `${view.date_week}`, and so on. **These are NOT physical warehouse columns.** The physical column is whatever the group's `sql:` clause references (e.g. `${TABLE}.date` → column `date`). + +A prior replay hallucinated `date_date`, `date_week` into `sql:`, `columns:`, and `grain:` across 4+ standalones; every measure on each affected source returned `400 Unrecognized name: date_date` at query time. Preventable. + +**Required flow before writing any overlay or standalone**: + +1. Call `sl_discover(<table>)` for each base table you're about to touch. That returns the real columns. +2. If the table isn't in the manifest, fall back to `sql_execution({ sql: "SELECT column_name FROM <dataset>.INFORMATION_SCHEMA.COLUMNS WHERE table_name = '<table>'" })` (session shape — a connection is already pinned by the ingest session). +3. Use only those names in `sql:`, `columns:`, and `grain:`. Map each `dimension_group` to ONE `{ name: <physical_column>, type: time, role: time }` entry — never one per timeframe. + +| LookML input | KLO `columns:` entry | +|---|---| +| `dimension_group: month { type: time; timeframes: [month]; sql: ${TABLE}.month_date ;; }` | `{ name: month_date, type: time, role: time }` | +| `dimension_group: date { type: time; timeframes: [raw, date, week, month]; sql: ${TABLE}.date ;; }` | `{ name: date, type: time, role: time }` — single entry, NOT `date_raw`/`date_date`/`date_week` | + +**After every `sl_write_source`**: call `sl_validate`. It runs `SELECT * FROM (<sql>) LIMIT 0` against the connection. If a column name was invented, the warehouse's `Unrecognized name: …` error comes back verbatim. Treat that as a hard failure — re-read the real columns with `sl_discover` and rewrite. 
+ +## Provenance markers + +When a wiki mixes LookML source prose with `sl_discover` output, tag sections: + +```markdown + +Customers fan out many-to-one into `accounts` via `account_id`. + + +`customers.admin_user_id` is nullable — orphan rows exist. + +``` + +Invisible in most renderers; lets a future pass audit provenance. + +## Example 1 — overlay (thin wrapper) + +LookML (excerpt): + +```lookml +view: fct_labs { + sql_table_name: analytics.fct_labs ;; + dimension: is_byol { type: yesno; sql: ${TABLE}.lab_type = 'byol' ;; } + measure: count_lab_orders { type: count; description: "Total lab orders." } + measure: count_byol_labs { type: count; filters: [is_byol: "yes"] } +} +explore: fct_labs { + join: dim_customers { sql_on: ${fct_labs.admin_user_id} = ${dim_customers.admin_user_id} ;; relationship: many_to_one } +} +``` + +KLO overlay at `/fct_labs.yaml`: + +```yaml +name: fct_labs +description: "Lab-order fact table. One row per lab order event." +columns: + - name: is_byol + type: boolean + expr: "lab_type = 'byol'" +measures: + - name: count_lab_orders + expr: count(lab_order_id) + description: Total lab orders. + - name: count_byol_labs + expr: count(lab_order_id) + filter: "is_byol = true" +joins: + - to: dim_customers + on: "admin_user_id = dim_customers.admin_user_id" + relationship: many_to_one +``` + +## Example 2 — standalone from `derived_table` + +```lookml +view: lab_results { + derived_table: { sql: + SELECT lab_order_id, admin_user_id, lab_date, biomarker, value, + value - LAG(value) OVER (PARTITION BY admin_user_id, biomarker ORDER BY lab_date) AS delta + FROM analytics.raw_lab_results WHERE status = 'final' ;; } + dimension: lab_order_id { primary_key: yes; type: string } + measure: avg_delta { type: average; sql: ${delta} ;; } +} +``` + +```yaml +name: lab_results +description: "Lab results with biomarker delta vs previous reading per user." 
+source_type: sql +sql: | + SELECT lab_order_id, admin_user_id, lab_date, biomarker, value, + value - LAG(value) OVER (PARTITION BY admin_user_id, biomarker ORDER BY lab_date) AS delta + FROM analytics.raw_lab_results WHERE status = 'final' +grain: [lab_order_id] +columns: + - { name: lab_order_id, type: string } + - { name: admin_user_id, type: string } + - { name: lab_date, type: time, role: time } + - { name: biomarker, type: string } + - { name: value, type: number } + - { name: delta, type: number } +measures: + - { name: count_lab_results, expr: "count(lab_order_id)" } + - { name: avg_delta, expr: "avg(delta)" } +``` + +## Example 3 — standalone with `sql_always_where` + +```lookml +view: rpt_daily_braze_email { + sql_table_name: analytics.fct_email_sends ;; + sql_always_where: ${TABLE}.channel = 'braze' AND ${TABLE}.status = 'delivered' ;; + dimension: send_id { primary_key: yes; type: string } + measure: delivered_count { type: count } +} +``` + +```yaml +name: rpt_daily_braze_email +description: "Delivered Braze email sends (enforced filter: channel='braze', status='delivered')." +source_type: sql +sql: | + SELECT * FROM analytics.fct_email_sends + WHERE channel = 'braze' AND status = 'delivered' +grain: [send_id] +columns: + - { name: send_id, type: string } + - { name: admin_user_id, type: string } + - { name: sent_at, type: time, role: time } +measures: + - { name: delivered_count, expr: "count(send_id)" } +``` + +`sql_always_where` is enforcement → wrap into the `sql:`. Don't model it as a segment (segments are opt-in) or per-measure filter (fragile, duplicated). diff --git a/packages/context/skills/metabase_ingest/SKILL.md b/packages/context/skills/metabase_ingest/SKILL.md new file mode 100644 index 00000000..b33ed488 --- /dev/null +++ b/packages/context/skills/metabase_ingest/SKILL.md @@ -0,0 +1,218 @@ +--- +name: metabase_ingest +description: Convert Metabase questions, models, and metrics into KLO Semantic Layer source definitions. 
Covers result-metadata to KSL column type mapping, FK/PK detection, near-duplicate deduplication, pre-aggregation decomposition, join-graph connectivity, and how to react to priorProvenance from earlier ingest syncs. Load when the WorkUnit contains `cards/.json` files under a Metabase bundle. +callers: [memory_agent] +--- + +# Metabase to KLO Semantic Layer + +Each WorkUnit represents one Metabase collection's cards for one Metabase database (mapped to exactly one KLO connection). Every `cards/.json` file carries the resolved SQL, result_metadata, card type, collection path, and referenced-card ids. The WU's `sync-config.json` tells you which sync mode is active and which selections apply. `databases/.json` tells you the target KLO connection. + +## Context format + +Each card JSON looks like: +```json +{ + "metabaseId": 7, + "name": "Daily orders", + "description": "Orders by day", + "type": "model", + "databaseId": 42, + "collectionId": 5, + "resolvedSql": "SELECT ...", + "templateTags": [{"name": "ref", "type": "card", "cardReference": 10}], + "resultMetadata": [ + {"name": "day", "base_type": "type/DateTime", "semantic_type": "type/CreationTimestamp"}, + {"name": "order_count", "base_type": "type/Integer"} + ], + "collectionPath": ["Data", "Orders Team"], + "referencedCardIds": [10] +} +``` + +Use `resultMetadata` to: +- Map `base_type` to KSL column type: `type/Integer`, `type/Float`, `type/Decimal`, `type/BigInteger` → `number`; `type/Text`, `type/TextLike` → `string`; `type/DateTime`, `type/Date`, `type/DateTimeWithTZ` → `time`; `type/Boolean` → `boolean`. +- Identify grain candidates: columns with `semantic_type: type/PK`. +- Identify join candidates: columns with `semantic_type: type/FK` plus `fk_target_field_id`. +- Identify time columns: `semantic_type: type/CreationTimestamp` or `type/UpdatedTimestamp` → set `role: time`. +- Use `display_name` for measure descriptions when available. 
+ +### Additional card metadata + +- `parameters`: list of card-level parameters with widget types and defaults. When SQL resolution fell back to unresolved SQL, use this to drive Step A of the SQL-translation workflow (drop optional clauses): knowing each `{{ var }}` is `type: "date/range"` vs `type: "category"` tells you what kind of clause it is. +- `resultMetadata[i].field_ref`: Metabase's canonical reference to the source warehouse field. Shape `["field", <id-or-name>, <options>]`. When this is set, the column maps directly to a warehouse field, which is useful for declaring joins from FK metadata without re-parsing SQL. +- `lastRunAt`: ISO timestamp of the card's last execution. If null or very old, the card may be dead; prefer skipping over creating a source. +- `dashboardCount`: number of dashboards referencing the card. Cards with `dashboardCount: 0` and a stale `lastRunAt` are strong skip signals. + +## Decision tree + +For each card: +1. Analyze `resolvedSql` + `resultMetadata`: identify base tables, aggregations, joins, filters, column types. +2. Check `sl_discover` and `sl_read_source` for existing sources that overlap. +3. Decide: + - Simple aggregation on a table that already has a source → `sl_edit_source` to add a measure. + - Join between tables that should be linked in the SL graph → `sl_edit_source` to add a join. + - Complex derived SQL (CTEs, multi-layer aggregation, scoring models) → `sl_write_source` with `source_type: sql`. When the SQL projects/filters from a single manifest-backed base table, set `inherits_columns_from: <manifest_key>` so columns inherit type and description from the manifest — see `sl_capture` skill for the slim form. Use `sl_discover` to discover the manifest key from the table reference in the SQL (it accepts `MARTS.CONSIGNMENTS`, `ANALYTICS.MARTS.CONSIGNMENTS`, or `CONSIGNMENTS`). + - New base table not yet in the semantic layer → `sl_write_source` with `source_type: table`. 
+ - Trivial query (`SELECT *`, simple `COUNT(*)` with no business logic) → do nothing; the runner will record this card as `action_type='skipped'`. + - Duplicate of an existing measure → same as trivial; do nothing for this card. + +**Manifest-only names need an overlay first.** If `sl_discover` shows a source name with `Type: table` but `sl_read_source` returns "Source not found", the source lives only in the schema manifest (no standalone overlay yet). `sl_edit_source` cannot edit manifest-only names — you must bootstrap an overlay with `sl_write_source` using the overlay shape: + +```yaml +name: +measures: + - name: + expr: "" +``` + +Overlay shape: `name:` plus any of `measures:`, `segments:`, `description:`, `joins:`, `disable_joins:`. Never include `sql:`, `table:`, `grain:`, or `columns:` on a manifest-backed name — those would shadow the manifest's schema and drop its joins. Overlay `joins:` are merged additively with the manifest's joins (deduped by `to` + `on`); use `disable_joins: [""]` to suppress a specific manifest join. After the overlay exists, use `sl_edit_source` for further tweaks. See `sl_capture` skill for the canonical overlay rule. + +**Join discovery:** When your card's SQL references warehouse tables (e.g. in `FROM` or `JOIN` clauses), call `sl_discover({ query: '
' })` before writing. The matching manifest entry's `name` is the value you put in `joins: [- to: ]`. Use `many_to_one` for FK-to-dimension joins, `one_to_many` for the reverse. + +## priorProvenance + +If the WU prompt includes a `priorProvenance` section for a card, it tells you what happened on prior ingest syncs. Treat it as advisory: +- `action_type: source_created` on source X → prefer editing X with `sl_edit_source` rather than writing a new source. +- `action_type: measure_added` on source X → you already contributed to X; add only measures that aren't present. +- `action_type: subsumed` or `merged` → this card was folded into another source last time; unless its SQL has changed structurally, keep it subsumed (no new write). +- `action_type: skipped` → last time we decided not to ingest this card; re-read the SQL and confirm the decision still holds. If the card now has non-trivial business logic, ingest it. + +## Deduplication + +Before writing, scan all cards in this WU for near-duplicate groups — cards whose `resolvedSql` shares the same CTEs, base tables, joins, and aggregation structure but differs only in: +- Trailing filters (e.g. `date_trunc(week, date)` vs `date_trunc(month, date)`). +- Minor `WHERE` clause variations. +- Column aliases or output column subsets. +- Aggregation granularity (daily vs weekly vs monthly). + +When you find a group of near-duplicates: +1. Create ONE generalized source from the most comprehensive card in the group. +2. Strip card-specific trailing filters from the SQL so the source covers all variants (e.g. keep daily grain instead of filtering to week/month). +3. If each card had a distinct measure or filter, add them as separate measures on the single source. +4. For all cards except the canonical one, do nothing — they'll be recorded as `action_type='skipped'` automatically by the runner. + +Do NOT merge cards with fundamentally different business logic, even if they share CTEs. 
+ +## Pre-aggregation decomposition + +When a card's `resolvedSql` contains `GROUP BY` with aggregation functions (`SUM`, `COUNT`, `AVG`, …): + +1. **Detect**: simple aggregation on base tables/joins — `SELECT` with `GROUP BY`, no complex CTEs or window functions. +2. **Decompose**: strip the `GROUP BY` and aggregation functions. Keep `FROM`, `JOIN`, and `WHERE` intact. +3. **Expose row-level columns**: include the grouped-by columns AND the raw columns being aggregated (e.g. `money_out` instead of `SUM(money_out) AS total_money_out`). +4. **Define aggregations as measures**: convert each aggregation into a KSL measure (e.g. `sum(money_out)`). +5. **Add joins**: with FK columns now exposed, declare joins to dimension sources. + +Exception: keep the pre-aggregated SQL when the query involves multi-CTE pipelines, window functions, or recursive logic where decomposition would lose business logic. + +## SQL translation from raw native to KSL + +Every card carries a `resolvedSql` field. Check the staged card's `resolutionStatus` first: + +- `resolutionStatus: "resolved"` — `{{#N}}` references are inlined and `[[ ... ]]` optional clauses have been dropped locally. If the resolved SQL contains no other parameters the SQL is executable as-is. If the card had **required** (non-bracketed) `{{ var }}` placeholders, the SQL is prefixed with a placeholder-warning comment block listing every dummy substitution Metabase made — see "Step A" below. +- `resolutionStatus: "fallback"` — Metabase failed to resolve. The SQL still contains `{{#N}}`, `{{#N-name}} alias`, `{{ var }}`, and `[[ ... ]]` syntax. Do the translation steps below before writing a source. + +### Step A — Handle dummy-substituted placeholders (resolved cards only) + +When a card has a required `{{ var }}` outside any `[[ ]]` block, the resolver substitutes a **dummy value** purely so Metabase's parser will accept the query. 
The resulting SQL is prefixed with a comment like: + +```sql +-- PLACEHOLDER_WARNING: this SQL was extracted from a Metabase card with +-- unbound template parameters. The placeholders below were substituted with DUMMY +-- values to satisfy Metabase's parser — they DO NOT represent intended filters. +-- Drop the corresponding clauses (or expose them as runtime SL filters) before +-- persisting this SQL as a semantic-layer source. +-- {{ auction_end }} (type=dimension, widget=date/all-options) → '2020-01-01~2020-12-31' +-- {{ status }} (type=text) → 'placeholder' +SELECT ... +WHERE start_date >= '2020-01-01' AND start_date < '2021-01-01' AND status = 'placeholder' +``` + +For each listed placeholder: locate the WHERE clause(s) in the SQL that reference the dummy literal and **drop them**, then strip the warning comment. SL chat-time filters compose narrowing predicates dynamically, so the source should represent the unfiltered dataset. + +For `fallback` cards, dropping is simpler — the SQL still has the `[[ ... ]]` brackets and `{{ var }}` placeholders intact: + +```sql +-- before: +WHERE 1=1 + [[AND {{ auction_end }} ]] + [[AND status = {{ status }} ]] + +-- after: +WHERE 1=1 +``` + +### Step B — Inline `{{#N}}` references (fallback cards only) + +Resolved cards already have `{{#N}}` inlined for you. For `fallback` cards, each `{{#N}}` (or `{{#N-some-slug}}`) in the SQL refers to another card's `resolvedSql`. The referenced card is in the WU's `rawFiles` or `dependencyPaths`. Read it with `read_raw_file`, then inline its SQL. + +If the reference has an alias (`from {{#5996-listing-interactions}} tb`), the **outer** SQL probably uses that alias (`select tb.* ...`, `tb.column_name`, etc.). When you inline, you must EITHER: + +1. **Pick a single base table inside the inlined SQL and rename its alias to the outer alias.** Useful when the inlined card is `SELECT * FROM listings JOIN ...` — set the LISTINGS alias to `tb` and `tb.*` keeps working in the outer query. +2. 
**Replace the outer alias references with explicit columns from the inlined SQL.** Useful when the inlined card has multiple JOINs and `tb.*` is ambiguous. + +Never leave the outer alias dangling: after inlining, **grep your SQL for the outer alias name and rewrite or remove every reference**. A leftover `tb.*` with no `tb` table is the most common failure mode here. + +### Step C — Inlining cleanup checklist + +After Steps A and B, your SQL must: + +- Contain no placeholder-warning comment, no `{{`, `}}`, `[[`, or `]]` characters anywhere. +- Reference no aliases that aren't defined inside the SQL itself. +- Be valid as a standalone subquery (the validator runs `SELECT * FROM (your_sql) LIMIT 1`). + +If `resolutionStatus: "fallback"` and the SQL is still complex enough that you can't confidently translate it, **skip the card** rather than writing broken SQL. Call `emit_unmapped_fallback` with the staged card path as `rawPath`, `reason: "metabase_sql_untranslated"`, and `fallback: "flagged"`. + +## Join-graph connectivity + +For `source_type: table`: +- Use FK columns (`semantic_type: type/FK`) to declare `many_to_one` joins to dimension sources. +- Match column names ending in `_id` against existing sources' grain columns. + +For `source_type: sql`: +- The validator parses your SQL and **rejects the write** if any FROM/JOIN table has a manifest entry that you did not declare in `joins:`. The error names every missing join target — declare a `many_to_one` join for each and reissue. +- Tables outside the manifest (schemas not covered by this connection — e.g. `staging.*` referenced from a MARTS source) are not flagged. For those, write a single-line `wiki_write` with key `unmapped-table-<table>` so the gap is documented, then call `emit_unmapped_fallback` with the staged card path as `rawPath`, `reason: "table_outside_manifest"`, and `fallback: "wiki_only"`. 
+ +Joins on manifest-backed names compose: the manifest's joins are inherited automatically, and any overlay `joins:` are merged on top (deduped by `to` + `on`). Use `disable_joins: [""]` in the overlay to suppress a specific manifest join. If `sl_discover` shows a manifest-backed source with `Joins: 0` and the warehouse FK metadata is genuinely absent, declaring application-level joins via the overlay is fair game — bootstrap with `sl_write_source` (overlay shape above), then refine via `sl_edit_source`. + +## Cross-card references (`{{#N}}`) + +Resolved cards (`resolutionStatus: "resolved"`) have these inlined for you. Unresolved cards (`resolutionStatus: "fallback"`) need manual handling — see "SQL translation from raw native to KSL" above. + +## Provenance markers + +Every SL source and wiki page you write carries HTML-comment provenance tags pointing to the `cards/.json` files they derive from: + +```yaml +# +name: orders +... +``` + +If a source is derived from multiple cards (e.g. a generalized source for a near-duplicate group), emit one tag per contributing card. + +## Quality standards + +Source definitions must follow klo-sl YAML conventions: +- `source_type`: `"table"` (physical table/view) or `"sql"` (arbitrary SQL / derived view). +- `table`: required when `source_type: "table"` (e.g. `"public.orders"`). +- `sql`: required when `source_type: "sql"`. +- `grain`: what one row represents (e.g. `[id]`, `[customer_id, product_id]`). +- `columns`: all columns with correct types (`string`, `number`, `time`, `boolean`). +- Time columns: mark with `role: time`. +- `joins`: use correct `relationship` types (`many_to_one` for FK→PK, `one_to_many` for reverse). +- `joins.on`: `local_column = TARGET_SOURCE.target_column` — the right side MUST include the target source name. +- `measures.expr`: aggregation expression (e.g. `"sum(amount)"`); optional `filter` for business rules; required `description`. + +Measure naming: descriptive `snake_case` (e.g. 
`total_revenue`, `avg_order_value`). + +## Rules + +- Prefer adding measures to existing sources over creating new ones. +- Before editing, always `sl_read_source` the source to check for existing measures. +- Don't duplicate measures (same aggregation on the same column). +- If two measures differ only by a filter (e.g. `revenue` vs `paid_revenue`), they are distinct. +- Use the card's `name` + `description` to write meaningful measure descriptions. +- When multiple cards in a WU are near-duplicates, create ONE generalized source; the runner will skip the rest automatically. +- Process every card in the WU — don't stop early. diff --git a/packages/context/skills/metricflow_ingest/SKILL.md b/packages/context/skills/metricflow_ingest/SKILL.md new file mode 100644 index 00000000..5c56161b --- /dev/null +++ b/packages/context/skills/metricflow_ingest/SKILL.md @@ -0,0 +1,274 @@ +--- +name: metricflow_ingest +description: Map a MetricFlow semantic_model or metric into KLO semantic layer sources. Covers the MetricFlow to KLO primitive table, `extends:` inheritance flattening, metric-type handling (simple / derived / ratio / cumulative / conversion), `model: ref('x')` resolution, and four worked examples. Load when the turn contains `.yml`/`.yaml` files with top-level `semantic_models:` or `metrics:`. +callers: [memory_agent] +--- + +# MetricFlow to KLO Semantic Layer + +A MetricFlow `semantic_model` maps to an SL source; MetricFlow `measures` map to KLO measures; MetricFlow `entities` map to KLO `joins`; MetricFlow `metrics` (top-level) map to KLO measures OR to cross-model derived measures. Files in one WorkUnit are ALWAYS part of the same logical entity (a connected component, possibly spanning `extends:` + cross-model metric refs). Flatten inheritance and cross-file references at write time. 
+ +## Mapping table + +| MetricFlow | KLO form | Notes | +|---|---|---| +| `semantic_model: X { model: ref('t') }` with measures + dimensions | **Overlay** at `/X.yaml` with `measures`, `columns` (computed), `joins` | The `model:` ref resolves to a manifest table. | +| `semantic_model: X { model: source('s','t') }` | **Overlay** at `/X.yaml` over table `t`. | Same shape; `source()` still resolves to a physical table. | +| `semantic_model: X { model: }` with no manifest entry | **Standalone** with explicit `sql:`, `grain:`, `columns:` | Happens when the dbt manifest isn't available. | +| `semantic_model: Y { extends: X }` | **Merge** Y's measures/dimensions/entities into X's overlay, or write a single overlay named for the most-derived child (Y) containing both X's and Y's primitives | Do not emit a second overlay for X — flatten. | +| `measures: [{ name, agg, expr }]` | `measures: [{ name, expr: "()" }]` | Aggregation inlined. `agg: count_distinct` → `count(distinct ...)`. | +| `entities: [{ name, type: primary }]` | `grain: []` on the overlay/standalone | Primary/unique entities drive grain. | +| `entities: [{ name, type: foreign }]` | `joins:` entry joining to the primary-entity's semantic_model | Only when a matching primary is discoverable. | +| `metrics: [{ type: simple, type_params: { measure: X } }]` | If the base measure is labeled/described by the metric: in-place edit to the existing measure. Otherwise leave as-is. | Same-name metrics can absorb metadata. | +| `metrics: [{ type: simple, filter: }]` | **New measure** on the same source, with the filter translated to SQL and attached via `filter:` | Translate Jinja `{{ Dimension('x__y') }}` to the column name `y`. 
| +| `metrics: [{ type: derived, type_params: { expr, metrics } }]` | **Derived measure** on whichever source owns the referenced measures, with `expr:` referencing measure names | If the metric spans models, still write it once on the source owning the "primary" measure (the one the agent judges most central). Mention the cross-model chain in the description. | +| `metrics: [{ type: ratio, type_params: { numerator, denominator } }]` | Same as derived; `expr: "numerator / NULLIF(denominator, 0)"` if no explicit expr | Safe-division by default. | +| `metrics: [{ type: cumulative, type_params: { window, grain_to_date } }]` | **Standalone** source with a window-function SQL; reference the resulting column as a normal measure | KLO SL has no first-class cumulative primitive (spec Non-goals). | +| `metrics: [{ type: conversion }]` | **Flag for human** — do NOT write. Emit a wiki note describing the intended semantics. | No KLO equivalent in v1. | +| Metric not mappable | Wiki page `-definition.md` with the full YAML body quoted | Capture the intent even if we can't emit SL. | + +Type map: MetricFlow `time` to KLO `time`; `categorical` to `string`; `number` to `number`; `boolean` to `boolean`. Follow `expr` over `name` when both differ — `expr` is the physical column. + +## Flattening `extends:` + +Within one WorkUnit, multiple semantic_models linked by `extends:` are guaranteed to be present (the chunker groups them). Resolve inheritance **before** writing: + +1. Start with the most-derived child (the one that no other semantic_model extends). +2. Walk the `extends:` chain upward, accumulating measures, dimensions, entities. +3. Write ONE overlay/standalone, named for the most-derived child's SL-appropriate name (not the base). +4. Parents that lack their own distinctive content should NOT get a separate overlay. If a parent has unique measures a child doesn't inherit, consider whether the base is used elsewhere — if yes, write both; if no, still one overlay. +5. 
Measure/dimension name collisions: child wins, but note the overridden parent in the overlay's description or in a sibling wiki page. + +The spec's worked example has `orders`, `orders_ext` (extends orders), and `metrics/orders_final.yml` (defines `revenue` referencing both). The right output is ONE overlay named `orders_ext` (or `orders` if the team's naming favors the base) containing `order_count`, `gross_amount`, `refund_amount`, and a derived `revenue` measure. Provenance tags point to all three source files. + +## `model:` ref resolution + +The `model:` field on a semantic_model is a string like `ref('table_name')`, `source('src','table_name')`, or a literal. Resolve: + +- `ref('x')` → table name `x`. Verify via `sl_discover(x)`. +- `source('s','t')` → table name `t`. Verify via `sl_discover(t)`. +- Literal (no `ref(...)` / `source(...)`) → treat as the table name directly. + +If `sl_discover` errors (no such table), fall back to `sql_execution({ sql: "SELECT column_name FROM <dataset>.INFORMATION_SCHEMA.COLUMNS WHERE table_name = '<table>'" })` (session shape — a connection is already pinned by the ingest session). **Never invent column names** — every column in `columns:`, `grain:`, and `sql:` must be sourced from a real probe. + +After every `sl_write_source`, call `sl_validate`. The warehouse will reject invented columns with `Unrecognized name: <column>` — treat as a hard failure and re-read the schema. + +## Cumulative metrics — sql-standalone fallback + +KLO SL has no first-class `window:` or `grain_to_date:` primitive in v1 (spec Non-goals). 
Translate a MetricFlow cumulative metric to a standalone SL source with a window-function SQL: + +```yaml +# MetricFlow input: +metrics: + - name: cum_revenue_7d + type: cumulative + type_params: + measure: gross_amount + window: 7 days +``` + +```yaml +# KLO standalone output: +name: cum_revenue_7d +source_type: sql +sql: | + SELECT + ordered_at, + SUM(amount) OVER (ORDER BY ordered_at RANGE BETWEEN INTERVAL '7' DAY PRECEDING AND CURRENT ROW) AS cum_revenue_7d, + order_id + FROM analytics.orders +grain: [order_id] +columns: + - {name: ordered_at, type: time, role: time} + - {name: cum_revenue_7d, type: number} + - {name: order_id, type: string} +measures: + - {name: cum_revenue_7d, expr: "max(cum_revenue_7d)"} +``` + +Pick the time column based on the semantic_model's `defaults.agg_time_dimension` (e.g. `ordered_at`). If the MetricFlow config omits it, probe the base table for time-typed columns and choose the most obvious. After writing the standalone SQL source, call `emit_unmapped_fallback` with `rawPath` set to the MetricFlow file path, `reason: "cumulative_metric_unsupported"`, and `fallback: "sql_standalone"`. + +## Conversion metrics — flag for human + +```yaml +metrics: + - name: signup_to_first_order + type: conversion + type_params: + conversion_type_params: + entity: customer + base_measure: signup_count + conversion_measure: first_order_count + window: 30 days +``` + +Do NOT emit SL for this. Instead: +- Write a wiki page at `knowledge/global/<metric_name>-intent.md` quoting the full YAML body and a one-line explanation of the intended semantics (base event → conversion event within window). +- Call `emit_unmapped_fallback` with `rawPath` set to the MetricFlow file path, `reason: "conversion_metric_unsupported"`, and `fallback: "flagged"`. + +When KLO SL gains conversion primitives, re-ingesting will find the prior wiki note (via `priorProvenance`) and replace it with an SL source. 
+ +## Provenance markers + +Every overlay/standalone/wiki page emitted from a MetricFlow source carries HTML-comment provenance tags. When one overlay derives from multiple files (e.g. an extends chain), emit one tag per contributing file: + +```yaml +# +# +# +name: orders_ext +... +``` + +Line ranges (`#L-`) point to the exact YAML span within the file (the `semantic_models:` entry for its own `name`). Use `read_raw_span` to identify those ranges before writing. + +## Example 1 — single semantic_model to overlay + +```yaml +# MetricFlow: +semantic_models: + - name: orders + model: ref('orders') + entities: + - {name: order_id, type: primary} + measures: + - {name: order_count, agg: count, expr: order_id} + - {name: gross_amount, agg: sum, expr: amount} +``` + +```yaml +# KLO overlay at /orders.yaml: +# +name: orders +description: Order fact table. +measures: + - {name: order_count, expr: "count(order_id)"} + - {name: gross_amount, expr: "sum(amount)"} +grain: [order_id] +``` + +## Example 2 — extends chain → one flattened overlay + +```yaml +# MetricFlow: +# models/orders.yml +semantic_models: + - name: orders + model: ref('orders') + measures: + - {name: order_count, agg: count, expr: order_id} + - {name: gross_amount, agg: sum, expr: amount} + +# models/orders_ext.yml +semantic_models: + - name: orders_ext + model: ref('orders_ext') + extends: orders + measures: + - {name: refund_amount, agg: sum, expr: refund_amt} + +# metrics/orders_final.yml +metrics: + - name: revenue + type: derived + type_params: + expr: gross_amount - refund_amount + metrics: + - {name: gross_amount} + - {name: refund_amount} +``` + +```yaml +# KLO overlay at /orders_ext.yaml (one file; inheritance flattened): +# +# +# +name: orders_ext +description: Extended order fact including refund handling; `revenue` = gross - refund. 
+measures: + - {name: order_count, expr: "count(order_id)"} + - {name: gross_amount, expr: "sum(amount)"} + - {name: refund_amount, expr: "sum(refund_amt)"} + - {name: revenue, expr: "gross_amount - refund_amount"} +grain: [order_id] +``` + +## Example 3 — derived metric spanning two semantic_models + +```yaml +# models/sales.yml +semantic_models: + - name: sales + model: ref('sales') + measures: + - {name: revenue, agg: sum, expr: revenue_cents} +# models/costs.yml +semantic_models: + - name: costs + model: ref('costs') + measures: + - {name: cost, agg: sum, expr: cost_cents} +# metrics/margin.yml +metrics: + - name: margin + type: derived + type_params: + expr: revenue - cost + metrics: [{name: revenue}, {name: cost}] +``` + +Because the WorkUnit bundles all three files (cross-component union via the metric), write the derived measure on ONE of the two sources — pick the source whose domain "owns" the metric (here, `sales` — margin is inherently a sales metric). Cross-source references aren't native in KLO SL; treat the metric's operands as already-resolvable in the target source's query context OR emit a standalone SQL that joins the two tables: + +```yaml +# /sales.yaml +# +# +# +name: sales +measures: + - {name: revenue, expr: "sum(revenue_cents)"} +``` + +```yaml +# /margin.yaml — standalone because it spans two tables +# +# +# +name: margin +source_type: sql +sql: | + SELECT s.period_id, s.revenue_cents, COALESCE(c.cost_cents, 0) AS cost_cents + FROM analytics.sales s + LEFT JOIN analytics.costs c ON c.period_id = s.period_id +grain: [period_id] +columns: + - {name: period_id, type: string} + - {name: revenue_cents, type: number} + - {name: cost_cents, type: number} +measures: + - {name: revenue, expr: "sum(revenue_cents)"} + - {name: cost, expr: "sum(cost_cents)"} + - {name: margin, expr: "sum(revenue_cents) - sum(cost_cents)"} +``` + +Also write a wiki page at `knowledge/global/margin-metric.md` explaining the cross-source origin. 
+ +## Example 4 — filtered metric creates a new measure + +```yaml +metrics: + - name: paid_order_count + type: simple + type_params: + measure: order_count + filter: "{{ Dimension('orders__status') }} = 'paid'" +``` + +```yaml +# /orders.yaml +measures: + - {name: order_count, expr: "count(order_id)"} + - {name: paid_order_count, expr: "count(order_id)", filter: "status = 'paid'"} +``` + +Translate `{{ Dimension('orders__status') }}` to the bare column name `status` (the table alias prefix is implicit within the SL source's scope). diff --git a/packages/context/skills/notion_synthesize/SKILL.md b/packages/context/skills/notion_synthesize/SKILL.md new file mode 100644 index 00000000..3de8ee0d --- /dev/null +++ b/packages/context/skills/notion_synthesize/SKILL.md @@ -0,0 +1,69 @@ +--- +name: notion_synthesize +description: Synthesize durable KLO wiki pages and semantic-layer sources from staged Notion pages, databases, data-source rows, and clustered Notion evidence. Load when a WorkUnit contains Notion raw files or Notion evidence chunks. +callers: [memory_agent] +--- + +# Notion Cluster Synthesis + +Use this skill when a WorkUnit contains staged Notion content from `pages/**`, `databases/**`, `data-sources/**`, or clustered Notion evidence. + +## Role + +Each WorkUnit is either a single Notion page/span or a topical cluster of related Notion pages, pre-grouped by embedding similarity. Read the assigned raw files, then write a small set of durable wiki entries and, when applicable, semantic-layer sources that synthesize the WorkUnit's knowledge. Write final memory directly; do not write candidates. + +## Required Workflow + +1. Read the WorkUnit notes and rawFiles list. Page content lives in `page.md`; `metadata.json` holds title, path, object type, data-source ids, last edited metadata, and properties. +2. For each assigned page, call `read_raw_file`, or `read_raw_span` for oversized pages when the notes specify a span. +3. 
Search `wiki_search` for existing pages that overlap the WorkUnit topics. Prefer updating an existing page over creating a duplicate. +4. Use `context_evidence_search`, `context_evidence_read`, and `context_evidence_neighbors` to pull supporting chunks when indexed evidence is relevant. Pass `chunkId` and `documentId` values verbatim as returned by the evidence tools. +5. Write durable business knowledge with `wiki_write`. Aim for a small number of high-quality pages per WorkUnit or cluster. +6. When the Notion content defines a reusable dataset, metric, segment, join rule, source-of-truth mapping, or table with explicit columns, load `sl_capture`, discover existing sources first with `sl_discover` or `sl_read_source`, then use `sl_write_source` or `sl_edit_source`. +7. For every deleted raw path in the Eviction Set, call `eviction_list`, decide retention, then `context_eviction_decision_write`. Do this even when no wiki write is needed. + +## What To Capture + +Capture durable, reusable company knowledge: + +- metric definitions, KPI formulas, named business concepts, and reusable filters +- workflows, policies, ownership rules, approval conventions, and source-of-truth mappings +- data-source row pages that describe tables, columns, semantic models, dashboards, or business entities +- cross-system aliases connecting Notion terms to warehouse, dbt, Looker, Metabase, or MetricFlow names +- caveats, conflicts, supersession notes, and customer/product assumptions affecting future analysis + +Skip noisy or transient content: + +- meeting notes with no reusable rule +- task lists, project status updates, and time-bounded snapshots +- duplicate docs with no new fact +- database metadata pages when row pages contain the actual business content +- transient announcements and long page summaries + +## Quality + +Prefer fewer, stronger entries. Every wiki entry must cite at least one Notion page or row using its path and last edited date when available. 
When evidence conflicts, write a conflict note inside the wiki page rather than choosing silently. + +If a clustered WorkUnit includes several related pages, synthesize the shared rule or concept instead of writing one thin page per source. For oversized page spans, read only the assigned span unless the WorkUnit explicitly asks for neighboring context. + +## Citation Style + +```md +## Revenue Recognition +- Booked revenue excludes refunds and test accounts. +- Source: Notion - Company Handbook / Finance / Revenue Recognition, last edited 2026-04-12. +- Conflict note: An older Sales Ops page uses gross revenue before refunds; treat the Finance Handbook as current unless Finance says otherwise. +``` + +## Semantic-Layer Rules + +- Load `sl_capture` before writing or editing SL sources. +- Discover existing sources first with `sl_discover`; read existing source YAML before editing. +- Prefer overlays on manifest-backed sources over standalone SQL. +- If Notion describes a dashboard or metric but does not define executable logic, write a wiki page and attach `sl_refs` only after confirming the referenced source exists. + +## Tools + +Allowed: `read_raw_file`, `read_raw_span`, `wiki_search`, `wiki_read`, `wiki_write`, `sl_discover`, `sl_read_source`, `sl_write_source`, `sl_edit_source`, `sl_validate`, `context_evidence_search`, `context_evidence_read`, `context_evidence_neighbors`, `eviction_list`, `context_eviction_decision_write`. + +Not allowed: `context_candidate_write`, `context_candidate_mark`. diff --git a/packages/context/skills/sl/SKILL.md b/packages/context/skills/sl/SKILL.md new file mode 100644 index 00000000..128c579c --- /dev/null +++ b/packages/context/skills/sl/SKILL.md @@ -0,0 +1,240 @@ +--- +name: sl +description: KLO's semantic layer — a structured catalog of sources (tables/views), measures, joins, and segments expressed as YAML. Covers the schema and how to query it via `semantic_query`. 
Use when the task involves querying pre-defined metrics (ARR, churn, retention, LTV, MAU) or reading SL source YAML to understand the catalog. Capture is handled by the `sl_capture` skill (memory-agent only). +--- + +# Semantic Layer + +KLO's semantic layer (SL) is a structured catalog. Each **source** represents a table, a SQL view, or an overlay that enriches a manifest-backed table with measures, computed columns, joins, and named segments. The catalog is the single source of truth for reusable business metrics. + +This skill covers two parts: +- **Part 1** — Schema reference (what an SL source looks like). +- **Part 2** — Querying via `semantic_query`. + +Capture (when and how to add new patterns to the SL) is a separate concern handled by the memory-agent — see the `sl_capture` skill if you are running in capture mode. The research agent **reads** and **queries** the SL via the tools described here; it does not write to it. + +--- + +## Part 1 — Schema reference + +An SL source is a YAML file at `semantic-layer/<connection>/<source_name>.yaml`. There are three flavors: + +### Overlay sources + +Enrich a manifest-backed table with measures, computed columns, joins, and segments. No `table` or `sql` field. The base table's columns and grain are inherited from the manifest. + +```yaml +name: fct_orders # must match an existing manifest table +description: "Overlay adding business measures to the orders fact table." +measures: + - name: total_revenue + expr: sum(amount) + description: Total order revenue — filter by status or region at query time +columns: # computed dimensions only + - name: is_large_order + type: boolean + expr: "amount > 1000" +segments: + - name: paid_non_refunded + expr: "is_paid = true AND is_refunded = false" +joins: + - to: customers + on: "customer_id = customers.id" + relationship: many_to_one +``` + +Rules: +- Do **not** repeat base-table columns, grain, `table`, or `source_type` in an overlay — those are inherited. 
+- Overlay columns MUST be computed (`expr` + `type`). +- `exclude_columns` hides specific manifest columns; `disable_joins` suppresses specific auto-detected joins. + +### Standalone table sources + +Self-contained; own their schema. Has `source_type: table` and `table:`. + +```yaml +name: account_health_scores +source_type: table +table: "analytics.account_health_scores" +grain: [account_id, snapshot_date] +columns: + - name: account_id + type: string + - name: snapshot_date + type: time + role: time + - name: health_score + type: number +measures: + - name: avg_health_score + expr: avg(health_score) +``` + +### Standalone SQL sources + +Self-contained; schema derived from a SQL query. Has `source_type: sql` and `sql:`. + +```yaml +name: monthly_cancellations +source_type: sql +sql: | + SELECT + date_trunc('month', cancelled_at) AS month, + customer_id, + plan_name, + mrr_amount + FROM subscriptions + WHERE status = 'cancelled' +grain: [customer_id, month] +columns: + - name: month + type: time + role: time + - name: customer_id + type: string + - name: plan_name + type: string + - name: mrr_amount + type: number +measures: + - name: cancellation_count + expr: count(*) +``` + +An SQL source is a one-shot answer: the aggregation is frozen, callers cannot re-group or re-filter by columns the SQL has collapsed, and the source is disconnected from the join graph. Prefer overlays + measures over SQL sources when possible — the `sl_capture` skill covers when SQL is justified. + +### Columns + +Every standalone column requires `name` and `type`. Overlays have computed columns only. + +- `type`: one of `string`, `number`, `boolean`, `time`. Map LookML `date`/`datetime`/`timestamp` → `time`. Map LookML `yesno` → `boolean`. +- `role` (optional): `time` enables time-granularity queries (month, week, day). `default` is the implicit fallback. +- `visibility` (optional): `public`, `internal`, or `hidden`. 
+- `expr` (optional for standalone, required for overlay columns): SQL expression that computes the value. Expanded by sqlglot before generating SQL, so you can reference other columns on the same source. + +### Grain + +`grain: [col_a, col_b]` — the set of columns that uniquely identify one row. The query engine uses grain to prevent fan-out in joins. Overlays inherit grain from the manifest unless they override. + +### Joins + +```yaml +joins: + - to: customers # target source name + on: "customer_id = customers.id" # local_col = TARGET.target_col + relationship: many_to_one # or one_to_many, one_to_one + alias: primary_customer # optional — lets you join the same target twice +``` + +- `on` format: `local_col = TARGET.target_col`. Always qualify the right side with the target source name. +- `relationship` is the cardinality **from this source to the target**. Most joins are `many_to_one` (FK → PK on the parent). + +### Measures + +```yaml +measures: + - name: total_arr + expr: sum(arr_amount) + description: Sum of ARR — filter by plan_name at query time + filter: "is_active = true" + segments: [paid_non_refunded] +``` + +- `name` (required, snake_case). +- `expr` (required): any valid SQL aggregate — `sum(x)`, `count(*)`, `count(distinct user_id)`, `avg(score)`. +- `description` (required on capture): what the measure computes and how to use it. +- `filter` (optional): SQL predicate applied as a WHERE clause specific to this measure. +- `segments` (optional): names of segments defined on the same source. The engine AND-composes each segment's `expr` into this measure's effective filter. + +Use `safe_divide(num, den)` for ratio measures to avoid division by zero. + +### Segments + +```yaml +segments: + - name: paid_non_refunded + expr: "is_paid = true AND is_refunded = false" + description: Orders that were paid and not refunded +``` + +Named, reusable boolean predicates scoped to one source. 
Reference by bare name in a measure's `segments: []`, or by dotted form `source.segment_name` in a `semantic_query`. Segments are predicates only — they are NOT selectable as dimensions. If you need to group by the predicate, add a `columns[]` entry instead. + +### Cross-references with the wiki + +The reverse edge (wiki pages that cite this source) is derived automatically from each wiki's `sl_refs:` — you don't emit anything on the SL side. Author the edge once on the wiki via `sl_refs:`; the post-write reconciler populates the knowledge↔SL index. + +--- + +## Part 2 — Querying via `semantic_query` + +The `semantic_query` tool generates correct SQL from a structured query. It handles joins, fan-out prevention, aggregation correctness, and filter classification automatically. Prefer it over writing raw SQL whenever the SL has the relevant sources. + +### When to prefer semantic_query over raw SQL + +- A pre-defined measure already exists (`source.measure_name` appears in the catalog). +- The question combines fields from multiple sources — the engine resolves the join path automatically. +- The question asks for a standard metric (revenue, ARR, churn, retention, LTV, conversion, MAU, etc.) — even if no pre-defined measure exists, a runtime aggregation over a catalog column is usually correct. + +Use raw SQL (`sql_execution`) only when: +- The computation requires multi-step CTEs whose intermediate grain is not a column in any source. +- The question explicitly asks for a one-off exploration that will never be asked again. 
+ +### Input shape + +```json +{ + "connectionId": "uuid-of-the-connection", + "reasoning": "Brief note on what this query analyzes", + "query": { + "measures": ["orders.total_revenue", "sum(orders.amount)"], + "dimensions": ["customers.segment", { "field": "orders.created_at", "granularity": "month" }], + "filters": ["orders.status != 'cancelled'", "orders.total_revenue > 10000"], + "segments": ["orders.paid_non_refunded"], + "order_by": [{ "field": "orders.created_at", "direction": "desc" }], + "limit": 1000 + } +} +``` + +- **`measures`**: mix pre-defined refs (`source.measure`) and runtime aggregations (`sum(source.column)`). +- **`dimensions`**: column refs or `{ field, granularity }` objects for time grains (`day`, `week`, `month`, `quarter`, `year`). +- **`filters`**: free-form SQL predicates. The engine auto-classifies each as WHERE or HAVING based on whether it references an aggregated measure. +- **`segments`**: dotted `source.segment_name`. Each segment is AND-ed into the effective filter of every measure whose base source matches. Segments never become a global WHERE — use `filters` for cross-source predicates. +- **`order_by`**: string or `{ field, direction }`. Direction defaults to `asc`. +- **`limit`**: integer row cap. + +### Join resolution + +You don't specify a base table. The engine infers the set of sources needed from the fields you reference and resolves the shortest join path through the catalog's declared joins. If no path exists between two sources, the query fails with a path-not-found error — check `discover_data` or `sl_discover` to see which sources are connected. 
+ +### Worked examples + +Cross-source query — engine resolves `account_health_scores → accounts ← opportunities` automatically: + +```json +{ + "measures": ["account_health_scores.avg_health_score"], + "dimensions": ["opportunities.stage"], + "filters": ["opportunities.stage != 'Closed Won'"] +} +``` + +Monthly ARR trend with a segment: + +```json +{ + "measures": ["subscriptions.arr"], + "dimensions": [{ "field": "subscriptions.month", "granularity": "month" }], + "segments": ["subscriptions.paid_non_refunded"], + "order_by": [{ "field": "subscriptions.month", "direction": "asc" }] +} +``` + +Multi-source with runtime aggregation: + +```json +{ + "measures": ["sum(orders.amount)", "count(support_tickets.ticket_id)"], + "dimensions": ["customers.segment"] +} +``` diff --git a/packages/context/skills/sl_capture/SKILL.md b/packages/context/skills/sl_capture/SKILL.md new file mode 100644 index 00000000..af8cee15 --- /dev/null +++ b/packages/context/skills/sl_capture/SKILL.md @@ -0,0 +1,276 @@ +--- +name: sl_capture +description: How to capture new reusable patterns into KLO's semantic layer — when a measure, segment, or join belongs in the catalog and how to write it generically so it stays small and useful over time. Loaded by the post-turn memory-agent only. The research agent does not write to the SL. +callers: [memory_agent] +--- + +# Semantic Layer — Capture + +This skill covers **when** and **how** to capture new patterns into the semantic layer. For schema reference and query grammar, load the `sl` skill first. + +When the current turn produces a reusable pattern (business metric, derived view, join pattern, computed dimension), capture it so future queries can reach for it instead of rediscovering it. + +## SQL dialect + +The user-facing prompt includes a `Warehouse:` line under the SL Sources index +(e.g. `Warehouse: BIGQUERY`). 
All `expr` strings — measure expressions, segment +predicates, computed-column SQL — execute on that warehouse and must use its +syntax. Date arithmetic in particular varies by dialect: + +- **BigQuery**: `transaction_date >= TIMESTAMP_SUB(CURRENT_TIMESTAMP(), INTERVAL 90 DAY)` (when the column is `TIMESTAMP`); `event_date >= DATE_SUB(CURRENT_DATE(), INTERVAL 90 DAY)` (when `DATE`). +- **Postgres / Redshift**: `transaction_date >= current_date - interval '90 days'`. +- **Snowflake**: `transaction_date >= dateadd(day, -90, current_timestamp())`. + +Match the column's manifest type (`type: time` → TIMESTAMP/DATETIME on the +warehouse) — comparing TIMESTAMP to a DATE-arithmetic result fails on +BigQuery. After every `sl_edit_source`/`sl_write_source`, the inline validator runs a +`LIMIT 1` warehouse probe per measure and surfaces dialect mismatches; if +you see an error trailer, fix the expression and retry rather than leaving +the source for the post-squash gate to revert. + +## What's worth capturing + +- Business metric aggregations (ARR, MRR, revenue, churn, retention, conversion, LTV, CAC). +- Derived calculations combining multiple signals (risk scores, health scores, composite KPIs). +- Multi-table join patterns producing a reusable analytical view. +- Computed categories or flags useful as reusable dimensions (`case when num_protocols >= 3 then 'power' else 'regular' end`). +- Missing joins between two sources that both exist but aren't connected in the join graph. + +Skip: +- Simple `SELECT * LIMIT 10` previews. +- Trivial `COUNT(*)` on one table with no business filtering. +- One-off ad-hoc explorations unlikely to repeat. +- Equivalent measures that already exist (cite the existing one as `source.measure_name`). + +When in doubt, capture. Measures are easy to remove but impossible to recover from a lost conversation. + +## Generalization rules + +The SL must stay small and general over time. 
Before adding a measure, decide whether it belongs as a generic pattern or a specific constant. + +**Prefer one generic measure with query-time filters over N hardcoded variants.** + +Anti-pattern: +```yaml +- name: revenue_us_region + expr: sum(case when region = 'US' then amount end) +- name: revenue_eu_region + expr: sum(case when region = 'EU' then amount end) +``` + +Preferred: +```yaml +- name: total_revenue + expr: sum(amount) +``` +Callers filter `region = 'US'` at `semantic_query` time. + +**Bake constants in only when the filter has named business meaning that won't change** (`enterprise_arr` for a contractually defined tier), cannot be expressed via the source's dimensions, or comes from a regulated/fixed list. + +**Time anchors and value lists belong in callers' filters, not in measure expressions or source SQL.** +- Anti-pattern (date anchor inlined): `expr: count(distinct case when transaction_date >= '2026-04-12' then customer_id end)` — the date will need editing every time the question shifts, and every reader has to discover it. +- Anti-pattern (value list inlined in source SQL): `WHERE product_category_1 IN ('Testosterone', 'Weight Loss', …)` — locks the source to today's catalog and blocks callers from broadening or narrowing. +- Preferred: a generic measure (`count(distinct customer_id)`) plus either a named segment that captures the *meaning* of the anchor (`gh_new_products_since_launch`) or a query-time filter. Callers compose; the source stays small. +- A date is durable to bake in only when it represents a regulatory cutover, a contractually fixed boundary, or a one-time event that reshapes how the source itself is read. + +**If you create a segment whose expr matches a measure's filter, the measure MUST reference the segment via `segments: [segment_name]` rather than re-inlining the predicate.** This is the canonical pattern even with a single measure — duplicating the predicate inline defeats the purpose of naming it. 
+ +Anti-pattern: +```yaml +segments: + - name: engaged_subscriber + expr: "is_paid = true AND <90-day recency predicate>" +measures: + - name: engaged_subscriber_count + expr: "count(distinct case when is_paid = true and transaction_date >= current_date - interval '90 day' then admin_user_id end)" +``` + +Preferred: +```yaml +segments: + - name: engaged_subscriber + expr: "is_paid = true AND <90-day recency predicate>" +measures: + - name: engaged_subscriber_count + expr: "count(distinct admin_user_id)" + segments: [engaged_subscriber] +``` + +**Use computed dimensions for derived categories.** A flag like `is_power_user` belongs on `columns[]` with `expr`, not inlined into every measure. + +**Extract repeated filter bundles into named segments.** If the same predicate appears on multiple measures of the same source, lift it to a `segments[]` entry and have each measure reference it. One edit updates every measure that depends on it. + +**Never write a standalone file on a manifest-backed name.** If `sl_discover({ tableName })` finds an existing schema for that name, you MUST write an overlay (`name:` + `measures:`/`segments:`/`description:` only — no `sql:`, `table:`, `grain:`, `columns:`, `joins:`). A standalone with `sql:` or `table:` on a manifest-backed name clobbers the inherited columns and joins; `sl_write_source` and `sl_validate` both reject this shape with a clear fix hint. Always run `sl_discover` before your first write on any existing name. + +**Prefer overlay decomposition over standalone SQL sources.** Before reaching for `source_type: sql`, check whether the metric decomposes into measures on existing overlays (including cross-source derived measures). Use `source_type: sql` only when: +- The metric requires per-user/per-entity derivation that cannot be expressed as a single `expr` (e.g., `EXISTS` over a time-windowed subset), OR +- The metric requires multi-step CTEs whose intermediate grain is not a column in any existing source. 
+ +When an `sql` source is unavoidable, note in its `description` which SL gap forced the choice so it can be retired once the primitive ships. It must target a name NOT in the manifest — pick a distinct one (e.g. `mrr_waterfall_rollup`, not `fct_orders`). + +## Slim standalone sources via `inherits_columns_from` + +When a standalone SQL source filters or projects from a single manifest-backed base table (the common pattern for derived views like `aav_consignments` over `MARTS.CONSIGNMENTS`), set `inherits_columns_from:` to the base table's manifest key and list only column **names** in `columns:`. Compose-time enrichment fills `type`, `description`, and `role` from the matching manifest column. + +Discover the manifest key with `sl_discover` — pass the bare name (`CONSIGNMENTS`), the fully-qualified path (`ANALYTICS.MARTS.CONSIGNMENTS`), or any suffix; the tool resolves all forms and prints the canonical key in its output. + +```yaml +name: aav_consignments +description: AAV consignments — filtered view of MARTS.CONSIGNMENTS for the auto-auction-vaulting channel. +source_type: sql +sql: | + SELECT CONSIGNED_ITEM_ID, CASH_ADV_AMOUNT, ALT_VALUE_COMBINED, my_derived_flag + FROM MARTS.CONSIGNMENTS + WHERE IS_AUTO_AUCTION_VAULTING_SUBMISSION = TRUE + AND IS_CARD_SHOW_SUBMISSION = FALSE + AND CONSIGNMENT_CANCELED_FLAG = FALSE +inherits_columns_from: CONSIGNMENTS +grain: [CONSIGNED_ITEM_ID] +columns: + - { name: CONSIGNED_ITEM_ID } # type/description inherited from manifest + - { name: CASH_ADV_AMOUNT } + - { name: ALT_VALUE_COMBINED } + - { name: my_derived_flag, type: boolean, expr: "CASH_ADV_AMOUNT > 0", description: "Computed locally — has any cash advance." } +measures: + - name: total_cash_advance + expr: sum(CASH_ADV_AMOUNT) +``` + +Rules: + +- Inheritance fills only **blank** fields. If you set a `description` locally, it wins — useful when the base description is misleading in the filtered view. 
+- A column not in the manifest (a derived/aliased column, or one from a different table in a `JOIN`) needs its own `type` and `description` declared. +- If `inherits_columns_from` doesn't resolve, the source still loads, but every column without a type triggers a validator error on the warehouse probe — `sl_discover` first to confirm the key. +- Don't use `inherits_columns_from` for sources backed by `table:` (those should be overlays — see the rule against shadowing the manifest above). + +## Refinement — replace, don't append + +When the user corrects a prior answer, the existing measure is wrong by the user's own standard. Replace it, don't add a parallel measure. + +Signals that the current turn is a refinement: +- "no, I meant...", "actually use X", "exclude Y", "wait, by X I mean Z". +- Pushback on a prior result ("that's wrong because...", "this should be higher"). +- Redefinition of a term used in an existing measure. + +Distinguishing question: *would the prior measure still be correct for someone else asking the prior question?* If no → replace. If yes → add. + +## Edit SL vs document in wiki + +If the user explicitly names an SL artifact and asks to change it, the primary +action is always an SL tool call. Examples: + +- "edit the source", "edit the YAML", "edit `fct_intakes.yaml`" → `sl_edit_source` or + `sl_write_source`. +- "refine the measure", "change the filter on `active_users`", "fix the expr", + "add `is_test = false`" → `sl_edit_source` on the source that owns the measure. +- "don't create a new one, update the existing" → `sl_edit_source` (never `sl_write_source` + with a new source name; never `wiki_write` as the only action). + +A wiki update may ALSO make sense in the same turn (owner note, lineage, +caveat), but it is never a substitute for editing the YAML when the user's +request is about changing the measure/source definition itself. 
+ +Wiki-only is correct when the user is documenting *about* the measure +(definition in business terms, owner, policy, glossary, examples of when to +use it) without changing its SQL expression or filters. + +## Tool sequence + +1. `sl_discover` — see what source files exist. +2. `sl_discover({ tableName })` — **REQUIRED before the first write on any name**. Shows columns/joins/grain from the manifest. If the call returns a schema, you MUST write an overlay, not a standalone. Skipping this is the #1 cause of accidentally shadowing the manifest. +3. `sl_read_source({ sourceName })` — read the raw YAML before editing. +4. For modifications: `sl_edit_source({ sourceName, old_string, new_string })` with exact-string replacements. `old_string` must match exactly and be unique in the file. +5. For new sources or full rewrites: `sl_write_source({ sourceName, content })` with the full YAML content. +6. For join discovery: `sql_execution({ sql })` to verify the join key exists in both tables and assess cardinality before declaring the join. +7. Cross-reference knowledge: author the edge once on the **wiki** side via `sl_refs: [source_name]` in the page's front-matter. The reverse edge (wiki pages that cite an SL source) is derived automatically by the reconciler — do not add a `knowledge_refs:` field to SL YAMLs. +8. `sl_validate` — run after writing or editing to surface schema issues, duplicate measure names, and cross-source validation errors. Read-only; the writes are already committed (the squash-at-end flow will collapse them into one commit). + +## Editing patterns + +- **`sl_edit_source`** is the workhorse for additive changes: add a measure, add a join, tweak a description, replace a filter. Cheap, targeted, preserves the rest of the file. +- **`sl_write_source`** is for brand-new sources or when the entire file needs restructuring. It overwrites the file completely. 
+- Do NOT modify existing measures or their descriptions unless the current turn explicitly corrects them. + +## Worked example — additive overlay + +Conversation: +- User: "What was the average order value last quarter?" +- Assistant fell back to SQL: `SELECT AVG(amount) FROM orders WHERE order_date >= ...` + +Existing index: `orders [measures=0, joins=0] — candidate for enrichment`. + +``` +sl_discover() + → orders.yaml does not exist yet +sl_discover({ tableName: "orders" }) + → see grain, columns, no current overlay +sl_write_source({ + sourceName: "orders", + content: "name: orders\nmeasures:\n - name: avg_order_value\n expr: avg(amount)\n description: Mean order transaction amount — filter by product_category at query time\n" +}) +sl_validate() + → clean +``` + +The overlay only contains `name` and `measures` — no columns, grain, or table. Those are inherited from the manifest. + +## Worked example — refinement (replace) + +Prior turn: +- [user] "How many active users do we have per region?" +- [assistant] "… used `count(*) filter: last_login_at > now() - interval '30 days'`" + +Current user: "Wait, by 'active' I mean users who have placed an order in the last 30 days, not just logged in." + +The existing `users.active_count` measure is wrong by the new definition. + +``` +sl_read_source({ sourceName: "users" }) + → see the wrong measure +sl_edit_source({ + sourceName: "users", + yaml_edits: [{ + oldText: " - name: active_count\n expr: \"count(*)\"\n filter: \"last_login_at > now() - interval '30 days'\"\n description: Users who logged in within the last 30 days", + newText: " - name: active_count\n expr: \"count(distinct case when last_order_at > now() - interval '30 days' then user_id end)\"\n description: Users with at least one order in the last 30 days" + }] +}) +sl_validate() +``` + +If you only added a new measure, the old incorrect `active_count` would stay and future queries would keep answering the wrong question. 
+ +## Worked example — new join + +Prior turn: user asked to correlate LTV with protocol count; assistant joined `fct_orders` with `fct_mau_multiprotocol` on `admin_user_id` in raw SQL. + +``` +sl_read_source({ sourceName: "fct_orders" }) + → no joins section yet +sql_execution({ + sql: "SELECT COUNT(*), COUNT(DISTINCT a.admin_user_id) FROM fct_orders a JOIN fct_mau_multiprotocol b ON a.admin_user_id = b.admin_user_id LIMIT 1" +}) + → confirms cardinality (many orders per MAU row = many_to_one) +sl_edit_source({ + sourceName: "fct_orders", + yaml_edits: [{ + oldText: "measures:", + newText: "joins:\n - to: fct_mau_multiprotocol\n on: admin_user_id = fct_mau_multiprotocol.admin_user_id\n relationship: many_to_one\nmeasures:" + }] +}) +sl_validate() +``` + +Always verify joins with `sql_execution` before adding them. + +## Rules recap + +- Read existing sources before editing (`sl_read_source` or `sl_discover`). +- Prefer overlays over standalone sources on manifest-backed tables. +- Prefer generic measures + query-time filters over per-value variants. +- Time anchors and value lists belong in callers' filters, not in measure expressions. +- A measure whose filter matches a segment MUST reference the segment via `segments: [name]`. +- Extract repeated predicates into named segments. +- Use computed dimensions for derived categories. +- When the user corrects a prior answer, replace — don't append. +- Always run `sl_validate` after writing to surface issues. +- If nothing is worth capturing, respond without calling any SL write tool. 
diff --git a/packages/context/src/agent/agent-runner.service.test.ts b/packages/context/src/agent/agent-runner.service.test.ts new file mode 100644 index 00000000..70b4e0da --- /dev/null +++ b/packages/context/src/agent/agent-runner.service.test.ts @@ -0,0 +1,330 @@ +import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'; + +vi.mock('ai', () => ({ + generateText: vi.fn(), + stepCountIs: (n: number) => n, + tool: (def: unknown) => def, +})); + +import { generateText } from 'ai'; +import { AgentRunnerService, type RunLoopStepInfo } from './agent-runner.service.js'; + +describe('AgentRunnerService.runLoop', () => { + let runner: AgentRunnerService; + const llmProvider = { + getModel: vi.fn().mockReturnValue({ modelId: 'claude-sonnet-4-6', provider: 'anthropic' }), + getModelByName: vi.fn(), + cacheMarker: vi.fn(), + repairToolCallHandler: vi.fn(), + thinkingProviderOptions: vi.fn(), + telemetryConfig: vi.fn(), + promptCachingConfig: vi.fn(() => ({ + enabled: false, + systemTtl: '1h', + toolsTtl: '1h', + historyTtl: '5m', + cacheSystem: true, + cacheTools: true, + cacheHistory: true, + vertexFallbackTo5m: false, + })), + activeBackend: vi.fn(() => 'anthropic'), + }; + + beforeEach(() => { + vi.clearAllMocks(); + runner = new AgentRunnerService({ llmProvider: llmProvider as any }); + }); + + afterEach(() => vi.clearAllMocks()); + + it('passes systemPrompt, userPrompt, tools, and step budget through to generateText', async () => { + (generateText as any).mockResolvedValue({ text: 'ok', toolCalls: [], steps: [] }); + const tools = { noop: { description: 'noop', inputSchema: {}, execute: vi.fn() } }; + await runner.runLoop({ + modelRole: 'candidateExtraction', + systemPrompt: 'SYS', + userPrompt: 'USR', + toolSet: tools as any, + stepBudget: 17, + telemetryTags: { source: 'test' }, + }); + const call = (generateText as any).mock.calls[0][0]; + expect(call.messages).toEqual([ + { role: 'system', content: 'SYS' }, + { role: 'user', content: 'USR' }, + ]); + 
expect(call.system).toBeUndefined(); + expect(call.prompt).toBeUndefined(); + expect(call.tools).toEqual(tools); + expect(call.stopWhen).toBe(17); + expect(call.temperature).toBe(0); + expect(llmProvider.getModel).toHaveBeenCalledWith('candidateExtraction'); + }); + + it('returns stopReason=natural when the loop completes without error', async () => { + (generateText as any).mockResolvedValue({ text: 'done', toolCalls: [], steps: [] }); + const result = await runner.runLoop({ + modelRole: 'candidateExtraction', + systemPrompt: 'system', + userPrompt: 'user', + toolSet: {}, + stepBudget: 10, + telemetryTags: {}, + }); + expect(result.stopReason).toBe('natural'); + expect(result.error).toBeUndefined(); + expect(llmProvider.getModel).toHaveBeenCalledWith('candidateExtraction'); + expect(generateText).toHaveBeenCalledWith( + expect.objectContaining({ + messages: [ + { role: 'system', content: 'system' }, + { role: 'user', content: 'user' }, + ], + }), + ); + }); + + it('returns stopReason=error with the error on generateText failure', async () => { + const err = new Error('LLM unavailable'); + (generateText as any).mockRejectedValue(err); + const result = await runner.runLoop({ + modelRole: 'candidateExtraction', + systemPrompt: '', + userPrompt: '', + toolSet: {}, + stepBudget: 10, + telemetryTags: {}, + }); + expect(result.stopReason).toBe('error'); + expect(result.error).toBe(err); + }); + + it('invokes caller onStepFinish with incrementing stepIndex and total budget', async () => { + const calls: RunLoopStepInfo[] = []; + (generateText as any).mockImplementation(async (opts: any) => { + for (let i = 0; i < 3; i++) { + await opts.onStepFinish({}); + } + return { text: 'ok', toolCalls: [], steps: [] }; + }); + + await runner.runLoop({ + modelRole: 'candidateExtraction', + systemPrompt: '', + userPrompt: '', + toolSet: {}, + stepBudget: 10, + telemetryTags: {}, + onStepFinish: (info) => { + calls.push(info); + }, + }); + + expect(calls).toEqual([ + { stepIndex: 1, 
stepBudget: 10 }, + { stepIndex: 2, stepBudget: 10 }, + { stepIndex: 3, stepBudget: 10 }, + ]); + }); + + it('swallows errors thrown from caller onStepFinish without aborting the loop', async () => { + (generateText as any).mockImplementation(async (opts: any) => { + await opts.onStepFinish({}); + return { text: 'ok', toolCalls: [], steps: [] }; + }); + + const result = await runner.runLoop({ + modelRole: 'candidateExtraction', + systemPrompt: '', + userPrompt: '', + toolSet: {}, + stepBudget: 10, + telemetryTags: {}, + onStepFinish: () => { + throw new Error('boom'); + }, + }); + + expect(result.stopReason).toBe('natural'); + }); + + it('forwards telemetryTags.source through experimental_telemetry metadata', async () => { + (generateText as any).mockResolvedValue({ text: 'ok', toolCalls: [], steps: [] }); + const telemetryConfigEnabled = { + isEnabled: () => true, + devtoolsEnabled: false, + appSettingsService: { + settings: { telemetry: { recordInputs: false, recordOutputs: false } }, + }, + systemConfigService: { + config: { instance: { name: 'test-instance' } }, + }, + } as any; + const runnerWithTelemetry = new AgentRunnerService({ + llmProvider: llmProvider as any, + telemetry: { + createTelemetry: (tags) => ({ + isEnabled: telemetryConfigEnabled.isEnabled(), + metadata: { + source: tags.source ?? 
'RESEARCH', + jobId: tags.jobId, + unitKey: tags.unitKey, + }, + }), + }, + }); + await runnerWithTelemetry.runLoop({ + modelRole: 'candidateExtraction', + systemPrompt: '', + userPrompt: '', + toolSet: {}, + stepBudget: 10, + telemetryTags: { source: 'metabase', jobId: 'job-123', unitKey: 'u/1' }, + }); + const call = (generateText as any).mock.calls[0][0]; + expect(call.experimental_telemetry.metadata.source).toBe('metabase'); + }); + + it('defaults to source=RESEARCH when telemetryTags omits source', async () => { + (generateText as any).mockResolvedValue({ text: 'ok', toolCalls: [], steps: [] }); + const telemetryConfigEnabled = { + isEnabled: () => true, + devtoolsEnabled: false, + appSettingsService: { + settings: { telemetry: { recordInputs: false, recordOutputs: false } }, + }, + systemConfigService: { + config: { instance: { name: 'test-instance' } }, + }, + } as any; + const runnerWithTelemetry = new AgentRunnerService({ + llmProvider: llmProvider as any, + telemetry: { + createTelemetry: (tags) => ({ + isEnabled: telemetryConfigEnabled.isEnabled(), + metadata: { + source: tags.source ?? 
'RESEARCH', + jobId: tags.jobId, + unitKey: tags.unitKey, + }, + }), + }, + }); + await runnerWithTelemetry.runLoop({ + modelRole: 'candidateExtraction', + systemPrompt: '', + userPrompt: '', + toolSet: {}, + stepBudget: 10, + telemetryTags: { operationName: 'memory-agent-ingest' }, + }); + const call = (generateText as any).mock.calls[0][0]; + expect(call.experimental_telemetry.metadata.source).toBe('RESEARCH'); + }); + + it('forwards jobId and unitKey through experimental_telemetry metadata', async () => { + (generateText as any).mockResolvedValue({ text: 'ok', toolCalls: [], steps: [] }); + const telemetryConfigEnabled = { + isEnabled: () => true, + devtoolsEnabled: false, + appSettingsService: { + settings: { telemetry: { recordInputs: false, recordOutputs: false } }, + }, + systemConfigService: { + config: { instance: { name: 'test-instance' } }, + }, + } as any; + const runnerWithTelemetry = new AgentRunnerService({ + llmProvider: llmProvider as any, + telemetry: { + createTelemetry: (tags) => ({ + isEnabled: telemetryConfigEnabled.isEnabled(), + metadata: { + source: tags.source ?? 
'RESEARCH', + jobId: tags.jobId, + unitKey: tags.unitKey, + }, + }), + }, + }); + await runnerWithTelemetry.runLoop({ + modelRole: 'candidateExtraction', + systemPrompt: '', + userPrompt: '', + toolSet: {}, + stepBudget: 10, + telemetryTags: { source: 'metabase', jobId: 'job-777', unitKey: 'sources/users' }, + }); + const call = (generateText as any).mock.calls[0][0]; + expect(call.experimental_telemetry.metadata.jobId).toBe('job-777'); + expect(call.experimental_telemetry.metadata.unitKey).toBe('sources/users'); + }); + + it('records a sanitized LLM debug request when a recorder is injected', async () => { + (generateText as any).mockResolvedValue({ text: 'ok', toolCalls: [], steps: [] }); + const record = vi.fn(); + const provider = { + ...llmProvider, + cacheMarker: vi.fn((ttl: '5m' | '1h') => ({ + anthropic: { cacheControl: { type: 'ephemeral' as const, ttl } }, + })), + promptCachingConfig: vi.fn(() => ({ + enabled: true, + systemTtl: '1h', + toolsTtl: '1h', + historyTtl: '5m', + cacheSystem: true, + cacheTools: true, + cacheHistory: true, + vertexFallbackTo5m: false, + })), + }; + const runnerWithDebug = new AgentRunnerService({ + llmProvider: provider as any, + debugRequestRecorder: { record }, + }); + + await runnerWithDebug.runLoop({ + modelRole: 'candidateExtraction', + systemPrompt: 'SECRET SYSTEM PROMPT', + userPrompt: 'SECRET USER PROMPT', + toolSet: { + emit_candidate: { + description: 'SECRET TOOL DESCRIPTION', + inputSchema: {}, + execute: vi.fn(), + } as any, + }, + stepBudget: 10, + telemetryTags: { operationName: 'ingest-bundle-wu', source: 'metabase', jobId: 'job-1', unitKey: 'cards/1' }, + }); + + expect(record).toHaveBeenCalledTimes(1); + expect(record).toHaveBeenCalledWith( + expect.objectContaining({ + operationName: 'ingest-bundle-wu', + source: 'metabase', + jobId: 'job-1', + unitKey: 'cards/1', + modelRole: 'candidateExtraction', + modelId: 'claude-sonnet-4-6', + messageCount: 2, + toolNames: ['emit_candidate'], + }), + ); + const 
providerOptions = record.mock.calls[0][0].providerOptions; + expect(providerOptions).toEqual( + expect.arrayContaining([ + expect.objectContaining({ target: 'message', index: 0, role: 'system' }), + expect.objectContaining({ target: 'message-part', index: 1, role: 'user', partIndex: 0 }), + expect.objectContaining({ target: 'tool', name: 'emit_candidate' }), + ]), + ); + expect(providerOptions).toHaveLength(3); + const serialized = JSON.stringify(record.mock.calls[0][0]); + expect(serialized).not.toContain('SECRET SYSTEM PROMPT'); + expect(serialized).not.toContain('SECRET USER PROMPT'); + expect(serialized).not.toContain('SECRET TOOL DESCRIPTION'); + }); +}); diff --git a/packages/context/src/agent/agent-runner.service.ts b/packages/context/src/agent/agent-runner.service.ts new file mode 100644 index 00000000..75bdbefb --- /dev/null +++ b/packages/context/src/agent/agent-runner.service.ts @@ -0,0 +1,101 @@ +import { KloMessageBuilder, type KloLlmProvider, type KloModelRole } from '@klo/llm'; +import { generateText, stepCountIs, type TelemetrySettings, type Tool } from 'ai'; +import { noopLogger, type KloLogger } from '../core/index.js'; +import { summarizeKloLlmDebugRequest, type KloLlmDebugRequestRecorder } from '../llm/index.js'; + +export type RunLoopStopReason = 'budget' | 'natural' | 'error'; + +export interface RunLoopStepInfo { + stepIndex: number; + stepBudget: number; +} + +export interface RunLoopParams { + modelRole: KloModelRole; + systemPrompt: string; + userPrompt: string; + toolSet: Record; + stepBudget: number; + telemetryTags: Record; + onStepFinish?: (info: RunLoopStepInfo) => void | Promise; +} + +export interface RunLoopResult { + stopReason: RunLoopStopReason; + error?: Error; +} + +export interface AgentTelemetryPort { + createTelemetry(tags: Record): TelemetrySettings; +} + +export interface AgentRunnerServiceDeps { + llmProvider: KloLlmProvider; + telemetry?: AgentTelemetryPort; + debugRequestRecorder?: KloLlmDebugRequestRecorder; + 
logger?: KloLogger;
+}
+
+/**
+ * Thin wrapper around `generateText` that runs one bounded agent loop and reports how it ended
+ * instead of throwing.
+ */
+export class AgentRunnerService {
+  private readonly logger: KloLogger;
+
+  constructor(private readonly deps: AgentRunnerServiceDeps) {
+    this.logger = deps.logger ?? noopLogger;
+  }
+
+  /**
+   * Runs a single tool-calling loop at temperature 0, capped at `params.stepBudget` steps.
+   *
+   * @param params - model role, prompts, tool set, step budget, telemetry tags and an optional
+   *   per-step callback. Errors thrown by that callback are logged and ignored.
+   * @returns `natural` when the model finished on its own, `budget` when the loop was cut off by
+   *   the step budget while the model was still requesting tool calls, or `error` (with the
+   *   normalized `Error`) when anything inside the loop threw. This method never rejects.
+   */
+  async runLoop(params: RunLoopParams): Promise<RunLoopResult> {
+    let stepIndex = 0;
+    try {
+      const model = this.deps.llmProvider.getModel(params.modelRole);
+      const builder = new KloMessageBuilder(this.deps.llmProvider);
+      const built = builder.wrapSimple({
+        system: params.systemPrompt,
+        messages: [{ role: 'user', content: params.userPrompt }],
+        tools: params.toolSet,
+        model,
+      });
+
+      // Record only a summary of the outgoing request; the recorder is optional.
+      await this.deps.debugRequestRecorder?.record(
+        summarizeKloLlmDebugRequest({
+          operationName: params.telemetryTags.operationName ?? 'klo-agent-runner',
+          source: params.telemetryTags.source,
+          jobId: params.telemetryTags.jobId,
+          unitKey: params.telemetryTags.unitKey,
+          modelRole: params.modelRole,
+          modelId: (model as { modelId?: string }).modelId ?? params.modelRole,
+          messages: built.messages,
+          tools: built.tools as Record<string, Tool>,
+        }),
+      );
+
+      const result = await generateText({
+        model,
+        temperature: 0,
+        stopWhen: stepCountIs(params.stepBudget),
+        experimental_telemetry: this.deps.telemetry?.createTelemetry(params.telemetryTags),
+        messages: built.messages,
+        tools: built.tools as Record<string, Tool>,
+        onStepFinish: async () => {
+          stepIndex += 1;
+          if (!params.onStepFinish) {
+            return;
+          }
+          try {
+            await params.onStepFinish({ stepIndex, stepBudget: params.stepBudget });
+          } catch (err) {
+            // A misbehaving progress callback must not abort the agent loop.
+            this.logger.warn(
+              `[agent-runner] onStepFinish callback threw; ignoring: ${
+                err instanceof Error ? err.message : String(err)
+              }`,
+            );
+          }
+        },
+      });
+      // `stepCountIs(n)` stops the loop after n steps even when the model still wants to call
+      // tools; the final step then finishes with 'tool-calls' instead of a terminal reason.
+      const exhaustedBudget = stepIndex >= params.stepBudget && result.finishReason === 'tool-calls';
+      return { stopReason: exhaustedBudget ? 'budget' : 'natural' };
+    } catch (error) {
+      const err = error instanceof Error ?
error : new Error(String(error)); + this.logger.warn(`[agent-runner] loop failed: ${err.message}`); + return { stopReason: 'error', error: err }; + } + } +} diff --git a/packages/context/src/agent/index.ts b/packages/context/src/agent/index.ts new file mode 100644 index 00000000..b4b94167 --- /dev/null +++ b/packages/context/src/agent/index.ts @@ -0,0 +1,9 @@ +export type { + AgentRunnerServiceDeps, + AgentTelemetryPort, + RunLoopParams, + RunLoopResult, + RunLoopStepInfo, + RunLoopStopReason, +} from './agent-runner.service.js'; +export { AgentRunnerService } from './agent-runner.service.js'; diff --git a/packages/context/src/connections/connection-type.ts b/packages/context/src/connections/connection-type.ts new file mode 100644 index 00000000..81c17bb4 --- /dev/null +++ b/packages/context/src/connections/connection-type.ts @@ -0,0 +1,28 @@ +import { z } from 'zod'; + +export const connectionTypeSchema = z.enum([ + 'POSTGRESQL', + 'SQLITE', + 'SQLSERVER', + 'BIGQUERY', + 'SNOWFLAKE', + 'CENTRALREACH', + 'EPIC', + 'CERNER', + 'ATHENA', + 'QUICKBOOKS', + 'WORKDAY', + 'REST', + 'S3', + 'SLACK', + 'METABASE', + 'LOOKER', + 'NOTION', + 'POSTHOG', + 'MYSQL', + 'CLICKHOUSE', + 'PLAIN', + 'BETTERSTACK', +]); + +export type ConnectionType = z.infer; diff --git a/packages/context/src/connections/index.ts b/packages/context/src/connections/index.ts new file mode 100644 index 00000000..3f833617 --- /dev/null +++ b/packages/context/src/connections/index.ts @@ -0,0 +1,27 @@ +export type { + KloSqlQueryExecutionInput, + KloSqlQueryExecutionResult, + KloSqlQueryExecutorPort, +} from './query-executor.js'; +export { createDefaultLocalQueryExecutor, type DefaultLocalQueryExecutorOptions } from './local-query-executor.js'; +export { normalizeQueryRows } from './query-executor.js'; +export { createPostgresQueryExecutor } from './postgres-query-executor.js'; +export { assertReadOnlySql, limitSqlForExecution } from './read-only-sql.js'; +export { createSqliteQueryExecutor, 
sqliteDatabasePathFromConnection } from './sqlite-query-executor.js'; +export { connectionTypeSchema, type ConnectionType } from './connection-type.js'; +export { + localConnectionInfoFromConfig, + localConnectionToWarehouseDescriptor, + localConnectionTypeForConfig, + type LocalConnectionInfo, + type LocalWarehouseDescriptor, +} from './local-warehouse-descriptor.js'; +export { + KLO_NOTION_ORG_KNOWLEDGE_WARNING, + notionConnectionToPullConfig, + parseNotionConnectionConfig, + redactNotionConnectionConfig, + resolveNotionAuthToken, + type KloNotionConnectionConfig, + type RedactedKloNotionConnectionConfig, +} from './notion-config.js'; diff --git a/packages/context/src/connections/local-query-executor.test.ts b/packages/context/src/connections/local-query-executor.test.ts new file mode 100644 index 00000000..fd94c6dc --- /dev/null +++ b/packages/context/src/connections/local-query-executor.test.ts @@ -0,0 +1,59 @@ +import { describe, expect, it, vi } from 'vitest'; +import { createDefaultLocalQueryExecutor } from './local-query-executor.js'; + +describe('createDefaultLocalQueryExecutor', () => { + it('dispatches postgres and sqlite drivers to their executors', async () => { + const postgres = { + execute: vi.fn(async () => ({ + headers: ['pg'], + rows: [[1]], + totalRows: 1, + command: 'SELECT', + rowCount: 1, + })), + }; + const sqlite = { + execute: vi.fn(async () => ({ + headers: ['sqlite'], + rows: [[2]], + totalRows: 1, + command: 'SELECT', + rowCount: 1, + })), + }; + const executor = createDefaultLocalQueryExecutor({ postgres, sqlite }); + + await expect( + executor.execute({ + connectionId: 'pg', + connection: { driver: 'postgres', readonly: true }, + sql: 'select 1', + }), + ).resolves.toMatchObject({ headers: ['pg'] }); + await expect( + executor.execute({ + connectionId: 'local', + connection: { driver: 'sqlite', readonly: true }, + sql: 'select 1', + }), + ).resolves.toMatchObject({ headers: ['sqlite'] }); + + 
expect(postgres.execute).toHaveBeenCalledTimes(1); + expect(sqlite.execute).toHaveBeenCalledTimes(1); + }); + + it('rejects unsupported local execution drivers', async () => { + const executor = createDefaultLocalQueryExecutor({ + postgres: { execute: vi.fn() }, + sqlite: { execute: vi.fn() }, + }); + + await expect( + executor.execute({ + connectionId: 'warehouse', + connection: { driver: 'snowflake', readonly: true }, + sql: 'select 1', + }), + ).rejects.toThrow('No local query executor is configured for driver "snowflake".'); + }); +}); diff --git a/packages/context/src/connections/local-query-executor.ts b/packages/context/src/connections/local-query-executor.ts new file mode 100644 index 00000000..46d95b4b --- /dev/null +++ b/packages/context/src/connections/local-query-executor.ts @@ -0,0 +1,34 @@ +import { createPostgresQueryExecutor } from './postgres-query-executor.js'; +import type { + KloSqlQueryExecutionInput, + KloSqlQueryExecutionResult, + KloSqlQueryExecutorPort, +} from './query-executor.js'; +import { createSqliteQueryExecutor } from './sqlite-query-executor.js'; + +export interface DefaultLocalQueryExecutorOptions { + postgres?: KloSqlQueryExecutorPort; + sqlite?: KloSqlQueryExecutorPort; +} + +function driverFor(input: KloSqlQueryExecutionInput): string { + return String(input.connection?.driver ?? '').toLowerCase(); +} + +export function createDefaultLocalQueryExecutor(options: DefaultLocalQueryExecutorOptions = {}): KloSqlQueryExecutorPort { + const postgres = options.postgres ?? createPostgresQueryExecutor(); + const sqlite = options.sqlite ?? 
createSqliteQueryExecutor(); + + return { + async execute(input: KloSqlQueryExecutionInput): Promise { + const driver = driverFor(input); + if (driver === 'postgres' || driver === 'postgresql') { + return postgres.execute(input); + } + if (driver === 'sqlite' || driver === 'sqlite3') { + return sqlite.execute(input); + } + throw new Error(`No local query executor is configured for driver "${input.connection?.driver ?? 'unknown'}".`); + }, + }; +} diff --git a/packages/context/src/connections/local-warehouse-descriptor.test.ts b/packages/context/src/connections/local-warehouse-descriptor.test.ts new file mode 100644 index 00000000..5864c833 --- /dev/null +++ b/packages/context/src/connections/local-warehouse-descriptor.test.ts @@ -0,0 +1,63 @@ +import { describe, expect, it } from 'vitest'; +import { + localConnectionInfoFromConfig, + localConnectionToWarehouseDescriptor, + localConnectionTypeForConfig, +} from './local-warehouse-descriptor.js'; + +describe('localConnectionToWarehouseDescriptor', () => { + it('maps local Postgres URLs to canonical warehouse descriptors', () => { + expect( + localConnectionToWarehouseDescriptor('warehouse', { + driver: 'postgres', + url: 'postgresql://readonly@db.example.test/analytics', + }), + ).toMatchObject({ + id: 'warehouse', + connection_type: 'POSTGRESQL', + host: 'db.example.test', + database: 'analytics', + }); + }); + + it('maps BigQuery project and dataset from explicit fields', () => { + expect( + localConnectionToWarehouseDescriptor('bq', { + driver: 'bigquery', + project_id: 'acme', + dataset_id: 'warehouse', + }), + ).toMatchObject({ + id: 'bq', + connection_type: 'BIGQUERY', + project_id: 'acme', + dataset_id: 'warehouse', + }); + }); + + it('returns null for non-warehouse adapters', () => { + expect(localConnectionToWarehouseDescriptor('looker', { driver: 'looker' })).toBeNull(); + }); +}); + +describe('local connection info helpers', () => { + it('returns canonical warehouse connection types for local catalogs', () 
=> { + expect(localConnectionTypeForConfig('warehouse', { driver: 'postgres' })).toBe('POSTGRESQL'); + expect(localConnectionTypeForConfig('bq', { driver: 'bigquery', project_id: 'acme' })).toBe('BIGQUERY'); + expect(localConnectionTypeForConfig('snowflake', { driver: 'snowflake' })).toBe('SNOWFLAKE'); + }); + + it('keeps non-warehouse adapter labels for display-only local connection surfaces', () => { + expect(localConnectionTypeForConfig('prod-metabase', { driver: 'metabase' })).toBe('metabase'); + expect(localConnectionTypeForConfig('missing-driver', {} as never)).toBe('unknown'); + }); + + it('builds nullable local connection info records', () => { + expect(localConnectionInfoFromConfig('warehouse', { driver: 'postgres' })).toEqual({ + id: 'warehouse', + name: 'warehouse', + connectionType: 'POSTGRESQL', + }); + expect(localConnectionInfoFromConfig('missing', undefined)).toBeNull(); + }); +}); diff --git a/packages/context/src/connections/local-warehouse-descriptor.ts b/packages/context/src/connections/local-warehouse-descriptor.ts new file mode 100644 index 00000000..9a4bee8c --- /dev/null +++ b/packages/context/src/connections/local-warehouse-descriptor.ts @@ -0,0 +1,102 @@ +import type { KloProjectConnectionConfig } from '../project/config.js'; +import type { ConnectionType } from './connection-type.js'; + +export interface LocalWarehouseDescriptor { + id: string; + connection_type: ConnectionType; + host?: string | null; + database?: string | null; + account?: string | null; + project_id?: string | null; + dataset_id?: string | null; + connection_params: Record; +} + +export interface LocalConnectionInfo { + id: string; + name: string; + connectionType: string; +} + +const DRIVER_TO_CONNECTION_TYPE: Record = { + postgres: 'POSTGRESQL', + postgresql: 'POSTGRESQL', + sqlite: 'SQLITE', + sqlserver: 'SQLSERVER', + mssql: 'SQLSERVER', + mysql: 'MYSQL', + clickhouse: 'CLICKHOUSE', + snowflake: 'SNOWFLAKE', + bigquery: 'BIGQUERY', +}; + +export function 
localConnectionToWarehouseDescriptor(
+  id: string,
+  connection: KloProjectConnectionConfig | undefined,
+): LocalWarehouseDescriptor | null {
+  if (!connection) {
+    return null;
+  }
+  // Trim the driver the same way localConnectionTypeForConfig does, and accept own keys only so
+  // a driver such as "constructor" or "toString" cannot resolve to an Object.prototype member.
+  const driverKey = String(connection.driver ?? '').trim().toLowerCase();
+  const connectionType = Object.prototype.hasOwnProperty.call(DRIVER_TO_CONNECTION_TYPE, driverKey)
+    ? DRIVER_TO_CONNECTION_TYPE[driverKey]
+    : undefined;
+  if (!connectionType) {
+    return null;
+  }
+
+  const info: LocalWarehouseDescriptor = {
+    id,
+    connection_type: connectionType,
+    connection_params: { ...connection },
+  };
+  const url = typeof connection.url === 'string' ? connection.url : null;
+  // Values prefixed with `env:` or `file:` are not parsed here (presumably references that are
+  // resolved elsewhere — confirm against the config loader).
+  if (url && !url.startsWith('env:') && !url.startsWith('file:')) {
+    try {
+      const parsed = new URL(url);
+      info.host = parsed.hostname || null;
+      if (parsed.pathname.length > 1) {
+        const [first, second] = parsed.pathname.slice(1).split('/');
+        if (connectionType === 'BIGQUERY') {
+          // `hostname` is '' (never null/undefined) when the URL has no authority, so `||` is
+          // needed for the path-segment fallback to be reachable at all.
+          info.project_id = stringField(connection.project_id) ?? (parsed.hostname || first || null);
+          info.dataset_id = stringField(connection.dataset_id) ?? (second || null);
+        } else {
+          // Empty segments (e.g. a doubled slash) map to null rather than ''.
+          info.database = first || null;
+        }
+      }
+    } catch {
+      // Unparseable URL: fall back to the explicit host field only.
+      info.host = stringField(connection.host);
+    }
+  }
+
+  // Explicit fields always win over anything derived from the URL.
+  info.host = stringField(connection.host) ?? info.host ?? null;
+  info.database = stringField(connection.database) ?? info.database ?? null;
+  info.account = stringField(connection.account) ?? null;
+  info.project_id = stringField(connection.project_id) ?? info.project_id ?? null;
+  info.dataset_id = stringField(connection.dataset_id) ?? info.dataset_id ?? null;
+  return info;
+}
+
+/**
+ * Returns the canonical connection type for warehouse drivers; for any other driver returns the
+ * trimmed driver string as written in the config, or 'unknown' when no driver is set.
+ */
+export function localConnectionTypeForConfig(id: string, connection: KloProjectConnectionConfig | undefined): string {
+  const descriptor = localConnectionToWarehouseDescriptor(id, connection);
+  if (descriptor) {
+    return descriptor.connection_type;
+  }
+  const driver = typeof connection?.driver === 'string' ? connection.driver.trim() : '';
+  return driver.length > 0 ?
driver : 'unknown'; +} + +export function localConnectionInfoFromConfig( + id: string, + connection: KloProjectConnectionConfig | undefined, +): LocalConnectionInfo | null { + if (!connection) { + return null; + } + return { + id, + name: id, + connectionType: localConnectionTypeForConfig(id, connection), + }; +} + +function stringField(value: unknown): string | null { + return typeof value === 'string' && value.trim().length > 0 ? value.trim() : null; +} diff --git a/packages/context/src/connections/notion-config.test.ts b/packages/context/src/connections/notion-config.test.ts new file mode 100644 index 00000000..38772ec4 --- /dev/null +++ b/packages/context/src/connections/notion-config.test.ts @@ -0,0 +1,120 @@ +import { mkdtemp, rm, writeFile } from 'node:fs/promises'; +import { tmpdir } from 'node:os'; +import { join } from 'node:path'; +import { afterEach, beforeEach, describe, expect, it } from 'vitest'; +import { + notionConnectionToPullConfig, + parseNotionConnectionConfig, + redactNotionConnectionConfig, + resolveNotionAuthToken, +} from './notion-config.js'; + +describe('standalone Notion connection config', () => { + let tempDir: string; + + beforeEach(async () => { + tempDir = await mkdtemp(join(tmpdir(), 'klo-notion-config-')); + }); + + afterEach(async () => { + await rm(tempDir, { recursive: true, force: true }); + }); + + it('parses selected-root Notion config with safe defaults', () => { + const parsed = parseNotionConnectionConfig({ + driver: 'notion', + auth_token_ref: 'env:NOTION_AUTH_TOKEN', + crawl_mode: 'selected_roots', + root_page_ids: ['page-1'], + }); + + expect(parsed).toEqual({ + driver: 'notion', + auth_token_ref: 'env:NOTION_AUTH_TOKEN', + crawl_mode: 'selected_roots', + root_page_ids: ['page-1'], + root_database_ids: [], + root_data_source_ids: [], + max_pages_per_run: 1000, + max_knowledge_creates_per_run: 5, + max_knowledge_updates_per_run: 20, + last_successful_cursor: null, + }); + }); + + it('redacts token references from 
display output', () => { + expect( + redactNotionConnectionConfig( + parseNotionConnectionConfig({ + driver: 'notion', + auth_token_ref: 'file:/Users/example/.config/notion-token', + crawl_mode: 'all_accessible', + max_pages_per_run: 80, + }), + ), + ).toEqual({ + driver: 'notion', + hasAuthToken: true, + crawlMode: 'all_accessible', + rootPageIds: [], + rootDatabaseIds: [], + rootDataSourceIds: [], + maxPagesPerRun: 80, + maxKnowledgeCreatesPerRun: 5, + maxKnowledgeUpdatesPerRun: 20, + warning: 'Anything accessible to this Notion integration can become organization knowledge.', + }); + }); + + it('requires at least one selected root in selected_roots mode', () => { + expect(() => + parseNotionConnectionConfig({ + driver: 'notion', + auth_token_ref: 'env:NOTION_AUTH_TOKEN', + crawl_mode: 'selected_roots', + }), + ).toThrow('selected_roots requires at least one root page, database, or data source id'); + }); + + it('resolves env and file token references without exposing the reference in errors', async () => { + const tokenPath = join(tempDir, 'notion-token.txt'); + await writeFile(tokenPath, 'ntn_file_token\n', 'utf-8'); + + await expect( + resolveNotionAuthToken('env:NOTION_AUTH_TOKEN', { + env: { NOTION_AUTH_TOKEN: 'ntn_env_token' }, + }), + ).resolves.toBe('ntn_env_token'); + await expect(resolveNotionAuthToken(`file:${tokenPath}`)).resolves.toBe('ntn_file_token'); + await expect(resolveNotionAuthToken('env:MISSING_NOTION_TOKEN', { env: {} })).rejects.toThrow( + 'Notion token environment variable MISSING_NOTION_TOKEN is not set', + ); + }); + + it('converts standalone config into adapter pull config', async () => { + const pullConfig = await notionConnectionToPullConfig( + parseNotionConnectionConfig({ + driver: 'notion', + auth_token_ref: 'env:NOTION_AUTH_TOKEN', + crawl_mode: 'all_accessible', + max_pages_per_run: 12, + max_knowledge_creates_per_run: 2, + max_knowledge_updates_per_run: 7, + last_successful_cursor: 
'{"phase":"all_accessible_pages","cursor":"cursor-1"}', + }), + { env: { NOTION_AUTH_TOKEN: 'ntn_env_token' } }, + ); + + expect(pullConfig).toEqual({ + authToken: 'ntn_env_token', + crawlMode: 'all_accessible', + rootPageIds: [], + rootDatabaseIds: [], + rootDataSourceIds: [], + maxPagesPerRun: 12, + maxKnowledgeCreatesPerRun: 2, + maxKnowledgeUpdatesPerRun: 7, + lastSuccessfulCursor: '{"phase":"all_accessible_pages","cursor":"cursor-1"}', + }); + }); +}); diff --git a/packages/context/src/connections/notion-config.ts b/packages/context/src/connections/notion-config.ts new file mode 100644 index 00000000..96ce453e --- /dev/null +++ b/packages/context/src/connections/notion-config.ts @@ -0,0 +1,196 @@ +import { readFile } from 'node:fs/promises'; +import { homedir } from 'node:os'; +import { resolve } from 'node:path'; +import { type NotionPullConfig, notionPullConfigSchema } from '../ingest/adapters/notion/types.js'; +import type { KloProjectConnectionConfig } from '../project/config.js'; + +export const KLO_NOTION_ORG_KNOWLEDGE_WARNING = + 'Anything accessible to this Notion integration can become organization knowledge.'; + +type KloNotionCrawlMode = 'all_accessible' | 'selected_roots'; + +export interface KloNotionConnectionConfig extends KloProjectConnectionConfig { + driver: 'notion'; + auth_token_ref: string; + crawl_mode: KloNotionCrawlMode; + root_page_ids: string[]; + root_database_ids: string[]; + root_data_source_ids: string[]; + max_pages_per_run: number; + max_knowledge_creates_per_run: number; + max_knowledge_updates_per_run: number; + last_successful_cursor: string | null; +} + +export interface RedactedKloNotionConnectionConfig { + driver: 'notion'; + hasAuthToken: boolean; + crawlMode: KloNotionCrawlMode; + rootPageIds: string[]; + rootDatabaseIds: string[]; + rootDataSourceIds: string[]; + maxPagesPerRun: number; + maxKnowledgeCreatesPerRun: number; + maxKnowledgeUpdatesPerRun: number; + warning: typeof KLO_NOTION_ORG_KNOWLEDGE_WARNING; +} + 
+interface ResolveNotionTokenOptions { + env?: Record; + readTextFile?: (path: string) => Promise; +} + +function isRecord(value: unknown): value is Record { + return typeof value === 'object' && value !== null && !Array.isArray(value); +} + +function record(value: unknown): Record { + if (!isRecord(value)) { + throw new Error('Notion connection config must be an object'); + } + return value; +} + +function stringValue(value: unknown, fallback: string): string { + return typeof value === 'string' && value.trim().length > 0 ? value.trim() : fallback; +} + +function optionalString(value: unknown): string | null { + return typeof value === 'string' && value.trim().length > 0 ? value.trim() : null; +} + +function stringArray(value: unknown): string[] { + if (!Array.isArray(value)) { + return []; + } + return value.filter((item): item is string => typeof item === 'string' && item.trim().length > 0).map((item) => item.trim()); /* keep only non-blank strings and store them trimmed, matching stringValue/optionalString */ +} + +function integerWithFallback(value: unknown, fallback: number, name: string): number { + if (value === undefined || value === null) { + return fallback; + } + if (typeof value !== 'number' || !Number.isInteger(value)) { + throw new Error(`${name} must be an integer`); + } + return value; +} + +function boundedInteger(value: unknown, fallback: number, name: string, min: number, max: number): number { + const parsed = integerWithFallback(value, fallback, name); + if (parsed < min || parsed > max) { + throw new Error(`${name} must be between ${min} and ${max}`); + } + return parsed; +} + +export function parseNotionConnectionConfig(raw: unknown): KloNotionConnectionConfig { + const input = record(raw); + if (input.driver !== 'notion') { + throw new Error('Notion connection config requires driver: notion'); + } + const authTokenRef = stringValue(input.auth_token_ref, ''); + if (!authTokenRef) { + throw new Error('Notion connection config requires auth_token_ref'); + } + if (!authTokenRef.startsWith('env:') && !authTokenRef.startsWith('file:')) { + throw new Error('Notion
auth_token_ref must use env:NAME or file:/path'); + } + + const crawlMode = stringValue(input.crawl_mode, 'selected_roots'); + if (crawlMode !== 'selected_roots' && crawlMode !== 'all_accessible') { + throw new Error(`Unsupported Notion crawl_mode: ${crawlMode}`); + } + const rootPageIds = stringArray(input.root_page_ids); + const rootDatabaseIds = stringArray(input.root_database_ids); + const rootDataSourceIds = stringArray(input.root_data_source_ids); + if (crawlMode === 'selected_roots' && rootPageIds.length + rootDatabaseIds.length + rootDataSourceIds.length === 0) { + throw new Error('selected_roots requires at least one root page, database, or data source id'); + } + + return { + ...input, + driver: 'notion', + auth_token_ref: authTokenRef, + crawl_mode: crawlMode, + root_page_ids: rootPageIds, + root_database_ids: rootDatabaseIds, + root_data_source_ids: rootDataSourceIds, + max_pages_per_run: boundedInteger(input.max_pages_per_run, 1000, 'max_pages_per_run', 1, 10_000), + max_knowledge_creates_per_run: boundedInteger( + input.max_knowledge_creates_per_run, + 5, + 'max_knowledge_creates_per_run', + 0, + 25, + ), + max_knowledge_updates_per_run: boundedInteger( + input.max_knowledge_updates_per_run, + 20, + 'max_knowledge_updates_per_run', + 0, + 100, + ), + last_successful_cursor: optionalString(input.last_successful_cursor), + }; +} + +export function redactNotionConnectionConfig(config: KloNotionConnectionConfig): RedactedKloNotionConnectionConfig { + return { + driver: 'notion', + hasAuthToken: Boolean(config.auth_token_ref), + crawlMode: config.crawl_mode, + rootPageIds: config.root_page_ids, + rootDatabaseIds: config.root_database_ids, + rootDataSourceIds: config.root_data_source_ids, + maxPagesPerRun: config.max_pages_per_run, + maxKnowledgeCreatesPerRun: config.max_knowledge_creates_per_run, + maxKnowledgeUpdatesPerRun: config.max_knowledge_updates_per_run, + warning: KLO_NOTION_ORG_KNOWLEDGE_WARNING, + }; +} + +function expandHome(path: string): 
string { + return path === '~' || path.startsWith('~/') ? resolve(homedir(), path.slice(2)) : path; +} + +export async function resolveNotionAuthToken( + authTokenRef: string, + options: ResolveNotionTokenOptions = {}, +): Promise { + if (authTokenRef.startsWith('env:')) { + const envName = authTokenRef.slice('env:'.length); + const value = (options.env ?? process.env)[envName]?.trim(); /* trim before the emptiness check so a whitespace-only variable is reported as unset, mirroring the file branch below */ + if (!value) { + throw new Error(`Notion token environment variable ${envName} is not set`); + } + return value; + } + if (authTokenRef.startsWith('file:')) { + const path = expandHome(authTokenRef.slice('file:'.length)); + const readTextFile = options.readTextFile ?? ((filePath: string) => readFile(filePath, 'utf-8')); + const value = (await readTextFile(path)).trim(); + if (!value) { + throw new Error(`Notion token file is empty: ${path}`); + } + return value; + } + throw new Error('Notion auth_token_ref must use env:NAME or file:/path'); +} + +export async function notionConnectionToPullConfig( + config: KloNotionConnectionConfig, + options: ResolveNotionTokenOptions = {}, +): Promise { + return notionPullConfigSchema.parse({ + authToken: await resolveNotionAuthToken(config.auth_token_ref, options), + crawlMode: config.crawl_mode, + rootPageIds: config.root_page_ids, + rootDatabaseIds: config.root_database_ids, + rootDataSourceIds: config.root_data_source_ids, + maxPagesPerRun: config.max_pages_per_run, + maxKnowledgeCreatesPerRun: config.max_knowledge_creates_per_run, + maxKnowledgeUpdatesPerRun: config.max_knowledge_updates_per_run, + lastSuccessfulCursor: config.last_successful_cursor, + }); +} diff --git a/packages/context/src/connections/postgres-query-executor.test.ts b/packages/context/src/connections/postgres-query-executor.test.ts new file mode 100644 index 00000000..6fc1a3e5 --- /dev/null +++ b/packages/context/src/connections/postgres-query-executor.test.ts @@ -0,0 +1,111 @@ +import { describe, expect, it, vi } from 'vitest'; +import { createPostgresQueryExecutor }
from './postgres-query-executor.js'; + +function makeClient() { + const calls: unknown[] = []; + const client = { + connect: vi.fn(async () => undefined), + query: vi.fn(async (input: unknown) => { + calls.push(input); + if (input === 'BEGIN READ ONLY') { + return { rows: [], fields: [], rowCount: null, command: 'BEGIN' }; + } + if (input === 'COMMIT') { + return { rows: [], fields: [], rowCount: null, command: 'COMMIT' }; + } + return { + rows: [ + ['paid', 2], + ['open', 1], + ], + fields: [{ name: 'status' }, { name: 'order_count' }], + rowCount: 2, + command: 'SELECT', + }; + }), + end: vi.fn(async () => undefined), + }; + return { client, calls }; +} + +describe('createPostgresQueryExecutor', () => { + it('runs a read-only transaction in array row mode and closes the client', async () => { + const { client, calls } = makeClient(); + const executor = createPostgresQueryExecutor({ + clientFactory: vi.fn(() => client), + }); + + const result = await executor.execute({ + connectionId: 'warehouse', + connection: { driver: 'postgres', url: 'postgres://example/db', readonly: true }, + sql: 'select status, count(*) as order_count from public.orders group by status', + maxRows: 50, + }); + + expect(client.connect).toHaveBeenCalledTimes(1); + expect(calls[0]).toBe('BEGIN READ ONLY'); + expect(calls[1]).toEqual({ + text: 'select * from (select status, count(*) as order_count from public.orders group by status) as klo_query_result limit 50', + rowMode: 'array', + }); + expect(calls[2]).toBe('COMMIT'); + expect(client.end).toHaveBeenCalledTimes(1); + expect(result).toEqual({ + headers: ['status', 'order_count'], + rows: [ + ['paid', 2], + ['open', 1], + ], + totalRows: 2, + command: 'SELECT', + rowCount: 2, + }); + }); + + it('rolls back and closes the client when query execution fails', async () => { + const client = { + connect: vi.fn(async () => undefined), + query: vi.fn(async (input: unknown) => { + if (input === 'BEGIN READ ONLY' || input === 'ROLLBACK') { + return { 
rows: [], fields: [], rowCount: null, command: String(input) }; + } + throw new Error('syntax error'); + }), + end: vi.fn(async () => undefined), + }; + const executor = createPostgresQueryExecutor({ + clientFactory: vi.fn(() => client), + }); + + await expect( + executor.execute({ + connectionId: 'warehouse', + connection: { driver: 'postgres', url: 'postgres://example/db', readonly: true }, + sql: 'select * from broken', + maxRows: 10, + }), + ).rejects.toThrow('syntax error'); + expect(client.query).toHaveBeenCalledWith('ROLLBACK'); + expect(client.end).toHaveBeenCalledTimes(1); + }); + + it('requires a Postgres url and read-only connection config', async () => { + const executor = createPostgresQueryExecutor({ clientFactory: vi.fn() }); + + await expect( + executor.execute({ + connectionId: 'warehouse', + connection: { driver: 'postgres', readonly: true }, + sql: 'select 1', + }), + ).rejects.toThrow('Local Postgres execution requires connections.warehouse.url'); + + await expect( + executor.execute({ + connectionId: 'warehouse', + connection: { driver: 'postgres', url: 'postgres://example/db', readonly: false }, + sql: 'select 1', + }), + ).rejects.toThrow('Local query execution requires connections.warehouse.readonly: true'); + }); +}); diff --git a/packages/context/src/connections/postgres-query-executor.ts b/packages/context/src/connections/postgres-query-executor.ts new file mode 100644 index 00000000..95466be0 --- /dev/null +++ b/packages/context/src/connections/postgres-query-executor.ts @@ -0,0 +1,80 @@ +import { Client, type ClientConfig } from 'pg'; +import type { + KloSqlQueryExecutionInput, + KloSqlQueryExecutionResult, + KloSqlQueryExecutorPort, +} from './query-executor.js'; +import { limitSqlForExecution } from './read-only-sql.js'; + +interface PgClientLike { + connect(): Promise; + query(input: string | { text: string; rowMode: 'array' }): Promise<{ + fields: Array<{ name: string }>; + rows: unknown[][]; + command: string; + rowCount: number | 
null; + }>; + end(): Promise; +} + +interface PostgresQueryExecutorOptions { + statementTimeoutMs?: number; + queryTimeoutMs?: number; + connectionTimeoutMs?: number; + clientFactory?: (config: ClientConfig) => PgClientLike; +} + +function connectionDriver(input: KloSqlQueryExecutionInput): string { + return String(input.connection?.driver ?? '').toLowerCase(); +} + +function createDefaultClient(config: ClientConfig): PgClientLike { + return new Client(config); +} + +export function createPostgresQueryExecutor(options: PostgresQueryExecutorOptions = {}): KloSqlQueryExecutorPort { + const clientFactory = options.clientFactory ?? createDefaultClient; + return { + async execute(input: KloSqlQueryExecutionInput): Promise { + const driver = connectionDriver(input); + if (driver !== 'postgres' && driver !== 'postgresql') { + throw new Error(`Local Postgres execution cannot run driver "${input.connection?.driver ?? 'unknown'}".`); + } + if (input.connection?.readonly !== true) { + throw new Error(`Local query execution requires connections.${input.connectionId}.readonly: true.`); + } + if (typeof input.connection.url !== 'string' || input.connection.url.trim().length === 0) { + throw new Error(`Local Postgres execution requires connections.${input.connectionId}.url.`); + } + + const client = clientFactory({ + connectionString: input.connection.url, + statement_timeout: options.statementTimeoutMs ?? 30_000, + query_timeout: options.queryTimeoutMs ?? 35_000, + connectionTimeoutMillis: options.connectionTimeoutMs ?? 
5_000, + application_name: 'klo-local-query', + }); + await client.connect(); + try { + await client.query('BEGIN READ ONLY'); + const result = await client.query({ + text: limitSqlForExecution(input.sql, input.maxRows), + rowMode: 'array', + }); + await client.query('COMMIT'); + return { + headers: result.fields.map((field) => field.name), + rows: result.rows, + totalRows: result.rows.length, + command: result.command, + rowCount: result.rowCount, + }; + } catch (error) { + await client.query('ROLLBACK').catch(() => undefined); + throw error; + } finally { + await client.end(); + } + }, + }; +} diff --git a/packages/context/src/connections/query-executor.ts b/packages/context/src/connections/query-executor.ts new file mode 100644 index 00000000..f7ec2ce0 --- /dev/null +++ b/packages/context/src/connections/query-executor.ts @@ -0,0 +1,25 @@ +import type { KloProjectConnectionConfig } from '../project/index.js'; + +export interface KloSqlQueryExecutionInput { + connectionId: string; + projectDir?: string; + connection: KloProjectConnectionConfig | undefined; + sql: string; + maxRows?: number; +} + +export interface KloSqlQueryExecutionResult { + headers: string[]; + rows: unknown[][]; + totalRows: number; + command: string; + rowCount: number | null; +} + +export interface KloSqlQueryExecutorPort { + execute(input: KloSqlQueryExecutionInput): Promise; +} + +export function normalizeQueryRows(rows: unknown[]): unknown[][] { + return rows.map((row) => (Array.isArray(row) ? 
row : Object.values(row as Record))); +} diff --git a/packages/context/src/connections/read-only-sql.test.ts b/packages/context/src/connections/read-only-sql.test.ts new file mode 100644 index 00000000..f2fea2bb --- /dev/null +++ b/packages/context/src/connections/read-only-sql.test.ts @@ -0,0 +1,30 @@ +import { describe, expect, it } from 'vitest'; +import { assertReadOnlySql, limitSqlForExecution } from './read-only-sql.js'; + +describe('assertReadOnlySql', () => { + it('allows select and with queries', () => { + expect(assertReadOnlySql('select * from orders')).toBe('select * from orders'); + expect(assertReadOnlySql('with paid as (select * from orders) select * from paid')).toContain('with paid'); + }); + + it('rejects mutating statements before opening a database connection', () => { + expect(() => assertReadOnlySql('delete from orders')).toThrow( + 'Only read-only SELECT/WITH queries can be executed locally', + ); + expect(() => assertReadOnlySql('create table x(id int)')).toThrow( + 'Only read-only SELECT/WITH queries can be executed locally', + ); + }); +}); + +describe('limitSqlForExecution', () => { + it('wraps compiled SQL and strips trailing semicolons', () => { + expect(limitSqlForExecution('select * from public.orders; ', 25)).toBe( + 'select * from (select * from public.orders) as klo_query_result limit 25', + ); + }); + + it('returns the trimmed SQL when no maxRows value is provided', () => { + expect(limitSqlForExecution('select * from orders; ', undefined)).toBe('select * from orders'); + }); +}); diff --git a/packages/context/src/connections/read-only-sql.ts b/packages/context/src/connections/read-only-sql.ts new file mode 100644 index 00000000..f12aa661 --- /dev/null +++ b/packages/context/src/connections/read-only-sql.ts @@ -0,0 +1,22 @@ +const MUTATING_SQL = + /^\s*(insert|update|delete|merge|alter|drop|create|truncate|grant|revoke|copy|call|do|vacuum|analyze|refresh)\b/i; +const READ_SQL = /^\s*(select|with)\b/i; + +export function 
assertReadOnlySql(sql: string): string { + const trimmed = sql.trim(); + if (!READ_SQL.test(trimmed) || MUTATING_SQL.test(trimmed) || /;\s*[^;\s]/.test(trimmed)) { /* a ';' followed by more text means stacked statements (e.g. "select 1; commit; drop ..."); only trailing semicolons are tolerated. Deliberately conservative: a ';' inside a string literal or comment is rejected too, because quote-aware stripping can be fooled by escape/dollar quoting */ + throw new Error('Only read-only SELECT/WITH queries can be executed locally.'); + } + return trimmed; +} + +export function limitSqlForExecution(sql: string, maxRows: number | undefined): string { + const trimmed = assertReadOnlySql(sql).replace(/;+\s*$/, ''); + if (maxRows === undefined || maxRows === null) { /* only an absent limit skips the wrapper; 0 and NaN now reach the positive-integer check below instead of silently disabling the row cap */ + return trimmed; + } + if (!Number.isInteger(maxRows) || maxRows <= 0) { + throw new Error('maxRows must be a positive integer.'); + } + return `select * from (${trimmed}) as klo_query_result limit ${maxRows}`; +} diff --git a/packages/context/src/connections/sqlite-query-executor.test.ts b/packages/context/src/connections/sqlite-query-executor.test.ts new file mode 100644 index 00000000..fc664496 --- /dev/null +++ b/packages/context/src/connections/sqlite-query-executor.test.ts @@ -0,0 +1,148 @@ +import { mkdtemp, rm } from 'node:fs/promises'; +import { writeFileSync } from 'node:fs'; +import { tmpdir } from 'node:os'; +import { join } from 'node:path'; +import Database from 'better-sqlite3'; +import { afterEach, beforeEach, describe, expect, it } from 'vitest'; +import { createSqliteQueryExecutor, sqliteDatabasePathFromConnection } from './sqlite-query-executor.js'; + +describe('createSqliteQueryExecutor', () => { + let tempDir: string; + let dbPath: string; + + beforeEach(async () => { + tempDir = await mkdtemp(join(tmpdir(), 'klo-sqlite-query-')); + dbPath = join(tempDir, 'warehouse.db'); + const db = new Database(dbPath); + db.exec(` + CREATE TABLE orders ( + id INTEGER PRIMARY KEY, + status TEXT NOT NULL, + amount INTEGER NOT NULL + ); + INSERT INTO orders (status, amount) VALUES + ('paid', 20), + ('paid', 30), + ('open', 10); + `); + db.close(); + }); + + afterEach(async () => { + await rm(tempDir, { recursive: true, force: true }); + }); + + it('executes read-only SELECT SQL against a relative SQLite path', async () => { + const executor =
createSqliteQueryExecutor(); + + const result = await executor.execute({ + connectionId: 'warehouse', + projectDir: tempDir, + connection: { driver: 'sqlite', path: 'warehouse.db', readonly: true }, + sql: 'select status, count(*) as order_count from orders group by status order by status', + maxRows: 10, + }); + + expect(result).toEqual({ + headers: ['status', 'order_count'], + rows: [ + ['open', 1], + ['paid', 2], + ], + totalRows: 2, + command: 'SELECT', + rowCount: 2, + }); + }); + + it('supports file urls for SQLite database paths', async () => { + expect( + sqliteDatabasePathFromConnection({ + connectionId: 'warehouse', + projectDir: tempDir, + connection: { driver: 'sqlite', url: `file://${dbPath}`, readonly: true }, + sql: 'select 1', + }), + ).toBe(dbPath); + }); + + it('resolves file references for SQLite path fields', async () => { + const pointerPath = join(tempDir, 'sqlite-path.txt'); + writeFileSync(pointerPath, dbPath, 'utf-8'); + + expect( + sqliteDatabasePathFromConnection({ + connectionId: 'warehouse', + projectDir: tempDir, + connection: { driver: 'sqlite', path: `file:${pointerPath}`, readonly: true }, + sql: 'select 1', + }), + ).toBe(dbPath); + }); + + it('resolves env references for SQLite database urls', async () => { + const originalDatabaseUrl = process.env.KLO_SQLITE_TEST_URL; + process.env.KLO_SQLITE_TEST_URL = `sqlite:${dbPath}`; + + try { + expect( + sqliteDatabasePathFromConnection({ + connectionId: 'warehouse', + projectDir: tempDir, + connection: { driver: 'sqlite', url: 'env:KLO_SQLITE_TEST_URL', readonly: true }, + sql: 'select 1', + }), + ).toBe(dbPath); + } finally { + if (originalDatabaseUrl === undefined) { + delete process.env.KLO_SQLITE_TEST_URL; + } else { + process.env.KLO_SQLITE_TEST_URL = originalDatabaseUrl; + } + } + }); + + it('rejects mutating SQL before opening the database', async () => { + const executor = createSqliteQueryExecutor(); + + await expect( + executor.execute({ + connectionId: 'warehouse', + 
projectDir: tempDir, + connection: { driver: 'sqlite', path: 'warehouse.db', readonly: true }, + sql: 'delete from orders', + }), + ).rejects.toThrow('Only read-only SELECT/WITH queries can be executed locally'); + }); + + it('requires a SQLite driver, read-only config, and a database path', async () => { + const executor = createSqliteQueryExecutor(); + + await expect( + executor.execute({ + connectionId: 'warehouse', + projectDir: tempDir, + connection: { driver: 'postgres', path: 'warehouse.db', readonly: true }, + sql: 'select 1', + }), + ).rejects.toThrow('Local SQLite execution cannot run driver "postgres"'); + + await expect( + executor.execute({ + connectionId: 'warehouse', + projectDir: tempDir, + connection: { driver: 'sqlite', path: 'warehouse.db', readonly: false }, + sql: 'select 1', + }), + ).rejects.toThrow('Local query execution requires connections.warehouse.readonly: true'); + + await expect( + executor.execute({ + connectionId: 'warehouse', + projectDir: tempDir, + connection: { driver: 'sqlite', readonly: true }, + sql: 'select 1', + }), + ).rejects.toThrow('Local SQLite execution requires connections.warehouse.path or connections.warehouse.url'); + }); +}); diff --git a/packages/context/src/connections/sqlite-query-executor.ts b/packages/context/src/connections/sqlite-query-executor.ts new file mode 100644 index 00000000..12e60afc --- /dev/null +++ b/packages/context/src/connections/sqlite-query-executor.ts @@ -0,0 +1,94 @@ +import { isAbsolute, resolve } from 'node:path'; +import { fileURLToPath } from 'node:url'; +import Database from 'better-sqlite3'; +import { readFileSync } from 'node:fs'; +import { homedir } from 'node:os'; +import type { + KloSqlQueryExecutionInput, + KloSqlQueryExecutionResult, + KloSqlQueryExecutorPort, +} from './query-executor.js'; +import { normalizeQueryRows } from './query-executor.js'; +import { limitSqlForExecution } from './read-only-sql.js'; + +type SqliteConnectionConfig = Record | undefined; + +function 
connectionDriver(input: KloSqlQueryExecutionInput): string { + return String(input.connection?.driver ?? '').toLowerCase(); +} + +function stringConfigValue(connection: SqliteConnectionConfig, key: string): string | undefined { + const value = connection?.[key]; + return typeof value === 'string' && value.trim().length > 0 ? (resolveStringReference(key, value.trim()) || undefined) : undefined; /* a reference that resolves to an empty string counts as "not configured", so `pathValue ?? url` below can fall through */ +} + +function resolveStringReference(key: string, value: string): string { + if (value.startsWith('env:')) { + return (process.env[value.slice('env:'.length)] ?? '').trim(); /* unset or whitespace-only variables resolve to '' */ + } + if (key !== 'url' && value.startsWith('file:')) { /* for 'url', a file: value is a SQLite file URL, not a pointer file */ + const rawPath = value.slice('file:'.length); + const path = rawPath === '~' || rawPath.startsWith('~/') ? resolve(homedir(), rawPath.slice(2)) : rawPath; /* slice past "~/": a remaining leading "/" would make resolve() discard the home directory */ + return readFileSync(path, 'utf-8').trim(); + } + return value; +} + +function sqlitePathFromUrl(url: string): string { + if (url.startsWith('file:')) { + return fileURLToPath(url); + } + + if (url.startsWith('sqlite:')) { + const parsed = new URL(url); + if (parsed.pathname.length > 0) { + return decodeURIComponent(parsed.pathname); + } + } + + return url; +} + +export function sqliteDatabasePathFromConnection(input: KloSqlQueryExecutionInput): string { + const driver = connectionDriver(input); + if (driver !== 'sqlite' && driver !== 'sqlite3') { + throw new Error(`Local SQLite execution cannot run driver "${input.connection?.driver ?? 'unknown'}".`); + } + if (input.connection?.readonly !== true) { + throw new Error(`Local query execution requires connections.${input.connectionId}.readonly: true.`); + } + + const pathValue = stringConfigValue(input.connection, 'path'); + const urlValue = stringConfigValue(input.connection, 'url'); + if (!pathValue && !urlValue) { + throw new Error( + `Local SQLite execution requires connections.${input.connectionId}.path or connections.${input.connectionId}.url.`, + ); + } + + const candidate = pathValue ?? sqlitePathFromUrl(urlValue as string); + return isAbsolute(candidate) ?
candidate : resolve(input.projectDir ?? process.cwd(), candidate); +} + +export function createSqliteQueryExecutor(): KloSqlQueryExecutorPort { + return { + async execute(input: KloSqlQueryExecutionInput): Promise { + const sql = limitSqlForExecution(input.sql, input.maxRows); + const dbPath = sqliteDatabasePathFromConnection(input); + const db = new Database(dbPath, { readonly: true, fileMustExist: true }); + try { + const statement = db.prepare(sql); + const rows = statement.all() as unknown[]; + return { + headers: statement.columns().map((column) => column.name), + rows: normalizeQueryRows(rows), + totalRows: rows.length, + command: 'SELECT', + rowCount: rows.length, + }; + } finally { + db.close(); + } + }, + }; +} diff --git a/packages/context/src/core/config-reference.test.ts b/packages/context/src/core/config-reference.test.ts new file mode 100644 index 00000000..865c164c --- /dev/null +++ b/packages/context/src/core/config-reference.test.ts @@ -0,0 +1,34 @@ +import { mkdir, writeFile } from 'node:fs/promises'; +import { tmpdir } from 'node:os'; +import { join } from 'node:path'; +import { describe, expect, it } from 'vitest'; +import { resolveKloConfigReference, resolveKloHomePath } from './config-reference.js'; + +describe('KLO config references', () => { + it('resolves env references without returning empty values', () => { + expect(resolveKloConfigReference('env:AI_GATEWAY_API_KEY', { AI_GATEWAY_API_KEY: ' gateway-key ' })).toBe( + 'gateway-key', + ); + expect(resolveKloConfigReference('env:AI_GATEWAY_API_KEY', { AI_GATEWAY_API_KEY: ' ' })).toBeUndefined(); + expect(resolveKloConfigReference('env:AI_GATEWAY_API_KEY', {})).toBeUndefined(); + }); + + it('resolves file references and trims file content', async () => { + const dir = join(tmpdir(), `klo-config-reference-${process.pid}`); + await mkdir(dir, { recursive: true }); + const keyPath = join(dir, 'gateway-key.txt'); + await writeFile(keyPath, 'file-gateway-key\n', 'utf8'); + + 
expect(resolveKloConfigReference(`file:${keyPath}`, {})).toBe('file-gateway-key'); + }); + + it('returns literal values unchanged after trimming blank-only values', () => { + expect(resolveKloConfigReference('provider/model', {})).toBe('provider/model'); + expect(resolveKloConfigReference(' ', {})).toBeUndefined(); + expect(resolveKloConfigReference(undefined, {})).toBeUndefined(); + }); + + it('resolves home-prefixed paths', () => { + expect(resolveKloHomePath('~/klo/key.txt')).toContain('/klo/key.txt'); + }); +}); diff --git a/packages/context/src/core/config-reference.ts b/packages/context/src/core/config-reference.ts new file mode 100644 index 00000000..ba96d41b --- /dev/null +++ b/packages/context/src/core/config-reference.ts @@ -0,0 +1,36 @@ +import { readFileSync } from 'node:fs'; +import { homedir } from 'node:os'; +import { resolve } from 'node:path'; + +export function resolveKloHomePath(path: string): string { + if (path === '~') { + return homedir(); + } + + if (path.startsWith('~/')) { + return resolve(homedir(), path.slice(2)); + } + + return resolve(path); +} + +export function resolveKloConfigReference(value: string | undefined, env: NodeJS.ProcessEnv): string | undefined { + if (!value) { + return undefined; + } + + if (value.startsWith('env:')) { + const envName = value.slice('env:'.length).trim(); + const envValue = env[envName]; + return envValue && envValue.trim().length > 0 ? envValue.trim() : undefined; + } + + if (value.startsWith('file:')) { + const filePath = resolveKloHomePath(value.slice('file:'.length).trim()); + const fileValue = readFileSync(filePath, 'utf8').trim(); + return fileValue.length > 0 ? fileValue : undefined; + } + + const trimmed = value.trim(); + return trimmed.length > 0 ? 
trimmed : undefined; +} diff --git a/packages/context/src/core/config.ts b/packages/context/src/core/config.ts new file mode 100644 index 00000000..847d30ab --- /dev/null +++ b/packages/context/src/core/config.ts @@ -0,0 +1,42 @@ +export interface KloStorageConfig { + configDir?: string; + homeDir?: string; + worktreesDir?: string; +} + +export interface KloGitConfig { + userName: string; + userEmail: string; + bootstrapMessage?: string; + bootstrapAuthor?: string; + bootstrapAuthorEmail?: string; +} + +export interface KloCoreConfig { + storage: KloStorageConfig; + git: KloGitConfig; +} + +export interface KloLogger { + debug(message: string): void; + log(message: string): void; + warn(message: string): void; + error(message: string, error?: unknown): void; +} + +export const noopLogger: KloLogger = { + debug: () => undefined, + log: () => undefined, + warn: () => undefined, + error: () => undefined, +}; + +export function resolveConfigDir(config: KloCoreConfig): string { + const homeDir = config.storage.homeDir ?? '/tmp'; + return config.storage.configDir ?? `${homeDir}/klo/config`; +} + +export function resolveWorktreesDir(config: KloCoreConfig): string { + const homeDir = config.storage.homeDir ?? '/tmp'; + return config.storage.worktreesDir ?? 
`${homeDir}/.worktrees`; +} diff --git a/packages/context/src/core/embedding.ts b/packages/context/src/core/embedding.ts new file mode 100644 index 00000000..71bc7b09 --- /dev/null +++ b/packages/context/src/core/embedding.ts @@ -0,0 +1,5 @@ +export interface KloEmbeddingPort { + maxBatchSize: number; + computeEmbedding(text: string): Promise; + computeEmbeddingsBulk(texts: string[]): Promise; +} diff --git a/packages/context/src/core/file-store.ts b/packages/context/src/core/file-store.ts new file mode 100644 index 00000000..407b6842 --- /dev/null +++ b/packages/context/src/core/file-store.ts @@ -0,0 +1,43 @@ +export interface KloFileWriteResult { + commitHash?: string | null; + [key: string]: unknown; +} + +export interface KloFileReadResult { + content: string; + [key: string]: unknown; +} + +export interface KloFileListResult { + files: string[]; +} + +export interface KloFileHistoryEntry { + sha?: string; + message?: string; + author?: string; + date?: string | Date; + [key: string]: unknown; +} + +export interface KloFileStorePort { + writeFile( + path: string, + content: string, + author: string, + authorEmail: string, + commitMessage: string, + options?: { skipLock?: boolean }, + ): Promise; + readFile(path: string): Promise; + deleteFile( + path: string, + author: string, + authorEmail: string, + commitMessage: string, + options?: { skipLock?: boolean }, + ): Promise; + listFiles(path: string, recursive?: boolean): Promise; + getFileHistory(path: string): Promise; + forWorktree(workdir: string): TSelf; +} diff --git a/packages/context/src/core/git-env.ts b/packages/context/src/core/git-env.ts new file mode 100644 index 00000000..7952d9c2 --- /dev/null +++ b/packages/context/src/core/git-env.ts @@ -0,0 +1,29 @@ +import { simpleGit, type SimpleGit } from 'simple-git'; + +const GIT_HOOK_ENV_KEYS = [ + 'GIT_ALTERNATE_OBJECT_DIRECTORIES', + 'GIT_DIR', + 'GIT_INDEX_FILE', + 'GIT_OBJECT_DIRECTORY', + 'GIT_PREFIX', + 'GIT_QUARANTINE_PATH', + 'GIT_WORK_TREE', + 
'GIT_EDITOR', + 'GIT_EXEC_PATH', + 'GIT_PAGER', + 'PAGER', + 'VISUAL', + 'EDITOR', +] as const; + +function sanitizedGitEnv(env: NodeJS.ProcessEnv = process.env): NodeJS.ProcessEnv { + const sanitized = { ...env }; + for (const key of GIT_HOOK_ENV_KEYS) { + delete sanitized[key]; + } + return sanitized; +} + +export function createSimpleGit(baseDir: string): SimpleGit { + return simpleGit({ baseDir }).env(sanitizedGitEnv()); +} diff --git a/packages/context/src/core/git.service.assert-worktree-clean.test.ts b/packages/context/src/core/git.service.assert-worktree-clean.test.ts new file mode 100644 index 00000000..9567e408 --- /dev/null +++ b/packages/context/src/core/git.service.assert-worktree-clean.test.ts @@ -0,0 +1,75 @@ +import { afterEach, beforeEach, describe, expect, it } from 'vitest'; +import { mkdir, mkdtemp, rm, writeFile } from 'node:fs/promises'; +import { tmpdir } from 'node:os'; +import { join } from 'node:path'; +import type { SimpleGit } from 'simple-git'; +import type { KloCoreConfig } from './config.js'; +import { createSimpleGit } from './git-env.js'; +import { GitService } from './git.service.js'; + +describe('GitService.assertWorktreeClean', () => { + let workdir: string; + let git: SimpleGit; + let gitService: GitService; + + beforeEach(async () => { + workdir = await mkdtemp(join(tmpdir(), 'gitsvc-clean-')); + git = createSimpleGit(workdir); + await git.init(); + await git.addConfig('user.email', 't@test'); + await git.addConfig('user.name', 'Test'); + await writeFile(join(workdir, 'init'), 'init'); + await git.add('.'); + await git.commit('init'); + const coreConfig: KloCoreConfig = { + storage: { configDir: workdir, homeDir: workdir }, + git: { userName: 'Test', userEmail: 't@test' }, + }; + gitService = new GitService(coreConfig); + (gitService as any).git = git; + (gitService as any).configDir = workdir; + }); + + afterEach(async () => rm(workdir, { recursive: true, force: true })); + + it('does not throw on a clean worktree', async () 
=> { + await expect(gitService.assertWorktreeClean()).resolves.toBeUndefined(); + }); + + it('throws when MERGE_HEAD exists', async () => { + await writeFile(join(workdir, '.git', 'MERGE_HEAD'), 'deadbeef\n'); + await expect(gitService.assertWorktreeClean()).rejects.toThrow(/MERGE_HEAD/); + }); + + it('throws when CHERRY_PICK_HEAD exists', async () => { + await writeFile(join(workdir, '.git', 'CHERRY_PICK_HEAD'), 'deadbeef\n'); + await expect(gitService.assertWorktreeClean()).rejects.toThrow(/CHERRY_PICK_HEAD/); + }); + + it('throws when REVERT_HEAD exists', async () => { + await writeFile(join(workdir, '.git', 'REVERT_HEAD'), 'deadbeef\n'); + await expect(gitService.assertWorktreeClean()).rejects.toThrow(/REVERT_HEAD/); + }); + + it('throws when sequencer/todo exists (interrupted multi-commit revert/cherry-pick)', async () => { + await mkdir(join(workdir, '.git', 'sequencer'), { recursive: true }); + await writeFile(join(workdir, '.git', 'sequencer', 'todo'), 'pick deadbeef foo\n'); + await expect(gitService.assertWorktreeClean()).rejects.toThrow(/sequencer/); + }); + + it('throws when the index has unmerged paths', async () => { + await git.checkoutLocalBranch('a'); + await writeFile(join(workdir, 'shared'), 'A version'); + await git.add('.'); + await git.commit('a'); + await git.checkout('master').catch(() => git.checkout('main')); + await git.checkoutLocalBranch('b'); + await writeFile(join(workdir, 'shared'), 'B version'); + await git.add('.'); + await git.commit('b'); + + await git.raw(['merge', 'a']).catch(() => undefined); + + await expect(gitService.assertWorktreeClean()).rejects.toThrow(); + }); +}); diff --git a/packages/context/src/core/git.service.delete-directories.test.ts b/packages/context/src/core/git.service.delete-directories.test.ts new file mode 100644 index 00000000..1eb5ac99 --- /dev/null +++ b/packages/context/src/core/git.service.delete-directories.test.ts @@ -0,0 +1,78 @@ +import { afterEach, beforeEach, describe, expect, it } from 
'vitest'; +import { mkdir, mkdtemp, readdir, rm, writeFile } from 'node:fs/promises'; +import { tmpdir } from 'node:os'; +import { join } from 'node:path'; +import type { SimpleGit } from 'simple-git'; +import type { KloCoreConfig } from './config.js'; +import { createSimpleGit } from './git-env.js'; +import { GitService } from './git.service.js'; + +describe('GitService.deleteDirectories', () => { + let workdir: string; + let git: SimpleGit; + let gitService: GitService; + + beforeEach(async () => { + workdir = await mkdtemp(join(tmpdir(), 'gitsvc-dd-')); + git = createSimpleGit(workdir); + await git.init(); + await git.addConfig('user.email', 't@test'); + await git.addConfig('user.name', 'Test'); + await writeFile(join(workdir, 'keep'), 'k'); + await git.add('.'); + await git.commit('init'); + + const coreConfig: KloCoreConfig = { + storage: { configDir: workdir, homeDir: workdir }, + git: { userName: 'Test', userEmail: 't@test' }, + }; + gitService = new GitService(coreConfig); + (gitService as any).git = git; + (gitService as any).configDir = workdir; + }); + + afterEach(async () => rm(workdir, { recursive: true, force: true })); + + it('removes multiple directories in a single commit', async () => { + for (const name of ['a', 'b', 'c']) { + await mkdir(join(workdir, name), { recursive: true }); + await writeFile(join(workdir, name, 'f.txt'), name); + } + await git.add('.'); + await git.commit('seed 3 dirs'); + const beforeCommits = (await git.log()).total; + + const result = await gitService.deleteDirectories(['a', 'b'], 'gc: drop a+b', 'System User', 'system@example.com'); + expect(result.commitHash).toBeTruthy(); + + const entries = await readdir(workdir); + expect(entries).not.toContain('a'); + expect(entries).not.toContain('b'); + expect(entries).toContain('c'); + + const afterCommits = (await git.log()).total; + expect(afterCommits).toBe(beforeCommits + 1); + }); + + it('no-ops and returns an empty commit hash when the input list is empty', async () => { + const
result = await gitService.deleteDirectories([], 'empty', 'X', 'x@example.com'); + expect(result.commitHash).toBe(''); + expect(result.created).toBe(false); + }); + + it('ignores paths that have already been deleted — commits only the remaining ones', async () => { + await mkdir(join(workdir, 'stale'), { recursive: true }); + await writeFile(join(workdir, 'stale', 'x'), 'x'); + await git.add('.'); + await git.commit('seed stale'); + const result = await gitService.deleteDirectories( + ['stale', 'missing'], + 'gc: drop stale + missing', + 'System User', + 'system@example.com', + ); + expect(result.commitHash).toBeTruthy(); + const entries = await readdir(workdir); + expect(entries).not.toContain('stale'); + }); +}); diff --git a/packages/context/src/core/git.service.reset-hard.test.ts b/packages/context/src/core/git.service.reset-hard.test.ts new file mode 100644 index 00000000..aa256519 --- /dev/null +++ b/packages/context/src/core/git.service.reset-hard.test.ts @@ -0,0 +1,56 @@ +import { afterEach, beforeEach, describe, expect, it } from 'vitest'; +import { mkdtemp, readFile, rm, writeFile } from 'node:fs/promises'; +import { tmpdir } from 'node:os'; +import { join } from 'node:path'; +import type { SimpleGit } from 'simple-git'; +import type { KloCoreConfig } from './config.js'; +import { createSimpleGit } from './git-env.js'; +import { GitService } from './git.service.js'; + +describe('GitService.resetHardTo', () => { + let workdir: string; + let git: SimpleGit; + let gitService: GitService; + + beforeEach(async () => { + workdir = await mkdtemp(join(tmpdir(), 'gitsvc-reset-')); + git = createSimpleGit(workdir); + await git.init(); + await git.addConfig('user.email', 't@test'); + await git.addConfig('user.name', 'Test'); + await writeFile(join(workdir, 'init'), 'init'); + await git.add('.'); + await git.commit('init'); + const coreConfig: KloCoreConfig = { + storage: { configDir: workdir, homeDir: workdir }, + git: { userName: 'Test', userEmail: 't@test' }, + }; 
+ gitService = new GitService(coreConfig); + (gitService as any).git = git; + (gitService as any).configDir = workdir; + }); + + afterEach(async () => rm(workdir, { recursive: true, force: true })); + + it('rewinds HEAD to the target SHA, removing later commits and their files', async () => { + const baseSha = (await git.revparse(['HEAD'])).trim(); + await writeFile(join(workdir, 'a'), 'a1'); + await git.add('.'); + await git.commit('a'); + await writeFile(join(workdir, 'b'), 'b1'); + await git.add('.'); + await git.commit('b'); + + await gitService.resetHardTo(baseSha); + + expect((await git.revparse(['HEAD'])).trim()).toBe(baseSha); + expect(await readFile(join(workdir, 'a'), 'utf-8').catch(() => null)).toBeNull(); + expect(await readFile(join(workdir, 'b'), 'utf-8').catch(() => null)).toBeNull(); + }); + + it('is a no-op when target SHA equals current HEAD', async () => { + const sha = (await git.revparse(['HEAD'])).trim(); + await gitService.resetHardTo(sha); + expect((await git.revparse(['HEAD'])).trim()).toBe(sha); + }); +}); diff --git a/packages/context/src/core/git.service.test.ts b/packages/context/src/core/git.service.test.ts new file mode 100644 index 00000000..c4668a1a --- /dev/null +++ b/packages/context/src/core/git.service.test.ts @@ -0,0 +1,358 @@ +import { mkdtemp, realpath, rm, writeFile } from 'node:fs/promises'; +import { tmpdir } from 'node:os'; +import { join } from 'node:path'; +import { afterEach, beforeEach, describe, expect, it } from 'vitest'; +import type { KloCoreConfig } from './config.js'; +import { GitService } from './git.service.js'; + +// These tests drive a real git repo inside a temp directory — simple-git shells out to the +// system `git` binary. They are fast enough to run as unit tests and catch real issues that +// would be invisible with mocked git. 
+describe('GitService', () => { + let service: GitService; + let tempDir: string; + + beforeEach(async () => { + tempDir = await mkdtemp(join(tmpdir(), 'git-service-spec-')); + + const coreConfig: KloCoreConfig = { + storage: { configDir: tempDir, homeDir: tempDir }, + git: { + userName: 'Test User', + userEmail: 'test@example.com', + bootstrapMessage: 'Initialize test config repo', + bootstrapAuthor: 'test-system', + bootstrapAuthorEmail: 'system@example.com', + }, + }; + + service = new GitService(coreConfig); + await service.onModuleInit(); + }); + + afterEach(async () => { + await rm(tempDir, { recursive: true, force: true }); + }); + + const writeAndCommit = async (filePath: string, content: string, message = 'msg') => { + await writeFile(join(tempDir, filePath), content, 'utf-8'); + return service.commitFile(filePath, message, 'Test', 'test@example.com'); + }; + + describe('cold-start bootstrap commit', () => { + it('writes an empty commit on init so HEAD always resolves', async () => { + // beforeEach already ran onModuleInit() against an empty temp dir. 
+ const head = await service.revParseHead(); + expect(head).toMatch(/^[0-9a-f]{40}$/); + }); + + it('does not double-commit when re-initialized', async () => { + const before = await service.revParseHead(); + await service.onModuleInit(); + const after = await service.revParseHead(); + expect(after).toBe(before); + }); + }); + + describe('commitFile `created` flag', () => { + it('is true for a real commit', async () => { + const info = await writeAndCommit('a.md', '# Hello'); + expect(info.created).toBe(true); + }); + + it('is false on a no-op write (content unchanged)', async () => { + await writeAndCommit('a.md', '# Hello'); + const second = await writeAndCommit('a.md', '# Hello', 'unused'); + expect(second.created).toBe(false); + }); + }); + + describe('addNote / getNote', () => { + it('attaches a note and reads it back', async () => { + const info = await writeAndCommit('a.md', '# Hello'); + await service.addNote(info.commitHash, 'Rich message from LLM'); + expect(await service.getNote(info.commitHash)).toBe('Rich message from LLM'); + }); + + it('returns undefined when no note exists', async () => { + const info = await writeAndCommit('a.md', '# Hello'); + expect(await service.getNote(info.commitHash)).toBeUndefined(); + }); + + it('overwrites an existing note (idempotent retries)', async () => { + const info = await writeAndCommit('a.md', '# Hello'); + await service.addNote(info.commitHash, 'First'); + await service.addNote(info.commitHash, 'Second'); + expect(await service.getNote(info.commitHash)).toBe('Second'); + }); + + it('skips empty/whitespace messages silently', async () => { + const info = await writeAndCommit('a.md', '# Hello'); + await service.addNote(info.commitHash, ' '); + expect(await service.getNote(info.commitHash)).toBeUndefined(); + }); + }); + + describe('getFileHistory', () => { + it('surfaces enhancedMessage when a note is present', async () => { + const info = await writeAndCommit('a.md', '# Hello'); + await 
service.addNote(info.commitHash, 'Note body'); + + const history = await service.getFileHistory('a.md'); + expect(history[0]?.enhancedMessage).toBe('Note body'); + }); + + it('leaves enhancedMessage undefined when no note is attached', async () => { + await writeAndCommit('a.md', '# Hello'); + const history = await service.getFileHistory('a.md'); + expect(history[0]?.enhancedMessage).toBeUndefined(); + }); + }); + + describe('getCommitDiff', () => { + it('returns the patch scoped to the requested path', async () => { + const info = await writeAndCommit('a.md', '# Hello'); + const diff = await service.getCommitDiff(info.commitHash, 'a.md'); + expect(diff).toContain('diff --git'); + expect(diff).toContain('Hello'); + }); + + it('handles the repository initial commit without throwing', async () => { + const info = await writeAndCommit('first.md', 'first'); + await expect(service.getCommitDiff(info.commitHash, 'first.md')).resolves.toBeDefined(); + }); + }); + + describe('squashTo', () => { + const writeAsSystem = async (filePath: string, content: string, message = 'msg') => { + await writeFile(join(tempDir, filePath), content, 'utf-8'); + return service.commitFile(filePath, message, 'System User', 'system@example.com'); + }; + + it('collapses 3 commits after preHead into a single commit', async () => { + const pre = await writeAsSystem('a.md', 'v1'); + const preHead = pre.commitHash; + + await writeAsSystem('b.md', 'b', 'add b'); + await writeAsSystem('c.md', 'c', 'add c'); + await writeAsSystem('a.md', 'v2', 'update a'); + + const result = await service.squashTo(preHead, { + message: 'Ingest: bundle 3 writes', + author: 'System User', + authorEmail: 'system@example.com', + }); + + expect(result.squashed).toBe(true); + expect(result.squashedCount).toBe(3); + expect(result.commitHash).toBeTruthy(); + expect(result.commitHash).not.toBe(preHead); + const commitHash = result.commitHash; + if (!commitHash) { + throw new Error('Expected squash commit hash'); + } + + // The 
squashed commit should preserve the final tree state. + const fileAtSquash = await service.getFileAtCommit('a.md', commitHash); + expect(fileAtSquash).toBe('v2'); + const bAtSquash = await service.getFileAtCommit('b.md', commitHash); + expect(bAtSquash).toBe('b'); + }); + + it('is a no-op when preHead equals HEAD', async () => { + const pre = await writeAsSystem('a.md', 'v1'); + + const result = await service.squashTo(pre.commitHash, { + message: 'nothing to squash', + author: 'System User', + authorEmail: 'system@example.com', + }); + + expect(result.squashed).toBe(false); + expect(result.commitHash).toBe(pre.commitHash); + }); + + it('skips squash when a foreign-author commit sits between preHead and HEAD', async () => { + const pre = await writeAsSystem('a.md', 'v1'); + const preHead = pre.commitHash; + + await writeAsSystem('b.md', 'from us', 'ours'); + // Foreign commit + await writeAndCommit('c.md', 'from someone else', 'foreign'); + await writeAsSystem('d.md', 'ours again', 'ours 2'); + + const result = await service.squashTo(preHead, { + message: 'should be skipped', + author: 'System User', + authorEmail: 'system@example.com', + }); + + expect(result.squashed).toBe(false); + expect(result.reason).toContain('foreign'); + expect(result.squashedCount).toBe(3); + }); + + it('returns cleanly when preHead is empty (no starting commit)', async () => { + const result = await service.squashTo('', { + message: 'would have squashed', + author: 'System User', + authorEmail: 'system@example.com', + }); + + expect(result.squashed).toBe(false); + expect(result.commitHash).toBeNull(); + }); + }); + + describe('worktree lifecycle', () => { + // macOS canonicalizes tmp paths (/var/folders → /private/var/folders) when git + // returns them from `worktree list`. Resolve through realpath() before comparing. 
+ const canonicalSiblingPath = async (suffix: string): Promise => { + const parent = await realpath(join(tempDir, '..')); + return join(parent, `wt-${Date.now()}-${suffix}`); + }; + + it('addWorktree creates a branch + directory at the given startSha', async () => { + const { commitHash } = await writeAndCommit('seed.md', 'seed'); + const wtDir = await canonicalSiblingPath('add'); + await service.addWorktree(wtDir, 'session/alpha', commitHash); + const list = await service.listWorktrees(); + expect(list.find((e) => e.path === wtDir && e.branch === 'refs/heads/session/alpha')).toBeTruthy(); + await service.removeWorktree(wtDir).catch(() => undefined); + await rm(wtDir, { recursive: true, force: true }).catch(() => undefined); + }); + + it('removeWorktree detaches the worktree entry', async () => { + const { commitHash } = await writeAndCommit('seed.md', 'seed'); + const wtDir = await canonicalSiblingPath('rm'); + await service.addWorktree(wtDir, 'session/beta', commitHash); + await service.removeWorktree(wtDir); + const list = await service.listWorktrees(); + expect(list.find((e) => e.path === wtDir)).toBeFalsy(); + }); + + it('deleteBranch removes a branch ref', async () => { + const { commitHash } = await writeAndCommit('seed.md', 'seed'); + const wtDir = await canonicalSiblingPath('br'); + await service.addWorktree(wtDir, 'session/gamma', commitHash); + await service.removeWorktree(wtDir); + await service.deleteBranch('session/gamma', true); + const branches = await (service as unknown as { git: import('simple-git').SimpleGit }).git.branchLocal(); + expect(branches.all).not.toContain('session/gamma'); + await rm(wtDir, { recursive: true, force: true }).catch(() => undefined); + }); + }); + + describe('forWorktree', () => { + it('returns a GitService whose operations run inside the given worktree', async () => { + const { commitHash } = await writeAndCommit('seed.md', 'seed'); + const parent = await realpath(join(tempDir, '..')); + const wtDir = join(parent, 
`wt-${Date.now()}-fw`); + await service.addWorktree(wtDir, 'session/delta', commitHash); + + const scoped = service.forWorktree(wtDir); + expect(await scoped.revParseHead()).toBe(commitHash); + + await service.removeWorktree(wtDir).catch(() => undefined); + await rm(wtDir, { recursive: true, force: true }).catch(() => undefined); + }); + }); + + describe('squashMergeIntoMain', () => { + it('merges a session branch as one commit on main, returning the new SHA + touched paths', async () => { + const { commitHash: baseSha } = await writeAndCommit('seed.md', 'seed'); + const parent = await realpath(join(tempDir, '..')); + const wtDir = join(parent, `wt-${Date.now()}-sm`); + await service.addWorktree(wtDir, 'session/happy', baseSha); + + const scoped = service.forWorktree(wtDir); + await writeFile(join(wtDir, 'a.yaml'), 'one: 1\n', 'utf-8'); + await scoped.commitFile('a.yaml', 'wip a', 'System User', 'system@example.com'); + await writeFile(join(wtDir, 'b.yaml'), 'two: 2\n', 'utf-8'); + await scoped.commitFile('b.yaml', 'wip b', 'System User', 'system@example.com'); + + const result = await service.squashMergeIntoMain( + 'session/happy', + 'System User', + 'system@example.com', + 'Memory capture: 2 files [chat=abcd1234]', + ); + + expect(result.ok).toBe(true); + if (!result.ok) { + throw new Error('unreachable'); + } + expect(result.squashSha).toMatch(/^[0-9a-f]{40}$/); + expect(result.touchedPaths.sort()).toEqual(['a.yaml', 'b.yaml']); + + const mainHead = await service.revParseHead(); + expect(mainHead).toBe(result.squashSha); + expect(mainHead).not.toBe(baseSha); + + await service.removeWorktree(wtDir).catch(() => undefined); + await rm(wtDir, { recursive: true, force: true }).catch(() => undefined); + }); + + it('returns ok with empty touchedPaths when the session branch has no diff vs main', async () => { + const { commitHash: baseSha } = await writeAndCommit('seed.md', 'seed'); + const parent = await realpath(join(tempDir, '..')); + const wtDir = join(parent, 
`wt-${Date.now()}-sm-empty`); + await service.addWorktree(wtDir, 'session/empty', baseSha); + + const result = await service.squashMergeIntoMain( + 'session/empty', + 'System User', + 'system@example.com', + 'should be a no-op', + ); + + expect(result.ok).toBe(true); + if (!result.ok) { + throw new Error('unreachable'); + } + expect(result.touchedPaths).toEqual([]); + expect(result.squashSha).toBe(baseSha); + + await service.removeWorktree(wtDir).catch(() => undefined); + await rm(wtDir, { recursive: true, force: true }).catch(() => undefined); + }); + + it('returns conflict=true and leaves main clean when session+main touched same file differently', async () => { + await writeAndCommit('shared.yaml', 'base\n'); + const base = await service.revParseHead(); + if (!base) { + throw new Error('no base head'); + } + + const parent = await realpath(join(tempDir, '..')); + const wtDir = join(parent, `wt-${Date.now()}-conf`); + await service.addWorktree(wtDir, 'session/conf', base); + const scoped = service.forWorktree(wtDir); + await writeFile(join(wtDir, 'shared.yaml'), 'session-edit\n', 'utf-8'); + await scoped.commitFile('shared.yaml', 'session edit', 'System User', 'system@example.com'); + + // Main edits the same file a different way, after the session branched. 
+ await writeAndCommit('shared.yaml', 'main-edit\n'); + + const result = await service.squashMergeIntoMain( + 'session/conf', + 'System User', + 'system@example.com', + 'Memory capture: 1 file [chat=dead1234]', + ); + + expect(result.ok).toBe(false); + if (result.ok) { + throw new Error('unreachable'); + } + expect(result.conflict).toBe(true); + expect(result.conflictPaths).toContain('shared.yaml'); + + const status = await (service as unknown as { git: import('simple-git').SimpleGit }).git.status(); + expect(status.isClean()).toBe(true); + + await service.removeWorktree(wtDir).catch(() => undefined); + await rm(wtDir, { recursive: true, force: true }).catch(() => undefined); + }); + }); +}); diff --git a/packages/context/src/core/git.service.ts b/packages/context/src/core/git.service.ts new file mode 100644 index 00000000..16388969 --- /dev/null +++ b/packages/context/src/core/git.service.ts @@ -0,0 +1,855 @@ +import { promises as fs } from 'node:fs'; +import { join } from 'node:path'; +import type { SimpleGit } from 'simple-git'; +import { noopLogger, resolveConfigDir, type KloCoreConfig, type KloLogger } from './config.js'; +import { createSimpleGit } from './git-env.js'; + +export interface GitCommitInfo { + commitHash: string; + shortHash: string; + message: string; + author: string; + authorEmail: string; + timestamp: string; + committedDate: string; + /** + * True if this call produced a new commit. False when the file was already up-to-date + * and the returned info describes the pre-existing HEAD commit (no-op write). + */ + created: boolean; + /** Async LLM-generated commit summary attached as a git note. Undefined if no note present. 
*/ + enhancedMessage?: string; +} + +export interface WorktreeEntry { + path: string; + branch: string | null; + head: string | null; +} + +export type SquashMergeResult = + | { ok: true; squashSha: string; touchedPaths: string[] } + | { ok: false; conflict: true; conflictPaths: string[] }; + +export class GitService { + private readonly logger: KloLogger; + private git!: SimpleGit; + private configDir: string; + + constructor( + private readonly config: KloCoreConfig, + logger?: KloLogger, + ) { + this.logger = logger ?? noopLogger; + this.configDir = resolveConfigDir(config); + } + + async onModuleInit(): Promise { + // Ensure config directory exists + await fs.mkdir(this.configDir, { recursive: true }); + this.logger.log(`Config directory ensured at: ${this.configDir}`); + + // Initialize simple-git + this.git = createSimpleGit(this.configDir); + + // Initialize git repository + await this.initialize(); + } + + private async initialize(): Promise { + try { + // Check if already initialized + const isRepo = await this.git.checkIsRepo(); + + if (!isRepo) { + await this.git.init(); + const gitConfig = this.config.git; + await this.git.addConfig('user.name', gitConfig.userName); + await this.git.addConfig('user.email', gitConfig.userEmail); + this.logger.log('Initialized git repository'); + } + + // Ensure HEAD always resolves to a commit so callers (e.g., the memory-agent squash flow) + // can rely on `revParseHead()` returning a SHA. Idempotent: skip if HEAD already exists. + const head = await this.revParseHead(); + if (!head) { + await this.git.commit(this.config.git.bootstrapMessage ?? 'Initialize klo project repository', { + '--allow-empty': null, + '--author': `${this.config.git.bootstrapAuthor ?? 'klo system'} <${ + this.config.git.bootstrapAuthorEmail ?? 
'system@klo.local' + }>`, + }); + this.logger.log('Wrote bootstrap commit to config repo'); + } + } catch (error) { + this.logger.error('Failed to initialize git repository', error); + throw new Error('Failed to initialize git repository'); + } + } + + async commitFile( + filePath: string, + commitMessage: string, + author: string, + authorEmail: string, + ): Promise { + try { + // Stage the file + await this.git.add(filePath); + + // Check if there are any staged changes to commit + const stagedChanges = await this.git.diff(['--cached', '--name-only']); + + if (!stagedChanges.trim()) { + // No changes to commit, file already matches what's in git + this.logger.debug(`No changes to commit for ${filePath}, file already up to date`); + + // Return info about the current HEAD commit + const log = await this.git.log({ maxCount: 1 }); + const commit = log.latest; + + if (!commit) { + throw new Error('Failed to retrieve commit details'); + } + + return { + commitHash: commit.hash, + shortHash: commit.hash.substring(0, 8), + message: commit.message, + author: commit.author_name, + authorEmail: commit.author_email, + timestamp: commit.date, + committedDate: new Date(commit.date).toISOString(), + created: false, + }; + } + + // There are changes to commit + const result = await this.git.commit(commitMessage, { + '--author': `${author} <${authorEmail}>`, + }); + + if (!result.commit) { + throw new Error('No commit hash returned'); + } + + // Get commit details + const log = await this.git.log({ maxCount: 1 }); + const commit = log.latest; + + if (!commit) { + throw new Error('Failed to retrieve commit details'); + } + + return { + commitHash: commit.hash, + shortHash: commit.hash.substring(0, 8), + message: commit.message, + author: commit.author_name, + authorEmail: commit.author_email, + timestamp: commit.date, + committedDate: new Date(commit.date).toISOString(), + created: true, + }; + } catch (error) { + this.logger.error(`Failed to commit file ${filePath}`, error); + 
throw new Error(`Failed to commit file: ${error instanceof Error ? error.message : String(error)}`); + } + } + + /** + * Stage multiple files and produce a single commit. Mirrors `commitFile` but batches + * N paths into one atomic commit — used by the SL capture agent to commit all edits at once. + */ + async commitFiles( + filePaths: string[], + commitMessage: string, + author: string, + authorEmail: string, + ): Promise { + try { + for (const filePath of filePaths) { + await this.git.add(filePath); + } + + const stagedChanges = await this.git.diff(['--cached', '--name-only']); + + if (!stagedChanges.trim()) { + this.logger.debug(`No changes to commit for ${filePaths.length} file(s), already up to date`); + const log = await this.git.log({ maxCount: 1 }); + const commit = log.latest; + if (!commit) { + throw new Error('Failed to retrieve commit details'); + } + return { + commitHash: commit.hash, + shortHash: commit.hash.substring(0, 8), + message: commit.message, + author: commit.author_name, + authorEmail: commit.author_email, + timestamp: commit.date, + committedDate: new Date(commit.date).toISOString(), + created: false, + }; + } + + const result = await this.git.commit(commitMessage, { + '--author': `${author} <${authorEmail}>`, + }); + + if (!result.commit) { + throw new Error('No commit hash returned'); + } + + const log = await this.git.log({ maxCount: 1 }); + const commit = log.latest; + if (!commit) { + throw new Error('Failed to retrieve commit details'); + } + + return { + commitHash: commit.hash, + shortHash: commit.hash.substring(0, 8), + message: commit.message, + author: commit.author_name, + authorEmail: commit.author_email, + timestamp: commit.date, + committedDate: new Date(commit.date).toISOString(), + created: true, + }; + } catch (error) { + this.logger.error(`Failed to batch commit ${filePaths.length} file(s)`, error); + throw new Error(`Failed to batch commit: ${error instanceof Error ? 
error.message : String(error)}`); + } + } + + /** + * Revert working-tree changes for the given paths (equivalent to `git checkout -- <paths>`). + * Used to roll back dirty files when validation fails. + */ + async checkoutFiles(filePaths: string[]): Promise { + if (filePaths.length === 0) { + return; + } + try { + await this.git.checkout(['--', ...filePaths]); + } catch (error) { + this.logger.warn( + `Failed to checkout ${filePaths.length} file(s): ${error instanceof Error ? error.message : String(error)}`, + ); + } + } + + /** + * Read the content of `filePath` as it existed at `commitHash`. Equivalent to + * `git show <commitHash>:<filePath>`. Reads from git object storage, so it's safe against + * concurrent working-tree mutations. + */ + async getFileAtCommit(filePath: string, commitHash: string): Promise { + try { + return await this.git.show([`${commitHash}:${filePath}`]); + } catch (error) { + this.logger.error(`Failed to read ${filePath} at ${commitHash}`, error); + throw new Error(`Failed to read file at commit: ${error instanceof Error ? error.message : String(error)}`); + } + } + + async getFileHistory(filePath: string, limit: number = 50): Promise { + try { + const log = await this.git.log({ + file: filePath, + maxCount: limit, + }); + + // N+1 fetch of notes (one `getNote` per commit): bounded by `limit` (default 50; no cap is enforced here), cold UI path. + return Promise.all( + log.all.map(async (commit) => ({ + commitHash: commit.hash, + shortHash: commit.hash.substring(0, 8), + message: commit.message, + author: commit.author_name, + authorEmail: commit.author_email, + timestamp: commit.date, + committedDate: new Date(commit.date).toISOString(), + created: true, + enhancedMessage: await this.getNote(commit.hash), + })), + ); + } catch (error) { + this.logger.error(`Failed to get history for ${filePath}`, error); + throw new Error(`Failed to retrieve file history: ${error instanceof Error ? error.message : String(error)}`); + } + } + + /** + * Attach or overwrite an LLM-generated summary note on a commit.
+ * Uses `-f` so retries overwrite rather than fail on existing notes (idempotent). + * Callers are responsible for holding `config:repo` Redlock — notes writes mutate + * `.git/refs/notes/commits` and must serialize with commits. + */ + async addNote(commitHash: string, message: string): Promise { + const trimmed = message.trim(); + if (!trimmed) { + return; + } + try { + await this.git.raw(['notes', 'add', '-f', '-m', trimmed, commitHash]); + } catch (error) { + this.logger.error(`Failed to attach note to ${commitHash}`, error); + throw new Error(`Failed to attach git note: ${error instanceof Error ? error.message : String(error)}`); + } + } + + /** + * Read the LLM-generated note for a commit, or undefined if none present. + * Swallows `simple-git`'s "no note found" error so callers can treat it as optional. + */ + async getNote(commitHash: string): Promise { + try { + const note = await this.git.raw(['notes', 'show', commitHash]); + const trimmed = note.trim(); + return trimmed ? trimmed : undefined; + } catch { + // `git notes show` exits non-zero when no note exists — treat as "no note". + return undefined; + } + } + + /** + * Return the patch for a commit, optionally scoped to a single path. + * Strips the commit header above the first `diff --git` so only the patch body remains, + * and clips to 12 KB to bound LLM token cost. Returns '' if the commit changed nothing + * on the requested path (e.g. a commit that only touched other files). + */ + async getCommitDiff(commitHash: string, path?: string): Promise { + const args = ['show', '--format=', '--no-color', '--patch', commitHash]; + if (path) { + args.push('--', path); + } + try { + const raw = await this.git.raw(args); + const diffStart = raw.indexOf('diff --git'); + const body = diffStart >= 0 ? raw.slice(diffStart) : raw.trim(); + const MAX_DIFF_BYTES = 12_000; + return body.length > MAX_DIFF_BYTES ? 
`${body.slice(0, MAX_DIFF_BYTES)}\n… [diff truncated]` : body; + } catch (error) { + this.logger.error(`Failed to read diff for ${commitHash}`, error); + throw new Error(`Failed to read commit diff: ${error instanceof Error ? error.message : String(error)}`); + } + } + + async deleteFile( + filePath: string, + commitMessage: string, + author: string, + authorEmail: string, + ): Promise { + try { + // Remove the file from git + await this.git.rm(filePath); + + // Commit the deletion + const result = await this.git.commit(commitMessage, { + '--author': `${author} <${authorEmail}>`, + }); + + if (!result.commit) { + throw new Error('No commit hash returned'); + } + + // Get commit details + const log = await this.git.log({ maxCount: 1 }); + const commit = log.latest; + + if (!commit) { + throw new Error('Failed to retrieve commit details'); + } + + return { + commitHash: commit.hash, + shortHash: commit.hash.substring(0, 8), + message: commit.message, + author: commit.author_name, + authorEmail: commit.author_email, + timestamp: commit.date, + committedDate: new Date(commit.date).toISOString(), + created: true, + }; + } catch (error) { + this.logger.error(`Failed to delete file ${filePath}`, error); + throw new Error(`Failed to delete file: ${error instanceof Error ? error.message : String(error)}`); + } + } + + /** + * Resolve HEAD to a full commit SHA. Returns the empty string if the repo has no commits yet + * (a freshly-init'd repo before any writes), so callers can treat that as "nothing to reconcile". + */ + async revParseHead(): Promise { + try { + const sha = await this.git.revparse(['HEAD']); + return sha.trim(); + } catch { + return ''; + } + } + + /** + * Verify a commit object exists in the local repo. Used by the reconciler to detect + * the "history was rewritten / partial clone" case before attempting `git diff $sha..HEAD`. 
+ */ + async commitExists(commitHash: string): Promise { + if (!commitHash) { + return false; + } + try { + await this.git.raw(['cat-file', '-e', `${commitHash}^{commit}`]); + return true; + } catch { + return false; + } + } + + /** + * `git diff --name-status $from..$to -- $pathSpec`. Returns one entry per changed path. + * Renames (`R{score}\told\tnew`) are split into a `D` for the old path plus an `A` for + * the new — the reconciler treats each path independently and the new path's row will + * upsert with whatever content the file actually has. + */ + async diffNameStatus( + from: string, + to: string, + pathSpec?: string, + ): Promise> { + const args = ['diff', '--name-status', '-z', `${from}..${to}`]; + if (pathSpec) { + args.push('--', pathSpec); + } + const raw = await this.git.raw(args); + if (!raw) { + return []; + } + // -z output: NUL-separated fields. For A/M/D: "\0\0". For R/C: "\0\0\0". + const fields = raw.split('\0').filter((f) => f.length > 0); + const out: Array<{ status: 'A' | 'M' | 'D'; path: string }> = []; + let i = 0; + while (i < fields.length) { + const status = fields[i]; + const code = status[0]; + if (code === 'R' || code === 'C') { + const oldPath = fields[i + 1]; + const newPath = fields[i + 2]; + out.push({ status: 'D', path: oldPath }); + out.push({ status: 'A', path: newPath }); + i += 3; + } else if (code === 'A' || code === 'M' || code === 'D') { + out.push({ status: code, path: fields[i + 1] }); + i += 2; + } else { + // Unknown status (T type-change, U unmerged, X unknown) — treat as modify, skip if no path + if (fields[i + 1]) { + out.push({ status: 'M', path: fields[i + 1] }); + } + i += 2; + } + } + return out; + } + + /** + * List all paths under the working tree that match `pathSpec`, scoped to HEAD. + * Used for the reconciler's first-ever run when there's no watermark to diff from. 
+ */ + async listFilesAtHead(pathSpec: string): Promise { + try { + const raw = await this.git.raw(['ls-tree', '-r', '-z', '--name-only', 'HEAD', '--', pathSpec]); + if (!raw) { + return []; + } + return raw.split('\0').filter((f) => f.length > 0); + } catch { + return []; + } + } + + /** + * Collapse all commits between `preHead` and current HEAD into a single commit with the given + * message. Used by the memory agent to squash N per-tool-call commits into one ingest commit. + * + * Author-check guard: if any commit between preHead..HEAD has an author other than + * `expectedAuthor`, skips the squash and returns `{ squashed: false, reason: ... }`. This + * prevents accidentally collapsing another writer's commits if writes interleaved with ours. + * + * Caller is responsible for holding the `config:repo` lock so writes and squash serialize. + */ + async squashTo( + preHead: string, + options: { message: string; author: string; authorEmail: string; expectedAuthor?: string }, + ): Promise<{ squashed: boolean; commitHash: string | null; reason?: string; squashedCount?: number }> { + const { message, author, authorEmail } = options; + const expectedAuthor = options.expectedAuthor ?? 
author; + + if (!preHead) { + return { squashed: false, commitHash: null, reason: 'no pre-head recorded (empty repo at start)' }; + } + + let currentHead: string; + try { + currentHead = (await this.git.revparse(['HEAD'])).trim(); + } catch { + return { squashed: false, commitHash: null, reason: 'no HEAD (repo is empty)' }; + } + + if (currentHead === preHead) { + return { squashed: false, commitHash: preHead, reason: 'no new commits' }; + } + + try { + const log = await this.git.log({ from: preHead, to: 'HEAD' }); + const commits = log.all; + if (commits.length === 0) { + return { squashed: false, commitHash: preHead, reason: 'no new commits' }; + } + const foreign = commits.find((c) => c.author_name !== expectedAuthor); + if (foreign) { + this.logger.warn( + `Skipping squash: commit ${foreign.hash.substring(0, 8)} authored by "${foreign.author_name}" ` + + `differs from expected "${expectedAuthor}". Leaving ${commits.length} commit(s) as-is.`, + ); + return { + squashed: false, + commitHash: currentHead, + reason: `foreign commit by ${foreign.author_name}`, + squashedCount: commits.length, + }; + } + + // Soft reset to preHead, then produce a single commit with all the staged changes. + await this.git.reset(['--soft', preHead]); + + const staged = await this.git.diff(['--cached', '--name-only']); + if (!staged.trim()) { + // All intervening commits cancelled each other out — return to preHead and commit nothing. 
+        return { squashed: true, commitHash: preHead, reason: 'no net changes', squashedCount: commits.length };
+      }
+
+      await this.git.commit(message, { '--author': `${author} <${authorEmail}>` });
+      const newHead = (await this.git.revparse(['HEAD'])).trim();
+      this.logger.log(
+        `squashTo: collapsed ${commits.length} commit(s) into ${newHead.substring(0, 8)} (was ${currentHead.substring(0, 8)})`,
+      );
+      return { squashed: true, commitHash: newHead, squashedCount: commits.length };
+    } catch (error) {
+      this.logger.error('Failed to squash commits', error);
+      throw new Error(`Failed to squash commits: ${error instanceof Error ? error.message : String(error)}`);
+    }
+  }
+
+  /**
+   * Squash-merge `branch` into the currently-checked-out branch of THIS worktree (the
+   * main worktree, when called on the root GitService instance). Produces a single
+   * commit holding the branch's changes since its merge-base, with the given message/author.
+   * Returns `{ ok: false, conflict: true, conflictPaths }` and hard-resets the worktree to
+   * HEAD if the merge errors or leaves unmerged paths; `touchedPaths` lists the branch's files.
+   *
+   * Caller must hold the `config:repo` lock so interactive writes don't race against the
+   * merge window.
+   */
+  async squashMergeIntoMain(
+    branch: string,
+    author: string,
+    authorEmail: string,
+    commitMessage: string,
+  ): Promise<SquashMergeResult> {
+    // `git diff HEAD...branch` (three dots) diffs the merge-base against `branch`, i.e.
+    // exactly what the squash applies. Two dots is a plain tip-to-tip tree comparison and
+    // would also list paths that only changed on main after the branch was cut.
+    const diff = await this.git.raw(['diff', '--name-only', `HEAD...${branch}`]);
+    const touchedPaths = diff
+      .split('\n')
+      .map((l) => l.trim())
+      .filter(Boolean);
+    if (touchedPaths.length === 0) {
+      const head = (await this.git.revparse(['HEAD'])).trim();
+      return { ok: true, squashSha: head, touchedPaths: [] };
+    }
+
+    // `git merge --squash` may NOT throw on a textual conflict — it stages the clean
+    // hunks and leaves conflicted paths unmerged in the index. simple-git may also
+    // throw if the underlying git exits non-zero. Handle both: try the merge, then
+    // independently inspect the index for unmerged paths before committing.
+    let mergeError: unknown = null;
+    try {
+      await this.git.raw(['merge', '--squash', branch]);
+    } catch (error) {
+      mergeError = error;
+    }
+
+    const unmergedOut = await this.git.raw(['diff', '--name-only', '--diff-filter=U']).catch(() => '');
+    const conflictPaths = unmergedOut
+      .split('\n')
+      .map((l) => l.trim())
+      .filter(Boolean);
+
+    if (conflictPaths.length > 0 || mergeError !== null) {
+      // `merge --abort` only works for an in-progress merge; squash sets MERGE_MSG but not
+      // MERGE_HEAD, so fall back to a hard reset which clears the index and worktree.
+      await this.git.raw(['merge', '--abort']).catch(() => undefined);
+      await this.git.raw(['reset', '--hard', 'HEAD']).catch(() => undefined);
+      this.logger.warn(
+        `squashMergeIntoMain: conflict merging ${branch} — aborted. conflictPaths=${conflictPaths.join(',')}` +
+          (mergeError ? ` error=${mergeError instanceof Error ? mergeError.message : String(mergeError)}` : ''),
+      );
+      return { ok: false, conflict: true, conflictPaths };
+    }
+
+    await this.git.commit(commitMessage, { '--author': `${author} <${authorEmail}>` });
+    const squashSha = (await this.git.revparse(['HEAD'])).trim();
+    return { ok: true, squashSha, touchedPaths };
+  }
+
+  /**
+   * Rewinds the current branch's HEAD to `targetSha`, discarding all later commits and any
+   * uncommitted worktree changes. 
Used by Stage-3 to back out a failed work-unit's commits + * on the session worktree - simpler and more robust than `git revert` over a multi-commit + * range, which can pause the sequencer on conflicts. + */ + async resetHardTo(targetSha: string): Promise { + await this.git.raw(['reset', '--hard', targetSha]); + } + + /** + * Throws if the worktree is in a state that would make a downstream merge unsafe: an + * in-progress merge, rebase, cherry-pick, revert, interrupted sequencer operation, or + * unmerged paths in the index. + */ + async assertWorktreeClean(): Promise { + const inProgressMarkers: ReadonlyArray<{ relPath: string; label: string }> = [ + { relPath: 'MERGE_HEAD', label: 'MERGE_HEAD' }, + { relPath: 'REBASE_HEAD', label: 'REBASE_HEAD' }, + { relPath: 'CHERRY_PICK_HEAD', label: 'CHERRY_PICK_HEAD' }, + { relPath: 'REVERT_HEAD', label: 'REVERT_HEAD' }, + { relPath: 'sequencer/todo', label: 'sequencer (interrupted multi-commit op)' }, + ]; + + for (const { relPath, label } of inProgressMarkers) { + const gitPath = (await this.git.raw(['rev-parse', '--git-path', relPath])).trim(); + const fullPath = gitPath.startsWith('/') ? gitPath : join(this.configDir, gitPath); + if (await this.fileExists(fullPath)) { + throw new Error( + `Worktree has in-progress git operation (${label} present at ${fullPath}); refusing to proceed`, + ); + } + } + + const unmerged = (await this.git.raw(['diff', '--name-only', '--diff-filter=U']).catch(() => '')) + .split('\n') + .map((line) => line.trim()) + .filter(Boolean); + if (unmerged.length > 0) { + throw new Error( + `Worktree has ${unmerged.length} unmerged path(s): ${unmerged.slice(0, 5).join(', ')}; refusing to proceed`, + ); + } + } + + private async fileExists(path: string): Promise { + try { + await fs.access(path); + return true; + } catch { + return false; + } + } + + /** + * Create a new worktree at `path` with a new branch `branch` pointing at `startSha`. 
+ * Used by the memory agent to isolate per-session writes from interactive saves on main. + */ + async addWorktree(path: string, branch: string, startSha: string): Promise { + try { + await this.git.raw(['worktree', 'add', '-b', branch, path, startSha]); + } catch (error) { + throw new Error(`Failed to add worktree at ${path}: ${error instanceof Error ? error.message : String(error)}`); + } + } + + /** + * Remove the worktree entry and its on-disk directory. Uses `--force` because session + * worktrees are klo-internal — a clean working tree is not required. + */ + async removeWorktree(path: string): Promise { + try { + await this.git.raw(['worktree', 'remove', '--force', path]); + } catch (error) { + this.logger.warn( + `removeWorktree failed for ${path}: ${error instanceof Error ? error.message : String(error)} — attempting prune`, + ); + await this.git.raw(['worktree', 'prune']).catch(() => undefined); + } + } + + /** + * List all worktrees attached to this repo, parsed from `worktree list --porcelain`. + * The main worktree is included. + */ + async listWorktrees(): Promise { + const out = await this.git.raw(['worktree', 'list', '--porcelain']); + const entries: WorktreeEntry[] = []; + let current: Partial = {}; + for (const line of out.split('\n')) { + if (line.startsWith('worktree ')) { + if (current.path) { + entries.push({ + path: current.path, + branch: current.branch ?? null, + head: current.head ?? null, + }); + } + current = { path: line.slice('worktree '.length) }; + } else if (line.startsWith('HEAD ')) { + current.head = line.slice('HEAD '.length); + } else if (line.startsWith('branch ')) { + current.branch = line.slice('branch '.length); + } + } + if (current.path) { + entries.push({ + path: current.path, + branch: current.branch ?? null, + head: current.head ?? null, + }); + } + return entries; + } + + async deleteBranch(branch: string, force = false): Promise { + await this.git.raw(['branch', force ? 
'-D' : '-d', branch]); + } + + /** + * Lightweight factory returning a GitService instance whose simple-git client is scoped + * to `workdir`. Used by memory-agent session worktrees. The returned instance shares + * config and the logger with the parent; it does NOT run `onModuleInit` + * (the main instance has already initialized the repo). + */ + forWorktree(workdir: string): GitService { + const scoped = new GitService(this.config, this.logger); + scoped.git = createSimpleGit(workdir); + scoped.configDir = workdir; + return scoped; + } + + async deleteDirectory( + directoryPath: string, + commitMessage: string, + author: string, + authorEmail: string, + ): Promise { + try { + // Remove the directory recursively from git + await this.git.rm(['-r', directoryPath]); + + // Commit the deletion + const result = await this.git.commit(commitMessage, { + '--author': `${author} <${authorEmail}>`, + }); + + if (!result.commit) { + throw new Error('No commit hash returned'); + } + + // Get commit details + const log = await this.git.log({ maxCount: 1 }); + const commit = log.latest; + + if (!commit) { + throw new Error('Failed to retrieve commit details'); + } + + return { + commitHash: commit.hash, + shortHash: commit.hash.substring(0, 8), + message: commit.message, + author: commit.author_name, + authorEmail: commit.author_email, + timestamp: commit.date, + committedDate: new Date(commit.date).toISOString(), + created: true, + }; + } catch (error) { + this.logger.error(`Failed to delete directory ${directoryPath}`, error); + throw new Error(`Failed to delete directory: ${error instanceof Error ? error.message : String(error)}`); + } + } + + /** + * Remove multiple directories recursively and commit them as one change. + * Paths that don't exist in the working tree are skipped silently (useful for GC + * where the DB-known path has already been evicted by a previous run). 
+ * Returns a GitCommitInfo with created=false and an empty commitHash when no + * paths were actually removed. + */ + async deleteDirectories( + directoryPaths: string[], + commitMessage: string, + author: string, + authorEmail: string, + ): Promise { + if (directoryPaths.length === 0) { + return { + commitHash: '', + shortHash: '', + message: commitMessage, + author, + authorEmail, + timestamp: new Date().toISOString(), + committedDate: new Date().toISOString(), + created: false, + }; + } + const removed: string[] = []; + for (const path of directoryPaths) { + try { + await this.git.rm(['-r', path]); + removed.push(path); + } catch (error) { + this.logger.warn( + `deleteDirectories: skipping ${path}: ${error instanceof Error ? error.message : String(error)}`, + ); + } + } + if (removed.length === 0) { + return { + commitHash: '', + shortHash: '', + message: commitMessage, + author, + authorEmail, + timestamp: new Date().toISOString(), + committedDate: new Date().toISOString(), + created: false, + }; + } + + const result = await this.git.commit(commitMessage, { '--author': `${author} <${authorEmail}>` }); + if (!result.commit) { + throw new Error('No commit hash returned from deleteDirectories'); + } + const log = await this.git.log({ maxCount: 1 }); + const commit = log.latest; + if (!commit) { + throw new Error('Failed to retrieve commit details after deleteDirectories'); + } + return { + commitHash: commit.hash, + shortHash: commit.hash.substring(0, 8), + message: commit.message, + author: commit.author_name, + authorEmail: commit.author_email, + timestamp: commit.date, + committedDate: new Date(commit.date).toISOString(), + created: true, + }; + } +} diff --git a/packages/context/src/core/index.ts b/packages/context/src/core/index.ts new file mode 100644 index 00000000..77f68e4a --- /dev/null +++ b/packages/context/src/core/index.ts @@ -0,0 +1,27 @@ +export type { KloCoreConfig, KloGitConfig, KloLogger, KloStorageConfig } from './config.js'; +export { 
noopLogger, resolveConfigDir, resolveWorktreesDir } from './config.js';
+export { resolveKloConfigReference, resolveKloHomePath } from './config-reference.js';
+export type { KloEmbeddingPort } from './embedding.js';
+export {
+  REDACTED_KLO_CREDENTIAL_VALUE,
+  redactKloSensitiveMetadata,
+  redactKloSensitiveText,
+  redactKloSensitiveValue,
+} from './redaction.js';
+export type {
+  KloFileHistoryEntry,
+  KloFileListResult,
+  KloFileReadResult,
+  KloFileStorePort,
+  KloFileWriteResult,
+} from './file-store.js';
+export type { GitCommitInfo, SquashMergeResult, WorktreeEntry } from './git.service.js';
+export { GitService } from './git.service.js';
+export type {
+  SentinelPayload,
+  SessionOutcome,
+  SessionWorktree,
+  SessionWorktreeServiceDeps,
+  WorktreeConfigPort,
+} from './session-worktree.service.js';
+export { SessionWorktreeService } from './session-worktree.service.js';
diff --git a/packages/context/src/core/redaction.ts b/packages/context/src/core/redaction.ts
new file mode 100644
index 00000000..545fb50e
--- /dev/null
+++ b/packages/context/src/core/redaction.ts
@@ -0,0 +1,52 @@
+/** Placeholder written in place of any value that is considered sensitive. */
+export const REDACTED_KLO_CREDENTIAL_VALUE = '';
+
+const SENSITIVE_FIELD_NAME = /(password|secret|token|api[_-]?key|private[_-]?key|passphrase|credential|authorization|url)/i;
+const URL_CREDENTIAL_PATTERN = /([a-z][a-z0-9+.-]*:\/\/[^:\s/@]+:)([^@\s/]+)(@)/gi;
+
+function isRecord(value: unknown): value is Record<string, unknown> {
+  return typeof value === 'object' && value !== null && !Array.isArray(value);
+}
+
+function isSensitiveField(key: string): boolean {
+  return SENSITIVE_FIELD_NAME.test(key);
+}
+
+/**
+ * Redact a single value stored under `key`. A sensitive key replaces the whole value
+ * (scalar, array or object) with the placeholder; otherwise arrays and plain objects are
+ * walked recursively and anything else is returned unchanged.
+ */
+export function redactKloSensitiveValue(key: string, value: unknown): unknown {
+  if (isSensitiveField(key)) {
+    return REDACTED_KLO_CREDENTIAL_VALUE;
+  }
+  if (Array.isArray(value)) {
+    return value.map((item) => redactKloSensitiveValue(key, item));
+  }
+  if (isRecord(value)) {
+    return redactKloSensitiveMetadata(value);
+  }
+  return value;
+}
+
+/**
+ * Return a redacted copy of `metadata`; the input object is not mutated.
+ * Top-level arrays keep their shape: every element is redacted under the parent key, so
+ * objects listed under a sensitive key are replaced wholesale, exactly like a single
+ * object stored under that key, instead of being walked field by field.
+ */
+export function redactKloSensitiveMetadata(metadata: Record<string, unknown>): Record<string, unknown> {
+  const redacted: Record<string, unknown> = {};
+  for (const [key, value] of Object.entries(metadata)) {
+    redacted[key] = Array.isArray(value)
+      ? value.map((item) => redactKloSensitiveValue(key, item))
+      : redactKloSensitiveValue(key, value);
+  }
+  return redacted;
+}
+
+/** Mask the password of `scheme://user:password@host` credentials embedded in free text. */
+export function redactKloSensitiveText(value: string): string {
+  return value.replace(URL_CREDENTIAL_PATTERN, `$1${REDACTED_KLO_CREDENTIAL_VALUE}$3`);
+}
diff --git a/packages/context/src/core/session-worktree.service.test.ts b/packages/context/src/core/session-worktree.service.test.ts
new file mode 100644
index 00000000..38353a38
--- /dev/null
+++ b/packages/context/src/core/session-worktree.service.test.ts
@@ -0,0 +1,124 @@
+import { mkdtemp, realpath, rm, stat } from 'node:fs/promises';
+import { tmpdir } from 'node:os';
+import { join } from 'node:path';
+import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest';
+import type { KloCoreConfig } from './config.js';
+import { GitService } from './git.service.js';
+import { SessionWorktreeService, type WorktreeConfigPort } from './session-worktree.service.js';
+
+interface TestWorktreeConfig extends WorktreeConfigPort<TestWorktreeConfig> {
+  workdir?: string;
+}
+
+// SessionWorktreeService glues a real GitService to a scoped config adapter. 
+describe('SessionWorktreeService', () => { + let sessionService: SessionWorktreeService; + let gitService: GitService; + let homeDir: string; + + beforeEach(async () => { + homeDir = await mkdtemp(join(tmpdir(), 'sws-spec-')); + homeDir = await realpath(homeDir); + + const coreConfig: KloCoreConfig = { + storage: { configDir: homeDir, homeDir }, + git: { + userName: 'System User', + userEmail: 'system@example.com', + bootstrapMessage: 'Initialize test config repo', + bootstrapAuthor: 'test-system', + bootstrapAuthorEmail: 'system@example.com', + }, + }; + + gitService = new GitService(coreConfig); + await gitService.onModuleInit(); + const configService: TestWorktreeConfig = { + forWorktree: vi.fn( + (workdir: string): TestWorktreeConfig => ({ workdir, forWorktree: configService.forWorktree }), + ), + }; + sessionService = new SessionWorktreeService({ + coreConfig, + gitService, + configService, + }); + }); + + afterEach(async () => { + await rm(homeDir, { recursive: true, force: true }); + }); + + describe('create', () => { + it('creates a worktree + branch and returns scoped services', async () => { + const baseSha = await gitService.revParseHead(); + if (!baseSha) { + throw new Error('no base sha'); + } + + const session = await sessionService.create('chat-abc', baseSha); + + expect(session.workdir).toBe(join(homeDir, '.worktrees', 'session-chat-abc')); + expect(session.branch).toBe('session/chat-abc'); + expect(session.baseSha).toBe(baseSha); + const stats = await stat(session.workdir); + expect(stats.isDirectory()).toBe(true); + + // Scoped git instance reports the worktree's HEAD (= baseSha at creation time). 
+ expect(await session.git.revParseHead()).toBe(baseSha); + + const list = await gitService.listWorktrees(); + expect(list.find((e) => e.path === session.workdir)).toBeTruthy(); + }); + + it('appends a timestamp suffix when the primary dir already exists', async () => { + const baseSha = await gitService.revParseHead(); + if (!baseSha) { + throw new Error('no base sha'); + } + + const first = await sessionService.create('chat-dup', baseSha); + const second = await sessionService.create('chat-dup', baseSha); + + expect(first.workdir).not.toBe(second.workdir); + expect(second.branch).toMatch(/^session\/chat-dup-\d+$/); + }); + }); + + describe('cleanup', () => { + it('success removes the worktree dir and deletes the branch', async () => { + const baseSha = await gitService.revParseHead(); + if (!baseSha) { + throw new Error('no base sha'); + } + + const session = await sessionService.create('chat-cleanup-ok', baseSha); + await sessionService.cleanup(session, 'success'); + + const list = await gitService.listWorktrees(); + expect(list.find((e) => e.path === session.workdir)).toBeFalsy(); + await expect(stat(session.workdir)).rejects.toThrow(); + }); + + it('conflict keeps the worktree and writes a sentinel file', async () => { + const baseSha = await gitService.revParseHead(); + if (!baseSha) { + throw new Error('no base sha'); + } + + const session = await sessionService.create('chat-cleanup-conflict', baseSha); + await sessionService.cleanup(session, 'conflict', { conflictPaths: ['shared.yaml'] }); + + // Dir still exists. 
+ await expect(stat(session.workdir)).resolves.toBeTruthy(); + + const { readFile } = await import('node:fs/promises'); + const raw = await readFile(join(session.workdir, '.klo-outcome'), 'utf-8'); + const parsed = JSON.parse(raw); + expect(parsed.outcome).toBe('conflict'); + expect(parsed.chatId).toBe('chat-cleanup-conflict'); + expect(parsed.conflictPaths).toEqual(['shared.yaml']); + expect(typeof parsed.at).toBe('string'); + }); + }); +}); diff --git a/packages/context/src/core/session-worktree.service.ts b/packages/context/src/core/session-worktree.service.ts new file mode 100644 index 00000000..0ae43576 --- /dev/null +++ b/packages/context/src/core/session-worktree.service.ts @@ -0,0 +1,113 @@ +import { mkdir, stat, writeFile } from 'node:fs/promises'; +import { join } from 'node:path'; +import { noopLogger, resolveWorktreesDir, type KloCoreConfig, type KloLogger } from './config.js'; +import { GitService } from './git.service.js'; + +export type SessionOutcome = 'success' | 'empty' | 'conflict' | 'crash'; + +export interface SentinelPayload { + outcome: SessionOutcome; + at: string; + chatId: string; + baseSha: string; + conflictPaths?: string[]; +} + +export interface WorktreeConfigPort { + forWorktree(workdir: string): TConfig; +} + +export interface SessionWorktree { + chatId: string; + workdir: string; + branch: string; + baseSha: string; + createdAt: Date; + git: GitService; + config: TConfig; +} + +export interface SessionWorktreeServiceDeps> { + coreConfig: KloCoreConfig; + gitService: GitService; + configService: TConfig; + logger?: KloLogger; +} + +export class SessionWorktreeService = WorktreeConfigPort> { + private readonly logger: KloLogger; + private readonly worktreesRoot: string; + + constructor(private readonly deps: SessionWorktreeServiceDeps) { + this.logger = deps.logger ?? 
noopLogger; + this.worktreesRoot = resolveWorktreesDir(deps.coreConfig); + } + + async create(sessionKey: string, baseSha: string): Promise> { + await mkdir(this.worktreesRoot, { recursive: true }); + + let dirName = `session-${sessionKey}`; + let branch = `session/${sessionKey}`; + let workdir = join(this.worktreesRoot, dirName); + + try { + await stat(workdir); + const suffix = Date.now().toString(); + dirName = `session-${sessionKey}-${suffix}`; + branch = `session/${sessionKey}-${suffix}`; + workdir = join(this.worktreesRoot, dirName); + this.logger.warn(`session worktree collision for key=${sessionKey}; using suffix ${suffix}`); + } catch { + // no collision: primary name is free + } + + await this.deps.gitService.addWorktree(workdir, branch, baseSha); + + return { + chatId: sessionKey, + workdir, + branch, + baseSha, + createdAt: new Date(), + git: this.deps.gitService.forWorktree(workdir), + config: this.deps.configService.forWorktree(workdir), + }; + } + + async cleanup( + session: SessionWorktree, + outcome: SessionOutcome, + extra?: { conflictPaths?: string[] }, + ): Promise { + if (outcome === 'success' || outcome === 'empty') { + try { + await this.deps.gitService.removeWorktree(session.workdir); + await this.deps.gitService.deleteBranch(session.branch, true); + } catch (error) { + this.logger.warn( + `cleanup(${outcome}) failed for ${session.chatId}: ${ + error instanceof Error ? error.message : String(error) + }`, + ); + } + return; + } + + const payload: SentinelPayload = { + outcome, + at: new Date().toISOString(), + chatId: session.chatId, + baseSha: session.baseSha, + ...(extra?.conflictPaths ? { conflictPaths: extra.conflictPaths } : {}), + }; + try { + await writeFile(join(session.workdir, '.klo-outcome'), JSON.stringify(payload, null, 2), 'utf-8'); + } catch (error) { + this.logger.warn( + `cleanup(${outcome}) failed to write sentinel for ${session.chatId}: ${ + error instanceof Error ? 
error.message : String(error) + }`, + ); + } + } +} diff --git a/packages/context/src/daemon/index.ts b/packages/context/src/daemon/index.ts new file mode 100644 index 00000000..0a218181 --- /dev/null +++ b/packages/context/src/daemon/index.ts @@ -0,0 +1 @@ +export * from './semantic-layer-compute.js'; diff --git a/packages/context/src/daemon/semantic-layer-compute.test.ts b/packages/context/src/daemon/semantic-layer-compute.test.ts new file mode 100644 index 00000000..846f9355 --- /dev/null +++ b/packages/context/src/daemon/semantic-layer-compute.test.ts @@ -0,0 +1,339 @@ +import { once } from 'node:events'; +import { createServer } from 'node:http'; +import { describe, expect, it, vi } from 'vitest'; +import { createHttpSemanticLayerComputePort, createPythonSemanticLayerComputePort } from './semantic-layer-compute.js'; + +const source = { + name: 'orders', + table: 'public.orders', + grain: ['id'], + columns: [{ name: 'id', type: 'number' }], + joins: [], + measures: [{ name: 'order_count', expr: 'count(*)' }], +}; + +const sourceGenerationInput = { + tables: [ + { + name: 'orders', + db: 'public', + comment: 'Orders table', + columns: [ + { name: 'id', type: 'integer', primaryKey: true, nullable: false, comment: 'Order ID' }, + { name: 'customer_id', type: 'integer' }, + { name: 'amount', type: 'decimal', comment: 'Order amount' }, + ], + }, + { + name: 'customers', + db: 'public', + columns: [ + { name: 'id', type: 'integer', primaryKey: true }, + { name: 'email', type: 'varchar' }, + ], + }, + ], + links: [ + { + fromTable: 'orders', + fromColumn: 'customer_id', + toTable: 'customers', + toColumn: 'id', + relationshipType: 'MANY_TO_ONE', + }, + ], + dialect: 'postgres', +}; + +const sourceGenerationDaemonPayload = { + tables: [ + { + name: 'orders', + db: 'public', + comment: 'Orders table', + columns: [ + { name: 'id', type: 'integer', primary_key: true, nullable: false, comment: 'Order ID' }, + { name: 'customer_id', type: 'integer' }, + { name: 'amount', 
type: 'decimal', comment: 'Order amount' }, + ], + }, + { + name: 'customers', + db: 'public', + columns: [ + { name: 'id', type: 'integer', primary_key: true }, + { name: 'email', type: 'varchar' }, + ], + }, + ], + links: [ + { + from_table: 'orders', + from_column: 'customer_id', + to_table: 'customers', + to_column: 'id', + relationship_type: 'MANY_TO_ONE', + }, + ], + dialect: 'postgres', +}; + +const sourceGenerationDaemonResponse = { + source_count: 2, + sources: [ + { + name: 'orders', + table: 'public.orders', + grain: ['id'], + columns: [{ name: 'id', type: 'number' }], + joins: [ + { + to: 'customers', + on: 'customer_id = customers.id', + relationship: 'many_to_one', + }, + ], + measures: [{ name: 'record_count', expr: 'count(id)' }], + }, + ], +}; + +describe('createPythonSemanticLayerComputePort', () => { + it('calls the semantic-query stdio command', async () => { + const runJson = vi.fn(async () => ({ + sql: 'select count(*) from public.orders', + dialect: 'postgres', + columns: [{ name: 'orders.order_count' }], + plan: { sources_used: ['orders'] }, + })); + const port = createPythonSemanticLayerComputePort({ runJson }); + + await expect( + port.query({ + sources: [source], + dialect: 'postgres', + query: { measures: ['orders.order_count'], dimensions: [] }, + }), + ).resolves.toEqual({ + sql: 'select count(*) from public.orders', + dialect: 'postgres', + columns: [{ name: 'orders.order_count' }], + plan: { sources_used: ['orders'] }, + }); + + expect(runJson).toHaveBeenCalledWith('semantic-query', { + sources: [source], + dialect: 'postgres', + query: { measures: ['orders.order_count'], dimensions: [] }, + }); + }); + + it('calls the semantic-validate stdio command', async () => { + const runJson = vi.fn(async () => ({ + valid: true, + errors: [], + warnings: [], + per_source_warnings: {}, + })); + const port = createPythonSemanticLayerComputePort({ runJson }); + + await expect( + port.validateSources({ + sources: [source], + dialect: 'postgres', + 
recentlyTouched: ['orders'], + }), + ).resolves.toEqual({ + valid: true, + errors: [], + warnings: [], + perSourceWarnings: {}, + }); + + expect(runJson).toHaveBeenCalledWith('semantic-validate', { + sources: [source], + dialect: 'postgres', + recently_touched: ['orders'], + }); + }); + + it('calls the semantic-generate-sources stdio command', async () => { + const runJson = vi.fn(async () => sourceGenerationDaemonResponse); + const port = createPythonSemanticLayerComputePort({ runJson }); + + await expect(port.generateSources(sourceGenerationInput)).resolves.toEqual({ + sourceCount: 2, + sources: sourceGenerationDaemonResponse.sources, + }); + + expect(runJson).toHaveBeenCalledWith('semantic-generate-sources', sourceGenerationDaemonPayload); + }); +}); + +describe('createHttpSemanticLayerComputePort', () => { + it('calls semantic query and validate HTTP endpoints through an injected runner', async () => { + const requestJson = vi.fn(async (path: string) => { + if (path === '/semantic-layer/query') { + return { + sql: 'select count(*) from public.orders', + dialect: 'postgres', + columns: [{ name: 'orders.order_count' }], + plan: { sources_used: ['orders'] }, + }; + } + return { + valid: true, + errors: [], + warnings: [], + per_source_warnings: {}, + }; + }); + const port = createHttpSemanticLayerComputePort({ baseUrl: 'http://127.0.0.1:8765/', requestJson }); + + await expect( + port.query({ + sources: [source], + dialect: 'postgres', + query: { measures: ['orders.order_count'], dimensions: [] }, + }), + ).resolves.toEqual({ + sql: 'select count(*) from public.orders', + dialect: 'postgres', + columns: [{ name: 'orders.order_count' }], + plan: { sources_used: ['orders'] }, + }); + + await expect( + port.validateSources({ + sources: [source], + dialect: 'postgres', + recentlyTouched: ['orders'], + }), + ).resolves.toEqual({ + valid: true, + errors: [], + warnings: [], + perSourceWarnings: {}, + }); + + expect(requestJson).toHaveBeenNthCalledWith(1, 
'/semantic-layer/query', { + sources: [source], + dialect: 'postgres', + query: { measures: ['orders.order_count'], dimensions: [] }, + }); + expect(requestJson).toHaveBeenNthCalledWith(2, '/semantic-layer/validate', { + sources: [source], + dialect: 'postgres', + recently_touched: ['orders'], + }); + }); + + it('calls the semantic source-generation HTTP endpoint through an injected runner', async () => { + const requestJson = vi.fn(async () => sourceGenerationDaemonResponse); + const port = createHttpSemanticLayerComputePort({ baseUrl: 'http://127.0.0.1:8765/', requestJson }); + + await expect(port.generateSources(sourceGenerationInput)).resolves.toEqual({ + sourceCount: 2, + sources: sourceGenerationDaemonResponse.sources, + }); + + expect(requestJson).toHaveBeenCalledWith('/semantic-layer/generate-sources', sourceGenerationDaemonPayload); + }); + + it('posts JSON to a running HTTP daemon endpoint', async () => { + const requests: Array<{ url: string | undefined; body: unknown }> = []; + const server = createServer((request, response) => { + const chunks: Buffer[] = []; + request.on('data', (chunk: Buffer) => chunks.push(chunk)); + request.on('end', () => { + requests.push({ + url: request.url, + body: JSON.parse(Buffer.concat(chunks).toString('utf8')), + }); + response.writeHead(200, { 'content-type': 'application/json' }); + response.end( + JSON.stringify({ + sql: 'select count(*) from public.orders', + dialect: 'postgres', + columns: [{ name: 'orders.order_count' }], + plan: { sources_used: ['orders'] }, + }), + ); + }); + }); + + server.listen(0, '127.0.0.1'); + await once(server, 'listening'); + try { + const address = server.address(); + if (!address || typeof address === 'string') { + throw new Error('expected TCP server address'); + } + const port = createHttpSemanticLayerComputePort({ baseUrl: `http://127.0.0.1:${address.port}` }); + + await expect( + port.query({ + sources: [source], + dialect: 'postgres', + query: { measures: ['orders.order_count'], 
dimensions: [] }, + }), + ).resolves.toMatchObject({ + sql: 'select count(*) from public.orders', + dialect: 'postgres', + }); + + expect(requests).toEqual([ + { + url: '/semantic-layer/query', + body: { + sources: [source], + dialect: 'postgres', + query: { measures: ['orders.order_count'], dimensions: [] }, + }, + }, + ]); + } finally { + server.close(); + } + }); + + it('posts source-generation JSON to a running HTTP daemon endpoint', async () => { + const requests: Array<{ url: string | undefined; body: unknown }> = []; + const server = createServer((request, response) => { + const chunks: Buffer[] = []; + request.on('data', (chunk: Buffer) => chunks.push(chunk)); + request.on('end', () => { + requests.push({ + url: request.url, + body: JSON.parse(Buffer.concat(chunks).toString('utf8')), + }); + response.writeHead(200, { 'content-type': 'application/json' }); + response.end(JSON.stringify(sourceGenerationDaemonResponse)); + }); + }); + + server.listen(0, '127.0.0.1'); + await once(server, 'listening'); + try { + const address = server.address(); + if (!address || typeof address === 'string') { + throw new Error('expected TCP server address'); + } + const port = createHttpSemanticLayerComputePort({ baseUrl: `http://127.0.0.1:${address.port}` }); + + await expect(port.generateSources(sourceGenerationInput)).resolves.toEqual({ + sourceCount: 2, + sources: sourceGenerationDaemonResponse.sources, + }); + + expect(requests).toEqual([ + { + url: '/semantic-layer/generate-sources', + body: sourceGenerationDaemonPayload, + }, + ]); + } finally { + server.close(); + } + }); +}); diff --git a/packages/context/src/daemon/semantic-layer-compute.ts b/packages/context/src/daemon/semantic-layer-compute.ts new file mode 100644 index 00000000..6305ce1f --- /dev/null +++ b/packages/context/src/daemon/semantic-layer-compute.ts @@ -0,0 +1,304 @@ +import { request as httpRequest } from 'node:http'; +import { request as httpsRequest } from 'node:https'; +import { URL } from 
'node:url'; +import { spawn } from 'node:child_process'; +import type { SemanticLayerQueryInput, SemanticLayerSource } from '../sl/index.js'; + +export interface KloSemanticLayerComputeQueryResult { + sql: string; + dialect: string; + columns: Array>; + plan: Record; +} + +export interface KloSemanticLayerComputeValidationResult { + valid: boolean; + errors: string[]; + warnings: string[]; + perSourceWarnings: Record; +} + +export interface KloSemanticLayerSourceGenerationColumnInput { + name: string; + type: string; + primaryKey?: boolean; + nullable?: boolean; + comment?: string | null; +} + +export interface KloSemanticLayerSourceGenerationTableInput { + name: string; + catalog?: string | null; + db?: string | null; + comment?: string | null; + columns: KloSemanticLayerSourceGenerationColumnInput[]; +} + +export interface KloSemanticLayerSourceGenerationLinkInput { + fromTable: string; + fromColumn: string; + toTable: string; + toColumn: string; + relationshipType: string; +} + +export interface KloSemanticLayerSourceGenerationInput { + tables: KloSemanticLayerSourceGenerationTableInput[]; + links: KloSemanticLayerSourceGenerationLinkInput[]; + dialect?: string; +} + +export interface KloSemanticLayerSourceGenerationResult { + sources: Array>; + sourceCount: number; +} + +export interface KloSemanticLayerComputePort { + query(input: { + sources: Array | SemanticLayerSource>; + query: SemanticLayerQueryInput; + dialect: string; + }): Promise; + validateSources(input: { + sources: Array | SemanticLayerSource>; + dialect: string; + recentlyTouched?: string[]; + }): Promise; + generateSources(input: KloSemanticLayerSourceGenerationInput): Promise; +} + +export type KloDaemonCommand = 'semantic-query' | 'semantic-validate' | 'semantic-generate-sources'; + +export type KloDaemonJsonRunner = ( + subcommand: KloDaemonCommand, + payload: Record, +) => Promise>; + +export type KloDaemonHttpJsonRunner = (path: string, payload: Record) => Promise>; + +export interface 
PythonSemanticLayerComputeOptions { + command?: string; + args?: string[]; + cwd?: string; + env?: NodeJS.ProcessEnv; + runJson?: KloDaemonJsonRunner; +} + +export interface HttpSemanticLayerComputeOptions { + baseUrl: string; + requestJson?: KloDaemonHttpJsonRunner; +} + +function parseJsonObject(raw: string, subcommand: string): Record { + const parsed = JSON.parse(raw) as unknown; + if (!parsed || typeof parsed !== 'object' || Array.isArray(parsed)) { + throw new Error(`klo-daemon ${subcommand} returned non-object JSON`); + } + return parsed as Record; +} + +function runProcessJson( + options: Required> & + Pick, +): KloDaemonJsonRunner { + return async (subcommand: KloDaemonCommand, payload: Record): Promise> => + new Promise((resolve, reject) => { + const child = spawn(options.command, [...options.args, subcommand], { + cwd: options.cwd, + env: { ...process.env, ...options.env }, + stdio: ['pipe', 'pipe', 'pipe'], + }); + const stdout: Buffer[] = []; + const stderr: Buffer[] = []; + + child.stdout.on('data', (chunk: Buffer) => stdout.push(chunk)); + child.stderr.on('data', (chunk: Buffer) => stderr.push(chunk)); + child.on('error', reject); + child.on('close', (code) => { + const stdoutText = Buffer.concat(stdout).toString('utf8').trim(); + const stderrText = Buffer.concat(stderr).toString('utf8').trim(); + if (code !== 0) { + reject(new Error(`klo-daemon ${subcommand} failed: ${stderrText || `exit code ${code}`}`)); + return; + } + try { + resolve(parseJsonObject(stdoutText, subcommand)); + } catch (error) { + reject(error); + } + }); + child.stdin.end(`${JSON.stringify(payload)}\n`); + }); +} + +function normalizedBaseUrl(baseUrl: string): string { + return baseUrl.endsWith('/') ? 
baseUrl : `${baseUrl}/`; +} + +function postJson(baseUrl: string): KloDaemonHttpJsonRunner { + return async (path, payload) => + new Promise((resolve, reject) => { + const target = new URL(path.replace(/^\//, ''), normalizedBaseUrl(baseUrl)); + const body = JSON.stringify(payload); + const client = target.protocol === 'https:' ? httpsRequest : httpRequest; + const request = client( + target, + { + method: 'POST', + headers: { + accept: 'application/json', + 'content-type': 'application/json', + 'content-length': Buffer.byteLength(body), + }, + }, + (response) => { + const chunks: Buffer[] = []; + response.on('data', (chunk: Buffer) => chunks.push(chunk)); + response.on('end', () => { + const text = Buffer.concat(chunks).toString('utf8'); + const statusCode = response.statusCode ?? 0; + if (statusCode < 200 || statusCode >= 300) { + reject(new Error(`klo-daemon HTTP ${path} failed with ${statusCode}: ${text}`)); + return; + } + try { + resolve(parseJsonObject(text, path)); + } catch (error) { + reject(error); + } + }); + }, + ); + request.on('error', reject); + request.end(body); + }); +} + +function stringArray(value: unknown): string[] { + return Array.isArray(value) ? value.filter((item): item is string => typeof item === 'string') : []; +} + +function recordValue(value: unknown): Record { + return value && typeof value === 'object' && !Array.isArray(value) ? (value as Record) : {}; +} + +function recordArray(value: unknown): Array> { + return Array.isArray(value) + ? value.filter( + (item): item is Record => item !== null && typeof item === 'object' && !Array.isArray(item), + ) + : []; +} + +function sourceGenerationPayload(input: KloSemanticLayerSourceGenerationInput): Record { + return { + tables: input.tables.map((table) => ({ + name: table.name, + ...(table.catalog !== undefined ? { catalog: table.catalog } : {}), + ...(table.db !== undefined ? { db: table.db } : {}), + ...(table.comment !== undefined ? 
{ comment: table.comment } : {}), + columns: table.columns.map((column) => ({ + name: column.name, + type: column.type, + ...(column.primaryKey !== undefined ? { primary_key: column.primaryKey } : {}), + ...(column.nullable !== undefined ? { nullable: column.nullable } : {}), + ...(column.comment !== undefined ? { comment: column.comment } : {}), + })), + })), + links: input.links.map((link) => ({ + from_table: link.fromTable, + from_column: link.fromColumn, + to_table: link.toTable, + to_column: link.toColumn, + relationship_type: link.relationshipType, + })), + dialect: input.dialect ?? 'postgres', + }; +} + +function sourceGenerationResult(raw: Record): KloSemanticLayerSourceGenerationResult { + return { + sources: recordArray(raw.sources), + sourceCount: typeof raw.source_count === 'number' ? raw.source_count : recordArray(raw.sources).length, + }; +} + +export function createPythonSemanticLayerComputePort( + options: PythonSemanticLayerComputeOptions = {}, +): KloSemanticLayerComputePort { + const command = options.command ?? 'python'; + const args = options.args ?? ['-m', 'klo_daemon']; + const runJson = options.runJson ?? runProcessJson({ command, args, cwd: options.cwd, env: options.env }); + + return { + async query(input) { + const raw = await runJson('semantic-query', { + sources: input.sources, + dialect: input.dialect, + query: input.query, + }); + return { + sql: typeof raw.sql === 'string' ? raw.sql : '', + dialect: typeof raw.dialect === 'string' ? 
raw.dialect : input.dialect, + columns: recordArray(raw.columns), + plan: recordValue(raw.plan), + }; + }, + async validateSources(input) { + const raw = await runJson('semantic-validate', { + sources: input.sources, + dialect: input.dialect, + recently_touched: input.recentlyTouched, + }); + return { + valid: raw.valid === true, + errors: stringArray(raw.errors), + warnings: stringArray(raw.warnings), + perSourceWarnings: recordValue(raw.per_source_warnings) as Record, + }; + }, + async generateSources(input) { + const raw = await runJson('semantic-generate-sources', sourceGenerationPayload(input)); + return sourceGenerationResult(raw); + }, + }; +} + +export function createHttpSemanticLayerComputePort( + options: HttpSemanticLayerComputeOptions, +): KloSemanticLayerComputePort { + const requestJson = options.requestJson ?? postJson(options.baseUrl); + + return { + async query(input) { + const raw = await requestJson('/semantic-layer/query', { + sources: input.sources, + dialect: input.dialect, + query: input.query, + }); + return { + sql: typeof raw.sql === 'string' ? raw.sql : '', + dialect: typeof raw.dialect === 'string' ? 
raw.dialect : input.dialect, + columns: recordArray(raw.columns), + plan: recordValue(raw.plan), + }; + }, + async validateSources(input) { + const raw = await requestJson('/semantic-layer/validate', { + sources: input.sources, + dialect: input.dialect, + recently_touched: input.recentlyTouched, + }); + return { + valid: raw.valid === true, + errors: stringArray(raw.errors), + warnings: stringArray(raw.warnings), + perSourceWarnings: recordValue(raw.per_source_warnings) as Record, + }; + }, + async generateSources(input) { + const raw = await requestJson('/semantic-layer/generate-sources', sourceGenerationPayload(input)); + return sourceGenerationResult(raw); + }, + }; +} diff --git a/packages/context/src/index.test.ts b/packages/context/src/index.test.ts new file mode 100644 index 00000000..5845c0fc --- /dev/null +++ b/packages/context/src/index.test.ts @@ -0,0 +1,12 @@ +import { describe, expect, it } from 'vitest'; + +import { kloContextPackageInfo } from './index.js'; + +describe('kloContextPackageInfo', () => { + it('identifies the context package', () => { + expect(kloContextPackageInfo).toEqual({ + name: '@klo/context', + version: '0.0.0-private', + }); + }); +}); diff --git a/packages/context/src/index.ts b/packages/context/src/index.ts new file mode 100644 index 00000000..8125f9d3 --- /dev/null +++ b/packages/context/src/index.ts @@ -0,0 +1,144 @@ +export interface KloContextPackageInfo { + name: '@klo/context'; + version: '0.0.0-private'; +} + +export const kloContextPackageInfo: KloContextPackageInfo = { + name: '@klo/context', + version: '0.0.0-private', +}; + +export * from './agent/index.js'; +export * from './core/index.js'; +export * from './daemon/index.js'; +export * from './ingest/index.js'; +export * from './llm/index.js'; +export type { + CaptureSession, + CaptureSignals, + MemoryAgentInput, + MemoryAgentResult, + MemoryAgentServiceDeps, + MemoryAgentSettings, + MemoryAgentSourceType, + MemoryCommitMessagePort, + MemoryConnectionPort, + 
MemoryFileStorePort, + MemoryKnowledgeSlRefsPort, + MemoryLockPort, + MemorySlSourceReconcilerPort, + MemoryTelemetryPort, + MemoryToolSetLike, + MemoryToolsetFactoryPort, +} from './memory/index.js'; +export * from './project/index.js'; +export * from './prompts/index.js'; +export * from './search/index.js'; +export * from './sql-analysis/index.js'; +export type { + KloColumnAnalysisResult, + KloColumnDescriptionPromptInput, + KloColumnEmbeddingForeignKeys, + KloColumnEmbeddingTextInput, + KloColumnSampleInput, + KloColumnSampleResult, + KloColumnSampleUpdate, + KloColumnStatsInput, + KloColumnStatsResult, + KloConnectionDriver, + KloConnectorCapabilities, + KloCredentialEnvelope, + KloCredentialEnvReference, + KloCredentialFileReference, + KloDataDictionaryColumnState, + KloDataDictionarySampleDecision, + KloDataDictionarySettings, + KloDataDictionarySkipReason, + KloDataSourceDescriptionPromptInput, + KloDescriptionCachePort, + KloDescriptionColumn, + KloDescriptionColumnTable, + KloDescriptionGenerationSettings, + KloDescriptionGeneratorOptions, + KloDescriptionSource, + KloDescriptionTableInput, + KloDescriptionUpdate, + KloEmbeddingPort as KloScanEmbeddingPort, + KloEmbeddingUpdate, + KloEnrichedColumn, + KloEnrichedRelationship, + KloEnrichedSchema, + KloEnrichedTable, + KloEnrichmentScanPhaseResult, + KloGenerateColumnDescriptionsInput, + KloGenerateDataSourceDescriptionInput, + KloGenerateTableDescriptionInput, + KloOptionalConnectorCapabilities, + KloProgressPort, + KloQueryResult as KloScanQueryResult, + KloReadOnlyQueryInput, + KloRelationshipEndpoint, + KloRelationshipSource, + KloRelationshipType, + KloRelationshipUpdate, + KloResolvedCredentialEnvelope, + KloScanArtifactPaths, + KloScanConnector, + KloScanContext, + KloScanDiffSummary, + KloScanEnrichmentSummary, + KloScanInput, + KloScanLoggerPort, + KloScanMetadataStore, + KloScanMode, + KloScanOrchestratorOptions, + KloScanOrchestratorRunInput, + KloScanOrchestratorRunResult, + 
KloScanRelationshipSummary, + KloScanReport, + KloScanTrigger, + KloScanWarning, + KloScanWarningCode, + KloSchemaColumn, + KloSchemaDimensionType, + KloSchemaForeignKey, + KloSchemaScope, + KloSchemaSnapshot, + KloSchemaTable, + KloSchemaTableKind, + KloSkippedRelationship, + KloStructuralScanPhaseResult, + KloStructuralSyncPlan, + KloStructuralSyncStats, + KloTableDescriptionPromptInput, + KloTableRef, + KloTableSampleInput, + KloTableSampleResult, + KloColumnTypeMapping, +} from './scan/index.js'; +export { + appendKloWordLimitInstruction, + buildKloColumnDescriptionPrompt, + buildKloColumnEmbeddingText, + buildKloDataSourceDescriptionPrompt, + buildKloTableDescriptionPrompt, + createKloConnectorCapabilities, + defaultKloDataDictionarySettings, + inferKloDimensionType, + isKloDataDictionaryCandidate, + kloColumnTypeMappingFromNative, + KloDescriptionGenerator, + KloScanOrchestrator, + normalizeKloNativeType, + REDACTED_KLO_CREDENTIAL_VALUE, + redactKloCredentialEnvelope, + redactKloCredentialValue, + redactKloScanMetadata, + redactKloScanReport, + redactKloScanWarning, + shouldKloSampleColumnForDictionary, +} from './scan/index.js'; +export * from './skills/index.js'; +export * from './sl/index.js'; +export * from './tools/index.js'; +export * from './wiki/index.js'; diff --git a/packages/context/src/ingest/action-identity.test.ts b/packages/context/src/ingest/action-identity.test.ts new file mode 100644 index 00000000..0c855c41 --- /dev/null +++ b/packages/context/src/ingest/action-identity.test.ts @@ -0,0 +1,42 @@ +import { describe, expect, it } from 'vitest'; +import { actionTargetConnectionId, memoryActionIdentity } from './action-identity.js'; + +describe('memory action target identity', () => { + it('keys SL actions by target connection and wiki actions by run connection', () => { + expect( + memoryActionIdentity( + { target: 'sl', type: 'created', key: 'orders', detail: '', targetConnectionId: 'warehouse-b' }, + 'looker-run', + ), + 
).toBe('sl:warehouse-b:orders'); + + expect(memoryActionIdentity({ target: 'sl', type: 'created', key: 'orders', detail: '' }, 'warehouse-a')).toBe( + 'sl:warehouse-a:orders', + ); + + expect( + memoryActionIdentity( + { + target: 'wiki', + type: 'created', + key: 'knowledge/global/orders.md', + detail: '', + targetConnectionId: 'ignored', + }, + 'looker-run', + ), + ).toBe('wiki:looker-run:knowledge/global/orders.md'); + }); + + it('resolves action target connection only for SL actions', () => { + expect( + actionTargetConnectionId( + { target: 'sl', type: 'updated', key: 'orders', detail: '', targetConnectionId: 'warehouse-b' }, + 'looker-run', + ), + ).toBe('warehouse-b'); + expect(actionTargetConnectionId({ target: 'wiki', type: 'updated', key: 'orders', detail: '' }, 'looker-run')).toBe( + 'looker-run', + ); + }); +}); diff --git a/packages/context/src/ingest/action-identity.ts b/packages/context/src/ingest/action-identity.ts new file mode 100644 index 00000000..d0111d09 --- /dev/null +++ b/packages/context/src/ingest/action-identity.ts @@ -0,0 +1,9 @@ +import type { MemoryAction } from '../memory/index.js'; + +export function actionTargetConnectionId(action: MemoryAction, runConnectionId: string): string { + return action.target === 'sl' ? (action.targetConnectionId ?? 
runConnectionId) : runConnectionId; +} + +export function memoryActionIdentity(action: MemoryAction, runConnectionId: string): string { + return `${action.target}:${actionTargetConnectionId(action, runConnectionId)}:${action.key}`; +} diff --git a/packages/context/src/ingest/adapters/dbt-descriptions/match-tables.test.ts b/packages/context/src/ingest/adapters/dbt-descriptions/match-tables.test.ts new file mode 100644 index 00000000..b8df30a2 --- /dev/null +++ b/packages/context/src/ingest/adapters/dbt-descriptions/match-tables.test.ts @@ -0,0 +1,75 @@ +import { describe, expect, it } from 'vitest'; +import type { DbtParsedTable } from './parse-schema.js'; +import { findMatchingKloTable, matchDbtTables, type DbtHostTableLite } from './match-tables.js'; + +const hostTables: DbtHostTableLite[] = [ + { id: '1', name: 'orders', catalog: 'warehouse', db: 'analytics', columns: [{ id: 'c1', name: 'id' }] }, + { id: '2', name: 'orders', catalog: 'warehouse', db: 'staging', columns: [{ id: 'c2', name: 'id' }] }, + { id: '3', name: 'customers', catalog: null, db: null, columns: [{ id: 'c3', name: 'id' }] }, +]; + +function table(input: Partial): DbtParsedTable { + return { + name: 'orders', + description: null, + database: null, + schema: null, + columns: [], + resourceType: 'model', + ...input, + }; +} + +describe('dbt descriptions table matching', () => { + it('uses schema plus name first and checks catalog when dbt database is present', () => { + expect( + findMatchingKloTable(table({ database: 'warehouse', schema: 'analytics' }), hostTables, null)?.id, + ).toBe('1'); + }); + + it('does not fall back to name-only for source tables', () => { + expect(findMatchingKloTable(table({ resourceType: 'source' }), hostTables, null)).toBeUndefined(); + }); + + it('uses targetSchema for models and name-only only when unique', () => { + expect(findMatchingKloTable(table({ resourceType: 'model' }), hostTables, 'staging')?.id).toBe('2'); + expect(findMatchingKloTable(table({ name: 
'customers', resourceType: 'model' }), hostTables, null)?.id).toBe( + '3', + ); + expect(findMatchingKloTable(table({ resourceType: 'model' }), hostTables, null)).toBeUndefined(); + }); + + it('summarizes matched columns and descriptions', () => { + const matches = matchDbtTables( + [ + table({ + name: 'customers', + description: 'Customers', + columns: [ + { name: 'id', description: 'Primary key', dataType: null }, + { name: 'missing', description: 'Missing', dataType: null }, + ], + }), + ], + hostTables, + null, + ); + + expect(matches).toEqual([ + { + dbtTable: 'customers', + dbtSchema: null, + dbtDatabase: null, + hostTableId: '3', + hostTableName: 'customers', + matched: true, + tableDescriptionAction: 'import', + tableDescriptionFound: true, + columnsToImport: 1, + columnsMatched: 1, + columnsTotal: 2, + columnDescriptionsFound: 1, + }, + ]); + }); +}); diff --git a/packages/context/src/ingest/adapters/dbt-descriptions/match-tables.ts b/packages/context/src/ingest/adapters/dbt-descriptions/match-tables.ts new file mode 100644 index 00000000..08da04f6 --- /dev/null +++ b/packages/context/src/ingest/adapters/dbt-descriptions/match-tables.ts @@ -0,0 +1,127 @@ +import type { DbtParsedTable } from './parse-schema.js'; + +export interface DbtHostTableLite { + id: string; + name: string; + catalog: string | null; + db: string | null; + columns: Array<{ id: string; name: string }>; +} + +export interface DbtTableMatch { + dbtTable: string; + dbtSchema: string | null; + dbtDatabase: string | null; + hostTableId: string | null; + hostTableName: string | null; + matched: boolean; + tableDescriptionAction: 'skip' | 'import'; + tableDescriptionFound: boolean; + columnsToImport: number; + columnsMatched: number; + columnsTotal: number; + columnDescriptionsFound: number; +} + +export function matchDbtTables( + dbtTables: DbtParsedTable[], + hostTables: DbtHostTableLite[], + targetSchema?: string | null, +): DbtTableMatch[] { + return dbtTables.map((dbtTable) => { + const 
hostTable = findMatchingKloTable(dbtTable, hostTables, targetSchema); + + if (!hostTable) { + return { + dbtTable: dbtTable.name, + dbtSchema: dbtTable.schema, + dbtDatabase: dbtTable.database, + hostTableId: null, + hostTableName: null, + matched: false, + tableDescriptionAction: 'skip', + tableDescriptionFound: Boolean(dbtTable.description), + columnsToImport: 0, + columnsMatched: 0, + columnsTotal: dbtTable.columns.length, + columnDescriptionsFound: dbtTable.columns.filter((column) => Boolean(column.description)).length, + }; + } + + const analysis = analyzeColumns(dbtTable, hostTable); + return { + dbtTable: dbtTable.name, + dbtSchema: dbtTable.schema, + dbtDatabase: dbtTable.database, + hostTableId: hostTable.id, + hostTableName: hostTable.name, + matched: true, + tableDescriptionAction: dbtTable.description ? 'import' : 'skip', + tableDescriptionFound: Boolean(dbtTable.description), + ...analysis, + }; + }); +} + +export function findMatchingKloTable( + dbtTable: DbtParsedTable, + hostTables: DbtHostTableLite[], + targetSchema?: string | null, +): DbtHostTableLite | undefined { + const dbtName = dbtTable.name.toLowerCase(); + const effectiveSchema = dbtTable.schema ?? targetSchema ?? null; + + if (effectiveSchema) { + const strictMatch = hostTables.find((table) => { + const nameMatches = table.name.toLowerCase() === dbtName; + const schemaMatches = table.db?.toLowerCase() === effectiveSchema.toLowerCase(); + if (!nameMatches || !schemaMatches) { + return false; + } + if (dbtTable.database && table.catalog) { + return table.catalog.toLowerCase() === dbtTable.database.toLowerCase(); + } + return true; + }); + if (strictMatch) { + return strictMatch; + } + } + + if (dbtTable.resourceType === 'source') { + return undefined; + } + + const nameMatches = hostTables.filter((table) => table.name.toLowerCase() === dbtName); + return nameMatches.length === 1 ? 
nameMatches[0] : undefined; +} + +function analyzeColumns( + dbtTable: DbtParsedTable, + hostTable: DbtHostTableLite, +): Pick { + let columnsToImport = 0; + let columnsMatched = 0; + let columnDescriptionsFound = 0; + + for (const dbtColumn of dbtTable.columns) { + const hostColumn = hostTable.columns.find( + (column) => column.name.toLowerCase() === dbtColumn.name.toLowerCase(), + ); + if (!hostColumn) { + continue; + } + columnsMatched++; + if (dbtColumn.description) { + columnDescriptionsFound++; + columnsToImport++; + } + } + + return { + columnsToImport, + columnsMatched, + columnsTotal: dbtTable.columns.length, + columnDescriptionsFound, + }; +} diff --git a/packages/context/src/ingest/adapters/dbt-descriptions/merge-semantic-model-tables.test.ts b/packages/context/src/ingest/adapters/dbt-descriptions/merge-semantic-model-tables.test.ts new file mode 100644 index 00000000..9d1ec735 --- /dev/null +++ b/packages/context/src/ingest/adapters/dbt-descriptions/merge-semantic-model-tables.test.ts @@ -0,0 +1,62 @@ +import { describe, expect, it } from 'vitest'; +import type { ParsedSemanticModel } from '../metricflow/deep-parse.js'; +import { mergeSemanticModelTables } from './merge-semantic-model-tables.js'; +import type { DbtSchemaParseResult } from './parse-schema.js'; + +const semanticModel: ParsedSemanticModel = { + name: 'orders_semantic', + description: 'Order facts', + modelRef: 'fct_orders', + dimensions: [ + { name: 'status', column: 'status', type: 'categorical', description: 'Order status' }, + { name: 'ordered_at', column: 'ordered_at', type: 'time' }, + ], + measures: [], + entities: [], + defaultTimeDimension: null, +}; + +describe('mergeSemanticModelTables', () => { + it('adds missing MetricFlow model refs as dbt model tables', () => { + const input: DbtSchemaParseResult = { projectName: null, dbtVersion: null, tables: [], relationships: [] }; + + expect(mergeSemanticModelTables(input, [semanticModel])).toEqual({ + projectName: null, + dbtVersion: 
null, + relationships: [], + tables: [ + { + name: 'fct_orders', + description: 'Order facts', + database: null, + schema: null, + resourceType: 'model', + columns: [ + { name: 'status', description: 'Order status', dataType: null }, + { name: 'ordered_at', description: null, dataType: 'TIMESTAMP' }, + ], + }, + ], + }); + }); + + it('does not add a duplicate table when schema parsing already found the model ref', () => { + const input: DbtSchemaParseResult = { + projectName: null, + dbtVersion: null, + relationships: [], + tables: [ + { + name: 'FCT_ORDERS', + description: 'Existing', + database: null, + schema: null, + resourceType: 'model', + columns: [], + }, + ], + }; + + expect(mergeSemanticModelTables(input, [semanticModel]).tables).toHaveLength(1); + }); +}); diff --git a/packages/context/src/ingest/adapters/dbt-descriptions/merge-semantic-model-tables.ts b/packages/context/src/ingest/adapters/dbt-descriptions/merge-semantic-model-tables.ts new file mode 100644 index 00000000..2991153f --- /dev/null +++ b/packages/context/src/ingest/adapters/dbt-descriptions/merge-semantic-model-tables.ts @@ -0,0 +1,37 @@ +import type { ParsedSemanticModel } from '../metricflow/deep-parse.js'; +import type { DbtSchemaParseResult } from './parse-schema.js'; + +export function mergeSemanticModelTables( + parseResult: DbtSchemaParseResult, + semanticModels: ParsedSemanticModel[], +): DbtSchemaParseResult { + const merged: DbtSchemaParseResult = { + ...parseResult, + tables: [...parseResult.tables], + relationships: [...parseResult.relationships], + }; + const existingTableNames = new Set(merged.tables.map((table) => table.name.toLowerCase())); + + for (const model of semanticModels) { + const tableName = model.modelRef; + if (existingTableNames.has(tableName.toLowerCase())) { + continue; + } + + merged.tables.push({ + name: tableName, + description: model.description, + database: null, + schema: null, + columns: model.dimensions.map((dimension) => ({ + name: dimension.column, 
+ description: dimension.description ?? null, + dataType: dimension.type === 'time' ? 'TIMESTAMP' : null, + })), + resourceType: 'model', + }); + existingTableNames.add(tableName.toLowerCase()); + } + + return merged; +} diff --git a/packages/context/src/ingest/adapters/dbt-descriptions/parse-schema.test.ts b/packages/context/src/ingest/adapters/dbt-descriptions/parse-schema.test.ts new file mode 100644 index 00000000..f29cab06 --- /dev/null +++ b/packages/context/src/ingest/adapters/dbt-descriptions/parse-schema.test.ts @@ -0,0 +1,214 @@ +import { describe, expect, it } from 'vitest'; +import { parseDbtSchemaFile, parseDbtSchemaFiles } from './parse-schema.js'; + +describe('dbt descriptions schema parser', () => { + it('resolves shared dbt vars and defaults before parsing schema YAML', () => { + const result = parseDbtSchemaFile( + ` +version: 2 +sources: + - name: raw + database: "{{ var('database') }}" + schema: "{{ var('schema', 'fallback_schema') }}" + tables: + - name: orders + identifier: fct_orders + description: "Orders from {{ var('database') }}" + columns: + - name: customer_id + description: "Customer id" + tests: + - relationships: + to: ref('customers') + field: id +models: + - name: "{{ var('model_name', 'orders_model') }}" + schema: "{{ var('model_schema') }}" + columns: + - name: id + description: "Order id" +`, + { path: 'models/schema.yml', variables: new Map([['database', 'analytics'], ['model_schema', 'mart']]) }, + ); + + expect(result.tables).toEqual([ + { + name: 'fct_orders', + description: 'Orders from analytics', + database: 'analytics', + schema: 'fallback_schema', + columns: [ + { + name: 'customer_id', + description: 'Customer id', + dataType: null, + dataTests: [{ name: 'relationships', package: 'dbt', kwargs: { to: "ref('customers')", field: 'id' } }], + }, + ], + resourceType: 'source', + }, + { + name: 'orders_model', + description: null, + database: null, + schema: 'mart', + columns: [{ name: 'id', description: 'Order id', 
dataType: null }], + resourceType: 'model', + }, + ]); + expect(result.relationships).toEqual([ + { + fromTable: 'fct_orders', + fromColumn: 'customer_id', + toTable: 'customers', + toColumn: 'id', + fromSchema: 'fallback_schema', + }, + ]); + }); + + it('deduplicates tables by database schema and name while merging columns', () => { + const result = parseDbtSchemaFiles([ + { + path: 'models/a.yml', + content: ` +version: 2 +models: + - name: orders + description: Orders + columns: + - name: id + description: Primary key +`, + }, + { + path: 'models/b.yml', + content: ` +version: 2 +models: + - name: orders + columns: + - name: status + description: Status + - name: id + data_type: integer +`, + }, + ]); + + expect(result.tables).toEqual([ + { + name: 'orders', + description: 'Orders', + database: null, + schema: null, + resourceType: 'model', + columns: [ + { name: 'id', description: 'Primary key', dataType: 'integer' }, + { name: 'status', description: 'Status', dataType: null }, + ], + }, + ]); + }); + + it('returns an empty result for malformed YAML and preserves unresolved Jinja text', () => { + expect(parseDbtSchemaFile('{{{{ invalid yaml', { path: 'broken.yml' })).toEqual({ + projectName: null, + dbtVersion: null, + tables: [], + relationships: [], + }); + + const unresolved = parseDbtSchemaFile( + ` +version: 2 +models: + - name: "{{ var('missing_model') }}" +`, + { variables: new Map() }, + ); + expect(unresolved.tables[0]?.name).toBe("{{ var('missing_model') }}"); + }); + + it('extracts data tests, constraints, enum values, tags, and freshness', () => { + const result = parseDbtSchemaFile(` +version: 2 +sources: + - name: raw + schema: jaffle + tags: ["raw"] + tables: + - name: customers + tags: ["core"] + loaded_at_field: updated_at + freshness: + warn_after: { count: 12, period: hour } + columns: + - name: id + tests: + - not_null + - unique + - name: status + data_tests: + - accepted_values: + values: ['active', 'inactive'] +models: + - name: orders + 
tags: ["finance"] + loaded_at_field: run_at + columns: + - name: status + data_tests: + - dbt_utils.expression_is_true: + expression: "status is not null" + - accepted_values: ['placed', 'shipped'] +`); + + const customers = result.tables.find((table) => table.name === 'customers'); + expect(customers?.tagsDbt).toEqual(['raw', 'core']); + expect(customers?.freshnessDbt?.loadedAtField).toBe('updated_at'); + expect(customers?.freshnessDbt?.raw).toBeDefined(); + const id = customers?.columns.find((column) => column.name === 'id'); + expect(id?.constraints?.dbt).toEqual({ not_null: true, unique: true }); + const status = customers?.columns.find((column) => column.name === 'status'); + expect(status?.enumValuesDbt).toEqual(['active', 'inactive']); + + const orders = result.tables.find((table) => table.name === 'orders'); + expect(orders?.tagsDbt).toEqual(['finance']); + expect(orders?.freshnessDbt?.loadedAtField).toBe('run_at'); + const ordersStatus = orders?.columns.find((column) => column.name === 'status'); + expect(ordersStatus?.enumValuesDbt).toEqual(['placed', 'shipped']); + expect(ordersStatus?.dataTests).toEqual( + expect.arrayContaining([ + expect.objectContaining({ package: 'dbt_utils', name: 'expression_is_true' }), + expect.objectContaining({ package: 'dbt', name: 'accepted_values' }), + ]), + ); + }); + + it('parses relationships from model column data tests', () => { + const result = parseDbtSchemaFile(` +version: 2 +models: + - name: orders + schema: public + columns: + - name: customer_id + data_tests: + - relationships: + arguments: + to: "ref('customers')" + field: id +`); + + expect(result.relationships).toEqual([ + { + fromTable: 'orders', + fromColumn: 'customer_id', + toTable: 'customers', + toColumn: 'id', + fromSchema: 'public', + }, + ]); + }); +}); diff --git a/packages/context/src/ingest/adapters/dbt-descriptions/parse-schema.ts b/packages/context/src/ingest/adapters/dbt-descriptions/parse-schema.ts new file mode 100644 index 
00000000..86b92ba7 --- /dev/null +++ b/packages/context/src/ingest/adapters/dbt-descriptions/parse-schema.ts @@ -0,0 +1,655 @@ +import { createHash } from 'node:crypto'; +import { parse as parseYaml } from 'yaml'; +import { type KloLogger, noopLogger } from '../../../core/index.js'; +import { resolveJinjaVariables } from '../../dbt-shared/project-vars.js'; + +export interface DbtParsedColumn { + name: string; + description: string | null; + dataType: string | null; + dataTests?: DbtDataTestRef[]; + constraints?: DbtColumnConstraints; + enumValuesDbt?: string[]; +} + +export interface DbtDataTestRef { + name: string; + package: string; + kwargs?: Record; +} + +export interface DbtColumnConstraints { + dbt: { + not_null?: boolean; + unique?: boolean; + }; +} + +export interface DbtParsedRelationship { + fromTable: string; + fromColumn: string; + toTable: string; + toColumn: string; + fromSchema?: string; + toSchema?: string; + description?: string; +} + +export interface DbtParsedTable { + name: string; + description: string | null; + database: string | null; + schema: string | null; + columns: DbtParsedColumn[]; + resourceType?: 'source' | 'model'; + tagsDbt?: string[]; + freshnessDbt?: { + raw?: unknown; + loadedAtField?: string | null; + }; +} + +export interface DbtSchemaParseResult { + projectName: string | null; + dbtVersion: string | null; + tables: DbtParsedTable[]; + relationships: DbtParsedRelationship[]; +} + +export interface DbtSchemaFile { + content: string; + path: string; +} + +interface ParseDbtSchemaOptions { + path?: string; + variables?: Map; + projectName?: string | null; + logger?: KloLogger; +} + +interface DbtSchemaYaml { + version?: number; + sources?: DbtSchemaSource[]; + models?: DbtSchemaModel[]; +} + +interface DbtSchemaSource { + name: string; + description?: string; + database?: string; + schema?: string; + tags?: string[]; + tables?: DbtSchemaTable[]; +} + +interface DbtSchemaTable { + name: string; + description?: string; + 
/** Keyword-argument bag attached to a parsed data test (same type as `DbtDataTestRef.kwargs`). */
type DbtTestKwargs = NonNullable<DbtDataTestRef['kwargs']>;

/**
 * Parses one dbt schema YAML document (`sources:` / `models:`) into tables and relationships.
 *
 * @param content - Raw YAML text of the schema file.
 * @param options - Optional path (used in log messages only), Jinja variables, project name and logger.
 * @returns The parsed tables and relationships; an empty result when the YAML cannot be parsed.
 */
export function parseDbtSchemaFile(content: string, options: ParseDbtSchemaOptions = {}): DbtSchemaParseResult {
  return new DbtSchemaParser(options.logger ?? noopLogger).parseFile(content, options);
}

/**
 * Parses several dbt schema files and merges the results, de-duplicating tables and relationships.
 *
 * @param files - Schema files to parse, in the order they should be merged.
 * @param variables - Optional Jinja variables applied to every file.
 * @param options - Optional project name echoed in the result, and logger.
 * @returns The merged parse result.
 */
export function parseDbtSchemaFiles(
  files: DbtSchemaFile[],
  variables?: ParseDbtSchemaOptions['variables'],
  options: { projectName?: string | null; logger?: KloLogger } = {},
): DbtSchemaParseResult {
  return new DbtSchemaParser(options.logger ?? noopLogger).parseFiles(files, variables, options.projectName ?? null);
}

/**
 * Computes a short content hash over a set of schema files.
 *
 * Files are sorted by path first so the caller's ordering does not affect the hash.
 *
 * @param files - Files whose paths and contents are hashed.
 * @returns The first 16 hex characters of the SHA-256 digest.
 */
export function computeDbtSchemaHash(files: DbtSchemaFile[]): string {
  const combined = [...files]
    .sort((a, b) => a.path.localeCompare(b.path))
    .map((file) => `${file.path}:${file.content}`)
    .join('\n');
  return createHash('sha256').update(combined).digest('hex').substring(0, 16);
}

/**
 * Stateless parser for dbt schema YAML. YAML is untyped at runtime, so every collection and entry
 * is validated before use; malformed entries are skipped instead of throwing.
 */
class DbtSchemaParser {
  constructor(private readonly logger: KloLogger) {}

  /**
   * Parses a single schema document.
   *
   * @param yamlContent - Raw YAML text.
   * @param options - Path (for logging), Jinja variables and project name.
   * @returns Parsed tables/relationships, or an empty result if the YAML is invalid or not a mapping.
   */
  parseFile(yamlContent: string, options: ParseDbtSchemaOptions = {}): DbtSchemaParseResult {
    this.logger.debug(`Parsing schema file: ${options.path ?? 'unknown'}`);

    // Jinja resolution only runs when the caller supplied variables.
    const resolved = options.variables
      ? resolveJinjaVariables(yamlContent, options.variables)
      : { content: yamlContent, unresolvedVars: [] };
    if (resolved.unresolvedVars.length > 0) {
      this.logger.warn(
        `Unresolved dbt variables in ${options.path ?? 'schema file'}: ${resolved.unresolvedVars.join(', ')}`,
      );
    }

    let schema: DbtSchemaYaml;
    try {
      schema = parseYaml(resolved.content) as DbtSchemaYaml;
    } catch (error) {
      this.logger.warn(`Failed to parse YAML${options.path ? ` at ${options.path}` : ''}: ${error}`);
      return this.emptyResult(options.projectName ?? null);
    }

    if (!schema || typeof schema !== 'object') {
      return this.emptyResult(options.projectName ?? null);
    }

    const tables = [...this.parseSources(schema.sources), ...this.parseModels(schema.models)];
    const relationships = [
      ...this.parseSourceRelationships(schema.sources),
      ...this.parseModelRelationships(schema.models),
    ];

    return {
      projectName: options.projectName ?? null,
      dbtVersion: null,
      tables,
      relationships,
    };
  }

  /**
   * Parses every file and merges the results.
   *
   * @param files - Files to parse, in merge order (earlier files win on conflicting scalar fields).
   * @param variables - Optional Jinja variables applied to each file.
   * @param projectName - Project name echoed in the result.
   * @returns De-duplicated tables and relationships across all files.
   */
  parseFiles(
    files: DbtSchemaFile[],
    variables?: ParseDbtSchemaOptions['variables'],
    projectName: string | null = null,
  ): DbtSchemaParseResult {
    const allTables: DbtParsedTable[] = [];
    const allRelationships: DbtParsedRelationship[] = [];

    for (const file of files) {
      const result = this.parseFile(file.content, { path: file.path, variables, projectName });
      allTables.push(...result.tables);
      allRelationships.push(...result.relationships);
    }

    return {
      projectName,
      dbtVersion: null,
      tables: this.deduplicateTables(allTables),
      relationships: this.deduplicateRelationships(allRelationships),
    };
  }

  /** Converts `sources:` entries into tables; the schema falls back to the source name. */
  private parseSources(sources: DbtSchemaSource[] | undefined): DbtParsedTable[] {
    if (!Array.isArray(sources)) {
      return [];
    }

    const tables: DbtParsedTable[] = [];

    for (const source of sources) {
      // Skip `null`/scalar list items and sources without a table list.
      if (!this.isRecord(source) || !Array.isArray(source.tables)) {
        continue;
      }

      const sourceSchema = source.schema ?? source.name;
      const sourceDatabase = source.database ?? null;
      const sourceTags = this.normalizeTagList(source.tags);

      for (const table of source.tables) {
        if (!this.isRecord(table)) {
          continue;
        }
        // `identifier` overrides the logical table name when present.
        const tableName = table.identifier ?? table.name;
        if (!tableName) {
          // Same rule as models: an unnamed entry cannot be matched to anything downstream.
          continue;
        }

        const tagsDbt = this.mergeTagsDbt(sourceTags, this.normalizeTagList(table.tags));
        const freshnessDbt = this.buildFreshnessDbt(table.freshness, table.loaded_at_field);
        tables.push({
          name: tableName,
          description: this.normalizeDescription(table.description),
          database: sourceDatabase,
          schema: sourceSchema,
          columns: this.parseColumns(table.columns),
          resourceType: 'source',
          ...(tagsDbt ? { tagsDbt } : {}),
          ...(freshnessDbt ? { freshnessDbt } : {}),
        });
      }
    }

    return tables;
  }

  /** Converts `models:` entries into tables; unnamed or malformed entries are skipped. */
  private parseModels(models: DbtSchemaModel[] | undefined): DbtParsedTable[] {
    if (!Array.isArray(models)) {
      return [];
    }

    const tables: DbtParsedTable[] = [];

    for (const model of models) {
      if (!this.isRecord(model) || !model.name) {
        continue;
      }

      const tagsDbt = this.mergeTagsDbt(this.normalizeTagList(model.tags));
      const freshnessDbt = this.buildFreshnessDbt(model.freshness, model.loaded_at_field);
      tables.push({
        name: model.name,
        description: this.normalizeDescription(model.description),
        database: model.database ?? null,
        schema: model.schema ?? null,
        columns: this.parseColumns(model.columns),
        resourceType: 'model',
        ...(tagsDbt ? { tagsDbt } : {}),
        ...(freshnessDbt ? { freshnessDbt } : {}),
      });
    }

    return tables;
  }

  /** Converts column entries; entries that are not mappings or have no name are skipped. */
  private parseColumns(columns: DbtSchemaColumn[] | undefined): DbtParsedColumn[] {
    if (!Array.isArray(columns)) {
      return [];
    }

    const parsed: DbtParsedColumn[] = [];
    for (const column of columns) {
      if (!this.isRecord(column) || !column.name) {
        continue;
      }
      const { refs, constraints, enumValues } = this.parseDataTests(this.testsOf(column));
      parsed.push({
        name: column.name,
        description: this.normalizeDescription(column.description),
        dataType: column.data_type ?? null,
        ...(refs.length > 0 ? { dataTests: refs } : {}),
        ...(constraints ? { constraints } : {}),
        ...(enumValues.length > 0 ? { enumValuesDbt: enumValues } : {}),
      });
    }
    return parsed;
  }

  /** Returns the column's test list (`data_tests` preferred over `tests`), or `[]` if it is not a list. */
  private testsOf(column: DbtSchemaColumn): DbtSchemaDataTest[] {
    const tests = column.data_tests ?? column.tests;
    return Array.isArray(tests) ? tests : [];
  }

  /**
   * Summarises a column's data tests.
   *
   * @param tests - Raw test entries: either a test name string or a `{ testName: args }` mapping.
   * @returns `refs` (one per recognised test), `constraints` (set when `not_null`/`unique` is present)
   *   and `enumValues` (stringified `accepted_values`).
   */
  private parseDataTests(tests: DbtSchemaDataTest[] | undefined): {
    refs: DbtDataTestRef[];
    constraints: DbtColumnConstraints | undefined;
    enumValues: string[];
  } {
    const refs: DbtDataTestRef[] = [];
    const dbt: { not_null?: boolean; unique?: boolean } = {};
    const enumValues: string[] = [];
    if (!Array.isArray(tests) || tests.length === 0) {
      return { refs, constraints: undefined, enumValues };
    }

    for (const test of tests) {
      if (typeof test === 'string') {
        const parsed = this.parseTestNameString(test);
        refs.push(parsed);
        if (parsed.package === 'dbt' && parsed.name === 'not_null') {
          dbt.not_null = true;
        }
        if (parsed.package === 'dbt' && parsed.name === 'unique') {
          dbt.unique = true;
        }
        continue;
      }

      // An empty YAML list item parses to `null`; skip anything that is not a mapping.
      if (!this.isRecord(test)) {
        continue;
      }

      for (const [key, value] of Object.entries(test)) {
        const kwargs = this.asKwargs(value);

        if (key === 'relationships') {
          refs.push({ name: 'relationships', package: 'dbt', ...(kwargs ? { kwargs } : {}) });
          continue;
        }
        if (key === 'not_null') {
          refs.push({ name: 'not_null', package: 'dbt' });
          dbt.not_null = true;
          continue;
        }
        if (key === 'unique') {
          refs.push({ name: 'unique', package: 'dbt' });
          dbt.unique = true;
          continue;
        }
        if (key === 'accepted_values') {
          if (Array.isArray(value)) {
            // Shorthand form: `accepted_values: ['a', 'b']`.
            enumValues.push(...value.map((item) => String(item)));
            refs.push({ name: 'accepted_values', package: 'dbt', kwargs: { values: value } });
            continue;
          }
          if (kwargs) {
            const values = this.acceptedValuesOf(kwargs);
            if (values) {
              enumValues.push(...values.map((item) => String(item)));
            }
            refs.push({ name: 'accepted_values', package: 'dbt', kwargs });
            continue;
          }
          // A null/scalar `accepted_values` falls through and is recorded as a plain test ref.
        }
        refs.push({
          ...this.parseTestNameString(key),
          ...(kwargs ? { kwargs } : {}),
        });
      }
    }

    const constraints = dbt.not_null || dbt.unique ? { dbt } : undefined;
    return { refs, constraints, enumValues };
  }

  /**
   * Reads the value list of an `accepted_values` test from either `values:` or the nested
   * `arguments: { values: ... }` form (the same nesting `relationships` already accepts).
   */
  private acceptedValuesOf(kwargs: DbtTestKwargs): unknown[] | undefined {
    const direct = kwargs['values'];
    if (Array.isArray(direct)) {
      return direct;
    }
    const nested = kwargs['arguments'];
    if (this.isRecord(nested)) {
      const nestedValues = (nested as { values?: unknown }).values;
      if (Array.isArray(nestedValues)) {
        return nestedValues;
      }
    }
    return undefined;
  }

  /** Splits `package.test_name` into its parts; a bare name belongs to the built-in `dbt` package. */
  private parseTestNameString(value: string): { name: string; package: string } {
    const parts = value.split('.');
    if (parts.length >= 2) {
      return { package: parts[0]!, name: parts.slice(1).join('.') };
    }
    return { package: 'dbt', name: value };
  }

  /** Collects `relationships` tests declared on source table columns. */
  private parseSourceRelationships(sources: DbtSchemaSource[] | undefined): DbtParsedRelationship[] {
    if (!Array.isArray(sources)) {
      return [];
    }

    const relationships: DbtParsedRelationship[] = [];

    for (const source of sources) {
      if (!this.isRecord(source) || !Array.isArray(source.tables)) {
        continue;
      }
      const sourceSchema = source.schema ?? source.name;

      for (const table of source.tables) {
        if (!this.isRecord(table) || !Array.isArray(table.columns)) {
          continue;
        }
        const tableName = table.identifier ?? table.name;
        if (!tableName) {
          continue;
        }

        for (const column of table.columns) {
          if (!this.isRecord(column) || !column.name) {
            continue;
          }
          for (const test of this.testsOf(column)) {
            const relationship = this.parseRelationshipTest(test, tableName, column.name, sourceSchema);
            if (relationship) {
              relationships.push(relationship);
            }
          }
        }
      }
    }

    return relationships;
  }

  /** Collects `relationships` tests declared on model columns. */
  private parseModelRelationships(models: DbtSchemaModel[] | undefined): DbtParsedRelationship[] {
    if (!Array.isArray(models)) {
      return [];
    }

    const relationships: DbtParsedRelationship[] = [];

    for (const model of models) {
      if (!this.isRecord(model) || !model.name || !Array.isArray(model.columns)) {
        continue;
      }

      for (const column of model.columns) {
        if (!this.isRecord(column) || !column.name) {
          continue;
        }
        for (const test of this.testsOf(column)) {
          const relationship = this.parseRelationshipTest(test, model.name, column.name, model.schema ?? undefined);
          if (relationship) {
            relationships.push(relationship);
          }
        }
      }
    }

    return relationships;
  }

  /**
   * Converts one test entry into a relationship when it is a complete `relationships` test.
   *
   * @returns The relationship, or `null` for any other test, an incomplete test or an unparsable ref.
   */
  private parseRelationshipTest(
    test: DbtSchemaDataTest,
    fromTable: string,
    fromColumn: string,
    fromSchema?: string,
  ): DbtParsedRelationship | null {
    if (typeof test === 'string' || !this.isRecord(test) || !test.relationships) {
      return null;
    }

    const relationship = test.relationships;
    // Top-level `to`/`field` win over the nested `arguments:` form.
    const toRef = relationship.to ?? relationship.arguments?.to;
    const toColumn = relationship.field ?? relationship.arguments?.field;

    if (typeof toRef !== 'string' || typeof toColumn !== 'string' || !toRef || !toColumn) {
      this.logger.debug(`Skipping incomplete relationship test for ${fromTable}.${fromColumn}`);
      return null;
    }

    const toTable = this.parseRef(toRef);
    if (!toTable) {
      this.logger.debug(`Could not parse ref: ${toRef}`);
      return null;
    }

    return {
      fromTable,
      fromColumn,
      toTable,
      toColumn,
      ...(fromSchema ? { fromSchema } : {}),
    };
  }

  /**
   * Extracts the target table/model name from a `ref(...)` or `source(...)` expression.
   *
   * Supported forms: `ref('model')`, `ref('model', v=2)` / `version=2`, `ref('package', 'model')`
   * and `source('source_name', 'table')`.
   */
  private parseRef(refString: string): string | null {
    const versionKwarg = String.raw`(?:,\s*(?:v|version)\s*=\s*[^,)]+)?`;

    const refMatch = refString.match(new RegExp(String.raw`ref\s*\(\s*['"]([^'"]+)['"]\s*${versionKwarg}\s*\)`));
    if (refMatch?.[1]) {
      return refMatch[1];
    }

    // Two-argument form: the first argument is the project/package, the second is the model.
    const qualifiedRefMatch = refString.match(
      new RegExp(String.raw`ref\s*\(\s*['"][^'"]+['"]\s*,\s*['"]([^'"]+)['"]\s*${versionKwarg}\s*\)`),
    );
    if (qualifiedRefMatch?.[1]) {
      return qualifiedRefMatch[1];
    }

    const sourceMatch = refString.match(/source\s*\(\s*['"][^'"]+['"]\s*,\s*['"]([^'"]+)['"]\s*\)/);
    if (sourceMatch?.[1]) {
      return sourceMatch[1];
    }

    return null;
  }

  /** Trims a description; blank, missing or non-string values become `null`. */
  private normalizeDescription(description: string | undefined): string | null {
    if (typeof description !== 'string') {
      return null;
    }
    const trimmed = description.trim();
    return trimmed.length > 0 ? trimmed : null;
  }

  /** Normalises `tags` to a string list; accepts a single string tag as well as a list. */
  private normalizeTagList(tags: unknown): string[] {
    if (typeof tags === 'string') {
      return [tags];
    }
    if (!Array.isArray(tags)) {
      return [];
    }
    return tags.map((tag) => String(tag));
  }

  /** Concatenates string lists in order, dropping duplicates; returns `undefined` when empty. */
  private mergeTagsDbt(...lists: Array<string[] | undefined>): string[] | undefined {
    const merged: string[] = [];
    const seen = new Set<string>();
    for (const list of lists) {
      for (const item of list ?? []) {
        if (!seen.has(item)) {
          seen.add(item);
          merged.push(item);
        }
      }
    }
    return merged.length > 0 ? merged : undefined;
  }

  /**
   * Builds the freshness record for a table.
   *
   * @returns `undefined` when neither a freshness block nor a non-blank `loaded_at_field` exists.
   *   With a freshness block, `loadedAtField` is always present (`null` when blank or missing).
   */
  private buildFreshnessDbt(freshness: unknown, loadedAtField: string | undefined): DbtParsedTable['freshnessDbt'] {
    // YAML may hand us a non-string scalar here; only strings can be trimmed.
    const loadedTrim = typeof loadedAtField === 'string' ? loadedAtField.trim() : undefined;
    const hasFreshness = freshness !== undefined && freshness !== null;
    if (!hasFreshness && !loadedTrim) {
      return undefined;
    }
    if (hasFreshness) {
      return { raw: freshness, loadedAtField: loadedTrim || null };
    }
    return { loadedAtField: loadedTrim };
  }

  /** Merges tables sharing database, schema and name (case-insensitive); the first occurrence wins on scalars. */
  private deduplicateTables(tables: DbtParsedTable[]): DbtParsedTable[] {
    const seen = new Map<string, DbtParsedTable>();

    for (const table of tables) {
      const key = `${table.database ?? ''}.${table.schema ?? ''}.${table.name}`.toLowerCase();
      const existing = seen.get(key);

      if (!existing) {
        seen.set(key, table);
        continue;
      }

      seen.set(key, {
        ...existing,
        description: existing.description ?? table.description,
        columns: this.mergeColumns(existing.columns, table.columns),
        tagsDbt: this.mergeTagsDbt(existing.tagsDbt, table.tagsDbt),
        freshnessDbt: this.mergeFreshnessDbt(existing.freshnessDbt, table.freshnessDbt),
      });
    }

    return Array.from(seen.values());
  }

  /** Merges two column lists by case-insensitive name; existing values win, incoming values fill gaps. */
  private mergeColumns(existing: DbtParsedColumn[], incoming: DbtParsedColumn[]): DbtParsedColumn[] {
    const seen = new Map<string, DbtParsedColumn>();

    for (const column of existing) {
      seen.set(column.name.toLowerCase(), column);
    }

    for (const column of incoming) {
      const key = column.name.toLowerCase();
      const existingColumn = seen.get(key);

      if (!existingColumn) {
        seen.set(key, column);
        continue;
      }

      seen.set(key, {
        ...existingColumn,
        description: existingColumn.description ?? column.description,
        dataType: existingColumn.dataType ?? column.dataType,
        dataTests: this.mergeDbtDataTests(existingColumn.dataTests, column.dataTests),
        constraints: this.mergeDbtConstraints(existingColumn.constraints, column.constraints),
        enumValuesDbt: this.mergeStringList(existingColumn.enumValuesDbt, column.enumValuesDbt),
      });
    }

    return Array.from(seen.values());
  }

  /** Keeps the first relationship per `fromTable.fromColumn->toTable.toColumn` (case-insensitive; schemas ignored). */
  private deduplicateRelationships(relationships: DbtParsedRelationship[]): DbtParsedRelationship[] {
    const seen = new Set<string>();
    const result: DbtParsedRelationship[] = [];

    for (const relationship of relationships) {
      const key =
        `${relationship.fromTable}.${relationship.fromColumn}->${relationship.toTable}.${relationship.toColumn}`.toLowerCase();
      if (!seen.has(key)) {
        seen.add(key);
        result.push(relationship);
      }
    }

    return result;
  }

  /** Merges freshness records field by field, preferring the existing record's values. */
  private mergeFreshnessDbt(
    existing?: DbtParsedTable['freshnessDbt'],
    incoming?: DbtParsedTable['freshnessDbt'],
  ): DbtParsedTable['freshnessDbt'] {
    if (!existing && !incoming) {
      return undefined;
    }
    const raw = existing?.raw !== undefined ? existing.raw : incoming?.raw;
    const loadedAtField = existing?.loadedAtField ?? incoming?.loadedAtField;
    return {
      ...(raw !== undefined ? { raw } : {}),
      ...(loadedAtField !== undefined ? { loadedAtField } : {}),
    };
  }

  /** ORs the `not_null` / `unique` flags of both sides; returns `undefined` when neither flag is set. */
  private mergeDbtConstraints(
    existing?: DbtColumnConstraints,
    incoming?: DbtColumnConstraints,
  ): DbtColumnConstraints | undefined {
    const notNull = !!(existing?.dbt.not_null || incoming?.dbt.not_null);
    const unique = !!(existing?.dbt.unique || incoming?.dbt.unique);
    if (!notNull && !unique) {
      return undefined;
    }
    return { dbt: { ...(notNull ? { not_null: true } : {}), ...(unique ? { unique: true } : {}) } };
  }

  /** Order-preserving, duplicate-free union of two string lists. */
  private mergeStringList(existing?: string[], incoming?: string[]): string[] | undefined {
    return this.mergeTagsDbt(existing, incoming);
  }

  /**
   * Unions two test lists. Tests are identified by package, name and a hash of their JSON-serialised
   * kwargs; when both sides contain the same test, the incoming entry replaces the existing one.
   */
  private mergeDbtDataTests(existing?: DbtDataTestRef[], incoming?: DbtDataTestRef[]): DbtDataTestRef[] | undefined {
    if (!existing?.length) {
      return incoming?.length ? [...incoming] : undefined;
    }
    if (!incoming?.length) {
      return [...existing];
    }
    const tests = new Map<string, DbtDataTestRef>();
    for (const test of [...existing, ...incoming]) {
      const kwargsKey =
        test.kwargs && Object.keys(test.kwargs).length > 0
          ? `:${createHash('sha256').update(JSON.stringify(test.kwargs)).digest('hex').slice(0, 16)}`
          : '';
      tests.set(`${test.package}:${test.name}${kwargsKey}`, test);
    }
    return [...tests.values()];
  }

  /** True for non-null, non-array objects, i.e. values that came from a YAML mapping. */
  private isRecord(value: unknown): boolean {
    return value !== null && typeof value === 'object' && !Array.isArray(value);
  }

  /** Returns the value as a kwargs bag when it is a YAML mapping, otherwise `undefined`. */
  private asKwargs(value: unknown): DbtTestKwargs | undefined {
    return this.isRecord(value) ? (value as DbtTestKwargs) : undefined;
  }

  /** Result returned when a file yields nothing usable. */
  private emptyResult(projectName: string | null): DbtSchemaParseResult {
    return {
      projectName,
      dbtVersion: null,
      tables: [],
      relationships: [],
    };
  }
}
database: 'warehouse', + schema: 'analytics', + resourceType: 'model', + columns: [ + { name: 'id', description: columnDescription, dataType: null }, + { name: 'missing', description: 'not imported', dataType: null }, + ], + }, + ], + }; +} + +describe('dbt descriptions update payloads', () => { + it('emits dbt writes and matching ai invalidations when descriptions exist', () => { + expect( + toDescriptionUpdates({ + connectionId: 'conn-1', + parseResult: parseResult('Orders table', 'Primary key'), + hostTables, + targetSchema: null, + }), + ).toEqual({ + dbt: [ + { + connectionId: 'conn-1', + table: { catalog: 'warehouse', db: 'analytics', name: 'orders' }, + source: 'dbt', + tableDescription: 'Orders table', + columnDescriptions: { id: 'Primary key' }, + }, + ], + aiInvalidations: [ + { + connectionId: 'conn-1', + table: { catalog: 'warehouse', db: 'analytics', name: 'orders' }, + source: 'ai', + }, + ], + }); + }); + + it('does not emit spurious dbt writes or ai invalidations when no descriptions exist', () => { + expect( + toDescriptionUpdates({ + connectionId: 'conn-1', + parseResult: parseResult(null, null), + hostTables, + targetSchema: null, + }), + ).toEqual({ dbt: [], aiInvalidations: [] }); + }); + + it('emits ai invalidation without a dbt description write when only structural metadata exists', () => { + const result = parseResult(null, null); + result.tables[0]!.tagsDbt = ['finance']; + + expect( + toDescriptionUpdates({ + connectionId: 'conn-1', + parseResult: result, + hostTables, + targetSchema: null, + }), + ).toEqual({ + dbt: [], + aiInvalidations: [ + { + connectionId: 'conn-1', + table: { catalog: 'warehouse', db: 'analytics', name: 'orders' }, + source: 'ai', + }, + ], + }); + }); +}); diff --git a/packages/context/src/ingest/adapters/dbt-descriptions/to-description-updates.ts b/packages/context/src/ingest/adapters/dbt-descriptions/to-description-updates.ts new file mode 100644 index 00000000..b63c6e74 --- /dev/null +++ 
export interface DbtDescriptionUpdates {
  dbt: KloDescriptionUpdate[];
  aiInvalidations: KloDescriptionUpdate[];
}

/**
 * Turns parsed dbt tables into description writes (`source: 'dbt'`) and AI-description
 * invalidations (`source: 'ai'`) for the host tables they match.
 *
 * A table contributes a dbt write only when it carries a table or column description; it
 * contributes an AI invalidation when it carries a description or any dbt metadata
 * (tags, freshness, column constraints, enum values or data tests).
 *
 * @param input - Connection id, parse result, candidate host tables and optional target schema.
 * @returns Both update lists, in the order the dbt tables were parsed.
 */
export function toDescriptionUpdates(input: {
  connectionId: string;
  parseResult: DbtSchemaParseResult;
  hostTables: DbtHostTableLite[];
  targetSchema: string | null;
}): DbtDescriptionUpdates {
  const { connectionId, parseResult, hostTables, targetSchema } = input;
  const result: DbtDescriptionUpdates = { dbt: [], aiInvalidations: [] };

  parseResult.tables.forEach((dbtTable) => {
    const hostTable = findMatchingKloTable(dbtTable, hostTables, targetSchema);
    if (!hostTable) {
      return;
    }

    // Column descriptions are keyed by the host column's own spelling of the name.
    const columnDescriptions: Record<string, string> = {};
    dbtTable.columns.forEach((dbtColumn) => {
      if (!dbtColumn.description) {
        return;
      }
      const match = hostTable.columns.find(
        (candidate) => candidate.name.toLowerCase() === dbtColumn.name.toLowerCase(),
      );
      if (match) {
        columnDescriptions[match.name] = dbtColumn.description;
      }
    });

    const tableDescription = dbtTable.description ?? undefined;
    const hasColumnDescriptions = Object.keys(columnDescriptions).length > 0;
    const describesSomething = tableDescription !== undefined || hasColumnDescriptions;

    const columnCarriesMetadata = (column: (typeof dbtTable.columns)[number]): boolean =>
      column.constraints !== undefined || !!column.enumValuesDbt?.length || !!column.dataTests?.length;
    const carriesMetadata =
      !!dbtTable.tagsDbt?.length ||
      dbtTable.freshnessDbt !== undefined ||
      dbtTable.columns.some(columnCarriesMetadata);

    if (!(describesSomething || carriesMetadata)) {
      return;
    }

    const tableRef = { catalog: hostTable.catalog, db: hostTable.db, name: hostTable.name };
    if (describesSomething) {
      result.dbt.push({
        connectionId,
        table: tableRef,
        source: 'dbt',
        ...(tableDescription !== undefined ? { tableDescription } : {}),
        ...(hasColumnDescriptions ? { columnDescriptions } : {}),
      });
    }
    // Any dbt-sourced change invalidates the AI entry for the table.
    result.aiInvalidations.push({ connectionId, table: tableRef, source: 'ai' });
  });

  return result;
}
/**
 * Turns parsed dbt tables into metadata updates (`source: 'dbt'`) for the host tables they match.
 *
 * Table-level fields are `tags` and `freshness`; column-level fields are `constraints`,
 * `enum_values` and `tests`, keyed by the host column's name. Tables that yield no fields
 * at either level are omitted.
 *
 * @param input - Connection id, parse result, candidate host tables and optional target schema.
 * @returns One update per matched table that carries metadata, in parse order.
 */
export function toMetadataUpdates(input: {
  connectionId: string;
  parseResult: DbtSchemaParseResult;
  hostTables: DbtHostTableLite[];
  targetSchema: string | null;
}): KloMetadataUpdate[] {
  const { connectionId, parseResult, hostTables, targetSchema } = input;
  const updates: KloMetadataUpdate[] = [];

  for (const dbtTable of parseResult.tables) {
    const hostTable = findMatchingKloTable(dbtTable, hostTables, targetSchema);
    if (!hostTable) {
      continue;
    }

    const { tagsDbt, freshnessDbt } = dbtTable;
    const tableFields: Record<string, unknown> = {};
    if (tagsDbt?.length) {
      tableFields.tags = tagsDbt;
    }
    if (freshnessDbt) {
      // Only copy the parts that are actually present on the parsed record.
      const freshness: Record<string, unknown> = {};
      if (freshnessDbt.raw !== undefined) {
        freshness.raw = freshnessDbt.raw;
      }
      if (freshnessDbt.loadedAtField !== undefined) {
        freshness.loaded_at_field = freshnessDbt.loadedAtField;
      }
      tableFields.freshness = freshness;
    }

    const columnFields: Record<string, Record<string, unknown>> = {};
    for (const dbtColumn of dbtTable.columns) {
      const hostColumn = hostTable.columns.find(
        (candidate) => candidate.name.toLowerCase() === dbtColumn.name.toLowerCase(),
      );
      if (!hostColumn) {
        continue;
      }

      const fields: Record<string, unknown> = {};
      if (dbtColumn.constraints) {
        fields.constraints = dbtColumn.constraints.dbt;
      }
      if (dbtColumn.enumValuesDbt?.length) {
        fields.enum_values = dbtColumn.enumValuesDbt;
      }
      if (dbtColumn.dataTests?.length) {
        fields.tests = dbtColumn.dataTests.map(({ name, package: pkg, kwargs }) => ({
          name,
          package: pkg,
          ...(kwargs ? { kwargs } : {}),
        }));
      }
      if (Object.keys(fields).length > 0) {
        columnFields[hostColumn.name] = fields;
      }
    }

    const hasTableFields = Object.keys(tableFields).length > 0;
    const hasColumnFields = Object.keys(columnFields).length > 0;
    if (!hasTableFields && !hasColumnFields) {
      continue;
    }

    updates.push({
      connectionId,
      table: { catalog: hostTable.catalog, db: hostTable.db, name: hostTable.name },
      source: 'dbt',
      ...(hasTableFields ? { tableFields } : {}),
      ...(hasColumnFields ? { columnFields } : {}),
    });
  }

  return updates;
}
export interface DbtRelationshipUpdates {
  joins: KloJoinUpdate[];
  skippedNoMatch: number;
}

const DBT_SYSTEM_EMAIL = ['system@kae', 'lio.dev'].join('');

/**
 * Turns parsed dbt relationships into join updates for host tables.
 *
 * Tables are matched by case-insensitive name only (schemas on the relationship are not
 * consulted); when several host tables share a name, the last one in `hostTables` is used.
 * A relationship is counted in `skippedNoMatch` when either table or either column has no
 * host counterpart. Emitted joins use the host's spelling of table and column names.
 *
 * @param input - Connection id, parse result and candidate host tables.
 * @returns The join updates, in parse order, plus the number of skipped relationships.
 */
export function toRelationshipUpdates(input: {
  connectionId: string;
  parseResult: DbtSchemaParseResult;
  hostTables: DbtHostTableLite[];
}): DbtRelationshipUpdates {
  const hostByName = new Map<string, DbtHostTableLite>(
    input.hostTables.map((table): [string, DbtHostTableLite] => [table.name.toLowerCase(), table]),
  );
  const columnOf = (table: DbtHostTableLite, wanted: string) =>
    table.columns.find((column) => column.name.toLowerCase() === wanted.toLowerCase());

  const result: DbtRelationshipUpdates = { joins: [], skippedNoMatch: 0 };

  for (const relationship of input.parseResult.relationships) {
    const source = hostByName.get(relationship.fromTable.toLowerCase());
    const target = hostByName.get(relationship.toTable.toLowerCase());
    if (!(source && target)) {
      result.skippedNoMatch += 1;
      continue;
    }

    const sourceColumn = columnOf(source, relationship.fromColumn);
    const targetColumn = columnOf(target, relationship.toColumn);
    if (!(sourceColumn && targetColumn)) {
      result.skippedNoMatch += 1;
      continue;
    }

    result.joins.push({
      connectionId: input.connectionId,
      fromTable: source.name,
      fromColumns: [sourceColumn.name],
      toTable: target.name,
      toColumns: [targetColumn.name],
      relationship: 'many_to_one',
      author: 'dbt',
      authorEmail: DBT_SYSTEM_EMAIL,
    });
  }

  return result;
}
+metrics: + - name: total_revenue + type: simple + type_params: + measure: total_revenue + - name: customer_count + type: simple + type_params: + measure: customer_count + - name: revenue_per_customer + description: Revenue per customer + type: derived + type_params: + expr: total_revenue / NULLIF(customer_count, 0) + metrics: + - name: total_revenue + alias: total_revenue + - name: customer_count + alias: customer_count +`; + +const schemaYaml = ` +version: 2 +sources: + - name: raw + database: warehouse + schema: landing + tables: + - name: customers + identifier: dim_customers + description: Raw customer dimension + columns: + - name: id + description: Customer primary key + - name: country + description: Country name +models: + - name: "{{ var('orders_model', 'fct_orders') }}" + schema: "{{ var('mart_schema', 'analytics') }}" + description: Modeled orders + columns: + - name: customer_id + description: Linked customer id + tests: + - relationships: + to: ref('dim_customers') + field: id + - name: status + description: Order status + - name: amount + description: Gross amount +`; + +const hostTables: DbtHostTableLite[] = [ + { + id: 'orders-table', + name: 'fct_orders', + catalog: 'warehouse', + db: 'analytics', + columns: [ + { id: 'orders-customer-id', name: 'customer_id' }, + { id: 'orders-status', name: 'status' }, + { id: 'orders-amount', name: 'amount' }, + { id: 'orders-ordered-at', name: 'ordered_at' }, + ], + }, + { + id: 'customers-table', + name: 'dim_customers', + catalog: 'warehouse', + db: 'landing', + columns: [ + { id: 'customers-id', name: 'id' }, + { id: 'customers-country', name: 'country' }, + ], + }, +]; + +describe('dbt extraction golden parity fixture', () => { + it('freezes the relocated MetricFlow and dbt-description contract together', () => { + const metricflow = parseMetricflowFiles([{ path: 'semantic_models/orders.yml', content: metricflowYaml }]); + + expect(metricflow).toEqual({ + semanticModels: [ + { + name: 'orders_semantic', + 
description: 'MetricFlow order facts', + modelRef: 'fct_orders', + dimensions: [ + { + name: 'status', + column: 'status', + type: 'string', + label: 'Status', + description: 'Order status', + }, + { + name: 'ordered_at', + column: 'ordered_at', + type: 'time', + label: 'Ordered At', + description: undefined, + }, + ], + measures: [ + { + type: 'simple', + name: 'total_revenue', + column: 'amount', + aggregation: 'sum', + label: 'Total Revenue', + description: 'Revenue', + }, + ], + entities: [{ name: 'customer', type: 'foreign', expr: 'customer_id', description: 'Customer relationship' }], + defaultTimeDimension: 'ordered_at', + }, + { + name: 'customers_semantic', + description: 'Customer dimension', + modelRef: 'dim_customers', + dimensions: [ + { + name: 'country', + column: 'country', + type: 'string', + label: 'Country', + description: 'Customer country', + }, + ], + measures: [ + { + type: 'simple', + name: 'customer_count', + column: 'id', + aggregation: 'count', + label: 'Customer Count', + description: 'Customer count', + }, + ], + entities: [{ name: 'customer', type: 'primary', expr: 'id' }], + defaultTimeDimension: null, + }, + ], + crossModelMetrics: [ + { + name: 'revenue_per_customer', + label: null, + description: 'Revenue per customer', + type: 'derived', + expr: 'total_revenue / NULLIF(customer_count, 0)', + dependsOn: [ + { metricName: 'orders_semantic', alias: 'total_revenue' }, + { metricName: 'customers_semantic', alias: 'customer_count' }, + ], + filter: null, + }, + ], + relationships: [ + { + fromTable: 'fct_orders', + fromColumn: 'customer_id', + toTable: 'dim_customers', + toColumn: 'id', + description: 'Customer relationship', + }, + ], + warnings: [], + }); + + expect(mapSemanticModelToSource(metricflow.semanticModels[0], 'analytics.fct_orders')).toEqual({ + name: 'fct-orders', + table: 'analytics.fct_orders', + grain: ['status', 'ordered_at'], + columns: [ + { name: 'status', type: 'string', description: 'Order status' }, + { name: 
'ordered_at', type: 'time' }, + ], + measures: [ + { + name: 'total_revenue', + expr: 'sum(amount)', + description: 'Revenue', + }, + ], + joins: [], + descriptions: { dbt: 'MetricFlow order facts' }, + }); + + expect(mapCrossModelMetricToSource(metricflow.crossModelMetrics[0])).toEqual({ + name: 'revenue-per-customer', + sql: 'total_revenue / NULLIF(customer_count, 0)', + descriptions: { dbt: 'Revenue per customer' }, + grain: [], + columns: [], + measures: [ + { + name: 'revenue_per_customer', + expr: 'total_revenue / NULLIF(customer_count, 0)', + description: 'Revenue per customer', + }, + ], + joins: [], + }); + + const schema = parseDbtSchemaFiles( + [{ path: 'models/schema.yml', content: schemaYaml }], + new Map([ + ['orders_model', 'fct_orders'], + ['mart_schema', 'analytics'], + ]), + ); + const merged = mergeSemanticModelTables(schema, metricflow.semanticModels); + + expect(merged).toEqual({ + projectName: null, + dbtVersion: null, + tables: [ + { + name: 'dim_customers', + description: 'Raw customer dimension', + database: 'warehouse', + schema: 'landing', + columns: [ + { name: 'id', description: 'Customer primary key', dataType: null }, + { name: 'country', description: 'Country name', dataType: null }, + ], + resourceType: 'source', + }, + { + name: 'fct_orders', + description: 'Modeled orders', + database: null, + schema: 'analytics', + columns: [ + { + name: 'customer_id', + description: 'Linked customer id', + dataType: null, + dataTests: [ + { + name: 'relationships', + package: 'dbt', + kwargs: { to: "ref('dim_customers')", field: 'id' }, + }, + ], + }, + { name: 'status', description: 'Order status', dataType: null }, + { name: 'amount', description: 'Gross amount', dataType: null }, + ], + resourceType: 'model', + }, + ], + relationships: [ + { + fromTable: 'fct_orders', + fromColumn: 'customer_id', + toTable: 'dim_customers', + toColumn: 'id', + fromSchema: 'analytics', + }, + ], + }); + + expect(matchDbtTables(merged.tables, hostTables, 
'analytics')).toEqual([ + { + dbtTable: 'dim_customers', + dbtSchema: 'landing', + dbtDatabase: 'warehouse', + hostTableId: 'customers-table', + hostTableName: 'dim_customers', + matched: true, + tableDescriptionAction: 'import', + tableDescriptionFound: true, + columnsToImport: 2, + columnsMatched: 2, + columnsTotal: 2, + columnDescriptionsFound: 2, + }, + { + dbtTable: 'fct_orders', + dbtSchema: 'analytics', + dbtDatabase: null, + hostTableId: 'orders-table', + hostTableName: 'fct_orders', + matched: true, + tableDescriptionAction: 'import', + tableDescriptionFound: true, + columnsToImport: 3, + columnsMatched: 3, + columnsTotal: 3, + columnDescriptionsFound: 3, + }, + ]); + + expect( + toDescriptionUpdates({ + connectionId: 'warehouse-1', + parseResult: merged, + hostTables, + targetSchema: 'analytics', + }), + ).toEqual({ + dbt: [ + { + connectionId: 'warehouse-1', + table: { catalog: 'warehouse', db: 'landing', name: 'dim_customers' }, + source: 'dbt', + tableDescription: 'Raw customer dimension', + columnDescriptions: { + id: 'Customer primary key', + country: 'Country name', + }, + }, + { + connectionId: 'warehouse-1', + table: { catalog: 'warehouse', db: 'analytics', name: 'fct_orders' }, + source: 'dbt', + tableDescription: 'Modeled orders', + columnDescriptions: { + customer_id: 'Linked customer id', + status: 'Order status', + amount: 'Gross amount', + }, + }, + ], + aiInvalidations: [ + { + connectionId: 'warehouse-1', + table: { catalog: 'warehouse', db: 'landing', name: 'dim_customers' }, + source: 'ai', + }, + { + connectionId: 'warehouse-1', + table: { catalog: 'warehouse', db: 'analytics', name: 'fct_orders' }, + source: 'ai', + }, + ], + }); + + expect(toRelationshipUpdates({ connectionId: 'warehouse-1', parseResult: merged, hostTables })).toEqual({ + joins: [ + { + connectionId: 'warehouse-1', + fromTable: 'fct_orders', + fromColumns: ['customer_id'], + toTable: 'dim_customers', + toColumns: ['id'], + relationship: 'many_to_one', + author: 'dbt', 
+ authorEmail: DBT_SYSTEM_EMAIL, + }, + ], + skippedNoMatch: 0, + }); + }); +}); diff --git a/packages/context/src/ingest/adapters/dbt/chunk.test.ts b/packages/context/src/ingest/adapters/dbt/chunk.test.ts new file mode 100644 index 00000000..6eece2ac --- /dev/null +++ b/packages/context/src/ingest/adapters/dbt/chunk.test.ts @@ -0,0 +1,36 @@ +import { describe, expect, it } from 'vitest'; +import { chunkDbtProject } from './chunk.js'; + +describe('chunkDbtProject', () => { + const diffSet = (modified: string[]) => ({ added: [], modified, deleted: [], unchanged: [] }); + + it('caps peerFileIndex when the project has very many yaml files', () => { + const modelPaths = Array.from({ length: 201 }, (_, i) => `models/m${i}.yml`); + const allPaths = ['dbt_project.yml', ...modelPaths].sort(); + const { workUnits } = chunkDbtProject({ allPaths }); + const [first] = workUnits; + expect(first).toBeDefined(); + expect(first?.peerFileIndex).toHaveLength(200); + expect(first?.notes).toMatch(/capped at 200/); + }); + + it('keeps large-project model work units when dbt_project.yml changes', () => { + const modelPaths = Array.from({ length: 30 }, (_, i) => `models/m${i}.yml`); + const allPaths = ['dbt_project.yml', ...modelPaths].sort(); + const { workUnits } = chunkDbtProject({ allPaths }, { diffSet: diffSet(['dbt_project.yml']) }); + + expect(workUnits).toHaveLength(30); + expect(workUnits[0]?.rawFiles).toEqual(['models/m0.yml']); + expect(workUnits[0]?.dependencyPaths).toContain('dbt_project.yml'); + }); + + it('keeps large-project model work units when non-model yaml peers change', () => { + const modelPaths = Array.from({ length: 30 }, (_, i) => `models/m${i}.yml`); + const allPaths = ['dbt_project.yml', 'seeds/seed_properties.yml', ...modelPaths].sort(); + const { workUnits } = chunkDbtProject({ allPaths }, { diffSet: diffSet(['seeds/seed_properties.yml']) }); + + expect(workUnits).toHaveLength(30); + expect(workUnits[0]?.rawFiles).toEqual(['models/m0.yml']); + 
expect(workUnits[0]?.dependencyPaths).toContain('seeds/seed_properties.yml'); + }); +}); diff --git a/packages/context/src/ingest/adapters/dbt/chunk.ts b/packages/context/src/ingest/adapters/dbt/chunk.ts new file mode 100644 index 00000000..f5651459 --- /dev/null +++ b/packages/context/src/ingest/adapters/dbt/chunk.ts @@ -0,0 +1,130 @@ +import type { ChunkResult, DiffSet, WorkUnit } from '../../types.js'; +import type { ParsedDbtProject } from './parse.js'; + +interface ChunkOptions { + diffSet?: DiffSet; +} + +/** + * Per-model work units (when the project has more than 25 YAML files) only name `rawFiles` under + * `models/**`. Other `.yml` (e.g. some `seeds/` or custom layouts) still appear in `peerFileIndex` + * or in the small-project / no-models fallbacks — v1 does not emit one WU per non-models file. + */ +const MODELS_PREFIX = 'models/'; + +/** `peerFileIndex` is a hint only (agents may not read those paths). Cap to limit prompt size. */ +const MAX_PEER_FILE_INDEX = 200; + +/** + * Returns the project file path present in `allPaths`: `dbt_project.yml` if listed, otherwise + * `dbt_project.yaml` if listed, otherwise `undefined`. + */ +function projectYamlPath(allPaths: string[]): string | undefined { + if (allPaths.includes('dbt_project.yml')) { + return 'dbt_project.yml'; + } + if (allPaths.includes('dbt_project.yaml')) { + return 'dbt_project.yaml'; + } + return undefined; +} + +/** Returns, sorted, the paths that start with `models/` once backslashes are read as slashes. */ +function modelRelativePaths(allPaths: string[]): string[] { + return allPaths.filter((p) => p.replace(/\\/g, '/').startsWith(MODELS_PREFIX)).sort(); +} + +/** + * Derives a unit key from a model file path: drops a trailing `.yml` / `.yaml`, replaces every run + * of non-alphanumeric characters with `-`, trims leading and trailing `-`, lowercases the result + * and prefixes it with `dbt-`. + * + * NOTE(review): distinct paths can produce the same key (for example `models/a_b.yml` and + * `models/a-b.yml`, or `a.yml` and `a.yaml`); confirm whether callers need unique keys. + */ +function unitKeyForModelFile(mf: string): string { + const base = mf + .replace(/\.(ya?ml)$/i, '') + .replace(/\\/g, '/') + .replace(/[^a-zA-Z0-9]+/g, '-') + .replace(/^-+|-+$/g, ''); + return `dbt-${base.toLowerCase()}`; +} + +/** + * Builds the work units for a run without a diff: none for an empty path list, a single `dbt-all` + * unit when there are at most 25 paths or no path under `models/`, otherwise one unit per + * `models/` path. + */ +function emitFirstRunWorkUnits(allPaths: string[], dbtDep: string | undefined): WorkUnit[] { + if (allPaths.length === 0) { + return []; + } + + if (allPaths.length <= 25) { + return [ + { + unitKey: 'dbt-all', + displayLabel: 'dbt project (all yaml)', + rawFiles: [...allPaths], + peerFileIndex: [], + dependencyPaths: [], + notes: 'dbt project — all YAML in one 
WorkUnit (≤25 files)', + }, + ]; + } + + const modelFiles = modelRelativePaths(allPaths); + if (modelFiles.length === 0) { + return [ + { + unitKey: 'dbt-all', + displayLabel: 'dbt project (all yaml, no models/**)', + rawFiles: [...allPaths], + peerFileIndex: [], + dependencyPaths: dbtDep ? [dbtDep] : [], + notes: 'dbt: no models/**/*.yml — single slice with dbt_project as dependency if present', + }, + ]; + } + + return modelFiles.map((mf) => { + const allPeers = allPaths.filter((p) => p !== mf).sort(); + const truncated = allPeers.length > MAX_PEER_FILE_INDEX; + const peerFileIndex = truncated ? allPeers.slice(0, MAX_PEER_FILE_INDEX) : allPeers; + const dependencyPaths = dbtDep && allPaths.includes(dbtDep) && mf !== dbtDep ? [dbtDep].sort() : []; + const notes = truncated + ? `dbt model schema slice (peer index capped at ${MAX_PEER_FILE_INDEX} of ${allPeers.length} paths)` + : 'dbt model schema slice'; + return { + unitKey: unitKeyForModelFile(mf), + displayLabel: `dbt ${mf}`, + rawFiles: [mf], + peerFileIndex, + dependencyPaths: dependencyPaths, + notes, + }; + }); +} + +/** + * Narrows first-run work units to those affected by `diffSet`. + * + * A unit is kept when any of its raw files, dependency paths or peer files was added or modified. + * Kept units list only the touched raw files when there are any (all raw files otherwise); the + * untouched raw files and the touched peers are merged into `dependencyPaths`. Deleted paths are + * reported through `eviction`. + * + * `allPaths` is optional; when given, a unit whose `peerFileIndex` is at the cap is checked + * against every other project path instead of only the capped index. + */ +function applyDiffSet(firstRunUnits: WorkUnit[], diffSet: DiffSet, allPaths?: string[]): ChunkResult { + const touched = new Set([...diffSet.added, ...diffSet.modified]); + const kept: WorkUnit[] = []; + + for (const wu of firstRunUnits) { + const touchedRawFiles = wu.rawFiles.filter((p) => touched.has(p)); + const touchedDependencies = wu.dependencyPaths.filter((p) => touched.has(p)); + /* The peer index is capped for prompt size only; a change to a peer past the cap must still keep this unit. */ + const peers = + allPaths && wu.peerFileIndex.length >= MAX_PEER_FILE_INDEX + ? allPaths.filter((p) => !wu.rawFiles.includes(p)) + : wu.peerFileIndex; + const touchedPeerFiles = peers.filter((p) => touched.has(p)); + if (touchedRawFiles.length === 0 && touchedDependencies.length === 0 && touchedPeerFiles.length === 0) { + continue; + } + + /* Copy before sorting so the first-run unit's own array is never reordered in place. */ + const rawFiles = touchedRawFiles.length > 0 ? touchedRawFiles : [...wu.rawFiles]; + const unchangedRaw = touchedRawFiles.length > 0 ? 
wu.rawFiles.filter((p) => !touched.has(p)) : []; + const combinedDeps = new Set([...wu.dependencyPaths, ...unchangedRaw, ...touchedPeerFiles]); + kept.push({ + ...wu, + rawFiles: rawFiles.sort(), + dependencyPaths: [...combinedDeps].sort(), + }); + } + + const eviction = diffSet.deleted.length > 0 ? { deletedRawPaths: [...diffSet.deleted].sort() } : undefined; + return { workUnits: kept, eviction }; +} + +/** + * Splits a parsed dbt project into work units. Without `opts.diffSet` every first-run unit is + * returned; with it, only the units affected by the diff are returned (see `applyDiffSet`). + */ +export function chunkDbtProject(project: ParsedDbtProject, opts: ChunkOptions = {}): ChunkResult { + const dbtDep = projectYamlPath(project.allPaths); + const firstRun = emitFirstRunWorkUnits(project.allPaths, dbtDep); + if (!opts.diffSet) { + return { workUnits: firstRun }; + } + return applyDiffSet(firstRun, opts.diffSet, project.allPaths); +} diff --git a/packages/context/src/ingest/adapters/dbt/dbt.adapter.test.ts b/packages/context/src/ingest/adapters/dbt/dbt.adapter.test.ts new file mode 100644 index 00000000..dad64232 --- /dev/null +++ b/packages/context/src/ingest/adapters/dbt/dbt.adapter.test.ts @@ -0,0 +1,51 @@ +import { mkdir, mkdtemp, rm, writeFile } from 'node:fs/promises'; +import { tmpdir } from 'node:os'; +import { join } from 'node:path'; +import { afterEach, beforeEach, describe, expect, it } from 'vitest'; +import type { SourceAdapter } from '../../types.js'; +import { DbtSourceAdapter } from './dbt.adapter.js'; + +describe('DbtSourceAdapter', () => { + let stagedDir: string; + let adapter: SourceAdapter; + + beforeEach(async () => { + stagedDir = await mkdtemp(join(tmpdir(), 'dbt-adapter-')); + adapter = new DbtSourceAdapter(); + }); + + afterEach(async () => { + await rm(stagedDir, { recursive: true, force: true }); + }); + + it('declares the expected source key and skill list', () => { + expect(adapter.source).toBe('dbt'); + expect(adapter.skillNames).toEqual(['dbt_ingest']); + }); + + it('detects a staged dbt project root 
(dbt_project.yml)', async () => { + await writeFile(join(stagedDir, 'dbt_project.yml'), "name: 'jaffle'\nversion: '1.0.0'\n", 'utf-8'); + expect(await adapter.detect(stagedDir)).toBe(true); + }); + + it('chunk: dbt_project.yml + models/a.yml yields one WU (≤25 files)', async () => { + await writeFile(join(stagedDir, 'dbt_project.yml'), "name: 'jaffle'\n", 'utf-8'); + await mkdir(join(stagedDir, 'models'), { recursive: true }); + await writeFile( + join(stagedDir, 'models/a.yml'), + 'version: 2\nmodels:\n - name: orders\n description: Orders\n', + 'utf-8', + ); + const result = await adapter.chunk(stagedDir); + expect(result.workUnits).toHaveLength(1); + expect(result.workUnits[0].unitKey).toBe('dbt-all'); + expect(result.parseArtifacts).toMatchObject({ + projectName: 'jaffle', + tables: [{ name: 'orders', description: 'Orders' }], + }); + }); + + it('implements fetch() for git-backed dbt source setup', () => { + expect(adapter.fetch).toBeTypeOf('function'); + }); +}); diff --git a/packages/context/src/ingest/adapters/dbt/dbt.adapter.ts b/packages/context/src/ingest/adapters/dbt/dbt.adapter.ts new file mode 100644 index 00000000..feb31b28 --- /dev/null +++ b/packages/context/src/ingest/adapters/dbt/dbt.adapter.ts @@ -0,0 +1,48 @@ +import { join } from 'node:path'; +import type { ChunkResult, DiffSet, SourceAdapter } from '../../types.js'; +import type { FetchContext } from '../../types.js'; +import { loadProjectInfo } from '../../dbt-shared/project-vars.js'; +import { loadDbtSchemaFiles } from '../../dbt-shared/schema-files.js'; +import { parseDbtSchemaFiles } from '../dbt-descriptions/parse-schema.js'; +import { chunkDbtProject } from './chunk.js'; +import { detectDbtStagedDir } from './detect.js'; +import { fetchDbtRepo, type DbtPullConfig } from './fetch.js'; +import { parseDbtStagedDir } from './parse.js'; + +interface DbtSourceAdapterOptions { + homeDir?: string; +} + +export class DbtSourceAdapter implements SourceAdapter { + readonly source = 'dbt' as const; 
+ /** Runner merges: ingest_triage, sl_capture, knowledge_capture (see ingest-bundle.runner.ts) */ + readonly skillNames: string[] = ['dbt_ingest']; + + constructor(private readonly options: DbtSourceAdapterOptions = {}) {} + + /** Resolves to whatever `detectDbtStagedDir` reports for `stagedDir`. */ + detect(stagedDir: string): Promise<boolean> { + return detectDbtStagedDir(stagedDir); + } + + /** + * Fetches the configured dbt repo and stages its YAML files into `stagedDir`. + * The clone cache lives under `<homeDir or '.klo/cache'>/dbt/<connectionId>`. + * Throws `dbt fetch requires repoUrl` when `pullConfig` has no `repoUrl`. + */ + async fetch(pullConfig: unknown, stagedDir: string, ctx: FetchContext): Promise<void> { + const config = pullConfig as DbtPullConfig | undefined; + if (!config?.repoUrl) { + throw new Error('dbt fetch requires repoUrl'); + } + await fetchDbtRepo({ + config, + cacheDir: join(this.options.homeDir ?? '.klo/cache', 'dbt', ctx.connectionId), + stagedDir, + }); + } + + /** + * Chunks the staged project into work units (narrowed by `diffSet` when given) and attaches the + * parsed schema files as `parseArtifacts`. + */ + async chunk(stagedDir: string, diffSet?: DiffSet): Promise<ChunkResult> { + const project = await parseDbtStagedDir(stagedDir); + const projectInfo = await loadProjectInfo(stagedDir); + const schemaFiles = await loadDbtSchemaFiles(stagedDir); + const parseArtifacts = parseDbtSchemaFiles(schemaFiles, projectInfo.variables, { + projectName: projectInfo.projectName, + }); + return { ...chunkDbtProject(project, { diffSet }), parseArtifacts }; + } +} diff --git a/packages/context/src/ingest/adapters/dbt/detect.ts b/packages/context/src/ingest/adapters/dbt/detect.ts new file mode 100644 index 00000000..9c21fa38 --- /dev/null +++ b/packages/context/src/ingest/adapters/dbt/detect.ts @@ -0,0 +1,12 @@ +import { access } from 'node:fs/promises'; +import { join } from 'node:path'; + +/** + * Resolves to `true` when `dbt_project.yml` or `dbt_project.yaml` is accessible directly under + * `stagedDir`, and to `false` otherwise. + */ +export async function detectDbtStagedDir(stagedDir: string): Promise<boolean> { + for (const name of ['dbt_project.yml', 'dbt_project.yaml'] as const) { + try { + await access(join(stagedDir, name)); + return true; + } catch { /* not accessible under this name; try the next candidate */ } + } + return false; +} diff --git a/packages/context/src/ingest/adapters/dbt/fetch.test.ts b/packages/context/src/ingest/adapters/dbt/fetch.test.ts new file mode 100644 index 00000000..8905d1af --- /dev/null +++ b/packages/context/src/ingest/adapters/dbt/fetch.test.ts @@ -0,0 +1,38 @@ +import { mkdir, mkdtemp, readFile, rm, writeFile } 
from 'node:fs/promises'; +import { tmpdir } from 'node:os'; +import { join } from 'node:path'; +import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'; +import { fetchDbtRepo } from './fetch.js'; + +describe('fetchDbtRepo', () => { + let tempDir: string; + + beforeEach(async () => { + tempDir = await mkdtemp(join(tmpdir(), 'klo-dbt-fetch-')); + }); + + afterEach(async () => { + await rm(tempDir, { recursive: true, force: true }); + }); + + it('copies dbt yaml files from a fetched repo subpath into staged dir', async () => { + const cacheDir = join(tempDir, 'cache'); + const stagedDir = join(tempDir, 'staged'); + await mkdir(join(cacheDir, 'analytics', 'models'), { recursive: true }); + await writeFile(join(cacheDir, 'analytics', 'dbt_project.yml'), 'name: analytics\n', 'utf-8'); + await writeFile(join(cacheDir, 'analytics', 'models', 'orders.yml'), 'models: []\n', 'utf-8'); + const cloneOrPull = vi.fn(async () => ({ commitHash: 'abc123' })); + + await expect( + fetchDbtRepo({ + config: { repoUrl: 'https://github.com/acme/dbt.git', path: 'analytics' }, + cacheDir, + stagedDir, + deps: { cloneOrPull }, + }), + ).resolves.toEqual({ commitHash: 'abc123', filesCopied: 2 }); + + await expect(readFile(join(stagedDir, 'dbt_project.yml'), 'utf-8')).resolves.toContain('analytics'); + await expect(readFile(join(stagedDir, 'models', 'orders.yml'), 'utf-8')).resolves.toContain('models'); + }); +}); diff --git a/packages/context/src/ingest/adapters/dbt/fetch.ts b/packages/context/src/ingest/adapters/dbt/fetch.ts new file mode 100644 index 00000000..9a44035c --- /dev/null +++ b/packages/context/src/ingest/adapters/dbt/fetch.ts @@ -0,0 +1,60 @@ +import { access, copyFile, mkdir, readdir } from 'node:fs/promises'; +import { dirname, join, relative } from 'node:path'; +import { cloneOrPull, sanitizeRepoError } from '../../repo-fetch.js'; + +export interface DbtPullConfig { + repoUrl: string; + branch?: string; + path?: string; + authToken?: string | null; +} + 
+export interface FetchDbtRepoParams { + config: DbtPullConfig; + cacheDir: string; + stagedDir: string; + deps?: { + cloneOrPull?: typeof cloneOrPull; + }; +} + +/** + * Clones or updates the configured repo into `cacheDir` (branch defaults to `main`), then copies + * its YAML files, optionally from the `config.path` subdirectory, into `stagedDir`. + * + * Throws before cloning when `config.path` would resolve outside `cacheDir`. Failures of the + * clone or copy step are rethrown as a new Error whose message comes from `sanitizeRepoError`. + */ +export async function fetchDbtRepo(params: FetchDbtRepoParams): Promise<{ commitHash: string; filesCopied: number }> { + const sourceRoot = params.config.path ? join(params.cacheDir, params.config.path) : params.cacheDir; + /* A subpath such as `../..` must not let the copy step read files outside the clone. */ + if (relative(params.cacheDir, sourceRoot).split(/[\\/]/)[0] === '..') { + throw new Error(`dbt fetch path must stay inside the repository: ${params.config.path}`); + } + try { + const runCloneOrPull = params.deps?.cloneOrPull ?? cloneOrPull; + const { commitHash } = await runCloneOrPull({ + repoUrl: params.config.repoUrl, + authToken: params.config.authToken, + cacheDir: params.cacheDir, + branch: params.config.branch ?? 'main', + }); + const filesCopied = await copyYamlFilesRecursive(sourceRoot, params.stagedDir); + return { commitHash, filesCopied }; + } catch (error) { + throw new Error(sanitizeRepoError(error, params.config.authToken)); + } +} + +/** + * Copies every `.yml` / `.yaml` file found under `sourceRoot` to the same relative location under + * `destRoot`, creating directories as needed. Returns the number of files copied, or 0 when + * `sourceRoot` is not accessible. + */ +async function copyYamlFilesRecursive(sourceRoot: string, destRoot: string): Promise<number> { + try { + await access(sourceRoot); + } catch { + return 0; + } + + await mkdir(destRoot, { recursive: true }); + const entries = await readdir(sourceRoot, { withFileTypes: true, recursive: true }); + let copied = 0; + for (const entry of entries) { + if (!entry.isFile() || !/\.ya?ml$/i.test(entry.name)) { + continue; + } + const absSrc = join(entry.parentPath, entry.name); + const rel = relative(sourceRoot, absSrc); + const dest = join(destRoot, rel); + await mkdir(dirname(dest), { recursive: true }); + await copyFile(absSrc, dest); + copied += 1; + } + return copied; +} diff --git a/packages/context/src/ingest/adapters/dbt/parse.test.ts b/packages/context/src/ingest/adapters/dbt/parse.test.ts new file mode 100644 index 00000000..f373fd5b --- /dev/null +++ b/packages/context/src/ingest/adapters/dbt/parse.test.ts @@ -0,0 +1,8 @@ +import { describe, expect, it } from 'vitest'; +import { normalizeDbtPath } from './parse.js'; + +describe('normalizeDbtPath', () => { + it('normalizes Windows separators to POSIX separators', 
() => { + expect(normalizeDbtPath('models\\marts\\orders.yml')).toBe('models/marts/orders.yml'); + }); +}); diff --git a/packages/context/src/ingest/adapters/dbt/parse.ts b/packages/context/src/ingest/adapters/dbt/parse.ts new file mode 100644 index 00000000..2402df36 --- /dev/null +++ b/packages/context/src/ingest/adapters/dbt/parse.ts @@ -0,0 +1,32 @@ +import { readdir } from 'node:fs/promises'; +import { join, relative } from 'node:path'; + +const YAML_EXT_RE = /\.(ya?ml)$/i; + +/** Replaces every backslash in `path` with a forward slash. */ +export function normalizeDbtPath(path: string): string { + return path.replaceAll('\\', '/'); +} + +/** + * Lists every `.yml` / `.yaml` file under `stagedDir` (recursively) as a sorted array of + * slash-separated paths relative to `stagedDir`. + */ +async function collectYamlFiles(stagedDir: string): Promise<string[]> { + const entries = await readdir(stagedDir, { withFileTypes: true, recursive: true }); + const paths: string[] = []; + for (const entry of entries) { + if (!entry.isFile() || !YAML_EXT_RE.test(entry.name)) { + continue; + } + const abs = join(entry.parentPath, entry.name); + paths.push(normalizeDbtPath(relative(stagedDir, abs))); + } + paths.sort(); + return paths; +} + +export interface ParsedDbtProject { + /** All `.yml` / `.yaml` paths under stagedDir, relative + sorted. 
*/ + allPaths: string[]; +} + +/** Collects the YAML paths under `stagedDir` and returns them as a `ParsedDbtProject`. */ +export async function parseDbtStagedDir(stagedDir: string): Promise<ParsedDbtProject> { + const allPaths = await collectYamlFiles(stagedDir); + return { allPaths }; +} diff --git a/packages/context/src/ingest/adapters/fake/fake.adapter.ts b/packages/context/src/ingest/adapters/fake/fake.adapter.ts new file mode 100644 index 00000000..690b1f86 --- /dev/null +++ b/packages/context/src/ingest/adapters/fake/fake.adapter.ts @@ -0,0 +1,48 @@ +import { readdir } from 'node:fs/promises'; +import { join, relative } from 'node:path'; +import type { ChunkResult, DiffSet, SourceAdapter, WorkUnit } from '../../types.js'; + +export class FakeSourceAdapter implements SourceAdapter { + readonly source = 'fake'; + readonly skillNames: string[] = []; + + /** Always resolves to `true`. */ + detect(): Promise<boolean> { + return Promise.resolve(true); + } + + /** + * Emits one work unit per non-empty top-level subdirectory of `stagedDir`, named `fake-<dir>`. + * With a `diffSet`, only subdirectories holding an added or modified file are emitted and + * deleted paths are reported through `eviction`. + */ + async chunk(stagedDir: string, diffSet?: DiffSet): Promise<ChunkResult> { + const subDirs = (await readdir(stagedDir, { withFileTypes: true })) + .filter((e) => e.isDirectory()) + .map((e) => e.name) + .sort(); + + /* Built once: the touched set does not depend on the subdirectory being scanned. */ + const touched = diffSet ? new Set([...diffSet.added, ...diffSet.modified]) : undefined; + const workUnits: WorkUnit[] = []; + for (const subDir of subDirs) { + const entries = await readdir(join(stagedDir, subDir), { withFileTypes: true, recursive: true }); + const rawFiles = entries + .filter((e) => e.isFile()) + .map((e) => relative(stagedDir, join(e.parentPath, e.name))) + .sort(); + if (rawFiles.length === 0) { + continue; + } + if (touched && !rawFiles.some((p) => touched.has(p))) { + continue; + } + workUnits.push({ + unitKey: `fake-${subDir}`, + displayLabel: subDir, + rawFiles, + peerFileIndex: [], + dependencyPaths: [], + }); + } + + const eviction = diffSet && diffSet.deleted.length > 0 ? 
{ deletedRawPaths: [...diffSet.deleted] } : undefined; + return { workUnits, eviction }; + } +} diff --git a/packages/context/src/ingest/adapters/historic-sql/__fixtures__/postgres/eviction-churn/input.json b/packages/context/src/ingest/adapters/historic-sql/__fixtures__/postgres/eviction-churn/input.json new file mode 100644 index 00000000..6be4e518 --- /dev/null +++ b/packages/context/src/ingest/adapters/historic-sql/__fixtures__/postgres/eviction-churn/input.json @@ -0,0 +1,146 @@ +{ + "name": "eviction-churn", + "now": "2026-05-08T12:00:00.000Z", + "connectionId": "warehouse", + "probe": { + "pgServerVersion": "PostgreSQL 16.4", + "warnings": [ + "pg_stat_statements.max is 1000; set it to at least 5000 to reduce query-template eviction churn" + ] + }, + "snapshot": { + "statsResetAt": "2026-05-08T08:00:00.000Z", + "deallocCount": 3, + "rows": [ + { + "queryid": "501", + "userid": "11", + "username": "analyst", + "dbid": "5", + "database": "analytics", + "query": "SELECT count(*) FROM analytics.orders WHERE status = $1", + "calls": 20, + "totalExecTime": 500, + "meanExecTime": 25, + "totalRows": 40 + } + ] + }, + "pullConfig": { + "dialect": "postgres", + "windowDays": 90, + "lastSuccessfulCursor": null, + "serviceAccountUserPatterns": [], + "redactionPatterns": [], + "maxTemplatesPerRun": 5000, + "minCalls": 5 + }, + "analysisBySql": { + "SELECT count(*) FROM analytics.orders WHERE status = $1": { + "fingerprint": "fp_orders_status", + "normalizedSql": "SELECT count(*) FROM analytics.orders WHERE status = $1", + "tablesTouched": [ + "analytics.orders" + ], + "literalSlots": [] + } + }, + "baseline": null, + "expectedBaseline": { + "version": 1, + "fetchedAt": "2026-05-08T12:00:00.000Z", + "statsResetAt": "2026-05-08T08:00:00.000Z", + "pgServerVersion": "PostgreSQL 16.4", + "templates": { + "db5_q501": { + "firstObservedAt": "2026-05-08T12:00:00.000Z", + "perUser": { + "11": { + "calls": 20, + "totalExecTime": 500, + "totalRows": 40 + } + } + } + } + }, + 
"expectedFiles": { + "manifest.json": { + "json": { + "source": "historic-sql", + "connectionId": "warehouse", + "dialect": "postgres", + "fetchedAt": "2026-05-08T12:00:00.000Z", + "windowStart": "2026-05-08T08:00:00.000Z", + "windowEnd": "2026-05-08T12:00:00.000Z", + "nextSuccessfulCursor": "2026-05-08T12:00:00.000Z", + "templateCount": 1, + "capped": false, + "warnings": [ + "pg_stat_statements.max is 1000; set it to at least 5000 to reduce query-template eviction churn", + "pgss_dealloc_count:3; pg_stat_statements.max may be too low, causing template eviction churn", + "baseline_first_run:no_previous_pgss_baseline" + ], + "degraded": true, + "statsResetAt": "2026-05-08T08:00:00.000Z", + "baselineFirstRun": true, + "pgServerVersion": "PostgreSQL 16.4", + "deallocCount": 3, + "templates": [ + { + "id": "db5_q501", + "fingerprint": "fp_orders_status", + "subClusterId": null, + "path": "templates/db5_q501/page.md" + } + ] + } + }, + "templates/db5_q501/metadata.json": { + "json": { + "id": "db5_q501", + "title": "postgres · analytics.orders [db5_q501]", + "path": "templates/db5_q501/page.md", + "objectType": "historic_sql_template", + "lastEditedAt": null, + "properties": { + "fingerprint": "fp_orders_status", + "sub_cluster_id": null, + "dialect": "postgres", + "tables_touched": [ + "analytics.orders" + ], + "literal_slots": [], + "triage_signals": { + "executions_bucket": "mid", + "distinct_users_bucket": "solo", + "error_rate_bucket": "ok", + "recency_bucket": "active", + "service_account_only": "false", + "runtime_bucket": "fast" + } + } + } + }, + "templates/db5_q501/page.md": { + "text": "# db5_q501\n\n## Normalized SQL\n```sql\nSELECT count(*) FROM analytics.orders WHERE status = $1\n```\n\n## Tables touched\n- analytics.orders\n" + }, + "templates/db5_q501/usage.json": { + "json": { + "stats": { + "executions": 20, + "distinct_users": 1, + "first_seen": "2026-05-08T12:00:00.000Z", + "last_seen": "2026-05-08T12:00:00.000Z", + "p50_runtime_ms": null, + 
"p95_runtime_ms": null, + "mean_runtime_ms": 25, + "error_rate": 0, + "rows_produced": 40 + }, + "literal_slots": [], + "samples": [] + } + } + } +} diff --git a/packages/context/src/ingest/adapters/historic-sql/__fixtures__/postgres/first-run/input.json b/packages/context/src/ingest/adapters/historic-sql/__fixtures__/postgres/first-run/input.json new file mode 100644 index 00000000..5835ab3a --- /dev/null +++ b/packages/context/src/ingest/adapters/historic-sql/__fixtures__/postgres/first-run/input.json @@ -0,0 +1,144 @@ +{ + "name": "first-run", + "now": "2026-05-08T12:00:00.000Z", + "connectionId": "warehouse", + "probe": { + "pgServerVersion": "PostgreSQL 16.4", + "warnings": [] + }, + "snapshot": { + "statsResetAt": "2026-05-08T08:00:00.000Z", + "deallocCount": 0, + "rows": [ + { + "queryid": "101", + "userid": "11", + "username": "analyst", + "dbid": "5", + "database": "analytics", + "query": "SELECT count(*) FROM analytics.orders WHERE status = $1", + "calls": 10, + "totalExecTime": 250, + "meanExecTime": 25, + "totalRows": 20 + } + ] + }, + "pullConfig": { + "dialect": "postgres", + "windowDays": 90, + "lastSuccessfulCursor": null, + "serviceAccountUserPatterns": [ + "^svc_" + ], + "redactionPatterns": [], + "maxTemplatesPerRun": 5000, + "minCalls": 5 + }, + "analysisBySql": { + "SELECT count(*) FROM analytics.orders WHERE status = $1": { + "fingerprint": "fp_orders_status", + "normalizedSql": "SELECT count(*) FROM analytics.orders WHERE status = $1", + "tablesTouched": [ + "analytics.orders" + ], + "literalSlots": [] + } + }, + "baseline": null, + "expectedBaseline": { + "version": 1, + "fetchedAt": "2026-05-08T12:00:00.000Z", + "statsResetAt": "2026-05-08T08:00:00.000Z", + "pgServerVersion": "PostgreSQL 16.4", + "templates": { + "db5_q101": { + "firstObservedAt": "2026-05-08T12:00:00.000Z", + "perUser": { + "11": { + "calls": 10, + "totalExecTime": 250, + "totalRows": 20 + } + } + } + } + }, + "expectedFiles": { + "manifest.json": { + "json": { + "source": 
"historic-sql", + "connectionId": "warehouse", + "dialect": "postgres", + "fetchedAt": "2026-05-08T12:00:00.000Z", + "windowStart": "2026-05-08T08:00:00.000Z", + "windowEnd": "2026-05-08T12:00:00.000Z", + "nextSuccessfulCursor": "2026-05-08T12:00:00.000Z", + "templateCount": 1, + "capped": false, + "warnings": [ + "baseline_first_run:no_previous_pgss_baseline" + ], + "degraded": true, + "statsResetAt": "2026-05-08T08:00:00.000Z", + "baselineFirstRun": true, + "pgServerVersion": "PostgreSQL 16.4", + "deallocCount": 0, + "templates": [ + { + "id": "db5_q101", + "fingerprint": "fp_orders_status", + "subClusterId": null, + "path": "templates/db5_q101/page.md" + } + ] + } + }, + "templates/db5_q101/metadata.json": { + "json": { + "id": "db5_q101", + "title": "postgres · analytics.orders [db5_q101]", + "path": "templates/db5_q101/page.md", + "objectType": "historic_sql_template", + "lastEditedAt": null, + "properties": { + "fingerprint": "fp_orders_status", + "sub_cluster_id": null, + "dialect": "postgres", + "tables_touched": [ + "analytics.orders" + ], + "literal_slots": [], + "triage_signals": { + "executions_bucket": "mid", + "distinct_users_bucket": "solo", + "error_rate_bucket": "ok", + "recency_bucket": "active", + "service_account_only": "false", + "runtime_bucket": "fast" + } + } + } + }, + "templates/db5_q101/page.md": { + "text": "# db5_q101\n\n## Normalized SQL\n```sql\nSELECT count(*) FROM analytics.orders WHERE status = $1\n```\n\n## Tables touched\n- analytics.orders\n" + }, + "templates/db5_q101/usage.json": { + "json": { + "stats": { + "executions": 10, + "distinct_users": 1, + "first_seen": "2026-05-08T12:00:00.000Z", + "last_seen": "2026-05-08T12:00:00.000Z", + "p50_runtime_ms": null, + "p95_runtime_ms": null, + "mean_runtime_ms": 25, + "error_rate": 0, + "rows_produced": 20 + }, + "literal_slots": [], + "samples": [] + } + } + } +} diff --git a/packages/context/src/ingest/adapters/historic-sql/__fixtures__/postgres/normal-delta/input.json 
b/packages/context/src/ingest/adapters/historic-sql/__fixtures__/postgres/normal-delta/input.json new file mode 100644 index 00000000..2cc386da --- /dev/null +++ b/packages/context/src/ingest/adapters/historic-sql/__fixtures__/postgres/normal-delta/input.json @@ -0,0 +1,181 @@ +{ + "name": "normal-delta", + "now": "2026-05-08T12:00:00.000Z", + "connectionId": "warehouse", + "probe": { + "pgServerVersion": "PostgreSQL 16.4", + "warnings": [] + }, + "snapshot": { + "statsResetAt": "2026-05-08T08:00:00.000Z", + "deallocCount": 0, + "rows": [ + { + "queryid": "201", + "userid": "11", + "username": "analyst", + "dbid": "5", + "database": "analytics", + "query": "SELECT count(*) FROM analytics.orders WHERE status = $1", + "calls": 12, + "totalExecTime": 160, + "meanExecTime": 13.333333333333334, + "totalRows": 58 + }, + { + "queryid": "201", + "userid": "12", + "username": "svc_loader", + "dbid": "5", + "database": "analytics", + "query": "SELECT count(*) FROM analytics.orders WHERE status = $1", + "calls": 5, + "totalExecTime": 50, + "meanExecTime": 10, + "totalRows": 25 + } + ] + }, + "pullConfig": { + "dialect": "postgres", + "windowDays": 90, + "lastSuccessfulCursor": null, + "serviceAccountUserPatterns": [ + "^svc_" + ], + "redactionPatterns": [], + "maxTemplatesPerRun": 5000, + "minCalls": 5 + }, + "analysisBySql": { + "SELECT count(*) FROM analytics.orders WHERE status = $1": { + "fingerprint": "fp_orders_status", + "normalizedSql": "SELECT count(*) FROM analytics.orders WHERE status = $1", + "tablesTouched": [ + "analytics.orders" + ], + "literalSlots": [] + } + }, + "baseline": { + "version": 1, + "fetchedAt": "2026-05-08T10:00:00.000Z", + "statsResetAt": "2026-05-08T08:00:00.000Z", + "pgServerVersion": "PostgreSQL 16.4", + "templates": { + "db5_q201": { + "firstObservedAt": "2026-05-08T09:00:00.000Z", + "perUser": { + "11": { + "calls": 10, + "totalExecTime": 100, + "totalRows": 50 + }, + "12": { + "calls": 5, + "totalExecTime": 50, + "totalRows": 25 + } + } + 
} + } + }, + "expectedBaseline": { + "version": 1, + "fetchedAt": "2026-05-08T12:00:00.000Z", + "statsResetAt": "2026-05-08T08:00:00.000Z", + "pgServerVersion": "PostgreSQL 16.4", + "templates": { + "db5_q201": { + "firstObservedAt": "2026-05-08T09:00:00.000Z", + "perUser": { + "11": { + "calls": 12, + "totalExecTime": 160, + "totalRows": 58 + }, + "12": { + "calls": 5, + "totalExecTime": 50, + "totalRows": 25 + } + } + } + } + }, + "expectedFiles": { + "manifest.json": { + "json": { + "source": "historic-sql", + "connectionId": "warehouse", + "dialect": "postgres", + "fetchedAt": "2026-05-08T12:00:00.000Z", + "windowStart": "2026-05-08T10:00:00.000Z", + "windowEnd": "2026-05-08T12:00:00.000Z", + "nextSuccessfulCursor": "2026-05-08T12:00:00.000Z", + "templateCount": 1, + "capped": false, + "warnings": [], + "degraded": true, + "statsResetAt": "2026-05-08T08:00:00.000Z", + "baselineFirstRun": false, + "pgServerVersion": "PostgreSQL 16.4", + "deallocCount": 0, + "templates": [ + { + "id": "db5_q201", + "fingerprint": "fp_orders_status", + "subClusterId": null, + "path": "templates/db5_q201/page.md" + } + ] + } + }, + "templates/db5_q201/metadata.json": { + "json": { + "id": "db5_q201", + "title": "postgres · analytics.orders [db5_q201]", + "path": "templates/db5_q201/page.md", + "objectType": "historic_sql_template", + "lastEditedAt": null, + "properties": { + "fingerprint": "fp_orders_status", + "sub_cluster_id": null, + "dialect": "postgres", + "tables_touched": [ + "analytics.orders" + ], + "literal_slots": [], + "triage_signals": { + "executions_bucket": "low", + "distinct_users_bucket": "solo", + "error_rate_bucket": "ok", + "recency_bucket": "active", + "service_account_only": "false", + "runtime_bucket": "fast" + } + } + } + }, + "templates/db5_q201/page.md": { + "text": "# db5_q201\n\n## Normalized SQL\n```sql\nSELECT count(*) FROM analytics.orders WHERE status = $1\n```\n\n## Tables touched\n- analytics.orders\n" + }, + "templates/db5_q201/usage.json": { + 
"json": { + "stats": { + "executions": 2, + "distinct_users": 1, + "first_seen": "2026-05-08T09:00:00.000Z", + "last_seen": "2026-05-08T12:00:00.000Z", + "p50_runtime_ms": null, + "p95_runtime_ms": null, + "mean_runtime_ms": 30, + "error_rate": 0, + "rows_produced": 8 + }, + "literal_slots": [], + "samples": [] + } + } + } +} diff --git a/packages/context/src/ingest/adapters/historic-sql/__fixtures__/postgres/reset-detected/input.json b/packages/context/src/ingest/adapters/historic-sql/__fixtures__/postgres/reset-detected/input.json new file mode 100644 index 00000000..f2e0b16f --- /dev/null +++ b/packages/context/src/ingest/adapters/historic-sql/__fixtures__/postgres/reset-detected/input.json @@ -0,0 +1,159 @@ +{ + "name": "reset-detected", + "now": "2026-05-08T12:00:00.000Z", + "connectionId": "warehouse", + "probe": { + "pgServerVersion": "PostgreSQL 16.4", + "warnings": [] + }, + "snapshot": { + "statsResetAt": "2026-05-08T11:00:00.000Z", + "deallocCount": 0, + "rows": [ + { + "queryid": "301", + "userid": "11", + "username": "analyst", + "dbid": "5", + "database": "analytics", + "query": "SELECT count(*) FROM analytics.orders WHERE status = $1", + "calls": 3, + "totalExecTime": 90, + "meanExecTime": 30, + "totalRows": 9 + } + ] + }, + "pullConfig": { + "dialect": "postgres", + "windowDays": 90, + "lastSuccessfulCursor": null, + "serviceAccountUserPatterns": [], + "redactionPatterns": [], + "maxTemplatesPerRun": 5000, + "minCalls": 5 + }, + "analysisBySql": { + "SELECT count(*) FROM analytics.orders WHERE status = $1": { + "fingerprint": "fp_orders_status", + "normalizedSql": "SELECT count(*) FROM analytics.orders WHERE status = $1", + "tablesTouched": [ + "analytics.orders" + ], + "literalSlots": [] + } + }, + "baseline": { + "version": 1, + "fetchedAt": "2026-05-08T10:00:00.000Z", + "statsResetAt": "2026-05-08T08:00:00.000Z", + "pgServerVersion": "PostgreSQL 16.4", + "templates": { + "db5_q301": { + "firstObservedAt": "2026-05-08T09:00:00.000Z", + "perUser": 
{ + "11": { + "calls": 100, + "totalExecTime": 1000, + "totalRows": 500 + } + } + } + } + }, + "expectedBaseline": { + "version": 1, + "fetchedAt": "2026-05-08T12:00:00.000Z", + "statsResetAt": "2026-05-08T11:00:00.000Z", + "pgServerVersion": "PostgreSQL 16.4", + "templates": { + "db5_q301": { + "firstObservedAt": "2026-05-08T12:00:00.000Z", + "perUser": { + "11": { + "calls": 3, + "totalExecTime": 90, + "totalRows": 9 + } + } + } + } + }, + "expectedFiles": { + "manifest.json": { + "json": { + "source": "historic-sql", + "connectionId": "warehouse", + "dialect": "postgres", + "fetchedAt": "2026-05-08T12:00:00.000Z", + "windowStart": "2026-05-08T10:00:00.000Z", + "windowEnd": "2026-05-08T12:00:00.000Z", + "nextSuccessfulCursor": "2026-05-08T12:00:00.000Z", + "templateCount": 1, + "capped": false, + "warnings": [ + "baseline_reset:stats_reset advanced from 2026-05-08T08:00:00.000Z to 2026-05-08T11:00:00.000Z" + ], + "degraded": true, + "statsResetAt": "2026-05-08T11:00:00.000Z", + "baselineFirstRun": true, + "pgServerVersion": "PostgreSQL 16.4", + "deallocCount": 0, + "templates": [ + { + "id": "db5_q301", + "fingerprint": "fp_orders_status", + "subClusterId": null, + "path": "templates/db5_q301/page.md" + } + ] + } + }, + "templates/db5_q301/metadata.json": { + "json": { + "id": "db5_q301", + "title": "postgres · analytics.orders [db5_q301]", + "path": "templates/db5_q301/page.md", + "objectType": "historic_sql_template", + "lastEditedAt": null, + "properties": { + "fingerprint": "fp_orders_status", + "sub_cluster_id": null, + "dialect": "postgres", + "tables_touched": [ + "analytics.orders" + ], + "literal_slots": [], + "triage_signals": { + "executions_bucket": "mid", + "distinct_users_bucket": "solo", + "error_rate_bucket": "ok", + "recency_bucket": "active", + "service_account_only": "false", + "runtime_bucket": "fast" + } + } + } + }, + "templates/db5_q301/page.md": { + "text": "# db5_q301\n\n## Normalized SQL\n```sql\nSELECT count(*) FROM analytics.orders 
WHERE status = $1\n```\n\n## Tables touched\n- analytics.orders\n" + }, + "templates/db5_q301/usage.json": { + "json": { + "stats": { + "executions": 3, + "distinct_users": 1, + "first_seen": "2026-05-08T12:00:00.000Z", + "last_seen": "2026-05-08T12:00:00.000Z", + "p50_runtime_ms": null, + "p95_runtime_ms": null, + "mean_runtime_ms": 30, + "error_rate": 0, + "rows_produced": 9 + }, + "literal_slots": [], + "samples": [] + } + } + } +} diff --git a/packages/context/src/ingest/adapters/historic-sql/__fixtures__/postgres/version-change/input.json b/packages/context/src/ingest/adapters/historic-sql/__fixtures__/postgres/version-change/input.json new file mode 100644 index 00000000..1618e3ca --- /dev/null +++ b/packages/context/src/ingest/adapters/historic-sql/__fixtures__/postgres/version-change/input.json @@ -0,0 +1,159 @@ +{ + "name": "version-change", + "now": "2026-05-08T12:00:00.000Z", + "connectionId": "warehouse", + "probe": { + "pgServerVersion": "PostgreSQL 16.4", + "warnings": [] + }, + "snapshot": { + "statsResetAt": "2026-05-08T08:00:00.000Z", + "deallocCount": 0, + "rows": [ + { + "queryid": "401", + "userid": "11", + "username": "analyst", + "dbid": "5", + "database": "analytics", + "query": "SELECT count(*) FROM analytics.orders WHERE status = $1", + "calls": 4, + "totalExecTime": 80, + "meanExecTime": 20, + "totalRows": 8 + } + ] + }, + "pullConfig": { + "dialect": "postgres", + "windowDays": 90, + "lastSuccessfulCursor": null, + "serviceAccountUserPatterns": [], + "redactionPatterns": [], + "maxTemplatesPerRun": 5000, + "minCalls": 5 + }, + "analysisBySql": { + "SELECT count(*) FROM analytics.orders WHERE status = $1": { + "fingerprint": "fp_orders_status", + "normalizedSql": "SELECT count(*) FROM analytics.orders WHERE status = $1", + "tablesTouched": [ + "analytics.orders" + ], + "literalSlots": [] + } + }, + "baseline": { + "version": 1, + "fetchedAt": "2026-05-08T10:00:00.000Z", + "statsResetAt": "2026-05-08T08:00:00.000Z", + "pgServerVersion": 
"PostgreSQL 15.7", + "templates": { + "db5_q401": { + "firstObservedAt": "2026-05-08T09:00:00.000Z", + "perUser": { + "11": { + "calls": 100, + "totalExecTime": 1000, + "totalRows": 500 + } + } + } + } + }, + "expectedBaseline": { + "version": 1, + "fetchedAt": "2026-05-08T12:00:00.000Z", + "statsResetAt": "2026-05-08T08:00:00.000Z", + "pgServerVersion": "PostgreSQL 16.4", + "templates": { + "db5_q401": { + "firstObservedAt": "2026-05-08T12:00:00.000Z", + "perUser": { + "11": { + "calls": 4, + "totalExecTime": 80, + "totalRows": 8 + } + } + } + } + }, + "expectedFiles": { + "manifest.json": { + "json": { + "source": "historic-sql", + "connectionId": "warehouse", + "dialect": "postgres", + "fetchedAt": "2026-05-08T12:00:00.000Z", + "windowStart": "2026-05-08T10:00:00.000Z", + "windowEnd": "2026-05-08T12:00:00.000Z", + "nextSuccessfulCursor": "2026-05-08T12:00:00.000Z", + "templateCount": 1, + "capped": false, + "warnings": [ + "baseline_reset:pg_server_major changed from 15 to 16" + ], + "degraded": true, + "statsResetAt": "2026-05-08T08:00:00.000Z", + "baselineFirstRun": true, + "pgServerVersion": "PostgreSQL 16.4", + "deallocCount": 0, + "templates": [ + { + "id": "db5_q401", + "fingerprint": "fp_orders_status", + "subClusterId": null, + "path": "templates/db5_q401/page.md" + } + ] + } + }, + "templates/db5_q401/metadata.json": { + "json": { + "id": "db5_q401", + "title": "postgres · analytics.orders [db5_q401]", + "path": "templates/db5_q401/page.md", + "objectType": "historic_sql_template", + "lastEditedAt": null, + "properties": { + "fingerprint": "fp_orders_status", + "sub_cluster_id": null, + "dialect": "postgres", + "tables_touched": [ + "analytics.orders" + ], + "literal_slots": [], + "triage_signals": { + "executions_bucket": "mid", + "distinct_users_bucket": "solo", + "error_rate_bucket": "ok", + "recency_bucket": "active", + "service_account_only": "false", + "runtime_bucket": "fast" + } + } + } + }, + "templates/db5_q401/page.md": { + "text": "# 
db5_q401\n\n## Normalized SQL\n```sql\nSELECT count(*) FROM analytics.orders WHERE status = $1\n```\n\n## Tables touched\n- analytics.orders\n" + }, + "templates/db5_q401/usage.json": { + "json": { + "stats": { + "executions": 4, + "distinct_users": 1, + "first_seen": "2026-05-08T12:00:00.000Z", + "last_seen": "2026-05-08T12:00:00.000Z", + "p50_runtime_ms": null, + "p95_runtime_ms": null, + "mean_runtime_ms": 20, + "error_rate": 0, + "rows_produced": 8 + }, + "literal_slots": [], + "samples": [] + } + } + } +} diff --git a/packages/context/src/ingest/adapters/historic-sql/bigquery-query-history-reader.test.ts b/packages/context/src/ingest/adapters/historic-sql/bigquery-query-history-reader.test.ts new file mode 100644 index 00000000..e0a5e07d --- /dev/null +++ b/packages/context/src/ingest/adapters/historic-sql/bigquery-query-history-reader.test.ts @@ -0,0 +1,200 @@ +import { describe, expect, it, vi } from 'vitest'; +import { BigQueryHistoricSqlQueryHistoryReader } from './bigquery-query-history-reader.js'; +import { HistoricSqlGrantsMissingError } from './errors.js'; + +interface FakeQueryResult { + headers: string[]; + rows: unknown[][]; + totalRows: number; + error?: string; +} + +function queryClient(results: FakeQueryResult[]) { + const executeQuery = vi.fn(async (_query: string) => { + const next = results.shift(); + if (!next) { + throw new Error('unexpected query'); + } + return next; + }); + return { executeQuery }; +} + +function firstQuery(client: ReturnType): string { + const call = client.executeQuery.mock.calls[0]; + if (!call) { + throw new Error('expected query client to be called'); + } + return call[0]; +} + +describe('BigQueryHistoricSqlQueryHistoryReader', () => { + it('probes region-qualified INFORMATION_SCHEMA.JOBS_BY_PROJECT', async () => { + const client = queryClient([{ headers: ['1'], rows: [[1]], totalRows: 1 }]); + const reader = new BigQueryHistoricSqlQueryHistoryReader({ projectId: 'project-1', region: 'US' }); + + await 
expect(reader.probe(client)).resolves.toBeUndefined(); + + expect(client.executeQuery).toHaveBeenCalledWith( + 'SELECT 1 FROM `project-1.region-us.INFORMATION_SCHEMA.JOBS_BY_PROJECT` LIMIT 1', + ); + }); + + it('turns probe result errors into HistoricSqlGrantsMissingError', async () => { + const client = queryClient([{ headers: [], rows: [], totalRows: 0, error: 'Access Denied: jobs.listAll' }]); + const reader = new BigQueryHistoricSqlQueryHistoryReader({ projectId: 'project-1', region: 'us-central1' }); + + await expect(reader.probe(client)).rejects.toMatchObject({ + name: 'HistoricSqlGrantsMissingError', + dialect: 'bigquery', + remediation: + 'Grant roles/bigquery.resourceViewer on the BigQuery project, or grant a custom role containing bigquery.jobs.listAll.', + }); + }); + + it('turns thrown probe failures into HistoricSqlGrantsMissingError', async () => { + const client = { + executeQuery: vi.fn(async () => { + throw new Error('permission denied'); + }), + }; + const reader = new BigQueryHistoricSqlQueryHistoryReader({ projectId: 'project-1', region: 'US' }); + + await expect(reader.probe(client)).rejects.toBeInstanceOf(HistoricSqlGrantsMissingError); + }); + + it('fetches BigQuery jobs with cursor and maps them into RawQueryRow shape without rowsProduced', async () => { + const client = queryClient([ + { + headers: [ + 'job_id', + 'query', + 'user_email', + 'creation_time', + 'end_time', + 'runtime_ms', + 'total_slot_ms', + 'total_bytes_processed', + 'state', + 'error_reason', + 'error_message', + 'statement_type', + ], + rows: [ + [ + 'bquxjob_1', + "SELECT COUNT(*) FROM `project-1.analytics.orders` WHERE status = 'paid'", + 'analyst-a@example.test', + '2026-05-04T10:00:00.000Z', + '2026-05-04T10:00:01.250Z', + 1250, + 3106, + 161164718, + 'DONE', + null, + null, + 'SELECT', + ], + [ + 'bquxjob_2', + 'SELECT * FROM `project-1.analytics.missing_table`', + 'analyst-b@example.test', + new Date('2026-05-04T10:05:00.000Z'), + null, + null, + 0, + 0, + 'DONE', + 
'notFound', + 'Not found: Table project-1.analytics.missing_table', + 'SELECT', + ], + ], + totalRows: 2, + }, + ]); + const reader = new BigQueryHistoricSqlQueryHistoryReader({ projectId: 'project-1', region: 'US' }); + + const rows = []; + for await (const row of reader.fetch( + client, + { + start: new Date('2026-05-01T00:00:00.000Z'), + end: new Date('2026-05-04T12:00:00.000Z'), + }, + '2026-05-03T00:00:00.000Z', + )) { + rows.push(row); + } + + expect(client.executeQuery).toHaveBeenCalledTimes(1); + const sql = firstQuery(client); + expect(sql).toContain('FROM `project-1.region-us.INFORMATION_SCHEMA.JOBS_BY_PROJECT`'); + expect(sql).toContain("creation_time >= TIMESTAMP('2026-05-03T00:00:00.000Z')"); + expect(sql).toContain("creation_time < TIMESTAMP('2026-05-04T12:00:00.000Z')"); + expect(sql).toContain("job_type = 'QUERY'"); + expect(sql).toContain("(statement_type IS NULL OR statement_type != 'SCRIPT')"); + expect(sql).toContain('ORDER BY creation_time ASC, job_id ASC'); + expect(sql).toContain('total_slot_ms'); + expect(sql).toContain('total_bytes_processed'); + expect(sql).not.toMatch(/total_rows/i); + + expect(rows).toEqual([ + { + id: 'bquxjob_1', + sql: "SELECT COUNT(*) FROM `project-1.analytics.orders` WHERE status = 'paid'", + user: 'analyst-a@example.test', + startedAt: '2026-05-04T10:00:00.000Z', + endedAt: '2026-05-04T10:00:01.250Z', + runtimeMs: 1250, + success: true, + errorMessage: null, + }, + { + id: 'bquxjob_2', + sql: 'SELECT * FROM `project-1.analytics.missing_table`', + user: 'analyst-b@example.test', + startedAt: '2026-05-04T10:05:00.000Z', + endedAt: null, + runtimeMs: null, + success: false, + errorMessage: 'notFound: Not found: Table project-1.analytics.missing_table', + }, + ]); + }); + + it('uses the window start when no cursor is available', async () => { + const client = queryClient([{ headers: ['job_id'], rows: [], totalRows: 0 }]); + const reader = new BigQueryHistoricSqlQueryHistoryReader({ projectId: 'project-1', region: 'EU' 
}); + + for await (const _row of reader.fetch(client, { + start: new Date('2026-02-03T12:00:00.000Z'), + end: new Date('2026-05-04T12:00:00.000Z'), + })) { + throw new Error('empty result should not yield rows'); + } + + const sql = firstQuery(client); + expect(sql).toContain('FROM `project-1.region-eu.INFORMATION_SCHEMA.JOBS_BY_PROJECT`'); + expect(sql).toContain("creation_time >= TIMESTAMP('2026-02-03T12:00:00.000Z')"); + }); + + it('throws a clear error when the query client cannot execute SQL', async () => { + const reader = new BigQueryHistoricSqlQueryHistoryReader({ projectId: 'project-1', region: 'US' }); + + await expect(async () => { + for await (const _row of reader.fetch({}, { start: new Date(), end: new Date() })) { + throw new Error('unreachable'); + } + }).rejects.toThrow('Historic SQL BigQuery reader requires a query client with executeQuery(query)'); + }); + + it('rejects unsafe project and region identifiers before building SQL', () => { + expect(() => new BigQueryHistoricSqlQueryHistoryReader({ projectId: 'project`1', region: 'US' })).toThrow( + 'Invalid BigQuery project id for historic-SQL ingest: project`1', + ); + expect(() => new BigQueryHistoricSqlQueryHistoryReader({ projectId: 'project-1', region: 'US;DROP' })).toThrow( + 'Invalid BigQuery region for historic-SQL ingest: US;DROP', + ); + }); +}); diff --git a/packages/context/src/ingest/adapters/historic-sql/bigquery-query-history-reader.ts b/packages/context/src/ingest/adapters/historic-sql/bigquery-query-history-reader.ts new file mode 100644 index 00000000..ea8fb00e --- /dev/null +++ b/packages/context/src/ingest/adapters/historic-sql/bigquery-query-history-reader.ts @@ -0,0 +1,219 @@ +import { HistoricSqlGrantsMissingError } from './errors.js'; +import type { HistoricSqlQueryHistoryReader, HistoricSqlRawQueryRow, HistoricSqlTimeWindow } from './types.js'; + +interface QueryResultLike { + headers: string[]; + rows: unknown[][]; + totalRows: number; + error?: string; +} + +interface 
QueryClientLike { + executeQuery(query: string): Promise; +} + +export interface BigQueryHistoricSqlQueryHistoryReaderOptions { + projectId: string; + region: string; +} + +const BIGQUERY_GRANTS_REMEDIATION = + 'Grant roles/bigquery.resourceViewer on the BigQuery project, or grant a custom role containing bigquery.jobs.listAll.'; + +function queryClient(client: unknown): QueryClientLike { + if ( + client && + typeof client === 'object' && + 'executeQuery' in client && + typeof (client as { executeQuery?: unknown }).executeQuery === 'function' + ) { + return client as QueryClientLike; + } + throw new Error('Historic SQL BigQuery reader requires a query client with executeQuery(query)'); +} + +function grantsError(cause: unknown): HistoricSqlGrantsMissingError { + const message = + cause instanceof Error + ? cause.message + : typeof cause === 'string' + ? cause + : 'BigQuery principal cannot query INFORMATION_SCHEMA.JOBS_BY_PROJECT.'; + return new HistoricSqlGrantsMissingError({ + dialect: 'bigquery', + message: `Missing BigQuery audit grants for historic-SQL ingest: ${message}`, + remediation: BIGQUERY_GRANTS_REMEDIATION, + cause, + }); +} + +function normalizeProjectId(value: string): string { + if (!/^[A-Za-z0-9_-]+$/.test(value)) { + throw new Error(`Invalid BigQuery project id for historic-SQL ingest: ${value}`); + } + return value; +} + +function normalizeRegion(value: string): string { + const region = value.trim().toLowerCase().replace(/^region-/, ''); + if (!/^[a-z0-9-]+$/.test(region)) { + throw new Error(`Invalid BigQuery region for historic-SQL ingest: ${value}`); + } + return region; +} + +function timestampExpression(value: Date | string): string { + const date = value instanceof Date ? 
value : new Date(value); + if (Number.isNaN(date.getTime())) { + throw new Error(`Invalid BigQuery query-history timestamp: ${String(value)}`); + } + return `TIMESTAMP('${date.toISOString().replace(/'/g, "\\'")}')`; +} + +function indexByHeader(headers: string[]): Map { + const out = new Map(); + headers.forEach((header, index) => { + out.set(header.toUpperCase(), index); + }); + return out; +} + +function value(row: unknown[], indexes: Map, name: string): unknown { + const index = indexes.get(name.toUpperCase()); + return index === undefined ? null : row[index]; +} + +function nullableString(raw: unknown): string | null { + if (raw === null || raw === undefined) { + return null; + } + const text = String(raw); + return text.length > 0 ? text : null; +} + +function requiredString(raw: unknown, field: string): string { + const text = nullableString(raw); + if (!text) { + throw new Error(`BigQuery JOBS_BY_PROJECT row is missing ${field}`); + } + return text; +} + +function nullableNumber(raw: unknown): number | null { + if (raw === null || raw === undefined || raw === '') { + return null; + } + const number = typeof raw === 'number' ? 
raw : Number(raw); + if (!Number.isFinite(number)) { + return null; + } + return Math.max(0, number); +} + +function isoTimestamp(raw: unknown, field: string): string { + if (raw instanceof Date) { + return raw.toISOString(); + } + const text = requiredString(raw, field); + const date = new Date(text); + if (Number.isNaN(date.getTime())) { + throw new Error(`BigQuery JOBS_BY_PROJECT row has invalid ${field}: ${text}`); + } + return date.toISOString(); +} + +function nullableIsoTimestamp(raw: unknown): string | null { + if (raw === null || raw === undefined || raw === '') { + return null; + } + return isoTimestamp(raw, 'end_time'); +} + +function executionSucceeded(state: string | null, errorReason: string | null, errorMessage: string | null): boolean { + if (errorReason || errorMessage) { + return false; + } + return state === null || state.toUpperCase() === 'DONE'; +} + +function combinedErrorMessage(errorReason: string | null, errorMessage: string | null): string | null { + if (errorReason && errorMessage) { + return `${errorReason}: ${errorMessage}`; + } + return errorMessage ?? 
errorReason; +} + +function mapRow(row: unknown[], indexes: Map): HistoricSqlRawQueryRow { + const errorReason = nullableString(value(row, indexes, 'error_reason')); + const errorMessage = nullableString(value(row, indexes, 'error_message')); + return { + id: requiredString(value(row, indexes, 'job_id'), 'job_id'), + sql: requiredString(value(row, indexes, 'query'), 'query'), + user: nullableString(value(row, indexes, 'user_email')), + startedAt: isoTimestamp(value(row, indexes, 'creation_time'), 'creation_time'), + endedAt: nullableIsoTimestamp(value(row, indexes, 'end_time')), + runtimeMs: nullableNumber(value(row, indexes, 'runtime_ms')), + success: executionSucceeded(nullableString(value(row, indexes, 'state')), errorReason, errorMessage), + errorMessage: combinedErrorMessage(errorReason, errorMessage), + }; +} + +export class BigQueryHistoricSqlQueryHistoryReader implements HistoricSqlQueryHistoryReader { + private readonly viewPath: string; + + constructor(options: BigQueryHistoricSqlQueryHistoryReaderOptions) { + const projectId = normalizeProjectId(options.projectId); + const region = normalizeRegion(options.region); + this.viewPath = `\`${projectId}.region-${region}.INFORMATION_SCHEMA.JOBS_BY_PROJECT\``; + } + + async probe(client: unknown): Promise { + let result: QueryResultLike; + try { + result = await queryClient(client).executeQuery(`SELECT 1 FROM ${this.viewPath} LIMIT 1`); + } catch (error) { + throw grantsError(error); + } + if (result.error) { + throw grantsError(result.error); + } + } + + async *fetch( + client: unknown, + window: HistoricSqlTimeWindow, + cursor?: string | null, + ): AsyncIterable { + const start = timestampExpression(cursor ?? 
window.start); + const end = timestampExpression(window.end); + const sql = ` +SELECT + job_id, + query, + user_email, + creation_time, + end_time, + TIMESTAMP_DIFF(end_time, creation_time, MILLISECOND) AS runtime_ms, + total_slot_ms, + total_bytes_processed, + state, + error_result.reason AS error_reason, + error_result.message AS error_message, + statement_type +FROM ${this.viewPath} +WHERE creation_time >= ${start} + AND creation_time < ${end} + AND job_type = 'QUERY' + AND query IS NOT NULL + AND (statement_type IS NULL OR statement_type != 'SCRIPT') +ORDER BY creation_time ASC, job_id ASC`.trim(); + const result = await queryClient(client).executeQuery(sql); + if (result.error) { + throw grantsError(result.error); + } + const indexes = indexByHeader(result.headers); + for (const row of result.rows) { + yield mapRow(row, indexes); + } + } +} diff --git a/packages/context/src/ingest/adapters/historic-sql/chunk.test.ts b/packages/context/src/ingest/adapters/historic-sql/chunk.test.ts new file mode 100644 index 00000000..a7941c65 --- /dev/null +++ b/packages/context/src/ingest/adapters/historic-sql/chunk.test.ts @@ -0,0 +1,251 @@ +import { mkdir, mkdtemp, writeFile } from 'node:fs/promises'; +import { tmpdir } from 'node:os'; +import { join } from 'node:path'; +import { describe, expect, it } from 'vitest'; +import { chunkHistoricSqlStagedDir, describeHistoricSqlScope } from './chunk.js'; + +async function tempDir(): Promise { + return mkdtemp(join(tmpdir(), 'historic-sql-chunk-')); +} + +async function writeJson(root: string, relPath: string, value: unknown): Promise { + const target = join(root, relPath); + await mkdir(join(target, '..'), { recursive: true }); + await writeFile(target, `${JSON.stringify(value, null, 2)}\n`, 'utf-8'); +} + +async function writeTemplate(root: string): Promise { + await writeJson(root, 'manifest.json', { + source: 'historic-sql', + connectionId: 'conn_1', + dialect: 'snowflake', + fetchedAt: '2026-05-04T12:00:00.000Z', + 
windowStart: '2026-02-03T12:00:00.000Z', + windowEnd: '2026-05-04T12:00:00.000Z', + nextSuccessfulCursor: '2026-05-04T11:55:00.000Z', + templateCount: 1, + capped: false, + warnings: ['source warning'], + templates: [{ id: 'fp_1', fingerprint: 'fp_1', subClusterId: null, path: 'templates/fp_1/page.md' }], + }); + await writeJson(root, 'templates/fp_1/metadata.json', { + id: 'fp_1', + title: 'snowflake · analytics.orders [fp_1]', + path: 'templates/fp_1/page.md', + objectType: 'historic_sql_template', + lastEditedAt: null, + properties: { + fingerprint: 'fp_1', + sub_cluster_id: null, + dialect: 'snowflake', + tables_touched: ['analytics.orders'], + literal_slots: [{ position: 1, type: 'string', classification: 'constant' }], + triage_signals: { + executions_bucket: 'high', + distinct_users_bucket: 'team', + error_rate_bucket: 'ok', + recency_bucket: 'active', + service_account_only: 'false', + slot_summary: '1 constant, 0 runtime', + }, + }, + }); + await writeFile(join(root, 'templates/fp_1/page.md'), '# fp_1\n', 'utf-8'); + await writeJson(root, 'templates/fp_1/usage.json', { + stats: { + executions: 20, + distinct_users: 3, + first_seen: '2026-05-01T00:00:00.000Z', + last_seen: '2026-05-04T11:55:00.000Z', + p50_runtime_ms: 100, + p95_runtime_ms: 200, + error_rate: 0, + rows_produced: 20, + }, + literal_slots: [{ position: 1, distinct_values: 1, top_values: [['paid', 20]] }], + samples: [], + }); +} + +async function writeSubclusterTemplates(root: string): Promise { + await writeJson(root, 'manifest.json', { + source: 'historic-sql', + connectionId: 'conn_1', + dialect: 'snowflake', + fetchedAt: '2026-05-04T12:00:00.000Z', + windowStart: '2026-02-03T12:00:00.000Z', + windowEnd: '2026-05-04T12:00:00.000Z', + nextSuccessfulCursor: '2026-05-04T11:55:00.000Z', + templateCount: 2, + capped: false, + warnings: [], + templates: [ + { + id: 'fp_order_status__cat_2b2ff2318877', + fingerprint: 'fp_order_status', + subClusterId: 'cat_2b2ff2318877', + path: 
'templates/fp_order_status__cat_2b2ff2318877/page.md', + }, + { + id: 'fp_order_status__cat_34f037ddcbfa', + fingerprint: 'fp_order_status', + subClusterId: 'cat_34f037ddcbfa', + path: 'templates/fp_order_status__cat_34f037ddcbfa/page.md', + }, + ], + }); + + for (const template of [ + { id: 'fp_order_status__cat_2b2ff2318877', subClusterId: 'cat_2b2ff2318877' }, + { id: 'fp_order_status__cat_34f037ddcbfa', subClusterId: 'cat_34f037ddcbfa' }, + ]) { + await writeJson(root, `templates/${template.id}/metadata.json`, { + id: template.id, + title: `snowflake · analytics.orders [fp_ord:${template.subClusterId.slice(-6)}]`, + path: `templates/${template.id}/page.md`, + objectType: 'historic_sql_template', + lastEditedAt: null, + properties: { + fingerprint: 'fp_order_status', + sub_cluster_id: template.subClusterId, + dialect: 'snowflake', + tables_touched: ['analytics.orders'], + literal_slots: [{ position: 1, type: 'string', classification: 'categorical' }], + triage_signals: { + executions_bucket: 'mid', + distinct_users_bucket: 'team', + error_rate_bucket: 'ok', + recency_bucket: 'active', + service_account_only: 'false', + slot_summary: '0 constant, 0 runtime', + }, + }, + }); + await writeFile(join(root, `templates/${template.id}/page.md`), `# ${template.id}\n`, 'utf-8'); + await writeJson(root, `templates/${template.id}/usage.json`, { + stats: { + executions: 3, + distinct_users: 3, + first_seen: '2026-05-04T10:00:00.000Z', + last_seen: '2026-05-04T10:05:00.000Z', + p50_runtime_ms: 120, + p95_runtime_ms: 150, + error_rate: 0, + rows_produced: 36, + }, + literal_slots: [{ position: 1, distinct_values: 1, top_values: [['paid', 3]] }], + samples: [], + }); + } +} + +describe('chunkHistoricSqlStagedDir', () => { + it('emits one WorkUnit per changed template and keeps usage as dependency', async () => { + const stagedDir = await tempDir(); + await writeTemplate(stagedDir); + + const result = await chunkHistoricSqlStagedDir(stagedDir, { + added: 
['templates/fp_1/metadata.json'], + modified: [], + deleted: [], + unchanged: ['templates/fp_1/page.md', 'templates/fp_1/usage.json', 'manifest.json'], + }); + + expect(result.workUnits).toEqual([ + { + unitKey: 'historic-sql-fp-1', + displayLabel: 'snowflake · analytics.orders [fp_1]', + rawFiles: ['templates/fp_1/metadata.json'], + dependencyPaths: ['manifest.json', 'templates/fp_1/usage.json'], + peerFileIndex: ['templates/fp_1/page.md'], + notes: + 'Infer canonical query intent for this single historic-SQL template only. Read metadata.json, page.md, and usage.json for this template; do not group sibling templates in this WorkUnit.', + }, + ]); + expect(result.contextReport).toEqual({ capped: false, warnings: ['source warning'] }); + }); + + it('emits one WorkUnit per changed categorical sub-cluster', async () => { + const stagedDir = await tempDir(); + await writeSubclusterTemplates(stagedDir); + + const result = await chunkHistoricSqlStagedDir(stagedDir, { + added: [ + 'templates/fp_order_status__cat_2b2ff2318877/metadata.json', + 'templates/fp_order_status__cat_34f037ddcbfa/metadata.json', + ], + modified: [], + deleted: [], + unchanged: [ + 'manifest.json', + 'templates/fp_order_status__cat_2b2ff2318877/page.md', + 'templates/fp_order_status__cat_2b2ff2318877/usage.json', + 'templates/fp_order_status__cat_34f037ddcbfa/page.md', + 'templates/fp_order_status__cat_34f037ddcbfa/usage.json', + ], + }); + + expect( + result.workUnits.map((unit) => ({ + unitKey: unit.unitKey, + displayLabel: unit.displayLabel, + rawFiles: unit.rawFiles, + dependencyPaths: unit.dependencyPaths, + })), + ).toEqual([ + { + unitKey: 'historic-sql-fp-order-status-cat-2b2ff2318877', + displayLabel: 'snowflake · analytics.orders [fp_ord:318877]', + rawFiles: ['templates/fp_order_status__cat_2b2ff2318877/metadata.json'], + dependencyPaths: ['manifest.json', 'templates/fp_order_status__cat_2b2ff2318877/usage.json'], + }, + { + unitKey: 'historic-sql-fp-order-status-cat-34f037ddcbfa', + 
displayLabel: 'snowflake · analytics.orders [fp_ord:ddcbfa]', + rawFiles: ['templates/fp_order_status__cat_34f037ddcbfa/metadata.json'], + dependencyPaths: ['manifest.json', 'templates/fp_order_status__cat_34f037ddcbfa/usage.json'], + }, + ]); + }); + + it('emits zero WorkUnits for usage-only diffs', async () => { + const stagedDir = await tempDir(); + await writeTemplate(stagedDir); + + const result = await chunkHistoricSqlStagedDir(stagedDir, { + added: [], + modified: ['templates/fp_1/usage.json'], + deleted: [], + unchanged: ['templates/fp_1/metadata.json', 'templates/fp_1/page.md', 'manifest.json'], + }); + + expect(result.workUnits).toEqual([]); + expect(result.eviction).toBeUndefined(); + }); + + it('emits eviction only for deleted metadata or page files', async () => { + const stagedDir = await tempDir(); + await writeTemplate(stagedDir); + + const result = await chunkHistoricSqlStagedDir(stagedDir, { + added: [], + modified: [], + deleted: ['templates/fp_1/usage.json', 'templates/fp_2/page.md'], + unchanged: [], + }); + + expect(result.eviction).toEqual({ deletedRawPaths: ['templates/fp_2/page.md'] }); + }); + + it('describes historic-sql scope without including unrelated paths', async () => { + const stagedDir = await tempDir(); + await writeTemplate(stagedDir); + + const scope = await describeHistoricSqlScope(stagedDir); + + expect(scope.fingerprint).toHaveLength(64); + expect(scope.isPathInScope('manifest.json')).toBe(true); + expect(scope.isPathInScope('templates/fp_1/usage.json')).toBe(true); + expect(scope.isPathInScope('pages/notion/page.md')).toBe(false); + }); +}); diff --git a/packages/context/src/ingest/adapters/historic-sql/chunk.ts b/packages/context/src/ingest/adapters/historic-sql/chunk.ts new file mode 100644 index 00000000..5d959bc0 --- /dev/null +++ b/packages/context/src/ingest/adapters/historic-sql/chunk.ts @@ -0,0 +1,86 @@ +import { createHash } from 'node:crypto'; +import { readFile, readdir } from 'node:fs/promises'; +import { join, 
relative } from 'node:path';
+import type { ChunkResult, DiffSet, ScopeDescriptor, WorkUnit } from '../../types.js';
+import { historicSqlManifestSchema, historicSqlMetadataSchema } from './types.js';
+
+/**
+ * Recursively lists the regular files below `root`.
+ *
+ * @param root Directory to scan.
+ * @returns Paths relative to `root`, with backslashes replaced by `/`, in sorted order.
+ */
+async function walk(root: string): Promise<string[]> {
+  const entries = await readdir(root, { withFileTypes: true, recursive: true });
+  return entries
+    .filter((entry) => entry.isFile())
+    .map((entry) => relative(root, join(entry.parentPath, entry.name)).replace(/\\/g, '/'))
+    .sort();
+}
+
+/**
+ * Builds a WorkUnit key from a template id: every run of characters outside `[a-zA-Z0-9]`
+ * becomes a single `-`, leading/trailing dashes are trimmed, and `historic-sql-` is prefixed.
+ */
+function safeUnitKey(id: string): string {
+  return `historic-sql-${id.replace(/[^a-zA-Z0-9]+/g, '-').replace(/^-+|-+$/g, '')}`;
+}
+
+/**
+ * Reads `manifest.json` from `stagedDir` and validates it against `historicSqlManifestSchema`.
+ *
+ * @throws Error prefixed with "Invalid historic-SQL manifest" on any read, JSON, or schema failure.
+ */
+async function readManifest(stagedDir: string) {
+  try {
+    return historicSqlManifestSchema.parse(JSON.parse(await readFile(join(stagedDir, 'manifest.json'), 'utf-8')));
+  } catch (error) {
+    throw new Error(`Invalid historic-SQL manifest: ${error instanceof Error ? error.message : String(error)}`);
+  }
+}
+
+/**
+ * Reads one template's `metadata.json` and validates it against `historicSqlMetadataSchema`.
+ *
+ * Mirrors `readManifest`: a missing file, malformed JSON, or schema mismatch is rethrown with the
+ * offending staged path in the message, and the original error is kept as `cause`.
+ *
+ * @param stagedDir Root of the staged directory.
+ * @param metadataPath Path of the metadata file relative to `stagedDir`.
+ * @throws Error prefixed with "Invalid historic-SQL template metadata".
+ */
+async function readTemplateMetadata(stagedDir: string, metadataPath: string) {
+  try {
+    return historicSqlMetadataSchema.parse(JSON.parse(await readFile(join(stagedDir, metadataPath), 'utf-8')));
+  } catch (error) {
+    const reason = error instanceof Error ? error.message : String(error);
+    throw new Error(`Invalid historic-SQL template metadata at ${metadataPath}: ${reason}`, { cause: error });
+  }
+}
+
+/**
+ * Splits a staged historic-SQL directory into one WorkUnit per template.
+ *
+ * A template is any `templates/<dir>/page.md` found on disk. When `diffSet` is given, only
+ * templates whose `metadata.json` or `page.md` appears in `added`/`modified` produce a WorkUnit,
+ * and `rawFiles` is narrowed to those touched files; a change to `usage.json` alone produces none.
+ * `manifest.json` and the template's `usage.json` (when present) are listed as dependencies, and
+ * every other staged file is listed in `peerFileIndex`.
+ *
+ * @param stagedDir Root of the staged directory.
+ * @param diffSet Optional change set; omit it to chunk every template.
+ * @returns WorkUnits, an eviction entry for deleted `metadata.json`/`page.md` paths (if any),
+ *   a reconcile note with the manifest template count, and the manifest's capped/warnings report.
+ * @throws Error when the manifest or a selected template's metadata cannot be read or validated.
+ */
+export async function chunkHistoricSqlStagedDir(stagedDir: string, diffSet?: DiffSet): Promise<ChunkResult> {
+  const files = await walk(stagedDir);
+  // Existence checks run several times per template; a Set avoids rescanning `files` each time.
+  const present = new Set(files);
+  const manifest = await readManifest(stagedDir);
+  const touched = diffSet ? new Set([...diffSet.added, ...diffSet.modified]) : null;
+  const workUnits: WorkUnit[] = [];
+
+  for (const pagePath of files.filter((path) => /^templates\/[^/]+\/page\.md$/.test(path))) {
+    const metadataPath = pagePath.replace(/\/page\.md$/, '/metadata.json');
+    const usagePath = pagePath.replace(/\/page\.md$/, '/usage.json');
+    const primary = [metadataPath, pagePath].filter((path) => present.has(path));
+    // In diff mode, skip templates whose primary files are all unchanged.
+    if (touched && !primary.some((path) => touched.has(path))) {
+      continue;
+    }
+
+    const metadata = await readTemplateMetadata(stagedDir, metadataPath);
+    const rawFiles = touched ? primary.filter((path) => touched.has(path)).sort() : primary.sort();
+    const dependencyPaths = ['manifest.json', present.has(usagePath) ? usagePath : null]
+      .filter((path): path is string => typeof path === 'string' && !rawFiles.includes(path))
+      .sort();
+    // Everything that is neither a raw file nor a dependency of this unit is exposed as a peer.
+    const excluded = new Set([...rawFiles, ...dependencyPaths]);
+    const peerFileIndex = files.filter((path) => !excluded.has(path)).sort();
+
+    workUnits.push({
+      unitKey: safeUnitKey(metadata.id),
+      displayLabel: metadata.title,
+      rawFiles,
+      dependencyPaths,
+      peerFileIndex,
+      notes:
+        'Infer canonical query intent for this single historic-SQL template only. Read metadata.json, page.md, and usage.json for this template; do not group sibling templates in this WorkUnit.',
+    });
+  }
+
+  // Only deleted metadata.json/page.md files trigger eviction; a deleted usage.json does not.
+  const deletedPrimary = diffSet?.deleted.filter((path) => /^templates\/[^/]+\/(metadata\.json|page\.md)$/.test(path));
+
+  return {
+    workUnits,
+    eviction: deletedPrimary && deletedPrimary.length > 0 ? { deletedRawPaths: deletedPrimary.sort() } : undefined,
+    reconcileNotes: [`Historic-SQL staged templates=${manifest.templateCount}`],
+    contextReport: {
+      capped: manifest.capped,
+      warnings: manifest.warnings,
+    },
+  };
+}
+
+/**
+ * Describes which staged paths belong to the historic-SQL source.
+ *
+ * @param stagedDir Root of the staged directory; its manifest is read and validated.
+ * @returns A fingerprint (SHA-256 hex of the manifest's connectionId, dialect, windowStart and
+ *   windowEnd serialised as JSON) and a predicate that accepts `manifest.json` and any path
+ *   starting with `templates/`.
+ * @throws Error when the manifest cannot be read or validated.
+ */
+export async function describeHistoricSqlScope(stagedDir: string): Promise<ScopeDescriptor> {
+  const manifest = await readManifest(stagedDir);
+  const scopeKey = JSON.stringify({
+    connectionId: manifest.connectionId,
+    dialect: manifest.dialect,
+    windowStart: manifest.windowStart,
+    windowEnd: manifest.windowEnd,
+  });
+  const fingerprint = createHash('sha256').update(scopeKey).digest('hex');
+  return {
+    fingerprint,
+    isPathInScope: (rawPath) => rawPath === 'manifest.json' || rawPath.startsWith('templates/'),
+  };
+}
diff --git a/packages/context/src/ingest/adapters/historic-sql/detect.test.ts b/packages/context/src/ingest/adapters/historic-sql/detect.test.ts
new file mode 100644
index 00000000..c4240192
--- /dev/null
+++ b/packages/context/src/ingest/adapters/historic-sql/detect.test.ts
@@ -0,0 +1,197 @@
+import { mkdir, mkdtemp, writeFile } from 'node:fs/promises';
+import { tmpdir } from 'node:os';
+import { join } from 'node:path';
+import { describe,
expect, it } from 'vitest';
+import { detectHistoricSqlStagedDir } from './detect.js';
+import {
+  HISTORIC_SQL_SOURCE_KEY,
+  historicSqlManifestSchema,
+  historicSqlMetadataSchema,
+  historicSqlPullConfigSchema,
+  historicSqlUsageSchema,
+} from './types.js';
+
+/** Creates a fresh temporary directory for one test and resolves to its path. */
+async function tempDir(): Promise<string> {
+  return mkdtemp(join(tmpdir(), 'historic-sql-detect-'));
+}
+
+/** Writes `value` as pretty-printed JSON (plus a trailing newline) at `relPath` under `root`, creating parent directories. */
+async function writeJson(root: string, relPath: string, value: unknown): Promise<void> {
+  const target = join(root, relPath);
+  await mkdir(join(target, '..'), { recursive: true });
+  await writeFile(target, `${JSON.stringify(value, null, 2)}\n`, 'utf-8');
+}
+
+describe('historic-sql staged dir detection', () => {
+  it('detects manifest source', async () => {
+    const stagedDir = await tempDir();
+    await writeJson(stagedDir, 'manifest.json', {
+      source: HISTORIC_SQL_SOURCE_KEY,
+      connectionId: 'conn_1',
+      dialect: 'snowflake',
+      fetchedAt: '2026-05-04T12:00:00.000Z',
+      windowStart: '2026-02-03T12:00:00.000Z',
+      windowEnd: '2026-05-04T12:00:00.000Z',
+      nextSuccessfulCursor: '2026-05-04T11:55:00.000Z',
+      templateCount: 0,
+      capped: false,
+      warnings: [],
+      templates: [],
+    });
+
+    await expect(detectHistoricSqlStagedDir(stagedDir)).resolves.toBe(true);
+  });
+
+  it('detects document-shaped template structure without manifest', async () => {
+    const stagedDir = await tempDir();
+    await writeFile(join(stagedDir, 'not-a-match.txt'), 'x', 'utf-8');
+    await mkdir(join(stagedDir, 'templates', 'fp_1'), { recursive: true });
+    await writeFile(join(stagedDir, 'templates', 'fp_1', 'metadata.json'), '{}', 'utf-8');
+    await writeFile(join(stagedDir, 'templates', 'fp_1', 'page.md'), '# fp_1\n', 'utf-8');
+
+    await expect(detectHistoricSqlStagedDir(stagedDir)).resolves.toBe(true);
+  });
+
+  it('does not detect unrelated directories', async () => {
+    const stagedDir = await tempDir();
+    await writeJson(stagedDir, 'manifest.json', { source: 'notion' });
+
+    await expect(detectHistoricSqlStagedDir(stagedDir)).resolves.toBe(false);
+  });
+});
+
+describe('historic-sql schemas', () => {
+  it('defaults disabled optional pull-config fields through the parser', () => {
+    expect(
+      historicSqlPullConfigSchema.parse({
+        dialect: 'bigquery',
+      }),
+    ).toEqual({
+      dialect: 'bigquery',
+      windowDays: 90,
+      lastSuccessfulCursor: null,
+      serviceAccountUserPatterns: [],
+      redactionPatterns: [],
+      maxTemplatesPerRun: 5000,
+      minCalls: 5,
+    });
+  });
+
+  it('accepts postgres pull config with a minCalls floor', () => {
+    expect(
+      historicSqlPullConfigSchema.parse({
+        dialect: 'postgres',
+        minCalls: 12,
+      }),
+    ).toEqual({
+      dialect: 'postgres',
+      windowDays: 90,
+      lastSuccessfulCursor: null,
+      serviceAccountUserPatterns: [],
+      redactionPatterns: [],
+      maxTemplatesPerRun: 5000,
+      minCalls: 12,
+    });
+  });
+
+  it('accepts postgres manifest fields with defaults for older dialects', () => {
+    expect(
+      historicSqlManifestSchema.parse({
+        source: HISTORIC_SQL_SOURCE_KEY,
+        connectionId: 'conn_pg',
+        dialect: 'postgres',
+        fetchedAt: '2026-05-08T12:00:00.000Z',
+        windowStart: '2026-05-08T11:00:00.000Z',
+        windowEnd: '2026-05-08T12:00:00.000Z',
+        nextSuccessfulCursor: '2026-05-08T12:00:00.000Z',
+        templateCount: 0,
+        capped: false,
+        warnings: [],
+        templates: [],
+        degraded: true,
+        statsResetAt: '2026-05-01T00:00:00.000Z',
+        baselineFirstRun: true,
+        pgServerVersion: 'PostgreSQL 16.4',
+        deallocCount: 3,
+      }),
+    ).toMatchObject({
+      dialect: 'postgres',
+      degraded: true,
+      statsResetAt: '2026-05-01T00:00:00.000Z',
+      baselineFirstRun: true,
+      pgServerVersion: 'PostgreSQL 16.4',
+      deallocCount: 3,
+    });
+
+    // Manifests without the postgres-only fields must parse with the schema defaults.
+    expect(
+      historicSqlManifestSchema.parse({
+        source: HISTORIC_SQL_SOURCE_KEY,
+        connectionId: 'conn_sf',
+        dialect: 'snowflake',
+        fetchedAt: '2026-05-08T12:00:00.000Z',
+        windowStart: '2026-05-01T12:00:00.000Z',
+        windowEnd: '2026-05-08T12:00:00.000Z',
+        nextSuccessfulCursor: null,
+        templateCount: 0,
+        capped: false,
+        warnings: [],
+        templates: [],
+      }),
+    ).toMatchObject({
+      degraded: false,
+      statsResetAt: null,
+      baselineFirstRun: false,
+      pgServerVersion: null,
+      deallocCount: null,
+    });
+  });
+
+  it('accepts postgres usage stats with mean_runtime_ms and empty samples', () => {
+    const parsed = historicSqlUsageSchema.parse({
+      stats: {
+        executions: 25,
+        distinct_users: 2,
+        first_seen: '2026-05-08T10:00:00.000Z',
+        last_seen: '2026-05-08T12:00:00.000Z',
+        p50_runtime_ms: null,
+        p95_runtime_ms: null,
+        mean_runtime_ms: 32.5,
+        error_rate: 0,
+        rows_produced: 1042,
+      },
+      literal_slots: [],
+      samples: [],
+    });
+
+    expect(parsed.stats.mean_runtime_ms).toBe(32.5);
+    expect(parsed.samples).toEqual([]);
+  });
+
+  it('pins the Notion-compatible metadata envelope', () => {
+    const parsed = historicSqlMetadataSchema.parse({
+      id: 'fp_1',
+      title: 'snowflake · analytics.orders [fp_1]',
+      path: 'templates/fp_1/page.md',
+      objectType: 'historic_sql_template',
+      lastEditedAt: null,
+      properties: {
+        fingerprint: 'fp_1',
+        sub_cluster_id: null,
+        dialect: 'snowflake',
+        tables_touched: ['analytics.orders'],
+        literal_slots: [{ position: 1, type: 'string', classification: 'constant' }],
+        triage_signals: {
+          executions_bucket: 'high',
+          distinct_users_bucket: 'team',
+          error_rate_bucket: 'ok',
+          recency_bucket: 'active',
+          service_account_only: 'false',
+          slot_summary: '1 constant, 0 runtime',
+        },
+      },
+    });
+
+    expect(parsed.objectType).toBe('historic_sql_template');
+    expect(parsed.lastEditedAt).toBeNull();
+    expect(parsed.properties.triage_signals.service_account_only).toBe('false');
+  });
+});
diff --git a/packages/context/src/ingest/adapters/historic-sql/detect.ts b/packages/context/src/ingest/adapters/historic-sql/detect.ts
new file mode 100644
index 00000000..d0a1652b
--- /dev/null
+++ b/packages/context/src/ingest/adapters/historic-sql/detect.ts
@@ -0,0 +1,37 @@
+import { readFile, readdir } from 'node:fs/promises';
+import { join } from 'node:path';
+import { HISTORIC_SQL_SOURCE_KEY } from './types.js';
+
+/**
+ * Decides whether `stagedDir` holds historic-SQL staged output.
+ *
+ * A readable manifest that names a source is authoritative: `source === HISTORIC_SQL_SOURCE_KEY`
+ * yields `true`, any other defined `source` yields `false`. When the manifest is missing,
+ * unreadable, or has no `source`, the directory is accepted if some directory under `templates/`
+ * contains both `metadata.json` and `page.md`.
+ *
+ * @param stagedDir Root of the staged directory to inspect.
+ * @returns `true` when the directory is recognised; `false` otherwise, including when
+ *   `templates/` cannot be read.
+ */
+export async function detectHistoricSqlStagedDir(stagedDir: string): Promise<boolean> {
+  try {
+    const manifest = JSON.parse(await readFile(join(stagedDir, 'manifest.json'), 'utf-8')) as { source?: unknown };
+    if (manifest.source === HISTORIC_SQL_SOURCE_KEY) {
+      return true;
+    }
+    if (manifest.source !== undefined) {
+      return false;
+    }
+  } catch {
+    // Fall through to structural detection for stage-only fixtures.
+  }
+
+  try {
+    const entries = await readdir(join(stagedDir, 'templates'), { withFileTypes: true, recursive: true });
+    // Directories that contain each of the two required files, keyed by the entry's parent path.
+    const metadataDirs = new Set<string>();
+    const pageDirs = new Set<string>();
+    for (const entry of entries) {
+      if (!entry.isFile()) {
+        continue;
+      }
+      if (entry.name === 'metadata.json') {
+        metadataDirs.add(entry.parentPath);
+      }
+      if (entry.name === 'page.md') {
+        pageDirs.add(entry.parentPath);
+      }
+    }
+    return [...metadataDirs].some((dir) => pageDirs.has(dir));
+  } catch {
+    return false;
+  }
+}
diff --git a/packages/context/src/ingest/adapters/historic-sql/errors.ts b/packages/context/src/ingest/adapters/historic-sql/errors.ts
new file mode 100644
index 00000000..f888ff3f
--- /dev/null
+++ b/packages/context/src/ingest/adapters/historic-sql/errors.ts
@@ -0,0 +1,61 @@
+import type { HistoricSqlDialect } from './types.js';
+
+/** Constructor options for {@link HistoricSqlGrantsMissingError}. */
+interface HistoricSqlGrantsMissingErrorOptions {
+  dialect: HistoricSqlDialect;
+  message: string;
+  remediation: string;
+  cause?: unknown;
+}
+
+/**
+ * Error carrying the affected dialect and a remediation text alongside the message.
+ * `cause` is forwarded to `Error` only when it was provided.
+ */
+export class HistoricSqlGrantsMissingError extends Error {
+  readonly dialect: HistoricSqlDialect;
+  readonly remediation: string;
+
+  constructor(options: HistoricSqlGrantsMissingErrorOptions) {
+    super(options.message, options.cause === undefined ? undefined : { cause: options.cause });
+    this.name = 'HistoricSqlGrantsMissingError';
+    this.dialect = options.dialect;
+    this.remediation = options.remediation;
+  }
+}
+
+/** Constructor options for {@link HistoricSqlExtensionMissingError}. */
+interface HistoricSqlExtensionMissingErrorOptions {
+  dialect: HistoricSqlDialect;
+  message: string;
+  remediation: string;
+  cause?: unknown;
+}
+
+/**
+ * Error carrying the affected dialect and a remediation text alongside the message.
+ * `cause` is forwarded to `Error` only when it was provided.
+ */
+export class HistoricSqlExtensionMissingError extends Error {
+  readonly dialect: HistoricSqlDialect;
+  readonly remediation: string;
+
+  constructor(options: HistoricSqlExtensionMissingErrorOptions) {
+    super(options.message, options.cause === undefined ? undefined : { cause: options.cause });
+    this.name = 'HistoricSqlExtensionMissingError';
+    this.dialect = options.dialect;
+    this.remediation = options.remediation;
+  }
+}
+
+/** Constructor options for {@link HistoricSqlVersionUnsupportedError}. */
+interface HistoricSqlVersionUnsupportedErrorOptions {
+  dialect: HistoricSqlDialect;
+  detectedVersion: string;
+  minimumVersion: string;
+}
+
+/**
+ * Error whose message is built from the dialect, the detected version, and the minimum version;
+ * all three are also exposed as readonly fields.
+ */
+export class HistoricSqlVersionUnsupportedError extends Error {
+  readonly dialect: HistoricSqlDialect;
+  readonly detectedVersion: string;
+  readonly minimumVersion: string;
+
+  constructor(options: HistoricSqlVersionUnsupportedErrorOptions) {
+    super(
+      `Unsupported ${options.dialect} version for historic-SQL ingest: detected ${options.detectedVersion}; requires ${options.minimumVersion} or newer.`,
+    );
+    this.name = 'HistoricSqlVersionUnsupportedError';
+    this.dialect = options.dialect;
+    this.detectedVersion = options.detectedVersion;
+    this.minimumVersion = options.minimumVersion;
+  }
+}
diff --git a/packages/context/src/ingest/adapters/historic-sql/historic-sql.adapter.test.ts b/packages/context/src/ingest/adapters/historic-sql/historic-sql.adapter.test.ts
new file mode 100644
index 00000000..40926965
--- /dev/null
+++ b/packages/context/src/ingest/adapters/historic-sql/historic-sql.adapter.test.ts
@@ -0,0 +1,304 @@
+import { mkdir, mkdtemp, readFile, writeFile } from 'node:fs/promises';
+import { tmpdir } from 'node:os';
+import { join } from 'node:path';
+import { describe,
expect, it, vi } from 'vitest';
+import type { SqlAnalysisPort } from '../../../sql-analysis/index.js';
+import { HistoricSqlSourceAdapter } from './historic-sql.adapter.js';
+import { pgssBaselinePath } from './stage-pgss.js';
+import type { HistoricSqlQueryHistoryReader, PostgresPgssReader } from './types.js';
+
+/** Creates a fresh temporary directory for one test and resolves to its path. */
+async function tempDir(): Promise<string> {
+  return mkdtemp(join(tmpdir(), 'historic-sql-adapter-'));
+}
+
+/** Writes `value` as pretty-printed JSON (plus a trailing newline) at `relPath` under `root`, creating parent directories. */
+async function writeJson(root: string, relPath: string, value: unknown): Promise<void> {
+  const target = join(root, relPath);
+  await mkdir(join(target, '..'), { recursive: true });
+  await writeFile(target, `${JSON.stringify(value, null, 2)}\n`, 'utf-8');
+}
+
+// Stub analysis port: returns the same fixed analysis result for every call.
+const sqlAnalysis: SqlAnalysisPort = {
+  async analyzeForFingerprint() {
+    return {
+      fingerprint: 'fp_1',
+      normalizedSql: 'SELECT count(*) FROM analytics.orders WHERE status = ?',
+      tablesTouched: ['analytics.orders'],
+      literalSlots: [{ position: 1, type: 'string', exampleValue: 'paid' }],
+    };
+  },
+};
+
+// Stub query-history reader: `probe` is a no-op and `fetch` yields one fixed query record.
+const reader: HistoricSqlQueryHistoryReader = {
+  async probe() {},
+  async *fetch() {
+    yield {
+      id: 'q1',
+      sql: "SELECT count(*) FROM analytics.orders WHERE status = 'paid'",
+      user: 'analyst',
+      startedAt: '2026-05-04T11:00:00.000Z',
+      endedAt: null,
+      runtimeMs: 10,
+      rowsProduced: 1,
+      success: true,
+      errorMessage: null,
+    };
+  },
+};
+
+describe('HistoricSqlSourceAdapter', () => {
+  it('declares canonical adapter metadata', () => {
+    const adapter = new HistoricSqlSourceAdapter({ sqlAnalysis, reader, queryClient: {} });
+
+    expect(adapter.source).toBe('historic-sql');
+    expect(adapter.skillNames).toEqual(['historic_sql_ingest']);
+    expect(adapter.reconcileSkillNames).toEqual(['historic_sql_curator']);
+    expect(adapter.evidenceIndexing).toBe('documents');
+    expect(adapter.triageSupported).toBe(true);
+  });
+
+  it('fetches staged templates through injected reader and SqlAnalysisPort', async () => {
+    const stagedDir = await tempDir();
+    const adapter = new HistoricSqlSourceAdapter({
+      sqlAnalysis,
+      reader,
+      queryClient: {},
+      now: () => new Date('2026-05-04T12:00:00.000Z'),
+    });
+
+    await adapter.fetch(
+      {
+        dialect: 'snowflake',
+        windowDays: 90,
+        lastSuccessfulCursor: null,
+        serviceAccountUserPatterns: [],
+        redactionPatterns: [],
+        maxTemplatesPerRun: 5000,
+      },
+      stagedDir,
+      { connectionId: 'conn_1', sourceKey: 'historic-sql' },
+    );
+
+    await expect(adapter.detect(stagedDir)).resolves.toBe(true);
+  });
+
+  it('reads triage signals from usage.json and metadata properties', async () => {
+    const stagedDir = await tempDir();
+    await writeJson(stagedDir, 'manifest.json', {
+      source: 'historic-sql',
+      connectionId: 'conn_1',
+      dialect: 'snowflake',
+      fetchedAt: '2026-05-04T12:00:00.000Z',
+      windowStart: '2026-02-03T12:00:00.000Z',
+      windowEnd: '2026-05-04T12:00:00.000Z',
+      nextSuccessfulCursor: '2026-05-04T11:55:00.000Z',
+      templateCount: 1,
+      capped: false,
+      warnings: [],
+      templates: [{ id: 'fp_1', fingerprint: 'fp_1', subClusterId: null, path: 'templates/fp_1/page.md' }],
+    });
+    await writeJson(stagedDir, 'templates/fp_1/metadata.json', {
+      id: 'fp_1',
+      title: 'snowflake · analytics.orders [fp_1]',
+      path: 'templates/fp_1/page.md',
+      objectType: 'historic_sql_template',
+      lastEditedAt: null,
+      properties: {
+        fingerprint: 'fp_1',
+        sub_cluster_id: null,
+        dialect: 'snowflake',
+        tables_touched: ['analytics.orders'],
+        literal_slots: [{ position: 1, type: 'string', classification: 'constant' }],
+        triage_signals: {
+          executions_bucket: 'high',
+          distinct_users_bucket: 'team',
+          error_rate_bucket: 'ok',
+          recency_bucket: 'active',
+          service_account_only: 'false',
+          slot_summary: '1 constant, 0 runtime',
+        },
+      },
+    });
+    await writeFile(join(stagedDir, 'templates/fp_1/page.md'), '# fp_1\n', 'utf-8');
+    await writeJson(stagedDir, 'templates/fp_1/usage.json', {
+      stats: {
+        executions: 20,
+        distinct_users: 3,
+        first_seen: '2026-05-01T00:00:00.000Z',
+        last_seen: '2026-05-04T11:55:00.000Z',
+        p50_runtime_ms: 100,
+        p95_runtime_ms: 200,
+        error_rate: 0,
+      },
+      literal_slots: [{ position: 1, distinct_values: 1, top_values: [['paid', 20]] }],
+      samples: [],
+    });
+
+    const adapter = new HistoricSqlSourceAdapter({ sqlAnalysis, reader, queryClient: {} });
+
+    await expect(adapter.getTriageSignals(stagedDir, 'fp_1')).resolves.toEqual({
+      objectType: 'historic_sql_template',
+      lastEditedAt: '2026-05-04T11:55:00.000Z',
+      propertyHints: {
+        executions_bucket: 'high',
+        distinct_users_bucket: 'team',
+        error_rate_bucket: 'ok',
+        recency_bucket: 'active',
+        service_account_only: 'false',
+        slot_summary: '1 constant, 0 runtime',
+      },
+    });
+  });
+
+  it('dispatches postgres fetches through PGSS staging and writes the baseline only after pull success', async () => {
+    const stagedDir = await tempDir();
+    const baselineRootDir = await tempDir();
+    const baselinePath = pgssBaselinePath(baselineRootDir, 'conn_pg');
+    // Any use of the per-execution reader on the postgres path fails the test.
+    const unusedPerExecutionReader: HistoricSqlQueryHistoryReader = {
+      async probe() {
+        throw new Error('per-execution reader must not be used for postgres');
+      },
+      async *fetch() {
+        throw new Error('per-execution reader must not be used for postgres');
+      },
+    };
+    const postgresReader: PostgresPgssReader = {
+      async probe() {
+        return { pgServerVersion: 'PostgreSQL 16.4', warnings: [] };
+      },
+      async readSnapshot() {
+        return {
+          statsResetAt: '2026-05-08T08:00:00.000Z',
+          deallocCount: 0,
+          rows: [
+            {
+              queryid: '901',
+              userid: '11',
+              username: 'analyst',
+              dbid: '5',
+              database: 'warehouse',
+              query: 'SELECT count(*) FROM analytics.orders WHERE status = $1',
+              calls: 9,
+              totalExecTime: 90,
+              meanExecTime: 10,
+              totalRows: 18,
+            },
+          ],
+        };
+      },
+    };
+    const adapter = new HistoricSqlSourceAdapter({
+      sqlAnalysis,
+      reader: unusedPerExecutionReader,
+      queryClient: {},
+      postgresReader,
+      postgresQueryClient: {
+        async executeQuery() {
+          return { headers: [], rows: [] };
+        },
+      },
+      postgresBaselineRootDir: baselineRootDir,
+      now: () => new Date('2026-05-08T12:00:00.000Z'),
+    });
+
+    await adapter.fetch(
+      {
+        dialect: 'postgres',
+        windowDays: 90,
+        lastSuccessfulCursor: null,
+        serviceAccountUserPatterns: [],
+        redactionPatterns: [],
+        maxTemplatesPerRun: 5000,
+        minCalls: 5,
+      },
+      stagedDir,
+      { connectionId: 'conn_pg', sourceKey: 'historic-sql' },
+    );
+
+    const manifest = JSON.parse(await readFile(join(stagedDir, 'manifest.json'), 'utf-8')) as {
+      dialect: string;
+      baselineFirstRun: boolean;
+      templates: Array<{ id: string }>;
+    };
+    expect(manifest.dialect).toBe('postgres');
+    expect(manifest.baselineFirstRun).toBe(true);
+    expect(manifest.templates).toEqual([
+      { id: 'db5_q901', fingerprint: 'fp_1', subClusterId: null, path: 'templates/db5_q901/page.md' },
+    ]);
+    // The baseline file must not exist until onPullSucceeded has run.
+    await expect(readFile(baselinePath, 'utf-8')).rejects.toMatchObject({ code: 'ENOENT' });
+
+    await adapter.onPullSucceeded({
+      connectionId: 'conn_pg',
+      sourceKey: 'historic-sql',
+      syncId: 'sync_pg',
+      trigger: 'scheduled_pull',
+      completedAt: new Date('2026-05-08T12:01:00.000Z'),
+      stagedDir,
+    });
+
+    const baseline = JSON.parse(await readFile(baselinePath, 'utf-8')) as {
+      fetchedAt: string;
+      templates: Record<string, { perUser: Record<string, { calls: number }> }>;
+    };
+    expect(baseline.fetchedAt).toBe('2026-05-08T12:00:00.000Z');
+    expect(baseline.templates.db5_q901.perUser['11'].calls).toBe(9);
+  });
+
+  it('fails postgres fetches clearly when no PGSS reader is configured', async () => {
+    const adapter = new HistoricSqlSourceAdapter({ sqlAnalysis, reader, queryClient: {} });
+
+    await expect(
+      adapter.fetch(
+        {
+          dialect: 'postgres',
+          windowDays: 90,
+          lastSuccessfulCursor: null,
+          serviceAccountUserPatterns: [],
+          redactionPatterns: [],
+          maxTemplatesPerRun: 5000,
+          minCalls: 5,
+        },
+        await tempDir(),
+        { connectionId: 'conn_pg', sourceKey: 'historic-sql' },
+      ),
+    ).rejects.toThrow('Historic SQL Postgres fetch requires deps.postgresReader');
+  });
+
+  it('forwards manifest cursor through onPullSucceeded without changing the SourceAdapter signature', async () => {
+    const stagedDir = await tempDir();
+    await writeJson(stagedDir, 'manifest.json', {
+      source: 'historic-sql',
+      connectionId: 'conn_1',
+      dialect: 'snowflake',
+      fetchedAt: '2026-05-04T12:00:00.000Z',
+      windowStart: '2026-02-03T12:00:00.000Z',
+      windowEnd: '2026-05-04T12:00:00.000Z',
+      nextSuccessfulCursor: '2026-05-04T11:55:00.000Z',
+      templateCount: 0,
+      capped: false,
+      warnings: [],
+      templates: [],
+    });
+    const onPullSucceeded = vi.fn(async () => {});
+    const adapter = new HistoricSqlSourceAdapter({ sqlAnalysis, reader, queryClient: {}, onPullSucceeded });
+    const completedAt = new Date('2026-05-04T12:01:00.000Z');
+
+    await adapter.onPullSucceeded({
+      connectionId: 'conn_1',
+      sourceKey: 'historic-sql',
+      syncId: 'sync_1',
+      trigger: 'scheduled_pull',
+      completedAt,
+      stagedDir,
+    });
+
+    expect(onPullSucceeded).toHaveBeenCalledWith({
+      connectionId: 'conn_1',
+      sourceKey: 'historic-sql',
+      syncId: 'sync_1',
+      trigger: 'scheduled_pull',
+      completedAt,
+      stagedDir,
+      nextSuccessfulCursor: '2026-05-04T11:55:00.000Z',
+    });
+  });
+});
diff --git a/packages/context/src/ingest/adapters/historic-sql/historic-sql.adapter.ts b/packages/context/src/ingest/adapters/historic-sql/historic-sql.adapter.ts
new file mode 100644
index 00000000..e66b1cd1
--- /dev/null
+++ b/packages/context/src/ingest/adapters/historic-sql/historic-sql.adapter.ts
@@ -0,0 +1,135 @@
+import { readFile } from 'node:fs/promises';
+import { join } from 'node:path';
+import type {
+  ChunkResult,
+  DiffSet,
+  FetchContext,
+  IngestTrigger,
+  ScopeDescriptor,
+  SourceAdapter,
+  TriageSignals,
+} from '../../types.js';
+import { chunkHistoricSqlStagedDir, describeHistoricSqlScope } from './chunk.js';
+import { detectHistoricSqlStagedDir } from './detect.js';
+import { stageHistoricSqlTemplates } from './stage.js';
+import {
+  pgssBaselinePath,
+  stagePgStatStatementsTemplates,
+  writePgssBaselineAtomic,
+  type StagePgStatStatementsTemplatesResult,
+} from './stage-pgss.js';
+import {
+  historicSqlManifestSchema,
+  historicSqlMetadataSchema,
+
historicSqlPullConfigSchema, + historicSqlUsageSchema, + type HistoricSqlSourceAdapterDeps, +} from './types.js'; + +export class HistoricSqlSourceAdapter implements SourceAdapter { + readonly source = 'historic-sql'; + readonly skillNames = ['historic_sql_ingest']; + readonly reconcileSkillNames = ['historic_sql_curator']; + readonly evidenceIndexing = 'documents' as const; + readonly triageSupported = true; + + private readonly pendingPgssBaselines = new Map(); + + constructor(private readonly deps: HistoricSqlSourceAdapterDeps) {} + + detect(stagedDir: string): Promise { + return detectHistoricSqlStagedDir(stagedDir); + } + + async fetch(pullConfig: unknown, stagedDir: string, ctx: FetchContext): Promise { + const config = historicSqlPullConfigSchema.parse(pullConfig); + if (config.dialect === 'postgres') { + if (!this.deps.postgresReader) { + throw new Error('Historic SQL Postgres fetch requires deps.postgresReader'); + } + const postgresQueryClient = this.deps.postgresQueryClient ?? 
this.deps.queryClient; + if ( + !postgresQueryClient || + typeof postgresQueryClient !== 'object' || + !('executeQuery' in postgresQueryClient) || + typeof (postgresQueryClient as { executeQuery?: unknown }).executeQuery !== 'function' + ) { + throw new Error('Historic SQL Postgres fetch requires deps.postgresQueryClient with executeQuery(sql, params?)'); + } + const result = await stagePgStatStatementsTemplates({ + stagedDir, + connectionId: ctx.connectionId, + queryClient: postgresQueryClient as NonNullable, + reader: this.deps.postgresReader, + sqlAnalysis: this.deps.sqlAnalysis, + pullConfig: config, + baselinePath: pgssBaselinePath(this.deps.postgresBaselineRootDir, ctx.connectionId), + now: this.deps.now?.(), + }); + this.pendingPgssBaselines.set(stagedDir, result); + return; + } + + await stageHistoricSqlTemplates({ + stagedDir, + connectionId: ctx.connectionId, + queryClient: this.deps.queryClient, + reader: this.deps.reader, + sqlAnalysis: this.deps.sqlAnalysis, + pullConfig: config, + now: this.deps.now?.(), + }); + } + + chunk(stagedDir: string, diffSet?: DiffSet): Promise { + return chunkHistoricSqlStagedDir(stagedDir, diffSet); + } + + describeScope(stagedDir: string): Promise { + return describeHistoricSqlScope(stagedDir); + } + + async getTriageSignals(stagedDir: string, externalId: string): Promise { + const manifest = historicSqlManifestSchema.parse( + JSON.parse(await readFile(join(stagedDir, 'manifest.json'), 'utf-8')), + ); + const template = manifest.templates.find((entry) => entry.id === externalId); + if (!template) { + return {}; + } + const templateDir = template.path.replace(/\/page\.md$/, ''); + const metadata = historicSqlMetadataSchema.parse( + JSON.parse(await readFile(join(stagedDir, templateDir, 'metadata.json'), 'utf-8')), + ); + const usage = historicSqlUsageSchema.parse( + JSON.parse(await readFile(join(stagedDir, templateDir, 'usage.json'), 'utf-8')), + ); + + return { + objectType: metadata.objectType, + lastEditedAt: 
usage.stats.last_seen, + propertyHints: metadata.properties.triage_signals, + }; + } + + async onPullSucceeded(ctx: { + connectionId: string; + sourceKey: string; + syncId: string; + trigger: IngestTrigger; + completedAt: Date; + stagedDir: string; + }): Promise { + const manifest = historicSqlManifestSchema.parse( + JSON.parse(await readFile(join(ctx.stagedDir, 'manifest.json'), 'utf-8')), + ); + if (manifest.dialect === 'postgres') { + const pending = this.pendingPgssBaselines.get(ctx.stagedDir); + if (pending) { + await writePgssBaselineAtomic(pending.baselinePath, pending.baseline); + this.pendingPgssBaselines.delete(ctx.stagedDir); + } + } + await this.deps.onPullSucceeded?.({ ...ctx, nextSuccessfulCursor: manifest.nextSuccessfulCursor }); + } +} diff --git a/packages/context/src/ingest/adapters/historic-sql/postgres-pgss-query-history-reader.test.ts b/packages/context/src/ingest/adapters/historic-sql/postgres-pgss-query-history-reader.test.ts new file mode 100644 index 00000000..3f7b3fca --- /dev/null +++ b/packages/context/src/ingest/adapters/historic-sql/postgres-pgss-query-history-reader.test.ts @@ -0,0 +1,281 @@ +import { describe, expect, it, vi } from 'vitest'; +import { + HistoricSqlExtensionMissingError, + HistoricSqlGrantsMissingError, + HistoricSqlVersionUnsupportedError, +} from './errors.js'; +import { PostgresPgssQueryHistoryReader } from './postgres-pgss-query-history-reader.js'; + +interface FakeQueryResult { + headers: string[]; + rows: unknown[][]; + totalRows?: number; + error?: string; +} + +function queryClient(results: Array) { + const executeQuery = vi.fn(async (_query: string, _params?: unknown[]) => { + const next = results.shift(); + if (!next) { + throw new Error('unexpected query'); + } + if (next instanceof Error) { + throw next; + } + return next; + }); + return { executeQuery }; +} + +function executedSql(client: ReturnType, index: number): string { + const call = client.executeQuery.mock.calls[index]; + if (!call) { + throw new 
Error(`expected query client call ${index}`); + } + return call[0]; +} + +describe('PostgresPgssQueryHistoryReader', () => { + it('probes version, extension presence, grants, and tracking state', async () => { + const client = queryClient([ + { + headers: ['server_version_num', 'server_version'], + rows: [[160004, 'PostgreSQL 16.4 on x86_64-apple-darwin']], + }, + { headers: ['?column?'], rows: [[1]] }, + { headers: ['has_role'], rows: [[true]] }, + { headers: ['track'], rows: [['top']] }, + { headers: ['max'], rows: [['5000']] }, + ]); + const reader = new PostgresPgssQueryHistoryReader(); + + await expect(reader.probe(client)).resolves.toEqual({ + pgServerVersion: 'PostgreSQL 16.4 on x86_64-apple-darwin', + warnings: [], + }); + + expect(executedSql(client, 0)).toContain("current_setting('server_version_num')::int"); + expect(executedSql(client, 1)).toBe('SELECT 1 FROM pg_stat_statements LIMIT 1'); + expect(executedSql(client, 2)).toBe( + "SELECT pg_has_role(current_user, 'pg_read_all_stats', 'USAGE') AS has_role", + ); + expect(executedSql(client, 3)).toBe("SELECT current_setting('pg_stat_statements.track') AS track"); + expect(executedSql(client, 4)).toBe("SELECT current_setting('pg_stat_statements.max') AS max"); + }); + + it('rejects PostgreSQL versions older than 14 without probing the extension', async () => { + const client = queryClient([ + { + headers: ['server_version_num', 'server_version'], + rows: [[130012, 'PostgreSQL 13.12']], + }, + { + headers: ['stats_reset', 'dealloc'], + rows: [[new Date('2026-05-01T00:00:00.000Z'), 7]], + }, + ]); + const reader = new PostgresPgssQueryHistoryReader(); + + const promise = reader.probe(client); + await expect(promise).rejects.toMatchObject({ + name: 'HistoricSqlVersionUnsupportedError', + dialect: 'postgres', + detectedVersion: 'PostgreSQL 13.12', + minimumVersion: 'PostgreSQL 14', + }); + await expect(promise).rejects.toBeInstanceOf(HistoricSqlVersionUnsupportedError); + 
expect(client.executeQuery).toHaveBeenCalledTimes(1); + }); + + it('maps a missing pg_stat_statements relation to HistoricSqlExtensionMissingError', async () => { + const client = queryClient([ + { + headers: ['server_version_num', 'server_version'], + rows: [[160004, 'PostgreSQL 16.4']], + }, + new Error('relation "pg_stat_statements" does not exist'), + ]); + const reader = new PostgresPgssQueryHistoryReader(); + + const promise = reader.probe(client); + await expect(promise).rejects.toMatchObject({ + name: 'HistoricSqlExtensionMissingError', + dialect: 'postgres', + }); + await expect(promise).rejects.toBeInstanceOf(HistoricSqlExtensionMissingError); + }); + + it('maps pg_stat_statements preload failures to HistoricSqlExtensionMissingError with preload remediation', async () => { + const client = queryClient([ + { + headers: ['server_version_num', 'server_version'], + rows: [[160004, 'PostgreSQL 16.4']], + }, + new Error('pg_stat_statements must be loaded via shared_preload_libraries'), + ]); + const reader = new PostgresPgssQueryHistoryReader(); + + const promise = reader.probe(client); + await expect(promise).rejects.toMatchObject({ + name: 'HistoricSqlExtensionMissingError', + dialect: 'postgres', + message: 'pg_stat_statements is installed but not loaded via shared_preload_libraries.', + remediation: expect.stringContaining("shared_preload_libraries includes 'pg_stat_statements'"), + }); + await expect(promise).rejects.toBeInstanceOf(HistoricSqlExtensionMissingError); + }); + + it('maps missing pg_read_all_stats membership to HistoricSqlGrantsMissingError', async () => { + const client = queryClient([ + { + headers: ['server_version_num', 'server_version'], + rows: [[160004, 'PostgreSQL 16.4']], + }, + { headers: ['?column?'], rows: [[1]] }, + { headers: ['has_role'], rows: [[false]] }, + ]); + const reader = new PostgresPgssQueryHistoryReader(); + + const promise = reader.probe(client); + await expect(promise).rejects.toMatchObject({ + name: 
'HistoricSqlGrantsMissingError', + dialect: 'postgres', + remediation: 'GRANT pg_read_all_stats TO ;', + }); + await expect(promise).rejects.toBeInstanceOf(HistoricSqlGrantsMissingError); + }); + + it('returns a warning instead of failing when pg_stat_statements.track is none', async () => { + const client = queryClient([ + { + headers: ['server_version_num', 'server_version'], + rows: [[160004, 'PostgreSQL 16.4']], + }, + { headers: ['?column?'], rows: [[1]] }, + { headers: ['has_role'], rows: [[true]] }, + { headers: ['track'], rows: [['none']] }, + { headers: ['max'], rows: [['5000']] }, + ]); + const reader = new PostgresPgssQueryHistoryReader(); + + await expect(reader.probe(client)).resolves.toEqual({ + pgServerVersion: 'PostgreSQL 16.4', + warnings: [ + "pg_stat_statements.track is none; set it to top or all in the Postgres parameter group or config", + ], + }); + }); + + it('warns when pg_stat_statements.max is below the recommended floor', async () => { + const client = queryClient([ + { + headers: ['server_version_num', 'server_version'], + rows: [[160004, 'PostgreSQL 16.4']], + }, + { headers: ['?column?'], rows: [[1]] }, + { headers: ['has_role'], rows: [[true]] }, + { headers: ['track'], rows: [['top']] }, + { headers: ['max'], rows: [['1000']] }, + ]); + const reader = new PostgresPgssQueryHistoryReader(); + + await expect(reader.probe(client)).resolves.toEqual({ + pgServerVersion: 'PostgreSQL 16.4', + warnings: [ + 'pg_stat_statements.max is 1000; set it to at least 5000 to reduce query-template eviction churn', + ], + }); + }); + + it('reads a parameterized pg_stat_statements snapshot and stats info', async () => { + const client = queryClient([ + { + headers: [ + 'queryid', + 'userid', + 'username', + 'dbid', + 'database', + 'query', + 'calls', + 'total_exec_time', + 'mean_exec_time', + 'total_rows', + ], + rows: [ + [ + '922337203685477580', + '16384', + 'analyst', + '16385', + 'warehouse', + 'SELECT count(*) FROM public.orders WHERE status = $1', 
+ '42', + '2100.5', + '50.0119', + '9001', + ], + [ + '922337203685477581', + '16386', + 'unknown', + '16385', + 'warehouse', + 'SELECT * FROM public.customers WHERE id = $1', + 5, + 30, + 6, + 5, + ], + ], + }, + { + headers: ['stats_reset', 'dealloc'], + rows: [[new Date('2026-05-01T00:00:00.000Z'), 7]], + }, + ]); + const reader = new PostgresPgssQueryHistoryReader(); + + await expect(reader.readSnapshot(client, { minCalls: 5, maxTemplates: 500 })).resolves.toEqual({ + statsResetAt: '2026-05-01T00:00:00.000Z', + deallocCount: 7, + rows: [ + { + queryid: '922337203685477580', + userid: '16384', + username: 'analyst', + dbid: '16385', + database: 'warehouse', + query: 'SELECT count(*) FROM public.orders WHERE status = $1', + calls: 42, + totalExecTime: 2100.5, + meanExecTime: 50.0119, + totalRows: 9001, + }, + { + queryid: '922337203685477581', + userid: '16386', + username: 'unknown', + dbid: '16385', + database: 'warehouse', + query: 'SELECT * FROM public.customers WHERE id = $1', + calls: 5, + totalExecTime: 30, + meanExecTime: 6, + totalRows: 5, + }, + ], + }); + + const snapshotSql = executedSql(client, 0); + expect(snapshotSql).toContain('FROM pg_stat_statements s'); + expect(snapshotSql).toContain('LEFT JOIN pg_roles'); + expect(snapshotSql).toContain('LEFT JOIN pg_database'); + expect(snapshotSql).toContain('WHERE s.toplevel = true'); + expect(snapshotSql).toContain('AND s.calls >= $1'); + expect(snapshotSql).toContain('ORDER BY s.total_exec_time DESC'); + expect(snapshotSql).toContain('LIMIT $2'); + expect(client.executeQuery.mock.calls[0]?.[1]).toEqual([5, 500]); + expect(executedSql(client, 1)).toBe('SELECT stats_reset, dealloc FROM pg_stat_statements_info'); + }); +}); diff --git a/packages/context/src/ingest/adapters/historic-sql/postgres-pgss-query-history-reader.ts b/packages/context/src/ingest/adapters/historic-sql/postgres-pgss-query-history-reader.ts new file mode 100644 index 00000000..c0c7dd70 --- /dev/null +++ 
b/packages/context/src/ingest/adapters/historic-sql/postgres-pgss-query-history-reader.ts @@ -0,0 +1,262 @@ +import { + HistoricSqlExtensionMissingError, + HistoricSqlGrantsMissingError, + HistoricSqlVersionUnsupportedError, +} from './errors.js'; +import type { + KloPostgresQueryClient, + PostgresPgssProbeResult, + PostgresPgssReader, + PostgresPgssRow, + PostgresPgssSnapshot, +} from './types.js'; + +interface QueryResultLike { + headers: string[]; + rows: unknown[][]; + totalRows?: number; + error?: string; +} + +const VERSION_SQL = ` +SELECT current_setting('server_version_num')::int AS server_version_num, + version() AS server_version +`.trim(); + +const EXTENSION_PROBE_SQL = 'SELECT 1 FROM pg_stat_statements LIMIT 1'; +const GRANTS_PROBE_SQL = "SELECT pg_has_role(current_user, 'pg_read_all_stats', 'USAGE') AS has_role"; +const TRACKING_PROBE_SQL = "SELECT current_setting('pg_stat_statements.track') AS track"; +const MAX_SETTING_PROBE_SQL = "SELECT current_setting('pg_stat_statements.max') AS max"; +const RECOMMENDED_PGSS_MAX = 5000; +const STATS_INFO_SQL = 'SELECT stats_reset, dealloc FROM pg_stat_statements_info'; + +const SNAPSHOT_SQL = ` +SELECT + s.queryid::text AS queryid, + s.userid::text AS userid, + COALESCE(r.rolname, 'unknown') AS username, + s.dbid::text AS dbid, + d.datname AS database, + s.query, + s.calls, + s.total_exec_time, + s.mean_exec_time, + s.rows AS total_rows +FROM pg_stat_statements s +LEFT JOIN pg_roles r ON s.userid = r.oid +LEFT JOIN pg_database d ON s.dbid = d.oid +WHERE s.toplevel = true + AND s.calls >= $1 +ORDER BY s.total_exec_time DESC +LIMIT $2 +`.trim(); + +const POSTGRES_EXTENSION_REMEDIATION = [ + 'Run CREATE EXTENSION pg_stat_statements; against the connection database.', + "Ensure shared_preload_libraries includes 'pg_stat_statements' in the Postgres parameter group or config.", +].join(' '); + +const POSTGRES_GRANTS_REMEDIATION = 'GRANT pg_read_all_stats TO ;'; + +function queryClient(client: unknown): 
KloPostgresQueryClient { + if ( + client && + typeof client === 'object' && + 'executeQuery' in client && + typeof (client as { executeQuery?: unknown }).executeQuery === 'function' + ) { + return client as KloPostgresQueryClient; + } + throw new Error('Historic SQL Postgres PGSS reader requires a query client with executeQuery(sql, params?)'); +} + +async function execute(client: KloPostgresQueryClient, sql: string, params?: unknown[]): Promise { + const result = await client.executeQuery(sql, params); + if ('error' in result && typeof result.error === 'string' && result.error.length > 0) { + throw new Error(result.error); + } + return result; +} + +function indexes(headers: string[]): Map { + const out = new Map(); + headers.forEach((header, index) => out.set(header.toLowerCase(), index)); + return out; +} + +function value(row: unknown[], headerIndexes: Map, header: string): unknown { + const index = headerIndexes.get(header.toLowerCase()); + return index === undefined ? null : row[index]; +} + +function nullableString(raw: unknown): string | null { + if (raw === null || raw === undefined) { + return null; + } + const text = String(raw); + return text.length > 0 ? text : null; +} + +function requiredString(raw: unknown, field: string): string { + const text = nullableString(raw); + if (!text) { + throw new Error(`Postgres pg_stat_statements row is missing ${field}`); + } + return text; +} + +function requiredFiniteNumber(raw: unknown, field: string): number { + const number = typeof raw === 'number' ? raw : Number(raw); + if (!Number.isFinite(number)) { + throw new Error(`Postgres pg_stat_statements row has invalid ${field}: ${String(raw)}`); + } + return number; +} + +function nullableInteger(raw: unknown): number | null { + if (raw === null || raw === undefined || raw === '') { + return null; + } + const number = typeof raw === 'number' ? raw : Number(raw); + return Number.isFinite(number) ? 
Math.trunc(number) : null; +} + +function nullableIsoTimestamp(raw: unknown): string | null { + if (raw === null || raw === undefined || raw === '') { + return null; + } + if (raw instanceof Date) { + return raw.toISOString(); + } + const date = new Date(String(raw)); + return Number.isNaN(date.getTime()) ? null : date.toISOString(); +} + +function firstRow(result: QueryResultLike, context: string): { row: unknown[]; headers: Map } { + const row = result.rows[0]; + if (!row) { + throw new Error(`Postgres historic-SQL ${context} query returned no rows`); + } + return { row, headers: indexes(result.headers) }; +} + +function isMissingPgssRelation(error: unknown): boolean { + const message = error instanceof Error ? error.message : String(error); + return /relation ["']?pg_stat_statements["']? does not exist/i.test(message); +} + +function isPgssPreloadRequired(error: unknown): boolean { + const message = error instanceof Error ? error.message : String(error); + return /pg_stat_statements.*shared_preload_libraries/i.test(message); +} + +function extensionMissingError(cause: unknown, message?: string): HistoricSqlExtensionMissingError { + return new HistoricSqlExtensionMissingError({ + dialect: 'postgres', + message: message ?? 
'pg_stat_statements extension is not installed in the connection database.', + remediation: POSTGRES_EXTENSION_REMEDIATION, + cause, + }); +} + +function grantsMissingError(): HistoricSqlGrantsMissingError { + return new HistoricSqlGrantsMissingError({ + dialect: 'postgres', + message: 'Postgres connection role lacks pg_read_all_stats for historic-SQL ingest.', + remediation: POSTGRES_GRANTS_REMEDIATION, + }); +} + +function mapSnapshotRow(row: unknown[], headerIndexes: Map): PostgresPgssRow { + return { + queryid: requiredString(value(row, headerIndexes, 'queryid'), 'queryid'), + userid: requiredString(value(row, headerIndexes, 'userid'), 'userid'), + username: nullableString(value(row, headerIndexes, 'username')), + dbid: requiredString(value(row, headerIndexes, 'dbid'), 'dbid'), + database: nullableString(value(row, headerIndexes, 'database')), + query: requiredString(value(row, headerIndexes, 'query'), 'query'), + calls: Math.trunc(requiredFiniteNumber(value(row, headerIndexes, 'calls'), 'calls')), + totalExecTime: requiredFiniteNumber(value(row, headerIndexes, 'total_exec_time'), 'total_exec_time'), + meanExecTime: requiredFiniteNumber(value(row, headerIndexes, 'mean_exec_time'), 'mean_exec_time'), + totalRows: Math.trunc(requiredFiniteNumber(value(row, headerIndexes, 'total_rows'), 'total_rows')), + }; +} + +export class PostgresPgssQueryHistoryReader implements PostgresPgssReader { + async probe(client: unknown): Promise { + const pgClient = queryClient(client); + const versionResult = await execute(pgClient, VERSION_SQL); + const { row: versionRow, headers: versionHeaders } = firstRow(versionResult, 'version probe'); + const serverVersionNum = requiredFiniteNumber( + value(versionRow, versionHeaders, 'server_version_num'), + 'server_version_num', + ); + const pgServerVersion = requiredString(value(versionRow, versionHeaders, 'server_version'), 'server_version'); + + if (serverVersionNum < 140000) { + throw new HistoricSqlVersionUnsupportedError({ + dialect: 
'postgres', + detectedVersion: pgServerVersion, + minimumVersion: 'PostgreSQL 14', + }); + } + + try { + await execute(pgClient, EXTENSION_PROBE_SQL); + } catch (error) { + if (isMissingPgssRelation(error)) { + throw extensionMissingError(error); + } + if (isPgssPreloadRequired(error)) { + throw extensionMissingError( + error, + 'pg_stat_statements is installed but not loaded via shared_preload_libraries.', + ); + } + throw error; + } + + const grantsResult = await execute(pgClient, GRANTS_PROBE_SQL); + const { row: grantsRow, headers: grantsHeaders } = firstRow(grantsResult, 'grant probe'); + if (value(grantsRow, grantsHeaders, 'has_role') !== true) { + throw grantsMissingError(); + } + + const trackingResult = await execute(pgClient, TRACKING_PROBE_SQL); + const { row: trackingRow, headers: trackingHeaders } = firstRow(trackingResult, 'tracking probe'); + const track = nullableString(value(trackingRow, trackingHeaders, 'track')); + + const maxResult = await execute(pgClient, MAX_SETTING_PROBE_SQL); + const { row: maxRow, headers: maxHeaders } = firstRow(maxResult, 'max-setting probe'); + const pgssMax = nullableInteger(value(maxRow, maxHeaders, 'max')); + + const warnings: string[] = []; + if (track === 'none') { + warnings.push('pg_stat_statements.track is none; set it to top or all in the Postgres parameter group or config'); + } + if (pgssMax !== null && pgssMax < RECOMMENDED_PGSS_MAX) { + warnings.push( + `pg_stat_statements.max is ${pgssMax}; set it to at least ${RECOMMENDED_PGSS_MAX} to reduce query-template eviction churn`, + ); + } + + return { pgServerVersion, warnings }; + } + + async readSnapshot( + client: unknown, + options: { minCalls: number; maxTemplates: number }, + ): Promise { + const pgClient = queryClient(client); + const snapshotResult = await execute(pgClient, SNAPSHOT_SQL, [options.minCalls, options.maxTemplates]); + const snapshotHeaders = indexes(snapshotResult.headers); + const statsResult = await execute(pgClient, STATS_INFO_SQL); + 
const { row: statsRow, headers: statsHeaders } = firstRow(statsResult, 'stats-info'); + + return { + statsResetAt: nullableIsoTimestamp(value(statsRow, statsHeaders, 'stats_reset')), + deallocCount: nullableInteger(value(statsRow, statsHeaders, 'dealloc')), + rows: snapshotResult.rows.map((row) => mapSnapshotRow(row, snapshotHeaders)), + }; + } +} diff --git a/packages/context/src/ingest/adapters/historic-sql/snowflake-query-history-reader.test.ts b/packages/context/src/ingest/adapters/historic-sql/snowflake-query-history-reader.test.ts new file mode 100644 index 00000000..d8253df9 --- /dev/null +++ b/packages/context/src/ingest/adapters/historic-sql/snowflake-query-history-reader.test.ts @@ -0,0 +1,193 @@ +import { describe, expect, it, vi } from 'vitest'; +import { HistoricSqlGrantsMissingError } from './errors.js'; +import { SnowflakeHistoricSqlQueryHistoryReader } from './snowflake-query-history-reader.js'; + +interface FakeQueryResult { + headers: string[]; + rows: unknown[][]; + totalRows: number; + error?: string; +} + +function queryClient(results: FakeQueryResult[]) { + const executeQuery = vi.fn(async (_query: string) => { + const next = results.shift(); + if (!next) { + throw new Error('unexpected query'); + } + return next; + }); + return { executeQuery }; +} + +function firstQuery(client: ReturnType): string { + const call = client.executeQuery.mock.calls[0]; + if (!call) { + throw new Error('expected query client to be called'); + } + return call[0]; +} + +describe('SnowflakeHistoricSqlQueryHistoryReader', () => { + it('probes SNOWFLAKE.ACCOUNT_USAGE.QUERY_HISTORY', async () => { + const client = queryClient([{ headers: ['1'], rows: [[1]], totalRows: 1 }]); + const reader = new SnowflakeHistoricSqlQueryHistoryReader(); + + await expect(reader.probe(client)).resolves.toBeUndefined(); + + expect(client.executeQuery).toHaveBeenCalledWith( + 'SELECT 1 FROM SNOWFLAKE.ACCOUNT_USAGE.QUERY_HISTORY LIMIT 1', + ); + }); + + it('turns probe result errors into 
HistoricSqlGrantsMissingError', async () => { + const client = queryClient([{ headers: [], rows: [], totalRows: 0, error: 'Object does not exist or not authorized' }]); + const reader = new SnowflakeHistoricSqlQueryHistoryReader(); + + await expect(reader.probe(client)).rejects.toMatchObject({ + name: 'HistoricSqlGrantsMissingError', + dialect: 'snowflake', + remediation: 'GRANT IMPORTED PRIVILEGES ON DATABASE SNOWFLAKE TO ROLE ;', + }); + }); + + it('turns thrown probe failures into HistoricSqlGrantsMissingError', async () => { + const client = { + executeQuery: vi.fn(async () => { + throw new Error('permission denied'); + }), + }; + const reader = new SnowflakeHistoricSqlQueryHistoryReader(); + + await expect(reader.probe(client)).rejects.toBeInstanceOf(HistoricSqlGrantsMissingError); + }); + + it('fetches query-history rows with cursor and maps them into RawQueryRow shape', async () => { + const client = queryClient([ + { + headers: [ + 'QUERY_ID', + 'QUERY_TEXT', + 'USER_NAME', + 'ROLE_NAME', + 'WAREHOUSE_NAME', + 'DATABASE_NAME', + 'SCHEMA_NAME', + 'START_TIME', + 'END_TIME', + 'TOTAL_ELAPSED_TIME', + 'ROWS_PRODUCED', + 'EXECUTION_STATUS', + 'ERROR_CODE', + 'ERROR_MESSAGE', + ], + rows: [ + [ + '01a', + "SELECT count(*) FROM ANALYTICS.ORDERS WHERE STATUS = 'paid'", + 'ANALYST_A', + 'ANALYST_ROLE', + 'WH_XS', + 'ANALYTICS', + 'PUBLIC', + '2026-05-04T10:00:00.000Z', + '2026-05-04T10:00:01.250Z', + 1250, + 12, + 'SUCCESS', + null, + null, + ], + [ + '01b', + 'SELECT * FROM MISSING_TABLE', + 'ANALYST_B', + 'ANALYST_ROLE', + 'WH_XS', + 'ANALYTICS', + 'PUBLIC', + new Date('2026-05-04T10:05:00.000Z'), + null, + null, + null, + 'FAILED_WITH_ERROR', + '002003', + 'SQL compilation error', + ], + ], + totalRows: 2, + }, + ]); + const reader = new SnowflakeHistoricSqlQueryHistoryReader(); + + const rows = []; + for await (const row of reader.fetch( + client, + { + start: new Date('2026-05-01T00:00:00.000Z'), + end: new Date('2026-05-04T12:00:00.000Z'), + }, + 
'2026-05-03T00:00:00.000Z', + )) { + rows.push(row); + } + + expect(client.executeQuery).toHaveBeenCalledTimes(1); + const sql = firstQuery(client); + expect(sql).toContain('FROM SNOWFLAKE.ACCOUNT_USAGE.QUERY_HISTORY'); + expect(sql).toContain("START_TIME >= '2026-05-03T00:00:00.000Z'::TIMESTAMP_TZ"); + expect(sql).toContain("START_TIME < '2026-05-04T12:00:00.000Z'::TIMESTAMP_TZ"); + expect(sql).toContain('ORDER BY START_TIME ASC, QUERY_ID ASC'); + expect(sql).toContain('ROWS_PRODUCED'); + + expect(rows).toEqual([ + { + id: '01a', + sql: "SELECT count(*) FROM ANALYTICS.ORDERS WHERE STATUS = 'paid'", + user: 'ANALYST_A', + startedAt: '2026-05-04T10:00:00.000Z', + endedAt: '2026-05-04T10:00:01.250Z', + runtimeMs: 1250, + rowsProduced: 12, + success: true, + errorMessage: null, + }, + { + id: '01b', + sql: 'SELECT * FROM MISSING_TABLE', + user: 'ANALYST_B', + startedAt: '2026-05-04T10:05:00.000Z', + endedAt: null, + runtimeMs: null, + rowsProduced: null, + success: false, + errorMessage: '002003: SQL compilation error', + }, + ]); + }); + + it('uses the window start when no cursor is available', async () => { + const client = queryClient([{ headers: ['QUERY_ID'], rows: [], totalRows: 0 }]); + const reader = new SnowflakeHistoricSqlQueryHistoryReader(); + + for await (const _row of reader.fetch(client, { + start: new Date('2026-02-03T12:00:00.000Z'), + end: new Date('2026-05-04T12:00:00.000Z'), + })) { + throw new Error('empty result should not yield rows'); + } + + const sql = firstQuery(client); + expect(sql).toContain("START_TIME >= '2026-02-03T12:00:00.000Z'::TIMESTAMP_TZ"); + }); + + it('throws a clear error when the query client cannot execute SQL', async () => { + const reader = new SnowflakeHistoricSqlQueryHistoryReader(); + + await expect(async () => { + for await (const _row of reader.fetch({}, { start: new Date(), end: new Date() })) { + throw new Error('unreachable'); + } + }).rejects.toThrow('Historic SQL Snowflake reader requires a query client with 
executeQuery(query)'); + }); +}); diff --git a/packages/context/src/ingest/adapters/historic-sql/snowflake-query-history-reader.ts b/packages/context/src/ingest/adapters/historic-sql/snowflake-query-history-reader.ts new file mode 100644 index 00000000..b149a34b --- /dev/null +++ b/packages/context/src/ingest/adapters/historic-sql/snowflake-query-history-reader.ts @@ -0,0 +1,203 @@ +import { HistoricSqlGrantsMissingError } from './errors.js'; +import type { HistoricSqlQueryHistoryReader, HistoricSqlRawQueryRow, HistoricSqlTimeWindow } from './types.js'; + +interface QueryResultLike { + headers: string[]; + rows: unknown[][]; + totalRows: number; + error?: string; +} + +interface QueryClientLike { + executeQuery(query: string): Promise; +} + +const PROBE_SQL = 'SELECT 1 FROM SNOWFLAKE.ACCOUNT_USAGE.QUERY_HISTORY LIMIT 1'; + +const SNOWFLAKE_GRANTS_REMEDIATION = + 'GRANT IMPORTED PRIVILEGES ON DATABASE SNOWFLAKE TO ROLE ;'; + +function queryClient(client: unknown): QueryClientLike { + if ( + client && + typeof client === 'object' && + 'executeQuery' in client && + typeof (client as { executeQuery?: unknown }).executeQuery === 'function' + ) { + return client as QueryClientLike; + } + throw new Error('Historic SQL Snowflake reader requires a query client with executeQuery(query)'); +} + +function grantsError(cause: unknown): HistoricSqlGrantsMissingError { + const message = + cause instanceof Error + ? cause.message + : typeof cause === 'string' + ? cause + : 'Snowflake role cannot query SNOWFLAKE.ACCOUNT_USAGE.QUERY_HISTORY.'; + return new HistoricSqlGrantsMissingError({ + dialect: 'snowflake', + message: `Missing Snowflake audit grants for historic-SQL ingest: ${message}`, + remediation: SNOWFLAKE_GRANTS_REMEDIATION, + cause, + }); +} + +function timestampLiteral(value: Date | string): string { + const date = value instanceof Date ? 
value : new Date(value); + if (Number.isNaN(date.getTime())) { + throw new Error(`Invalid Snowflake query-history timestamp: ${String(value)}`); + } + return `'${date.toISOString().replace(/'/g, "''")}'::TIMESTAMP_TZ`; +} + +function queryHistorySql(window: HistoricSqlTimeWindow, cursor?: string | null): string { + const start = timestampLiteral(cursor ?? window.start); + const end = timestampLiteral(window.end); + return ` +SELECT + QUERY_ID, + QUERY_TEXT, + USER_NAME, + ROLE_NAME, + WAREHOUSE_NAME, + DATABASE_NAME, + SCHEMA_NAME, + START_TIME, + END_TIME, + TOTAL_ELAPSED_TIME, + ROWS_PRODUCED, + EXECUTION_STATUS, + ERROR_CODE, + ERROR_MESSAGE +FROM SNOWFLAKE.ACCOUNT_USAGE.QUERY_HISTORY +WHERE START_TIME >= ${start} + AND START_TIME < ${end} + AND QUERY_TEXT IS NOT NULL +ORDER BY START_TIME ASC, QUERY_ID ASC`.trim(); +} + +function indexByHeader(headers: string[]): Map { + const out = new Map(); + headers.forEach((header, index) => { + out.set(header.toUpperCase(), index); + }); + return out; +} + +function value(row: unknown[], indexes: Map, name: string): unknown { + const index = indexes.get(name); + return index === undefined ? null : row[index]; +} + +function nullableString(raw: unknown): string | null { + if (raw === null || raw === undefined) { + return null; + } + const text = String(raw); + return text.length > 0 ? text : null; +} + +function requiredString(raw: unknown, field: string): string { + const text = nullableString(raw); + if (!text) { + throw new Error(`Snowflake QUERY_HISTORY row is missing ${field}`); + } + return text; +} + +function nullableNumber(raw: unknown): number | null { + if (raw === null || raw === undefined || raw === '') { + return null; + } + const number = typeof raw === 'number' ? raw : Number(raw); + if (!Number.isFinite(number)) { + return null; + } + return number; +} + +function nullableInteger(raw: unknown): number | null { + const number = nullableNumber(raw); + return number === null ? 
null : Math.trunc(number); +} + +function isoTimestamp(raw: unknown, field: string): string { + if (raw instanceof Date) { + return raw.toISOString(); + } + const text = requiredString(raw, field); + const date = new Date(text); + if (Number.isNaN(date.getTime())) { + throw new Error(`Snowflake QUERY_HISTORY row has invalid ${field}: ${text}`); + } + return date.toISOString(); +} + +function nullableIsoTimestamp(raw: unknown): string | null { + if (raw === null || raw === undefined || raw === '') { + return null; + } + return isoTimestamp(raw, 'END_TIME'); +} + +function executionSucceeded(status: string | null, errorCode: string | null, errorMessage: string | null): boolean { + if (errorCode || errorMessage) { + return false; + } + return status === null || status.toUpperCase().startsWith('SUCCESS'); +} + +function combinedErrorMessage(errorCode: string | null, errorMessage: string | null): string | null { + if (errorCode && errorMessage) { + return `${errorCode}: ${errorMessage}`; + } + return errorMessage ?? 
errorCode; +} + +function mapRow(row: unknown[], indexes: Map): HistoricSqlRawQueryRow { + const errorCode = nullableString(value(row, indexes, 'ERROR_CODE')); + const errorMessage = nullableString(value(row, indexes, 'ERROR_MESSAGE')); + const rowsProduced = nullableInteger(value(row, indexes, 'ROWS_PRODUCED')); + return { + id: requiredString(value(row, indexes, 'QUERY_ID'), 'QUERY_ID'), + sql: requiredString(value(row, indexes, 'QUERY_TEXT'), 'QUERY_TEXT'), + user: nullableString(value(row, indexes, 'USER_NAME')), + startedAt: isoTimestamp(value(row, indexes, 'START_TIME'), 'START_TIME'), + endedAt: nullableIsoTimestamp(value(row, indexes, 'END_TIME')), + runtimeMs: nullableNumber(value(row, indexes, 'TOTAL_ELAPSED_TIME')), + rowsProduced, + success: executionSucceeded(nullableString(value(row, indexes, 'EXECUTION_STATUS')), errorCode, errorMessage), + errorMessage: combinedErrorMessage(errorCode, errorMessage), + }; +} + +export class SnowflakeHistoricSqlQueryHistoryReader implements HistoricSqlQueryHistoryReader { + async probe(client: unknown): Promise { + let result: QueryResultLike; + try { + result = await queryClient(client).executeQuery(PROBE_SQL); + } catch (error) { + throw grantsError(error); + } + if (result.error) { + throw grantsError(result.error); + } + } + + async *fetch( + client: unknown, + window: HistoricSqlTimeWindow, + cursor?: string | null, + ): AsyncIterable { + const result = await queryClient(client).executeQuery(queryHistorySql(window, cursor)); + if (result.error) { + throw grantsError(result.error); + } + const indexes = indexByHeader(result.headers); + for (const row of result.rows) { + yield mapRow(row, indexes); + } + } +} diff --git a/packages/context/src/ingest/adapters/historic-sql/stage-pgss-golden.test.ts b/packages/context/src/ingest/adapters/historic-sql/stage-pgss-golden.test.ts new file mode 100644 index 00000000..329e6dcd --- /dev/null +++ b/packages/context/src/ingest/adapters/historic-sql/stage-pgss-golden.test.ts @@ 
-0,0 +1,152 @@ +import { mkdir, mkdtemp, readdir, readFile } from 'node:fs/promises'; +import { tmpdir } from 'node:os'; +import { dirname, join, relative } from 'node:path'; +import { describe, expect, it } from 'vitest'; +import type { SqlAnalysisPort } from '../../../sql-analysis/index.js'; +import { stagePgStatStatementsTemplates, writePgssBaselineAtomic, type PgssBaseline } from './stage-pgss.js'; +import type { HistoricSqlPullConfig, KloPostgresQueryClient, PostgresPgssReader, PostgresPgssRow } from './types.js'; + +const FIXTURE_ROOT = join(__dirname, '__fixtures__/postgres'); + +/** Shape of a fixture's input.json: reader outputs to replay, canned SQL analysis keyed by SQL text, the optional starting baseline, and the expected baseline and staged files. */ interface GoldenFixture { + name: string; + now: string; + connectionId: string; + probe: { + pgServerVersion: string; + warnings: string[]; + }; + snapshot: { + statsResetAt: string | null; + deallocCount: number | null; + rows: PostgresPgssRow[]; + }; + pullConfig: HistoricSqlPullConfig & { dialect: 'postgres' }; + analysisBySql: Record< + string, + { + fingerprint: string; + normalizedSql: string; + tablesTouched: string[]; + literalSlots: []; + error?: string; + } + >; + baseline: PgssBaseline | null; + expectedBaseline: PgssBaseline; + expectedFiles: Record; +} + +/** Reads and parses FIXTURE_ROOT/name/input.json; the cast to GoldenFixture is not validated. */ async function readFixture(name: string): Promise { + return JSON.parse(await readFile(join(FIXTURE_ROOT, name, 'input.json'), 'utf-8')) as GoldenFixture; +} + +/** Creates a fresh temporary directory under os.tmpdir() with the given name prefix. */ async function tempDir(prefix: string): Promise { + return mkdtemp(join(tmpdir(), prefix)); +} + +/** Query client stub whose executeQuery always resolves to an empty result. */ function fakePgClient(): KloPostgresQueryClient { + return { + async executeQuery() { + return { headers: [], rows: [] }; + }, + }; +} + +/** Reader stub that replays the fixture's probe result and snapshot, truncating rows to options.maxTemplates. */ function fixtureReader(fixture: GoldenFixture): PostgresPgssReader { + return { + async probe() { + return fixture.probe; + }, + async readSnapshot(_client, options) { + return { + statsResetAt: fixture.snapshot.statsResetAt, + deallocCount: fixture.snapshot.deallocCount, + rows: fixture.snapshot.rows.slice(0, options.maxTemplates), + }; + }, + }; +} + +/** SqlAnalysisPort stub: returns the canned analysis for an exact SQL string, or an error result when the fixture has no entry for it. */ function fixtureSqlAnalysis(fixture: GoldenFixture): SqlAnalysisPort { + 
return { + async analyzeForFingerprint(sql) { + const result = fixture.analysisBySql[sql]; + if (!result) { + return { + fingerprint: '', + normalizedSql: '', + tablesTouched: [], + literalSlots: [], + error: `missing fixture analysis for ${sql}`, + }; + } + return result; + }, + }; +} + +/** Writes the starting baseline through writePgssBaselineAtomic; a null baseline writes nothing. */ async function writeFixtureBaseline(path: string, baseline: PgssBaseline | null): Promise { + if (!baseline) { + return; + } + await writePgssBaselineAtomic(path, baseline); +} + +/** Recursively collects every non-directory entry under `current` as a path relative to `root`. */ async function listFiles(root: string, current = root): Promise { + const entries = await readdir(current, { withFileTypes: true }); + const files: string[] = []; + for (const entry of entries) { + const fullPath = join(current, entry.name); + if (entry.isDirectory()) { + files.push(...(await listFiles(root, fullPath))); + } else { + files.push(relative(root, fullPath)); + } + } + return files; +} + +/** Asserts that the staged directory holds exactly the expected paths; entries with a `json` key are compared structurally, all others as exact text. */ async function expectGoldenFiles(stagedDir: string, expectedFiles: GoldenFixture['expectedFiles']): Promise { + const actualFiles = await listFiles(stagedDir); + const expectedPaths = Object.keys(expectedFiles).sort(); + expect(actualFiles.sort()).toEqual(expectedPaths); + + for (const path of expectedPaths) { + const expected = expectedFiles[path]; + const actual = await readFile(join(stagedDir, path), 'utf-8'); + if ('json' in expected) { + expect(JSON.parse(actual)).toEqual(expected.json); + } else { + expect(actual).toBe(expected.text); + } + } +} + +describe('stagePgStatStatementsTemplates golden fixtures', () => { + it.each(['first-run', 'normal-delta', 'reset-detected', 'version-change', 'eviction-churn'] as const)( + 'matches the committed %s golden output', + async (fixtureName) => { + const fixture = await readFixture(fixtureName); + const root = await tempDir(`pgss-golden-${fixtureName}-`); + const stagedDir = join(root, 'staged'); + const baselinePath = join(root, 'cache', fixture.connectionId, 'pgss-baseline.json'); + await mkdir(dirname(baselinePath), { recursive: true }); + await 
writeFixtureBaseline(baselinePath, fixture.baseline); + + const result = await stagePgStatStatementsTemplates({ + stagedDir, + connectionId: fixture.connectionId, + queryClient: fakePgClient(), + reader: fixtureReader(fixture), + sqlAnalysis: fixtureSqlAnalysis(fixture), + pullConfig: fixture.pullConfig, + baselinePath, + now: new Date(fixture.now), + }); + + await expectGoldenFiles(stagedDir, fixture.expectedFiles); + expect(result.baseline).toEqual(fixture.expectedBaseline); + }, + ); +}); diff --git a/packages/context/src/ingest/adapters/historic-sql/stage-pgss.test.ts b/packages/context/src/ingest/adapters/historic-sql/stage-pgss.test.ts new file mode 100644 index 00000000..7589f8a7 --- /dev/null +++ b/packages/context/src/ingest/adapters/historic-sql/stage-pgss.test.ts @@ -0,0 +1,652 @@ +import { mkdtemp, readFile } from 'node:fs/promises'; +import { tmpdir } from 'node:os'; +import { join } from 'node:path'; +import { describe, expect, it, vi } from 'vitest'; +import type { SqlAnalysisPort } from '../../../sql-analysis/index.js'; +import { + pgssBaselinePath, + readPgssBaseline, + stagePgStatStatementsTemplates, + writePgssBaselineAtomic, + type PgssBaseline, +} from './stage-pgss.js'; +import { historicSqlManifestSchema, historicSqlMetadataSchema, historicSqlUsageSchema } from './types.js'; +import type { KloPostgresQueryClient, PostgresPgssReader, PostgresPgssRow } from './types.js'; + +/** Creates a fresh temporary directory under os.tmpdir() with the given name prefix. */ async function tempDir(prefix: string): Promise { + return mkdtemp(join(tmpdir(), prefix)); +} + +/** Reads root/relPath and parses it as JSON; the cast to T is not validated. */ async function readJson(root: string, relPath: string): Promise { + return JSON.parse(await readFile(join(root, relPath), 'utf-8')) as T; +} + +/** Query client stub whose executeQuery always resolves to an empty result. */ function fakePgClient(): KloPostgresQueryClient { + return { + async executeQuery() { + return { headers: [], rows: [] }; + }, + }; +} + +/** Builds a PostgresPgssRow from shared defaults; values in `overrides` replace the defaults. */ function row(overrides: Partial & Pick): PostgresPgssRow { + return { + userid: '11', + username: 'analyst', + dbid: '5', + database: 'warehouse', + calls: 10, + totalExecTime: 250, + meanExecTime: 25, 
+ totalRows: 20, + ...overrides, + }; +} + +/** Reader stub backed by vi.fn. Defaults: version 'PostgreSQL 16.4', no warnings, statsResetAt 2026-05-08T08:00:00.000Z, deallocCount 0; rows are truncated to options.maxTemplates. */ function fakeReader(input: { + pgServerVersion?: string; + warnings?: string[]; + statsResetAt?: string | null; + deallocCount?: number | null; + rows: PostgresPgssRow[]; +}): PostgresPgssReader { + return { + probe: vi.fn(async () => ({ + pgServerVersion: input.pgServerVersion ?? 'PostgreSQL 16.4', + warnings: input.warnings ?? [], + })), + readSnapshot: vi.fn(async (_client, options) => ({ + statsResetAt: input.statsResetAt ?? '2026-05-08T08:00:00.000Z', + deallocCount: input.deallocCount ?? 0, + rows: input.rows.slice(0, options.maxTemplates), + })), + }; +} + +/** Analysis stub keyed on substrings of the SQL: 'broken' yields an error result, 'customers' yields fp_customers, anything else yields fp_orders. */ const sqlAnalysis: SqlAnalysisPort = { + async analyzeForFingerprint(sql) { + if (sql.includes('broken')) { + return { + fingerprint: '', + normalizedSql: '', + tablesTouched: [], + literalSlots: [], + error: 'parse failed', + }; + } + if (sql.includes('customers')) { + return { + fingerprint: 'fp_customers', + normalizedSql: 'SELECT count(*) FROM analytics.customers', + tablesTouched: ['analytics.customers'], + literalSlots: [], + }; + } + return { + fingerprint: 'fp_orders', + normalizedSql: 'SELECT count(*) FROM analytics.orders WHERE status = $1', + tablesTouched: ['analytics.orders'], + literalSlots: [], + }; + }, +}; + +/** Pull config shared by these tests; only maxTemplatesPerRun varies between callers. */ function postgresPullConfig(maxTemplatesPerRun = 5000) { + return { + dialect: 'postgres' as const, + windowDays: 90, + lastSuccessfulCursor: null, + serviceAccountUserPatterns: ['^svc_'], + redactionPatterns: ['secret'], + maxTemplatesPerRun, + minCalls: 5, + }; +} + +describe('stagePgStatStatementsTemplates', () => { + it('stages first-run PGSS templates as degraded aggregate templates and builds a next baseline', async () => { + const stagedDir = await tempDir('pgss-stage-first-'); + const baselineRootDir = await tempDir('pgss-baseline-first-'); + const baselinePath = pgssBaselinePath(baselineRootDir, 'conn_pg'); + + const result = await stagePgStatStatementsTemplates({ + stagedDir, + connectionId: 'conn_pg', + queryClient: 
fakePgClient(), + reader: fakeReader({ + warnings: ['pg_stat_statements.track is none; set it to top or all in the Postgres parameter group or config'], + deallocCount: 2, + rows: [ + row({ + queryid: '101', + query: 'SELECT count(*) FROM analytics.orders WHERE status = $1', + calls: 10, + totalExecTime: 250, + totalRows: 20, + }), + row({ + queryid: '102', + query: 'SELECT * FROM pg_catalog.pg_class', + calls: 50, + totalExecTime: 500, + }), + row({ + queryid: '103', + query: 'BEGIN', + calls: 75, + totalExecTime: 75, + }), + row({ + queryid: '104', + query: 'SELECT broken FROM analytics.orders', + calls: 8, + totalExecTime: 80, + }), + ], + }), + sqlAnalysis, + pullConfig: postgresPullConfig(), + baselinePath, + now: new Date('2026-05-08T12:00:00.000Z'), + }); + + const manifest = historicSqlManifestSchema.parse(await readJson(stagedDir, 'manifest.json')); + expect(manifest).toMatchObject({ + source: 'historic-sql', + connectionId: 'conn_pg', + dialect: 'postgres', + fetchedAt: '2026-05-08T12:00:00.000Z', + windowEnd: '2026-05-08T12:00:00.000Z', + nextSuccessfulCursor: '2026-05-08T12:00:00.000Z', + templateCount: 1, + capped: false, + degraded: true, + statsResetAt: '2026-05-08T08:00:00.000Z', + baselineFirstRun: true, + pgServerVersion: 'PostgreSQL 16.4', + deallocCount: 2, + }); + expect(manifest.warnings).toEqual([ + 'pg_stat_statements.track is none; set it to top or all in the Postgres parameter group or config', + 'pgss_dealloc_count:2; pg_stat_statements.max may be too low, causing template eviction churn', + 'baseline_first_run:no_previous_pgss_baseline', + 'analysis_failed:db5_q104', + ]); + expect(manifest.templates).toEqual([ + { + id: 'db5_q101', + fingerprint: 'fp_orders', + subClusterId: null, + path: 'templates/db5_q101/page.md', + }, + ]); + + const metadata = historicSqlMetadataSchema.parse(await readJson(stagedDir, 'templates/db5_q101/metadata.json')); + expect(metadata).toMatchObject({ + id: 'db5_q101', + title: 'postgres · analytics.orders 
[db5_q101]', + path: 'templates/db5_q101/page.md', + objectType: 'historic_sql_template', + lastEditedAt: null, + properties: { + fingerprint: 'fp_orders', + sub_cluster_id: null, + dialect: 'postgres', + tables_touched: ['analytics.orders'], + literal_slots: [], + }, + }); + expect(metadata.properties.triage_signals).toEqual({ + executions_bucket: 'mid', + distinct_users_bucket: 'solo', + error_rate_bucket: 'ok', + recency_bucket: 'active', + service_account_only: 'false', + runtime_bucket: 'fast', + }); + + const usage = historicSqlUsageSchema.parse(await readJson(stagedDir, 'templates/db5_q101/usage.json')); + expect(usage).toEqual({ + stats: { + executions: 10, + distinct_users: 1, + first_seen: '2026-05-08T12:00:00.000Z', + last_seen: '2026-05-08T12:00:00.000Z', + p50_runtime_ms: null, + p95_runtime_ms: null, + mean_runtime_ms: 25, + error_rate: 0, + rows_produced: 20, + }, + literal_slots: [], + samples: [], + }); + + expect(await readFile(join(stagedDir, 'templates/db5_q101/page.md'), 'utf-8')).toContain( + 'SELECT count(*) FROM analytics.orders WHERE status = $1', + ); + expect(result.baselinePath).toBe(baselinePath); + expect(result.baseline.templates.db5_q101.perUser['11']).toEqual({ + calls: 10, + totalExecTime: 250, + totalRows: 20, + }); + await expect(readPgssBaseline(baselinePath)).resolves.toBeNull(); + }); + + it('warns when pg_stat_statements reports dealloc churn', async () => { + const root = await tempDir('pgss-churn-'); + const stagedDir = join(root, 'staged'); + const baselinePath = join(root, 'cache', 'warehouse', 'pgss-baseline.json'); + + await stagePgStatStatementsTemplates({ + stagedDir, + connectionId: 'warehouse', + queryClient: fakePgClient(), + reader: fakeReader({ + rows: [ + row({ + queryid: '901', + query: 'SELECT COUNT(*) FROM public.orders WHERE status = $1', + calls: 20, + totalExecTime: 500, + meanExecTime: 25, + }), + ], + deallocCount: 3, + }), + sqlAnalysis, + pullConfig: postgresPullConfig(50), + baselinePath, + now: new 
Date('2026-05-08T12:00:00.000Z'), + }); + + const manifest = await readJson<{ warnings: string[]; deallocCount: number }>(stagedDir, 'manifest.json'); + expect(manifest.deallocCount).toBe(3); + expect(manifest.warnings).toContain( + 'pgss_dealloc_count:3; pg_stat_statements.max may be too low, causing template eviction churn', + ); + }); + + it('uses the saved cumulative baseline to stage only positive deltas on later runs', async () => { + const stagedDir = await tempDir('pgss-stage-delta-'); + const baselineRootDir = await tempDir('pgss-baseline-delta-'); + const baselinePath = pgssBaselinePath(baselineRootDir, 'conn_pg'); + const baseline: PgssBaseline = { + version: 1, + fetchedAt: '2026-05-08T10:00:00.000Z', + statsResetAt: '2026-05-08T08:00:00.000Z', + pgServerVersion: 'PostgreSQL 16.4', + templates: { + db5_q201: { + firstObservedAt: '2026-05-08T09:00:00.000Z', + perUser: { + '11': { calls: 10, totalExecTime: 100, totalRows: 50 }, + '12': { calls: 5, totalExecTime: 50, totalRows: 25 }, + }, + }, + }, + }; + await writePgssBaselineAtomic(baselinePath, baseline); + + await stagePgStatStatementsTemplates({ + stagedDir, + connectionId: 'conn_pg', + queryClient: fakePgClient(), + reader: fakeReader({ + rows: [ + row({ + queryid: '201', + userid: '11', + username: 'analyst', + query: 'SELECT count(*) FROM analytics.orders WHERE status = $1', + calls: 12, + totalExecTime: 160, + totalRows: 58, + }), + row({ + queryid: '201', + userid: '12', + username: 'svc_loader', + query: 'SELECT count(*) FROM analytics.orders WHERE status = $1', + calls: 5, + totalExecTime: 50, + totalRows: 25, + }), + row({ + queryid: '202', + userid: '13', + username: 'analyst_2', + query: 'SELECT count(*) FROM analytics.customers', + calls: 7, + totalExecTime: 210, + totalRows: 7, + }), + ], + }), + sqlAnalysis, + pullConfig: postgresPullConfig(), + baselinePath, + now: new Date('2026-05-08T12:00:00.000Z'), + }); + + const manifest = historicSqlManifestSchema.parse(await readJson(stagedDir, 
'manifest.json')); + expect(manifest.baselineFirstRun).toBe(false); + expect(manifest.windowStart).toBe('2026-05-08T10:00:00.000Z'); + expect(manifest.templateCount).toBe(2); + expect(manifest.templates.map((template) => template.id)).toEqual(['db5_q202', 'db5_q201']); + + const usage201 = historicSqlUsageSchema.parse(await readJson(stagedDir, 'templates/db5_q201/usage.json')); + expect(usage201.stats).toMatchObject({ + executions: 2, + distinct_users: 1, + first_seen: '2026-05-08T09:00:00.000Z', + last_seen: '2026-05-08T12:00:00.000Z', + mean_runtime_ms: 30, + rows_produced: 8, + }); + const metadata201 = historicSqlMetadataSchema.parse(await readJson(stagedDir, 'templates/db5_q201/metadata.json')); + expect(metadata201.properties.triage_signals.service_account_only).toBe('false'); + + const usage202 = historicSqlUsageSchema.parse(await readJson(stagedDir, 'templates/db5_q202/usage.json')); + expect(usage202.stats).toMatchObject({ + executions: 7, + distinct_users: 1, + first_seen: '2026-05-08T12:00:00.000Z', + mean_runtime_ms: 30, + rows_produced: 7, + }); + }); + + it('keeps matching queryid values from different databases as distinct templates and baseline entries', async () => { + const stagedDir = await tempDir('pgss-stage-db-key-'); + const baselineRootDir = await tempDir('pgss-baseline-db-key-'); + const baselinePath = pgssBaselinePath(baselineRootDir, 'conn_pg'); + await writePgssBaselineAtomic(baselinePath, { + version: 1, + fetchedAt: '2026-05-08T10:00:00.000Z', + statsResetAt: '2026-05-08T08:00:00.000Z', + pgServerVersion: 'PostgreSQL 16.4', + templates: { + db5_q701: { + firstObservedAt: '2026-05-08T09:00:00.000Z', + perUser: { + '11': { calls: 10, totalExecTime: 100, totalRows: 50 }, + }, + }, + db6_q701: { + firstObservedAt: '2026-05-08T09:30:00.000Z', + perUser: { + '11': { calls: 4, totalExecTime: 40, totalRows: 20 }, + }, + }, + }, + }); + + const result = await stagePgStatStatementsTemplates({ + stagedDir, + connectionId: 'conn_pg', + 
queryClient: fakePgClient(), + reader: fakeReader({ + rows: [ + row({ + queryid: '701', + dbid: '5', + database: 'warehouse', + query: 'SELECT count(*) FROM analytics.orders WHERE status = $1', + calls: 12, + totalExecTime: 160, + totalRows: 58, + }), + row({ + queryid: '701', + dbid: '6', + database: 'app', + query: 'SELECT count(*) FROM analytics.orders WHERE status = $1', + calls: 9, + totalExecTime: 130, + totalRows: 35, + }), + ], + }), + sqlAnalysis, + pullConfig: postgresPullConfig(), + baselinePath, + now: new Date('2026-05-08T12:00:00.000Z'), + }); + + const manifest = historicSqlManifestSchema.parse(await readJson(stagedDir, 'manifest.json')); + expect(manifest.templates.map((template) => template.id).sort()).toEqual(['db5_q701', 'db6_q701']); + + const warehouseUsage = historicSqlUsageSchema.parse(await readJson(stagedDir, 'templates/db5_q701/usage.json')); + expect(warehouseUsage.stats).toMatchObject({ + executions: 2, + rows_produced: 8, + first_seen: '2026-05-08T09:00:00.000Z', + }); + + const appUsage = historicSqlUsageSchema.parse(await readJson(stagedDir, 'templates/db6_q701/usage.json')); + expect(appUsage.stats).toMatchObject({ + executions: 5, + rows_produced: 15, + first_seen: '2026-05-08T09:30:00.000Z', + }); + + expect(result.baseline.templates.db5_q701.perUser['11']).toEqual({ + calls: 12, + totalExecTime: 160, + totalRows: 58, + }); + expect(result.baseline.templates.db6_q701.perUser['11']).toEqual({ + calls: 9, + totalExecTime: 130, + totalRows: 35, + }); + }); + + it('treats stats_reset advancement and major-version changes as fresh baselines', async () => { + const resetStagedDir = await tempDir('pgss-stage-reset-'); + const resetBaselineRootDir = await tempDir('pgss-baseline-reset-'); + const resetBaselinePath = pgssBaselinePath(resetBaselineRootDir, 'conn_pg'); + await writePgssBaselineAtomic(resetBaselinePath, { + version: 1, + fetchedAt: '2026-05-08T10:00:00.000Z', + statsResetAt: '2026-05-08T08:00:00.000Z', + pgServerVersion: 
'PostgreSQL 16.4', + templates: { + db5_q301: { + firstObservedAt: '2026-05-08T09:00:00.000Z', + perUser: { + '11': { calls: 100, totalExecTime: 1000, totalRows: 500 }, + }, + }, + }, + }); + + await stagePgStatStatementsTemplates({ + stagedDir: resetStagedDir, + connectionId: 'conn_pg', + queryClient: fakePgClient(), + reader: fakeReader({ + statsResetAt: '2026-05-08T11:00:00.000Z', + rows: [ + row({ + queryid: '301', + query: 'SELECT count(*) FROM analytics.orders WHERE status = $1', + calls: 3, + totalExecTime: 90, + totalRows: 9, + }), + ], + }), + sqlAnalysis, + pullConfig: postgresPullConfig(), + baselinePath: resetBaselinePath, + now: new Date('2026-05-08T12:00:00.000Z'), + }); + + const resetManifest = historicSqlManifestSchema.parse(await readJson(resetStagedDir, 'manifest.json')); + expect(resetManifest.baselineFirstRun).toBe(true); + expect(resetManifest.warnings).toContain( + 'baseline_reset:stats_reset advanced from 2026-05-08T08:00:00.000Z to 2026-05-08T11:00:00.000Z', + ); + const resetUsage = historicSqlUsageSchema.parse(await readJson(resetStagedDir, 'templates/db5_q301/usage.json')); + expect(resetUsage.stats.executions).toBe(3); + + const versionStagedDir = await tempDir('pgss-stage-version-'); + const versionBaselineRootDir = await tempDir('pgss-baseline-version-'); + const versionBaselinePath = pgssBaselinePath(versionBaselineRootDir, 'conn_pg'); + await writePgssBaselineAtomic(versionBaselinePath, { + version: 1, + fetchedAt: '2026-05-08T10:00:00.000Z', + statsResetAt: '2026-05-08T08:00:00.000Z', + pgServerVersion: 'PostgreSQL 15.7', + templates: { + db5_q302: { + firstObservedAt: '2026-05-08T09:00:00.000Z', + perUser: { + '11': { calls: 100, totalExecTime: 1000, totalRows: 500 }, + }, + }, + }, + }); + + await stagePgStatStatementsTemplates({ + stagedDir: versionStagedDir, + connectionId: 'conn_pg', + queryClient: fakePgClient(), + reader: fakeReader({ + pgServerVersion: 'PostgreSQL 16.4', + rows: [ + row({ + queryid: '302', + query: 'SELECT 
count(*) FROM analytics.orders WHERE status = $1', + calls: 4, + totalExecTime: 80, + totalRows: 8, + }), + ], + }), + sqlAnalysis, + pullConfig: postgresPullConfig(), + baselinePath: versionBaselinePath, + now: new Date('2026-05-08T12:00:00.000Z'), + }); + + const versionManifest = historicSqlManifestSchema.parse(await readJson(versionStagedDir, 'manifest.json')); + expect(versionManifest.baselineFirstRun).toBe(true); + expect(versionManifest.warnings).toContain('baseline_reset:pg_server_major changed from 15 to 16'); + }); + + it('handles scoped counter regressions without forcing a global first-run baseline', async () => { + const stagedDir = await tempDir('pgss-stage-scoped-'); + const baselineRootDir = await tempDir('pgss-baseline-scoped-'); + const baselinePath = pgssBaselinePath(baselineRootDir, 'conn_pg'); + await writePgssBaselineAtomic(baselinePath, { + version: 1, + fetchedAt: '2026-05-08T10:00:00.000Z', + statsResetAt: '2026-05-08T08:00:00.000Z', + pgServerVersion: 'PostgreSQL 16.4', + templates: { + db5_q401: { + firstObservedAt: '2026-05-08T09:00:00.000Z', + perUser: { + '11': { calls: 100, totalExecTime: 1000, totalRows: 500 }, + '12': { calls: 50, totalExecTime: 500, totalRows: 250 }, + }, + }, + }, + }); + + await stagePgStatStatementsTemplates({ + stagedDir, + connectionId: 'conn_pg', + queryClient: fakePgClient(), + reader: fakeReader({ + statsResetAt: '2026-05-08T08:00:00.000Z', + rows: [ + row({ + queryid: '401', + userid: '11', + username: 'analyst', + query: 'SELECT count(*) FROM analytics.orders WHERE status = $1', + calls: 2, + totalExecTime: 30, + totalRows: 6, + }), + row({ + queryid: '401', + userid: '12', + username: 'svc_loader', + query: 'SELECT count(*) FROM analytics.orders WHERE status = $1', + calls: 55, + totalExecTime: 650, + totalRows: 275, + }), + ], + }), + sqlAnalysis, + pullConfig: postgresPullConfig(), + baselinePath, + now: new Date('2026-05-08T12:00:00.000Z'), + }); + + const manifest = 
historicSqlManifestSchema.parse(await readJson(stagedDir, 'manifest.json')); + expect(manifest.baselineFirstRun).toBe(false); + expect(manifest.warnings).toContain('scoped_reset:dbid=5 queryid=401 userid=11'); + + const usage = historicSqlUsageSchema.parse(await readJson(stagedDir, 'templates/db5_q401/usage.json')); + expect(usage.stats).toMatchObject({ + executions: 7, + distinct_users: 2, + mean_runtime_ms: 25.714285714285715, + rows_produced: 31, + }); + }); + + it('ranks and caps selected PGSS templates after skip and analysis filtering', async () => { + const stagedDir = await tempDir('pgss-stage-cap-'); + const baselineRootDir = await tempDir('pgss-baseline-cap-'); + const baselinePath = pgssBaselinePath(baselineRootDir, 'conn_pg'); + + await stagePgStatStatementsTemplates({ + stagedDir, + connectionId: 'conn_pg', + queryClient: fakePgClient(), + reader: fakeReader({ + rows: [ + row({ + queryid: '501', + username: 'analyst_a', + query: 'SELECT count(*) FROM analytics.orders WHERE status = $1', + calls: 2, + totalExecTime: 20, + }), + row({ + queryid: '502', + username: 'analyst_b', + query: 'SELECT count(*) FROM analytics.customers', + calls: 20, + totalExecTime: 200, + }), + row({ + queryid: '503', + username: 'analyst_c', + query: 'SELECT count(*) FROM analytics.orders WHERE status = $1', + calls: 10, + totalExecTime: 100, + }), + ], + }), + sqlAnalysis, + pullConfig: postgresPullConfig(2), + baselinePath, + now: new Date('2026-05-08T12:00:00.000Z'), + }); + + const manifest = historicSqlManifestSchema.parse(await readJson(stagedDir, 'manifest.json')); + expect(manifest.capped).toBe(true); + expect(manifest.warnings).toContain('templates_truncated: kept 2 of 3 templates'); + expect(manifest.templates.map((template) => template.id)).toEqual(['db5_q502', 'db5_q503']); + }); +}); diff --git a/packages/context/src/ingest/adapters/historic-sql/stage-pgss.ts b/packages/context/src/ingest/adapters/historic-sql/stage-pgss.ts new file mode 100644 index 
00000000..75a3e18f --- /dev/null +++ b/packages/context/src/ingest/adapters/historic-sql/stage-pgss.ts @@ -0,0 +1,508 @@ +import { mkdir, readFile, rename, writeFile } from 'node:fs/promises'; +import { dirname, join } from 'node:path'; +import { z } from 'zod'; +import type { SqlAnalysisFingerprintResult, SqlAnalysisPort } from '../../../sql-analysis/index.js'; +import { + HISTORIC_SQL_OBJECT_TYPE, + HISTORIC_SQL_SOURCE_KEY, + historicSqlPullConfigSchema, + type HistoricSqlManifest, + type HistoricSqlMetadata, + type HistoricSqlPullConfig, + type HistoricSqlUsage, + type KloPostgresQueryClient, + type PostgresPgssAggregateRow, + type PostgresPgssReader, + type PostgresPgssRow, +} from './types.js'; + +const PGSS_BASELINE_VERSION = 1 as const; + +/** Counter triple stored per user id inside a baseline template entry. */ const pgssCounterSchema = z.object({ + calls: z.number().int().nonnegative(), + totalExecTime: z.number().nonnegative(), + totalRows: z.number().int().nonnegative(), +}); + +/** Persisted baseline: template id mapped to firstObservedAt plus per-user counters, with the fetch time, stats_reset time and server version it was taken at. */ const pgssBaselineSchema = z.object({ + version: z.literal(PGSS_BASELINE_VERSION), + fetchedAt: z.string().datetime(), + statsResetAt: z.string().datetime().nullable(), + pgServerVersion: z.string(), + templates: z.record( + z.string(), + z.object({ + firstObservedAt: z.string().datetime(), + perUser: z.record(z.string(), pgssCounterSchema), + }), + ), +}); + +export type PgssBaseline = z.infer; + +/** Input of stagePgStatStatementsTemplates; `now` defaults to the current time when omitted. */ export interface StagePgStatStatementsTemplatesInput { + stagedDir: string; + connectionId: string; + queryClient: KloPostgresQueryClient; + reader: PostgresPgssReader; + sqlAnalysis: SqlAnalysisPort; + pullConfig: HistoricSqlPullConfig; + baselinePath: string; + now?: Date; +} + +export interface StagePgStatStatementsTemplatesResult { + baselinePath: string; + baseline: PgssBaseline; +} + +interface PgssBaselineCounter { + calls: number; + totalExecTime: number; + totalRows: number; +} + +/** Accumulator used while grouping snapshot rows by template id in aggregatePgssRows. */ interface PgssAggregateMutable { + id: string; + queryid: string; + dbid: string; + database: string | null; + query: string; + deltaCalls: number; + deltaExecTime: 
number; + deltaRows: number; + users: Set; + firstObservedAt: string; +} + +interface AnalyzedPgssTemplate { + aggregate: PostgresPgssAggregateRow; + analysis: SqlAnalysisFingerprintResult; +} + +/** Counter subtracted when there is no usable previous counter, so the delta equals the row's current values. */ const ZERO_COUNTER: PgssBaselineCounter = { + calls: 0, + totalExecTime: 0, + totalRows: 0, +}; + +/** Passed to reader.readSnapshot as maxTemplates. NOTE(review): no warning is emitted here if the snapshot reaches this limit - confirm the reader reports truncation. */ const PGSS_SNAPSHOT_READ_LIMIT = 5000; +/** Statements starting with one of these keywords are dropped before analysis (case-insensitive). */ const PGSS_HARD_SKIP_PREFIX_RE = /^\s*(SHOW|DESCRIBE|DESC|EXPLAIN|USE|SET|BEGIN|COMMIT|ROLLBACK|VACUUM|ANALYZE)\b/i; +/** Statements mentioning these catalog or statistics objects anywhere in the text are dropped before analysis. */ const PGSS_HARD_SKIP_TABLE_RE = /\b(INFORMATION_SCHEMA|pg_catalog\.|pg_toast\.|pg_stat_)/i; + +/** Template key in the form db{dbid}_q{queryid}, so equal queryid values in different databases stay distinct. */ function pgssTemplateId(row: Pick): string { + return `db${row.dbid}_q${row.queryid}`; +} + +/** Resolves rootDir/connectionId/pgss-baseline.json; rootDir defaults to .klo/cache/historic-sql under process.cwd(). */ export function pgssBaselinePath(rootDir: string | undefined, connectionId: string): string { + return join(rootDir ?? join(process.cwd(), '.klo/cache/historic-sql'), connectionId, 'pgss-baseline.json'); +} + +/** Reads and schema-validates the baseline file. Returns null only when the file does not exist (ENOENT); invalid JSON, schema failures and other I/O errors propagate. */ export async function readPgssBaseline(path: string): Promise { + try { + return pgssBaselineSchema.parse(JSON.parse(await readFile(path, 'utf-8'))); + } catch (error) { + if (error && typeof error === 'object' && 'code' in error && error.code === 'ENOENT') { + return null; + } + throw error; + } +} + +/** Validates the baseline, creates the parent directory, writes pretty-printed JSON to `path` + '.tmp' and renames it over `path`. NOTE(review): the temp name is fixed, so concurrent writers to one path would share it, and it is left behind if rename fails. */ export async function writePgssBaselineAtomic(path: string, baseline: PgssBaseline): Promise { + const parsed = pgssBaselineSchema.parse(baseline); + await mkdir(dirname(path), { recursive: true }); + const tempPath = `${path}.tmp`; + await writeFile(tempPath, `${JSON.stringify(parsed, null, 2)}\n`, 'utf-8'); + await rename(tempPath, path); +} + +/** Probes the server, reads the snapshot, computes per-template deltas against the saved baseline, analyzes and ranks them, then writes metadata.json, page.md and usage.json per selected template plus manifest.json into stagedDir. Returns the next baseline without writing it to baselinePath. Throws when pullConfig.dialect is not 'postgres'. */ export async function stagePgStatStatementsTemplates( + input: StagePgStatStatementsTemplatesInput, +): Promise { + const config = historicSqlPullConfigSchema.parse(input.pullConfig); + if (config.dialect !== 'postgres') { + throw new Error(`stagePgStatStatementsTemplates requires dialect postgres, got ${config.dialect}`); + } + + const now = input.now ?? 
new Date(); + const fetchedAt = now.toISOString(); + const probe = await input.reader.probe(input.queryClient); + const warnings = [...probe.warnings]; + const baseline = await readPgssBaseline(input.baselinePath); + const snapshot = await input.reader.readSnapshot(input.queryClient, { + minCalls: config.minCalls, + maxTemplates: PGSS_SNAPSHOT_READ_LIMIT, + }); + if (snapshot.deallocCount !== null && snapshot.deallocCount > 0) { + warnings.push( + `pgss_dealloc_count:${snapshot.deallocCount}; pg_stat_statements.max may be too low, causing template eviction churn`, + ); + } + const reset = detectBaselineReset({ + baseline, + snapshotStatsResetAt: snapshot.statsResetAt, + currentPgServerVersion: probe.pgServerVersion, + }); + warnings.push(...reset.warnings); + + /* Skip filtering runs after aggregation, on the aggregate's query text. */ const aggregates = aggregatePgssRows({ + rows: snapshot.rows, + baseline, + baselineFirstRun: reset.baselineFirstRun, + fetchedAt, + warnings, + }).filter((aggregate) => !shouldSkipPgssSql(aggregate.query)); + + const analyzed: AnalyzedPgssTemplate[] = []; + for (const aggregate of aggregates) { + const analysis = await input.sqlAnalysis.analyzeForFingerprint(aggregate.query, 'postgres'); + if (analysis.error || !analysis.fingerprint || !analysis.normalizedSql) { + warnings.push(`analysis_failed:${aggregate.id}`); + continue; + } + analyzed.push({ aggregate, analysis }); + } + + const selected = selectPgssTemplates(analyzed, config.maxTemplatesPerRun); + if (selected.length < analyzed.length) { + warnings.push(`templates_truncated: kept ${selected.length} of ${analyzed.length} templates`); + } + + await mkdir(input.stagedDir, { recursive: true }); + const templates: HistoricSqlManifest['templates'] = []; + for (const template of selected) { + const staged = buildPgssStagedTemplate(template, config, now); + const basePath = `templates/${staged.metadata.id}`; + await writeJson(input.stagedDir, `${basePath}/metadata.json`, staged.metadata); + await writeText(input.stagedDir, `${basePath}/page.md`, 
staged.pageMarkdown); + await writeJson(input.stagedDir, `${basePath}/usage.json`, staged.usage); + templates.push({ + id: staged.metadata.id, + fingerprint: staged.metadata.properties.fingerprint, + subClusterId: staged.metadata.properties.sub_cluster_id, + path: staged.metadata.path, + }); + } + + await writeJson(input.stagedDir, 'manifest.json', { + source: HISTORIC_SQL_SOURCE_KEY, + connectionId: input.connectionId, + dialect: 'postgres', + fetchedAt, + windowStart: baseline?.fetchedAt ?? snapshot.statsResetAt ?? fetchedAt, /* NOTE(review): still uses baseline.fetchedAt when reset.baselineFirstRun discarded that baseline - confirm intended. */ + windowEnd: fetchedAt, + nextSuccessfulCursor: fetchedAt, + templateCount: selected.length, + capped: selected.length < analyzed.length, + warnings, + degraded: true, + statsResetAt: snapshot.statsResetAt, + baselineFirstRun: reset.baselineFirstRun, + pgServerVersion: probe.pgServerVersion, + deallocCount: snapshot.deallocCount, + templates, + } satisfies HistoricSqlManifest); + + return { + baselinePath: input.baselinePath, + baseline: buildNextBaseline({ + rows: snapshot.rows, + fetchedAt, + statsResetAt: snapshot.statsResetAt, + pgServerVersion: probe.pgServerVersion, + previousBaseline: reset.baselineFirstRun ? 
null : baseline, + }), + }; +} + +/** Decides whether the saved baseline must be ignored: there is none, stats_reset moved forward, or the Postgres major version changed. baselineFirstRun is true whenever any warning was produced. NOTE(review): the two stats_reset values are compared as strings, which presumably assumes both use one ISO-8601 format - confirm against the reader. */ function detectBaselineReset(input: { + baseline: PgssBaseline | null; + snapshotStatsResetAt: string | null; + currentPgServerVersion: string; +}): { baselineFirstRun: boolean; warnings: string[] } { + if (!input.baseline) { + return { baselineFirstRun: true, warnings: ['baseline_first_run:no_previous_pgss_baseline'] }; + } + + const warnings: string[] = []; + if ( + input.baseline.statsResetAt && + input.snapshotStatsResetAt && + input.baseline.statsResetAt < input.snapshotStatsResetAt + ) { + warnings.push( + `baseline_reset:stats_reset advanced from ${input.baseline.statsResetAt} to ${input.snapshotStatsResetAt}`, + ); + } + + const previousMajor = postgresMajor(input.baseline.pgServerVersion); + const currentMajor = postgresMajor(input.currentPgServerVersion); + if (previousMajor && currentMajor && previousMajor !== currentMajor) { + warnings.push(`baseline_reset:pg_server_major changed from ${previousMajor} to ${currentMajor}`); + } + + return { baselineFirstRun: warnings.length > 0, warnings }; +} + +/** Extracts the major version digits from a string containing 'PostgreSQL N' or starting with 'N.' or a bare 'N'; returns null when neither form matches. */ function postgresMajor(version: string): string | null { + return version.match(/PostgreSQL\s+(\d+)/i)?.[1] ?? version.match(/^(\d+)(?:\.|$)/)?.[1] ?? null; +} + +/** Groups snapshot rows by template id and sums each row's delta against its per-user baseline counter. Rows with a zero call delta are skipped unless this is a first run; templates whose summed deltaCalls is 0 are dropped. scoped_reset warnings are appended to input.warnings. */ function aggregatePgssRows(input: { + rows: PostgresPgssRow[]; + baseline: PgssBaseline | null; + baselineFirstRun: boolean; + fetchedAt: string; + warnings: string[]; +}): PostgresPgssAggregateRow[] { + const aggregates = new Map(); + + for (const row of input.rows) { + const templateId = pgssTemplateId(row); + const baselineTemplate = input.baselineFirstRun ? 
undefined : input.baseline?.templates[templateId]; + const baselineCounter = baselineTemplate?.perUser[row.userid]; + const previous = scopedCounterBaseline(row, baselineCounter, input.baselineFirstRun, input.warnings); + const deltaCalls = row.calls - previous.calls; + const deltaExecTime = row.totalExecTime - previous.totalExecTime; + const deltaRows = row.totalRows - previous.totalRows; + if (deltaCalls === 0 && !input.baselineFirstRun) { + continue; + } + + /* The first row seen for a template supplies its query text and database. */ const existing = + aggregates.get(templateId) ?? + ({ + id: templateId, + queryid: row.queryid, + dbid: row.dbid, + database: row.database, + query: row.query, + deltaCalls: 0, + deltaExecTime: 0, + deltaRows: 0, + users: new Set(), + firstObservedAt: baselineTemplate?.firstObservedAt ?? input.fetchedAt, + } satisfies PgssAggregateMutable); + + existing.deltaCalls += Math.max(0, deltaCalls); + existing.deltaExecTime += Math.max(0, deltaExecTime); + existing.deltaRows += Math.max(0, deltaRows); + if (deltaCalls > 0) { + /* Users are tracked by username; rows with a null username all count as 'unknown'. */ existing.users.add(row.username ?? 
'unknown'); + } + aggregates.set(templateId, existing); + } + + return [...aggregates.values()] + .filter((aggregate) => aggregate.deltaCalls > 0) + .map((aggregate) => ({ + id: aggregate.id, + queryid: aggregate.queryid, + dbid: aggregate.dbid, + database: aggregate.database, + query: aggregate.query, + deltaCalls: aggregate.deltaCalls, + deltaExecTime: aggregate.deltaExecTime, + deltaRows: aggregate.deltaRows, + meanExecTime: aggregate.deltaExecTime / Math.max(aggregate.deltaCalls, 1), + distinctUsersDelta: aggregate.users.size, + users: [...aggregate.users].sort(), + firstObservedAt: aggregate.firstObservedAt, + })); +} + +/** Returns the counter to subtract for one row: ZERO_COUNTER on a first run, when no counter exists, or when any baseline value exceeds the row's current value; the last case also appends a scoped_reset warning. */ function scopedCounterBaseline( + row: PostgresPgssRow, + baselineCounter: PgssBaselineCounter | undefined, + baselineFirstRun: boolean, + warnings: string[], +): PgssBaselineCounter { + if (!baselineCounter || baselineFirstRun) { + return ZERO_COUNTER; + } + if ( + baselineCounter.calls > row.calls || + baselineCounter.totalExecTime > row.totalExecTime || + baselineCounter.totalRows > row.totalRows + ) { + warnings.push(`scoped_reset:dbid=${row.dbid} queryid=${row.queryid} userid=${row.userid}`); + return ZERO_COUNTER; + } + return baselineCounter; +} + +/** True when the SQL matches the skip-prefix pattern or the skip-table pattern. */ function shouldSkipPgssSql(sql: string): boolean { + return PGSS_HARD_SKIP_PREFIX_RE.test(sql) || PGSS_HARD_SKIP_TABLE_RE.test(sql); +} + +/** Ranks templates by users.length * log1p(deltaCalls) descending, breaks ties by id, and keeps the first maxTemplatesPerRun. */ function selectPgssTemplates(templates: AnalyzedPgssTemplate[], maxTemplatesPerRun: number): AnalyzedPgssTemplate[] { + return templates + .map((template) => ({ + template, + score: template.aggregate.users.length * Math.log1p(template.aggregate.deltaCalls), + })) + .sort( + (left, right) => right.score - left.score || left.template.aggregate.id.localeCompare(right.template.aggregate.id), + ) + .slice(0, maxTemplatesPerRun) + .map((entry) => entry.template); +} + +/** Builds the metadata, page markdown and usage payloads for one analyzed template. last_seen is always `now`; p50/p95 are always null, error_rate is always 0, and literal_slots and samples are always empty. */ function buildPgssStagedTemplate( + template: AnalyzedPgssTemplate, + config: HistoricSqlPullConfig, + now: Date, +): { metadata: HistoricSqlMetadata; pageMarkdown: string; usage: 
HistoricSqlUsage } { + const tablesTouched = [...template.analysis.tablesTouched].sort(); + const firstTable = tablesTouched[0] ?? 'query'; + const id = template.aggregate.id; + + const metadata: HistoricSqlMetadata = { + id, + title: `postgres · ${firstTable} [${id.slice(0, 12)}]`, + path: `templates/${id}/page.md`, + objectType: HISTORIC_SQL_OBJECT_TYPE, + lastEditedAt: null, + properties: { + fingerprint: template.analysis.fingerprint, + sub_cluster_id: null, + dialect: 'postgres', + tables_touched: tablesTouched, + literal_slots: [], + triage_signals: buildPgssTriageSignals({ + executions: template.aggregate.deltaCalls, + distinctUsers: template.aggregate.distinctUsersDelta, + firstSeen: template.aggregate.firstObservedAt, + lastSeen: now.toISOString(), + meanRuntimeMs: template.aggregate.meanExecTime, + serviceAccountOnly: isServiceAccountOnly(template.aggregate.users, config.serviceAccountUserPatterns), + now, + }), + }, + }; + + return { + metadata, + pageMarkdown: renderTemplatePage(id, template.analysis.normalizedSql, tablesTouched), + usage: { + stats: { + executions: template.aggregate.deltaCalls, + distinct_users: template.aggregate.distinctUsersDelta, + first_seen: template.aggregate.firstObservedAt, + last_seen: now.toISOString(), + p50_runtime_ms: null, + p95_runtime_ms: null, + mean_runtime_ms: template.aggregate.meanExecTime, + error_rate: 0, + rows_produced: template.aggregate.deltaRows, + }, + literal_slots: [], + samples: [], + }, + }; +} + +/** Maps the raw numbers to bucket labels: executions below 3 is 'low', below 50 'mid', else 'high'; up to 1 user is 'solo', up to 5 'team', else 'broad'. error_rate_bucket is always 'ok'; firstSeen is accepted but not read. */ function buildPgssTriageSignals(input: { + executions: number; + distinctUsers: number; + firstSeen: string; + lastSeen: string; + meanRuntimeMs: number; + serviceAccountOnly: boolean; + now: Date; +}): Record { + return { + executions_bucket: input.executions < 3 ? 'low' : input.executions < 50 ? 'mid' : 'high', + distinct_users_bucket: input.distinctUsers <= 1 ? 'solo' : input.distinctUsers <= 5 ? 
'team' : 'broad', + error_rate_bucket: 'ok', + recency_bucket: recencyBucket(input.lastSeen, input.now), + service_account_only: String(input.serviceAccountOnly), + runtime_bucket: runtimeBucket(input.meanRuntimeMs), + }; +} + +/** 'fast' below 100 ms, 'moderate' below 1000 ms, otherwise 'slow'. */ function runtimeBucket(meanRuntimeMs: number): string { + if (meanRuntimeMs < 100) { + return 'fast'; + } + if (meanRuntimeMs < 1000) { + return 'moderate'; + } + return 'slow'; +} + +/** 'active' when lastSeen is at most 14 days before `now`, 'warm' up to 60 days, otherwise 'cold'; a lastSeen after `now` counts as age 0. */ function recencyBucket(lastSeen: string, now: Date): string { + const ageDays = Math.max(0, (now.getTime() - new Date(lastSeen).getTime()) / 86400000); + if (ageDays <= 14) { + return 'active'; + } + if (ageDays <= 60) { + return 'warm'; + } + return 'cold'; +} + +/** True only when both lists are non-empty and every user matches at least one pattern; each pattern is compiled with new RegExp, which throws on invalid syntax. */ function isServiceAccountOnly(users: string[], patterns: string[]): boolean { + if (users.length === 0 || patterns.length === 0) { + return false; + } + const regexes = patterns.map((pattern) => new RegExp(pattern)); + return users.every((user) => regexes.some((regex) => regex.test(user))); +} + +/** Renders the template page: a heading with the id, the normalized SQL in a fenced sql block, and a bullet list of the touched tables. */ function renderTemplatePage(id: string, normalizedSql: string, tablesTouched: string[]): string { + return [ + `# ${id}`, + '', + '## Normalized SQL', + '```sql', + normalizedSql, + '```', + '', + '## Tables touched', + ...tablesTouched.map((table) => `- ${table}`), + '', + ].join('\n'); +} + +function buildNextBaseline(input: { + rows: PostgresPgssRow[]; + fetchedAt: string; + statsResetAt: string | null; + pgServerVersion: string; + previousBaseline: PgssBaseline | null; +}): PgssBaseline { + const templates: PgssBaseline['templates'] = {}; + for (const row of input.rows) { + const templateId = pgssTemplateId(row); + const previous = input.previousBaseline?.templates[templateId]; + const template = templates[templateId] ?? { + firstObservedAt: previous?.firstObservedAt ?? 
input.fetchedAt, + perUser: {}, + }; + template.perUser[row.userid] = { + calls: row.calls, + totalExecTime: row.totalExecTime, + totalRows: row.totalRows, + }; + templates[templateId] = template; + } + return { + version: PGSS_BASELINE_VERSION, + fetchedAt: input.fetchedAt, + statsResetAt: input.statsResetAt, + pgServerVersion: input.pgServerVersion, + templates, + }; +} + +async function writeJson(root: string, relPath: string, value: unknown): Promise { + await writeText(root, relPath, `${JSON.stringify(value, null, 2)}\n`); +} + +async function writeText(root: string, relPath: string, value: string): Promise { + const target = join(root, relPath); + await mkdir(dirname(target), { recursive: true }); + await writeFile(target, value, 'utf-8'); +} diff --git a/packages/context/src/ingest/adapters/historic-sql/stage.test.ts b/packages/context/src/ingest/adapters/historic-sql/stage.test.ts new file mode 100644 index 00000000..dfaed511 --- /dev/null +++ b/packages/context/src/ingest/adapters/historic-sql/stage.test.ts @@ -0,0 +1,798 @@ +import { mkdtemp, readFile, readdir } from 'node:fs/promises'; +import { tmpdir } from 'node:os'; +import { join } from 'node:path'; +import { describe, expect, it } from 'vitest'; +import type { SqlAnalysisPort } from '../../../sql-analysis/index.js'; +import { stageHistoricSqlTemplates } from './stage.js'; +import { + historicSqlManifestSchema, + historicSqlMetadataSchema, + historicSqlUsageSchema, + type HistoricSqlQueryHistoryReader, + type HistoricSqlRawQueryRow, +} from './types.js'; + +async function tempDir(): Promise { + return mkdtemp(join(tmpdir(), 'historic-sql-stage-')); +} + +async function readJson(root: string, relPath: string): Promise { + return JSON.parse(await readFile(join(root, relPath), 'utf-8')) as T; +} + +function fakeReader(rows: HistoricSqlRawQueryRow[]): HistoricSqlQueryHistoryReader { + return { + async probe() {}, + async *fetch() { + for (const row of rows) { + yield row; + } + }, + }; +} + +const 
fakeSqlAnalysis: SqlAnalysisPort = { + async analyzeForFingerprint(sql) { + if (sql.includes('paid')) { + return { + fingerprint: 'fp_paid_orders', + normalizedSql: 'SELECT count(*) FROM analytics.orders WHERE status = ? AND created_at >= ?', + tablesTouched: ['analytics.orders'], + literalSlots: [ + { position: 1, type: 'string', exampleValue: 'paid' }, + { position: 2, type: 'date', exampleValue: '2026-04-01' }, + ], + }; + } + return { + fingerprint: 'fp_refunds', + normalizedSql: 'SELECT count(*) FROM analytics.refunds WHERE state = ?', + tablesTouched: ['analytics.refunds'], + literalSlots: [{ position: 1, type: 'string', exampleValue: 'complete' }], + }; + }, +}; + +const categoricalSqlAnalysis: SqlAnalysisPort = { + async analyzeForFingerprint(sql) { + const status = sql.includes("'refunded'") ? 'refunded' : 'paid'; + return { + fingerprint: 'fp_order_status', + normalizedSql: 'SELECT count(*) FROM analytics.orders WHERE status = ?', + tablesTouched: ['analytics.orders'], + literalSlots: [{ position: 1, type: 'string', exampleValue: status }], + }; + }, +}; + +function categoricalRows(): HistoricSqlRawQueryRow[] { + return [ + { + id: 'paid-1', + sql: "SELECT count(*) FROM analytics.orders WHERE status = 'paid'", + user: 'analyst-a', + startedAt: '2026-05-04T10:00:00.000Z', + endedAt: null, + runtimeMs: 100, + rowsProduced: 11, + success: true, + errorMessage: null, + }, + { + id: 'paid-2', + sql: "SELECT count(*) FROM analytics.orders WHERE status = 'paid'", + user: 'analyst-b', + startedAt: '2026-05-04T10:01:00.000Z', + endedAt: null, + runtimeMs: 110, + rowsProduced: 12, + success: true, + errorMessage: null, + }, + { + id: 'paid-3', + sql: "SELECT count(*) FROM analytics.orders WHERE status = 'paid'", + user: 'analyst-c', + startedAt: '2026-05-04T10:02:00.000Z', + endedAt: null, + runtimeMs: 120, + rowsProduced: 13, + success: true, + errorMessage: null, + }, + { + id: 'refunded-1', + sql: "SELECT count(*) FROM analytics.orders WHERE status = 
'refunded'", + user: 'analyst-a', + startedAt: '2026-05-04T10:03:00.000Z', + endedAt: null, + runtimeMs: 130, + rowsProduced: 21, + success: true, + errorMessage: null, + }, + { + id: 'refunded-2', + sql: "SELECT count(*) FROM analytics.orders WHERE status = 'refunded'", + user: 'analyst-b', + startedAt: '2026-05-04T10:04:00.000Z', + endedAt: null, + runtimeMs: 140, + rowsProduced: 22, + success: true, + errorMessage: null, + }, + { + id: 'refunded-3', + sql: "SELECT count(*) FROM analytics.orders WHERE status = 'refunded'", + user: 'analyst-c', + startedAt: '2026-05-04T10:05:00.000Z', + endedAt: null, + runtimeMs: 150, + rowsProduced: 23, + success: true, + errorMessage: null, + }, + ]; +} + +const diverseSqlAnalysis: SqlAnalysisPort = { + async analyzeForFingerprint(sql) { + const value = sql.match(/status = '([^']+)'/)?.[1] ?? 'unknown'; + return { + fingerprint: 'fp_diverse_samples', + normalizedSql: 'SELECT count(*) FROM analytics.orders WHERE status = ?', + tablesTouched: ['analytics.orders'], + literalSlots: [{ position: 1, type: 'string', exampleValue: value }], + }; + }, +}; + +const classificationMatrixSqlAnalysis: SqlAnalysisPort = { + async analyzeForFingerprint(sql) { + if (sql.includes('stale_orders')) { + return { + fingerprint: 'fp_stale_date', + normalizedSql: 'SELECT count(*) FROM analytics.stale_orders WHERE created_at >= ?', + tablesTouched: ['analytics.stale_orders'], + literalSlots: [{ position: 1, type: 'date', exampleValue: '2026-04-01' }], + }; + } + + const stringValue = (field: string): string => sql.match(new RegExp(`${field} = '([^']+)'`))?.[1] ?? 'unknown'; + const amount = sql.match(/amount >= (\d+)/)?.[1] ?? '0'; + const asOf = sql.match(/created_at >= '([^']+)'/)?.[1] ?? '2026-05-01'; + + return { + fingerprint: 'fp_classification_matrix', + normalizedSql: + 'SELECT count(*) FROM analytics.orders WHERE region = ? AND plan = ? AND status = ? AND amount >= ? 
AND created_at >= ?', + tablesTouched: ['analytics.orders'], + literalSlots: [ + { position: 1, type: 'string', exampleValue: stringValue('region') }, + { position: 2, type: 'string', exampleValue: stringValue('plan') }, + { position: 3, type: 'string', exampleValue: stringValue('status') }, + { position: 4, type: 'number', exampleValue: amount }, + { position: 5, type: 'date', exampleValue: asOf }, + ], + }; + }, +}; + +function classificationMatrixRows(): HistoricSqlRawQueryRow[] { + const rows: HistoricSqlRawQueryRow[] = Array.from({ length: 20 }, (_, index) => { + const status = index < 10 ? 'paid' : 'refunded'; + const plan = index === 19 ? 'self_serve' : 'enterprise'; + const amount = 100 + index; + const asOf = `2026-05-${String(1 + Math.floor(index / 5)).padStart(2, '0')}`; + return { + id: `matrix-${index + 1}`, + sql: `SELECT count(*) FROM analytics.orders WHERE region = 'us' AND plan = '${plan}' AND status = '${status}' AND amount >= ${amount} AND created_at >= '${asOf}'`, + user: `analyst-${(index % 4) + 1}`, + startedAt: `2026-05-04T10:${String(index).padStart(2, '0')}:00.000Z`, + endedAt: null, + runtimeMs: 100 + index, + rowsProduced: 1, + success: true, + errorMessage: null, + }; + }); + + return [ + ...rows, + { + id: 'stale-date-1', + sql: "SELECT count(*) FROM analytics.stale_orders WHERE created_at >= '2026-04-01'", + user: 'analyst-1', + startedAt: '2026-05-04T11:00:00.000Z', + endedAt: null, + runtimeMs: 75, + rowsProduced: 1, + success: true, + errorMessage: null, + }, + ]; +} + +describe('stageHistoricSqlTemplates', () => { + it('compresses rows by fingerprint into document-shaped staged templates', async () => { + const stagedDir = await tempDir(); + + await stageHistoricSqlTemplates({ + stagedDir, + connectionId: 'conn_1', + queryClient: {}, + reader: fakeReader([ + { + id: 'q1', + sql: "SELECT count(*) FROM analytics.orders WHERE status = 'paid' AND created_at >= '2026-04-01' AND email = 'analyst@example.com'", + user: 
'analyst@example.com', + startedAt: '2026-05-04T10:00:00.000Z', + endedAt: '2026-05-04T10:00:01.000Z', + runtimeMs: 100, + rowsProduced: 1, + success: true, + errorMessage: null, + }, + { + id: 'q2', + sql: "SELECT count(*) FROM analytics.orders WHERE status = 'paid' AND created_at >= '2026-05-01' AND email = 'analyst-2@example.com'", + user: 'analyst-2@example.com', + startedAt: '2026-05-04T11:00:00.000Z', + endedAt: '2026-05-04T11:00:01.000Z', + runtimeMs: 300, + rowsProduced: 1, + success: true, + errorMessage: null, + }, + ]), + sqlAnalysis: fakeSqlAnalysis, + pullConfig: { + dialect: 'snowflake', + windowDays: 90, + lastSuccessfulCursor: null, + serviceAccountUserPatterns: ['^svc_'], + redactionPatterns: ['[\\w.+-]+@[\\w-]+\\.[\\w.-]+'], + maxTemplatesPerRun: 5000, + minCalls: 5, + }, + now: new Date('2026-05-04T12:00:00.000Z'), + }); + + const manifest = historicSqlManifestSchema.parse(await readJson(stagedDir, 'manifest.json')); + expect(manifest).toMatchObject({ + source: 'historic-sql', + connectionId: 'conn_1', + dialect: 'snowflake', + nextSuccessfulCursor: '2026-05-04T11:00:00.000Z', + templateCount: 1, + capped: false, + }); + + const files = (await readdir(join(stagedDir, 'templates', 'fp_paid_orders'))).sort(); + expect(files).toEqual(['metadata.json', 'page.md', 'usage.json']); + + const metadata = historicSqlMetadataSchema.parse( + await readJson(stagedDir, 'templates/fp_paid_orders/metadata.json'), + ); + expect(metadata).toEqual({ + id: 'fp_paid_orders', + title: 'snowflake · analytics.orders [fp_pai]', + path: 'templates/fp_paid_orders/page.md', + objectType: 'historic_sql_template', + lastEditedAt: null, + properties: { + fingerprint: 'fp_paid_orders', + sub_cluster_id: null, + dialect: 'snowflake', + tables_touched: ['analytics.orders'], + literal_slots: [ + { position: 1, type: 'string', classification: 'constant' }, + { position: 2, type: 'date', classification: 'runtime' }, + ], + triage_signals: { + executions_bucket: 'low', + 
distinct_users_bucket: 'team', + error_rate_bucket: 'ok', + recency_bucket: 'active', + service_account_only: 'false', + slot_summary: '1 constant, 1 runtime', + }, + }, + }); + + const page = await readFile(join(stagedDir, 'templates/fp_paid_orders/page.md'), 'utf-8'); + expect(page).toContain('## Normalized SQL'); + expect(page).toContain('SELECT count(*) FROM analytics.orders WHERE status = ? AND created_at >= ?'); + expect(page).toContain('- analytics.orders'); + + const usage = historicSqlUsageSchema.parse(await readJson(stagedDir, 'templates/fp_paid_orders/usage.json')); + expect(usage.stats).toMatchObject({ + executions: 2, + distinct_users: 2, + first_seen: '2026-05-04T10:00:00.000Z', + last_seen: '2026-05-04T11:00:00.000Z', + p50_runtime_ms: 100, + p95_runtime_ms: 300, + error_rate: 0, + }); + expect(usage.samples).toHaveLength(1); + expect(usage.samples[0].bound_sql).toContain(''); + expect(usage.samples[0].bound_sql).not.toContain('analyst@example.com'); + expect(usage.samples[0].bound_sql).not.toContain('analyst-2@example.com'); + }); + + it('skips hard-noise SQL and caps templates deterministically', async () => { + const stagedDir = await tempDir(); + + await stageHistoricSqlTemplates({ + stagedDir, + connectionId: 'conn_1', + queryClient: {}, + reader: fakeReader([ + { + id: 'show-1', + sql: 'SHOW TABLES', + user: 'analyst', + startedAt: '2026-05-04T10:00:00.000Z', + endedAt: null, + runtimeMs: null, + success: true, + errorMessage: null, + }, + { + id: 'q3', + sql: "SELECT count(*) FROM analytics.refunds WHERE state = 'complete'", + user: 'analyst', + startedAt: '2026-05-04T11:00:00.000Z', + endedAt: null, + runtimeMs: 50, + success: true, + errorMessage: null, + }, + { + id: 'q4', + sql: "SELECT count(*) FROM analytics.orders WHERE status = 'paid' AND created_at >= '2026-04-01'", + user: 'analyst', + startedAt: '2026-05-04T11:30:00.000Z', + endedAt: null, + runtimeMs: 40, + success: true, + errorMessage: null, + }, + ]), + sqlAnalysis: 
fakeSqlAnalysis, + pullConfig: { + dialect: 'bigquery', + windowDays: 7, + lastSuccessfulCursor: '2026-05-01T00:00:00.000Z', + serviceAccountUserPatterns: [], + redactionPatterns: [], + maxTemplatesPerRun: 1, + minCalls: 5, + }, + now: new Date('2026-05-04T12:00:00.000Z'), + }); + + const manifest = historicSqlManifestSchema.parse(await readJson(stagedDir, 'manifest.json')); + expect(manifest.templateCount).toBe(1); + expect(manifest.capped).toBe(true); + expect(manifest.warnings).toEqual(['templates_truncated: kept 1 of 2 templates']); + expect(manifest.templates.map((template) => template.id)).toEqual(['fp_paid_orders']); + }); + + it('splits categorical fingerprints into one document directory per dominant value', async () => { + const stagedDir = await tempDir(); + + await stageHistoricSqlTemplates({ + stagedDir, + connectionId: 'conn_1', + queryClient: {}, + reader: fakeReader(categoricalRows()), + sqlAnalysis: categoricalSqlAnalysis, + pullConfig: { + dialect: 'snowflake', + windowDays: 90, + lastSuccessfulCursor: null, + serviceAccountUserPatterns: [], + redactionPatterns: [], + maxTemplatesPerRun: 5000, + minCalls: 5, + }, + now: new Date('2026-05-04T12:00:00.000Z'), + }); + + const manifest = historicSqlManifestSchema.parse(await readJson(stagedDir, 'manifest.json')); + const templates = manifest.templates + .map((template) => ({ + id: template.id, + fingerprint: template.fingerprint, + subClusterId: template.subClusterId, + path: template.path, + })) + .sort((left, right) => left.id.localeCompare(right.id)); + + expect(manifest.templateCount).toBe(2); + expect(templates).toEqual([ + { + id: 'fp_order_status__cat_2b2ff2318877', + fingerprint: 'fp_order_status', + subClusterId: 'cat_2b2ff2318877', + path: 'templates/fp_order_status__cat_2b2ff2318877/page.md', + }, + { + id: 'fp_order_status__cat_34f037ddcbfa', + fingerprint: 'fp_order_status', + subClusterId: 'cat_34f037ddcbfa', + path: 'templates/fp_order_status__cat_34f037ddcbfa/page.md', + }, + ]); + + 
const paidMetadata = historicSqlMetadataSchema.parse( + await readJson(stagedDir, 'templates/fp_order_status__cat_34f037ddcbfa/metadata.json'), + ); + expect(paidMetadata).toMatchObject({ + id: 'fp_order_status__cat_34f037ddcbfa', + title: 'snowflake · analytics.orders [fp_ord:ddcbfa]', + path: 'templates/fp_order_status__cat_34f037ddcbfa/page.md', + properties: { + fingerprint: 'fp_order_status', + sub_cluster_id: 'cat_34f037ddcbfa', + dialect: 'snowflake', + tables_touched: ['analytics.orders'], + literal_slots: [{ position: 1, type: 'string', classification: 'categorical' }], + }, + }); + + const paidUsage = historicSqlUsageSchema.parse( + await readJson(stagedDir, 'templates/fp_order_status__cat_34f037ddcbfa/usage.json'), + ); + expect(paidUsage.stats).toMatchObject({ + executions: 3, + distinct_users: 3, + first_seen: '2026-05-04T10:00:00.000Z', + last_seen: '2026-05-04T10:02:00.000Z', + rows_produced: 36, + }); + expect(paidUsage.literal_slots).toEqual([{ position: 1, distinct_values: 1, top_values: [['paid', 3]] }]); + + const refundedUsage = historicSqlUsageSchema.parse( + await readJson(stagedDir, 'templates/fp_order_status__cat_2b2ff2318877/usage.json'), + ); + expect(refundedUsage.stats).toMatchObject({ + executions: 3, + distinct_users: 3, + first_seen: '2026-05-04T10:03:00.000Z', + last_seen: '2026-05-04T10:05:00.000Z', + rows_produced: 66, + }); + expect(refundedUsage.literal_slots).toEqual([ + { position: 1, distinct_values: 1, top_values: [['refunded', 3]] }, + ]); + }); + + it('classifies literal slots across the spec matrix and stale-date demotion', async () => { + const stagedDir = await tempDir(); + + await stageHistoricSqlTemplates({ + stagedDir, + connectionId: 'conn_1', + queryClient: {}, + reader: fakeReader(classificationMatrixRows()), + sqlAnalysis: classificationMatrixSqlAnalysis, + pullConfig: { + dialect: 'snowflake', + windowDays: 90, + lastSuccessfulCursor: null, + serviceAccountUserPatterns: [], + redactionPatterns: [], + 
maxTemplatesPerRun: 5000, + minCalls: 5, + }, + now: new Date('2026-05-04T12:00:00.000Z'), + }); + + const manifest = historicSqlManifestSchema.parse(await readJson(stagedDir, 'manifest.json')); + const matrixTemplates = manifest.templates.filter((template) => template.fingerprint === 'fp_classification_matrix'); + expect(matrixTemplates).toHaveLength(2); + expect(matrixTemplates.every((template) => template.subClusterId?.startsWith('cat_'))).toBe(true); + + const matrixTemplate = matrixTemplates[0]; + if (!matrixTemplate) { + throw new Error('expected classification matrix template'); + } + const matrixMetadata = historicSqlMetadataSchema.parse( + await readJson(stagedDir, matrixTemplate.path.replace('/page.md', '/metadata.json')), + ); + expect(matrixMetadata.properties.literal_slots).toMatchInlineSnapshot(` + [ + { + "classification": "constant", + "position": 1, + "type": "string", + }, + { + "classification": "constant", + "position": 2, + "type": "string", + }, + { + "classification": "categorical", + "position": 3, + "type": "string", + }, + { + "classification": "runtime", + "position": 4, + "type": "number", + }, + { + "classification": "runtime", + "position": 5, + "type": "date", + }, + ] + `); + expect(matrixMetadata.properties.triage_signals.slot_summary).toBe('2 constant, 2 runtime'); + + const staleMetadata = historicSqlMetadataSchema.parse( + await readJson(stagedDir, 'templates/fp_stale_date/metadata.json'), + ); + expect(staleMetadata.properties.literal_slots).toMatchInlineSnapshot(` + [ + { + "classification": "runtime", + "position": 1, + "type": "date", + }, + ] + `); + expect(staleMetadata.properties.triage_signals.slot_summary).toBe('0 constant, 1 runtime'); + }); + + it('applies the templates-per-run cap after categorical expansion', async () => { + const stagedDir = await tempDir(); + + await stageHistoricSqlTemplates({ + stagedDir, + connectionId: 'conn_1', + queryClient: {}, + reader: fakeReader(categoricalRows()), + sqlAnalysis: 
categoricalSqlAnalysis, + pullConfig: { + dialect: 'snowflake', + windowDays: 90, + lastSuccessfulCursor: null, + serviceAccountUserPatterns: [], + redactionPatterns: [], + maxTemplatesPerRun: 1, + minCalls: 5, + }, + now: new Date('2026-05-04T12:00:00.000Z'), + }); + + const manifest = historicSqlManifestSchema.parse(await readJson(stagedDir, 'manifest.json')); + expect(manifest.templateCount).toBe(1); + expect(manifest.capped).toBe(true); + expect(manifest.warnings).toEqual(['templates_truncated: kept 1 of 2 templates']); + expect(manifest.templates).toHaveLength(1); + expect(manifest.templates[0].id).toMatch(/^fp_order_status__cat_/); + }); + + it('omits rows_produced for BigQuery templates when reader rows have no row counts', async () => { + const stagedDir = await tempDir(); + + await stageHistoricSqlTemplates({ + stagedDir, + connectionId: 'conn_bq', + queryClient: {}, + reader: fakeReader([ + { + id: 'bq-1', + sql: "SELECT count(*) FROM analytics.orders WHERE status = 'paid'", + user: 'analyst-a@example.com', + startedAt: '2026-05-04T10:00:00.000Z', + endedAt: null, + runtimeMs: 100, + success: true, + errorMessage: null, + }, + ]), + sqlAnalysis: fakeSqlAnalysis, + pullConfig: { + dialect: 'bigquery', + windowDays: 90, + lastSuccessfulCursor: null, + serviceAccountUserPatterns: [], + redactionPatterns: [], + maxTemplatesPerRun: 5000, + minCalls: 5, + }, + now: new Date('2026-05-04T12:00:00.000Z'), + }); + + const usage = historicSqlUsageSchema.parse(await readJson(stagedDir, 'templates/fp_paid_orders/usage.json')); + expect(usage.stats).not.toHaveProperty('rows_produced'); + expect(usage.samples[0]).not.toHaveProperty('rows_produced'); + }); + + it('keeps at most five diverse samples, preferring recent successful representatives per literal tuple', async () => { + const stagedDir = await tempDir(); + const statuses = [ + 'paid', + 'refunded', + 'pending', + 'failed', + 'trial', + 'cancelled', + 'draft', + 'returned', + 'review', + 'held', + 'archived', + 
]; + const rows: HistoricSqlRawQueryRow[] = statuses.flatMap((status, index) => [ + { + id: `${status}-old`, + sql: `SELECT count(*) FROM analytics.orders WHERE status = '${status}'`, + user: 'analyst-a', + startedAt: `2026-05-04T10:${String(index).padStart(2, '0')}:00.000Z`, + endedAt: null, + runtimeMs: 100, + rowsProduced: 1, + success: false, + errorMessage: 'old failed sample', + }, + { + id: `${status}-new`, + sql: `SELECT count(*) FROM analytics.orders WHERE status = '${status}'`, + user: 'analyst-a', + startedAt: `2026-05-04T11:${String(index).padStart(2, '0')}:00.000Z`, + endedAt: null, + runtimeMs: 90, + rowsProduced: 2, + success: true, + errorMessage: null, + }, + ]); + + await stageHistoricSqlTemplates({ + stagedDir, + connectionId: 'conn_1', + queryClient: {}, + reader: fakeReader(rows), + sqlAnalysis: diverseSqlAnalysis, + pullConfig: { + dialect: 'snowflake', + windowDays: 90, + lastSuccessfulCursor: null, + serviceAccountUserPatterns: [], + redactionPatterns: [], + maxTemplatesPerRun: 5000, + minCalls: 5, + }, + now: new Date('2026-05-04T12:00:00.000Z'), + }); + + const usage = historicSqlUsageSchema.parse(await readJson(stagedDir, 'templates/fp_diverse_samples/usage.json')); + expect(usage.samples).toHaveLength(5); + expect(usage.samples.every((sample) => sample.success)).toBe(true); + expect(new Set(usage.samples.map((sample) => sample.bound_sql.match(/status = '([^']+)'/)?.[1])).size).toBe(5); + expect(usage.samples.map((sample) => sample.started_at)).toEqual([ + '2026-05-04T11:10:00.000Z', + '2026-05-04T11:09:00.000Z', + '2026-05-04T11:08:00.000Z', + '2026-05-04T11:07:00.000Z', + '2026-05-04T11:06:00.000Z', + ]); + }); + + it('uses recency as a tie-breaker when the templates-per-run cap overflows', async () => { + const stagedDir = await tempDir(); + const sqlAnalysis: SqlAnalysisPort = { + async analyzeForFingerprint(sql) { + const table = sql.includes('fresh_orders') ? 
'fresh_orders' : 'stale_orders'; + return { + fingerprint: `fp_${table}`, + normalizedSql: `SELECT count(*) FROM analytics.${table}`, + tablesTouched: [`analytics.${table}`], + literalSlots: [], + }; + }, + }; + + await stageHistoricSqlTemplates({ + stagedDir, + connectionId: 'conn_1', + queryClient: {}, + reader: fakeReader([ + { + id: 'stale-1', + sql: 'SELECT count(*) FROM analytics.stale_orders', + user: 'analyst-a', + startedAt: '2026-02-04T10:00:00.000Z', + endedAt: null, + runtimeMs: 100, + rowsProduced: 1, + success: true, + errorMessage: null, + }, + { + id: 'fresh-1', + sql: 'SELECT count(*) FROM analytics.fresh_orders', + user: 'analyst-a', + startedAt: '2026-05-04T10:00:00.000Z', + endedAt: null, + runtimeMs: 100, + rowsProduced: 1, + success: true, + errorMessage: null, + }, + ]), + sqlAnalysis, + pullConfig: { + dialect: 'snowflake', + windowDays: 90, + lastSuccessfulCursor: null, + serviceAccountUserPatterns: [], + redactionPatterns: [], + maxTemplatesPerRun: 1, + minCalls: 5, + }, + now: new Date('2026-05-04T12:00:00.000Z'), + }); + + const manifest = historicSqlManifestSchema.parse(await readJson(stagedDir, 'manifest.json')); + expect(manifest.templates.map((template) => template.id)).toEqual(['fp_fresh_orders']); + }); + + it('does not persist bound SQL samples when redaction patterns are invalid', async () => { + const stagedDir = await tempDir(); + + await stageHistoricSqlTemplates({ + stagedDir, + connectionId: 'conn_1', + queryClient: {}, + reader: fakeReader([ + { + id: 'q1', + sql: "SELECT * FROM analytics.orders WHERE email = 'analyst@example.com'", + user: 'analyst@example.com', + startedAt: '2026-05-04T10:00:00.000Z', + endedAt: null, + runtimeMs: 100, + rowsProduced: 1, + success: true, + errorMessage: null, + }, + ]), + sqlAnalysis: { + async analyzeForFingerprint() { + return { + fingerprint: 'fp_redaction', + normalizedSql: 'SELECT * FROM analytics.orders WHERE email = ?', + tablesTouched: ['analytics.orders'], + literalSlots: [{ 
position: 1, type: 'string', exampleValue: 'analyst@example.com' }], + }; + }, + }, + pullConfig: { + dialect: 'snowflake', + windowDays: 90, + lastSuccessfulCursor: null, + serviceAccountUserPatterns: [], + redactionPatterns: ['['], + maxTemplatesPerRun: 5000, + minCalls: 5, + }, + now: new Date('2026-05-04T12:00:00.000Z'), + }); + + const manifest = historicSqlManifestSchema.parse(await readJson(stagedDir, 'manifest.json')); + const usage = historicSqlUsageSchema.parse(await readJson(stagedDir, 'templates/fp_redaction/usage.json')); + expect(manifest.warnings.some((warning) => warning.startsWith('redaction_skipped:invalid_redaction_pattern'))).toBe( + true, + ); + expect(usage.samples).toEqual([]); + }); +}); diff --git a/packages/context/src/ingest/adapters/historic-sql/stage.ts b/packages/context/src/ingest/adapters/historic-sql/stage.ts new file mode 100644 index 00000000..9c380a9f --- /dev/null +++ b/packages/context/src/ingest/adapters/historic-sql/stage.ts @@ -0,0 +1,630 @@ +import { createHash } from 'node:crypto'; +import { mkdir, writeFile } from 'node:fs/promises'; +import { dirname, join } from 'node:path'; +import type { + SqlAnalysisFingerprintResult, + SqlAnalysisLiteralSlot, + SqlAnalysisLiteralSlotType, + SqlAnalysisPort, +} from '../../../sql-analysis/index.js'; +import { + HISTORIC_SQL_OBJECT_TYPE, + HISTORIC_SQL_SOURCE_KEY, + historicSqlPullConfigSchema, + historicSqlRawQueryRowSchema, + type HistoricSqlLiteralSlotClassification, + type HistoricSqlManifest, + type HistoricSqlMetadata, + type HistoricSqlPullConfig, + type HistoricSqlQueryHistoryReader, + type HistoricSqlRawQueryRow, + type HistoricSqlUsage, +} from './types.js'; + +interface StageHistoricSqlTemplatesInput { + stagedDir: string; + connectionId: string; + queryClient: unknown; + reader: HistoricSqlQueryHistoryReader; + sqlAnalysis: SqlAnalysisPort; + pullConfig: HistoricSqlPullConfig; + now?: Date; +} + +interface SlotObservation { + value: string; + rowStartedAt: string; +} + 
+interface SlotStats { + position: number; + type: SqlAnalysisLiteralSlotType; + values: Map; + observations: SlotObservation[]; +} + +interface TemplateAccumulator { + fingerprint: string; + normalizedSql: string; + tablesTouched: Set; + rows: Array<{ row: HistoricSqlRawQueryRow; analysis: SqlAnalysisFingerprintResult }>; + slotStats: Map; +} + +interface ClassifiedLiteralSlot { + position: number; + type: SqlAnalysisLiteralSlotType; + classification: HistoricSqlLiteralSlotClassification; +} + +interface TemplateVariant { + id: string; + fingerprint: string; + subClusterId: string | null; + normalizedSql: string; + tablesTouched: Set; + rows: Array<{ row: HistoricSqlRawQueryRow; analysis: SqlAnalysisFingerprintResult }>; + slotStats: Map; + slotClassifications: ClassifiedLiteralSlot[]; +} + +interface CategoricalTupleEntry { + position: number; + value: string; +} + +interface RedactionPolicy { + redactors: RegExp[]; + samplesAllowed: boolean; +} + +const HARD_SKIP_PREFIX_RE = /^\s*(SHOW|DESCRIBE|DESC|EXPLAIN|USE|SET)\b/i; +const HARD_SKIP_TABLE_RE = /\b(INFORMATION_SCHEMA|SNOWFLAKE\.ACCOUNT_USAGE|pg_|system\.)/i; + +export async function stageHistoricSqlTemplates(input: StageHistoricSqlTemplatesInput): Promise { + const config = historicSqlPullConfigSchema.parse(input.pullConfig); + const now = input.now ?? new Date(); + const windowStart = config.lastSuccessfulCursor + ? 
new Date(config.lastSuccessfulCursor) + : new Date(now.getTime() - config.windowDays * 24 * 60 * 60 * 1000); + const warnings: string[] = []; + const redaction = compileRedactors(config.redactionPatterns, warnings); + const groups = new Map(); + let nextSuccessfulCursor: string | null = null; + + await input.reader.probe(input.queryClient); + + for await (const rawRow of input.reader.fetch( + input.queryClient, + { start: windowStart, end: now }, + config.lastSuccessfulCursor, + )) { + const row = historicSqlRawQueryRowSchema.parse(rawRow); + if (!nextSuccessfulCursor || row.startedAt > nextSuccessfulCursor) { + nextSuccessfulCursor = row.startedAt; + } + if (shouldSkipSql(row.sql)) { + continue; + } + + const analysis = await input.sqlAnalysis.analyzeForFingerprint(row.sql, config.dialect); + if (analysis.error || !analysis.fingerprint || !analysis.normalizedSql) { + warnings.push(`analysis_failed:${row.id}`); + continue; + } + + const group = + groups.get(analysis.fingerprint) ?? + { + fingerprint: analysis.fingerprint, + normalizedSql: analysis.normalizedSql, + tablesTouched: new Set(), + rows: [], + slotStats: new Map(), + }; + + for (const table of analysis.tablesTouched) { + group.tablesTouched.add(table); + } + for (const slot of analysis.literalSlots) { + recordSlot(group.slotStats, slot, redaction.redactors, row.startedAt); + } + group.rows.push({ row, analysis }); + groups.set(analysis.fingerprint, group); + } + + const expandedTemplates = expandCategoricalTemplates([...groups.values()], redaction.redactors); + const selected = selectTemplates(expandedTemplates, config.maxTemplatesPerRun, now); + if (selected.length < expandedTemplates.length) { + warnings.push(`templates_truncated: kept ${selected.length} of ${expandedTemplates.length} templates`); + } + + await mkdir(input.stagedDir, { recursive: true }); + const templates: HistoricSqlManifest['templates'] = []; + for (const template of selected) { + const staged = buildStagedTemplate(template, config, 
redaction, now); + const basePath = `templates/${staged.metadata.id}`; + await writeJson(input.stagedDir, `${basePath}/metadata.json`, staged.metadata); + await writeText(input.stagedDir, `${basePath}/page.md`, staged.pageMarkdown); + await writeJson(input.stagedDir, `${basePath}/usage.json`, staged.usage); + templates.push({ + id: staged.metadata.id, + fingerprint: staged.metadata.properties.fingerprint, + subClusterId: staged.metadata.properties.sub_cluster_id, + path: staged.metadata.path, + }); + } + + await writeJson(input.stagedDir, 'manifest.json', { + source: HISTORIC_SQL_SOURCE_KEY, + connectionId: input.connectionId, + dialect: config.dialect, + fetchedAt: now.toISOString(), + windowStart: windowStart.toISOString(), + windowEnd: now.toISOString(), + nextSuccessfulCursor, + templateCount: selected.length, + capped: selected.length < expandedTemplates.length, + warnings, + degraded: false, + statsResetAt: null, + baselineFirstRun: false, + pgServerVersion: null, + deallocCount: null, + templates, + } satisfies HistoricSqlManifest); +} + +function shouldSkipSql(sql: string): boolean { + return HARD_SKIP_PREFIX_RE.test(sql) || HARD_SKIP_TABLE_RE.test(sql); +} + +function recordSlot( + slotStats: Map, + slot: SqlAnalysisLiteralSlot, + redactors: RegExp[], + rowStartedAt: string, +): void { + const existing = slotStats.get(slot.position) ?? { + position: slot.position, + type: slot.type, + values: new Map(), + observations: [], + }; + const persistedValue = redactText(slot.exampleValue, redactors); + existing.values.set(persistedValue, (existing.values.get(persistedValue) ?? 
0) + 1); + existing.observations.push({ value: persistedValue, rowStartedAt }); + slotStats.set(slot.position, existing); +} + +function expandCategoricalTemplates(groups: TemplateAccumulator[], redactors: RegExp[]): TemplateVariant[] { + return groups.flatMap((group) => expandTemplateGroup(group, redactors)); +} + +function expandTemplateGroup(group: TemplateAccumulator, redactors: RegExp[]): TemplateVariant[] { + const rows = [...group.rows].sort((left, right) => left.row.startedAt.localeCompare(right.row.startedAt)); + const firstSeen = rows[0]?.row.startedAt; + if (!firstSeen) { + return []; + } + + const slotClassifications = classifySlots(group.slotStats, rows.length, firstSeen); + const categoricalPositions = slotClassifications + .filter((slot) => slot.classification === 'categorical') + .map((slot) => slot.position) + .sort((left, right) => left - right); + + if (categoricalPositions.length === 0) { + return [ + { + id: group.fingerprint, + fingerprint: group.fingerprint, + subClusterId: null, + normalizedSql: group.normalizedSql, + tablesTouched: group.tablesTouched, + rows, + slotStats: group.slotStats, + slotClassifications, + }, + ]; + } + + const byTuple = new Map< + string, + { + tuple: CategoricalTupleEntry[]; + rows: Array<{ row: HistoricSqlRawQueryRow; analysis: SqlAnalysisFingerprintResult }>; + } + >(); + + for (const entry of rows) { + const tuple = categoricalTuple(entry.analysis.literalSlots, categoricalPositions, redactors); + const key = JSON.stringify(tuple); + const existing = byTuple.get(key) ?? 
{ tuple, rows: [] }; + existing.rows.push(entry); + byTuple.set(key, existing); + } + + return [...byTuple.values()] + .map(({ tuple, rows: tupleRows }) => { + const subClusterId = subClusterIdForTuple(tuple); + return { + id: `${group.fingerprint}__${subClusterId}`, + fingerprint: group.fingerprint, + subClusterId, + normalizedSql: group.normalizedSql, + tablesTouched: group.tablesTouched, + rows: tupleRows, + slotStats: collectSlotStats(tupleRows, redactors), + slotClassifications, + }; + }) + .sort((left, right) => left.id.localeCompare(right.id)); +} + +function classifySlots( + slotStats: Map, + executions: number, + firstSeen: string, +): ClassifiedLiteralSlot[] { + return [...slotStats.values()] + .sort((left, right) => left.position - right.position) + .map((slot) => ({ + position: slot.position, + type: slot.type, + classification: classifySlot(slot, executions, firstSeen), + })); +} + +function collectSlotStats( + rows: Array<{ row: HistoricSqlRawQueryRow; analysis: SqlAnalysisFingerprintResult }>, + redactors: RegExp[], +): Map { + const slotStats = new Map(); + for (const entry of rows) { + for (const slot of entry.analysis.literalSlots) { + recordSlot(slotStats, slot, redactors, entry.row.startedAt); + } + } + return slotStats; +} + +function categoricalTuple( + literalSlots: SqlAnalysisLiteralSlot[], + categoricalPositions: number[], + redactors: RegExp[], +): CategoricalTupleEntry[] { + const valuesByPosition = new Map( + literalSlots.map((slot) => [slot.position, redactText(slot.exampleValue, redactors)] as const), + ); + return categoricalPositions.map((position) => ({ + position, + value: valuesByPosition.get(position) ?? 
'', + })); +} + +function subClusterIdForTuple(tuple: CategoricalTupleEntry[]): string { + return `cat_${createHash('sha256').update(JSON.stringify(tuple)).digest('hex').slice(0, 12)}`; +} + +function buildStagedTemplate( + template: TemplateVariant, + config: HistoricSqlPullConfig, + redaction: RedactionPolicy, + now: Date, +): { metadata: HistoricSqlMetadata; pageMarkdown: string; usage: HistoricSqlUsage } { + const rows = template.rows + .map((entry) => entry.row) + .sort((left, right) => left.startedAt.localeCompare(right.startedAt)); + const firstSeen = rows[0].startedAt; + const lastSeen = rows[rows.length - 1].startedAt; + const distinctUsers = new Set(rows.map((row) => row.user).filter((user): user is string => !!user)).size; + const errorCount = rows.filter((row) => !row.success).length; + const runtimes = rows + .map((row) => row.runtimeMs) + .filter((runtime): runtime is number => typeof runtime === 'number') + .sort((left, right) => left - right); + const triageSignals = buildTriageSignals({ + executions: rows.length, + distinctUsers, + errorRate: rows.length === 0 ? 0 : errorCount / rows.length, + lastSeen, + now, + serviceAccountOnly: isServiceAccountOnly(rows, config.serviceAccountUserPatterns), + slotClassifications: template.slotClassifications.map((slot) => slot.classification), + }); + const tablesTouched = [...template.tablesTouched].sort(); + const firstTable = tablesTouched[0] ?? 
'query'; + const id = template.id; + const rowsProduced = sumRowsProduced(rows); + const metadata: HistoricSqlMetadata = { + id, + title: buildTemplateTitle(config.dialect, firstTable, template.fingerprint, template.subClusterId), + path: `templates/${id}/page.md`, + objectType: HISTORIC_SQL_OBJECT_TYPE, + lastEditedAt: null, + properties: { + fingerprint: template.fingerprint, + sub_cluster_id: template.subClusterId, + dialect: config.dialect, + tables_touched: tablesTouched, + literal_slots: template.slotClassifications, + triage_signals: triageSignals, + }, + }; + + return { + metadata, + pageMarkdown: renderTemplatePage(id, template.normalizedSql, tablesTouched), + usage: { + stats: { + executions: rows.length, + distinct_users: distinctUsers, + first_seen: firstSeen, + last_seen: lastSeen, + p50_runtime_ms: percentile(runtimes, 0.5), + p95_runtime_ms: percentile(runtimes, 0.95), + error_rate: rows.length === 0 ? 0 : errorCount / rows.length, + ...(rowsProduced === null ? {} : { rows_produced: rowsProduced }), + }, + literal_slots: [...template.slotStats.values()] + .sort((left, right) => left.position - right.position) + .map((slot) => ({ + position: slot.position, + distinct_values: slot.values.size, + top_values: [...slot.values.entries()] + .sort((left, right) => right[1] - left[1] || left[0].localeCompare(right[0])) + .slice(0, 10), + })), + samples: selectSamples(template.rows, redaction), + }, + }; +} + +const TEMPORAL_SLOT_TYPES = new Set(['date', 'timestamp']); + +function isStaleDateConstant(slot: SlotStats, value: string, firstSeen: string): boolean { + return slot.type === 'date' && parseTemporalSlotValue(value) !== null && value < firstSeen.slice(0, 10); +} + +function isMovingTemporalSlot(slot: SlotStats): boolean { + if (!TEMPORAL_SLOT_TYPES.has(slot.type) || slot.values.size < 2) { + return false; + } + + const observations: Array<{ rowStartedAt: number; literalTime: number }> = []; + for (const observation of slot.observations) { + const 
rowStartedAt = Date.parse(observation.rowStartedAt); + const literalTime = parseTemporalSlotValue(observation.value); + if (Number.isNaN(rowStartedAt) || literalTime === null) { + return false; + } + observations.push({ rowStartedAt, literalTime }); + } + + const literalTimes = observations + .sort((left, right) => left.rowStartedAt - right.rowStartedAt) + .map((observation) => observation.literalTime); + + return isMonotonic(literalTimes); +} + +function parseTemporalSlotValue(value: string): number | null { + const parsed = Date.parse(value); + return Number.isNaN(parsed) ? null : parsed; +} + +function isMonotonic(values: number[]): boolean { + if (values.length < 2) { + return false; + } + + let nonDecreasing = true; + let nonIncreasing = true; + for (let index = 1; index < values.length; index += 1) { + if (values[index] < values[index - 1]) { + nonDecreasing = false; + } + if (values[index] > values[index - 1]) { + nonIncreasing = false; + } + } + + return nonDecreasing || nonIncreasing; +} + +function classifySlot( + slot: SlotStats, + executions: number, + firstSeen: string, +): HistoricSqlLiteralSlotClassification { + const ordered = [...slot.values.entries()].sort((left, right) => right[1] - left[1]); + const distinct = ordered.length; + const topCount = ordered[0]?.[1] ?? 0; + const topValue = ordered[0]?.[0] ?? 
''; + const staleDateConstant = isStaleDateConstant(slot, topValue, firstSeen); + + if (distinct === 1 && !staleDateConstant) { + return 'constant'; + } + if (executions > 0 && topCount / executions >= 0.95 && !staleDateConstant) { + return 'constant'; + } + if (isMovingTemporalSlot(slot)) { + return 'runtime'; + } + if (executions > 0 && distinct >= 2 && distinct <= 10 && ordered.every(([, count]) => count / executions >= 0.05)) { + return 'categorical'; + } + return 'runtime'; +} + +function buildTriageSignals(input: { + executions: number; + distinctUsers: number; + errorRate: number; + lastSeen: string; + now: Date; + serviceAccountOnly: boolean; + slotClassifications: HistoricSqlLiteralSlotClassification[]; +}): Record { + const runtimeCount = input.slotClassifications.filter((classification) => classification === 'runtime').length; + const constantCount = input.slotClassifications.filter((classification) => classification === 'constant').length; + return { + executions_bucket: input.executions < 3 ? 'low' : input.executions < 50 ? 'mid' : 'high', + distinct_users_bucket: input.distinctUsers <= 1 ? 'solo' : input.distinctUsers <= 5 ? 'team' : 'broad', + error_rate_bucket: input.errorRate <= 0.01 ? 'ok' : input.errorRate <= 0.1 ? 
'noisy' : 'broken', + recency_bucket: recencyBucket(input.lastSeen, input.now), + service_account_only: String(input.serviceAccountOnly), + slot_summary: `${constantCount} constant, ${runtimeCount} runtime`, + }; +} + +function recencyBucket(lastSeen: string, now: Date): string { + const ageDays = Math.max(0, (now.getTime() - new Date(lastSeen).getTime()) / (24 * 60 * 60 * 1000)); + if (ageDays <= 14) { + return 'active'; + } + if (ageDays <= 60) { + return 'warm'; + } + return 'cold'; +} + +function isServiceAccountOnly(rows: HistoricSqlRawQueryRow[], patterns: string[]): boolean { + const users = rows.map((row) => row.user).filter((user): user is string => !!user); + if (users.length === 0 || patterns.length === 0) { + return false; + } + const regexes = patterns.map((pattern) => new RegExp(pattern)); + return users.every((user) => regexes.some((regex) => regex.test(user))); +} + +function buildTemplateTitle( + dialect: HistoricSqlPullConfig['dialect'], + firstTable: string, + fingerprint: string, + subClusterId: string | null, +): string { + if (!subClusterId) { + return `${dialect} · ${firstTable} [${fingerprint.slice(0, 6)}]`; + } + return `${dialect} · ${firstTable} [${fingerprint.slice(0, 6)}:${subClusterId.slice(-6)}]`; +} + +function renderTemplatePage(fingerprint: string, normalizedSql: string, tablesTouched: string[]): string { + return [ + `# ${fingerprint}`, + '', + '## Normalized SQL', + '```sql', + normalizedSql, + '```', + '', + '## Tables touched', + ...tablesTouched.map((table) => `- ${table}`), + '', + ].join('\n'); +} + +function selectSamples( + rows: Array<{ row: HistoricSqlRawQueryRow; analysis: SqlAnalysisFingerprintResult }>, + redaction: RedactionPolicy, +): HistoricSqlUsage['samples'] { + if (!redaction.samplesAllowed) { + return []; + } + + const byLiteralTuple = new Map(); + const preferred = [...rows].sort((left, right) => { + if (left.row.success !== right.row.success) { + return left.row.success ? 
-1 : 1; + } + return right.row.startedAt.localeCompare(left.row.startedAt); + }); + + for (const entry of preferred) { + const key = [...entry.analysis.literalSlots] + .sort((left, right) => left.position - right.position) + .map((slot) => slot.exampleValue) + .join('\u001f'); + if (!byLiteralTuple.has(key)) { + byLiteralTuple.set(key, entry); + } + } + + return [...byLiteralTuple.values()] + .sort((left, right) => right.row.startedAt.localeCompare(left.row.startedAt)) + .slice(0, 5) + .map(({ row }) => ({ + started_at: row.startedAt, + user: row.user, + bound_sql: redactText(row.sql, redaction.redactors), + ...(row.rowsProduced === undefined ? {} : { rows_produced: row.rowsProduced ?? null }), + runtime_ms: row.runtimeMs, + success: row.success, + })); +} + +function selectTemplates(templates: TemplateVariant[], maxTemplatesPerRun: number, now: Date): TemplateVariant[] { + return templates + .map((template) => ({ template, score: rankTemplate(template, now) })) + .sort((left, right) => right.score - left.score || left.template.id.localeCompare(right.template.id)) + .slice(0, maxTemplatesPerRun) + .map((entry) => entry.template); +} + +function rankTemplate(template: TemplateVariant, now: Date): number { + const users = new Set(template.rows.map(({ row }) => row.user).filter((user): user is string => !!user)).size; + const latestStartedAt = template.rows.reduce( + (latest, { row }) => (latest === null || row.startedAt > latest ? row.startedAt : latest), + null, + ); + const ageDays = + latestStartedAt === null ? 
365 : Math.max(0, (now.getTime() - new Date(latestStartedAt).getTime()) / 86400000); + const recencyWeight = 1 / (1 + ageDays / 30); + return users * Math.log1p(template.rows.length) * recencyWeight; +} + +function percentile(values: number[], percentileValue: number): number | null { + if (values.length === 0) { + return null; + } + const index = Math.min(values.length - 1, Math.max(0, Math.ceil(values.length * percentileValue) - 1)); + return values[index]; +} + +function sumRowsProduced(rows: HistoricSqlRawQueryRow[]): number | null { + const values = rows.map((row) => row.rowsProduced).filter((value): value is number => typeof value === 'number'); + return values.length > 0 ? values.reduce((sum, value) => sum + value, 0) : null; +} + +function compileRedactors(patterns: string[], warnings: string[]): RedactionPolicy { + let samplesAllowed = true; + const redactors = patterns.flatMap((pattern) => { + try { + return [new RegExp(pattern, 'g')]; + } catch (error) { + samplesAllowed = false; + warnings.push( + `redaction_skipped:invalid_redaction_pattern:${pattern}:${error instanceof Error ? 
error.message : String(error)}`, + ); + return []; + } + }); + return { redactors, samplesAllowed }; +} + +function redactText(value: string, redactors: RegExp[]): string { + return redactors.reduce((current, regex) => current.replace(regex, ''), value); +} + +async function writeJson(stagedDir: string, relPath: string, value: unknown): Promise { + await writeText(stagedDir, relPath, `${JSON.stringify(value, null, 2)}\n`); +} + +async function writeText(stagedDir: string, relPath: string, value: string): Promise { + const target = join(stagedDir, relPath); + await mkdir(dirname(target), { recursive: true }); + await writeFile(target, value, 'utf-8'); +} diff --git a/packages/context/src/ingest/adapters/historic-sql/types.ts b/packages/context/src/ingest/adapters/historic-sql/types.ts new file mode 100644 index 00000000..81104cf5 --- /dev/null +++ b/packages/context/src/ingest/adapters/historic-sql/types.ts @@ -0,0 +1,201 @@ +import { z } from 'zod'; +import type { SqlAnalysisPort } from '../../../sql-analysis/index.js'; + +export const HISTORIC_SQL_SOURCE_KEY = 'historic-sql' as const; +export const HISTORIC_SQL_OBJECT_TYPE = 'historic_sql_template' as const; + +const historicSqlDialectSchema = z.enum(['snowflake', 'bigquery', 'postgres']); +export type HistoricSqlDialect = z.infer; + +export const historicSqlPullConfigSchema = z.object({ + dialect: historicSqlDialectSchema, + windowDays: z.number().int().min(1).max(365).default(90), + lastSuccessfulCursor: z.string().datetime().nullable().default(null), + serviceAccountUserPatterns: z.array(z.string()).default([]), + redactionPatterns: z.array(z.string()).default([]), + maxTemplatesPerRun: z.number().int().min(1).max(5000).default(5000), + minCalls: z.number().int().min(1).default(5), +}); +export type HistoricSqlPullConfig = z.infer; + +export interface HistoricSqlTimeWindow { + start: Date; + end: Date; +} + +export const historicSqlRawQueryRowSchema = z.object({ + id: z.string().min(1), + sql: 
z.string().min(1), + user: z.string().nullable().default(null), + startedAt: z.string().datetime(), + endedAt: z.string().datetime().nullable().default(null), + runtimeMs: z.number().nonnegative().nullable().default(null), + rowsProduced: z.number().int().nonnegative().nullable().optional(), + success: z.boolean().default(true), + errorMessage: z.string().nullable().default(null), +}); +export type HistoricSqlRawQueryRow = z.infer; + +export interface HistoricSqlQueryHistoryReader { + probe(client: unknown): Promise; + fetch( + client: unknown, + window: HistoricSqlTimeWindow, + cursor?: string | null, + ): AsyncIterable; +} + +export interface KloPostgresQueryClient { + executeQuery(sql: string, params?: unknown[]): Promise<{ headers: string[]; rows: unknown[][]; totalRows?: number }>; +} + +export interface PostgresPgssProbeResult { + pgServerVersion: string; + warnings: string[]; +} + +export interface PostgresPgssSnapshot { + statsResetAt: string | null; + deallocCount: number | null; + rows: PostgresPgssRow[]; +} + +export interface PostgresPgssReader { + probe(client: KloPostgresQueryClient): Promise; + readSnapshot( + client: KloPostgresQueryClient, + options: { minCalls: number; maxTemplates: number }, + ): Promise; +} + +export interface PostgresPgssRow { + queryid: string; + userid: string; + username: string | null; + dbid: string; + database: string | null; + query: string; + calls: number; + totalExecTime: number; + meanExecTime: number; + totalRows: number; +} + +export interface PostgresPgssAggregateRow { + id: string; + queryid: string; + dbid: string; + database: string | null; + query: string; + deltaCalls: number; + deltaExecTime: number; + deltaRows: number; + meanExecTime: number; + distinctUsersDelta: number; + users: string[]; + firstObservedAt: string; +} + +export interface HistoricSqlSourceAdapterDeps { + sqlAnalysis: SqlAnalysisPort; + reader: HistoricSqlQueryHistoryReader; + queryClient: unknown; + postgresReader?: PostgresPgssReader; + 
postgresQueryClient?: KloPostgresQueryClient; + postgresBaselineRootDir?: string; + now?: () => Date; + onPullSucceeded?: (ctx: { + connectionId: string; + sourceKey: string; + syncId: string; + trigger: import('../../types.js').IngestTrigger; + completedAt: Date; + stagedDir: string; + nextSuccessfulCursor: string | null; + }) => Promise; +} + +const historicSqlLiteralSlotClassificationSchema = z.enum(['constant', 'runtime', 'categorical']); +export type HistoricSqlLiteralSlotClassification = z.infer; + +export const historicSqlMetadataSchema = z.object({ + id: z.string().min(1), + title: z.string().min(1), + path: z.string().min(1), + objectType: z.literal(HISTORIC_SQL_OBJECT_TYPE), + lastEditedAt: z.null(), + properties: z.object({ + fingerprint: z.string().min(1), + sub_cluster_id: z.string().nullable(), + dialect: historicSqlDialectSchema, + tables_touched: z.array(z.string()), + literal_slots: z.array( + z.object({ + position: z.number().int().min(1), + type: z.enum(['string', 'number', 'timestamp', 'date', 'boolean', 'null', 'unknown']), + classification: historicSqlLiteralSlotClassificationSchema, + }), + ), + triage_signals: z.record(z.string(), z.string()), + }), +}); +export type HistoricSqlMetadata = z.infer; + +export const historicSqlUsageSchema = z.object({ + stats: z.object({ + executions: z.number().int().nonnegative(), + distinct_users: z.number().int().nonnegative(), + first_seen: z.string().datetime(), + last_seen: z.string().datetime(), + p50_runtime_ms: z.number().nonnegative().nullable(), + p95_runtime_ms: z.number().nonnegative().nullable(), + mean_runtime_ms: z.number().nonnegative().nullable().optional(), + error_rate: z.number().min(0).max(1), + rows_produced: z.number().int().nonnegative().nullable().optional(), + }), + literal_slots: z.array( + z.object({ + position: z.number().int().min(1), + distinct_values: z.number().int().nonnegative(), + top_values: z.array(z.tuple([z.string(), z.number().int().nonnegative()])), + }), + ), + 
samples: z.array( + z.object({ + started_at: z.string().datetime(), + user: z.string().nullable(), + bound_sql: z.string(), + rows_produced: z.number().int().nonnegative().nullable().optional(), + runtime_ms: z.number().nonnegative().nullable(), + success: z.boolean(), + }), + ), +}); +export type HistoricSqlUsage = z.infer; + +export const historicSqlManifestSchema = z.object({ + source: z.literal(HISTORIC_SQL_SOURCE_KEY), + connectionId: z.string().min(1), + dialect: historicSqlDialectSchema, + fetchedAt: z.string().datetime(), + windowStart: z.string().datetime(), + windowEnd: z.string().datetime(), + nextSuccessfulCursor: z.string().datetime().nullable(), + templateCount: z.number().int().nonnegative(), + capped: z.boolean(), + warnings: z.array(z.string()), + degraded: z.boolean().default(false), + statsResetAt: z.string().datetime().nullable().default(null), + baselineFirstRun: z.boolean().default(false), + pgServerVersion: z.string().nullable().default(null), + deallocCount: z.number().int().nonnegative().nullable().default(null), + templates: z.array( + z.object({ + id: z.string().min(1), + fingerprint: z.string().min(1), + subClusterId: z.string().nullable(), + path: z.string().min(1), + }), + ), +}); +export type HistoricSqlManifest = z.infer; diff --git a/packages/context/src/ingest/adapters/live-database/chunk.test.ts b/packages/context/src/ingest/adapters/live-database/chunk.test.ts new file mode 100644 index 00000000..259d7a9c --- /dev/null +++ b/packages/context/src/ingest/adapters/live-database/chunk.test.ts @@ -0,0 +1,107 @@ +import { mkdtemp } from 'node:fs/promises'; +import { tmpdir } from 'node:os'; +import { join } from 'node:path'; +import { describe, expect, it } from 'vitest'; +import type { KloSchemaSnapshot } from '../../../scan/types.js'; +import { chunkLiveDatabaseStagedDir } from './chunk.js'; +import { liveDatabaseTablePath, writeLiveDatabaseSnapshot } from './stage.js'; + +function snapshot(): KloSchemaSnapshot { + return { + 
connectionId: 'conn-1', + driver: 'postgres', + extractedAt: '2026-04-27T00:00:00.000Z', + scope: { schemas: ['public'] }, + metadata: {}, + tables: [ + { + name: 'orders', + catalog: null, + db: 'public', + kind: 'table', + comment: null, + estimatedRows: null, + columns: [ + { + name: 'id', + nativeType: 'integer', + normalizedType: 'integer', + dimensionType: 'number', + nullable: false, + primaryKey: true, + comment: null, + }, + ], + foreignKeys: [], + }, + { + name: 'customers', + catalog: null, + db: 'public', + kind: 'table', + comment: null, + estimatedRows: null, + columns: [ + { + name: 'id', + nativeType: 'integer', + normalizedType: 'integer', + dimensionType: 'number', + nullable: false, + primaryKey: true, + comment: null, + }, + ], + foreignKeys: [], + }, + ], + }; +} + +describe('chunkLiveDatabaseStagedDir', () => { + it('emits one work unit per table on the first run', async () => { + const dir = await mkdtemp(join(tmpdir(), 'klo-live-db-chunk-')); + await writeLiveDatabaseSnapshot(dir, snapshot()); + + const result = await chunkLiveDatabaseStagedDir(dir); + expect(result.workUnits.map((wu) => wu.unitKey)).toEqual([ + 'live-database-public-customers', + 'live-database-public-orders', + ]); + expect(result.workUnits[0]?.dependencyPaths).toEqual(['connection.json', 'foreign-keys.json']); + expect(result.workUnits[0]?.peerFileIndex).toContain( + liveDatabaseTablePath({ catalog: null, db: 'public', name: 'orders' }), + ); + }); + + it('keeps only changed tables during incremental syncs and records table evictions', async () => { + const dir = await mkdtemp(join(tmpdir(), 'klo-live-db-diff-')); + await writeLiveDatabaseSnapshot(dir, snapshot()); + const ordersPath = liveDatabaseTablePath({ catalog: null, db: 'public', name: 'orders' }); + const customersPath = liveDatabaseTablePath({ catalog: null, db: 'public', name: 'customers' }); + + const result = await chunkLiveDatabaseStagedDir(dir, { + added: [], + modified: [ordersPath], + deleted: 
[customersPath], + unchanged: ['connection.json', 'foreign-keys.json'], + }); + + expect(result.workUnits.map((wu) => wu.unitKey)).toEqual(['live-database-public-orders']); + expect(result.eviction?.deletedRawPaths).toEqual([customersPath]); + }); + + it('fans out all table work units when the foreign-key index changes', async () => { + const dir = await mkdtemp(join(tmpdir(), 'klo-live-db-fk-')); + await writeLiveDatabaseSnapshot(dir, snapshot()); + + const result = await chunkLiveDatabaseStagedDir(dir, { + added: [], + modified: ['foreign-keys.json'], + deleted: [], + unchanged: [], + }); + + expect(result.workUnits).toHaveLength(2); + }); +}); diff --git a/packages/context/src/ingest/adapters/live-database/chunk.ts b/packages/context/src/ingest/adapters/live-database/chunk.ts new file mode 100644 index 00000000..3348b98a --- /dev/null +++ b/packages/context/src/ingest/adapters/live-database/chunk.ts @@ -0,0 +1,58 @@ +import type { ChunkResult, DiffSet, WorkUnit } from '../../types.js'; +import type { KloSchemaTable } from '../../../scan/types.js'; +import { LIVE_DATABASE_FOREIGN_KEYS_FILE, LIVE_DATABASE_META_FILE, readLiveDatabaseTableFiles } from './stage.js'; + +function unitKey(table: KloSchemaTable): string { + const parts = [table.catalog, table.db, table.name] + .filter((part): part is string => typeof part === 'string' && part.length > 0) + .map((part) => + part + .toLowerCase() + .replace(/[^a-z0-9]+/g, '-') + .replace(/^-+|-+$/g, ''), + ) + .filter(Boolean); + return `live-database-${parts.join('-') || 'table'}`; +} + +function displayName(table: KloSchemaTable): string { + return [table.catalog, table.db, table.name].filter(Boolean).join('.'); +} + +function isTablePath(path: string): boolean { + return path.startsWith('tables/') && path.endsWith('.json'); +} + +export async function chunkLiveDatabaseStagedDir(stagedDir: string, diffSet?: DiffSet): Promise { + const tableFiles = await readLiveDatabaseTableFiles(stagedDir); + const allTablePaths = 
tableFiles.map((file) => file.path); + const globalDeps = [LIVE_DATABASE_META_FILE, LIVE_DATABASE_FOREIGN_KEYS_FILE]; + const touched = diffSet ? new Set([...diffSet.added, ...diffSet.modified]) : null; + const globalTouched = Boolean( + touched && (touched.has(LIVE_DATABASE_META_FILE) || touched.has(LIVE_DATABASE_FOREIGN_KEYS_FILE)), + ); + + const workUnits: WorkUnit[] = []; + for (const file of tableFiles) { + if (touched && !globalTouched && !touched.has(file.path)) { + continue; + } + const peers = allTablePaths.filter((path) => path !== file.path).sort(); + workUnits.push({ + unitKey: unitKey(file.table), + displayLabel: `Live database table ${displayName(file.table)}`, + rawFiles: [file.path], + peerFileIndex: peers, + dependencyPaths: globalDeps, + notes: `Database catalog snapshot for ${displayName(file.table)} with ${file.table.columns.length} column${ + file.table.columns.length === 1 ? '' : 's' + }.`, + }); + } + + const deletedRawPaths = diffSet ? diffSet.deleted.filter(isTablePath).sort() : []; + return { + workUnits, + ...(deletedRawPaths.length > 0 ? 
{ eviction: { deletedRawPaths } } : {}), + }; +} diff --git a/packages/context/src/ingest/adapters/live-database/daemon-introspection.test.ts b/packages/context/src/ingest/adapters/live-database/daemon-introspection.test.ts new file mode 100644 index 00000000..fe65920e --- /dev/null +++ b/packages/context/src/ingest/adapters/live-database/daemon-introspection.test.ts @@ -0,0 +1,224 @@ +import { once } from 'node:events'; +import { createServer } from 'node:http'; +import { describe, expect, it, vi } from 'vitest'; +import { createDaemonLiveDatabaseIntrospection } from './daemon-introspection.js'; + +const daemonResponse = { + connection_id: 'warehouse', + extracted_at: '2026-04-28T10:00:00+00:00', + metadata: { driver: 'postgres', schemas: ['public'] }, + tables: [ + { + catalog: 'warehouse', + db: 'public', + name: 'customers', + comment: null, + columns: [{ name: 'id', type: 'integer', nullable: false, primary_key: true, comment: null }], + foreign_keys: [], + }, + { + catalog: 'warehouse', + db: 'public', + name: 'orders', + comment: 'Order facts', + columns: [ + { name: 'id', type: 'integer', nullable: false, primary_key: true, comment: 'Order id' }, + { name: 'customer_id', type: 'integer', nullable: false, primary_key: false, comment: null }, + ], + foreign_keys: [ + { + from_column: 'customer_id', + to_table: 'customers', + to_column: 'id', + constraint_name: 'orders_customer_id_fkey', + }, + ], + }, + ], +}; + +describe('createDaemonLiveDatabaseIntrospection', () => { + it('calls the database-introspect daemon command and maps the snapshot response', async () => { + const runJson = vi.fn(async () => daemonResponse); + const introspection = createDaemonLiveDatabaseIntrospection({ + connections: { + warehouse: { + driver: 'postgres', + url: 'postgres://localhost:5432/warehouse', + readonly: true, + }, + }, + schemas: ['public'], + runJson, + }); + + await expect(introspection.extractSchema('warehouse')).resolves.toEqual({ + connectionId: 'warehouse', + 
driver: 'postgres', + extractedAt: '2026-04-28T10:00:00+00:00', + scope: { schemas: ['public'] }, + metadata: { driver: 'postgres', schemas: ['public'] }, + tables: [ + { + catalog: 'warehouse', + db: 'public', + name: 'customers', + kind: 'table', + comment: null, + estimatedRows: null, + columns: [ + { + name: 'id', + nativeType: 'integer', + normalizedType: 'integer', + dimensionType: 'number', + nullable: false, + primaryKey: true, + comment: null, + }, + ], + foreignKeys: [], + }, + { + catalog: 'warehouse', + db: 'public', + name: 'orders', + kind: 'table', + comment: 'Order facts', + estimatedRows: null, + columns: [ + { + name: 'id', + nativeType: 'integer', + normalizedType: 'integer', + dimensionType: 'number', + nullable: false, + primaryKey: true, + comment: 'Order id', + }, + { + name: 'customer_id', + nativeType: 'integer', + normalizedType: 'integer', + dimensionType: 'number', + nullable: false, + primaryKey: false, + comment: null, + }, + ], + foreignKeys: [ + { + fromColumn: 'customer_id', + toCatalog: null, + toDb: null, + toTable: 'customers', + toColumn: 'id', + constraintName: 'orders_customer_id_fkey', + }, + ], + }, + ], + }); + + expect(runJson).toHaveBeenCalledWith('database-introspect', { + connection_id: 'warehouse', + driver: 'postgres', + url: 'postgres://localhost:5432/warehouse', + schemas: ['public'], + statement_timeout_ms: 30_000, + connection_timeout_seconds: 5, + }); + }); + + it('calls a running daemon HTTP endpoint when baseUrl is configured', async () => { + const requests: Array<{ url: string | undefined; body: unknown }> = []; + const server = createServer((request, response) => { + const chunks: Buffer[] = []; + request.on('data', (chunk: Buffer) => chunks.push(chunk)); + request.on('end', () => { + requests.push({ + url: request.url, + body: JSON.parse(Buffer.concat(chunks).toString('utf8')), + }); + response.writeHead(200, { 'content-type': 'application/json' }); + response.end(JSON.stringify(daemonResponse)); + }); + 
}); + + server.listen(0, '127.0.0.1'); + await once(server, 'listening'); + try { + const address = server.address(); + if (!address || typeof address === 'string') { + throw new Error('expected TCP server address'); + } + const introspection = createDaemonLiveDatabaseIntrospection({ + connections: { + warehouse: { + driver: 'postgresql', + url: 'postgres://localhost:5432/warehouse', + readonly: true, + }, + }, + baseUrl: `http://127.0.0.1:${address.port}`, + }); + + await expect(introspection.extractSchema('warehouse')).resolves.toMatchObject({ + connectionId: 'warehouse', + tables: [{ name: 'customers' }, { name: 'orders' }], + }); + + expect(requests).toEqual([ + { + url: '/database/introspect', + body: { + connection_id: 'warehouse', + driver: 'postgres', + url: 'postgres://localhost:5432/warehouse', + schemas: ['public'], + statement_timeout_ms: 30_000, + connection_timeout_seconds: 5, + }, + }, + ]); + } finally { + server.close(); + } + }); + + it('requires a configured read-only postgres connection with a url', async () => { + const introspection = createDaemonLiveDatabaseIntrospection({ + connections: { + warehouse: { + driver: 'postgres', + url: 'postgres://localhost:5432/warehouse', + readonly: false, + }, + }, + runJson: vi.fn(async () => daemonResponse), + }); + + await expect(introspection.extractSchema('warehouse')).rejects.toThrow( + 'Local live-database ingest requires connections.warehouse.readonly: true.', + ); + }); + + it('rejects unsupported local connection drivers before calling the daemon', async () => { + const runJson = vi.fn(async () => daemonResponse); + const introspection = createDaemonLiveDatabaseIntrospection({ + connections: { + warehouse: { + driver: 'snowflake', + url: 'snowflake://example', + readonly: true, + }, + }, + runJson, + }); + + await expect(introspection.extractSchema('warehouse')).rejects.toThrow( + 'Local live-database ingest cannot run driver "snowflake".', + ); + expect(runJson).not.toHaveBeenCalled(); + }); +}); 
diff --git a/packages/context/src/ingest/adapters/live-database/daemon-introspection.ts b/packages/context/src/ingest/adapters/live-database/daemon-introspection.ts new file mode 100644 index 00000000..48077949 --- /dev/null +++ b/packages/context/src/ingest/adapters/live-database/daemon-introspection.ts @@ -0,0 +1,256 @@ +import { spawn } from 'node:child_process'; +import { request as httpRequest } from 'node:http'; +import { request as httpsRequest } from 'node:https'; +import { URL } from 'node:url'; +import type { KloProjectConnectionConfig } from '../../../project/config.js'; +import type { KloSchemaColumn, KloSchemaForeignKey, KloSchemaSnapshot, KloSchemaTable } from '../../../scan/types.js'; +import { inferKloDimensionType, normalizeKloNativeType } from '../../../scan/type-normalization.js'; +import type { LiveDatabaseIntrospectionPort } from './types.js'; + +export type KloDaemonDatabaseIntrospectionCommand = 'database-introspect'; + +export type KloDaemonDatabaseJsonRunner = ( + subcommand: KloDaemonDatabaseIntrospectionCommand, + payload: Record, +) => Promise>; + +export type KloDaemonDatabaseHttpJsonRunner = ( + path: string, + payload: Record, +) => Promise>; + +export interface DaemonLiveDatabaseIntrospectionOptions { + connections: Record; + schemas?: string[]; + statementTimeoutMs?: number; + connectionTimeoutSeconds?: number; + command?: string; + args?: string[]; + cwd?: string; + env?: NodeJS.ProcessEnv; + baseUrl?: string; + runJson?: KloDaemonDatabaseJsonRunner; + requestJson?: KloDaemonDatabaseHttpJsonRunner; + now?: () => Date; +} + +const DEFAULT_SCHEMAS = ['public']; + +function parseJsonObject(raw: string, subcommand: string): Record { + const parsed = JSON.parse(raw) as unknown; + if (!parsed || typeof parsed !== 'object' || Array.isArray(parsed)) { + throw new Error(`klo-daemon ${subcommand} returned non-object JSON`); + } + return parsed as Record; +} + +function runProcessJson( + options: Required> & + Pick, +): 
KloDaemonDatabaseJsonRunner { + return async (subcommand, payload) => + new Promise((resolve, reject) => { + const child = spawn(options.command, [...options.args, subcommand], { + cwd: options.cwd, + env: { ...process.env, ...options.env }, + stdio: ['pipe', 'pipe', 'pipe'], + }); + const stdout: Buffer[] = []; + const stderr: Buffer[] = []; + + child.stdout.on('data', (chunk: Buffer) => stdout.push(chunk)); + child.stderr.on('data', (chunk: Buffer) => stderr.push(chunk)); + child.on('error', reject); + child.on('close', (code) => { + const stdoutText = Buffer.concat(stdout).toString('utf8').trim(); + const stderrText = Buffer.concat(stderr).toString('utf8').trim(); + if (code !== 0) { + reject(new Error(`klo-daemon ${subcommand} failed: ${stderrText || `exit code ${code}`}`)); + return; + } + try { + resolve(parseJsonObject(stdoutText, subcommand)); + } catch (error) { + reject(error); + } + }); + child.stdin.end(`${JSON.stringify(payload)}\n`); + }); +} + +function normalizedBaseUrl(baseUrl: string): string { + return baseUrl.endsWith('/') ? baseUrl : `${baseUrl}/`; +} + +function postJson(baseUrl: string): KloDaemonDatabaseHttpJsonRunner { + return async (path, payload) => + new Promise((resolve, reject) => { + const target = new URL(path.replace(/^\//, ''), normalizedBaseUrl(baseUrl)); + const body = JSON.stringify(payload); + const client = target.protocol === 'https:' ? httpsRequest : httpRequest; + const request = client( + target, + { + method: 'POST', + headers: { + accept: 'application/json', + 'content-type': 'application/json', + 'content-length': Buffer.byteLength(body), + }, + }, + (response) => { + const chunks: Buffer[] = []; + response.on('data', (chunk: Buffer) => chunks.push(chunk)); + response.on('end', () => { + const text = Buffer.concat(chunks).toString('utf8'); + const statusCode = response.statusCode ?? 
0; + if (statusCode < 200 || statusCode >= 300) { + reject(new Error(`klo-daemon HTTP ${path} failed with ${statusCode}: ${text}`)); + return; + } + try { + resolve(parseJsonObject(text, path)); + } catch (error) { + reject(error); + } + }); + }, + ); + request.on('error', reject); + request.end(body); + }); +} + +function recordValue(value: unknown): Record { + return value && typeof value === 'object' && !Array.isArray(value) ? (value as Record) : {}; +} + +function recordArray(value: unknown): Array> { + return Array.isArray(value) + ? value.filter( + (item): item is Record => item !== null && typeof item === 'object' && !Array.isArray(item), + ) + : []; +} + +function requiredString(value: unknown, field: string): string { + if (typeof value !== 'string' || value.length === 0) { + throw new Error(`klo-daemon database introspection response is missing string field ${field}`); + } + return value; +} + +function nullableString(value: unknown): string | null { + return typeof value === 'string' ? value : null; +} + +function optionalString(value: unknown): string | undefined { + return typeof value === 'string' ? value : undefined; +} + +function normalizeDriver(driver: unknown): string { + const normalized = String(driver ?? '').trim().toLowerCase(); + return normalized === 'postgresql' ? 'postgres' : normalized; +} + +function requirePostgresConnection( + connections: Record, + connectionId: string, +): KloProjectConnectionConfig & { url: string } { + const connection = connections[connectionId]; + const driver = normalizeDriver(connection?.driver); + if (driver !== 'postgres') { + throw new Error(`Local live-database ingest cannot run driver "${connection?.driver ?? 
'unknown'}".`); + } + if (connection?.readonly !== true) { + throw new Error(`Local live-database ingest requires connections.${connectionId}.readonly: true.`); + } + if (typeof connection.url !== 'string' || connection.url.trim().length === 0) { + throw new Error(`Local live-database ingest requires connections.${connectionId}.url.`); + } + return connection as KloProjectConnectionConfig & { url: string }; +} + +function mapColumn(raw: Record): KloSchemaColumn { + const nativeType = requiredString(raw.type, 'tables[].columns[].type'); + return { + name: requiredString(raw.name, 'tables[].columns[].name'), + nativeType, + normalizedType: normalizeKloNativeType(nativeType), + dimensionType: inferKloDimensionType(nativeType), + nullable: raw.nullable !== false ? true : false, + primaryKey: raw.primary_key === true, + comment: nullableString(raw.comment), + }; +} + +function mapForeignKey(raw: Record): KloSchemaForeignKey { + return { + fromColumn: requiredString(raw.from_column, 'tables[].foreign_keys[].from_column'), + toCatalog: null, + toDb: null, + toTable: requiredString(raw.to_table, 'tables[].foreign_keys[].to_table'), + toColumn: requiredString(raw.to_column, 'tables[].foreign_keys[].to_column'), + constraintName: nullableString(raw.constraint_name), + }; +} + +function mapTable(raw: Record): KloSchemaTable { + return { + catalog: nullableString(raw.catalog), + db: nullableString(raw.db), + name: requiredString(raw.name, 'tables[].name'), + kind: 'table', + comment: nullableString(raw.comment), + estimatedRows: null, + columns: recordArray(raw.columns).map(mapColumn), + foreignKeys: recordArray(raw.foreign_keys).map(mapForeignKey), + }; +} + +function mapDaemonSnapshot( + raw: Record, + input: { connectionId: string; extractedAt: string; schemas: string[] }, +): KloSchemaSnapshot { + return { + connectionId: requiredString(raw.connection_id, 'connection_id') || input.connectionId, + driver: 'postgres', + extractedAt: optionalString(raw.extracted_at) ?? 
input.extractedAt, + scope: { schemas: input.schemas }, + metadata: recordValue(raw.metadata), + tables: recordArray(raw.tables).map(mapTable), + }; +} + +export function createDaemonLiveDatabaseIntrospection( + options: DaemonLiveDatabaseIntrospectionOptions, +): LiveDatabaseIntrospectionPort { + const schemas = options.schemas ?? DEFAULT_SCHEMAS; + const command = options.command ?? 'python'; + const args = options.args ?? ['-m', 'klo_daemon']; + const runJson = options.runJson ?? runProcessJson({ command, args, cwd: options.cwd, env: options.env }); + const requestJson = options.requestJson ?? (options.baseUrl ? postJson(options.baseUrl) : undefined); + const now = options.now ?? (() => new Date()); + + return { + async extractSchema(connectionId: string): Promise { + const connection = requirePostgresConnection(options.connections, connectionId); + const payload = { + connection_id: connectionId, + driver: normalizeDriver(connection.driver), + url: connection.url, + schemas, + statement_timeout_ms: options.statementTimeoutMs ?? 30_000, + connection_timeout_seconds: options.connectionTimeoutSeconds ?? 5, + }; + const raw = requestJson + ? 
await requestJson('/database/introspect', payload) + : await runJson('database-introspect', payload); + return mapDaemonSnapshot(raw, { + connectionId, + extractedAt: now().toISOString(), + schemas, + }); + }, + }; +} diff --git a/packages/context/src/ingest/adapters/live-database/extracted-schema.test.ts b/packages/context/src/ingest/adapters/live-database/extracted-schema.test.ts new file mode 100644 index 00000000..13147ad5 --- /dev/null +++ b/packages/context/src/ingest/adapters/live-database/extracted-schema.test.ts @@ -0,0 +1,136 @@ +import { describe, expect, it } from 'vitest'; +import type { KloSchemaSnapshot } from '../../../scan/types.js'; +import { buildLiveDatabaseTableNaturalKey, kloSchemaSnapshotToExtractedSchema } from './extracted-schema.js'; + +function snapshot(): KloSchemaSnapshot { + return { + connectionId: 'conn-1', + driver: 'postgres', + extractedAt: '2026-04-27T00:00:00.000Z', + scope: { schemas: ['public'] }, + metadata: { driver: 'postgres' }, + tables: [ + { + name: 'orders', + catalog: null, + db: 'public', + kind: 'table', + comment: 'Orders placed by customers', + estimatedRows: null, + columns: [ + { + name: 'id', + nativeType: 'integer', + normalizedType: 'integer', + dimensionType: 'number', + nullable: false, + primaryKey: true, + comment: 'Primary key', + }, + { + name: 'customer_id', + nativeType: 'integer', + normalizedType: 'integer', + dimensionType: 'number', + nullable: false, + primaryKey: false, + comment: null, + }, + ], + foreignKeys: [ + { + fromColumn: 'customer_id', + toCatalog: null, + toDb: 'public', + toTable: 'customers', + toColumn: 'id', + constraintName: 'orders_customer_id_fkey', + }, + ], + }, + { + name: 'customers', + catalog: null, + db: 'public', + kind: 'table', + comment: null, + estimatedRows: null, + columns: [ + { + name: 'id', + nativeType: 'integer', + normalizedType: 'integer', + dimensionType: 'number', + nullable: false, + primaryKey: true, + comment: null, + }, + ], + foreignKeys: [], + }, + 
], + }; +} + +describe('kloSchemaSnapshotToExtractedSchema', () => { + it('preserves structural table, column, comment, and key metadata', () => { + const extracted = kloSchemaSnapshotToExtractedSchema(snapshot()); + + expect(extracted.tables).toEqual([ + { + name: 'orders', + catalog: null, + db: 'public', + dbComment: 'Orders placed by customers', + columns: [ + { + name: 'id', + type: 'integer', + nullable: false, + primaryKey: true, + dbComment: 'Primary key', + }, + { + name: 'customer_id', + type: 'integer', + nullable: false, + primaryKey: false, + dbComment: null, + }, + ], + foreignKeys: [ + { + fromTable: 'orders', + fromColumn: 'customer_id', + toTable: 'customers', + toColumn: 'id', + constraintName: 'orders_customer_id_fkey', + }, + ], + }, + { + name: 'customers', + catalog: null, + db: 'public', + dbComment: null, + columns: [ + { + name: 'id', + type: 'integer', + nullable: false, + primaryKey: true, + dbComment: null, + }, + ], + foreignKeys: [], + }, + ]); + }); + + it('builds the same natural key shape used by schema sync', () => { + expect(buildLiveDatabaseTableNaturalKey({ catalog: null, db: 'public', name: 'orders' })).toBe('|public|orders'); + expect(buildLiveDatabaseTableNaturalKey({ catalog: 'warehouse', db: 'analytics', name: 'events' })).toBe( + 'warehouse|analytics|events', + ); + }); +}); diff --git a/packages/context/src/ingest/adapters/live-database/extracted-schema.ts b/packages/context/src/ingest/adapters/live-database/extracted-schema.ts new file mode 100644 index 00000000..35f39cca --- /dev/null +++ b/packages/context/src/ingest/adapters/live-database/extracted-schema.ts @@ -0,0 +1,61 @@ +import type { KloSchemaSnapshot, KloSchemaTable } from '../../../scan/types.js'; + +export interface LiveDatabaseExtractedForeignKey { + fromTable: string; + fromColumn: string; + toTable: string; + toColumn: string; + constraintName?: string; +} + +export interface LiveDatabaseExtractedColumn { + name: string; + type: string; + nullable: 
boolean; + primaryKey: boolean; + dbComment: string | null; +} + +export interface LiveDatabaseExtractedTable { + name: string; + catalog: string | null; + db: string | null; + dbComment: string | null; + columns: LiveDatabaseExtractedColumn[]; + foreignKeys: LiveDatabaseExtractedForeignKey[]; +} + +export interface LiveDatabaseExtractedSchema { + connectionId?: string; + tables: LiveDatabaseExtractedTable[]; +} + +export function buildLiveDatabaseTableNaturalKey(table: Pick): string { + return `${table.catalog ?? ''}|${table.db ?? ''}|${table.name}`; +} + +export function kloSchemaSnapshotToExtractedSchema(snapshot: KloSchemaSnapshot): LiveDatabaseExtractedSchema { + return { + connectionId: snapshot.connectionId, + tables: snapshot.tables.map((table) => ({ + name: table.name, + catalog: table.catalog ?? null, + db: table.db ?? null, + dbComment: table.comment ?? null, + columns: table.columns.map((column) => ({ + name: column.name, + type: column.nativeType, + nullable: column.nullable, + primaryKey: column.primaryKey, + dbComment: column.comment ?? null, + })), + foreignKeys: table.foreignKeys.map((foreignKey) => ({ + fromTable: table.name, + fromColumn: foreignKey.fromColumn, + toTable: foreignKey.toTable, + toColumn: foreignKey.toColumn, + ...(foreignKey.constraintName ? 
{ constraintName: foreignKey.constraintName } : {}), + })), + })), + }; +} diff --git a/packages/context/src/ingest/adapters/live-database/live-database.adapter.test.ts b/packages/context/src/ingest/adapters/live-database/live-database.adapter.test.ts new file mode 100644 index 00000000..f3c3935c --- /dev/null +++ b/packages/context/src/ingest/adapters/live-database/live-database.adapter.test.ts @@ -0,0 +1,59 @@ +import { mkdtemp } from 'node:fs/promises'; +import { tmpdir } from 'node:os'; +import { join } from 'node:path'; +import { describe, expect, it, vi } from 'vitest'; +import { LiveDatabaseSourceAdapter } from './live-database.adapter.js'; + +describe('LiveDatabaseSourceAdapter', () => { + it('fetches a schema snapshot through the introspection port', async () => { + const extractSchema = vi.fn().mockResolvedValue({ + connectionId: 'conn-1', + driver: 'postgres', + extractedAt: '2026-04-27T00:00:00.000Z', + scope: { schemas: ['public'] }, + metadata: {}, + tables: [ + { + name: 'orders', + catalog: null, + db: 'public', + kind: 'table', + comment: null, + estimatedRows: null, + columns: [ + { + name: 'id', + nativeType: 'integer', + normalizedType: 'integer', + dimensionType: 'number', + nullable: false, + primaryKey: true, + comment: null, + }, + ], + foreignKeys: [], + }, + ], + }); + const adapter = new LiveDatabaseSourceAdapter({ + introspection: { extractSchema }, + now: () => new Date('2026-04-27T00:00:00.000Z'), + }); + const dir = await mkdtemp(join(tmpdir(), 'klo-live-db-adapter-')); + + await adapter.fetch(undefined, dir, { connectionId: 'conn-1', sourceKey: 'live-database' }); + + expect(extractSchema).toHaveBeenCalledWith('conn-1'); + await expect(adapter.detect(dir)).resolves.toBe(true); + const chunked = await adapter.chunk(dir); + expect(chunked.workUnits.map((wu) => wu.unitKey)).toEqual(['live-database-public-orders']); + }); + + it('declares the live database source and skill', () => { + const adapter = new LiveDatabaseSourceAdapter({ + 
introspection: { extractSchema: vi.fn() }, + }); + expect(adapter.source).toBe('live-database'); + expect(adapter.skillNames).toEqual(['live_database_ingest']); + }); +}); diff --git a/packages/context/src/ingest/adapters/live-database/live-database.adapter.ts b/packages/context/src/ingest/adapters/live-database/live-database.adapter.ts new file mode 100644 index 00000000..9e5076ab --- /dev/null +++ b/packages/context/src/ingest/adapters/live-database/live-database.adapter.ts @@ -0,0 +1,28 @@ +import type { ChunkResult, DiffSet, FetchContext, SourceAdapter } from '../../types.js'; +import { chunkLiveDatabaseStagedDir } from './chunk.js'; +import { detectLiveDatabaseStagedDir, writeLiveDatabaseSnapshot } from './stage.js'; +import type { LiveDatabaseSourceAdapterDeps } from './types.js'; + +export class LiveDatabaseSourceAdapter implements SourceAdapter { + readonly source = 'live-database'; + readonly skillNames = ['live_database_ingest']; + + constructor(private readonly deps: LiveDatabaseSourceAdapterDeps) {} + + detect(stagedDir: string): Promise { + return detectLiveDatabaseStagedDir(stagedDir); + } + + async fetch(_pullConfig: unknown, stagedDir: string, ctx: FetchContext): Promise { + const snapshot = await this.deps.introspection.extractSchema(ctx.connectionId); + await writeLiveDatabaseSnapshot(stagedDir, { + ...snapshot, + connectionId: ctx.connectionId, + extractedAt: snapshot.extractedAt ?? (this.deps.now ?? 
(() => new Date()))().toISOString(), + }); + } + + chunk(stagedDir: string, diffSet?: DiffSet): Promise { + return chunkLiveDatabaseStagedDir(stagedDir, diffSet); + } +} diff --git a/packages/context/src/ingest/adapters/live-database/manifest.test.ts b/packages/context/src/ingest/adapters/live-database/manifest.test.ts new file mode 100644 index 00000000..75a41067 --- /dev/null +++ b/packages/context/src/ingest/adapters/live-database/manifest.test.ts @@ -0,0 +1,252 @@ +import { describe, expect, it } from 'vitest'; +import { + buildLiveDatabaseManifestShards, + type LiveDatabaseManifestExistingDescriptions, + type LiveDatabaseManifestJoinEntry, + type LiveDatabaseManifestShard, +} from './manifest.js'; + +function shardObject(shards: Map): Record { + return Object.fromEntries([...shards.entries()].sort(([a], [b]) => a.localeCompare(b))); +} + +describe('buildLiveDatabaseManifestShards', () => { + it('builds shard objects with generated joins and preserved external descriptions', () => { + const existingDescriptions = new Map([ + [ + 'orders', + { + table: { user: 'Pinned analyst description', db: 'Old db description' }, + columns: new Map([['id', { user: 'Pinned id description', db: 'Old id description' }]]), + }, + ], + ]); + + const preservedJoins = new Map([ + [ + 'orders', + [ + { + to: 'customers', + on: 'orders.account_id = customers.id', + relationship: 'many_to_one', + source: 'manual', + }, + { + to: 'missing_accounts', + on: 'orders.account_id = missing_accounts.id', + relationship: 'many_to_one', + source: 'manual', + }, + ], + ], + ]); + + const result = buildLiveDatabaseManifestShards({ + connectionType: 'POSTGRESQL', + mapColumnType: (nativeType) => nativeType.toLowerCase(), + existingDescriptions, + existingPreservedJoins: preservedJoins, + tables: [ + { + name: 'orders', + catalog: null, + db: 'public', + descriptions: { db: 'Fresh db description', ai: 'Generated AI description' }, + columns: [ + { + name: 'id', + type: 'INTEGER', + pk: true, + 
nullable: false, + descriptions: { db: 'Fresh id description' }, + }, + { + name: 'customer_id', + type: 'INTEGER', + }, + ], + }, + { + name: 'customers', + catalog: null, + db: 'public', + columns: [ + { + name: 'id', + type: 'INTEGER', + pk: true, + nullable: false, + }, + ], + }, + ], + joins: [ + { + fromTable: 'orders', + fromColumns: ['customer_id'], + toTable: 'customers', + toColumns: ['id'], + relationship: 'MANY_TO_ONE', + source: 'formal', + }, + ], + }); + + expect(result.tablesProcessed).toBe(2); + expect(shardObject(result.shards)).toEqual({ + public: { + tables: { + orders: { + table: 'public.orders', + descriptions: { + user: 'Pinned analyst description', + db: 'Fresh db description', + ai: 'Generated AI description', + }, + columns: [ + { + name: 'id', + type: 'integer', + pk: true, + nullable: false, + descriptions: { + user: 'Pinned id description', + db: 'Fresh id description', + }, + }, + { + name: 'customer_id', + type: 'integer', + }, + ], + joins: [ + { + to: 'customers', + on: 'orders.customer_id = customers.id', + relationship: 'many_to_one', + source: 'formal', + }, + { + to: 'customers', + on: 'orders.account_id = customers.id', + relationship: 'many_to_one', + source: 'manual', + }, + ], + }, + customers: { + table: 'public.customers', + columns: [ + { + name: 'id', + type: 'integer', + pk: true, + nullable: false, + }, + ], + joins: [ + { + to: 'orders', + on: 'customers.id = orders.customer_id', + relationship: 'one_to_many', + source: 'formal', + }, + ], + }, + }, + }, + }); + }); + + it('uses warehouse and schema shard keys for snowflake-style connections', () => { + const result = buildLiveDatabaseManifestShards({ + connectionType: 'SNOWFLAKE', + mapColumnType: (nativeType) => nativeType.toLowerCase(), + tables: [ + { + name: 'accounts', + catalog: 'ANALYTICS', + db: 'CORE', + columns: [{ name: 'id', type: 'NUMBER' }], + }, + ], + joins: [], + }); + + expect(shardObject(result.shards)).toEqual({ + 'ANALYTICS.CORE': { + tables: { + 
accounts: { + table: 'ANALYTICS.CORE.accounts', + columns: [{ name: 'id', type: 'number' }], + }, + }, + }, + }); + }); + + it('renders ordered multi-column joins in both directions', () => { + const result = buildLiveDatabaseManifestShards({ + connectionType: 'POSTGRESQL', + mapColumnType: (nativeType) => nativeType, + tables: [ + { + name: 'order_lines', + catalog: null, + db: 'public', + columns: [ + { name: 'order_id', type: 'integer' }, + { name: 'line_number', type: 'integer' }, + ], + }, + { + name: 'order_line_allocations', + catalog: null, + db: 'public', + columns: [ + { name: 'order_id', type: 'integer' }, + { name: 'line_number', type: 'integer' }, + ], + }, + ], + joins: [ + { + fromTable: 'order_line_allocations', + fromColumns: ['order_id', 'line_number'], + toTable: 'order_lines', + toColumns: ['order_id', 'line_number'], + relationship: 'many_to_one', + source: 'inferred', + }, + ], + }); + + expect(shardObject(result.shards)).toMatchObject({ + public: { + tables: { + order_line_allocations: { + joins: [ + { + to: 'order_lines', + on: 'order_line_allocations.order_id = order_lines.order_id AND order_line_allocations.line_number = order_lines.line_number', + relationship: 'many_to_one', + source: 'inferred', + }, + ], + }, + order_lines: { + joins: [ + { + to: 'order_line_allocations', + on: 'order_lines.order_id = order_line_allocations.order_id AND order_lines.line_number = order_line_allocations.line_number', + relationship: 'one_to_many', + source: 'inferred', + }, + ], + }, + }, + }, + }); + }); +}); diff --git a/packages/context/src/ingest/adapters/live-database/manifest.ts b/packages/context/src/ingest/adapters/live-database/manifest.ts new file mode 100644 index 00000000..d7315f9e --- /dev/null +++ b/packages/context/src/ingest/adapters/live-database/manifest.ts @@ -0,0 +1,270 @@ +const RELATIONSHIP_MAP: Record = { + MANY_TO_ONE: 'many_to_one', + ONE_TO_MANY: 'one_to_many', + ONE_TO_ONE: 'one_to_one', +}; + +const RELATIONSHIP_INVERSE: 
Record = { + many_to_one: 'one_to_many', + one_to_many: 'many_to_one', + one_to_one: 'one_to_one', +}; + +const SCAN_MANAGED_DESCRIPTION_KEYS = new Set(['db', 'ai']); + +export interface LiveDatabaseManifestColumn { + name: string; + type: string; + pk?: boolean; + nullable?: boolean; + descriptions?: Record; +} + +export interface LiveDatabaseManifestJoinEntry { + to: string; + on: string; + relationship: string; + source: string; +} + +export interface LiveDatabaseManifestTableEntry { + table: string; + descriptions?: Record; + columns: LiveDatabaseManifestColumn[]; + joins?: LiveDatabaseManifestJoinEntry[]; +} + +export interface LiveDatabaseManifestShard { + tables: Record; +} + +export interface LiveDatabaseManifestTableData { + name: string; + catalog: string | null; + db: string | null; + descriptions?: Record; + columns: Array<{ + name: string; + type: string; + pk?: boolean; + nullable?: boolean; + descriptions?: Record; + }>; +} + +export interface LiveDatabaseManifestJoinData { + fromTable: string; + fromColumns: string[]; + toTable: string; + toColumns: string[]; + relationship: string; + source: 'formal' | 'inferred' | 'manual'; +} + +export interface LiveDatabaseManifestExistingDescriptions { + table?: Record; + columns: Map>; +} + +export interface BuildLiveDatabaseManifestShardsInput { + connectionType: string; + tables: LiveDatabaseManifestTableData[]; + joins: LiveDatabaseManifestJoinData[]; + mapColumnType: (nativeType: string) => string; + existingPreservedJoins?: Map; + existingDescriptions?: Map; +} + +export interface BuildLiveDatabaseManifestShardsResult { + shards: Map; + tablesProcessed: number; +} + +function mergeDescriptionsPreservingExternal( + existing: Record | undefined, + incoming: Record | undefined, +): Record | undefined { + if (!existing && !incoming) { + return undefined; + } + const result: Record = {}; + if (existing) { + for (const [key, value] of Object.entries(existing)) { + if (!SCAN_MANAGED_DESCRIPTION_KEYS.has(key)) { 
+ result[key] = value; + } + } + } + if (incoming) { + Object.assign(result, incoming); + } + return Object.keys(result).length > 0 ? result : undefined; +} + +function getShardKey(connectionType: string, catalog: string | null, db: string | null): string { + const normalized = connectionType.toUpperCase(); + + switch (normalized) { + case 'SNOWFLAKE': + case 'DATABRICKS': { + const catalogPart = catalog ?? 'default'; + const schemaPart = db ?? 'public'; + return `${catalogPart}.${schemaPart}`; + } + case 'BIGQUERY': { + return db ?? catalog ?? 'default'; + } + case 'MYSQL': + case 'CLICKHOUSE': { + return db ?? catalog ?? 'default'; + } + default: { + return db ?? 'public'; + } + } +} + +function buildTableRef(name: string, catalog: string | null, db: string | null): string { + const parts: string[] = []; + if (catalog) { + parts.push(catalog); + } + if (db) { + parts.push(db); + } + parts.push(name); + return parts.join('.'); +} + +function addJoinOnce( + joinsByTable: Map, + tableName: string, + join: LiveDatabaseManifestJoinEntry, +): void { + const joins = joinsByTable.get(tableName) ?? 
[]; + const exists = joins.some((candidate) => candidate.to === join.to && candidate.on === join.on); + if (!exists) { + joins.push(join); + } + joinsByTable.set(tableName, joins); +} + +function joinCondition( + leftTable: string, + leftColumns: readonly string[], + rightTable: string, + rightColumns: readonly string[], +): string { + if (leftColumns.length === 0 || leftColumns.length !== rightColumns.length) { + throw new Error(`Invalid relationship join from ${leftTable} to ${rightTable}: column tuple widths differ`); + } + return leftColumns + .map((leftColumn, index) => { + const rightColumn = rightColumns[index]; + if (!rightColumn) { + throw new Error(`Invalid relationship join from ${leftTable} to ${rightTable}: missing target column`); + } + return `${leftTable}.${leftColumn} = ${rightTable}.${rightColumn}`; + }) + .join(' AND '); +} + +function buildJoinsByTable( + tableNames: Set, + joins: LiveDatabaseManifestJoinData[], + preservedJoins: Map, +): Map { + const joinsByTable = new Map(); + + for (const join of joins) { + if (!tableNames.has(join.fromTable) || !tableNames.has(join.toTable)) { + continue; + } + const relationship = RELATIONSHIP_MAP[join.relationship] ?? join.relationship; + addJoinOnce(joinsByTable, join.fromTable, { + to: join.toTable, + on: joinCondition(join.fromTable, join.fromColumns, join.toTable, join.toColumns), + relationship, + source: join.source, + }); + + const reverseRelationship = RELATIONSHIP_INVERSE[relationship] ?? 
'one_to_many'; + addJoinOnce(joinsByTable, join.toTable, { + to: join.fromTable, + on: joinCondition(join.toTable, join.toColumns, join.fromTable, join.fromColumns), + relationship: reverseRelationship, + source: join.source, + }); + } + + for (const [tableName, tableJoins] of preservedJoins) { + if (!tableNames.has(tableName)) { + continue; + } + for (const join of tableJoins) { + if (tableNames.has(join.to)) { + addJoinOnce(joinsByTable, tableName, join); + } + } + } + + return joinsByTable; +} + +export function buildLiveDatabaseManifestShards( + input: BuildLiveDatabaseManifestShardsInput, +): BuildLiveDatabaseManifestShardsResult { + const tableNames = new Set(input.tables.map((table) => table.name)); + const joinsByTable = buildJoinsByTable(tableNames, input.joins, input.existingPreservedJoins ?? new Map()); + const shards = new Map(); + + for (const table of input.tables) { + const shardKey = getShardKey(input.connectionType, table.catalog, table.db); + const shard = shards.get(shardKey) ?? 
{ tables: {} }; + const existingDescriptions = input.existingDescriptions?.get(table.name); + + const columns: LiveDatabaseManifestColumn[] = table.columns.map((column) => { + const manifestColumn: LiveDatabaseManifestColumn = { + name: column.name, + type: input.mapColumnType(column.type), + }; + if (column.pk) { + manifestColumn.pk = true; + } + if (column.nullable === false) { + manifestColumn.nullable = false; + } + const descriptions = mergeDescriptionsPreservingExternal( + existingDescriptions?.columns.get(column.name), + column.descriptions, + ); + if (descriptions) { + manifestColumn.descriptions = descriptions; + } + return manifestColumn; + }); + + const entry: LiveDatabaseManifestTableEntry = { + table: buildTableRef(table.name, table.catalog, table.db), + columns, + }; + + const tableDescriptions = mergeDescriptionsPreservingExternal(existingDescriptions?.table, table.descriptions); + if (tableDescriptions) { + entry.descriptions = tableDescriptions; + } + + const tableJoins = joinsByTable.get(table.name); + if (tableJoins && tableJoins.length > 0) { + entry.joins = tableJoins; + } + + shard.tables[table.name] = entry; + shards.set(shardKey, shard); + } + + return { + shards, + tablesProcessed: input.tables.length, + }; +} diff --git a/packages/context/src/ingest/adapters/live-database/stage.test.ts b/packages/context/src/ingest/adapters/live-database/stage.test.ts new file mode 100644 index 00000000..4869a516 --- /dev/null +++ b/packages/context/src/ingest/adapters/live-database/stage.test.ts @@ -0,0 +1,152 @@ +import { mkdtemp, readFile } from 'node:fs/promises'; +import { tmpdir } from 'node:os'; +import { join } from 'node:path'; +import { describe, expect, it } from 'vitest'; +import { + detectLiveDatabaseStagedDir, + LIVE_DATABASE_FOREIGN_KEYS_FILE, + LIVE_DATABASE_META_FILE, + liveDatabaseTablePath, + readLiveDatabaseTableFiles, + writeLiveDatabaseSnapshot, +} from './stage.js'; +import type { KloSchemaSnapshot } from '../../../scan/types.js'; + 
+function snapshot(): KloSchemaSnapshot { + return { + connectionId: 'conn-1', + driver: 'postgres', + extractedAt: '2026-04-27T00:00:00.000Z', + scope: { schemas: ['public'] }, + metadata: { dialect: 'postgres' }, + tables: [ + { + name: 'orders', + catalog: null, + db: 'public', + kind: 'table', + comment: 'Orders placed by customers', + estimatedRows: 200, + columns: [ + { + name: 'id', + nativeType: 'integer', + normalizedType: 'integer', + dimensionType: 'number', + nullable: false, + primaryKey: true, + comment: null, + }, + { + name: 'customer_id', + nativeType: 'integer', + normalizedType: 'integer', + dimensionType: 'number', + nullable: false, + primaryKey: false, + comment: null, + }, + { + name: 'total', + nativeType: 'numeric', + normalizedType: 'numeric', + dimensionType: 'number', + nullable: false, + primaryKey: false, + comment: null, + }, + ], + foreignKeys: [ + { + fromColumn: 'customer_id', + toCatalog: null, + toDb: 'public', + toTable: 'customers', + toColumn: 'id', + constraintName: null, + }, + ], + }, + { + name: 'customers', + catalog: null, + db: 'public', + kind: 'table', + comment: null, + estimatedRows: 50, + columns: [ + { + name: 'id', + nativeType: 'integer', + normalizedType: 'integer', + dimensionType: 'number', + nullable: false, + primaryKey: true, + comment: null, + }, + ], + foreignKeys: [], + }, + ], + }; +} + +describe('live-database staged snapshot files', () => { + it('writes deterministic metadata, table, and foreign-key files', async () => { + const dir = await mkdtemp(join(tmpdir(), 'klo-live-db-stage-')); + await writeLiveDatabaseSnapshot(dir, snapshot()); + + await expect(readFile(join(dir, LIVE_DATABASE_META_FILE), 'utf8')).resolves.toContain('"connectionId": "conn-1"'); + await expect(readFile(join(dir, LIVE_DATABASE_FOREIGN_KEYS_FILE), 'utf8')).resolves.toContain( + '"fromTable": "orders"', + ); + const connectionJson = await readFile(join(dir, LIVE_DATABASE_META_FILE), 'utf8'); + 
expect(connectionJson).toContain('"driver": "postgres"'); + expect(connectionJson).toContain('"schemas"'); + + const ordersPath = liveDatabaseTablePath({ catalog: null, db: 'public', name: 'orders' }); + const customersPath = liveDatabaseTablePath({ catalog: null, db: 'public', name: 'customers' }); + expect(ordersPath).toMatch(/^tables\/[A-Za-z0-9_-]+\.[A-Za-z0-9_-]+\.[A-Za-z0-9_-]+\.json$/); + await expect(readFile(join(dir, ordersPath), 'utf8')).resolves.toContain('"name": "orders"'); + await expect(readFile(join(dir, customersPath), 'utf8')).resolves.toContain('"name": "customers"'); + const ordersJson = await readFile(join(dir, ordersPath), 'utf8'); + expect(ordersJson).toContain('"kind": "table"'); + expect(ordersJson).toContain('"estimatedRows": 200'); + expect(ordersJson).toContain('"nativeType": "integer"'); + expect(ordersJson).toContain('"normalizedType": "integer"'); + expect(ordersJson).not.toContain('"type": "integer"'); + + const tableFiles = await readLiveDatabaseTableFiles(dir); + expect(tableFiles.map((file) => file.table.name)).toEqual(['customers', 'orders']); + expect(await detectLiveDatabaseStagedDir(dir)).toBe(true); + }); + + it('redacts sensitive snapshot metadata before writing connection metadata', async () => { + const dir = await mkdtemp(join(tmpdir(), 'klo-live-db-redacted-stage-')); + await writeLiveDatabaseSnapshot(dir, { + ...snapshot(), + metadata: { + dialect: 'postgres', + url: 'postgres://reader:secret@example.test/db', // pragma: allowlist secret + serviceAccountJson: { + client_email: 'reader@example.test', + private_key: 'pem-value', // pragma: allowlist secret + }, + }, + }); + + const connectionJson = await readFile(join(dir, LIVE_DATABASE_META_FILE), 'utf8'); + + expect(connectionJson).toContain('"dialect": "postgres"'); + expect(connectionJson).toContain('"client_email": "reader@example.test"'); + expect(connectionJson).toContain('"url": ""'); + expect(connectionJson).toContain('"private_key": ""'); + 
expect(connectionJson).not.toContain('postgres://reader:secret@example.test/db'); // pragma: allowlist secret
    expect(connectionJson).not.toContain('pem-value');
  });

  it('returns false for a directory that is missing live database metadata', async () => {
    const dir = await mkdtemp(join(tmpdir(), 'klo-live-db-empty-'));
    expect(await detectLiveDatabaseStagedDir(dir)).toBe(false);
  });
});
diff --git a/packages/context/src/ingest/adapters/live-database/stage.ts b/packages/context/src/ingest/adapters/live-database/stage.ts
new file mode 100644
index 00000000..cc3af2d9
--- /dev/null
+++ b/packages/context/src/ingest/adapters/live-database/stage.ts
@@ -0,0 +1,138 @@
import { Buffer } from 'node:buffer';
import type { Dirent } from 'node:fs';
import { mkdir, readdir, readFile, writeFile } from 'node:fs/promises';
import { join, relative } from 'node:path';
import { redactKloSensitiveMetadata } from '../../../core/redaction.js';
import type { KloSchemaSnapshot, KloSchemaTable, KloTableRef } from '../../../scan/types.js';

/** File (relative to the staged directory) holding connection-level metadata. */
export const LIVE_DATABASE_META_FILE = 'connection.json';
/** File (relative to the staged directory) holding the flattened foreign-key index. */
export const LIVE_DATABASE_FOREIGN_KEYS_FILE = 'foreign-keys.json';
/** Sub-directory (relative to the staged directory) holding one JSON file per table. */
const LIVE_DATABASE_TABLES_DIR = 'tables';

/** A staged table file: its path relative to the staged directory plus the parsed table. */
interface LiveDatabaseTableFile {
  path: string;
  table: KloSchemaTable;
}

/** One row of `foreign-keys.json`: a foreign key flattened together with its source table. */
interface ForeignKeyIndexEntry {
  fromTable: string;
  fromTablePath: string;
  fromColumn: string;
  toCatalog: string | null;
  toDb: string | null;
  toTable: string;
  toColumn: string;
  constraintName: string | null;
}

/**
 * Encodes one identifier part as base64url so it is safe to use in a file name.
 * A missing part (`null`/`undefined`) is encoded as the placeholder `_`.
 */
function encodePathPart(value: string | null | undefined): string {
  // NOTE(review): an identifier that is literally "_" encodes to the same text
  // as a missing part — confirm that collision is acceptable.
  return Buffer.from(value ?? '_', 'utf8').toString('base64url');
}

/** Builds the `catalog NUL db NUL name` key used to order tables. */
function tableSortKey(table: KloTableRef): string {
  return `${table.catalog ?? ''}\u0000${table.db ?? ''}\u0000${table.name}`;
}

/** Compares two strings by UTF-16 code units, independent of the runtime locale. */
function compareCodeUnits(a: string, b: string): number {
  if (a === b) {
    return 0;
  }
  return a < b ? -1 : 1;
}

/**
 * Returns the staged-directory-relative path of a table's JSON file:
 * `tables/<catalog>.<db>.<name>.json`, each part base64url-encoded.
 */
export function liveDatabaseTablePath(table: KloTableRef): string {
  const parts = [table.catalog, table.db, table.name].map((part) => encodePathPart(part));
  return `${LIVE_DATABASE_TABLES_DIR}/${parts.join('.')}.json`;
}

/**
 * Recursively lists the regular files under `dir`, as sorted, `/`-separated
 * paths relative to `root`.
 *
 * Any `readdir` failure (for example a directory that does not exist) is
 * treated as "no files" rather than raised.
 */
async function walkFiles(root: string, dir = root): Promise<string[]> {
  let entries: Dirent[];
  try {
    entries = await readdir(dir, { withFileTypes: true });
  } catch {
    return [];
  }
  const files: string[] = [];
  for (const entry of entries) {
    const absolute = join(dir, entry.name);
    if (entry.isDirectory()) {
      files.push(...(await walkFiles(root, absolute)));
    } else if (entry.isFile()) {
      // Normalise Windows separators so callers always see forward slashes.
      files.push(relative(root, absolute).replace(/\\/g, '/'));
    }
  }
  return files.sort();
}

/** Serialises a value as 2-space-indented JSON followed by a trailing newline. */
function stableJson(value: unknown): string {
  return `${JSON.stringify(value, null, 2)}\n`;
}

/**
 * Flattens every table's foreign keys into a single sorted list.
 *
 * Entries are ordered by source table name, source column, target table and
 * target column. Ties (for example same-named tables in different schemas)
 * are then broken by the source table path and the target db/catalog using a
 * locale-independent comparison, so the output does not depend on the order
 * in which the snapshot listed its tables.
 */
function foreignKeyIndex(snapshot: KloSchemaSnapshot): ForeignKeyIndexEntry[] {
  const entries: ForeignKeyIndexEntry[] = [];
  for (const table of snapshot.tables) {
    for (const fk of table.foreignKeys) {
      entries.push({
        fromTable: table.name,
        fromTablePath: liveDatabaseTablePath(table),
        fromColumn: fk.fromColumn,
        toCatalog: fk.toCatalog,
        toDb: fk.toDb,
        toTable: fk.toTable,
        toColumn: fk.toColumn,
        constraintName: fk.constraintName,
      });
    }
  }
  entries.sort(
    (a, b) =>
      a.fromTable.localeCompare(b.fromTable) ||
      a.fromColumn.localeCompare(b.fromColumn) ||
      a.toTable.localeCompare(b.toTable) ||
      a.toColumn.localeCompare(b.toColumn) ||
      // Tie-breakers only: they never reorder entries the keys above already separate.
      compareCodeUnits(a.fromTablePath, b.fromTablePath) ||
      compareCodeUnits(a.toDb ?? '', b.toDb ?? '') ||
      compareCodeUnits(a.toCatalog ?? '', b.toCatalog ?? ''),
  );
  return entries;
}

/**
 * Writes a schema snapshot into `stagedDir` as three kinds of files:
 * `connection.json` (connection metadata with sensitive values redacted via
 * `redactKloSensitiveMetadata`), `foreign-keys.json` (the flattened
 * foreign-key index) and one `tables/*.json` file per table.
 *
 * The `tables` directory is created if needed. Existing files in `stagedDir`
 * are overwritten when they share a name but are otherwise left in place.
 */
export async function writeLiveDatabaseSnapshot(stagedDir: string, snapshot: KloSchemaSnapshot): Promise<void> {
  await mkdir(join(stagedDir, LIVE_DATABASE_TABLES_DIR), { recursive: true });
  const sortedTables = [...snapshot.tables].sort((a, b) => tableSortKey(a).localeCompare(tableSortKey(b)));
  const metadata = {
    connectionId: snapshot.connectionId,
    driver: snapshot.driver,
    extractedAt: snapshot.extractedAt,
    scope: snapshot.scope,
    metadata: redactKloSensitiveMetadata(snapshot.metadata),
    tableCount: sortedTables.length,
  };
  await writeFile(join(stagedDir, LIVE_DATABASE_META_FILE), stableJson(metadata));
  await writeFile(
    join(stagedDir, LIVE_DATABASE_FOREIGN_KEYS_FILE),
    stableJson({ foreignKeys: foreignKeyIndex(snapshot) }),
  );
  for (const table of sortedTables) {
    await writeFile(join(stagedDir, liveDatabaseTablePath(table)), stableJson(table));
  }
}

/**
 * Reads every `*.json` file under `<stagedDir>/tables` and returns the ones
 * that look like a table (string `name`, array `columns`), ordered by
 * catalog, db and name.
 *
 * A file that is not valid JSON makes `JSON.parse` throw; that error is not
 * caught here.
 */
export async function readLiveDatabaseTableFiles(stagedDir: string): Promise<LiveDatabaseTableFile[]> {
  const files = await walkFiles(join(stagedDir, LIVE_DATABASE_TABLES_DIR));
  const out: LiveDatabaseTableFile[] = [];
  for (const file of files.filter((path) => path.endsWith('.json'))) {
    const path = `${LIVE_DATABASE_TABLES_DIR}/${file}`;
    const raw = await readFile(join(stagedDir, path), 'utf8');
    const parsed = JSON.parse(raw) as KloSchemaTable;
    // Skip JSON files that do not have the minimal table shape.
    if (parsed && typeof parsed.name === 'string' && Array.isArray(parsed.columns)) {
      out.push({ path, table: parsed });
    }
  }
  out.sort((a, b) => tableSortKey(a.table).localeCompare(tableSortKey(b.table)));
  return out;
}

/**
 * Reports whether `stagedDir` looks like a staged live-database snapshot:
 * `connection.json` must parse to a non-array object and at least one table
 * file must be readable. Any read or parse failure yields `false`.
 */
export async function detectLiveDatabaseStagedDir(stagedDir: string): Promise<boolean> {
  try {
    const meta = JSON.parse(await readFile(join(stagedDir, LIVE_DATABASE_META_FILE), 'utf8')) as unknown;
    if (!meta || typeof meta !== 'object' || Array.isArray(meta)) {
      return false;
    }
    const files = await readLiveDatabaseTableFiles(stagedDir);
    return files.length > 0;
  } catch {
    return false;
  }
}
diff --git a/packages/context/src/ingest/adapters/live-database/structural-sync.test.ts b/packages/context/src/ingest/adapters/live-database/structural-sync.test.ts
new file mode 100644
index 00000000..1df5faf8
--- /dev/null
+++ b/packages/context/src/ingest/adapters/live-database/structural-sync.test.ts
@@ -0,0 +1,428 @@
import { describe, expect, it } from 'vitest';
import { type LiveDatabaseSyncedSchema, planLiveDatabaseStructuralSync } from './structural-sync.js';

function
idFactory(): () => string { + let next = 1; + return () => `id-${next++}`; +} + +describe('planLiveDatabaseStructuralSync', () => { + it('plans table and column creates, updates, deletes, and metadata invalidation', () => { + const current: LiveDatabaseSyncedSchema = { + connectionId: 'conn-1', + tables: [ + { + id: 'tbl-orders', + name: 'orders', + catalog: null, + db: 'public', + enabled: true, + descriptions: { ai: 'Old AI order text', db: 'Old DB order text' }, + columns: [ + { + id: 'col-order-id', + name: 'id', + type: 'number', + nullable: false, + primaryKey: true, + parentColumnId: null, + descriptions: { db: 'Order id' }, + embedding: [1, 2, 3], + sampleValues: null, + cardinality: null, + }, + { + id: 'col-order-total', + name: 'total', + type: 'number', + nullable: true, + primaryKey: false, + parentColumnId: null, + descriptions: { ai: 'Old AI total text', db: 'Old total text' }, + embedding: [4, 5, 6], + sampleValues: ['10'], + cardinality: 12, + }, + { + id: 'col-order-removed', + name: 'removed', + type: 'string', + nullable: true, + primaryKey: false, + parentColumnId: null, + descriptions: {}, + embedding: null, + sampleValues: null, + cardinality: null, + }, + ], + }, + { + id: 'tbl-removed', + name: 'removed_table', + catalog: null, + db: 'public', + enabled: true, + descriptions: {}, + columns: [ + { + id: 'col-removed-id', + name: 'id', + type: 'number', + nullable: false, + primaryKey: true, + parentColumnId: null, + descriptions: {}, + embedding: null, + sampleValues: null, + cardinality: null, + }, + ], + }, + ], + links: [ + { + id: 'inferred-total-link', + fromTableId: 'tbl-orders', + fromColumnId: 'col-order-total', + toTableId: 'tbl-orders', + toColumnId: 'col-order-id', + source: 'inferred', + confidence: 0.7, + relationshipType: 'MANY_TO_ONE', + isPrimaryKeyReference: true, + }, + ], + }; + + const plan = planLiveDatabaseStructuralSync({ + connectionId: 'conn-1', + current, + extracted: { + connectionId: 'conn-1', + tables: [ + { + 
name: 'orders', + catalog: null, + db: 'public', + dbComment: 'Fresh DB order text', + columns: [ + { + name: 'id', + type: 'number', + nullable: false, + primaryKey: true, + dbComment: 'Order id', + }, + { + name: 'total', + type: 'string', + nullable: false, + primaryKey: false, + dbComment: 'Fresh total text', + }, + { + name: 'created_at', + type: 'time', + nullable: false, + primaryKey: false, + dbComment: 'Creation timestamp', + }, + ], + foreignKeys: [], + }, + { + name: 'customers', + catalog: null, + db: 'public', + dbComment: 'Customer table', + columns: [ + { + name: 'id', + type: 'number', + nullable: false, + primaryKey: true, + dbComment: null, + }, + ], + foreignKeys: [], + }, + ], + }, + idFactory: idFactory(), + }); + + expect(plan.stats).toEqual({ + tablesCreated: 1, + tablesDeleted: 1, + columnsCreated: 2, + columnsDeleted: 2, + columnsModified: 1, + formalLinksCreated: 0, + formalLinksDeleted: 0, + }); + expect(plan.operations.deleteTableIds).toEqual(['tbl-removed']); + expect(plan.operations.deleteColumnIds).toEqual(['col-order-removed']); + expect(plan.operations.insertTables).toEqual([ + { + id: 'id-2', + connectionId: 'conn-1', + name: 'customers', + catalog: null, + db: 'public', + enabled: true, + }, + ]); + expect(plan.operations.insertColumns).toEqual([ + { + id: 'id-1', + tableId: 'tbl-orders', + name: 'created_at', + parentColumnId: null, + }, + { + id: 'id-3', + tableId: 'id-2', + name: 'id', + parentColumnId: null, + }, + ]); + expect(plan.operations.touchColumnIds).toEqual(['col-order-total']); + expect(plan.operations.invalidateColumnEmbeddingIds).toEqual(['col-order-total']); + expect(plan.inferredLinksToValidate).toEqual(['inferred-total-link']); + expect(plan.changes).toEqual({ + newTableIds: ['id-2'], + newColumnIds: ['id-1', 'id-3'], + tablesWithStructuralChanges: ['tbl-orders', 'id-2'], + columnsWithTypeChange: ['col-order-total'], + columnsWithDescriptionChange: ['col-order-total'], + tablesWithDescriptionChange: 
['tbl-orders'], + }); + + const orders = plan.schema.tables.find((table) => table.name === 'orders'); + expect(orders?.descriptions).toEqual({ db: 'Fresh DB order text' }); + expect(orders?.columns.map((column) => column.name)).toEqual(['id', 'total', 'created_at']); + expect(orders?.columns.find((column) => column.name === 'total')).toMatchObject({ + id: 'col-order-total', + type: 'string', + nullable: false, + primaryKey: false, + descriptions: { db: 'Fresh total text' }, + embedding: null, + sampleValues: ['10'], + cardinality: 12, + }); + }); + + it('builds formal links from extracted foreign keys and preserves valid inferred links', () => { + const current: LiveDatabaseSyncedSchema = { + connectionId: 'conn-1', + tables: [ + { + id: 'tbl-orders', + name: 'orders', + catalog: null, + db: 'public', + enabled: true, + descriptions: {}, + columns: [ + { + id: 'col-orders-id', + name: 'id', + type: 'number', + nullable: false, + primaryKey: true, + parentColumnId: null, + descriptions: {}, + embedding: null, + sampleValues: null, + cardinality: null, + }, + { + id: 'col-orders-customer', + name: 'customer_id', + type: 'number', + nullable: false, + primaryKey: false, + parentColumnId: null, + descriptions: {}, + embedding: null, + sampleValues: null, + cardinality: null, + }, + ], + }, + { + id: 'tbl-customers', + name: 'customers', + catalog: null, + db: 'public', + enabled: true, + descriptions: {}, + columns: [ + { + id: 'col-customers-id', + name: 'id', + type: 'number', + nullable: false, + primaryKey: true, + parentColumnId: null, + descriptions: {}, + embedding: null, + sampleValues: null, + cardinality: null, + }, + ], + }, + ], + links: [ + { + id: 'formal-existing', + fromTableId: 'tbl-orders', + fromColumnId: 'col-orders-customer', + toTableId: 'tbl-customers', + toColumnId: 'col-customers-id', + source: 'formal', + confidence: 1, + relationshipType: 'MANY_TO_ONE', + isPrimaryKeyReference: true, + }, + { + id: 'inferred-existing', + fromTableId: 
'tbl-orders', + fromColumnId: 'col-orders-id', + toTableId: 'tbl-customers', + toColumnId: 'col-customers-id', + source: 'inferred', + confidence: 0.6, + relationshipType: 'MANY_TO_ONE', + isPrimaryKeyReference: true, + }, + ], + }; + + const plan = planLiveDatabaseStructuralSync({ + connectionId: 'conn-1', + current, + extracted: { + connectionId: 'conn-1', + tables: [ + { + name: 'orders', + catalog: null, + db: 'public', + dbComment: null, + columns: [ + { name: 'id', type: 'number', nullable: false, primaryKey: true, dbComment: null }, + { name: 'customer_id', type: 'number', nullable: false, primaryKey: false, dbComment: null }, + ], + foreignKeys: [ + { + fromTable: 'orders', + fromColumn: 'customer_id', + toTable: 'customers', + toColumn: 'id', + }, + ], + }, + { + name: 'customers', + catalog: null, + db: 'public', + dbComment: null, + columns: [{ name: 'id', type: 'number', nullable: false, primaryKey: true, dbComment: null }], + foreignKeys: [], + }, + ], + }, + idFactory: idFactory(), + }); + + expect(plan.stats.formalLinksCreated).toBe(0); + expect(plan.stats.formalLinksDeleted).toBe(0); + expect(plan.schema.links.map((link) => link.id)).toEqual(['formal-existing', 'inferred-existing']); + + const planAfterForeignKeyRemoval = planLiveDatabaseStructuralSync({ + connectionId: 'conn-1', + current, + extracted: { + connectionId: 'conn-1', + tables: [ + { + name: 'orders', + catalog: null, + db: 'public', + dbComment: null, + columns: [ + { name: 'id', type: 'number', nullable: false, primaryKey: true, dbComment: null }, + { name: 'customer_id', type: 'number', nullable: false, primaryKey: false, dbComment: null }, + ], + foreignKeys: [], + }, + { + name: 'customers', + catalog: null, + db: 'public', + dbComment: null, + columns: [{ name: 'id', type: 'number', nullable: false, primaryKey: true, dbComment: null }], + foreignKeys: [], + }, + ], + }, + idFactory: idFactory(), + }); + + expect(planAfterForeignKeyRemoval.stats.formalLinksDeleted).toBe(1); + 
expect(planAfterForeignKeyRemoval.schema.links.map((link) => link.id)).toEqual(['inferred-existing']); + + const planAfterForeignKeyCreation = planLiveDatabaseStructuralSync({ + connectionId: 'conn-1', + current: { ...current, links: [current.links[1]] }, + extracted: { + connectionId: 'conn-1', + tables: [ + { + name: 'orders', + catalog: null, + db: 'public', + dbComment: null, + columns: [ + { name: 'id', type: 'number', nullable: false, primaryKey: true, dbComment: null }, + { name: 'customer_id', type: 'number', nullable: false, primaryKey: false, dbComment: null }, + ], + foreignKeys: [ + { + fromTable: 'orders', + fromColumn: 'customer_id', + toTable: 'customers', + toColumn: 'id', + }, + ], + }, + { + name: 'customers', + catalog: null, + db: 'public', + dbComment: null, + columns: [{ name: 'id', type: 'number', nullable: false, primaryKey: true, dbComment: null }], + foreignKeys: [], + }, + ], + }, + idFactory: idFactory(), + }); + + expect(planAfterForeignKeyCreation.stats.formalLinksCreated).toBe(1); + expect(planAfterForeignKeyCreation.schema.links[0]).toMatchObject({ + id: 'id-1', + fromTableId: 'tbl-orders', + fromColumnId: 'col-orders-customer', + toTableId: 'tbl-customers', + toColumnId: 'col-customers-id', + source: 'formal', + confidence: 1, + relationshipType: 'MANY_TO_ONE', + isPrimaryKeyReference: true, + }); + }); +}); diff --git a/packages/context/src/ingest/adapters/live-database/structural-sync.ts b/packages/context/src/ingest/adapters/live-database/structural-sync.ts new file mode 100644 index 00000000..d98a692b --- /dev/null +++ b/packages/context/src/ingest/adapters/live-database/structural-sync.ts @@ -0,0 +1,525 @@ +import type { LiveDatabaseExtractedSchema, LiveDatabaseExtractedTable } from './extracted-schema.js'; +import { buildLiveDatabaseTableNaturalKey } from './extracted-schema.js'; + +export interface LiveDatabaseSyncedColumn { + id: string; + name: string; + type: string; + nullable: boolean; + primaryKey: boolean; + 
parentColumnId: string | null;
  descriptions: Record<string, string>;
  embedding: number[] | null;
  sampleValues: string[] | null;
  cardinality: number | null;
}

/** A stored table together with its stored columns. */
export interface LiveDatabaseSyncedTable {
  id: string;
  name: string;
  catalog: string | null;
  db: string | null;
  enabled: boolean;
  descriptions: Record<string, string>;
  columns: LiveDatabaseSyncedColumn[];
}

/** A stored column-to-column link; `source` records where the link came from. */
export interface LiveDatabaseSyncedLink {
  id: string;
  fromTableId: string;
  fromColumnId: string;
  toTableId: string;
  toColumnId: string;
  source: 'formal' | 'inferred' | 'manual';
  confidence: number;
  relationshipType: string;
  isPrimaryKeyReference: boolean;
}

/** The stored schema of one connection: its tables and the links between their columns. */
export interface LiveDatabaseSyncedSchema {
  connectionId: string;
  tables: LiveDatabaseSyncedTable[];
  links: LiveDatabaseSyncedLink[];
}

/** Ids of the tables and columns a sync plan creates or changes, grouped by kind of change. */
export interface LiveDatabaseStructuralChanges {
  newTableIds: string[];
  newColumnIds: string[];
  tablesWithStructuralChanges: string[];
  columnsWithTypeChange: string[];
  columnsWithDescriptionChange: string[];
  tablesWithDescriptionChange: string[];
}

/** Counters summarising a sync plan. */
export interface LiveDatabaseStructuralSyncStats {
  tablesCreated: number;
  tablesDeleted: number;
  columnsCreated: number;
  columnsDeleted: number;
  columnsModified: number;
  formalLinksCreated: number;
  formalLinksDeleted: number;
}

/** The concrete row-level operations a sync plan asks the caller to perform. */
export interface LiveDatabaseStructuralSyncOperations {
  deleteTableIds: string[];
  deleteColumnIds: string[];
  insertTables: Array<{
    id: string;
    connectionId: string;
    name: string;
    catalog: string | null;
    db: string | null;
    enabled: boolean;
  }>;
  insertColumns: Array<{
    id: string;
    tableId: string;
    name: string;
    parentColumnId: string | null;
  }>;
  touchColumnIds: string[];
  invalidateColumnEmbeddingIds: string[];
}

/** Result of {@link planLiveDatabaseStructuralSync}. */
export interface LiveDatabaseStructuralSyncPlan {
  /** The schema as it would look after the plan is applied. */
  schema: LiveDatabaseSyncedSchema;
  /** Ids of inferred links that touch a column whose type changed. */
  inferredLinksToValidate: string[];
  stats: LiveDatabaseStructuralSyncStats;
  changes: LiveDatabaseStructuralChanges;
  operations: LiveDatabaseStructuralSyncOperations;
}

/** Input of {@link planLiveDatabaseStructuralSync}. */
export interface PlanLiveDatabaseStructuralSyncInput {
  connectionId: string;
  /** Currently stored schema, or `null` when nothing has been synced yet. */
  current: LiveDatabaseSyncedSchema | null;
  /** Schema freshly extracted from the database. */
  extracted: LiveDatabaseExtractedSchema;
  /** Produces the id for every table, column and link the plan creates. */
  idFactory: () => string;
}

/** Per-table outcome of {@link planUpdatedTable}. */
interface UpdatedTableResult {
  table: LiveDatabaseSyncedTable;
  columnsCreated: number;
  columnsDeleted: number;
  columnsModified: number;
  newColumnIds: string[];
  columnsWithTypeChange: string[];
  columnsWithDescriptionChange: string[];
  tableDescriptionChanged: boolean;
}

/**
 * Returns a copy of `descriptions` with the `db` entry replaced by
 * `dbComment` (or removed when the comment is empty/missing). When `changed`
 * is true the `ai` entry is dropped as well, since it was written against the
 * previous DB comment.
 */
function updateDescription(
  descriptions: Record<string, string>,
  dbComment: string | null | undefined,
  changed: boolean,
): Record<string, string> {
  const updated = { ...descriptions };
  if (dbComment) {
    updated.db = dbComment;
  } else {
    delete updated.db;
  }
  if (changed) {
    delete updated.ai;
  }
  return updated;
}

/** Builds the initial descriptions of a new table/column from its DB comment. */
function descriptionFromDbComment(dbComment: string | null | undefined): Record<string, string> {
  return dbComment ? { db: dbComment } : {};
}

/**
 * Plans the column-level changes for a table that exists both in the stored
 * schema and in the extracted schema. Columns are matched by name.
 *
 * Side effects: appends to `args.operations` (column deletes, inserts,
 * touches, embedding invalidations) and to `args.inferredLinksToValidate`
 * (ids of inferred links touching a column whose type changed, without
 * duplicates).
 *
 * @returns the updated table (columns in extracted order) plus per-table counters.
 */
function planUpdatedTable(args: {
  currentTable: LiveDatabaseSyncedTable;
  extractedTable: LiveDatabaseExtractedTable;
  currentLinks: LiveDatabaseSyncedLink[];
  inferredLinksToValidate: string[];
  operations: LiveDatabaseStructuralSyncOperations;
  idFactory: () => string;
}): UpdatedTableResult {
  const { currentTable, extractedTable, currentLinks, inferredLinksToValidate, operations, idFactory } = args;

  let columnsCreated = 0;
  let columnsDeleted = 0;
  let columnsModified = 0;
  const newColumnIds: string[] = [];
  const columnsWithTypeChange: string[] = [];
  const columnsWithDescriptionChange: string[] = [];
  const updatedColumns: LiveDatabaseSyncedColumn[] = [];

  const tableDescriptionChanged = (currentTable.descriptions.db ?? null) !== (extractedTable.dbComment ?? null);
  const currentColumnsByName = new Map(currentTable.columns.map((column) => [column.name, column]));
  const extractedColumnsByName = new Map(extractedTable.columns.map((column) => [column.name, column]));

  // Stored columns that no longer exist in the database.
  for (const [name, currentColumn] of currentColumnsByName) {
    if (!extractedColumnsByName.has(name)) {
      operations.deleteColumnIds.push(currentColumn.id);
      columnsDeleted++;
    }
  }

  for (const [name, extractedColumn] of extractedColumnsByName) {
    const currentColumn = currentColumnsByName.get(name);
    if (!currentColumn) {
      // Column is new in the database.
      const columnId = idFactory();
      operations.insertColumns.push({
        id: columnId,
        tableId: currentTable.id,
        name: extractedColumn.name,
        parentColumnId: null,
      });
      columnsCreated++;
      newColumnIds.push(columnId);
      updatedColumns.push({
        id: columnId,
        name: extractedColumn.name,
        type: extractedColumn.type,
        nullable: extractedColumn.nullable,
        primaryKey: extractedColumn.primaryKey,
        descriptions: descriptionFromDbComment(extractedColumn.dbComment),
        parentColumnId: null,
        embedding: null,
        sampleValues: null,
        cardinality: null,
      });
      continue;
    }

    const typeChanged = currentColumn.type !== extractedColumn.type;
    const nullableChanged = currentColumn.nullable !== extractedColumn.nullable;
    const primaryKeyChanged = currentColumn.primaryKey !== extractedColumn.primaryKey;
    const dbDescriptionChanged = (currentColumn.descriptions.db ?? null) !== (extractedColumn.dbComment ?? null);
    // One flag drives both the queued invalidation and the embedding in the
    // returned schema, so the two can never disagree.
    const embeddingInvalidated = typeChanged || dbDescriptionChanged;

    if (typeChanged || nullableChanged || primaryKeyChanged || dbDescriptionChanged) {
      operations.touchColumnIds.push(currentColumn.id);
      columnsModified++;

      if (embeddingInvalidated) {
        operations.invalidateColumnEmbeddingIds.push(currentColumn.id);
      }

      if (typeChanged) {
        columnsWithTypeChange.push(currentColumn.id);
        // Inferred links on a retyped column may no longer hold; queue them for validation.
        const affectedLinks = currentLinks.filter(
          (link) =>
            link.source === 'inferred' &&
            (link.fromColumnId === currentColumn.id || link.toColumnId === currentColumn.id),
        );
        for (const link of affectedLinks) {
          if (!inferredLinksToValidate.includes(link.id)) {
            inferredLinksToValidate.push(link.id);
          }
        }
      }

      if (dbDescriptionChanged) {
        columnsWithDescriptionChange.push(currentColumn.id);
      }
    }

    updatedColumns.push({
      ...currentColumn,
      type: extractedColumn.type,
      nullable: extractedColumn.nullable,
      primaryKey: extractedColumn.primaryKey,
      descriptions: updateDescription(currentColumn.descriptions, extractedColumn.dbComment, dbDescriptionChanged),
      embedding: embeddingInvalidated ? null : currentColumn.embedding,
    });
  }

  return {
    table: {
      ...currentTable,
      descriptions: updateDescription(currentTable.descriptions, extractedTable.dbComment, tableDescriptionChanged),
      columns: updatedColumns,
    },
    columnsCreated,
    columnsDeleted,
    columnsModified,
    newColumnIds,
    columnsWithTypeChange,
    columnsWithDescriptionChange,
    tableDescriptionChanged,
  };
}

/**
 * Plans a table that exists only in the extracted schema: allocates ids for
 * the table and each of its columns, queues the matching inserts on
 * `args.operations`, and returns the resulting synced table (enabled, with
 * no embeddings, sample values or cardinality yet).
 */
function planCreatedTable(args: {
  connectionId: string;
  extractedTable: LiveDatabaseExtractedTable;
  operations: LiveDatabaseStructuralSyncOperations;
  idFactory: () => string;
}): LiveDatabaseSyncedTable {
  const { connectionId, extractedTable, operations, idFactory } = args;
  const tableId = idFactory();
  operations.insertTables.push({
    id: tableId,
    connectionId,
    name: extractedTable.name,
    catalog: extractedTable.catalog,
    db: extractedTable.db,
    enabled: true,
  });

  const columns: LiveDatabaseSyncedColumn[] = extractedTable.columns.map((extractedColumn) => {
    const columnId = idFactory();
    operations.insertColumns.push({
      id: columnId,
      tableId,
      name: extractedColumn.name,
      parentColumnId: null,
    });
    return {
      id: columnId,
      name: extractedColumn.name,
      type: extractedColumn.type,
      nullable: extractedColumn.nullable,
      primaryKey: extractedColumn.primaryKey,
      descriptions: descriptionFromDbComment(extractedColumn.dbComment),
      parentColumnId: null,
      embedding: null,
      sampleValues: null,
      cardinality: null,
    };
  });

  return {
    id: tableId,
    name: extractedTable.name,
    catalog: extractedTable.catalog,
    db: extractedTable.db,
    enabled: true,
    descriptions: descriptionFromDbComment(extractedTable.dbComment),
    columns,
  };
}

/**
 * Reconciles stored `formal` links with the foreign keys in the extracted
 * schema. Links are identified by their `fromColumnId -> toColumnId` pair.
 *
 * Foreign keys whose source/target table or column cannot be resolved against
 * `tables` are skipped.
 *
 * @returns the formal links to keep (existing ones first, then new ones) and
 *          how many were created and deleted.
 */
function syncFormalLinks(args: {
  extracted: LiveDatabaseExtractedSchema;
  tables: LiveDatabaseSyncedTable[];
  tableNaturalKeyToId: Map<string, string>;
  currentLinks: LiveDatabaseSyncedLink[];
  idFactory: () => string;
}): { links: LiveDatabaseSyncedLink[]; created: number; deleted: number } {
  const { extracted, tables, tableNaturalKeyToId, currentLinks, idFactory } = args;
  const columnKeyToId = new Map<string, string>();

  for (const table of tables) {
    const tableKey = buildLiveDatabaseTableNaturalKey(table);
    for (const column of table.columns) {
      columnKeyToId.set(`${tableKey}.${column.name}`, column.id);
    }
  }

  const extractedFormalLinks: Array<{
    fromTableId: string;
    fromColumnId: string;
    toTableId: string;
    toColumnId: string;
  }> = [];

  for (const table of extracted.tables) {
    const fromTableKey = buildLiveDatabaseTableNaturalKey(table);
    const fromTableId = tableNaturalKeyToId.get(fromTableKey);
    if (!fromTableId) {
      continue;
    }

    for (const foreignKey of table.foreignKeys) {
      // NOTE(review): the target table is looked up in the *source* table's
      // catalog/db, so a foreign key pointing into another schema would not
      // resolve here — confirm whether extracted foreign keys can cross schemas.
      const toTableKey = buildLiveDatabaseTableNaturalKey({
        catalog: table.catalog,
        db: table.db,
        name: foreignKey.toTable,
      });
      const toTableId = tableNaturalKeyToId.get(toTableKey);
      if (!toTableId) {
        continue;
      }

      const fromColumnId = columnKeyToId.get(`${fromTableKey}.${foreignKey.fromColumn}`);
      const toColumnId = columnKeyToId.get(`${toTableKey}.${foreignKey.toColumn}`);
      if (!fromColumnId || !toColumnId) {
        continue;
      }

      extractedFormalLinks.push({ fromTableId, fromColumnId, toTableId, toColumnId });
    }
  }

  const currentFormalLinks = currentLinks.filter((link) => link.source === 'formal');
  const extractedLinkKeys = new Set(extractedFormalLinks.map((link) => `${link.fromColumnId}->${link.toColumnId}`));
  const linksToDelete = currentFormalLinks.filter(
    (link) => !extractedLinkKeys.has(`${link.fromColumnId}->${link.toColumnId}`),
  );

  const currentLinkKeys = new Set(currentFormalLinks.map((link) => `${link.fromColumnId}->${link.toColumnId}`));
  const linksToCreate = extractedFormalLinks.filter(
    (link) => !currentLinkKeys.has(`${link.fromColumnId}->${link.toColumnId}`),
  );

  // Every new formal link gets the same fixed attributes.
  const newLinks = linksToCreate.map((linkData) => ({
    id: idFactory(),
    fromTableId: linkData.fromTableId,
    fromColumnId: linkData.fromColumnId,
    toTableId: linkData.toTableId,
    toColumnId: linkData.toColumnId,
    source: 'formal' as const,
    confidence: 1,
    relationshipType: 'MANY_TO_ONE',
    isPrimaryKeyReference: true,
  }));

  const deletedLinkIds = new Set(linksToDelete.map((link) => link.id));
  const preservedFormalLinks = currentFormalLinks.filter((link) => !deletedLinkIds.has(link.id));

  return {
    links: [...preservedFormalLinks, ...newLinks],
    created: linksToCreate.length,
    deleted: linksToDelete.length,
  };
}

/**
 * Compares the stored schema with a freshly extracted one and produces a plan:
 * the resulting schema, the operations needed to get there, summary counters,
 * the ids of changed tables/columns, and the inferred links that need
 * re-validation.
 *
 * Tables are matched by `buildLiveDatabaseTableNaturalKey`, columns by name.
 * This function does not write anything itself; it only mutates objects it
 * created for the returned plan.
 *
 * Resulting `schema.links` contains the reconciled formal links followed by
 * the stored inferred links whose tables and columns all still exist.
 */
export function planLiveDatabaseStructuralSync(
  input: PlanLiveDatabaseStructuralSyncInput,
): LiveDatabaseStructuralSyncPlan {
  const operations: LiveDatabaseStructuralSyncOperations = {
    deleteTableIds: [],
    deleteColumnIds: [],
    insertTables: [],
    insertColumns: [],
    touchColumnIds: [],
    invalidateColumnEmbeddingIds: [],
  };
  const stats: LiveDatabaseStructuralSyncStats = {
    tablesCreated: 0,
    tablesDeleted: 0,
    columnsCreated: 0,
    columnsDeleted: 0,
    columnsModified: 0,
    formalLinksCreated: 0,
    formalLinksDeleted: 0,
  };
  const changes: LiveDatabaseStructuralChanges = {
    newTableIds: [],
    newColumnIds: [],
    tablesWithStructuralChanges: [],
    columnsWithTypeChange: [],
    columnsWithDescriptionChange: [],
    tablesWithDescriptionChange: [],
  };
  const inferredLinksToValidate: string[] = [];

  const currentTablesByKey = new Map<string, LiveDatabaseSyncedTable>();
  const extractedTablesByKey = new Map<string, LiveDatabaseExtractedTable>();

  if (input.current) {
    for (const table of input.current.tables) {
      currentTablesByKey.set(buildLiveDatabaseTableNaturalKey(table), table);
    }
  }
  for (const table of input.extracted.tables) {
    extractedTablesByKey.set(buildLiveDatabaseTableNaturalKey(table), table);
  }

  const tablesToDelete: LiveDatabaseSyncedTable[] = [];
  const tablesToUpdate: Array<{
    current: LiveDatabaseSyncedTable;
    extracted: LiveDatabaseExtractedTable;
  }> = [];
  const tablesToCreate: LiveDatabaseExtractedTable[] = [];

  for (const [key, table] of currentTablesByKey) {
    const extractedTable = extractedTablesByKey.get(key);
    if (!extractedTable) {
      tablesToDelete.push(table);
    } else {
      tablesToUpdate.push({ current: table, extracted: extractedTable });
    }
  }

  for (const [key, table] of extractedTablesByKey) {
    if (!currentTablesByKey.has(key)) {
      tablesToCreate.push(table);
    }
  }

  for (const table of tablesToDelete) {
    operations.deleteTableIds.push(table.id);
    stats.tablesDeleted++;
    // The table's columns are counted as deleted but not listed in
    // deleteColumnIds; only the table id is queued for deletion.
    stats.columnsDeleted += table.columns.length;
  }

  const updatedTables: LiveDatabaseSyncedTable[] = [];
  for (const { current, extracted } of tablesToUpdate) {
    const result = planUpdatedTable({
      currentTable: current,
      extractedTable: extracted,
      currentLinks: input.current?.links ?? [],
      inferredLinksToValidate,
      operations,
      idFactory: input.idFactory,
    });
    updatedTables.push(result.table);
    stats.columnsCreated += result.columnsCreated;
    stats.columnsDeleted += result.columnsDeleted;
    stats.columnsModified += result.columnsModified;
    changes.newColumnIds.push(...result.newColumnIds);
    changes.columnsWithTypeChange.push(...result.columnsWithTypeChange);
    changes.columnsWithDescriptionChange.push(...result.columnsWithDescriptionChange);
    if (result.tableDescriptionChanged) {
      changes.tablesWithDescriptionChange.push(current.id);
    }
    if (result.columnsCreated > 0 || result.columnsDeleted > 0 || result.columnsWithTypeChange.length > 0) {
      changes.tablesWithStructuralChanges.push(current.id);
    }
  }

  const createdTables: LiveDatabaseSyncedTable[] = [];
  for (const extractedTable of tablesToCreate) {
    const table = planCreatedTable({
      connectionId: input.connectionId,
      extractedTable,
      operations,
      idFactory: input.idFactory,
    });
    createdTables.push(table);
    stats.tablesCreated++;
    stats.columnsCreated += table.columns.length;
    changes.newTableIds.push(table.id);
    changes.newColumnIds.push(...table.columns.map((column) => column.id));
    changes.tablesWithStructuralChanges.push(table.id);
  }

  const allTables = [...updatedTables, ...createdTables];
  const tableNaturalKeyToId = new Map<string, string>();
  for (const table of allTables) {
    tableNaturalKeyToId.set(buildLiveDatabaseTableNaturalKey(table), table.id);
  }

  const formalLinkResult = syncFormalLinks({
    extracted: input.extracted,
    tables: allTables,
    tableNaturalKeyToId,
    currentLinks: input.current?.links ?? [],
    idFactory: input.idFactory,
  });
  stats.formalLinksCreated = formalLinkResult.created;
  stats.formalLinksDeleted = formalLinkResult.deleted;

  const deletedTableIds = new Set(tablesToDelete.map((table) => table.id));
  // Columns removed from tables that still exist: an inferred link touching
  // one of them would otherwise survive and point at a column that is gone.
  const deletedColumnIds = new Set(operations.deleteColumnIds);
  // NOTE(review): links with source 'manual' are not carried over into
  // schema.links here — confirm that they are handled elsewhere.
  const preservedInferredLinks = (input.current?.links ?? []).filter(
    (link) =>
      link.source === 'inferred' &&
      !deletedTableIds.has(link.fromTableId) &&
      !deletedTableIds.has(link.toTableId) &&
      !deletedColumnIds.has(link.fromColumnId) &&
      !deletedColumnIds.has(link.toColumnId),
  );

  return {
    schema: {
      connectionId: input.connectionId,
      tables: allTables,
      links: [...formalLinkResult.links, ...preservedInferredLinks],
    },
    inferredLinksToValidate,
    stats,
    changes,
    operations,
  };
}
diff --git a/packages/context/src/ingest/adapters/live-database/types.ts b/packages/context/src/ingest/adapters/live-database/types.ts
new file mode 100644
index 00000000..6ce0d150
--- /dev/null
+++ b/packages/context/src/ingest/adapters/live-database/types.ts
@@ -0,0 +1,10 @@
import type { KloSchemaSnapshot } from '../../../scan/types.js';

/** Port through which the live-database adapter obtains a schema snapshot for a connection. */
export interface LiveDatabaseIntrospectionPort {
  extractSchema(connectionId: string): Promise<KloSchemaSnapshot>;
}

/** Dependencies of the live-database source adapter; `now` is optional. */
export interface LiveDatabaseSourceAdapterDeps {
  introspection: LiveDatabaseIntrospectionPort;
  now?: () => Date;
}
diff --git a/packages/context/src/ingest/adapters/looker/chunk.test.ts b/packages/context/src/ingest/adapters/looker/chunk.test.ts
new file mode 100644
index 00000000..9d41d37a
--- /dev/null
+++ b/packages/context/src/ingest/adapters/looker/chunk.test.ts
@@ -0,0 +1,154 @@
import { mkdir, mkdtemp, rm, writeFile } from 'node:fs/promises';
import { tmpdir } from 'node:os';
import { join } from
/**
 * Test helper: writes `value` as pretty-printed JSON (2-space indent plus a trailing newline) to
 * `relPath` inside `stagedDir`, creating missing parent directories first.
 *
 * @param stagedDir Root directory of the staged fixture.
 * @param relPath   File path relative to `stagedDir`.
 * @param value     Any JSON-serializable value.
 */
async function writeJson(stagedDir: string, relPath: string, value: unknown): Promise<void> {
  const abs = join(stagedDir, relPath);
  // `join(abs, '..')` normalizes to the directory that contains the target file.
  await mkdir(join(abs, '..'), { recursive: true });
  await writeFile(abs, `${JSON.stringify(value, null, 2)}\n`, 'utf-8');
}
'signals/dashboard_usage.json', [ + { contentId: '10', queryCount30d: 50, uniqueUsers30d: 8 }, + ]); + await writeJson(stagedDir, 'signals/look_usage.json', [{ contentId: '20', queryCount30d: 20, uniqueUsers30d: 5 }]); + await writeJson(stagedDir, 'signals/scheduled_plans.json', [ + { contentId: '10', contentType: 'dashboard', isScheduled: true, scheduleCount: 1, recipientCount: 3 }, + ]); + await writeJson(stagedDir, 'signals/favorites.json', [ + { contentId: '10', contentType: 'dashboard', favoriteCount: 4 }, + ]); + await writeLookerEvidenceDocuments(stagedDir); +} + +describe('chunkLookerStagedDir', () => { + let stagedDir: string; + + beforeEach(async () => { + stagedDir = await mkdtemp(join(tmpdir(), 'looker-chunk-')); + await writeSmallFixture(stagedDir); + }); + + afterEach(async () => { + await rm(stagedDir, { recursive: true, force: true }); + }); + + it('emits one WU per explore, dashboard, and Look with readable dependencies', async () => { + const result = await chunkLookerStagedDir(stagedDir); + expect(result.reconcileNotes).toEqual([ + expect.stringContaining('emit_artifact_resolution with actionType="subsumed"'), + ]); + expect(result.workUnits.map((wu) => wu.unitKey).sort()).toEqual([ + 'looker-dashboard-10', + 'looker-explore-b2b-sales_pipeline', + 'looker-look-20', + ]); + + const dashboard = result.workUnits.find((wu) => wu.unitKey === 'looker-dashboard-10'); + expect(dashboard?.rawFiles).toEqual([ + 'dashboards/10.json', + 'evidence/dashboards/10/metadata.json', + 'evidence/dashboards/10/page.md', + ]); + expect(dashboard?.notes).toContain('context_candidate_write'); + expect(dashboard?.notes).not.toContain('wiki_write'); + expect(dashboard?.dependencyPaths.sort()).toEqual([ + 'explores/b2b/sales_pipeline.json', + 'folders/tree.json', + 'signals/dashboard_usage.json', + 'signals/favorites.json', + 'signals/scheduled_plans.json', + 'users/3.json', + ]); + + const explore = result.workUnits.find((wu) => wu.unitKey === 
  // Incremental run where only the explore file is modified. The dashboard and the Look are
  // listed as unchanged, yet both must still be emitted because the explore file appears in
  // their dependencyPaths (the first test asserts this for the dashboard).
  it('keeps downstream dashboard and Look WUs when an explore dependency changes', async () => {
    const result = await chunkLookerStagedDir(stagedDir, {
      added: [],
      modified: ['explores/b2b/sales_pipeline.json'],
      deleted: [],
      unchanged: [
        'dashboards/10.json',
        'looks/20.json',
        'lookml_models.json',
        'folders/tree.json',
        'users/3.json',
        'signals/dashboard_usage.json',
        'signals/look_usage.json',
        'signals/scheduled_plans.json',
        'signals/favorites.json',
      ],
    });

    // All three units survive the diff filter: the explore directly, the other two transitively.
    expect(result.workUnits.map((wu) => wu.unitKey).sort()).toEqual([
      'looker-dashboard-10',
      'looker-explore-b2b-sales_pipeline',
      'looker-look-20',
    ]);
    // The dependency change must not leak into rawFiles: the dashboard still owns only its own
    // JSON plus its evidence documents.
    expect(result.workUnits.find((wu) => wu.unitKey === 'looker-dashboard-10')?.rawFiles).toEqual([
      'dashboards/10.json',
      'evidence/dashboards/10/metadata.json',
      'evidence/dashboards/10/page.md',
    ]);
  });
/**
 * Scans a staged Looker directory and loads every dashboard, Look and explore file in it.
 *
 * Files are recognized by path shape (`dashboards/<id>.json`, `looks/<id>.json`,
 * `explores/<model>/<explore>.json`) and validated with the matching staged-file schema, so a
 * malformed file makes the whole load reject with the parser's or schema's own error.
 *
 * @param stagedDir Root of the staged directory.
 * @returns All relative file paths, the parsed dashboards and Looks keyed by relative path, and
 *          the de-duplicated, sorted list of explore paths.
 */
async function loadProject(stagedDir: string): Promise<LoadedLookerProject> {
  const allPaths = await walk(stagedDir);
  const dashboardsByPath = new Map<string, StagedDashboardFile>();
  const looksByPath = new Map<string, StagedLookFile>();
  const explorePaths = new Set<string>();

  // Single place that reads and JSON-decodes a staged file; schema validation stays at the call
  // site because each file kind has its own schema.
  const readJson = async (path: string): Promise<unknown> =>
    JSON.parse(await readFile(join(stagedDir, path), 'utf-8'));

  // Files are read one at a time, in the sorted order produced by `walk`.
  for (const path of allPaths) {
    if (/^dashboards\/[^/]+\.json$/.test(path)) {
      dashboardsByPath.set(path, stagedDashboardFileSchema.parse(await readJson(path)));
    } else if (/^looks\/[^/]+\.json$/.test(path)) {
      looksByPath.set(path, stagedLookFileSchema.parse(await readJson(path)));
    } else if (/^explores\/[^/]+\/[^/]+\.json$/.test(path)) {
      const explore = stagedExploreFileSchema.parse(await readJson(path));
      // The explore path is rebuilt from the file's own modelName/exploreName rather than taken
      // from its location on disk.
      explorePaths.add(explorePath(explore.modelName, explore.exploreName));
    }
  }

  return { allPaths, dashboardsByPath, looksByPath, explorePaths: [...explorePaths].sort() };
}
/**
 * Notes attached to dashboard and Look WorkUnits. The text is identical for both kinds except
 * for the subject noun.
 */
function wikiBoundKnowledgeNotes(subject: 'dashboard' | 'Look'): string {
  return `Extract generalizable metric, segment, and domain knowledge from this ${subject}. Treat usage, owner, and folder data as prioritization/provenance context only. Use context_evidence_search/context_evidence_read and context_candidate_write for wiki-bound knowledge; do not write wiki pages directly from this WorkUnit.`;
}

/**
 * Collects the staged files a dashboard or Look depends on, keeping only those that exist in the
 * project: the folder tree, the kind-specific usage signal, scheduled plans, favorites, the
 * owner's user file, and the explore behind each query.
 *
 * @returns The de-duplicated dependency paths, sorted.
 */
function collectContentDependencies(
  project: LoadedLookerProject,
  usageSignalPath: string,
  ownerId: string | null | undefined,
  queries: ReadonlyArray<StagedLookerQuery | null>,
): string[] {
  const deps = new Set<string>();
  addIfPresent(project, deps, STAGED_FILES.foldersTree);
  addIfPresent(project, deps, usageSignalPath);
  addIfPresent(project, deps, STAGED_FILES.signals.scheduledPlans);
  addIfPresent(project, deps, STAGED_FILES.signals.favorites);
  if (ownerId) {
    addIfPresent(project, deps, `users/${ownerId}.json`);
  }
  for (const query of queries) {
    addExploreDependency(project, deps, query);
  }
  return [...deps].sort();
}

/**
 * Builds the complete WorkUnit list for a staged Looker project: one unit per explore, one per
 * dashboard and one per Look.
 *
 * @param project Loaded staged project (paths plus parsed dashboards and Looks).
 * @returns All WorkUnits, sorted by `unitKey`.
 */
function emitFirstRunWorkUnits(project: LoadedLookerProject): WorkUnit[] {
  const units: WorkUnit[] = [];

  // Whether lookml_models.json is staged does not change between explores, so check it once.
  const hasLookmlModels = project.allPaths.includes(STAGED_FILES.lookmlModels);
  for (const path of project.explorePaths) {
    const parts = /^explores\/([^/]+)\/([^/]+)\.json$/.exec(path);
    if (!parts) {
      continue;
    }
    const [, modelName, exploreName] = parts;
    units.push(
      buildUnit(project, {
        unitKey: `looker-explore-${modelName}-${exploreName}`,
        displayLabel: `Looker explore ${modelName}.${exploreName}`,
        rawFiles: [path, ...evidencePathsForExplore(project, modelName, exploreName)],
        // A fresh array per unit so units never share a mutable dependency list.
        dependencyPaths: hasLookmlModels ? [STAGED_FILES.lookmlModels] : [],
        notes: `Write API-derived SL source looker__${modelName}__${exploreName} and durable domain knowledge for this Looker explore.`,
      }),
    );
  }

  for (const [path, dashboard] of [...project.dashboardsByPath.entries()].sort(([a], [b]) => a.localeCompare(b))) {
    units.push(
      buildUnit(project, {
        unitKey: `looker-dashboard-${dashboard.lookerId}`,
        displayLabel: `Looker dashboard "${dashboard.title}"`,
        rawFiles: [path, ...evidencePathsForDashboard(project, dashboard.lookerId)],
        dependencyPaths: collectContentDependencies(
          project,
          STAGED_FILES.signals.dashboardUsage,
          dashboard.ownerId,
          dashboard.tiles.map((tile) => tile.query),
        ),
        notes: wikiBoundKnowledgeNotes('dashboard'),
      }),
    );
  }

  for (const [path, look] of [...project.looksByPath.entries()].sort(([a], [b]) => a.localeCompare(b))) {
    units.push(
      buildUnit(project, {
        unitKey: `looker-look-${look.lookerId}`,
        displayLabel: `Looker Look "${look.title}"`,
        rawFiles: [path, ...evidencePathsForLook(project, look.lookerId)],
        dependencyPaths: collectContentDependencies(project, STAGED_FILES.signals.lookUsage, look.ownerId, [
          look.query,
        ]),
        notes: wikiBoundKnowledgeNotes('Look'),
      }),
    );
  }

  return units.sort((a, b) => a.unitKey.localeCompare(b.unitKey));
}
/**
 * Returns the staged-directory relative path of an explore's JSON file, in the form
 * `explores/<modelName>/<exploreName>.json`.
 */
function explorePath(modelName: string, exploreName: string): string {
  const modelDir = 'explores/' + modelName;
  const fileName = exploreName + '.json';
  return modelDir + '/' + fileName;
}
'10' }]), + dashboard: vi.fn().mockResolvedValue({ + id: '10', + title: 'Revenue Dashboard', + description: 'Revenue concepts', + folder_id: '20', + user_id: '1', + updated_at: '2026-04-30T00:00:00.000Z', + dashboard_elements: [ + { + id: '99', + title: 'ARR', + look_id: null, + query: { + id: 'q1', + model: 'b2b', + view: 'sales_pipeline', + fields: ['opportunities.arr', 'opportunities.stage'], + filters: { 'opportunities.stage': 'open' }, + sorts: ['opportunities.arr desc'], + limit: '500', + }, + }, + ], + }), + search_looks: vi.fn().mockResolvedValue([{ id: '30' }]), + search_scheduled_plans: vi.fn().mockResolvedValue([]), + look: vi.fn().mockResolvedValue({ + id: '30', + title: 'Open Pipeline ARR', + description: 'ARR for open opportunities', + folder_id: '20', + user_id: '1', + updated_at: '2026-04-30T00:00:00.000Z', + query: { + id: 'q2', + model: 'b2b', + view: 'sales_pipeline', + fields: ['opportunities.arr'], + filters: { 'opportunities.stage': 'open' }, + }, + }), + all_folders: vi.fn().mockResolvedValue([{ id: '20', name: 'Executive', parent_id: null }]), + all_users: vi.fn().mockResolvedValue([{ id: '1', display_name: 'API User', email: 'api@example.com' }]), + all_groups: vi.fn().mockResolvedValue([{ id: '2', name: 'Finance' }]), + all_connections: vi.fn().mockResolvedValue([ + { + name: 'b2b_sandbox_bq', + host: 'warehouse.example.com', + database: 'analytics', + schema: 'public', + dialect_name: 'bigquery_standard_sql', + }, + ]), + all_lookml_models: vi + .fn() + .mockResolvedValue([ + { name: 'b2b', label: 'B2B', explores: [{ name: 'sales_pipeline', label: 'Sales Pipeline' }] }, + ]), + lookml_model_explore: vi.fn().mockResolvedValue({ + name: 'sales_pipeline', + label: 'Sales Pipeline', + description: 'Opportunity pipeline', + sql_table_name: 'proj.dataset.opportunities AS opportunities', + connection_name: 'b2b_sandbox_bq', + view_name: 'opportunities', + fields: { + dimensions: [{ name: 'opportunities.stage', label: 'Stage', type: 'string', 
sql: '$' + '{TABLE}.stage' }], + measures: [{ name: 'opportunities.arr', label: 'ARR', type: 'sum', sql: '$' + '{TABLE}.arr' }], + }, + joins: [ + { + name: 'accounts', + type: 'left_outer', + relationship: 'many_to_one', + sql_table_name: 'proj.dataset.accounts', + sql_on: '$' + '{opportunities.account_id} = $' + '{accounts.id}', + from: null, + }, + ], + }), + run_inline_query: vi.fn().mockResolvedValue('[]'), + logout: vi.fn().mockResolvedValue(undefined), + ...overrides, + }; + return port; +} + +describe('LookerClient', () => { + it('validates credentials with me()', async () => { + const client = new LookerClient(params(), { sdkFactory: () => sdk() }); + + await expect(client.testConnection()).resolves.toEqual({ + success: true, + metadata: { userId: '1', displayName: 'API User', email: 'api@example.com' }, + }); + }); + + it('maps dashboards, looks, folders, models, explores, users, and groups to staged DTOs', async () => { + const fakeSdk = sdk(); + const client = new LookerClient(params(), { sdkFactory: () => fakeSdk }); + + await expect(client.listDashboards()).resolves.toEqual([{ id: '10', updatedAt: null }]); + await expect(client.getDashboard('10')).resolves.toMatchObject({ + lookerId: '10', + title: 'Revenue Dashboard', + tiles: [{ id: '99', query: { model: 'b2b', view: 'sales_pipeline' } }], + }); + await expect(client.listLooks()).resolves.toEqual([{ id: '30', updatedAt: null }]); + await expect(client.getLook('30')).resolves.toMatchObject({ + lookerId: '30', + title: 'Open Pipeline ARR', + query: { model: 'b2b', view: 'sales_pipeline' }, + }); + await expect(client.listFolders()).resolves.toEqual({ + folders: [{ id: '20', name: 'Executive', parentId: null, path: ['Executive'] }], + }); + await expect(client.listLookmlModels()).resolves.toEqual({ + models: [{ name: 'b2b', label: 'B2B', explores: [{ name: 'sales_pipeline', label: 'Sales Pipeline' }] }], + }); + await expect(client.listLookerConnections()).resolves.toEqual([ + { + name: 
'b2b_sandbox_bq', + host: 'warehouse.example.com', + database: 'analytics', + schema: 'public', + dialect: 'bigquery_standard_sql', + }, + ]); + await expect(client.getExplore('b2b', 'sales_pipeline')).resolves.toMatchObject({ + modelName: 'b2b', + exploreName: 'sales_pipeline', + rawSqlTableName: 'proj.dataset.opportunities AS opportunities', + connectionName: 'b2b_sandbox_bq', + viewName: 'opportunities', + fields: { dimensions: [{ name: 'opportunities.stage' }], measures: [{ name: 'opportunities.arr' }] }, + joins: [ + { + name: 'accounts', + rawSqlTableName: 'proj.dataset.accounts', + sqlOn: '$' + '{opportunities.account_id} = $' + '{accounts.id}', + from: null, + targetTable: null, + }, + ], + targetWarehouseConnectionId: null, + targetTable: null, + }); + expect(fakeSdk.dashboard).toHaveBeenCalledWith( + '10', + 'id,title,description,folder_id,user_id,updated_at,dashboard_elements(id,title,look_id,query(id,model,view,fields,filters,sorts,limit,dynamic_fields))', + ); + expect(fakeSdk.look).toHaveBeenCalledWith( + '30', + 'id,title,description,folder_id,user_id,updated_at,query(id,model,view,fields,filters,sorts,limit,dynamic_fields)', + ); + expect(fakeSdk.lookml_model_explore).toHaveBeenCalledWith( + 'b2b', + 'sales_pipeline', + 'name,label,description,sql_table_name,connection_name,view_name,fields,joins(name,type,relationship,sql_table_name,sql_on,from)', + ); + expect(fakeSdk.all_connections).toHaveBeenCalledWith('name,host,database,schema,dialect_name'); + }); + + it('returns empty usage signals when system activity access fails', async () => { + const client = new LookerClient(params(), { + sdkFactory: () => + sdk({ + run_inline_query: vi.fn().mockRejectedValue(new Error('access denied')), + search_dashboards: vi.fn().mockResolvedValue([{ id: '10', favorite_count: 4 }]), + search_looks: vi.fn().mockResolvedValue([{ id: '30', favorite_count: 2 }]), + search_scheduled_plans: vi.fn().mockResolvedValue([]), + }), + }); + + await 
expect(client.getSignals()).resolves.toEqual({ + dashboardUsage: [], + lookUsage: [], + scheduledPlans: [], + favorites: [ + { contentId: '10', contentType: 'dashboard', favoriteCount: 4 }, + { contentId: '30', contentType: 'look', favoriteCount: 2 }, + ], + }); + }); + + it('paginates dashboard and Look searches', async () => { + const dashboardPageOne = Array.from({ length: 500 }, (_, index) => ({ id: String(index + 1) })); + const lookPageOne = Array.from({ length: 500 }, (_, index) => ({ id: String(index + 1001) })); + const fakeSdk = sdk({ + search_dashboards: vi + .fn() + .mockResolvedValueOnce(dashboardPageOne) + .mockResolvedValueOnce([{ id: '501' }]), + search_looks: vi + .fn() + .mockResolvedValueOnce(lookPageOne) + .mockResolvedValueOnce([{ id: '1501' }]), + }); + const client = new LookerClient(params(), { sdkFactory: () => fakeSdk }); + + await expect(client.listDashboards()).resolves.toHaveLength(501); + await expect(client.listLooks()).resolves.toHaveLength(501); + + expect(fakeSdk.search_dashboards).toHaveBeenNthCalledWith( + 1, + expect.objectContaining({ + deleted: false, + fields: 'id,updated_at', + limit: 500, + offset: 0, + sorts: 'id', + }), + ); + expect(fakeSdk.search_dashboards).toHaveBeenNthCalledWith( + 2, + expect.objectContaining({ + limit: 500, + offset: 500, + }), + ); + expect(fakeSdk.search_looks).toHaveBeenNthCalledWith( + 1, + expect.objectContaining({ + deleted: false, + fields: 'id,updated_at', + limit: 500, + offset: 0, + sorts: 'id', + }), + ); + expect(fakeSdk.search_looks).toHaveBeenNthCalledWith( + 2, + expect.objectContaining({ + limit: 500, + offset: 500, + }), + ); + }); + + it('returns updatedAt cursors from dashboard and Look listing rows', async () => { + const fakeSdk = sdk({ + search_dashboards: vi.fn().mockResolvedValue([{ id: '10', updated_at: '2026-04-30T12:00:00.000Z' }]), + search_looks: vi.fn().mockResolvedValue([{ id: '30', updated_at: '2026-04-30T11:00:00.000Z' }]), + }); + const client = new 
LookerClient(params(), { sdkFactory: () => fakeSdk }); + + await expect(client.listDashboards()).resolves.toEqual([{ id: '10', updatedAt: '2026-04-30T12:00:00.000Z' }]); + await expect(client.listLooks()).resolves.toEqual([{ id: '30', updatedAt: '2026-04-30T11:00:00.000Z' }]); + }); + + it('logs out the SDK session during cleanup', async () => { + const fakeSdk = sdk(); + const client = new LookerClient(params(), { sdkFactory: () => fakeSdk }); + + await client.testConnection(); + await client.cleanup(); + + expect(fakeSdk.logout).toHaveBeenCalledTimes(1); + }); + + it('aggregates usage, scheduled-plan, and favorite signals', async () => { + const runInlineQuery = vi + .fn() + .mockResolvedValueOnce( + JSON.stringify([ + { + 'dashboard.id': '10', + 'history.query_run_count': 3, + 'history.created_date': '2026-04-30', + 'user.id': 'user-1', + }, + { + 'dashboard.id': '10', + 'history.query_run_count': '2', + 'history.created_date': '2026-04-29', + 'user.id': 'user-2', + }, + ]), + ) + .mockResolvedValueOnce( + JSON.stringify([ + { + 'look.id': '30', + 'history.query_run_count': 7, + 'history.created_date': '2026-04-28', + 'user.id': 'user-1', + }, + ]), + ); + const fakeSdk = sdk({ + run_inline_query: runInlineQuery, + search_dashboards: vi.fn().mockResolvedValueOnce([{ id: '10', favorite_count: 4 }]), + search_looks: vi.fn().mockResolvedValueOnce([{ id: '30', favorite_count: 2 }]), + search_scheduled_plans: vi.fn().mockResolvedValueOnce([ + { + id: 'sp-dashboard', + dashboard_id: '10', + look_id: null, + enabled: true, + scheduled_plan_destination: [{ id: 'dest-1' }, { id: 'dest-2' }], + }, + { + id: 'sp-look', + dashboard_id: null, + look_id: '30', + enabled: true, + scheduled_plan_destination: [{ id: 'dest-3' }], + }, + ]), + }); + const client = new LookerClient(params(), { sdkFactory: () => fakeSdk }); + + await expect(client.getSignals()).resolves.toEqual({ + dashboardUsage: [ + { + contentId: '10', + queryCount30d: 5, + uniqueUsers30d: 2, + lastRunAt: 
  // The first search_dashboards call rejects with a 429 carrying `retry-after: 2`; the second
  // succeeds. The client is expected to wait via the injected `sleep` and then retry.
  it('retries a 429 response once using Retry-After seconds', async () => {
    // Injected sleep keeps the test instant and lets us assert the requested delay.
    const sleep = vi.fn().mockResolvedValue(undefined);
    const rateLimitError = new Error('rate limited');
    // This is the error shape the test feeds the client: `statusCode` plus lower-case header names.
    Object.assign(rateLimitError, { statusCode: 429, headers: { 'retry-after': '2' } });
    const fakeSdk = sdk({
      search_dashboards: vi
        .fn()
        .mockRejectedValueOnce(rateLimitError)
        .mockResolvedValueOnce([{ id: '10' }]),
    });
    const client = new LookerClient(params(), { sdkFactory: () => fakeSdk, sleep });

    await expect(client.listDashboards()).resolves.toEqual([{ id: '10', updatedAt: null }]);

    // Retry-After is given in seconds; the sleep is requested in milliseconds.
    expect(sleep).toHaveBeenCalledWith(2000);
    // One failed attempt plus one successful retry.
    expect(fakeSdk.search_dashboards).toHaveBeenCalledTimes(2);
  });
  // Regression test for SDK construction: no sdkFactory is injected, so the client builds the
  // real SDK from the inline connection parameters.
  // NOTE(review): testConnection() here appears to attempt a real request against
  // https://example.looker.com, so the outcome and duration depend on the network environment
  // (offline CI, DNS, proxies) — confirm this is acceptable or stub the transport.
  it('initializes the real @looker/sdk-node SDK with inline credentials without throwing', async () => {
    const client = new LookerClient(params());

    const result = await client.testConnection();

    // Without injected sdkFactory the real SDK is constructed via InlineLookerSettings.
    // This used to throw "Missing required configuration values like base_url" because
    // the parent NodeSettingsIniFile constructor validated config before the override
    // could supply credentials. Whatever happens now (auth/network failure against the
    // bogus example URL is fine) — what must NOT happen is a synchronous SDK-init throw.
    expect(result.success).toBe(false);
    expect(result.error).toBeDefined();
    expect(result.error).not.toMatch(/Missing required configuration values/i);

    // Release whatever session state the client created.
    await client.cleanup();
  });
/**
 * Summary of one database connection configured in Looker.
 *
 * Every field except `name` is nullable. The client tests request these from `all_connections`
 * with the fields `name,host,database,schema,dialect_name` and expect `dialect_name` to be
 * exposed here as `dialect`.
 */
export interface LookerWarehouseConnectionInfo {
  // Looker connection name (e.g. the value an explore reports as `connection_name`).
  name: string;
  host: string | null;
  database: string | null;
  schema: string | null;
  // Looker dialect identifier, e.g. 'bigquery_standard_sql' in the test fixture.
  dialect: string | null;
}
/**
 * Looker SDK settings backed by in-memory connection parameters.
 *
 * The same section values are passed to the `NodeSettings` constructor and
 * returned from `readConfig`, so both paths expose identical credentials.
 */
class InlineLookerSettings extends NodeSettings {
  constructor(private readonly params: LookerConnectionParams) {
    super('', InlineLookerSettings.sectionFor(params) as unknown as IApiSettings);
  }

  /** Returns the inline section; the requested section name is ignored. */
  override readConfig(_section?: string): IApiSection {
    return InlineLookerSettings.sectionFor(this.params);
  }

  /** Builds the settings section from the given connection parameters. */
  private static sectionFor(source: LookerConnectionParams): IApiSection {
    return {
      base_url: normalizeBaseUrl(source.base_url),
      client_id: source.client_id,
      client_secret: source.client_secret, // pragma: allowlist secret
      verify_ssl: 'true',
      timeout: '120',
    };
  }
}
InlineLookerSettings(params)); + return { + me: (fields) => sdk.ok(sdk.me(fields)).then(toRecord), + search_dashboards: (request) => + sdk.ok(sdk.search_dashboards((request ?? {}) as IRequestSearchDashboards)).then(toRecordArray), + dashboard: (id, fields) => sdk.ok(sdk.dashboard(id, fields)).then(toRecord), + search_looks: (request) => sdk.ok(sdk.search_looks((request ?? {}) as IRequestSearchLooks)).then(toRecordArray), + search_scheduled_plans: (request) => + sdk.ok(sdk.search_scheduled_plans((request ?? {}) as IRequestSearchScheduledPlans)).then(toRecordArray), + look: (id, fields) => sdk.ok(sdk.look(id, fields)).then(toRecord), + all_folders: (fields) => sdk.ok(sdk.all_folders(fields)).then(toRecordArray), + all_users: (fields) => sdk.ok(sdk.all_users({ fields })).then(toRecordArray), + all_groups: (fields) => sdk.ok(sdk.all_groups({ fields })).then(toRecordArray), + all_connections: (fields) => sdk.ok(sdk.all_connections(fields)).then(toRecordArray), + all_lookml_models: (fields) => sdk.ok(sdk.all_lookml_models({ fields })).then(toRecordArray), + lookml_model_explore: (modelName, exploreName, fields) => + sdk + .ok(sdk.lookml_model_explore({ lookml_model_name: modelName, explore_name: exploreName, fields })) + .then(toRecord), + run_inline_query: (request) => sdk.ok(sdk.run_inline_query(request)), + logout: async () => { + await sdk.authSession.logout(); + }, + }; +} + +export class LookerClient implements LookerRuntimeClient { + private readonly logger: LookerClientLogger; + private readonly params: LookerConnectionParams; + private sdkInstance: LookerSdkPort | null = null; + + constructor( + connectionParams: Record, + private readonly deps: LookerClientDeps = {}, + ) { + this.logger = deps.logger ?? 
/**
 * Checks the configured credentials by fetching the authenticated user's
 * `id`, `display_name` and `email`.
 *
 * Failures are not thrown: anything raised while creating the SDK or calling
 * `me` (including a failed rate-limit retry) is returned as
 * `{ success: false, error }`.
 *
 * @returns `success: true` with user metadata, or `success: false` with the
 *   failure message.
 */
async testConnection(): Promise<TestConnectionResult> {
  try {
    const me = await this.withRateLimitRetry(() => this.sdk().me('id,display_name,email'));
    return {
      success: true,
      metadata: {
        userId: stringValue(me.id),
        displayName: nullableString(me.display_name),
        email: nullableString(me.email),
      },
    };
  } catch (error) {
    // Use the module's shared formatter rather than re-implementing it inline.
    return { success: false, error: errorMessage(error) };
  }
}
/**
 * Loads every folder (`id`, `name`, `parent_id`) and returns them with the
 * ancestor path computed by `folderPath` against an id-keyed index.
 */
async listFolders(): Promise<StagedFoldersTreeFile> {
  const rows = await this.withRateLimitRetry(() => this.sdk().all_folders('id,name,parent_id'));
  // Later rows with a repeated id replace earlier ones in the index.
  const index = new Map<string, LookerRecord>(rows.map((row) => [stringValue(row.id), row] as const));
  const folders = rows.map((row) => ({
    id: stringValue(row.id),
    name: stringValue(row.name),
    parentId: nullableString(row.parent_id),
    path: folderPath(row, index),
  }));
  return { folders };
}
schema: nullableString(connection.schema), + dialect: nullableString(connection.dialect_name ?? connection.dialect), + })); + } + + async getExplore(modelName: string, exploreName: string): Promise { + const explore = await this.withRateLimitRetry(() => + this.sdk().lookml_model_explore(modelName, exploreName, LOOKER_EXPLORE_FIELDS), + ); + const fields = recordValue(explore.fields); + return { + modelName, + exploreName: stringValue(explore.name), + label: nullableString(explore.label), + description: nullableString(explore.description), + rawSqlTableName: nullableString(explore.sql_table_name ?? explore.sqlTableName), + connectionName: nullableString(explore.connection_name ?? explore.connectionName), + viewName: nullableString(explore.view_name ?? explore.viewName), + fields: { + dimensions: arrayValue(fields.dimensions).map(stagedField), + measures: arrayValue(fields.measures).map(stagedField), + }, + joins: arrayValue(explore.joins).map((join) => ({ + name: stringValue(join.name), + type: nullableString(join.type), + relationship: nullableString(join.relationship), + rawSqlTableName: nullableString(join.sql_table_name ?? join.sqlTableName), + sqlOn: nullableString(join.sql_on ?? 
/**
 * Logs out of the Looker session if an SDK instance was ever created.
 *
 * The cached instance is detached before `logout()` is awaited. A rejected
 * logout therefore cannot leave a stale handle for later calls to reuse, and
 * overlapping `cleanup()` calls do not log out twice. A logout failure still
 * propagates to the caller.
 */
async cleanup(): Promise<void> {
  const sdk = this.sdkInstance;
  if (!sdk) {
    return;
  }
  this.sdkInstance = null;
  await sdk.logout();
}
/**
 * Aggregates scheduled plans into one signal per dashboard or Look.
 *
 * Pages through `search_scheduled_plans` for all users and groups plans by
 * `<contentType>:<contentId>`. A plan is counted unless its `enabled` flag is
 * explicitly `false`; each counted plan adds one schedule and the length of
 * its `scheduled_plan_destination` array as recipients. Content with no
 * counted plan is omitted.
 *
 * @returns Signals ordered by `compareContentSignals`.
 */
private async getScheduledPlanSignals(): Promise<StagedLookerSignalsFile['scheduledPlans']> {
  const plans = await this.collectPaged((offset) =>
    this.sdk().search_scheduled_plans({
      all_users: true,
      fields: 'id,dashboard_id,look_id,enabled,scheduled_plan_destination',
      limit: LOOKER_PAGE_SIZE,
      offset,
      sorts: 'id',
    }),
  );
  const byContent = new Map<
    string,
    {
      contentId: string;
      contentType: 'dashboard' | 'look';
      isScheduled: boolean;
      scheduleCount: number;
      recipientCount: number;
    }
  >();

  for (const plan of plans) {
    const dashboardId = nullableString(plan.dashboard_id);
    const lookId = nullableString(plan.look_id);
    const contentType = dashboardId ? 'dashboard' : lookId ? 'look' : null;
    // Select the id with the same truthiness rule that selects the type. With
    // `??`, an empty-string dashboard id paired with a real look id produced
    // type 'look' but id '', and the plan was silently dropped.
    const contentId = dashboardId || lookId;
    if (!contentType || !contentId) {
      continue;
    }
    const key = `${contentType}:${contentId}`;
    const current =
      byContent.get(key) ??
      ({
        contentId,
        contentType,
        isScheduled: false,
        scheduleCount: 0,
        recipientCount: 0,
      } satisfies StagedLookerSignalsFile['scheduledPlans'][number]);
    if (plan.enabled !== false) {
      current.isScheduled = true;
      current.scheduleCount += 1;
      current.recipientCount += arrayValue(plan.scheduled_plan_destination).length;
    }
    byContent.set(key, current);
  }

  return [...byContent.values()].filter((signal) => signal.scheduleCount > 0).sort(compareContentSignals);
}
/**
 * Canonicalizes a Looker base URL: trims surrounding whitespace, drops any
 * trailing slashes, then removes a trailing `/api/4.0` or `/api/3.1` segment.
 */
function normalizeBaseUrl(baseUrl: string): string {
  const withoutTrailingSlashes = baseUrl.trim().replace(/\/+$/, '');
  return withoutTrailingSlashes.replace(/\/api\/(4\.0|3\.1)$/, '');
}
/**
 * Normalizes an inline-query result into an array of row objects.
 *
 * A string input is parsed as JSON; any other input is treated as an
 * already-decoded payload and used as-is. Accepting both matters because
 * calling `JSON.parse` on an already-decoded array coerces it to a string and
 * throws.
 * NOTE(review): whether the Looker SDK transport hands back decoded JSON for
 * `result_format: 'json'` should be confirmed against the SDK version in use.
 *
 * @param raw Query result, either JSON text or the decoded value.
 * @returns The object-typed elements of the payload when it is an array,
 *   otherwise an empty array.
 * @throws SyntaxError when `raw` is a string that is not valid JSON.
 */
function parseJsonRows(raw: unknown): LookerRecord[] {
  const parsed: unknown = typeof raw === 'string' ? JSON.parse(raw) : raw;
  if (!Array.isArray(parsed)) {
    return [];
  }
  return parsed.filter((row): row is LookerRecord => !!row && typeof row === 'object');
}
/**
 * Coerces a loosely-typed value to a finite number.
 *
 * Finite numbers pass through; non-blank strings are converted with
 * `Number()`. Everything else, including non-finite results, yields 0.
 */
function numberValue(value: unknown): number {
  switch (typeof value) {
    case 'number':
      return Number.isFinite(value) ? value : 0;
    case 'string': {
      if (value.trim() === '') {
        return 0;
      }
      const numeric = Number(value);
      return Number.isFinite(numeric) ? numeric : 0;
    }
    default:
      return 0;
  }
}
/**
 * Extracts an HTTP status code from an error-like value.
 *
 * Reads `statusCode`, falling back to `status`. A number is returned as-is; a
 * string is returned when it converts to a finite number, otherwise `null`.
 * When neither is a number or string, the lookup recurses into `response` if
 * that is an object. Non-object input yields `null`.
 */
function lookerStatusCode(error: unknown): number | null {
  if (typeof error !== 'object' || error === null) {
    return null;
  }
  const fields = error as Record<string, unknown>;
  const code = fields.statusCode ?? fields.status;
  switch (typeof code) {
    case 'number':
      return code;
    case 'string': {
      const numeric = Number(code);
      return Number.isFinite(numeric) ? numeric : null;
    }
    default: {
      const nested = fields.response;
      return nested && typeof nested === 'object' ? lookerStatusCode(nested) : null;
    }
  }
}
/**
 * Returns a shallow copy of `value` when it is a non-array object, otherwise
 * a new empty object.
 */
function recordValue(value: unknown): Record<string, unknown> {
  if (!value || typeof value !== 'object' || Array.isArray(value)) {
    return {};
  }
  return { ...(value as Record<string, unknown>) };
}
/**
 * Converts a value with `String()`, preserving absence: `null` and
 * `undefined` both map to `null`.
 */
function nullableString(value: unknown): string | null {
  return value === null || value === undefined ? null : String(value);
}
/**
 * Builds a JSON-over-HTTP POST runner bound to `baseUrl`.
 *
 * The returned runner resolves `path` relative to the base URL, sends
 * `payload` as a JSON body over `http` or `https` according to the URL
 * protocol, and resolves with the parsed response when it is a JSON object.
 *
 * The promise rejects when:
 * - the request or the response stream emits `'error'`;
 * - the status code is outside 200-299 (the message includes the body text);
 * - the body is not valid JSON, or is JSON but not a plain object.
 *
 * @param baseUrl Daemon base URL; a trailing slash is added when missing.
 */
function postJson(baseUrl: string): KloDaemonTableIdentifierHttpJsonRunner {
  return async (path, payload) =>
    new Promise<Record<string, unknown>>((resolve, reject) => {
      const target = new URL(path.replace(/^\//, ''), normalizedBaseUrl(baseUrl));
      const body = JSON.stringify(payload);
      const client = target.protocol === 'https:' ? httpsRequest : httpRequest;
      const request = client(
        target,
        {
          method: 'POST',
          headers: {
            accept: 'application/json',
            'content-type': 'application/json',
            'content-length': Buffer.byteLength(body),
          },
        },
        (response) => {
          const chunks: Buffer[] = [];
          response.on('data', (chunk: Buffer) => chunks.push(chunk));
          // A connection dropped after headers arrive surfaces as 'error' on the
          // response, not the request. Without a listener that event is thrown
          // as an uncaught exception and this promise never settles.
          response.on('error', reject);
          response.on('end', () => {
            const text = Buffer.concat(chunks).toString('utf8');
            const statusCode = response.statusCode ?? 0;
            if (statusCode < 200 || statusCode >= 300) {
              reject(new Error(`klo-daemon HTTP ${path} failed with ${statusCode}: ${text}`));
              return;
            }
            try {
              const parsed = JSON.parse(text) as unknown;
              if (!parsed || typeof parsed !== 'object' || Array.isArray(parsed)) {
                reject(new Error(`klo-daemon HTTP ${path} returned non-object JSON`));
                return;
              }
              resolve(parsed as Record<string, unknown>);
            } catch (error) {
              reject(error);
            }
          });
        },
      );
      request.on('error', reject);
      request.end(body);
    });
}
/**
 * Lists every regular file beneath `root`, recursively, as a path relative to
 * `root` with forward slashes, in default sort order.
 */
async function walk(root: string): Promise<string[]> {
  const found: string[] = [];
  for (const entry of await readdir(root, { withFileTypes: true, recursive: true })) {
    if (!entry.isFile()) {
      continue;
    }
    const absolute = join(entry.parentPath, entry.name);
    // Normalize Windows separators so callers can match with '/'-based patterns.
    found.push(relative(root, absolute).replace(/\\/g, '/'));
  }
  return found.sort();
}
return paths.some((path) => LOOKER_ENTITY_FILE_RE.test(path)); + } catch { + return false; + } +} diff --git a/packages/context/src/ingest/adapters/looker/evidence-documents.test.ts b/packages/context/src/ingest/adapters/looker/evidence-documents.test.ts new file mode 100644 index 00000000..6d4545ca --- /dev/null +++ b/packages/context/src/ingest/adapters/looker/evidence-documents.test.ts @@ -0,0 +1,188 @@ +import { mkdir, mkdtemp, readFile, rm, writeFile } from 'node:fs/promises'; +import { tmpdir } from 'node:os'; +import { dirname, join } from 'node:path'; +import { afterEach, beforeEach, describe, expect, it } from 'vitest'; +import { getLookerTriageSignals, writeLookerEvidenceDocuments } from './evidence-documents.js'; + +async function writeJson(root: string, relPath: string, value: unknown): Promise { + const target = join(root, relPath); + await mkdir(dirname(target), { recursive: true }); + await writeFile(target, `${JSON.stringify(value, null, 2)}\n`, 'utf-8'); +} + +async function readJson(root: string, relPath: string): Promise { + return JSON.parse(await readFile(join(root, relPath), 'utf-8')) as T; +} + +describe('Looker evidence documents', () => { + let stagedDir: string; + + beforeEach(async () => { + stagedDir = await mkdtemp(join(tmpdir(), 'looker-evidence-docs-')); + await writeJson(stagedDir, 'explores/b2b/sales_pipeline.json', { + modelName: 'b2b', + exploreName: 'sales_pipeline', + label: 'Sales Pipeline', + description: 'Pipeline analysis explore.', + fields: { + dimensions: [ + { name: 'opportunities.stage', label: 'Stage', type: 'string', sql: '${TABLE}.stage', description: null }, + ], + measures: [ + { + name: 'opportunities.arr', + label: 'ARR', + type: 'sum', + sql: '${TABLE}.arr', + description: 'Annual recurring revenue.', + }, + ], + }, + joins: [{ name: 'accounts', type: 'left_outer', relationship: 'many_to_one' }], + }); + await writeJson(stagedDir, 'dashboards/10.json', { + lookerId: '10', + title: 'Sales Pipeline Overview', + 
description: 'Executive dashboard for open pipeline ARR.', + folderId: '7', + ownerId: '3', + updatedAt: '2026-04-30T10:00:00.000Z', + tiles: [ + { + id: '100', + title: 'Open Pipeline ARR', + lookId: null, + query: { + model: 'b2b', + view: 'sales_pipeline', + fields: ['opportunities.arr', 'opportunities.stage'], + filters: { 'opportunities.stage': 'open' }, + sorts: ['opportunities.arr desc'], + limit: '500', + }, + }, + ], + }); + await writeJson(stagedDir, 'looks/20.json', { + lookerId: '20', + title: 'Active Opportunity Pipeline', + description: 'Saved Look for active opportunity pipeline review.', + folderId: '7', + ownerId: '3', + updatedAt: '2026-04-30T11:00:00.000Z', + query: { + model: 'b2b', + view: 'sales_pipeline', + fields: ['opportunities.arr'], + filters: { 'opportunities.stage': 'open' }, + sorts: [], + limit: '500', + }, + }); + await writeJson(stagedDir, 'signals/dashboard_usage.json', [ + { + contentId: '10', + queryCount30d: 80, + uniqueUsers30d: 12, + lastRunAt: '2026-04-30T09:00:00.000Z', + topUsers: ['3'], + }, + ]); + await writeJson(stagedDir, 'signals/look_usage.json', [ + { + contentId: '20', + queryCount30d: 2, + uniqueUsers30d: 1, + lastRunAt: '2026-04-29T09:00:00.000Z', + topUsers: ['3'], + }, + ]); + await writeJson(stagedDir, 'signals/scheduled_plans.json', [ + { contentId: '10', contentType: 'dashboard', isScheduled: true, scheduleCount: 2, recipientCount: 5 }, + ]); + await writeJson(stagedDir, 'signals/favorites.json', [ + { contentId: '10', contentType: 'dashboard', favoriteCount: 4 }, + ]); + }); + + afterEach(async () => { + await rm(stagedDir, { recursive: true, force: true }); + }); + + it('writes indexable metadata and markdown for explores, dashboards, and Looks', async () => { + await writeLookerEvidenceDocuments(stagedDir); + + await expect(readJson(stagedDir, 'evidence/explores/b2b/sales_pipeline/metadata.json')).resolves.toMatchObject({ + objectType: 'looker_explore', + id: 'looker:explore:b2b.sales_pipeline', + title: 
'Sales Pipeline', + path: 'Looker / Explores / b2b.sales_pipeline', + properties: { + rawPath: 'explores/b2b/sales_pipeline.json', + modelName: 'b2b', + exploreName: 'sales_pipeline', + }, + }); + await expect(readJson(stagedDir, 'evidence/dashboards/10/metadata.json')).resolves.toMatchObject({ + objectType: 'looker_dashboard', + id: 'looker:dashboard:10', + title: 'Sales Pipeline Overview', + path: 'Looker / Dashboards / Sales Pipeline Overview', + lastEditedAt: '2026-04-30T10:00:00.000Z', + properties: { + rawPath: 'dashboards/10.json', + lookerId: '10', + }, + }); + await expect(readJson(stagedDir, 'evidence/looks/20/metadata.json')).resolves.toMatchObject({ + objectType: 'looker_look', + id: 'looker:look:20', + title: 'Active Opportunity Pipeline', + path: 'Looker / Looks / Active Opportunity Pipeline', + properties: { + rawPath: 'looks/20.json', + lookerId: '20', + }, + }); + + const dashboardMarkdown = await readFile(join(stagedDir, 'evidence/dashboards/10/page.md'), 'utf-8'); + expect(dashboardMarkdown).toContain('# Sales Pipeline Overview'); + expect(dashboardMarkdown).toContain('Executive dashboard for open pipeline ARR.'); + expect(dashboardMarkdown).toContain('## Tile: Open Pipeline ARR'); + expect(dashboardMarkdown).toContain('- model: b2b'); + expect(dashboardMarkdown).toContain('- explore: sales_pipeline'); + expect(dashboardMarkdown).toContain('- opportunities.stage = open'); + expect(dashboardMarkdown).not.toContain('80'); + expect(dashboardMarkdown).not.toContain('queryCount30d'); + expect(dashboardMarkdown).not.toContain('recipient'); + expect(dashboardMarkdown).not.toContain('favorite'); + expect(dashboardMarkdown).not.toContain('owner'); + }); + + it('returns usage-aware triage signals without exposing usage as document prose', async () => { + await writeLookerEvidenceDocuments(stagedDir); + + await expect(getLookerTriageSignals(stagedDir, 'looker:dashboard:10')).resolves.toEqual({ + objectType: 'looker_dashboard', + propertyHints: { + 
contentType: 'dashboard', + queryCount30d: '80', + uniqueUsers30d: '12', + isScheduled: 'true', + favoriteCount: '4', + }, + lastEditedAt: '2026-04-30T10:00:00.000Z', + }); + await expect(getLookerTriageSignals(stagedDir, 'looker:look:20')).resolves.toEqual({ + objectType: 'looker_look', + propertyHints: { + contentType: 'look', + queryCount30d: '2', + uniqueUsers30d: '1', + isScheduled: 'false', + favoriteCount: '0', + }, + lastEditedAt: '2026-04-30T11:00:00.000Z', + }); + }); +}); diff --git a/packages/context/src/ingest/adapters/looker/evidence-documents.ts b/packages/context/src/ingest/adapters/looker/evidence-documents.ts new file mode 100644 index 00000000..0f8c1faa --- /dev/null +++ b/packages/context/src/ingest/adapters/looker/evidence-documents.ts @@ -0,0 +1,378 @@ +import { mkdir, readdir, readFile, writeFile } from 'node:fs/promises'; +import { dirname, join, relative } from 'node:path'; +import type { TriageSignals } from '../../types.js'; +import { + STAGED_FILES, + type StagedDashboardFile, + type StagedExploreFile, + type StagedLookerSignalsFile, + type StagedLookFile, + stagedDashboardFileSchema, + stagedExploreFileSchema, + stagedLookerSignalsFileSchema, + stagedLookFileSchema, +} from './types.js'; + +type JsonObject = Record; + +interface EvidenceDocument { + relDir: string; + metadata: JsonObject; + markdown: string; +} + +export async function writeLookerEvidenceDocuments(stagedDir: string): Promise { + const paths = await walkJson(stagedDir); + const signals = await readSignals(stagedDir); + const documents: EvidenceDocument[] = []; + + for (const relPath of paths) { + if (/^explores\/[^/]+\/[^/]+\.json$/.test(relPath)) { + const explore = await readJson(stagedDir, relPath, stagedExploreFileSchema); + documents.push(renderExploreEvidence(relPath, explore)); + continue; + } + if (/^dashboards\/[^/]+\.json$/.test(relPath)) { + const dashboard = await readJson(stagedDir, relPath, stagedDashboardFileSchema); + 
documents.push(renderDashboardEvidence(relPath, dashboard)); + continue; + } + if (/^looks\/[^/]+\.json$/.test(relPath)) { + const look = await readJson(stagedDir, relPath, stagedLookFileSchema); + documents.push(renderLookEvidence(relPath, look)); + } + } + + for (const document of documents) { + await writeJson(stagedDir, join(document.relDir, 'metadata.json'), document.metadata); + await writeText(stagedDir, join(document.relDir, 'page.md'), document.markdown); + } + + await writeJson(stagedDir, join(STAGED_FILES.evidenceRoot, 'signals-summary.json'), { + dashboardUsageCount: signals.dashboardUsage.length, + lookUsageCount: signals.lookUsage.length, + scheduledPlanCount: signals.scheduledPlans.length, + favoriteCount: signals.favorites.length, + }); +} + +export async function getLookerTriageSignals(stagedDir: string, externalId: string): Promise { + const signals = await readSignals(stagedDir); + const dashboardId = /^looker:dashboard:(.+)$/.exec(externalId)?.[1]; + if (dashboardId) { + const dashboard = await readOptionalJson( + stagedDir, + `dashboards/${safePathSegment(dashboardId)}.json`, + stagedDashboardFileSchema, + ); + const usage = signals.dashboardUsage.find((item) => item.contentId === dashboardId); + const schedule = signals.scheduledPlans.find( + (item) => item.contentType === 'dashboard' && item.contentId === dashboardId, + ); + const favorite = signals.favorites.find( + (item) => item.contentType === 'dashboard' && item.contentId === dashboardId, + ); + return { + objectType: 'looker_dashboard', + lastEditedAt: dashboard?.updatedAt ?? usage?.lastRunAt ?? undefined, + propertyHints: { + contentType: 'dashboard', + queryCount30d: String(usage?.queryCount30d ?? 0), + uniqueUsers30d: String(usage?.uniqueUsers30d ?? 0), + isScheduled: String(schedule?.isScheduled ?? false), + favoriteCount: String(favorite?.favoriteCount ?? 
0), + }, + }; + } + + const lookId = /^looker:look:(.+)$/.exec(externalId)?.[1]; + if (lookId) { + const look = await readOptionalJson(stagedDir, `looks/${safePathSegment(lookId)}.json`, stagedLookFileSchema); + const usage = signals.lookUsage.find((item) => item.contentId === lookId); + const schedule = signals.scheduledPlans.find((item) => item.contentType === 'look' && item.contentId === lookId); + const favorite = signals.favorites.find((item) => item.contentType === 'look' && item.contentId === lookId); + return { + objectType: 'looker_look', + lastEditedAt: look?.updatedAt ?? usage?.lastRunAt ?? undefined, + propertyHints: { + contentType: 'look', + queryCount30d: String(usage?.queryCount30d ?? 0), + uniqueUsers30d: String(usage?.uniqueUsers30d ?? 0), + isScheduled: String(schedule?.isScheduled ?? false), + favoriteCount: String(favorite?.favoriteCount ?? 0), + }, + }; + } + + const explore = /^looker:explore:([^.]+)\.(.+)$/.exec(externalId); + if (explore) { + return { + objectType: 'looker_explore', + propertyHints: { + contentType: 'explore', + modelName: explore[1], + exploreName: explore[2], + }, + }; + } + + return { objectType: 'looker_runtime' }; +} + +function renderExploreEvidence(rawPath: string, explore: StagedExploreFile): EvidenceDocument { + const title = explore.label ?? `${explore.modelName}.${explore.exploreName}`; + const relDir = join( + STAGED_FILES.evidenceRoot, + 'explores', + safePathSegment(explore.modelName), + safePathSegment(explore.exploreName), + ); + const lines = [ + `# ${title}`, + '', + explore.description ? explore.description : '', + '', + '## Explore', + '', + `- model: ${explore.modelName}`, + `- explore: ${explore.exploreName}`, + '', + '## Dimensions', + '', + ...fieldLines(explore.fields.dimensions), + '', + '## Measures', + '', + ...fieldLines(explore.fields.measures), + '', + '## Joins', + '', + ...(explore.joins.length === 0 + ? ['- none'] + : explore.joins.map((item) => `- ${item.name}${item.relationship ? 
` (${item.relationship})` : ''}`)), + ]; + return { + relDir, + metadata: { + objectType: 'looker_explore', + id: `looker:explore:${explore.modelName}.${explore.exploreName}`, + title, + path: `Looker / Explores / ${explore.modelName}.${explore.exploreName}`, + url: null, + parentId: null, + databaseId: null, + dataSourceId: null, + lastEditedAt: null, + lastEditedBy: null, + properties: { + rawPath, + modelName: explore.modelName, + exploreName: explore.exploreName, + }, + }, + markdown: normalizeMarkdown(lines), + }; +} + +function renderDashboardEvidence(rawPath: string, dashboard: StagedDashboardFile): EvidenceDocument { + const relDir = join(STAGED_FILES.evidenceRoot, 'dashboards', safePathSegment(dashboard.lookerId)); + const lines = [ + `# ${dashboard.title}`, + '', + dashboard.description ?? '', + '', + '## Dashboard Queries', + '', + ...dashboard.tiles.flatMap((tile) => [ + `## Tile: ${tile.title ?? tile.id}`, + '', + ...(tile.query ? queryLines(tile.query) : ['- no inline query captured']), + '', + ]), + ]; + return { + relDir, + metadata: { + objectType: 'looker_dashboard', + id: `looker:dashboard:${dashboard.lookerId}`, + title: dashboard.title, + path: `Looker / Dashboards / ${dashboard.title}`, + url: null, + parentId: dashboard.folderId, + databaseId: null, + dataSourceId: null, + lastEditedAt: dashboard.updatedAt, + lastEditedBy: null, + properties: { + rawPath, + lookerId: dashboard.lookerId, + }, + }, + markdown: normalizeMarkdown(lines), + }; +} + +function renderLookEvidence(rawPath: string, look: StagedLookFile): EvidenceDocument { + const relDir = join(STAGED_FILES.evidenceRoot, 'looks', safePathSegment(look.lookerId)); + const lines = [ + `# ${look.title}`, + '', + look.description ?? '', + '', + '## Look Query', + '', + ...(look.query ? 
queryLines(look.query) : ['- no query captured']), + ]; + return { + relDir, + metadata: { + objectType: 'looker_look', + id: `looker:look:${look.lookerId}`, + title: look.title, + path: `Looker / Looks / ${look.title}`, + url: null, + parentId: look.folderId, + databaseId: null, + dataSourceId: null, + lastEditedAt: look.updatedAt, + lastEditedBy: null, + properties: { + rawPath, + lookerId: look.lookerId, + }, + }, + markdown: normalizeMarkdown(lines), + }; +} + +function fieldLines( + fields: Array<{ + name: string; + label: string | null; + type: string | null; + sql: string | null; + description: string | null; + }>, +): string[] { + if (fields.length === 0) { + return ['- none']; + } + return fields.map((field) => { + const parts = [ + field.name, + field.label ? `label: ${field.label}` : null, + field.type ? `type: ${field.type}` : null, + field.description ? `description: ${field.description}` : null, + ].filter(Boolean); + return `- ${parts.join('; ')}`; + }); +} + +function queryLines(query: StagedDashboardFile['tiles'][number]['query']): string[] { + if (!query) { + return ['- no query captured']; + } + return [ + `- model: ${query.model}`, + `- explore: ${query.view}`, + '', + '### Fields', + '', + ...(query.fields.length === 0 ? 
['- none'] : query.fields.map((field) => `- ${field}`)), + '', + '### Filters', + '', + ...filterLines(query.filters), + ]; +} + +function filterLines(filters: Record): string[] { + const entries = Object.entries(filters).filter( + ([, value]) => value !== null && value !== undefined && String(value).trim() !== '', + ); + if (entries.length === 0) { + return ['- none']; + } + return entries.map(([field, value]) => `- ${field} = ${String(value)}`); +} + +async function readSignals(stagedDir: string): Promise { + const [dashboardUsage, lookUsage, scheduledPlans, favorites] = await Promise.all([ + readOptionalArray(stagedDir, STAGED_FILES.signals.dashboardUsage), + readOptionalArray(stagedDir, STAGED_FILES.signals.lookUsage), + readOptionalArray(stagedDir, STAGED_FILES.signals.scheduledPlans), + readOptionalArray(stagedDir, STAGED_FILES.signals.favorites), + ]); + return stagedLookerSignalsFileSchema.parse({ dashboardUsage, lookUsage, scheduledPlans, favorites }); +} + +async function readOptionalArray(stagedDir: string, relPath: string): Promise { + try { + const parsed = JSON.parse(await readFile(join(stagedDir, relPath), 'utf-8')) as unknown; + return Array.isArray(parsed) ? 
parsed : []; + } catch (error) { + if (error && typeof error === 'object' && 'code' in error && error.code === 'ENOENT') { + return []; + } + throw error; + } +} + +async function readOptionalJson( + stagedDir: string, + relPath: string, + schema: { parse(value: unknown): T }, +): Promise { + try { + return await readJson(stagedDir, relPath, schema); + } catch (error) { + if (error && typeof error === 'object' && 'code' in error && error.code === 'ENOENT') { + return null; + } + throw error; + } +} + +async function readJson(stagedDir: string, relPath: string, schema: { parse(value: unknown): T }): Promise { + return schema.parse(JSON.parse(await readFile(join(stagedDir, relPath), 'utf-8'))); +} + +async function writeJson(stagedDir: string, relPath: string, value: unknown): Promise { + await writeText(stagedDir, relPath, `${JSON.stringify(value, null, 2)}\n`); +} + +async function writeText(stagedDir: string, relPath: string, body: string): Promise { + const target = join(stagedDir, relPath); + await mkdir(dirname(target), { recursive: true }); + await writeFile(target, body, 'utf-8'); +} + +async function walkJson(root: string, dir = root): Promise { + const entries = await readdir(dir, { withFileTypes: true }); + const paths: string[] = []; + for (const entry of entries) { + const absPath = join(dir, entry.name); + if (entry.isDirectory()) { + paths.push(...(await walkJson(root, absPath))); + continue; + } + if (entry.isFile() && entry.name.endsWith('.json')) { + paths.push(relative(root, absPath).replace(/\\/g, '/')); + } + } + return paths.sort(); +} + +function safePathSegment(value: string): string { + if (!/^[a-zA-Z0-9_-]+$/.test(value)) { + throw new Error(`Unsafe Looker evidence path segment: ${value}`); + } + return value; +} + +function normalizeMarkdown(lines: string[]): string { + return `${lines + .filter((line, index, all) => line !== '' || all[index - 1] !== '') + .join('\n') + .trim()}\n`; +} diff --git 
a/packages/context/src/ingest/adapters/looker/factory.test.ts b/packages/context/src/ingest/adapters/looker/factory.test.ts new file mode 100644 index 00000000..e049fc34 --- /dev/null +++ b/packages/context/src/ingest/adapters/looker/factory.test.ts @@ -0,0 +1,74 @@ +import { describe, expect, it, vi } from 'vitest'; +import type { FetchContext } from '../../types.js'; +import type { LookerSdkPort } from './client.js'; +import { + DefaultLookerClientFactory, + DefaultLookerConnectionClientFactory, + type LookerCredentialResolver, +} from './factory.js'; +import type { LookerRuntimeClient } from './fetch.js'; +import type { LookerPullConfig } from './types.js'; + +function sdk(): LookerSdkPort { + return { + me: vi.fn().mockResolvedValue({ id: '1', display_name: 'API User', email: 'api@example.com' }), + search_dashboards: vi.fn().mockResolvedValue([{ id: '10' }]), + dashboard: vi.fn(), + search_looks: vi.fn().mockResolvedValue([]), + search_scheduled_plans: vi.fn().mockResolvedValue([]), + look: vi.fn(), + all_folders: vi.fn().mockResolvedValue([]), + all_users: vi.fn().mockResolvedValue([]), + all_groups: vi.fn().mockResolvedValue([]), + all_connections: vi.fn().mockResolvedValue([]), + all_lookml_models: vi.fn().mockResolvedValue([]), + lookml_model_explore: vi.fn(), + run_inline_query: vi.fn().mockResolvedValue('[]'), + logout: vi.fn().mockResolvedValue(undefined), + }; +} + +describe('DefaultLookerConnectionClientFactory', () => { + it('resolves credentials by Looker connection id and creates a KLO Looker client', async () => { + const fakeSdk = sdk(); + const resolver: LookerCredentialResolver = { + resolve: vi.fn().mockResolvedValue({ + base_url: 'https://example.looker.com', + client_id: 'id', + client_secret: 'credential', // pragma: allowlist secret + }), + }; + const factory = new DefaultLookerConnectionClientFactory(resolver, { sdkFactory: () => fakeSdk }); + + const client = await factory.createClient('prod-looker'); + + await 
expect(client.listDashboards()).resolves.toEqual([{ id: '10', updatedAt: null }]); + expect(resolver.resolve).toHaveBeenCalledWith('prod-looker'); + }); +}); + +describe('DefaultLookerClientFactory', () => { + const ctx: FetchContext = { connectionId: 'ctx-looker', sourceKey: 'looker' }; + + it('uses pullConfig.lookerConnectionId when present', async () => { + const runtimeClient = { listDashboards: vi.fn() } as unknown as LookerRuntimeClient; + const inner = { createClient: vi.fn().mockResolvedValue(runtimeClient) }; + const factory = new DefaultLookerClientFactory(inner); + const config = { lookerConnectionId: 'prod-looker' } as LookerPullConfig; + + await expect(factory.createClient(config, ctx)).resolves.toBe(runtimeClient); + + expect(inner.createClient).toHaveBeenCalledWith('prod-looker'); + }); + + it('falls back to ctx.connectionId when pullConfig.lookerConnectionId is absent', async () => { + const runtimeClient = { listDashboards: vi.fn() } as unknown as LookerRuntimeClient; + const inner = { createClient: vi.fn().mockResolvedValue(runtimeClient) }; + const factory = new DefaultLookerClientFactory(inner); + const config = {} as LookerPullConfig; + + await expect(factory.createClient(config, ctx)).resolves.toBe(runtimeClient); + + expect(inner.createClient).toHaveBeenCalledWith('ctx-looker'); + }); +}); diff --git a/packages/context/src/ingest/adapters/looker/factory.ts b/packages/context/src/ingest/adapters/looker/factory.ts new file mode 100644 index 00000000..e80d781c --- /dev/null +++ b/packages/context/src/ingest/adapters/looker/factory.ts @@ -0,0 +1,32 @@ +import type { FetchContext } from '../../types.js'; +import { LookerClient, type LookerClientDeps, type LookerConnectionParams } from './client.js'; +import type { LookerClientFactory, LookerRuntimeClient } from './fetch.js'; +import type { LookerPullConfig } from './types.js'; + +export interface LookerCredentialResolver { + resolve(lookerConnectionId: string): Promise; +} + +export interface 
LookerConnectionClientFactory { + createClient(lookerConnectionId: string): Promise; +} + +export class DefaultLookerConnectionClientFactory implements LookerConnectionClientFactory { + constructor( + private readonly resolver: LookerCredentialResolver, + private readonly deps: LookerClientDeps = {}, + ) {} + + async createClient(lookerConnectionId: string): Promise { + const credentials = await this.resolver.resolve(lookerConnectionId); + return new LookerClient(credentials, this.deps); + } +} + +export class DefaultLookerClientFactory implements LookerClientFactory { + constructor(private readonly inner: LookerConnectionClientFactory) {} + + async createClient(config: LookerPullConfig, ctx: FetchContext): Promise { + return this.inner.createClient(config.lookerConnectionId ?? ctx.connectionId); + } +} diff --git a/packages/context/src/ingest/adapters/looker/fetch-report.test.ts b/packages/context/src/ingest/adapters/looker/fetch-report.test.ts new file mode 100644 index 00000000..157a6770 --- /dev/null +++ b/packages/context/src/ingest/adapters/looker/fetch-report.test.ts @@ -0,0 +1,77 @@ +import { mkdtemp, rm } from 'node:fs/promises'; +import { tmpdir } from 'node:os'; +import { join } from 'node:path'; +import { afterEach, beforeEach, describe, expect, it } from 'vitest'; +import { readLookerFetchReport, writeLookerFetchReport } from './fetch-report.js'; + +describe('Looker staged fetch report', () => { + let stagedDir: string; + + beforeEach(async () => { + stagedDir = await mkdtemp(join(tmpdir(), 'looker-fetch-report-')); + }); + + afterEach(async () => { + await rm(stagedDir, { recursive: true, force: true }); + }); + + it('returns null when a staged bundle has no fetch report', async () => { + await expect(readLookerFetchReport(stagedDir)).resolves.toBeNull(); + }); + + it('round-trips partial fetch issues', async () => { + await writeLookerFetchReport(stagedDir, { + status: 'partial', + retryRecommended: true, + skipped: [ + { + rawPath: 
'dashboards/10.json', + entityType: 'dashboard', + entityId: '10', + severity: 'error', + statusCode: 429, + message: 'Looker API rate limit remained after retry', + retryRecommended: true, + }, + ], + warnings: [ + { + rawPath: 'signals/dashboard_usage.json', + entityType: 'signals', + entityId: null, + severity: 'warning', + statusCode: 403, + message: 'system__activity unavailable', + retryRecommended: false, + }, + ], + }); + + await expect(readLookerFetchReport(stagedDir)).resolves.toEqual({ + status: 'partial', + retryRecommended: true, + skipped: [ + { + rawPath: 'dashboards/10.json', + entityType: 'dashboard', + entityId: '10', + severity: 'error', + statusCode: 429, + message: 'Looker API rate limit remained after retry', + retryRecommended: true, + }, + ], + warnings: [ + { + rawPath: 'signals/dashboard_usage.json', + entityType: 'signals', + entityId: null, + severity: 'warning', + statusCode: 403, + message: 'system__activity unavailable', + retryRecommended: false, + }, + ], + }); + }); +}); diff --git a/packages/context/src/ingest/adapters/looker/fetch-report.ts b/packages/context/src/ingest/adapters/looker/fetch-report.ts new file mode 100644 index 00000000..a9fcf51c --- /dev/null +++ b/packages/context/src/ingest/adapters/looker/fetch-report.ts @@ -0,0 +1,22 @@ +import { mkdir, readFile, writeFile } from 'node:fs/promises'; +import { dirname, join } from 'node:path'; +import { STAGED_FILES, type StagedLookerFetchReport, stagedLookerFetchReportSchema } from './types.js'; + +export async function readLookerFetchReport(stagedDir: string): Promise { + try { + const raw = await readFile(join(stagedDir, STAGED_FILES.fetchReport), 'utf-8'); + return stagedLookerFetchReportSchema.parse(JSON.parse(raw)); + } catch (error) { + if (error && typeof error === 'object' && 'code' in error && error.code === 'ENOENT') { + return null; + } + throw error; + } +} + +export async function writeLookerFetchReport(stagedDir: string, report: StagedLookerFetchReport): 
Promise { + const parsed = stagedLookerFetchReportSchema.parse(report); + const target = join(stagedDir, STAGED_FILES.fetchReport); + await mkdir(dirname(target), { recursive: true }); + await writeFile(target, `${JSON.stringify(parsed, null, 2)}\n`, 'utf-8'); +} diff --git a/packages/context/src/ingest/adapters/looker/fetch.test.ts b/packages/context/src/ingest/adapters/looker/fetch.test.ts new file mode 100644 index 00000000..2b18a3dd --- /dev/null +++ b/packages/context/src/ingest/adapters/looker/fetch.test.ts @@ -0,0 +1,645 @@ +import { mkdtemp, readdir, readFile, rm } from 'node:fs/promises'; +import { tmpdir } from 'node:os'; +import { join } from 'node:path'; +import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'; +import { chunkLookerStagedDir } from './chunk.js'; +import { fetchLookerRuntimeBundle, type LookerRuntimeClient } from './fetch.js'; + +const connectionId = '11111111-1111-4111-8111-111111111111'; + +function makeClient(): LookerRuntimeClient { + return { + listDashboards: vi.fn().mockResolvedValue([{ id: '10' }]), + getDashboard: vi.fn().mockResolvedValue({ + lookerId: '10', + title: 'Sales Pipeline', + description: 'Pipeline health', + folderId: '7', + ownerId: '3', + updatedAt: '2026-04-30T12:00:00.000Z', + tiles: [{ id: '100', title: 'ARR', lookId: null, query: { model: 'b2b', view: 'sales_pipeline' } }], + }), + listLooks: vi.fn().mockResolvedValue([{ id: '20' }]), + getLook: vi.fn().mockResolvedValue({ + lookerId: '20', + title: 'Open Pipeline', + description: null, + folderId: '7', + ownerId: '3', + updatedAt: '2026-04-30T12:00:00.000Z', + query: { model: 'b2b', view: 'sales_pipeline', fields: ['opportunities.arr'] }, + }), + listFolders: vi + .fn() + .mockResolvedValue({ folders: [{ id: '7', name: 'Sandbox', parentId: null, path: ['Sandbox'] }] }), + listUsers: vi.fn().mockResolvedValue([{ id: '3', displayName: 'Ada Lovelace', email: null }]), + listGroups: vi.fn().mockResolvedValue([{ id: '4', name: 'Sales' }]), + 
listLookmlModels: vi.fn().mockResolvedValue({ + models: [{ name: 'b2b', label: 'B2B', explores: [{ name: 'sales_pipeline', label: 'Sales Pipeline' }] }], + }), + getExplore: vi.fn().mockResolvedValue({ + modelName: 'b2b', + exploreName: 'sales_pipeline', + label: 'Sales Pipeline', + description: null, + fields: { dimensions: [{ name: 'opportunities.id' }], measures: [{ name: 'opportunities.arr' }] }, + joins: [], + }), + getSignals: vi.fn().mockResolvedValue({ + dashboardUsage: [{ contentId: '10', queryCount30d: 50, uniqueUsers30d: 8, lastRunAt: null, topUsers: ['3'] }], + lookUsage: [{ contentId: '20', queryCount30d: 20, uniqueUsers30d: 5, lastRunAt: null, topUsers: ['3'] }], + scheduledPlans: [ + { contentId: '10', contentType: 'dashboard', isScheduled: true, scheduleCount: 1, recipientCount: 3 }, + ], + favorites: [{ contentId: '10', contentType: 'dashboard', favoriteCount: 4 }], + }), + cleanup: vi.fn().mockResolvedValue(undefined), + }; +} + +describe('fetchLookerRuntimeBundle', () => { + let stagedDir: string; + + beforeEach(async () => { + stagedDir = await mkdtemp(join(tmpdir(), 'looker-fetch-')); + }); + + afterEach(async () => { + await rm(stagedDir, { recursive: true, force: true }); + }); + + it('writes dashboards, looks, folders, users, groups, models, explores, signals, and sync config', async () => { + const client = makeClient(); + await fetchLookerRuntimeBundle({ + pullConfig: { lookerConnectionId: connectionId, instanceBaseUrl: 'https://example.looker.com' }, + stagedDir, + ctx: { connectionId, sourceKey: 'looker' }, + clientFactory: { createClient: vi.fn().mockResolvedValue(client) }, + now: () => new Date('2026-04-30T12:30:00.000Z'), + }); + + expect(await readdir(join(stagedDir, 'dashboards'))).toEqual(['10.json']); + expect(await readdir(join(stagedDir, 'looks'))).toEqual(['20.json']); + expect(await readdir(join(stagedDir, 'users'))).toEqual(['3.json']); + expect(await readdir(join(stagedDir, 'groups'))).toEqual(['4.json']); + expect(await 
readdir(join(stagedDir, 'explores/b2b'))).toEqual(['sales_pipeline.json']); + + const syncConfig = JSON.parse(await readFile(join(stagedDir, 'sync-config.json'), 'utf-8')); + expect(syncConfig).toEqual({ + lookerConnectionId: connectionId, + fetchedAt: '2026-04-30T12:30:00.000Z', + instanceBaseUrl: 'https://example.looker.com', + previousCursors: { + dashboardsLastSyncedAt: null, + looksLastSyncedAt: null, + }, + nextCursors: { + dashboardsLastSyncedAt: null, + looksLastSyncedAt: null, + }, + }); + + const scope = JSON.parse(await readFile(join(stagedDir, 'looker-scope.json'), 'utf-8')); + expect(scope).toEqual({ + mode: 'full', + knownCurrentRawPaths: ['dashboards/10.json', 'looks/20.json'], + fetchedRawPaths: ['dashboards/10.json', 'looks/20.json'], + }); + + const dashboardUsage = JSON.parse(await readFile(join(stagedDir, 'signals/dashboard_usage.json'), 'utf-8')); + expect(dashboardUsage).toEqual([ + { contentId: '10', queryCount30d: 50, uniqueUsers30d: 8, lastRunAt: null, topUsers: ['3'] }, + ]); + + const lookUsage = JSON.parse(await readFile(join(stagedDir, 'signals/look_usage.json'), 'utf-8')); + const scheduledPlans = JSON.parse(await readFile(join(stagedDir, 'signals/scheduled_plans.json'), 'utf-8')); + const favorites = JSON.parse(await readFile(join(stagedDir, 'signals/favorites.json'), 'utf-8')); + + expect(lookUsage).toEqual([ + { contentId: '20', queryCount30d: 20, uniqueUsers30d: 5, lastRunAt: null, topUsers: ['3'] }, + ]); + expect(scheduledPlans).toEqual([ + { contentId: '10', contentType: 'dashboard', isScheduled: true, scheduleCount: 1, recipientCount: 3 }, + ]); + expect(favorites).toEqual([{ contentId: '10', contentType: 'dashboard', favoriteCount: 4 }]); + }); + + it('stages only changed Dashboard and Look entity bodies during incremental pulls', async () => { + const client = makeClient(); + vi.mocked(client.listDashboards).mockResolvedValue([ + { id: '10', updatedAt: '2026-04-30T12:00:00.000Z' }, + { id: '11', updatedAt: 
'2026-04-30T12:10:00.000Z' }, + ]); + vi.mocked(client.getDashboard).mockImplementation(async (id: string) => ({ + lookerId: id, + title: `Dashboard ${id}`, + description: null, + folderId: '7', + ownerId: '3', + updatedAt: id === '11' ? '2026-04-30T12:10:00.000Z' : '2026-04-30T12:00:00.000Z', + tiles: [], + })); + vi.mocked(client.listLooks).mockResolvedValue([ + { id: '20', updatedAt: '2026-04-30T11:00:00.000Z' }, + { id: '21', updatedAt: null }, + ]); + vi.mocked(client.getLook).mockImplementation(async (id: string) => ({ + lookerId: id, + title: `Look ${id}`, + description: null, + folderId: '7', + ownerId: '3', + updatedAt: id === '21' ? null : '2026-04-30T11:00:00.000Z', + query: null, + })); + + await fetchLookerRuntimeBundle({ + pullConfig: { + lookerConnectionId: connectionId, + dashboardUpdatedSince: '2026-04-30T12:00:00.000Z', + lookUpdatedSince: '2026-04-30T11:00:00.000Z', + }, + stagedDir, + ctx: { connectionId, sourceKey: 'looker' }, + clientFactory: { createClient: vi.fn().mockResolvedValue(client) }, + now: () => new Date('2026-04-30T12:30:00.000Z'), + }); + + expect(client.getDashboard).toHaveBeenCalledTimes(1); + expect(client.getDashboard).toHaveBeenCalledWith('11'); + expect(client.getLook).toHaveBeenCalledTimes(1); + expect(client.getLook).toHaveBeenCalledWith('21'); + + await expect(readdir(join(stagedDir, 'dashboards'))).resolves.toEqual(['11.json']); + await expect(readdir(join(stagedDir, 'looks'))).resolves.toEqual(['21.json']); + + const syncConfig = JSON.parse(await readFile(join(stagedDir, 'sync-config.json'), 'utf-8')); + expect(syncConfig.previousCursors).toEqual({ + dashboardsLastSyncedAt: '2026-04-30T12:00:00.000Z', + looksLastSyncedAt: '2026-04-30T11:00:00.000Z', + }); + expect(syncConfig.nextCursors).toEqual({ + dashboardsLastSyncedAt: '2026-04-30T12:10:00.000Z', + looksLastSyncedAt: '2026-04-30T11:00:00.000Z', + }); + + const scope = JSON.parse(await readFile(join(stagedDir, 'looker-scope.json'), 'utf-8')); + 
expect(scope).toEqual({ + mode: 'incremental', + knownCurrentRawPaths: ['dashboards/10.json', 'dashboards/11.json', 'looks/20.json', 'looks/21.json'], + fetchedRawPaths: ['dashboards/11.json', 'looks/21.json'], + }); + }); + + it('falls back to empty signal files when the client has no signal support', async () => { + const client = makeClient(); + delete client.getSignals; + + await fetchLookerRuntimeBundle({ + pullConfig: { lookerConnectionId: connectionId }, + stagedDir, + ctx: { connectionId, sourceKey: 'looker' }, + clientFactory: { createClient: vi.fn().mockResolvedValue(client) }, + now: () => new Date('2026-04-30T12:30:00.000Z'), + }); + + expect(JSON.parse(await readFile(join(stagedDir, 'signals/look_usage.json'), 'utf-8'))).toEqual([]); + }); + + it('stamps explore warehouse targets from pull config and reports unmapped Looker connections', async () => { + const client = makeClient(); + const warehouseConnectionId = '22222222-2222-4222-8222-222222222222'; + vi.mocked(client.listLookmlModels).mockResolvedValue({ + models: [ + { + name: 'b2b', + label: 'B2B', + explores: [ + { name: 'sales_pipeline', label: 'Sales Pipeline' }, + { name: 'marketing', label: 'Marketing' }, + ], + }, + ], + }); + vi.mocked(client.getExplore).mockImplementation(async (_modelName: string, exploreName: string) => { + if (exploreName === 'marketing') { + return { + modelName: 'b2b', + exploreName: 'marketing', + label: 'Marketing', + description: null, + rawSqlTableName: 'proj.dataset.marketing', + connectionName: 'missing_mapping', + viewName: 'marketing', + fields: { + dimensions: [{ name: 'marketing.id', label: null, type: null, sql: null, description: null }], + measures: [{ name: 'marketing.spend', label: null, type: null, sql: null, description: null }], + }, + joins: [], + targetWarehouseConnectionId: null, + targetTable: null, + }; + } + return { + modelName: 'b2b', + exploreName: 'sales_pipeline', + label: 'Sales Pipeline', + description: null, + rawSqlTableName: 
'proj.dataset.opportunities AS opportunities', + connectionName: 'b2b_sandbox_bq', + viewName: 'opportunities', + fields: { + dimensions: [{ name: 'opportunities.id', label: null, type: null, sql: null, description: null }], + measures: [{ name: 'opportunities.arr', label: null, type: null, sql: null, description: null }], + }, + joins: [ + { + name: 'accounts', + type: 'left_outer', + relationship: 'many_to_one', + rawSqlTableName: 'proj.dataset.accounts', + sqlOn: '$' + '{opportunities.account_id} = $' + '{accounts.id}', + from: null, + targetTable: null, + }, + ], + targetWarehouseConnectionId: null, + targetTable: null, + }; + }); + + await fetchLookerRuntimeBundle({ + pullConfig: { + lookerConnectionId: connectionId, + connectionMappings: { b2b_sandbox_bq: warehouseConnectionId }, + connectionTypes: { b2b_sandbox_bq: 'BIGQUERY' }, + parsedTargetTables: { + 'b2b.sales_pipeline': { + ok: true, + catalog: 'proj', + schema: 'dataset', + name: 'opportunities', + canonicalTable: 'proj.dataset.opportunities', + }, + 'b2b.sales_pipeline.accounts': { + ok: true, + catalog: 'proj', + schema: 'dataset', + name: 'accounts', + canonicalTable: 'proj.dataset.accounts', + }, + }, + }, + stagedDir, + ctx: { connectionId, sourceKey: 'looker' }, + clientFactory: { createClient: vi.fn().mockResolvedValue(client) }, + now: () => new Date('2026-04-30T12:30:00.000Z'), + }); + + const salesPipeline = JSON.parse(await readFile(join(stagedDir, 'explores/b2b/sales_pipeline.json'), 'utf-8')); + expect(salesPipeline).toMatchObject({ + connectionName: 'b2b_sandbox_bq', + targetWarehouseConnectionId: warehouseConnectionId, + targetTable: { + ok: true, + catalog: 'proj', + schema: 'dataset', + name: 'opportunities', + canonicalTable: 'proj.dataset.opportunities', + }, + joins: [ + { + name: 'accounts', + targetTable: { + ok: true, + catalog: 'proj', + schema: 'dataset', + name: 'accounts', + canonicalTable: 'proj.dataset.accounts', + }, + }, + ], + }); + + const marketing = JSON.parse(await 
readFile(join(stagedDir, 'explores/b2b/marketing.json'), 'utf-8')); + expect(marketing).toMatchObject({ + connectionName: 'missing_mapping', + targetWarehouseConnectionId: null, + targetTable: { + ok: false, + reason: 'no_connection_mapping', + }, + }); + + const report = JSON.parse(await readFile(join(stagedDir, 'looker-fetch-report.json'), 'utf-8')); + expect(report.status).toBe('partial'); + expect(report.skipped).toEqual([]); + expect(report.warnings).toEqual([ + { + rawPath: 'looker_connection_mappings/missing_mapping', + entityType: 'looker_connection_mapping', + entityId: 'missing_mapping', + severity: 'warning', + statusCode: null, + message: 'Looker connection missing_mapping is not mapped to a warehouse connection; 1 explore will be wiki-only.', + retryRecommended: false, + kind: 'unmapped_looker_connection', + details: { + lookerConnectionName: 'missing_mapping', + affectedExplores: ['b2b.marketing'], + }, + }, + ]); + }); + + it('reports parsed target table failures without retrying the Looker fetch', async () => { + const client = makeClient(); + const warehouseConnectionId = '22222222-2222-4222-8222-222222222222'; + vi.mocked(client.getExplore).mockResolvedValue({ + modelName: 'b2b', + exploreName: 'sales_pipeline', + label: 'Sales Pipeline', + description: null, + rawSqlTableName: '$' + '{derived.SQL_TABLE_NAME}', + connectionName: 'b2b_sandbox_bq', + viewName: 'opportunities', + fields: { + dimensions: [{ name: 'opportunities.id', label: null, type: null, sql: null, description: null }], + measures: [{ name: 'opportunities.arr', label: null, type: null, sql: null, description: null }], + }, + joins: [], + targetWarehouseConnectionId: null, + targetTable: null, + }); + + await fetchLookerRuntimeBundle({ + pullConfig: { + lookerConnectionId: connectionId, + connectionMappings: { b2b_sandbox_bq: warehouseConnectionId }, + connectionTypes: { b2b_sandbox_bq: 'BIGQUERY' }, + parsedTargetTables: { + 'b2b.sales_pipeline': { + ok: false, + reason: 
'looker_template_unresolved', + detail: 'Looker template markers cannot be resolved before parsing.', + }, + }, + }, + stagedDir, + ctx: { connectionId, sourceKey: 'looker' }, + clientFactory: { createClient: vi.fn().mockResolvedValue(client) }, + now: () => new Date('2026-04-30T12:30:00.000Z'), + }); + + const explore = JSON.parse(await readFile(join(stagedDir, 'explores/b2b/sales_pipeline.json'), 'utf-8')); + expect(explore).toMatchObject({ + targetWarehouseConnectionId: warehouseConnectionId, + targetTable: { + ok: false, + reason: 'looker_template_unresolved', + }, + }); + + const report = JSON.parse(await readFile(join(stagedDir, 'looker-fetch-report.json'), 'utf-8')); + expect(report).toMatchObject({ + status: 'partial', + retryRecommended: false, + skipped: [], + warnings: [ + { + rawPath: 'looker_connection_mappings/b2b_sandbox_bq', + entityType: 'looker_connection_mapping', + entityId: 'b2b_sandbox_bq', + severity: 'warning', + statusCode: null, + message: + 'Looker explore b2b.sales_pipeline has sql_table_name that cannot be mapped to a physical warehouse table: looker_template_unresolved.', + retryRecommended: false, + kind: 'looker_template_unresolved', + details: { + lookerConnectionName: 'b2b_sandbox_bq', + rawSqlTableName: '$' + '{derived.SQL_TABLE_NAME}', + reason: 'looker_template_unresolved', + }, + }, + ], + }); + }); + + it('propagates parent explore warehouse targets onto Dashboard tile and Look queries', async () => { + const client = makeClient(); + const warehouseConnectionId = '22222222-2222-4222-8222-222222222222'; + vi.mocked(client.getExplore).mockResolvedValue({ + modelName: 'b2b', + exploreName: 'sales_pipeline', + label: 'Sales Pipeline', + description: null, + rawSqlTableName: 'proj.dataset.opportunities AS opportunities', + connectionName: 'b2b_sandbox_bq', + viewName: 'opportunities', + fields: { + dimensions: [{ name: 'opportunities.id', label: null, type: null, sql: null, description: null }], + measures: [{ name: 
'opportunities.arr', label: null, type: null, sql: null, description: null }], + }, + joins: [], + targetWarehouseConnectionId: null, + targetTable: null, + }); + + await fetchLookerRuntimeBundle({ + pullConfig: { + lookerConnectionId: connectionId, + connectionMappings: { b2b_sandbox_bq: warehouseConnectionId }, + connectionTypes: { b2b_sandbox_bq: 'BIGQUERY' }, + parsedTargetTables: { + 'b2b.sales_pipeline': { + ok: true, + catalog: 'proj', + schema: 'dataset', + name: 'opportunities', + canonicalTable: 'proj.dataset.opportunities', + }, + }, + }, + stagedDir, + ctx: { connectionId, sourceKey: 'looker' }, + clientFactory: { createClient: vi.fn().mockResolvedValue(client) }, + now: () => new Date('2026-04-30T12:30:00.000Z'), + }); + + const dashboard = JSON.parse(await readFile(join(stagedDir, 'dashboards/10.json'), 'utf-8')); + expect(dashboard.tiles[0].query).toMatchObject({ + model: 'b2b', + view: 'sales_pipeline', + targetWarehouseConnectionId: warehouseConnectionId, + targetTable: { + ok: true, + catalog: 'proj', + schema: 'dataset', + name: 'opportunities', + canonicalTable: 'proj.dataset.opportunities', + }, + }); + + const look = JSON.parse(await readFile(join(stagedDir, 'looks/20.json'), 'utf-8')); + expect(look.query).toMatchObject({ + model: 'b2b', + view: 'sales_pipeline', + targetWarehouseConnectionId: warehouseConnectionId, + targetTable: { + ok: true, + catalog: 'proj', + schema: 'dataset', + name: 'opportunities', + canonicalTable: 'proj.dataset.opportunities', + }, + }); + }); + + it('records skipped detail entities and keeps cursors pinned for affected entity types', async () => { + const client = makeClient(); + vi.mocked(client.listDashboards).mockResolvedValue([ + { id: '10', updatedAt: '2026-04-30T12:00:00.000Z' }, + { id: '11', updatedAt: '2026-04-30T12:10:00.000Z' }, + ]); + vi.mocked(client.getDashboard).mockImplementation(async (id: string) => { + if (id === '11') { + const error = new Error('Looker API rate limit remained after retry'); 
+ Object.assign(error, { statusCode: 429 }); + throw error; + } + return { + lookerId: id, + title: `Dashboard ${id}`, + description: null, + folderId: '7', + ownerId: '3', + updatedAt: '2026-04-30T12:00:00.000Z', + tiles: [], + }; + }); + vi.mocked(client.listLooks).mockResolvedValue([{ id: '20', updatedAt: '2026-04-30T11:15:00.000Z' }]); + vi.mocked(client.getLook).mockResolvedValue({ + lookerId: '20', + title: 'Look 20', + description: null, + folderId: '7', + ownerId: '3', + updatedAt: '2026-04-30T11:15:00.000Z', + query: null, + }); + + await fetchLookerRuntimeBundle({ + pullConfig: { + lookerConnectionId: connectionId, + dashboardUpdatedSince: '2026-04-30T12:00:00.000Z', + lookUpdatedSince: '2026-04-30T11:00:00.000Z', + }, + stagedDir, + ctx: { connectionId, sourceKey: 'looker' }, + clientFactory: { createClient: vi.fn().mockResolvedValue(client) }, + now: () => new Date('2026-04-30T12:30:00.000Z'), + }); + + await expect(readdir(join(stagedDir, 'dashboards'))).rejects.toMatchObject({ code: 'ENOENT' }); + await expect(readdir(join(stagedDir, 'looks'))).resolves.toEqual(['20.json']); + + const syncConfig = JSON.parse(await readFile(join(stagedDir, 'sync-config.json'), 'utf-8')); + expect(syncConfig.nextCursors).toEqual({ + dashboardsLastSyncedAt: '2026-04-30T12:00:00.000Z', + looksLastSyncedAt: '2026-04-30T11:15:00.000Z', + }); + + const report = JSON.parse(await readFile(join(stagedDir, 'looker-fetch-report.json'), 'utf-8')); + expect(report).toEqual({ + status: 'partial', + retryRecommended: true, + skipped: [ + { + rawPath: 'dashboards/11.json', + entityType: 'dashboard', + entityId: '11', + severity: 'error', + statusCode: 429, + message: 'Looker API rate limit remained after retry', + retryRecommended: true, + }, + ], + warnings: [], + }); + }); + + it('continues without explore bootstrap when LookML model listing is denied', async () => { + const client = makeClient(); + const error = new Error('LookML model access denied'); + Object.assign(error, { 
statusCode: 403 }); + vi.mocked(client.listLookmlModels).mockRejectedValue(error); + + await fetchLookerRuntimeBundle({ + pullConfig: { lookerConnectionId: connectionId }, + stagedDir, + ctx: { connectionId, sourceKey: 'looker' }, + clientFactory: { createClient: vi.fn().mockResolvedValue(client) }, + now: () => new Date('2026-04-30T12:30:00.000Z'), + }); + + await expect(readdir(join(stagedDir, 'dashboards'))).resolves.toEqual(['10.json']); + await expect(readdir(join(stagedDir, 'looks'))).resolves.toEqual(['20.json']); + await expect(readFile(join(stagedDir, 'lookml_models.json'), 'utf-8')).resolves.toBe('{\n "models": []\n}\n'); + await expect(readdir(join(stagedDir, 'explores'))).rejects.toMatchObject({ code: 'ENOENT' }); + expect(client.getExplore).not.toHaveBeenCalled(); + + const report = JSON.parse(await readFile(join(stagedDir, 'looker-fetch-report.json'), 'utf-8')); + expect(report).toEqual({ + status: 'success', + retryRecommended: false, + skipped: [], + warnings: [ + { + rawPath: 'lookml_models.json', + entityType: 'lookml_models', + entityId: null, + severity: 'warning', + statusCode: 403, + message: 'LookML model access denied', + retryRecommended: false, + }, + ], + }); + + const chunked = await chunkLookerStagedDir(stagedDir); + expect(chunked.workUnits.map((wu) => wu.unitKey).sort()).toEqual(['looker-dashboard-10', 'looker-look-20']); + expect(chunked.workUnits.flatMap((wu) => wu.dependencyPaths)).not.toContain('explores/b2b/sales_pipeline.json'); + }); + + it('cleans up the Looker client after a successful fetch', async () => { + const client = makeClient(); + + await fetchLookerRuntimeBundle({ + pullConfig: { lookerConnectionId: connectionId }, + stagedDir, + ctx: { connectionId, sourceKey: 'looker' }, + clientFactory: { createClient: vi.fn().mockResolvedValue(client) }, + now: () => new Date('2026-04-30T12:30:00.000Z'), + }); + + expect(client.cleanup).toHaveBeenCalledTimes(1); + }); + + it('cleans up the Looker client when fetch throws', async 
() => { + const client = makeClient(); + vi.mocked(client.listDashboards).mockRejectedValue(new Error('Looker API unavailable')); + + await expect( + fetchLookerRuntimeBundle({ + pullConfig: { lookerConnectionId: connectionId }, + stagedDir, + ctx: { connectionId, sourceKey: 'looker' }, + clientFactory: { createClient: vi.fn().mockResolvedValue(client) }, + now: () => new Date('2026-04-30T12:30:00.000Z'), + }), + ).rejects.toThrow('Looker API unavailable'); + + expect(client.cleanup).toHaveBeenCalledTimes(1); + }); +}); diff --git a/packages/context/src/ingest/adapters/looker/fetch.ts b/packages/context/src/ingest/adapters/looker/fetch.ts new file mode 100644 index 00000000..2086b48c --- /dev/null +++ b/packages/context/src/ingest/adapters/looker/fetch.ts @@ -0,0 +1,555 @@ +import { mkdir, writeFile } from 'node:fs/promises'; +import { dirname, join } from 'node:path'; +import type { FetchContext } from '../../types.js'; +import { writeLookerEvidenceDocuments } from './evidence-documents.js'; +import { writeLookerFetchReport } from './fetch-report.js'; +import { + type LookerPullConfig, + type ParsedTargetTable, + parseLookerPullConfig, + STAGED_FILES, + type StagedDashboardFile, + type StagedExploreFile, + type StagedFoldersTreeFile, + type StagedGroupFile, + type StagedLookerFetchIssue, + type StagedLookerFetchReport, + type StagedLookerQuery, + type StagedLookerSignalsFile, + type StagedLookFile, + type StagedLookmlModelsFile, + type StagedUserFile, + stagedDashboardFileSchema, + stagedExploreFileSchema, + stagedFoldersTreeFileSchema, + stagedGroupFileSchema, + stagedLookerScopeFileSchema, + stagedLookerSignalsFileSchema, + stagedLookFileSchema, + stagedLookmlModelsFileSchema, + stagedSyncConfigSchema, + stagedUserFileSchema, +} from './types.js'; + +export interface LookerEntityRef { + id: string; + updatedAt?: string | null; +} + +export interface LookerRuntimeClient { + listDashboards(): Promise; + getDashboard(id: string): Promise; + listLooks(): Promise; + 
getLook(id: string): Promise; + listFolders(): Promise; + listUsers(): Promise; + listGroups(): Promise; + listLookmlModels(): Promise; + getExplore(modelName: string, exploreName: string): Promise; + getSignals?(): Promise; + cleanup?(): Promise; +} + +export interface LookerClientFactory { + createClient(config: LookerPullConfig, ctx: FetchContext): Promise | LookerRuntimeClient; +} + +interface ExploreTargetSummary { + targetWarehouseConnectionId: string | null; + targetTable: ParsedTargetTable | null; +} + +interface StampedExploreResult { + explore: StagedExploreFile; + targetSummary: ExploreTargetSummary; +} + +interface StagedJsonFile { + rawPath: string; + value: T; +} + +type ParsedTargetTableFailureReason = Extract['reason']; + +interface FetchLookerRuntimeBundleParams { + pullConfig: unknown; + stagedDir: string; + ctx: FetchContext; + clientFactory: LookerClientFactory; + now?: () => Date; +} + +export async function fetchLookerRuntimeBundle(params: FetchLookerRuntimeBundleParams): Promise { + const config = parseLookerPullConfig(params.pullConfig); + const connectionId = config.lookerConnectionId ?? params.ctx.connectionId; + const client = await params.clientFactory.createClient(config, params.ctx); + try { + const now = params.now ?? (() => new Date()); + const skipped: StagedLookerFetchIssue[] = []; + const warnings: StagedLookerFetchIssue[] = []; + let dashboardFetchHadSkips = false; + let lookFetchHadSkips = false; + const fetchedDashboards: Array> = []; + const fetchedLooks: Array> = []; + + const previousCursors = { + dashboardsLastSyncedAt: config.dashboardUpdatedSince ?? null, + looksLastSyncedAt: config.lookUpdatedSince ?? 
null, + }; + + const dashboards = await client.listDashboards(); + const dashboardRawPaths = dashboards.map((dashboardRef) => `dashboards/${safePathSegment(dashboardRef.id)}.json`); + const dashboardsToFetch = dashboards.filter((dashboardRef) => + shouldFetchEntity(dashboardRef, previousCursors.dashboardsLastSyncedAt), + ); + const fetchedRawPaths: string[] = []; + for (const dashboardRef of dashboardsToFetch) { + const rawPath = `dashboards/${safePathSegment(dashboardRef.id)}.json`; + try { + const dashboard = stagedDashboardFileSchema.parse(await client.getDashboard(dashboardRef.id)); + const dashboardRawPath = `dashboards/${safePathSegment(dashboard.lookerId)}.json`; + fetchedRawPaths.push(dashboardRawPath); + fetchedDashboards.push({ rawPath: dashboardRawPath, value: dashboard }); + } catch (error) { + dashboardFetchHadSkips = true; + skipped.push(issueForFetchError({ rawPath, entityType: 'dashboard', entityId: dashboardRef.id, error })); + } + } + + const looks = await client.listLooks(); + const lookRawPaths = looks.map((lookRef) => `looks/${safePathSegment(lookRef.id)}.json`); + const looksToFetch = looks.filter((lookRef) => shouldFetchEntity(lookRef, previousCursors.looksLastSyncedAt)); + for (const lookRef of looksToFetch) { + const rawPath = `looks/${safePathSegment(lookRef.id)}.json`; + try { + const look = stagedLookFileSchema.parse(await client.getLook(lookRef.id)); + const lookRawPath = `looks/${safePathSegment(look.lookerId)}.json`; + fetchedRawPaths.push(lookRawPath); + fetchedLooks.push({ rawPath: lookRawPath, value: look }); + } catch (error) { + lookFetchHadSkips = true; + skipped.push(issueForFetchError({ rawPath, entityType: 'look', entityId: lookRef.id, error })); + } + } + + const nextCursors = { + dashboardsLastSyncedAt: dashboardFetchHadSkips + ? previousCursors.dashboardsLastSyncedAt + : maxUpdatedAt(dashboards, previousCursors.dashboardsLastSyncedAt), + looksLastSyncedAt: lookFetchHadSkips + ? 
previousCursors.looksLastSyncedAt + : maxUpdatedAt(looks, previousCursors.looksLastSyncedAt), + }; + const fetchMode = + previousCursors.dashboardsLastSyncedAt || previousCursors.looksLastSyncedAt ? 'incremental' : 'full'; + + await writeJson( + params.stagedDir, + STAGED_FILES.syncConfig, + stagedSyncConfigSchema.parse({ + lookerConnectionId: connectionId, + fetchedAt: now().toISOString(), + ...(config.instanceBaseUrl ? { instanceBaseUrl: config.instanceBaseUrl } : {}), + previousCursors, + nextCursors, + }), + ); + + await writeJson( + params.stagedDir, + STAGED_FILES.scope, + stagedLookerScopeFileSchema.parse({ + mode: fetchMode, + knownCurrentRawPaths: [...dashboardRawPaths, ...lookRawPaths].sort(), + fetchedRawPaths: fetchedRawPaths.sort(), + }), + ); + + const folders = stagedFoldersTreeFileSchema.parse(await client.listFolders()); + await writeJson(params.stagedDir, STAGED_FILES.foldersTree, folders); + + const users = await client.listUsers(); + for (const rawUser of users) { + const user = stagedUserFileSchema.parse(rawUser); + await writeJson(params.stagedDir, `users/${safePathSegment(user.id)}.json`, user); + } + + const groups = await client.listGroups(); + for (const rawGroup of groups) { + const group = stagedGroupFileSchema.parse(rawGroup); + await writeJson(params.stagedDir, `groups/${safePathSegment(group.id)}.json`, group); + } + + let models: StagedLookmlModelsFile; + try { + models = stagedLookmlModelsFileSchema.parse(await client.listLookmlModels()); + } catch (error) { + warnings.push( + issueForFetchError({ + rawPath: STAGED_FILES.lookmlModels, + entityType: 'lookml_models', + entityId: null, + error, + severity: 'warning', + }), + ); + models = stagedLookmlModelsFileSchema.parse({ models: [] }); + } + await writeJson(params.stagedDir, STAGED_FILES.lookmlModels, models); + const exploreTargetsByKey = new Map(); + const stagedExplores: StagedExploreFile[] = []; + for (const model of models.models) { + for (const exploreRef of model.explores) { 
+ const rawPath = `explores/${safePathSegment(model.name)}/${safePathSegment(exploreRef.name)}.json`; + try { + const result = stampExploreWarehouseTarget(await client.getExplore(model.name, exploreRef.name), config); + stagedExplores.push(result.explore); + exploreTargetsByKey.set(exploreKey(result.explore.modelName, result.explore.exploreName), result.targetSummary); + await writeJson( + params.stagedDir, + `explores/${safePathSegment(result.explore.modelName)}/${safePathSegment(result.explore.exploreName)}.json`, + result.explore, + ); + } catch (error) { + skipped.push( + issueForFetchError({ + rawPath, + entityType: 'explore', + entityId: `${model.name}.${exploreRef.name}`, + error, + }), + ); + } + } + } + warnings.push(...warehouseTargetWarnings(stagedExplores)); + + for (const dashboard of fetchedDashboards) { + await writeJson(params.stagedDir, dashboard.rawPath, stampDashboardQueries(dashboard.value, exploreTargetsByKey)); + } + + for (const look of fetchedLooks) { + await writeJson(params.stagedDir, look.rawPath, stampLookQuery(look.value, exploreTargetsByKey)); + } + + let signals: StagedLookerSignalsFile; + try { + signals = stagedLookerSignalsFileSchema.parse(client.getSignals ? 
await client.getSignals() : {}); + } catch (error) { + warnings.push( + issueForFetchError({ + rawPath: STAGED_FILES.signals.dashboardUsage, + entityType: 'signals', + entityId: null, + error, + }), + ); + signals = stagedLookerSignalsFileSchema.parse({}); + } + await writeJson(params.stagedDir, STAGED_FILES.signals.dashboardUsage, signals.dashboardUsage); + await writeJson(params.stagedDir, STAGED_FILES.signals.lookUsage, signals.lookUsage); + await writeJson(params.stagedDir, STAGED_FILES.signals.scheduledPlans, signals.scheduledPlans); + await writeJson(params.stagedDir, STAGED_FILES.signals.favorites, signals.favorites); + + await writeLookerEvidenceDocuments(params.stagedDir); + await writeLookerFetchReport(params.stagedDir, buildFetchReport(skipped, warnings)); + } finally { + await client.cleanup?.(); + } +} + +async function writeJson(stagedDir: string, relPath: string, value: unknown): Promise { + const abs = join(stagedDir, relPath); + await mkdir(dirname(abs), { recursive: true }); + await writeFile(abs, `${JSON.stringify(value, null, 2)}\n`, 'utf-8'); +} + +function safePathSegment(value: string): string { + if (!/^[a-zA-Z0-9_-]+$/.test(value)) { + throw new Error(`Unsafe Looker staged path segment: ${value}`); + } + return value; +} + +/** + * Decides whether an entity listed by Looker must be (re)fetched. + * + * Returns true when there is no `updatedSince` cursor, when the ref has no + * `updatedAt`, or when either timestamp is unparseable. `Date.parse` returns + * NaN for unparseable input and every comparison with NaN is false, so without + * the explicit check such an entity would be skipped on every incremental run. + * Otherwise returns true only when `updatedAt` is strictly later than the cursor. + */ +function shouldFetchEntity(ref: LookerEntityRef, updatedSince: string | null): boolean { + if (!updatedSince || !ref.updatedAt) { + return true; + } + const updatedAtMs = Date.parse(ref.updatedAt); + const updatedSinceMs = Date.parse(updatedSince); + if (Number.isNaN(updatedAtMs) || Number.isNaN(updatedSinceMs)) { + return true; + } + return updatedAtMs > updatedSinceMs; +} + +/** + * Returns the latest parseable `updatedAt` among `refs`, starting from `fallback`. + * + * Refs without an `updatedAt`, or with one `Date.parse` cannot read, are ignored + * so a single malformed value cannot mask later valid timestamps. An unparseable + * `fallback` is replaced by the first parseable ref. Yields null when no usable + * timestamp exists; otherwise the value is normalised with `toISOString()`. + */ +function maxUpdatedAt(refs: LookerEntityRef[], fallback: string | null): string | null { + let max = fallback; + let maxMs = fallback ? Date.parse(fallback) : Number.NaN; + for (const ref of refs) { + if (!ref.updatedAt) { + continue; + } + const refMs = Date.parse(ref.updatedAt); + if (Number.isNaN(refMs)) { + continue; + } + if (Number.isNaN(maxMs) || refMs > maxMs) { + max = ref.updatedAt; + maxMs = refMs; + } + } + if (!max) { + return null; + } + const ms = Date.parse(max); + return Number.isNaN(ms) ? 
null : new Date(ms).toISOString(); +} + +/** + * Validates a raw explore payload and stamps warehouse targets onto it. + * + * The explore's Looker connection name is resolved through `connectionMappingFor`, + * and the explore plus every join that has a `rawSqlTableName` receives a + * `targetTable` from `targetTableFor`; joins without one get `targetTable: null`. + * The stamped object is re-validated with `stagedExploreFileSchema` before it is + * returned together with a summary of the explore-level target. + */ +function stampExploreWarehouseTarget(rawExplore: unknown, config: LookerPullConfig): StampedExploreResult { + const parsed = stagedExploreFileSchema.parse(rawExplore); + const key = exploreKey(parsed.modelName, parsed.exploreName); + const targetWarehouseConnectionId = connectionMappingFor(parsed.connectionName, config); + const targetTable = targetTableFor({ + key, + rawSqlTableName: parsed.rawSqlTableName, + targetWarehouseConnectionId, + config, + entityLabel: `Looker explore ${key}`, + }); + + const explore = stagedExploreFileSchema.parse({ + ...parsed, + targetWarehouseConnectionId, + targetTable, + /* Named exploreJoin so the callback parameter does not shadow join() imported from node:path. */ + joins: parsed.joins.map((exploreJoin) => ({ + ...exploreJoin, + targetTable: exploreJoin.rawSqlTableName + ? targetTableFor({ + key: `${key}.${exploreJoin.name}`, + rawSqlTableName: exploreJoin.rawSqlTableName, + targetWarehouseConnectionId, + config, + entityLabel: `Looker join ${key}.${exploreJoin.name}`, + }) + : null, + })), + }); + + return { + explore, + targetSummary: { + targetWarehouseConnectionId: explore.targetWarehouseConnectionId, + targetTable: explore.targetTable, + }, + }; +} + +/** + * Looks up the warehouse connection id mapped to a Looker connection name. + * + * Returns null for a missing name or a name with no own entry in + * `config.connectionMappings`. The own-property check keeps names such as + * `constructor` or `toString` from resolving to inherited `Object.prototype` members. + */ +function connectionMappingFor(connectionName: string | null, config: LookerPullConfig): string | null { + if (!connectionName) { + return null; + } + if (!Object.prototype.hasOwnProperty.call(config.connectionMappings, connectionName)) { + return null; + } + return config.connectionMappings[connectionName] ?? 
null; +} + +function targetTableFor(input: { + key: string; + rawSqlTableName: string | null; + targetWarehouseConnectionId: string | null; + config: LookerPullConfig; + entityLabel: string; +}): ParsedTargetTable | null { + if (!input.rawSqlTableName && !input.targetWarehouseConnectionId) { + return null; + } + + if (!input.targetWarehouseConnectionId) { + return { + ok: false, + reason: 'no_connection_mapping', + detail: `${input.entityLabel} has no mapped warehouse connection.`, + }; + } + + const parsed = input.config.parsedTargetTables[input.key]; + if (parsed) { + return parsed; + } + + if (!input.rawSqlTableName) { + return null; + } + + return { + ok: false, + reason: 'parse_error', + detail: `${input.entityLabel} has raw sql_table_name but no parsedTargetTables entry for key ${input.key}.`, + }; +} + +function exploreKey(modelName: string, exploreName: string): string { + return `${modelName}.${exploreName}`; +} + +function stampQueryWarehouseTarget( + query: StagedLookerQuery | null, + exploreTargetsByKey: Map, +): StagedLookerQuery | null { + if (!query) { + return null; + } + + const target = exploreTargetsByKey.get(exploreKey(query.model, query.view)); + if (!target) { + return query; + } + + return { + ...query, + targetWarehouseConnectionId: target.targetWarehouseConnectionId, + targetTable: target.targetTable, + }; +} + +function stampDashboardQueries( + dashboard: StagedDashboardFile, + exploreTargetsByKey: Map, +): StagedDashboardFile { + return stagedDashboardFileSchema.parse({ + ...dashboard, + tiles: dashboard.tiles.map((tile) => ({ + ...tile, + query: stampQueryWarehouseTarget(tile.query, exploreTargetsByKey), + })), + }); +} + +function stampLookQuery(look: StagedLookFile, exploreTargetsByKey: Map): StagedLookFile { + return stagedLookFileSchema.parse({ + ...look, + query: stampQueryWarehouseTarget(look.query, exploreTargetsByKey), + }); +} + +function warehouseTargetWarnings(explores: StagedExploreFile[]): StagedLookerFetchIssue[] { + const 
unmapped = new Map(); + const warnings: StagedLookerFetchIssue[] = []; + + for (const explore of explores) { + const targetTable = explore.targetTable; + if (!targetTable || targetTable.ok) { + continue; + } + + const sourceKey = exploreKey(explore.modelName, explore.exploreName); + const lookerConnectionName = explore.connectionName ?? 'missing_connection_name'; + + if (targetTable.reason === 'no_connection_mapping') { + const existing = unmapped.get(lookerConnectionName) ?? []; + existing.push(sourceKey); + unmapped.set(lookerConnectionName, existing); + continue; + } + + warnings.push({ + rawPath: `looker_connection_mappings/${safeWarningPathSegment(lookerConnectionName)}`, + entityType: 'looker_connection_mapping', + entityId: explore.connectionName, + severity: 'warning', + statusCode: null, + message: `Looker explore ${sourceKey} has sql_table_name that cannot be mapped to a physical warehouse table: ${targetTable.reason}.`, + retryRecommended: false, + kind: warningKindForReason(targetTable.reason), + details: { + lookerConnectionName, + rawSqlTableName: explore.rawSqlTableName, + reason: targetTable.reason, + }, + }); + } + + for (const [lookerConnectionName, affectedExplores] of [...unmapped.entries()].sort(([a], [b]) => + a.localeCompare(b), + )) { + const sortedAffectedExplores = [...affectedExplores].sort(); + warnings.push({ + rawPath: `looker_connection_mappings/${safeWarningPathSegment(lookerConnectionName)}`, + entityType: 'looker_connection_mapping', + entityId: lookerConnectionName === 'missing_connection_name' ? null : lookerConnectionName, + severity: 'warning', + statusCode: null, + message: `Looker connection ${lookerConnectionName} is not mapped to a warehouse connection; ${sortedAffectedExplores.length} explore${sortedAffectedExplores.length === 1 ? 
'' : 's'} will be wiki-only.`, + retryRecommended: false, + kind: 'unmapped_looker_connection', + details: { + lookerConnectionName, + affectedExplores: sortedAffectedExplores, + }, + }); + } + + return warnings; +} + +function warningKindForReason(reason: ParsedTargetTableFailureReason): StagedLookerFetchIssue['kind'] { + if (reason === 'looker_template_unresolved') { + return 'looker_template_unresolved'; + } + if (reason === 'derived_table_not_supported') { + return 'derived_table_not_supported'; + } + return 'unparseable_sql_table_name'; +} + +function safeWarningPathSegment(value: string): string { + return value.replace(/[^a-zA-Z0-9_-]+/g, '_'); +} + +function issueForFetchError(input: { + rawPath: string; + entityType: StagedLookerFetchIssue['entityType']; + entityId: string | null; + error: unknown; + severity?: StagedLookerFetchIssue['severity']; +}): StagedLookerFetchIssue { + const statusCode = errorStatusCode(input.error); + return { + rawPath: input.rawPath, + entityType: input.entityType, + entityId: input.entityId, + severity: input.severity ?? (input.entityType === 'signals' ? 'warning' : 'error'), + statusCode, + message: errorMessage(input.error), + retryRecommended: statusCode === 429, + }; +} + +function errorMessage(error: unknown): string { + return error instanceof Error ? error.message : String(error); +} + +function errorStatusCode(error: unknown): number | null { + if (!error || typeof error !== 'object') { + return null; + } + const record = error as Record; + const direct = record.statusCode ?? record.status; + if (typeof direct === 'number') { + return direct; + } + if (typeof direct === 'string') { + const parsed = Number(direct); + return Number.isFinite(parsed) ? 
parsed : null; + } + const response = record.response; + if (response && typeof response === 'object') { + return errorStatusCode(response); + } + return null; +} + +function buildFetchReport( + skipped: StagedLookerFetchIssue[], + warnings: StagedLookerFetchIssue[], +): StagedLookerFetchReport { + const retryRecommended = [...skipped, ...warnings].some((issue) => issue.retryRecommended); + const hasWarehouseTargetWarnings = warnings.some((issue) => issue.entityType === 'looker_connection_mapping'); + return { + status: skipped.length > 0 || hasWarehouseTargetWarnings ? 'partial' : 'success', + retryRecommended, + skipped, + warnings, + }; +} diff --git a/packages/context/src/ingest/adapters/looker/local-looker.adapter.ts b/packages/context/src/ingest/adapters/looker/local-looker.adapter.ts new file mode 100644 index 00000000..7736f425 --- /dev/null +++ b/packages/context/src/ingest/adapters/looker/local-looker.adapter.ts @@ -0,0 +1,67 @@ +import type { KloLocalProject, KloProjectConnectionConfig } from '../../../project/index.js'; +import { + DefaultLookerClientFactory, + DefaultLookerConnectionClientFactory, + type LookerCredentialResolver, +} from './factory.js'; +import { LookerSourceAdapter } from './looker.adapter.js'; + +function stringField(value: unknown): string | null { + return typeof value === 'string' && value.trim().length > 0 ? value.trim() : null; +} + +function resolveEnvReference(ref: string, env: NodeJS.ProcessEnv): string | null { + if (!ref.startsWith('env:')) { + return null; + } + return stringField(env[ref.slice('env:'.length)]); +} + +export function lookerCredentialsFromLocalConnection( + connectionId: string, + connection: KloProjectConnectionConfig | undefined, + env: NodeJS.ProcessEnv = process.env, +) { + if (!connection || String(connection.driver).toLowerCase() !== 'looker') { + throw new Error(`Connection "${connectionId}" is not a Looker connection`); + } + const baseUrl = stringField(connection.base_url) ?? 
stringField(connection.baseUrl) ?? stringField(connection.url); + const clientId = stringField(connection.client_id) ?? stringField(connection.clientId); + /* stringField() returns the trimmed value; resolve that instead of the raw field so a reference padded with whitespace (e.g. " env:LOOKER_SECRET ") still matches the env: prefix and the variable name. */ + const snakeSecretRef = stringField(connection.client_secret_ref); + const camelSecretRef = stringField(connection.clientSecretRef); + const clientSecret = + stringField(connection.client_secret) ?? + stringField(connection.clientSecret) ?? + (snakeSecretRef ? resolveEnvReference(snakeSecretRef, env) : null) ?? + (camelSecretRef ? resolveEnvReference(camelSecretRef, env) : null); + + if (!baseUrl) { + throw new Error(`Connection "${connectionId}" is missing Looker base_url`); + } + if (!clientId) { + throw new Error(`Connection "${connectionId}" is missing Looker client_id`); + } + if (!clientSecret) { + throw new Error(`Connection "${connectionId}" is missing Looker client_secret or client_secret_ref`); + } + return { base_url: baseUrl, client_id: clientId, client_secret: clientSecret }; +} + +export function createLocalLookerCredentialResolver( + project: KloLocalProject, + env: NodeJS.ProcessEnv = process.env, +): LookerCredentialResolver { + return { + async resolve(lookerConnectionId) { + return lookerCredentialsFromLocalConnection(lookerConnectionId, project.config.connections[lookerConnectionId], env); + }, + }; +} + +export function createLocalLookerSourceAdapter( + project: KloLocalProject, + env: NodeJS.ProcessEnv = process.env, +): LookerSourceAdapter { + const connectionFactory = new DefaultLookerConnectionClientFactory(createLocalLookerCredentialResolver(project, env)); + return new LookerSourceAdapter({ + clientFactory: new DefaultLookerClientFactory(connectionFactory), + }); +} diff --git a/packages/context/src/ingest/adapters/looker/local-runtime-store.test.ts b/packages/context/src/ingest/adapters/looker/local-runtime-store.test.ts new file mode 100644 index 00000000..7eecfccd --- /dev/null +++ b/packages/context/src/ingest/adapters/looker/local-runtime-store.test.ts @@ -0,0 +1,116 @@ +import { mkdtemp } from 'node:fs/promises'; 
+import { tmpdir } from 'node:os'; +import { join } from 'node:path'; +import { describe, expect, it } from 'vitest'; +import { LocalLookerRuntimeStore } from './local-runtime-store.js'; + +describe('LocalLookerRuntimeStore', () => { + async function store() { + const dir = await mkdtemp(join(tmpdir(), 'klo-looker-store-')); + return new LocalLookerRuntimeStore({ + dbPath: join(dir, 'db.sqlite'), + now: () => new Date('2026-05-05T12:00:00.000Z'), + }); + } + + it('stores cursors and connection mappings', async () => { + const local = await store(); + + await local.setCursors('prod-looker', { + dashboardsLastSyncedAt: '2026-05-01T00:00:00.000Z', + looksLastSyncedAt: null, + }); + await local.upsertConnectionMapping({ + lookerConnectionId: 'prod-looker', + lookerConnectionName: 'bq_reporting', + kloConnectionId: 'prod-warehouse', + source: 'cli', + }); + + await expect(local.readCursors('prod-looker')).resolves.toEqual({ + dashboardsLastSyncedAt: '2026-05-01T00:00:00.000Z', + looksLastSyncedAt: null, + }); + await expect(local.readMappings('prod-looker')).resolves.toEqual([ + { + lookerConnectionName: 'bq_reporting', + kloConnectionId: 'prod-warehouse', + lookerHost: null, + lookerDatabase: null, + lookerDialect: null, + }, + ]); + }); + + it('refreshes discovered metadata without dropping local targets', async () => { + const local = await store(); + await local.upsertConnectionMapping({ + lookerConnectionId: 'prod-looker', + lookerConnectionName: 'bq_reporting', + kloConnectionId: 'prod-warehouse', + source: 'cli', + }); + + await local.refreshDiscoveredConnections({ + lookerConnectionId: 'prod-looker', + discovered: [ + { + name: 'bq_reporting', + host: 'bigquery.googleapis.com', + database: 'analytics', + schema: null, + dialect: 'bigquery_standard_sql', + }, + ], + }); + + await expect(local.listConnectionMappings('prod-looker')).resolves.toEqual([ + { + lookerConnectionName: 'bq_reporting', + kloConnectionId: 'prod-warehouse', + lookerHost: 
'bigquery.googleapis.com', + lookerDatabase: 'analytics', + lookerDialect: 'bigquery_standard_sql', + source: 'refresh', + }, + ]); + }); + + it('applies yaml mapping intent while preserving refresh metadata and cli overrides', async () => { + const local = await store(); + await local.refreshDiscoveredConnections({ + lookerConnectionId: 'prod-looker', + discovered: [{ name: 'analytics', host: 'looker-db.test', database: 'warehouse', schema: null, dialect: 'postgres' }], + }); + await local.upsertConnectionMapping({ + lookerConnectionId: 'prod-looker', + lookerConnectionName: 'manual', + kloConnectionId: 'cli-warehouse', + source: 'cli', + }); + + await local.applyYamlBootstrap({ + lookerConnectionId: 'prod-looker', + mappings: [ + { lookerConnectionName: 'analytics', kloConnectionId: 'yaml-warehouse' }, + { lookerConnectionName: 'manual', kloConnectionId: 'yaml-warehouse' }, + ], + }); + + await expect(local.listConnectionMappings('prod-looker')).resolves.toMatchObject([ + { + lookerConnectionName: 'analytics', + kloConnectionId: 'yaml-warehouse', + lookerHost: 'looker-db.test', + lookerDatabase: 'warehouse', + lookerDialect: 'postgres', + source: 'klo.yaml', + }, + { + lookerConnectionName: 'manual', + kloConnectionId: 'cli-warehouse', + source: 'cli', + }, + ]); + }); +}); diff --git a/packages/context/src/ingest/adapters/looker/local-runtime-store.ts b/packages/context/src/ingest/adapters/looker/local-runtime-store.ts new file mode 100644 index 00000000..7230d336 --- /dev/null +++ b/packages/context/src/ingest/adapters/looker/local-runtime-store.ts @@ -0,0 +1,280 @@ +import { mkdirSync } from 'node:fs'; +import { dirname } from 'node:path'; +import Database from 'better-sqlite3'; +import type { LookerWarehouseConnectionInfo } from './client.js'; +import type { LookerConnectionMapping } from './mapping.js'; +import type { LookerRuntimeCursors } from './types.js'; + +export type LocalLookerMappingSource = 'klo.yaml' | 'cli' | 'refresh'; + +interface 
LocalLookerRuntimeStoreOptions { + dbPath: string; + now?: () => Date; +} + +export interface LocalLookerConnectionMappingListRow extends LookerConnectionMapping { + source: LocalLookerMappingSource; +} + +export interface UpsertLocalLookerConnectionMappingInput { + lookerConnectionId: string; + lookerConnectionName: string; + kloConnectionId: string | null; + source: LocalLookerMappingSource; +} + +interface ApplyLocalLookerYamlBootstrapInput { + lookerConnectionId: string; + mappings: Array<{ + lookerConnectionName: string; + kloConnectionId: string | null; + }>; +} + +export interface RefreshLocalLookerDiscoveredConnectionsInput { + lookerConnectionId: string; + discovered: LookerWarehouseConnectionInfo[]; +} + +export interface ClearLocalLookerMappingsInput { + lookerConnectionId: string; + lookerConnectionName?: string; +} + +export interface LookerSourceStateReader { + readMappings(lookerConnectionId: string): Promise; + readCursors(lookerConnectionId: string): Promise; +} + +export class LocalLookerRuntimeStore implements LookerSourceStateReader { + private readonly db: Database.Database; + private readonly now: () => Date; + + constructor(options: LocalLookerRuntimeStoreOptions) { + mkdirSync(dirname(options.dbPath), { recursive: true }); + this.db = new Database(options.dbPath); + this.db.pragma('journal_mode = WAL'); + this.db.pragma('foreign_keys = ON'); + this.now = options.now ?? 
(() => new Date()); + this.db.exec(` + CREATE TABLE IF NOT EXISTS local_looker_runtime_config ( + looker_connection_id TEXT PRIMARY KEY, + dashboards_last_synced_at TEXT, + looks_last_synced_at TEXT, + updated_at TEXT NOT NULL + ); + + CREATE TABLE IF NOT EXISTS local_looker_connection_mappings ( + looker_connection_id TEXT NOT NULL, + looker_connection_name TEXT NOT NULL, + klo_connection_id TEXT, + looker_host TEXT, + looker_database TEXT, + looker_dialect TEXT, + source TEXT NOT NULL, + updated_at TEXT NOT NULL, + PRIMARY KEY (looker_connection_id, looker_connection_name) + ); + `); + } + + async applyYamlBootstrap(input: ApplyLocalLookerYamlBootstrapInput): Promise { + const timestamp = this.now().toISOString(); + const apply = this.db.transaction(() => { + const existing = this.db.prepare(` + SELECT klo_connection_id, source + FROM local_looker_connection_mappings + WHERE looker_connection_id = ? AND looker_connection_name = ? + `); + const insert = this.db.prepare(` + INSERT INTO local_looker_connection_mappings ( + looker_connection_id, + looker_connection_name, + klo_connection_id, + looker_host, + looker_database, + looker_dialect, + source, + updated_at + ) + VALUES (?, ?, ?, NULL, NULL, NULL, 'klo.yaml', ?) + `); + const updateRefreshRow = this.db.prepare(` + UPDATE local_looker_connection_mappings + SET klo_connection_id = ?, + source = 'klo.yaml', + updated_at = ? + WHERE looker_connection_id = ? + AND looker_connection_name = ? 
+ AND source = 'refresh' + AND klo_connection_id IS NULL + `); + + for (const mapping of input.mappings) { + const row = existing.get(input.lookerConnectionId, mapping.lookerConnectionName) as + | { klo_connection_id: string | null; source: LocalLookerMappingSource } + | undefined; + if (!row) { + insert.run(input.lookerConnectionId, mapping.lookerConnectionName, mapping.kloConnectionId, timestamp); + continue; + } + if (row.source === 'refresh' && row.klo_connection_id === null) { + updateRefreshRow.run(mapping.kloConnectionId, timestamp, input.lookerConnectionId, mapping.lookerConnectionName); + } + } + }); + + apply(); + } + + async readCursors(lookerConnectionId: string): Promise { + const row = this.db + .prepare( + ` + SELECT dashboards_last_synced_at, looks_last_synced_at + FROM local_looker_runtime_config + WHERE looker_connection_id = ? + `, + ) + .get(lookerConnectionId) as { dashboards_last_synced_at: string | null; looks_last_synced_at: string | null } | undefined; + + return { + dashboardsLastSyncedAt: row?.dashboards_last_synced_at ?? null, + looksLastSyncedAt: row?.looks_last_synced_at ?? null, + }; + } + + async setCursors(lookerConnectionId: string, cursors: LookerRuntimeCursors): Promise { + this.db + .prepare( + ` + INSERT INTO local_looker_runtime_config ( + looker_connection_id, + dashboards_last_synced_at, + looks_last_synced_at, + updated_at + ) + VALUES (?, ?, ?, ?) 
+ ON CONFLICT(looker_connection_id) DO UPDATE SET + dashboards_last_synced_at = excluded.dashboards_last_synced_at, + looks_last_synced_at = excluded.looks_last_synced_at, + updated_at = excluded.updated_at + `, + ) + .run(lookerConnectionId, cursors.dashboardsLastSyncedAt, cursors.looksLastSyncedAt, this.now().toISOString()); + } + + async readMappings(lookerConnectionId: string): Promise { + return (await this.listConnectionMappings(lookerConnectionId)).map(({ source: _source, ...mapping }) => mapping); + } + + async listConnectionMappings(lookerConnectionId: string): Promise { + const rows = this.db + .prepare( + ` + SELECT + looker_connection_name, + klo_connection_id, + looker_host, + looker_database, + looker_dialect, + source + FROM local_looker_connection_mappings + WHERE looker_connection_id = ? + ORDER BY looker_connection_name + `, + ) + .all(lookerConnectionId) as Array<{ + looker_connection_name: string; + klo_connection_id: string | null; + looker_host: string | null; + looker_database: string | null; + looker_dialect: string | null; + source: LocalLookerMappingSource; + }>; + + return rows.map((row) => ({ + lookerConnectionName: row.looker_connection_name, + kloConnectionId: row.klo_connection_id, + lookerHost: row.looker_host, + lookerDatabase: row.looker_database, + lookerDialect: row.looker_dialect, + source: row.source, + })); + } + + async upsertConnectionMapping(input: UpsertLocalLookerConnectionMappingInput): Promise { + this.db + .prepare( + ` + INSERT INTO local_looker_connection_mappings ( + looker_connection_id, + looker_connection_name, + klo_connection_id, + looker_host, + looker_database, + looker_dialect, + source, + updated_at + ) + VALUES (?, ?, ?, NULL, NULL, NULL, ?, ?) 
+ ON CONFLICT(looker_connection_id, looker_connection_name) DO UPDATE SET + klo_connection_id = excluded.klo_connection_id, + source = excluded.source, + updated_at = excluded.updated_at + `, + ) + .run(input.lookerConnectionId, input.lookerConnectionName, input.kloConnectionId, input.source, this.now().toISOString()); + } + + async refreshDiscoveredConnections(input: RefreshLocalLookerDiscoveredConnectionsInput): Promise { + const timestamp = this.now().toISOString(); + const update = this.db.transaction(() => { + const upsert = this.db.prepare(` + INSERT INTO local_looker_connection_mappings ( + looker_connection_id, + looker_connection_name, + klo_connection_id, + looker_host, + looker_database, + looker_dialect, + source, + updated_at + ) + VALUES (?, ?, NULL, ?, ?, ?, 'refresh', ?) + ON CONFLICT(looker_connection_id, looker_connection_name) DO UPDATE SET + looker_host = excluded.looker_host, + looker_database = excluded.looker_database, + looker_dialect = excluded.looker_dialect, + source = excluded.source, + updated_at = excluded.updated_at + `); + for (const connection of input.discovered) { + upsert.run( + input.lookerConnectionId, + connection.name, + connection.host, + connection.database, + connection.dialect, + timestamp, + ); + } + }); + update(); + } + + async clearConnectionMappings(input: ClearLocalLookerMappingsInput): Promise { + if (input.lookerConnectionName) { + this.db + .prepare( + ` + DELETE FROM local_looker_connection_mappings + WHERE looker_connection_id = ? AND looker_connection_name = ? 
+ `, + ) + .run(input.lookerConnectionId, input.lookerConnectionName); + return; + } + this.db.prepare('DELETE FROM local_looker_connection_mappings WHERE looker_connection_id = ?').run(input.lookerConnectionId); + } +} diff --git a/packages/context/src/ingest/adapters/looker/looker.adapter.test.ts b/packages/context/src/ingest/adapters/looker/looker.adapter.test.ts new file mode 100644 index 00000000..64a35622 --- /dev/null +++ b/packages/context/src/ingest/adapters/looker/looker.adapter.test.ts @@ -0,0 +1,125 @@ +import { mkdir, mkdtemp, readFile, rm, writeFile } from 'node:fs/promises'; +import { tmpdir } from 'node:os'; +import { join } from 'node:path'; +import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'; +import type { LookerRuntimeClient } from './fetch.js'; +import { LookerSourceAdapter } from './looker.adapter.js'; + +const connectionId = '11111111-1111-4111-8111-111111111111'; + +function makeClient(): LookerRuntimeClient { + return { + listDashboards: vi.fn().mockResolvedValue([]), + getDashboard: vi.fn(), + listLooks: vi.fn().mockResolvedValue([]), + getLook: vi.fn(), + listFolders: vi.fn().mockResolvedValue({ folders: [] }), + listUsers: vi.fn().mockResolvedValue([]), + listGroups: vi.fn().mockResolvedValue([]), + listLookmlModels: vi.fn().mockResolvedValue({ + models: [{ name: 'b2b', label: 'B2B', explores: [{ name: 'sales_pipeline', label: 'Sales Pipeline' }] }], + }), + getExplore: vi.fn().mockResolvedValue({ + modelName: 'b2b', + exploreName: 'sales_pipeline', + label: 'Sales Pipeline', + description: null, + fields: { dimensions: [], measures: [] }, + joins: [], + }), + }; +} + +describe('LookerSourceAdapter', () => { + let stagedDir: string; + + beforeEach(async () => { + stagedDir = await mkdtemp(join(tmpdir(), 'looker-adapter-')); + }); + + afterEach(async () => { + await rm(stagedDir, { recursive: true, force: true }); + }); + + it('exposes source="looker" and skillNames=["looker_ingest"]', () => { + const adapter = new 
LookerSourceAdapter({ clientFactory: { createClient: () => makeClient() } }); + expect(adapter.source).toBe('looker'); + expect(adapter.skillNames).toEqual(['looker_ingest']); + }); + + it('enables context evidence indexing and delegates triage signals', async () => { + const adapter = new LookerSourceAdapter({ clientFactory: { createClient: () => makeClient() } }); + + expect(adapter.evidenceIndexing).toBe('documents'); + expect(adapter.triageSupported).toBe(true); + await expect(adapter.getTriageSignals?.(stagedDir, 'looker:dashboard:10')).resolves.toMatchObject({ + objectType: 'looker_dashboard', + }); + }); + + it('fetches, detects, and chunks a runtime bundle through the composed adapter', async () => { + const adapter = new LookerSourceAdapter({ + clientFactory: { createClient: vi.fn().mockResolvedValue(makeClient()) }, + now: () => new Date('2026-04-30T12:30:00.000Z'), + }); + + await mkdir(stagedDir, { recursive: true }); + await adapter.fetch({ lookerConnectionId: connectionId }, stagedDir, { connectionId, sourceKey: 'looker' }); + + expect(await adapter.detect(stagedDir)).toBe(true); + expect(await readFile(join(stagedDir, 'explores/b2b/sales_pipeline.json'), 'utf-8')).toContain('sales_pipeline'); + + const result = await adapter.chunk(stagedDir); + expect(result.workUnits.map((wu) => wu.unitKey)).toEqual(['looker-explore-b2b-sales_pipeline']); + }); + + it('passes pull success notifications to the server callback', async () => { + const onPullSucceeded = vi.fn().mockResolvedValue(undefined); + const adapter = new LookerSourceAdapter({ + clientFactory: { createClient: () => makeClient() }, + onPullSucceeded, + }); + const completedAt = new Date('2026-04-30T12:00:00.000Z'); + + await adapter.onPullSucceeded({ + connectionId, + sourceKey: 'looker', + syncId: 'sync-1', + trigger: 'scheduled_pull', + completedAt, + stagedDir: '/tmp/staged', + }); + + expect(onPullSucceeded).toHaveBeenCalledWith({ + connectionId, + sourceKey: 'looker', + syncId: 'sync-1', + 
trigger: 'scheduled_pull', + completedAt, + stagedDir: '/tmp/staged', + }); + }); + + it('describes incremental fetch scope from the staged scope file', async () => { + await mkdir(join(stagedDir, 'dashboards'), { recursive: true }); + await writeFile( + join(stagedDir, 'looker-scope.json'), + JSON.stringify( + { + mode: 'incremental', + knownCurrentRawPaths: ['dashboards/10.json', 'dashboards/11.json'], + fetchedRawPaths: ['dashboards/11.json'], + }, + null, + 2, + ), + ); + const adapter = new LookerSourceAdapter({ clientFactory: { createClient: () => makeClient() } }); + + const scope = await adapter.describeScope(stagedDir); + + expect(scope.isPathInScope('dashboards/10.json')).toBe(false); + expect(scope.isPathInScope('dashboards/11.json')).toBe(true); + expect(scope.isPathInScope('dashboards/12.json')).toBe(true); + }); +}); diff --git a/packages/context/src/ingest/adapters/looker/looker.adapter.ts b/packages/context/src/ingest/adapters/looker/looker.adapter.ts new file mode 100644 index 00000000..54b6d991 --- /dev/null +++ b/packages/context/src/ingest/adapters/looker/looker.adapter.ts @@ -0,0 +1,70 @@ +import type { ChunkResult, DiffSet, FetchContext, IngestTrigger, ScopeDescriptor, SourceAdapter } from '../../types.js'; +import { chunkLookerStagedDir } from './chunk.js'; +import { detectLookerStagedDir } from './detect.js'; +import { getLookerTriageSignals } from './evidence-documents.js'; +import { fetchLookerRuntimeBundle, type LookerClientFactory } from './fetch.js'; +import { readLookerFetchReport } from './fetch-report.js'; +import { describeLookerScope } from './scope.js'; +import { listLookerTargetConnectionIds } from './target-connections.js'; + +interface LookerPullSucceededContext { + connectionId: string; + sourceKey: string; + syncId: string; + trigger: IngestTrigger; + completedAt: Date; + stagedDir: string; +} + +export interface LookerSourceAdapterDeps { + clientFactory: LookerClientFactory; + now?: () => Date; + onPullSucceeded?: (ctx: 
LookerPullSucceededContext) => Promise; +} + +export class LookerSourceAdapter implements SourceAdapter { + readonly source = 'looker'; + readonly skillNames: string[] = ['looker_ingest']; + readonly evidenceIndexing = 'documents' as const; + readonly triageSupported = true; + + constructor(private readonly deps: LookerSourceAdapterDeps) {} + + detect(stagedDir: string): Promise { + return detectLookerStagedDir(stagedDir); + } + + fetch(pullConfig: unknown, stagedDir: string, ctx: FetchContext): Promise { + return fetchLookerRuntimeBundle({ + pullConfig, + stagedDir, + ctx, + clientFactory: this.deps.clientFactory, + now: this.deps.now, + }); + } + + chunk(stagedDir: string, diffSet?: DiffSet): Promise { + return chunkLookerStagedDir(stagedDir, diffSet); + } + + readFetchReport(stagedDir: string) { + return readLookerFetchReport(stagedDir); + } + + listTargetConnectionIds(stagedDir: string): Promise { + return listLookerTargetConnectionIds(stagedDir); + } + + getTriageSignals(stagedDir: string, externalId: string) { + return getLookerTriageSignals(stagedDir, externalId); + } + + describeScope(stagedDir: string): Promise { + return describeLookerScope(stagedDir); + } + + async onPullSucceeded(ctx: LookerPullSucceededContext): Promise { + await this.deps.onPullSucceeded?.(ctx); + } +} diff --git a/packages/context/src/ingest/adapters/looker/mapping.test.ts b/packages/context/src/ingest/adapters/looker/mapping.test.ts new file mode 100644 index 00000000..e86f65d0 --- /dev/null +++ b/packages/context/src/ingest/adapters/looker/mapping.test.ts @@ -0,0 +1,384 @@ +import { describe, expect, it, vi } from 'vitest'; +import type { StagedExploreFile, StagedLookmlModelsFile } from './types.js'; +import { + buildLookerPullConfigFromInputs, + collectExploreParseItems, + computeLookerMappingDrift, + discoverLookerConnections, + lookerDialectToConnectionType, + projectParsedIdentifier, + refreshLookerMappingPlaceholders, + sqlglotDialectForConnectionType, + 
suggestKloConnectionForLookerConnection, + validateLookerMappings, + validateLookerWarehouseTarget, +} from './mapping.js'; + +const liveConnections = [ + { + name: 'b2b_sandbox_bq', + host: 'warehouse.example.com', + database: 'analytics', + schema: null, + dialect: 'bigquery_standard_sql', + }, + { + name: 'pg_runtime', + host: 'pg.internal:5432', + database: 'app', + schema: 'public', + dialect: 'postgres', + }, +]; + +const mappedExplore: StagedExploreFile = { + modelName: 'b2b', + exploreName: 'sales_pipeline', + label: 'Sales Pipeline', + description: null, + rawSqlTableName: 'proj.analytics.opportunities AS opportunities', + connectionName: 'b2b_sandbox_bq', + viewName: 'opportunities', + fields: { dimensions: [], measures: [] }, + joins: [ + { + name: 'accounts', + type: 'left_outer', + relationship: 'many_to_one', + rawSqlTableName: 'proj.analytics.accounts', + sqlOn: null, + from: null, + targetTable: null, + }, + ], + targetWarehouseConnectionId: null, + targetTable: null, +}; + +const models: StagedLookmlModelsFile = { + models: [{ name: 'b2b', label: 'B2B', explores: [{ name: 'sales_pipeline', label: 'Sales Pipeline' }] }], +}; + +describe('discoverLookerConnections', () => { + it('delegates to the runtime client connection discovery method', async () => { + const client = { listLookerConnections: vi.fn().mockResolvedValue(liveConnections) }; + + await expect(discoverLookerConnections(client)).resolves.toEqual(liveConnections); + expect(client.listLookerConnections).toHaveBeenCalledTimes(1); + }); +}); + +describe('looker dialect and target validation helpers', () => { + it('maps Looker dialect names to KLO connection types', () => { + expect(lookerDialectToConnectionType('bigquery_standard_sql')).toBe('BIGQUERY'); + expect(lookerDialectToConnectionType('postgres')).toBe('POSTGRESQL'); + expect(lookerDialectToConnectionType('mssql')).toBe('SQLSERVER'); + expect(lookerDialectToConnectionType('unknown')).toBeNull(); + }); + + it('maps supported warehouse 
connection types to sqlglot dialects', () => { + expect(sqlglotDialectForConnectionType('BIGQUERY')).toBe('bigquery'); + expect(sqlglotDialectForConnectionType('POSTGRESQL')).toBe('postgres'); + expect(sqlglotDialectForConnectionType('LOOKER')).toBeNull(); + }); + + it('returns a structured failure for unsupported Looker warehouse targets', () => { + expect(validateLookerWarehouseTarget('LOOKER')).toEqual({ + ok: false, + reason: 'Connection type LOOKER cannot be used as a Looker warehouse mapping target', + }); + }); +}); + +describe('suggestKloConnectionForLookerConnection', () => { + it('returns the single deterministic target with matching type, host, and database', () => { + expect( + suggestKloConnectionForLookerConnection({ + lookerConnection: liveConnections[1], + candidateConnections: [ + { + id: 'wrong-type', + connection_type: 'MYSQL', + connection_params: { host: 'pg.internal', database: 'app' }, + }, + { + id: 'pg-target', + connection_type: 'POSTGRESQL', + connection_params: { host: 'PG.INTERNAL', database: 'APP' }, + }, + ], + }), + ).toBe('pg-target'); + }); + + it('returns null when more than one target matches', () => { + expect( + suggestKloConnectionForLookerConnection({ + lookerConnection: liveConnections[1], + candidateConnections: [ + { + id: 'first', + connection_type: 'POSTGRESQL', + connection_params: { host: 'pg.internal', database: 'app' }, + }, + { + id: 'second', + connection_type: 'POSTGRESQL', + connection_params: { host: 'pg.internal:5432', database: 'APP' }, + }, + ], + }), + ).toBeNull(); + }); +}); + +describe('refreshLookerMappingPlaceholders', () => { + it('adds newly discovered placeholders and refreshes live metadata without dropping saved targets', () => { + expect( + refreshLookerMappingPlaceholders({ + stored: [ + { + lookerConnectionName: 'b2b_sandbox_bq', + kloConnectionId: 'warehouse', + lookerHost: null, + lookerDatabase: null, + lookerDialect: null, + }, + ], + live: liveConnections, + }), + ).toEqual({ + changed: 
true, + mappings: [ + { + lookerConnectionName: 'b2b_sandbox_bq', + kloConnectionId: 'warehouse', + lookerHost: 'warehouse.example.com', + lookerDatabase: 'analytics', + lookerDialect: 'bigquery_standard_sql', + }, + { + lookerConnectionName: 'pg_runtime', + kloConnectionId: null, + lookerHost: 'pg.internal:5432', + lookerDatabase: 'app', + lookerDialect: 'postgres', + }, + ], + }); + }); +}); + +describe('computeLookerMappingDrift and validateLookerMappings', () => { + it('reports unmapped live connections, stale stored mappings, and in-sync mappings', () => { + expect( + computeLookerMappingDrift({ + storedMappings: [ + { + lookerConnectionName: 'b2b_sandbox_bq', + kloConnectionId: 'warehouse', + lookerHost: null, + lookerDatabase: null, + lookerDialect: null, + }, + { + lookerConnectionName: 'stale_runtime', + kloConnectionId: 'warehouse', + lookerHost: null, + lookerDatabase: null, + lookerDialect: null, + }, + ], + discovered: liveConnections, + }), + ).toEqual({ + unmappedDiscovered: [liveConnections[1]], + staleMappings: [{ lookerConnectionName: 'stale_runtime', reason: 'looker_connection_not_found' }], + inSync: [{ lookerConnectionName: 'b2b_sandbox_bq', kloConnectionId: 'warehouse' }], + }); + }); + + it('validates missing and unsupported target connection ids', () => { + expect( + validateLookerMappings({ + mappings: [ + { + lookerConnectionName: 'b2b_sandbox_bq', + kloConnectionId: 'missing', + lookerHost: null, + lookerDatabase: null, + lookerDialect: null, + }, + { + lookerConnectionName: 'pg_runtime', + kloConnectionId: 'looker-target', + lookerHost: null, + lookerDatabase: null, + lookerDialect: null, + }, + ], + knownKloConnectionIds: new Set(['looker-target']), + knownConnectionTypes: new Map([['looker-target', 'LOOKER']]), + }), + ).toEqual({ + ok: false, + errors: [ + { key: 'b2b_sandbox_bq', reason: 'KLO connection missing does not exist' }, + { + key: 'pg_runtime', + reason: 'Connection type LOOKER cannot be used as a Looker warehouse mapping 
target', + }, + ], + }); + }); +}); + +describe('collectExploreParseItems and projectParsedIdentifier', () => { + it('collects base explore and join parser inputs for mapped explores', () => { + expect( + collectExploreParseItems({ + explore: mappedExplore, + connectionMappings: { b2b_sandbox_bq: 'warehouse' }, + targetConnections: new Map([['warehouse', { id: 'warehouse', connection_type: 'BIGQUERY' }]]), + }), + ).toEqual({ + parsedTargetTables: {}, + parseItems: [ + { + key: 'b2b.sales_pipeline', + sql_table_name: 'proj.analytics.opportunities AS opportunities', + dialect: 'bigquery', + }, + { + key: 'b2b.sales_pipeline.accounts', + sql_table_name: 'proj.analytics.accounts', + dialect: 'bigquery', + }, + ], + }); + }); + + it('projects successful and failed parser rows into KLO parsed target tables', () => { + expect( + projectParsedIdentifier({ + ok: true, + catalog: 'proj', + schema: 'analytics', + name: 'accounts', + canonical_table: 'proj.analytics.accounts', + }), + ).toEqual({ + ok: true, + catalog: 'proj', + schema: 'analytics', + name: 'accounts', + canonicalTable: 'proj.analytics.accounts', + }); + + expect(projectParsedIdentifier({ ok: false, reason: 'derived_table_not_supported' })).toEqual({ + ok: false, + reason: 'derived_table_not_supported', + }); + }); +}); + +describe('buildLookerPullConfigFromInputs', () => { + it('builds the hosted-equivalent Looker pull config from caller-loaded inputs', async () => { + const parser = { + parse: vi.fn().mockResolvedValue({ + 'b2b.sales_pipeline': { + ok: true, + catalog: 'proj', + schema: 'analytics', + name: 'opportunities', + canonical_table: 'proj.analytics.opportunities', + }, + 'b2b.sales_pipeline.accounts': { + ok: true, + catalog: 'proj', + schema: 'analytics', + name: 'accounts', + canonical_table: 'proj.analytics.accounts', + }, + }), + }; + const client = { + listLookmlModels: vi.fn().mockResolvedValue(models), + getExplore: vi.fn().mockResolvedValue(mappedExplore), + }; + + await expect( + 
buildLookerPullConfigFromInputs({ + lookerConnectionId: 'prod-looker', + cursors: { + dashboardsLastSyncedAt: '2026-05-01T00:00:00.000Z', + looksLastSyncedAt: null, + }, + refreshedMappings: [ + { + lookerConnectionName: 'b2b_sandbox_bq', + kloConnectionId: 'warehouse', + lookerHost: 'warehouse.example.com', + lookerDatabase: 'analytics', + lookerDialect: 'bigquery_standard_sql', + }, + ], + targetConnections: new Map([['warehouse', { id: 'warehouse', connection_type: 'BIGQUERY' }]]), + client, + parser, + }), + ).resolves.toEqual({ + lookerConnectionId: 'prod-looker', + dashboardUpdatedSince: '2026-05-01T00:00:00.000Z', + lookUpdatedSince: null, + connectionMappings: { b2b_sandbox_bq: 'warehouse' }, + connectionTypes: { b2b_sandbox_bq: 'BIGQUERY' }, + parsedTargetTables: { + 'b2b.sales_pipeline': { + ok: true, + catalog: 'proj', + schema: 'analytics', + name: 'opportunities', + canonicalTable: 'proj.analytics.opportunities', + }, + 'b2b.sales_pipeline.accounts': { + ok: true, + catalog: 'proj', + schema: 'analytics', + name: 'accounts', + canonicalTable: 'proj.analytics.accounts', + }, + }, + }); + }); + + it('marks parser failures as parse_error without blocking pull-config construction', async () => { + const parser = { parse: vi.fn().mockRejectedValue(new Error('python unavailable')) }; + const client = { + listLookmlModels: vi.fn().mockResolvedValue(models), + getExplore: vi.fn().mockResolvedValue(mappedExplore), + }; + + const config = await buildLookerPullConfigFromInputs({ + lookerConnectionId: 'prod-looker', + cursors: { dashboardsLastSyncedAt: null, looksLastSyncedAt: null }, + refreshedMappings: [ + { + lookerConnectionName: 'b2b_sandbox_bq', + kloConnectionId: 'warehouse', + lookerHost: null, + lookerDatabase: null, + lookerDialect: null, + }, + ], + targetConnections: new Map([['warehouse', { id: 'warehouse', connection_type: 'BIGQUERY' }]]), + client, + parser, + }); + + expect(config.parsedTargetTables).toMatchObject({ + 'b2b.sales_pipeline': { ok: 
false, reason: 'parse_error' }, + 'b2b.sales_pipeline.accounts': { ok: false, reason: 'parse_error' }, + }); + }); +}); diff --git a/packages/context/src/ingest/adapters/looker/mapping.ts b/packages/context/src/ingest/adapters/looker/mapping.ts new file mode 100644 index 00000000..d7d1227b --- /dev/null +++ b/packages/context/src/ingest/adapters/looker/mapping.ts @@ -0,0 +1,442 @@ +import type { LookerWarehouseConnectionInfo } from './client.js'; +import type { + LookerPullConfig, + LookerRuntimeCursors, + ParsedTargetTable, + StagedExploreFile, + StagedLookmlModelsFile, +} from './types.js'; + +export const LOOKER_DIALECT_TO_CONNECTION_TYPE = { + bigquery: 'BIGQUERY', + bigquery_standard_sql: 'BIGQUERY', + snowflake: 'SNOWFLAKE', + postgres: 'POSTGRESQL', + postgresql: 'POSTGRESQL', + mysql: 'MYSQL', + sqlite: 'SQLITE', + sqlserver: 'SQLSERVER', + mssql: 'SQLSERVER', + tsql: 'SQLSERVER', + clickhouse: 'CLICKHOUSE', +} as const; + +export type LookerWarehouseTargetConnectionType = + (typeof LOOKER_DIALECT_TO_CONNECTION_TYPE)[keyof typeof LOOKER_DIALECT_TO_CONNECTION_TYPE]; + +export interface LookerConnectionMapping { + lookerConnectionName: string; + kloConnectionId: string | null; + lookerHost: string | null; + lookerDatabase: string | null; + lookerDialect: string | null; +} + +export interface LookerTargetConnection { + id: string; + connection_type: string; + connection_params?: Record | null; +} + +export interface LookerMappingCandidateConnection extends LookerTargetConnection {} + +export interface LookerMappingDrift { + unmappedDiscovered: LookerWarehouseConnectionInfo[]; + staleMappings: Array<{ lookerConnectionName: string; reason: 'looker_connection_not_found' }>; + inSync: Array<{ lookerConnectionName: string; kloConnectionId: string }>; +} + +export type LookerMappingValidationResult = + | { ok: true } + | { ok: false; errors: Array<{ key: string; reason: string }> }; + +export interface LookerTableIdentifierParseItem { + key: string; + 
sql_table_name: string; + dialect: string; +} + +type ParsedTargetTableFailureReason = Extract['reason']; + +export interface LookerParsedIdentifier { + ok: boolean; + catalog?: string | null; + schema?: string | null; + name?: string | null; + canonical_table?: string | null; + reason?: ParsedTargetTableFailureReason | null; + detail?: string | null; +} + +export interface LookerTableIdentifierParser { + parse(items: LookerTableIdentifierParseItem[]): Promise>; +} + +export interface LookerMappingClient { + listLookerConnections(): Promise; + listLookmlModels(): Promise; + getExplore(modelName: string, exploreName: string): Promise; +} + +const SQLGLOT_DIALECT_BY_CONNECTION_TYPE: Partial> = { + BIGQUERY: 'bigquery', + SNOWFLAKE: 'snowflake', + POSTGRESQL: 'postgres', + MYSQL: 'mysql', + SQLITE: 'sqlite', + SQLSERVER: 'tsql', + CLICKHOUSE: 'clickhouse', +}; + +export async function discoverLookerConnections( + client: Pick, +): Promise { + return client.listLookerConnections(); +} + +export function lookerDialectToConnectionType(dialect: string | null): LookerWarehouseTargetConnectionType | null { + if (!dialect) { + return null; + } + return ( + LOOKER_DIALECT_TO_CONNECTION_TYPE[dialect.toLowerCase() as keyof typeof LOOKER_DIALECT_TO_CONNECTION_TYPE] ?? null + ); +} + +export function sqlglotDialectForConnectionType(connectionType: string): string | null { + return SQLGLOT_DIALECT_BY_CONNECTION_TYPE[connectionType as LookerWarehouseTargetConnectionType] ?? null; +} + +export function validateLookerWarehouseTarget(connectionType: string): { ok: true } | { ok: false; reason: string } { + return sqlglotDialectForConnectionType(connectionType) + ? { ok: true } + : { + ok: false, + reason: `Connection type ${connectionType} cannot be used as a Looker warehouse mapping target`, + }; +} + +export function extractWarehouseHost(params: unknown, connectionType: string): string | null { + const record = isRecord(params) ? 
params : {}; + switch (connectionType) { + case 'POSTGRESQL': + case 'SQLSERVER': + case 'MYSQL': + case 'CLICKHOUSE': + return readString(record, 'host'); + case 'SNOWFLAKE': + return readString(record, 'account'); + default: + return null; + } +} + +export function extractWarehouseDatabase(params: unknown, connectionType: string): string | null { + const record = isRecord(params) ? params : {}; + switch (connectionType) { + case 'POSTGRESQL': + case 'SQLSERVER': + case 'MYSQL': + case 'CLICKHOUSE': + case 'SNOWFLAKE': + return readString(record, 'database'); + case 'BIGQUERY': + return readString(record, 'dataset_id'); + default: + return null; + } +} + +export function normalizeHost(value: string | null): string | null { + return value ? value.toLowerCase().replace(/:\d+$/, '') : null; +} + +export function normalizeName(value: string | null): string | null { + return value ? value.toLowerCase() : null; +} + +export function suggestKloConnectionForLookerConnection(args: { + lookerConnection: LookerWarehouseConnectionInfo; + candidateConnections: LookerMappingCandidateConnection[]; +}): string | null { + const expectedType = lookerDialectToConnectionType(args.lookerConnection.dialect); + if (!expectedType || !args.lookerConnection.host || !args.lookerConnection.database || !args.lookerConnection.dialect) { + return null; + } + + const matches = args.candidateConnections.filter((connection) => { + if (connection.connection_type !== expectedType) { + return false; + } + return ( + normalizeHost(extractWarehouseHost(connection.connection_params, connection.connection_type)) === + normalizeHost(args.lookerConnection.host) && + normalizeName(extractWarehouseDatabase(connection.connection_params, connection.connection_type)) === + normalizeName(args.lookerConnection.database) + ); + }); + + return matches.length === 1 ? 
matches[0].id : null; +} + +export function computeLookerMappingDrift(args: { + storedMappings: LookerConnectionMapping[]; + discovered: LookerWarehouseConnectionInfo[]; +}): LookerMappingDrift { + const discoveredByName = new Map(args.discovered.map((connection) => [connection.name, connection])); + const storedByName = new Map(args.storedMappings.map((mapping) => [mapping.lookerConnectionName, mapping])); + + return { + unmappedDiscovered: args.discovered.filter((connection) => !storedByName.get(connection.name)?.kloConnectionId), + staleMappings: args.storedMappings + .filter((mapping) => !discoveredByName.has(mapping.lookerConnectionName)) + .map((mapping) => ({ + lookerConnectionName: mapping.lookerConnectionName, + reason: 'looker_connection_not_found' as const, + })), + inSync: args.storedMappings + .filter((mapping) => discoveredByName.has(mapping.lookerConnectionName) && mapping.kloConnectionId) + .map((mapping) => ({ + lookerConnectionName: mapping.lookerConnectionName, + kloConnectionId: mapping.kloConnectionId as string, + })), + }; +} + +export function validateLookerMappings(args: { + mappings: LookerConnectionMapping[]; + knownKloConnectionIds: Set; + knownConnectionTypes: ReadonlyMap; +}): LookerMappingValidationResult { + const errors: Array<{ key: string; reason: string }> = []; + for (const mapping of args.mappings) { + if (!mapping.kloConnectionId) { + continue; + } + if (!args.knownKloConnectionIds.has(mapping.kloConnectionId)) { + errors.push({ + key: mapping.lookerConnectionName, + reason: `KLO connection ${mapping.kloConnectionId} does not exist`, + }); + continue; + } + const connectionType = args.knownConnectionTypes.get(mapping.kloConnectionId); + const validation = validateLookerWarehouseTarget(connectionType ?? 'unknown'); + if (!validation.ok) { + errors.push({ key: mapping.lookerConnectionName, reason: validation.reason }); + } + } + return errors.length === 0 ? 
{ ok: true } : { ok: false, errors }; +} + +export function refreshLookerMappingPlaceholders(args: { + stored: LookerConnectionMapping[]; + live: LookerWarehouseConnectionInfo[]; +}): { mappings: LookerConnectionMapping[]; changed: boolean } { + const byName = new Map(args.stored.map((mapping) => [mapping.lookerConnectionName, mapping])); + let changed = false; + + for (const live of args.live) { + const existing = byName.get(live.name); + if (!existing) { + byName.set(live.name, { + lookerConnectionName: live.name, + kloConnectionId: null, + lookerHost: live.host, + lookerDatabase: live.database, + lookerDialect: live.dialect, + }); + changed = true; + continue; + } + + const refreshed: LookerConnectionMapping = { + ...existing, + lookerHost: live.host, + lookerDatabase: live.database, + lookerDialect: live.dialect, + }; + if ( + refreshed.lookerHost !== existing.lookerHost || + refreshed.lookerDatabase !== existing.lookerDatabase || + refreshed.lookerDialect !== existing.lookerDialect + ) { + byName.set(live.name, refreshed); + changed = true; + } + } + + return { mappings: [...byName.values()], changed }; +} + +export function collectExploreParseItems(args: { + explore: StagedExploreFile; + connectionMappings: Record; + targetConnections: ReadonlyMap>; +}): { parsedTargetTables: Record; parseItems: LookerTableIdentifierParseItem[] } { + const parsedTargetTables: Record = {}; + const parseItems: LookerTableIdentifierParseItem[] = []; + const lookerConnectionName = args.explore.connectionName; + const targetConnectionId = lookerConnectionName ? args.connectionMappings[lookerConnectionName] : undefined; + + if (!lookerConnectionName || !targetConnectionId) { + return { parsedTargetTables, parseItems }; + } + + const targetConnection = args.targetConnections.get(targetConnectionId); + const dialect = targetConnection ? 
sqlglotDialectForConnectionType(targetConnection.connection_type) : null; + const key = `${args.explore.modelName}.${args.explore.exploreName}`; + + if (!dialect) { + parsedTargetTables[key] = { + ok: false, + reason: 'unsupported_dialect', + detail: `Connection type ${targetConnection?.connection_type ?? 'unknown'} does not map to a supported sqlglot dialect.`, + }; + return { parsedTargetTables, parseItems }; + } + + if (args.explore.rawSqlTableName) { + parseItems.push({ key, sql_table_name: args.explore.rawSqlTableName, dialect }); + } + + for (const join of args.explore.joins) { + if (!join.rawSqlTableName) { + continue; + } + parseItems.push({ + key: `${key}.${join.name}`, + sql_table_name: join.rawSqlTableName, + dialect, + }); + } + + return { parsedTargetTables, parseItems }; +} + +export function projectParsedIdentifier(row: LookerParsedIdentifier | undefined): ParsedTargetTable { + if (!row) { + return { ok: false, reason: 'parse_error', detail: 'Python parser response was missing this key.' }; + } + if (row.ok && row.name && row.canonical_table) { + return { + ok: true, + catalog: row.catalog ?? null, + schema: row.schema ?? null, + name: row.name, + canonicalTable: row.canonical_table, + }; + } + return { + ok: false, + reason: row.reason ?? 'parse_error', + detail: row.reason ? 
undefined : 'Python parser returned an invalid success row without name or canonical_table.', + }; +} + +export async function buildLookerPullConfigFromInputs(args: { + lookerConnectionId: string; + cursors: LookerRuntimeCursors; + refreshedMappings: LookerConnectionMapping[]; + targetConnections: ReadonlyMap>; + client: Pick; + parser: LookerTableIdentifierParser; +}): Promise { + const connectionMappings: Record = {}; + const connectionTypes: Record = {}; + + for (const mapping of args.refreshedMappings) { + if (!mapping.kloConnectionId) { + continue; + } + const target = args.targetConnections.get(mapping.kloConnectionId); + if (!target || !validateLookerWarehouseTarget(target.connection_type).ok) { + continue; + } + connectionMappings[mapping.lookerConnectionName] = mapping.kloConnectionId; + connectionTypes[mapping.lookerConnectionName] = target.connection_type as LookerWarehouseTargetConnectionType; + } + + const parsedTargetTables = await parseExploreTargets({ + client: args.client, + connectionMappings, + targetConnections: args.targetConnections, + parser: args.parser, + }); + + return { + lookerConnectionId: args.lookerConnectionId, + dashboardUpdatedSince: args.cursors.dashboardsLastSyncedAt, + lookUpdatedSince: args.cursors.looksLastSyncedAt, + connectionMappings, + connectionTypes, + parsedTargetTables, + }; +} + +async function parseExploreTargets(args: { + client: Pick; + connectionMappings: Record; + targetConnections: ReadonlyMap>; + parser: LookerTableIdentifierParser; +}): Promise> { + const parsedTargetTables: Record = {}; + const parseItems: LookerTableIdentifierParseItem[] = []; + + let models: StagedLookmlModelsFile; + try { + models = await args.client.listLookmlModels(); + } catch { + return parsedTargetTables; + } + + for (const model of models.models) { + for (const exploreRef of model.explores) { + let explore: StagedExploreFile; + try { + explore = await args.client.getExplore(model.name, exploreRef.name); + } catch { + continue; + } + 
const collected = collectExploreParseItems({ + explore, + connectionMappings: args.connectionMappings, + targetConnections: args.targetConnections, + }); + Object.assign(parsedTargetTables, collected.parsedTargetTables); + parseItems.push(...collected.parseItems); + } + } + + if (parseItems.length === 0) { + return parsedTargetTables; + } + + let results: Record; + try { + results = await args.parser.parse(parseItems); + } catch { + for (const item of parseItems) { + parsedTargetTables[item.key] = { + ok: false, + reason: 'parse_error', + detail: 'Python parse-table-identifier failed during Looker pull-config projection.', + }; + } + return parsedTargetTables; + } + + for (const item of parseItems) { + parsedTargetTables[item.key] = projectParsedIdentifier(results[item.key]); + } + return parsedTargetTables; +} + +function isRecord(value: unknown): value is Record { + return value !== null && typeof value === 'object' && !Array.isArray(value); +} + +function readString(record: Record, key: string): string | null { + const value = record[key]; + return typeof value === 'string' ? 
value : null; +} diff --git a/packages/context/src/ingest/adapters/looker/reconcile.test.ts b/packages/context/src/ingest/adapters/looker/reconcile.test.ts new file mode 100644 index 00000000..68e2cda8 --- /dev/null +++ b/packages/context/src/ingest/adapters/looker/reconcile.test.ts @@ -0,0 +1,25 @@ +import { describe, expect, it } from 'vitest'; +import { buildLookerReconcileNotes, lookerRuntimeSourceToFileAdapterSource } from './reconcile.js'; + +describe('lookerRuntimeSourceToFileAdapterSource', () => { + it('maps API-derived Looker source names to file-adapter source names', () => { + expect(lookerRuntimeSourceToFileAdapterSource('looker__b2b__sales_pipeline')).toBe('b2b__sales_pipeline'); + expect(lookerRuntimeSourceToFileAdapterSource('looker__finance__orders')).toBe('finance__orders'); + }); + + it('ignores non-Looker and malformed source names', () => { + expect(lookerRuntimeSourceToFileAdapterSource('b2b__sales_pipeline')).toBeNull(); + expect(lookerRuntimeSourceToFileAdapterSource('looker__missing_explore')).toBeNull(); + }); +}); + +describe('buildLookerReconcileNotes', () => { + it('instructs reconciliation to record subsumed provenance', () => { + expect(buildLookerReconcileNotes()).toEqual([ + [ + 'Looker runtime API-derived SL sources use looker____.', + 'If the unprefixed file-adapter source __ exists, prefer it in wiki sl_refs, delete or avoid the API-derived source, and call emit_artifact_resolution with actionType="subsumed" for the API raw explore path.', + ].join(' '), + ]); + }); +}); diff --git a/packages/context/src/ingest/adapters/looker/reconcile.ts b/packages/context/src/ingest/adapters/looker/reconcile.ts new file mode 100644 index 00000000..fe5e74a5 --- /dev/null +++ b/packages/context/src/ingest/adapters/looker/reconcile.ts @@ -0,0 +1,21 @@ +export function lookerRuntimeSourceToFileAdapterSource(sourceName: string): string | null { + if (!sourceName.startsWith('looker__')) { + return null; + } + const stripped = 
sourceName.slice('looker__'.length); + const parts = stripped.split('__'); + if (parts.length < 2 || parts.some((part) => part.length === 0)) { + return null; + } + const [model, ...exploreParts] = parts; + return `${model}__${exploreParts.join('__')}`; +} + +export function buildLookerReconcileNotes(): string[] { + return [ + [ + 'Looker runtime API-derived SL sources use looker____.', + 'If the unprefixed file-adapter source __ exists, prefer it in wiki sl_refs, delete or avoid the API-derived source, and call emit_artifact_resolution with actionType="subsumed" for the API raw explore path.', + ].join(' '), + ]; +} diff --git a/packages/context/src/ingest/adapters/looker/scope.test.ts b/packages/context/src/ingest/adapters/looker/scope.test.ts new file mode 100644 index 00000000..d7c2c56e --- /dev/null +++ b/packages/context/src/ingest/adapters/looker/scope.test.ts @@ -0,0 +1,101 @@ +import { mkdir, mkdtemp, rm, writeFile } from 'node:fs/promises'; +import { tmpdir } from 'node:os'; +import { join } from 'node:path'; +import { afterEach, beforeEach, describe, expect, it } from 'vitest'; +import { describeLookerScope, hashLookerScope, isPathInLookerScope } from './scope.js'; + +async function writeJson(stagedDir: string, relPath: string, value: unknown): Promise { + const abs = join(stagedDir, relPath); + await mkdir(join(abs, '..'), { recursive: true }); + await writeFile(abs, `${JSON.stringify(value, null, 2)}\n`, 'utf-8'); +} + +describe('Looker runtime fetch scope', () => { + let stagedDir: string; + + beforeEach(async () => { + stagedDir = await mkdtemp(join(tmpdir(), 'looker-scope-')); + }); + + afterEach(async () => { + await rm(stagedDir, { recursive: true, force: true }); + }); + + it('keeps omitted known-current entity files out of the deletion baseline', () => { + const scope = { + mode: 'incremental' as const, + knownCurrentRawPaths: ['dashboards/10.json', 'dashboards/11.json', 'looks/20.json'], + fetchedRawPaths: ['dashboards/11.json'], + }; + + 
expect(isPathInLookerScope('dashboards/10.json', scope)).toBe(false); + expect(isPathInLookerScope('looks/20.json', scope)).toBe(false); + expect(isPathInLookerScope('dashboards/11.json', scope)).toBe(true); + expect(isPathInLookerScope('looks/21.json', scope)).toBe(true); + expect(isPathInLookerScope('signals/dashboard_usage.json', scope)).toBe(true); + expect(isPathInLookerScope('explores/b2b/sales_pipeline.json', scope)).toBe(true); + }); + + it('keeps omitted unchanged evidence documents out of incremental delete scope', () => { + const scope = { + mode: 'incremental' as const, + knownCurrentRawPaths: ['dashboards/10.json', 'looks/20.json'], + fetchedRawPaths: ['dashboards/10.json'], + }; + + expect(isPathInLookerScope('evidence/dashboards/10/page.md', scope)).toBe(true); + expect(isPathInLookerScope('evidence/dashboards/10/metadata.json', scope)).toBe(true); + expect(isPathInLookerScope('evidence/looks/20/page.md', scope)).toBe(false); + expect(isPathInLookerScope('evidence/looks/20/metadata.json', scope)).toBe(false); + }); + + it('treats full scope as all raw paths in scope', () => { + const scope = { + mode: 'full' as const, + knownCurrentRawPaths: ['dashboards/10.json'], + fetchedRawPaths: ['dashboards/10.json'], + }; + + expect(isPathInLookerScope('dashboards/10.json', scope)).toBe(true); + expect(isPathInLookerScope('dashboards/99.json', scope)).toBe(true); + expect(isPathInLookerScope('looks/20.json', scope)).toBe(true); + }); + + it('hashes scope order-insensitively', () => { + const a = hashLookerScope({ + mode: 'incremental', + knownCurrentRawPaths: ['looks/20.json', 'dashboards/10.json'], + fetchedRawPaths: ['dashboards/10.json'], + }); + const b = hashLookerScope({ + mode: 'incremental', + knownCurrentRawPaths: ['dashboards/10.json', 'looks/20.json'], + fetchedRawPaths: ['dashboards/10.json'], + }); + + expect(a).toBe(b); + expect(a).toMatch(/^[0-9a-f]{64}$/); + }); + + it('reads staged scope and returns a SourceAdapter ScopeDescriptor', async () 
=> { + await writeJson(stagedDir, 'looker-scope.json', { + mode: 'incremental', + knownCurrentRawPaths: ['dashboards/10.json', 'looks/20.json'], + fetchedRawPaths: ['dashboards/10.json'], + }); + + const descriptor = await describeLookerScope(stagedDir); + + expect(descriptor.fingerprint).toMatch(/^[0-9a-f]{64}$/); + expect(descriptor.isPathInScope('dashboards/10.json')).toBe(true); + expect(descriptor.isPathInScope('looks/20.json')).toBe(false); + expect(descriptor.isPathInScope('looks/99.json')).toBe(true); + }); + + it('falls back to full scope when old fixtures do not have a scope file', async () => { + const descriptor = await describeLookerScope(stagedDir); + + expect(descriptor.isPathInScope('dashboards/10.json')).toBe(true); + expect(descriptor.isPathInScope('looks/20.json')).toBe(true); + }); +}); diff --git a/packages/context/src/ingest/adapters/looker/scope.ts b/packages/context/src/ingest/adapters/looker/scope.ts new file mode 100644 index 00000000..feabde72 --- /dev/null +++ b/packages/context/src/ingest/adapters/looker/scope.ts @@ -0,0 +1,63 @@ +import { createHash } from 'node:crypto'; +import { readFile } from 'node:fs/promises'; +import { join } from 'node:path'; +import type { ScopeDescriptor } from '../../types.js'; +import { STAGED_FILES, type StagedLookerScopeFile, stagedLookerScopeFileSchema } from './types.js'; + +const LOOKER_ENTITY_PATH_RE = /^(dashboards|looks)\/[^/]+\.json$/; +const LOOKER_EVIDENCE_ENTITY_PATH_RE = /^evidence\/(dashboards|looks)\/([^/]+)\/(?:metadata\.json|page\.md)$/; + +export async function describeLookerScope(stagedDir: string): Promise { + const scope = await readLookerScope(stagedDir); + return { + fingerprint: hashLookerScope(scope), + isPathInScope: (rawPath) => isPathInLookerScope(rawPath, scope), + }; +} + +export async function readLookerScope(stagedDir: string): Promise { + try { + const body = await readFile(join(stagedDir, STAGED_FILES.scope), 'utf-8'); + return 
stagedLookerScopeFileSchema.parse(JSON.parse(body)); + } catch (error) { + if (error && typeof error === 'object' && 'code' in error && error.code === 'ENOENT') { + return { mode: 'full', knownCurrentRawPaths: [], fetchedRawPaths: [] }; + } + throw error; + } +} + +export function hashLookerScope(scope: StagedLookerScopeFile): string { + const canonical = JSON.stringify({ + mode: scope.mode, + knownCurrentRawPaths: [...scope.knownCurrentRawPaths].sort(), + fetchedRawPaths: [...scope.fetchedRawPaths].sort(), + }); + return createHash('sha256').update(canonical).digest('hex'); +} + +export function isPathInLookerScope(rawPath: string, scope: StagedLookerScopeFile): boolean { + if (scope.mode === 'full') { + return true; + } + + const entityRawPath = scopedEntityRawPath(rawPath); + if (!entityRawPath) { + return true; + } + + const knownCurrent = new Set(scope.knownCurrentRawPaths); + const fetched = new Set(scope.fetchedRawPaths); + return fetched.has(entityRawPath) || !knownCurrent.has(entityRawPath); +} + +function scopedEntityRawPath(rawPath: string): string | null { + if (LOOKER_ENTITY_PATH_RE.test(rawPath)) { + return rawPath; + } + const evidence = LOOKER_EVIDENCE_ENTITY_PATH_RE.exec(rawPath); + if (evidence) { + return `${evidence[1]}/${evidence[2]}.json`; + } + return null; +} diff --git a/packages/context/src/ingest/adapters/looker/target-connections.test.ts b/packages/context/src/ingest/adapters/looker/target-connections.test.ts new file mode 100644 index 00000000..10b2d892 --- /dev/null +++ b/packages/context/src/ingest/adapters/looker/target-connections.test.ts @@ -0,0 +1,86 @@ +import { mkdir, mkdtemp, rm, writeFile } from 'node:fs/promises'; +import { tmpdir } from 'node:os'; +import { join } from 'node:path'; +import { afterEach, beforeEach, describe, expect, it } from 'vitest'; +import { listLookerTargetConnectionIds } from './target-connections.js'; + +describe('listLookerTargetConnectionIds', () => { + let stagedDir: string; + + beforeEach(async () 
=> { + stagedDir = await mkdtemp(join(tmpdir(), 'looker-targets-')); + }); + + afterEach(async () => { + await rm(stagedDir, { recursive: true, force: true }); + }); + + it('collects unique target warehouse IDs from explores, dashboard queries, and Look queries', async () => { + await mkdir(join(stagedDir, 'explores', 'b2b'), { recursive: true }); + await mkdir(join(stagedDir, 'dashboards'), { recursive: true }); + await mkdir(join(stagedDir, 'looks'), { recursive: true }); + + await writeFile( + join(stagedDir, 'explores', 'b2b', 'sales_pipeline.json'), + JSON.stringify({ + modelName: 'b2b', + exploreName: 'sales_pipeline', + label: null, + description: null, + fields: { dimensions: [], measures: [] }, + joins: [], + targetWarehouseConnectionId: '22222222-2222-4222-8222-222222222222', + }), + ); + await writeFile( + join(stagedDir, 'dashboards', '1.json'), + JSON.stringify({ + lookerId: '1', + title: 'Pipeline', + description: null, + folderId: null, + ownerId: null, + updatedAt: null, + tiles: [ + { + id: '11', + title: 'ARR', + lookId: null, + query: { + model: 'b2b', + view: 'sales_pipeline', + fields: [], + filters: {}, + sorts: [], + targetWarehouseConnectionId: '33333333-3333-4333-8333-333333333333', + }, + }, + ], + }), + ); + await writeFile( + join(stagedDir, 'looks', '2.json'), + JSON.stringify({ + lookerId: '2', + title: 'Customers', + description: null, + folderId: null, + ownerId: null, + updatedAt: null, + query: { + model: 'b2b', + view: 'sales_pipeline', + fields: [], + filters: {}, + sorts: [], + targetWarehouseConnectionId: '22222222-2222-4222-8222-222222222222', + }, + }), + ); + + await expect(listLookerTargetConnectionIds(stagedDir)).resolves.toEqual([ + '22222222-2222-4222-8222-222222222222', + '33333333-3333-4333-8333-333333333333', + ]); + }); +}); diff --git a/packages/context/src/ingest/adapters/looker/target-connections.ts b/packages/context/src/ingest/adapters/looker/target-connections.ts new file mode 100644 index 00000000..1b85aba5 
--- /dev/null
+++ b/packages/context/src/ingest/adapters/looker/target-connections.ts
@@ -0,0 +1,60 @@
+import { readdir, readFile } from 'node:fs/promises';
+import { join, relative } from 'node:path';
+import { stagedDashboardFileSchema, stagedExploreFileSchema, stagedLookFileSchema } from './types.js';
+
+/**
+ * Recursively lists every regular file under `root`.
+ *
+ * @param root Directory to scan.
+ * @returns File paths relative to `root`, using `/` separators, in default `Array.prototype.sort` order.
+ */
+async function walk(root: string): Promise<string[]> {
+  const entries = await readdir(root, { withFileTypes: true, recursive: true });
+  return entries
+    .filter((entry) => entry.isFile())
+    // Rebuild the full path from the entry's parent directory, then make it relative and POSIX-style.
+    .map((entry) => relative(root, join(entry.parentPath, entry.name)).replace(/\\/g, '/'))
+    .sort();
+}
+
+/** Adds `value` to `targets` unless it is null, undefined, or an empty string. */
+function addTarget(targets: Set<string>, value: string | null | undefined): void {
+  if (value) {
+    targets.add(value);
+  }
+}
+
+/**
+ * Collects the distinct `targetWarehouseConnectionId` values referenced by staged Looker files.
+ *
+ * Reads explore files (`explores/{model}/{explore}.json`), every tile query of dashboard files
+ * (`dashboards/{id}.json`), and the query of Look files (`looks/{id}.json`); other paths are ignored.
+ * Errors from reading, `JSON.parse`, or the schema `parse` calls are not caught and reject the promise.
+ *
+ * @param stagedDir Root directory of the staged Looker files.
+ * @returns The unique, non-empty connection IDs, sorted.
+ */
+export async function listLookerTargetConnectionIds(stagedDir: string): Promise<string[]> {
+  const targets = new Set<string>();
+  for (const path of await walk(stagedDir)) {
+    const fullPath = join(stagedDir, path);
+    if (/^explores\/[^/]+\/[^/]+\.json$/.test(path)) {
+      const explore = stagedExploreFileSchema.parse(JSON.parse(await readFile(fullPath, 'utf-8')));
+      addTarget(targets, explore.targetWarehouseConnectionId);
+      continue;
+    }
+    if (/^dashboards\/[^/]+\.json$/.test(path)) {
+      const dashboard = stagedDashboardFileSchema.parse(JSON.parse(await readFile(fullPath, 'utf-8')));
+      for (const tile of dashboard.tiles) {
+        // `query` is read through optional chaining, so tiles without one contribute nothing.
+        addTarget(targets, tile.query?.targetWarehouseConnectionId);
+      }
+      continue;
+    }
+    if (/^looks\/[^/]+\.json$/.test(path)) {
+      const look = stagedLookFileSchema.parse(JSON.parse(await readFile(fullPath, 'utf-8')));
+      addTarget(targets, look.query?.targetWarehouseConnectionId);
+    }
+  }
+  return [...targets].sort();
+}
diff --git a/packages/context/src/ingest/adapters/looker/tools/looker-query-to-sl.tool.test.ts b/packages/context/src/ingest/adapters/looker/tools/looker-query-to-sl.tool.test.ts
new file mode 100644
index 00000000..8df3d8af
--- /dev/null
+++ b/packages/context/src/ingest/adapters/looker/tools/looker-query-to-sl.tool.test.ts
@@ -0,0 +1,243 @@
+import { describe, expect, it } from 'vitest'; +import type { ToolOutput } from '../../../../tools/index.js'; +import { buildLookerSlProposal, createLookerQueryToSlTool, type LookerSlProposal } from './looker-query-to-sl.tool.js'; + +describe('buildLookerSlProposal', () => { + it('suggests a measure and segment for an aggregated filtered Looker query', () => { + const proposal = buildLookerSlProposal({ + contentTitle: 'Open Pipeline ARR', + contentType: 'look', + usage: { queryCount30d: 42, uniqueUsers30d: 7 }, + query: { + model: 'b2b', + view: 'sales_pipeline', + fields: ['opportunities.arr', 'opportunities.stage'], + filters: { 'opportunities.stage': 'open' }, + sorts: ['opportunities.arr desc'], + limit: '500', + }, + }); + + expect(proposal.sourceName).toBe('looker__b2b__sales_pipeline'); + expect(proposal.triageLane).toBe('full'); + expect(proposal.decision).toBe('measure_added'); + expect(proposal.measures).toEqual([ + { + name: 'arr', + lookerField: 'opportunities.arr', + expr: 'sum(opportunities.arr)', + description: 'Suggested from Looker look "Open Pipeline ARR"; verify against explore field SQL before writing.', + }, + ]); + expect(proposal.dimensions).toEqual([{ name: 'stage', lookerField: 'opportunities.stage' }]); + expect(proposal.segments).toEqual([ + { + name: 'open_pipeline_arr', + filters: { 'opportunities.stage': 'open' }, + suggestedPredicate: "opportunities.stage = 'open'", + description: 'Reusable filter candidate from Looker look "Open Pipeline ARR".', + }, + ]); + expect(proposal.notes).toContain( + 'Usage signals can raise priority, but query counts, users, owners, and folders must not be written as wiki narrative.', + ); + }); + + it('keeps simple saved views as wiki-only candidates', () => { + const proposal = buildLookerSlProposal({ + contentTitle: 'Accounts By Region', + query: { + model: 'b2b', + view: 'accounts', + fields: ['accounts.region', 'accounts.segment'], + filters: {}, + }, + }); + + 
expect(proposal.sourceName).toBe('looker__b2b__accounts'); + expect(proposal.triageLane).toBe('light'); + expect(proposal.decision).toBe('wiki_only'); + expect(proposal.measures).toEqual([]); + expect(proposal.dimensions).toEqual([ + { name: 'region', lookerField: 'accounts.region' }, + { name: 'segment', lookerField: 'accounts.segment' }, + ]); + expect(proposal.segments).toEqual([]); + }); + + it('promotes high-usage filter-only queries as derived-source candidates', () => { + const proposal = buildLookerSlProposal({ + contentTitle: 'Active Customers', + usage: { queryCount30d: 15, uniqueUsers30d: 4 }, + query: { + model: 'b2b', + view: 'customers', + fields: ['customers.id', 'customers.name'], + filters: { 'customers.status': 'active', 'customers.is_test': '-yes' }, + }, + }); + + expect(proposal.sourceName).toBe('looker__b2b__customers'); + expect(proposal.decision).toBe('source_created'); + expect(proposal.segments).toEqual([ + { + name: 'active_customers', + filters: { 'customers.status': 'active', 'customers.is_test': '-yes' }, + suggestedPredicate: "customers.status = 'active' AND customers.is_test != 'yes'", + description: 'Reusable filter candidate from Looker look "Active Customers".', + }, + ]); + }); + + it('surfaces mapped warehouse target metadata for direct SL writes', () => { + const proposal = buildLookerSlProposal({ + contentTitle: 'Open Pipeline ARR', + contentType: 'dashboard_tile', + usage: { queryCount30d: 42, uniqueUsers30d: 7 }, + query: { + model: 'b2b', + view: 'sales_pipeline', + fields: ['opportunities.arr', 'opportunities.stage'], + filters: { 'opportunities.stage': 'open' }, + targetWarehouseConnectionId: '22222222-2222-4222-8222-222222222222', + targetTable: { + ok: true, + catalog: 'proj', + schema: 'dataset', + name: 'opportunities', + canonicalTable: 'proj.dataset.opportunities', + }, + }, + }); + + expect(proposal.sourceName).toBe('looker__b2b__sales_pipeline'); + expect(proposal.targetStatus).toBe('mapped'); + 
expect(proposal.targetWarehouseConnectionId).toBe('22222222-2222-4222-8222-222222222222'); + expect(proposal.sourceTable).toBe('proj.dataset.opportunities'); + expect(proposal.canWriteStandaloneSource).toBe(true); + expect(proposal.targetTable).toEqual({ + ok: true, + catalog: 'proj', + schema: 'dataset', + name: 'opportunities', + canonicalTable: 'proj.dataset.opportunities', + }); + expect(proposal.notes).toContain( + 'targetTable.ok is true: write or edit SL on targetWarehouseConnectionId using targetTable.canonicalTable as source.table.', + ); + }); + + it('surfaces unmapped and unparseable target reasons for wiki-only fallback', () => { + const unmapped = buildLookerSlProposal({ + contentTitle: 'Revenue Trend', + query: { + model: 'b2b', + view: 'revenue', + fields: ['revenue.arr'], + filters: {}, + targetWarehouseConnectionId: null, + targetTable: { + ok: false, + reason: 'no_connection_mapping', + }, + }, + }); + + expect(unmapped.targetStatus).toBe('unmapped'); + expect(unmapped.targetWarehouseConnectionId).toBeNull(); + expect(unmapped.sourceTable).toBeNull(); + expect(unmapped.canWriteStandaloneSource).toBe(false); + expect(unmapped.notes).toContain( + 'targetTable.ok is false (no_connection_mapping): keep this query wiki-only and pass the reason through emit_unmapped_fallback.', + ); + + const unparseable = buildLookerSlProposal({ + contentTitle: 'Templated Source', + query: { + model: 'b2b', + view: 'templated', + fields: ['templated.count'], + filters: {}, + targetWarehouseConnectionId: '22222222-2222-4222-8222-222222222222', + targetTable: { + ok: false, + reason: 'looker_template_unresolved', + detail: 'The sql_table_name contains ${derived.SQL_TABLE_NAME}.', + }, + }, + }); + + expect(unparseable.targetStatus).toBe('unparseable'); + expect(unparseable.targetWarehouseConnectionId).toBe('22222222-2222-4222-8222-222222222222'); + expect(unparseable.sourceTable).toBeNull(); + expect(unparseable.canWriteStandaloneSource).toBe(false); + 
expect(unparseable.notes).toContain( + 'targetTable.ok is false (looker_template_unresolved): keep this query wiki-only and pass the reason through emit_unmapped_fallback.', + ); + }); +}); + +describe('createLookerQueryToSlTool', () => { + it('returns markdown plus the structured proposal', async () => { + const lookerQueryToSl = createLookerQueryToSlTool(); + if (!lookerQueryToSl.execute) { + throw new Error('looker_query_to_sl tool must be executable'); + } + const output = (await lookerQueryToSl.execute( + { + contentTitle: 'Revenue Trend', + contentType: 'dashboard_tile', + query: { + model: 'finance', + view: 'orders', + fields: ['orders.total_revenue', 'orders.created_month'], + filters: { 'orders.status': 'paid' }, + sorts: [], + targetWarehouseConnectionId: null, + targetTable: null, + }, + }, + { toolCallId: 'call-1', messages: [] } as never, + )) as ToolOutput; + + expect(output.markdown).toContain('Looker query SL proposal'); + expect(output.markdown).toContain('looker__finance__orders'); + expect(output.structured.sourceName).toBe('looker__finance__orders'); + expect(output.structured.measures[0]?.name).toBe('total_revenue'); + }); + + it('prints target connection and canonical table in markdown output', async () => { + const lookerQueryToSl = createLookerQueryToSlTool(); + if (!lookerQueryToSl.execute) { + throw new Error('looker_query_to_sl tool must be executable'); + } + + const output = (await lookerQueryToSl.execute( + { + contentTitle: 'Revenue Trend', + contentType: 'dashboard_tile', + query: { + model: 'finance', + view: 'orders', + fields: ['orders.total_revenue', 'orders.created_month'], + filters: { 'orders.status': 'paid' }, + sorts: [], + targetWarehouseConnectionId: '33333333-3333-4333-8333-333333333333', + targetTable: { + ok: true, + catalog: 'proj', + schema: 'finance', + name: 'orders', + canonicalTable: 'proj.finance.orders', + }, + }, + }, + { toolCallId: 'call-1', messages: [] } as never, + )) as ToolOutput; + + 
expect(output.markdown).toContain('- targetStatus: mapped'); + expect(output.markdown).toContain('- targetWarehouseConnectionId: 33333333-3333-4333-8333-333333333333'); + expect(output.markdown).toContain('- sourceTable: proj.finance.orders'); + expect(output.structured.canWriteStandaloneSource).toBe(true); + }); +}); diff --git a/packages/context/src/ingest/adapters/looker/tools/looker-query-to-sl.tool.ts b/packages/context/src/ingest/adapters/looker/tools/looker-query-to-sl.tool.ts new file mode 100644 index 00000000..67bb68c6 --- /dev/null +++ b/packages/context/src/ingest/adapters/looker/tools/looker-query-to-sl.tool.ts @@ -0,0 +1,305 @@ +import { tool } from 'ai'; +import { z } from 'zod'; +import type { ToolOutput } from '../../../../tools/index.js'; +import { type ParsedTargetTable, stagedLookerQuerySchema } from '../types.js'; + +const lookerUsageInputSchema = z.object({ + queryCount30d: z.number().int().nonnegative().default(0), + uniqueUsers30d: z.number().int().nonnegative().default(0), +}); + +export const lookerQueryToSlInputSchema = z.object({ + query: stagedLookerQuerySchema, + contentTitle: z.string().min(1).optional(), + contentType: z.enum(['look', 'dashboard_tile']).default('look'), + usage: lookerUsageInputSchema.optional(), +}); + +export type LookerQueryToSlInput = z.input; + +type LookerTargetStatus = 'mapped' | 'unmapped' | 'unparseable' | 'missing_target_table'; + +export interface LookerSlFieldProposal { + name: string; + lookerField: string; +} + +export interface LookerSlMeasureProposal extends LookerSlFieldProposal { + expr: string; + description: string; +} + +export interface LookerSlSegmentProposal { + name: string; + filters: Record; + suggestedPredicate: string; + description: string; +} + +export interface LookerSlProposal { + sourceName: string; + targetWarehouseConnectionId: string | null; + targetTable: ParsedTargetTable | null; + targetStatus: LookerTargetStatus; + sourceTable: string | null; + canWriteStandaloneSource: 
boolean; + triageLane: 'skip' | 'light' | 'full'; + decision: 'wiki_only' | 'measure_added' | 'source_created'; + dimensions: LookerSlFieldProposal[]; + measures: LookerSlMeasureProposal[]; + segments: LookerSlSegmentProposal[]; + notes: string[]; +} + +const MEASURE_FIELD_RE = + /\b(count|sum|total|revenue|arr|mrr|amount|avg|average|rate|ratio|percent|pct|margin|profit|value|score)\b/i; + +function targetStatus( + targetWarehouseConnectionId: string | null, + targetTable: ParsedTargetTable | null, +): LookerTargetStatus { + if (targetTable?.ok === true && targetWarehouseConnectionId) { + return 'mapped'; + } + if (targetTable?.ok === false && targetTable.reason === 'no_connection_mapping') { + return 'unmapped'; + } + if (targetTable?.ok === false) { + return 'unparseable'; + } + return 'missing_target_table'; +} + +function targetNotes(status: LookerTargetStatus, targetTable: ParsedTargetTable | null): string[] { + if (status === 'mapped') { + return [ + 'targetTable.ok is true: write or edit SL on targetWarehouseConnectionId using targetTable.canonicalTable as source.table.', + 'Use targetTable.catalog, targetTable.schema, and targetTable.name only for source_tables preflight matching.', + 'Never use rawSqlTableName as source.table; it may contain aliases, templates, or derived-table SQL.', + ]; + } + if (targetTable?.ok === false) { + return [ + `targetTable.ok is false (${targetTable.reason}): keep this query wiki-only and pass the reason through emit_unmapped_fallback.`, + ]; + } + return [ + 'No targetTable was staged for this query; read the parent explore dependency before attempting any SL write.', + ]; +} + +export function buildLookerSlProposal(raw: LookerQueryToSlInput): LookerSlProposal { + const input = lookerQueryToSlInputSchema.parse(raw); + const sourceName = `looker__${toSlName(input.query.model)}__${toSlName(input.query.view)}`; + const usage = input.usage; + const targetWarehouseConnectionId = input.query.targetWarehouseConnectionId ?? 
null; + const targetTable = input.query.targetTable ?? null; + const status = targetStatus(targetWarehouseConnectionId, targetTable); + const sourceTable = targetTable?.ok === true ? targetTable.canonicalTable : null; + const canWriteStandaloneSource = status === 'mapped'; + const triageLane = + usage && usage.queryCount30d === 0 && usage.uniqueUsers30d === 0 ? 'skip' : isHighUsage(usage) ? 'full' : 'light'; + const dimensions: LookerSlFieldProposal[] = []; + const measures: LookerSlMeasureProposal[] = []; + + for (const field of input.query.fields) { + const proposal = { name: toSlName(fieldLeaf(field)), lookerField: field }; + if (isMeasureLikeField(field)) { + measures.push({ + ...proposal, + expr: suggestedMeasureExpr(field), + description: `Suggested from Looker ${contentLabel(input)}; verify against explore field SQL before writing.`, + }); + } else { + dimensions.push(proposal); + } + } + + const filters = nonEmptyFilters(input.query.filters); + const segments = + Object.keys(filters).length === 0 + ? [] + : [ + { + name: toSlName(input.contentTitle ?? Object.keys(filters).map(fieldLeaf).join('_')), + filters, + suggestedPredicate: Object.entries(filters) + .map(([field, value]) => filterValueToPredicate(field, value)) + .join(' AND '), + description: `Reusable filter candidate from Looker ${contentLabel(input)}.`, + }, + ]; + + const decision = + measures.length > 0 ? 'measure_added' : segments.length > 0 && isHighUsage(usage) ? 
'source_created' : 'wiki_only'; + + const notes = [ + ...targetNotes(status, targetTable), + 'Treat this as a proposal, not an instruction to write SL blindly.', + 'Verify field SQL, source shape, and existing SL overlap with sl_discover/sl_read_source before sl_write_source or sl_edit_source.', + 'Usage signals can raise priority, but query counts, users, owners, and folders must not be written as wiki narrative.', + ]; + if (triageLane === 'skip') { + notes.push('Zero recent usage is a skip signal unless the raw content clearly defines durable business semantics.'); + } + + return { + sourceName, + targetWarehouseConnectionId, + targetTable, + targetStatus: status, + sourceTable, + canWriteStandaloneSource, + triageLane, + decision, + dimensions, + measures, + segments, + notes, + }; +} + +export function createLookerQueryToSlTool() { + return tool({ + description: + 'Given one staged Looker query JSON, return a conservative proposal for SL measures, dimensions, reusable filters, and triage priority. The proposal is advisory; verify with SL tools before writing.', + inputSchema: lookerQueryToSlInputSchema, + execute: async (input): Promise> => { + const structured = buildLookerSlProposal(input); + return { + markdown: formatLookerSlProposal(structured), + structured, + }; + }, + toModelOutput: ({ output }) => { + const markdown = + output && typeof output === 'object' && 'markdown' in output + ? String((output as { markdown: unknown }).markdown) + : String(output); + return { type: 'content', value: [{ type: 'text', text: markdown }] }; + }, + }); +} + +export function formatLookerSlProposal(proposal: LookerSlProposal): string { + const lines = [ + '## Looker query SL proposal', + '', + `- sourceName: ${proposal.sourceName}`, + `- targetStatus: ${proposal.targetStatus}`, + `- targetWarehouseConnectionId: ${proposal.targetWarehouseConnectionId ?? '(none)'}`, + `- sourceTable: ${proposal.sourceTable ?? 
'(none)'}`, + `- canWriteStandaloneSource: ${proposal.canWriteStandaloneSource}`, + `- triageLane: ${proposal.triageLane}`, + `- decision: ${proposal.decision}`, + '', + '### Measures', + ...(proposal.measures.length === 0 + ? ['- (none)'] + : proposal.measures.map((measure) => `- ${measure.name}: ${measure.expr} (${measure.lookerField})`)), + '', + '### Dimensions', + ...(proposal.dimensions.length === 0 + ? ['- (none)'] + : proposal.dimensions.map((dimension) => `- ${dimension.name}: ${dimension.lookerField}`)), + '', + '### Segments', + ...(proposal.segments.length === 0 + ? ['- (none)'] + : proposal.segments.map((segment) => `- ${segment.name}: ${segment.suggestedPredicate}`)), + '', + '### Notes', + ...proposal.notes.map((note) => `- ${note}`), + ]; + return lines.join('\n'); +} + +function isHighUsage(usage: z.infer | undefined): boolean { + return !!usage && (usage.queryCount30d >= 10 || usage.uniqueUsers30d >= 3); +} + +function isMeasureLikeField(field: string): boolean { + return MEASURE_FIELD_RE.test(fieldLeaf(field).replace(/_/g, ' ')); +} + +function suggestedMeasureExpr(field: string): string { + const leaf = fieldLeaf(field); + if (/\b(count|count_distinct)\b/i.test(leaf.replace(/_/g, ' '))) { + return `count(${field})`; + } + if (/\b(avg|average|rate|ratio|percent|pct|margin|score)\b/i.test(leaf.replace(/_/g, ' '))) { + return `avg(${field})`; + } + return `sum(${field})`; +} + +function fieldLeaf(field: string): string { + const parts = field.split('.'); + return parts[parts.length - 1] || field; +} + +function nonEmptyFilters(filters: Record): Record { + return Object.fromEntries( + Object.entries(filters).filter(([, value]) => { + if (value === null || value === undefined) { + return false; + } + if (typeof value === 'string') { + return value.trim().length > 0; + } + if (Array.isArray(value)) { + return value.length > 0; + } + return true; + }), + ); +} + +function filterValueToPredicate(field: string, value: unknown): string { + if 
(Array.isArray(value)) { + return `${field} IN (${value.map(sqlLiteral).join(', ')})`; + } + if (typeof value === 'number' || typeof value === 'boolean') { + return `${field} = ${String(value)}`; + } + const raw = String(value).trim(); + if (raw.includes(',') && !raw.includes('"') && !raw.includes("'")) { + return `${field} IN (${raw + .split(',') + .map((part) => sqlLiteral(part.trim())) + .join(', ')})`; + } + if (raw.startsWith('-') && raw.length > 1) { + return `${field} != ${sqlLiteral(raw.slice(1).trim())}`; + } + if (raw.includes('%')) { + return `${field} LIKE ${sqlLiteral(raw)}`; + } + return `${field} = ${sqlLiteral(raw)}`; +} + +function sqlLiteral(value: unknown): string { + if (typeof value === 'number' || typeof value === 'boolean') { + return String(value); + } + return `'${String(value).replace(/'/g, "''")}'`; +} + +function contentLabel(input: z.infer): string { + const noun = input.contentType === 'dashboard_tile' ? 'dashboard tile' : 'look'; + return input.contentTitle ? `${noun} "${input.contentTitle}"` : noun; +} + +function toSlName(value: string): string { + const normalized = value + .trim() + .replace(/([a-z0-9])([A-Z])/g, '$1_$2') + .toLowerCase() + .replace(/[^a-z0-9]+/g, '_') + .replace(/^_+|_+$/g, '') + .replace(/_+/g, '_'); + if (!normalized) { + throw new Error(`Cannot derive semantic-layer name from empty Looker value`); + } + return /^[0-9]/.test(normalized) ? 
`n_${normalized}` : normalized; +} diff --git a/packages/context/src/ingest/adapters/looker/types.test.ts b/packages/context/src/ingest/adapters/looker/types.test.ts new file mode 100644 index 00000000..998192c3 --- /dev/null +++ b/packages/context/src/ingest/adapters/looker/types.test.ts @@ -0,0 +1,329 @@ +import { describe, expect, it } from 'vitest'; +import { + lookerPullConfigSchema, + parseLookerPullConfig, + parsedTargetTableSchema, + stagedDashboardFileSchema, + stagedExploreFileSchema, + stagedLookerFetchIssueSchema, + stagedLookerQuerySchema, + stagedLookerScopeFileSchema, + stagedLookerSignalsFileSchema, + stagedLookFileSchema, + stagedSyncConfigSchema, +} from './types.js'; + +describe('Looker staged runtime schemas', () => { + it('parses pull config and staged sync config', () => { + expect( + lookerPullConfigSchema.parse({ + lookerConnectionId: '11111111-1111-4111-8111-111111111111', + instanceBaseUrl: 'https://example.looker.com', + }), + ).toEqual({ + lookerConnectionId: '11111111-1111-4111-8111-111111111111', + instanceBaseUrl: 'https://example.looker.com', + connectionMappings: {}, + connectionTypes: {}, + parsedTargetTables: {}, + }); + + expect( + stagedSyncConfigSchema.parse({ + lookerConnectionId: '11111111-1111-4111-8111-111111111111', + fetchedAt: '2026-04-30T12:00:00.000Z', + instanceBaseUrl: 'https://example.looker.com', + }), + ).toMatchObject({ + lookerConnectionId: '11111111-1111-4111-8111-111111111111', + instanceBaseUrl: 'https://example.looker.com', + }); + }); + + it('parses incremental pull cursors and scope manifests', () => { + expect( + parseLookerPullConfig({ + lookerConnectionId: '11111111-1111-4111-8111-111111111111', + dashboardUpdatedSince: '2026-04-30T10:00:00.000Z', + lookUpdatedSince: '2026-04-30T11:00:00.000Z', + }), + ).toEqual({ + lookerConnectionId: '11111111-1111-4111-8111-111111111111', + dashboardUpdatedSince: '2026-04-30T10:00:00.000Z', + lookUpdatedSince: '2026-04-30T11:00:00.000Z', + connectionMappings: {}, + 
connectionTypes: {}, + parsedTargetTables: {}, + }); + + expect( + stagedLookerScopeFileSchema.parse({ + mode: 'incremental', + knownCurrentRawPaths: ['dashboards/10.json', 'looks/20.json'], + fetchedRawPaths: ['dashboards/10.json'], + }), + ).toEqual({ + mode: 'incremental', + knownCurrentRawPaths: ['dashboards/10.json', 'looks/20.json'], + fetchedRawPaths: ['dashboards/10.json'], + }); + + expect( + stagedSyncConfigSchema.parse({ + lookerConnectionId: '11111111-1111-4111-8111-111111111111', + fetchedAt: '2026-04-30T12:30:00.000Z', + previousCursors: { + dashboardsLastSyncedAt: null, + looksLastSyncedAt: '2026-04-30T11:00:00.000Z', + }, + nextCursors: { + dashboardsLastSyncedAt: '2026-04-30T12:00:00.000Z', + looksLastSyncedAt: '2026-04-30T11:00:00.000Z', + }, + }).nextCursors, + ).toEqual({ + dashboardsLastSyncedAt: '2026-04-30T12:00:00.000Z', + looksLastSyncedAt: '2026-04-30T11:00:00.000Z', + }); + }); + + it('normalizes numeric Looker ids to strings', () => { + const dashboard = stagedDashboardFileSchema.parse({ + lookerId: 10, + title: 'Sales Pipeline', + description: null, + folderId: 7, + ownerId: 3, + updatedAt: '2026-04-30T12:00:00.000Z', + tiles: [{ id: 100, title: 'ARR', lookId: null, query: { model: 'b2b', view: 'sales_pipeline' } }], + }); + + expect(dashboard.lookerId).toBe('10'); + expect(dashboard.folderId).toBe('7'); + expect(dashboard.ownerId).toBe('3'); + expect(dashboard.tiles[0].id).toBe('100'); + }); + + it('parses explores, looks, and signal files with defaults', () => { + expect( + stagedExploreFileSchema.parse({ + modelName: 'b2b', + exploreName: 'sales_pipeline', + label: 'Sales Pipeline', + description: null, + fields: { + dimensions: [{ name: 'opportunities.id', label: 'Opportunity ID', type: 'number', sql: '${TABLE}.id' }], + measures: [{ name: 'opportunities.arr', label: 'ARR', type: 'sum', sql: '${TABLE}.arr' }], + }, + joins: [{ name: 'accounts', type: 'left_outer', relationship: 'many_to_one' }], + }), + ).toMatchObject({ + 
modelName: 'b2b', + exploreName: 'sales_pipeline', + fields: { dimensions: [{ name: 'opportunities.id' }], measures: [{ name: 'opportunities.arr' }] }, + }); + + expect( + stagedLookFileSchema.parse({ + lookerId: '20', + title: 'Open Pipeline', + description: null, + folderId: null, + ownerId: null, + updatedAt: null, + query: { model: 'b2b', view: 'sales_pipeline', fields: ['opportunities.arr'] }, + }), + ).toMatchObject({ lookerId: '20', query: { fields: ['opportunities.arr'] } }); + + expect(stagedLookerSignalsFileSchema.parse({}).dashboardUsage).toEqual([]); + }); + + it('parses warehouse SL mapping pull config and staged target table fields', () => { + const targetConnectionId = '22222222-2222-4222-8222-222222222222'; + const parsedTargetTable = { + ok: true as const, + catalog: 'proj', + schema: 'dataset', + name: 'opportunities', + canonicalTable: 'proj.dataset.opportunities', + }; + + expect(parsedTargetTableSchema.parse(parsedTargetTable)).toEqual(parsedTargetTable); + + expect( + parseLookerPullConfig({ + lookerConnectionId: '11111111-1111-4111-8111-111111111111', + connectionMappings: { b2b_sandbox_bq: targetConnectionId }, + connectionTypes: { b2b_sandbox_bq: 'BIGQUERY' }, + parsedTargetTables: { 'b2b.sales_pipeline': parsedTargetTable }, + }), + ).toEqual({ + lookerConnectionId: '11111111-1111-4111-8111-111111111111', + connectionMappings: { b2b_sandbox_bq: targetConnectionId }, + connectionTypes: { b2b_sandbox_bq: 'BIGQUERY' }, + parsedTargetTables: { 'b2b.sales_pipeline': parsedTargetTable }, + }); + + expect( + stagedExploreFileSchema.parse({ + modelName: 'b2b', + exploreName: 'sales_pipeline', + label: 'Sales Pipeline', + description: null, + rawSqlTableName: 'proj.dataset.opportunities AS opportunities', + connectionName: 'b2b_sandbox_bq', + viewName: 'opportunities', + fields: { + dimensions: [{ name: 'opportunities.id', label: 'Opportunity ID', type: 'number', sql: '${TABLE}.id' }], + measures: [{ name: 'opportunities.arr', label: 'ARR', type: 
'sum', sql: '${TABLE}.arr' }], + }, + joins: [ + { + name: 'accounts', + type: 'left_outer', + relationship: 'many_to_one', + rawSqlTableName: 'proj.dataset.accounts', + sqlOn: '${opportunities.account_id} = ${accounts.id}', + from: null, + targetTable: { + ok: true, + catalog: 'proj', + schema: 'dataset', + name: 'accounts', + canonicalTable: 'proj.dataset.accounts', + }, + }, + ], + targetWarehouseConnectionId: targetConnectionId, + targetTable: parsedTargetTable, + }), + ).toMatchObject({ + modelName: 'b2b', + exploreName: 'sales_pipeline', + connectionName: 'b2b_sandbox_bq', + targetWarehouseConnectionId: targetConnectionId, + targetTable: parsedTargetTable, + joins: [{ name: 'accounts', targetTable: { ok: true, name: 'accounts' } }], + }); + }); + + it('parses structured Looker mapping fetch warnings', () => { + expect( + stagedLookerFetchIssueSchema.parse({ + rawPath: 'looker_connection_mappings/b2b_sandbox_bq', + entityType: 'looker_connection_mapping', + entityId: 'b2b_sandbox_bq', + severity: 'warning', + statusCode: null, + message: 'Looker connection b2b_sandbox_bq is not mapped to a warehouse connection.', + retryRecommended: false, + kind: 'unmapped_looker_connection', + details: { + lookerConnectionName: 'b2b_sandbox_bq', + affectedExplores: ['b2b.sales_pipeline'], + }, + }), + ).toMatchObject({ + entityType: 'looker_connection_mapping', + kind: 'unmapped_looker_connection', + details: { + lookerConnectionName: 'b2b_sandbox_bq', + affectedExplores: ['b2b.sales_pipeline'], + }, + }); + }); + + it('parses LookML model listing warnings in fetch reports', () => { + expect( + stagedLookerFetchIssueSchema.parse({ + rawPath: 'lookml_models.json', + entityType: 'lookml_models', + entityId: null, + severity: 'warning', + statusCode: 403, + message: 'LookML model access denied', + retryRecommended: false, + }), + ).toEqual({ + rawPath: 'lookml_models.json', + entityType: 'lookml_models', + entityId: null, + severity: 'warning', + statusCode: 403, + message: 
'LookML model access denied', + retryRecommended: false, + }); + }); + + it('accepts slug-shaped connection ids inside KLO Looker runtime schemas', () => { + const parsedTargetTable = { + ok: true as const, + catalog: 'proj', + schema: 'dataset', + name: 'opportunities', + canonicalTable: 'proj.dataset.opportunities', + }; + + expect( + parseLookerPullConfig({ + lookerConnectionId: 'prod-looker', + connectionMappings: { b2b_sandbox_bq: 'prod-warehouse' }, + connectionTypes: { b2b_sandbox_bq: 'BIGQUERY' }, + parsedTargetTables: { 'b2b.sales_pipeline': parsedTargetTable }, + }), + ).toMatchObject({ + lookerConnectionId: 'prod-looker', + connectionMappings: { b2b_sandbox_bq: 'prod-warehouse' }, + }); + + expect( + stagedSyncConfigSchema.parse({ + lookerConnectionId: 'prod-looker', + fetchedAt: '2026-04-30T12:00:00.000Z', + }), + ).toMatchObject({ + lookerConnectionId: 'prod-looker', + }); + + expect( + stagedLookerQuerySchema.parse({ + model: 'b2b', + view: 'sales_pipeline', + targetWarehouseConnectionId: 'prod-warehouse', + targetTable: parsedTargetTable, + }), + ).toMatchObject({ + targetWarehouseConnectionId: 'prod-warehouse', + targetTable: parsedTargetTable, + }); + + expect( + stagedExploreFileSchema.parse({ + modelName: 'b2b', + exploreName: 'sales_pipeline', + label: 'Sales Pipeline', + description: null, + fields: { dimensions: [], measures: [] }, + targetWarehouseConnectionId: 'prod-warehouse', + targetTable: parsedTargetTable, + }), + ).toMatchObject({ + targetWarehouseConnectionId: 'prod-warehouse', + targetTable: parsedTargetTable, + }); + }); + + it('rejects unsafe KLO Looker connection ids', () => { + expect(() => + parseLookerPullConfig({ + lookerConnectionId: '../prod-looker', + }), + ).toThrow(); + + expect(() => + parseLookerPullConfig({ + connectionMappings: { b2b_sandbox_bq: 'prod/warehouse' }, + }), + ).toThrow(); + }); +}); diff --git a/packages/context/src/ingest/adapters/looker/types.ts b/packages/context/src/ingest/adapters/looker/types.ts 
new file mode 100644 index 00000000..ea515b9d --- /dev/null +++ b/packages/context/src/ingest/adapters/looker/types.ts @@ -0,0 +1,255 @@ +import { z } from 'zod'; +import { connectionTypeSchema } from '../../../connections/connection-type.js'; +import { parsedTargetTableSchema } from '../../parsed-target-table.js'; + +const lookerIdSchema = z.union([z.string(), z.number().int()]).transform(String); +const nullableLookerIdSchema = z.union([lookerIdSchema, z.null()]).default(null); + +export const lookerConnectionIdSchema = z.string().min(1).regex(/^[A-Za-z0-9_-]+$/); + +export { parsedTargetTableSchema, type ParsedTargetTable } from '../../parsed-target-table.js'; + +export const lookerRuntimeCursorsSchema = z.object({ + dashboardsLastSyncedAt: z.iso.datetime().nullable().default(null), + looksLastSyncedAt: z.iso.datetime().nullable().default(null), +}); + +export type LookerRuntimeCursors = z.infer; + +export const lookerPullConfigSchema = z.object({ + lookerConnectionId: lookerConnectionIdSchema.optional(), + instanceBaseUrl: z.url().optional(), + dashboardUpdatedSince: z.iso.datetime().nullable().optional(), + lookUpdatedSince: z.iso.datetime().nullable().optional(), + connectionMappings: z.record(z.string(), lookerConnectionIdSchema).default({}), + connectionTypes: z.record(z.string(), connectionTypeSchema).default({}), + parsedTargetTables: z.record(z.string(), parsedTargetTableSchema).default({}), +}); + +export type LookerPullConfig = z.infer; + +export function parseLookerPullConfig(raw: unknown): LookerPullConfig { + return lookerPullConfigSchema.parse(raw ?? 
{}); +} + +export const stagedSyncConfigSchema = z.object({ + lookerConnectionId: lookerConnectionIdSchema, + fetchedAt: z.iso.datetime(), + instanceBaseUrl: z.url().optional(), + previousCursors: lookerRuntimeCursorsSchema.default({ + dashboardsLastSyncedAt: null, + looksLastSyncedAt: null, + }), + nextCursors: lookerRuntimeCursorsSchema.default({ + dashboardsLastSyncedAt: null, + looksLastSyncedAt: null, + }), +}); + +export const stagedLookerQuerySchema = z.object({ + id: lookerIdSchema.optional(), + model: z.string(), + view: z.string(), + fields: z.array(z.string()).default([]), + filters: z.record(z.string(), z.unknown()).default({}), + sorts: z.array(z.string()).default([]), + limit: z.union([z.string(), z.number()]).optional().nullable(), + dynamicFields: z.string().optional().nullable(), + targetWarehouseConnectionId: lookerConnectionIdSchema.nullable().default(null), + targetTable: parsedTargetTableSchema.nullable().default(null), +}); + +export type StagedLookerQuery = z.infer; + +const stagedDashboardTileSchema = z.object({ + id: lookerIdSchema, + title: z.string().nullable().default(null), + lookId: nullableLookerIdSchema, + query: stagedLookerQuerySchema.nullable().default(null), +}); + +export const stagedDashboardFileSchema = z.object({ + lookerId: lookerIdSchema, + title: z.string(), + description: z.string().nullable(), + folderId: nullableLookerIdSchema, + ownerId: nullableLookerIdSchema, + updatedAt: z.string().nullable(), + tiles: z.array(stagedDashboardTileSchema).default([]), +}); + +export type StagedDashboardFile = z.infer; + +export const stagedLookFileSchema = z.object({ + lookerId: lookerIdSchema, + title: z.string(), + description: z.string().nullable(), + folderId: nullableLookerIdSchema, + ownerId: nullableLookerIdSchema, + updatedAt: z.string().nullable(), + query: stagedLookerQuerySchema.nullable().default(null), +}); + +export type StagedLookFile = z.infer; + +const stagedFolderSchema = z.object({ + id: lookerIdSchema, + name: 
z.string(), + parentId: nullableLookerIdSchema, + path: z.array(z.string()).default([]), +}); + +export const stagedFoldersTreeFileSchema = z.object({ + folders: z.array(stagedFolderSchema), +}); + +export type StagedFoldersTreeFile = z.infer; + +export const stagedUserFileSchema = z.object({ + id: lookerIdSchema, + displayName: z.string().nullable(), + email: z.string().nullable().default(null), +}); + +export type StagedUserFile = z.infer; + +export const stagedGroupFileSchema = z.object({ + id: lookerIdSchema, + name: z.string(), +}); + +export type StagedGroupFile = z.infer; + +const stagedLookmlModelSchema = z.object({ + name: z.string(), + label: z.string().nullable().default(null), + explores: z.array(z.object({ name: z.string(), label: z.string().nullable().default(null) })), +}); + +export const stagedLookmlModelsFileSchema = z.object({ + models: z.array(stagedLookmlModelSchema), +}); + +export type StagedLookmlModelsFile = z.infer; + +const stagedLookerFieldSchema = z.object({ + name: z.string(), + label: z.string().nullable().default(null), + type: z.string().nullable().default(null), + sql: z.string().nullable().default(null), + description: z.string().nullable().default(null), +}); + +const stagedLookerJoinSchema = z.object({ + name: z.string(), + type: z.string().nullable().default(null), + relationship: z.string().nullable().default(null), + rawSqlTableName: z.string().nullable().default(null), + sqlOn: z.string().nullable().default(null), + from: z.string().nullable().default(null), + targetTable: parsedTargetTableSchema.nullable().default(null), +}); + +export const stagedExploreFileSchema = z.object({ + modelName: z.string(), + exploreName: z.string(), + label: z.string().nullable().default(null), + description: z.string().nullable().default(null), + rawSqlTableName: z.string().nullable().default(null), + connectionName: z.string().nullable().default(null), + viewName: z.string().nullable().default(null), + fields: z.object({ + dimensions: 
z.array(stagedLookerFieldSchema).default([]), + measures: z.array(stagedLookerFieldSchema).default([]), + }), + joins: z.array(stagedLookerJoinSchema).default([]), + targetWarehouseConnectionId: lookerConnectionIdSchema.nullable().default(null), + targetTable: parsedTargetTableSchema.nullable().default(null), +}); + +export type StagedExploreFile = z.infer; + +const stagedUsageSignalSchema = z.object({ + contentId: lookerIdSchema, + queryCount30d: z.number().int().nonnegative().default(0), + uniqueUsers30d: z.number().int().nonnegative().default(0), + lastRunAt: z.string().nullable().default(null), + topUsers: z.array(lookerIdSchema).default([]), +}); + +const stagedScheduledPlanSignalSchema = z.object({ + contentId: lookerIdSchema, + contentType: z.enum(['dashboard', 'look']), + isScheduled: z.boolean(), + scheduleCount: z.number().int().nonnegative().default(0), + recipientCount: z.number().int().nonnegative().default(0), +}); + +const stagedFavoriteSignalSchema = z.object({ + contentId: lookerIdSchema, + contentType: z.enum(['dashboard', 'look']), + favoriteCount: z.number().int().nonnegative().default(0), +}); + +export const stagedLookerSignalsFileSchema = z.object({ + dashboardUsage: z.array(stagedUsageSignalSchema).default([]), + lookUsage: z.array(stagedUsageSignalSchema).default([]), + scheduledPlans: z.array(stagedScheduledPlanSignalSchema).default([]), + favorites: z.array(stagedFavoriteSignalSchema).default([]), +}); + +export type StagedLookerSignalsFile = z.infer; + +export const stagedLookerScopeFileSchema = z.object({ + mode: z.enum(['full', 'incremental']), + knownCurrentRawPaths: z.array(z.string()).default([]), + fetchedRawPaths: z.array(z.string()).default([]), +}); + +export type StagedLookerScopeFile = z.infer; + +const stagedLookerFetchIssueKindSchema = z.enum([ + 'unmapped_looker_connection', + 'unparseable_sql_table_name', + 'looker_template_unresolved', + 'derived_table_not_supported', + 'lookml_connection_mismatch', +]); + +export const 
stagedLookerFetchIssueSchema = z.object({ + rawPath: z.string().min(1), + entityType: z.enum(['dashboard', 'look', 'explore', 'signals', 'lookml_models', 'looker_connection_mapping']), + entityId: z.string().nullable().default(null), + severity: z.enum(['warning', 'error']), + statusCode: z.number().int().nullable().default(null), + message: z.string().min(1), + retryRecommended: z.boolean().default(false), + kind: stagedLookerFetchIssueKindSchema.optional(), + details: z.record(z.string(), z.unknown()).optional(), +}); + +export type StagedLookerFetchIssue = z.infer; + +export const stagedLookerFetchReportSchema = z.object({ + status: z.enum(['success', 'partial']), + retryRecommended: z.boolean().default(false), + skipped: z.array(stagedLookerFetchIssueSchema).default([]), + warnings: z.array(stagedLookerFetchIssueSchema).default([]), +}); + +export type StagedLookerFetchReport = z.infer; + +export const STAGED_FILES = { + syncConfig: 'sync-config.json', + scope: 'looker-scope.json', + fetchReport: 'looker-fetch-report.json', + evidenceRoot: 'evidence', + lookmlModels: 'lookml_models.json', + foldersTree: 'folders/tree.json', + signals: { + dashboardUsage: 'signals/dashboard_usage.json', + lookUsage: 'signals/look_usage.json', + scheduledPlans: 'signals/scheduled_plans.json', + favorites: 'signals/favorites.json', + }, +} as const; diff --git a/packages/context/src/ingest/adapters/lookml/chunk.test.ts b/packages/context/src/ingest/adapters/lookml/chunk.test.ts new file mode 100644 index 00000000..e9a8b5f3 --- /dev/null +++ b/packages/context/src/ingest/adapters/lookml/chunk.test.ts @@ -0,0 +1,230 @@ +import { join } from 'node:path'; +import { describe, expect, it } from 'vitest'; +import { chunkLookmlProject } from './chunk.js'; +import { type ParsedLookmlProject, parseLookmlStagedDir } from './parse.js'; + +const FIXTURE_ROOT = join(__dirname, '../../../../test/fixtures/lookml'); + +describe('chunkLookmlProject — first run', () => { + it('single-model bundle → 
1 WU with model + all views in rawFiles', async () => { + const stagedDir = join(FIXTURE_ROOT, 'single-model'); + const project = await parseLookmlStagedDir(stagedDir); + const result = chunkLookmlProject(project); + expect(result.workUnits).toHaveLength(1); + const wu = result.workUnits[0]; + expect(wu.unitKey).toBe('lookml-orders'); + expect(wu.rawFiles.sort()).toEqual(['orders.model.lkml', 'views/customers.view.lkml', 'views/orders.view.lkml']); + expect(wu.peerFileIndex).toEqual([]); + expect(wu.dependencyPaths).toEqual([]); + expect(result.eviction).toBeUndefined(); + }); + + it('multi-model bundle → 1 WU per model; shared view owned by lex-first model; others see it in dependencyPaths + peerFileIndex is pathless-index', async () => { + const stagedDir = join(FIXTURE_ROOT, 'multi-model'); + const project = await parseLookmlStagedDir(stagedDir); + const result = chunkLookmlProject(project); + expect(result.workUnits).toHaveLength(2); + const marketing = result.workUnits.find((wu) => wu.unitKey === 'lookml-marketing'); + const orders = result.workUnits.find((wu) => wu.unitKey === 'lookml-orders'); + expect(marketing).toBeDefined(); + expect(orders).toBeDefined(); + if (!marketing || !orders) { + throw new Error('expected marketing and orders work units'); + } + + // marketing sorts before orders → marketing owns shared_dims + expect(marketing.rawFiles).toContain('views/shared_dims.view.lkml'); + expect(marketing.rawFiles).toContain('views/campaigns.view.lkml'); + expect(marketing.rawFiles).toContain('marketing.model.lkml'); + expect(marketing.rawFiles).not.toContain('views/orders.view.lkml'); + expect(marketing.dependencyPaths).toEqual([]); + + // orders does NOT own shared_dims — it's in dependencyPaths (read-only upstream). 
+    expect(orders.rawFiles).not.toContain('views/shared_dims.view.lkml');
+    expect(orders.dependencyPaths).toEqual(['views/shared_dims.view.lkml']);
+    expect(orders.rawFiles).toContain('views/orders.view.lkml');
+    expect(orders.rawFiles).toContain('orders.model.lkml');
+
+    // Each WU's peerFileIndex lists the OTHER model's files (paths-only index).
+    expect(orders.peerFileIndex).toContain('marketing.model.lkml');
+    expect(orders.peerFileIndex).toContain('views/campaigns.view.lkml');
+    // Dependency paths should not be duplicated into peerFileIndex.
+    expect(orders.peerFileIndex).not.toContain('views/shared_dims.view.lkml');
+  });
+
+  it('extends-chain fixture: single WU contains base + orders + orders_ext; chain order visible via graph', async () => {
+    const stagedDir = join(FIXTURE_ROOT, 'extends-chain');
+    const project = await parseLookmlStagedDir(stagedDir);
+    const result = chunkLookmlProject(project);
+    // One model ("orders") includes views/*.view.lkml — so all three views land in its WU.
+    expect(result.workUnits).toHaveLength(1);
+    const wu = result.workUnits[0];
+    expect(wu.unitKey).toBe('lookml-orders');
+    expect(wu.rawFiles.sort()).toEqual([
+      'orders.model.lkml',
+      'views/base.view.lkml',
+      'views/orders.view.lkml',
+      'views/orders_ext.view.lkml',
+    ]);
+    expect(wu.dependencyPaths).toEqual([]); // all ancestors already in rawFiles on first run
+    expect(wu.notes).toMatch(/orders/);
+  });
+
+  it('is deterministic: two calls on the same project return structurally identical WorkUnits', async () => {
+    const stagedDir = join(FIXTURE_ROOT, 'multi-model');
+    const project = await parseLookmlStagedDir(stagedDir);
+    const r1 = chunkLookmlProject(project);
+    const r2 = chunkLookmlProject(project);
+    expect(r1.workUnits).toEqual(r2.workUnits);
+  });
+
+  it('unitKey is model-name-derived (stable across parse+chunk cycles and across re-syncs)', async () => {
+    const project = await parseLookmlStagedDir(join(FIXTURE_ROOT, 'multi-model'));
+    const { workUnits } = chunkLookmlProject(project);
+    expect(workUnits.map((wu) => wu.unitKey).sort()).toEqual(['lookml-marketing', 'lookml-orders']);
+  });
+
+  it('marks mismatched model WorkUnits as SL-disallowed and keeps wiki ingest enabled', () => {
+    const project: ParsedLookmlProject = {
+      models: [
+        {
+          path: 'b2b.model.lkml',
+          name: 'b2b',
+          includes: ['views/orders.view.lkml'],
+          explores: ['orders'],
+          connectionName: 'wrong_connection',
+        },
+      ],
+      views: [{ path: 'views/orders.view.lkml', name: 'orders', extendsFrom: [], rawSqlTableName: 'public.orders' }],
+      dashboards: [],
+      allPaths: ['b2b.model.lkml', 'views/orders.view.lkml'],
+    };
+
+    // The SL-disallowed flag is driven by `mismatchedModelNames`; the chunker is not given an
+    // expected connection name to compare `connectionName` against.
+    const result = chunkLookmlProject(project, { mismatchedModelNames: new Set(['b2b']) });
+    const wu = result.workUnits[0];
+
+    expect(wu.unitKey).toBe('lookml-b2b');
+    expect(wu.rawFiles).toEqual(['b2b.model.lkml', 'views/orders.view.lkml']);
+    expect(wu.slDisallowed).toBe(true);
+    expect(wu.slDisallowedReason).toBe('lookml_connection_mismatch');
+    expect(wu.notes).toContain('[LOOKML SL WRITES DISALLOWED]');
+    expect(wu.notes).toContain('reason: lookml_connection_mismatch');
+    expect(wu.notes).toContain('Do not call sl_write_source or sl_edit_source for this WorkUnit.');
+  });
+});
+
+// Re-sync mode: passing a `diffSet` restricts the output to WorkUnits whose rawFiles contain an
+// added or modified path; deleted paths are reported separately through `eviction`.
+describe('chunkLookmlProject — re-sync', () => {
+  it("modified file in one model only emits that model's WU", async () => {
+    const stagedDir = join(FIXTURE_ROOT, 'multi-model');
+    const project = await parseLookmlStagedDir(stagedDir);
+    const result = chunkLookmlProject(project, {
+      diffSet: {
+        added: [],
+        modified: ['views/campaigns.view.lkml'],
+        deleted: [],
+        unchanged: [
+          'marketing.model.lkml',
+          'orders.model.lkml',
+          'views/orders.view.lkml',
+          'views/shared_dims.view.lkml',
+        ],
+      },
+    });
+    expect(result.workUnits).toHaveLength(1);
+    expect(result.workUnits[0].unitKey).toBe('lookml-marketing');
+  });
+
+  it("added file under a model emits that model's WU with the new path in rawFiles", async () => {
+    const stagedDir = join(FIXTURE_ROOT, 'single-model');
+    const project = await parseLookmlStagedDir(stagedDir);
+    const result = chunkLookmlProject(project, {
+      diffSet: {
+        added: ['views/customers.view.lkml'],
+        modified: [],
+        deleted: [],
+        unchanged: ['orders.model.lkml', 'views/orders.view.lkml'],
+      },
+    });
+    expect(result.workUnits).toHaveLength(1);
+    expect(result.workUnits[0].rawFiles).toContain('views/customers.view.lkml');
+  });
+
+  it('widens dependencyPaths with transitive extends ancestors on re-sync', async () => {
+    const stagedDir = join(FIXTURE_ROOT, 'extends-chain');
+    const project = await parseLookmlStagedDir(stagedDir);
+    // Only orders_ext is touched; base and orders are upstream ancestors.
+    // Because the single-model WU's rawFiles ALREADY include all three on first run,
+    // they remain in rawFiles — dependencyPaths stays empty. Widening matters when
+    // re-sync drops some files from rawFiles, which doesn't apply for a monolithic
+    // single-model WU. Assert the baseline invariant.
+    const result = chunkLookmlProject(project, {
+      diffSet: {
+        added: [],
+        modified: ['views/orders_ext.view.lkml'],
+        deleted: [],
+        unchanged: ['orders.model.lkml', 'views/base.view.lkml', 'views/orders.view.lkml'],
+      },
+    });
+    expect(result.workUnits).toHaveLength(1);
+    const wu = result.workUnits[0];
+    expect(wu.rawFiles).toContain('views/orders_ext.view.lkml');
+    // Ancestors already in rawFiles → not duplicated into dependencyPaths.
+    expect(wu.dependencyPaths).toEqual([]);
+  });
+
+  it('widens dependencyPaths when an ancestor is OUTSIDE the WU (synthesized cross-model case)', () => {
+    // Synthesize a scenario in-memory: two models, "a" owns base.view.lkml,
+    // "b" owns derived.view.lkml which extends base. A diff that only touches
+    // derived.view.lkml should widen b's WU with base.view.lkml in dependencyPaths
+    // if base lives outside b's rawFiles. In practice with the current emit rules,
+    // base.view.lkml would already be in dependencyPaths because model b lists
+    // base.view.lkml under its `include:`. Here we confirm the widening is idempotent.
+    const project: ParsedLookmlProject = {
+      models: [
+        { path: 'a.model.lkml', name: 'a', includes: ['views/base.view.lkml'], explores: [], connectionName: null },
+        {
+          path: 'b.model.lkml',
+          name: 'b',
+          includes: ['views/base.view.lkml', 'views/derived.view.lkml'],
+          explores: [],
+          connectionName: null,
+        },
+      ],
+      views: [
+        { path: 'views/base.view.lkml', name: 'base', extendsFrom: [], rawSqlTableName: null },
+        { path: 'views/derived.view.lkml', name: 'derived', extendsFrom: ['base'], rawSqlTableName: null },
+      ],
+      dashboards: [],
+      allPaths: ['a.model.lkml', 'b.model.lkml', 'views/base.view.lkml', 'views/derived.view.lkml'],
+    };
+    const result = chunkLookmlProject(project, {
+      diffSet: {
+        added: [],
+        modified: ['views/derived.view.lkml'],
+        deleted: [],
+        unchanged: ['a.model.lkml', 'b.model.lkml', 'views/base.view.lkml'],
+      },
+    });
+    const b = result.workUnits.find((wu) => wu.unitKey === 'lookml-b');
+    expect(b).toBeDefined();
+    if (!b) {
+      throw new Error('expected lookml-b work unit');
+    }
+    expect(b.dependencyPaths).toContain('views/base.view.lkml');
+  });
+
+  it('passes through diffSet.deleted as an EvictionUnit', async () => {
+    const project = await parseLookmlStagedDir(join(FIXTURE_ROOT, 'single-model'));
+    const result = chunkLookmlProject(project, {
+      diffSet: {
+        added: [],
+        modified: [],
+        deleted: ['views/zombie.view.lkml'],
+        unchanged: ['orders.model.lkml', 'views/customers.view.lkml', 'views/orders.view.lkml'],
+      },
+    });
+    expect(result.eviction).toEqual({ deletedRawPaths: ['views/zombie.view.lkml'] });
+    // No WU emitted because no current files are touched.
+    expect(result.workUnits).toEqual([]);
+  });
+});
diff --git a/packages/context/src/ingest/adapters/lookml/chunk.ts b/packages/context/src/ingest/adapters/lookml/chunk.ts
new file mode 100644
index 00000000..459ff358
--- /dev/null
+++ b/packages/context/src/ingest/adapters/lookml/chunk.ts
@@ -0,0 +1,159 @@
+import type { ChunkResult, DiffSet, WorkUnit } from '../../types.js';
+import { buildLookmlGraph, type LookmlGraph } from './graph.js';
+import type { ParsedLookmlProject } from './parse.js';
+
+/** Options accepted by `chunkLookmlProject`. */
+interface ChunkOptions {
+  /** When set, chunking runs in re-sync mode: only WorkUnits touched by the diff are returned. */
+  diffSet?: DiffSet;
+  /** Names of models whose WorkUnits are flagged `slDisallowed` with reason `lookml_connection_mismatch`. */
+  mismatchedModelNames?: Set<string>;
+}
+
+/**
+ * Wrap a WorkUnit's notes in the "[LOOKML SL WRITES DISALLOWED]" banner.
+ *
+ * @param modelName - Model name echoed on the banner's `model:` line.
+ * @param existingNotes - Notes the WorkUnit would otherwise carry; appended after a blank line.
+ * @returns The banner lines followed by the original notes, joined with `\n`.
+ */
+function lookmlSlDisallowedNotes(modelName: string, existingNotes: string): string {
+  return [
+    '[LOOKML SL WRITES DISALLOWED]',
+    'reason: lookml_connection_mismatch',
+    `model: ${modelName}`,
+    'Do not call sl_write_source or sl_edit_source for this WorkUnit.',
+    'Continue wiki extraction and context candidates from the raw LookML files.',
+    '[/LOOKML SL WRITES DISALLOWED]',
+    '',
+    existingNotes,
+  ].join('\n');
+}
+
+/**
+ * Emit WorkUnits for a parsed LookML project.
+ *
+ * First run (no diffSet): one WU per model + `lookml-orphans` (if any non-owned views)
+ *   + `lookml-dashboard-<name>` per dashboard file.
+ *
+ * Re-sync (diffSet provided): filter to WUs whose rawFiles intersect added∪modified;
+ *   widen dependencyPaths with the files of every transitive `extends` ancestor of the
+ *   views in the WU's rawFiles (ancestors already in rawFiles are not repeated).
+ *   Emit a single EvictionUnit for diffSet.deleted.
+ *
+ * @param project - Parsed project (models, views, dashboards).
+ * @param opts - Optional diff set and the set of connection-mismatched model names.
+ * @returns The WorkUnits and, in re-sync mode with deletions, an `eviction` entry.
+ */
+export function chunkLookmlProject(project: ParsedLookmlProject, opts: ChunkOptions = {}): ChunkResult {
+  const graph = buildLookmlGraph(project);
+  const firstRunUnits = emitFirstRunWorkUnits(project, graph, opts);
+  if (!opts.diffSet) {
+    return { workUnits: firstRunUnits };
+  }
+  return applyDiffSet(firstRunUnits, project, graph, opts.diffSet);
+}
+
+/**
+ * Build the complete first-run WorkUnit list: one unit per model (sorted by name), then an
+ * optional orphan-views unit, then one unit per dashboard (sorted by name). Re-sync filters
+ * this same list, so unit keys and contents are identical in both modes.
+ */
+function emitFirstRunWorkUnits(project: ParsedLookmlProject, graph: LookmlGraph, opts: ChunkOptions): WorkUnit[] {
+  const allModelPaths = [...new Set(project.models.map((m) => m.path))].sort();
+  const allDashboardPaths = [...new Set(project.dashboards.map((d) => d.path))].sort();
+  // Dedupe: a .view.lkml with multiple `view:` blocks produces multiple ParsedLookmlView
+  // entries sharing one path. The Set is kept for O(1) membership checks below.
+  const allViewPathSet = new Set(project.views.map((v) => v.path));
+  const allViewPaths = [...allViewPathSet].sort();
+
+  const workUnits: WorkUnit[] = [];
+
+  // Per-model WU, sorted by model name for determinism.
+  const sortedModels = [...project.models].sort((a, b) => a.name.localeCompare(b.name));
+
+  for (const model of sortedModels) {
+    const includedViewPaths = (graph.viewsIncludedByModel.get(model.name) ?? []).filter((p) => allViewPathSet.has(p));
+    // Views the model includes and which this model ALSO owns (first-includer-wins).
+    const ownedViewPaths = includedViewPaths.filter((p) => graph.ownerByViewPath.get(p) === model.name);
+    // Views the model includes but that another lexicographically-earlier model owns.
+    // These land in dependencyPaths so this WU's agent can READ them, but the "canonical
+    // write" for those views happens in the owner's WU.
+    const nonOwnedDepViewPaths = includedViewPaths.filter((p) => graph.ownerByViewPath.get(p) !== model.name).sort();
+
+    const rawFiles = [model.path, ...ownedViewPaths].sort();
+    // Paths already handed to this WU (as raw files or dependencies) are left out of its peer index.
+    const alreadyInUnit = new Set([...rawFiles, ...nonOwnedDepViewPaths]);
+    const peerFileIndex = [
+      ...allModelPaths.filter((p) => p !== model.path),
+      ...allViewPaths.filter((p) => !alreadyInUnit.has(p)),
+      ...allDashboardPaths,
+    ].sort();
+
+    const isMismatched = opts.mismatchedModelNames?.has(model.name) ?? false;
+    const notes =
+      model.explores.length > 0
+        ? `LookML model "${model.name}" (explores: ${model.explores.join(', ')})`
+        : `LookML model "${model.name}"`;
+
+    workUnits.push({
+      unitKey: `lookml-${model.name}`,
+      displayLabel: `LookML model "${model.name}"`,
+      rawFiles,
+      peerFileIndex,
+      dependencyPaths: nonOwnedDepViewPaths,
+      notes: isMismatched ? lookmlSlDisallowedNotes(model.name, notes) : notes,
+      slDisallowed: isMismatched ? true : undefined,
+      slDisallowedReason: isMismatched ? 'lookml_connection_mismatch' : undefined,
+    });
+  }
+
+  // Orphan view WU — views that no model includes. Skip entirely if none.
+  const orphanViewPaths = allViewPaths.filter((p) => !graph.ownerByViewPath.has(p)).sort();
+  if (orphanViewPaths.length > 0) {
+    workUnits.push({
+      unitKey: 'lookml-orphans',
+      displayLabel: 'LookML orphan views',
+      rawFiles: orphanViewPaths,
+      peerFileIndex: [...allModelPaths, ...allDashboardPaths].sort(),
+      dependencyPaths: [],
+      notes: 'Views not referenced by any .model.lkml (orphaned)',
+    });
+  }
+
+  // One WU per dashboard file.
+  for (const dashboard of [...project.dashboards].sort((a, b) => a.name.localeCompare(b.name))) {
+    workUnits.push({
+      unitKey: `lookml-dashboard-${dashboard.name}`,
+      displayLabel: `LookML dashboard "${dashboard.name}"`,
+      rawFiles: [dashboard.path],
+      peerFileIndex: [...allModelPaths, ...allViewPaths].sort(),
+      dependencyPaths: [],
+      notes: `LookML dashboard "${dashboard.name}"`,
+    });
+  }
+
+  return workUnits;
+}
+
+/**
+ * Re-sync filter: keep only the WorkUnits whose rawFiles contain an added or modified path,
+ * widen their dependencyPaths with `extends` ancestors, and report deletions as an eviction.
+ *
+ * @param firstRunUnits - Full WorkUnit list as produced for a first run.
+ * @param _project - Unused; kept so the signature stays stable.
+ * @param graph - Graph providing view-name/path lookups and transitive `extends` ancestors.
+ * @param diffSet - Added/modified/deleted paths for this sync.
+ * @returns Kept WorkUnits plus `eviction` (undefined when nothing was deleted).
+ */
+function applyDiffSet(
+  firstRunUnits: WorkUnit[],
+  _project: ParsedLookmlProject,
+  graph: LookmlGraph,
+  diffSet: DiffSet,
+): ChunkResult {
+  const touched = new Set([...diffSet.added, ...diffSet.modified]);
+  const keptUnits: WorkUnit[] = [];
+
+  for (const wu of firstRunUnits) {
+    if (!wu.rawFiles.some((p) => touched.has(p))) {
+      continue;
+    }
+
+    // Widen dependencyPaths: for every view in rawFiles, add paths of all transitive
+    // extends ancestors (if known in the graph) that aren't already in rawFiles.
+    const rawFileSet = new Set(wu.rawFiles);
+    const widenedDeps = new Set(wu.dependencyPaths);
+    for (const rawPath of wu.rawFiles) {
+      for (const viewName of graph.viewNamesByPath.get(rawPath) ?? []) {
+        for (const ancestorName of graph.extendsAncestorsByViewName.get(viewName) ?? []) {
+          for (const ancestorPath of graph.pathsByViewName.get(ancestorName) ?? []) {
+            if (!rawFileSet.has(ancestorPath)) {
+              widenedDeps.add(ancestorPath);
+            }
+          }
+        }
+      }
+    }
+    keptUnits.push({
+      ...wu,
+      dependencyPaths: [...widenedDeps].sort(),
+    });
+  }
+
+  const eviction = diffSet.deleted.length > 0 ? { deletedRawPaths: [...diffSet.deleted].sort() } : undefined;
+  return { workUnits: keptUnits, eviction };
+}
diff --git a/packages/context/src/ingest/adapters/lookml/detect.test.ts b/packages/context/src/ingest/adapters/lookml/detect.test.ts
new file mode 100644
index 00000000..040c1788
--- /dev/null
+++ b/packages/context/src/ingest/adapters/lookml/detect.test.ts
@@ -0,0 +1,46 @@
+import { mkdir, mkdtemp, rm, writeFile } from 'node:fs/promises';
+import { tmpdir } from 'node:os';
+import { join } from 'node:path';
+import { afterEach, beforeEach, describe, expect, it } from 'vitest';
+import { detectLookmlStagedDir } from './detect.js';
+
+describe('detectLookmlStagedDir', () => {
+  let stagedDir: string;
+
+  beforeEach(async () => {
+    stagedDir = await mkdtemp(join(tmpdir(), 'lkml-detect-'));
+  });
+
+  afterEach(async () => rm(stagedDir, { recursive: true, force: true }));
+
+  it('returns true when a .model.lkml is present at root', async () => {
+    await writeFile(join(stagedDir, 'orders.model.lkml'), 'include: "views/*"\n', 'utf-8');
+    expect(await detectLookmlStagedDir(stagedDir)).toBe(true);
+  });
+
+  it('returns true when only a .view.lkml is present (no model)', async () => {
+    await writeFile(join(stagedDir, 'x.view.lkml'), 'view: x {}\n', 'utf-8');
+    expect(await detectLookmlStagedDir(stagedDir)).toBe(true);
+  });
+
+  it('returns true when .lkml files are nested under any subdirectory', async () => {
+    await mkdir(join(stagedDir, 'nested', 'deeper'), { recursive: true });
+    await writeFile(join(stagedDir, 'nested', 'deeper', 'x.view.lkml'), 'view: x {}\n', 'utf-8');
+    expect(await detectLookmlStagedDir(stagedDir)).toBe(true);
+  });
+
+  it('accepts the .lookml extension as well as .lkml', async () => {
+    await writeFile(join(stagedDir, 'x.view.lookml'), 'view: x {}\n', 'utf-8');
+    expect(await detectLookmlStagedDir(stagedDir)).toBe(true);
+  });
+
+  it('returns false for a bundle with no .lkml files at all', async () => {
+    await writeFile(join(stagedDir,
'README.md'), '# hi\n', 'utf-8');
+    await writeFile(join(stagedDir, 'config.yaml'), 'a: 1\n', 'utf-8');
+    expect(await detectLookmlStagedDir(stagedDir)).toBe(false);
+  });
+
+  it('returns false for an empty directory', async () => {
+    expect(await detectLookmlStagedDir(stagedDir)).toBe(false);
+  });
+});
diff --git a/packages/context/src/ingest/adapters/lookml/detect.ts b/packages/context/src/ingest/adapters/lookml/detect.ts
new file mode 100644
index 00000000..4ce51455
--- /dev/null
+++ b/packages/context/src/ingest/adapters/lookml/detect.ts
@@ -0,0 +1,13 @@
+import { readdir } from 'node:fs/promises';
+
+/** Matches file names ending in `.lkml` or `.lookml`, case-insensitively. */
+const LKML_EXT_RE = /\.(lkml|lookml)$/i;
+
+/**
+ * Report whether a staged directory contains at least one LookML file.
+ *
+ * The directory is listed recursively, so a match in any nested subdirectory counts.
+ * Only entries for which `isFile()` is true are considered. Errors from `readdir`
+ * (for example a missing directory) are not caught here.
+ *
+ * @param stagedDir - Directory to scan.
+ * @returns `true` if any file name ends in `.lkml` or `.lookml`, otherwise `false`.
+ */
+export async function detectLookmlStagedDir(stagedDir: string): Promise<boolean> {
+  const entries = await readdir(stagedDir, { withFileTypes: true, recursive: true });
+  return entries.some((entry) => entry.isFile() && LKML_EXT_RE.test(entry.name));
+}
diff --git a/packages/context/src/ingest/adapters/lookml/fetch-report.test.ts b/packages/context/src/ingest/adapters/lookml/fetch-report.test.ts
new file mode 100644
index 00000000..ffeb52fb
--- /dev/null
+++ b/packages/context/src/ingest/adapters/lookml/fetch-report.test.ts
@@ -0,0 +1,113 @@
+import { mkdtemp, readFile, rm } from 'node:fs/promises';
+import { tmpdir } from 'node:os';
+import { join } from 'node:path';
+import { afterEach, beforeEach, describe, expect, it } from 'vitest';
+import type { ParsedLookmlProject } from './parse.js';
+import {
+  LOOKML_FETCH_REPORT_FILE,
+  LOOKML_MISMATCHED_MODELS_FILE,
+  buildLookmlValidationArtifacts,
+  readLookmlFetchReport,
+  readLookmlMismatchedModelNames,
+  writeLookmlValidationArtifacts,
+} from './fetch-report.js';
+
+function project(models: ParsedLookmlProject['models']): ParsedLookmlProject {
+  return { models, views: [], dashboards: [], allPaths: models.map((m) => m.path) };
+}
+
+describe('LookML validation fetch report', () => {
+  let stagedDir: string;
+
+  beforeEach(async ()
=> {
+    stagedDir = await mkdtemp(join(tmpdir(), 'lookml-report-'));
+  });
+
+  afterEach(async () => rm(stagedDir, { recursive: true, force: true }));
+
+  // Two models, one expected connection: only "b2b" declares a different connection, so it is
+  // the only model reported; "finance" matches and produces no warning.
+  it('emits partial warning artifacts for mismatched model connection names', async () => {
+    const artifacts = buildLookmlValidationArtifacts(
+      project([
+        {
+          path: 'b2b.model.lkml',
+          name: 'b2b',
+          includes: [],
+          explores: ['orders'],
+          connectionName: 'staging_pg',
+        },
+        {
+          path: 'finance.model.lkml',
+          name: 'finance',
+          includes: [],
+          explores: ['revenue'],
+          connectionName: 'b2b_sandbox_bq',
+        },
+      ]),
+      { expectedLookerConnectionName: 'b2b_sandbox_bq' },
+    );
+
+    expect(artifacts.mismatchedModelNames).toEqual(['b2b']);
+    expect(artifacts.report.status).toBe('partial');
+    expect(artifacts.report.warnings).toEqual([
+      {
+        rawPath: 'b2b.model.lkml',
+        entityType: 'lookml_models',
+        entityId: 'b2b',
+        severity: 'warning',
+        statusCode: null,
+        message:
+          'LookML model b2b declares connection staging_pg but this warehouse expects b2b_sandbox_bq; SL writes are disabled for this model.',
+        retryRecommended: false,
+        kind: 'lookml_connection_mismatch',
+        details: { model: 'b2b', declared: 'staging_pg', expected: 'b2b_sandbox_bq' },
+      },
+    ]);
+  });
+
+  // With no expected connection configured, validation is skipped and nothing is flagged.
+  it('emits success when no expected connection is configured', () => {
+    const artifacts = buildLookmlValidationArtifacts(
+      project([
+        {
+          path: 'b2b.model.lkml',
+          name: 'b2b',
+          includes: [],
+          explores: [],
+          connectionName: 'staging_pg',
+        },
+      ]),
+      { expectedLookerConnectionName: null },
+    );
+
+    expect(artifacts.mismatchedModelNames).toEqual([]);
+    expect(artifacts.report).toEqual({
+      status: 'success',
+      retryRecommended: false,
+      skipped: [],
+      warnings: [],
+    });
+  });
+
+  it('round-trips the fetch report and mismatched model sidecar', async () => {
+    const artifacts = buildLookmlValidationArtifacts(
+      project([
+        {
+          path: 'orders.model.lkml',
+          name: 'orders',
+          includes: [],
+          explores: [],
+          connectionName: 'wrong',
+        },
+      ]),
+      {
expectedLookerConnectionName: 'expected' },
+    );
+
+    await writeLookmlValidationArtifacts(stagedDir, artifacts);
+
+    await expect(readFile(join(stagedDir, LOOKML_FETCH_REPORT_FILE), 'utf-8')).resolves.toContain(
+      'lookml_connection_mismatch',
+    );
+    await expect(readFile(join(stagedDir, LOOKML_MISMATCHED_MODELS_FILE), 'utf-8')).resolves.toContain('orders');
+    await expect(readLookmlFetchReport(stagedDir)).resolves.toEqual(artifacts.report);
+    await expect(readLookmlMismatchedModelNames(stagedDir)).resolves.toEqual(new Set(['orders']));
+  });
+});
diff --git a/packages/context/src/ingest/adapters/lookml/fetch-report.ts b/packages/context/src/ingest/adapters/lookml/fetch-report.ts
new file mode 100644
index 00000000..e626b392
--- /dev/null
+++ b/packages/context/src/ingest/adapters/lookml/fetch-report.ts
@@ -0,0 +1,125 @@
+import { mkdir, readFile, writeFile } from 'node:fs/promises';
+import { dirname, join } from 'node:path';
+import * as z from 'zod';
+import type { SourceFetchReport } from '../../types.js';
+import type { ParsedLookmlProject } from './parse.js';
+
+/** File name, inside the staged dir, of the JSON fetch report. */
+export const LOOKML_FETCH_REPORT_FILE = 'lookml-fetch-report.json';
+/** File name, inside the staged dir, of the JSON sidecar listing mismatched model names. */
+export const LOOKML_MISMATCHED_MODELS_FILE = 'lookml-mismatched-models.json';
+
+/** Issue kinds accepted when a fetch report is written or read back. */
+const fetchIssueKindSchema = z.enum([
+  'unmapped_looker_connection',
+  'unparseable_sql_table_name',
+  'looker_template_unresolved',
+  'derived_table_not_supported',
+  'lookml_connection_mismatch',
+]);
+
+/** Shape of one entry in the report's `skipped` / `warnings` arrays. */
+const fetchIssueSchema = z.object({
+  rawPath: z.string().min(1),
+  entityType: z.string().min(1),
+  entityId: z.string().nullable(),
+  severity: z.enum(['warning', 'error']),
+  statusCode: z.number().int().nullable(),
+  message: z.string().min(1),
+  retryRecommended: z.boolean(),
+  kind: fetchIssueKindSchema.optional(),
+  details: z.record(z.string(), z.unknown()).optional(),
+});
+
+/** Shape of the whole fetch report file. */
+const fetchReportSchema = z.object({
+  status: z.enum(['success', 'partial']),
+  retryRecommended: z.boolean(),
+  skipped: z.array(fetchIssueSchema),
+  warnings: z.array(fetchIssueSchema),
+});
+
+/** Shape of the mismatched-models sidecar; a missing `modelNames` key reads as an empty list. */
+const mismatchedModelsSchema = z.object({
+  modelNames: z.array(z.string().min(1)).default([]),
+});
+
+/** Result of connection validation: the report plus the names of the offending models. */
+interface LookmlValidationArtifacts {
+  report: SourceFetchReport;
+  mismatchedModelNames: string[];
+}
+
+/** True when `error` is an object whose `code` property is `'ENOENT'` (file not found). */
+function isMissingFileError(error: unknown): boolean {
+  return typeof error === 'object' && error !== null && 'code' in error && error.code === 'ENOENT';
+}
+
+/**
+ * Compare each model's declared connection with the expected Looker connection name.
+ *
+ * Models that declare no connection (`connectionName === null`) are never flagged. When no
+ * expected name is configured, validation is skipped and a clean `success` report is returned.
+ *
+ * @param project - Parsed project whose `models` are checked.
+ * @param config - Holds `expectedLookerConnectionName`; a null or empty value disables the check.
+ * @returns A report with one warning per mismatched model (status `partial` if any, else
+ *   `success`) and the mismatched model names, both ordered by model name.
+ */
+export function buildLookmlValidationArtifacts(
+  project: ParsedLookmlProject,
+  config: { expectedLookerConnectionName: string | null },
+): LookmlValidationArtifacts {
+  const expected = config.expectedLookerConnectionName;
+  if (!expected) {
+    return {
+      report: { status: 'success', retryRecommended: false, skipped: [], warnings: [] },
+      mismatchedModelNames: [],
+    };
+  }
+
+  const mismatched = project.models
+    .filter((model) => model.connectionName !== null && model.connectionName !== expected)
+    .sort((a, b) => a.name.localeCompare(b.name));
+
+  const warnings = mismatched.map((model) => {
+    // The filter above already excludes null; the fallback only serves to narrow the type.
+    const declared = model.connectionName ?? '(none)';
+    return {
+      rawPath: model.path,
+      entityType: 'lookml_models',
+      entityId: model.name,
+      severity: 'warning' as const,
+      statusCode: null,
+      message: `LookML model ${model.name} declares connection ${declared} but this warehouse expects ${expected}; SL writes are disabled for this model.`,
+      retryRecommended: false,
+      kind: 'lookml_connection_mismatch' as const,
+      details: { model: model.name, declared, expected },
+    };
+  });
+
+  return {
+    report: {
+      status: warnings.length > 0 ? 'partial' : 'success',
+      retryRecommended: false,
+      skipped: [],
+      warnings,
+    },
+    mismatchedModelNames: mismatched.map((model) => model.name),
+  };
+}
+
+/**
+ * Write the fetch report and the mismatched-models sidecar into `stagedDir` as pretty-printed
+ * JSON with a trailing newline. The directory is created if missing. The report is validated
+ * against `fetchReportSchema` before writing, so an invalid report throws.
+ */
+export async function writeLookmlValidationArtifacts(
+  stagedDir: string,
+  artifacts: LookmlValidationArtifacts,
+): Promise<void> {
+  const reportPath = join(stagedDir, LOOKML_FETCH_REPORT_FILE);
+  await mkdir(dirname(reportPath), { recursive: true });
+  await writeFile(reportPath, `${JSON.stringify(fetchReportSchema.parse(artifacts.report), null, 2)}\n`, 'utf-8');
+  await writeFile(
+    join(stagedDir, LOOKML_MISMATCHED_MODELS_FILE),
+    `${JSON.stringify({ modelNames: artifacts.mismatchedModelNames }, null, 2)}\n`,
+    'utf-8',
+  );
+}
+
+/**
+ * Read and validate the fetch report from `stagedDir`.
+ *
+ * @returns The parsed report, or `null` when the file does not exist.
+ * @throws On any other read error, on invalid JSON, or when the content fails schema validation.
+ */
+export async function readLookmlFetchReport(stagedDir: string): Promise<SourceFetchReport | null> {
+  try {
+    const raw = await readFile(join(stagedDir, LOOKML_FETCH_REPORT_FILE), 'utf-8');
+    return fetchReportSchema.parse(JSON.parse(raw));
+  } catch (error) {
+    if (isMissingFileError(error)) {
+      return null;
+    }
+    throw error;
+  }
+}
+
+/**
+ * Read the mismatched-models sidecar from `stagedDir`.
+ *
+ * @returns The model names as a Set; an empty Set when the file does not exist.
+ * @throws On any other read error, on invalid JSON, or when the content fails schema validation.
+ */
+export async function readLookmlMismatchedModelNames(stagedDir: string): Promise<Set<string>> {
+  try {
+    const raw = await readFile(join(stagedDir, LOOKML_MISMATCHED_MODELS_FILE), 'utf-8');
+    const parsed = mismatchedModelsSchema.parse(JSON.parse(raw));
+    return new Set(parsed.modelNames);
+  } catch (error) {
+    if (isMissingFileError(error)) {
+      return new Set<string>();
+    }
+    throw error;
+  }
+}
diff --git a/packages/context/src/ingest/adapters/lookml/fetch.test.ts b/packages/context/src/ingest/adapters/lookml/fetch.test.ts
new file mode 100644
index 00000000..a0c293e7
--- /dev/null
+++ b/packages/context/src/ingest/adapters/lookml/fetch.test.ts
@@ -0,0 +1,146 @@
+import { mkdir, mkdtemp, readFile, rm, writeFile } from 'node:fs/promises';
+import { tmpdir } from 'node:os';
+import { join } from 'node:path';
+import { afterEach, beforeEach, describe, expect, it } from 'vitest';
+import {
makeLocalGitRepo } from '../../../test/make-local-git-repo.js';
+import { fetchLookmlRepo } from './fetch.js';
+import type { LookmlPullConfig } from './pull-config.js';
+
+const FIXTURE_ROOT = join(__dirname, '../../../../test/fixtures/lookml');
+
+/**
+ * Build a complete pull config for a test: `repoUrl` is required, every other field falls back
+ * to the defaults below unless overridden.
+ */
+function pullConfig(overrides: Partial<LookmlPullConfig> & Pick<LookmlPullConfig, 'repoUrl'>): LookmlPullConfig {
+  return {
+    branch: 'main',
+    path: null,
+    authToken: null,
+    expectedLookerConnectionName: null,
+    parsedTargetTables: {},
+    ...overrides,
+  };
+}
+
+describe('fetchLookmlRepo', () => {
+  let tmpRoot: string;
+
+  beforeEach(async () => {
+    tmpRoot = await mkdtemp(join(tmpdir(), 'fetch-lookml-'));
+  });
+
+  afterEach(async () => rm(tmpRoot, { recursive: true, force: true }));
+
+  it('clones a local file:// repo and materializes only .lkml/.lookml files into stagedDir', async () => {
+    const repo = await makeLocalGitRepo(join(FIXTURE_ROOT, 'single-model'), join(tmpRoot, 'origin'));
+    // Add a non-LookML file to prove we filter it out.
+    await repo.writeFile('README.md', '# readme\n');
+    await repo.commit('add readme');
+
+    const stagedDir = join(tmpRoot, 'staged');
+    const cacheDir = join(tmpRoot, 'cache', 'conn-1');
+    await mkdir(stagedDir, { recursive: true });
+
+    const result = await fetchLookmlRepo({
+      config: pullConfig({ repoUrl: repo.repoUrl }),
+      cacheDir,
+      stagedDir,
+    });
+
+    expect(result.filesCopied).toBe(3); // orders.model.lkml + 2 views
+    expect(result.commitHash).toMatch(/^[0-9a-f]{40}$/);
+    await expect(readFile(join(stagedDir, 'orders.model.lkml'), 'utf-8')).resolves.toMatch(/connection:/);
+    await expect(readFile(join(stagedDir, 'views', 'orders.view.lkml'), 'utf-8')).resolves.toMatch(/view: orders/);
+    // README.md is present in the cache but NOT in stagedDir.
+    await expect(readFile(join(stagedDir, 'README.md'), 'utf-8')).rejects.toThrow();
+    await expect(readFile(join(cacheDir, 'README.md'), 'utf-8')).resolves.toMatch(/readme/);
+  });
+
+  it('pulls an existing cache dir (second call) and surfaces the new commit', async () => {
+    const repo = await makeLocalGitRepo(join(FIXTURE_ROOT, 'single-model'), join(tmpRoot, 'origin'));
+    const stagedDir1 = join(tmpRoot, 'staged-1');
+    const stagedDir2 = join(tmpRoot, 'staged-2');
+    const cacheDir = join(tmpRoot, 'cache', 'conn-1');
+    await mkdir(stagedDir1, { recursive: true });
+    await mkdir(stagedDir2, { recursive: true });
+
+    const r1 = await fetchLookmlRepo({
+      config: pullConfig({ repoUrl: repo.repoUrl }),
+      cacheDir,
+      stagedDir: stagedDir1,
+    });
+
+    // Commit a new revision in the origin — a modified view.
+    await repo.writeFile('views/orders.view.lkml', 'view: orders { sql_table_name: public.orders_v2 ;; }\n');
+    await repo.commit('bump');
+
+    const r2 = await fetchLookmlRepo({
+      config: pullConfig({ repoUrl: repo.repoUrl }),
+      cacheDir,
+      stagedDir: stagedDir2,
+    });
+    expect(r2.commitHash).not.toBe(r1.commitHash);
+    await expect(readFile(join(stagedDir2, 'views', 'orders.view.lkml'), 'utf-8')).resolves.toMatch(/orders_v2/);
+  });
+
+  it('respects config.path — only files under that subtree land in stagedDir', async () => {
+    // Build a multi-subdir repo: models/... + views/...
+    const originRoot = join(tmpRoot, 'origin');
+    await mkdir(originRoot, { recursive: true });
+    await mkdir(join(originRoot, 'fixture-src', 'models'), { recursive: true });
+    await mkdir(join(originRoot, 'fixture-src', 'views'), { recursive: true });
+    await writeFile(join(originRoot, 'fixture-src', 'models', 'orders.model.lkml'), 'connection: "c"\n', 'utf-8');
+    await writeFile(join(originRoot, 'fixture-src', 'views', 'orders.view.lkml'), 'view: orders {}\n', 'utf-8');
+    const repo = await makeLocalGitRepo(join(originRoot, 'fixture-src'), join(originRoot, 'git'));
+
+    const stagedDir = join(tmpRoot, 'staged');
+    const cacheDir = join(tmpRoot, 'cache', 'conn-path');
+    await mkdir(stagedDir, { recursive: true });
+
+    const result = await fetchLookmlRepo({
+      config: pullConfig({ repoUrl: repo.repoUrl, path: 'views' }),
+      cacheDir,
+      stagedDir,
+    });
+    expect(result.filesCopied).toBe(1);
+    await expect(readFile(join(stagedDir, 'orders.view.lkml'), 'utf-8')).resolves.toMatch(/view: orders/);
+    // The model under `models/` is NOT copied because we scoped to `views/`.
+    await expect(readFile(join(stagedDir, 'orders.model.lkml'), 'utf-8')).rejects.toThrow();
+  });
+
+  it('falls back to fresh clone when the cache dir is corrupt', async () => {
+    const repo = await makeLocalGitRepo(join(FIXTURE_ROOT, 'single-model'), join(tmpRoot, 'origin'));
+    const stagedDir = join(tmpRoot, 'staged');
+    const cacheDir = join(tmpRoot, 'cache', 'conn-bad');
+    await mkdir(stagedDir, { recursive: true });
+
+    // Pre-create a cacheDir that looks like a git repo but is corrupt.
+    await mkdir(join(cacheDir, '.git'), { recursive: true });
+    await writeFile(join(cacheDir, '.git', 'HEAD'), 'garbage\n', 'utf-8');
+
+    const result = await fetchLookmlRepo({
+      config: pullConfig({ repoUrl: repo.repoUrl }),
+      cacheDir,
+      stagedDir,
+    });
+    expect(result.filesCopied).toBeGreaterThan(0);
+  });
+
+  it('sanitizes auth tokens out of error messages when clone fails', async () => {
+    const stagedDir = join(tmpRoot, 'staged');
+    const cacheDir = join(tmpRoot, 'cache', 'conn-bad-url');
+    await mkdir(stagedDir, { recursive: true });
+
+    await expect(
+      fetchLookmlRepo({
+        config: pullConfig({
+          repoUrl: 'http://definitely-not-a-real-host.test/r.git',
+          authToken: 'supersecret-token',
+        }),
+        cacheDir,
+        stagedDir,
+      }),
+    ).rejects.toThrow(
+      // Error is thrown with sanitized message — the token is replaced by '***'.
+      // The exact message depends on simple-git's failure mode; we assert the token does NOT appear.
+      expect.objectContaining({ message: expect.not.stringContaining('supersecret-token') }),
+    );
+  });
+});
diff --git a/packages/context/src/ingest/adapters/lookml/fetch.ts b/packages/context/src/ingest/adapters/lookml/fetch.ts
new file mode 100644
index 00000000..8529bf47
--- /dev/null
+++ b/packages/context/src/ingest/adapters/lookml/fetch.ts
@@ -0,0 +1,75 @@
+import { access, copyFile, mkdir, readdir } from 'node:fs/promises';
+import { join, relative } from 'node:path';
+import { cloneOrPull, sanitizeRepoError } from '../../repo-fetch.js';
+import type { LookmlPullConfig } from './pull-config.js';
+
+export interface FetchLookmlRepoParams {
+  config: LookmlPullConfig;
+  /** Persistent cache directory (typically per-connection). Cloned here once, pulled on subsequent calls. */
+  cacheDir: string;
+  /** Per-job staged directory that the adapter writes `.lkml`/`.lookml` files into. */
+  stagedDir: string;
+}
+
+export interface FetchLookmlRepoResult {
+  /** SHA of the repo HEAD after the pull. */
+  commitHash: string;
+  /** Number of LookML files copied into `stagedDir`. */
+  filesCopied: number;
+}
+
+/** Matches file names ending in `.lkml` or `.lookml`, case-insensitively. */
+const LKML_EXT_RE = /\.(lkml|lookml)$/i;
+/** Splits a relative path on either separator style, so the `.git` check does not depend on the platform. */
+const PATH_SEGMENT_SEP_RE = /[\\/]/;
+
+/**
+ * Clone or update the configured repo into `cacheDir`, then copy its LookML files into `stagedDir`.
+ *
+ * When `config.path` is set, only that subtree of the checkout is copied, and paths in
+ * `stagedDir` are relative to it. An empty `config.branch` falls back to `'main'`.
+ *
+ * @returns The commit hash reported by `cloneOrPull` and the number of files copied.
+ * @throws Error whose message is the output of `sanitizeRepoError(err, config.authToken)`. This
+ *   wraps every failure in the clone/pull and copy steps.
+ */
+export async function fetchLookmlRepo(params: FetchLookmlRepoParams): Promise<FetchLookmlRepoResult> {
+  const { config, cacheDir, stagedDir } = params;
+  const branch = config.branch || 'main';
+
+  try {
+    const { commitHash } = await cloneOrPull({
+      repoUrl: config.repoUrl,
+      authToken: config.authToken,
+      cacheDir,
+      branch,
+    });
+
+    const sourceRoot = config.path ? join(cacheDir, config.path) : cacheDir;
+    const filesCopied = await copyLkmlFilesRecursive(sourceRoot, stagedDir);
+
+    return { commitHash, filesCopied };
+  } catch (err) {
+    throw new Error(sanitizeRepoError(err, config.authToken));
+  }
+}
+
+/**
+ * Copy every `.lkml`/`.lookml` file under `sourceRoot` into `destRoot`, preserving relative paths.
+ *
+ * Files below a `.git` directory are skipped: when `sourceRoot` is the clone root, the recursive
+ * listing also walks git's own metadata, where a ref file named after a branch (for example
+ * `refs/heads/fix.lkml`) would otherwise match the extension filter and be staged as LookML.
+ *
+ * @returns Number of files copied; 0 when `sourceRoot` does not exist.
+ */
+async function copyLkmlFilesRecursive(sourceRoot: string, destRoot: string): Promise<number> {
+  if (!(await dirExists(sourceRoot))) {
+    return 0;
+  }
+  await mkdir(destRoot, { recursive: true });
+  const entries = await readdir(sourceRoot, { withFileTypes: true, recursive: true });
+  let copied = 0;
+  for (const entry of entries) {
+    if (!entry.isFile() || !LKML_EXT_RE.test(entry.name)) {
+      continue;
+    }
+    const absSrc = join(entry.parentPath, entry.name);
+    const rel = relative(sourceRoot, absSrc);
+    if (rel.split(PATH_SEGMENT_SEP_RE).includes('.git')) {
+      continue;
+    }
+    const dest = join(destRoot, rel);
+    // Create the destination's parent directory before copying.
+    await mkdir(join(dest, '..'), { recursive: true });
+    await copyFile(absSrc, dest);
+    copied++;
+  }
+  return copied;
+}
+
+/**
+ * Resolve to `true` when `path` is accessible, `false` otherwise. Uses `access`, so an existing
+ * regular file also yields `true`.
+ */
+async function dirExists(path: string): Promise<boolean> {
+  try {
+    await access(path);
+    return true;
+  } catch {
+    return false;
+  }
+}
diff --git a/packages/context/src/ingest/adapters/lookml/graph.test.ts b/packages/context/src/ingest/adapters/lookml/graph.test.ts
new file mode 100644
index 00000000..c1efd701
--- /dev/null
+++ b/packages/context/src/ingest/adapters/lookml/graph.test.ts
@@ -0,0 +1,118 @@
+import { describe, expect, it } from 'vitest';
+import { buildLookmlGraph } from './graph.js';
+import type { ParsedLookmlProject } from './parse.js';
+
+type
LooseParsedLookmlProject = Omit<Partial<ParsedLookmlProject>, 'models' | 'views'> & {
+  models?: Array<Omit<ParsedLookmlProject['models'][number], 'connectionName'> & { connectionName?: string | null }>;
+  views?: Array<Omit<ParsedLookmlProject['views'][number], 'rawSqlTableName'> & { rawSqlTableName?: string | null }>;
+};
+
+// Test helper: fills in empty `dashboards`/`allPaths` and defaults `connectionName` /
+// `rawSqlTableName` to null so each test only spells out the fields it cares about.
+const mkProject = (overrides: LooseParsedLookmlProject): ParsedLookmlProject => ({
+  dashboards: [],
+  allPaths: [],
+  ...overrides,
+  models: (overrides.models ?? []).map((model) => ({ connectionName: null, ...model })),
+  views: (overrides.views ?? []).map((view) => ({ rawSqlTableName: null, ...view })),
+});
+
+describe('buildLookmlGraph', () => {
+  it('assigns a single model as owner of all its included views', () => {
+    const project = mkProject({
+      models: [{ path: 'orders.model.lkml', name: 'orders', includes: ['views/*.view.lkml'], explores: ['orders'] }],
+      views: [
+        { path: 'views/orders.view.lkml', name: 'orders', extendsFrom: [] },
+        { path: 'views/customers.view.lkml', name: 'customers', extendsFrom: [] },
+      ],
+      allPaths: ['orders.model.lkml', 'views/customers.view.lkml', 'views/orders.view.lkml'],
+    });
+    const graph = buildLookmlGraph(project);
+    expect(graph.ownerByViewPath.get('views/orders.view.lkml')).toBe('orders');
+    expect(graph.ownerByViewPath.get('views/customers.view.lkml')).toBe('orders');
+    expect(graph.viewsIncludedByModel.get('orders')?.sort()).toEqual([
+      'views/customers.view.lkml',
+      'views/orders.view.lkml',
+    ]);
+  });
+
+  it('assigns shared views to the lexicographically-first model that includes them', () => {
+    const project = mkProject({
+      models: [
+        { path: 'marketing.model.lkml', name: 'marketing', includes: ['views/shared.view.lkml'], explores: [] },
+        {
+          path: 'orders.model.lkml',
+          name: 'orders',
+          includes: ['views/shared.view.lkml', 'views/orders.view.lkml'],
+          explores: [],
+        },
+      ],
+      views: [
+        { path: 'views/shared.view.lkml', name: 'shared', extendsFrom: [] },
+        { path: 'views/orders.view.lkml', name: 'orders', extendsFrom: [] },
+      ],
+      allPaths: ['marketing.model.lkml', 'orders.model.lkml', 'views/orders.view.lkml', 'views/shared.view.lkml'],
+    });
+    const graph = buildLookmlGraph(project);
+    // "marketing" sorts before "orders", so marketing owns the shared view.
+    expect(graph.ownerByViewPath.get('views/shared.view.lkml')).toBe('marketing');
+    expect(graph.ownerByViewPath.get('views/orders.view.lkml')).toBe('orders');
+    // Both models list the shared view in their include set:
+    expect(graph.includersByViewPath.get('views/shared.view.lkml')?.sort()).toEqual(['marketing', 'orders']);
+  });
+
+  it('resolves transitive extends chains into dependency paths', () => {
+    const project = mkProject({
+      models: [{ path: 'orders.model.lkml', name: 'orders', includes: ['views/*.view.lkml'], explores: [] }],
+      views: [
+        { path: 'views/base.view.lkml', name: 'base', extendsFrom: [] },
+        { path: 'views/orders.view.lkml', name: 'orders', extendsFrom: ['base'] },
+        { path: 'views/orders_ext.view.lkml', name: 'orders_ext', extendsFrom: ['orders'] },
+      ],
+      allPaths: ['orders.model.lkml', 'views/base.view.lkml', 'views/orders.view.lkml', 'views/orders_ext.view.lkml'],
+    });
+    const graph = buildLookmlGraph(project);
+    expect(graph.extendsAncestorsByViewName.get('orders_ext')?.sort()).toEqual(['base', 'orders']);
+    expect(graph.extendsAncestorsByViewName.get('orders')?.sort()).toEqual(['base']);
+    expect(graph.extendsAncestorsByViewName.get('base')?.sort()).toEqual([]);
+  });
+
+  it('resolves glob-style include patterns (views/*.view.lkml) against allPaths', () => {
+    const project = mkProject({
+      models: [{ path: 'orders.model.lkml', name: 'orders', includes: ['views/*.view.lkml'], explores: [] }],
+      views: [
+        { path: 'views/a.view.lkml', name: 'a', extendsFrom: [] },
+        { path: 'views/sub/b.view.lkml', name: 'b', extendsFrom: [] },
+      ],
+      allPaths: ['orders.model.lkml', 'views/a.view.lkml', 'views/sub/b.view.lkml'],
+    });
+    const graph = buildLookmlGraph(project);
+    // Single-star glob matches one path segment — "views/sub/b.view.lkml" is NOT matched.
+    expect(graph.viewsIncludedByModel.get('orders')?.sort()).toEqual(['views/a.view.lkml']);
+  });
+
+  it('resolves double-star include patterns (views/**/*.view.lkml) recursively', () => {
+    const project = mkProject({
+      models: [{ path: 'orders.model.lkml', name: 'orders', includes: ['views/**/*.view.lkml'], explores: [] }],
+      views: [
+        { path: 'views/a.view.lkml', name: 'a', extendsFrom: [] },
+        { path: 'views/sub/b.view.lkml', name: 'b', extendsFrom: [] },
+      ],
+      allPaths: ['orders.model.lkml', 'views/a.view.lkml', 'views/sub/b.view.lkml'],
+    });
+    const graph = buildLookmlGraph(project);
+    expect(graph.viewsIncludedByModel.get('orders')?.sort()).toEqual(['views/a.view.lkml', 'views/sub/b.view.lkml']);
+  });
+
+  it('leaves a view ownerless when no model includes it', () => {
+    const project = mkProject({
+      models: [{ path: 'other.model.lkml', name: 'other', includes: ['views/included.view.lkml'], explores: [] }],
+      views: [
+        { path: 'views/included.view.lkml', name: 'included', extendsFrom: [] },
+        { path: 'views/orphan.view.lkml', name: 'orphan', extendsFrom: [] },
+      ],
+      allPaths: ['other.model.lkml', 'views/included.view.lkml', 'views/orphan.view.lkml'],
+    });
+    const graph = buildLookmlGraph(project);
+    expect(graph.ownerByViewPath.has('views/orphan.view.lkml')).toBe(false);
+    expect(graph.ownerByViewPath.get('views/included.view.lkml')).toBe('other');
+  });
+});
diff --git a/packages/context/src/ingest/adapters/lookml/graph.ts b/packages/context/src/ingest/adapters/lookml/graph.ts
new file mode 100644
index 00000000..744eb01c
--- /dev/null
+++ b/packages/context/src/ingest/adapters/lookml/graph.ts
@@ -0,0 +1,114 @@
+import { minimatch } from 'minimatch';
+import type { ParsedLookmlProject } from './parse.js';
+
+export interface LookmlGraph {
+  /** For each model name, every view path that model's `include:` directives resolve to. NOT filtered by ownership.
/**
 * Resolve one `include:` pattern against the project's file list.
 *
 * Matching is done with `minimatch`, case-sensitively. A pattern with a single leading "/"
 * (LookML's project-root form) has that slash dropped before matching: entries in
 * `allPaths` are relative paths without a leading slash, so the pattern could otherwise
 * never match anything. Patterns starting with "//" are passed through unchanged.
 *
 * @param pattern  include pattern as written in the model file (may be a glob)
 * @param allPaths every LookML path in the project, relative to the staged directory
 * @returns the matching subset of `allPaths`, sorted
 */
function resolveIncludePattern(pattern: string, allPaths: string[]): string[] {
  const isRootAnchored = pattern.startsWith('/') && !pattern.startsWith('//');
  const effectivePattern = isRootAnchored ? pattern.slice(1) : pattern;
  return allPaths.filter((p) => minimatch(p, effectivePattern, { nocase: false })).sort();
}

/**
 * Collect the transitive `extends:` ancestors of `viewName`.
 *
 * `visited` is shared across the whole traversal so that cyclic `extends:` chains
 * terminate. For a cyclic chain the starting view can appear in its own ancestor list.
 *
 * @param viewName      view whose ancestors are wanted
 * @param directExtends view name → names it directly extends
 * @param visited       names already expanded during this traversal
 * @returns ancestor names, de-duplicated and sorted
 */
function transitiveAncestors(
  viewName: string,
  directExtends: Map<string, string[]>,
  visited = new Set<string>(),
): string[] {
  if (visited.has(viewName)) {
    return [];
  }
  visited.add(viewName);
  const direct = directExtends.get(viewName) ?? [];
  const out = new Set<string>();
  for (const parent of direct) {
    out.add(parent);
    for (const ancestor of transitiveAncestors(parent, directExtends, visited)) {
      out.add(ancestor);
    }
  }
  return [...out].sort();
}

/**
 * Build the include / ownership / extends graph for a parsed LookML project.
 *
 * - Each model's `include:` patterns are resolved against `project.allPaths`.
 * - A path's owner is the first model, in sorted model-name order, that includes it.
 * - `extends:` relationships are resolved over view names, transitively.
 *
 * @param project parsed project (models, views and the full path list)
 * @returns lookup maps; every list value is sorted for deterministic output
 */
export function buildLookmlGraph(project: ParsedLookmlProject): LookmlGraph {
  const viewsIncludedByModel = new Map<string, string[]>();
  const ownerByViewPath = new Map<string, string>();
  const includersByViewPath = new Map<string, string[]>();

  // Iterate models in name order so the first-includer-wins rule produces a stable owner.
  // NOTE(review): localeCompare ordering may differ from the default code-unit `.sort()`
  // used for the other lists in this function — confirm that is intended.
  const sortedModels = [...project.models].sort((a, b) => a.name.localeCompare(b.name));

  for (const model of sortedModels) {
    const includedPaths = new Set<string>();
    for (const pattern of model.includes) {
      for (const match of resolveIncludePattern(pattern, project.allPaths)) {
        includedPaths.add(match);
      }
    }
    const sortedPaths = [...includedPaths].sort();
    viewsIncludedByModel.set(model.name, sortedPaths);

    for (const viewPath of sortedPaths) {
      const inc = includersByViewPath.get(viewPath) ?? [];
      inc.push(model.name);
      includersByViewPath.set(viewPath, inc);
      // First includer (in sorted model order) becomes the owner; later ones never overwrite it.
      if (!ownerByViewPath.has(viewPath)) {
        ownerByViewPath.set(viewPath, model.name);
      }
    }
  }

  // Deduplicate + sort includers lists for deterministic output.
  for (const [path, names] of includersByViewPath) {
    includersByViewPath.set(path, [...new Set(names)].sort());
  }

  // Build the extends graph over view NAMES, plus the name ↔ path lookups.
  const directExtendsByViewName = new Map<string, string[]>();
  const pathsByViewName = new Map<string, string[]>();
  const viewNamesByPath = new Map<string, string[]>();
  for (const view of project.views) {
    directExtendsByViewName.set(view.name, view.extendsFrom);

    const paths = pathsByViewName.get(view.name) ?? [];
    if (!paths.includes(view.path)) {
      paths.push(view.path);
    }
    pathsByViewName.set(view.name, paths.sort());

    const names = viewNamesByPath.get(view.path) ?? [];
    if (!names.includes(view.name)) {
      names.push(view.name);
    }
    viewNamesByPath.set(view.path, names.sort());
  }

  const extendsAncestorsByViewName = new Map<string, string[]>();
  for (const view of project.views) {
    extendsAncestorsByViewName.set(view.name, transitiveAncestors(view.name, directExtendsByViewName));
  }

  return {
    viewsIncludedByModel,
    ownerByViewPath,
    includersByViewPath,
    extendsAncestorsByViewName,
    pathsByViewName,
    viewNamesByPath,
  };
}
*/ + export function parseFiles(opts: ParseFilesOptions): Promise; + + const lookmlParser: { + parse: typeof parse; + parseFiles: typeof parseFiles; + }; + + export default lookmlParser; +} diff --git a/packages/context/src/ingest/adapters/lookml/lookml.adapter.test.ts b/packages/context/src/ingest/adapters/lookml/lookml.adapter.test.ts new file mode 100644 index 00000000..32564da1 --- /dev/null +++ b/packages/context/src/ingest/adapters/lookml/lookml.adapter.test.ts @@ -0,0 +1,60 @@ +import { mkdir, mkdtemp, readFile, rm, writeFile } from 'node:fs/promises'; +import { tmpdir } from 'node:os'; +import { join } from 'node:path'; +import { afterEach, beforeEach, describe, expect, it } from 'vitest'; +import { makeLocalGitRepo } from '../../../test/make-local-git-repo.js'; +import { LOOKML_FETCH_REPORT_FILE } from './fetch-report.js'; +import { LookmlSourceAdapter } from './lookml.adapter.js'; + +describe('LookmlSourceAdapter validation sidecars', () => { + let tmpRoot: string; + + beforeEach(async () => { + tmpRoot = await mkdtemp(join(tmpdir(), 'lookml-adapter-')); + }); + + afterEach(async () => rm(tmpRoot, { recursive: true, force: true })); + + it('writes a partial fetch report and marks mismatched chunks as SL-disallowed', async () => { + const originRoot = join(tmpRoot, 'origin-src'); + await mkdir(join(originRoot, 'views'), { recursive: true }); + await writeFile( + join(originRoot, 'b2b.model.lkml'), + 'connection: "wrong_connection"\ninclude: "views/*.view.lkml"\nexplore: orders {}\n', + 'utf-8', + ); + await writeFile( + join(originRoot, 'views', 'orders.view.lkml'), + 'view: orders { sql_table_name: public.orders ;; }\n', + 'utf-8', + ); + const repo = await makeLocalGitRepo(originRoot, join(tmpRoot, 'origin')); + const stagedDir = join(tmpRoot, 'staged'); + await mkdir(stagedDir, { recursive: true }); + + const adapter = new LookmlSourceAdapter({ homeDir: join(tmpRoot, 'home') }); + await adapter.fetch( + { + repoUrl: repo.repoUrl, + branch: 'main', + 
/**
 * Source adapter for LookML projects pulled from a git repository.
 *
 * Every method delegates to a helper module: detection, repository fetch, parsing,
 * validation-artifact writing and chunking. The only state held is `deps.homeDir`, under
 * which the per-connection repository cache directory is derived.
 */
export class LookmlSourceAdapter implements SourceAdapter {
  readonly source = 'lookml';
  readonly skillNames: string[] = ['lookml_ingest'];

  constructor(private readonly deps: LookmlSourceAdapterDeps) {}

  /** Delegates to `detectLookmlStagedDir`; the return type is whatever that helper returns. */
  detect(stagedDir: string) {
    return detectLookmlStagedDir(stagedDir);
  }

  /**
   * Validate `pullConfig`, fetch the repository into `stagedDir`, then parse the staged
   * files and write the validation artifacts built from that parse next to them.
   */
  async fetch(pullConfig: unknown, stagedDir: string, ctx: FetchContext): Promise<void> {
    const config = parseLookmlPullConfig(pullConfig);
    await fetchLookmlRepo({
      config,
      cacheDir: this.resolveCacheDir(ctx.connectionId),
      stagedDir,
    });

    const parsedProject = await parseLookmlStagedDir(stagedDir);
    const artifacts = buildLookmlValidationArtifacts(parsedProject, {
      expectedLookerConnectionName: config.expectedLookerConnectionName,
    });
    await writeLookmlValidationArtifacts(stagedDir, artifacts);
  }

  /** Delegates to `readLookmlFetchReport` for the given staged directory. */
  readFetchReport(stagedDir: string) {
    return readLookmlFetchReport(stagedDir);
  }

  /**
   * Parse the staged directory again and chunk it, passing along the optional diff set and
   * the mismatched model names read back from the staged directory.
   */
  async chunk(stagedDir: string, diffSet?: DiffSet): Promise<ChunkResult> {
    const parsedProject = await parseLookmlStagedDir(stagedDir);
    const mismatchedModelNames = await readLookmlMismatchedModelNames(stagedDir);
    return chunkLookmlProject(parsedProject, { diffSet, mismatchedModelNames });
  }

  /** Cache location: `<homeDir>/ingest-lookml-repos/<connectionId>`. */
  private resolveCacheDir(connectionId: string): string {
    return join(this.deps.homeDir, 'ingest-lookml-repos', connectionId);
  }
}
v.path)).toEqual(['customers.view.lkml']); + expect(result.views[0].name).toBe('customers'); + expect(result.models).toEqual([]); + expect(result.dashboards).toEqual([]); + }); + + it('parses a model file and extracts include globs', async () => { + await mkdir(join(stagedDir, 'views'), { recursive: true }); + await writeFile( + join(stagedDir, 'orders.model.lkml'), + `connection: "my_bq" + +include: "views/*.view.lkml" + +explore: orders {} +`, + 'utf-8', + ); + await writeFile( + join(stagedDir, 'views', 'orders.view.lkml'), + `view: orders { + sql_table_name: public.orders ;; +} +`, + 'utf-8', + ); + const result = await parseLookmlStagedDir(stagedDir); + expect(result.models.map((m) => m.path)).toEqual(['orders.model.lkml']); + expect(result.models[0].name).toBe('orders'); + expect(result.models[0].includes).toEqual(['views/*.view.lkml']); + expect(result.models[0].explores).toEqual(['orders']); + expect(result.views.map((v) => v.path)).toEqual(['views/orders.view.lkml']); + }); + + it('extracts model connection names and raw view sql_table_name declarations', async () => { + await mkdir(join(stagedDir, 'views'), { recursive: true }); + await writeFile( + join(stagedDir, 'b2b.model.lkml'), + `connection: "b2b_sandbox_bq" + +include: "views/*.view.lkml" + +explore: orders {} +`, + 'utf-8', + ); + await writeFile( + join(stagedDir, 'views', 'orders.view.lkml'), + `view: orders { + sql_table_name: analytics.orders AS o ;; +} +`, + 'utf-8', + ); + + const result = await parseLookmlStagedDir(stagedDir); + + expect(result.models[0]).toMatchObject({ + path: 'b2b.model.lkml', + name: 'b2b', + connectionName: 'b2b_sandbox_bq', + }); + expect(result.views[0]).toMatchObject({ + path: 'views/orders.view.lkml', + name: 'orders', + rawSqlTableName: 'analytics.orders AS o', + }); + }); + + it('captures extends declarations on views', async () => { + await writeFile( + join(stagedDir, 'base.view.lkml'), + `view: base { + dimension: id { + type: number + primary_key: yes + sql: 
\${TABLE}.id ;; + } +} +`, + 'utf-8', + ); + await writeFile( + join(stagedDir, 'orders.view.lkml'), + `view: orders { + extends: [base] + sql_table_name: public.orders ;; +} +`, + 'utf-8', + ); + const result = await parseLookmlStagedDir(stagedDir); + const orders = result.views.find((v) => v.name === 'orders'); + expect(orders).toBeDefined(); + if (!orders) { + throw new Error('expected orders view'); + } + expect(orders.extendsFrom).toEqual(['base']); + }); + + it('collects .dashboard.lkml files structurally (no deep parsing)', async () => { + await writeFile(join(stagedDir, 'overview.dashboard.lkml'), '- dashboard: overview\n title: Overview\n', 'utf-8'); + const result = await parseLookmlStagedDir(stagedDir); + expect(result.dashboards.map((d) => d.path)).toEqual(['overview.dashboard.lkml']); + expect(result.dashboards[0].name).toBe('overview'); + }); + + it('ignores non-.lkml files', async () => { + await writeFile(join(stagedDir, 'README.md'), '# readme\n', 'utf-8'); + await writeFile(join(stagedDir, 'notes.txt'), 'note\n', 'utf-8'); + const result = await parseLookmlStagedDir(stagedDir); + expect(result.models).toEqual([]); + expect(result.views).toEqual([]); + expect(result.dashboards).toEqual([]); + }); + + it('returns a sorted deterministic order across runs', async () => { + await writeFile( + join(stagedDir, 'zeta.view.lkml'), + `view: zeta { +} +`, + 'utf-8', + ); + await writeFile( + join(stagedDir, 'alpha.view.lkml'), + `view: alpha { +} +`, + 'utf-8', + ); + const r1 = await parseLookmlStagedDir(stagedDir); + const r2 = await parseLookmlStagedDir(stagedDir); + expect(r1.views.map((v) => v.path)).toEqual(['alpha.view.lkml', 'zeta.view.lkml']); + expect(r2.views.map((v) => v.path)).toEqual(r1.views.map((v) => v.path)); + }); +}); diff --git a/packages/context/src/ingest/adapters/lookml/parse.ts b/packages/context/src/ingest/adapters/lookml/parse.ts new file mode 100644 index 00000000..73564049 --- /dev/null +++ 
/** One `*.model.lkml` / `*.model.lookml` file as extracted by `parseLookmlStagedDir`. */
interface ParsedLookmlModel {
  /** Path relative to stagedDir, e.g. "orders.model.lkml". */
  path: string;
  /** Model name — the file's basename minus the ".model.lkml" / ".model.lookml" suffix. */
  name: string;
  /** `include:` entries (glob strings), collected from the parsed node's `include` and `includes` keys. */
  includes: string[];
  /** Explore names declared in the model, sorted (the parser output's keys are sorted, not kept in source order). */
  explores: string[];
  /** First non-blank, trimmed string found in the model's `connection` value; null when there is none. */
  connectionName: string | null;
}
/**
 * Find the first non-blank string inside `value`.
 *
 * - A string is trimmed; a blank result counts as "nothing found".
 * - An array is searched depth-first, in order, and the first hit wins.
 * - Any other kind of value yields null.
 *
 * @returns the trimmed string, or null when no non-blank string exists
 */
function firstString(value: unknown): string | null {
  if (Array.isArray(value)) {
    for (const element of value) {
      const candidate = firstString(element);
      if (candidate) {
        return candidate;
      }
    }
    return null;
  }

  if (typeof value !== 'string') {
    return null;
  }

  const text = value.trim();
  return text.length === 0 ? null : text;
}
/**
 * Parse every LookML file under `stagedDir` into a `ParsedLookmlProject`.
 *
 * Model and view files are read and handed to `lookml-parser` in one `parseFiles` call
 * (skipped entirely when there are none). Dashboard files are only listed, never parsed.
 *
 * - Model names and dashboard names come from the file name.
 * - View names come from the `view:` blocks found in the file; a view file in which the
 *   parser reports no `view:` block still yields one entry named after the file.
 * - Explore names and view names are sorted.
 *
 * @param stagedDir directory containing the staged LookML files
 * @returns models, views, dashboards and the list of all LookML paths seen
 */
export async function parseLookmlStagedDir(stagedDir: string): Promise<ParsedLookmlProject> {
  const allPaths = await collectLkmlFiles(stagedDir);

  const modelPaths = allPaths.filter((p) => MODEL_FILE_RE.test(p));
  const viewPaths = allPaths.filter((p) => VIEW_FILE_RE.test(p));
  const dashboardPaths = allPaths.filter((p) => DASHBOARD_FILE_RE.test(p));

  const parsableFiles = await Promise.all(
    [...modelPaths, ...viewPaths].map(async (p) => ({
      path: p,
      content: await readFile(join(stagedDir, p), 'utf-8'),
    })),
  );

  let project: LookmlProjectByType = {};
  if (parsableFiles.length > 0) {
    project = await lookmlParser.parseFiles({
      source: parsableFiles,
      fileOutput: 'by-type',
      // Silence the parser's default console warnings — unreadable in test output.
      console: { log: () => {}, warn: () => {}, error: () => {} },
    });
  }

  const models: ParsedLookmlModel[] = modelPaths.map((path) => {
    // The module-level suffix patterns are reused so the file classification above and the
    // name derivation here can never drift apart.
    const name = nameFromPath(path, MODEL_FILE_RE);
    const modelNode = (project.model?.[name] ?? {}) as Record<string, unknown>;
    const includes = asStringArray(modelNode.include).concat(asStringArray(modelNode.includes));
    const explores = Object.keys((modelNode.explore ?? {}) as Record<string, unknown>).sort();
    return { path, name, includes, explores, connectionName: firstString(modelNode.connection) };
  });

  const views: ParsedLookmlView[] = [];
  for (const path of viewPaths) {
    const fileNode = findFileNode(project, path) as Record<string, unknown> | undefined;
    const viewBlock = (fileNode?.view ?? {}) as Record<string, LookmlParseNode>;
    const viewNames = Object.keys(viewBlock).sort();
    if (viewNames.length === 0) {
      // No `view:` block reported for this file: fall back to a file-name-derived entry.
      views.push({
        path,
        name: nameFromPath(path, VIEW_FILE_RE),
        extendsFrom: [],
        rawSqlTableName: null,
      });
      continue;
    }
    for (const vname of viewNames) {
      const viewNode = viewBlock[vname] as Record<string, unknown>;
      views.push({
        path,
        name: vname,
        extendsFrom: extractViewExtendsFromNode(viewBlock[vname]),
        rawSqlTableName: firstString(viewNode.sql_table_name),
      });
    }
  }

  const dashboards: ParsedLookmlDashboard[] = dashboardPaths.map((path) => ({
    path,
    name: nameFromPath(path, DASHBOARD_FILE_RE),
  }));

  return { models, views, dashboards, allPaths };
}
for LookML pulls', () => { + const config = parseLookmlPullConfig({ repoUrl: 'https://github.com/acme/r.git' }); + + expect(config.expectedLookerConnectionName).toBeNull(); + expect(config.parsedTargetTables).toEqual({}); + }); + + it('parses a fully specified config', () => { + const config = parseLookmlPullConfig({ + repoUrl: 'https://gitlab.com/team/proj.git', + branch: 'develop', + path: 'views', + authToken: 'glpat-xyz', + }); + expect(config).toEqual({ + repoUrl: 'https://gitlab.com/team/proj.git', + branch: 'develop', + path: 'views', + authToken: 'glpat-xyz', + expectedLookerConnectionName: null, + parsedTargetTables: {}, + }); + }); + + it('parses the validation-only expected connection and parsed target table map', () => { + const config = parseLookmlPullConfig({ + repoUrl: 'https://github.com/acme/r.git', + expectedLookerConnectionName: 'b2b_sandbox_bq', + parsedTargetTables: { + 'b2b.orders': { + ok: true, + catalog: 'proj', + schema: 'analytics', + name: 'orders', + canonicalTable: 'proj.analytics.orders', + }, + 'b2b.derived': { + ok: false, + reason: 'derived_table_not_supported', + }, + }, + }); + + expect(config.expectedLookerConnectionName).toBe('b2b_sandbox_bq'); + expect(config.parsedTargetTables['b2b.orders']).toEqual({ + ok: true, + catalog: 'proj', + schema: 'analytics', + name: 'orders', + canonicalTable: 'proj.analytics.orders', + }); + expect(config.parsedTargetTables['b2b.derived']).toEqual({ + ok: false, + reason: 'derived_table_not_supported', + }); + }); + + it('rejects a non-URL repoUrl', () => { + expect(() => parseLookmlPullConfig({ repoUrl: 'not-a-url' })).toThrow(); + }); + + it('rejects a missing repoUrl', () => { + expect(() => parseLookmlPullConfig({ branch: 'main' })).toThrow(); + }); + + it('pullConfigFromIntegrationConfig extracts the adapter-visible fields', () => { + const integration = { + pullEnabled: true, + repoUrl: 'https://github.com/acme/r.git', + branch: 'main', + path: 'models', + authToken: 'ghp_x', + 
pullSchedule: 'daily' as const, + nextPullAt: '2026-05-01T00:00:00.000Z', + lastPulledAt: null, + lastCommitHash: null, + }; + expect(pullConfigFromIntegrationConfig(integration)).toEqual({ + repoUrl: 'https://github.com/acme/r.git', + branch: 'main', + path: 'models', + authToken: 'ghp_x', + expectedLookerConnectionName: null, + parsedTargetTables: {}, + }); + }); + + it('pullConfigFromIntegrationConfig forwards the expected connection name', () => { + const integration = { + pullEnabled: true, + repoUrl: 'https://github.com/acme/r.git', + branch: 'main', + path: 'models', + authToken: 'ghp_x', + pullSchedule: 'daily' as const, + nextPullAt: '2026-05-01T00:00:00.000Z', + lastPulledAt: null, + lastCommitHash: null, + expectedLookerConnectionName: 'warehouse_bq', + }; + + expect(pullConfigFromIntegrationConfig(integration)).toEqual({ + repoUrl: 'https://github.com/acme/r.git', + branch: 'main', + path: 'models', + authToken: 'ghp_x', + expectedLookerConnectionName: 'warehouse_bq', + parsedTargetTables: {}, + }); + }); + + it('pullConfigFromIntegrationConfig throws when repoUrl is null', () => { + const integration = { + pullEnabled: false, + repoUrl: null, + branch: null, + path: null, + authToken: null, + pullSchedule: null, + nextPullAt: null, + lastPulledAt: null, + lastCommitHash: null, + }; + expect(() => pullConfigFromIntegrationConfig(integration)).toThrow(/repoUrl/); + }); +}); diff --git a/packages/context/src/ingest/adapters/lookml/pull-config.ts b/packages/context/src/ingest/adapters/lookml/pull-config.ts new file mode 100644 index 00000000..a6eefc9f --- /dev/null +++ b/packages/context/src/ingest/adapters/lookml/pull-config.ts @@ -0,0 +1,39 @@ +import * as z from 'zod'; +import { parsedTargetTableSchema } from '../../parsed-target-table.js'; + +export const lookmlPullConfigSchema = z.object({ + repoUrl: z.string().url(), + branch: z.string().default('main'), + path: z.string().nullable().default(null), + authToken: z.string().nullable().default(null), + 
/**
 * Build a validated `LookmlPullConfig` from a stored integration config.
 *
 * Only the adapter-visible fields are forwarded. A missing branch becomes 'main', other
 * missing optional fields become null, and `parsedTargetTables` always starts empty. The
 * assembled object is then run through `parseLookmlPullConfig`.
 *
 * @throws Error when `integration.repoUrl` is null or empty
 */
export function pullConfigFromIntegrationConfig(integration: LookmlIntegrationLike): LookmlPullConfig {
  const { repoUrl, branch, path, authToken, expectedLookerConnectionName } = integration;

  if (!repoUrl) {
    throw new Error('lookml integration config missing repoUrl');
  }

  // `??` (not destructuring defaults) so that explicit nulls are replaced as well as undefined.
  const raw = {
    repoUrl,
    branch: branch ?? 'main',
    path: path ?? null,
    authToken: authToken ?? null,
    expectedLookerConnectionName: expectedLookerConnectionName ?? null,
    parsedTargetTables: {},
  };
  return parseLookmlPullConfig(raw);
}
references', async () => { + const result = await expandCardReferences('SELECT 1', { fetchCard }); + expect(result).toBe('SELECT 1'); + }); + + it('inlines a single card reference as a subquery', async () => { + const result = await expandCardReferences('SELECT * FROM {{#100}}', { fetchCard }); + expect(result).toBe('SELECT * FROM (SELECT id FROM base_table)'); + }); + + it('handles slugged references like {{#100-pretty-slug}}', async () => { + const result = await expandCardReferences('SELECT * FROM {{#100-pretty-slug}}', { fetchCard }); + expect(result).toBe('SELECT * FROM (SELECT id FROM base_table)'); + }); + + it('recursively resolves nested references', async () => { + const result = await expandCardReferences('SELECT * FROM {{#102}}', { fetchCard }); + expect(result).toBe('SELECT * FROM (SELECT * FROM (SELECT * FROM (SELECT id FROM base_table)) WHERE x = 1)'); + }); + + it('detects cycles and throws CardReferenceCycleError', async () => { + await expect(expandCardReferences('SELECT * FROM {{#200}}', { fetchCard })).rejects.toBeInstanceOf( + CardReferenceCycleError, + ); + }); +}); diff --git a/packages/context/src/ingest/adapters/metabase/card-references.ts b/packages/context/src/ingest/adapters/metabase/card-references.ts new file mode 100644 index 00000000..84d8356d --- /dev/null +++ b/packages/context/src/ingest/adapters/metabase/card-references.ts @@ -0,0 +1,47 @@ +type FetchCardFn = (cardId: number) => Promise<{ native_query: string }>; + +export class CardReferenceCycleError extends Error { + constructor( + public readonly cardId: number, + public readonly path: number[], + ) { + super(`Cycle detected in Metabase card references at card ${cardId} (path: ${path.join(' -> ')})`); + this.name = 'CardReferenceCycleError'; + } +} + +const CARD_REFERENCE_PATTERN = /\{\{#(\d+)(?:-[^}]+)?\}\}/g; + +export async function expandCardReferences( + sql: string, + opts: { fetchCard: FetchCardFn; visited?: Set }, +): Promise { + const visited = opts.visited ?? 
new Set(); + const matches = Array.from(sql.matchAll(CARD_REFERENCE_PATTERN)); + if (matches.length === 0) { + return sql; + } + + const resolved = await Promise.all( + matches.map(async (match) => { + const cardId = Number(match[1]); + if (visited.has(cardId)) { + throw new CardReferenceCycleError(cardId, [...visited, cardId]); + } + const nextVisited = new Set(visited); + nextVisited.add(cardId); + const card = await opts.fetchCard(cardId); + const expandedInner = await expandCardReferences(card.native_query, { + fetchCard: opts.fetchCard, + visited: nextVisited, + }); + return { match: match[0], expanded: expandedInner }; + }), + ); + + let output = sql; + for (const { match, expanded } of resolved) { + output = output.split(match).join(`(${expanded})`); + } + return output; +} diff --git a/packages/context/src/ingest/adapters/metabase/chunk.test.ts b/packages/context/src/ingest/adapters/metabase/chunk.test.ts new file mode 100644 index 00000000..46a3ce97 --- /dev/null +++ b/packages/context/src/ingest/adapters/metabase/chunk.test.ts @@ -0,0 +1,307 @@ +import { mkdir, mkdtemp, rm, writeFile } from 'node:fs/promises'; +import { tmpdir } from 'node:os'; +import { join, resolve } from 'node:path'; +import { afterEach, beforeEach, describe, expect, it } from 'vitest'; +import { chunkMetabaseStagedDir } from './chunk.js'; +import { stagedSyncConfigSchema } from './types.js'; + +const FIXTURES = resolve(__dirname, '../../../../test/fixtures/metabase'); +const SIMPLE = join(FIXTURES, 'simple'); +const MULTI = join(FIXTURES, 'multi-collection'); +const CARD_REF = join(FIXTURES, 'card-ref'); + +describe('chunkMetabaseStagedDir — first run', () => { + it('simple fixture emits one WU for collection 5 containing cards + collection file; shared control files in dependencyPaths', async () => { + const result = await chunkMetabaseStagedDir(SIMPLE); + expect(result.workUnits).toHaveLength(1); + const wu = result.workUnits[0]; + expect(wu.unitKey).toBe('metabase-col-5'); + 
expect(wu.rawFiles.sort()).toEqual(['cards/1.json', 'cards/2.json', 'collections/5.json']); + expect(wu.dependencyPaths.sort()).toEqual(['databases/42.json', 'sync-config.json']); + expect(wu.peerFileIndex).toEqual([]); + expect(wu.notes).toContain('collection 5'); + expect(wu.notes).toContain('2 cards'); + }); + + it('multi-collection fixture emits two WUs — one per collection — deterministic by id', async () => { + const result = await chunkMetabaseStagedDir(MULTI); + expect(result.workUnits).toHaveLength(2); + expect(result.workUnits.map((wu) => wu.unitKey)).toEqual(['metabase-col-5', 'metabase-col-6']); + expect(result.workUnits[0].rawFiles).toContain('cards/1.json'); + expect(result.workUnits[0].rawFiles).toContain('cards/2.json'); + expect(result.workUnits[0].rawFiles).not.toContain('cards/3.json'); + expect(result.workUnits[1].rawFiles).toContain('cards/3.json'); + expect(result.workUnits[1].rawFiles).not.toContain('cards/1.json'); + // Each WU's peerFileIndex contains the OTHER collection's card files. 
+ expect(result.workUnits[0].peerFileIndex).toContain('cards/3.json'); + expect(result.workUnits[1].peerFileIndex).toContain('cards/1.json'); + }); + + it('card-ref fixture: cross-card reference inside the same collection lands in rawFiles, NOT dependencyPaths', async () => { + const result = await chunkMetabaseStagedDir(CARD_REF); + expect(result.workUnits).toHaveLength(1); + const wu = result.workUnits[0]; + expect(wu.rawFiles).toContain('cards/10.json'); + expect(wu.rawFiles).toContain('cards/11.json'); + expect(wu.dependencyPaths).not.toContain('cards/10.json'); + expect(wu.dependencyPaths).not.toContain('cards/11.json'); + }); + + it('is deterministic: two identical invocations return structurally-equal WUs', async () => { + const r1 = await chunkMetabaseStagedDir(SIMPLE); + const r2 = await chunkMetabaseStagedDir(SIMPLE); + expect(JSON.stringify(r1)).toBe(JSON.stringify(r2)); + }); + + it('DiffSet re-sync keeps only WUs with a changed card; unchanged siblings land in dependencyPaths', async () => { + const result = await chunkMetabaseStagedDir(SIMPLE, { + diffSet: { + added: [], + modified: ['cards/1.json'], + deleted: [], + unchanged: ['cards/2.json', 'collections/5.json', 'databases/42.json', 'sync-config.json'], + }, + }); + expect(result.workUnits).toHaveLength(1); + const wu = result.workUnits[0]; + expect(wu.rawFiles).toEqual(['cards/1.json']); + expect(wu.dependencyPaths.sort()).toEqual([ + 'cards/2.json', + 'collections/5.json', + 'databases/42.json', + 'sync-config.json', + ]); + }); + + it('DiffSet re-sync: all-unchanged yields zero WUs and no eviction', async () => { + const result = await chunkMetabaseStagedDir(SIMPLE, { + diffSet: { + added: [], + modified: [], + deleted: [], + unchanged: ['cards/1.json', 'cards/2.json', 'collections/5.json', 'databases/42.json', 'sync-config.json'], + }, + }); + expect(result.workUnits).toEqual([]); + expect(result.eviction).toBeUndefined(); + }); + + it('DiffSet re-sync: deleted card emits an EvictionUnit', 
async () => { + const result = await chunkMetabaseStagedDir(SIMPLE, { + diffSet: { + added: [], + modified: [], + deleted: ['cards/1.json'], + unchanged: ['cards/2.json', 'collections/5.json', 'databases/42.json', 'sync-config.json'], + }, + }); + expect(result.workUnits).toEqual([]); + expect(result.eviction).toEqual({ deletedRawPaths: ['cards/1.json'] }); + }); + + it('DiffSet re-sync: sync-config.json change alone does NOT trigger any WU', async () => { + const result = await chunkMetabaseStagedDir(SIMPLE, { + diffSet: { + added: [], + modified: ['sync-config.json'], + deleted: [], + unchanged: ['cards/1.json', 'cards/2.json', 'collections/5.json', 'databases/42.json'], + }, + }); + expect(result.workUnits).toEqual([]); + expect(result.eviction).toBeUndefined(); + }); + + it('DiffSet re-sync: databases/{id}.json change alone does NOT trigger any WU', async () => { + const result = await chunkMetabaseStagedDir(SIMPLE, { + diffSet: { + added: [], + modified: ['databases/42.json'], + deleted: [], + unchanged: ['cards/1.json', 'cards/2.json', 'collections/5.json', 'sync-config.json'], + }, + }); + expect(result.workUnits).toEqual([]); + expect(result.eviction).toBeUndefined(); + }); +}); + +async function writeInline(stagedDir: string, rel: string, body: object): Promise { + const abs = join(stagedDir, rel); + await mkdir(join(abs, '..'), { recursive: true }); + await writeFile(abs, JSON.stringify(body), 'utf-8'); +} + +describe('chunkMetabaseStagedDir — selected mode filters non-matching cards', () => { + let dir: string; + beforeEach(async () => { + dir = await mkdtemp(join(tmpdir(), 'mb-chunk-select-')); + }); + afterEach(async () => { + await rm(dir, { recursive: true, force: true }); + }); + + it('cards outside selected collections are NOT in any WU', async () => { + await writeInline(dir, 'sync-config.json', { + metabaseConnectionId: 'a1b2c3d4-e5f6-4789-9abc-def012345678', + metabaseDatabaseId: 42, + syncMode: 'ONLY', + selections: [{ selectionType: 
'collection', metabaseObjectId: 5 }], + defaultTagNames: [], + mapping: { + metabaseDatabaseId: 42, + metabaseDatabaseName: 'Analytics', + metabaseEngine: 'postgres', + targetConnectionId: 'b2c3d4e5-f6a7-4890-abcd-ef0123456789', + }, + }); + await writeInline(dir, 'databases/42.json', { + metabaseDatabaseId: 42, + metabaseDatabaseName: 'Analytics', + metabaseEngine: 'postgres', + targetConnectionId: 'b2c3d4e5-f6a7-4890-abcd-ef0123456789', + }); + await writeInline(dir, 'collections/5.json', { metabaseId: 5, name: 'A', parentId: 'root' }); + await writeInline(dir, 'collections/6.json', { metabaseId: 6, name: 'B', parentId: 'root' }); + await writeInline(dir, 'cards/100.json', { + metabaseId: 100, + name: 'In', + description: null, + type: 'model', + databaseId: 42, + collectionId: 5, + archived: false, + resolvedSql: 'SELECT 1', + templateTags: [], + resultMetadata: [], + collectionPath: ['A'], + referencedCardIds: [], + resolutionStatus: 'resolved', + }); + await writeInline(dir, 'cards/200.json', { + metabaseId: 200, + name: 'Out', + description: null, + type: 'model', + databaseId: 42, + collectionId: 6, + archived: false, + resolvedSql: 'SELECT 1', + templateTags: [], + resultMetadata: [], + collectionPath: ['B'], + referencedCardIds: [], + resolutionStatus: 'resolved', + }); + const result = await chunkMetabaseStagedDir(dir); + expect(result.workUnits).toHaveLength(1); + expect(result.workUnits[0].unitKey).toBe('metabase-col-5'); + expect(result.workUnits[0].rawFiles).toContain('cards/100.json'); + expect(result.workUnits[0].rawFiles).not.toContain('cards/200.json'); + }); +}); + +describe('chunkMetabaseStagedDir — syncMode enum coverage', () => { + let dir: string; + beforeEach(async () => { + dir = await mkdtemp(join(tmpdir(), 'mb-chunk-enum-')); + await writeInline(dir, 'databases/42.json', { + metabaseDatabaseId: 42, + metabaseDatabaseName: 'Analytics', + metabaseEngine: 'postgres', + targetConnectionId: 'b2c3d4e5-f6a7-4890-abcd-ef0123456789', + }); + await 
writeInline(dir, 'collections/5.json', { metabaseId: 5, name: 'A', parentId: 'root' }); + await writeInline(dir, 'collections/6.json', { metabaseId: 6, name: 'B', parentId: 'root' }); + await writeInline(dir, 'cards/100.json', { + metabaseId: 100, + name: 'In', + description: null, + type: 'model', + databaseId: 42, + collectionId: 5, + archived: false, + resolvedSql: 'SELECT 1', + templateTags: [], + resultMetadata: [], + collectionPath: ['A'], + referencedCardIds: [], + resolutionStatus: 'resolved', + }); + await writeInline(dir, 'cards/200.json', { + metabaseId: 200, + name: 'Out', + description: null, + type: 'model', + databaseId: 42, + collectionId: 6, + archived: false, + resolvedSql: 'SELECT 1', + templateTags: [], + resultMetadata: [], + collectionPath: ['B'], + referencedCardIds: [], + resolutionStatus: 'resolved', + }); + }); + afterEach(async () => { + await rm(dir, { recursive: true, force: true }); + }); + + const BASE_SYNC = { + metabaseConnectionId: 'a1b2c3d4-e5f6-4789-9abc-def012345678', + metabaseDatabaseId: 42, + defaultTagNames: [] as string[], + mapping: { + metabaseDatabaseId: 42, + metabaseDatabaseName: 'Analytics', + metabaseEngine: 'postgres', + targetConnectionId: 'b2c3d4e5-f6a7-4890-abcd-ef0123456789', + }, + }; + + it('ALL includes every non-archived card on the matching database', async () => { + await writeInline(dir, 'sync-config.json', { + ...BASE_SYNC, + syncMode: 'ALL', + selections: [], + }); + const result = await chunkMetabaseStagedDir(dir); + const allRawFiles = result.workUnits.flatMap((wu) => wu.rawFiles); + expect(allRawFiles).toContain('cards/100.json'); + expect(allRawFiles).toContain('cards/200.json'); + }); + + it('ONLY includes cards in selected collections; excludes the rest', async () => { + await writeInline(dir, 'sync-config.json', { + ...BASE_SYNC, + syncMode: 'ONLY', + selections: [{ selectionType: 'collection', metabaseObjectId: 5 }], + }); + const result = await chunkMetabaseStagedDir(dir); + const allRawFiles = 
result.workUnits.flatMap((wu) => wu.rawFiles); + expect(allRawFiles).toContain('cards/100.json'); + expect(allRawFiles).not.toContain('cards/200.json'); + }); + + it('EXCEPT excludes cards in selected collections; includes the rest', async () => { + await writeInline(dir, 'sync-config.json', { + ...BASE_SYNC, + syncMode: 'EXCEPT', + selections: [{ selectionType: 'collection', metabaseObjectId: 5 }], + }); + const result = await chunkMetabaseStagedDir(dir); + const allRawFiles = result.workUnits.flatMap((wu) => wu.rawFiles); + expect(allRawFiles).not.toContain('cards/100.json'); + expect(allRawFiles).toContain('cards/200.json'); + }); + + it('lowercase syncMode is rejected at parse time', () => { + const parsed = stagedSyncConfigSchema.safeParse({ + ...BASE_SYNC, + syncMode: 'all', + selections: [], + }); + expect(parsed.success).toBe(false); + }); +}); diff --git a/packages/context/src/ingest/adapters/metabase/chunk.ts b/packages/context/src/ingest/adapters/metabase/chunk.ts new file mode 100644 index 00000000..2fe719c5 --- /dev/null +++ b/packages/context/src/ingest/adapters/metabase/chunk.ts @@ -0,0 +1,243 @@ +import { readdir, readFile } from 'node:fs/promises'; +import { join, relative } from 'node:path'; +import type { ChunkResult, DiffSet, UnresolvedCardInfo, WorkUnit } from '../../types.js'; +import { + STAGED_FILES, + type StagedCardFile, + type StagedSyncConfig, + stagedCardFileSchema, + stagedSyncConfigSchema, +} from './types.js'; + +interface LoadedProject { + /** Parsed sync config. `null` means the file is malformed — chunker treats as no-match. */ + syncConfig: StagedSyncConfig | null; + /** Map raw_path (e.g. `cards/1.json`) → parsed card. Malformed files excluded. */ + cardsByPath: Map; + /** Every file under stagedDir, sorted. 
*/ + allPaths: string[]; +} + +const CARDS_RE = /^cards\/\d+\.json$/; + +async function walkStagedDir(stagedDir: string): Promise { + const entries = await readdir(stagedDir, { withFileTypes: true, recursive: true }); + const paths: string[] = []; + for (const entry of entries) { + if (!entry.isFile()) { + continue; + } + const abs = join(entry.parentPath, entry.name); + paths.push(relative(stagedDir, abs).replace(/\\/g, '/')); + } + paths.sort(); + return paths; +} + +async function loadProject(stagedDir: string): Promise { + const allPaths = await walkStagedDir(stagedDir); + let syncConfig: StagedSyncConfig | null = null; + try { + const body = await readFile(join(stagedDir, STAGED_FILES.syncConfig), 'utf-8'); + syncConfig = stagedSyncConfigSchema.parse(JSON.parse(body)); + } catch { + syncConfig = null; + } + const cardsByPath = new Map(); + for (const path of allPaths) { + if (!CARDS_RE.test(path)) { + continue; + } + try { + const body = await readFile(join(stagedDir, path), 'utf-8'); + const parsed = stagedCardFileSchema.parse(JSON.parse(body)); + cardsByPath.set(path, parsed); + } catch { + // Malformed card — skip; it will still contribute to `skipped` provenance via the runner. 
+ } + } + return { syncConfig, cardsByPath, allPaths }; +} + +function cardMatchesSyncConfig(card: StagedCardFile, config: StagedSyncConfig): boolean { + if (card.databaseId !== config.metabaseDatabaseId) { + return false; + } + if (card.archived) { + return false; + } + if (config.syncMode === 'ALL') { + return true; + } + const selectedCollections = new Set( + config.selections.filter((s) => s.selectionType === 'collection').map((s) => s.metabaseObjectId), + ); + const selectedItems = new Set( + config.selections.filter((s) => s.selectionType === 'item').map((s) => s.metabaseObjectId), + ); + const isInSelection = + selectedItems.has(card.metabaseId) || + (card.collectionId !== null && + card.collectionId !== 'root' && + selectedCollections.has(card.collectionId as number)); + if (config.syncMode === 'ONLY') { + return isInSelection; + } + if (config.syncMode === 'EXCEPT') { + return !isInSelection; + } + const _exhaustive: never = config.syncMode; + return _exhaustive; +} + +interface ChunkOptions { + diffSet?: DiffSet; +} + 
+async function loadUnresolvedCards(stagedDir: string): Promise { + try { + const body = await readFile(join(stagedDir, STAGED_FILES.unresolvedCards), 'utf-8'); + const parsed = JSON.parse(body); + if (Array.isArray(parsed)) { + return parsed as UnresolvedCardInfo[]; + } + } catch { + // sidecar absent or malformed — treat as none + } + return undefined; +} + +/** + * Emit WorkUnits for a staged Metabase bundle. + * + * First run (no diffSet): one WU per collection of matching cards. Each WU's + * rawFiles hold the card paths and the collection file (none for `root`); + * the database file and sync-config.json go in dependencyPaths. Cards that fail + * the sync-config filter do NOT land in any WU — the + * runner will record them as `action_type='skipped'`. + * + * Re-sync (diffSet): keep only WUs with at least one added/modified raw file; move unchanged + * component members to `dependencyPaths`. Emit a single + * `EvictionUnit` for `diffSet.deleted`. + * + * Cross-card `{{#N}}` references widen `dependencyPaths` (the referenced card's JSON + * is read by the WU agent for context, even when it lives in a different collection). + */ +export async function chunkMetabaseStagedDir(stagedDir: string, opts: ChunkOptions = {}): Promise { + const project = await loadProject(stagedDir); + const unresolvedCards = await loadUnresolvedCards(stagedDir); + if (!project.syncConfig) { + return { workUnits: [], unresolvedCards }; + } + const firstRunUnits = emitFirstRunWorkUnits(project); + if (!opts.diffSet) { + return { workUnits: firstRunUnits, unresolvedCards }; + } + const diffResult = applyDiffSet(firstRunUnits, project, opts.diffSet); + return { ...diffResult, unresolvedCards }; +} + +function emitFirstRunWorkUnits(project: LoadedProject): WorkUnit[] { + const { syncConfig, cardsByPath, allPaths } = project; + if (!syncConfig) { + return []; + } + + const matchingCardsByCollection = new Map(); + const matchingCards = new Map(); + const pathByCardId = new Map(); + for (const [path, card] of cardsByPath) { + pathByCardId.set(card.metabaseId, path); + if (!cardMatchesSyncConfig(card, syncConfig)) { + continue; + } + matchingCards.set(path, card); + const bucket = card.collectionId ?? 'root'; + const list = matchingCardsByCollection.get(bucket) ?? []; + list.push(path); + matchingCardsByCollection.set(bucket, list); + } + + const collectionIds = [...matchingCardsByCollection.keys()].sort((a, b) => { + if (a === 'root') { + return -1; + } + if (b === 'root') { + return 1; + } + return (a as number) - (b as number); + }); + + const units: WorkUnit[] = []; + for (const colId of collectionIds) { + const cardPaths = (matchingCardsByCollection.get(colId) ?? []).sort(); + const collectionFile = colId === 'root' ? 
null : `collections/${colId}.json`; + const databaseFile = `databases/${syncConfig.metabaseDatabaseId}.json`; + // Per-collection files: included in rawFiles so they participate in touched-check. + const rawFiles = [...cardPaths, ...(collectionFile ? [collectionFile] : [])].sort(); + // Shared control files: readable by the agent for context, but mutations to them + // must NOT fan out work across every collection (see applyDiffSet below). + const sharedControlDeps = [databaseFile, STAGED_FILES.syncConfig]; + + // Dependency widening — cards that reference other cards via `{{#N}}`. + const depPaths = new Set(sharedControlDeps); + for (const cardPath of cardPaths) { + const card = matchingCards.get(cardPath); + if (!card) { + continue; + } + for (const refId of card.referencedCardIds) { + const refPath = pathByCardId.get(refId); + if (!refPath) { + continue; + } + if (rawFiles.includes(refPath)) { + continue; + } + depPaths.add(refPath); + } + } + + const rawFilesSet = new Set(rawFiles); + const peerFileIndex = allPaths.filter((p) => !rawFilesSet.has(p) && !depPaths.has(p)).sort(); + + const unitKey = `metabase-col-${colId}`; + const displayLabel = `Metabase collection ${colId}`; + const notes = `${displayLabel} — ${cardPaths.length} card${cardPaths.length === 1 ? 
'' : 's'}`; + units.push({ + unitKey, + displayLabel, + rawFiles, + peerFileIndex, + dependencyPaths: [...depPaths].sort(), + notes, + }); + } + return units; +} + +function applyDiffSet(firstRunUnits: WorkUnit[], project: LoadedProject, diffSet: DiffSet): ChunkResult { + const touched = new Set([...diffSet.added, ...diffSet.modified]); + const kept: WorkUnit[] = []; + for (const wu of firstRunUnits) { + const anyTouched = wu.rawFiles.some((p) => touched.has(p)); + if (!anyTouched) { + continue; + } + const changedFiles: string[] = []; + const unchangedComponentFiles: string[] = []; + for (const p of wu.rawFiles) { + if (touched.has(p)) { + changedFiles.push(p); + } else { + unchangedComponentFiles.push(p); + } + } + const combinedDeps = new Set([...wu.dependencyPaths, ...unchangedComponentFiles]); + kept.push({ ...wu, rawFiles: changedFiles.sort(), dependencyPaths: [...combinedDeps].sort() }); + } + // `project` is reserved — future strategies may widen across collections. + void project; + const eviction = diffSet.deleted.length > 0 ? 
{ deletedRawPaths: [...diffSet.deleted].sort() } : undefined; + return { workUnits: kept, eviction }; +} diff --git a/packages/context/src/ingest/adapters/metabase/client-boundary.test.ts b/packages/context/src/ingest/adapters/metabase/client-boundary.test.ts new file mode 100644 index 00000000..5c69db8c --- /dev/null +++ b/packages/context/src/ingest/adapters/metabase/client-boundary.test.ts @@ -0,0 +1,43 @@ +import { readFile } from 'node:fs/promises'; +import { dirname, join } from 'node:path'; +import { fileURLToPath } from 'node:url'; +import { describe, expect, it } from 'vitest'; + +const metabaseDir = dirname(fileURLToPath(import.meta.url)); + +async function readMetabaseFile(name: string): Promise { + return readFile(join(metabaseDir, name), 'utf-8'); +} + +describe('KLO Metabase client boundary', () => { + it('keeps NestJS, server data-source base classes, and server-relative imports out of the KLO client', async () => { + const client = await readMetabaseFile('client.ts'); + expect(client).not.toContain(`@${'nestjs'}`); + expect(client).not.toContain(`DataSource${'Client'}`); + expect(client).not.toContain(`../base/data-source-${'client'}`); + expect(client).not.toContain('../types'); + expect(client).not.toContain('../../types/brand'); + }); + + it('keeps proxy implementation code out of the KLO v1 client', async () => { + const client = await readMetabaseFile('client.ts'); + expect(client).not.toContain(`network-${'proxy'}`); + expect(client).not.toContain(`ssh${'2'}`); + expect(client).not.toContain(`tail${'scale'}`); + expect(client).not.toContain('resolveNetworkProxy'); + expect(client).not.toContain('establishProxy'); + expect(client).not.toContain('executeProxiedRequest'); + expect(client).not.toContain('originalHost'); + expect(client).not.toContain('originalHostname'); + expect(client).not.toContain('servername'); + }); + + it('keeps the runtime config proxy-free in v1', async () => { + const port = await readMetabaseFile('client-port.ts'); + 
const runtimeConfigBlock = port.match(/export interface MetabaseClientRuntimeConfig \{[\s\S]*?\n\}/)?.[0] ?? ''; + expect(runtimeConfigBlock).toContain('apiUrl: string'); + expect(runtimeConfigBlock).toContain('apiKey: string'); + expect(runtimeConfigBlock).not.toContain('proxy'); + expect(runtimeConfigBlock).not.toContain('networkProxy'); + }); +}); diff --git a/packages/context/src/ingest/adapters/metabase/client-port.test.ts b/packages/context/src/ingest/adapters/metabase/client-port.test.ts new file mode 100644 index 00000000..9686e552 --- /dev/null +++ b/packages/context/src/ingest/adapters/metabase/client-port.test.ts @@ -0,0 +1,104 @@ +import { describe, expect, it, vi } from 'vitest'; +import type { FetchContext } from '../../types.js'; +import { + IngestMetabaseClientFactory, + type MetabaseCard, + type MetabaseConnectionClientFactory, + type MetabaseDatasetQuery, + type MetabaseRuntimeClient, + type MetabaseTemplateTag, + type TestConnectionResult, +} from './client-port.js'; +import type { MetabasePullConfig } from './types.js'; + +function makeRuntimeClient(): MetabaseRuntimeClient { + return { + testConnection: vi.fn(), + getCurrentUser: vi.fn(), + getDatabases: vi.fn(), + getDatabase: vi.fn(), + getCollectionTree: vi.fn(), + getCollection: vi.fn(), + getCollectionItems: vi.fn(), + getCard: vi.fn(), + getAllCards: vi.fn(), + convertMbqlToNative: vi.fn(), + getNativeSql: vi.fn(), + getTemplateTags: vi.fn(), + getCardSql: vi.fn(), + getResolvedSql: vi.fn(), + cleanup: vi.fn(), + }; +} + +describe('IngestMetabaseClientFactory', () => { + const config: MetabasePullConfig = { + metabaseConnectionId: 'a1b2c3d4-e5f6-4789-9abc-def012345678', + metabaseDatabaseId: 42, + }; + + const ctx: FetchContext = { + connectionId: 'b2c3d4e5-f6a7-4890-abcd-ef0123456789', + sourceKey: 'metabase', + }; + + it('delegates to the connection-level factory with the Metabase source connection id, not ctx.connectionId', async () => { + const runtimeClient = makeRuntimeClient(); + 
const connectionFactory: MetabaseConnectionClientFactory = { + createClient: vi.fn().mockResolvedValue(runtimeClient), + }; + const factory = new IngestMetabaseClientFactory(connectionFactory); + + await expect(factory.createClient(config, ctx)).resolves.toBe(runtimeClient); + + expect(connectionFactory.createClient).toHaveBeenCalledTimes(1); + expect(connectionFactory.createClient).toHaveBeenCalledWith(config.metabaseConnectionId); + expect(connectionFactory.createClient).not.toHaveBeenCalledWith(ctx.connectionId); + }); + + it('supports synchronous connection-level factories', async () => { + const runtimeClient = makeRuntimeClient(); + const connectionFactory: MetabaseConnectionClientFactory = { + createClient: vi.fn().mockReturnValue(runtimeClient), + }; + const factory = new IngestMetabaseClientFactory(connectionFactory); + + await expect(factory.createClient(config, ctx)).resolves.toBe(runtimeClient); + }); +}); + +it('allows the concrete client result shapes used by the relocated Metabase client', () => { + const connectionResult: TestConnectionResult = { + success: false, + error: 'API key is invalid', + metadata: { databases: [] }, + }; + expect(connectionResult.success).toBe(false); + + const templateTag: MetabaseTemplateTag = { + id: 'tag-1', + name: 'created_at', + type: 'dimension', + 'display-name': 'Created At', + 'widget-type': 'date/range', + }; + expect(templateTag['widget-type']).toBe('date/range'); + + const datasetQuery: MetabaseDatasetQuery = { + type: 'native', + database: 42, + native: { + query: 'SELECT * FROM orders WHERE created_at > {{ created_at }}', + 'template-tags': { created_at: templateTag }, + }, + }; + const card: MetabaseCard = { + id: 1, + name: 'Orders', + type: 'model', + query_type: 'native', + database_id: 42, + dataset_query: datasetQuery, + }; + expect(card.dataset_query).toBe(datasetQuery); +}); diff --git a/packages/context/src/ingest/adapters/metabase/client-port.ts 
b/packages/context/src/ingest/adapters/metabase/client-port.ts new file mode 100644 index 00000000..7aa1f3ed --- /dev/null +++ b/packages/context/src/ingest/adapters/metabase/client-port.ts @@ -0,0 +1,196 @@ +import type { FetchContext } from '../../types.js'; +import type { MetabasePullConfig } from './types.js'; + +export interface TestConnectionResult { + success: boolean; + message?: string; + details?: unknown; + error?: string; + metadata?: unknown; +} + +export interface MetabaseClientConfig { + maxRetries: number; + baseDelayMs: number; + maxDelayMs: number; + timeoutMs: number; + jitter: boolean; + retryableStatuses: number[]; +} + +export interface MetabaseClientRuntimeConfig { + apiUrl: string; + apiKey: string; + /** + * Override the default authentication header name. + * - API keys: `x-api-key` (default) + * - Session tokens: `X-Metabase-Session` + */ + authHeaderName?: string; +} + +export interface MetabaseUser { + id: number; + email?: string | null; + first_name?: string | null; + last_name?: string | null; + common_name?: string | null; + is_superuser?: boolean | null; +} + +export interface MetabaseDatabase { + id: number; + name: string; + engine?: string | null; + details?: Record | null; + is_sample?: boolean | null; +} + +export interface MetabaseCollection { + id: number | 'root'; + name: string; + parent_id?: number | 'root' | null; + children?: MetabaseCollection[]; +} + +export interface MetabaseCollectionItem { + id: number; + model: 'card' | 'dataset' | 'metric' | string; + name?: string | null; + collection_id?: number | 'root' | null; + database_id?: number | null; +} + +export interface MetabaseCardSummary { + id: number; + name?: string | null; + archived?: boolean; + database_id?: number | null; + collection_id?: number | 'root' | null; +} + +export interface MetabaseResultMetadataColumn { + name: string; + base_type: string; + semantic_type?: string | null; + display_name?: string | null; + description?: string | null; + 
fk_target_field_id?: number | null; + field_ref?: unknown[] | null; +} + +export interface MetabaseParameter { + id: string; + name: string; + type: string; + slug?: string | null; + default?: unknown; + sectionId?: string | null; +} + +export interface MetabaseTemplateTag { + id?: string; + name: string; + type: string; + display_name?: string | null; + 'display-name'?: string; + default?: unknown; + card_id?: number | null; + 'card-id'?: number; + 'snippet-name'?: string; + 'snippet-id'?: number; + dimension?: unknown[]; + 'widget-type'?: string; +} + +export interface MetabaseResolvedTemplateTag { + name: string; + type: string; + cardReference?: number | null; + defaultValue?: string | null; +} + +interface MetabaseNativeStage { + 'lib/type': 'mbql.stage/native'; + native: string; + 'template-tags'?: Record; +} + +interface MetabaseLegacyNativeQuery { + query: string; + 'template-tags'?: Record; +} + +export interface MetabaseDatasetQuery { + 'lib/type'?: 'mbql/query'; + database?: number; + type?: 'native' | 'query'; + stages?: MetabaseNativeStage[]; + native?: MetabaseLegacyNativeQuery; +} + +export interface MetabaseNativeQueryResult { + query: string; +} + +export interface ResolvedSqlResult { + resolvedSql: string; + templateTags: MetabaseResolvedTemplateTag[]; + resolutionStatus: 'resolved' | 'fallback'; +} + +export interface MetabaseCard { + id: number; + name: string; + description?: string | null; + type: string; + query_type?: 'native' | 'query'; + database_id: number; + collection_id?: number | 'root' | null; + archived?: boolean; + result_metadata?: MetabaseResultMetadataColumn[] | null; + dataset_query?: MetabaseDatasetQuery | null; + parameters?: MetabaseParameter[] | null; + last_run_at?: string | null; + dashboard_count?: number | null; +} + +export interface MetabaseRuntimeClient { + testConnection(): Promise; + getCurrentUser(): Promise; + getDatabases(): Promise; + getDatabase(id: number): Promise; + getCollectionTree(): Promise; + 
getCollection(id: number | 'root'): Promise; + getCollectionItems( + collectionId: number | 'root', + models?: ('card' | 'dataset' | 'metric')[], + ): Promise; + getCard(id: number): Promise; + getAllCards(): Promise; + convertMbqlToNative(datasetQuery: MetabaseDatasetQuery): Promise; + getNativeSql(card: MetabaseCard): string | null; + getTemplateTags(card: MetabaseCard): Record; + getCardSql(card: MetabaseCard): Promise; + getResolvedSql(card: MetabaseCard): Promise; + cleanup(): Promise; +} + +export interface MetabaseConnectionClientFactory { + createClient( + metabaseConnectionId: string, + overrides?: Partial, + ): Promise | MetabaseRuntimeClient; +} + +export interface MetabaseClientFactory { + createClient(config: MetabasePullConfig, ctx: FetchContext): Promise | MetabaseRuntimeClient; +} + +export class IngestMetabaseClientFactory implements MetabaseClientFactory { + constructor(private readonly connectionFactory: MetabaseConnectionClientFactory) {} + + async createClient(config: MetabasePullConfig, _ctx: FetchContext): Promise { + return this.connectionFactory.createClient(config.metabaseConnectionId); + } +} diff --git a/packages/context/src/ingest/adapters/metabase/client.test.ts b/packages/context/src/ingest/adapters/metabase/client.test.ts new file mode 100644 index 00000000..21973f7c --- /dev/null +++ b/packages/context/src/ingest/adapters/metabase/client.test.ts @@ -0,0 +1,377 @@ +import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'; +import { + DEFAULT_METABASE_CLIENT_CONFIG, + DefaultMetabaseConnectionClientFactory, + getDummyValueForWidgetType, + MetabaseClient, + stripOptionalClauses, +} from './client.js'; +import type { MetabaseCard, MetabaseTemplateTag } from './client-port.js'; + +const runtime = { + apiUrl: 'https://metabase.example.test/api', + apiKey: 'test-key-1234', // pragma: allowlist secret +}; + +const fastRetryConfig = { + maxRetries: 2, + baseDelayMs: 1, + maxDelayMs: 1, + timeoutMs: 5000, + jitter: false, + 
retryableStatuses: [429, 500, 502, 503, 504], +}; + +function nativeCard(query: string, templateTags: Record = {}): MetabaseCard { + return { + id: 1, + name: 'Native card', + type: 'model', + query_type: 'native', + database_id: 6, + dataset_query: { + type: 'native', + database: 6, + native: { + query, + 'template-tags': templateTags, + }, + }, + }; +} + +describe('DefaultMetabaseConnectionClientFactory', () => { + it('resolves runtime credentials by the explicit Metabase source connection id and merges overrides', async () => { + const resolveCredentials = vi.fn().mockResolvedValue(runtime); + const factory = new DefaultMetabaseConnectionClientFactory(resolveCredentials, { + ...DEFAULT_METABASE_CLIENT_CONFIG, + timeoutMs: 60000, + maxRetries: 4, + }); + + const client = await factory.createClient('metabase-source-1', { timeoutMs: 1000 }); + + expect(resolveCredentials).toHaveBeenCalledWith('metabase-source-1'); + expect(client).toBeInstanceOf(MetabaseClient); + expect(Reflect.get(client, 'baseUrl')).toBe('https://metabase.example.test/api'); + expect(Reflect.get(client, 'runtime').apiKey).toBe('test-key-1234'); + expect(Reflect.get(client, 'config').timeoutMs).toBe(1000); + expect(Reflect.get(client, 'config').maxRetries).toBe(4); + }); +}); + +describe('MetabaseClient retry exhaustion', () => { + let originalFetch: typeof fetch; + + beforeEach(() => { + originalFetch = globalThis.fetch; + }); + + afterEach(() => { + globalThis.fetch = originalFetch; + vi.restoreAllMocks(); + }); + + it('wraps an exhausted ECONNRESET retry chain with method, path, attempt count, and original cause', async () => { + const sysErr = Object.assign(new Error('read ECONNRESET'), { + code: 'ECONNRESET', + errno: -104, + syscall: 'read', + }); + const fetchMock = vi.fn().mockRejectedValue(sysErr); + globalThis.fetch = fetchMock; + + const client = new MetabaseClient(runtime, fastRetryConfig); + + let caught: unknown; + try { + await client.getDatabases(); + } catch (err) { + caught = 
err; + } + + expect(caught).toBeInstanceOf(Error); + const e = caught as Error & { cause?: unknown; code?: string }; + expect(e.message).toContain('Metabase request failed (3 attempts)'); + expect(e.message).toContain('GET /api/database/'); + expect(e.message).toContain('ECONNRESET'); + expect(e.cause).toBe(sysErr); + expect(e.code).toBe('ECONNRESET'); + expect(fetchMock).toHaveBeenCalledTimes(3); + }); + + it('classifies undici mid-TLS-handshake error as TLS-handshake failure', async () => { + const undiciTlsErr = new Error('Client network socket disconnected before secure TLS connection was established'); + const fetchMock = vi.fn().mockRejectedValue(undiciTlsErr); + globalThis.fetch = fetchMock; + + const client = new MetabaseClient(runtime, { ...fastRetryConfig, maxRetries: 0 }); + + let caught: unknown; + try { + await client.getDatabases(); + } catch (err) { + caught = err; + } + + expect(caught).toBeInstanceOf(Error); + const e = caught as Error & { cause?: unknown }; + expect(e.message).toMatch(/^Metabase request failed:/); + expect(e.message).not.toContain('attempts'); + expect(e.message).toContain('TLS handshake to metabase.example.test did not complete'); + expect(e.message).toContain('before secure TLS connection was established'); + expect(e.cause).toBeInstanceOf(Error); + expect(((e.cause as Error & { cause?: unknown }).cause as Error)?.message).toContain( + 'before secure TLS connection was established', + ); + }); + + it('does not wrap when a non-retryable error short-circuits the loop', async () => { + const fetchMock = vi + .fn() + .mockResolvedValue( + new Response('{"message":"unauthorized"}', { status: 401, headers: { 'content-type': 'application/json' } }), + ); + globalThis.fetch = fetchMock; + + const client = new MetabaseClient(runtime, fastRetryConfig); + + let caught: unknown; + try { + await client.getDatabases(); + } catch (err) { + caught = err; + } + + expect(caught).toBeInstanceOf(Error); + const e = caught as Error; + 
// A non-retryable failure must surface the original error untouched. The retry wrapper's
+    // message starts with "Metabase request failed" (plus " (N attempts)" once retries
+    // happened), so assert on that prefix: the previous 'after 3 attempts' needle appears in
+    // no message the client builds, which made the check pass even if wrapping regressed.
+    expect(e.message).not.toContain('Metabase request failed');
+    expect(e.message).not.toContain('attempts');
+    expect(fetchMock).toHaveBeenCalledTimes(1);
+  });
+});
+
+// Exercises createSession, getPermissionGroups and createApiKey against a mocked global fetch.
+describe('MetabaseClient admin auth helpers', () => {
+  let originalFetch: typeof fetch;
+
+  beforeEach(() => {
+    originalFetch = globalThis.fetch;
+  });
+
+  afterEach(() => {
+    // Put the real fetch back so a mock never leaks into another test.
+    globalThis.fetch = originalFetch;
+    vi.restoreAllMocks();
+  });
+
+  it('creates a session without sending an auth header', async () => {
+    const sessionFixture = 'session-fixture';
+    const adminCredentialFixture = 'admin-fixture';
+    const fetchMock = vi
+      .fn()
+      .mockResolvedValue(new Response(JSON.stringify({ id: sessionFixture }), { status: 200 }));
+    globalThis.fetch = fetchMock;
+
+    // An empty apiKey means the client has no credential to attach yet.
+    const client = new MetabaseClient({ apiUrl: 'https://metabase.example.test', apiKey: '' }, fastRetryConfig);
+
+    await expect(client.createSession('admin@example.test', adminCredentialFixture)).resolves.toBe(sessionFixture);
+
+    expect(fetchMock).toHaveBeenCalledWith(
+      'https://metabase.example.test/api/session',
+      expect.objectContaining({
+        method: 'POST',
+        headers: { 'Content-Type': 'application/json' },
+        body: JSON.stringify({ username: 'admin@example.test', password: adminCredentialFixture }),
+      }),
+    );
+  });
+
+  it('uses the configured auth header for permission groups and API-key creation', async () => {
+    const mintedMetabaseCredential = 'mb_generated';
+    const sessionFixture = 'session-fixture';
+    // First response answers getPermissionGroups, second answers createApiKey.
+    const fetchMock = vi
+      .fn()
+      .mockResolvedValueOnce(new Response(JSON.stringify([{ id: 2, name: 'Administrators' }]), { status: 200 }))
+      .mockResolvedValueOnce(new Response(JSON.stringify({ unmasked_key: mintedMetabaseCredential }), { status: 200 }));
+    globalThis.fetch = fetchMock;
+
+    const client = new MetabaseClient(
+      { apiUrl: 'https://metabase.example.test', apiKey: sessionFixture, authHeaderName: 'X-Metabase-Session' },
+      fastRetryConfig,
+    );
+
+    await expect(client.getPermissionGroups()).resolves.toEqual([{ id: 2, name: 'Administrators' }]);
+    await expect(client.createApiKey({
name: 'KLO CLI test', groupId: 2 })).resolves.toBe(mintedMetabaseCredential); + + expect(fetchMock).toHaveBeenNthCalledWith( + 1, + 'https://metabase.example.test/api/permissions/group', + expect.objectContaining({ + method: 'GET', + headers: { 'Content-Type': 'application/json', 'X-Metabase-Session': sessionFixture }, + }), + ); + expect(fetchMock).toHaveBeenNthCalledWith( + 2, + 'https://metabase.example.test/api/api-key', + expect.objectContaining({ + method: 'POST', + body: JSON.stringify({ name: 'KLO CLI test', group_id: 2 }), + }), + ); + }); +}); + +describe('stripOptionalClauses', () => { + it('drops optional blocks that contain Metabase template variables', () => { + const input = 'SELECT * FROM x WHERE 1=1 [[AND a = {{ a }} ]] [[AND b = {{ b }} ]]'; + expect(stripOptionalClauses(input)).toBe('SELECT * FROM x WHERE 1=1 '); + }); + + it('preserves bracket sequences that contain no template variables', () => { + const input = "SELECT * FROM x WHERE col LIKE '[[abc]]'"; + expect(stripOptionalClauses(input)).toBe(input); + }); + + it('leaves naked template variables intact', () => { + const input = 'SELECT * FROM x WHERE id = {{ id }}'; + expect(stripOptionalClauses(input)).toBe(input); + }); +}); + +describe('getDummyValueForWidgetType', () => { + it('returns widget-specific date and number values', () => { + expect(getDummyValueForWidgetType('date/range')).toBe('2020-01-01~2020-12-31'); + expect(getDummyValueForWidgetType('date/all-options')).toBe('2020-01-01~2020-12-31'); + expect(getDummyValueForWidgetType('date/single')).toBe('2020-01-01'); + expect(getDummyValueForWidgetType('date/relative')).toBe('past30days'); + expect(getDummyValueForWidgetType('date/month-year')).toBe('2020-01'); + expect(getDummyValueForWidgetType('date/quarter-year')).toBe('Q1-2020'); + expect(getDummyValueForWidgetType('number/=')).toBe('1'); + expect(getDummyValueForWidgetType('number/between')).toBe('1'); + }); + + it('falls back to an array placeholder for string, identifier, 
and unknown widgets', () => { + expect(getDummyValueForWidgetType('string/=')).toEqual(['placeholder']); + expect(getDummyValueForWidgetType('category')).toEqual(['placeholder']); + expect(getDummyValueForWidgetType(undefined)).toEqual(['placeholder']); + }); +}); + +describe('MetabaseClient.getResolvedSql', () => { + function makeClient(setup?: (client: MetabaseClient) => void): MetabaseClient { + const client = new MetabaseClient({ apiUrl: 'http://test', apiKey: 'k' }); + setup?.(client); + return client; + } + + it('strips optional clauses locally and skips /api/dataset/native when no naked variables remain', async () => { + const requestSpy = vi.fn(); + const client = makeClient((client) => { + Reflect.set(client, 'requestWithCustomRetry', requestSpy); + }); + const card = nativeCard('SELECT * FROM x WHERE 1=1 [[AND end > {{ auction_end }} ]]', { + auction_end: { + id: 'tag-1', + name: 'auction_end', + type: 'dimension', + 'widget-type': 'date/all-options', + 'display-name': 'Auction End', + }, + }); + + const result = await client.getResolvedSql(card); + + expect(requestSpy).not.toHaveBeenCalled(); + expect(result?.resolutionStatus).toBe('resolved'); + expect(result?.resolvedSql).toBe('SELECT * FROM x WHERE 1=1 '); + expect(result?.templateTags[0]).toMatchObject({ name: 'auction_end', type: 'dimension' }); + }); + + it('inlines saved-question references locally and skips /api/dataset/native when no other variables remain', async () => { + const requestSpy = vi.fn(); + const getCardSpy = vi.fn().mockResolvedValue({ + id: 5996, + name: 'Base card', + type: 'model', + query_type: 'native', + database_id: 6, + dataset_query: { + type: 'native', + database: 6, + native: { query: 'SELECT a, b FROM base' }, + }, + }); + const client = makeClient((client) => { + Reflect.set(client, 'requestWithCustomRetry', requestSpy); + Reflect.set(client, 'getCard', getCardSpy); + }); + const card = nativeCard('SELECT * FROM {{#5996-base}} t [[WHERE end > {{ end }}]]', { + 
'#5996-base': { + id: 't1', + name: '#5996-base', + type: 'card', + 'card-id': 5996, + }, + end: { + id: 't2', + name: 'end', + type: 'dimension', + 'widget-type': 'date/range', + }, + }); + + const result = await client.getResolvedSql(card); + + expect(requestSpy).not.toHaveBeenCalled(); + expect(getCardSpy).toHaveBeenCalledWith(5996); + expect(result?.resolutionStatus).toBe('resolved'); + expect(result?.resolvedSql).toBe('SELECT * FROM (SELECT a, b FROM base) t '); + }); + + it('uses /api/dataset/native for naked variables and prepends a warning comment', async () => { + const requestSpy = vi.fn().mockResolvedValue({ query: "SELECT * WHERE id = 'placeholder' AND n = 1" }); + const client = makeClient((client) => { + Reflect.set(client, 'requestWithCustomRetry', requestSpy); + }); + const card = nativeCard('SELECT * WHERE id = {{ id }} AND n = {{ n }}', { + id: { id: 't1', name: 'id', type: 'text' }, + n: { id: 't2', name: 'n', type: 'number' }, + }); + + const result = await client.getResolvedSql(card); + + expect(requestSpy).toHaveBeenCalledTimes(1); + expect(result?.resolutionStatus).toBe('resolved'); + const sql = result?.resolvedSql ?? 
''; + expect(sql.startsWith('--')).toBe(true); + expect(sql).toMatch(/KLO_PLACEHOLDER_WARNING/); + expect(sql).toMatch(/\bid\b/); + expect(sql).toMatch(/\bn\b/); + }); + + it('falls back to raw native SQL with truthful template tags when /api/dataset/native errors', async () => { + const requestSpy = vi.fn().mockRejectedValue(new Error('Metabase 500')); + const client = makeClient((client) => { + Reflect.set(client, 'requestWithCustomRetry', requestSpy); + }); + const card = nativeCard('SELECT * FROM x WHERE end > {{ auction_end }}', { + auction_end: { + id: 'tag-id', + name: 'auction_end', + type: 'dimension', + 'widget-type': 'date/range', + 'display-name': 'Auction End', + }, + }); + + const result = await client.getResolvedSql(card); + + expect(result?.resolutionStatus).toBe('fallback'); + expect(result?.resolvedSql).toContain('{{ auction_end }}'); + expect(result?.templateTags).toHaveLength(1); + expect(result?.templateTags[0]).toMatchObject({ + name: 'auction_end', + type: 'dimension', + displayName: 'Auction End', + }); + }); +}); diff --git a/packages/context/src/ingest/adapters/metabase/client.ts b/packages/context/src/ingest/adapters/metabase/client.ts new file mode 100644 index 00000000..3e2a1a66 --- /dev/null +++ b/packages/context/src/ingest/adapters/metabase/client.ts @@ -0,0 +1,783 @@ +import { CardReferenceCycleError, expandCardReferences } from './card-references.js'; +import type { + MetabaseCard, + MetabaseCardSummary, + MetabaseClientConfig, + MetabaseClientRuntimeConfig, + MetabaseCollection, + MetabaseCollectionItem, + MetabaseConnectionClientFactory, + MetabaseDatabase, + MetabaseDatasetQuery, + MetabaseNativeQueryResult, + MetabaseRuntimeClient, + MetabaseTemplateTag, + MetabaseUser, + ResolvedSqlResult, + TestConnectionResult, +} from './client-port.js'; + +export interface MetabaseClientLogger { + log(message: string): void; + warn(message: string): void; + error(message: string): void; + debug?(message: string): void; +} + +const 
defaultLogger: MetabaseClientLogger = { + log: (message) => console.log(message), + warn: (message) => console.warn(message), + error: (message) => console.error(message), + debug: (message) => console.debug(message), +}; + +interface TemplateTagInfo { + [key: string]: string | null; + name: string; + type: string; + displayName: string; + dummyValue: string | null; +} + +interface CreateCardParams { + name: string; + databaseId: number; + sql: string; + collectionId?: number | null; + display?: string; + description?: string; +} + +export const DEFAULT_METABASE_CLIENT_CONFIG: MetabaseClientConfig = { + maxRetries: 2, + baseDelayMs: 1000, + maxDelayMs: 30000, + timeoutMs: 60000, + jitter: true, + retryableStatuses: [429, 500, 502, 503, 504], +}; + +/** Custom error class to preserve Metabase API error details */ +class MetabaseApiError extends Error { + constructor( + message: string, + public readonly status: number, + public readonly responseBody: string, + public readonly isRetryable: boolean, + ) { + super(message); + this.name = 'MetabaseApiError'; + } +} + +/** + * Strip Metabase `[[ ... {{ var }} ... ]]` optional-clause blocks from native SQL. + * + * The bracketed blocks are emitted only when the embedded `{{ var }}` is supplied at + * Metabase query time. For KLO semantic-layer ingest there's no such runtime + * parameter — chat-time filters are composed by the SL query planner — so the optional + * block must be removed before the SQL becomes a permanent SL source. Substituting a + * dummy value (the alternative) bakes a placeholder filter into the source and silently + * excludes rows. + * + * Only strips brackets that contain at least one `{{ }}` placeholder, so unrelated + * `[[`/`]]` literals in string values or regex predicates are preserved. Metabase's + * grammar disallows nested optional blocks (per docs), so non-greedy matching is safe. 
+ */
+export function stripOptionalClauses(sql: string): string {
+  // The body of a match may not itself contain `[[`. Optional blocks cannot nest, so this
+  // loses nothing, and it stops a stray `[[` earlier in the statement (e.g. inside a string
+  // literal such as LIKE '[[%') from pairing with the `]]` of a later optional block and
+  // dragging the SQL in between out with it.
+  return sql.replace(/\[\[(?:(?!\[\[)[\s\S])*?\]\]/g, (block) => (block.includes('{{') ? '' : block));
+}
+
+/**
+ * Find every `{{ var }}` placeholder name still present in the SQL. Excludes `{{#N}}`
+ * card references (those are handled separately by `expandCardReferences`).
+ *
+ * @param sql SQL text to scan.
+ * @returns The distinct placeholder names, trimmed of surrounding whitespace.
+ */
+function collectRemainingPlaceholderNames(sql: string): Set<string> {
+  const names = new Set<string>();
+  // The first character class rejects `#`, `}` and whitespace, which is what skips card references.
+  for (const match of sql.matchAll(/\{\{\s*([^#}\s][^}]*?)\s*\}\}/g)) {
+    names.add(match[1].trim());
+  }
+  return names;
+}
+
+/**
+ * Return a copy of `datasetQuery` whose native SQL text is replaced by `sql`.
+ *
+ * The pMBQL shape (`stages[0].native`) is checked first, then the legacy shape
+ * (`native.query`). A query with neither shape is returned unchanged. The input object is
+ * never mutated.
+ */
+function injectNativeSql(datasetQuery: MetabaseDatasetQuery, sql: string): MetabaseDatasetQuery {
+  if (datasetQuery?.stages?.[0]?.native !== undefined) {
+    const stages = [...(datasetQuery.stages ?? [])];
+    stages[0] = { ...stages[0], native: sql };
+    return { ...datasetQuery, stages };
+  }
+  if (datasetQuery?.native) {
+    return { ...datasetQuery, native: { ...datasetQuery.native, query: sql } };
+  }
+  return datasetQuery;
+}
+
+/**
+ * Picks a dummy `parameters[].value` for a `dimension`-type template tag based on its
+ * `widget-type`. Metabase's `/api/dataset/native` dispatches widget-types to substitution
+ * functions whose value-shape contracts differ — date widgets need a string in the widget's
+ * format, number widgets need a string scalar, identifier/enum widgets accept `[string]`.
+ * Sending `['placeholder']` for a date widget triggers a ClassCastException → HTTP 500.
+ */ +export function getDummyValueForWidgetType(widgetType: string | undefined): string | string[] { + switch (widgetType) { + case 'date/range': + case 'date/all-options': + return '2020-01-01~2020-12-31'; + case 'date/single': + return '2020-01-01'; + case 'date/relative': + return 'past30days'; + case 'date/month-year': + return '2020-01'; + case 'date/quarter-year': + return 'Q1-2020'; + case 'number/=': + case 'number/!=': + case 'number/>=': + case 'number/<=': + case 'number/between': + return '1'; + default: + return ['placeholder']; + } +} + +export class MetabaseClient implements MetabaseRuntimeClient { + private readonly runtime: MetabaseClientRuntimeConfig; + private readonly logger: MetabaseClientLogger; + private readonly baseUrl: string; + private readonly config: MetabaseClientConfig; + + constructor( + runtime: MetabaseClientRuntimeConfig, + config?: Partial, + logger: MetabaseClientLogger = defaultLogger, + ) { + this.runtime = runtime; + this.baseUrl = runtime.apiUrl.replace(/\/+$/, ''); + this.config = { ...DEFAULT_METABASE_CLIENT_CONFIG, ...config }; + this.logger = logger; + } + + async cleanup(): Promise { + // Proxy cleanup stays server-only in v1. The no-op keeps the runtime-client contract stable. + } + + get dataSourceType(): string { + return 'metabase'; + } + + async testConnection(): Promise { + try { + const [user, databases] = await Promise.all([this.getCurrentUser(), this.getDatabases()]); + + return { + success: true, + metadata: { + user: { + email: user.email, + name: user.common_name, + isSuperuser: user.is_superuser, + }, + databases: databases + .filter((db) => !db.is_sample) + .map((db) => ({ + id: db.id, + name: db.name, + engine: db.engine, + host: db.details?.host ?? null, + dbName: db.details?.dbname ?? db.details?.db ?? null, + })), + }, + }; + } catch (error) { + const message = error instanceof Error ? 
error.message : String(error); + return { success: false, error: message }; + } + } + + async getCurrentUser(): Promise { + return this.request('GET', '/api/user/current'); + } + + async createSession(username: string, password: string): Promise { + const response = await this.request<{ id?: unknown }>('POST', '/api/session', { username, password }); + if (typeof response.id !== 'string' || response.id.trim().length === 0) { + throw new Error('Metabase login did not return a session id'); + } + return response.id; + } + + async getPermissionGroups(): Promise> { + return this.request>('GET', '/api/permissions/group'); + } + + async createApiKey(params: { name: string; groupId: number }): Promise { + const response = await this.request<{ unmasked_key?: unknown }>('POST', '/api/api-key', { + name: params.name, + group_id: params.groupId, + }); + if (typeof response.unmasked_key !== 'string' || response.unmasked_key.trim().length === 0) { + throw new Error('Metabase did not return the newly created API key'); + } + return response.unmasked_key; + } + + async getDatabases(): Promise { + const response = await this.request<{ data: MetabaseDatabase[] }>('GET', '/api/database/'); + return response.data; + } + + async getDatabase(id: number): Promise { + return this.request('GET', `/api/database/${id}`); + } + + async getCollectionTree(): Promise { + return this.request('GET', '/api/collection/tree'); + } + + async getCollection(id: number | 'root'): Promise { + return this.request('GET', `/api/collection/${id}`); + } + + async getCollectionItems( + collectionId: number | 'root', + models: ('card' | 'dataset' | 'metric')[] = ['card', 'dataset', 'metric'], + ): Promise { + const modelsParam = models.map((m) => `models=${m}`).join('&'); + const response = await this.request<{ data: MetabaseCollectionItem[] }>( + 'GET', + `/api/collection/${collectionId}/items?${modelsParam}`, + ); + return response.data; + } + + async getCard(id: number): Promise { + return 
this.request('GET', `/api/card/${id}`); + } + + async getAllCards(): Promise { + return this.request('GET', '/api/card/?f=all'); + } + + async convertMbqlToNative(datasetQuery: MetabaseDatasetQuery): Promise { + return this.request('POST', '/api/dataset/native', { + ...datasetQuery, + pretty: true, + }); + } + + /** + * Extract native SQL from card, handling both pMBQL (v57+) and legacy formats. + * - pMBQL format: dataset_query.stages[0].native + * - Legacy format: dataset_query.native.query + */ + getNativeSql(card: MetabaseCard): string | null { + // pMBQL format (v57+): stages[0].native + const pMbqlSql = card.dataset_query?.stages?.[0]?.native; + if (pMbqlSql) { + return pMbqlSql; + } + + // Legacy format: native.query + return card.dataset_query?.native?.query ?? null; + } + + /** + * Extract template tags from card, handling both pMBQL and legacy formats. + * - pMBQL format: dataset_query.stages[0]['template-tags'] + * - Legacy format: dataset_query.native['template-tags'] + */ + getTemplateTags(card: MetabaseCard): Record { + // pMBQL format: stages[0]['template-tags'] + const pMbqlTags = card.dataset_query?.stages?.[0]?.['template-tags']; + if (pMbqlTags) { + return pMbqlTags; + } + + // Legacy format: native['template-tags'] + return card.dataset_query?.native?.['template-tags'] ?? 
{}; + } + + async getCardSql(card: MetabaseCard): Promise { + if (card.query_type === 'native') { + const sql = this.getNativeSql(card); + if (!sql) { + this.logger.warn(`Card ${card.id}: no native SQL found in dataset_query`); + } + return sql; + } + + try { + if (!card.dataset_query) { + this.logger.warn(`Card ${card.id}: no dataset_query found for MBQL conversion`); + return null; + } + const result = await this.convertMbqlToNative(card.dataset_query); + return result.query; + } catch (error) { + this.logger.warn(`Failed to convert MBQL for card ${card.id}: ${error}`); + return null; + } + } + + async getResolvedSql(card: MetabaseCard): Promise { + const rawTemplateTags = this.getTemplateTags(card); + const templateTagEntries = Object.values(rawTemplateTags); + + // For MBQL queries or native queries without template tags, use simple conversion + if (card.query_type !== 'native' || templateTagEntries.length === 0) { + const sql = await this.getCardSql(card); + return sql ? { resolvedSql: sql, templateTags: [], resolutionStatus: 'resolved' } : null; + } + + const nativeQuery = this.getNativeSql(card); + if (!nativeQuery) { + return null; + } + + const templateTags: TemplateTagInfo[] = templateTagEntries.map((tag) => ({ + name: tag.name, + type: tag.type, + displayName: tag['display-name'] ?? tag.name, + dummyValue: tag.type === 'snippet' ? null : this.formatDummyValueForDisplay(tag), + })); + + // Step 1: drop optional [[ ... {{ var }} ... ]] blocks. Semantic-layer sources + // have no parameters; chat-time SL filters compose narrowing WHERE clauses + // dynamically, so any clause the original card author flagged as optional must + // not bake into the persistent SL source SQL (substituting a dummy value would + // silently filter rows out — see incident with auction_seller_bidder_pair_suspicion). + let processedSql = stripOptionalClauses(nativeQuery); + + // Step 2: inline {{#CARD_ID}} card references locally. 
Recursively strip optional + // clauses in referenced cards too — the same reasoning applies all the way down. + try { + processedSql = await expandCardReferences(processedSql, { + fetchCard: async (id) => { + const referenced = await this.getCard(id as number); + const referencedNative = this.getNativeSql(referenced); + if (!referencedNative) { + throw new Error(`referenced card ${id} has no native query`); + } + return { native_query: stripOptionalClauses(referencedNative) }; + }, + }); + } catch (err) { + if (err instanceof CardReferenceCycleError) { + this.logger.warn(`[metabase] card ${card.id} has a reference cycle; cannot resolve SQL: ${err.message}`); + return null; + } + throw err; + } + + // Step 3: collect template tags that still appear in the SQL after strip + inline. + // Anything bracketed-only is gone now; anything card-referenced is inlined. + const remainingNames = collectRemainingPlaceholderNames(processedSql); + const remainingTags = templateTagEntries.filter((tag) => tag.type !== 'snippet' && remainingNames.has(tag.name)); + + if (remainingTags.length === 0) { + return { resolvedSql: processedSql, templateTags, resolutionStatus: 'resolved' }; + } + + // Step 4: dummy-substitute the remaining naked {{ var }} placeholders via Metabase's + // substitution endpoint. Only required because we can't translate dimension-tag + // bindings to warehouse columns ourselves. Prepend a SQL comment listing every + // dummy substitution so downstream consumers (the metabase_ingest LLM) know which + // values are placeholders and not real filters. + if (!card.dataset_query) { + return null; + } + const datasetQuery = injectNativeSql(card.dataset_query, processedSql); + const parameters = remainingTags.map((tag) => ({ + id: tag.id, + type: this.getParamTypeForTag(tag), + value: this.getDummyValueForTag(tag), + target: + tag.type === 'dimension' ? 
['dimension', ['template-tag', tag.name]] : ['variable', ['template-tag', tag.name]], + })); + + try { + // Don't retry 500 errors for SQL resolution - they're deterministic failures + // (invalid dimension filters, bad field references, etc.) + // Still retry 429 (rate limit) and 502/503/504 (gateway errors) + const response = await this.requestWithCustomRetry( + 'POST', + '/api/dataset/native', + { ...datasetQuery, parameters, pretty: true }, + [429, 502, 503, 504], + ); + + const warning = this.buildPlaceholderWarningComment(remainingTags); + return { + resolvedSql: warning + response.query, + templateTags, + resolutionStatus: 'resolved', + }; + } catch (error) { + this.logger.warn( + `[metabase] SQL resolution failed for card ${card.id} after expansion; falling back to unresolved native SQL. Downstream consumers will see resolutionStatus='fallback'. Error: ${error instanceof Error ? error.message : String(error)}`, + ); + return { resolvedSql: nativeQuery, templateTags, resolutionStatus: 'fallback' }; + } + } + + private buildPlaceholderWarningComment(tags: MetabaseTemplateTag[]): string { + const lines = [ + '-- KLO_PLACEHOLDER_WARNING: this SQL was extracted from a Metabase card with', + '-- unbound template parameters. The placeholders below were substituted with DUMMY', + "-- values to satisfy Metabase's parser — they DO NOT represent intended filters.", + '-- Drop the corresponding clauses (or expose them as runtime SL filters) before', + '-- persisting this SQL as a semantic-layer source.', + ]; + for (const tag of tags) { + const widget = tag.type === 'dimension' ? `, widget=${tag['widget-type'] ?? '?'}` : ''; + const dummy = this.formatDummyValueForDisplay(tag); + lines.push(`-- {{ ${tag.name} }} (type=${tag.type}${widget}) → ${dummy}`); + } + return `${lines.join('\n')}\n`; + } + + private getParamTypeForTag(tag: MetabaseTemplateTag): string { + if (tag.type === 'dimension') { + return tag['widget-type'] ?? 
'string/='; + } + if (tag.type === 'number') { + return 'number/='; + } + if (tag.type === 'date') { + return 'date/single'; + } + return 'string/='; + } + + private getDummyValueForTag(tag: MetabaseTemplateTag): string | string[] { + if (tag.type === 'number') { + return '1'; + } + if (tag.type === 'date') { + return '2020-01-01'; + } + if (tag.type === 'dimension') { + return getDummyValueForWidgetType(tag['widget-type']); + } + return 'placeholder'; + } + + private formatDummyValueForDisplay(tag: MetabaseTemplateTag): string { + const value = this.getDummyValueForTag(tag); + if (Array.isArray(value)) { + return value.map((v) => `'${v}'`).join(', '); + } + if (tag.type === 'number') { + return value; + } + return `'${value}'`; + } + + async createCard(params: CreateCardParams): Promise { + const body = { + name: params.name, + display: params.display ?? 'table', + visualization_settings: {}, + dataset_query: { + type: 'native', + native: { + query: params.sql, + }, + database: params.databaseId, + }, + collection_id: params.collectionId ?? null, + description: params.description, + }; + + return this.request('POST', '/api/card', body); + } + + async deleteCard(id: number): Promise { + await this.request('DELETE', `/api/card/${id}`); + } + + private async request(method: 'GET' | 'POST' | 'PUT' | 'DELETE', path: string, body?: unknown): Promise { + return this.requestWithRetry(method, path, body); + } + + /** + * Make a request with custom retryable status codes. + * Useful for endpoints where certain errors are deterministic and shouldn't be retried. 
+   */
+  private async requestWithCustomRetry(
+    method: 'GET' | 'POST' | 'PUT' | 'DELETE',
+    path: string,
+    body: unknown,
+    retryableStatuses: number[],
+  ): Promise {
+    return this.requestWithRetry(method, path, body, retryableStatuses);
+  }
+
+  /**
+   * Run executeRequest up to `maxRetries + 1` times.
+   *
+   * An error that isRetryableError rejects is rethrown as-is on the spot. A retryable error
+   * is retried after calculateDelay(attempt) ms; once the attempts are used up, the last
+   * error is wrapped by wrapExhaustedError and thrown.
+   *
+   * `retryableStatusesOverride` replaces `config.retryableStatuses` for this call only.
+   */
+  private async requestWithRetry(
+    method: 'GET' | 'POST' | 'PUT' | 'DELETE',
+    path: string,
+    body?: unknown,
+    retryableStatusesOverride?: number[],
+  ): Promise {
+    const retryableStatuses = retryableStatusesOverride ?? this.config.retryableStatuses;
+    let lastError: Error | null = null;
+    // Number of attempts actually made; reported in the wrapped error message.
+    let attempts = 0;
+
+    for (let attempt = 0; attempt <= this.config.maxRetries; attempt++) {
+      attempts = attempt + 1;
+      try {
+        return await this.executeRequest(method, path, body);
+      } catch (error) {
+        // Normalise non-Error throwables so the rest of the loop can rely on `.message`.
+        lastError = error instanceof Error ? error : new Error(String(error));
+
+        if (!this.isRetryableError(lastError, retryableStatuses)) {
+          throw lastError;
+        }
+
+        // Last attempt: leave the loop without sleeping and fall through to the wrapped throw.
+        if (attempt >= this.config.maxRetries) {
+          break;
+        }
+
+        const delay = this.calculateDelay(attempt);
+        this.logger.warn(
+          `Metabase request failed (attempt ${attempt + 1}/${this.config.maxRetries + 1}), ` +
+            `retrying in ${delay}ms: ${method} ${path}`,
+        );
+        await this.sleep(delay);
+      }
+    }
+
+    throw this.wrapExhaustedError(lastError as Error, method, path, attempts);
+  }
+
+  /**
+   * Build the error thrown when the retry loop gives up. The message names the method, path
+   * and (after more than one attempt) the attempt count; the original error is attached as
+   * `cause`, and its `code`, when present, is copied onto the wrapper.
+   */
+  private wrapExhaustedError(
+    cause: Error,
+    method: 'GET' | 'POST' | 'PUT' | 'DELETE',
+    path: string,
+    attempts: number,
+  ): Error {
+    // Only mention the attempt count when retries actually happened — "1 attempt" adds no info.
+    const retryNote = attempts > 1 ? ` (${attempts} attempts)` : '';
+    const wrapped = new Error(`Metabase request failed${retryNote}: ${method} ${path} — ${cause.message}`, {
+      cause,
+    });
+    const causeCode = (cause as NodeJS.ErrnoException).code;
+    if (causeCode) {
+      (wrapped as NodeJS.ErrnoException).code = causeCode;
+    }
+    wrapped.name =
+      cause.name === 'Error' ? 
'MetabaseRetryExhaustedError' : `MetabaseRetryExhaustedError(${cause.name})`;
+    return wrapped;
+  }
+
+  /**
+   * Compute the wait before the next retry: exponential backoff capped at `maxDelayMs`,
+   * optionally randomised.
+   *
+   * With jitter enabled the delay is drawn uniformly from [floor, cappedDelay), where floor is
+   * `baseDelayMs` clamped to the cap. This keeps a minimum wait, unlike the AWS "full jitter"
+   * scheme, which draws from [0, cappedDelay]:
+   * https://aws.amazon.com/blogs/architecture/exponential-backoff-and-jitter/
+   *
+   * @param attempt Zero-based index of the attempt that just failed.
+   * @returns Delay in milliseconds, never above `maxDelayMs`.
+   */
+  private calculateDelay(attempt: number): number {
+    const exponentialDelay = this.config.baseDelayMs * 2 ** attempt;
+    const cappedDelay = Math.min(exponentialDelay, this.config.maxDelayMs);
+
+    if (!this.config.jitter) {
+      return cappedDelay;
+    }
+
+    // Clamp the floor to the cap: with maxDelayMs < baseDelayMs the range would otherwise go
+    // negative and the result could exceed maxDelayMs.
+    const floorDelay = Math.min(this.config.baseDelayMs, cappedDelay);
+    const jitterRange = cappedDelay - floorDelay;
+    return Math.floor(Math.random() * jitterRange) + floorDelay;
+  }
+
+  /**
+   * Perform a single HTTP attempt against the Metabase API (retries are the caller's job).
+   *
+   * Sends JSON with the configured auth header (omitted when `apiKey` is empty; the header
+   * name defaults to `x-api-key`), aborts after `timeoutMs`, and converts failures:
+   * - non-OK responses → `MetabaseApiError` carrying status and response body;
+   * - abort → `Error` with `code = 'ETIMEDOUT'`;
+   * - undici's mid-handshake socket close on https → the error built by classifyHttpError.
+   *
+   * @param method HTTP verb.
+   * @param path Path appended to the base URL.
+   * @param body Payload, serialised as JSON for POST and PUT only.
+   * @returns The parsed JSON body, or `undefined` for a 204 No Content response.
+   */
+  private async executeRequest<T>(method: 'GET' | 'POST' | 'PUT' | 'DELETE', path: string, body?: unknown): Promise<T> {
+    const headers: Record<string, string> = {
+      'Content-Type': 'application/json',
+    };
+    if (this.runtime.apiKey) {
+      headers[this.runtime.authHeaderName ?? 'x-api-key'] = this.runtime.apiKey;
+    }
+
+    const url = `${this.baseUrl}${path}`;
+    const parsedUrl = new URL(url);
+    const isHttps = parsedUrl.protocol === 'https:';
+    const port = parsedUrl.port ? parseInt(parsedUrl.port, 10) : isHttps ? 443 : 80;
+
+    // Create AbortController for timeout
+    const controller = new AbortController();
+    const timeoutId = setTimeout(() => controller.abort(), this.config.timeoutMs);
+
+    try {
+      const options: RequestInit = {
+        method,
+        headers,
+        signal: controller.signal,
+      };
+
+      if (body && (method === 'POST' || method === 'PUT')) {
+        options.body = JSON.stringify(body);
+      }
+
+      const response = await fetch(url, options);
+
+      if (!response.ok) {
+        const errorBody = await response.text().catch(() => '');
+        const isRetryable = this.isRetryableStatus(response.status);
+
+        // Log full error details at debug level for diagnostics
+        this.logger.debug?.(
+          `Metabase API error: ${method} ${path} -> ${response.status}, body: ${errorBody.slice(0, 500)}`,
+        );
+
+        throw new MetabaseApiError(
+          this.getErrorMessage(response.status, errorBody),
+          response.status,
+          errorBody,
+          isRetryable,
+        );
+      }
+
+      // 204 No Content carries no body, so response.json() would throw a SyntaxError on a call
+      // that actually succeeded. deleteCard issues a DELETE and discards the result.
+      // NOTE(review): Metabase DELETE endpoints are expected to answer 204 — confirm in the API reference.
+      if (response.status === 204) {
+        return undefined as T;
+      }
+
+      // Await inside the try so the abort timer still covers reading the body. Returning the
+      // bare promise let `finally` clear the timer first, so a stalled body never timed out.
+      return (await response.json()) as T;
+    } catch (error) {
+      // Handle abort/timeout
+      if (error instanceof Error && error.name === 'AbortError') {
+        const timeoutError = new Error(`Request timeout after ${this.config.timeoutMs}ms: ${method} ${path}`);
+        (timeoutError as NodeJS.ErrnoException).code = 'ETIMEDOUT';
+        throw timeoutError;
+      }
+      // Undici (Node fetch) emits a stable message when the socket is closed mid-TLS handshake.
+      // Fetch hides socket events, so this narrow message check is the only signal we have.
+      if (isHttps && error instanceof Error && error.message.includes('before secure TLS connection was established')) {
+        throw this.classifyHttpError(error, {
+          tcpConnected: true,
+          tlsCompleted: false,
+          isHttps: true,
+          host: parsedUrl.hostname,
+          port,
+        });
+      }
+      throw error;
+    } finally {
+      clearTimeout(timeoutId);
+    }
+  }
+
+  /**
+   * Wrap a network error with a phase-aware message so users see "TLS handshake didn't complete"
+   * instead of "read ECONNRESET".
/**
 * Re-label a network failure according to the connection phase it happened in.
 *
 * - TCP never connected: wrapped as `MetabaseTcpConnectError` ("Cannot reach host:port").
 * - HTTPS and TLS not completed: wrapped as `MetabaseTlsHandshakeError`.
 * - Anything else: the original error is returned untouched.
 *
 * Wrapping goes through `wrapWithCause`, which keeps the original error as `cause`
 * and copies its `code` onto the wrapper.
 */
private classifyHttpError(
  cause: Error,
  phase: {
    tcpConnected: boolean;
    tlsCompleted: boolean;
    isHttps: boolean;
    host: string;
    port: number | string;
  },
): Error {
  const { tcpConnected, tlsCompleted, isHttps, host, port } = phase;

  if (tcpConnected) {
    const handshakeIncomplete = isHttps && !tlsCompleted;
    if (!handshakeIncomplete) {
      return cause;
    }
    return this.wrapWithCause(
      cause,
      `TLS handshake to ${host} did not complete — the upstream server may be down or unresponsive: ${cause.message}`,
      'MetabaseTlsHandshakeError',
    );
  }

  return this.wrapWithCause(cause, `Cannot reach ${host}:${port}: ${cause.message}`, 'MetabaseTcpConnectError');
}
/**
 * Decide whether a failed request is worth retrying.
 *
 * - `MetabaseApiError`: retryable iff its status is in `retryableStatuses`.
 * - Otherwise the error and each error in its `cause` chain is inspected in turn:
 *   a timeout code (`ETIMEDOUT` / `TIMEOUT`) or a transient-network message makes it
 *   retryable; an `HTTP_<status>` code is decided by `retryableStatuses`.
 *
 * The cause chain matters because Node's fetch rejects with a generic
 * `TypeError: fetch failed` and puts the real socket error (ECONNRESET,
 * ECONNREFUSED, ...) in `cause`; checking only the top-level error would never
 * retry those.
 *
 * @param error The failure to classify.
 * @param retryableStatuses HTTP statuses considered retryable for this request.
 * @returns `true` when the request should be retried.
 */
private isRetryableError(error: Error, retryableStatuses: number[]): boolean {
  // Custom MetabaseApiError - check status against provided list
  if (error instanceof MetabaseApiError) {
    return retryableStatuses.includes(error.status);
  }

  const transientFragments = ['econnreset', 'etimedout', 'econnrefused', 'socket hang up', 'network', 'abort'];

  // `visited` guards against a self-referential cause chain.
  const visited = new Set<unknown>();
  let current: unknown = error;
  while (current instanceof Error && !visited.has(current)) {
    visited.add(current);
    const code = (current as NodeJS.ErrnoException).code;

    // Timeout errors are retryable
    if (code === 'ETIMEDOUT' || code === 'TIMEOUT') {
      return true;
    }

    // Check HTTP status codes
    if (typeof code === 'string' && code.startsWith('HTTP_')) {
      const status = parseInt(code.replace('HTTP_', ''), 10);
      return retryableStatuses.includes(status);
    }

    // Network errors are retryable
    const message = current.message.toLowerCase();
    if (transientFragments.some((fragment) => message.includes(fragment))) {
      return true;
    }

    current = (current as { cause?: unknown }).cause;
  }

  return false;
}
{}) }; + return new MetabaseClient(runtime, mergedConfig, this.logger); + } +} diff --git a/packages/context/src/ingest/adapters/metabase/detect.test.ts b/packages/context/src/ingest/adapters/metabase/detect.test.ts new file mode 100644 index 00000000..816bbef6 --- /dev/null +++ b/packages/context/src/ingest/adapters/metabase/detect.test.ts @@ -0,0 +1,49 @@ +import { mkdir, mkdtemp, rm, writeFile } from 'node:fs/promises'; +import { tmpdir } from 'node:os'; +import { join } from 'node:path'; +import { afterEach, beforeEach, describe, expect, it } from 'vitest'; +import { detectMetabaseStagedDir } from './detect.js'; + +async function touch(stagedDir: string, relPath: string, body: string): Promise { + const abs = join(stagedDir, relPath); + await mkdir(join(abs, '..'), { recursive: true }); + await writeFile(abs, body, 'utf-8'); +} + +describe('detectMetabaseStagedDir', () => { + let stagedDir: string; + beforeEach(async () => { + stagedDir = await mkdtemp(join(tmpdir(), 'mb-detect-')); + }); + afterEach(async () => { + await rm(stagedDir, { recursive: true, force: true }); + }); + + it('returns true when sync-config.json + cards/*.json are present', async () => { + await touch(stagedDir, 'sync-config.json', '{}'); + await touch(stagedDir, 'cards/1.json', '{}'); + expect(await detectMetabaseStagedDir(stagedDir)).toBe(true); + }); + + it('returns false when sync-config.json is missing', async () => { + await touch(stagedDir, 'cards/1.json', '{}'); + expect(await detectMetabaseStagedDir(stagedDir)).toBe(false); + }); + + it('returns false when cards/ is empty', async () => { + await touch(stagedDir, 'sync-config.json', '{}'); + await mkdir(join(stagedDir, 'cards'), { recursive: true }); + expect(await detectMetabaseStagedDir(stagedDir)).toBe(false); + }); + + it('returns false for an empty staged dir', async () => { + expect(await detectMetabaseStagedDir(stagedDir)).toBe(false); + }); + + it('returns true even when the cards dir has one file and extra non-JSON 
/**
 * Report whether `stagedDir` looks like a staged Metabase bundle.
 *
 * A directory qualifies when the sync-config file can be stat'ed and the cards
 * directory can be listed and contains at least one entry whose name ends in
 * `.json`. Any filesystem error during either probe yields `false`.
 *
 * @param stagedDir Directory to inspect.
 * @returns `true` when both markers are present, otherwise `false`.
 */
export async function detectMetabaseStagedDir(stagedDir: string): Promise<boolean> {
  // Run a filesystem probe, mapping any failure to `undefined`.
  const probe = async <R>(operation: () => Promise<R>): Promise<R | undefined> => {
    try {
      return await operation();
    } catch {
      return undefined;
    }
  };

  const syncConfigStats = await probe(() => stat(join(stagedDir, STAGED_FILES.syncConfig)));
  if (syncConfigStats === undefined) {
    return false;
  }

  const cardsPath = join(stagedDir, STAGED_FILES.cardsDir);
  const entryNames = await probe(() => readdir(cardsPath));
  if (entryNames === undefined) {
    return false;
  }

  return entryNames.some((entryName) => entryName.endsWith('.json'));
}
/**
 * One Metabase-database mapping as consumed by `planMetabaseFanoutChildren`.
 * The planner only emits a child plan for mappings that are sync-enabled and
 * have a non-null target connection.
 */
export interface MetabaseFanoutMappingInput {
  // Metabase database id; copied into the child plan and its generated pull config.
  metabaseDatabaseId: number;
  // Target connection for this database; mappings with `null` here are skipped by the planner.
  targetConnectionId: string | null;
  // Mappings with `false` here are skipped by the planner.
  syncEnabled: boolean;
}
/**
 * Build one child plan per eligible mapping, in input order.
 *
 * A mapping is eligible when `syncEnabled` is true and `targetConnectionId` is
 * not null. Each eligible mapping's pull config is produced by
 * `parseMetabasePullConfig`, so any error it throws propagates to the caller.
 *
 * @param input Metabase connection id plus its database mappings.
 * @returns The child plans, never empty.
 * @throws Error when no mapping is eligible.
 */
export function planMetabaseFanoutChildren(input: PlanMetabaseFanoutChildrenInput): MetabaseFanoutChildPlan[] {
  const { metabaseConnectionId, mappings } = input;

  const plans = mappings.flatMap((mapping): MetabaseFanoutChildPlan[] => {
    const { metabaseDatabaseId, targetConnectionId, syncEnabled } = mapping;
    if (!syncEnabled || targetConnectionId === null) {
      return [];
    }
    const pullConfig = parseMetabasePullConfig({ metabaseConnectionId, metabaseDatabaseId });
    return [{ metabaseConnectionId, metabaseDatabaseId, targetConnectionId, pullConfig }];
  });

  if (plans.length === 0) {
    throw new Error(`no sync-enabled mappings with a target connection for Metabase connection ${metabaseConnectionId}`);
  }

  return plans;
}
// Fingerprint tests: hashScope must depend on set membership only, differ between
// scopes, and produce a 64-character lowercase hex string.
describe('hashScope', () => {
  it('produces the same hash for identical inputs', () => {
    // Same card ids inserted in a different order must hash identically.
    const a = hashScope({
      kind: 'explicit',
      includeCardIds: new Set([1, 2, 3]),
      includeCollectionIds: new Set([7]),
    });
    const b = hashScope({
      kind: 'explicit',
      includeCardIds: new Set([3, 2, 1]),
      includeCollectionIds: new Set([7]),
    });
    expect(a).toBe(b);
  });

  it('produces different hashes for different scopes', () => {
    const a = hashScope({ kind: 'all' });
    const b = hashScope({
      kind: 'explicit',
      includeCardIds: new Set([1]),
      includeCollectionIds: new Set(),
    });
    expect(a).not.toBe(b);
  });

  it('produces a 64-char hex string', () => {
    const fp = hashScope({ kind: 'all' });
    expect(fp).toMatch(/^[0-9a-f]{64}$/);
  });
});
// Path-filter tests: only `cards/<id>.json` paths are scope-checked; every other
// path is expected to be reported as in-scope.
describe('isPathInMetabaseScope', () => {
  // One fixture per FetchScope variant.
  const allScope: FetchScope = { kind: 'all' };
  const exceptScope: FetchScope = {
    kind: 'all-except',
    excludeCardIds: new Set([100]),
    excludeCollectionIds: new Set([5]),
  };
  const explicitScope: FetchScope = {
    kind: 'explicit',
    includeCardIds: new Set([1, 2]),
    includeCollectionIds: new Set([7]),
  };

  it('always includes sync-config.json', () => {
    expect(isPathInMetabaseScope('sync-config.json', allScope)).toBe(true);
    expect(isPathInMetabaseScope('sync-config.json', exceptScope)).toBe(true);
    expect(isPathInMetabaseScope('sync-config.json', explicitScope)).toBe(true);
  });

  it('always includes collections/* and databases/*', () => {
    // Collection 5 is not in the explicit include set, yet its file stays in scope.
    expect(isPathInMetabaseScope('collections/5.json', explicitScope)).toBe(true);
    expect(isPathInMetabaseScope('databases/42.json', explicitScope)).toBe(true);
  });

  it('for `all` scope, every cards/.json is in scope', () => {
    expect(isPathInMetabaseScope('cards/1.json', allScope)).toBe(true);
    expect(isPathInMetabaseScope('cards/999.json', allScope)).toBe(true);
  });

  it('for `all-except` scope, excluded card ids are out of scope', () => {
    expect(isPathInMetabaseScope('cards/100.json', exceptScope)).toBe(false);
    expect(isPathInMetabaseScope('cards/101.json', exceptScope)).toBe(true);
  });

  it('for `explicit` scope, only include-set card ids are in scope', () => {
    expect(isPathInMetabaseScope('cards/1.json', explicitScope)).toBe(true);
    expect(isPathInMetabaseScope('cards/2.json', explicitScope)).toBe(true);
    expect(isPathInMetabaseScope('cards/3.json', explicitScope)).toBe(false);
  });

  it('unknown path shapes default to in-scope (conservative)', () => {
    expect(isPathInMetabaseScope('some-new-dir/whatever.json', explicitScope)).toBe(true);
  });
});
/**
 * Translate a staged sync-config's `syncMode` and `selections` into a `FetchScope`.
 *
 * - `ALL` yields `{ kind: 'all' }`; selections are ignored.
 * - `EXCEPT` yields `all-except`, with the selections as exclusion sets.
 * - Any other mode yields `explicit`, with the selections as inclusion sets.
 *
 * Selections of type `item` contribute card ids; every other selection type
 * contributes collection ids. The function performs no I/O.
 */
export function computeFetchScope(syncConfig: StagedSyncConfig): FetchScope {
  const { syncMode, selections } = syncConfig;
  if (syncMode === 'ALL') {
    return { kind: 'all' };
  }

  // Collect the object ids of the selections on one side of the item/collection split.
  const idsWhere = (wantItems: boolean): Set<number> =>
    new Set<number>(
      selections
        .filter((selection) => (selection.selectionType === 'item') === wantItems)
        .map((selection) => selection.metabaseObjectId),
    );

  const cardIds = idsWhere(true);
  const collectionIds = idsWhere(false);

  return syncMode === 'EXCEPT'
    ? { kind: 'all-except', excludeCardIds: cardIds, excludeCollectionIds: collectionIds }
    : { kind: 'explicit', includeCardIds: cardIds, includeCollectionIds: collectionIds };
}
/**
 * SHA-256 hex digest of a scope's canonical JSON form.
 *
 * Id sets are sorted numerically before serialisation, so two scopes with the
 * same kind and the same members produce the same digest whatever their
 * insertion order.
 */
export function hashScope(scope: FetchScope): string {
  const hasher = createHash('sha256');
  hasher.update(canonicalize(scope));
  return hasher.digest('hex');
}

/** Serialise a scope to JSON with a fixed key order and ascending id arrays. */
function canonicalize(scope: FetchScope): string {
  const ascending = (ids: Set<number>): number[] => Array.from(ids).sort((left, right) => left - right);

  switch (scope.kind) {
    case 'all':
      return JSON.stringify({ kind: 'all' });
    case 'all-except':
      return JSON.stringify({
        kind: 'all-except',
        excludeCardIds: ascending(scope.excludeCardIds),
        excludeCollectionIds: ascending(scope.excludeCollectionIds),
      });
    default:
      return JSON.stringify({
        kind: 'explicit',
        includeCardIds: ascending(scope.includeCardIds),
        includeCollectionIds: ascending(scope.includeCollectionIds),
      });
  }
}
/**
 * Tell whether a staged-dir-relative path belongs to the given scope.
 *
 * Only paths matching `CARD_PATH_RE` (`cards/<digits>.json`) are filtered; every
 * other path, including shapes this function does not recognise, is reported as
 * in-scope. For card paths:
 * - `all`: always in scope;
 * - `all-except`: in scope unless the card id is in `excludeCardIds`;
 * - `explicit`: in scope only when the card id is in `includeCardIds`.
 *
 * NOTE(review): collection selections (`excludeCollectionIds` /
 * `includeCollectionIds`) are not consulted here, so a card selected only via
 * its collection is judged purely by its own id. Confirm that is what callers expect.
 */
export function isPathInMetabaseScope(rawPath: string, scope: FetchScope): boolean {
  const cardMatch = CARD_PATH_RE.exec(rawPath);
  if (cardMatch === null) {
    return true;
  }

  const id = Number(cardMatch[1]);
  switch (scope.kind) {
    case 'all':
      return true;
    case 'all-except':
      return !scope.excludeCardIds.has(id);
    default:
      return scope.includeCardIds.has(id);
  }
}
id: 5, name: 'Orders Team', parent_id: null, children: [] }]), + getCollectionItems: vi.fn().mockResolvedValue([]), + cleanup: vi.fn().mockResolvedValue(undefined), + }; +} + +describe('fetchMetabaseBundle', () => { + let stagedDir: string; + let clientFactory: ReturnType; + let sourceStateReader: ReturnType; + + function makeClientFactory() { + const mockClient = makeMockClient(); + return { + createClient: vi.fn().mockResolvedValue(mockClient), + __client: mockClient, + }; + } + + function makeFetchContext(connectionId = targetConnectionId): FetchContext { + return { + connectionId, + sourceKey: 'metabase', + }; + } + + function makeSourceStateReader() { + return { + getSourceState: vi.fn().mockResolvedValue({ + syncMode: 'ALL', + selections: [], + mappings: [ + { + metabaseDatabaseId: 42, + metabaseDatabaseName: 'Analytics', + metabaseEngine: 'postgres', + targetConnectionId: targetConnectionId, + syncEnabled: true, + }, + ], + defaultTagNames: [], + }), + }; + } + + beforeEach(async () => { + stagedDir = await mkdtemp(join(tmpdir(), 'mb-fetch-')); + clientFactory = makeClientFactory(); + sourceStateReader = makeSourceStateReader(); + }); + + afterEach(async () => { + await rm(stagedDir, { recursive: true, force: true }); + }); + + it('writes sync-config.json, one database file, one collection file, and only non-archived cards matching databaseId', async () => { + await fetchMetabaseBundle({ + pullConfig: { metabaseConnectionId, metabaseDatabaseId: 42 }, + stagedDir, + ctx: makeFetchContext(), + clientFactory, + sourceStateReader, + }); + const cardFiles = await readdir(join(stagedDir, 'cards')); + expect(cardFiles.sort()).toEqual(['1.json']); + const collections = await readdir(join(stagedDir, 'collections')); + expect(collections).toEqual(['5.json']); + const databases = await readdir(join(stagedDir, 'databases')); + expect(databases).toEqual(['42.json']); + const syncConfig = JSON.parse(await readFile(join(stagedDir, 'sync-config.json'), 'utf-8')); + 
expect(syncConfig.metabaseDatabaseId).toBe(42); + expect(syncConfig.mapping.targetConnectionId).toBe(targetConnectionId); + + const card = JSON.parse(await readFile(join(stagedDir, 'cards/1.json'), 'utf-8')); + expect(card.metabaseId).toBe(1); + expect(card.resolvedSql).toBe('SELECT * FROM card_1'); + expect(card.resolutionStatus).toBe('resolved'); + expect(card.collectionPath).toEqual(['Orders Team']); + expect(card.archived).toBe(false); + }); + + it('passes the Metabase source pull config and target fetch context to the client factory', async () => { + await fetchMetabaseBundle({ + pullConfig: { metabaseConnectionId, metabaseDatabaseId: 42 }, + stagedDir, + ctx: makeFetchContext(), + clientFactory, + sourceStateReader, + }); + + expect(clientFactory.createClient).toHaveBeenCalledTimes(1); + expect(clientFactory.createClient).toHaveBeenCalledWith( + { metabaseConnectionId, metabaseDatabaseId: 42 }, + { connectionId: targetConnectionId, sourceKey: 'metabase' }, + ); + }); + + it('reads source state by the Metabase source connection id, not the target fetch context connection id', async () => { + await fetchMetabaseBundle({ + pullConfig: { metabaseConnectionId, metabaseDatabaseId: 42 }, + stagedDir, + ctx: makeFetchContext(), + clientFactory, + sourceStateReader, + }); + + expect(sourceStateReader.getSourceState).toHaveBeenCalledTimes(1); + expect(sourceStateReader.getSourceState).toHaveBeenCalledWith(metabaseConnectionId); + expect(sourceStateReader.getSourceState).not.toHaveBeenCalledWith(targetConnectionId); + }); + + it('cleans up the client after a successful fetch', async () => { + await fetchMetabaseBundle({ + pullConfig: { metabaseConnectionId, metabaseDatabaseId: 42 }, + stagedDir, + ctx: makeFetchContext(), + clientFactory, + sourceStateReader, + }); + + expect(clientFactory.__client.cleanup).toHaveBeenCalledTimes(1); + }); + + it('cleans up the client when fetch fails after client creation', async () => { + 
clientFactory.__client.getCollectionTree.mockRejectedValueOnce(new Error('collection tree unavailable')); + + await expect( + fetchMetabaseBundle({ + pullConfig: { metabaseConnectionId, metabaseDatabaseId: 42 }, + stagedDir, + ctx: makeFetchContext(), + clientFactory, + sourceStateReader, + }), + ).rejects.toThrow('collection tree unavailable'); + + expect(clientFactory.__client.cleanup).toHaveBeenCalledTimes(1); + }); + + it('throws BadRequestException when the requested metabaseDatabaseId has no matching sync-enabled mapping', async () => { + sourceStateReader.getSourceState.mockResolvedValue({ + syncMode: 'ALL', + selections: [], + mappings: [], + defaultTagNames: [], + }); + await expect( + fetchMetabaseBundle({ + pullConfig: { metabaseConnectionId, metabaseDatabaseId: 42 }, + stagedDir, + ctx: makeFetchContext(), + clientFactory, + sourceStateReader, + }), + ).rejects.toThrow(/no sync-enabled mapping for database 42/); + }); + + it('throws BadRequestException when the mapping points to a different target connection than the job', async () => { + sourceStateReader.getSourceState.mockResolvedValue({ + syncMode: 'ALL', + selections: [], + mappings: [ + { + metabaseDatabaseId: 42, + metabaseDatabaseName: 'Analytics', + metabaseEngine: 'postgres', + targetConnectionId: 'c3d4e5f6-a7b8-4901-bcde-f01234567890', + syncEnabled: true, + }, + ], + defaultTagNames: [], + }); + await expect( + fetchMetabaseBundle({ + pullConfig: { metabaseConnectionId, metabaseDatabaseId: 42 }, + stagedDir, + ctx: makeFetchContext(), + clientFactory, + sourceStateReader, + }), + ).rejects.toThrow(/mapping.*does not point to connection/); + }); + + it('throws when the matching mapping has a null metabaseDatabaseName (unhydrated)', async () => { + sourceStateReader.getSourceState.mockResolvedValue({ + syncMode: 'ALL', + selections: [], + mappings: [ + { + metabaseDatabaseId: 42, + metabaseDatabaseName: null, + metabaseEngine: 'postgres', + targetConnectionId, + syncEnabled: true, + }, + ], + 
defaultTagNames: [], + }); + await expect( + fetchMetabaseBundle({ + pullConfig: { metabaseConnectionId, metabaseDatabaseId: 42 }, + stagedDir, + ctx: makeFetchContext(), + clientFactory, + sourceStateReader, + }), + ).rejects.toThrow(/unhydrated.*klo connection mapping refresh/); + }); + + it('skips cards whose getResolvedSql returns null and records them in unresolved-cards.json', async () => { + clientFactory.__client.getResolvedSql.mockResolvedValue(null); + await fetchMetabaseBundle({ + pullConfig: { metabaseConnectionId, metabaseDatabaseId: 42 }, + stagedDir, + ctx: makeFetchContext(), + clientFactory, + sourceStateReader, + }); + const cardFiles = await readdir(join(stagedDir, 'cards')).catch(() => []); + expect(cardFiles).toEqual([]); + const unresolved = JSON.parse(await readFile(join(stagedDir, 'unresolved-cards.json'), 'utf-8')); + expect(unresolved).toEqual([expect.objectContaining({ cardId: 1, name: 'Card 1', reason: 'api_500' })]); + }); + + it('records referenced cards via `{{#N}}` in resolvedSql', async () => { + clientFactory.__client.getResolvedSql.mockImplementation((card: { id: number }) => + Promise.resolve({ + resolvedSql: card.id === 1 ? 'SELECT * FROM {{#999}}' : `SELECT * FROM card_${card.id}`, + templateTags: card.id === 1 ? 
[{ name: 'r', type: 'card', cardReference: 999 }] : [], + resolutionStatus: 'resolved', + }), + ); + await fetchMetabaseBundle({ + pullConfig: { metabaseConnectionId, metabaseDatabaseId: 42 }, + stagedDir, + ctx: makeFetchContext(), + clientFactory, + sourceStateReader, + }); + const card = JSON.parse(await readFile(join(stagedDir, 'cards/1.json'), 'utf-8')); + expect(card.referencedCardIds).toEqual([999]); + }); +}); + +/* eslint-disable @typescript-eslint/require-await -- mock fixtures return constants */ +describe('fetchMetabaseBundle — scoped fetch', () => { + it('ONLY scope fetches exactly the selected card ids (no reference closure)', async () => { + const staged = await mkdtemp(join(tmpdir(), 'mb-fetch-only-')); + try { + const catalog = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10].map((id) => ({ + id, + name: `Card ${id}`, + type: 'model', + database_id: 42, + collection_id: 5, + archived: false, + result_metadata: [], + })); + const getCardCalls: number[] = []; + const client: any = { + getAllCards: async () => + catalog.map((c) => ({ + id: c.id, + database_id: c.database_id, + archived: false, + collection_id: c.collection_id, + })), + getCard: async (id: number) => { + getCardCalls.push(id); + const c = catalog.find((x) => x.id === id); + if (!c) { + throw new Error(`no such card ${id}`); + } + return c; + }, + getResolvedSql: async () => ({ resolvedSql: 'SELECT 1', templateTags: [], resolutionStatus: 'resolved' }), + getCollectionTree: async () => [{ id: 5, name: 'Col5', parent_id: null }], + getCollectionItems: async () => [], + cleanup: async () => {}, + }; + await fetchMetabaseBundle({ + pullConfig: { metabaseConnectionId: 'a1b2c3d4-e5f6-4789-9abc-def012345678', metabaseDatabaseId: 42 }, + stagedDir: staged, + ctx: { connectionId: 'b2c3d4e5-f6a7-4890-abcd-ef0123456789', sourceKey: 'metabase' }, + clientFactory: { createClient: () => client }, + sourceStateReader: { + getSourceState: async () => ({ + syncMode: 'ONLY', + selections: [ + { selectionType: 'item', 
metabaseObjectId: 2 }, + { selectionType: 'item', metabaseObjectId: 5 }, + { selectionType: 'item', metabaseObjectId: 8 }, + ], + mappings: [ + { + metabaseDatabaseId: 42, + metabaseDatabaseName: 'Analytics', + metabaseEngine: 'postgres', + targetConnectionId: 'b2c3d4e5-f6a7-4890-abcd-ef0123456789', + syncEnabled: true, + }, + ], + defaultTagNames: [], + }), + } as any, + }); + expect([...getCardCalls].sort((a, b) => a - b)).toEqual([2, 5, 8]); + } finally { + await rm(staged, { recursive: true, force: true }); + } + }); + + it('ONLY scope walks collections via getCollectionItems', async () => { + const staged = await mkdtemp(join(tmpdir(), 'mb-fetch-col-')); + try { + const getCardCalls: number[] = []; + const collectionItems = [ + { id: 100, model: 'card' }, + { id: 101, model: 'card' }, + ]; + const client: any = { + getAllCards: async () => [], + getCard: async (id: number) => { + getCardCalls.push(id); + return { + id, + name: `Card ${id}`, + type: 'model', + database_id: 42, + collection_id: 7, + archived: false, + result_metadata: [], + }; + }, + getResolvedSql: async () => ({ resolvedSql: 'SELECT 1', templateTags: [], resolutionStatus: 'resolved' }), + getCollectionTree: async () => [{ id: 7, name: 'Col7', parent_id: null }], + getCollectionItems: async (cid: number) => (cid === 7 ? 
collectionItems : []), + cleanup: async () => {}, + }; + await fetchMetabaseBundle({ + pullConfig: { metabaseConnectionId: 'a1b2c3d4-e5f6-4789-9abc-def012345678', metabaseDatabaseId: 42 }, + stagedDir: staged, + ctx: { connectionId: 'b2c3d4e5-f6a7-4890-abcd-ef0123456789', sourceKey: 'metabase' }, + clientFactory: { createClient: () => client }, + sourceStateReader: { + getSourceState: async () => ({ + syncMode: 'ONLY', + selections: [{ selectionType: 'collection', metabaseObjectId: 7 }], + mappings: [ + { + metabaseDatabaseId: 42, + metabaseDatabaseName: 'Analytics', + metabaseEngine: 'postgres', + targetConnectionId: 'b2c3d4e5-f6a7-4890-abcd-ef0123456789', + syncEnabled: true, + }, + ], + defaultTagNames: [], + }), + } as any, + }); + expect([...getCardCalls].sort((a, b) => a - b)).toEqual([100, 101]); + } finally { + await rm(staged, { recursive: true, force: true }); + } + }); +/* Reference chain 1 to 2 to 3: only card 1 is selected, yet all three must be fetched. Each stubbed card names the next one both in its resolved SQL text and in a card-type template tag. */ + it('ONLY scope closes over {{#N}} references, bounded', async () => { + const staged = await mkdtemp(join(tmpdir(), 'mb-fetch-ref-')); + try { + const getCardCalls: number[] = []; + const refs: Record = { 1: [2], 2: [3], 3: [] }; + const client: any = { + getAllCards: async () => [], + getCard: async (id: number) => { + getCardCalls.push(id); + return { + id, + name: `Card ${id}`, + type: 'model', + database_id: 42, + collection_id: null, + archived: false, + result_metadata: [], + }; + }, + getResolvedSql: async (card: any) => ({ + resolvedSql: `SELECT 1 ${(refs[card.id] ?? []).map((r) => `{{#${r}}}`).join(' ')}`, + templateTags: (refs[card.id] ?? 
[]).map((r) => ({ name: `#${r}`, type: 'card', cardReference: r })), + resolutionStatus: 'resolved', + }), + getCollectionTree: async () => [], + getCollectionItems: async () => [], + cleanup: async () => {}, + }; + await fetchMetabaseBundle({ + pullConfig: { metabaseConnectionId: 'a1b2c3d4-e5f6-4789-9abc-def012345678', metabaseDatabaseId: 42 }, + stagedDir: staged, + ctx: { connectionId: 'b2c3d4e5-f6a7-4890-abcd-ef0123456789', sourceKey: 'metabase' }, + clientFactory: { createClient: () => client }, + sourceStateReader: { + getSourceState: async () => ({ + syncMode: 'ONLY', + selections: [{ selectionType: 'item', metabaseObjectId: 1 }], + mappings: [ + { + metabaseDatabaseId: 42, + metabaseDatabaseName: 'Analytics', + metabaseEngine: 'postgres', + targetConnectionId: 'b2c3d4e5-f6a7-4890-abcd-ef0123456789', + syncEnabled: true, + }, + ], + defaultTagNames: [], + }), + } as any, + }); + expect([...getCardCalls].sort((a, b) => a - b)).toEqual([1, 2, 3]); + } finally { + await rm(staged, { recursive: true, force: true }); + } + }); +/* Cards 1 and 2 reference each other through templateTags only (the stubbed SQL is a plain SELECT 1). The assertion on [1, 2] also proves each card is loaded exactly once. */ + it('ONLY with cyclical refs does not infinite-loop', async () => { + const staged = await mkdtemp(join(tmpdir(), 'mb-fetch-cycle-')); + try { + const getCardCalls: number[] = []; + const refs: Record = { 1: [2], 2: [1] }; + const client: any = { + getAllCards: async () => [], + getCard: async (id: number) => { + getCardCalls.push(id); + return { + id, + name: `Card ${id}`, + type: 'model', + database_id: 42, + collection_id: null, + archived: false, + result_metadata: [], + }; + }, + getResolvedSql: async (card: any) => ({ + resolvedSql: `SELECT 1`, + templateTags: (refs[card.id] ?? 
[]).map((r) => ({ name: `#${r}`, type: 'card', cardReference: r })), + resolutionStatus: 'resolved', + }), + getCollectionTree: async () => [], + getCollectionItems: async () => [], + cleanup: async () => {}, + }; + await fetchMetabaseBundle({ + pullConfig: { metabaseConnectionId: 'a1b2c3d4-e5f6-4789-9abc-def012345678', metabaseDatabaseId: 42 }, + stagedDir: staged, + ctx: { connectionId: 'b2c3d4e5-f6a7-4890-abcd-ef0123456789', sourceKey: 'metabase' }, + clientFactory: { createClient: () => client }, + sourceStateReader: { + getSourceState: async () => ({ + syncMode: 'ONLY', + selections: [{ selectionType: 'item', metabaseObjectId: 1 }], + mappings: [ + { + metabaseDatabaseId: 42, + metabaseDatabaseName: 'Analytics', + metabaseEngine: 'postgres', + targetConnectionId: 'b2c3d4e5-f6a7-4890-abcd-ef0123456789', + syncEnabled: true, + }, + ], + defaultTagNames: [], + }), + } as any, + }); + expect([...getCardCalls].sort((a, b) => a - b)).toEqual([1, 2]); + } finally { + await rm(staged, { recursive: true, force: true }); + } + }); +}); diff --git a/packages/context/src/ingest/adapters/metabase/fetch.ts b/packages/context/src/ingest/adapters/metabase/fetch.ts new file mode 100644 index 00000000..81fe6ab3 --- /dev/null +++ b/packages/context/src/ingest/adapters/metabase/fetch.ts @@ -0,0 +1,315 @@ +import { mkdir, writeFile } from 'node:fs/promises'; +import { join } from 'node:path'; +import type { FetchContext, UnresolvedCardInfo } from '../../types.js'; +import type { MetabaseClientFactory, MetabaseRuntimeClient } from './client-port.js'; +import { computeFetchScope, type FetchScope } from './fetch-scope.js'; +import { serializeCard } from './serialize-card.js'; +import type { MetabaseSourceStateReader } from './source-state-port.js'; +import { + type MetabasePullConfig, + parseMetabasePullConfig, + STAGED_FILES, + type StagedCollectionFile, + type StagedDatabaseFile, + type StagedSyncConfig, +} from './types.js'; +/** + * Error thrown by fetchMetabaseBundle when the stored sync state cannot serve the requested + * pull: no sync-enabled mapping with a target, a mapping that targets another connection, + * or a mapping whose database name has not been populated yet. + */ +class IngestInputError extends Error { + 
constructor(message: string) { + super(message); + this.name = 'IngestInputError'; + } +} +/** Console-backed logger: log goes to console.log and warn to console.warn. */ +const logger = { + log: (message: string) => console.log(message), + warn: (message: string) => console.warn(message), +}; +/** + * Inputs to fetchMetabaseBundle. pullConfig is untyped here and validated with + * parseMetabasePullConfig; stagedDir is the directory the staged JSON files are written under. + */ +export interface FetchMetabaseBundleParams { + pullConfig: unknown; + stagedDir: string; + ctx: FetchContext; + clientFactory: MetabaseClientFactory; + sourceStateReader: MetabaseSourceStateReader; +} +/** Flattened collection entry; parentId is null for nodes at the top level of the tree. */ +interface CollectionNode { + id: number | 'root'; + name: string; + parentId: number | 'root' | null; +} +/** + * Flatten the nested collection tree into a map keyed by collection id, recording the id of + * each node parent (null for top-level nodes). Children are read from the optional children + * property of each node. + */ +function buildCollectionIndex( + tree: Awaited>, +): Map { + const index = new Map(); + function walk(nodes: typeof tree, parentId: number | 'root' | null): void { + for (const n of nodes) { + index.set(n.id, { id: n.id, name: n.name, parentId }); + const children = (n.children ?? []) as typeof tree; + walk(children, n.id); + } + } + walk(tree, null); + return index; +} +/** + * Return the collection names from the outermost known ancestor down to collectionId. + * The walk stops at root, at a null parent, at an id missing from the index, or when an id + * repeats (guard against a cyclic parent chain), so the result may be partial or empty. + */ +function resolvePath(index: Map, collectionId: number | 'root'): string[] { + const path: string[] = []; + let cursor: number | 'root' | null = collectionId; + const visited = new Set(); + while (cursor !== null && cursor !== 'root') { + if (visited.has(cursor)) { + break; + } + visited.add(cursor); + const node = index.get(cursor); + if (!node) { + break; + } + path.unshift(node.name); + cursor = node.parentId; + } + return path; +} +/** + * Fetch the cards of one Metabase database and stage them as JSON under params.stagedDir. + * + * Flow: validate pullConfig, load the source state, and require a sync-enabled, hydrated + * mapping that targets params.ctx.connectionId. Then, with a client from the factory: + * compute the fetch scope, index the collection tree, resolve the initial card ids and + * drain them through a queue. Cards that fail to load, belong to another database or are + * archived are skipped; cards whose SQL cannot be resolved are collected as unresolved. + * In explicit scope, cards referenced by a written card are queued as well. Finally the + * referenced collections, the database mapping, the sync config and (when non-empty) the + * unresolved-card list are written. client.cleanup() runs in a finally block. + * + * @param params - See FetchMetabaseBundleParams. + * @throws IngestInputError when no sync-enabled mapping with a target exists, when the + * mapping targets a different connection, or when the mapping has no database name yet. + */ +export async function fetchMetabaseBundle(params: FetchMetabaseBundleParams): Promise { + const pullConfig: MetabasePullConfig = parseMetabasePullConfig(params.pullConfig); + const syncState = await params.sourceStateReader.getSourceState(pullConfig.metabaseConnectionId); + const mapping = syncState.mappings.find( + (m) => m.metabaseDatabaseId === pullConfig.metabaseDatabaseId && m.syncEnabled, + ); + if (!mapping?.targetConnectionId) { + throw new IngestInputError( + `no sync-enabled mapping for database ${pullConfig.metabaseDatabaseId} on Metabase connection ${pullConfig.metabaseConnectionId}`, + ); + } + if (mapping.targetConnectionId !== 
params.ctx.connectionId) { + throw new IngestInputError( + `mapping for database ${pullConfig.metabaseDatabaseId} does not point to connection ${params.ctx.connectionId} (points to ${mapping.targetConnectionId})`, + ); + } + if (mapping.metabaseDatabaseName === null) { + throw new IngestInputError( + `mapping for database ${pullConfig.metabaseDatabaseId} on Metabase connection ${pullConfig.metabaseConnectionId} is unhydrated; run \`klo connection mapping refresh ${pullConfig.metabaseConnectionId}\` to populate metabaseDatabaseName before ingest.`, + ); + } + const mappingDatabaseName: string = mapping.metabaseDatabaseName; +/* From here on a client exists, so the remaining work runs in try/finally and cleanup is always awaited. */ + const client = await params.clientFactory.createClient(pullConfig, params.ctx); + try { + const stagedForScope: StagedSyncConfig = { + metabaseConnectionId: pullConfig.metabaseConnectionId, + metabaseDatabaseId: pullConfig.metabaseDatabaseId, + syncMode: syncState.syncMode, + selections: syncState.selections.map((s) => ({ + selectionType: s.selectionType, + metabaseObjectId: s.metabaseObjectId, + })), + defaultTagNames: syncState.defaultTagNames, + mapping: { + metabaseDatabaseId: mapping.metabaseDatabaseId, + metabaseDatabaseName: mappingDatabaseName, + metabaseEngine: mapping.metabaseEngine, + targetConnectionId: mapping.targetConnectionId, + }, + }; + const scope = computeFetchScope(stagedForScope); + + const collectionTree = await client.getCollectionTree(); + const collectionIndex = buildCollectionIndex(collectionTree); +/* Create the three staging subdirectories (cards, collections, databases) up front. */ + await mkdir(join(params.stagedDir, STAGED_FILES.cardsDir), { recursive: true }); + await mkdir(join(params.stagedDir, STAGED_FILES.collectionsDir), { recursive: true }); + await mkdir(join(params.stagedDir, STAGED_FILES.databasesDir), { recursive: true }); + + const cardIdsToFetch = await resolveCardIdsToFetch(client, scope, pullConfig.metabaseDatabaseId, logger); +/* fetched holds every id already dequeued, so each card is requested at most once even when references form a cycle. */ + const referencedCollectionIds = new Set(); + let writtenCards = 0; + const fetched = new Set(); + const queue: number[] = [...cardIdsToFetch]; + const 
unresolvedCards: UnresolvedCardInfo[] = []; +/* First-in first-out drain: shift takes from the front of the queue, referenced ids are pushed to the back. */ + while (queue.length > 0) { + const cardId = queue.shift(); + if (cardId === undefined) { + continue; + } + if (fetched.has(cardId)) { + continue; + } + fetched.add(cardId); +/* A card that fails to load is logged and skipped; it is not recorded in unresolvedCards. */ + let fullCard: Awaited>; + try { + fullCard = await client.getCard(cardId); + } catch (e) { + logger.warn(`failed to load card ${cardId}: ${e instanceof Error ? e.message : String(e)}`); + continue; + } + if (fullCard.database_id !== pullConfig.metabaseDatabaseId) { + continue; + } + if (fullCard.archived) { + continue; + } + const resolvedResult = await client.getResolvedSql(fullCard).then( + (sql) => ({ ok: true as const, sql }), + (err: unknown) => ({ ok: false as const, err }), + ); + if (!resolvedResult.ok || resolvedResult.sql === null) { + const reason = classifyResolutionFailure(resolvedResult); + const errorMessage = resolvedResult.ok + ? undefined + : resolvedResult.err instanceof Error + ? resolvedResult.err.message + : String(resolvedResult.err); + unresolvedCards.push({ + cardId, + name: fullCard.name, + reason, + errorMessage, + }); + logger.warn(`[metabase.fetch] card ${cardId} ("${fullCard.name}") dropped; reason=${reason}`); + continue; + } + const resolved = resolvedResult.sql; + const collectionPath = + fullCard.collection_id && fullCard.collection_id !== 'root' + ? resolvePath(collectionIndex, fullCard.collection_id as number) + : []; + const staged = serializeCard({ + card: fullCard, + resolvedSql: resolved.resolvedSql, + templateTags: resolved.templateTags ?? 
[], + collectionPath, + resolutionStatus: resolved.resolutionStatus, + }); + await writeFile( + join(params.stagedDir, STAGED_FILES.cardsDir, `${fullCard.id}.json`), + JSON.stringify(staged, null, 2), + 'utf-8', + ); + writtenCards += 1; + if (typeof fullCard.collection_id === 'number') { + referencedCollectionIds.add(fullCard.collection_id); + } +/* Reference closure applies to explicit scope only; ids that were already dequeued are not queued again. */ + if (scope.kind === 'explicit') { + for (const refId of staged.referencedCardIds) { + if (!fetched.has(refId)) { + queue.push(refId); + } + } + } + } +/* Only collections that directly contain a written card get a file; a null parentId is stored as root. */ + for (const colId of referencedCollectionIds) { + const node = collectionIndex.get(colId); + if (!node) { + continue; + } + const file: StagedCollectionFile = { + metabaseId: node.id, + name: node.name, + parentId: node.parentId ?? 'root', + }; + await writeFile( + join(params.stagedDir, STAGED_FILES.collectionsDir, `${colId}.json`), + JSON.stringify(file, null, 2), + 'utf-8', + ); + } + + const databaseFile: StagedDatabaseFile = { + metabaseDatabaseId: mapping.metabaseDatabaseId, + metabaseDatabaseName: mappingDatabaseName, + metabaseEngine: mapping.metabaseEngine, + targetConnectionId: mapping.targetConnectionId, + }; + await writeFile( + join(params.stagedDir, STAGED_FILES.databasesDir, `${mapping.metabaseDatabaseId}.json`), + JSON.stringify(databaseFile, null, 2), + 'utf-8', + ); + + await writeFile(join(params.stagedDir, STAGED_FILES.syncConfig), JSON.stringify(stagedForScope, null, 2), 'utf-8'); +/* NOTE(review): the unresolved-cards file is written only when non-empty and nothing here removes an older one; confirm stagedDir always starts empty. */ + if (unresolvedCards.length > 0) { + await writeFile( + join(params.stagedDir, STAGED_FILES.unresolvedCards), + JSON.stringify(unresolvedCards, null, 2), + 'utf-8', + ); + } + + logger.log( + `wrote ${writtenCards} cards for database ${pullConfig.metabaseDatabaseId} -> ${mapping.targetConnectionId} (scope=${scope.kind}); unresolved=${unresolvedCards.length}`, + ); + } finally { + await client.cleanup(); + } +} +/** + * Map a failed SQL resolution to an UnresolvedCardInfo reason. A call that succeeded but + * returned null is reported as api_500. A rejected call is classified by its message: + * text containing Cycle detected gives cycle, text containing no native query gives + * missing_native. Anything else is unknown. + */ +function classifyResolutionFailure( + r: { ok: true; sql: { resolvedSql: string } | null } | { ok: false; err: unknown }, +): UnresolvedCardInfo['reason'] { + 
if (r.ok && r.sql === null) { + return 'api_500'; + } + if (!r.ok) { + const msg = r.err instanceof Error ? r.err.message : String(r.err); + if (msg.includes('Cycle detected')) { + return 'cycle'; + } + if (msg.includes('no native query')) { + return 'missing_native'; + } + } + return 'unknown'; +} + +/** + * Resolve the initial set of card ids to fetch based on the scope. For `all` + * and `all-except`, this fans out to `getAllCards()` and filters by + * `database_id` + `excludeCardIds` / `excludeCollectionIds`. For `explicit`, + * this walks the selection: direct item ids + members of selected collections + * (via `getCollectionItems`). The closure over `{{#N}}` references is applied + * later in the main fetch loop. + * + * Archived cards are also dropped in the `all` and `all-except` branches. In the + * `explicit` branch nothing is filtered by database or archived state here (the + * main fetch loop checks both after loading each card); only collection items + * whose model is card, dataset or metric are taken, and a collection that fails + * to list is logged through `logger.warn` and skipped. + */ +async function resolveCardIdsToFetch( + client: MetabaseRuntimeClient, + scope: FetchScope, + metabaseDatabaseId: number, + logger: { warn(message: string): void }, +): Promise { + if (scope.kind === 'all' || scope.kind === 'all-except') { + const all = await client.getAllCards(); + const matching = all.filter((c) => !c.archived && c.database_id === metabaseDatabaseId); + if (scope.kind === 'all') { + return matching.map((c) => c.id); + } + return matching + .filter((c) => !scope.excludeCardIds.has(c.id)) + .filter((c) => typeof c.collection_id !== 'number' || !scope.excludeCollectionIds.has(c.collection_id)) + .map((c) => c.id); + } + const ids = new Set(scope.includeCardIds); + for (const colId of scope.includeCollectionIds) { + let items: Array<{ id: number; model: string }>; + try { + items = await client.getCollectionItems(colId); + } catch (e) { + logger.warn(`failed to list collection ${colId}: ${e instanceof Error ? 
e.message : String(e)}`); + continue; + } + for (const item of items) { + if (item.model === 'card' || item.model === 'dataset' || item.model === 'metric') { + ids.add(item.id); + } + } + } + return [...ids]; +} diff --git a/packages/context/src/ingest/adapters/metabase/local-metabase.adapter.test.ts b/packages/context/src/ingest/adapters/metabase/local-metabase.adapter.test.ts new file mode 100644 index 00000000..71d76007 --- /dev/null +++ b/packages/context/src/ingest/adapters/metabase/local-metabase.adapter.test.ts @@ -0,0 +1,59 @@ +import { describe, expect, it } from 'vitest'; +import type { KloProjectConnectionConfig } from '../../../project/index.js'; +import { metabaseRuntimeConfigFromLocalConnection } from './local-metabase.adapter.js'; +/* Covers config resolution for a local Metabase connection: the api_url and url spellings, env-backed versus literal api keys, and the two rejection paths (proxy configured, driver not metabase). */ +describe('metabaseRuntimeConfigFromLocalConnection', () => { + it('resolves api_url and env-backed api_key_ref from a flat klo.yaml connection', () => { + const connection: KloProjectConnectionConfig = { + driver: 'metabase', + api_url: 'https://metabase.example.com', + api_key_ref: 'env:METABASE_API_KEY', // pragma: allowlist secret + }; + + expect( + metabaseRuntimeConfigFromLocalConnection('prod-metabase', connection, { + METABASE_API_KEY: 'mb_key', // pragma: allowlist secret + }), + ).toEqual({ + apiUrl: 'https://metabase.example.com', + apiKey: 'mb_key', // pragma: allowlist secret + }); + }); + + it('accepts url as the local api URL alias', () => { + const connection: KloProjectConnectionConfig = { + driver: 'metabase', + url: 'https://metabase.example.com', + api_key: 'literal-test-key', // pragma: allowlist secret + }; + + expect(metabaseRuntimeConfigFromLocalConnection('prod-metabase', connection)).toEqual({ + apiUrl: 'https://metabase.example.com', + apiKey: 'literal-test-key', // pragma: allowlist secret + }); + }); + + it('rejects proxy-bearing local Metabase connections', () => { + const connection: KloProjectConnectionConfig = { + driver: 'metabase', + api_url: 'https://metabase.example.com', + api_key: 
'literal-test-key', // pragma: allowlist secret + networkProxy: { type: 'ssh' }, + }; + + expect(() => metabaseRuntimeConfigFromLocalConnection('prod-metabase', connection)).toThrow( + 'Standalone KLO does not support proxy-bearing Metabase connections yet', + ); + }); + + it('rejects non-Metabase source connections', () => { + const connection: KloProjectConnectionConfig = { + driver: 'postgres', + url: 'postgres://localhost/db', + }; + + expect(() => metabaseRuntimeConfigFromLocalConnection('warehouse', connection)).toThrow( + 'Connection "warehouse" is not a Metabase connection', + ); + }); +}); diff --git a/packages/context/src/ingest/adapters/metabase/local-metabase.adapter.ts b/packages/context/src/ingest/adapters/metabase/local-metabase.adapter.ts new file mode 100644 index 00000000..d9a36e34 --- /dev/null +++ b/packages/context/src/ingest/adapters/metabase/local-metabase.adapter.ts @@ -0,0 +1,80 @@ +import type { KloLocalProject, KloProjectConnectionConfig } from '../../../project/index.js'; +import { kloLocalStateDbPath } from '../../../project/index.js'; +import { DEFAULT_METABASE_CLIENT_CONFIG, DefaultMetabaseConnectionClientFactory } from './client.js'; +import { + IngestMetabaseClientFactory, + type MetabaseClientConfig, + type MetabaseClientRuntimeConfig, +} from './client-port.js'; +import { LocalMetabaseSourceStateReader } from './local-source-state-store.js'; +import { MetabaseSourceAdapter } from './metabase.adapter.js'; +/** Return value trimmed when it is a string with non-whitespace content; otherwise null. */ +function stringField(value: unknown): string | null { + return typeof value === 'string' && value.trim().length > 0 ? 
value.trim() : null; +} +/** + * Resolve a reference of the form env:NAME against env. Returns null when ref lacks the + * env: prefix, or when the named variable is unset or blank (see stringField). + */ +function resolveEnvReference(ref: string, env: NodeJS.ProcessEnv): string | null { + if (!ref.startsWith('env:')) { + return null; + } + const name = ref.slice('env:'.length); + return stringField(env[name]); +} +/** True when either spelling of the proxy field, networkProxy or network_proxy, is non-null. */ +function hasNetworkProxy(connection: KloProjectConnectionConfig): boolean { + return connection.networkProxy != null || connection.network_proxy != null; +} +/** + * Build the Metabase client runtime config (apiUrl and apiKey) from a local connection entry. + * + * The URL is taken from api_url, apiUrl or url, in that order. A literal api_key or apiKey + * wins over api_key_ref or apiKeyRef; a reference is resolved through resolveEnvReference. + * + * @param connectionId - Connection name, used only in error messages. + * @param connection - Connection entry; undefined is rejected as not a Metabase connection. + * @param env - Environment used to resolve env: references; defaults to process.env. + * @throws Error when the driver is not metabase (compared case-insensitively), when a + * network proxy is configured, or when no URL or API key can be resolved. + * + * NOTE(review): a reference that cannot be resolved (no env: prefix, or variable unset or + * blank) produces the same missing api_key or api_key_ref message as an absent key. + */ +export function metabaseRuntimeConfigFromLocalConnection( + connectionId: string, + connection: KloProjectConnectionConfig | undefined, + env: NodeJS.ProcessEnv = process.env, +): MetabaseClientRuntimeConfig { + if (!connection || String(connection.driver).toLowerCase() !== 'metabase') { + throw new Error(`Connection "${connectionId}" is not a Metabase connection`); + } + if (hasNetworkProxy(connection)) { + throw new Error( + `Standalone KLO does not support proxy-bearing Metabase connections yet. Use hosted Metabase ingest for "${connectionId}" until the KLO Metabase proxy support spec lands.`, + ); + } +/* Both snake_case and camelCase spellings are accepted; the first non-blank value wins. */ + const apiUrl = stringField(connection.api_url) ?? stringField(connection.apiUrl) ?? stringField(connection.url); + const literalApiKey = stringField(connection.api_key) ?? stringField(connection.apiKey); + const apiKeyRef = stringField(connection.api_key_ref) ?? stringField(connection.apiKeyRef); + const apiKey = literalApiKey ?? (apiKeyRef ? 
resolveEnvReference(apiKeyRef, env) : null); + + if (!apiUrl) { + throw new Error(`Connection "${connectionId}" is missing metabase api_url`); + } + if (!apiKey) { + throw new Error(`Connection "${connectionId}" is missing metabase api_key or api_key_ref`); + } + + return { apiUrl, apiKey }; +} +/** + * Optional overrides for createLocalMetabaseSourceAdapter: env is forwarded to + * metabaseRuntimeConfigFromLocalConnection, and defaultClientConfig replaces + * DEFAULT_METABASE_CLIENT_CONFIG. + */ +interface CreateLocalMetabaseSourceAdapterOptions { + env?: NodeJS.ProcessEnv; + defaultClientConfig?: MetabaseClientConfig; +} +/** + * Wire a MetabaseSourceAdapter for a local project. Source state comes from the SQLite file + * at kloLocalStateDbPath(project); client runtime config is resolved per Metabase connection + * id from project.config.connections. When options.env is undefined the resolver falls back + * to process.env through its default parameter. + */ +export function createLocalMetabaseSourceAdapter( + project: KloLocalProject, + options: CreateLocalMetabaseSourceAdapterOptions = {}, +): MetabaseSourceAdapter { + const sourceStateReader = new LocalMetabaseSourceStateReader({ dbPath: kloLocalStateDbPath(project) }); + const connectionFactory = new DefaultMetabaseConnectionClientFactory( + (metabaseConnectionId) => + metabaseRuntimeConfigFromLocalConnection( + metabaseConnectionId, + project.config.connections[metabaseConnectionId], + options.env, + ), + options.defaultClientConfig ?? DEFAULT_METABASE_CLIENT_CONFIG, + ); + return new MetabaseSourceAdapter({ + clientFactory: new IngestMetabaseClientFactory(connectionFactory), + sourceStateReader, + }); +} diff --git a/packages/context/src/ingest/adapters/metabase/local-source-state-store.test.ts b/packages/context/src/ingest/adapters/metabase/local-source-state-store.test.ts new file mode 100644 index 00000000..9ab0fbe9 --- /dev/null +++ b/packages/context/src/ingest/adapters/metabase/local-source-state-store.test.ts @@ -0,0 +1,314 @@ +import { mkdtemp, rm } from 'node:fs/promises'; +import { tmpdir } from 'node:os'; +import { join } from 'node:path'; +import { afterEach, beforeEach, describe, expect, it } from 'vitest'; +import { LocalMetabaseSourceStateReader } from './local-source-state-store.js'; +/* Each test gets its own temp directory and a store backed by a SQLite file inside it; the directory is removed after the test. */ +describe('LocalMetabaseSourceStateReader', () => { + let tempDir: string; + let store: LocalMetabaseSourceStateReader; + + beforeEach(async () => { + tempDir = await mkdtemp(join(tmpdir(), 'klo-metabase-local-state-')); + store = new 
LocalMetabaseSourceStateReader({ dbPath: join(tempDir, '.klo', 'db.sqlite') }); + }); + + afterEach(async () => { + await rm(tempDir, { recursive: true, force: true }); + }); +/* The read model is narrower than the write model: metabaseHost, metabaseDbName and source are written but are not part of what getSourceState is expected to return. */ + it('round-trips hydrated source state through SQLite', async () => { + await store.replaceSourceState({ + connectionId: 'prod-metabase', + syncMode: 'ONLY', + defaultTagNames: ['analytics', 'curated'], + selections: [ + { selectionType: 'collection', metabaseObjectId: 10 }, + { selectionType: 'item', metabaseObjectId: 99 }, + ], + mappings: [ + { + metabaseDatabaseId: 1, + metabaseDatabaseName: 'Analytics', + metabaseEngine: 'postgres', + metabaseHost: 'warehouse.internal', + metabaseDbName: 'analytics', + targetConnectionId: 'warehouse', + syncEnabled: true, + source: 'cli', + }, + ], + }); + + await expect(store.getSourceState('prod-metabase')).resolves.toEqual({ + syncMode: 'ONLY', + defaultTagNames: ['analytics', 'curated'], + selections: [ + { selectionType: 'collection', metabaseObjectId: 10 }, + { selectionType: 'item', metabaseObjectId: 99 }, + ], + mappings: [ + { + metabaseDatabaseId: 1, + metabaseDatabaseName: 'Analytics', + metabaseEngine: 'postgres', + targetConnectionId: 'warehouse', + syncEnabled: true, + }, + ], + }); + }); +/* Mapping 1 has a null metabaseDatabaseName and engine (unhydrated); mapping 2 is fully populated. Both are sync-enabled. */ + it('excludes unhydrated mappings from getSourceState and exposes them through the side accessor', async () => { + await store.replaceSourceState({ + connectionId: 'prod-metabase', + syncMode: 'ALL', + defaultTagNames: [], + selections: [], + mappings: [ + { + metabaseDatabaseId: 1, + metabaseDatabaseName: null, + metabaseEngine: null, + metabaseHost: null, + metabaseDbName: null, + targetConnectionId: 'warehouse', + syncEnabled: true, + source: 'klo.yaml', + }, + { + metabaseDatabaseId: 2, + metabaseDatabaseName: 'Sandbox', + metabaseEngine: 'postgres', + metabaseHost: 'warehouse.internal', + metabaseDbName: 'sandbox', + targetConnectionId: 'warehouse', + syncEnabled: true, + source: 'refresh', + }, + ], + }); + + const state = await 
store.getSourceState('prod-metabase'); + expect(state.mappings.map((mapping) => mapping.metabaseDatabaseId)).toEqual([2]); + await expect(store.getUnhydratedSyncEnabledMappingIds('prod-metabase')).resolves.toEqual([1]); + }); +/* replaceSourceState is called without syncMode, defaultTagNames or selections; reads must fall back to ALL and empty lists. */ + it('defaults missing sync config to ALL with no tags or selections', async () => { + await store.replaceSourceState({ + connectionId: 'prod-metabase', + mappings: [ + { + metabaseDatabaseId: 3, + metabaseDatabaseName: 'Warehouse', + metabaseEngine: 'postgres', + metabaseHost: null, + metabaseDbName: null, + targetConnectionId: null, + syncEnabled: false, + source: 'refresh', + }, + ], + }); + + await expect(store.getSourceState('prod-metabase')).resolves.toMatchObject({ + syncMode: 'ALL', + defaultTagNames: [], + selections: [], + }); + }); +/* A mapping upserted without discovered metadata stays unhydrated: it is listed by listDatabaseMappings but absent from getSourceState. */ + it('supports command-sized mapping writes and reads', async () => { + await store.upsertDatabaseMapping({ + connectionId: 'prod-metabase', + metabaseDatabaseId: 1, + targetConnectionId: 'prod-warehouse', + syncEnabled: true, + source: 'cli', + }); + await store.setSyncState({ + connectionId: 'prod-metabase', + syncMode: 'ONLY', + defaultTagNames: ['analytics'], + selections: [{ selectionType: 'collection', metabaseObjectId: 12 }], + }); + + await expect(store.listDatabaseMappings('prod-metabase')).resolves.toEqual([ + { + metabaseDatabaseId: 1, + metabaseDatabaseName: null, + metabaseEngine: null, + metabaseHost: null, + metabaseDbName: null, + targetConnectionId: 'prod-warehouse', + syncEnabled: true, + source: 'cli', + }, + ]); + await expect(store.getUnhydratedSyncEnabledMappingIds('prod-metabase')).resolves.toEqual([1]); + await expect(store.getSourceState('prod-metabase')).resolves.toMatchObject({ + syncMode: 'ONLY', + defaultTagNames: ['analytics'], + selections: [{ selectionType: 'collection', metabaseObjectId: 12 }], + mappings: [], + }); + }); +/* Refresh must fill in metadata for the existing cli row without touching its target, sync flag or source, and add newly discovered databases as disabled refresh rows with no target. */ + it('refreshes discovered database metadata while preserving user mapping intent', async () => { + await store.upsertDatabaseMapping({ + 
connectionId: 'prod-metabase', + metabaseDatabaseId: 1, + targetConnectionId: 'prod-warehouse', + syncEnabled: true, + source: 'cli', + }); + + await store.refreshDiscoveredDatabases({ + connectionId: 'prod-metabase', + discovered: [ + { id: 1, name: 'Analytics', engine: 'postgres', host: 'pg.internal', dbName: 'analytics' }, + { id: 2, name: 'Sandbox', engine: 'postgres', host: 'pg.internal', dbName: 'sandbox' }, + ], + }); + + await expect(store.listDatabaseMappings('prod-metabase')).resolves.toEqual([ + { + metabaseDatabaseId: 1, + metabaseDatabaseName: 'Analytics', + metabaseEngine: 'postgres', + metabaseHost: 'pg.internal', + metabaseDbName: 'analytics', + targetConnectionId: 'prod-warehouse', + syncEnabled: true, + source: 'cli', + }, + { + metabaseDatabaseId: 2, + metabaseDatabaseName: 'Sandbox', + metabaseEngine: 'postgres', + metabaseHost: 'pg.internal', + metabaseDbName: 'sandbox', + targetConnectionId: null, + syncEnabled: false, + source: 'refresh', + }, + ]); + }); +/* Seeds two enabled mappings, disables mapping 2, deletes mapping 1 through the scoped clear, and expects only the disabled mapping 2 to remain. */ + it('updates sync-enabled, clears scoped rows, and applies bulk state in one call', async () => { + await store.replaceSourceState({ + connectionId: 'prod-metabase', + mappings: [ + { + metabaseDatabaseId: 1, + metabaseDatabaseName: 'Analytics', + metabaseEngine: 'postgres', + metabaseHost: 'pg.internal', + metabaseDbName: 'analytics', + targetConnectionId: 'prod-warehouse', + syncEnabled: true, + source: 'refresh', + }, + { + metabaseDatabaseId: 2, + metabaseDatabaseName: 'Sandbox', + metabaseEngine: 'postgres', + metabaseHost: 'pg.internal', + metabaseDbName: 'sandbox', + targetConnectionId: 'staging-warehouse', + syncEnabled: true, + source: 'refresh', + }, + ], + }); + + await store.setMappingSyncEnabled({ + connectionId: 'prod-metabase', + metabaseDatabaseId: 2, + syncEnabled: false, + }); + await store.clearDatabaseMappings({ connectionId: 'prod-metabase', metabaseDatabaseId: 1 }); + + await expect(store.listDatabaseMappings('prod-metabase')).resolves.toEqual([ + { + 
metabaseDatabaseId: 2, + metabaseDatabaseName: 'Sandbox', + metabaseEngine: 'postgres', + metabaseHost: 'pg.internal', + metabaseDbName: 'sandbox', + targetConnectionId: 'staging-warehouse', + syncEnabled: false, + source: 'refresh', + }, + ]); + }); +/* applyYamlBootstrap on an empty store: sync config and selections are stored, and the mapping is inserted unhydrated with source klo.yaml. */ + it('seeds unhydrated yaml intent without exposing it through getSourceState', async () => { + await store.applyYamlBootstrap({ + connectionId: 'prod-metabase', + syncMode: 'ALL', + defaultTagNames: ['klo'], + selections: [{ selectionType: 'collection', metabaseObjectId: 12 }], + mappings: [{ metabaseDatabaseId: 1, targetConnectionId: 'prod-warehouse', syncEnabled: true }], + }); + + await expect(store.getUnhydratedSyncEnabledMappingIds('prod-metabase')).resolves.toEqual([1]); + await expect(store.getSourceState('prod-metabase')).resolves.toMatchObject({ + syncMode: 'ALL', + defaultTagNames: ['klo'], + selections: [{ selectionType: 'collection', metabaseObjectId: 12 }], + mappings: [], + }); + await expect(store.listDatabaseMappings('prod-metabase')).resolves.toMatchObject([ + { + metabaseDatabaseId: 1, + metabaseDatabaseName: null, + targetConnectionId: 'prod-warehouse', + syncEnabled: true, + source: 'klo.yaml', + }, + ]); + }); +/* Database 1 starts as a refresh row with no target, so the yaml intent may claim it; database 2 is a cli row and must keep its cli target and sync flag. */ + it('applies yaml target intent onto refresh metadata but does not overwrite cli rows', async () => { + await store.refreshDiscoveredDatabases({ + connectionId: 'prod-metabase', + discovered: [{ id: 1, name: 'Analytics', engine: 'postgres', host: 'db.test', dbName: 'analytics' }], + }); + await store.upsertDatabaseMapping({ + connectionId: 'prod-metabase', + metabaseDatabaseId: 2, + targetConnectionId: 'cli-warehouse', + syncEnabled: true, + source: 'cli', + }); + + await store.applyYamlBootstrap({ + connectionId: 'prod-metabase', + syncMode: 'EXCEPT', + defaultTagNames: [], + selections: [{ selectionType: 'item', metabaseObjectId: 99 }], + mappings: [ + { metabaseDatabaseId: 1, targetConnectionId: 'yaml-warehouse', syncEnabled: true }, + { metabaseDatabaseId: 2, targetConnectionId: 
'yaml-warehouse', syncEnabled: false }, + ], + }); + + await expect(store.listDatabaseMappings('prod-metabase')).resolves.toMatchObject([ + { + metabaseDatabaseId: 1, + metabaseDatabaseName: 'Analytics', + metabaseEngine: 'postgres', + targetConnectionId: 'yaml-warehouse', + syncEnabled: true, + source: 'klo.yaml', + }, + { + metabaseDatabaseId: 2, + targetConnectionId: 'cli-warehouse', + syncEnabled: true, + source: 'cli', + }, + ]); + }); +}); diff --git a/packages/context/src/ingest/adapters/metabase/local-source-state-store.ts b/packages/context/src/ingest/adapters/metabase/local-source-state-store.ts new file mode 100644 index 00000000..bee43298 --- /dev/null +++ b/packages/context/src/ingest/adapters/metabase/local-source-state-store.ts @@ -0,0 +1,560 @@ +import { mkdirSync } from 'node:fs'; +import { dirname } from 'node:path'; +import Database from 'better-sqlite3'; +import type { MetabaseSourceState, MetabaseSourceStateReader, MetabaseSourceStateSelection } from './source-state-port.js'; +import type { MetabaseSyncMode } from './types.js'; +/** Origin recorded in the source column of each mapping row. */ +export type LocalMetabaseMappingSource = 'klo.yaml' | 'cli' | 'refresh'; +/** + * Constructor options for the store. dbPath is the SQLite file path (its parent directory is + * created when missing); now overrides the clock used for updated_at timestamps. + */ +interface LocalMetabaseSourceStateStoreOptions { + dbPath: string; + now?: () => Date; +} +/** + * Full mapping row as written by callers. The four metabase-prefixed metadata fields are + * nullable; the test suite treats a row with a null metabaseDatabaseName as unhydrated. + */ +export interface LocalMetabaseSourceStateMappingInput { + metabaseDatabaseId: number; + metabaseDatabaseName: string | null; + metabaseEngine: string | null; + metabaseHost: string | null; + metabaseDbName: string | null; + targetConnectionId: string | null; + syncEnabled: boolean; + source: LocalMetabaseMappingSource; +} +/** + * Bulk state for one connection. syncMode, defaultTagNames and selections are optional; the + * test suite expects reads to report ALL and empty lists when they are omitted. + */ +export interface ReplaceLocalMetabaseSourceStateInput { + connectionId: string; + syncMode?: MetabaseSyncMode; + defaultTagNames?: string[]; + selections?: MetabaseSourceStateSelection[]; + mappings: LocalMetabaseSourceStateMappingInput[]; +} +/** + * Input of applyYamlBootstrap. Mappings carry only target and sync intent; rows inserted + * from them get null metadata and source klo.yaml. + */ +interface ApplyLocalMetabaseYamlBootstrapInput { + connectionId: string; + syncMode: MetabaseSyncMode; + defaultTagNames: string[]; + selections: MetabaseSourceStateSelection[]; + mappings: 
Array<{ + metabaseDatabaseId: number; + targetConnectionId: string | null; + syncEnabled: boolean; + }>; +} +/** Same shape as the mapping input; the test suite asserts this shape on listDatabaseMappings results. */ +export interface LocalMetabaseMappingListRow extends LocalMetabaseSourceStateMappingInput {} +/** Target and sync intent for one mapping plus its source; no discovered metadata fields are carried. */ +export interface UpsertLocalMetabaseDatabaseMappingInput { + connectionId: string; + metabaseDatabaseId: number; + targetConnectionId: string | null; + syncEnabled: boolean; + source: LocalMetabaseMappingSource; +} +/** Identifies one mapping by connection and database id and gives its new sync flag. */ +export interface SetLocalMetabaseMappingSyncEnabledInput { + connectionId: string; + metabaseDatabaseId: number; + syncEnabled: boolean; +} +/** Connection-level sync settings: mode, default tag names and selections. */ +export interface SetLocalMetabaseSyncStateInput { + connectionId: string; + syncMode: MetabaseSyncMode; + defaultTagNames: string[]; + selections: MetabaseSourceStateSelection[]; +} +/** Discovered database metadata to merge into the mapping rows; host and dbName may be null. */ +export interface RefreshLocalMetabaseDiscoveredDatabasesInput { + connectionId: string; + discovered: Array<{ + id: number; + name: string; + engine: string; + host: string | null; + dbName: string | null; + }>; +} +/** metabaseDatabaseId narrows the clear to one mapping; presumably every mapping of the connection is cleared when it is omitted (TODO confirm against clearDatabaseMappings). */ +export interface ClearLocalMetabaseMappingsInput { + connectionId: string; + metabaseDatabaseId?: number; +} +/** Columns read from local_metabase_selections (snake_case, as stored). */ +interface SelectionRow { + selection_type: 'collection' | 'item'; + metabase_object_id: number; +} +/** Subset of local_metabase_database_mappings columns; sync_enabled is the INTEGER column, hence a number rather than a boolean. */ +interface MappingRow { + metabase_database_id: number; + metabase_database_name: string | null; + metabase_engine: string | null; + target_connection_id: string | null; + sync_enabled: number; +} +/** Columns read from local_metabase_sync_config, without its key and timestamp columns. */ +interface SyncConfigRow { + sync_mode: MetabaseSyncMode; + default_tag_names_json: string; +} +/** + * Decode a default_tag_names_json value: returns the string elements of the parsed array + * (non-string elements are dropped), or an empty list when the JSON is not an array. + * + * NOTE(review): JSON.parse throws on malformed text and the error is not caught here. + */ +function parseDefaultTagNames(raw: string): string[] { + const parsed = JSON.parse(raw); + return Array.isArray(parsed) ? 
parsed.filter((value): value is string => typeof value === 'string') : []; +} + +export class LocalMetabaseSourceStateReader implements MetabaseSourceStateReader { + private readonly db: Database.Database; + private readonly now: () => Date; + + constructor(options: LocalMetabaseSourceStateStoreOptions) { + mkdirSync(dirname(options.dbPath), { recursive: true }); + this.db = new Database(options.dbPath); + this.db.pragma('journal_mode = WAL'); + this.db.pragma('foreign_keys = ON'); + this.now = options.now ?? (() => new Date()); + this.db.exec(` + CREATE TABLE IF NOT EXISTS local_metabase_sync_config ( + metabase_connection_id TEXT PRIMARY KEY, + sync_mode TEXT NOT NULL, + default_tag_names_json TEXT NOT NULL, + updated_at TEXT NOT NULL + ); + + CREATE TABLE IF NOT EXISTS local_metabase_selections ( + metabase_connection_id TEXT NOT NULL, + selection_type TEXT NOT NULL, + metabase_object_id INTEGER NOT NULL, + PRIMARY KEY (metabase_connection_id, selection_type, metabase_object_id) + ); + + CREATE TABLE IF NOT EXISTS local_metabase_database_mappings ( + metabase_connection_id TEXT NOT NULL, + metabase_database_id INTEGER NOT NULL, + metabase_database_name TEXT, + metabase_engine TEXT, + metabase_host TEXT, + metabase_db_name TEXT, + target_connection_id TEXT, + sync_enabled INTEGER NOT NULL DEFAULT 0, + source TEXT NOT NULL, + updated_at TEXT NOT NULL, + PRIMARY KEY (metabase_connection_id, metabase_database_id) + ); + `); + } + + async applyYamlBootstrap(input: ApplyLocalMetabaseYamlBootstrapInput): Promise { + const timestamp = this.now().toISOString(); + const apply = this.db.transaction(() => { + const syncConfigExists = this.db + .prepare('SELECT 1 FROM local_metabase_sync_config WHERE metabase_connection_id = ?') + .get(input.connectionId); + if (!syncConfigExists) { + this.db + .prepare( + ` + INSERT INTO local_metabase_sync_config ( + metabase_connection_id, + sync_mode, + default_tag_names_json, + updated_at + ) + VALUES (?, ?, ?, ?) 
+ `, + ) + .run(input.connectionId, input.syncMode, JSON.stringify(input.defaultTagNames), timestamp); + + const insertSelection = this.db.prepare(` + INSERT INTO local_metabase_selections ( + metabase_connection_id, + selection_type, + metabase_object_id + ) + VALUES (?, ?, ?) + `); + for (const selection of input.selections) { + insertSelection.run(input.connectionId, selection.selectionType, selection.metabaseObjectId); + } + } + + const existing = this.db.prepare(` + SELECT target_connection_id, source + FROM local_metabase_database_mappings + WHERE metabase_connection_id = ? AND metabase_database_id = ? + `); + const insert = this.db.prepare(` + INSERT INTO local_metabase_database_mappings ( + metabase_connection_id, + metabase_database_id, + metabase_database_name, + metabase_engine, + metabase_host, + metabase_db_name, + target_connection_id, + sync_enabled, + source, + updated_at + ) + VALUES (?, ?, NULL, NULL, NULL, NULL, ?, ?, 'klo.yaml', ?) + `); + const updateRefreshRow = this.db.prepare(` + UPDATE local_metabase_database_mappings + SET target_connection_id = ?, + sync_enabled = ?, + source = 'klo.yaml', + updated_at = ? + WHERE metabase_connection_id = ? + AND metabase_database_id = ? + AND source = 'refresh' + AND target_connection_id IS NULL + `); + + for (const mapping of input.mappings) { + const row = existing.get(input.connectionId, mapping.metabaseDatabaseId) as + | { target_connection_id: string | null; source: LocalMetabaseMappingSource } + | undefined; + if (!row) { + insert.run( + input.connectionId, + mapping.metabaseDatabaseId, + mapping.targetConnectionId, + mapping.syncEnabled ? 1 : 0, + timestamp, + ); + continue; + } + if (row.source === 'refresh' && row.target_connection_id === null) { + updateRefreshRow.run( + mapping.targetConnectionId, + mapping.syncEnabled ? 
1 : 0, + timestamp, + input.connectionId, + mapping.metabaseDatabaseId, + ); + } + } + }); + + apply(); + } + + async replaceSourceState(input: ReplaceLocalMetabaseSourceStateInput): Promise { + const timestamp = this.now().toISOString(); + const syncMode = input.syncMode ?? 'ALL'; + const selections = input.selections ?? []; + const defaultTagNames = input.defaultTagNames ?? []; + + const replace = this.db.transaction(() => { + this.db + .prepare( + ` + INSERT INTO local_metabase_sync_config ( + metabase_connection_id, + sync_mode, + default_tag_names_json, + updated_at + ) + VALUES (?, ?, ?, ?) + ON CONFLICT(metabase_connection_id) DO UPDATE SET + sync_mode = excluded.sync_mode, + default_tag_names_json = excluded.default_tag_names_json, + updated_at = excluded.updated_at + `, + ) + .run(input.connectionId, syncMode, JSON.stringify(defaultTagNames), timestamp); + + this.db.prepare('DELETE FROM local_metabase_selections WHERE metabase_connection_id = ?').run(input.connectionId); + const insertSelection = this.db.prepare(` + INSERT INTO local_metabase_selections ( + metabase_connection_id, + selection_type, + metabase_object_id + ) + VALUES (?, ?, ?) + `); + for (const selection of selections) { + insertSelection.run(input.connectionId, selection.selectionType, selection.metabaseObjectId); + } + + this.db + .prepare('DELETE FROM local_metabase_database_mappings WHERE metabase_connection_id = ?') + .run(input.connectionId); + const insertMapping = this.db.prepare(` + INSERT INTO local_metabase_database_mappings ( + metabase_connection_id, + metabase_database_id, + metabase_database_name, + metabase_engine, + metabase_host, + metabase_db_name, + target_connection_id, + sync_enabled, + source, + updated_at + ) + VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?) 
+ `); + for (const mapping of input.mappings) { + insertMapping.run( + input.connectionId, + mapping.metabaseDatabaseId, + mapping.metabaseDatabaseName, + mapping.metabaseEngine, + mapping.metabaseHost, + mapping.metabaseDbName, + mapping.targetConnectionId, + mapping.syncEnabled ? 1 : 0, + mapping.source, + timestamp, + ); + } + }); + + replace(); + } + + async listDatabaseMappings(connectionId: string): Promise { + const rows = this.db + .prepare( + ` + SELECT + metabase_database_id, + metabase_database_name, + metabase_engine, + metabase_host, + metabase_db_name, + target_connection_id, + sync_enabled, + source + FROM local_metabase_database_mappings + WHERE metabase_connection_id = ? + ORDER BY metabase_database_id + `, + ) + .all(connectionId) as Array<{ + metabase_database_id: number; + metabase_database_name: string | null; + metabase_engine: string | null; + metabase_host: string | null; + metabase_db_name: string | null; + target_connection_id: string | null; + sync_enabled: number; + source: LocalMetabaseMappingSource; + }>; + + return rows.map((row) => ({ + metabaseDatabaseId: row.metabase_database_id, + metabaseDatabaseName: row.metabase_database_name, + metabaseEngine: row.metabase_engine, + metabaseHost: row.metabase_host, + metabaseDbName: row.metabase_db_name, + targetConnectionId: row.target_connection_id, + syncEnabled: row.sync_enabled === 1, + source: row.source, + })); + } + + async upsertDatabaseMapping(input: UpsertLocalMetabaseDatabaseMappingInput): Promise { + const timestamp = this.now().toISOString(); + this.db + .prepare( + ` + INSERT INTO local_metabase_database_mappings ( + metabase_connection_id, + metabase_database_id, + metabase_database_name, + metabase_engine, + metabase_host, + metabase_db_name, + target_connection_id, + sync_enabled, + source, + updated_at + ) + VALUES (?, ?, NULL, NULL, NULL, NULL, ?, ?, ?, ?) 
+ ON CONFLICT(metabase_connection_id, metabase_database_id) DO UPDATE SET + target_connection_id = excluded.target_connection_id, + sync_enabled = excluded.sync_enabled, + source = excluded.source, + updated_at = excluded.updated_at + `, + ) + .run( + input.connectionId, + input.metabaseDatabaseId, + input.targetConnectionId, + input.syncEnabled ? 1 : 0, + input.source, + timestamp, + ); + } + + async setMappingSyncEnabled(input: SetLocalMetabaseMappingSyncEnabledInput): Promise { + const timestamp = this.now().toISOString(); + this.db + .prepare( + ` + UPDATE local_metabase_database_mappings + SET sync_enabled = ?, updated_at = ? + WHERE metabase_connection_id = ? AND metabase_database_id = ? + `, + ) + .run(input.syncEnabled ? 1 : 0, timestamp, input.connectionId, input.metabaseDatabaseId); + } + + async setSyncState(input: SetLocalMetabaseSyncStateInput): Promise { + const timestamp = this.now().toISOString(); + const write = this.db.transaction(() => { + this.db + .prepare( + ` + INSERT INTO local_metabase_sync_config ( + metabase_connection_id, + sync_mode, + default_tag_names_json, + updated_at + ) + VALUES (?, ?, ?, ?) + ON CONFLICT(metabase_connection_id) DO UPDATE SET + sync_mode = excluded.sync_mode, + default_tag_names_json = excluded.default_tag_names_json, + updated_at = excluded.updated_at + `, + ) + .run(input.connectionId, input.syncMode, JSON.stringify(input.defaultTagNames), timestamp); + + this.db.prepare('DELETE FROM local_metabase_selections WHERE metabase_connection_id = ?').run(input.connectionId); + const insertSelection = this.db.prepare(` + INSERT INTO local_metabase_selections ( + metabase_connection_id, + selection_type, + metabase_object_id + ) + VALUES (?, ?, ?) 
+ `); + for (const selection of input.selections) { + insertSelection.run(input.connectionId, selection.selectionType, selection.metabaseObjectId); + } + }); + + write(); + } + + async refreshDiscoveredDatabases(input: RefreshLocalMetabaseDiscoveredDatabasesInput): Promise { + const timestamp = this.now().toISOString(); + const refresh = this.db.transaction(() => { + const upsert = this.db.prepare(` + INSERT INTO local_metabase_database_mappings ( + metabase_connection_id, + metabase_database_id, + metabase_database_name, + metabase_engine, + metabase_host, + metabase_db_name, + target_connection_id, + sync_enabled, + source, + updated_at + ) + VALUES (?, ?, ?, ?, ?, ?, NULL, 0, 'refresh', ?) + ON CONFLICT(metabase_connection_id, metabase_database_id) DO UPDATE SET + metabase_database_name = excluded.metabase_database_name, + metabase_engine = excluded.metabase_engine, + metabase_host = excluded.metabase_host, + metabase_db_name = excluded.metabase_db_name, + updated_at = excluded.updated_at + `); + + for (const database of input.discovered) { + upsert.run( + input.connectionId, + database.id, + database.name, + database.engine, + database.host, + database.dbName, + timestamp, + ); + } + }); + + refresh(); + } + + async clearDatabaseMappings(input: ClearLocalMetabaseMappingsInput): Promise { + if (input.metabaseDatabaseId === undefined) { + this.db.prepare('DELETE FROM local_metabase_database_mappings WHERE metabase_connection_id = ?').run(input.connectionId); + return; + } + this.db + .prepare('DELETE FROM local_metabase_database_mappings WHERE metabase_connection_id = ? AND metabase_database_id = ?') + .run(input.connectionId, input.metabaseDatabaseId); + } + + async getUnhydratedSyncEnabledMappingIds(connectionId: string): Promise { + const rows = this.db + .prepare( + ` + SELECT metabase_database_id + FROM local_metabase_database_mappings + WHERE metabase_connection_id = ? 
+ AND sync_enabled = 1 + AND target_connection_id IS NOT NULL + AND metabase_database_name IS NULL + ORDER BY metabase_database_id + `, + ) + .all(connectionId) as Array<{ metabase_database_id: number }>; + return rows.map((row) => row.metabase_database_id); + } + + async getSourceState(connectionId: string): Promise { + const config = this.db + .prepare('SELECT sync_mode, default_tag_names_json FROM local_metabase_sync_config WHERE metabase_connection_id = ?') + .get(connectionId) as SyncConfigRow | undefined; + const selections = this.db + .prepare( + ` + SELECT selection_type, metabase_object_id + FROM local_metabase_selections + WHERE metabase_connection_id = ? + ORDER BY selection_type, metabase_object_id + `, + ) + .all(connectionId) as SelectionRow[]; + const mappings = this.db + .prepare( + ` + SELECT + metabase_database_id, + metabase_database_name, + metabase_engine, + target_connection_id, + sync_enabled + FROM local_metabase_database_mappings + WHERE metabase_connection_id = ? + AND metabase_database_name IS NOT NULL + ORDER BY metabase_database_id + `, + ) + .all(connectionId) as MappingRow[]; + + return { + syncMode: config?.sync_mode ?? 'ALL', + defaultTagNames: config ? 
parseDefaultTagNames(config.default_tag_names_json) : [], + selections: selections.map((selection) => ({ + selectionType: selection.selection_type, + metabaseObjectId: selection.metabase_object_id, + })), + mappings: mappings.map((mapping) => ({ + metabaseDatabaseId: mapping.metabase_database_id, + metabaseDatabaseName: mapping.metabase_database_name, + metabaseEngine: mapping.metabase_engine, + targetConnectionId: mapping.target_connection_id, + syncEnabled: mapping.sync_enabled === 1, + })), + }; + } +} diff --git a/packages/context/src/ingest/adapters/metabase/mapping.test.ts b/packages/context/src/ingest/adapters/metabase/mapping.test.ts new file mode 100644 index 00000000..79ccb2df --- /dev/null +++ b/packages/context/src/ingest/adapters/metabase/mapping.test.ts @@ -0,0 +1,295 @@ +import { describe, expect, it, vi } from 'vitest'; +import type { MetabaseRuntimeClient } from './client-port.js'; +import { + METABASE_ENGINE_TO_CONNECTION_TYPE, + computeMetabaseMappingDrift, + computeMetabaseMappingPhysicalMismatches, + discoverMetabaseDatabases, + findBestMatch, + refreshMetabaseMapping, + validateMappingPhysicalMatch, + validateMetabaseMappings, +} from './mapping.js'; + +describe('discoverMetabaseDatabases', () => { + it('filters sample databases and extracts host plus database names from Metabase details', async () => { + const client = { + getDatabases: vi.fn().mockResolvedValue([ + { + id: 1, + name: 'Sample', + engine: 'postgres', + details: { host: 'sample.internal', dbname: 'sample' }, + is_sample: true, + }, + { + id: 2, + name: 'Analytics', + engine: 'postgres', + details: { host: 'pg.internal:5432', dbname: 'analytics' }, + is_sample: false, + }, + { + id: 3, + name: 'Warehouse', + engine: 'mysql', + details: { host: 'mysql.internal', db: 'warehouse' }, + is_sample: false, + }, + ]), + } as Pick as MetabaseRuntimeClient; + + await expect(discoverMetabaseDatabases(client)).resolves.toEqual([ + { id: 2, name: 'Analytics', engine: 'postgres', host: 
import { describe, expect, it, vi } from 'vitest';
import type { MetabaseRuntimeClient } from './client-port.js';
import {
  METABASE_ENGINE_TO_CONNECTION_TYPE,
  computeMetabaseMappingDrift,
  computeMetabaseMappingPhysicalMismatches,
  discoverMetabaseDatabases,
  findBestMatch,
  refreshMetabaseMapping,
  validateMappingPhysicalMatch,
  validateMetabaseMappings,
} from './mapping.js';

// Unit tests for the pure mapping helpers; the Metabase client is stubbed with
// only the `getDatabases` method these helpers call.

describe('discoverMetabaseDatabases', () => {
  it('filters sample databases and extracts host plus database names from Metabase details', async () => {
    const client = {
      getDatabases: vi.fn().mockResolvedValue([
        {
          id: 1,
          name: 'Sample',
          engine: 'postgres',
          details: { host: 'sample.internal', dbname: 'sample' },
          is_sample: true,
        },
        {
          id: 2,
          name: 'Analytics',
          engine: 'postgres',
          details: { host: 'pg.internal:5432', dbname: 'analytics' },
          is_sample: false,
        },
        {
          id: 3,
          name: 'Warehouse',
          engine: 'mysql',
          details: { host: 'mysql.internal', db: 'warehouse' },
          is_sample: false,
        },
      ]),
    } as Pick<MetabaseRuntimeClient, 'getDatabases'> as MetabaseRuntimeClient;

    await expect(discoverMetabaseDatabases(client)).resolves.toEqual([
      { id: 2, name: 'Analytics', engine: 'postgres', host: 'pg.internal:5432', dbName: 'analytics' },
      { id: 3, name: 'Warehouse', engine: 'mysql', host: 'mysql.internal', dbName: 'warehouse' },
    ]);
  });
});

describe('computeMetabaseMappingDrift', () => {
  it('reports unmapped discovered databases, stale mappings, and in-sync mappings', () => {
    const drift = computeMetabaseMappingDrift({
      currentMappings: {
        '2': 'target-postgres',
        '9': 'target-stale',
      },
      discovered: [
        { id: 2, name: 'Analytics', engine: 'postgres', host: 'pg.internal', dbName: 'analytics' },
        { id: 3, name: 'Warehouse', engine: 'mysql', host: 'mysql.internal', dbName: 'warehouse' },
      ],
    });

    expect(drift).toEqual({
      unmappedDiscovered: [
        { id: 3, name: 'Warehouse', engine: 'mysql', host: 'mysql.internal', dbName: 'warehouse' },
      ],
      staleMappings: [{ id: '9', reason: 'database_not_found' }],
      inSync: [{ id: 2, kloConnectionId: 'target-postgres' }],
    });
  });
});

describe('validateMetabaseMappings', () => {
  it('accepts mappings whose target connection ids exist', () => {
    expect(
      validateMetabaseMappings({
        mappings: { '2': 'target-postgres' },
        knownKloConnectionIds: new Set(['target-postgres']),
      }),
    ).toEqual({ ok: true });
  });

  it('returns one error per missing target connection id', () => {
    expect(
      validateMetabaseMappings({
        mappings: { '2': 'missing-target', '3': 'target-mysql' },
        knownKloConnectionIds: new Set(['target-mysql']),
      }),
    ).toEqual({
      ok: false,
      errors: [{ key: '2', reason: 'KLO connection missing-target does not exist' }],
    });
  });
});

describe('validateMappingPhysicalMatch', () => {
  it('returns null when Snowflake mapping points at the same database', () => {
    expect(
      validateMappingPhysicalMatch(
        { metabaseEngine: 'snowflake', metabaseDbName: 'ANALYTICS', metabaseHost: null },
        { connection_type: 'SNOWFLAKE', database: 'ANALYTICS', account: 'EMOVRJS-CZ07756' },
      ),
    ).toBeNull();
  });

  it('returns a reason when Snowflake mapping points at a different database', () => {
    const reason = validateMappingPhysicalMatch(
      { metabaseEngine: 'snowflake', metabaseDbName: 'SNAPSHOTS', metabaseHost: null },
      { connection_type: 'SNOWFLAKE', database: 'ANALYTICS', account: 'EMOVRJS-CZ07756' },
    );

    expect(reason).toContain('SNAPSHOTS');
    expect(reason).toContain('ANALYTICS');
  });

  it('returns a reason when engine type mismatches', () => {
    const reason = validateMappingPhysicalMatch(
      { metabaseEngine: 'snowflake', metabaseDbName: 'ANALYTICS', metabaseHost: null },
      { connection_type: 'POSTGRESQL', database: 'ANALYTICS', host: 'pg.internal' },
    );

    expect(reason).toContain('engine');
  });

  it('returns null when Postgres host and database both match after normalization', () => {
    expect(
      validateMappingPhysicalMatch(
        { metabaseEngine: 'postgres', metabaseDbName: 'app', metabaseHost: 'PG.INTERNAL:5432' },
        { connection_type: 'POSTGRESQL', host: 'pg.internal', database: 'APP' },
      ),
    ).toBeNull();
  });

  it('returns a reason when Postgres host matches but database differs', () => {
    const reason = validateMappingPhysicalMatch(
      { metabaseEngine: 'postgres', metabaseDbName: 'app', metabaseHost: 'pg.internal' },
      { connection_type: 'POSTGRESQL', host: 'pg.internal', database: 'other_app' },
    );

    expect(reason).toContain('app');
    expect(reason).toContain('other_app');
  });

  it('uses BigQuery dataset_id before project_id when comparing database names', () => {
    expect(
      validateMappingPhysicalMatch(
        { metabaseEngine: 'bigquery', metabaseDbName: 'analytics_dataset', metabaseHost: null },
        { connection_type: 'BIGQUERY', dataset_id: 'analytics_dataset', project_id: 'warehouse-project' },
      ),
    ).toBeNull();
  });

  it('returns null for unknown engines because KLO cannot validate them', () => {
    expect(
      validateMappingPhysicalMatch(
        { metabaseEngine: 'unknown-engine', metabaseDbName: 'X', metabaseHost: 'host' },
        { connection_type: 'OTHER' },
      ),
    ).toBeNull();
  });
});

describe('computeMetabaseMappingPhysicalMismatches', () => {
  it('returns only mismatched physical mappings', () => {
    expect(
      computeMetabaseMappingPhysicalMismatches([
        {
          mappingId: 'mapping-ok',
          metabase: { metabaseEngine: 'postgres', metabaseHost: 'pg.internal', metabaseDbName: 'app' },
          target: { connection_type: 'POSTGRESQL', host: 'pg.internal', database: 'app' },
        },
        {
          mappingId: 'mapping-bad',
          metabase: { metabaseEngine: 'postgres', metabaseHost: 'pg.internal', metabaseDbName: 'app' },
          target: { connection_type: 'POSTGRESQL', host: 'pg.internal', database: 'other_app' },
        },
      ]),
    ).toEqual([
      {
        mappingId: 'mapping-bad',
        reason: "Metabase database 'app' does not match KLO connection database 'other_app'",
      },
    ]);
  });
});

describe('refreshMetabaseMapping', () => {
  it('combines discovery drift and physical validation through a caller-provided target resolver', async () => {
    const client = {
      getDatabases: vi.fn().mockResolvedValue([
        {
          id: 2,
          name: 'Analytics',
          engine: 'postgres',
          details: { host: 'pg.internal', dbname: 'analytics' },
          is_sample: false,
        },
      ]),
    } as Pick<MetabaseRuntimeClient, 'getDatabases'> as MetabaseRuntimeClient;

    await expect(
      refreshMetabaseMapping({
        client,
        currentMappings: { '2': 'target-postgres' },
        resolveKloConnectionPhysicalInfo: vi.fn().mockResolvedValue({
          connection_type: 'POSTGRESQL',
          host: 'pg.internal',
          database: 'wrong_database',
        }),
      }),
    ).resolves.toEqual({
      drift: {
        unmappedDiscovered: [],
        staleMappings: [],
        inSync: [{ id: 2, kloConnectionId: 'target-postgres' }],
      },
      physicalMismatches: [
        {
          mappingId: '2',
          reason: "Metabase database 'analytics' does not match KLO connection database 'wrong_database'",
        },
      ],
    });
  });
});

describe('findBestMatch', () => {
  const candidates = [
    {
      id: 'snowflake-target',
      name: 'Warehouse Snowflake',
      connection_type: 'SNOWFLAKE',
      connection_params: { account: 'EMOVRJS-CZ07756', database: 'ANALYTICS' },
    },
    {
      id: 'postgres-host-only',
      name: 'Host Only Postgres',
      connection_type: 'POSTGRESQL',
      connection_params: { host: 'pg.internal', database: 'other_app' },
    },
    {
      id: 'postgres-db-only',
      name: 'Database Only Postgres',
      connection_type: 'POSTGRESQL',
      connection_params: { host: 'other.internal', database: 'app' },
    },
    {
      id: 'postgres-full',
      name: 'Full Postgres',
      connection_type: 'POSTGRESQL',
      connection_params: { host: 'pg.internal', database: 'app' },
    },
  ];

  it('chooses a host-and-database match over weaker matches', () => {
    expect(
      findBestMatch({ metabaseEngine: 'postgres', metabaseHost: 'pg.internal:5432', metabaseDbName: 'APP' }, candidates),
    ).toEqual({
      connectionId: 'postgres-full',
      connectionName: 'Full Postgres',
      reason: 'host_and_database',
    });
  });

  it('falls back to database-only matching when host does not match', () => {
    expect(
      findBestMatch(
        { metabaseEngine: 'postgres', metabaseHost: 'unknown.internal', metabaseDbName: 'app' },
        candidates,
      ),
    ).toEqual({
      connectionId: 'postgres-db-only',
      connectionName: 'Database Only Postgres',
      reason: 'database_only',
    });
  });

  it('returns null for unsupported Metabase engines', () => {
    expect(
      findBestMatch({ metabaseEngine: 'unknown-engine', metabaseHost: 'pg.internal', metabaseDbName: 'app' }, candidates),
    ).toBeNull();
  });
});

describe('METABASE_ENGINE_TO_CONNECTION_TYPE', () => {
  it('keeps the server-supported Metabase engine table in KLO', () => {
    expect(METABASE_ENGINE_TO_CONNECTION_TYPE).toMatchObject({
      postgres: 'POSTGRESQL',
      bigquery: 'BIGQUERY',
      'bigquery-cloud-sdk': 'BIGQUERY',
      snowflake: 'SNOWFLAKE',
      sqlserver: 'SQLSERVER',
      mysql: 'MYSQL',
    });
  });
});
import type { MetabaseDatabase, MetabaseRuntimeClient } from './client-port.js';

/** Metabase engine identifier -> KLO connection type, for the engines this module can validate and auto-match. */
export const METABASE_ENGINE_TO_CONNECTION_TYPE = {
  postgres: 'POSTGRESQL',
  bigquery: 'BIGQUERY',
  'bigquery-cloud-sdk': 'BIGQUERY',
  snowflake: 'SNOWFLAKE',
  sqlserver: 'SQLSERVER',
  mysql: 'MYSQL',
} as const;

export type MetabaseMappedConnectionType =
  (typeof METABASE_ENGINE_TO_CONNECTION_TYPE)[keyof typeof METABASE_ENGINE_TO_CONNECTION_TYPE];

export interface DiscoveredMetabaseDatabase {
  id: number;
  name: string;
  engine: string;
  host: string | null;
  dbName: string | null;
}

export interface MetabaseMappingDrift {
  /** Discovered databases with no target connection configured. */
  unmappedDiscovered: DiscoveredMetabaseDatabase[];
  /** Mapping keys whose Metabase database was not discovered. */
  staleMappings: Array<{ id: string; reason: 'database_not_found' }>;
  /** Mappings whose database was discovered and that have a target connection. */
  inSync: Array<{ id: number; kloConnectionId: string }>;
}

export interface MappingPhysicalInfo {
  metabaseEngine: string | null;
  metabaseDbName: string | null;
  metabaseHost: string | null;
}

export interface KloConnectionPhysicalInfo {
  connection_type: string;
  database?: unknown;
  host?: unknown;
  account?: unknown;
  dataset_id?: unknown;
  project_id?: unknown;
  [key: string]: unknown;
}

export interface PhysicalMismatchInput {
  mappingId: string;
  metabase: MappingPhysicalInfo;
  target: KloConnectionPhysicalInfo;
}

export interface PhysicalMismatch {
  mappingId: string;
  reason: string;
}

export interface MappingRefreshReport {
  drift: MetabaseMappingDrift;
  physicalMismatches: PhysicalMismatch[];
}

export type MetabaseMappingValidationResult =
  | { ok: true }
  | { ok: false; errors: Array<{ key: string; reason: string }> };

export interface AutoMatchCandidate {
  id: string;
  name: string;
  connection_type: string;
  connection_params: unknown;
}

export interface AutoMatchResult {
  connectionId: string;
  connectionName: string;
  reason: 'host_and_database' | 'database_only' | 'host_only';
}

/** Mapping table value: a target connection id, or null/undefined/'' when unmapped. */
type MappingTarget = string | null | undefined;

function isRecord(value: unknown): value is Record<string, unknown> {
  return value !== null && typeof value === 'object' && !Array.isArray(value);
}

/** Returns `record[key]` when it is a non-empty string, otherwise null. */
function readString(record: Record<string, unknown>, key: string): string | null {
  const value = record[key];
  return typeof value === 'string' && value.length > 0 ? value : null;
}

/** Lower-cases a host and strips a trailing `:port`; null for non-strings and ''. */
function normalizeHost(host: unknown): string | null {
  if (typeof host !== 'string' || host.length === 0) {
    return null;
  }
  return host.toLowerCase().replace(/:\d+$/, '');
}

/** Lower-cases a database name; null for non-strings and ''. */
function normalizeName(name: unknown): string | null {
  if (typeof name !== 'string' || name.length === 0) {
    return null;
  }
  return name.toLowerCase();
}

function displayValue(value: unknown): string {
  return typeof value === 'string' && value.length > 0 ? value : 'unknown';
}

/** True when a mapping table value names a target connection. */
function isMappedTarget(target: MappingTarget): target is string {
  return typeof target === 'string' && target.length > 0;
}

/**
 * Resolves a Metabase engine name (case-insensitive) to the KLO connection type
 * expected for it. Returns null for missing or unsupported engines.
 *
 * Only own properties of the table are consulted, so names such as
 * `constructor` cannot resolve to inherited Object.prototype members.
 */
function resolveExpectedConnectionType(
  rawEngine: string | null | undefined,
): { engine: string; expectedType: MetabaseMappedConnectionType } | null {
  const engine = rawEngine?.toLowerCase();
  if (!engine || !Object.prototype.hasOwnProperty.call(METABASE_ENGINE_TO_CONNECTION_TYPE, engine)) {
    return null;
  }
  return {
    engine,
    expectedType: METABASE_ENGINE_TO_CONNECTION_TYPE[engine as keyof typeof METABASE_ENGINE_TO_CONNECTION_TYPE],
  };
}

/** Database identifier of a target; for BigQuery the dataset wins over project and `database`. */
function getTargetDatabase(target: KloConnectionPhysicalInfo): unknown {
  if (target.connection_type === 'BIGQUERY') {
    return target.dataset_id ?? target.project_id ?? target.database;
  }
  return target.database;
}

/** Host-like parameter of a candidate connection (Snowflake uses `account`). */
function extractHost(params: Record<string, unknown>, connectionType: string): string | null {
  switch (connectionType) {
    case 'POSTGRESQL':
    case 'SQLSERVER':
    case 'MYSQL':
      return readString(params, 'host');
    case 'SNOWFLAKE':
      return readString(params, 'account');
    default:
      return null;
  }
}

/** Database-like parameter of a candidate connection (BigQuery uses `dataset_id`). */
function extractDatabase(params: Record<string, unknown>, connectionType: string): string | null {
  switch (connectionType) {
    case 'POSTGRESQL':
    case 'SQLSERVER':
    case 'SNOWFLAKE':
    case 'MYSQL':
      return readString(params, 'database');
    case 'BIGQUERY':
      return readString(params, 'dataset_id');
    default:
      return null;
  }
}

function toDiscoveredMetabaseDatabase(database: MetabaseDatabase): DiscoveredMetabaseDatabase {
  const details = isRecord(database.details) ? database.details : {};
  return {
    id: database.id,
    name: database.name,
    engine: database.engine ?? '',
    host: readString(details, 'host'),
    // Metabase details carry the database name as `dbname` or `db`.
    dbName: readString(details, 'dbname') ?? readString(details, 'db'),
  };
}

/** Lists the Metabase databases, excluding those flagged `is_sample`. */
export async function discoverMetabaseDatabases(
  client: Pick<MetabaseRuntimeClient, 'getDatabases'>,
): Promise<DiscoveredMetabaseDatabase[]> {
  const databases = await client.getDatabases();
  return databases.filter((database) => !database.is_sample).map(toDiscoveredMetabaseDatabase);
}

/**
 * Compares the configured mappings (keyed by stringified Metabase database id)
 * with the discovered databases.
 *
 * A mapping counts as "mapped" only when its value is a non-empty string; the
 * same rule decides `unmappedDiscovered` and `inSync`, so no database can be
 * reported in both.
 */
export function computeMetabaseMappingDrift(args: {
  currentMappings: Record<string, MappingTarget>;
  discovered: DiscoveredMetabaseDatabase[];
}): MetabaseMappingDrift {
  const discoveredById = new Map(args.discovered.map((database) => [String(database.id), database]));
  const unmappedDiscovered = args.discovered.filter(
    (database) => !isMappedTarget(args.currentMappings[String(database.id)]),
  );
  const staleMappings = Object.keys(args.currentMappings)
    .filter((id) => !discoveredById.has(id))
    .map((id) => ({ id, reason: 'database_not_found' as const }));

  const inSync: MetabaseMappingDrift['inSync'] = [];
  for (const [id, kloConnectionId] of Object.entries(args.currentMappings)) {
    if (discoveredById.has(id) && isMappedTarget(kloConnectionId)) {
      inSync.push({ id: Number(id), kloConnectionId });
    }
  }

  return { unmappedDiscovered, staleMappings, inSync };
}

/** Checks that every mapped target id is a known KLO connection; unmapped entries are ignored. */
export function validateMetabaseMappings(args: {
  mappings: Record<string, MappingTarget>;
  knownKloConnectionIds: Set<string>;
}): MetabaseMappingValidationResult {
  const errors: Array<{ key: string; reason: string }> = [];
  for (const [key, connectionId] of Object.entries(args.mappings)) {
    if (!connectionId) {
      continue;
    }
    if (!args.knownKloConnectionIds.has(connectionId)) {
      errors.push({ key, reason: `KLO connection ${connectionId} does not exist` });
    }
  }
  return errors.length === 0 ? { ok: true } : { ok: false, errors };
}

/**
 * Checks whether a Metabase database and a KLO connection plausibly point at the
 * same physical database.
 *
 * @returns null when they match or when the engine is missing/unsupported (nothing
 *   can be validated); otherwise a human-readable mismatch reason. Host and database
 *   comparisons are case-insensitive, ignore a trailing port, and are skipped when
 *   either side is unknown.
 */
export function validateMappingPhysicalMatch(
  mapping: MappingPhysicalInfo,
  target: KloConnectionPhysicalInfo,
): string | null {
  const resolved = resolveExpectedConnectionType(mapping.metabaseEngine);
  if (!resolved) {
    return null;
  }
  const { engine, expectedType } = resolved;

  if (target.connection_type !== expectedType) {
    return `Metabase database engine '${engine}' does not match KLO connection type '${target.connection_type}'`;
  }

  const metabaseDb = normalizeName(mapping.metabaseDbName);
  const targetDb = normalizeName(getTargetDatabase(target));
  const databaseMismatch = (): string | null =>
    metabaseDb && targetDb && metabaseDb !== targetDb
      ? `Metabase database '${mapping.metabaseDbName}' does not match KLO connection database '${displayValue(
          getTargetDatabase(target),
        )}'`
      : null;

  if (engine === 'snowflake' || engine === 'bigquery' || engine === 'bigquery-cloud-sdk') {
    // No host comparison for these engines; only the database/dataset is checked.
    return databaseMismatch();
  }

  if (engine === 'postgres' || engine === 'mysql' || engine === 'sqlserver') {
    const metabaseHost = normalizeHost(mapping.metabaseHost);
    const targetHost = normalizeHost(target.host);

    if (metabaseHost && targetHost && metabaseHost !== targetHost) {
      return `Metabase host '${mapping.metabaseHost}' does not match KLO connection host '${displayValue(
        target.host,
      )}'`;
    }
    return databaseMismatch();
  }

  return null;
}

/** Runs `validateMappingPhysicalMatch` over each input and returns only the failures. */
export function computeMetabaseMappingPhysicalMismatches(inputs: PhysicalMismatchInput[]): PhysicalMismatch[] {
  const mismatches: PhysicalMismatch[] = [];
  for (const input of inputs) {
    const reason = validateMappingPhysicalMatch(input.metabase, input.target);
    if (reason) {
      mismatches.push({ mappingId: input.mappingId, reason });
    }
  }
  return mismatches;
}

/**
 * Discovers the Metabase databases, computes drift against `currentMappings`, and
 * physically validates every in-sync mapping against the target returned by
 * `resolveKloConnectionPhysicalInfo`. A target that resolves to a falsy value is
 * reported as a mismatch ("does not exist"). Targets are resolved one at a time,
 * in `inSync` order.
 */
export async function refreshMetabaseMapping(args: {
  client: Pick<MetabaseRuntimeClient, 'getDatabases'>;
  currentMappings: Record<string, MappingTarget>;
  resolveKloConnectionPhysicalInfo: (kloConnectionId: string) => Promise<KloConnectionPhysicalInfo | null | undefined>;
}): Promise<MappingRefreshReport> {
  const discovered = await discoverMetabaseDatabases(args.client);
  const drift = computeMetabaseMappingDrift({ currentMappings: args.currentMappings, discovered });
  const discoveredById = new Map(discovered.map((database) => [database.id, database]));
  const physicalMismatches: PhysicalMismatch[] = [];

  for (const mapping of drift.inSync) {
    const discoveredDatabase = discoveredById.get(mapping.id);
    if (!discoveredDatabase) {
      continue;
    }
    const target = await args.resolveKloConnectionPhysicalInfo(mapping.kloConnectionId);
    if (!target) {
      physicalMismatches.push({
        mappingId: String(mapping.id),
        reason: `KLO connection ${mapping.kloConnectionId} does not exist`,
      });
      continue;
    }
    const reason = validateMappingPhysicalMatch(
      {
        metabaseEngine: discoveredDatabase.engine,
        metabaseHost: discoveredDatabase.host,
        metabaseDbName: discoveredDatabase.dbName,
      },
      target,
    );
    if (reason) {
      physicalMismatches.push({ mappingId: String(mapping.id), reason });
    }
  }

  return { drift, physicalMismatches };
}

/**
 * Picks the candidate connection that best matches a Metabase database.
 *
 * Only candidates of the connection type expected for the engine are considered.
 * Ranking: host and database (3) > database only (2) > host only (1); ties keep
 * the earliest candidate. Returns null when the engine is missing/unsupported or
 * nothing matches.
 */
export function findBestMatch(mapping: MappingPhysicalInfo, candidates: AutoMatchCandidate[]): AutoMatchResult | null {
  const resolved = resolveExpectedConnectionType(mapping.metabaseEngine);
  if (!resolved) {
    return null;
  }

  const compatibleConnections = candidates.filter((candidate) => candidate.connection_type === resolved.expectedType);
  if (compatibleConnections.length === 0) {
    return null;
  }

  const metabaseHost = normalizeHost(mapping.metabaseHost);
  const metabaseDb = normalizeName(mapping.metabaseDbName);
  let bestMatch: AutoMatchResult | null = null;
  let bestScore = 0;

  for (const connection of compatibleConnections) {
    if (!isRecord(connection.connection_params)) {
      continue;
    }

    const connHost = normalizeHost(extractHost(connection.connection_params, connection.connection_type));
    const connDb = normalizeName(extractDatabase(connection.connection_params, connection.connection_type));
    const hostMatch = metabaseHost !== null && metabaseHost === connHost;
    const dbMatch = metabaseDb !== null && metabaseDb === connDb;

    let score = 0;
    let reason: AutoMatchResult['reason'] = 'host_only';
    if (hostMatch && dbMatch) {
      score = 3;
      reason = 'host_and_database';
    } else if (dbMatch) {
      score = 2;
      reason = 'database_only';
    } else if (hostMatch) {
      score = 1;
    }

    // Strict '>' keeps the first candidate among equally good matches.
    if (score > bestScore) {
      bestScore = score;
      bestMatch = {
        connectionId: connection.id,
        connectionName: connection.name,
        reason,
      };
    }
  }

  return bestMatch;
}
expect(adapter.source).toBe('metabase'); + expect(adapter.skillNames).toEqual(['metabase_ingest']); + }); + + it('detect: true for a valid staged dir', async () => { + await writeFile(join(stagedDir, 'sync-config.json'), '{}', 'utf-8'); + await mkdir(join(stagedDir, 'cards'), { recursive: true }); + await writeFile(join(stagedDir, 'cards/1.json'), '{}', 'utf-8'); + expect(await adapter.detect(stagedDir)).toBe(true); + }); + + it('detect: false for a random empty dir', async () => { + expect(await adapter.detect(stagedDir)).toBe(false); + }); + + it('exposes a fetch() method (network-bound — real calls covered by fetch.spec.ts)', () => { + expect(typeof adapter.fetch).toBe('function'); + }); + + it('forwards fetch dependencies using the source-state reader port', async () => { + const client = { + getAllCards: vi.fn().mockResolvedValue([]), + getCollectionTree: vi.fn().mockResolvedValue([]), + getCollectionItems: vi.fn().mockResolvedValue([]), + cleanup: vi.fn().mockResolvedValue(undefined), + }; + const clientFactory = { + createClient: vi.fn().mockResolvedValue(client), + }; + const sourceStateReader = { + getSourceState: vi.fn().mockResolvedValue({ + syncMode: 'ALL', + selections: [], + defaultTagNames: [], + mappings: [ + { + metabaseDatabaseId: 42, + metabaseDatabaseName: 'Analytics', + metabaseEngine: 'postgres', + targetConnectionId: 'b2c3d4e5-f6a7-4890-abcd-ef0123456789', + syncEnabled: true, + }, + ], + }), + }; + const forwardingAdapter = new MetabaseSourceAdapter({ clientFactory, sourceStateReader }); + + await forwardingAdapter.fetch( + { + metabaseConnectionId: 'a1b2c3d4-e5f6-4789-9abc-def012345678', + metabaseDatabaseId: 42, + }, + stagedDir, + { connectionId: 'b2c3d4e5-f6a7-4890-abcd-ef0123456789', sourceKey: 'metabase' }, + ); + + expect(sourceStateReader.getSourceState).toHaveBeenCalledWith('a1b2c3d4-e5f6-4789-9abc-def012345678'); + expect(clientFactory.createClient).toHaveBeenCalledWith( + { + metabaseConnectionId: 
'a1b2c3d4-e5f6-4789-9abc-def012345678', + metabaseDatabaseId: 42, + }, + { connectionId: 'b2c3d4e5-f6a7-4890-abcd-ef0123456789', sourceKey: 'metabase' }, + ); + }); +}); + +describe('MetabaseSourceAdapter.describeScope', () => { + const adapter = new MetabaseSourceAdapter({} as any); + let dir: string; + + beforeEach(async () => { + dir = await mkdtemp(join(tmpdir(), 'mb-scope-')); + }); + afterEach(async () => { + await rm(dir, { recursive: true, force: true }); + }); + + async function writeSyncConfig(cfg: unknown): Promise { + await writeFile(join(dir, 'sync-config.json'), JSON.stringify(cfg), 'utf-8'); + } + + const BASE = { + metabaseConnectionId: 'a1b2c3d4-e5f6-4789-9abc-def012345678', + metabaseDatabaseId: 42, + defaultTagNames: [], + mapping: { + metabaseDatabaseId: 42, + metabaseDatabaseName: 'Analytics', + metabaseEngine: 'postgres', + targetConnectionId: 'b2c3d4e5-f6a7-4890-abcd-ef0123456789', + }, + }; + + it('returns a fingerprint + predicate for ONLY-scope staged dir', async () => { + await writeSyncConfig({ + ...BASE, + syncMode: 'ONLY', + selections: [{ selectionType: 'item', metabaseObjectId: 5 }], + }); + const scope = await adapter.describeScope(dir); + expect(scope.fingerprint).toMatch(/^[0-9a-f]{64}$/); + expect(scope.isPathInScope('cards/5.json')).toBe(true); + expect(scope.isPathInScope('cards/99.json')).toBe(false); + expect(scope.isPathInScope('sync-config.json')).toBe(true); + }); + + it('fingerprint is stable across invocations', async () => { + await writeSyncConfig({ + ...BASE, + syncMode: 'ONLY', + selections: [ + { selectionType: 'item', metabaseObjectId: 1 }, + { selectionType: 'item', metabaseObjectId: 2 }, + ], + }); + const a = await adapter.describeScope(dir); + const b = await adapter.describeScope(dir); + expect(a.fingerprint).toBe(b.fingerprint); + }); + + it('different syncMode produces different fingerprint', async () => { + await writeSyncConfig({ ...BASE, syncMode: 'ALL', selections: [] }); + const all = await 
adapter.describeScope(dir); + await writeSyncConfig({ + ...BASE, + syncMode: 'ONLY', + selections: [{ selectionType: 'item', metabaseObjectId: 1 }], + }); + const only = await adapter.describeScope(dir); + expect(all.fingerprint).not.toBe(only.fingerprint); + }); +}); diff --git a/packages/context/src/ingest/adapters/metabase/metabase.adapter.ts b/packages/context/src/ingest/adapters/metabase/metabase.adapter.ts new file mode 100644 index 00000000..1c0bb53b --- /dev/null +++ b/packages/context/src/ingest/adapters/metabase/metabase.adapter.ts @@ -0,0 +1,51 @@ +import { readFile } from 'node:fs/promises'; +import { join } from 'node:path'; +import type { ChunkResult, DiffSet, FetchContext, ScopeDescriptor, SourceAdapter } from '../../types.js'; +import { chunkMetabaseStagedDir } from './chunk.js'; +import type { MetabaseClientFactory } from './client-port.js'; +import { detectMetabaseStagedDir } from './detect.js'; +import { fetchMetabaseBundle } from './fetch.js'; +import { computeFetchScope, hashScope, isPathInMetabaseScope } from './fetch-scope.js'; +import type { MetabaseSourceStateReader } from './source-state-port.js'; +import { STAGED_FILES, stagedSyncConfigSchema } from './types.js'; + +export interface MetabaseSourceAdapterDeps { + clientFactory: MetabaseClientFactory; + sourceStateReader: MetabaseSourceStateReader; +} + +export class MetabaseSourceAdapter implements SourceAdapter { + readonly source = 'metabase'; + readonly skillNames: string[] = ['metabase_ingest']; + + constructor(private readonly deps: MetabaseSourceAdapterDeps) {} + + detect(stagedDir: string): Promise { + return detectMetabaseStagedDir(stagedDir); + } + + async fetch(pullConfig: unknown, stagedDir: string, ctx: FetchContext): Promise { + await fetchMetabaseBundle({ + pullConfig, + stagedDir, + ctx, + clientFactory: this.deps.clientFactory, + sourceStateReader: this.deps.sourceStateReader, + }); + } + + chunk(stagedDir: string, diffSet?: DiffSet): Promise { + return 
chunkMetabaseStagedDir(stagedDir, { diffSet }); + } + + async describeScope(stagedDir: string): Promise { + const body = await readFile(join(stagedDir, STAGED_FILES.syncConfig), 'utf-8'); + const syncConfig = stagedSyncConfigSchema.parse(JSON.parse(body)); + const scope = computeFetchScope(syncConfig); + const fingerprint = hashScope(scope); + return { + fingerprint, + isPathInScope: (p) => isPathInMetabaseScope(p, scope), + }; + } +} diff --git a/packages/context/src/ingest/adapters/metabase/serialize-card.test.ts b/packages/context/src/ingest/adapters/metabase/serialize-card.test.ts new file mode 100644 index 00000000..ff10ce59 --- /dev/null +++ b/packages/context/src/ingest/adapters/metabase/serialize-card.test.ts @@ -0,0 +1,222 @@ +import { describe, expect, it } from 'vitest'; +import { extractReferencedCardIds, serializeCard } from './serialize-card.js'; + +describe('extractReferencedCardIds', () => { + it('pulls ids out of template tags with type=card', () => { + const tags = [ + { name: 'orders', type: 'card', cardReference: 42 }, + { name: 'param', type: 'text' }, + ]; + expect(extractReferencedCardIds(tags, '')).toEqual([42]); + }); + + it('finds `{{#N}}` references in the SQL body even when the tag list lacks cardReference', () => { + const tags = [{ name: 'orders_ref', type: 'card' }]; + const sql = 'SELECT * FROM ({{#42}}) UNION ALL (SELECT * FROM {{#101}})'; + expect(extractReferencedCardIds(tags, sql).sort((a, b) => a - b)).toEqual([42, 101]); + }); + + it('dedupes card ids across tags and SQL body', () => { + const tags = [{ name: 'a', type: 'card', cardReference: 42 }]; + const sql = 'SELECT * FROM {{#42}}'; + expect(extractReferencedCardIds(tags, sql)).toEqual([42]); + }); + + it('returns [] when no references exist', () => { + expect(extractReferencedCardIds([], 'SELECT 1')).toEqual([]); + }); +}); + +describe('serializeCard', () => { + const baseCard = { + id: 7, + name: 'Daily orders', + description: 'Orders by day', + type: 'model', + 
database_id: 42, + collection_id: 5, + archived: false, + result_metadata: [ + { + name: 'order_count', + display_name: 'Count', + base_type: 'type/Integer', + semantic_type: null, + description: null, + fk_target_field_id: null, + }, + ], + } as const; + + it('returns a valid StagedCardFile with resolved SQL and template tags', () => { + const staged = serializeCard({ + card: baseCard as any, + resolvedSql: 'SELECT COUNT(*) AS order_count FROM orders', + templateTags: [], + collectionPath: ['Data', 'Orders'], + resolutionStatus: 'resolved', + }); + expect(staged.metabaseId).toBe(7); + expect(staged.name).toBe('Daily orders'); + expect(staged.collectionPath).toEqual(['Data', 'Orders']); + expect(staged.resolvedSql).toBe('SELECT COUNT(*) AS order_count FROM orders'); + expect(staged.referencedCardIds).toEqual([]); + expect(staged.resultMetadata).toHaveLength(1); + expect(staged.resultMetadata[0].name).toBe('order_count'); + }); + + it('persists resolutionStatus="resolved" when caller passes it', () => { + const staged = serializeCard({ + card: baseCard as any, + resolvedSql: 'SELECT 1', + templateTags: [], + collectionPath: [], + resolutionStatus: 'resolved', + }); + + expect(staged.resolutionStatus).toBe('resolved'); + }); + + it('persists resolutionStatus="fallback" when caller passes it', () => { + const staged = serializeCard({ + card: baseCard as any, + resolvedSql: 'SELECT * FROM {{#101}}', + templateTags: [{ name: 'ref', type: 'card', cardReference: 101 }], + collectionPath: [], + resolutionStatus: 'fallback', + }); + + expect(staged.resolutionStatus).toBe('fallback'); + }); + + it('extracts referencedCardIds from template tags + SQL body', () => { + const staged = serializeCard({ + card: baseCard as any, + resolvedSql: 'SELECT * FROM {{#101}}', + templateTags: [{ name: 'ref', type: 'card', cardReference: 101 }], + collectionPath: [], + resolutionStatus: 'resolved', + }); + expect(staged.referencedCardIds).toEqual([101]); + }); + + it('null description passes 
through as null, not empty string', () => { + const staged = serializeCard({ + card: { ...baseCard, description: null } as any, + resolvedSql: '', + templateTags: [], + collectionPath: [], + resolutionStatus: 'resolved', + }); + expect(staged.description).toBeNull(); + }); + + it('collectionId=`root` stays as the string literal "root"', () => { + const staged = serializeCard({ + card: { ...baseCard, collection_id: 'root' } as any, + resolvedSql: '', + templateTags: [], + collectionPath: [], + resolutionStatus: 'resolved', + }); + expect(staged.collectionId).toBe('root'); + }); + + it('persists parameters[] from the input card', () => { + const out = serializeCard({ + card: { + id: 1, + name: 'X', + description: null, + type: 'question', + database_id: 6, + collection_id: null, + archived: false, + result_metadata: [], + parameters: [ + { id: 'p1', name: 'auction_end', type: 'date/range', slug: 'auction_end', default: null, sectionId: 'date' }, + { id: 'p2', name: 'status', type: 'category', slug: 'status', default: 'active', sectionId: 'string' }, + ], + } as any, + resolvedSql: 'SELECT 1', + templateTags: [], + collectionPath: [], + resolutionStatus: 'resolved', + }); + expect(out.parameters).toHaveLength(2); + expect(out.parameters?.[0]).toMatchObject({ id: 'p1', name: 'auction_end', type: 'date/range' }); + }); + + it('persists field_ref on each result-metadata column', () => { + const out = serializeCard({ + card: { + id: 1, + name: 'X', + description: null, + type: 'question', + database_id: 6, + collection_id: null, + archived: false, + result_metadata: [ + { + name: 'customer_id', + base_type: 'type/Integer', + semantic_type: 'type/FK', + fk_target_field_id: 42, + field_ref: ['field', 99, null], + }, + ], + } as any, + resolvedSql: 'SELECT customer_id FROM x', + templateTags: [], + collectionPath: [], + resolutionStatus: 'resolved', + }); + expect(out.resultMetadata[0].field_ref).toEqual(['field', 99, null]); + }); + + it('persists lastRunAt and 
dashboardCount when present on the card', () => { + const out = serializeCard({ + card: { + id: 1, + name: 'X', + description: null, + type: 'question', + database_id: 6, + collection_id: null, + archived: false, + result_metadata: [], + last_run_at: '2026-04-27T10:00:00Z', + dashboard_count: 3, + } as any, + resolvedSql: 'SELECT 1', + templateTags: [], + collectionPath: [], + resolutionStatus: 'resolved', + }); + expect(out.lastRunAt).toBe('2026-04-27T10:00:00Z'); + expect(out.dashboardCount).toBe(3); + }); + + it('omits the new fields gracefully when the card lacks them', () => { + const out = serializeCard({ + card: { + id: 1, + name: 'X', + description: null, + type: 'question', + database_id: 6, + collection_id: null, + archived: false, + result_metadata: [], + } as any, + resolvedSql: 'SELECT 1', + templateTags: [], + collectionPath: [], + resolutionStatus: 'resolved', + }); + expect(out.parameters).toEqual([]); + expect(out.lastRunAt).toBeNull(); + expect(out.dashboardCount).toBeNull(); + }); +}); diff --git a/packages/context/src/ingest/adapters/metabase/serialize-card.ts b/packages/context/src/ingest/adapters/metabase/serialize-card.ts new file mode 100644 index 00000000..57f1acf9 --- /dev/null +++ b/packages/context/src/ingest/adapters/metabase/serialize-card.ts @@ -0,0 +1,127 @@ +import type { StagedCardFile, StagedParameter, StagedResultColumn, StagedTemplateTag } from './types.js'; + +const CARD_REF_RE = /\{\{#(\d+)\}\}/g; + +/** + * Input TemplateTag shape mirrors `MetabaseClient.getTemplateTags` output. We keep the + * shape loose — only `name`, `type`, and optional `cardReference`/`default` are needed here. 
+ */ +export interface InputTemplateTag { + name: string; + type: string; + cardReference?: number | null; + defaultValue?: string | null; +} + +export function extractReferencedCardIds(templateTags: InputTemplateTag[], sql: string): number[] { + const ids = new Set(); + for (const tag of templateTags) { + if (tag.type === 'card' && typeof tag.cardReference === 'number') { + ids.add(tag.cardReference); + } + } + for (const match of sql.matchAll(CARD_REF_RE)) { + const n = Number.parseInt(match[1], 10); + if (Number.isFinite(n) && n > 0) { + ids.add(n); + } + } + return [...ids].sort((a, b) => a - b); +} + +/** + * Input card shape — matches the fields `MetabaseClient.getCard()` returns that we + * care about. The adapter reads whatever the client returns; this helper stays + * duck-typed so the client's type can evolve without churn here. + */ +export interface InputCard { + id: number; + name: string; + description?: string | null; + type: string; + database_id: number; + collection_id?: number | 'root' | null; + archived?: boolean; + result_metadata?: Array<{ + name: string; + display_name?: string | null; + base_type: string; + semantic_type?: string | null; + description?: string | null; + fk_target_field_id?: number | null; + field_ref?: unknown[] | null; + }> | null; + parameters?: Array<{ + id: string; + name: string; + type: string; + slug?: string | null; + default?: unknown; + sectionId?: string | null; + }> | null; + last_run_at?: string | null; + dashboard_count?: number | null; +} + +export interface SerializeCardParams { + card: InputCard; + resolvedSql: string; + templateTags: InputTemplateTag[]; + collectionPath: string[]; + resolutionStatus: 'resolved' | 'fallback'; +} + +function toStagedColumn(col: NonNullable[number]): StagedResultColumn { + return { + name: col.name, + display_name: col.display_name ?? null, + base_type: col.base_type, + semantic_type: col.semantic_type ?? null, + description: col.description ?? 
null, + fk_target_field_id: col.fk_target_field_id ?? null, + field_ref: col.field_ref ?? null, + }; +} + +function toStagedParameter(param: NonNullable[number]): StagedParameter { + return { + id: param.id, + name: param.name, + type: param.type, + slug: param.slug ?? null, + default: param.default ?? null, + sectionId: param.sectionId ?? null, + }; +} + +function toStagedTemplateTag(tag: InputTemplateTag): StagedTemplateTag { + return { + name: tag.name, + type: tag.type, + defaultValue: tag.defaultValue ?? null, + cardReference: tag.cardReference ?? null, + }; +} + +export function serializeCard(params: SerializeCardParams): StagedCardFile { + const { card, resolvedSql, templateTags, collectionPath, resolutionStatus } = params; + const referencedCardIds = extractReferencedCardIds(templateTags, resolvedSql); + return { + metabaseId: card.id, + name: card.name, + description: card.description ?? null, + type: card.type, + databaseId: card.database_id, + collectionId: card.collection_id ?? null, + archived: card.archived ?? false, + resolvedSql, + templateTags: templateTags.map(toStagedTemplateTag), + resultMetadata: (card.result_metadata ?? []).map(toStagedColumn), + collectionPath, + referencedCardIds, + parameters: (card.parameters ?? []).map(toStagedParameter), + lastRunAt: card.last_run_at ?? null, + dashboardCount: card.dashboard_count ?? 
null, + resolutionStatus, + }; +} diff --git a/packages/context/src/ingest/adapters/metabase/source-state-port.ts b/packages/context/src/ingest/adapters/metabase/source-state-port.ts new file mode 100644 index 00000000..7c872f8d --- /dev/null +++ b/packages/context/src/ingest/adapters/metabase/source-state-port.ts @@ -0,0 +1,25 @@ +import type { MetabaseSyncMode } from './types.js'; + +export interface MetabaseSourceStateSelection { + selectionType: 'collection' | 'item'; + metabaseObjectId: number; +} + +export interface MetabaseSourceStateMapping { + metabaseDatabaseId: number; + metabaseDatabaseName: string | null; + metabaseEngine: string | null; + targetConnectionId: string | null; + syncEnabled: boolean; +} + +export interface MetabaseSourceState { + syncMode: MetabaseSyncMode; + selections: MetabaseSourceStateSelection[]; + defaultTagNames: string[]; + mappings: MetabaseSourceStateMapping[]; +} + +export interface MetabaseSourceStateReader { + getSourceState(connectionId: string): Promise; +} diff --git a/packages/context/src/ingest/adapters/metabase/types.test.ts b/packages/context/src/ingest/adapters/metabase/types.test.ts new file mode 100644 index 00000000..4a445d89 --- /dev/null +++ b/packages/context/src/ingest/adapters/metabase/types.test.ts @@ -0,0 +1,87 @@ +import { describe, expect, it } from 'vitest'; +import { + metabasePullConfigSchema, + parseMetabasePullConfig, + stagedCardFileSchema, + stagedSyncConfigSchema, +} from './types.js'; + +describe('metabase adapter types', () => { + it('parses a valid MetabasePullConfig', () => { + const parsed = parseMetabasePullConfig({ + metabaseConnectionId: 'a1b2c3d4-e5f6-4789-9abc-def012345678', + metabaseDatabaseId: 42, + }); + expect(parsed.metabaseConnectionId).toBe('a1b2c3d4-e5f6-4789-9abc-def012345678'); + expect(parsed.metabaseDatabaseId).toBe(42); + }); + + it('parses local-safe Metabase connection IDs for standalone projects', () => { + const parsed = parseMetabasePullConfig({ metabaseConnectionId: 
'prod-metabase', metabaseDatabaseId: 42 }); + expect(parsed.metabaseConnectionId).toBe('prod-metabase'); + }); + + it('rejects unsafe metabaseConnectionId values', () => { + expect(() => parseMetabasePullConfig({ metabaseConnectionId: '../prod', metabaseDatabaseId: 42 })).toThrow(); + }); + + it('rejects missing metabaseDatabaseId', () => { + const parsed = metabasePullConfigSchema.safeParse({ metabaseConnectionId: 'a1b2c3d4-e5f6-4789-9abc-def012345678' }); + expect(parsed.success).toBe(false); + }); + + it('stagedCardFileSchema accepts a minimal card', () => { + const parsed = stagedCardFileSchema.parse({ + metabaseId: 1, + name: 'Orders', + description: null, + type: 'model', + databaseId: 42, + collectionId: 5, + archived: false, + resolvedSql: 'SELECT * FROM orders', + templateTags: [], + resultMetadata: [], + collectionPath: ['Data', 'Orders'], + referencedCardIds: [], + resolutionStatus: 'resolved', + }); + expect(parsed.metabaseId).toBe(1); + expect(parsed.collectionPath).toEqual(['Data', 'Orders']); + }); + + it('stagedSyncConfigSchema accepts selections + mappings snapshot', () => { + const parsed = stagedSyncConfigSchema.parse({ + metabaseConnectionId: 'a1b2c3d4-e5f6-4789-9abc-def012345678', + metabaseDatabaseId: 42, + syncMode: 'ALL', + selections: [], + defaultTagNames: [], + mapping: { + metabaseDatabaseId: 42, + metabaseDatabaseName: 'Analytics', + metabaseEngine: 'postgres', + targetConnectionId: 'b2c3d4e5-f6a7-4890-abcd-ef0123456789', + }, + }); + expect(parsed.syncMode).toBe('ALL'); + }); + + it('stagedSyncConfigSchema accepts local-safe connection IDs', () => { + const parsed = stagedSyncConfigSchema.parse({ + metabaseConnectionId: 'prod-metabase', + metabaseDatabaseId: 42, + syncMode: 'ALL', + selections: [], + defaultTagNames: [], + mapping: { + metabaseDatabaseId: 42, + metabaseDatabaseName: 'Analytics', + metabaseEngine: 'postgres', + targetConnectionId: 'warehouse_a', + }, + }); + expect(parsed.metabaseConnectionId).toBe('prod-metabase'); + 
expect(parsed.mapping.targetConnectionId).toBe('warehouse_a'); + }); +}); diff --git a/packages/context/src/ingest/adapters/metabase/types.ts b/packages/context/src/ingest/adapters/metabase/types.ts new file mode 100644 index 00000000..fc2ecb87 --- /dev/null +++ b/packages/context/src/ingest/adapters/metabase/types.ts @@ -0,0 +1,137 @@ +import { z } from 'zod'; + +export const metabaseSyncModeSchema = z.enum(['ALL', 'ONLY', 'EXCEPT']); +export type MetabaseSyncMode = z.infer; + +export const metabaseLocalConnectionIdSchema = z.string().regex(/^[a-zA-Z0-9][a-zA-Z0-9_-]*$/); + +/** + * The lean config the adapter needs at `fetch()` time. Lives in the BullMQ payload's + * `bundleRef.config` when the runner invokes the adapter. Never persisted beyond one + * job — the persisted state (enabled/disabled, auth, scheduling) lives on the + * Metabase connection's `connections.config` JSONB. + */ +export const metabasePullConfigSchema = z.object({ + /** The Metabase connection (source) — the thing being swept. */ + metabaseConnectionId: metabaseLocalConnectionIdSchema, + /** The Metabase-side database id whose cards this bundle pulls (one bundle = one database). */ + metabaseDatabaseId: z.number().int().positive(), +}); + +export type MetabasePullConfig = z.infer; + +export function parseMetabasePullConfig(raw: unknown): MetabasePullConfig { + return metabasePullConfigSchema.parse(raw); +} + +/** A Metabase column from `card.result_metadata`. Mirrors what the LLM consumes today. 
*/ +export const stagedResultColumnSchema = z.object({ + name: z.string(), + display_name: z.string().optional().nullable(), + base_type: z.string(), + semantic_type: z.string().optional().nullable(), + description: z.string().optional().nullable(), + fk_target_field_id: z.number().optional().nullable(), + field_ref: z.array(z.unknown()).optional().nullable(), +}); + +export type StagedResultColumn = z.infer; + +export const stagedParameterSchema = z.object({ + id: z.string(), + name: z.string(), + type: z.string(), + slug: z.string().optional().nullable(), + default: z.unknown().optional().nullable(), + sectionId: z.string().optional().nullable(), +}); + +export type StagedParameter = z.infer; + +/** A template tag pulled from an MBQL card's `dataset_query.stages[0].template-tags`. */ +export const stagedTemplateTagSchema = z.object({ + name: z.string(), + type: z.string(), + defaultValue: z.string().optional().nullable(), + /** If this tag is a saved-question reference (`{{#42}}`), the referenced card id. Null for plain tags. */ + cardReference: z.number().int().positive().optional().nullable(), +}); + +export type StagedTemplateTag = z.infer; + +/** + * A serialized card file, one per `cards/.json`. Hashed content-addressably by + * the runner — key inputs that change cause re-ingest, cosmetic fields do not. + */ +export const stagedCardFileSchema = z.object({ + metabaseId: z.number().int().positive(), + name: z.string(), + description: z.string().nullable(), + type: z.string(), // 'question' | 'model' | 'metric' + databaseId: z.number().int().positive(), + collectionId: z.union([z.number().int(), z.literal('root')]).nullable(), + archived: z.boolean(), + resolvedSql: z.string(), + templateTags: z.array(stagedTemplateTagSchema), + resultMetadata: z.array(stagedResultColumnSchema), + /** Full collection breadcrumb path, e.g. ['Data', 'Orders Team']. `[]` for root cards. 
*/ + collectionPath: z.array(z.string()), + /** Card ids this card references via `{{#N}}` template tags or other saved-question refs. */ + referencedCardIds: z.array(z.number().int().positive()), + parameters: z.array(stagedParameterSchema).default([]), + lastRunAt: z.string().nullable().default(null), + dashboardCount: z.number().int().nullable().default(null), + resolutionStatus: z.enum(['resolved', 'fallback']), +}); + +export type StagedCardFile = z.infer; + +/** A serialized collection file, `collections/.json`. Minimal — path lives on the card. */ +export const stagedCollectionFileSchema = z.object({ + metabaseId: z.union([z.number().int(), z.literal('root')]), + name: z.string(), + parentId: z.union([z.number().int(), z.literal('root')]).nullable(), +}); + +export type StagedCollectionFile = z.infer; + +/** A serialized database-mapping snapshot, `databases/.json`. */ +export const stagedDatabaseFileSchema = z.object({ + metabaseDatabaseId: z.number().int().positive(), + metabaseDatabaseName: z.string(), + metabaseEngine: z.string().nullable(), + targetConnectionId: metabaseLocalConnectionIdSchema, +}); + +export type StagedDatabaseFile = z.infer; + +/** The filter snapshot. Written once per `fetch()` to `sync-config.json`. */ +export const stagedSyncConfigSchema = z.object({ + metabaseConnectionId: metabaseLocalConnectionIdSchema, + metabaseDatabaseId: z.number().int().positive(), + syncMode: metabaseSyncModeSchema, + selections: z.array( + z.object({ + selectionType: z.enum(['collection', 'item']), + metabaseObjectId: z.number().int(), + }), + ), + defaultTagNames: z.array(z.string()), + mapping: z.object({ + metabaseDatabaseId: z.number().int().positive(), + metabaseDatabaseName: z.string(), + metabaseEngine: z.string().nullable(), + targetConnectionId: metabaseLocalConnectionIdSchema, + }), +}); + +export type StagedSyncConfig = z.infer; + +/** Filenames inside stagedDir. Centralized so chunk() + fetch() + detect() all agree. 
*/ +export const STAGED_FILES = { + syncConfig: 'sync-config.json', + cardsDir: 'cards', + collectionsDir: 'collections', + databasesDir: 'databases', + unresolvedCards: 'unresolved-cards.json', +} as const; diff --git a/packages/context/src/ingest/adapters/metricflow/chunk.test.ts b/packages/context/src/ingest/adapters/metricflow/chunk.test.ts new file mode 100644 index 00000000..88062fb3 --- /dev/null +++ b/packages/context/src/ingest/adapters/metricflow/chunk.test.ts @@ -0,0 +1,124 @@ +import { join, resolve } from 'node:path'; +import { describe, expect, it } from 'vitest'; +import { chunkMetricFlowProject } from './chunk.js'; +import { parseMetricFlowStagedDir } from './parse.js'; + +const FIXTURES = resolve(__dirname, '../../../../test/fixtures/metricflow'); +const SINGLE = join(FIXTURES, 'single-model'); +const EXTENDS_CHAIN = join(FIXTURES, 'extends-chain'); +const MULTI = join(FIXTURES, 'multi-component'); +const DBT_MIXED = join(FIXTURES, 'dbt-mixed'); + +describe('chunkMetricFlowProject — first run', () => { + it('single-model fixture emits one WU with the orders model + its metric file (collapsed via metric refs)', async () => { + const project = await parseMetricFlowStagedDir(SINGLE); + const result = chunkMetricFlowProject(project); + expect(result.workUnits).toHaveLength(1); + const wu = result.workUnits[0]; + expect(wu.unitKey).toBe('metricflow-orders'); + expect(wu.rawFiles).toEqual(['models/orders.yml']); + expect(wu.dependencyPaths).toEqual([]); + expect(wu.peerFileIndex).toEqual([]); + }); + + it('extends-chain fixture collapses orders + orders_ext + metrics/orders_final into ONE WU', async () => { + const project = await parseMetricFlowStagedDir(EXTENDS_CHAIN); + const result = chunkMetricFlowProject(project); + expect(result.workUnits).toHaveLength(1); + const wu = result.workUnits[0]; + expect(wu.unitKey).toBe('metricflow-orders'); + expect(wu.rawFiles.sort()).toEqual(['metrics/orders_final.yml', 'models/orders.yml', 
'models/orders_ext.yml']); + expect(wu.notes).toContain('orders'); + expect(wu.notes).toContain('orders_ext'); + expect(wu.notes).toContain('revenue'); + }); + + it('multi-component fixture emits two disjoint WUs ordered by leadName', async () => { + const project = await parseMetricFlowStagedDir(MULTI); + const result = chunkMetricFlowProject(project); + expect(result.workUnits).toHaveLength(2); + expect(result.workUnits.map((wu) => wu.unitKey)).toEqual(['metricflow-campaigns', 'metricflow-orders']); + expect(result.workUnits[0].rawFiles).toEqual(['models/marketing/campaigns.yml']); + expect(result.workUnits[0].peerFileIndex).toEqual(['models/sales/orders.yml']); + expect(result.workUnits[1].rawFiles).toEqual(['models/sales/orders.yml']); + expect(result.workUnits[1].peerFileIndex).toEqual(['models/marketing/campaigns.yml']); + }); + + it('dbt-mixed fixture: non-MetricFlow YAML (dbt_project.yml) lands in peerFileIndex, not in any WU', async () => { + const project = await parseMetricFlowStagedDir(DBT_MIXED); + const result = chunkMetricFlowProject(project); + expect(result.workUnits).toHaveLength(1); + expect(result.workUnits[0].rawFiles).toEqual(['models/orders.yml']); + expect(result.workUnits[0].peerFileIndex).toEqual(['dbt_project.yml']); + }); + + it('chunk is deterministic: two identical invocations return structurally-equal WUs', async () => { + const p1 = await parseMetricFlowStagedDir(EXTENDS_CHAIN); + const p2 = await parseMetricFlowStagedDir(EXTENDS_CHAIN); + const r1 = chunkMetricFlowProject(p1); + const r2 = chunkMetricFlowProject(p2); + expect(JSON.stringify(r1)).toBe(JSON.stringify(r2)); + }); + + it('DiffSet re-sync: only WUs with a touched rawFile are kept', async () => { + const project = await parseMetricFlowStagedDir(MULTI); + const result = chunkMetricFlowProject(project, { + diffSet: { + added: [], + modified: ['models/sales/orders.yml'], + deleted: [], + unchanged: ['models/marketing/campaigns.yml'], + }, + }); + 
expect(result.workUnits).toHaveLength(1); + expect(result.workUnits[0].unitKey).toBe('metricflow-orders'); + expect(result.workUnits[0].rawFiles).toEqual(['models/sales/orders.yml']); + expect(result.workUnits[0].dependencyPaths).toEqual([]); // no unchanged sibling in this component + }); + + it('DiffSet re-sync: unchanged component siblings move from rawFiles into dependencyPaths', async () => { + const project = await parseMetricFlowStagedDir(EXTENDS_CHAIN); + const result = chunkMetricFlowProject(project, { + diffSet: { + added: [], + modified: ['models/orders_ext.yml'], // only the extension file changed + deleted: [], + unchanged: ['models/orders.yml', 'metrics/orders_final.yml'], + }, + }); + expect(result.workUnits).toHaveLength(1); + const wu = result.workUnits[0]; + expect(wu.rawFiles).toEqual(['models/orders_ext.yml']); + expect(wu.dependencyPaths.sort()).toEqual(['metrics/orders_final.yml', 'models/orders.yml']); + }); + + it('DiffSet re-sync: all-unchanged yields zero WUs', async () => { + const project = await parseMetricFlowStagedDir(EXTENDS_CHAIN); + const result = chunkMetricFlowProject(project, { + diffSet: { + added: [], + modified: [], + deleted: [], + unchanged: ['models/orders.yml', 'models/orders_ext.yml', 'metrics/orders_final.yml'], + }, + }); + expect(result.workUnits).toEqual([]); + expect(result.eviction).toBeUndefined(); + }); + + it('DiffSet re-sync: deleted files produce an EvictionUnit', async () => { + const project = await parseMetricFlowStagedDir(MULTI); + const result = chunkMetricFlowProject(project, { + diffSet: { + added: [], + modified: [], + deleted: ['models/marketing/campaigns.yml'], + unchanged: ['models/sales/orders.yml'], + }, + }); + expect(result.workUnits).toEqual([]); + expect(result.eviction).toEqual({ + deletedRawPaths: ['models/marketing/campaigns.yml'], + }); + }); +}); diff --git a/packages/context/src/ingest/adapters/metricflow/chunk.ts b/packages/context/src/ingest/adapters/metricflow/chunk.ts new file mode 
100644
index 00000000..57e2b123
--- /dev/null
+++ b/packages/context/src/ingest/adapters/metricflow/chunk.ts
@@ -0,0 +1,85 @@
+import type { ChunkResult, DiffSet, WorkUnit } from '../../types.js';
+import { buildMetricFlowGraph, type MetricFlowComponent, type MetricFlowGraph } from './graph.js';
+import type { ParsedMetricFlowProject } from './parse.js';
+
+interface ChunkOptions {
+  diffSet?: DiffSet;
+}
+
+/**
+ * Emit WorkUnits for a parsed MetricFlow project.
+ *
+ * First run (no diffSet): one WorkUnit per graph component. rawFiles holds the
+ * component's own paths; peerFileIndex holds the paths of every other component
+ * plus the `project.allPaths` entries that belong to no component.
+ *
+ * Re-sync (diffSet given): only WorkUnits owning at least one added/modified path
+ * are kept. Their untouched paths move from rawFiles into dependencyPaths, and a
+ * non-empty diffSet.deleted is reported as a single eviction entry.
+ */
+export function chunkMetricFlowProject(project: ParsedMetricFlowProject, opts: ChunkOptions = {}): ChunkResult {
+  const graph = buildMetricFlowGraph(project);
+  const workUnits = emitFirstRunWorkUnits(project, graph);
+  return opts.diffSet ? applyDiffSet(workUnits, graph, opts.diffSet) : { workUnits };
+}
+
+/** Summarise a component's semantic-model and metric names for the WorkUnit `notes` field. */
+function describeComponent(c: MetricFlowComponent): string {
+  const labelled: Array<[string, readonly string[]]> = [
+    ['semantic_models', c.semanticModelNames],
+    ['metrics', c.metricNames],
+  ];
+  const sections = labelled
+    .filter(([, names]) => names.length > 0)
+    .map(([label, names]) => `${label}: ${names.join(', ')}`);
+  if (sections.length === 0) {
+    return 'MetricFlow component (empty)';
+  }
+  return `MetricFlow component (${sections.join('; ')})`;
+}
+
+/** Build one WorkUnit per graph component, as done when no diffSet is supplied. */
+function emitFirstRunWorkUnits(project: ParsedMetricFlowProject, graph: MetricFlowGraph): WorkUnit[] {
+  const componentPaths = new Set(graph.components.flatMap((c) => c.paths));
+  // Paths known to the project that no component claims.
+  const orphanPaths = project.allPaths.filter((path) => !componentPaths.has(path));
+
+  const units: WorkUnit[] = [];
+  for (const component of graph.components) {
+    const ownPaths = new Set(component.paths);
+    const otherComponentPaths = [...componentPaths].filter((path) => !ownPaths.has(path));
+    units.push({
+      unitKey: `metricflow-${component.leadName}`,
+      displayLabel: `MetricFlow "${component.leadName}"`,
+      rawFiles: [...component.paths].sort(),
+      // A single final sort gives the same order as sorting each part first.
+      peerFileIndex: [...otherComponentPaths, ...orphanPaths].sort(),
+      dependencyPaths: [],
+      notes: describeComponent(component),
+    });
+  }
+  return units;
+}
+
+/** Narrow first-run WorkUnits to those touched by `diffSet` and report deletions. */
+function applyDiffSet(firstRunUnits: WorkUnit[], graph: MetricFlowGraph, diffSet: DiffSet): ChunkResult {
+  void graph; // reserved for future widening (e.g. cross-component ancestor paths)
+  const touched = new Set([...diffSet.added, ...diffSet.modified]);
+
+  const workUnits = firstRunUnits
+    .filter((wu) => wu.rawFiles.some((path) => touched.has(path)))
+    .map((wu): WorkUnit => {
+      const changed = wu.rawFiles.filter((path) => touched.has(path));
+      const untouched = wu.rawFiles.filter((path) => !touched.has(path));
+      return {
+        ...wu,
+        rawFiles: changed.sort(),
+        dependencyPaths: [...new Set([...wu.dependencyPaths, ...untouched])].sort(),
+      };
+    });
+
+  if (diffSet.deleted.length === 0) {
+    return { workUnits, eviction: undefined };
+  }
+  return { workUnits, eviction: { deletedRawPaths: [...diffSet.deleted].sort() } };
+}
diff --git a/packages/context/src/ingest/adapters/metricflow/deep-parse.test.ts b/packages/context/src/ingest/adapters/metricflow/deep-parse.test.ts new file mode 100644 index 00000000..8896db68 --- /dev/null +++ b/packages/context/src/ingest/adapters/metricflow/deep-parse.test.ts @@ -0,0 +1,1304 @@ +import { beforeEach, describe, expect, it } from 'vitest'; +import { parseMetricflowFiles, translateMetricflowJinjaFilter } from './deep-parse.js'; + +function yaml(strings: TemplateStringsArray, ...values: unknown[]): string { + return String.raw(strings, ...values); +} + +function parseOne(content: string) { + return parseMetricflowFiles([{ content, path: 'test.yml' }]); +} + +describe('parseMetricflowFiles', () => { + beforeEach(() => { + // Keep this hook so the copied tests keep their grouping shape while the parser stays pure. + }); + + // ============ Semantic Model Parsing ============ + + describe('parseFiles — semantic models', () => { + it('extracts name, description, modelRef, and defaultTimeDimension', () => { + const result = parseOne(yaml` +semantic_models: + - name: orders + description: All completed orders + model: ref('stg_orders') + defaults: + agg_time_dimension: order_date + dimensions: [] + measures: [] +`, + ); + + expect(result.semanticModels).toHaveLength(1); + const sm = result.semanticModels[0]; + expect(sm.name).toBe('orders'); + expect(sm.description).toBe('All completed orders'); + expect(sm.modelRef).toBe('stg_orders'); + expect(sm.defaultTimeDimension).toBe('order_date'); + }); + + it('extracts modelRef from source()', () => { + const result = parseOne(yaml` +semantic_models: + - name: raw_events + model: source('analytics', 'events') + dimensions: [] + measures: [] +`, + ); + + expect(result.semanticModels[0].modelRef).toBe('events'); + }); + + it('uses raw string when model is not ref() or source()', () => { + const result =
parseOne(yaml` +semantic_models: + - name: custom + model: my_table + dimensions: [] + measures: [] +`, + ); + + expect(result.semanticModels[0].modelRef).toBe('my_table'); + }); + + it('sets description to null when missing', () => { + const result = parseOne(yaml` +semantic_models: + - name: orders + model: ref('orders') + dimensions: [] + measures: [] +`, + ); + + expect(result.semanticModels[0].description).toBeNull(); + }); + }); + + // ============ Dimensions ============ + + describe('parseFiles — dimensions', () => { + it('maps categorical to string and time to time', () => { + const result = parseOne(yaml` +semantic_models: + - name: orders + model: ref('orders') + dimensions: + - name: status + type: categorical + description: Order status + - name: created_at + type: time + description: When the order was placed + measures: [] +`, + ); + + const dims = result.semanticModels[0].dimensions; + expect(dims).toHaveLength(2); + expect(dims[0]).toEqual({ + name: 'status', + column: 'status', + type: 'string', + label: 'Status', + description: 'Order status', + }); + expect(dims[1]).toEqual({ + name: 'created_at', + column: 'created_at', + type: 'time', + label: 'Created At', + description: 'When the order was placed', + }); + }); + + it('uses expr as column when provided', () => { + const result = parseOne(yaml` +semantic_models: + - name: orders + model: ref('orders') + dimensions: + - name: order_status + type: categorical + expr: status_code + measures: [] +`, + ); + + expect(result.semanticModels[0].dimensions[0].column).toBe('status_code'); + }); + + it('uses explicit label over auto-generated one', () => { + const result = parseOne(yaml` +semantic_models: + - name: orders + model: ref('orders') + dimensions: + - name: order_status_code + type: categorical + label: Status + measures: [] +`, + ); + + expect(result.semanticModels[0].dimensions[0].label).toBe('Status'); + }); + }); + + // ============ Measures ============ + + describe('parseFiles — 
measures', () => { + it('maps all standard aggregation types', () => { + const result = parseOne(yaml` +semantic_models: + - name: orders + model: ref('orders') + dimensions: [] + measures: + - name: total_amount + agg: sum + expr: amount + - name: order_count + agg: count + expr: '1' + - name: unique_customers + agg: count_distinct + expr: customer_id + - name: avg_amount + agg: average + expr: amount + - name: max_amount + agg: max + expr: amount + - name: min_amount + agg: min + expr: amount + - name: median_amount + agg: median + expr: amount +`, + ); + + const measures = result.semanticModels[0].measures; + expect(measures).toHaveLength(7); + expect(measures.map((m) => m.type === 'simple' && m.aggregation)).toEqual([ + 'sum', + 'count', + 'count_distinct', + 'avg', + 'max', + 'min', + 'median', + ]); + }); + + it('maps sum_boolean to sum', () => { + const result = parseOne(yaml` +semantic_models: + - name: users + model: ref('users') + dimensions: [] + measures: + - name: active_users + agg: sum_boolean + expr: is_active +`, + ); + + const m = result.semanticModels[0].measures[0]; + expect(m.type).toBe('simple'); + if (m.type === 'simple') { + expect(m.aggregation).toBe('sum'); + } + }); + + it('maps percentile p50 to median', () => { + const result = parseOne(yaml` +semantic_models: + - name: orders + model: ref('orders') + dimensions: [] + measures: + - name: median_delivery_time + agg: percentile + expr: delivery_hours + agg_params: + percentile: 0.5 +`, + ); + + const m = result.semanticModels[0].measures[0]; + expect(m.type).toBe('simple'); + if (m.type === 'simple') { + expect(m.aggregation).toBe('median'); + expect(m.column).toBe('delivery_hours'); + } + }); + + it('maps percentile p95 to none with label', () => { + const result = parseOne(yaml` +semantic_models: + - name: orders + model: ref('orders') + dimensions: [] + measures: + - name: p95_delivery_time + agg: percentile + expr: delivery_hours + agg_params: + percentile: 0.95 +`, + ); + + const m = 
result.semanticModels[0].measures[0]; + expect(m.type).toBe('simple'); + if (m.type === 'simple') { + expect(m.aggregation).toBe('none'); + expect(m.label).toBe('P95 Delivery Time (p95)'); + } + }); + + it('skips unsupported aggregation types', () => { + const result = parseOne(yaml` +semantic_models: + - name: orders + model: ref('orders') + dimensions: [] + measures: + - name: total_amount + agg: sum + expr: amount + - name: weird_measure + agg: hyperloglog + expr: user_id +`, + ); + + expect(result.semanticModels[0].measures).toHaveLength(1); + expect(result.semanticModels[0].measures[0].name).toBe('total_amount'); + }); + + it('uses measure name as column when expr is missing', () => { + const result = parseOne(yaml` +semantic_models: + - name: orders + model: ref('orders') + dimensions: [] + measures: + - name: amount + agg: sum +`, + ); + + const m = result.semanticModels[0].measures[0]; + expect(m.type).toBe('simple'); + if (m.type === 'simple') { + expect(m.column).toBe('amount'); + } + }); + }); + + // ============ Jinja Filter Translation ============ + + describe('translateJinjaFilter', () => { + it('translates Dimension references', () => { + expect(translateMetricflowJinjaFilter("{{ Dimension('orders__status') }} = 'completed'")).toBe( + "status = 'completed'", + ); + }); + + it('translates TimeDimension references', () => { + expect(translateMetricflowJinjaFilter("{{ TimeDimension('orders__created_at', 'day') }} > '2024-01-01'")).toBe( + "created_at > '2024-01-01'", + ); + }); + + it('translates TimeDimension without granularity arg', () => { + expect(translateMetricflowJinjaFilter("{{ TimeDimension('orders__created_at') }} IS NOT NULL")).toBe( + 'created_at IS NOT NULL', + ); + }); + + it('translates Entity references', () => { + expect(translateMetricflowJinjaFilter("{{ Entity('orders__customer_id') }} IS NOT NULL")).toBe( + 'customer_id IS NOT NULL', + ); + }); + + it('translates Metric with array params', () => { + 
expect(translateMetricflowJinjaFilter("{{ Metric('total_revenue', ['product_category']) }} > 100")).toBe( + 'total_revenue > 100', + ); + }); + + it('translates Metric with object params', () => { + expect(translateMetricflowJinjaFilter("{{ Metric('total_revenue', {'group': true}) }} > 100")).toBe( + 'total_revenue > 100', + ); + }); + + it('translates Metric without params', () => { + expect(translateMetricflowJinjaFilter("{{ Metric('total_revenue') }} > 50")).toBe('total_revenue > 50'); + }); + + it('handles combined filter with multiple Jinja references', () => { + const filter = + "{{ Dimension('orders__status') }} = 'active' AND {{ TimeDimension('orders__created_at', 'day') }} >= '2024-01-01'"; + expect(translateMetricflowJinjaFilter(filter)).toBe("status = 'active' AND created_at >= '2024-01-01'"); + }); + + it('passes through plain SQL unchanged', () => { + expect(translateMetricflowJinjaFilter("status = 'active'")).toBe("status = 'active'"); + }); + }); + + // ============ Entity Relationships ============ + + describe('parseFiles — relationships', () => { + it('creates FK relationship when foreign entity matches primary entity by name', () => { + const result = parseOne(yaml` +semantic_models: + - name: customers + model: ref('dim_customers') + entities: + - name: customer_id + type: primary + expr: id + dimensions: [] + measures: [] + - name: orders + model: ref('fct_orders') + entities: + - name: order_id + type: primary + - name: customer_id + type: foreign + dimensions: [] + measures: [] +`, + ); + + expect(result.relationships).toHaveLength(1); + expect(result.relationships[0]).toEqual({ + fromTable: 'fct_orders', + fromColumn: 'customer_id', + toTable: 'dim_customers', + toColumn: 'id', + }); + }); + + it('uses primary_entity shorthand for FK matching', () => { + const result = parseOne(yaml` +semantic_models: + - name: products + model: ref('dim_products') + primary_entity: product_id + dimensions: [] + measures: [] + - name: order_items + model: 
ref('fct_order_items') + entities: + - name: item_id + type: primary + - name: product_id + type: foreign + dimensions: [] + measures: [] +`, + ); + + expect(result.relationships).toHaveLength(1); + expect(result.relationships[0]).toEqual({ + fromTable: 'fct_order_items', + fromColumn: 'product_id', + toTable: 'dim_products', + toColumn: 'product_id', + }); + }); + + it('does not create self-referencing relationships', () => { + const result = parseOne(yaml` +semantic_models: + - name: orders + model: ref('orders') + entities: + - name: order_id + type: primary + - name: order_id + type: foreign + dimensions: [] + measures: [] +`, + ); + + expect(result.relationships).toHaveLength(0); + }); + + it('deduplicates relationships across models in the same file', () => { + const result = parseOne(yaml` +semantic_models: + - name: customers + model: ref('customers') + entities: + - name: customer_id + type: primary + dimensions: [] + measures: [] + - name: orders + model: ref('orders') + entities: + - name: order_id + type: primary + - name: customer_id + type: foreign + dimensions: [] + measures: [] + - name: returns + model: ref('returns') + entities: + - name: return_id + type: primary + - name: customer_id + type: foreign + dimensions: [] + measures: [] +`, + ); + + // orders→customers and returns→customers (2 unique relationships) + expect(result.relationships).toHaveLength(2); + }); + + it('creates relationships when primary and foreign entities are split across files', () => { + const result = parseMetricflowFiles([ + { + content: yaml` +semantic_models: + - name: salesforce_calls + model: ref('fct_salesforce_calls') + entities: + - name: task_id + type: primary + dimensions: [] + measures: [] +`, + path: 'sem_fct_salesforce_calls.yml', + }, + { + content: yaml` +semantic_models: + - name: daily_flash + model: ref('rpt_daily_flash') + entities: + - name: rpt_daily_flash_uuid + type: primary + - name: task_id + type: foreign + dimensions: [] + measures: [] +`, + 
path: 'sem_rpt_daily_flash.yml', + }, + ]); + + expect(result.relationships).toHaveLength(1); + expect(result.relationships[0]).toEqual({ + fromTable: 'rpt_daily_flash', + fromColumn: 'task_id', + toTable: 'fct_salesforce_calls', + toColumn: 'task_id', + }); + }); + + it('skips foreign entity with no matching primary', () => { + const result = parseOne(yaml` +semantic_models: + - name: orders + model: ref('orders') + entities: + - name: order_id + type: primary + - name: nonexistent_id + type: foreign + dimensions: [] + measures: [] +`, + ); + + expect(result.relationships).toHaveLength(0); + }); + }); + + // ============ Metric Resolution ============ + + describe('parseFiles — metric resolution', () => { + it('absorbs simple metric label/description onto parent measure', () => { + const result = parseOne(yaml` +semantic_models: + - name: orders + model: ref('orders') + dimensions: [] + measures: + - name: total_revenue + agg: sum + expr: amount +metrics: + - name: revenue + label: Total Revenue + description: Sum of all order amounts + type: simple + type_params: + measure: total_revenue +`, + ); + + const measure = result.semanticModels[0].measures[0]; + expect(measure.label).toBe('Total Revenue'); + expect(measure.description).toBe('Sum of all order amounts'); + }); + + it('handles measure as object with name property', () => { + const result = parseOne(yaml` +semantic_models: + - name: orders + model: ref('orders') + dimensions: [] + measures: + - name: total_revenue + agg: sum + expr: amount +metrics: + - name: revenue + label: Revenue (Filtered) + type: simple + type_params: + measure: + name: total_revenue + filter: + - "status = 'completed'" +`, + ); + + const measure = result.semanticModels[0].measures[0]; + expect(measure.label).toBe('Revenue (Filtered)'); + }); + + it('applies metric-level filter to measure with Jinja translation', () => { + const result = parseOne(yaml` +semantic_models: + - name: orders + model: ref('orders') + dimensions: [] + 
measures: + - name: order_count + agg: count + expr: '1' +metrics: + - name: completed_orders + type: simple + type_params: + measure: order_count + filter: + - "{{ Dimension('orders__status') }} = 'completed'" +`, + ); + + // Filtered metric creates a new measure; base measure stays clean + expect(result.semanticModels[0].measures).toHaveLength(2); + const baseMeasure = result.semanticModels[0].measures[0] as { filter?: string }; + expect(baseMeasure.filter).toBeUndefined(); + const filteredMeasure = result.semanticModels[0].measures[1] as { name: string; filter?: string }; + expect(filteredMeasure.name).toBe('completed_orders'); + expect(filteredMeasure.filter).toBe("status = 'completed'"); + }); + + it('marks cumulative metrics on the measure', () => { + const result = parseOne(yaml` +semantic_models: + - name: orders + model: ref('orders') + dimensions: [] + measures: + - name: total_revenue + agg: sum + expr: amount +metrics: + - name: cumulative_revenue + type: cumulative + type_params: + measure: total_revenue +`, + ); + + const measure = result.semanticModels[0].measures[0] as { cumulative?: boolean }; + expect(measure.cumulative).toBe(true); + }); + + it('creates derived measure for single-model derived metric', () => { + const result = parseOne(yaml` +semantic_models: + - name: orders + model: ref('orders') + dimensions: [] + measures: + - name: total_revenue + agg: sum + expr: amount + - name: order_count + agg: count + expr: '1' +metrics: + - name: metric_revenue + type: simple + type_params: + measure: total_revenue + - name: metric_count + type: simple + type_params: + measure: order_count + - name: avg_order_value + label: Average Order Value + type: derived + type_params: + expr: SAFE_DIVIDE(rev, cnt) + metrics: + - name: metric_revenue + alias: rev + - name: metric_count + alias: cnt +`, + ); + + const measures = result.semanticModels[0].measures; + expect(measures).toHaveLength(3); + const derived = measures[2]; + 
expect(derived.type).toBe('derived'); + if (derived.type === 'derived') { + expect(derived.name).toBe('avg_order_value'); + expect(derived.label).toBe('Average Order Value'); + expect(derived.expr).toBe('SAFE_DIVIDE(total_revenue, order_count)'); + expect(derived.dependsOn).toEqual(['total_revenue', 'order_count']); + } + }); + + it('auto-generates ratio metric expression from numerator/denominator', () => { + const result = parseOne(yaml` +semantic_models: + - name: orders + model: ref('orders') + dimensions: [] + measures: + - name: completed_count + agg: count + expr: '1' + - name: total_count + agg: count + expr: '1' +metrics: + - name: metric_completed + type: simple + type_params: + measure: completed_count + - name: metric_total + type: simple + type_params: + measure: total_count + - name: completion_rate + type: ratio + type_params: + numerator: + name: metric_completed + denominator: + name: metric_total +`, + ); + + const measures = result.semanticModels[0].measures; + const ratio = measures[2]; + expect(ratio.type).toBe('derived'); + if (ratio.type === 'derived') { + expect(ratio.name).toBe('completion_rate'); + expect(ratio.expr).toBe('completed_count / NULLIF(total_count, 0)'); + expect(ratio.dependsOn).toEqual(['completed_count', 'total_count']); + } + }); + + it('skips conversion metrics gracefully', () => { + const result = parseOne(yaml` +semantic_models: + - name: events + model: ref('events') + dimensions: [] + measures: + - name: event_count + agg: count + expr: '1' +metrics: + - name: signup_conversion + type: conversion + type_params: + conversion_type_params: + entity: user_id + base_measure: page_views + conversion_measure: signups + - name: simple_metric + type: simple + type_params: + measure: event_count +`, + ); + + // Conversion metric skipped, simple metric processed + expect(result.crossModelMetrics).toHaveLength(0); + const measures = result.semanticModels[0].measures; + expect(measures).toHaveLength(1); + }); + + it('creates 
cross-model derived metric when references span models', () => { + const result = parseMetricflowFiles([ + { + content: yaml` +semantic_models: + - name: orders + model: ref('orders') + dimensions: [] + measures: + - name: total_revenue + agg: sum + expr: amount +`, + path: 'orders.yml', + }, + { + content: yaml` +semantic_models: + - name: campaigns + model: ref('campaigns') + dimensions: [] + measures: + - name: total_spend + agg: sum + expr: spend +`, + path: 'campaigns.yml', + }, + { + content: yaml` +metrics: + - name: metric_revenue + type: simple + type_params: + measure: total_revenue + - name: metric_spend + type: simple + type_params: + measure: total_spend + - name: roas + label: Return on Ad Spend + description: Revenue per dollar spent + type: derived + type_params: + expr: SAFE_DIVIDE(revenue, spend) + metrics: + - name: metric_revenue + alias: revenue + - name: metric_spend + alias: spend +`, + path: 'metrics.yml', + }, + ]); + + expect(result.crossModelMetrics).toHaveLength(1); + const cm = result.crossModelMetrics[0]; + expect(cm.name).toBe('roas'); + expect(cm.label).toBe('Return on Ad Spend'); + expect(cm.expr).toBe('SAFE_DIVIDE(revenue, spend)'); + expect(cm.dependsOn).toHaveLength(2); + expect(cm.dependsOn[0].metricName).toBe('orders'); + expect(cm.dependsOn[1].metricName).toBe('campaigns'); + }); + + it('resolves derived-of-derived metrics within the same model', () => { + const result = parseOne(yaml` +semantic_models: + - name: financials + model: ref('financials') + dimensions: [] + measures: + - name: gross_revenue + agg: sum + expr: revenue + - name: cost_of_goods + agg: sum + expr: cogs + - name: operating_expenses + agg: sum + expr: opex +metrics: + - name: metric_gross_revenue + type: simple + type_params: + measure: gross_revenue + - name: metric_cogs + type: simple + type_params: + measure: cost_of_goods + - name: metric_opex + type: simple + type_params: + measure: operating_expenses + - name: gross_profit + type: derived + 
type_params: + expr: rev - cogs + metrics: + - name: metric_gross_revenue + alias: rev + - name: metric_cogs + alias: cogs + - name: net_profit_margin + type: derived + type_params: + expr: SAFE_DIVIDE(gp - opex, gp) + metrics: + - name: gross_profit + alias: gp + - name: metric_opex + alias: opex +`, + ); + + const measures = result.semanticModels[0].measures; + // 3 original + gross_profit derived + net_profit_margin derived-of-derived + expect(measures).toHaveLength(5); + + const netProfit = measures.find((m) => m.name === 'net_profit_margin'); + expect(netProfit).toBeDefined(); + expect(netProfit!.type).toBe('derived'); + if (netProfit!.type === 'derived') { + expect(netProfit!.dependsOn).toContain('gross_profit'); + } + }); + }); + + // ============ Edge Cases ============ + + describe('parseFiles — edge cases', () => { + it('handles empty YAML gracefully', () => { + const result = parseOne(''); + expect(result.semanticModels).toHaveLength(0); + expect(result.crossModelMetrics).toHaveLength(0); + expect(result.relationships).toHaveLength(0); + }); + + it('handles invalid YAML gracefully', () => { + const result = parseOne('{{{{invalid yaml!!!!'); + expect(result.semanticModels).toHaveLength(0); + }); + + it('handles file with only metrics and no semantic models', () => { + const result = parseOne(yaml` +metrics: + - name: orphan_metric + type: simple + type_params: + measure: nonexistent +`, + ); + + expect(result.semanticModels).toHaveLength(0); + // Orphan metric referencing non-existent measure is silently skipped + expect(result.crossModelMetrics).toHaveLength(0); + }); + + it('handles multiple files', () => { + const result = parseMetricflowFiles([ + { + content: yaml` +semantic_models: + - name: orders + model: ref('orders') + dimensions: + - name: status + type: categorical + measures: + - name: order_count + agg: count + expr: '1' +`, + path: 'orders.yml', + }, + { + content: yaml` +semantic_models: + - name: products + model: ref('products') + 
dimensions: + - name: category + type: categorical + measures: + - name: product_count + agg: count + expr: '1' +`, + path: 'products.yml', + }, + ]); + + expect(result.semanticModels).toHaveLength(2); + expect(result.semanticModels[0].name).toBe('orders'); + expect(result.semanticModels[1].name).toBe('products'); + }); + + it('returns empty warnings for valid files', () => { + const result = parseOne(yaml` +semantic_models: + - name: orders + model: ref('orders') + dimensions: [] + measures: + - name: total + agg: sum + expr: amount +`, + ); + + expect(result.warnings).toHaveLength(0); + }); + + it('handles filter as object with where_filters', () => { + const result = parseOne(yaml` +semantic_models: + - name: orders + model: ref('orders') + dimensions: [] + measures: + - name: order_count + agg: count + expr: '1' +metrics: + - name: active_orders + type: simple + type_params: + measure: order_count + filter: + where_filters: + - where_sql_template: "status = 'active'" + - where_sql_template: "amount > 0" +`, + ); + + // Filtered metric creates a new measure + expect(result.semanticModels[0].measures).toHaveLength(2); + const filteredMeasure = result.semanticModels[0].measures[1] as { name: string; filter?: string }; + expect(filteredMeasure.name).toBe('active_orders'); + expect(filteredMeasure.filter).toBe("status = 'active' AND amount > 0"); + }); + + it('creates separate measures for multiple filtered metrics on the same base', () => { + const result = parseOne(yaml` +semantic_models: + - name: intakes + model: ref('intakes') + dimensions: [] + measures: + - name: count_intakes + agg: count + expr: '1' +metrics: + - name: count_first_intakes + label: First Intakes + type: simple + type_params: + measure: count_intakes + filter: + - "is_first_intake = TRUE" + - name: count_new_intakes + label: New Intakes + type: simple + type_params: + measure: count_intakes + filter: + - "new_refill = 'New'" + - name: count_refill_intakes + label: Refill Intakes + type: 
simple + type_params: + measure: count_intakes + filter: + - "new_refill = 'Refill'" +`, + ); + + const measures = result.semanticModels[0].measures; + // 1 base + 3 filtered + expect(measures).toHaveLength(4); + + // Base measure stays clean + expect(measures[0].name).toBe('count_intakes'); + expect((measures[0] as { filter?: string }).filter).toBeUndefined(); + + // Each filtered metric creates its own measure + expect(measures[1].name).toBe('count_first_intakes'); + expect((measures[1] as { filter?: string }).filter).toBe('is_first_intake = TRUE'); + expect(measures[1].label).toBe('First Intakes'); + + expect(measures[2].name).toBe('count_new_intakes'); + expect((measures[2] as { filter?: string }).filter).toBe("new_refill = 'New'"); + + expect(measures[3].name).toBe('count_refill_intakes'); + expect((measures[3] as { filter?: string }).filter).toBe("new_refill = 'Refill'"); + }); + + it('mixed filtered and unfiltered metrics work together', () => { + const result = parseOne(yaml` +semantic_models: + - name: orders + model: ref('orders') + dimensions: [] + measures: + - name: order_count + agg: count + expr: '1' +metrics: + - name: order_count + label: All Orders + type: simple + type_params: + measure: order_count + - name: completed_orders + label: Completed Orders + type: simple + type_params: + measure: order_count + filter: + - "status = 'completed'" +`, + ); + + const measures = result.semanticModels[0].measures; + expect(measures).toHaveLength(2); + + // Unfiltered metric updates base measure label + expect(measures[0].name).toBe('order_count'); + expect(measures[0].label).toBe('All Orders'); + expect((measures[0] as { filter?: string }).filter).toBeUndefined(); + + // Filtered metric creates new measure + expect(measures[1].name).toBe('completed_orders'); + expect(measures[1].label).toBe('Completed Orders'); + expect((measures[1] as { filter?: string }).filter).toBe("status = 'completed'"); + }); + + it('derived metric referencing a filtered metric 
resolves to the new measure name', () => { + const result = parseOne(yaml` +semantic_models: + - name: orders + model: ref('orders') + dimensions: [] + measures: + - name: order_count + agg: count + expr: '1' + - name: order_total + agg: sum + expr: amount +metrics: + - name: all_orders + type: simple + type_params: + measure: order_count + - name: large_orders + type: simple + type_params: + measure: order_count + filter: + - "amount > 100" + - name: pct_large_orders + type: derived + label: "% Large Orders" + type_params: + expr: large_orders / all_orders + metrics: + - name: large_orders + - name: all_orders +`, + ); + + const measures = result.semanticModels[0].measures; + // order_count (base) + order_total (base) + large_orders (filtered) + pct_large_orders (derived) + expect(measures).toHaveLength(4); + + const derived = measures[3] as { name: string; dependsOn: string[]; expr: string }; + expect(derived.name).toBe('pct_large_orders'); + // large_orders resolves to its own name (the new filtered measure) + // all_orders resolves to order_count (unfiltered metric → base measure) + expect(derived.dependsOn).toEqual(['large_orders', 'order_count']); + expect(derived.expr).toBe('large_orders / order_count'); + }); + }); + + // ============ Warnings Collection ============ + + describe('parseFiles — warnings', () => { + it('collects warning for unsupported aggregation type', () => { + const result = parseOne(yaml` +semantic_models: + - name: orders + model: ref('orders') + dimensions: [] + measures: + - name: weird_measure + agg: hyperloglog + expr: user_id +`, + ); + + expect(result.warnings).toHaveLength(1); + expect(result.warnings[0]).toContain("unsupported aggregation 'hyperloglog'"); + expect(result.warnings[0]).toContain('weird_measure'); + }); + + it('collects warning for skipped conversion metrics', () => { + const result = parseOne(yaml` +semantic_models: + - name: events + model: ref('events') + dimensions: [] + measures: + - name: event_count + agg: 
count + expr: '1' +metrics: + - name: signup_conversion + type: conversion + type_params: + conversion_type_params: + entity: user_id + base_measure: page_views + conversion_measure: signups +`, + ); + + expect(result.warnings).toHaveLength(1); + expect(result.warnings[0]).toContain('conversion metrics are not yet supported'); + expect(result.warnings[0]).toContain('signup_conversion'); + }); + + it('collects warning for non-median percentile', () => { + const result = parseOne(yaml` +semantic_models: + - name: orders + model: ref('orders') + dimensions: [] + measures: + - name: p95_time + agg: percentile + expr: delivery_hours + agg_params: + percentile: 0.95 +`, + ); + + expect(result.warnings).toHaveLength(1); + expect(result.warnings[0]).toContain("aggregation 'none'"); + expect(result.warnings[0]).toContain('p95'); + }); + + it('collects warning for unparseable YAML', () => { + const result = parseOne('{{{{invalid yaml!!!!'); + expect(result.warnings).toHaveLength(1); + expect(result.warnings[0]).toContain('Failed to parse YAML'); + }); + + it('collects multiple warnings from different sources', () => { + const result = parseOne(yaml` +semantic_models: + - name: orders + model: ref('orders') + dimensions: [] + measures: + - name: weird_one + agg: hyperloglog + expr: x + - name: weird_two + agg: custom_agg + expr: y +metrics: + - name: funnel + type: conversion + type_params: + conversion_type_params: + entity: user_id + base_measure: a + conversion_measure: b +`, + ); + + expect(result.warnings).toHaveLength(3); + }); + }); + + // ============ Entity Description Passthrough ============ + + describe('parseFiles — entity description on relationships', () => { + it('passes entity description to relationship', () => { + const result = parseOne(yaml` +semantic_models: + - name: customers + model: ref('dim_customers') + entities: + - name: customer_id + type: primary + expr: id + dimensions: [] + measures: [] + - name: orders + model: ref('fct_orders') + entities: 
+ - name: order_id + type: primary + - name: customer_id + type: foreign + description: Links order to the purchasing customer + dimensions: [] + measures: [] +`, + ); + + expect(result.relationships).toHaveLength(1); + expect(result.relationships[0].description).toBe('Links order to the purchasing customer'); + }); + + it('omits description when entity has no description', () => { + const result = parseOne(yaml` +semantic_models: + - name: customers + model: ref('dim_customers') + entities: + - name: customer_id + type: primary + expr: id + dimensions: [] + measures: [] + - name: orders + model: ref('fct_orders') + entities: + - name: order_id + type: primary + - name: customer_id + type: foreign + dimensions: [] + measures: [] +`, + ); + + expect(result.relationships).toHaveLength(1); + expect(result.relationships[0].description).toBeUndefined(); + }); + }); +}); diff --git a/packages/context/src/ingest/adapters/metricflow/deep-parse.ts b/packages/context/src/ingest/adapters/metricflow/deep-parse.ts new file mode 100644 index 00000000..bb80dd20 --- /dev/null +++ b/packages/context/src/ingest/adapters/metricflow/deep-parse.ts @@ -0,0 +1,700 @@ +import { parse as parseYaml } from 'yaml'; +import { noopLogger, type KloLogger } from '../../../core/index.js'; + +export interface DimensionDefinition { + name: string; + column: string; + type: string; + label?: string; + description?: string; +} + +export interface SimpleMeasureDefinition { + type: 'simple'; + name: string; + column: string; + aggregation: 'sum' | 'count' | 'count_distinct' | 'avg' | 'min' | 'max' | 'median' | 'none'; + label?: string; + description?: string; + filter?: string; + cumulative?: boolean; +} + +export type MeasureDefinition = + | SimpleMeasureDefinition + | { + type: 'derived'; + name: string; + expr: string; + dependsOn?: string[]; + label?: string; + description?: string; + }; + +export interface ParsedMetricflowRelationship { + fromTable: string; + fromColumn: string; + toTable: string; 
+ toColumn: string; + fromSchema?: string; + toSchema?: string; + description?: string; +} + +export interface MetricflowParseOptions { + logger?: KloLogger; +} + +// ============ MetricFlow YAML Interfaces ============ + +interface MetricFlowYaml { + semantic_models?: MetricFlowSemanticModel[]; + metrics?: MetricFlowMetric[]; +} + +interface MetricFlowSemanticModel { + name: string; + description?: string; + model: string; + primary_entity?: string; + entities?: MetricFlowEntity[]; + dimensions?: MetricFlowDimension[]; + measures?: MetricFlowMeasure[]; + defaults?: { agg_time_dimension?: string }; + config?: Record; +} + +interface MetricFlowEntity { + name: string; + type: 'primary' | 'foreign' | 'unique' | 'natural'; + expr?: string; + description?: string; +} + +interface MetricFlowDimension { + name: string; + type: 'categorical' | 'time'; + description?: string; + expr?: string; + label?: string; + type_params?: { + time_granularity?: string; + }; +} + +interface MetricFlowMeasure { + name: string; + agg: string; + expr?: string; + description?: string; + label?: string; + create_metric?: boolean; + non_additive_dimension?: Record; + agg_params?: { + percentile?: number; + use_discrete_percentile?: boolean; + use_approximate_percentile?: boolean; + }; +} + +interface MetricFlowMetricInput { + name: string; + alias?: string; + offset_window?: string; + filter?: string | string[]; +} + +type MetricFlowFilter = string | string[] | { where_filters: Array<{ where_sql_template: string }> }; + +interface MetricFlowMetric { + name: string; + label?: string; + description?: string; + type: 'simple' | 'derived' | 'cumulative' | 'ratio' | 'conversion'; + type_params: { + measure?: string | { name: string; filter?: unknown; alias?: string }; + expr?: string; + metrics?: MetricFlowMetricInput[]; + numerator?: MetricFlowMetricInput; + denominator?: MetricFlowMetricInput; + window?: string; + grain_to_date?: string; + cumulative_type_params?: { + window?: string; + 
/**
 * Maps MetricFlow `agg` values to the aggregation names used by
 * `SimpleMeasureDefinition`.
 *
 * The table is built on a null prototype so that indexing it with an arbitrary
 * YAML-supplied string (e.g. `constructor`, `toString`, `__proto__`) can only
 * ever hit one of the keys listed here. With a plain object literal those
 * lookups return inherited `Object.prototype` members, which are truthy and
 * would be accepted as a "supported" aggregation.
 */
const AGG_MAP: Record<string, SimpleMeasureDefinition['aggregation']> = Object.assign(
  Object.create(null) as Record<string, SimpleMeasureDefinition['aggregation']>,
  {
    sum: 'sum',
    sum_boolean: 'sum',
    count: 'count',
    count_distinct: 'count_distinct',
    average: 'avg',
    avg: 'avg',
    min: 'min',
    max: 'max',
    median: 'median',
  } as const,
);
/**
 * Parses MetricFlow YAML (semantic models + metrics) into the normalized
 * structures declared in this module.
 *
 * Parsing is best-effort: problems that can be attributed to a single file,
 * semantic model, measure or metric are reported through `warnings` (and the
 * logger) and the offending item is skipped; they never abort the whole run.
 */
class MetricflowDeepParser {
  constructor(private readonly logger: KloLogger) {}

  /**
   * Parse every file, then resolve metrics and relationships across all of them.
   *
   * @param files YAML documents with the path they were read from (used in warnings only).
   * @returns Parsed semantic models (with simple/derived metrics folded into their
   *   measures), metrics that could not be attached to exactly one model,
   *   de-duplicated relationships, and the collected warnings.
   */
  parseFiles(files: Array<{ content: string; path: string }>): MetricFlowParseResult {
    this.logger.log(`Parsing ${files.length} files for MetricFlow definitions`);

    const allSemanticModels: ParsedSemanticModel[] = [];
    const allMetrics: MetricFlowMetric[] = [];
    const allRelationshipModels: MetricFlowSemanticModel[] = [];
    const warnings: string[] = [];

    for (const file of files) {
      const result = this.parseFile(file.content, file.path, warnings);
      allSemanticModels.push(...result.semanticModels);
      allMetrics.push(...result.metrics);
      allRelationshipModels.push(...result.relationshipModels);
    }

    // Build measure→model index for cross-model metric resolution
    const measureToModel = this.buildMeasureIndex(allSemanticModels);

    // Absorb simple metrics as labels on existing measures, identify cross-model derived metrics
    const crossModelMetrics = this.resolveMetrics(allMetrics, measureToModel, allSemanticModels, warnings);
    const relationships = this.deduplicateRelationships(this.extractRelationships(allRelationshipModels));

    this.logger.log(
      `Total: ${allSemanticModels.length} semantic models, ${crossModelMetrics.length} cross-model metrics, ${relationships.length} relationships`,
    );

    return {
      semanticModels: allSemanticModels,
      crossModelMetrics,
      relationships,
      warnings,
    };
  }

  /**
   * Parse one YAML document.
   *
   * Only list-valued `semantic_models` / `metrics` keys are treated as MetricFlow
   * definitions. Other YAML files may use the same key for something else (for
   * example a `metrics:` config mapping), and spreading a mapping into an array
   * would throw.
   */
  private parseFile(
    yamlContent: string,
    filePath: string | undefined,
    warnings: string[],
  ): {
    semanticModels: ParsedSemanticModel[];
    metrics: MetricFlowMetric[];
    relationshipModels: MetricFlowSemanticModel[];
  } {
    let yaml: MetricFlowYaml;
    try {
      yaml = parseYaml(yamlContent) as MetricFlowYaml;
    } catch (error) {
      const msg = `Failed to parse YAML${filePath ? ` at ${filePath}` : ''}: ${error}`;
      this.logger.warn(msg);
      warnings.push(msg);
      return { semanticModels: [], metrics: [], relationshipModels: [] };
    }

    if (!yaml || typeof yaml !== 'object') {
      return { semanticModels: [], metrics: [], relationshipModels: [] };
    }

    const rawModels: unknown[] = Array.isArray(yaml.semantic_models) ? yaml.semantic_models : [];
    const relationshipModels: MetricFlowSemanticModel[] = [];
    for (const candidate of rawModels) {
      const sm = candidate as MetricFlowSemanticModel | null;
      if (sm && typeof sm === 'object' && typeof sm.model === 'string') {
        relationshipModels.push(sm);
        continue;
      }
      // Without a string `model` there is no table to attach anything to.
      const label = sm && typeof sm === 'object' && typeof sm.name === 'string' ? ` '${sm.name}'` : '';
      const msg = `Semantic model${label}${filePath ? ` in ${filePath}` : ''}: missing or non-string 'model', skipped`;
      this.logger.warn(msg);
      warnings.push(msg);
    }

    const semanticModels = relationshipModels.map((sm) => this.parseSemanticModel(sm, warnings));

    const rawMetrics: unknown[] = Array.isArray(yaml.metrics) ? yaml.metrics : [];
    const metrics = rawMetrics.filter(
      (m): m is MetricFlowMetric => m !== null && typeof m === 'object',
    );

    return { semanticModels, metrics, relationshipModels };
  }

  /** Convert one raw semantic model; unsupported measures are dropped (with a warning). */
  private parseSemanticModel(sm: MetricFlowSemanticModel, warnings: string[]): ParsedSemanticModel {
    const dimensions = (sm.dimensions ?? []).map((d) => this.convertDimension(d));
    const measures = (sm.measures ?? [])
      .map((m) => this.convertMeasure(m, warnings))
      .filter((m): m is MeasureDefinition => m !== null);

    this.logger.debug(
      `Parsed semantic model '${sm.name}': ${dimensions.length} dimensions, ${measures.length} measures`,
    );

    return {
      name: sm.name,
      description: sm.description?.trim() || null,
      modelRef: this.extractModelRef(sm.model),
      dimensions,
      measures,
      entities: sm.entities ?? [],
      defaultTimeDimension: sm.defaults?.agg_time_dimension ?? null,
    };
  }

  /** `time` dimensions stay `time`; every other dimension type is reported as `string`. */
  private convertDimension(dim: MetricFlowDimension): DimensionDefinition {
    const type = dim.type === 'time' ? 'time' : 'string';
    const column = dim.expr ?? dim.name;

    return {
      name: dim.name,
      column,
      type,
      label: dim.label ?? this.toTitleCase(dim.name),
      description: dim.description?.trim() || undefined,
    };
  }

  /**
   * Convert a MetricFlow measure to a simple measure definition.
   *
   * @returns `null` (plus a warning) when the aggregation is not supported.
   */
  private convertMeasure(m: MetricFlowMeasure, warnings: string[]): MeasureDefinition | null {
    const column = m.expr ?? m.name;

    // Percentile: p50 maps to 'median'; any other percentile is kept with
    // aggregation 'none' and the percentile is recorded in the label/description.
    if (m.agg === 'percentile') {
      const pct = m.agg_params?.percentile ?? 0.5;
      if (pct === 0.5) {
        return {
          type: 'simple' as const,
          name: m.name,
          column,
          aggregation: 'median',
          label: m.label ?? this.toTitleCase(m.name),
          description: m.description?.trim() || undefined,
        };
      }
      const pctLabel = `p${Math.round(pct * 100)}`;
      warnings.push(`Measure '${m.name}': non-median percentile (${pctLabel}) stored with aggregation 'none'`);
      return {
        type: 'simple' as const,
        name: m.name,
        column,
        aggregation: 'none',
        label: m.label ?? `${this.toTitleCase(m.name)} (${pctLabel})`,
        description: m.description?.trim() || `${pctLabel} of ${column}`,
      };
    }

    // Own-property check: `agg` comes from YAML, and names such as 'constructor'
    // must not resolve to members inherited from Object.prototype.
    const aggregation = Object.prototype.hasOwnProperty.call(AGG_MAP, m.agg) ? AGG_MAP[m.agg] : undefined;
    if (!aggregation) {
      const msg = `Measure '${m.name}': unsupported aggregation '${m.agg}', skipped`;
      this.logger.warn(msg);
      warnings.push(msg);
      return null;
    }

    return {
      type: 'simple' as const,
      name: m.name,
      column,
      aggregation,
      label: m.label ?? this.toTitleCase(m.name),
      description: m.description?.trim() || undefined,
    };
  }

  /**
   * Derive relationships by matching `foreign` entities to a `primary`/`unique`
   * entity (or `primary_entity` shorthand) of the same name in another model.
   * When several models declare the same entity name, the last one seen wins.
   */
  private extractRelationships(semanticModels: MetricFlowSemanticModel[]): ParsedMetricflowRelationship[] {
    const relationships: ParsedMetricflowRelationship[] = [];

    // Build a map of primary entity names → (model, column)
    const primaryEntities = new Map<string, { model: string; column: string }>();
    for (const sm of semanticModels) {
      // Handle primary_entity shorthand (top-level field)
      if (sm.primary_entity) {
        primaryEntities.set(sm.primary_entity, {
          model: this.extractModelRef(sm.model),
          column: sm.primary_entity,
        });
      }
      for (const entity of sm.entities ?? []) {
        if (entity.type === 'primary' || entity.type === 'unique') {
          primaryEntities.set(entity.name, {
            model: this.extractModelRef(sm.model),
            column: entity.expr ?? entity.name,
          });
        }
      }
    }

    // Match foreign entities to primary entities by name
    for (const sm of semanticModels) {
      const fromTable = this.extractModelRef(sm.model);
      for (const entity of sm.entities ?? []) {
        if (entity.type !== 'foreign') {
          continue;
        }

        const primary = primaryEntities.get(entity.name);
        // Skip unknown targets and self-references.
        if (!primary || primary.model === fromTable) {
          continue;
        }

        relationships.push({
          fromTable,
          fromColumn: entity.expr ?? entity.name,
          toTable: primary.model,
          toColumn: primary.column,
          description: entity.description?.trim() || undefined,
        });
      }
    }

    return relationships;
  }

  /** Index measure name → owning semantic model name (later models overwrite earlier ones). */
  private buildMeasureIndex(models: ParsedSemanticModel[]): Map<string, string> {
    const index = new Map<string, string>();
    for (const model of models) {
      for (const measure of model.measures) {
        index.set(measure.name, model.name);
      }
    }
    return index;
  }

  /**
   * Extract measure name from type_params.measure which can be a string or { name: string }.
   */
  private extractMeasureName(measure: string | { name: string } | undefined): string | undefined {
    if (!measure) {
      return undefined;
    }
    if (typeof measure === 'string') {
      return measure;
    }
    return measure.name;
  }

  /**
   * Normalize metric filter to an array of strings.
   * MetricFlow filters can be a string, array of strings, or { where_filters: [{ where_sql_template }] }.
   */
  private normalizeFilter(filter: MetricFlowFilter | undefined): string[] {
    if (!filter) {
      return [];
    }
    if (typeof filter === 'string') {
      return [filter];
    }
    if (Array.isArray(filter)) {
      return filter;
    }
    if (filter.where_filters) {
      return filter.where_filters.map((f) => f.where_sql_template);
    }
    return [];
  }

  /**
   * For ratio metrics, build the referenced metrics list from numerator/denominator.
   * For derived metrics, use type_params.metrics directly.
   */
  private getReferencedMetrics(metric: MetricFlowMetric): MetricFlowMetricInput[] {
    if (metric.type === 'derived') {
      return metric.type_params?.metrics ?? [];
    }
    if (metric.type === 'ratio') {
      const refs: MetricFlowMetricInput[] = [];
      if (metric.type_params?.numerator) {
        refs.push(metric.type_params.numerator);
      }
      if (metric.type_params?.denominator) {
        refs.push(metric.type_params.denominator);
      }
      return refs;
    }
    return [];
  }

  /**
   * Fold metrics into the parsed models and return those that cannot be attached
   * to exactly one model.
   *
   * - simple/cumulative, unfiltered: label/description are applied to the base measure.
   * - simple/cumulative, filtered: a new measure named after the metric is added
   *   (or replaces the base measure when the names coincide).
   * - derived/ratio resolving to one model: added to that model as a derived measure.
   * - derived/ratio otherwise: returned as a cross-model metric.
   * - conversion: skipped with a warning.
   *
   * Mutates `models` (measures) and `measureToModel`. Metrics are processed in
   * input order, so a derived metric only sees derived metrics registered before it.
   */
  private resolveMetrics(
    metrics: MetricFlowMetric[],
    measureToModel: Map<string, string>,
    models: ParsedSemanticModel[],
    warnings: string[],
  ): ParsedCrossModelMetric[] {
    const crossModelMetrics: ParsedCrossModelMetric[] = [];

    // metric→model: which model owns the measure behind a simple/cumulative metric
    // (needed for derived-of-derived resolution).
    // metric→measure name: what a reference to the metric should be rewritten to;
    // filtered metrics become a new measure carrying the metric's own name.
    const metricToModel = new Map<string, string>();
    const metricToMeasureName = new Map<string, string>();
    for (const metric of metrics) {
      if (metric.type !== 'simple' && metric.type !== 'cumulative') {
        continue;
      }
      // `type_params` may be absent in hand-written or legacy YAML.
      const measureName = this.extractMeasureName(metric.type_params?.measure);
      if (!measureName) {
        continue;
      }
      const owner = measureToModel.get(measureName);
      if (owner) {
        metricToModel.set(metric.name, owner);
      }
      const isFiltered = this.normalizeFilter(metric.filter).length > 0;
      metricToMeasureName.set(metric.name, isFiltered ? metric.name : measureName);
    }

    for (const metric of metrics) {
      if (metric.type === 'conversion') {
        this.logger.debug(`Skipping conversion metric '${metric.name}' (not supported)`);
        warnings.push(`Metric '${metric.name}': conversion metrics are not yet supported, skipped`);
        continue;
      }

      if (metric.type === 'simple' || metric.type === 'cumulative') {
        const measureName = this.extractMeasureName(metric.type_params?.measure);
        if (!measureName) {
          continue;
        }

        const ownerModelName = measureToModel.get(measureName);
        if (!ownerModelName) {
          continue;
        }

        const model = models.find((m) => m.name === ownerModelName);
        if (!model) {
          continue;
        }

        const baseMeasure = model.measures.find((m) => m.name === measureName);
        if (!baseMeasure) {
          continue;
        }

        const filterClauses = this.normalizeFilter(metric.filter);

        if (filterClauses.length > 0 && baseMeasure.type === 'simple') {
          // Filtered metric: create a NEW measure (copy of base with metric's identity + filter)
          const translatedFilter = filterClauses.map((f) => this.translateJinjaFilter(f)).join(' AND ');
          const newMeasure: SimpleMeasureDefinition = {
            type: 'simple' as const,
            name: metric.name,
            column: baseMeasure.column,
            aggregation: baseMeasure.aggregation,
            label: metric.label ?? this.toTitleCase(metric.name),
            description: metric.description?.trim() || baseMeasure.description,
            filter: translatedFilter,
          };

          if (metric.type === 'cumulative') {
            newMeasure.cumulative = true;
          }

          if (metric.name === baseMeasure.name) {
            // Same name as base measure: replace in-place to avoid duplicates
            const idx = model.measures.indexOf(baseMeasure);
            model.measures[idx] = newMeasure;
          } else {
            model.measures.push(newMeasure);
          }
          measureToModel.set(metric.name, ownerModelName);
        } else {
          // Unfiltered metric: update base measure's label/description in-place
          if (metric.label) {
            baseMeasure.label = metric.label;
          }
          // Trimmed like every other description in this parser (YAML block
          // scalars carry a trailing newline).
          const description = metric.description?.trim();
          if (description) {
            baseMeasure.description = description;
          }
          if (metric.type === 'cumulative' && baseMeasure.type === 'simple') {
            baseMeasure.cumulative = true;
          }
        }
      } else if (metric.type === 'derived' || metric.type === 'ratio') {
        const referencedMetrics = this.getReferencedMetrics(metric);
        if (referencedMetrics.length === 0) {
          continue;
        }

        // Find which models own the referenced metrics using metricToModel index
        const ownerModels = new Set<string>();
        for (const ref of referencedMetrics) {
          const owner = metricToModel.get(ref.name);
          if (owner) {
            ownerModels.add(owner);
          }
        }

        const explicitExpr = metric.type_params?.expr;

        if (ownerModels.size === 1) {
          // Single-model derived/ratio metric — add as derived measure to that model
          const ownerModelName = [...ownerModels][0];
          const model = models.find((m) => m.name === ownerModelName);
          if (!model) {
            continue;
          }

          const dependsOn = referencedMetrics.map((ref) => metricToMeasureName.get(ref.name) ?? ref.name);

          let expr = explicitExpr ?? '';

          // For ratio metrics without an explicit expr, generate "numerator / denominator"
          if (metric.type === 'ratio' && !explicitExpr) {
            const [numName, denName] = dependsOn;
            expr = numName && denName ? `${numName} / NULLIF(${denName}, 0)` : dependsOn.join(' / ');
          }

          // Replace metric name aliases with actual measure names in expression
          for (const ref of referencedMetrics) {
            const actualName = metricToMeasureName.get(ref.name) ?? ref.name;
            const aliasOrName = ref.alias ?? ref.name;
            if (aliasOrName !== actualName) {
              // The alias is escaped so it is matched literally (and an odd alias
              // cannot make the RegExp constructor throw); the function replacer
              // keeps `$` sequences in the measure name from being expanded.
              const pattern = new RegExp(`\\b${this.escapeRegExp(aliasOrName)}\\b`, 'g');
              expr = expr.replace(pattern, () => actualName);
            }
          }

          const derivedMeasure: MeasureDefinition = {
            type: 'derived' as const,
            name: metric.name,
            expr,
            dependsOn,
            label: metric.label ?? this.toTitleCase(metric.name),
            description: metric.description?.trim() || undefined,
          };

          model.measures.push(derivedMeasure);

          // Register this derived metric in metricToModel so derived-of-derived can find it
          metricToModel.set(metric.name, ownerModelName);
        } else {
          // Cross-model or unresolved derived metric
          // NOTE(review): `metricName` receives the owning *model* name when the
          // reference resolves, and the metric name otherwise — confirm this is
          // what consumers of ParsedCrossModelMetric expect.
          const dependsOn = referencedMetrics.map((ref) => {
            const ownerModel = metricToModel.get(ref.name);
            return { metricName: ownerModel ?? ref.name, alias: ref.alias };
          });

          const filterClauses = this.normalizeFilter(metric.filter);
          const filter =
            filterClauses.length > 0 ? filterClauses.map((f) => this.translateJinjaFilter(f)).join(' AND ') : null;

          crossModelMetrics.push({
            name: metric.name,
            label: metric.label ?? null,
            description: metric.description?.trim() || null,
            type: 'derived',
            expr: explicitExpr ?? '',
            dependsOn,
            filter,
          });
        }
      }
    }

    return crossModelMetrics;
  }

  /**
   * Translate MetricFlow Jinja filter syntax to raw SQL.
   * {{ Dimension('model__column') }} → column
   * {{ TimeDimension('model__column', 'day') }} → column
   * {{ Entity('model__column') }} → column
   * {{ Metric('name', ...) }} → name
   *
   * Arguments may be single- or double-quoted. Anything that does not match one
   * of these forms is left untouched; the result is trimmed.
   */
  translateJinjaFilter(filter: string): string {
    // Last `__`-separated segment of whichever quote alternative matched.
    const lastSegment = (_match: string, single?: string, double?: string): string => {
      const parts = (single ?? double ?? '').split('__');
      return parts[parts.length - 1];
    };

    return filter
      .replace(/\{\{\s*Dimension\s*\(\s*(?:'([^']+)'|"([^"]+)")\s*\)\s*\}\}/g, lastSegment)
      .replace(
        /\{\{\s*TimeDimension\s*\(\s*(?:'([^']+)'|"([^"]+)")\s*(?:,\s*(?:'[^']*'|"[^"]*")\s*)?\)\s*\}\}/g,
        lastSegment,
      )
      .replace(/\{\{\s*Entity\s*\(\s*(?:'([^']+)'|"([^"]+)")\s*\)\s*\}\}/g, lastSegment)
      .replace(
        /\{\{\s*Metric\s*\(\s*(?:'([^']+)'|"([^"]+)")\s*(?:,\s*[^)]+)?\)\s*\}\}/g,
        (_match: string, single?: string, double?: string) => single ?? double ?? '',
      )
      .trim();
  }

  /**
   * Extract the model/table name from a MetricFlow `model` value.
   *
   * Recognised forms:
   * - ref('model'), ref('package', 'model'), ref('model', v=2) → model
   * - source('source', 'table') → table
   *
   * Any other string is returned unchanged; a non-string value yields its string
   * form (empty for null/undefined) instead of throwing.
   */
  private extractModelRef(modelStr: string): string {
    if (typeof modelStr !== 'string') {
      return String(modelStr ?? '');
    }

    // Optional leading package argument, then the model name, then optional
    // trailing arguments such as a version.
    const refMatch = modelStr.match(
      /ref\s*\(\s*(?:['"][^'"]+['"]\s*,\s*)?['"]([^'"]+)['"]\s*(?:,[^)]*)?\)/,
    );
    if (refMatch) {
      return refMatch[1];
    }

    const sourceMatch = modelStr.match(/source\s*\(\s*['"][^'"]+['"]\s*,\s*['"]([^'"]+)['"]\s*\)/);
    if (sourceMatch) {
      return sourceMatch[1];
    }

    return modelStr;
  }

  /** `order_total_usd` → `Order Total Usd`; empty segments are dropped. */
  private toTitleCase(snakeCase: string): string {
    return snakeCase
      .split('_')
      .filter(Boolean)
      .map((w) => w.charAt(0).toUpperCase() + w.slice(1).toLowerCase())
      .join(' ');
  }

  /** Escape regex metacharacters so `text` is matched literally inside a RegExp. */
  private escapeRegExp(text: string): string {
    return text.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
  }

  /** Keep the first relationship for each case-insensitive from→to column pair. */
  private deduplicateRelationships(relationships: ParsedMetricflowRelationship[]): ParsedMetricflowRelationship[] {
    const seen = new Set<string>();
    return relationships.filter((rel) => {
      const key = `${rel.fromTable}.${rel.fromColumn}->${rel.toTable}.${rel.toColumn}`.toLowerCase();
      if (seen.has(key)) {
        return false;
      }
      seen.add(key);
      return true;
    });
  }
}
/**
 * Report whether a staged directory looks like a MetricFlow project.
 *
 * Walks `stagedDir` recursively and returns `true` as soon as one `.yml`/`.yaml`
 * file parses to a mapping whose top-level `semantic_models` or `metrics` value
 * is a list. Files that cannot be read or parsed are ignored.
 *
 * @param stagedDir Directory to scan; errors from listing it are propagated.
 * @returns `true` on the first qualifying file, otherwise `false`.
 */
export async function detectMetricFlowStagedDir(stagedDir: string): Promise<boolean> {
  const listing = await readdir(stagedDir, { withFileTypes: true, recursive: true });
  const yamlFiles = listing.filter((item) => item.isFile() && YAML_EXT_RE.test(item.name));

  for (const item of yamlFiles) {
    let doc: unknown;
    try {
      // Read and parse failures are treated alike: the file simply does not count.
      const text = await readFile(join(item.parentPath, item.name), 'utf-8');
      doc = parseYaml(text);
    } catch {
      continue;
    }

    if (doc === null || typeof doc !== 'object') {
      continue;
    }

    const topLevel = doc as Record<string, unknown>;
    const hasDefinitions = Array.isArray(topLevel.semantic_models) || Array.isArray(topLevel.metrics);
    if (hasDefinitions) {
      return true;
    }
  }

  return false;
}
'models/orders.yml': 'semantic_models:\n - name: orders\n model: ref("orders")\n', + 'models/readme.md': '# not staged\n', + 'macros/util.sql': 'select 1\n', + }); + + const result = await fetchMetricflowRepo({ + config: { + repoUrl: repo.repoUrl, + branch: 'main', + path: null, + authToken: null, + parsedTargetTables: {}, + }, + cacheDir: join(tmpRoot, 'cache'), + stagedDir: join(tmpRoot, 'stage'), + }); + + expect(result.filesCopied).toBe(2); + expect(result.commitHash).toMatch(/^[0-9a-f]{40}$/); + await expect(readFile(join(tmpRoot, 'stage/dbt_project.yml'), 'utf-8')).resolves.toContain('analytics'); + await expect(readFile(join(tmpRoot, 'stage/models/orders.yml'), 'utf-8')).resolves.toContain('semantic_models'); + expect(await exists(join(tmpRoot, 'stage/models/readme.md'))).toBe(false); + expect(await exists(join(tmpRoot, 'stage/macros/util.sql'))).toBe(false); + }); + + it('honors a configured repo subdirectory', async () => { + const repo = await makeRepo(tmpRoot, { + 'warehouse/dbt_project.yml': 'name: warehouse\n', + 'warehouse/models/orders.yaml': 'semantic_models:\n - name: orders\n model: ref("orders")\n', + 'outside/ignored.yml': 'semantic_models:\n - name: ignored\n model: ref("ignored")\n', + }); + + const result = await fetchMetricflowRepo({ + config: { + repoUrl: repo.repoUrl, + branch: 'main', + path: 'warehouse', + authToken: null, + parsedTargetTables: {}, + }, + cacheDir: join(tmpRoot, 'cache'), + stagedDir: join(tmpRoot, 'stage'), + }); + + expect(result.filesCopied).toBe(2); + await expect(readFile(join(tmpRoot, 'stage/models/orders.yaml'), 'utf-8')).resolves.toContain('orders'); + expect(await exists(join(tmpRoot, 'stage/outside/ignored.yml'))).toBe(false); + }); + + it('returns zero files when the configured subdirectory is absent', async () => { + const repo = await makeRepo(tmpRoot, { + 'dbt_project.yml': 'name: analytics\n', + }); + await mkdir(join(tmpRoot, 'stage'), { recursive: true }); + + const result = await fetchMetricflowRepo({ + 
/**
 * Clone (or update) the configured repository into `cacheDir` and copy its YAML
 * files into `stagedDir`.
 *
 * @param params.config  Repository URL, branch (defaults to `main` when empty),
 *   optional subdirectory `path` and auth token.
 * @param params.cacheDir  Directory holding the git checkout.
 * @param params.stagedDir Destination for the copied YAML files.
 * @returns The commit hash reported by `cloneOrPull` and the number of files copied.
 * @throws Error when `config.path` points outside the checkout, or — with the
 *   message passed through `sanitizeRepoError` — when cloning or copying fails.
 */
export async function fetchMetricflowRepo(params: FetchMetricflowRepoParams): Promise<FetchMetricflowRepoResult> {
  const { config, cacheDir, stagedDir } = params;
  const branch = config.branch || 'main';

  const sourceRoot = config.path ? join(cacheDir, config.path) : cacheDir;

  // `path` is meant to select a subdirectory of the repository. Reject values such
  // as `../other` that would make the copy step stage YAML from outside the
  // checkout. Checked before cloning so a bad config fails fast; the message
  // contains no credentials, so it does not need sanitizing.
  const relToCheckout = relative(cacheDir, sourceRoot);
  if (relToCheckout === '..' || relToCheckout.startsWith('../') || relToCheckout.startsWith('..\\')) {
    throw new Error(`Configured path '${config.path}' resolves outside the repository checkout`);
  }

  try {
    const { commitHash } = await cloneOrPull({
      repoUrl: config.repoUrl,
      authToken: config.authToken,
      cacheDir,
      branch,
    });

    const filesCopied = await copyYamlFilesRecursive(sourceRoot, stagedDir);
    return { commitHash, filesCopied };
  } catch (err) {
    // Re-thrown with a sanitized message so the auth token cannot leak via git errors.
    throw new Error(sanitizeRepoError(err, config.authToken));
  }
}
/**
 * Copy every `.yml`/`.yaml` file under `sourceRoot` into `destRoot`, keeping the
 * relative directory layout.
 *
 * @returns Number of files copied; `0` (and `destRoot` is not created) when
 *   `sourceRoot` is not accessible.
 */
async function copyYamlFilesRecursive(sourceRoot: string, destRoot: string): Promise<number> {
  const sourcePresent = await dirExists(sourceRoot);
  if (!sourcePresent) {
    return 0;
  }

  await mkdir(destRoot, { recursive: true });

  const listing = await readdir(sourceRoot, { withFileTypes: true, recursive: true });
  const yamlFiles = listing.filter((item) => item.isFile() && YAML_EXT_RE.test(item.name));

  for (const item of yamlFiles) {
    const from = join(item.parentPath, item.name);
    const to = join(destRoot, relative(sourceRoot, from));
    // Make sure the file's parent directory exists before copying into it.
    await mkdir(join(to, '..'), { recursive: true });
    await copyFile(from, to);
  }

  return yamlFiles.length;
}

/** Resolve to `true` when `path` is accessible, `false` when `access` rejects. */
async function dirExists(path: string): Promise<boolean> {
  return access(path).then(
    () => true,
    () => false,
  );
}
[], + }; +} + +describe('buildMetricFlowGraph', () => { + it('puts each standalone semantic_model in its own component', () => { + const graph = buildMetricFlowGraph( + project({ + semanticModels: [ + { + path: 'models/a.yml', + name: 'a', + modelRef: 'a', + extendsFrom: [], + measureNames: ['m1'], + dimensionNames: [], + entityNames: [], + primaryEntities: [], + foreignEntities: [], + defaultTimeDimension: null, + }, + { + path: 'models/b.yml', + name: 'b', + modelRef: 'b', + extendsFrom: [], + measureNames: ['m2'], + dimensionNames: [], + entityNames: [], + primaryEntities: [], + foreignEntities: [], + defaultTimeDimension: null, + }, + ], + allPaths: ['models/a.yml', 'models/b.yml'], + }), + ); + expect(graph.components).toHaveLength(2); + const byPath = new Map(graph.components.flatMap((c) => c.paths.map((p) => [p, c.id]))); + expect(byPath.get('models/a.yml')).not.toBe(byPath.get('models/b.yml')); + }); + + it('unions two files when one semantic_model extends another', () => { + const graph = buildMetricFlowGraph( + project({ + semanticModels: [ + { + path: 'models/orders.yml', + name: 'orders', + modelRef: 'orders', + extendsFrom: [], + measureNames: ['gross_amount'], + dimensionNames: [], + entityNames: [], + primaryEntities: [], + foreignEntities: [], + defaultTimeDimension: null, + }, + { + path: 'models/orders_ext.yml', + name: 'orders_ext', + modelRef: 'orders_ext', + extendsFrom: ['orders'], + measureNames: ['refund_amount'], + dimensionNames: [], + entityNames: [], + primaryEntities: [], + foreignEntities: [], + defaultTimeDimension: null, + }, + ], + allPaths: ['models/orders.yml', 'models/orders_ext.yml'], + }), + ); + expect(graph.components).toHaveLength(1); + expect(graph.components[0].paths.sort()).toEqual(['models/orders.yml', 'models/orders_ext.yml']); + }); + + it('unions a metric-only file with the semantic_model files whose measures it references', () => { + const graph = buildMetricFlowGraph( + project({ + semanticModels: [ + { + path: 
'models/orders.yml', + name: 'orders', + modelRef: 'orders', + extendsFrom: [], + measureNames: ['gross_amount'], + dimensionNames: [], + entityNames: [], + primaryEntities: [], + foreignEntities: [], + defaultTimeDimension: null, + }, + { + path: 'models/orders_ext.yml', + name: 'orders_ext', + modelRef: 'orders_ext', + extendsFrom: ['orders'], + measureNames: ['refund_amount'], + dimensionNames: [], + entityNames: [], + primaryEntities: [], + foreignEntities: [], + defaultTimeDimension: null, + }, + ], + metrics: [ + { + path: 'metrics/orders_final.yml', + name: 'revenue', + type: 'derived', + measureRef: null, + dependsOn: ['gross_amount', 'refund_amount'], + }, + ], + allPaths: ['metrics/orders_final.yml', 'models/orders.yml', 'models/orders_ext.yml'], + }), + ); + expect(graph.components).toHaveLength(1); + expect(graph.components[0].paths.sort()).toEqual([ + 'metrics/orders_final.yml', + 'models/orders.yml', + 'models/orders_ext.yml', + ]); + }); + + it('leaves unrelated semantic_models in separate components (two disjoint groups)', () => { + const graph = buildMetricFlowGraph( + project({ + semanticModels: [ + { + path: 'models/sales/orders.yml', + name: 'orders', + modelRef: 'orders', + extendsFrom: [], + measureNames: ['order_count'], + dimensionNames: [], + entityNames: [], + primaryEntities: [], + foreignEntities: [], + defaultTimeDimension: null, + }, + { + path: 'models/marketing/campaigns.yml', + name: 'campaigns', + modelRef: 'campaigns', + extendsFrom: [], + measureNames: ['spend'], + dimensionNames: [], + entityNames: [], + primaryEntities: [], + foreignEntities: [], + defaultTimeDimension: null, + }, + ], + allPaths: ['models/marketing/campaigns.yml', 'models/sales/orders.yml'], + }), + ); + expect(graph.components).toHaveLength(2); + }); + + it('returns components ordered lexicographically by their first-name-member', () => { + const graph = buildMetricFlowGraph( + project({ + semanticModels: [ + { + path: 'models/z.yml', + name: 'z_model', + 
modelRef: 'z', + extendsFrom: [], + measureNames: ['m'], + dimensionNames: [], + entityNames: [], + primaryEntities: [], + foreignEntities: [], + defaultTimeDimension: null, + }, + { + path: 'models/a.yml', + name: 'a_model', + modelRef: 'a', + extendsFrom: [], + measureNames: ['m'], + dimensionNames: [], + entityNames: [], + primaryEntities: [], + foreignEntities: [], + defaultTimeDimension: null, + }, + ], + allPaths: ['models/a.yml', 'models/z.yml'], + }), + ); + expect(graph.components.map((c) => c.leadName)).toEqual(['a_model', 'z_model']); + }); + + it('metric that references an unknown measure still anchors its own file as a singleton', () => { + const graph = buildMetricFlowGraph( + project({ + metrics: [ + { path: 'metrics/dangling.yml', name: 'dangling', type: 'simple', measureRef: 'nowhere', dependsOn: [] }, + ], + allPaths: ['metrics/dangling.yml'], + }), + ); + expect(graph.components).toHaveLength(1); + expect(graph.components[0].paths).toEqual(['metrics/dangling.yml']); + expect(graph.components[0].leadName).toBe('dangling'); + }); + + it('transitive extends forms one component across 3 files', () => { + const graph = buildMetricFlowGraph( + project({ + semanticModels: [ + { + path: 'a.yml', + name: 'a', + modelRef: 'a', + extendsFrom: [], + measureNames: [], + dimensionNames: [], + entityNames: [], + primaryEntities: [], + foreignEntities: [], + defaultTimeDimension: null, + }, + { + path: 'b.yml', + name: 'b', + modelRef: 'b', + extendsFrom: ['a'], + measureNames: [], + dimensionNames: [], + entityNames: [], + primaryEntities: [], + foreignEntities: [], + defaultTimeDimension: null, + }, + { + path: 'c.yml', + name: 'c', + modelRef: 'c', + extendsFrom: ['b'], + measureNames: [], + dimensionNames: [], + entityNames: [], + primaryEntities: [], + foreignEntities: [], + defaultTimeDimension: null, + }, + ], + allPaths: ['a.yml', 'b.yml', 'c.yml'], + }), + ); + expect(graph.components).toHaveLength(1); + 
import type { ParsedMetricFlowProject } from './parse.js';

/** One connected group of MetricFlow YAML files (semantic models and/or metrics). */
export interface MetricFlowComponent {
  /** Stable integer id, assigned in lexicographic order of `leadName`. */
  id: number;
  /** Sorted list of relative paths making up this component. */
  paths: string[];
  /** Sorted list of semantic_model names in this component. Empty for metric-only components. */
  semanticModelNames: string[];
  /** Sorted list of metric names whose defining file is in this component. */
  metricNames: string[];
  /** Lexicographically-first semantic_model name, or first metric name if none. Drives unitKey. */
  leadName: string;
}

export interface MetricFlowGraph {
  components: MetricFlowComponent[];
  /** Map semantic_model name → containing component id. */
  componentByModelName: Map<string, number>;
  /** Map relative path → containing component id. */
  componentByPath: Map<string, number>;
  /** Map semantic_model name → its declaring path. */
  pathByModelName: Map<string, string>;
  /** Map semantic_model name → sorted transitive extends ancestor names (used for dependency widening in re-sync). */
  extendsAncestorsByModelName: Map<string, string[]>;
}

/** Minimal disjoint-set structure with path compression, keyed by arbitrary values. */
class UnionFind<T> {
  private readonly parent = new Map<T, T>();

  /** Register `item` as its own singleton set if it is not known yet. */
  add(item: T): void {
    if (!this.parent.has(item)) {
      this.parent.set(item, item);
    }
  }

  /** Return the representative of the set containing `item`, registering it on first sight. */
  find(item: T): T {
    this.add(item);
    let root = item;
    while (this.parent.get(root) !== root) {
      const next = this.parent.get(root);
      if (next === undefined) {
        throw new Error('union-find parent missing during root traversal');
      }
      root = next;
    }
    // Path compression — walk again, point each visited node straight at the root.
    let cursor = item;
    while (this.parent.get(cursor) !== root) {
      const next = this.parent.get(cursor);
      if (next === undefined) {
        throw new Error('union-find parent missing during path compression');
      }
      this.parent.set(cursor, root);
      cursor = next;
    }
    return root;
  }

  /** Merge the sets containing `a` and `b`. */
  union(a: T, b: T): void {
    const ra = this.find(a);
    const rb = this.find(b);
    if (ra !== rb) {
      this.parent.set(ra, rb);
    }
  }

  /** Return every item that is currently the representative of its set. */
  roots(): T[] {
    return [...this.parent.keys()].filter((k) => this.find(k) === k);
  }
}

/**
 * Collect the sorted, de-duplicated names reachable from `modelName` by following `direct`
 * (model name → direct parents) transitively.
 *
 * `visited` is shared across the recursion so that a cyclic `extends` chain terminates: a model
 * that has already been expanded contributes no further ancestors.
 */
function transitiveAncestors(
  modelName: string,
  direct: Map<string, string[]>,
  visited = new Set<string>(),
): string[] {
  if (visited.has(modelName)) {
    return [];
  }
  visited.add(modelName);
  const parents = direct.get(modelName) ?? [];
  const out = new Set<string>();
  for (const parent of parents) {
    out.add(parent);
    for (const a of transitiveAncestors(parent, direct, visited)) {
      out.add(a);
    }
  }
  return [...out].sort();
}

/**
 * Group the files of a parsed MetricFlow project into connected components.
 *
 * Two files end up in the same component when a semantic_model in one `extends` a model declared
 * in the other, or when a metric in one references (via `measureRef` / `dependsOn`) a measure
 * owned by a semantic_model in the other. Files that carry neither a semantic_model nor a metric
 * do not participate.
 *
 * @param project Parsed project; only `semanticModels` and `metrics` are read.
 * @returns Components ordered by `leadName` (ids follow that order) plus lookup indexes.
 */
export function buildMetricFlowGraph(project: ParsedMetricFlowProject): MetricFlowGraph {
  // Index: semantic_model name → path, path → declared model names, measure name → owner path.
  // A single YAML file may declare several semantic models, so the per-path index keeps all of
  // them (a plain path → name map would silently retain only the last one).
  const pathByModelName = new Map<string, string>();
  const modelNamesByPath = new Map<string, Set<string>>();
  const measureOwnerPath = new Map<string, string>();
  for (const sm of project.semanticModels) {
    pathByModelName.set(sm.name, sm.path);
    const declared = modelNamesByPath.get(sm.path) ?? new Set<string>();
    declared.add(sm.name);
    modelNamesByPath.set(sm.path, declared);
    for (const mName of sm.measureNames) {
      // First declaration wins when two models expose the same measure name.
      if (!measureOwnerPath.has(mName)) {
        measureOwnerPath.set(mName, sm.path);
      }
    }
  }

  // Index: path → metric names defined in that file (duplicates preserved).
  const metricNamesByPath = new Map<string, string[]>();
  for (const m of project.metrics) {
    const defined = metricNamesByPath.get(m.path) ?? [];
    defined.push(m.name);
    metricNamesByPath.set(m.path, defined);
  }

  // Union-find keyed by relative path. Every path that carries at least one semantic_model
  // or at least one metric enters the structure; other YAMLs (e.g. `dbt_project.yml`) are
  // ignored.
  const uf = new UnionFind<string>();
  const participatingPaths = new Set<string>();
  for (const sm of project.semanticModels) {
    uf.add(sm.path);
    participatingPaths.add(sm.path);
  }
  for (const m of project.metrics) {
    uf.add(m.path);
    participatingPaths.add(m.path);
  }

  // (a) extends: union a model's file with the file of each parent that is declared in the project.
  for (const sm of project.semanticModels) {
    for (const parent of sm.extendsFrom) {
      const parentPath = pathByModelName.get(parent);
      if (parentPath) {
        uf.union(sm.path, parentPath);
      }
    }
  }

  // (b) metric → measure reference unions. `measureRef` and every `dependsOn` entry are looked up
  // in the measure index; a hit unions the metric's file with the measure owner's file. Names that
  // do not resolve to a measure are ignored.
  // NOTE(review): a `dependsOn` entry that is only a metric name (and not also a measure name) is
  // therefore not followed — the depending file is not unioned with that metric's file. Confirm
  // this matches what the parser puts in `dependsOn`.
  for (const m of project.metrics) {
    const candidates: string[] = [];
    if (m.measureRef) {
      candidates.push(m.measureRef);
    }
    candidates.push(...m.dependsOn);
    for (const name of candidates) {
      const ownerPath = measureOwnerPath.get(name);
      if (ownerPath) {
        uf.union(m.path, ownerPath);
      }
    }
  }

  // Group participating paths by root.
  const groups = new Map<string, string[]>();
  for (const path of participatingPaths) {
    const root = uf.find(path);
    const list = groups.get(root) ?? [];
    list.push(path);
    groups.set(root, list);
  }

  // Build component records.
  const components: MetricFlowComponent[] = [];
  const componentByPath = new Map<string, number>();
  const componentByModelName = new Map<string, number>();

  // Compute leadName for each raw group before assigning ids so ordering is stable.
  const rawComponents = [...groups.values()].map((paths) => {
    const sortedPaths = [...paths].sort();
    const smNames = sortedPaths.flatMap((p) => [...(modelNamesByPath.get(p) ?? [])]).sort();
    const metricNames = sortedPaths.flatMap((p) => metricNamesByPath.get(p) ?? []).sort();
    const leadName = smNames[0] ?? metricNames[0] ?? sortedPaths[0];
    return { paths: sortedPaths, semanticModelNames: smNames, metricNames, leadName };
  });
  rawComponents.sort((a, b) => a.leadName.localeCompare(b.leadName));

  rawComponents.forEach((rc, id) => {
    components.push({ id, ...rc });
    for (const path of rc.paths) {
      componentByPath.set(path, id);
    }
    for (const name of rc.semanticModelNames) {
      componentByModelName.set(name, id);
    }
  });

  // Extends ancestor index (used by DiffSet widening).
  const directExtends = new Map<string, string[]>();
  for (const sm of project.semanticModels) {
    directExtends.set(sm.name, [...sm.extendsFrom].sort());
  }
  const extendsAncestorsByModelName = new Map<string, string[]>();
  for (const sm of project.semanticModels) {
    extendsAncestorsByModelName.set(sm.name, transitiveAncestors(sm.name, directExtends));
  }

  return {
    components,
    componentByModelName,
    componentByPath,
    pathByModelName,
    extendsAncestorsByModelName,
  };
}
name: 'global_revenue', + label: null, + description: 'Revenue everywhere', + type: 'derived', + expr: 'sum(revenue)', + dependsOn: [{ metricName: 'orders' }], + filter: null, + }, + ], + relationships: [{ fromTable: 'orders', fromColumn: 'customer_id', toTable: 'customers', toColumn: 'id' }], + warnings: ['parser warning'], + }; +} + +describe('importMetricflowSemanticModels', () => { + it('writes semantic models through a worktree-scoped semantic layer service and returns touched sources', async () => { + const scoped = { + getManifestEntry: vi.fn().mockResolvedValue(null), + isManifestBacked: vi.fn().mockResolvedValue(false), + loadAllSources: vi.fn().mockResolvedValue([]), + loadSource: vi.fn().mockResolvedValue(null), + writeSource: vi.fn().mockResolvedValue({ warnings: [] }), + }; + const semanticLayerService = { + forWorktree: vi.fn().mockReturnValue(scoped), + getManifestEntry: vi.fn(), + isManifestBacked: vi.fn(), + loadAllSources: vi.fn(), + loadSource: vi.fn(), + writeSource: vi.fn(), + }; + + const result = await importMetricflowSemanticModels( + { semanticLayerService }, + { + connectionId: 'warehouse-1', + parseResult: parseResult(), + targetSchema: null, + hostTables: [ + { id: 'orders-table', name: 'orders', catalog: null, db: null, columns: [{ id: 'c1', name: 'customer_id' }] }, + { id: 'customers-table', name: 'customers', catalog: null, db: null, columns: [{ id: 'c2', name: 'id' }] }, + ], + workdir: '/tmp/session-worktree', + }, + ); + + expect(semanticLayerService.forWorktree).toHaveBeenCalledWith('/tmp/session-worktree'); + expect(scoped.writeSource).toHaveBeenCalledTimes(2); + expect(scoped.writeSource).toHaveBeenNthCalledWith( + 1, + 'warehouse-1', + expect.objectContaining({ name: 'orders' }), + 'dbt MetricFlow', + DBT_SYSTEM_EMAIL, + 'dbt MetricFlow sync: create source orders', + { skipValidation: true }, + ); + expect(scoped.writeSource).toHaveBeenNthCalledWith( + 2, + 'warehouse-1', + expect.objectContaining({ name: 'global-revenue' }), 
+ 'dbt MetricFlow', + DBT_SYSTEM_EMAIL, + 'dbt MetricFlow sync: create cross-model source global-revenue', + { skipValidation: true }, + ); + expect(result).toEqual({ + sourcesCreated: 1, + sourcesUpdated: 0, + sourcesSkipped: 0, + crossModelSourcesCreated: 1, + relationshipsImported: 0, + warnings: ['parser warning'], + errors: [], + touchedSources: [ + { connectionId: 'warehouse-1', sourceName: 'global-revenue' }, + { connectionId: 'warehouse-1', sourceName: 'orders' }, + ], + }); + }); + + it('updates count when an existing semantic model source exists', async () => { + const scoped = { + getManifestEntry: vi.fn().mockResolvedValue(null), + isManifestBacked: vi.fn().mockResolvedValue(false), + loadAllSources: vi.fn().mockResolvedValue([]), + loadSource: vi.fn().mockImplementation((connectionId: string, sourceName: string) => + Promise.resolve(sourceName === 'orders' ? { name: 'orders' } : null), + ), + writeSource: vi.fn().mockResolvedValue({ warnings: [] }), + }; + const semanticLayerService = { + forWorktree: vi.fn().mockReturnValue(scoped), + getManifestEntry: vi.fn(), + isManifestBacked: vi.fn(), + loadAllSources: vi.fn(), + loadSource: vi.fn(), + writeSource: vi.fn(), + }; + + const result = await importMetricflowSemanticModels( + { semanticLayerService }, + { + connectionId: 'warehouse-1', + parseResult: { ...parseResult(), crossModelMetrics: [], relationships: [] }, + targetSchema: null, + hostTables: [], + workdir: '/tmp/session-worktree', + }, + ); + + expect(result.sourcesCreated).toBe(0); + expect(result.sourcesUpdated).toBe(1); + expect(result.crossModelSourcesCreated).toBe(0); + }); + + it('keeps domain write failures structured and continues processing', async () => { + const scoped = { + getManifestEntry: vi.fn().mockResolvedValue(null), + isManifestBacked: vi.fn().mockResolvedValue(false), + loadAllSources: vi.fn().mockResolvedValue([]), + loadSource: vi.fn().mockResolvedValue(null), + writeSource: vi.fn().mockRejectedValueOnce(new Error('cannot 
write orders')).mockResolvedValue({ warnings: [] }), + }; + const semanticLayerService = { + forWorktree: vi.fn().mockReturnValue(scoped), + getManifestEntry: vi.fn(), + isManifestBacked: vi.fn(), + loadAllSources: vi.fn(), + loadSource: vi.fn(), + writeSource: vi.fn(), + }; + + const result = await importMetricflowSemanticModels( + { semanticLayerService }, + { + connectionId: 'warehouse-1', + parseResult: parseResult(), + targetSchema: null, + hostTables: [], + workdir: '/tmp/session-worktree', + }, + ); + + expect(result.sourcesSkipped).toBe(1); + expect(result.crossModelSourcesCreated).toBe(1); + expect(result.errors).toEqual(["Failed to import semantic model 'orders': cannot write orders"]); + expect(result.touchedSources).toEqual([{ connectionId: 'warehouse-1', sourceName: 'global-revenue' }]); + }); + + it('writes manifest-backed semantic models as overlays', async () => { + const manifestOrders = { + name: 'orders', + table: 'analytics.orders', + grain: ['id'], + columns: [ + { name: 'id', type: 'string' }, + { name: 'customer_id', type: 'string' }, + ], + joins: [], + measures: [], + descriptions: { db: 'Orders table from scan' }, + }; + const written: Array<{ name: string; table?: string; columns?: unknown[]; joins?: unknown[] }> = []; + const scoped = { + getManifestEntry: vi.fn().mockImplementation(async (_connectionId: string, sourceName: string) => { + return sourceName === 'orders' ? 
manifestOrders : null; + }), + isManifestBacked: vi.fn().mockImplementation(async (_connectionId: string, sourceName: string) => { + return sourceName === 'orders'; + }), + loadAllSources: vi.fn().mockResolvedValue([]), + loadSource: vi.fn().mockResolvedValue(null), + writeSource: vi.fn().mockImplementation(async (_connectionId: string, source: (typeof written)[number]) => { + written.push(source); + return { warnings: [] }; + }), + }; + const semanticLayerService = { + forWorktree: vi.fn().mockReturnValue(scoped), + getManifestEntry: vi.fn(), + isManifestBacked: vi.fn(), + loadAllSources: vi.fn(), + loadSource: vi.fn(), + writeSource: vi.fn(), + }; + + const result = await importMetricflowSemanticModels( + { semanticLayerService }, + { + connectionId: 'warehouse-1', + parseResult: { + ...parseResult(), + semanticModels: [ + parseResult().semanticModels[0], + { + name: 'customers', + description: null, + modelRef: 'customers', + dimensions: [{ name: 'id', column: 'id', type: 'string' }], + measures: [], + entities: [], + defaultTimeDimension: null, + }, + ], + crossModelMetrics: [], + }, + targetSchema: null, + hostTables: [ + { + id: 'orders-table', + name: 'orders', + catalog: null, + db: null, + columns: [ + { id: 'c1', name: 'customer_id' }, + { id: 'c2', name: 'id' }, + ], + }, + { id: 'customers-table', name: 'customers', catalog: null, db: null, columns: [{ id: 'c3', name: 'id' }] }, + ], + workdir: '/tmp/session-worktree', + }, + ); + + expect(written[0]).toMatchObject({ + name: 'orders', + joins: [{ to: 'customers', on: 'orders.customer_id = customers.id', relationship: 'many_to_one' }], + descriptions: { dbt: 'Orders' }, + }); + expect(written[0]).not.toHaveProperty('table'); + expect(written[0]).not.toHaveProperty('columns'); + expect(result.sourcesUpdated).toBe(1); + expect(result.relationshipsImported).toBe(1); + }); + + it('drops joins whose keys are absent from manifest-backed source columns', async () => { + const scoped = { + getManifestEntry: 
vi.fn().mockResolvedValue({ + name: 'orders', + table: 'analytics.orders', + grain: ['id'], + columns: [{ name: 'id', type: 'string' }], + joins: [], + measures: [], + }), + isManifestBacked: vi.fn().mockImplementation(async (_connectionId: string, sourceName: string) => { + return sourceName === 'orders'; + }), + loadAllSources: vi.fn().mockResolvedValue([]), + loadSource: vi.fn().mockResolvedValue(null), + writeSource: vi.fn().mockResolvedValue({ warnings: [] }), + }; + const semanticLayerService = { + forWorktree: vi.fn().mockReturnValue(scoped), + getManifestEntry: vi.fn(), + isManifestBacked: vi.fn(), + loadAllSources: vi.fn(), + loadSource: vi.fn(), + writeSource: vi.fn(), + }; + + const result = await importMetricflowSemanticModels( + { semanticLayerService }, + { + connectionId: 'warehouse-1', + parseResult: { ...parseResult(), crossModelMetrics: [] }, + targetSchema: null, + hostTables: [ + { id: 'orders-table', name: 'orders', catalog: null, db: null, columns: [{ id: 'c1', name: 'id' }] }, + { id: 'customers-table', name: 'customers', catalog: null, db: null, columns: [{ id: 'c2', name: 'id' }] }, + ], + workdir: '/tmp/session-worktree', + }, + ); + + expect(scoped.writeSource).toHaveBeenCalledWith( + 'warehouse-1', + expect.not.objectContaining({ joins: expect.anything() }), + expect.any(String), + expect.any(String), + expect.any(String), + { skipValidation: true }, + ); + expect(result.relationshipsImported).toBe(0); + }); + + it('repairs earlier sources when a later related model fails to write', async () => { + const written: Array<{ name: string; joins?: unknown[] }> = []; + const scoped = { + getManifestEntry: vi.fn().mockResolvedValue(null), + isManifestBacked: vi.fn().mockResolvedValue(false), + loadAllSources: vi.fn().mockResolvedValue([]), + loadSource: vi.fn().mockResolvedValue(null), + writeSource: vi + .fn() + .mockImplementationOnce(async (_connectionId: string, source: (typeof written)[number]) => { + written.push(source); + return { 
warnings: [] }; + }) + .mockRejectedValueOnce(new Error('disk full')) + .mockImplementation(async (_connectionId: string, source: (typeof written)[number]) => { + written.push(source); + return { warnings: [] }; + }), + }; + const semanticLayerService = { + forWorktree: vi.fn().mockReturnValue(scoped), + getManifestEntry: vi.fn(), + isManifestBacked: vi.fn(), + loadAllSources: vi.fn(), + loadSource: vi.fn(), + writeSource: vi.fn(), + }; + + const result = await importMetricflowSemanticModels( + { semanticLayerService }, + { + connectionId: 'warehouse-1', + parseResult: { + ...parseResult(), + semanticModels: [ + parseResult().semanticModels[0], + { + name: 'customers', + description: null, + modelRef: 'customers', + dimensions: [{ name: 'id', column: 'id', type: 'string' }], + measures: [], + entities: [], + defaultTimeDimension: null, + }, + ], + crossModelMetrics: [], + }, + targetSchema: null, + hostTables: [ + { + id: 'orders-table', + name: 'orders', + catalog: null, + db: null, + columns: [ + { id: 'c1', name: 'customer_id' }, + { id: 'c2', name: 'id' }, + ], + }, + { id: 'customers-table', name: 'customers', catalog: null, db: null, columns: [{ id: 'c3', name: 'id' }] }, + ], + workdir: '/tmp/session-worktree', + }, + ); + + expect(result.sourcesCreated).toBe(1); + expect(result.sourcesSkipped).toBe(1); + expect(result.relationshipsImported).toBe(0); + expect(result.errors).toContain("Failed to import semantic model 'customers': disk full"); + expect(written.filter((source) => source.name === 'orders')).toHaveLength(2); + expect(written[written.length - 1]).toMatchObject({ name: 'orders', joins: [] }); + }); +}); diff --git a/packages/context/src/ingest/adapters/metricflow/import-semantic-models.ts b/packages/context/src/ingest/adapters/metricflow/import-semantic-models.ts new file mode 100644 index 00000000..bfdd824f --- /dev/null +++ b/packages/context/src/ingest/adapters/metricflow/import-semantic-models.ts @@ -0,0 +1,293 @@ +import type { 
import type { SemanticLayerService, SemanticLayerSource } from '../../../sl/index.js';
import {
  addTouchedSlSource,
  createTouchedSlSources,
  listTouchedSlSources,
  type TouchedSlSource,
} from '../../../tools/index.js';
import type { MetricFlowParseResult } from './deep-parse.js';
import {
  buildMetricflowJoinsForModel,
  buildMetricflowSemanticModelSource,
  filterValidMetricflowRelationships,
  findMatchingMetricflowTable,
  getMetricflowAvailableColumnNames,
  mapCrossModelMetricToSource,
  resolveMetricflowSemanticModelSourceName,
  toKebabCaseMetricflowName,
  type MetricflowHostTable,
  type MetricflowSemanticModelImportContext,
} from './semantic-models.js';

/** Counters, diagnostics and touched sources produced by one MetricFlow import run. */
export interface MetricFlowImportResult {
  sourcesCreated: number;
  sourcesUpdated: number;
  sourcesSkipped: number;
  crossModelSourcesCreated: number;
  relationshipsImported: number;
  warnings: string[];
  errors: string[];
  touchedSources: TouchedSlSource[];
}

/** The subset of the semantic layer service this importer reads from and writes to. */
export type MetricflowSemanticLayerWriter = Pick<
  SemanticLayerService,
  'getManifestEntry' | 'isManifestBacked' | 'loadAllSources' | 'loadSource' | 'writeSource'
>;

/** A writer that can additionally hand out a writer scoped to a worktree directory. */
export type MetricflowSemanticLayerService = MetricflowSemanticLayerWriter & {
  forWorktree(workdir: string): MetricflowSemanticLayerWriter;
};

export interface ImportMetricflowSemanticModelsDeps {
  semanticLayerService: MetricflowSemanticLayerService;
}

// Author identity attached to every write performed by this importer.
const DBT_METRICFLOW_AUTHOR = {
  name: 'dbt MetricFlow',
  email: ['system@kae', 'lio.dev'].join(''),
} as const;

export interface ImportMetricflowSemanticModelsInput {
  connectionId: string;
  parseResult: MetricFlowParseResult;
  targetSchema?: string | null;
  hostTables: MetricflowHostTable[];
  workdir?: string;
}

/** Render an unknown thrown value as a message string. */
function describeError(error: unknown): string {
  return error instanceof Error ? error.message : String(error);
}

/**
 * Write the parsed MetricFlow semantic models and cross-model metrics as semantic-layer sources.
 *
 * Steps, in order:
 *  1. resolve a source name / matched host table / manifest entry for every semantic model;
 *  2. write one source per model, recording failures in `errors` and continuing;
 *  3. rewrite already-written sources whose joins point at models that are not available
 *     (neither pre-existing, manifest-backed, nor written in this run);
 *  4. write one source per cross-model metric.
 *
 * Failures never reject the returned promise from steps 2–4; they are reported via `errors`.
 *
 * @param deps Semantic layer service; scoped with `forWorktree` when `input.workdir` is set.
 * @param input Connection id, parse result, host tables and optional target schema / workdir.
 * @returns Counters, parser + importer warnings, error messages and the touched sources.
 */
export async function importMetricflowSemanticModels(
  deps: ImportMetricflowSemanticModelsDeps,
  input: ImportMetricflowSemanticModelsInput,
): Promise<MetricFlowImportResult> {
  const semanticLayerService = input.workdir
    ? deps.semanticLayerService.forWorktree(input.workdir)
    : deps.semanticLayerService;
  const warnings = [...input.parseResult.warnings];
  const errors: string[] = [];
  const touched = createTouchedSlSources();
  let sourcesCreated = 0;
  let sourcesUpdated = 0;
  let sourcesSkipped = 0;
  let crossModelSourcesCreated = 0;

  const preexistingSourceNames = new Set(
    (await semanticLayerService.loadAllSources(input.connectionId)).map((source) => source.name),
  );
  const modelContexts: MetricflowSemanticModelImportContext[] = [];
  const sourceNameByModelRef = new Map<string, string>();
  const sourceNameByManifestName = new Map<string, string>();
  const availableColumnNamesByModelRef = new Map<
    string,
    ReturnType<typeof getMetricflowAvailableColumnNames>
  >();

  // Pass 1: resolve naming and manifest context for every model before anything is written,
  // so joins can be built against the complete set of source names.
  for (const semanticModel of input.parseResult.semanticModels) {
    const matchedTable = findMatchingMetricflowTable(semanticModel.modelRef, input.hostTables, input.targetSchema);
    const sourceName = resolveMetricflowSemanticModelSourceName(semanticModel, matchedTable);
    sourceNameByModelRef.set(semanticModel.modelRef, sourceName);
    if (matchedTable) {
      sourceNameByManifestName.set(matchedTable.name, sourceName);
    }
    const manifestSource = await resolveManifestSource(semanticLayerService, input.connectionId, sourceName, matchedTable);
    const context = {
      model: semanticModel,
      matchedTable,
      sourceName,
      manifestSource,
    };
    availableColumnNamesByModelRef.set(semanticModel.modelRef, getMetricflowAvailableColumnNames(context));
    modelContexts.push(context);
  }

  const validRelationships = filterValidMetricflowRelationships(
    input.parseResult.relationships,
    availableColumnNamesByModelRef,
  );
  // Models that can be join targets regardless of this run's outcome: already stored or manifest-backed.
  const availableTargetModelRefs = new Set(
    modelContexts
      .filter(
        (context) =>
          preexistingSourceNames.has(context.sourceName) || context.manifestSource?.name === context.sourceName,
      )
      .map((context) => context.model.modelRef),
  );
  const successfulModelContexts: MetricflowSemanticModelImportContext[] = [];

  // Pass 2: write each model; a failure is recorded and the loop moves on.
  for (const context of modelContexts) {
    try {
      const joins = buildMetricflowJoinsForModel(context.model, validRelationships, sourceNameByModelRef);
      const source = buildMetricflowSemanticModelSource(context, joins, sourceNameByManifestName);
      const existing =
        preexistingSourceNames.has(context.sourceName) ||
        context.manifestSource?.name === context.sourceName ||
        Boolean(await semanticLayerService.loadSource(input.connectionId, context.sourceName));

      await semanticLayerService.writeSource(
        input.connectionId,
        source as SemanticLayerSource,
        DBT_METRICFLOW_AUTHOR.name,
        DBT_METRICFLOW_AUTHOR.email,
        `dbt MetricFlow sync: ${existing ? 'update' : 'create'} source ${context.sourceName}`,
        { skipValidation: true },
      );

      const legacyWarning = await legacyKebabSourceWarning(
        semanticLayerService,
        input.connectionId,
        context.model.modelRef,
        context.sourceName,
      );
      if (legacyWarning) {
        warnings.push(legacyWarning);
      }

      if (existing) {
        sourcesUpdated++;
      } else {
        sourcesCreated++;
      }
      availableTargetModelRefs.add(context.model.modelRef);
      successfulModelContexts.push(context);
      addTouchedSlSource(touched, input.connectionId, context.sourceName);
    } catch (error) {
      errors.push(`Failed to import semantic model '${context.model.name}': ${describeError(error)}`);
      sourcesSkipped++;
    }
  }

  // Pass 3: drop joins that target models which ended up unavailable.
  if (successfulModelContexts.length > 0) {
    try {
      await repairSourcesAfterPartialImportFailures({
        semanticLayerService,
        connectionId: input.connectionId,
        contexts: successfulModelContexts,
        relationships: validRelationships,
        sourceNameByModelRef,
        sourceNameByManifestName,
        availableTargetModelRefs,
        touched,
      });
    } catch (error) {
      errors.push(`Failed to repair semantic-model joins after partial import: ${describeError(error)}`);
    }
  }

  const relationshipsImported = successfulModelContexts.reduce((count, context) => {
    return (
      count +
      buildMetricflowJoinsForModel(context.model, validRelationships, sourceNameByModelRef, availableTargetModelRefs)
        .length
    );
  }, 0);

  // Pass 4: cross-model metrics become standalone sources.
  for (const metric of input.parseResult.crossModelMetrics) {
    try {
      const source = mapCrossModelMetricToSource(metric);
      await semanticLayerService.writeSource(
        input.connectionId,
        source,
        DBT_METRICFLOW_AUTHOR.name,
        DBT_METRICFLOW_AUTHOR.email,
        `dbt MetricFlow sync: create cross-model source ${source.name}`,
        { skipValidation: true },
      );
      crossModelSourcesCreated++;
      addTouchedSlSource(touched, input.connectionId, source.name);
    } catch (error) {
      errors.push(`Failed to import cross-model metric '${metric.name}': ${describeError(error)}`);
    }
  }

  return {
    sourcesCreated,
    sourcesUpdated,
    sourcesSkipped,
    crossModelSourcesCreated,
    relationshipsImported,
    warnings,
    errors,
    touchedSources: listTouchedSlSources(touched),
  };
}

/**
 * Look up the manifest entry backing a semantic model, first under `sourceName` and then,
 * when it differs, under the matched host table's name. Resolves to `null` when neither
 * name is manifest-backed.
 */
async function resolveManifestSource(
  semanticLayerService: MetricflowSemanticLayerWriter,
  connectionId: string,
  sourceName: string,
  matchedTable: MetricflowHostTable | undefined,
) {
  if (await semanticLayerService.isManifestBacked(connectionId, sourceName)) {
    return semanticLayerService.getManifestEntry(connectionId, sourceName);
  }

  const matchedTableName = matchedTable?.name;
  if (!matchedTableName || matchedTableName === sourceName) {
    return null;
  }
  if (await semanticLayerService.isManifestBacked(connectionId, matchedTableName)) {
    return semanticLayerService.getManifestEntry(connectionId, matchedTableName);
  }
  return null;
}

/**
 * Produce a warning when a source stored under the kebab-case form of `modelRef` still exists
 * next to the source that was just written under a different name; `null` otherwise.
 */
async function legacyKebabSourceWarning(
  semanticLayerService: MetricflowSemanticLayerWriter,
  connectionId: string,
  modelRef: string,
  sourceName: string,
): Promise<string | null> {
  const kebabName = toKebabCaseMetricflowName(modelRef);
  if (kebabName === sourceName) {
    return null;
  }
  const legacy = await semanticLayerService.loadSource(connectionId, kebabName);
  if (!legacy) {
    return null;
  }
  return (
    `MetricFlow sync: legacy kebab-case source '${kebabName}' still exists alongside the new source ` +
    `'${sourceName}' (modelRef '${modelRef}'). Migrate persisted references before deleting the old file.`
  );
}

/**
 * Rewrite every successfully written source whose join list shrinks once joins are restricted
 * to `availableTargetModelRefs`.
 *
 * Each source is repaired independently: a failing write does not stop the remaining repairs.
 * If any repair failed, a single error naming every failing source is thrown after all sources
 * have been attempted.
 *
 * @throws Error when at least one repair write failed.
 */
async function repairSourcesAfterPartialImportFailures(input: {
  semanticLayerService: MetricflowSemanticLayerWriter;
  connectionId: string;
  contexts: MetricflowSemanticModelImportContext[];
  relationships: Parameters<typeof buildMetricflowJoinsForModel>[1];
  sourceNameByModelRef: Map<string, string>;
  sourceNameByManifestName: Map<string, string>;
  availableTargetModelRefs: Set<string>;
  touched: ReturnType<typeof createTouchedSlSources>;
}): Promise<void> {
  const failures: string[] = [];
  for (const context of input.contexts) {
    try {
      const fullJoins = buildMetricflowJoinsForModel(context.model, input.relationships, input.sourceNameByModelRef);
      const repairedJoins = buildMetricflowJoinsForModel(
        context.model,
        input.relationships,
        input.sourceNameByModelRef,
        input.availableTargetModelRefs,
      );
      // Same join count means nothing pointed at an unavailable model; leave the source as written.
      if (fullJoins.length === repairedJoins.length) {
        continue;
      }
      const repairedSource = buildMetricflowSemanticModelSource(
        context,
        repairedJoins,
        input.sourceNameByManifestName,
      );
      await input.semanticLayerService.writeSource(
        input.connectionId,
        repairedSource as SemanticLayerSource,
        DBT_METRICFLOW_AUTHOR.name,
        DBT_METRICFLOW_AUTHOR.email,
        `dbt MetricFlow sync: repair source ${context.sourceName} after partial import`,
        { skipValidation: true },
      );
      addTouchedSlSource(input.touched, input.connectionId, context.sourceName);
    } catch (error) {
      // Keep going: the other sources may still carry joins that need to be dropped.
      failures.push(`${context.sourceName}: ${describeError(error)}`);
    }
  }
  if (failures.length > 0) {
    throw new Error(failures.join('; '));
  }
}
+1,121 @@ +import { mkdir, mkdtemp, readFile, rm, writeFile } from 'node:fs/promises'; +import { tmpdir } from 'node:os'; +import { join } from 'node:path'; +import { afterEach, beforeEach, describe, expect, it } from 'vitest'; +import { makeLocalGitRepo } from '../../../test/make-local-git-repo.js'; +import type { SourceAdapter } from '../../types.js'; +import { MetricflowSourceAdapter } from './metricflow.adapter.js'; + +function compileOnlyRequiredDepsCheck(): void { + // @ts-expect-error MetricflowSourceAdapter requires an explicit cache home. + new MetricflowSourceAdapter(); +} +void compileOnlyRequiredDepsCheck; + +async function makeRepo(tmpRoot: string, files: Record) { + const fixtureDir = join(tmpRoot, 'fixture-src'); + for (const [path, content] of Object.entries(files)) { + const dest = join(fixtureDir, path); + await mkdir(join(dest, '..'), { recursive: true }); + await writeFile(dest, content, 'utf-8'); + } + return makeLocalGitRepo(fixtureDir, join(tmpRoot, 'origin')); +} + +describe('MetricflowSourceAdapter', () => { + let tmpRoot: string; + let stagedDir: string; + let adapter: SourceAdapter; + + beforeEach(async () => { + tmpRoot = await mkdtemp(join(tmpdir(), 'mf-adapter-')); + stagedDir = join(tmpRoot, 'stage'); + adapter = new MetricflowSourceAdapter({ homeDir: join(tmpRoot, 'cache-home') }); + }); + + afterEach(async () => { + await rm(tmpRoot, { recursive: true, force: true }); + }); + + it('declares the expected source key and skill list', () => { + expect(adapter.source).toBe('metricflow'); + expect(adapter.skillNames).toEqual(['metricflow_ingest']); + }); + + it('detects a staged dir with a semantic_models YAML', async () => { + await mkdir(join(stagedDir, 'models'), { recursive: true }); + await writeFile( + join(stagedDir, 'models/orders.yml'), + 'semantic_models:\n - {name: orders, model: x, measures: [{name: c, agg: count, expr: id}]}\n', + 'utf-8', + ); + expect(await adapter.detect(stagedDir)).toBe(true); + }); + + it('rejects a 
staged dir with no MetricFlow-shaped YAML', async () => { + await mkdir(stagedDir, { recursive: true }); + await writeFile(join(stagedDir, 'dbt_project.yml'), 'name: proj\n', 'utf-8'); + expect(await adapter.detect(stagedDir)).toBe(false); + }); + + it('chunk: first-run on a minimal single-model dir emits one WU', async () => { + await mkdir(join(stagedDir, 'models'), { recursive: true }); + await writeFile( + join(stagedDir, 'models/orders.yml'), + 'semantic_models:\n - {name: orders, model: x, measures: [{name: c, agg: count, expr: id}]}\n', + 'utf-8', + ); + const result = await adapter.chunk(stagedDir); + expect(result.workUnits).toHaveLength(1); + expect(result.workUnits[0].unitKey).toBe('metricflow-orders'); + }); + + it('attaches deep parse artifacts to the chunk result', async () => { + await mkdir(stagedDir, { recursive: true }); + await writeFile( + join(stagedDir, 'semantic_models.yml'), + [ + 'semantic_models:', + ' - name: orders', + " model: ref('orders')", + ' dimensions: []', + ' measures:', + ' - name: order_count', + ' agg: count', + " expr: '1'", + ].join('\n'), + ); + + const chunk = await adapter.chunk(stagedDir); + + expect(chunk.parseArtifacts).toMatchObject({ + semanticModels: [{ name: 'orders', modelRef: 'orders' }], + crossModelMetrics: [], + relationships: [], + warnings: [], + }); + }); + + it('fetches repo YAML files into the staged directory using a per-connection cache', async () => { + const repo = await makeRepo(tmpRoot, { + 'dbt_project.yml': 'name: analytics\n', + 'models/orders.yml': 'semantic_models:\n - name: orders\n model: ref("orders")\n', + 'models/readme.md': '# ignored\n', + }); + + await adapter.fetch?.( + { + repoUrl: repo.repoUrl, + branch: 'main', + path: null, + authToken: null, + parsedTargetTables: {}, + }, + stagedDir, + { connectionId: 'warehouse-1', sourceKey: 'metricflow' }, + ); + + await expect(readFile(join(stagedDir, 'models/orders.yml'), 'utf-8')).resolves.toContain('semantic_models'); + expect(await 
adapter.detect(stagedDir)).toBe(true); + }); +}); diff --git a/packages/context/src/ingest/adapters/metricflow/metricflow.adapter.ts b/packages/context/src/ingest/adapters/metricflow/metricflow.adapter.ts new file mode 100644 index 00000000..a465a973 --- /dev/null +++ b/packages/context/src/ingest/adapters/metricflow/metricflow.adapter.ts @@ -0,0 +1,47 @@ +import { join } from 'node:path'; +import type { ChunkResult, DiffSet, FetchContext, SourceAdapter } from '../../types.js'; +import { chunkMetricFlowProject } from './chunk.js'; +import { detectMetricFlowStagedDir } from './detect.js'; +import { parseMetricflowFiles, type MetricFlowParseResult } from './deep-parse.js'; +import { fetchMetricflowRepo } from './fetch.js'; +import { parseMetricFlowStagedDir, type ParsedMetricFlowProject } from './parse.js'; +import { parseMetricflowPullConfig } from './pull-config.js'; + +export interface MetricflowSourceAdapterDeps { + homeDir: string; +} + +export class MetricflowSourceAdapter implements SourceAdapter { + readonly source = 'metricflow'; + readonly skillNames: string[] = ['metricflow_ingest']; + + constructor(private readonly deps: MetricflowSourceAdapterDeps) {} + + detect(stagedDir: string): Promise { + return detectMetricFlowStagedDir(stagedDir); + } + + async fetch(pullConfig: unknown, stagedDir: string, ctx: FetchContext): Promise { + const config = parseMetricflowPullConfig(pullConfig); + await fetchMetricflowRepo({ + config, + cacheDir: this.resolveCacheDir(ctx.connectionId), + stagedDir, + }); + } + + async chunk(stagedDir: string, diffSet?: DiffSet): Promise { + const project = await parseMetricFlowStagedDir(stagedDir); + const chunk = await chunkMetricFlowProject(project, { diffSet }); + const parseArtifacts = parseMetricflowStagedDirForImport(project); + return { ...chunk, parseArtifacts }; + } + + private resolveCacheDir(connectionId: string): string { + return join(this.deps.homeDir, 'ingest-metricflow-repos', connectionId); + } +} + +function 
parseMetricflowStagedDirForImport(project: ParsedMetricFlowProject): MetricFlowParseResult { + return parseMetricflowFiles(project.files); +} diff --git a/packages/context/src/ingest/adapters/metricflow/parse.test.ts b/packages/context/src/ingest/adapters/metricflow/parse.test.ts new file mode 100644 index 00000000..72a94472 --- /dev/null +++ b/packages/context/src/ingest/adapters/metricflow/parse.test.ts @@ -0,0 +1,206 @@ +import { mkdir, mkdtemp, rm, writeFile } from 'node:fs/promises'; +import { tmpdir } from 'node:os'; +import { join } from 'node:path'; +import { afterEach, beforeEach, describe, expect, it } from 'vitest'; +import { parseMetricFlowStagedDir } from './parse.js'; + +async function writeFixture(stagedDir: string, relPath: string, body: string): Promise { + const abs = join(stagedDir, relPath); + await mkdir(join(abs, '..'), { recursive: true }); + await writeFile(abs, body, 'utf-8'); +} + +describe('parseMetricFlowStagedDir', () => { + let stagedDir: string; + + beforeEach(async () => { + stagedDir = await mkdtemp(join(tmpdir(), 'mf-parse-')); + }); + + afterEach(async () => { + await rm(stagedDir, { recursive: true, force: true }); + }); + + it('extracts one semantic_model with its measures + dimensions + entities', async () => { + await writeFixture( + stagedDir, + 'models/orders.yml', + [ + 'semantic_models:', + ' - name: orders', + ' description: Order fact table.', + " model: ref('orders')", + ' entities:', + ' - name: order_id', + ' type: primary', + ' - name: customer_id', + ' type: foreign', + ' dimensions:', + ' - name: ordered_at', + ' type: time', + ' type_params:', + ' time_granularity: day', + ' measures:', + ' - name: order_count', + ' agg: count', + ' expr: order_id', + ' - name: gross_amount', + ' agg: sum', + ' expr: amount', + '', + ].join('\n'), + ); + const project = await parseMetricFlowStagedDir(stagedDir); + expect(project.semanticModels).toHaveLength(1); + const sm = project.semanticModels[0]; + 
expect(sm.path).toBe('models/orders.yml'); + expect(sm.name).toBe('orders'); + expect(sm.modelRef).toBe('orders'); + expect(sm.measureNames).toEqual(['gross_amount', 'order_count']); + expect(sm.dimensionNames).toEqual(['ordered_at']); + expect(sm.entityNames).toEqual(['customer_id', 'order_id']); + expect(sm.primaryEntities).toEqual(['order_id']); + expect(sm.foreignEntities).toEqual(['customer_id']); + expect(sm.extendsFrom).toEqual([]); + expect(project.files).toEqual([ + { + path: 'models/orders.yml', + content: expect.stringContaining('semantic_models:'), + }, + ]); + }); + + it('captures `extends:` as a string OR a list', async () => { + await writeFixture( + stagedDir, + 'models/orders.yml', + [ + 'semantic_models:', + ' - name: orders', + " model: ref('orders')", + ' measures:', + ' - {name: order_count, agg: count, expr: order_id}', + '', + ].join('\n'), + ); + await writeFixture( + stagedDir, + 'models/orders_ext_list.yml', + [ + 'semantic_models:', + ' - name: orders_ext_list', + " model: ref('orders_ext')", + ' extends: [orders]', + ' measures:', + ' - {name: refund_amount, agg: sum, expr: refund_amt}', + '', + ].join('\n'), + ); + await writeFixture( + stagedDir, + 'models/orders_ext_str.yml', + [ + 'semantic_models:', + ' - name: orders_ext_str', + " model: ref('orders_ext')", + ' extends: orders', + ' measures:', + ' - {name: refund_amount2, agg: sum, expr: refund_amt2}', + '', + ].join('\n'), + ); + const project = await parseMetricFlowStagedDir(stagedDir); + const list = project.semanticModels.find((sm) => sm.name === 'orders_ext_list'); + const str = project.semanticModels.find((sm) => sm.name === 'orders_ext_str'); + expect(list?.extendsFrom).toEqual(['orders']); + expect(str?.extendsFrom).toEqual(['orders']); + }); + + it('extracts metrics with referenced measures for simple + derived + ratio + cumulative', async () => { + await writeFixture( + stagedDir, + 'metrics/core.yml', + [ + 'metrics:', + ' - name: total_orders', + ' type: simple', + ' 
type_params:', + ' measure: order_count', + ' - name: revenue', + ' type: derived', + ' type_params:', + ' expr: gross_amount - refund_amount', + ' metrics:', + ' - name: gross_amount', + ' - name: refund_amount', + ' - name: refund_rate', + ' type: ratio', + ' type_params:', + ' numerator: refund_amount', + ' denominator: gross_amount', + ' - name: cum_revenue', + ' type: cumulative', + ' type_params:', + ' measure: gross_amount', + ' window: 7 days', + '', + ].join('\n'), + ); + const project = await parseMetricFlowStagedDir(stagedDir); + expect(project.metrics).toHaveLength(4); + const byName = new Map(project.metrics.map((m) => [m.name, m])); + expect(byName.get('total_orders')?.type).toBe('simple'); + expect(byName.get('total_orders')?.measureRef).toBe('order_count'); + expect(byName.get('revenue')?.type).toBe('derived'); + expect(byName.get('revenue')?.dependsOn.sort()).toEqual(['gross_amount', 'refund_amount']); + expect(byName.get('refund_rate')?.type).toBe('ratio'); + expect(byName.get('refund_rate')?.dependsOn.sort()).toEqual(['gross_amount', 'refund_amount']); + expect(byName.get('cum_revenue')?.type).toBe('cumulative'); + expect(byName.get('cum_revenue')?.measureRef).toBe('gross_amount'); + }); + + it('returns empty arrays for a non-MetricFlow YAML (e.g. 
dbt_project.yml)', async () => { + await writeFixture(stagedDir, 'dbt_project.yml', 'name: my_proj\nversion: "1.0.0"\n'); + const project = await parseMetricFlowStagedDir(stagedDir); + expect(project.semanticModels).toEqual([]); + expect(project.metrics).toEqual([]); + expect(project.allPaths).toEqual(['dbt_project.yml']); + }); + + it('skips files that are not YAML (or fail to parse) without throwing', async () => { + await writeFixture(stagedDir, 'broken.yml', '{ this is: not valid YAML :::'); + await writeFixture(stagedDir, 'other.txt', 'ignore me'); + const project = await parseMetricFlowStagedDir(stagedDir); + expect(project.semanticModels).toEqual([]); + expect(project.metrics).toEqual([]); + // allPaths includes `.yml` / `.yaml` only, even when unparseable: + expect(project.allPaths).toEqual(['broken.yml']); + }); + + it('allPaths is sorted deterministically', async () => { + await writeFixture(stagedDir, 'models/z.yml', 'semantic_models: []\n'); + await writeFixture(stagedDir, 'models/a.yml', 'semantic_models: []\n'); + await writeFixture(stagedDir, 'metrics/b.yaml', 'metrics: []\n'); + const project = await parseMetricFlowStagedDir(stagedDir); + expect(project.allPaths).toEqual(['metrics/b.yaml', 'models/a.yml', 'models/z.yml']); + }); + + it("extracts modelRef from ref('name') and source('src','table') and literal strings", async () => { + await writeFixture( + stagedDir, + 'models/a.yml', + [ + 'semantic_models:', + ' - {name: a, model: "ref(\'orders\')", measures: [{name: c, agg: count, expr: id}]}', + " - {name: b, model: \"source('raw','orders_raw')\", measures: [{name: c, agg: count, expr: id}]}", + ' - {name: c, model: plain_table, measures: [{name: c, agg: count, expr: id}]}', + '', + ].join('\n'), + ); + const project = await parseMetricFlowStagedDir(stagedDir); + const byName = new Map(project.semanticModels.map((s) => [s.name, s])); + expect(byName.get('a')?.modelRef).toBe('orders'); + expect(byName.get('b')?.modelRef).toBe('orders_raw'); + 
expect(byName.get('c')?.modelRef).toBe('plain_table'); + }); +}); diff --git a/packages/context/src/ingest/adapters/metricflow/parse.ts b/packages/context/src/ingest/adapters/metricflow/parse.ts new file mode 100644 index 00000000..935858d0 --- /dev/null +++ b/packages/context/src/ingest/adapters/metricflow/parse.ts @@ -0,0 +1,241 @@ +import { readdir, readFile } from 'node:fs/promises'; +import { join, relative } from 'node:path'; +import { parse as parseYaml } from 'yaml'; + +export interface ParsedMetricFlowSemanticModel { + /** Path relative to stagedDir, e.g. "models/orders.yml". */ + path: string; + /** `name:` on the semantic_model. */ + name: string; + /** Best-effort ref name: `ref('x')` → 'x'; `source('s','t')` → 't'; literal → literal. */ + modelRef: string; + /** + * `extends:` parents declared on this semantic_model. MetricFlow does not ship + * `extends:` as a first-class field; this adapter treats any `extends:` that + * appears as a hint from the author that one model inherits from another. + * Empty if absent. + */ + extendsFrom: string[]; + measureNames: string[]; + dimensionNames: string[]; + entityNames: string[]; + primaryEntities: string[]; + foreignEntities: string[]; + defaultTimeDimension: string | null; +} + +export type MetricFlowMetricType = 'simple' | 'derived' | 'cumulative' | 'ratio' | 'conversion'; + +export interface ParsedMetricFlowMetric { + path: string; + name: string; + type: MetricFlowMetricType; + /** For `simple` + `cumulative`. `null` for `derived`/`ratio`/`conversion`. */ + measureRef: string | null; + /** For `derived`/`ratio`/`conversion`: the metric names this metric depends on. */ + dependsOn: string[]; +} + +export interface ParsedMetricFlowProject { + semanticModels: ParsedMetricFlowSemanticModel[]; + metrics: ParsedMetricFlowMetric[]; + /** All `.yml`/`.yaml` paths seen under stagedDir, relative + sorted. 
*/ + allPaths: string[]; + files: Array<{ path: string; content: string }>; +} + +const YAML_EXT_RE = /\.(ya?ml)$/i; + +async function collectYamlFiles(stagedDir: string): Promise { + const entries = await readdir(stagedDir, { withFileTypes: true, recursive: true }); + const paths: string[] = []; + for (const entry of entries) { + if (!entry.isFile() || !YAML_EXT_RE.test(entry.name)) { + continue; + } + const abs = join(entry.parentPath, entry.name); + paths.push(relative(stagedDir, abs)); + } + paths.sort(); + return paths; +} + +function asStringArray(value: unknown): string[] { + if (Array.isArray(value)) { + return value.filter((v): v is string => typeof v === 'string'); + } + if (typeof value === 'string') { + return [value]; + } + return []; +} + +/** Extract `ref('x')` / `source('s','t')` / literal from a MetricFlow `model:` field. */ +function extractModelRef(modelStr: string): string { + const refMatch = modelStr.match(/ref\s*\(\s*['"]([^'"]+)['"]\s*\)/); + if (refMatch) { + return refMatch[1]; + } + const sourceMatch = modelStr.match(/source\s*\(\s*['"][^'"]+['"]\s*,\s*['"]([^'"]+)['"]\s*\)/); + if (sourceMatch) { + return sourceMatch[1]; + } + return modelStr; +} + +interface RawSemanticModel { + name?: unknown; + model?: unknown; + extends?: unknown; + entities?: Array<{ name?: unknown; type?: unknown }>; + dimensions?: Array<{ name?: unknown }>; + measures?: Array<{ name?: unknown }>; + defaults?: { agg_time_dimension?: unknown }; +} + +interface RawMetric { + name?: unknown; + type?: unknown; + type_params?: { + measure?: unknown; + metrics?: Array<{ name?: unknown }>; + numerator?: unknown; + denominator?: unknown; + conversion_type_params?: { + base_measure?: unknown; + conversion_measure?: unknown; + }; + }; +} + +interface RawYaml { + semantic_models?: RawSemanticModel[]; + metrics?: RawMetric[]; +} + +function extractMeasureFromInput(input: unknown): string | null { + if (typeof input === 'string') { + return input; + } + if (input && typeof 
input === 'object' && 'name' in input && typeof (input as { name: unknown }).name === 'string') { + return (input as { name: string }).name; + } + return null; +} + +function extractReferencedMetricNames(m: RawMetric): string[] { + const tp = m.type_params ?? {}; + const names: string[] = []; + for (const ref of tp.metrics ?? []) { + if (ref && typeof ref.name === 'string') { + names.push(ref.name); + } + } + const num = extractMeasureFromInput(tp.numerator); + const den = extractMeasureFromInput(tp.denominator); + if (num) { + names.push(num); + } + if (den) { + names.push(den); + } + return [...new Set(names)].sort(); +} + +function parseSemanticModel(sm: RawSemanticModel, path: string): ParsedMetricFlowSemanticModel | null { + if (typeof sm.name !== 'string') { + return null; + } + const entities = (sm.entities ?? []).filter((e) => e && typeof e.name === 'string') as Array<{ + name: string; + type?: unknown; + }>; + const primaryEntities = entities + .filter((e) => e.type === 'primary' || e.type === 'unique') + .map((e) => e.name) + .sort(); + const foreignEntities = entities + .filter((e) => e.type === 'foreign') + .map((e) => e.name) + .sort(); + const entityNames = entities.map((e) => e.name).sort(); + const measureNames = ((sm.measures ?? []).filter((m) => m && typeof m.name === 'string') as Array<{ name: string }>) + .map((m) => m.name) + .sort(); + const dimensionNames = ( + (sm.dimensions ?? []).filter((d) => d && typeof d.name === 'string') as Array<{ name: string }> + ) + .map((d) => d.name) + .sort(); + const modelRef = typeof sm.model === 'string' ? extractModelRef(sm.model) : ''; + const extendsFrom = asStringArray(sm.extends); + const defaultTimeDimension = + typeof sm.defaults?.agg_time_dimension === 'string' ? 
sm.defaults.agg_time_dimension : null; + + return { + path, + name: sm.name, + modelRef, + extendsFrom, + measureNames, + dimensionNames, + entityNames, + primaryEntities, + foreignEntities, + defaultTimeDimension, + }; +} + +function parseMetric(m: RawMetric, path: string): ParsedMetricFlowMetric | null { + if (typeof m.name !== 'string') { + return null; + } + const typeStr = typeof m.type === 'string' ? m.type : ''; + const ALLOWED: MetricFlowMetricType[] = ['simple', 'derived', 'cumulative', 'ratio', 'conversion']; + if (!ALLOWED.includes(typeStr as MetricFlowMetricType)) { + return null; + } + const type = typeStr as MetricFlowMetricType; + const measureRef = + type === 'simple' || type === 'cumulative' ? extractMeasureFromInput(m.type_params?.measure) : null; + const dependsOn = extractReferencedMetricNames(m); + return { path, name: m.name, type, measureRef, dependsOn }; +} + +export async function parseMetricFlowStagedDir(stagedDir: string): Promise { + const allPaths = await collectYamlFiles(stagedDir); + const semanticModels: ParsedMetricFlowSemanticModel[] = []; + const metrics: ParsedMetricFlowMetric[] = []; + const files: Array<{ path: string; content: string }> = []; + + for (const path of allPaths) { + const body = await readFile(join(stagedDir, path), 'utf-8'); + files.push({ path, content: body }); + let yaml: RawYaml | null; + try { + yaml = parseYaml(body) as RawYaml | null; + } catch { + yaml = null; + } + if (!yaml || typeof yaml !== 'object') { + continue; + } + for (const sm of yaml.semantic_models ?? []) { + const parsed = parseSemanticModel(sm, path); + if (parsed) { + semanticModels.push(parsed); + } + } + for (const m of yaml.metrics ?? 
[]) { + const parsed = parseMetric(m, path); + if (parsed) { + metrics.push(parsed); + } + } + } + + semanticModels.sort((a, b) => a.name.localeCompare(b.name) || a.path.localeCompare(b.path)); + metrics.sort((a, b) => a.name.localeCompare(b.name) || a.path.localeCompare(b.path)); + + return { semanticModels, metrics, allPaths, files }; +} diff --git a/packages/context/src/ingest/adapters/metricflow/pull-config.test.ts b/packages/context/src/ingest/adapters/metricflow/pull-config.test.ts new file mode 100644 index 00000000..5137a4e6 --- /dev/null +++ b/packages/context/src/ingest/adapters/metricflow/pull-config.test.ts @@ -0,0 +1,68 @@ +import { describe, expect, it } from 'vitest'; +import { parseMetricflowPullConfig, pullConfigFromMetricflowIntegration } from './pull-config.js'; + +describe('metricflow pull config', () => { + it('applies defaults for optional git fields', () => { + const parsed = parseMetricflowPullConfig({ + repoUrl: 'https://github.com/acme/analytics.git', + }); + + expect(parsed).toEqual({ + repoUrl: 'https://github.com/acme/analytics.git', + branch: 'main', + path: null, + authToken: null, + parsedTargetTables: {}, + }); + }); + + it('preserves provided branch, path, token, and parsed target tables', () => { + const parsed = parseMetricflowPullConfig({ + repoUrl: 'https://github.com/acme/analytics.git', + branch: 'release', + path: 'dbt', + authToken: 'secret-token', + parsedTargetTables: { + orders: { + catalog: 'warehouse', + schema: 'marts', + name: 'orders', + ok: true, + canonicalTable: 'analytics.marts.orders', + }, + }, + }); + + expect(parsed.branch).toBe('release'); + expect(parsed.path).toBe('dbt'); + expect(parsed.authToken).toBe('secret-token'); + expect(parsed.parsedTargetTables.orders).toMatchObject({ ok: true, name: 'orders' }); + }); + + it('rejects missing repoUrl', () => { + expect(() => parseMetricflowPullConfig({})).toThrow(); + }); + + it('builds pull config from a local metricflow integration block', () => { + expect( + 
pullConfigFromMetricflowIntegration({ + repoUrl: 'https://github.com/acme/analytics.git', + branch: null, + path: null, + authToken: null, + }), + ).toEqual({ + repoUrl: 'https://github.com/acme/analytics.git', + branch: 'main', + path: null, + authToken: null, + parsedTargetTables: {}, + }); + }); + + it('throws a clear error when the integration block has no repo URL', () => { + expect(() => pullConfigFromMetricflowIntegration({ repoUrl: null })).toThrow( + 'metricflow integration config missing repoUrl', + ); + }); +}); diff --git a/packages/context/src/ingest/adapters/metricflow/pull-config.ts b/packages/context/src/ingest/adapters/metricflow/pull-config.ts new file mode 100644 index 00000000..ad38b033 --- /dev/null +++ b/packages/context/src/ingest/adapters/metricflow/pull-config.ts @@ -0,0 +1,37 @@ +import { z } from 'zod'; +import { parsedTargetTableSchema } from '../../parsed-target-table.js'; + +export const metricflowPullConfigSchema = z.object({ + repoUrl: z.string().url(), + branch: z.string().default('main'), + path: z.string().nullable().default(null), + authToken: z.string().nullable().default(null), + parsedTargetTables: z.record(z.string(), parsedTargetTableSchema).default({}), +}); + +export type MetricflowPullConfig = z.infer; + +export interface MetricflowIntegrationLike { + repoUrl: string | null; + branch?: string | null; + path?: string | null; + authToken?: string | null; + parsedTargetTables?: Record>; +} + +export function parseMetricflowPullConfig(raw: unknown): MetricflowPullConfig { + return metricflowPullConfigSchema.parse(raw); +} + +export function pullConfigFromMetricflowIntegration(integration: MetricflowIntegrationLike): MetricflowPullConfig { + if (!integration.repoUrl) { + throw new Error('metricflow integration config missing repoUrl'); + } + return parseMetricflowPullConfig({ + repoUrl: integration.repoUrl, + branch: integration.branch ?? 'main', + path: integration.path ?? null, + authToken: integration.authToken ?? 
null, + parsedTargetTables: integration.parsedTargetTables ?? {}, + }); +} diff --git a/packages/context/src/ingest/adapters/metricflow/semantic-models.test.ts b/packages/context/src/ingest/adapters/metricflow/semantic-models.test.ts new file mode 100644 index 00000000..9ad88563 --- /dev/null +++ b/packages/context/src/ingest/adapters/metricflow/semantic-models.test.ts @@ -0,0 +1,258 @@ +import { describe, expect, it } from 'vitest'; +import { composeOverlay, type SemanticLayerSource } from '../../../sl/index.js'; +import type { ParsedCrossModelMetric, ParsedMetricflowRelationship, ParsedSemanticModel } from './deep-parse.js'; +import { + buildMetricflowColumns, + buildMetricflowJoinsForModel, + buildMetricflowSemanticModelSource, + countImportableMetricflowRelationships, + findMatchingMetricflowTable, + mapCrossModelMetricToSource, + mapSemanticModelToSource, + resolveMetricflowSemanticModelSourceName, + rewriteMetricflowManifestJoins, + toKebabCaseMetricflowName, +} from './semantic-models.js'; + +const ordersModel: ParsedSemanticModel = { + name: 'orders', + description: 'Order facts', + modelRef: 'fct_orders', + dimensions: [ + { name: 'status', column: 'status', type: 'string', label: 'Status', description: 'Order status' }, + { name: 'ordered_at', column: 'ordered_at', type: 'time', label: 'Ordered At' }, + ], + measures: [ + { + type: 'simple', + name: 'total_revenue', + column: 'amount', + aggregation: 'sum', + label: 'Total Revenue', + description: 'Revenue', + filter: "status = 'completed'", + }, + { + type: 'derived', + name: 'average_revenue', + expr: 'total_revenue / NULLIF(order_count, 0)', + dependsOn: ['total_revenue', 'order_count'], + }, + ], + entities: [], + defaultTimeDimension: 'ordered_at', +}; + +describe('metricflow semantic model mapping', () => { + it('normalizes source names the same way the server importer did', () => { + expect(toKebabCaseMetricflowName('Fct Orders!')).toBe('fct-orders'); + }); + + it('maps a parsed semantic model to a 
SemanticLayerSource', () => { + expect(mapSemanticModelToSource(ordersModel, 'analytics.orders')).toEqual({ + name: 'fct-orders', + table: 'analytics.orders', + grain: ['status', 'ordered_at'], + columns: [ + { name: 'status', type: 'string', description: 'Order status' }, + { name: 'ordered_at', type: 'time' }, + ], + measures: [ + { + name: 'total_revenue', + expr: 'sum(amount)', + description: 'Revenue', + filter: "status = 'completed'", + }, + { + name: 'average_revenue', + expr: 'total_revenue / NULLIF(order_count, 0)', + }, + ], + joins: [], + descriptions: { dbt: 'Order facts' }, + }); + }); + + it('maps a cross-model metric to a SQL standalone source', () => { + const metric: ParsedCrossModelMetric = { + name: 'roas', + label: 'ROAS', + description: 'Return on ad spend', + type: 'derived', + expr: 'revenue / spend', + dependsOn: [ + { metricName: 'orders', alias: 'revenue' }, + { metricName: 'campaigns', alias: 'spend' }, + ], + filter: "channel = 'paid'", + }; + + expect(mapCrossModelMetricToSource(metric)).toEqual({ + name: 'roas', + sql: 'revenue / spend', + descriptions: { dbt: 'Return on ad spend' }, + grain: [], + columns: [], + measures: [ + { + name: 'roas', + expr: 'revenue / spend', + description: 'Return on ad spend', + filter: "channel = 'paid'", + }, + ], + joins: [], + }); + }); + + it('finds matching tables using target schema, exact name, dotted suffix, and underscore suffix', () => { + const tables = [ + { id: '1', name: 'fct_orders', catalog: null, db: 'analytics', columns: [] }, + { id: '2', name: 'warehouse.marts.fct_orders', catalog: null, db: 'marts', columns: [] }, + { id: '3', name: 'warehouse_fct_customers', catalog: null, db: null, columns: [] }, + ]; + + expect(findMatchingMetricflowTable('fct_orders', tables, 'analytics')?.id).toBe('1'); + expect(findMatchingMetricflowTable('fct_orders', [tables[1]], null)?.id).toBe('2'); + expect(findMatchingMetricflowTable('fct_customers', [tables[2]], null)?.id).toBe('3'); + 
expect(findMatchingMetricflowTable('missing', tables, null)).toBeUndefined(); + }); + + it('counts only relationships whose tables and columns exist', () => { + const relationships: ParsedMetricflowRelationship[] = [ + { fromTable: 'orders', fromColumn: 'customer_id', toTable: 'customers', toColumn: 'id' }, + { fromTable: 'orders', fromColumn: 'missing', toTable: 'customers', toColumn: 'id' }, + { fromTable: 'orders', fromColumn: 'customer_id', toTable: 'missing_table', toColumn: 'id' }, + ]; + const tables = [ + { id: '1', name: 'orders', catalog: null, db: null, columns: [{ id: 'c1', name: 'customer_id' }] }, + { id: '2', name: 'customers', catalog: null, db: null, columns: [{ id: 'c2', name: 'id' }] }, + ]; + + expect(countImportableMetricflowRelationships(relationships, tables)).toBe(1); + }); + + it('resolves semantic-model source names to lowercase snake_case identifiers', () => { + expect( + resolveMetricflowSemanticModelSourceName(ordersModel, { + id: '1', + name: 'ANALYTICS.Fct Orders', + catalog: null, + db: 'analytics', + columns: [], + }), + ).toBe('fct_orders'); + expect(resolveMetricflowSemanticModelSourceName({ ...ordersModel, modelRef: 'fallback_model' }, undefined)).toBe( + 'fallback_model', + ); + }); + + it('materializes entity join keys as hidden standalone columns', () => { + expect( + buildMetricflowColumns({ + ...ordersModel, + entities: [{ name: 'customer', type: 'foreign', expr: 'customer_id', description: 'FK to customers' }], + }), + ).toContainEqual({ name: 'customer_id', type: 'string', visibility: 'hidden', description: 'FK to customers' }); + }); + + it('builds standalone sources with semantic-model joins', () => { + const orders: ParsedSemanticModel = { + ...ordersModel, + modelRef: 'orders', + entities: [{ name: 'customer', type: 'foreign', expr: 'customer_id' }], + }; + const customers: ParsedSemanticModel = { + ...ordersModel, + name: 'customers', + modelRef: 'customers', + dimensions: [{ name: 'id', column: 'id', type: 'string' 
}], + measures: [], + entities: [], + }; + const sourceNameByModelRef = new Map([ + [orders.modelRef, 'orders'], + [customers.modelRef, 'customers'], + ]); + const joins = buildMetricflowJoinsForModel( + orders, + [{ fromTable: 'orders', fromColumn: 'customer_id', toTable: 'customers', toColumn: 'id' }], + sourceNameByModelRef, + ); + + expect( + buildMetricflowSemanticModelSource( + { + model: orders, + matchedTable: undefined, + sourceName: 'orders', + manifestSource: null, + }, + joins, + new Map(), + ), + ).toMatchObject({ + name: 'orders', + table: 'orders', + joins: [{ to: 'customers', on: 'orders.customer_id = customers.id', relationship: 'many_to_one' }], + }); + }); + + it('builds overlays for exact manifest matches so scanned columns remain manifest-owned', () => { + const manifestSource: SemanticLayerSource = { + name: 'orders', + table: 'analytics.orders', + grain: ['id'], + columns: [ + { name: 'id', type: 'string' }, + { name: 'customer_id', type: 'string' }, + ], + joins: [], + measures: [], + descriptions: { db: 'Orders table from scan' }, + }; + const overlay = buildMetricflowSemanticModelSource( + { + model: { ...ordersModel, modelRef: 'orders', description: 'dbt-described orders' }, + matchedTable: undefined, + sourceName: 'orders', + manifestSource, + }, + [{ to: 'customers', on: 'orders.customer_id = customers.id', relationship: 'many_to_one' }], + new Map(), + ); + + expect(overlay).not.toHaveProperty('table'); + expect(overlay).not.toHaveProperty('grain'); + expect(overlay).not.toHaveProperty('columns'); + expect(overlay).toMatchObject({ + name: 'orders', + joins: [{ to: 'customers', on: 'orders.customer_id = customers.id', relationship: 'many_to_one' }], + descriptions: { dbt: 'dbt-described orders' }, + }); + + const composed = composeOverlay(manifestSource, overlay); + expect(composed.columns.map((column) => column.name)).toEqual(['id', 'customer_id']); + expect(composed.joins).toHaveLength(1); + expect(composed.descriptions).toEqual({ db: 
/**
 * Turns an arbitrary name into a lowercase, hyphen-separated slug.
 *
 * After lowercasing, every run of characters outside `[a-z0-9]` collapses into a
 * single hyphen, and a hyphen left at the very start or end is removed.
 *
 * @param str Name to convert.
 * @returns The slug; may be empty when `str` holds no ASCII letters or digits.
 */
export function toKebabCaseMetricflowName(str: string): string {
  const lowered = str.toLowerCase();
  const hyphenated = lowered.replace(/[^a-z0-9]+/g, '-');
  return hyphenated.replace(/^-|-$/g, '');
}
model.modelRef, + grain: model.dimensions.map((d) => d.column), + columns: model.dimensions.map((d) => ({ + name: d.column, + type: d.type, + ...(d.description ? { description: d.description } : {}), + })), + measures: model.measures.map((m) => { + if (m.type === 'simple') { + return { + name: m.name, + expr: `${m.aggregation}(${m.column})`, + ...(m.description ? { description: m.description } : {}), + ...(m.filter ? { filter: m.filter } : {}), + }; + } + return { + name: m.name, + expr: m.expr, + ...(m.description ? { description: m.description } : {}), + }; + }), + joins: [], + descriptions: { dbt: model.description ?? model.modelRef }, + }; +} + +export function mapCrossModelMetricToSource(metric: ParsedCrossModelMetric): SemanticLayerSource { + return { + name: toKebabCaseMetricflowName(metric.name), + sql: metric.expr, + descriptions: { dbt: metric.description ?? metric.name }, + grain: [], + columns: [], + measures: [ + { + name: metric.name, + expr: metric.expr, + ...(metric.description ? { description: metric.description } : {}), + ...(metric.filter ? 
/**
 * Finds the host table that corresponds to a semantic model reference.
 *
 * Matching is case-insensitive and tried in this order:
 *   1. same name and same `db` as `targetSchema` (first hit wins);
 *   2. exactly one table with the same name;
 *   3. exactly one table whose last dot-separated name part equals the reference;
 *   4. exactly one table whose name ends with `.<ref>` or `_<ref>`.
 * A step that yields zero or several candidates falls through to the next one.
 *
 * @param modelRef Semantic model reference to look up.
 * @param hostTables Candidate tables.
 * @param targetSchema Optional schema used for the first, schema-qualified attempt.
 * @returns The matched table, or `undefined` when no step yields a unique candidate.
 */
export function findMatchingMetricflowTable(
  modelRef: string,
  hostTables: MetricflowHostTable[],
  targetSchema?: string | null,
): MetricflowHostTable | undefined {
  const wanted = modelRef.toLowerCase();
  const lowerName = (table: MetricflowHostTable): string => table.name.toLowerCase();
  // Only an unambiguous candidate list counts as a match.
  const onlyMatch = (candidates: MetricflowHostTable[]): MetricflowHostTable | undefined =>
    candidates.length === 1 ? candidates[0] : undefined;

  if (targetSchema) {
    const wantedSchema = targetSchema.toLowerCase();
    for (const table of hostTables) {
      if (lowerName(table) === wanted && table.db?.toLowerCase() === wantedSchema) {
        return table;
      }
    }
  }

  return (
    onlyMatch(hostTables.filter((table) => lowerName(table) === wanted)) ??
    onlyMatch(hostTables.filter((table) => lowerName(table).split('.').pop() === wanted)) ??
    onlyMatch(
      hostTables.filter((table) => {
        const name = lowerName(table);
        return name.endsWith(`.${wanted}`) || name.endsWith(`_${wanted}`);
      }),
    )
  );
}
/**
 * Builds the outgoing joins of one semantic model from parsed relationships.
 *
 * A relationship contributes a join only when it starts at `model.modelRef`, its
 * target is allowed by `availableTargetModelRefs` (when that set is given), and
 * both ends have an entry in `sourceNameByModelRef`. Every join is emitted as
 * `many_to_one` with an `<from>.<col> = <to>.<col>` condition on source names.
 *
 * @param model Model whose outgoing joins are built.
 * @param relationships All parsed relationships; those starting elsewhere are ignored.
 * @param sourceNameByModelRef Source name to use for each model reference.
 * @param availableTargetModelRefs Optional allow-list of target model references.
 * @returns Joins in relationship order; empty when the model itself has no source name.
 */
export function buildMetricflowJoinsForModel(
  model: ParsedSemanticModel,
  relationships: ParsedMetricflowRelationship[],
  sourceNameByModelRef: Map<string, string>,
  availableTargetModelRefs?: Set<string>,
): MetricflowSemanticModelJoin[] {
  const originName = sourceNameByModelRef.get(model.modelRef);
  if (!originName) {
    return [];
  }

  return relationships
    .filter((rel) => rel.fromTable === model.modelRef)
    .filter((rel) => availableTargetModelRefs?.has(rel.toTable) ?? true)
    .flatMap((rel): MetricflowSemanticModelJoin[] => {
      const targetName = sourceNameByModelRef.get(rel.toTable);
      if (!targetName) {
        return [];
      }
      return [
        {
          to: targetName,
          on: `${originName}.${rel.fromColumn} = ${targetName}.${rel.toColumn}`,
          relationship: 'many_to_one',
        },
      ];
    });
}
/**
 * Derives the column list of a semantic model.
 *
 * Dimensions come first, in declaration order, keeping their type and optional
 * description. Entities follow as hidden `string` columns named after the trimmed
 * `expr` (or the trimmed `name` when `expr` is null/undefined). An entity is
 * skipped when that name is empty or already present, compared case-insensitively.
 *
 * @param model Parsed semantic model supplying dimensions and entities.
 * @returns The combined column list.
 */
export function buildMetricflowColumns(model: ParsedSemanticModel): SemanticLayerSource['columns'] {
  const result: SemanticLayerSource['columns'] = [];
  const seen = new Set<string>();

  for (const dimension of model.dimensions) {
    result.push({
      name: dimension.column,
      type: dimension.type,
      ...(dimension.description ? { description: dimension.description } : {}),
    });
    seen.add(dimension.column.toLowerCase());
  }

  for (const entity of model.entities) {
    const name = (entity.expr ?? entity.name)?.trim();
    if (name && !seen.has(name.toLowerCase())) {
      seen.add(name.toLowerCase());
      result.push({
        name,
        type: 'string',
        visibility: 'hidden',
        ...(entity.description ? { description: entity.description } : {}),
      });
    }
  }

  return result;
}
/**
 * Counts the relationships whose both endpoints exist in the host tables.
 *
 * Table and column names are compared case-insensitively. When several host
 * tables share a lowercased name, the last one in `hostTables` is the one used.
 *
 * @param relationships Parsed relationships to check.
 * @param hostTables Tables (with their columns) known to the host.
 * @returns Number of relationships whose from/to table and column were all found.
 */
export function countImportableMetricflowRelationships(
  relationships: ParsedMetricflowRelationship[],
  hostTables: MetricflowHostTable[],
): number {
  // Later entries overwrite earlier ones, so duplicate names resolve to the last table.
  const columnNamesByTable = new Map<string, Set<string>>(
    hostTables.map((table): [string, Set<string>] => [
      table.name.toLowerCase(),
      new Set(table.columns.map((column) => column.name.toLowerCase())),
    ]),
  );
  const hasColumn = (tableName: string, columnName: string): boolean =>
    columnNamesByTable.get(tableName.toLowerCase())?.has(columnName.toLowerCase()) ?? false;

  return relationships.filter(
    (relationship) =>
      hasColumn(relationship.fromTable, relationship.fromColumn) &&
      hasColumn(relationship.toTable, relationship.toColumn),
  ).length;
}
/**
 * Canonicalizes the whitespace of a join condition.
 *
 * Each run of whitespace becomes a single space and leading/trailing whitespace
 * is dropped.
 *
 * @param on Join condition text.
 * @returns The condition with normalized spacing.
 */
export function normalizeMetricflowJoinOn(on: string): string {
  const tokens = on.split(/\s+/).filter((token) => token.length > 0);
  return tokens.join(' ');
}
/**
 * Rewrites the table qualifiers of a simple two-sided join condition.
 *
 * Only conditions of the form `<table>.<column> <op> <table>.<column>` containing
 * exactly one `=` character are rewritten, where `<op>` is `=`, `>=`, `<=` or `!=`.
 * Anything else (compound conditions, unqualified columns, ...) is returned
 * unchanged. Table qualifiers without an entry in the map are kept as they are.
 *
 * @param on Join condition as stored in the manifest.
 * @param sourceNameByManifestName Replacement source name for each manifest table name.
 * @returns The condition with mapped table qualifiers, or `on` itself when it is
 *   not a simple two-sided comparison.
 */
export function rewriteMetricflowJoinOn(on: string, sourceNameByManifestName: Map<string, string>): string {
  const parts = on.split('=');
  if (parts.length !== 2) {
    return on;
  }
  // `>=`, `<=` and `!=` also contain a single "=": keep their first character with
  // the operator instead of letting it leak into the left-hand column name, which
  // would re-emit the condition as e.g. `a.x > = b.y`.
  const operatorPrefix = /[<>!]$/.exec(parts[0])?.[0] ?? '';
  const leftText = operatorPrefix ? parts[0].slice(0, -1) : parts[0];
  const left = parseMetricflowJoinReference(leftText.trim());
  const right = parseMetricflowJoinReference(parts[1].trim());
  if (!left || !right) {
    return on;
  }
  const leftTable = sourceNameByManifestName.get(left.table) ?? left.table;
  const rightTable = sourceNameByManifestName.get(right.table) ?? right.table;
  return `${leftTable}.${left.column} ${operatorPrefix}= ${rightTable}.${right.column}`;
}

/**
 * Splits a qualified column reference at its last dot.
 *
 * @param ref Reference such as `schema.table.column`.
 * @returns The trimmed text before the last dot as `table` and after it as
 *   `column`, or `null` when there is no dot, or the dot is the first or last
 *   character.
 */
export function parseMetricflowJoinReference(ref: string): { table: string; column: string } | null {
  const lastDot = ref.lastIndexOf('.');
  if (lastDot <= 0 || lastDot === ref.length - 1) {
    return null;
  }
  return {
    table: ref.slice(0, lastDot).trim(),
    column: ref.slice(lastDot + 1).trim(),
  };
}
/**
 * Partitions text into consecutive 1-based, inclusive line ranges whose size
 * stays within `maxChars` where possible.
 *
 * Each line costs its length plus one (for the newline). A range is closed just
 * before the line that would push it over the limit; a single line that is larger
 * than the limit still gets a range of its own. One trailing empty line produced
 * by a final newline is not counted.
 *
 * @param content Text to partition.
 * @param maxChars Character budget per range.
 * @returns At least one range; `[{ startLine: 1, endLine: 1 }]` for empty content.
 */
function splitLineRanges(content: string, maxChars: number): Array<{ startLine: number; endLine: number }> {
  const lines = content.split('\n');
  // split() always yields at least one element, so the last entry is safe to inspect.
  if (lines[lines.length - 1] === '') {
    lines.pop();
  }

  const ranges: Array<{ startLine: number; endLine: number }> = [];
  let rangeStart = 1;
  let budgetUsed = 0;

  lines.forEach((line, zeroBasedIndex) => {
    const cost = line.length + 1;
    if (budgetUsed > 0 && budgetUsed + cost > maxChars) {
      // zeroBasedIndex equals the 1-based number of the previous line.
      ranges.push({ startLine: rangeStart, endLine: zeroBasedIndex });
      rangeStart = zeroBasedIndex + 1;
      budgetUsed = 0;
    }
    budgetUsed += cost;
  });

  if (rangeStart <= lines.length) {
    ranges.push({ startLine: rangeStart, endLine: lines.length });
  }

  if (ranges.length === 0) {
    return [{ startLine: 1, endLine: 1 }];
  }
  return ranges;
}
new Set([...diffSet.added, ...diffSet.modified]) : null; + const workUnits: WorkUnit[] = []; + const warnings: string[] = []; + + for (const pagePath of files.filter((path) => path.endsWith('/page.md'))) { + const metadataPath = pagePath.replace(/\/page\.md$/, '/metadata.json'); + const blockPath = pagePath.replace(/\/page\.md$/, '/blocks.json'); + const primary = [metadataPath, pagePath].filter((path) => files.includes(path)); + if (touched && !primary.some((path) => touched.has(path))) { + continue; + } + + const metadata = notionMetadataSchema.parse(JSON.parse(await readFile(join(stagedDir, metadataPath), 'utf-8'))); + const rawFiles = touched ? primary.filter((path) => touched.has(path)).sort() : primary.sort(); + const dependencyPaths = ['manifest.json', files.includes(blockPath) ? blockPath : null] + .filter((path): path is string => typeof path === 'string' && !rawFiles.includes(path)) + .sort(); + const excluded = new Set([...rawFiles, ...dependencyPaths]); + const peerFileIndex = files.filter((path) => !excluded.has(path)).sort(); + const pageContent = await readFile(join(stagedDir, pagePath), 'utf-8'); + const unitKey = safeUnitKey(pagePath); + + if (rawFiles.includes(pagePath) && pageContent.length > MAX_NOTION_WORK_UNIT_CHARS) { + warnings.push(`Oversized Notion page split into span-scoped work units: ${metadata.path}`); + const ranges = splitLineRanges(pageContent, MAX_NOTION_WORK_UNIT_CHARS); + for (let index = 0; index < ranges.length; index += 1) { + const range = ranges[index]; + workUnits.push({ + unitKey: `${unitKey}-part-${index + 1}`, + displayLabel: `${metadata.path} (part ${index + 1} of ${ranges.length})`, + rawFiles, + dependencyPaths, + peerFileIndex, + notes: `Synthesize durable wiki and SL knowledge from this Notion page span only. Use read_raw_span on ${pagePath} for lines ${range.startLine}-${range.endLine}; do not call read_raw_file for oversized pages. 
/**
 * Describes which raw paths belong to the staged Notion snapshot.
 *
 * The fingerprint is the SHA-256 hex digest of a JSON document holding the crawl
 * mode, the sorted root page/database/data-source IDs and the partial-snapshot
 * flag. A snapshot counts as partial when the manifest says so or when it was
 * capped.
 *
 * @param stagedDir Directory containing `manifest.json` and the staged files.
 * @returns The fingerprint plus a predicate: for a partial snapshot only paths
 *   present in `stagedDir` are in scope; otherwise `manifest.json` and anything
 *   under `pages/`, `databases/` or `data-sources/` is.
 */
export async function describeNotionScope(stagedDir: string): Promise<ScopeDescriptor> {
  const manifest = await readManifest(stagedDir);
  const stagedPaths = new Set(await walk(stagedDir));
  const partialSnapshot = manifest.partialSnapshot || manifest.capped;
  const sortedCopy = <T>(ids: Iterable<T>): T[] => [...ids].sort();

  // Property order is part of the fingerprint input and must stay as is.
  const scopeKey = JSON.stringify({
    crawlMode: manifest.crawlMode,
    rootPageIds: sortedCopy(manifest.rootPageIds),
    rootDatabaseIds: sortedCopy(manifest.rootDatabaseIds),
    rootDataSourceIds: sortedCopy(manifest.rootDataSourceIds),
    partialSnapshot,
  });
  const scopedPrefixes = ['pages/', 'databases/', 'data-sources/'];

  return {
    fingerprint: createHash('sha256').update(scopeKey).digest('hex'),
    isPathInScope: (rawPath) => {
      if (partialSnapshot) {
        return stagedPaths.has(rawPath);
      }
      return rawPath === 'manifest.json' || scopedPrefixes.some((prefix) => rawPath.startsWith(prefix));
    },
  };
}
/**
 * Deterministic stand-in embedding for tests: a 4-dimensional histogram that
 * counts the characters of `text` by `charCodeAt(0) % 4`.
 */
function fakeEmbedding(text: string): number[] {
  return Array.from(text).reduce(
    (histogram, ch) => {
      histogram[ch.charCodeAt(0) % 4] += 1;
      return histogram;
    },
    [0, 0, 0, 0],
  );
}
expect(out[0].unitKey).toBe('notion-p0'); + }); + + test('groups pages into k=ceil(N/8) clusters when above threshold', async () => { + const n = MIN_PAGES_TO_CLUSTER + 4; + const pages = Array.from({ length: n }, (_, i) => ({ + id: `p${i}`, + title: `Topic ${i % 2 === 0 ? 'alpha' : 'beta'} ${i}`, + body: `Body for page ${i}`.repeat(20), + })); + const stagedDir = await makeStaged(pages); + const wus = makeWorkUnits(pages); + const out = await clusterNotionWorkUnits({ workUnits: wus, stagedDir, embedding: mockEmbed }); + expect(out.length).toBeLessThanOrEqual(wus.length); + expect(out.length).toBe(Math.ceil(wus.length / 8)); + for (const wu of out) { + expect(wu.unitKey).toMatch(/^notion-cluster-\d+$/); + expect(wu.rawFiles.length).toBeGreaterThan(0); + expect(wu.notes).toMatch(/Synthesize/); + } + }); + + test('preserves coverage: every input rawFile appears in some cluster', async () => { + const pages = Array.from({ length: 12 }, (_, i) => ({ + id: `p${i}`, + title: `Page ${i}`, + body: 'body content', + })); + const stagedDir = await makeStaged(pages); + const wus = makeWorkUnits(pages); + const inputFiles = new Set(wus.flatMap((wu) => wu.rawFiles)); + const out = await clusterNotionWorkUnits({ workUnits: wus, stagedDir, embedding: mockEmbed }); + const outFiles = new Set(out.flatMap((wu) => wu.rawFiles)); + expect(outFiles).toEqual(inputFiles); + }); + + test('falls back to input when embedding fails', async () => { + const pages = Array.from({ length: 10 }, (_, i) => ({ + id: `p${i}`, + title: `Page ${i}`, + body: 'b', + })); + const stagedDir = await makeStaged(pages); + const wus = makeWorkUnits(pages); + const failingEmbed: KloEmbeddingPort = { + maxBatchSize: 100, + computeEmbedding: async () => { + throw new Error('embedding down'); + }, + computeEmbeddingsBulk: async () => { + throw new Error('embedding down'); + }, + }; + const out = await clusterNotionWorkUnits({ workUnits: wus, stagedDir, embedding: failingEmbed }); + expect(out).toEqual(wus); + }); 
+}); diff --git a/packages/context/src/ingest/adapters/notion/cluster.ts b/packages/context/src/ingest/adapters/notion/cluster.ts new file mode 100644 index 00000000..853cd6cb --- /dev/null +++ b/packages/context/src/ingest/adapters/notion/cluster.ts @@ -0,0 +1,90 @@ +import { readFile } from 'node:fs/promises'; +import { join } from 'node:path'; +import type { KloEmbeddingPort } from '../../../core/embedding.js'; +import { kmeans, pickK } from '../../clustering/kmeans.js'; +import type { WorkUnit } from '../../types.js'; +import { notionMetadataSchema } from './types.js'; + +export const MIN_PAGES_TO_CLUSTER = 5; +const CLUSTER_TEXT_BODY_CHARS = 1024; +const CLUSTER_SEED = 42; + +interface ClusterNotionWorkUnitsArgs { + workUnits: WorkUnit[]; + stagedDir: string; + embedding: KloEmbeddingPort; +} + +async function buildClusterText(wu: WorkUnit, stagedDir: string): Promise { + const metadataPath = wu.rawFiles.find((p) => p.endsWith('/metadata.json')); + const pagePath = wu.rawFiles.find((p) => p.endsWith('/page.md')); + let title = wu.displayLabel ?? wu.unitKey; + if (metadataPath) { + try { + const raw = await readFile(join(stagedDir, metadataPath), 'utf-8'); + const md = notionMetadataSchema.parse(JSON.parse(raw)); + title = md.path || md.title || title; + } catch { + // fall through with displayLabel + } + } + let body = ''; + if (pagePath) { + try { + const raw = await readFile(join(stagedDir, pagePath), 'utf-8'); + body = raw.slice(0, CLUSTER_TEXT_BODY_CHARS); + } catch { + // empty body OK + } + } + const combined = `${title}\n\n${body}`.trim(); + return combined.length > 0 ? 
combined : title; +} + +function mergeWorkUnits(bucket: WorkUnit[], clusterIndex: number): WorkUnit { + const rawFiles = Array.from(new Set(bucket.flatMap((w) => w.rawFiles))).sort(); + const dependencyPaths = Array.from(new Set(bucket.flatMap((w) => w.dependencyPaths))).sort(); + const allFiles = new Set([...rawFiles, ...dependencyPaths]); + const peerFileIndex = Array.from( + new Set(bucket.flatMap((w) => w.peerFileIndex).filter((p) => !allFiles.has(p))), + ).sort(); + const labels = bucket + .map((w) => w.displayLabel ?? w.unitKey) + .filter((label, i, arr) => arr.indexOf(label) === i) + .slice(0, 5); + const labelSummary = labels.join(', '); + return { + unitKey: `notion-cluster-${clusterIndex + 1}`, + displayLabel: `Notion cluster ${clusterIndex + 1} (${bucket.length} pages: ${labelSummary})`, + rawFiles, + dependencyPaths, + peerFileIndex, + notes: + `Synthesize durable wiki and SL knowledge from these ${bucket.length} related Notion pages. ` + + 'Read each page with read_raw_file (or read_raw_span for oversized pages). ' + + 'Search nearby evidence with context_evidence_search/_read/_neighbors when needed. ' + + 'Write wiki entries directly with wiki_write and SL sources directly with sl_write_source. 
' + + 'Do not call context_candidate_write.', + }; +} + +export async function clusterNotionWorkUnits(args: ClusterNotionWorkUnitsArgs): Promise { + const { workUnits, stagedDir, embedding } = args; + if (workUnits.length < MIN_PAGES_TO_CLUSTER) return workUnits; + const k = pickK(workUnits.length); + if (k <= 1) return workUnits; + const texts = await Promise.all(workUnits.map((wu) => buildClusterText(wu, stagedDir))); + let vectors: number[][]; + try { + vectors = await embedding.computeEmbeddingsBulk(texts); + } catch { + return workUnits; + } + if (vectors.length !== workUnits.length) return workUnits; + const { assignments } = kmeans(vectors, k, { seed: CLUSTER_SEED }); + const buckets: WorkUnit[][] = Array.from({ length: k }, () => []); + workUnits.forEach((wu, i) => { + buckets[assignments[i]].push(wu); + }); + return buckets.filter((b) => b.length > 0).map((b, idx) => mergeWorkUnits(b, idx)); +} diff --git a/packages/context/src/ingest/adapters/notion/detect.ts b/packages/context/src/ingest/adapters/notion/detect.ts new file mode 100644 index 00000000..aaba9fb1 --- /dev/null +++ b/packages/context/src/ingest/adapters/notion/detect.ts @@ -0,0 +1,20 @@ +import { readFile, readdir } from 'node:fs/promises'; +import { join } from 'node:path'; + +export async function detectNotionStagedDir(stagedDir: string): Promise { + try { + const manifest = JSON.parse(await readFile(join(stagedDir, 'manifest.json'), 'utf-8')) as { source?: unknown }; + if (manifest.source === 'notion') { + return true; + } + } catch { + // Fall through to structural detection for staged dirs without a manifest. 
+ } + + try { + const entries = await readdir(stagedDir, { withFileTypes: true, recursive: true }); + return entries.some((entry) => entry.isFile() && entry.name === 'page.md'); + } catch { + return false; + } +} diff --git a/packages/context/src/ingest/adapters/notion/fetch.test.ts b/packages/context/src/ingest/adapters/notion/fetch.test.ts new file mode 100644 index 00000000..ae6d5fd5 --- /dev/null +++ b/packages/context/src/ingest/adapters/notion/fetch.test.ts @@ -0,0 +1,395 @@ +import { mkdtemp, readFile, rm } from 'node:fs/promises'; +import { tmpdir } from 'node:os'; +import { join } from 'node:path'; +import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'; +import { fetchNotionSnapshot } from './fetch.js'; +import type { NotionApi } from './notion-client.js'; + +describe('fetchNotionSnapshot', () => { + let stagedDir: string; + let client: NotionApi; + + beforeEach(async () => { + stagedDir = await mkdtemp(join(tmpdir(), 'notion-fetch-')); + client = { + search: vi.fn().mockResolvedValue({ results: [], hasMore: false, nextCursor: null }), + retrieveBotUser: vi.fn().mockResolvedValue({ name: 'Notion bot' }), + retrievePage: vi.fn().mockImplementation((pageId: string) => ({ + id: pageId, + url: `https://notion.example/${pageId}`, + parent: pageId.startsWith('row-') + ? { type: 'data_source_id', data_source_id: 'data-source-search' } + : { type: 'page_id', page_id: 'root' }, + last_edited_time: '2026-04-12T10:15:00.000Z', + last_edited_by: { type: 'person', name: 'Jane Doe', person: {} }, + properties: { Name: { type: 'title', title: [{ plain_text: pageId === 'row-1' ? 
'Row One' : pageId }] } }, + })), + retrieveDatabase: vi.fn().mockResolvedValue({ + id: 'database-1', + data_sources: [{ id: 'data-source-1', name: 'Policies' }], + }), + queryDataSource: vi.fn().mockResolvedValue({ + results: [ + { + id: 'row-1', + url: 'https://notion.example/row-1', + parent: { type: 'data_source_id', data_source_id: 'data-source-1' }, + last_edited_time: '2026-04-12T10:15:00.000Z', + properties: { Name: { type: 'title', title: [{ plain_text: 'Row One' }] } }, + }, + ], + hasMore: false, + nextCursor: null, + }), + listBlockChildren: vi.fn().mockResolvedValue({ + results: [ + { id: 'h1', type: 'heading_1', heading_1: { rich_text: [{ plain_text: 'Policy' }] } }, + { id: 'p1', type: 'paragraph', paragraph: { rich_text: [{ plain_text: 'Durable rule.' }] } }, + ], + hasMore: false, + nextCursor: null, + }), + }; + }); + + afterEach(async () => { + vi.restoreAllMocks(); + await rm(stagedDir, { recursive: true, force: true }); + }); + + it('materializes selected root pages and database data-source rows', async () => { + await fetchNotionSnapshot({ + client, + stagedDir, + config: { + authToken: 'secret', + crawlMode: 'selected_roots', + rootPageIds: ['page-1'], + rootDatabaseIds: ['database-1'], + rootDataSourceIds: [], + maxPagesPerRun: 10, + maxKnowledgeCreatesPerRun: 5, + maxKnowledgeUpdatesPerRun: 20, + lastSuccessfulCursor: null, + }, + }); + + const manifest = JSON.parse(await readFile(join(stagedDir, 'manifest.json'), 'utf-8')); + expect(manifest).toMatchObject({ + source: 'notion', + apiVersion: '2026-03-11', + pageCount: 2, + databaseCount: 1, + dataSourceCount: 1, + }); + await expect(readFile(join(stagedDir, 'pages/page-1/page.md'), 'utf-8')).resolves.toContain('Durable rule.'); + await expect( + readFile(join(stagedDir, 'databases/database-1/data-sources/data-source-1/rows/row-1/page.md'), 'utf-8'), + ).resolves.toContain('Row One'); + }); + + it('logs skipped page materialization failures', async () => { + const warn = vi.spyOn(console, 
'warn').mockImplementation(() => undefined); + (client.retrievePage as ReturnType).mockRejectedValueOnce(new Error('Notion API failed')); + + const manifest = await fetchNotionSnapshot({ + client, + stagedDir, + config: { + authToken: 'secret', + crawlMode: 'selected_roots', + rootPageIds: ['page-1'], + rootDatabaseIds: [], + rootDataSourceIds: [], + maxPagesPerRun: 10, + maxKnowledgeCreatesPerRun: 5, + maxKnowledgeUpdatesPerRun: 20, + lastSuccessfulCursor: null, + }, + }); + + expect(manifest.skipped).toEqual([{ externalId: 'page-1', reason: 'Notion API failed' }]); + expect(warn).toHaveBeenCalledWith('Skipping Notion page page-1: Notion API failed'); + }); + + it('recursively fetches selected-root child pages and derives scoped links', async () => { + (client.retrievePage as ReturnType).mockImplementation((pageId: string) => ({ + id: pageId, + url: `https://notion.example/${pageId}`, + parent: + pageId === 'child-page' ? { type: 'page_id', page_id: 'root-page' } : { type: 'workspace', workspace: true }, + last_edited_time: '2026-04-12T10:15:00.000Z', + properties: { + Name: { type: 'title', title: [{ plain_text: pageId === 'root-page' ? 'Root Page' : 'Child Page' }] }, + Related: pageId === 'root-page' ? { type: 'relation', relation: [{ id: 'child-page' }] } : undefined, + }, + })); + (client.listBlockChildren as ReturnType).mockImplementation((blockId: string) => ({ + results: + blockId === 'root-page' + ? 
[ + { id: 'child-page', type: 'child_page', child_page: { title: 'Child Page' } }, + { + id: 'page-link', + type: 'link_to_page', + link_to_page: { type: 'page_id', page_id: 'child-page' }, + }, + { + id: 'db-link', + type: 'link_to_page', + link_to_page: { type: 'database_id', database_id: 'database-1' }, + }, + ] + : [ + { + id: 'mention-root', + type: 'paragraph', + paragraph: { + rich_text: [ + { + plain_text: 'See Root Page', + mention: { type: 'page', page: { id: 'root-page' } }, + }, + ], + }, + }, + ], + hasMore: false, + nextCursor: null, + })); + + await fetchNotionSnapshot({ + client, + stagedDir, + config: { + authToken: 'secret', + crawlMode: 'selected_roots', + rootPageIds: ['root-page'], + rootDatabaseIds: [], + rootDataSourceIds: [], + maxPagesPerRun: 10, + maxKnowledgeCreatesPerRun: 5, + maxKnowledgeUpdatesPerRun: 20, + lastSuccessfulCursor: null, + }, + }); + + const rootLinks = JSON.parse(await readFile(join(stagedDir, 'pages/root-page/links.json'), 'utf-8')); + const childLinks = JSON.parse(await readFile(join(stagedDir, 'pages/child-page/links.json'), 'utf-8')); + expect(rootLinks).toMatchObject({ + children: ['child-page'], + reverseLinks: ['child-page'], + mentions: ['child-page'], + databases: ['database-1'], + }); + expect(childLinks).toMatchObject({ + children: [], + reverseLinks: ['root-page'], + mentions: ['root-page'], + databases: [], + }); + }); + + it('truncates deeply nested block trees and records a warning', async () => { + const warnSpy = vi.spyOn(console, 'warn').mockImplementation(() => undefined); + (client.listBlockChildren as ReturnType).mockImplementation((blockId: string) => { + const currentDepth = blockId === 'page-1' ? 0 : Number(blockId.replace('block-', '')); + const nextDepth = currentDepth + 1; + return { + results: + nextDepth <= 12 + ? 
[ + { + id: `block-${nextDepth}`, + type: 'paragraph', + has_children: nextDepth < 12, + paragraph: { rich_text: [{ plain_text: `Depth ${nextDepth}` }] }, + }, + ] + : [], + hasMore: false, + nextCursor: null, + }; + }); + + await fetchNotionSnapshot({ + client, + stagedDir, + config: { + authToken: 'secret', + crawlMode: 'selected_roots', + rootPageIds: ['page-1'], + rootDatabaseIds: [], + rootDataSourceIds: [], + maxPagesPerRun: 10, + maxKnowledgeCreatesPerRun: 5, + maxKnowledgeUpdatesPerRun: 20, + lastSuccessfulCursor: null, + }, + }); + + const blocks = JSON.parse(await readFile(join(stagedDir, 'pages/page-1/blocks.json'), 'utf-8')); + const manifest = JSON.parse(await readFile(join(stagedDir, 'manifest.json'), 'utf-8')); + expect(blocks).toHaveLength(10); + expect(manifest.warnings).toContain('maxBlockDepth reached for page page-1 at depth 10'); + expect(warnSpy).toHaveBeenCalledWith('maxBlockDepth reached for page page-1 at depth 10'); + }); + + it('truncates pages at the per-page block cap and records a warning', async () => { + const warnSpy = vi.spyOn(console, 'warn').mockImplementation(() => undefined); + (client.listBlockChildren as ReturnType).mockResolvedValue({ + results: Array.from({ length: 2001 }, (_, index) => ({ + id: `block-${index}`, + type: 'paragraph', + paragraph: { rich_text: [{ plain_text: `Block ${index}` }] }, + })), + hasMore: false, + nextCursor: null, + }); + + await fetchNotionSnapshot({ + client, + stagedDir, + config: { + authToken: 'secret', + crawlMode: 'selected_roots', + rootPageIds: ['page-1'], + rootDatabaseIds: [], + rootDataSourceIds: [], + maxPagesPerRun: 10, + maxKnowledgeCreatesPerRun: 5, + maxKnowledgeUpdatesPerRun: 20, + lastSuccessfulCursor: null, + }, + }); + + const blocks = JSON.parse(await readFile(join(stagedDir, 'pages/page-1/blocks.json'), 'utf-8')); + const manifest = JSON.parse(await readFile(join(stagedDir, 'manifest.json'), 'utf-8')); + expect(blocks).toHaveLength(2000); + 
// all_accessible mode discovers content through search instead of configured roots.
// The first mocked search response answers the page query, the second answers the
// data-source query; both report a single result and no further pages.
it('uses all_accessible search for pages and data sources', async () => {
  // `ReturnType` needs a type argument; `typeof vi.fn` yields the mock type so the
  // mockResolvedValueOnce helpers are available on the client method.
  (client.search as ReturnType<typeof vi.fn>)
    .mockResolvedValueOnce({ results: [{ id: 'page-search', object: 'page' }], hasMore: false, nextCursor: null })
    .mockResolvedValueOnce({
      results: [{ id: 'data-source-search', object: 'data_source' }],
      hasMore: false,
      nextCursor: null,
    });

  await fetchNotionSnapshot({
    client,
    stagedDir,
    config: {
      authToken: 'secret',
      crawlMode: 'all_accessible',
      rootPageIds: [],
      rootDatabaseIds: [],
      rootDataSourceIds: [],
      maxPagesPerRun: 10,
      maxKnowledgeCreatesPerRun: 5,
      maxKnowledgeUpdatesPerRun: 20,
      lastSuccessfulCursor: null,
    },
  });

  // Page search is sized by the remaining page budget; data sources are searched one at a time.
  expect(client.search).toHaveBeenCalledWith('page', null, 10);
  expect(client.search).toHaveBeenCalledWith('data_source', null, 1);
  await expect(readFile(join(stagedDir, 'pages/page-search/page.md'), 'utf-8')).resolves.toContain('Durable rule.');
  // Data sources found through search have no database container, so rows land under data-sources/.
  await expect(
    readFile(join(stagedDir, 'data-sources/data-source-search/rows/row-1/page.md'), 'utf-8'),
  ).resolves.toContain('Row One');
});
// Two root pages are requested while the budget allows only one: the second page must be
// refused, and the manifest must flag the snapshot as capped and partial.
it('caps page materialization at maxPagesPerRun', async () => {
  await fetchNotionSnapshot({
    client,
    stagedDir,
    config: {
      authToken: 'secret',
      crawlMode: 'selected_roots',
      rootPageIds: ['page-1', 'page-2'],
      rootDatabaseIds: [],
      rootDataSourceIds: [],
      // Budget of a single page; 'page-2' exceeds it.
      maxPagesPerRun: 1,
      maxKnowledgeCreatesPerRun: 5,
      maxKnowledgeUpdatesPerRun: 20,
      lastSuccessfulCursor: null,
    },
  });

  // Assert against the manifest written to disk rather than the returned value.
  const manifest = JSON.parse(await readFile(join(stagedDir, 'manifest.json'), 'utf-8'));
  expect(manifest.capped).toBe(true);
  expect(manifest.partialSnapshot).toBe(true);
  expect(manifest.pageCount).toBe(1);
});
/**
 * Mutable bookkeeping shared by every step of one `fetchNotionSnapshot` run.
 * Most of it is copied into the manifest at the end of the crawl.
 */
interface CrawlState {
  /** Pages (including data-source rows) fully written so far; compared against `maxPagesPerRun`. */
  pageCount: number;
  /** Database containers whose metadata has been written. */
  databaseCount: number;
  /** Data sources whose metadata has been written. */
  dataSourceCount: number;
  /** Set once the page budget stopped the crawl before everything was fetched. */
  capped: boolean;
  /** Pages that failed to materialize, with the error message as the reason. */
  skipped: Array<{ externalId: string; reason: string }>;
  /** De-duplicated, human-readable warnings (budget, block-count and depth limits). */
  warnings: string[];
  /** Staged directories already attempted, so the same target is never materialized twice. */
  materializedPageTargets: Set<string>;
  /** JSON-serialized `NotionCrawlCursor` to resume from, or null when there is nothing to resume. */
  nextSuccessfulCursor: string | null;
  /** Successfully written pages keyed by staged directory; input for the scoped `links.json` files. */
  pageTargets: Map<string, { pageId: string; dir: string; links: NotionLinks }>;
}
/**
 * Records that the crawl stopped early because `maxPagesPerRun` was reached.
 *
 * @param state  Crawl bookkeeping; `capped` is set and a (de-duplicated) warning is added.
 * @param config Pull configuration, used only for the warning text.
 * @param cursor Continuation point for the next run. When omitted, a cursor recorded by an
 *               earlier call is kept: a caller that merely notices the exhausted budget
 *               must not erase the resume position someone else already stored.
 */
function markCapped(state: CrawlState, config: NotionPullConfig, cursor?: NotionCrawlCursor): void {
  state.capped = true;
  addWarning(state.warnings, `maxPagesPerRun reached at ${config.maxPagesPerRun}`);
  if (cursor) {
    state.nextSuccessfulCursor = JSON.stringify(cursor);
  }
}
/**
 * Appends the descendants of `params.blockId` to `params.state.blocks` in depth-first
 * document order, paging through `listBlockChildren` as needed.
 *
 * Two limits bound the walk, each reported through `params.warnings` when it cuts the tree short:
 * - `DEFAULT_MAX_BLOCKS_PER_PAGE`: collection stops once the list is full and more blocks are
 *   known to exist (a later sibling, a further result page, or unfetched children).
 * - `DEFAULT_MAX_BLOCK_DEPTH`: a block at that depth is kept, but its children are not fetched.
 *
 * `child_page` blocks are recorded but never descended into. Their children are the content of
 * a different page; walking into them would copy that page's blocks, mentions and nested
 * sub-pages into this page's snapshot.
 * NOTE(review): callers that do not crawl sub-pages separately (data-source rows appear to be
 * one such case) will no longer see sub-page content inlined; confirm that is acceptable.
 *
 * @param params.client   Notion API wrapper used to list children.
 * @param params.blockId  Block (or page) whose direct children are listed.
 * @param params.pageId   Page being crawled; only used in warning messages.
 * @param params.depth    Depth of `blockId` itself; the page root is 0.
 * @param params.warnings Warning sink shared with the rest of the crawl.
 * @param params.state    Accumulator for the collected blocks and the "cap warned" flag.
 */
async function collectBlockChildren(params: {
  client: NotionApi;
  blockId: string;
  pageId: string;
  depth: number;
  warnings: string[];
  state: BlockCollectionState;
}): Promise<void> {
  let cursor: string | null = null;
  do {
    const remainingBlocks = DEFAULT_MAX_BLOCKS_PER_PAGE - params.state.blocks.length;
    if (remainingBlocks <= 0) {
      addBlockCountWarning(params.state, params.warnings, params.pageId);
      return;
    }
    // Never request more than what still fits, nor more than 100 blocks per call.
    const page = await params.client.listBlockChildren(params.blockId, cursor, Math.min(remainingBlocks, 100));
    for (let index = 0; index < page.results.length; index += 1) {
      // A nested walk for an earlier sibling may have filled the list in the meantime.
      if (params.state.blocks.length >= DEFAULT_MAX_BLOCKS_PER_PAGE) {
        addBlockCountWarning(params.state, params.warnings, params.pageId);
        return;
      }

      const block = page.results[index];
      const blockDepth = params.depth + 1;
      params.state.blocks.push(block);

      // Sub-pages are separate documents: keep the reference block, skip their content.
      if (block.has_children && block.type !== 'child_page') {
        if (blockDepth >= DEFAULT_MAX_BLOCK_DEPTH) {
          addWarning(
            params.warnings,
            `maxBlockDepth reached for page ${params.pageId} at depth ${DEFAULT_MAX_BLOCK_DEPTH}`,
            true,
          );
        } else if (params.state.blocks.length >= DEFAULT_MAX_BLOCKS_PER_PAGE) {
          // The block itself fit, but its children cannot be fetched any more.
          addBlockCountWarning(params.state, params.warnings, params.pageId);
          return;
        } else {
          await collectBlockChildren({
            client: params.client,
            blockId: block.id,
            pageId: params.pageId,
            depth: blockDepth,
            warnings: params.warnings,
            state: params.state,
          });
        }
      }

      // Only warn when something is actually left behind: a later sibling or another result page.
      if (
        params.state.blocks.length >= DEFAULT_MAX_BLOCKS_PER_PAGE &&
        (index < page.results.length - 1 || page.hasMore)
      ) {
        addBlockCountWarning(params.state, params.warnings, params.pageId);
        return;
      }
    }
    cursor = page.hasMore ? page.nextCursor : null;
  } while (cursor);
}
/**
 * Writes one `links.json` next to every page materialized during this crawl.
 *
 * Child and mention ids are restricted to pages that are part of the snapshot, database ids
 * are de-duplicated, and reverse links are derived from the scoped mentions: when page A
 * mentions page B, every staged copy of B lists A. All id lists are sorted before writing.
 *
 * @param stagedRoot Root directory of the staged snapshot.
 * @param state      Crawl state; only `pageTargets` is read.
 */
async function writeScopedLinks(stagedRoot: string, state: CrawlState): Promise<void> {
  const crawled = [...state.pageTargets.values()];
  const inScope = new Set(crawled.map((entry) => entry.pageId));
  const keepScoped = (ids: string[]): string[] => ids.filter((id) => inScope.has(id)).sort();

  // Same order as `state.pageTargets`, which also fixes the order the files are written in.
  const scoped: ScopedLinkTarget[] = crawled.map((entry) => ({
    pageId: entry.pageId,
    dir: entry.dir,
    children: keepScoped(entry.links.children),
    reverseLinks: [],
    mentions: keepScoped(entry.links.mentions),
    databases: [...new Set(entry.links.databases)].sort(),
  }));
  const byPageId = indexTargetsByPageId(scoped);

  // A page id can be staged under more than one directory, hence a list of targets per id.
  for (const source of scoped) {
    for (const mentionedId of source.mentions) {
      const mentionedTargets = byPageId.get(mentionedId);
      if (!mentionedTargets) {
        continue;
      }
      for (const mentioned of mentionedTargets) {
        addUnique(mentioned.reverseLinks, source.pageId);
      }
    }
  }

  for (const entry of scoped) {
    const { children, reverseLinks, mentions, databases } = entry;
    reverseLinks.sort();
    await writeJson(join(stagedRoot, entry.dir, 'links.json'), { children, reverseLinks, mentions, databases });
  }
}
/**
 * Writes the metadata stub for one data source and materializes its rows as pages.
 *
 * Rows are staged under `<baseDir>/rows/<rowId>`, where `baseDir` sits below the owning
 * database when `databaseId` is given and under `data-sources/` otherwise. Row queries are
 * sized by the remaining page budget and stop once the budget is used up.
 *
 * Continuation cursors are only produced when `dataSourceSearchCursorAfterThis` is provided
 * (`null` counts as provided); callers that leave it `undefined` get a plain "capped" marker.
 *
 * @param params.dataSourceSearchCursorAfterThis Search cursor pointing past this data source,
 *        stored inside the row cursor so a later run can continue with the next data source.
 * @param params.rowStartCursor Row cursor to resume from; defaults to the first row.
 */
async function materializeDataSource(params: {
  client: NotionApi;
  dataSourceId: string;
  stagedRoot: string;
  fallbackPath: string[];
  state: CrawlState;
  config: NotionPullConfig;
  databaseId?: string | null;
  dataSourceSearchCursorAfterThis?: string | null;
  rowStartCursor?: string | null;
}): Promise<void> {
  const baseDir = params.databaseId
    ? join('databases', params.databaseId, 'data-sources', params.dataSourceId)
    : join('data-sources', params.dataSourceId);
  await writeJson(join(params.stagedRoot, baseDir, 'metadata.json'), {
    objectType: 'data_source',
    id: params.dataSourceId,
    title: params.dataSourceId,
    path: [...params.fallbackPath, params.dataSourceId].join(' / '),
    url: null,
    parentId: params.databaseId ?? null,
    databaseId: params.databaseId ?? null,
    dataSourceId: params.dataSourceId,
    lastEditedAt: null,
    lastEditedBy: null,
    properties: {},
  });
  params.state.dataSourceCount += 1;

  // Without a search cursor to hand over, nobody else records that this data source was
  // reached with an exhausted budget: the row loop below would simply not run and the manifest
  // would describe a complete snapshot. Flag the run as capped instead. This is deliberately
  // conservative: the rows are never queried, so an empty data source is flagged as well.
  const tracksSearchCursor = params.dataSourceSearchCursorAfterThis !== undefined;
  if (!tracksSearchCursor && !hasPageBudget(params.state, params.config)) {
    markCapped(params.state, params.config);
    return;
  }

  await visitPaginated({
    load: (cursor, pageSize) => params.client.queryDataSource(params.dataSourceId, cursor, pageSize),
    startCursor: params.rowStartCursor ?? null,
    pageSize: () => remainingPageBudget(params.state, params.config),
    shouldContinue: () => hasPageBudget(params.state, params.config),
    visit: async (row, nextCursor) => {
      if (typeof row.id !== 'string') {
        return;
      }
      // The query result already carries the row's page object, so it is not retrieved again.
      await materializePage({
        client: params.client,
        pageId: row.id,
        stagedRoot: params.stagedRoot,
        fallbackPath: params.fallbackPath,
        state: params.state,
        config: params.config,
        databaseId: params.databaseId ?? null,
        dataSourceId: params.dataSourceId,
        rowPath: join(baseDir, 'rows', row.id),
        page: row,
      });
      // Budget used up while more row pages exist: remember where to continue.
      if (!hasPageBudget(params.state, params.config) && nextCursor) {
        markCapped(
          params.state,
          params.config,
          params.dataSourceSearchCursorAfterThis === undefined
            ? undefined
            : {
                phase: 'all_accessible_data_source_rows',
                dataSourceId: params.dataSourceId,
                dataSourceSearchCursor: params.dataSourceSearchCursorAfterThis ?? null,
                rowCursor: nextCursor,
              },
        );
      }
    },
  });
}
/**
 * Crawls Notion into `params.stagedDir` and writes a `manifest.json` describing the result.
 *
 * - `selected_roots`: materializes the configured root pages (with their child pages), then the
 *   root databases, then the root data sources, stopping once the page budget is exhausted.
 * - `all_accessible`: discovers pages and then data sources through search. When the budget
 *   runs out, a continuation cursor is stored in the manifest; a later run that receives it as
 *   `lastSuccessfulCursor` resumes from that point. An unparsable cursor restarts the crawl.
 *
 * In a data-source-row cursor, a `null` `dataSourceSearchCursor` means the data-source search
 * had no further results, so nothing is searched after the pending rows. Likewise, running out
 * of budget exactly on the final data source completes the crawl rather than capping it.
 *
 * @returns The manifest that was written to `<stagedDir>/manifest.json`.
 */
export async function fetchNotionSnapshot(params: FetchNotionSnapshotParams): Promise<NotionManifest> {
  await mkdir(params.stagedDir, { recursive: true });
  const configuredCursor = params.config.crawlMode === 'all_accessible' ? parseConfiguredCursor(params.config) : null;
  const continuedFromCursor = configuredCursor !== null;
  const state: CrawlState = {
    pageCount: 0,
    databaseCount: 0,
    dataSourceCount: 0,
    capped: false,
    skipped: [],
    warnings: [],
    materializedPageTargets: new Set<string>(),
    nextSuccessfulCursor: null,
    pageTargets: new Map(),
  };

  if (params.config.crawlMode === 'all_accessible') {
    // Known v1 limitation: with Notion API 2026-03-11, search exposes page and data_source objects but not
    // database containers. If container search becomes available, add a database pass before data-source rows here.
    const startWithDataSources =
      configuredCursor?.phase === 'all_accessible_data_sources' ||
      configuredCursor?.phase === 'all_accessible_data_source_rows';
    // True when the resumed data source was the last search result of the previous run.
    let dataSourceSearchExhausted = false;

    if (configuredCursor?.phase === 'all_accessible_data_source_rows') {
      await materializeDataSource({
        client: params.client,
        dataSourceId: configuredCursor.dataSourceId,
        stagedRoot: params.stagedDir,
        fallbackPath: [configuredCursor.dataSourceId],
        state,
        config: params.config,
        dataSourceSearchCursorAfterThis: configuredCursor.dataSourceSearchCursor,
        rowStartCursor: configuredCursor.rowCursor,
      });
      if (!configuredCursor.dataSourceSearchCursor) {
        // Restarting the search with a null cursor would crawl every data source again.
        dataSourceSearchExhausted = true;
      } else if (!hasPageBudget(state, params.config) && !state.capped) {
        // Rows finished exactly on budget; continue with the next data source next time.
        markCapped(state, params.config, {
          phase: 'all_accessible_data_sources',
          cursor: configuredCursor.dataSourceSearchCursor,
        });
      }
    }

    if (!startWithDataSources && !state.capped) {
      await visitPaginated({
        load: (cursor, pageSize) => params.client.search('page', cursor, pageSize),
        startCursor: configuredCursor?.phase === 'all_accessible_pages' ? configuredCursor.cursor : null,
        pageSize: () => remainingPageBudget(state, params.config),
        shouldContinue: () => hasPageBudget(state, params.config),
        visit: async (page, nextCursor) => {
          await materializePage({
            client: params.client,
            pageId: page.id,
            stagedRoot: params.stagedDir,
            fallbackPath: [],
            state,
            config: params.config,
            // Rows are written by the data-source pass under their own directory.
            skipDataSourceRows: true,
          });
          if (!hasPageBudget(state, params.config) && nextCursor) {
            markCapped(state, params.config, { phase: 'all_accessible_pages', cursor: nextCursor });
          }
        },
      });
      // Page search is done but the budget is gone: data sources have not been looked at yet,
      // so the next run starts the data-source phase from its beginning.
      if (!hasPageBudget(state, params.config) && state.nextSuccessfulCursor === null) {
        markCapped(state, params.config, { phase: 'all_accessible_data_sources', cursor: null });
      }
    }

    if (!state.capped && !dataSourceSearchExhausted) {
      await visitPaginated({
        // One data source per search call, so `nextCursor` always points right behind the current one.
        load: (cursor) => params.client.search('data_source', cursor, 1),
        startCursor:
          configuredCursor?.phase === 'all_accessible_data_sources'
            ? configuredCursor.cursor
            : configuredCursor?.phase === 'all_accessible_data_source_rows'
              ? configuredCursor.dataSourceSearchCursor
              : null,
        pageSize: () => 1,
        shouldContinue: () => hasPageBudget(state, params.config),
        visit: async (dataSource, nextCursor) => {
          await materializeDataSource({
            client: params.client,
            dataSourceId: dataSource.id,
            stagedRoot: params.stagedDir,
            fallbackPath: [dataSource.id],
            state,
            config: params.config,
            dataSourceSearchCursorAfterThis: nextCursor,
          });
          // This data source is complete (no row cursor was recorded). Only cap when further
          // data sources remain; a null `nextCursor` means the crawl has reached its end.
          if (!hasPageBudget(state, params.config) && state.nextSuccessfulCursor === null && nextCursor) {
            markCapped(state, params.config, { phase: 'all_accessible_data_sources', cursor: nextCursor });
          }
        },
      });
    }
  } else {
    for (const pageId of params.config.rootPageIds) {
      if (state.capped) {
        break;
      }
      await materializePage({
        client: params.client,
        pageId,
        stagedRoot: params.stagedDir,
        fallbackPath: [],
        state,
        config: params.config,
      });
    }
    for (const databaseId of params.config.rootDatabaseIds) {
      if (state.capped) {
        break;
      }
      await materializeDatabase({
        client: params.client,
        databaseId,
        stagedRoot: params.stagedDir,
        state,
        config: params.config,
      });
    }
    for (const dataSourceId of params.config.rootDataSourceIds) {
      if (state.capped) {
        break;
      }
      await materializeDataSource({
        client: params.client,
        dataSourceId,
        stagedRoot: params.stagedDir,
        fallbackPath: [dataSourceId],
        state,
        config: params.config,
      });
    }
  }

  // Links can only be scoped once the full set of materialized pages is known.
  await writeScopedLinks(params.stagedDir, state);

  const manifest: NotionManifest = {
    source: NOTION_SOURCE_KEY,
    apiVersion: NOTION_API_VERSION,
    crawlMode: params.config.crawlMode,
    rootPageIds: params.config.rootPageIds,
    rootDatabaseIds: params.config.rootDatabaseIds,
    rootDataSourceIds: params.config.rootDataSourceIds,
    fetchedAt: new Date().toISOString(),
    pageCount: state.pageCount,
    databaseCount: state.databaseCount,
    dataSourceCount: state.dataSourceCount,
    capped: state.capped,
    continuedFromCursor,
    // A resumed run only contains the remainder of the crawl, so it is partial by definition.
    partialSnapshot: state.capped || continuedFromCursor,
    maxPagesPerRun: params.config.maxPagesPerRun,
    maxKnowledgeCreatesPerRun: params.config.maxKnowledgeCreatesPerRun,
    maxKnowledgeUpdatesPerRun: params.config.maxKnowledgeUpdatesPerRun,
    nextSuccessfulCursor: state.capped ? state.nextSuccessfulCursor : null,
    skipped: state.skipped,
    warnings: state.warnings,
  };
  await writeJson(join(params.stagedDir, 'manifest.json'), manifest);
  return manifest;
}
// A rich-text part with an `href` is rendered as a markdown link. The URL used here ends in
// ")", which would otherwise close the link target early, so it must come out as "\)".
it('escapes closing parens in markdown link URLs', () => {
  const markdown = normalizeNotionBlocksToMarkdown([
    {
      id: 'p1',
      type: 'paragraph',
      paragraph: {
        rich_text: [{ plain_text: 'Disambiguation', href: 'https://example.com/wiki/Foo_(bar)' }],
      },
    },
  ]);

  // String.raw keeps the backslash literal: only the closing paren is escaped, not the opening one.
  expect(markdown).toBe(String.raw`[Disambiguation](https://example.com/wiki/Foo_(bar\))`);
});
/**
 * Flattens a Notion rich-text array into a single markdown string.
 *
 * Each part contributes its `plain_text`; a part that has both text and an `href` becomes
 * `[text](href)` with every ")" in the URL escaped as "\)". Parts without string text
 * contribute nothing. The concatenated result is trimmed.
 *
 * @param value Expected to be an array of rich-text parts; anything else yields ''.
 */
function richTextToMarkdown(value: unknown): string {
  if (!Array.isArray(value)) {
    return '';
  }

  let markdown = '';
  for (const segment of value) {
    const richText = segment as NotionRichText;
    const label = typeof richText.plain_text === 'string' ? richText.plain_text : '';
    const target = typeof richText.href === 'string' ? richText.href : null;
    if (target && label) {
      markdown += `[${label}](${target.split(')').join('\\)')})`;
    } else {
      markdown += label;
    }
  }
  return markdown.trim();
}
String(property[property.type as string]) : ''; + default: + return ''; + } +} + +function extractTitle(properties: Record): string { + for (const value of Object.values(properties)) { + if (value && typeof value === 'object' && (value as { type?: unknown }).type === 'title') { + const title = propertyValueToText(value); + if (title) { + return title; + } + } + } + return 'Untitled'; +} + +function parentId(parent: unknown): string | null { + if (!parent || typeof parent !== 'object') { + return null; + } + const typed = parent as Record; + if (typed.type === 'page_id' && typeof typed.page_id === 'string') { + return typed.page_id; + } + if (typed.type === 'database_id' && typeof typed.database_id === 'string') { + return typed.database_id; + } + if (typed.type === 'data_source_id' && typeof typed.data_source_id === 'string') { + return typed.data_source_id; + } + return null; +} + +function editorName(user: unknown): string | null { + if (!user || typeof user !== 'object') { + return null; + } + const typed = user as Record; + if (typeof typed.name === 'string') { + return typed.name; + } + return null; +} + +export function normalizeNotionPageMetadata(input: { + page: Record; + fallbackPath: string[]; + objectType: NotionObjectType; + databaseId?: string | null; + dataSourceId?: string | null; +}): NotionMetadata { + const properties = + input.page.properties && typeof input.page.properties === 'object' + ? 
(input.page.properties as Record) + : {}; + const title = extractTitle(properties); + const selectedProperties = Object.fromEntries( + Object.entries(properties) + .filter(([, value]) => value && typeof value === 'object' && (value as { type?: unknown }).type !== 'title') + .map(([key, value]) => [key, propertyValueToText(value)]) + .filter(([, value]) => value !== ''), + ); + + return { + objectType: input.objectType, + id: String(input.page.id), + title, + path: [...input.fallbackPath, title].filter(Boolean).join(' / '), + url: typeof input.page.url === 'string' ? input.page.url : null, + parentId: parentId(input.page.parent), + databaseId: input.databaseId ?? null, + dataSourceId: input.dataSourceId ?? null, + lastEditedAt: typeof input.page.last_edited_time === 'string' ? input.page.last_edited_time : null, + lastEditedBy: editorName(input.page.last_edited_by), + properties: selectedProperties, + }; +} + +export function normalizeNotionBlocksToMarkdown(blocks: NotionBlock[]): string { + const lines: string[] = []; + for (const block of blocks) { + const payload = block[block.type] as Record | undefined; + const text = richTextToMarkdown(payload?.rich_text); + switch (block.type) { + case 'heading_1': + lines.push(`## ${text}`); + break; + case 'heading_2': + lines.push(`### ${text}`); + break; + case 'heading_3': + lines.push(`#### ${text}`); + break; + case 'paragraph': + if (text) { + lines.push(text); + } + break; + case 'bulleted_list_item': + lines.push(`- ${text}`); + break; + case 'numbered_list_item': + lines.push(`1. ${text}`); + break; + case 'to_do': + lines.push(`- [${payload?.checked ? 'x' : ' '}] ${text}`); + break; + case 'quote': + lines.push(`> ${text}`); + break; + case 'callout': + lines.push(`> ${text}`); + break; + case 'code': + lines.push(`\`\`\`${typeof payload?.language === 'string' ? 
payload.language : ''}\n${text}\n\`\`\``); + break; + case 'divider': + lines.push('---'); + break; + case 'child_page': + if (typeof payload?.title === 'string') { + lines.push(`- Child page: ${payload.title}`); + } + break; + default: + if (text) { + lines.push(text); + } + break; + } + } + return lines.join('\n\n').trim(); +} diff --git a/packages/context/src/ingest/adapters/notion/notion-client.test.ts b/packages/context/src/ingest/adapters/notion/notion-client.test.ts new file mode 100644 index 00000000..fd3d54eb --- /dev/null +++ b/packages/context/src/ingest/adapters/notion/notion-client.test.ts @@ -0,0 +1,65 @@ +import { describe, expect, it, vi } from 'vitest'; +import { retryNotionRequest } from './notion-client.js'; + +describe('Notion client retry helper', () => { + it('retries rate-limited requests and then returns the response', async () => { + const sleep = vi.fn().mockResolvedValue(undefined); + const operation = vi + .fn() + .mockRejectedValueOnce({ code: 'rate_limited', headers: { 'retry-after': '2' } }) + .mockResolvedValueOnce({ ok: true }); + + const result = await retryNotionRequest(operation, { sleep, maxAttempts: 2 }); + + expect(result).toEqual({ ok: true }); + expect(sleep).toHaveBeenCalledWith(2000); + }); + + it('caps retry-after sleep from rate-limit responses', async () => { + const sleep = vi.fn().mockResolvedValue(undefined); + const operation = vi + .fn() + .mockRejectedValueOnce({ code: 'rate_limited', headers: { 'retry-after': '3600' } }) + .mockResolvedValueOnce({ ok: true }); + + await retryNotionRequest(operation, { sleep, maxAttempts: 2 }); + + expect(sleep).toHaveBeenCalledWith(60_000); + }); + + it('retries transient 5xx requests and then returns the response', async () => { + const sleep = vi.fn().mockResolvedValue(undefined); + const operation = vi + .fn() + .mockRejectedValueOnce({ code: 'service_unavailable', status: 503, message: 'temporary outage' }) + .mockResolvedValueOnce({ ok: true }); + + const result = await 
retryNotionRequest(operation, { sleep, maxAttempts: 2 }); + + expect(result).toEqual({ ok: true }); + expect(sleep).toHaveBeenCalledWith(1000); + }); + + it('uses exponential backoff for transient 5xx retries', async () => { + const sleep = vi.fn().mockResolvedValue(undefined); + const operation = vi + .fn() + .mockRejectedValueOnce({ code: 'service_unavailable', status: 503, message: 'temporary outage' }) + .mockRejectedValueOnce({ code: 'service_unavailable', status: 503, message: 'temporary outage' }) + .mockResolvedValueOnce({ ok: true }); + + await retryNotionRequest(operation, { sleep, maxAttempts: 3 }); + + expect(sleep).toHaveBeenNthCalledWith(1, 1000); + expect(sleep).toHaveBeenNthCalledWith(2, 2000); + }); + + it('throws the sanitized error after attempts are exhausted', async () => { + const sleep = vi.fn().mockResolvedValue(undefined); + const operation = vi.fn().mockRejectedValue({ code: 'rate_limited', message: 'token secret leaked' }); + + await expect(retryNotionRequest(operation, { sleep, maxAttempts: 2, authToken: 'secret' })).rejects.toThrow( + /token \*\*\* leaked/, + ); + }); +}); diff --git a/packages/context/src/ingest/adapters/notion/notion-client.ts b/packages/context/src/ingest/adapters/notion/notion-client.ts new file mode 100644 index 00000000..c7cb007c --- /dev/null +++ b/packages/context/src/ingest/adapters/notion/notion-client.ts @@ -0,0 +1,205 @@ +import { Client } from '@notionhq/client'; +import { NOTION_API_VERSION, type NotionBlock } from './types.js'; + +interface NotionSearchResult { + id: string; + object: 'page' | 'data_source' | string; + [key: string]: unknown; +} + +export interface NotionDatabaseContainer { + id: string; + title?: unknown[]; + data_sources?: Array<{ id: string; name?: string }>; + [key: string]: unknown; +} + +export interface NotionBotInfo { + id?: string; + name?: string | null; + bot?: { + workspace_name?: string | null; + [key: string]: unknown; + }; + [key: string]: unknown; +} + +export interface 
/** Tuning knobs for {@link retryNotionRequest}. */
interface RetryOptions {
  /** Total number of attempts including the first call. Defaults to 4; values below 1 are treated as 1. */
  maxAttempts?: number;
  /** Delay function, injectable so tests do not wait on real timers. */
  sleep?: (ms: number) => Promise<void>;
  /** When set, every occurrence of this token is masked in the error that is finally thrown. */
  authToken?: string;
}

const defaultSleep = (ms: number): Promise<void> => new Promise((resolve) => setTimeout(resolve, ms));
const RATE_LIMITED_STATUS = 429;
// 504 is listed so the status set agrees with the `gateway_timeout` code below.
const transientStatusCodes = new Set([500, 502, 503, 504]);
const transientErrorCodes = new Set(['internal_server_error', 'service_unavailable', 'gateway_timeout']);
const MAX_RETRY_SLEEP_MS = 60_000;

/** String `code` of an error-like object, if it has one. */
function errorCode(error: unknown): string | undefined {
  return typeof error === 'object' && error !== null && typeof (error as { code?: unknown }).code === 'string'
    ? (error as { code: string }).code
    : undefined;
}

/** Numeric `status` (or, failing that, `statusCode`) of an error-like object. */
function errorStatus(error: unknown): number | undefined {
  if (!error || typeof error !== 'object') {
    return undefined;
  }
  const status =
    (error as { status?: unknown; statusCode?: unknown }).status ?? (error as { statusCode?: unknown }).statusCode;
  return typeof status === 'number' ? status : undefined;
}

/**
 * Decides whether a failed request is worth repeating.
 *
 * Rate limits and transient 5xx failures qualify, identified either by the
 * API error code or by the bare HTTP status. The status check matters when the
 * response was not produced by the API itself and carries no recognised code.
 */
function shouldRetryNotionError(error: unknown): boolean {
  const code = errorCode(error);
  const status = errorStatus(error);
  return (
    code === 'rate_limited' ||
    status === RATE_LIMITED_STATUS ||
    transientErrorCodes.has(code ?? '') ||
    transientStatusCodes.has(status ?? 0)
  );
}

/**
 * Extracts a positive `Retry-After` delay (in milliseconds) from an error's headers.
 *
 * Accepts both plain header maps (`{ 'retry-after': '2' }`) and `Headers`-like
 * objects that expose `get(name)`. Returns `null` when the header is absent,
 * non-numeric or not positive, so the caller falls back to exponential backoff.
 */
function retryAfterMs(error: unknown): number | null {
  if (typeof error !== 'object' || error === null) {
    return null;
  }
  const headers = (error as { headers?: unknown }).headers;
  if (typeof headers !== 'object' || headers === null) {
    return null;
  }
  const bag = headers as Record<string, unknown>;
  const raw =
    typeof bag.get === 'function'
      ? // Indexing a Headers instance by name yields undefined; it must be asked via get().
        (bag.get as (name: string) => unknown).call(headers, 'retry-after')
      : (bag['retry-after'] ?? bag['Retry-After']);
  if (raw === null || raw === undefined || raw === '') {
    return null;
  }
  const seconds = Number(raw);
  return Number.isFinite(seconds) && seconds > 0 ? seconds * 1000 : null;
}

/**
 * Runs `operation`, retrying rate-limited and transient failures.
 *
 * The wait before each retry is the server's `Retry-After` when present,
 * otherwise exponential backoff starting at one second; either way it is capped
 * at `MAX_RETRY_SLEEP_MS`. Non-retryable errors fail immediately.
 *
 * @param operation Request to execute; called once per attempt.
 * @param options   See {@link RetryOptions}.
 * @returns The first successful result of `operation`.
 * @throws Error carrying the last failure's message with `authToken` masked as `***`.
 */
export async function retryNotionRequest<T>(operation: () => Promise<T>, options: RetryOptions = {}): Promise<T> {
  const requestedAttempts = options.maxAttempts ?? 4;
  // Always try at least once; a fractional budget is rounded down so the final
  // attempt is recognised as final and does not sleep before giving up.
  const maxAttempts = requestedAttempts >= 1 ? Math.floor(requestedAttempts) : 1;
  const sleep = options.sleep ?? defaultSleep;
  let lastError: unknown = null;

  for (let attempt = 1; attempt <= maxAttempts; attempt++) {
    try {
      return await operation();
    } catch (error) {
      lastError = error;
      if (!shouldRetryNotionError(error) || attempt === maxAttempts) {
        break;
      }
      const fallbackBackoffMs = 1000 * 2 ** (attempt - 1);
      await sleep(Math.min(retryAfterMs(error) ?? fallbackBackoffMs, MAX_RETRY_SLEEP_MS));
    }
  }

  const raw =
    lastError instanceof Error
      ? lastError.message
      : typeof lastError === 'object' &&
          lastError !== null &&
          typeof (lastError as { message?: unknown }).message === 'string'
        ? (lastError as { message: string }).message
        : String(lastError);
  // The original error object is deliberately not attached: it may embed the token.
  const sanitized = options.authToken ? raw.split(options.authToken).join('***') : raw;
  throw new Error(sanitized);
}
undefined, + page_size: pageSize, + }) as Promise<{ results: NotionSearchResult[]; has_more: boolean; next_cursor: string | null }>, + { authToken: this.authToken }, + ); + return { results: response.results, hasMore: response.has_more, nextCursor: response.next_cursor }; + } + + async retrieveBotUser(): Promise { + return retryNotionRequest(() => this.client.users.me({}) as Promise, { + authToken: this.authToken, + }); + } + + async retrievePage(pageId: string): Promise> { + return retryNotionRequest( + () => this.client.pages.retrieve({ page_id: pageId }) as Promise>, + { + authToken: this.authToken, + }, + ); + } + + async retrieveDatabase(databaseId: string): Promise { + return retryNotionRequest( + () => + this.client.request({ + method: 'get', + path: `databases/${databaseId}`, + }) as Promise, + { authToken: this.authToken }, + ); + } + + async queryDataSource(dataSourceId: string, startCursor?: string | null, pageSize = 100) { + const response = await retryNotionRequest( + () => + this.client.request({ + method: 'post', + path: `data_sources/${dataSourceId}/query`, + body: { start_cursor: startCursor ?? undefined, page_size: pageSize }, + }) as Promise<{ results: Record[]; has_more: boolean; next_cursor: string | null }>, + { authToken: this.authToken }, + ); + return { results: response.results, hasMore: response.has_more, nextCursor: response.next_cursor }; + } + + async listBlockChildren(blockId: string, startCursor?: string | null, pageSize = 100) { + const response = await retryNotionRequest( + () => + this.client.blocks.children.list({ + block_id: blockId, + start_cursor: startCursor ?? 
undefined, + page_size: pageSize, + }) as Promise<{ results: NotionBlock[]; has_more: boolean; next_cursor: string | null }>, + { authToken: this.authToken }, + ); + return { results: response.results, hasMore: response.has_more, nextCursor: response.next_cursor }; + } +} diff --git a/packages/context/src/ingest/adapters/notion/notion.adapter.test.ts b/packages/context/src/ingest/adapters/notion/notion.adapter.test.ts new file mode 100644 index 00000000..49658526 --- /dev/null +++ b/packages/context/src/ingest/adapters/notion/notion.adapter.test.ts @@ -0,0 +1,350 @@ +import { mkdir, mkdtemp, rm, writeFile } from 'node:fs/promises'; +import { tmpdir } from 'node:os'; +import { join } from 'node:path'; +import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'; +import { DiffSetService } from '../../diff-set.service.js'; +import { NOTION_ORG_KNOWLEDGE_WARNING } from './chunk.js'; +import { NotionSourceAdapter } from './notion.adapter.js'; + +describe('NotionSourceAdapter', () => { + let stagedDir: string; + let adapter: NotionSourceAdapter; + let onPullSucceeded: ReturnType Promise>>; + + beforeEach(async () => { + stagedDir = await mkdtemp(join(tmpdir(), 'notion-adapter-')); + onPullSucceeded = vi.fn().mockResolvedValue(undefined); + adapter = new NotionSourceAdapter({ onPullSucceeded: async (ctx) => onPullSucceeded(ctx) }); + }); + + afterEach(async () => { + await rm(stagedDir, { recursive: true, force: true }); + }); + + async function writePage(id: string, title: string, body = 'Durable rule.\n'): Promise { + await mkdir(join(stagedDir, 'pages', id), { recursive: true }); + await writeFile( + join(stagedDir, 'pages', id, 'metadata.json'), + JSON.stringify({ + objectType: 'page', + id, + title, + path: `Company / ${title}`, + url: null, + parentId: null, + databaseId: null, + dataSourceId: null, + lastEditedAt: null, + lastEditedBy: null, + properties: {}, + }), + 'utf-8', + ); + await writeFile(join(stagedDir, 'pages', id, 'page.md'), `# 
${title}\n\n${body}`, 'utf-8'); + await writeFile(join(stagedDir, 'pages', id, 'blocks.json'), '[]\n', 'utf-8'); + } + + it('declares Notion source behavior', () => { + expect(adapter.source).toBe('notion'); + expect(adapter.skillNames).toEqual(['notion_synthesize']); + expect(adapter.reconcileSkillNames).toEqual([]); + expect(adapter.evidenceIndexing).toBe('documents'); + expect(adapter.triageSupported).toBe(true); + }); + + it('returns structural triage signals for a staged Notion page', async () => { + await mkdir(join(stagedDir, 'pages', 'page-1'), { recursive: true }); + await writeFile( + join(stagedDir, 'pages', 'page-1', 'metadata.json'), + JSON.stringify({ + objectType: 'data_source_row', + id: 'page-1', + title: '2026-04-29 Daily Sync', + path: 'Company / Daily Syncs / 2026-04-29 Daily Sync', + url: null, + parentId: 'parent-page', + databaseId: 'database-1', + dataSourceId: 'data-source-1', + lastEditedAt: '2026-04-29T12:00:00.000Z', + lastEditedBy: 'Jane Doe', + properties: { + Status: 'Complete', + Owner: 'Ops', + Count: 3, + Nested: { ignored: true }, + }, + }), + 'utf-8', + ); + + await expect(adapter.getTriageSignals?.(stagedDir, 'page-1')).resolves.toEqual({ + parentType: 'data_source_id', + objectType: 'data_source_row', + isDateTitled: true, + lastEditedAt: '2026-04-29T12:00:00.000Z', + propertyHints: { + Count: '3', + Owner: 'Ops', + Status: 'Complete', + }, + }); + }); + + it('detects a Notion staged dir from manifest source', async () => { + await writeFile( + join(stagedDir, 'manifest.json'), + JSON.stringify({ source: 'notion', apiVersion: '2026-03-11' }), + 'utf-8', + ); + expect(await adapter.detect(stagedDir)).toBe(true); + }); + + it('does not delete prior pages omitted by a capped partial snapshot', async () => { + await writeFile( + join(stagedDir, 'manifest.json'), + JSON.stringify({ + source: 'notion', + apiVersion: '2026-03-11', + crawlMode: 'selected_roots', + rootPageIds: ['page-1', 'page-2'], + rootDatabaseIds: [], + 
rootDataSourceIds: [], + fetchedAt: '2026-04-28T00:00:00.000Z', + pageCount: 1, + databaseCount: 0, + dataSourceCount: 0, + capped: true, + continuedFromCursor: false, + partialSnapshot: true, + maxPagesPerRun: 1, + maxKnowledgeCreatesPerRun: 5, + maxKnowledgeUpdatesPerRun: 20, + skipped: [], + warnings: ['maxPagesPerRun reached at 1'], + }), + 'utf-8', + ); + await writePage('page-1', 'Revenue Recognition'); + + const scope = await adapter.describeScope(stagedDir); + const diffSetService = new DiffSetService({ + findLatestHashesForCompletedSyncs: vi.fn().mockResolvedValue( + new Map([ + ['manifest.json', 'old-manifest'], + ['pages/page-1/page.md', 'same'], + ['pages/page-2/page.md', 'prior-page-two'], + ]), + ), + } as never); + const diff = await diffSetService.compute( + 'conn-1', + 'notion', + new Map([ + ['manifest.json', 'new-manifest'], + ['pages/page-1/page.md', 'same'], + ]), + scope.isPathInScope.bind(scope), + ); + + expect(diff.deleted).toEqual([]); + }); + + it('does not delete prior pages omitted by an uncapped all_accessible cursor continuation', async () => { + await writeFile( + join(stagedDir, 'manifest.json'), + JSON.stringify({ + source: 'notion', + apiVersion: '2026-03-11', + crawlMode: 'all_accessible', + rootPageIds: [], + rootDatabaseIds: [], + rootDataSourceIds: [], + fetchedAt: '2026-04-28T00:00:00.000Z', + pageCount: 1, + databaseCount: 0, + dataSourceCount: 0, + capped: false, + continuedFromCursor: true, + partialSnapshot: true, + maxPagesPerRun: 100, + maxKnowledgeCreatesPerRun: 5, + maxKnowledgeUpdatesPerRun: 20, + nextSuccessfulCursor: null, + skipped: [], + warnings: [], + }), + 'utf-8', + ); + await writePage('page-2', 'Later Page'); + + const scope = await adapter.describeScope(stagedDir); + const diffSetService = new DiffSetService({ + findLatestHashesForCompletedSyncs: vi.fn().mockResolvedValue( + new Map([ + ['manifest.json', 'old-manifest'], + ['pages/page-1/page.md', 'prior-page-one'], + ['pages/page-2/page.md', 'same'], + 
]), + ), + } as never); + const diff = await diffSetService.compute( + 'conn-1', + 'notion', + new Map([ + ['manifest.json', 'new-manifest'], + ['pages/page-2/page.md', 'same'], + ]), + scope.isPathInScope.bind(scope), + ); + + expect(diff.deleted).toEqual([]); + }); + + it('chunks changed pages into candidate-extraction work units', async () => { + await writeFile( + join(stagedDir, 'manifest.json'), + JSON.stringify({ + source: 'notion', + apiVersion: '2026-03-11', + crawlMode: 'selected_roots', + rootPageIds: ['page-1'], + rootDatabaseIds: [], + rootDataSourceIds: [], + fetchedAt: '2026-04-28T00:00:00.000Z', + pageCount: 1, + databaseCount: 0, + dataSourceCount: 0, + capped: false, + continuedFromCursor: false, + partialSnapshot: false, + maxPagesPerRun: 100, + maxKnowledgeCreatesPerRun: 5, + maxKnowledgeUpdatesPerRun: 20, + skipped: [], + warnings: [], + }), + 'utf-8', + ); + await writePage('page-1', 'Revenue Recognition'); + + const result = await adapter.chunk(stagedDir, { + added: ['pages/page-1/page.md', 'pages/page-1/metadata.json'], + modified: [], + deleted: [], + unchanged: ['manifest.json', 'pages/page-1/blocks.json'], + }); + + expect(result.workUnits).toHaveLength(1); + expect(result.workUnits[0]).toMatchObject({ + unitKey: 'notion-page-page-1', + rawFiles: ['pages/page-1/metadata.json', 'pages/page-1/page.md'], + dependencyPaths: ['manifest.json', 'pages/page-1/blocks.json'], + }); + expect(result.workUnits[0].notes).toContain('Synthesize durable wiki and SL knowledge'); + expect(result.reconcileNotes).toEqual([ + 'Notion maxKnowledgeCreatesPerRun=5', + 'Notion maxKnowledgeUpdatesPerRun=20', + ]); + expect(result.contextReport).toEqual({ capped: false, warnings: [NOTION_ORG_KNOWLEDGE_WARNING] }); + }); + + it('reports malformed manifests with a Notion-specific error', async () => { + await writeFile(join(stagedDir, 'manifest.json'), '{bad json', 'utf-8'); + + await expect(adapter.chunk(stagedDir)).rejects.toThrow(/Invalid Notion manifest/); + }); + 
+ it('splits oversized changed pages into span-scoped work units', async () => { + await writeFile( + join(stagedDir, 'manifest.json'), + JSON.stringify({ + source: 'notion', + apiVersion: '2026-03-11', + crawlMode: 'selected_roots', + rootPageIds: ['page-1'], + rootDatabaseIds: [], + rootDataSourceIds: [], + fetchedAt: '2026-04-28T00:00:00.000Z', + pageCount: 1, + databaseCount: 0, + dataSourceCount: 0, + capped: false, + continuedFromCursor: false, + partialSnapshot: false, + maxPagesPerRun: 100, + maxKnowledgeCreatesPerRun: 5, + maxKnowledgeUpdatesPerRun: 20, + skipped: [], + warnings: [], + }), + 'utf-8', + ); + await writePage( + 'page-1', + 'Giant Parent', + Array.from({ length: 2600 }, (_, i) => `Line ${i + 1}: durable context.`).join('\n'), + ); + + const result = await adapter.chunk(stagedDir, { + added: ['pages/page-1/page.md', 'pages/page-1/metadata.json'], + modified: [], + deleted: [], + unchanged: ['manifest.json', 'pages/page-1/blocks.json'], + }); + + expect(result.workUnits.length).toBeGreaterThan(1); + expect(result.workUnits[0]).toMatchObject({ + unitKey: 'notion-page-page-1-part-1', + rawFiles: ['pages/page-1/metadata.json', 'pages/page-1/page.md'], + }); + expect(result.workUnits[0].notes).toContain('Use read_raw_span'); + expect(result.workUnits[0].notes).toMatch(/lines 1-\d+/); + expect(result.workUnits.at(-1)?.notes).toMatch(/lines \d+-2602/); + expect(result.contextReport?.warnings).toContain( + 'Oversized Notion page split into span-scoped work units: Company / Giant Parent', + ); + }); + + it('persists the manifest continuation cursor after successful pulls', async () => { + const completedAt = new Date('2026-04-28T01:00:00.000Z'); + const nextSuccessfulCursor = JSON.stringify({ phase: 'all_accessible_pages', cursor: 'cursor-2' }); + await writeFile( + join(stagedDir, 'manifest.json'), + JSON.stringify({ + source: 'notion', + apiVersion: '2026-03-11', + crawlMode: 'all_accessible', + rootPageIds: [], + rootDatabaseIds: [], + 
rootDataSourceIds: [], + fetchedAt: '2026-04-28T00:00:00.000Z', + pageCount: 1, + databaseCount: 0, + dataSourceCount: 0, + capped: true, + continuedFromCursor: false, + partialSnapshot: true, + maxPagesPerRun: 1, + maxKnowledgeCreatesPerRun: 5, + maxKnowledgeUpdatesPerRun: 20, + nextSuccessfulCursor, + skipped: [], + warnings: ['maxPagesPerRun reached at 1'], + }), + 'utf-8', + ); + + await adapter.onPullSucceeded({ + connectionId: 'conn-1', + sourceKey: 'notion', + syncId: 'sync-1', + trigger: 'scheduled_pull', + completedAt, + stagedDir, + }); + + expect(onPullSucceeded).toHaveBeenCalledWith( + expect.objectContaining({ connectionId: 'conn-1', completedAt, nextSuccessfulCursor }), + ); + }); +}); diff --git a/packages/context/src/ingest/adapters/notion/notion.adapter.ts b/packages/context/src/ingest/adapters/notion/notion.adapter.ts new file mode 100644 index 00000000..896ef69f --- /dev/null +++ b/packages/context/src/ingest/adapters/notion/notion.adapter.ts @@ -0,0 +1,160 @@ +import { readdir, readFile } from 'node:fs/promises'; +import { join } from 'node:path'; +import type { + ChunkResult, + ClusterWorkUnitsContext, + DiffSet, + FetchContext, + IngestTrigger, + ScopeDescriptor, + SourceAdapter, + TriageSignals, + WorkUnit, +} from '../../types.js'; +import { chunkNotionStagedDir, describeNotionScope } from './chunk.js'; +import { clusterNotionWorkUnits } from './cluster.js'; +import { detectNotionStagedDir } from './detect.js'; +import { fetchNotionSnapshot } from './fetch.js'; +import { NotionClient } from './notion-client.js'; +import { parseNotionPullConfig } from './pull-config.js'; +import { type NotionMetadata, notionManifestSchema, notionMetadataSchema } from './types.js'; + +interface NotionPullSucceededContext { + connectionId: string; + sourceKey: string; + syncId: string; + trigger: IngestTrigger; + completedAt: Date; + stagedDir: string; + nextSuccessfulCursor: string | null; +} + +export interface NotionSourceAdapterDeps { + onPullSucceeded?: 
/**
 * Source adapter wiring the Notion ingest helpers (detect, fetch, chunk,
 * cluster, scope) into the generic `SourceAdapter` contract, plus structural
 * triage signals read from staged `metadata.json` files.
 */
export class NotionSourceAdapter implements SourceAdapter {
  readonly source = 'notion';
  readonly skillNames = ['notion_synthesize'];
  readonly reconcileSkillNames: string[] = [];
  readonly evidenceIndexing = 'documents' as const;
  readonly triageSupported = true;

  constructor(private readonly deps: NotionSourceAdapterDeps = {}) {}

  /** Delegates to `detectNotionStagedDir`. */
  detect(stagedDir: string): Promise<boolean> {
    return detectNotionStagedDir(stagedDir);
  }

  /** Validates the pull config, then snapshots Notion into `stagedDir` with a fresh client. */
  async fetch(pullConfig: unknown, stagedDir: string, _ctx: FetchContext): Promise<void> {
    const config = parseNotionPullConfig(pullConfig);
    const client = new NotionClient(config.authToken);
    await fetchNotionSnapshot({ client, config, stagedDir });
  }

  /** Delegates to `chunkNotionStagedDir`. */
  chunk(stagedDir: string, diffSet?: DiffSet): Promise<ChunkResult> {
    return chunkNotionStagedDir(stagedDir, diffSet);
  }

  /** Forwards the work units, staged dir and embedding handle to `clusterNotionWorkUnits`. */
  clusterWorkUnits(ctx: ClusterWorkUnitsContext): Promise<WorkUnit[]> {
    const { workUnits, stagedDir, embedding } = ctx;
    return clusterNotionWorkUnits({ workUnits, stagedDir, embedding });
  }

  /** Delegates to `describeNotionScope`. */
  describeScope(stagedDir: string): Promise<ScopeDescriptor> {
    return describeNotionScope(stagedDir);
  }

  /**
   * Structural hints for one staged object, derived only from its metadata file.
   * Resolves to an empty object when no staged metadata has the given id.
   */
  async getTriageSignals(stagedDir: string, externalId: string): Promise<TriageSignals> {
    const found = await this.findMetadataByExternalId(stagedDir, externalId);
    if (found === null) {
      return {};
    }

    const { objectType, title, lastEditedAt, properties } = found;
    return {
      parentType: this.parentType(found),
      objectType,
      isDateTitled: this.isDateLikeTitle(title),
      lastEditedAt: lastEditedAt ?? undefined,
      propertyHints: this.propertyHints(properties),
    };
  }

  /**
   * Walks `stagedDir` recursively and returns the first `metadata.json` (in
   * sorted path order) whose `id` equals `externalId`. Every file visited is
   * schema-validated, so a malformed file encountered before the match throws.
   */
  private async findMetadataByExternalId(stagedDir: string, externalId: string): Promise<NotionMetadata | null> {
    const dirents = await readdir(stagedDir, { withFileTypes: true, recursive: true });
    const candidates: string[] = [];
    for (const dirent of dirents) {
      if (dirent.isFile() && dirent.name === 'metadata.json') {
        candidates.push(join(dirent.parentPath, dirent.name));
      }
    }
    candidates.sort();

    for (const candidate of candidates) {
      const raw = await readFile(candidate, 'utf-8');
      const parsed = notionMetadataSchema.parse(JSON.parse(raw));
      if (parsed.id === externalId) {
        return parsed;
      }
    }

    return null;
  }

  /** Most specific container recorded on the metadata: data source, then database, then page. */
  private parentType(metadata: NotionMetadata): string {
    const precedence: Array<[string | null, string]> = [
      [metadata.dataSourceId, 'data_source_id'],
      [metadata.databaseId, 'database_id'],
      [metadata.parentId, 'page_id'],
    ];
    const match = precedence.find(([id]) => Boolean(id));
    return match ? match[1] : 'workspace';
  }

  /**
   * True when the trimmed title starts with an ISO (`YYYY-MM-DD`) or slash
   * (`M/D/YY`) date, or when `Date.parse` accepts it and it contains four
   * consecutive digits.
   */
  private isDateLikeTitle(title: string): boolean {
    const candidate = title.trim();
    const leadingDatePatterns = [/^\d{4}-\d{2}-\d{2}$/, /^\d{4}-\d{2}-\d{2}\b/, /^\d{1,2}\/\d{1,2}\/\d{2,4}\b/];
    if (leadingDatePatterns.some((pattern) => pattern.test(candidate))) {
      return true;
    }
    return !Number.isNaN(Date.parse(candidate)) && /\d{4}/.test(candidate);
  }

  /** Up to eight scalar properties, taken in `localeCompare` key order and stringified. */
  private propertyHints(properties: Record<string, unknown>): Record<string, string> {
    const ordered = Object.entries(properties).sort(([left], [right]) => left.localeCompare(right));
    const pairs: Array<[string, string]> = [];
    for (const [key, value] of ordered) {
      if (pairs.length === 8) {
        break;
      }
      const hint = this.propertyHintValue(value);
      if (hint !== null) {
        pairs.push([key, hint]);
      }
    }
    return Object.fromEntries(pairs);
  }

  /** Strings, numbers and booleans are stringified; `null` becomes `'null'`; anything else is dropped. */
  private propertyHintValue(value: unknown): string | null {
    switch (typeof value) {
      case 'string':
      case 'number':
      case 'boolean':
        return String(value);
      default:
        return value === null ? 'null' : null;
    }
  }

  /**
   * Re-reads the staged manifest and hands its `nextSuccessfulCursor` to the
   * injected callback. The manifest is parsed even when no callback is configured.
   */
  async onPullSucceeded(ctx: {
    connectionId: string;
    sourceKey: string;
    syncId: string;
    trigger: IngestTrigger;
    completedAt: Date;
    stagedDir: string;
  }): Promise<void> {
    const manifestJson = await readFile(join(ctx.stagedDir, 'manifest.json'), 'utf-8');
    const { nextSuccessfulCursor } = notionManifestSchema.parse(JSON.parse(manifestJson));
    await this.deps.onPullSucceeded?.({ ...ctx, nextSuccessfulCursor });
  }
}
z.number().int().min(0).max(25).default(5), + maxKnowledgeUpdatesPerRun: z.number().int().min(0).max(100).default(20), + lastSuccessfulCursor: z.string().nullable().default(null), +}); +export type NotionPullConfig = z.infer; + +export const notionCrawlCursorSchema = z + .discriminatedUnion('phase', [ + z.object({ phase: z.literal('all_accessible_pages'), cursor: z.string().nullable() }), + z.object({ phase: z.literal('all_accessible_data_sources'), cursor: z.string().nullable() }), + z.object({ + phase: z.literal('all_accessible_data_source_rows'), + dataSourceId: z.string(), + dataSourceSearchCursor: z.string().nullable(), + rowCursor: z.string().nullable(), + }), + ]) + .nullable(); +export type NotionCrawlCursor = z.infer; + +const notionObjectTypeSchema = z.enum(['page', 'database', 'data_source', 'data_source_row']); +export type NotionObjectType = z.infer; + +export const notionManifestSchema = z.object({ + source: z.literal(NOTION_SOURCE_KEY), + apiVersion: z.literal(NOTION_API_VERSION), + crawlMode: z.enum(['all_accessible', 'selected_roots']), + rootPageIds: z.array(z.string()), + rootDatabaseIds: z.array(z.string()), + rootDataSourceIds: z.array(z.string()), + fetchedAt: z.string().datetime(), + pageCount: z.number().int(), + databaseCount: z.number().int(), + dataSourceCount: z.number().int(), + capped: z.boolean().default(false), + continuedFromCursor: z.boolean().default(false), + partialSnapshot: z.boolean().default(false), + maxPagesPerRun: z.number().int(), + maxKnowledgeCreatesPerRun: z.number().int(), + maxKnowledgeUpdatesPerRun: z.number().int(), + nextSuccessfulCursor: z.string().nullable().default(null), + skipped: z.array(z.object({ externalId: z.string(), reason: z.string() })).default([]), + warnings: z.array(z.string()).default([]), +}); +export type NotionManifest = z.infer; + +export const notionMetadataSchema = z.object({ + objectType: notionObjectTypeSchema, + id: z.string(), + title: z.string(), + path: z.string(), + url: 
z.string().nullable().default(null), + parentId: z.string().nullable().default(null), + databaseId: z.string().nullable().default(null), + dataSourceId: z.string().nullable().default(null), + lastEditedAt: z.string().datetime().nullable().default(null), + lastEditedBy: z.string().nullable().default(null), + properties: z.record(z.string(), z.unknown()).default({}), +}); +export type NotionMetadata = z.infer; + +export interface NotionRichText { + plain_text?: string; + href?: string | null; +} + +export interface NotionBlock { + id: string; + type: string; + has_children?: boolean; + [key: string]: unknown; +} diff --git a/packages/context/src/ingest/canonical-pins.test.ts b/packages/context/src/ingest/canonical-pins.test.ts new file mode 100644 index 00000000..dec62360 --- /dev/null +++ b/packages/context/src/ingest/canonical-pins.test.ts @@ -0,0 +1,76 @@ +import { describe, expect, it } from 'vitest'; +import { buildCanonicalPinsPromptBlock, type CanonicalPin, selectRelevantCanonicalPins } from './canonical-pins.js'; +import type { StageIndex } from './stages/stage-index.types.js'; + +function makeStageIndex(): StageIndex { + return { + jobId: 'job-1', + connectionId: 'c1', + workUnits: [ + { + unitKey: 'wu-billing', + rawFiles: ['metrics/billing.yml'], + status: 'success', + actions: [ + { + target: 'sl', + type: 'created', + key: 'billing.churn_risk_score', + detail: 'captured churn risk from billing', + }, + ], + touchedSlSources: [{ connectionId: 'c1', sourceName: 'billing' }], + }, + ], + conflictsResolved: [], + evictionsApplied: [], + unmappedFallbacks: [], + }; +} + +const pins: CanonicalPin[] = [ + { + contestedKey: 'churn_risk_score', + canonicalArtifactKey: 'billing.churn_risk_score', + pinnedAt: '2026-04-27T12:00:00.000Z', + pinnedBy: 'user-1', + reason: 'billing owns the contractual definition', + }, + { + contestedKey: 'gross_margin', + canonicalArtifactKey: 'finance.gross_margin', + pinnedAt: '2026-04-27T12:01:00.000Z', + pinnedBy: 'user-2', + 
import type { StageIndex } from './stages/stage-index.types.js';

/** A recorded decision that one artifact is the canonical owner of a contested key. */
export interface CanonicalPin {
  contestedKey: string;
  canonicalArtifactKey: string;
  pinnedAt: string;
  pinnedBy: string;
  reason: string | null;
}

/** Trims and lower-cases a value so comparisons ignore case and outer whitespace. */
function normalize(value: string): string {
  return value.trim().toLowerCase();
}

/**
 * Flattens the searchable strings of a Stage Index into one normalized,
 * newline-joined text: the job id, then for each work unit its key, raw files,
 * touched source identifiers, and the fields of every action.
 */
function stageIndexSearchText(stageIndex: StageIndex): string {
  const fragments: string[] = [stageIndex.jobId];

  for (const unit of stageIndex.workUnits) {
    fragments.push(unit.unitKey);
    for (const rawFile of unit.rawFiles) {
      fragments.push(rawFile);
    }
    for (const { connectionId, sourceName } of unit.touchedSlSources) {
      fragments.push(connectionId, sourceName, `${connectionId}:${sourceName}`);
    }
    for (const action of unit.actions) {
      // Actions without their own connection fall back to the index-level one.
      const connection = action.targetConnectionId ?? stageIndex.connectionId;
      fragments.push(action.target, action.type, action.key, action.detail, connection);
    }
  }

  return normalize(fragments.join('\n'));
}

/**
 * Returns the pins whose contested key or canonical artifact key occurs as a
 * case-insensitive substring of the Stage Index search text. Input order is kept.
 */
export function selectRelevantCanonicalPins(stageIndex: StageIndex, pins: CanonicalPin[]): CanonicalPin[] {
  if (pins.length === 0) {
    return [];
  }

  const searchText = stageIndexSearchText(stageIndex);
  const relevant: CanonicalPin[] = [];
  for (const pin of pins) {
    const needles = [normalize(pin.contestedKey), normalize(pin.canonicalArtifactKey)];
    if (needles.some((needle) => searchText.includes(needle))) {
      relevant.push(pin);
    }
  }
  return relevant;
}

/**
 * Renders pins as a newline-joined list block for a prompt, or `''` when there
 * are no pins. The `reason` line is emitted only for pins with a non-empty reason.
 */
export function buildCanonicalPinsPromptBlock(pins: CanonicalPin[]): string {
  if (pins.length === 0) {
    return '';
  }

  const entries = pins.flatMap((pin) => {
    const entry = [`- contestedKey: ${pin.contestedKey}`, ` canonicalArtifactKey: ${pin.canonicalArtifactKey}`];
    return pin.reason ? [...entry, ` reason: ${pin.reason}`] : entry;
  });

  // NOTE(review): the first and last lines are empty strings as given here —
  // confirm no wrapper text was intended around the list.
  return ['', ...entries, ''].join('\n');
}
interface KmeansOptions {
  /** Seed for the deterministic PRNG; defaults to 1. */
  seed?: number;
  /** Maximum number of assign/update rounds; defaults to 50. */
  maxIters?: number;
}

interface KmeansResult {
  /** Cluster index for each input point, in input order. */
  assignments: number[];
  /** One centroid per cluster index. */
  centroids: number[][];
}

/**
 * Picks a cluster count for `n` items: `ceil(n / 8)` clamped to `[1, 10]`,
 * or 0 when there are no items.
 */
export function pickK(n: number): number {
  if (n <= 0) return 0;
  return Math.max(1, Math.min(10, Math.ceil(n / 8)));
}

/** Returns a seeded pseudo-random generator producing values in `[0, 1)`. */
function mulberry32(seed: number): () => number {
  let s = seed >>> 0;
  return () => {
    s = (s + 0x6d2b79f5) >>> 0;
    let t = s;
    t = Math.imul(t ^ (t >>> 15), t | 1);
    t ^= t + Math.imul(t ^ (t >>> 7), t | 61);
    return ((t ^ (t >>> 14)) >>> 0) / 4294967296;
  };
}

/** Squared Euclidean distance, iterating over the length of `a`. */
function distSq(a: number[], b: number[]): number {
  let s = 0;
  for (let i = 0; i < a.length; i += 1) {
    const d = a[i] - b[i];
    s += d * d;
  }
  return s;
}

/**
 * k-means++ seeding: the first centroid is a uniformly random point; each
 * further centroid is drawn with probability proportional to its squared
 * distance from the nearest centroid chosen so far.
 */
function kMeansPlusPlusInit(points: number[][], k: number, rand: () => number): number[][] {
  const centroids: number[][] = [];
  const firstIdx = Math.floor(rand() * points.length);
  centroids.push([...points[firstIdx]]);
  while (centroids.length < k) {
    const dists = points.map((p) => Math.min(...centroids.map((c) => distSq(p, c))));
    const total = dists.reduce((acc, d) => acc + d, 0);
    if (total === 0) {
      // Every point coincides with a centroid; any point is as good as another.
      centroids.push([...points[Math.floor(rand() * points.length)]]);
      continue;
    }
    let r = rand() * total;
    let chosen = -1;
    let lastPositive = 0;
    for (let i = 0; i < dists.length; i += 1) {
      // Zero-weight points already coincide with a centroid and must never be
      // drawn, even when `r` starts at exactly 0.
      if (!(dists[i] > 0)) continue;
      lastPositive = i;
      r -= dists[i];
      if (r <= 0) {
        chosen = i;
        break;
      }
    }
    // Rounding can leave `r` marginally positive after the scan; fall back to
    // the last positive-weight point instead of silently defaulting to index 0.
    if (chosen < 0) chosen = lastPositive;
    centroids.push([...points[chosen]]);
  }
  return centroids;
}

/**
 * Clusters `points` into `k` groups with Lloyd's algorithm and k-means++ seeding.
 *
 * - Empty input or `k <= 0` yields empty `assignments` and `centroids`.
 * - `k >= points.length` makes every point its own cluster (one centroid per point).
 * - Results are deterministic for a given `options.seed`.
 * - A cluster that loses all its points keeps its previous centroid.
 *
 * @param points  Vectors to cluster; the first point's length is used as the dimension.
 * @param k       Desired number of clusters.
 * @param options Optional seed and iteration cap.
 */
export function kmeans(points: number[][], k: number, options: KmeansOptions = {}): KmeansResult {
  const n = points.length;
  if (n === 0 || k <= 0) return { assignments: [], centroids: [] };
  if (k >= n) {
    return {
      assignments: points.map((_, i) => i),
      centroids: points.map((p) => [...p]),
    };
  }
  const rand = mulberry32(options.seed ?? 1);
  const maxIters = options.maxIters ?? 50;
  const centroids = kMeansPlusPlusInit(points, k, rand);
  const dim = points[0].length;
  const assignments = new Array<number>(n).fill(0);

  for (let iter = 0; iter < maxIters; iter += 1) {
    // Assignment step: move each point to its nearest centroid (ties go to the lower index).
    let changed = false;
    for (let i = 0; i < n; i += 1) {
      let bestK = 0;
      let bestD = Infinity;
      for (let c = 0; c < k; c += 1) {
        const d = distSq(points[i], centroids[c]);
        if (d < bestD) {
          bestD = d;
          bestK = c;
        }
      }
      if (assignments[i] !== bestK) {
        assignments[i] = bestK;
        changed = true;
      }
    }

    // Update step: recompute each non-empty cluster's centroid as the mean of its points.
    const sums = Array.from({ length: k }, () => new Array<number>(dim).fill(0));
    const counts = new Array<number>(k).fill(0);
    for (let i = 0; i < n; i += 1) {
      const c = assignments[i];
      counts[c] += 1;
      for (let d = 0; d < dim; d += 1) {
        sums[c][d] += points[i][d];
      }
    }
    for (let c = 0; c < k; c += 1) {
      if (counts[c] === 0) continue;
      for (let d = 0; d < dim; d += 1) {
        sums[c][d] /= counts[c];
      }
      centroids[c] = sums[c];
    }
    if (!changed) break;
  }

  return { assignments, centroids };
}
overrides.candidateKey, + assertion: overrides.assertion ?? `Assertion for ${overrides.candidateKey}`, + promotionScore: overrides.promotionScore ?? 1, + createdAt: overrides.createdAt ?? new Date('2026-04-29T10:00:00.000Z'), + evidenceChunkIds: overrides.evidenceChunkIds ?? [], + evidenceRefs: overrides.evidenceRefs ?? [], + embedding: 'embedding' in overrides ? (overrides.embedding ?? null) : vector(1, 0, 0), + lane: overrides.lane ?? null, +}); + +function buildHarness( + overrides: { + enabled?: boolean; + threshold?: number; + scoreAggregation?: 'max' | 'mean' | 'sum'; + candidates?: ContextCandidateForDedup[]; + } = {}, +) { + const store = { + listPendingCandidatesForDedup: vi.fn().mockResolvedValue(overrides.candidates ?? []), + updateCandidateEmbedding: vi.fn().mockResolvedValue(undefined), + markCandidatesAsMergedToCluster: vi.fn().mockResolvedValue(undefined), + listBudgetExhaustedCandidatesForCarryForward: vi.fn(), + listCurrentRunEvidenceChunksForCarryForward: vi.fn(), + insertCandidate: vi.fn(), + }; + const embeddings = { + maxBatchSize: 100, + computeEmbedding: vi.fn(), + computeEmbeddingsBulk: vi.fn(), + }; + const service = new CandidateDedupService({ + store: store as unknown as ContextCandidateStorePort, + embeddings: embeddings as unknown as ContextCandidateEmbeddingPort, + settings: { + enabled: overrides.enabled ?? true, + topicSimilarityThreshold: overrides.threshold ?? 0.85, + scoreAggregation: overrides.scoreAggregation ?? 
'max', + }, + }); + + return { service, store, embeddings }; +} + +describe('CandidateDedupService', () => { + beforeEach(() => { + vi.restoreAllMocks(); + }); + + it('returns raw representatives without writes when dedup is disabled', async () => { + const first = candidate({ candidateKey: 'first', embedding: vector(1, 0, 0) }); + const duplicate = candidate({ candidateKey: 'duplicate', embedding: vector(0.99, 0.01, 0) }); + const { service, store, embeddings } = buildHarness({ + enabled: false, + candidates: [first, duplicate], + }); + + const result = await service.deduplicateRun('run-1'); + + expect(result).toMatchObject({ + enabled: false, + candidatesIn: 2, + clustersOut: 2, + mergedCount: 0, + largestClusterSize: 1, + embeddingFailures: 0, + }); + expect(result.representatives.map((item) => item.candidateKey)).toEqual(['first', 'duplicate']); + expect(store.markCandidatesAsMergedToCluster).not.toHaveBeenCalled(); + expect(embeddings.computeEmbeddingsBulk).not.toHaveBeenCalled(); + }); + + it('clusters near duplicates and persists representative evidence unions', async () => { + const rep = candidate({ + candidateKey: 'icp-primary', + topic: 'ICP', + assertion: 'Finance operators are the ICP.', + promotionScore: 11, + evidenceChunkIds: ['00000000-0000-0000-0000-000000000001'], + evidenceRefs: [{ stableCitationKey: 'icp-a', rawPath: 'pages/a/page.md' }], + embedding: vector(1, 0, 0), + }); + const duplicate = candidate({ + candidateKey: 'icp-duplicate', + topic: 'Ideal customer profile', + assertion: 'The ICP is finance teams.', + promotionScore: 7, + evidenceChunkIds: ['00000000-0000-0000-0000-000000000002'], + evidenceRefs: [{ stableCitationKey: 'icp-b', rawPath: 'pages/b/page.md' }], + embedding: vector(0.99, 0.02, 0), + }); + const unique = candidate({ + candidateKey: 'pricing-policy', + promotionScore: 6, + evidenceChunkIds: ['00000000-0000-0000-0000-000000000003'], + evidenceRefs: [{ stableCitationKey: 'price-a', rawPath: 'pages/pricing/page.md' }], + 
embedding: vector(0, 1, 0), + }); + const { service, store } = buildHarness({ candidates: [rep, duplicate, unique] }); + + const result = await service.deduplicateRun('run-1'); + + expect(result).toMatchObject({ + enabled: true, + candidatesIn: 3, + clustersOut: 2, + mergedCount: 1, + largestClusterSize: 2, + embeddingFailures: 0, + }); + expect(result.representatives.map((item) => item.candidateKey)).toEqual(['icp-primary', 'pricing-policy']); + expect(store.markCandidatesAsMergedToCluster).toHaveBeenCalledWith({ + representativeId: rep.id, + memberIds: [duplicate.id], + evidenceChunkIds: ['00000000-0000-0000-0000-000000000001', '00000000-0000-0000-0000-000000000002'], + evidenceRefs: [ + { stableCitationKey: 'icp-a', rawPath: 'pages/a/page.md' }, + { stableCitationKey: 'icp-b', rawPath: 'pages/b/page.md' }, + ], + promotionScore: 11, + }); + }); + + it('uses the configured similarity threshold', async () => { + const base = candidate({ candidateKey: 'base', embedding: vector(1, 0, 0), promotionScore: 5 }); + const borderline = candidate({ candidateKey: 'borderline', embedding: vector(0.8, 0.6, 0), promotionScore: 4 }); + + const strict = buildHarness({ candidates: [base, borderline], threshold: 0.95 }); + const strictResult = await strict.service.deduplicateRun('run-1'); + expect(strictResult.clustersOut).toBe(2); + expect(strict.store.markCandidatesAsMergedToCluster).not.toHaveBeenCalled(); + + const loose = buildHarness({ candidates: [base, borderline], threshold: 0.75 }); + const looseResult = await loose.service.deduplicateRun('run-1'); + expect(looseResult.clustersOut).toBe(1); + expect(loose.store.markCandidatesAsMergedToCluster).toHaveBeenCalledTimes(1); + }); + + it('fills missing embeddings in batches and persists them before clustering', async () => { + const first = candidate({ candidateKey: 'missing-a', embedding: null }); + const second = candidate({ candidateKey: 'missing-b', embedding: null }); + const { service, store, embeddings } = 
buildHarness({ candidates: [first, second] }); + embeddings.computeEmbeddingsBulk.mockResolvedValueOnce([ + [1, 0, 0], + [0, 1, 0], + ]); + + const result = await service.deduplicateRun('run-1'); + + expect(result.embeddingFailures).toBe(0); + expect(embeddings.computeEmbeddingsBulk).toHaveBeenCalledWith([ + 'missing-a - Assertion for missing-a', + 'missing-b - Assertion for missing-b', + ]); + expect(store.updateCandidateEmbedding).toHaveBeenCalledWith(first.id, [1, 0, 0]); + expect(store.updateCandidateEmbedding).toHaveBeenCalledWith(second.id, [0, 1, 0]); + }); + + it('isolates a single embedding failure and keeps that candidate as a singleton', async () => { + const first = candidate({ candidateKey: 'embed-ok', embedding: null }); + const second = candidate({ candidateKey: 'embed-fail', embedding: null }); + const { service, store, embeddings } = buildHarness({ candidates: [first, second] }); + embeddings.computeEmbeddingsBulk.mockRejectedValueOnce(new Error('bulk provider unavailable')); + embeddings.computeEmbedding + .mockResolvedValueOnce([1, 0, 0]) + .mockRejectedValueOnce(new Error('single candidate failed')); + + const result = await service.deduplicateRun('run-1'); + + expect(result.embeddingFailures).toBe(1); + expect(result.clustersOut).toBe(2); + expect(result.warnings).toEqual( + expect.arrayContaining([ + expect.stringContaining( + 'embedding bulk failed: bulk provider unavailable; falling back to per-candidate embedding for 2 candidates', + ), + expect.stringContaining('Embedding failed for candidate embed-fail'), + ]), + ); + expect(store.updateCandidateEmbedding).toHaveBeenCalledTimes(1); + expect(store.updateCandidateEmbedding).toHaveBeenCalledWith(first.id, [1, 0, 0]); + }); + + it('applies mean and sum score aggregation modes', async () => { + const rep = candidate({ candidateKey: 'score-rep', promotionScore: 9, embedding: vector(1, 0, 0) }); + const duplicate = candidate({ + candidateKey: 'score-duplicate', + promotionScore: 3, + embedding: 
vector(0.99, 0.02, 0), + }); + + const mean = buildHarness({ candidates: [rep, duplicate], scoreAggregation: 'mean' }); + await mean.service.deduplicateRun('run-1'); + expect(mean.store.markCandidatesAsMergedToCluster).toHaveBeenCalledWith( + expect.objectContaining({ promotionScore: 6 }), + ); + + const sum = buildHarness({ candidates: [rep, duplicate], scoreAggregation: 'sum' }); + await sum.service.deduplicateRun('run-1'); + expect(sum.store.markCandidatesAsMergedToCluster).toHaveBeenCalledWith( + expect.objectContaining({ promotionScore: 12 }), + ); + }); + + it('rounds mean score aggregation for the integer promotion score column', async () => { + const rep = candidate({ candidateKey: 'rounded-rep', promotionScore: 10, embedding: vector(1, 0, 0) }); + const duplicate = candidate({ + candidateKey: 'rounded-duplicate', + promotionScore: 7, + embedding: vector(0.99, 0.02, 0), + }); + const { service, store } = buildHarness({ candidates: [rep, duplicate], scoreAggregation: 'mean' }); + + await service.deduplicateRun('run-1'); + + expect(store.markCandidatesAsMergedToCluster).toHaveBeenCalledWith(expect.objectContaining({ promotionScore: 9 })); + }); + + it('is a no-op on a rerun after non-representatives are already merged', async () => { + const rep = candidate({ candidateKey: 'rerun-rep', promotionScore: 9, embedding: vector(1, 0, 0) }); + const duplicate = candidate({ + candidateKey: 'rerun-duplicate', + promotionScore: 3, + embedding: vector(0.99, 0.02, 0), + }); + const { service, store } = buildHarness(); + store.listPendingCandidatesForDedup.mockResolvedValueOnce([rep, duplicate]).mockResolvedValueOnce([rep]); + + const first = await service.deduplicateRun('run-1'); + const second = await service.deduplicateRun('run-1'); + + expect(first.mergedCount).toBe(1); + expect(second.mergedCount).toBe(0); + expect(second.clustersOut).toBe(1); + expect(store.markCandidatesAsMergedToCluster).toHaveBeenCalledTimes(1); + }); + + it('returns raw candidates with a warning 
import { type KloLogger, noopLogger } from '../../core/index.js';
import type { CandidateDedupResult, ContextCandidateForDedup, JsonValue } from '../ports.js';
import { buildContextCandidateEmbeddingText } from './embedding-text.js';
import type { ContextCandidateStorePort } from './store.js';
import type { CandidateDedupSettings, ContextCandidateEmbeddingPort } from './types.js';

/** A candidate plus its embedding parsed into numbers (`null` when unavailable). */
interface CandidateWithVector extends ContextCandidateForDedup {
  embeddingVector: number[] | null;
}

/** A representative and every member merged into it; `members[0]` is the representative. */
interface CandidateCluster {
  representative: CandidateWithVector;
  members: CandidateWithVector[];
}

export interface CandidateDedupServiceDeps {
  store: ContextCandidateStorePort;
  embeddings: ContextCandidateEmbeddingPort;
  settings: CandidateDedupSettings;
  logger?: KloLogger;
}

/**
 * Collapses near-duplicate pending context candidates of a run into clusters,
 * using cosine similarity of their embeddings against a configured threshold.
 */
export class CandidateDedupService {
  private readonly logger: KloLogger;

  constructor(private readonly deps: CandidateDedupServiceDeps) {
    this.logger = deps.logger ?? noopLogger;
  }

  /**
   * Deduplicates the pending candidates of `runId`.
   *
   * When dedup is disabled, or when embedding/clustering/persistence throws,
   * the raw candidates are returned unmerged (with a warning in the failure
   * case). Otherwise the result holds one representative per cluster, sorted
   * by effective promotion score (descending) and then creation time (ascending).
   *
   * @param runId Run whose pending candidates are loaded from the store.
   * @returns Dedup statistics, representatives and collected warnings.
   */
  async deduplicateRun(runId: string): Promise<CandidateDedupResult> {
    const candidates = await this.deps.store.listPendingCandidatesForDedup(runId);
    const config = this.deps.settings;

    if (!config.enabled) {
      return this.rawResult(candidates, false, [], 0);
    }

    try {
      const prepared = await this.prepareEmbeddings(candidates);
      const clusters = this.clusterCandidates(prepared.candidates, config.topicSimilarityThreshold);
      const effectiveScores = await this.persistClusters(clusters, config.scoreAggregation);

      const mergedCount = clusters.reduce((sum, cluster) => sum + Math.max(cluster.members.length - 1, 0), 0);
      const largestClusterSize = clusters.reduce((max, cluster) => Math.max(max, cluster.members.length), 0);
      const representatives = clusters
        .map((cluster) => {
          const representative = this.stripVector(cluster.representative);
          return {
            ...representative,
            promotionScore: effectiveScores.get(cluster.representative.id) ?? representative.promotionScore,
          };
        })
        .sort((left, right) => {
          if (right.promotionScore !== left.promotionScore) {
            return right.promotionScore - left.promotionScore;
          }
          return left.createdAt.getTime() - right.createdAt.getTime();
        });

      this.logger.log(
        `Dedup: ${candidates.length} candidates -> ${representatives.length} clusters (largest cluster ${largestClusterSize} members)`,
      );

      return {
        enabled: true,
        candidatesIn: candidates.length,
        clustersOut: representatives.length,
        mergedCount,
        largestClusterSize,
        embeddingFailures: prepared.embeddingFailures,
        representatives,
        warnings: prepared.warnings,
      };
    } catch (error) {
      const message = `Dedup failed for run ${runId}: ${error instanceof Error ? error.message : String(error)}`;
      this.logger.warn(message);
      return this.rawResult(candidates, true, [message], 0);
    }
  }

  /**
   * Parses stored embeddings and computes the missing ones in batches,
   * persisting each new embedding. If a batch fails, every candidate in it is
   * retried individually; candidates that still fail keep a `null` vector and
   * are counted in `embeddingFailures`.
   */
  private async prepareEmbeddings(candidates: ContextCandidateForDedup[]): Promise<{
    candidates: CandidateWithVector[];
    embeddingFailures: number;
    warnings: string[];
  }> {
    const prepared: CandidateWithVector[] = candidates.map((candidate) => ({
      ...candidate,
      embeddingVector: this.parseEmbedding(candidate.embedding),
    }));
    const missing = prepared.filter((candidate) => candidate.embeddingVector === null);
    const warnings: string[] = [];
    let embeddingFailures = 0;

    // A zero, negative or NaN batch size would never advance the loop below;
    // fall back to one candidate per batch in that case.
    const configuredBatchSize = Math.floor(this.deps.embeddings.maxBatchSize);
    const batchSize = configuredBatchSize >= 1 ? configuredBatchSize : 1;

    for (let i = 0; i < missing.length; i += batchSize) {
      const batch = missing.slice(i, i + batchSize);
      const texts = batch.map((candidate) => buildContextCandidateEmbeddingText(candidate));

      try {
        const embeddings = await this.deps.embeddings.computeEmbeddingsBulk(texts);
        if (embeddings.length !== batch.length) {
          throw new Error(`expected ${batch.length} embeddings, got ${embeddings.length}`);
        }

        for (let index = 0; index < batch.length; index++) {
          batch[index].embeddingVector = embeddings[index];
          await this.deps.store.updateCandidateEmbedding(batch[index].id, embeddings[index]);
        }
      } catch (error) {
        const reason = error instanceof Error ? error.message : String(error);
        warnings.push(
          `embedding bulk failed: ${reason}; falling back to per-candidate embedding for ${batch.length} candidates`,
        );

        for (const candidate of batch) {
          try {
            const embedding = await this.deps.embeddings.computeEmbedding(
              buildContextCandidateEmbeddingText(candidate),
            );
            candidate.embeddingVector = embedding;
            await this.deps.store.updateCandidateEmbedding(candidate.id, embedding);
          } catch (singleError) {
            embeddingFailures += 1;
            warnings.push(
              `Embedding failed for candidate ${candidate.candidateKey}: ${
                singleError instanceof Error ? singleError.message : String(singleError)
              }`,
            );
          }
        }
      }
    }

    return { candidates: prepared, embeddingFailures, warnings };
  }

  /**
   * Greedy clustering: candidates are visited by promotion score (descending)
   * then creation time (ascending); each joins the first existing cluster whose
   * representative is at least `threshold` cosine-similar, otherwise it starts
   * a new cluster. Candidates without a vector always become singletons.
   */
  private clusterCandidates(candidates: CandidateWithVector[], threshold: number): CandidateCluster[] {
    const clusters: CandidateCluster[] = [];
    const sorted = [...candidates].sort((left, right) => {
      if (right.promotionScore !== left.promotionScore) {
        return right.promotionScore - left.promotionScore;
      }
      return left.createdAt.getTime() - right.createdAt.getTime();
    });

    for (const candidate of sorted) {
      if (!candidate.embeddingVector) {
        clusters.push({ representative: candidate, members: [candidate] });
        continue;
      }

      const match = clusters.find(
        (cluster) =>
          cluster.representative.embeddingVector &&
          candidate.embeddingVector &&
          this.cosine(candidate.embeddingVector, cluster.representative.embeddingVector) >= threshold,
      );

      if (match) {
        match.members.push(candidate);
      } else {
        clusters.push({ representative: candidate, members: [candidate] });
      }
    }

    return clusters;
  }

  /**
   * Persists every multi-member cluster (merged member ids, unioned evidence,
   * aggregated score) and returns the effective promotion score per
   * representative id. Singleton clusters are not written to the store.
   */
  private async persistClusters(
    clusters: CandidateCluster[],
    scoreAggregation: 'max' | 'mean' | 'sum',
  ): Promise<Map<string, number>> {
    const effectiveScores = new Map<string, number>();

    for (const cluster of clusters) {
      if (cluster.members.length <= 1) {
        effectiveScores.set(cluster.representative.id, cluster.representative.promotionScore);
        continue;
      }

      const promotionScore = this.aggregateScore(cluster.members, scoreAggregation);
      effectiveScores.set(cluster.representative.id, promotionScore);

      await this.deps.store.markCandidatesAsMergedToCluster({
        representativeId: cluster.representative.id,
        // members[0] is the representative itself, so only the rest are merged into it.
        memberIds: cluster.members.slice(1).map((member) => member.id),
        evidenceChunkIds: this.unionEvidenceChunkIds(cluster.members),
        evidenceRefs: this.unionEvidenceRefs(cluster.members),
        promotionScore,
      });
    }

    return effectiveScores;
  }

  /** Parses a JSON-encoded numeric array; returns `null` for empty, invalid or non-numeric input. */
  private parseEmbedding(value: string | null): number[] | null {
    if (!value) {
      return null;
    }

    try {
      const parsed = JSON.parse(value);
      if (Array.isArray(parsed) && parsed.every((item) => typeof item === 'number')) {
        return parsed;
      }
    } catch {
      return null;
    }

    return null;
  }

  /**
   * Cosine similarity over the shared prefix of the two vectors; returns 0
   * when either vector has zero magnitude over that prefix.
   */
  private cosine(left: number[], right: number[]): number {
    let dot = 0;
    let leftNorm = 0;
    let rightNorm = 0;
    const length = Math.min(left.length, right.length);

    for (let i = 0; i < length; i++) {
      dot += left[i] * right[i];
      leftNorm += left[i] * left[i];
      rightNorm += right[i] * right[i];
    }

    if (leftNorm === 0 || rightNorm === 0) {
      return 0;
    }

    return dot / (Math.sqrt(leftNorm) * Math.sqrt(rightNorm));
  }

  /** Distinct evidence chunk ids across all members, in first-seen order. */
  private unionEvidenceChunkIds(members: CandidateWithVector[]): string[] {
    const seen = new Set<string>();
    for (const member of members) {
      for (const chunkId of member.evidenceChunkIds) {
        seen.add(chunkId);
      }
    }
    return [...seen];
  }

  /**
   * Evidence refs across all members, de-duplicated by {@link evidenceRefKey}
   * and kept in first-seen order. Members whose refs are not an array are skipped.
   */
  private unionEvidenceRefs(members: CandidateWithVector[]): JsonValue {
    const refs: JsonValue[] = [];
    const seen = new Set<string>();

    for (const member of members) {
      if (!Array.isArray(member.evidenceRefs)) {
        continue;
      }

      for (const ref of member.evidenceRefs) {
        const key = this.evidenceRefKey(ref);
        if (!seen.has(key)) {
          seen.add(key);
          refs.push(ref);
        }
      }
    }

    return refs;
  }

  /**
   * Identity key for an evidence ref: the stable citation key if present, else
   * the chunk id, else the raw path, else the ref's JSON serialization.
   */
  private evidenceRefKey(ref: JsonValue): string {
    if (ref && typeof ref === 'object' && !Array.isArray(ref)) {
      const record = ref as Record<string, unknown>;
      if (typeof record.stableCitationKey === 'string') {
        return `stable:${record.stableCitationKey}`;
      }
      if (typeof record.chunkId === 'string') {
        return `chunk:${record.chunkId}`;
      }
      if (typeof record.rawPath === 'string') {
        return `raw:${record.rawPath}`;
      }
    }

    return JSON.stringify(ref);
  }

  /** Combines member promotion scores; `mean` is rounded to an integer. */
  private aggregateScore(members: CandidateWithVector[], mode: 'max' | 'mean' | 'sum'): number {
    const scores = members.map((member) => member.promotionScore);

    if (mode === 'sum') {
      return scores.reduce((sum, score) => sum + score, 0);
    }

    if (mode === 'mean') {
      return Math.round(scores.reduce((sum, score) => sum + score, 0) / scores.length);
    }

    return Math.max(...scores);
  }

  /** Result that passes every candidate through unmerged, each as its own cluster. */
  private rawResult(
    candidates: ContextCandidateForDedup[],
    enabled: boolean,
    warnings: string[],
    embeddingFailures: number,
  ): CandidateDedupResult {
    return {
      enabled,
      candidatesIn: candidates.length,
      clustersOut: candidates.length,
      mergedCount: 0,
      largestClusterSize: candidates.length > 0 ? 1 : 0,
      embeddingFailures,
      representatives: candidates,
      warnings,
    };
  }

  /** Drops the in-memory `embeddingVector` so the candidate matches the port type again. */
  private stripVector(candidate: CandidateWithVector): ContextCandidateForDedup {
    const { embeddingVector: _embeddingVector, ...rest } = candidate;
    return rest;
  }
}
rawPath: 'pages/page-1/page.md', + }, + ], + suggestedPageKey: 'revenue-policy', + actionHint: 'create', + durabilityScore: 3, + authorityScore: 3, + reuseScore: 3, + noveltyScore: 2, + riskScore: 1, + promotionScore: 10, + lane: 'full', + ...overrides, + }; +} + +function chunk( + overrides: Partial = {}, +): CurrentRunEvidenceChunkForCarryForward { + return { + chunkId: 'current-chunk-1', + stableCitationKey: 'notion:page-1:revenue-policy', + syncId: 'sync-current', + rawPath: 'pages/page-1/page.md', + title: 'Revenue Policy', + path: 'Company / Revenue Policy', + url: 'https://notion.example/page-1', + lastEditedAt: new Date('2026-04-30T12:00:00.000Z'), + citation: { + source: 'notion', + pageId: 'page-1', + syncId: 'sync-current', + rawPath: 'pages/page-1/page.md', + }, + content: 'Booked revenue excludes refunds and test accounts.', + ...overrides, + }; +} + +function buildHarness(reExamineBudgetExhaustedOnRerun: boolean) { + const store = { + listPendingCandidatesForDedup: vi.fn(), + updateCandidateEmbedding: vi.fn(), + markCandidatesAsMergedToCluster: vi.fn(), + listBudgetExhaustedCandidatesForCarryForward: vi.fn(), + listCurrentRunEvidenceChunksForCarryForward: vi.fn(), + insertCandidate: vi.fn().mockResolvedValue({ id: 'new-candidate-1' }), + }; + const service = new ContextCandidateCarryforwardService({ + store: store as unknown as ContextCandidateStorePort, + settings: { reExamineBudgetExhaustedOnRerun }, + }); + return { service, store }; +} + +describe('ContextCandidateCarryforwardService', () => { + it('carries a prior budget-exhausted candidate with remapped current evidence', async () => { + const { service, store } = buildHarness(true); + store.listBudgetExhaustedCandidatesForCarryForward.mockResolvedValueOnce([candidate()]); + store.listCurrentRunEvidenceChunksForCarryForward.mockResolvedValueOnce([chunk()]); + + const result = await service.carryForward({ + runId: 'current-run-1', + connectionId: 'connection-1', + sourceKey: 'notion', + }); + + 
expect(result).toMatchObject({ + considered: 1, + carriedForward: 1, + skippedNotReemitted: 0, + remappedEvidenceRefs: 1, + staleEvidenceRefs: 0, + }); + expect(store.insertCandidate).toHaveBeenCalledWith( + expect.objectContaining({ + runId: 'current-run-1', + connectionId: 'connection-1', + sourceKey: 'notion', + candidateKey: 'budget-revenue-policy', + evidenceChunkIds: ['current-chunk-1'], + evidenceRefs: [ + expect.objectContaining({ + chunkId: 'current-chunk-1', + stableCitationKey: 'notion:page-1:revenue-policy', + syncId: 'sync-current', + snippetHash: createHash('sha256') + .update('Booked revenue excludes refunds and test accounts.') + .digest('hex'), + }), + ], + status: 'pending', + rejectionReason: null, + lane: 'full', + embedding: null, + }), + ); + expect(result.warnings).toEqual(['Re-examined 1 prior budget-exhausted context candidate.']); + }); + + it('skips stale prior candidates when config requires current evidence re-emission', async () => { + const { service, store } = buildHarness(false); + store.listBudgetExhaustedCandidatesForCarryForward.mockResolvedValueOnce([candidate()]); + store.listCurrentRunEvidenceChunksForCarryForward.mockResolvedValueOnce([]); + + const result = await service.carryForward({ + runId: 'current-run-1', + connectionId: 'connection-1', + sourceKey: 'notion', + }); + + expect(result).toMatchObject({ + considered: 1, + carriedForward: 0, + skippedNotReemitted: 1, + remappedEvidenceRefs: 0, + staleEvidenceRefs: 0, + }); + expect(store.insertCandidate).not.toHaveBeenCalled(); + expect(result.warnings).toEqual([ + 'Skipped 1 budget-exhausted context candidate because its evidence was not re-emitted in this run.', + ]); + }); + + it('carries stale prior evidence when reExamineBudgetExhaustedOnRerun is enabled', async () => { + const { service, store } = buildHarness(true); + store.listBudgetExhaustedCandidatesForCarryForward.mockResolvedValueOnce([candidate()]); + 
import { createHash } from 'node:crypto';
import { type KloLogger, noopLogger } from '../../core/index.js';
import type { JsonValue } from '../ports.js';
import type { ContextCandidateStorePort } from './store.js';
import type {
  BudgetExhaustedCandidateForCarryForward,
  ContextCandidateCarryforwardSettings,
  CurrentRunEvidenceChunkForCarryForward,
} from './types.js';

/** Identifies the run that prior budget-exhausted candidates are carried into. */
export interface ContextCandidateCarryforwardArgs {
  runId: string;
  connectionId: string;
  sourceKey: string;
}

/** Counters and human-readable warnings produced by one carry-forward invocation. */
export interface ContextCandidateCarryforwardResult {
  /** Number of prior candidates returned by the store. */
  considered: number;
  /** Number of candidates re-inserted under the current run. */
  carriedForward: number;
  /** Candidates dropped because none of their evidence matched a current-run chunk. */
  skippedNotReemitted: number;
  /** Evidence refs rewritten to point at current-run chunks. */
  remappedEvidenceRefs: number;
  /** Evidence refs carried unchanged because no current-run chunk matched. */
  staleEvidenceRefs: number;
  warnings: string[];
}

export interface ContextCandidateCarryforwardServiceDeps {
  store: ContextCandidateStorePort;
  settings: ContextCandidateCarryforwardSettings;
  /** Optional; falls back to `noopLogger`. */
  logger?: KloLogger;
}

/**
 * Re-inserts candidates that a previous run left behind for budget reasons into the current run,
 * rewriting their evidence to current-run chunks when a chunk with the same stable citation key exists.
 */
export class ContextCandidateCarryforwardService {
  private readonly logger: KloLogger;

  constructor(private readonly deps: ContextCandidateCarryforwardServiceDeps) {
    this.logger = deps.logger ?? noopLogger;
  }

  /**
   * Carries prior budget-exhausted candidates into `args.runId`.
   *
   * A candidate whose evidence cannot be remapped at all is skipped unless
   * `settings.reExamineBudgetExhaustedOnRerun` is enabled, in which case it is inserted
   * with its prior evidence untouched.
   *
   * @param args Run, connection and source the candidates are inserted under.
   * @returns Counters for what was considered, carried and skipped, plus warnings.
   */
  async carryForward(args: ContextCandidateCarryforwardArgs): Promise<ContextCandidateCarryforwardResult> {
    const candidates = await this.deps.store.listBudgetExhaustedCandidatesForCarryForward({
      connectionId: args.connectionId,
      sourceKey: args.sourceKey,
      currentRunId: args.runId,
    });
    const chunks = await this.deps.store.listCurrentRunEvidenceChunksForCarryForward(args.runId);
    // If several chunks share a stable key, the last one listed wins.
    const chunksByStableKey = new Map<string, CurrentRunEvidenceChunkForCarryForward>(
      chunks.map((chunk) => [chunk.stableCitationKey, chunk]),
    );
    const allowStaleEvidence = this.deps.settings.reExamineBudgetExhaustedOnRerun;

    let carriedForward = 0;
    let skippedNotReemitted = 0;
    let remappedEvidenceRefs = 0;
    let staleEvidenceRefs = 0;

    for (const candidate of candidates) {
      const remap = this.remapEvidence(candidate, chunksByStableKey);
      if (remap.remappedCount === 0 && !allowStaleEvidence) {
        skippedNotReemitted += 1;
        continue;
      }

      await this.deps.store.insertCandidate({
        runId: args.runId,
        connectionId: args.connectionId,
        sourceKey: args.sourceKey,
        candidateKey: candidate.candidateKey,
        topic: candidate.topic,
        assertion: candidate.assertion,
        rationale: candidate.rationale,
        evidenceChunkIds: remap.evidenceChunkIds,
        evidenceRefs: remap.evidenceRefs,
        suggestedPageKey: candidate.suggestedPageKey,
        actionHint: candidate.actionHint,
        durabilityScore: candidate.durabilityScore,
        authorityScore: candidate.authorityScore,
        reuseScore: candidate.reuseScore,
        noveltyScore: candidate.noveltyScore,
        riskScore: candidate.riskScore,
        promotionScore: candidate.promotionScore,
        // The carried candidate starts over: pending, no rejection reason, no embedding.
        status: 'pending',
        rejectionReason: null,
        lane: candidate.lane,
        embedding: null,
      });

      carriedForward += 1;
      remappedEvidenceRefs += remap.remappedCount;
      staleEvidenceRefs += remap.staleCount;
    }

    const warnings = this.buildWarnings({ carriedForward, skippedNotReemitted, staleEvidenceRefs });
    if (carriedForward > 0 || skippedNotReemitted > 0) {
      this.logger.log(
        `Budget carryforward: considered ${candidates.length}, carried ${carriedForward}, skipped ${skippedNotReemitted}`,
      );
    }

    return {
      considered: candidates.length,
      carriedForward,
      skippedNotReemitted,
      remappedEvidenceRefs,
      staleEvidenceRefs,
      warnings,
    };
  }

  /**
   * Rewrites a candidate's evidence refs onto current-run chunks.
   *
   * If at least one ref matches, only the matched refs are kept; unmatched refs are dropped and
   * are not counted as stale. If none match, the prior chunk ids and refs are returned as-is and
   * every prior ref is counted as stale.
   */
  private remapEvidence(
    candidate: BudgetExhaustedCandidateForCarryForward,
    chunksByStableKey: Map<string, CurrentRunEvidenceChunkForCarryForward>,
  ): { evidenceChunkIds: string[]; evidenceRefs: JsonValue; remappedCount: number; staleCount: number } {
    const refs = Array.isArray(candidate.evidenceRefs) ? candidate.evidenceRefs : [];
    const remappedRefs: JsonValue[] = [];
    const remappedChunkIds: string[] = [];

    for (const ref of refs) {
      const stableKey = this.stableCitationKey(ref);
      const currentChunk = stableKey ? chunksByStableKey.get(stableKey) : undefined;
      if (!currentChunk) {
        continue;
      }

      remappedChunkIds.push(currentChunk.chunkId);
      remappedRefs.push(this.currentEvidenceRef(currentChunk));
    }

    if (remappedRefs.length > 0) {
      return {
        // Chunk ids are de-duplicated; refs are kept one per matched prior ref.
        evidenceChunkIds: [...new Set(remappedChunkIds)],
        evidenceRefs: remappedRefs,
        remappedCount: remappedRefs.length,
        staleCount: 0,
      };
    }

    return {
      evidenceChunkIds: candidate.evidenceChunkIds,
      evidenceRefs: candidate.evidenceRefs,
      remappedCount: 0,
      staleCount: refs.length,
    };
  }

  /** Reads `stableCitationKey` from a ref; returns null unless the ref is a plain object holding a string key. */
  private stableCitationKey(ref: JsonValue): string | null {
    if (!ref || typeof ref !== 'object' || Array.isArray(ref)) {
      return null;
    }
    const value = (ref as Record<string, JsonValue>).stableCitationKey;
    return typeof value === 'string' ? value : null;
  }

  /** Builds the evidence ref stored for a current-run chunk, including a SHA-256 hex digest of its content. */
  private currentEvidenceRef(chunk: CurrentRunEvidenceChunkForCarryForward): JsonValue {
    return {
      chunkId: chunk.chunkId,
      stableCitationKey: chunk.stableCitationKey,
      syncId: chunk.syncId,
      rawPath: chunk.rawPath,
      title: chunk.title,
      path: chunk.path,
      url: chunk.url,
      lastEditedAt: chunk.lastEditedAt?.toISOString() ?? null,
      snippetHash: createHash('sha256').update(chunk.content).digest('hex'),
      citation: chunk.citation,
    };
  }

  /** Produces one warning per non-zero counter, in the order: carried, skipped, stale. */
  private buildWarnings(params: {
    carriedForward: number;
    skippedNotReemitted: number;
    staleEvidenceRefs: number;
  }): string[] {
    const warnings: string[] = [];
    if (params.carriedForward > 0) {
      warnings.push(
        `Re-examined ${params.carriedForward} prior budget-exhausted context candidate${
          params.carriedForward === 1 ? '' : 's'
        }.`,
      );
    }
    if (params.skippedNotReemitted > 0) {
      const single = params.skippedNotReemitted === 1;
      // The possessive must agree with the noun: "candidate ... its" vs "candidates ... their".
      warnings.push(
        `Skipped ${params.skippedNotReemitted} budget-exhausted context candidate${single ? '' : 's'} because ${
          single ? 'its' : 'their'
        } evidence was not re-emitted in this run.`,
      );
    }
    if (params.staleEvidenceRefs > 0) {
      warnings.push(
        `Carried ${params.staleEvidenceRefs} budget-exhausted evidence ref${
          params.staleEvidenceRefs === 1 ? '' : 's'
        } without a current-run chunk remap.`,
      );
    }
    return warnings;
  }
}
id: 'candidate-1' }), + listCandidatesForPromptByKeys: vi + .fn() + .mockImplementation((_runId: string, keys: string[]) => + Promise.resolve(keys.map((key) => promptCandidate(key, Number(key.replace('c', '')) || 1))), + ), + markPendingCandidatesByReason: vi.fn().mockResolvedValue(0), + summarizeCandidateVerdicts: vi.fn().mockResolvedValue({ + pending: 0, + promoted: 1, + merged: 1, + rejected: 1, + conflict: 0, + rejectedByReason: { exceeded_curator_passes: 1 }, + }), + } satisfies ContextCandidateStorePort; + const agentRunner = { + runLoop: vi.fn().mockResolvedValue({ stopReason: 'natural' }), + }; + const actions: Array<{ target: 'wiki'; type: 'created' | 'updated'; key: string; detail: string }> = []; + const prompts: string[] = []; + const service = new CuratorPaginationService({ + store, + agentRunner: agentRunner as never, + settings: { + batchSize: overrides.batchSize ?? 2, + maxPasses: overrides.maxPasses ?? 2, + stepBudgetPerPass: overrides.stepBudgetPerPass ?? 7, + }, + }); + + const input = ( + representatives = [candidate('c1', 10), candidate('c2', 9), candidate('c3', 8)], + ): CuratorPaginationInput => ({ + runId: 'run-1', + sourceKey: 'notion', + jobId: 'job-1', + stageIndex: { + jobId: 'job-1', + connectionId: 'c1', + workUnits: [], + conflictsResolved: [], + evictionsApplied: [], + unmappedFallbacks: [], + }, + evictionUnit: undefined, + representatives, + initialBudget: { creates: 2, updates: 1 }, + modelRole: 'curator', + buildSystemPrompt: () => 'system prompt', + buildUserPrompt: ({ runState, items }) => { + const prompt = `pass=${runState.passNumber}; budget=${runState.budgetRemaining.creates}/${ + runState.budgetRemaining.updates + }; items=${items.map((item) => item.candidateKey).join(',')}; previous=${runState.previouslyPromotedInRun + .map((page) => page.pageKey) + .join(',')}`; + prompts.push(prompt); + return prompt; + }, + buildToolSet: () => ({}), + getReconciliationActions: () => actions, + }); + + return { store, agentRunner, 
describe('CuratorPaginationService', () => {
  it('paginates representatives and carries budget plus previous wiki writes into later passes', async () => {
    const { service, agentRunner, actions, prompts, input } = makeHarness({
      batchSize: 2,
      maxPasses: 2,
      stepBudgetPerPass: 7,
    });
    // Only the first pass records a wiki write; later passes leave the action list alone.
    agentRunner.runLoop.mockImplementation(() => {
      if (actions.length === 0) {
        actions.push({ target: 'wiki', type: 'created', key: 'page-c1', detail: 'Created C1' });
      }
      return { stopReason: 'natural' };
    });

    const { report } = await service.reconcile(input());

    expect(agentRunner.runLoop).toHaveBeenCalledTimes(2);
    expect(agentRunner.runLoop).toHaveBeenCalledWith(expect.objectContaining({ modelRole: 'curator' }));

    const [firstPrompt, secondPrompt] = prompts;
    expect(firstPrompt).toContain('pass=1');
    expect(firstPrompt).toContain('budget=2/1');
    expect(firstPrompt).toContain('items=c1,c2');
    expect(secondPrompt).toContain('pass=2');
    expect(secondPrompt).toContain('budget=1/1');
    expect(secondPrompt).toContain('previous=page-c1');

    expect(report).toMatchObject({
      passesRun: 2,
      topicsExamined: 3,
      topicsByVerdict: { promoted: 1, merged: 1, rejected: 1, conflict: 0 },
      topicsRejectedByReason: { exceeded_curator_passes: 1 },
      budgetExhausted: false,
    });
  });

  it('marks unprocessed representatives when maxPasses is exhausted', async () => {
    const { service, store, input } = makeHarness({ batchSize: 1, maxPasses: 1 });
    const representatives = [candidate('c1', 10), candidate('c2', 9)];

    await service.reconcile(input(representatives));

    // First the never-examined remainder, then the sweep over every representative key.
    for (const candidateKeys of [['c2'], ['c1', 'c2']]) {
      expect(store.markPendingCandidatesByReason).toHaveBeenCalledWith({
        runId: 'run-1',
        candidateKeys,
        rejectionReason: 'exceeded_curator_passes',
      });
    }
  });

  it('marks remaining pending representatives when run budget is exhausted', async () => {
    const { service, store, agentRunner, actions, input } = makeHarness({ batchSize: 1, maxPasses: 5 });
    const reconcileInput = {
      ...input([candidate('c1', 10), candidate('c2', 9), candidate('c3', 8)]),
      initialBudget: { creates: 1, updates: 0 },
    };
    // Every pass spends one create, so the single-create budget is gone after pass 1.
    agentRunner.runLoop.mockImplementation(() => {
      actions.push({ target: 'wiki', type: 'created', key: 'page-c1', detail: 'Created C1' });
      return { stopReason: 'natural' };
    });

    const outcome = await service.reconcile(reconcileInput);

    expect(agentRunner.runLoop).toHaveBeenCalledTimes(1);
    expect(store.markPendingCandidatesByReason).toHaveBeenCalledWith({
      runId: 'run-1',
      candidateKeys: ['c1', 'c2', 'c3'],
      rejectionReason: 'exceeded_run_budget',
    });
    expect(outcome.report.budgetExhausted).toBe(true);
  });

  it('marks a failed pass with curator_pass_error and continues to later batches', async () => {
    const { service, store, agentRunner, input } = makeHarness({ batchSize: 1, maxPasses: 3 });
    const failedPass = { stopReason: 'error', error: new Error('provider timeout') };
    agentRunner.runLoop.mockResolvedValueOnce(failedPass).mockResolvedValue({ stopReason: 'natural' });

    const outcome = await service.reconcile(input([candidate('c1', 10), candidate('c2', 9)]));

    expect(agentRunner.runLoop).toHaveBeenCalledTimes(2);
    expect(store.markPendingCandidatesByReason).toHaveBeenCalledWith({
      runId: 'run-1',
      candidateKeys: ['c1'],
      rejectionReason: 'curator_pass_error',
    });
    expect(outcome.warnings).toContain('Curator pass 1 failed: provider timeout');
  });

  it('skips when there are no representatives and no reconciliation work', async () => {
    const { service, agentRunner, input } = makeHarness();

    const outcome = await service.reconcile(input([]));

    expect(outcome.skipped).toBe(true);
    expect(agentRunner.runLoop).not.toHaveBeenCalled();
    expect(outcome.report.topicsExamined).toBe(0);
  });
});
import type { KloModelRole } from '@klo/llm';
import type { ToolSet } from 'ai';
import type { AgentRunnerService } from '../../agent/index.js';
import { type KloLogger, noopLogger } from '../../core/index.js';
import type { MemoryAction } from '../../memory/index.js';
import type { ContextCandidateForDedup, CuratorPaginationPort, CuratorPaginationReport } from '../ports.js';
import type {
  ReconcileCandidateForPrompt,
  ReconcileCandidateSummary,
  ReconcilePromptRunState,
  WikiPageRef,
} from '../stages/build-reconcile-context.js';
import { type ReconciliationOutcome, runReconciliationStage4 } from '../stages/stage-4-reconciliation.js';
import type { StageIndex } from '../stages/stage-index.types.js';
import type { EvictionUnit } from '../types.js';
import type { ContextCandidateStorePort } from './store.js';
import type { ContextCandidateVerdictSummary, CuratorPaginationSettings } from './types.js';

/** Remaining wiki page creates and updates the curator may still perform in this run. */
interface CuratorPaginationBudget {
  creates: number;
  updates: number;
}

/** Data handed to `buildUserPrompt` for a single pass. */
interface CuratorPaginationPromptInput {
  summary: ReconcileCandidateSummary;
  items: ReconcileCandidateForPrompt[];
  runState: ReconcilePromptRunState;
}

export interface CuratorPaginationInput {
  runId: string;
  sourceKey: string;
  jobId: string;
  stageIndex: StageIndex;
  evictionUnit: EvictionUnit | undefined;
  /** Candidates to examine; they are batched in the given order. */
  representatives: ContextCandidateForDedup[];
  /** Starting budget; negative values are clamped to 0. */
  initialBudget: CuratorPaginationBudget;
  modelRole: KloModelRole;
  buildSystemPrompt: () => string;
  buildUserPrompt: (input: CuratorPaginationPromptInput) => string;
  buildToolSet: (passNumber: number) => ToolSet;
  /** Returns the cumulative action list; new entries after each pass are read from its tail. */
  getReconciliationActions: () => MemoryAction[];
  onStepFinish?: (info: { passNumber: number; stepIndex: number; stepBudget: number }) => void;
}

interface CuratorPaginationResult extends ReconciliationOutcome {
  report: CuratorPaginationReport;
  warnings: string[];
}

export interface CuratorPaginationServiceDeps {
  store: ContextCandidateStorePort;
  agentRunner: AgentRunnerService;
  settings: CuratorPaginationSettings;
  /** Optional; falls back to `noopLogger`. */
  logger?: KloLogger;
}

/**
 * Runs the curator reconciliation stage over candidate representatives in batches ("passes"),
 * tracking a shared create/update budget and the wiki pages written so far in the run.
 */
export class CuratorPaginationService implements CuratorPaginationPort {
  private readonly logger: KloLogger;

  constructor(private readonly deps: CuratorPaginationServiceDeps) {
    this.logger = deps.logger ?? noopLogger;
  }

  /**
   * Reconciles the representatives in up to `settings.maxPasses` passes of `settings.batchSize`.
   *
   * - With no representatives and no other reconcile work, returns a skipped result without running.
   * - With no representatives but other work, runs a single non-forced pass.
   * - Otherwise runs forced passes until the queue, the pass limit or the budget is exhausted,
   *   marking candidates the loop did not resolve with the matching rejection reason.
   *
   * `stopReason` and `error` on the result are those of the last pass that ran; earlier pass
   * failures are reported through `warnings`.
   */
  async reconcile(input: CuratorPaginationInput): Promise<CuratorPaginationResult> {
    const config = this.deps.settings;
    // A zero, negative or NaN batch size would make `splice` return empty batches and spend every
    // pass on a forced agent run with no candidates; always take at least one per pass.
    const batchSize = Math.max(1, Math.trunc(config.batchSize) || 1);
    const representatives = [...input.representatives];
    const allCandidateKeys = representatives.map((candidate) => candidate.candidateKey);
    const budget: CuratorPaginationBudget = {
      creates: Math.max(0, input.initialBudget.creates),
      updates: Math.max(0, input.initialBudget.updates),
    };
    const previouslyPromotedInRun: WikiPageRef[] = [];
    const warnings: string[] = [];
    let passNumber = 0;
    let topicsExamined = 0;
    let budgetExhausted = this.isBudgetSpent(budget);
    let stopReason: ReconciliationOutcome['stopReason'];
    let error: Error | undefined;
    // Actions recorded before this call are not ours; only entries past the cursor count.
    let actionCursor = input.getReconciliationActions().length;

    if (representatives.length === 0 && !this.hasNonCandidateReconcileWork(input.stageIndex, input.evictionUnit)) {
      return this.result({
        skipped: true,
        stopReason,
        error,
        report: this.emptyReport(),
        warnings,
      });
    }

    if (representatives.length === 0) {
      passNumber = 1;
      const outcome = await this.runPass({
        input,
        candidates: [],
        passNumber,
        maxPasses: config.maxPasses,
        budget,
        previouslyPromotedInRun,
        forceRun: false,
      });
      stopReason = outcome.stopReason;
      error = outcome.error;
      return this.result({
        skipped: outcome.skipped,
        stopReason,
        error,
        report: this.emptyReport({ passesRun: outcome.skipped ? 0 : 1 }),
        warnings,
      });
    }

    const queue = [...representatives];
    while (queue.length > 0 && passNumber < config.maxPasses) {
      if (this.isBudgetSpent(budget)) {
        budgetExhausted = true;
        await this.deps.store.markPendingCandidatesByReason({
          runId: input.runId,
          candidateKeys: queue.map((candidate) => candidate.candidateKey),
          rejectionReason: 'exceeded_run_budget',
        });
        queue.length = 0;
        break;
      }

      const batch = queue.splice(0, batchSize);
      const batchKeys = batch.map((candidate) => candidate.candidateKey);
      passNumber += 1;
      topicsExamined += batch.length;

      const outcome = await this.runPass({
        input,
        candidates: batch,
        passNumber,
        maxPasses: config.maxPasses,
        budget,
        previouslyPromotedInRun,
        forceRun: true,
      });
      stopReason = outcome.stopReason;
      error = outcome.error;

      // Wiki writes are charged against the budget even when the pass ended in an error.
      const actions = input.getReconciliationActions();
      const newWikiActions = actions
        .slice(actionCursor)
        .filter((action) => action.target === 'wiki' && (action.type === 'created' || action.type === 'updated'));
      actionCursor = actions.length;
      this.consumeBudget(budget, newWikiActions);
      previouslyPromotedInRun.push(...this.toWikiRefs(newWikiActions));

      if (outcome.stopReason === 'error' || outcome.error) {
        const message = `Curator pass ${passNumber} failed: ${outcome.error?.message ?? outcome.stopReason ?? 'unknown error'}`;
        warnings.push(message);
        this.logger.warn(message);
        await this.deps.store.markPendingCandidatesByReason({
          runId: input.runId,
          candidateKeys: batchKeys,
          rejectionReason: 'curator_pass_error',
        });
        // A failed pass does not stop the run; later batches are still attempted.
        continue;
      }

      if (this.isBudgetSpent(budget)) {
        budgetExhausted = true;
        await this.deps.store.markPendingCandidatesByReason({
          runId: input.runId,
          candidateKeys: [...batchKeys, ...queue.map((candidate) => candidate.candidateKey)],
          rejectionReason: 'exceeded_run_budget',
        });
        queue.length = 0;
        break;
      }
    }

    if (queue.length > 0) {
      await this.deps.store.markPendingCandidatesByReason({
        runId: input.runId,
        candidateKeys: queue.map((candidate) => candidate.candidateKey),
        rejectionReason: 'exceeded_curator_passes',
      });
    }

    // Final sweep over every representative key.
    // NOTE(review): judging by the store method's name this should only touch candidates that are
    // still pending; confirm against the store implementation.
    await this.deps.store.markPendingCandidatesByReason({
      runId: input.runId,
      candidateKeys: allCandidateKeys,
      rejectionReason: 'exceeded_curator_passes',
    });

    const verdicts = await this.deps.store.summarizeCandidateVerdicts(input.runId, allCandidateKeys);
    const report = this.reportFromVerdicts({
      passesRun: passNumber,
      topicsExamined,
      budgetExhausted,
      verdicts,
    });

    this.logger.log(
      `Curator: ${report.passesRun} passes, ${report.topicsExamined} topics examined, ${report.topicsByVerdict.promoted} promoted`,
    );

    return this.result({
      skipped: false,
      stopReason,
      error,
      report,
      warnings,
    });
  }

  /**
   * Runs one reconciliation pass for the given candidates.
   *
   * Loads the prompt rows for the batch from the store and passes snapshot copies of the budget
   * and the previously promoted pages to the prompt builder.
   */
  private async runPass(params: {
    input: CuratorPaginationInput;
    candidates: ContextCandidateForDedup[];
    passNumber: number;
    maxPasses: number;
    budget: CuratorPaginationBudget;
    previouslyPromotedInRun: WikiPageRef[];
    forceRun: boolean;
  }): Promise<ReconciliationOutcome> {
    const config = this.deps.settings;
    const candidateKeys = params.candidates.map((candidate) => candidate.candidateKey);
    const items = await this.deps.store.listCandidatesForPromptByKeys(params.input.runId, candidateKeys);
    const summary = this.batchSummary(items);

    return runReconciliationStage4({
      stageIndex: params.input.stageIndex,
      evictionUnit: params.input.evictionUnit,
      agentRunner: this.deps.agentRunner,
      buildSystemPrompt: () => params.input.buildSystemPrompt(),
      buildUserPrompt: () =>
        params.input.buildUserPrompt({
          summary,
          items,
          runState: {
            passNumber: params.passNumber,
            maxPasses: params.maxPasses,
            budgetRemaining: { ...params.budget },
            previouslyPromotedInRun: [...params.previouslyPromotedInRun],
          },
        }),
      buildToolSet: () => params.input.buildToolSet(params.passNumber),
      modelRole: params.input.modelRole,
      stepBudget: config.stepBudgetPerPass,
      sourceKey: params.input.sourceKey,
      jobId: params.input.jobId,
      forceRun: params.forceRun,
      // Only wrap the callback when the caller supplied one, adding the pass number to its payload.
      onStepFinish: params.input.onStepFinish
        ? ({ stepIndex, stepBudget }) =>
            params.input.onStepFinish?.({ passNumber: params.passNumber, stepIndex, stepBudget })
        : undefined,
    });
  }

  /** Counts prompt rows per status; rows with an unrecognised status only add to `total`. */
  private batchSummary(items: ReconcileCandidateForPrompt[]): ReconcileCandidateSummary {
    return items.reduce(
      (summary, item) => {
        summary.total += 1;
        if (item.status === 'pending') {
          summary.pending += 1;
        } else if (item.status === 'promoted') {
          summary.promoted += 1;
        } else if (item.status === 'merged') {
          summary.merged += 1;
        } else if (item.status === 'rejected') {
          summary.rejected += 1;
        } else if (item.status === 'conflict') {
          summary.conflict += 1;
        }
        return summary;
      },
      { total: 0, pending: 0, promoted: 0, merged: 0, rejected: 0, conflict: 0 },
    );
  }

  /** True when any work unit carries actions or the eviction unit lists deleted raw paths. */
  private hasNonCandidateReconcileWork(stageIndex: StageIndex, evictionUnit: EvictionUnit | undefined): boolean {
    return stageIndex.workUnits.some((wu) => wu.actions.length > 0) || !!evictionUnit?.deletedRawPaths.length;
  }

  /** True when neither creates nor updates remain. */
  private isBudgetSpent(budget: CuratorPaginationBudget): boolean {
    return budget.creates === 0 && budget.updates === 0;
  }

  /** Subtracts the created/updated action counts from the budget in place, never going below 0. */
  private consumeBudget(budget: CuratorPaginationBudget, actions: MemoryAction[]): void {
    const creates = actions.filter((action) => action.type === 'created').length;
    const updates = actions.filter((action) => action.type === 'updated').length;
    budget.creates = Math.max(0, budget.creates - creates);
    budget.updates = Math.max(0, budget.updates - updates);
  }

  /** Maps wiki actions to page refs; callers pass only 'created'/'updated' actions, hence the cast. */
  private toWikiRefs(actions: MemoryAction[]): WikiPageRef[] {
    return actions.map((action) => ({
      pageKey: action.key,
      action: action.type as 'created' | 'updated',
      summary: action.detail,
    }));
  }

  /** Builds the report from pass counters plus the verdict summary read back from the store. */
  private reportFromVerdicts(params: {
    passesRun: number;
    topicsExamined: number;
    budgetExhausted: boolean;
    verdicts: ContextCandidateVerdictSummary;
  }): CuratorPaginationReport {
    return {
      passesRun: params.passesRun,
      topicsExamined: params.topicsExamined,
      topicsByVerdict: {
        promoted: params.verdicts.promoted,
        merged: params.verdicts.merged,
        rejected: params.verdicts.rejected,
        conflict: params.verdicts.conflict,
      },
      topicsRejectedByReason: params.verdicts.rejectedByReason,
      budgetExhausted: params.budgetExhausted,
    };
  }

  /** An all-zero report, optionally overridden field by field. */
  private emptyReport(overrides: Partial<CuratorPaginationReport> = {}): CuratorPaginationReport {
    return {
      passesRun: 0,
      topicsExamined: 0,
      topicsByVerdict: {
        promoted: 0,
        merged: 0,
        rejected: 0,
        conflict: 0,
      },
      topicsRejectedByReason: {},
      budgetExhausted: false,
      ...overrides,
    };
  }

  /** Identity helper that pins object literals to the result type at each return site. */
  private result(result: CuratorPaginationResult): CuratorPaginationResult {
    return result;
  }
}
/** The two candidate fields that make up the embedding input. */
interface ContextCandidateEmbeddingTextInput {
  topic: string;
  assertion: string;
}

/**
 * Formats a candidate as the text that gets embedded: the topic, then " - ", then the assertion.
 *
 * @param input Candidate topic and assertion.
 * @returns The combined `topic - assertion` string.
 */
export function buildContextCandidateEmbeddingText(input: ContextCandidateEmbeddingTextInput): string {
  const { topic, assertion } = input;
  return `${topic} - ${assertion}`;
}
+ ContextCandidateVerdictSummary, + CuratorPaginationSettings, + CurrentRunEvidenceChunkForCarryForward, + InsertContextCandidateInput, + MarkContextCandidateClusterInput, +} from './types.js'; diff --git a/packages/context/src/ingest/context-candidates/store.test.ts b/packages/context/src/ingest/context-candidates/store.test.ts new file mode 100644 index 00000000..1c2311ad --- /dev/null +++ b/packages/context/src/ingest/context-candidates/store.test.ts @@ -0,0 +1,76 @@ +import { describe, expect, it, vi } from 'vitest'; +import type { ContextCandidateForDedup } from '../ports.js'; +import type { ContextCandidateStorePort } from './store.js'; +import type { InsertContextCandidateInput } from './types.js'; + +const candidate: ContextCandidateForDedup = { + id: 'candidate-1', + candidateKey: 'revenue-policy', + topic: 'Revenue policy', + assertion: 'Booked revenue excludes refunds.', + promotionScore: 10, + createdAt: new Date('2026-04-30T10:00:00.000Z'), + evidenceChunkIds: ['chunk-1'], + evidenceRefs: [{ stableCitationKey: 'notion:page-1:revenue' }], + embedding: '[1,0,0]', + lane: 'full', +}; + +const insert: InsertContextCandidateInput = { + runId: 'run-1', + connectionId: 'connection-1', + sourceKey: 'notion', + candidateKey: 'revenue-policy', + topic: 'Revenue policy', + assertion: 'Booked revenue excludes refunds.', + rationale: 'Finance handbook says this.', + evidenceChunkIds: ['chunk-1'], + evidenceRefs: [{ stableCitationKey: 'notion:page-1:revenue' }], + suggestedPageKey: 'revenue-policy', + actionHint: 'create', + durabilityScore: 3, + authorityScore: 3, + reuseScore: 3, + noveltyScore: 2, + riskScore: 1, + promotionScore: 10, + status: 'pending', + rejectionReason: null, + lane: 'full', + embedding: null, +}; + +describe('ContextCandidateStorePort', () => { + it('describes the persistence operations required by candidate services', async () => { + const store: ContextCandidateStorePort = { + listPendingCandidatesForDedup: 
import type { ContextCandidateForDedup } from '../ports.js';
import type { ReconcileCandidateForPrompt } from '../stages/build-reconcile-context.js';
import type {
  BudgetExhaustedCandidateForCarryForward,
  ContextCandidateRejectionReason,
  ContextCandidateVerdictSummary,
  CurrentRunEvidenceChunkForCarryForward,
  InsertContextCandidateInput,
  MarkContextCandidateClusterInput,
} from './types.js';

/**
 * Persistence operations required by the context-candidate services
 * (dedup, carry-forward and curator pagination).
 *
 * NOTE(review): the `Promise` return types had lost their type arguments, which
 * does not compile. They were restored from the types this module imports and
 * from the values the port's unit test resolves with — confirm against the
 * concrete store implementation.
 */
export interface ContextCandidateStorePort {
  /** Lists the candidates of `runId` that are still pending and should be considered for dedup. */
  listPendingCandidatesForDedup(runId: string): Promise<ContextCandidateForDedup[]>;

  /** Stores the embedding vector computed for a single candidate. */
  updateCandidateEmbedding(candidateId: string, embedding: number[]): Promise<void>;

  /** Records a dedup cluster: the representative candidate, its merged members and the combined evidence/score. */
  markCandidatesAsMergedToCluster(params: MarkContextCandidateClusterInput): Promise<void>;

  /**
   * Lists candidates eligible for carry-forward into `currentRunId`.
   * Presumably these are candidates from earlier runs of the same connection/source
   * that were rejected for budget reasons — verify against the implementation.
   */
  listBudgetExhaustedCandidatesForCarryForward(params: {
    connectionId: string;
    sourceKey: string;
    currentRunId: string;
  }): Promise<BudgetExhaustedCandidateForCarryForward[]>;

  /** Lists the evidence chunks of `runId` used when carrying candidates forward. */
  listCurrentRunEvidenceChunksForCarryForward(runId: string): Promise<CurrentRunEvidenceChunkForCarryForward[]>;

  /** Inserts one candidate and resolves with the id assigned to it. */
  insertCandidate(params: InsertContextCandidateInput): Promise<{ id: string }>;

  /** Loads the prompt-facing projection of the candidates identified by `candidateKeys` within `runId`. */
  listCandidatesForPromptByKeys(runId: string, candidateKeys: string[]): Promise<ReconcileCandidateForPrompt[]>;

  /**
   * Applies `rejectionReason` to the still-pending candidates among `candidateKeys`.
   * Resolves with a number — presumably the count of candidates that were updated (TODO confirm).
   */
  markPendingCandidatesByReason(params: {
    runId: string;
    candidateKeys: string[];
    rejectionReason: ContextCandidateRejectionReason;
  }): Promise<number>;

  /** Aggregates the per-status (and per-rejection-reason) counts for the given candidates of `runId`. */
  summarizeCandidateVerdicts(runId: string, candidateKeys: string[]): Promise<ContextCandidateVerdictSummary>;
}
/** Tunables for curator pagination: how many candidates per batch, how many passes, and the step budget of each pass. */
export interface CuratorPaginationSettings {
  batchSize: number;
  maxPasses: number;
  stepBudgetPerPass: number;
}

/** Full row payload accepted by the candidate store when inserting a context candidate. */
export interface InsertContextCandidateInput {
  runId: string;
  connectionId: string;
  sourceKey: string;
  candidateKey: string;
  topic: string;
  assertion: string;
  rationale: string;
  evidenceChunkIds: string[];
  evidenceRefs: JsonValue;
  suggestedPageKey: string | null;
  actionHint: ContextCandidateActionHint;
  durabilityScore: number;
  authorityScore: number;
  reuseScore: number;
  noveltyScore: number;
  riskScore: number;
  promotionScore: number;
  status: ContextCandidateStatus;
  // Typed as a free-form string here even though a ContextCandidateRejectionReason union exists.
  rejectionReason: string | null;
  lane?: ContextCandidateLane;
  embedding?: number[] | null;
}

/** Describes a dedup cluster: one representative candidate plus the members merged into it. */
export interface MarkContextCandidateClusterInput {
  representativeId: string;
  memberIds: string[];
  /** Evidence chunk ids to store for the cluster (presumably the union across members — TODO confirm). */
  evidenceChunkIds: string[];
  evidenceRefs: JsonValue;
  /** Promotion score to store for the cluster. */
  promotionScore: number;
}

/** Candidate from an earlier run (`sourceRunId`) that may be re-inserted into the current run. */
export interface BudgetExhaustedCandidateForCarryForward {
  sourceRunId: string;
  candidateKey: string;
  topic: string;
  assertion: string;
  rationale: string;
  evidenceChunkIds: string[];
  evidenceRefs: JsonValue;
  suggestedPageKey: string | null;
  actionHint: ContextCandidateActionHint;
  durabilityScore: number;
  authorityScore: number;
  reuseScore: number;
  noveltyScore: number;
  riskScore: number;
  promotionScore: number;
  lane: ContextCandidateLane;
}

/** Evidence chunk of the current run, with the citation data needed when carrying candidates forward. */
export interface CurrentRunEvidenceChunkForCarryForward {
  chunkId: string;
  stableCitationKey: string;
  syncId: string;
  rawPath: string;
  title: string;
  path: string;
  url: string | null;
  lastEditedAt: Date | null;
  citation: JsonValue;
  content: string;
}

/**
 * Embedding provider used for context candidates.
 *
 * NOTE(review): the `Promise` return types had lost their type arguments. They are
 * restored as numeric vectors, matching `embedding?: number[] | null` above —
 * confirm against the embedding adapter.
 */
export interface ContextCandidateEmbeddingPort {
  /** Largest number of texts the provider accepts in one bulk call. */
  maxBatchSize: number;
  /** Embeds a single text. */
  computeEmbedding(text: string): Promise<number[]>;
  /** Embeds several texts; presumably one vector per input, in input order (TODO confirm). */
  computeEmbeddingsBulk(texts: string[]): Promise<number[][]>;
}
topicSimilarityThreshold: number; + scoreAggregation: ContextCandidateScoreAggregation; +} + +export interface ContextCandidateCarryforwardSettings { + reExamineBudgetExhaustedOnRerun: boolean; +} diff --git a/packages/context/src/ingest/context-evidence/context-evidence-index.service.test.ts b/packages/context/src/ingest/context-evidence/context-evidence-index.service.test.ts new file mode 100644 index 00000000..d62c7b53 --- /dev/null +++ b/packages/context/src/ingest/context-evidence/context-evidence-index.service.test.ts @@ -0,0 +1,479 @@ +import { mkdir, mkdtemp, rm, writeFile } from 'node:fs/promises'; +import { tmpdir } from 'node:os'; +import { join } from 'node:path'; +import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'; +import { ContextEvidenceIndexService } from './context-evidence-index.service.js'; +import type { ContextEvidenceIndexStorePort } from './store.js'; +import type { ContextEvidenceEmbeddingPort } from './types.js'; + +const vector384 = (first: number): number[] => [first, ...Array.from({ length: 383 }, () => 0)]; + +describe('ContextEvidenceIndexService', () => { + let tmp: string; + let repository: { + upsertDocument: ReturnType; + replaceChunks: ReturnType; + countPublishedDocumentsByRawPaths: ReturnType; + publishSync: ReturnType; + }; + let embeddings: { + computeEmbeddingsBulk: ReturnType; + maxBatchSize: number; + }; + let service: ContextEvidenceIndexService; + + beforeEach(async () => { + tmp = await mkdtemp(join(tmpdir(), 'context-evidence-')); + repository = { + upsertDocument: vi.fn().mockResolvedValue({ id: 'doc-1' }), + replaceChunks: vi.fn().mockResolvedValue(undefined), + countPublishedDocumentsByRawPaths: vi.fn().mockResolvedValue(1), + publishSync: vi.fn().mockResolvedValue({ documentsPublished: 1, documentsDeleted: 0 }), + }; + embeddings = { + maxBatchSize: 100, + computeEmbeddingsBulk: vi + .fn() + .mockImplementation((texts: string[]) => texts.map((_text, index) => vector384((index + 1) / 10))), + 
}; + service = new ContextEvidenceIndexService({ + store: repository as Partial as ContextEvidenceIndexStorePort, + embeddings: embeddings as Partial as ContextEvidenceEmbeddingPort, + logger: { warn: vi.fn() }, + }); + }); + + afterEach(async () => { + await rm(tmp, { recursive: true, force: true }); + }); + + it('indexes Notion-style page markdown into documents and heading chunks', async () => { + await writeFile(join(tmp, 'manifest.json'), JSON.stringify({ source: 'notion', apiVersion: '2026-03-11' })); + await mkdir(join(tmp, 'pages', 'page-1'), { recursive: true }); + await writeFile( + join(tmp, 'pages', 'page-1', 'metadata.json'), + JSON.stringify({ + objectType: 'page', + id: 'page-1', + title: 'Revenue Recognition', + path: 'Company Handbook / Finance / Revenue Recognition', + url: 'https://notion.example/page-1', + parentId: 'page-parent', + lastEditedAt: '2026-04-12T10:15:00.000Z', + lastEditedBy: 'Jane Doe', + properties: { Status: 'Approved' }, + }), + ); + await writeFile( + join(tmp, 'pages', 'page-1', 'page.md'), + [ + '# Revenue Recognition', + '', + '## Policy', + '', + 'Booked revenue excludes refunds and test accounts.', + '', + '## Caveats', + '', + 'This page supersedes the 2025 Sales Ops revenue definition.', + ].join('\n'), + ); + + const summary = await service.indexStagedDir({ + stagedDir: tmp, + runId: 'run-1', + connectionId: 'connection-1', + sourceKey: 'notion', + syncId: 'sync-1', + diffSet: { + added: ['pages/page-1/metadata.json', 'pages/page-1/page.md'], + modified: [], + deleted: [], + unchanged: [], + }, + currentHashes: new Map([ + ['pages/page-1/metadata.json', 'meta-hash'], + ['pages/page-1/page.md', 'page-hash'], + ]), + }); + + expect(summary.documentsIndexed).toBe(1); + expect(summary.chunksIndexed).toBeGreaterThanOrEqual(2); + expect(repository.upsertDocument).toHaveBeenCalledWith( + expect.objectContaining({ + connectionId: 'connection-1', + runId: 'run-1', + sourceKey: 'notion', + externalId: 'page-1', + title: 'Revenue 
Recognition', + path: 'Company Handbook / Finance / Revenue Recognition', + rawPath: 'pages/page-1/page.md', + contentHash: 'page-hash', + }), + ); + expect(repository.replaceChunks).toHaveBeenCalledWith( + 'doc-1', + expect.arrayContaining([ + expect.objectContaining({ + chunkKey: 'h2:policy:0000', + headingPath: ['Revenue Recognition', 'Policy'], + stableCitationKey: expect.stringMatching(/^notion:page-1:policy:[a-f0-9]{16}$/), + }), + ]), + ); + }); + + it('indexes only added or modified page documents by default', async () => { + for (const pageId of ['changed', 'unchanged']) { + await mkdir(join(tmp, 'pages', pageId), { recursive: true }); + await writeFile( + join(tmp, 'pages', pageId, 'metadata.json'), + JSON.stringify({ + objectType: 'page', + id: pageId, + title: pageId === 'changed' ? 'Changed Page' : 'Unchanged Page', + path: `Company Handbook / ${pageId}`, + }), + ); + await writeFile(join(tmp, 'pages', pageId, 'page.md'), `# ${pageId}\n\n${pageId} body`); + } + + const summary = await service.indexStagedDir({ + stagedDir: tmp, + runId: 'run-1', + connectionId: 'connection-1', + sourceKey: 'notion', + syncId: 'sync-1', + diffSet: { + added: [], + modified: ['pages/changed/page.md'], + deleted: [], + unchanged: ['pages/unchanged/page.md', 'pages/unchanged/metadata.json'], + }, + currentHashes: new Map([ + ['pages/changed/page.md', 'changed-hash'], + ['pages/unchanged/page.md', 'unchanged-hash'], + ]), + }); + + expect(summary.documentsIndexed).toBe(1); + expect(repository.upsertDocument).toHaveBeenCalledTimes(1); + expect(repository.upsertDocument).toHaveBeenCalledWith( + expect.objectContaining({ + externalId: 'changed', + contentHash: 'changed-hash', + }), + ); + }); + + it('indexes documents when only their metadata changed', async () => { + for (const pageId of ['metadata-changed', 'unchanged']) { + await mkdir(join(tmp, 'pages', pageId), { recursive: true }); + await writeFile( + join(tmp, 'pages', pageId, 'metadata.json'), + JSON.stringify({ + 
objectType: 'page', + id: pageId, + title: pageId === 'metadata-changed' ? 'Metadata Changed' : 'Unchanged Page', + path: `Company Handbook / ${pageId}`, + properties: { Status: pageId === 'metadata-changed' ? 'Approved' : 'Draft' }, + }), + ); + await writeFile(join(tmp, 'pages', pageId, 'page.md'), `# ${pageId}\n\n${pageId} body`); + } + + const summary = await service.indexStagedDir({ + stagedDir: tmp, + runId: 'run-1', + connectionId: 'connection-1', + sourceKey: 'notion', + syncId: 'sync-1', + diffSet: { + added: [], + modified: ['pages/metadata-changed/metadata.json'], + deleted: [], + unchanged: ['pages/unchanged/page.md', 'pages/unchanged/metadata.json'], + }, + currentHashes: new Map([ + ['pages/metadata-changed/page.md', 'metadata-changed-hash'], + ['pages/unchanged/page.md', 'unchanged-hash'], + ]), + }); + + expect(summary.documentsIndexed).toBe(1); + expect(repository.upsertDocument).toHaveBeenCalledTimes(1); + expect(repository.upsertDocument).toHaveBeenCalledWith( + expect.objectContaining({ + externalId: 'metadata-changed', + contentHash: 'metadata-changed-hash', + }), + ); + }); + + it('marks deleted page markdown paths as deleted evidence documents', async () => { + const summary = await service.indexStagedDir({ + stagedDir: tmp, + runId: 'run-1', + connectionId: 'connection-1', + sourceKey: 'notion', + syncId: 'sync-2', + diffSet: { + added: [], + modified: [], + deleted: ['pages/page-1/page.md', 'pages/page-1/metadata.json'], + unchanged: [], + }, + currentHashes: new Map(), + }); + + expect(summary.documentsDeleted).toBe(1); + expect(repository.countPublishedDocumentsByRawPaths).toHaveBeenCalledWith('connection-1', 'notion', [ + 'pages/page-1/page.md', + ]); + }); + + it('falls back to null embeddings when embedding computation fails', async () => { + embeddings.computeEmbeddingsBulk.mockRejectedValueOnce(new Error('embedding provider unavailable')); + await writeFile(join(tmp, 'manifest.json'), JSON.stringify({ source: 'notion', apiVersion: 
'2026-03-11' })); + await mkdir(join(tmp, 'pages', 'page-2'), { recursive: true }); + await writeFile( + join(tmp, 'pages', 'page-2', 'metadata.json'), + JSON.stringify({ + objectType: 'page', + id: 'page-2', + title: 'Glossary', + path: 'Company Handbook / Glossary', + }), + ); + await writeFile(join(tmp, 'pages', 'page-2', 'page.md'), '# Glossary\n\nARR means annual recurring revenue.'); + + const summary = await service.indexStagedDir({ + stagedDir: tmp, + runId: 'run-1', + connectionId: 'connection-1', + sourceKey: 'notion', + syncId: 'sync-1', + diffSet: { added: ['pages/page-2/page.md'], modified: [], deleted: [], unchanged: [] }, + currentHashes: new Map([['pages/page-2/page.md', 'page-hash']]), + }); + + expect(summary.embeddingFailures).toBe(1); + expect(repository.replaceChunks).toHaveBeenCalledWith( + 'doc-1', + expect.arrayContaining([expect.objectContaining({ embedding: null })]), + ); + }); + + it('batches embedding requests at the provider batch size', async () => { + embeddings.computeEmbeddingsBulk.mockImplementation((texts: string[]) => { + if (texts.length > 100) { + throw new Error('too many texts'); + } + return texts.map((_text, index) => vector384(index / 100)); + }); + await mkdir(join(tmp, 'pages', 'large-page'), { recursive: true }); + await writeFile( + join(tmp, 'pages', 'large-page', 'metadata.json'), + JSON.stringify({ + objectType: 'page', + id: 'large-page', + title: 'Large Page', + path: 'Company Handbook / Large Page', + }), + ); + await writeFile( + join(tmp, 'pages', 'large-page', 'page.md'), + Array.from({ length: 101 }, (_value, index) => + [`## Section ${index + 1}`, '', `Body ${index + 1}`].join('\n'), + ).join('\n\n'), + ); + + const summary = await service.indexStagedDir({ + stagedDir: tmp, + runId: 'run-1', + connectionId: 'connection-1', + sourceKey: 'notion', + syncId: 'sync-1', + diffSet: { added: ['pages/large-page/page.md'], modified: [], deleted: [], unchanged: [] }, + currentHashes: new 
Map([['pages/large-page/page.md', 'large-hash']]), + }); + + expect(summary.embeddingFailures).toBe(0); + expect(embeddings.computeEmbeddingsBulk).toHaveBeenCalledTimes(2); + expect(embeddings.computeEmbeddingsBulk.mock.calls.map(([texts]) => texts)).toEqual([ + expect.arrayContaining([expect.stringContaining('Section 1')]), + expect.arrayContaining([expect.stringContaining('Section 101')]), + ]); + expect(embeddings.computeEmbeddingsBulk.mock.calls[0][0]).toHaveLength(100); + expect(embeddings.computeEmbeddingsBulk.mock.calls[1][0]).toHaveLength(1); + expect(repository.replaceChunks).toHaveBeenCalledWith( + 'doc-1', + expect.arrayContaining([expect.objectContaining({ embedding: expect.any(Array) })]), + ); + }); + + it('splits single long paragraphs into bounded chunks', async () => { + await mkdir(join(tmp, 'pages', 'long-paragraph'), { recursive: true }); + await writeFile( + join(tmp, 'pages', 'long-paragraph', 'metadata.json'), + JSON.stringify({ + objectType: 'page', + id: 'long-paragraph', + title: 'Long Paragraph', + path: 'Company Handbook / Long Paragraph', + }), + ); + await writeFile(join(tmp, 'pages', 'long-paragraph', 'page.md'), `# Long Paragraph\n\n${'x'.repeat(12_000)}`); + + await service.indexStagedDir({ + stagedDir: tmp, + runId: 'run-1', + connectionId: 'connection-1', + sourceKey: 'notion', + syncId: 'sync-1', + diffSet: { added: ['pages/long-paragraph/page.md'], modified: [], deleted: [], unchanged: [] }, + currentHashes: new Map([['pages/long-paragraph/page.md', 'long-hash']]), + }); + + const chunks = repository.replaceChunks.mock.calls[0][1] as Array<{ content: string }>; + expect(chunks.length).toBeGreaterThan(1); + expect(chunks.every((chunk) => chunk.content.length <= 4800)).toBe(true); + }); + + it('creates unique stable citation keys for duplicate heading leaves', async () => { + embeddings.computeEmbeddingsBulk.mockImplementation((texts: string[]) => + texts.map((_text, index) => vector384(index / 10)), + ); + await mkdir(join(tmp, 
'pages', 'duplicate-headings'), { recursive: true }); + await writeFile( + join(tmp, 'pages', 'duplicate-headings', 'metadata.json'), + JSON.stringify({ + objectType: 'page', + id: 'duplicate-headings', + title: 'Duplicate Headings', + path: 'Company Handbook / Duplicate Headings', + }), + ); + await writeFile( + join(tmp, 'pages', 'duplicate-headings', 'page.md'), + [ + '# Duplicate Headings', + '', + '## Overview', + '', + 'First overview.', + '', + '## Overview', + '', + 'Second overview.', + ].join('\n'), + ); + + await service.indexStagedDir({ + stagedDir: tmp, + runId: 'run-1', + connectionId: 'connection-1', + sourceKey: 'notion', + syncId: 'sync-1', + diffSet: { added: ['pages/duplicate-headings/page.md'], modified: [], deleted: [], unchanged: [] }, + currentHashes: new Map([['pages/duplicate-headings/page.md', 'duplicate-hash']]), + }); + + const chunks = repository.replaceChunks.mock.calls[0][1]; + const citationKeys = chunks.map((chunk: { stableCitationKey: string }) => chunk.stableCitationKey); + expect(new Set(citationKeys).size).toBe(citationKeys.length); + expect(citationKeys).toEqual([ + expect.stringMatching(/^notion:duplicate-headings:overview:[a-f0-9]{16}$/), + expect.stringMatching(/^notion:duplicate-headings:overview:[a-f0-9]{16}$/), + ]); + }); + + it('persists Notion links metadata for neighbor lookup', async () => { + await mkdir(join(tmp, 'pages', 'page-root'), { recursive: true }); + await writeFile( + join(tmp, 'pages/page-root/metadata.json'), + JSON.stringify({ + objectType: 'page', + id: 'page-root', + title: 'Root', + path: 'Root', + }), + ); + await writeFile(join(tmp, 'pages/page-root/page.md'), '# Root\n\nSee linked pages.'); + await writeFile( + join(tmp, 'pages/page-root/links.json'), + JSON.stringify({ + children: ['page-child'], + reverseLinks: ['page-parent'], + mentions: ['page-mentioned'], + databases: [], + }), + ); + + await service.indexStagedDir({ + stagedDir: tmp, + runId: 'run-1', + connectionId: 'connection-1', + 
sourceKey: 'notion', + syncId: 'sync-1', + diffSet: { added: ['pages/page-root/links.json'], modified: [], deleted: [], unchanged: [] }, + currentHashes: new Map(), + forceRebuild: true, + }); + + expect(repository.upsertDocument).toHaveBeenCalledWith( + expect.objectContaining({ + metadata: expect.objectContaining({ + linksPath: 'pages/page-root/links.json', + links: expect.objectContaining({ + children: ['page-child'], + reverseLinks: ['page-parent'], + mentions: ['page-mentioned'], + }), + }), + }), + ); + }); + + it('indexes data-source row metadata as the surviving document form', async () => { + await mkdir(join(tmp, 'data-sources/data-source-1/rows/row-1'), { recursive: true }); + await writeFile( + join(tmp, 'data-sources/data-source-1/rows/row-1/metadata.json'), + JSON.stringify({ + objectType: 'data_source_row', + id: 'row-1', + title: 'Row One', + path: 'Policies / Row One', + parentId: 'data-source-1', + databaseId: null, + dataSourceId: 'data-source-1', + properties: {}, + }), + ); + await writeFile(join(tmp, 'data-sources/data-source-1/rows/row-1/page.md'), '# Row One\n\nDurable row fact.'); + + await service.indexStagedDir({ + stagedDir: tmp, + runId: 'run-1', + connectionId: 'connection-1', + sourceKey: 'notion', + syncId: 'sync-1', + diffSet: { + added: ['data-sources/data-source-1/rows/row-1/metadata.json', 'data-sources/data-source-1/rows/row-1/page.md'], + modified: [], + deleted: [], + unchanged: [], + }, + currentHashes: new Map([['data-sources/data-source-1/rows/row-1/page.md', 'row-hash']]), + }); + + expect(repository.upsertDocument).toHaveBeenCalledWith( + expect.objectContaining({ + externalId: 'row-1', + objectType: 'data_source_row', + dataSourceId: 'data-source-1', + rawPath: 'data-sources/data-source-1/rows/row-1/page.md', + }), + ); + }); +}); diff --git a/packages/context/src/ingest/context-evidence/context-evidence-index.service.ts b/packages/context/src/ingest/context-evidence/context-evidence-index.service.ts new file mode 100644 
import { createHash } from 'node:crypto';
import { readdir, readFile } from 'node:fs/promises';
import { basename, dirname, join, relative } from 'node:path';
import { noopLogger, type KloLogger } from '../../core/index.js';
import type { JsonValue } from '../ports.js';
import type { DiffSet } from '../types.js';
import type { ContextEvidenceIndexStorePort } from './store.js';
import type {
  ContextEvidenceEmbeddingPort,
  ContextEvidenceIndexSummary,
  ReplaceContextEvidenceChunk,
} from './types.js';

/** Arguments of {@link ContextEvidenceIndexService.indexStagedDir}. */
interface IndexStagedDirArgs {
  /** Root of the staged source snapshot that is scanned for `metadata.json` / `page.md` pairs. */
  stagedDir: string;
  runId: string;
  connectionId: string;
  sourceKey: string;
  syncId: string;
  /** Paths (relative to `stagedDir`, forward slashes) that were added/modified/deleted/unchanged in this sync. */
  diffSet: DiffSet;
  /** Content hashes keyed by the same relative paths; looked up for each document's `page.md`. */
  currentHashes: Map<string, string>;
  /** When true, every collected document is indexed regardless of `diffSet`. */
  forceRebuild?: boolean;
}

/** Arguments of {@link ContextEvidenceIndexService.publishSync}. */
interface PublishSyncArgs {
  connectionId: string;
  sourceKey: string;
  syncId: string;
  diffSet: DiffSet;
}

interface ContextEvidenceIndexServiceDeps {
  store: ContextEvidenceIndexStorePort;
  embeddings: ContextEvidenceEmbeddingPort;
  /** Only `warn` is used; defaults to `noopLogger`. */
  logger?: Pick<KloLogger, 'warn'>;
}

type JsonObject = { [key: string]: JsonValue | undefined };

/** One staged document: its `metadata.json`, sibling `page.md`, and optional `links.json`. */
interface StagedEvidenceDocument {
  metadataPath: string;
  markdownPath: string;
  linksPath?: string;
  metadata: {
    objectType?: string;
    id?: string;
    title?: string;
    path?: string;
    url?: string | null;
    parentId?: string | null;
    databaseId?: string | null;
    dataSourceId?: string | null;
    lastEditedAt?: string | null;
    lastEditedBy?: string | null;
    properties?: JsonObject;
  };
  links?: JsonObject;
  markdown: string;
}

/** A slice of a document's markdown together with the heading trail it sits under. */
interface MarkdownChunk {
  headingPath: string[];
  content: string;
}

/**
 * Indexes staged source documents (a `metadata.json` next to a `page.md`) into the
 * context-evidence store: one document row per page plus heading-based chunks with
 * search text, embeddings and stable citation keys.
 */
export class ContextEvidenceIndexService {
  private readonly store: ContextEvidenceIndexStorePort;
  private readonly embeddings: ContextEvidenceEmbeddingPort;
  private readonly logger: Pick<KloLogger, 'warn'>;

  constructor(deps: ContextEvidenceIndexServiceDeps) {
    this.store = deps.store;
    this.embeddings = deps.embeddings;
    this.logger = deps.logger ?? noopLogger;
  }

  /**
   * Indexes every staged document whose markdown, metadata or links file is listed as
   * added/modified in `args.diffSet` (or every document when `args.forceRebuild` is set).
   *
   * Each indexed document is upserted with `publishState: 'pending'` and its chunks are
   * replaced wholesale. Embedding failures do not abort indexing: the affected chunks get
   * a `null` embedding and the document is counted once in `embeddingFailures`.
   *
   * @returns Counts of indexed documents/chunks, the number of published documents matching
   *          the deleted `page.md` paths (as reported by the store), the number of documents
   *          with at least one failed embedding batch, and the collected warnings.
   */
  async indexStagedDir(args: IndexStagedDirArgs): Promise<ContextEvidenceIndexSummary> {
    const warnings: string[] = [];
    const documents = await this.collectDocuments(args.stagedDir, warnings);
    const indexablePaths = this.indexableDocumentPaths(args.diffSet);
    let documentsIndexed = 0;
    let chunksIndexed = 0;
    let embeddingFailures = 0;

    for (const staged of documents) {
      if (!args.forceRebuild && !this.shouldIndexDocument(args.stagedDir, staged, indexablePaths)) {
        continue;
      }

      const externalId = staged.metadata.id;
      const title = staged.metadata.title;
      const path = staged.metadata.path ?? title;

      if (!externalId || !title || !path) {
        warnings.push(`Skipped ${staged.metadataPath}: metadata requires id, title, and path`);
        continue;
      }

      const rawPath = this.toRawPath(args.stagedDir, staged.markdownPath);
      // Prefer the hash computed by the sync; fall back to hashing the markdown we just read.
      const contentHash = args.currentHashes.get(rawPath) ?? this.sha256(staged.markdown);
      const document = await this.store.upsertDocument({
        runId: args.runId,
        connectionId: args.connectionId,
        sourceKey: args.sourceKey,
        externalId,
        externalParentId: staged.metadata.parentId ?? null,
        databaseId: staged.metadata.databaseId ?? null,
        dataSourceId: staged.metadata.dataSourceId ?? null,
        title,
        path,
        url: staged.metadata.url ?? null,
        objectType: staged.metadata.objectType ?? 'page',
        // An unparsable timestamp is stored as null instead of an Invalid Date.
        lastEditedAt: this.parseTimestamp(staged.metadata.lastEditedAt),
        lastEditedBy: staged.metadata.lastEditedBy ?? null,
        rawPath,
        syncId: args.syncId,
        contentHash,
        publishState: 'pending',
        metadata: {
          metadataPath: this.toRawPath(args.stagedDir, staged.metadataPath),
          ...(staged.linksPath && staged.links
            ? { linksPath: this.toRawPath(args.stagedDir, staged.linksPath), links: staged.links }
            : {}),
          properties: staged.metadata.properties ?? {},
        },
      });

      const chunks = this.buildChunks(staged.markdown, title);
      const searchTexts = chunks.map((chunk) => this.buildSearchText(staged, chunk));
      const embeddings = await this.computeEmbeddings(searchTexts);

      if (embeddings.failed) {
        embeddingFailures += 1;
      }

      // Counts how often each normalized heading path has been seen so that repeated
      // headings (or one section split into several chunks) still get distinct citation keys.
      const headingPathOccurrences = new Map<string, number>();
      const replaceChunks: ReplaceContextEvidenceChunk[] = chunks.map((chunk, ordinal) => {
        const headingLeaf = chunk.headingPath[chunk.headingPath.length - 1] ?? title;
        const headingSlug = this.slug(headingLeaf);
        const normalizedHeadingPath = this.normalizeHeadingPath(chunk.headingPath);
        const occurrence = (headingPathOccurrences.get(normalizedHeadingPath) ?? 0) + 1;
        headingPathOccurrences.set(normalizedHeadingPath, occurrence);

        return {
          chunkKey: `${this.headingLevelKey(chunk.headingPath)}:${headingSlug}:${String(ordinal).padStart(4, '0')}`,
          headingPath: chunk.headingPath,
          ordinal,
          content: chunk.content,
          searchText: searchTexts[ordinal],
          embedding: embeddings.values[ordinal] ?? null,
          tokenCount: this.estimateTokens(chunk.content),
          citation: {
            source: args.sourceKey,
            pageId: externalId,
            title,
            path,
            url: staged.metadata.url ?? null,
            lastEditedAt: staged.metadata.lastEditedAt ?? null,
            syncId: args.syncId,
            rawPath,
          },
          stableCitationKey: this.buildStableCitationKey(
            args.sourceKey,
            externalId,
            headingSlug,
            normalizedHeadingPath,
            occurrence,
          ),
          syncId: args.syncId,
          contentHash: this.sha256(chunk.content),
        };
      });

      await this.store.replaceChunks(document.id, replaceChunks);
      documentsIndexed += 1;
      chunksIndexed += replaceChunks.length;
    }

    const deletedMarkdownPaths = this.deletedMarkdownPaths(args.diffSet);
    const documentsDeleted = await this.store.countPublishedDocumentsByRawPaths(
      args.connectionId,
      args.sourceKey,
      deletedMarkdownPaths,
    );

    return { documentsIndexed, chunksIndexed, documentsDeleted, embeddingFailures, warnings };
  }

  /** Delegates to the store's `publishSync`, passing the deleted `page.md` paths of the diff set. */
  async publishSync(args: PublishSyncArgs): Promise<{ documentsPublished: number; documentsDeleted: number }> {
    return this.store.publishSync(
      args.connectionId,
      args.sourceKey,
      args.syncId,
      this.deletedMarkdownPaths(args.diffSet),
    );
  }

  /**
   * Finds every `metadata.json` under `stagedDir` that has a readable sibling `page.md`
   * and loads both (plus `links.json` when present and a JSON object).
   * Metadata that cannot be parsed, or that is not a JSON object, is skipped with a warning.
   */
  private async collectDocuments(stagedDir: string, warnings: string[]): Promise<StagedEvidenceDocument[]> {
    const metadataPaths = await this.findFiles(stagedDir, 'metadata.json');
    const documents: StagedEvidenceDocument[] = [];

    for (const metadataPath of metadataPaths) {
      const markdownPath = join(dirname(metadataPath), 'page.md');
      let markdown: string;

      try {
        markdown = await readFile(markdownPath, 'utf-8');
      } catch {
        // Metadata without a page body is not an indexable document.
        continue;
      }

      try {
        const parsedMetadata = JSON.parse(await readFile(metadataPath, 'utf-8')) as unknown;
        if (!this.isJsonObject(parsedMetadata)) {
          // Valid JSON such as `null` or `[]` would otherwise crash on `metadata.id` later.
          warnings.push(`Skipped ${relative(stagedDir, metadataPath)}: metadata must be a JSON object`);
          continue;
        }
        const metadata = parsedMetadata as StagedEvidenceDocument['metadata'];

        const linksPath = join(dirname(metadataPath), 'links.json');
        let links: JsonObject | undefined;
        try {
          const parsedLinks = JSON.parse(await readFile(linksPath, 'utf-8')) as unknown;
          if (this.isJsonObject(parsedLinks)) {
            links = parsedLinks;
          }
        } catch {
          // links.json is optional.
        }

        documents.push({
          metadataPath,
          markdownPath,
          linksPath: links ? linksPath : undefined,
          metadata,
          links,
          markdown,
        });
      } catch (error) {
        warnings.push(
          `Skipped ${relative(stagedDir, metadataPath)}: ${error instanceof Error ? error.message : String(error)}`,
        );
      }
    }

    return documents;
  }

  /**
   * Recursively lists files named `fileName` below `root`, sorted by path.
   * NOTE(review): relies on `Dirent.parentPath`, which only exists in recent Node.js releases.
   */
  private async findFiles(root: string, fileName: string): Promise<string[]> {
    const entries = await readdir(root, { withFileTypes: true, recursive: true });
    return entries
      .filter((entry) => entry.isFile() && entry.name === fileName)
      .map((entry) => join(entry.parentPath, entry.name))
      .sort();
  }

  /** Paths that make a document eligible for (re-)indexing: everything added or modified. */
  private indexableDocumentPaths(diffSet: DiffSet): Set<string> {
    return new Set<string>([...diffSet.added, ...diffSet.modified]);
  }

  /** A document is indexed when its markdown, metadata, or links file is among `indexablePaths`. */
  private shouldIndexDocument(
    stagedDir: string,
    staged: StagedEvidenceDocument,
    indexablePaths: Set<string>,
  ): boolean {
    return (
      indexablePaths.has(this.toRawPath(stagedDir, staged.markdownPath)) ||
      indexablePaths.has(this.toRawPath(stagedDir, staged.metadataPath)) ||
      (staged.linksPath ? indexablePaths.has(this.toRawPath(stagedDir, staged.linksPath)) : false)
    );
  }

  /** Deleted paths whose file name is `page.md`; other deleted files are ignored. */
  private deletedMarkdownPaths(diffSet: DiffSet): string[] {
    return diffSet.deleted.filter((path) => basename(path) === 'page.md');
  }

  /**
   * Splits markdown into chunks at ATX headings (`#` … `######`), tracking the heading trail
   * of each chunk, then bounds chunk size via {@link splitLargeChunks}.
   *
   * Lines inside fenced code blocks (``` or ~~~) are never treated as headings, so a
   * `# comment` in a code sample stays part of its chunk. If no section has body text, the
   * whole trimmed markdown becomes a single chunk under `[title]`.
   */
  private buildChunks(markdown: string, title: string): MarkdownChunk[] {
    const lines = markdown.split(/\r?\n/);
    const chunks: MarkdownChunk[] = [];
    let headingPath: string[] = [title];
    let currentLines: string[] = [];
    // Marker (e.g. "```") of the code fence we are currently inside, or null.
    let openFence: string | null = null;

    const flush = () => {
      const content = currentLines.join('\n').trim();

      if (content) {
        chunks.push({ headingPath: [...headingPath], content });
      }

      currentLines = [];
    };

    for (const line of lines) {
      const fence = /^ {0,3}(`{3,}|~{3,})(.*)$/.exec(line);

      if (fence) {
        const marker = fence[1];
        if (openFence === null) {
          openFence = marker;
        } else if (marker[0] === openFence[0] && marker.length >= openFence.length && fence[2].trim() === '') {
          // A closing fence uses the same character, is at least as long, and has no info string.
          openFence = null;
        }
        currentLines.push(line);
        continue;
      }

      const match = openFence === null ? /^(#{1,6})\s+(.+)$/.exec(line) : null;

      if (match) {
        flush();
        const level = match[1].length;
        const heading = match[2].trim();
        // A level-1 heading resets the trail; deeper headings replace everything at or below their level.
        headingPath = level === 1 ? [heading] : [...headingPath.slice(0, level - 1), heading];
        continue;
      }

      currentLines.push(line);
    }

    flush();

    if (chunks.length === 0) {
      const content = markdown.trim();
      return content ? [{ headingPath: [title], content }] : [];
    }

    return this.splitLargeChunks(chunks);
  }

  /**
   * Ensures no chunk exceeds 4800 characters: oversized chunks are re-packed paragraph by
   * paragraph, and a single paragraph that is itself too long is cut into fixed-size slices.
   * Resulting pieces keep the heading path of the chunk they came from.
   */
  private splitLargeChunks(chunks: MarkdownChunk[]): MarkdownChunk[] {
    const maxChars = 4800;
    const out: MarkdownChunk[] = [];

    const pushBounded = (chunk: MarkdownChunk, content: string): void => {
      for (let start = 0; start < content.length; start += maxChars) {
        const part = content.slice(start, start + maxChars).trim();
        if (part) {
          out.push({ ...chunk, content: part });
        }
      }
    };

    for (const chunk of chunks) {
      if (chunk.content.length <= maxChars) {
        out.push(chunk);
        continue;
      }

      const paragraphs = chunk.content.split(/\n{2,}/);
      let current = '';

      for (const paragraph of paragraphs) {
        if (paragraph.length > maxChars) {
          if (current) {
            out.push({ ...chunk, content: current });
            current = '';
          }
          pushBounded(chunk, paragraph);
          continue;
        }

        const next = current ? `${current}\n\n${paragraph}` : paragraph;

        if (next.length > maxChars && current) {
          out.push({ ...chunk, content: current });
          current = paragraph;
        } else {
          current = next;
        }
      }

      if (current) {
        out.push({ ...chunk, content: current });
      }
    }

    return out;
  }

  /**
   * Builds the text that is embedded/searched for a chunk: title, path, heading trail,
   * `key: value` property lines and the chunk content, newline-separated (empty parts dropped).
   */
  private buildSearchText(staged: StagedEvidenceDocument, chunk: MarkdownChunk): string {
    const properties = Object.entries(staged.metadata.properties ?? {})
      .map(([key, value]) => `${key}: ${this.formatPropertyValue(value)}`)
      .join('\n');

    return [staged.metadata.title, staged.metadata.path, chunk.headingPath.join(' / '), properties, chunk.content]
      .filter(Boolean)
      .join('\n');
  }

  /**
   * Renders a property value for search text. Arrays and objects are JSON-encoded so that
   * structured values stay searchable instead of collapsing to "[object Object]";
   * primitives keep their plain string form.
   */
  private formatPropertyValue(value: JsonValue | undefined): string {
    return typeof value === 'object' && value !== null ? JSON.stringify(value) : String(value);
  }

  /**
   * Embeds `texts` in batches of at most `embeddings.maxBatchSize` (100 when that is not a
   * positive integer). A batch that throws, or returns the wrong number of vectors, is logged
   * as a warning and contributes `null` for each of its texts; later batches are still attempted.
   *
   * @returns `values` aligned index-by-index with `texts`, and `failed` = whether any batch failed.
   */
  private async computeEmbeddings(texts: string[]): Promise<{ values: Array<number[] | null>; failed: boolean }> {
    if (texts.length === 0) {
      return { values: [], failed: false };
    }

    const configuredMaxBatchSize = this.embeddings.maxBatchSize;
    const maxBatchSize: number =
      typeof configuredMaxBatchSize === 'number' &&
      Number.isInteger(configuredMaxBatchSize) &&
      configuredMaxBatchSize > 0
        ? configuredMaxBatchSize
        : 100;
    const values: Array<number[] | null> = [];
    let failed = false;

    for (let offset = 0; offset < texts.length; offset += maxBatchSize) {
      const batch = texts.slice(offset, offset + maxBatchSize);

      try {
        const batchEmbeddings = await this.embeddings.computeEmbeddingsBulk(batch);
        if (batchEmbeddings.length !== batch.length) {
          throw new Error(`expected ${batch.length} embeddings, received ${batchEmbeddings.length}`);
        }
        values.push(...batchEmbeddings);
      } catch (error) {
        failed = true;
        this.logger.warn(
          `Context evidence embeddings failed: ${error instanceof Error ? error.message : String(error)}`,
        );
        values.push(...batch.map(() => null));
      }
    }

    return { values, failed };
  }

  /** Path of `fullPath` relative to `stagedDir`, always with forward slashes. */
  private toRawPath(stagedDir: string, fullPath: string): string {
    return relative(stagedDir, fullPath).split('\\').join('/');
  }

  /** `h1`…`h6`, derived from the depth of the heading trail (clamped to 1–6). */
  private headingLevelKey(headingPath: string[]): string {
    return `h${Math.min(Math.max(headingPath.length, 1), 6)}`;
  }

  /** Lower-cases `value` and collapses every non `[a-z0-9]` run to `-`; yields `section` when nothing remains. */
  private slug(value: string): string {
    const slug = value
      .toLowerCase()
      .replace(/[^a-z0-9]+/g, '-')
      .replace(/^-+|-+$/g, '');

    return slug || 'section';
  }

  /** Joins the trimmed, lower-cased, whitespace-collapsed headings with `/`, dropping empty ones. */
  private normalizeHeadingPath(headingPath: string[]): string {
    return headingPath
      .map((heading) => heading.trim().toLowerCase().replace(/\s+/g, ' '))
      .filter(Boolean)
      .join('/');
  }

  /**
   * Builds `<sourceKey>:<externalId>:<headingSlug>:<digest>`, where the digest is the first
   * 16 hex characters of the SHA-256 over source, document id, normalized heading path and
   * the occurrence number of that heading path within the document.
   */
  private buildStableCitationKey(
    sourceKey: string,
    externalId: string,
    headingSlug: string,
    normalizedHeadingPath: string,
    occurrence: number,
  ): string {
    const digest = this.sha256([sourceKey, externalId, normalizedHeadingPath, String(occurrence)].join('\0')).slice(
      0,
      16,
    );
    return `${sourceKey}:${externalId}:${headingSlug}:${digest}`;
  }

  /** Rough token estimate: whitespace-separated word count times 1.3, rounded up. */
  private estimateTokens(value: string): number {
    return Math.ceil(value.split(/\s+/).filter(Boolean).length * 1.3);
  }

  /** Hex-encoded SHA-256 of `value`. */
  private sha256(value: string): string {
    return createHash('sha256').update(value).digest('hex');
  }

  /** Parses a timestamp string; returns null for missing or unparsable input. */
  private parseTimestamp(value: string | null | undefined): Date | null {
    if (!value) {
      return null;
    }

    const parsed = new Date(value);
    return Number.isNaN(parsed.getTime()) ? null : parsed;
  }

  /** True for non-null, non-array objects, i.e. what `JSON.parse` returns for a JSON object. */
  private isJsonObject(value: unknown): value is JsonObject {
    return typeof value === 'object' && value !== null && !Array.isArray(value);
  }
}
+export type { ContextEvidenceIndexStorePort } from './store.js'; +export type { SqliteContextEvidenceStoreOptions } from './sqlite-context-evidence-store.js'; diff --git a/packages/context/src/ingest/context-evidence/sqlite-context-evidence-store.test.ts b/packages/context/src/ingest/context-evidence/sqlite-context-evidence-store.test.ts new file mode 100644 index 00000000..9e42ff6d --- /dev/null +++ b/packages/context/src/ingest/context-evidence/sqlite-context-evidence-store.test.ts @@ -0,0 +1,490 @@ +import { mkdtemp, rm } from 'node:fs/promises'; +import { tmpdir } from 'node:os'; +import { join } from 'node:path'; +import { afterEach, beforeEach, describe, expect, it } from 'vitest'; +import type { InsertContextCandidateInput } from '../context-candidates/index.js'; +import type { JsonValue } from '../ports.js'; +import { SqliteContextEvidenceStore } from './sqlite-context-evidence-store.js'; + +describe('SqliteContextEvidenceStore', () => { + let tempDir: string; + let dbPath: string; + + beforeEach(async () => { + tempDir = await mkdtemp(join(tmpdir(), 'klo-context-evidence-sqlite-')); + dbPath = join(tempDir, '.klo', 'db.sqlite'); + }); + + afterEach(async () => { + await rm(tempDir, { recursive: true, force: true }); + }); + + function store(): SqliteContextEvidenceStore { + return new SqliteContextEvidenceStore({ dbPath }); + } + + async function seedDocument( + subject: SqliteContextEvidenceStore, + input: { + runId?: string; + syncId?: string; + externalId?: string; + externalParentId?: string | null; + title?: string; + path?: string; + rawPath?: string; + metadata?: JsonValue; + publishState?: 'pending' | 'published'; + embedding?: number[] | null; + content?: string; + searchText?: string; + } = {}, + ): Promise<{ documentId: string; chunkId: string }> { + const runId = input.runId ?? 'run-1'; + const syncId = input.syncId ?? 'sync-1'; + const externalId = input.externalId ?? 'page-1'; + const title = input.title ?? 
'Revenue Policy'; + const rawPath = input.rawPath ?? `pages/${externalId}/page.md`; + const doc = await subject.upsertDocument({ + runId, + connectionId: 'conn-1', + sourceKey: 'notion', + externalId, + externalParentId: input.externalParentId ?? null, + databaseId: null, + dataSourceId: null, + title, + path: input.path ?? `Company Handbook / ${title}`, + url: `https://notion.test/${externalId}`, + objectType: 'page', + lastEditedAt: new Date('2026-04-30T10:00:00.000Z'), + lastEditedBy: 'user-1', + rawPath, + syncId, + contentHash: `hash-${externalId}`, + publishState: input.publishState ?? 'pending', + metadata: input.metadata ?? {}, + }); + await subject.replaceChunks(doc.id, [ + { + chunkKey: 'intro', + headingPath: ['Revenue'], + ordinal: 0, + content: input.content ?? `${title} requires approval from the accountable owner.`, + searchText: input.searchText ?? `${title} approval accountable owner`, + embedding: input.embedding ?? [1, 0, 0], + tokenCount: 8, + citation: { + source: 'notion', + pageId: externalId, + title, + syncId, + rawPath, + }, + stableCitationKey: `notion:${externalId}:intro`, + syncId, + contentHash: `chunk-${externalId}`, + }, + ]); + const read = await subject.readDocumentByExternalId('conn-1', 'notion', externalId, runId); + if (!read) { + throw new Error(`seeded document ${externalId} was not readable`); + } + return { documentId: doc.id, chunkId: read.chunks[0].id }; + } + + function candidate(input: Partial = {}): InsertContextCandidateInput { + return { + runId: input.runId ?? 'run-1', + connectionId: input.connectionId ?? 'conn-1', + sourceKey: input.sourceKey ?? 'notion', + candidateKey: input.candidateKey ?? 'owner-approval-policy', + topic: input.topic ?? 'Owner approval policy', + assertion: input.assertion ?? 'Revenue policy changes require an accountable owner.', + rationale: input.rationale ?? 'The Notion evidence states that owner approval is required.', + evidenceChunkIds: input.evidenceChunkIds ?? 
['chunk-1'], + evidenceRefs: input.evidenceRefs ?? [ + { + chunkId: 'chunk-1', + stableCitationKey: 'notion:page-1:intro', + syncId: 'sync-1', + rawPath: 'pages/page-1/page.md', + }, + ], + suggestedPageKey: input.suggestedPageKey ?? 'revenue_policy', + actionHint: input.actionHint ?? 'create', + durabilityScore: input.durabilityScore ?? 3, + authorityScore: input.authorityScore ?? 3, + reuseScore: input.reuseScore ?? 2, + noveltyScore: input.noveltyScore ?? 2, + riskScore: input.riskScore ?? 0, + promotionScore: input.promotionScore ?? 10, + status: input.status ?? 'pending', + rejectionReason: input.rejectionReason ?? null, + lane: input.lane ?? 'full', + embedding: input.embedding ?? null, + }; + } + + it('persists evidence documents, chunks, publish state, and retrieval across reopen', async () => { + const first = store(); + const seeded = await seedDocument(first, { + metadata: { links: { children: ['child-1'], mentions: ['linked-1'], reverseLinks: ['back-1'] } }, + }); + await seedDocument(first, { + externalId: 'child-1', + externalParentId: 'page-1', + title: 'Child Policy', + searchText: 'child handbook reference', + embedding: [0, 1, 0], + }); + await seedDocument(first, { + externalId: 'linked-1', + title: 'Linked Policy', + searchText: 'linked handbook reference', + embedding: [0, 1, 0], + }); + await seedDocument(first, { + externalId: 'back-1', + title: 'Backlink Policy', + searchText: 'backlink handbook reference', + embedding: [0, 1, 0], + }); + + expect(await first.countPublishedDocumentsByRawPaths('conn-1', 'notion', ['pages/page-1/page.md'])).toBe(0); + expect(await first.publishSync('conn-1', 'notion', 'sync-1', [])).toEqual({ + documentsPublished: 4, + documentsDeleted: 0, + }); + + const reopened = store(); + expect(await reopened.countPublishedDocumentsByRawPaths('conn-1', 'notion', ['pages/page-1/page.md'])).toBe(1); + const search = await reopened.searchRRF({ + connectionId: 'conn-1', + sourceKey: 'notion', + queryEmbedding: [0.99, 0.01, 
0], + queryText: 'approval owner', + limit: 5, + includeDeleted: false, + }); + + expect(search[0]).toMatchObject({ + documentId: seeded.documentId, + externalId: 'page-1', + title: 'Revenue Policy', + stableCitationKey: 'notion:page-1:intro', + matchReasons: expect.arrayContaining(['lexical', 'semantic']), + lanes: expect.arrayContaining([expect.objectContaining({ lane: 'semantic', status: 'available' })]), + }); + expect(search[0].score).toBeGreaterThan(0); + + await expect(reopened.readChunkById(seeded.chunkId, 'conn-1', 'notion')).resolves.toMatchObject({ + chunk: expect.objectContaining({ id: seeded.chunkId, content: expect.stringContaining('Revenue Policy') }), + document: expect.objectContaining({ external_id: 'page-1' }), + }); + await expect(reopened.readDocumentById(seeded.documentId, 'conn-1', 'notion')).resolves.toMatchObject({ + chunks: [expect.objectContaining({ id: seeded.chunkId })], + }); + await expect( + reopened.findNeighborDocuments({ + connectionId: 'conn-1', + sourceKey: 'notion', + documentId: seeded.documentId, + relation: 'children', + limit: 5, + }), + ).resolves.toEqual([expect.objectContaining({ externalId: 'child-1', relation: 'children' })]); + await expect( + reopened.findNeighborDocuments({ + connectionId: 'conn-1', + sourceKey: 'notion', + documentId: seeded.documentId, + relation: 'linked', + limit: 5, + }), + ).resolves.toEqual([expect.objectContaining({ externalId: 'linked-1', relation: 'linked' })]); + await expect( + reopened.findNeighborDocuments({ + connectionId: 'conn-1', + sourceKey: 'notion', + documentId: seeded.documentId, + relation: 'backlinked', + limit: 5, + }), + ).resolves.toEqual([expect.objectContaining({ externalId: 'back-1', relation: 'backlinked' })]); + }); + + it('uses hybrid RRF lanes for context evidence search and exposes match reasons', async () => { + const subject = store(); + const primary = await seedDocument(subject, { + externalId: 'page-discount', + title: 'Enterprise Discount Policy', + content: 
'Enterprise discounts require finance approval before quote approval.', + searchText: 'enterprise discount finance approval quote', + embedding: [1, 0, 0], + publishState: 'published', + }); + await seedDocument(subject, { + externalId: 'page-owner', + title: 'Accountable Owner Policy', + content: 'Every policy has an accountable owner and review date.', + searchText: 'accountable owner review date', + embedding: [0.95, 0.05, 0], + publishState: 'published', + }); + await seedDocument(subject, { + externalId: 'page-expense', + title: 'Expense Policy', + content: 'Expense reimbursement requires receipt review.', + searchText: 'expense reimbursement receipt review', + embedding: [0, 1, 0], + publishState: 'published', + }); + + const search = await subject.searchRRF({ + connectionId: 'conn-1', + sourceKey: 'notion', + queryEmbedding: [1, 0, 0], + queryText: 'enterprise discount approval', + limit: 2, + includeDeleted: false, + }); + + expect(search).toHaveLength(2); + expect(search[0]).toMatchObject({ + chunkId: primary.chunkId, + documentId: primary.documentId, + externalId: 'page-discount', + title: 'Enterprise Discount Policy', + matchReasons: expect.arrayContaining(['lexical', 'semantic', 'token']), + lanes: expect.arrayContaining([ + expect.objectContaining({ lane: 'lexical', status: 'available', requestedCandidatePoolLimit: 25 }), + expect.objectContaining({ lane: 'semantic', status: 'available', requestedCandidatePoolLimit: 25 }), + expect.objectContaining({ lane: 'token', status: 'available', requestedCandidatePoolLimit: 25 }), + ]), + }); + expect(search[0].score).toBeCloseTo(1.5 / 61 + 2 / 61 + 0.75 / 61, 8); + expect(search[1].matchReasons).toContain('semantic'); + }); + + it('falls back to token substring matching when FTS has no valid terms', async () => { + const subject = store(); + await seedDocument(subject, { + externalId: 'page-cpp', + title: 'C++ Warehouse Notes', + content: 'C++ parser notes for warehouse extraction.', + searchText: 'C++ parser 
warehouse extraction', + embedding: null, + publishState: 'published', + }); + + const search = await subject.searchRRF({ + connectionId: 'conn-1', + sourceKey: 'notion', + queryEmbedding: null, + queryText: '++', + limit: 5, + includeDeleted: false, + }); + + expect(search).toHaveLength(1); + expect(search[0]).toMatchObject({ + externalId: 'page-cpp', + matchReasons: ['token'], + lanes: expect.arrayContaining([ + expect.objectContaining({ lane: 'lexical', status: 'skipped', reason: 'fts_query_empty' }), + expect.objectContaining({ lane: 'semantic', status: 'skipped', reason: 'embedding_unconfigured' }), + expect.objectContaining({ lane: 'token', status: 'available', returnedCandidateCount: 1 }), + ]), + }); + }); + + it('keeps current-run and deleted-state visibility filters before hybrid ranking', async () => { + const subject = store(); + const current = await seedDocument(subject, { + runId: 'run-current', + externalId: 'page-current', + title: 'Current Run Evidence', + searchText: 'visibility approval current', + publishState: 'pending', + }); + await seedDocument(subject, { + runId: 'run-other', + externalId: 'page-other-pending', + title: 'Other Pending Evidence', + searchText: 'visibility approval other pending', + publishState: 'pending', + }); + await seedDocument(subject, { + runId: 'run-old', + syncId: 'sync-old', + externalId: 'page-published', + title: 'Published Evidence', + searchText: 'visibility approval published', + publishState: 'published', + }); + await subject.publishSync('conn-1', 'notion', 'sync-old', ['pages/page-published/page.md']); + + const search = await subject.searchRRF({ + connectionId: 'conn-1', + sourceKey: 'notion', + queryEmbedding: null, + queryText: 'visibility approval', + limit: 10, + includeDeleted: false, + currentRunId: 'run-current', + }); + + expect(search.map((result) => result.externalId)).toEqual(['page-current']); + expect(search[0]).toMatchObject({ + chunkId: current.chunkId, + matchReasons: 
expect.arrayContaining(['lexical']), + }); + + const deletedIncluded = await subject.searchRRF({ + connectionId: 'conn-1', + sourceKey: 'notion', + queryEmbedding: null, + queryText: 'visibility approval', + limit: 10, + includeDeleted: true, + currentRunId: 'run-current', + }); + + expect(deletedIncluded.map((result) => result.externalId)).toEqual( + expect.arrayContaining(['page-current', 'page-published']), + ); + }); + + it('supports page triage lanes and light extraction chunk lookup', async () => { + const subject = store(); + await seedDocument(subject); + + expect(await subject.setDocumentTriageLane('run-1', 'pages/page-1/page.md', 'light')).toBe(1); + const chunks = await subject.listDocumentChunksForLightExtraction('run-1', 'pages/page-1/page.md'); + + expect(chunks).toEqual([ + expect.objectContaining({ + chunkId: expect.any(String), + headingPath: ['Revenue'], + rawPath: 'pages/page-1/page.md', + title: 'Revenue Policy', + stableCitationKey: 'notion:page-1:intro', + }), + ]); + }); + + it('supports candidate writes, dedup state, status updates, and carry-forward reads across reopen', async () => { + const first = store(); + const seeded = await seedDocument(first); + await first.publishSync('conn-1', 'notion', 'sync-1', []); + + const primary = await first.insertCandidate(candidate({ evidenceChunkIds: [seeded.chunkId] })); + const duplicate = await first.insertCandidate( + candidate({ + candidateKey: 'owner-approval-policy-copy', + evidenceChunkIds: [seeded.chunkId], + promotionScore: 6, + }), + ); + await first.updateCandidateEmbedding(primary.id, [0.1, 0.2, 0.3]); + await first.markCandidatesAsMergedToCluster({ + representativeId: primary.id, + memberIds: [duplicate.id], + evidenceChunkIds: [seeded.chunkId], + evidenceRefs: [{ chunkId: seeded.chunkId, stableCitationKey: 'notion:page-1:intro', syncId: 'sync-1' }], + promotionScore: 16, + }); + await first.insertCandidate( + candidate({ + runId: 'old-run', + candidateKey: 'prior-budget-candidate', + 
status: 'rejected', + rejectionReason: 'exceeded_run_budget', + evidenceChunkIds: [seeded.chunkId], + }), + ); + + const reopened = store(); + const pending = await reopened.listPendingCandidatesForDedup('run-1'); + expect(pending).toEqual([ + expect.objectContaining({ + id: primary.id, + candidateKey: 'owner-approval-policy', + embedding: JSON.stringify([0.1, 0.2, 0.3]), + promotionScore: 16, + }), + ]); + await expect( + reopened.updateCandidateStatus({ + runId: 'run-1', + candidateKey: 'owner-approval-policy', + status: 'promoted', + rejectionReason: null, + }), + ).resolves.toMatchObject({ candidate_key: 'owner-approval-policy', status: 'promoted' }); + await expect( + reopened.listBudgetExhaustedCandidatesForCarryForward({ + connectionId: 'conn-1', + sourceKey: 'notion', + currentRunId: 'run-1', + }), + ).resolves.toEqual([expect.objectContaining({ candidateKey: 'prior-budget-candidate', sourceRunId: 'old-run' })]); + await expect(reopened.listCurrentRunEvidenceChunksForCarryForward('run-1')).resolves.toEqual([ + expect.objectContaining({ chunkId: seeded.chunkId, stableCitationKey: 'notion:page-1:intro' }), + ]); + await expect(reopened.readChunksByIds([seeded.chunkId], 'conn-1', 'notion')).resolves.toEqual([ + expect.objectContaining({ chunkId: seeded.chunkId, externalId: 'page-1' }), + ]); + }); + + it('supports curator pagination prompt ordering, rejection marking, and verdict summaries', async () => { + const subject = store(); + await subject.insertCandidate( + candidate({ + candidateKey: 'c1', + topic: 'Revenue policy', + status: 'pending', + promotionScore: 10, + }), + ); + await subject.insertCandidate( + candidate({ + candidateKey: 'c2', + topic: 'Refund policy', + status: 'promoted', + promotionScore: 9, + }), + ); + await subject.insertCandidate( + candidate({ + candidateKey: 'c3', + topic: 'Task backlog', + status: 'pending', + promotionScore: 1, + }), + ); + + await expect(subject.listCandidatesForPromptByKeys('run-1', ['c3', 
'c1'])).resolves.toEqual([ + expect.objectContaining({ candidateKey: 'c3', topic: 'Task backlog' }), + expect.objectContaining({ candidateKey: 'c1', topic: 'Revenue policy' }), + ]); + + await expect( + subject.markPendingCandidatesByReason({ + runId: 'run-1', + candidateKeys: ['c1', 'c2', 'missing'], + rejectionReason: 'exceeded_curator_passes', + }), + ).resolves.toBe(1); + + await expect(subject.summarizeCandidateVerdicts('run-1', ['c1', 'c2', 'c3'])).resolves.toEqual({ + pending: 1, + promoted: 1, + merged: 0, + rejected: 1, + conflict: 0, + rejectedByReason: { exceeded_curator_passes: 1 }, + }); + }); +}); diff --git a/packages/context/src/ingest/context-evidence/sqlite-context-evidence-store.ts b/packages/context/src/ingest/context-evidence/sqlite-context-evidence-store.ts new file mode 100644 index 00000000..5d18394f --- /dev/null +++ b/packages/context/src/ingest/context-evidence/sqlite-context-evidence-store.ts @@ -0,0 +1,1418 @@ +import { randomUUID } from 'node:crypto'; +import { mkdirSync } from 'node:fs'; +import { dirname } from 'node:path'; +import Database from 'better-sqlite3'; +import { HybridSearchCore, type SearchCandidateGenerator, type SearchLaneBreakdown } from '../../search/index.js'; +import type { + ContextCandidateStatusResult, + ContextEvidenceChunkForCandidate, + ContextEvidenceChunkReadResult, + ContextEvidenceNeighborResult, + ContextEvidenceReadResult, + ContextEvidenceSearchArgs, + ContextEvidenceSearchMatchReason, + ContextEvidenceSearchResult, + ContextEvidenceToolStorePort, +} from '../../tools/context-evidence-tool-store.js'; +import type { + BudgetExhaustedCandidateForCarryForward, + ContextCandidateRejectionReason, + ContextCandidateStorePort, + ContextCandidateVerdictSummary, + CurrentRunEvidenceChunkForCarryForward, + InsertContextCandidateInput, + MarkContextCandidateClusterInput, +} from '../context-candidates/index.js'; +import type { PageTriageEvidenceChunk, PageTriageStorePort } from '../page-triage/index.js'; +import 
type { ContextCandidateForDedup, ContextCandidateSummary, JsonValue } from '../ports.js'; +import type { ContextEvidenceIndexStorePort } from './store.js'; +import type { + ContextEvidenceDocumentRef, + EvidencePublishState, + ReplaceContextEvidenceChunk, + UpsertContextEvidenceDocument, +} from './types.js'; + +export interface SqliteContextEvidenceStoreOptions { + dbPath: string; + idFactory?: () => string; +} + +interface DocumentRow { + id: string; + run_id: string; + connection_id: string; + source_key: string; + external_id: string; + external_parent_id: string | null; + title: string; + path: string; + url: string | null; + raw_path: string; + sync_id: string; + publish_state: EvidencePublishState; + deleted_at: string | null; + triage_lane: string | null; + metadata_json: string; + last_edited_at: string | null; +} + +interface ChunkRow { + id: string; + document_id: string; + chunk_key: string; + heading_path_json: string; + ordinal: number; + content: string; + search_text: string; + embedding_json: string | null; + citation_json: string; + stable_citation_key: string; + sync_id: string; + content_hash: string; +} + +interface CandidateRow { + id: string; + run_id: string; + connection_id: string; + source_key: string; + candidate_key: string; + topic: string; + assertion: string; + rationale: string; + evidence_chunk_ids_json: string; + evidence_refs_json: string; + suggested_page_key: string | null; + action_hint: string; + durability_score: number; + authority_score: number; + reuse_score: number; + novelty_score: number; + risk_score: number; + promotion_score: number; + status: 'pending' | 'promoted' | 'merged' | 'rejected' | 'conflict'; + rejection_reason: string | null; + lane: 'light' | 'full' | null; + embedding_json: string | null; + created_at: string; + updated_at: string; +} + +interface VisibleChunkRow extends ChunkRow { + external_id: string; + title: string; + path: string; + url: string | null; + raw_path: string; + last_edited_at: string 
| null; +} + +interface ContextEvidenceLaneCandidate { + id: string; + chunkId: string; + rank: number; + rawScore: number; +} + +function stringifyJson(value: JsonValue | string[] | number[] | null): string { + return JSON.stringify(value ?? null); +} + +function parseJson(raw: string | null, fallback: T): T { + if (!raw) { + return fallback; + } + return JSON.parse(raw) as T; +} + +function parseDate(raw: string | null): Date | null { + return raw ? new Date(raw) : null; +} + +function placeholders(values: readonly unknown[]): string { + return values.map(() => '?').join(', '); +} +
+/**
+ * Builds an FTS5 MATCH expression from free text.
+ *
+ * The text is split on whitespace, every character outside `[A-Za-z0-9_]` is
+ * removed from each token, empty tokens are dropped, and the rest are joined
+ * as OR-ed prefix terms.
+ *
+ * Each term is emitted as a quoted string (`"token"*`) rather than a bareword.
+ * A bareword `AND`, `OR` or `NOT` is an FTS5 operator, so a query containing
+ * one of those words in upper case would otherwise be a syntax error.
+ *
+ * @param text Raw user query.
+ * @returns The MATCH expression, or `''` when no token survives sanitising.
+ */
+function ftsQuery(text: string): string {
+  return text
+    .trim()
+    .split(/\s+/)
+    .map((token) => token.replace(/[^A-Za-z0-9_]/g, ''))
+    .filter(Boolean)
+    // Tokens are [A-Za-z0-9_] only, so they can never contain a double quote.
+    .map((token) => `"${token}"*`)
+    .join(' OR ');
+}
+
+function cosine(left: number[], right: number[]): number { + let dot = 0; + let leftNorm = 0; + let rightNorm = 0; + const length = Math.min(left.length, right.length); + for (let index = 0; index < length; index++) { + dot += left[index] * right[index]; + leftNorm += left[index] * left[index]; + rightNorm += right[index] * right[index]; + } + return leftNorm === 0 || rightNorm === 0 ? 0 : dot / (Math.sqrt(leftNorm) * Math.sqrt(rightNorm)); +} + +function metadataLinks(row: DocumentRow): { + children: string[]; + mentions: string[]; + databases: string[]; + reverseLinks: string[]; +} { + const metadata = parseJson>(row.metadata_json, {}); + const links = + typeof metadata.links === 'object' && metadata.links !== null ? (metadata.links as Record) : {}; + const stringArray = (value: unknown): string[] => + Array.isArray(value) ?
value.filter((item): item is string => typeof item === 'string') : []; + return { + children: stringArray(links.children), + mentions: stringArray(links.mentions), + databases: stringArray(links.databases), + reverseLinks: stringArray(links.reverseLinks), + }; +} + +export class SqliteContextEvidenceStore + implements ContextEvidenceIndexStorePort, ContextCandidateStorePort, PageTriageStorePort, ContextEvidenceToolStorePort +{ + private readonly db: Database.Database; + private readonly idFactory: () => string; + + constructor(options: SqliteContextEvidenceStoreOptions) { + mkdirSync(dirname(options.dbPath), { recursive: true }); + this.db = new Database(options.dbPath); + this.db.pragma('journal_mode = WAL'); + this.db.pragma('foreign_keys = ON'); + this.idFactory = options.idFactory ?? (() => randomUUID()); + this.db.exec(` + CREATE TABLE IF NOT EXISTS context_evidence_documents ( + id TEXT PRIMARY KEY, + run_id TEXT NOT NULL, + connection_id TEXT NOT NULL, + source_key TEXT NOT NULL, + external_id TEXT NOT NULL, + external_parent_id TEXT, + database_id TEXT, + data_source_id TEXT, + title TEXT NOT NULL, + path TEXT NOT NULL, + url TEXT, + object_type TEXT NOT NULL, + last_edited_at TEXT, + last_edited_by TEXT, + raw_path TEXT NOT NULL, + sync_id TEXT NOT NULL, + content_hash TEXT NOT NULL, + publish_state TEXT NOT NULL, + published_at TEXT, + deleted_at TEXT, + triage_lane TEXT, + metadata_json TEXT NOT NULL, + created_at TEXT NOT NULL, + updated_at TEXT NOT NULL, + UNIQUE(connection_id, source_key, external_id, sync_id) + ); + + CREATE INDEX IF NOT EXISTS context_evidence_documents_visible_idx + ON context_evidence_documents (connection_id, source_key, publish_state, deleted_at); + + CREATE INDEX IF NOT EXISTS context_evidence_documents_run_raw_idx + ON context_evidence_documents (run_id, raw_path); + + CREATE TABLE IF NOT EXISTS context_evidence_chunks ( + id TEXT PRIMARY KEY, + document_id TEXT NOT NULL REFERENCES context_evidence_documents(id) ON DELETE 
CASCADE, + chunk_key TEXT NOT NULL, + heading_path_json TEXT NOT NULL, + ordinal INTEGER NOT NULL, + content TEXT NOT NULL, + search_text TEXT NOT NULL, + embedding_json TEXT, + token_count INTEGER NOT NULL, + citation_json TEXT NOT NULL, + stable_citation_key TEXT NOT NULL, + sync_id TEXT NOT NULL, + content_hash TEXT NOT NULL, + UNIQUE(document_id, chunk_key) + ); + + CREATE INDEX IF NOT EXISTS context_evidence_chunks_document_idx + ON context_evidence_chunks (document_id, ordinal); + + CREATE VIRTUAL TABLE IF NOT EXISTS context_evidence_chunks_fts + USING fts5(chunk_id UNINDEXED, search_text); + + CREATE TABLE IF NOT EXISTS context_knowledge_candidates ( + id TEXT PRIMARY KEY, + run_id TEXT NOT NULL, + connection_id TEXT NOT NULL, + source_key TEXT NOT NULL, + candidate_key TEXT NOT NULL, + topic TEXT NOT NULL, + assertion TEXT NOT NULL, + rationale TEXT NOT NULL, + evidence_chunk_ids_json TEXT NOT NULL, + evidence_refs_json TEXT NOT NULL, + suggested_page_key TEXT, + action_hint TEXT NOT NULL, + durability_score INTEGER NOT NULL, + authority_score INTEGER NOT NULL, + reuse_score INTEGER NOT NULL, + novelty_score INTEGER NOT NULL, + risk_score INTEGER NOT NULL, + promotion_score INTEGER NOT NULL, + status TEXT NOT NULL, + rejection_reason TEXT, + lane TEXT, + embedding_json TEXT, + representative_id TEXT, + cluster_id TEXT, + created_at TEXT NOT NULL, + updated_at TEXT NOT NULL, + UNIQUE(run_id, candidate_key) + ); + + CREATE INDEX IF NOT EXISTS context_knowledge_candidates_run_status_idx + ON context_knowledge_candidates (run_id, status, promotion_score DESC, created_at ASC); + + CREATE INDEX IF NOT EXISTS context_knowledge_candidates_carry_forward_idx + ON context_knowledge_candidates (connection_id, source_key, status, rejection_reason, updated_at DESC); + `); + } + + async upsertDocument(params: UpsertContextEvidenceDocument): Promise { + const now = new Date().toISOString(); + const existing = this.db + .prepare( + ` + SELECT id FROM 
context_evidence_documents + WHERE connection_id = ? AND source_key = ? AND external_id = ? AND sync_id = ? + `, + ) + .get(params.connectionId, params.sourceKey, params.externalId, params.syncId) as { id: string } | undefined; + const id = existing?.id ?? `ctxdoc-${this.idFactory()}`; + const publishState = params.publishState ?? 'published'; + const row = { + id, + runId: params.runId, + connectionId: params.connectionId, + sourceKey: params.sourceKey, + externalId: params.externalId, + externalParentId: params.externalParentId, + databaseId: params.databaseId, + dataSourceId: params.dataSourceId, + title: params.title, + path: params.path, + url: params.url, + objectType: params.objectType, + lastEditedAt: params.lastEditedAt?.toISOString() ?? null, + lastEditedBy: params.lastEditedBy, + rawPath: params.rawPath, + syncId: params.syncId, + contentHash: params.contentHash, + publishState, + publishedAt: publishState === 'published' ? now : null, + metadataJson: stringifyJson(params.metadata), + now, + }; + + this.db + .prepare( + ` + INSERT INTO context_evidence_documents ( + id, run_id, connection_id, source_key, external_id, external_parent_id, database_id, + data_source_id, title, path, url, object_type, last_edited_at, last_edited_by, + raw_path, sync_id, content_hash, publish_state, published_at, deleted_at, + triage_lane, metadata_json, created_at, updated_at + ) + VALUES ( + @id, @runId, @connectionId, @sourceKey, @externalId, @externalParentId, @databaseId, + @dataSourceId, @title, @path, @url, @objectType, @lastEditedAt, @lastEditedBy, + @rawPath, @syncId, @contentHash, @publishState, @publishedAt, NULL, + NULL, @metadataJson, @now, @now + ) + ON CONFLICT(connection_id, source_key, external_id, sync_id) DO UPDATE SET + run_id = excluded.run_id, + external_parent_id = excluded.external_parent_id, + database_id = excluded.database_id, + data_source_id = excluded.data_source_id, + title = excluded.title, + path = excluded.path, + url = excluded.url, + 
object_type = excluded.object_type, + last_edited_at = excluded.last_edited_at, + last_edited_by = excluded.last_edited_by, + raw_path = excluded.raw_path, + content_hash = excluded.content_hash, + publish_state = excluded.publish_state, + published_at = excluded.published_at, + deleted_at = NULL, + metadata_json = excluded.metadata_json, + updated_at = excluded.updated_at + `, + ) + .run(row); + return { id }; + } +
+  /**
+   * Replaces every chunk of a document, together with its full-text rows, in
+   * one transaction.
+   *
+   * Existing chunks and their FTS rows are deleted, then each supplied chunk
+   * is inserted under a fresh `ctxchunk-…` id with a matching FTS row.
+   *
+   * @param documentId Id of the owning `context_evidence_documents` row.
+   * @param chunks New chunk set; an empty array just clears the document.
+   */
+  async replaceChunks(documentId: string, chunks: ReplaceContextEvidenceChunk[]): Promise<void> {
+    const replace = this.db.transaction(() => {
+      // chunk_id is UNINDEXED in the FTS table, so each DELETE scans the whole
+      // table. One statement covers all old chunks with a single scan instead
+      // of one scan per chunk. It must run before the chunk rows are removed,
+      // because the subquery reads them.
+      this.db
+        .prepare(
+          `
+            DELETE FROM context_evidence_chunks_fts
+            WHERE chunk_id IN (SELECT id FROM context_evidence_chunks WHERE document_id = ?)
+          `,
+        )
+        .run(documentId);
+      this.db.prepare('DELETE FROM context_evidence_chunks WHERE document_id = ?').run(documentId);
+
+      const insertChunk = this.db.prepare(`
+        INSERT INTO context_evidence_chunks (
+          id, document_id, chunk_key, heading_path_json, ordinal, content, search_text,
+          embedding_json, token_count, citation_json, stable_citation_key, sync_id, content_hash
+        )
+        VALUES (
+          @id, @documentId, @chunkKey, @headingPathJson, @ordinal, @content, @searchText,
+          @embeddingJson, @tokenCount, @citationJson, @stableCitationKey, @syncId, @contentHash
+        )
+      `);
+      const insertFts = this.db.prepare(
+        'INSERT INTO context_evidence_chunks_fts (chunk_id, search_text) VALUES (?, ?)',
+      );
+
+      for (const chunk of chunks) {
+        const id = `ctxchunk-${this.idFactory()}`;
+        insertChunk.run({
+          id,
+          documentId,
+          chunkKey: chunk.chunkKey,
+          headingPathJson: stringifyJson(chunk.headingPath),
+          ordinal: chunk.ordinal,
+          content: chunk.content,
+          searchText: chunk.searchText,
+          embeddingJson: chunk.embedding ? stringifyJson(chunk.embedding) : null,
+          tokenCount: chunk.tokenCount,
+          citationJson: stringifyJson(chunk.citation),
+          stableCitationKey: chunk.stableCitationKey,
+          syncId: chunk.syncId,
+          contentHash: chunk.contentHash,
+        });
+        insertFts.run(id, chunk.searchText);
+      }
+    });
+    replace();
+  }
+
+ async countPublishedDocumentsByRawPaths( + connectionId: string, + sourceKey: string, + rawPaths: string[], + ): Promise { + if (rawPaths.length === 0) { + return 0; + } + const row = this.db + .prepare( + ` + SELECT count(*) AS count + FROM context_evidence_documents + WHERE connection_id = ? + AND source_key = ? + AND raw_path IN (${placeholders(rawPaths)}) + AND publish_state = 'published' + AND deleted_at IS NULL + `, + ) + .get(connectionId, sourceKey, ...rawPaths) as { count: number }; + return row.count; + } + + async publishSync( + connectionId: string, + sourceKey: string, + syncId: string, + deletedMarkdownRawPaths: string[], + ): Promise<{ documentsPublished: number; documentsDeleted: number }> { + const publish = this.db.transaction(() => { + const now = new Date().toISOString(); + const pending = this.db + .prepare( + ` + SELECT DISTINCT external_id + FROM context_evidence_documents + WHERE connection_id = ? AND source_key = ? AND sync_id = ? AND publish_state = 'pending' AND deleted_at IS NULL + `, + ) + .all(connectionId, sourceKey, syncId) as Array<{ external_id: string }>; + const externalIds = pending.map((row) => row.external_id); + if (externalIds.length > 0) { + this.db + .prepare( + ` + UPDATE context_evidence_documents + SET publish_state = 'superseded', updated_at = ? + WHERE connection_id = ? + AND source_key = ? + AND external_id IN (${placeholders(externalIds)}) + AND sync_id <> ?
+ AND publish_state = 'published' + AND deleted_at IS NULL + `, + ) + .run(now, connectionId, sourceKey, ...externalIds, syncId); + } + const published = this.db + .prepare( + ` + UPDATE context_evidence_documents + SET publish_state = 'published', published_at = ?, deleted_at = NULL, updated_at = ? + WHERE connection_id = ? AND source_key = ? AND sync_id = ? AND publish_state = 'pending' AND deleted_at IS NULL + `, + ) + .run(now, now, connectionId, sourceKey, syncId).changes; + const uniqueDeleted = [...new Set(deletedMarkdownRawPaths)]; + const deleted = + uniqueDeleted.length === 0 + ? 0 + : this.db + .prepare( + ` + UPDATE context_evidence_documents + SET deleted_at = ?, updated_at = ? + WHERE connection_id = ? + AND source_key = ? + AND raw_path IN (${placeholders(uniqueDeleted)}) + AND publish_state = 'published' + AND deleted_at IS NULL + `, + ) + .run(now, now, connectionId, sourceKey, ...uniqueDeleted).changes; + return { documentsPublished: published, documentsDeleted: deleted }; + }); + return publish(); + } + + async setDocumentTriageLane(runId: string, rawPath: string, lane: 'skip' | 'light' | 'full'): Promise { + return this.db + .prepare( + ` + UPDATE context_evidence_documents + SET triage_lane = ?, updated_at = ? + WHERE run_id = ? AND raw_path = ? + `, + ) + .run(lane, new Date().toISOString(), runId, rawPath).changes; + } + + async listDocumentChunksForLightExtraction(runId: string, rawPath: string): Promise { + const rows = this.db + .prepare( + ` + SELECT c.*, d.raw_path, d.title, d.path, d.url, d.last_edited_at + FROM context_evidence_chunks c + JOIN context_evidence_documents d ON d.id = c.document_id + WHERE d.run_id = ? AND d.raw_path = ? 
+ ORDER BY c.ordinal ASC + `, + ) + .all(runId, rawPath) as Array< + ChunkRow & Pick + >; + return rows.map((row) => ({ + chunkId: row.id, + headingPath: parseJson(row.heading_path_json, []), + ordinal: row.ordinal, + content: row.content, + stableCitationKey: row.stable_citation_key, + citation: parseJson(row.citation_json, null), + rawPath: row.raw_path, + title: row.title, + path: row.path, + url: row.url, + lastEditedAt: parseDate(row.last_edited_at), + })); + } + + async searchRRF(args: ContextEvidenceSearchArgs): Promise { + const rows = this.visibleChunks( + args.connectionId, + args.sourceKey ?? null, + args.currentRunId ?? null, + args.includeDeleted, + ); + const rowsById = new Map(rows.map((row) => [row.id, row])); + const store = this; + const core = new HybridSearchCore(); + + const generators: SearchCandidateGenerator[] = [ + { + lane: 'lexical', + async generate(searchArgs) { + const fts = ftsQuery(searchArgs.queryText); + if (!fts) { + return { status: 'skipped', candidates: [], reason: 'fts_query_empty' }; + } + const candidates = store.searchLexicalContextEvidenceCandidates( + rowsById, + fts, + searchArgs.laneCandidatePoolLimit, + ); + return { + candidates: candidates.map((candidate) => ({ + id: candidate.id, + rank: candidate.rank, + rawScore: candidate.rawScore, + })), + }; + }, + }, + { + lane: 'semantic', + async generate(searchArgs) { + if (!args.queryEmbedding) { + return { status: 'skipped', candidates: [], reason: 'embedding_unconfigured' }; + } + const candidates = store.searchSemanticContextEvidenceCandidates( + rows, + args.queryEmbedding, + searchArgs.laneCandidatePoolLimit, + ); + return { + candidates: candidates.map((candidate) => ({ + id: candidate.id, + rank: candidate.rank, + rawScore: candidate.rawScore, + })), + }; + }, + }, + { + lane: 'token', + async generate(searchArgs) { + const candidates = store.searchTokenContextEvidenceCandidates( + rows, + searchArgs.normalizedQuery.terms, + searchArgs.queryText, + 
searchArgs.laneCandidatePoolLimit, + ); + return { + candidates: candidates.map((candidate) => ({ + id: candidate.id, + rank: candidate.rank, + rawScore: candidate.rawScore, + })), + }; + }, + }, + ]; + + const result = await core.search({ queryText: args.queryText, limit: args.limit, generators }); + return result.results + .map((fused): ContextEvidenceSearchResult | null => { + const row = rowsById.get(fused.id); + return row + ? this.contextEvidenceSearchResult( + row, + fused.score, + fused.matchReasons as ContextEvidenceSearchMatchReason[], + result.lanes, + ) + : null; + }) + .filter((entry): entry is ContextEvidenceSearchResult => entry !== null); + } + + async readChunkById( + chunkId: string, + connectionId: string, + sourceKey: string, + currentRunId?: string, + ): Promise { + const row = this.visibleChunks(connectionId, sourceKey, currentRunId ?? null, false).find( + (chunk) => chunk.id === chunkId, + ); + if (!row) { + return null; + } + return { + document: this.documentForRead(row), + chunk: this.chunkForRead(row), + }; + } + + async readDocumentById( + documentId: string, + connectionId: string, + sourceKey: string, + currentRunId?: string, + ): Promise { + return this.readDocument({ documentId, connectionId, sourceKey, currentRunId: currentRunId ?? null }); + } + + async readDocumentByExternalId( + connectionId: string, + sourceKey: string, + externalId: string, + currentRunId?: string, + ): Promise { + return this.readDocument({ externalId, connectionId, sourceKey, currentRunId: currentRunId ?? null }); + } + + async readChunksByIds( + chunkIds: string[], + connectionId: string, + sourceKey: string, + currentRunId?: string, + ): Promise { + if (chunkIds.length === 0) { + return []; + } + const visible = this.visibleChunks(connectionId, sourceKey, currentRunId ?? 
null, false);
+    const byId = new Map(visible.map((row) => [row.id, row]));
+    return chunkIds.flatMap((chunkId) => {
+      const row = byId.get(chunkId);
+      if (!row) {
+        return [];
+      }
+      return [
+        {
+          chunkId: row.id,
+          documentId: row.document_id,
+          externalId: row.external_id,
+          title: row.title,
+          path: row.path,
+          url: row.url,
+          rawPath: row.raw_path,
+          content: row.content,
+          citation: parseJson(row.citation_json, null),
+          stableCitationKey: row.stable_citation_key,
+          syncId: row.sync_id,
+          lastEditedAt: parseDate(row.last_edited_at),
+        },
+      ];
+    });
+  }
+
+  /**
+   * Lists documents related to `args.documentId` through the requested relation.
+   *
+   * - `parent`: the document whose external id equals this document's `external_parent_id`.
+   * - `children`: documents whose `external_parent_id` is this document's external id.
+   * - `same_path`: other documents sharing this document's `external_parent_id`.
+   * - `linked`: external ids returned under `mentions` and `databases` by `metadataLinks`.
+   * - `backlinked`: external ids returned under `reverseLinks` by `metadataLinks`.
+   *
+   * Returns an empty array when the source document is not visible to the caller, or when
+   * the relation has nothing to resolve (for example `parent` on a document without a parent).
+   * At most `args.limit` rows are returned.
+   */
+  async findNeighborDocuments(args: {
+    connectionId: string;
+    sourceKey: string;
+    documentId: string;
+    relation: 'parent' | 'children' | 'linked' | 'backlinked' | 'same_path';
+    limit: number;
+    currentRunId?: string;
+  }): Promise<ContextEvidenceNeighborResult[]> {
+    const current = this.visibleDocument(args.connectionId, args.sourceKey, args.documentId, args.currentRunId ?? null);
+    if (!current) {
+      return [];
+    }
+    let externalIds: string[] = [];
+    if (args.relation === 'parent' && current.external_parent_id) {
+      externalIds = [current.external_parent_id];
+    } else if (args.relation === 'children') {
+      return this.neighborRowsByParent(args, current.external_id);
+    } else if (args.relation === 'same_path' && current.external_parent_id) {
+      // The sibling query also matches the current document. Ask for one extra row and trim
+      // after excluding it, otherwise the result can fall one short of `limit`.
+      // A negative limit is passed through untouched: SQLite treats it as "no limit".
+      const fetchLimit = args.limit >= 0 ? args.limit + 1 : args.limit;
+      const siblings = this.neighborRowsByParent({ ...args, limit: fetchLimit }, current.external_parent_id).filter(
+        (row) => row.externalId !== current.external_id,
+      );
+      return args.limit >= 0 ? siblings.slice(0, args.limit) : siblings;
+    } else if (args.relation === 'linked') {
+      const links = metadataLinks(current);
+      externalIds = [...links.mentions, ...links.databases];
+    } else if (args.relation === 'backlinked') {
+      externalIds = metadataLinks(current).reverseLinks;
+    }
+    return this.neighborRowsByExternalIds(args, externalIds);
+  }
+
+  async insertCandidate(
+    params: InsertContextCandidateInput,
+  ): Promise<{ id: string; candidate_key: string; promotion_score: number; status: string }> {
+    const now = new Date().toISOString();
+    const existing = this.db
+      .prepare('SELECT id, created_at FROM 
context_knowledge_candidates WHERE run_id = ? AND candidate_key = ?') + .get(params.runId, params.candidateKey) as { id: string; created_at: string } | undefined; + const id = existing?.id ?? `ctxcand-${this.idFactory()}`; + this.db + .prepare( + ` + INSERT INTO context_knowledge_candidates ( + id, run_id, connection_id, source_key, candidate_key, topic, assertion, rationale, + evidence_chunk_ids_json, evidence_refs_json, suggested_page_key, action_hint, + durability_score, authority_score, reuse_score, novelty_score, risk_score, promotion_score, + status, rejection_reason, lane, embedding_json, representative_id, cluster_id, created_at, updated_at + ) + VALUES ( + @id, @runId, @connectionId, @sourceKey, @candidateKey, @topic, @assertion, @rationale, + @evidenceChunkIdsJson, @evidenceRefsJson, @suggestedPageKey, @actionHint, + @durabilityScore, @authorityScore, @reuseScore, @noveltyScore, @riskScore, @promotionScore, + @status, @rejectionReason, @lane, @embeddingJson, NULL, NULL, @createdAt, @updatedAt + ) + ON CONFLICT(run_id, candidate_key) DO UPDATE SET + connection_id = excluded.connection_id, + source_key = excluded.source_key, + topic = excluded.topic, + assertion = excluded.assertion, + rationale = excluded.rationale, + evidence_chunk_ids_json = excluded.evidence_chunk_ids_json, + evidence_refs_json = excluded.evidence_refs_json, + suggested_page_key = excluded.suggested_page_key, + action_hint = excluded.action_hint, + durability_score = excluded.durability_score, + authority_score = excluded.authority_score, + reuse_score = excluded.reuse_score, + novelty_score = excluded.novelty_score, + risk_score = excluded.risk_score, + promotion_score = excluded.promotion_score, + status = excluded.status, + rejection_reason = excluded.rejection_reason, + lane = excluded.lane, + embedding_json = excluded.embedding_json, + updated_at = excluded.updated_at + `, + ) + .run({ + id, + runId: params.runId, + connectionId: params.connectionId, + sourceKey: params.sourceKey, 
+ candidateKey: params.candidateKey, + topic: params.topic, + assertion: params.assertion, + rationale: params.rationale, + evidenceChunkIdsJson: stringifyJson(params.evidenceChunkIds), + evidenceRefsJson: stringifyJson(params.evidenceRefs), + suggestedPageKey: params.suggestedPageKey, + actionHint: params.actionHint, + durabilityScore: params.durabilityScore, + authorityScore: params.authorityScore, + reuseScore: params.reuseScore, + noveltyScore: params.noveltyScore, + riskScore: params.riskScore, + promotionScore: params.promotionScore, + status: params.status, + rejectionReason: params.rejectionReason, + lane: params.lane ?? null, + embeddingJson: params.embedding ? stringifyJson(params.embedding) : null, + createdAt: existing?.created_at ?? now, + updatedAt: now, + }); + return { id, candidate_key: params.candidateKey, promotion_score: params.promotionScore, status: params.status }; + } + + async listCandidatesForPromptByKeys(runId: string, candidateKeys: string[]) { + if (candidateKeys.length === 0) { + return []; + } + const rows = this.db + .prepare( + ` + SELECT candidate_key, topic, assertion, rationale, action_hint, status, promotion_score, suggested_page_key, + evidence_refs_json + FROM context_knowledge_candidates + WHERE run_id = ? 
AND candidate_key IN (${placeholders(candidateKeys)}) + `, + ) + .all(runId, ...candidateKeys) as CandidateRow[]; + const byKey = new Map( + rows.map((row) => [ + row.candidate_key, + { + candidateKey: row.candidate_key, + topic: row.topic, + assertion: row.assertion, + rationale: row.rationale, + actionHint: row.action_hint, + status: row.status, + promotionScore: row.promotion_score, + suggestedPageKey: row.suggested_page_key, + evidenceRefs: parseJson(row.evidence_refs_json, null), + }, + ]), + ); + return candidateKeys.map((candidateKey) => byKey.get(candidateKey)).filter((row) => !!row); + } + + async markPendingCandidatesByReason(params: { + runId: string; + candidateKeys: string[]; + rejectionReason: ContextCandidateRejectionReason; + }): Promise { + if (params.candidateKeys.length === 0) { + return 0; + } + return this.db + .prepare( + ` + UPDATE context_knowledge_candidates + SET status = 'rejected', rejection_reason = ?, updated_at = ? + WHERE run_id = ? AND candidate_key IN (${placeholders(params.candidateKeys)}) AND status = 'pending' + `, + ) + .run(params.rejectionReason, new Date().toISOString(), params.runId, ...params.candidateKeys).changes; + } + + async summarizeCandidateVerdicts(runId: string, candidateKeys: string[]): Promise { + const summary: ContextCandidateVerdictSummary = { + pending: 0, + promoted: 0, + merged: 0, + rejected: 0, + conflict: 0, + rejectedByReason: {}, + }; + if (candidateKeys.length === 0) { + return summary; + } + const rows = this.db + .prepare( + ` + SELECT status, rejection_reason + FROM context_knowledge_candidates + WHERE run_id = ? 
AND candidate_key IN (${placeholders(candidateKeys)}) + `, + ) + .all(runId, ...candidateKeys) as CandidateRow[]; + for (const row of rows) { + if (row.status === 'pending') { + summary.pending += 1; + } else if (row.status === 'promoted') { + summary.promoted += 1; + } else if (row.status === 'merged') { + summary.merged += 1; + } else if (row.status === 'rejected') { + summary.rejected += 1; + if (row.rejection_reason) { + summary.rejectedByReason[row.rejection_reason] = (summary.rejectedByReason[row.rejection_reason] ?? 0) + 1; + } + } else if (row.status === 'conflict') { + summary.conflict += 1; + } + } + return summary; + } + + async listPendingCandidatesForDedup(runId: string): Promise { + const rows = this.db + .prepare( + ` + SELECT * FROM context_knowledge_candidates + WHERE run_id = ? AND status = 'pending' + ORDER BY promotion_score DESC, created_at ASC + `, + ) + .all(runId) as CandidateRow[]; + return rows.map((row) => this.candidateForDedup(row)); + } + + async updateCandidateEmbedding(candidateId: string, embedding: number[]): Promise { + this.db + .prepare('UPDATE context_knowledge_candidates SET embedding_json = ?, updated_at = ? WHERE id = ?') + .run(stringifyJson(embedding), new Date().toISOString(), candidateId); + } + + async markCandidatesAsMergedToCluster(params: MarkContextCandidateClusterInput): Promise { + const update = this.db.transaction(() => { + this.db + .prepare( + ` + UPDATE context_knowledge_candidates + SET cluster_id = ?, evidence_chunk_ids_json = ?, evidence_refs_json = ?, promotion_score = ?, updated_at = ? + WHERE id = ? + `, + ) + .run( + params.representativeId, + stringifyJson(params.evidenceChunkIds), + stringifyJson(params.evidenceRefs), + params.promotionScore, + new Date().toISOString(), + params.representativeId, + ); + if (params.memberIds.length > 0) { + this.db + .prepare( + ` + UPDATE context_knowledge_candidates + SET status = 'merged', representative_id = ?, cluster_id = ?, updated_at = ? 
+ WHERE id IN (${placeholders(params.memberIds)}) + `, + ) + .run(params.representativeId, params.representativeId, new Date().toISOString(), ...params.memberIds); + } + }); + update(); + } + + async listBudgetExhaustedCandidatesForCarryForward(params: { + connectionId: string; + sourceKey: string; + currentRunId: string; + }): Promise { + const currentRows = this.db + .prepare('SELECT candidate_key FROM context_knowledge_candidates WHERE run_id = ?') + .all(params.currentRunId) as Array<{ candidate_key: string }>; + const currentKeys = new Set(currentRows.map((row) => row.candidate_key)); + const rows = this.db + .prepare( + ` + SELECT * FROM context_knowledge_candidates + WHERE connection_id = ? + AND source_key = ? + AND run_id <> ? + AND status = 'rejected' + AND rejection_reason = 'exceeded_run_budget' + ORDER BY candidate_key ASC, updated_at DESC + `, + ) + .all(params.connectionId, params.sourceKey, params.currentRunId) as CandidateRow[]; + const seen = new Set(); + return rows.flatMap((row) => { + if (currentKeys.has(row.candidate_key) || seen.has(row.candidate_key)) { + return []; + } + seen.add(row.candidate_key); + return [ + { + sourceRunId: row.run_id, + candidateKey: row.candidate_key, + topic: row.topic, + assertion: row.assertion, + rationale: row.rationale, + evidenceChunkIds: parseJson(row.evidence_chunk_ids_json, []), + evidenceRefs: parseJson(row.evidence_refs_json, []), + suggestedPageKey: row.suggested_page_key, + actionHint: row.action_hint as BudgetExhaustedCandidateForCarryForward['actionHint'], + durabilityScore: row.durability_score, + authorityScore: row.authority_score, + reuseScore: row.reuse_score, + noveltyScore: row.novelty_score, + riskScore: row.risk_score, + promotionScore: row.promotion_score, + lane: row.lane, + }, + ]; + }); + } + + async listCurrentRunEvidenceChunksForCarryForward(runId: string): Promise { + const rows = this.db + .prepare( + ` + SELECT c.*, d.external_id, d.raw_path, d.title, d.path, d.url, d.last_edited_at 
+ FROM context_evidence_chunks c + JOIN context_evidence_documents d ON d.id = c.document_id + WHERE d.run_id = ? AND d.deleted_at IS NULL + ORDER BY d.raw_path ASC, c.ordinal ASC + `, + ) + .all(runId) as VisibleChunkRow[]; + return rows.map((row) => ({ + chunkId: row.id, + stableCitationKey: row.stable_citation_key, + syncId: row.sync_id, + rawPath: row.raw_path, + title: row.title, + path: row.path, + url: row.url, + lastEditedAt: parseDate(row.last_edited_at), + citation: parseJson(row.citation_json, null), + content: row.content, + })); + } + + async updateCandidateStatus(args: { + runId: string; + candidateKey: string; + status: 'pending' | 'promoted' | 'merged' | 'rejected' | 'conflict'; + rejectionReason: string | null; + }): Promise { + this.db + .prepare( + ` + UPDATE context_knowledge_candidates + SET status = ?, rejection_reason = ?, updated_at = ? + WHERE run_id = ? AND candidate_key = ? + `, + ) + .run(args.status, args.rejectionReason, new Date().toISOString(), args.runId, args.candidateKey); + const row = this.db + .prepare('SELECT candidate_key, status FROM context_knowledge_candidates WHERE run_id = ? AND candidate_key = ?') + .get(args.runId, args.candidateKey) as Pick | undefined; + return row ? { candidate_key: row.candidate_key, status: row.status } : null; + } + + async getCandidateSummary(runId: string): Promise { + const rows = this.db + .prepare( + ` + SELECT status, COUNT(*) AS count + FROM context_knowledge_candidates + WHERE run_id = ? 
+ GROUP BY status + `, + ) + .all(runId) as Array<{ status: CandidateRow['status']; count: number }>; + const summary: ContextCandidateSummary = { + total: 0, + pending: 0, + promoted: 0, + merged: 0, + rejected: 0, + conflict: 0, + }; + for (const row of rows) { + summary.total += row.count; + summary[row.status] = row.count; + } + return summary; + } + + private searchLexicalContextEvidenceCandidates( + visibleRowsById: Map, + query: string, + limit: number, + ): ContextEvidenceLaneCandidate[] { + const rows = this.db + .prepare( + ` + SELECT chunk_id, bm25(context_evidence_chunks_fts) AS score + FROM context_evidence_chunks_fts + WHERE context_evidence_chunks_fts MATCH ? + ORDER BY score ASC, chunk_id ASC + `, + ) + .all(query) as Array<{ chunk_id: string; score: number }>; + + return rows + .filter((row) => visibleRowsById.has(row.chunk_id)) + .slice(0, Math.max(1, limit)) + .map((row, index) => ({ + id: row.chunk_id, + chunkId: row.chunk_id, + rank: index + 1, + rawScore: Number(row.score), + })); + } + + private searchSemanticContextEvidenceCandidates( + rows: VisibleChunkRow[], + queryEmbedding: number[], + limit: number, + ): ContextEvidenceLaneCandidate[] { + return rows + .flatMap((row) => { + const vector = parseJson(row.embedding_json, null); + if (!vector) { + return []; + } + return [ + { + id: row.id, + chunkId: row.id, + rank: 0, + rawScore: cosine(queryEmbedding, vector), + }, + ]; + }) + .filter((candidate) => candidate.rawScore > 0) + .sort((left, right) => right.rawScore - left.rawScore || left.chunkId.localeCompare(right.chunkId)) + .slice(0, Math.max(1, limit)) + .map((candidate, index) => ({ ...candidate, rank: index + 1 })); + } + + private searchTokenContextEvidenceCandidates( + rows: VisibleChunkRow[], + terms: string[], + rawQueryText: string, + limit: number, + ): ContextEvidenceLaneCandidate[] { + const rawNeedle = rawQueryText.trim().toLowerCase(); + if (terms.length === 0 && rawNeedle.length === 0) { + return []; + } + + return rows + 
.map((row) => { + const haystack = row.search_text.toLowerCase(); + const rawScore = + terms.length > 0 + ? terms.filter((term) => haystack.includes(term)).length / terms.length + : haystack.includes(rawNeedle) + ? 1 + : 0; + return { + id: row.id, + chunkId: row.id, + rank: 0, + rawScore, + }; + }) + .filter((candidate) => candidate.rawScore > 0) + .sort((left, right) => right.rawScore - left.rawScore || left.chunkId.localeCompare(right.chunkId)) + .slice(0, Math.max(1, limit)) + .map((candidate, index) => ({ ...candidate, rank: index + 1 })); + } + + private contextEvidenceSearchResult( + row: VisibleChunkRow, + score: number, + matchReasons: ContextEvidenceSearchMatchReason[], + lanes: SearchLaneBreakdown[], + ): ContextEvidenceSearchResult { + return { + chunkId: row.id, + documentId: row.document_id, + externalId: row.external_id, + title: row.title, + path: row.path, + url: row.url, + snippet: row.content.slice(0, 500), + score, + citation: parseJson(row.citation_json, null), + stableCitationKey: row.stable_citation_key, + syncId: row.sync_id, + lastEditedAt: parseDate(row.last_edited_at), + matchReasons, + lanes, + }; + } + + private visibleChunks( + connectionId: string, + sourceKey: string | null, + currentRunId: string | null, + includeDeleted: boolean, + ): VisibleChunkRow[] { + return this.db + .prepare( + ` + SELECT + c.*, + d.external_id, + d.title, + d.path, + d.url, + d.raw_path, + d.last_edited_at + FROM context_evidence_chunks c + JOIN context_evidence_documents d ON d.id = c.document_id + WHERE d.connection_id = @connectionId + AND (@sourceKey IS NULL OR d.source_key = @sourceKey) + AND (@includeDeleted = 1 OR d.deleted_at IS NULL) + AND ( + d.publish_state = 'published' + OR (@currentRunId IS NOT NULL AND d.run_id = @currentRunId AND d.publish_state = 'pending') + ) + ORDER BY d.created_at ASC, c.ordinal ASC + `, + ) + .all({ + connectionId, + sourceKey, + currentRunId, + includeDeleted: includeDeleted ? 
1 : 0, + }) as VisibleChunkRow[]; + } + + private visibleDocument( + connectionId: string, + sourceKey: string, + documentId: string, + currentRunId: string | null, + ): DocumentRow | null { + return ( + (this.db + .prepare( + ` + SELECT * FROM context_evidence_documents + WHERE id = ? + AND connection_id = ? + AND source_key = ? + AND deleted_at IS NULL + AND ( + publish_state = 'published' + OR (? IS NOT NULL AND run_id = ? AND publish_state = 'pending') + ) + `, + ) + .get(documentId, connectionId, sourceKey, currentRunId, currentRunId) as DocumentRow | undefined) ?? null + ); + } + + private readDocument(params: { + documentId?: string; + externalId?: string; + connectionId: string; + sourceKey: string; + currentRunId: string | null; + }): ContextEvidenceReadResult | null { + const document = params.documentId + ? this.visibleDocument(params.connectionId, params.sourceKey, params.documentId, params.currentRunId) + : ((this.db + .prepare( + ` + SELECT * FROM context_evidence_documents + WHERE connection_id = ? + AND source_key = ? + AND external_id = ? + AND deleted_at IS NULL + AND ( + publish_state = 'published' + OR (? IS NOT NULL AND run_id = ? AND publish_state = 'pending') + ) + ORDER BY CASE WHEN run_id = ? THEN 0 ELSE 1 END, updated_at DESC + `, + ) + .get( + params.connectionId, + params.sourceKey, + params.externalId, + params.currentRunId, + params.currentRunId, + params.currentRunId, + ) as DocumentRow | undefined) ?? null); + if (!document) { + return null; + } + const chunks = this.db + .prepare('SELECT * FROM context_evidence_chunks WHERE document_id = ? 
ORDER BY ordinal ASC') + .all(document.id) as ChunkRow[]; + return { + document: this.documentForRead(document), + chunks: chunks.map((chunk) => this.chunkForRead(chunk)), + }; + } + + private documentForRead( + row: Pick, + ): ContextEvidenceReadResult['document'] { + return { + id: row.id, + title: row.title, + path: row.path, + external_id: row.external_id, + url: row.url, + }; + } + + private chunkForRead( + row: Pick, + ): ContextEvidenceReadResult['chunks'][number] { + return { + id: row.id, + content: row.content, + citation: parseJson(row.citation_json, null), + }; + } + + private candidateForDedup(row: CandidateRow): ContextCandidateForDedup { + return { + id: row.id, + candidateKey: row.candidate_key, + topic: row.topic, + assertion: row.assertion, + promotionScore: row.promotion_score, + createdAt: new Date(row.created_at), + evidenceChunkIds: parseJson(row.evidence_chunk_ids_json, []), + evidenceRefs: parseJson(row.evidence_refs_json, []), + embedding: row.embedding_json, + lane: row.lane === 'light' || row.lane === 'full' ? row.lane : null, + }; + } + + private neighborRowsByExternalIds( + args: { + connectionId: string; + sourceKey: string; + relation: ContextEvidenceNeighborResult['relation']; + limit: number; + currentRunId?: string; + }, + externalIds: string[], + ): ContextEvidenceNeighborResult[] { + if (externalIds.length === 0) { + return []; + } + const rows = this.db + .prepare( + ` + SELECT * FROM context_evidence_documents + WHERE connection_id = ? + AND source_key = ? + AND external_id IN (${placeholders(externalIds)}) + AND deleted_at IS NULL + AND ( + publish_state = 'published' + OR (? IS NOT NULL AND run_id = ? AND publish_state = 'pending') + ) + ORDER BY path ASC + LIMIT ? + `, + ) + .all( + args.connectionId, + args.sourceKey, + ...externalIds, + args.currentRunId ?? null, + args.currentRunId ?? 
null, + args.limit, + ) as DocumentRow[]; + return rows.map((row) => this.neighborResult(row, args.relation)); + } + + private neighborRowsByParent( + args: { + connectionId: string; + sourceKey: string; + relation: ContextEvidenceNeighborResult['relation']; + limit: number; + currentRunId?: string; + }, + parentExternalId: string, + ): ContextEvidenceNeighborResult[] { + const rows = this.db + .prepare( + ` + SELECT * FROM context_evidence_documents + WHERE connection_id = ? + AND source_key = ? + AND external_parent_id = ? + AND deleted_at IS NULL + AND ( + publish_state = 'published' + OR (? IS NOT NULL AND run_id = ? AND publish_state = 'pending') + ) + ORDER BY path ASC + LIMIT ? + `, + ) + .all( + args.connectionId, + args.sourceKey, + parentExternalId, + args.currentRunId ?? null, + args.currentRunId ?? null, + args.limit, + ) as DocumentRow[]; + return rows.map((row) => this.neighborResult(row, args.relation)); + } + + private neighborResult( + row: DocumentRow, + relation: ContextEvidenceNeighborResult['relation'], + ): ContextEvidenceNeighborResult { + return { + documentId: row.id, + externalId: row.external_id, + title: row.title, + path: row.path, + relation, + url: row.url, + lastEditedAt: parseDate(row.last_edited_at), + }; + } +} diff --git a/packages/context/src/ingest/context-evidence/store.test.ts b/packages/context/src/ingest/context-evidence/store.test.ts new file mode 100644 index 00000000..9c2d281a --- /dev/null +++ b/packages/context/src/ingest/context-evidence/store.test.ts @@ -0,0 +1,66 @@ +import { describe, expect, it, vi } from 'vitest'; +import type { ContextEvidenceIndexStorePort } from './store.js'; +import type { ReplaceContextEvidenceChunk, UpsertContextEvidenceDocument } from './types.js'; + +const documentInput: UpsertContextEvidenceDocument = { + runId: 'run-1', + connectionId: 'connection-1', + sourceKey: 'notion', + externalId: 'page-1', + externalParentId: null, + databaseId: null, + dataSourceId: null, + title: 'Revenue 
Recognition', + path: 'Company Handbook / Finance / Revenue Recognition', + url: 'https://notion.example/page-1', + objectType: 'page', + lastEditedAt: new Date('2026-04-12T10:15:00.000Z'), + lastEditedBy: 'Jane Doe', + rawPath: 'pages/page-1/page.md', + syncId: 'sync-1', + contentHash: 'page-hash', + publishState: 'pending', + metadata: { properties: { Status: 'Approved' } }, +}; + +const chunkInput: ReplaceContextEvidenceChunk = { + chunkKey: 'h2:policy:0000', + headingPath: ['Revenue Recognition', 'Policy'], + ordinal: 0, + content: 'Booked revenue excludes refunds and test accounts.', + searchText: 'Revenue Recognition\nPolicy\nBooked revenue excludes refunds and test accounts.', + embedding: [0.1, 0.2, 0.3], + tokenCount: 8, + citation: { + source: 'notion', + pageId: 'page-1', + title: 'Revenue Recognition', + path: 'Company Handbook / Finance / Revenue Recognition', + rawPath: 'pages/page-1/page.md', + }, + stableCitationKey: 'notion:page-1:policy:abc123', + syncId: 'sync-1', + contentHash: 'chunk-hash', +}; + +describe('ContextEvidenceIndexStorePort', () => { + it('describes the persistence operations required by the package indexer', async () => { + const store: ContextEvidenceIndexStorePort = { + upsertDocument: vi.fn().mockResolvedValue({ id: 'doc-1' }), + replaceChunks: vi.fn().mockResolvedValue(undefined), + countPublishedDocumentsByRawPaths: vi.fn().mockResolvedValue(1), + publishSync: vi.fn().mockResolvedValue({ documentsPublished: 1, documentsDeleted: 0 }), + }; + + await expect(store.upsertDocument(documentInput)).resolves.toEqual({ id: 'doc-1' }); + await store.replaceChunks('doc-1', [chunkInput]); + await expect( + store.countPublishedDocumentsByRawPaths('connection-1', 'notion', ['pages/page-1/page.md']), + ).resolves.toBe(1); + await expect( + store.publishSync('connection-1', 'notion', 'sync-1', ['pages/page-1/page.md']), + ).resolves.toEqual({ documentsPublished: 1, documentsDeleted: 0 }); + + 
expect(store.replaceChunks).toHaveBeenCalledWith('doc-1', [chunkInput]); + }); +}); diff --git a/packages/context/src/ingest/context-evidence/store.ts b/packages/context/src/ingest/context-evidence/store.ts new file mode 100644 index 00000000..d50ebc79 --- /dev/null +++ b/packages/context/src/ingest/context-evidence/store.ts @@ -0,0 +1,17 @@ +import type { + ContextEvidenceDocumentRef, + ReplaceContextEvidenceChunk, + UpsertContextEvidenceDocument, +} from './types.js'; + +export interface ContextEvidenceIndexStorePort { + upsertDocument(params: UpsertContextEvidenceDocument): Promise; + replaceChunks(documentId: string, chunks: ReplaceContextEvidenceChunk[]): Promise; + countPublishedDocumentsByRawPaths(connectionId: string, sourceKey: string, rawPaths: string[]): Promise; + publishSync( + connectionId: string, + sourceKey: string, + syncId: string, + deletedMarkdownRawPaths: string[], + ): Promise<{ documentsPublished: number; documentsDeleted: number }>; +} diff --git a/packages/context/src/ingest/context-evidence/types.ts b/packages/context/src/ingest/context-evidence/types.ts new file mode 100644 index 00000000..7f6c1419 --- /dev/null +++ b/packages/context/src/ingest/context-evidence/types.ts @@ -0,0 +1,55 @@ +import type { JsonValue } from '../ports.js'; + +export type EvidencePublishState = 'pending' | 'published' | 'superseded'; + +export interface ContextEvidenceDocumentRef { + id: string; +} + +export interface UpsertContextEvidenceDocument { + runId: string; + connectionId: string; + sourceKey: string; + externalId: string; + externalParentId: string | null; + databaseId: string | null; + dataSourceId: string | null; + title: string; + path: string; + url: string | null; + objectType: string; + lastEditedAt: Date | null; + lastEditedBy: string | null; + rawPath: string; + syncId: string; + contentHash: string; + publishState?: EvidencePublishState; + metadata: JsonValue; +} + +export interface ReplaceContextEvidenceChunk { + chunkKey: string; + 
headingPath: string[]; + ordinal: number; + content: string; + searchText: string; + embedding: number[] | null; + tokenCount: number; + citation: JsonValue; + stableCitationKey: string; + syncId: string; + contentHash: string; +} + +export interface ContextEvidenceEmbeddingPort { + maxBatchSize?: number; + computeEmbeddingsBulk(texts: string[]): Promise; +} + +export interface ContextEvidenceIndexSummary { + documentsIndexed: number; + chunksIndexed: number; + documentsDeleted: number; + embeddingFailures: number; + warnings: string[]; +} diff --git a/packages/context/src/ingest/dbt-shared/project-vars.test.ts b/packages/context/src/ingest/dbt-shared/project-vars.test.ts new file mode 100644 index 00000000..3b9ed6b3 --- /dev/null +++ b/packages/context/src/ingest/dbt-shared/project-vars.test.ts @@ -0,0 +1,118 @@ +import { mkdir, mkdtemp, rm, writeFile } from 'node:fs/promises'; +import { tmpdir } from 'node:os'; +import { join } from 'node:path'; +import { afterEach, beforeEach, describe, expect, it } from 'vitest'; +import { + loadProjectInfo, + parseProjectName, + parseProjectVars, + resolveJinjaVariables, +} from './project-vars.js'; + +function entries(map: Map): Record { + return Object.fromEntries([...map.entries()].sort(([a], [b]) => a.localeCompare(b))); +} + +describe('dbt-shared project vars', () => { + let tmpRoot: string; + + beforeEach(async () => { + tmpRoot = await mkdtemp(join(tmpdir(), 'dbt-project-vars-')); + }); + + afterEach(async () => { + await rm(tmpRoot, { recursive: true, force: true }); + }); + + it('extracts top-level vars, nested dotted vars, and scalar values only', () => { + const vars = parseProjectVars(` +name: revenue_project +vars: + database: analytics + enabled: true + threads: 4 + ignored_list: + - a + ignored_null: + pkg: + region: us + fiscal_year: 2026 +`); + + expect(entries(vars)).toEqual({ + database: 'analytics', + enabled: 'true', + 'pkg.fiscal_year': '2026', + 'pkg.region': 'us', + threads: '4', + }); + }); + + 
it('returns an empty variable map for missing vars, malformed YAML, arrays, and scalar documents', () => { + expect(entries(parseProjectVars('name: no_vars\n'))).toEqual({}); + expect(entries(parseProjectVars('{{{{ invalid yaml'))).toEqual({}); + expect(entries(parseProjectVars('- just\n- a\n- list\n'))).toEqual({}); + }); + + it('extracts a string project name and returns null for invalid or missing names', () => { + expect(parseProjectName('name: revenue_project\n')).toBe('revenue_project'); + expect(parseProjectName('version: 1\n')).toBeNull(); + expect(parseProjectName('{{{{ invalid yaml')).toBeNull(); + expect(parseProjectName('name: 42\n')).toBeNull(); + }); + + it('resolves exact var names, honors defaults, and reports unresolved names without throwing', () => { + const variables = new Map([ + ['database', 'analytics'], + ['pkg.region', 'us'], + ]); + + const result = resolveJinjaVariables( + [ + 'database: "{{ var(\'database\') }}"', + 'region: "{{ var("pkg.region") }}"', + 'schema: "{{ var(\'schema\', \'public\') }}"', + 'missing: "{{ var(\'missing\') }}"', + ].join('\n'), + variables, + ); + + expect(result.content).toContain('database: "analytics"'); + expect(result.content).toContain('region: "us"'); + expect(result.content).toContain('schema: "public"'); + expect(result.content).toContain('missing: "{{ var(\'missing\') }}"'); + expect(result.unresolvedVars).toEqual(['missing']); + }); + + it('keeps package-scoped variables exact and does not resolve by suffix', () => { + const variables = parseProjectVars(` +vars: + pkg: + database: package_db +`); + + const result = resolveJinjaVariables( + 'database: "{{ var(\'database\', \'fallback_db\') }}"\npackage_database: "{{ var(\'pkg.database\') }}"\n', + variables, + ); + + expect(result.content).toContain('database: "fallback_db"'); + expect(result.content).toContain('package_database: "package_db"'); + expect(result.unresolvedVars).toEqual([]); + }); + + it('loads dbt_project.yml before dbt_project.yaml 
/** The subset of a dbt project file this module inspects; all values are untrusted. */
interface DbtProjectYaml {
  name?: unknown;
  vars?: unknown;
  [key: string]: unknown;
}

/** Facts extracted from a dbt project file. */
export interface DbtProjectInfo {
  /** Scalar project vars keyed by dotted path (e.g. `pkg.region`), values stringified. */
  variables: Map<string, string>;
  /** Top-level `name` when it is a string, otherwise `null`. */
  projectName: string | null;
}

/** Outcome of substituting `{{ var(...) }}` expressions in a text. */
export interface ResolveJinjaVariablesResult {
  /** Input text with every resolvable `var()` expression replaced. */
  content: string;
  /** Sorted, de-duplicated names that had neither a value nor a default. */
  unresolvedVars: string[];
}

/**
 * Extracts scalar `vars` from dbt project YAML.
 *
 * Nested mappings are flattened with `.` separators; strings are kept as-is,
 * numbers and booleans are stringified, and nulls and arrays are ignored.
 * Malformed YAML, non-mapping documents and a missing/non-mapping `vars`
 * all yield an empty map.
 */
export function parseProjectVars(yamlContent: string): Map<string, string> {
  return collectProjectVars(parseProjectYaml(yamlContent));
}

/**
 * Returns the top-level `name` of dbt project YAML, or `null` when the YAML
 * is malformed, is not a mapping, or `name` is not a string.
 */
export function parseProjectName(yamlContent: string): string | null {
  return readProjectName(parseProjectYaml(yamlContent));
}

/**
 * Loads project info from `dbt_project.yml`, falling back to
 * `dbt_project.yaml`, and finally to an empty result when neither file can
 * be read. Never rejects.
 */
export async function loadProjectInfo(projectDir: string): Promise<DbtProjectInfo> {
  for (const fileName of ['dbt_project.yml', 'dbt_project.yaml']) {
    try {
      const content = await readFile(join(projectDir, fileName), 'utf-8');
      // Parse once and derive both fields from the same document.
      const project = parseProjectYaml(content);
      return {
        variables: collectProjectVars(project),
        projectName: readProjectName(project),
      };
    } catch {
      // Try the next dbt project filename.
    }
  }

  return { variables: new Map<string, string>(), projectName: null };
}

/**
 * Replaces `{{ var('name') }}` / `{{ var('name', 'default') }}` expressions.
 *
 * Lookup is by exact name. A known variable wins over the inline default; an
 * unknown variable without a default is left untouched in the output and
 * reported in `unresolvedVars`. Only quoted string defaults are recognised —
 * expressions with any other default form do not match the pattern and are
 * left as-is without being reported.
 */
export function resolveJinjaVariables(
  content: string,
  variables: Map<string, string>,
): ResolveJinjaVariablesResult {
  const varPattern = /\{\{\s*var\s*\(\s*['"]([^'"]+)['"]\s*(?:,\s*['"]([^'"]*)['"]\s*)?\)\s*\}\}/g;
  const unresolved = new Set<string>();

  // A replacer function is used so that `$` sequences in values are inserted literally.
  const resolvedContent = content.replace(
    varPattern,
    (fullMatch: string, varName: string, defaultValue: string | undefined) => {
      const known = variables.get(varName);
      if (known !== undefined) {
        return known;
      }

      // An empty-string default is a real default; only an absent one is undefined.
      if (defaultValue !== undefined) {
        return defaultValue;
      }

      unresolved.add(varName);
      return fullMatch;
    },
  );

  return {
    content: resolvedContent,
    unresolvedVars: [...unresolved].sort(),
  };
}

/** Parses YAML and returns it only when the document is a mapping; never throws. */
function parseProjectYaml(yamlContent: string): DbtProjectYaml | null {
  try {
    const parsed = parseYaml(yamlContent) as unknown;
    return isRecord(parsed) ? parsed : null;
  } catch {
    return null;
  }
}

/** Flattens `project.vars` into a fresh map; empty when there is nothing usable. */
function collectProjectVars(project: DbtProjectYaml | null): Map<string, string> {
  const variables = new Map<string, string>();
  if (project !== null && isRecord(project.vars)) {
    extractVariables(project.vars, '', variables);
  }
  return variables;
}

/** Returns `project.name` when it is a string, otherwise `null`. */
function readProjectName(project: DbtProjectYaml | null): string | null {
  return project !== null && typeof project.name === 'string' ? project.name : null;
}

/** Recursively copies scalar leaves of `obj` into `variables` under dotted keys. */
function extractVariables(obj: Record<string, unknown>, prefix: string, variables: Map<string, string>): void {
  for (const [key, value] of Object.entries(obj)) {
    const fullKey = prefix ? `${prefix}.${key}` : key;

    if (typeof value === 'string') {
      variables.set(fullKey, value);
    } else if (typeof value === 'number' || typeof value === 'boolean') {
      variables.set(fullKey, String(value));
    } else if (isRecord(value)) {
      // isRecord rejects null and arrays, so those fall through and are ignored.
      extractVariables(value, fullKey, variables);
    }
  }
}

/** True for non-null, non-array objects. */
function isRecord(value: unknown): value is Record<string, unknown> {
  return typeof value === 'object' && value !== null && !Array.isArray(value);
}
// Directories searched for schema YAML, relative to the project root. The final
// '.' makes the root itself a search root, so files under the named directories
// can be discovered twice; findDbtSchemaFiles de-duplicates the result.
const DBT_SCHEMA_SEARCH_DIRS = ['models', 'seeds', 'snapshots', 'analyses', '.'] as const;

// Lower-cased names of YAML files that are skipped wherever they are found.
const DBT_CONFIG_YAML_FILES = new Set<string>([
  'dbt_project.yml',
  'dbt_project.yaml',
  'packages.yml',
  'packages.yaml',
  'selectors.yml',
  'selectors.yaml',
]);

/**
 * Reads every schema YAML file found by {@link findDbtSchemaFiles}.
 *
 * @returns one entry per file with its UTF-8 content and its path relative to
 *   `projectDir`, in the order returned by findDbtSchemaFiles.
 * @throws rejects if a discovered file cannot be read.
 */
export async function loadDbtSchemaFiles(projectDir: string): Promise<DbtSchemaFile[]> {
  const schemaFiles = await findDbtSchemaFiles(projectDir);
  return Promise.all(
    schemaFiles.map(async (filePath) => ({
      content: await fs.readFile(filePath, 'utf-8'),
      path: relative(projectDir, filePath),
    })),
  );
}

/**
 * Finds `.yml` / `.yaml` files under the dbt search directories of `projectDir`.
 *
 * Missing or unreadable directories contribute nothing. The result is
 * de-duplicated and sorted.
 */
export async function findDbtSchemaFiles(projectDir: string): Promise<string[]> {
  const found = new Set<string>();

  for (const dir of DBT_SCHEMA_SEARCH_DIRS) {
    // No existence probe: the recursive walk already yields [] when the
    // directory is missing or cannot be listed.
    for (const filePath of await findYamlFilesRecursive(join(projectDir, dir))) {
      found.add(filePath);
    }
  }

  return [...found].sort();
}

/**
 * Walks `dir` depth-first and collects YAML file paths.
 *
 * Skips sub-directories whose name starts with '.' or equals `node_modules`,
 * entries that are neither a directory nor a regular file, and the config
 * files listed in DBT_CONFIG_YAML_FILES (matched case-insensitively).
 * Returns [] when `dir` cannot be listed.
 */
async function findYamlFilesRecursive(dir: string): Promise<string[]> {
  const entries = await fs.readdir(dir, { withFileTypes: true }).catch(() => null);
  if (entries === null) {
    return [];
  }

  const files: string[] = [];
  for (const entry of entries) {
    const fullPath = join(dir, entry.name);

    if (entry.isDirectory()) {
      if (!entry.name.startsWith('.') && entry.name !== 'node_modules') {
        files.push(...(await findYamlFilesRecursive(fullPath)));
      }
      continue;
    }

    if (!entry.isFile()) {
      continue;
    }

    const name = entry.name.toLowerCase();
    if (DBT_CONFIG_YAML_FILES.has(name)) {
      continue;
    }

    if (name.endsWith('.yml') || name.endsWith('.yaml')) {
      files.push(fullPath);
    }
  }

  return files;
}
expect(diff.unchanged).toEqual([]); + }); + + it('classifies added / modified / deleted / unchanged against the latest-hash baseline', async () => { + provenanceRepo.findLatestHashesForCompletedSyncs.mockResolvedValue( + new Map([ + ['a.yml', 'h1'], + ['b.yml', 'h_old'], + ['c.yml', 'hc'], + ]), + ); + const now = new Map([ + ['a.yml', 'h1'], + ['b.yml', 'h_new'], + ['d.yml', 'hd'], + ]); + const diff = await service.compute('c1', 'fake', now); + expect(diff.unchanged).toEqual(['a.yml']); + expect(diff.modified).toEqual(['b.yml']); + expect(diff.deleted).toEqual(['c.yml']); + expect(diff.added).toEqual(['d.yml']); + }); + + it('computes a pure diff from current and prior hash maps', () => { + const diff = computeDiffSetFromHashes( + new Map([ + ['a.yml', 'h1'], + ['b.yml', 'h2-new'], + ['d.yml', 'h4'], + ]), + new Map([ + ['a.yml', 'h1'], + ['b.yml', 'h2-old'], + ['c.yml', 'h3'], + ]), + ); + + expect(diff).toEqual({ + added: ['d.yml'], + modified: ['b.yml'], + deleted: ['c.yml'], + unchanged: ['a.yml'], + }); + }); + + it('returns sorted arrays for deterministic hashing', async () => { + provenanceRepo.findLatestHashesForCompletedSyncs.mockResolvedValue(new Map()); + const diff = await service.compute( + 'c1', + 'fake', + new Map([ + ['z.yml', 'hz'], + ['a.yml', 'ha'], + ]), + ); + expect(diff.added).toEqual(['a.yml', 'z.yml']); + }); + + it('with isPathInScope predicate, out-of-scope prior entries are not reported as deleted', async () => { + const prior = new Map([ + ['cards/1.json', 'hashA'], + ['cards/2.json', 'hashB'], + ['cards/3.json', 'hashC'], + ]); + const current = new Map([ + ['cards/1.json', 'hashA'], + ['cards/2.json', 'hashB'], + ]); + const inScope = new Set(['cards/1.json', 'cards/2.json']); + const svc = new DiffSetService(makeRepo(prior) as any); + const diff = await svc.compute('conn', 'metabase', current, (p) => inScope.has(p)); + expect(diff.deleted).toEqual([]); + expect(diff.unchanged).toEqual(['cards/1.json', 'cards/2.json']); + }); + + 
it('with isPathInScope predicate, in-scope deletions are still reported', async () => { + const prior = new Map([ + ['cards/1.json', 'hashA'], + ['cards/2.json', 'hashB'], + ]); + const current = new Map([['cards/1.json', 'hashA']]); + const inScope = new Set(['cards/1.json', 'cards/2.json']); + const svc = new DiffSetService(makeRepo(prior) as any); + const diff = await svc.compute('conn', 'metabase', current, (p) => inScope.has(p)); + expect(diff.deleted).toEqual(['cards/2.json']); + }); + + it('holds unchanged baseline across multiple incremental re-syncs (regression for skipped-row sync_id drift)', async () => { + // After sync 1 wrote (a.yml, h1, sync=S1, skipped) and sync 2 computed a no-op, + // sync 3 must still see a.yml as unchanged — the baseline comes from S1, not from + // the most recent sync_id alone. + provenanceRepo.findLatestHashesForCompletedSyncs.mockResolvedValue( + new Map([ + ['a.yml', 'h1'], + ['b.yml', 'h2'], + ]), + ); + const diff = await service.compute( + 'c1', + 'fake', + new Map([ + ['a.yml', 'h1'], + ['b.yml', 'h2'], + ]), + ); + expect(diff.added).toEqual([]); + expect(diff.unchanged).toEqual(['a.yml', 'b.yml']); + }); +}); + +describe('DiffSetService — scope-narrowing scenario', () => { + it('scope from [1,2,3] → [1,2] leaves no spurious deletions', async () => { + const prior = new Map([ + ['cards/1.json', 'hashA'], + ['cards/2.json', 'hashB'], + ['cards/3.json', 'hashC'], + ['sync-config.json', 'hashCfg'], + ]); + const current = new Map([ + ['cards/1.json', 'hashA'], + ['cards/2.json', 'hashB'], + ['sync-config.json', 'hashCfg2'], + ]); + const inScope = new Set(['cards/1.json', 'cards/2.json', 'sync-config.json']); + const svc = new DiffSetService(makeRepo(prior) as any); + const diff = await svc.compute('conn', 'metabase', current, (p) => inScope.has(p)); + expect(diff.deleted).toEqual([]); + expect(diff.modified).toEqual(['sync-config.json']); + expect(diff.unchanged).toEqual(['cards/1.json', 'cards/2.json']); + 
/**
 * Classifies raw paths by comparing their current content hashes with prior ones.
 *
 * - `added`: in `currentHashes`, absent from the (scoped) prior hashes
 * - `modified`: in both, hashes differ
 * - `unchanged`: in both, hashes equal
 * - `deleted`: in the (scoped) prior hashes, absent from `currentHashes`
 *
 * @param currentHashes raw path -> hash as observed now.
 * @param priorHashesRaw raw path -> hash from the baseline.
 * @param isPathInScope optional predicate; when given, prior entries whose path
 *   it rejects are dropped before comparison, so they are never reported as
 *   deleted. It is applied to prior paths only, not to current ones.
 * @returns the four path lists, each sorted with the default string ordering.
 */
export function computeDiffSetFromHashes(
  currentHashes: Map<string, string>,
  priorHashesRaw: Map<string, string>,
  isPathInScope?: (rawPath: string) => boolean,
): DiffSet {
  const priorHashes: Map<string, string> = isPathInScope
    ? new Map([...priorHashesRaw].filter(([path]) => isPathInScope(path)))
    : priorHashesRaw;

  const added: string[] = [];
  const modified: string[] = [];
  const unchanged: string[] = [];

  for (const [path, hash] of currentHashes) {
    const prior = priorHashes.get(path);
    if (prior === undefined) {
      added.push(path);
    } else if (prior === hash) {
      unchanged.push(path);
    } else {
      modified.push(path);
    }
  }

  const deleted = [...priorHashes.keys()].filter((path) => !currentHashes.has(path));

  // Sort every list so the result does not depend on Map insertion order.
  added.sort();
  modified.sort();
  unchanged.sort();
  deleted.sort();
  return { added, modified, unchanged, deleted };
}

/** Computes a diff set against the hashes recorded by the provenance port. */
export class DiffSetService {
  private readonly provenance: IngestProvenancePort;

  constructor(provenance: IngestProvenancePort) {
    this.provenance = provenance;
  }

  /**
   * Fetches the baseline via `findLatestHashesForCompletedSyncs(connectionId, sourceKey)`
   * and classifies `currentHashes` against it.
   *
   * @param isPathInScope forwarded unchanged to {@link computeDiffSetFromHashes}.
   * @throws rejects if the provenance lookup rejects.
   */
  async compute(
    connectionId: string,
    sourceKey: string,
    currentHashes: Map<string, string>,
    isPathInScope?: (rawPath: string) => boolean,
  ): Promise<DiffSet> {
    const priorHashes = await this.provenance.findLatestHashesForCompletedSyncs(connectionId, sourceKey);
    return computeDiffSetFromHashes(currentHashes, priorHashes, isPathInScope);
  }
}
// Git environment variables that are removed before spawning git.
// NOTE(review): the constant's name suggests these are the variables git sets
// while running hooks such as pre-commit — confirm.
const PRE_COMMIT_GIT_ENV = [
  'GIT_ALTERNATE_OBJECT_DIRECTORIES',
  'GIT_CONFIG_COUNT',
  'GIT_CONFIG_PARAMETERS',
  'GIT_DIR',
  'GIT_EXEC_PATH',
  'GIT_INDEX_FILE',
  'GIT_PREFIX',
  'GIT_WORK_TREE',
] as const;

/**
 * Creates a simple-git instance whose child processes receive a copy of
 * `process.env` with every PRE_COMMIT_GIT_ENV variable removed.
 * `process.env` itself is not modified.
 *
 * @param baseDir optional working directory, passed straight to `simpleGit`.
 */
export function createSimpleGit(baseDir?: string): SimpleGit {
  const scrubbed = new Set<string>(PRE_COMMIT_GIT_ENV);
  const inherited = Object.entries(process.env).filter(([name]) => !scrubbed.has(name));
  return simpleGit(baseDir).env(Object.fromEntries(inherited));
}
+export type { + LiveDatabaseStructuralChanges, + LiveDatabaseStructuralSyncOperations, + LiveDatabaseStructuralSyncPlan, + LiveDatabaseStructuralSyncStats, + LiveDatabaseSyncedColumn, + LiveDatabaseSyncedLink, + LiveDatabaseSyncedSchema, + LiveDatabaseSyncedTable, + PlanLiveDatabaseStructuralSyncInput, +} from './adapters/live-database/structural-sync.js'; +export { planLiveDatabaseStructuralSync } from './adapters/live-database/structural-sync.js'; +export type { + LiveDatabaseIntrospectionPort, + LiveDatabaseSourceAdapterDeps, +} from './adapters/live-database/types.js'; +export { getLookerTriageSignals, writeLookerEvidenceDocuments } from './adapters/looker/evidence-documents.js'; +export { LookerClient } from './adapters/looker/client.js'; +export type { + LookerClientDeps, + LookerClientLogger, + LookerConnectionParams, + LookerSdkPort, + LookerWarehouseConnectionInfo, + TestConnectionResult as LookerTestConnectionResult, +} from './adapters/looker/client.js'; +export type { + LookerClientFactory, + LookerEntityRef, + LookerRuntimeClient, +} from './adapters/looker/fetch.js'; +export { + DefaultLookerClientFactory, + DefaultLookerConnectionClientFactory, +} from './adapters/looker/factory.js'; +export { + createDaemonLookerTableIdentifierParser, + type DaemonLookerTableIdentifierParserOptions, + type KloDaemonTableIdentifierHttpJsonRunner, +} from './adapters/looker/daemon-table-identifier-parser.js'; +export type { + LookerConnectionClientFactory, + LookerCredentialResolver, +} from './adapters/looker/factory.js'; +export { + createLocalLookerCredentialResolver, + createLocalLookerSourceAdapter, + lookerCredentialsFromLocalConnection, +} from './adapters/looker/local-looker.adapter.js'; +export { + LocalLookerRuntimeStore, + type ClearLocalLookerMappingsInput, + type LocalLookerConnectionMappingListRow, + type LocalLookerMappingSource, + type LookerSourceStateReader, + type RefreshLocalLookerDiscoveredConnectionsInput, + type 
UpsertLocalLookerConnectionMappingInput, +} from './adapters/looker/local-runtime-store.js'; +export { + LOOKER_DIALECT_TO_CONNECTION_TYPE, + buildLookerPullConfigFromInputs, + collectExploreParseItems, + computeLookerMappingDrift, + discoverLookerConnections, + extractWarehouseDatabase, + extractWarehouseHost, + lookerDialectToConnectionType, + normalizeHost, + normalizeName, + projectParsedIdentifier, + refreshLookerMappingPlaceholders, + sqlglotDialectForConnectionType, + suggestKloConnectionForLookerConnection, + validateLookerMappings, + validateLookerWarehouseTarget, +} from './adapters/looker/mapping.js'; +export type { + LookerConnectionMapping as KloLookerConnectionMapping, + LookerMappingCandidateConnection, + LookerMappingClient, + LookerMappingDrift, + LookerMappingValidationResult, + LookerParsedIdentifier, + LookerTableIdentifierParseItem, + LookerTableIdentifierParser, + LookerTargetConnection, + LookerWarehouseTargetConnectionType, +} from './adapters/looker/mapping.js'; +export { + readLookerFetchReport, + writeLookerFetchReport, +} from './adapters/looker/fetch-report.js'; +export { LookerSourceAdapter, type LookerSourceAdapterDeps } from './adapters/looker/looker.adapter.js'; +export { + describeLookerScope, + hashLookerScope, + isPathInLookerScope, + readLookerScope, +} from './adapters/looker/scope.js'; +export type { + LookerQueryToSlInput, + LookerSlFieldProposal, + LookerSlMeasureProposal, + LookerSlProposal, + LookerSlSegmentProposal, +} from './adapters/looker/tools/looker-query-to-sl.tool.js'; +export { + buildLookerSlProposal, + createLookerQueryToSlTool, + formatLookerSlProposal, + lookerQueryToSlInputSchema, +} from './adapters/looker/tools/looker-query-to-sl.tool.js'; +export type { + LookerPullConfig, + LookerRuntimeCursors, + StagedDashboardFile, + StagedExploreFile, + StagedFoldersTreeFile, + StagedGroupFile, + StagedLookerFetchIssue, + StagedLookerFetchReport, + StagedLookerQuery, + StagedLookerScopeFile, + 
StagedLookerSignalsFile, + StagedLookFile, + StagedLookmlModelsFile, + StagedUserFile, +} from './adapters/looker/types.js'; +export { + lookerConnectionIdSchema, + lookerRuntimeCursorsSchema, + stagedLookerFetchIssueSchema, + stagedLookerFetchReportSchema, + stagedLookerScopeFileSchema, + stagedSyncConfigSchema, +} from './adapters/looker/types.js'; +export { LookmlSourceAdapter } from './adapters/lookml/lookml.adapter.js'; +export { parseLookmlStagedDir } from './adapters/lookml/parse.js'; +export type { ParsedLookmlProject } from './adapters/lookml/parse.js'; +export { + DEFAULT_METABASE_CLIENT_CONFIG, + DefaultMetabaseConnectionClientFactory, + MetabaseClient, + getDummyValueForWidgetType, + stripOptionalClauses, +} from './adapters/metabase/client.js'; +export { CardReferenceCycleError, expandCardReferences } from './adapters/metabase/card-references.js'; +export { IngestMetabaseClientFactory } from './adapters/metabase/client-port.js'; +export type { MetabaseClientLogger } from './adapters/metabase/client.js'; +export type { + MetabaseCard, + MetabaseCardSummary, + MetabaseClientConfig, + MetabaseClientFactory, + MetabaseClientRuntimeConfig, + MetabaseCollection, + MetabaseCollectionItem, + MetabaseConnectionClientFactory, + MetabaseDatabase, + MetabaseDatasetQuery, + MetabaseNativeQueryResult, + MetabaseParameter, + MetabaseResolvedTemplateTag, + MetabaseResultMetadataColumn, + MetabaseRuntimeClient, + MetabaseTemplateTag, + MetabaseUser, + ResolvedSqlResult, + TestConnectionResult, +} from './adapters/metabase/client-port.js'; +export type { + MetabaseSourceState, + MetabaseSourceStateMapping, + MetabaseSourceStateReader, + MetabaseSourceStateSelection, +} from './adapters/metabase/source-state-port.js'; +export { + METABASE_ENGINE_TO_CONNECTION_TYPE, + computeMetabaseMappingDrift, + computeMetabaseMappingPhysicalMismatches, + discoverMetabaseDatabases, + findBestMatch, + refreshMetabaseMapping, + validateMappingPhysicalMatch, + validateMetabaseMappings, +} 
from './adapters/metabase/mapping.js'; +export type { + AutoMatchCandidate, + AutoMatchResult as MetabaseAutoMatchResult, + DiscoveredMetabaseDatabase, + KloConnectionPhysicalInfo, + MappingPhysicalInfo, + MappingRefreshReport, + MetabaseMappedConnectionType, + MetabaseMappingDrift, + MetabaseMappingValidationResult, + PhysicalMismatch, + PhysicalMismatchInput, +} from './adapters/metabase/mapping.js'; +export { planMetabaseFanoutChildren } from './adapters/metabase/fanout-planner.js'; +export type { + MetabaseFanoutChildPlan, + MetabaseFanoutMappingInput, + PlanMetabaseFanoutChildrenInput, +} from './adapters/metabase/fanout-planner.js'; +export { MetabaseSourceAdapter } from './adapters/metabase/metabase.adapter.js'; +export { + createLocalMetabaseSourceAdapter, + metabaseRuntimeConfigFromLocalConnection, +} from './adapters/metabase/local-metabase.adapter.js'; +export { LocalMetabaseSourceStateReader } from './adapters/metabase/local-source-state-store.js'; +export type { + ClearLocalMetabaseMappingsInput, + LocalMetabaseMappingListRow, + LocalMetabaseMappingSource, + LocalMetabaseSourceStateMappingInput, + ReplaceLocalMetabaseSourceStateInput, + RefreshLocalMetabaseDiscoveredDatabasesInput, + SetLocalMetabaseMappingSyncEnabledInput, + SetLocalMetabaseSyncStateInput, + UpsertLocalMetabaseDatabaseMappingInput, +} from './adapters/metabase/local-source-state-store.js'; +export { metabaseLocalConnectionIdSchema, metabasePullConfigSchema, parseMetabasePullConfig } from './adapters/metabase/types.js'; +export type { MetabasePullConfig, MetabaseSyncMode } from './adapters/metabase/types.js'; +export { + fetchMetricflowRepo, +} from './adapters/metricflow/fetch.js'; +export type { FetchMetricflowRepoParams, FetchMetricflowRepoResult } from './adapters/metricflow/fetch.js'; +export { + parseMetricflowFiles, + translateMetricflowJinjaFilter, +} from './adapters/metricflow/deep-parse.js'; +export type { + DimensionDefinition, + MeasureDefinition, + MetricFlowParseResult, 
+ MetricflowParseOptions, + ParsedCrossModelMetric, + ParsedMetricflowRelationship, + ParsedSemanticModel, + SimpleMeasureDefinition, +} from './adapters/metricflow/deep-parse.js'; +export { + buildMetricflowColumns, + buildMetricflowJoinsForModel, + buildMetricflowMeasures, + buildMetricflowSemanticModelSource, + countImportableMetricflowRelationships, + filterValidMetricflowRelationships, + findMatchingMetricflowTable, + getMetricflowAvailableColumnNames, + mapCrossModelMetricToSource, + mapSemanticModelToSource, + normalizeMetricflowJoinOn, + parseMetricflowJoinReference, + resolveMetricflowSemanticModelSourceName, + rewriteMetricflowJoinOn, + rewriteMetricflowManifestJoins, + toKebabCaseMetricflowName, +} from './adapters/metricflow/semantic-models.js'; +export { importMetricflowSemanticModels } from './adapters/metricflow/import-semantic-models.js'; +export type { + ImportMetricflowSemanticModelsDeps, + ImportMetricflowSemanticModelsInput, + MetricFlowImportResult, + MetricflowSemanticLayerService, + MetricflowSemanticLayerWriter, +} from './adapters/metricflow/import-semantic-models.js'; +export type { + MetricflowHostTable, + MetricflowSemanticModelImportContext, + MetricflowSemanticModelJoin, + MetricflowWritableSemanticLayerSource, +} from './adapters/metricflow/semantic-models.js'; +export { MetricflowSourceAdapter, type MetricflowSourceAdapterDeps } from './adapters/metricflow/metricflow.adapter.js'; +export { + metricflowPullConfigSchema, + parseMetricflowPullConfig, + pullConfigFromMetricflowIntegration, +} from './adapters/metricflow/pull-config.js'; +export type { + MetricflowIntegrationLike, + MetricflowPullConfig, +} from './adapters/metricflow/pull-config.js'; +export { NOTION_ORG_KNOWLEDGE_WARNING } from './adapters/notion/chunk.js'; +export { NotionSourceAdapter, type NotionSourceAdapterDeps } from './adapters/notion/notion.adapter.js'; +export { NotionClient, type NotionApi, type NotionBotInfo } from './adapters/notion/notion-client.js'; 
+export { chunkHistoricSqlStagedDir, describeHistoricSqlScope } from './adapters/historic-sql/chunk.js'; +export { detectHistoricSqlStagedDir } from './adapters/historic-sql/detect.js'; +export { + HistoricSqlExtensionMissingError, + HistoricSqlGrantsMissingError, + HistoricSqlVersionUnsupportedError, +} from './adapters/historic-sql/errors.js'; +export { HistoricSqlSourceAdapter } from './adapters/historic-sql/historic-sql.adapter.js'; +export { BigQueryHistoricSqlQueryHistoryReader } from './adapters/historic-sql/bigquery-query-history-reader.js'; +export type { BigQueryHistoricSqlQueryHistoryReaderOptions } from './adapters/historic-sql/bigquery-query-history-reader.js'; +export { PostgresPgssQueryHistoryReader } from './adapters/historic-sql/postgres-pgss-query-history-reader.js'; +export { SnowflakeHistoricSqlQueryHistoryReader } from './adapters/historic-sql/snowflake-query-history-reader.js'; +export { stageHistoricSqlTemplates } from './adapters/historic-sql/stage.js'; +export { + pgssBaselinePath, + readPgssBaseline, + stagePgStatStatementsTemplates, + writePgssBaselineAtomic, +} from './adapters/historic-sql/stage-pgss.js'; +export type { PgssBaseline, StagePgStatStatementsTemplatesResult } from './adapters/historic-sql/stage-pgss.js'; +export type { + HistoricSqlDialect, + HistoricSqlManifest, + HistoricSqlMetadata, + HistoricSqlPullConfig, + HistoricSqlQueryHistoryReader, + HistoricSqlRawQueryRow, + HistoricSqlSourceAdapterDeps, + HistoricSqlTimeWindow, + HistoricSqlUsage, + KloPostgresQueryClient, + PostgresPgssAggregateRow, + PostgresPgssProbeResult, + PostgresPgssReader, + PostgresPgssRow, + PostgresPgssSnapshot, +} from './adapters/historic-sql/types.js'; +export { + HISTORIC_SQL_OBJECT_TYPE, + HISTORIC_SQL_SOURCE_KEY, + historicSqlManifestSchema, + historicSqlMetadataSchema, + historicSqlPullConfigSchema, + historicSqlRawQueryRowSchema, + historicSqlUsageSchema, +} from './adapters/historic-sql/types.js'; +export type { CanonicalPin } from 
'./canonical-pins.js'; +export { buildCanonicalPinsPromptBlock, selectRelevantCanonicalPins } from './canonical-pins.js'; +export type { + BudgetExhaustedCandidateForCarryForward, + CandidateDedupServiceDeps, + CandidateDedupSettings, + ContextCandidateActionHint, + ContextCandidateCarryforwardArgs, + ContextCandidateCarryforwardResult, + ContextCandidateCarryforwardServiceDeps, + ContextCandidateCarryforwardSettings, + ContextCandidateEmbeddingPort, + ContextCandidateForPrompt, + ContextCandidateLane, + ContextCandidateRejectionReason, + ContextCandidateScoreAggregation, + ContextCandidateStatus, + ContextCandidateStorePort, + ContextCandidateVerdictSummary, + CuratorPaginationInput, + CuratorPaginationServiceDeps, + CuratorPaginationSettings, + CurrentRunEvidenceChunkForCarryForward, + InsertContextCandidateInput, + MarkContextCandidateClusterInput, +} from './context-candidates/index.js'; +export { + buildContextCandidateEmbeddingText, + CandidateDedupService, + ContextCandidateCarryforwardService, + CuratorPaginationService, +} from './context-candidates/index.js'; +export type { + ContextEvidenceDocumentRef, + ContextEvidenceEmbeddingPort, + ContextEvidenceIndexStorePort, + ContextEvidenceIndexSummary as PackageContextEvidenceIndexSummary, + EvidencePublishState, + ReplaceContextEvidenceChunk, + SqliteContextEvidenceStoreOptions, + UpsertContextEvidenceDocument, +} from './context-evidence/index.js'; +export { + ContextEvidenceIndexService, + SqliteContextEvidenceStore, +} from './context-evidence/index.js'; +export { DiffSetService } from './diff-set.service.js'; +export { IngestBundleRunner } from './ingest-bundle.runner.js'; +export type { DefaultLocalIngestAdaptersOptions } from './local-adapters.js'; +export { createDefaultLocalIngestAdapters, localPullConfigForAdapter } from './local-adapters.js'; +export type { + LocalIngestMcpOptions, + LocalIngestResult, + LocalMetabaseFanoutChild, + LocalMetabaseFanoutProgress, + LocalMetabaseFanoutProgressChild, + 
LocalMetabaseFanoutResult, + RunLocalIngestOptions, + RunLocalMetabaseIngestOptions, +} from './local-ingest.js'; +export { getLatestLocalIngestStatus, getLocalIngestStatus, runLocalIngest, runLocalMetabaseIngest } from './local-ingest.js'; +export { seedLocalMappingStateFromKloYaml } from './local-mapping-reconcile.js'; +export type { + CreateLocalBundleIngestRuntimeOptions, + LocalBundleIngestRuntime, +} from './local-bundle-runtime.js'; +export { createLocalBundleIngestRuntime } from './local-bundle-runtime.js'; +export type { + LocalIngestDiffPaths, + LocalIngestRunRecord, + LocalIngestStatus, + RunLocalStageOnlyIngestOptions, +} from './local-stage-ingest.js'; +export { getLocalStageOnlyIngestStatus, runLocalStageOnlyIngest } from './local-stage-ingest.js'; +export { + ingestReportToMemoryFlowReplay, + localIngestRunToMemoryFlowReplay, +} from './memory-flow/events.js'; +export { + buildAuthenticatedUrl, + cleanupRepoDir, + cloneOrPull, + RepoConfigError, + RepoFetchError, + repoDirExists, + sanitizeRepoError, + testRepoConnection, + validateRepoConfig, +} from './repo-fetch.js'; +export type { RepoFetchConfig } from './repo-fetch.js'; +export { + loadProjectInfo, + parseProjectName, + parseProjectVars, + resolveJinjaVariables, +} from './dbt-shared/project-vars.js'; +export type { DbtProjectInfo, ResolveJinjaVariablesResult } from './dbt-shared/project-vars.js'; +export { findDbtSchemaFiles, loadDbtSchemaFiles } from './dbt-shared/schema-files.js'; +export { + computeDbtSchemaHash, + parseDbtSchemaFile, + parseDbtSchemaFiles, +} from './adapters/dbt-descriptions/parse-schema.js'; +export type { + DbtParsedColumn, + DbtColumnConstraints, + DbtDataTestRef, + DbtParsedRelationship, + DbtParsedTable, + DbtSchemaFile, + DbtSchemaParseResult, +} from './adapters/dbt-descriptions/parse-schema.js'; +export { findMatchingKloTable, matchDbtTables } from './adapters/dbt-descriptions/match-tables.js'; +export type { DbtHostTableLite, DbtTableMatch } from 
'./adapters/dbt-descriptions/match-tables.js'; +export { toDescriptionUpdates } from './adapters/dbt-descriptions/to-description-updates.js'; +export type { DbtDescriptionUpdates } from './adapters/dbt-descriptions/to-description-updates.js'; +export { toRelationshipUpdates } from './adapters/dbt-descriptions/to-relationship-updates.js'; +export type { DbtRelationshipUpdates } from './adapters/dbt-descriptions/to-relationship-updates.js'; +export { toMetadataUpdates } from './adapters/dbt-descriptions/to-metadata-updates.js'; +export { mergeSemanticModelTables } from './adapters/dbt-descriptions/merge-semantic-model-tables.js'; +export type { KloJoinUpdate, KloMetadataUpdate } from '../scan/enrichment-types.js'; +export { + createInitialMemoryFlowInteractionState, + findMemoryFlowSearchMatches, + reduceMemoryFlowInteractionState, + selectedMemoryFlowColumn, + selectedMemoryFlowDetails, + selectMemoryFlowChip, + selectMemoryFlowColumn, + visibleMemoryFlowChips, +} from './memory-flow/interaction.js'; +export { renderMemoryFlowInteractive } from './memory-flow/interactive-render.js'; +export { createMemoryFlowLiveBuffer, sanitizeMemoryFlowError } from './memory-flow/live-buffer.js'; +export { renderMemoryFlowReplay } from './memory-flow/render.js'; +export { formatMemoryFlowFinalSummary } from './memory-flow/summary.js'; +export type { MemoryFlowStreamEvent } from './memory-flow/schema.js'; +export { + memoryFlowActionDetailSchema, + memoryFlowDetailSectionsSchema, + memoryFlowEventSchema, + memoryFlowPlannedWorkUnitSchema, + memoryFlowReplayInputSchema, + memoryFlowRunStatusSchema, + memoryFlowStreamEventSchema, + parseMemoryFlowReplayInput, +} from './memory-flow/schema.js'; +export type { + MemoryFlowChip, + MemoryFlowColumnId, + MemoryFlowColumnView, + MemoryFlowDisplayStatus, + MemoryFlowEvent, + MemoryFlowEventSink, + MemoryFlowFilterMode, + MemoryFlowInteractionCommand, + MemoryFlowInteractionState, + MemoryFlowLiveBufferOptions, + MemoryFlowPaneId, + 
MemoryFlowPlannedWorkUnit, + MemoryFlowRenderOptions, + MemoryFlowReplayInput, + MemoryFlowReplayPatch, + MemoryFlowRunStatus, + MemoryFlowViewModel, +} from './memory-flow/types.js'; +export { buildMemoryFlowViewModel } from './memory-flow/view-model.js'; +export type { + MemoryFlowStatusBadge, + MemoryFlowVisualColumn, + MemoryFlowVisualModel, +} from './memory-flow/visuals.js'; +export { + buildMemoryFlowVisualModel, + memoryFlowStatusBadge, + renderMemoryFlowConnectorLine, +} from './memory-flow/visuals.js'; +export type { + PageTriageEvidenceChunk, + PageTriageReport, + PageTriageRunArgs, + PageTriageServiceDeps, + PageTriageSettings, + PageTriageStorePort, +} from './page-triage/index.js'; +export { PageTriageService } from './page-triage/index.js'; +export type { + CandidateDedupPort, + CandidateDedupResult, + ContextCandidateCarryforwardPort, + ContextCandidateForDedup, + ContextCandidateSummary, + ContextEvidenceCandidatesPort, + ContextEvidenceIndexPort, + ContextEvidenceIndexSummary, + CreateIngestRunArgs, + CuratorPaginationPort, + CuratorPaginationReport, + DiffSetComputerPort, + IngestBundleRunnerDeps, + IngestCanonicalPinsPort, + IngestCommitMessagePort, + IngestFileStorePort, + IngestGitAuthor, + IngestKnowledgeIndexPort, + IngestLockPort, + IngestProvenanceInsert, + IngestProvenancePort, + IngestProvenanceRow, + IngestReportsPort, + IngestRunnerJob, + IngestRunRecord, + IngestRunsPort, + IngestSessionWorktree, + IngestSessionWorktreePort, + IngestSettingsPort, + IngestStoragePort, + IngestToolsetFactoryPort, + IngestToolsetLike, + PageTriagePort, + PageTriageRunResult, + ProvenanceActionType, + SourceAdapterRegistryPort, +} from './ports.js'; +export { + buildSyncId, + provenanceMarker, + rawSourcesDirForSync, + rawSourcesRoot, +} from './raw-sources-paths.js'; +export { ingestReportSnapshotSchema, parseIngestReportSnapshot } from './report-snapshot.js'; +export type { IngestReportBody, IngestReportSnapshot } from './reports.js'; +export * from 
'./reports.js'; +export { SourceAdapterRegistry } from './source-adapter-registry.js'; +export type { SqliteBundleIngestStoreOptions } from './sqlite-bundle-ingest-store.js'; +export { SqliteBundleIngestStore } from './sqlite-bundle-ingest-store.js'; +export type { + SaveCompletedLocalIngestRunInput, + SqliteLocalIngestStoreOptions, +} from './sqlite-local-ingest-store.js'; +export { SqliteLocalIngestStore } from './sqlite-local-ingest-store.js'; +export type { + ReconcileCandidateForPrompt, + ReconcileCandidateSummary, + ReconcilePromptRunState, + WikiPageRef, +} from './stages/build-reconcile-context.js'; +export { + buildReconcileSystemPrompt, + buildReconcileToolSet, + buildReconcileUserPrompt, +} from './stages/build-reconcile-context.js'; +export type { ReconciliationOutcome } from './stages/stage-4-reconciliation.js'; +export { runReconciliationStage4 } from './stages/stage-4-reconciliation.js'; +export type { StageIndex } from './stages/stage-index.types.js'; +export type { + ChunkResult, + DiffSet, + EvictionUnit, + FetchContext, + IngestBundleJob, + IngestBundleRef, + IngestBundleResult, + IngestDiffSummary, + IngestJobContext, + IngestJobPhase, + IngestTrigger, + ScopeDescriptor, + SourceAdapter, + SourceFetchIssue, + SourceFetchReport, + TriageLane, + TriageSignals, + UnresolvedCardInfo, + WorkUnit, +} from './types.js'; diff --git a/packages/context/src/ingest/ingest-bundle.runner.test.ts b/packages/context/src/ingest/ingest-bundle.runner.test.ts new file mode 100644 index 00000000..56661078 --- /dev/null +++ b/packages/context/src/ingest/ingest-bundle.runner.test.ts @@ -0,0 +1,1853 @@ +import { mkdir, mkdtemp, readFile, rm, stat, writeFile } from 'node:fs/promises'; +import { tmpdir } from 'node:os'; +import { join } from 'node:path'; +import { beforeEach, describe, expect, it, vi } from 'vitest'; +import { GitService } from '../core/index.js'; +import { addTouchedSlSource } from '../tools/index.js'; +import { IngestBundleRunner } from 
'./ingest-bundle.runner.js';
+import { createMemoryFlowLiveBuffer } from './memory-flow/live-buffer.js';
+import type { MemoryFlowReplayInput } from './memory-flow/types.js';
+import type { IngestBundleRunnerDeps } from './ports.js';
+
+/**
+ * Job-context stand-in used by these tests.
+ *
+ * Progress values are clamped to [0, 1]. A context created through
+ * `startPhase` forwards its progress to its parent, mapped to
+ * `start + span * local` and clamped again; only a context without a parent
+ * calls `updateProgressFn`.
+ */
+class TestJobContext {
+  /** Last clamped progress reported on this context; used as the start offset of child phases. */
+  private currentProgress = 0;
+
+  constructor(
+    public readonly jobId: string,
+    public readonly userId: string | null | undefined,
+    public readonly checkCancellation: () => Promise<void>,
+    private readonly updateProgressFn: (progress: number, message?: string) => Promise<void>,
+    private readonly parent?: TestJobContext,
+    private readonly start = 0,
+    private readonly span = 1,
+  ) {}
+
+  /**
+   * Records `progress` (clamped to [0, 1]) and reports it upward.
+   *
+   * @param progress Local progress of this context.
+   * @param message Optional message passed through unchanged.
+   */
+  async updateProgress(progress: number, message?: string): Promise<void> {
+    const local = Math.max(0, Math.min(1, progress));
+    this.currentProgress = local;
+    if (this.parent) {
+      /* Child phase: translate local progress into the parent's scale. */
+      await this.parent.updateProgress(Math.max(0, Math.min(1, this.start + this.span * local)), message);
+      return;
+    }
+    await this.updateProgressFn(local, message);
+  }
+
+  /**
+   * Creates a child context that starts at this context's current progress
+   * and spans `fraction` (clamped to [0, 1]) of it.
+   */
+  startPhase(fraction: number): TestJobContext {
+    return new TestJobContext(
+      this.jobId,
+      this.userId,
+      this.checkCancellation,
+      this.updateProgressFn,
+      this,
+      this.currentProgress,
+      Math.max(0, Math.min(1, fraction)),
+    );
+  }
+}
+
+/**
+ * Returns a promise together with the function that resolves it, so a test
+ * can decide when the promise settles. `T` defaults to `void`, which lets
+ * callers write `deferred()` and `resolve()` without arguments.
+ */
+const deferred = <T = void>() => {
+  let resolve!: (v: T) => void;
+  const promise = new Promise<T>((r) => {
+    resolve = r;
+  });
+  return { promise, resolve };
+};
+
+/**
+ * Builds a fresh replay input with placeholder `runId`/`syncId` of 'pending',
+ * status 'running', and empty errors, events, planned work units and details.
+ */
+function bundleReplayInput(): MemoryFlowReplayInput {
+  return {
+    runId: 'pending',
+    connectionId: 'c1',
+    adapter: 'fake',
+    status: 'running',
+    sourceDir: '/tmp/stage/upload-x',
+    syncId: 'pending',
+    errors: [],
+    events: [],
+    plannedWorkUnits: [],
+    details: { actions: [], provenance: [], transcripts: [] },
+  };
+}
+
+/**
+ * Creates a new set of `vi.fn()`-based mock collaborators for one test.
+ * Every call returns fresh mocks, so tests can re-program them independently.
+ */
+const makeDeps = () => {
+  const runsRepo = {
+    create: vi.fn().mockResolvedValue({ id: 'run-1' }),
+    findMostRecentCompleted: vi.fn().mockResolvedValue(null),
+    markFailed: vi.fn(),
+    markCompleted: vi.fn(),
+  };
+  const provenanceRepo = {
+    insertMany: vi.fn(),
+    findHashesBySync:
vi.fn().mockResolvedValue(new Map()),
+    findLatestArtifactsForRawPaths: vi.fn().mockResolvedValue(new Map()),
+  };
+  const reportsRepo = {
+    create: vi.fn().mockResolvedValue({ id: 'report-1' }),
+    findByJobId: vi.fn().mockResolvedValue(null),
+    markSuperseded: vi.fn().mockResolvedValue(undefined),
+  };
+  const canonicalPins = {
+    listPins: vi.fn().mockResolvedValue([]),
+  };
+  /* Fake source adapter: detects successfully and yields one work unit for 'a.yml' by default. */
+  const adapter = {
+    source: 'fake',
+    skillNames: [] as string[],
+    reconcileSkillNames: undefined as undefined | string[],
+    evidenceIndexing: undefined as undefined | 'documents',
+    triageSupported: undefined as undefined | boolean,
+    detect: vi.fn().mockResolvedValue(true),
+    /* NOTE(review): the promise's type argument was lost; string[] is assumed from the name — confirm. */
+    listTargetConnectionIds: undefined as undefined | ((stagedDir: string) => Promise<string[]>),
+    chunk: vi.fn().mockResolvedValue({
+      workUnits: [{ unitKey: 'u1', rawFiles: ['a.yml'], peerFileIndex: [], dependencyPaths: [] }],
+    }),
+  };
+  const registry = { get: vi.fn().mockReturnValue(adapter) };
+  const diffSetService = {
+    compute: vi.fn().mockResolvedValue({ added: ['a.yml'], modified: [], deleted: [], unchanged: [] }),
+  };
+  const contextEvidenceIndex = {
+    indexStagedDir: vi.fn().mockResolvedValue({
+      documentsIndexed: 1,
+      chunksIndexed: 1,
+      documentsDeleted: 0,
+      embeddingFailures: 0,
+      warnings: [],
+    }),
+    publishSync: vi.fn().mockResolvedValue(undefined),
+  };
+  const pageTriage = {
+    triageRun: vi.fn().mockResolvedValue({
+      enabled: true,
+      fullRawPaths: new Set(['a.yml']),
+      warnings: [],
+    }),
+  };
+  /* Git handle exposed only through the session worktree returned below. */
+  const scopedGit = {
+    revParseHead: vi.fn().mockResolvedValue('h'),
+    commitFiles: vi.fn(),
+    resetHardTo: vi.fn(),
+    assertWorktreeClean: vi.fn().mockResolvedValue(undefined),
+  };
+  const sessionWorktreeService = {
+    create: vi.fn().mockResolvedValue({
+      chatId: 'j1',
+      workdir: '/tmp/wt',
+      branch: 'session/j1',
+      baseSha: 'b',
+      createdAt: new Date(),
+      git: scopedGit,
+      config: {},
+    }),
+    cleanup: vi.fn(),
+  };
+  const agentRunner = { runLoop: vi.fn().mockResolvedValue({ stopReason: 'natural' }) };
+  const gitService = {
+    revParseHead: vi.fn().mockResolvedValue('base'),
+    listFilesAtHead: vi.fn().mockResolvedValue([]),
+    getFileAtCommit: vi.fn(),
+    squashMergeIntoMain: vi
+      .fn()
+      .mockResolvedValue({ ok: true, squashSha: 'sq', touchedPaths: ['raw-sources/c1/fake/s/a.yml'] }),
+  };
+  const lockingService = {
+    /* Runs the callback immediately; the lock key is ignored. */
+    withLock: vi.fn().mockImplementation(async (_k: string, fn: () => Promise<unknown>) => fn()),
+  };
+  const appSettingsService = {
+    settings: {
+      ai: { slValidation: { probeRowCount: 1 } },
+      llm: { memoryIngestionModel: 'test-model' },
+    },
+  };
+  const skillsRegistry = {
+    listSkills: vi.fn().mockResolvedValue([]),
+    getSkill: vi.fn().mockResolvedValue(null),
+    buildSkillsPrompt: vi.fn().mockReturnValue(''),
+    stripFrontmatter: vi.fn().mockImplementation((s: string) => s),
+  };
+  const promptService = {
+    loadPrompt: vi.fn().mockResolvedValue('base-framing'),
+  };
+  const wikiService = {
+    forWorktree: vi.fn().mockReturnValue({}),
+    readPage: vi.fn().mockResolvedValue(null),
+    syncFromCommit: vi.fn().mockResolvedValue(undefined),
+  };
+  const knowledgeSlRefs = {
+    syncFromWiki: vi.fn().mockResolvedValue({ inserted: 1, deleted: 0 }),
+  };
+  const knowledgeIndex = {
+    listPagesForUser: vi.fn().mockResolvedValue([]),
+  };
+  const semanticLayerService = {
+    forWorktree: vi.fn().mockReturnValue({}),
+    /* Only connection 'warehouse-2' reports an existing semantic-layer file. */
+    listFilesForConnection: vi
+      .fn()
+      .mockImplementation((connectionId: string) =>
+        Promise.resolve(connectionId === 'warehouse-2' ? ['looker__orders.yaml'] : []),
+      ),
+    loadAllSources: vi.fn().mockResolvedValue([]),
+  };
+  const slSearchService = {
+    indexSources: vi.fn().mockResolvedValue(undefined),
+  };
+  const slSourcesRepository = {};
+  const slValidator = { validateSingleSource: vi.fn().mockResolvedValue({ errors: [], warnings: [] }) };
+  const toolsetFactory = {
+    createIngestWuToolset: vi.fn().mockReturnValue({
+      toAiSdkTools: vi.fn().mockReturnValue({}),
+      getAllTools: vi.fn().mockReturnValue([]),
+      getToolNames: vi.fn().mockReturnValue([]),
+    }),
+  };
+  const configService = {
+    enqueueCommitMessageJobForExternalCommit: vi.fn().mockResolvedValue(undefined),
+  };
+  return {
+    runsRepo,
+    provenanceRepo,
+    reportsRepo,
+    canonicalPins,
+    adapter,
+    registry,
+    diffSetService,
+    contextEvidenceIndex,
+    pageTriage,
+    sessionWorktreeService,
+    agentRunner,
+    gitService,
+    lockingService,
+    slValidator,
+    appSettingsService,
+    skillsRegistry,
+    promptService,
+    wikiService,
+    knowledgeSlRefs,
+    knowledgeIndex,
+    semanticLayerService,
+    slSearchService,
+    slSourcesRepository,
+    toolsetFactory,
+    configService,
+  };
+};
+
+/**
+ * Constructs an `IngestBundleRunner` wired to the mocks from `makeDeps`.
+ *
+ * @param deps Mock collaborators; a fresh set is created when omitted.
+ * @param overrides Runner dependencies that replace the defaults built here.
+ */
+const buildRunner = (deps: ReturnType<typeof makeDeps> = makeDeps(), overrides: Partial<IngestBundleRunnerDeps> = {}) =>
+  new IngestBundleRunner({
+    runs: deps.runsRepo as any,
+    provenance: deps.provenanceRepo as any,
+    registry: deps.registry as any,
+    diffSetService: deps.diffSetService as any,
+    contextEvidenceIndex: deps.contextEvidenceIndex,
+    pageTriage: deps.pageTriage as any,
+    sessionWorktreeService: deps.sessionWorktreeService as any,
+    agentRunner: deps.agentRunner as any,
+    gitService: deps.gitService as any,
+    lockingService: deps.lockingService as any,
+    storage: {
+      homeDir: '/tmp/klo-test',
+      systemGitAuthor: { name: 'KLO Test', email: 'system@klo.local' },
+      resolveUploadDir: (uploadId) => `/tmp/klo-test/ingest-uploads/${uploadId}`,
+      resolvePullDir: (jobId) => `/tmp/klo-test/ingest-pulls/${jobId}`,
+      resolveTranscriptDir: (jobId) => `/tmp/klo-test/run/wu-transcripts/${jobId}`,
+    },
+    settings:
{ probeRowCount: 1, memoryIngestionModel: 'test-model' }, + skillsRegistry: deps.skillsRegistry as any, + promptService: deps.promptService as any, + wikiService: deps.wikiService as any, + knowledgeSlRefs: deps.knowledgeSlRefs as any, + knowledgeIndex: deps.knowledgeIndex, + semanticLayerService: deps.semanticLayerService as any, + slSearchService: deps.slSearchService as any, + slSourcesRepository: deps.slSourcesRepository as any, + connections: { + listEnabledConnections: vi.fn().mockResolvedValue([]), + getConnectionById: vi.fn().mockResolvedValue({ id: 'c1', name: 'warehouse', connectionType: 'POSTGRES' }), + executeQuery: vi.fn().mockResolvedValue({ headers: [], rows: [] }), + }, + reports: deps.reportsRepo as any, + canonicalPins: deps.canonicalPins, + slValidator: deps.slValidator as any, + toolsetFactory: deps.toolsetFactory as any, + commitMessages: { + enqueueForExternalCommit: deps.configService.enqueueCommitMessageJobForExternalCommit, + }, + embedding: { + maxBatchSize: 10, + computeEmbedding: async () => [0], + computeEmbeddingsBulk: async (texts: string[]) => texts.map(() => [0]), + }, + ...overrides, + }); + +describe('IngestBundleRunner — FIFO-per-connection', () => { + let spy: any; + + beforeEach(() => { + spy = vi.fn(); + }); + + it('serializes two jobs on the same connectionId', async () => { + const runner = buildRunner(); + (runner as any).runInner = async (job: any) => { + spy(job.jobId); + await new Promise((r) => setTimeout(r, 5)); + spy(`done-${job.jobId}`); + return { + runId: 'r', + syncId: 's', + diffSummary: { added: 0, modified: 0, deleted: 0, unchanged: 0 }, + workUnitCount: 0, + failedWorkUnits: [], + artifactsWritten: 0, + commitSha: null, + }; + }; + const p1 = runner.run({ + jobId: 'j1', + connectionId: 'c1', + sourceKey: 'fake', + trigger: 'upload', + bundleRef: { kind: 'upload', uploadId: 'u1' }, + }); + const p2 = runner.run({ + jobId: 'j2', + connectionId: 'c1', + sourceKey: 'fake', + trigger: 'upload', + bundleRef: { kind: 
'upload', uploadId: 'u2' }, + }); + await Promise.all([p1, p2]); + expect(spy.mock.calls.map((c: unknown[]) => c[0])).toEqual(['j1', 'done-j1', 'j2', 'done-j2']); + }); + + it('runs jobs on different connections in parallel', async () => { + const runner = buildRunner(); + const d1 = deferred(); + const d2 = deferred(); + (runner as any).runInner = async (job: any) => { + spy(`start-${job.jobId}`); + if (job.jobId === 'j1') { + await d1.promise; + } + if (job.jobId === 'j2') { + await d2.promise; + } + return { + runId: 'r', + syncId: 's', + diffSummary: { added: 0, modified: 0, deleted: 0, unchanged: 0 }, + workUnitCount: 0, + failedWorkUnits: [], + artifactsWritten: 0, + commitSha: null, + }; + }; + const p1 = runner.run({ + jobId: 'j1', + connectionId: 'c1', + sourceKey: 'fake', + trigger: 'upload', + bundleRef: { kind: 'upload', uploadId: 'u1' }, + }); + const p2 = runner.run({ + jobId: 'j2', + connectionId: 'c2', + sourceKey: 'fake', + trigger: 'upload', + bundleRef: { kind: 'upload', uploadId: 'u2' }, + }); + await new Promise((r) => setTimeout(r, 10)); + expect(spy.mock.calls.map((c: unknown[]) => c[0]).sort()).toEqual(['start-j1', 'start-j2']); + d1.resolve(); + d2.resolve(); + await Promise.all([p1, p2]); + }); +}); + +describe('IngestBundleRunner — Stages 1 → 7', () => { + it('runs the full pipeline, creates a run row, stages files, chunks, squashes, writes provenance', async () => { + const deps = makeDeps(); + const runner = buildRunner(deps); + (runner as any).stageRawFilesStage1 = vi.fn().mockResolvedValue({ + currentHashes: new Map([['a.yml', 'h1']]), + rawDirInWorktree: 'raw-sources/c1/fake/s', + }); + (runner as any).resolveStagedDir = vi.fn().mockResolvedValue('/tmp/stage/upload-x'); + + const result = await runner.run({ + jobId: 'j1', + connectionId: 'c1', + sourceKey: 'fake', + trigger: 'upload', + bundleRef: { kind: 'upload', uploadId: 'upload-x' }, + }); + + expect(deps.runsRepo.create).toHaveBeenCalledWith( + expect.objectContaining({ jobId: 
'j1', connectionId: 'c1', sourceKey: 'fake', trigger: 'upload' }), + ); + expect(deps.adapter.detect).toHaveBeenCalled(); + expect(deps.adapter.chunk).toHaveBeenCalled(); + expect(result.workUnitCount).toBe(1); + expect(deps.diffSetService.compute).toHaveBeenCalled(); + expect(deps.gitService.squashMergeIntoMain).toHaveBeenCalledWith( + 'session/j1', + expect.any(String), + expect.any(String), + expect.stringContaining('ingest(fake): j1'), + ); + expect(deps.provenanceRepo.insertMany).toHaveBeenCalled(); + expect(result.commitSha).toBe('sq'); + expect(deps.runsRepo.markCompleted).toHaveBeenCalledWith('run-1', expect.any(Object), 'completed'); + // Single touched path → path-scoped diff for the LLM commit-message note. + expect(deps.configService.enqueueCommitMessageJobForExternalCommit).toHaveBeenCalledWith( + { commitHash: 'sq' }, + expect.stringContaining('ingest(fake): j1'), + 'raw-sources/c1/fake/s/a.yml', + ); + }); + + it('reuses document evidence indexing and page triage for historic-SQL WorkUnits', async () => { + const deps = makeDeps(); + deps.adapter.source = 'historic-sql'; + deps.adapter.skillNames = ['historic_sql_ingest']; + deps.adapter.reconcileSkillNames = ['historic_sql_curator']; + deps.adapter.evidenceIndexing = 'documents'; + deps.adapter.triageSupported = true; + deps.adapter.chunk.mockResolvedValue({ + workUnits: [ + { unitKey: 'full', rawFiles: ['templates/full/metadata.json'], dependencyPaths: [], peerFileIndex: [] }, + { unitKey: 'skip', rawFiles: ['templates/skip/metadata.json'], dependencyPaths: [], peerFileIndex: [] }, + ], + }); + deps.diffSetService.compute.mockResolvedValue({ + added: ['templates/full/metadata.json', 'templates/skip/metadata.json'], + modified: [], + deleted: [], + unchanged: [], + }); + deps.pageTriage.triageRun.mockResolvedValue({ + enabled: true, + fullRawPaths: new Set(['templates/full/metadata.json']), + warnings: [], + }); + const runner = buildRunner(deps); + (runner as any).stageRawFilesStage1 = 
vi.fn().mockResolvedValue({ + currentHashes: new Map([ + ['templates/full/metadata.json', 'h-full'], + ['templates/skip/metadata.json', 'h-skip'], + ]), + rawDirInWorktree: 'raw-sources/c1/historic-sql/s', + }); + (runner as any).resolveStagedDir = vi.fn().mockResolvedValue('/tmp/stage/upload-x'); + + const result = await runner.run({ + jobId: 'j1', + connectionId: 'c1', + sourceKey: 'historic-sql', + trigger: 'upload', + bundleRef: { kind: 'upload', uploadId: 'upload-x' }, + }); + + const workUnitCalls = deps.agentRunner.runLoop.mock.calls.filter( + ([params]) => params.telemetryTags?.operationName === 'ingest-bundle-wu', + ); + expect(deps.contextEvidenceIndex.indexStagedDir).toHaveBeenCalled(); + expect(deps.pageTriage.triageRun).toHaveBeenCalled(); + expect(workUnitCalls).toHaveLength(1); + expect(workUnitCalls[0][0].telemetryTags.unitKey).toBe('full'); + expect(result.workUnitCount).toBe(1); + }); + + it('emits memory-flow source and planning events for bundle ingest', async () => { + const deps = makeDeps(); + deps.adapter.chunk.mockResolvedValue({ + workUnits: [ + { + unitKey: 'u1', + rawFiles: ['a.yml'], + peerFileIndex: ['peer.yml'], + dependencyPaths: ['manifest.yml'], + }, + ], + eviction: { deletedRawPaths: ['old.yml'] }, + }); + const runner = buildRunner(deps); + (runner as any).stageRawFilesStage1 = vi.fn().mockResolvedValue({ + currentHashes: new Map([['a.yml', 'h1']]), + rawDirInWorktree: 'raw-sources/c1/fake/s', + }); + (runner as any).resolveStagedDir = vi.fn().mockResolvedValue('/tmp/stage/upload-x'); + + const snapshots: MemoryFlowReplayInput[] = []; + const memoryFlow = createMemoryFlowLiveBuffer(bundleReplayInput(), { + onChange: (snapshot) => snapshots.push(snapshot), + }); + const ctx = new TestJobContext( + 'j1', + null, + () => Promise.resolve(), + () => Promise.resolve(), + ); + (ctx as any).memoryFlow = memoryFlow; + + await runner.run( + { + jobId: 'j1', + connectionId: 'c1', + sourceKey: 'fake', + trigger: 'upload', + bundleRef: { 
kind: 'upload', uploadId: 'upload-x' }, + }, + ctx, + ); + + expect(memoryFlow.snapshot()).toMatchObject({ + runId: 'run-1', + connectionId: 'c1', + adapter: 'fake', + sourceDir: '/tmp/stage/upload-x', + }); + expect(memoryFlow.snapshot().plannedWorkUnits).toEqual([ + { + unitKey: 'u1', + rawFiles: ['a.yml'], + peerFileCount: 1, + dependencyCount: 1, + }, + ]); + expect(memoryFlow.snapshot().events).toEqual( + expect.arrayContaining([ + expect.objectContaining({ type: 'source_acquired', adapter: 'fake', trigger: 'upload', fileCount: 1 }), + expect.objectContaining({ type: 'scope_detected', fingerprint: null }), + expect.objectContaining({ type: 'raw_snapshot_written', rawFileCount: 1 }), + expect.objectContaining({ type: 'diff_computed', added: 1, modified: 0, deleted: 0, unchanged: 0 }), + expect.objectContaining({ type: 'chunks_planned', chunkCount: 1, workUnitCount: 1, evictionCount: 1 }), + ]), + ); + expect(snapshots.length).toBeGreaterThan(4); + expect(deps.reportsRepo.create).toHaveBeenCalledWith( + expect.objectContaining({ + body: expect.objectContaining({ + memoryFlow: expect.objectContaining({ + metadata: expect.objectContaining({ + schemaVersion: 1, + mode: 'full', + origin: 'captured', + timing: 'captured', + }), + events: expect.arrayContaining([ + expect.objectContaining({ + type: 'source_acquired', + emittedAt: expect.stringMatching(/^\d{4}-\d{2}-\d{2}T/), + }), + ]), + }), + }), + }), + ); + }); + + it('emits memory-flow WorkUnit step, candidate action, and finish events', async () => { + const deps = makeDeps(); + let currentToolSession: any = null; + deps.toolsetFactory.createIngestWuToolset.mockImplementation((toolSession: any) => { + currentToolSession = toolSession; + return { + toAiSdkTools: vi.fn().mockReturnValue({}), + getAllTools: vi.fn().mockReturnValue([]), + getToolNames: vi.fn().mockReturnValue([]), + }; + }); + deps.agentRunner.runLoop.mockImplementation(async (params: any) => { + if (params.telemetryTags.operationName === 
'ingest-bundle-wu') { + await params.onStepFinish?.({ stepIndex: 1, stepBudget: params.stepBudget }); + currentToolSession.actions.push({ + target: 'wiki', + type: 'created', + key: 'knowledge/orders.md', + detail: 'captured order context', + }); + } + return { stopReason: 'natural' }; + }); + + const runner = buildRunner(deps); + (runner as any).stageRawFilesStage1 = vi.fn().mockResolvedValue({ + currentHashes: new Map([['a.yml', 'h1']]), + rawDirInWorktree: 'raw-sources/c1/fake/s', + }); + (runner as any).resolveStagedDir = vi.fn().mockResolvedValue('/tmp/stage/upload-x'); + + const memoryFlow = createMemoryFlowLiveBuffer(bundleReplayInput()); + const ctx = new TestJobContext( + 'j1', + null, + () => Promise.resolve(), + () => Promise.resolve(), + ); + (ctx as any).memoryFlow = memoryFlow; + + await runner.run( + { + jobId: 'j1', + connectionId: 'c1', + sourceKey: 'fake', + trigger: 'upload', + bundleRef: { kind: 'upload', uploadId: 'upload-x' }, + }, + ctx, + ); + + expect(memoryFlow.snapshot().events).toEqual( + expect.arrayContaining([ + expect.objectContaining({ + type: 'work_unit_started', + unitKey: 'u1', + skills: ['ingest_triage', 'sl_capture', 'knowledge_capture'], + stepBudget: 40, + }), + expect.objectContaining({ type: 'work_unit_step', unitKey: 'u1', stepIndex: 1, stepBudget: 40 }), + expect.objectContaining({ + type: 'candidate_action', + unitKey: 'u1', + target: 'wiki', + action: 'created', + key: 'knowledge/orders.md', + }), + expect.objectContaining({ type: 'work_unit_finished', unitKey: 'u1', status: 'success' }), + ]), + ); + }); + + it('emits memory-flow gate, saved, provenance, and report events', async () => { + const deps = makeDeps(); + let currentToolSession: any = null; + deps.toolsetFactory.createIngestWuToolset.mockImplementation((toolSession: any) => { + currentToolSession = toolSession; + return { + toAiSdkTools: vi.fn().mockReturnValue({}), + getAllTools: vi.fn().mockReturnValue([]), + getToolNames: vi.fn().mockReturnValue([]), + }; 
+ }); + deps.agentRunner.runLoop.mockImplementation(async (params: any) => { + if (params.telemetryTags.operationName === 'ingest-bundle-wu') { + currentToolSession.actions.push({ + target: 'sl', + type: 'updated', + key: 'orders', + detail: 'captured gross revenue', + }); + } + if (params.telemetryTags.operationName === 'ingest-bundle-reconcile') { + await params.toolSet.emit_conflict_resolution.execute( + { + kind: 'near_duplicate', + artifactKey: 'sl:orders', + detail: 'orders retained as canonical', + flaggedForHuman: false, + }, + { toolCallId: 'conflict-1', messages: [] }, + ); + await params.toolSet.emit_unmapped_fallback.execute( + { + rawPath: 'a.yml', + reason: 'semantic_not_representable', + fallback: 'flagged', + }, + { toolCallId: 'fallback-1', messages: [] }, + ); + } + return { stopReason: 'natural' }; + }); + + const runner = buildRunner(deps); + (runner as any).stageRawFilesStage1 = vi.fn().mockResolvedValue({ + currentHashes: new Map([['a.yml', 'h1']]), + rawDirInWorktree: 'raw-sources/c1/fake/s', + }); + (runner as any).resolveStagedDir = vi.fn().mockResolvedValue('/tmp/stage/upload-x'); + + const memoryFlow = createMemoryFlowLiveBuffer(bundleReplayInput()); + const ctx = new TestJobContext( + 'j1', + null, + () => Promise.resolve(), + () => Promise.resolve(), + ); + (ctx as any).memoryFlow = memoryFlow; + + await runner.run( + { + jobId: 'j1', + connectionId: 'c1', + sourceKey: 'fake', + trigger: 'upload', + bundleRef: { kind: 'upload', uploadId: 'upload-x' }, + }, + ctx, + ); + + expect(memoryFlow.snapshot()).toMatchObject({ + reportId: 'report-1', + reportPath: 'report-1', + }); + expect(memoryFlow.snapshot().events).toEqual( + expect.arrayContaining([ + expect.objectContaining({ type: 'reconciliation_finished', conflictCount: 1, fallbackCount: 1 }), + expect.objectContaining({ type: 'saved', commitSha: 'sq', wikiCount: 0, slCount: 1 }), + expect.objectContaining({ type: 'provenance_recorded', rowCount: 1 }), + expect.objectContaining({ type: 
'report_created', runId: 'run-1', reportPath: 'report-1' }), + ]), + ); + }); + + it('finishes successful bundle memory-flow runs as done', async () => { + const deps = makeDeps(); + const runner = buildRunner(deps); + (runner as any).stageRawFilesStage1 = vi.fn().mockResolvedValue({ + currentHashes: new Map([['a.yml', 'h1']]), + rawDirInWorktree: 'raw-sources/c1/fake/s', + }); + (runner as any).resolveStagedDir = vi.fn().mockResolvedValue('/tmp/stage/upload-x'); + + const memoryFlow = createMemoryFlowLiveBuffer(bundleReplayInput()); + const ctx = new TestJobContext( + 'j1', + null, + () => Promise.resolve(), + () => Promise.resolve(), + ); + (ctx as any).memoryFlow = memoryFlow; + + await runner.run( + { + jobId: 'j1', + connectionId: 'c1', + sourceKey: 'fake', + trigger: 'upload', + bundleRef: { kind: 'upload', uploadId: 'upload-x' }, + }, + ctx, + ); + + expect(memoryFlow.snapshot().status).toBe('done'); + }); + + it('finishes bundle memory-flow runs with sanitized errors when the runner fails', async () => { + const deps = makeDeps(); + const sensitiveMessage = [ + 'failed to read postgres://user', + ':password', + '@localhost:5432/db?api_key=abc', + ' token=', + 'secret', + ].join(''); + deps.adapter.detect.mockRejectedValue(new Error(sensitiveMessage)); + const runner = buildRunner(deps); + (runner as any).stageRawFilesStage1 = vi.fn().mockResolvedValue({ + currentHashes: new Map([['a.yml', 'h1']]), + rawDirInWorktree: 'raw-sources/c1/fake/s', + }); + (runner as any).resolveStagedDir = vi.fn().mockResolvedValue('/tmp/stage/upload-x'); + + const memoryFlow = createMemoryFlowLiveBuffer(bundleReplayInput()); + const ctx = new TestJobContext( + 'j1', + null, + () => Promise.resolve(), + () => Promise.resolve(), + ); + (ctx as any).memoryFlow = memoryFlow; + + await expect( + runner.run( + { + jobId: 'j1', + connectionId: 'c1', + sourceKey: 'fake', + trigger: 'upload', + bundleRef: { kind: 'upload', uploadId: 'upload-x' }, + }, + ctx, + ), + 
).rejects.toThrow(/failed to read/); + + expect(memoryFlow.snapshot()).toMatchObject({ + status: 'error', + errors: ['failed to read postgres://[redacted] token=[redacted]'], + }); + expect(memoryFlow.snapshot().events).toEqual( + expect.arrayContaining([ + expect.objectContaining({ type: 'source_acquired', adapter: 'fake', trigger: 'upload', fileCount: 1 }), + ]), + ); + }); + + it('stores memory-flow provenance and transcript summaries in the ingest report body', async () => { + const deps = makeDeps(); + deps.toolsetFactory.createIngestWuToolset.mockReturnValue({ + toAiSdkTools: vi.fn().mockReturnValue({ + read_raw_span: { + description: 'read a raw span', + inputSchema: {}, + execute: vi.fn().mockResolvedValue('safe excerpt'), + }, + wiki_write: { + description: 'write wiki', + inputSchema: {}, + execute: vi.fn().mockResolvedValue('written'), + }, + }), + getAllTools: vi.fn().mockReturnValue([]), + getToolNames: vi.fn().mockReturnValue([]), + }); + deps.agentRunner.runLoop.mockImplementation(async (params: any) => { + if (params.telemetryTags.operationName === 'ingest-bundle-wu') { + await params.toolSet.read_raw_span.execute( + { path: 'a.yml', startLine: 1, endLine: 2 }, + { toolCallId: 'read-1', messages: [] }, + ); + await params.toolSet.wiki_write.execute( + { key: 'knowledge/a.md', content: 'safe summary' }, + { toolCallId: 'wiki-1', messages: [] }, + ); + } + return { stopReason: 'natural' }; + }); + + const runner = buildRunner(deps); + (runner as any).stageRawFilesStage1 = vi.fn().mockResolvedValue({ + currentHashes: new Map([['a.yml', 'h1']]), + rawDirInWorktree: 'raw-sources/c1/fake/s', + }); + (runner as any).resolveStagedDir = vi.fn().mockResolvedValue('/tmp/stage/upload-x'); + + await runner.run({ + jobId: 'j1', + connectionId: 'c1', + sourceKey: 'fake', + trigger: 'upload', + bundleRef: { kind: 'upload', uploadId: 'upload-x' }, + }); + + expect(deps.reportsRepo.create).toHaveBeenCalledWith( + expect.objectContaining({ + body: 
expect.objectContaining({ + provenanceRows: [ + expect.objectContaining({ + rawPath: 'a.yml', + artifactKind: null, + artifactKey: null, + actionType: 'skipped', + targetConnectionId: null, + }), + ], + toolTranscripts: [ + { + unitKey: 'u1', + path: '/tmp/klo-test/run/wu-transcripts/j1/u1.jsonl', + toolCallCount: 2, + errorCount: 0, + toolNames: ['read_raw_span', 'wiki_write'], + }, + ], + }), + }), + ); + }); + + it('persists WorkUnit unmapped fallback records in the report body', async () => { + const deps = makeDeps(); + deps.agentRunner.runLoop.mockImplementation(async (params: any) => { + if (params.telemetryTags.operationName === 'ingest-bundle-wu') { + await params.toolSet.emit_unmapped_fallback.execute( + { + rawPath: 'a.yml', + reason: 'conversion_metric_unsupported', + fallback: 'flagged', + }, + { toolCallId: 'fallback-1', messages: [] }, + ); + } + return { stopReason: 'natural' }; + }); + + const runner = buildRunner(deps); + (runner as any).stageRawFilesStage1 = vi.fn().mockResolvedValue({ + currentHashes: new Map([['a.yml', 'h1']]), + rawDirInWorktree: 'raw-sources/c1/fake/s', + }); + (runner as any).resolveStagedDir = vi.fn().mockResolvedValue('/tmp/stage/upload-x'); + + await runner.run({ + jobId: 'j1', + connectionId: 'c1', + sourceKey: 'fake', + trigger: 'upload', + bundleRef: { kind: 'upload', uploadId: 'upload-x' }, + }); + + expect(deps.reportsRepo.create).toHaveBeenCalledWith( + expect.objectContaining({ + body: expect.objectContaining({ + unmappedFallbacks: [ + { + rawPath: 'a.yml', + reason: 'conversion_metric_unsupported', + fallback: 'flagged', + }, + ], + }), + }), + ); + }); + + it('persists reconciliation conflict and eviction records in the report body', async () => { + const deps = makeDeps(); + deps.diffSetService.compute.mockResolvedValue({ + added: [], + modified: [], + deleted: ['views/old_orders.view.lkml'], + unchanged: [], + }); + deps.adapter.chunk.mockResolvedValue({ + workUnits: [], + eviction: { deletedRawPaths: 
['views/old_orders.view.lkml'] }, + }); + deps.agentRunner.runLoop.mockImplementation(async (params: any) => { + if (params.telemetryTags.operationName === 'ingest-bundle-reconcile') { + await params.toolSet.emit_conflict_resolution.execute( + { + kind: 'near_duplicate', + artifactKey: 'sl:orders', + detail: 'orders and old_orders overlapped; orders is retained as canonical', + flaggedForHuman: false, + }, + { toolCallId: 'conflict-1', messages: [] }, + ); + await params.toolSet.emit_eviction_decision.execute( + { + rawPath: 'views/old_orders.view.lkml', + artifactKind: 'sl', + artifactKey: 'old_orders', + action: 'removed', + reason: 'raw source disappeared in this sync', + }, + { toolCallId: 'eviction-1', messages: [] }, + ); + await params.toolSet.emit_unmapped_fallback.execute( + { + rawPath: 'cards/untranslated.json', + reason: 'metabase_sql_untranslated', + fallback: 'flagged', + }, + { toolCallId: 'fallback-1', messages: [] }, + ); + } + return { stopReason: 'natural' }; + }); + + const runner = buildRunner(deps); + (runner as any).stageRawFilesStage1 = vi.fn().mockResolvedValue({ + currentHashes: new Map([['cards/untranslated.json', 'h-card']]), + rawDirInWorktree: 'raw-sources/c1/fake/s', + }); + (runner as any).resolveStagedDir = vi.fn().mockResolvedValue('/tmp/stage/upload-x'); + + await runner.run({ + jobId: 'j1', + connectionId: 'c1', + sourceKey: 'fake', + trigger: 'upload', + bundleRef: { kind: 'upload', uploadId: 'upload-x' }, + }); + + expect(deps.reportsRepo.create).toHaveBeenCalledWith( + expect.objectContaining({ + body: expect.objectContaining({ + conflictsResolved: [ + { + kind: 'near_duplicate', + artifactKey: 'sl:orders', + detail: 'orders and old_orders overlapped; orders is retained as canonical', + flaggedForHuman: false, + }, + ], + evictionsApplied: [ + { + rawPath: 'views/old_orders.view.lkml', + artifactKind: 'sl', + artifactKey: 'old_orders', + action: 'removed', + reason: 'raw source disappeared in this sync', + }, + ], + 
unmappedFallbacks: [ + { + rawPath: 'cards/untranslated.json', + reason: 'metabase_sql_untranslated', + fallback: 'flagged', + }, + ], + }), + }), + ); + }); + + it('persists reconciliation artifact resolutions as provenance rows', async () => { + const deps = makeDeps(); + deps.diffSetService.compute.mockResolvedValue({ + added: [], + modified: [], + deleted: ['looks/20.json'], + unchanged: ['explores/b2b/sales_pipeline.json'], + }); + deps.adapter.chunk.mockResolvedValue({ + workUnits: [], + eviction: { deletedRawPaths: ['looks/20.json'] }, + }); + deps.agentRunner.runLoop.mockImplementation(async (params: any) => { + if (params.telemetryTags.operationName === 'ingest-bundle-reconcile') { + await params.toolSet.emit_artifact_resolution.execute( + { + rawPath: 'explores/b2b/sales_pipeline.json', + artifactKind: 'sl', + artifactKey: 'looker__b2b__sales_pipeline', + actionType: 'subsumed', + reason: 'File adapter source b2b__sales_pipeline is canonical.', + }, + { toolCallId: 'resolution-1', messages: [] }, + ); + } + return { stopReason: 'natural' }; + }); + + const runner = buildRunner(deps); + (runner as any).stageRawFilesStage1 = vi.fn().mockResolvedValue({ + currentHashes: new Map([['explores/b2b/sales_pipeline.json', 'h-explore']]), + rawDirInWorktree: 'raw-sources/c1/looker/s', + }); + (runner as any).resolveStagedDir = vi.fn().mockResolvedValue('/tmp/stage/upload-x'); + + await runner.run({ + jobId: 'j1', + connectionId: 'c1', + sourceKey: 'looker', + trigger: 'upload', + bundleRef: { kind: 'upload', uploadId: 'upload-x' }, + }); + + expect(deps.provenanceRepo.insertMany).toHaveBeenCalledWith( + expect.arrayContaining([ + expect.objectContaining({ + rawPath: 'explores/b2b/sales_pipeline.json', + artifactKind: 'sl', + artifactKey: 'looker__b2b__sales_pipeline', + actionType: 'subsumed', + }), + ]), + ); + expect(deps.reportsRepo.create).toHaveBeenCalledWith( + expect.objectContaining({ + body: expect.objectContaining({ + artifactResolutions: [ + { + rawPath: 
'explores/b2b/sales_pipeline.json',
+              artifactKind: 'sl',
+              artifactKey: 'looker__b2b__sales_pipeline',
+              actionType: 'subsumed',
+              reason: 'File adapter source b2b__sales_pipeline is canonical.',
+            },
+          ],
+        }),
+      }),
+    );
+  });
+
+  // Manual override: the runner is given a prior job id instead of an upload. The test
+  // asserts that the prior snapshot file is materialised under the pull dir, that the
+  // adapter is not asked to chunk again, and that the prior report is marked superseded.
+  it('runs manual override reconciliation from the prior report snapshot and marks the prior report superseded', async () => {
+    const tempRoot = await mkdtemp(join(tmpdir(), 'klo-override-'));
+    // try/finally so the temp directory is removed even when an assertion or the run itself fails.
+    try {
+      const deps = makeDeps();
+      deps.reportsRepo.findByJobId.mockResolvedValue({
+        id: 'report-old',
+        runId: 'run-old',
+        jobId: 'job-old',
+        connectionId: 'c1',
+        sourceKey: 'fake',
+        createdAt: '2026-04-27T10:00:00.000Z',
+        body: {
+          syncId: '2026-04-27-100000-job-old',
+          diffSummary: { added: 1, modified: 0, deleted: 0, unchanged: 0 },
+          commitSha: 'old-sha',
+          workUnits: [
+            {
+              unitKey: 'wu-orders',
+              rawFiles: ['a.yml'],
+              status: 'success',
+              actions: [
+                {
+                  target: 'sl',
+                  type: 'updated',
+                  key: 'orders',
+                  detail: 'captured gross_revenue as orders.gross_revenue',
+                },
+              ],
+              touchedSlSources: ['orders'],
+            },
+          ],
+          failedWorkUnits: [],
+          reconciliationSkipped: false,
+          conflictsResolved: [
+            {
+              kind: 'definitional_contradiction',
+              contestedKey: 'gross_revenue',
+              artifactKey: 'orders.gross_revenue',
+              detail: 'billing and orders disagree',
+              flaggedForHuman: true,
+            },
+          ],
+          evictionsApplied: [],
+          unmappedFallbacks: [],
+          evictionInputs: [],
+          unresolvedCards: [],
+          supersededBy: null,
+          overrideOf: null,
+        },
+      });
+      deps.gitService.listFilesAtHead.mockResolvedValue(['raw-sources/c1/fake/2026-04-27-100000-job-old/a.yml']);
+      deps.gitService.getFileAtCommit.mockResolvedValue('name: orders\n');
+      deps.diffSetService.compute.mockResolvedValue({ added: [], modified: [], deleted: [], unchanged: ['a.yml'] });
+      // The agent loop resolves the previously flagged conflict without human review.
+      deps.agentRunner.runLoop.mockImplementation(async (args: any) => {
+        await args.toolSet.emit_conflict_resolution.execute(
+          {
+            kind: 'definitional_contradiction',
+            contestedKey: 'gross_revenue',
+            artifactKey: 'orders.gross_revenue',
+            detail: 'canonical pin applied',
+            flaggedForHuman: false,
+          },
+          { toolCallId: 'tc-1', messages: [] },
+        );
+        return { stopReason: 'natural' };
+      });
+
+      // Reuse the standard test deps but point every storage path at the temp directory.
+      const runner = new IngestBundleRunner({
+        ...(buildRunner(deps) as any).deps,
+        storage: {
+          homeDir: tempRoot,
+          systemGitAuthor: { name: 'KLO Test', email: 'system@klo.local' },
+          resolveUploadDir: (uploadId: string) => join(tempRoot, 'ingest-uploads', uploadId),
+          resolvePullDir: (jobId: string) => join(tempRoot, 'ingest-pulls', jobId),
+          resolveTranscriptDir: (jobId: string) => join(tempRoot, 'run', 'wu-transcripts', jobId),
+        },
+      });
+
+      await runner.run({
+        jobId: 'job-new',
+        connectionId: 'c1',
+        sourceKey: 'fake',
+        trigger: 'manual_override',
+        bundleRef: { kind: 'override', priorJobId: 'job-old' },
+      });
+
+      await expect(readFile(join(tempRoot, 'ingest-pulls/job-new/a.yml'), 'utf-8')).resolves.toBe('name: orders\n');
+      expect(deps.adapter.chunk).not.toHaveBeenCalled();
+      expect(deps.agentRunner.runLoop).toHaveBeenCalled();
+      expect(deps.reportsRepo.create).toHaveBeenCalledWith(
+        expect.objectContaining({
+          jobId: 'job-new',
+          body: expect.objectContaining({
+            overrideOf: 'job-old',
+            supersededBy: null,
+            conflictsResolved: [
+              expect.objectContaining({
+                contestedKey: 'gross_revenue',
+                flaggedForHuman: false,
+              }),
+            ],
+          }),
+        }),
+      );
+      expect(deps.reportsRepo.markSuperseded).toHaveBeenCalledWith('job-old', 'job-new');
+    } finally {
+      await rm(tempRoot, { recursive: true, force: true });
+    }
+  });
+
+  it('passes connection canonical pins into each WorkUnit system prompt', async () => {
+    const deps = makeDeps();
+    deps.adapter.chunk.mockResolvedValue({
+      workUnits: [
+        {
+          unitKey: 'wu-orders',
+          rawFiles: ['cards/orders.yml'],
+          peerFileIndex: [],
+          dependencyPaths: [],
+        },
+      ],
+    });
+    deps.canonicalPins.listPins.mockResolvedValue([
+      {
+        contestedKey: 'gross_revenue',
+        canonicalArtifactKey: 'finance.gross_revenue',
+        pinnedAt: '2026-04-27T12:00:00.000Z',
+        pinnedBy: 'user-1',
+
reason: 'finance owns revenue definitions', + }, + ]); + deps.agentRunner.runLoop.mockResolvedValue({ stopReason: 'natural' }); + + const runner = buildRunner(deps); + (runner as any).stageRawFilesStage1 = vi.fn().mockResolvedValue({ + currentHashes: new Map([['cards/orders.yml', 'h1']]), + rawDirInWorktree: 'raw-sources/c1/fake/s', + }); + (runner as any).resolveStagedDir = vi.fn().mockResolvedValue('/tmp/stage/upload-x'); + + await runner.run({ + jobId: 'j1', + connectionId: 'c1', + sourceKey: 'fake', + trigger: 'upload', + bundleRef: { kind: 'upload', uploadId: 'upload-x' }, + }); + + const workUnitCall = deps.agentRunner.runLoop.mock.calls.find( + ([params]: any[]) => params.telemetryTags.operationName === 'ingest-bundle-wu', + ); + expect(workUnitCall?.[0].systemPrompt).toContain(''); + expect(workUnitCall?.[0].systemPrompt).toContain('contestedKey: gross_revenue'); + expect(workUnitCall?.[0].systemPrompt).toContain('canonicalArtifactKey: finance.gross_revenue'); + expect(deps.canonicalPins.listPins).toHaveBeenCalledTimes(1); + expect(deps.canonicalPins.listPins).toHaveBeenCalledWith(['c1']); + }); + + it('builds WorkUnit SL index and canonical pins across adapter target connections', async () => { + const deps = makeDeps(); + deps.adapter.listTargetConnectionIds = vi.fn().mockResolvedValue(['warehouse-2']); + deps.adapter.chunk.mockResolvedValue({ + workUnits: [ + { + unitKey: 'looker-explore-b2b-orders', + rawFiles: ['explores/b2b/orders.json'], + peerFileIndex: [], + dependencyPaths: [], + }, + ], + }); + deps.canonicalPins.listPins.mockResolvedValue([ + { + contestedKey: 'gross_revenue', + canonicalArtifactKey: 'finance.gross_revenue', + pinnedAt: '2026-05-01T12:00:00.000Z', + pinnedBy: 'user-1', + reason: 'finance owns revenue definitions', + }, + ]); + + const runner = buildRunner(deps); + (runner as any).stageRawFilesStage1 = vi.fn().mockResolvedValue({ + currentHashes: new Map([['explores/b2b/orders.json', 'h1']]), + rawDirInWorktree: 
'raw-sources/looker-run/fake/s', + }); + (runner as any).resolveStagedDir = vi.fn().mockResolvedValue('/tmp/stage/upload-x'); + + await runner.run({ + jobId: 'j1', + connectionId: 'looker-run', + sourceKey: 'fake', + trigger: 'upload', + bundleRef: { kind: 'upload', uploadId: 'upload-x' }, + }); + + const workUnitCall = deps.agentRunner.runLoop.mock.calls.find( + ([params]: any[]) => params.telemetryTags.operationName === 'ingest-bundle-wu', + ); + expect(deps.adapter.listTargetConnectionIds).toHaveBeenCalledWith('/tmp/stage/upload-x'); + expect(deps.semanticLayerService.listFilesForConnection).toHaveBeenCalledWith('looker-run'); + expect(deps.semanticLayerService.listFilesForConnection).toHaveBeenCalledWith('warehouse-2'); + expect(workUnitCall?.[0].userPrompt).toContain('looker__orders'); + expect(deps.canonicalPins.listPins).toHaveBeenCalledWith(['looker-run', 'warehouse-2']); + }); + + it('syncs wiki refs, reindexes, and records provenance on SL target connections', async () => { + const deps = makeDeps(); + let currentToolSession: any = null; + deps.adapter.listTargetConnectionIds = vi.fn().mockResolvedValue(['warehouse-2']); + deps.wikiService.readPage = vi.fn().mockResolvedValue({ + frontmatter: { sl_refs: ['looker__b2b__sales_pipeline.arr'] }, + }); + deps.semanticLayerService.loadAllSources.mockImplementation((connectionId: string) => + Promise.resolve([{ name: `${connectionId}_source` }]), + ); + deps.agentRunner.runLoop.mockImplementation(async (params: any) => { + if (params.telemetryTags.operationName === 'ingest-bundle-wu') { + currentToolSession.actions.push( + { + target: 'wiki', + type: 'created', + key: 'knowledge/global/pipeline.md', + detail: 'Pipeline article', + }, + { + target: 'sl', + type: 'created', + key: 'looker__b2b__sales_pipeline', + detail: 'Created warehouse source', + targetConnectionId: 'warehouse-2', + }, + ); + addTouchedSlSource(currentToolSession.touchedSlSources, 'warehouse-2', 'looker__b2b__sales_pipeline'); + } + return { 
stopReason: 'natural' }; + }); + deps.toolsetFactory.createIngestWuToolset.mockImplementation((toolSession: any) => { + currentToolSession = toolSession; + return { + toAiSdkTools: vi.fn().mockReturnValue({}), + getAllTools: vi.fn().mockReturnValue([]), + getToolNames: vi.fn().mockReturnValue([]), + }; + }); + + const runner = buildRunner(deps); + (runner as any).stageRawFilesStage1 = vi.fn().mockResolvedValue({ + currentHashes: new Map([['explores/b2b/sales_pipeline.json', 'h1']]), + rawDirInWorktree: 'raw-sources/looker-run/fake/s', + }); + (runner as any).resolveStagedDir = vi.fn().mockResolvedValue('/tmp/stage/upload-x'); + + await runner.run({ + jobId: 'j1', + connectionId: 'looker-run', + sourceKey: 'fake', + trigger: 'upload', + bundleRef: { kind: 'upload', uploadId: 'upload-x' }, + }); + + expect(deps.knowledgeSlRefs.syncFromWiki).toHaveBeenCalledWith({ + wikiPageKey: 'knowledge/global/pipeline.md', + wikiScope: 'GLOBAL', + wikiScopeId: null, + refs: [{ connectionId: 'warehouse-2', sourceName: 'looker__b2b__sales_pipeline' }], + }); + expect(deps.semanticLayerService.loadAllSources).toHaveBeenCalledWith('warehouse-2'); + expect(deps.slSearchService.indexSources).toHaveBeenCalledWith('warehouse-2', [{ name: 'warehouse-2_source' }]); + expect(deps.provenanceRepo.insertMany).toHaveBeenCalledWith( + expect.arrayContaining([ + expect.objectContaining({ + connectionId: 'looker-run', + targetConnectionId: 'warehouse-2', + artifactKind: 'sl', + artifactKey: 'looker__b2b__sales_pipeline', + }), + expect.objectContaining({ + connectionId: 'looker-run', + targetConnectionId: null, + artifactKind: 'wiki', + artifactKey: 'knowledge/global/pipeline.md', + }), + ]), + ); + expect(deps.reportsRepo.create).toHaveBeenCalledWith( + expect.objectContaining({ + body: expect.objectContaining({ + workUnits: [ + expect.objectContaining({ + touchedSlSources: [{ connectionId: 'warehouse-2', sourceName: 'looker__b2b__sales_pipeline' }], + }), + ], + provenanceRows: 
expect.arrayContaining([ + expect.objectContaining({ + artifactKind: 'sl', + artifactKey: 'looker__b2b__sales_pipeline', + targetConnectionId: 'warehouse-2', + }), + ]), + }), + }), + ); + }); + + it('runs a registered post-processor before squash, records the outcome, and reindexes touched sources after squash', async () => { + const deps = makeDeps(); + deps.adapter.source = 'metricflow'; + deps.registry.get.mockReturnValue(deps.adapter); + deps.adapter.chunk.mockResolvedValue({ + workUnits: [{ unitKey: 'u1', rawFiles: ['semantic_models.yml'], peerFileIndex: [], dependencyPaths: [] }], + parseArtifacts: { semanticModels: [{ name: 'orders' }] }, + }); + deps.semanticLayerService.loadAllSources.mockImplementation((connectionId: string) => + Promise.resolve([{ name: `${connectionId}_source` }]), + ); + const postProcessor = { + run: vi.fn().mockResolvedValue({ + result: { sourcesCreated: 1 }, + warnings: ['kept going'], + errors: [], + touchedSources: [{ connectionId: 'warehouse-2', sourceName: 'orders' }], + }), + }; + const runner = buildRunner(deps, { postProcessors: { metricflow: postProcessor } }); + (runner as any).stageRawFilesStage1 = vi.fn().mockResolvedValue({ + currentHashes: new Map([['semantic_models.yml', 'h1']]), + rawDirInWorktree: 'raw-sources/c1/metricflow/s', + }); + (runner as any).resolveStagedDir = vi.fn().mockResolvedValue('/tmp/stage/upload-x'); + + await runner.run({ + jobId: 'j1', + connectionId: 'c1', + sourceKey: 'metricflow', + trigger: 'upload', + bundleRef: { kind: 'upload', uploadId: 'upload-x' }, + }); + + expect(postProcessor.run).toHaveBeenCalledWith({ + connectionId: 'c1', + sourceKey: 'metricflow', + syncId: expect.any(String), + jobId: 'j1', + runId: 'run-1', + workdir: '/tmp/wt', + parseArtifacts: { semanticModels: [{ name: 'orders' }] }, + }); + expect(deps.reportsRepo.create).toHaveBeenCalledWith( + expect.objectContaining({ + body: expect.objectContaining({ + postProcessor: { + sourceKey: 'metricflow', + status: 'success', + 
result: { sourcesCreated: 1 }, + warnings: ['kept going'], + errors: [], + touchedSources: [{ connectionId: 'warehouse-2', sourceName: 'orders' }], + }, + }), + }), + ); + expect(deps.semanticLayerService.loadAllSources).toHaveBeenCalledWith('warehouse-2'); + expect(deps.slSearchService.indexSources).toHaveBeenCalledWith('warehouse-2', [{ name: 'warehouse-2_source' }]); + expect(deps.sessionWorktreeService.cleanup).toHaveBeenCalledWith(expect.any(Object), 'success'); + }); + + it('marks post-processor infrastructure failure as failed and preserves worktree cleanup state', async () => { + const deps = makeDeps(); + deps.adapter.source = 'metricflow'; + deps.registry.get.mockReturnValue(deps.adapter); + deps.adapter.chunk.mockResolvedValue({ + workUnits: [{ unitKey: 'u1', rawFiles: ['semantic_models.yml'], peerFileIndex: [], dependencyPaths: [] }], + parseArtifacts: { semanticModels: [{ name: 'orders' }] }, + }); + const postProcessor = { run: vi.fn().mockRejectedValue(new Error('worktree write failed')) }; + const runner = buildRunner(deps, { postProcessors: { metricflow: postProcessor } }); + (runner as any).stageRawFilesStage1 = vi.fn().mockResolvedValue({ + currentHashes: new Map([['semantic_models.yml', 'h1']]), + rawDirInWorktree: 'raw-sources/c1/metricflow/s', + }); + (runner as any).resolveStagedDir = vi.fn().mockResolvedValue('/tmp/stage/upload-x'); + + await expect( + runner.run({ + jobId: 'j1', + connectionId: 'c1', + sourceKey: 'metricflow', + trigger: 'upload', + bundleRef: { kind: 'upload', uploadId: 'upload-x' }, + }), + ).rejects.toThrow('worktree write failed'); + + expect(deps.runsRepo.markFailed).toHaveBeenCalledWith('run-1'); + expect(deps.gitService.squashMergeIntoMain).not.toHaveBeenCalled(); + expect(deps.sessionWorktreeService.cleanup).toHaveBeenCalledWith(expect.any(Object), 'crash'); + }); + + it('includes existing global wiki pages in WorkUnit prompts', async () => { + const deps = makeDeps(); + 
deps.knowledgeIndex.listPagesForUser.mockResolvedValue([ + { + page_key: 'revenue-recognition', + summary: 'Recognize revenue net of refunds after fulfillment.', + scope: 'GLOBAL', + scope_id: null, + }, + ]); + + const runner = buildRunner(deps); + (runner as any).stageRawFilesStage1 = vi.fn().mockResolvedValue({ + currentHashes: new Map([['cards/orders.yml', 'h1']]), + rawDirInWorktree: 'raw-sources/c1/fake/s', + }); + (runner as any).resolveStagedDir = vi.fn().mockResolvedValue('/tmp/stage/upload-x'); + + await runner.run({ + jobId: 'j1', + connectionId: 'c1', + sourceKey: 'fake', + trigger: 'upload', + bundleRef: { kind: 'upload', uploadId: 'upload-x' }, + }); + + const workUnitCall = deps.agentRunner.runLoop.mock.calls.find( + ([params]: any[]) => params.telemetryTags.operationName === 'ingest-bundle-wu', + ); + expect(workUnitCall?.[0].userPrompt).toContain('## Knowledge Pages'); + expect(workUnitCall?.[0].userPrompt).toContain( + '- revenue-recognition: Recognize revenue net of refunds after fulfillment.', + ); + expect(deps.knowledgeIndex.listPagesForUser).toHaveBeenCalledWith('system'); + }); + + it('passes relevant canonical pins into the reconciliation system prompt', async () => { + const deps = makeDeps(); + deps.diffSetService.compute.mockResolvedValue({ + added: [], + modified: [], + deleted: ['metrics/old.yml'], + unchanged: [], + }); + deps.adapter.chunk.mockResolvedValue({ + workUnits: [ + { + unitKey: 'wu-billing', + rawFiles: ['metrics/churn_risk_score.yml'], + peerFileIndex: [], + dependencyPaths: [], + }, + ], + eviction: { deletedRawPaths: ['metrics/old.yml'] }, + }); + deps.canonicalPins.listPins.mockResolvedValue([ + { + contestedKey: 'churn_risk_score', + canonicalArtifactKey: 'billing.churn_risk_score', + pinnedAt: '2026-04-27T12:00:00.000Z', + pinnedBy: 'user-1', + reason: 'billing owns the contractual definition', + }, + { + contestedKey: 'gross_margin', + canonicalArtifactKey: 'finance.gross_margin', + pinnedAt: 
'2026-04-27T12:01:00.000Z',
+        pinnedBy: 'user-2',
+        reason: null,
+      },
+    ]);
+    // Every agent loop (WorkUnit and reconciliation alike) simply finishes naturally.
+    deps.agentRunner.runLoop.mockResolvedValue({ stopReason: 'natural' });
+
+    const runner = buildRunner(deps);
+    (runner as any).stageRawFilesStage1 = vi.fn().mockResolvedValue({
+      currentHashes: new Map([
+        ['metrics/churn_risk_score.yml', 'h1'],
+        ['metrics/old.yml', 'h2'],
+      ]),
+      rawDirInWorktree: 'raw-sources/c1/fake/s',
+    });
+    (runner as any).resolveStagedDir = vi.fn().mockResolvedValue('/tmp/stage/upload-x');
+
+    await runner.run({
+      jobId: 'j1',
+      connectionId: 'c1',
+      sourceKey: 'fake',
+      trigger: 'upload',
+      bundleRef: { kind: 'upload', uploadId: 'upload-x' },
+    });
+
+    const reconcileCall = deps.agentRunner.runLoop.mock.calls.find(
+      ([params]: any[]) => params.telemetryTags.operationName === 'ingest-bundle-reconcile',
+    );
+    // NOTE(review): as written this expects the empty string, which every prompt contains,
+    // so it cannot fail — confirm the section marker that was meant to be asserted here.
+    expect(reconcileCall?.[0].systemPrompt).toContain('');
+    expect(reconcileCall?.[0].systemPrompt).toContain('contestedKey: churn_risk_score');
+    expect(reconcileCall?.[0].systemPrompt).not.toContain('gross_margin');
+    expect(deps.canonicalPins.listPins).toHaveBeenCalledWith(['c1']);
+  });
+
+  // Collects every (progress, message) pair reported through the job context and checks
+  // that progress never goes backwards, ends at 1.0, and that each phase label appears.
+  it('emits a monotonically non-decreasing progress sequence reaching 1.0, covering all 7 stages', async () => {
+    const deps = makeDeps();
+    // Report one finished step per agent loop whenever the runner supplies onStepFinish,
+    // so the agent-driven stages have a per-step progress update to emit.
+    deps.agentRunner.runLoop.mockImplementation(async (params: any) => {
+      if (params.onStepFinish) {
+        await params.onStepFinish({ stepIndex: 1, stepBudget: params.stepBudget });
+      }
+      return { stopReason: 'natural' };
+    });
+
+    const runner = buildRunner(deps);
+    (runner as any).stageRawFilesStage1 = vi.fn().mockResolvedValue({
+      currentHashes: new Map([['a.yml', 'h1']]),
+      rawDirInWorktree: 'raw-sources/c1/fake/s',
+    });
+    (runner as any).resolveStagedDir = vi.fn().mockResolvedValue('/tmp/stage/upload-x');
+
+    const observed: Array<{ p: number; m?: string }> = [];
+    const ctx = new TestJobContext(
+      'j1',
+      null,
+      () => Promise.resolve(),
+      (p, m) => {
+        observed.push({ p, m });
+        return Promise.resolve();
+      },
+    );
+
+    await runner.run(
+      {
+        jobId: 'j1',
+        connectionId: 'c1',
+        sourceKey: 'fake',
+        trigger: 'upload',
+        bundleRef: { kind: 'upload', uploadId: 'upload-x' },
+      },
+      ctx,
+    );
+
+    // Monotonic.
+    for (let i = 1; i < observed.length; i++) {
+      expect(observed[i].p).toBeGreaterThanOrEqual(observed[i - 1].p);
+    }
+    // Reaches completion.
+    expect(observed.at(-1)?.p).toBeCloseTo(1.0, 3);
+    // Every stage surfaces a user-facing message.
+    const phaseLabels = [
+      'Fetching source files',
+      'Planning updates',
+      'Processing',
+      /Reconcil|reconcil/,
+      'Saving changes',
+      'Recording history',
+      'Wrapping up',
+    ];
+    for (const label of phaseLabels) {
+      expect(observed.some((o) => (typeof label === 'string' ? o.m?.includes(label) : label.test(o.m ?? '')))).toBe(
+        true,
+      );
+    }
+  });
+
+  it('a Stage 3 failure leaves the shared knowledge table untouched', async () => {
+    const deps = makeDeps();
+    // Agent runner returns a successful result but the adapter emits a WU whose
+    // outcome still produces no actions — the point is that the scoped wiki service
+    // must not touch indexRepository during Stage 3, and syncFromCommit is what
+    // drives the shared table. If we cancel the run before squash, syncFromCommit
+    // must not be called.
+ deps.gitService.squashMergeIntoMain.mockRejectedValue(new Error('simulated squash failure')); + const runner = buildRunner(deps); + (runner as any).stageRawFilesStage1 = vi.fn().mockResolvedValue({ + currentHashes: new Map([['a.yml', 'h1']]), + rawDirInWorktree: 'raw-sources/c1/fake/s', + }); + (runner as any).resolveStagedDir = vi.fn().mockResolvedValue('/tmp/stage/upload-x'); + + await expect( + runner.run({ + jobId: 'j1', + connectionId: 'c1', + sourceKey: 'fake', + trigger: 'upload', + bundleRef: { kind: 'upload', uploadId: 'upload-x' }, + }), + ).rejects.toThrow(/simulated squash failure/); + expect(deps.wikiService.syncFromCommit).not.toHaveBeenCalled(); + }); + + it('refuses to squash-merge when the session worktree has an in-progress sequencer op', async () => { + const deps = makeDeps(); + const assertError = new Error('Worktree has in-progress git operation (sequencer ...); refusing to proceed'); + const sessionGit = { + revParseHead: vi.fn().mockResolvedValue('h'), + commitFiles: vi.fn(), + resetHardTo: vi.fn(), + assertWorktreeClean: vi.fn().mockRejectedValue(assertError), + }; + deps.sessionWorktreeService.create.mockResolvedValue({ + chatId: 'j1', + workdir: '/tmp/wt', + branch: 'session/j1', + baseSha: 'b', + createdAt: new Date(), + git: sessionGit, + config: {}, + }); + const runner = buildRunner(deps); + (runner as any).stageRawFilesStage1 = vi.fn().mockResolvedValue({ + currentHashes: new Map([['a.yml', 'h1']]), + rawDirInWorktree: 'raw-sources/c1/fake/s', + }); + (runner as any).resolveStagedDir = vi.fn().mockResolvedValue('/tmp/stage/upload-x'); + + await expect( + runner.run({ + jobId: 'j1', + connectionId: 'c1', + sourceKey: 'fake', + trigger: 'upload', + bundleRef: { kind: 'upload', uploadId: 'upload-x' }, + }), + ).rejects.toThrow(/in-progress git operation/); + expect(deps.runsRepo.markFailed).toHaveBeenCalledWith('run-1'); + expect(deps.gitService.squashMergeIntoMain).not.toHaveBeenCalled(); + }); + + it('squash-merges only successful 
WUs into main when one WU fails sl_validate', async () => { + const homeDir = await mkdtemp(join(tmpdir(), 'ingest-rollback-')); + try { + const configDir = join(homeDir, 'config'); + const mainGit = new GitService({ + storage: { configDir, homeDir }, + git: { + userName: 'System User', + userEmail: 'system@example.com', + bootstrapMessage: 'Initialize test config repo', + bootstrapAuthor: 'test-system', + bootstrapAuthorEmail: 'system@example.com', + }, + }); + await mainGit.onModuleInit(); + const baseSha = await mainGit.revParseHead(); + if (!baseSha) { + throw new Error('no base sha'); + } + + const deps = makeDeps(); + const sessionDir = join(homeDir, '.worktrees', 'session-j1'); + const sessionBranch = 'session/j1'; + let currentToolSession: any = null; + + deps.gitService = mainGit as any; + deps.sessionWorktreeService.create.mockImplementation(async (_jobId: string, startSha: string) => { + await mkdir(join(homeDir, '.worktrees'), { recursive: true }); + await mainGit.addWorktree(sessionDir, sessionBranch, startSha); + return { + chatId: 'j1', + workdir: sessionDir, + branch: sessionBranch, + baseSha: startSha, + createdAt: new Date(), + git: mainGit.forWorktree(sessionDir), + config: {}, + }; + }); + deps.sessionWorktreeService.cleanup.mockResolvedValue(undefined); + deps.adapter.chunk.mockResolvedValue({ + workUnits: [ + { unitKey: 'wu-good', rawFiles: ['good.raw'], peerFileIndex: [], dependencyPaths: [] }, + { unitKey: 'wu-bad', rawFiles: ['bad.raw'], peerFileIndex: [], dependencyPaths: [] }, + ], + }); + deps.toolsetFactory.createIngestWuToolset.mockImplementation((toolSession: any) => { + currentToolSession = toolSession; + return { + toAiSdkTools: vi.fn().mockReturnValue({}), + getAllTools: vi.fn().mockReturnValue([]), + getToolNames: vi.fn().mockReturnValue([]), + }; + }); + deps.slValidator.validateSingleSource.mockImplementation( + (_validationDeps: unknown, _connectionId: string, sourceName: string) => ({ + errors: sourceName === 'bad' ? 
[{ message: 'bad source rejected' }] : [], + warnings: [], + }), + ); + deps.agentRunner.runLoop.mockImplementation(async (params: any) => { + const unitKey = params.telemetryTags?.unitKey; + if (unitKey === 'wu-good') { + await mkdir(join(sessionDir, 'semantic-layer', 'c1'), { recursive: true }); + await writeFile(join(sessionDir, 'semantic-layer', 'c1', 'good.yaml'), 'name: good\n'); + addTouchedSlSource(currentToolSession.touchedSlSources, 'c1', 'good'); + currentToolSession.actions.push({ target: 'sl', type: 'created', key: 'good', detail: '' }); + await currentToolSession.gitService.commitFiles( + ['semantic-layer/c1/good.yaml'], + 'test: add good source', + 'KLO Test', + 'system@klo.local', + ); + } + if (unitKey === 'wu-bad') { + await mkdir(join(sessionDir, 'semantic-layer', 'c1'), { recursive: true }); + await writeFile(join(sessionDir, 'semantic-layer', 'c1', 'bad.yaml'), 'name: bad\n'); + addTouchedSlSource(currentToolSession.touchedSlSources, 'c1', 'bad'); + currentToolSession.actions.push({ target: 'sl', type: 'created', key: 'bad', detail: '' }); + await currentToolSession.gitService.commitFiles( + ['semantic-layer/c1/bad.yaml'], + 'test: add bad source', + 'KLO Test', + 'system@klo.local', + ); + } + return { stopReason: 'natural' }; + }); + + const runner = buildRunner(deps); + (runner as any).stageRawFilesStage1 = vi.fn().mockImplementation(async ({ worktreeRoot }: any) => { + const rawDir = join(worktreeRoot, 'raw-sources', 'c1', 'fake', 's'); + await mkdir(rawDir, { recursive: true }); + await writeFile(join(rawDir, 'good.raw'), 'good raw'); + await writeFile(join(rawDir, 'bad.raw'), 'bad raw'); + return { + currentHashes: new Map([ + ['good.raw', 'good-hash'], + ['bad.raw', 'bad-hash'], + ]), + rawDirInWorktree: 'raw-sources/c1/fake/s', + }; + }); + (runner as any).resolveStagedDir = vi.fn().mockResolvedValue('/tmp/stage/upload-x'); + + const result = await runner.run({ + jobId: 'j1', + connectionId: 'c1', + sourceKey: 'fake', + trigger: 
'upload', + bundleRef: { kind: 'upload', uploadId: 'upload-x' }, + }); + + expect(result.failedWorkUnits).toEqual(['wu-bad']); + expect(await readFile(join(configDir, 'semantic-layer', 'c1', 'good.yaml'), 'utf-8')).toContain('good'); + expect(await readFile(join(configDir, 'semantic-layer', 'c1', 'bad.yaml'), 'utf-8').catch(() => null)).toBeNull(); + expect(deps.reportsRepo.create).toHaveBeenCalledWith( + expect.objectContaining({ + body: expect.objectContaining({ + failedWorkUnits: ['wu-bad'], + }), + }), + ); + await expect(stat(join(configDir, '.git', 'sequencer'))).rejects.toThrow(); + } finally { + await rm(homeDir, { recursive: true, force: true }); + } + }); + + it('fails the run and rethrows when the adapter cannot detect the bundle', async () => { + const deps = makeDeps(); + deps.adapter.detect.mockResolvedValue(false); + const runner = buildRunner(deps); + (runner as any).stageRawFilesStage1 = vi.fn().mockResolvedValue({ + currentHashes: new Map([['a.yml', 'h1']]), + rawDirInWorktree: 'raw-sources/c1/fake/s', + }); + (runner as any).resolveStagedDir = vi.fn().mockResolvedValue('/tmp/stage/upload-x'); + + await expect( + runner.run({ + jobId: 'j1', + connectionId: 'c1', + sourceKey: 'fake', + trigger: 'upload', + bundleRef: { kind: 'upload', uploadId: 'upload-x' }, + }), + ).rejects.toThrow(/did not recognize/); + expect(deps.runsRepo.markFailed).toHaveBeenCalledWith('run-1'); + }); +}); diff --git a/packages/context/src/ingest/ingest-bundle.runner.ts b/packages/context/src/ingest/ingest-bundle.runner.ts new file mode 100644 index 00000000..f25a1782 --- /dev/null +++ b/packages/context/src/ingest/ingest-bundle.runner.ts @@ -0,0 +1,1381 @@ +import { mkdir, readFile, rm, writeFile } from 'node:fs/promises'; +import { dirname, join } from 'node:path'; +import { type Tool, tool } from 'ai'; +import pLimit from 'p-limit'; +import { z } from 'zod'; +import { type KloLogger, noopLogger } from '../core/index.js'; +import type { CaptureSession, MemoryAction } from 
'../memory/index.js'; +import type { SlValidationDeps } from '../sl/index.js'; +import { createTouchedSlSources, type ToolContext, type ToolSession } from '../tools/index.js'; +import { actionTargetConnectionId } from './action-identity.js'; +import { selectRelevantCanonicalPins } from './canonical-pins.js'; +import { sanitizeMemoryFlowError } from './memory-flow/live-buffer.js'; +import type { MemoryFlowPlannedWorkUnit } from './memory-flow/types.js'; +import type { ContextEvidenceIndexSummary, IngestBundleRunnerDeps, PageTriageRunResult } from './ports.js'; +import { buildSyncId, rawSourcesDirForSync } from './raw-sources-paths.js'; +import { + buildStageIndexFromReportBody, + type IngestReportPostProcessorOutcome, + type IngestReportSnapshot, +} from './reports.js'; +import { + buildReconcileSystemPrompt, + buildReconcileToolSet, + buildReconcileUserPrompt, +} from './stages/build-reconcile-context.js'; +import { buildWuSystemPrompt, buildWuToolSet, buildWuUserPrompt } from './stages/build-wu-context.js'; +import { stageRawFilesStage1 } from './stages/stage-1-stage-raw-files.js'; +import { executeWorkUnit, type WorkUnitOutcome } from './stages/stage-3-work-units.js'; +import { runReconciliationStage4 } from './stages/stage-4-reconciliation.js'; +import type { StageIndex } from './stages/stage-index.types.js'; +import { validateWuTouchedSources } from './stages/validate-wu-sources.js'; +import { createEmitArtifactResolutionTool } from './tools/emit-artifact-resolution.tool.js'; +import { createEmitConflictResolutionTool } from './tools/emit-conflict-resolution.tool.js'; +import { createEmitEvictionDecisionTool } from './tools/emit-eviction-decision.tool.js'; +import { createEmitUnmappedFallbackTool } from './tools/emit-unmapped-fallback.tool.js'; +import { createEvictionListTool } from './tools/eviction-list.tool.js'; +import { createReadRawSpanTool } from './tools/read-raw-span.tool.js'; +import { createStageDiffTool } from './tools/stage-diff.tool.js'; +import 
{ createStageListTool } from './tools/stage-list.tool.js'; +import { type ToolCallLogEntry, wrapToolsWithLogger } from './tools/tool-call-logger.js'; +import type { + EvictionUnit, + IngestBundleJob, + IngestBundleResult, + IngestJobContext, + UnresolvedCardInfo, + WorkUnit, +} from './types.js'; + +interface MutableToolTranscriptSummary { + unitKey: string; + path: string; + toolCallCount: number; + errorCount: number; + toolNames: Set; +} + +function workUnitToMemoryFlowPlannedWorkUnit(workUnit: WorkUnit): MemoryFlowPlannedWorkUnit { + return { + unitKey: workUnit.unitKey, + rawFiles: workUnit.rawFiles, + peerFileCount: workUnit.peerFileIndex.length, + dependencyCount: workUnit.dependencyPaths.length, + }; +} + +function stageIndexWorkUnitToMemoryFlowPlannedWorkUnit( + workUnit: StageIndex['workUnits'][number], +): MemoryFlowPlannedWorkUnit { + return { + unitKey: workUnit.unitKey, + rawFiles: workUnit.rawFiles, + peerFileCount: 0, + dependencyCount: 0, + }; +} + +function countMemoryFlowActions(actions: MemoryAction[], target: MemoryAction['target']): number { + return actions.filter((action) => action.target === target).length; +} + +function reportIdFromCreateResult(result: unknown): string | undefined { + if (!result || typeof result !== 'object' || !('id' in result)) { + return undefined; + } + const id = (result as { id?: unknown }).id; + return typeof id === 'string' && id.length > 0 ? id : undefined; +} + +export class IngestBundleRunner { + private readonly logger: KloLogger; + private readonly chainByConnection = new Map>(); + + constructor(private readonly deps: IngestBundleRunnerDeps) { + this.logger = deps.logger ?? noopLogger; + } + + async run(job: IngestBundleJob, ctx?: IngestJobContext): Promise { + const key = job.connectionId; + const previous = this.chainByConnection.get(key); + if (previous) { + this.logger.log(`[ingest-bundle] queued behind previous job for connection=${key}`); + } + const run = (previous ?? 
Promise.resolve()).catch(() => undefined).then(() => this.runInner(job, ctx)); + const chainSlot = run.finally(() => { + if (this.chainByConnection.get(key) === chainSlot) { + this.chainByConnection.delete(key); + } + }); + // Keep the chain alive but silence unhandled rejection — callers await `run` directly. + chainSlot.catch(() => undefined); + this.chainByConnection.set(key, chainSlot); + try { + const result = await run; + ctx?.memoryFlow?.finish('done'); + return { ...result, jobId: job.jobId }; + } catch (error) { + ctx?.memoryFlow?.finish('error', [sanitizeMemoryFlowError(error)]); + throw error; + } + } + + protected stageRawFilesStage1 = stageRawFilesStage1; + + private async syncKnowledgeSlRefsFromActions(connectionId: string, actions: MemoryAction[]): Promise { + if (!this.deps.knowledgeSlRefs) { + return; + } + + const slTargetsBySourceName = new Map>(); + const wikiActionsByKey = new Map(); + for (const action of actions) { + if (action.target === 'sl') { + const bucket = slTargetsBySourceName.get(action.key) ?? new Set(); + bucket.add(actionTargetConnectionId(action, connectionId)); + slTargetsBySourceName.set(action.key, bucket); + } + if (action.target === 'wiki') { + wikiActionsByKey.set(action.key, action); + } + } + + for (const action of wikiActionsByKey.values()) { + if (action.type === 'removed') { + await this.deps.knowledgeSlRefs.syncFromWiki({ + wikiPageKey: action.key, + wikiScope: 'GLOBAL', + wikiScopeId: null, + refs: [], + }); + continue; + } + + const page = await this.deps.wikiService.readPage('GLOBAL', null, action.key); + const bareSources = [ + ...new Set( + (page?.frontmatter.sl_refs ?? 
[]) + .map((ref) => ref.split('.')[0]) + .filter((sourceName): sourceName is string => sourceName.length > 0), + ), + ]; + const refs = bareSources.flatMap((sourceName) => { + const targets = slTargetsBySourceName.get(sourceName); + if (!targets || targets.size === 0) { + return [{ connectionId, sourceName }]; + } + return [...targets].sort().map((targetConnectionId) => ({ connectionId: targetConnectionId, sourceName })); + }); + + await this.deps.knowledgeSlRefs.syncFromWiki({ + wikiPageKey: action.key, + wikiScope: 'GLOBAL', + wikiScopeId: null, + refs, + }); + } + } + + protected async materializeOverrideSnapshot( + report: IngestReportSnapshot, + ctx: { connectionId: string; sourceKey: string; jobId: string }, + ): Promise { + const rawRoot = rawSourcesDirForSync(ctx.connectionId, ctx.sourceKey, report.body.syncId); + const files = await this.deps.gitService.listFilesAtHead(rawRoot); + if (files.length === 0) { + throw new Error(`override ingest: no raw-source files found for prior sync ${report.body.syncId}`); + } + + const stagedDir = this.deps.storage.resolvePullDir(ctx.jobId); + await rm(stagedDir, { recursive: true, force: true }); + await mkdir(stagedDir, { recursive: true }); + + for (const file of files) { + const relativePath = file.startsWith(`${rawRoot}/`) ? 
file.slice(rawRoot.length + 1) : file; + const absoluteTarget = join(stagedDir, relativePath); + await mkdir(dirname(absoluteTarget), { recursive: true }); + await writeFile(absoluteTarget, await this.deps.gitService.getFileAtCommit(file, 'HEAD'), 'utf-8'); + } + + return stagedDir; + } + + protected async loadOverrideReport(job: IngestBundleJob): Promise { + if (job.bundleRef.kind !== 'override') { + return null; + } + const report = await this.deps.reports.findByJobId(job.bundleRef.priorJobId); + if (!report) { + throw new Error(`override ingest: prior report ${job.bundleRef.priorJobId} not found`); + } + if (report.connectionId !== job.connectionId || report.sourceKey !== job.sourceKey) { + throw new Error( + `override ingest: prior report ${job.bundleRef.priorJobId} belongs to ${report.connectionId}/${report.sourceKey}, not ${job.connectionId}/${job.sourceKey}`, + ); + } + return report; + } + + protected async resolveStagedDir( + ref: IngestBundleJob['bundleRef'], + ctx: { connectionId: string; sourceKey: string; jobId: string }, + ): Promise { + if (ref.kind === 'upload') { + return this.deps.storage.resolveUploadDir(ref.uploadId); + } + if (ref.kind === 'override') { + throw new Error('override bundle refs must be materialized from the prior report snapshot'); + } + const stagedDir = this.deps.storage.resolvePullDir(ctx.jobId); + await mkdir(stagedDir, { recursive: true }); + const adapter = this.deps.registry.get(ctx.sourceKey); + if (!adapter.fetch) { + throw new Error(`source adapter '${ctx.sourceKey}' does not support scheduled_pull (no fetch() method)`); + } + await adapter.fetch(ref.config, stagedDir, { connectionId: ctx.connectionId, sourceKey: ctx.sourceKey }); + return stagedDir; + } + + protected buildCommitMessage( + job: IngestBundleJob, + syncId: string, + diffSummary: { added: number; modified: number; deleted: number; unchanged: number }, + failedWUs: string[], + ): string { + const diff = 
`+${diffSummary.added}/~${diffSummary.modified}/-${diffSummary.deleted}/=${diffSummary.unchanged}`; + const failed = failedWUs.length > 0 ? `; failed WUs: ${failedWUs.join(', ')}` : ''; + return `ingest(${job.sourceKey}): ${job.jobId} syncId=${syncId} diff=${diff}${failed}`; + } + + private async buildWikiIndex(): Promise { + const pages = await this.deps.knowledgeIndex?.listPagesForUser('system'); + if (!pages || pages.length === 0) { + return '(empty)'; + } + + return `## Knowledge Pages\n${pages.map((page) => `- ${page.page_key}: ${page.summary}`).join('\n')}`; + } + + private async buildSlIndex(connectionIds: string[]): Promise { + const blocks = await Promise.all( + connectionIds.map(async (connectionId) => { + try { + const files = await this.deps.semanticLayerService.listFilesForConnection(connectionId); + const names = files.filter((f) => !f.startsWith('_schema/')).map((f) => f.replace(/\.yaml$/, '')); + const body = names.length > 0 ? names.join('\n') : '(no sources yet)'; + return `## ${connectionId}\n${body}`; + } catch { + return `## ${connectionId}\n(empty)`; + } + }), + ); + return blocks.join('\n\n'); + } + + private resolveContextCuratorBudget( + bundleRef: IngestBundleJob['bundleRef'], + stageIndex: StageIndex, + ): { creates: number; updates: number } { + const rawConfig = + bundleRef.kind === 'scheduled_pull' && bundleRef.config && typeof bundleRef.config === 'object' + ? (bundleRef.config as Record) + : {}; + const configuredCreates = + typeof rawConfig.maxKnowledgeCreatesPerRun === 'number' ? rawConfig.maxKnowledgeCreatesPerRun : 5; + const configuredUpdates = + typeof rawConfig.maxKnowledgeUpdatesPerRun === 'number' ? 
rawConfig.maxKnowledgeUpdatesPerRun : 20; + const wikiActions = stageIndex.workUnits.flatMap((wu) => wu.actions).filter((action) => action.target === 'wiki'); + const usedCreates = wikiActions.filter((action) => action.type === 'created').length; + const usedUpdates = wikiActions.filter((action) => action.type === 'updated').length; + + return { + creates: Math.max(0, configuredCreates - usedCreates), + updates: Math.max(0, configuredUpdates - usedUpdates), + }; + } + + private buildFailedWorkUnitOutcome(wu: WorkUnit, error: unknown): WorkUnitOutcome { + return { + unitKey: wu.unitKey, + status: 'failed', + reason: error instanceof Error ? error.message : String(error), + preSha: '', + postSha: '', + actions: [], + touchedSlSources: [], + slDisallowed: wu.slDisallowed, + slDisallowedReason: wu.slDisallowedReason, + }; + } + + private formatWorkUnitFailure(outcome: WorkUnitOutcome): string { + return `WorkUnit ${outcome.unitKey} failed: ${outcome.reason ?? 'unknown failure'}`; + } + + private filterWorkUnitsForTriage( + workUnits: WorkUnit[], + triageResult: { enabled: boolean; fullRawPaths: Set } | null, + ): WorkUnit[] { + if (!triageResult?.enabled) { + return workUnits; + } + return workUnits.filter((wu) => wu.rawFiles.some((rawPath) => triageResult.fullRawPaths.has(rawPath))); + } + + protected async runInner(job: IngestBundleJob, ctx?: IngestJobContext): Promise> { + const syncId = buildSyncId(new Date(), job.jobId); + const memoryFlow = ctx?.memoryFlow; + const baseSha = await this.deps.lockingService.withLock('config:repo', () => this.deps.gitService.revParseHead()); + if (!baseSha) { + throw new Error('ingest-bundle: config repo has no HEAD'); + } + const transcriptDir = this.deps.storage.resolveTranscriptDir(job.jobId); + const transcriptSummaries = new Map(); + const recordTranscriptEntry = + (path: string) => + (entry: ToolCallLogEntry): void => { + const current = + transcriptSummaries.get(entry.wuKey) ?? 
+ ({ + unitKey: entry.wuKey, + path, + toolCallCount: 0, + errorCount: 0, + toolNames: new Set(), + } satisfies MutableToolTranscriptSummary); + current.toolCallCount += 1; + current.errorCount += entry.error ? 1 : 0; + current.toolNames.add(entry.toolName); + transcriptSummaries.set(entry.wuKey, current); + }; + const overrideReport = await this.loadOverrideReport(job); + + const stage1 = ctx?.startPhase(0.08); + await stage1?.updateProgress(0.0, 'Fetching source files'); + + const adapter = this.deps.registry.get(job.sourceKey); + const stagedDir = overrideReport + ? await this.materializeOverrideSnapshot(overrideReport, { + connectionId: job.connectionId, + sourceKey: job.sourceKey, + jobId: job.jobId, + }) + : await this.resolveStagedDir(job.bundleRef, { + connectionId: job.connectionId, + sourceKey: job.sourceKey, + jobId: job.jobId, + }); + const fetchReport = adapter.readFetchReport ? await adapter.readFetchReport(stagedDir) : null; + + const scopeDescriptor = adapter.describeScope ? await adapter.describeScope(stagedDir) : null; + + const sessionWorktree = await this.deps.lockingService.withLock('config:repo', () => + this.deps.sessionWorktreeService.create(job.jobId, baseSha), + ); + let cleanupOutcome: 'success' | 'crash' = 'crash'; + + try { + const { currentHashes, rawDirInWorktree } = await this.stageRawFilesStage1({ + stagedDir, + worktreeRoot: sessionWorktree.workdir, + connectionId: job.connectionId, + sourceKey: job.sourceKey, + syncId, + }); + memoryFlow?.update({ + connectionId: job.connectionId, + adapter: job.sourceKey, + sourceDir: stagedDir, + syncId, + }); + memoryFlow?.emit({ + type: 'source_acquired', + adapter: job.sourceKey, + trigger: job.trigger, + fileCount: currentHashes.size, + }); + memoryFlow?.emit({ type: 'scope_detected', fingerprint: scopeDescriptor?.fingerprint ?? 
null }); + memoryFlow?.emit({ type: 'raw_snapshot_written', syncId, rawFileCount: currentHashes.size }); + + await sessionWorktree.git.commitFiles( + [rawDirInWorktree], + `ingest(${job.sourceKey}): stage raw files syncId=${syncId}`, + this.deps.storage.systemGitAuthor.name, + this.deps.storage.systemGitAuthor.email, + ); + + await stage1?.updateProgress(0.5, 'Checking what changed'); + + const diffSet = await this.deps.diffSetService.compute( + job.connectionId, + job.sourceKey, + currentHashes, + scopeDescriptor ? scopeDescriptor.isPathInScope.bind(scopeDescriptor) : undefined, + ); + const diffSummary = { + added: diffSet.added.length, + modified: diffSet.modified.length, + deleted: diffSet.deleted.length, + unchanged: diffSet.unchanged.length, + }; + memoryFlow?.emit({ type: 'diff_computed', ...diffSummary }); + + const runRow = await this.deps.runs.create({ + jobId: job.jobId, + connectionId: job.connectionId, + sourceKey: job.sourceKey, + syncId, + trigger: job.trigger, + scopeFingerprint: scopeDescriptor?.fingerprint ?? 
null, + }); + memoryFlow?.update({ runId: runRow.id }); + const ingestToolMetadata = { + runId: runRow.id, + jobId: job.jobId, + syncId, + sourceKey: job.sourceKey, + }; + + await stage1?.updateProgress( + 1.0, + `${diffSet.added.length} new, ${diffSet.modified.length} changed, ${diffSet.deleted.length} removed`, + ); + + const detected = await adapter.detect(stagedDir); + if (!detected) { + await this.deps.runs.markFailed(runRow.id); + throw new Error(`source adapter '${job.sourceKey}' did not recognize staged dir`); + } + + let contextReport: ContextEvidenceIndexSummary | null = null; + if (adapter.evidenceIndexing === 'documents' && this.deps.contextEvidenceIndex) { + contextReport = await this.deps.contextEvidenceIndex.indexStagedDir({ + stagedDir, + runId: runRow.id, + connectionId: job.connectionId, + sourceKey: job.sourceKey, + syncId, + diffSet, + currentHashes, + }); + } + + const stage2 = ctx?.startPhase(0.04); + await stage2?.updateProgress(0.0, 'Planning updates'); + let workUnits: WorkUnit[] = []; + let eviction: EvictionUnit | undefined; + let unresolvedCards: UnresolvedCardInfo[] | undefined; + let sourceContextReport: { capped?: boolean; warnings?: string[] } | undefined; + let parseArtifacts: unknown; + let postProcessorOutcome: IngestReportPostProcessorOutcome | undefined; + let reconcileNotes: string[] = []; + let triageResult: PageTriageRunResult | null = null; + if (overrideReport) { + eviction = + overrideReport.body.evictionInputs.length > 0 + ? 
{ deletedRawPaths: overrideReport.body.evictionInputs } + : undefined; + unresolvedCards = overrideReport.body.unresolvedCards; + await stage2?.updateProgress(1.0, `Loaded prior report ${overrideReport.jobId} for override reconciliation`); + } else { + const chunk = await adapter.chunk(stagedDir, diffSet); + workUnits = chunk.workUnits; + eviction = chunk.eviction; + unresolvedCards = chunk.unresolvedCards; + sourceContextReport = chunk.contextReport; + parseArtifacts = chunk.parseArtifacts; + reconcileNotes = chunk.reconcileNotes ?? []; + triageResult = + contextReport && adapter.triageSupported && this.deps.pageTriage + ? await this.deps.pageTriage.triageRun({ + stagedDir, + runId: runRow.id, + connectionId: job.connectionId, + sourceKey: job.sourceKey, + syncId, + jobId: job.jobId, + diffSet, + adapter, + }) + : null; + workUnits = this.filterWorkUnitsForTriage(workUnits, triageResult); + if (adapter.clusterWorkUnits && workUnits.length > 0) { + workUnits = await adapter.clusterWorkUnits({ + workUnits, + stagedDir, + embedding: this.deps.embedding, + }); + } + await stage2?.updateProgress(1.0, `Planned ${workUnits.length} update${workUnits.length === 1 ? '' : 's'}`); + } + + const targetConnectionIds = new Set([job.connectionId]); + if (!overrideReport && adapter.listTargetConnectionIds) { + for (const connectionId of await adapter.listTargetConnectionIds(stagedDir)) { + targetConnectionIds.add(connectionId); + } + } + if (overrideReport) { + for (const wu of overrideReport.body.workUnits) { + for (const action of wu.actions) { + if (action.target === 'sl' && action.targetConnectionId) { + targetConnectionIds.add(action.targetConnectionId); + } + } + for (const touched of wu.touchedSlSources) { + targetConnectionIds.add(touched.connectionId); + } + } + } + const slConnectionIds = [...targetConnectionIds].sort(); + + // Build shared per-job context. 
+ const [wikiIndex, slIndex] = await Promise.all([this.buildWikiIndex(), this.buildSlIndex(slConnectionIds)]); + + const baseFraming = await this.deps.promptService.loadPrompt('memory_agent_bundle_ingest_work_unit'); + const wuSkillNames = Array.from( + new Set([...adapter.skillNames, 'ingest_triage', 'sl_capture', 'knowledge_capture']), + ); + const wuSkills = await this.deps.skillsRegistry.listSkills(wuSkillNames, 'memory_agent'); + const skillsPrompt = this.deps.skillsRegistry.buildSkillsPrompt(wuSkills, 'memory_agent'); + const canonicalPins = await this.deps.canonicalPins.listPins(slConnectionIds); + + const workUnitOutcomes: WorkUnitOutcome[] = []; + const failedWorkUnits: string[] = []; + const stageIndex: StageIndex = overrideReport + ? buildStageIndexFromReportBody(job.jobId, overrideReport.connectionId, overrideReport.body) + : { + jobId: job.jobId, + connectionId: job.connectionId, + workUnits: [], + conflictsResolved: [], + evictionsApplied: [], + unmappedFallbacks: [], + artifactResolutions: [], + }; + const memoryFlowPlannedWorkUnits = overrideReport + ? stageIndex.workUnits.map(stageIndexWorkUnitToMemoryFlowPlannedWorkUnit) + : workUnits.map(workUnitToMemoryFlowPlannedWorkUnit); + memoryFlow?.update({ plannedWorkUnits: memoryFlowPlannedWorkUnits }); + memoryFlow?.emit({ + type: 'chunks_planned', + chunkCount: memoryFlowPlannedWorkUnits.length, + workUnitCount: memoryFlowPlannedWorkUnits.length, + evictionCount: eviction?.deletedRawPaths.length ?? 0, + }); + + const stage3 = ctx?.startPhase(0.6); + await stage3?.updateProgress(0.0, `Processing ${workUnits.length} update${workUnits.length === 1 ? '' : 's'}`); + this.logger.log(`[ingest-bundle] job=${job.jobId} tool-call transcripts: ${transcriptDir}/`); + + if (!overrideReport) { + const workUnitSettings = { + maxConcurrency: this.deps.settings.workUnitMaxConcurrency ?? 1, + stepBudget: this.deps.settings.workUnitStepBudget ?? 40, + failureMode: this.deps.settings.workUnitFailureMode ?? 
'continue', + }; + const limitWorkUnit = pLimit(workUnitSettings.maxConcurrency); + const workUnitOutcomesByIndex: WorkUnitOutcome[] = []; + let completedWorkUnits = 0; + let abortRequested = false; + + const runSingleWorkUnit = async (wu: WorkUnit): Promise => { + const session: CaptureSession = { + userId: 'system', + chatId: wu.unitKey, + userMessage: `ingest(${job.sourceKey}) WU=${wu.unitKey}`, + connectionId: job.connectionId, + userScopedEnabled: false, + forceGlobalScope: true, + touchedSlSources: createTouchedSlSources(), + preHead: sessionWorktree.baseSha, + }; + const sessionActions: MemoryAction[] = []; + + const scopedWikiService = this.deps.wikiService.forWorktree(sessionWorktree.workdir); + const scopedSemanticLayerService = this.deps.semanticLayerService.forWorktree(sessionWorktree.workdir); + + const toolSession: ToolSession = { + connectionId: job.connectionId, + isWorktreeScoped: true, + preHead: sessionWorktree.baseSha, + touchedSlSources: session.touchedSlSources, + actions: sessionActions, + semanticLayerService: scopedSemanticLayerService, + wikiService: scopedWikiService, + configService: sessionWorktree.config, + gitService: sessionWorktree.git, + ingest: ingestToolMetadata, + }; + + const slValidationDeps: SlValidationDeps = { + semanticLayerService: scopedSemanticLayerService, + connections: this.deps.connections, + configService: sessionWorktree.config, + gitService: sessionWorktree.git, + slSourcesRepository: this.deps.slSourcesRepository, + probeRowCount: this.deps.settings.probeRowCount, + }; + + const wuToolset = this.deps.toolsetFactory.createIngestWuToolset(toolSession, { + includeContextEvidenceTools: adapter.evidenceIndexing === 'documents' && !!contextReport, + }); + const wuToolContext: ToolContext = { + sourceId: 'ingest', + messageId: `${job.jobId}-wu-${wu.unitKey}`, + userId: 'system', + connectionId: job.connectionId, + ingest: ingestToolMetadata, + session: toolSession, + }; + + const skillsLoadedPerWu: string[] = []; + 
const loadSkillTool: Record = { + load_skill: tool({ + description: + 'Load a skill to get specialized instructions. Call this when a skill listed in the system prompt matches the current task.', + inputSchema: z.object({ name: z.string() }), + execute: async ({ name }) => { + const skill = await this.deps.skillsRegistry.getSkill(name, 'memory_agent'); + if (!skill) { + const available = + (await this.deps.skillsRegistry.listSkills('memory_agent')).map((s) => s.name).join(', ') || + '(none)'; + return `Skill "${name}" not available. Available: ${available}`; + } + const body = await readFile(join(skill.path, 'SKILL.md'), 'utf-8'); + if (!skillsLoadedPerWu.includes(skill.name)) { + skillsLoadedPerWu.push(skill.name); + } + return { + name: skill.name, + skillDirectory: skill.path, + content: this.deps.skillsRegistry.stripFrontmatter(body), + }; + }, + }), + }; + + const priorProvenance = await this.deps.provenance.findLatestArtifactsForRawPaths( + job.connectionId, + job.sourceKey, + wu.rawFiles, + ); + const wuEmitUnmappedFallbackTool = { + emit_unmapped_fallback: createEmitUnmappedFallbackTool({ + stageIndex, + allowedPaths: new Set(wu.rawFiles), + }), + }; + + const systemPrompt = buildWuSystemPrompt({ + baseFraming, + skillsPrompt, + syncId, + sourceKey: job.sourceKey, + canonicalPins, + }); + + memoryFlow?.emit({ + type: 'work_unit_started', + unitKey: wu.unitKey, + skills: wuSkillNames, + stepBudget: workUnitSettings.stepBudget, + }); + return executeWorkUnit( + { + sessionWorktreeGit: sessionWorktree.git, + agentRunner: this.deps.agentRunner, + validateTouchedSources: (touched) => + validateWuTouchedSources({ ...slValidationDeps, slValidator: this.deps.slValidator }, touched), + resetHardTo: (targetSha) => sessionWorktree.git.resetHardTo(targetSha), + buildSystemPrompt: () => systemPrompt, + buildUserPrompt: (wuInner) => buildWuUserPrompt({ wu: wuInner, wikiIndex, slIndex, priorProvenance }), + buildToolSet: (wuInner) => + wrapToolsWithLogger( + 
buildWuToolSet({ + sourceKey: job.sourceKey, + stagedDir, + wu: wuInner, + loadSkillTool, + emitUnmappedFallbackTool: wuEmitUnmappedFallbackTool, + toolsetTools: wuToolset.toAiSdkTools(wuToolContext), + }), + join(transcriptDir, `${wuInner.unitKey}.jsonl`), + wuInner.unitKey, + { onEntry: recordTranscriptEntry(join(transcriptDir, `${wuInner.unitKey}.jsonl`)) }, + ), + captureSession: session, + sessionActions, + modelRole: 'candidateExtraction', + stepBudget: workUnitSettings.stepBudget, + sourceKey: job.sourceKey, + connectionId: job.connectionId, + jobId: job.jobId, + onStepFinish: ({ stepIndex, stepBudget }) => { + memoryFlow?.emit({ type: 'work_unit_step', unitKey: wu.unitKey, stepIndex, stepBudget }); + }, + }, + wu, + ); + }; + + if (workUnits.length === 0) { + await stage3?.updateProgress(1.0, '0 of 0 work units complete'); + } + + try { + await Promise.all( + workUnits.map((wu, index) => + limitWorkUnit(async () => { + if (abortRequested) { + return; + } + + let outcome: WorkUnitOutcome; + try { + outcome = await runSingleWorkUnit(wu); + } catch (error) { + outcome = this.buildFailedWorkUnitOutcome(wu, error); + } + + workUnitOutcomesByIndex[index] = outcome; + for (const action of outcome.actions) { + memoryFlow?.emit({ + type: 'candidate_action', + unitKey: outcome.unitKey, + target: action.target, + action: action.type, + key: action.key, + }); + } + memoryFlow?.emit({ + type: 'work_unit_finished', + unitKey: outcome.unitKey, + status: outcome.status, + ...(outcome.reason ? 
{ reason: outcome.reason } : {}), + }); + completedWorkUnits += 1; + await stage3?.updateProgress( + completedWorkUnits / workUnits.length, + `${completedWorkUnits} of ${workUnits.length} work units complete`, + ); + + if (outcome.status === 'failed') { + this.logger.warn(`[ingest-bundle] WU=${outcome.unitKey} failed: ${outcome.reason}`); + if (workUnitSettings.failureMode === 'abort') { + abortRequested = true; + throw new Error(this.formatWorkUnitFailure(outcome)); + } + } + }), + ), + ); + } catch (error) { + await this.deps.runs.markFailed(runRow.id); + throw error; + } + + workUnitOutcomes.push( + ...workUnitOutcomesByIndex.filter((outcome): outcome is WorkUnitOutcome => Boolean(outcome)), + ); + failedWorkUnits.push( + ...workUnitOutcomes.filter((outcome) => outcome.status === 'failed').map((outcome) => outcome.unitKey), + ); + + // Complete the typed Stage Index from the outcomes once, and use it for + // Stage 4, provenance writes (Phase G), and the report body (Phase F3). + stageIndex.workUnits = workUnitOutcomes.map((o) => ({ + unitKey: o.unitKey, + rawFiles: workUnits.find((w) => w.unitKey === o.unitKey)?.rawFiles ?? [], + status: o.status, + reason: o.reason, + actions: o.actions, + touchedSlSources: o.touchedSlSources, + slDisallowed: o.slDisallowed, + slDisallowedReason: o.slDisallowedReason, + })); + } + const carryForwardResult = + contextReport && this.deps.contextCandidateCarryforward + ? await this.deps.contextCandidateCarryforward.carryForward({ + runId: runRow.id, + connectionId: job.connectionId, + sourceKey: job.sourceKey, + }) + : null; + const dedupResult = + contextReport && this.deps.candidateDedup ? await this.deps.candidateDedup.deduplicateRun(runRow.id) : null; + + // Stage 4 — reconciliation. Shares scoped wiki/SL with a fresh CaptureSession + // so reconciliation writes land in the same worktree Stage 3 used. 
+ const reconcileSession: CaptureSession = { + userId: 'system', + chatId: `${job.jobId}-reconcile`, + userMessage: `reconcile(${job.sourceKey})`, + connectionId: job.connectionId, + userScopedEnabled: false, + forceGlobalScope: true, + touchedSlSources: createTouchedSlSources(), + preHead: await sessionWorktree.git.revParseHead(), + }; + const reconcileActions: MemoryAction[] = []; + const rcScopedWiki = this.deps.wikiService.forWorktree(sessionWorktree.workdir); + const rcScopedSl = this.deps.semanticLayerService.forWorktree(sessionWorktree.workdir); + + const rcToolSession: ToolSession = { + connectionId: job.connectionId, + isWorktreeScoped: true, + preHead: reconcileSession.preHead, + touchedSlSources: reconcileSession.touchedSlSources, + actions: reconcileActions, + semanticLayerService: rcScopedSl, + wikiService: rcScopedWiki, + configService: sessionWorktree.config, + gitService: sessionWorktree.git, + ingest: ingestToolMetadata, + evictionDecisions: [], + }; + + const rcToolset = this.deps.toolsetFactory.createIngestWuToolset(rcToolSession, { + includeContextEvidenceTools: adapter.evidenceIndexing === 'documents' && !!contextReport, + }); + const rcToolContext: ToolContext = { + sourceId: 'ingest', + messageId: `${job.jobId}-reconcile`, + userId: 'system', + connectionId: job.connectionId, + ingest: ingestToolMetadata, + session: rcToolSession, + }; + const rcLoadSkill: Record = { + load_skill: tool({ + description: 'Load a skill.', + inputSchema: z.object({ name: z.string() }), + execute: async ({ name }) => { + const skill = await this.deps.skillsRegistry.getSkill(name, 'memory_agent'); + if (!skill) { + return `Skill "${name}" not found`; + } + const body = await readFile(join(skill.path, 'SKILL.md'), 'utf-8'); + return { name: skill.name, content: this.deps.skillsRegistry.stripFrontmatter(body) }; + }, + }), + }; + const allStagedPaths = new Set([...currentHashes.keys()]); + const rcRawSpanTool = { read_raw_span: createReadRawSpanTool({ stagedDir, 
allowedPaths: allStagedPaths }) }; + const rcStageListTool = { stage_list: createStageListTool({ stageIndex }) }; + const rcStageDiffTool = { stage_diff: createStageDiffTool({ stageIndex }) }; + const rcEvictionListTool = { + eviction_list: createEvictionListTool({ + provenance: this.deps.provenance, + connectionId: job.connectionId, + sourceKey: job.sourceKey, + deletedRawPaths: eviction?.deletedRawPaths ?? [], + }), + }; + const rcEmitConflictResolutionTool = { + emit_conflict_resolution: createEmitConflictResolutionTool({ stageIndex }), + }; + const rcEmitEvictionDecisionTool = { + emit_eviction_decision: createEmitEvictionDecisionTool({ + stageIndex, + deletedRawPaths: eviction?.deletedRawPaths ?? [], + }), + }; + const rcEmitArtifactResolutionTool = { + emit_artifact_resolution: createEmitArtifactResolutionTool({ + stageIndex, + allowedPaths: allStagedPaths, + }), + }; + const rcEmitUnmappedFallbackTool = { + emit_unmapped_fallback: createEmitUnmappedFallbackTool({ + stageIndex, + allowedPaths: allStagedPaths, + }), + }; + + const reconcileBaseFraming = await this.deps.promptService.loadPrompt('memory_agent_bundle_ingest_reconcile'); + const reconcileSkills = await this.deps.skillsRegistry.listSkills( + Array.from( + new Set(['ingest_triage', 'sl_capture', 'knowledge_capture', ...(adapter.reconcileSkillNames ?? [])]), + ), + 'memory_agent', + ); + const reconcileSkillsPrompt = this.deps.skillsRegistry.buildSkillsPrompt(reconcileSkills, 'memory_agent'); + const relevantCanonicalPins = selectRelevantCanonicalPins(stageIndex, canonicalPins); + + const stage4 = ctx?.startPhase(0.16); + const hasCandidateReconcileWork = (dedupResult?.representatives.length ?? 0) > 0; + const hasReconcileWork = + stageIndex.workUnits.some((wu) => wu.actions.length > 0) || + (eviction?.deletedRawPaths.length ?? 
0) > 0 || + hasCandidateReconcileWork; + if (hasReconcileWork || overrideReport) { + await stage4?.updateProgress(0.0, 'Reconciling results'); + } + + let curatorReport = null; + let curatorWarnings: string[] = []; + let reconcileOutcome: Awaited>; + + if (contextReport && this.deps.curatorPagination) { + const curatorOutcome = await this.deps.curatorPagination.reconcile({ + runId: runRow.id, + sourceKey: job.sourceKey, + jobId: job.jobId, + stageIndex, + evictionUnit: eviction, + representatives: dedupResult?.representatives ?? [], + initialBudget: this.resolveContextCuratorBudget(job.bundleRef, stageIndex), + modelRole: 'curator', + buildSystemPrompt: () => + buildReconcileSystemPrompt({ + baseFraming: reconcileBaseFraming, + skillsPrompt: reconcileSkillsPrompt, + syncId, + sourceKey: job.sourceKey, + canonicalPins: relevantCanonicalPins, + }), + buildUserPrompt: ({ summary, items, runState }) => + buildReconcileUserPrompt(stageIndex, eviction, { summary, items }, reconcileNotes, runState), + buildToolSet: (_passNumber) => + wrapToolsWithLogger( + buildReconcileToolSet({ + loadSkillTool: rcLoadSkill, + stageListTool: rcStageListTool, + stageDiffTool: rcStageDiffTool, + evictionListTool: rcEvictionListTool, + emitConflictResolutionTool: rcEmitConflictResolutionTool, + emitEvictionDecisionTool: rcEmitEvictionDecisionTool, + emitArtifactResolutionTool: rcEmitArtifactResolutionTool, + emitUnmappedFallbackTool: rcEmitUnmappedFallbackTool, + readRawSpanTool: rcRawSpanTool, + toolsetTools: rcToolset.toAiSdkTools(rcToolContext), + }), + join(transcriptDir, 'reconcile.jsonl'), + 'reconcile', + { onEntry: recordTranscriptEntry(join(transcriptDir, 'reconcile.jsonl')) }, + ), + getReconciliationActions: () => reconcileActions, + onStepFinish: stage4 + ? 
({ passNumber, stepIndex, stepBudget }) => { + void stage4.updateProgress( + stepIndex / stepBudget, + `Reconciling results · pass ${passNumber} step ${stepIndex}`, + ); + } + : undefined, + }); + curatorReport = curatorOutcome.report; + curatorWarnings = curatorOutcome.warnings; + reconcileOutcome = { + skipped: curatorOutcome.skipped, + stopReason: curatorOutcome.stopReason, + error: curatorOutcome.error, + }; + } else { + reconcileOutcome = await runReconciliationStage4({ + stageIndex, + evictionUnit: eviction, + agentRunner: this.deps.agentRunner, + buildSystemPrompt: () => + buildReconcileSystemPrompt({ + baseFraming: reconcileBaseFraming, + skillsPrompt: reconcileSkillsPrompt, + syncId, + sourceKey: job.sourceKey, + canonicalPins: relevantCanonicalPins, + }), + buildUserPrompt: (idx, ev) => buildReconcileUserPrompt(idx, ev, undefined, reconcileNotes), + buildToolSet: () => + wrapToolsWithLogger( + buildReconcileToolSet({ + loadSkillTool: rcLoadSkill, + stageListTool: rcStageListTool, + stageDiffTool: rcStageDiffTool, + evictionListTool: rcEvictionListTool, + emitConflictResolutionTool: rcEmitConflictResolutionTool, + emitEvictionDecisionTool: rcEmitEvictionDecisionTool, + emitArtifactResolutionTool: rcEmitArtifactResolutionTool, + emitUnmappedFallbackTool: rcEmitUnmappedFallbackTool, + readRawSpanTool: rcRawSpanTool, + toolsetTools: rcToolset.toAiSdkTools(rcToolContext), + }), + join(transcriptDir, 'reconcile.jsonl'), + 'reconcile', + { onEntry: recordTranscriptEntry(join(transcriptDir, 'reconcile.jsonl')) }, + ), + modelRole: 'reconcile', + stepBudget: 60, + sourceKey: job.sourceKey, + jobId: job.jobId, + force: !!overrideReport, + onStepFinish: stage4 + ? ({ stepIndex, stepBudget }) => { + void stage4.updateProgress(stepIndex / stepBudget, `Reconciling results · step ${stepIndex}`); + } + : undefined, + }); + } + + const candidateSummaryAfterReconcile = + contextReport && this.deps.contextEvidenceCandidates + ? 
await this.deps.contextEvidenceCandidates.getCandidateSummary(runRow.id) + : null; + memoryFlow?.emit({ + type: 'reconciliation_finished', + conflictCount: stageIndex.conflictsResolved.length, + fallbackCount: stageIndex.unmappedFallbacks.length, + }); + + await stage4?.updateProgress(1.0, reconcileOutcome.skipped ? 'No reconciliation needed' : 'Reconciled'); + + const postProcessor = this.deps.postProcessors?.[job.sourceKey]; + if (postProcessor) { + const stagePostProcessor = ctx?.startPhase(0.04); + await stagePostProcessor?.updateProgress(0.0, 'Running deterministic imports'); + try { + const result = await postProcessor.run({ + connectionId: job.connectionId, + sourceKey: job.sourceKey, + syncId, + jobId: job.jobId, + runId: runRow.id, + workdir: sessionWorktree.workdir, + parseArtifacts, + }); + postProcessorOutcome = { + sourceKey: job.sourceKey, + status: result.errors.length > 0 && result.touchedSources.length === 0 ? 'failed' : 'success', + result: result.result, + errors: result.errors, + warnings: result.warnings, + touchedSources: result.touchedSources, + }; + await stagePostProcessor?.updateProgress(1.0, 'Deterministic imports complete'); + } catch (error) { + postProcessorOutcome = { + sourceKey: job.sourceKey, + status: 'failed', + errors: [error instanceof Error ? 
error.message : String(error)], + warnings: [], + touchedSources: [], + }; + await this.deps.runs.markFailed(runRow.id); + throw error; + } + } + + // Stage 6 — squash commit + const stage6 = ctx?.startPhase(0.04); + await stage6?.updateProgress(0.0, 'Saving changes'); + try { + await sessionWorktree.git.assertWorktreeClean(); + } catch (error) { + await this.deps.runs.markFailed(runRow.id); + throw error; + } + const commitMessage = this.buildCommitMessage(job, syncId, diffSummary, failedWorkUnits); + const squashResult = await this.deps.lockingService.withLock('config:repo', async () => { + const preSquashSha = await this.deps.gitService.revParseHead(); + const merge = await this.deps.gitService.squashMergeIntoMain( + sessionWorktree.branch, + this.deps.storage.systemGitAuthor.name, + this.deps.storage.systemGitAuthor.email, + commitMessage, + ); + return { preSquashSha, merge }; + }); + const mergeResult = squashResult.merge; + if (!mergeResult.ok) { + await this.deps.runs.markFailed(runRow.id); + throw new Error(`squash merge conflict: ${mergeResult.conflictPaths.join(', ')}`); + } + const commitSha = mergeResult.touchedPaths.length === 0 ? null : mergeResult.squashSha; + const memoryFlowSavedActions = stageIndex.workUnits.flatMap((wu) => wu.actions).concat(reconcileActions); + memoryFlow?.emit({ + type: 'saved', + commitSha, + wikiCount: countMemoryFlowActions(memoryFlowSavedActions, 'wiki'), + slCount: countMemoryFlowActions(memoryFlowSavedActions, 'sl'), + }); + await stage6?.updateProgress(1.0, commitSha ? `Saved changes (${commitSha.slice(0, 8)})` : 'No changes to save'); + + // Sync the shared `knowledge` index from the squashed diff in a single + // transaction. If this throws, the run fails and no partial index state + // survives (thanks to the transactional upsert in applyDiffTransactional). 
+ if (commitSha) { + // Multi-file squash → omit path so the handler diffs the whole commit + // (a comma-joined pathspec would match nothing and the job would no-op). + const pathFilter = mergeResult.touchedPaths.length === 1 ? mergeResult.touchedPaths[0] : ''; + await this.deps.commitMessages.enqueueForExternalCommit({ commitHash: commitSha }, commitMessage, pathFilter); + await this.deps.wikiService.syncFromCommit(squashResult.preSquashSha, commitSha, runRow.id); + await this.syncKnowledgeSlRefsFromActions(job.connectionId, memoryFlowSavedActions); + const touchedConnections = [ + ...new Set( + memoryFlowSavedActions + .filter((action) => action.target === 'sl') + .map((action) => actionTargetConnectionId(action, job.connectionId)) + .concat((postProcessorOutcome?.touchedSources ?? []).map((source) => source.connectionId)), + ), + ].sort(); + for (const connectionId of touchedConnections) { + try { + const allSources = await this.deps.semanticLayerService.loadAllSources(connectionId); + await this.deps.slSearchService.indexSources(connectionId, allSources); + } catch (err) { + this.logger.warn( + `[ingest-bundle] post-squash SL reindex failed for connection=${connectionId}: ${err instanceof Error ? err.message : String(err)}`, + ); + } + } + } + + const stage5 = ctx?.startPhase(0.04); + await stage5?.updateProgress(0.0, 'Recording history'); + + // Provenance rows: per-artifact when the WU emitted actions, plus a `skipped` + // fallback for raw files that produced nothing so the next DiffSet still sees + // them. + const provenanceRows: Parameters[0] = []; + const actionToType = (a: MemoryAction): 'source_created' | 'measure_added' | 'wiki_written' => { + if (a.target === 'wiki') { + return 'wiki_written'; + } + // SL action: 'created' → source_created; 'updated' → measure_added (coarse-grained; + // action.detail preserves the finer distinction for the report body). + return a.type === 'created' ? 
'source_created' : 'measure_added'; + }; + const producedPaths = new Set(); + for (const wu of stageIndex.workUnits) { + for (const rawPath of wu.rawFiles) { + const hash = currentHashes.get(rawPath) ?? 'unknown'; + for (const action of wu.actions) { + provenanceRows.push({ + connectionId: job.connectionId, + sourceKey: job.sourceKey, + syncId, + rawPath, + rawContentHash: hash, + artifactKind: action.target, + artifactKey: action.key, + targetConnectionId: action.target === 'sl' ? (action.targetConnectionId ?? null) : null, + artifactContentHash: null, + actionType: actionToType(action), + }); + producedPaths.add(rawPath); + } + } + } + for (const resolution of stageIndex.artifactResolutions ?? []) { + const hash = currentHashes.get(resolution.rawPath) ?? 'unknown'; + provenanceRows.push({ + connectionId: job.connectionId, + sourceKey: job.sourceKey, + syncId, + rawPath: resolution.rawPath, + rawContentHash: hash, + artifactKind: resolution.artifactKind, + artifactKey: resolution.artifactKey, + targetConnectionId: null, + artifactContentHash: null, + actionType: resolution.actionType, + }); + producedPaths.add(resolution.rawPath); + } + for (const [rawPath, hash] of currentHashes) { + if (producedPaths.has(rawPath)) { + continue; + } + provenanceRows.push({ + connectionId: job.connectionId, + sourceKey: job.sourceKey, + syncId, + rawPath, + rawContentHash: hash, + artifactKind: null, + artifactKey: null, + targetConnectionId: null, + artifactContentHash: null, + actionType: 'skipped', + }); + } + await this.deps.provenance.insertMany(provenanceRows); + memoryFlow?.emit({ type: 'provenance_recorded', rowCount: provenanceRows.length }); + await stage5?.updateProgress( + 1.0, + `Recorded ${provenanceRows.length} history entr${provenanceRows.length === 1 ? 
'y' : 'ies'}`, + ); + + const stage7 = ctx?.startPhase(0.04); + await stage7?.updateProgress(0.0, 'Wrapping up'); + + const reportProvenanceRows = provenanceRows.map( + ({ rawPath, artifactKind, artifactKey, actionType, targetConnectionId }) => ({ + rawPath, + artifactKind, + artifactKey, + targetConnectionId: targetConnectionId ?? null, + actionType, + }), + ); + const reportToolTranscripts = Array.from(transcriptSummaries.values()).map((summary) => ({ + unitKey: summary.unitKey, + path: summary.path, + toolCallCount: summary.toolCallCount, + errorCount: summary.errorCount, + toolNames: Array.from(summary.toolNames).sort(), + })); + const capturedMemoryFlow = memoryFlow?.snapshot(); + const reportMemoryFlow = capturedMemoryFlow + ? { + ...capturedMemoryFlow, + metadata: { + schemaVersion: 1 as const, + mode: 'full' as const, + origin: 'captured' as const, + timing: 'captured' as const, + capturedAt: new Date().toISOString(), + sourceReportId: null, + sourceReportPath: null, + fallbackReason: null, + }, + } + : undefined; + + const reportBody = { + syncId, + diffSummary, + fetch: fetchReport ?? undefined, + commitSha, + workUnits: stageIndex.workUnits.map((wu) => ({ + unitKey: wu.unitKey, + rawFiles: wu.rawFiles, + status: wu.status, + reason: wu.reason, + actions: wu.actions, + touchedSlSources: wu.touchedSlSources, + slDisallowed: wu.slDisallowed, + slDisallowedReason: wu.slDisallowedReason, + })), + failedWorkUnits, + reconciliationSkipped: reconcileOutcome.skipped, + conflictsResolved: stageIndex.conflictsResolved, + evictionsApplied: stageIndex.evictionsApplied, + unmappedFallbacks: stageIndex.unmappedFallbacks, + artifactResolutions: stageIndex.artifactResolutions ?? [], + evictionInputs: eviction?.deletedRawPaths ?? [], + reconciliationActions: reconcileActions, + evictionDecisions: rcToolSession.evictionDecisions ?? [], + unresolvedCards: unresolvedCards ?? [], + supersededBy: null, + overrideOf: overrideReport?.jobId ?? 
null, + provenanceRows: reportProvenanceRows, + toolTranscripts: reportToolTranscripts, + postProcessor: postProcessorOutcome, + ...(reportMemoryFlow ? { memoryFlow: reportMemoryFlow } : {}), + context: contextReport + ? { + documentsIndexed: contextReport.documentsIndexed, + chunksIndexed: contextReport.chunksIndexed, + documentsDeleted: contextReport.documentsDeleted, + embeddingFailures: contextReport.embeddingFailures, + candidatesCreated: candidateSummaryAfterReconcile?.total ?? 0, + candidatesPromoted: candidateSummaryAfterReconcile?.promoted ?? 0, + candidatesRejected: candidateSummaryAfterReconcile?.rejected ?? 0, + triage: triageResult?.report, + dedup: dedupResult?.enabled + ? { + candidatesIn: dedupResult.candidatesIn, + clustersOut: dedupResult.clustersOut, + mergedCount: dedupResult.mergedCount, + largestClusterSize: dedupResult.largestClusterSize, + embeddingFailures: dedupResult.embeddingFailures, + } + : undefined, + curator: curatorReport ?? undefined, + knowledgeCreates: stageIndex.workUnits + .flatMap((wu) => wu.actions) + .concat(reconcileActions) + .filter((action) => action.target === 'wiki' && action.type === 'created').length, + knowledgeUpdates: stageIndex.workUnits + .flatMap((wu) => wu.actions) + .concat(reconcileActions) + .filter((action) => action.target === 'wiki' && action.type === 'updated').length, + capped: sourceContextReport?.capped ?? false, + warnings: [ + ...new Set([ + ...contextReport.warnings, + ...(sourceContextReport?.warnings ?? []), + ...(triageResult?.warnings ?? []), + ...(carryForwardResult?.warnings ?? []), + ...(dedupResult?.warnings ?? []), + ...curatorWarnings, + ]), + ], + } + : undefined, + }; + const createdReport = await this.deps.reports.create({ + runId: runRow.id, + jobId: job.jobId, + connectionId: job.connectionId, + sourceKey: job.sourceKey, + body: reportBody, + }); + const reportId = reportIdFromCreateResult(createdReport); + memoryFlow?.update({ + ...(reportId ? 
{ reportId, reportPath: reportId } : {}), + }); + memoryFlow?.emit({ + type: 'report_created', + runId: runRow.id, + ...(reportId ? { reportPath: reportId } : {}), + }); + if (overrideReport) { + await this.deps.reports.markSuperseded(overrideReport.jobId, job.jobId); + } + if (contextReport && this.deps.contextEvidenceIndex) { + await this.deps.contextEvidenceIndex.publishSync({ + connectionId: job.connectionId, + sourceKey: job.sourceKey, + syncId, + diffSet, + }); + } + + // Stage 7 — status + await this.deps.runs.markCompleted( + runRow.id, + diffSummary, + fetchReport?.status === 'partial' ? 'partial' : 'completed', + ); + if (job.bundleRef.kind === 'scheduled_pull') { + await adapter.onPullSucceeded?.({ + connectionId: job.connectionId, + sourceKey: job.sourceKey, + syncId, + trigger: job.trigger, + completedAt: new Date(), + stagedDir, + }); + } + await stage7?.updateProgress(1.0, 'Done'); + + cleanupOutcome = 'success'; + return { + runId: runRow.id, + syncId, + diffSummary, + workUnitCount: workUnits.length, + failedWorkUnits, + artifactsWritten: provenanceRows.filter((r) => r.actionType !== 'skipped').length, + commitSha, + }; + } finally { + await this.deps.sessionWorktreeService.cleanup(sessionWorktree, cleanupOutcome); + } + } +} diff --git a/packages/context/src/ingest/ingest-prompts.test.ts b/packages/context/src/ingest/ingest-prompts.test.ts new file mode 100644 index 00000000..7a9eb888 --- /dev/null +++ b/packages/context/src/ingest/ingest-prompts.test.ts @@ -0,0 +1,76 @@ +import { readFile } from 'node:fs/promises'; +import { describe, expect, it } from 'vitest'; + +/** Returns a RegExp that alternates three casings of one product name. Each casing is assembled by joining two string fragments, and the upper-case form carries a trailing underscore. NOTE(review): presumably split so the full name never appears verbatim in this source - confirm. */ function forbiddenProductPattern() { + return new RegExp([['Kae', 'lio'].join(''), ['kae', 'lio'].join(''), ['KAE', 'LIO_'].join('')].join('|')); +} + +/* Suite: reads prompt markdown files located relative to this module (via import.meta.url) and asserts on their literal text. */ describe('ingest prompt assets', () => { + it('teaches WorkUnit agents to apply canonical pins before writing contested artifacts', async () => { /* Checks the work-unit prompt file for canonical-pin guidance phrases. */ + const prompt = await readFile( + new 
URL('../../prompts/memory_agent_bundle_ingest_work_unit.md', import.meta.url), + 'utf-8', + ); + + expect(prompt).toContain(''); /* NOTE(review): the needle here is an empty string, which every string contains, so this assertion cannot fail. The intended marker text looks lost - confirm against the prompt file and restore it. */ + expect(prompt).toContain('canonicalArtifactKey'); + expect(prompt).toContain('prefer editing the pinned canonical artifact'); + expect(prompt).toContain('Do not create a duplicate contested artifact'); + }); + + it('uses product-neutral KLO runtime wording', async () => { /* Same prompt file: requires two KLO phrases and rejects any match of forbiddenProductPattern(). */ + const prompt = await readFile( + new URL('../../prompts/memory_agent_bundle_ingest_work_unit.md', import.meta.url), + 'utf-8', + ); + + expect(prompt).toContain('KLO semantic-layer sources and/or knowledge wiki pages'); + expect(prompt).toContain('maps cleanly to KLO'); + expect(prompt).not.toMatch(forbiddenProductPattern()); + }); + + it('pins historic-SQL triage rules with synthetic signal fixtures', async () => { /* Reads the page-triage classifier prompt, then checks four rule phrases plus three worked fixtures. */ + const prompt = await readFile(new URL('../../prompts/skills/page_triage_classifier.md', import.meta.url), 'utf-8'); + + expect(prompt).toContain('signals.objectType === "historic_sql_template"'); + expect(prompt).toContain('executions_bucket=low AND distinct_users_bucket=solo'); + expect(prompt).toContain('service_account_only=true AND below the frequency floor'); + expect(prompt).toContain('shared human usage with mid or high execution volume'); + + /* Each fixture field is a literal substring. The loop after this array requires every field to occur somewhere in the prompt: occurrence only, not adjacency or order. */ const fixtures = [ + { + label: 'skip low solo template', + objectType: '"objectType": "historic_sql_template"', + executions: '"executions_bucket": "low"', + users: '"distinct_users_bucket": "solo"', + serviceAccount: '"service_account_only": "false"', + lane: '-> `skip`', + }, + { + label: 'light service-account-only template', + objectType: '"objectType": "historic_sql_template"', + executions: '"executions_bucket": "high"', + users: '"distinct_users_bucket": "solo"', + serviceAccount: '"service_account_only": "true"', + lane: '-> `light`', + }, + { + label: 'full shared human template', + objectType: '"objectType": "historic_sql_template"', + executions: '"executions_bucket": "high"', + users: 
'"distinct_users_bucket": "team"', + serviceAccount: '"service_account_only": "false"', + lane: '-> `full`', + }, + ]; + + for (const fixture of fixtures) { /* Six independent substring checks per fixture: the fields of one fixture are not required to appear together. */ + expect(prompt).toContain(fixture.label); + expect(prompt).toContain(fixture.objectType); + expect(prompt).toContain(fixture.executions); + expect(prompt).toContain(fixture.users); + expect(prompt).toContain(fixture.serviceAccount); + expect(prompt).toContain(fixture.lane); + } + }); +}); diff --git a/packages/context/src/ingest/ingest-runtime-assets.test.ts b/packages/context/src/ingest/ingest-runtime-assets.test.ts new file mode 100644 index 00000000..8d82db41 --- /dev/null +++ b/packages/context/src/ingest/ingest-runtime-assets.test.ts @@ -0,0 +1,132 @@ +import { readFile } from 'node:fs/promises'; +import { join } from 'node:path'; +import { fileURLToPath } from 'node:url'; +import { describe, expect, it } from 'vitest'; +import { PromptService } from '../prompts/index.js'; +import { SkillsRegistryService } from '../skills/index.js'; + +/* Filesystem paths of the prompts and skills directories, resolved relative to this module. */ const promptsDir = fileURLToPath(new URL('../../prompts', import.meta.url)); +const skillsDir = fileURLToPath(new URL('../../skills', import.meta.url)); + +/* Skill names requested from the registry in the first test. NOTE(review): presumably mirrors the skill lists declared by the ingest adapters - confirm against the adapters. */ const adapterSkillNames = [ + 'live_database_ingest', + 'lookml_ingest', + 'metabase_ingest', + 'metricflow_ingest', + 'notion_synthesize', + 'historic_sql_ingest', + 'ingest_triage', + 'knowledge_capture', + 'sl_capture', +] as const; + +/* Second skill-name list. The first test unions it with adapterSkillNames and de-duplicates through a Set. */ const adapterReconcileSkillNames = [ + 'historic_sql_curator', + 'ingest_triage', + 'knowledge_capture', + 'sl_capture', +] as const; + +/* Prompt names loaded through PromptService in the second test. */ const pageTriagePromptNames = ['skills/page_triage_classifier', 'skills/light_extraction'] as const; + +/** Returns a RegExp that alternates three casings of one product name. Each casing is assembled by joining two string fragments, and the upper-case form carries a trailing underscore. NOTE(review): presumably split so the full name never appears verbatim in this source - confirm. */ function forbiddenProductPattern() { + return new RegExp([['Kae', 'lio'].join(''), ['kae', 'lio'].join(''), ['KAE', 'LIO_'].join('')].join('|')); +} + +/* Suite: exercises SkillsRegistryService and PromptService against the promptsDir and skillsDir paths defined in this file. */ describe('ingest runtime assets', () => { + it('resolves every reusable ingest skill from packaged KLO assets without server fallback', async () => { + const registry = new SkillsRegistryService({ 
skillsDir }); + const expected = [...new Set([...adapterSkillNames, ...adapterReconcileSkillNames])].sort(); + + const skills = await registry.listSkills(expected, 'memory_agent'); + + expect(skills.map((skill) => skill.name).sort()).toEqual(expected); /* Both sides are sorted and expected is de-duplicated, so this requires exactly the requested names: none missing, none extra. */ + for (const skill of skills) { + expect(skill.path.startsWith(skillsDir)).toBe(true); /* Every resolved skill must live under skillsDir. */ + const body = await readFile(join(skill.path, 'SKILL.md'), 'utf-8'); + expect(body).not.toMatch(forbiddenProductPattern()); + } + }); + + it('loads page-triage and light-extraction prompts from packaged KLO prompt assets', async () => { + const prompts = new PromptService({ promptsDir, partials: [] }); + + for (const promptName of pageTriagePromptNames) { + const prompt = await prompts.loadPrompt(promptName); + expect(prompt.trim().length).toBeGreaterThan(100); /* Rejects an empty or near-empty prompt: more than 100 characters after trimming. */ + expect(prompt).not.toMatch(forbiddenProductPattern()); + } + + /* NOTE(review): the classifier prompt is loaded three more times below, once per assertion. Whether PromptService caches loads is not visible here. */ await expect(prompts.loadPrompt('skills/page_triage_classifier')).resolves.toContain('# Page Triage Classifier'); + await expect(prompts.loadPrompt('skills/page_triage_classifier')).resolves.toContain( + 'signals.objectType === "historic_sql_template"', + ); + await expect(prompts.loadPrompt('skills/page_triage_classifier')).resolves.toContain( + 'service_account_only=true AND below the frequency floor', + ); + await expect(prompts.loadPrompt('skills/light_extraction')).resolves.toContain('# Light Context Extraction'); + }); + + it('packages historic-SQL WorkUnit skill guidance from KLO assets', async () => { + const registry = new SkillsRegistryService({ skillsDir }); + const skills = await registry.listSkills(['historic_sql_ingest'], 'memory_agent'); + + expect(skills.map((skill) => skill.name)).toEqual(['historic_sql_ingest']); + + const [skill] = skills; + if (!skill) { /* The toEqual above already fails on an empty list, so this guard presumably exists only to narrow the type of skill. */ + throw new Error('historic_sql_ingest skill missing'); + } + + expect(skill.path.startsWith(skillsDir)).toBe(true); + + const body = await readFile(join(skill.path, 'SKILL.md'), 'utf-8'); + expect(body).toContain('# Historic SQL Ingest'); 
expect(body).toContain('Read exactly one historic-SQL template WorkUnit'); + expect(body).toContain('metadata.json'); + expect(body).toContain('page.md'); + expect(body).toContain('usage.json'); + expect(body).toContain('manifest.json'); + expect(body).toContain('wiki_write'); + expect(body).toContain('key: "queries/"'); + expect(body).toContain('"source": "historic-sql"'); + expect(body).toContain('representative_sql'); + expect(body).toContain('fingerprints'); + expect(body).toContain('usage'); /* NOTE(review): already implied by the usage.json check above, because that needle contains this one. */ + expect(body).toContain('SL proposal threshold'); + expect(body).toContain('Do not group sibling templates'); + expect(body).toContain('Do not copy sample bound_sql'); + expect(body).not.toContain('store historic-SQL provenance in the markdown body'); /* Negative check: this sentence must be absent from the skill text. */ + expect(body).not.toMatch(forbiddenProductPattern()); + }); + + it('packages historic-SQL curator reconcile guidance from KLO assets', async () => { /* Same shape as the previous test, applied to the historic_sql_curator skill. */ + const registry = new SkillsRegistryService({ skillsDir }); + const skills = await registry.listSkills(['historic_sql_curator'], 'memory_agent'); + + expect(skills.map((skill) => skill.name)).toEqual(['historic_sql_curator']); + + const [skill] = skills; + if (!skill) { + throw new Error('historic_sql_curator skill missing'); + } + + expect(skill.path.startsWith(skillsDir)).toBe(true); + + const body = await readFile(join(skill.path, 'SKILL.md'), 'utf-8'); + expect(body).toContain('# Historic SQL Curator'); + expect(body).toContain('curator pagination'); + expect(body).toContain('stage_list'); + expect(body).toContain('stage_diff'); + expect(body).toContain('read_raw_span'); + expect(body).toContain('wiki_search'); + expect(body).toContain('wiki_read'); + expect(body).toContain('wiki_write'); + expect(body).toContain('emit_artifact_resolution'); + expect(body).toContain('emit_eviction_decision'); + expect(body).toContain('categorical sub-cluster'); + expect(body).toContain('historic-sql-demoted'); + expect(body).toContain('Do not call `context_candidate_write`'); + 
expect(body).not.toMatch(forbiddenProductPattern()); + }); +}); diff --git a/packages/context/src/ingest/local-adapters.test.ts b/packages/context/src/ingest/local-adapters.test.ts new file mode 100644 index 00000000..5dfca3e7 --- /dev/null +++ b/packages/context/src/ingest/local-adapters.test.ts @@ -0,0 +1,444 @@ +import { mkdtemp, rm } from 'node:fs/promises'; +import { tmpdir } from 'node:os'; +import { join } from 'node:path'; +import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'; +import { initKloProject, type KloLocalProject, loadKloProject } from '../project/index.js'; +import type { SqlAnalysisPort } from '../sql-analysis/index.js'; +import { LocalLookerRuntimeStore } from './adapters/looker/local-runtime-store.js'; +import { createDefaultLocalIngestAdapters, localPullConfigForAdapter } from './local-adapters.js'; + +describe('local ingest adapters', () => { + let tempDir: string; + let project: KloLocalProject; + + beforeEach(async () => { + tempDir = await mkdtemp(join(tmpdir(), 'klo-local-adapters-')); + const projectDir = join(tempDir, 'project'); + await initKloProject({ projectDir, projectName: 'warehouse' }); + project = await loadKloProject({ projectDir }); + }); + + afterEach(async () => { + await rm(tempDir, { recursive: true, force: true }); + }); + + function projectWithConnections(connections: KloLocalProject['config']['connections']): KloLocalProject { + return { + ...project, + config: { + ...project.config, + connections, + }, + }; + } + + it('registers Metabase locally as a staged-bundle adapter', () => { + const adapters = createDefaultLocalIngestAdapters(project); + + expect(adapters.map((adapter) => adapter.source)).toEqual([ + 'fake', + 'live-database', + 'lookml', + 'dbt', + 'metabase', + 'looker', + 'metricflow', + 'notion', + ]); + expect(adapters.find((adapter) => adapter.source === 'metabase')?.fetch).toBeTypeOf('function'); + }); + + it('uses an explicit Looker runtime client seam for local adapter fetch 
tests', async () => { + const runtimeClient = { + cleanup: vi.fn().mockResolvedValue(undefined), + listDashboards: vi.fn().mockResolvedValue([]), + listLooks: vi.fn().mockResolvedValue([]), + listFolders: vi.fn().mockResolvedValue({ folders: [] }), + listUsers: vi.fn().mockResolvedValue([]), + listGroups: vi.fn().mockResolvedValue([]), + listLookmlModels: vi.fn().mockResolvedValue({ models: [] }), + getDashboard: vi.fn(), + getLook: vi.fn(), + getExplore: vi.fn(), + getSignals: vi.fn().mockResolvedValue({ + dashboardUsage: [], + lookUsage: [], + scheduledPlans: [], + favorites: [], + }), + }; + const adapters = createDefaultLocalIngestAdapters(project, { looker: { runtimeClient } }); + const looker = adapters.find((adapter) => adapter.source === 'looker'); + + expect(looker).toBeDefined(); + expect(looker?.fetch).toBeTypeOf('function'); + }); + + it('returns the explicit Metabase fan-out boundary before runner construction', async () => { + const metabase = createDefaultLocalIngestAdapters(project).find((adapter) => adapter.source === 'metabase'); + + await expect(localPullConfigForAdapter(project, metabase!, 'warehouse')).rejects.toThrow( + 'Metabase scheduled pulls fan out by mapping', + ); + }); + + it('registers historic-sql locally when Postgres historic-SQL deps are provided', () => { + const sqlAnalysis: SqlAnalysisPort = { + async analyzeForFingerprint(sql) { + return { + fingerprint: 'fp', + normalizedSql: sql, + tablesTouched: ['public.orders'], + literalSlots: [], + }; + }, + }; + const adapters = createDefaultLocalIngestAdapters(project, { + historicSql: { + sqlAnalysis, + postgresQueryClient: { + async executeQuery() { + return { headers: [], rows: [] }; + }, + }, + postgresBaselineRootDir: join(project.projectDir, '.klo/cache/historic-sql'), + }, + }); + + expect(adapters.map((adapter) => adapter.source)).toContain('historic-sql'); + expect(adapters.find((adapter) => adapter.source === 'historic-sql')?.fetch).toBeTypeOf('function'); + }); + + 
it('builds Postgres historic-sql pull config from a local connection', async () => { + const historicSql = createDefaultLocalIngestAdapters(project, { + historicSql: { + sqlAnalysis: { + async analyzeForFingerprint(sql) { + return { + fingerprint: 'fp', + normalizedSql: sql, + tablesTouched: ['public.orders'], + literalSlots: [], + }; + }, + }, + postgresQueryClient: { + async executeQuery() { + return { headers: [], rows: [] }; + }, + }, + }, + }).find((adapter) => adapter.source === 'historic-sql'); + const postgresProject = projectWithConnections({ + warehouse: { + driver: 'postgres', + url: 'env:WAREHOUSE_DATABASE_URL', + historicSql: { + enabled: true, + dialect: 'postgres', + minCalls: 7, + maxTemplatesPerRun: 123, + serviceAccountUserPatterns: ['^svc_'], + }, + }, + }); + + await expect(localPullConfigForAdapter(postgresProject, historicSql!, 'warehouse')).resolves.toEqual({ + dialect: 'postgres', + windowDays: 90, + lastSuccessfulCursor: null, + serviceAccountUserPatterns: ['^svc_'], + redactionPatterns: [], + maxTemplatesPerRun: 123, + minCalls: 7, + }); + }); + + it('rejects local historic-sql pulls when the connection has not enabled historic SQL', async () => { + const historicSql = createDefaultLocalIngestAdapters(project, { + historicSql: { + sqlAnalysis: { + async analyzeForFingerprint(sql) { + return { + fingerprint: 'fp', + normalizedSql: sql, + tablesTouched: [], + literalSlots: [], + }; + }, + }, + postgresQueryClient: { + async executeQuery() { + return { headers: [], rows: [] }; + }, + }, + }, + }).find((adapter) => adapter.source === 'historic-sql'); + const postgresProject = projectWithConnections({ + warehouse: { + driver: 'postgres', + url: 'env:WAREHOUSE_DATABASE_URL', + }, + }); + + await expect(localPullConfigForAdapter(postgresProject, historicSql!, 'warehouse')).rejects.toThrow( + 'Connection "warehouse" does not have historicSql.enabled: true', + ); + }); + + it('builds Looker pull config from local mapping state', async () => { + 
const projectDir = await mkdtemp(join(tmpdir(), 'klo-local-looker-')); + const lookerProject = { + projectDir, + config: { + connections: { + 'prod-looker': { + driver: 'looker', + base_url: 'https://looker.example.test', + client_id: 'client', + }, + 'prod-warehouse': { + driver: 'postgres', + url: 'postgresql://readonly@db.example.test/analytics', + }, + }, + }, + } as never; + const store = new LocalLookerRuntimeStore({ dbPath: join(projectDir, '.klo/db.sqlite') }); + await store.setCursors('prod-looker', { dashboardsLastSyncedAt: null, looksLastSyncedAt: null }); + await store.upsertConnectionMapping({ + lookerConnectionId: 'prod-looker', + lookerConnectionName: 'analytics', + kloConnectionId: 'prod-warehouse', + source: 'cli', + }); + const lookerDeps = { + looker: { + client: { + listLookmlModels: async () => ({ + source: 'looker', + fetchedAt: '2026-05-05T00:00:00.000Z', + models: [{ name: 'ecommerce', label: null, explores: [{ name: 'orders', label: null }] }], + }), + getExplore: async () => ({ + source: 'looker', + modelName: 'ecommerce', + exploreName: 'orders', + label: null, + description: null, + connectionName: 'analytics', + viewName: null, + rawSqlTableName: 'public.orders', + fields: { dimensions: [], measures: [] }, + joins: [], + targetWarehouseConnectionId: null, + targetTable: null, + }), + }, + parser: { + parse: async () => ({ + 'ecommerce.orders': { + ok: true, + catalog: null, + schema: 'public', + name: 'orders', + canonical_table: 'public.orders', + }, + }), + }, + }, + }; + const adapter = createDefaultLocalIngestAdapters(lookerProject, lookerDeps).find( + (candidate) => candidate.source === 'looker', + ); + + await expect(localPullConfigForAdapter(lookerProject, adapter!, 'prod-looker', lookerDeps)).resolves.toMatchObject({ + lookerConnectionId: 'prod-looker', + connectionMappings: { analytics: 'prod-warehouse' }, + connectionTypes: { analytics: 'POSTGRESQL' }, + parsedTargetTables: { + 'ecommerce.orders': { ok: true, schema: 'public', 
name: 'orders', canonicalTable: 'public.orders' }, + }, + }); + }); + + it('builds Looker pull config from yaml mapping bootstrap when SQLite is empty', async () => { + const projectDir = await mkdtemp(join(tmpdir(), 'klo-local-looker-yaml-')); + const lookerProject = { + projectDir, + config: { + connections: { + 'prod-looker': { + driver: 'looker', + base_url: 'https://looker.example.test', + client_id: 'client', + mappings: { connectionMappings: { analytics: 'prod-warehouse' } }, + }, + 'prod-warehouse': { + driver: 'postgres', + url: 'postgresql://readonly@db.example.test/analytics', + }, + }, + }, + } as never; + const lookerDeps = { + looker: { + client: { + listLookmlModels: async () => ({ + source: 'looker', + fetchedAt: '2026-05-05T00:00:00.000Z', + models: [{ name: 'ecommerce', label: null, explores: [{ name: 'orders', label: null }] }], + }), + getExplore: async () => ({ + source: 'looker', + modelName: 'ecommerce', + exploreName: 'orders', + label: null, + description: null, + connectionName: 'analytics', + viewName: null, + rawSqlTableName: 'public.orders', + fields: { dimensions: [], measures: [] }, + joins: [], + targetWarehouseConnectionId: null, + targetTable: null, + }), + }, + parser: { + parse: async () => ({ + 'ecommerce.orders': { + ok: true, + catalog: null, + schema: 'public', + name: 'orders', + canonical_table: 'public.orders', + }, + }), + }, + }, + }; + const adapter = createDefaultLocalIngestAdapters(lookerProject, lookerDeps).find( + (candidate) => candidate.source === 'looker', + ); + + await expect(localPullConfigForAdapter(lookerProject, adapter!, 'prod-looker', lookerDeps)).resolves.toMatchObject({ + connectionMappings: { analytics: 'prod-warehouse' }, + connectionTypes: { analytics: 'POSTGRESQL' }, + }); + }); + + it('builds LookML pull config from flat klo.yaml connection fields', async () => { + const lookmlProject = { + projectDir: tempDir, + config: { + connections: { + 'prod-lookml': { + driver: 'lookml', + repo_url: 
'https://github.com/acme/looker.git', + branch: 'main', + path: 'models', + auth_token_ref: 'env:GITHUB_TOKEN', + mappings: { expectedLookerConnectionName: 'bigquery_prod' }, + }, + }, + }, + } as never; + const adapter = createDefaultLocalIngestAdapters(lookmlProject).find((candidate) => candidate.source === 'lookml'); + + await expect( + localPullConfigForAdapter(lookmlProject, adapter!, 'prod-lookml', { + looker: { env: { GITHUB_TOKEN: 'ghp_test_token' } }, + }), + ).resolves.toEqual({ + repoUrl: 'https://github.com/acme/looker.git', + branch: 'main', + path: 'models', + authToken: 'ghp_test_token', + expectedLookerConnectionName: 'bigquery_prod', + parsedTargetTables: {}, + }); + }); + + it('rejects local LookML scheduled pulls when repo_url is missing', async () => { + const lookmlProject = { + projectDir: tempDir, + config: { connections: { 'prod-lookml': { driver: 'lookml' } } }, + } as never; + const adapter = createDefaultLocalIngestAdapters(lookmlProject).find((candidate) => candidate.source === 'lookml'); + + await expect(localPullConfigForAdapter(lookmlProject, adapter!, 'prod-lookml')).rejects.toThrow( + 'lookml integration config missing repoUrl', + ); + }); + + it('reads dbt source_dir from local connection config', async () => { + const project = projectWithConnections({ + analytics_dbt: { + driver: 'dbt', + source_dir: '/repo/dbt', + profiles_path: '/repo/profiles', + target: 'prod', + project_name: 'analytics', + }, + }); + const adapter = createDefaultLocalIngestAdapters(project).find((candidate) => candidate.source === 'dbt'); + + await expect(localPullConfigForAdapter(project, adapter!, 'analytics_dbt')).resolves.toEqual({ + sourceDir: '/repo/dbt', + profilesPath: '/repo/profiles', + target: 'prod', + projectName: 'analytics', + }); + }); + + it('reads dbt git repo config from local connection config', async () => { + const dbtProject = projectWithConnections({ + analytics_dbt: { + driver: 'dbt', + repo_url: 'https://github.com/acme/dbt.git', + 
branch: 'main', + path: 'analytics', + auth_token_ref: 'env:DBT_REPO_TOKEN', + }, + }); + const adapter = createDefaultLocalIngestAdapters(dbtProject).find((candidate) => candidate.source === 'dbt'); + + await expect( + localPullConfigForAdapter(dbtProject, adapter!, 'analytics_dbt', { + looker: { env: { DBT_REPO_TOKEN: 'token-123' } as NodeJS.ProcessEnv }, + }), + ).resolves.toEqual({ + repoUrl: 'https://github.com/acme/dbt.git', + branch: 'main', + path: 'analytics', + authToken: 'token-123', + }); + }); + + it('resolves MetricFlow auth_token_ref without writing literal tokens to config', async () => { + const project = projectWithConnections({ + metricflow_main: { + driver: 'metricflow', + metricflow: { + repoUrl: 'https://github.com/acme/metrics.git', + branch: 'main', + path: 'semantic_models', + auth_token_ref: 'env:METRICFLOW_REPO_TOKEN', + }, + }, + }); + const adapter = createDefaultLocalIngestAdapters(project).find((candidate) => candidate.source === 'metricflow'); + + await expect( + localPullConfigForAdapter(project, adapter!, 'metricflow_main', { + looker: { env: { METRICFLOW_REPO_TOKEN: 'token-123' } as NodeJS.ProcessEnv }, + }), + ).resolves.toEqual({ + repoUrl: 'https://github.com/acme/metrics.git', + branch: 'main', + path: 'semantic_models', + authToken: 'token-123', + parsedTargetTables: {}, + }); + }); +}); diff --git a/packages/context/src/ingest/local-adapters.ts b/packages/context/src/ingest/local-adapters.ts new file mode 100644 index 00000000..97c40844 --- /dev/null +++ b/packages/context/src/ingest/local-adapters.ts @@ -0,0 +1,256 @@ +import { join } from 'node:path'; +import { localConnectionToWarehouseDescriptor, notionConnectionToPullConfig, parseNotionConnectionConfig } from '../connections/index.js'; +import { resolveKloConfigReference } from '../core/config-reference.js'; +import type { KloLocalProject } from '../project/index.js'; +import type { SqlAnalysisPort } from '../sql-analysis/index.js'; +import { DbtSourceAdapter } from 
'./adapters/dbt/dbt.adapter.js'; +import { FakeSourceAdapter } from './adapters/fake/fake.adapter.js'; +import { HistoricSqlSourceAdapter } from './adapters/historic-sql/historic-sql.adapter.js'; +import { PostgresPgssQueryHistoryReader } from './adapters/historic-sql/postgres-pgss-query-history-reader.js'; +import { SnowflakeHistoricSqlQueryHistoryReader } from './adapters/historic-sql/snowflake-query-history-reader.js'; +import { + HISTORIC_SQL_SOURCE_KEY, + historicSqlPullConfigSchema, + type KloPostgresQueryClient, +} from './adapters/historic-sql/types.js'; +import { + createDaemonLiveDatabaseIntrospection, + type DaemonLiveDatabaseIntrospectionOptions, +} from './adapters/live-database/daemon-introspection.js'; +import { LiveDatabaseSourceAdapter } from './adapters/live-database/live-database.adapter.js'; +import { createDaemonLookerTableIdentifierParser } from './adapters/looker/daemon-table-identifier-parser.js'; +import { DefaultLookerConnectionClientFactory } from './adapters/looker/factory.js'; +import { createLocalLookerCredentialResolver } from './adapters/looker/local-looker.adapter.js'; +import { LocalLookerRuntimeStore } from './adapters/looker/local-runtime-store.js'; +import { LookerSourceAdapter } from './adapters/looker/looker.adapter.js'; +import { + buildLookerPullConfigFromInputs, + type LookerMappingClient, + type LookerTableIdentifierParser, +} from './adapters/looker/mapping.js'; +import type { LookerRuntimeClient } from './adapters/looker/fetch.js'; +import { LookmlSourceAdapter } from './adapters/lookml/lookml.adapter.js'; +import { pullConfigFromIntegrationConfig } from './adapters/lookml/pull-config.js'; +import { createLocalMetabaseSourceAdapter } from './adapters/metabase/local-metabase.adapter.js'; +import { MetricflowSourceAdapter } from './adapters/metricflow/metricflow.adapter.js'; +import { pullConfigFromMetricflowIntegration } from './adapters/metricflow/pull-config.js'; +import { NotionSourceAdapter } from 
'./adapters/notion/notion.adapter.js'; +import { seedLocalMappingStateFromKloYaml } from './local-mapping-reconcile.js'; +import type { SourceAdapter } from './types.js'; + +export interface DefaultLocalIngestAdaptersOptions { + databaseIntrospectionUrl?: string; + databaseIntrospection?: Omit; + historicSql?: { + sqlAnalysis: SqlAnalysisPort; + postgresQueryClient: KloPostgresQueryClient; + postgresBaselineRootDir?: string; + now?: () => Date; + }; + looker?: { + daemonBaseUrl?: string; + client?: Pick; + runtimeClient?: LookerRuntimeClient; + parser?: LookerTableIdentifierParser; + env?: NodeJS.ProcessEnv; + }; +} + +export function createDefaultLocalIngestAdapters( + project: KloLocalProject, + options: DefaultLocalIngestAdaptersOptions = {}, +): SourceAdapter[] { + const lookerConnectionFactory = new DefaultLookerConnectionClientFactory( + createLocalLookerCredentialResolver(project, options.looker?.env), + ); + + const adapters: SourceAdapter[] = [ + new FakeSourceAdapter(), + new LiveDatabaseSourceAdapter({ + introspection: createDaemonLiveDatabaseIntrospection({ + connections: project.config.connections, + ...options.databaseIntrospection, + ...(options.databaseIntrospectionUrl ? { baseUrl: options.databaseIntrospectionUrl } : {}), + }), + }), + new LookmlSourceAdapter({ homeDir: join(project.projectDir, '.klo/cache') }), + new DbtSourceAdapter({ homeDir: join(project.projectDir, '.klo/cache') }), + createLocalMetabaseSourceAdapter(project), + new LookerSourceAdapter({ + clientFactory: { + async createClient(config, ctx) { + if (options.looker?.runtimeClient) { + return options.looker.runtimeClient; + } + return lookerConnectionFactory.createClient(config.lookerConnectionId ?? 
ctx.connectionId); + }, + }, + }), + new MetricflowSourceAdapter({ homeDir: join(project.projectDir, '.klo/cache') }), + new NotionSourceAdapter(), + ]; + + if (options.historicSql) { + adapters.push( + new HistoricSqlSourceAdapter({ + sqlAnalysis: options.historicSql.sqlAnalysis, + reader: new SnowflakeHistoricSqlQueryHistoryReader(), + queryClient: { + executeQuery: async () => { + throw new Error('Local historic-SQL currently supports Postgres pg_stat_statements only'); + }, + }, + postgresReader: new PostgresPgssQueryHistoryReader(), + postgresQueryClient: options.historicSql.postgresQueryClient, + postgresBaselineRootDir: options.historicSql.postgresBaselineRootDir, + now: options.historicSql.now, + }), + ); + } + + return adapters; +} + +function isRecord(value: unknown): value is Record { + return typeof value === 'object' && value !== null && !Array.isArray(value); +} + +function stringField(value: unknown): string | null { + return typeof value === 'string' && value.trim().length > 0 ? value.trim() : null; +} + +function localLookmlPullConfigFromConnection(connection: Record | undefined, env: NodeJS.ProcessEnv) { + const mappings = isRecord(connection?.mappings) ? connection.mappings : {}; + const authTokenRef = stringField(connection?.auth_token_ref) ?? stringField(connection?.authTokenRef); + const literalAuthToken = stringField(connection?.authToken) ?? stringField(connection?.auth_token); + + return pullConfigFromIntegrationConfig({ + repoUrl: stringField(connection?.repoUrl) ?? stringField(connection?.repo_url) ?? null, + branch: stringField(connection?.branch), + path: stringField(connection?.path), + authToken: literalAuthToken ?? resolveKloConfigReference(authTokenRef ?? undefined, env) ?? null, + expectedLookerConnectionName: stringField(mappings.expectedLookerConnectionName), + }); +} + +function localDbtPullConfigFromConnection(connection: Record | undefined, env: NodeJS.ProcessEnv) { + const sourceDir = stringField(connection?.source_dir) ?? 
stringField(connection?.sourceDir); + const repoUrl = stringField(connection?.repo_url) ?? stringField(connection?.repoUrl); + if (sourceDir) { + return { + sourceDir, + ...(stringField(connection?.profiles_path) ? { profilesPath: stringField(connection?.profiles_path) } : {}), + ...(stringField(connection?.profilesPath) ? { profilesPath: stringField(connection?.profilesPath) } : {}), + ...(stringField(connection?.target) ? { target: stringField(connection?.target) } : {}), + ...(stringField(connection?.project_name) ? { projectName: stringField(connection?.project_name) } : {}), + ...(stringField(connection?.projectName) ? { projectName: stringField(connection?.projectName) } : {}), + }; + } + if (!repoUrl) { + return undefined; + } + const authToken = + stringField(connection?.authToken) ?? + resolveKloConfigReference( + stringField(connection?.auth_token_ref) ?? stringField(connection?.authTokenRef) ?? undefined, + env, + ); + return { + repoUrl, + ...(stringField(connection?.branch) ? { branch: stringField(connection?.branch) } : {}), + ...(stringField(connection?.path) ? { path: stringField(connection?.path) } : {}), + ...(authToken ? { authToken } : {}), + }; +} + +export async function localPullConfigForAdapter( + project: KloLocalProject, + adapter: SourceAdapter, + connectionId: string, + options: DefaultLocalIngestAdaptersOptions = {}, +): Promise { + if (adapter.source === 'metabase') { + throw new Error( + 'Metabase scheduled pulls fan out by mapping. Call runLocalMetabaseIngest() or use `klo ingest run --adapter metabase --connection-id ` from the CLI.', + ); + } + const connection = project.config.connections[connectionId]; + if (adapter.source === HISTORIC_SQL_SOURCE_KEY) { + const historicSql = isRecord(connection?.historicSql) ? 
connection.historicSql : null; + if (historicSql?.enabled !== true) { + throw new Error(`Connection "${connectionId}" does not have historicSql.enabled: true`); + } + return historicSqlPullConfigSchema.parse({ + ...historicSql, + lastSuccessfulCursor: stringField(historicSql.lastSuccessfulCursor), + }); + } + if (adapter.source === 'looker') { + await seedLocalMappingStateFromKloYaml(project, connectionId); + const store = new LocalLookerRuntimeStore({ dbPath: join(project.projectDir, '.klo', 'db.sqlite') }); + const targetConnections = new Map( + Object.entries(project.config.connections).flatMap(([id, config]) => { + const descriptor = localConnectionToWarehouseDescriptor(id, config); + return descriptor ? [[id, descriptor]] : []; + }), + ); + const parser = + options.looker?.parser ?? + createDaemonLookerTableIdentifierParser({ + baseUrl: options.looker?.daemonBaseUrl ?? process.env.KLO_DAEMON_URL ?? 'http://127.0.0.1:8765', + }); + let cleanupClient: Pick | null = null; + let client: Pick; + if (options.looker?.client) { + client = options.looker.client; + } else { + const runtimeClient = await new DefaultLookerConnectionClientFactory( + createLocalLookerCredentialResolver(project, options.looker?.env), + ).createClient(connectionId); + cleanupClient = runtimeClient; + client = runtimeClient; + } + try { + return await buildLookerPullConfigFromInputs({ + lookerConnectionId: connectionId, + cursors: await store.readCursors(connectionId), + refreshedMappings: await store.readMappings(connectionId), + targetConnections, + client, + parser, + }); + } finally { + await cleanupClient?.cleanup?.(); + } + } + if (adapter.source === 'lookml') { + return localLookmlPullConfigFromConnection(connection, options.looker?.env ?? process.env); + } + if (adapter.source === 'dbt') { + return localDbtPullConfigFromConnection(connection, options.looker?.env ?? 
process.env); + } + if (adapter.source === 'notion') { + return notionConnectionToPullConfig(parseNotionConnectionConfig(connection)); + } + if (adapter.source === 'metricflow') { + const metricflow = connection?.metricflow; /* connection is undefined for an unknown connectionId; guard like the sibling branches */ + const metricflowConfig = + typeof metricflow === 'object' && metricflow !== null && !Array.isArray(metricflow) + ? (metricflow as Record) + : null; + const authToken = + typeof metricflowConfig?.authToken === 'string' + ? metricflowConfig.authToken + : resolveKloConfigReference( + typeof metricflowConfig?.auth_token_ref === 'string' ? metricflowConfig.auth_token_ref : undefined, + options.looker?.env ?? process.env, + ); + return pullConfigFromMetricflowIntegration({ + repoUrl: typeof metricflowConfig?.repoUrl === 'string' ? metricflowConfig.repoUrl : null, + branch: typeof metricflowConfig?.branch === 'string' ? metricflowConfig.branch : null, + path: typeof metricflowConfig?.path === 'string' ? metricflowConfig.path : null, + authToken: authToken ?? null, + }); + } + return undefined; +} diff --git a/packages/context/src/ingest/local-bundle-ingest.test.ts b/packages/context/src/ingest/local-bundle-ingest.test.ts new file mode 100644 index 00000000..9c0ce2b3 --- /dev/null +++ b/packages/context/src/ingest/local-bundle-ingest.test.ts @@ -0,0 +1,608 @@ +import { mkdir, mkdtemp, readFile, rm, writeFile } from 'node:fs/promises'; +import { tmpdir } from 'node:os'; +import { join } from 'node:path'; +import { AgentRunnerService } from '../agent/index.js'; +import { initKloProject, type KloLocalProject, loadKloProject } from '../project/index.js'; +import { makeLocalGitRepo } from '../test/make-local-git-repo.js'; +import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'; +import { FakeSourceAdapter } from './adapters/fake/fake.adapter.js'; +import { LocalLookerRuntimeStore } from './adapters/looker/local-runtime-store.js'; +import { createDefaultLocalIngestAdapters, localPullConfigForAdapter } from './local-adapters.js'; +import { 
getLocalIngestStatus, runLocalIngest } from './local-ingest.js'; + +class TestAgentRunner extends AgentRunnerService { + override runLoop = vi.fn().mockResolvedValue({ stopReason: 'natural' as const }); + + constructor() { + super({ llmProvider: { getModel: () => ({}) as never } as never }); + } +} + +class LookerSlWritingAgentRunner extends AgentRunnerService { + override runLoop = vi.fn(async (params: any) => { + if ( + params.telemetryTags?.operationName === 'ingest-bundle-wu' && + params.telemetryTags?.unitKey === 'looker-explore-ecommerce-orders' + ) { + const slWrite = params.toolSet.sl_write_source; + if (!slWrite?.execute) { + throw new Error('sl_write_source tool was not available to the Looker WorkUnit'); + } + const result = await slWrite.execute( + { + connectionId: 'prod-warehouse', + sourceName: 'looker__ecommerce__orders', + source: { + name: 'looker__ecommerce__orders', + table: 'public.orders', + grain: ['id'], + columns: [ + { name: 'id', type: 'number' }, + { name: 'revenue', type: 'number' }, + ], + measures: [{ name: 'total_revenue', expr: 'sum(revenue)' }], + }, + }, + { toolCallId: 'looker-sl-write' }, + ); + if (!result.structured.success) { + throw new Error(result.markdown); + } + } + return { stopReason: 'natural' as const }; + }); + + constructor() { + super({ llmProvider: { getModel: () => ({}) as never } as never }); + } +} + +function makeLookerRuntimeClient() { + const lookerModels = { + models: [{ name: 'ecommerce', label: 'Ecommerce', explores: [{ name: 'orders', label: 'Orders' }] }], + }; + const lookerExplore = { + modelName: 'ecommerce', + exploreName: 'orders', + label: 'Orders', + description: null, + connectionName: 'analytics', + viewName: 'orders', + rawSqlTableName: 'public.orders', + fields: { + dimensions: [{ name: 'orders.id', label: null, type: null, sql: null, description: null }], + measures: [{ name: 'orders.revenue', label: null, type: null, sql: null, description: null }], + }, + joins: [ + { + name: 'users', + 
type: 'left_outer', + relationship: 'many_to_one', + rawSqlTableName: 'public.users', + sqlOn: '${orders.user_id} = ${users.id}', + from: null, + targetTable: null, + }, + ], + targetWarehouseConnectionId: null, + targetTable: null, + }; + + return { + listDashboards: vi.fn().mockResolvedValue([{ id: '10', updatedAt: '2026-05-05T08:00:00.000Z' }]), + getDashboard: vi.fn().mockResolvedValue({ + lookerId: '10', + title: 'Revenue Overview', + description: 'Revenue dashboard', + folderId: '7', + ownerId: '3', + updatedAt: '2026-05-05T08:00:00.000Z', + tiles: [{ id: '100', title: 'Revenue', lookId: null, query: { model: 'ecommerce', view: 'orders' } }], + }), + listLooks: vi.fn().mockResolvedValue([{ id: '20', updatedAt: '2026-05-05T08:10:00.000Z' }]), + getLook: vi.fn().mockResolvedValue({ + lookerId: '20', + title: 'Revenue Look', + description: null, + folderId: '7', + ownerId: '3', + updatedAt: '2026-05-05T08:10:00.000Z', + query: { model: 'ecommerce', view: 'orders', fields: ['orders.revenue'] }, + }), + listFolders: vi.fn().mockResolvedValue({ folders: [{ id: '7', name: 'Shared', parentId: null, path: ['Shared'] }] }), + listUsers: vi.fn().mockResolvedValue([{ id: '3', displayName: 'Ada Lovelace', email: 'ada@example.test' }]), + listGroups: vi.fn().mockResolvedValue([{ id: '4', name: 'Analysts' }]), + listLookmlModels: vi.fn().mockResolvedValue(lookerModels), + getExplore: vi.fn().mockResolvedValue(lookerExplore), + getSignals: vi.fn().mockResolvedValue({ + dashboardUsage: [{ contentId: '10', queryCount30d: 12, uniqueUsers30d: 3, lastRunAt: null, topUsers: ['3'] }], + lookUsage: [{ contentId: '20', queryCount30d: 4, uniqueUsers30d: 2, lastRunAt: null, topUsers: ['3'] }], + scheduledPlans: [ + { contentId: '10', contentType: 'dashboard', isScheduled: true, scheduleCount: 1, recipientCount: 4 }, + ], + favorites: [{ contentId: '10', contentType: 'dashboard', favoriteCount: 2 }], + }), + cleanup: vi.fn().mockResolvedValue(undefined), + }; +} + +function 
makeLookerParser() { + return { + parse: vi.fn().mockResolvedValue({ + 'ecommerce.orders': { + ok: true, + catalog: null, + schema: 'public', + name: 'orders', + canonical_table: 'public.orders', + }, + 'ecommerce.orders.users': { + ok: true, + catalog: null, + schema: 'public', + name: 'users', + canonical_table: 'public.users', + }, + }), + }; +} + +describe('canonical local ingest', () => { + let tempDir: string; + let project: KloLocalProject; + + beforeEach(async () => { + tempDir = await mkdtemp(join(tmpdir(), 'klo-local-full-ingest-')); + const projectDir = join(tempDir, 'project'); + await initKloProject({ projectDir, projectName: 'warehouse' }); + await writeFile( + join(projectDir, 'klo.yaml'), + [ + 'project: warehouse', + 'connections:', + ' warehouse:', + ' driver: postgres', + 'ingest:', + ' adapters:', + ' - fake', + ' embeddings:', + ' backend: deterministic', + '', + ].join('\n'), + 'utf-8', + ); + project = await loadKloProject({ projectDir }); + }); + + afterEach(async () => { + await rm(tempDir, { recursive: true, force: true }); + }); + + it('runs the full IngestBundleRunner through local ports and stores a bundle report', async () => { + const sourceDir = join(tempDir, 'source'); + await mkdir(join(sourceDir, 'orders'), { recursive: true }); + await writeFile(join(sourceDir, 'orders', 'orders.json'), '{"name":"orders"}\n', 'utf-8'); + const agentRunner = new TestAgentRunner(); + + const result = await runLocalIngest({ + project, + adapters: [new FakeSourceAdapter()], + adapter: 'fake', + connectionId: 'warehouse', + sourceDir, + jobId: 'full-local-1', + agentRunner, + }); + + expect(agentRunner.runLoop).toHaveBeenCalledTimes(1); + expect(result.result).toMatchObject({ + jobId: 'full-local-1', + runId: expect.any(String), + workUnitCount: 1, + failedWorkUnits: [], + }); + expect(result.report).toMatchObject({ + jobId: 'full-local-1', + connectionId: 'warehouse', + sourceKey: 'fake', + body: { + diffSummary: { added: 1, modified: 0, deleted: 0, 
unchanged: 0 }, + failedWorkUnits: [], + workUnits: [ + expect.objectContaining({ + unitKey: 'fake-orders', + status: 'success', + rawFiles: ['orders/orders.json'], + }), + ], + }, + }); + expect(result.report.body.provenanceRows).toEqual([ + { + rawPath: 'orders/orders.json', + artifactKind: null, + artifactKey: null, + targetConnectionId: null, + actionType: 'skipped', + }, + ]); + + const stagedRawPath = join( + project.projectDir, + 'raw-sources', + 'warehouse', + 'fake', + result.report.body.syncId, + 'orders', + 'orders.json', + ); + await expect(readFile(stagedRawPath, 'utf-8')).resolves.toBe('{"name":"orders"}\n'); + + await expect(getLocalIngestStatus(project, result.report.id)).resolves.toMatchObject({ + id: result.report.id, + jobId: 'full-local-1', + }); + await expect(getLocalIngestStatus(project, result.report.runId)).resolves.toMatchObject({ + id: result.report.id, + jobId: 'full-local-1', + }); + await expect(getLocalIngestStatus(project, 'full-local-1')).resolves.toMatchObject({ + id: result.report.id, + jobId: 'full-local-1', + }); + }); + + it('rejects direct Metabase scheduled pulls before requiring a local ingest LLM provider', async () => { + const projectDir = join(tempDir, 'metabase-project'); + await initKloProject({ projectDir, projectName: 'warehouse' }); + await writeFile( + join(projectDir, 'klo.yaml'), + [ + 'project: warehouse', + 'connections:', + ' warehouse:', + ' driver: postgres', + 'ingest:', + ' adapters:', + ' - metabase', + ' embeddings:', + ' backend: deterministic', + '', + ].join('\n'), + 'utf-8', + ); + const metabaseProject = await loadKloProject({ projectDir }); + + await expect( + runLocalIngest({ + project: metabaseProject, + adapters: createDefaultLocalIngestAdapters(metabaseProject), + adapter: 'metabase', + connectionId: 'warehouse', + jobId: 'metabase-local', + }), + ).rejects.toThrow('Metabase scheduled pulls fan out by mapping'); + }); + + it('runs full MetricFlow local ingest from a dbt repo fixture through the 
canonical runner', async () => { + const projectDir = join(tempDir, 'metricflow-run-project'); + await initKloProject({ projectDir, projectName: 'warehouse' }); + + const fixtureDir = join(tempDir, 'metricflow-fixture'); + await mkdir(join(fixtureDir, 'models'), { recursive: true }); + await writeFile( + join(fixtureDir, 'dbt_project.yml'), + [ + 'name: analytics', + 'version: "1.0.0"', + 'config-version: 2', + 'profile: analytics', + 'model-paths: ["models"]', + '', + ].join('\n'), + 'utf-8', + ); + await writeFile( + join(fixtureDir, 'models/orders.yml'), + [ + 'semantic_models:', + ' - name: orders', + ' model: ref("orders")', + ' entities:', + ' - name: order', + ' type: primary', + ' expr: order_id', + ' dimensions:', + ' - name: ordered_at', + ' type: time', + ' expr: ordered_at', + ' measures:', + ' - name: revenue', + ' agg: sum', + ' expr: revenue', + 'metrics:', + ' - name: total_revenue', + ' type: simple', + ' type_params:', + ' measure: revenue', + '', + ].join('\n'), + 'utf-8', + ); + const repo = await makeLocalGitRepo(fixtureDir, join(tempDir, 'metricflow-origin')); + + await writeFile( + join(projectDir, 'klo.yaml'), + [ + 'project: warehouse', + 'connections:', + ' warehouse:', + ' driver: postgres', + ' metricflow:', + ` repoUrl: ${repo.repoUrl}`, + ' branch: main', + 'ingest:', + ' adapters:', + ' - metricflow', + ' embeddings:', + ' backend: deterministic', + 'storage:', + ' state: sqlite', + ' search: sqlite-fts5', + ' git:', + ' auto_commit: false', + ' author: KLO Test ', + '', + ].join('\n'), + 'utf-8', + ); + + const metricflowProject = await loadKloProject({ projectDir }); + const agentRunner = new TestAgentRunner(); + const result = await runLocalIngest({ + project: metricflowProject, + adapters: createDefaultLocalIngestAdapters(metricflowProject), + adapter: 'metricflow', + connectionId: 'warehouse', + jobId: 'metricflow-local-full', + agentRunner, + }); + + expect(agentRunner.runLoop).toHaveBeenCalledTimes(1); + 
expect(result.result).toMatchObject({ + jobId: 'metricflow-local-full', + workUnitCount: 1, + failedWorkUnits: [], + }); + /* The report must list a single successful work unit, metricflow-orders, fed by models/orders.yml. */ expect(result.report).toMatchObject({ + jobId: 'metricflow-local-full', + connectionId: 'warehouse', + sourceKey: 'metricflow', + body: { + failedWorkUnits: [], + workUnits: [ + expect.objectContaining({ + unitKey: 'metricflow-orders', + status: 'success', + rawFiles: ['models/orders.yml'], + }), + ], + }, + }); + + /* The fetched YAML is staged under raw-sources/warehouse/metricflow/SYNC_ID/models/orders.yml. */ const stagedRawPath = join( + metricflowProject.projectDir, + 'raw-sources', + 'warehouse', + 'metricflow', + result.report.body.syncId, + 'models', + 'orders.yml', + ); + await expect(readFile(stagedRawPath, 'utf-8')).resolves.toContain('semantic_models:'); + }); + + /* Checks that the pull config for the metricflow adapter is derived from the connection metricflow block (repoUrl and branch) without running an ingest. NOTE(review): this test creates its own mkdtemp directory outside tempDir and no cleanup is visible in this test - confirm it is removed elsewhere. */ it('local metricflow ingest can fetch from connection metricflow config without sourceDir', async () => { + const projectDir = await mkdtemp(join(tmpdir(), 'klo-local-mf-fetch-')); + const fixtureDir = join(projectDir, 'fixture-src'); + await mkdir(join(fixtureDir, 'models'), { recursive: true }); + await writeFile(join(fixtureDir, 'dbt_project.yml'), 'name: analytics\n', 'utf-8'); + await writeFile( + join(fixtureDir, 'models/orders.yml'), + 'semantic_models:\n - name: orders\n model: ref("orders")\n', + 'utf-8', + ); + const repo = await makeLocalGitRepo(fixtureDir, join(projectDir, 'origin')); + await writeFile( + join(projectDir, 'klo.yaml'), + [ + 'project: local-mf', + 'connections:', + ' warehouse:', + ' driver: postgres', + ' metricflow:', + ` repoUrl: ${repo.repoUrl}`, + ' branch: main', + 'storage:', + ' state: sqlite', + ' search: sqlite-fts5', + ' git:', + ' auto_commit: false', + ' author: KLO Test ', + '', + ].join('\n'), + 'utf-8', + ); + + const metricflowProject = await loadKloProject({ projectDir }); + const adapters = createDefaultLocalIngestAdapters(metricflowProject); + const metricflow = adapters.find((adapter) => adapter.source === 'metricflow'); + + /* The default metricflow adapter must expose a fetch function before its pull config is resolved. */ expect(metricflow?.fetch).toBeTypeOf('function'); + await
expect(localPullConfigForAdapter(metricflowProject, metricflow!, 'warehouse')).resolves.toMatchObject({ + repoUrl: repo.repoUrl, + branch: 'main', + path: null, + authToken: null, + parsedTargetTables: {}, + }); + }); + + /* Looker end-to-end case: a looker connection is mapped to a postgres warehouse connection, and the run is expected to record semantic-layer writes against the warehouse connection. */ it('runs scheduled Looker ingest through the canonical local runner and records SL target evidence', async () => { + const projectDir = join(tempDir, 'looker-project'); + await initKloProject({ projectDir, projectName: 'looker-runtime' }); + await writeFile( + join(projectDir, 'klo.yaml'), + [ + 'project: looker-runtime', + 'connections:', + ' prod-looker:', + ' driver: looker', + ' base_url: https://looker.example.test', + ' client_id: client', + ' prod-warehouse:', + ' driver: postgres', + ' url: postgresql://readonly@warehouse.example.test/analytics', + 'ingest:', + ' adapters:', + ' - looker', + ' embeddings:', + ' backend: deterministic', + 'storage:', + ' state: sqlite', + ' search: sqlite-fts5', + ' git:', + ' auto_commit: false', + ' author: KLO Test ', + '', + ].join('\n'), + 'utf-8', + ); + + const lookerProject = await loadKloProject({ projectDir }); + /* Seed the Looker runtime store in .klo/db.sqlite: null sync cursors, plus a mapping from the Looker connection named analytics to the prod-warehouse connection. */ const localStore = new LocalLookerRuntimeStore({ dbPath: join(lookerProject.projectDir, '.klo', 'db.sqlite') }); + await localStore.setCursors('prod-looker', { + dashboardsLastSyncedAt: null, + looksLastSyncedAt: null, + }); + await localStore.upsertConnectionMapping({ + lookerConnectionId: 'prod-looker', + lookerConnectionName: 'analytics', + kloConnectionId: 'prod-warehouse', + source: 'cli', + }); + + /* NOTE(review): these three helpers are defined outside this chunk; presumably they return test doubles whose cleanup and parse calls can be asserted - confirm. */ const runtimeClient = makeLookerRuntimeClient(); + const parser = makeLookerParser(); + const agentRunner = new LookerSlWritingAgentRunner(); + + /* The same runtime client is handed to both the adapter factory and the pull config options. */ const result = await runLocalIngest({ + project: lookerProject, + adapters: createDefaultLocalIngestAdapters(lookerProject, { looker: { runtimeClient } }), + adapter: 'looker', + connectionId: 'prod-looker', + jobId: 'looker-local-report-parity', + agentRunner, + pullConfigOptions: { + looker: { + client: runtimeClient, + parser, + }, + }, + });
+ + /* The runtime client must be cleaned up exactly once, and the parser must receive both the explore table and its joined users table. */ expect(runtimeClient.cleanup).toHaveBeenCalledTimes(1); + expect(parser.parse).toHaveBeenCalledWith([ + { key: 'ecommerce.orders', sql_table_name: 'public.orders', dialect: 'postgres' }, + { key: 'ecommerce.orders.users', sql_table_name: 'public.users', dialect: 'postgres' }, + ]); + expect(result.result).toMatchObject({ + jobId: 'looker-local-report-parity', + workUnitCount: 3, + failedWorkUnits: [], + }); + expect(result.report).toMatchObject({ + jobId: 'looker-local-report-parity', + connectionId: 'prod-looker', + sourceKey: 'looker', + body: { + fetch: { + status: 'success', + retryRecommended: false, + skipped: [], + warnings: [], + }, + failedWorkUnits: [], + }, + }); + + /* The explore work unit must report a created semantic-layer source targeted at prod-warehouse, not at the Looker connection. */ const exploreWorkUnit = result.report.body.workUnits.find((wu) => wu.unitKey === 'looker-explore-ecommerce-orders'); + expect(exploreWorkUnit).toMatchObject({ + status: 'success', + rawFiles: expect.arrayContaining(['explores/ecommerce/orders.json']), + actions: [ + expect.objectContaining({ + target: 'sl', + type: 'created', + key: 'looker__ecommerce__orders', + targetConnectionId: 'prod-warehouse', + }), + ], + touchedSlSources: [{ connectionId: 'prod-warehouse', sourceName: 'looker__ecommerce__orders' }], + }); + + /* Provenance must contain a source_created row linking the raw explore file to that semantic-layer source. */ expect(result.report.body.provenanceRows).toEqual( + expect.arrayContaining([ + expect.objectContaining({ + rawPath: 'explores/ecommerce/orders.json', + artifactKind: 'sl', + artifactKey: 'looker__ecommerce__orders', + targetConnectionId: 'prod-warehouse', + actionType: 'source_created', + }), + ]), + ); + + /* Staged raw files live under raw-sources/prod-looker/looker/SYNC_ID; the explore JSON is read back to check the resolved target tables. */ const rawRoot = join( + lookerProject.projectDir, + 'raw-sources', + 'prod-looker', + 'looker', + result.report.body.syncId, + ); + const explore = JSON.parse(await readFile(join(rawRoot, 'explores/ecommerce/orders.json'), 'utf-8')); + expect(explore).toMatchObject({ + targetWarehouseConnectionId: 'prod-warehouse', + targetTable: { + ok: true, + schema: 'public', + name: 'orders', + canonicalTable: 'public.orders', + }, + joins: [ + expect.objectContaining({ + name:
'users', + targetTable: expect.objectContaining({ + ok: true, + schema: 'public', + name: 'users', + canonicalTable: 'public.users', + }), + }), + ], + }); + + /* The first tile query of the staged dashboard must carry the same warehouse connection and canonical table. */ const dashboard = JSON.parse(await readFile(join(rawRoot, 'dashboards/10.json'), 'utf-8')); + expect(dashboard.tiles[0].query).toMatchObject({ + targetWarehouseConnectionId: 'prod-warehouse', + targetTable: expect.objectContaining({ ok: true, canonicalTable: 'public.orders' }), + }); + + /* The semantic-layer YAML must have been written under the warehouse connection directory. */ const sourceYaml = await readFile( + join(lookerProject.projectDir, 'semantic-layer/prod-warehouse/looker__ecommerce__orders.yaml'), + 'utf-8', + ); + expect(sourceYaml).toContain('table: public.orders'); + expect(sourceYaml).toContain('total_revenue'); + }); +}); diff --git a/packages/context/src/ingest/local-bundle-runtime.test.ts b/packages/context/src/ingest/local-bundle-runtime.test.ts new file mode 100644 index 00000000..9c3d402f --- /dev/null +++ b/packages/context/src/ingest/local-bundle-runtime.test.ts @@ -0,0 +1,144 @@ +import { mkdir, mkdtemp, rm, writeFile } from 'node:fs/promises'; +import { tmpdir } from 'node:os'; +import { join } from 'node:path'; +import { AgentRunnerService } from '../agent/index.js'; +import { initKloProject, type KloLocalProject, loadKloProject } from '../project/index.js'; +import { afterEach, beforeEach, describe, expect, it } from 'vitest'; +import { FakeSourceAdapter } from './adapters/fake/fake.adapter.js'; +import { createLocalBundleIngestRuntime } from './local-bundle-runtime.js'; + +/** Test-only structural view of the runner, used with a cast to reach its deps.connections catalog. */ type RuntimeWithConnectionDeps = { + deps: { + connections: { + listEnabledConnections(ids: string[]): Promise>; + getConnectionById(connectionId: string): Promise<{ id: string; name: string; connectionType: string } | null>; + }; + }; +}; + +/* Unit tests for the runtime factory exported by local-bundle-runtime.js. */ describe('createLocalBundleIngestRuntime', () => { + let tempDir: string; + let project: KloLocalProject; + + /* Every test starts from a fresh temp directory holding an initialised project. */ beforeEach(async () => { + tempDir = await mkdtemp(join(tmpdir(), 'klo-local-bundle-runtime-')); + const projectDir = join(tempDir, 'project'); + await
initKloProject({ projectDir, projectName: 'warehouse' }); + /* Overwrite klo.yaml with one postgres connection, the fake adapter, deterministic embeddings, and no llm section. */ await writeFile( + join(projectDir, 'klo.yaml'), + [ + 'project: warehouse', + 'connections:', + ' warehouse:', + ' driver: postgres', + 'ingest:', + ' adapters:', + ' - fake', + ' embeddings:', + ' backend: deterministic', + '', + ].join('\n'), + 'utf-8', + ); + project = await loadKloProject({ projectDir }); + }); + + /* Remove the whole temp tree; force keeps this from failing when it is already gone. */ afterEach(async () => { + await rm(tempDir, { recursive: true, force: true }); + }); + + /* With no llm section in klo.yaml and no injected agentRunner, construction must throw synchronously. */ it('requires an agent runner or configured local ingest LLM', () => { + expect(() => + createLocalBundleIngestRuntime({ + project, + adapters: [new FakeSourceAdapter()], + }), + ).toThrow('klo dev ingest run requires llm.provider.backend: anthropic, vertex, or gateway, or an injected agentRunner'); + }); + + /* NOTE(review): despite the title, the assertions below only cover the injected job id factory and the three storage path resolvers; SQLite stores and context tools are not inspected. */ it('builds runner deps with local SQLite stores and context tools enabled', async () => { + const agentRunner = new AgentRunnerService({ llmProvider: { getModel: () => ({}) as never } as any }); + + const runtime = createLocalBundleIngestRuntime({ + project, + adapters: [new FakeSourceAdapter()], + agentRunner, + jobIdFactory: () => 'job-1', + }); + + expect(runtime.nextJobId()).toBe('job-1'); + expect(runtime.storage.resolvePullDir('job-1')).toBe(join(project.projectDir, '.klo/cache/local-ingest/job-1/pull')); + expect(runtime.storage.resolveUploadDir('job-1')).toBe( + join(project.projectDir, '.klo/cache/local-ingest/job-1/upload'), + ); + expect(runtime.storage.resolveTranscriptDir('job-1')).toBe( + join(project.projectDir, '.klo/ingest-transcripts/job-1'), + ); + + /* The resolved upload directory must be creatable on disk. */ await mkdir(runtime.storage.resolveUploadDir('job-1'), { recursive: true }); + }); + + /* Connection config is mutated in memory (not via klo.yaml) to add a url to warehouse and a second bigquery connection named bq. */ it('exposes canonical warehouse connection types to local ingest SL tools', async () => { + project.config.connections.warehouse = { + driver: 'postgres', + url: 'postgresql://readonly@db.example.test/analytics', + }; + project.config.connections.bq = { + driver: 'bigquery', + project_id: 'acme', + dataset_id: 'warehouse', + }; + const agentRunner =
new AgentRunnerService({ llmProvider: { getModel: () => ({}) as never } as any }); + + const runtime = createLocalBundleIngestRuntime({ + project, + adapters: [new FakeSourceAdapter()], + agentRunner, + }); + /* The connection catalog is not part of the public runtime shape, so it is reached through a double cast of runner. */ const connections = (runtime.runner as unknown as RuntimeWithConnectionDeps).deps.connections; + + /* Driver names postgres and bigquery are expected to surface as connectionType POSTGRESQL and BIGQUERY, in the order the ids were requested. */ await expect(connections.getConnectionById('warehouse')).resolves.toMatchObject({ + id: 'warehouse', + connectionType: 'POSTGRESQL', + }); + await expect(connections.listEnabledConnections(['warehouse', 'bq'])).resolves.toEqual([ + { id: 'warehouse', name: 'warehouse', connectionType: 'POSTGRESQL' }, + { id: 'bq', name: 'bq', connectionType: 'BIGQUERY' }, + ]); + }); + + /* klo.yaml is rewritten with a gateway llm provider and no agentRunner is injected, so the runtime builds its own runner; the test only checks that construction succeeds with llmDebugRequestFile set. */ it('accepts a debug LLM request file when constructing the default agent runner', async () => { + await writeFile( + join(project.projectDir, 'klo.yaml'), + [ + 'project: warehouse', + 'connections:', + ' warehouse:', + ' driver: postgres', + 'llm:', + ' provider:', + ' backend: gateway', + ' gateway:', + ' base_url: https://gateway.example/v1', + ' models:', + ' default: anthropic/claude-sonnet-4-6', + 'ingest:', + ' adapters:', + ' - fake', + ' embeddings:', + ' backend: deterministic', + '', + ].join('\n'), + 'utf-8', + ); + project = await loadKloProject({ projectDir: project.projectDir }); + + const runtime = createLocalBundleIngestRuntime({ + project, + adapters: [new FakeSourceAdapter()], + llmDebugRequestFile: join(project.projectDir, '.klo', 'llm-debug.jsonl'), + }); + + expect(runtime.storage.resolvePullDir('job-1')).toBe(join(project.projectDir, '.klo/cache/local-ingest/job-1/pull')); + }); +}); diff --git a/packages/context/src/ingest/local-bundle-runtime.ts b/packages/context/src/ingest/local-bundle-runtime.ts new file mode 100644 index 00000000..78f7e4c3 --- /dev/null +++ b/packages/context/src/ingest/local-bundle-runtime.ts @@ -0,0 +1,594 @@ +import { mkdirSync } from 'node:fs'; +import { join } from 'node:path'; +import { fileURLToPath } from 'node:url'; +import type {
KloLlmProvider } from '@klo/llm'; +import YAML from 'yaml'; +import type { AgentRunnerService } from '../agent/index.js'; +import { AgentRunnerService as DefaultAgentRunnerService } from '../agent/index.js'; +import { localConnectionInfoFromConfig } from '../connections/index.js'; +import type { KloEmbeddingPort, KloLogger } from '../core/index.js'; +import { noopLogger, SessionWorktreeService } from '../core/index.js'; +import type { KloSemanticLayerComputePort } from '../daemon/index.js'; +import { + createJsonlKloLlmDebugRequestRecorder, + createLocalKloEmbeddingProviderFromConfig, + createLocalKloLlmProviderFromConfig, + KloIngestEmbeddingPortAdapter, +} from '../llm/index.js'; +import type { KloLocalProject } from '../project/index.js'; +import { kloLocalStateDbPath } from '../project/index.js'; +import { PromptService } from '../prompts/index.js'; +import { SkillsRegistryService } from '../skills/index.js'; +import { + type KloConnectionInfo, + type KloQueryResult, + SemanticLayerService, + type SemanticLayerSource, + type SlConnectionCatalogPort, + SlDiscoverTool, + SlEditSourceTool, + type SlPythonPort, + SlReadSourceTool, + SlRollbackTool, + SlSearchService, + type SlSourcesIndexPort, + SlValidateTool, + type SlValidationDeps, + type SlValidatorPort, + SlWriteSourceTool, + SqliteSlSourcesIndex, + sourceDefinitionSchema, + sourceOverlaySchema, +} from '../sl/index.js'; +import { + BaseTool, + ContextCandidateMarkTool, + ContextCandidateWriteTool, + ContextEvidenceNeighborsTool, + ContextEvidenceReadTool, + ContextEvidenceSearchTool, + type GitAuthorResolverPort, + type ToolContext, + type ToolSession, +} from '../tools/index.js'; +import { + type KnowledgeEventPort, + type KnowledgeIndexPort, + KnowledgeWikiService, + searchLocalKnowledgePages, + WikiListTagsTool, + WikiReadTool, + WikiRemoveTool, + WikiSearchTool, + WikiWriteTool, +} from '../wiki/index.js'; +import { + CandidateDedupService, + ContextCandidateCarryforwardService, + 
CuratorPaginationService, +} from './context-candidates/index.js'; +import { ContextEvidenceIndexService, SqliteContextEvidenceStore } from './context-evidence/index.js'; +import { DiffSetService } from './diff-set.service.js'; +import { IngestBundleRunner } from './ingest-bundle.runner.js'; +import { PageTriageService } from './page-triage/index.js'; +import type { + IngestBundleRunnerDeps, + IngestCommitMessagePort, + IngestLockPort, + IngestStoragePort, + IngestToolsetFactoryPort, + IngestToolsetLike, + SourceAdapterRegistryPort, +} from './ports.js'; +import { SourceAdapterRegistry } from './source-adapter-registry.js'; +import { SqliteBundleIngestStore } from './sqlite-bundle-ingest-store.js'; +import type { SourceAdapter } from './types.js'; + +/* Prompt and skill directories, resolved two levels above this module via import.meta.url. */ const promptsDir = fileURLToPath(new URL('../../prompts', import.meta.url)); +const skillsDir = fileURLToPath(new URL('../../skills', import.meta.url)); +/* Fixed git identity: exposed as the storage systemGitAuthor and returned by LocalAuthorResolver. */ const LOCAL_AUTHOR = { name: 'KLO Local', email: 'local@klo.local' }; +/* Warning attached to validation results when only the YAML shape was checked. */ const LOCAL_SHAPE_WARNING = 'Local ingest validates semantic-layer YAML shape only.'; + +/** Inputs for createLocalBundleIngestRuntime; only project and adapters are required. */ export interface CreateLocalBundleIngestRuntimeOptions { + project: KloLocalProject; + /* Every adapter in this list is registered in the source adapter registry. */ adapters: SourceAdapter[]; + /* Injected runner; when absent a default runner is built from the resolved LLM provider. */ agentRunner?: AgentRunnerService; + /* Takes precedence over the provider created from project.config.llm. */ llmProvider?: KloLlmProvider; + /* Only used when the default runner is built: requests are recorded through a JSONL debug recorder for this path. */ llmDebugRequestFile?: string; + /* NOTE(review): this option is not read anywhere in this file; memoryIngestionModel is taken from project.config.llm.models.default - confirm whether it should act as an override. */ memoryModel?: string; + /* Optional compute adapter; without it source validation returns only the shape warning and query returns an error object. */ semanticLayerCompute?: KloSemanticLayerComputePort; + /* Optional executor behind the connection catalog executeQuery; without it that method throws. */ queryExecutor?: { execute(input: { connectionId: string; sql: string; maxRows?: number }): Promise }; + /* Replaces the default timestamp-based job id generator. */ jobIdFactory?: () => string; + /* Defaults to noopLogger. */ logger?: KloLogger; +} + +/** Handle returned by createLocalBundleIngestRuntime: the runner plus the stores, storage resolver and registry it was wired with. */ export interface LocalBundleIngestRuntime { + runner: IngestBundleRunner; + store: SqliteBundleIngestStore; + contextStore: SqliteContextEvidenceStore; + storage: IngestStoragePort; + registry: SourceAdapterRegistryPort; + nextJobId(): string; +} + +/** Fallback embedding port used when no embedding provider is created from config; every embedding is an empty array. */ class NoopEmbeddingPort implements KloEmbeddingPort { + readonly maxBatchSize = 64; + + async computeEmbedding(): Promise { + return []; + } + + /* Returns one empty vector per input text, preserving the input length. */ async computeEmbeddingsBulk(texts: string[]): Promise { + return
texts.map(() => []); + } +} + +/** Resolves on-disk locations for local ingest, all rooted at the project directory under .klo. Paths are only computed here; nothing is created. */ class LocalIngestStorage implements IngestStoragePort { + readonly homeDir: string; + readonly systemGitAuthor = LOCAL_AUTHOR; + + constructor(private readonly project: KloLocalProject) { + this.homeDir = join(project.projectDir, '.klo'); + } + + /* Upload and pull directories share the .klo/cache/local-ingest/ID parent and differ only in the last segment. */ resolveUploadDir(uploadId: string): string { + return join(this.project.projectDir, '.klo/cache/local-ingest', uploadId, 'upload'); + } + + resolvePullDir(jobId: string): string { + return join(this.project.projectDir, '.klo/cache/local-ingest', jobId, 'pull'); + } + + /* Transcripts live outside the cache directory, under .klo/ingest-transcripts. */ resolveTranscriptDir(jobId: string): string { + return join(this.project.projectDir, '.klo/ingest-transcripts', jobId); + } +} + +/** Lock port that performs no locking: the key is ignored and fn is invoked immediately. NOTE(review): concurrent callers are therefore not serialised - presumably acceptable for single-process local runs, confirm. */ class LocalIngestLock implements IngestLockPort { + async withLock(_key: string, fn: () => Promise): Promise { + return fn(); + } +} + +/** Commit-message port whose enqueue operation does nothing. */ class LocalCommitMessagePort implements IngestCommitMessagePort { + async enqueueForExternalCommit(): Promise {} +} + +/** Author resolver that always yields LOCAL_AUTHOR. */ class LocalAuthorResolver implements GitAuthorResolverPort { + async resolve() { + return LOCAL_AUTHOR; + } +} + +/** Connection catalog backed by project.config.connections, with optional query execution through an injected executor. */ class LocalConnectionCatalog implements SlConnectionCatalogPort { + constructor( + private readonly project: KloLocalProject, + private readonly queryExecutor?: { + execute(input: { connectionId: string; sql: string; maxRows?: number }): Promise; + }, + ) {} + + /* Maps each requested id through localConnectionInfoFromConfig and drops null results, keeping request order. NOTE(review): the filter only removes null; if the helper can return undefined it would pass through - confirm its return type. */ async listEnabledConnections(ids: string[]): Promise { + return ids + .map((id) => localConnectionInfoFromConfig(id, this.project.config.connections[id])) + .filter((connection): connection is KloConnectionInfo => connection !== null); + } + + /* Unlike the list method, a falsy lookup result here throws an Error instead of being skipped. */ async getConnectionById(connectionId: string): Promise { + const connection = localConnectionInfoFromConfig(connectionId, this.project.config.connections[connectionId]); + if (!connection) { + throw new Error(`Connection not found: ${connectionId}`); + } + return connection; + } + + /* Delegates to the injected executor and throws when none was provided. Only connectionId and sql are forwarded; maxRows is never set from here. */ async executeQuery(connectionId: string, sql: string): Promise { + if (!this.queryExecutor) { + throw new Error('Local ingest has no query executor configured');
+ } + return this.queryExecutor.execute({ connectionId, sql }); + } +} + +/** Adapts the optional semantic-layer compute port to the SlPythonPort shape, translating between snake_case and camelCase field names. */ class LocalSlPythonPort implements SlPythonPort { + constructor(private readonly compute?: KloSemanticLayerComputePort) {} + + /* Without a compute adapter nothing is validated: the result has no errors and carries only LOCAL_SHAPE_WARNING. */ async validateSources(input: Parameters[0]) { + if (!this.compute) { + return { data: { errors: [], warnings: [LOCAL_SHAPE_WARNING], per_source_warnings: {} } }; + } + const result = await this.compute.validateSources({ + sources: input.sources, + dialect: input.dialect, + recentlyTouched: input.recently_touched, + }); + return { + data: { + errors: result.errors, + warnings: result.warnings, + per_source_warnings: result.perSourceWarnings, + }, + }; + } + + /* Without a compute adapter this returns an object with an error string rather than throwing. */ async query(input: Parameters[0]) { + if (!this.compute) { + return { error: 'Local ingest has no semantic compute adapter configured' }; + } + const result = await this.compute.query({ + sources: input.sources, + dialect: input.dialect, + query: input.query, + }); + return { data: { sql: result.sql, plan: result.plan } }; + } +} + +/** Validator that checks a single source file against a schema only; no warehouse is consulted. */ class LocalShapeOnlySlValidator implements SlValidatorPort { + /* Reads and parses the source YAML, then picks the overlay schema when both table and sql are absent, otherwise the definition schema. Schema issues become error strings prefixed with the source name; success carries LOCAL_SHAPE_WARNING. Anything thrown along the way (read, parse, property access) is reported as a single error instead of propagating. */ async validateSingleSource(deps: SlValidationDeps, connectionId: string, sourceName: string) { + try { + const file = await deps.semanticLayerService.readSourceFile(connectionId, sourceName); + const parsed = YAML.parse(file.content) as SemanticLayerSource; + const isOverlay = parsed.table == null && parsed.sql == null; + const result = (isOverlay ? sourceOverlaySchema : sourceDefinitionSchema).safeParse(parsed); + return result.success + ? { errors: [], warnings: [LOCAL_SHAPE_WARNING] } + : { + errors: result.error.issues.map( + (issue) => `${sourceName}: ${issue.path.join('.') || 'source'} ${issue.message}`, + ), + warnings: [], + }; + } catch (error) { + return { errors: [`${sourceName}: ${error instanceof Error ?
error.message : String(error)}`], warnings: [] }; + } + } +} + +/** Splits a wiki page into its frontmatter summary and trimmed body. The frontmatter block must start at the very beginning with three dashes followed by a line feed; when the pattern does not match, the whole trimmed input is the content and the summary is empty. A non-string summary also yields an empty summary. NOTE(review): the pattern uses bare line feeds, so CRLF files would presumably not match; a YAML syntax error in the frontmatter is not caught here - confirm callers expect both. */ function parseWiki(raw: string): { summary: string; content: string } { + const match = raw.match(/^---\n([\s\S]*?)\n---\n?([\s\S]*)$/); + if (!match) { + return { summary: '', content: raw.trim() }; + } + const frontmatter = (YAML.parse(match[1]) ?? {}) as Record; + return { + summary: typeof frontmatter.summary === 'string' ? frontmatter.summary : '', + content: match[2].trim(), + }; +} + +/** Case-insensitive substring score in the range 0 to 1: 0 for a blank query, 1 when the text contains the whole trimmed query, otherwise the fraction of whitespace-separated query words found in the text. */ function scoreText(text: string, query: string): number { + const normalized = query.toLowerCase().trim(); + if (!normalized) { + return 0; + } + const haystack = text.toLowerCase(); + if (haystack.includes(normalized)) { + return 1; + } + const words = normalized.split(/\s+/).filter(Boolean); + return words.filter((word) => haystack.includes(word)).length / Math.max(words.length, 1); +} + +/** Knowledge index backed directly by markdown files in project.fileStore. All write and delete operations are no-ops; reads scan the knowledge directories on every call. */ class LocalKnowledgeIndex implements KnowledgeIndexPort { + constructor(private readonly project: KloLocalProject) {} + + async upsertPage(): Promise {} + + async applyDiffTransactional(): Promise {} + + /* Always an empty map: no search texts are persisted by this index. */ async getExistingSearchTexts(): Promise> { + return new Map(); + } + + async deleteStale(): Promise {} + + async deleteByScope(): Promise {} + + async deleteByKey(): Promise {} + + /* Existence check: GLOBAL scope maps to knowledge/global, any other scope to knowledge/user/SCOPE_ID; a failed read means not found. A null scopeId with a non-GLOBAL scope is interpolated into the path as the text null. */ async findPageByKey(scope: string, scopeId: string | null, pageKey: string) { + const path = scope === 'GLOBAL' ?
`knowledge/global/${pageKey}.md` : `knowledge/user/${scopeId}/${pageKey}.md`; + /* Only readability is tested; the file content is discarded and any read error is reported as null. */ try { + await this.project.fileStore.readFile(path); + return { page_key: pageKey }; + } catch { + return null; + } + } + + /* Lists global pages followed by the pages of the given user by reading every .md file in both directories and parsing its frontmatter summary; the combined list is sorted by page key. The page key is the listed file name with the .md suffix removed. NOTE(review): listFiles is called unconditionally for the user directory; whether a missing directory throws or yields an empty list depends on the file store - confirm. */ async listPagesForUser( + userId: string, + ): Promise> { + const pages: Array<{ page_key: string; summary: string; scope: string; scope_id: string | null }> = []; + for (const scope of [ + { scope: 'GLOBAL', scopeId: null, dir: 'knowledge/global' }, + { scope: 'USER', scopeId: userId, dir: `knowledge/user/${userId}` }, + ]) { + const listed = await this.project.fileStore.listFiles(scope.dir, true); + for (const file of listed.files.filter((entry) => entry.endsWith('.md'))) { + const pageKey = file.replace(/\.md$/, ''); + const raw = await this.project.fileStore.readFile(`${scope.dir}/${file}`); + const parsed = parseWiki(raw.content); + pages.push({ + page_key: pageKey, + summary: parsed.summary, + scope: scope.scope, + scope_id: scope.scopeId, + }); + } + } + return pages.sort((left, right) => left.page_key.localeCompare(right.page_key)); + } + + /* Counts USER-scoped pages by running the full listing, so global pages are read as well. */ async getUserPageCount(userId: string): Promise { + return (await this.listPagesForUser(userId)).filter((page) => page.scope === 'USER').length; + } + + async incrementUsageCount(): Promise {} + + /* Keyword-only search: the embedding argument is ignored and rrfScore is scoreText over page key plus summary. Zero scores are dropped, results are ordered by score descending then page key, and cut to limit. */ async searchRRF( + userId: string, + _embedding: number[] | null, + queryText: string, + limit: number, + ): Promise> { + const pages = await this.listPagesForUser(userId); + return pages + .map((page) => ({ + pageKey: page.page_key, + summary: page.summary, + rrfScore: scoreText(`${page.page_key} ${page.summary}`, queryText), + })) + .filter((page) => page.rrfScore > 0) + .sort((left, right) => right.rrfScore - left.rrfScore || left.pageKey.localeCompare(right.pageKey)) + .slice(0, limit); + } +} + +/** Knowledge event port whose createEvent does nothing. */ class NoopKnowledgeEventPort implements KnowledgeEventPort { + async createEvent(): Promise {} +} + +/** Wraps a fixed list of tools and exposes them to the AI SDK keyed by tool name. */ class LocalIngestToolSet implements IngestToolsetLike { + constructor(private readonly tools: BaseTool[]) {} + +
toAiSdkTools(context: ToolContext) { + /* Builds a name-to-tool object; if two tools share a name the later one wins, as with any Object.fromEntries call. */ return Object.fromEntries(this.tools.map((tool) => [tool.name, tool.toAiSdkTool(context)])); + } +} + +/** Builds the tool instances once at construction time and hands out toolsets that include either the base tools alone or the base tools plus the context evidence tools. */ class LocalIngestToolsetFactory implements IngestToolsetFactoryPort { + private readonly baseTools: BaseTool[]; + private readonly contextTools: BaseTool[]; + + constructor(deps: { + project: KloLocalProject; + wikiService: KnowledgeWikiService; + knowledgeIndex: KnowledgeIndexPort; + knowledgeEvents: KnowledgeEventPort; + semanticLayerService: SemanticLayerService; + slSearchService: SlSearchService; + authorResolver: GitAuthorResolverPort; + slSourcesRepository: SlSourcesIndexPort; + connections: SlConnectionCatalogPort; + contextStore: SqliteContextEvidenceStore; + embedding: KloEmbeddingPort; + }) { + /* Shared dependency bag passed to the semantic-layer discover, edit, read, write and validate tools. */ const slDeps = { + semanticLayerService: deps.semanticLayerService, + slSearchService: deps.slSearchService, + authorResolver: deps.authorResolver, + }; + /* Base tools: wiki read, search, list-tags, write and remove, followed by the semantic-layer tools. */ this.baseTools = [ + new WikiReadTool(deps.wikiService, deps.knowledgeIndex), + /* Wiki search goes through searchLocalKnowledgePages rather than the knowledge index. Results are cut to input.limit and reshaped field by field. NOTE(review): totalFound is the length of what the helper returned; if the helper already applies limit this is capped at limit - confirm. */ new WikiSearchTool({ + search: async (input) => { + const results = await searchLocalKnowledgePages(deps.project, { + userId: input.userId, + query: input.query, + limit: input.limit, + embeddingService: deps.embedding, + }); + return { + results: results.slice(0, input.limit).map((result) => ({ + key: result.key, + path: result.path, + summary: result.summary, + score: result.score, + matchReasons: result.matchReasons, + lanes: result.lanes, + })), + totalFound: results.length, + }; + }, + }), + new WikiListTagsTool(deps.wikiService, deps.knowledgeIndex), + new WikiWriteTool(deps.wikiService, deps.knowledgeIndex, deps.knowledgeEvents), + new WikiRemoveTool(deps.wikiService, deps.knowledgeIndex, deps.knowledgeEvents), + /* Discovery limits are fixed here: 25 sources, 5 detailed sources, and a minimum score of 0. */ new SlDiscoverTool(slDeps, { maxSources: 25, minRrfScore: 0, maxDetailedSources: 5 }), + new SlEditSourceTool(slDeps), + new SlReadSourceTool(slDeps), + new SlWriteSourceTool(slDeps), + new SlValidateTool(slDeps), + /* NOTE(review): the meaning of the third rollback argument (0, on the following line) is defined by SlRollbackTool outside this chunk - confirm. */ new SlRollbackTool(deps.slSourcesRepository, deps.connections,
0), + ]; + /* Context evidence tools: search and candidate-write also receive the embedding port; read, neighbors and mark use the store only. */ this.contextTools = [ + new ContextEvidenceSearchTool(deps.contextStore, deps.embedding), + new ContextEvidenceReadTool(deps.contextStore), + new ContextEvidenceNeighborsTool(deps.contextStore), + new ContextCandidateWriteTool(deps.contextStore, deps.embedding), + new ContextCandidateMarkTool(deps.contextStore), + ]; + } + + /* The session argument is unused. Context evidence tools are appended only when includeContextEvidenceTools is truthy; otherwise the base tool array itself is passed on. */ createIngestWuToolset(_session: ToolSession, options?: { includeContextEvidenceTools?: boolean }): IngestToolsetLike { + return new LocalIngestToolSet( + options?.includeContextEvidenceTools ? [...this.baseTools, ...this.contextTools] : this.baseTools, + ); + } +} + +/** Creates a fresh registry and registers every adapter in input order. */ function registerAdapters(adapters: SourceAdapter[]): SourceAdapterRegistry { + const registry = new SourceAdapterRegistry(); + for (const adapter of adapters) { + registry.register(adapter); + } + return registry; +} + +/** Default job id: the prefix local- followed by the current epoch milliseconds in base 36. NOTE(review): two calls within the same millisecond return the same id, and job ids select the pull and transcript directories - confirm callers never start jobs that quickly, or inject jobIdFactory. */ function nextLocalJobId(): string { + return `local-${Date.now().toString(36)}`; +} + +/** Chooses the agent runner for the runtime. The LLM provider is resolved first: the explicit option, else one created from project.config.llm, else undefined. An injected agentRunner is returned as-is, together with the provider only when one resolved; in that case llmDebugRequestFile and logger are not applied to the runner. Otherwise a provider is mandatory and a default runner is built around it. Throws Error when neither an agentRunner nor a provider is available. */ function resolveAgentRunner(options: CreateLocalBundleIngestRuntimeOptions): { + agentRunner: AgentRunnerService; + llmProvider?: KloLlmProvider; +} { + const llmProvider = + options.llmProvider ?? createLocalKloLlmProviderFromConfig(options.project.config.llm) ?? undefined; + + if (options.agentRunner) { + return { agentRunner: options.agentRunner, ...(llmProvider ? { llmProvider } : {}) }; + } + + if (!llmProvider) { + throw new Error( + 'klo dev ingest run requires llm.provider.backend: anthropic, vertex, or gateway, or an injected agentRunner', + ); + } + + return { + agentRunner: new DefaultAgentRunnerService({ + llmProvider, + logger: options.logger ?? noopLogger, + /* The debug recorder key is only added when a request file path was supplied. */ ...(options.llmDebugRequestFile + ? { debugRequestRecorder: createJsonlKloLlmDebugRequestRecorder(options.llmDebugRequestFile) } + : {}), + }), + llmProvider, + }; +} + +/** Wires a complete local ingest runtime for a project: SQLite-backed stores on the project state database, file-backed knowledge index, no-op lock, commit-message and event ports, the tool factory, and the bundle runner. Creates the .klo/cache/local-ingest directory synchronously as a side effect. Throws when neither an agentRunner nor an LLM provider can be resolved. */ export function createLocalBundleIngestRuntime( + options: CreateLocalBundleIngestRuntimeOptions, +): LocalBundleIngestRuntime { + const logger = options.logger ??
noopLogger; + /* The ingest store, the context evidence store and the semantic-layer sources index below all open the same state database path. */ const dbPath = kloLocalStateDbPath(options.project); + mkdirSync(join(options.project.projectDir, '.klo/cache/local-ingest'), { recursive: true }); + const store = new SqliteBundleIngestStore({ dbPath }); + const contextStore = new SqliteContextEvidenceStore({ dbPath }); + /* When no embedding provider is created from config, NoopEmbeddingPort supplies empty vectors instead. */ const embeddingProvider = createLocalKloEmbeddingProviderFromConfig(options.project.config.ingest.embeddings); + const embedding = embeddingProvider ? new KloIngestEmbeddingPortAdapter(embeddingProvider) : new NoopEmbeddingPort(); + const connections = new LocalConnectionCatalog(options.project, options.queryExecutor); + const rootFileStore = options.project.fileStore; + const semanticLayerService = new SemanticLayerService( + rootFileStore, + connections, + new LocalSlPythonPort(options.semanticLayerCompute), + logger, + ); + const slSourcesRepository = new SqliteSlSourcesIndex({ dbPath }); + const slSearchService = new SlSearchService(embedding, slSourcesRepository, logger); + const knowledgeIndex = new LocalKnowledgeIndex(options.project); + const knowledgeEvents = new NoopKnowledgeEventPort(); + const wikiService = new KnowledgeWikiService(rootFileStore, embedding, knowledgeIndex, options.project.git, logger); + /* NOTE(review): this call can throw after the stores above were constructed; if their constructors open database handles, those would stay open on that path - confirm. */ const { agentRunner, llmProvider } = resolveAgentRunner(options); + const promptService = new PromptService({ promptsDir, partials: [], logger }); + const storage = new LocalIngestStorage(options.project); + const registry = registerAdapters(options.adapters); + const toolsetFactory = new LocalIngestToolsetFactory({ + project: options.project, + wikiService, + knowledgeIndex, + knowledgeEvents, + semanticLayerService, + slSearchService, + authorResolver: new LocalAuthorResolver(), + slSourcesRepository, + connections, + contextStore, + embedding, + }); + + /* A single SqliteBundleIngestStore instance serves the runs, provenance, reports and canonicalPins ports, and backs the diff set service. */ const deps: IngestBundleRunnerDeps = { + runs: store, + provenance: store, + reports: store, + canonicalPins: store, + registry, + diffSetService: new DiffSetService(store), + sessionWorktreeService: new
SessionWorktreeService({ + coreConfig: options.project.coreConfig, + gitService: options.project.git, + configService: rootFileStore, + }), + agentRunner, + gitService: options.project.git, + /* LocalIngestLock runs the callback directly without taking any lock. */ lockingService: new LocalIngestLock(), + storage, + /* Work-unit limits come from project.config.ingest.workUnits; probeRowCount is fixed at 0. NOTE(review): options.memoryModel is not consulted for memoryIngestionModel - confirm whether it should take precedence over the configured default model. */ settings: { + memoryIngestionModel: options.project.config.llm.models.default ?? 'local-ingest-model', + probeRowCount: 0, + workUnitMaxConcurrency: options.project.config.ingest.workUnits.maxConcurrency, + workUnitStepBudget: options.project.config.ingest.workUnits.stepBudget, + workUnitFailureMode: options.project.config.ingest.workUnits.failureMode, + }, + skillsRegistry: new SkillsRegistryService({ skillsDir, logger }), + promptService, + wikiService, + knowledgeIndex, + semanticLayerService, + slSearchService, + slSourcesRepository, + connections, + /* Runner-level validation is always the shape-only validator, regardless of whether semanticLayerCompute was supplied. */ slValidator: new LocalShapeOnlySlValidator(), + toolsetFactory, + commitMessages: new LocalCommitMessagePort(), + embedding, + contextEvidenceIndex: new ContextEvidenceIndexService({ store: contextStore, embeddings: embedding, logger }), + /* Page triage is wired only when an LLM provider resolved; with an injected agentRunner and no provider it stays undefined. */ pageTriage: llmProvider + ?
new PageTriageService({ + store: contextStore, + llmProvider, + settings: { + enabled: true, + maxConcurrency: 2, + lightExtractionEnabled: true, + classifierModel: null, + lightExtractionMaxCandidates: 5, + }, + promptService, + logger, + }) + : undefined, + contextEvidenceCandidates: contextStore, + candidateDedup: new CandidateDedupService({ + store: contextStore, + embeddings: embedding, + settings: { enabled: true, topicSimilarityThreshold: 0.86, scoreAggregation: 'max' }, + logger, + }), + contextCandidateCarryforward: new ContextCandidateCarryforwardService({ + store: contextStore, + settings: { reExamineBudgetExhaustedOnRerun: true }, + logger, + }), + curatorPagination: new CuratorPaginationService({ + store: contextStore, + agentRunner, + settings: { batchSize: 8, maxPasses: 8, stepBudgetPerPass: 60 }, + logger, + }), + logger, + }; + + return { + runner: new IngestBundleRunner(deps), + store, + contextStore, + storage, + registry, + nextJobId: options.jobIdFactory ?? nextLocalJobId, + }; +} diff --git a/packages/context/src/ingest/local-embedding-provider.integration.test.ts b/packages/context/src/ingest/local-embedding-provider.integration.test.ts new file mode 100644 index 00000000..f1e35e93 --- /dev/null +++ b/packages/context/src/ingest/local-embedding-provider.integration.test.ts @@ -0,0 +1,162 @@ +import { mkdir, mkdtemp, rm, writeFile } from 'node:fs/promises'; +import { tmpdir } from 'node:os'; +import { join } from 'node:path'; +import { afterEach, beforeEach, describe, expect, it } from 'vitest'; +import { createLocalKloEmbeddingProviderFromConfig, KloIngestEmbeddingPortAdapter } from '../llm/index.js'; +import { CandidateDedupService } from './context-candidates/candidate-dedup.service.js'; +import { ContextEvidenceIndexService } from './context-evidence/context-evidence-index.service.js'; +import { SqliteContextEvidenceStore } from './context-evidence/sqlite-context-evidence-store.js'; +import type { DiffSet } from './types.js'; + 
+/** Integration suite: runs the deterministic local embedding provider against the SQLite context-evidence store inside a per-test temp directory. */ describe('local ingest embedding providers with SQLite ingest stores', () => { + let tempDir: string; + let dbPath: string; + let stagedDir: string; + + /* Stages one page (metadata.json + page.md) under staged/pages/revenue before every test. */ beforeEach(async () => { + tempDir = await mkdtemp(join(tmpdir(), 'klo-local-ingest-embedding-')); + dbPath = join(tempDir, '.klo', 'db.sqlite'); + stagedDir = join(tempDir, 'staged'); + await mkdir(join(stagedDir, 'pages', 'revenue'), { recursive: true }); + await writeFile( + join(stagedDir, 'pages', 'revenue', 'metadata.json'), + `${JSON.stringify({ + objectType: 'page', + id: 'page-revenue', + title: 'Revenue Policy', + path: 'Revenue Policy', + url: 'https://notion.test/revenue', + parentId: null, + lastEditedAt: '2026-04-30T12:00:00.000Z', + properties: {}, + })}\n`, + 'utf8', + ); + await writeFile( + join(stagedDir, 'pages', 'revenue', 'page.md'), + ['# Approval', '', 'Owner approval is required before enterprise discounts are granted.', ''].join('\n'), + 'utf8', + ); + }); + + afterEach(async () => { + await rm(tempDir, { recursive: true, force: true }); + }); + + /** Builds the ingest embedding port from the 'deterministic' backend (8 dimensions, batch size 4); throws if the factory returns nothing. */ function embeddings() { + const provider = createLocalKloEmbeddingProviderFromConfig({ + backend: 'deterministic', + dimensions: 8, + batchSize: 4, + }); + if (!provider) { + throw new Error('deterministic local embedding provider was not created'); + } + return new KloIngestEmbeddingPortAdapter(provider); + } + + it('indexes and searches context evidence using a package-owned local embedding provider', async () => { + const store = new SqliteContextEvidenceStore({ dbPath }); + const embeddingPort = embeddings(); + const indexer = new ContextEvidenceIndexService({ store, embeddings: embeddingPort }); + /* Both staged files are reported as newly added; nothing is modified, deleted or unchanged. */ const diffSet: DiffSet = { + added: ['pages/revenue/metadata.json', 'pages/revenue/page.md'], + modified: [], + deleted: [], + unchanged: [], + }; + + const summary = await indexer.indexStagedDir({ + stagedDir, + runId: 'run-1', + connectionId: 'docs', + sourceKey: 'notion', + syncId: 'sync-1', + diffSet, + currentHashes: new Map([ + 
['pages/revenue/metadata.json', 'metadata-hash'], + ['pages/revenue/page.md', 'page-hash'], + ]), + }); + + /* The two staged files are expected to index as a single document. */ expect(summary).toMatchObject({ + documentsIndexed: 1, + embeddingFailures: 0, + }); + expect(summary.chunksIndexed).toBeGreaterThan(0); + + /* Query text repeats the staged title, path, heading and sentence. NOTE(review): presumably this mirrors the text the indexer embeds for the chunk - confirm. */ const queryText = [ + 'Revenue Policy', + 'Revenue Policy', + 'Approval', + 'Owner approval is required before enterprise discounts are granted.', + ].join('\n'); + const queryEmbedding = await embeddingPort.computeEmbedding(queryText); + const results = await store.searchRRF({ + connectionId: 'docs', + sourceKey: 'notion', + queryEmbedding, + queryText, + limit: 5, + includeDeleted: false, + currentRunId: 'run-1', + }); + + expect(results[0]?.title).toBe('Revenue Policy'); + expect(results[0]?.stableCitationKey).toContain('notion:page-revenue'); + /* The top hit must report the semantic match reason and all three lanes (semantic, lexical, token) as available. */ expect(results[0]).toMatchObject({ + matchReasons: expect.arrayContaining(['semantic']), + lanes: expect.arrayContaining([ + expect.objectContaining({ lane: 'semantic', status: 'available' }), + expect.objectContaining({ lane: 'lexical', status: 'available' }), + expect.objectContaining({ lane: 'token', status: 'available' }), + ]), + }); + }); + + it('deduplicates candidates using package-owned local embeddings and SQLite persistence', async () => { + const store = new SqliteContextEvidenceStore({ dbPath }); + const embeddingPort = embeddings(); + /* Shared fields for the two candidates inserted below; they differ only in candidateKey. */ const candidateBase = { + runId: 'run-1', + connectionId: 'docs', + sourceKey: 'notion', + topic: 'Enterprise discount approval', + assertion: 'Owner approval is required before enterprise discounts are granted.', + rationale: 'The source policy states that approval is required.', + evidenceChunkIds: [], + evidenceRefs: [], + suggestedPageKey: 'revenue-policy', + actionHint: 'create' as const, + durabilityScore: 3, + authorityScore: 3, + reuseScore: 3, + noveltyScore: 2, + riskScore: 0, + promotionScore: 11, + status: 'pending' as const, + rejectionReason: null, + lane: 'full' as const, + embedding: null, + }; + + await 
store.insertCandidate({ ...candidateBase, candidateKey: 'discount-policy-a' }); + await store.insertCandidate({ ...candidateBase, candidateKey: 'discount-policy-b' }); + + /* NOTE(review): topicSimilarityThreshold is -1 here; presumably chosen so the two candidates always merge regardless of the deterministic vectors - confirm. */ const result = await new CandidateDedupService({ + store, + embeddings: embeddingPort, + settings: { + enabled: true, + topicSimilarityThreshold: -1, + scoreAggregation: 'max', + }, + }).deduplicateRun('run-1'); + + /* Two candidates in, one cluster out, one merge. */ expect(result.enabled).toBe(true); + expect(result.embeddingFailures).toBe(0); + expect(result.candidatesIn).toBe(2); + expect(result.clustersOut).toBe(1); + expect(result.mergedCount).toBe(1); + }); +}); diff --git a/packages/context/src/ingest/local-ingest.ts b/packages/context/src/ingest/local-ingest.ts new file mode 100644 index 00000000..ca7f4ace --- /dev/null +++ b/packages/context/src/ingest/local-ingest.ts @@ -0,0 +1,459 @@ +import { randomUUID } from 'node:crypto'; +import { cp, mkdir, rm } from 'node:fs/promises'; +import { isAbsolute, resolve } from 'node:path'; +import type { KloLlmProvider } from '@klo/llm'; +import type { AgentRunnerService } from '../agent/index.js'; +import type { KloLogger } from '../core/index.js'; +import type { KloSemanticLayerComputePort } from '../daemon/index.js'; +import type { KloLocalProject } from '../project/index.js'; +import { kloLocalStateDbPath } from '../project/index.js'; +import type { KloQueryResult } from '../sl/index.js'; +import { planMetabaseFanoutChildren } from './adapters/metabase/fanout-planner.js'; +import { LocalMetabaseSourceStateReader } from './adapters/metabase/local-source-state-store.js'; +import { localPullConfigForAdapter, type DefaultLocalIngestAdaptersOptions } from './local-adapters.js'; +import { createLocalBundleIngestRuntime } from './local-bundle-runtime.js'; +import { seedLocalMappingStateFromKloYaml } from './local-mapping-reconcile.js'; +import type { MemoryFlowEventSink } from './memory-flow/types.js'; +import { buildSyncId } from './raw-sources-paths.js'; +import type { IngestReportBody, 
IngestReportSnapshot } from './reports.js'; +import { SqliteBundleIngestStore } from './sqlite-bundle-ingest-store.js'; +import type { IngestBundleResult, IngestJobContext, IngestJobPhase, IngestTrigger, SourceAdapter } from './types.js'; + +/** Options for runLocalIngest. When sourceDir is set the directory is copied into the job's upload dir and ingested as an 'upload' bundle; otherwise a pull config is resolved for the adapter and a scheduled pull is run. NOTE(review): the type argument of the queryExecutor Promise is missing in this copy of the patch. */ export interface RunLocalIngestOptions { + project: KloLocalProject; + adapters: SourceAdapter[]; + adapter: string; + connectionId: string; + sourceDir?: string; + pullConfigOptions?: DefaultLocalIngestAdaptersOptions; + trigger?: IngestTrigger; + jobId?: string; + memoryFlow?: MemoryFlowEventSink; + agentRunner?: AgentRunnerService; + llmProvider?: KloLlmProvider; + llmDebugRequestFile?: string; + memoryModel?: string; + semanticLayerCompute?: KloSemanticLayerComputePort; + queryExecutor?: { execute(input: { connectionId: string; sql: string; maxRows?: number }): Promise }; + logger?: KloLogger; +} + +/** Picks the agent-runner, LLM, model, compute, query-executor and logger fields from RunLocalIngestOptions and adds an optional adapter list, job-id factory and an overridable Metabase fan-out runner. */ export interface LocalIngestMcpOptions + extends Pick< + RunLocalIngestOptions, + 'agentRunner' | 'llmProvider' | 'memoryModel' | 'semanticLayerCompute' | 'queryExecutor' | 'logger' +> { + adapters?: SourceAdapter[]; + jobIdFactory?: () => string; + runLocalMetabaseIngest?: (options: RunLocalMetabaseIngestOptions) => Promise; +} + +/** Runner result paired with the report snapshot stored for the same job. */ export interface LocalIngestResult { + result: IngestBundleResult; + report: IngestReportSnapshot; +} + +/** Outcome of one fan-out child: a single Metabase database id ingested into its target connection. */ export interface LocalMetabaseFanoutChild { + jobId: string; + metabaseConnectionId: string; + metabaseDatabaseId: number; + targetConnectionId: string; + result: IngestBundleResult; + report: IngestReportSnapshot; +} + +/** Aggregate over all fan-out children; status reflects how many children reported no failed work units. */ export interface LocalMetabaseFanoutResult { + metabaseConnectionId: string; + children: LocalMetabaseFanoutChild[]; + status: 'all_succeeded' | 'partial_failure' | 'all_failed'; + totals?: { workUnits: number; failedWorkUnits: number }; +} + +/** Planned child as reported to the onMetabaseFanoutPlanned callback. */ export interface LocalMetabaseFanoutProgressChild { + metabaseDatabaseId: number; + targetConnectionId: string; +} + +/** Optional callbacks fired by runLocalMetabaseIngest: once after planning, then before and after each child job. */ export interface LocalMetabaseFanoutProgress { + onMetabaseFanoutPlanned?(event: { + metabaseConnectionId: string; + 
children: LocalMetabaseFanoutProgressChild[]; + }): void; + onMetabaseChildStarted?(event: { + metabaseConnectionId: string; + metabaseDatabaseId: number; + targetConnectionId: string; + jobId: string; + }): void; + onMetabaseChildCompleted?(event: { + metabaseConnectionId: string; + metabaseDatabaseId: number; + targetConnectionId: string; + jobId: string; + status: 'done' | 'failed'; + }): void; +} + +/** Options for runLocalMetabaseIngest. NOTE(review): the type arguments of Omit are missing in this copy of the patch - restore them from the repository before applying. */ export interface RunLocalMetabaseIngestOptions + extends Omit { + metabaseConnectionId: string; + jobIdFactory?: () => string; + progress?: LocalMetabaseFanoutProgress; +} + +/** No-op IngestJobPhase: progress updates are discarded and nested phases are further no-op phases. */ class LocalIngestPhase implements IngestJobPhase { + async updateProgress(): Promise {} + + startPhase(): IngestJobPhase { + return new LocalIngestPhase(); + } +} + +/** Returns value unchanged when it starts with an ASCII letter or digit and otherwise contains only ASCII letters, digits, '_' or '-'; throws 'Unsafe {kind}: {value}' for anything else, including the empty string. */ function safeSegment(kind: string, value: string): string { + if (!/^[a-zA-Z0-9][a-zA-Z0-9_-]*$/.test(value)) { + throw new Error(`Unsafe ${kind}: ${value}`); + } + return value; +} + +/** Throws unless connectionId is a key of config.connections and adapter is listed in config.ingest.adapters. */ function assertConfigured(project: KloLocalProject, adapter: string, connectionId: string): void { + if (!project.config.connections[connectionId]) { + throw new Error(`Connection "${connectionId}" is not configured in klo.yaml`); + } + if (!project.config.ingest.adapters.includes(adapter)) { + throw new Error(`Adapter "${adapter}" is not enabled in klo.yaml`); + } +} + +/** Returns the first adapter whose source equals the given key; throws when none matches. */ function findAdapter(adapters: SourceAdapter[], source: string): SourceAdapter { + const adapter = adapters.find((candidate) => candidate.source === source); + if (!adapter) { + throw new Error(`Adapter "${source}" is not available for local ingest`); + } + return adapter; +} + +/** Builds the job context handed to the runner: the job id, the memoryFlow sink only when one is provided, and a startPhase that returns a no-op phase. */ function localJobContext(jobId: string, memoryFlow?: MemoryFlowEventSink): IngestJobContext { + return { + jobId, + ...(memoryFlow ? 
{ memoryFlow } : {}), + startPhase() { + return new LocalIngestPhase(); + }, + }; +} + +/** Replaces uploadDir with a recursive copy of sourceDir. Throws for a relative sourceDir; any existing uploadDir content is removed before copying. */ async function copySourceDirToUpload(sourceDir: string, uploadDir: string): Promise { + if (!isAbsolute(sourceDir)) { + throw new Error('sourceDir must be an absolute path'); + } + await rm(uploadDir, { recursive: true, force: true }); + await mkdir(uploadDir, { recursive: true }); + await cp(resolve(sourceDir), uploadDir, { recursive: true }); +} + +/** Runs one 'scheduled_pull' ingest on a newly created local runtime and returns the runner result together with the stored report. The trigger defaults to 'manual_resync'; throws if no report can be found for the job id afterwards. NOTE(review): llmDebugRequestFile is not part of these options, so it is not forwarded on this path - confirm that is intended. */ async function runScheduledPullJob(options: { + project: KloLocalProject; + adapters: SourceAdapter[]; + adapter: SourceAdapter; + connectionId: string; + pullConfig: unknown; + trigger?: IngestTrigger; + jobId?: string; + memoryFlow?: MemoryFlowEventSink; + agentRunner?: AgentRunnerService; + llmProvider?: KloLlmProvider; + memoryModel?: string; + semanticLayerCompute?: KloSemanticLayerComputePort; + queryExecutor?: { execute(input: { connectionId: string; sql: string; maxRows?: number }): Promise }; + logger?: KloLogger; +}): Promise { + const runtime = createLocalBundleIngestRuntime(options); + const jobId = options.jobId ?? runtime.nextJobId(); + const result = await runtime.runner.run( + { + jobId, + connectionId: options.connectionId, + sourceKey: options.adapter.source, + trigger: options.trigger ?? 'manual_resync', + bundleRef: { kind: 'scheduled_pull', config: options.pullConfig }, + }, + localJobContext(jobId, options.memoryFlow), + ); + const report = await runtime.store.findByJobId(jobId); + if (!report) { + throw new Error(`Local ingest report for job "${jobId}" was not created`); + } + return { result, report }; +} + +/** Entry point for a single local ingest. Validates the adapter and connection names, checks they are configured, then either ingests sourceDir as an upload or delegates to runScheduledPullJob. */ export async function runLocalIngest(options: RunLocalIngestOptions): Promise { + const adapterName = safeSegment('adapter', options.adapter); + const connectionId = safeSegment('connection id', options.connectionId); + assertConfigured(options.project, adapterName, connectionId); + const adapter = findAdapter(options.adapters, adapterName); + /* A pull config is only resolved when no sourceDir was given. */ const pullConfig = options.sourceDir + ? 
undefined + : await localPullConfigForAdapter(options.project, adapter, connectionId, options.pullConfigOptions); + /* NOTE(review): on the scheduled-pull path this runtime is only used for nextJobId(); runScheduledPullJob then builds a second runtime from its own options. */ const runtime = createLocalBundleIngestRuntime(options); + const jobId = options.jobId ?? runtime.nextJobId(); + + const bundleRef = options.sourceDir + ? { kind: 'upload' as const, uploadId: jobId } + : { kind: 'scheduled_pull' as const, config: pullConfig }; + + if (options.sourceDir) { + await copySourceDirToUpload(options.sourceDir, runtime.storage.resolveUploadDir(jobId)); + } else { + return runScheduledPullJob({ + project: options.project, + adapters: options.adapters, + adapter, + connectionId, + pullConfig, + trigger: options.trigger, + jobId, + memoryFlow: options.memoryFlow, + agentRunner: options.agentRunner, + llmProvider: options.llmProvider, + memoryModel: options.memoryModel, + semanticLayerCompute: options.semanticLayerCompute, + queryExecutor: options.queryExecutor, + logger: options.logger, + }); + } + + /* Only reached for sourceDir uploads because the else branch above returns; the 'scheduled_pull' bundleRef variant and the 'manual_resync' fallback are therefore not reachable from here. */ const result = await runtime.runner.run( + { + jobId, + connectionId, + sourceKey: adapter.source, + trigger: options.trigger ?? (options.sourceDir ? 
'upload' : 'manual_resync'), + bundleRef, + }, + localJobContext(jobId, options.memoryFlow), + ); + const report = await runtime.store.findByJobId(jobId); + if (!report) { + throw new Error(`Local ingest report for job "${jobId}" was not created`); + } + return { result, report }; +} + +/** 'all_succeeded' when every child has no failed work units (an empty children list also yields this), 'all_failed' when no child is clean, otherwise 'partial_failure'. */ function metabaseFanoutStatus(children: LocalMetabaseFanoutChild[]): LocalMetabaseFanoutResult['status'] { + const succeeded = children.filter((child) => child.report.body.failedWorkUnits.length === 0).length; + if (succeeded === children.length) { + return 'all_succeeded'; + } + if (succeeded === 0) { + return 'all_failed'; + } + return 'partial_failure'; +} + +/** Sums the work-unit and failed-work-unit counts over the children's report bodies. */ function metabaseFanoutTotals(children: LocalMetabaseFanoutChild[]): LocalMetabaseFanoutResult['totals'] { + return { + workUnits: children.reduce((sum, child) => sum + child.report.body.workUnits.length, 0), + failedWorkUnits: children.reduce((sum, child) => sum + child.report.body.failedWorkUnits.length, 0), + }; +} + +/* Synthetic work-unit key used in the failure report written when a child job throws. */ const METABASE_FETCH_FAILURE_UNIT = 'metabase-fetch'; + +/** Error.message for Error instances, String(error) for anything else. */ function errorMessage(error: unknown): string { + return error instanceof Error ? error.message : String(error); +} + +/** Default child job id: 'local-metabase-', the database id, '-', then a random UUID. */ function metabaseChildJobId(metabaseDatabaseId: number): string { + return `local-metabase-${metabaseDatabaseId}-${randomUUID()}`; +} + +/** Persists a failed run plus a report holding one failed synthetic work unit for a child whose job threw, and returns both in LocalIngestResult form. NOTE(review): options.metabaseDatabaseId is accepted but never read in this function. */ async function recordLocalMetabaseChildFailure(options: { + project: KloLocalProject; + jobId: string; + targetConnectionId: string; + metabaseDatabaseId: number; + trigger?: IngestTrigger; + error: unknown; +}): Promise { + const store = new SqliteBundleIngestStore({ dbPath: kloLocalStateDbPath(options.project) }); + const syncId = buildSyncId(new Date(), options.jobId); + const diffSummary = { added: 0, modified: 0, deleted: 0, unchanged: 0 }; + const reason = errorMessage(options.error); + const run = await store.create({ + jobId: options.jobId, + connectionId: options.targetConnectionId, + sourceKey: 'metabase', + syncId, + trigger: options.trigger ?? 
'manual_resync', + scopeFingerprint: null, + }); + await store.markFailed(run.id); + + /* Report body with a single failed work unit carrying the error text; every other collection is empty and reconciliation is marked skipped. */ const body: IngestReportBody = { + syncId, + diffSummary, + commitSha: null, + workUnits: [ + { + unitKey: METABASE_FETCH_FAILURE_UNIT, + rawFiles: [], + status: 'failed', + reason, + actions: [], + touchedSlSources: [], + }, + ], + failedWorkUnits: [METABASE_FETCH_FAILURE_UNIT], + reconciliationSkipped: true, + conflictsResolved: [], + evictionsApplied: [], + unmappedFallbacks: [], + artifactResolutions: [], + evictionInputs: [], + unresolvedCards: [], + supersededBy: null, + overrideOf: null, + provenanceRows: [], + toolTranscripts: [], + postProcessor: { + sourceKey: 'metabase', + status: 'failed', + errors: [reason], + warnings: [], + touchedSources: [], + }, + }; + + /* NOTE(review): store.create is called for both the run row above and this report; presumably an overloaded method on SqliteBundleIngestStore - confirm against the store. */ const report = await store.create({ + runId: run.id, + jobId: options.jobId, + connectionId: options.targetConnectionId, + sourceKey: 'metabase', + body, + }); + + return { + result: { + jobId: options.jobId, + runId: run.id, + syncId, + diffSummary, + workUnitCount: 1, + failedWorkUnits: [METABASE_FETCH_FAILURE_UNIT], + artifactsWritten: 0, + commitSha: null, + }, + report, + }; +} + +/** Fans one Metabase connection out into one scheduled-pull child job per planned mapping, run one after another. A child whose job throws is recorded as a failed report and the loop continues. Throws before any child runs when a sourceDir is passed, when the connection id is unsafe or not configured, when no 'metabase' adapter is supplied, or when sync-enabled mappings are unhydrated. */ export async function runLocalMetabaseIngest( + options: RunLocalMetabaseIngestOptions, +): Promise { + if ((options as RunLocalMetabaseIngestOptions & { sourceDir?: string }).sourceDir) { + throw new Error('source-dir uploads are not supported for the Metabase fan-out adapter'); + } + + const metabaseConnectionId = safeSegment('metabase connection id', options.metabaseConnectionId); + assertConfigured(options.project, 'metabase', metabaseConnectionId); + /* klo.yaml mapping intent is seeded into local state before the hydration check below. */ await seedLocalMappingStateFromKloYaml(options.project, metabaseConnectionId); + const adapter = findAdapter(options.adapters, 'metabase'); + const sourceStateReader = new LocalMetabaseSourceStateReader({ dbPath: kloLocalStateDbPath(options.project) }); + + const unhydrated = await sourceStateReader.getUnhydratedSyncEnabledMappingIds(metabaseConnectionId); + if (unhydrated.length 
> 0) { + throw new Error( + `Metabase mappings ${unhydrated.join(', ')} are not hydrated; run \`klo connection mapping refresh ${metabaseConnectionId}\` before local Metabase ingest.`, + ); + } + + const state = await sourceStateReader.getSourceState(metabaseConnectionId); + const childPlans = planMetabaseFanoutChildren({ + metabaseConnectionId, + mappings: state.mappings, + }); + /* Report the full plan once before any child starts. */ options.progress?.onMetabaseFanoutPlanned?.({ + metabaseConnectionId, + children: childPlans.map((childPlan) => ({ + metabaseDatabaseId: childPlan.metabaseDatabaseId, + targetConnectionId: childPlan.targetConnectionId, + })), + }); + + const children: LocalMetabaseFanoutChild[] = []; + for (const childPlan of childPlans) { + /* NOTE(review): these two checks throw from inside the loop, after earlier children may already have run and been persisted - consider validating every plan before starting the first child. */ const targetConnectionId = safeSegment('target connection id', childPlan.targetConnectionId); + if (!options.project.config.connections[targetConnectionId]) { + throw new Error(`Target connection "${targetConnectionId}" is not configured in klo.yaml`); + } + const childJobId = options.jobIdFactory?.() ?? 
metabaseChildJobId(childPlan.metabaseDatabaseId); + options.progress?.onMetabaseChildStarted?.({ + metabaseConnectionId, + metabaseDatabaseId: childPlan.metabaseDatabaseId, + targetConnectionId, + jobId: childJobId, + }); + let child: LocalIngestResult; + try { + /* The child runs against the target connection, not the Metabase connection itself. */ child = await runScheduledPullJob({ + project: options.project, + adapters: options.adapters, + adapter, + connectionId: targetConnectionId, + pullConfig: childPlan.pullConfig, + trigger: options.trigger, + jobId: childJobId, + memoryFlow: options.memoryFlow, + agentRunner: options.agentRunner, + llmProvider: options.llmProvider, + memoryModel: options.memoryModel, + semanticLayerCompute: options.semanticLayerCompute, + queryExecutor: options.queryExecutor, + logger: options.logger, + }); + } catch (error) { + /* A thrown child job is converted into a persisted failure report so that later mappings still run. */ child = await recordLocalMetabaseChildFailure({ + project: options.project, + jobId: childJobId, + targetConnectionId, + metabaseDatabaseId: childPlan.metabaseDatabaseId, + trigger: options.trigger, + error, + }); + } + /* Completion status is 'failed' as soon as the child report lists any failed work unit. */ options.progress?.onMetabaseChildCompleted?.({ + metabaseConnectionId, + metabaseDatabaseId: childPlan.metabaseDatabaseId, + targetConnectionId, + jobId: child.report.jobId, + status: child.report.body.failedWorkUnits.length > 0 ? 
'failed' : 'done', + }); + children.push({ + jobId: child.report.jobId, + metabaseConnectionId, + metabaseDatabaseId: childPlan.metabaseDatabaseId, + targetConnectionId, + result: child.result, + report: child.report, + }); + } + + return { + metabaseConnectionId, + children, + status: metabaseFanoutStatus(children), + totals: metabaseFanoutTotals(children), + }; +} + +/** Looks a report up by id through findReportByAnyId on a store opened at the project's local state DB path. */ export async function getLocalIngestStatus( + project: KloLocalProject, + id: string, +): Promise { + return new SqliteBundleIngestStore({ dbPath: kloLocalStateDbPath(project) }).findReportByAnyId(id); +} + +/** Returns whatever findLatestReport yields on a store opened at the project's local state DB path. */ export async function getLatestLocalIngestStatus(project: KloLocalProject): Promise { + return new SqliteBundleIngestStore({ dbPath: kloLocalStateDbPath(project) }).findLatestReport(); +} diff --git a/packages/context/src/ingest/local-mapping-reconcile.test.ts b/packages/context/src/ingest/local-mapping-reconcile.test.ts new file mode 100644 index 00000000..4880fab1 --- /dev/null +++ b/packages/context/src/ingest/local-mapping-reconcile.test.ts @@ -0,0 +1,79 @@ +import { mkdtemp, rm } from 'node:fs/promises'; +import { tmpdir } from 'node:os'; +import { join } from 'node:path'; +import { afterEach, describe, expect, it } from 'vitest'; +import { kloLocalStateDbPath, type KloLocalProject } from '../project/index.js'; +import { LocalLookerRuntimeStore } from './adapters/looker/local-runtime-store.js'; +import { LocalMetabaseSourceStateReader } from './adapters/metabase/local-source-state-store.js'; +import { seedLocalMappingStateFromKloYaml } from './local-mapping-reconcile.js'; + +/** Suite for seedLocalMappingStateFromKloYaml: each test creates its own temp dir, which afterEach removes when set. */ describe('local mapping yaml reconciliation bridge', () => { + let tempDir: string; + + afterEach(async () => { + if (tempDir) { + await rm(tempDir, { recursive: true, force: true }); + } + }); + + /** Minimal project stub: only projectDir and config.connections are populated before the cast to KloLocalProject. */ function projectWithConnections(connections: KloLocalProject['config']['connections']): KloLocalProject { + return { + projectDir: tempDir, + config: { connections }, + } as KloLocalProject; + } + + it('seeds Metabase local 
state from klo.yaml mapping intent', async () => { + tempDir = await mkdtemp(join(tmpdir(), 'klo-metabase-yaml-seed-')); + const project = projectWithConnections({ + 'prod-metabase': { + driver: 'metabase', + mappings: { + databaseMappings: { '1': 'prod-warehouse' }, + syncEnabled: { '1': true }, + syncMode: 'ONLY', + selections: { collections: [12] }, + defaultTagNames: ['klo'], + }, + }, + 'prod-warehouse': { driver: 'postgres', url: 'postgresql://readonly@db.test/analytics' }, + }); + + await seedLocalMappingStateFromKloYaml(project, 'prod-metabase'); + + /* Read the seeded state back through a separate reader opened on the same DB path. */ const store = new LocalMetabaseSourceStateReader({ dbPath: kloLocalStateDbPath(project) }); + await expect(store.listDatabaseMappings('prod-metabase')).resolves.toMatchObject([ + { metabaseDatabaseId: 1, targetConnectionId: 'prod-warehouse', syncEnabled: true, source: 'klo.yaml' }, + ]); + await expect(store.getSourceState('prod-metabase')).resolves.toMatchObject({ + syncMode: 'ONLY', + selections: [{ selectionType: 'collection', metabaseObjectId: 12 }], + defaultTagNames: ['klo'], + }); + }); + + it('seeds Looker local mappings from klo.yaml mapping intent', async () => { + tempDir = await mkdtemp(join(tmpdir(), 'klo-looker-yaml-seed-')); + const project = projectWithConnections({ + 'prod-looker': { + driver: 'looker', + mappings: { connectionMappings: { analytics: 'prod-warehouse' } }, + }, + 'prod-warehouse': { driver: 'postgres', url: 'postgresql://readonly@db.test/analytics' }, + }); + + await seedLocalMappingStateFromKloYaml(project, 'prod-looker'); + + /* Looker mappings are read back through LocalLookerRuntimeStore on the same DB path. */ const store = new LocalLookerRuntimeStore({ dbPath: kloLocalStateDbPath(project) }); + await expect(store.listConnectionMappings('prod-looker')).resolves.toMatchObject([ + { lookerConnectionName: 'analytics', kloConnectionId: 'prod-warehouse', source: 'klo.yaml' }, + ]); + }); + + it('does nothing for connections without mapping bootstrap intent', async () => { + tempDir = await mkdtemp(join(tmpdir(), 'klo-no-yaml-seed-')); + const project = 
projectWithConnections({ warehouse: { driver: 'postgres', url: 'env:DATABASE_URL' } }); + + await expect(seedLocalMappingStateFromKloYaml(project, 'warehouse')).resolves.toBeUndefined(); + }); +}); diff --git a/packages/context/src/ingest/local-mapping-reconcile.ts b/packages/context/src/ingest/local-mapping-reconcile.ts new file mode 100644 index 00000000..58b81e15 --- /dev/null +++ b/packages/context/src/ingest/local-mapping-reconcile.ts @@ -0,0 +1,65 @@ +import { + kloLocalStateDbPath, + parseConnectionMappingBootstrap, + type KloLocalProject, + type LookerMappingBootstrap, + type MetabaseMappingBootstrap, +} from '../project/index.js'; +import { LocalLookerRuntimeStore } from './adapters/looker/local-runtime-store.js'; +import { LocalMetabaseSourceStateReader } from './adapters/metabase/local-source-state-store.js'; + +/** Flattens bootstrap.selections into selection rows: all collections first, then all items. */ function metabaseSelections(bootstrap: MetabaseMappingBootstrap) { + return [ + ...bootstrap.selections.collections.map((id) => ({ selectionType: 'collection' as const, metabaseObjectId: id })), + ...bootstrap.selections.items.map((id) => ({ selectionType: 'item' as const, metabaseObjectId: id })), + ]; +} + +/** Builds one row per database id that appears as a key in databaseMappings or syncEnabled, sorted numerically; a missing target becomes null and a missing syncEnabled flag becomes false. NOTE(review): lookups use String(Number(key)), so a key that is not in canonical decimal form (for example '01') would not find its own entry - presumably the bootstrap parser normalizes keys; confirm. */ function metabaseMappings(bootstrap: MetabaseMappingBootstrap) { + const ids = new Set([...Object.keys(bootstrap.databaseMappings), ...Object.keys(bootstrap.syncEnabled)]); + return [...ids] + .map((id) => Number(id)) + .sort((a, b) => a - b) + .map((id) => ({ + metabaseDatabaseId: id, + targetConnectionId: bootstrap.databaseMappings[String(id)] ?? null, + syncEnabled: bootstrap.syncEnabled[String(id)] ?? 
false, + })); +} + +/** Turns connectionMappings entries into rows ordered by Looker connection name (localeCompare). */ function lookerMappings(bootstrap: LookerMappingBootstrap) { + return Object.entries(bootstrap.connectionMappings) + .sort(([a], [b]) => a.localeCompare(b)) + .map(([lookerConnectionName, kloConnectionId]) => ({ lookerConnectionName, kloConnectionId })); +} + +/** Applies the klo.yaml mapping bootstrap of one connection to the local state DB. Returns without doing anything when the connection is not configured or parseConnectionMappingBootstrap yields nothing; otherwise dispatches on bootstrap.adapter ('metabase' or 'looker'). */ export async function seedLocalMappingStateFromKloYaml(project: KloLocalProject, connectionId: string): Promise { + const connection = project.config.connections[connectionId]; + if (!connection) { + return; + } + + const bootstrap = parseConnectionMappingBootstrap(connectionId, connection); + if (!bootstrap) { + return; + } + + const dbPath = kloLocalStateDbPath(project); + if (bootstrap.adapter === 'metabase') { + await new LocalMetabaseSourceStateReader({ dbPath }).applyYamlBootstrap({ + connectionId, + syncMode: bootstrap.syncMode, + defaultTagNames: bootstrap.defaultTagNames, + selections: metabaseSelections(bootstrap), + mappings: metabaseMappings(bootstrap), + }); + return; + } + + if (bootstrap.adapter === 'looker') { + await new LocalLookerRuntimeStore({ dbPath }).applyYamlBootstrap({ + lookerConnectionId: connectionId, + mappings: lookerMappings(bootstrap), + }); + } +} diff --git a/packages/context/src/ingest/local-metabase-ingest.test.ts b/packages/context/src/ingest/local-metabase-ingest.test.ts new file mode 100644 index 00000000..86dcb7fa --- /dev/null +++ b/packages/context/src/ingest/local-metabase-ingest.test.ts @@ -0,0 +1,328 @@ +import { mkdir, mkdtemp, rm, writeFile } from 'node:fs/promises'; +import { tmpdir } from 'node:os'; +import { join } from 'node:path'; +import { AgentRunnerService } from '../agent/index.js'; +import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'; +import { initKloProject, type KloLocalProject } from '../project/index.js'; +import { LocalMetabaseSourceStateReader } from './adapters/metabase/local-source-state-store.js'; +import { getLocalIngestStatus, runLocalMetabaseIngest } from './local-ingest.js'; +import type { 
ChunkResult, FetchContext, SourceAdapter } from './types.js'; + +/** AgentRunnerService stub whose runLoop returns an 'error' stop when the user prompt contains 'metabase-db-2' and a 'natural' stop otherwise. */ class TestAgentRunner extends AgentRunnerService { + override runLoop = vi.fn(async (params: Parameters[0]) => { + if (params.userPrompt.includes('metabase-db-2')) { + return { stopReason: 'error' as const, error: new Error('database 2 failed') }; + } + return { stopReason: 'natural' as const }; + }); + + constructor() { + super({ llmProvider: { getModel: () => ({}) as never } as never }); + } +} + +/** Test double for the 'metabase' adapter: fetch writes one cards file and one databases file named after the database id into the staged dir; chunk returns a single work unit. */ class FakeMetabaseSourceAdapter implements SourceAdapter { + readonly source = 'metabase'; + readonly skillNames: string[] = []; + + detect(): Promise { + return Promise.resolve(true); + } + + async fetch(pullConfig: unknown, stagedDir: string, ctx: FetchContext): Promise { + const config = pullConfig as { metabaseConnectionId: string; metabaseDatabaseId: number }; + await mkdir(join(stagedDir, 'cards'), { recursive: true }); + await mkdir(join(stagedDir, 'databases'), { recursive: true }); + await writeFile( + join(stagedDir, 'cards', `${config.metabaseDatabaseId}.json`), + JSON.stringify({ connectionId: ctx.connectionId, databaseId: config.metabaseDatabaseId }), + 'utf-8', + ); + await writeFile( + join(stagedDir, 'databases', `${config.metabaseDatabaseId}.json`), + JSON.stringify({ metabaseConnectionId: config.metabaseConnectionId }), + 'utf-8', + ); + } + + async chunk(stagedDir: string): Promise { + /* The database id is parsed from a 'metabase-child-N' fragment of the staged dir path and defaults to 1. NOTE(review): presumably the staged path embeds the job id produced by the tests' jobIdFactory - confirm. */ const databaseId = Number(stagedDir.match(/metabase-child-(\d+)/)?.[1] ?? 
1); + return { + workUnits: [ + { + unitKey: `metabase-db-${databaseId}`, + rawFiles: [`cards/${databaseId}.json`], + peerFileIndex: [], + dependencyPaths: [`databases/${databaseId}.json`], + }, + ], + }; + } +} + +/** Variant of the fake adapter whose fetch throws for metabaseDatabaseId 2 and defers to the base fetch for every other id. */ class ThrowingFetchMetabaseSourceAdapter extends FakeMetabaseSourceAdapter { + override async fetch(pullConfig: unknown, stagedDir: string, ctx: FetchContext): Promise { + const config = pullConfig as { metabaseConnectionId: string; metabaseDatabaseId: number }; + if (config.metabaseDatabaseId === 2) { + throw new Error('Metabase fetch failed for database 2'); + } + await super.fetch(pullConfig, stagedDir, ctx); + } +} + +describe('runLocalMetabaseIngest', () => { + let tempDir: string; + let project: KloLocalProject; + + beforeEach(async () => { + tempDir = await mkdtemp(join(tmpdir(), 'klo-metabase-fanout-')); + project = await initKloProject({ projectDir: tempDir, force: true }); + project.config.connections = { + 'prod-metabase': { + driver: 'metabase', + api_url: 'https://metabase.example.com', + api_key: 'literal-test-key', // pragma: allowlist secret + }, + warehouse_a: { driver: 'postgres', url: 'postgres://localhost/a' }, + warehouse_b: { driver: 'postgres', url: 'postgres://localhost/b' }, + }; + project.config.ingest.adapters = ['metabase']; + }); + + afterEach(async () => { + await rm(tempDir, { recursive: true, force: true }); + }); + + async function seedMetabaseState(): Promise { + const store = new LocalMetabaseSourceStateReader({ dbPath: join(tempDir, '.klo', 'db.sqlite') }); + await store.replaceSourceState({ + connectionId: 'prod-metabase', + syncMode: 'ALL', + defaultTagNames: ['klo'], + selections: [], + mappings: [ + { + metabaseDatabaseId: 1, + metabaseDatabaseName: 'Warehouse A', + metabaseEngine: 'postgres', + metabaseHost: 'localhost', + metabaseDbName: 'a', + targetConnectionId: 'warehouse_a', + syncEnabled: true, + source: 'refresh', + }, + { + metabaseDatabaseId: 2, + metabaseDatabaseName: 'Warehouse B', + metabaseEngine: 
'postgres', + metabaseHost: 'localhost', + metabaseDbName: 'b', + targetConnectionId: 'warehouse_b', + syncEnabled: true, + source: 'refresh', + }, + ], + }); + } + + it('runs one child job per sync-enabled Metabase mapping', async () => { + await seedMetabaseState(); + const agentRunner = new TestAgentRunner(); + const ids = ['metabase-child-1', 'metabase-child-3']; + + const result = await runLocalMetabaseIngest({ + project, + adapters: [new FakeMetabaseSourceAdapter()], + metabaseConnectionId: 'prod-metabase', + agentRunner, + jobIdFactory: () => ids.shift() ?? 'metabase-child-extra', + }); + + expect(result.metabaseConnectionId).toBe('prod-metabase'); + expect(result.status).toBe('all_succeeded'); + expect(result.children.map((child) => child.targetConnectionId)).toEqual(['warehouse_a', 'warehouse_b']); + expect(result.children.map((child) => child.metabaseDatabaseId)).toEqual([1, 2]); + expect(new Set(result.children.map((child) => child.jobId)).size).toBe(2); + await expect(getLocalIngestStatus(project, result.children[0].jobId)).resolves.toMatchObject({ + jobId: result.children[0].jobId, + connectionId: 'warehouse_a', + sourceKey: 'metabase', + }); + }); + + it('throws before runner work when there are no sync-enabled mapped rows', async () => { + const store = new LocalMetabaseSourceStateReader({ dbPath: join(tempDir, '.klo', 'db.sqlite') }); + await store.replaceSourceState({ + connectionId: 'prod-metabase', + mappings: [ + { + metabaseDatabaseId: 1, + metabaseDatabaseName: 'Warehouse A', + metabaseEngine: 'postgres', + metabaseHost: null, + metabaseDbName: null, + targetConnectionId: null, + syncEnabled: true, + source: 'refresh', + }, + ], + }); + + await expect( + runLocalMetabaseIngest({ + project, + adapters: [new FakeMetabaseSourceAdapter()], + metabaseConnectionId: 'prod-metabase', + agentRunner: new TestAgentRunner(), + }), + ).rejects.toThrow('no sync-enabled mappings with a target connection'); + }); + + it('throws with refresh guidance for 
unhydrated sync-enabled rows', async () => { + const store = new LocalMetabaseSourceStateReader({ dbPath: join(tempDir, '.klo', 'db.sqlite') }); + await store.replaceSourceState({ + connectionId: 'prod-metabase', + mappings: [ + { + metabaseDatabaseId: 7, + metabaseDatabaseName: null, + metabaseEngine: null, + metabaseHost: null, + metabaseDbName: null, + targetConnectionId: 'warehouse_a', + syncEnabled: true, + source: 'klo.yaml', + }, + ], + }); + + await expect( + runLocalMetabaseIngest({ + project, + adapters: [new FakeMetabaseSourceAdapter()], + metabaseConnectionId: 'prod-metabase', + agentRunner: new TestAgentRunner(), + }), + ).rejects.toThrow('run `klo connection mapping refresh prod-metabase`'); + }); + + it('seeds yaml-only Metabase mappings before the unhydrated fan-out preflight', async () => { + const project = { + projectDir: tempDir, + config: { + ingest: { adapters: ['metabase'] }, + connections: { + 'prod-metabase': { + driver: 'metabase', + mappings: { + databaseMappings: { '1': 'prod-warehouse' }, + syncEnabled: { '1': true }, + }, + }, + 'prod-warehouse': { driver: 'postgres', url: 'postgresql://readonly@db.test/analytics' }, + }, + }, + } as never; + + await expect( + runLocalMetabaseIngest({ + project, + adapters: [new FakeMetabaseSourceAdapter()], + metabaseConnectionId: 'prod-metabase', + }), + ).rejects.toThrow('run `klo connection mapping refresh prod-metabase`'); + }); + + it('rejects source-dir uploads through the Metabase fan-out runner', async () => { + await expect( + runLocalMetabaseIngest({ + project, + adapters: [new FakeMetabaseSourceAdapter()], + metabaseConnectionId: 'prod-metabase', + agentRunner: new TestAgentRunner(), + sourceDir: tempDir, + } as Parameters[0] & { sourceDir: string }), + ).rejects.toThrow('source-dir uploads are not supported for the Metabase fan-out adapter'); + }); + + it('reports partial failure when a child job fails', async () => { + await seedMetabaseState(); + const agentRunner = new 
TestAgentRunner(); + const ids = ['metabase-child-1', 'metabase-child-2']; + + const result = await runLocalMetabaseIngest({ + project, + adapters: [new FakeMetabaseSourceAdapter()], + metabaseConnectionId: 'prod-metabase', + agentRunner, + jobIdFactory: () => ids.shift() ?? 'metabase-child-extra', + }); + + expect(result.status).toBe('partial_failure'); + expect(result.totals).toEqual({ workUnits: 2, failedWorkUnits: 1 }); + expect(result.children[1]?.report.body.failedWorkUnits).toEqual(['metabase-db-2']); + }); + + it('captures fetch-time child failures and continues later mappings', async () => { + await seedMetabaseState(); + project.config.connections.warehouse_c = { driver: 'postgres', url: 'postgres://localhost/c' }; + const store = new LocalMetabaseSourceStateReader({ dbPath: join(tempDir, '.klo', 'db.sqlite') }); + await store.upsertDatabaseMapping({ + connectionId: 'prod-metabase', + metabaseDatabaseId: 3, + targetConnectionId: 'warehouse_c', + syncEnabled: true, + source: 'cli', + }); + await store.refreshDiscoveredDatabases({ + connectionId: 'prod-metabase', + discovered: [ + { id: 1, name: 'Warehouse A', engine: 'postgres', host: 'localhost', dbName: 'a' }, + { id: 2, name: 'Warehouse B', engine: 'postgres', host: 'localhost', dbName: 'b' }, + { id: 3, name: 'Warehouse C', engine: 'postgres', host: 'localhost', dbName: 'c' }, + ], + }); + + const ids = ['metabase-child-1', 'metabase-child-2', 'metabase-child-3']; + const result = await runLocalMetabaseIngest({ + project, + adapters: [new ThrowingFetchMetabaseSourceAdapter()], + metabaseConnectionId: 'prod-metabase', + agentRunner: new TestAgentRunner(), + jobIdFactory: () => ids.shift() ?? 
'metabase-child-extra',
+    });
+
+    expect(result.status).toBe('partial_failure');
+    expect(result.children.map((child) => child.jobId)).toEqual([
+      'metabase-child-1',
+      'metabase-child-2',
+      'metabase-child-3',
+    ]);
+    expect(result.children.map((child) => child.metabaseDatabaseId)).toEqual([1, 2, 3]);
+    expect(result.children.map((child) => child.targetConnectionId)).toEqual(['warehouse_a', 'warehouse_b', 'warehouse_c']);
+    expect(result.totals).toEqual({ workUnits: 3, failedWorkUnits: 1 });
+
+    const failed = result.children[1];
+    expect(failed.result).toMatchObject({
+      jobId: 'metabase-child-2',
+      failedWorkUnits: ['metabase-fetch'],
+      artifactsWritten: 0,
+      commitSha: null,
+    });
+    expect(failed.report.body.workUnits).toMatchObject([
+      {
+        unitKey: 'metabase-fetch',
+        status: 'failed',
+        reason: 'Metabase fetch failed for database 2',
+      },
+    ]);
+    await expect(getLocalIngestStatus(project, failed.jobId)).resolves.toMatchObject({
+      jobId: 'metabase-child-2',
+      connectionId: 'warehouse_b',
+      sourceKey: 'metabase',
+      body: {
+        failedWorkUnits: ['metabase-fetch'],
+      },
+    });
+  });
+});
diff --git a/packages/context/src/ingest/local-stage-ingest.test.ts b/packages/context/src/ingest/local-stage-ingest.test.ts
new file mode 100644
index 00000000..77615558
--- /dev/null
+++ b/packages/context/src/ingest/local-stage-ingest.test.ts
@@ -0,0 +1,706 @@
+import { access, mkdir, mkdtemp, readFile, rm, writeFile } from 'node:fs/promises';
+import { tmpdir } from 'node:os';
+import { join } from 'node:path';
+import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest';
+import { initKloProject, type KloLocalProject, loadKloProject } from '../project/index.js';
+import { FakeSourceAdapter } from './adapters/fake/fake.adapter.js';
+import { createDefaultLocalIngestAdapters } from './local-adapters.js';
+import {
+  getLocalStageOnlyIngestStatus,
+  runLocalStageOnlyIngest,
+} from './local-stage-ingest.js';
+import { createMemoryFlowLiveBuffer } from './memory-flow/live-buffer.js';
+import type { MemoryFlowReplayInput } from './memory-flow/types.js';
+import type { SourceAdapter } from './types.js';
+
+async function writeWarehouseConfig(projectDir: string): Promise<void> { // overwrites klo.yaml: postgres "warehouse" connection, "fake" adapter enabled
+  await writeFile(
+    join(projectDir, 'klo.yaml'),
+    [
+      'project: warehouse',
+      'connections:',
+      '  warehouse:',
+      '    driver: postgres',
+      'ingest:',
+      '  adapters:',
+      '    - fake',
+      '',
+    ].join('\n'),
+    'utf-8',
+  );
+}
+
+async function writeLiveDatabaseConfig(projectDir: string): Promise<void> { // overwrites klo.yaml: read-only postgres "warehouse" with a URL, "live-database" adapter enabled
+  await writeFile(
+    join(projectDir, 'klo.yaml'),
+    [
+      'project: warehouse',
+      'connections:',
+      '  warehouse:',
+      '    driver: postgres',
+      '    url: postgres://localhost:5432/warehouse',
+      '    readonly: true',
+      'ingest:',
+      '  adapters:',
+      '    - live-database',
+      '',
+    ].join('\n'),
+    'utf-8',
+  );
+}
+
+function fetchOnlyAdapter(): SourceAdapter { // stub live-database adapter: fetch() stages connection.json, foreign-keys.json and tables/orders.json
+  return {
+    source: 'live-database',
+    skillNames: ['live_database_ingest'],
+    async fetch(_pullConfig, stagedDir) {
+      await mkdir(join(stagedDir, 'tables'), { recursive: true });
+      await writeFile(join(stagedDir, 'connection.json'), '{"connectionId":"warehouse"}\n', 'utf-8');
+      await writeFile(join(stagedDir, 'foreign-keys.json'), '{"foreignKeys":[]}\n', 'utf-8');
+      await writeFile(
+        join(stagedDir, 'tables', 'orders.json'),
+        '{"name":"orders","db":"public","columns":[{"name":"id","type":"integer","nullable":false,"primaryKey":true}]}\n',
+        'utf-8',
+      );
+    },
+    async detect(stagedDir) {
+      await readFile(join(stagedDir, 'connection.json'), 'utf-8');
+      return true;
+    },
+    async chunk() {
+      return {
+        workUnits: [
+          {
+            unitKey: 'live-database-public-orders',
+            rawFiles: ['tables/orders.json'],
+            dependencyPaths: ['connection.json', 'foreign-keys.json'],
+            peerFileIndex: [],
+          },
+        ],
+      };
+    },
+  };
+}
+
+describe('local ingest', () => {
+  let tempDir: string;
+  let project: KloLocalProject;
+
+  beforeEach(async () => {
+    tempDir = await mkdtemp(join(tmpdir(), 'klo-local-ingest-'));
+    const projectDir = join(tempDir, 'project');
+    await initKloProject({ projectDir, projectName: 'warehouse' });
+    await writeWarehouseConfig(projectDir);
+    project = await loadKloProject({ projectDir });
+  });
+
+  afterEach(async () => {
+    await rm(tempDir, { recursive: true, force: true });
+  });
+
+  it('stages a source directory, chunks it, records status, and commits raw files', async () => {
+    const sourceDir = join(tempDir, 'source');
+    await mkdir(join(sourceDir, 'orders'), { recursive: true });
+    await writeFile(join(sourceDir, 'orders', 'orders.json'), '{"name":"orders"}\n', 'utf-8');
+
+    const result = await runLocalStageOnlyIngest({
+      project,
+      adapters: [new FakeSourceAdapter()],
+      adapter: 'fake',
+      connectionId: 'warehouse',
+      sourceDir,
+      jobId: 'local-job-1',
+      now: () => new Date('2026-04-27T12:00:00.000Z'),
+    });
+
+    expect(result).toMatchObject({
+      runId: 'local-job-1',
+      jobId: 'local-job-1',
+      status: 'done',
+      adapter: 'fake',
+      connectionId: 'warehouse',
+      progress: 1,
+      done: true,
+      previousRunId: null,
+      workUnitCount: 1,
+      rawFileCount: 1,
+      evictionDeletedRawPaths: [],
+      errors: [],
+    });
+    expect(result.syncId).toBe('2026-04-27-120000-local-job-1');
+    expect(result.diffSummary).toEqual({ added: 1, modified: 0, deleted: 0, unchanged: 0 });
+    expect(result.diffPaths).toEqual({
+      added: ['orders/orders.json'],
+      modified: [],
+      deleted: [],
+      unchanged: [],
+    });
+    expect(result.workUnits).toEqual([
+      {
+        unitKey: 'fake-orders',
+        rawFiles: ['orders/orders.json'],
+        dependencyPaths: [],
+        peerFileIndex: [],
+      },
+    ]);
+
+    const rawPath = join(
+      project.projectDir,
+      'raw-sources',
+      'warehouse',
+      'fake',
+      '2026-04-27-120000-local-job-1',
+      'orders',
+      'orders.json',
+    );
+    await expect(readFile(rawPath, 'utf-8')).resolves.toBe('{"name":"orders"}\n');
+
+    const status = await getLocalStageOnlyIngestStatus(project, 'local-job-1');
+    expect(status).toEqual(result);
+
+    await expect(access(join(project.projectDir, '.klo', 'db.sqlite'))).resolves.toBeUndefined();
+    await expect(
+      readFile(join(project.projectDir, '.klo', 'ingest-runs', 'local-job-1.json'), 'utf-8'),
+    ).rejects.toThrow();
+    await expect(
+      readFile(join(project.projectDir, '.klo', 'ingest-reports', 'local-job-1.json'), 'utf-8'),
+    ).rejects.toThrow();
+  });
+
+  it('emits memory-flow events while staging and planning a local ingest', async () => {
+    const sourceDir = join(tempDir, 'source');
+    await mkdir(join(sourceDir, 'orders'), { recursive: true });
+    await writeFile(join(sourceDir, 'orders', 'orders.json'), '{"name":"orders"}\n', 'utf-8');
+
+    const snapshots: MemoryFlowReplayInput[] = [];
+    const memoryFlow = createMemoryFlowLiveBuffer(
+      {
+        runId: 'local-flow-1',
+        connectionId: 'warehouse',
+        adapter: 'fake',
+        status: 'running',
+        sourceDir,
+        syncId: 'pending',
+        errors: [],
+        events: [],
+        plannedWorkUnits: [],
+        details: { actions: [], provenance: [], transcripts: [] },
+      },
+      { onChange: (snapshot) => snapshots.push(snapshot) },
+    );
+
+    const result = await runLocalStageOnlyIngest({
+      project,
+      adapters: [new FakeSourceAdapter()],
+      adapter: 'fake',
+      connectionId: 'warehouse',
+      sourceDir,
+      jobId: 'local-flow-1',
+      now: () => new Date('2026-04-30T13:00:00.000Z'),
+      memoryFlow,
+    });
+
+    expect(result.status).toBe('done');
+    expect(memoryFlow.snapshot()).toMatchObject({
+      runId: 'local-flow-1',
+      status: 'done',
+      syncId: '2026-04-30-130000-local-flow-1',
+      plannedWorkUnits: [{ unitKey: 'fake-orders', rawFiles: ['orders/orders.json'] }],
+    });
+    expect(memoryFlow.snapshot().events.map(({ emittedAt: _emittedAt, ...event }) => event)).toEqual([
+      { type: 'source_acquired', adapter: 'fake', trigger: 'manual_resync', fileCount: 1 },
+      { type: 'scope_detected', fingerprint: null },
+      { type: 'raw_snapshot_written', syncId: '2026-04-30-130000-local-flow-1', rawFileCount: 1 },
+      { type: 'diff_computed', added: 1, modified: 0, deleted: 0, unchanged: 0 },
+      { type: 'chunks_planned', chunkCount: 1, workUnitCount: 1, evictionCount: 0 },
+      { type: 'report_created', runId: 'local-flow-1' },
+    ]);
+    expect(snapshots.at(-1)?.status).toBe('done');
+  });
+
+  it('marks the memory-flow buffer as error when local ingest fails', async () => {
+    const memoryFlow = createMemoryFlowLiveBuffer({
+      runId: 'local-flow-error',
+      connectionId: 'warehouse',
+      adapter: 'fake',
+      status: 'running',
+      sourceDir: null,
+      syncId: 'pending',
+      errors: [],
+      events: [],
+      plannedWorkUnits: [],
+      details: { actions: [], provenance: [], transcripts: [] },
+    });
+
+    await expect(
+      runLocalStageOnlyIngest({
+        project,
+        adapters: [new FakeSourceAdapter()],
+        adapter: 'fake',
+        connectionId: 'warehouse',
+        jobId: 'local-flow-error',
+        now: () => new Date('2026-04-30T13:05:00.000Z'),
+        memoryFlow,
+      }),
+    ).rejects.toThrow('Local ingest adapter "fake" requires sourceDir because it does not implement fetch().');
+
+    expect(memoryFlow.snapshot()).toMatchObject({
+      status: 'error',
+      errors: ['Local ingest adapter "fake" requires sourceDir because it does not implement fetch().'],
+    });
+  });
+
+  it('returns null for missing local ingest status records', async () => {
+    await expect(getLocalStageOnlyIngestStatus(project, 'missing-run')).resolves.toBeNull();
+  });
+
+  it('diffs local reruns against the latest completed report for the same connection and adapter', async () => {
+    const sourceDir = join(tempDir, 'source');
+    await mkdir(join(sourceDir, 'orders'), { recursive: true });
+    await writeFile(join(sourceDir, 'orders', 'orders.json'), '{"name":"orders","version":1}\n', 'utf-8');
+    await writeFile(join(sourceDir, 'orders', 'customers.json'), '{"name":"customers","version":1}\n', 'utf-8');
+
+    const first = await runLocalStageOnlyIngest({
+      project,
+      adapters: [new FakeSourceAdapter()],
+      adapter: 'fake',
+      connectionId: 'warehouse',
+      sourceDir,
+      jobId: 'local-job-1',
+      now: () => new Date('2026-04-27T12:00:00.000Z'),
+    });
+
+    expect(first.previousRunId).toBeNull();
+    expect(first.diffSummary).toEqual({ added: 2, modified: 0, deleted: 0, unchanged: 0 });
+    expect(first.workUnitCount).toBe(1);
+    expect(first.evictionDeletedRawPaths).toEqual([]);
+
+    const unchanged = await runLocalStageOnlyIngest({
+      project,
+      adapters: [new FakeSourceAdapter()],
+      adapter: 'fake',
+      connectionId: 'warehouse',
+      sourceDir,
+      jobId: 'local-job-2',
+      now: () => new Date('2026-04-27T12:05:00.000Z'),
+    });
+
+    expect(unchanged.previousRunId).toBe('local-job-1');
+    expect(unchanged.syncId).toBe(first.syncId);
+    expect(unchanged.diffSummary).toEqual({ added: 0, modified: 0, deleted: 0, unchanged: 2 });
+    expect(unchanged.workUnitCount).toBe(0);
+    expect(unchanged.workUnits).toEqual([]);
+
+    const rawWriteSpy = vi.spyOn(project.fileStore, 'writeFile');
+    const commitSpy = vi.spyOn(project.git, 'commitFiles');
+
+    const secondUnchanged = await runLocalStageOnlyIngest({
+      project,
+      adapters: [new FakeSourceAdapter()],
+      adapter: 'fake',
+      connectionId: 'warehouse',
+      sourceDir,
+      jobId: 'local-job-unchanged-2',
+      now: () => new Date('2026-04-27T12:06:00.000Z'),
+    });
+
+    expect(secondUnchanged.previousRunId).toBe('local-job-2');
+    expect(secondUnchanged.syncId).toBe(first.syncId);
+    expect(secondUnchanged.diffSummary).toEqual({ added: 0, modified: 0, deleted: 0, unchanged: 2 });
+    expect(rawWriteSpy).not.toHaveBeenCalled();
+    expect(commitSpy).not.toHaveBeenCalled();
+
+    const unchangedFiles = await project.fileStore.listFiles('raw-sources/warehouse/fake');
+    expect(unchangedFiles.files.every((file) => file.includes(first.syncId))).toBe(true);
+
+    rawWriteSpy.mockRestore();
+    commitSpy.mockRestore();
+
+    await writeFile(join(sourceDir, 'orders', 'orders.json'), '{"name":"orders","version":2}\n', 'utf-8');
+    await writeFile(join(sourceDir, 'orders', 'payments.json'), '{"name":"payments","version":1}\n', 'utf-8');
+    await rm(join(sourceDir, 'orders', 'customers.json'));
+
+    const changed = await runLocalStageOnlyIngest({
+      project,
+      adapters: [new FakeSourceAdapter()],
+      adapter: 'fake',
+      connectionId: 'warehouse',
+      sourceDir,
+      jobId: 'local-job-3',
+      now: () => new Date('2026-04-27T12:10:00.000Z'),
+    });
+
+    expect(changed.previousRunId).toBe('local-job-unchanged-2');
+    expect(changed.diffSummary).toEqual({ added: 1, modified: 1, deleted: 1, unchanged: 0 });
+    expect(changed.evictionDeletedRawPaths).toEqual(['orders/customers.json']);
+    expect(changed.workUnits).toEqual([
+      {
+        unitKey: 'fake-orders',
+        rawFiles: ['orders/orders.json', 'orders/payments.json'],
+        dependencyPaths: [],
+        peerFileIndex: [],
+      },
+    ]);
+
+    const status = await getLocalStageOnlyIngestStatus(project, 'local-job-3');
+    expect(status).toEqual(changed);
+
+    await expect(access(join(project.projectDir, '.klo', 'db.sqlite'))).resolves.toBeUndefined();
+    await expect(
+      readFile(join(project.projectDir, '.klo', 'ingest-runs', 'local-job-3.json'), 'utf-8'),
+    ).rejects.toThrow();
+    await expect(
+      readFile(join(project.projectDir, '.klo', 'ingest-reports', 'local-job-3.json'), 'utf-8'),
+    ).rejects.toThrow();
+  });
+
+  it('reuses the existing sync id when the same local run id is retried', async () => {
+    const sourceDir = join(tempDir, 'idempotent-source');
+    await mkdir(join(sourceDir, 'orders'), { recursive: true });
+    await writeFile(join(sourceDir, 'orders', 'orders.json'), '{"name":"orders","version":1}\n', 'utf-8');
+
+    const first = await runLocalStageOnlyIngest({
+      project,
+      adapters: [new FakeSourceAdapter()],
+      adapter: 'fake',
+      connectionId: 'warehouse',
+      sourceDir,
+      jobId: 'local-idempotent-run',
+      now: () => new Date('2026-04-27T12:30:00.000Z'),
+    });
+
+    const retry = await runLocalStageOnlyIngest({
+      project,
+      adapters: [new FakeSourceAdapter()],
+      adapter: 'fake',
+      connectionId: 'warehouse',
+      sourceDir,
+      jobId: 'local-idempotent-run',
+      now: () => new Date('2026-04-27T13:30:00.000Z'),
+    });
+
+    expect(retry.runId).toBe(first.runId);
+    expect(retry.syncId).toBe(first.syncId);
+    expect(retry.previousRunId).toBeNull();
+    expect(retry.diffSummary).toEqual(first.diffSummary);
+
+    const status = await getLocalStageOnlyIngestStatus(project, 'local-idempotent-run');
+    expect(status?.syncId).toBe(first.syncId);
+
+    const files = await project.fileStore.listFiles('raw-sources/warehouse/fake');
+    expect(files.files).toEqual(['raw-sources/warehouse/fake/2026-04-27-123000-local-idempotent-run/orders/orders.json']);
+  });
+
+  it('prunes stale raw files when retrying the same local run id with a smaller snapshot', async () => {
+    const sourceDir = join(tempDir, 'idempotent-prune-source');
+    await mkdir(join(sourceDir, 'orders'), { recursive: true });
+    await writeFile(join(sourceDir, 'orders', 'orders.json'), '{"name":"orders","version":1}\n', 'utf-8');
+    await writeFile(join(sourceDir, 'orders', 'customers.json'), '{"name":"customers","version":1}\n', 'utf-8');
+
+    const first = await runLocalStageOnlyIngest({
+      project,
+      adapters: [new FakeSourceAdapter()],
+      adapter: 'fake',
+      connectionId: 'warehouse',
+      sourceDir,
+      jobId: 'local-idempotent-prune',
+      now: () => new Date('2026-04-27T12:40:00.000Z'),
+    });
+
+    await rm(join(sourceDir, 'orders', 'customers.json'));
+
+    const retry = await runLocalStageOnlyIngest({
+      project,
+      adapters: [new FakeSourceAdapter()],
+      adapter: 'fake',
+      connectionId: 'warehouse',
+      sourceDir,
+      jobId: 'local-idempotent-prune',
+      now: () => new Date('2026-04-27T13:40:00.000Z'),
+    });
+
+    expect(retry.syncId).toBe(first.syncId);
+
+    const files = await project.fileStore.listFiles(`raw-sources/warehouse/fake/${first.syncId}`);
+    expect(files.files).toEqual([`raw-sources/warehouse/fake/${first.syncId}/orders/orders.json`]);
+    await expect(
+      readFile(join(project.projectDir, 'raw-sources/warehouse/fake', first.syncId, 'orders', 'customers.json'), 'utf-8'),
+    ).rejects.toThrow();
+  });
+
+  it('runs fetch-capable adapters without a source directory', async () => {
+    await writeLiveDatabaseConfig(project.projectDir);
+    project = await loadKloProject({ projectDir: project.projectDir });
+
+    const result = await runLocalStageOnlyIngest({
+      project,
+      adapters: [fetchOnlyAdapter()],
+      adapter: 'live-database',
+      connectionId: 'warehouse',
+      jobId: 'local-live-db-1',
+      now: () => new Date('2026-04-27T12:00:00.000Z'),
+    });
+
+    expect(result).toMatchObject({
+      runId: 'local-live-db-1',
+      status: 'done',
+      adapter: 'live-database',
+      connectionId: 'warehouse',
+      sourceDir: null,
+      rawFileCount: 3,
+      workUnitCount: 1,
+    });
+    expect(result.diffSummary).toEqual({ added: 3, modified: 0, deleted: 0, unchanged: 0 });
+
+    await expect(
+      readFile(
+        join(
+          project.projectDir,
+          'raw-sources',
+          'warehouse',
+          'live-database',
+          '2026-04-27-120000-local-live-db-1',
+          'tables',
+          'orders.json',
+        ),
+        'utf-8',
+      ),
+    ).resolves.toContain('"orders"');
+  });
+
+  it('supports dry-run planning without writing raw files, status, or commits', async () => {
+    await writeLiveDatabaseConfig(project.projectDir);
+    project = await loadKloProject({ projectDir: project.projectDir });
+
+    const result = await runLocalStageOnlyIngest({
+      project,
+      adapters: [fetchOnlyAdapter()],
+      adapter: 'live-database',
+      connectionId: 'warehouse',
+      jobId: 'local-live-db-dry-run-1',
+      now: () => new Date('2026-04-29T08:00:00.000Z'),
+      dryRun: true,
+    });
+
+    expect(result).toMatchObject({
+      runId: 'local-live-db-dry-run-1',
+      status: 'done',
+      adapter: 'live-database',
+      connectionId: 'warehouse',
+      syncId: '2026-04-29-080000-local-live-db-dry-run-1',
+      rawFileCount: 3,
+      workUnitCount: 1,
+      diffPaths: {
+        added: ['connection.json', 'foreign-keys.json', expect.stringMatching(/^tables\//)],
+        modified: [],
+        deleted: [],
+        unchanged: [],
+      },
+    });
+
+    await expect(
+      readFile(
+        join(
+          project.projectDir,
+          'raw-sources',
+          'warehouse',
+          'live-database',
+          '2026-04-29-080000-local-live-db-dry-run-1',
+          'connection.json',
+        ),
+        'utf-8',
+      ),
+    ).rejects.toThrow();
+    await expect(getLocalStageOnlyIngestStatus(project, 'local-live-db-dry-run-1')).resolves.toBeNull();
+  });
+
+  it('uses daemon-backed live-database introspection in default local adapters', async () => {
+    await writeLiveDatabaseConfig(project.projectDir);
+    project = await loadKloProject({ projectDir: project.projectDir });
+    const runJson = vi.fn(async () => ({
+      connection_id: 'warehouse',
+      extracted_at: '2026-04-28T10:00:00+00:00',
+      metadata: { driver: 'postgres', schemas: ['public'] },
+      tables: [
+        {
+          catalog: 'warehouse',
+          db: 'public',
+          name: 'orders',
+          comment: null,
+          columns: [{ name: 'id', type: 'integer', nullable: false, primary_key: true, comment: null }],
+          foreign_keys: [],
+        },
+      ],
+    }));
+
+    const result = await runLocalStageOnlyIngest({
+      project,
+      adapters: createDefaultLocalIngestAdapters(project, {
+        databaseIntrospection: { runJson },
+      }),
+      adapter: 'live-database',
+      connectionId: 'warehouse',
+      jobId: 'local-live-db-daemon-1',
+      now: () => new Date('2026-04-28T10:00:00.000Z'),
+    });
+
+    expect(runJson).toHaveBeenCalledWith('database-introspect', {
+      connection_id: 'warehouse',
+      driver: 'postgres',
+      url: 'postgres://localhost:5432/warehouse',
+      schemas: ['public'],
+      statement_timeout_ms: 30_000,
+      connection_timeout_seconds: 5,
+    });
+    expect(result).toMatchObject({
+      runId: 'local-live-db-daemon-1',
+      status: 'done',
+      adapter: 'live-database',
+      connectionId: 'warehouse',
+      rawFileCount: 3,
+      workUnitCount: 1,
+    });
+  });
+
+  it('includes upload-capable KLO adapters in default local ingest adapters', () => {
+    expect(createDefaultLocalIngestAdapters(project).map((adapter) => adapter.source)).toEqual(
+      expect.arrayContaining(['dbt', 'metricflow', 'notion']),
+    );
+  });
+
+  it('passes resolved standalone Notion config into fetch adapters', async () => {
+    const priorToken = process.env.NOTION_AUTH_TOKEN;
+    process.env.NOTION_AUTH_TOKEN = 'ntn_local_test_token';
+    try {
+      await writeFile(
+        join(project.projectDir, 'klo.yaml'),
+        [
+          'project: warehouse',
+          'connections:',
+          '  notion-main:',
+          '    driver: notion',
+          '    auth_token_ref: env:NOTION_AUTH_TOKEN',
+          '    crawl_mode: selected_roots',
+          '    root_page_ids:',
+          '      - page-1',
+          'ingest:',
+          '  adapters:',
+          '    - notion',
+          '',
+        ].join('\n'),
+        'utf-8',
+      );
+      project = await loadKloProject({ projectDir: project.projectDir });
+
+      const fetch = vi.fn(async (_pullConfig: unknown, stagedDir: string) => {
+        await mkdir(join(stagedDir, 'pages', 'page-1'), { recursive: true });
+        await writeFile(
+          join(stagedDir, 'manifest.json'),
+          JSON.stringify({
+            source: 'notion',
+            apiVersion: '2026-03-11',
+            crawlMode: 'selected_roots',
+            rootPageIds: ['page-1'],
+            rootDatabaseIds: [],
+            rootDataSourceIds: [],
+            fetchedAt: '2026-04-30T00:00:00.000Z',
+            pageCount: 1,
+            databaseCount: 0,
+            dataSourceCount: 0,
+            capped: false,
+            continuedFromCursor: false,
+            partialSnapshot: false,
+            maxPagesPerRun: 1000,
+            maxKnowledgeCreatesPerRun: 5,
+            maxKnowledgeUpdatesPerRun: 20,
+            nextSuccessfulCursor: null,
+            skipped: [],
+            warnings: [],
+          }),
+          'utf-8',
+        );
+        await writeFile(
+          join(stagedDir, 'pages', 'page-1', 'metadata.json'),
+          JSON.stringify({
+            objectType: 'page',
+            id: 'page-1',
+            title: 'Revenue Policy',
+            path: 'Revenue Policy',
+          }),
+          'utf-8',
+        );
+        await writeFile(join(stagedDir, 'pages', 'page-1', 'page.md'), '# Revenue Policy\n\nDurable rule.\n', 'utf-8');
+      });
+      const adapter: SourceAdapter = {
+        source: 'notion',
+        skillNames: ['notion_synthesize'],
+        detect: async () => true,
+        fetch,
+        chunk: async () => ({ workUnits: [] }),
+      };
+
+      const result = await runLocalStageOnlyIngest({
+        project,
+        adapters: [adapter],
+        adapter: 'notion',
+        connectionId: 'notion-main',
+        jobId: 'local-notion-fetch-1',
+        now: () => new Date('2026-04-30T00:00:00.000Z'),
+      });
+
+      expect(fetch).toHaveBeenCalledWith(
+        expect.objectContaining({
+          authToken: 'ntn_local_test_token',
+          crawlMode: 'selected_roots',
+          rootPageIds: ['page-1'],
+          maxPagesPerRun: 1000,
+        }),
+        expect.any(String),
+        { connectionId: 'notion-main', 
sourceKey: 'notion' },
+      );
+      expect(result).toMatchObject({
+        status: 'done',
+        adapter: 'notion',
+        connectionId: 'notion-main',
+        rawFileCount: 3,
+      });
+    } finally {
+      if (priorToken === undefined) {
+        delete process.env.NOTION_AUTH_TOKEN;
+      } else {
+        process.env.NOTION_AUTH_TOKEN = priorToken;
+      }
+    }
+  });
+
+  it('keeps requiring sourceDir for adapters without fetch', async () => {
+    await expect(
+      runLocalStageOnlyIngest({
+        project,
+        adapters: [new FakeSourceAdapter()],
+        adapter: 'fake',
+        connectionId: 'warehouse',
+        jobId: 'local-job-no-source',
+        now: () => new Date('2026-04-27T12:00:00.000Z'),
+      }),
+    ).rejects.toThrow('Local ingest adapter "fake" requires sourceDir because it does not implement fetch().');
+  });
+
+  it('rejects adapters that are not enabled in klo.yaml', async () => {
+    const sourceDir = join(tempDir, 'source');
+    await mkdir(join(sourceDir, 'orders'), { recursive: true });
+    await writeFile(join(sourceDir, 'orders', 'orders.json'), '{"name":"orders"}\n', 'utf-8');
+
+    await expect(
+      runLocalStageOnlyIngest({
+        project,
+        adapters: [new FakeSourceAdapter()],
+        adapter: 'metricflow',
+        connectionId: 'warehouse',
+        sourceDir,
+        jobId: 'local-job-2',
+        now: () => new Date('2026-04-27T12:00:00.000Z'),
+      }),
+    ).rejects.toThrow('Adapter "metricflow" is not enabled in klo.yaml');
+  });
+});
diff --git a/packages/context/src/ingest/local-stage-ingest.ts b/packages/context/src/ingest/local-stage-ingest.ts
new file mode 100644
index 00000000..eb2a2f7e
--- /dev/null
+++ b/packages/context/src/ingest/local-stage-ingest.ts
@@ -0,0 +1,433 @@
+import { createHash } from 'node:crypto';
+import { cp, mkdir, readdir, readFile, rm } from 'node:fs/promises';
+import { isAbsolute, join, relative, resolve, sep } from 'node:path';
+import type { KloLocalProject } from '../project/index.js';
+import { kloLocalStateDbPath } from '../project/local-state-db.js';
+import { computeDiffSetFromHashes } from './diff-set.service.js';
+import { localPullConfigForAdapter } from './local-adapters.js';
+import { sanitizeMemoryFlowError } from './memory-flow/live-buffer.js';
+import type { MemoryFlowEventSink, MemoryFlowPlannedWorkUnit } from './memory-flow/types.js';
+import { buildSyncId } from './raw-sources-paths.js';
+import { SqliteLocalIngestStore } from './sqlite-local-ingest-store.js';
+import type { IngestTrigger, SourceAdapter, WorkUnit } from './types.js';
+
+/** Lifecycle state recorded for a local stage-only ingest run. */
+export type LocalIngestStatus = 'running' | 'done' | 'error';
+
+/** Raw-file paths grouped by diff outcome. */
+export interface LocalIngestDiffPaths {
+  added: string[];
+  modified: string[];
+  deleted: string[];
+  unchanged: string[];
+}
+
+/** Summary of one local stage-only ingest run. */
+export interface LocalIngestRunRecord {
+  runId: string;
+  jobId: string;
+  status: LocalIngestStatus;
+  adapter: string;
+  connectionId: string;
+  sourceDir: string | null;
+  syncId: string;
+  startedAt: string;
+  completedAt: string;
+  progress: number;
+  done: boolean;
+  previousRunId: string | null;
+  diffSummary: {
+    added: number;
+    modified: number;
+    deleted: number;
+    unchanged: number;
+  };
+  diffPaths: LocalIngestDiffPaths;
+  workUnitCount: number;
+  rawFileCount: number;
+  workUnits: Array<Pick<WorkUnit, 'unitKey' | 'rawFiles' | 'peerFileIndex' | 'dependencyPaths'>>; // NOTE(review): type arguments were lost in extraction; reconstructed from the four fields used in this file - confirm
+  evictionDeletedRawPaths: string[];
+  errors: string[];
+}
+
+/** Run record plus the per-file content hashes read back as the baseline when a later run is diffed. */
+export type LocalIngestReport = LocalIngestRunRecord & {
+  rawContentHashes: Record<string, string>;
+};
+
+/** Options accepted by runLocalStageOnlyIngest. */
+export interface RunLocalStageOnlyIngestOptions {
+  project: KloLocalProject;
+  adapters: SourceAdapter[];
+  adapter: string;
+  connectionId: string;
+  sourceDir?: string;
+  trigger?: IngestTrigger;
+  jobId?: string;
+  now?: () => Date;
+  dryRun?: boolean;
+  memoryFlow?: MemoryFlowEventSink;
+}
+
+// Author identity passed to fileStore.deleteFile when stale raw files are pruned.
+const LOCAL_AUTHOR = 'klo';
+const LOCAL_AUTHOR_EMAIL = 'klo@example.com';
+
+/** Returns `value` unchanged when it starts with an alphanumeric and contains only [a-zA-Z0-9_-]; otherwise throws `Unsafe <kind>`. */
+function safeSegment(kind: string, value: string): string {
+  if (!/^[a-zA-Z0-9][a-zA-Z0-9_-]*$/.test(value)) {
+    throw new Error(`Unsafe ${kind}: ${value}`);
+  }
+  return value;
+}
+
+/** Converts backslash separators to forward slashes. */
+function normalizeRelativePath(filePath: string): string {
+  return filePath.replaceAll('\\', '/');
+}
+
+/** Resolves `childPath` and throws unless it equals `rootDir` or lies beneath it; returns the resolved path. */
+function assertInside(rootDir: string, childPath: string): string {
+  const root = resolve(rootDir);
+  const child = resolve(childPath);
+  if (child !== root && !child.startsWith(`${root}${sep}`)) {
+    throw new Error(`Path escapes root directory: ${childPath}`);
+  }
+  return child;
+}
+
+/** Recursively lists regular files under `rootDir` as sorted forward-slash paths relative to `rootDir`; entries that are neither files nor directories are skipped. */
+async function walkFiles(rootDir: string, currentDir = rootDir): Promise<string[]> {
+  const entries = await readdir(currentDir, { withFileTypes: true });
+  const files: string[] = [];
+  for (const entry of entries) {
+    const absolutePath = join(currentDir, entry.name);
+    if (entry.isDirectory()) {
+      files.push(...(await walkFiles(rootDir, absolutePath)));
+      continue;
+    }
+    if (entry.isFile()) {
+      files.push(normalizeRelativePath(relative(rootDir, absolutePath)));
+    }
+  }
+  return files.sort();
+}
+
+/** Returns the hex SHA-256 digest of a file's bytes. */
+async function hashFile(path: string): Promise<string> {
+  const content = await readFile(path);
+  return createHash('sha256').update(content).digest('hex');
+}
+
+/** For connection.json, re-serializes the JSON object without `extractedAt` so that field does not affect the hash; other paths and unparsable or non-object content are returned as-is. */
+function stableLiveDatabaseHashContent(relativePath: string, content: Buffer): Buffer | string {
+  if (relativePath !== 'connection.json') {
+    return content;
+  }
+
+  try {
+    const parsed = JSON.parse(content.toString('utf-8')) as unknown;
+    if (typeof parsed !== 'object' || parsed === null || Array.isArray(parsed)) {
+      return content;
+    }
+    const stable = { ...(parsed as Record<string, unknown>) };
+    delete stable.extractedAt;
+    return `${JSON.stringify(stable)}\n`;
+  } catch {
+    return content;
+  }
+}
+
+/** Hashes a staged file; files from the live-database adapter pass through stableLiveDatabaseHashContent first. */
+async function hashStagedFile(adapter: SourceAdapter, stagedDir: string, relativePath: string): Promise<string> {
+  const absolutePath = join(stagedDir, relativePath);
+  if (adapter.source !== 'live-database') {
+    return hashFile(absolutePath);
+  }
+  const content = await readFile(absolutePath);
+  return createHash('sha256').update(stableLiveDatabaseHashContent(relativePath, content)).digest('hex');
+}
+
+/** Returns the adapter whose `source` equals `source`, or throws when none was supplied. */
+function findAdapter(adapters: SourceAdapter[], source: string): SourceAdapter {
+  const adapter = adapters.find((candidate) => candidate.source === source);
+  if (!adapter) {
+    throw new Error(`Adapter "${source}" is not available for local ingest`);
+  }
+  return adapter;
+}
+
+/** Throws unless the connection exists in the project config and the adapter is listed in ingest.adapters. */
+function assertConfigured(project: KloLocalProject, adapter: string, connectionId: string): void {
+  if (!project.config.connections[connectionId]) {
+    throw new Error(`Connection "${connectionId}" is not configured in klo.yaml`);
+  }
+  if (!project.config.ingest.adapters.includes(adapter)) {
+    throw new Error(`Adapter "${adapter}" is not enabled in klo.yaml`);
+  }
+}
+
+/** Creates the SQLite-backed ingest store at the project's local state DB path. */
+function createLocalIngestStore(project: KloLocalProject): SqliteLocalIngestStore {
+  return new SqliteLocalIngestStore({ dbPath: kloLocalStateDbPath(project) });
+}
+
+/** Builds a default job id: `local-` followed by the timestamp's epoch milliseconds in base 36. */
+function buildLocalJobId(now: Date): string {
+  return `local-${now.getTime().toString(36)}`;
+}
+
+/** Throws when a run with this id already exists for a different connection/adapter pair; a null `existingRun` passes. */
+function assertCompatibleExistingRun(
+  existingRun: LocalIngestRunRecord | null,
+  runId: string,
+  adapter: string,
+  connectionId: string,
+): void {
+  if (!existingRun) {
+    return;
+  }
+  if (existingRun.adapter === adapter && existingRun.connectionId === connectionId) {
+    return;
+  }
+  throw new Error(
+    `Local ingest run "${runId}" already exists for ${existingRun.connectionId}/${existingRun.adapter} and cannot be reused for ${connectionId}/${adapter}`,
+  );
+}
+
+/** Maps work units to the memory-flow shape, replacing the peer and dependency lists with their lengths. */
+function memoryFlowPlannedWorkUnits(
+  workUnits: Array<Pick<WorkUnit, 'unitKey' | 'rawFiles' | 'peerFileIndex' | 'dependencyPaths'>>,
+): MemoryFlowPlannedWorkUnit[] {
+  return workUnits.map((workUnit) => ({
+    unitKey: workUnit.unitKey,
+    rawFiles: workUnit.rawFiles,
+    peerFileCount: workUnit.peerFileIndex.length,
+    dependencyCount: workUnit.dependencyPaths.length,
+  }));
+}
+
+/** Deletes every file listed under `rawPrefix` that is not in `nextRawPaths` and returns the deleted paths. */
+async function pruneStaleRawFiles(input: {
+  project: KloLocalProject;
+  rawPrefix: string;
+  nextRawPaths: string[];
+  adapter: string;
+}): Promise<string[]> {
+  const existing = await input.project.fileStore.listFiles(input.rawPrefix);
+  const nextRawPathSet = new Set(input.nextRawPaths);
+  const staleRawPaths = existing.files.filter((path) => !nextRawPathSet.has(path));
+  for (const staleRawPath of staleRawPaths) {
+    await input.project.fileStore.deleteFile(
+      staleRawPath,
+      LOCAL_AUTHOR,
+      LOCAL_AUTHOR_EMAIL,
+      `Remove stale ${input.adapter} raw file: ${staleRawPath}`,
+      { skipLock: true },
+    );
+  }
+  return staleRawPaths;
+}
+
+/** Recreates `stagedDir`, then fills it by copying `sourceDir` (must be absolute) or by calling `adapter.fetch`; returns the resolved source dir, or null when the content was fetched. */
+async function prepareLocalStagedDir(
+  project: KloLocalProject,
+  adapter: SourceAdapter,
+  stagedDir: string,
+  sourceDir: string | undefined,
+  connectionId: string,
+): Promise<string | null> {
+  await rm(stagedDir, { recursive: true, force: true });
+  await mkdir(stagedDir, { recursive: true });
+  if (sourceDir) {
+    if (!isAbsolute(sourceDir)) {
+      throw new Error('sourceDir must be an absolute path');
+    }
+    const resolvedSourceDir = resolve(sourceDir);
+    await cp(resolvedSourceDir, stagedDir, { recursive: true });
+    return resolvedSourceDir;
+  }
+  if (!adapter.fetch) {
+    throw new Error(
+      `Local ingest adapter "${adapter.source}" requires sourceDir because it does not implement fetch().`,
+    );
+  }
+  const pullConfig = await localPullConfigForAdapter(project, adapter, connectionId);
+  await adapter.fetch(pullConfig, stagedDir, { connectionId, sourceKey: adapter.source });
+  return null;
+}
+
+/** Runs a local stage-only ingest; on failure it reports the sanitized error to `options.memoryFlow` (when provided) and rethrows. */
+export async function runLocalStageOnlyIngest(options: RunLocalStageOnlyIngestOptions): Promise<LocalIngestRunRecord> { // NOTE(review): return type argument was lost in extraction; LocalIngestRunRecord inferred from the fields the tests read - confirm
+  try {
+    return await runLocalStageOnlyIngestInner(options);
+  } catch (error) {
+    options.memoryFlow?.finish('error', [sanitizeMemoryFlowError(error)]);
+    throw error;
+  }
+}
+
+async function runLocalStageOnlyIngestInner(options: RunLocalStageOnlyIngestOptions): Promise {
+  const started = options.now?.() ?? new Date();
+  const jobId = options.jobId ?? 
buildLocalJobId(started); + const runId = jobId; + const adapterName = safeSegment('adapter', options.adapter); + const connectionId = safeSegment('connection id', options.connectionId); + assertConfigured(options.project, adapterName, connectionId); + const adapter = findAdapter(options.adapters, adapterName); + options.memoryFlow?.update({ + runId, + connectionId, + adapter: adapter.source, + status: 'running', + }); + const store = createLocalIngestStore(options.project); + const existingRun = options.dryRun ? null : store.findRunById(runId); + assertCompatibleExistingRun(existingRun, runId, adapter.source, connectionId); + + // runId may come from caller-supplied options.jobId and, unlike adapter/connectionId, is not passed through safeSegment. + // prepareLocalStagedDir recursively deletes stagedDir, so reject any id that resolves outside the local-ingest cache root. + const localIngestCacheRoot = join(options.project.projectDir, '.klo/cache/local-ingest'); + const stagedDir = join(localIngestCacheRoot, runId, 'staged'); + assertInside(localIngestCacheRoot, stagedDir); + const sourceDir = await prepareLocalStagedDir(options.project, adapter, stagedDir, options.sourceDir, connectionId); + + const detected = await adapter.detect(stagedDir); + if (!detected) { + throw new Error(`Adapter "${adapter.source}" did not recognize ${sourceDir ?? 'fetched source output'}`); + } + + const relativeFiles = await walkFiles(stagedDir); + options.memoryFlow?.update({ sourceDir }); + options.memoryFlow?.emit({ + type: 'source_acquired', + adapter: adapter.source, + trigger: options.trigger ?? 'manual_resync', + fileCount: relativeFiles.length, + }); + const hashes = new Map(); + for (const file of relativeFiles) { + hashes.set(file, await hashStagedFile(adapter, stagedDir, file)); + } + const latestReport = store.findLatestCompletedReport(connectionId, adapter.source, { excludeRunId: runId }); + const priorHashes = latestReport ? new Map(Object.entries(latestReport.rawContentHashes)) : new Map(); + const scopeDescriptor = adapter.describeScope ? await adapter.describeScope(stagedDir) : null; + options.memoryFlow?.emit({ type: 'scope_detected', fingerprint: scopeDescriptor?.fingerprint ?? null }); + const diffSet = computeDiffSetFromHashes( + hashes, + priorHashes, + scopeDescriptor ?
scopeDescriptor.isPathInScope.bind(scopeDescriptor) : undefined, + ); + const unchangedFromLatestCompletedRun = + !existingRun && + !!latestReport && + diffSet.added.length === 0 && + diffSet.modified.length === 0 && + diffSet.deleted.length === 0; + const syncId = + existingRun?.syncId ?? (unchangedFromLatestCompletedRun ? latestReport.syncId : buildSyncId(started, jobId)); + options.memoryFlow?.update({ syncId }); + options.memoryFlow?.emit({ type: 'raw_snapshot_written', syncId, rawFileCount: relativeFiles.length }); + options.memoryFlow?.emit({ + type: 'diff_computed', + added: diffSet.added.length, + modified: diffSet.modified.length, + deleted: diffSet.deleted.length, + unchanged: diffSet.unchanged.length, + }); + const chunkResult = await adapter.chunk(stagedDir, diffSet); + options.memoryFlow?.update({ plannedWorkUnits: memoryFlowPlannedWorkUnits(chunkResult.workUnits) }); + options.memoryFlow?.emit({ + type: 'chunks_planned', + chunkCount: chunkResult.workUnits.length, + workUnitCount: chunkResult.workUnits.length, + evictionCount: chunkResult.eviction?.deletedRawPaths.length ?? 0, + }); + const rawPrefix = `raw-sources/${connectionId}/${adapter.source}/${syncId}`; + const rawPaths = relativeFiles.map((file) => `${rawPrefix}/${file}`); + const staleRawPaths = options.dryRun || unchangedFromLatestCompletedRun + ? [] + : await pruneStaleRawFiles({ + project: options.project, + rawPrefix, + nextRawPaths: rawPaths, + adapter: adapter.source, + }); + + for (const file of relativeFiles) { + const absolutePath = assertInside(stagedDir, join(stagedDir, file)); + const rawPath = `${rawPrefix}/${file}`; + if (!options.dryRun && !unchangedFromLatestCompletedRun) { + await options.project.fileStore.writeFile( + rawPath, + await readFile(absolutePath, 'utf-8'), + LOCAL_AUTHOR, + LOCAL_AUTHOR_EMAIL, + `Stage ${adapter.source} raw file: ${file}`, + { skipLock: true }, + ); + } + } + + const completed = options.now?.() ?? 
new Date(); + const record: LocalIngestRunRecord = { + runId, + jobId, + status: 'done', + adapter: adapter.source, + connectionId, + sourceDir, + syncId, + startedAt: started.toISOString(), + completedAt: completed.toISOString(), + progress: 1, + done: true, + previousRunId: latestReport?.runId ?? null, + diffSummary: { + added: diffSet.added.length, + modified: diffSet.modified.length, + deleted: diffSet.deleted.length, + unchanged: diffSet.unchanged.length, + }, + diffPaths: { + added: diffSet.added, + modified: diffSet.modified, + deleted: diffSet.deleted, + unchanged: diffSet.unchanged, + }, + workUnitCount: chunkResult.workUnits.length, + rawFileCount: relativeFiles.length, + workUnits: chunkResult.workUnits.map((workUnit) => ({ + unitKey: workUnit.unitKey, + rawFiles: workUnit.rawFiles, + peerFileIndex: workUnit.peerFileIndex, + dependencyPaths: workUnit.dependencyPaths, + })), + evictionDeletedRawPaths: chunkResult.eviction?.deletedRawPaths ?? [], + errors: [], + }; + + if (!options.dryRun) { + store.saveCompletedRun({ + record, + rawContentHashes: Object.fromEntries(hashes), + }); + + const commitPaths = unchangedFromLatestCompletedRun ? 
[] : [...rawPaths, ...staleRawPaths].sort(); + if (commitPaths.length > 0) { + await options.project.git.commitFiles( + commitPaths, + `ingest(${adapter.source}): ${jobId} syncId=${syncId}`, + LOCAL_AUTHOR, + LOCAL_AUTHOR_EMAIL, + ); + } + } + + options.memoryFlow?.emit({ type: 'report_created', runId }); + options.memoryFlow?.finish(record.status, record.errors); + return record; +} + +export async function getLocalStageOnlyIngestStatus( + project: KloLocalProject, + runId: string, +): Promise { + return createLocalIngestStore(project).findRunById(runId); +} diff --git a/packages/context/src/ingest/memory-flow/acceptance-fixtures.ts b/packages/context/src/ingest/memory-flow/acceptance-fixtures.ts new file mode 100644 index 00000000..f4f01c12 --- /dev/null +++ b/packages/context/src/ingest/memory-flow/acceptance-fixtures.ts @@ -0,0 +1,168 @@ +import type { MemoryFlowReplayInput } from './types.js'; + +function baseScenario(overrides: Partial = {}): MemoryFlowReplayInput { + return { + runId: 'run-success', + connectionId: 'warehouse', + adapter: 'metricflow', + status: 'done', + sourceDir: '/tmp/source', + syncId: 'sync-success', + reportPath: 'ingest-report.json', + errors: [], + events: [ + { type: 'source_acquired', adapter: 'metricflow', trigger: 'manual_resync', fileCount: 4 }, + { type: 'scope_detected', fingerprint: 'metricflow:demo' }, + { type: 'raw_snapshot_written', syncId: 'sync-success', rawFileCount: 4 }, + { type: 'diff_computed', added: 2, modified: 1, deleted: 0, unchanged: 1 }, + { type: 'chunks_planned', chunkCount: 2, workUnitCount: 2, evictionCount: 0 }, + { type: 'work_unit_started', unitKey: 'orders', skills: ['knowledge_capture'], stepBudget: 40 }, + { type: 'candidate_action', unitKey: 'orders', target: 'wiki', action: 'created', key: 'knowledge/global/orders.md' }, + { type: 'candidate_action', unitKey: 'orders', target: 'sl', action: 'updated', key: 'warehouse.orders' }, + { type: 'work_unit_finished', unitKey: 'orders', status: 'success' 
}, + { type: 'work_unit_started', unitKey: 'revenue', skills: ['knowledge_capture'], stepBudget: 40 }, + { type: 'candidate_action', unitKey: 'revenue', target: 'wiki', action: 'updated', key: 'knowledge/global/revenue.md' }, + { type: 'work_unit_finished', unitKey: 'revenue', status: 'success' }, + { type: 'reconciliation_finished', conflictCount: 0, fallbackCount: 0 }, + { type: 'saved', commitSha: 'abc123456789', wikiCount: 2, slCount: 1 }, // pragma: allowlist secret + { type: 'provenance_recorded', rowCount: 4 }, + { type: 'report_created', runId: 'run-success', reportPath: 'ingest-report.json' }, + ], + plannedWorkUnits: [ + { unitKey: 'orders', rawFiles: ['models/orders.yml', 'models/customers.yml'], peerFileCount: 1, dependencyCount: 1 }, + { unitKey: 'revenue', rawFiles: ['docs/revenue.md'], peerFileCount: 0, dependencyCount: 0 }, + ], + details: { + actions: [ + { + unitKey: 'orders', + target: 'wiki', + action: 'created', + key: 'knowledge/global/orders.md', + summary: 'Captured order definitions', + rawFiles: ['models/orders.yml'], + status: 'success', + }, + { + unitKey: 'orders', + target: 'sl', + action: 'updated', + key: 'warehouse.orders', + summary: 'Updated orders source', + rawFiles: ['models/orders.yml'], + status: 'success', + }, + { + unitKey: 'revenue', + target: 'wiki', + action: 'updated', + key: 'knowledge/global/revenue.md', + summary: 'Updated revenue notes', + rawFiles: ['docs/revenue.md'], + status: 'success', + }, + ], + provenance: [ + { + rawPath: 'models/orders.yml', + artifactKind: 'wiki', + artifactKey: 'knowledge/global/orders.md', + actionType: 'created', + }, + { rawPath: 'models/orders.yml', artifactKind: 'sl', artifactKey: 'warehouse.orders', actionType: 'updated' }, + ], + transcripts: [ + { + unitKey: 'orders', + path: 'transcripts/orders.json', + toolCallCount: 3, + errorCount: 0, + toolNames: ['wiki_write', 'sl_write_source'], + }, + ], + }, + ...overrides, + }; +} + +export function successfulReplayScenario(): 
MemoryFlowReplayInput { + return baseScenario(); +} + +export function deletedRawPathsScenario(): MemoryFlowReplayInput { + return baseScenario({ + events: baseScenario().events.map((event) => + event.type === 'diff_computed' + ? { ...event, deleted: 2 } + : event.type === 'chunks_planned' + ? { ...event, evictionCount: 2 } + : event, + ), + }); +} + +export function validationRevertScenario(): MemoryFlowReplayInput { + return baseScenario({ + runId: 'run-validation-failure', + status: 'error', + errors: ['semantic-layer validation failed for warehouse.orders'], + events: [ + { type: 'source_acquired', adapter: 'metricflow', trigger: 'manual_resync', fileCount: 1 }, + { type: 'raw_snapshot_written', syncId: 'sync-validation', rawFileCount: 1 }, + { type: 'diff_computed', added: 1, modified: 0, deleted: 0, unchanged: 0 }, + { type: 'chunks_planned', chunkCount: 1, workUnitCount: 1, evictionCount: 0 }, + { type: 'work_unit_started', unitKey: 'orders', skills: ['knowledge_capture'], stepBudget: 40 }, + { type: 'candidate_action', unitKey: 'orders', target: 'sl', action: 'updated', key: 'warehouse.orders' }, + { + type: 'work_unit_finished', + unitKey: 'orders', + status: 'failed', + reason: 'semantic-layer validation failed for warehouse.orders', + }, + ], + plannedWorkUnits: [{ unitKey: 'orders', rawFiles: ['models/orders.yml'], peerFileCount: 0, dependencyCount: 0 }], + details: { + actions: [ + { + unitKey: 'orders', + target: 'sl', + action: 'updated', + key: 'warehouse.orders', + summary: 'Invalid measure was reverted', + rawFiles: ['models/orders.yml'], + status: 'failed', + }, + ], + provenance: [], + transcripts: [ + { + unitKey: 'orders', + path: 'transcripts/orders.json', + toolCallCount: 2, + errorCount: 1, + toolNames: ['sl_write_source'], + }, + ], + }, + }); +} + +export function flaggedFallbackScenario(): MemoryFlowReplayInput { + return baseScenario({ + runId: 'run-flagged-fallback', + events: baseScenario().events.map((event) => + event.type === 
'reconciliation_finished' ? { ...event, fallbackCount: 1 } : event, + ), + }); +} + +export function postSaveSecretFailureScenario(): MemoryFlowReplayInput { + return baseScenario({ + runId: 'run-post-save-failure', + status: 'error', + errors: ['index refresh failed https://example.com/private token=abc123'], + events: baseScenario().events.map((event) => + event.type === 'saved' ? { ...event, commitSha: 'def456789012' } : event, // pragma: allowlist secret + ), + }); +} diff --git a/packages/context/src/ingest/memory-flow/acceptance.test.ts b/packages/context/src/ingest/memory-flow/acceptance.test.ts new file mode 100644 index 00000000..7376c5db --- /dev/null +++ b/packages/context/src/ingest/memory-flow/acceptance.test.ts @@ -0,0 +1,62 @@ +import { describe, expect, it } from 'vitest'; +import { + deletedRawPathsScenario, + flaggedFallbackScenario, + postSaveSecretFailureScenario, + successfulReplayScenario, + validationRevertScenario, +} from './acceptance-fixtures.js'; +import { renderMemoryFlowReplay } from './render.js'; +import { buildMemoryFlowViewModel } from './view-model.js'; + +function renderScenario(input = successfulReplayScenario(), terminalWidth = 140): string { + return renderMemoryFlowReplay(buildMemoryFlowViewModel(input), { terminalWidth }); +} + +describe('memory-flow acceptance scenarios', () => { + it('renders a completed replay with a clear saved-memory completion line', () => { + const output = renderScenario(successfulReplayScenario()); + + expect(output).toContain('KLO memory flow warehouse/metricflow done'); + expect(output).toContain('Saved 3 memories from 4 raw files: 2 wiki pages, 1 SL updates.'); + expect(output).toContain('Commit: abc12345 Run: run-success Report: ingest-report.json'); + }); + + it('renders deleted raw paths as eviction candidates without listing every raw path by default', () => { + const output = renderScenario(deletedRawPathsScenario()); + + expect(output).toContain('2 deletions'); + 
expect(output).toContain('Eviction candidates: 2'); + expect(output).not.toContain('/full/local/path/private/orders-2024.sql'); + }); + + it('renders invalid semantic-layer writes as reverted, not saved', () => { + const output = renderScenario(validationRevertScenario()); + + expect(output).toContain('orders reverted: semantic-layer validation failed for warehouse.orders'); + expect(output).toContain('Invalid semantic-layer writes were not saved.'); + expect(output).not.toContain('Saved 1 memories'); + }); + + it('renders flagged fallbacks in gates details', () => { + const output = renderScenario(flaggedFallbackScenario()); + + expect(output).toContain('0 conflict, 1 fallback'); + expect(output).toContain('Flagged fallbacks: 1'); + }); + + it('renders no ANSI color codes in the text fallback for terminals without color support', () => { + const output = renderScenario(successfulReplayScenario(), 80); + + expect(output).toContain('KLO memory flow warehouse/metricflow done'); + expect(output).not.toMatch(/\u001b\[[0-9;]*m/); + }); + + it('redacts secrets in visible post-save failure text', () => { + const output = renderScenario(postSaveSecretFailureScenario()); + + expect(output).toContain('Post-save error: index refresh failed https://[redacted] token=[redacted]'); + expect(output).not.toContain('abc123'); + expect(output).not.toContain('https://example.com/private'); + }); +}); diff --git a/packages/context/src/ingest/memory-flow/events.test.ts b/packages/context/src/ingest/memory-flow/events.test.ts new file mode 100644 index 00000000..46f4eaf4 --- /dev/null +++ b/packages/context/src/ingest/memory-flow/events.test.ts @@ -0,0 +1,332 @@ +import { describe, expect, it } from 'vitest'; +import type { LocalIngestRunRecord } from '../local-stage-ingest.js'; +import type { IngestReportSnapshot } from '../reports.js'; +import { ingestReportToMemoryFlowReplay, localIngestRunToMemoryFlowReplay } from './events.js'; + +function localRecord(): LocalIngestRunRecord { + 
return { + runId: 'local-run-1', + jobId: 'local-run-1', + status: 'done', + adapter: 'metricflow', + connectionId: 'warehouse', + sourceDir: '/tmp/source', + syncId: 'sync-1', + startedAt: '2026-04-30T10:00:00.000Z', + completedAt: '2026-04-30T10:00:01.000Z', + progress: 1, + done: true, + previousRunId: null, + diffSummary: { added: 2, modified: 1, deleted: 1, unchanged: 4 }, + diffPaths: { + added: ['models/orders.yml', 'models/revenue.yml'], + modified: ['models/customers.yml'], + deleted: ['models/old.yml'], + unchanged: ['models/a.yml', 'models/b.yml', 'models/c.yml', 'models/d.yml'], + }, + workUnitCount: 2, + rawFileCount: 7, + workUnits: [ + { + unitKey: 'orders', + rawFiles: ['models/orders.yml'], + peerFileIndex: ['models/customers.yml'], + dependencyPaths: ['models/base.yml'], + }, + { + unitKey: 'revenue', + rawFiles: ['models/revenue.yml'], + peerFileIndex: [], + dependencyPaths: [], + }, + ], + evictionDeletedRawPaths: ['raw-sources/warehouse/metricflow/sync-1/models/old.yml'], + errors: [], + }; +} + +function reportSnapshot(): IngestReportSnapshot { + return { + id: 'report-1', + runId: 'run-1', + jobId: 'job-1', + connectionId: 'warehouse', + sourceKey: 'lookml', + createdAt: '2026-04-30T10:00:02.000Z', + body: { + syncId: 'sync-2', + diffSummary: { added: 1, modified: 1, deleted: 0, unchanged: 3 }, + commitSha: 'abc123456789', // pragma: allowlist secret + failedWorkUnits: ['customers'], + reconciliationSkipped: false, + conflictsResolved: [ + { + kind: 'near_duplicate', + artifactKey: 'warehouse.orders', + detail: 'kept candidate definition', + flaggedForHuman: false, + }, + ], + evictionsApplied: [], + unmappedFallbacks: [{ rawPath: 'cards/42.json', reason: 'no_connection_mapping', fallback: 'flagged' }], + evictionInputs: [], + unresolvedCards: [], + supersededBy: null, + overrideOf: null, + provenanceRows: [ + { + rawPath: 'views/orders.view.lkml', + artifactKind: 'wiki', + artifactKey: 'knowledge/global/orders.md', + actionType: 
'wiki_written', + }, + { + rawPath: 'views/orders.view.lkml', + artifactKind: 'sl', + artifactKey: 'warehouse.orders', + actionType: 'measure_added', + }, + { + rawPath: 'views/customers.view.lkml', + artifactKind: null, + artifactKey: null, + actionType: 'skipped', + }, + ], + toolTranscripts: [ + { + unitKey: 'orders', + path: '/tmp/klo/run/wu-transcripts/job-1/orders.jsonl', + toolCallCount: 3, + errorCount: 0, + toolNames: ['read_raw_span', 'wiki_write', 'sl_write_source'], + }, + { + unitKey: 'customers', + path: '/tmp/klo/run/wu-transcripts/job-1/customers.jsonl', + toolCallCount: 2, + errorCount: 1, + toolNames: ['read_raw_span', 'sl_write_source'], + }, + ], + workUnits: [ + { + unitKey: 'orders', + rawFiles: ['views/orders.view.lkml'], + status: 'success', + actions: [ + { target: 'wiki', type: 'created', key: 'knowledge/global/orders.md', detail: 'order facts' }, + { target: 'sl', type: 'updated', key: 'warehouse.orders', detail: 'order measures' }, + ], + touchedSlSources: [{ connectionId: 'warehouse', sourceName: 'warehouse.orders' }], + }, + { + unitKey: 'customers', + rawFiles: ['views/customers.view.lkml'], + status: 'failed', + reason: 'semantic-layer validation failed', + actions: [{ target: 'sl', type: 'created', key: 'warehouse.customers', detail: 'invalid source' }], + touchedSlSources: [{ connectionId: 'warehouse', sourceName: 'warehouse.customers' }], + }, + ], + }, + }; +} + +describe('memory-flow event mapping', () => { + it('maps a local ingest run to source, snapshot, diff, chunk, and report events', () => { + const replay = localIngestRunToMemoryFlowReplay(localRecord()); + + expect(replay).toMatchObject({ + runId: 'local-run-1', + connectionId: 'warehouse', + adapter: 'metricflow', + status: 'done', + sourceDir: '/tmp/source', + syncId: 'sync-1', + plannedWorkUnits: [ + { unitKey: 'orders', rawFiles: ['models/orders.yml'], peerFileCount: 1, dependencyCount: 1 }, + { unitKey: 'revenue', rawFiles: ['models/revenue.yml'], peerFileCount: 0, 
dependencyCount: 0 }, + ], + }); + expect(replay.events).toEqual([ + { type: 'source_acquired', adapter: 'metricflow', trigger: 'manual_resync', fileCount: 7 }, + { type: 'scope_detected', fingerprint: null }, + { type: 'raw_snapshot_written', syncId: 'sync-1', rawFileCount: 7 }, + { type: 'diff_computed', added: 2, modified: 1, deleted: 1, unchanged: 4 }, + { type: 'chunks_planned', chunkCount: 2, workUnitCount: 2, evictionCount: 1 }, + { type: 'report_created', runId: 'local-run-1' }, + ]); + }); + + it('maps an ingest report snapshot to work-unit, candidate, gate, saved, provenance, and report events', () => { + const replay = ingestReportToMemoryFlowReplay(reportSnapshot(), { provenanceRowCount: 5 }); + + expect(replay).toMatchObject({ + runId: 'run-1', + connectionId: 'warehouse', + adapter: 'lookml', + status: 'error', + sourceDir: null, + syncId: 'sync-2', + reportId: 'report-1', + plannedWorkUnits: [ + { unitKey: 'orders', rawFiles: ['views/orders.view.lkml'], peerFileCount: 0, dependencyCount: 0 }, + { unitKey: 'customers', rawFiles: ['views/customers.view.lkml'], peerFileCount: 0, dependencyCount: 0 }, + ], + }); + expect(replay.events).toContainEqual({ + type: 'candidate_action', + unitKey: 'orders', + target: 'wiki', + action: 'created', + key: 'knowledge/global/orders.md', + }); + expect(replay.events).toContainEqual({ + type: 'work_unit_finished', + unitKey: 'customers', + status: 'failed', + reason: 'semantic-layer validation failed', + }); + expect(replay.events).toContainEqual({ type: 'reconciliation_finished', conflictCount: 1, fallbackCount: 1 }); + expect(replay.events).toContainEqual({ type: 'saved', commitSha: 'abc123456789', wikiCount: 1, slCount: 2 }); // pragma: allowlist secret + expect(replay.events).toContainEqual({ type: 'provenance_recorded', rowCount: 5 }); + expect(replay.events).toContainEqual({ type: 'report_created', runId: 'run-1', reportPath: 'report-1' }); + expect(replay.details.actions).toEqual([ + { + unitKey: 'orders', + 
target: 'wiki', + action: 'created', + key: 'knowledge/global/orders.md', + summary: 'order facts', + rawFiles: ['views/orders.view.lkml'], + status: 'success', + }, + { + unitKey: 'orders', + target: 'sl', + action: 'updated', + key: 'warehouse.orders', + summary: 'order measures', + rawFiles: ['views/orders.view.lkml'], + status: 'success', + }, + { + unitKey: 'customers', + target: 'sl', + action: 'created', + key: 'warehouse.customers', + summary: 'invalid source', + rawFiles: ['views/customers.view.lkml'], + status: 'failed', + }, + ]); + expect(replay.details.provenance).toEqual([ + { + rawPath: 'views/orders.view.lkml', + artifactKind: 'wiki', + artifactKey: 'knowledge/global/orders.md', + actionType: 'wiki_written', + }, + { + rawPath: 'views/orders.view.lkml', + artifactKind: 'sl', + artifactKey: 'warehouse.orders', + actionType: 'measure_added', + }, + { + rawPath: 'views/customers.view.lkml', + artifactKind: null, + artifactKey: null, + actionType: 'skipped', + }, + ]); + expect(replay.details.transcripts).toEqual([ + { + unitKey: 'orders', + path: '/tmp/klo/run/wu-transcripts/job-1/orders.jsonl', + toolCallCount: 3, + errorCount: 0, + toolNames: ['read_raw_span', 'wiki_write', 'sl_write_source'], + }, + { + unitKey: 'customers', + path: '/tmp/klo/run/wu-transcripts/job-1/customers.jsonl', + toolCallCount: 2, + errorCount: 1, + toolNames: ['read_raw_span', 'sl_write_source'], + }, + ]); + }); + + it('prefers captured memory-flow snapshots from report bodies', () => { + const report = reportSnapshot(); + Object.assign(report.body, { + memoryFlow: { + metadata: { + schemaVersion: 1, + mode: 'full', + origin: 'captured', + timing: 'captured', + capturedAt: '2026-05-01T10:00:03.000Z', + sourceReportId: null, + sourceReportPath: null, + fallbackReason: null, + }, + runId: 'run-1', + connectionId: 'warehouse', + adapter: 'lookml', + status: 'running', + sourceDir: null, + syncId: 'sync-2', + errors: [], + plannedWorkUnits: [ + { unitKey: 'orders', rawFiles: 
['views/orders.view.lkml'], peerFileCount: 1, dependencyCount: 2 }, + ], + details: { actions: [], provenance: [], transcripts: [] }, + events: [ + { + type: 'source_acquired', + adapter: 'lookml', + trigger: 'manual_resync', + fileCount: 1, + emittedAt: '2026-05-01T10:00:00.000Z', + }, + ], + }, + }); + + const replay = ingestReportToMemoryFlowReplay(report); + + expect(replay.metadata).toEqual({ + schemaVersion: 1, + mode: 'full', + origin: 'captured', + timing: 'captured', + capturedAt: '2026-05-01T10:00:03.000Z', + sourceReportId: 'report-1', + sourceReportPath: 'report-1', + fallbackReason: null, + }); + expect(replay.status).toBe('error'); + expect(replay.reportId).toBe('report-1'); + expect(replay.reportPath).toBe('report-1'); + expect(replay.events[0]).toMatchObject({ type: 'source_acquired', emittedAt: '2026-05-01T10:00:00.000Z' }); + expect(replay.events).toContainEqual({ type: 'report_created', runId: 'run-1', reportPath: 'report-1' }); + }); + + it('labels reconstructed report replays as synthetic when no captured snapshot exists', () => { + const replay = ingestReportToMemoryFlowReplay(reportSnapshot(), { provenanceRowCount: 5 }); + + expect(replay.metadata).toEqual({ + schemaVersion: 1, + mode: 'full', + origin: 'synthetic-report', + timing: 'synthetic', + capturedAt: '2026-04-30T10:00:02.000Z', + sourceReportId: 'report-1', + sourceReportPath: 'report-1', + fallbackReason: 'report did not include captured memory-flow events', + }); + }); +}); diff --git a/packages/context/src/ingest/memory-flow/events.ts b/packages/context/src/ingest/memory-flow/events.ts new file mode 100644 index 00000000..7692e710 --- /dev/null +++ b/packages/context/src/ingest/memory-flow/events.ts @@ -0,0 +1,247 @@ +import type { MemoryAction } from '../../memory/index.js'; +import type { LocalIngestRunRecord } from '../local-stage-ingest.js'; +import type { IngestReportSnapshot } from '../reports.js'; +import type { + MemoryFlowActionDetail, + MemoryFlowDetailSections, + 
MemoryFlowEvent, + MemoryFlowPlannedWorkUnit, + MemoryFlowReplayInput, +} from './types.js'; + +interface ReportReplayOptions { + provenanceRowCount?: number; +} + +function plannedWorkUnitFromLocal( + workUnit: LocalIngestRunRecord['workUnits'][number], +): MemoryFlowPlannedWorkUnit { + return { + unitKey: workUnit.unitKey, + rawFiles: workUnit.rawFiles, + peerFileCount: workUnit.peerFileIndex.length, + dependencyCount: workUnit.dependencyPaths.length, + }; +} + +function plannedWorkUnitFromReport( + workUnit: IngestReportSnapshot['body']['workUnits'][number], +): MemoryFlowPlannedWorkUnit { + return { + unitKey: workUnit.unitKey, + rawFiles: workUnit.rawFiles, + peerFileCount: 0, + dependencyCount: 0, + }; +} + +function countActions(actions: MemoryAction[], target: MemoryAction['target']): number { + return actions.filter((action) => action.target === target).length; +} + +function allReportActions(report: IngestReportSnapshot): MemoryAction[] { + return report.body.workUnits.flatMap((workUnit) => workUnit.actions); +} + +function rawFileCount(report: IngestReportSnapshot): number { + return new Set(report.body.workUnits.flatMap((workUnit) => workUnit.rawFiles)).size; +} + +function emptyMemoryFlowDetails(): MemoryFlowDetailSections { + return { actions: [], provenance: [], transcripts: [] }; +} + +function fullModeMetadata(input: { + origin: 'captured' | 'synthetic-report'; + timing: 'captured' | 'synthetic'; + capturedAt: string | null; + sourceReportId: string | null; + sourceReportPath: string | null; + fallbackReason: string | null; +}): MemoryFlowReplayInput['metadata'] { + return { + schemaVersion: 1, + mode: 'full', + origin: input.origin, + timing: input.timing, + capturedAt: input.capturedAt, + sourceReportId: input.sourceReportId, + sourceReportPath: input.sourceReportPath, + fallbackReason: input.fallbackReason, + }; +} + +function reportStatus(report: IngestReportSnapshot): MemoryFlowReplayInput['status'] { + return 
report.body.failedWorkUnits.length > 0 ? 'error' : 'done'; +} + +function reportCreatedEvent(report: IngestReportSnapshot): MemoryFlowEvent { + return { type: 'report_created', runId: report.runId, reportPath: report.id }; +} + +function capturedReportReplay(report: IngestReportSnapshot): MemoryFlowReplayInput | null { + if (!report.body.memoryFlow) { + return null; + } + + const hasReportCreated = report.body.memoryFlow.events.some((event) => event.type === 'report_created'); + return { + ...report.body.memoryFlow, + metadata: fullModeMetadata({ + origin: 'captured', + timing: 'captured', + capturedAt: report.body.memoryFlow.metadata?.capturedAt ?? report.createdAt, + sourceReportId: report.id, + sourceReportPath: report.id, + fallbackReason: null, + }), + runId: report.runId, + connectionId: report.connectionId, + adapter: report.sourceKey, + status: reportStatus(report), + syncId: report.body.syncId, + reportId: report.id, + reportPath: report.id, + errors: report.body.failedWorkUnits, + events: hasReportCreated ? 
/**
 * Converts a persisted ingest report into a memory-flow replay input.
 *
 * If `capturedReportReplay` yields a replay for the report, that value is
 * returned untouched. Otherwise an event timeline is synthesized from the
 * report body and the result is tagged with `origin: 'synthetic-report'`
 * metadata.
 *
 * @param report  Ingest report snapshot to replay.
 * @param options Optional overrides; `provenanceRowCount` takes precedence over
 *                the action count for the `provenance_recorded` event.
 * @returns The replay input for the run described by `report`.
 */
export function ingestReportToMemoryFlowReplay(
  report: IngestReportSnapshot,
  options: ReportReplayOptions = {},
): MemoryFlowReplayInput {
  const recorded = capturedReportReplay(report);
  if (recorded) {
    return recorded;
  }

  const reportActions = allReportActions(report);

  // One started / candidate_action* / finished group per work unit, in report order.
  const unitTimeline: MemoryFlowEvent[] = [];
  for (const unit of report.body.workUnits) {
    unitTimeline.push({ type: 'work_unit_started', unitKey: unit.unitKey, skills: [], stepBudget: 0 });

    for (const candidate of unit.actions) {
      unitTimeline.push({
        type: 'candidate_action',
        unitKey: unit.unitKey,
        target: candidate.target,
        action: candidate.type,
        key: candidate.key,
      });
    }

    // `reason` is attached only when the report carries a truthy one.
    const outcome = unit.reason ? { reason: unit.reason } : {};
    unitTimeline.push({
      type: 'work_unit_finished',
      unitKey: unit.unitKey,
      status: unit.status,
      ...outcome,
    });
  }

  const timeline: MemoryFlowEvent[] = [
    {
      type: 'source_acquired',
      adapter: report.sourceKey,
      trigger: 'manual_resync',
      fileCount: rawFileCount(report),
    },
    { type: 'scope_detected', fingerprint: null },
    { type: 'raw_snapshot_written', syncId: report.body.syncId, rawFileCount: rawFileCount(report) },
    { type: 'diff_computed', ...report.body.diffSummary },
    {
      type: 'chunks_planned',
      chunkCount: report.body.workUnits.length,
      workUnitCount: report.body.workUnits.length,
      evictionCount: report.body.evictionInputs.length,
    },
    ...unitTimeline,
    {
      type: 'reconciliation_finished',
      conflictCount: report.body.conflictsResolved.length,
      fallbackCount: report.body.unmappedFallbacks.length,
    },
    {
      type: 'saved',
      commitSha: report.body.commitSha,
      wikiCount: countActions(reportActions, 'wiki'),
      slCount: countActions(reportActions, 'sl'),
    },
    // Caller-supplied row count wins; otherwise fall back to the number of actions.
    { type: 'provenance_recorded', rowCount: options.provenanceRowCount ?? reportActions.length },
    { type: 'report_created', runId: report.runId, reportPath: report.id },
  ];

  const metadata = fullModeMetadata({
    origin: 'synthetic-report',
    timing: 'synthetic',
    capturedAt: report.createdAt,
    sourceReportId: report.id,
    sourceReportPath: report.id,
    fallbackReason: 'report did not include captured memory-flow events',
  });

  return {
    metadata,
    runId: report.runId,
    connectionId: report.connectionId,
    adapter: report.sourceKey,
    status: reportStatus(report),
    sourceDir: null,
    syncId: report.body.syncId,
    reportId: report.id,
    reportPath: report.id,
    errors: report.body.failedWorkUnits,
    events: timeline,
    plannedWorkUnits: report.body.workUnits.map(plannedWorkUnitFromReport),
    details: detailSectionsFromReport(report),
  };
}
/**
 * Test fixture: a running memory-flow view model with six columns, one failed
 * work unit ("customers"), one flagged fallback, and one row in each detail
 * section. A fresh object graph is built on every call.
 */
function view(): MemoryFlowViewModel {
  const trustIssues: MemoryFlowViewModel['trustIssues'] = [
    {
      id: 'flagged-fallbacks',
      severity: 'warning',
      title: 'Flagged fallbacks',
      detail: '1 fallback needs review',
      columnId: 'gates',
    },
    {
      id: 'work-unit-failed:customers',
      severity: 'failed',
      title: 'WorkUnit failed',
      detail: 'customers failed: semantic-layer validation failed',
      columnId: 'workUnits',
      targetLabel: 'customers',
    },
  ];

  const details: MemoryFlowViewModel['details'] = {
    actions: [
      {
        unitKey: 'orders',
        target: 'wiki',
        action: 'created',
        key: 'knowledge/orders.md',
        summary: 'order facts',
        rawFiles: ['orders.yml'],
        status: 'success',
      },
    ],
    provenance: [
      {
        rawPath: 'orders.yml',
        artifactKind: 'wiki',
        artifactKey: 'knowledge/orders.md',
        actionType: 'wiki_written',
      },
    ],
    transcripts: [
      {
        unitKey: 'customers',
        path: '/tmp/transcripts/customers.jsonl',
        toolCallCount: 2,
        errorCount: 1,
        toolNames: ['read_raw_span', 'sl_write_source'],
      },
    ],
  };

  const columns: MemoryFlowViewModel['columns'] = [
    {
      id: 'source',
      title: 'SOURCE',
      status: 'complete',
      headline: '2 raw files',
      counters: ['sync sync-1', 'scope none'],
      chips: [{ label: 'metricflow', status: 'complete' }],
      details: ['Trigger: manual_resync', 'Adapter: metricflow'],
    },
    {
      id: 'chunks',
      title: 'CHUNKS',
      status: 'complete',
      headline: '2 chunks',
      counters: ['+1 ~1 -0 =0', '0 deletions'],
      chips: [{ label: 'orders', status: 'complete' }],
      details: ['Work units planned: 2', 'Eviction candidates: 0'],
    },
    {
      id: 'workUnits',
      title: 'WORKUNITS',
      status: 'active',
      headline: '2 WUs',
      counters: ['1 done', '1 failed', '1 active'],
      chips: [
        { label: 'orders', status: 'complete', detail: '1 raw span' },
        { label: 'customers', status: 'failed', detail: 'semantic-layer validation failed' },
      ],
      details: ['orders: 1 raw, 0 peers, 1 deps', 'customers: 1 raw, 0 peers, 0 deps'],
    },
    {
      id: 'actions',
      title: 'ACTIONS',
      status: 'complete',
      headline: '2 candidates',
      counters: ['1 wiki', '1 SL'],
      chips: [{ label: 'knowledge/orders.md', status: 'complete' }],
      details: ['wiki created: knowledge/orders.md', 'sl updated: warehouse.orders'],
    },
    {
      id: 'gates',
      title: 'GATES',
      status: 'warning',
      headline: '0 conflict, 1 fallback',
      counters: ['1 failed', '1 flagged'],
      chips: [{ label: 'customers', status: 'failed' }],
      details: ['Failed work units: 1', 'Flagged fallbacks: 1', 'customers: semantic-layer validation failed'],
    },
    {
      id: 'saved',
      title: 'SAVED',
      status: 'complete',
      headline: '2 memories',
      counters: ['1 wiki', '1 SL', '2 provenance'],
      chips: [{ label: 'abc12345', status: 'complete' }],
      details: ['Commit: abc12345', 'Run: run-1', 'Report: report-1', 'Provenance rows: 2'],
    },
  ];

  return {
    title: 'KLO memory flow warehouse/metricflow running',
    subtitle: 'Run run-1 Sync sync-1',
    status: 'running',
    activeLine: 'active: WorkUnit orders step 2/4',
    selectedTitle: 'WORKUNITS',
    selectedDetails: ['orders: 1 raw, 0 peers, 1 deps'],
    completionLine: null,
    trustIssues,
    details,
    columns,
  };
}
reduceMemoryFlowInteractionState(state, 'down', view()); + expect(state.selectedChipIndex).toBe(1); + + state = reduceMemoryFlowInteractionState(state, 'right', view()); + expect(state.selectedColumnId).toBe('actions'); + expect(state.selectedChipIndex).toBe(0); + + state = reduceMemoryFlowInteractionState(state, 'left', view()); + expect(state.selectedColumnId).toBe('workUnits'); + expect(state.selectedChipIndex).toBe(0); + }); + + it('selects a column directly for mouse-driven renderers', () => { + const initial = createInitialMemoryFlowInteractionState(view()); + + const selected = selectMemoryFlowColumn(view(), initial, 'actions'); + + expect(selected).toMatchObject({ + selectedColumnId: 'actions', + selectedChipIndex: 0, + expanded: true, + shouldQuit: false, + }); + expect(selectedMemoryFlowColumn(view(), selected).title).toBe('ACTIONS'); + expect(selectedMemoryFlowDetails(view(), selected)).toContain('wiki created: knowledge/orders.md'); + }); + + it('selects and clamps a chip directly for mouse-driven renderers', () => { + const initial = createInitialMemoryFlowInteractionState(view()); + + const selected = selectMemoryFlowChip(view(), initial, 'workUnits', 99); + + expect(selected).toMatchObject({ + selectedColumnId: 'workUnits', + selectedChipIndex: 1, + expanded: true, + shouldQuit: false, + }); + expect(selectedMemoryFlowDetails(view(), selected)).toContain( + 'Selected chip: customers (semantic-layer validation failed)', + ); + }); + + it('ignores direct selection of an unknown column', () => { + const initial = createInitialMemoryFlowInteractionState(view()); + + const selected = selectMemoryFlowColumn(view(), initial, 'missing' as never); + + expect(selected).toEqual({ ...initial, shouldQuit: false }); + }); + + it('toggles expansion, attention filtering, all panes, and quit', () => { + let state: MemoryFlowInteractionState = createInitialMemoryFlowInteractionState(view()); + + state = reduceMemoryFlowInteractionState(state, 'enter', view()); + 
expect(state.expanded).toBe(true); + expect(selectedMemoryFlowDetails(view(), state)).toContain('orders: 1 raw, 0 peers, 1 deps'); + + state = reduceMemoryFlowInteractionState(state, 'filter', view()); + expect(state.filter).toBe('failed_or_flagged'); + expect(visibleMemoryFlowChips(selectedMemoryFlowColumn(view(), state), state)).toEqual([ + { label: 'customers', status: 'failed', detail: 'semantic-layer validation failed' }, + ]); + + state = reduceMemoryFlowInteractionState(state, 'tab', view()); + expect(state.pane).toBe('trust'); + + state = reduceMemoryFlowInteractionState(state, 'tab', view()); + expect(state.pane).toBe('details'); + + state = reduceMemoryFlowInteractionState(state, 'tab', view()); + expect(state.pane).toBe('log'); + expect(selectedMemoryFlowDetails(view(), state)).toContain('WORKUNITS active: 2 WUs'); + + state = reduceMemoryFlowInteractionState(state, 'tab', view()); + expect(state.pane).toBe('provenance'); + expect(selectedMemoryFlowDetails(view(), state)).toContain( + 'orders.yml -> wiki:knowledge/orders.md (wiki_written)', + ); + + state = reduceMemoryFlowInteractionState(state, 'tab', view()); + expect(state.pane).toBe('transcript'); + expect(selectedMemoryFlowDetails(view(), state)).toContain( + 'customers: 2 tool calls, 1 errors, tools read_raw_span, sl_write_source', + ); + + state = reduceMemoryFlowInteractionState(state, 'tab', view()); + expect(state.pane).toBe('overview'); + + state = reduceMemoryFlowInteractionState(state, 'provenance', view()); + expect(state.pane).toBe('provenance'); + expect(selectedMemoryFlowDetails(view(), state)).toContain( + 'orders.yml -> wiki:knowledge/orders.md (wiki_written)', + ); + + state = reduceMemoryFlowInteractionState(state, 'transcript', view()); + expect(state.pane).toBe('transcript'); + expect(selectedMemoryFlowDetails(view(), state)).toContain( + 'customers: 2 tool calls, 1 errors, tools read_raw_span, sl_write_source', + ); + + state = reduceMemoryFlowInteractionState(state, 'quit', 
view()); + expect(state.shouldQuit).toBe(true); + }); + + it('shows trust issue details and filters chips using issue targets', () => { + let state: MemoryFlowInteractionState = createInitialMemoryFlowInteractionState(view()); + + state = reduceMemoryFlowInteractionState(state, 'tab', view()); + expect(state.pane).toBe('trust'); + expect(selectedMemoryFlowDetails(view(), state)).toEqual([ + 'FAILED WorkUnit failed: customers failed: semantic-layer validation failed', + 'WARNING Flagged fallbacks: 1 fallback needs review', + ]); + + state = reduceMemoryFlowInteractionState(state, 'filter', view()); + expect(visibleMemoryFlowChips(selectedMemoryFlowColumn(view(), state), state, view())).toEqual([ + { label: 'customers', status: 'failed', detail: 'semantic-layer validation failed' }, + ]); + }); + + it('searches across columns, trust issues, actions, provenance, and transcripts', () => { + const matches = findMemoryFlowSearchMatches(view(), 'customers'); + + expect(matches.map((match) => match.label)).toEqual([ + 'WORKUNITS > customers', + 'GATES', + 'Trust > WorkUnit failed', + 'Transcript > customers', + ]); + + let state = createInitialMemoryFlowInteractionState(view()); + state = reduceMemoryFlowInteractionState(state, 'search-start', view()); + state = reduceMemoryFlowInteractionState(state, { type: 'search-input', value: 'customers' }, view()); + + expect(state.search).toEqual({ + editing: true, + query: 'customers', + matchIndex: 0, + }); + expect(state.selectedColumnId).toBe('workUnits'); + expect(state.selectedChipIndex).toBe(1); + + state = reduceMemoryFlowInteractionState(state, 'search-submit', view()); + expect(state.search.editing).toBe(false); + }); + + it('cycles search matches forward and backward with wraparound', () => { + let state = createInitialMemoryFlowInteractionState(view()); + state = reduceMemoryFlowInteractionState(state, 'search-start', view()); + state = reduceMemoryFlowInteractionState(state, { type: 'search-input', value: 'customers' 
/** Returns true when a chip status is one that needs attention: 'failed' or 'warning'. */
function attentionStatus(status: MemoryFlowChip['status']): boolean {
  switch (status) {
    case 'failed':
    case 'warning':
      return true;
    default:
      return false;
  }
}
/**
 * Returns the chips of `column` that are visible under the current filter.
 *
 * - With `filter === 'all'` the column's own `chips` array is returned as-is
 *   (same instance, not a copy).
 * - Otherwise only chips whose status is 'failed' or 'warning' are kept, plus
 *   chips whose label is the `targetLabel` of a trust issue raised against this
 *   column. The trust-issue lookup only happens when `view` is supplied.
 *
 * @param column Column whose chips are being filtered.
 * @param state  Only the `filter` field of the interaction state is read.
 * @param view   Optional view model used to resolve trust-issue targets.
 */
export function visibleMemoryFlowChips(
  column: MemoryFlowColumnView,
  state: Pick<MemoryFlowInteractionState, 'filter'>,
  view?: MemoryFlowViewModel,
): MemoryFlowChip[] {
  if (state.filter === 'all') {
    return column.chips;
  }

  // Without a view there are no trust issues to consult, so fall back to an empty target set.
  const issueTargets = view ? trustIssueTargets(view, column) : new Set<string>();
  return column.chips.filter((chip) => attentionStatus(chip.status) || issueTargets.has(chip.label));
}
/**
 * Applies a search query to the interaction state and selects one of its matches.
 *
 * The query is stored in `state.search` regardless of whether it matches. When
 * there are matches, `matchIndex` is clamped into range, the matched column is
 * selected and expanded, and the matched chip (if the match points at one) is
 * selected.
 *
 * `match.chipIndex` is a position in the column's full `chips` array, whereas
 * `selectedChipIndex` is interpreted everywhere else as a position in the
 * *visible* chips (see `visibleMemoryFlowChips`). The matched chip is therefore
 * looked up in the visible list; if the active filter hides it, or the match
 * has no chip, the first visible chip is selected.
 *
 * @param view       View model being searched.
 * @param state      Current interaction state.
 * @param query      Query text to store and search for.
 * @param matchIndex Desired match position; clamped to the available matches.
 * @returns The next interaction state, always with `shouldQuit: false`.
 */
function selectSearchMatch(
  view: MemoryFlowViewModel,
  state: MemoryFlowInteractionState,
  query: string,
  matchIndex: number,
): MemoryFlowInteractionState {
  const matches = findMemoryFlowSearchMatches(view, query);
  if (matches.length === 0) {
    return {
      ...state,
      search: { editing: state.search.editing, query, matchIndex: 0 },
      shouldQuit: false,
    };
  }

  const index = Math.max(0, Math.min(matchIndex, matches.length - 1));
  const match = matches[index]!;
  const nextState = {
    ...state,
    selectedColumnId: match.columnId,
    selectedChipIndex: 0,
    expanded: true,
    search: { editing: state.search.editing, query, matchIndex: index },
    shouldQuit: false,
  };

  // Translate the unfiltered chip position into a position within the visible chips.
  const column = selectedMemoryFlowColumn(view, nextState);
  const matchedChip = match.chipIndex == null ? undefined : column.chips[match.chipIndex];
  const visibleIndex = matchedChip ? visibleMemoryFlowChips(column, nextState, view).indexOf(matchedChip) : -1;

  return { ...nextState, selectedChipIndex: Math.max(0, visibleIndex) };
}
/**
 * Reducer for the interactive memory-flow view: returns the state that results
 * from applying `command` to `state`. The input state is never mutated.
 *
 * Every command other than `'quit'` yields `shouldQuit: false`; unrecognized
 * commands only reset that flag.
 *
 * `'search-backspace'` removes the last Unicode code point of the query rather
 * than the last UTF-16 code unit, so deleting a character outside the Basic
 * Multilingual Plane does not leave a lone surrogate behind.
 *
 * @param state   Current interaction state.
 * @param command Either a command name or a `{ type: 'search-input', value }` object.
 * @param view    View model used to resolve columns, chips and search matches.
 * @returns The next interaction state.
 */
export function reduceMemoryFlowInteractionState(
  state: MemoryFlowInteractionState,
  command: MemoryFlowInteractionCommand,
  view: MemoryFlowViewModel,
): MemoryFlowInteractionState {
  if (typeof command === 'object') {
    if (command.type === 'search-input') {
      // Typing always restarts from the first match of the extended query.
      return selectSearchMatch(view, state, `${state.search.query}${command.value}`, 0);
    }
    return { ...state, shouldQuit: false };
  }

  switch (command) {
    case 'search-start':
      return { ...state, pane: 'details', search: { ...state.search, editing: true }, shouldQuit: false };

    case 'search-submit':
      return { ...state, search: { ...state.search, editing: false }, shouldQuit: false };

    case 'search-clear':
      return { ...state, search: { editing: false, query: '', matchIndex: 0 }, shouldQuit: false };

    case 'search-backspace': {
      // Array.from iterates by code point, keeping surrogate pairs intact.
      const shortened = Array.from(state.search.query).slice(0, -1).join('');
      return selectSearchMatch(view, state, shortened, 0);
    }

    case 'search-next':
      return moveSearchMatch(view, state, 1);

    case 'search-previous':
      return moveSearchMatch(view, state, -1);

    case 'quit':
      return { ...state, shouldQuit: true };

    case 'left':
      return withColumn(view, { ...state, shouldQuit: false }, -1);

    case 'right':
      return withColumn(view, { ...state, shouldQuit: false }, 1);

    case 'up':
    case 'down': {
      const column = selectedMemoryFlowColumn(view, state);
      const visibleChips = visibleMemoryFlowChips(column, state, view);
      const delta = command === 'up' ? -1 : 1;
      return {
        ...state,
        // Stay within the visible chips; an empty column pins the selection to 0.
        selectedChipIndex:
          visibleChips.length === 0
            ? 0
            : Math.max(0, Math.min(state.selectedChipIndex + delta, visibleChips.length - 1)),
        shouldQuit: false,
      };
    }

    case 'enter':
      return { ...state, expanded: !state.expanded, shouldQuit: false };

    case 'tab':
      return { ...state, pane: nextPane(state.pane), shouldQuit: false };

    case 'filter': {
      // Changing the filter changes which chips are visible, so restart at the first one.
      const nextState = { ...state, filter: toggleFilter(state.filter), selectedChipIndex: 0, shouldQuit: false };
      return {
        ...nextState,
        selectedChipIndex: clampChipIndex(selectedMemoryFlowColumn(view, nextState), nextState, view),
      };
    }

    case 'provenance':
      return { ...state, pane: 'provenance', expanded: true, shouldQuit: false };

    case 'transcript':
      return { ...state, pane: 'transcript', expanded: true, shouldQuit: false };

    default:
      return { ...state, shouldQuit: false };
  }
}
/**
 * Produces the detail lines shown for the current selection.
 *
 * - 'log' pane: the active line, one status line per column, then the
 *   completion line when there is one.
 * - 'trust', 'provenance', 'transcript' panes: delegated to the matching helper.
 * - 'overview' pane while collapsed: the column headline and counters.
 * - anything else: the column's details, or a "TITLE: headline" line when the
 *   column has none.
 *
 * In the last two cases a "Selected chip: ..." line is appended when a visible
 * chip is selected.
 */
export function selectedMemoryFlowDetails(view: MemoryFlowViewModel, state: MemoryFlowInteractionState): string[] {
  const column = selectedMemoryFlowColumn(view, state);
  const selectedChip = visibleMemoryFlowChips(column, state, view)[state.selectedChipIndex];

  switch (state.pane) {
    case 'log': {
      const lines: string[] = [view.activeLine];
      for (const candidate of view.columns) {
        lines.push(`${candidate.title} ${candidate.status}: ${candidate.headline}`);
      }
      if (view.completionLine) {
        lines.push(view.completionLine);
      }
      return lines;
    }
    case 'trust':
      return trustIssueDetailLines(view);
    case 'provenance':
      return provenanceDetailLines(view);
    case 'transcript':
      return transcriptDetailLines(view, selectedChip);
    default:
      break;
  }

  // Shared trailer describing the selected chip, if any.
  const chipLines: string[] = [];
  if (selectedChip) {
    const suffix = selectedChip.detail ? ` (${selectedChip.detail})` : '';
    chipLines.push(`Selected chip: ${selectedChip.label}${suffix}`);
  }

  const collapsedOverview = state.pane === 'overview' && !state.expanded;
  if (collapsedOverview) {
    return [column.headline, ...column.counters, ...chipLines];
  }

  const body = column.details.length ? column.details : [`${column.title}: ${column.headline}`];
  return [...body, ...chipLines];
}
Commit: abc12345 Run: run-1 Report: report-1', + trustIssues: [ + { + id: 'work-unit-failed:customers', + severity: 'failed', + title: 'WorkUnit failed', + detail: 'customers failed: validation reset', + columnId: 'workUnits', + targetLabel: 'customers', + }, + { + id: 'flagged-fallbacks', + severity: 'warning', + title: 'Flagged fallbacks', + detail: '1 fallback needs review', + columnId: 'gates', + }, + ], + details: { + actions: [ + { + unitKey: 'orders', + target: 'wiki', + action: 'created', + key: 'knowledge/orders.md', + summary: 'order facts', + rawFiles: ['orders.yml'], + status: 'success', + }, + ], + provenance: [ + { + rawPath: 'orders.yml', + artifactKind: 'wiki', + artifactKey: 'knowledge/orders.md', + actionType: 'wiki_written', + }, + ], + transcripts: [ + { + unitKey: 'customers', + path: '/tmp/transcripts/customers.jsonl', + toolCallCount: 2, + errorCount: 1, + toolNames: ['read_raw_span', 'sl_write_source'], + }, + ], + }, + columns: [ + { + id: 'source', + title: 'SOURCE', + status: 'complete', + headline: '2 raw files', + counters: ['sync sync-1', 'scope none'], + chips: [{ label: 'metricflow', status: 'complete' }], + details: ['Trigger: manual_resync', 'Adapter: metricflow'], + }, + { + id: 'chunks', + title: 'CHUNKS', + status: 'complete', + headline: '2 chunks', + counters: ['+1 ~1 -0 =0', '0 deletions'], + chips: [{ label: 'orders', status: 'complete' }], + details: ['Work units planned: 2', 'Eviction candidates: 0'], + }, + { + id: 'workUnits', + title: 'WORKUNITS', + status: 'warning', + headline: '2 WUs', + counters: ['1 done', '1 failed', '0 active'], + chips: [ + { label: 'orders', status: 'complete', detail: '1 raw span' }, + { label: 'customers', status: 'failed', detail: 'validation reset' }, + ], + details: ['orders: 1 raw, 0 peers, 1 deps', 'customers: 1 raw, 0 peers, 0 deps'], + }, + { + id: 'actions', + title: 'ACTIONS', + status: 'complete', + headline: '2 candidates', + counters: ['1 wiki', '1 SL'], + chips: [{ label: 
// Rendering tests for the interactive memory-flow view, driven by the `view()` fixture above.
describe('renderMemoryFlowInteractive', () => {
  it('marks the selected column and selected chip in a wide layout', () => {
    const interaction = createInitialMemoryFlowInteractionState(view());

    const rendered = renderMemoryFlowInteractive(view(), interaction, { terminalWidth: 140 });

    const expectedFragments = [
      'KLO memory flow warehouse/metricflow done',
      'OK SOURCE -> OK CHUNKS -> !! WORKUNITS -> OK ACTIONS -> !! GATES -> OK SAVED',
      '[WORKUNITS]',
      '> orders',
      'Selected: WORKUNITS > orders',
      'Pane: overview Filter: all',
      '- Selected chip: orders (1 raw span)',
      'Saved 2 memories from 2 raw files: 1 wiki pages, 1 SL updates. Commit: abc12345 Run: run-1 Report: report-1',
    ];
    for (const fragment of expectedFragments) {
      expect(rendered).toContain(fragment);
    }
  });

  it('renders attention-filtered details in a narrow layout', () => {
    // Apply the attention filter, then expand the selected column.
    const filtered = reduceMemoryFlowInteractionState(
      createInitialMemoryFlowInteractionState(view()),
      'filter',
      view(),
    );
    const expanded = reduceMemoryFlowInteractionState(filtered, 'enter', view());

    const rendered = renderMemoryFlowInteractive(view(), expanded, { terminalWidth: 72 });

    const expectedFragments = [
      'OK SOURCE -> OK CHUNKS -> !! WORKUNITS -> OK ACTIONS -> !! GATES -> OK SAVED',
      '[WORKUNITS]',
      'Filter: failed_or_flagged',
      '> customers',
      '- customers: 1 raw, 0 peers, 0 deps',
    ];
    for (const fragment of expectedFragments) {
      expect(rendered).toContain(fragment);
    }
  });

  it('renders report-backed transcript detail pane rows', () => {
    // Move to the second chip, then switch to the transcript pane.
    const moved = reduceMemoryFlowInteractionState(
      createInitialMemoryFlowInteractionState(view()),
      'down',
      view(),
    );
    const onTranscript = reduceMemoryFlowInteractionState(moved, 'transcript', view());

    const rendered = renderMemoryFlowInteractive(view(), onTranscript, { terminalWidth: 100 });

    expect(rendered).toContain('Pane: transcript Filter: all');
    expect(rendered).toContain('- customers: 2 tool calls, 1 errors, tools read_raw_span, sl_write_source');
  });

  it('keeps trust issues visible in the interactive renderer', () => {
    const interaction = createInitialMemoryFlowInteractionState(view());

    const rendered = renderMemoryFlowInteractive(view(), interaction, { terminalWidth: 140 });

    expect(rendered).toContain('Trust issues');
    expect(rendered).toContain('FAILED WorkUnit failed: customers failed: validation reset');
    expect(rendered).toContain('WARNING Flagged fallbacks: 1 fallback needs review');
  });
});
/**
 * Builds the chip cell text for one column.
 *
 * Takes the chips returned by `visibleMemoryFlowChips`, keeps at most the first
 * two, and joins their labels with `, `. When `column` is the selected column,
 * the chip whose position equals `state.selectedChipIndex` is prefixed with
 * `> `. Returns `-` when there are no chips to show.
 */
function chipLabel(view: MemoryFlowViewModel, column: MemoryFlowColumnView, state: MemoryFlowInteractionState): string {
  const visibleChips = visibleMemoryFlowChips(column, state, view);
  if (visibleChips.length === 0) {
    return '-';
  }

  // Only the selected column may carry a selection marker.
  const isSelectedColumn = column.id === state.selectedColumnId;
  const labels = visibleChips.slice(0, 2).map((chip, position) => {
    const marker = isSelectedColumn && position === state.selectedChipIndex ? '> ' : '';
    return `${marker}${chip.label}`;
  });
  return labels.join(', ');
}
/**
 * Renders the interactive view as a side-by-side table.
 *
 * Output order: title, active line, subtitle, connector line, trust issues,
 * a blank line, five table rows (column labels, headlines, first counter,
 * second counter, chips), a blank line, the detail lines, and finally the
 * completion line (preceded by a blank line) when the view has one. The
 * result always ends with a newline.
 */
function renderWide(view: MemoryFlowViewModel, state: MemoryFlowInteractionState): string {
  const header = [view.title, view.activeLine, view.subtitle, renderMemoryFlowConnectorLine(view)];
  const issues = trustIssueLines(view);

  const { columns } = view;
  const table = [
    row(columns.map((column) => columnLabel(column, state))),
    row(columns.map((column) => column.headline)),
    row(columns.map((column) => counterAt(column, 0))),
    row(columns.map((column) => counterAt(column, 1))),
    row(columns.map((column) => chipLabel(view, column, state))),
  ];

  const details = detailLines(view, state);
  const completion = view.completionLine ? ['', view.completionLine] : [];

  // The trailing empty entry produces the final newline after join.
  return [...header, ...issues, '', ...table, '', ...details, ...completion, ''].join('\n');
}
/**
 * Renders the interactive memory-flow view as text.
 *
 * Uses the stacked (narrow) layout when the terminal width is below 100
 * columns and the table (wide) layout otherwise. When `options.terminalWidth`
 * is not provided, a width of 120 is assumed.
 *
 * @param view - View model to render.
 * @param state - Current interaction state (selection, pane, filter, search).
 * @param options - Render options; only `terminalWidth` is read here.
 * @returns The rendered text.
 */
export function renderMemoryFlowInteractive(
  view: MemoryFlowViewModel,
  state: MemoryFlowInteractionState,
  options: MemoryFlowRenderOptions = {},
): string {
  const terminalWidth = options.terminalWidth ?? 120;
  const useNarrowLayout = terminalWidth < 100;
  return useNarrowLayout ? renderNarrow(view, state) : renderWide(view, state);
}
dependencyCount: 0, + }, + ], + }); + buffer.emit({ type: 'chunks_planned', chunkCount: 1, workUnitCount: 1, evictionCount: 0 }); + buffer.finish('done'); + + expect(onChange).toHaveBeenCalledTimes(4); + expect(buffer.snapshot()).toMatchObject({ + runId: 'live-run-1', + status: 'done', + syncId: 'sync-1', + plannedWorkUnits: [{ unitKey: 'fake-orders' }], + }); + expect(buffer.snapshot().events.map((event) => event.type)).toEqual(['source_acquired', 'chunks_planned']); + + const staleSnapshot = onChange.mock.calls[1][0] as MemoryFlowReplayInput; + expect(staleSnapshot.details).toEqual({ actions: [], provenance: [], transcripts: [] }); + staleSnapshot.events.push({ type: 'report_created', runId: 'mutated' }); + expect(buffer.snapshot().events.map((event) => event.type)).toEqual(['source_acquired', 'chunks_planned']); + }); + + it('stamps live events with emittedAt without mutating caller events', () => { + const event = { type: 'source_acquired', adapter: 'fake', trigger: 'manual_resync', fileCount: 2 } as const; + const buffer = createMemoryFlowLiveBuffer(initialReplay(), { + now: () => new Date('2026-05-01T10:00:00.000Z'), + }); + + buffer.emit(event); + + expect(event).not.toHaveProperty('emittedAt'); + expect(buffer.snapshot().events).toEqual([ + { + type: 'source_acquired', + adapter: 'fake', + trigger: 'manual_resync', + fileCount: 2, + emittedAt: '2026-05-01T10:00:00.000Z', + }, + ]); + }); + + it('marks failed runs with sanitized error messages', () => { + const onChange = vi.fn(); + const buffer = createMemoryFlowLiveBuffer(initialReplay(), { onChange }); + + buffer.finish('error', [ + sanitizeMemoryFlowError( + new Error('Connection failed for postgres://user:password@localhost:5432/db?api_key=abc password=secret'), // pragma: allowlist secret + ), + ]); + + expect(buffer.snapshot()).toMatchObject({ + status: 'error', + errors: ['Connection failed for postgres://[redacted] password=[redacted]'], + }); + expect(onChange).toHaveBeenCalledTimes(1); + }); +}); 
/**
 * Copies a single event so the copy shares no mutable state with the original.
 * `work_unit_started` is the only event variant carrying an array (`skills`);
 * every other variant holds primitives only, so a shallow spread is enough.
 */
function copyEvent(event: MemoryFlowEvent): MemoryFlowEvent {
  return event.type === 'work_unit_started' ? { ...event, skills: [...event.skills] } : { ...event };
}

/**
 * Returns a copy of a replay input that shares no mutable objects or arrays
 * with `input`, so a consumer mutating a snapshot cannot alter buffer state.
 *
 * Copies: `metadata` (when present), `errors`, every event (including the
 * `skills` array), planned work units with their `rawFiles`, and each detail
 * section with its nested arrays. Remaining top-level fields are carried over
 * by the spread.
 *
 * @param input - Replay input to copy; it is not modified.
 * @returns An independent copy of `input`.
 */
function copyReplayInput(input: MemoryFlowReplayInput): MemoryFlowReplayInput {
  return {
    ...input,
    // Conditional spread keeps the `metadata` key absent when the input has none.
    ...(input.metadata ? { metadata: { ...input.metadata } } : {}),
    errors: [...input.errors],
    events: input.events.map(copyEvent),
    plannedWorkUnits: input.plannedWorkUnits.map((workUnit) => ({
      ...workUnit,
      rawFiles: [...workUnit.rawFiles],
    })),
    details: {
      actions: input.details.actions.map((action) => ({ ...action, rawFiles: [...action.rawFiles] })),
      provenance: input.details.provenance.map((row) => ({ ...row })),
      transcripts: input.details.transcripts.map((summary) => ({ ...summary, toolNames: [...summary.toolNames] })),
    },
  };
}
/**
 * Creates an in-memory event sink that tracks the current replay input of a
 * live run and reports every change through `options.onChange`.
 *
 * - `emit` appends the event (stamped via `stampEvent`) to the event list.
 * - `update` merges a patch over the current input.
 * - `finish` sets the final status and errors; errors default to the ones
 *   already recorded.
 * - `snapshot` returns a copy of the current input.
 *
 * @param initialInput - Starting replay input; it is copied, not retained.
 * @param options - Optional change listener and clock.
 * @returns The event sink backed by the buffer.
 */
export function createMemoryFlowLiveBuffer(
  initialInput: MemoryFlowReplayInput,
  options: MemoryFlowLiveBufferOptions = {},
): MemoryFlowEventSink {
  let input = copyReplayInput(initialInput);

  // Single place where state is replaced and listeners are told about it.
  const publish = (next: MemoryFlowReplayInput): void => {
    input = next;
    notify(input, options);
  };

  return {
    emit(event: MemoryFlowEvent): void {
      const stamped = stampEvent(event, options);
      publish({ ...input, events: [...input.events, stamped] });
    },

    update(patch: MemoryFlowReplayPatch): void {
      publish(copyReplayInput({ ...input, ...patch }));
    },

    finish(status: MemoryFlowRunStatus, errors: string[] = input.errors): void {
      publish(copyReplayInput({ ...input, status, errors }));
    },

    snapshot(): MemoryFlowReplayInput {
      return copyReplayInput(input);
    },
  };
}
describe, expect, it } from 'vitest'; +import type { MemoryFlowViewModel } from './types.js'; +import { renderMemoryFlowReplay } from './render.js'; + +function view(): MemoryFlowViewModel { + return { + title: 'KLO memory flow warehouse/metricflow done', + subtitle: 'Run run-1 Sync sync-1', + status: 'done', + activeLine: 'active: complete', + selectedTitle: 'SOURCE', + selectedDetails: ['Trigger: manual_resync', 'Adapter: metricflow'], + completionLine: + 'Saved 2 memories from 2 raw files: 1 wiki pages, 1 SL updates. Commit: abc12345 Run: run-1 Report: report-1', + trustIssues: [], + details: { actions: [], provenance: [], transcripts: [] }, + columns: [ + { + id: 'source', + title: 'SOURCE', + status: 'complete', + headline: '2 raw files', + counters: ['sync sync-1', 'scope none'], + chips: [{ label: 'metricflow', status: 'complete' }], + details: ['Trigger: manual_resync'], + }, + { + id: 'chunks', + title: 'CHUNKS', + status: 'complete', + headline: '2 chunks', + counters: ['+1 ~1 -0 =3', '0 deletions'], + chips: [{ label: 'orders', status: 'complete' }], + details: ['Work units planned: 2'], + }, + { + id: 'workUnits', + title: 'WORKUNITS', + status: 'warning', + headline: '2 WUs', + counters: ['1 done', '1 failed', '0 active'], + chips: [{ label: 'orders', status: 'complete' }], + details: ['orders: 1 raw, 1 peers, 1 deps'], + }, + { + id: 'actions', + title: 'ACTIONS', + status: 'complete', + headline: '2 candidates', + counters: ['1 wiki', '1 SL'], + chips: [{ label: 'knowledge/orders.md', status: 'complete' }], + details: ['wiki created: knowledge/orders.md'], + }, + { + id: 'gates', + title: 'GATES', + status: 'warning', + headline: '1 conflict, 1 fallback', + counters: ['1 failed', '1 flagged'], + chips: [{ label: 'customers', status: 'failed' }], + details: ['Failed work units: 1'], + }, + { + id: 'saved', + title: 'SAVED', + status: 'complete', + headline: '2 memories', + counters: ['1 wiki', '1 SL', '3 provenance'], + chips: [{ label: 'abc12345', 
status: 'complete' }], + details: ['Commit: abc12345'], + }, + ], + }; +} + +describe('renderMemoryFlowReplay', () => { + it('renders a six-column wide terminal snapshot', () => { + expect(renderMemoryFlowReplay(view(), { terminalWidth: 140 })).toContain( + 'OK SOURCE -> OK CHUNKS -> !! WORKUNITS -> OK ACTIONS -> !! GATES -> OK SAVED', + ); + expect(renderMemoryFlowReplay(view(), { terminalWidth: 140 })).toMatchInlineSnapshot(` + "KLO memory flow warehouse/metricflow done + active: complete + Run run-1 Sync sync-1 + OK SOURCE -> OK CHUNKS -> !! WORKUNITS -> OK ACTIONS -> !! GATES -> OK SAVED + + SOURCE CHUNKS WORKUNITS ACTIONS GATES SAVED + 2 raw files 2 chunks 2 WUs 2 candidates 1 conflict, 1 fallb 2 memories + sync sync-1 +1 ~1 -0 =3 1 done 1 wiki 1 failed 1 wiki + scope none 0 deletions 1 failed 1 SL 1 flagged 1 SL + + Selected: SOURCE + - Trigger: manual_resync + - Adapter: metricflow + + Saved 2 memories from 2 raw files: 1 wiki pages, 1 SL updates. Commit: abc12345 Run: run-1 Report: report-1 + " + `); + }); + + it('renders a stacked narrow terminal snapshot', () => { + expect(renderMemoryFlowReplay(view(), { terminalWidth: 72 })).toContain( + 'OK SOURCE -> OK CHUNKS -> !! WORKUNITS -> OK ACTIONS -> !! 
/**
 * Formats one table cell to a fixed width.
 *
 * Text longer than `width` is cut to `width - 1` characters, so a truncated
 * cell always ends in at least one pad space; shorter text is right-padded
 * with spaces. `undefined` is treated as an empty string.
 *
 * @param value - Cell text, or `undefined` for an empty cell.
 * @param width - Target cell width; defaults to the wide-layout column width.
 * @returns The padded or truncated cell text.
 */
function cell(value: string | undefined, width = WIDE_COLUMN_WIDTH): string {
  const text = value ?? '';
  // Clamp at 0: a negative end index makes `slice` count from the end of the
  // string, which would return text longer than the requested width.
  const keep = Math.max(width - 1, 0);
  const normalized = text.length > width ? text.slice(0, keep) : text;
  return normalized.padEnd(width, ' ');
}
/**
 * Renders the replay view as a stacked, one-column-per-section layout.
 *
 * Output order: title, active line, subtitle, connector line, trust issues,
 * a blank line, each column section (separated by blank lines), a blank line,
 * the selected title with its detail bullets, and finally the completion line
 * (preceded by a blank line) when the view has one. The result always ends
 * with a newline.
 */
function renderNarrow(view: MemoryFlowViewModel): string {
  const output: string[] = [
    view.title,
    view.activeLine,
    view.subtitle,
    renderMemoryFlowConnectorLine(view),
    ...trustIssueLines(view),
    '',
  ];

  view.columns.forEach((column, position) => {
    // Blank separator between sections, but not before the first one.
    if (position > 0) {
      output.push('');
    }
    output.push(...renderNarrowColumn(column));
  });

  output.push('', `Selected: ${view.selectedTitle}`);
  for (const detail of view.selectedDetails) {
    output.push(`- ${detail}`);
  }

  if (view.completionLine) {
    output.push('', view.completionLine);
  }

  // Trailing empty entry yields the final newline after join.
  output.push('');
  return output.join('\n');
}
/**
 * Test fixture: a complete replay input for a running `metabase` run with one
 * planned work unit (`orders`) and one entry in each detail section.
 *
 * @param overrides - Top-level fields to replace in the fixture.
 * @returns The fixture with `overrides` applied last.
 */
function snapshot(overrides: Partial<MemoryFlowReplayInput> = {}): MemoryFlowReplayInput {
  return {
    runId: 'job-1',
    connectionId: 'connection-1',
    adapter: 'metabase',
    status: 'running',
    sourceDir: null,
    syncId: 'sync-1',
    errors: [],
    events: [
      { type: 'source_acquired', adapter: 'metabase', trigger: 'manual_resync', fileCount: 2 },
      { type: 'scope_detected', fingerprint: 'scope-1' },
      { type: 'raw_snapshot_written', syncId: 'sync-1', rawFileCount: 2 },
      { type: 'diff_computed', added: 1, modified: 1, deleted: 0, unchanged: 0 },
      { type: 'chunks_planned', chunkCount: 1, workUnitCount: 1, evictionCount: 0 },
      { type: 'work_unit_started', unitKey: 'orders', skills: ['knowledge_capture'], stepBudget: 40 },
      { type: 'work_unit_step', unitKey: 'orders', stepIndex: 1, stepBudget: 40 },
      { type: 'candidate_action', unitKey: 'orders', target: 'wiki', action: 'created', key: 'knowledge/orders.md' },
      { type: 'work_unit_finished', unitKey: 'orders', status: 'success' },
      { type: 'reconciliation_finished', conflictCount: 0, fallbackCount: 0 },
      { type: 'saved', commitSha: 'abc12345', wikiCount: 1, slCount: 0 },
      { type: 'provenance_recorded', rowCount: 1 },
      { type: 'report_created', runId: 'run-1', reportPath: 'ingest-report.json' },
    ],
    plannedWorkUnits: [{ unitKey: 'orders', rawFiles: ['orders.md'], peerFileCount: 0, dependencyCount: 1 }],
    details: {
      actions: [
        {
          unitKey: 'orders',
          target: 'wiki',
          action: 'created',
          key: 'knowledge/orders.md',
          summary: 'Created orders page',
          rawFiles: ['orders.md'],
          status: 'success',
        },
      ],
      provenance: [
        {
          rawPath: 'orders.md',
          artifactKind: 'wiki',
          artifactKey: 'knowledge/orders.md',
          actionType: 'wiki_written',
        },
      ],
      transcripts: [
        {
          unitKey: 'orders',
          path: 'transcripts/orders.jsonl',
          toolCallCount: 2,
          errorCount: 0,
          toolNames: ['wiki_write'],
        },
      ],
    },
    // Applied last so callers can replace any top-level field.
    ...overrides,
  };
}
rawFileCount: 7 }, + { type: 'diff_computed', added: 7, modified: 0, deleted: 0, unchanged: 0 }, + { type: 'chunks_planned', chunkCount: 7, workUnitCount: 0, evictionCount: 0 }, + { type: 'stage_skipped', stage: 'workUnits', reason: 'deterministic mode' }, + { type: 'stage_skipped', stage: 'actions', reason: 'requires LLM' }, + { type: 'stage_skipped', stage: 'gates', reason: 'requires candidate actions' }, + { type: 'stage_skipped', stage: 'saved', reason: 'requires LLM memory synthesis' }, + { type: 'saved', commitSha: null, wikiCount: 0, slCount: 0 }, + { type: 'provenance_recorded', rowCount: 0 }, + { + type: 'report_created', + runId: 'scan-demo', + reportPath: 'raw-sources/orbit_demo/live-database/sync-demo/scan-report.json', + }, + ], + }), + ); + + expect(parsed.events).toContainEqual({ type: 'stage_skipped', stage: 'workUnits', reason: 'deterministic mode' }); + expect(parsed.events).toContainEqual({ type: 'stage_skipped', stage: 'actions', reason: 'requires LLM' }); + }); + + it('parses snapshot and closed stream events', () => { + expect(memoryFlowStreamEventSchema.parse({ type: 'snapshot', snapshot: snapshot({ status: 'done' }) })).toEqual({ + type: 'snapshot', + snapshot: snapshot({ status: 'done' }), + }); + + expect(memoryFlowStreamEventSchema.parse({ type: 'closed', status: 'done', errors: [] })).toEqual({ + type: 'closed', + status: 'done', + errors: [], + }); + }); + + it('rejects invalid replay status values', () => { + expect(() => memoryFlowReplayInputSchema.parse({ ...snapshot(), status: 'complete' })).toThrow(); + }); +}); diff --git a/packages/context/src/ingest/memory-flow/schema.ts b/packages/context/src/ingest/memory-flow/schema.ts new file mode 100644 index 00000000..0e268f17 --- /dev/null +++ b/packages/context/src/ingest/memory-flow/schema.ts @@ -0,0 +1,171 @@ +import * as z from 'zod'; +import type { MemoryFlowReplayInput } from './types.js'; + +export const memoryFlowRunStatusSchema = z.enum(['running', 'done', 'error']); + +const 
/**
 * Builds the object schema for one memory-flow event variant: the variant's
 * own fields plus the optional `emittedAt` timestamp shared by all events.
 *
 * The return type is left to inference so that it follows whatever `z.object`
 * produces for the merged shape.
 *
 * @param shape - Field schemas specific to the event variant, including its
 *   `type` literal.
 * @returns An object schema for the merged shape.
 */
function eventSchema<T extends z.ZodRawShape>(shape: T) {
  return z.object({ ...shape, ...memoryFlowEventTimestampShape });
}
/**
 * One planned work unit: a non-empty unit key, the raw files it covers, and
 * non-negative integer counts of peer files and dependencies.
 */
export const memoryFlowPlannedWorkUnitSchema = z.object({
  unitKey: z.string().min(1),
  rawFiles: z.array(z.string()),
  peerFileCount: z.number().int().min(0),
  dependencyCount: z.number().int().min(0),
});

/**
 * One action detail row. Unlike the `candidate_action` event, `action` is
 * restricted here to `created`, `updated` or `removed`.
 */
export const memoryFlowActionDetailSchema = z.object({
  unitKey: z.string().min(1),
  target: z.enum(['wiki', 'sl']),
  action: z.enum(['created', 'updated', 'removed']),
  key: z.string().min(1),
  summary: z.string(),
  rawFiles: z.array(z.string()),
  status: z.enum(['success', 'failed']),
});

/**
 * One provenance detail row linking a raw path to an artifact. The artifact
 * kind and key are nullable.
 */
const memoryFlowProvenanceDetailSchema = z.object({
  rawPath: z.string(),
  artifactKind: z.enum(['sl', 'wiki']).nullable(),
  artifactKey: z.string().nullable(),
  actionType: z.string().min(1),
});

/**
 * Summary of one work unit's transcript: where it is stored, tool-call and
 * error counts (non-negative integers), and the tool names.
 */
const memoryFlowTranscriptDetailSchema = z.object({
  unitKey: z.string().min(1),
  path: z.string().min(1),
  toolCallCount: z.number().int().min(0),
  errorCount: z.number().int().min(0),
  toolNames: z.array(z.string()),
});
/**
 * Schema for a full replay input. The explicit `z.ZodType<MemoryFlowReplayInput>`
 * annotation makes the compiler check that the schema's output matches the
 * hand-written `MemoryFlowReplayInput` type.
 */
export const memoryFlowReplayInputSchema: z.ZodType<MemoryFlowReplayInput> = z.object({
  metadata: memoryFlowReplayMetadataSchema.optional(),
  runId: z.string().min(1),
  connectionId: z.string().min(1),
  adapter: z.string().min(1),
  status: memoryFlowRunStatusSchema,
  sourceDir: z.string().nullable(),
  syncId: z.string().min(1),
  reportId: z.string().min(1).optional(),
  reportPath: z.string().min(1).optional(),
  errors: z.array(z.string()),
  events: z.array(memoryFlowEventSchema),
  plannedWorkUnits: z.array(memoryFlowPlannedWorkUnitSchema),
  details: memoryFlowDetailSectionsSchema,
});

/**
 * Schema for messages on the live stream: either a full `snapshot` of the
 * replay input, or a `closed` message carrying the final status and errors.
 */
export const memoryFlowStreamEventSchema = z.discriminatedUnion('type', [
  z.object({ type: z.literal('snapshot'), snapshot: memoryFlowReplayInputSchema }),
  z.object({
    type: z.literal('closed'),
    status: memoryFlowRunStatusSchema,
    errors: z.array(z.string()),
  }),
]);

/** Parsed form of a stream message, derived from `memoryFlowStreamEventSchema`. */
export type MemoryFlowStreamEvent = z.infer<typeof memoryFlowStreamEventSchema>;
plannedWorkUnits: [{ unitKey: 'orders', rawFiles: ['orders.yml'], peerFileCount: 0, dependencyCount: 0 }], + details: { actions: [], provenance: [], transcripts: [] }, + events: [ + { type: 'source_acquired', adapter: 'metricflow', trigger: 'manual_resync', fileCount: 2 }, + { type: 'chunks_planned', chunkCount: 2, workUnitCount: 1, evictionCount: 0 }, + { type: 'work_unit_finished', unitKey: 'orders', status: 'success' }, + { type: 'saved', commitSha: 'abc12345', wikiCount: 1, slCount: 1 }, + { type: 'provenance_recorded', rowCount: 2 }, + { type: 'report_created', runId: 'run-1', reportPath: 'report-1' }, + ], + ...overrides, + }; +} + +describe('formatMemoryFlowFinalSummary', () => { + it('summarizes a successful full memory-flow run', () => { + expect(formatMemoryFlowFinalSummary(input())).toBe( + [ + 'Memory-flow summary: done', + 'Connection: warehouse', + 'Adapter: metricflow', + 'Run: run-1', + 'Sync: sync-1', + 'Source files: 2', + 'Table reviews: 1 total, 1 done, 0 failed', + 'Saved memory: 1 wiki, 1 semantic layer', + 'Provenance rows: 2', + 'Report: report-1', + '', + ].join('\n'), + ); + }); + + it('includes trust issues and sanitized errors for failed runs', () => { + expect( + formatMemoryFlowFinalSummary( + input({ + status: 'error', + errors: ['failed token=secret'], + events: [ + { type: 'source_acquired', adapter: 'metricflow', trigger: 'manual_resync', fileCount: 2 }, + { type: 'chunks_planned', chunkCount: 2, workUnitCount: 1, evictionCount: 0 }, + { type: 'work_unit_finished', unitKey: 'orders', status: 'failed', reason: 'validation failed token=secret' }, + ], + }), + ), + ).toContain('Trust issues: 3'); + }); + + it('labels replay source metadata in final summaries', () => { + const summary = formatMemoryFlowFinalSummary({ + metadata: { + schemaVersion: 1, + mode: 'replay', + origin: 'packaged', + timing: 'captured', + capturedAt: '2026-05-01T10:00:03.000Z', + sourceReportId: 'demo-replay-report', + sourceReportPath: 
'replays/replay.memory-flow.v1.json', + fallbackReason: null, + }, + runId: 'demo-replay-orbit', + connectionId: 'orbit_demo', + adapter: 'live-database', + status: 'done', + sourceDir: null, + syncId: 'demo-replay-sync', + reportPath: 'replays/replay.memory-flow.v1.json', + errors: [], + events: [ + { type: 'source_acquired', adapter: 'live-database', trigger: 'demo_replay', fileCount: 7 }, + { type: 'saved', commitSha: null, wikiCount: 3, slCount: 2 }, + { type: 'provenance_recorded', rowCount: 5 }, + { type: 'report_created', runId: 'demo-replay-orbit', reportPath: 'replays/replay.memory-flow.v1.json' }, + ], + plannedWorkUnits: [], + details: { actions: [], provenance: [], transcripts: [] }, + }); + + expect(summary).toContain('Replay source: packaged replay (captured timing)'); + expect(summary).toContain('Replay captured: 2026-05-01T10:00:03.000Z'); + }); + + it('labels synthetic report replays with the reconstruction reason', () => { + const summary = formatMemoryFlowFinalSummary({ + metadata: { + schemaVersion: 1, + mode: 'full', + origin: 'synthetic-report', + timing: 'synthetic', + capturedAt: '2026-05-01T10:00:03.000Z', + sourceReportId: 'report-1', + sourceReportPath: 'report-1', + fallbackReason: 'report did not include captured memory-flow events', + }, + runId: 'run-1', + connectionId: 'warehouse', + adapter: 'lookml', + status: 'done', + sourceDir: null, + syncId: 'sync-1', + reportPath: 'report-1', + errors: [], + events: [{ type: 'report_created', runId: 'run-1', reportPath: 'report-1' }], + plannedWorkUnits: [], + details: { actions: [], provenance: [], transcripts: [] }, + }); + + expect(summary).toContain('Replay source: synthetic report replay (synthetic timing)'); + expect(summary).toContain('Replay note: report did not include captured memory-flow events'); + }); +}); diff --git a/packages/context/src/ingest/memory-flow/summary.ts b/packages/context/src/ingest/memory-flow/summary.ts new file mode 100644 index 00000000..b71526a1 --- /dev/null 
+++ b/packages/context/src/ingest/memory-flow/summary.ts
@@ -0,0 +1,110 @@
+import { sanitizeMemoryFlowError } from './live-buffer.js';
+import type { MemoryFlowEvent, MemoryFlowReplayInput } from './types.js';
+import { buildMemoryFlowViewModel } from './view-model.js';
+
+/** Returns the last event of the given `type`, or `undefined` when there is none. */
+function latest<T extends MemoryFlowEvent['type']>(
+  events: MemoryFlowEvent[],
+  type: T,
+): Extract<MemoryFlowEvent, { type: T }> | undefined {
+  return eventsOf(events, type).at(-1);
+}
+
+/** Returns every event of the given `type`, in array order, narrowed to that variant. */
+function eventsOf<T extends MemoryFlowEvent['type']>(
+  events: MemoryFlowEvent[],
+  type: T,
+): Array<Extract<MemoryFlowEvent, { type: T }>> {
+  return events.filter((event): event is Extract<MemoryFlowEvent, { type: T }> => event.type === type);
+}
+
+/**
+ * Builds the "Replay source" line from the replay metadata.
+ * Returns `null` when the input carries no metadata.
+ */
+function replaySourceLine(input: MemoryFlowReplayInput): string | null {
+  const metadata = input.metadata;
+  if (!metadata) {
+    return null;
+  }
+
+  const origin =
+    metadata.origin === 'synthetic-report'
+      ? 'synthetic report replay'
+      : metadata.origin === 'packaged'
+        ? 'packaged replay'
+        : 'captured replay';
+  return `Replay source: ${origin} (${metadata.timing} timing)`;
+}
+
+/** Replaces internal terms (WORKUNITS, WorkUnit, WUs, SL, raw files) with user-facing wording. */
+function humanizeSummaryText(value: string): string {
+  return value
+    .replace(/\bWORKUNITS\b/g, 'PLAN')
+    .replace(/\bWorkUnit\b/g, 'Table review')
+    .replace(/\bwork units\b/gi, 'table reviews')
+    .replace(/\bWUs\b/g, 'tables')
+    .replace(/\braw files\b/gi, 'database files')
+    .replace(/\braw file\b/gi, 'database file')
+    .replace(/\bSL\b/g, 'semantic layer');
+}
+
+/**
+ * Formats the plain-text summary printed for a memory-flow run.
+ *
+ * File counts are summed over every `source_acquired` event; saved, provenance
+ * and report values come from the last event of each type. At most three trust
+ * issues and three errors are listed, and errors are sanitized first.
+ *
+ * @param input - Replay input describing the run.
+ * @returns Newline-joined summary text that ends with a newline.
+ */
+export function formatMemoryFlowFinalSummary(input: MemoryFlowReplayInput): string {
+  const sources = eventsOf(input.events, 'source_acquired');
+  const totalFiles = sources.reduce((sum, s) => sum + s.fileCount, 0);
+  const saved = latest(input.events, 'saved');
+  const provenance = latest(input.events, 'provenance_recorded');
+  const report = latest(input.events, 'report_created');
+  const finished = eventsOf(input.events, 'work_unit_finished');
+  const failed = finished.filter((event) => event.status === 'failed');
+  const view = buildMemoryFlowViewModel(input);
+  const lines = [
+    `Memory-flow summary: ${input.status}`,
+    `Connection: ${input.connectionId}`,
+    // Multi-source runs list the distinct adapters instead of the single configured one.
+    ...(sources.length > 1
+      ? [`Sources: ${[...new Set(sources.map((s) => s.adapter))].join(', ')}`]
+      : [`Adapter: ${input.adapter}`]),
+    `Run: ${input.runId}`,
+    `Sync: ${input.syncId}`,
+    `Source files: ${totalFiles}`,
+    `Table reviews: ${input.plannedWorkUnits.length || finished.length} total, ${finished.length - failed.length} done, ${failed.length} failed`,
+    `Saved memory: ${saved?.wikiCount ?? 0} wiki, ${saved?.slCount ?? 0} semantic layer`,
+    `Provenance rows: ${provenance?.rowCount ?? 0}`,
+    `Report: ${report?.reportPath ?? input.reportPath ?? 'none'}`,
+  ];
+  const sourceLine = replaySourceLine(input);
+  if (sourceLine) {
+    lines.push(sourceLine);
+  }
+  if (input.metadata?.capturedAt) {
+    lines.push(`Replay captured: ${input.metadata.capturedAt}`);
+  }
+  if (input.metadata?.fallbackReason) {
+    lines.push(`Replay note: ${input.metadata.fallbackReason}`);
+  }
+
+  if (view.trustIssues.length > 0) {
+    lines.push(`Trust issues: ${view.trustIssues.length}`);
+    for (const issue of view.trustIssues.slice(0, 3)) {
+      lines.push(`- ${humanizeSummaryText(issue.title)}: ${humanizeSummaryText(issue.detail)}`);
+    }
+  }
+
+  for (const error of input.errors.slice(0, 3)) {
+    lines.push(`Error: ${sanitizeMemoryFlowError(error)}`);
+  }
+
+  lines.push('');
+  return lines.join('\n');
+}
diff --git a/packages/context/src/ingest/memory-flow/types.ts b/packages/context/src/ingest/memory-flow/types.ts new file mode 100644 index 00000000..8a40ac04 --- /dev/null +++ b/packages/context/src/ingest/memory-flow/types.ts @@ -0,0 +1,246 @@ +type MemoryFlowReplayMode = 'full' | 'deterministic' | 'replay' | 'seeded'; +type MemoryFlowReplayOrigin = 'captured' | 'packaged' | 'synthetic-report'; +type MemoryFlowReplayTiming = 'captured' | 'synthetic' | 'not-captured' | 'prebuilt'; + +interface MemoryFlowReplayMetadata { + schemaVersion: 1; + mode: MemoryFlowReplayMode; + origin: MemoryFlowReplayOrigin; + timing:
MemoryFlowReplayTiming; + capturedAt: string | null; + sourceReportId: string | null; + sourceReportPath: string | null; + fallbackReason: string | null; +} + +type MemoryFlowEventPayload = + | { + type: 'source_acquired'; + adapter: string; + trigger: string; + fileCount: number; + } + | { type: 'scope_detected'; fingerprint: string | null } + | { + type: 'raw_snapshot_written'; + syncId: string; + rawFileCount: number; + } + | { + type: 'diff_computed'; + added: number; + modified: number; + deleted: number; + unchanged: number; + } + | { + type: 'chunks_planned'; + chunkCount: number; + workUnitCount: number; + evictionCount: number; + } + | { + type: 'stage_skipped'; + stage: MemoryFlowColumnId; + reason: string; + } + | { + type: 'work_unit_started'; + unitKey: string; + skills: string[]; + stepBudget: number; + } + | { + type: 'work_unit_step'; + unitKey: string; + stepIndex: number; + stepBudget: number; + } + | { + type: 'candidate_action'; + unitKey: string; + target: 'wiki' | 'sl'; + action: string; + key: string; + } + | { + type: 'work_unit_finished'; + unitKey: string; + status: 'success' | 'failed'; + reason?: string; + } + | { + type: 'reconciliation_finished'; + conflictCount: number; + fallbackCount: number; + } + | { + type: 'saved'; + commitSha: string | null; + wikiCount: number; + slCount: number; + } + | { type: 'provenance_recorded'; rowCount: number } + | { type: 'report_created'; runId: string; reportPath?: string }; + +export type MemoryFlowEvent = MemoryFlowEventPayload & { emittedAt?: string }; + +export type MemoryFlowRunStatus = 'running' | 'done' | 'error'; + +export interface MemoryFlowPlannedWorkUnit { + unitKey: string; + rawFiles: string[]; + peerFileCount: number; + dependencyCount: number; +} + +export interface MemoryFlowActionDetail { + unitKey: string; + target: 'wiki' | 'sl'; + action: 'created' | 'updated' | 'removed'; + key: string; + summary: string; + rawFiles: string[]; + status: 'success' | 'failed'; +} + +interface 
MemoryFlowProvenanceDetail { + rawPath: string; + artifactKind: 'sl' | 'wiki' | null; + artifactKey: string | null; + actionType: string; +} + +interface MemoryFlowTranscriptDetail { + unitKey: string; + path: string; + toolCallCount: number; + errorCount: number; + toolNames: string[]; +} + +export interface MemoryFlowDetailSections { + actions: MemoryFlowActionDetail[]; + provenance: MemoryFlowProvenanceDetail[]; + transcripts: MemoryFlowTranscriptDetail[]; +} + +export interface MemoryFlowReplayInput { + metadata?: MemoryFlowReplayMetadata; + runId: string; + connectionId: string; + adapter: string; + status: MemoryFlowRunStatus; + sourceDir: string | null; + syncId: string; + reportId?: string; + reportPath?: string; + errors: string[]; + events: MemoryFlowEvent[]; + plannedWorkUnits: MemoryFlowPlannedWorkUnit[]; + details: MemoryFlowDetailSections; +} + +export type MemoryFlowReplayPatch = Partial>; + +export interface MemoryFlowEventSink { + emit(event: MemoryFlowEvent): void; + update(patch: MemoryFlowReplayPatch): void; + finish(status: MemoryFlowRunStatus, errors?: string[]): void; + snapshot(): MemoryFlowReplayInput; +} + +export interface MemoryFlowLiveBufferOptions { + onChange?(snapshot: MemoryFlowReplayInput): void; + now?: () => Date; +} + +export type MemoryFlowColumnId = 'source' | 'chunks' | 'workUnits' | 'actions' | 'gates' | 'saved'; +export type MemoryFlowDisplayStatus = 'waiting' | 'active' | 'complete' | 'warning' | 'failed'; + +export interface MemoryFlowChip { + label: string; + status: MemoryFlowDisplayStatus; + detail?: string; +} + +export interface MemoryFlowColumnView { + id: MemoryFlowColumnId; + title: string; + status: MemoryFlowDisplayStatus; + headline: string; + counters: string[]; + chips: MemoryFlowChip[]; + details: string[]; +} + +export interface MemoryFlowTrustIssue { + id: string; + severity: 'warning' | 'failed'; + title: string; + detail: string; + columnId: MemoryFlowColumnId; + targetLabel?: string; +} + +export 
interface MemoryFlowSearchMatch { + columnId: MemoryFlowColumnId; + chipIndex?: number; + label: string; + detail: string; +} + +export interface MemoryFlowViewModel { + title: string; + subtitle: string; + status: MemoryFlowRunStatus; + activeLine: string; + columns: MemoryFlowColumnView[]; + trustIssues: MemoryFlowTrustIssue[]; + selectedTitle: string; + selectedDetails: string[]; + completionLine: string | null; + details: MemoryFlowDetailSections; +} + +export interface MemoryFlowRenderOptions { + terminalWidth?: number; +} + +export type MemoryFlowPaneId = 'overview' | 'trust' | 'details' | 'log' | 'provenance' | 'transcript'; +export type MemoryFlowFilterMode = 'all' | 'failed_or_flagged'; + +interface MemoryFlowSearchState { + editing: boolean; + query: string; + matchIndex: number; +} + +export interface MemoryFlowInteractionState { + selectedColumnId: MemoryFlowColumnId; + selectedChipIndex: number; + expanded: boolean; + pane: MemoryFlowPaneId; + filter: MemoryFlowFilterMode; + search: MemoryFlowSearchState; + shouldQuit: boolean; +} + +export type MemoryFlowInteractionCommand = + | 'left' + | 'right' + | 'up' + | 'down' + | 'enter' + | 'tab' + | 'filter' + | 'provenance' + | 'transcript' + | 'search-start' + | 'search-submit' + | 'search-backspace' + | 'search-clear' + | 'search-next' + | 'search-previous' + | 'quit' + | { type: 'search-input'; value: string }; diff --git a/packages/context/src/ingest/memory-flow/view-model.test.ts b/packages/context/src/ingest/memory-flow/view-model.test.ts new file mode 100644 index 00000000..527efe0a --- /dev/null +++ b/packages/context/src/ingest/memory-flow/view-model.test.ts @@ -0,0 +1,436 @@ +import { describe, expect, it } from 'vitest'; +import type { MemoryFlowReplayInput } from './types.js'; +import { buildMemoryFlowViewModel } from './view-model.js'; + +function replayInput(): MemoryFlowReplayInput { + return { + runId: 'run-1', + connectionId: 'warehouse', + adapter: 'metricflow', + status: 'done', + 
sourceDir: '/tmp/source', + syncId: 'sync-1', + errors: [], + plannedWorkUnits: [ + { unitKey: 'orders', rawFiles: ['orders.yml'], peerFileCount: 1, dependencyCount: 1 }, + { unitKey: 'revenue', rawFiles: ['revenue.yml'], peerFileCount: 0, dependencyCount: 0 }, + ], + details: { + actions: [ + { + unitKey: 'orders', + target: 'wiki', + action: 'created', + key: 'knowledge/orders.md', + summary: 'order facts', + rawFiles: ['orders.yml'], + status: 'success', + }, + { + unitKey: 'orders', + target: 'sl', + action: 'updated', + key: 'warehouse.orders', + summary: 'order measures', + rawFiles: ['orders.yml'], + status: 'success', + }, + ], + provenance: [ + { + rawPath: 'orders.yml', + artifactKind: 'wiki', + artifactKey: 'knowledge/orders.md', + actionType: 'wiki_written', + }, + ], + transcripts: [ + { + unitKey: 'orders', + path: '/tmp/transcripts/orders.jsonl', + toolCallCount: 3, + errorCount: 0, + toolNames: ['read_raw_span', 'wiki_write', 'sl_write_source'], + }, + ], + }, + events: [ + { type: 'source_acquired', adapter: 'metricflow', trigger: 'manual_resync', fileCount: 2 }, + { type: 'scope_detected', fingerprint: 'scope-abc' }, + { type: 'raw_snapshot_written', syncId: 'sync-1', rawFileCount: 2 }, + { type: 'diff_computed', added: 1, modified: 1, deleted: 0, unchanged: 3 }, + { type: 'chunks_planned', chunkCount: 2, workUnitCount: 2, evictionCount: 0 }, + { type: 'work_unit_started', unitKey: 'orders', skills: ['knowledge_capture'], stepBudget: 40 }, + { type: 'candidate_action', unitKey: 'orders', target: 'wiki', action: 'created', key: 'knowledge/orders.md' }, + { type: 'candidate_action', unitKey: 'orders', target: 'sl', action: 'updated', key: 'warehouse.orders' }, + { type: 'work_unit_finished', unitKey: 'orders', status: 'success' }, + { type: 'work_unit_finished', unitKey: 'revenue', status: 'failed', reason: 'validation failed' }, + { type: 'reconciliation_finished', conflictCount: 1, fallbackCount: 1 }, + { type: 'saved', commitSha: 'abc123456789', 
wikiCount: 1, slCount: 1 }, // pragma: allowlist secret + { type: 'provenance_recorded', rowCount: 3 }, + { type: 'report_created', runId: 'run-1', reportPath: 'report-1' }, + ], + }; +} + +function baseReplayInput(overrides: Partial = {}): MemoryFlowReplayInput { + return { + runId: 'run-errors', + connectionId: 'warehouse', + adapter: 'metricflow', + status: 'error', + sourceDir: '/tmp/source', + syncId: 'sync-errors', + errors: [], + events: [], + plannedWorkUnits: [], + details: { actions: [], provenance: [], transcripts: [] }, + ...overrides, + }; +} + +describe('buildMemoryFlowViewModel', () => { + it('builds six readable columns from replay events', () => { + const view = buildMemoryFlowViewModel(replayInput()); + + expect(view.title).toBe('KLO memory flow warehouse/metricflow done'); + expect(view.activeLine).toBe('active: complete'); + expect(view.columns.map((column) => column.id)).toEqual([ + 'source', + 'chunks', + 'workUnits', + 'actions', + 'gates', + 'saved', + ]); + expect(view.columns.map((column) => column.headline)).toEqual([ + '2 raw files', + '2 chunks', + '2 WUs', + '2 candidates', + '1 conflict, 1 fallback', + '2 memories', + ]); + expect(view.columns.find((column) => column.id === 'workUnits')?.counters).toEqual([ + '1 done', + '1 failed', + '0 active', + ]); + expect(view.columns.find((column) => column.id === 'actions')?.counters).toEqual(['1 wiki', '1 SL']); + expect(view.details.actions).toHaveLength(2); + expect(view.details.provenance).toEqual([ + { + rawPath: 'orders.yml', + artifactKind: 'wiki', + artifactKey: 'knowledge/orders.md', + actionType: 'wiki_written', + }, + ]); + expect(view.details.transcripts).toEqual([ + { + unitKey: 'orders', + path: '/tmp/transcripts/orders.jsonl', + toolCallCount: 3, + errorCount: 0, + toolNames: ['read_raw_span', 'wiki_write', 'sl_write_source'], + }, + ]); + expect(view.columns.find((column) => column.id === 'actions')?.details).toContain( + 'orders wiki created knowledge/orders.md: order facts', 
+ ); + expect(view.columns.find((column) => column.id === 'saved')?.details).toContain('Commit: abc12345'); + expect(view.completionLine).toBe( + 'Saved 2 memories from 2 raw files: 1 wiki pages, 1 SL updates. Commit: abc12345 Run: run-1 Report: report-1', + ); + }); + + it('shows all seeded demo source families and sums raw files in the completion line', () => { + const view = buildMemoryFlowViewModel({ + runId: 'demo-seeded-orbit', + connectionId: 'orbit_demo', + adapter: 'live-database', + status: 'done', + sourceDir: null, + syncId: 'demo-seeded-sync', + errors: [], + events: [ + { type: 'source_acquired', adapter: 'live-database', trigger: 'demo_seeded', fileCount: 8 }, + { type: 'source_acquired', adapter: 'dbt_descriptions', trigger: 'demo_seeded', fileCount: 6 }, + { type: 'source_acquired', adapter: 'looker', trigger: 'demo_seeded', fileCount: 7 }, + { type: 'source_acquired', adapter: 'notion', trigger: 'demo_seeded', fileCount: 8 }, + { type: 'chunks_planned', chunkCount: 1, workUnitCount: 1, evictionCount: 0 }, + { type: 'work_unit_started', unitKey: 'revenue-and-contracts', skills: ['knowledge_capture'], stepBudget: 40 }, + { + type: 'candidate_action', + unitKey: 'revenue-and-contracts', + target: 'wiki', + action: 'created', + key: 'knowledge/global/arr-contract-first.md', + }, + { type: 'work_unit_finished', unitKey: 'revenue-and-contracts', status: 'success' }, + { type: 'reconciliation_finished', conflictCount: 0, fallbackCount: 0 }, + { type: 'saved', commitSha: 'demo-seeded', wikiCount: 10, slCount: 6 }, + { type: 'provenance_recorded', rowCount: 23 }, + { type: 'report_created', runId: 'demo-seeded-orbit', reportPath: 'reports/seeded-demo-report.json' }, + ], + plannedWorkUnits: [ + { unitKey: 'revenue-and-contracts', rawFiles: ['contracts'], peerFileCount: 1, dependencyCount: 1 }, + ], + details: { actions: [], provenance: [], transcripts: [] }, + }); + + expect(view.title).toBe('KLO memory flow Warehouse + dbt + BI + Docs done'); + 
expect(view.columns.find((column) => column.id === 'source')?.counters[0]).toBe('Warehouse, dbt, BI, Docs'); + expect(view.completionLine).toContain('Saved 16 memories from 29 raw files'); + }); + + it('derives sticky trust issues from failed work units, gates, and provenance mismatch', () => { + const input = replayInput(); + const view = buildMemoryFlowViewModel({ + ...input, + events: [ + ...input.events.filter((event) => event.type !== 'provenance_recorded'), + { type: 'provenance_recorded', rowCount: 1 }, + ], + }); + + expect(view.trustIssues).toEqual([ + { + id: 'work-unit-failed:revenue', + severity: 'failed', + title: 'WorkUnit failed', + detail: 'revenue failed: validation failed', + columnId: 'workUnits', + targetLabel: 'revenue', + }, + { + id: 'sl-validation-reverted:revenue', + severity: 'warning', + title: 'SL validation revert', + detail: 'revenue reverted after semantic-layer validation failure', + columnId: 'gates', + targetLabel: 'revenue', + }, + { + id: 'reconciliation-conflicts', + severity: 'warning', + title: 'Reconciliation conflicts', + detail: '1 conflict resolved during reconciliation', + columnId: 'gates', + }, + { + id: 'flagged-fallbacks', + severity: 'warning', + title: 'Flagged fallbacks', + detail: '1 fallback needs review', + columnId: 'gates', + }, + { + id: 'provenance-mismatch', + severity: 'warning', + title: 'Provenance mismatch', + detail: '2 saved memories but 1 provenance rows recorded', + columnId: 'saved', + }, + ]); + expect(view.columns.find((column) => column.id === 'workUnits')?.chips).toContainEqual({ + label: 'revenue', + status: 'failed', + detail: 'validation failed', + }); + }); + + it('accepts multiple provenance rows per saved memory', () => { + const input = replayInput(); + const view = buildMemoryFlowViewModel({ + ...input, + events: [ + ...input.events.filter((event) => event.type !== 'provenance_recorded'), + { type: 'provenance_recorded', rowCount: 23 }, + ], + }); + + 
expect(view.trustIssues.find((issue) => issue.id === 'provenance-mismatch')).toBeUndefined(); + }); + + it('derives deterministic mode as a degraded trust issue', () => { + const view = buildMemoryFlowViewModel({ + runId: 'demo-deterministic-scan', + connectionId: 'orbit_demo', + adapter: 'live-database', + status: 'done', + sourceDir: 'raw-sources/orbit_demo/live-database/sync-demo', + syncId: 'sync-demo', + reportPath: 'raw-sources/orbit_demo/live-database/sync-demo/scan-report.json', + errors: [], + plannedWorkUnits: [], + details: { actions: [], provenance: [], transcripts: [] }, + events: [ + { type: 'source_acquired', adapter: 'live-database', trigger: 'demo_deterministic', fileCount: 7 }, + { type: 'chunks_planned', chunkCount: 7, workUnitCount: 0, evictionCount: 0 }, + { type: 'stage_skipped', stage: 'workUnits', reason: 'deterministic mode' }, + { type: 'stage_skipped', stage: 'actions', reason: 'requires LLM' }, + { type: 'stage_skipped', stage: 'gates', reason: 'requires candidate actions' }, + { type: 'stage_skipped', stage: 'saved', reason: 'requires LLM memory synthesis' }, + ], + }); + + expect(view.trustIssues).toEqual([ + { + id: 'degraded-mode:workUnits', + severity: 'warning', + title: 'Degraded mode', + detail: 'WORKUNITS skipped: deterministic mode', + columnId: 'workUnits', + targetLabel: 'skipped', + }, + { + id: 'degraded-mode:actions', + severity: 'warning', + title: 'Degraded mode', + detail: 'ACTIONS skipped: requires LLM', + columnId: 'actions', + targetLabel: 'skipped', + }, + { + id: 'degraded-mode:gates', + severity: 'warning', + title: 'Degraded mode', + detail: 'GATES skipped: requires candidate actions', + columnId: 'gates', + targetLabel: 'skipped', + }, + { + id: 'degraded-mode:saved', + severity: 'warning', + title: 'Degraded mode', + detail: 'SAVED skipped: requires LLM memory synthesis', + columnId: 'saved', + targetLabel: 'skipped', + }, + ]); + }); + + it('keeps local planning-only runs honest about unsaved memory', () => { 
+ const view = buildMemoryFlowViewModel({ + runId: 'local-run-1', + connectionId: 'warehouse', + adapter: 'fake', + status: 'done', + sourceDir: '/tmp/source', + syncId: 'sync-local', + errors: [], + plannedWorkUnits: [{ unitKey: 'orders', rawFiles: ['orders.json'], peerFileCount: 0, dependencyCount: 0 }], + details: { actions: [], provenance: [], transcripts: [] }, + events: [ + { type: 'source_acquired', adapter: 'fake', trigger: 'manual_resync', fileCount: 1 }, + { type: 'scope_detected', fingerprint: null }, + { type: 'raw_snapshot_written', syncId: 'sync-local', rawFileCount: 1 }, + { type: 'diff_computed', added: 1, modified: 0, deleted: 0, unchanged: 0 }, + { type: 'chunks_planned', chunkCount: 1, workUnitCount: 1, evictionCount: 0 }, + { type: 'report_created', runId: 'local-run-1' }, + ], + }); + + expect(view.columns.find((column) => column.id === 'actions')?.headline).toBe('0 candidates'); + expect(view.columns.find((column) => column.id === 'gates')?.headline).toBe('not run'); + expect(view.columns.find((column) => column.id === 'saved')?.headline).toBe('not saved'); + expect(view.completionLine).toBe(null); + }); + + it('surfaces a sanitized source acquisition error when no source event exists', () => { + const view = buildMemoryFlowViewModel( + baseReplayInput({ + errors: ['failed to read https://example.com/source?token=abc123 password=hunter2'], + }), + ); + + expect(view.activeLine).toBe('active: source failed - failed to read https://[redacted] password=[redacted]'); + expect(view.selectedTitle).toBe('SOURCE'); + expect(view.selectedDetails).toContain('Source acquisition failed: failed to read https://[redacted] password=[redacted]'); + }); + + it('surfaces a sanitized planning error after source acquisition but before chunks', () => { + const view = buildMemoryFlowViewModel( + baseReplayInput({ + errors: ['adapter detection failed api_key=abc123'], + events: [ + { type: 'source_acquired', adapter: 'metricflow', trigger: 'manual_resync', 
fileCount: 3 }, + { type: 'raw_snapshot_written', syncId: 'sync-errors', rawFileCount: 3 }, + ], + }), + ); + + expect(view.activeLine).toBe('active: planning failed - adapter detection failed api_key=[redacted]'); + const source = view.columns.find((column) => column.id === 'source'); + expect(source?.details).toContain('Error: adapter detection failed api_key=[redacted]'); + }); + + it('labels failed semantic-layer WorkUnits as reverted in gates details', () => { + const view = buildMemoryFlowViewModel( + baseReplayInput({ + status: 'error', + errors: ['semantic-layer validation failed for warehouse.orders'], + events: [ + { type: 'source_acquired', adapter: 'metricflow', trigger: 'manual_resync', fileCount: 2 }, + { type: 'raw_snapshot_written', syncId: 'sync-errors', rawFileCount: 2 }, + { type: 'diff_computed', added: 2, modified: 0, deleted: 0, unchanged: 0 }, + { type: 'chunks_planned', chunkCount: 1, workUnitCount: 1, evictionCount: 0 }, + { type: 'work_unit_started', unitKey: 'orders', skills: ['knowledge_capture'], stepBudget: 40 }, + { type: 'candidate_action', unitKey: 'orders', target: 'sl', action: 'updated', key: 'warehouse.orders' }, + { + type: 'work_unit_finished', + unitKey: 'orders', + status: 'failed', + reason: 'semantic-layer validation failed for warehouse.orders', + }, + ], + plannedWorkUnits: [{ unitKey: 'orders', rawFiles: ['orders.yml'], peerFileCount: 0, dependencyCount: 0 }], + }), + ); + + const gates = view.columns.find((column) => column.id === 'gates'); + expect(gates?.details).toContain('orders reverted: semantic-layer validation failed for warehouse.orders'); + expect(gates?.details).toContain('Invalid semantic-layer writes were not saved.'); + }); + + it('keeps non-validation WorkUnit failures actionable', () => { + const view = buildMemoryFlowViewModel( + baseReplayInput({ + status: 'error', + errors: ['agent step budget exhausted'], + events: [ + { type: 'source_acquired', adapter: 'metricflow', trigger: 'manual_resync', 
fileCount: 1 }, + { type: 'chunks_planned', chunkCount: 1, workUnitCount: 1, evictionCount: 0 }, + { type: 'work_unit_started', unitKey: 'docs', skills: ['knowledge_capture'], stepBudget: 40 }, + { type: 'work_unit_finished', unitKey: 'docs', status: 'failed', reason: 'agent step budget exhausted' }, + ], + plannedWorkUnits: [{ unitKey: 'docs', rawFiles: ['docs.md'], peerFileCount: 0, dependencyCount: 0 }], + }), + ); + + const gates = view.columns.find((column) => column.id === 'gates'); + expect(gates?.details).toContain('docs failed: agent step budget exhausted'); + }); + + it('shows whether durable memory landed before a post-save failure', () => { + const view = buildMemoryFlowViewModel( + baseReplayInput({ + status: 'error', + errors: ['index refresh failed token=abc123'], + events: [ + { type: 'source_acquired', adapter: 'metricflow', trigger: 'manual_resync', fileCount: 2 }, + { type: 'chunks_planned', chunkCount: 1, workUnitCount: 1, evictionCount: 0 }, + { type: 'work_unit_finished', unitKey: 'orders', status: 'success' }, + { type: 'reconciliation_finished', conflictCount: 0, fallbackCount: 0 }, + { type: 'saved', commitSha: 'abc123456789', wikiCount: 1, slCount: 1 }, // pragma: allowlist secret + ], + }), + ); + + const saved = view.columns.find((column) => column.id === 'saved'); + expect(saved?.details).toContain('Durable memory landed before failure.'); + expect(saved?.details).toContain('Post-save error: index refresh failed token=[redacted]'); + expect(view.activeLine).toBe('active: save failed - index refresh failed token=[redacted]'); + }); +});
diff --git a/packages/context/src/ingest/memory-flow/view-model.ts b/packages/context/src/ingest/memory-flow/view-model.ts
new file mode 100644
index 00000000..64d0b433
--- /dev/null
+++ b/packages/context/src/ingest/memory-flow/view-model.ts
@@ -0,0 +1,549 @@
+import type {
+  MemoryFlowChip,
+  MemoryFlowColumnId,
+  MemoryFlowColumnView,
+  MemoryFlowDisplayStatus,
+  MemoryFlowEvent,
+  MemoryFlowReplayInput,
+  MemoryFlowTrustIssue,
+  MemoryFlowViewModel,
+} from './types.js';
+import { sanitizeMemoryFlowError } from './live-buffer.js';
+
+/** Returns the last event of the given `type`, or `undefined` when there is none. */
+function latest<T extends MemoryFlowEvent['type']>(
+  events: MemoryFlowEvent[],
+  type: T,
+): Extract<MemoryFlowEvent, { type: T }> | undefined {
+  return eventsOf(events, type).at(-1);
+}
+
+/** Returns every event of the given `type`, in array order, narrowed to that variant. */
+function eventsOf<T extends MemoryFlowEvent['type']>(
+  events: MemoryFlowEvent[],
+  type: T,
+): Array<Extract<MemoryFlowEvent, { type: T }>> {
+  return events.filter((event): event is Extract<MemoryFlowEvent, { type: T }> => event.type === type);
+}
+
+/** Returns the first `stage_skipped` event recorded for `stage`, if any. */
+function skippedStage(
+  input: MemoryFlowReplayInput,
+  stage: Extract<MemoryFlowEvent, { type: 'stage_skipped' }>['stage'],
+): Extract<MemoryFlowEvent, { type: 'stage_skipped' }> | undefined {
+  return eventsOf(input.events, 'stage_skipped').find((event) => event.stage === stage);
+}
+
+/** Renders a diff event as `+added ~modified -deleted =unchanged`; all zeros when no diff was recorded. */
+function formatDiff(diff: Extract<MemoryFlowEvent, { type: 'diff_computed' }> | undefined): string {
+  if (!diff) return '+0 ~0 -0 =0';
+  return `+${diff.added} ~${diff.modified} -${diff.deleted} =${diff.unchanged}`;
+}
+
+/** Counts `candidate_action` events aimed at the given target. */
+function countCandidateActions(events: MemoryFlowEvent[], target: 'wiki' | 'sl'): number {
+  return eventsOf(events, 'candidate_action').filter((event) => event.target === target).length;
+}
+
+/** Picks a display status by precedence: failed, then warning, then active, then complete, else waiting. */
+function columnStatus(input: {
+  hasFailures?: boolean;
+  hasWarnings?: boolean;
+  hasActivity?: boolean;
+  complete?: boolean;
+}): MemoryFlowDisplayStatus {
+  if (input.hasFailures) return 'failed';
+  if (input.hasWarnings) return 'warning';
+  if (input.hasActivity) return 'active';
+  if (input.complete) return 'complete';
+  return 'waiting';
+}
+
+/** Turns at most the first two labels into chips that share one status. */
+function firstChips(labels: string[], status: MemoryFlowDisplayStatus): Array<{ label: string; status: MemoryFlowDisplayStatus }> {
+  return labels.slice(0, 2).map((label) => ({ label, status }));
+}
+
+/** Returns the run's errors after sanitization, dropping any that sanitize to an empty string. */
+function safeErrors(input: MemoryFlowReplayInput): string[] {
+  return input.errors.map((error) => sanitizeMemoryFlowError(error)).filter((error) => error.length > 0);
+}
+
+/** Returns the first sanitized error, or `null` when there is none (despite the name, not the last one). */
+function latestSafeError(input: MemoryFlowReplayInput): string | null {
+  return safeErrors(input)[0] ?? null;
+}
+
+/** Infers the stage a failed run stopped in from which events were recorded. */
+function failureStage(input: MemoryFlowReplayInput): 'source' | 'planning' | 'work_unit' | 'save' | 'run' {
+  const hasSource = !!latest(input.events, 'source_acquired');
+  const hasChunks = !!latest(input.events, 'chunks_planned');
+  const hasFailedWorkUnit = eventsOf(input.events, 'work_unit_finished').some((event) => event.status === 'failed');
+  const hasSaved = !!latest(input.events, 'saved');
+
+  if (!hasSource) return 'source';
+  if (!hasChunks) return 'planning';
+  if (hasFailedWorkUnit) return 'work_unit';
+  if (hasSaved) return 'save';
+  return 'run';
+}
+
+/** Builds the `active: ...` status line; error runs name the failed stage and the first sanitized error. */
+function activeLine(input: MemoryFlowReplayInput): string {
+  if (input.status !== 'error') {
+    return input.status === 'running' ? 'active: running' : 'active: complete';
+  }
+
+  const error = latestSafeError(input);
+  if (!error) return 'active: error';
+
+  const stage = failureStage(input);
+  return `active: ${stage.replace('_', ' ')} failed - ${error}`;
+}
+
+/** Labels the first error by failure stage and lists any remaining errors as plain `Error:` lines. */
+function errorDetails(input: MemoryFlowReplayInput): string[] {
+  const errors = safeErrors(input);
+  if (errors.length === 0) return [];
+
+  const [first, ...rest] = errors;
+  const stage = failureStage(input);
+  const label =
+    stage === 'source'
+      ? 'Source acquisition failed'
+      : stage === 'save'
+        ? 'Post-save error'
+        : 'Error';
+
+  return [`${label}: ${first}`, ...rest.map((error) => `Error: ${error}`)];
+}
+
+/** Reports whether a failure reason mentions semantic-layer, validation, or invalid (case-insensitive). */
+function isValidationFailure(reason: string | undefined): boolean {
+  return /semantic-layer|validation|invalid/i.test(reason ?? '');
+}
+
+/** Describes each failed work unit, labelling validation failures as reverted. */
+function failedWorkUnitDetails(failed: Array<Extract<MemoryFlowEvent, { type: 'work_unit_finished' }>>): string[] {
+  const details = failed.map((event) => {
+    const reason = event.reason ?? 'failed';
+    const label = isValidationFailure(reason) ? 'reverted' : 'failed';
+    return `${event.unitKey} ${label}: ${sanitizeMemoryFlowError(reason)}`;
+  });
+
+  if (failed.some((event) => isValidationFailure(event.reason))) {
+    details.push('Invalid semantic-layer writes were not saved.');
+  }
+
+  return details;
+}
+
+/** Returns the upper-case column title for a column id. */
+function columnTitle(columnId: MemoryFlowColumnId): string {
+  if (columnId === 'workUnits') return 'WORKUNITS';
+  return columnId.toUpperCase();
+}
+
+/** Formats a count with its singular or plural noun, e.g. `1 conflict` or `2 conflicts`. */
+function plural(value: number, singular: string, pluralLabel = `${singular}s`): string {
+  return `${value} ${value === 1 ? singular : pluralLabel}`;
+}
+
+/** Indexes `work_unit_finished` events by unit key; a later event for the same key replaces an earlier one. */
+function finishedWorkUnitByKey(
+  input: MemoryFlowReplayInput,
+): Map<string, Extract<MemoryFlowEvent, { type: 'work_unit_finished' }>> {
+  return new Map(eventsOf(input.events, 'work_unit_finished').map((event) => [event.unitKey, event]));
+}
+
+/** Builds chips for at most the first eight planned work units. */
+function workUnitChips(input: MemoryFlowReplayInput): MemoryFlowChip[] {
+  const finishedByKey = finishedWorkUnitByKey(input);
+  return input.plannedWorkUnits.slice(0, 8).map((workUnit) => {
+    const finished = finishedByKey.get(workUnit.unitKey);
+    if (finished?.status === 'failed') {
+      return {
+        label: workUnit.unitKey,
+        status: 'failed',
+        detail: sanitizeMemoryFlowError(finished.reason ?? 'failed'),
+      };
+    }
+    return { label: workUnit.unitKey, status: finished ? 'complete' : 'active' };
+  });
+}
+
+/** Builds at most eight action chips, preferring detailed action records over raw candidate events. */
+function actionChips(
+  input: MemoryFlowReplayInput,
+  events: Array<Extract<MemoryFlowEvent, { type: 'candidate_action' }>>,
+): MemoryFlowChip[] {
+  if (input.details.actions.length > 0) {
+    return input.details.actions.slice(0, 8).map((action) => ({
+      label: action.key,
+      status: action.status === 'failed' ? 'failed' : 'complete',
+      detail: action.status === 'failed' ? action.summary : undefined,
+    }));
+  }
+
+  return events.slice(0, 8).map((action) => ({ label: action.key, status: 'complete' }));
+}
+
+/**
+ * Derives trust issues from failed work units, reconciliation conflicts and
+ * fallbacks, a provenance shortfall, skipped stages, and run errors.
+ * Provenance is flagged only when fewer rows than saved memories were recorded.
+ */
+function buildMemoryFlowTrustIssues(input: MemoryFlowReplayInput): MemoryFlowTrustIssue[] {
+  const issues: MemoryFlowTrustIssue[] = [];
+  const failed = eventsOf(input.events, 'work_unit_finished').filter((event) => event.status === 'failed');
+  const reconciliation = latest(input.events, 'reconciliation_finished');
+  const saved = latest(input.events, 'saved');
+  const provenance = latest(input.events, 'provenance_recorded');
+  const conflictCount = reconciliation?.conflictCount ?? 0;
+  const fallbackCount = reconciliation?.fallbackCount ?? 0;
+
+  for (const event of failed) {
+    const reason = sanitizeMemoryFlowError(event.reason ?? 'failed');
+    issues.push({
+      id: `work-unit-failed:${event.unitKey}`,
+      severity: 'failed',
+      title: 'WorkUnit failed',
+      detail: `${event.unitKey} failed: ${reason}`,
+      columnId: 'workUnits',
+      targetLabel: event.unitKey,
+    });
+
+    if (isValidationFailure(event.reason)) {
+      issues.push({
+        id: `sl-validation-reverted:${event.unitKey}`,
+        severity: 'warning',
+        title: 'SL validation revert',
+        detail: `${event.unitKey} reverted after semantic-layer validation failure`,
+        columnId: 'gates',
+        targetLabel: event.unitKey,
+      });
+    }
+  }
+
+  if (conflictCount > 0) {
+    issues.push({
+      id: 'reconciliation-conflicts',
+      severity: 'warning',
+      title: 'Reconciliation conflicts',
+      detail: `${plural(conflictCount, 'conflict')} resolved during reconciliation`,
+      columnId: 'gates',
+    });
+  }
+
+  if (fallbackCount > 0) {
+    issues.push({
+      id: 'flagged-fallbacks',
+      severity: 'warning',
+      title: 'Flagged fallbacks',
+      detail: `${plural(fallbackCount, 'fallback')} ${fallbackCount === 1 ? 'needs' : 'need'} review`,
+      columnId: 'gates',
+    });
+  }
+
+  const savedCount = (saved?.wikiCount ?? 0) + (saved?.slCount ?? 0);
+  if (savedCount > 0 && provenance && provenance.rowCount < savedCount) {
+    issues.push({
+      id: 'provenance-mismatch',
+      severity: 'warning',
+      title: 'Provenance mismatch',
+      detail: `${savedCount} saved memories but ${provenance.rowCount} provenance rows recorded`,
+      columnId: 'saved',
+    });
+  }
+
+  for (const skipped of eventsOf(input.events, 'stage_skipped')) {
+    issues.push({
+      id: `degraded-mode:${skipped.stage}`,
+      severity: 'warning',
+      title: 'Degraded mode',
+      detail: `${columnTitle(skipped.stage)} skipped: ${skipped.reason}`,
+      columnId: skipped.stage,
+      targetLabel: 'skipped',
+    });
+  }
+
+  const runErrorColumnId: MemoryFlowColumnId = failureStage(input) === 'source' ? 'source' : 'gates';
+  for (const [index, error] of safeErrors(input).entries()) {
+    issues.push({
+      id: `run-error:${index}`,
+      severity: 'failed',
+      title: 'Run error',
+      detail: error,
+      columnId: runErrorColumnId,
+    });
+  }
+
+  return issues;
+}
+
+/** Maps an adapter id to its display family label, falling back to the id itself. */
+function humanizeAdapter(adapter: string): string {
+  const labels: Record<string, string> = {
+    'live-database': 'Warehouse',
+    'live_database': 'Warehouse',
+    'dbt_descriptions': 'dbt',
+    'looker': 'BI',
+    'lookml': 'BI',
+    'notion': 'Docs',
+    'metabase': 'BI',
+    'metricflow': 'dbt',
+    'historic_sql': 'SQL',
+  };
+  return Object.hasOwn(labels, adapter) ? (labels[adapter] ?? adapter) : adapter;
+}
+
+function sourceColumn(input: MemoryFlowReplayInput): MemoryFlowColumnView { + const sources = eventsOf(input.events, 'source_acquired'); + const source = sources.at(-1); + const snapshot = latest(input.events, 'raw_snapshot_written'); + const scope = latest(input.events, 'scope_detected'); + const totalFiles = sources.reduce((sum, s) => sum + s.fileCount, 0); + const adapterLabels = sources.length > 1 + ? [...new Set(sources.map((s) => humanizeAdapter(s.adapter)))] + : [input.adapter, input.connectionId]; + return { + id: 'source', + title: 'SOURCE', + status: columnStatus({ complete: !!source }), + headline: `${totalFiles} raw files`, + counters: sources.length > 1 + ? [adapterLabels.join(', '), `sync ${snapshot?.syncId ??
input.syncId}`] + : [`sync ${snapshot?.syncId ?? input.syncId}`, scope?.fingerprint ? `scope ${scope.fingerprint}` : 'scope none'], + chips: adapterLabels.map((label) => ({ label, status: 'complete' as MemoryFlowDisplayStatus })), + details: [ + `Trigger: ${source?.trigger ?? 'unknown'}`, + ...(sources.length > 1 + ? sources.map((s) => `${humanizeAdapter(s.adapter)}: ${s.fileCount} files`) + : [`Adapter: ${input.adapter}`]), + `Connection: ${input.connectionId}`, + `Source: ${input.sourceDir ?? 'stored report'}`, + ...errorDetails(input), + ], + }; +} + +function chunksColumn(input: MemoryFlowReplayInput): MemoryFlowColumnView { + const chunks = latest(input.events, 'chunks_planned'); + const diff = latest(input.events, 'diff_computed'); + return { + id: 'chunks', + title: 'CHUNKS', + status: columnStatus({ hasWarnings: (chunks?.evictionCount ?? 0) > 0, complete: !!chunks }), + headline: `${chunks?.chunkCount ?? 0} chunks`, + counters: [formatDiff(diff), `${chunks?.evictionCount ?? 0} deletions`], + chips: firstChips(input.plannedWorkUnits.map((workUnit) => workUnit.unitKey), 'complete'), + details: [ + `Work units planned: ${chunks?.workUnitCount ?? 0}`, + `Eviction candidates: ${chunks?.evictionCount ?? 
0}`, + `Diff: ${formatDiff(diff)}`, + ], + }; +} + +function workUnitsColumn(input: MemoryFlowReplayInput): MemoryFlowColumnView { + const finished = eventsOf(input.events, 'work_unit_finished'); + const failed = finished.filter((event) => event.status === 'failed'); + const succeeded = finished.filter((event) => event.status === 'success'); + const active = eventsOf(input.events, 'work_unit_started').filter( + (started) => !finished.some((event) => event.unitKey === started.unitKey), + ); + const total = input.plannedWorkUnits.length || latest(input.events, 'chunks_planned')?.workUnitCount || 0; + const skipped = skippedStage(input, 'workUnits'); + if (skipped) { + return { + id: 'workUnits', + title: 'WORKUNITS', + status: 'warning', + headline: 'skipped', + counters: ['0 done', '0 failed', '0 active'], + chips: [{ label: 'skipped', status: 'warning', detail: skipped.reason }], + details: [`Skipped: ${skipped.reason}`], + }; + } + + return { + id: 'workUnits', + title: 'WORKUNITS', + status: columnStatus({ hasFailures: failed.length > 0, hasActivity: active.length > 0, complete: total > 0 }), + headline: `${total} WUs`, + counters: [`${succeeded.length} done`, `${failed.length} failed`, `${active.length} active`], + chips: workUnitChips(input), + details: input.plannedWorkUnits.map( + (workUnit) => + `${workUnit.unitKey}: ${workUnit.rawFiles.length} raw, ${workUnit.peerFileCount} peers, ${workUnit.dependencyCount} deps`, + ), + }; +} + +function actionsColumn(input: MemoryFlowReplayInput): MemoryFlowColumnView { + const actions = eventsOf(input.events, 'candidate_action'); + const wikiCount = countCandidateActions(input.events, 'wiki'); + const slCount = countCandidateActions(input.events, 'sl'); + const skipped = skippedStage(input, 'actions'); + if (skipped) { + return { + id: 'actions', + title: 'ACTIONS', + status: 'warning', + headline: 'skipped', + counters: ['0 wiki', '0 SL'], + chips: [{ label: 'skipped', status: 'warning', detail: skipped.reason }], + 
details: [`Skipped: ${skipped.reason}`], + }; + } + const details = input.details.actions.length + ? input.details.actions.map( + (action) => `${action.unitKey} ${action.target} ${action.action} ${action.key}: ${action.summary}`, + ) + : actions.map((action) => `${action.target} ${action.action}: ${action.key}`); + return { + id: 'actions', + title: 'ACTIONS', + status: columnStatus({ complete: actions.length > 0 }), + headline: `${actions.length} candidates`, + counters: [`${wikiCount} wiki`, `${slCount} SL`], + chips: actionChips(input, actions), + details, + }; +} + +function gatesColumn(input: MemoryFlowReplayInput): MemoryFlowColumnView { + const reconciliation = latest(input.events, 'reconciliation_finished'); + const failed = eventsOf(input.events, 'work_unit_finished').filter((event) => event.status === 'failed'); + const headline = reconciliation + ? `${reconciliation.conflictCount} conflict, ${reconciliation.fallbackCount} fallback` + : 'not run'; + const skipped = skippedStage(input, 'gates'); + if (skipped) { + return { + id: 'gates', + title: 'GATES', + status: 'warning', + headline: 'skipped', + counters: ['0 failed', '0 flagged'], + chips: [{ label: 'skipped', status: 'warning', detail: skipped.reason }], + details: [`Skipped: ${skipped.reason}`], + }; + } + return { + id: 'gates', + title: 'GATES', + status: columnStatus({ + hasFailures: failed.length > 0, + hasWarnings: (reconciliation?.conflictCount ?? 0) > 0 || (reconciliation?.fallbackCount ?? 0) > 0, + complete: !!reconciliation, + }), + headline, + counters: [`${failed.length} failed`, `${reconciliation?.fallbackCount ?? 0} flagged`], + chips: firstChips(failed.map((event) => event.unitKey), 'failed'), + details: [ + `Reconciliation: ${headline}`, + `Failed work units: ${failed.length}`, + `Conflicts resolved: ${reconciliation?.conflictCount ?? 0}`, + `Flagged fallbacks: ${reconciliation?.fallbackCount ?? 
0}`, + ...failedWorkUnitDetails(failed), + ...errorDetails(input), + ], + }; +} + +function savedColumn(input: MemoryFlowReplayInput): MemoryFlowColumnView { + const saved = latest(input.events, 'saved'); + const provenance = latest(input.events, 'provenance_recorded'); + const report = latest(input.events, 'report_created'); + const memoryCount = (saved?.wikiCount ?? 0) + (saved?.slCount ?? 0); + const chipLabels = [saved?.commitSha ? saved.commitSha.slice(0, 8) : '', report?.reportPath ?? ''].filter( + (label): label is string => label.length > 0, + ); + const skipped = skippedStage(input, 'saved'); + if (skipped) { + return { + id: 'saved', + title: 'SAVED', + status: 'warning', + headline: '0 memories', + counters: ['0 wiki', '0 SL', '0 provenance'], + chips: [{ label: 'skipped', status: 'warning', detail: skipped.reason }], + details: [ + `Skipped: ${skipped.reason}`, + `Run: ${input.runId}`, + `Report: ${report?.reportPath ?? input.reportPath ?? 'none'}`, + ], + }; + } + return { + id: 'saved', + title: 'SAVED', + status: columnStatus({ complete: memoryCount > 0 }), + headline: memoryCount > 0 ? `${memoryCount} memories` : 'not saved', + counters: [`${saved?.wikiCount ?? 0} wiki`, `${saved?.slCount ?? 0} SL`, `${provenance?.rowCount ?? 0} provenance`], + chips: firstChips(chipLabels, 'complete'), + details: [ + `Commit: ${saved?.commitSha ? saved.commitSha.slice(0, 8) : 'none'}`, + `Run: ${input.runId}`, + `Report: ${report?.reportPath ?? input.reportPath ?? 'none'}`, + `Provenance rows: ${provenance?.rowCount ?? 0}`, + ...(input.status === 'error' && saved ? ['Durable memory landed before failure.'] : []), + ...(input.status === 'error' && saved ? 
errorDetails(input) : []), + ], + }; +} + +function completionLine(input: MemoryFlowReplayInput): string | null { + const sources = eventsOf(input.events, 'source_acquired'); + const saved = latest(input.events, 'saved'); + const report = latest(input.events, 'report_created'); + if (sources.length === 0 || !saved || saved.wikiCount + saved.slCount === 0) { + return null; + } + const totalFiles = sources.reduce((sum, event) => sum + event.fileCount, 0); + const commit = saved.commitSha ? saved.commitSha.slice(0, 8) : 'none'; + return `Saved ${saved.wikiCount + saved.slCount} memories from ${totalFiles} raw files: ${saved.wikiCount} wiki pages, ${saved.slCount} SL updates. Commit: ${commit} Run: ${input.runId} Report: ${report?.reportPath ?? input.reportPath ?? 'none'}`; +} + +export function buildMemoryFlowViewModel(input: MemoryFlowReplayInput): MemoryFlowViewModel { + const columns = [ + sourceColumn(input), + chunksColumn(input), + workUnitsColumn(input), + actionsColumn(input), + gatesColumn(input), + savedColumn(input), + ]; + const plannedWorkUnitsColumn = columns.find((column) => column.id === 'workUnits'); + const errorColumn = + input.status === 'error' + ? columns.find((column) => column.id === (failureStage(input) === 'source' ? 'source' : 'gates')) + : undefined; + const warningColumn = columns.find((column) => column.status === 'warning'); + const firstExpandableColumn = + errorColumn ?? + warningColumn ?? + (input.plannedWorkUnits.length > 0 && !latest(input.events, 'saved') && plannedWorkUnitsColumn + ? plannedWorkUnitsColumn + : (columns.find((column) => column.details.length > 0) ?? columns[0])); + const trustIssues = buildMemoryFlowTrustIssues(input); + + const sources = eventsOf(input.events, 'source_acquired'); + const titleSources = sources.length > 1 + ? 
[...new Set(sources.map((s) => humanizeAdapter(s.adapter)))].join(' + ') + : `${input.connectionId}/${input.adapter}`; + + return { + title: `KLO memory flow ${titleSources} ${input.status}`, + subtitle: `Run ${input.runId} Sync ${input.syncId}`, + status: input.status, + activeLine: activeLine(input), + columns, + trustIssues, + selectedTitle: firstExpandableColumn.title, + selectedDetails: firstExpandableColumn.details, + completionLine: completionLine(input), + details: input.details, + }; +} diff --git a/packages/context/src/ingest/memory-flow/visuals.test.ts b/packages/context/src/ingest/memory-flow/visuals.test.ts new file mode 100644 index 00000000..75bb16a1 --- /dev/null +++ b/packages/context/src/ingest/memory-flow/visuals.test.ts @@ -0,0 +1,70 @@ +import { describe, expect, it } from 'vitest'; +import { + buildMemoryFlowVisualModel, + memoryFlowStatusBadge, + renderMemoryFlowConnectorLine, +} from './visuals.js'; +import type { MemoryFlowViewModel } from './types.js'; + +function viewWithStatuses(statuses: Array<'waiting' | 'active' | 'complete' | 'warning' | 'failed'>): MemoryFlowViewModel { + const titles = ['SOURCE', 'CHUNKS', 'WORKUNITS', 'ACTIONS', 'GATES', 'SAVED']; + const ids = ['source', 'chunks', 'workUnits', 'actions', 'gates', 'saved'] as const; + + return { + title: 'KLO memory flow warehouse/metricflow running', + subtitle: 'Run run-1 Sync sync-1', + status: 'running', + activeLine: 'active: WorkUnit orders', + selectedTitle: 'WORKUNITS', + selectedDetails: ['orders: 1 raw, 0 peers, 1 deps'], + completionLine: null, + trustIssues: [], + details: { actions: [], provenance: [], transcripts: [] }, + columns: statuses.map((status, index) => ({ + id: ids[index], + title: titles[index], + status, + headline: `${titles[index].toLowerCase()} headline`, + counters: [], + chips: [], + details: [], + })), + }; +} + +describe('memory-flow visual helpers', () => { + it('uses ASCII badges with text meaning for every status', () => { + 
expect(memoryFlowStatusBadge('waiting')).toEqual({ label: '..', text: 'waiting' }); + expect(memoryFlowStatusBadge('active')).toEqual({ label: '>>', text: 'active' }); + expect(memoryFlowStatusBadge('complete')).toEqual({ label: 'OK', text: 'complete' }); + expect(memoryFlowStatusBadge('warning')).toEqual({ label: '!!', text: 'warning' }); + expect(memoryFlowStatusBadge('failed')).toEqual({ label: 'XX', text: 'failed' }); + }); + + it('renders a no-color connector line with status badges and six columns', () => { + const view = viewWithStatuses(['complete', 'complete', 'active', 'waiting', 'waiting', 'waiting']); + + expect(renderMemoryFlowConnectorLine(view)).toBe( + 'OK SOURCE -> OK CHUNKS -> >> WORKUNITS -> .. ACTIONS -> .. GATES -> .. SAVED', + ); + }); + + it('moves the pulse to the active column, then warnings, failures, and the last completed column', () => { + expect( + buildMemoryFlowVisualModel(viewWithStatuses(['complete', 'complete', 'active', 'waiting', 'waiting', 'waiting'])) + .pulseColumnId, + ).toBe('workUnits'); + expect( + buildMemoryFlowVisualModel(viewWithStatuses(['complete', 'warning', 'complete', 'waiting', 'waiting', 'waiting'])) + .pulseColumnId, + ).toBe('chunks'); + expect( + buildMemoryFlowVisualModel(viewWithStatuses(['complete', 'complete', 'failed', 'waiting', 'waiting', 'waiting'])) + .pulseColumnId, + ).toBe('workUnits'); + expect( + buildMemoryFlowVisualModel(viewWithStatuses(['complete', 'complete', 'complete', 'complete', 'waiting', 'waiting'])) + .pulseColumnId, + ).toBe('actions'); + }); +}); diff --git a/packages/context/src/ingest/memory-flow/visuals.ts b/packages/context/src/ingest/memory-flow/visuals.ts new file mode 100644 index 00000000..84eb6113 --- /dev/null +++ b/packages/context/src/ingest/memory-flow/visuals.ts @@ -0,0 +1,78 @@ +import type { + MemoryFlowColumnId, + MemoryFlowColumnView, + MemoryFlowDisplayStatus, + MemoryFlowViewModel, +} from './types.js'; + +export interface MemoryFlowStatusBadge { + label: '..' 
| '>>' | 'OK' | '!!' | 'XX'; + text: 'waiting' | 'active' | 'complete' | 'warning' | 'failed'; +} + +export interface MemoryFlowVisualColumn { + id: MemoryFlowColumnId; + title: string; + status: MemoryFlowDisplayStatus; + badge: MemoryFlowStatusBadge; + pulse: boolean; +} + +export interface MemoryFlowVisualModel { + columns: MemoryFlowVisualColumn[]; + connectorLine: string; + pulseColumnId: MemoryFlowColumnId; +} + +export function memoryFlowStatusBadge(status: MemoryFlowDisplayStatus): MemoryFlowStatusBadge { + if (status === 'active') return { label: '>>', text: 'active' }; + if (status === 'complete') return { label: 'OK', text: 'complete' }; + if (status === 'warning') return { label: '!!', text: 'warning' }; + if (status === 'failed') return { label: 'XX', text: 'failed' }; + return { label: '..', text: 'waiting' }; +} + +function firstColumnWithStatus( + columns: MemoryFlowColumnView[], + status: MemoryFlowDisplayStatus, +): MemoryFlowColumnView | undefined { + return columns.find((column) => column.status === status); +} + +function lastCompletedColumn(columns: MemoryFlowColumnView[]): MemoryFlowColumnView { + return [...columns].reverse().find((column) => column.status === 'complete') ?? columns[0]; +} + +function selectPulseColumn(columns: MemoryFlowColumnView[]): MemoryFlowColumnView { + return ( + firstColumnWithStatus(columns, 'active') ?? + firstColumnWithStatus(columns, 'warning') ?? + firstColumnWithStatus(columns, 'failed') ?? 
+ lastCompletedColumn(columns) + ); +} + +function renderColumn(column: MemoryFlowVisualColumn): string { + return `${column.badge.label} ${column.title}`; +} + +export function buildMemoryFlowVisualModel(view: MemoryFlowViewModel): MemoryFlowVisualModel { + const pulseColumn = selectPulseColumn(view.columns); + const columns = view.columns.map((column) => ({ + id: column.id, + title: column.title, + status: column.status, + badge: memoryFlowStatusBadge(column.status), + pulse: column.id === pulseColumn.id, + })); + + return { + columns, + connectorLine: columns.map(renderColumn).join(' -> '), + pulseColumnId: pulseColumn.id, + }; +} + +export function renderMemoryFlowConnectorLine(view: MemoryFlowViewModel): string { + return buildMemoryFlowVisualModel(view).connectorLine; +} diff --git a/packages/context/src/ingest/metabase-mapping.ts b/packages/context/src/ingest/metabase-mapping.ts new file mode 100644 index 00000000..471d01d9 --- /dev/null +++ b/packages/context/src/ingest/metabase-mapping.ts @@ -0,0 +1,23 @@ +export { + computeMetabaseMappingDrift, + computeMetabaseMappingPhysicalMismatches, + discoverMetabaseDatabases, + findBestMatch, + METABASE_ENGINE_TO_CONNECTION_TYPE, + refreshMetabaseMapping, + validateMappingPhysicalMatch, + validateMetabaseMappings, +} from './adapters/metabase/mapping.js'; +export type { + AutoMatchCandidate, + AutoMatchResult as MetabaseAutoMatchResult, + DiscoveredMetabaseDatabase, + KloConnectionPhysicalInfo, + MappingPhysicalInfo, + MappingRefreshReport, + MetabaseMappedConnectionType, + MetabaseMappingDrift, + MetabaseMappingValidationResult, + PhysicalMismatch, + PhysicalMismatchInput, +} from './adapters/metabase/mapping.js'; diff --git a/packages/context/src/ingest/page-triage/index.ts b/packages/context/src/ingest/page-triage/index.ts new file mode 100644 index 00000000..e589165e --- /dev/null +++ b/packages/context/src/ingest/page-triage/index.ts @@ -0,0 +1,9 @@ +export type { + PageTriageEvidenceChunk, + PageTriageReport, 
+ PageTriageRunArgs, + PageTriageServiceDeps, + PageTriageSettings, + PageTriageStorePort, +} from './page-triage.service.js'; +export { PageTriageService } from './page-triage.service.js'; diff --git a/packages/context/src/ingest/page-triage/page-triage.service.test.ts b/packages/context/src/ingest/page-triage/page-triage.service.test.ts new file mode 100644 index 00000000..1586727f --- /dev/null +++ b/packages/context/src/ingest/page-triage/page-triage.service.test.ts @@ -0,0 +1,569 @@ +import { mkdir, mkdtemp, readFile, rm, writeFile } from 'node:fs/promises'; +import { tmpdir } from 'node:os'; +import { join } from 'node:path'; +import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'; +import { PageTriageService } from './page-triage.service.js'; + +describe('PageTriageService', () => { + let stagedDir: string; + let repository: { + setDocumentTriageLane: ReturnType; + listDocumentChunksForLightExtraction: ReturnType; + insertCandidate: ReturnType; + }; + let service: PageTriageService; + let triageSettings: { + enabled: boolean; + maxConcurrency: number; + lightExtractionEnabled: boolean; + classifierModel: string | null; + lightExtractionMaxCandidates: number; + }; + let promptService: { loadPrompt: ReturnType Promise>> }; + let adapter: { triageSupported: true; getTriageSignals: ReturnType }; + let generateTextMock: ReturnType; + + beforeEach(async () => { + stagedDir = await mkdtemp(join(tmpdir(), 'page-triage-')); + await mkdir(join(stagedDir, 'pages', 'page-1'), { recursive: true }); + await writeFile( + join(stagedDir, 'pages', 'page-1', 'metadata.json'), + JSON.stringify({ + objectType: 'page', + id: 'page-1', + title: 'Support Handoff', + path: 'Company / Support Handoff', + url: null, + parentId: null, + databaseId: null, + dataSourceId: null, + lastEditedAt: '2026-04-29T12:00:00.000Z', + lastEditedBy: null, + properties: { Status: 'Approved' }, + }), + 'utf-8', + ); + await writeFile( + join(stagedDir, 'pages', 'page-1', 'page.md'), 
+ '# Support Handoff\n\nSupport handoffs require a named customer owner.\n', + 'utf-8', + ); + + repository = { + setDocumentTriageLane: vi.fn().mockResolvedValue(1), + listDocumentChunksForLightExtraction: vi.fn().mockResolvedValue([ + { + chunkId: '00000000-0000-0000-0000-000000000101', + headingPath: ['Support Handoff'], + ordinal: 0, + content: 'Support handoffs require a named customer owner.', + stableCitationKey: 'notion:page-1:support-handoff', + citation: { source: 'notion', pageId: 'page-1' }, + rawPath: 'pages/page-1/page.md', + title: 'Support Handoff', + path: 'Company / Support Handoff', + url: null, + lastEditedAt: new Date('2026-04-29T12:00:00.000Z'), + }, + ]), + insertCandidate: vi + .fn() + .mockImplementation((input) => + Promise.resolve({ candidate_key: input.candidateKey, promotion_score: input.promotionScore }), + ), + }; + triageSettings = { + enabled: true, + maxConcurrency: 2, + lightExtractionEnabled: true, + classifierModel: null, + lightExtractionMaxCandidates: 3, + }; + adapter = { + triageSupported: true, + getTriageSignals: vi.fn().mockResolvedValue({ objectType: 'page', propertyHints: { Status: 'Approved' } }), + }; + promptService = { + loadPrompt: vi + .fn<(name: string) => Promise>() + .mockImplementation((name) => Promise.resolve(`prompt:${name}`)), + }; + generateTextMock = vi.fn(); + service = new PageTriageService({ + store: repository as any, + llmProvider: { + getModel: vi.fn().mockReturnValue('model'), + getModelByName: vi.fn(), + cacheMarker: vi.fn(), + repairToolCallHandler: vi.fn(), + thinkingProviderOptions: vi.fn(), + telemetryConfig: vi.fn(), + promptCachingConfig: vi.fn(() => ({ + enabled: false, + systemTtl: '1h', + toolsTtl: '1h', + historyTtl: '5m', + cacheSystem: true, + cacheTools: true, + cacheHistory: true, + vertexFallbackTo5m: false, + })), + activeBackend: vi.fn(() => 'anthropic'), + } as any, + settings: triageSettings, + promptService: promptService as any, + generateText: generateTextMock as any, + }); 
+ }); + + afterEach(async () => { + await rm(stagedDir, { recursive: true, force: true }); + }); + + function parseSignalsFromClassifierPrompt(prompt: string): unknown { + const match = /\n([\s\S]*?)\n<\/signals>/.exec(prompt); + if (!match) { + throw new Error('classifier prompt did not include a block'); + } + return JSON.parse(match[1]); + } + + it('writes light-lane candidates and keeps the page out of full WorkUnits', async () => { + generateTextMock + .mockResolvedValueOnce({ text: JSON.stringify({ lane: 'light', reason: 'short durable policy' }) } as any) + .mockResolvedValueOnce({ + text: JSON.stringify({ + candidates: [ + { + candidateKey: 'support-handoff-owner', + topic: 'Support Handoff', + assertion: 'Support handoffs require a named customer owner.', + rationale: 'The staged Support Handoff page states the owner rule.', + evidenceChunkIds: ['00000000-0000-0000-0000-000000000101'], + suggestedPageKey: 'support-handoff', + actionHint: 'create', + durabilityScore: 3, + authorityScore: 2, + reuseScore: 3, + noveltyScore: 2, + riskScore: 0, + }, + ], + }), + } as any); + + const result = await service.triageRun({ + stagedDir, + runId: 'run-1', + connectionId: 'conn-1', + sourceKey: 'notion', + syncId: 'sync-1', + jobId: 'job-1', + diffSet: { + added: ['pages/page-1/metadata.json', 'pages/page-1/page.md'], + modified: [], + deleted: [], + unchanged: [], + }, + adapter: adapter as any, + }); + + expect(result.enabled).toBe(true); + expect(result.report).toEqual({ + pageCount: 1, + skip: 0, + light: 1, + full: 0, + classifierFailures: 0, + lightExtractionFailures: 0, + }); + expect(result.fullRawPaths.has('pages/page-1/page.md')).toBe(false); + expect(adapter.getTriageSignals).toHaveBeenCalledWith(stagedDir, 'page-1'); + expect(repository.setDocumentTriageLane).toHaveBeenCalledWith('run-1', 'pages/page-1/page.md', 'light'); + expect(repository.insertCandidate).toHaveBeenCalledWith( + expect.objectContaining({ + runId: 'run-1', + candidateKey: 
'support-handoff-owner', + lane: 'light', + promotionScore: 10, + }), + ); + }); + + it('does not classify named reusable sales scripts as skip', async () => { + await writeFile( + join(stagedDir, 'pages', 'page-1', 'metadata.json'), + JSON.stringify({ + objectType: 'page', + id: 'page-1', + title: 'Cold Call Script', + path: 'Sales / Cold Call Script', + url: null, + parentId: null, + databaseId: null, + dataSourceId: null, + lastEditedAt: '2026-04-29T12:00:00.000Z', + lastEditedBy: null, + properties: { Team: 'Sales' }, + }), + 'utf-8', + ); + await writeFile( + join(stagedDir, 'pages', 'page-1', 'page.md'), + [ + '# Cold Call Script', + '', + 'Reusable outbound sequence:', + '', + '- Ask about current customer success expansion workflow.', + '- Position KLO as AI search visibility for CS teams.', + '- Close with a discovery call request.', + ].join('\n'), + 'utf-8', + ); + + promptService.loadPrompt.mockImplementation((name: string) => { + if (name === 'skills/page_triage_classifier') { + return Promise.resolve( + [ + 'Reusable templates and scripts are durable knowledge regardless of subject matter.', + 'Date-titled standups are still skip; named templates and scripts are not.', + ].join('\n'), + ); + } + return Promise.resolve(`prompt:${name}`); + }); + generateTextMock + .mockImplementationOnce((args: any) => { + const prompt = args.messages[0].content as string; + expect(prompt).toContain('Reusable templates and scripts are durable knowledge regardless of subject matter.'); + expect(prompt).toContain('Date-titled standups are still skip; named templates and scripts are not.'); + expect(prompt).toContain('Cold Call Script'); + return { text: JSON.stringify({ lane: 'light', reason: 'reusable sales script' }) } as any; + }) + .mockResolvedValueOnce({ + text: JSON.stringify({ + candidates: [ + { + candidateKey: 'cold-call-script', + topic: 'Cold Call Script', + assertion: 'Cold call outreach should position KLO around AI search visibility for CS teams.', + 
rationale: 'The script gives a reusable outbound call sequence and positioning language.', + evidenceChunkIds: ['00000000-0000-0000-0000-000000000101'], + suggestedPageKey: 'cold-call-script', + actionHint: 'create', + durabilityScore: 3, + authorityScore: 2, + reuseScore: 3, + noveltyScore: 2, + riskScore: 0, + }, + ], + }), + } as any); + + const result = await service.triageRun({ + stagedDir, + runId: 'run-1', + connectionId: 'conn-1', + sourceKey: 'notion', + syncId: 'sync-1', + jobId: 'job-1', + diffSet: { + added: ['pages/page-1/metadata.json', 'pages/page-1/page.md'], + modified: [], + deleted: [], + unchanged: [], + }, + adapter: adapter as any, + }); + + expect(result.report).toMatchObject({ pageCount: 1, skip: 0, light: 1, full: 0 }); + expect(repository.setDocumentTriageLane).toHaveBeenCalledWith('run-1', 'pages/page-1/page.md', 'light'); + }); + + it.each([ + { + name: 'skip low solo template', + propertyHints: { + executions_bucket: 'low', + distinct_users_bucket: 'solo', + error_rate_bucket: 'ok', + recency_bucket: 'active', + service_account_only: 'false', + slot_summary: '1 constant, 1 runtime', + }, + expectedLane: 'skip', + expectedReport: { skip: 1, light: 0, full: 0 }, + }, + { + name: 'light service-account-only template', + propertyHints: { + executions_bucket: 'high', + distinct_users_bucket: 'solo', + error_rate_bucket: 'ok', + recency_bucket: 'active', + service_account_only: 'true', + slot_summary: '1 constant, 0 runtime', + }, + expectedLane: 'light', + expectedReport: { skip: 0, light: 1, full: 0 }, + }, + { + name: 'full shared human template', + propertyHints: { + executions_bucket: 'high', + distinct_users_bucket: 'team', + error_rate_bucket: 'ok', + recency_bucket: 'active', + service_account_only: 'false', + slot_summary: '2 constant, 1 runtime', + }, + expectedLane: 'full', + expectedReport: { skip: 0, light: 0, full: 1 }, + }, + ] as const)('triages historic-SQL synthetic signal fixture as $expectedLane for $name', async ({ + 
name, + propertyHints, + expectedLane, + expectedReport, + }) => { + const externalId = name.replace(/[^a-z0-9]+/g, '_'); + const templateDir = join(stagedDir, 'templates', externalId); + await mkdir(templateDir, { recursive: true }); + await writeFile( + join(templateDir, 'metadata.json'), + JSON.stringify({ + id: externalId, + title: `snowflake - analytics.orders [${externalId.slice(0, 6)}]`, + path: `templates/${externalId}/page.md`, + objectType: 'historic_sql_template', + lastEditedAt: null, + properties: { + fingerprint: externalId, + sub_cluster_id: null, + dialect: 'snowflake', + tables_touched: ['analytics.orders'], + literal_slots: [{ position: 1, type: 'string', classification: 'constant' }], + triage_signals: propertyHints, + }, + }), + 'utf-8', + ); + await writeFile( + join(templateDir, 'page.md'), + [ + `# ${externalId}`, + '', + '## Normalized SQL', + '```sql', + 'SELECT count(*) FROM analytics.orders WHERE status = ?', + '```', + '', + '## Tables touched', + '- analytics.orders', + ].join('\n'), + 'utf-8', + ); + + adapter.getTriageSignals.mockResolvedValueOnce({ + objectType: 'historic_sql_template', + lastEditedAt: '2026-05-04T12:00:00.000Z', + propertyHints, + }); + promptService.loadPrompt.mockImplementation((promptName: string) => { + if (promptName === 'skills/page_triage_classifier') { + return readFile(new URL('../../../prompts/skills/page_triage_classifier.md', import.meta.url), 'utf-8'); + } + return Promise.resolve(`prompt:${promptName}`); + }); + generateTextMock.mockImplementationOnce((args: any) => { + const prompt = args.messages[0].content as string; + expect(prompt).toContain('signals.objectType === "historic_sql_template"'); + expect(prompt).toContain('executions_bucket=low AND distinct_users_bucket=solo'); + expect(prompt).toContain('service_account_only=true AND below the frequency floor'); + expect(prompt).toContain('shared human usage with mid or high execution volume'); + 
expect(parseSignalsFromClassifierPrompt(prompt)).toEqual({ + objectType: 'historic_sql_template', + lastEditedAt: '2026-05-04T12:00:00.000Z', + propertyHints, + }); + return { text: JSON.stringify({ lane: expectedLane, reason: `${name} fixture` }) } as any; + }); + if (expectedLane === 'light') { + generateTextMock.mockResolvedValueOnce({ + text: JSON.stringify({ + candidates: [ + { + candidateKey: 'historic-sql-service-account-template', + topic: 'Historic SQL Service Account Template', + assertion: 'A service-account-only historic SQL template can remain as light evidence.', + rationale: 'The synthetic historic-SQL fixture is service-account-only and below the frequency floor.', + evidenceChunkIds: ['00000000-0000-0000-0000-000000000101'], + suggestedPageKey: 'historic-sql-service-account-template', + actionHint: 'create', + durabilityScore: 2, + authorityScore: 1, + reuseScore: 2, + noveltyScore: 1, + riskScore: 0, + }, + ], + }), + } as any); + } + + const result = await service.triageRun({ + stagedDir, + runId: 'run-1', + connectionId: 'conn-1', + sourceKey: 'historic-sql', + syncId: 'sync-1', + jobId: 'job-1', + diffSet: { + added: [`templates/${externalId}/metadata.json`, `templates/${externalId}/page.md`], + modified: [], + deleted: [], + unchanged: [], + }, + adapter: adapter as any, + }); + + expect(result.report).toMatchObject({ pageCount: 1, ...expectedReport }); + expect(repository.setDocumentTriageLane).toHaveBeenCalledWith( + 'run-1', + `templates/${externalId}/page.md`, + expectedLane, + ); + expect(result.fullRawPaths.has(`templates/${externalId}/metadata.json`)).toBe(expectedLane === 'full'); + expect(result.fullRawPaths.has(`templates/${externalId}/page.md`)).toBe(expectedLane === 'full'); + }); + + it('triages Notion data-source row pages without reading data-source metadata as page markdown', async () => { + triageSettings.lightExtractionEnabled = false; + + await mkdir(join(stagedDir, 'data-sources', 'ds-1', 'rows', 'row-1'), { recursive: true 
}); + await writeFile( + join(stagedDir, 'data-sources', 'ds-1', 'metadata.json'), + JSON.stringify({ + objectType: 'data_source', + id: 'ds-1', + title: 'Product Docs', + path: 'Product Docs', + }), + 'utf-8', + ); + await writeFile( + join(stagedDir, 'data-sources', 'ds-1', 'rows', 'row-1', 'metadata.json'), + JSON.stringify({ + objectType: 'data_source_row', + id: 'row-1', + title: 'Launch Policy', + path: 'Product Docs / Launch Policy', + dataSourceId: 'ds-1', + }), + 'utf-8', + ); + await writeFile( + join(stagedDir, 'data-sources', 'ds-1', 'rows', 'row-1', 'page.md'), + '# Launch Policy\n\nLaunches require a customer-facing rollback owner.\n', + 'utf-8', + ); + + generateTextMock.mockResolvedValue({ + text: JSON.stringify({ lane: 'full', reason: 'durable policy page' }), + } as any); + + const result = await service.triageRun({ + stagedDir, + runId: 'run-1', + connectionId: 'conn-1', + sourceKey: 'notion', + syncId: 'sync-1', + jobId: 'job-1', + diffSet: { + added: [ + 'pages/page-1/metadata.json', + 'pages/page-1/page.md', + 'data-sources/ds-1/metadata.json', + 'data-sources/ds-1/rows/row-1/metadata.json', + 'data-sources/ds-1/rows/row-1/page.md', + ], + modified: [], + deleted: [], + unchanged: [], + }, + adapter: adapter as any, + }); + + expect(result.report).toMatchObject({ pageCount: 2, skip: 0, light: 0, full: 2 }); + expect([...result.fullRawPaths].sort()).toEqual( + expect.arrayContaining(['data-sources/ds-1/rows/row-1/page.md', 'pages/page-1/page.md']), + ); + expect(result.fullRawPaths.has('data-sources/ds-1/metadata.json')).toBe(false); + expect(repository.setDocumentTriageLane).toHaveBeenCalledWith( + 'run-1', + 'data-sources/ds-1/rows/row-1/page.md', + 'full', + ); + }); + + it('falls back to full when classifier output is malformed', async () => { + generateTextMock.mockResolvedValueOnce({ text: 'not-json' } as any); + + const result = await service.triageRun({ + stagedDir, + runId: 'run-1', + connectionId: 'conn-1', + sourceKey: 'notion', + 
syncId: 'sync-1', + jobId: 'job-1', + diffSet: { added: ['pages/page-1/page.md'], modified: [], deleted: [], unchanged: [] }, + adapter: adapter as any, + }); + + expect(result.report).toMatchObject({ pageCount: 1, skip: 0, light: 0, full: 1, classifierFailures: 1 }); + expect(result.fullRawPaths.has('pages/page-1/page.md')).toBe(true); + expect(repository.setDocumentTriageLane).toHaveBeenCalledWith('run-1', 'pages/page-1/page.md', 'full'); + }); + + it('promotes a light page to full when light extraction fails', async () => { + generateTextMock + .mockResolvedValueOnce({ text: JSON.stringify({ lane: 'light', reason: 'short durable policy' }) } as any) + .mockRejectedValueOnce(new Error('provider unavailable')); + + const result = await service.triageRun({ + stagedDir, + runId: 'run-1', + connectionId: 'conn-1', + sourceKey: 'notion', + syncId: 'sync-1', + jobId: 'job-1', + diffSet: { added: ['pages/page-1/page.md'], modified: [], deleted: [], unchanged: [] }, + adapter: adapter as any, + }); + + expect(result.report).toMatchObject({ pageCount: 1, skip: 0, light: 0, full: 1, lightExtractionFailures: 1 }); + expect(result.fullRawPaths.has('pages/page-1/page.md')).toBe(true); + expect(repository.setDocumentTriageLane).toHaveBeenLastCalledWith('run-1', 'pages/page-1/page.md', 'full'); + }); + + it('short-circuits when triage is disabled', async () => { + triageSettings.enabled = false; + + const result = await service.triageRun({ + stagedDir, + runId: 'run-1', + connectionId: 'conn-1', + sourceKey: 'notion', + syncId: 'sync-1', + jobId: 'job-1', + diffSet: { added: ['pages/page-1/page.md'], modified: [], deleted: [], unchanged: [] }, + adapter: adapter as any, + }); + + expect(result).toEqual({ enabled: false, report: undefined, fullRawPaths: new Set(), warnings: [] }); + expect(generateTextMock).not.toHaveBeenCalled(); + expect(repository.setDocumentTriageLane).not.toHaveBeenCalled(); + }); +}); diff --git 
a/packages/context/src/ingest/page-triage/page-triage.service.ts b/packages/context/src/ingest/page-triage/page-triage.service.ts
new file mode 100644
index 00000000..fc4bc389
--- /dev/null
+++ b/packages/context/src/ingest/page-triage/page-triage.service.ts
@@ -0,0 +1,591 @@
+import { createHash } from 'node:crypto';
+import { readdir, readFile } from 'node:fs/promises';
+import { dirname, join, relative } from 'node:path';
+import { KloMessageBuilder, type KloLlmProvider } from '@klo/llm';
+import { generateText, type ToolSet } from 'ai';
+import pLimit from 'p-limit';
+import { z } from 'zod';
+import { type KloLogger, noopLogger } from '../../core/index.js';
+import type { PromptService } from '../../prompts/index.js';
+import type { InsertContextCandidateInput } from '../context-candidates/index.js';
+import type { JsonValue } from '../ports.js';
+import type { DiffSet, SourceAdapter, TriageLane, TriageSignals } from '../types.js';
+
+// Each candidate score is an integer from 0 to 3 inclusive.
+const scoreSchema = z.number().int().min(0).max(3);
+// Classifier reply shape; only `lane` is read by this service.
+const triageOutputSchema = z.object({
+  lane: z.enum(['skip', 'light', 'full']),
+  reason: z.string().optional(),
+});
+// One candidate as returned by the light-extraction model call.
+const lightCandidateSchema = z.object({
+  candidateKey: z.string().min(1).max(160),
+  topic: z.string().min(1).max(200),
+  assertion: z.string().min(1).max(500),
+  rationale: z.string().min(1).max(1000),
+  evidenceChunkIds: z.array(z.string().min(1)).min(1),
+  suggestedPageKey: z.string().min(1).max(120).optional(),
+  actionHint: z.enum(['create', 'update', 'merge', 'conflict', 'skip']),
+  durabilityScore: scoreSchema,
+  authorityScore: scoreSchema,
+  reuseScore: scoreSchema,
+  noveltyScore: scoreSchema,
+  riskScore: scoreSchema,
+});
+// A reply without `candidates` is treated as an empty list.
+const lightOutputSchema = z.object({
+  candidates: z.array(lightCandidateSchema).default([]),
+});
+
+/** A staged page (page.md plus its sibling metadata.json) selected for triage. */
+interface StagedTriageDocument {
+  externalId: string;
+  title: string;
+  path: string;
+  metadataRawPath: string;
+  markdownRawPath: string;
+  markdown: string;
+}
+
+/** Per-run counters: pages seen, pages per final lane, and failure counts. */
+export interface PageTriageReport {
+  pageCount: number;
+  skip: number;
+  light: number;
+  full: number;
+  classifierFailures: number;
+  lightExtractionFailures: number;
+}
+
+/** Result of `triageRun`; `report` is left undefined when triage did not run. */
+interface PageTriageRunResult {
+  enabled: boolean;
+  report?: PageTriageReport;
+  fullRawPaths: Set<string>;
+  warnings: string[];
+}
+
+/** Inputs for one triage pass over a staged sync directory. */
+export interface PageTriageRunArgs {
+  stagedDir: string;
+  runId: string;
+  connectionId: string;
+  sourceKey: string;
+  syncId: string;
+  jobId: string;
+  diffSet: DiffSet;
+  adapter: Pick<SourceAdapter, 'triageSupported' | 'getTriageSignals'>; // NOTE(review): keys reconstructed from usage in this file
+}
+
+/** An indexed chunk of a page, offered to the light-extraction model as citable evidence. */
+export interface PageTriageEvidenceChunk {
+  chunkId: string;
+  headingPath: string[];
+  ordinal: number;
+  content: string;
+  stableCitationKey: string;
+  citation: JsonValue;
+  rawPath: string;
+  title: string;
+  path: string;
+  url: string | null;
+  lastEditedAt: Date | null;
+}
+
+/**
+ * Storage operations used by the service. The resolved values of the two write
+ * calls are never read in this file, so they are typed as `unknown`.
+ * NOTE(review): the type arguments were missing from this copy - confirm upstream.
+ */
+export interface PageTriageStorePort {
+  setDocumentTriageLane(runId: string, rawPath: string, lane: TriageLane): Promise<unknown>;
+  listDocumentChunksForLightExtraction(runId: string, rawPath: string): Promise<PageTriageEvidenceChunk[]>;
+  insertCandidate(input: InsertContextCandidateInput): Promise<unknown>;
+}
+
+/** Settings read by the service. NOTE(review): `classifierModel` is declared but never read in this file. */
+export interface PageTriageSettings {
+  enabled: boolean;
+  maxConcurrency: number;
+  lightExtractionEnabled: boolean;
+  classifierModel: string | null;
+  lightExtractionMaxCandidates: number;
+}
+
+/** Collaborators injected into `PageTriageService`; `logger` and `generateText` are optional. */
+export interface PageTriageServiceDeps {
+  store: PageTriageStorePort;
+  llmProvider: KloLlmProvider;
+  settings: PageTriageSettings;
+  promptService: PromptService;
+  logger?: KloLogger;
+  generateText?: typeof generateText;
+}
+
+/**
+ * Sorts each changed staged page into a triage lane (skip, light or full) with
+ * one model call per page and, for light pages, stores context candidates from
+ * a second model call. Classifier and light-extraction failures fall back to full.
+ */
+export class PageTriageService {
+  private readonly logger: KloLogger;
+  private readonly runGenerateText: typeof generateText;
+
+  constructor(private readonly deps: PageTriageServiceDeps) {
+    this.logger = deps.logger ?? noopLogger;
+    // Use the injected generateText when provided, otherwise the AI SDK export.
+    this.runGenerateText = deps.generateText ?? generateText;
+  }
+
+  /**
+   * Triage every added or modified page of a sync.
+   *
+   * Resolves to `enabled: false` with no report when triage is disabled in
+   * settings or the adapter does not set `triageSupported`. Otherwise pages are
+   * classified with bounded concurrency and the metadata and markdown raw paths
+   * of every page that ends in the full lane are collected in `fullRawPaths`.
+   */
+  async triageRun(args: PageTriageRunArgs): Promise<PageTriageRunResult> {
+    const config = this.deps.settings;
+    if (!config.enabled || !args.adapter.triageSupported) {
+      return { enabled: false, report: undefined, fullRawPaths: new Set<string>(), warnings: [] };
+    }
+
+    const documents = await this.collectChangedDocuments(args.stagedDir, args.diffSet);
+    const report: PageTriageReport = {
+      pageCount: documents.length,
+      skip: 0,
+      light: 0,
+      full: 0,
+      classifierFailures: 0,
+      lightExtractionFailures: 0,
+    };
+    const fullRawPaths = new Set<string>();
+    const warnings: string[] = [];
+    // p-limit throws unless the limit is a positive integer or Infinity, so a bad
+    // setting degrades to one page at a time instead of failing the whole run.
+    const concurrency = config.maxConcurrency >= 1 ? Math.floor(config.maxConcurrency) : 1;
+    const limit = pLimit(concurrency);
+    const startedAt = Date.now();
+
+    await Promise.all(
+      documents.map((document) =>
+        limit(async () => {
+          const outcome = await this.triageDocument(args, document, warnings);
+          report.classifierFailures += outcome.classifierFailed ? 1 : 0;
+          report.lightExtractionFailures += outcome.lightExtractionFailed ? 1 : 0;
+          report[outcome.lane] += 1;
+
+          if (outcome.lane === 'full') {
+            fullRawPaths.add(document.metadataRawPath);
+            fullRawPaths.add(document.markdownRawPath);
+          }
+        }),
+      ),
+    );
+
+    this.logger.log(
+      `Stage 2.5 triage took ${Date.now() - startedAt}ms (${concurrency} max concurrent classifier calls)`,
+    );
+    this.logger.log(`Triage lanes: ${report.skip} skip, ${report.light} light, ${report.full} full`);
+
+    return { enabled: true, report, fullRawPaths, warnings };
+  }
+
+  /**
+   * Classify one page and, if it lands in the light lane, run light extraction.
+   *
+   * The lane stays `full` whenever prompt building, the classifier call, JSON
+   * parsing or schema validation throws. A `light` verdict is promoted to
+   * `full` when light extraction is disabled or fails. Failures are appended to
+   * `warnings`; errors thrown by the store are not caught here.
+   */
+  private async triageDocument(
+    args: PageTriageRunArgs,
+    document: StagedTriageDocument,
+    warnings: string[],
+  ): Promise<{ lane: TriageLane; classifierFailed: boolean; lightExtractionFailed: boolean }> {
+    const config = this.deps.settings;
+    let lane: TriageLane = 'full';
+    let classifierFailed = false;
+    let lightExtractionFailed = false;
+
+    try {
+      const signals = await this.getSignals(args, document, warnings);
+      const classifierPrompt = await this.buildClassifierPrompt(document, signals);
+      const modelText = await this.callModel({
+        operationName: 'page-triage',
+        prompt: classifierPrompt,
+        sourceKey: args.sourceKey,
+        jobId: args.jobId,
+        unitKey: document.markdownRawPath,
+      });
+      lane = triageOutputSchema.parse(this.parseJson(modelText)).lane;
+      if (lane === 'light' && !config.lightExtractionEnabled) {
+        lane = 'full';
+      }
+    } catch (error) {
+      classifierFailed = true;
+      lane = 'full';
+      warnings.push(
+        `Triage classifier failed for ${document.markdownRawPath}: ${
+          error instanceof Error ? error.message : String(error)
+        }`,
+      );
+    }
+
+    // Persist the classifier verdict before any light extraction is attempted.
+    await this.deps.store.setDocumentTriageLane(args.runId, document.markdownRawPath, lane);
+
+    if (lane !== 'light') {
+      return { lane, classifierFailed, lightExtractionFailed };
+    }
+
+    try {
+      await this.extractLightCandidates(args, document);
+      return { lane: 'light', classifierFailed, lightExtractionFailed };
+    } catch (error) {
+      lightExtractionFailed = true;
+      warnings.push(
+        `Light extraction failed for ${document.markdownRawPath}: ${
+          error instanceof Error ? error.message : String(error)
+        }`,
+      );
+      // Overwrite the stored `light` lane with `full` for this page.
+      await this.deps.store.setDocumentTriageLane(args.runId, document.markdownRawPath, 'full');
+      return { lane: 'full', classifierFailed, lightExtractionFailed };
+    }
+  }
+
+  /**
+   * Ask the adapter for its optional triage signals for a page. Resolves to
+   * `undefined` when the adapter has no `getTriageSignals` hook or the hook
+   * throws; a throw is recorded as a warning instead of propagating.
+   */
+  private async getSignals(
+    args: PageTriageRunArgs,
+    document: StagedTriageDocument,
+    warnings: string[],
+  ): Promise<TriageSignals | undefined> {
+    if (!args.adapter.getTriageSignals) {
+      return undefined;
+    }
+
+    try {
+      return await args.adapter.getTriageSignals(args.stagedDir, document.externalId);
+    } catch (error) {
+      warnings.push(
+        `Triage signals failed for ${document.markdownRawPath}: ${
+          error instanceof Error ? error.message : String(error)
+        }`,
+      );
+      return undefined;
+    }
+  }
+
+  /**
+   * Run the light-extraction model call for a page and store its candidates.
+   *
+   * Throws when the page has no indexed chunks or the model reply is not JSON
+   * matching `lightOutputSchema`. Candidates beyond
+   * `lightExtractionMaxCandidates` are dropped; the rest are inserted in order.
+   */
+  private async extractLightCandidates(args: PageTriageRunArgs, document: StagedTriageDocument): Promise<void> {
+    const chunks = await this.deps.store.listDocumentChunksForLightExtraction(args.runId, document.markdownRawPath);
+    if (chunks.length === 0) {
+      throw new Error('no indexed chunks available for light extraction');
+    }
+
+    const prompt = await this.buildLightExtractionPrompt(document, chunks);
+    const text = await this.callModel({
+      operationName: 'light-extraction',
+      prompt,
+      sourceKey: args.sourceKey,
+      jobId: args.jobId,
+      unitKey: document.markdownRawPath,
+    });
+    const output = lightOutputSchema.parse(this.parseJson(text));
+    const maxCandidates = this.deps.settings.lightExtractionMaxCandidates;
+
+    for (const [index, candidate] of output.candidates.slice(0, maxCandidates).entries()) {
+      const evidenceChunkIds = this.validEvidenceChunkIds(candidate.evidenceChunkIds, chunks);
+      // Four scores add up; the risk score is subtracted.
+      const promotionScore =
+        candidate.durabilityScore +
+        candidate.authorityScore +
+        candidate.reuseScore +
+        candidate.noveltyScore -
+        candidate.riskScore;
+      const status =
+        candidate.actionHint === 'conflict' ? 'conflict' : candidate.actionHint === 'skip' ? 'rejected' : 'pending';
+
+      await this.deps.store.insertCandidate({
+        runId: args.runId,
+        connectionId: args.connectionId,
+        sourceKey: args.sourceKey,
+        candidateKey: this.stableCandidateKey(candidate.candidateKey, document.externalId, index),
+        topic: candidate.topic,
+        assertion: candidate.assertion,
+        rationale: candidate.rationale,
+        evidenceChunkIds,
+        evidenceRefs: this.evidenceRefs(evidenceChunkIds, chunks),
+        suggestedPageKey: candidate.suggestedPageKey ?? null,
+        actionHint: candidate.actionHint,
+        durabilityScore: candidate.durabilityScore,
+        authorityScore: candidate.authorityScore,
+        reuseScore: candidate.reuseScore,
+        noveltyScore: candidate.noveltyScore,
+        riskScore: candidate.riskScore,
+        promotionScore,
+        status,
+        rejectionReason: candidate.actionHint === 'skip' ? 'not_durable' : null,
+        lane: 'light',
+      });
+    }
+  }
+
+  /**
+   * Keep only cited chunk ids that belong to this page. If none remain, fall
+   * back to the first chunk; the only caller passes a non-empty `chunks` list.
+   */
+  private validEvidenceChunkIds(candidateIds: string[], chunks: PageTriageEvidenceChunk[]): string[] {
+    const available = new Set(chunks.map((chunk) => chunk.chunkId));
+    const valid = candidateIds.filter((chunkId) => available.has(chunkId));
+    return valid.length > 0 ? valid : [chunks[0].chunkId];
+  }
+
+  /** Build one citation record per chunk id, dropping ids that match no chunk. */
+  private evidenceRefs(chunkIds: string[], chunks: PageTriageEvidenceChunk[]): JsonValue {
+    const byId = new Map(chunks.map((chunk) => [chunk.chunkId, chunk]));
+    return chunkIds.flatMap((chunkId) => {
+      const chunk = byId.get(chunkId);
+      if (!chunk) {
+        return [];
+      }
+      return [
+        {
+          chunkId: chunk.chunkId,
+          stableCitationKey: chunk.stableCitationKey,
+          syncId: this.syncIdFromCitation(chunk.citation),
+          rawPath: chunk.rawPath,
+          title: chunk.title,
+          path: chunk.path,
+          url: chunk.url,
+          lastEditedAt: chunk.lastEditedAt?.toISOString() ?? null,
+          // SHA-256 hex digest of the chunk text.
+          snippetHash: createHash('sha256').update(chunk.content).digest('hex'),
+          citation: chunk.citation,
+        },
+      ];
+    });
+  }
+
+  /** Read a string `syncId` from an object-shaped citation; anything else yields null. */
+  private syncIdFromCitation(citation: JsonValue): string | null {
+    if (citation && typeof citation === 'object' && !Array.isArray(citation)) {
+      const syncId = (citation as Record<string, JsonValue | undefined>).syncId;
+      return typeof syncId === 'string' ? syncId : null;
+    }
+    return null;
+  }
+
+  /**
+   * Send one user message to the provider's `triage` model at temperature 0 and
+   * resolve to the reply text.
+   *
+   * NOTE(review): only `prompt` is read; `operationName`, `sourceKey`, `jobId`
+   * and `unitKey` are accepted but unused in this body - confirm that is intended.
+   */
+  private async callModel(params: {
+    operationName: 'page-triage' | 'light-extraction';
+    prompt: string;
+    sourceKey: string;
+    jobId: string;
+    unitKey: string;
+  }): Promise<string> {
+    const model = this.deps.llmProvider.getModel('triage');
+    const built = new KloMessageBuilder(this.deps.llmProvider).wrapSimple({
+      messages: [{ role: 'user', content: params.prompt }],
+      tools: {},
+      model,
+    });
+    const result = await this.runGenerateText({
+      model,
+      temperature: 0,
+      messages: built.messages,
+      tools: built.tools as ToolSet,
+    });
+    return result.text;
+  }
+
+  /**
+   * Assemble the classifier prompt: base prompt, page identity lines, the
+   * adapter signals as JSON (`{}` when absent) and the first 2048 UTF-16 code
+   * units of the markdown.
+   *
+   * NOTE(review): the empty-string entries around the signals and markdown
+   * sections look like delimiter lines whose text is missing from this copy of
+   * the patch - confirm against the upstream file.
+   */
+  private async buildClassifierPrompt(
+    document: StagedTriageDocument,
+    signals: TriageSignals | undefined,
+  ): Promise<string> {
+    const base = await this.deps.promptService.loadPrompt('skills/page_triage_classifier');
+    return [
+      base,
+      '',
+      `externalId: ${document.externalId}`,
+      `title: ${document.title}`,
+      `path: ${document.path}`,
+      `rawPath: ${document.markdownRawPath}`,
+      '',
+      '',
+      JSON.stringify(signals ?? {}, null, 2),
+      '',
+      '',
+      document.markdown.slice(0, 2048),
+      '',
+    ].join('\n');
+  }
+
+  /**
+   * Assemble the light-extraction prompt: base prompt, the candidate cap, page
+   * identity lines, the evidence chunks as JSON and the complete markdown.
+   *
+   * NOTE(review): the empty-string entries here also look like delimiter lines
+   * whose text is missing from this copy - confirm against the upstream file.
+   */
+  private async buildLightExtractionPrompt(
+    document: StagedTriageDocument,
+    chunks: PageTriageEvidenceChunk[],
+  ): Promise<string> {
+    const base = await this.deps.promptService.loadPrompt('skills/light_extraction');
+    return [
+      base,
+      `Maximum candidates: ${this.deps.settings.lightExtractionMaxCandidates}`,
+      '',
+      `externalId: ${document.externalId}`,
+      `title: ${document.title}`,
+      `path: ${document.path}`,
+      `rawPath: ${document.markdownRawPath}`,
+      '',
+      '',
+      JSON.stringify(
+        chunks.map((chunk) => ({
+          chunkId: chunk.chunkId,
+          headingPath: chunk.headingPath,
+          stableCitationKey: chunk.stableCitationKey,
+          content: chunk.content,
+        })),
+        null,
+        2,
+      ),
+      '',
+      '',
+      document.markdown,
+      '',
+    ].join('\n');
+  }
+
+  /** Parse model output as JSON, unwrapping one surrounding Markdown code fence (optionally tagged json). */
+  private parseJson(text: string): unknown {
+    const trimmed = text.trim();
+    const fenced = /^```(?:json)?\s*([\s\S]*?)\s*```$/i.exec(trimmed);
+    return JSON.parse(fenced ? fenced[1] : trimmed);
+  }
+
+  /**
+   * Slugify the model-supplied key: lower-case it, turn each run of characters
+   * outside `a-z0-9` into `-`, trim edge hyphens and cut to 120 characters. If
+   * nothing is left, derive `light-<12 hex chars>` from SHA-256 of `externalId:index`.
+   */
+  private stableCandidateKey(candidateKey: string, externalId: string, index: number): string {
+    const normalized = candidateKey
+      .trim()
+      .toLowerCase()
+      .replace(/[^a-z0-9]+/g, '-')
+      .replace(/^-+|-+$/g, '')
+      .slice(0, 120);
+    if (normalized) {
+      return normalized;
+    }
+    const digest = createHash('sha256').update(`${externalId}:${index}`).digest('hex').slice(0, 12);
+    return `light-${digest}`;
+  }
+
+  /**
+   * Find every staged `page.md` whose own raw path, or whose sibling
+   * `metadata.json` raw path, is in the diff's added or modified lists.
+   *
+   * A page is skipped when its metadata.json is missing or is not valid JSON
+   * (both logged at debug level), or when the parsed value has no truthy `id`
+   * and `title`, so one bad page cannot reject triage for the whole sync.
+   */
+  private async collectChangedDocuments(stagedDir: string, diffSet: DiffSet): Promise<StagedTriageDocument[]> {
+    const touched = new Set([...diffSet.added, ...diffSet.modified]);
+    const entries = await readdir(stagedDir, { withFileTypes: true, recursive: true });
+    const markdownPaths = entries
+      .filter((entry) => entry.isFile() && entry.name === 'page.md')
+      .map((entry) => join(entry.parentPath, entry.name))
+      .sort();
+    const documents: StagedTriageDocument[] = [];
+
+    for (const markdownPath of markdownPaths) {
+      const metadataPath = join(dirname(markdownPath), 'metadata.json');
+      const metadataRawPath = this.toRawPath(stagedDir, metadataPath);
+      const markdownRawPath = this.toRawPath(stagedDir, markdownPath);
+      if (!touched.has(metadataRawPath) && !touched.has(markdownRawPath)) {
+        continue;
+      }
+
+      let metadataRaw: string;
+      try {
+        metadataRaw = await readFile(metadataPath, 'utf-8');
+      } catch (error) {
+        this.logger.debug(
+          `Skipping triage document ${markdownRawPath}: missing sibling metadata.json (${
+            error instanceof Error ? error.message : String(error)
+          })`,
+        );
+        continue;
+      }
+
+      // Guard the parse: one malformed metadata.json must not reject the whole run.
+      let metadata: { id?: string; title?: string; path?: string } | null;
+      try {
+        metadata = JSON.parse(metadataRaw);
+      } catch (error) {
+        this.logger.debug(
+          `Skipping triage document ${markdownRawPath}: unparseable metadata.json (${
+            error instanceof Error ? error.message : String(error)
+          })`,
+        );
+        continue;
+      }
+
+      // JSON such as `null` parses successfully but has no id or title to read.
+      if (!metadata?.id || !metadata.title) {
+        continue;
+      }
+
+      documents.push({
+        externalId: metadata.id,
+        title: metadata.title,
+        path: metadata.path ?? metadata.title,
+        metadataRawPath,
+        markdownRawPath,
+        // Read the page body only once the metadata is known to be usable.
+        markdown: await readFile(markdownPath, 'utf-8'),
+      });
+    }
+
+    return documents;
+  }
+
+  /** Path of `fullPath` relative to the staged dir, with `\` separators rewritten to `/`. */
+  private toRawPath(stagedDir: string, fullPath: string): string {
+    return relative(stagedDir, fullPath).split('\\').join('/');
+  }
+}
diff --git a/packages/context/src/ingest/parsed-target-table.ts b/packages/context/src/ingest/parsed-target-table.ts
new file mode 100644
index 00000000..4394598d
--- /dev/null
+++ b/packages/context/src/ingest/parsed-target-table.ts
@@ -0,0 +1,28 @@
+import { z } from 'zod';
+
+const parsedTargetTableReasonSchema = z.enum([
+  'no_connection_mapping',
+  'looker_template_unresolved',
+  'derived_table_not_supported',
+  'no_physical_table',
+  'multiple_table_references',
+  'unsupported_dialect',
+  'parse_error',
+]);
+
+export const parsedTargetTableSchema = z.discriminatedUnion('ok', [
+  z.object({
+    ok: z.literal(true),
+    catalog: z.string().nullable(),
+    schema: z.string().nullable(),
+    name: z.string().min(1),
+    canonicalTable: z.string().min(1),
+  }),
+  z.object({
+    ok: z.literal(false),
+    reason: parsedTargetTableReasonSchema,
+    detail: z.string().optional(),
+  }),
+]);
+
+export type 
ParsedTargetTable = z.infer; // NOTE(review): the type argument of z.infer is missing from this copy - confirm upstream
diff --git a/packages/context/src/ingest/ports.ts b/packages/context/src/ingest/ports.ts
new file mode 100644
index 00000000..0ec51939
--- /dev/null
+++ b/packages/context/src/ingest/ports.ts
@@ -0,0 +1,386 @@
+import type { ToolSet } from 'ai';
+import type { KloModelRole } from '@klo/llm';
+import type { AgentRunnerService } from '../agent/index.js';
+import type { KloEmbeddingPort } from '../core/embedding.js';
+import type { GitService, KloFileStorePort, KloLogger, SessionOutcome } from '../core/index.js';
+import type { CaptureSession, MemoryAction, MemoryKnowledgeSlRefsPort } from '../memory/index.js';
+import type { PromptService } from '../prompts/index.js';
+import type { SkillsRegistryService } from '../skills/index.js';
+import type {
+  SemanticLayerService,
+  SlConnectionCatalogPort,
+  SlSearchService,
+  SlSourcesIndexPort,
+  SlValidationDeps,
+  SlValidatorPort,
+} from '../sl/index.js';
+import type { ToolContext, ToolSession, TouchedSlSource } from '../tools/index.js';
+import type { KnowledgeIndexPort, KnowledgeWikiService } from '../wiki/index.js';
+import type { CanonicalPin } from './canonical-pins.js';
+import type { IngestReportSnapshot } from './reports.js';
+import type {
+  ReconcileCandidateForPrompt,
+  ReconcileCandidateSummary,
+  ReconcilePromptRunState,
+} from './stages/build-reconcile-context.js';
+import type { ReconciliationOutcome } from './stages/stage-4-reconciliation.js';
+import type { StageIndex } from './stages/stage-index.types.js';
+import type {
+  DiffSet,
+  EvictionUnit,
+  IngestBundleJob,
+  IngestDiffSummary,
+  IngestTrigger,
+  SourceAdapter,
+} from './types.js';
+
+export type JsonPrimitive = string | number | boolean | null; // Leaf values of a JSON document.
+export type JsonValue = JsonPrimitive | JsonValue[] | { [key: string]: JsonValue | undefined }; // Recursive JSON shape; object members may also be undefined.
+
+export interface IngestRunRecord { // Only the run id is exposed.
+  id: string;
+}
+
+export interface CreateIngestRunArgs { // Input to IngestRunsPort.create.
+  jobId: string;
+  connectionId: string;
+  sourceKey: string;
+  syncId: string;
+  trigger: IngestTrigger;
+  scopeFingerprint?: string | null; // Optional, and may be null when given.
+}
+
+export interface IngestRunsPort { // NOTE(review): every bare `Promise` in this file lacks its type argument in this copy - confirm upstream.
+  create(args: CreateIngestRunArgs): Promise;
+  markCompleted(id: string, diffSummary: IngestDiffSummary, status?: 'completed' | 'partial'): Promise;
+  markFailed(id: string): Promise;
+}
+
+export type ProvenanceActionType = // Closed set of labels used for `actionType` / `action_type` below.
+  | 'source_created'
+  | 'measure_added'
+  | 'join_added'
+  | 'merged'
+  | 'subsumed'
+  | 'wiki_written'
+  | 'skipped';
+
+export interface IngestProvenanceInsert { // One provenance row to write, camelCase.
+  connectionId: string;
+  sourceKey: string;
+  syncId: string;
+  rawPath: string;
+  rawContentHash: string;
+  artifactKind: 'sl' | 'wiki' | null;
+  artifactKey: string | null;
+  targetConnectionId?: string | null;
+  artifactContentHash: string | null;
+  actionType: ProvenanceActionType;
+}
+
+export interface IngestProvenanceRow { // snake_case counterpart of IngestProvenanceInsert, without connectionId and sourceKey.
+  sync_id: string;
+  raw_path: string;
+  raw_content_hash: string;
+  artifact_kind: 'sl' | 'wiki' | null;
+  artifact_key: string | null;
+  target_connection_id: string | null;
+  artifact_content_hash: string | null;
+  action_type: ProvenanceActionType;
+}
+
+export interface IngestProvenancePort {
+  insertMany(rows: IngestProvenanceInsert[]): Promise;
+  findLatestHashesForCompletedSyncs(connectionId: string, sourceKey: string): Promise>; // NOTE(review): the stray `>` is what remains of a stripped nested type argument.
+  findLatestArtifactsForRawPaths(
+    connectionId: string,
+    sourceKey: string,
+    rawPaths: string[],
+  ): Promise>; // NOTE(review): same stripped nested type argument as above.
+}
+
+export interface IngestReportsPort {
+  create(args: {
+    runId: string;
+    jobId: string;
+    connectionId: string;
+    sourceKey: string;
+    body: unknown;
+  }): Promise;
+  findByJobId(jobId: string): Promise;
+  findReportByAnyId?(id: string): Promise; // Optional member: implementations may omit it.
+  markSuperseded(jobId: string, supersededByJobId: string): Promise;
+}
+
+export interface IngestCanonicalPinsPort {
+  listPins(connectionIds: string[]): Promise;
+}
+
+export interface IngestLockPort {
+  withLock(key: string, fn: () => Promise): Promise; // presumably runs fn while holding the lock named by key - verify against an implementation
+}
+
+export interface IngestFileStorePort extends KloFileStorePort {} // Adds no members of its own.
+
+export interface IngestSessionWorktree { // Handle passed back to IngestSessionWorktreePort.cleanup.
+  chatId: string;
+  workdir: string;
+  branch: string;
+  baseSha: string;
+  createdAt: Date;
+  git: GitService;
+  config: IngestFileStorePort;
+}
+
+export interface IngestSessionWorktreePort {
+  create(sessionKey: string, baseSha: string): Promise;
+  cleanup(session: IngestSessionWorktree, outcome: SessionOutcome): Promise;
+}
+
+export interface IngestSettingsPort { // The three workUnit* settings are optional.
+  memoryIngestionModel: string;
+  probeRowCount: number;
+  workUnitMaxConcurrency?: number;
+  workUnitStepBudget?: number;
+  workUnitFailureMode?: 'abort' | 'continue';
+}
+
+export interface IngestGitAuthor {
+  name: string;
+  email: string;
+}
+
+export interface IngestStoragePort { // The three resolve* methods are synchronous and return a string.
+  homeDir: string;
+  systemGitAuthor: IngestGitAuthor;
+  resolveUploadDir(uploadId: string): string;
+  resolvePullDir(jobId: string): string;
+  resolveTranscriptDir(jobId: string): string;
+}
+
+export interface IngestCommitMessagePort {
+  enqueueForExternalCommit(args: { commitHash: string }, message: string, pathFilter: string): Promise;
+}
+
+export interface IngestToolsetLike {
+  toAiSdkTools(context: ToolContext): ToolSet;
+}
+
+export interface IngestToolsetFactoryPort {
+  createIngestWuToolset(session: ToolSession, options?: { includeContextEvidenceTools?: boolean }): IngestToolsetLike;
+}
+
+export type IngestKnowledgeIndexPort = Pick; // NOTE(review): the type arguments of Pick are missing from this copy (KnowledgeIndexPort is imported above).
+
+export interface SourceAdapterRegistryPort { // Synchronous registry of source adapters looked up by sourceKey.
+  register(adapter: SourceAdapter): void;
+  get(sourceKey: string): SourceAdapter;
+  has(sourceKey: string): boolean;
+  list(): string[];
+}
+
+export interface DiffSetComputerPort {
+  compute(
+    connectionId: string,
+    sourceKey: string,
+    currentHashes: Map, // NOTE(review): Map type arguments are missing from this copy.
+    isPathInScope?: (rawPath: string) => boolean,
+  ): Promise<{ // Resolves to four lists of raw paths.
+    added: string[];
+    modified: string[];
+    deleted: string[];
+    unchanged: string[];
+  }>;
+}
+
+export interface ContextEvidenceIndexSummary {
+  documentsIndexed: number;
+  chunksIndexed: number;
+  documentsDeleted: number;
+  embeddingFailures: number;
+  warnings: string[];
+}
+
+export interface ContextEvidenceIndexPort {
+  indexStagedDir(args: {
+    stagedDir: string;
+    runId: string;
+    connectionId: string;
+    sourceKey: string;
+    syncId: string;
+    diffSet: DiffSet;
+    currentHashes: Map;
+  }): Promise;
+  publishSync(args: { connectionId: string; sourceKey: string; syncId: string; diffSet: DiffSet }): Promise;
+}
+
+export interface PageTriageRunResult { // Same fields as the non-exported PageTriageRunResult in page-triage/page-triage.service.ts.
+  enabled: boolean;
+  report?: {
+    pageCount: number;
+    skip: number;
+    light: number;
+    full: number;
+    classifierFailures: number;
+    lightExtractionFailures: number;
+  };
+  fullRawPaths: Set;
+  warnings: string[];
+}
+
+export interface PageTriagePort {
+  triageRun(args: {
+    stagedDir: string;
+    runId: string;
+    connectionId: string;
+    sourceKey: string;
+    syncId: string;
+    jobId: string;
+    diffSet: DiffSet;
+    adapter: SourceAdapter; // Typed as the whole adapter here.
+  }): Promise;
+}
+
+export interface ContextCandidateCarryforwardPort {
+  carryForward(args: { runId: string; connectionId: string; sourceKey: string }): Promise<{ warnings: string[] }>;
+}
+
+export interface ContextCandidateForDedup {
+  id: string;
+  candidateKey: string;
+  topic: string;
+  assertion: string;
+  promotionScore: number;
+  createdAt: Date;
+  evidenceChunkIds: string[];
+  evidenceRefs: JsonValue;
+  embedding: string | null;
+  lane: 'light' | 'full' | null;
+}
+
+export interface CandidateDedupResult {
+  enabled: boolean;
+  candidatesIn: number;
+  clustersOut: number;
+  mergedCount: number;
+  largestClusterSize: number;
+  embeddingFailures: number;
+  representatives: ContextCandidateForDedup[];
+  warnings: string[];
+}
+
+export interface CandidateDedupPort {
+  deduplicateRun(runId: string): Promise;
+}
+
+export interface ContextCandidateSummary { // Six numeric counters, one of them `total`.
+  total: number;
+  pending: number;
+  promoted: number;
+  merged: number;
+  rejected: number;
+  conflict: number;
+}
+
+export interface ContextEvidenceCandidatesPort {
+  getCandidateSummary(runId: string): Promise;
+}
+
+export interface CuratorPaginationReport {
+  passesRun: number;
+  topicsExamined: number;
+  topicsByVerdict: {
+    promoted: number;
+    merged: number;
+    rejected: number;
+    conflict: number;
+  };
+  topicsRejectedByReason: Record; // NOTE(review): Record type arguments are missing from this copy.
+  budgetExhausted: boolean;
+}
+
+export interface CuratorPaginationPort {
+  reconcile(input: {
+    runId: string;
+    sourceKey: string;
+    jobId: string;
+    stageIndex: StageIndex;
+    evictionUnit: EvictionUnit | undefined;
+    representatives: ContextCandidateForDedup[];
+    initialBudget: { creates: number; updates: number };
+    modelRole: KloModelRole;
+    buildSystemPrompt: () => string;
+    buildUserPrompt: (input: {
+      summary: ReconcileCandidateSummary;
+      items: ReconcileCandidateForPrompt[];
+      runState: ReconcilePromptRunState;
+    }) => string;
+    buildToolSet: (passNumber: number) => ToolSet;
+    getReconciliationActions: () => MemoryAction[];
+    onStepFinish?: (info: { passNumber: number; stepIndex: number; stepBudget: number }) => void; // Optional callback.
+  }): Promise;
+}
+
+export interface IngestBundlePostProcessorInput {
+  connectionId: string;
+  sourceKey: string;
+  syncId: string;
+  jobId: string;
+  runId: string;
+  workdir: string;
+  parseArtifacts: unknown;
+}
+
+export interface IngestBundlePostProcessorResult {
+  result?: unknown;
+  warnings: string[];
+  errors: string[];
+  touchedSources: TouchedSlSource[];
+}
+
+export interface IngestBundlePostProcessorPort {
+  run(input: IngestBundlePostProcessorInput): Promise;
+}
+
+export interface IngestBundleRunnerDeps { // Dependency bag; members marked `?` may be omitted.
+  runs: IngestRunsPort;
+  provenance: IngestProvenancePort;
+  reports: IngestReportsPort;
+  canonicalPins: IngestCanonicalPinsPort;
+  registry: SourceAdapterRegistryPort;
+  diffSetService: DiffSetComputerPort;
+  sessionWorktreeService: IngestSessionWorktreePort;
+  agentRunner: AgentRunnerService;
+  gitService: GitService;
+  lockingService: IngestLockPort;
+  storage: IngestStoragePort;
+  settings: IngestSettingsPort;
+  skillsRegistry: SkillsRegistryService;
+  promptService: PromptService;
+  wikiService: KnowledgeWikiService;
+  knowledgeSlRefs?: MemoryKnowledgeSlRefsPort;
+  knowledgeIndex?: IngestKnowledgeIndexPort;
+  semanticLayerService: SemanticLayerService;
+  slSearchService: SlSearchService;
+  slSourcesRepository: SlSourcesIndexPort;
+  connections: SlConnectionCatalogPort;
+  slValidator: SlValidatorPort;
+  toolsetFactory: IngestToolsetFactoryPort;
+  commitMessages: IngestCommitMessagePort;
+  embedding: KloEmbeddingPort;
+  contextEvidenceIndex?: ContextEvidenceIndexPort;
+  pageTriage?: PageTriagePort;
+  contextEvidenceCandidates?: ContextEvidenceCandidatesPort;
+  candidateDedup?: CandidateDedupPort;
+  contextCandidateCarryforward?: ContextCandidateCarryforwardPort;
+  curatorPagination?: CuratorPaginationPort;
+  postProcessors?: Record;
+  logger?: KloLogger;
+}
+
+export interface IngestCaptureState {
+  session: CaptureSession;
+  actions: MemoryAction[];
+}
+
+export type IngestRunnerJob = IngestBundleJob; // Plain alias.
diff --git a/packages/context/src/ingest/raw-sources-paths.test.ts b/packages/context/src/ingest/raw-sources-paths.test.ts
new file mode 100644
index 00000000..dcc17ddc
--- /dev/null
+++ b/packages/context/src/ingest/raw-sources-paths.test.ts
@@ -0,0 +1,24 @@
+import { describe, expect, it } from 'vitest';
+import { buildSyncId, provenanceMarker, rawSourcesDirForSync, rawSourcesRoot } from './raw-sources-paths.js';
+
+describe('raw-sources paths', () => {
+  it('buildSyncId uses timestamp + jobId', () => {
+    const id = buildSyncId(new Date('2026-04-22T14:30:00Z'), 'job-abc');
+    expect(id).toBe('2026-04-22-143000-job-abc');
+  });
+
+  it('rawSourcesDirForSync composes the canonical path', () => {
+    const path = rawSourcesDirForSync('c1', 'fake', 's1');
+    expect(path).toBe('raw-sources/c1/fake/s1');
+  });
+
+  it('rawSourcesRoot is stable', () => {
+    expect(rawSourcesRoot).toBe('raw-sources');
+  });
+
+  it('provenanceMarker produces the documented HTML-comment shape', () => {
+    expect(provenanceMarker('raw-sources/c1/fake/s1/a.yml', 15, 28)).toBe(
+      '', // NOTE(review): the expected marker text appears to be missing from this copy - confirm upstream.
+    );
+  });
+});
diff --git a/packages/context/src/ingest/raw-sources-paths.ts
b/packages/context/src/ingest/raw-sources-paths.ts
new file mode 100644
index 00000000..654ce8ba
--- /dev/null
+++ b/packages/context/src/ingest/raw-sources-paths.ts
@@ -0,0 +1,27 @@
+/** Root path segment used by `rawSourcesDirForSync`. */
+export const rawSourcesRoot = 'raw-sources';
+
+/**
+ * Build a sync id of the form `YYYY-MM-DD-HHMMSS-<jobId>` from the UTC fields of `now`.
+ * Month, day, hour, minute and second are zero-padded to two digits; the year is not padded.
+ */
+export function buildSyncId(now: Date, jobId: string): string {
+  const twoDigits = (value: number): string => String(value).padStart(2, '0');
+  const datePart = [now.getUTCFullYear(), twoDigits(now.getUTCMonth() + 1), twoDigits(now.getUTCDate())].join('-');
+  const timePart = [now.getUTCHours(), now.getUTCMinutes(), now.getUTCSeconds()].map(twoDigits).join('');
+  return [datePart, timePart, jobId].join('-');
+}
+
+/** Join the root, connection id, source key and sync id with `/` into one relative path. */
+export function rawSourcesDirForSync(connectionId: string, sourceKey: string, syncId: string): string {
+  return [rawSourcesRoot, connectionId, sourceKey, syncId].join('/');
+}
+
+/**
+ * Returns an empty string for every input as written in this copy.
+ * NOTE(review): the parameters are unused and the template literal is empty, which looks like
+ * marker text missing from this copy of the patch - confirm against the upstream file.
+ */
+export function provenanceMarker(rawPath: string, startLine: number, endLine: number): string {
+  return ``;
+}
diff --git a/packages/context/src/ingest/repo-fetch.test.ts b/packages/context/src/ingest/repo-fetch.test.ts
new file mode 100644
index 00000000..0c8e2343
--- /dev/null
+++ b/packages/context/src/ingest/repo-fetch.test.ts
@@ -0,0 +1,227 @@
+import { access, mkdir, mkdtemp, readFile, rm, writeFile } from 'node:fs/promises';
+import { tmpdir } from 'node:os';
+import { join } from 'node:path';
+import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest';
+import { makeLocalGitRepo } from '../test/make-local-git-repo.js';
+
+const FIXTURE_ROOT = join(__dirname, '../../test/fixtures/lookml/single-model');
+
+async function loadRepoFetch() {
+  return await import('./repo-fetch.js');
+}
+
+describe('repo-fetch', () => {
+  let tmpRoot: string;
+
+  beforeEach(async () => {
+    tmpRoot = await mkdtemp(join(tmpdir(), 'repo-fetch-'));
+    vi.resetModules();
+    vi.doUnmock('./git-env.js');
+  });
+
+  afterEach(async () => {
+    vi.restoreAllMocks();
+    vi.resetModules();
+    vi.doUnmock('./git-env.js');
+    await rm(tmpRoot, { 
recursive: true, force: true }); + }); + + it('builds authenticated URLs for GitHub, GitLab, generic hosts, empty tokens, and file URLs', async () => { + const { buildAuthenticatedUrl } = await loadRepoFetch(); + + expect(buildAuthenticatedUrl('https://github.com/acme/repo.git', null)).toBe( + 'https://github.com/acme/repo.git', + ); + expect(buildAuthenticatedUrl('file:///tmp/repo', 'tok')).toBe('file:///tmp/repo'); + expect(buildAuthenticatedUrl('https://github.com/acme/repo.git', 'tok')).toBe( + 'https://x-token-auth:tok@github.com/acme/repo.git', // pragma: allowlist secret + ); + expect(buildAuthenticatedUrl('https://gitlab.com/acme/repo.git', 'tok')).toBe( + 'https://oauth2:tok@gitlab.com/acme/repo.git', // pragma: allowlist secret + ); + expect(buildAuthenticatedUrl('https://git.example.com/acme/repo.git', 'tok')).toBe( + 'https://token:tok@git.example.com/acme/repo.git', // pragma: allowlist secret + ); + }); + + it('sanitizes both URL password patterns and literal token text', async () => { + const { sanitizeRepoError } = await loadRepoFetch(); + + const message = sanitizeRepoError( + new Error('failed https://token:supersecret@git.example.com/acme/repo.git because supersecret expired'), // pragma: allowlist secret + 'supersecret', + ); + + expect(message).toContain('https://token:***@git.example.com/acme/repo.git'); + expect(message).not.toContain('supersecret'); + }); + + it('validates required and URL-shaped repository configuration', async () => { + const { RepoConfigError, validateRepoConfig } = await loadRepoFetch(); + + expect(() => validateRepoConfig({ repoUrl: '' })).toThrow(RepoConfigError); + expect(() => validateRepoConfig({ repoUrl: 'not a url' })).toThrow(RepoConfigError); + expect(() => validateRepoConfig({ repoUrl: 'file:///tmp/repo' })).not.toThrow(); + }); + + it('clones a local repo and returns the full 40-character commit hash', async () => { + const { cloneOrPull } = await loadRepoFetch(); + const repo = await 
makeLocalGitRepo(FIXTURE_ROOT, join(tmpRoot, 'origin')); + const cacheDir = join(tmpRoot, 'cache', 'conn-1'); + + const result = await cloneOrPull({ + repoUrl: repo.repoUrl, + cacheDir, + branch: 'main', + }); + + expect(result.commitHash).toMatch(/^[0-9a-f]{40}$/); + await expect(readFile(join(cacheDir, 'orders.model.lkml'), 'utf-8')).resolves.toMatch(/connection:/); + }); + + it('pulls an existing cache and returns the new origin commit hash', async () => { + const { cloneOrPull } = await loadRepoFetch(); + const repo = await makeLocalGitRepo(FIXTURE_ROOT, join(tmpRoot, 'origin')); + const cacheDir = join(tmpRoot, 'cache', 'conn-pull'); + + const first = await cloneOrPull({ repoUrl: repo.repoUrl, cacheDir, branch: 'main' }); + + await repo.writeFile('views/orders.view.lkml', 'view: orders { sql_table_name: public.orders_v2 ;; }\n'); + const secondCommit = await repo.commit('bump lookml view'); + + const second = await cloneOrPull({ repoUrl: repo.repoUrl, cacheDir, branch: 'main' }); + + expect(second.commitHash).toBe(secondCommit); + expect(second.commitHash).not.toBe(first.commitHash); + await expect(readFile(join(cacheDir, 'views', 'orders.view.lkml'), 'utf-8')).resolves.toMatch(/orders_v2/); + }); + + it('falls back to a fresh clone when the existing cache diverges locally', async () => { + const { cloneOrPull } = await loadRepoFetch(); + const { createSimpleGit } = await import('./git-env.js'); + const repo = await makeLocalGitRepo(FIXTURE_ROOT, join(tmpRoot, 'origin')); + const cacheDir = join(tmpRoot, 'cache', 'conn-diverged'); + + await cloneOrPull({ repoUrl: repo.repoUrl, cacheDir, branch: 'main' }); + + const cacheGit = createSimpleGit(cacheDir); + await cacheGit.addConfig('user.email', 'test@klo.local'); + await cacheGit.addConfig('user.name', 'KLO Test'); + await writeFile(join(cacheDir, 'local-only.txt'), 'local commit\n', 'utf-8'); + await cacheGit.add('.'); + await cacheGit.commit('local-only divergent commit'); + + await 
repo.writeFile('views/orders.view.lkml', 'view: orders { sql_table_name: public.orders_remote ;; }\n'); + const originCommit = await repo.commit('remote commit'); + + const result = await cloneOrPull({ repoUrl: repo.repoUrl, cacheDir, branch: 'main' }); + + expect(result.commitHash).toBe(originCommit); + await expect(access(join(cacheDir, 'local-only.txt'))).rejects.toThrow(); + await expect(readFile(join(cacheDir, 'views', 'orders.view.lkml'), 'utf-8')).resolves.toMatch(/orders_remote/); + }); + + it('falls back to a fresh clone when the cache has a corrupt .git directory', async () => { + const { cloneOrPull } = await loadRepoFetch(); + const repo = await makeLocalGitRepo(FIXTURE_ROOT, join(tmpRoot, 'origin')); + const cacheDir = join(tmpRoot, 'cache', 'conn-corrupt'); + + await mkdir(join(cacheDir, '.git'), { recursive: true }); + await writeFile(join(cacheDir, '.git', 'HEAD'), 'garbage\n', 'utf-8'); + + const result = await cloneOrPull({ repoUrl: repo.repoUrl, cacheDir, branch: 'main' }); + + expect(result.commitHash).toMatch(/^[0-9a-f]{40}$/); + await expect(readFile(join(cacheDir, 'orders.model.lkml'), 'utf-8')).resolves.toMatch(/connection:/); + }); + + it('returns a sanitized RepoFetchError when fresh clone fails', async () => { + const { RepoFetchError, cloneOrPull } = await loadRepoFetch(); + const repo = await makeLocalGitRepo(FIXTURE_ROOT, join(tmpRoot, 'origin')); + + await expect( + cloneOrPull({ + repoUrl: repo.repoUrl, + cacheDir: join(tmpRoot, 'cache', 'missing-branch'), + branch: 'missing', + authToken: 'supersecret-token', + }), + ).rejects.toThrow(RepoFetchError); + + await expect( + cloneOrPull({ + repoUrl: repo.repoUrl, + cacheDir: join(tmpRoot, 'cache', 'missing-branch-2'), + branch: 'missing', + authToken: 'supersecret-token', + }), + ).rejects.toThrow(expect.objectContaining({ message: expect.not.stringContaining('supersecret-token') })); + }); + + it('testRepoConnection returns ok true for a local repo and ok false for a missing local 
repo', async () => { + const { testRepoConnection } = await loadRepoFetch(); + const repo = await makeLocalGitRepo(FIXTURE_ROOT, join(tmpRoot, 'origin')); + + await expect(testRepoConnection({ repoUrl: repo.repoUrl })).resolves.toEqual({ ok: true }); + + const failed = await testRepoConnection({ repoUrl: `file://${join(tmpRoot, 'does-not-exist')}` }); + expect(failed.ok).toBe(false); + if (!failed.ok) { + expect(failed.error).toEqual(expect.any(String)); + } + }); + + it('cleans up non-existent and existing repository directories idempotently', async () => { + const { cleanupRepoDir } = await loadRepoFetch(); + const existing = join(tmpRoot, 'cache', 'to-clean'); + + await mkdir(join(existing, '.git'), { recursive: true }); + await cleanupRepoDir(existing); + await cleanupRepoDir(existing); + + await expect(access(existing)).rejects.toThrow(); + }); + + it('sets the remote URL on every pull so token rotation and token removal update cached .git/config', async () => { + const cacheDir = join(tmpRoot, 'cache', 'auth-refresh'); + await mkdir(join(cacheDir, '.git'), { recursive: true }); + + const worktreeGit = { + remote: vi.fn(async () => undefined), + fetch: vi.fn(async () => undefined), + checkout: vi.fn(async () => undefined), + pull: vi.fn(async () => undefined), + log: vi.fn(async () => ({ latest: { hash: 'a'.repeat(40) } })), + }; + const rootGit = { + clone: vi.fn(async () => undefined), + }; + + vi.doMock('./git-env.js', () => ({ + createSimpleGit: vi.fn((baseDir?: string) => (baseDir ? 
/**
 * Returns `repoUrl` with token credentials embedded for HTTP(S) Git access.
 *
 * The URL is returned unchanged when:
 *  - no token is supplied (`null`, `undefined` or empty string);
 *  - the URL cannot be parsed;
 *  - the scheme is anything other than `http:` / `https:` (for example
 *    `file:`, `ssh:` or `git:`). Userinfo in those schemes is not a token
 *    slot, and overwriting it would replace e.g. the `git` user of an
 *    `ssh://git@host/...` remote.
 *
 * The username paired with the token depends on the host:
 *  - `github.com` and its subdomains -> `x-token-auth`
 *  - `gitlab.com` and its subdomains -> `oauth2`
 *  - any other host                  -> `token`
 *
 * Hosts are matched exactly or as a dot-separated suffix, so look-alike
 * hosts such as `notgithub.com` fall through to the generic `token` user.
 *
 * @param repoUrl   Repository URL as configured by the user.
 * @param authToken Access token, or a falsy value for anonymous access.
 * @returns The URL to hand to git, possibly carrying credentials.
 */
export function buildAuthenticatedUrl(repoUrl: string, authToken: string | null | undefined): string {
  if (!authToken) {
    return repoUrl;
  }

  let url: URL;
  try {
    url = new URL(repoUrl);
  } catch {
    // Not URL-shaped (e.g. scp-style `git@host:path`): leave it untouched.
    return repoUrl;
  }

  if (url.protocol !== 'http:' && url.protocol !== 'https:') {
    return repoUrl;
  }

  const host = url.hostname;
  const isHostOrSubdomainOf = (domain: string): boolean => host === domain || host.endsWith(`.${domain}`);

  if (isHostOrSubdomainOf('github.com')) {
    url.username = 'x-token-auth';
  } else if (isHostOrSubdomainOf('gitlab.com')) {
    url.username = 'oauth2';
  } else {
    url.username = 'token';
  }
  url.password = authToken;

  return url.toString();
}
/**
 * Attempts to bring an existing cached clone up to date with `origin/<branch>`.
 *
 * Steps, in order:
 *  1. point `origin` at `authUrl`;
 *  2. fetch `branch` from `origin`;
 *  3. check out `branch`;
 *  4. pull `branch` from `origin`.
 *
 * @param cacheDir Directory containing the cached clone.
 * @param authUrl  Remote URL (possibly carrying credentials) to store as `origin`.
 * @param branch   Branch to fetch, check out and pull.
 * @returns `true` when every step succeeded, `false` when any step threw.
 */
async function tryPull(cacheDir: string, authUrl: string, branch: string): Promise<boolean> {
  try {
    const git = createSimpleGit(cacheDir);
    // Rewrite the remote on every pull so that a rotated or removed token
    // replaces whatever URL the cached clone was created with.
    await git.remote(['set-url', 'origin', authUrl]);
    await git.fetch(['origin', branch]);
    await git.checkout(branch);
    await git.pull('origin', branch);
    return true;
  } catch {
    // Best-effort by design: the error is discarded and reported as `false`;
    // cloneOrPull uses that result to decide between failing and re-cloning.
    return false;
  }
}
diffSummary: { added: 2, modified: 1, deleted: 0, unchanged: 4 }, + commitSha: 'abc12345', + workUnits: [ + { + unitKey: 'cards', + rawFiles: ['cards/1.json', 'cards/2.json'], + status: 'success', + actions: [ + { target: 'wiki', type: 'created', key: 'knowledge/global/revenue.md', detail: 'Revenue overview' }, + { target: 'sl', type: 'updated', key: 'warehouse.orders', detail: 'Added order amount measure' }, + ], + touchedSlSources: ['warehouse.orders'], + }, + ], + failedWorkUnits: [], + reconciliationSkipped: false, + conflictsResolved: [], + evictionsApplied: [], + unmappedFallbacks: [], + evictionInputs: [], + unresolvedCards: [], + supersededBy: null, + overrideOf: null, + provenanceRows: [ + { + rawPath: 'cards/1.json', + artifactKind: 'wiki', + artifactKey: 'knowledge/global/revenue.md', + actionType: 'wiki_written', + }, + ], + toolTranscripts: [ + { + unitKey: 'cards', + path: 'tool-transcripts/cards.jsonl', + toolCallCount: 3, + errorCount: 0, + toolNames: ['knowledge_capture'], + }, + ], + reconciliationActions: [], + evictionDecisions: [], + context: { + documentsIndexed: 2, + chunksIndexed: 2, + documentsDeleted: 0, + embeddingFailures: 0, + candidatesCreated: 1, + candidatesPromoted: 1, + candidatesRejected: 0, + knowledgeCreates: 1, + knowledgeUpdates: 0, + capped: false, + warnings: [], + }, + }, + }; +} + +describe('parseIngestReportSnapshot', () => { + it('parses a bundle ingest report snapshot and preserves report detail arrays', () => { + const snapshot = parseIngestReportSnapshot(validReportSnapshot()); + + expect(snapshot).toMatchObject({ + id: 'report-1', + runId: 'run-1', + jobId: 'job-1', + connectionId: 'warehouse', + sourceKey: 'metabase', + body: { + syncId: 'sync-1', + commitSha: 'abc12345', + failedWorkUnits: [], + }, + }); + expect(snapshot.body.workUnits[0]?.actions).toEqual([ + { + target: 'wiki', + type: 'created', + key: 'knowledge/global/revenue.md', + detail: 'Revenue overview', + targetConnectionId: null, + }, + { + target: 
'sl', + type: 'updated', + key: 'warehouse.orders', + detail: 'Added order amount measure', + targetConnectionId: null, + }, + ]); + expect(snapshot.body.provenanceRows).toHaveLength(1); + expect(snapshot.body.toolTranscripts).toHaveLength(1); + }); + + it('parses target-aware actions and normalizes legacy touched source strings', () => { + const report = validReportSnapshot(); + report.body.workUnits[0] = { + ...report.body.workUnits[0], + actions: [ + { + target: 'sl', + type: 'created', + key: 'looker__b2b__sales_pipeline', + detail: 'Created source', + targetConnectionId: 'warehouse-1', + }, + ], + // Legacy report shape: bare strings are normalized to the report connection ID. + touchedSlSources: ['looker__b2b__sales_pipeline'], + } as never; + + const snapshot = parseIngestReportSnapshot(report); + + expect(snapshot.body.workUnits[0]?.actions).toEqual([ + { + target: 'sl', + type: 'created', + key: 'looker__b2b__sales_pipeline', + detail: 'Created source', + targetConnectionId: 'warehouse-1', + }, + ]); + expect(snapshot.body.workUnits[0]?.touchedSlSources).toEqual([ + { connectionId: 'warehouse', sourceName: 'looker__b2b__sales_pipeline' }, + ]); + }); + + it('parses captured memory-flow snapshots in report bodies', () => { + const report = validReportSnapshot(); + report.body = { + ...report.body, + memoryFlow: { + metadata: { + schemaVersion: 1, + mode: 'full', + origin: 'captured', + timing: 'captured', + capturedAt: '2026-05-01T10:00:03.000Z', + sourceReportId: null, + sourceReportPath: null, + fallbackReason: null, + }, + runId: 'run-1', + connectionId: 'warehouse', + adapter: 'lookml', + status: 'running', + sourceDir: null, + syncId: 'sync-2', + errors: [], + plannedWorkUnits: [], + details: { actions: [], provenance: [], transcripts: [] }, + events: [ + { + type: 'source_acquired', + adapter: 'lookml', + trigger: 'manual_resync', + fileCount: 2, + emittedAt: '2026-05-01T10:00:00.000Z', + }, + ], + }, + } as typeof report.body; + + 
expect(parseIngestReportSnapshot(report).body.memoryFlow?.events).toEqual([ + { + type: 'source_acquired', + adapter: 'lookml', + trigger: 'manual_resync', + fileCount: 2, + emittedAt: '2026-05-01T10:00:00.000Z', + }, + ]); + }); + + it('applies defaults for optional report fields emitted by older reports', () => { + const report = validReportSnapshot(); + delete (report.body as Record).conflictsResolved; + delete (report.body as Record).evictionsApplied; + delete (report.body as Record).unmappedFallbacks; + delete (report.body as Record).supersededBy; + delete (report.body as Record).overrideOf; + delete (report.body as Record).provenanceRows; + delete (report.body as Record).toolTranscripts; + + const snapshot = parseIngestReportSnapshot(report); + + expect(snapshot.body.conflictsResolved).toEqual([]); + expect(snapshot.body.evictionsApplied).toEqual([]); + expect(snapshot.body.unmappedFallbacks).toEqual([]); + expect(snapshot.body.supersededBy).toBeNull(); + expect(snapshot.body.overrideOf).toBeNull(); + expect(snapshot.body.provenanceRows).toEqual([]); + expect(snapshot.body.toolTranscripts).toEqual([]); + }); + + it('rejects malformed report snapshots with a concise message', () => { + const report = validReportSnapshot(); + report.body.workUnits[0] = { + ...report.body.workUnits[0], + actions: [{ target: 'database', type: 'created', key: 'bad', detail: 'bad target' }], + } as never; + + expect(() => parseIngestReportSnapshot(report)).toThrow('Invalid ingest report snapshot'); + }); +}); diff --git a/packages/context/src/ingest/report-snapshot.ts b/packages/context/src/ingest/report-snapshot.ts new file mode 100644 index 00000000..891c6fa7 --- /dev/null +++ b/packages/context/src/ingest/report-snapshot.ts @@ -0,0 +1,194 @@ +import * as z from 'zod'; +import type { TouchedSlSource } from '../tools/index.js'; +import { memoryFlowReplayInputSchema } from './memory-flow/schema.js'; +import type { IngestReportSnapshot } from './reports.js'; + +const 
// Which kind of artifact an ingest action touched.
const ingestActionTargetSchema = z.enum(['wiki', 'sl']);

// What the action did to that artifact.
const ingestActionTypeSchema = z.enum(['created', 'updated', 'removed']);

// Nullable connection id; an absent value parses to `null`.
const ingestActionTargetConnectionIdSchema = z.string().nullable().default(null);

/** Shape of a single action entry inside a report work unit. */
const ingestActionSchema = z.object({
  target: ingestActionTargetSchema,
  type: ingestActionTypeSchema,
  key: z.string(),
  detail: z.string(),
  targetConnectionId: ingestActionTargetConnectionIdSchema,
});
// Counter fields must be integers greater than or equal to zero.
const nonNegativeIntSchema = z.number().int().min(0);

/** Per-work-unit summary of a recorded tool transcript. */
const toolTranscriptSummarySchema = z.object({
  unitKey: z.string(),
  path: z.string(),
  toolCallCount: nonNegativeIntSchema,
  errorCount: nonNegativeIntSchema,
  toolNames: z.array(z.string()),
});
/**
 * Converts the `touchedSlSources` entries of a work unit into
 * `{ connectionId, sourceName }` objects.
 *
 * Two input shapes are accepted, matching `touchedSlSourceInputSchema`:
 *  - a bare string (legacy report shape), which becomes a source named by that
 *    string and attributed to `connectionId`;
 *  - an object, which is copied keeping only `connectionId` and `sourceName`.
 *
 * @param connectionId Connection id used for legacy string entries.
 * @param value        Entries as they came out of schema validation.
 * @returns A new array of normalized entries, in the same order.
 */
function normalizeTouchedSlSources(connectionId: string, value: Array<string | TouchedSlSource>): TouchedSlSource[] {
  return value.map((entry) =>
    typeof entry === 'string'
      ? { connectionId, sourceName: entry }
      : { connectionId: entry.connectionId, sourceName: entry.sourceName },
  );
}
/** Outcome of one work unit as recorded in an ingest report body. */
export interface IngestReportWorkUnit {
  /** Key identifying the work unit. */
  unitKey: string;
  /** Raw files belonging to this unit (presumably paths relative to the raw-sources directory; confirm against the producer). */
  rawFiles: string[];
  /** Whether the unit succeeded or failed. */
  status: 'success' | 'failed';
  /** Optional free-text reason accompanying the status. */
  reason?: string;
  /** Memory actions recorded for this unit. */
  actions: MemoryAction[];
  /**
   * Semantic-layer sources touched by this unit. Always in object form here;
   * parseIngestReportSnapshot converts legacy bare-string entries before
   * exposing a snapshot.
   */
  touchedSlSources: TouchedSlSource[];
  /** Optional flag; NOTE(review): presumably marks that semantic-layer writes were not allowed for this unit (confirm). */
  slDisallowed?: boolean;
  /** Optional reason paired with `slDisallowed`; the only value currently defined is `'lookml_connection_mismatch'`. */
  slDisallowedReason?: 'lookml_connection_mismatch';
}
/**
 * Rebuilds a `StageIndex` from a previously stored ingest report body.
 *
 * Each work unit is copied field by field (only the known work-unit fields are
 * carried over). `artifactResolutions` is taken from the body, defaulting to
 * an empty list when absent.
 *
 * NOTE(review): `conflictsResolved`, `evictionsApplied` and `unmappedFallbacks`
 * are always emitted as empty lists even though the report body carries
 * fields with the same names. Confirm this reset is intentional.
 *
 * @param jobId        Job id to stamp on the index.
 * @param connectionId Connection id to stamp on the index.
 * @param body         Report body to read work units and resolutions from.
 * @returns The reconstructed stage index.
 */
export function buildStageIndexFromReportBody(jobId: string, connectionId: string, body: IngestReportBody): StageIndex {
  const workUnits = body.workUnits.map(
    ({ unitKey, rawFiles, status, reason, actions, touchedSlSources, slDisallowed, slDisallowedReason }) => ({
      unitKey,
      rawFiles,
      status,
      reason,
      actions,
      touchedSlSources,
      slDisallowed,
      slDisallowedReason,
    }),
  );
  const artifactResolutions = body.artifactResolutions ?? [];

  return {
    jobId,
    connectionId,
    workUnits,
    conflictsResolved: [],
    evictionsApplied: [],
    unmappedFallbacks: [],
    artifactResolutions,
  };
}
/**
 * In-memory registry of source adapters, keyed by each adapter's `source`.
 */
export class SourceAdapterRegistry {
  // Explicitly typed so lookups yield `SourceAdapter | undefined` rather than `any`.
  private readonly adapters = new Map<string, SourceAdapter>();

  /**
   * Adds an adapter under its `source` key.
   *
   * @throws Error when an adapter is already registered for that key.
   */
  register(adapter: SourceAdapter): void {
    if (this.adapters.has(adapter.source)) {
      throw new Error(`source adapter already registered for '${adapter.source}'`);
    }
    this.adapters.set(adapter.source, adapter);
  }

  /**
   * Returns the adapter registered for `sourceKey`.
   *
   * @throws Error when nothing is registered for the key; the message lists
   *         the known keys, or `(none)` when the registry is empty.
   */
  get(sourceKey: string): SourceAdapter {
    const adapter = this.adapters.get(sourceKey);
    if (!adapter) {
      const known = [...this.adapters.keys()].join(', ') || '(none)';
      throw new Error(`no source adapter registered for '${sourceKey}'. Known: ${known}`);
    }
    return adapter;
  }

  /** Reports whether an adapter is registered for `sourceKey`. */
  has(sourceKey: string): boolean {
    return this.adapters.has(sourceKey);
  }

  /** Returns the registered source keys in registration order. */
  list(): string[] {
    return [...this.adapters.keys()];
  }
}
/**
 * Test helper: builds an `IngestDiffSummary` with one added file and zero
 * modified/deleted/unchanged files, then applies `overrides` on top.
 *
 * @param overrides Fields to replace in the default summary.
 * @returns A fresh summary object.
 */
function diffSummary(overrides: Partial<IngestDiffSummary> = {}): IngestDiffSummary {
  return {
    added: 1,
    modified: 0,
    deleted: 0,
    unchanged: 0,
    ...overrides,
  };
}
SqliteBundleIngestStore({ dbPath }); + await expect(reopened.findByJobId('job-1')).resolves.toMatchObject({ + id: 'report-1', + runId: 'run-1', + body: { syncId: 'sync-1', supersededBy: null }, + }); + + await reopened.markSuperseded('job-1', 'job-2'); + await expect(reopened.findByJobId('job-1')).resolves.toMatchObject({ + body: { syncId: 'sync-1', supersededBy: 'job-2' }, + }); + await expect(reopened.findByJobId('missing-job')).resolves.toBeNull(); + }); + + /* Records an old completed run, a failed run and a newer completed run for the same raw path. NOTE: insertMany also draws provenance row ids from idFactory, so the three listed ids are not consumed by runs alone; this test never asserts on ids. */ it('uses only completed runs when serving latest provenance hashes and artifacts', async () => { + const store = new SqliteBundleIngestStore({ + dbPath, + idFactory: idFactory(['run-old', 'run-failed', 'run-new']), + now: () => new Date('2026-04-30T10:00:00.000Z'), + }); + + const oldRun = await store.create(runArgs({ jobId: 'job-old', syncId: 'sync-old' })); + await store.insertMany([ + { + connectionId: 'docs', + sourceKey: 'notion', + syncId: 'sync-old', + rawPath: 'pages/revenue.md', + rawContentHash: 'hash-old', + artifactKind: 'wiki', + artifactKey: 'knowledge/global/revenue.md', + artifactContentHash: null, + actionType: 'wiki_written', + }, + ]); + await store.markCompleted(oldRun.id, diffSummary()); + + const failedRun = await store.create(runArgs({ jobId: 'job-failed', syncId: 'sync-failed' })); + await store.insertMany([ + { + connectionId: 'docs', + sourceKey: 'notion', + syncId: 'sync-failed', + rawPath: 'pages/revenue.md', + rawContentHash: 'hash-failed', + artifactKind: null, + artifactKey: null, + artifactContentHash: null, + actionType: 'skipped', + }, + ]); + await store.markFailed(failedRun.id); + + const newRun = await store.create(runArgs({ jobId: 'job-new', syncId: 'sync-new' })); + await store.insertMany([ + { + connectionId: 'docs', + sourceKey: 'notion', + syncId: 'sync-new', + rawPath: 'pages/revenue.md', + rawContentHash: 'hash-new', + artifactKind: 'wiki', + artifactKey: 'knowledge/global/revenue.md', + artifactContentHash: 'artifact-hash-new', + actionType: 'wiki_written', + }, + 
{ + connectionId: 'docs', + sourceKey: 'notion', + syncId: 'sync-new', + rawPath: 'pages/revenue.md', + rawContentHash: 'hash-new', + artifactKind: 'sl', + artifactKey: 'warehouse.revenue', + artifactContentHash: null, + actionType: 'measure_added', + }, + ]); + await store.markCompleted(newRun.id, diffSummary({ modified: 1 })); + + await expect(store.findLatestHashesForCompletedSyncs('docs', 'notion')).resolves.toEqual( + new Map([['pages/revenue.md', 'hash-new']]), + ); + const diffSet = await new DiffSetService(store).compute( + 'docs', + 'notion', + new Map([ + ['pages/revenue.md', 'hash-new'], + ['pages/new-policy.md', 'hash-added'], + ]), + ); + expect(diffSet).toEqual({ + added: ['pages/new-policy.md'], + modified: [], + deleted: [], + unchanged: ['pages/revenue.md'], + }); + + const artifacts = await store.findLatestArtifactsForRawPaths('docs', 'notion', ['pages/revenue.md']); + expect(artifacts.get('pages/revenue.md')).toEqual( + expect.arrayContaining([ + expect.objectContaining({ + sync_id: 'sync-new', + raw_content_hash: 'hash-new', + artifact_kind: 'wiki', + artifact_key: 'knowledge/global/revenue.md', + action_type: 'wiki_written', + }), + expect.objectContaining({ + sync_id: 'sync-new', + artifact_kind: 'sl', + artifact_key: 'warehouse.revenue', + action_type: 'measure_added', + }), + ]), + ); + }); + + /* The injected clock is fixed, so both runs and both reports carry the same timestamp; the expected winner is the report inserted last. */ it('returns the latest stored report across bundle ingest runs', async () => { + const store = new SqliteBundleIngestStore({ + dbPath, + idFactory: idFactory(['run-old', 'report-old', 'run-new', 'report-new']), + now: () => new Date('2026-04-30T10:00:00.000Z'), + }); + + const oldRun = await store.create(runArgs({ jobId: 'job-old', syncId: 'sync-old' })); + await store.markCompleted(oldRun.id, diffSummary()); + await store.create({ + runId: oldRun.id, + jobId: 'job-old', + connectionId: 'docs', + sourceKey: 'notion', + body: reportBody('sync-old'), + }); + + const newRun = await store.create(runArgs({ jobId: 'job-new', syncId: 'sync-new' })); + await 
store.markCompleted(newRun.id, diffSummary({ modified: 1 })); + await store.create({ + runId: newRun.id, + jobId: 'job-new', + connectionId: 'docs', + sourceKey: 'notion', + body: reportBody('sync-new'), + }); + + await expect(store.findLatestReport()).resolves.toMatchObject({ + id: 'report-new', + runId: 'run-new', + jobId: 'job-new', + body: { syncId: 'sync-new' }, + }); + }); + + /* Creates two runs (each with a report) under one job_id and expects a reopened store to see only the second run's report. */ it('replaces a prior run with the same job_id when re-creating', async () => { + const store = new SqliteBundleIngestStore({ + dbPath, + idFactory: idFactory(['run-old', 'report-old', 'run-new', 'report-new']), + now: () => new Date('2026-04-30T10:00:00.000Z'), + }); + + const oldRun = await store.create(runArgs({ jobId: 'demo-full-ingest', syncId: 'sync-1' })); + expect(oldRun).toEqual({ id: 'run-old' }); + await store.markCompleted(oldRun.id, diffSummary()); + await store.create({ + runId: oldRun.id, + jobId: 'demo-full-ingest', + connectionId: 'docs', + sourceKey: 'notion', + body: reportBody('sync-1'), + }); + + const newRun = await store.create(runArgs({ jobId: 'demo-full-ingest', syncId: 'sync-2' })); + expect(newRun).toEqual({ id: 'run-new' }); + await store.markCompleted(newRun.id, diffSummary()); + await store.create({ + runId: newRun.id, + jobId: 'demo-full-ingest', + connectionId: 'docs', + sourceKey: 'notion', + body: reportBody('sync-2'), + }); + + const reopened = new SqliteBundleIngestStore({ dbPath }); + await expect(reopened.findByJobId('demo-full-ingest')).resolves.toMatchObject({ + runId: 'run-new', + body: { syncId: 'sync-2' }, + }); + }); + + /* Pins are stored in non-alphabetical order and expected back sorted by contestedKey; an unknown connection yields an empty list. */ it('lists local canonical pins for the bundle runner port', async () => { + const store = new SqliteBundleIngestStore({ dbPath }); + + await store.replaceCanonicalPins('docs', [ + { + contestedKey: 'gross revenue', + canonicalArtifactKey: 'finance.revenue', + pinnedAt: '2026-04-30T09:00:00.000Z', + pinnedBy: 'analyst@example.com', + reason: 'Finance source is canonical.', + }, + { + contestedKey: 'active customer', + 
canonicalArtifactKey: 'crm.active_customer', + pinnedAt: '2026-04-30T09:05:00.000Z', + pinnedBy: 'analyst@example.com', + reason: null, + }, + ]); + + await expect(store.listPins(['docs'])).resolves.toEqual([ + { + contestedKey: 'active customer', + canonicalArtifactKey: 'crm.active_customer', + pinnedAt: '2026-04-30T09:05:00.000Z', + pinnedBy: 'analyst@example.com', + reason: null, + }, + { + contestedKey: 'gross revenue', + canonicalArtifactKey: 'finance.revenue', + pinnedAt: '2026-04-30T09:00:00.000Z', + pinnedBy: 'analyst@example.com', + reason: 'Finance source is canonical.', + }, + ]); + await expect(store.listPins(['other'])).resolves.toEqual([]); + }); + + /* The same report must resolve from its own id, from its run id and from its job id; an unknown id resolves to null. */ it('finds a report by report id, run id, or job id for local status and replay', async () => { + const store = new SqliteBundleIngestStore({ + dbPath, + idFactory: idFactory(['run-lookup', 'report-lookup']), + now: () => new Date('2026-04-30T11:00:00.000Z'), + }); + + const run = await store.create(runArgs({ jobId: 'job-lookup', syncId: 'sync-lookup' })); + await store.markCompleted(run.id, diffSummary({ added: 1 })); + await store.create({ + runId: run.id, + jobId: 'job-lookup', + connectionId: 'docs', + sourceKey: 'notion', + body: reportBody('sync-lookup'), + }); + + await expect(store.findReportByAnyId('report-lookup')).resolves.toMatchObject({ + id: 'report-lookup', + runId: 'run-lookup', + jobId: 'job-lookup', + }); + await expect(store.findReportByAnyId('run-lookup')).resolves.toMatchObject({ + id: 'report-lookup', + runId: 'run-lookup', + jobId: 'job-lookup', + }); + await expect(store.findReportByAnyId('job-lookup')).resolves.toMatchObject({ + id: 'report-lookup', + runId: 'run-lookup', + jobId: 'job-lookup', + }); + await expect(store.findReportByAnyId('missing')).resolves.toBeNull(); + }); +}); diff --git a/packages/context/src/ingest/sqlite-bundle-ingest-store.ts b/packages/context/src/ingest/sqlite-bundle-ingest-store.ts new file mode 100644 index 00000000..50c2d75d --- /dev/null +++ 
b/packages/context/src/ingest/sqlite-bundle-ingest-store.ts @@ -0,0 +1,598 @@ +import { randomUUID } from 'node:crypto'; +import { mkdirSync } from 'node:fs'; +import { dirname } from 'node:path'; +import Database from 'better-sqlite3'; +import type { CanonicalPin } from './canonical-pins.js'; +import type { + CreateIngestRunArgs, + IngestCanonicalPinsPort, + IngestProvenanceInsert, + IngestProvenancePort, + IngestProvenanceRow, + IngestReportsPort, + IngestRunRecord, + IngestRunsPort, + ProvenanceActionType, +} from './ports.js'; +import type { IngestReportBody, IngestReportSnapshot } from './reports.js'; +import type { IngestDiffSummary } from './types.js'; + +/** Construction options: dbPath is the SQLite file to open; idFactory and now optionally replace the id generator and the clock. */ export interface SqliteBundleIngestStoreOptions { + dbPath: string; + idFactory?: () => string; + now?: () => Date; +} + +/** Values written to bundle_ingest_runs.status. */ type RunStatus = 'running' | 'completed' | 'failed'; + +/** Row shape selected from bundle_ingest_reports. */ interface ReportRow { + id: string; + run_id: string; + job_id: string; + connection_id: string; + source_key: string; + body_json: string; + created_at: string; +} + +/** Row shape selected from bundle_ingest_provenance; artifact_kind and action_type are still plain strings here. */ interface ProvenanceRow { + sync_id: string; + raw_path: string; + raw_content_hash: string; + artifact_kind: string | null; + artifact_key: string | null; + target_connection_id: string | null; + artifact_content_hash: string | null; + action_type: string; +} + +/** Narrows a stored artifact kind to null | 'sl' | 'wiki'; throws on any other value. */ function parseArtifactKind(kind: string | null): IngestProvenanceRow['artifact_kind'] { + if (kind === null || kind === 'sl' || kind === 'wiki') { + return kind; + } + throw new Error(`Unexpected local ingest artifact kind: ${kind}`); +} + +/** Narrows a stored action type to ProvenanceActionType; throws on a value outside the listed cases. */ function parseActionType(action: string): ProvenanceActionType { + switch (action) { + case 'source_created': + case 'measure_added': + case 'join_added': + case 'merged': + case 'subsumed': + case 'wiki_written': + case 'skipped': + return action; + default: + throw new Error(`Unexpected local ingest provenance action type: ${action}`); + } +} + +/** Maps a report row to a snapshot, JSON-parsing body_json without shape validation (a malformed body makes JSON.parse throw). */ function parseReport(row: ReportRow): IngestReportSnapshot { + return { + id: row.id, + runId: row.run_id, + jobId: row.job_id, 
+ connectionId: row.connection_id, + sourceKey: row.source_key, + body: JSON.parse(row.body_json) as IngestReportBody, + createdAt: row.created_at, + }; +} + +/** Copies a provenance row, validating artifact_kind and action_type through the two parsers (either may throw). */ function toPortProvenanceRow(row: ProvenanceRow): IngestProvenanceRow { + return { + sync_id: row.sync_id, + raw_path: row.raw_path, + raw_content_hash: row.raw_content_hash, + artifact_kind: parseArtifactKind(row.artifact_kind), + artifact_key: row.artifact_key, + target_connection_id: row.target_connection_id, + artifact_content_hash: row.artifact_content_hash, + action_type: parseActionType(row.action_type), + }; +} + +/** Returns one '?' per value, joined with ', ', for building SQL IN lists. */ function placeholders(values: readonly unknown[]): string { + return values.map(() => '?').join(', '); +} + +/** better-sqlite3 backed store implementing the ingest runs, reports, provenance and canonical-pins ports against a single database file. */ export class SqliteBundleIngestStore + implements IngestRunsPort, IngestReportsPort, IngestProvenancePort, IngestCanonicalPinsPort +{ + private readonly db: Database.Database; + private readonly idFactory: () => string; + private readonly now: () => Date; + + /** Creates the parent directory of dbPath, opens the database with WAL journaling and foreign keys enabled, and creates the tables and indexes when missing. */ constructor(options: SqliteBundleIngestStoreOptions) { + mkdirSync(dirname(options.dbPath), { recursive: true }); + this.db = new Database(options.dbPath); + this.db.pragma('journal_mode = WAL'); + this.db.pragma('foreign_keys = ON'); + this.idFactory = options.idFactory ?? (() => randomUUID()); + this.now = options.now ?? 
(() => new Date()); + this.db.exec(` + CREATE TABLE IF NOT EXISTS bundle_ingest_runs ( + id TEXT PRIMARY KEY, + job_id TEXT NOT NULL UNIQUE, + connection_id TEXT NOT NULL, + source_key TEXT NOT NULL, + sync_id TEXT NOT NULL, + trigger TEXT NOT NULL, + scope_fingerprint TEXT, + status TEXT NOT NULL, + diff_summary_json TEXT, + started_at TEXT NOT NULL, + completed_at TEXT, + failed_at TEXT + ); + + CREATE INDEX IF NOT EXISTS bundle_ingest_runs_completed_lookup_idx + ON bundle_ingest_runs (connection_id, source_key, sync_id, status, completed_at DESC); + + CREATE TABLE IF NOT EXISTS bundle_ingest_reports ( + id TEXT PRIMARY KEY, + run_id TEXT NOT NULL REFERENCES bundle_ingest_runs(id) ON DELETE CASCADE, + job_id TEXT NOT NULL UNIQUE, + connection_id TEXT NOT NULL, + source_key TEXT NOT NULL, + body_json TEXT NOT NULL, + created_at TEXT NOT NULL + ); + + CREATE INDEX IF NOT EXISTS bundle_ingest_reports_run_idx + ON bundle_ingest_reports (run_id); + + CREATE TABLE IF NOT EXISTS bundle_ingest_provenance ( + id TEXT PRIMARY KEY, + connection_id TEXT NOT NULL, + source_key TEXT NOT NULL, + sync_id TEXT NOT NULL, + raw_path TEXT NOT NULL, + raw_content_hash TEXT NOT NULL, + artifact_kind TEXT, + artifact_key TEXT, + target_connection_id TEXT, + artifact_content_hash TEXT, + action_type TEXT NOT NULL, + created_at TEXT NOT NULL + ); + + CREATE INDEX IF NOT EXISTS bundle_ingest_provenance_latest_idx + ON bundle_ingest_provenance (connection_id, source_key, raw_path, sync_id, created_at DESC); + + CREATE TABLE IF NOT EXISTS bundle_ingest_canonical_pins ( + connection_id TEXT NOT NULL, + contested_key TEXT NOT NULL, + canonical_artifact_key TEXT NOT NULL, + pinned_at TEXT NOT NULL, + pinned_by TEXT NOT NULL, + reason TEXT, + created_at TEXT NOT NULL, + PRIMARY KEY (connection_id, contested_key) + ); + `); + /* CREATE TABLE IF NOT EXISTS leaves an existing table untouched, so the column is added separately when absent - presumably for databases created before target_connection_id existed (TODO confirm). */ this.ensureColumn('bundle_ingest_provenance', 'target_connection_id', 'TEXT'); + } + + /** Adds column to table when PRAGMA table_info does not list it. table, column and definition are interpolated into the SQL text, so only trusted constants may be passed. */ private ensureColumn(table: string, column: string, definition: string): void { + 
const columns = this.db.prepare(`PRAGMA table_info(${table})`).all() as Array<{ name: string }>; + if (!columns.some((existing) => existing.name === column)) { + this.db.prepare(`ALTER TABLE ${table} ADD COLUMN ${column} ${definition}`).run(); + } + } + + /** Shared `create` entry point: arguments that carry a `body` property create a report, anything else creates a run. */ async create(args: CreateIngestRunArgs): Promise; + async create(args: { + runId: string; + jobId: string; + connectionId: string; + sourceKey: string; + body: unknown; + }): Promise; + async create( + args: + | CreateIngestRunArgs + | { + runId: string; + jobId: string; + connectionId: string; + sourceKey: string; + body: unknown; + }, + ): Promise { + if ('body' in args) { + return this.createReport(args); + } + return this.createRun(args); + } + + /** Marks the run completed and stores its diff summary. */ async markCompleted(id: string, diffSummary: IngestDiffSummary): Promise { + this.markRun(id, 'completed', diffSummary); + } + + /** Marks the run failed; the stored diff summary is set to NULL. */ async markFailed(id: string): Promise { + this.markRun(id, 'failed', null); + } + + /** Inserts the provenance rows inside one transaction; every row gets a fresh id from idFactory and the same created_at. Returns early for an empty array. */ async insertMany(rows: IngestProvenanceInsert[]): Promise { + if (rows.length === 0) { + return; + } + const insert = this.db.prepare(` + INSERT INTO bundle_ingest_provenance ( + id, + connection_id, + source_key, + sync_id, + raw_path, + raw_content_hash, + artifact_kind, + artifact_key, + target_connection_id, + artifact_content_hash, + action_type, + created_at + ) + VALUES ( + @id, + @connectionId, + @sourceKey, + @syncId, + @rawPath, + @rawContentHash, + @artifactKind, + @artifactKey, + @targetConnectionId, + @artifactContentHash, + @actionType, + @createdAt + ) + `); + const createdAt = this.now().toISOString(); + const save = this.db.transaction((inputRows: IngestProvenanceInsert[]) => { + for (const row of inputRows) { + insert.run({ + id: this.idFactory(), + connectionId: row.connectionId, + sourceKey: row.sourceKey, + syncId: row.syncId, + rawPath: row.rawPath, + rawContentHash: row.rawContentHash, + artifactKind: row.artifactKind, + artifactKey: row.artifactKey, + targetConnectionId: row.targetConnectionId ?? 
null, + artifactContentHash: row.artifactContentHash, + actionType: row.actionType, + createdAt, + }); + } + }); + save(rows); + } + + /** Maps each raw_path to its raw_content_hash from the most recent completed run: rows are ordered newest first and the first row seen per path wins. */ async findLatestHashesForCompletedSyncs(connectionId: string, sourceKey: string): Promise> { + const rows = this.db + .prepare( + ` + SELECT p.raw_path, p.raw_content_hash + FROM bundle_ingest_provenance p + INNER JOIN bundle_ingest_runs r + ON r.connection_id = p.connection_id + AND r.source_key = p.source_key + AND r.sync_id = p.sync_id + WHERE p.connection_id = ? + AND p.source_key = ? + AND r.status = 'completed' + ORDER BY r.completed_at DESC, r.rowid DESC, p.created_at DESC, p.rowid DESC + `, + ) + .all(connectionId, sourceKey) as Array<{ raw_path: string; raw_content_hash: string }>; + + const latest = new Map(); + for (const row of rows) { + if (!latest.has(row.raw_path)) { + latest.set(row.raw_path, row.raw_content_hash); + } + } + return latest; + } + + /** For each requested raw path, returns the provenance rows of the newest completed sync that recorded it. Returns an empty map without querying when rawPaths is empty. */ async findLatestArtifactsForRawPaths( + connectionId: string, + sourceKey: string, + rawPaths: string[], + ): Promise> { + if (rawPaths.length === 0) { + return new Map(); + } + const rows = this.db + .prepare( + ` + SELECT + p.sync_id, + p.raw_path, + p.raw_content_hash, + p.artifact_kind, + p.artifact_key, + p.target_connection_id, + p.artifact_content_hash, + p.action_type + FROM bundle_ingest_provenance p + INNER JOIN bundle_ingest_runs r + ON r.connection_id = p.connection_id + AND r.source_key = p.source_key + AND r.sync_id = p.sync_id + WHERE p.connection_id = ? + AND p.source_key = ? 
+ AND p.raw_path IN (${placeholders(rawPaths)}) + AND r.status = 'completed' + ORDER BY r.completed_at DESC, r.rowid DESC, p.created_at DESC, p.rowid DESC + `, + ) + .all(connectionId, sourceKey, ...rawPaths) as ProvenanceRow[]; + + /* Rows arrive newest first: the first sync seen for a path is the one kept, and rows from any other sync for that path are skipped. */ const selectedSyncByPath = new Map(); + const result = new Map(); + for (const row of rows) { + if (!selectedSyncByPath.has(row.raw_path)) { + selectedSyncByPath.set(row.raw_path, row.sync_id); + } + if (selectedSyncByPath.get(row.raw_path) !== row.sync_id) { + continue; + } + const group = result.get(row.raw_path) ?? []; + group.push(toPortProvenanceRow(row)); + result.set(row.raw_path, group); + } + return result; + } + + /** Returns the report stored for jobId, or null when there is none. */ async findByJobId(jobId: string): Promise { + const row = this.db + .prepare( + ` + SELECT id, run_id, job_id, connection_id, source_key, body_json, created_at + FROM bundle_ingest_reports + WHERE job_id = ? + `, + ) + .get(jobId) as ReportRow | undefined; + return row ? parseReport(row) : null; + } + + /** Matches id against the report id, run_id and job_id columns and returns the newest match (created_at, then rowid), or null. */ async findReportByAnyId(id: string): Promise { + const row = this.db + .prepare( + ` + SELECT id, run_id, job_id, connection_id, source_key, body_json, created_at + FROM bundle_ingest_reports + WHERE id = ? + OR run_id = ? + OR job_id = ? + ORDER BY created_at DESC, rowid DESC + LIMIT 1 + `, + ) + .get(id, id, id) as ReportRow | undefined; + return row ? parseReport(row) : null; + } + + /** Returns the report whose run has the latest completed/failed/started timestamp (falling back to the report's created_at), with ties broken by created_at and then rowid; null when no report exists. */ async findLatestReport(): Promise { + const row = this.db + .prepare( + ` + SELECT br.id, br.run_id, br.job_id, br.connection_id, br.source_key, br.body_json, br.created_at + FROM bundle_ingest_reports br + LEFT JOIN bundle_ingest_runs r + ON r.id = br.run_id + ORDER BY + COALESCE(r.completed_at, r.failed_at, r.started_at, br.created_at) DESC, + br.created_at DESC, + br.rowid DESC + LIMIT 1 + `, + ) + .get() as ReportRow | undefined; + return row ? 
parseReport(row) : null; + } + + /** Sets body.supersededBy on the report stored for jobId and does nothing when no such report exists. NOTE(review): the read and the write are separate statements, not one transaction. */ async markSuperseded(jobId: string, supersededByJobId: string): Promise { + const report = await this.findByJobId(jobId); + if (!report) { + return; + } + const nextBody = { + ...report.body, + supersededBy: supersededByJobId, + }; + this.db + .prepare('UPDATE bundle_ingest_reports SET body_json = ? WHERE job_id = ?') + .run(JSON.stringify(nextBody), jobId); + } + + /** Lists the pins of the given connection ids ordered by contested_key; returns [] without querying when the id list is empty. */ async listPins(connectionIds: string[]): Promise { + if (connectionIds.length === 0) { + return []; + } + const rows = this.db + .prepare( + ` + SELECT contested_key, canonical_artifact_key, pinned_at, pinned_by, reason + FROM bundle_ingest_canonical_pins + WHERE connection_id IN (${placeholders(connectionIds)}) + ORDER BY contested_key ASC + `, + ) + .all(...connectionIds) as Array<{ + contested_key: string; + canonical_artifact_key: string; + pinned_at: string; + pinned_by: string; + reason: string | null; + }>; + return rows.map((row) => ({ + contestedKey: row.contested_key, + canonicalArtifactKey: row.canonical_artifact_key, + pinnedAt: row.pinned_at, + pinnedBy: row.pinned_by, + reason: row.reason, + })); + } + + /** Deletes every pin of connectionId and inserts the given pins, all inside one transaction; an undefined reason is stored as NULL. */ async replaceCanonicalPins(connectionId: string, pins: CanonicalPin[]): Promise { + const createdAt = this.now().toISOString(); + const replace = this.db.transaction(() => { + this.db.prepare('DELETE FROM bundle_ingest_canonical_pins WHERE connection_id = ?').run(connectionId); + const insert = this.db.prepare(` + INSERT INTO bundle_ingest_canonical_pins ( + connection_id, + contested_key, + canonical_artifact_key, + pinned_at, + pinned_by, + reason, + created_at + ) + VALUES (?, ?, ?, ?, ?, ?, ?) + `); + for (const pin of pins) { + insert.run( + connectionId, + pin.contestedKey, + pin.canonicalArtifactKey, + pin.pinnedAt, + pin.pinnedBy, + pin.reason ?? 
null, + createdAt, + ); + } + }); + replace(); + } + + /** Inserts a new run with status 'running' and returns its id. Any existing run with the same job_id is deleted first in the same transaction; the reports table references runs with ON DELETE CASCADE and foreign keys are enabled, so that run's report is removed with it. */ private async createRun(args: CreateIngestRunArgs): Promise { + const id = this.idFactory(); + const deletePrior = this.db.prepare('DELETE FROM bundle_ingest_runs WHERE job_id = ?'); + const insert = this.db.prepare(` + INSERT INTO bundle_ingest_runs ( + id, + job_id, + connection_id, + source_key, + sync_id, + trigger, + scope_fingerprint, + status, + started_at + ) + VALUES ( + @id, + @jobId, + @connectionId, + @sourceKey, + @syncId, + @trigger, + @scopeFingerprint, + 'running', + @startedAt + ) + `); + const replace = this.db.transaction((row: Record) => { + deletePrior.run(args.jobId); + insert.run(row); + }); + replace({ + id, + jobId: args.jobId, + connectionId: args.connectionId, + sourceKey: args.sourceKey, + syncId: args.syncId, + trigger: args.trigger, + scopeFingerprint: args.scopeFingerprint ?? null, + startedAt: this.now().toISOString(), + }); + return { id }; + } + + /** Stores the report for a job and returns the stored snapshot. When a report already exists for job_id the row is overwritten in place, including its id, so the returned id always matches the persisted row and stays resolvable through findReportByAnyId. */ private async createReport(args: { + runId: string; + jobId: string; + connectionId: string; + sourceKey: string; + body: unknown; + }): Promise { + const id = this.idFactory(); + const createdAt = this.now().toISOString(); + const body = args.body as IngestReportBody; + /* The upsert refreshes id as well: leaving the old id in place would make the snapshot returned below disagree with the database. */ this.db + .prepare( + ` + INSERT INTO bundle_ingest_reports ( + id, + run_id, + job_id, + connection_id, + source_key, + body_json, + created_at + ) + VALUES ( + @id, + @runId, + @jobId, + @connectionId, + @sourceKey, + @bodyJson, + @createdAt + ) + ON CONFLICT(job_id) DO UPDATE SET + id = excluded.id, run_id = excluded.run_id, + connection_id = excluded.connection_id, + source_key = excluded.source_key, + body_json = excluded.body_json, + created_at = excluded.created_at + `, + ) + .run({ + id, + runId: args.runId, + jobId: args.jobId, + connectionId: args.connectionId, + sourceKey: args.sourceKey, + bodyJson: JSON.stringify(body), + createdAt, + }); + return { + id, + runId: args.runId, + jobId: args.jobId, + connectionId: args.connectionId, + sourceKey: args.sourceKey, + body, + 
createdAt, + }; + } + + /** Writes status and the diff summary (NULL when none is given) for the run, stamping completed_at or failed_at with the current time according to status; the other timestamp column keeps its value. */ private markRun(id: string, status: RunStatus, diffSummary: IngestDiffSummary | null): void { + const timestamp = this.now().toISOString(); + this.db + .prepare( + ` + UPDATE bundle_ingest_runs + SET + status = @status, + diff_summary_json = @diffSummaryJson, + completed_at = CASE WHEN @status = 'completed' THEN @timestamp ELSE completed_at END, + failed_at = CASE WHEN @status = 'failed' THEN @timestamp ELSE failed_at END + WHERE id = @id + `, + ) + .run({ + id, + status, + diffSummaryJson: diffSummary ? JSON.stringify(diffSummary) : null, + timestamp, + }); + } +} diff --git a/packages/context/src/ingest/sqlite-local-ingest-store.test.ts b/packages/context/src/ingest/sqlite-local-ingest-store.test.ts new file mode 100644 index 00000000..f311bf31 --- /dev/null +++ b/packages/context/src/ingest/sqlite-local-ingest-store.test.ts @@ -0,0 +1,156 @@ +import { mkdtemp, rm } from 'node:fs/promises'; +import { tmpdir } from 'node:os'; +import { join } from 'node:path'; +import { afterEach, beforeEach, describe, expect, it } from 'vitest'; +import { SqliteLocalIngestStore } from './sqlite-local-ingest-store.js'; +import type { LocalIngestRunRecord } from './local-stage-ingest.js'; + +/** Builds a finished ('done') local ingest run record with one added file and one work unit; overrides are spread last. */ function runRecord(overrides: Partial = {}): LocalIngestRunRecord { + return { + runId: 'local-run-1', + jobId: 'local-run-1', + status: 'done', + adapter: 'fake', + connectionId: 'warehouse', + sourceDir: '/tmp/source', + syncId: '2026-04-27-120000-local-run-1', + startedAt: '2026-04-27T12:00:00.000Z', + completedAt: '2026-04-27T12:00:01.000Z', + progress: 1, + done: true, + previousRunId: null, + diffSummary: { + added: 1, + modified: 0, + deleted: 0, + unchanged: 0, + }, + diffPaths: { + added: ['orders/orders.json'], + modified: [], + deleted: [], + unchanged: [], + }, + workUnitCount: 1, + rawFileCount: 1, + workUnits: [ + { + unitKey: 'fake-orders', + rawFiles: ['orders/orders.json'], + peerFileIndex: [], + dependencyPaths: [], + }, + ], + evictionDeletedRawPaths: 
[], + errors: [], + ...overrides, + }; +} + +describe('SqliteLocalIngestStore', () => { + let tempDir: string; + let dbPath: string; + + /* Every test gets a fresh temp directory; the database path is <tempDir>/.klo/db.sqlite. */ beforeEach(async () => { + tempDir = await mkdtemp(join(tmpdir(), 'klo-sqlite-local-ingest-')); + dbPath = join(tempDir, '.klo', 'db.sqlite'); + }); + + afterEach(async () => { + await rm(tempDir, { recursive: true, force: true }); + }); + + /* Round-trips one record: the value read back must deep-equal the record that was saved. */ it('persists and reads a local ingest run by id', () => { + const store = new SqliteLocalIngestStore({ dbPath }); + const record = runRecord(); + + store.saveCompletedRun({ + record, + rawContentHashes: { + 'orders/orders.json': 'hash-1', + }, + }); + + expect(store.findRunById('local-run-1')).toEqual(record); + expect(store.findRunById('missing-run')).toBeNull(); + }); + + /* Saves two 'fake' runs plus a later 'metabase' run on the same connection, then checks that lookups are scoped per adapter and honour excludeRunId. */ it('returns the latest completed report for the same connection and adapter', () => { + const store = new SqliteLocalIngestStore({ dbPath }); + const first = runRecord({ + runId: 'local-run-1', + jobId: 'local-run-1', + completedAt: '2026-04-27T12:00:00.000Z', + }); + const second = runRecord({ + runId: 'local-run-2', + jobId: 'local-run-2', + syncId: '2026-04-27-120500-local-run-2', + completedAt: '2026-04-27T12:05:00.000Z', + previousRunId: 'local-run-1', + }); + const otherAdapter = runRecord({ + runId: 'metabase-run-1', + jobId: 'metabase-run-1', + adapter: 'metabase', + syncId: '2026-04-27-121000-metabase-run-1', + completedAt: '2026-04-27T12:10:00.000Z', + }); + + store.saveCompletedRun({ + record: first, + rawContentHashes: { + 'orders/orders.json': 'hash-1', + }, + }); + store.saveCompletedRun({ + record: second, + rawContentHashes: { + 'orders/orders.json': 'hash-2', + 'orders/payments.json': 'hash-3', + }, + }); + store.saveCompletedRun({ + record: otherAdapter, + rawContentHashes: { + 'cards/revenue.json': 'hash-4', + }, + }); + + expect(store.findLatestCompletedReport('warehouse', 'fake')).toMatchObject({ + runId: 'local-run-2', + previousRunId: 'local-run-1', + rawContentHashes: { + 'orders/orders.json': 
'hash-2', + 'orders/payments.json': 'hash-3', + }, + }); + expect(store.findLatestCompletedReport('warehouse', 'fake', { excludeRunId: 'local-run-2' })).toMatchObject({ + runId: 'local-run-1', + rawContentHashes: { + 'orders/orders.json': 'hash-1', + }, + }); + expect(store.findLatestCompletedReport('warehouse', 'fake', { excludeRunId: 'local-run-1' })).toMatchObject({ + runId: 'local-run-2', + rawContentHashes: { + 'orders/orders.json': 'hash-2', + 'orders/payments.json': 'hash-3', + }, + }); + expect(store.findLatestCompletedReport('warehouse', 'metabase')).toMatchObject({ + runId: 'metabase-run-1', + rawContentHashes: { + 'cards/revenue.json': 'hash-4', + }, + }); + expect(store.findLatestCompletedReport('missing', 'fake')).toBeNull(); + }); + + /* Both ids fail the store's run-id pattern ('../escape' starts with '.', '' is empty), so findRunById returns null without querying. */ it('ignores malformed run ids when reading status', () => { + const store = new SqliteLocalIngestStore({ dbPath }); + + expect(store.findRunById('../escape')).toBeNull(); + expect(store.findRunById('')).toBeNull(); + }); +}); diff --git a/packages/context/src/ingest/sqlite-local-ingest-store.ts b/packages/context/src/ingest/sqlite-local-ingest-store.ts new file mode 100644 index 00000000..2dd4bc13 --- /dev/null +++ b/packages/context/src/ingest/sqlite-local-ingest-store.ts @@ -0,0 +1,244 @@ +import { mkdirSync } from 'node:fs'; +import { dirname } from 'node:path'; +import Database from 'better-sqlite3'; +import type { LocalIngestReport, LocalIngestRunRecord } from './local-stage-ingest.js'; + +/** Construction options: dbPath is the SQLite file to open. */ export interface SqliteLocalIngestStoreOptions { + dbPath: string; +} + +/** Input of saveCompletedRun: the run record plus the content hash of each raw path. */ export interface SaveCompletedLocalIngestRunInput { + record: LocalIngestRunRecord; + rawContentHashes: Record; +} + +/** Options of findLatestCompletedReport: excludeRunId skips that run when set. */ interface FindLatestCompletedLocalIngestReportOptions { + excludeRunId?: string; +} + +/** Row shape of the queries that select only body_json. */ interface JsonRow { + body_json: string; +} + +/** True when runId starts with an ASCII letter or digit and otherwise contains only letters, digits, '_', '.' or '-'. */ function isSafeRunId(runId: string): boolean { + return /^[a-zA-Z0-9][a-zA-Z0-9_.-]*$/.test(runId); +} + +/** Parses raw as JSON and returns the value only when it is a non-array object; returns null for malformed JSON, for JSON null and for every other value. */ function parseJsonObject(raw: string): unknown { + try { + const parsed: unknown = JSON.parse(raw); + return typeof parsed === 'object' && !Array.isArray(parsed) ? parsed : null; + } catch { + return null; + } +} + +/** Decodes a stored run body. Returns null instead of throwing when the text is not valid JSON, is not an object, or lacks the required string fields or a known status. */ function parseRunRecord(raw: string): LocalIngestRunRecord | null { + const parsed = parseJsonObject(raw) as Partial<LocalIngestRunRecord> | null; + if ( + parsed === null || + typeof parsed.runId !== 'string' || + typeof parsed.jobId !== 'string' || + (parsed.status !== 'running' && parsed.status !== 'done' && parsed.status !== 'error') || + typeof parsed.adapter !== 'string' || + typeof parsed.connectionId !== 'string' || + typeof parsed.syncId !== 'string' + ) { + return null; + } + return parsed as LocalIngestRunRecord; +} + +/** Decodes a stored report body. Returns null instead of throwing when the text is not valid JSON, is not an object, is not a 'done' report, or has a missing or non-object rawContentHashes. */ function parseReport(raw: string): LocalIngestReport | null { + const parsed = parseJsonObject(raw) as Partial<LocalIngestReport> | null; + if ( + parsed === null || + typeof parsed.runId !== 'string' || + parsed.status !== 'done' || + typeof parsed.adapter !== 'string' || + typeof parsed.connectionId !== 'string' || + typeof parsed.completedAt !== 'string' || + typeof parsed.rawContentHashes !== 'object' || + parsed.rawContentHashes === null || + Array.isArray(parsed.rawContentHashes) + ) { + return null; + } + return parsed as LocalIngestReport; +} + +
/** better-sqlite3 backed store for local ingest runs and their reports. */ export class SqliteLocalIngestStore { + private readonly db: Database.Database; + + /** Creates the parent directory of dbPath, opens the database with WAL journaling and foreign keys enabled, and creates the tables and indexes when missing. */ constructor(options: SqliteLocalIngestStoreOptions) { + mkdirSync(dirname(options.dbPath), { recursive: true }); + this.db = new Database(options.dbPath); + this.db.pragma('journal_mode = WAL'); + this.db.pragma('foreign_keys = ON'); + this.db.exec(` + CREATE TABLE IF NOT EXISTS local_ingest_runs ( + run_id TEXT PRIMARY KEY, + job_id TEXT NOT NULL, + status TEXT NOT NULL, + adapter TEXT NOT NULL, + connection_id TEXT NOT NULL, + sync_id TEXT NOT NULL, + started_at TEXT NOT NULL, + completed_at TEXT NOT NULL, + body_json TEXT NOT NULL + ); + + CREATE INDEX IF NOT EXISTS local_ingest_runs_connection_adapter_idx + ON local_ingest_runs (connection_id, adapter, completed_at DESC); + + CREATE TABLE IF NOT EXISTS local_ingest_reports ( + run_id TEXT PRIMARY KEY REFERENCES local_ingest_runs(run_id) ON DELETE CASCADE, + adapter TEXT NOT NULL, + connection_id TEXT NOT NULL, + status TEXT NOT NULL, + completed_at TEXT NOT NULL, + raw_content_hashes_json TEXT NOT NULL, + body_json TEXT NOT NULL + ); + + CREATE INDEX IF NOT 
EXISTS local_ingest_reports_latest_completed_idx + ON local_ingest_reports (connection_id, adapter, status, completed_at DESC, run_id DESC); + `); + } + + /** Upserts the run row and its report row (both keyed by run_id) inside one transaction. The report body is the run record with rawContentHashes added. */ saveCompletedRun(input: SaveCompletedLocalIngestRunInput): void { + const report: LocalIngestReport = { + ...input.record, + rawContentHashes: input.rawContentHashes, + }; + const runBody = JSON.stringify(input.record); + const reportBody = JSON.stringify(report); + const rawContentHashesJson = JSON.stringify(input.rawContentHashes); + + const save = this.db.transaction(() => { + /* The run row is written first because the report row references it through its run_id foreign key. */ this.db + .prepare( + ` + INSERT INTO local_ingest_runs ( + run_id, + job_id, + status, + adapter, + connection_id, + sync_id, + started_at, + completed_at, + body_json + ) + VALUES ( + @runId, + @jobId, + @status, + @adapter, + @connectionId, + @syncId, + @startedAt, + @completedAt, + @bodyJson + ) + ON CONFLICT(run_id) DO UPDATE SET + job_id = excluded.job_id, + status = excluded.status, + adapter = excluded.adapter, + connection_id = excluded.connection_id, + sync_id = excluded.sync_id, + started_at = excluded.started_at, + completed_at = excluded.completed_at, + body_json = excluded.body_json + `, + ) + .run({ + runId: input.record.runId, + jobId: input.record.jobId, + status: input.record.status, + adapter: input.record.adapter, + connectionId: input.record.connectionId, + syncId: input.record.syncId, + startedAt: input.record.startedAt, + completedAt: input.record.completedAt, + bodyJson: runBody, + }); + + this.db + .prepare( + ` + INSERT INTO local_ingest_reports ( + run_id, + adapter, + connection_id, + status, + completed_at, + raw_content_hashes_json, + body_json + ) + VALUES ( + @runId, + @adapter, + @connectionId, + @status, + @completedAt, + @rawContentHashesJson, + @bodyJson + ) + ON CONFLICT(run_id) DO UPDATE SET + adapter = excluded.adapter, + connection_id = excluded.connection_id, + status = excluded.status, + completed_at = excluded.completed_at, + raw_content_hashes_json = excluded.raw_content_hashes_json, + 
body_json = excluded.body_json + `, + ) + .run({ + runId: report.runId, + adapter: report.adapter, + connectionId: report.connectionId, + status: report.status, + completedAt: report.completedAt, + rawContentHashesJson, + bodyJson: reportBody, + }); + }); + + save(); + } + + /** Returns the stored run record for runId, or null when the id fails isSafeRunId, no row exists, or parseRunRecord rejects the stored body. */ findRunById(runId: string): LocalIngestRunRecord | null { + if (!isSafeRunId(runId)) { + return null; + } + const row = this.db + .prepare('SELECT body_json FROM local_ingest_runs WHERE run_id = ?') + .get(runId) as JsonRow | undefined; + return row ? parseRunRecord(row.body_json) : null; + } + + /** Returns the newest report with status 'done' for the connection/adapter pair, ordered by completed_at and then run_id descending, optionally skipping options.excludeRunId; null when no row matches or parseReport rejects the body. */ findLatestCompletedReport( + connectionId: string, + adapter: string, + options: FindLatestCompletedLocalIngestReportOptions = {}, + ): LocalIngestReport | null { + /* The extra clause and its bound parameter are added together, so the placeholder count always matches params. */ const excludeCurrentRunClause = options.excludeRunId ? 'AND run_id <> ?' : ''; + const params = options.excludeRunId ? [connectionId, adapter, options.excludeRunId] : [connectionId, adapter]; + const row = this.db + .prepare( + ` + SELECT body_json + FROM local_ingest_reports + WHERE connection_id = ? + AND adapter = ? + AND status = 'done' + ${excludeCurrentRunClause} + ORDER BY completed_at DESC, run_id DESC + LIMIT 1 + `, + ) + .get(...params) as JsonRow | undefined; + return row ? 
parseReport(row.body_json) : null; + } +} diff --git a/packages/context/src/ingest/stages/build-reconcile-context.context-candidates.test.ts b/packages/context/src/ingest/stages/build-reconcile-context.context-candidates.test.ts new file mode 100644 index 00000000..22427ddd --- /dev/null +++ b/packages/context/src/ingest/stages/build-reconcile-context.context-candidates.test.ts @@ -0,0 +1,124 @@ +import { describe, expect, it } from 'vitest'; +import { buildReconcileUserPrompt } from './build-reconcile-context.js'; + +/* Stage index with no work units, conflicts, evictions or fallbacks; passed as the first argument to buildReconcileUserPrompt below. */ const emptyStageIndex = { + jobId: 'job-1', + connectionId: 'c1', + workUnits: [], + conflictsResolved: [], + evictionsApplied: [], + unmappedFallbacks: [], +}; + +describe('buildReconcileUserPrompt', () => { + it('includes context knowledge candidates for curator reconciliation', () => { + const prompt = buildReconcileUserPrompt(emptyStageIndex, undefined, { + summary: { total: 1, pending: 1, promoted: 0, merged: 0, rejected: 0, conflict: 0 }, + items: [ + { + candidateKey: 'revenue-definition', + topic: 'Revenue', + assertion: 'Booked revenue excludes refunds.', + rationale: 'Finance policy is authoritative.', + actionHint: 'create', + status: 'pending', + promotionScore: 10, + suggestedPageKey: 'revenue-definition', + evidenceRefs: [{ stableCitationKey: 'notion:page-1:policy:abc' }], + }, + ], + }); + + expect(prompt).toContain('# Context Knowledge Candidates'); + expect(prompt).toContain('candidateKey: revenue-definition'); + expect(prompt).toContain('promotionScore: 10'); + }); + + it('caps serialized candidate evidence refs in the prompt', () => { + const prompt = buildReconcileUserPrompt(emptyStageIndex, undefined, { + summary: { total: 1, pending: 1, promoted: 0, merged: 0, rejected: 0, conflict: 0 }, + items: [ + { + candidateKey: 'large-evidence', + topic: 'Large Evidence', + assertion: 'Large evidence refs are summarized.', + rationale: 'Avoid reconcile prompt bloat.', + actionHint: 'create', + status: 'pending', + promotionScore: 10, + 
suggestedPageKey: 'large-evidence', + evidenceRefs: Array.from({ length: 25 }, (_, index) => ({ + stableCitationKey: `notion:page-${index}:policy`, + rawPath: `pages/page-${index}/page.md`, + largeMetadata: 'x'.repeat(500), + })), + }, + ], + }); + + expect(prompt).toContain('notion:page-0:policy'); + expect(prompt).toContain('15 more evidence refs omitted'); + expect(prompt).not.toContain('notion:page-24:policy'); + expect(prompt).not.toContain('largeMetadata'); + }); + + it('includes source reconciliation notes after context candidates', () => { + const prompt = buildReconcileUserPrompt( + emptyStageIndex, + undefined, + { + summary: { total: 0, pending: 0, promoted: 0, merged: 0, rejected: 0, conflict: 0 }, + items: [], + }, + ['Notion maxKnowledgeCreatesPerRun=5', 'Notion maxKnowledgeUpdatesPerRun=20'], + ); + + expect(prompt).toContain('# Context Knowledge Candidates'); + expect(prompt).toContain('# Source Reconciliation Notes'); + expect(prompt.indexOf('# Source Reconciliation Notes')).toBeGreaterThan( + prompt.indexOf('# Context Knowledge Candidates'), + ); + expect(prompt).toContain('- Notion maxKnowledgeCreatesPerRun=5'); + expect(prompt).toContain('- Notion maxKnowledgeUpdatesPerRun=20'); + }); + + it('includes curator pass state when supplied', () => { + const prompt = buildReconcileUserPrompt( + emptyStageIndex, + undefined, + { + summary: { total: 2, pending: 2, promoted: 0, merged: 0, rejected: 0, conflict: 0 }, + items: [ + { + candidateKey: 'revenue-definition', + topic: 'Revenue', + assertion: 'Booked revenue excludes refunds.', + rationale: 'Finance policy is authoritative.', + actionHint: 'create', + status: 'pending', + promotionScore: 10, + suggestedPageKey: 'revenue-definition', + evidenceRefs: [{ stableCitationKey: 'notion:page-1:policy:abc' }], + }, + ], + }, + ['Notion maxKnowledgeCreatesPerRun=5'], + { + passNumber: 2, + maxPasses: 5, + budgetRemaining: { creates: 3, updates: 18 }, + previouslyPromotedInRun: [ + { pageKey: 
'revenue-policy', action: 'created', summary: 'Revenue policy' }, + { pageKey: 'support-handoff', action: 'updated', summary: 'Support handoff owner' }, + ], + }, + ); + + expect(prompt).toContain('# Curator Pass State'); + expect(prompt).toContain('pass: 2 of 5'); + expect(prompt).toContain('budgetRemaining: creates=3 updates=18'); + expect(prompt).toContain('- revenue-policy (created): Revenue policy'); + expect(prompt).toContain('- support-handoff (updated): Support handoff owner'); + expect(prompt.indexOf('# Context Knowledge Candidates')).toBeGreaterThan(prompt.indexOf('# Curator Pass State')); + }); +}); diff --git a/packages/context/src/ingest/stages/build-reconcile-context.test.ts b/packages/context/src/ingest/stages/build-reconcile-context.test.ts new file mode 100644 index 00000000..fb919109 --- /dev/null +++ b/packages/context/src/ingest/stages/build-reconcile-context.test.ts @@ -0,0 +1,83 @@ +import { describe, expect, it, vi } from 'vitest'; +import { buildReconcileSystemPrompt, buildReconcileToolSet } from './build-reconcile-context.js'; + +describe('buildReconcileSystemPrompt', () => { + it('appends canonical pins when relevant pins are supplied', () => { + const prompt = buildReconcileSystemPrompt({ + baseFraming: 'reconcile', + skillsPrompt: 'ingest_triage', + syncId: 'sync-1', + sourceKey: 'lookml', + canonicalPins: [ + { + contestedKey: 'churn_risk_score', + canonicalArtifactKey: 'billing.churn_risk_score', + pinnedAt: '2026-04-27T12:00:00.000Z', + pinnedBy: 'user-1', + reason: 'billing owns the contractual definition', + }, + ], + }); + + expect(prompt).toContain(''); + expect(prompt).toContain('contestedKey: churn_risk_score'); + expect(prompt).toContain('canonicalArtifactKey: billing.churn_risk_score'); + expect(prompt).toContain(''); + }); + + it('omits canonical_pins when none are relevant', () => { + const prompt = buildReconcileSystemPrompt({ + baseFraming: 'reconcile', + skillsPrompt: '', + syncId: 'sync-1', + sourceKey: 'lookml', + 
canonicalPins: [], + }); + + expect(prompt).not.toContain(''); + expect(prompt).toContain('syncId: sync-1'); + }); +}); + +describe('buildReconcileToolSet', () => { + it('includes emit_unmapped_fallback with the reconciliation tools', () => { + const toolSet = buildReconcileToolSet({ + loadSkillTool: { load_skill: { description: 'load', inputSchema: {} as any, execute: vi.fn() } } as any, + stageListTool: { stage_list: { description: 'stage list', inputSchema: {} as any, execute: vi.fn() } } as any, + stageDiffTool: { stage_diff: { description: 'stage diff', inputSchema: {} as any, execute: vi.fn() } } as any, + evictionListTool: { + eviction_list: { description: 'eviction list', inputSchema: {} as any, execute: vi.fn() }, + } as any, + emitConflictResolutionTool: { + emit_conflict_resolution: { description: 'conflict', inputSchema: {} as any, execute: vi.fn() }, + } as any, + emitEvictionDecisionTool: { + emit_eviction_decision: { description: 'eviction', inputSchema: {} as any, execute: vi.fn() }, + } as any, + emitArtifactResolutionTool: { + emit_artifact_resolution: { description: 'resolution', inputSchema: {} as any, execute: vi.fn() }, + } as any, + emitUnmappedFallbackTool: { + emit_unmapped_fallback: { description: 'fallback', inputSchema: {} as any, execute: vi.fn() }, + } as any, + readRawSpanTool: { read_raw_span: { description: 'raw span', inputSchema: {} as any, execute: vi.fn() } } as any, + toolsetTools: { sl_write_source: {} as any, wiki_write: {} as any }, + }); + + expect(Object.keys(toolSet).sort()).toEqual( + [ + 'emit_conflict_resolution', + 'emit_eviction_decision', + 'emit_artifact_resolution', + 'emit_unmapped_fallback', + 'eviction_list', + 'load_skill', + 'read_raw_span', + 'sl_write_source', + 'stage_diff', + 'stage_list', + 'wiki_write', + ].sort(), + ); + }); +}); diff --git a/packages/context/src/ingest/stages/build-reconcile-context.ts b/packages/context/src/ingest/stages/build-reconcile-context.ts new file mode 100644 index 
00000000..17867a55 --- /dev/null +++ b/packages/context/src/ingest/stages/build-reconcile-context.ts @@ -0,0 +1,192 @@ +import type { Tool, ToolSet } from 'ai'; +import { buildCanonicalPinsPromptBlock, type CanonicalPin } from '../canonical-pins.js'; +import type { EvictionUnit } from '../types.js'; +import type { StageIndex } from './stage-index.types.js'; + +export function buildReconcileSystemPrompt(params: { + baseFraming: string; + skillsPrompt: string; + syncId: string; + sourceKey: string; + canonicalPins: CanonicalPin[]; +}): string { + return [ + params.baseFraming.trimEnd(), + params.skillsPrompt.trimEnd(), + buildCanonicalPinsPromptBlock(params.canonicalPins), + `\n\nsyncId: ${params.syncId}\nsource: ${params.sourceKey}\n`, + ] + .filter(Boolean) + .join('\n'); +} + +export interface ReconcileCandidateSummary { + total: number; + pending: number; + promoted: number; + merged: number; + rejected: number; + conflict: number; +} + +export interface ReconcileCandidateForPrompt { + candidateKey: string; + topic: string; + assertion: string; + rationale: string; + actionHint: string; + status: string; + promotionScore: number; + suggestedPageKey: string | null; + evidenceRefs: unknown; +} + +export interface WikiPageRef { + pageKey: string; + action: 'created' | 'updated'; + summary: string; +} + +export interface ReconcilePromptRunState { + passNumber: number; + maxPasses: number; + budgetRemaining: { + creates: number; + updates: number; + }; + previouslyPromotedInRun: WikiPageRef[]; +} + +const MAX_RECONCILE_EVIDENCE_REFS = 10; + +function evidenceRefsSummary(evidenceRefs: unknown): string { + if (!Array.isArray(evidenceRefs)) { + return JSON.stringify(evidenceRefs); + } + + const visible = evidenceRefs.slice(0, MAX_RECONCILE_EVIDENCE_REFS).map((ref) => { + if (!ref || typeof ref !== 'object') { + return ref; + } + const typed = ref as Record; + return { + stableCitationKey: typed.stableCitationKey, + rawPath: typed.rawPath, + title: typed.title, + path: 
typed.path, + syncId: typed.syncId, + }; + }); + const omitted = evidenceRefs.length - visible.length; + const suffix = omitted > 0 ? ` (${omitted} more evidence refs omitted; use context_evidence_read for details)` : ''; + return `${JSON.stringify(visible)}${suffix}`; +} + +function curatorPassStateSummary(runState?: ReconcilePromptRunState): string { + if (!runState) { + return ''; + } + + const previous = + runState.previouslyPromotedInRun.length === 0 + ? '(none)' + : runState.previouslyPromotedInRun + .map((page) => `- ${page.pageKey} (${page.action}): ${page.summary}`) + .join('\n'); + + return [ + '# Curator Pass State', + `pass: ${runState.passNumber} of ${runState.maxPasses}`, + `budgetRemaining: creates=${runState.budgetRemaining.creates} updates=${runState.budgetRemaining.updates}`, + 'previouslyPromotedInRun:', + previous, + '', + ].join('\n'); +} + +export function buildReconcileUserPrompt( + stageIndex: StageIndex, + ev: EvictionUnit | undefined, + candidates?: { summary: ReconcileCandidateSummary; items: ReconcileCandidateForPrompt[] }, + sourceNotes: string[] = [], + runState?: ReconcilePromptRunState, +): string { + const wuLines = + stageIndex.workUnits.length === 0 + ? '(no WorkUnits wrote anything)' + : stageIndex.workUnits + .map((wu) => { + const actions = + wu.actions.length === 0 + ? ' actions: (none)' + : wu.actions.map((a) => ` - ${a.target}:${a.type} ${a.key}`).join('\n'); + return `- unitKey: ${wu.unitKey} (status=${wu.status})\n${actions}`; + }) + .join('\n'); + const evLines = + !ev || ev.deletedRawPaths.length === 0 ? '(no deletions)' : ev.deletedRawPaths.map((p) => `- ${p}`).join('\n'); + const candidateLines = + !candidates || candidates.items.length === 0 + ? 
'(no context knowledge candidates)' + : [ + `summary: total=${candidates.summary.total} pending=${candidates.summary.pending} promoted=${candidates.summary.promoted} merged=${candidates.summary.merged} rejected=${candidates.summary.rejected} conflict=${candidates.summary.conflict}`, + ...candidates.items.map( + (candidate) => + `- candidateKey: ${candidate.candidateKey}\n` + + ` topic: ${candidate.topic}\n` + + ` status: ${candidate.status}\n` + + ` actionHint: ${candidate.actionHint}\n` + + ` promotionScore: ${candidate.promotionScore}\n` + + ` suggestedPageKey: ${candidate.suggestedPageKey ?? '(none)'}\n` + + ` assertion: ${candidate.assertion}\n` + + ` rationale: ${candidate.rationale}\n` + + ` evidenceRefs: ${evidenceRefsSummary(candidate.evidenceRefs)}`, + ), + ].join('\n'); + const sourceNoteLines = + sourceNotes.length === 0 + ? '(no source-specific reconciliation notes)' + : sourceNotes.map((note) => `- ${note}`).join('\n'); + return [ + '# Stage Index', + wuLines, + '', + '# Eviction Set (deleted raw paths — look up artifacts via eviction_list)', + evLines, + '', + curatorPassStateSummary(runState), + '# Context Knowledge Candidates', + candidateLines, + '', + '# Source Reconciliation Notes', + sourceNoteLines, + ].join('\n'); +} + +export interface ReconcileToolSetInput { + loadSkillTool: Record; + stageListTool: Record; + stageDiffTool: Record; + evictionListTool: Record; + emitConflictResolutionTool: Record; + emitEvictionDecisionTool: Record; + emitArtifactResolutionTool: Record; + emitUnmappedFallbackTool: Record; + readRawSpanTool: Record; + toolsetTools: ToolSet; +} + +export function buildReconcileToolSet(input: ReconcileToolSetInput): ToolSet { + return { + ...input.toolsetTools, + ...input.loadSkillTool, + ...input.stageListTool, + ...input.stageDiffTool, + ...input.evictionListTool, + ...input.emitConflictResolutionTool, + ...input.emitEvictionDecisionTool, + ...input.emitArtifactResolutionTool, + ...input.emitUnmappedFallbackTool, + 
...input.readRawSpanTool, + }; +} diff --git a/packages/context/src/ingest/stages/build-wu-context.test.ts b/packages/context/src/ingest/stages/build-wu-context.test.ts new file mode 100644 index 00000000..13a3ff8f --- /dev/null +++ b/packages/context/src/ingest/stages/build-wu-context.test.ts @@ -0,0 +1,188 @@ +import { describe, expect, it, vi } from 'vitest'; +import { buildWuSystemPrompt, buildWuToolSet, buildWuUserPrompt } from './build-wu-context.js'; + +describe('buildWuUserPrompt', () => { + it('includes rawFiles, dependencyPaths, peerFileIndex, and priorProvenance when present', () => { + const prompt = buildWuUserPrompt({ + wu: { unitKey: 'u1', rawFiles: ['a.yml'], peerFileIndex: ['p.yml'], dependencyPaths: ['dep.yml'] }, + wikiIndex: '(empty)', + slIndex: '(empty)', + priorProvenance: new Map([ + [ + 'a.yml', + [{ artifact_kind: 'sl', artifact_key: 'src_a', action_type: 'source_created', sync_id: 'prev' } as any], + ], + ]), + }); + expect(prompt).toContain('## WorkUnit: u1'); + expect(prompt).toContain('### rawFiles\n- a.yml'); + expect(prompt).toContain('### dependencyPaths\n- dep.yml'); + expect(prompt).toContain('### peerFileIndex\n- p.yml'); + expect(prompt).toContain('a.yml'); + expect(prompt).toContain('src_a'); + }); + + it('omits priorProvenance block when every rawFile is new', () => { + const prompt = buildWuUserPrompt({ + wu: { unitKey: 'u1', rawFiles: ['new.yml'], peerFileIndex: [], dependencyPaths: [] }, + wikiIndex: '', + slIndex: '', + priorProvenance: new Map([['new.yml', []]]), + }); + expect(prompt).not.toContain('priorProvenance'); + }); + + it('caps very large peer file indexes in the prompt', () => { + const prompt = buildWuUserPrompt({ + wu: { + unitKey: 'u1', + rawFiles: ['current.yml'], + peerFileIndex: Array.from({ length: 140 }, (_, i) => `peer-${i + 1}.yml`), + dependencyPaths: [], + }, + wikiIndex: '', + slIndex: '', + priorProvenance: new Map(), + }); + + expect(prompt).toContain('- peer-100.yml'); + 
expect(prompt).not.toContain('- peer-101.yml'); + expect(prompt).toContain('40 more peer files omitted'); + }); +}); + +describe('buildWuToolSet', () => { + it('includes load_skill, emit_unmapped_fallback, read_raw_file, read_raw_span, and provided toolset tools', () => { + const toolSet = buildWuToolSet({ + stagedDir: '/tmp/staged', + wu: { unitKey: 'u1', rawFiles: ['a.yml'], peerFileIndex: [], dependencyPaths: ['dep.yml'] }, + loadSkillTool: { load_skill: { description: 'load', inputSchema: {} as any, execute: vi.fn() } } as any, + emitUnmappedFallbackTool: { + emit_unmapped_fallback: { description: 'fallback', inputSchema: {} as any, execute: vi.fn() }, + } as any, + toolsetTools: { wiki_search: {} as any, sl_write_source: {} as any }, + }); + expect(Object.keys(toolSet).sort()).toEqual( + [ + 'emit_unmapped_fallback', + 'load_skill', + 'read_raw_file', + 'read_raw_span', + 'sl_write_source', + 'wiki_search', + ].sort(), + ); + }); + + it('includes looker_query_to_sl only for Looker WorkUnits', () => { + const toolSet = buildWuToolSet({ + sourceKey: 'looker', + stagedDir: '/tmp/staged', + wu: { unitKey: 'looker-look-20', rawFiles: ['looks/20.json'], peerFileIndex: [], dependencyPaths: [] }, + loadSkillTool: { load_skill: { description: 'load', inputSchema: {} as any, execute: vi.fn() } } as any, + emitUnmappedFallbackTool: { + emit_unmapped_fallback: { description: 'fallback', inputSchema: {} as any, execute: vi.fn() }, + } as any, + toolsetTools: { wiki_search: {} as any, sl_write_source: {} as any }, + }); + + expect(Object.keys(toolSet).sort()).toEqual( + [ + 'emit_unmapped_fallback', + 'load_skill', + 'looker_query_to_sl', + 'read_raw_file', + 'read_raw_span', + 'sl_write_source', + 'wiki_search', + ].sort(), + ); + }); + + it('does not expose looker_query_to_sl to non-Looker WorkUnits', () => { + const toolSet = buildWuToolSet({ + sourceKey: 'metabase', + stagedDir: '/tmp/staged', + wu: { unitKey: 'metabase-col-1', rawFiles: ['cards/1.json'], peerFileIndex: 
[], dependencyPaths: [] }, + loadSkillTool: { load_skill: { description: 'load', inputSchema: {} as any, execute: vi.fn() } } as any, + emitUnmappedFallbackTool: { + emit_unmapped_fallback: { description: 'fallback', inputSchema: {} as any, execute: vi.fn() }, + } as any, + toolsetTools: { wiki_search: {} as any, sl_write_source: {} as any }, + }); + + expect(Object.keys(toolSet)).not.toContain('looker_query_to_sl'); + }); + + it('removes write/edit SL tools for SL-disallowed WorkUnits', () => { + const toolSet = buildWuToolSet({ + sourceKey: 'lookml', + stagedDir: '/tmp/staged', + wu: { + unitKey: 'lookml-b2b', + rawFiles: ['b2b.model.lkml'], + peerFileIndex: [], + dependencyPaths: [], + slDisallowed: true, + slDisallowedReason: 'lookml_connection_mismatch', + }, + loadSkillTool: { load_skill: { description: 'load', inputSchema: {} as any, execute: vi.fn() } } as any, + emitUnmappedFallbackTool: { + emit_unmapped_fallback: { description: 'fallback', inputSchema: {} as any, execute: vi.fn() }, + } as any, + toolsetTools: { + sl_write_source: {} as any, + sl_edit_source: {} as any, + sl_read_source: {} as any, + wiki_search: {} as any, + }, + }); + + expect(Object.keys(toolSet)).not.toContain('sl_write_source'); + expect(Object.keys(toolSet)).not.toContain('sl_edit_source'); + expect(Object.keys(toolSet)).toContain('sl_read_source'); + expect(Object.keys(toolSet)).toContain('wiki_search'); + }); +}); + +describe('buildWuSystemPrompt', () => { + it('emits a context block with syncId and source, but NOT rawDirInWorktree', () => { + const prompt = buildWuSystemPrompt({ + baseFraming: 'BASE', + skillsPrompt: 'SKILLS', + syncId: 'sync-abc', + sourceKey: 'metabase', + canonicalPins: [], + }); + expect(prompt).toContain('BASE'); + expect(prompt).toContain('SKILLS'); + expect(prompt).toContain(''); + expect(prompt).toContain('syncId: sync-abc'); + expect(prompt).toContain('source: metabase'); + expect(prompt).not.toMatch(/rawDirInWorktree/i); + 
expect(prompt).not.toContain(''); + }); + + it('appends canonical pins before the WorkUnit context block', () => { + const prompt = buildWuSystemPrompt({ + baseFraming: 'work unit', + skillsPrompt: 'ingest_triage', + syncId: 'sync-abc', + sourceKey: 'metabase', + canonicalPins: [ + { + contestedKey: 'gross_revenue', + canonicalArtifactKey: 'finance.gross_revenue', + pinnedAt: '2026-04-27T12:00:00.000Z', + pinnedBy: 'user-1', + reason: 'finance owns revenue definitions', + }, + ], + }); + + expect(prompt).toContain(''); + expect(prompt).toContain('contestedKey: gross_revenue'); + expect(prompt).toContain('canonicalArtifactKey: finance.gross_revenue'); + expect(prompt.indexOf('')).toBeLessThan(prompt.indexOf('')); + }); +}); diff --git a/packages/context/src/ingest/stages/build-wu-context.ts b/packages/context/src/ingest/stages/build-wu-context.ts new file mode 100644 index 00000000..6ba26fd7 --- /dev/null +++ b/packages/context/src/ingest/stages/build-wu-context.ts @@ -0,0 +1,114 @@ +import type { Tool, ToolSet } from 'ai'; +import { buildCanonicalPinsPromptBlock, type CanonicalPin } from '../canonical-pins.js'; +import { createLookerQueryToSlTool } from '../adapters/looker/tools/looker-query-to-sl.tool.js'; +import type { IngestProvenanceRow } from '../ports.js'; +import { createReadRawFileTool } from '../tools/read-raw-file.tool.js'; +import { createReadRawSpanTool } from '../tools/read-raw-span.tool.js'; +import type { WorkUnit } from '../types.js'; + +const PEER_FILE_INDEX_PROMPT_LIMIT = 100; + +export interface BuildWuPromptInput { + wu: WorkUnit; + wikiIndex: string; + slIndex: string; + priorProvenance: Map; +} + +export function buildWuSystemPrompt(params: { + baseFraming: string; + skillsPrompt: string; + syncId: string; + sourceKey: string; + canonicalPins?: CanonicalPin[]; +}): string { + const parts = [ + params.baseFraming.trimEnd(), + params.skillsPrompt.trimEnd(), + buildCanonicalPinsPromptBlock(params.canonicalPins ?? 
[]), + `\n\nsyncId: ${params.syncId}\nsource: ${params.sourceKey}\n`, + ]; + return parts.filter(Boolean).join('\n'); +} + +export function buildWuUserPrompt(input: BuildWuPromptInput): string { + const { wu, wikiIndex, slIndex, priorProvenance } = input; + const hasPrior = [...priorProvenance.values()].some((rows) => rows.length > 0); + const priorBlock = hasPrior + ? [ + '### priorProvenance', + ...[...priorProvenance.entries()] + .filter(([, rows]) => rows.length > 0) + .map(([path, rows]) => { + const artifacts = rows + .map((r) => ` - kind: ${r.artifact_kind} key: ${r.artifact_key} action: ${r.action_type}`) + .join('\n'); + return `- raw_path: ${path}\n prior_sync_id: ${rows[0].sync_id}\n artifacts:\n${artifacts}`; + }), + ].join('\n') + : ''; + const sections: string[] = []; + if (wikiIndex) { + sections.push(`# Wiki Index\n\n${wikiIndex}`); + } + if (slIndex) { + sections.push(`# Semantic Layer Sources\n\n${slIndex}`); + } + sections.push('---'); + sections.push(`## WorkUnit: ${wu.unitKey}`); + sections.push(`### rawFiles\n${wu.rawFiles.map((p) => `- ${p}`).join('\n')}`); + if (wu.dependencyPaths.length > 0) { + sections.push(`### dependencyPaths\n${wu.dependencyPaths.map((p) => `- ${p}`).join('\n')}`); + } + if (wu.peerFileIndex.length > 0) { + const visiblePeerFiles = wu.peerFileIndex.slice(0, PEER_FILE_INDEX_PROMPT_LIMIT); + const omittedCount = wu.peerFileIndex.length - visiblePeerFiles.length; + const peerLines = visiblePeerFiles.map((p) => `- ${p}`); + if (omittedCount > 0) { + peerLines.push(`- (${omittedCount} more peer files omitted)`); + } + sections.push(`### peerFileIndex\n${peerLines.join('\n')}`); + } + if (priorBlock) { + sections.push(priorBlock); + } + if (wu.notes) { + sections.push(`### notes\n${wu.notes}`); + } + return sections.join('\n\n'); +} + +export interface BuildWuToolSetInput { + sourceKey?: string; + stagedDir: string; + wu: WorkUnit; + loadSkillTool: Record; + emitUnmappedFallbackTool: Record; + toolsetTools: ToolSet; +} + 
+function withoutWriteSlTools(toolset: ToolSet, wu: WorkUnit): ToolSet { + if (!wu.slDisallowed) { + return toolset; + } + const next = { ...toolset }; + delete next.sl_write_source; + delete next.sl_edit_source; + return next; +} + +export function buildWuToolSet(input: BuildWuToolSetInput): ToolSet { + const allowedPaths = new Set([...input.wu.rawFiles, ...input.wu.dependencyPaths]); + const lookerTools: ToolSet = input.sourceKey === 'looker' ? { looker_query_to_sl: createLookerQueryToSlTool() } : {}; + return withoutWriteSlTools( + { + ...input.toolsetTools, + ...lookerTools, + ...input.loadSkillTool, + ...input.emitUnmappedFallbackTool, + read_raw_file: createReadRawFileTool({ stagedDir: input.stagedDir, allowedPaths }), + read_raw_span: createReadRawSpanTool({ stagedDir: input.stagedDir, allowedPaths }), + }, + input.wu, + ); +} diff --git a/packages/context/src/ingest/stages/stage-1-stage-raw-files.test.ts b/packages/context/src/ingest/stages/stage-1-stage-raw-files.test.ts new file mode 100644 index 00000000..3cd5cde5 --- /dev/null +++ b/packages/context/src/ingest/stages/stage-1-stage-raw-files.test.ts @@ -0,0 +1,59 @@ +import { mkdir, mkdtemp, readFile, rm, writeFile } from 'node:fs/promises'; +import { tmpdir } from 'node:os'; +import { join } from 'node:path'; +import { afterEach, beforeEach, describe, expect, it } from 'vitest'; +import { stageRawFilesStage1 } from './stage-1-stage-raw-files.js'; + +describe('Stage 1 — stageRawFiles', () => { + let stagedDir: string; + let workdir: string; + + beforeEach(async () => { + stagedDir = await mkdtemp(join(tmpdir(), 'stage1-src-')); + workdir = await mkdtemp(join(tmpdir(), 'stage1-wt-')); + await mkdir(join(stagedDir, 'views'), { recursive: true }); + await writeFile(join(stagedDir, 'views', 'a.yml'), 'alpha', 'utf-8'); + await writeFile(join(stagedDir, 'b.yml'), 'beta', 'utf-8'); + }); + + afterEach(async () => { + await rm(stagedDir, { recursive: true, force: true }); + await rm(workdir, { recursive: true, 
import { createHash } from 'node:crypto';
import { mkdir, readdir, readFile, writeFile } from 'node:fs/promises';
import { dirname, join, relative, sep } from 'node:path';
import { rawSourcesDirForSync } from '../raw-sources-paths.js';

interface StageRawFilesParams {
  /** Directory holding the fetched raw files to stage. */
  stagedDir: string;
  /** Root of the worktree the files are copied into. */
  worktreeRoot: string;
  connectionId: string;
  sourceKey: string;
  syncId: string;
}

interface StageRawFilesResult {
  /** Relative path (always `/`-separated) -> lowercase hex SHA-256 of the file bytes. */
  currentHashes: Map<string, string>;
  /** Worktree-relative directory the files were copied into. */
  rawDirInWorktree: string;
}

/**
 * Stage 1: copies every regular file under `stagedDir` into the worktree's
 * raw-sources directory for this sync and returns a content hash per file.
 *
 * - Only entries for which `Dirent.isFile()` is true are copied; directories
 *   and other entry types are skipped.
 * - Existing destination files are overwritten; nothing is deleted from the
 *   destination.
 * - Keys of `currentHashes` are relative to `stagedDir` and use `/` as the
 *   separator on every platform, so they compare equal to the POSIX-style
 *   raw paths used elsewhere in ingest (e.g. `views/a.yml`).
 *
 * @returns the hash map and the worktree-relative destination directory.
 * @throws any filesystem error raised by `readdir`, `readFile`, `mkdir` or `writeFile`.
 */
export async function stageRawFilesStage1(params: StageRawFilesParams): Promise<StageRawFilesResult> {
  const rawDirRel = rawSourcesDirForSync(params.connectionId, params.sourceKey, params.syncId);
  const targetRoot = join(params.worktreeRoot, rawDirRel);
  const currentHashes = new Map<string, string>();

  const entries = await readdir(params.stagedDir, { withFileTypes: true, recursive: true });
  for (const entry of entries) {
    if (!entry.isFile()) {
      continue;
    }
    const absSrc = join(entry.parentPath, entry.name);
    // `relative` yields platform separators; normalise so map keys are stable
    // across operating systems. On POSIX this is a no-op.
    const rel = relative(params.stagedDir, absSrc).split(sep).join('/');
    const body = await readFile(absSrc);
    currentHashes.set(rel, createHash('sha256').update(body).digest('hex'));

    const dest = join(targetRoot, rel);
    await mkdir(dirname(dest), { recursive: true });
    await writeFile(dest, body);
  }
  return { currentHashes, rawDirInWorktree: rawDirRel };
}
return { + sessionWorktreeGit: { revParseHead: vi.fn() } as any, + agentRunner: { runLoop: vi.fn() } as any, + validateTouchedSources: vi.fn().mockResolvedValue({ validSources: [], invalidSources: [] }), + resetHardTo: vi.fn(), + buildSystemPrompt: () => 'sys', + buildUserPrompt: () => 'usr', + buildToolSet: () => ({ noop: { description: 'x', inputSchema: {} as any, execute: vi.fn() } as any }), + captureSession: session, + sessionActions, + modelRole: 'candidateExtraction', + stepBudget: 40, + sourceKey: 'fake', + connectionId: 'c1', + jobId: 'j1', + }; +}; + +describe('Stage 3 — executeWorkUnit', () => { + beforeEach(() => vi.clearAllMocks()); + + it('clean path — no touched sources, passes through as success with empty actions', async () => { + const deps = makeDeps(); + deps.sessionWorktreeGit.revParseHead = vi.fn().mockResolvedValueOnce('pre').mockResolvedValueOnce('post'); + deps.agentRunner.runLoop = vi.fn().mockResolvedValue({ stopReason: 'natural' }); + const outcome = await executeWorkUnit(deps, makeWu()); + expect(outcome.status).toBe('success'); + expect(outcome.preSha).toBe('pre'); + expect(outcome.postSha).toBe('post'); + expect(outcome.actions).toEqual([]); + expect(deps.validateTouchedSources).not.toHaveBeenCalled(); + expect(deps.resetHardTo).not.toHaveBeenCalled(); + expect(deps.agentRunner.runLoop).toHaveBeenCalledWith(expect.objectContaining({ modelRole: 'candidateExtraction' })); + }); + + it('validates touched sources and passes through as success when all are valid', async () => { + const deps = makeDeps(); + deps.sessionWorktreeGit.revParseHead = vi.fn().mockResolvedValueOnce('pre').mockResolvedValueOnce('post'); + deps.agentRunner.runLoop = vi.fn().mockImplementation(() => { + deps.sessionActions.push({ target: 'sl', type: 'created', key: 'src_good', detail: '' }); + addTouchedSlSource(deps.captureSession.touchedSlSources, 'c1', 'src_good'); + return Promise.resolve({ stopReason: 'natural' }); + }); + deps.validateTouchedSources = 
vi.fn().mockResolvedValue({ validSources: ['c1:src_good'], invalidSources: [] }); + const outcome = await executeWorkUnit(deps, makeWu()); + expect(outcome.status).toBe('success'); + expect(outcome.actions.map((a) => a.key)).toEqual(['src_good']); + expect(outcome.touchedSlSources).toEqual([{ connectionId: 'c1', sourceName: 'src_good' }]); + expect(deps.resetHardTo).not.toHaveBeenCalled(); + }); + + it('any invalid source resets to the pre-WU SHA and marks WU failed', async () => { + const deps = makeDeps(); + deps.sessionWorktreeGit.revParseHead = vi.fn().mockResolvedValueOnce('pre').mockResolvedValueOnce('post'); + deps.agentRunner.runLoop = vi.fn().mockImplementation(() => { + deps.sessionActions.push({ target: 'sl', type: 'created', key: 'src_bad', detail: '' }); + deps.sessionActions.push({ target: 'sl', type: 'created', key: 'src_good', detail: '' }); + addTouchedSlSource(deps.captureSession.touchedSlSources, 'c1', 'src_bad'); + addTouchedSlSource(deps.captureSession.touchedSlSources, 'c1', 'src_good'); + return Promise.resolve({ stopReason: 'natural' }); + }); + deps.validateTouchedSources = vi + .fn() + .mockResolvedValue({ validSources: ['c1:src_good'], invalidSources: ['c1:src_bad'] }); + const outcome = await executeWorkUnit(deps, makeWu()); + expect(outcome.status).toBe('failed'); + expect(outcome.reason).toMatch(/src_bad/); + expect(outcome.actions).toEqual([]); + expect(outcome.touchedSlSources).toEqual([]); + expect(deps.resetHardTo).toHaveBeenCalledWith('pre'); + }); + + it('runner loop error resets to the pre-WU SHA and marks WU failed', async () => { + const deps = makeDeps(); + deps.sessionWorktreeGit.revParseHead = vi.fn().mockResolvedValueOnce('pre').mockResolvedValueOnce('post'); + deps.agentRunner.runLoop = vi.fn().mockResolvedValue({ stopReason: 'error', error: new Error('LLM down') }); + const outcome = await executeWorkUnit(deps, makeWu()); + expect(outcome.status).toBe('failed'); + expect(outcome.reason).toMatch(/LLM down/); + 
expect(deps.resetHardTo).toHaveBeenCalledWith('pre'); + }); + + it('runner loop thrown exception resets to the pre-WU SHA and marks WU failed', async () => { + const deps = makeDeps(); + deps.sessionWorktreeGit.revParseHead = vi.fn().mockResolvedValueOnce('pre').mockResolvedValueOnce('post'); + deps.agentRunner.runLoop = vi.fn().mockRejectedValue(new Error('provider disconnected')); + + const outcome = await executeWorkUnit(deps, makeWu()); + + expect(outcome).toMatchObject({ + unitKey: 'u1', + status: 'failed', + reason: 'provider disconnected', + preSha: 'pre', + postSha: 'post', + actions: [], + touchedSlSources: [], + }); + expect(deps.resetHardTo).toHaveBeenCalledWith('pre'); + }); + + it('fails before model execution when the assembled prompt is too large', async () => { + const deps = makeDeps(); + deps.sessionWorktreeGit.revParseHead = vi.fn().mockResolvedValue('pre'); + deps.buildSystemPrompt = () => 'sys'; + deps.buildUserPrompt = () => 'x'.repeat(260_000); + + const outcome = await executeWorkUnit(deps, makeWu()); + + expect(outcome.status).toBe('failed'); + expect(outcome.reason).toMatch(/prompt too large/i); + expect(deps.agentRunner.runLoop).not.toHaveBeenCalled(); + expect(deps.resetHardTo).not.toHaveBeenCalled(); + }); + + it('no commits made during WU — skips resetHardTo even on runner error', async () => { + const deps = makeDeps(); + deps.sessionWorktreeGit.revParseHead = vi.fn().mockResolvedValueOnce('same').mockResolvedValueOnce('same'); + deps.agentRunner.runLoop = vi.fn().mockResolvedValue({ stopReason: 'error', error: new Error('bail') }); + const outcome = await executeWorkUnit(deps, makeWu()); + expect(outcome.status).toBe('failed'); + expect(deps.resetHardTo).not.toHaveBeenCalled(); + }); +}); diff --git a/packages/context/src/ingest/stages/stage-3-work-units.ts b/packages/context/src/ingest/stages/stage-3-work-units.ts new file mode 100644 index 00000000..511ada14 --- /dev/null +++ 
import type { AgentRunnerService } from '@klo/context/agent';
import type { KloModelRole } from '@klo/llm';
import type { Tool } from 'ai';
import type { CaptureSession, MemoryAction } from '../../memory/index.js';
import { listTouchedSlSources, type TouchedSlSource } from '../../tools/index.js';
import type { WorkUnit } from '../types.js';

/** Upper bound on `systemPrompt.length + userPrompt.length` for one WorkUnit. */
const MAX_WORK_UNIT_PROMPT_CHARS = 240_000;

export interface TouchedValidationResult {
  invalidSources: string[];
  validSources: string[];
}

export interface WorkUnitExecutionDeps {
  // NOTE(review): the type argument was lost in this copy of the file. The
  // implementation treats a nullish result as "no HEAD" (`?? ''`), so the
  // widest compatible type is used here — confirm against the git service.
  sessionWorktreeGit: { revParseHead(): Promise<string | null | undefined> };
  agentRunner: AgentRunnerService;
  validateTouchedSources: (touched: TouchedSlSource[]) => Promise<TouchedValidationResult>;
  resetHardTo: (targetSha: string) => Promise<void>;
  buildSystemPrompt: (wu: WorkUnit) => string;
  buildUserPrompt: (wu: WorkUnit) => string;
  buildToolSet: (wu: WorkUnit) => Record<string, Tool>;
  captureSession: CaptureSession;
  sessionActions: MemoryAction[];
  modelRole: KloModelRole;
  stepBudget: number;
  sourceKey: string;
  connectionId: string;
  jobId: string;
  onStepFinish?: (info: { stepIndex: number; stepBudget: number }) => void;
}

export interface WorkUnitOutcome {
  unitKey: string;
  status: 'success' | 'failed';
  reason?: string;
  /** HEAD before the WorkUnit ran ('' when HEAD could not be resolved). */
  preSha: string;
  /** HEAD observed after the run, before any rollback was applied. */
  postSha: string;
  actions: MemoryAction[];
  touchedSlSources: TouchedSlSource[];
  slDisallowed?: boolean;
  slDisallowedReason?: 'lookml_connection_mismatch';
}

/**
 * Stage 3: runs the agent loop for one WorkUnit and decides whether its
 * writes are kept.
 *
 * Flow:
 * 1. Record the pre-run HEAD on the capture session.
 * 2. Build both prompts; if their combined length exceeds
 *    `MAX_WORK_UNIT_PROMPT_CHARS`, fail without calling the model or git reset.
 * 3. Run the agent loop. A rejected promise and a `stopReason === 'error'`
 *    result are both treated as failures.
 * 4. If any semantic-layer sources were touched, validate them; one invalid
 *    source fails the whole WorkUnit.
 *
 * Every failure after step 2 hard-resets the worktree to the pre-run HEAD,
 * but only when HEAD actually moved and a pre-run HEAD was known. Failed
 * outcomes always carry empty `actions` and `touchedSlSources`.
 *
 * @returns the outcome; this function reports agent failures via
 *   `status: 'failed'` rather than throwing. Errors from `revParseHead`,
 *   `resetHardTo`, the prompt builders or `validateTouchedSources` propagate.
 */
export async function executeWorkUnit(deps: WorkUnitExecutionDeps, wu: WorkUnit): Promise<WorkUnitOutcome> {
  const preSha = (await deps.sessionWorktreeGit.revParseHead()) ?? '';
  deps.captureSession.preHead = preSha || null;

  // Single place that shapes a failed outcome so all failure paths stay in sync.
  const failedOutcome = (reason: string, postSha: string): WorkUnitOutcome => ({
    unitKey: wu.unitKey,
    status: 'failed',
    reason,
    preSha,
    postSha,
    actions: [],
    touchedSlSources: [],
    slDisallowed: wu.slDisallowed,
    slDisallowedReason: wu.slDisallowedReason,
  });

  // Rolls the worktree back to the pre-run HEAD (when it moved) and reports failure.
  const rollbackAndFail = async (reason: string, head: string): Promise<WorkUnitOutcome> => {
    if (head !== preSha && preSha !== '') {
      await deps.resetHardTo(preSha);
    }
    return failedOutcome(reason, head);
  };

  const systemPrompt = deps.buildSystemPrompt(wu);
  const userPrompt = deps.buildUserPrompt(wu);
  const promptChars = systemPrompt.length + userPrompt.length;
  if (promptChars > MAX_WORK_UNIT_PROMPT_CHARS) {
    // Nothing has run yet, so there is nothing to reset.
    return failedOutcome(
      `prompt too large for WorkUnit ${wu.unitKey}: ${promptChars} chars exceeds ${MAX_WORK_UNIT_PROMPT_CHARS}`,
      preSha,
    );
  }

  let runResult: Awaited<ReturnType<AgentRunnerService['runLoop']>>;
  try {
    runResult = await deps.agentRunner.runLoop({
      modelRole: deps.modelRole,
      systemPrompt,
      userPrompt,
      toolSet: deps.buildToolSet(wu),
      stepBudget: deps.stepBudget,
      telemetryTags: {
        operationName: 'ingest-bundle-wu',
        source: deps.sourceKey,
        unitKey: wu.unitKey,
        jobId: deps.jobId,
      },
      onStepFinish: deps.onStepFinish,
    });
  } catch (error) {
    const reason = error instanceof Error ? error.message : String(error);
    // The loop may have committed before throwing, so re-read HEAD here.
    const failureHead = (await deps.sessionWorktreeGit.revParseHead()) ?? preSha;
    return rollbackAndFail(reason, failureHead);
  }

  const postSha = (await deps.sessionWorktreeGit.revParseHead()) ?? preSha;

  if (runResult.stopReason === 'error') {
    return rollbackAndFail(runResult.error?.message ?? 'agent loop errored', postSha);
  }

  const touched = listTouchedSlSources(deps.captureSession.touchedSlSources);
  if (touched.length > 0) {
    const validation = await deps.validateTouchedSources(touched);
    if (validation.invalidSources.length > 0) {
      // Spec: invalid SL writes reset the session worktree to the WU's pre-state, WU is marked failed,
      // its files are absent from the Stage Index. Per-source surgical revert is the
      // memory-agent pattern — NOT the bundle-ingest pattern.
      return rollbackAndFail(`sl_validate failed for: ${validation.invalidSources.join(', ')}`, postSha);
    }
  }

  return {
    unitKey: wu.unitKey,
    status: 'success',
    preSha,
    postSha,
    // Snapshot so later mutation of the shared array does not alter this outcome.
    actions: [...deps.sessionActions],
    touchedSlSources: touched,
    slDisallowed: wu.slDisallowed,
    slDisallowedReason: wu.slDisallowedReason,
  };
}
conflictsResolved: [], + evictionsApplied: [], + unmappedFallbacks: [], + }, + evictionUnit: undefined, + agentRunner: { runLoop } as any, + buildSystemPrompt: () => 's', + buildUserPrompt: () => 'u', + buildToolSet: () => ({}), + modelRole: 'reconcile', + stepBudget: 60, + sourceKey: 'fake', + jobId: 'j1', + }); + expect(result.skipped).toBe(true); + expect(runLoop).not.toHaveBeenCalled(); + }); + + it('invokes the agent when any WU wrote actions', async () => { + const runLoop = vi.fn().mockResolvedValue({ stopReason: 'natural' }); + const result = await runReconciliationStage4({ + stageIndex: { + jobId: 'j1', + connectionId: 'c1', + workUnits: [ + { + unitKey: 'u1', + rawFiles: ['a.yml'], + status: 'success', + actions: [{ target: 'sl', type: 'created', key: 'src_a', detail: 'x' }], + touchedSlSources: [{ connectionId: 'c1', sourceName: 'src_a' }], + }, + ], + conflictsResolved: [], + evictionsApplied: [], + unmappedFallbacks: [], + }, + evictionUnit: undefined, + agentRunner: { runLoop } as any, + buildSystemPrompt: () => 's', + buildUserPrompt: () => 'u', + buildToolSet: () => ({}), + modelRole: 'reconcile', + stepBudget: 60, + sourceKey: 'fake', + jobId: 'j1', + }); + expect(result.skipped).toBe(false); + expect(runLoop).toHaveBeenCalledOnce(); + expect(runLoop).toHaveBeenCalledWith(expect.objectContaining({ modelRole: 'reconcile' })); + }); + + it('invokes the agent when eviction set is non-empty even with no writes', async () => { + const runLoop = vi.fn().mockResolvedValue({ stopReason: 'natural' }); + const result = await runReconciliationStage4({ + stageIndex: { + jobId: 'j1', + connectionId: 'c1', + workUnits: [], + conflictsResolved: [], + evictionsApplied: [], + unmappedFallbacks: [], + }, + evictionUnit: { deletedRawPaths: ['views/old.lkml'] }, + agentRunner: { runLoop } as any, + buildSystemPrompt: () => 's', + buildUserPrompt: () => 'u', + buildToolSet: () => ({}), + modelRole: 'reconcile', + stepBudget: 60, + sourceKey: 'fake', + jobId: 'j1', + 
}); + expect(result.skipped).toBe(false); + expect(runLoop).toHaveBeenCalledOnce(); + }); + + it('invokes the agent when forced for candidate reconciliation', async () => { + const runLoop = vi.fn().mockResolvedValue({ stopReason: 'natural' }); + const result = await runReconciliationStage4({ + stageIndex: { + jobId: 'j1', + connectionId: 'c1', + workUnits: [], + conflictsResolved: [], + evictionsApplied: [], + unmappedFallbacks: [], + }, + evictionUnit: undefined, + agentRunner: { runLoop } as any, + buildSystemPrompt: () => 's', + buildUserPrompt: () => 'u', + buildToolSet: () => ({}), + modelRole: 'reconcile', + stepBudget: 60, + sourceKey: 'fake', + jobId: 'j1', + forceRun: true, + }); + expect(result.skipped).toBe(false); + expect(runLoop).toHaveBeenCalledOnce(); + }); + + it('returns stopReason on runner error', async () => { + const err = new Error('LLM timeout'); + const runLoop = vi.fn().mockResolvedValue({ stopReason: 'error', error: err }); + const result = await runReconciliationStage4({ + stageIndex: { + jobId: 'j1', + connectionId: 'c1', + workUnits: [ + { + unitKey: 'u1', + rawFiles: [], + status: 'success', + actions: [{ target: 'sl', type: 'created', key: 'k', detail: 'd' }], + touchedSlSources: [], + }, + ], + conflictsResolved: [], + evictionsApplied: [], + unmappedFallbacks: [], + }, + evictionUnit: undefined, + agentRunner: { runLoop } as any, + buildSystemPrompt: () => 's', + buildUserPrompt: () => 'u', + buildToolSet: () => ({}), + modelRole: 'reconcile', + stepBudget: 60, + sourceKey: 'fake', + jobId: 'j1', + }); + expect(result.skipped).toBe(false); + expect(result.stopReason).toBe('error'); + expect(result.error).toBe(err); + }); +}); diff --git a/packages/context/src/ingest/stages/stage-4-reconciliation.ts b/packages/context/src/ingest/stages/stage-4-reconciliation.ts new file mode 100644 index 00000000..9252130d --- /dev/null +++ b/packages/context/src/ingest/stages/stage-4-reconciliation.ts @@ -0,0 +1,45 @@ +import type { 
import type { AgentRunnerService } from '@klo/context/agent';
import type { KloModelRole } from '@klo/llm';
import type { ToolSet } from 'ai';
import type { EvictionUnit } from '../types.js';
import type { StageIndex } from './stage-index.types.js';

export interface ReconciliationContext {
  stageIndex: StageIndex;
  evictionUnit: EvictionUnit | undefined;
  agentRunner: AgentRunnerService;
  buildSystemPrompt: (idx: StageIndex, ev: EvictionUnit | undefined) => string;
  buildUserPrompt: (idx: StageIndex, ev: EvictionUnit | undefined) => string;
  buildToolSet: () => ToolSet;
  modelRole: KloModelRole;
  stepBudget: number;
  sourceKey: string;
  jobId: string;
  /** Runs the agent even when there is nothing to reconcile. Same effect as `forceRun`. */
  force?: boolean;
  onStepFinish?: (info: { stepIndex: number; stepBudget: number }) => void;
  /** Runs the agent even when there is nothing to reconcile. Same effect as `force`. */
  forceRun?: boolean;
}

export interface ReconciliationOutcome {
  /** True when the agent was not invoked at all. */
  skipped: boolean;
  stopReason?: 'budget' | 'natural' | 'error';
  error?: Error;
}

/**
 * Stage 4: runs the reconciliation agent over the stage index.
 *
 * The agent is skipped (`{ skipped: true }`) unless at least one of these
 * holds: a WorkUnit recorded one or more actions, the eviction unit lists at
 * least one deleted raw path, or `force` / `forceRun` is set.
 *
 * @returns `skipped: false` plus the runner's `stopReason` and `error` when
 *   the agent ran. A runner result with `stopReason: 'error'` is returned, not
 *   thrown; a rejected `runLoop` promise propagates to the caller.
 */
export async function runReconciliationStage4(ctx: ReconciliationContext): Promise<ReconciliationOutcome> {
  const hasWrites = ctx.stageIndex.workUnits.some((wu) => wu.actions.length > 0);
  const hasEvictions = !!ctx.evictionUnit && ctx.evictionUnit.deletedRawPaths.length > 0;
  const forced = !!ctx.force || !!ctx.forceRun;
  if (!forced && !hasWrites && !hasEvictions) {
    return { skipped: true };
  }

  const run = await ctx.agentRunner.runLoop({
    modelRole: ctx.modelRole,
    systemPrompt: ctx.buildSystemPrompt(ctx.stageIndex, ctx.evictionUnit),
    userPrompt: ctx.buildUserPrompt(ctx.stageIndex, ctx.evictionUnit),
    toolSet: ctx.buildToolSet(),
    stepBudget: ctx.stepBudget,
    telemetryTags: { operationName: 'ingest-bundle-reconcile', source: ctx.sourceKey, jobId: ctx.jobId },
    onStepFinish: ctx.onStepFinish,
  });
  return { skipped: false, stopReason: run.stopReason, error: run.error };
}
b/packages/context/src/ingest/stages/stage-index.types.ts @@ -0,0 +1,65 @@ +import type { MemoryAction } from '../../memory/index.js'; +import type { TouchedSlSource } from '../../tools/index.js'; + +export interface StageIndexWorkUnit { + unitKey: string; + rawFiles: string[]; + status: 'success' | 'failed'; + reason?: string; + actions: MemoryAction[]; + touchedSlSources: TouchedSlSource[]; + slDisallowed?: boolean; + slDisallowedReason?: 'lookml_connection_mismatch'; +} + +export interface ConflictResolvedRecord { + unitKey?: string; + kind: 'structural_duplicate' | 'near_duplicate' | 'definitional_contradiction' | 're_ingest_change'; + contestedKey?: string; + artifactKey: string; + detail: string; + flaggedForHuman: boolean; +} + +export interface EvictionAppliedRecord { + rawPath: string; + artifactKind: 'sl' | 'wiki'; + artifactKey: string; + action: 'removed' | 'retained_deprecated'; + reason: string; +} + +export type UnmappedFallbackReason = + | 'no_connection_mapping' + | 'looker_template_unresolved' + | 'derived_table_not_supported' + | 'no_physical_table' + | 'multiple_table_references' + | 'unsupported_dialect' + | 'parse_error' + | 'missing_target_table'; + +export interface UnmappedFallbackRecord { + rawPath: string; + reason: UnmappedFallbackReason; + detail?: string; + fallback: 'sql_standalone' | 'wiki_only' | 'flagged'; +} + +export interface ArtifactResolutionRecord { + rawPath: string; + artifactKind: 'sl' | 'wiki'; + artifactKey: string; + actionType: 'merged' | 'subsumed'; + reason: string; +} + +export interface StageIndex { + jobId: string; + connectionId: string; + workUnits: StageIndexWorkUnit[]; + conflictsResolved: ConflictResolvedRecord[]; + evictionsApplied: EvictionAppliedRecord[]; + unmappedFallbacks: UnmappedFallbackRecord[]; + artifactResolutions?: ArtifactResolutionRecord[]; +} diff --git a/packages/context/src/ingest/stages/validate-wu-sources.test.ts b/packages/context/src/ingest/stages/validate-wu-sources.test.ts new file 
mode 100644 index 00000000..668062e9 --- /dev/null +++ b/packages/context/src/ingest/stages/validate-wu-sources.test.ts @@ -0,0 +1,35 @@ +import { describe, expect, it, vi } from 'vitest'; +import { validateWuTouchedSources } from './validate-wu-sources.js'; + +describe('validateWuTouchedSources', () => { + it('validates each touched source against its own connection', async () => { + const validateSingleSource = vi + .fn() + .mockImplementation((_deps: unknown, conn: string, name: string) => + Promise.resolve( + conn === 'warehouse-a' && name === 'good' + ? { errors: [], warnings: [] } + : { errors: ['invalid measure'], warnings: [] }, + ), + ); + const deps = { slValidator: { validateSingleSource } } as any; + + const result = await validateWuTouchedSources(deps, [ + { connectionId: 'warehouse-a', sourceName: 'good' }, + { connectionId: 'warehouse-b', sourceName: 'bad' }, + ]); + + expect(result.validSources).toEqual(['warehouse-a:good']); + expect(result.invalidSources).toEqual(['warehouse-b:bad']); + expect(validateSingleSource).toHaveBeenNthCalledWith(1, deps, 'warehouse-a', 'good'); + expect(validateSingleSource).toHaveBeenNthCalledWith(2, deps, 'warehouse-b', 'bad'); + }); + + it('returns empty arrays when no sources are touched', async () => { + const validateSingleSource = vi.fn(); + const deps = { slValidator: { validateSingleSource } } as any; + const result = await validateWuTouchedSources(deps, []); + expect(result).toEqual({ validSources: [], invalidSources: [] }); + expect(validateSingleSource).not.toHaveBeenCalled(); + }); +}); diff --git a/packages/context/src/ingest/stages/validate-wu-sources.ts b/packages/context/src/ingest/stages/validate-wu-sources.ts new file mode 100644 index 00000000..a6394c50 --- /dev/null +++ b/packages/context/src/ingest/stages/validate-wu-sources.ts @@ -0,0 +1,24 @@ +import type { SlValidationDeps, SlValidatorPort } from '@klo/context/sl'; +import type { TouchedSlSource } from '../../tools/index.js'; + +export interface 
import type { SlValidationDeps, SlValidatorPort } from '@klo/context/sl';
import type { TouchedSlSource } from '../../tools/index.js';

export interface WuValidationResult {
  /** `connectionId:sourceName` keys whose validation produced no errors. */
  validSources: string[];
  /** `connectionId:sourceName` keys whose validation produced at least one error. */
  invalidSources: string[];
}

/**
 * Validates every semantic-layer source touched by a WorkUnit, each against
 * its own connection.
 *
 * Sources are validated one at a time, in input order. A source counts as
 * valid when the validator returns an empty `errors` array; warnings are not
 * considered.
 *
 * @param deps validation dependencies; `deps` itself is forwarded as the first
 *   argument to `slValidator.validateSingleSource`.
 * @param touched sources to validate; an empty list performs no validator calls.
 * @returns both buckets, keyed as `connectionId:sourceName`.
 * @throws whatever `validateSingleSource` rejects with.
 */
export async function validateWuTouchedSources(
  deps: SlValidationDeps & { slValidator: SlValidatorPort },
  touched: TouchedSlSource[],
): Promise<WuValidationResult> {
  const validSources: string[] = [];
  const invalidSources: string[] = [];
  for (const { connectionId, sourceName } of touched) {
    const result = await deps.slValidator.validateSingleSource(deps, connectionId, sourceName);
    const bucket = result.errors.length === 0 ? validSources : invalidSources;
    bucket.push(`${connectionId}:${sourceName}`);
  }
  return { validSources, invalidSources };
}
Use when reconciliation merges or subsumes an artifact without creating a new wiki or SL write action.', + inputSchema: z.object({ + rawPath: z.string().min(1), + artifactKind: z.enum(['sl', 'wiki']), + artifactKey: z.string().min(1), + actionType: z.enum(['merged', 'subsumed']), + reason: z.string().min(1), + }), + execute: async (input): Promise => { + if (!deps.allowedPaths.has(input.rawPath)) { + return `Error: rawPath "${input.rawPath}" is not available to this ingest stage`; + } + + const record: ArtifactResolutionRecord = { + rawPath: input.rawPath, + artifactKind: input.artifactKind, + artifactKey: input.artifactKey, + actionType: input.actionType, + reason: input.reason, + }; + const existingIndex = deps.stageIndex.artifactResolutions?.findIndex((candidate) => + sameArtifactResolution(candidate, record), + ); + if (existingIndex !== undefined && existingIndex >= 0 && deps.stageIndex.artifactResolutions) { + deps.stageIndex.artifactResolutions[existingIndex] = record; + } else { + deps.stageIndex.artifactResolutions = [...(deps.stageIndex.artifactResolutions ?? []), record]; + } + return `recorded artifact resolution for ${record.artifactKind}:${record.artifactKey}`; + }, + }); +} diff --git a/packages/context/src/ingest/tools/emit-conflict-resolution.tool.ts b/packages/context/src/ingest/tools/emit-conflict-resolution.tool.ts new file mode 100644 index 00000000..b25e1395 --- /dev/null +++ b/packages/context/src/ingest/tools/emit-conflict-resolution.tool.ts @@ -0,0 +1,38 @@ +import { tool } from 'ai'; +import { z } from 'zod'; +import type { ConflictResolvedRecord, StageIndex } from '../stages/stage-index.types.js'; + +interface EmitConflictResolutionDeps { + stageIndex: StageIndex; +} + +export function createEmitConflictResolutionTool(deps: EmitConflictResolutionDeps) { + return tool({ + description: + 'Record one conflict resolution decision for the final IngestReport. 
import { tool } from 'ai';
import { z } from 'zod';
import type { EvictionAppliedRecord, StageIndex } from '../stages/stage-index.types.js';

interface EmitEvictionDecisionDeps {
  /** Shared stage index; decisions are written to `stageIndex.evictionsApplied`. */
  stageIndex: StageIndex;
  /** Raw paths in the current eviction set; only these may be referenced. */
  deletedRawPaths: string[];
}

/** Two records target the same artifact when raw path, kind and key all match. */
function sameEvictionArtifact(left: EvictionAppliedRecord, right: EvictionAppliedRecord): boolean {
  return (
    left.rawPath === right.rawPath && left.artifactKind === right.artifactKind && left.artifactKey === right.artifactKey
  );
}

/**
 * Builds the agent tool that records eviction decisions on the stage index.
 *
 * Behaviour of the tool's `execute`:
 * - A `rawPath` outside `deps.deletedRawPaths` is rejected: an `Error: …`
 *   string is returned and the stage index is left unchanged.
 * - A decision for an artifact that already has one (same raw path, kind and
 *   key) replaces the earlier record in place; otherwise it is appended.
 *
 * The allowed-path set is captured once, when the tool is created.
 */
export function createEmitEvictionDecisionTool(deps: EmitEvictionDecisionDeps) {
  const allowedPaths = new Set(deps.deletedRawPaths);
  return tool({
    description:
      'Record one eviction decision for the final IngestReport. The rawPath must come from the current Eviction Set.',
    inputSchema: z.object({
      rawPath: z.string().min(1),
      artifactKind: z.enum(['sl', 'wiki']),
      artifactKey: z.string().min(1),
      action: z.enum(['removed', 'retained_deprecated']),
      reason: z.string().min(1),
    }),
    execute: async (input): Promise<string> => {
      if (!allowedPaths.has(input.rawPath)) {
        return `Error: rawPath "${input.rawPath}" is not in the current eviction set`;
      }

      const record: EvictionAppliedRecord = {
        rawPath: input.rawPath,
        artifactKind: input.artifactKind,
        artifactKey: input.artifactKey,
        action: input.action,
        reason: input.reason,
      };
      const decisions = deps.stageIndex.evictionsApplied;
      const existingIndex = decisions.findIndex((candidate) => sameEvictionArtifact(candidate, record));
      if (existingIndex >= 0) {
        // Latest decision wins for the same artifact.
        decisions[existingIndex] = record;
      } else {
        decisions.push(record);
      }
      return `recorded eviction decision for ${record.rawPath} -> ${record.artifactKind}:${record.artifactKey}`;
    },
  });
}
evictionsApplied: [], + unmappedFallbacks: [], + }; +} + +async function executeTool(tool: Tool, input: NoInfer) { + if (!tool.execute) { + throw new Error('tool is not executable'); + } + return (await tool.execute(input, { toolCallId: 'tool-call-1', messages: [] })) as string; +} + +describe('reconciliation emit tools', () => { + it('records conflict resolutions on the shared stage index', async () => { + const stageIndex = makeStageIndex(); + const tool = createEmitConflictResolutionTool({ stageIndex }); + + const output = await executeTool(tool, { + unitKey: 'wu-orders', + kind: 'near_duplicate', + contestedKey: 'gross_revenue', + artifactKey: 'sl:orders.gross_revenue', + detail: 'orders and order_facts compute the same revenue metric; retained orders as canonical', + flaggedForHuman: true, + }); + + expect(stageIndex.conflictsResolved).toEqual([ + { + unitKey: 'wu-orders', + kind: 'near_duplicate', + contestedKey: 'gross_revenue', + artifactKey: 'sl:orders.gross_revenue', + detail: 'orders and order_facts compute the same revenue metric; retained orders as canonical', + flaggedForHuman: true, + }, + ]); + expect(output).toBe('recorded conflict resolution for sl:orders.gross_revenue'); + }); + + it('records eviction decisions only for deleted raw paths in the current eviction set', async () => { + const stageIndex = makeStageIndex(); + const tool = createEmitEvictionDecisionTool({ + stageIndex, + deletedRawPaths: ['views/old_orders.view.lkml'], + }); + + const output = await executeTool(tool, { + rawPath: 'views/old_orders.view.lkml', + artifactKind: 'sl', + artifactKey: 'old_orders', + action: 'removed', + reason: 'source raw file was deleted and no retained artifacts are required', + }); + + expect(output).toContain('recorded eviction decision for views/old_orders.view.lkml'); + expect(stageIndex.evictionsApplied).toEqual([ + { + rawPath: 'views/old_orders.view.lkml', + artifactKind: 'sl', + artifactKey: 'old_orders', + action: 'removed', + reason: 'source 
raw file was deleted and no retained artifacts are required', + }, + ]); + }); + + it('updates an existing eviction decision for the same raw path and artifact', async () => { + const stageIndex = makeStageIndex(); + const tool = createEmitEvictionDecisionTool({ + stageIndex, + deletedRawPaths: ['views/old_orders.view.lkml'], + }); + + await executeTool(tool, { + rawPath: 'views/old_orders.view.lkml', + artifactKind: 'wiki', + artifactKey: 'orders/legacy', + action: 'retained_deprecated', + reason: 'first pass', + }); + await executeTool(tool, { + rawPath: 'views/old_orders.view.lkml', + artifactKind: 'wiki', + artifactKey: 'orders/legacy', + action: 'removed', + reason: 'second pass after checking references', + }); + + expect(stageIndex.evictionsApplied).toEqual([ + { + rawPath: 'views/old_orders.view.lkml', + artifactKind: 'wiki', + artifactKey: 'orders/legacy', + action: 'removed', + reason: 'second pass after checking references', + }, + ]); + }); + + it('rejects eviction decisions for raw paths outside the current eviction set', async () => { + const stageIndex = makeStageIndex(); + const tool = createEmitEvictionDecisionTool({ + stageIndex, + deletedRawPaths: ['views/old_orders.view.lkml'], + }); + + const output = await executeTool(tool, { + rawPath: 'views/not_deleted.view.lkml', + artifactKind: 'sl', + artifactKey: 'not_deleted', + action: 'removed', + reason: 'bad input', + }); + + expect(output).toContain('Error: rawPath "views/not_deleted.view.lkml" is not in the current eviction set'); + expect(stageIndex.evictionsApplied).toEqual([]); + }); + + it('records unmapped fallback decisions for allowed raw paths', async () => { + const stageIndex = makeStageIndex(); + const tool = createEmitUnmappedFallbackTool({ + stageIndex, + allowedPaths: new Set(['metrics/conversion.yml']), + }); + + const output = await executeTool(tool, { + rawPath: 'metrics/conversion.yml', + reason: 'no_physical_table', + fallback: 'flagged', + }); + + 
expect(output).toContain('recorded unmapped fallback for metrics/conversion.yml'); + expect(stageIndex.unmappedFallbacks).toEqual([ + { + rawPath: 'metrics/conversion.yml', + reason: 'no_physical_table', + fallback: 'flagged', + }, + ]); + }); + + it('deduplicates identical unmapped fallback decisions', async () => { + const stageIndex = makeStageIndex(); + const tool = createEmitUnmappedFallbackTool({ + stageIndex, + allowedPaths: new Set(['metrics/conversion.yml']), + }); + + await executeTool(tool, { + rawPath: 'metrics/conversion.yml', + reason: 'no_physical_table', + fallback: 'flagged', + }); + await executeTool(tool, { + rawPath: 'metrics/conversion.yml', + reason: 'no_physical_table', + fallback: 'flagged', + }); + + expect(stageIndex.unmappedFallbacks).toEqual([ + { + rawPath: 'metrics/conversion.yml', + reason: 'no_physical_table', + fallback: 'flagged', + }, + ]); + }); + + it('rejects unmapped fallback decisions for raw paths outside the allowed set', async () => { + const stageIndex = makeStageIndex(); + const tool = createEmitUnmappedFallbackTool({ + stageIndex, + allowedPaths: new Set(['metrics/conversion.yml']), + }); + + const output = await executeTool(tool, { + rawPath: 'metrics/not-in-this-work-unit.yml', + reason: 'no_physical_table', + fallback: 'flagged', + }); + + expect(output).toContain( + 'Error: rawPath "metrics/not-in-this-work-unit.yml" is not available to this ingest stage', + ); + expect(stageIndex.unmappedFallbacks).toEqual([]); + }); + + it('records explicit artifact resolutions for provenance rows', async () => { + const stageIndex = makeStageIndex(); + const tool = createEmitArtifactResolutionTool({ + stageIndex, + allowedPaths: new Set(['explores/b2b/sales_pipeline.json']), + }); + + const output = await executeTool(tool, { + rawPath: 'explores/b2b/sales_pipeline.json', + artifactKind: 'sl', + artifactKey: 'looker__b2b__sales_pipeline', + actionType: 'subsumed', + reason: 'File-adapter source b2b__sales_pipeline is canonical for 
/** Dependencies injected into the unmapped-fallback emit tool. */
interface EmitUnmappedFallbackDeps {
  /** Shared, mutable stage index; accepted records are stored on `unmappedFallbacks`. */
  stageIndex: StageIndex;
  /** Raw paths the current ingest stage is allowed to report on. */
  allowedPaths: ReadonlySet<string>;
}

/** Structured reason codes accepted by the tool; free-form context belongs in `detail`. */
const unmappedFallbackReasonSchema = z.enum([
  'no_connection_mapping',
  'looker_template_unresolved',
  'derived_table_not_supported',
  'no_physical_table',
  'multiple_table_references',
  'unsupported_dialect',
  'parse_error',
  'missing_target_table',
]);

/**
 * Two records describe the same decision when raw path, reason code and
 * fallback all match. `detail` is deliberately not part of the identity.
 */
function sameUnmappedFallback(left: UnmappedFallbackRecord, right: UnmappedFallbackRecord): boolean {
  return left.rawPath === right.rawPath && left.reason === right.reason && left.fallback === right.fallback;
}

/**
 * Build the tool that records one unmapped-fallback decision on the shared stage index.
 *
 * The tool never throws for bad input: a raw path outside `deps.allowedPaths`
 * yields an `Error: ...` string so the calling agent can correct itself.
 *
 * @param deps Stage index to mutate and the set of raw paths this stage may report on.
 * @returns A tool whose `execute` resolves to a human-readable status string.
 */
export function createEmitUnmappedFallbackTool(deps: EmitUnmappedFallbackDeps) {
  return tool({
    description:
      'Record one unmapped fallback decision for the final IngestReport. The rawPath must be available to the current ingest stage. The reason MUST be one of the structured codes; put any human-readable context in detail.',
    inputSchema: z.object({
      rawPath: z.string().min(1),
      reason: unmappedFallbackReasonSchema,
      detail: z.string().optional(),
      fallback: z.enum(['sql_standalone', 'wiki_only', 'flagged']),
    }),
    execute: async (input): Promise<string> => {
      if (!deps.allowedPaths.has(input.rawPath)) {
        return `Error: rawPath "${input.rawPath}" is not available to this ingest stage`;
      }

      const record: UnmappedFallbackRecord = {
        rawPath: input.rawPath,
        reason: input.reason,
        // Only materialize `detail` when supplied so stored records stay minimal.
        ...(input.detail !== undefined ? { detail: input.detail } : {}),
        fallback: input.fallback,
      };

      const recorded = deps.stageIndex.unmappedFallbacks;
      const existingIndex = recorded.findIndex((candidate) => sameUnmappedFallback(candidate, record));
      if (existingIndex < 0) {
        recorded.push(record);
      } else if (record.detail !== undefined) {
        // Same decision emitted again with context: keep one row but let the
        // newest detail win instead of silently discarding it. A repeat call
        // without detail leaves the stored record (and its detail) untouched.
        recorded[existingIndex] = record;
      }
      return `recorded unmapped fallback for ${record.rawPath} (${record.fallback})`;
    },
  });
}
/** Dependencies for the eviction-list tool. */
export interface EvictionListDeps {
  /** Provenance lookup used to find what earlier syncs produced from each raw path. */
  provenance: IngestProvenancePort;
  connectionId: string;
  sourceKey: string;
  /** Raw paths that disappeared since the last sync (the current eviction set). */
  deletedRawPaths: string[];
}

/**
 * Build the tool that lists, for every deleted raw path, the artifacts the
 * provenance port reports for it.
 *
 * Every path in `deps.deletedRawPaths` appears in the output, even when the
 * provenance lookup returns no entry for it, so the curator can still record
 * a decision for that path.
 *
 * @param deps Provenance port, connection/source identifiers and the eviction set.
 * @returns A tool whose `execute` resolves to a plain-text listing.
 */
export function createEvictionListTool(deps: EvictionListDeps) {
  return tool({
    description:
      'List every artifact that the most recent completed sync produced from a now-deleted raw file. Use this to decide whether to remove (no inbound refs) or retain with deprecation (has inbound refs). Inbound refs are NOT currently computed — treat every retained entry as a candidate and ask the user via the IngestReport. After deciding, record the decision with context_eviction_decision_write so the ingest report lists every deleted-source decision.',
    inputSchema: z.object({}),
    execute: async () => {
      if (deps.deletedRawPaths.length === 0) {
        return '(empty) — no files were deleted since the last sync';
      }
      const latestByPath = await deps.provenance.findLatestArtifactsForRawPaths(
        deps.connectionId,
        deps.sourceKey,
        deps.deletedRawPaths,
      );
      // Drive the listing from the eviction set rather than from the map keys:
      // a path the port has no rows for must still be shown. Any extra keys the
      // port returns are appended so nothing it reports is hidden.
      const paths = [...new Set([...deps.deletedRawPaths, ...latestByPath.keys()])];
      return paths
        .map((path) => {
          const rows = latestByPath.get(path) ?? [];
          if (rows.length === 0) {
            return `- raw_path: ${path}\n artifacts: (none)`;
          }
          const artifactLines = rows
            .map((r) => ` - kind: ${r.artifact_kind} key: ${r.artifact_key} (last action: ${r.action_type})`)
            .join('\n');
          return `- raw_path: ${path}\n artifacts:\n${artifactLines}`;
        })
        .join('\n');
    },
  });
}
(tool.execute as (...args: unknown[]) => unknown)( + { path: 'views/a.yml' }, + { toolCallId: 't1', messages: [] }, + ); + expect(result).toContain('line1'); + expect(result).toContain('line2'); + }); + + it('refuses to return oversized files and directs callers to read spans', async () => { + await writeFile(join(stagedDir, 'views', 'huge.yml'), `${'x'.repeat(160_000)}\n`, 'utf-8'); + const tool = createReadRawFileTool({ stagedDir, allowedPaths: new Set(['views/huge.yml']) }); + const result = await (tool.execute as (...args: unknown[]) => unknown)( + { path: 'views/huge.yml' }, + { toolCallId: 't1', messages: [] }, + ); + + expect(result).toMatch(/too large/i); + expect(result).toMatch(/read_raw_span/i); + expect(String(result).length).toBeLessThan(1000); + }); + + it('rejects a path not in the allow-list', async () => { + const tool = createReadRawFileTool({ stagedDir, allowedPaths: new Set(['views/a.yml']) }); + const result = await (tool.execute as (...args: unknown[]) => unknown)( + { path: 'peer.yml' }, + { toolCallId: 't1', messages: [] }, + ); + expect(result).toMatch(/not accessible/i); + expect(result).not.toContain('secret'); + }); + + it('rejects directory traversal attempts', async () => { + const tool = createReadRawFileTool({ stagedDir, allowedPaths: new Set(['views/a.yml']) }); + const result = await (tool.execute as (...args: unknown[]) => unknown)( + { path: '../outside.yml' }, + { toolCallId: 't1', messages: [] }, + ); + expect(result).toMatch(/not accessible/i); + }); + + it('returns a clear error when the file is missing despite being allowed', async () => { + const tool = createReadRawFileTool({ stagedDir, allowedPaths: new Set(['views/missing.yml']) }); + const result = await (tool.execute as (...args: unknown[]) => unknown)( + { path: 'views/missing.yml' }, + { toolCallId: 't1', messages: [] }, + ); + expect(result).toMatch(/not found/i); + }); +}); diff --git a/packages/context/src/ingest/tools/read-raw-file.tool.ts 
/** Dependencies for the read-raw-file tool. */
interface ReadRawFileDeps {
  /** Root directory of the staged bundle; all reads are confined to it. */
  stagedDir: string;
  /** Bundle-relative paths this WorkUnit may read. */
  allowedPaths: Set<string>;
}

/** Files larger than this many bytes are refused; callers are pointed at read_raw_span. */
const MAX_READ_RAW_FILE_BYTES = 120_000;

/**
 * Build the tool that returns the full text of one allow-listed raw file.
 *
 * The tool never throws: every failure is returned as an `Error: ...` string
 * so the calling agent can react to it.
 *
 * @param deps Staged bundle root and the allow-list of readable relative paths.
 * @returns A tool whose `execute` resolves to the file content or an error string.
 */
export function createReadRawFileTool(deps: ReadRawFileDeps) {
  const stagedRoot = resolve(deps.stagedDir);
  // join() normalizes to the platform separator and keeps the trailing one, so
  // this prefix is "<root>/" on POSIX and "<root>\" on Windows. A hard-coded "/"
  // would reject every path on Windows.
  const stagedRootPrefix = join(stagedRoot, '/');
  return tool({
    description:
      "Read the full text content of a raw source file inside this WorkUnit. `path` must be relative to the staged bundle root (no leading slash, no `..`) and must appear in the WorkUnit's rawFiles or dependencyPaths list.",
    inputSchema: z.object({
      path: z.string().describe('Path relative to the staged bundle root. Example: "views/customers/customer.lkml".'),
    }),
    execute: async ({ path }) => {
      const normalized = normalize(path).replace(/^[/\\]+/, '');
      // normalize() emits backslashes on Windows, so also try the forward-slash
      // spelling when consulting the allow-list.
      const portable = normalized.replace(/\\/g, '/');
      const allowedKey = [normalized, portable].find((candidate) => deps.allowedPaths.has(candidate));
      if (normalized.startsWith('..') || allowedKey === undefined) {
        return `Error: path "${path}" is not accessible from this WorkUnit. Allowed paths: ${[...deps.allowedPaths].sort().join(', ')}`;
      }
      // Defense in depth: even an allow-listed entry must resolve inside the staged root.
      const absolute = resolve(join(stagedRoot, allowedKey));
      if (!absolute.startsWith(stagedRootPrefix) && absolute !== stagedRoot) {
        return `Error: path "${path}" is not accessible from this WorkUnit.`;
      }
      try {
        const fileStat = await stat(absolute);
        if (!fileStat.isFile()) {
          return `Error: path "${path}" is not a regular file.`;
        }
        if (fileStat.size > MAX_READ_RAW_FILE_BYTES) {
          return `Error: file "${path}" is too large to return in full (${fileStat.size} bytes). Use read_raw_span with targeted line ranges instead.`;
        }
        return await readFile(absolute, 'utf-8');
      } catch (err) {
        const reason = err instanceof Error ? err.message : String(err);
        const code =
          typeof err === 'object' && err !== null && 'code' in err ? (err as { code?: unknown }).code : undefined;
        // Only a genuinely missing path is "not found"; permission or I/O
        // failures are reported as such so the agent is not misled.
        if (code === 'ENOENT' || code === 'ENOTDIR') {
          return `Error: file "${path}" not found. (${reason})`;
        }
        return `Error: file "${path}" could not be read. (${reason})`;
      }
    },
  });
}
/** Dependencies for the read-raw-span tool. */
interface ReadRawSpanDeps {
  /** Root directory of the staged bundle; all reads are confined to it. */
  stagedDir: string;
  /** Bundle-relative paths the caller may read. */
  allowedPaths: Set<string>;
}

/**
 * Build the tool that returns a 1-based, inclusive line range of one
 * allow-listed raw file.
 *
 * `endLine` is clamped to the file length. A `startLine` beyond the last line
 * is reported as an error instead of an empty string, which would look the
 * same as a range of blank lines. The tool never throws; failures are returned
 * as `Error: ...` strings.
 *
 * @param deps Staged bundle root and the allow-list of readable relative paths.
 * @returns A tool whose `execute` resolves to the selected lines joined by `\n`, or an error string.
 */
export function createReadRawSpanTool(deps: ReadRawSpanDeps) {
  const stagedRoot = resolve(deps.stagedDir);
  // join() normalizes to the platform separator and keeps the trailing one, so
  // the containment check below also works on Windows.
  const stagedRootPrefix = join(stagedRoot, '/');
  return tool({
    description:
      'Read a 1-based inclusive line range from a raw source file. Use this to resolve a provenance pointer like `file.lkml#L15-28` without loading the whole file into context.',
    inputSchema: z.object({
      path: z.string().describe('Path relative to the staged bundle root.'),
      startLine: z.number().int().min(1).describe('First line to return (1-based, inclusive).'),
      endLine: z.number().int().min(1).describe('Last line to return (1-based, inclusive). Clamped to file length.'),
    }),
    execute: async ({ path, startLine, endLine }) => {
      if (startLine > endLine) {
        return `Error: startLine must be <= endLine (got startLine=${startLine}, endLine=${endLine})`;
      }
      const normalized = normalize(path).replace(/^[/\\]+/, '');
      // normalize() emits backslashes on Windows, so also try the forward-slash
      // spelling when consulting the allow-list.
      const portable = normalized.replace(/\\/g, '/');
      const allowedKey = [normalized, portable].find((candidate) => deps.allowedPaths.has(candidate));
      if (normalized.startsWith('..') || allowedKey === undefined) {
        return `Error: path "${path}" is not accessible from this context. Allowed paths: ${[...deps.allowedPaths].sort().join(', ')}`;
      }
      // Defense in depth: even an allow-listed entry must resolve inside the staged root.
      const absolute = resolve(join(stagedRoot, allowedKey));
      if (!absolute.startsWith(stagedRootPrefix) && absolute !== stagedRoot) {
        return `Error: path "${path}" is not accessible from this context.`;
      }
      try {
        const body = await readFile(absolute, 'utf-8');
        // Accept both LF and CRLF so returned lines never carry a stray "\r".
        const rawLines = body.split(/\r?\n/);
        // Treat a trailing empty element caused by a file-ending newline as NOT a line.
        const lines = rawLines[rawLines.length - 1] === '' ? rawLines.slice(0, -1) : rawLines;
        if (startLine > lines.length) {
          return `Error: startLine ${startLine} is past the end of file "${path}" (${lines.length} lines).`;
        }
        const lastLine = Math.min(lines.length, endLine);
        return lines.slice(startLine - 1, lastLine).join('\n');
      } catch (err) {
        const reason = err instanceof Error ? err.message : String(err);
        const code =
          typeof err === 'object' && err !== null && 'code' in err ? (err as { code?: unknown }).code : undefined;
        // Only a genuinely missing path is "not found"; other failures are reported as read errors.
        if (code === 'ENOENT' || code === 'ENOTDIR') {
          return `Error: file "${path}" not found. (${reason})`;
        }
        return `Error: file "${path}" could not be read. (${reason})`;
      }
    },
  });
}
= createStageDiffTool({ stageIndex }); + const out = (await (tool.execute as (...args: unknown[]) => unknown)( + { unitKeyA: 'u1', unitKeyB: 'u2' }, + { toolCallId: 't', messages: [] }, + )) as string; + expect(out).toContain('churn_risk_score'); + expect(out).toMatch(/overlap/i); + }); + + it('says no overlap when keys are disjoint', async () => { + const tool = createStageDiffTool({ + stageIndex: { + jobId: 'j', + connectionId: 'c1', + workUnits: [ + { + unitKey: 'u1', + rawFiles: [], + status: 'success', + actions: [{ target: 'sl', type: 'created', key: 'a', detail: '' }], + touchedSlSources: [{ connectionId: 'c1', sourceName: 'a' }], + }, + { + unitKey: 'u2', + rawFiles: [], + status: 'success', + actions: [{ target: 'sl', type: 'created', key: 'b', detail: '' }], + touchedSlSources: [{ connectionId: 'c1', sourceName: 'b' }], + }, + ], + conflictsResolved: [], + evictionsApplied: [], + unmappedFallbacks: [], + }, + }); + const out = (await (tool.execute as (...args: unknown[]) => unknown)( + { unitKeyA: 'u1', unitKeyB: 'u2' }, + { toolCallId: 't', messages: [] }, + )) as string; + expect(out).toMatch(/no overlap/i); + }); + + it('does not overlap same-named SL actions on different target connections', async () => { + const tool = createStageDiffTool({ + stageIndex: { + jobId: 'j', + connectionId: 'looker-run', + workUnits: [ + { + unitKey: 'u1', + rawFiles: [], + status: 'success', + actions: [ + { + target: 'sl', + type: 'created', + key: 'looker__b2b__sales_pipeline', + detail: 'W1', + targetConnectionId: 'W1', + }, + ], + touchedSlSources: [{ connectionId: 'W1', sourceName: 'looker__b2b__sales_pipeline' }], + }, + { + unitKey: 'u2', + rawFiles: [], + status: 'success', + actions: [ + { + target: 'sl', + type: 'created', + key: 'looker__b2b__sales_pipeline', + detail: 'W2', + targetConnectionId: 'W2', + }, + ], + touchedSlSources: [{ connectionId: 'W2', sourceName: 'looker__b2b__sales_pipeline' }], + }, + ], + conflictsResolved: [], + evictionsApplied: [], + 
/** Dependencies for the stage-diff tool. */
export interface StageDiffDeps {
  /** Stage index whose WorkUnits are compared. */
  stageIndex: StageIndex;
}

/**
 * Build the tool that reports which writes two WorkUnits have in common.
 *
 * Writes are matched by `memoryActionIdentity`, evaluated against the run's
 * connection id. For each shared identity the `detail` of the first matching
 * action of each unit is shown.
 *
 * @param deps Stage index holding the WorkUnits.
 * @returns A tool whose `execute` resolves to an overlap report, a "no overlap"
 *   message, or an `Error: ...` string for an unknown unit key.
 */
export function createStageDiffTool(deps: StageDiffDeps) {
  return tool({
    description:
      'Compare two WorkUnits by their writes. SL writes overlap only when target connection and artifact key both match; same-key SL actions on different target connections are non-overlapping.',
    inputSchema: z.object({
      unitKeyA: z.string(),
      unitKeyB: z.string(),
    }),
    execute: ({ unitKeyA, unitKeyB }) => {
      const units = deps.stageIndex.workUnits;
      const unitA = units.find((candidate) => candidate.unitKey === unitKeyA);
      const unitB = units.find((candidate) => candidate.unitKey === unitKeyB);
      if (!unitA) {
        return Promise.resolve(`Error: unknown unitKey "${unitKeyA}"`);
      }
      if (!unitB) {
        return Promise.resolve(`Error: unknown unitKey "${unitKeyB}"`);
      }

      const runConnectionId = deps.stageIndex.connectionId;
      type UnitAction = (typeof unitA.actions)[number];
      // Index each unit's actions by identity, keeping the first action seen
      // for every identity (insertion order is preserved by Map).
      const indexByIdentity = (actions: readonly UnitAction[]) => {
        const firstByIdentity = new Map<ReturnType<typeof memoryActionIdentity>, UnitAction>();
        for (const action of actions) {
          const identity = memoryActionIdentity(action, runConnectionId);
          if (!firstByIdentity.has(identity)) {
            firstByIdentity.set(identity, action);
          }
        }
        return firstByIdentity;
      };
      const actionsA = indexByIdentity(unitA.actions);
      const actionsB = indexByIdentity(unitB.actions);

      const shared = [...actionsA.keys()].filter((identity) => actionsB.has(identity));
      if (shared.length === 0) {
        return Promise.resolve(`No overlap between ${unitKeyA} and ${unitKeyB}.`);
      }

      const report = shared
        .map((identity) => {
          const detailA = actionsA.get(identity)?.detail ?? '';
          const detailB = actionsB.get(identity)?.detail ?? '';
          return `- ${identity}\n ${unitKeyA}: ${detailA}\n ${unitKeyB}: ${detailB}`;
        })
        .join('\n');
      return Promise.resolve(`Overlap between ${unitKeyA} and ${unitKeyB}:\n${report}`);
    },
  });
}
/** Dependencies for the stage-list tool. */
export interface StageListDeps {
  /** Stage index whose WorkUnits are summarized. */
  stageIndex: StageIndex;
}

/**
 * Build the tool that renders a compact text summary of every WorkUnit in the
 * stage index: unit key, status, raw files and the list of recorded actions.
 *
 * @param deps Stage index to summarize.
 * @returns A tool whose `execute` resolves to the summary, or to an
 *   "(empty)" message when the index holds no WorkUnits.
 */
export function createStageListTool(deps: StageListDeps) {
  return tool({
    description:
      'List every write made by Stage 3 WorkUnits in this job. Each entry has the unitKey, raw files, and the action set (SL sources touched, wiki pages written).',
    inputSchema: z.object({}),
    execute: () => {
      const units = deps.stageIndex.workUnits;
      if (units.length === 0) {
        return Promise.resolve('(empty) — no WorkUnits wrote anything in this job');
      }

      const sections: string[] = [];
      for (const unit of units) {
        let actionLines = ' (no actions)';
        if (unit.actions.length > 0) {
          actionLines = unit.actions.map((action) => ` - ${action.target}:${action.type} ${action.key}`).join('\n');
        }
        // An empty join yields '', which falls through to the placeholder.
        const files = unit.rawFiles.join(', ') || '(none)';
        sections.push(
          `- unitKey: ${unit.unitKey} (status=${unit.status})\n rawFiles: ${files}\n actions:\n${actionLines}`,
        );
      }
      return Promise.resolve(sections.join('\n'));
    },
  });
}
/**
 * Tail of the pending write chain for each log file. Appends are chained per
 * path so lines reach the file in the order the calls were recorded, even when
 * several tool calls finish close together.
 */
const pendingWrites = new Map<string, Promise<void>>();

/**
 * Wrap every tool in `tools` so each invocation appends a JSONL record with
 * `{toolName, input, output | error, durationMs}` to `logFilePath` and is
 * reported to `options.onEntry` when provided.
 *
 * Tool shape is preserved (description, inputSchema, ...). Tools without an
 * `execute` function pass through untouched.
 *
 * Logging is strictly an observer:
 * - the wrapped tool resolves or rejects exactly as the original does;
 * - a throwing `onEntry` callback or a failing file write never changes the
 *   tool outcome and never produces a second, misleading log entry;
 * - file writes are fire-and-forget, but queued per log file so lines land in
 *   call order.
 *
 * @param tools Tool set to wrap.
 * @param logFilePath JSONL file receiving one line per invocation (parent directories are created on demand).
 * @param wuKey WorkUnit key stamped on every entry.
 * @param options Optional observer invoked synchronously with each entry.
 * @returns A tool set with the same keys whose executable tools are instrumented.
 */
export function wrapToolsWithLogger<T extends ToolSet>(
  tools: T,
  logFilePath: string,
  wuKey: string,
  options: ToolCallLoggerOptions = {},
): T {
  const wrapped: Record<string, unknown> = {};
  for (const [name, original] of Object.entries(tools) as Array<[string, T[string]]>) {
    const originalExecute = original.execute;
    if (typeof originalExecute !== 'function') {
      wrapped[name] = original;
      continue;
    }
    const wrappedExecute: ToolExecuteFunction<unknown, unknown> = async (
      input: unknown,
      opts: ToolExecutionOptions,
    ) => {
      const start = Date.now();
      // Fields shared by success and failure entries, captured at completion time.
      const baseEntry = () => ({
        ts: new Date().toISOString(),
        wuKey,
        toolCallId: opts.toolCallId,
        toolName: name,
        durationMs: Date.now() - start,
        input,
      });

      let output: unknown;
      try {
        output = await (originalExecute as ToolExecuteFunction<unknown, unknown>)(input, opts);
      } catch (err) {
        recordEntry(logFilePath, options, {
          ...baseEntry(),
          error: {
            message: err instanceof Error ? err.message : String(err),
            name: err instanceof Error ? err.name : undefined,
          },
        });
        throw err;
      }
      // Recorded outside the try block: a logging problem must never be
      // mistaken for (or reported as) a tool failure.
      recordEntry(logFilePath, options, { ...baseEntry(), output });
      return output;
    };
    wrapped[name] = { ...original, execute: wrappedExecute };
  }
  return wrapped as T;
}

/** Notify the optional observer and queue the file append; neither step can throw. */
function recordEntry(path: string, options: ToolCallLoggerOptions, entry: ToolCallLogEntry): void {
  try {
    options.onEntry?.(entry);
  } catch {
    // best-effort: an observer bug must not alter the tool result
  }
  appendEntry(path, entry);
}

/** Queue one JSONL line for `path` without blocking the caller; write failures are ignored. */
function appendEntry(path: string, entry: ToolCallLogEntry): void {
  // Serialize immediately so the line reflects the entry at call time, not
  // whatever the input/output objects look like once the write actually runs.
  const line = `${safeStringify(entry)}\n`;
  const previous = pendingWrites.get(path) ?? Promise.resolve();
  const current = previous.then(async () => {
    try {
      await mkdir(dirname(path), { recursive: true });
      await appendFile(path, line, 'utf-8');
    } catch {
      // best-effort
    }
  });
  pendingWrites.set(path, current);
  // Drop the bookkeeping once this write is the last one queued for the path.
  void current.then(() => {
    if (pendingWrites.get(path) === current) {
      pendingWrites.delete(path);
    }
  });
}

/**
 * Serialize a log entry to JSON. When the payload cannot be serialized (for
 * example a circular structure or a BigInt), keep the identifying metadata and
 * replace only the payload fields with a placeholder.
 */
function safeStringify(entry: ToolCallLogEntry): string {
  try {
    return JSON.stringify(entry);
  } catch {
    return JSON.stringify({
      ...entry,
      input: '[unserializable]',
      ...(entry.output !== undefined ? { output: '[unserializable]' } : {}),
    });
  }
}
string; +} + +type SourceFetchIssueKind = + | 'unmapped_looker_connection' + | 'unparseable_sql_table_name' + | 'looker_template_unresolved' + | 'derived_table_not_supported' + | 'lookml_connection_mismatch'; + +export interface SourceFetchIssue { + rawPath: string; + entityType: string; + entityId: string | null; + severity: 'warning' | 'error'; + statusCode: number | null; + message: string; + retryRecommended: boolean; + kind?: SourceFetchIssueKind; + details?: Record; +} + +export interface SourceFetchReport { + status: 'success' | 'partial'; + retryRecommended: boolean; + skipped: SourceFetchIssue[]; + warnings: SourceFetchIssue[]; +} + +export interface ScopeDescriptor { + fingerprint: string; + isPathInScope(rawPath: string): boolean; +} + +export type TriageLane = 'skip' | 'light' | 'full'; + +export interface TriageSignals { + parentType?: string; + objectType?: string; + isDateTitled?: boolean; + lastEditedAt?: string; + propertyHints?: Record; +} + +export interface ClusterWorkUnitsContext { + workUnits: WorkUnit[]; + stagedDir: string; + embedding: KloEmbeddingPort; +} + +export interface SourceAdapter { + readonly source: string; + readonly skillNames: string[]; + readonly reconcileSkillNames?: string[]; + readonly evidenceIndexing?: 'documents'; + readonly triageSupported?: boolean; + getTriageSignals?(stagedDir: string, externalId: string): Promise; + detect(stagedDir: string): Promise; + fetch?(pullConfig: unknown, stagedDir: string, ctx: FetchContext): Promise; + readFetchReport?(stagedDir: string): Promise; + listTargetConnectionIds?(stagedDir: string): Promise; + chunk(stagedDir: string, diffSet?: DiffSet): Promise; + clusterWorkUnits?(ctx: ClusterWorkUnitsContext): Promise; + describeScope?(stagedDir: string): Promise; + onPullSucceeded?(ctx: { + connectionId: string; + sourceKey: string; + syncId: string; + trigger: IngestTrigger; + completedAt: Date; + stagedDir: string; + }): Promise; +} + +export type IngestBundleRef = + | { kind: 'upload'; 
uploadId: string } + | { kind: 'scheduled_pull'; config: unknown } + | { kind: 'override'; priorJobId: string }; + +export interface IngestBundleJob { + jobId: string; + connectionId: string; + sourceKey: string; + trigger: IngestTrigger; + bundleRef: IngestBundleRef; +} + +export interface IngestDiffSummary { + added: number; + modified: number; + deleted: number; + unchanged: number; +} + +export interface IngestBundleResult { + jobId: string; + runId: string; + syncId: string; + diffSummary: IngestDiffSummary; + workUnitCount: number; + failedWorkUnits: string[]; + artifactsWritten: number; + commitSha: string | null; +} + +export interface IngestJobPhase { + updateProgress(progress: number, message?: string): Promise; + startPhase(weight: number): IngestJobPhase; +} + +export interface IngestJobContext { + jobId: string; + memoryFlow?: MemoryFlowEventSink; + startPhase(weight: number): IngestJobPhase; +} diff --git a/packages/context/src/llm/debug-request-recorder.test.ts b/packages/context/src/llm/debug-request-recorder.test.ts new file mode 100644 index 00000000..b2b2f4d2 --- /dev/null +++ b/packages/context/src/llm/debug-request-recorder.test.ts @@ -0,0 +1,123 @@ +import { mkdtemp, readFile, rm } from 'node:fs/promises'; +import { tmpdir } from 'node:os'; +import { join } from 'node:path'; +import { afterEach, describe, expect, it } from 'vitest'; +import { + createJsonlKloLlmDebugRequestRecorder, + summarizeKloLlmDebugRequest, +} from './debug-request-recorder.js'; + +describe('summarizeKloLlmDebugRequest', () => { + it('records providerOptions positions without message text or tool schemas', () => { + const summary = summarizeKloLlmDebugRequest({ + operationName: 'ingest-bundle-wu', + source: 'metabase', + jobId: 'job-1', + unitKey: 'cards/1', + modelRole: 'candidateExtraction', + modelId: 'claude-sonnet-4-6', + messages: [ + { + role: 'system', + content: 'SECRET SYSTEM PROMPT', + providerOptions: { anthropic: { cacheControl: { type: 'ephemeral', ttl: 
'1h' } } }, + }, + { + role: 'user', + content: [ + { + type: 'text', + text: 'SECRET USER PROMPT', + providerOptions: { anthropic: { cacheControl: { type: 'ephemeral', ttl: '5m' } } }, + }, + ], + }, + ], + tools: { + emit_candidate: { + description: 'SECRET TOOL DESCRIPTION', + inputSchema: { secret: true }, + providerOptions: { anthropic: { cacheControl: { type: 'ephemeral', ttl: '1h' } } }, + }, + }, + }); + + expect(summary).toMatchObject({ + operationName: 'ingest-bundle-wu', + source: 'metabase', + jobId: 'job-1', + unitKey: 'cards/1', + modelRole: 'candidateExtraction', + modelId: 'claude-sonnet-4-6', + messageCount: 2, + toolNames: ['emit_candidate'], + providerOptions: [ + { + target: 'message', + index: 0, + role: 'system', + providerOptions: { anthropic: { cacheControl: { type: 'ephemeral', ttl: '1h' } } }, + }, + { + target: 'message-part', + index: 1, + role: 'user', + partIndex: 0, + providerOptions: { anthropic: { cacheControl: { type: 'ephemeral', ttl: '5m' } } }, + }, + { + target: 'tool', + name: 'emit_candidate', + providerOptions: { anthropic: { cacheControl: { type: 'ephemeral', ttl: '1h' } } }, + }, + ], + }); + + const serialized = JSON.stringify(summary); + expect(serialized).not.toContain('SECRET SYSTEM PROMPT'); + expect(serialized).not.toContain('SECRET USER PROMPT'); + expect(serialized).not.toContain('SECRET TOOL DESCRIPTION'); + expect(serialized).not.toContain('inputSchema'); + }); +}); + +describe('createJsonlKloLlmDebugRequestRecorder', () => { + let tempDir: string | undefined; + + afterEach(async () => { + if (tempDir) { + await rm(tempDir, { recursive: true, force: true }); + tempDir = undefined; + } + }); + + it('appends one JSON object per recorded request', async () => { + tempDir = await mkdtemp(join(tmpdir(), 'klo-llm-debug-')); + const filePath = join(tempDir, 'nested', 'llm-debug.jsonl'); + const recorder = createJsonlKloLlmDebugRequestRecorder(filePath); + + await recorder.record({ + timestamp: 
'2026-05-04T00:00:00.000Z', + operationName: 'ingest-bundle-wu', + modelRole: 'candidateExtraction', + modelId: 'claude-sonnet-4-6', + messageCount: 2, + toolNames: ['emit_candidate'], + providerOptions: [], + }); + await recorder.record({ + timestamp: '2026-05-04T00:00:01.000Z', + operationName: 'ingest-bundle-reconcile', + modelRole: 'reconcile', + modelId: 'claude-sonnet-4-6', + messageCount: 2, + toolNames: [], + providerOptions: [], + }); + + const lines = (await readFile(filePath, 'utf8')).trim().split('\n').map((line) => JSON.parse(line)); + expect(lines).toHaveLength(2); + expect(lines[0]).toMatchObject({ operationName: 'ingest-bundle-wu', modelRole: 'candidateExtraction' }); + expect(lines[1]).toMatchObject({ operationName: 'ingest-bundle-reconcile', modelRole: 'reconcile' }); + }); +}); diff --git a/packages/context/src/llm/debug-request-recorder.ts b/packages/context/src/llm/debug-request-recorder.ts new file mode 100644 index 00000000..187311bc --- /dev/null +++ b/packages/context/src/llm/debug-request-recorder.ts @@ -0,0 +1,131 @@ +import { appendFile, mkdir } from 'node:fs/promises'; +import { dirname } from 'node:path'; +import type { ModelMessage } from 'ai'; +import type { KloModelRole } from '@klo/llm'; + +type ProviderOptionsCarrier = { providerOptions?: unknown; [key: string]: unknown }; +type ToolMap = Record; + +export interface KloLlmDebugProviderOptionsEntry { + target: 'message' | 'message-part' | 'tool'; + index?: number; + role?: string; + partIndex?: number; + name?: string; + providerOptions: unknown; +} + +export interface KloLlmDebugRequest { + timestamp: string; + operationName: string; + source?: string; + jobId?: string; + unitKey?: string; + modelRole: KloModelRole; + modelId: string; + messageCount: number; + toolNames: string[]; + providerOptions: KloLlmDebugProviderOptionsEntry[]; +} + +export interface KloLlmDebugRequestRecorder { + record(request: KloLlmDebugRequest): Promise | void; +} + +export interface 
/** Role label for a message; a non-string role is reported as `'unknown'`. */
function messageRole(message: ModelMessage): string {
  const { role } = message;
  if (typeof role === 'string') {
    return role;
  }
  return 'unknown';
}

/** Type guard: true for any non-null, non-array object. */
function isProviderOptionsCarrier(value: unknown): value is ProviderOptionsCarrier {
  if (value === null || Array.isArray(value)) {
    return false;
  }
  return typeof value === 'object';
}

/**
 * Collect `providerOptions` found on the content parts of one message.
 * Messages whose content is not an array yield no entries.
 *
 * @param message - Message whose parts are inspected.
 * @param index - Position of `message` in the request, copied onto each entry.
 */
function contentPartProviderOptions(message: ModelMessage, index: number): KloLlmDebugProviderOptionsEntry[] {
  const collected: KloLlmDebugProviderOptionsEntry[] = [];
  const { content } = message;
  if (!Array.isArray(content)) {
    return collected;
  }

  content.forEach((part: unknown, partIndex: number) => {
    if (isProviderOptionsCarrier(part) && part.providerOptions) {
      collected.push({
        target: 'message-part',
        index,
        role: messageRole(message),
        partIndex,
        providerOptions: part.providerOptions,
      });
    }
  });
  return collected;
}

/**
 * Collect `providerOptions` for every message: the message-level entry first
 * (when present), followed by the entries of its content parts.
 */
function messageProviderOptions(messages: ModelMessage[]): KloLlmDebugProviderOptionsEntry[] {
  const collected: KloLlmDebugProviderOptionsEntry[] = [];
  messages.forEach((message, index) => {
    const ownOptions = (message as ProviderOptionsCarrier).providerOptions;
    if (ownOptions) {
      collected.push({
        target: 'message',
        index,
        role: messageRole(message),
        providerOptions: ownOptions,
      });
    }
    collected.push(...contentPartProviderOptions(message, index));
  });
  return collected;
}

/** Collect `providerOptions` of each tool that carries them, in the tool map's own key order. */
function toolProviderOptions(tools: ToolMap): KloLlmDebugProviderOptionsEntry[] {
  const collected: KloLlmDebugProviderOptionsEntry[] = [];
  for (const [name, tool] of Object.entries(tools)) {
    if (tool.providerOptions) {
      collected.push({ target: 'tool', name, providerOptions: tool.providerOptions });
    }
  }
  return collected;
}

/**
 * Build the debug summary of one LLM request. Only metadata is copied:
 * identifiers, the message count, the sorted tool names, and the
 * `providerOptions` found on messages, message parts, and tools. Message text
 * and the other tool fields are not read into the result.
 *
 * `source`, `jobId`, and `unitKey` are included only when truthy; `timestamp`
 * defaults to the current time in ISO format.
 */
export function summarizeKloLlmDebugRequest(input: SummarizeKloLlmDebugRequestInput): KloLlmDebugRequest {
  const toolNames = Object.keys(input.tools).sort();

  // Optional identifiers, inserted in the same order the summary exposes them.
  const identity: Pick<KloLlmDebugRequest, 'source' | 'jobId' | 'unitKey'> = {};
  if (input.source) {
    identity.source = input.source;
  }
  if (input.jobId) {
    identity.jobId = input.jobId;
  }
  if (input.unitKey) {
    identity.unitKey = input.unitKey;
  }

  const fromMessages = messageProviderOptions(input.messages);
  const fromTools = toolProviderOptions(input.tools);

  return {
    timestamp: input.timestamp ?? new Date().toISOString(),
    operationName: input.operationName,
    ...identity,
    modelRole: input.modelRole,
    modelId: input.modelId,
    messageCount: input.messages.length,
    toolNames,
    providerOptions: fromMessages.concat(fromTools),
  };
}

/**
 * Recorder that appends each request as one JSON line to `filePath`,
 * creating the parent directory on demand. Errors propagate to the caller.
 */
export function createJsonlKloLlmDebugRequestRecorder(filePath: string): KloLlmDebugRequestRecorder {
  const record = async (request: KloLlmDebugRequest): Promise<void> => {
    await mkdir(dirname(filePath), { recursive: true });
    await appendFile(filePath, `${JSON.stringify(request)}\n`, 'utf8');
  };
  return { record };
}
// Name of the provider's bulk-embedding method. It is assembled at runtime
// rather than written as a literal; that construction is kept as-is.
const bulkEmbeddingMethod = ['embed', 'Many'].join('') as keyof KloEmbeddingProvider;

// Call shape of the bulk method looked up through `bulkEmbeddingMethod`.
// NOTE(review): element types restored from usage in the adapter tests — confirm.
type BulkEmbedFn = (items: string[]) => Promise<number[][]>;

/**
 * Invoke the provider's bulk-embedding method for `texts`. The call is made
 * as a member call so the provider stays the receiver (`this`).
 */
function computeBulkEmbeddings(provider: KloEmbeddingProvider, texts: string[]): Promise<number[][]> {
  return (provider[bulkEmbeddingMethod] as BulkEmbedFn)(texts);
}

/**
 * Exposes a `KloEmbeddingProvider` through the ingest embedding port:
 * `computeEmbedding` delegates to `embed`, `computeEmbeddingsBulk` to the
 * provider's bulk method, and `maxBatchSize` is copied at construction time.
 */
export class KloIngestEmbeddingPortAdapter implements KloIngestEmbeddingPort {
  private readonly provider: KloEmbeddingProvider;
  readonly maxBatchSize: number;

  constructor(provider: KloEmbeddingProvider) {
    this.provider = provider;
    this.maxBatchSize = provider.maxBatchSize;
  }

  /** Embed a single text via the provider's `embed`. */
  computeEmbedding(text: string): Promise<number[]> {
    const { provider } = this;
    return provider.embed(text);
  }

  /** Embed several texts in one provider call. */
  computeEmbeddingsBulk(texts: string[]): Promise<number[][]> {
    const { provider } = this;
    return computeBulkEmbeddings(provider, texts);
  }
}
// Argument accepted by the AI SDK `generateText` call.
// NOTE(review): type arguments restored from context — confirm against the original.
type GenerateTextInput = Parameters<typeof generateText>[0];

// Minimal result shape this module reads from a text-generation call.
type GenerateTextFn = (input: GenerateTextInput) => Promise<{ text?: string; output?: unknown }>;

/** Inputs shared by the text and object generation helpers. */
interface GenerateKloTextInput {
  llmProvider: KloLlmProvider;
  role: KloModelRole;
  prompt: string;
  system?: string;
  tools?: ToolSet;
  temperature?: number;
  // Optional replacement for the AI SDK `generateText`; used when provided.
  generateText?: GenerateTextFn;
}

/**
 * Generate plain text for `input.prompt` with the model selected by
 * `input.role`.
 *
 * When the model reports `provider === 'deterministic'`, no generation call
 * is made and a fixed description built from the first 64 characters of the
 * prompt is returned. Otherwise the request is assembled by
 * `KloMessageBuilder.wrapSimple` and sent with temperature 0 unless
 * overridden.
 *
 * @throws Error when the generation result carries no string `text`.
 */
export async function generateKloText(input: GenerateKloTextInput): Promise<string> {
  const model = input.llmProvider.getModel(input.role);

  const providerName = (model as { provider?: string }).provider;
  if (providerName === 'deterministic') {
    const subject = input.prompt.slice(0, 64).trim();
    return `Deterministic description for ${subject || 'data source'}`;
  }

  const builder = new KloMessageBuilder(input.llmProvider);
  const built = builder.wrapSimple({
    system: input.system,
    messages: [{ role: 'user', content: input.prompt }],
    tools: input.tools ?? {},
    model,
  });

  const runGeneration = input.generateText ?? generateText;
  const result = await runGeneration({
    model,
    temperature: input.temperature ?? 0,
    messages: built.messages,
    tools: built.tools as ToolSet,
  });

  if (typeof result.text === 'string') {
    return result.text;
  }
  throw new Error('KLO LLM text generation returned no text');
}
{}, + model, + }); + const result = await (input.generateText ?? generateText)({ + model, + temperature: input.temperature ?? 0, + messages: built.messages, + tools: built.tools as ToolSet, + output: Output.object({ + schema: input.schema as FlexibleSchema, + }), + }); + if (result.output == null) { + throw new Error('KLO LLM object generation returned no output'); + } + return result.output as TOutput; +} diff --git a/packages/context/src/llm/index.ts b/packages/context/src/llm/index.ts new file mode 100644 index 00000000..fcbf104a --- /dev/null +++ b/packages/context/src/llm/index.ts @@ -0,0 +1,18 @@ +export { KloIngestEmbeddingPortAdapter, KloScanEmbeddingPortAdapter } from './embedding-port.js'; +export { generateKloObject, generateKloText } from './generation.js'; +export type { + KloLlmDebugProviderOptionsEntry, + KloLlmDebugRequest, + KloLlmDebugRequestRecorder, + SummarizeKloLlmDebugRequestInput, +} from './debug-request-recorder.js'; +export { + createJsonlKloLlmDebugRequestRecorder, + summarizeKloLlmDebugRequest, +} from './debug-request-recorder.js'; +export { + createLocalKloEmbeddingProviderFromConfig, + createLocalKloLlmProviderFromConfig, + resolveLocalKloEmbeddingConfig, + resolveLocalKloLlmConfig, +} from './local-config.js'; diff --git a/packages/context/src/llm/local-config.test.ts b/packages/context/src/llm/local-config.test.ts new file mode 100644 index 00000000..e63b7d24 --- /dev/null +++ b/packages/context/src/llm/local-config.test.ts @@ -0,0 +1,127 @@ +import { describe, expect, it, vi } from 'vitest'; +import { + buildDefaultKloProjectConfig, + type KloProjectEmbeddingConfig, + type KloProjectLlmConfig, +} from '../project/config.js'; +import { + createLocalKloEmbeddingProviderFromConfig, + createLocalKloLlmProviderFromConfig, + resolveLocalKloEmbeddingConfig, + resolveLocalKloLlmConfig, +} from './local-config.js'; + +describe('local KLO LLM config', () => { + it('resolves env and file references into a KloLlmConfig', () => { + const 
config: KloProjectLlmConfig = { + provider: { + backend: 'gateway', + gateway: { api_key: 'env:AI_GATEWAY_API_KEY', base_url: 'https://gateway.example/v1' }, // pragma: allowlist secret + }, + models: { default: 'env:KLO_MODEL', triage: 'anthropic/claude-haiku-4-5' }, + promptCaching: { enabled: false }, + }; + + expect( + resolveLocalKloLlmConfig(config, { + AI_GATEWAY_API_KEY: 'gateway-key', // pragma: allowlist secret + KLO_MODEL: 'anthropic/claude-sonnet-4-6', + }), + ).toEqual({ + backend: 'gateway', + gateway: { apiKey: 'gateway-key', baseURL: 'https://gateway.example/v1' }, // pragma: allowlist secret + modelSlots: { default: 'anthropic/claude-sonnet-4-6', triage: 'anthropic/claude-haiku-4-5' }, + promptCaching: { enabled: false }, + }); + }); + + it('returns null when the local LLM backend is disabled', () => { + expect( + createLocalKloLlmProviderFromConfig({ + provider: { backend: 'none' }, + models: {}, + }), + ).toBeNull(); + }); + + it('constructs providers through @klo/llm', () => { + const createKloLlmProvider = vi.fn(() => ({ getModel: vi.fn() }) as never); + const result = createLocalKloLlmProviderFromConfig( + { + provider: { + backend: 'anthropic', + anthropic: { api_key: 'env:ANTHROPIC_API_KEY' }, // pragma: allowlist secret + }, + models: { default: 'claude-sonnet-4-6' }, + }, + { env: { ANTHROPIC_API_KEY: 'sk-ant-test' }, createKloLlmProvider }, // pragma: allowlist secret + ); + + expect(result).not.toBeNull(); + expect(createKloLlmProvider).toHaveBeenCalledWith({ + backend: 'anthropic', + anthropic: { apiKey: 'sk-ant-test' }, // pragma: allowlist secret + modelSlots: { default: 'claude-sonnet-4-6' }, + promptCaching: undefined, + }); + }); + + it('inherits enabled prompt caching from @klo/llm when local config omits promptCaching', () => { + const provider = createLocalKloLlmProviderFromConfig({ + provider: { + backend: 'gateway', + gateway: { base_url: 'https://gateway.example/v1' }, + }, + models: { default: 'anthropic/claude-sonnet-4-6' 
}, + }); + + expect(provider?.promptCachingConfig()).toMatchObject({ + enabled: true, + systemTtl: '1h', + toolsTtl: '1h', + historyTtl: '5m', + vertexFallbackTo5m: false, + }); + }); +}); + +describe('local KLO embedding config', () => { + it('resolves sentence-transformers config', () => { + const config: KloProjectEmbeddingConfig = { + backend: 'sentence-transformers', + model: 'all-MiniLM-L6-v2', + dimensions: 384, + sentenceTransformers: { base_url: 'http://localhost:18081', pathPrefix: '' }, + batchSize: 16, + }; + + expect(resolveLocalKloEmbeddingConfig(config, {})).toEqual({ + backend: 'sentence-transformers', + model: 'all-MiniLM-L6-v2', + dimensions: 384, + sentenceTransformers: { baseURL: 'http://localhost:18081', pathPrefix: '' }, + batchSize: 16, + }); + }); + + it('constructs deterministic embeddings from the default project config', () => { + const createKloEmbeddingProvider = vi.fn(() => ({}) as never); + const provider = createLocalKloEmbeddingProviderFromConfig( + buildDefaultKloProjectConfig('warehouse').ingest.embeddings, + { createKloEmbeddingProvider }, + ); + + expect(provider).not.toBeNull(); + expect(createKloEmbeddingProvider).toHaveBeenCalledWith( + expect.objectContaining({ + backend: 'deterministic', + model: 'deterministic', + dimensions: 8, + }), + ); + }); + + it('returns null when embeddings are disabled', () => { + expect(createLocalKloEmbeddingProviderFromConfig({ backend: 'none', dimensions: 8 })).toBeNull(); + }); +}); diff --git a/packages/context/src/llm/local-config.ts b/packages/context/src/llm/local-config.ts new file mode 100644 index 00000000..a765467d --- /dev/null +++ b/packages/context/src/llm/local-config.ts @@ -0,0 +1,122 @@ +import { + createKloEmbeddingProvider, + createKloLlmProvider, + type KloEmbeddingConfig, + type KloEmbeddingProvider, + type KloLlmConfig, + type KloLlmProvider, + type KloModelRole, +} from '@klo/llm'; +import { resolveKloConfigReference } from '../core/config-reference.js'; +import type { 
/** Injection points for environment lookup and provider construction. */
interface LocalConfigDeps {
  env?: NodeJS.ProcessEnv;
  createKloLlmProvider?: typeof createKloLlmProvider;
  createKloEmbeddingProvider?: typeof createKloEmbeddingProvider;
}

/** Resolve a config reference; any falsy result is normalized to `undefined`. */
function resolveOptional(value: string | undefined, env: NodeJS.ProcessEnv): string | undefined {
  const resolved = resolveKloConfigReference(value, env);
  return resolved ? resolved : undefined;
}

/**
 * Resolve a config reference that must produce a value.
 *
 * @throws Error with `message` when the reference resolves to nothing.
 */
function resolveRequired(value: string | undefined, env: NodeJS.ProcessEnv, message: string): string {
  const resolved = resolveOptional(value, env);
  if (resolved) {
    return resolved;
  }
  throw new Error(message);
}

/**
 * Resolve every configured model slot. Slots with a falsy value are skipped;
 * a configured slot that resolves to nothing is an error, and so is a missing
 * `default` slot.
 */
function resolveModelSlots(
  models: KloProjectLlmConfig['models'],
  env: NodeJS.ProcessEnv,
): KloLlmConfig['modelSlots'] {
  const slots: Partial<Record<KloModelRole, string>> & { default?: string } = {};
  Object.entries(models).forEach(([role, reference]) => {
    if (!reference) {
      return;
    }
    slots[role as KloModelRole] = resolveRequired(reference, env, `llm.models.${role} is required`);
  });

  if (slots.default) {
    return slots as KloLlmConfig['modelSlots'];
  }
  throw new Error('llm.models.default is required when llm.provider.backend is not none');
}

/**
 * Translate a snake_case provider section (`api_key`, `base_url`) into its
 * camelCase runtime form. Returns `undefined` when the section is absent or
 * neither field resolves to a value.
 */
function resolvedProviderConfig(
  config: { api_key?: string; base_url?: string } | undefined,
  env: NodeJS.ProcessEnv,
): { apiKey?: string; baseURL?: string } | undefined {
  if (!config) {
    return undefined;
  }

  const apiKey = resolveOptional(config.api_key, env);
  const baseURL = resolveOptional(config.base_url, env);

  const resolved: { apiKey?: string; baseURL?: string } = {};
  if (apiKey) {
    resolved.apiKey = apiKey;
  }
  if (baseURL) {
    resolved.baseURL = baseURL;
  }
  return apiKey || baseURL ? resolved : undefined;
}

/**
 * Turn the project-level LLM section into a `KloLlmConfig`.
 *
 * @returns `null` when the backend is `'none'`; otherwise the resolved config.
 *          `vertex`, `anthropic`, and `gateway` are included only when present;
 *          `promptCaching` is passed through unchanged, even when undefined.
 */
export function resolveLocalKloLlmConfig(config: KloProjectLlmConfig, env: NodeJS.ProcessEnv): KloLlmConfig | null {
  if (config.provider.backend === 'none') {
    return null;
  }

  // Model slots are resolved first so a missing model is reported before any
  // provider credentials are looked up.
  const modelSlots = resolveModelSlots(config.models, env);
  const anthropic = resolvedProviderConfig(config.provider.anthropic, env);
  const gateway = resolvedProviderConfig(config.provider.gateway, env);

  const vertexPart = config.provider.vertex ? { vertex: config.provider.vertex } : {};
  const anthropicPart = anthropic ? { anthropic } : {};
  const gatewayPart = gateway ? { gateway } : {};

  return {
    backend: config.provider.backend,
    ...vertexPart,
    ...anthropicPart,
    ...gatewayPart,
    modelSlots,
    promptCaching: config.promptCaching,
  };
}

/**
 * Resolve the LLM section and build a provider from it.
 *
 * @param deps - Optional overrides; `env` defaults to `process.env` and
 *               `createKloLlmProvider` to the imported factory.
 * @returns `null` when the backend is disabled.
 */
export function createLocalKloLlmProviderFromConfig(
  config: KloProjectLlmConfig,
  deps: LocalConfigDeps = {},
): KloLlmProvider | null {
  const resolved = resolveLocalKloLlmConfig(config, deps.env ?? process.env);
  if (!resolved) {
    return null;
  }
  const createProvider = deps.createKloLlmProvider ?? createKloLlmProvider;
  return createProvider(resolved);
}

/**
 * Turn the project-level embedding section into a `KloEmbeddingConfig`.
 *
 * `model` falls back to `'deterministic'` when unset. The `openai` section is
 * included only when it resolves to at least one value. The
 * `sentenceTransformers` section is copied field by field without reference
 * resolution.
 *
 * @returns `null` when the backend is `'none'`.
 */
export function resolveLocalKloEmbeddingConfig(
  config: KloProjectEmbeddingConfig,
  env: NodeJS.ProcessEnv,
): KloEmbeddingConfig | null {
  if (config.backend === 'none') {
    return null;
  }

  const openai = resolvedProviderConfig(config.openai, env);
  const openaiPart = openai ? { openai } : {};

  const st = config.sentenceTransformers;
  const sentenceTransformersPart = st
    ? { sentenceTransformers: { baseURL: st.base_url, pathPrefix: st.pathPrefix } }
    : {};

  return {
    backend: config.backend,
    model: config.model ?? 'deterministic',
    dimensions: config.dimensions,
    ...openaiPart,
    ...sentenceTransformersPart,
    batchSize: config.batchSize,
  };
}
createKloEmbeddingProvider)(resolved) : null; +} diff --git a/packages/context/src/mcp/context-tools.ts b/packages/context/src/mcp/context-tools.ts new file mode 100644 index 00000000..e5eb67a2 --- /dev/null +++ b/packages/context/src/mcp/context-tools.ts @@ -0,0 +1,509 @@ +import { z } from 'zod'; +import type { KloMcpContextPorts, KloMcpServerLike, KloMcpToolResult, KloMcpUserContext } from './types.js'; + +export interface RegisterKloContextToolsDeps { + server: KloMcpServerLike; + ports: KloMcpContextPorts; + userContext: KloMcpUserContext; +} + +const connectionIdSchema = z.string().min(1); + +const connectionListSchema = z.object({}); + +const connectionTestSchema = z.object({ + connectionId: connectionIdSchema, +}); + +const knowledgeSearchSchema = z.object({ + query: z.string().min(1), + limit: z.number().int().min(1).max(50).default(10), +}); + +const knowledgeReadSchema = z.object({ + key: z.string().min(1), +}); + +const historicSqlUsageFrontmatterSchema = z.object({ + executions: z.number().int().nonnegative(), + distinct_users: z.number().int().nonnegative(), + first_seen: z.string().min(1), + last_seen: z.string().min(1), + p50_runtime_ms: z.number().nonnegative().nullable(), + p95_runtime_ms: z.number().nonnegative().nullable(), + error_rate: z.number().min(0).max(1), + rows_produced: z.number().int().nonnegative().optional(), +}); + +const knowledgeWriteSchema = z.object({ + key: z.string().min(1).max(120), + summary: z.string().min(1).max(200), + content: z.string().min(1), + tags: z.array(z.string()).optional(), + refs: z.array(z.string()).optional(), + sl_refs: z.array(z.string()).optional(), + source: z.string().optional(), + intent: z.string().optional(), + tables: z.array(z.string()).optional(), + representative_sql: z.string().optional(), + usage: historicSqlUsageFrontmatterSchema.optional(), + fingerprints: z.array(z.string()).optional(), +}); + +const slListSourcesSchema = z.object({ + connectionId: connectionIdSchema.optional(), + query: 
z.string().min(1).optional(), +}); + +const slReadSourceSchema = z.object({ + connectionId: connectionIdSchema, + sourceName: z.string().min(1), +}); + +const slWriteSourceSchema = z.object({ + connectionId: connectionIdSchema, + sourceName: z.string().regex(/^[a-z0-9][a-z0-9_]*$/, 'Source name must be snake_case'), + yaml: z.string().min(1).optional(), + source: z.record(z.string(), z.unknown()).optional(), + delete: z.boolean().optional(), +}); + +const slValidateSchema = z.object({ + connectionId: connectionIdSchema, + names: z.array(z.string().min(1)).optional(), +}); + +const slQueryMeasureSchema = z.union([ + z.string(), + z.object({ + expr: z.string().min(1), + name: z.string().min(1), + }), +]); + +const slQueryDimensionSchema = z.union([ + z.string(), + z.object({ + field: z.string().min(1), + granularity: z.string().min(1).optional(), + }), +]); + +const slQueryOrderBySchema = z.union([ + z.string(), + z.object({ + field: z.string().min(1), + direction: z.enum(['asc', 'desc']).default('asc'), + }), +]); + +const slQuerySchema = z.object({ + connectionId: connectionIdSchema.optional(), + measures: z.array(slQueryMeasureSchema).min(1), + dimensions: z.array(slQueryDimensionSchema).default([]), + filters: z.array(z.string()).default([]), + segments: z.array(z.string()).default([]), + order_by: z.array(slQueryOrderBySchema).default([]), + limit: z.number().int().min(0).default(1000), + include_empty: z.boolean().default(true), +}); + +const ingestTriggerSchema = z.object({ + adapter: z.string().min(1), + connectionId: connectionIdSchema, + config: z.unknown().optional(), + trigger: z.enum(['upload', 'scheduled_pull', 'manual_resync']).default('manual_resync'), +}); + +const ingestStatusSchema = z.object({ + runId: z.string().min(1), +}); + +const ingestReportSchema = z.object({ + runId: z.string().min(1), +}); + +const ingestReplaySchema = z.object({ + runId: z.string().min(1), +}); + +const scanTriggerSchema = z.object({ + connectionId: connectionIdSchema, + 
mode: z.enum(['structural', 'relationships', 'enriched']).default('structural'), + detectRelationships: z.boolean().default(false), + dryRun: z.boolean().default(false), +}); + +const scanStatusSchema = z.object({ + runId: z.string().min(1), +}); + +const scanArtifactReadSchema = z.object({ + runId: z.string().min(1), + path: z.string().min(1), +}); + +export function jsonToolResult(structuredContent: T): KloMcpToolResult { + return { + content: [{ type: 'text', text: JSON.stringify(structuredContent, null, 2) }], + structuredContent, + }; +} + +export function jsonErrorToolResult(text: string): KloMcpToolResult> { + return { + content: [{ type: 'text', text }], + isError: true, + }; +} + +function registerParsedTool( + server: KloMcpServerLike, + name: string, + config: { title: string; description: string; inputSchema: unknown }, + schema: TSchema, + handler: (input: z.infer) => Promise, +): void { + server.registerTool(name, config, async (input) => handler(schema.parse(input))); +} + +export function registerKloContextTools(deps: RegisterKloContextToolsDeps): void { + const { ports, server, userContext } = deps; + + if (ports.connections) { + const connections = ports.connections; + registerParsedTool( + server, + 'connection_list', + { + title: 'Connection List', + description: 'List configured read-only data connections available to the KLO project.', + inputSchema: connectionListSchema.shape, + }, + connectionListSchema, + async () => jsonToolResult({ connections: await connections.list() }), + ); + + if (connections.test) { + registerParsedTool( + server, + 'connection_test', + { + title: 'Connection Test', + description: 'Test a configured standalone KLO connection through the host-provided scan connector.', + inputSchema: connectionTestSchema.shape, + }, + connectionTestSchema, + async (input) => { + const result = await connections.test?.({ connectionId: input.connectionId }); + return result + ? 
jsonToolResult(result) + : jsonErrorToolResult(`Connection "${input.connectionId}" was not found.`); + }, + ); + } + } + + if (ports.knowledge) { + const knowledge = ports.knowledge; + registerParsedTool( + server, + 'knowledge_search', + { + title: 'Knowledge Search', + description: 'Search KLO knowledge pages and return ranked summaries.', + inputSchema: knowledgeSearchSchema.shape, + }, + knowledgeSearchSchema, + async (input) => + jsonToolResult( + await knowledge.search({ + userId: userContext.userId, + query: input.query, + limit: input.limit, + }), + ), + ); + + registerParsedTool( + server, + 'knowledge_read', + { + title: 'Knowledge Read', + description: 'Read a KLO knowledge page by key.', + inputSchema: knowledgeReadSchema.shape, + }, + knowledgeReadSchema, + async (input) => { + const page = await knowledge.read({ userId: userContext.userId, key: input.key }); + return page ? jsonToolResult(page) : jsonErrorToolResult(`Knowledge page "${input.key}" was not found.`); + }, + ); + + registerParsedTool( + server, + 'knowledge_write', + { + title: 'Knowledge Write', + description: 'Create or replace a KLO knowledge page and its SL references.', + inputSchema: knowledgeWriteSchema.shape, + }, + knowledgeWriteSchema, + async (input) => + jsonToolResult( + await knowledge.write({ + userId: userContext.userId, + key: input.key, + summary: input.summary, + content: input.content, + tags: input.tags, + refs: input.refs, + slRefs: input.sl_refs, + source: input.source, + intent: input.intent, + tables: input.tables, + representativeSql: input.representative_sql, + usage: input.usage, + fingerprints: input.fingerprints, + }), + ), + ); + } + + if (ports.semanticLayer) { + const semanticLayer = ports.semanticLayer; + registerParsedTool( + server, + 'sl_list_sources', + { + title: 'Semantic Layer List Sources', + description: 'List semantic-layer sources, optionally filtered by connection or search query.', + inputSchema: slListSourcesSchema.shape, + }, + 
slListSourcesSchema, + async (input) => jsonToolResult(await semanticLayer.listSources(input)), + ); + + registerParsedTool( + server, + 'sl_read_source', + { + title: 'Semantic Layer Read Source', + description: 'Read a semantic-layer YAML source by connection id and source name.', + inputSchema: slReadSourceSchema.shape, + }, + slReadSourceSchema, + async (input) => { + const source = await semanticLayer.readSource(input); + return source + ? jsonToolResult(source) + : jsonErrorToolResult(`Semantic-layer source "${input.sourceName}" was not found.`); + }, + ); + + registerParsedTool( + server, + 'sl_write_source', + { + title: 'Semantic Layer Write Source', + description: 'Create, replace, or delete a semantic-layer source.', + inputSchema: slWriteSourceSchema.shape, + }, + slWriteSourceSchema, + async (input) => + jsonToolResult( + await semanticLayer.writeSource({ + connectionId: input.connectionId, + sourceName: input.sourceName, + yaml: input.yaml, + source: input.source, + delete: input.delete, + }), + ), + ); + + registerParsedTool( + server, + 'sl_validate', + { + title: 'Semantic Layer Validate', + description: 'Validate semantic-layer sources for a connection.', + inputSchema: slValidateSchema.shape, + }, + slValidateSchema, + async (input) => jsonToolResult(await semanticLayer.validate(input)), + ); + + registerParsedTool( + server, + 'sl_query', + { + title: 'Semantic Layer Query', + description: 'Execute a semantic-layer query and return rows, headers, SQL, and the query plan.', + inputSchema: slQuerySchema.shape, + }, + slQuerySchema, + async (input) => + jsonToolResult( + await semanticLayer.query({ + connectionId: input.connectionId, + query: { + measures: input.measures, + dimensions: input.dimensions, + filters: input.filters, + segments: input.segments, + order_by: input.order_by, + limit: input.limit, + include_empty: input.include_empty, + }, + }), + ), + ); + } + + if (ports.ingest) { + const ingest = ports.ingest; + registerParsedTool( + 
server, + 'ingest_trigger', + { + title: 'Ingest Trigger', + description: 'Trigger a KLO ingest run for an adapter and connection.', + inputSchema: ingestTriggerSchema.shape, + }, + ingestTriggerSchema, + async (input) => jsonToolResult(await ingest.trigger(input)), + ); + + registerParsedTool( + server, + 'ingest_status', + { + title: 'Ingest Status', + description: + 'Read the current or final status for an ingest run, including local diff and work-unit summaries when available.', + inputSchema: ingestStatusSchema.shape, + }, + ingestStatusSchema, + async (input) => { + const status = await ingest.status(input); + return status ? jsonToolResult(status) : jsonErrorToolResult(`Ingest run "${input.runId}" was not found.`); + }, + ); + + if (ingest.report) { + registerParsedTool( + server, + 'ingest_report', + { + title: 'Ingest Report', + description: 'Read the stored canonical KLO ingest report for a local run id, job id, or report id.', + inputSchema: ingestReportSchema.shape, + }, + ingestReportSchema, + async (input) => { + const report = await ingest.report?.(input); + return report ? jsonToolResult(report) : jsonErrorToolResult(`Ingest report "${input.runId}" was not found.`); + }, + ); + } + + if (ingest.replay) { + registerParsedTool( + server, + 'ingest_replay', + { + title: 'Ingest Replay', + description: 'Read the memory-flow replay snapshot for a stored canonical KLO ingest run.', + inputSchema: ingestReplaySchema.shape, + }, + ingestReplaySchema, + async (input) => { + const replay = await ingest.replay?.(input); + return replay ? 
jsonToolResult(replay) : jsonErrorToolResult(`Ingest replay "${input.runId}" was not found.`); + }, + ); + } + } + + if (ports.scan) { + const scan = ports.scan; + registerParsedTool( + server, + 'scan_trigger', + { + title: 'Scan Trigger', + description: 'Run a standalone KLO structural connection scan and return its report summary.', + inputSchema: scanTriggerSchema.shape, + }, + scanTriggerSchema, + async (input) => jsonToolResult(await scan.trigger(input)), + ); + + registerParsedTool( + server, + 'scan_status', + { + title: 'Scan Status', + description: 'Read the current or final status for a standalone KLO scan run.', + inputSchema: scanStatusSchema.shape, + }, + scanStatusSchema, + async (input) => { + const status = await scan.status(input); + return status ? jsonToolResult(status) : jsonErrorToolResult(`Scan run "${input.runId}" was not found.`); + }, + ); + + registerParsedTool( + server, + 'scan_report', + { + title: 'Scan Report', + description: 'Read a standalone KLO scan report by run id.', + inputSchema: scanStatusSchema.shape, + }, + scanStatusSchema, + async (input) => { + const report = await scan.report(input); + return report ? jsonToolResult(report) : jsonErrorToolResult(`Scan report "${input.runId}" was not found.`); + }, + ); + + if (scan.listArtifacts) { + registerParsedTool( + server, + 'scan_list_artifacts', + { + title: 'Scan List Artifacts', + description: 'List report, raw-source, manifest, and enrichment artifact paths for a standalone KLO scan run.', + inputSchema: scanStatusSchema.shape, + }, + scanStatusSchema, + async (input) => { + const result = await scan.listArtifacts?.({ runId: input.runId }); + return result ? 
jsonToolResult(result) : jsonErrorToolResult(`Scan run "${input.runId}" was not found.`); + }, + ); + } + + if (scan.readArtifact) { + registerParsedTool( + server, + 'scan_read_artifact', + { + title: 'Scan Read Artifact', + description: 'Read one artifact that belongs to a standalone KLO scan run.', + inputSchema: scanArtifactReadSchema.shape, + }, + scanArtifactReadSchema, + async (input) => { + const result = await scan.readArtifact?.({ runId: input.runId, path: input.path }); + return result + ? jsonToolResult(result) + : jsonErrorToolResult(`Scan artifact "${input.path}" was not found for run "${input.runId}".`); + }, + ); + } + } +} diff --git a/packages/context/src/mcp/index.ts b/packages/context/src/mcp/index.ts new file mode 100644 index 00000000..0fde3f55 --- /dev/null +++ b/packages/context/src/mcp/index.ts @@ -0,0 +1,33 @@ +export type { RegisterKloContextToolsDeps } from './context-tools.js'; +export { jsonErrorToolResult, jsonToolResult, registerKloContextTools } from './context-tools.js'; +export { createLocalProjectMcpContextPorts } from './local-project-ports.js'; +export { createDefaultKloMcpServer, createKloMcpServer } from './server.js'; +export type { + KloConnectionSummary, + KloConnectionsMcpPort, + KloIngestDiffSummary, + KloIngestMcpPort, + KloIngestStatusResponse, + KloIngestTriggerKind, + KloIngestTriggerResponse, + KloIngestWorkUnitSummary, + KloKnowledgeMcpPort, + KloKnowledgePage, + KloKnowledgeSearchResponse, + KloKnowledgeSearchResult, + KloKnowledgeWriteResponse, + KloMcpContextPorts, + KloMcpServerDeps, + KloMcpServerLike, + KloMcpTextContent, + KloMcpToolResult, + KloMcpUserContext, + KloSemanticLayerListResponse, + KloSemanticLayerMcpPort, + KloSemanticLayerQueryResponse, + KloSemanticLayerReadResponse, + KloSemanticLayerSourceSummary, + KloSemanticLayerValidationResponse, + KloSemanticLayerWriteResponse, + MemoryCapturePort, +} from './types.js'; diff --git a/packages/context/src/mcp/local-project-ports.test.ts 
b/packages/context/src/mcp/local-project-ports.test.ts new file mode 100644 index 00000000..215bdaf9 --- /dev/null +++ b/packages/context/src/mcp/local-project-ports.test.ts @@ -0,0 +1,1044 @@ +import { access, mkdir, mkdtemp, readFile, rm, writeFile } from 'node:fs/promises'; +import { tmpdir } from 'node:os'; +import { join } from 'node:path'; +import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'; +import { AgentRunnerService } from '../agent/index.js'; +import { FakeSourceAdapter, type MemoryFlowReplayInput } from '../ingest/index.js'; +import { initKloProject } from '../project/index.js'; +import { createKloConnectorCapabilities, type KloScanConnector, type KloSchemaSnapshot } from '../scan/index.js'; +import { writeLocalSlSource } from '../sl/index.js'; +import { createLocalProjectMcpContextPorts } from './local-project-ports.js'; + +class TestAgentRunner extends AgentRunnerService { + override runLoop = vi.fn().mockResolvedValue({ stopReason: 'natural' as const }); + + constructor() { + super({ llmProvider: { getModel: () => ({}) as never } as never }); + } +} + +describe('createLocalProjectMcpContextPorts', () => { + let tempDir: string; + + beforeEach(async () => { + tempDir = await mkdtemp(join(tmpdir(), 'klo-mcp-local-')); + }); + + afterEach(async () => { + await rm(tempDir, { recursive: true, force: true }); + }); + + function testSnapshot(connectionId = 'warehouse'): KloSchemaSnapshot { + return { + connectionId, + driver: 'postgres', + extractedAt: '2026-04-29T12:00:00.000Z', + scope: { schemas: ['public'] }, + metadata: {}, + tables: [ + { + catalog: null, + db: 'public', + name: 'orders', + kind: 'table', + comment: null, + estimatedRows: 1, + foreignKeys: [], + columns: [ + { + name: 'id', + nativeType: 'integer', + normalizedType: 'integer', + dimensionType: 'number', + nullable: false, + primaryKey: true, + comment: null, + }, + ], + }, + ], + }; + } + + function testConnector(snapshot = testSnapshot()): KloScanConnector { + 
return { + id: `test:${snapshot.connectionId}`, + driver: snapshot.driver, + capabilities: createKloConnectorCapabilities(), + introspect: vi.fn(async () => snapshot), + cleanup: vi.fn(async () => {}), + }; + } + + it('lists local project connections from klo.yaml', async () => { + const project = await initKloProject({ projectDir: tempDir, projectName: 'warehouse' }); + project.config.connections.warehouse = { + driver: 'postgres', + url: 'env:DATABASE_URL', + readonly: true, + }; + const ports = createLocalProjectMcpContextPorts(project); + + await expect(ports.connections?.list()).resolves.toEqual([ + { id: 'warehouse', name: 'warehouse', connectionType: 'POSTGRESQL' }, + ]); + }); + + it('tests a local project connection through the native scan connector factory', async () => { + const project = await initKloProject({ projectDir: tempDir, projectName: 'warehouse' }); + project.config.connections.warehouse = { + driver: 'postgres', + url: 'env:DATABASE_URL', + readonly: true, + }; + const connector = testConnector(); + const createConnector = vi.fn(async () => connector); + const ports = createLocalProjectMcpContextPorts(project, { + localScan: { + createConnector, + }, + }); + + await expect(ports.connections?.test?.({ connectionId: 'warehouse' })).resolves.toEqual({ + id: 'warehouse', + connectionType: 'POSTGRESQL', + ok: true, + tableCount: 1, + message: 'Connection test passed.', + warnings: [], + }); + expect(createConnector).toHaveBeenCalledWith('warehouse'); + expect(connector.introspect).toHaveBeenCalledWith( + { + connectionId: 'warehouse', + driver: 'postgres', + mode: 'structural', + dryRun: true, + detectRelationships: false, + }, + { runId: 'connection-test-warehouse' }, + ); + expect(connector.cleanup).toHaveBeenCalled(); + }); + + it('triggers canonical bundle ingest and reads status, report, and replay through MCP ports', async () => { + const project = await initKloProject({ projectDir: tempDir, projectName: 'warehouse' }); + 
project.config.connections.warehouse = { + driver: 'postgres', + readonly: true, + }; + project.config.ingest.adapters = ['fake']; + project.config.ingest.embeddings = { + backend: 'deterministic', + dimensions: 8, + batchSize: 64, + }; + project.config.llm = { + provider: { backend: 'none' }, + models: {}, + }; + + const sourceDir = join(tempDir, 'source'); + await mkdir(join(sourceDir, 'orders'), { recursive: true }); + await writeFile(join(sourceDir, 'orders', 'orders.json'), '{"name":"orders"}\n', 'utf-8'); + + const agentRunner = new TestAgentRunner(); + const ports = createLocalProjectMcpContextPorts(project, { + localIngest: { + adapters: [new FakeSourceAdapter()], + jobIdFactory: () => 'mcp-full-1', + agentRunner, + }, + }); + + const trigger = await ports.ingest?.trigger({ + adapter: 'fake', + connectionId: 'warehouse', + trigger: 'manual_resync', + config: { sourceDir }, + }); + + expect(trigger).toMatchObject({ + runId: expect.any(String), + jobId: 'mcp-full-1', + reportId: expect.any(String), + }); + expect(trigger?.runId).not.toBe('mcp-full-1'); + expect(agentRunner.runLoop).toHaveBeenCalledTimes(1); + + await expect(ports.ingest?.status({ runId: trigger?.jobId ?? '' })).resolves.toMatchObject({ + runId: trigger?.runId, + jobId: 'mcp-full-1', + reportId: trigger?.reportId, + status: 'done', + stage: 'done', + progress: 1, + done: true, + adapter: 'fake', + connectionId: 'warehouse', + sourceDir: null, + diffSummary: { added: 1, modified: 0, deleted: 0, unchanged: 0 }, + rawFileCount: 1, + workUnitCount: 1, + workUnits: [ + { + unitKey: 'fake-orders', + rawFiles: ['orders/orders.json'], + peerFileIndex: [], + dependencyPaths: [], + }, + ], + evictionDeletedRawPaths: [], + errors: [], + }); + + await expect(ports.ingest?.report?.({ runId: trigger?.reportId ?? 
'' })).resolves.toMatchObject({ + id: trigger?.reportId, + runId: trigger?.runId, + jobId: 'mcp-full-1', + connectionId: 'warehouse', + sourceKey: 'fake', + }); + + const replay = (await ports.ingest?.replay?.({ runId: trigger?.runId ?? '' })) as MemoryFlowReplayInput | null; + expect(replay).toMatchObject({ + runId: trigger?.runId, + reportId: trigger?.reportId, + reportPath: trigger?.reportId, + status: 'done', + adapter: 'fake', + connectionId: 'warehouse', + syncId: expect.stringContaining('mcp-full-1'), + }); + expect(replay?.events).toEqual( + expect.arrayContaining([ + { type: 'work_unit_finished', unitKey: 'fake-orders', status: 'success' }, + { type: 'report_created', runId: trigger?.runId, reportPath: trigger?.reportId }, + ]), + ); + }); + + it('returns child run metadata for local Metabase fan-out triggers', async () => { + const project = await initKloProject({ projectDir: tempDir, projectName: 'warehouse' }); + project.config.connections = { + 'prod-metabase': { + driver: 'metabase', + api_url: 'https://metabase.example.com', + }, + warehouse_a: { driver: 'postgres', url: 'postgres://localhost/a' }, + warehouse_b: { driver: 'postgres', url: 'postgres://localhost/b' }, + }; + project.config.ingest.adapters = ['metabase']; + const reportA = { + id: 'report-a', + runId: 'run-a', + jobId: 'child-a', + connectionId: 'warehouse_a', + sourceKey: 'metabase', + createdAt: '2026-05-04T12:00:00.000Z', + body: { + syncId: 'sync-a', + diffSummary: { added: 1, modified: 0, deleted: 0, unchanged: 0 }, + commitSha: null, + workUnits: [], + failedWorkUnits: [], + reconciliationSkipped: false, + conflictsResolved: [], + evictionsApplied: [], + unmappedFallbacks: [], + evictionInputs: [], + unresolvedCards: [], + supersededBy: null, + overrideOf: null, + provenanceRows: [], + toolTranscripts: [], + }, + }; + const reportB = { + ...reportA, + id: 'report-b', + runId: 'run-b', + jobId: 'child-b', + connectionId: 'warehouse_b', + body: { ...reportA.body, syncId: 'sync-b' 
}, + }; + + const ports = createLocalProjectMcpContextPorts(project, { + localIngest: { + runLocalMetabaseIngest: async () => ({ + metabaseConnectionId: 'prod-metabase', + status: 'all_succeeded', + totals: { workUnits: 2, failedWorkUnits: 0 }, + children: [ + { + jobId: 'child-a', + metabaseConnectionId: 'prod-metabase', + metabaseDatabaseId: 1, + targetConnectionId: 'warehouse_a', + result: { + jobId: 'child-a', + runId: 'run-a', + syncId: 'sync-a', + diffSummary: { added: 1, modified: 0, deleted: 0, unchanged: 0 }, + workUnitCount: 0, + failedWorkUnits: [], + artifactsWritten: 0, + commitSha: null, + }, + report: reportA, + }, + { + jobId: 'child-b', + metabaseConnectionId: 'prod-metabase', + metabaseDatabaseId: 2, + targetConnectionId: 'warehouse_b', + result: { + jobId: 'child-b', + runId: 'run-b', + syncId: 'sync-b', + diffSummary: { added: 1, modified: 0, deleted: 0, unchanged: 0 }, + workUnitCount: 0, + failedWorkUnits: [], + artifactsWritten: 0, + commitSha: null, + }, + report: reportB, + }, + ], + }), + }, + }); + + await expect( + ports.ingest?.trigger({ + adapter: 'metabase', + connectionId: 'prod-metabase', + trigger: 'manual_resync', + }), + ).resolves.toEqual({ + runId: 'metabase-fanout:prod-metabase', + jobId: undefined, + reportId: undefined, + fanout: { + status: 'all_succeeded', + children: [ + { + runId: 'run-a', + jobId: 'child-a', + reportId: 'report-a', + targetConnectionId: 'warehouse_a', + metabaseDatabaseId: 1, + }, + { + runId: 'run-b', + jobId: 'child-b', + reportId: 'report-b', + targetConnectionId: 'warehouse_b', + metabaseDatabaseId: 2, + }, + ], + }, + }); + }); + + it('writes, reads, and searches global knowledge pages', async () => { + const project = await initKloProject({ projectDir: tempDir, projectName: 'warehouse' }); + const ports = createLocalProjectMcpContextPorts(project); + + await expect( + ports.knowledge?.write({ + userId: 'local-user', + key: 'revenue', + summary: 'Revenue definition', + content: '# 
Revenue\n\nRevenue is net of refunds.', + tags: ['finance'], + refs: ['docs/revenue.md'], + slRefs: ['warehouse.orders'], + }), + ).resolves.toMatchObject({ success: true, key: 'revenue', action: 'created' }); + + await expect(ports.knowledge?.read({ userId: 'local-user', key: 'revenue' })).resolves.toMatchObject({ + key: 'revenue', + scope: 'GLOBAL', + summary: 'Revenue definition', + tags: ['finance'], + refs: ['docs/revenue.md'], + slRefs: ['warehouse.orders'], + content: '# Revenue\n\nRevenue is net of refunds.', + }); + + const search = await ports.knowledge?.search({ userId: 'local-user', query: 'refunds', limit: 5 }); + expect(search).toEqual({ + results: [ + expect.objectContaining({ + key: 'revenue', + path: 'knowledge/global/revenue.md', + scope: 'GLOBAL', + summary: 'Revenue definition', + score: expect.any(Number), + matchReasons: expect.arrayContaining(['lexical']), + }), + ], + totalFound: 1, + }); + expect(search?.results[0]?.score).toBeGreaterThan(0); + await expect(access(join(project.projectDir, '.klo', 'db.sqlite'))).resolves.toBeUndefined(); + }); + + it('writes, lists, reads, and validates semantic-layer sources', async () => { + const project = await initKloProject({ projectDir: tempDir, projectName: 'warehouse' }); + const ports = createLocalProjectMcpContextPorts(project); + + await expect( + ports.semanticLayer?.writeSource({ + connectionId: 'warehouse', + sourceName: 'orders', + source: { + name: 'orders', + table: 'public.orders', + grain: ['id'], + columns: [{ name: 'id', type: 'number' }], + joins: [], + measures: [{ name: 'order_count', expr: 'count(*)' }], + }, + }), + ).resolves.toMatchObject({ success: true, sourceName: 'orders' }); + + await expect(ports.semanticLayer?.listSources({ connectionId: 'warehouse' })).resolves.toEqual({ + sources: [ + { + connectionId: 'warehouse', + connectionName: 'warehouse', + name: 'orders', + columnCount: 1, + measureCount: 1, + joinCount: 0, + }, + ], + totalSources: 1, + }); + + await expect( + 
ports.semanticLayer?.listSources({ connectionId: 'warehouse', query: 'order_count' }), + ).resolves.toEqual({ + sources: [ + expect.objectContaining({ + connectionId: 'warehouse', + connectionName: 'warehouse', + name: 'orders', + columnCount: 1, + measureCount: 1, + joinCount: 0, + score: expect.any(Number), + matchReasons: expect.arrayContaining(['lexical']), + }), + ], + totalSources: 1, + }); + await expect(access(join(project.projectDir, '.klo/db.sqlite'))).resolves.toBeUndefined(); + + await expect( + ports.semanticLayer?.readSource({ connectionId: 'warehouse', sourceName: 'orders' }), + ).resolves.toMatchObject({ + sourceName: 'orders', + yaml: expect.stringContaining('name: orders'), + }); + + await expect(ports.semanticLayer?.validate({ connectionId: 'warehouse' })).resolves.toEqual({ + success: true, + errors: [], + warnings: ['Local stdio validation checks YAML shape only; Python semantic validation is not configured.'], + }); + }); + + it('returns semantic-layer hybrid search metadata through local project ports', async () => { + const project = await initKloProject({ projectDir: tempDir, projectName: 'warehouse' }); + await writeLocalSlSource(project, { + connectionId: 'warehouse', + sourceName: 'orders', + yaml: [ + 'name: orders', + 'table: public.orders', + 'grain:', + ' - order_id', + 'columns:', + ' - name: order_id', + ' type: string', + ' - name: status', + ' type: string', + '', + ].join('\n'), + }); + await project.fileStore.writeFile( + 'raw-sources/warehouse/live-database/sync-1/enrichment/relationship-profile.json', + `${JSON.stringify( + { + connectionId: 'warehouse', + driver: 'postgres', + sqlAvailable: true, + queryCount: 2, + tables: [], + columns: { + 'orders.status': { + table: { catalog: null, db: 'public', name: 'orders' }, + column: 'status', + nativeType: 'text', + normalizedType: 'string', + rowCount: 10, + nullCount: 0, + distinctCount: 2, + uniquenessRatio: 0.2, + nullRate: 0, + sampleValues: ['paid', 'refunded'], + 
minTextLength: 4, + maxTextLength: 8, + }, + }, + warnings: [], + }, + null, + 2, + )}\n`, + 'klo', + 'klo@example.com', + 'Seed dictionary profile', + ); + + const ports = createLocalProjectMcpContextPorts(project); + await expect(ports.semanticLayer?.listSources({ connectionId: 'warehouse', query: 'paid' })).resolves.toEqual({ + sources: [ + expect.objectContaining({ + connectionId: 'warehouse', + connectionName: 'warehouse', + name: 'orders', + score: expect.any(Number), + matchReasons: expect.arrayContaining(['dictionary']), + dictionaryMatches: [{ column: 'status', values: ['paid'] }], + }), + ], + totalSources: 1, + }); + }); + + it('uses configured local embeddings for semantic-layer search when available', async () => { + const project = await initKloProject({ projectDir: tempDir, projectName: 'warehouse' }); + project.config.ingest.embeddings = { backend: 'none', dimensions: 2 }; + await writeLocalSlSource(project, { + connectionId: 'warehouse', + sourceName: 'orders', + yaml: [ + 'name: orders', + 'description: Revenue facts', + 'table: public.orders', + 'grain:', + ' - order_id', + 'columns:', + ' - name: order_id', + ' type: string', + '', + ].join('\n'), + }); + + const ports = createLocalProjectMcpContextPorts(project, { + embeddingService: { + maxBatchSize: 8, + async computeEmbedding(text: string) { + return text.includes('cash collection') ? [1, 0] : [0, 1]; + }, + async computeEmbeddingsBulk(texts: string[]) { + return texts.map((text) => (text.includes('Revenue facts') ? 
[1, 0] : [0, 1])); + }, + }, + }); + + const result = await ports.semanticLayer?.listSources({ connectionId: 'warehouse', query: 'cash collection' }); + + expect(result?.sources[0]).toMatchObject({ + name: 'orders', + matchReasons: expect.arrayContaining(['semantic']), + lanes: expect.arrayContaining([expect.objectContaining({ lane: 'semantic', status: 'available' })]), + }); + }); + + it('rejects path traversal keys before touching the project directory', async () => { + const project = await initKloProject({ projectDir: tempDir, projectName: 'warehouse' }); + const ports = createLocalProjectMcpContextPorts(project); + + await expect( + ports.knowledge?.read({ + userId: 'local-user', + key: '../outside', + }), + ).rejects.toThrow('Unsafe knowledge key'); + + await expect( + ports.semanticLayer?.readSource({ + connectionId: 'warehouse', + sourceName: '../orders', + }), + ).rejects.toThrow('Unsafe semantic-layer source name'); + }); + + it('uses semantic compute for validation and compile-only sl_query when supplied', async () => { + const project = await initKloProject({ projectDir: tempDir, projectName: 'warehouse' }); + project.config.connections.warehouse = { + driver: 'postgres', + url: 'env:DATABASE_URL', + readonly: true, + }; + const shapeOnlyPorts = createLocalProjectMcpContextPorts(project); + await shapeOnlyPorts.semanticLayer?.writeSource({ + connectionId: 'warehouse', + sourceName: 'orders', + source: { + name: 'orders', + table: 'public.orders', + grain: ['id'], + columns: [ + { name: 'id', type: 'number' }, + { name: 'status', type: 'string' }, + ], + joins: [], + measures: [{ name: 'order_count', expr: 'count(*)' }], + }, + }); + + const semanticLayerCompute = { + validateSources: vi.fn(async () => ({ + valid: true, + errors: [], + warnings: ['python validation ran'], + perSourceWarnings: {}, + })), + query: vi.fn(async () => ({ + sql: 'select status, count(*) as order_count from public.orders group by status', + dialect: 'postgres', + columns: [{ 
name: 'orders.status' }, { name: 'orders.order_count' }], + plan: { sources_used: ['orders'] }, + })), + generateSources: vi.fn(), + }; + const ports = createLocalProjectMcpContextPorts(project, { semanticLayerCompute }); + + await expect(ports.semanticLayer?.validate({ connectionId: 'warehouse', names: ['orders'] })).resolves.toEqual({ + success: true, + errors: [], + warnings: ['python validation ran'], + }); + expect(semanticLayerCompute.validateSources).toHaveBeenCalledWith({ + sources: [ + { + name: 'orders', + table: 'public.orders', + grain: ['id'], + columns: [ + { name: 'id', type: 'number' }, + { name: 'status', type: 'string' }, + ], + joins: [], + measures: [{ name: 'order_count', expr: 'count(*)' }], + }, + ], + dialect: 'postgres', + recentlyTouched: ['orders'], + }); + + await expect( + ports.semanticLayer?.query({ + connectionId: 'warehouse', + query: { + measures: ['orders.order_count'], + dimensions: ['orders.status'], + }, + }), + ).resolves.toMatchObject({ + sql: 'select status, count(*) as order_count from public.orders group by status', + headers: ['orders.status', 'orders.order_count'], + rows: [], + totalRows: 0, + plan: { + sources_used: ['orders'], + execution: { + mode: 'compile_only', + reason: 'Local semantic-layer query compiled SQL but no data-source execution adapter is configured.', + }, + }, + }); + }); + + it('executes local MCP sl_query when a query executor is configured', async () => { + const project = await initKloProject({ projectDir: tempDir, projectName: 'warehouse' }); + project.config.connections.warehouse = { + driver: 'postgres', + url: 'env:DATABASE_URL', + readonly: true, + }; + const shapeOnlyPorts = createLocalProjectMcpContextPorts(project); + await shapeOnlyPorts.semanticLayer?.writeSource({ + connectionId: 'warehouse', + sourceName: 'orders', + source: { + name: 'orders', + table: 'public.orders', + grain: ['id'], + columns: [{ name: 'id', type: 'number' }], + joins: [], + measures: [{ name: 'order_count', expr: 
'count(*)' }], + }, + }); + const compute = { + validateSources: vi.fn(), + generateSources: vi.fn(), + query: vi.fn(async () => ({ + sql: 'select count(*) as order_count from public.orders', + dialect: 'postgres', + columns: [{ name: 'orders.order_count' }], + plan: {}, + })), + }; + const queryExecutor = { + execute: vi.fn(async () => ({ + headers: ['orders.order_count'], + rows: [[3]], + totalRows: 1, + command: 'SELECT', + rowCount: 1, + })), + }; + const ports = createLocalProjectMcpContextPorts(project, { + semanticLayerCompute: compute, + queryExecutor, + }); + + const result = await ports.semanticLayer?.query({ + connectionId: 'warehouse', + query: { measures: ['orders.order_count'], dimensions: [], limit: 5 }, + }); + + expect(result?.rows).toEqual([[3]]); + expect(result?.totalRows).toBe(1); + expect(queryExecutor.execute).toHaveBeenCalledWith( + expect.objectContaining({ + connectionId: 'warehouse', + maxRows: 5, + }), + ); + }); + + it('exposes detailed local ingest trigger and status ports when local ingest is enabled', async () => { + const project = await initKloProject({ projectDir: tempDir, projectName: 'warehouse' }); + project.config.connections.warehouse = { driver: 'postgres' }; + project.config.ingest.adapters = ['fake']; + project.config.ingest.embeddings = { + backend: 'deterministic', + dimensions: 8, + batchSize: 64, + }; + project.config.llm = { + provider: { backend: 'none' }, + models: {}, + }; + const sourceDir = join(project.projectDir, 'upload'); + await mkdir(join(sourceDir, 'orders'), { recursive: true }); + await writeFile(join(sourceDir, 'orders', 'orders.json'), '{"name":"orders"}\n', 'utf-8'); + + let nextJob = 0; + const agentRunner = new TestAgentRunner(); + const ports = createLocalProjectMcpContextPorts(project, { + localIngest: { + adapters: [new FakeSourceAdapter()], + jobIdFactory: () => `mcp-local-run-${++nextJob}`, + agentRunner, + }, + }); + + const firstTrigger = await ports.ingest?.trigger({ + adapter: 'fake', + 
connectionId: 'warehouse', + trigger: 'manual_resync', + config: { sourceDir }, + }); + + expect(firstTrigger).toMatchObject({ + runId: expect.any(String), + jobId: 'mcp-local-run-1', + reportId: expect.any(String), + }); + expect(firstTrigger?.runId).not.toBe('mcp-local-run-1'); + + await expect(ports.ingest?.status({ runId: 'mcp-local-run-1' })).resolves.toMatchObject({ + runId: firstTrigger?.runId, + jobId: 'mcp-local-run-1', + reportId: firstTrigger?.reportId, + status: 'done', + stage: 'done', + done: true, + progress: 1, + adapter: 'fake', + connectionId: 'warehouse', + sourceDir: null, + syncId: expect.stringContaining('mcp-local-run-1'), + startedAt: expect.any(String), + completedAt: expect.any(String), + previousRunId: null, + diffSummary: { + added: 1, + modified: 0, + deleted: 0, + unchanged: 0, + }, + rawFileCount: 1, + workUnitCount: 1, + workUnits: [ + { + unitKey: 'fake-orders', + rawFiles: ['orders/orders.json'], + peerFileIndex: [], + dependencyPaths: [], + }, + ], + evictionDeletedRawPaths: [], + errors: [], + }); + + const secondTrigger = await ports.ingest?.trigger({ + adapter: 'fake', + connectionId: 'warehouse', + trigger: 'manual_resync', + config: { sourceDir }, + }); + + expect(secondTrigger).toMatchObject({ + runId: expect.any(String), + jobId: 'mcp-local-run-2', + reportId: expect.any(String), + }); + expect(secondTrigger?.runId).not.toBe('mcp-local-run-2'); + + await expect(ports.ingest?.status({ runId: 'mcp-local-run-2' })).resolves.toMatchObject({ + runId: secondTrigger?.runId, + jobId: 'mcp-local-run-2', + reportId: secondTrigger?.reportId, + status: 'done', + stage: 'done', + done: true, + progress: 1, + adapter: 'fake', + connectionId: 'warehouse', + sourceDir: null, + syncId: expect.stringContaining('mcp-local-run-2'), + startedAt: expect.any(String), + completedAt: expect.any(String), + previousRunId: null, + diffSummary: { + added: 0, + modified: 0, + deleted: 0, + unchanged: 1, + }, + rawFileCount: 0, + workUnitCount: 0, + 
workUnits: [], + evictionDeletedRawPaths: [], + errors: [], + }); + expect(agentRunner.runLoop).toHaveBeenCalledTimes(1); + }); + + it('triggers fetch-capable local ingest without sourceDir config', async () => { + const project = await initKloProject({ projectDir: tempDir, projectName: 'warehouse' }); + project.config.connections.warehouse = { + driver: 'postgres', + url: 'postgres://localhost:5432/warehouse', + readonly: true, + }; + project.config.ingest.adapters = ['live-database']; + project.config.llm = { + provider: { backend: 'none' }, + models: {}, + }; + const agentRunner = new TestAgentRunner(); + const ports = createLocalProjectMcpContextPorts(project, { + localIngest: { + adapters: [ + { + source: 'live-database', + skillNames: ['live_database_ingest'], + async fetch(_pullConfig, stagedDir) { + await mkdir(join(stagedDir, 'tables'), { recursive: true }); + await writeFile(join(stagedDir, 'connection.json'), '{"connectionId":"warehouse"}\n', 'utf-8'); + await writeFile(join(stagedDir, 'foreign-keys.json'), '{"foreignKeys":[]}\n', 'utf-8'); + await writeFile( + join(stagedDir, 'tables', 'orders.json'), + '{"name":"orders","db":"public","columns":[]}\n', + 'utf-8', + ); + }, + async detect() { + return true; + }, + async chunk() { + return { + workUnits: [ + { + unitKey: 'live-database-public-orders', + rawFiles: ['tables/orders.json'], + dependencyPaths: ['connection.json', 'foreign-keys.json'], + peerFileIndex: [], + }, + ], + }; + }, + }, + ], + jobIdFactory: () => 'local-live-db-mcp', + agentRunner, + }, + }); + + const result = await ports.ingest?.trigger({ + adapter: 'live-database', + connectionId: 'warehouse', + trigger: 'manual_resync', + config: {}, + }); + + expect(result).toMatchObject({ + runId: expect.any(String), + jobId: 'local-live-db-mcp', + reportId: expect.any(String), + }); + expect(result?.runId).not.toBe('local-live-db-mcp'); + await expect(ports.ingest?.status({ runId: 'local-live-db-mcp' })).resolves.toMatchObject({ + runId: 
result?.runId, + jobId: 'local-live-db-mcp', + reportId: result?.reportId, + adapter: 'live-database', + sourceDir: null, + rawFileCount: 1, + workUnitCount: 1, + }); + expect(agentRunner.runLoop).toHaveBeenCalledTimes(1); + }); + + it('lists and reads only artifacts that belong to a local scan report', async () => { + const project = await initKloProject({ projectDir: tempDir, projectName: 'warehouse' }); + project.config.connections.warehouse = { + driver: 'postgres', + url: 'env:DATABASE_URL', + readonly: true, + }; + project.config.ingest.adapters = ['live-database']; + const ports = createLocalProjectMcpContextPorts(project, { + localScan: { + adapters: [ + { + source: 'live-database', + skillNames: ['live_database_ingest'], + async fetch(_pullConfig, stagedDir) { + await mkdir(join(stagedDir, 'tables'), { recursive: true }); + await writeFile(join(stagedDir, 'connection.json'), '{"connectionId":"warehouse"}\n', 'utf-8'); + await writeFile(join(stagedDir, 'foreign-keys.json'), '{"foreignKeys":[]}\n', 'utf-8'); + await writeFile( + join(stagedDir, 'tables', 'orders.json'), + '{"name":"orders","db":"public","columns":[]}\n', + 'utf-8', + ); + }, + async detect() { + return true; + }, + async chunk() { + return { + workUnits: [ + { + unitKey: 'live-database-public-orders', + rawFiles: ['tables/orders.json'], + dependencyPaths: ['connection.json', 'foreign-keys.json'], + peerFileIndex: [], + }, + ], + }; + }, + }, + ], + jobIdFactory: () => 'local-scan-artifacts', + now: () => new Date('2026-04-29T12:00:00.000Z'), + }, + }); + + const trigger = await ports.scan?.trigger({ + connectionId: 'warehouse', + mode: 'structural', + detectRelationships: false, + dryRun: false, + }); + + expect(trigger?.runId).toBe('local-scan-artifacts'); + const syncId = '2026-04-29-120000-local-scan-artifacts'; + await expect(ports.scan?.listArtifacts?.({ runId: 'local-scan-artifacts' })).resolves.toEqual({ + runId: 'local-scan-artifacts', + artifacts: [ + { + path: 
`raw-sources/warehouse/live-database/${syncId}/connection.json`, + type: 'raw_source', + size: 29, + }, + { + path: `raw-sources/warehouse/live-database/${syncId}/foreign-keys.json`, + type: 'raw_source', + size: 19, + }, + { + path: `raw-sources/warehouse/live-database/${syncId}/scan-report.json`, + type: 'report', + size: expect.any(Number), + }, + { + path: `raw-sources/warehouse/live-database/${syncId}/tables/orders.json`, + type: 'raw_source', + size: 45, + }, + { + path: 'semantic-layer/warehouse/_schema/public.yaml', + type: 'manifest_shard', + size: expect.any(Number), + }, + ], + }); + + await expect( + ports.scan?.readArtifact?.({ + runId: 'local-scan-artifacts', + path: `raw-sources/warehouse/live-database/${syncId}/tables/orders.json`, + }), + ).resolves.toEqual({ + runId: 'local-scan-artifacts', + path: `raw-sources/warehouse/live-database/${syncId}/tables/orders.json`, + type: 'raw_source', + size: 45, + content: '{"name":"orders","db":"public","columns":[]}\n', + }); + + await expect( + ports.scan?.readArtifact?.({ + runId: 'local-scan-artifacts', + path: 'semantic-layer/warehouse/_schema/public.yaml', + }), + ).resolves.toMatchObject({ + runId: 'local-scan-artifacts', + path: 'semantic-layer/warehouse/_schema/public.yaml', + type: 'manifest_shard', + content: expect.stringContaining('orders:'), + }); + + await expect( + ports.scan?.readArtifact?.({ + runId: 'local-scan-artifacts', + path: 'klo.yaml', + }), + ).resolves.toBeNull(); + await expect(ports.scan?.listArtifacts?.({ runId: 'missing' })).resolves.toBeNull(); + await expect(readFile(join(project.projectDir, 'klo.yaml'), 'utf-8')).resolves.toContain('project: warehouse'); + }); +}); diff --git a/packages/context/src/mcp/local-project-ports.ts b/packages/context/src/mcp/local-project-ports.ts new file mode 100644 index 00000000..faf7ceb2 --- /dev/null +++ b/packages/context/src/mcp/local-project-ports.ts @@ -0,0 +1,683 @@ +import YAML from 'yaml'; +import { + type KloSqlQueryExecutorPort, + 
localConnectionInfoFromConfig, + localConnectionTypeForConfig, +} from '../connections/index.js'; +import type { KloEmbeddingPort } from '../core/index.js'; +import type { KloSemanticLayerComputePort } from '../daemon/index.js'; +import { + createDefaultLocalIngestAdapters, + getLocalIngestStatus, + type IngestReportSnapshot, + ingestReportToMemoryFlowReplay, + type LocalIngestMcpOptions, + runLocalIngest, + runLocalMetabaseIngest, +} from '../ingest/index.js'; +import { createLocalKloEmbeddingProviderFromConfig, KloIngestEmbeddingPortAdapter } from '../llm/index.js'; +import type { KloLocalProject } from '../project/index.js'; +import { + getLocalScanReport, + getLocalScanStatus, + type KloConnectionDriver, + type KloScanConnector, + type KloScanReport, + type LocalScanMcpOptions, + runLocalScan, +} from '../scan/index.js'; +import { + compileLocalSlQuery, + type LocalSlSourceSearchResult, + type LocalSlSourceSummary, + listLocalSlSources, + searchLocalSlSources, + sourceDefinitionSchema, + sourceOverlaySchema, +} from '../sl/index.js'; +import { readLocalKnowledgePage, searchLocalKnowledgePages, writeLocalKnowledgePage } from '../wiki/local-knowledge.js'; +import type { + KloConnectionTestResponse, + KloIngestStatusResponse, + KloMcpContextPorts, + KloScanArtifactListResponse, + KloScanArtifactReadResponse, + KloScanArtifactSummary, + KloScanArtifactType, +} from './types.js'; + +const LOCAL_AUTHOR = 'klo'; +const LOCAL_AUTHOR_EMAIL = 'klo@example.com'; +const SL_SHAPE_WARNING = 'Local stdio validation checks YAML shape only; Python semantic validation is not configured.'; + +interface CreateLocalProjectMcpContextPortsOptions { + semanticLayerCompute?: KloSemanticLayerComputePort; + queryExecutor?: KloSqlQueryExecutorPort; + localIngest?: LocalIngestMcpOptions; + localScan?: LocalScanMcpOptions; + embeddingService?: KloEmbeddingPort | null; +} + +function dialectForDriver(driver: string | undefined): string { + const normalized = (driver ?? 
'postgres').toUpperCase(); + const map: Record = { + POSTGRESQL: 'postgres', + POSTGRES: 'postgres', + BIGQUERY: 'bigquery', + SNOWFLAKE: 'snowflake', + MYSQL: 'mysql', + SQLSERVER: 'tsql', + MSSQL: 'tsql', + SQLITE: 'sqlite', + DUCKDB: 'duckdb', + CLICKHOUSE: 'clickhouse', + REDSHIFT: 'redshift', + DATABRICKS: 'databricks', + }; + return map[normalized] ?? 'postgres'; +} + +function assertSafePathToken(kind: string, value: string): string { + if ( + value.trim().length === 0 || + value.includes('..') || + value.includes('\\') || + value.startsWith('/') || + value.startsWith('.') || + value.includes('//') + ) { + throw new Error(`Unsafe ${kind}: ${value}`); + } + return value; +} + +function assertSafeConnectionId(connectionId: string): string { + if (!/^[a-zA-Z0-9][a-zA-Z0-9_-]*$/.test(connectionId)) { + throw new Error(`Unsafe connection id: ${connectionId}`); + } + return assertSafePathToken('connection id', connectionId); +} + +function assertSafeSourceName(sourceName: string): string { + if (!/^[a-z0-9][a-z0-9_]*$/.test(sourceName)) { + throw new Error(`Unsafe semantic-layer source name: ${sourceName}`); + } + return assertSafePathToken('semantic-layer source name', sourceName); +} + +function normalizeScanDriver(driver: string | undefined): KloConnectionDriver { + const normalized = (driver ?? '').toLowerCase(); + if ( + normalized === 'postgres' || + normalized === 'postgresql' || + normalized === 'sqlite' || + normalized === 'sqlite3' || + normalized === 'mysql' || + normalized === 'clickhouse' || + normalized === 'sqlserver' || + normalized === 'bigquery' || + normalized === 'snowflake' || + normalized === 'posthog' + ) { + return normalized === 'sqlite3' ? 
'sqlite' : normalized; + } + return 'postgres'; +} + +async function cleanupConnector(connector: KloScanConnector | null): Promise { + if (connector?.cleanup) { + await connector.cleanup(); + } +} + +async function testLocalConnection( + project: KloLocalProject, + options: CreateLocalProjectMcpContextPortsOptions, + connectionId: string, +): Promise { + const safeConnectionId = assertSafeConnectionId(connectionId); + const connection = project.config.connections[safeConnectionId]; + if (!connection) { + return null; + } + const connectionType = localConnectionTypeForConfig(safeConnectionId, connection); + const createConnector = options.localScan?.createConnector; + if (!createConnector) { + return { + id: safeConnectionId, + connectionType, + ok: true, + tableCount: null, + message: 'Connection is configured; no native scan connector is available for live testing.', + warnings: ['klo serve was not configured with a local scan connector factory.'], + }; + } + + let connector: KloScanConnector | null = null; + try { + connector = await createConnector(safeConnectionId); + const snapshot = await connector.introspect( + { + connectionId: safeConnectionId, + driver: normalizeScanDriver(connection.driver), + mode: 'structural', + dryRun: true, + detectRelationships: false, + }, + { runId: `connection-test-${safeConnectionId}` }, + ); + return { + id: safeConnectionId, + connectionType, + ok: true, + tableCount: snapshot.tables.length, + message: 'Connection test passed.', + warnings: [], + }; + } catch (error) { + return { + id: safeConnectionId, + connectionType, + ok: false, + tableCount: null, + message: error instanceof Error ? 
error.message : String(error), + warnings: [], + }; + } finally { + await cleanupConnector(connector); + } +} + +function scanArtifactType(path: string, report: KloScanReport): KloScanArtifactType { + if (path === report.artifactPaths.reportPath) { + return 'report'; + } + if (report.artifactPaths.manifestShards.includes(path)) { + return 'manifest_shard'; + } + if (report.artifactPaths.enrichmentArtifacts.includes(path)) { + return 'enrichment_artifact'; + } + return 'raw_source'; +} + +async function artifactSize(project: KloLocalProject, path: string): Promise { + try { + const result = await project.fileStore.readFile(path); + return typeof result.size === 'number' ? result.size : undefined; + } catch { + return undefined; + } +} + +async function listArtifactsForReport( + project: KloLocalProject, + runId: string, + report: KloScanReport, +): Promise { + const paths = new Set(); + if (report.artifactPaths.rawSourcesDir) { + const listed = await project.fileStore.listFiles(report.artifactPaths.rawSourcesDir); + for (const file of listed.files) { + paths.add(file); + } + } + if (report.artifactPaths.reportPath) { + paths.add(report.artifactPaths.reportPath); + } + for (const path of report.artifactPaths.manifestShards) { + paths.add(path); + } + for (const path of report.artifactPaths.enrichmentArtifacts) { + paths.add(path); + } + + const artifacts: KloScanArtifactSummary[] = []; + for (const path of [...paths].sort()) { + const size = await artifactSize(project, path); + artifacts.push({ + path, + type: scanArtifactType(path, report), + ...(size === undefined ? 
{} : { size }), + }); + } + return { runId, artifacts }; +} + +async function readScanArtifact( + project: KloLocalProject, + runId: string, + path: string, +): Promise { + const report = await getLocalScanReport(project, runId); + if (!report) { + return null; + } + const listed = await listArtifactsForReport(project, runId, report); + const artifact = listed.artifacts.find((candidate) => candidate.path === path); + if (!artifact) { + return null; + } + const result = await project.fileStore.readFile(path); + return { + runId, + path, + type: artifact.type, + ...(typeof result.size === 'number' ? { size: result.size } : {}), + content: result.content, + }; +} + +function slPath(connectionId: string, sourceName: string): string { + return `semantic-layer/${assertSafeConnectionId(connectionId)}/${assertSafeSourceName(sourceName)}.yaml`; +} + +function sourceNameFromPath(path: string): string { + return ( + path + .split('/') + .at(-1) + ?.replace(/\.ya?ml$/, '') ?? path + ); +} + +function isRecord(value: unknown): value is Record { + return typeof value === 'object' && value !== null && !Array.isArray(value); +} + +function parseYamlRecord(raw: string): Record { + const parsed = YAML.parse(raw) as unknown; + if (!isRecord(parsed)) { + throw new Error('Semantic-layer source YAML must contain an object'); + } + return parsed; +} + +async function listSlPaths(project: KloLocalProject, connectionId?: string): Promise { + const root = connectionId ? 
`semantic-layer/${assertSafeConnectionId(connectionId)}` : 'semantic-layer'; + const listed = await project.fileStore.listFiles(root); + return listed.files.filter((file) => file.endsWith('.yaml') || file.endsWith('.yml')).sort(); +} + +async function loadComputableSources( + project: KloLocalProject, + connectionId: string, +): Promise[]> { + const paths = await listSlPaths(project, connectionId); + const sources: Record[] = []; + for (const path of paths) { + const raw = await project.fileStore.readFile(path); + const source = parseYamlRecord(raw.content); + if (source.table || source.sql) { + sources.push(source); + } + } + return sources; +} + +function validateSourceRecord(sourceName: string, source: Record): string[] { + const namedSource = { ...source, name: typeof source.name === 'string' ? source.name : sourceName }; + const definition = sourceDefinitionSchema.safeParse(namedSource); + if (definition.success) { + return []; + } + const overlay = sourceOverlaySchema.safeParse(namedSource); + if (overlay.success) { + return []; + } + return definition.error.issues.map((issue) => `${sourceName}: ${issue.path.join('.') || 'source'} ${issue.message}`); +} + +function localIngestSourceDir(config: unknown): string | undefined { + if (!isRecord(config) || config.sourceDir === undefined) { + return undefined; + } + if (typeof config.sourceDir !== 'string' || config.sourceDir.trim().length === 0) { + throw new Error('Local ingest config sourceDir must be a non-empty string when provided'); + } + return config.sourceDir; +} + +function rawFileCountFromIngestReport(report: IngestReportSnapshot): number { + return new Set(report.body.workUnits.flatMap((workUnit) => workUnit.rawFiles)).size; +} + +function hasSlSearchMetadata( + source: LocalSlSourceSummary | LocalSlSourceSearchResult, +): source is LocalSlSourceSearchResult { + return 'score' in source; +} + +function statusFromIngestReport(report: IngestReportSnapshot): KloIngestStatusResponse { + const 
failedWorkUnits = report.body.failedWorkUnits; + return { + runId: report.runId, + jobId: report.jobId, + reportId: report.id, + status: failedWorkUnits.length > 0 ? 'error' : 'done', + stage: 'done', + progress: 1, + errors: failedWorkUnits, + done: true, + adapter: report.sourceKey, + connectionId: report.connectionId, + sourceDir: null, + syncId: report.body.syncId, + startedAt: report.createdAt, + completedAt: report.createdAt, + previousRunId: null, + diffSummary: report.body.diffSummary, + workUnitCount: report.body.workUnits.length, + rawFileCount: rawFileCountFromIngestReport(report), + workUnits: report.body.workUnits.map((workUnit) => ({ + unitKey: workUnit.unitKey, + rawFiles: [...workUnit.rawFiles], + peerFileIndex: [], + dependencyPaths: [], + })), + evictionDeletedRawPaths: [...report.body.evictionInputs], + }; +} + +export function createLocalProjectMcpContextPorts( + project: KloLocalProject, + options: CreateLocalProjectMcpContextPortsOptions = {}, +): KloMcpContextPorts { + const configuredEmbeddingProvider = createLocalKloEmbeddingProviderFromConfig(project.config.ingest.embeddings); + const embeddingService = + options.embeddingService ?? + (configuredEmbeddingProvider ? 
new KloIngestEmbeddingPortAdapter(configuredEmbeddingProvider) : null); + const ports: KloMcpContextPorts = { + connections: { + async list() { + return Object.entries(project.config.connections) + .map(([id, config]) => localConnectionInfoFromConfig(id, config)) + .filter( + (connection): connection is { id: string; name: string; connectionType: string } => connection !== null, + ) + .sort((a, b) => a.id.localeCompare(b.id)); + }, + async test(input) { + return testLocalConnection(project, options, input.connectionId); + }, + }, + knowledge: { + async search(input) { + const results = await searchLocalKnowledgePages(project, { + query: input.query, + userId: input.userId, + limit: input.limit, + embeddingService, + }); + return { + results: results.slice(0, input.limit).map((result) => ({ + key: result.key, + path: result.path, + scope: result.scope, + summary: result.summary, + score: result.score, + matchReasons: result.matchReasons, + lanes: result.lanes, + })), + totalFound: results.length, + }; + }, + async read(input) { + const page = await readLocalKnowledgePage(project, { + key: input.key, + userId: input.userId, + }); + return page + ? { + key: page.key, + scope: page.scope, + summary: page.summary, + content: page.content, + tags: page.tags, + refs: page.refs, + slRefs: page.slRefs, + } + : null; + }, + async write(input) { + const existing = await readLocalKnowledgePage(project, { + key: input.key, + userId: input.userId, + }); + await writeLocalKnowledgePage(project, { + key: input.key, + scope: 'GLOBAL', + userId: input.userId, + summary: input.summary, + content: input.content, + tags: input.tags, + refs: input.refs, + slRefs: input.slRefs, + source: input.source, + intent: input.intent, + tables: input.tables, + representativeSql: input.representativeSql, + usage: input.usage, + fingerprints: input.fingerprints, + }); + return { success: true, key: input.key, action: existing ? 
'updated' : 'created' }; + }, + }, + semanticLayer: { + async listSources(input) { + const listed: Array = input.query + ? await searchLocalSlSources(project, { + connectionId: input.connectionId, + query: input.query, + embeddingService, + }) + : await listLocalSlSources(project, { connectionId: input.connectionId }); + const sources = listed.map((source) => ({ + connectionId: source.connectionId, + connectionName: source.connectionId, + name: source.name, + description: source.description, + columnCount: source.columnCount, + measureCount: source.measureCount, + joinCount: source.joinCount, + ...(hasSlSearchMetadata(source) ? { score: source.score } : {}), + ...(hasSlSearchMetadata(source) && source.matchReasons ? { matchReasons: source.matchReasons } : {}), + ...(hasSlSearchMetadata(source) && source.dictionaryMatches + ? { dictionaryMatches: source.dictionaryMatches } + : {}), + ...(hasSlSearchMetadata(source) && source.lanes ? { lanes: source.lanes } : {}), + })); + return { sources, totalSources: sources.length }; + }, + async readSource(input) { + const path = slPath(input.connectionId, input.sourceName); + try { + const result = await project.fileStore.readFile(path); + return { sourceName: input.sourceName, yaml: result.content }; + } catch { + return null; + } + }, + async writeSource(input) { + const path = slPath(input.connectionId, input.sourceName); + if (input.delete) { + const deleted = await project.fileStore.deleteFile( + path, + LOCAL_AUTHOR, + LOCAL_AUTHOR_EMAIL, + `Remove semantic-layer source: ${input.sourceName}`, + ); + return { success: Boolean(deleted), sourceName: input.sourceName }; + } + + const yaml = + input.yaml ?? 
YAML.stringify({ ...input.source, name: input.sourceName }, { indent: 2, lineWidth: 0 }); + parseYamlRecord(yaml); + await project.fileStore.writeFile( + path, + `${yaml.trimEnd()}\n`, + LOCAL_AUTHOR, + LOCAL_AUTHOR_EMAIL, + `Update semantic-layer source: ${input.sourceName}`, + ); + return { success: true, sourceName: input.sourceName, yaml: `${yaml.trimEnd()}\n` }; + }, + async validate(input) { + if (options.semanticLayerCompute) { + const connectionId = assertSafeConnectionId(input.connectionId); + const result = await options.semanticLayerCompute.validateSources({ + sources: await loadComputableSources(project, connectionId), + dialect: dialectForDriver(project.config.connections[connectionId]?.driver), + recentlyTouched: input.names, + }); + return { + success: result.valid, + errors: result.errors, + warnings: result.warnings, + }; + } + + const names = new Set(input.names ?? []); + const paths = await listSlPaths(project, input.connectionId); + const errors: string[] = []; + for (const path of paths) { + const sourceName = sourceNameFromPath(path); + if (names.size > 0 && !names.has(sourceName)) { + continue; + } + try { + const raw = await project.fileStore.readFile(path); + errors.push(...validateSourceRecord(sourceName, parseYamlRecord(raw.content))); + } catch (error) { + errors.push(`${sourceName}: ${error instanceof Error ? error.message : String(error)}`); + } + } + return { + success: errors.length === 0, + errors, + warnings: [SL_SHAPE_WARNING], + }; + }, + async query(input) { + if (!options.semanticLayerCompute) { + throw new Error( + 'sl_query requires a semantic-layer query adapter. 
Local stdio MCP exposes file-backed SL CRUD only.', + ); + } + return compileLocalSlQuery(project, { + connectionId: input.connectionId, + query: input.query, + compute: options.semanticLayerCompute, + execute: Boolean(options.queryExecutor), + maxRows: input.query.limit, + queryExecutor: options.queryExecutor, + }); + }, + }, + }; + + if (options.localIngest) { + ports.ingest = { + async trigger(input) { + const sourceDir = localIngestSourceDir(input.config); + if (input.adapter === 'metabase' && !sourceDir) { + const result = await (options.localIngest?.runLocalMetabaseIngest ?? runLocalMetabaseIngest)({ + project, + adapters: options.localIngest?.adapters ?? createDefaultLocalIngestAdapters(project), + metabaseConnectionId: input.connectionId, + trigger: input.trigger, + jobIdFactory: options.localIngest?.jobIdFactory, + agentRunner: options.localIngest?.agentRunner, + llmProvider: options.localIngest?.llmProvider, + memoryModel: options.localIngest?.memoryModel, + semanticLayerCompute: options.localIngest?.semanticLayerCompute ?? options.semanticLayerCompute, + queryExecutor: options.localIngest?.queryExecutor ?? options.queryExecutor, + logger: options.localIngest?.logger, + }); + return { + runId: `metabase-fanout:${result.metabaseConnectionId}`, + jobId: undefined, + reportId: undefined, + fanout: { + status: result.status, + children: result.children.map((child) => ({ + runId: child.report.runId, + jobId: child.report.jobId, + reportId: child.report.id, + targetConnectionId: child.targetConnectionId, + metabaseDatabaseId: child.metabaseDatabaseId, + })), + }, + }; + } + + const result = await runLocalIngest({ + project, + adapters: options.localIngest?.adapters ?? 
createDefaultLocalIngestAdapters(project), + adapter: input.adapter, + connectionId: input.connectionId, + sourceDir, + trigger: input.trigger, + jobId: options.localIngest?.jobIdFactory?.(), + agentRunner: options.localIngest?.agentRunner, + llmProvider: options.localIngest?.llmProvider, + memoryModel: options.localIngest?.memoryModel, + semanticLayerCompute: options.localIngest?.semanticLayerCompute ?? options.semanticLayerCompute, + queryExecutor: options.localIngest?.queryExecutor ?? options.queryExecutor, + logger: options.localIngest?.logger, + }); + return { + runId: result.report.runId, + jobId: result.report.jobId, + reportId: result.report.id, + }; + }, + async status(input) { + const report = await getLocalIngestStatus(project, input.runId); + return report ? statusFromIngestReport(report) : null; + }, + async report(input) { + return getLocalIngestStatus(project, input.runId); + }, + async replay(input) { + const report = await getLocalIngestStatus(project, input.runId); + return report ? ingestReportToMemoryFlowReplay(report) : null; + }, + }; + } + + if (options.localScan) { + ports.scan = { + async trigger(input) { + return runLocalScan({ + project, + connectionId: input.connectionId, + mode: input.mode, + detectRelationships: input.detectRelationships, + dryRun: input.dryRun, + trigger: 'mcp', + adapters: options.localScan?.adapters, + databaseIntrospectionUrl: options.localScan?.databaseIntrospectionUrl, + createConnector: options.localScan?.createConnector, + jobId: options.localScan?.jobIdFactory?.(), + now: options.localScan?.now, + }); + }, + async status(input) { + return getLocalScanStatus(project, input.runId); + }, + async report(input) { + return getLocalScanReport(project, input.runId); + }, + async listArtifacts(input) { + const report = await getLocalScanReport(project, input.runId); + return report ? 
listArtifactsForReport(project, input.runId, report) : null; + }, + async readArtifact(input) { + return readScanArtifact(project, input.runId, input.path); + }, + }; + } + + return ports; +} diff --git a/packages/context/src/mcp/server.test.ts b/packages/context/src/mcp/server.test.ts new file mode 100644 index 00000000..067e4230 --- /dev/null +++ b/packages/context/src/mcp/server.test.ts @@ -0,0 +1,869 @@ +import { access, mkdtemp, readFile, rm } from 'node:fs/promises'; +import { tmpdir } from 'node:os'; +import { join } from 'node:path'; +import { describe, expect, it, vi } from 'vitest'; +import { createLocalProjectMemoryCapture } from '../memory/index.js'; +import { initKloProject } from '../project/index.js'; +import { createKloMcpServer } from './server.js'; +import type { + KloIngestMcpPort, + KloKnowledgeMcpPort, + KloMcpContextPorts, + KloScanMcpPort, + KloSemanticLayerMcpPort, + MemoryCapturePort, +} from './types.js'; + +type RegisteredTool = { + name: string; + config: { title?: string; description?: string; inputSchema: unknown }; + handler: (input: Record) => Promise; +}; + +function makeFakeServer() { + const tools: RegisteredTool[] = []; + return { + tools, + server: { + registerTool(name: string, config: RegisteredTool['config'], handler: RegisteredTool['handler']): void { + tools.push({ name, config, handler }); + }, + }, + }; +} + +function getTool(tools: RegisteredTool[], name: string): RegisteredTool { + const found = tools.find((tool) => tool.name === name); + if (!found) { + throw new Error(`Tool not registered: ${name}`); + } + return found; +} + +describe('createKloMcpServer', () => { + it('registers context tools without memory capture tools when memory capture is omitted', async () => { + const fake = makeFakeServer(); + + createKloMcpServer({ + server: fake.server, + userContext: { userId: 'local-user' }, + contextTools: { + connections: { + async list() { + return [{ id: 'warehouse', name: 'warehouse', connectionType: 'postgres' }]; + 
}, + }, + }, + }); + + expect(fake.tools.map((tool) => tool.name)).toEqual(['connection_list']); + await expect(getTool(fake.tools, 'connection_list').handler({})).resolves.toMatchObject({ + structuredContent: { + connections: [{ id: 'warehouse', name: 'warehouse', connectionType: 'postgres' }], + }, + }); + }); + + it('registers memory capture tools without host app dependencies', async () => { + const fake = makeFakeServer(); + const capture: MemoryCapturePort = { + capture: vi.fn().mockResolvedValue({ runId: 'run-1' }), + status: vi.fn().mockResolvedValue({ + runId: 'run-1', + status: 'done', + stage: 'done', + done: true, + captured: { wiki: ['revenue'], sl: [], xrefs: [] }, + error: null, + commitHash: 'abc123', + skillsLoaded: ['knowledge_capture'], + signalDetected: true, + }), + }; + + createKloMcpServer({ + server: fake.server, + memoryCapture: capture, + userContext: { userId: 'mcp-user' }, + }); + + expect(fake.tools.map((tool) => tool.name).sort()).toEqual(['memory_capture', 'memory_capture_status']); + + const memoryCapture = getTool(fake.tools, 'memory_capture'); + await expect( + memoryCapture.handler({ + userMessage: 'Revenue means paid order value.', + assistantMessage: 'Captured.', + connectionId: '00000000-0000-4000-8000-000000000001', + }), + ).resolves.toEqual({ + content: [{ type: 'text', text: JSON.stringify({ runId: 'run-1' }, null, 2) }], + structuredContent: { runId: 'run-1' }, + }); + expect(capture.capture).toHaveBeenCalledWith({ + userId: 'mcp-user', + chatId: expect.stringMatching(/^mcp-/), + userMessage: 'Revenue means paid order value.', + assistantMessage: 'Captured.', + connectionId: '00000000-0000-4000-8000-000000000001', + sourceType: 'external_ingest', + }); + + const memoryStatus = getTool(fake.tools, 'memory_capture_status'); + await expect(memoryStatus.handler({ runId: 'run-1' })).resolves.toEqual({ + content: [ + { + type: 'text', + text: JSON.stringify( + { + runId: 'run-1', + status: 'done', + stage: 'done', + done: true, 
+ captured: { wiki: ['revenue'], sl: [], xrefs: [] }, + error: null, + commitHash: 'abc123', + skillsLoaded: ['knowledge_capture'], + signalDetected: true, + }, + null, + 2, + ), + }, + ], + structuredContent: { + runId: 'run-1', + status: 'done', + stage: 'done', + done: true, + captured: { wiki: ['revenue'], sl: [], xrefs: [] }, + error: null, + commitHash: 'abc123', + skillsLoaded: ['knowledge_capture'], + signalDetected: true, + }, + }); + }); + + it('returns an MCP error payload for missing run ids', async () => { + const fake = makeFakeServer(); + const capture: MemoryCapturePort = { + capture: vi.fn(), + status: vi.fn().mockResolvedValue(null), + }; + + createKloMcpServer({ + server: fake.server, + memoryCapture: capture, + userContext: { userId: 'mcp-user' }, + }); + + const memoryStatus = getTool(fake.tools, 'memory_capture_status'); + await expect(memoryStatus.handler({ runId: 'missing' })).resolves.toEqual({ + content: [{ type: 'text', text: 'Memory capture run "missing" was not found.' 
}], + isError: true, + }); + }); + + it('runs MCP memory_capture against a local project memory port', async () => { + const tempDir = await mkdtemp(join(tmpdir(), 'klo-mcp-local-memory-')); + try { + const project = await initKloProject({ projectDir: tempDir, projectName: 'warehouse' }); + const agentRunner = { + runLoop: async ({ + toolSet, + }: { + toolSet: Record Promise }>; + }) => { + await toolSet.load_skill.execute({ name: 'knowledge_capture' }); + await toolSet.wiki_write.execute( + { + key: 'arr', + summary: 'ARR definition', + content: 'ARR means annual recurring revenue.', + }, + { toolCallId: 'wiki-write' }, + ); + return { stopReason: 'natural' as const }; + }, + }; + const memoryCapture = createLocalProjectMemoryCapture(project, { + agentRunner: agentRunner as never, + runIdFactory: () => 'memory-run-mcp', + }); + const fake = makeFakeServer(); + + createKloMcpServer({ + server: fake.server, + memoryCapture, + userContext: { userId: 'mcp-user' }, + }); + + const capture = await getTool(fake.tools, 'memory_capture').handler({ + userMessage: 'define ARR as annual recurring revenue', + assistantMessage: 'Captured.', + }); + expect(capture).toMatchObject({ + structuredContent: { runId: 'memory-run-mcp' }, + }); + await memoryCapture.waitForRun('memory-run-mcp'); + + await expect( + getTool(fake.tools, 'memory_capture_status').handler({ runId: 'memory-run-mcp' }), + ).resolves.toMatchObject({ + structuredContent: { + runId: 'memory-run-mcp', + status: 'done', + done: true, + captured: { wiki: ['arr'], sl: [], xrefs: [] }, + }, + }); + await expect(access(join(project.projectDir, '.klo/db.sqlite'))).resolves.toBeUndefined(); + await expect(access(join(project.projectDir, '.klo/memory-runs/memory-run-mcp.json'))).rejects.toThrow(); + await expect(readFile(join(project.projectDir, 'knowledge/global/arr.md'), 'utf-8')).resolves.toContain( + 'ARR means annual recurring revenue.', + ); + } finally { + await rm(tempDir, { recursive: true, force: true }); + } + 
});
+
+  // Supplies every optional context port (connections, knowledge, semantic layer, ingest, scan)
+  // to a fake server, then checks which tools get registered and what each handler returns/forwards.
+  it('registers KLO context MCP tools when context ports are supplied', async () => {
+    const fake = makeFakeServer();
+    const capture: MemoryCapturePort = {
+      capture: vi.fn().mockResolvedValue({ runId: 'run-1' }),
+      status: vi.fn().mockResolvedValue(null),
+    };
+    // Every port method below is a vi.fn stub with canned behaviour.
+    const contextTools: KloMcpContextPorts = {
+      connections: {
+        list: vi.fn().mockResolvedValue([
+          {
+            id: '00000000-0000-4000-8000-000000000001',
+            name: 'Warehouse',
+            connectionType: 'POSTGRES',
+          },
+        ]),
+        test: vi.fn().mockResolvedValue({
+          id: 'warehouse',
+          connectionType: 'postgres',
+          ok: true,
+          tableCount: 2,
+          message: 'Connection test passed.',
+          warnings: [],
+        }),
+      },
+      knowledge: {
+        search: vi.fn().mockResolvedValue({
+          results: [
+            {
+              key: 'revenue',
+              path: 'knowledge/global/revenue.md',
+              scope: 'GLOBAL',
+              summary: 'Paid order value',
+              score: 0.42,
+              matchReasons: ['lexical'],
+            },
+          ],
+          totalFound: 1,
+        }),
+        read: vi.fn().mockResolvedValue({
+          key: 'revenue',
+          summary: 'Paid order value',
+          content: '# Revenue',
+          scope: 'GLOBAL',
+          tags: ['finance'],
+          refs: [],
+          slRefs: ['orders'],
+        }),
+        write: vi.fn().mockResolvedValue({
+          success: true,
+          key: 'revenue',
+          action: 'updated',
+        }),
+      },
+      semanticLayer: {
+        listSources: vi.fn().mockResolvedValue({
+          sources: [
+            {
+              connectionId: '00000000-0000-4000-8000-000000000001',
+              connectionName: 'Warehouse',
+              name: 'orders',
+              description: 'Order facts',
+              columnCount: 2,
+              measureCount: 1,
+              joinCount: 0,
+            },
+          ],
+          totalSources: 1,
+        }),
+        readSource: vi.fn().mockResolvedValue({
+          sourceName: 'orders',
+          yaml: 'name: orders\n',
+        }),
+        writeSource: vi.fn().mockResolvedValue({
+          success: true,
+          sourceName: 'orders',
+          yaml: 'name: orders\n',
+          commitHash: 'abc123',
+        }),
+        validate: vi.fn().mockResolvedValue({
+          success: true,
+          errors: [],
+          warnings: [],
+        }),
+        query: vi.fn().mockResolvedValue({
+          sql: 'select 1',
+          headers: ['count'],
+          rows: [[1]],
+          totalRows: 1,
+          plan: { sources: ['orders'] },
+        }),
+      },
+      ingest: {
+        trigger: vi.fn().mockResolvedValue({
+          runId: 'run-42',
+          jobId: 'job-42',
+          reportId: 'report-42',
+        }),
+        status: vi.fn().mockResolvedValue({
+          runId: 'run-42',
+          jobId: 'job-42',
+          reportId: 'report-42',
+          status: 'done',
+          stage: 'done',
+          progress: 1,
+          done: true,
+          adapter: 'fake',
+          connectionId: 'warehouse',
+          sourceDir: '/tmp/upload',
+          syncId: '2026-04-27-120000-run-42',
+          startedAt: '2026-04-27T12:00:00.000Z',
+          completedAt: '2026-04-27T12:00:01.000Z',
+          previousRunId: 'run-41',
+          diffSummary: {
+            added: 0,
+            modified: 1,
+            deleted: 0,
+            unchanged: 3,
+          },
+          rawFileCount: 4,
+          workUnitCount: 1,
+          workUnits: [
+            {
+              unitKey: 'fake-orders',
+              rawFiles: ['orders/orders.json'],
+              peerFileIndex: [],
+              dependencyPaths: [],
+            },
+          ],
+          evictionDeletedRawPaths: [],
+          errors: [],
+        }),
+        report: vi.fn>().mockResolvedValue({
+          id: 'report-42',
+          runId: 'run-42',
+          jobId: 'job-42',
+          connectionId: 'warehouse',
+          sourceKey: 'fake',
+          createdAt: '2026-04-27T12:00:01.000Z',
+          body: {
+            syncId: '2026-04-27-120000-run-42',
+            diffSummary: { added: 0, modified: 1, deleted: 0, unchanged: 3 },
+            commitSha: null,
+            workUnits: [],
+            failedWorkUnits: [],
+            reconciliationSkipped: false,
+            conflictsResolved: [],
+            evictionsApplied: [],
+            unmappedFallbacks: [],
+            evictionInputs: [],
+            unresolvedCards: [],
+            supersededBy: null,
+            overrideOf: null,
+            provenanceRows: [],
+            toolTranscripts: [],
+          },
+        }),
+        replay: vi.fn>().mockResolvedValue({
+          runId: 'run-42',
+          reportId: 'report-42',
+          reportPath: 'report-42',
+          connectionId: 'warehouse',
+          adapter: 'fake',
+          status: 'done',
+          sourceDir: null,
+          syncId: '2026-04-27-120000-run-42',
+          errors: [],
+          events: [{ type: 'report_created', runId: 'run-42', reportPath: 'report-42' }],
+          plannedWorkUnits: [],
+          details: { actions: [], provenance: [], transcripts: [] },
+        }),
+      },
+      scan: {
+        trigger: vi.fn().mockResolvedValue({
+          runId: 'scan-run-1',
+          status: 'done',
+          done: true,
+          connectionId: 'warehouse',
+          mode: 'structural',
+          dryRun: false,
+          syncId: 'sync-1',
+          report: {
+            connectionId: 'warehouse',
+            driver: 'postgres',
+            syncId: 'sync-1',
+            runId: 'scan-run-1',
+            trigger: 'mcp',
+            mode: 'structural',
+            dryRun: false,
+            artifactPaths: {
+              rawSourcesDir: 'raw-sources/warehouse/live-database/sync-1',
+              reportPath: 'raw-sources/warehouse/live-database/sync-1/scan-report.json',
+              manifestShards: [],
+              enrichmentArtifacts: [],
+            },
+            diffSummary: {
+              tablesAdded: 1,
+              tablesModified: 0,
+              tablesDeleted: 0,
+              tablesUnchanged: 0,
+              columnsAdded: 0,
+              columnsModified: 0,
+              columnsDeleted: 0,
+            },
+            manifestShardsWritten: 0,
+            structuralSyncStats: {
+              tablesCreated: 0,
+              tablesUpdated: 0,
+              tablesDeleted: 0,
+              columnsCreated: 0,
+              columnsUpdated: 0,
+              columnsDeleted: 0,
+            },
+            enrichment: {
+              dataDictionary: 'skipped',
+              tableDescriptions: 'skipped',
+              columnDescriptions: 'skipped',
+              embeddings: 'skipped',
+              deterministicRelationships: 'skipped',
+              llmRelationshipValidation: 'skipped',
+              statisticalValidation: 'skipped',
+            },
+            capabilityGaps: [],
+            warnings: [],
+            relationships: { accepted: 0, review: 0, rejected: 0, skipped: 0 },
+            enrichmentState: {
+              resumedStages: [],
+              completedStages: [],
+              failedStages: [],
+            },
+            createdAt: '2026-04-29T09:00:00.000Z',
+          },
+        }),
+        status: vi.fn().mockResolvedValue({
+          runId: 'scan-run-1',
+          status: 'done',
+          done: true,
+          connectionId: 'warehouse',
+          mode: 'structural',
+          dryRun: false,
+          syncId: 'sync-1',
+          progress: 1,
+          startedAt: '2026-04-29T09:00:00.000Z',
+          completedAt: '2026-04-29T09:00:01.000Z',
+          reportPath: 'raw-sources/warehouse/live-database/sync-1/scan-report.json',
+          warnings: [],
+        }),
+        // Resolves null so the scan_report handler's not-found path can be asserted later.
+        report: vi.fn().mockResolvedValue(null),
+        listArtifacts: vi.fn>().mockResolvedValue({
+          runId: 'scan-run-1',
+          artifacts: [
+            {
+              path: 'raw-sources/warehouse/live-database/sync-1/scan-report.json',
+              type: 'report',
+              size: 128,
+            },
+            {
+              path: 'raw-sources/warehouse/live-database/sync-1/tables/orders.json',
+              type: 'raw_source',
+              size: 64,
+            },
+          ],
+        }),
+        // Only the orders.json artifact is readable; any other path resolves null.
+        readArtifact: vi.fn>().mockImplementation(async (input) => {
+          if (input.path !== 'raw-sources/warehouse/live-database/sync-1/tables/orders.json') {
+            return null;
+          }
+          return {
+            runId: input.runId,
+            path: input.path,
+            type: 'raw_source',
+            size: 64,
+            content: '{"name":"orders"}\n',
+          };
+        }),
+      },
+    };
+
+    createKloMcpServer({
+      server: fake.server,
+      memoryCapture: capture,
+      userContext: { userId: 'mcp-user' },
+      contextTools,
+    });
+
+    // Names are sorted before comparison, so registration order does not matter here.
+    expect(fake.tools.map((tool) => tool.name).sort()).toEqual([
+      'connection_list',
+      'connection_test',
+      'ingest_replay',
+      'ingest_report',
+      'ingest_status',
+      'ingest_trigger',
+      'knowledge_read',
+      'knowledge_search',
+      'knowledge_write',
+      'memory_capture',
+      'memory_capture_status',
+      'scan_list_artifacts',
+      'scan_read_artifact',
+      'scan_report',
+      'scan_status',
+      'scan_trigger',
+      'sl_list_sources',
+      'sl_query',
+      'sl_read_source',
+      'sl_validate',
+      'sl_write_source',
+    ]);
+
+    // Tool results carry the payload twice: pretty-printed JSON text and structuredContent.
+    await expect(getTool(fake.tools, 'connection_list').handler({})).resolves.toEqual({
+      content: [
+        {
+          type: 'text',
+          text: JSON.stringify(
+            {
+              connections: [
+                {
+                  id: '00000000-0000-4000-8000-000000000001',
+                  name: 'Warehouse',
+                  connectionType: 'POSTGRES',
+                },
+              ],
+            },
+            null,
+            2,
+          ),
+        },
+      ],
+      structuredContent: {
+        connections: [
+          {
+            id: '00000000-0000-4000-8000-000000000001',
+            name: 'Warehouse',
+            connectionType: 'POSTGRES',
+          },
+        ],
+      },
+    });
+
+    await expect(getTool(fake.tools, 'connection_test').handler({ connectionId: 'warehouse' })).resolves.toEqual({
+      content: [
+        {
+          type: 'text',
+          text: JSON.stringify(
+            {
+              id: 'warehouse',
+              connectionType: 'postgres',
+              ok: true,
+              tableCount: 2,
+              message: 'Connection test passed.',
+              warnings: [],
+            },
+            null,
+            2,
+          ),
+        },
+      ],
+      structuredContent: {
+        id: 'warehouse',
+        connectionType: 'postgres',
+        ok: true,
+        tableCount: 2,
+        message: 'Connection test passed.',
+        warnings: [],
+      },
+    });
+
expect(contextTools.connections?.test).toHaveBeenCalledWith({ connectionId: 'warehouse' });
+
+    // Knowledge handlers are expected to add the userId from userContext to the port call.
+    await getTool(fake.tools, 'knowledge_search').handler({ query: 'revenue', limit: 5 });
+    expect(contextTools.knowledge?.search).toHaveBeenCalledWith({
+      userId: 'mcp-user',
+      query: 'revenue',
+      limit: 5,
+    });
+
+    await getTool(fake.tools, 'knowledge_read').handler({ key: 'revenue' });
+    expect(contextTools.knowledge?.read).toHaveBeenCalledWith({
+      userId: 'mcp-user',
+      key: 'revenue',
+    });
+
+    // The tool input uses snake_case `sl_refs`; the port is expected to receive camelCase `slRefs`.
+    await getTool(fake.tools, 'knowledge_write').handler({
+      key: 'revenue',
+      summary: 'Paid order value',
+      content: '# Revenue',
+      tags: ['finance'],
+      refs: ['gross-margin'],
+      sl_refs: ['orders'],
+    });
+    expect(contextTools.knowledge?.write).toHaveBeenCalledWith({
+      userId: 'mcp-user',
+      key: 'revenue',
+      summary: 'Paid order value',
+      content: '# Revenue',
+      tags: ['finance'],
+      refs: ['gross-margin'],
+      slRefs: ['orders'],
+    });
+
+    await getTool(fake.tools, 'sl_list_sources').handler({
+      connectionId: '00000000-0000-4000-8000-000000000001',
+      query: 'orders',
+    });
+    expect(contextTools.semanticLayer?.listSources).toHaveBeenCalledWith({
+      connectionId: '00000000-0000-4000-8000-000000000001',
+      query: 'orders',
+    });
+
+    await getTool(fake.tools, 'sl_read_source').handler({
+      connectionId: 'warehouse',
+      sourceName: 'orders',
+    });
+    expect(contextTools.semanticLayer?.readSource).toHaveBeenCalledWith({
+      connectionId: 'warehouse',
+      sourceName: 'orders',
+    });
+
+    await getTool(fake.tools, 'sl_write_source').handler({
+      connectionId: '00000000-0000-4000-8000-000000000001',
+      sourceName: 'orders',
+      source: { name: 'orders', table: 'public.orders', grain: ['id'], columns: [], joins: [], measures: [] },
+    });
+    expect(contextTools.semanticLayer?.writeSource).toHaveBeenCalledWith({
+      connectionId: '00000000-0000-4000-8000-000000000001',
+      sourceName: 'orders',
+      source: { name: 'orders', table: 'public.orders', grain: ['id'], columns: [], joins: [], measures: [] },
+      yaml: undefined,
+      delete: undefined,
+    });
+
+    await getTool(fake.tools, 'sl_validate').handler({
+      connectionId: '00000000-0000-4000-8000-000000000001',
+      names: ['orders'],
+    });
+    expect(contextTools.semanticLayer?.validate).toHaveBeenCalledWith({
+      connectionId: '00000000-0000-4000-8000-000000000001',
+      names: ['orders'],
+    });
+
+    // The flat tool input is expected to be wrapped into `query`, with segments/order_by
+    // defaulted to [] and include_empty defaulted to true.
+    await getTool(fake.tools, 'sl_query').handler({
+      connectionId: '00000000-0000-4000-8000-000000000001',
+      measures: ['orders.count'],
+      dimensions: ['orders.created_at'],
+      filters: ['orders.status = paid'],
+      limit: 25,
+    });
+    expect(contextTools.semanticLayer?.query).toHaveBeenCalledWith({
+      connectionId: '00000000-0000-4000-8000-000000000001',
+      query: {
+        measures: ['orders.count'],
+        dimensions: ['orders.created_at'],
+        filters: ['orders.status = paid'],
+        segments: [],
+        order_by: [],
+        limit: 25,
+        include_empty: true,
+      },
+    });
+
+    await getTool(fake.tools, 'ingest_trigger').handler({
+      adapter: 'lookml',
+      connectionId: '00000000-0000-4000-8000-000000000001',
+      trigger: 'scheduled_pull',
+      config: { repoUrl: 'https://github.com/acme/looker.git' },
+    });
+    expect(contextTools.ingest?.trigger).toHaveBeenCalledWith({
+      adapter: 'lookml',
+      connectionId: '00000000-0000-4000-8000-000000000001',
+      trigger: 'scheduled_pull',
+      config: { repoUrl: 'https://github.com/acme/looker.git' },
+    });
+
+    // Pins the user-visible tool description, not just the handler behaviour.
+    expect(getTool(fake.tools, 'ingest_status').config.description).toBe(
+      'Read the current or final status for an ingest run, including local diff and work-unit summaries when available.',
+    );
+
+    await expect(getTool(fake.tools, 'ingest_status').handler({ runId: 'run-42' })).resolves.toMatchObject({
+      structuredContent: {
+        runId: 'run-42',
+        status: 'done',
+        stage: 'done',
+        progress: 1,
+        done: true,
+        adapter: 'fake',
+        connectionId: 'warehouse',
+        sourceDir: '/tmp/upload',
+        syncId: '2026-04-27-120000-run-42',
+        previousRunId: 'run-41',
+        diffSummary: {
+          added: 0,
+          modified: 1,
+          deleted: 0,
+          unchanged: 3,
+        },
+        rawFileCount: 4,
+        workUnitCount: 1,
+        workUnits: [
+          {
+            unitKey: 'fake-orders',
+            rawFiles: ['orders/orders.json'],
+            peerFileIndex: [],
+            dependencyPaths: [],
+          },
+        ],
+        evictionDeletedRawPaths: [],
+        errors: [],
+      },
+    });
+    expect(contextTools.ingest?.status).toHaveBeenCalledWith({ runId: 'run-42' });
+
+    await expect(getTool(fake.tools, 'ingest_report').handler({ runId: 'report-42' })).resolves.toMatchObject({
+      structuredContent: {
+        id: 'report-42',
+        runId: 'run-42',
+        jobId: 'job-42',
+        sourceKey: 'fake',
+      },
+    });
+    expect(contextTools.ingest?.report).toHaveBeenCalledWith({ runId: 'report-42' });
+
+    await expect(getTool(fake.tools, 'ingest_replay').handler({ runId: 'run-42' })).resolves.toMatchObject({
+      structuredContent: {
+        runId: 'run-42',
+        reportId: 'report-42',
+        status: 'done',
+        adapter: 'fake',
+      },
+    });
+    expect(contextTools.ingest?.replay).toHaveBeenCalledWith({ runId: 'run-42' });
+
+    // detectRelationships is expected to default to false when the caller omits it.
+    await getTool(fake.tools, 'scan_trigger').handler({
+      connectionId: 'warehouse',
+      mode: 'structural',
+      dryRun: true,
+    });
+    expect(contextTools.scan?.trigger).toHaveBeenCalledWith({
+      connectionId: 'warehouse',
+      mode: 'structural',
+      detectRelationships: false,
+      dryRun: true,
+    });
+
+    await getTool(fake.tools, 'scan_trigger').handler({
+      connectionId: 'warehouse',
+      mode: 'relationships',
+      detectRelationships: true,
+      dryRun: false,
+    });
+    expect(contextTools.scan?.trigger).toHaveBeenCalledWith({
+      connectionId: 'warehouse',
+      mode: 'relationships',
+      detectRelationships: true,
+      dryRun: false,
+    });
+
+    await expect(getTool(fake.tools, 'scan_status').handler({ runId: 'scan-run-1' })).resolves.toMatchObject({
+      structuredContent: {
+        runId: 'scan-run-1',
+        status: 'done',
+        connectionId: 'warehouse',
+      },
+    });
+
+    // The scan.report stub resolves null, which should surface as an isError tool result.
+    await expect(getTool(fake.tools, 'scan_report').handler({ runId: 'missing' })).resolves.toEqual({
+      content: [{ type: 'text', text: 'Scan report "missing" was not found.' }],
+      isError: true,
+    });
+
+    await expect(getTool(fake.tools, 'scan_list_artifacts').handler({ runId: 'scan-run-1' })).resolves.toEqual({
+      content: [
+        {
+          type: 'text',
+          text: JSON.stringify(
+            {
+              runId: 'scan-run-1',
+              artifacts: [
+                {
+                  path: 'raw-sources/warehouse/live-database/sync-1/scan-report.json',
+                  type: 'report',
+                  size: 128,
+                },
+                {
+                  path: 'raw-sources/warehouse/live-database/sync-1/tables/orders.json',
+                  type: 'raw_source',
+                  size: 64,
+                },
+              ],
+            },
+            null,
+            2,
+          ),
+        },
+      ],
+      structuredContent: {
+        runId: 'scan-run-1',
+        artifacts: [
+          {
+            path: 'raw-sources/warehouse/live-database/sync-1/scan-report.json',
+            type: 'report',
+            size: 128,
+          },
+          {
+            path: 'raw-sources/warehouse/live-database/sync-1/tables/orders.json',
+            type: 'raw_source',
+            size: 64,
+          },
+        ],
+      },
+    });
+    expect(contextTools.scan?.listArtifacts).toHaveBeenCalledWith({ runId: 'scan-run-1' });
+
+    await expect(
+      getTool(fake.tools, 'scan_read_artifact').handler({
+        runId: 'scan-run-1',
+        path: 'raw-sources/warehouse/live-database/sync-1/tables/orders.json',
+      }),
+    ).resolves.toMatchObject({
+      structuredContent: {
+        runId: 'scan-run-1',
+        path: 'raw-sources/warehouse/live-database/sync-1/tables/orders.json',
+        type: 'raw_source',
+        content: '{"name":"orders"}\n',
+      },
+    });
+    expect(contextTools.scan?.readArtifact).toHaveBeenCalledWith({
+      runId: 'scan-run-1',
+      path: 'raw-sources/warehouse/live-database/sync-1/tables/orders.json',
+    });
+
+    // The readArtifact stub returns null for any other path, so this should be a not-found error.
+    await expect(
+      getTool(fake.tools, 'scan_read_artifact').handler({
+        runId: 'scan-run-1',
+        path: 'klo.yaml',
+      }),
+    ).resolves.toEqual({
+      content: [{ type: 'text', text: 'Scan artifact "klo.yaml" was not found for run "scan-run-1".'
}],
+      isError: true,
+    });
+  });
+});
diff --git a/packages/context/src/mcp/server.ts b/packages/context/src/mcp/server.ts
new file mode 100644
index 00000000..85342826
--- /dev/null
+++ b/packages/context/src/mcp/server.ts
@@ -0,0 +1,94 @@
+import { randomUUID } from 'node:crypto';
+import { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js';
+import { z } from 'zod';
+import type { MemoryAgentInput } from '../memory/index.js';
+import { jsonErrorToolResult, jsonToolResult, registerKloContextTools } from './context-tools.js';
+import type { KloMcpServerDeps, KloMcpServerLike, MemoryCapturePort } from './types.js';
+
+// Input fields for `memory_capture`: a non-empty user message, plus an optional assistant
+// message and an optional non-empty connection id.
+const memoryCaptureInputSchema = {
+  userMessage: z.string().min(1).describe('The user message that may contain durable knowledge.'),
+  assistantMessage: z.string().optional().describe('The assistant response that concluded the exchange.'),
+  connectionId: z.string().min(1).optional().describe('Optional connection id for semantic-layer capture.'),
+};
+
+// Input fields for `memory_capture_status`: the non-empty run id to look up.
+const memoryCaptureStatusInputSchema = {
+  runId: z.string().min(1).describe('The memory capture run id returned by memory_capture.'),
+};
+
+/**
+ * Registers the `memory_capture` and `memory_capture_status` tools on `deps.server`.
+ *
+ * `memory_capture` forwards the exchange to `deps.memoryCapture.capture` and returns its result;
+ * `memory_capture_status` looks a run up by id and returns an error result when none is found.
+ */
+function registerMemoryCaptureTools(deps: {
+  server: KloMcpServerLike;
+  memoryCapture: MemoryCapturePort;
+  userContext: KloMcpServerDeps['userContext'];
+}): void {
+  deps.server.registerTool(
+    'memory_capture',
+    {
+      title: 'Memory Capture',
+      description:
+        'Capture durable knowledge and semantic-layer updates from the final user/assistant exchange. Returns a run id for polling.',
+      inputSchema: memoryCaptureInputSchema,
+    },
+    async (input) => {
+      // The handler receives an untyped record, so each field is coerced or type-checked here.
+      // Every call gets a fresh `mcp-<uuid>` chat id and is tagged as an external ingest.
+      const captureInput: MemoryAgentInput = {
+        userId: deps.userContext.userId,
+        chatId: `mcp-${randomUUID()}`,
+        userMessage: String(input.userMessage),
+        assistantMessage: typeof input.assistantMessage === 'string' ? input.assistantMessage : undefined,
+        connectionId: typeof input.connectionId === 'string' ? input.connectionId : undefined,
+        sourceType: 'external_ingest',
+      };
+      const result = await deps.memoryCapture.capture(captureInput);
+      return jsonToolResult(result);
+    },
+  );
+
+  deps.server.registerTool(
+    'memory_capture_status',
+    {
+      title: 'Memory Capture Status',
+      description: 'Read the current or final status for a memory capture run.',
+      inputSchema: memoryCaptureStatusInputSchema,
+    },
+    async (input) => {
+      const runId = String(input.runId);
+      const status = await deps.memoryCapture.status(runId);
+      // A falsy status is reported as a tool error rather than an empty success.
+      return status ? jsonToolResult(status) : jsonErrorToolResult(`Memory capture run "${runId}" was not found.`);
+    },
+  );
+}
+
+/**
+ * Registers KLO tools on the supplied server and returns that same server.
+ *
+ * Memory-capture tools are registered only when `deps.memoryCapture` is set, and context tools
+ * only when `deps.contextTools` is set; with neither, the server is returned untouched.
+ */
+export function createKloMcpServer(deps: KloMcpServerDeps): KloMcpServerDeps['server'] {
+  if (deps.memoryCapture) {
+    registerMemoryCaptureTools({
+      server: deps.server,
+      memoryCapture: deps.memoryCapture,
+      userContext: deps.userContext,
+    });
+  }
+
+  if (deps.contextTools) {
+    registerKloContextTools({
+      server: deps.server,
+      ports: deps.contextTools,
+      userContext: deps.userContext,
+    });
+  }
+
+  return deps.server;
+}
+
+/**
+ * Builds a real `McpServer` and registers the KLO tools on it via `createKloMcpServer`.
+ *
+ * `name` defaults to 'klo'; `version` falls back to a built-in default when omitted.
+ */
+export function createDefaultKloMcpServer(
+  deps: Omit & { name?: string; version?: string },
+): McpServer {
+  const server = new McpServer({
+    name: deps.name ?? 'klo',
+    version: deps.version ??
'0.0.0-private',
+  });
+  createKloMcpServer({
+    server: server as KloMcpServerLike,
+    memoryCapture: deps.memoryCapture,
+    userContext: deps.userContext,
+    contextTools: deps.contextTools,
+  });
+  return server;
+}
diff --git a/packages/context/src/mcp/types.ts b/packages/context/src/mcp/types.ts
new file mode 100644
index 00000000..899c04fb
--- /dev/null
+++ b/packages/context/src/mcp/types.ts
@@ -0,0 +1,326 @@
+import type { IngestReportSnapshot, MemoryFlowReplayInput } from '../ingest/index.js';
+import type { MemoryCaptureService } from '../memory/index.js';
+import type { KloScanMode, KloScanReport } from '../scan/index.js';
+import type {
+  SemanticLayerQueryInput,
+  SlDictionaryMatch,
+  SlSearchLaneSummary,
+  SlSearchMatchReason,
+} from '../sl/index.js';
+import type { WikiSearchLaneSummary, WikiSearchMatchReason } from '../wiki/index.js';
+
+/** A single text item inside a tool result. */
+export interface KloMcpTextContent {
+  type: 'text';
+  text: string;
+}
+
+/** What a tool handler returns: text content, an optional structured payload, and an optional error flag. */
+export interface KloMcpToolResult {
+  content: KloMcpTextContent[];
+  structuredContent?: T;
+  // Only ever `true`; successful results leave the field out.
+  isError?: true;
+}
+
+/** The `capture` and `status` members of `MemoryCaptureService`, typed by indexed access. */
+export interface MemoryCapturePort {
+  capture: MemoryCaptureService['capture'];
+  status: MemoryCaptureService['status'];
+}
+
+/** Identity of the caller on whose behalf tools run. */
+export interface KloMcpUserContext {
+  userId: string;
+}
+
+/** Minimal server surface required for registration: a single `registerTool` method. */
+export interface KloMcpServerLike {
+  registerTool(
+    name: string,
+    config: {
+      title?: string;
+      description?: string;
+      inputSchema: unknown;
+    },
+    handler: (input: Record) => Promise,
+  ): void;
+}
+
+export interface KloConnectionSummary {
+  id: string;
+  name: string;
+  connectionType: string;
+}
+
+export interface KloConnectionTestResponse {
+  id: string;
+  connectionType: string;
+  ok: boolean;
+  tableCount: number | null;
+  message: string;
+  warnings: string[];
+}
+
+/** Connections port: `list` is required, `test` is optional. */
+export interface KloConnectionsMcpPort {
+  list(): Promise;
+  test?(input: { connectionId: string }): Promise;
+}
+
+export interface KloKnowledgeSearchResult {
+  key: string;
+  path: string;
+  scope: 'GLOBAL' | 'USER';
+  summary: string;
+  score: number;
+  matchReasons?: WikiSearchMatchReason[];
+  lanes?: WikiSearchLaneSummary[];
+}
+
+export interface KloKnowledgeSearchResponse {
+  results: KloKnowledgeSearchResult[];
+  totalFound: number;
+}
+
+export interface KloKnowledgePage {
+  key: string;
+  summary: string;
+  content: string;
+  scope: 'GLOBAL' | 'USER';
+  tags?: string[];
+  refs?: string[];
+  slRefs?: string[];
+}
+
+// Module-private: only referenced by the optional `usage` field of KloKnowledgeMcpPort.write.
+interface KloHistoricSqlKnowledgeUsage {
+  executions: number;
+  distinct_users: number;
+  first_seen: string;
+  last_seen: string;
+  p50_runtime_ms: number | null;
+  p95_runtime_ms: number | null;
+  error_rate: number;
+  rows_produced?: number;
+}
+
+export interface KloKnowledgeWriteResponse {
+  success: boolean;
+  key: string;
+  action: 'created' | 'updated';
+}
+
+/** Knowledge port; every method takes the acting `userId` in its input. */
+export interface KloKnowledgeMcpPort {
+  search(input: { userId: string; query: string; limit: number }): Promise;
+  read(input: { userId: string; key: string }): Promise;
+  write(input: {
+    userId: string;
+    key: string;
+    summary: string;
+    content: string;
+    tags?: string[];
+    refs?: string[];
+    slRefs?: string[];
+    source?: string;
+    intent?: string;
+    tables?: string[];
+    representativeSql?: string;
+    usage?: KloHistoricSqlKnowledgeUsage;
+    fingerprints?: string[];
+  }): Promise;
+}
+
+export interface KloSemanticLayerSourceSummary {
+  connectionId: string;
+  connectionName: string;
+  name: string;
+  description?: string;
+  columnCount: number;
+  measureCount: number;
+  joinCount: number;
+  score?: number;
+  matchReasons?: SlSearchMatchReason[];
+  dictionaryMatches?: SlDictionaryMatch[];
+  lanes?: SlSearchLaneSummary[];
+}
+
+export interface KloSemanticLayerListResponse {
+  sources: KloSemanticLayerSourceSummary[];
+  totalSources: number;
+}
+
+export interface KloSemanticLayerReadResponse {
+  sourceName: string;
+  yaml: string;
+}
+
+export interface KloSemanticLayerWriteResponse {
+  success: boolean;
+  sourceName: string;
+  yaml?: string;
+  errors?: string[];
+  warnings?: string[];
+  commitHash?: string;
+}
+
+export interface KloSemanticLayerValidationResponse {
+  success: boolean;
+  errors: string[];
+  warnings: string[];
+}
+
+export interface KloSemanticLayerQueryResponse {
+  sql: string;
+  headers: string[];
+  rows: unknown[][];
+  totalRows: number;
+  plan?: Record;
+}
+
+/** Semantic-layer port. `connectionId` is optional for `listSources` and `query`, required elsewhere. */
+export interface KloSemanticLayerMcpPort {
+  listSources(input: { connectionId?: string; query?: string }): Promise;
+  readSource(input: { connectionId: string; sourceName: string }): Promise;
+  writeSource(input: {
+    connectionId: string;
+    sourceName: string;
+    yaml?: string;
+    source?: Record;
+    delete?: boolean;
+  }): Promise;
+  validate(input: { connectionId: string; names?: string[] }): Promise;
+  query(input: { connectionId?: string; query: SemanticLayerQueryInput }): Promise;
+}
+
+export type KloIngestTriggerKind = 'upload' | 'scheduled_pull' | 'manual_resync';
+
+// Module-private: one child entry of KloIngestTriggerResponse.fanout.
+interface KloIngestTriggerFanoutChild {
+  runId: string;
+  jobId: string;
+  reportId: string;
+  targetConnectionId: string;
+  metabaseDatabaseId: number;
+}
+
+export interface KloIngestTriggerResponse {
+  runId: string;
+  jobId?: string;
+  reportId?: string;
+  fanout?: {
+    status: 'all_succeeded' | 'partial_failure' | 'all_failed';
+    children: KloIngestTriggerFanoutChild[];
+  };
+}
+
+export interface KloIngestDiffSummary {
+  added: number;
+  modified: number;
+  deleted: number;
+  unchanged: number;
+}
+
+export interface KloIngestWorkUnitSummary {
+  unitKey: string;
+  rawFiles: string[];
+  peerFileIndex: string[];
+  dependencyPaths: string[];
+}
+
+/** Status of an ingest run; only `runId`, `status` and `done` are always present. */
+export interface KloIngestStatusResponse {
+  runId: string;
+  jobId?: string;
+  reportId?: string;
+  status: string;
+  stage?: string;
+  progress?: number;
+  errors?: string[];
+  done: boolean;
+  adapter?: string;
+  connectionId?: string;
+  sourceDir?: string | null;
+  syncId?: string;
+  startedAt?: string;
+  completedAt?: string;
+  previousRunId?: string | null;
+  diffSummary?: KloIngestDiffSummary;
+  workUnitCount?: number;
+  rawFileCount?: number;
+  workUnits?: KloIngestWorkUnitSummary[];
+  evictionDeletedRawPaths?: string[];
+}
+
+/** Ingest port: `trigger` and `status` are required, `report` and `replay` are optional. */
+export interface KloIngestMcpPort {
+  trigger(input: {
+    adapter: string;
+    connectionId: string;
+    config?: unknown;
+    trigger: KloIngestTriggerKind;
+  }): Promise;
+  status(input: { runId: string }): Promise;
+  report?(input: { runId: string }): Promise;
+  replay?(input: { runId: string }): Promise;
+}
+
+// Module-private. `status` and `done` are literal types ('done' / true), so a trigger
+// response always describes an already-finished run and carries the full report.
+interface KloScanTriggerResponse {
+  runId: string;
+  status: 'done';
+  done: true;
+  connectionId: string;
+  mode: KloScanMode;
+  dryRun: boolean;
+  syncId: string;
+  report: KloScanReport;
+}
+
+// Module-private status shape for a scan run.
+interface KloScanStatusResponse {
+  runId: string;
+  status: string;
+  done: boolean;
+  connectionId: string;
+  mode: KloScanMode;
+  dryRun: boolean;
+  syncId: string;
+  progress: number;
+  startedAt: string;
+  completedAt: string;
+  reportPath: string | null;
+  warnings: KloScanReport['warnings'];
+}
+
+export type KloScanArtifactType = 'report' | 'raw_source' | 'manifest_shard' | 'enrichment_artifact';
+
+export interface KloScanArtifactSummary {
+  path: string;
+  type: KloScanArtifactType;
+  size?: number;
+}
+
+export interface KloScanArtifactListResponse {
+  runId: string;
+  artifacts: KloScanArtifactSummary[];
+}
+
+/** An artifact summary plus the owning run id and the artifact's text content. */
+export interface KloScanArtifactReadResponse extends KloScanArtifactSummary {
+  runId: string;
+  content: string;
+}
+
+/** Scan port: `trigger`, `status` and `report` are required; the artifact methods are optional. */
+export interface KloScanMcpPort {
+  trigger(input: {
+    connectionId: string;
+    mode?: KloScanMode;
+    detectRelationships: boolean;
+    dryRun: boolean;
+  }): Promise;
+  status(input: { runId: string }): Promise;
+  report(input: { runId: string }): Promise;
+  listArtifacts?(input: { runId: string }): Promise;
+  readArtifact?(input: { runId: string; path: string }): Promise;
+}
+
+/** Bundle of optional context ports; each one may be supplied or left out independently. */
+export interface KloMcpContextPorts {
+  connections?: KloConnectionsMcpPort;
+  knowledge?: KloKnowledgeMcpPort;
+  semanticLayer?: KloSemanticLayerMcpPort;
+  ingest?: KloIngestMcpPort;
+  scan?: KloScanMcpPort;
+}
+
+/** Dependencies for `createKloMcpServer`: the target server, the caller identity, and optional ports. */
+export interface KloMcpServerDeps {
+  server: KloMcpServerLike;
+
memoryCapture?: MemoryCapturePort;
+  userContext: KloMcpUserContext;
+  contextTools?: KloMcpContextPorts;
+}
diff --git a/packages/context/src/memory/capture-signals.ts b/packages/context/src/memory/capture-signals.ts
new file mode 100644
index 00000000..8860474a
--- /dev/null
+++ b/packages/context/src/memory/capture-signals.ts
@@ -0,0 +1,128 @@
+import type { CaptureSignals, MemoryAgentInput, MemoryAgentSourceType } from './types.js';
+
+// SQL keywords that suggest an aggregate/analytic query. The word alternatives carry their own
+// trailing \b. The CTE alternative ends on "(" and must NOT be followed by \b: a boundary there
+// would require a word character right after the parenthesis, so "WITH x AS (" followed by a
+// space or newline would never match.
+const SQL_AGGREGATE_PATTERN = /\b(?:(?:SUM|AVG|COUNT|MIN|MAX|GROUP\s+BY|JOIN)\b|WITH\s+\w+\s+AS\s*\()/i;
+// Phrases in a user message that hint at a reusable semantic-layer definition.
+const SL_DEFINITION_PATTERN = /\b(define|going forward|always (apply|exclude)|treat as|cohort|reusable)\b/i;
+// Phrases in a user message that hint at a definition worth recording as knowledge.
+const KNOWLEDGE_DEFINITION_PATTERN =
+  /\b(define|going forward|alias|stands for|means|convention|is the (canonical|definition))\b/i;
+// A "| --- |" style separator cell, i.e. a markdown table in the message.
+const TABLE_SEPARATOR_PATTERN = /\|\s*-{3,}\s*\|/;
+// LookML top-level statements and field-level keys; both must match for the lookml dialect.
+const LOOKML_STRUCTURAL_PATTERN = /^\s*(view|explore|model|include)\s*:\s*[\w"`]/m;
+const LOOKML_FIELDS_PATTERN =
+  /^\s*(measure|dimension|dimension_group|sql_table_name|derived_table|sql_always_where|drill_fields|join)\s*:/m;
+
+export const DEFAULT_SKILL_NAMES = ['sl', 'sl_capture', 'knowledge_capture'] as const;
+
+/**
+ * Cheap regex pre-scan of one user/assistant exchange.
+ *
+ * @param input exchange to inspect; missing messages are treated as empty strings.
+ * @returns flags for knowledge and semantic-layer capture, the detected dialect (only ever
+ *   'lookml' or undefined here), and one human-readable reason per rule that fired.
+ *   Semantic-layer reasons are pushed before knowledge reasons.
+ */
+export function detectCaptureSignals(input: MemoryAgentInput): CaptureSignals {
+  const userMessage = input.userMessage?.trim() ?? '';
+  const assistantMessage = input.assistantMessage?.trim() ?? '';
+  const reasons: string[] = [];
+
+  let sl = false;
+  // NOTE(review): the length gate is on the *user* message even though the SQL is looked for in
+  // the assistant message - confirm this is intended.
+  if (assistantMessage && SQL_AGGREGATE_PATTERN.test(assistantMessage) && userMessage.length >= 100) {
+    sl = true;
+    reasons.push('sql aggregate in assistant message');
+  }
+  if (userMessage && SL_DEFINITION_PATTERN.test(userMessage)) {
+    sl = true;
+    reasons.push('sl-style definition keyword in user message');
+  }
+
+  let knowledge = false;
+  if (userMessage && KNOWLEDGE_DEFINITION_PATTERN.test(userMessage)) {
+    knowledge = true;
+    reasons.push('definition keyword in user message');
+  }
+  if (assistantMessage && TABLE_SEPARATOR_PATTERN.test(assistantMessage)) {
+    knowledge = true;
+    reasons.push('definition table in assistant message');
+  }
+
+  let dialect: CaptureSignals['dialect'];
+  if (
+    assistantMessage &&
+    LOOKML_STRUCTURAL_PATTERN.test(assistantMessage) &&
+    LOOKML_FIELDS_PATTERN.test(assistantMessage)
+  ) {
+    // LookML content always counts as a semantic-layer signal as well.
+    dialect = 'lookml';
+    sl = true;
+    reasons.push('lookml structure in assistant message');
+  }
+
+  return { knowledge, sl, dialect, reasons };
+}
+
+/**
+ * Renders the prompt block listing the skills the agent must load for this turn.
+ *
+ * @param signals result of `detectCaptureSignals`.
+ * @returns '' when no skill is required; otherwise newline-joined text with one
+ *   "- `name` - reason: ..." line per required skill.
+ */
+export function buildRequiredSkillsBlock(signals: CaptureSignals): string {
+  const required: Array<{ name: string; reason: string }> = [];
+  if (signals.knowledge) {
+    // Match on the start of the reason. 'sl-style definition keyword in user message' also
+    // contains 'definition keyword' and is pushed first, so a substring test would attribute
+    // the semantic-layer reason to knowledge_capture.
+    const reason =
+      signals.reasons.find((r) => r.startsWith('definition keyword') || r.startsWith('definition table')) ??
+      'wiki signal detected';
+    required.push({ name: 'knowledge_capture', reason });
+  }
+  if (signals.sl) {
+    const reason =
+      signals.reasons.find((r) => r.includes('sql aggregate') || r.includes('sl-style')) ?? 'sl signal detected';
+    required.push({ name: 'sl', reason });
+  }
+  if (signals.dialect === 'lookml') {
+    const reason = signals.reasons.find((r) => r.includes('lookml')) ?? 'lookml dialect detected';
+    required.push({ name: 'lookml_ingest', reason });
+  }
+  if (required.length === 0) {
+    return '';
+  }
+  const lines = required.map((r) => `- \`${r.name}\` - reason: ${r.reason}`).join('\n');
+  return [
+    '',
+    'The pre-scan flagged this turn as a likely capture candidate. Before exiting, you MUST `load_skill` for each skill below and follow its workflow. Skipping a required skill means a likely capture is being missed; only skip if, after reading the skill body and the turn, you are sure no capture applies.',
+    '',
+    lines,
+    '',
+  ].join('\n');
+}
+
+/**
+ * Decides whether the exchange can be skipped without running the agent.
+ *
+ * @param input exchange to inspect.
+ * @param signals precomputed signals; computed from `input` when omitted.
+ * @returns a short skip reason, or null when the exchange should be analyzed.
+ */
+export function prefilterSkipReason(input: MemoryAgentInput, signals = detectCaptureSignals(input)): string | null {
+  const trimmedUser = input.userMessage?.trim() ?? '';
+  const assistantMessage = input.assistantMessage ?? '';
+
+  // Skip only when the user message is under 6 characters AND the assistant shows no aggregate SQL.
+  const hasUserSignal = trimmedUser.length >= 6;
+  const hasAssistantSqlSignal = /\b(SUM|AVG|COUNT|MIN|MAX|GROUP\s+BY)\b/i.test(assistantMessage);
+  if (!hasUserSignal && !hasAssistantSqlSignal) {
+    return 'message too short, no SQL keywords';
+  }
+
+  if (signals.dialect === 'lookml') {
+    // LookML is only worth analyzing when it has a derived table / always-where / join, or a
+    // measure type from the list below (plain `count` is deliberately not in it).
+    const hasStructural = /^\s*(derived_table|sql_always_where|join)\s*:/m.test(assistantMessage);
+    const hasNonCountAggregate = /\btype:\s*(sum|average|avg|min|max|count_distinct|median|percentile)\b/i.test(
+      assistantMessage,
+    );
+    if (!hasStructural && !hasNonCountAggregate) {
+      return 'no semantic signal (lookml-wrapper)';
+    }
+  }
+
+  return null;
+}
+
+/** True when `prefilterSkipReason` finds no reason to skip the exchange. */
+export function isWorthAnalyzing(input: MemoryAgentInput): boolean {
+  return prefilterSkipReason(input, detectCaptureSignals(input)) === null;
+}
+
+/** Step budget per source type: research 20, external_ingest 30, backfill and sql-review-migration 25. */
+export function stepBudgetFor(sourceType: MemoryAgentSourceType): number {
+  switch (sourceType) {
+    case 'research':
+      return 20;
+    case 'external_ingest':
+      return 30;
+    case 'backfill':
+    case 'sql-review-migration':
+      return 25;
+  }
+}
+
+/** Maps a source type to its prompt name; anything not listed falls back to the research prompt. */
+export function promptNameFor(sourceType: MemoryAgentSourceType): string {
+  return sourceType === 'external_ingest'
+    ?
'memory_agent_external_ingest'
+    : sourceType === 'backfill' || sourceType === 'sql-review-migration'
+      ? 'memory_agent_backfill'
+      : 'memory_agent_research';
+}
diff --git a/packages/context/src/memory/index.ts b/packages/context/src/memory/index.ts
new file mode 100644
index 00000000..a23c4ed5
--- /dev/null
+++ b/packages/context/src/memory/index.ts
@@ -0,0 +1,41 @@
+// Barrel for the memory package: re-exports only, no logic of its own.
+export {
+  buildRequiredSkillsBlock,
+  DEFAULT_SKILL_NAMES,
+  detectCaptureSignals,
+  isWorthAnalyzing,
+  prefilterSkipReason,
+  promptNameFor,
+  stepBudgetFor,
+} from './capture-signals.js';
+export { MemoryAgentService } from './memory-agent.service.js';
+export { createLocalProjectMemoryCapture, type CreateLocalProjectMemoryCaptureOptions } from './local-memory.js';
+export { LocalMemoryRunStore, type LocalMemoryRunStoreOptions } from './local-memory-runs.js';
+export {
+  MemoryCaptureService,
+  type MemoryCaptureServiceDeps,
+  type MemoryCaptureStartResult,
+  type MemoryCaptureStatus,
+  type MemoryRunRecord,
+  type MemoryRunStatus,
+  type MemoryRunStorePort,
+} from './memory-runs.js';
+
+// Type-only re-exports from ./types.js.
+export type {
+  CaptureSession,
+  CaptureSignals,
+  MemoryAction,
+  MemoryAgentInput,
+  MemoryAgentResult,
+  MemoryAgentServiceDeps,
+  MemoryAgentSettings,
+  MemoryAgentSourceType,
+  MemoryCommitMessagePort,
+  MemoryConnectionPort,
+  MemoryFileStorePort,
+  MemoryKnowledgeSlRefsPort,
+  MemoryLockPort,
+  MemorySlSourceReconcilerPort,
+  MemoryTelemetryPort,
+  MemoryToolSetLike,
+  MemoryToolsetFactoryPort,
+} from './types.js';
diff --git a/packages/context/src/memory/local-memory-runs.ts b/packages/context/src/memory/local-memory-runs.ts
new file mode 100644
index 00000000..29daa728
--- /dev/null
+++ b/packages/context/src/memory/local-memory-runs.ts
@@ -0,0 +1,211 @@
+import { randomUUID } from 'node:crypto';
+import { mkdirSync } from 'node:fs';
+import { dirname, join } from 'node:path';
+import Database from 'better-sqlite3';
+import type { MemoryRunRecord, MemoryRunStatus, MemoryRunStorePort } from
/** Options accepted by {@link LocalMemoryRunStore}. */
export interface LocalMemoryRunStoreOptions {
  /** Project root; the database lives at `<projectDir>/.klo/db.sqlite`. */
  projectDir: string;
  /** Optional run-id generator. Defaults to `memory-<random uuid>`. */
  idFactory?: () => string;
}

/** Raw column shape selected from the `local_memory_runs` table. */
type MemoryRunRow = {
  id: string;
  status: string;
  stage: string;
  input_hash: string;
  chat_id: string | null;
  output_summary_json: string | null;
  error: string | null;
};

/** Location of the project-local SQLite database. */
function localMemoryDbPath(projectDir: string): string {
  return join(projectDir, '.klo', 'db.sqlite');
}

/** Accepts ids that start with an alphanumeric and contain only `[A-Za-z0-9_.-]`. */
function isSafeRunId(runId: string): boolean {
  return /^[a-zA-Z0-9][a-zA-Z0-9_.-]*$/.test(runId);
}

/** Type guard for the three persisted run states. */
function isMemoryRunStatus(value: unknown): value is MemoryRunStatus {
  return value === 'running' || value === 'done' || value === 'error';
}

/**
 * Decodes the JSON stored in `output_summary_json`.
 *
 * Returns `null` for an empty column and also for text that is not valid JSON, so a
 * damaged row degrades to "no summary" instead of making `findById` throw. This mirrors
 * `rowToRecord`, which already tolerates unexpected `status` values.
 */
function parseOutputSummary(raw: string | null): MemoryAgentResult | null {
  if (!raw) {
    return null;
  }
  try {
    return JSON.parse(raw) as MemoryAgentResult;
  } catch {
    return null;
  }
}

/** Maps a database row to the port-level record; rows with an unknown status yield `null`. */
function rowToRecord(row: MemoryRunRow): MemoryRunRecord | null {
  if (!isMemoryRunStatus(row.status)) {
    return null;
  }
  return {
    id: row.id,
    status: row.status,
    stage: row.stage,
    inputHash: row.input_hash,
    chatId: row.chat_id,
    outputSummary: parseOutputSummary(row.output_summary_json),
    error: row.error,
  };
}

/**
 * SQLite-backed {@link MemoryRunStorePort} that keeps memory-run status in the
 * project's `.klo/db.sqlite` file.
 */
export class LocalMemoryRunStore implements MemoryRunStorePort {
  private readonly db: Database.Database;
  private readonly idFactory: () => string;

  /** Opens (creating if needed) the database and ensures the runs table and index exist. */
  constructor(options: LocalMemoryRunStoreOptions) {
    const dbPath = localMemoryDbPath(options.projectDir);
    mkdirSync(dirname(dbPath), { recursive: true });
    this.db = new Database(dbPath);
    this.db.pragma('journal_mode = WAL');
    this.db.pragma('foreign_keys = ON');
    this.db.exec(`
      CREATE TABLE IF NOT EXISTS local_memory_runs (
        id TEXT PRIMARY KEY,
        status TEXT NOT NULL,
        stage TEXT NOT NULL,
        input_hash TEXT NOT NULL,
        chat_id TEXT,
        output_summary_json TEXT,
        error TEXT,
        created_at TEXT NOT NULL,
        updated_at TEXT NOT NULL
      );

      CREATE INDEX IF NOT EXISTS local_memory_runs_status_updated_idx
        ON local_memory_runs (status, updated_at DESC);
    `);
    this.idFactory = options.idFactory ?? (() => `memory-${randomUUID()}`);
  }

  /** Inserts a new row in state `running` / stage `queued` and returns its id. */
  async createRunning(args: { inputHash: string; chatId?: string | null }): Promise<{ id: string }> {
    const now = new Date().toISOString();
    const id = this.idFactory();
    this.db
      .prepare(
        `
          INSERT INTO local_memory_runs (
            id,
            status,
            stage,
            input_hash,
            chat_id,
            output_summary_json,
            error,
            created_at,
            updated_at
          )
          VALUES (
            @id,
            'running',
            'queued',
            @inputHash,
            @chatId,
            NULL,
            NULL,
            @now,
            @now
          )
        `,
      )
      .run({
        id,
        inputHash: args.inputHash,
        chatId: args.chatId ?? null,
        now,
      });
    return { id };
  }

  /** Moves the run to `running` at the given stage and clears any summary/error. */
  async markRunning(id: string, stage: string): Promise<void> {
    this.updateRun(id, {
      status: 'running',
      stage,
      outputSummaryJson: null,
      error: null,
    });
  }

  /** Marks the run `done` and stores the serialized agent result. */
  async markDone(id: string, outputSummary: MemoryAgentResult): Promise<void> {
    this.updateRun(id, {
      status: 'done',
      stage: 'done',
      outputSummaryJson: JSON.stringify(outputSummary),
      error: null,
    });
  }

  /** Marks the run `error` with the given message and clears any summary. */
  async markError(id: string, error: string): Promise<void> {
    this.updateRun(id, {
      status: 'error',
      stage: 'error',
      outputSummaryJson: null,
      error,
    });
  }

  /** Looks a run up by id; returns `null` for malformed ids, missing rows, or unknown statuses. */
  async findById(id: string): Promise<MemoryRunRecord | null> {
    if (!isSafeRunId(id)) {
      return null;
    }
    const row = this.db
      .prepare(
        `
          SELECT
            id,
            status,
            stage,
            input_hash,
            chat_id,
            output_summary_json,
            error
          FROM local_memory_runs
          WHERE id = ?
        `,
      )
      .get(id) as MemoryRunRow | undefined;

    return row ? rowToRecord(row) : null;
  }

  /**
   * Releases the SQLite handle. Safe to call more than once; the store must not be
   * used afterwards.
   */
  close(): void {
    if (this.db.open) {
      this.db.close();
    }
  }

  /**
   * Applies a status transition and bumps `updated_at`.
   *
   * @throws Error when no row with `id` exists.
   */
  private updateRun(
    id: string,
    input: {
      status: MemoryRunStatus;
      stage: string;
      outputSummaryJson: string | null;
      error: string | null;
    },
  ): void {
    const result = this.db
      .prepare(
        `
          UPDATE local_memory_runs
          SET
            status = @status,
            stage = @stage,
            output_summary_json = @outputSummaryJson,
            error = @error,
            updated_at = @updatedAt
          WHERE id = @id
        `,
      )
      .run({
        id,
        status: input.status,
        stage: input.stage,
        outputSummaryJson: input.outputSummaryJson,
        error: input.error,
        updatedAt: new Date().toISOString(),
      });

    if (result.changes === 0) {
      throw new Error(`Memory run not found: ${id}`);
    }
  }
}
+ const store = new LocalMemoryRunStore({ + projectDir: tempDir, + idFactory: () => 'memory-run-1', + }); + + const created = await store.createRunning({ inputHash: 'hash-1', chatId: 'chat-1' }); + expect(created).toEqual({ id: 'memory-run-1' }); + + await store.markRunning('memory-run-1', 'capturing'); + await store.markDone('memory-run-1', { + signalDetected: true, + actions: [{ target: 'wiki', type: 'created', key: 'revenue', detail: 'Revenue definition' }], + skillsLoaded: ['knowledge_capture'], + commitHash: 'abc123', + }); + + await expect(access(join(tempDir, '.klo/db.sqlite'))).resolves.toBeUndefined(); + await expectPathMissing(join(tempDir, '.klo/memory-runs/memory-run-1.json')); + + await expect(store.findById('memory-run-1')).resolves.toMatchObject({ + id: 'memory-run-1', + status: 'done', + stage: 'done', + inputHash: 'hash-1', + chatId: 'chat-1', + error: null, + outputSummary: { + signalDetected: true, + commitHash: 'abc123', + }, + }); + + const reloaded = new LocalMemoryRunStore({ projectDir: tempDir }); + await expect(reloaded.findById('memory-run-1')).resolves.toMatchObject({ + id: 'memory-run-1', + status: 'done', + stage: 'done', + inputHash: 'hash-1', + chatId: 'chat-1', + outputSummary: { + actions: [{ target: 'wiki', type: 'created', key: 'revenue', detail: 'Revenue definition' }], + skillsLoaded: ['knowledge_capture'], + signalDetected: true, + commitHash: 'abc123', + }, + }); + }); +}); + +describe('createLocalProjectMemoryCapture', () => { + let tempDir: string; + + beforeEach(async () => { + tempDir = await mkdtemp(join(tmpdir(), 'klo-local-memory-')); + }); + + afterEach(async () => { + await rm(tempDir, { recursive: true, force: true }); + }); + + it('captures a wiki page through the local memory agent and persists pollable status', async () => { + const project = await initKloProject({ projectDir: tempDir, projectName: 'warehouse' }); + const agentRunner = { + runLoop: async ({ + toolSet, + }: { + toolSet: Record Promise }>; + }) => { 
+ await toolSet.load_skill.execute({ name: 'knowledge_capture' }); + await toolSet.wiki_write.execute( + { + key: 'revenue', + summary: 'Revenue definition', + content: 'Revenue means paid order value net of refunds.', + tags: ['finance'], + }, + { toolCallId: 'wiki-write' }, + ); + return { stopReason: 'natural' as const }; + }, + }; + + const capture = createLocalProjectMemoryCapture(project, { + agentRunner: agentRunner as never, + runIdFactory: () => 'memory-run-1', + }); + + await expect( + capture.capture({ + userId: 'local-user', + chatId: 'chat-1', + userMessage: 'define revenue as paid order value net of refunds', + assistantMessage: 'Captured.', + sourceType: 'external_ingest', + }), + ).resolves.toEqual({ runId: 'memory-run-1' }); + await capture.waitForRun('memory-run-1'); + + await expect(access(join(project.projectDir, '.klo/db.sqlite'))).resolves.toBeUndefined(); + await expectPathMissing(join(project.projectDir, '.klo/memory-runs/memory-run-1.json')); + + await expect(capture.status('memory-run-1')).resolves.toMatchObject({ + runId: 'memory-run-1', + status: 'done', + done: true, + captured: { wiki: ['revenue'], sl: [], xrefs: [] }, + skillsLoaded: ['knowledge_capture'], + signalDetected: true, + }); + + await expect(readFile(join(project.projectDir, 'knowledge/global/revenue.md'), 'utf-8')).resolves.toContain( + 'Revenue means paid order value net of refunds.', + ); + }); + + it('captures a semantic-layer source for a named local connection id', async () => { + const project = await initKloProject({ projectDir: tempDir, projectName: 'warehouse' }); + project.config.connections.warehouse = { driver: 'postgres', readonly: true }; + const agentRunner = { + runLoop: async ({ + toolSet, + }: { + toolSet: Record Promise }>; + }) => { + await toolSet.load_skill.execute({ name: 'sl' }); + await toolSet.sl_write_source.execute( + { + connectionId: 'warehouse', + sourceName: 'orders', + source: { + name: 'orders', + table: 'public.orders', + grain: ['id'], + 
columns: [{ name: 'id', type: 'number' }], + joins: [], + measures: [{ name: 'order_count', expr: 'count(*)' }], + }, + }, + { toolCallId: 'sl-write' }, + ); + return { stopReason: 'natural' as const }; + }, + }; + + const capture = createLocalProjectMemoryCapture(project, { + agentRunner: agentRunner as never, + runIdFactory: () => 'memory-run-2', + }); + + await capture.capture({ + userId: 'local-user', + chatId: 'chat-2', + userMessage: 'going forward define orders count as count of public orders', + assistantMessage: 'Captured.', + connectionId: 'warehouse', + sourceType: 'external_ingest', + }); + await capture.waitForRun('memory-run-2'); + + await expect(access(join(project.projectDir, '.klo/db.sqlite'))).resolves.toBeUndefined(); + await expectPathMissing(join(project.projectDir, '.klo/memory-runs/memory-run-2.json')); + + await expect(capture.status('memory-run-2')).resolves.toMatchObject({ + runId: 'memory-run-2', + status: 'done', + captured: { wiki: [], sl: ['orders'], xrefs: [] }, + skillsLoaded: ['sl'], + signalDetected: true, + }); + await expect(readFile(join(project.projectDir, 'semantic-layer/warehouse/orders.yaml'), 'utf-8')).resolves.toContain( + 'order_count', + ); + }); +}); diff --git a/packages/context/src/memory/local-memory.ts b/packages/context/src/memory/local-memory.ts new file mode 100644 index 00000000..cbad2fc8 --- /dev/null +++ b/packages/context/src/memory/local-memory.ts @@ -0,0 +1,482 @@ +import { join } from 'node:path'; +import { fileURLToPath } from 'node:url'; +import type { KloLlmProvider } from '@klo/llm'; +import YAML from 'yaml'; +import { AgentRunnerService } from '../agent/index.js'; +import { localConnectionInfoFromConfig } from '../connections/index.js'; +import type { KloEmbeddingPort, KloFileStorePort, KloFileWriteResult } from '../core/index.js'; +import { type KloLogger, noopLogger, SessionWorktreeService } from '../core/index.js'; +import type { KloSemanticLayerComputePort } from '../daemon/index.js'; +import { 
createLocalKloLlmProviderFromConfig } from '../llm/index.js'; +import type { KloLocalProject } from '../project/index.js'; +import { PromptService } from '../prompts/index.js'; +import { SkillsRegistryService } from '../skills/index.js'; +import { + type KloConnectionInfo, + type KloQueryResult, + SemanticLayerService, + type SemanticLayerSource, + type SlConnectionCatalogPort, + SlDiscoverTool, + SlEditSourceTool, + type SlPythonPort, + SlReadSourceTool, + SlRollbackTool, + SlSearchService, + type SlSourcesIndexPort, + SlValidateTool, + type SlValidationDeps, + type SlValidatorPort, + SlWriteSourceTool, + SqliteSlSourcesIndex, + sourceDefinitionSchema, + sourceOverlaySchema, +} from '../sl/index.js'; +import { BaseTool, type GitAuthorResolverPort, type ToolContext } from '../tools/index.js'; +import { + type KnowledgeEventPort, + type KnowledgeIndexPort, + KnowledgeWikiService, + searchLocalKnowledgePages, + WikiListTagsTool, + WikiReadTool, + WikiRemoveTool, + WikiSearchTool, + WikiWriteTool, +} from '../wiki/index.js'; +import { LocalMemoryRunStore } from './local-memory-runs.js'; +import { MemoryAgentService } from './memory-agent.service.js'; +import { MemoryCaptureService } from './memory-runs.js'; +import type { + MemoryConnectionPort, + MemoryFileStorePort, + MemoryKnowledgeSlRefsPort, + MemorySlSourceReconcilerPort, + MemoryToolSetLike, + MemoryToolsetFactoryPort, +} from './types.js'; + +const promptsDir = fileURLToPath(new URL('../../prompts', import.meta.url)); +const skillsDir = fileURLToPath(new URL('../../skills', import.meta.url)); +const LOCAL_AUTHOR = { name: 'KLO Local', email: 'local@klo.local' }; +const LOCAL_SHAPE_WARNING = 'Local memory capture validates semantic-layer YAML shape only.'; + +export interface CreateLocalProjectMemoryCaptureOptions { + llmProvider?: KloLlmProvider; + agentRunner?: AgentRunnerService; + memoryModel?: string; + semanticLayerCompute?: KloSemanticLayerComputePort; + queryExecutor?: { execute(input: { connectionId: 
/**
 * Wires a {@link MemoryCaptureService} for a local project out of local adapters:
 * file-backed wiki/semantic-layer services, a SQLite sources index, no-op embedding,
 * event and reconciler ports, and a SQLite run store.
 *
 * @param project Local project providing the file store, git service and config.
 * @param options Optional overrides (LLM provider, agent runner, model, compute, logger…).
 * @returns A capture service whose runs are persisted in `<projectDir>/.klo/db.sqlite`.
 * @throws Error when neither `options.agentRunner` nor a usable LLM provider is available.
 */
export function createLocalProjectMemoryCapture(
  project: KloLocalProject,
  options: CreateLocalProjectMemoryCaptureOptions = {},
): MemoryCaptureService {
  const logger = options.logger ?? noopLogger;
  const rootFileStore = new LocalMemoryFileStore(project.fileStore);
  const embedding = new NoopEmbeddingPort();
  const knowledgeIndex = new LocalKnowledgeIndex(project);
  const knowledgeEvents = new NoopKnowledgeEventPort();
  const knowledgeSlRefs = new NoopKnowledgeSlRefsPort();
  const connections = new LocalMemoryConnections(project, options.queryExecutor);
  const slPython = new LocalSlPythonPort(options.semanticLayerCompute);
  const semanticLayerService = new SemanticLayerService(rootFileStore, connections, slPython, logger);
  const slSourcesRepository = new SqliteSlSourcesIndex({ dbPath: join(project.projectDir, '.klo', 'db.sqlite') });
  const slSearchService = new SlSearchService(embedding, slSourcesRepository, logger);
  const wikiService = new KnowledgeWikiService(rootFileStore, embedding, knowledgeIndex, project.git, logger);
  const authorResolver = new LocalAuthorResolver();
  const llmProvider = options.llmProvider ?? createLocalKloLlmProviderFromConfig(project.config.llm);
  const toolsetFactory = new LocalMemoryToolsetFactory({
    project,
    embedding,
    wikiService,
    knowledgeIndex,
    knowledgeEvents,
    semanticLayerService,
    slSearchService,
    authorResolver,
    slSourcesRepository,
    connections,
  });
  // An injected runner wins; otherwise a provider is mandatory to build the default one.
  const agentRunner =
    options.agentRunner ??
    new AgentRunnerService({
      llmProvider: requireLlmProvider(llmProvider),
      logger,
    });
  // Precedence: explicit option, then the project's default model, then a placeholder.
  // Previously `options.memoryModel` was declared on the options type but never read.
  const memoryIngestionModel =
    options.memoryModel ?? project.config.llm.models.default ?? 'local-memory-model';
  const memoryAgent = new MemoryAgentService({
    settings: {
      knowledge: { userScopedKnowledgeEnabled: false },
      slValidation: { probeRowCount: 0 },
      llm: { memoryIngestionModel },
    },
    promptService: new PromptService({ promptsDir, partials: [] }),
    skillsRegistry: new SkillsRegistryService({ skillsDir }),
    wikiService,
    knowledgeIndex,
    knowledgeSlRefs,
    semanticLayerService,
    slSearchService,
    connections,
    rootFileStore,
    gitService: project.git,
    lockingService: new LocalMemoryLock(),
    slSourcesRepository,
    sessionWorktreeService: new SessionWorktreeService({
      coreConfig: project.coreConfig,
      gitService: project.git,
      configService: rootFileStore,
    }),
    semanticLayerSourceReconciler: new NoopSemanticLayerSourceReconciler(),
    agentRunner,
    slValidator: new LocalShapeOnlySlValidator(),
    toolsetFactory,
    logger,
  });
  return new MemoryCaptureService({
    memoryAgent,
    runs: new LocalMemoryRunStore({ projectDir: project.projectDir, idFactory: options.runIdFactory }),
  });
}

/**
 * Narrows an optional provider to a definite one.
 *
 * @throws Error when `provider` is `null` or `undefined`.
 */
function requireLlmProvider(provider: KloLlmProvider | null | undefined): KloLlmProvider {
  if (!provider) {
    throw new Error('createLocalProjectMemoryCapture requires llm.provider.backend or an injected agentRunner');
  }
  return provider;
}
/**
 * {@link KnowledgeIndexPort} implementation that answers reads straight from the
 * project's `knowledge/` markdown files. All write/maintenance operations are no-ops
 * in this local implementation.
 */
class LocalKnowledgeIndex implements KnowledgeIndexPort {
  constructor(private readonly project: KloLocalProject) {}

  async upsertPage(): Promise<void> {}

  async applyDiffTransactional(): Promise<void> {}

  // NOTE(review): the Map's key/value types are assumed to be string — confirm against KnowledgeIndexPort.
  async getExistingSearchTexts(): Promise<Map<string, string>> {
    return new Map();
  }

  async deleteStale(): Promise<void> {}

  async deleteByScope(): Promise<void> {}

  async deleteByKey(): Promise<void> {}

  /** Returns `{ page_key }` when the page file can be read, otherwise `null`. */
  async findPageByKey(scope: string, scopeId: string | null, pageKey: string) {
    const target = this.pagePath(scope, scopeId, pageKey);
    try {
      await this.project.fileStore.readFile(target);
    } catch {
      return null;
    }
    return { page_key: pageKey };
  }

  /**
   * Collects every `.md` page under `knowledge/global` and `knowledge/user/<userId>`,
   * reads each file for its front-matter summary, and returns the entries ordered by
   * page key.
   */
  async listPagesForUser(userId: string) {
    type PageEntry = { id?: string; page_key: string; summary: string; scope: string; scope_id: string | null };
    const roots = [
      { scope: 'GLOBAL', scopeId: null, dir: 'knowledge/global' },
      { scope: 'USER', scopeId: userId, dir: `knowledge/user/${userId}` },
    ];
    const collected: PageEntry[] = [];
    for (const root of roots) {
      const listing = await this.project.fileStore.listFiles(root.dir, true);
      const markdownFiles = listing.files.filter((entry) => entry.endsWith('.md'));
      for (const relativePath of markdownFiles) {
        const stored = await this.project.fileStore.readFile(`${root.dir}/${relativePath}`);
        const { summary } = parseWiki(stored.content);
        collected.push({
          // The filter above guarantees the trailing ".md", so dropping it by length is exact.
          page_key: relativePath.slice(0, -'.md'.length),
          summary,
          scope: root.scope,
          scope_id: root.scopeId,
        });
      }
    }
    collected.sort((left, right) => left.page_key.localeCompare(right.page_key));
    return collected;
  }

  /** Number of pages in the `USER` scope for `userId`. */
  async getUserPageCount(userId: string): Promise<number> {
    let total = 0;
    for (const page of await this.listPagesForUser(userId)) {
      if (page.scope === 'USER') {
        total += 1;
      }
    }
    return total;
  }

  async incrementUsageCount(): Promise<void> {}

  /**
   * Text-only ranking: scores `"<page_key> <summary>"` against `queryText` with
   * `scoreText`, drops zero scores, orders by score (desc) then page key, and keeps
   * the first `limit` hits. The embedding argument is not used.
   */
  async searchRRF(_userId: string, _embedding: number[] | null, queryText: string, limit: number) {
    const candidates = await this.listPagesForUser(_userId);
    const scored: Array<{ pageKey: string; summary: string; rrfScore: number }> = [];
    for (const candidate of candidates) {
      const rrfScore = scoreText(`${candidate.page_key} ${candidate.summary}`, queryText);
      if (rrfScore > 0) {
        scored.push({ pageKey: candidate.page_key, summary: candidate.summary, rrfScore });
      }
    }
    scored.sort((left, right) => right.rrfScore - left.rrfScore || left.pageKey.localeCompare(right.pageKey));
    return scored.slice(0, limit);
  }

  /** Maps a scope/key pair to its markdown path inside the project. */
  private pagePath(scope: string, scopeId: string | null, pageKey: string): string {
    if (scope === 'GLOBAL') {
      return `knowledge/global/${pageKey}.md`;
    }
    return `knowledge/user/${scopeId}/${pageKey}.md`;
  }
}
/**
 * {@link SlValidatorPort} that only checks the YAML shape of a semantic-layer source
 * against the overlay/definition schemas; it never touches a database.
 */
class LocalShapeOnlySlValidator implements SlValidatorPort {
  /**
   * Reads the source file, parses it as YAML and validates it with
   * `sourceOverlaySchema` (when neither `table` nor `sql` is set) or
   * `sourceDefinitionSchema` (otherwise).
   *
   * @returns `errors` empty plus the shape-only warning on success; one formatted
   *   error per schema issue on failure; a single error carrying the thrown message
   *   when reading or parsing fails.
   */
  async validateSingleSource(deps: SlValidationDeps, connectionId: string, sourceName: string) {
    try {
      const file = await deps.semanticLayerService.readSourceFile(connectionId, sourceName);
      const source = YAML.parse(file.content) as SemanticLayerSource;
      const definesOwnRelation = source.table != null || source.sql != null;
      const schema = definesOwnRelation ? sourceDefinitionSchema : sourceOverlaySchema;
      const outcome = schema.safeParse(source);
      if (outcome.success) {
        return { errors: [], warnings: [LOCAL_SHAPE_WARNING] };
      }
      const errors = outcome.error.issues.map((issue) => {
        const location = issue.path.join('.') || 'source';
        return `${sourceName}: ${location} ${issue.message}`;
      });
      return { errors, warnings: [] };
    } catch (failure) {
      const reason = failure instanceof Error ? failure.message : String(failure);
      return { errors: [`${sourceName}: ${reason}`], warnings: [] };
    }
  }
}
/**
 * Splits a wiki page into its front-matter `summary` and trimmed body.
 *
 * The page is expected to start with a `---` fenced YAML block. Both LF and CRLF line
 * endings are accepted so pages saved with Windows line endings keep their summary
 * (previously they fell through to the "no front matter" branch). Pages without a
 * leading fence are returned as body-only with an empty summary.
 *
 * @param raw Full file contents.
 * @returns `summary` (empty string when absent or not a string) and the trimmed body.
 */
function parseWiki(raw: string): { summary: string; content: string } {
  const match = raw.match(/^---\r?\n([\s\S]*?)\r?\n---(?:\r?\n)?([\s\S]*)$/);
  if (!match) {
    return { summary: '', content: raw.trim() };
  }
  // An empty front-matter block parses to null; treat it as "no fields".
  const frontmatter = (YAML.parse(match[1]) ?? {}) as Record<string, unknown>;
  return {
    summary: typeof frontmatter.summary === 'string' ? frontmatter.summary : '',
    content: match[2].trim(),
  };
}

/**
 * Case-insensitive relevance score of `query` against `text`.
 *
 * @returns `0` for a blank query, `1` when the whole query occurs as a substring,
 *   otherwise the fraction of whitespace-separated query words found in `text`.
 */
function scoreText(text: string, query: string): number {
  const normalized = query.toLowerCase().trim();
  if (!normalized) {
    return 0;
  }
  const haystack = text.toLowerCase();
  if (haystack.includes(normalized)) {
    return 1;
  }
  const words = normalized.split(/\s+/).filter(Boolean);
  return words.filter((word) => haystack.includes(word)).length / Math.max(words.length, 1);
}
+import { generateText } from 'ai'; +import { SYSTEM_GIT_AUTHOR } from '../tools/index.js'; +import { MemoryAgentService } from './memory-agent.service.js'; + +interface BuiltMocks { + appSettings: any; + llmProvider: any; + prompt: any; + posthog: any; + telemetry: any; + skillsRegistry: any; + wikiService: any; + indexRepository: any; + knowledgeSlRefsRepository: any; + knowledgeRepository: any; + embeddingService: any; + semanticLayerService: any; + slSearchService: any; + dataSourcesService: any; + configService: any; + gitService: any; + lockingService: any; + slSourcesRepository: any; + sessionWorktreeService: any; + semanticLayerSourceReconciler: any; + agentRunner: any; + slValidator: any; + toolsetFactory: any; +} + +const buildMocks = (overrides: Partial = {}): BuiltMocks => { + const scopedConfig = { writeFile: vi.fn(), deleteFile: vi.fn() }; + const scopedGit = { revParseHead: vi.fn().mockResolvedValue('basesha') }; + const sessionWorktree = { + chatId: 'chat-1', + workdir: '/tmp/wt/session-chat-1', + branch: 'session/chat-1', + baseSha: 'basesha', + createdAt: new Date(), + git: scopedGit, + config: scopedConfig, + }; + + const defaults: BuiltMocks = { + appSettings: { + settings: { + ai: { + knowledge: { userScopedKnowledgeEnabled: false }, + slValidation: { probeRowCount: 1 }, + }, + llm: { memoryIngestionModel: 'test-model' }, + }, + }, + llmProvider: { getModel: vi.fn().mockReturnValue({}) }, + prompt: { loadPrompt: vi.fn().mockResolvedValue('base framing') }, + posthog: { trackEvent: vi.fn(), createTelemetryIntegration: vi.fn().mockReturnValue(undefined) }, + telemetry: { + isEnabled: () => false, + appSettingsService: { settings: { telemetry: { recordInputs: false, recordOutputs: false } } }, + systemConfigService: { config: { instance: { name: 'test-instance' } } }, + }, + skillsRegistry: { + listSkills: vi.fn().mockResolvedValue([]), + buildSkillsPrompt: vi.fn().mockReturnValue(''), + getSkill: vi.fn(), + stripFrontmatter: vi.fn(), + }, + 
wikiService: { + forWorktree: vi.fn().mockReturnThis(), + readPage: vi.fn(), + syncSinglePage: vi.fn(), + deleteFromIndex: vi.fn(), + }, + indexRepository: { listPagesForUser: vi.fn().mockResolvedValue([]) }, + knowledgeSlRefsRepository: { syncFromWiki: vi.fn().mockResolvedValue({ inserted: 0, deleted: 0 }) }, + knowledgeRepository: {}, + embeddingService: { computeEmbedding: vi.fn() }, + semanticLayerService: { + forWorktree: vi.fn().mockReturnThis(), + loadAllSources: vi.fn().mockResolvedValue([]), + readSourceFile: vi.fn(), + }, + slSearchService: { indexSources: vi.fn(), buildSearchText: vi.fn() }, + dataSourcesService: { + listEnabledConnections: vi.fn().mockResolvedValue([]), + getConnectionById: vi.fn().mockResolvedValue({ + id: 'conn-1', + name: 'Warehouse', + connectionType: 'POSTGRESQL', + }), + executeQuery: vi.fn(), + }, + configService: { + enqueueCommitMessageJobForExternalCommit: vi.fn().mockResolvedValue(undefined), + writeFile: vi.fn(), + deleteFile: vi.fn(), + }, + gitService: { + revParseHead: vi.fn().mockResolvedValue('basesha'), + squashMergeIntoMain: vi.fn().mockResolvedValue({ ok: true, squashSha: 'cafebabe', touchedPaths: ['a.yaml'] }), + }, + lockingService: { + withLock: vi.fn().mockImplementation((_key: string, fn: () => Promise) => fn()), + }, + slSourcesRepository: { deleteByConnectionAndName: vi.fn() }, + sessionWorktreeService: { + create: vi.fn().mockResolvedValue(sessionWorktree), + cleanup: vi.fn().mockResolvedValue(undefined), + }, + semanticLayerSourceReconciler: { upsertRow: vi.fn() }, + agentRunner: { runLoop: vi.fn().mockResolvedValue({ stopReason: 'natural' }) }, + slValidator: { validateSingleSource: vi.fn().mockResolvedValue({ errors: [], warnings: [] }) }, + toolsetFactory: { + createIngestWuToolset: vi.fn().mockReturnValue({ + toAiSdkTools: vi.fn().mockReturnValue({}), + getAllTools: vi.fn().mockReturnValue([]), + }), + createToolset: vi.fn().mockReturnValue({ + toAiSdkTools: vi.fn().mockReturnValue({}), + getAllTools: 
vi.fn().mockReturnValue([]), + }), + }, + }; + + return { ...defaults, ...overrides }; +}; + +const buildService = (mocks: BuiltMocks): MemoryAgentService => + new MemoryAgentService({ + settings: { + knowledge: { + userScopedKnowledgeEnabled: mocks.appSettings.settings.ai.knowledge.userScopedKnowledgeEnabled, + }, + slValidation: { + probeRowCount: mocks.appSettings.settings.ai.slValidation.probeRowCount, + }, + llm: { + memoryIngestionModel: mocks.appSettings.settings.llm.memoryIngestionModel, + }, + }, + promptService: mocks.prompt, + skillsRegistry: mocks.skillsRegistry, + wikiService: mocks.wikiService, + knowledgeIndex: mocks.indexRepository, + knowledgeSlRefs: mocks.knowledgeSlRefsRepository, + semanticLayerService: mocks.semanticLayerService, + slSearchService: mocks.slSearchService, + connections: { + listEnabledConnections: vi.fn().mockResolvedValue([]), + getConnectionById: + mocks.dataSourcesService.getConnectionById ?? + vi.fn().mockResolvedValue({ + id: 'conn-1', + name: 'Warehouse', + connectionType: 'POSTGRESQL', + }), + executeQuery: mocks.dataSourcesService.executeQuery, + }, + rootFileStore: mocks.configService, + gitService: mocks.gitService, + lockingService: mocks.lockingService, + slSourcesRepository: mocks.slSourcesRepository, + sessionWorktreeService: mocks.sessionWorktreeService, + semanticLayerSourceReconciler: mocks.semanticLayerSourceReconciler, + agentRunner: mocks.agentRunner, + slValidator: mocks.slValidator, + toolsetFactory: mocks.toolsetFactory, + telemetry: { + trackMemoryIngestion: mocks.posthog.trackEvent, + }, + }); + +const baseInput = { + userId: 'u1', + chatId: 'chat-1', + // Long enough + with a definition keyword so the prefilter doesn't skip. 
+ userMessage: 'going forward exclude cancelled orders from revenue, this is the canonical definition', +}; + +const generateTextMock = vi.mocked(generateText); + +beforeEach(() => { + generateTextMock.mockReset(); + generateTextMock.mockResolvedValue({ text: '', toolCalls: [] } as never); +}); + +afterEach(() => { + vi.restoreAllMocks(); +}); + +describe('MemoryAgentService.ingest — session-branch orchestration', () => { + it('happy path: creates worktree, runs LLM loop, squash-merges, enqueues note, cleans up', async () => { + const mocks = buildMocks(); + const svc = buildService(mocks); + + const result = await svc.ingest(baseInput); + + // Phase 1: session worktree was created from main's HEAD. + expect(mocks.sessionWorktreeService.create).toHaveBeenCalledWith('chat-1', 'basesha'); + + // Phase 2: LLM loop ran with the assembled tools/system/prompt. + expect(mocks.agentRunner.runLoop).toHaveBeenCalledOnce(); + + // Phase 3: squash-merged onto main. + expect(mocks.gitService.squashMergeIntoMain).toHaveBeenCalledWith( + 'session/chat-1', + SYSTEM_GIT_AUTHOR.name, + SYSTEM_GIT_AUTHOR.email, + expect.stringContaining('[chat=chat-1]'), + ); + + // Note enqueue happened on the ROOT configService, not the scoped one. The single + // touched path is passed as the diff scope. + expect(mocks.configService.enqueueCommitMessageJobForExternalCommit).toHaveBeenCalledWith( + { commitHash: 'cafebabe' }, + expect.stringContaining('[chat=chat-1]'), + 'a.yaml', + ); + + // Cleanup ran with success. 
+ expect(mocks.sessionWorktreeService.cleanup).toHaveBeenCalledWith( + expect.objectContaining({ chatId: 'chat-1' }), + 'success', + expect.any(Object), + ); + + expect(result.commitHash).toBe('cafebabe'); + }); + + it('empty path: squash returns no touched paths → no enqueue, cleanup(empty), commitHash=null', async () => { + const mocks = buildMocks(); + mocks.gitService.squashMergeIntoMain.mockResolvedValue({ + ok: true, + squashSha: 'basesha', + touchedPaths: [], + }); + const svc = buildService(mocks); + + const result = await svc.ingest(baseInput); + + expect(mocks.configService.enqueueCommitMessageJobForExternalCommit).not.toHaveBeenCalled(); + expect(mocks.sessionWorktreeService.cleanup).toHaveBeenCalledWith(expect.any(Object), 'empty', expect.any(Object)); + expect(result.commitHash).toBeNull(); + }); + + it('conflict path: rolls back DB, cleanup(conflict, conflictPaths), returns commitHash=null with empty actions', async () => { + const mocks = buildMocks(); + mocks.gitService.squashMergeIntoMain.mockResolvedValue({ + ok: false, + conflict: true, + conflictPaths: ['semantic-layer/conn-x/fct_intakes.yaml'], + }); + // Have the wikiService report a still-existing page in main, so rollback re-syncs. + mocks.wikiService.readPage.mockResolvedValue({ + pageKey: 'phantom', + frontmatter: { summary: 'x', usage_mode: 'auto' }, + content: 'body', + }); + const svc = buildService(mocks); + + const result = await svc.ingest(baseInput); + + expect(mocks.gitService.squashMergeIntoMain).toHaveBeenCalled(); + // Cleanup got the conflict outcome + the paths. 
+ expect(mocks.sessionWorktreeService.cleanup).toHaveBeenCalledWith(expect.any(Object), 'conflict', { + conflictPaths: ['semantic-layer/conn-x/fct_intakes.yaml'], + }); + expect(mocks.configService.enqueueCommitMessageJobForExternalCommit).not.toHaveBeenCalled(); + expect(result.commitHash).toBeNull(); + expect(result.actions).toEqual([]); + }); + + it('crash path: post-loop step throws → cleanup(crash), commitHash=null', async () => { + const mocks = buildMocks(); + // Keep the cross-ref reconciler healthy so the squash merge is the post-loop step + // that fails. Its rejection escapes into ingest()'s outer try/catch and lands in + // the crash branch. + mocks.knowledgeSlRefsRepository.syncFromWiki.mockResolvedValue({ inserted: 0, deleted: 0 }); + mocks.gitService.squashMergeIntoMain.mockRejectedValue(new Error('git crashed')); + + const svc = buildService(mocks); + + const result = await svc.ingest(baseInput); + + // The failing step was actually reached (the crash did not happen earlier). + expect(mocks.gitService.squashMergeIntoMain).toHaveBeenCalledOnce(); + expect(mocks.sessionWorktreeService.cleanup).toHaveBeenCalledWith(expect.any(Object), 'crash', expect.any(Object)); + // A crashed session must not enqueue a commit-message job or report any actions. + expect(mocks.configService.enqueueCommitMessageJobForExternalCommit).not.toHaveBeenCalled(); + expect(result.commitHash).toBeNull(); + expect(result.actions).toEqual([]); + }); +}); + +describe('MemoryAgentService.ingest — concurrency regression', () => { + it('two parallel ingest() calls produce distinct squash commits (no absorption)', async () => { + // FIFO lock: each acquisition chains onto the previous holder's release. This is the + // same shape as production withLock — the test asserts that two parallel ingests + // sequence both their phase-1 (worktree create) and phase-3 (squash merge) calls + // without deadlocking, and produce distinct commits. 
+ let chain: Promise = Promise.resolve(); + const lockingService = { + withLock: vi.fn().mockImplementation(async (_key: string, fn: () => Promise) => { + const previous = chain; + let releaseMe!: () => void; + chain = new Promise((resolve) => { + releaseMe = resolve; + }); + await previous; + try { + return await fn(); + } finally { + releaseMe(); + } + }), + }; + + let createCount = 0; + const sessionWorktreeService = { + create: vi.fn().mockImplementation((chatId: string) => { + createCount += 1; + return Promise.resolve({ + chatId, + workdir: `/tmp/wt/session-${chatId}`, + branch: `session/${chatId}`, + baseSha: 'basesha', + createdAt: new Date(), + git: { revParseHead: vi.fn().mockResolvedValue('basesha') }, + config: { writeFile: vi.fn() }, + }); + }), + cleanup: vi.fn().mockResolvedValue(undefined), + }; + + let mergeCount = 0; + const gitService = { + revParseHead: vi.fn().mockResolvedValue('basesha'), + squashMergeIntoMain: vi.fn().mockImplementation(() => { + mergeCount += 1; + return Promise.resolve({ + ok: true, + squashSha: `sha-${mergeCount}`, + touchedPaths: [`${mergeCount}.yaml`], + }); + }), + }; + + const mocksA = buildMocks({ lockingService, sessionWorktreeService, gitService }); + const mocksB = buildMocks({ lockingService, sessionWorktreeService, gitService }); + const svcA = buildService(mocksA); + const svcB = buildService(mocksB); + + const [a, b] = await Promise.all([ + svcA.ingest({ ...baseInput, chatId: 'chat-A' }), + svcB.ingest({ ...baseInput, chatId: 'chat-B' }), + ]); + + expect(createCount).toBe(2); + expect(gitService.squashMergeIntoMain).toHaveBeenCalledTimes(2); + expect(a.commitHash).not.toBeNull(); + expect(b.commitHash).not.toBeNull(); + expect(a.commitHash).not.toBe(b.commitHash); + }); +}); diff --git a/packages/context/src/memory/memory-agent.service.test.ts b/packages/context/src/memory/memory-agent.service.test.ts new file mode 100644 index 00000000..63f97e14 --- /dev/null +++ 
b/packages/context/src/memory/memory-agent.service.test.ts @@ -0,0 +1,475 @@ +import { describe, expect, it, vi } from 'vitest'; +import { validateSingleSource } from '../sl/index.js'; +import { createTouchedSlSources, hasTouchedSlSource } from '../tools/index.js'; +import { detectCaptureSignals, isWorthAnalyzing } from './capture-signals.js'; +import { MemoryAgentService } from './memory-agent.service.js'; + +const passthroughValidator = { + validateSingleSource: (d: unknown, c: string, n: string) => validateSingleSource(d as never, c, n), +} as never; + +describe('MemoryAgentService.detectCaptureSignals', () => { + it('fires sl on a long user message + SQL aggregate in assistant message', () => { + const userMessage = `${'A'.repeat(120)} show me revenue by month`; + const result = detectCaptureSignals({ + userId: 'u', + chatId: 'c', + userMessage, + assistantMessage: 'SELECT SUM(amount) FROM orders GROUP BY month', + }); + expect(result.sl).toBe(true); + expect(result.reasons).toContain('sql aggregate in assistant message'); + }); + + it('does NOT fire sl from aggregate alone when user message is short', () => { + const result = detectCaptureSignals({ + userId: 'u', + chatId: 'c', + userMessage: 'show revenue', + assistantMessage: 'SELECT SUM(amount) FROM orders', + }); + expect(result.sl).toBe(false); + }); + + it('fires sl on definition keywords in user message regardless of length', () => { + const result = detectCaptureSignals({ + userId: 'u', + chatId: 'c', + userMessage: 'going forward exclude cancelled orders from revenue', + }); + expect(result.sl).toBe(true); + expect(result.reasons).toContain('sl-style definition keyword in user message'); + }); + + it('fires knowledge on a definition keyword in user message', () => { + const result = detectCaptureSignals({ + userId: 'u', + chatId: 'c', + userMessage: 'BYOL stands for Bring Your Own Lab', + }); + expect(result.knowledge).toBe(true); + expect(result.reasons).toContain('definition keyword in user 
message'); + }); + + it('fires both sl and knowledge when both signals hit', () => { + const result = detectCaptureSignals({ + userId: 'u', + chatId: 'c', + userMessage: 'going forward, define revenue as sum of paid orders', + }); + expect(result.sl).toBe(true); + expect(result.knowledge).toBe(true); + }); + + it('fires neither for a plain ad-hoc question', () => { + const result = detectCaptureSignals({ + userId: 'u', + chatId: 'c', + userMessage: 'how many users signed up last week?', + assistantMessage: '12 users.', + }); + expect(result.sl).toBe(false); + expect(result.knowledge).toBe(false); + expect(result.reasons).toEqual([]); + }); + + it('fires knowledge when assistant emits a markdown definition table', () => { + const result = detectCaptureSignals({ + userId: 'u', + chatId: 'c', + userMessage: 'list our protocols', + assistantMessage: '| Term | Definition |\n|---|---|\n| TRT | Testosterone Replacement Therapy |', + }); + expect(result.knowledge).toBe(true); + expect(result.reasons).toContain('definition table in assistant message'); + }); + + it('accepts JOIN and CTE-style aggregates as sl signals', () => { + const userMessage = 'B'.repeat(150); + const result = detectCaptureSignals({ + userId: 'u', + chatId: 'c', + userMessage, + assistantMessage: 'WITH base AS (SELECT * FROM x) SELECT * FROM base', + }); + expect(result.sl).toBe(true); + }); + + it('reasons array is empty when no signal fires', () => { + const result = detectCaptureSignals({ + userId: 'u', + chatId: 'c', + userMessage: 'hello', + }); + expect(result.reasons).toEqual([]); + }); + + it('detects LookML dialect from view/measure structural keywords', () => { + const result = detectCaptureSignals({ + userId: 'u', + chatId: 'c', + userMessage: 'ingest this', + assistantMessage: + 'view: fct_labs {\n sql_table_name: analytics.fct_labs ;;\n measure: count_lab_orders { type: count }\n}', + }); + expect(result.dialect).toBe('lookml'); + expect(result.sl).toBe(true); + 
expect(result.reasons).toContain('lookml structure in assistant message'); + }); +}); + +describe('MemoryAgentService.isWorthAnalyzing (C1 + F1)', () => { + const baseInput = (assistantMessage: string) => ({ + userId: 'u', + chatId: 'c', + userMessage: 'Ingest the following content into memory.', + assistantMessage, + }); + + it('skips a pure LookML wrapper (only view + sql_table_name + dimensions + measure: count)', () => { + const wrapper = `view: timeline { + sql_table_name: analytics.timeline ;; + dimension_group: date { type: time; description: "m/d/Y" } + dimension: notes { type: string; description: "notes" } + measure: count { type: count } +}`; + expect(isWorthAnalyzing(baseInput(wrapper))).toBe(false); + }); + + it('keeps a LookML view with a non-count aggregate (count_distinct, sum, avg, …)', () => { + const real = `view: fct_labs { + sql_table_name: analytics.fct_labs ;; + measure: count_lab_orders { type: count } + measure: count_distinct_patients { type: count_distinct; sql: \${admin_user_id} ;; } +}`; + expect(isWorthAnalyzing(baseInput(real))).toBe(true); + }); + + it('keeps a LookML view with derived_table even if it has no non-count measures', () => { + const derived = `view: lab_results { + derived_table: { sql: SELECT * FROM analytics.raw WHERE status = 'final' ;; } + dimension: lab_order_id { primary_key: yes; type: string } + measure: count { type: count } +}`; + expect(isWorthAnalyzing(baseInput(derived))).toBe(true); + }); + + it('keeps a LookML view with sql_always_where', () => { + const enforced = `view: rpt_daily_braze_email { + sql_table_name: analytics.fct_email_sends ;; + sql_always_where: \${TABLE}.channel = 'braze' ;; + measure: count { type: count } +}`; + expect(isWorthAnalyzing(baseInput(enforced))).toBe(true); + }); + + it('keeps a LookML view with a join: block', () => { + const joined = `view: fct_labs { + sql_table_name: analytics.fct_labs ;; + join: dim_customers { + sql_on: \${fct_labs.admin_user_id} = 
\${dim_customers.admin_user_id} ;; + relationship: many_to_one + } +}`; + expect(isWorthAnalyzing(baseInput(joined))).toBe(true); + }); +}); + +describe('MemoryAgentService.reconcileCrossRefs', () => { + type Action = { target: 'wiki' | 'sl'; type: 'created' | 'updated' | 'removed'; key: string; detail: string }; + + const buildService = (overrides: { + readPage?: ReturnType; + syncFromWiki?: ReturnType; + }) => { + const wikiService = { + readPage: overrides.readPage ?? vi.fn(), + }; + const knowledgeSlRefsRepository = { + syncFromWiki: overrides.syncFromWiki ?? vi.fn().mockResolvedValue({ inserted: 0, deleted: 0 }), + }; + const svc = new MemoryAgentService({ + settings: { + knowledge: { userScopedKnowledgeEnabled: false }, + slValidation: { probeRowCount: 1 }, + llm: { memoryIngestionModel: 'test-model' }, + }, + promptService: undefined as never, + skillsRegistry: undefined as never, + wikiService: wikiService as never, + knowledgeIndex: undefined as never, + knowledgeSlRefs: knowledgeSlRefsRepository as never, + semanticLayerService: undefined as never, + slSearchService: undefined as never, + connections: undefined as never, + rootFileStore: undefined as never, + gitService: undefined as never, + lockingService: undefined as never, + slSourcesRepository: undefined as never, + sessionWorktreeService: undefined as never, + semanticLayerSourceReconciler: undefined as never, + agentRunner: undefined as never, + slValidator: undefined as never, + toolsetFactory: undefined as never, + }); + return { svc, wikiService, knowledgeSlRefsRepository }; + }; + + const session = { + userId: 'u', + chatId: 'c', + userMessage: 'test', + connectionId: 'conn-1', + userScopedEnabled: false, + forceGlobalScope: false, + touchedSlSources: createTouchedSlSources(), + preHead: null, + }; + + it('projects a wiki page.sl_refs into knowledge_sl_refs via syncFromWiki', async () => { + const { svc, knowledgeSlRefsRepository } = buildService({ + readPage: vi.fn().mockResolvedValue({ + 
pageKey: 'byol-definition', + frontmatter: { summary: 'byol', sl_refs: ['fct_labs', 'lab_results'] }, + content: 'body', + }), + syncFromWiki: vi.fn().mockResolvedValue({ inserted: 2, deleted: 0 }), + }); + + const actions: Action[] = [{ target: 'wiki', type: 'created', key: 'byol-definition', detail: '' }]; + const synced = await svc.reconcileCrossRefs(actions, session); + + expect(synced).toBe(2); + expect(knowledgeSlRefsRepository.syncFromWiki).toHaveBeenCalledWith({ + wikiPageKey: 'byol-definition', + wikiScope: 'GLOBAL', + wikiScopeId: null, + refs: [ + { connectionId: 'conn-1', sourceName: 'fct_labs' }, + { connectionId: 'conn-1', sourceName: 'lab_results' }, + ], + }); + }); + + it('skips sync when the action has no connectionId in session', async () => { + const { svc, knowledgeSlRefsRepository } = buildService({ + readPage: vi.fn().mockResolvedValue({ + pageKey: 'byol-definition', + frontmatter: { summary: 'byol', sl_refs: ['fct_labs'] }, + content: 'body', + }), + }); + + const actions: Action[] = [{ target: 'wiki', type: 'created', key: 'byol-definition', detail: '' }]; + const synced = await svc.reconcileCrossRefs(actions, { ...session, connectionId: undefined }); + + expect(synced).toBe(0); + expect(knowledgeSlRefsRepository.syncFromWiki).not.toHaveBeenCalled(); + }); + + it('syncs an empty sl_refs list — clearing any stale rows for that wiki', async () => { + const { svc, knowledgeSlRefsRepository } = buildService({ + readPage: vi.fn().mockResolvedValue({ + pageKey: 'byol-definition', + frontmatter: { summary: 'byol' }, + content: 'body', + }), + syncFromWiki: vi.fn().mockResolvedValue({ inserted: 0, deleted: 1 }), + }); + + const actions: Action[] = [{ target: 'wiki', type: 'updated', key: 'byol-definition', detail: '' }]; + const synced = await svc.reconcileCrossRefs(actions, session); + + expect(synced).toBe(1); + expect(knowledgeSlRefsRepository.syncFromWiki).toHaveBeenCalledWith({ + wikiPageKey: 'byol-definition', + wikiScope: 'GLOBAL', + 
wikiScopeId: null, + refs: [], + }); + }); + + it('normalizes dotted sl_refs to bare source names, dedupes (H)', async () => { + const { svc, knowledgeSlRefsRepository } = buildService({ + readPage: vi.fn().mockResolvedValue({ + pageKey: 'fct-labs-overview', + frontmatter: { + summary: 'fct_labs', + sl_refs: ['fct_labs', 'fct_labs.count_lab_orders', 'fct_labs.count_distinct_patients', 'lab_results'], + }, + content: 'body', + }), + syncFromWiki: vi.fn().mockResolvedValue({ inserted: 2, deleted: 0 }), + }); + + const actions: Action[] = [{ target: 'wiki', type: 'created', key: 'fct-labs-overview', detail: '' }]; + await svc.reconcileCrossRefs(actions, session); + + expect(knowledgeSlRefsRepository.syncFromWiki).toHaveBeenCalledWith({ + wikiPageKey: 'fct-labs-overview', + wikiScope: 'GLOBAL', + wikiScopeId: null, + refs: [ + { connectionId: 'conn-1', sourceName: 'fct_labs' }, + { connectionId: 'conn-1', sourceName: 'lab_results' }, + ], + }); + }); + + it('ignores sl-only actions — the DB index is driven from the wiki side', async () => { + const { svc, knowledgeSlRefsRepository } = buildService({}); + + const actions: Action[] = [{ target: 'sl', type: 'updated', key: 'fct_labs', detail: '' }]; + const synced = await svc.reconcileCrossRefs(actions, session); + + expect(synced).toBe(0); + expect(knowledgeSlRefsRepository.syncFromWiki).not.toHaveBeenCalled(); + }); +}); + +describe('MemoryAgentService.gateRevertInvalidSources (J3)', () => { + type Action = { target: 'wiki' | 'sl'; type: 'created' | 'updated' | 'removed'; key: string; detail: string }; + + // Build a service with the minimal deps the gate needs: semanticLayerService + // (readSourceFile, loadSource, writeSource for revert), dataSourcesService + // (executeQuery for dry-run), configService (writeFile/deleteFile for revert), + // gitService (getFileAtCommit). 
+ const buildService = (overrides: { + readSourceFile?: ReturnType; + executeQuery?: ReturnType; + writeFile?: ReturnType; + deleteFile?: ReturnType; + getFileAtCommit?: ReturnType; + }) => { + const semanticLayerService = { + readSourceFile: overrides.readSourceFile ?? vi.fn(), + isManifestBacked: vi.fn().mockResolvedValue(false), + }; + const connections = { + listEnabledConnections: vi.fn().mockResolvedValue([]), + getConnectionById: vi.fn().mockResolvedValue({ + id: 'conn-1', + name: 'Warehouse', + connectionType: 'POSTGRESQL', + }), + executeQuery: overrides.executeQuery ?? vi.fn(), + }; + const configService = { + writeFile: overrides.writeFile ?? vi.fn().mockResolvedValue({}), + deleteFile: overrides.deleteFile ?? vi.fn().mockResolvedValue({}), + }; + const gitService = { + getFileAtCommit: overrides.getFileAtCommit ?? vi.fn().mockRejectedValue(new Error('not present')), + }; + const slSourcesRepository = { + deleteByConnectionAndName: vi.fn().mockResolvedValue(undefined), + }; + const svc = new MemoryAgentService({ + settings: { + knowledge: { userScopedKnowledgeEnabled: false }, + slValidation: { probeRowCount: 1 }, + llm: { memoryIngestionModel: 'test-model' }, + }, + promptService: undefined as never, + skillsRegistry: undefined as never, + wikiService: undefined as never, + knowledgeIndex: undefined as never, + knowledgeSlRefs: undefined as never, + semanticLayerService: semanticLayerService as never, + slSearchService: undefined as never, + connections: connections as never, + rootFileStore: configService as never, + gitService: gitService as never, + lockingService: undefined as never, + slSourcesRepository: slSourcesRepository as never, + sessionWorktreeService: undefined as never, + semanticLayerSourceReconciler: undefined as never, + agentRunner: undefined as never, + slValidator: passthroughValidator, + toolsetFactory: undefined as never, + }); + return { svc, semanticLayerService, connections, configService, gitService, slSourcesRepository }; + 
}; + + const session = { + userId: 'u', + chatId: 'c', + userMessage: 'test', + connectionId: 'conn-1', + userScopedEnabled: false, + forceGlobalScope: false, + touchedSlSources: createTouchedSlSources([{ connectionId: 'conn-1', sourceName: 'broken_source' }]), + preHead: null, + }; + + it('reverts (deletes) a source whose dry-run fails and drops its action', async () => { + const badYaml = `name: broken_source +source_type: sql +sql: | + SELECT fake_col FROM analytics.x +grain: [fake_col] +columns: [{name: fake_col, type: string}] +measures: [] +joins: [] +`; + const { svc, configService } = buildService({ + readSourceFile: vi.fn().mockResolvedValue({ content: badYaml, path: 'x' }), + executeQuery: vi.fn().mockResolvedValue({ + headers: [], + rows: [], + totalRows: 0, + error: 'Unrecognized name: fake_col', + }), + }); + const actions: Action[] = [ + { target: 'sl', type: 'created', key: 'broken_source', detail: 'create' }, + { target: 'wiki', type: 'created', key: 'some_wiki', detail: 'wiki' }, + ]; + const localSession = { + ...session, + touchedSlSources: createTouchedSlSources([{ connectionId: 'conn-1', sourceName: 'broken_source' }]), + }; + + const reverted = await svc.gateRevertInvalidSources(localSession as never, actions); + + expect(reverted).toEqual(['broken_source']); + expect(configService.deleteFile).toHaveBeenCalledWith( + 'semantic-layer/conn-1/broken_source.yaml', + expect.any(String), + expect.any(String), + expect.any(String), + { skipLock: true }, + ); + // Wiki action survives; SL action is scrubbed. 
+ expect(actions.map((a) => `${a.target}:${a.key}`)).toEqual(['wiki:some_wiki']); + expect(hasTouchedSlSource(localSession.touchedSlSources, 'conn-1', 'broken_source')).toBe(false); + }); + + it('leaves a source alone when its dry-run passes', async () => { + const goodYaml = `name: good_source +source_type: sql +sql: | + SELECT id FROM analytics.x +grain: [id] +columns: [{name: id, type: string}] +measures: [] +joins: [] +`; + const { svc, configService } = buildService({ + readSourceFile: vi.fn().mockResolvedValue({ content: goodYaml, path: 'x' }), + executeQuery: vi.fn().mockResolvedValue({ headers: ['id'], rows: [], totalRows: 0, error: null }), + }); + const actions: Action[] = [{ target: 'sl', type: 'created', key: 'good_source', detail: 'create' }]; + const localSession = { + ...session, + touchedSlSources: createTouchedSlSources([{ connectionId: 'conn-1', sourceName: 'good_source' }]), + }; + + const reverted = await svc.gateRevertInvalidSources(localSession as never, actions); + + expect(reverted).toEqual([]); + expect(configService.writeFile).not.toHaveBeenCalled(); + expect(configService.deleteFile).not.toHaveBeenCalled(); + expect(actions).toHaveLength(1); + }); +}); diff --git a/packages/context/src/memory/memory-agent.service.ts b/packages/context/src/memory/memory-agent.service.ts new file mode 100644 index 00000000..e2ae9d42 --- /dev/null +++ b/packages/context/src/memory/memory-agent.service.ts @@ -0,0 +1,658 @@ +import { createHash } from 'node:crypto'; +import { readFile } from 'node:fs/promises'; +import { join } from 'node:path'; +import { tool } from 'ai'; +import * as YAML from 'yaml'; +import { z } from 'zod'; +import { type KloLogger, noopLogger } from '../core/index.js'; +import { + revertSourceToPreHead, + type SemanticLayerSource, + type SlValidationDeps, + type SlValidatorPort, +} from '../sl/index.js'; +import { + createTouchedSlSources, + deleteTouchedSlSource, + listTouchedSlSources, + SYSTEM_GIT_AUTHOR, + type ToolContext, + type 
ToolSession, + touchedSlSourceCount, + touchedSlSourceNamesForConnection, +} from '../tools/index.js'; +import { + buildRequiredSkillsBlock, + DEFAULT_SKILL_NAMES, + detectCaptureSignals, + prefilterSkipReason, + promptNameFor, + stepBudgetFor, +} from './capture-signals.js'; +import type { + CaptureSession, + MemoryAction, + MemoryAgentInput, + MemoryAgentResult, + MemoryAgentServiceDeps, + MemoryAgentSourceType, +} from './types.js'; + +type GateDeps = SlValidationDeps & { slValidator: SlValidatorPort }; + +export class MemoryAgentService { + private readonly logger: KloLogger; + + constructor(private readonly deps: MemoryAgentServiceDeps) { + this.logger = deps.logger ?? noopLogger; + } + + async ingest(input: MemoryAgentInput): Promise { + const chatId = input.chatId; + const sourceType: MemoryAgentSourceType = input.sourceType ?? 'research'; + const empty: MemoryAgentResult = { signalDetected: false, actions: [], skillsLoaded: [], commitHash: null }; + + const hasSL = !!input.connectionId; + const userScopedEnabled = this.deps.settings.knowledge.userScopedKnowledgeEnabled; + const forceGlobalScope = sourceType === 'external_ingest'; + + const signals = detectCaptureSignals(input); + + const skipReason = prefilterSkipReason(input, signals); + if (skipReason) { + this.logger.debug(`[memory-agent] chat=${chatId} skipped (pre-filter: ${skipReason})`); + return empty; + } + + // Phase 1 — create a per-session git worktree branched at main's HEAD. This runs under + // a brief `config:repo` lock so the baseSha snapshot is consistent with the branch + // creation, but releases before the LLM loop starts. The unlocked loop is what lets + // concurrent ingest() calls and interactive saves on main run in parallel. 
+ const sessionWorktree = await this.deps.lockingService.withLock('config:repo', async () => { + const mainHead = await this.deps.gitService.revParseHead(); + if (!mainHead) { + throw new Error('memory-agent: config repo has no HEAD'); + } + return this.deps.sessionWorktreeService.create(chatId, mainHead); + }); + + // State that is read after the try/finally below is declared up front. The try opens + // as soon as the worktree exists so that a failure in any later step (index building, + // skill/prompt loading, the loop, the merge) still reaches the cleanup in `finally` + // instead of leaking the session worktree. + const skillsLoaded: string[] = []; + const actions: MemoryAction[] = []; + const signalsList = [signals.knowledge && 'knowledge', signals.sl && 'sl'].filter(Boolean) as string[]; + let sessionOutcome: 'success' | 'empty' | 'conflict' | 'crash' = 'success'; + let squashSha: string | null = null; + let touchedPaths: string[] = []; + let reconciledCrossRefs = 0; + let gateRevertedSources: string[] = []; + let sessionConflictPaths: string[] | undefined; + let sessionCrashed = false; + // Flips to true once prompt/tool setup is done; lets the catch block tell a setup + // failure (rethrown, as before) from a crash in the loop or merge (reported in the result). + let setupComplete = false; + + try { + const [wikiIndex, slIndex] = await Promise.all([ + this.buildWikiIndex(input.userId, userScopedEnabled), + hasSL ? this.buildSlIndex(input.connectionId!) : Promise.resolve(''), + ]); + + const session: CaptureSession = { + userId: input.userId, + chatId, + userMessageId: input.userMessageId, + userMessage: input.userMessage, + connectionId: input.connectionId, + userScopedEnabled, + forceGlobalScope, + touchedSlSources: createTouchedSlSources(), + preHead: sessionWorktree.baseSha, + }; + + // Wire scoped services so the LLM loop's reads + writes both target the session + // worktree, not main. Scoped wiki/SL services route their internal `configService` + // to the worktree; sl-tools take an explicit `configService` and `gitService`. + const scopedWikiService = this.deps.wikiService.forWorktree(sessionWorktree.workdir); + const scopedSemanticLayerService = this.deps.semanticLayerService.forWorktree(sessionWorktree.workdir); + + const toolSession: ToolSession = { + connectionId: input.connectionId ?? null, + isWorktreeScoped: true, + preHead: sessionWorktree.baseSha, + touchedSlSources: session.touchedSlSources, + actions, + semanticLayerService: scopedSemanticLayerService, + wikiService: scopedWikiService, + configService: sessionWorktree.config, + gitService: sessionWorktree.git, + }; + + const toolset = hasSL + ? 
this.deps.toolsetFactory.createIngestWuToolset(toolSession) + : this.deps.toolsetFactory.createToolset(['wiki']); + + const toolContext: ToolContext = { + sourceId: 'memory-agent', + messageId: chatId, + userId: input.userId, + connectionId: input.connectionId, + session: toolSession, + }; + + const loadSkillTool = { + load_skill: tool({ + description: + 'Load a skill to get specialized instructions. Call this when a skill listed in the system prompt matches the current task.', + inputSchema: z.object({ + name: z.string().describe('The skill name as listed in the system prompt.'), + }), + execute: async ({ name }) => { + const skill = await this.deps.skillsRegistry.getSkill(name, 'memory_agent'); + if (!skill) { + const available = + (await this.deps.skillsRegistry.listSkills('memory_agent')).map((s) => s.name).join(', ') || '(none)'; + return `Skill "${name}" not available to the memory agent. Available: ${available}`; + } + try { + const body = await readFile(join(skill.path, 'SKILL.md'), 'utf-8'); + if (!skillsLoaded.includes(skill.name)) { + skillsLoaded.push(skill.name); + } + return { + name: skill.name, + skillDirectory: skill.path, + content: this.deps.skillsRegistry.stripFrontmatter(body), + }; + } catch (e) { + return `Error loading skill "${name}": ${e instanceof Error ? e.message : String(e)}`; + } + }, + }), + }; + + const skillNames: string[] = [...DEFAULT_SKILL_NAMES]; + if (signals.dialect === 'lookml') { + skillNames.push('lookml_ingest'); + } + const skills = await this.deps.skillsRegistry.listSkills(skillNames, 'memory_agent'); + const skillsPrompt = this.deps.skillsRegistry.buildSkillsPrompt(skills, 'memory_agent'); + const baseFraming = await this.loadBaseFraming(sourceType); + const requiredSkillsBlock = buildRequiredSkillsBlock(signals); + const systemPrompt = [baseFraming.trimEnd(), skillsPrompt, requiredSkillsBlock].filter(Boolean).join('\n'); + + const clipLimit = sourceType === 'external_ingest' ? 
48000 : 16000; + const assistantSection = input.assistantMessage?.trim() + ? `## Assistant Response\n${clip(input.assistantMessage.trim(), clipLimit)}` + : ''; + const prompt = [ + `# Wiki Index\n\n${wikiIndex}`, + hasSL ? `\n# Semantic Layer Sources\n\n${slIndex}` : '', + '\n---\n', + assistantSection, + `\n## User Message\n\n${input.userMessage.trim()}`, + ] + .filter(Boolean) + .join('\n'); + + const stepBudget = stepBudgetFor(sourceType); + const modelName = this.deps.settings.llm.memoryIngestionModel; + + const signalsSuffix = + signalsList.length > 0 ? ` signals=[${signalsList.join(', ')}] reasons=[${signals.reasons.join('; ')}]` : ''; + + const dialectSuffix = signals.dialect ? ` dialect=${signals.dialect}` : ''; + this.logger.log( + `[memory-agent] chat=${chatId} running (sourceType=${sourceType}, hasSL=${hasSL}, budget=${stepBudget}, model=${modelName})${signalsSuffix}${dialectSuffix}`, + ); + + if (process.env.MEMORY_AGENT_DEBUG_PROMPTS === '1') { + this.logger.debug(`[memory-agent prompt-debug] system=${systemPrompt}`); + this.logger.debug(`[memory-agent prompt-debug] user=${prompt}`); + } + + // Setup finished: from here on a failure is reported as a session crash in the + // result rather than rethrown. + setupComplete = true; + + // Phase 2 — unlocked LLM loop against the session worktree. Crashes inside generateText + // are isolated; we still try to run the cross-ref + gate steps and surface what we can. 
+ const runResult = await this.deps.agentRunner.runLoop({ + modelRole: 'candidateExtraction', + systemPrompt, + userPrompt: prompt, + toolSet: { ...toolset.toAiSdkTools(toolContext), ...loadSkillTool }, + stepBudget, + telemetryTags: { + operationName: 'memory-agent-ingest', + userId: input.userId, + chatId, + }, + }); + if (runResult.stopReason === 'error' && runResult.error) { + this.logger.warn(`[memory-agent] chat=${chatId} loop failed: ${runResult.error.message}`); + } + + // Cross-ref + revert gate: still scoped to the session worktree (writes via + // sl-tools' deps already use scoped services). Wiki cross-refs live in the DB, + // so they're connection-state and don't need scoping. + const gateDeps: GateDeps = { + semanticLayerService: scopedSemanticLayerService, + connections: this.deps.connections, + configService: sessionWorktree.config, + gitService: sessionWorktree.git, + slSourcesRepository: this.deps.slSourcesRepository, + slValidator: this.deps.slValidator, + probeRowCount: this.deps.settings.slValidation.probeRowCount, + }; + reconciledCrossRefs = await this.reconcileCrossRefs(actions, session); + if (hasSL && touchedSlSourceCount(session.touchedSlSources) > 0) { + gateRevertedSources = await this.gateRevertInvalidSourcesWithDeps(session, actions, gateDeps); + } + if (gateRevertedSources.length > 0) { + this.logger.warn( + `[memory-agent] chat=${chatId} gate: reverted ${gateRevertedSources.length} unvalidatable SL source(s): ${gateRevertedSources.join(', ')}`, + ); + } + + // Phase 3 — squash-merge under a brief `config:repo` lock so interactive writes + // serialize against this short window. 
Empty merges (no diff vs main) skip the + // commit-message enqueue. Conflicts trigger a targeted DB rollback so eager + // session writes don't leave DB ahead of main. + const squashMessage = this.squashMessageForSession( + sourceType, + chatId, + actions, + reconciledCrossRefs, + gateRevertedSources, + ); + const mergeResult = await this.deps.lockingService.withLock('config:repo', () => + this.deps.gitService.squashMergeIntoMain( + sessionWorktree.branch, + SYSTEM_GIT_AUTHOR.name, + SYSTEM_GIT_AUTHOR.email, + squashMessage, + ), + ); + + if (!mergeResult.ok) { + sessionOutcome = 'conflict'; + sessionConflictPaths = mergeResult.conflictPaths; + await this.rollbackDbForAbortedSession(session, actions); + } else if (mergeResult.touchedPaths.length === 0) { + sessionOutcome = 'empty'; + } else { + squashSha = mergeResult.squashSha; + touchedPaths = mergeResult.touchedPaths; + // Single-file commits: pass the path so the handler diff is path-scoped. + // Multi-file commits: omit path so the handler grabs the full commit diff + // (a comma-joined pathspec would match nothing). + const pathFilter = touchedPaths.length === 1 ? touchedPaths[0] : ''; + await this.deps.rootFileStore.enqueueCommitMessageJobForExternalCommit( + { commitHash: squashSha }, + squashMessage, + pathFilter, + ); + } + } catch (error) { + sessionCrashed = true; + sessionOutcome = 'crash'; + this.logger.error( + `[memory-agent] chat=${chatId} session crashed: ${error instanceof Error ? error.message : String(error)}`, + ); + // Failures before the loop started used to propagate to the caller; keep that + // contract. The `finally` below still runs, so cleanup happens before the rethrow + // reaches the caller. 
+ if (!setupComplete) { + throw error; + } + } finally { + await this.deps.sessionWorktreeService.cleanup(sessionWorktree, sessionOutcome, { + conflictPaths: sessionConflictPaths, + }); + } + + if (sessionCrashed) { + this.logger.warn(`[memory-agent] chat=${chatId} crashed; worktree preserved for inspection`); + } + + // On conflict/crash the session's git work was discarded — the action list no longer + // matches main. Drop it so callers don't think writes landed. 
+ const finalActions = sessionOutcome === 'conflict' || sessionOutcome === 'crash' ? [] : actions; + + // Reindex SL search if any SL actions actually landed on main. + if (hasSL && finalActions.some((a) => a.target === 'sl')) { + try { + const allSources = await this.deps.semanticLayerService.loadAllSources(input.connectionId!); + await this.deps.slSearchService.indexSources(input.connectionId!, allSources); + } catch (e) { + this.logger.warn( + `[memory-agent] chat=${chatId} SL index reindex failed (non-fatal): ${e instanceof Error ? e.message : String(e)}`, + ); + } + } + + const signalsActedOn: string[] = []; + if (signals.knowledge && skillsLoaded.includes('knowledge_capture')) { + signalsActedOn.push('knowledge'); + } + if (signals.sl && skillsLoaded.includes('sl')) { + signalsActedOn.push('sl'); + } + + if (finalActions.length > 0) { + this.logger.log( + `[memory-agent] chat=${chatId} completed: ${finalActions.length} action(s) — ${finalActions.map((a) => `${a.target}:${a.type}:${a.key}`).join(', ')} (skills=[${skillsLoaded.join(', ')}], outcome=${sessionOutcome})`, + ); + this.deps.telemetry?.trackMemoryIngestion(input.userId, { + chat_id: chatId, + source_type: sourceType, + action_count: finalActions.length, + actions: finalActions.map((a) => `${a.target}:${a.type}:${a.key}`), + skills_loaded: skillsLoaded, + signals_detected: signalsList, + signals_acted_on: signalsActedOn, + reconciled_cross_refs: reconciledCrossRefs, + session_outcome: sessionOutcome, + }); + } else { + this.logger.log( + `[memory-agent] chat=${chatId} completed: 0 actions (skills=[${skillsLoaded.join(', ')}], outcome=${sessionOutcome})`, + ); + if (signalsList.length > 0) { + this.deps.telemetry?.trackMemoryIngestion(input.userId, { + chat_id: chatId, + source_type: sourceType, + action_count: 0, + actions: [], + skills_loaded: skillsLoaded, + signals_detected: signalsList, + signals_acted_on: signalsActedOn, + reconciled_cross_refs: reconciledCrossRefs, + session_outcome: 
sessionOutcome, + }); + } + } + + return { + signalDetected: skillsLoaded.length > 0 || finalActions.length > 0, + actions: finalActions, + skillsLoaded, + commitHash: squashSha, + }; + } + + /** + * Project wiki frontmatter `sl_refs:` into the `knowledge_sl_refs` DB index. The wiki + * YAML remains the authored source of truth; this is a pure derivation. The ingest flow + * calls it ahead of the squash-merge step, which is what takes the `config:repo` lock. + * + * Returns the number of DB rows that changed (inserts + deletes). + */ + async reconcileCrossRefs(actions: MemoryAction[], session: CaptureSession): Promise<number> { + const writesGlobal = session.forceGlobalScope || !session.userScopedEnabled; + const wikiScope: 'GLOBAL' | 'USER' = writesGlobal ? 'GLOBAL' : 'USER'; + const wikiScopeId = wikiScope === 'USER' ? session.userId : null; + + let synced = 0; + + for (const action of actions) { + if (action.target !== 'wiki' || (action.type !== 'created' && action.type !== 'updated')) { + continue; + } + if (!session.connectionId) { + this.logger.debug( + `[memory-agent] reconcile: wiki=${action.key} skipped knowledge_sl_refs (no connectionId in session)`, + ); + continue; + } + const page = await this.deps.wikiService.readPage(wikiScope, wikiScopeId, action.key); + if (!page) { + continue; + } + const slRefs = page.frontmatter.sl_refs ?? []; + // Wiki authors write both bare source names (`fct_labs`) and measure-qualified refs + // (`fct_labs.count_lab_orders`). The reverse-edge index is a source-level projection — + // strip the `.measure` suffix and dedupe before persisting, so findBySource('fct_labs') + // returns one row for this wiki no matter how many dotted measures it cited.
+ const bareSources = [ + ...new Set( + slRefs.map((ref) => ref.split('.')[0]).filter((sourceName): sourceName is string => sourceName.length > 0), + ), + ]; + const { inserted, deleted } = await this.deps.knowledgeSlRefs.syncFromWiki({ + wikiPageKey: action.key, + wikiScope, + wikiScopeId, + refs: bareSources.map((sourceName) => ({ connectionId: session.connectionId!, sourceName })), + }); + synced += inserted + deleted; + } + + if (synced > 0) { + this.logger.log(`[memory-agent] chat=${session.chatId} knowledge_sl_refs_synced=${synced}`); + } + return synced; + } + + /** + * Pre-squash gate: walk every SL source touched by the agent this session, re-run the + * full validation (YAML + schema + warehouse dry-run), and for any that still fail, + * roll back to the pre-session state. Returns the list of source names that were + * reverted so the caller can log them and scrub the action list. + * + * Runs inside the `config:repo` lock; uses `skipLock: true` on downstream writes. + */ + async gateRevertInvalidSources(session: CaptureSession, actions: MemoryAction[]): Promise<string[]> { + return this.gateRevertInvalidSourcesWithDeps(session, actions, { + semanticLayerService: this.deps.semanticLayerService, + connections: this.deps.connections, + configService: this.deps.rootFileStore, + gitService: this.deps.gitService, + slSourcesRepository: this.deps.slSourcesRepository, + slValidator: this.deps.slValidator, + probeRowCount: this.deps.settings.slValidation.probeRowCount, + }); + } + + /** + * Same as `gateRevertInvalidSources` but with explicit deps so the orchestrator can + * pass session-worktree-scoped services for the revert reads/writes.
+ */ + async gateRevertInvalidSourcesWithDeps( + session: CaptureSession, + actions: MemoryAction[], + deps: GateDeps, + ): Promise<string[]> { + if (!session.connectionId) { + return []; + } + const reverted: string[] = []; + for (const sourceName of touchedSlSourceNamesForConnection(session.touchedSlSources, session.connectionId)) { + const result = await deps.slValidator.validateSingleSource(deps, session.connectionId, sourceName); + if (result.errors.length === 0) { + continue; + } + try { + await revertSourceToPreHead(deps, session.connectionId, session.preHead, sourceName); + reverted.push(sourceName); + deleteTouchedSlSource(session.touchedSlSources, session.connectionId, sourceName); + for (let i = actions.length - 1; i >= 0; i--) { + if (actions[i].target === 'sl' && actions[i].key === sourceName) { + actions.splice(i, 1); + } + } + } catch (e) { + this.logger.error( + `[memory-agent] chat=${session.chatId} gate: failed to revert ${sourceName}: ${e instanceof Error ? e.message : String(e)}`, + ); + } + } + return reverted; + } + + /** + * Abort-path DB rollback. After a session's merge was rejected because main moved + * underneath, the session's eager DB writes (sl_sources rows, knowledge_index entries) + * no longer correspond to anything on disk. For every source/page the agent touched, + * re-derive from main's current state and overwrite DB. Scoped to touched keys only — + * NOT a full reconciler run.
+ */ + async rollbackDbForAbortedSession(session: CaptureSession, actions: MemoryAction[]): Promise<void> { + if (session.connectionId) { + for (const { connectionId, sourceName } of listTouchedSlSources(session.touchedSlSources)) { + try { + const file = await this.deps.semanticLayerService.readSourceFile(connectionId, sourceName).catch(() => null); + if (file?.content) { + const parsed = this.parseYamlOrNull(file.content); + if (parsed) { + const hash = this.sha256Hex(file.content); + await this.deps.semanticLayerSourceReconciler.upsertRow(parsed, file.path, hash); + } + } else { + await this.deps.slSourcesRepository.deleteByConnectionAndName(connectionId, sourceName); + } + } catch (err) { + this.logger.warn( + `[memory-agent rollback] SL ${sourceName} failed: ${err instanceof Error ? err.message : String(err)}`, + ); + } + } + } + + const wikiActions = actions.filter((a) => a.target === 'wiki'); + const wikiScope: 'GLOBAL' | 'USER' = session.forceGlobalScope || !session.userScopedEnabled ? 'GLOBAL' : 'USER'; + const wikiScopeId = wikiScope === 'USER' ? session.userId : null; + + for (const action of wikiActions) { + try { + const page = await this.deps.wikiService.readPage(wikiScope, wikiScopeId, action.key).catch(() => null); + if (page) { + await this.deps.wikiService.syncSinglePage( + wikiScope, + wikiScopeId, + action.key, + page.frontmatter, + page.content, + ); + } else { + await this.deps.wikiService.deleteFromIndex(wikiScope, wikiScopeId, action.key); + } + } catch (err) { + this.logger.warn( + `[memory-agent rollback] wiki ${action.key} failed: ${err instanceof Error ?
err.message : String(err)}`, + ); + } + } + } + + private parseYamlOrNull(content: string): SemanticLayerSource | null { + try { + return YAML.parse(content) as SemanticLayerSource; + } catch { + return null; + } + } + + private sha256Hex(content: string): string { + return createHash('sha256').update(content, 'utf-8').digest('hex'); + } + + /** + * Build the deterministic squash-merge commit message for a session ingest. Includes + * action counts, cross-ref reconciles, and revert-gate counts for triage. + */ + private squashMessageForSession( + sourceType: MemoryAgentSourceType, + chatId: string, + actions: MemoryAction[], + reconciledCrossRefs: number, + gateRevertedSources: string[], + ): string { + const wikiCount = actions.filter((a) => a.target === 'wiki').length; + const slCount = actions.filter((a) => a.target === 'sl').length; + const parts: string[] = []; + if (wikiCount > 0) { + parts.push(`${wikiCount} wiki`); + } + if (slCount > 0) { + parts.push(`${slCount} sl`); + } + if (reconciledCrossRefs > 0) { + parts.push(`${reconciledCrossRefs} xref`); + } + if (gateRevertedSources.length > 0) { + parts.push(`${gateRevertedSources.length} reverted`); + } + const summary = parts.length > 0 ? 
parts.join(', ') : 'no writes'; + return `Memory ingest (${sourceType}): ${summary} [chat=${chatId.slice(0, 8)}]`; + } + + private async loadBaseFraming(sourceType: MemoryAgentSourceType): Promise<string> { + return this.deps.promptService.loadPrompt(promptNameFor(sourceType)); + } + + private async buildWikiIndex(userId: string, userScopedEnabled: boolean): Promise<string> { + const pages = await this.deps.knowledgeIndex.listPagesForUser(userId); + if (pages.length === 0) { + return '(empty — no knowledge pages exist yet)'; + } + + const formatEntry = (p: { page_key: string; summary: string }) => `- ${p.page_key}: ${p.summary}`; + if (!userScopedEnabled) { + return `## Knowledge Pages\n${pages.map(formatEntry).join('\n')}`; + } + + const globalEntries: string[] = []; + const userEntries: string[] = []; + for (const page of pages) { + const entry = formatEntry(page); + if (page.scope === 'GLOBAL') { + globalEntries.push(entry); + } else { + userEntries.push(entry); + } + } + const sections: string[] = []; + if (globalEntries.length > 0) { + sections.push(`## Organization (read-only from USER scope)\n${globalEntries.join('\n')}`); + } + if (userEntries.length > 0) { + sections.push(`## Your Preferences\n${userEntries.join('\n')}`); + } + return sections.join('\n\n'); + } + + private async buildSlIndex(connectionId: string): Promise<string> { + const [sources, warehouseLine] = await Promise.all([ + this.deps.semanticLayerService.loadAllSources(connectionId), + this.buildWarehouseLine(connectionId), + ]); + const indexLines = + sources.length === 0 + ? '(no existing sources)' + : sources + .map((s) => { + const measureCount = s.measures.length; + const joinCount = s.joins?.length ??
0; + const header = `${s.name} [measures=${measureCount}, joins=${joinCount}]`; + if (measureCount === 0 && joinCount === 0) { + return `${header} — candidate for enrichment`; + } + const parts: string[] = [header]; + if (measureCount > 0) { + parts.push(` measures: ${s.measures.map((m) => `${s.name}.${m.name}`).join(', ')}`); + } + if (joinCount > 0) { + parts.push(` joins: ${(s.joins ?? []).map((j) => `→ ${j.to} (${j.relationship})`).join(', ')}`); + } + return parts.join('\n'); + }) + .join('\n'); + return warehouseLine ? `${warehouseLine}\n\n${indexLines}` : indexLines; + } + + /** + * Read the connection's warehouse type and project it as a `Warehouse: X` line so the + * agent picks dialect-correct date arithmetic + SQL idioms. The sl_capture skill + * documents the mapping; without this line the agent defaults to whatever flavor the + * SKILL examples used to show. + */ + private async buildWarehouseLine(connectionId: string): Promise<string> { + try { + const connection = await this.deps.connections.getConnectionById(connectionId); + return `Warehouse: ${connection.connectionType}`; + } catch { + return ''; + } + } +} + +function clip(text: string, maxLength: number): string { + return maxLength <= 0 ? '' : text.length > maxLength ?
`${text.slice(0, maxLength - 1)}…` : text; +} diff --git a/packages/context/src/memory/memory-runs.test.ts b/packages/context/src/memory/memory-runs.test.ts new file mode 100644 index 00000000..75c25a38 --- /dev/null +++ b/packages/context/src/memory/memory-runs.test.ts @@ -0,0 +1,198 @@ +import { describe, expect, it, vi } from 'vitest'; +import type { MemoryAgentInput, MemoryAgentResult, MemoryAgentService } from './index.js'; +import { MemoryCaptureService, type MemoryRunStorePort } from './memory-runs.js'; + +class InMemoryRunStore implements MemoryRunStorePort { + readonly rows = new Map< + string, + { + id: string; + status: 'running' | 'done' | 'error'; + stage: string; + inputHash: string; + chatId: string | null; + outputSummary: MemoryAgentResult | null; + error: string | null; + } + >(); + + async createRunning(args: { inputHash: string; chatId?: string | null }): Promise<{ id: string }> { + const id = `run-${this.rows.size + 1}`; + this.rows.set(id, { + id, + status: 'running', + stage: 'queued', + inputHash: args.inputHash, + chatId: args.chatId ?? null, + outputSummary: null, + error: null, + }); + return { id }; + } + + async markRunning(id: string, stage: string): Promise<void> { + const row = this.rows.get(id); + if (!row) { + throw new Error(`unknown run ${id}`); + } + row.stage = stage; + } + + async markDone(id: string, outputSummary: MemoryAgentResult): Promise<void> { + const row = this.rows.get(id); + if (!row) { + throw new Error(`unknown run ${id}`); + } + row.status = 'done'; + row.stage = 'done'; + row.outputSummary = outputSummary; + } + + async markError(id: string, error: string): Promise<void> { + const row = this.rows.get(id); + if (!row) { + throw new Error(`unknown run ${id}`); + } + row.status = 'error'; + row.stage = 'error'; + row.error = error; + } + + async findById(id: string) { + return this.rows.get(id) ??
null; + } +} + +function deferred<T>() { + let resolve!: (value: T) => void; + let reject!: (reason?: unknown) => void; + const promise = new Promise<T>((res, rej) => { + resolve = res; + reject = rej; + }); + return { promise, resolve, reject }; +} + +function buildService(): { + capture: MemoryCaptureService; + store: InMemoryRunStore; + ingest: ReturnType<typeof vi.fn>; + run: ReturnType<typeof deferred<MemoryAgentResult>>; +} { + const store = new InMemoryRunStore(); + const run = deferred<MemoryAgentResult>(); + const ingest = vi.fn().mockReturnValue(run.promise); + const memoryAgent = { ingest }; + return { + capture: new MemoryCaptureService({ memoryAgent, runs: store }), + store, + ingest, + run, + }; +} + +describe('MemoryCaptureService', () => { + it('creates a run, executes memory capture, and stores a done summary', async () => { + const result: MemoryAgentResult = { + signalDetected: true, + actions: [{ target: 'wiki', type: 'created', key: 'revenue', detail: 'captured revenue definition' }], + skillsLoaded: ['knowledge_capture'], + commitHash: 'abc123', + }; + const { capture, store, ingest, run } = buildService(); + + const input: MemoryAgentInput = { + userId: 'user-1', + chatId: 'chat-1', + userMessage: 'Revenue means paid order value.', + assistantMessage: 'Captured.', + connectionId: '00000000-0000-0000-0000-000000000001', + }; + + const started = await capture.capture(input); + + expect(started.runId).toBe('run-1'); + expect(ingest).toHaveBeenCalledWith(input); + await expect(capture.status(started.runId)).resolves.toMatchObject({ + runId: 'run-1', + status: 'running', + stage: 'capturing', + done: false, + }); + + run.resolve(result); + await capture.waitForRun(started.runId); + + const status = await capture.status(started.runId); + expect(status).toEqual({ + runId: 'run-1', + stage: 'done', + done: true, + status: 'done', + captured: { + wiki: ['revenue'], + sl: [], + xrefs: [], + }, + error: null, + commitHash: 'abc123', + skillsLoaded: ['knowledge_capture'], + signalDetected: true, + }); +
expect(store.rows.get('run-1')?.inputHash).toHaveLength(64); + }); + + it('stores no-signal captures as done with empty captured arrays', async () => { + const { capture, run } = buildService(); + + const started = await capture.capture({ + userId: 'user-1', + chatId: 'chat-2', + userMessage: 'Thanks.', + }); + + run.resolve({ + signalDetected: false, + actions: [], + skillsLoaded: [], + commitHash: null, + }); + await capture.waitForRun(started.runId); + + await expect(capture.status(started.runId)).resolves.toMatchObject({ + done: true, + status: 'done', + captured: { wiki: [], sl: [], xrefs: [] }, + signalDetected: false, + }); + }); + + it('stores thrown errors and projects them as failed statuses', async () => { + const store = new InMemoryRunStore(); + const memoryAgent = { + ingest: vi.fn().mockRejectedValue(new Error('LLM provider missing')), + }; + const capture = new MemoryCaptureService({ memoryAgent, runs: store }); + + const started = await capture.capture({ + userId: 'user-1', + chatId: 'chat-3', + userMessage: 'Remember this.', + }); + await capture.waitForRun(started.runId); + + await expect(capture.status(started.runId)).resolves.toMatchObject({ + done: true, + status: 'error', + stage: 'error', + captured: { wiki: [], sl: [], xrefs: [] }, + error: 'LLM provider missing', + }); + }); + + it('returns null for an unknown run id', async () => { + const { capture } = buildService(); + + await expect(capture.status('missing')).resolves.toBeNull(); + }); +}); diff --git a/packages/context/src/memory/memory-runs.ts b/packages/context/src/memory/memory-runs.ts new file mode 100644 index 00000000..5550d5d0 --- /dev/null +++ b/packages/context/src/memory/memory-runs.ts @@ -0,0 +1,133 @@ +import { createHash } from 'node:crypto'; +import type { MemoryAction, MemoryAgentInput, MemoryAgentResult, MemoryAgentService } from './index.js'; + +export type MemoryRunStatus = 'running' | 'done' | 'error'; + +export interface MemoryRunRecord { + id: string; + status: 
MemoryRunStatus; + stage: string; + inputHash: string; + chatId: string | null; + outputSummary: MemoryAgentResult | null; + error: string | null; +} + +export interface MemoryRunStorePort { + createRunning(args: { inputHash: string; chatId?: string | null }): Promise<{ id: string }>; + markRunning(id: string, stage: string): Promise<void>; + markDone(id: string, outputSummary: MemoryAgentResult): Promise<void>; + markError(id: string, error: string): Promise<void>; + findById(id: string): Promise<MemoryRunRecord | null>; +} + +export interface MemoryCaptureServiceDeps { + memoryAgent: Pick<MemoryAgentService, 'ingest'>; + runs: MemoryRunStorePort; +} + +export interface MemoryCaptureStartResult { + runId: string; +} + +export interface MemoryCaptureStatus { + runId: string; + status: MemoryRunStatus; + stage: string; + done: boolean; + captured: { + wiki: string[]; + sl: string[]; + xrefs: string[]; + }; + error: string | null; + commitHash: string | null; + skillsLoaded: string[]; + signalDetected: boolean; +} + +function inputHash(input: MemoryAgentInput): string { + const stableInput = JSON.stringify({ + userMessage: input.userMessage, + assistantMessage: input.assistantMessage ?? '', + connectionId: input.connectionId ??
null, + }); + return createHash('sha256').update(stableInput).digest('hex'); +} + +function capturedKeys(actions: MemoryAction[]): MemoryCaptureStatus['captured'] { + const wiki = new Set<string>(); + const sl = new Set<string>(); + const xrefs = new Set<string>(); + + for (const action of actions) { + if (action.target === 'wiki') { + wiki.add(action.key); + } else { + sl.add(action.key); + } + if (action.detail.toLowerCase().includes('xref') || action.detail.toLowerCase().includes('cross-ref')) { + xrefs.add(action.key); + } + } + + return { + wiki: [...wiki].sort(), + sl: [...sl].sort(), + xrefs: [...xrefs].sort(), + }; +} + +export class MemoryCaptureService { + private readonly inFlight = new Map<string, Promise<void>>(); + + constructor(private readonly deps: MemoryCaptureServiceDeps) {} + + async capture(input: MemoryAgentInput): Promise<MemoryCaptureStartResult> { + const row = await this.deps.runs.createRunning({ + inputHash: inputHash(input), + chatId: input.chatId, + }); + + await this.deps.runs.markRunning(row.id, 'capturing'); + + const run = this.runCapture(row.id, input); + this.inFlight.set(row.id, run); + run.finally(() => this.inFlight.delete(row.id)).catch(() => undefined); + + return { runId: row.id }; + } + + async waitForRun(runId: string): Promise<void> { + await this.inFlight.get(runId); + } + + private async runCapture(runId: string, input: MemoryAgentInput): Promise<void> { + try { + const outputSummary = await this.deps.memoryAgent.ingest(input); + await this.deps.runs.markDone(runId, outputSummary); + } catch (error) { + await this.deps.runs.markError(runId, error instanceof Error ? error.message : String(error)); + } + } + + async status(runId: string): Promise<MemoryCaptureStatus | null> { + const row = await this.deps.runs.findById(runId); + if (!row) { + return null; + } + + const output = row.outputSummary; + return { + runId: row.id, + status: row.status, + stage: row.stage, + done: row.status !== 'running', + captured: output ?
capturedKeys(output.actions) : { wiki: [], sl: [], xrefs: [] }, + error: row.error, + commitHash: output?.commitHash ?? null, + skillsLoaded: output?.skillsLoaded ?? [], + signalDetected: output?.signalDetected ?? false, + }; + } +} diff --git a/packages/context/src/memory/memory-runtime-assets.test.ts b/packages/context/src/memory/memory-runtime-assets.test.ts new file mode 100644 index 00000000..882b267b --- /dev/null +++ b/packages/context/src/memory/memory-runtime-assets.test.ts @@ -0,0 +1,100 @@ +import { readFile } from 'node:fs/promises'; +import { join } from 'node:path'; +import { fileURLToPath } from 'node:url'; +import { describe, expect, it } from 'vitest'; +import { PromptService } from '../prompts/index.js'; +import { SkillsRegistryService } from '../skills/index.js'; +import { DEFAULT_SKILL_NAMES, type MemoryAgentSourceType, promptNameFor } from './index.js'; + +const promptsDir = fileURLToPath(new URL('../../prompts', import.meta.url)); +const skillsDir = fileURLToPath(new URL('../../skills', import.meta.url)); +const memorySourceTypes: MemoryAgentSourceType[] = ['research', 'external_ingest', 'backfill']; +const expectedSkillHeadings: Record<string, string> = { + knowledge_capture: '# Knowledge Capture', + sl: '# Semantic Layer', + sl_capture: '# Semantic Layer', +}; +const expectedAdapterSkillHeadings: Record<string, string> = { + historic_sql_ingest: '# Historic SQL Ingest', + live_database_ingest: '# Live Database Ingest', + looker_ingest: '# Looker Runtime Ingest', + lookml_ingest: '# LookML to KLO Semantic Layer', + metabase_ingest: '# Metabase to KLO Semantic Layer', + metricflow_ingest: '# MetricFlow to KLO Semantic Layer', +}; + +function forbiddenProductPattern() { + return new RegExp([['Kae', 'lio'].join(''), ['kae', 'lio'].join(''), ['KAE', 'LIO_'].join('')].join('|')); +} + +describe('memory runtime assets', () => { + it('packages every memory-agent base prompt referenced by promptNameFor()', async () => { + const prompts = new PromptService({ promptsDir, partials: []
}); + + for (const sourceType of memorySourceTypes) { + const promptName = promptNameFor(sourceType); + const prompt = await prompts.loadPrompt(promptName); + + expect(prompt).toContain(''); + expect(prompt).toContain(''); + expect(prompt).not.toMatch(forbiddenProductPattern()); + } + }); + + it('packages the default memory capture skills referenced by DEFAULT_SKILL_NAMES', async () => { + const registry = new SkillsRegistryService({ skillsDir }); + const skills = await registry.listSkills([...DEFAULT_SKILL_NAMES], 'memory_agent'); + + expect(skills.map((skill) => skill.name).sort()).toEqual(['knowledge_capture', 'sl', 'sl_capture']); + + for (const skill of skills) { + const body = await readFile(join(skill.path, 'SKILL.md'), 'utf-8'); + const expectedHeading = expectedSkillHeadings[skill.name]; + expect(expectedHeading).toBeDefined(); + expect(body).toContain(expectedHeading); + expect(body).not.toMatch(forbiddenProductPattern()); + } + }); + + it('keeps memory-only capture skills hidden from research callers', async () => { + const registry = new SkillsRegistryService({ skillsDir }); + const skills = await registry.listSkills([...DEFAULT_SKILL_NAMES], 'research'); + + expect(skills.map((skill) => skill.name)).toEqual(['sl']); + }); + + it('packages ingest adapter skills referenced by bundled adapters', async () => { + const registry = new SkillsRegistryService({ skillsDir }); + const skillNames = Object.keys(expectedAdapterSkillHeadings); + const skills = await registry.listSkills(skillNames, 'memory_agent'); + + expect(skills.map((skill) => skill.name).sort()).toEqual([...skillNames].sort()); + + for (const skill of skills) { + const body = await readFile(join(skill.path, 'SKILL.md'), 'utf-8'); + expect(body).toContain(expectedAdapterSkillHeadings[skill.name]); + expect(body).not.toMatch(forbiddenProductPattern()); + } + }); + + it('ships Looker runtime ingest guidance for warehouse target SL writes', async () => { + const body = await readFile(join(skillsDir, 
'looker_ingest', 'SKILL.md'), 'utf-8'); + + expect(body).toContain('targetWarehouseConnectionId'); + expect(body).toContain('targetTable.ok === true'); + expect(body).toContain('targetTable.canonicalTable'); + expect(body).toContain('source_tables preflight'); + expect(body).toContain('emit_unmapped_fallback'); + expect(body).toContain('no_connection_mapping'); + expect(body).not.toContain('a standalone SL source only when raw evidence contains enough table or SQL structure'); + }); + + it('packages LookML connection-mismatch SL gate guidance', async () => { + const body = await readFile(join(skillsDir, 'lookml_ingest', 'SKILL.md'), 'utf-8'); + + expect(body).toContain('[LOOKML SL WRITES DISALLOWED]'); + expect(body).toContain('lookml_connection_mismatch'); + expect(body).toContain('Do not call `sl_write_source` or `sl_edit_source`'); + expect(body).toContain('LookML writes target the run connection directly'); + }); +}); diff --git a/packages/context/src/memory/types.ts b/packages/context/src/memory/types.ts new file mode 100644 index 00000000..559431be --- /dev/null +++ b/packages/context/src/memory/types.ts @@ -0,0 +1,157 @@ +import type { Tool } from 'ai'; +import type { AgentRunnerService } from '../agent/index.js'; +import type { GitService, KloFileStorePort, KloLogger, SessionWorktreeService } from '../core/index.js'; +import type { PromptService } from '../prompts/index.js'; +import type { SkillsRegistryService } from '../skills/index.js'; +import type { + KloConnectionInfo, + KloQueryResult, + SemanticLayerService, + SemanticLayerSource, + SlSearchService, + SlSourcesIndexPort, + SlValidationDeps, + SlValidatorPort, +} from '../sl/index.js'; +import type { ToolContext, ToolSession, TouchedSlSourceSet } from '../tools/index.js'; +import type { KnowledgeIndexPort, KnowledgeWikiService } from '../wiki/index.js'; + +export type MemoryAgentSourceType = 'research' | 'external_ingest' | 'backfill' | 'sql-review-migration'; + +export interface MemoryAgentInput { + 
userId: string; + chatId: string; + userMessage: string; + assistantMessage?: string; + connectionId?: string; + userMessageId?: string; + sourceType?: MemoryAgentSourceType; +} + +export interface MemoryAction { + target: 'wiki' | 'sl'; + type: 'created' | 'updated' | 'removed'; + key: string; + detail: string; + targetConnectionId?: string | null; +} + +export interface MemoryAgentResult { + signalDetected: boolean; + actions: MemoryAction[]; + skillsLoaded: string[]; + commitHash: string | null; +} + +export interface CaptureSignals { + knowledge: boolean; + sl: boolean; + dialect?: 'lookml'; + reasons: string[]; +} + +export interface CaptureSession { + userId: string; + chatId: string; + userMessageId?: string; + userMessage: string; + connectionId?: string; + userScopedEnabled: boolean; + forceGlobalScope: boolean; + touchedSlSources: TouchedSlSourceSet; + preHead: string | null; +} + +export interface MemoryAgentSettings { + knowledge: { + userScopedKnowledgeEnabled: boolean; + }; + slValidation: { + probeRowCount: number; + }; + llm: { + memoryIngestionModel: string; + }; +} + +export interface MemoryTelemetryPort { + trackMemoryIngestion( + userId: string, + properties: { + chat_id: string; + source_type: MemoryAgentSourceType; + action_count: number; + actions: string[]; + skills_loaded: string[]; + signals_detected: string[]; + signals_acted_on: string[]; + reconciled_cross_refs: number; + session_outcome: 'success' | 'empty' | 'conflict' | 'crash'; + }, + ): void; +} + +export interface MemoryKnowledgeSlRefsPort { + syncFromWiki(args: { + wikiPageKey: string; + wikiScope: 'GLOBAL' | 'USER'; + wikiScopeId: string | null; + refs: Array<{ connectionId: string; sourceName: string }>; + }): Promise<{ inserted: number; deleted: number }>; +} + +export interface MemoryConnectionPort { + listEnabledConnections(ids: string[]): Promise<KloConnectionInfo[]>; + getConnectionById(connectionId: string): Promise<KloConnectionInfo>; + executeQuery(connectionId: string, sql: string): Promise<KloQueryResult>; +} + +export
interface MemoryCommitMessagePort { + enqueueCommitMessageJobForExternalCommit( + commit: { commitHash: string }, + message: string, + pathFilter: string, + ): Promise; +} + +export interface MemoryFileStorePort extends KloFileStorePort, MemoryCommitMessagePort {} + +export interface MemoryToolSetLike { + toAiSdkTools(context: ToolContext): Record; +} + +export interface MemoryToolsetFactoryPort { + createIngestWuToolset(session: ToolSession): MemoryToolSetLike; + createToolset(capabilities: ['wiki']): MemoryToolSetLike; +} + +export interface MemorySlSourceReconcilerPort { + upsertRow(parsed: SemanticLayerSource, path: string, contentHash: string): Promise; +} + +export interface MemoryLockPort { + withLock(key: 'config:repo', fn: () => Promise): Promise; +} + +export interface MemoryAgentServiceDeps { + settings: MemoryAgentSettings; + promptService: PromptService; + skillsRegistry: SkillsRegistryService; + wikiService: KnowledgeWikiService; + knowledgeIndex: KnowledgeIndexPort; + knowledgeSlRefs: MemoryKnowledgeSlRefsPort; + semanticLayerService: SemanticLayerService; + slSearchService: SlSearchService; + connections: MemoryConnectionPort; + rootFileStore: MemoryFileStorePort; + gitService: GitService; + lockingService: MemoryLockPort; + slSourcesRepository: SlSourcesIndexPort; + sessionWorktreeService: SessionWorktreeService; + semanticLayerSourceReconciler: MemorySlSourceReconcilerPort; + agentRunner: AgentRunnerService; + slValidator: SlValidatorPort; + toolsetFactory: MemoryToolsetFactoryPort; + telemetry?: MemoryTelemetryPort; + logger?: KloLogger; +} diff --git a/packages/context/src/package-exports.test.ts b/packages/context/src/package-exports.test.ts new file mode 100644 index 00000000..402da0e7 --- /dev/null +++ b/packages/context/src/package-exports.test.ts @@ -0,0 +1,253 @@ +import { describe, expect, it } from 'vitest'; +import type { + ApplyLocalScanRelationshipReviewDecisionsInput, + ApplyLocalScanRelationshipReviewDecisionsResult, +} from 
'./scan/index.js'; + +const scanTypeExportCoverage: Partial<{ + input: ApplyLocalScanRelationshipReviewDecisionsInput; + result: ApplyLocalScanRelationshipReviewDecisionsResult; +}> = {}; + +describe('@klo/context package exports', () => { + it('exports package entry points used by host adapters', async () => { + const core = await import('./core/index.js'); + const connections = await import('./connections/index.js'); + const scan = await import('./scan/index.js'); + const search = await import('./search/index.js'); + const agent = await import('./agent/index.js'); + const prompts = await import('./prompts/index.js'); + const skills = await import('./skills/index.js'); + const sl = await import('./sl/index.js'); + const wiki = await import('./wiki/index.js'); + const tools = await import('./tools/index.js'); + const memory = await import('./memory/index.js'); + const ingest = await import('./ingest/index.js'); + const memoryFlow = await import('./ingest/memory-flow/index.js'); + const metabaseMapping = await import('./ingest/metabase-mapping.js'); + const mcp = await import('./mcp/index.js'); + const project = await import('./project/index.js'); + const daemon = await import('./daemon/index.js'); + const sqlAnalysis = await import('./sql-analysis/index.js'); + const root = await import('./index.js'); + + expect(core).toBeDefined(); + expect(connections.createPostgresQueryExecutor).toBeTypeOf('function'); + expect(connections.createSqliteQueryExecutor).toBeTypeOf('function'); + expect(connections.createDefaultLocalQueryExecutor).toBeTypeOf('function'); + expect(connections.sqliteDatabasePathFromConnection).toBeTypeOf('function'); + expect(connections.parseNotionConnectionConfig).toBeTypeOf('function'); + expect(connections.redactNotionConnectionConfig).toBeTypeOf('function'); + expect(connections.notionConnectionToPullConfig).toBeTypeOf('function'); + expect(scan).toBeDefined(); + expect(scanTypeExportCoverage).toEqual({}); + 
expect(scan.createKloConnectorCapabilities).toBeTypeOf('function'); + expect(`liveDatabaseSnapshotToKlo${'SchemaSnapshot'}` in scan).toBe(false); + expect(scan.normalizeKloNativeType).toBeTypeOf('function'); + expect(scan.inferKloDimensionType).toBeTypeOf('function'); + expect(scan.redactKloCredentialEnvelope).toBeTypeOf('function'); + expect(scan.redactKloScanReport).toBeTypeOf('function'); + expect(scan.redactKloScanWarning).toBeTypeOf('function'); + expect(core.redactKloSensitiveMetadata).toBeTypeOf('function'); + expect(core.redactKloSensitiveText).toBeTypeOf('function'); + expect(scan.isKloDataDictionaryCandidate).toBeTypeOf('function'); + expect(scan.buildKloColumnEmbeddingText).toBeTypeOf('function'); + expect(scan.KloDescriptionGenerator).toBeTypeOf('function'); + expect(scan.KloScanOrchestrator).toBeTypeOf('function'); + expect(scan.runLocalScan).toBeTypeOf('function'); + expect(scan.writeLocalScanEnrichmentArtifacts).toBeTypeOf('function'); + expect(scan.readLocalScanStructuralSnapshot).toBeTypeOf('function'); + expect(scan.writeLocalScanManifestShards).toBeTypeOf('function'); + expect(scan.appendKloWordLimitInstruction).toBeTypeOf('function'); + expect(scan.buildKloColumnDescriptionPrompt).toBeTypeOf('function'); + expect(scan.buildKloTableDescriptionPrompt).toBeTypeOf('function'); + expect(scan.buildKloDataSourceDescriptionPrompt).toBeTypeOf('function'); + expect(scan.currentKloRelationshipBenchmarkDetector).toBeTypeOf('function'); + expect(scan.generateKloRelationshipDiscoveryCandidates).toBeTypeOf('function'); + expect(scan.inferKloRelationshipTargetPks).toBeTypeOf('function'); + expect(scan.mergeKloRelationshipDiscoveryCandidates).toBeTypeOf('function'); + expect(scan.normalizeKloRelationshipName).toBeTypeOf('function'); + expect(scan.tokenizeKloRelationshipName).toBeTypeOf('function'); + expect(scan.tokenSimilarity).toBeTypeOf('function'); + expect(scan.localCandidateTables).toBeTypeOf('function'); + 
expect(scan.scoreKloRelationshipCandidate).toBeTypeOf('function'); + expect(scan.defaultKloRelationshipScoreWeights).toBeTypeOf('function'); + expect(scan.normalizeKloRelationshipScoreWeights).toBeTypeOf('function'); + expect(scan.calibrateWeightsFromSyntheticFixtures).toBeTypeOf('function'); + expect(scan.singularizeKloRelationshipToken).toBeTypeOf('function'); + expect(scan.pluralizeKloRelationshipToken).toBeTypeOf('function'); + expect(scan.collectKloFormalMetadataRelationships).toBeTypeOf('function'); + expect(scan.discoverKloCompositeRelationships).toBeTypeOf('function'); + expect(scan.proposeKloRelationshipCandidatesWithLlm).toBeTypeOf('function'); + expect(scan.profileKloRelationshipSchema).toBeTypeOf('function'); + expect(scan.quoteKloRelationshipIdentifier).toBeTypeOf('function'); + expect(scan.formatKloRelationshipTableRef).toBeTypeOf('function'); + expect(scan.validateKloRelationshipDiscoveryCandidates).toBeTypeOf('function'); + expect(scan.applyKloRelationshipValidationBudget).toBeTypeOf('function'); + expect(scan.defaultKloRelationshipValidationBudget).toBeTypeOf('function'); + expect(scan.resolveKloRelationshipGraph).toBeTypeOf('function'); + expect(scan.discoverKloRelationships).toBeTypeOf('function'); + expect('KloRelationshipDetector' in scan).toBe(false); + expect('defaultKloRelationshipDetectionSettings' in scan).toBe(false); + expect('KLO_RELATIONSHIP_DETECTION_CONFIDENCE' in scan).toBe(false); + expect(scan.buildKloRelationshipArtifacts).toBeTypeOf('function'); + expect(scan.buildKloRelationshipDiagnostics).toBeTypeOf('function'); + expect(scan.readLocalScanRelationshipArtifacts).toBeTypeOf('function'); + expect(scan.writeLocalScanRelationshipReviewDecision).toBeTypeOf('function'); + expect(scan.applyLocalScanRelationshipReviewDecisions).toBeTypeOf('function'); + expect(scan.exportLocalRelationshipFeedbackLabels).toBeTypeOf('function'); + expect(scan.formatKloRelationshipFeedbackLabelsJsonl).toBeTypeOf('function'); + 
expect(scan.buildKloRelationshipFeedbackCalibrationReport).toBeTypeOf('function'); + expect(scan.calibrateLocalRelationshipFeedbackLabels).toBeTypeOf('function'); + expect(scan.formatKloRelationshipFeedbackCalibrationMarkdown).toBeTypeOf('function'); + expect(scan.buildKloRelationshipThresholdAdviceReport).toBeTypeOf('function'); + expect(scan.adviseLocalRelationshipFeedbackThresholds).toBeTypeOf('function'); + expect(scan.formatKloRelationshipThresholdAdviceMarkdown).toBeTypeOf('function'); + expect(scan.emptyKloRelationshipProfileArtifact).toBeTypeOf('function'); + expect(scan.loadKloRelationshipBenchmarkFixture).toBeTypeOf('function'); + expect(scan.loadKloRelationshipBenchmarkFixtures).toBeTypeOf('function'); + expect(scan.maskKloRelationshipBenchmarkSnapshot).toBeTypeOf('function'); + expect(scan.runKloRelationshipBenchmarkCase).toBeTypeOf('function'); + expect(scan.runKloRelationshipBenchmarkSuite).toBeTypeOf('function'); + expect(scan.KLO_RELATIONSHIP_BENCHMARK_MODES).toEqual([ + 'metadata_present', + 'declared_fks_removed', + 'declared_pks_removed', + 'declared_pks_and_declared_fks_removed', + 'llm_disabled', + 'profiling_disabled', + 'validation_disabled', + 'embeddings_disabled', + ]); + expect(scan.buildKloRelationshipBenchmarkReport).toBeTypeOf('function'); + expect(scan.formatKloRelationshipBenchmarkReportMarkdown).toBeTypeOf('function'); + expect(search).toBeDefined(); + expect(search.HybridSearchCore).toBeTypeOf('function'); + expect(search.normalizeSearchQuery).toBeTypeOf('function'); + expect(search.rrfContribution).toBeTypeOf('function'); + expect(search.assertSearchBackendConformanceCase).toBeTypeOf('function'); + expect(search.assertSearchBackendCapabilities).toBeTypeOf('function'); + expect(core.resolveKloConfigReference).toBeTypeOf('function'); + expect(root.HybridSearchCore).toBeTypeOf('function'); + expect(root.assertSearchBackendConformanceCase).toBeTypeOf('function'); + expect(root.assertSearchBackendCapabilities).toBeTypeOf('function'); + 
expect(root.createLocalKloEmbeddingProviderFromConfig).toBeTypeOf('function'); + expect(agent).toBeDefined(); + expect(agent.AgentRunnerService).toBeTypeOf('function'); + expect(root.AgentRunnerService).toBeTypeOf('function'); + expect(root.createLocalKloLlmProviderFromConfig).toBeTypeOf('function'); + expect(prompts).toBeDefined(); + expect(skills).toBeDefined(); + expect(sl).toBeDefined(); + expect(sl.writeLocalSlSource).toBeTypeOf('function'); + expect(sl.readLocalSlSource).toBeTypeOf('function'); + expect(sl.validateLocalSlSource).toBeTypeOf('function'); + expect(sl.searchLocalSlSources).toBeTypeOf('function'); + expect(sl.SqliteSlSourcesIndex).toBeTypeOf('function'); + expect('searchLocalSlSourcesWithPglitePrototype' in sl).toBe(false); + expect(sl.compileLocalSlQuery).toBeTypeOf('function'); + expect(wiki).toBeDefined(); + expect(wiki.writeLocalKnowledgePage).toBeTypeOf('function'); + expect(wiki.readLocalKnowledgePage).toBeTypeOf('function'); + expect(wiki.searchLocalKnowledgePages).toBeTypeOf('function'); + expect(wiki.SqliteKnowledgeIndex).toBeTypeOf('function'); + expect('WikiSearchMatchReason' in wiki).toBe(false); + expect(tools).toBeDefined(); + expect(memory).toBeDefined(); + expect(ingest).toBeDefined(); + expect(memoryFlow.parseMemoryFlowReplayInput).toBeTypeOf('function'); + expect(memoryFlow.renderMemoryFlowReplay).toBeTypeOf('function'); + expect(ingest.LiveDatabaseSourceAdapter).toBeTypeOf('function'); + expect(ingest.createDaemonLiveDatabaseIntrospection).toBeTypeOf('function'); + expect(ingest.buildLiveDatabaseManifestShards).toBeTypeOf('function'); + expect(ingest.planLiveDatabaseStructuralSync).toBeTypeOf('function'); + expect(ingest.runLocalIngest).toBeTypeOf('function'); + expect(ingest.runLocalMetabaseIngest).toBeTypeOf('function'); + expect(ingest.getLocalIngestStatus).toBeTypeOf('function'); + expect(ingest.createLocalBundleIngestRuntime).toBeTypeOf('function'); + expect(ingest.runLocalStageOnlyIngest).toBeTypeOf('function'); + 
expect(ingest.getLocalStageOnlyIngestStatus).toBeTypeOf('function'); + expect(ingest.createDefaultLocalIngestAdapters).toBeTypeOf('function'); + expect(ingest.createLookerQueryToSlTool).toBeTypeOf('function'); + expect(ingest.buildLookerSlProposal).toBeTypeOf('function'); + expect(ingest.describeLookerScope).toBeTypeOf('function'); + expect(ingest.hashLookerScope).toBeTypeOf('function'); + expect(ingest.readLookerFetchReport).toBeTypeOf('function'); + expect(ingest.writeLookerFetchReport).toBeTypeOf('function'); + expect(ingest.writeLookerEvidenceDocuments).toBeTypeOf('function'); + expect(ingest.getLookerTriageSignals).toBeTypeOf('function'); + expect(ingest.LookerClient).toBeTypeOf('function'); + expect(ingest.DefaultLookerConnectionClientFactory).toBeTypeOf('function'); + expect(ingest.DefaultLookerClientFactory).toBeTypeOf('function'); + expect(ingest.LocalLookerRuntimeStore).toBeTypeOf('function'); + expect(ingest.createDaemonLookerTableIdentifierParser).toBeTypeOf('function'); + expect(ingest.createLocalLookerCredentialResolver).toBeTypeOf('function'); + expect(ingest.discoverLookerConnections).toBeTypeOf('function'); + expect(ingest.computeLookerMappingDrift).toBeTypeOf('function'); + expect(ingest.validateLookerMappings).toBeTypeOf('function'); + expect(ingest.refreshLookerMappingPlaceholders).toBeTypeOf('function'); + expect(ingest.suggestKloConnectionForLookerConnection).toBeTypeOf('function'); + expect(ingest.buildLookerPullConfigFromInputs).toBeTypeOf('function'); + expect(ingest.validateLookerWarehouseTarget).toBeTypeOf('function'); + expect(ingest.sqlglotDialectForConnectionType).toBeTypeOf('function'); + expect(ingest.lookerConnectionIdSchema).toBeDefined(); + expect(ingest.lookerRuntimeCursorsSchema).toBeDefined(); + expect(ingest.stagedSyncConfigSchema).toBeDefined(); + expect(ingest.stagedLookerScopeFileSchema).toBeDefined(); + expect(ingest.stagedLookerFetchReportSchema).toBeDefined(); + 
expect(ingest.LocalMetabaseSourceStateReader).toBeTypeOf('function'); + expect(ingest.createLocalMetabaseSourceAdapter).toBeTypeOf('function'); + expect(ingest.metabaseRuntimeConfigFromLocalConnection).toBeTypeOf('function'); + expect(ingest.IngestMetabaseClientFactory).toBeTypeOf('function'); + expect(ingest.MetabaseClient).toBeTypeOf('function'); + expect(ingest.DefaultMetabaseConnectionClientFactory).toBeTypeOf('function'); + expect(ingest.DEFAULT_METABASE_CLIENT_CONFIG).toMatchObject({ + maxRetries: 2, + timeoutMs: 60000, + retryableStatuses: [429, 500, 502, 503, 504], + }); + expect(ingest.expandCardReferences).toBeTypeOf('function'); + expect(ingest.CardReferenceCycleError).toBeTypeOf('function'); + expect(ingest.parseMetabasePullConfig).toBeTypeOf('function'); + expect(ingest.METABASE_ENGINE_TO_CONNECTION_TYPE).toMatchObject({ + postgres: 'POSTGRESQL', + bigquery: 'BIGQUERY', + snowflake: 'SNOWFLAKE', + }); + expect(metabaseMapping.METABASE_ENGINE_TO_CONNECTION_TYPE).toBe(ingest.METABASE_ENGINE_TO_CONNECTION_TYPE); + expect(metabaseMapping.validateMappingPhysicalMatch).toBeTypeOf('function'); + expect(ingest.discoverMetabaseDatabases).toBeTypeOf('function'); + expect(ingest.computeMetabaseMappingDrift).toBeTypeOf('function'); + expect(ingest.computeMetabaseMappingPhysicalMismatches).toBeTypeOf('function'); + expect(ingest.refreshMetabaseMapping).toBeTypeOf('function'); + expect(ingest.validateMetabaseMappings).toBeTypeOf('function'); + expect(ingest.validateMappingPhysicalMatch).toBeTypeOf('function'); + expect(ingest.findBestMatch).toBeTypeOf('function'); + expect(ingest.NotionSourceAdapter).toBeTypeOf('function'); + expect(ingest.NotionClient).toBeTypeOf('function'); + expect(ingest.HistoricSqlSourceAdapter).toBeTypeOf('function'); + expect(ingest.SnowflakeHistoricSqlQueryHistoryReader).toBeTypeOf('function'); + expect(ingest.BigQueryHistoricSqlQueryHistoryReader).toBeTypeOf('function'); + 
expect(ingest.PostgresPgssQueryHistoryReader).toBeTypeOf('function'); + expect(ingest.stagePgStatStatementsTemplates).toBeTypeOf('function'); + expect(ingest.pgssBaselinePath).toBeTypeOf('function'); + expect(ingest.readPgssBaseline).toBeTypeOf('function'); + expect(ingest.writePgssBaselineAtomic).toBeTypeOf('function'); + expect(ingest.HistoricSqlExtensionMissingError).toBeTypeOf('function'); + expect(ingest.HistoricSqlVersionUnsupportedError).toBeTypeOf('function'); + expect(ingest.HISTORIC_SQL_SOURCE_KEY).toBe('historic-sql'); + expect(ingest.SqliteContextEvidenceStore).toBeTypeOf('function'); + expect(ingest.SqliteBundleIngestStore).toBeTypeOf('function'); + expect(ingest.CuratorPaginationService).toBeTypeOf('function'); + expect(mcp).toBeDefined(); + expect(project).toBeDefined(); + expect(daemon).toBeDefined(); + expect(mcp.registerKloContextTools).toBeTypeOf('function'); + expect(mcp.createLocalProjectMcpContextPorts).toBeTypeOf('function'); + expect(project.buildDefaultKloProjectConfig).toBeTypeOf('function'); + expect(daemon.createHttpSemanticLayerComputePort).toBeTypeOf('function'); + expect(daemon.createPythonSemanticLayerComputePort).toBeTypeOf('function'); + expect(sqlAnalysis.createHttpSqlAnalysisPort).toBeTypeOf('function'); + expect(root.createHttpSqlAnalysisPort).toBeTypeOf('function'); + }); +}); diff --git a/packages/context/src/project/config.test.ts b/packages/context/src/project/config.test.ts new file mode 100644 index 00000000..4d5c851d --- /dev/null +++ b/packages/context/src/project/config.test.ts @@ -0,0 +1,391 @@ +import { describe, expect, it } from 'vitest'; +import { buildDefaultKloProjectConfig, parseKloProjectConfig, serializeKloProjectConfig } from './config.js'; + +describe('KLO project config', () => { + it('builds the default standalone project config', () => { + expect(buildDefaultKloProjectConfig('warehouse')).toEqual({ + project: 'warehouse', + connections: {}, + storage: { + state: 'sqlite', + search: 'sqlite-fts5', + git: { 
+ auto_commit: true, + author: 'klo ', + }, + }, + llm: { + provider: { + backend: 'none', + }, + models: {}, + }, + ingest: { + adapters: ['live-database', 'lookml', 'metabase', 'metricflow', 'notion'], + embeddings: { + backend: 'deterministic', + model: 'deterministic', + dimensions: 8, + }, + workUnits: { + stepBudget: 40, + maxConcurrency: 1, + failureMode: 'continue', + }, + }, + agent: { + run_research: { + enabled: false, + max_iterations: 20, + default_toolset: ['sl_query', 'knowledge_search', 'sl_read_source'], + }, + }, + memory: { + auto_commit: true, + }, + scan: { + enrichment: { + mode: 'none', + }, + relationships: { + enabled: true, + llmProposals: true, + validationRequiredForManifest: true, + acceptThreshold: 0.85, + reviewThreshold: 0.55, + maxLlmTablesPerBatch: 40, + maxCandidatesPerColumn: 25, + profileSampleRows: 10000, + validationConcurrency: 4, + }, + }, + }); + }); + + it('round-trips through YAML with stable defaults', () => { + const serialized = serializeKloProjectConfig(buildDefaultKloProjectConfig('warehouse')); + const parsed = parseKloProjectConfig(serialized); + + expect(serialized).toContain('project: warehouse'); + expect(serialized).toContain('live-database'); + expect(serialized).toContain('notion'); + expect(serialized).toContain( + ' embeddings:\n backend: deterministic\n model: deterministic\n dimensions: 8', + ); + expect(parsed.project).toBe('warehouse'); + expect(parsed.ingest.adapters).toEqual(['live-database', 'lookml', 'metabase', 'metricflow', 'notion']); + expect(parsed.ingest.embeddings).toEqual({ + backend: 'deterministic', + model: 'deterministic', + dimensions: 8, + }); + }); + + it('parses and serializes setup wizard metadata', () => { + const config = parseKloProjectConfig(` +project: revenue +setup: + database_connection_ids: + - warehouse + - analytics + completed_steps: + - project + - llm +connections: + warehouse: + driver: postgres + url: env:WAREHOUSE_URL +`); + + expect(config.setup).toEqual({ + 
database_connection_ids: ['warehouse', 'analytics'], + completed_steps: ['project', 'llm'], + }); + + const serialized = serializeKloProjectConfig(config); + expect(serialized).toContain('setup:'); + expect(serialized).toContain('database_connection_ids:'); + expect(serialized).toContain('completed_steps:'); + }); + + it('parses global direct Anthropic LLM config', () => { + const config = parseKloProjectConfig(` +project: demo +llm: + provider: + backend: anthropic + anthropic: + api_key: env:ANTHROPIC_API_KEY + models: + default: claude-sonnet-4-6 + triage: claude-haiku-4-5 + repair: claude-opus-4-7 + promptCaching: + enabled: false +ingest: + workUnits: + stepBudget: 30 + maxConcurrency: 2 + failureMode: abort +`); + + expect(config.llm).toMatchObject({ + provider: { + backend: 'anthropic', + anthropic: { api_key: 'env:ANTHROPIC_API_KEY' }, // pragma: allowlist secret + }, + models: { + default: 'claude-sonnet-4-6', + triage: 'claude-haiku-4-5', + repair: 'claude-opus-4-7', + }, + promptCaching: { enabled: false }, + }); + expect(config.ingest.workUnits).toEqual({ + stepBudget: 30, + maxConcurrency: 2, + failureMode: 'abort', + }); + }); + + it('parses global Vertex LLM config', () => { + const config = parseKloProjectConfig(` +project: demo +llm: + provider: + backend: vertex + vertex: + project: local-gcp-project + location: us-east5 + models: + default: claude-sonnet-4-6 + triage: claude-haiku-4-5 +`); + + expect(config.llm.provider.backend).toBe('vertex'); + expect(config.llm.provider.vertex).toEqual({ project: 'local-gcp-project', location: 'us-east5' }); + expect(config.llm.models).toEqual({ + default: 'claude-sonnet-4-6', + triage: 'claude-haiku-4-5', + }); + }); + + it('parses gateway LLM, OpenAI scan embeddings, and sentence-transformers ingest embeddings', () => { + const config = parseKloProjectConfig(` +project: demo +llm: + provider: + backend: gateway + gateway: + api_key: env:AI_GATEWAY_API_KEY + base_url: https://gateway.example/v1 + models: + 
default: anthropic/claude-sonnet-4-6 +ingest: + embeddings: + backend: sentence-transformers + model: all-MiniLM-L6-v2 + dimensions: 384 + sentenceTransformers: + base_url: http://127.0.0.1:18081 + pathPrefix: "" + batchSize: 16 +scan: + enrichment: + mode: llm + embeddings: + backend: openai + model: text-embedding-3-small + dimensions: 1536 + openai: + api_key: env:OPENAI_API_KEY + batchSize: 32 +`); + + expect(config.ingest.embeddings).toMatchObject({ + backend: 'sentence-transformers', + model: 'all-MiniLM-L6-v2', + dimensions: 384, + sentenceTransformers: { base_url: 'http://127.0.0.1:18081', pathPrefix: '' }, + batchSize: 16, + }); + expect(config.llm.models.default).toBe('anthropic/claude-sonnet-4-6'); + expect(config.scan.enrichment.mode).toBe('llm'); + expect(config.scan.enrichment.embeddings?.dimensions).toBe(1536); + }); + + it('parses scan relationship settings', () => { + const config = parseKloProjectConfig(` +project: demo +scan: + relationships: + enabled: false + llm_proposals: false + validation_required_for_manifest: true + accept_threshold: 0.91 + review_threshold: 0.61 + max_llm_tables_per_batch: 12 + max_candidates_per_column: 7 + profile_sample_rows: 500 + validation_concurrency: 2 + validation_budget: 0 +`); + + expect(config.scan.relationships).toEqual({ + enabled: false, + llmProposals: false, + validationRequiredForManifest: true, + acceptThreshold: 0.91, + reviewThreshold: 0.61, + maxLlmTablesPerBatch: 12, + maxCandidatesPerColumn: 7, + profileSampleRows: 500, + validationConcurrency: 2, + validationBudget: 0, + }); + expect(serializeKloProjectConfig(config)).toContain('enabled: false'); + expect(serializeKloProjectConfig(config)).toContain('llmProposals: false'); + expect(serializeKloProjectConfig(config)).toContain('validationRequiredForManifest: true'); + expect(serializeKloProjectConfig(config)).toContain('acceptThreshold: 0.91'); + expect(serializeKloProjectConfig(config)).toContain('reviewThreshold: 0.61'); + 
expect(serializeKloProjectConfig(config)).toContain('maxLlmTablesPerBatch: 12'); + expect(serializeKloProjectConfig(config)).toContain('maxCandidatesPerColumn: 7'); + expect(serializeKloProjectConfig(config)).toContain('profileSampleRows: 500'); + expect(serializeKloProjectConfig(config)).toContain('validationConcurrency: 2'); + expect(serializeKloProjectConfig(config)).toContain('validationBudget: 0'); + }); + + it('parses the scan relationship validation budget sentinel', () => { + const config = parseKloProjectConfig(` +project: demo +scan: + relationships: + validation_budget: all +`); + + expect(config.scan.relationships.validationBudget).toBe('all'); + expect(serializeKloProjectConfig(config)).toContain('validationBudget: all'); + }); + + it('falls back to safe scan relationship defaults for invalid numeric settings', () => { + const config = parseKloProjectConfig(` +project: demo +scan: + relationships: + accept_threshold: 2 + review_threshold: -1 + max_llm_tables_per_batch: 0 + max_candidates_per_column: -4 + profile_sample_rows: 0 + validation_concurrency: 0 + validation_budget: 1.5 +`); + + expect(config.scan.relationships).toMatchObject({ + acceptThreshold: 0.85, + reviewThreshold: 0.55, + maxLlmTablesPerBatch: 40, + maxCandidatesPerColumn: 25, + profileSampleRows: 10000, + validationConcurrency: 4, + }); + expect(config.scan.relationships).not.toHaveProperty('validationBudget'); + }); + + it('falls back for invalid scan relationship validation budget strings', () => { + const config = parseKloProjectConfig(` +project: demo +scan: + relationships: + validation_budget: infinite +`); + + expect(config.scan.relationships).not.toHaveProperty('validationBudget'); + }); + + it('rejects legacy local LLM and embedding fields', () => { + expect(() => + parseKloProjectConfig(` +project: demo +ingest: + llm: + backend: anthropic +`), + ).toThrow('Unsupported ingest.llm: use top-level llm.provider, llm.models, and ingest.workUnits'); + + expect(() => + 
parseKloProjectConfig(` +project: demo +scan: + enrichment: + backend: gateway +`), + ).toThrow('Unsupported scan.enrichment.backend: use scan.enrichment.mode'); + + expect(() => + parseKloProjectConfig(` +project: demo +scan: + enrichment: + mode: llm + llm: + backend: gateway +`), + ).toThrow('Unsupported scan.enrichment.llm: use top-level llm.provider and llm.models'); + + expect(() => + parseKloProjectConfig(` +project: demo +ingest: + embeddings: + provider: gateway + max_batch_size: 32 +`), + ).toThrow('Unsupported ingest.embeddings.provider'); + }); + + it('rejects gateway embedding configs', () => { + expect(() => + parseKloProjectConfig(` +project: demo +ingest: + embeddings: + backend: gateway + model: provider/text-embedding + dimensions: 1536 +`), + ).toThrow('Unsupported ingest.embeddings.backend: gateway'); + + expect(() => + parseKloProjectConfig(` +project: demo +scan: + enrichment: + mode: llm + embeddings: + backend: gateway + model: provider/text-embedding + dimensions: 1536 +`), + ).toThrow('Unsupported scan.enrichment.embeddings.backend: gateway'); + }); + + it('fills optional sections when a minimal config is loaded', () => { + const config = parseKloProjectConfig('project: local\n'); + + expect(config).toEqual(buildDefaultKloProjectConfig('local')); + expect(config.ingest.embeddings).toEqual({ + backend: 'deterministic', + model: 'deterministic', + dimensions: 8, + }); + }); + + it('rejects configs without an object root', () => { + expect(() => parseKloProjectConfig('- nope\n')).toThrow('klo.yaml must contain a YAML object'); + }); + + it('rejects configs with a missing project name', () => { + expect(() => parseKloProjectConfig('connections: {}\n')).toThrow('klo.yaml field "project" is required'); + }); +}); diff --git a/packages/context/src/project/config.ts b/packages/context/src/project/config.ts new file mode 100644 index 00000000..5151da52 --- /dev/null +++ b/packages/context/src/project/config.ts @@ -0,0 +1,551 @@ +import type { 
/** Where project state is persisted. */
export type KloStorageState = 'postgres' | 'sqlite';
/** Search backend used for the project. */
export type KloSearchBackend = 'postgres-hybrid' | 'sqlite-fts5';
// Local configs additionally accept 'none' for the LLM / embedding backends.
type KloLocalLlmBackend = KloLlmBackend | 'none';
type KloLocalEmbeddingBackend = KloEmbeddingBackend | 'none';
type KloScanEnrichmentMode = 'none' | 'deterministic' | 'llm';

/** Optional prompt-caching switches; every field may be omitted. */
interface KloProjectPromptCachingConfig {
  enabled?: boolean;
  systemTtl?: KloPromptCacheTtl;
  toolsTtl?: KloPromptCacheTtl;
  historyTtl?: KloPromptCacheTtl;
  vertexFallbackTo5m?: boolean;
}

/** LLM provider selection plus the per-backend connection details. */
export interface KloProjectLlmProviderConfig {
  backend: KloLocalLlmBackend;
  vertex?: { project?: string; location: string };
  anthropic?: { api_key?: string; base_url?: string };
  gateway?: { api_key?: string; base_url?: string };
}

/** Top-level `llm` section: provider, role-to-model map, prompt caching. */
export interface KloProjectLlmConfig {
  provider: KloProjectLlmProviderConfig;
  // Model name per role; `default` is always an accepted key.
  models: Partial<Record<KloModelRole, string>> & { default?: string };
  promptCaching?: KloProjectPromptCachingConfig;
}

/** Embedding backend configuration (used by both ingest and scan enrichment). */
export interface KloProjectEmbeddingConfig {
  backend: KloLocalEmbeddingBackend;
  model?: string;
  dimensions: number;
  openai?: { api_key?: string; base_url?: string };
  sentenceTransformers?: { base_url: string; pathPrefix?: string };
  batchSize?: number;
}

/** `scan.enrichment` section. */
export interface KloScanEnrichmentConfig {
  mode: KloScanEnrichmentMode;
  embeddings?: KloProjectEmbeddingConfig;
}

/** `ingest.workUnits` section. */
export interface KloIngestWorkUnitsConfig {
  stepBudget: number;
  maxConcurrency: number;
  failureMode: 'abort' | 'continue';
}

/** `scan.relationships` section. */
export interface KloScanRelationshipConfig {
  enabled: boolean;
  llmProposals: boolean;
  validationRequiredForManifest: boolean;
  acceptThreshold: number;
  reviewThreshold: number;
  maxLlmTablesPerBatch: number;
  maxCandidatesPerColumn: number;
  profileSampleRows: number;
  validationConcurrency: number;
  // Either a non-negative integer or the sentinel 'all'; absent when unset.
  validationBudget?: number | 'all';
}

/** `scan` section. */
export interface KloProjectScanConfig {
  enrichment: KloScanEnrichmentConfig;
  relationships: KloScanRelationshipConfig;
}

/** One entry of `connections`; driver-specific keys are passed through untyped. */
export interface KloProjectConnectionConfig {
  driver: string;
  url?: string;
  readonly?: boolean;
  [key: string]: unknown;
}

/** Setup-wizard metadata stored under `setup`. */
export interface KloProjectSetupConfig {
  database_connection_ids: string[];
  completed_steps: string[];
}

/** Fully-populated project configuration (the parsed form of klo.yaml). */
export interface KloProjectConfig {
  project: string;
  setup?: KloProjectSetupConfig;
  // Keyed by connection id.
  connections: Record<string, KloProjectConnectionConfig>;
  storage: {
    state: KloStorageState;
    search: KloSearchBackend;
    git: {
      auto_commit: boolean;
      author: string;
    };
  };
  llm: KloProjectLlmConfig;
  ingest: {
    adapters: string[];
    embeddings: KloProjectEmbeddingConfig;
    workUnits: KloIngestWorkUnitsConfig;
  };
  agent: {
    run_research: {
      enabled: boolean;
      max_iterations: number;
      default_toolset: string[];
    };
  };
  memory: {
    auto_commit: boolean;
  };
  scan: KloProjectScanConfig;
}

/** Type guard: true for non-null objects that are not arrays. */
function isRecord(value: unknown): value is Record<string, unknown> {
  return typeof value === 'object' && value !== null && !Array.isArray(value);
}

/**
 * Returns the non-empty string items of `value`.
 * `fallback` is used only when `value` is not an array at all; an array with
 * no usable items yields `[]`, not the fallback.
 */
function stringArray(value: unknown, fallback: string[]): string[] {
  if (!Array.isArray(value)) {
    return fallback;
  }
  return value.filter((item): item is string => typeof item === 'string' && item.length > 0);
}

/** Returns `value` when it is a boolean, otherwise `fallback`. */
function booleanValue(value: unknown, fallback: boolean): boolean {
  return typeof value === 'boolean' ? value : fallback;
}

/** Returns `value` when it is a finite number (NaN/Infinity rejected), otherwise `fallback`. */
function numberValue(value: unknown, fallback: number): number {
  return typeof value === 'number' && Number.isFinite(value) ? value : fallback;
}

/**
 * Returns `value` when it is a string with non-whitespace content, otherwise `fallback`.
 * The accepted string is returned as-is (not trimmed).
 */
function stringValue(value: unknown, fallback: string): string {
  return typeof value === 'string' && value.trim().length > 0 ? value : fallback;
}

/** Returns the trimmed string, or `undefined` for non-strings and blank strings. */
function optionalNonEmptyString(value: unknown): string | undefined {
  if (typeof value !== 'string') {
    return undefined;
  }

  const trimmed = value.trim();
  return trimmed.length > 0 ? trimmed : undefined;
}

/** Accepts integers strictly greater than zero; anything else yields `fallback`. */
function positiveIntegerConfigValue(value: unknown, fallback: number): number {
  if (typeof value !== 'number' || !Number.isInteger(value) || value <= 0) {
    return fallback;
  }

  return value;
}

/**
 * Accepts the sentinel `'all'` or a non-negative integer (zero is valid);
 * anything else yields `fallback`, which may itself be `undefined`.
 */
function validationBudgetConfigValue(value: unknown, fallback: number | 'all' | undefined): number | 'all' | undefined {
  if (value === 'all') {
    return value;
  }
  if (typeof value === 'number' && Number.isInteger(value) && value >= 0) {
    return value;
  }
  return fallback;
}

/** Accepts finite numbers in the closed interval [0, 1]; anything else yields `fallback`. */
function ratioConfigValue(value: unknown, fallback: number): number {
  if (typeof value !== 'number' || !Number.isFinite(value) || value < 0 || value > 1) {
    return fallback;
  }

  return value;
}

/**
 * Validates an LLM backend name.
 *
 * @param value    raw config value
 * @param fallback returned when `value` is null or undefined
 * @param section  config path used in the error message
 * @throws Error when `value` is present but not a supported backend
 */
function localLlmBackend(value: unknown, fallback: KloLocalLlmBackend, section = 'llm.provider'): KloLocalLlmBackend {
  if (value == null) {
    return fallback;
  }

  if (value === 'none' || value === 'anthropic' || value === 'vertex' || value === 'gateway') {
    return value;
  }

  throw new Error(`Unsupported ${section}.backend: ${String(value)}`);
}

/**
 * Validates an embedding backend name.
 *
 * @param value    raw config value
 * @param fallback returned when `value` is null or undefined
 * @param section  config path used in the error message
 * @throws Error when `value` is present but not a supported backend
 */
function localEmbeddingBackend(
  value: unknown,
  fallback: KloLocalEmbeddingBackend,
  section = 'ingest.embeddings',
): KloLocalEmbeddingBackend {
  if (value == null) {
    return fallback;
  }

  if (
    value === 'none' ||
    value === 'deterministic' ||
    value === 'openai' ||
    value === 'sentence-transformers'
  ) {
    return value;
  }

  throw new Error(`Unsupported ${section}.backend: ${String(value)}`);
}

/**
 * Validates `scan.enrichment.mode`.
 *
 * @throws Error when `value` is present but not one of the known modes
 */
function scanEnrichmentMode(value: unknown, fallback: KloScanEnrichmentMode): KloScanEnrichmentMode {
  if (value == null) {
    return fallback;
  }

  if (value === 'none' || value === 'deterministic' || value === 'llm') {
    return value;
  }

  throw new Error(`Unsupported scan.enrichment.mode: ${String(value)}`);
}

/**
 * Rejects the legacy `<section>.provider` key.
 * Only `undefined` passes; any other value (including `null`) throws.
 *
 * @throws Error pointing the user at `<section>.backend`
 */
function rejectLegacyProvider(section: string, value: unknown): void {
  if (value !== undefined) {
    throw new Error(`Unsupported ${section}.provider: use ${section}.backend`);
  }
}

/** Returns `value` when it is a plain object, otherwise an empty object. */
function optionalStringRecord(value: unknown): Record<string, unknown> {
  return isRecord(value) ? value : {};
}

/**
 * Extracts `api_key` / `base_url` from a provider sub-section.
 * Returns `undefined` when the input is not an object or neither key holds a
 * non-blank string; otherwise only the keys that are present are emitted.
 */
function optionalProviderConfig(value: unknown): { api_key?: string; base_url?: string } | undefined {
  if (!isRecord(value)) {
    return undefined;
  }

  const apiKey = optionalNonEmptyString(value.api_key);
  const baseUrl = optionalNonEmptyString(value.base_url);
  if (!apiKey && !baseUrl) {
    return undefined;
  }

  return {
    ...(apiKey ? { api_key: apiKey } : {}),
    ...(baseUrl ? { base_url: baseUrl } : {}),
  };
}

/**
 * Builds the role-to-model map, keeping only entries whose value is a
 * non-blank string (stored trimmed). Role names are not validated here.
 */
function parseModels(value: unknown): KloProjectLlmConfig['models'] {
  if (!isRecord(value)) {
    return {};
  }

  const models: KloProjectLlmConfig['models'] = {};
  for (const [role, model] of Object.entries(value)) {
    const modelName = optionalNonEmptyString(model);
    if (modelName) {
      models[role as KloModelRole] = modelName;
    }
  }
  return models;
}

/** Accepts the TTL literals '5m' and '1h'; anything else is `undefined`. */
function promptCacheTtl(value: unknown): KloPromptCacheTtl | undefined {
  return value === '5m' || value === '1h' ? value : undefined;
}

/**
 * Parses the `promptCaching` section.
 * Returns `undefined` when the section is not an object; otherwise an object
 * containing only the keys that carried a valid value (possibly empty).
 */
function parsePromptCaching(value: unknown): KloProjectPromptCachingConfig | undefined {
  if (!isRecord(value)) {
    return undefined;
  }

  // Keys are assigned in a fixed order so the resulting object keeps a stable
  // key order; each TTL is validated once.
  const parsed: KloProjectPromptCachingConfig = {};
  if (typeof value.enabled === 'boolean') {
    parsed.enabled = value.enabled;
  }
  const systemTtl = promptCacheTtl(value.systemTtl);
  if (systemTtl) {
    parsed.systemTtl = systemTtl;
  }
  const toolsTtl = promptCacheTtl(value.toolsTtl);
  if (toolsTtl) {
    parsed.toolsTtl = toolsTtl;
  }
  const historyTtl = promptCacheTtl(value.historyTtl);
  if (historyTtl) {
    parsed.historyTtl = historyTtl;
  }
  if (typeof value.vertexFallbackTo5m === 'boolean') {
    parsed.vertexFallbackTo5m = value.vertexFallbackTo5m;
  }
  return parsed;
}
{ project: optionalNonEmptyString(raw.vertex.project) } : {}), + location: stringValue(raw.vertex.location, ''), + } + : undefined; + const anthropic = optionalProviderConfig(raw.anthropic); + const gateway = optionalProviderConfig(raw.gateway); + + return { + backend: localLlmBackend(raw.backend, defaults.backend, section), + ...(vertex ? { vertex } : {}), + ...(anthropic ? { anthropic } : {}), + ...(gateway ? { gateway } : {}), + }; +} + +function parseProjectLlmConfig(raw: Record, defaults: KloProjectLlmConfig): KloProjectLlmConfig { + const provider = isRecord(raw.provider) ? raw.provider : {}; + return { + provider: parseProjectLlmProviderConfig(provider, defaults.provider, 'llm.provider'), + models: parseModels(raw.models ?? defaults.models), + ...(parsePromptCaching(raw.promptCaching) ? { promptCaching: parsePromptCaching(raw.promptCaching) } : {}), + }; +} + +function parseProjectEmbeddingConfig( + raw: Record, + defaults: KloProjectEmbeddingConfig, + section: string, +): KloProjectEmbeddingConfig { + rejectLegacyProvider(section, raw.provider); + + const openai = optionalProviderConfig(raw.openai); + const sentenceTransformers = isRecord(raw.sentenceTransformers) + ? { + base_url: stringValue(raw.sentenceTransformers.base_url, ''), + ...(typeof raw.sentenceTransformers.pathPrefix === 'string' + ? { pathPrefix: raw.sentenceTransformers.pathPrefix } + : {}), + } + : undefined; + + const backend = localEmbeddingBackend(raw.backend, defaults.backend, section); + const model = + optionalNonEmptyString(raw.model) ?? (raw.backend == null && backend !== 'none' ? defaults.model : undefined); + const batchSize = positiveIntegerConfigValue(raw.batchSize, 0); + return { + backend, + ...(model ? { model } : {}), + dimensions: positiveIntegerConfigValue(raw.dimensions, defaults.dimensions), + ...(openai ? { openai } : {}), + ...(sentenceTransformers ? { sentenceTransformers } : {}), + ...(batchSize > 0 ? 
{ batchSize } : {}), + }; +} + +function parseScanRelationshipConfig( + raw: Record, + defaults: KloScanRelationshipConfig, +): KloScanRelationshipConfig { + const validationBudget = validationBudgetConfigValue( + raw.validation_budget ?? raw.validationBudget, + defaults.validationBudget, + ); + + return { + enabled: booleanValue(raw.enabled, defaults.enabled), + llmProposals: booleanValue(raw.llm_proposals ?? raw.llmProposals, defaults.llmProposals), + validationRequiredForManifest: booleanValue( + raw.validation_required_for_manifest ?? raw.validationRequiredForManifest, + defaults.validationRequiredForManifest, + ), + acceptThreshold: ratioConfigValue(raw.accept_threshold ?? raw.acceptThreshold, defaults.acceptThreshold), + reviewThreshold: ratioConfigValue(raw.review_threshold ?? raw.reviewThreshold, defaults.reviewThreshold), + maxLlmTablesPerBatch: positiveIntegerConfigValue( + raw.max_llm_tables_per_batch ?? raw.maxLlmTablesPerBatch, + defaults.maxLlmTablesPerBatch, + ), + maxCandidatesPerColumn: positiveIntegerConfigValue( + raw.max_candidates_per_column ?? raw.maxCandidatesPerColumn, + defaults.maxCandidatesPerColumn, + ), + profileSampleRows: positiveIntegerConfigValue( + raw.profile_sample_rows ?? raw.profileSampleRows, + defaults.profileSampleRows, + ), + validationConcurrency: positiveIntegerConfigValue( + raw.validation_concurrency ?? raw.validationConcurrency, + defaults.validationConcurrency, + ), + ...(validationBudget !== undefined ? { validationBudget } : {}), + }; +} + +function workUnitFailureMode(value: unknown, fallback: 'abort' | 'continue'): 'abort' | 'continue' { + return value === 'abort' || value === 'continue' ? 
value : fallback; +} + +function parseIngestWorkUnitsConfig( + raw: Record, + defaults: KloIngestWorkUnitsConfig, +): KloIngestWorkUnitsConfig { + return { + stepBudget: positiveIntegerConfigValue(raw.stepBudget, defaults.stepBudget), + maxConcurrency: positiveIntegerConfigValue(raw.maxConcurrency, defaults.maxConcurrency), + failureMode: workUnitFailureMode(raw.failureMode, defaults.failureMode), + }; +} + +export function buildDefaultKloProjectConfig(projectName = 'klo-project'): KloProjectConfig { + return { + project: projectName, + connections: {}, + storage: { + state: 'sqlite', + search: 'sqlite-fts5', + git: { + auto_commit: true, + author: 'klo ', + }, + }, + llm: { + provider: { + backend: 'none', + }, + models: {}, + }, + ingest: { + adapters: ['live-database', 'lookml', 'metabase', 'metricflow', 'notion'], + embeddings: { + backend: 'deterministic', + model: 'deterministic', + dimensions: 8, + }, + workUnits: { + stepBudget: 40, + maxConcurrency: 1, + failureMode: 'continue', + }, + }, + agent: { + run_research: { + enabled: false, + max_iterations: 20, + default_toolset: ['sl_query', 'knowledge_search', 'sl_read_source'], + }, + }, + memory: { + auto_commit: true, + }, + scan: { + enrichment: { + mode: 'none', + }, + relationships: { + enabled: true, + llmProposals: true, + validationRequiredForManifest: true, + acceptThreshold: 0.85, + reviewThreshold: 0.55, + maxLlmTablesPerBatch: 40, + maxCandidatesPerColumn: 25, + profileSampleRows: 10000, + validationConcurrency: 4, + }, + }, + }; +} + +export function parseKloProjectConfig(raw: string): KloProjectConfig { + const parsed = YAML.parse(raw) as unknown; + if (!isRecord(parsed)) { + throw new Error('klo.yaml must contain a YAML object'); + } + + const project = parsed.project; + if (typeof project !== 'string' || project.trim().length === 0) { + throw new Error('klo.yaml field "project" is required'); + } + + const defaults = buildDefaultKloProjectConfig(project.trim()); + const llm = 
isRecord(parsed.llm) ? parsed.llm : {}; + const storage = isRecord(parsed.storage) ? parsed.storage : {}; + const storageGit = isRecord(storage.git) ? storage.git : {}; + const setup = isRecord(parsed.setup) ? parsed.setup : undefined; + const ingest = isRecord(parsed.ingest) ? parsed.ingest : {}; + const ingestEmbeddings = isRecord(ingest.embeddings) ? ingest.embeddings : {}; + const ingestWorkUnits = isRecord(ingest.workUnits) ? ingest.workUnits : {}; + const agent = isRecord(parsed.agent) ? parsed.agent : {}; + const runResearch = isRecord(agent.run_research) ? agent.run_research : {}; + const memory = isRecord(parsed.memory) ? parsed.memory : {}; + const scan = isRecord(parsed.scan) ? parsed.scan : {}; + const scanEnrichment = isRecord(scan.enrichment) ? scan.enrichment : {}; + const scanRelationships = isRecord(scan.relationships) ? scan.relationships : {}; + if (isRecord(ingest.llm)) { + throw new Error('Unsupported ingest.llm: use top-level llm.provider, llm.models, and ingest.workUnits'); + } + if (scanEnrichment.backend !== undefined) { + throw new Error('Unsupported scan.enrichment.backend: use scan.enrichment.mode'); + } + if (isRecord(scanEnrichment.llm)) { + throw new Error('Unsupported scan.enrichment.llm: use top-level llm.provider and llm.models'); + } + + const parsedLlm = parseProjectLlmConfig(llm, defaults.llm); + const parsedIngestEmbeddings = parseProjectEmbeddingConfig( + ingestEmbeddings, + defaults.ingest.embeddings, + 'ingest.embeddings', + ); + const parsedIngestWorkUnits = parseIngestWorkUnitsConfig(ingestWorkUnits, defaults.ingest.workUnits); + const scanEmbeddings = parseProjectEmbeddingConfig( + optionalStringRecord(scanEnrichment.embeddings), + defaults.ingest.embeddings, + 'scan.enrichment.embeddings', + ); + const parsedScanEnrichment: KloScanEnrichmentConfig = { + mode: scanEnrichmentMode(scanEnrichment.mode, defaults.scan.enrichment.mode), + ...(isRecord(scanEnrichment.embeddings) ? 
{ embeddings: scanEmbeddings } : {}), + }; + const parsedScanRelationships = parseScanRelationshipConfig(scanRelationships, defaults.scan.relationships); + + return { + project: project.trim(), + ...(setup + ? { + setup: { + database_connection_ids: stringArray(setup.database_connection_ids, []), + completed_steps: stringArray(setup.completed_steps, []), + }, + } + : {}), + connections: isRecord(parsed.connections) + ? (parsed.connections as Record) + : defaults.connections, + storage: { + state: storage.state === 'sqlite' ? 'sqlite' : defaults.storage.state, + search: storage.search === 'sqlite-fts5' ? 'sqlite-fts5' : defaults.storage.search, + git: { + auto_commit: booleanValue(storageGit.auto_commit, defaults.storage.git.auto_commit), + author: stringValue(storageGit.author, defaults.storage.git.author), + }, + }, + llm: parsedLlm, + ingest: { + adapters: stringArray(ingest.adapters, defaults.ingest.adapters), + embeddings: parsedIngestEmbeddings, + workUnits: parsedIngestWorkUnits, + }, + agent: { + run_research: { + enabled: booleanValue(runResearch.enabled, defaults.agent.run_research.enabled), + max_iterations: numberValue(runResearch.max_iterations, defaults.agent.run_research.max_iterations), + default_toolset: stringArray(runResearch.default_toolset, defaults.agent.run_research.default_toolset), + }, + }, + memory: { + auto_commit: booleanValue(memory.auto_commit, defaults.memory.auto_commit), + }, + scan: { + enrichment: parsedScanEnrichment, + relationships: parsedScanRelationships, + }, + }; +} + +export function serializeKloProjectConfig(config: KloProjectConfig): string { + return `${YAML.stringify(config, { indent: 2, lineWidth: 0 }).trimEnd()}\n`; +} diff --git a/packages/context/src/project/index.ts b/packages/context/src/project/index.ts new file mode 100644 index 00000000..71750f02 --- /dev/null +++ b/packages/context/src/project/index.ts @@ -0,0 +1,33 @@ +export type { + KloProjectConfig, + KloProjectConnectionConfig, + 
KloProjectEmbeddingConfig, + KloProjectLlmConfig, + KloSearchBackend, + KloStorageState, +} from './config.js'; +export { buildDefaultKloProjectConfig, parseKloProjectConfig, serializeKloProjectConfig } from './config.js'; +export type { LocalGitFileStoreDeps } from './local-git-file-store.js'; +export { LocalGitFileStore } from './local-git-file-store.js'; +export { kloLocalStateDbPath } from './local-state-db.js'; +export type { + ConnectionMappingBootstrap, + LookerMappingBootstrap, + LookmlMappingBootstrap, + MetabaseMappingBootstrap, +} from './mappings-yaml-schema.js'; +export { + parseConnectionMappingBootstrap, + parseLookerMappingBootstrap, + parseLookmlMappingBootstrap, + parseMetabaseMappingBootstrap, +} from './mappings-yaml-schema.js'; +export type { InitKloProjectOptions, InitKloProjectResult, KloLocalProject, LoadKloProjectOptions } from './project.js'; +export { initKloProject, loadKloProject } from './project.js'; +export type { KloSetupStep } from './setup-config.js'; +export { + KLO_SETUP_STEPS, + markKloSetupStepComplete, + mergeKloSetupGitignoreEntries, + setKloSetupDatabaseConnectionIds, +} from './setup-config.js'; diff --git a/packages/context/src/project/local-git-file-store.test.ts b/packages/context/src/project/local-git-file-store.test.ts new file mode 100644 index 00000000..58bdff10 --- /dev/null +++ b/packages/context/src/project/local-git-file-store.test.ts @@ -0,0 +1,101 @@ +import { mkdtemp, readFile, rm, stat } from 'node:fs/promises'; +import { tmpdir } from 'node:os'; +import { join } from 'node:path'; +import { afterEach, beforeEach, describe, expect, it } from 'vitest'; +import { GitService, type KloCoreConfig } from '../core/index.js'; +import { LocalGitFileStore } from './local-git-file-store.js'; + +describe('LocalGitFileStore', () => { + let tempDir: string; + let store: LocalGitFileStore; + + beforeEach(async () => { + tempDir = await mkdtemp(join(tmpdir(), 'klo-local-store-')); + const coreConfig: KloCoreConfig = { + 
storage: { configDir: tempDir, homeDir: tempDir }, + git: { + userName: 'klo', + userEmail: 'klo@example.com', + bootstrapMessage: 'Initialize test project', + bootstrapAuthor: 'klo', + bootstrapAuthorEmail: 'klo@example.com', + }, + }; + const git = new GitService(coreConfig); + await git.onModuleInit(); + store = new LocalGitFileStore({ rootDir: tempDir, git }); + }); + + afterEach(async () => { + await rm(tempDir, { recursive: true, force: true }); + }); + + it('writes, commits, and reads a project file', async () => { + const write = await store.writeFile( + 'knowledge/global/revenue.md', + '# Revenue\n', + 'Agent', + 'agent@example.com', + 'Add revenue page', + ); + + expect(write.commitHash).toMatch(/^[0-9a-f]{40}$/); + await expect(readFile(join(tempDir, 'knowledge/global/revenue.md'), 'utf-8')).resolves.toBe('# Revenue\n'); + await expect(store.readFile('knowledge/global/revenue.md')).resolves.toMatchObject({ + content: '# Revenue\n', + }); + }); + + it('lists files recursively and can strip the requested prefix', async () => { + await store.writeFile('knowledge/global/a.md', 'a', 'Agent', 'agent@example.com', 'Add a'); + await store.writeFile('knowledge/global/nested/b.md', 'b', 'Agent', 'agent@example.com', 'Add b'); + + await expect(store.listFiles('knowledge')).resolves.toEqual({ + files: ['knowledge/global/a.md', 'knowledge/global/nested/b.md'], + }); + await expect(store.listFiles('knowledge/global', true)).resolves.toEqual({ + files: ['a.md', 'nested/b.md'], + }); + }); + + it('deletes and commits an existing file', async () => { + await store.writeFile('semantic-layer/conn/orders.yaml', 'name: orders\n', 'Agent', 'agent@example.com', 'Add SL'); + + const deleted = await store.deleteFile( + 'semantic-layer/conn/orders.yaml', + 'Agent', + 'agent@example.com', + 'Delete SL', + ); + + expect(deleted?.commitHash).toMatch(/^[0-9a-f]{40}$/); + await expect(stat(join(tempDir, 'semantic-layer/conn/orders.yaml'))).rejects.toThrow(); + }); + + it('returns null 
when deleting a missing file', async () => { + await expect(store.deleteFile('missing.md', 'Agent', 'agent@example.com', 'Delete missing')).resolves.toBeNull(); + }); + + it('exposes Git history for a file', async () => { + await store.writeFile('knowledge/global/history.md', 'v1', 'Agent', 'agent@example.com', 'Add history'); + await store.writeFile('knowledge/global/history.md', 'v2', 'Agent', 'agent@example.com', 'Update history'); + + const history = await store.getFileHistory('knowledge/global/history.md'); + + expect(Array.isArray(history)).toBe(true); + expect(history[0]).toMatchObject({ message: 'Update history' }); + expect(history[1]).toMatchObject({ message: 'Add history' }); + }); + + it('rejects absolute paths and parent-directory traversal', async () => { + await expect(store.writeFile('/tmp/outside.md', 'bad', 'Agent', 'agent@example.com', 'Bad write')).rejects.toThrow( + 'Path must be relative', + ); + + await expect(store.readFile('../outside.md')).rejects.toThrow('Path escapes the project directory'); + }); + + it('rejects direct .git access', async () => { + await expect(store.readFile('.git/config')).rejects.toThrow('Path cannot access .git'); + }); +}); diff --git a/packages/context/src/project/local-git-file-store.ts b/packages/context/src/project/local-git-file-store.ts new file mode 100644 index 00000000..969b079b --- /dev/null +++ b/packages/context/src/project/local-git-file-store.ts @@ -0,0 +1,190 @@ +import { promises as fs } from 'node:fs'; +import { dirname, isAbsolute, join, relative, resolve, sep } from 'node:path'; +import type { + GitCommitInfo, + GitService, + KloFileHistoryEntry, + KloFileListResult, + KloFileReadResult, + KloFileStorePort, + KloFileWriteResult, +} from '../core/index.js'; + +export interface LocalGitFileStoreDeps { + rootDir: string; + git: GitService; +} + +function normalizeRelativePath(filePath: string): string { + return filePath.replaceAll('\\', '/').replace(/^\.\/+/, ''); +} + +function 
gitInfoToWriteResult(info: GitCommitInfo): KloFileWriteResult { + return { + success: true, + commitHash: info.commitHash, + commitMessage: info.message, + author: info.author, + authorEmail: info.authorEmail, + timestamp: info.timestamp, + created: info.created, + }; +} + +export class LocalGitFileStore implements KloFileStorePort { + private readonly rootDir: string; + private readonly git: GitService; + + constructor(deps: LocalGitFileStoreDeps) { + this.rootDir = resolve(deps.rootDir); + this.git = deps.git; + } + + forWorktree(workdir: string): LocalGitFileStore { + return new LocalGitFileStore({ rootDir: workdir, git: this.git.forWorktree(workdir) }); + } + + async writeFile( + path: string, + content: string, + author: string, + authorEmail: string, + commitMessage: string, + options?: { skipLock?: boolean }, + ): Promise { + const relativePath = this.safeRelativePath(path); + const absolutePath = this.absolutePath(relativePath); + await fs.mkdir(dirname(absolutePath), { recursive: true }); + await fs.writeFile(absolutePath, content, 'utf-8'); + + if (options?.skipLock) { + return { success: true, commitHash: null, path: relativePath, operation: 'write' }; + } + + const info = await this.git.commitFile(relativePath, commitMessage, author, authorEmail); + return { ...gitInfoToWriteResult(info), path: relativePath, operation: 'write' }; + } + + async readFile(path: string): Promise { + const relativePath = this.safeRelativePath(path); + const absolutePath = this.absolutePath(relativePath); + const content = await fs.readFile(absolutePath, 'utf-8'); + const stats = await fs.stat(absolutePath); + return { + path: relativePath, + content, + size: stats.size, + modifiedAt: stats.mtime.toISOString(), + }; + } + + async deleteFile( + path: string, + author: string, + authorEmail: string, + commitMessage: string, + options?: { skipLock?: boolean }, + ): Promise { + const relativePath = this.safeRelativePath(path); + const absolutePath = 
this.absolutePath(relativePath); + try { + await fs.access(absolutePath); + } catch { + return null; + } + + await fs.unlink(absolutePath); + + if (options?.skipLock) { + return { success: true, commitHash: null, path: relativePath, operation: 'delete' }; + } + + const info = await this.git.deleteFile(relativePath, commitMessage, author, authorEmail); + return { ...gitInfoToWriteResult(info), path: relativePath, operation: 'delete' }; + } + + async listFiles(path = '', stripPrefix = false): Promise { + const relativePath = path ? this.safeRelativePath(path) : ''; + const searchRoot = relativePath ? this.absolutePath(relativePath) : this.rootDir; + let files: string[]; + + try { + files = await this.walk(searchRoot); + } catch (error) { + if ((error as NodeJS.ErrnoException).code === 'ENOENT') { + return { files: [] }; + } + throw error; + } + + const prefix = relativePath ? `${relativePath}/` : ''; + const relativeFiles = files + .map((file) => normalizeRelativePath(relative(this.rootDir, file))) + .filter((file) => !file.startsWith('.git/') && !file.includes('/.git/')) + .filter((file) => !file.startsWith('.klo/cache/')) + .map((file) => (stripPrefix && prefix && file.startsWith(prefix) ? 
file.slice(prefix.length) : file)) + .sort(); + + return { files: relativeFiles }; + } + + async getFileHistory(path: string): Promise { + const relativePath = this.safeRelativePath(path); + const history = await this.git.getFileHistory(relativePath); + return history.map((entry) => ({ + sha: entry.commitHash, + commitHash: entry.commitHash, + shortHash: entry.shortHash, + message: entry.message, + author: entry.author, + authorEmail: entry.authorEmail, + timestamp: entry.timestamp, + committedDate: entry.committedDate, + created: entry.created, + enhancedMessage: entry.enhancedMessage, + })); + } + + /** + * Validates a caller-supplied path and returns it in normalized, project-relative form. + * + * Returns '' for an empty path. Throws 'Path must be relative' for an absolute path, + * 'Path escapes the project directory' when the path resolves outside rootDir, and + * 'Path cannot access .git' when any segment of the resolved location is a .git + * directory (compared case-insensitively), at any depth. + */ + private safeRelativePath(path: string): string { + if (path.length === 0) { + return ''; + } + if (isAbsolute(path)) { + throw new Error('Path must be relative'); + } + + const normalized = normalizeRelativePath(path); + if (normalized === '.git' || normalized.startsWith('.git/')) { + throw new Error('Path cannot access .git'); + } + + const absolute = resolve(this.rootDir, normalized); + if (absolute !== this.rootDir && !absolute.startsWith(`${this.rootDir}${sep}`)) { + throw new Error('Path escapes the project directory'); + } + + /* The textual check above misses inputs such as 'a/../.git/config', so re-check the resolved location. */ + const resolvedSegments = relative(this.rootDir, absolute).split(sep); + if (resolvedSegments.some((segment) => segment.toLowerCase() === '.git')) { + throw new Error('Path cannot access .git'); + } + + return normalized; + } + + private absolutePath(path: string): string { + return path ?
join(this.rootDir, path) : this.rootDir; + } + + private async walk(dir: string): Promise { + const entries = await fs.readdir(dir, { withFileTypes: true }); + const files: string[] = []; + + for (const entry of entries) { + const absolute = join(dir, entry.name); + if (entry.isDirectory()) { + if (entry.name !== '.git') { + files.push(...(await this.walk(absolute))); + } + } else if (entry.isFile()) { + files.push(absolute); + } + } + + return files; + } +} diff --git a/packages/context/src/project/local-state-db.ts b/packages/context/src/project/local-state-db.ts new file mode 100644 index 00000000..f2155780 --- /dev/null +++ b/packages/context/src/project/local-state-db.ts @@ -0,0 +1,6 @@ +import { join } from 'node:path'; +import type { KloLocalProject } from './project.js'; + +export function kloLocalStateDbPath(project: Pick): string { + return join(project.projectDir, '.klo', 'db.sqlite'); +} diff --git a/packages/context/src/project/mappings-yaml-schema.test.ts b/packages/context/src/project/mappings-yaml-schema.test.ts new file mode 100644 index 00000000..6ba31822 --- /dev/null +++ b/packages/context/src/project/mappings-yaml-schema.test.ts @@ -0,0 +1,85 @@ +import { describe, expect, it } from 'vitest'; +import { + parseConnectionMappingBootstrap, + parseLookmlMappingBootstrap, + parseLookerMappingBootstrap, + parseMetabaseMappingBootstrap, +} from './mappings-yaml-schema.js'; + +describe('klo.yaml mapping bootstrap schema', () => { + it('parses Metabase mapping intent with CLI syncMode default ALL', () => { + const bootstrap = parseMetabaseMappingBootstrap('prod-metabase', { + driver: 'metabase', + mappings: { + databaseMappings: { '1': 'prod-warehouse', '2': null }, + syncEnabled: { '1': true, '2': false }, + selections: { collections: [12], items: [345] }, + defaultTagNames: ['klo', 'prod'], + }, + }); + + expect(bootstrap).toEqual({ + adapter: 'metabase', + connectionId: 'prod-metabase', + databaseMappings: { '1': 'prod-warehouse', '2': null }, + 
syncEnabled: { '1': true, '2': false }, + syncMode: 'ALL', + selections: { collections: [12], items: [345] }, + defaultTagNames: ['klo', 'prod'], + }); + }); + + it('rejects Metabase non-integer mapping keys', () => { + expect(() => + parseMetabaseMappingBootstrap('prod-metabase', { + driver: 'metabase', + mappings: { databaseMappings: { abc: 'warehouse' } }, + }), + ).toThrow(/databaseMappings key "abc" must be a positive integer string/); + }); + + it('parses Looker connection mapping intent', () => { + const bootstrap = parseLookerMappingBootstrap('prod-looker', { + driver: 'looker', + mappings: { + connectionMappings: { + bigquery_prod: 'prod-warehouse', + snowflake_dev: null, + }, + }, + }); + + expect(bootstrap).toEqual({ + adapter: 'looker', + connectionId: 'prod-looker', + connectionMappings: { + bigquery_prod: 'prod-warehouse', + snowflake_dev: null, + }, + }); + }); + + it('parses LookML expected connection from mappings block', () => { + expect( + parseLookmlMappingBootstrap('prod-lookml', { + driver: 'lookml', + repo_url: 'https://github.com/acme/looker.git', + mappings: { expectedLookerConnectionName: 'bigquery_prod' }, + }), + ).toEqual({ + adapter: 'lookml', + connectionId: 'prod-lookml', + expectedLookerConnectionName: 'bigquery_prod', + }); + }); + + it('dispatches by flat driver and returns null for connections with no mappings block', () => { + expect(parseConnectionMappingBootstrap('warehouse', { driver: 'postgres', url: 'env:DATABASE_URL' })).toBeNull(); + expect( + parseConnectionMappingBootstrap('prod-looker', { + driver: 'looker', + mappings: { connectionMappings: { analytics: 'prod-warehouse' } }, + }), + ).toMatchObject({ adapter: 'looker', connectionId: 'prod-looker' }); + }); +}); diff --git a/packages/context/src/project/mappings-yaml-schema.ts b/packages/context/src/project/mappings-yaml-schema.ts new file mode 100644 index 00000000..47e53f53 --- /dev/null +++ b/packages/context/src/project/mappings-yaml-schema.ts @@ -0,0 +1,135 @@ 
+import * as z from 'zod'; +import type { KloProjectConnectionConfig } from './config.js'; + +const metabaseSyncModeSchema = z.enum(['ALL', 'ONLY', 'EXCEPT']); +const positiveIntegerValueSchema = z.number().int().positive(); +const stringTargetSchema = z.string().min(1).nullable(); + +const metabaseSelectionsSchema = z + .object({ + collections: z.array(positiveIntegerValueSchema).default([]), + items: z.array(positiveIntegerValueSchema).default([]), + }); + +const metabaseMappingsSchema = z + .object({ + databaseMappings: z.record(z.string(), stringTargetSchema).default({}), + syncEnabled: z.record(z.string(), z.boolean()).default({}), + syncMode: metabaseSyncModeSchema.default('ALL'), + selections: metabaseSelectionsSchema.default({ collections: [], items: [] }), + defaultTagNames: z.array(z.string().min(1)).default([]), + }); + +const lookerMappingsSchema = z + .object({ + connectionMappings: z.record(z.string().min(1), stringTargetSchema).default({}), + }); + +const lookmlMappingsSchema = z + .object({ + expectedLookerConnectionName: z.string().min(1).nullable().default(null), + }); + +export type MetabaseMappingBootstrap = { + adapter: 'metabase'; + connectionId: string; + databaseMappings: Record; + syncEnabled: Record; + syncMode: z.infer; + selections: { collections: number[]; items: number[] }; + defaultTagNames: string[]; +}; + +export type LookerMappingBootstrap = { + adapter: 'looker'; + connectionId: string; + connectionMappings: Record; +}; + +export type LookmlMappingBootstrap = { + adapter: 'lookml'; + connectionId: string; + expectedLookerConnectionName: string | null; +}; + +export type ConnectionMappingBootstrap = MetabaseMappingBootstrap | LookerMappingBootstrap | LookmlMappingBootstrap; + +function recordValue(value: unknown): Record { + return typeof value === 'object' && value !== null && !Array.isArray(value) ? 
(value as Record) : {}; +} + +function assertPositiveIntegerKeys(field: string, record: Record): void { + for (const key of Object.keys(record)) { + if (!/^[1-9]\d*$/.test(key)) { + throw new Error(`${field} key "${key}" must be a positive integer string`); + } + } +} + +function driverOf(connection: KloProjectConnectionConfig): string { + return String(connection.driver ?? '').toLowerCase(); +} + +export function parseMetabaseMappingBootstrap( + connectionId: string, + connection: KloProjectConnectionConfig, +): MetabaseMappingBootstrap { + const rawMappings = recordValue(connection.mappings); + assertPositiveIntegerKeys('databaseMappings', recordValue(rawMappings.databaseMappings)); + assertPositiveIntegerKeys('syncEnabled', recordValue(rawMappings.syncEnabled)); + const parsed = metabaseMappingsSchema.parse(rawMappings); + return { + adapter: 'metabase', + connectionId, + databaseMappings: parsed.databaseMappings, + syncEnabled: parsed.syncEnabled, + syncMode: parsed.syncMode, + selections: parsed.selections, + defaultTagNames: parsed.defaultTagNames, + }; +} + +export function parseLookerMappingBootstrap( + connectionId: string, + connection: KloProjectConnectionConfig, +): LookerMappingBootstrap { + const parsed = lookerMappingsSchema.parse(recordValue(connection.mappings)); + return { + adapter: 'looker', + connectionId, + connectionMappings: parsed.connectionMappings, + }; +} + +export function parseLookmlMappingBootstrap( + connectionId: string, + connection: KloProjectConnectionConfig, +): LookmlMappingBootstrap { + const parsed = lookmlMappingsSchema.parse(recordValue(connection.mappings)); + return { + adapter: 'lookml', + connectionId, + expectedLookerConnectionName: parsed.expectedLookerConnectionName, + }; +} + +export function parseConnectionMappingBootstrap( + connectionId: string, + connection: KloProjectConnectionConfig, +): ConnectionMappingBootstrap | null { + if (!connection.mappings || typeof connection.mappings !== 'object' || 
Array.isArray(connection.mappings)) { + return null; + } + + const driver = driverOf(connection); + if (driver === 'metabase') { + return parseMetabaseMappingBootstrap(connectionId, connection); + } + if (driver === 'looker') { + return parseLookerMappingBootstrap(connectionId, connection); + } + if (driver === 'lookml') { + return parseLookmlMappingBootstrap(connectionId, connection); + } + return null; +} diff --git a/packages/context/src/project/project.test.ts b/packages/context/src/project/project.test.ts new file mode 100644 index 00000000..a0eb6258 --- /dev/null +++ b/packages/context/src/project/project.test.ts @@ -0,0 +1,78 @@ +import { mkdtemp, readFile, rm, stat } from 'node:fs/promises'; +import { tmpdir } from 'node:os'; +import { join } from 'node:path'; +import { afterEach, beforeEach, describe, expect, it } from 'vitest'; +import { initKloProject, loadKloProject } from './project.js'; + +describe('KLO local project runtime', () => { + let tempDir: string; + + beforeEach(async () => { + tempDir = await mkdtemp(join(tmpdir(), 'klo-project-runtime-')); + }); + + afterEach(async () => { + await rm(tempDir, { recursive: true, force: true }); + }); + + it('initializes the standalone project layout and commits it', async () => { + const projectDir = join(tempDir, 'warehouse'); + + const result = await initKloProject({ + projectDir, + projectName: 'warehouse', + authorName: 'Agent', + authorEmail: 'agent@example.com', + }); + + expect(result.projectDir).toBe(projectDir); + expect(result.config.project).toBe('warehouse'); + expect(result.commitHash).toMatch(/^[0-9a-f]{40}$/); + await expect(readFile(join(projectDir, 'klo.yaml'), 'utf-8')).resolves.toContain('project: warehouse'); + const gitignore = await readFile(join(projectDir, '.klo/.gitignore'), 'utf-8'); + expect(gitignore).toContain('cache/'); + expect(gitignore).toContain('db.sqlite'); + expect(gitignore).toContain('secrets/'); + expect(gitignore).toContain('setup/'); + 
expect(gitignore).toContain('agents/'); + await expect(stat(join(projectDir, 'knowledge/global/.gitkeep'))).resolves.toBeDefined(); + await expect(stat(join(projectDir, 'semantic-layer/.gitkeep'))).resolves.toBeDefined(); + await expect(stat(join(projectDir, '_schema/.gitkeep'))).rejects.toMatchObject({ code: 'ENOENT' }); + await expect(stat(join(projectDir, 'raw-sources/.gitkeep'))).resolves.toBeDefined(); + await expect(stat(join(projectDir, '.git'))).resolves.toBeDefined(); + }); + + it('loads an initialized project with a working file store', async () => { + const projectDir = join(tempDir, 'warehouse'); + await initKloProject({ projectDir, projectName: 'warehouse' }); + + const loaded = await loadKloProject({ projectDir }); + await loaded.fileStore.writeFile( + 'knowledge/global/revenue.md', + '# Revenue\n', + 'Agent', + 'agent@example.com', + 'Add revenue page', + ); + + expect(loaded.config.project).toBe('warehouse'); + await expect(loaded.fileStore.readFile('knowledge/global/revenue.md')).resolves.toMatchObject({ + content: '# Revenue\n', + }); + }); + + it('rejects reinitializing an existing project unless force is set', async () => { + const projectDir = join(tempDir, 'warehouse'); + await initKloProject({ projectDir, projectName: 'warehouse' }); + + await expect(initKloProject({ projectDir, projectName: 'warehouse' })).rejects.toThrow( + 'Project already contains klo.yaml', + ); + + await expect(initKloProject({ projectDir, projectName: 'warehouse-v2', force: true })).resolves.toMatchObject({ + config: { + project: 'warehouse-v2', + }, + }); + }); +}); diff --git a/packages/context/src/project/project.ts b/packages/context/src/project/project.ts new file mode 100644 index 00000000..bec915c1 --- /dev/null +++ b/packages/context/src/project/project.ts @@ -0,0 +1,143 @@ +import { promises as fs } from 'node:fs'; +import { basename, dirname, join, resolve } from 'node:path'; +import { GitService, type KloCoreConfig, type KloLogger, noopLogger } from 
'../core/index.js'; +import type { KloProjectConfig } from './config.js'; +import { buildDefaultKloProjectConfig, parseKloProjectConfig, serializeKloProjectConfig } from './config.js'; +import { LocalGitFileStore } from './local-git-file-store.js'; + +export interface InitKloProjectOptions { + projectDir: string; + projectName?: string; + force?: boolean; + authorName?: string; + authorEmail?: string; + logger?: KloLogger; +} + +export interface LoadKloProjectOptions { + projectDir: string; + authorName?: string; + authorEmail?: string; + logger?: KloLogger; +} + +export interface KloLocalProject { + projectDir: string; + configPath: string; + config: KloProjectConfig; + coreConfig: KloCoreConfig; + git: GitService; + fileStore: LocalGitFileStore; +} + +export interface InitKloProjectResult extends KloLocalProject { + commitHash: string | null; +} + +const TRACKED_SCAFFOLD_FILES: Array<{ path: string; content: string }> = [ + { path: '.klo/.gitignore', content: 'cache/\ndb.sqlite\nsecrets/\nsetup/\nagents/\n' }, + { path: '.klo/prompts/.gitkeep', content: '' }, + { path: '.klo/skills/.gitkeep', content: '' }, + { path: 'knowledge/global/.gitkeep', content: '' }, + { path: 'semantic-layer/.gitkeep', content: '' }, + { path: 'raw-sources/.gitkeep', content: '' }, +]; + +function createCoreConfig(projectDir: string, authorName: string, authorEmail: string): KloCoreConfig { + return { + storage: { + configDir: projectDir, + homeDir: dirname(projectDir), + worktreesDir: join(projectDir, '.klo/worktrees'), + }, + git: { + userName: authorName, + userEmail: authorEmail, + bootstrapMessage: 'Initialize klo project repository', + bootstrapAuthor: authorName, + bootstrapAuthorEmail: authorEmail, + }, + }; +} + +async function fileExists(path: string): Promise { + try { + await fs.access(path); + return true; + } catch { + return false; + } +} + +async function writeProjectFile(projectDir: string, relativePath: string, content: string): Promise { + const absolutePath = 
join(projectDir, relativePath); + await fs.mkdir(dirname(absolutePath), { recursive: true }); + await fs.writeFile(absolutePath, content, 'utf-8'); +} + +async function createRuntime( + projectDir: string, + config: KloProjectConfig, + authorName: string, + authorEmail: string, + logger: KloLogger, +): Promise { + const coreConfig = createCoreConfig(projectDir, authorName, authorEmail); + const git = new GitService(coreConfig, logger); + await git.onModuleInit(); + + return { + projectDir, + configPath: join(projectDir, 'klo.yaml'), + config, + coreConfig, + git, + fileStore: new LocalGitFileStore({ rootDir: projectDir, git }), + }; +} + +export async function initKloProject(options: InitKloProjectOptions): Promise { + const projectDir = resolve(options.projectDir); + const projectName = options.projectName?.trim() || basename(projectDir) || 'klo-project'; + const authorName = options.authorName ?? 'klo'; + const authorEmail = options.authorEmail ?? 'klo@example.com'; + const logger = options.logger ?? 
noopLogger; + const configPath = join(projectDir, 'klo.yaml'); + + await fs.mkdir(projectDir, { recursive: true }); + if (!options.force && (await fileExists(configPath))) { + throw new Error(`Project already contains klo.yaml: ${configPath}`); + } + + const config = buildDefaultKloProjectConfig(projectName); + const runtime = await createRuntime(projectDir, config, authorName, authorEmail, logger); + + await writeProjectFile(projectDir, 'klo.yaml', serializeKloProjectConfig(config)); + await fs.mkdir(join(projectDir, '.klo/cache'), { recursive: true }); + for (const file of TRACKED_SCAFFOLD_FILES) { + await writeProjectFile(projectDir, file.path, file.content); + } + + const commit = await runtime.git.commitFiles( + ['klo.yaml', ...TRACKED_SCAFFOLD_FILES.map((file) => file.path)], + `Initialize KLO project: ${projectName}`, + authorName, + authorEmail, + ); + + return { + ...runtime, + commitHash: commit.commitHash, + }; +} + +export async function loadKloProject(options: LoadKloProjectOptions): Promise { + const projectDir = resolve(options.projectDir); + const authorName = options.authorName ?? 'klo'; + const authorEmail = options.authorEmail ?? 'klo@example.com'; + const logger = options.logger ?? 
noopLogger; + const configPath = join(projectDir, 'klo.yaml'); + const raw = await fs.readFile(configPath, 'utf-8'); + const config = parseKloProjectConfig(raw); + return createRuntime(projectDir, config, authorName, authorEmail, logger); +} diff --git a/packages/context/src/project/setup-config.test.ts b/packages/context/src/project/setup-config.test.ts new file mode 100644 index 00000000..f5f9e016 --- /dev/null +++ b/packages/context/src/project/setup-config.test.ts @@ -0,0 +1,76 @@ +import { describe, expect, it } from 'vitest'; +import { buildDefaultKloProjectConfig } from './config.js'; +import { + markKloSetupStepComplete, + mergeKloSetupGitignoreEntries, + setKloSetupDatabaseConnectionIds, +} from './setup-config.js'; + +describe('KLO setup config helpers', () => { + it('marks setup steps complete without duplicating existing state', () => { + const config = buildDefaultKloProjectConfig('warehouse'); + + const withProject = markKloSetupStepComplete(config, 'project'); + const withProjectAgain = markKloSetupStepComplete(withProject, 'project'); + const withLlm = markKloSetupStepComplete(withProjectAgain, 'llm'); + const withContext = markKloSetupStepComplete(withLlm, 'context'); + + expect(withProject.setup).toEqual({ + database_connection_ids: [], + completed_steps: ['project'], + }); + expect(withProjectAgain.setup?.completed_steps).toEqual(['project']); + expect(withLlm.setup?.completed_steps).toEqual(['project', 'llm']); + expect(withContext.setup?.completed_steps).toEqual(['project', 'llm', 'context']); + expect(config.setup).toBeUndefined(); + }); + + it('preserves database connection ids while marking a step complete', () => { + const config = { + ...buildDefaultKloProjectConfig('warehouse'), + setup: { + database_connection_ids: ['warehouse'], + completed_steps: ['databases'], + }, + }; + + expect(markKloSetupStepComplete(config, 'project').setup).toEqual({ + database_connection_ids: ['warehouse'], + completed_steps: ['databases', 'project'], + }); + 
}); + + it('sets setup database connection ids without duplicates', () => { + const config = buildDefaultKloProjectConfig('warehouse'); + + const withDatabases = setKloSetupDatabaseConnectionIds(config, ['warehouse', 'analytics', 'warehouse']); + + expect(withDatabases.setup).toEqual({ + database_connection_ids: ['warehouse', 'analytics'], + completed_steps: [], + }); + expect(config.setup).toBeUndefined(); + }); + + it('marks databases complete only when requested', () => { + const config = markKloSetupStepComplete(buildDefaultKloProjectConfig('warehouse'), 'project'); + + const withDatabases = setKloSetupDatabaseConnectionIds(config, ['warehouse'], { complete: true }); + const withDatabasesAgain = setKloSetupDatabaseConnectionIds(withDatabases, ['warehouse'], { complete: true }); + + expect(withDatabases.setup).toEqual({ + database_connection_ids: ['warehouse'], + completed_steps: ['project', 'databases'], + }); + expect(withDatabasesAgain.setup).toEqual(withDatabases.setup); + }); + + it('merges setup-local gitignore entries without removing existing lines', () => { + expect(mergeKloSetupGitignoreEntries('cache/\ndb.sqlite\n')).toBe( + ['cache/', 'db.sqlite', 'secrets/', 'setup/', 'agents/', ''].join('\n'), + ); + expect(mergeKloSetupGitignoreEntries('cache/\nsecrets/\n')).toBe( + ['cache/', 'secrets/', 'setup/', 'agents/', ''].join('\n'), + ); + }); +}); diff --git a/packages/context/src/project/setup-config.ts b/packages/context/src/project/setup-config.ts new file mode 100644 index 00000000..c916a9b7 --- /dev/null +++ b/packages/context/src/project/setup-config.ts @@ -0,0 +1,55 @@ +import type { KloProjectConfig } from './config.js'; + +export const KLO_SETUP_STEPS = ['project', 'llm', 'embeddings', 'databases', 'sources', 'context', 'agents'] as const; + +export type KloSetupStep = (typeof KLO_SETUP_STEPS)[number]; + +const SETUP_GITIGNORE_ENTRIES = ['secrets/', 'setup/', 'agents/'] as const; + +export function markKloSetupStepComplete(config: 
KloProjectConfig, step: KloSetupStep): KloProjectConfig { + const databaseConnectionIds = config.setup?.database_connection_ids ?? []; + const completedSteps = config.setup?.completed_steps ?? []; + return { + ...config, + setup: { + database_connection_ids: [...databaseConnectionIds], + completed_steps: completedSteps.includes(step) ? [...completedSteps] : [...completedSteps, step], + }, + }; +} + +export function setKloSetupDatabaseConnectionIds( + config: KloProjectConfig, + connectionIds: string[], + options: { complete?: boolean } = {}, +): KloProjectConfig { + const uniqueConnectionIds = [...new Set(connectionIds.filter((connectionId) => connectionId.trim().length > 0))]; + const completedSteps = config.setup?.completed_steps ?? []; + const nextCompletedSteps = + options.complete === true && !completedSteps.includes('databases') + ? [...completedSteps, 'databases'] + : [...completedSteps]; + + return { + ...config, + setup: { + database_connection_ids: uniqueConnectionIds, + completed_steps: nextCompletedSteps, + }, + }; +} + +export function mergeKloSetupGitignoreEntries(content: string): string { + const lines = content + .split(/\r?\n/) + .map((line) => line.trimEnd()) + .filter((line, index, all) => line.length > 0 || index < all.length - 1); + const existing = new Set(lines); + for (const entry of SETUP_GITIGNORE_ENTRIES) { + if (!existing.has(entry)) { + lines.push(entry); + existing.add(entry); + } + } + return `${lines.join('\n')}\n`; +} diff --git a/packages/context/src/prompts/index.ts b/packages/context/src/prompts/index.ts new file mode 100644 index 00000000..c70cbfc2 --- /dev/null +++ b/packages/context/src/prompts/index.ts @@ -0,0 +1,2 @@ +export type { PromptContext, PromptServiceOptions } from './prompt.service.js'; +export { PromptService } from './prompt.service.js'; diff --git a/packages/context/src/prompts/prompt.service.test.ts b/packages/context/src/prompts/prompt.service.test.ts new file mode 100644 index 00000000..78ec979b --- 
/dev/null
+++ b/packages/context/src/prompts/prompt.service.test.ts
@@ -0,0 +1,60 @@
+import { mkdtemp, rm, writeFile } from 'node:fs/promises';
+import { tmpdir } from 'node:os';
+import { join } from 'node:path';
+import { afterEach, beforeEach, describe, expect, it } from 'vitest';
+import { PromptService } from './prompt.service.js';
+
+// Every case runs against its own temporary prompt directory, removed afterwards.
+describe('PromptService', () => {
+  let promptsRoot: string;
+
+  beforeEach(async () => {
+    promptsRoot = await mkdtemp(join(tmpdir(), 'klo-prompts-'));
+  });
+
+  afterEach(async () => {
+    await rm(promptsRoot, { recursive: true, force: true });
+  });
+
+  it('loads prompt files from the configured prompt directory', async () => {
+    const templatePath = join(promptsRoot, 'hello.md');
+    await writeFile(templatePath, 'Hello {{name}}', 'utf-8');
+
+    // loadPrompt hands back the template text as stored, placeholders included.
+    const prompts = new PromptService({ promptsDir: promptsRoot, partials: [] });
+    const loaded = await prompts.loadPrompt('hello');
+
+    expect(loaded).toBe('Hello {{name}}');
+  });
+
+  it('loads prompts from additional directories when the primary directory misses', async () => {
+    const fallbackRoot = await mkdtemp(join(tmpdir(), 'klo-prompts-extra-'));
+    try {
+      // The file exists only in the additional directory.
+      await writeFile(join(fallbackRoot, 'memory_agent_research.md'), 'Packaged memory prompt', 'utf-8');
+      const prompts = new PromptService({
+        promptsDir: promptsRoot,
+        additionalPromptDirs: [fallbackRoot],
+        partials: [],
+      });
+
+      const loaded = await prompts.loadPrompt('memory_agent_research');
+      expect(loaded).toBe('Packaged memory prompt');
+    } finally {
+      await rm(fallbackRoot, { recursive: true, force: true });
+    }
+  });
+
+  it('formats prompts with default settings and context settings', async () => {
+    const templateBody = '{{settings.flag}} {{settings.mode}} {{name}}';
+    await writeFile(join(promptsRoot, 'settings.md'), templateBody, 'utf-8');
+
+    const defaultSettings = { flag: true, mode: 'default' };
+    const prompts = new PromptService({ promptsDir: promptsRoot, partials: [], defaultSettings });
+
+    // `mode` comes from the context, `flag` from the service defaults.
+    const output = await prompts.formatPrompt('settings', { name: 'Ada', settings: { mode: 'override' } });
+
+    expect(output).toBe('true override Ada');
+  });
+});
diff --git
a/packages/context/src/prompts/prompt.service.ts b/packages/context/src/prompts/prompt.service.ts
new file mode 100644
index 00000000..f9939b97
--- /dev/null
+++ b/packages/context/src/prompts/prompt.service.ts
@@ -0,0 +1,150 @@
+import { readFile } from 'node:fs/promises';
+import { join } from 'node:path';
+import Handlebars from 'handlebars';
+import { type KloLogger, noopLogger } from '../core/index.js';
+
+/** Values made available to a prompt template when it is rendered. */
+export interface PromptContext {
+  current_date?: string;
+  business_rules?: string;
+  datasource_description?: string;
+  tables_and_columns_summary?: string;
+  metadata?: string;
+  settings?: Record<string, unknown>;
+  [key: string]: unknown;
+}
+
+/** Construction options for `PromptService`. */
+export interface PromptServiceOptions {
+  /** Directory searched first for prompt and partial files. */
+  promptsDir: string;
+  /** Extra directories searched, in order, after `promptsDir`. */
+  additionalPromptDirs?: string[];
+  /** Settings that `formatPrompt` merges underneath `context.settings`. */
+  defaultSettings?: Record<string, unknown>;
+  /** Partial names loaded from `<name>.md`; defaults to `['clinical_policy']`. */
+  partials?: string[];
+  /** Logger for diagnostics; defaults to `noopLogger`. */
+  logger?: KloLogger;
+}
+
+/**
+ * Loads Handlebars prompt templates from one or more directories and renders
+ * them against a context object.
+ *
+ * Helpers and partials are registered on the imported `Handlebars` object,
+ * not on a per-instance environment.
+ */
+export class PromptService {
+  private readonly logger: KloLogger;
+  private readonly partials: string[];
+  private partialsRegistered = false;
+
+  /** Stores the options and registers the `eq`, `json`, `truncate` and `addOne` helpers. */
+  constructor(private readonly options: PromptServiceOptions) {
+    this.logger = options.logger ?? noopLogger;
+    this.partials = options.partials ?? ['clinical_policy'];
+    Handlebars.registerHelper('eq', (a: unknown, b: unknown) => a === b);
+    Handlebars.registerHelper('json', (context: unknown) => JSON.stringify(context, null, 2));
+    Handlebars.registerHelper('truncate', (str: string, len: number) =>
+      typeof str === 'string' && str.length > len ? `${str.substring(0, len)}...` : str,
+    );
+    Handlebars.registerHelper('addOne', (index: number) => index + 1);
+    this.logger.log(`Prompt service initialized with directory: ${options.promptsDir}`);
+  }
+
+  /** Search order for prompt files: the primary directory, then the additional ones. */
+  private promptDirs(): string[] {
+    return [this.options.promptsDir, ...(this.options.additionalPromptDirs ?? [])];
+  }
+
+  /**
+   * Registers each configured partial once, taking `<name>.md` from the first
+   * directory that has it. A partial found nowhere is logged and skipped.
+   */
+  private async ensurePartials(): Promise<void> {
+    if (this.partialsRegistered) {
+      return;
+    }
+    for (const name of this.partials) {
+      let registered = false;
+      for (const promptsDir of this.promptDirs()) {
+        try {
+          const content = await readFile(join(promptsDir, `${name}.md`), 'utf-8');
+          Handlebars.registerPartial(name, content);
+          registered = true;
+          break;
+        } catch {
+          // Unreadable here; fall through to the next directory.
+        }
+      }
+      if (!registered) {
+        this.logger.warn(`Could not register ${name} partial`);
+      }
+    }
+    this.partialsRegistered = true;
+  }
+
+  /**
+   * Returns the raw text of `<promptName>.<extension>` from the first
+   * directory in which it can be read.
+   *
+   * @throws Error listing every path tried when no directory yields the file.
+   */
+  async loadPrompt(promptName: string, extension = 'md'): Promise<string> {
+    const tried: string[] = [];
+    for (const promptsDir of this.promptDirs()) {
+      const promptFile = join(promptsDir, `${promptName}.${extension}`);
+      tried.push(promptFile);
+      try {
+        const content = await readFile(promptFile, 'utf-8');
+        this.logger.debug(`Loaded prompt template: ${promptName}.${extension}`);
+        return content;
+      } catch {
+        // Any read failure is treated as a miss; try the next directory.
+      }
+    }
+
+    const paths = tried.join(', ');
+    this.logger.error(`Prompt file not found: ${paths}`);
+    throw new Error(`Prompt file not found in any configured directory: ${paths}`);
+  }
+
+  /**
+   * Loads the named prompt and renders it with `context`.
+   *
+   * `current_date` falls back to today's UTC date (`YYYY-MM-DD`) and
+   * `business_rules` to an empty string when missing or falsy, and
+   * `settings` is `defaultSettings` overlaid with `context.settings`.
+   * Output is not HTML-escaped (`noEscape: true`).
+   *
+   * @throws Error prefixed with `Failed to format prompt <name>:` on any load or render failure.
+   */
+  async formatPrompt(promptName: string, context: PromptContext): Promise<string> {
+    await this.ensurePartials();
+    try {
+      const fullContext: PromptContext = {
+        // Spread first: a caller key that is present but undefined or empty
+        // must not overwrite the fallbacks computed below.
+        ...context,
+        current_date: context.current_date || new Date().toISOString().split('T')[0],
+        business_rules: context.business_rules || '',
+        settings: {
+          ...this.options.defaultSettings,
+          ...context.settings,
+        },
+      };
+
+      const templateSource = await this.loadPrompt(promptName);
+      const template = Handlebars.compile(templateSource, { noEscape: true });
+      const rendered = template(fullContext);
+
+      this.logger.debug(`Formatted prompt: ${promptName} (${rendered.length} chars)`);
+      return rendered;
+    } catch (error) {
+      const errorMessage = error instanceof Error ? error.message : String(error);
+      this.logger.error(`Error formatting prompt ${promptName}: ${errorMessage}`);
+      throw new Error(`Failed to format prompt ${promptName}: ${errorMessage}`);
+    }
+  }
+}
diff --git a/packages/context/src/scan/credentials.test.ts b/packages/context/src/scan/credentials.test.ts
new file mode 100644
index 00000000..1a8ae1a7
--- /dev/null
+++ b/packages/context/src/scan/credentials.test.ts
@@ -0,0 +1,183 @@
+import { describe, expect, it } from 'vitest';
+import {
+  REDACTED_KLO_CREDENTIAL_VALUE,
+  redactKloCredentialEnvelope,
+  redactKloCredentialValue,
+  redactKloScanMetadata,
+  redactKloScanReport,
+  redactKloScanWarning,
+} from './credentials.js';
+import type { KloCredentialEnvelope, KloScanReport, KloScanWarning } from './types.js';
+
+describe('KLO scan credential redaction', () => {
+  it('keeps credential references inspectable', () => {
+    const envReference: KloCredentialEnvelope = { kind: 'env', name: 'DATABASE_URL' };
+    const fileReference: KloCredentialEnvelope = { kind: 'file', path: '~/.config/klo/warehouse' };
+
+    expect(redactKloCredentialEnvelope(envReference)).toEqual(envReference);
+    expect(redactKloCredentialEnvelope(fileReference)).toEqual(fileReference);
+  });
+
+  it('redacts resolved credential envelope values recursively', () => {
+    expect(
+      redactKloCredentialEnvelope({
+        kind: 'resolved',
+        source: 'host',
+        values: {
+          username: 'readonly',
+          password: 'secret-password', // pragma: allowlist secret
+          nested: {
+            api_key: 'phx_123', // pragma: allowlist secret
+            warehouse: 'compute_wh',
+          },
+          headers: [{ authorizationToken: 'token-value' }, { label: 'safe' }],
+        },
+      }),
+    ).toEqual({
+      kind: 'resolved',
+      source: 'host',
+      redacted: true,
+      values: {
+        username: 'readonly',
+        password: REDACTED_KLO_CREDENTIAL_VALUE,
+        nested: {
+          api_key: REDACTED_KLO_CREDENTIAL_VALUE,
+          warehouse: 'compute_wh',
+        },
+        headers: [{ authorizationToken: REDACTED_KLO_CREDENTIAL_VALUE }, { label: 'safe' }],
+      },
+    });
+  });
+
+
it('redacts scan metadata fields that commonly contain secrets', () => { + expect( + redactKloScanMetadata({ + driver: 'postgres', + url: 'postgres://user:pass@example.test/db', // pragma: allowlist secret + serviceAccountJson: { + client_email: 'reader@example.test', + private_key: 'pem-value', // pragma: allowlist secret + }, + safeCount: 3, + }), + ).toEqual({ + driver: 'postgres', + url: REDACTED_KLO_CREDENTIAL_VALUE, + serviceAccountJson: { + client_email: 'reader@example.test', + private_key: REDACTED_KLO_CREDENTIAL_VALUE, + }, + safeCount: 3, + }); + }); + + it('redacts scan warning messages and metadata without hiding safe context', () => { + const warning: KloScanWarning = { + code: 'sampling_failed', + message: 'sample failed for postgres://reader:secret@example.test/db', // pragma: allowlist secret + recoverable: true, + metadata: { + table: 'orders', + url: 'postgres://reader:secret@example.test/db', // pragma: allowlist secret + nested: { + api_key: 'sk_test_123', // pragma: allowlist secret + schema: 'public', + }, + }, + }; + + expect(redactKloScanWarning(warning)).toEqual({ + code: 'sampling_failed', + message: 'sample failed for postgres://reader:@example.test/db', + recoverable: true, + metadata: { + table: 'orders', + url: REDACTED_KLO_CREDENTIAL_VALUE, + nested: { + api_key: REDACTED_KLO_CREDENTIAL_VALUE, + schema: 'public', + }, + }, + }); + }); + + it('redacts scan report warning metadata recursively', () => { + const report: KloScanReport = { + connectionId: 'warehouse', + driver: 'postgres', + syncId: 'sync-1', + runId: 'run-1', + trigger: 'cli', + mode: 'structural', + dryRun: false, + artifactPaths: { + rawSourcesDir: 'raw-sources/warehouse/live-database/sync-1', + reportPath: 'raw-sources/warehouse/live-database/sync-1/scan-report.json', + manifestShards: [], + enrichmentArtifacts: [], + }, + diffSummary: { + tablesAdded: 0, + tablesModified: 0, + tablesDeleted: 0, + tablesUnchanged: 0, + columnsAdded: 0, + columnsModified: 0, + 
columnsDeleted: 0, + }, + manifestShardsWritten: 0, + structuralSyncStats: { + tablesCreated: 0, + tablesUpdated: 0, + tablesDeleted: 0, + columnsCreated: 0, + columnsUpdated: 0, + columnsDeleted: 0, + }, + enrichment: { + dataDictionary: 'skipped', + tableDescriptions: 'skipped', + columnDescriptions: 'skipped', + embeddings: 'skipped', + deterministicRelationships: 'skipped', + llmRelationshipValidation: 'skipped', + statisticalValidation: 'skipped', + }, + capabilityGaps: [], + warnings: [ + { + code: 'credential_redacted', + message: 'metadata redacted', + recoverable: true, + metadata: { + credentials_json: '{"private_key":"pem-value"}', // pragma: allowlist secret + safeCount: 2, + }, + }, + ], + relationships: { accepted: 0, review: 0, rejected: 0, skipped: 0 }, + enrichmentState: { + resumedStages: [], + completedStages: [], + failedStages: [], + }, + createdAt: '2026-04-29T00:00:00.000Z', + }; + + const redacted = redactKloScanReport(report); + + expect(redacted.warnings[0]?.metadata).toEqual({ + credentials_json: REDACTED_KLO_CREDENTIAL_VALUE, + safeCount: 2, + }); + expect(report.warnings[0]?.metadata).toEqual({ + credentials_json: '{"private_key":"pem-value"}', // pragma: allowlist secret + safeCount: 2, + }); + }); + + it('redacts standalone primitive credential values only when the field key is sensitive', () => { + expect(redactKloCredentialValue('password', 'abc')).toBe(REDACTED_KLO_CREDENTIAL_VALUE); + expect(redactKloCredentialValue('schema', 'public')).toBe('public'); + }); +}); diff --git a/packages/context/src/scan/credentials.ts b/packages/context/src/scan/credentials.ts new file mode 100644 index 00000000..bd75332a --- /dev/null +++ b/packages/context/src/scan/credentials.ts @@ -0,0 +1,50 @@ +import { + redactKloSensitiveMetadata, + redactKloSensitiveText, + redactKloSensitiveValue, + REDACTED_KLO_CREDENTIAL_VALUE, +} from '../core/redaction.js'; +import type { KloCredentialEnvelope, KloScanReport, KloScanWarning } from './types.js'; + 
+export { REDACTED_KLO_CREDENTIAL_VALUE }; + +export function redactKloCredentialValue(key: string, value: unknown): unknown { + return redactKloSensitiveValue(key, value); +} + +export function redactKloScanMetadata(metadata: Record): Record { + return redactKloSensitiveMetadata(metadata); +} + +export function redactKloCredentialEnvelope(envelope: KloCredentialEnvelope): KloCredentialEnvelope { + if (envelope.kind !== 'resolved') { + return envelope; + } + return { + kind: 'resolved', + source: envelope.source, + redacted: true, + values: redactKloScanMetadata(envelope.values), + }; +} + +export function redactKloScanWarning(warning: KloScanWarning): KloScanWarning { + if (!warning.metadata) { + return { + ...warning, + message: redactKloSensitiveText(warning.message), + }; + } + return { + ...warning, + message: redactKloSensitiveText(warning.message), + metadata: redactKloScanMetadata(warning.metadata), + }; +} + +export function redactKloScanReport(report: KloScanReport): KloScanReport { + return { + ...report, + warnings: report.warnings.map((warning) => redactKloScanWarning(warning)), + }; +} diff --git a/packages/context/src/scan/data-dictionary.test.ts b/packages/context/src/scan/data-dictionary.test.ts new file mode 100644 index 00000000..0f5d6d5e --- /dev/null +++ b/packages/context/src/scan/data-dictionary.test.ts @@ -0,0 +1,114 @@ +import { describe, expect, it } from 'vitest'; +import { + defaultKloDataDictionarySettings, + isKloDataDictionaryCandidate, + shouldKloSampleColumnForDictionary, +} from './data-dictionary.js'; + +const defaultPatterns = defaultKloDataDictionarySettings.excludePatterns; + +describe('KLO scan data dictionary policy', () => { + it('includes text-like and boolean categorical types', () => { + expect(isKloDataDictionaryCandidate('varchar(50)', 'status', defaultPatterns)).toBe(true); + expect(isKloDataDictionaryCandidate('VARCHAR', 'category', defaultPatterns)).toBe(true); + expect(isKloDataDictionaryCandidate('text', 'region', 
defaultPatterns)).toBe(true); + expect(isKloDataDictionaryCandidate('string', 'payment_method', defaultPatterns)).toBe(true); + expect(isKloDataDictionaryCandidate('nvarchar(100)', 'tier', defaultPatterns)).toBe(true); + expect(isKloDataDictionaryCandidate('enum', 'status', defaultPatterns)).toBe(true); + expect(isKloDataDictionaryCandidate('boolean', 'active', defaultPatterns)).toBe(true); + expect(isKloDataDictionaryCandidate('bool', 'verified', defaultPatterns)).toBe(true); + expect(isKloDataDictionaryCandidate('character varying(50)', 'region', defaultPatterns)).toBe(true); + expect(isKloDataDictionaryCandidate('character(1)', 'flag', defaultPatterns)).toBe(true); + expect(isKloDataDictionaryCandidate('ntext', 'category', defaultPatterns)).toBe(true); + }); + + it('excludes non-categorical primitive types', () => { + expect(isKloDataDictionaryCandidate('integer', 'count', defaultPatterns)).toBe(false); + expect(isKloDataDictionaryCandidate('bigint', 'total', defaultPatterns)).toBe(false); + expect(isKloDataDictionaryCandidate('timestamp', 'created', defaultPatterns)).toBe(false); + expect(isKloDataDictionaryCandidate('date', 'birth', defaultPatterns)).toBe(false); + expect(isKloDataDictionaryCandidate('numeric', 'amount', defaultPatterns)).toBe(false); + expect(isKloDataDictionaryCandidate('decimal(10,2)', 'price', defaultPatterns)).toBe(false); + expect(isKloDataDictionaryCandidate('float', 'rate', defaultPatterns)).toBe(false); + }); + + it('excludes configured high-cardinality or sensitive name patterns', () => { + expect(isKloDataDictionaryCandidate('varchar', 'user_id', defaultPatterns)).toBe(false); + expect(isKloDataDictionaryCandidate('varchar', 'session_uuid', defaultPatterns)).toBe(false); + expect(isKloDataDictionaryCandidate('varchar', 'api_key', defaultPatterns)).toBe(false); + expect(isKloDataDictionaryCandidate('varchar', 'password_hash', defaultPatterns)).toBe(false); + expect(isKloDataDictionaryCandidate('varchar', 'auth_token', 
defaultPatterns)).toBe(false); + expect(isKloDataDictionaryCandidate('varchar', 'id', defaultPatterns)).toBe(false); + expect(isKloDataDictionaryCandidate('varchar', 'created_at', defaultPatterns)).toBe(false); + expect(isKloDataDictionaryCandidate('varchar', 'birth_date', defaultPatterns)).toBe(false); + expect(isKloDataDictionaryCandidate('text', 'description', defaultPatterns)).toBe(false); + expect(isKloDataDictionaryCandidate('text', 'email_body', defaultPatterns)).toBe(false); + expect(isKloDataDictionaryCandidate('varchar', 'image_url', defaultPatterns)).toBe(false); + expect(isKloDataDictionaryCandidate('varchar', 'email', defaultPatterns)).toBe(false); + expect(isKloDataDictionaryCandidate('varchar', 'phone_number', defaultPatterns)).toBe(false); + expect(isKloDataDictionaryCandidate('varchar', 'street_address', defaultPatterns)).toBe(false); + }); + + it('keeps business categorical names eligible', () => { + expect(isKloDataDictionaryCandidate('varchar', 'status', defaultPatterns)).toBe(true); + expect(isKloDataDictionaryCandidate('varchar', 'region', defaultPatterns)).toBe(true); + expect(isKloDataDictionaryCandidate('varchar', 'country', defaultPatterns)).toBe(true); + expect(isKloDataDictionaryCandidate('varchar', 'payment_method', defaultPatterns)).toBe(true); + expect(isKloDataDictionaryCandidate('varchar', 'currency', defaultPatterns)).toBe(true); + expect(isKloDataDictionaryCandidate('varchar', 'plan', defaultPatterns)).toBe(true); + expect(isKloDataDictionaryCandidate('varchar', 'category', defaultPatterns)).toBe(true); + expect(isKloDataDictionaryCandidate('varchar', 'tier', defaultPatterns)).toBe(true); + expect(isKloDataDictionaryCandidate('varchar', 'gender', defaultPatterns)).toBe(true); + expect(isKloDataDictionaryCandidate('varchar', 'language', defaultPatterns)).toBe(true); + expect(isKloDataDictionaryCandidate('varchar', 'order_type', defaultPatterns)).toBe(true); + expect(isKloDataDictionaryCandidate('varchar', 'order_status', 
defaultPatterns)).toBe(true); + }); + + it('respects host-provided exclusion patterns and skips invalid regex patterns', () => { + expect(isKloDataDictionaryCandidate('varchar', 'company_size', ['company'])).toBe(false); + expect(isKloDataDictionaryCandidate('varchar', 'status', ['company'])).toBe(true); + expect(isKloDataDictionaryCandidate('varchar', 'status', ['[invalid', '(unclosed'])).toBe(true); + }); + + it('skips columns that already have persisted dictionary state', () => { + expect( + shouldKloSampleColumnForDictionary({ + columnType: 'varchar', + columnName: 'status', + sampleValues: ['paid'], + cardinality: null, + settings: defaultKloDataDictionarySettings, + }), + ).toEqual({ sample: false, reason: 'already_populated' }); + + expect( + shouldKloSampleColumnForDictionary({ + columnType: 'varchar', + columnName: 'empty_status', + sampleValues: null, + cardinality: 0, + settings: defaultKloDataDictionarySettings, + }), + ).toEqual({ sample: false, reason: 'empty_column' }); + + expect( + shouldKloSampleColumnForDictionary({ + columnType: 'varchar', + columnName: 'customer_name', + sampleValues: null, + cardinality: 300, + settings: defaultKloDataDictionarySettings, + }), + ).toEqual({ sample: false, reason: 'high_cardinality' }); + + expect( + shouldKloSampleColumnForDictionary({ + columnType: 'varchar', + columnName: 'status', + sampleValues: null, + cardinality: null, + settings: defaultKloDataDictionarySettings, + }), + ).toEqual({ sample: true }); + }); +}); diff --git a/packages/context/src/scan/data-dictionary.ts b/packages/context/src/scan/data-dictionary.ts new file mode 100644 index 00000000..e8aff474 --- /dev/null +++ b/packages/context/src/scan/data-dictionary.ts @@ -0,0 +1,109 @@ +export interface KloDataDictionarySettings { + cardinalityThreshold: number; + maxValuesToStore: number; + sampleSize: number; + useDbStatistics: boolean; + excludePatterns: string[]; +} + +export const defaultKloDataDictionarySettings: KloDataDictionarySettings = { 
+ cardinalityThreshold: 200, + maxValuesToStore: 100, + sampleSize: 10000, + useDbStatistics: true, + excludePatterns: [ + '_id$', + '_uuid$', + '_key$', + '_hash$', + '_token$', + '^id$', + '^uuid$', + '_at$', + '_date$', + '_time$', + 'description$', + 'comment$', + 'notes?$', + 'message$', + 'body$', + 'content$', + '_url$', + '_path$', + 'email$', + '^phone', + 'address$', + ], +}; + +export type KloDataDictionarySkipReason = + | 'not_candidate' + | 'already_populated' + | 'empty_column' + | 'high_cardinality'; + +export interface KloDataDictionarySampleDecision { + sample: boolean; + reason?: KloDataDictionarySkipReason; +} + +export interface KloDataDictionaryColumnState { + columnType: string; + columnName: string; + sampleValues?: readonly string[] | null; + cardinality?: number | null; + settings: KloDataDictionarySettings; +} + +const categoricalCandidateTypes = /^(n?varchar|n?char|n?text|string|character|enum|bool(ean)?)/i; + +export function isKloDataDictionaryCandidate( + columnType: string, + columnName: string, + excludePatterns: readonly string[] = defaultKloDataDictionarySettings.excludePatterns, +): boolean { + const typeLower = columnType.toLowerCase(); + const nameLower = columnName.toLowerCase(); + + if (!categoricalCandidateTypes.test(typeLower)) { + return false; + } + + for (const patternText of excludePatterns) { + try { + const pattern = new RegExp(patternText, 'i'); + if (pattern.test(nameLower)) { + return false; + } + } catch { + continue; + } + } + + return true; +} + +export function shouldKloSampleColumnForDictionary( + input: KloDataDictionaryColumnState, +): KloDataDictionarySampleDecision { + const sampleValues = input.sampleValues ?? null; + const cardinality = input.cardinality ?? 
null; + + if (sampleValues && sampleValues.length > 0) { + return { sample: false, reason: 'already_populated' }; + } + + if (cardinality === 0) { + return { sample: false, reason: 'empty_column' }; + } + + if (cardinality !== null && cardinality > input.settings.cardinalityThreshold) { + return { sample: false, reason: 'high_cardinality' }; + } + + if (!isKloDataDictionaryCandidate(input.columnType, input.columnName, input.settings.excludePatterns)) { + return { sample: false, reason: 'not_candidate' }; + } + + return { sample: true }; +} diff --git a/packages/context/src/scan/description-generation.test.ts b/packages/context/src/scan/description-generation.test.ts new file mode 100644 index 00000000..5eaefc34 --- /dev/null +++ b/packages/context/src/scan/description-generation.test.ts @@ -0,0 +1,318 @@ +import { describe, expect, it, vi } from 'vitest'; + +vi.mock('ai', async (importOriginal) => { + const actual = await importOriginal(); + return { ...actual, generateText: vi.fn() }; +}); + +import { generateText } from 'ai'; +import { + buildKloColumnDescriptionPrompt, + buildKloDataSourceDescriptionPrompt, + buildKloTableDescriptionPrompt, + type KloDescriptionCachePort, + KloDescriptionGenerator, +} from './description-generation.js'; +import { createKloConnectorCapabilities, type KloScanConnector } from './types.js'; + +function createCache(initial: Record = {}): KloDescriptionCachePort { + const data = new Map(Object.entries(initial)); + return { + buildTableKey: (table) => [table.catalog, table.db, table.name].filter(Boolean).join('.'), + buildColumnKey: (table, columnName) => [table.catalog, table.db, table.name, columnName].filter(Boolean).join('.'), + buildConnectionKey: (connectionName) => `__connection:${connectionName}`, + get: vi.fn(async (key: string) => data.get(key) ?? 
null), + set: vi.fn(async (key: string, value: string) => { + data.set(key, value); + }), + }; +} + +function createLlmProvider(text = 'generated description') { + vi.mocked(generateText).mockResolvedValue({ text } as never); + return { + getModel: vi.fn().mockReturnValue({ modelId: 'claude-sonnet-4-6', provider: 'anthropic' }), + getModelByName: vi.fn(), + cacheMarker: vi.fn(), + repairToolCallHandler: vi.fn(), + thinkingProviderOptions: vi.fn(), + telemetryConfig: vi.fn(), + promptCachingConfig: vi.fn(() => ({ + enabled: false, + systemTtl: '1h', + toolsTtl: '1h', + historyTtl: '5m', + cacheSystem: true, + cacheTools: true, + cacheHistory: true, + vertexFallbackTo5m: false, + })), + activeBackend: vi.fn(() => 'anthropic'), + } as any; +} + +function createConnector(): KloScanConnector { + return { + id: 'test-connector', + driver: 'postgres', + capabilities: createKloConnectorCapabilities({ + tableSampling: true, + columnSampling: true, + nestedAnalysis: true, + }), + introspect: vi.fn(async () => { + throw new Error('introspection is not used by description generation'); + }), + sampleColumn: vi.fn(async () => ({ + values: ['paid', 'refunded', null], + nullCount: 1, + distinctCount: 2, + })), + sampleTable: vi.fn(async () => ({ + headers: ['id', 'status', 'amount'], + rows: [ + [1, 'paid', 20], + [2, 'refunded', 10], + ], + totalRows: 2, + })), + }; +} + +describe('KLO description prompt builders', () => { + it('builds column prompts with sample values, source descriptions, and nested BigQuery guidance', () => { + const prompt = buildKloColumnDescriptionPrompt({ + columnName: 'payload', + columnValues: [{ nested: true }, '[1,2]'], + tableContext: 'Table: events | Columns: payload | Data source: BIGQUERY', + dataSourceType: 'BIGQUERY', + supportsNestedAnalysis: true, + rawDescriptions: { db: 'Raw event payload', ai: 'Old AI text', user: 'User text' }, + }); + + expect(prompt).toContain( + ' Table: events | Columns: payload | Data source: BIGQUERY ', + ); + 
expect(prompt).toContain(' payload '); + expect(prompt).toContain(' [object Object], [1,2] '); + expect(prompt).toContain(' Raw event payload '); + expect(prompt).not.toContain('Old AI text'); + expect(prompt).not.toContain('User text'); + expect(prompt).toContain('nested/structured data'); + }); + + it('builds table and data-source prompts from sampled rows', () => { + const sample = { + headers: ['id', 'status'], + rows: [ + [1, 'paid'], + [2, 'refunded'], + ], + totalRows: 2, + }; + + expect( + buildKloTableDescriptionPrompt({ + tableName: 'orders', + sampleData: sample, + dataSourceType: 'POSTGRESQL', + rawDescriptions: { dbt: 'Fact table for commerce orders' }, + }), + ).toContain('status: paid, refunded'); + + expect( + buildKloDataSourceDescriptionPrompt({ + tableSamples: [['orders', sample]], + dataSourceType: 'POSTGRESQL', + }), + ).toContain('orders (2 columns, 2 sample rows)'); + }); +}); + +describe('KloDescriptionGenerator', () => { + it('generates column descriptions with pre-fetched values, cache hits, and word-limit metadata', async () => { + const cache = createCache({ 'warehouse.public.orders.cached_status': 'Cached status description' }); + const llmProvider = createLlmProvider('Payment state'); + const connector = createConnector(); + const generator = new KloDescriptionGenerator({ + llmProvider, + cache, + settings: { + columnMaxWords: 12, + tableMaxWords: 18, + dataSourceMaxWords: 24, + temperature: 0.2, + concurrencyLimit: 2, + }, + }); + + const result = await generator.generateColumnDescriptions({ + connectionId: 'conn-1', + connector, + context: { runId: 'run-1' }, + dataSourceType: 'POSTGRESQL', + supportsNestedAnalysis: false, + table: { + catalog: 'warehouse', + db: 'public', + name: 'orders', + columns: [ + { name: 'status', sampleValues: ['paid', 'refunded'], rawDescriptions: { db: 'Payment lifecycle' } }, + { name: 'cached_status', sampleValues: ['open'] }, + ], + }, + skipExisting: false, + existingDescriptions: {}, + }); + + 
expect(result).toEqual({ + columnDescriptions: [ + ['status', 'Payment state'], + ['cached_status', 'Cached status description'], + ], + processedColumns: ['status'], + skippedColumns: ['cached_status'], + }); + expect(connector.sampleColumn).not.toHaveBeenCalled(); + expect(generateText).toHaveBeenCalledWith( + expect.objectContaining({ + temperature: 0.2, + messages: expect.arrayContaining([ + expect.objectContaining({ + role: 'user', + content: expect.stringContaining('Please provide a concise description in 12 words or less.'), + }), + ]), + }), + ); + }); + + it('samples through the connector when column values are not pre-fetched', async () => { + const connector = createConnector(); + const generator = new KloDescriptionGenerator({ + llmProvider: createLlmProvider('Current order state'), + settings: { + columnMaxWords: 12, + tableMaxWords: 18, + dataSourceMaxWords: 24, + }, + }); + + const result = await generator.generateColumnDescriptions({ + connectionId: 'conn-1', + connector, + context: { runId: 'run-1' }, + dataSourceType: 'POSTGRESQL', + supportsNestedAnalysis: false, + table: { + catalog: null, + db: 'public', + name: 'orders', + columns: [{ name: 'status' }], + }, + }); + + expect(connector.sampleColumn).toHaveBeenCalledWith( + { + connectionId: 'conn-1', + table: { catalog: null, db: 'public', name: 'orders' }, + column: 'status', + limit: 50, + }, + { runId: 'run-1' }, + ); + expect(result.columnDescriptions).toEqual([['status', 'Current order state']]); + }); + + it('samples through a description sampling port without requiring structural introspection', async () => { + const sampler = { + id: 'description-sampler:conn-1', + sampleColumn: vi.fn(async () => ({ + values: ['paid', 'refunded'], + nullCount: null, + distinctCount: null, + })), + sampleTable: vi.fn(async () => ({ + headers: ['id', 'status'], + rows: [[1, 'paid']], + totalRows: 1, + })), + }; + const generator = new KloDescriptionGenerator({ + llmProvider: createLlmProvider('Generated 
through sampler'), + settings: { + columnMaxWords: 12, + tableMaxWords: 18, + dataSourceMaxWords: 24, + }, + }); + + const result = await generator.generateColumnDescriptions({ + connectionId: 'conn-1', + connector: sampler, + context: { runId: 'run-1' }, + dataSourceType: 'POSTGRESQL', + supportsNestedAnalysis: false, + table: { + catalog: null, + db: 'public', + name: 'orders', + columns: [{ name: 'status' }], + }, + }); + + expect(result.columnDescriptions).toEqual([['status', 'Generated through sampler']]); + expect(sampler.sampleColumn).toHaveBeenCalledWith( + { + connectionId: 'conn-1', + table: { catalog: null, db: 'public', name: 'orders' }, + column: 'status', + limit: 50, + }, + { runId: 'run-1' }, + ); + expect('introspect' in sampler).toBe(false); + }); + + it('generates and caches table and data-source descriptions', async () => { + const cache = createCache(); + const connector = createConnector(); + const generator = new KloDescriptionGenerator({ + llmProvider: createLlmProvider('Commerce orders'), + cache, + settings: { + columnMaxWords: 12, + tableMaxWords: 18, + dataSourceMaxWords: 24, + concurrencyLimit: 2, + }, + }); + + await expect( + generator.generateTableDescription({ + connectionId: 'conn-1', + connector, + context: { runId: 'run-1' }, + dataSourceType: 'POSTGRESQL', + table: { catalog: 'warehouse', db: 'public', name: 'orders', rawDescriptions: { db: 'Raw orders' } }, + }), + ).resolves.toBe('Commerce orders'); + + await expect( + generator.generateDataSourceDescription({ + connectionId: 'conn-1', + connector, + context: { runId: 'run-1' }, + dataSourceType: 'POSTGRESQL', + tables: [ + { catalog: 'warehouse', db: 'public', name: 'orders' }, + { catalog: 'warehouse', db: 'public', name: 'customers' }, + ], + connectionName: 'Warehouse', + }), + ).resolves.toBe('Commerce orders'); + + expect(cache.set).toHaveBeenCalledWith('warehouse.public.orders', 'Commerce orders'); + expect(cache.set).toHaveBeenCalledWith('__connection:Warehouse', 
'Commerce orders'); + }); +}); diff --git a/packages/context/src/scan/description-generation.ts b/packages/context/src/scan/description-generation.ts new file mode 100644 index 00000000..193544cb --- /dev/null +++ b/packages/context/src/scan/description-generation.ts @@ -0,0 +1,582 @@ +import type { KloLlmProvider } from '@klo/llm'; +import { generateKloText } from '../llm/index.js'; +import type { + KloColumnSampleInput, + KloColumnSampleResult, + KloScanContext, + KloScanLoggerPort, + KloTableRef, + KloTableSampleInput, + KloTableSampleResult, +} from './types.js'; + +export interface KloDescriptionCachePort { + buildTableKey(table: KloTableRef): string; + buildColumnKey(table: KloTableRef, columnName: string): string; + buildConnectionKey(connectionName: string): string; + get(key: string): Promise; + set(key: string, value: string): Promise; +} + +export interface KloDescriptionSamplingPort { + id: string; + sampleColumn?(input: KloColumnSampleInput, ctx: KloScanContext): Promise; + sampleTable?(input: KloTableSampleInput, ctx: KloScanContext): Promise; +} + +export interface KloDescriptionGenerationSettings { + columnMaxWords: number; + tableMaxWords: number; + dataSourceMaxWords: number; + temperature?: number; + concurrencyLimit?: number; +} + +interface ResolvedKloDescriptionGenerationSettings { + columnMaxWords: number; + tableMaxWords: number; + dataSourceMaxWords: number; + temperature?: number; + concurrencyLimit: number; +} + +export interface KloDescriptionColumn { + name: string; + type?: string; + rawDescriptions?: Record; + sampleValues?: unknown[]; +} + +export interface KloDescriptionColumnTable extends KloTableRef { + columns: KloDescriptionColumn[]; +} + +export interface KloDescriptionTableInput extends KloTableRef { + rawDescriptions?: Record; +} + +export interface KloColumnAnalysisResult { + columnDescriptions: Array<[string, string | null]>; + processedColumns: string[]; + skippedColumns: string[]; +} + +export interface 
KloColumnDescriptionPromptInput { + columnName: string; + columnValues: unknown[]; + tableContext: string; + dataSourceType: string; + supportsNestedAnalysis: boolean; + rawDescriptions?: Record; +} + +export interface KloTableDescriptionPromptInput { + tableName: string; + sampleData: KloTableSampleResult; + dataSourceType: string; + rawDescriptions?: Record; +} + +export interface KloDataSourceDescriptionPromptInput { + tableSamples: Array<[string, KloTableSampleResult]>; + dataSourceType: string; +} + +export interface KloGenerateColumnDescriptionsInput { + connectionId: string; + connector: KloDescriptionSamplingPort; + context: KloScanContext; + dataSourceType: string; + supportsNestedAnalysis: boolean; + table: KloDescriptionColumnTable; + skipExisting?: boolean; + existingDescriptions?: Record; +} + +export interface KloGenerateTableDescriptionInput { + connectionId: string; + connector: KloDescriptionSamplingPort; + context: KloScanContext; + dataSourceType: string; + table: KloDescriptionTableInput; +} + +export interface KloGenerateDataSourceDescriptionInput { + connectionId: string; + connector: KloDescriptionSamplingPort; + context: KloScanContext; + dataSourceType: string; + tables: KloTableRef[]; + connectionName?: string; +} + +export interface KloDescriptionGeneratorOptions { + llmProvider: KloLlmProvider; + cache?: KloDescriptionCachePort; + logger?: KloScanLoggerPort; + settings: KloDescriptionGenerationSettings; +} + +interface ColumnTaskResult { + columnName: string; + description: string | null; + processed: boolean; + skipped: boolean; +} + +function descriptionSources(rawDescriptions: Record | undefined): Array<[string, string]> { + if (!rawDescriptions) { + return []; + } + + return Object.entries(rawDescriptions).filter(([source, text]) => source !== 'ai' && source !== 'user' && !!text); +} + +function errorMessage(error: unknown): string { + return error instanceof Error ? 
error.message : String(error); +} + +function toTableRef(table: KloTableRef): KloTableRef { + return { + catalog: table.catalog, + db: table.db, + name: table.name, + }; +} + +async function runWithConcurrency( + items: readonly TInput[], + concurrencyLimit: number, + worker: (item: TInput, index: number) => Promise, +): Promise { + const results: TOutput[] = []; + let nextIndex = 0; + const workerCount = Math.max(1, Math.min(concurrencyLimit, items.length || 1)); + + await Promise.all( + Array.from({ length: workerCount }, async () => { + while (nextIndex < items.length) { + const index = nextIndex; + nextIndex += 1; + const item = items[index]; + if (item !== undefined) { + results[index] = await worker(item, index); + } + } + }), + ); + + return results; +} + +export function appendKloWordLimitInstruction(prompt: string, maxWords: number): string { + return `${prompt}\n\nPlease provide a concise description in ${maxWords} words or less.`; +} + +export function buildKloColumnDescriptionPrompt(input: KloColumnDescriptionPromptInput): string { + const sampleValues = input.columnValues.slice(0, 5); + const valuesStr = sampleValues + .filter((value) => value !== null && value !== undefined) + .map((value) => String(value)) + .join(', '); + + let prompt = `Analyze this database column and provide a concise description: + + ${input.tableContext} + + ${input.columnName} + + ${valuesStr} +`; + + const sources = descriptionSources(input.rawDescriptions); + if (sources.length > 0) { + prompt += '\nExisting descriptions from other sources:\n'; + for (const [source, text] of sources) { + prompt += `<${source}_documentation> ${text} \n`; + } + prompt += + '\nSynthesize a description that captures the most important information from all sources. Prioritize the sources as authoritative context.\n'; + } + + prompt += ` +Provide a brief description of what this column contains without repeating the column name. +Focus on the data's meaning and business purpose. 
Start directly with the content description. +Example: +"first names of individuals, likely employees or contacts" instead of "The column contains first names..." +"Job titles or roles of individuals..." instead of "This column contains job titles..." +`; + + if (input.dataSourceType === 'BIGQUERY' && input.supportsNestedAnalysis) { + const hasNestedData = sampleValues.some((value) => { + const text = String(value); + return text.includes('nested') || text.includes('{') || text.includes('['); + }); + if (hasNestedData) { + prompt += + '\nNote: This column contains nested/structured data (JSON, STRUCT, or ARRAY) - describe its general business purpose and data organization.'; + } + } + + return prompt.trim(); +} + +export function buildKloTableDescriptionPrompt(input: KloTableDescriptionPromptInput): string { + const columnInfo: string[] = []; + for (let index = 0; index < Math.min(input.sampleData.headers.length, 10); index += 1) { + const header = input.sampleData.headers[index]; + const sampleValues = input.sampleData.rows + .slice(0, 3) + .map((row) => row[index]) + .filter((value) => value !== null && value !== undefined); + columnInfo.push(`${header}: ${sampleValues.map((value) => String(value)).join(', ')}`); + } + + let prompt = ` + Analyze this database table and provide a concise description: + + Table: ${input.tableName} + Columns and sample data: ${columnInfo.join(' | ')} + Total rows in sample: ${input.sampleData.rows.length} + Data source type: ${input.dataSourceType} + `; + + const sources = descriptionSources(input.rawDescriptions); + if (sources.length > 0) { + prompt += '\n Existing descriptions from other sources:\n'; + for (const [source, text] of sources) { + prompt += ` ${source}: ${text}\n`; + } + prompt += + '\n Synthesize a description that captures the most important information from all sources. 
Prioritize the sources as authoritative context.\n'; + } + + if (input.dataSourceType === 'BIGQUERY') { + prompt += + "\nNote (Don't include this note in the final answer.): This is a BigQuery table which may contain nested structures, arrays, or other complex data types."; + } + + prompt += ` + + Provide a brief description of what this table represents and its business purpose. + Do NOT list or describe individual columns or fields. + Start directly with the content description without mentioning the table name. + Focus on the data's meaning and business purpose. + Example: "Information about healthcare professionals used for workforce management" instead of "The blahblah table contains information about healthcare professionals including their names, titles..." + `; + + return prompt.trim(); +} + +export function buildKloDataSourceDescriptionPrompt(input: KloDataSourceDescriptionPromptInput): string { + const tablesText = input.tableSamples + .map( + ([tableName, sampleData]) => + `${tableName} (${sampleData.headers.length} columns, ${sampleData.rows.length} sample rows)`, + ) + .join(' | '); + + let prompt = ` + Analyze this database and provide a concise description: + + Tables: ${tablesText} + Total tables analyzed: ${input.tableSamples.length} + Data source type: ${input.dataSourceType} + `; + + if (input.dataSourceType === 'BIGQUERY') { + prompt += + "\nNote (Don't include this note in the final answer): This is a BigQuery dataset which may contain large-scale analytics data, nested structures, and complex data types."; + } + + prompt += ` + + Provide a direct, concise description of what this database represents and its business purpose. + Do NOT start with phrases like "This database appears to represent" or "This BigQuery dataset". + Start directly with the domain or business area description. + Focus on the overall data model and its intended use. + Example: "Healthcare-related database with a focus on patient management..." 
instead of "This database appears to represent a healthcare-related system..." + `; + + return prompt.trim(); +} + +export class KloDescriptionGenerator { + private readonly llmProvider: KloLlmProvider; + private readonly cache?: KloDescriptionCachePort; + private readonly logger?: KloScanLoggerPort; + private readonly settings: ResolvedKloDescriptionGenerationSettings; + + constructor(options: KloDescriptionGeneratorOptions) { + this.llmProvider = options.llmProvider; + this.cache = options.cache; + this.logger = options.logger; + this.settings = { + columnMaxWords: options.settings.columnMaxWords, + tableMaxWords: options.settings.tableMaxWords, + dataSourceMaxWords: options.settings.dataSourceMaxWords, + ...(options.settings.temperature !== undefined ? { temperature: options.settings.temperature } : {}), + concurrencyLimit: options.settings.concurrencyLimit ?? 5, + }; + } + + async generateColumnDescriptions(input: KloGenerateColumnDescriptionsInput): Promise { + const columnsToProcess = input.table.columns; + const tableContext = `Table: ${input.table.name} | Columns: ${columnsToProcess.map((column) => column.name).join(', ')} | Data source: ${input.dataSourceType}`; + + const results = await runWithConcurrency(columnsToProcess, this.settings.concurrencyLimit, async (column) => + this.generateOneColumnDescription(input, column, tableContext), + ); + + const columnDescriptions: Array<[string, string | null]> = []; + const processedColumns: string[] = []; + const skippedColumns: string[] = []; + + for (const result of results) { + columnDescriptions.push([result.columnName, result.description]); + if (result.skipped) { + skippedColumns.push(result.columnName); + } else if (result.processed) { + processedColumns.push(result.columnName); + } + } + + return { + columnDescriptions, + processedColumns, + skippedColumns, + }; + } + + async generateTableDescription(input: KloGenerateTableDescriptionInput): Promise { + const tableRef = toTableRef(input.table); + const 
cacheKey = this.cache?.buildTableKey(tableRef); + if (cacheKey) { + const cached = await this.cache?.get(cacheKey); + if (cached) { + return cached; + } + } + + if (!input.connector.sampleTable) { + this.logger?.warn('KLO scan connector does not support table sampling for table description generation', { + connectorId: input.connector.id, + table: input.table.name, + }); + return 'Table not found'; + } + + try { + const sampleData = await input.connector.sampleTable( + { + connectionId: input.connectionId, + table: tableRef, + limit: 20, + }, + input.context, + ); + const prompt = buildKloTableDescriptionPrompt({ + tableName: input.table.name, + sampleData, + dataSourceType: input.dataSourceType, + rawDescriptions: input.table.rawDescriptions, + }); + const description = await this.generateAiDescription( + prompt, + this.settings.tableMaxWords, + 'klo-table-description', + ); + if (cacheKey) { + await this.cache?.set(cacheKey, description); + } + return description; + } catch (error) { + this.logger?.error(`Error generating table description: ${errorMessage(error)}`); + return 'Table not found'; + } + } + + async generateDataSourceDescription(input: KloGenerateDataSourceDescriptionInput): Promise { + if (input.tables.length === 0) { + return 'No tables found in database'; + } + + const cacheKey = input.connectionName ? 
this.cache?.buildConnectionKey(input.connectionName) : undefined; + if (cacheKey) { + const cached = await this.cache?.get(cacheKey); + if (cached) { + return cached; + } + } + + if (!input.connector.sampleTable) { + this.logger?.warn('KLO scan connector does not support table sampling for data-source description generation', { + connectorId: input.connector.id, + }); + return 'No accessible tables found in database'; + } + + const tablesToAnalyze = input.tables.slice(0, 10); + const tableSamples = await runWithConcurrency(tablesToAnalyze, this.settings.concurrencyLimit, async (table) => { + try { + const sampleData = await input.connector.sampleTable!( + { + connectionId: input.connectionId, + table: toTableRef(table), + limit: 5, + }, + input.context, + ); + return [table.name, sampleData] as [string, KloTableSampleResult]; + } catch (error) { + this.logger?.warn(`Failed to sample table '${table.name}' for data source analysis - ${errorMessage(error)}`); + return null; + } + }); + + const accessibleSamples = tableSamples.filter( + (sample): sample is [string, KloTableSampleResult] => sample !== null, + ); + if (accessibleSamples.length === 0) { + return 'No accessible tables found in database'; + } + + try { + const prompt = buildKloDataSourceDescriptionPrompt({ + tableSamples: accessibleSamples, + dataSourceType: input.dataSourceType, + }); + const description = await this.generateAiDescription( + prompt, + this.settings.dataSourceMaxWords, + 'klo-data-source-description', + ); + if (cacheKey) { + await this.cache?.set(cacheKey, description); + } + return description; + } catch (error) { + this.logger?.error(`Error generating data source description: ${errorMessage(error)}`); + return 'Failed to generate data source description'; + } + } + + private async generateOneColumnDescription( + input: KloGenerateColumnDescriptionsInput, + column: KloDescriptionColumn, + tableContext: string, + ): Promise { + const existingDescription = 
input.existingDescriptions?.[column.name]; + if (input.skipExisting && existingDescription) { + return { + columnName: column.name, + description: existingDescription, + skipped: true, + processed: false, + }; + } + + const tableRef = toTableRef(input.table); + const cacheKey = this.cache?.buildColumnKey(tableRef, column.name); + if (cacheKey) { + const cached = await this.cache?.get(cacheKey); + if (cached) { + return { + columnName: column.name, + description: cached, + skipped: true, + processed: false, + }; + } + } + + try { + let columnValues = column.sampleValues; + if (!columnValues || columnValues.length === 0) { + if (!input.connector.sampleColumn) { + this.logger?.warn('KLO scan connector does not support column sampling for column description generation', { + connectorId: input.connector.id, + table: input.table.name, + column: column.name, + }); + return { + columnName: column.name, + description: null, + skipped: false, + processed: false, + }; + } + + const sample = await input.connector.sampleColumn( + { + connectionId: input.connectionId, + table: tableRef, + column: column.name, + limit: 50, + }, + input.context, + ); + columnValues = sample.values; + } + + const nonNullValues = (columnValues ?? 
[]).filter((value) => value !== null && value !== undefined); + if (nonNullValues.length === 0) { + return { + columnName: column.name, + description: null, + skipped: false, + processed: false, + }; + } + + const prompt = buildKloColumnDescriptionPrompt({ + columnName: column.name, + columnValues: nonNullValues, + tableContext, + dataSourceType: input.dataSourceType, + supportsNestedAnalysis: input.supportsNestedAnalysis, + rawDescriptions: column.rawDescriptions, + }); + const description = await this.generateAiDescription( + prompt, + this.settings.columnMaxWords, + 'klo-column-description', + ); + + if (cacheKey) { + await this.cache?.set(cacheKey, description); + } + + return { + columnName: column.name, + description, + skipped: false, + processed: true, + }; + } catch (error) { + this.logger?.error(`Error analyzing column '${column.name}': ${errorMessage(error)}`); + return { + columnName: column.name, + description: `Error generating description: ${errorMessage(error)}`, + skipped: false, + processed: false, + }; + } + } + + private async generateAiDescription(prompt: string, maxWords: number, _operationName: string): Promise { + try { + const text = await generateKloText({ + llmProvider: this.llmProvider, + role: 'candidateExtraction', + prompt: appendKloWordLimitInstruction(prompt, maxWords), + temperature: this.settings.temperature, + }); + const description = text.trim(); + return description || 'Failed to generate description'; + } catch (error) { + this.logger?.error(`Error generating AI description: ${errorMessage(error)}`); + return `Error generating description: ${errorMessage(error)}`; + } + } +} diff --git a/packages/context/src/scan/embedding-text.test.ts b/packages/context/src/scan/embedding-text.test.ts new file mode 100644 index 00000000..ce60f95a --- /dev/null +++ b/packages/context/src/scan/embedding-text.test.ts @@ -0,0 +1,47 @@ +import { describe, expect, it } from 'vitest'; +import { buildKloColumnEmbeddingText } from 
'./embedding-text.js'; + +describe('KLO scan embedding text', () => { + it('builds column embedding text with table, description, FK, and sample-value context', () => { + expect( + buildKloColumnEmbeddingText({ + tableName: 'orders', + columnName: 'status', + columnType: 'varchar', + resolvedDescription: 'Payment lifecycle state', + sampleValues: ['paid', 'refunded', 'pending'], + resolvedTableDescription: 'Customer orders', + foreignKeys: { + outgoing: [{ toTable: 'customers', toColumn: 'id' }], + incoming: [{ fromTable: 'refunds', fromColumn: 'order_status' }], + }, + maxSampleValues: 2, + }), + ).toBe( + 'orders.status (varchar). Table: Customer orders. Payment lifecycle state. FK -> customers.id. FK <- refunds.order_status. Values: paid, refunded', + ); + }); + + it('omits optional sections when the scan has no enrichment context yet', () => { + expect( + buildKloColumnEmbeddingText({ + tableName: 'orders', + columnName: 'id', + columnType: 'integer', + resolvedDescription: null, + }), + ).toBe('orders.id (integer)'); + }); + + it('keeps all available sample values when no explicit max is supplied', () => { + expect( + buildKloColumnEmbeddingText({ + tableName: 'orders', + columnName: 'status', + columnType: 'varchar', + resolvedDescription: null, + sampleValues: ['paid', 'refunded'], + }), + ).toBe('orders.status (varchar). 
Values: paid, refunded'); + }); +}); diff --git a/packages/context/src/scan/embedding-text.ts b/packages/context/src/scan/embedding-text.ts new file mode 100644 index 00000000..cf385354 --- /dev/null +++ b/packages/context/src/scan/embedding-text.ts @@ -0,0 +1,45 @@ +export interface KloColumnEmbeddingForeignKeys { + outgoing: Array<{ toTable: string; toColumn: string }>; + incoming: Array<{ fromTable: string; fromColumn: string }>; +} + +export interface KloColumnEmbeddingTextInput { + tableName: string; + columnName: string; + columnType: string; + resolvedDescription: string | null; + sampleValues?: readonly string[] | null; + resolvedTableDescription?: string | null; + foreignKeys?: KloColumnEmbeddingForeignKeys | null; + maxSampleValues?: number; +} + +export function buildKloColumnEmbeddingText(input: KloColumnEmbeddingTextInput): string { + const parts: string[] = []; + + parts.push(`${input.tableName}.${input.columnName} (${input.columnType})`); + + if (input.resolvedTableDescription) { + parts.push(`Table: ${input.resolvedTableDescription}`); + } + + if (input.resolvedDescription) { + parts.push(input.resolvedDescription); + } + + if (input.foreignKeys) { + for (const fk of input.foreignKeys.outgoing) { + parts.push(`FK -> ${fk.toTable}.${fk.toColumn}`); + } + for (const fk of input.foreignKeys.incoming) { + parts.push(`FK <- ${fk.fromTable}.${fk.fromColumn}`); + } + } + + if (input.sampleValues && input.sampleValues.length > 0) { + const maxSampleValues = input.maxSampleValues ?? 20; + parts.push(`Values: ${input.sampleValues.slice(0, maxSampleValues).join(', ')}`); + } + + return parts.join('. 
'); +} diff --git a/packages/context/src/scan/enrichment-state.test.ts b/packages/context/src/scan/enrichment-state.test.ts new file mode 100644 index 00000000..7f41254d --- /dev/null +++ b/packages/context/src/scan/enrichment-state.test.ts @@ -0,0 +1,175 @@ +import { mkdtemp, rm } from 'node:fs/promises'; +import { tmpdir } from 'node:os'; +import { join } from 'node:path'; +import { afterEach, beforeEach, describe, expect, it } from 'vitest'; +import { + completedKloScanEnrichmentStateSummary, + computeKloScanEnrichmentInputHash, + summarizeKloScanEnrichmentState, +} from './enrichment-state.js'; +import { SqliteLocalScanEnrichmentStateStore } from './sqlite-local-enrichment-state-store.js'; +import type { KloSchemaSnapshot } from './types.js'; + +const snapshot: KloSchemaSnapshot = { + connectionId: 'warehouse', + driver: 'postgres', + extractedAt: '2026-04-29T12:00:00.000Z', + scope: { schemas: ['public'] }, + metadata: {}, + tables: [ + { + catalog: null, + db: 'public', + name: 'orders', + kind: 'table', + comment: null, + estimatedRows: 1, + foreignKeys: [], + columns: [ + { + name: 'id', + nativeType: 'integer', + normalizedType: 'integer', + dimensionType: 'number', + nullable: false, + primaryKey: true, + comment: null, + }, + ], + }, + ], +}; + +describe('scan enrichment state', () => { + let tempDir: string; + let store: SqliteLocalScanEnrichmentStateStore; + + beforeEach(async () => { + tempDir = await mkdtemp(join(tmpdir(), 'klo-scan-enrichment-state-')); + store = new SqliteLocalScanEnrichmentStateStore({ dbPath: join(tempDir, 'db.sqlite') }); + }); + + afterEach(async () => { + await rm(tempDir, { recursive: true, force: true }); + }); + + it('computes stable input hashes without depending on object key order', () => { + const first = computeKloScanEnrichmentInputHash({ + snapshot, + mode: 'enriched', + detectRelationships: true, + providerIdentity: { provider: 'deterministic', embeddingDimensions: 8, llmModel: 'a' }, + }); + const second = 
computeKloScanEnrichmentInputHash({ + snapshot: { ...snapshot, metadata: {} }, + mode: 'enriched', + detectRelationships: true, + providerIdentity: { llmModel: 'a', embeddingDimensions: 8, provider: 'deterministic' }, + }); + const firstTable = snapshot.tables[0]; + if (!firstTable) { + throw new Error('Expected test snapshot table'); + } + const changed = computeKloScanEnrichmentInputHash({ + snapshot: { ...snapshot, tables: [{ ...firstTable, name: 'orders_v2' }] }, + mode: 'enriched', + detectRelationships: true, + providerIdentity: { provider: 'deterministic', embeddingDimensions: 8, llmModel: 'a' }, + }); + + expect(first).toMatch(/^[a-f0-9]{64}$/); + expect(second).toBe(first); + expect(changed).not.toBe(first); + }); + + it('persists completed stages and ignores stale hashes', async () => { + const inputHash = computeKloScanEnrichmentInputHash({ + snapshot, + mode: 'enriched', + detectRelationships: true, + providerIdentity: { provider: 'deterministic', embeddingDimensions: 8 }, + }); + + await store.saveCompletedStage({ + runId: 'scan-run-1', + connectionId: 'warehouse', + syncId: 'sync-1', + mode: 'enriched', + stage: 'descriptions', + inputHash, + output: [{ table: { catalog: null, db: 'public', name: 'orders' }, tableDescription: 'Orders' }], + updatedAt: '2026-04-29T12:01:00.000Z', + }); + + await expect( + store.findCompletedStage({ + runId: 'scan-run-1', + stage: 'descriptions', + inputHash, + }), + ).resolves.toMatchObject({ + runId: 'scan-run-1', + stage: 'descriptions', + status: 'completed', + output: [{ table: { catalog: null, db: 'public', name: 'orders' }, tableDescription: 'Orders' }], + }); + + await expect( + store.findCompletedStage({ + runId: 'scan-run-1', + stage: 'descriptions', + inputHash: 'different-hash', + }), + ).resolves.toBeNull(); + }); + + it('records failed stages without making them reusable', async () => { + await store.saveFailedStage({ + runId: 'scan-run-2', + connectionId: 'warehouse', + syncId: 'sync-2', + mode: 
'enriched', + stage: 'embeddings', + inputHash: 'hash-2', + errorMessage: 'embedding service timed out', + updatedAt: '2026-04-29T12:02:00.000Z', + }); + + await expect( + store.findCompletedStage({ + runId: 'scan-run-2', + stage: 'embeddings', + inputHash: 'hash-2', + }), + ).resolves.toBeNull(); + + await expect(store.listRunStages('scan-run-2')).resolves.toEqual([ + expect.objectContaining({ + runId: 'scan-run-2', + stage: 'embeddings', + status: 'failed', + errorMessage: 'embedding service timed out', + }), + ]); + }); + + it('summarizes resumed, completed, and failed stages for reports', () => { + expect( + summarizeKloScanEnrichmentState({ + resumedStages: ['descriptions'], + completedStages: ['descriptions', 'embeddings'], + failedStages: ['relationships'], + }), + ).toEqual({ + resumedStages: ['descriptions'], + completedStages: ['descriptions', 'embeddings'], + failedStages: ['relationships'], + }); + + expect(completedKloScanEnrichmentStateSummary()).toEqual({ + resumedStages: [], + completedStages: [], + failedStages: [], + }); + }); +}); diff --git a/packages/context/src/scan/enrichment-state.ts b/packages/context/src/scan/enrichment-state.ts new file mode 100644 index 00000000..9fcf5a63 --- /dev/null +++ b/packages/context/src/scan/enrichment-state.ts @@ -0,0 +1,108 @@ +import { createHash } from 'node:crypto'; +import type { KloScanEnrichmentStage, KloScanEnrichmentStateSummary, KloScanMode, KloSchemaSnapshot } from './types.js'; + +export const KLO_SCAN_ENRICHMENT_STAGES: readonly KloScanEnrichmentStage[] = [ + 'descriptions', + 'embeddings', + 'relationships', +] as const; + +export interface KloScanEnrichmentStageLookup { + runId: string; + stage: KloScanEnrichmentStage; + inputHash: string; +} + +export interface KloScanEnrichmentCompletedStage { + runId: string; + connectionId: string; + syncId: string; + mode: KloScanMode; + stage: KloScanEnrichmentStage; + inputHash: string; + status: 'completed'; + output: TOutput; + errorMessage: null; + 
/**
 * Serializes a value to a canonical JSON string so that structurally equal
 * inputs always yield the same text, regardless of object key insertion order
 * or the locale of the running process.
 *
 * Rules:
 * - Object keys are ordered by UTF-16 code units (plain `<` comparison), not by
 *   `localeCompare`, whose ordering depends on the process locale.
 * - Object properties whose value is `undefined`, a function, or a symbol are
 *   omitted, matching `JSON.stringify`, so `{ a: undefined }` and `{}` agree.
 * - Array elements that `JSON.stringify` cannot represent (including holes)
 *   become `null`, again matching `JSON.stringify`.
 *
 * @param value Any JSON-like value.
 * @returns The canonical JSON text for `value`.
 */
function stableJson(value: unknown): string {
  if (Array.isArray(value)) {
    // Array.from visits holes as undefined, which stableJson renders as null.
    return `[${Array.from(value, (item) => stableJson(item)).join(',')}]`;
  }
  if (value && typeof value === 'object') {
    const entries = Object.entries(value as Record<string, unknown>)
      .filter(([, item]) => item !== undefined && typeof item !== 'function' && typeof item !== 'symbol')
      // Keys are unique, so the comparator never needs to report equality.
      .sort(([left], [right]) => (left < right ? -1 : left > right ? 1 : 0));
    return `{${entries.map(([key, item]) => `${JSON.stringify(key)}:${stableJson(item)}`).join(',')}}`;
  }
  // JSON.stringify returns undefined for undefined/functions/symbols at runtime.
  return JSON.stringify(value) ?? 'null';
}

/**
 * Computes the SHA-256 hex digest of the canonical JSON form of `input`.
 *
 * The digest does not depend on object key order, on the process locale, or on
 * whether optional properties are omitted versus explicitly set to `undefined`.
 *
 * @param input Snapshot, scan mode, and provider settings to fingerprint.
 * @returns A 64-character lowercase hexadecimal digest.
 */
export function computeKloScanEnrichmentInputHash(input: ComputeKloScanEnrichmentInputHashInput): string {
  return createHash('sha256').update(stableJson(input)).digest('hex');
}

/**
 * Returns the given stages de-duplicated and arranged in the order defined by
 * `KLO_SCAN_ENRICHMENT_STAGES`. Stages not listed there are dropped.
 */
function uniqueStages(stages: KloScanEnrichmentStage[]): KloScanEnrichmentStage[] {
  const requested = new Set<KloScanEnrichmentStage>(stages);
  const seen = new Set<KloScanEnrichmentStage>();
  const ordered: KloScanEnrichmentStage[] = [];
  for (const stage of KLO_SCAN_ENRICHMENT_STAGES) {
    if (requested.has(stage) && !seen.has(stage)) {
      seen.add(stage);
      ordered.push(stage);
    }
  }
  return ordered;
}

/**
 * Builds a state summary with no resumed, completed, or failed stages.
 * A fresh object (with fresh arrays) is returned on every call.
 */
export function completedKloScanEnrichmentStateSummary(): KloScanEnrichmentStateSummary {
  return {
    resumedStages: [],
    completedStages: [],
    failedStages: [],
  };
}

/**
 * Normalizes a state summary: each stage list is de-duplicated and put into
 * the order of `KLO_SCAN_ENRICHMENT_STAGES`. The input is not mutated.
 *
 * @param input Summary whose stage lists may contain duplicates in any order.
 * @returns A new summary with normalized stage lists.
 */
export function summarizeKloScanEnrichmentState(input: KloScanEnrichmentStateSummary): KloScanEnrichmentStateSummary {
  return {
    resumedStages: uniqueStages(input.resumedStages),
    completedStages: uniqueStages(input.completedStages),
    failedStages: uniqueStages(input.failedStages),
  };
}
/**
 * Enrichment summary in which every stage is marked `'skipped'`.
 * Treat as read-only; it is shared by every importer of this module.
 */
export const skippedKloScanEnrichmentSummary: KloScanEnrichmentSummary = {
  dataDictionary: 'skipped',
  tableDescriptions: 'skipped',
  columnDescriptions: 'skipped',
  embeddings: 'skipped',
  deterministicRelationships: 'skipped',
  llmRelationshipValidation: 'skipped',
  statisticalValidation: 'skipped',
};

/**
 * Builds the enrichment summary to report when a scan's enrichment fails.
 *
 * - `'enriched'` mode: every stage is `'failed'`.
 * - `'relationships'` mode, or any other mode with `detectRelationships` set:
 *   `deterministicRelationships` and `statisticalValidation` are `'failed'`,
 *   everything else is `'skipped'`.
 * - Otherwise: every stage is `'skipped'`.
 *
 * A new object is returned on every path, so callers may mutate the result
 * without affecting `skippedKloScanEnrichmentSummary`.
 *
 * @param mode Scan mode that was running.
 * @param detectRelationships Whether relationship detection was requested.
 * @returns The per-stage status summary.
 */
export function failedKloScanEnrichmentSummary(
  mode: KloScanMode,
  detectRelationships = false,
): KloScanEnrichmentSummary {
  if (mode === 'enriched') {
    return {
      dataDictionary: 'failed',
      tableDescriptions: 'failed',
      columnDescriptions: 'failed',
      embeddings: 'failed',
      deterministicRelationships: 'failed',
      llmRelationshipValidation: 'failed',
      statisticalValidation: 'failed',
    };
  }

  if (mode === 'relationships' || detectRelationships) {
    return {
      ...skippedKloScanEnrichmentSummary,
      deterministicRelationships: 'failed',
      statisticalValidation: 'failed',
    };
  }

  // Copy rather than hand out the shared constant by reference.
  return { ...skippedKloScanEnrichmentSummary };
}

/**
 * Converts an arbitrary thrown value into a message string.
 *
 * `Error` instances yield their `message`, strings are returned unchanged, and
 * anything else is JSON-serialized. Values that JSON cannot represent
 * (`undefined`, functions, symbols, circular structures, bigint) fall back to
 * `String(error)`, so the result is always a string.
 *
 * @param error The thrown value.
 * @returns A message describing `error`.
 */
export function kloScanErrorMessage(error: unknown): string {
  if (error instanceof Error) {
    return error.message;
  }
  if (typeof error === 'string') {
    return error;
  }
  try {
    // JSON.stringify returns undefined (not a string) for undefined, functions and symbols.
    return JSON.stringify(error) ?? String(error);
  } catch {
    return String(error);
  }
}
'vitest'; +import type { + KloColumnSampleUpdate, + KloDescriptionUpdate, + KloEmbeddingUpdate, + KloEnrichedSchema, + KloJoinUpdate, + KloRelationshipEndpoint, + KloRelationshipUpdate, + KloScanMetadataStore, + KloStructuralSyncPlan, +} from './enrichment-types.js'; + +describe('KLO scan enrichment contracts', () => { + it('models an enriched schema with reusable table, column, and relationship metadata', () => { + const schema: KloEnrichedSchema = { + connectionId: 'warehouse', + tables: [ + { + id: 'table-orders', + ref: { catalog: 'analytics', db: 'public', name: 'orders' }, + enabled: true, + descriptions: { db: 'Raw orders', ai: 'Customer orders' }, + columns: [ + { + id: 'column-orders-status', + tableId: 'table-orders', + tableRef: { catalog: 'analytics', db: 'public', name: 'orders' }, + name: 'status', + nativeType: 'varchar', + normalizedType: 'string', + dimensionType: 'string', + nullable: false, + primaryKey: false, + parentColumnId: null, + descriptions: { db: 'Status code' }, + embedding: [0.1, 0.2], + sampleValues: ['paid', 'refunded'], + cardinality: 2, + }, + ], + }, + ], + relationships: [ + { + id: 'rel-orders-customers', + source: 'formal', + from: { + tableId: 'table-orders', + columnIds: ['column-orders-customer-id'], + table: { catalog: 'analytics', db: 'public', name: 'orders' }, + columns: ['customer_id'], + }, + to: { + tableId: 'table-customers', + columnIds: ['column-customers-id'], + table: { catalog: 'analytics', db: 'public', name: 'customers' }, + columns: ['id'], + }, + relationshipType: 'many_to_one', + confidence: 1, + isPrimaryKeyReference: true, + }, + ], + }; + + expect(schema.tables[0].columns[0].sampleValues).toEqual(['paid', 'refunded']); + expect(schema.relationships[0].source).toBe('formal'); + }); + + it('models metadata-store updates without requiring a concrete store implementation', async () => { + const structuralPlan: KloStructuralSyncPlan = { + connectionId: 'warehouse', + snapshotId: 'snapshot-1', + operations: 
[{ kind: 'create_table', table: 'orders' }], + }; + const descriptionUpdate: KloDescriptionUpdate = { + connectionId: 'warehouse', + table: { catalog: 'analytics', db: 'public', name: 'orders' }, + source: 'ai', + tableDescription: 'Customer orders', + columnDescriptions: { status: 'Payment lifecycle state' }, + }; + const sampleUpdate: KloColumnSampleUpdate = { + columnId: 'column-orders-status', + sampleValues: ['paid', 'refunded'], + cardinality: 2, + }; + const embeddingUpdate: KloEmbeddingUpdate = { + columnId: 'column-orders-status', + text: 'orders.status (varchar). Values: paid, refunded', + embedding: [0.25, 0.75], + }; + const relationshipUpdate: KloRelationshipUpdate = { + connectionId: 'warehouse', + accepted: [], + rejected: [], + skipped: [{ reason: 'missing parent table', relationshipId: 'candidate-1' }], + }; + + const store: KloScanMetadataStore = { + loadSchema: async () => null, + applyStructuralPlan: async (plan) => ({ + connectionId: plan.connectionId, + tables: [], + relationships: [], + }), + updateDescriptions: async (input) => { + expect(input).toEqual(descriptionUpdate); + }, + updateColumnSamples: async (input) => { + expect(input).toEqual([sampleUpdate]); + }, + updateColumnEmbeddings: async (input) => { + expect(input).toEqual([embeddingUpdate]); + }, + updateInferredRelationships: async (input) => { + expect(input).toEqual(relationshipUpdate); + }, + }; + + await expect(store.loadSchema('warehouse')).resolves.toBeNull(); + await expect(store.applyStructuralPlan(structuralPlan)).resolves.toEqual({ + connectionId: 'warehouse', + tables: [], + relationships: [], + }); + await expect(store.updateDescriptions(descriptionUpdate)).resolves.toBeUndefined(); + await expect(store.updateColumnSamples([sampleUpdate])).resolves.toBeUndefined(); + await expect(store.updateColumnEmbeddings([embeddingUpdate])).resolves.toBeUndefined(); + await expect(store.updateInferredRelationships(relationshipUpdate)).resolves.toBeUndefined(); + }); +}); + 
+describe('relationship tuple contracts', () => { + it('represents relationship endpoints and join updates as ordered column tuples', () => { + const endpoint: KloRelationshipEndpoint = { + tableId: 'public.order_lines', + columnIds: ['public.order_lines.order_id', 'public.order_lines.line_number'], + table: { catalog: null, db: 'public', name: 'order_lines' }, + columns: ['order_id', 'line_number'], + }; + const update: KloJoinUpdate = { + connectionId: 'warehouse', + fromTable: 'order_line_allocations', + fromColumns: ['order_id', 'line_number'], + toTable: 'order_lines', + toColumns: ['order_id', 'line_number'], + relationship: 'many_to_one', + author: 'klo', + authorEmail: 'klo@example.com', + }; + + expect(endpoint.columns).toEqual(['order_id', 'line_number']); + expect(endpoint.columnIds).toEqual(['public.order_lines.order_id', 'public.order_lines.line_number']); + expect(update.fromColumns).toEqual(['order_id', 'line_number']); + expect(update.toColumns).toEqual(['order_id', 'line_number']); + }); +}); diff --git a/packages/context/src/scan/enrichment-types.ts b/packages/context/src/scan/enrichment-types.ts new file mode 100644 index 00000000..c3226126 --- /dev/null +++ b/packages/context/src/scan/enrichment-types.ts @@ -0,0 +1,130 @@ +import type { KloSchemaDimensionType, KloTableRef } from './types.js'; + +export type KloDescriptionSource = 'ai' | 'db' | 'dbt' | 'user' | (string & {}); + +export type KloRelationshipSource = 'formal' | 'inferred' | 'manual'; + +export type KloRelationshipType = 'many_to_one' | 'one_to_many' | 'one_to_one'; + +export interface KloEnrichedColumn { + id: string; + tableId: string; + tableRef: KloTableRef; + name: string; + nativeType: string; + normalizedType: string; + dimensionType: KloSchemaDimensionType; + nullable: boolean; + primaryKey: boolean; + parentColumnId: string | null; + descriptions: Partial>; + embedding: number[] | null; + sampleValues: string[] | null; + cardinality: number | null; +} + +export interface 
KloEnrichedTable { + id: string; + ref: KloTableRef; + enabled: boolean; + descriptions: Partial>; + columns: KloEnrichedColumn[]; +} + +export interface KloRelationshipEndpoint { + tableId: string; + columnIds: string[]; + table: KloTableRef; + columns: string[]; +} + +export interface KloEnrichedRelationship { + id: string; + source: KloRelationshipSource; + from: KloRelationshipEndpoint; + to: KloRelationshipEndpoint; + relationshipType: KloRelationshipType; + confidence: number; + isPrimaryKeyReference: boolean; +} + +export interface KloEnrichedSchema { + connectionId: string; + tables: KloEnrichedTable[]; + relationships: KloEnrichedRelationship[]; +} + +export interface KloStructuralSyncPlan { + connectionId: string; + snapshotId: string; + operations: Array>; +} + +export interface KloDescriptionUpdate { + connectionId: string; + table: KloTableRef; + source: KloDescriptionSource; + tableDescription?: string; + columnDescriptions?: Record; +} + +const PREFERRED_METADATA_FIELD_NAMES = [ + 'tags', + 'constraints', + 'enum_values', + 'freshness', + 'tests', + 'lineage', +] as const; + +export interface KloMetadataUpdate { + connectionId: string; + table: KloTableRef; + source: KloDescriptionSource; + tableFields?: Record; + columnFields?: Record>; +} + +export interface KloJoinUpdate { + connectionId: string; + fromTable: string; + fromColumns: string[]; + toTable: string; + toColumns: string[]; + relationship: KloRelationshipType; + author: string; + authorEmail: string; +} + +export interface KloColumnSampleUpdate { + columnId: string; + sampleValues: string[] | null; + cardinality: number | null; +} + +export interface KloEmbeddingUpdate { + columnId: string; + text: string; + embedding: number[]; +} + +export interface KloSkippedRelationship { + relationshipId: string; + reason: string; +} + +export interface KloRelationshipUpdate { + connectionId: string; + accepted: KloEnrichedRelationship[]; + rejected: KloEnrichedRelationship[]; + skipped: 
KloSkippedRelationship[]; +} + +export interface KloScanMetadataStore { + loadSchema(connectionId: string): Promise; + applyStructuralPlan(plan: KloStructuralSyncPlan): Promise; + updateDescriptions(input: KloDescriptionUpdate): Promise; + updateColumnSamples(input: KloColumnSampleUpdate[]): Promise; + updateColumnEmbeddings(input: KloEmbeddingUpdate[]): Promise; + updateInferredRelationships(input: KloRelationshipUpdate): Promise; +} diff --git a/packages/context/src/scan/index.ts b/packages/context/src/scan/index.ts new file mode 100644 index 00000000..4f45a6a1 --- /dev/null +++ b/packages/context/src/scan/index.ts @@ -0,0 +1,400 @@ +export { + REDACTED_KLO_CREDENTIAL_VALUE, + redactKloCredentialEnvelope, + redactKloCredentialValue, + redactKloScanMetadata, + redactKloScanReport, + redactKloScanWarning, +} from './credentials.js'; +export type { + KloDataDictionaryColumnState, + KloDataDictionarySampleDecision, + KloDataDictionarySettings, + KloDataDictionarySkipReason, +} from './data-dictionary.js'; +export { + defaultKloDataDictionarySettings, + isKloDataDictionaryCandidate, + shouldKloSampleColumnForDictionary, +} from './data-dictionary.js'; +export type { + KloColumnAnalysisResult, + KloColumnDescriptionPromptInput, + KloDataSourceDescriptionPromptInput, + KloDescriptionCachePort, + KloDescriptionColumn, + KloDescriptionColumnTable, + KloDescriptionGenerationSettings, + KloDescriptionGeneratorOptions, + KloDescriptionSamplingPort, + KloDescriptionTableInput, + KloGenerateColumnDescriptionsInput, + KloGenerateDataSourceDescriptionInput, + KloGenerateTableDescriptionInput, + KloTableDescriptionPromptInput, +} from './description-generation.js'; +export { + appendKloWordLimitInstruction, + buildKloColumnDescriptionPrompt, + buildKloDataSourceDescriptionPrompt, + buildKloTableDescriptionPrompt, + KloDescriptionGenerator, +} from './description-generation.js'; +export type { KloColumnEmbeddingForeignKeys, KloColumnEmbeddingTextInput } from './embedding-text.js'; 
+export { buildKloColumnEmbeddingText } from './embedding-text.js'; +export type { + ComputeKloScanEnrichmentInputHashInput, + KloScanEnrichmentCompletedStage, + KloScanEnrichmentFailedStage, + KloScanEnrichmentStageLookup, + KloScanEnrichmentStageRecord, + KloScanEnrichmentStateStore, +} from './enrichment-state.js'; +export { + completedKloScanEnrichmentStateSummary, + computeKloScanEnrichmentInputHash, + KLO_SCAN_ENRICHMENT_STAGES, + summarizeKloScanEnrichmentState, +} from './enrichment-state.js'; +export { + failedKloScanEnrichmentSummary, + kloScanErrorMessage, + skippedKloScanEnrichmentSummary, +} from './enrichment-summary.js'; +export type { + KloColumnSampleUpdate, + KloDescriptionSource, + KloDescriptionUpdate, + KloEmbeddingUpdate, + KloEnrichedColumn, + KloEnrichedRelationship, + KloEnrichedSchema, + KloEnrichedTable, + KloRelationshipEndpoint, + KloRelationshipSource, + KloRelationshipType, + KloRelationshipUpdate, + KloScanMetadataStore, + KloSkippedRelationship, + KloStructuralSyncPlan, +} from './enrichment-types.js'; +export type { + DeterministicLocalScanEnrichmentProviderOptions, + KloLocalScanEnrichmentInput, + KloLocalScanEnrichmentProviders, + KloLocalScanEnrichmentResult, +} from './local-enrichment.js'; +export { + createDeterministicLocalScanEnrichmentProviders, + runLocalScanEnrichment, + snapshotToKloEnrichedSchema, +} from './local-enrichment.js'; +export type { + WriteLocalScanEnrichmentArtifactsInput, + WriteLocalScanEnrichmentArtifactsResult, + WriteLocalScanManifestShardsInput, + WriteLocalScanManifestShardsResult, +} from './local-enrichment-artifacts.js'; +export { + writeLocalScanEnrichmentArtifacts, + writeLocalScanManifestShards, +} from './local-enrichment-artifacts.js'; +export type { + LocalScanMcpOptions, + LocalScanRunResult, + LocalScanStatusResponse, + RunLocalScanOptions, +} from './local-scan.js'; +export { getLocalScanReport, getLocalScanStatus, runLocalScan } from './local-scan.js'; +export type { 
ReadLocalScanStructuralSnapshotInput } from './local-structural-artifacts.js'; +export { readLocalScanStructuralSnapshot } from './local-structural-artifacts.js'; +export type { + KloEnrichmentScanPhaseResult, + KloScanOrchestratorOptions, + KloScanOrchestratorRunInput, + KloScanOrchestratorRunResult, + KloStructuralScanPhaseResult, +} from './orchestrator.js'; +export { KloScanOrchestrator } from './orchestrator.js'; +export type { + KloRelationshipArtifactStatus, + ReadLocalScanRelationshipArtifactsResult, +} from './relationship-artifacts.js'; +export { readLocalScanRelationshipArtifacts } from './relationship-artifacts.js'; +export type { + KloRelationshipBenchmarkReport, + KloRelationshipBenchmarkReportCase, + KloRelationshipBenchmarkReportCaseStatus, +} from './relationship-benchmark-report.js'; +export { + buildKloRelationshipBenchmarkReport, + formatKloRelationshipBenchmarkReportMarkdown, +} from './relationship-benchmark-report.js'; +export type { + KloRelationshipBenchmarkCaseResult, + KloRelationshipBenchmarkDetectedLink, + KloRelationshipBenchmarkDetectedPk, + KloRelationshipBenchmarkDetector, + KloRelationshipBenchmarkDetectorInput, + KloRelationshipBenchmarkDetectorResult, + KloRelationshipBenchmarkExpectedLink, + KloRelationshipBenchmarkExpectedLinks, + KloRelationshipBenchmarkExpectedPk, + KloRelationshipBenchmarkFixture, + KloRelationshipBenchmarkMetrics, + KloRelationshipBenchmarkMode, + KloRelationshipBenchmarkStatus, + KloRelationshipBenchmarkSuiteResult, + KloRelationshipBenchmarkTier, +} from './relationship-benchmarks.js'; +export { + currentKloRelationshipBenchmarkDetector, + kloRelationshipBenchmarkDetectorWithLlm, + KLO_RELATIONSHIP_BENCHMARK_MODES, + KLO_RELATIONSHIP_BENCHMARK_TIERS, + loadKloRelationshipBenchmarkFixture, + loadKloRelationshipBenchmarkFixtures, + maskKloRelationshipBenchmarkSnapshot, + runKloRelationshipBenchmarkCase, + runKloRelationshipBenchmarkSuite, +} from './relationship-benchmarks.js'; +export type { + 
ApplyKloRelationshipValidationBudgetInput, + KloRelationshipBudgetedCandidate, + KloRelationshipValidationBudget, + KloRelationshipValidationBudgetResult, +} from './relationship-budget.js'; +export { + applyKloRelationshipValidationBudget, + defaultKloRelationshipValidationBudget, +} from './relationship-budget.js'; +export type { + KloRelationshipDiscoveryCandidate, + KloRelationshipDiscoveryCandidateEvidence, + KloRelationshipDiscoveryCandidateOptions, + KloRelationshipDiscoveryCandidateSource, + KloRelationshipDiscoveryCandidateStatus, + KloRelationshipInferredTargetPk, +} from './relationship-candidates.js'; +export { + generateKloRelationshipDiscoveryCandidates, + inferKloRelationshipTargetPks, + mergeKloRelationshipDiscoveryCandidates, +} from './relationship-candidates.js'; +export type { + DiscoverKloCompositeRelationshipsInput, + DiscoverKloCompositeRelationshipsResult, + KloCompositePrimaryKeyCandidate, + KloCompositeRelationshipCandidate, + KloCompositeRelationshipStatus, + KloCompositeRelationshipTupleEndpoint, + KloCompositeRelationshipValidationEvidence, +} from './relationship-composite-candidates.js'; +export { discoverKloCompositeRelationships } from './relationship-composite-candidates.js'; +export type { + BuildKloRelationshipArtifactsInput, + BuildKloRelationshipDiagnosticsInput, + EmptyKloRelationshipProfileArtifactInput, + KloRelationshipArtifact, + KloRelationshipArtifactEdge, + KloRelationshipArtifactEndpoint, + KloRelationshipDiagnosticsArtifact, + KloRelationshipDiagnosticsSummary, + KloRelationshipDiagnosticsThresholds, + KloRelationshipDiagnosticsValidation, +} from './relationship-diagnostics.js'; +export { + buildKloRelationshipArtifacts, + buildKloRelationshipDiagnostics, + emptyKloRelationshipProfileArtifact, +} from './relationship-diagnostics.js'; +export type { + BuildKloRelationshipFeedbackCalibrationReportInput, + CalibrateLocalRelationshipFeedbackLabelsInput, + KloRelationshipFeedbackCalibrationBucket, + 
KloRelationshipFeedbackCalibrationLabel, + KloRelationshipFeedbackCalibrationReport, +} from './relationship-feedback-calibration.js'; +export { + buildKloRelationshipFeedbackCalibrationReport, + calibrateLocalRelationshipFeedbackLabels, + formatKloRelationshipFeedbackCalibrationMarkdown, +} from './relationship-feedback-calibration.js'; +export type { + ExportLocalRelationshipFeedbackLabelsInput, + ExportLocalRelationshipFeedbackLabelsResult, + KloRelationshipFeedbackDecisionFilter, + KloRelationshipFeedbackExportWarning, + KloRelationshipFeedbackLabel, +} from './relationship-feedback-export.js'; +export { + exportLocalRelationshipFeedbackLabels, + formatKloRelationshipFeedbackLabelsJsonl, +} from './relationship-feedback-export.js'; +export { + collectKloFormalMetadataRelationships, + type KloFormalMetadataRelationshipCollection, +} from './relationship-formal-metadata.js'; +export type { + KloRelationshipGraphResolutionResult, + KloRelationshipGraphResolverSettings, + KloResolvedRelationshipDiscoveryCandidate, + KloResolvedRelationshipGraphEvidence, + KloResolvedRelationshipPk, + KloResolvedRelationshipPkEvidence, + KloResolvedRelationshipStatus, + ResolveKloRelationshipGraphInput, +} from './relationship-graph-resolver.js'; +export { resolveKloRelationshipGraph } from './relationship-graph-resolver.js'; +export type { + KloRelationshipLlmProposalGenerateText, + KloRelationshipLlmProposalResult, + KloRelationshipLlmProposalSettings, + ProposeKloRelationshipCandidatesWithLlmInput, +} from './relationship-llm-proposal.js'; +export { proposeKloRelationshipCandidatesWithLlm } from './relationship-llm-proposal.js'; +export type { + KloRelationshipLocalityCandidateTable, + LocalKloRelationshipCandidateTablesInput, +} from './relationship-locality.js'; +export { localCandidateTables } from './relationship-locality.js'; +export type { + KloRelationshipNormalizedName, + KloRelationshipTokenInput, +} from './relationship-name-similarity.js'; +export { + 
normalizeKloRelationshipName, + pluralizeKloRelationshipToken, + singularizeKloRelationshipToken, + tokenizeKloRelationshipName, + tokenSimilarity, +} from './relationship-name-similarity.js'; +export type { + DiscoverKloRelationshipsInput, + DiscoverKloRelationshipsResult, +} from './relationship-discovery.js'; +export { discoverKloRelationships } from './relationship-discovery.js'; +export type { + KloRelationshipColumnProfile, + KloRelationshipProfileArtifact, + KloRelationshipReadOnlyExecutor, + KloRelationshipTableProfile, + ProfileKloRelationshipSchemaInput, +} from './relationship-profiling.js'; +export { + formatKloRelationshipTableRef, + profileKloRelationshipSchema, + quoteKloRelationshipIdentifier, +} from './relationship-profiling.js'; +export type { + AppliedRelationshipReviewDecision, + ApplyLocalScanRelationshipReviewDecisionsInput, + ApplyLocalScanRelationshipReviewDecisionsResult, +} from './relationship-review-apply.js'; +export { applyLocalScanRelationshipReviewDecisions } from './relationship-review-apply.js'; +export type { + KloRelationshipReviewDecisionArtifact, + KloRelationshipReviewDecisionEntry, + KloRelationshipReviewDecisionValue, + WriteLocalScanRelationshipReviewDecisionInput, + WriteLocalScanRelationshipReviewDecisionResult, +} from './relationship-review-decisions.js'; +export { writeLocalScanRelationshipReviewDecision } from './relationship-review-decisions.js'; +export type { + KloRelationshipFixtureOrigin, + KloRelationshipScoreBreakdown, + KloRelationshipScoreSignal, + KloRelationshipScoreWeights, + KloRelationshipScoringCalibrationObservation, + KloRelationshipSignalVector, +} from './relationship-scoring.js'; +export { + calibrateWeightsFromSyntheticFixtures, + defaultKloRelationshipScoreWeights, + KLO_RELATIONSHIP_SCORE_SIGNAL_KEYS, + normalizeKloRelationshipScoreWeights, + scoreKloRelationshipCandidate, +} from './relationship-scoring.js'; +export type { + AdviseLocalRelationshipFeedbackThresholdsInput, + 
BuildKloRelationshipThresholdAdviceReportInput, + KloRelationshipThresholdAdviceCandidate, + KloRelationshipThresholdAdviceReport, + KloRelationshipThresholdAdviceStatus, +} from './relationship-threshold-advice.js'; +export { + adviseLocalRelationshipFeedbackThresholds, + buildKloRelationshipThresholdAdviceReport, + formatKloRelationshipThresholdAdviceMarkdown, +} from './relationship-threshold-advice.js'; +export type { + KloRelationshipValidationEvidence, + KloRelationshipValidationSettings, + KloValidatedRelationshipDiscoveryCandidate, + KloValidatedRelationshipStatus, + ValidateKloRelationshipDiscoveryCandidatesInput, +} from './relationship-validation.js'; +export { validateKloRelationshipDiscoveryCandidates } from './relationship-validation.js'; +export type { SqliteLocalScanEnrichmentStateStoreOptions } from './sqlite-local-enrichment-state-store.js'; +export { SqliteLocalScanEnrichmentStateStore } from './sqlite-local-enrichment-state-store.js'; +export type { KloColumnTypeMapping } from './type-normalization.js'; +export { + inferKloDimensionType, + kloColumnTypeMappingFromNative, + normalizeKloNativeType, +} from './type-normalization.js'; +export type { + KloColumnSampleInput, + KloColumnSampleResult, + KloColumnStatsInput, + KloColumnStatsResult, + KloConnectionDriver, + KloConnectorCapabilities, + KloCredentialEnvelope, + KloCredentialEnvReference, + KloCredentialFileReference, + KloEmbeddingPort, + KloEventPropertyDiscovery, + KloEventPropertyDiscoveryInput, + KloEventPropertyValuesInput, + KloEventPropertyValuesResult, + KloEventStreamDiscoveryPort, + KloEventTypeDiscovery, + KloEventTypeDiscoveryInput, + KloNetworkEndpoint, + KloNetworkTunnelPort, + KloNetworkTunnelRequest, + KloOptionalConnectorCapabilities, + KloProgressPort, + KloProgressUpdateOptions, + KloQueryResult, + KloReadOnlyQueryInput, + KloResolvedCredentialEnvelope, + KloScanArtifactPaths, + KloScanConnector, + KloScanContext, + KloScanDiffSummary, + KloScanEnrichmentStage, + 
KloScanEnrichmentStateSummary, + KloScanEnrichmentSummary, + KloScanInput, + KloScanLoggerPort, + KloScanMode, + KloScanRelationshipSummary, + KloScanReport, + KloScanTrigger, + KloScanWarning, + KloScanWarningCode, + KloSchemaColumn, + KloSchemaDimensionType, + KloSchemaForeignKey, + KloSchemaScope, + KloSchemaSnapshot, + KloSchemaTable, + KloSchemaTableKind, + KloStructuralSyncStats, + KloTableRef, + KloTableSampleInput, + KloTableSampleResult, +} from './types.js'; +export { createKloConnectorCapabilities } from './types.js'; diff --git a/packages/context/src/scan/local-enrichment-artifacts.test.ts b/packages/context/src/scan/local-enrichment-artifacts.test.ts new file mode 100644 index 00000000..41a3418a --- /dev/null +++ b/packages/context/src/scan/local-enrichment-artifacts.test.ts @@ -0,0 +1,852 @@ +import { mkdtemp, readFile, rm } from 'node:fs/promises'; +import { tmpdir } from 'node:os'; +import { join } from 'node:path'; +import { afterEach, beforeEach, describe, expect, it } from 'vitest'; +import YAML from 'yaml'; +import { initKloProject, type KloLocalProject } from '../project/index.js'; +import type { KloLocalScanEnrichmentResult } from './local-enrichment.js'; +import { writeLocalScanEnrichmentArtifacts, writeLocalScanManifestShards } from './local-enrichment-artifacts.js'; +import type { KloSchemaSnapshot } from './types.js'; + +const snapshot: KloSchemaSnapshot = { + connectionId: 'warehouse', + driver: 'postgres', + extractedAt: '2026-04-29T12:00:00.000Z', + scope: { schemas: ['public'] }, + metadata: {}, + tables: [ + { + catalog: null, + db: 'public', + name: 'customers', + kind: 'table', + comment: 'DB customer table', + estimatedRows: 2, + foreignKeys: [], + columns: [ + { + name: 'id', + nativeType: 'integer', + normalizedType: 'integer', + dimensionType: 'number', + nullable: false, + primaryKey: true, + comment: 'DB customer id', + }, + ], + }, + { + catalog: null, + db: 'public', + name: 'orders', + kind: 'table', + comment: 'DB orders 
table', + estimatedRows: 3, + foreignKeys: [ + { + fromColumn: 'customer_id', + toCatalog: null, + toDb: 'public', + toTable: 'customers', + toColumn: 'id', + constraintName: 'orders_customer_id_fkey', + }, + ], + columns: [ + { + name: 'id', + nativeType: 'integer', + normalizedType: 'integer', + dimensionType: 'number', + nullable: false, + primaryKey: true, + comment: 'DB order id', + }, + { + name: 'customer_id', + nativeType: 'integer', + normalizedType: 'integer', + dimensionType: 'number', + nullable: false, + primaryKey: false, + comment: 'DB customer id', + }, + ], + }, + ], +}; + +function enrichment(): KloLocalScanEnrichmentResult { + return { + snapshot, + summary: { + dataDictionary: 'completed', + tableDescriptions: 'completed', + columnDescriptions: 'completed', + embeddings: 'completed', + deterministicRelationships: 'completed', + llmRelationshipValidation: 'skipped', + statisticalValidation: 'skipped', + }, + relationships: { accepted: 1, review: 0, rejected: 0, skipped: 0 }, + state: { + resumedStages: [], + completedStages: ['descriptions', 'embeddings', 'relationships'], + failedStages: [], + }, + warnings: [], + descriptionUpdates: [ + { + table: { catalog: null, db: 'public', name: 'orders' }, + tableDescription: 'AI orders table', + columnDescriptions: { + id: 'AI order id', + customer_id: 'AI customer reference', + }, + }, + { + table: { catalog: null, db: 'public', name: 'customers' }, + tableDescription: 'AI customers table', + columnDescriptions: { + id: 'AI customer id', + }, + }, + ], + embeddingUpdates: [ + { columnId: 'public.orders.id', text: 'orders id', embedding: [0.1, 0.2] }, + { columnId: 'public.orders.customer_id', text: 'orders customer_id', embedding: [0.3, 0.4] }, + ], + relationshipUpdate: { + connectionId: 'warehouse', + accepted: [ + { + id: 'public.orders:public.orders.customer_id->public.customers:public.customers.id', + source: 'inferred', + from: { + tableId: 'public.orders', + columnIds: 
['public.orders.customer_id'], + table: { catalog: null, db: 'public', name: 'orders' }, + columns: ['customer_id'], + }, + to: { + tableId: 'public.customers', + columnIds: ['public.customers.id'], + table: { catalog: null, db: 'public', name: 'customers' }, + columns: ['id'], + }, + relationshipType: 'many_to_one', + confidence: 0.95, + isPrimaryKeyReference: true, + }, + ], + rejected: [], + skipped: [], + }, + relationshipProfile: { + connectionId: 'warehouse', + driver: 'postgres', + sqlAvailable: true, + queryCount: 6, + tables: [{ table: { catalog: null, db: 'public', name: 'customers' }, rowCount: 2 }], + columns: { + 'customers.id': { + table: { catalog: null, db: 'public', name: 'customers' }, + column: 'id', + nativeType: 'integer', + normalizedType: 'integer', + rowCount: 2, + nullCount: 0, + distinctCount: 2, + uniquenessRatio: 1, + nullRate: 0, + sampleValues: ['1', '2'], + minTextLength: 1, + maxTextLength: 1, + }, + }, + warnings: [], + }, + resolvedRelationships: [ + { + id: 'public.orders:public.orders.customer_id->public.customers:public.customers.id', + source: 'llm_proposal', + status: 'accepted', + from: { + tableId: 'public.orders', + columnIds: ['public.orders.customer_id'], + table: { catalog: null, db: 'public', name: 'orders' }, + columns: ['customer_id'], + }, + to: { + tableId: 'public.customers', + columnIds: ['public.customers.id'], + table: { catalog: null, db: 'public', name: 'customers' }, + columns: ['id'], + }, + relationshipType: 'many_to_one', + confidence: 0.92, + pkScore: 0.95, + fkScore: 0.91, + score: 0.9, + evidence: { + sourceColumnBase: 'buyer', + targetTableBase: 'customer', + targetColumnBase: 'id', + targetKeyScore: 0.88, + nameScore: 0.45, + reasons: ['llm_proposal', 'llm_pk_proposal'], + llmConfidence: 0.89, + llmRationale: 'Buyer reference values align with customer identifiers.', + }, + validation: { + targetUniqueness: 1, + sourceCoverage: 1, + violationCount: 0, + violationRatio: 0, + sourceNullRate: 0, + 
targetNullRate: 0, + childDistinct: 2, + parentDistinct: 2, + overlap: 2, + checkedValues: 2, + reasons: ['validation_passed'], + }, + graph: { + targetPkScore: 0.95, + incomingCandidateCount: 1, + conflictRank: 1, + reasons: ['target_pk_score_passed', 'validation_passed', 'fk_score_passed'], + }, + }, + ], + compositeRelationships: null, + }; +} + +describe('writeLocalScanEnrichmentArtifacts', () => { + let tempDir: string; + let project: KloLocalProject; + + beforeEach(async () => { + tempDir = await mkdtemp(join(tmpdir(), 'klo-local-enrichment-artifacts-')); + project = await initKloProject({ + projectDir: join(tempDir, 'project'), + projectName: 'warehouse', + }); + }); + + afterEach(async () => { + await rm(tempDir, { recursive: true, force: true }); + }); + + it('writes enrichment artifacts and manifest shards while preserving external descriptions', async () => { + await project.fileStore.writeFile( + 'semantic-layer/warehouse/_schema/public.yaml', + YAML.stringify( + { + tables: { + orders: { + table: 'public.orders', + descriptions: { user: 'Pinned analyst description', ai: 'Old AI description' }, + columns: [ + { + name: 'id', + type: 'number', + descriptions: { user: 'Pinned id description', ai: 'Old AI id' }, + }, + { name: 'customer_id', type: 'number' }, + ], + joins: [ + { + to: 'customers', + on: 'orders.id = customers.id', + relationship: 'many_to_one', + source: 'manual', + }, + ], + }, + }, + }, + { indent: 2, lineWidth: 0 }, + ), + 'klo', + 'klo@example.com', + 'Seed manifest shard', + ); + + const result = await writeLocalScanEnrichmentArtifacts({ + project, + connectionId: 'warehouse', + syncId: 'sync-1', + driver: 'postgres', + enrichment: enrichment(), + dryRun: false, + relationshipSettings: { + enabled: true, + llmProposals: false, + validationRequiredForManifest: true, + acceptThreshold: 0.91, + reviewThreshold: 0.61, + maxLlmTablesPerBatch: 12, + maxCandidatesPerColumn: 7, + profileSampleRows: 500, + validationConcurrency: 2, + }, + }); 
+ + expect(result).toEqual({ + enrichmentArtifacts: [ + 'raw-sources/warehouse/live-database/sync-1/enrichment/descriptions.json', + 'raw-sources/warehouse/live-database/sync-1/enrichment/embeddings.json', + 'raw-sources/warehouse/live-database/sync-1/enrichment/relationships.json', + 'raw-sources/warehouse/live-database/sync-1/enrichment/relationship-profile.json', + 'raw-sources/warehouse/live-database/sync-1/enrichment/relationship-diagnostics.json', + ], + manifestShards: ['semantic-layer/warehouse/_schema/public.yaml'], + manifestShardsWritten: 1, + }); + + await expect( + readFile( + join(project.projectDir, 'raw-sources/warehouse/live-database/sync-1/enrichment/descriptions.json'), + 'utf-8', + ), + ).resolves.toContain('AI orders table'); + + const relationshipsRaw = await readFile( + join(project.projectDir, 'raw-sources/warehouse/live-database/sync-1/enrichment/relationships.json'), + 'utf-8', + ); + const relationshipsArtifact = JSON.parse(relationshipsRaw) as { + accepted: Array<{ + id: string; + status: string; + source: string; + pkScore: number; + fkScore: number; + evidence: unknown; + reasons: string[]; + validation: unknown; + graph: unknown; + }>; + review: unknown[]; + rejected: unknown[]; + skipped: unknown[]; + }; + expect(relationshipsArtifact.accepted).toHaveLength(1); + expect(relationshipsArtifact.accepted[0]).toMatchObject({ + id: 'public.orders:public.orders.customer_id->public.customers:public.customers.id', + status: 'accepted', + source: 'llm_proposal', + pkScore: 0.95, + fkScore: 0.91, + evidence: expect.objectContaining({ + llmConfidence: 0.89, + llmRationale: 'Buyer reference values align with customer identifiers.', + }), + reasons: expect.arrayContaining(['llm_proposal', 'llm_pk_proposal']), + validation: expect.objectContaining({ reasons: ['validation_passed'] }), + graph: expect.objectContaining({ reasons: ['target_pk_score_passed', 'validation_passed', 'fk_score_passed'] }), + }); + 
expect(relationshipsArtifact.review).toEqual([]); + expect(relationshipsArtifact.rejected).toEqual([]); + expect(relationshipsArtifact.skipped).toEqual([]); + + const profileRaw = await readFile( + join(project.projectDir, 'raw-sources/warehouse/live-database/sync-1/enrichment/relationship-profile.json'), + 'utf-8', + ); + expect(JSON.parse(profileRaw)).toMatchObject({ + connectionId: 'warehouse', + driver: 'postgres', + sqlAvailable: true, + queryCount: 6, + warnings: [], + }); + + const diagnosticsRaw = await readFile( + join(project.projectDir, 'raw-sources/warehouse/live-database/sync-1/enrichment/relationship-diagnostics.json'), + 'utf-8', + ); + expect(JSON.parse(diagnosticsRaw)).toMatchObject({ + connectionId: 'warehouse', + summary: { accepted: 1, review: 0, rejected: 0, skipped: 0 }, + noAcceptedReason: null, + candidateCountsBySource: { llm_proposal: 1 }, + validation: { available: true, sqlAvailable: true, queryCount: 6 }, + thresholds: { acceptThreshold: 0.91, reviewThreshold: 0.61 }, + policy: { + validationRequiredForManifest: true, + maxCandidatesPerColumn: 7, + profileSampleRows: 500, + validationConcurrency: 2, + }, + profileWarnings: [], + }); + + const manifestRaw = await readFile( + join(project.projectDir, 'semantic-layer/warehouse/_schema/public.yaml'), + 'utf-8', + ); + const manifest = YAML.parse(manifestRaw) as { + tables: { + orders: { + descriptions: Record; + columns: Array<{ name: string; descriptions?: Record }>; + joins: Array<{ to: string; on: string; source: string }>; + }; + }; + }; + + expect(manifest.tables.orders.descriptions).toEqual({ + user: 'Pinned analyst description', + db: 'DB orders table', + ai: 'AI orders table', + }); + expect(manifest.tables.orders.columns.find((column) => column.name === 'id')?.descriptions).toEqual({ + user: 'Pinned id description', + db: 'DB order id', + ai: 'AI order id', + }); + expect(manifest.tables.orders.joins).toEqual( + expect.arrayContaining([ + expect.objectContaining({ + to: 
'customers', + on: 'orders.customer_id = customers.id', + source: 'formal', + }), + expect.objectContaining({ + to: 'customers', + on: 'orders.id = customers.id', + source: 'manual', + }), + ]), + ); + }); + + it('writes formal accepted relationships into relationship artifacts and manifest shards', async () => { + const source = enrichment(); + const formalEnrichment: KloLocalScanEnrichmentResult = { + ...source, + relationshipUpdate: { + connectionId: 'warehouse', + accepted: [ + { + id: 'public.orders:public.orders.customer_id->public.customers:public.customers.id', + source: 'formal', + from: { + tableId: 'public.orders', + columnIds: ['public.orders.customer_id'], + table: { catalog: null, db: 'public', name: 'orders' }, + columns: ['customer_id'], + }, + to: { + tableId: 'public.customers', + columnIds: ['public.customers.id'], + table: { catalog: null, db: 'public', name: 'customers' }, + columns: ['id'], + }, + relationshipType: 'many_to_one', + confidence: 1, + isPrimaryKeyReference: true, + }, + ], + rejected: [], + skipped: [], + }, + resolvedRelationships: [], + compositeRelationships: null, + }; + + const result = await writeLocalScanEnrichmentArtifacts({ + project, + connectionId: 'warehouse', + driver: 'sqlite', + syncId: 'sync-formal', + enrichment: formalEnrichment, + relationshipSettings: { + enabled: true, + llmProposals: false, + validationRequiredForManifest: true, + acceptThreshold: 0.85, + reviewThreshold: 0.55, + maxLlmTablesPerBatch: 40, + maxCandidatesPerColumn: 25, + profileSampleRows: 10000, + validationConcurrency: 4, + }, + dryRun: false, + }); + + const relationshipsPath = 'raw-sources/warehouse/live-database/sync-formal/enrichment/relationships.json'; + const relationships = JSON.parse((await project.fileStore.readFile(relationshipsPath)).content) as { + accepted: Array<{ source: string; reasons: string[] }>; + }; + expect(relationships.accepted).toEqual([ + expect.objectContaining({ + source: 'formal', + reasons: 
['formal_metadata_accepted'], + }), + ]); + + const manifestPath = result.manifestShards[0]; + if (!manifestPath) { + throw new Error('Expected manifest shard path'); + } + const manifest = YAML.parse((await project.fileStore.readFile(manifestPath)).content) as { + tables: { orders: { joins: Array<{ to: string; on: string; source: string }> } }; + }; + expect(manifest.tables.orders.joins).toEqual( + expect.arrayContaining([ + expect.objectContaining({ + to: 'customers', + on: 'orders.customer_id = customers.id', + source: 'formal', + }), + ]), + ); + }); + + it('writes manually applied relationship joins with manual source', async () => { + const result = await writeLocalScanManifestShards({ + project, + connectionId: 'warehouse', + syncId: 'sync-manual', + driver: 'postgres', + snapshot, + dryRun: false, + relationshipUpdate: { + connectionId: 'warehouse', + accepted: [ + { + id: 'public.orders:(public.orders.customer_id)->public.customers:(public.customers.id)', + source: 'manual', + from: { + tableId: 'public.orders', + columnIds: ['public.orders.customer_id'], + table: { catalog: null, db: 'public', name: 'orders' }, + columns: ['customer_id'], + }, + to: { + tableId: 'public.customers', + columnIds: ['public.customers.id'], + table: { catalog: null, db: 'public', name: 'customers' }, + columns: ['id'], + }, + relationshipType: 'many_to_one', + confidence: 1, + isPrimaryKeyReference: true, + }, + ], + rejected: [], + skipped: [], + }, + }); + + expect(result.manifestShardsWritten).toBe(1); + const shard = YAML.parse(await readFile(join(tempDir, 'project/semantic-layer/warehouse/_schema/public.yaml'), 'utf8')); + expect(shard.tables.orders.joins).toContainEqual({ + to: 'customers', + on: 'orders.customer_id = customers.id', + relationship: 'many_to_one', + source: 'manual', + }); + }); + + it('writes accepted composite relationships to relationship artifacts and manifest shards', async () => { + const compositeSnapshot: KloSchemaSnapshot = { + connectionId: 
'warehouse', + driver: 'postgres', + extractedAt: '2026-05-07T12:00:00.000Z', + scope: { schemas: ['public'] }, + metadata: {}, + tables: [ + { + catalog: null, + db: 'public', + name: 'order_lines', + kind: 'table', + comment: null, + estimatedRows: 2, + foreignKeys: [], + columns: [ + { + name: 'order_id', + nativeType: 'integer', + normalizedType: 'integer', + dimensionType: 'number', + nullable: false, + primaryKey: false, + comment: null, + }, + { + name: 'line_number', + nativeType: 'integer', + normalizedType: 'integer', + dimensionType: 'number', + nullable: false, + primaryKey: false, + comment: null, + }, + ], + }, + { + catalog: null, + db: 'public', + name: 'order_line_allocations', + kind: 'table', + comment: null, + estimatedRows: 2, + foreignKeys: [], + columns: [ + { + name: 'order_id', + nativeType: 'integer', + normalizedType: 'integer', + dimensionType: 'number', + nullable: false, + primaryKey: false, + comment: null, + }, + { + name: 'line_number', + nativeType: 'integer', + normalizedType: 'integer', + dimensionType: 'number', + nullable: false, + primaryKey: false, + comment: null, + }, + ], + }, + ], + }; + const compositeEnrichment: KloLocalScanEnrichmentResult = Object.assign(enrichment(), { + snapshot: compositeSnapshot, + relationships: { accepted: 1, review: 0, rejected: 0, skipped: 0 }, + descriptionUpdates: [], + embeddingUpdates: [], + relationshipUpdate: { + connectionId: 'warehouse', + accepted: [ + { + id: 'order_line_allocations.(order_id,line_number)->order_lines.(order_id,line_number)', + source: 'inferred', + from: { + tableId: 'public.order_line_allocations', + columnIds: ['public.order_line_allocations.order_id', 'public.order_line_allocations.line_number'], + table: { catalog: null, db: 'public', name: 'order_line_allocations' }, + columns: ['order_id', 'line_number'], + }, + to: { + tableId: 'public.order_lines', + columnIds: ['public.order_lines.order_id', 'public.order_lines.line_number'], + table: { catalog: null, db: 
'public', name: 'order_lines' }, + columns: ['order_id', 'line_number'], + }, + relationshipType: 'many_to_one', + confidence: 0.95, + isPrimaryKeyReference: true, + }, + ], + rejected: [], + skipped: [], + }, + resolvedRelationships: [], + compositeRelationships: [ + { + id: 'order_line_allocations.(order_id,line_number)->order_lines.(order_id,line_number)', + source: 'composite_profile_match', + status: 'accepted', + from: { + tableId: 'public.order_line_allocations', + columnIds: ['public.order_line_allocations.order_id', 'public.order_line_allocations.line_number'], + table: { catalog: null, db: 'public', name: 'order_line_allocations' }, + columns: ['order_id', 'line_number'], + }, + to: { + tableId: 'public.order_lines', + columnIds: ['public.order_lines.order_id', 'public.order_lines.line_number'], + table: { catalog: null, db: 'public', name: 'order_lines' }, + columns: ['order_id', 'line_number'], + }, + relationshipType: 'many_to_one', + confidence: 0.95, + validation: { + targetUniqueness: 1, + sourceCoverage: 1, + violationCount: 0, + violationRatio: 0, + childDistinct: 2, + parentDistinct: 2, + overlap: 2, + reasons: ['composite_validation_passed'], + }, + }, + ], + }); + + const result = await writeLocalScanEnrichmentArtifacts({ + project, + connectionId: 'warehouse', + driver: 'postgres', + syncId: 'sync-composite', + enrichment: compositeEnrichment, + relationshipSettings: { + enabled: true, + llmProposals: false, + validationRequiredForManifest: true, + acceptThreshold: 0.85, + reviewThreshold: 0.55, + maxLlmTablesPerBatch: 40, + maxCandidatesPerColumn: 25, + profileSampleRows: 10000, + validationConcurrency: 4, + }, + dryRun: false, + }); + + const relationships = JSON.parse( + (await project.fileStore.readFile('raw-sources/warehouse/live-database/sync-composite/enrichment/relationships.json')) + .content, + ) as { accepted: Array<{ from: { columns: string[] }; to: { columns: string[] }; reasons: string[] }> }; + 
expect(relationships.accepted[0]).toMatchObject({ + from: { columns: ['order_id', 'line_number'] }, + to: { columns: ['order_id', 'line_number'] }, + reasons: ['composite_validation_passed'], + }); + + const manifestPath = result.manifestShards[0]; + if (!manifestPath) { + throw new Error('Expected manifest shard path'); + } + const manifest = YAML.parse((await project.fileStore.readFile(manifestPath)).content) as { + tables: { order_line_allocations: { joins: Array<{ to: string; on: string; source: string }> } }; + }; + expect(manifest.tables.order_line_allocations.joins).toEqual([ + { + to: 'order_lines', + on: 'order_line_allocations.order_id = order_lines.order_id AND order_line_allocations.line_number = order_lines.line_number', + relationship: 'many_to_one', + source: 'inferred', + }, + ]); + }); + + it('writes structural manifest shards without enrichment artifacts', async () => { + await project.fileStore.writeFile( + 'semantic-layer/warehouse/_schema/public.yaml', + YAML.stringify( + { + tables: { + orders: { + table: 'public.orders', + descriptions: { user: 'Pinned structural description', ai: 'Old generated text' }, + columns: [ + { + name: 'id', + type: 'number', + descriptions: { user: 'Pinned structural id', ai: 'Old generated id' }, + }, + { name: 'customer_id', type: 'number' }, + ], + joins: [ + { + to: 'customers', + on: 'orders.id = customers.id', + relationship: 'many_to_one', + source: 'manual', + }, + ], + }, + }, + }, + { indent: 2, lineWidth: 0 }, + ), + 'klo', + 'klo@example.com', + 'Seed structural manifest shard', + ); + + const result = await writeLocalScanManifestShards({ + project, + connectionId: 'warehouse', + syncId: 'sync-structural-1', + driver: 'postgres', + snapshot, + dryRun: false, + }); + + expect(result).toEqual({ + manifestShards: ['semantic-layer/warehouse/_schema/public.yaml'], + manifestShardsWritten: 1, + }); + + await expect( + readFile( + join(project.projectDir, 
'raw-sources/warehouse/live-database/sync-structural-1/enrichment/descriptions.json'), + 'utf-8', + ), + ).rejects.toMatchObject({ code: 'ENOENT' }); + + const manifestRaw = await readFile( + join(project.projectDir, 'semantic-layer/warehouse/_schema/public.yaml'), + 'utf-8', + ); + const manifest = YAML.parse(manifestRaw) as { + tables: { + orders: { + descriptions: Record; + columns: Array<{ name: string; descriptions?: Record }>; + joins: Array<{ to: string; on: string; source: string }>; + }; + }; + }; + + expect(manifest.tables.orders.descriptions).toEqual({ + user: 'Pinned structural description', + db: 'DB orders table', + }); + expect(manifest.tables.orders.columns.find((column) => column.name === 'id')?.descriptions).toEqual({ + user: 'Pinned structural id', + db: 'DB order id', + }); + expect(manifest.tables.orders.joins).toEqual( + expect.arrayContaining([ + expect.objectContaining({ + to: 'customers', + on: 'orders.customer_id = customers.id', + source: 'formal', + }), + expect.objectContaining({ + to: 'customers', + on: 'orders.id = customers.id', + source: 'manual', + }), + ]), + ); + }); + + it('returns planned empty paths without writing files during dry runs', async () => { + const result = await writeLocalScanEnrichmentArtifacts({ + project, + connectionId: 'warehouse', + syncId: 'sync-dry-run', + driver: 'postgres', + enrichment: enrichment(), + dryRun: true, + }); + + expect(result).toEqual({ + enrichmentArtifacts: [], + manifestShards: [], + manifestShardsWritten: 0, + }); + await expect( + readFile( + join(project.projectDir, 'raw-sources/warehouse/live-database/sync-dry-run/enrichment/descriptions.json'), + 'utf-8', + ), + ).rejects.toMatchObject({ code: 'ENOENT' }); + }); +}); diff --git a/packages/context/src/scan/local-enrichment-artifacts.ts b/packages/context/src/scan/local-enrichment-artifacts.ts new file mode 100644 index 00000000..0cd06220 --- /dev/null +++ b/packages/context/src/scan/local-enrichment-artifacts.ts @@ -0,0 +1,417 @@ 
+import YAML from 'yaml';
+import {
+  buildLiveDatabaseManifestShards,
+  type LiveDatabaseManifestExistingDescriptions,
+  type LiveDatabaseManifestJoinData,
+  type LiveDatabaseManifestJoinEntry,
+  type LiveDatabaseManifestShard,
+  type LiveDatabaseManifestTableData,
+} from '../ingest/index.js';
+import type { KloScanRelationshipConfig } from '../project/config.js';
+import type { KloLocalProject } from '../project/index.js';
+import type { KloLocalScanEnrichmentResult } from './local-enrichment.js';
+import {
+  buildKloRelationshipArtifacts,
+  buildKloRelationshipDiagnostics,
+  emptyKloRelationshipProfileArtifact,
+} from './relationship-diagnostics.js';
+import type { KloConnectionDriver, KloSchemaColumn, KloSchemaSnapshot, KloSchemaTable } from './types.js';
+
+// Path segments and commit identity used for every file this module writes.
+const LIVE_DATABASE_ADAPTER = 'live-database';
+const LOCAL_AUTHOR = 'klo';
+const LOCAL_AUTHOR_EMAIL = 'klo@example.com';
+const SCHEMA_DIR = '_schema';
+const SL_DIR_PREFIX = 'semantic-layer';
+
+/** Input for {@link writeLocalScanManifestShards}. */
+export interface WriteLocalScanManifestShardsInput {
+  project: KloLocalProject;
+  connectionId: string;
+  syncId: string;
+  driver: KloConnectionDriver;
+  snapshot: KloSchemaSnapshot;
+  /** When true nothing is read or written and an empty result is returned. */
+  dryRun: boolean;
+  descriptionUpdates?: KloLocalScanEnrichmentResult['descriptionUpdates'];
+  relationshipUpdate?: KloLocalScanEnrichmentResult['relationshipUpdate'];
+}
+
+/** Paths of the manifest shards that were written, in shard-key order. */
+export interface WriteLocalScanManifestShardsResult {
+  manifestShards: string[];
+  manifestShardsWritten: number;
+}
+
+/** Input for {@link writeLocalScanEnrichmentArtifacts}. */
+export interface WriteLocalScanEnrichmentArtifactsInput {
+  project: KloLocalProject;
+  connectionId: string;
+  syncId: string;
+  driver: KloConnectionDriver;
+  enrichment: KloLocalScanEnrichmentResult;
+  /** When true nothing is written and an empty result is returned. */
+  dryRun: boolean;
+  /** Optional settings echoed into the relationship diagnostics artifact. */
+  relationshipSettings?: KloScanRelationshipConfig;
+}
+
+/** Manifest shard result plus the paths of the enrichment JSON artifacts. */
+export interface WriteLocalScanEnrichmentArtifactsResult extends WriteLocalScanManifestShardsResult {
+  enrichmentArtifacts: string[];
+}
+
+/** Descriptions and joins recovered from shards already on disk, keyed by table name. */
+interface ExistingManifestState {
+  descriptions: Map<string, LiveDatabaseManifestExistingDescriptions>;
+  preservedJoins: Map<string, LiveDatabaseManifestJoinEntry[]>;
+}
+
+type LocalDescriptionUpdates = KloLocalScanEnrichmentResult['descriptionUpdates'];
+type LocalDescriptionUpdate = NonNullable<LocalDescriptionUpdates>[number];
+
+/** Directory that holds the enrichment JSON artifacts of one sync. */
+function artifactDir(connectionId: string, syncId: string): string {
+  return `raw-sources/${connectionId}/${LIVE_DATABASE_ADAPTER}/${syncId}/enrichment`;
+}
+
+/** Directory that holds the manifest shards of one connection. */
+function schemaDir(connectionId: string): string {
+  return `${SL_DIR_PREFIX}/${connectionId}/${SCHEMA_DIR}`;
+}
+
+/**
+ * Picks the description update that belongs to `table`.
+ *
+ * An update whose catalog, db and name all match wins. A name-only match is
+ * accepted only when the name is unique in the snapshot; otherwise tables that
+ * share a name across schemas would receive each other's descriptions.
+ */
+function findDescriptionUpdate(
+  table: KloSchemaTable,
+  nameIsAmbiguous: boolean,
+  descriptionUpdates: LocalDescriptionUpdates = [],
+): LocalDescriptionUpdate | undefined {
+  const sameName = descriptionUpdates.filter((candidate) => candidate.table.name === table.name);
+  const exact = sameName.find(
+    (candidate) =>
+      (candidate.table.catalog ?? null) === (table.catalog ?? null) &&
+      (candidate.table.db ?? null) === (table.db ?? null),
+  );
+  if (exact || nameIsAmbiguous) {
+    return exact;
+  }
+  return sameName[0];
+}
+
+/** Builds the `db` / `ai` description map for a table, or `undefined` when both are absent. */
+function tableDescription(
+  table: KloSchemaTable,
+  update: LocalDescriptionUpdate | undefined,
+): Record<string, string> | undefined {
+  const descriptions: Record<string, string> = {};
+  if (table.comment) {
+    descriptions.db = table.comment;
+  }
+  if (update?.tableDescription) {
+    descriptions.ai = update.tableDescription;
+  }
+  return Object.keys(descriptions).length > 0 ? descriptions : undefined;
+}
+
+/** Builds the `db` / `ai` description map for a column, or `undefined` when both are absent. */
+function columnDescription(
+  column: KloSchemaColumn,
+  update: LocalDescriptionUpdate | undefined,
+): Record<string, string> | undefined {
+  const aiDescription = update?.columnDescriptions[column.name] ?? null;
+  const descriptions: Record<string, string> = {};
+  if (column.comment) {
+    descriptions.db = column.comment;
+  }
+  if (aiDescription) {
+    descriptions.ai = aiDescription;
+  }
+  return Object.keys(descriptions).length > 0 ? descriptions : undefined;
+}
+
+/** Converts snapshot tables into the table data consumed by the manifest shard builder. */
+function snapshotTablesToManifestData(
+  snapshot: KloSchemaSnapshot,
+  descriptionUpdates: LocalDescriptionUpdates = [],
+): LiveDatabaseManifestTableData[] {
+  // Count table names so same-named tables in different schemas are not cross-matched.
+  const nameCounts = new Map<string, number>();
+  for (const table of snapshot.tables) {
+    nameCounts.set(table.name, (nameCounts.get(table.name) ?? 0) + 1);
+  }
+
+  return snapshot.tables.map((table) => {
+    // Resolve the update once per table instead of once per table and per column.
+    const update = findDescriptionUpdate(table, (nameCounts.get(table.name) ?? 0) > 1, descriptionUpdates);
+    return {
+      name: table.name,
+      catalog: table.catalog,
+      db: table.db,
+      descriptions: tableDescription(table, update),
+      columns: table.columns.map((column) => ({
+        name: column.name,
+        type: column.dimensionType,
+        ...(column.primaryKey ? { pk: true } : {}),
+        ...(column.nullable === false ? { nullable: false } : {}),
+        descriptions: columnDescription(column, update),
+      })),
+    };
+  });
+}
+
+/** Turns every foreign key declared in the snapshot into a `formal` many-to-one join. */
+function formalJoins(snapshot: KloSchemaSnapshot): LiveDatabaseManifestJoinData[] {
+  const joins: LiveDatabaseManifestJoinData[] = [];
+  for (const table of snapshot.tables) {
+    for (const foreignKey of table.foreignKeys) {
+      joins.push({
+        fromTable: table.name,
+        fromColumns: [foreignKey.fromColumn],
+        toTable: foreignKey.toTable,
+        toColumns: [foreignKey.toColumn],
+        relationship: 'many_to_one',
+        source: 'formal',
+      });
+    }
+  }
+  return joins;
+}
+
+/** Maps the accepted relationships of an update (if any) to manifest join data. */
+function acceptedRelationshipJoins(
+  relationshipUpdate: KloLocalScanEnrichmentResult['relationshipUpdate'] | undefined,
+): LiveDatabaseManifestJoinData[] {
+  return (relationshipUpdate?.accepted ?? []).map((relationship) => ({
+    fromTable: relationship.from.table.name,
+    fromColumns: relationship.from.columns,
+    toTable: relationship.to.table.name,
+    toColumns: relationship.to.columns,
+    relationship: relationship.relationshipType,
+    source: relationship.source,
+  }));
+}
+
+/** Orders joins as: manual accepted, formal foreign keys, then all other accepted joins. */
+function relationshipJoins(
+  snapshot: KloSchemaSnapshot,
+  relationshipUpdate: KloLocalScanEnrichmentResult['relationshipUpdate'] | undefined,
+): LiveDatabaseManifestJoinData[] {
+  const accepted = acceptedRelationshipJoins(relationshipUpdate);
+  const manual = accepted.filter((relationship) => relationship.source === 'manual');
+  const generated = accepted.filter((relationship) => relationship.source !== 'manual');
+  return [...manual, ...formalJoins(snapshot), ...generated];
+}
+
+/** Indexes the snapshot's column names by table name. */
+function validColumns(snapshot: KloSchemaSnapshot): Map<string, Set<string>> {
+  return new Map(snapshot.tables.map((table) => [table.name, new Set(table.columns.map((column) => column.name))]));
+}
+
+/**
+ * Reports whether every `table.column = table.column` term of a join condition
+ * refers to columns that exist in the snapshot.
+ *
+ * Terms that do not parse, and tables missing from `columnsByTable`, are treated
+ * as valid so that hand-written conditions are never dropped on a guess.
+ */
+function joinReferencesExistingColumns(
+  join: LiveDatabaseManifestJoinEntry,
+  columnsByTable: Map<string, Set<string>>,
+): boolean {
+  const terms = join.on.split(/\s+AND\s+/iu);
+  for (const term of terms) {
+    const match = term.match(/^(\w+)\.(\w+)\s*=\s*(\w+)\.(\w+)$/u);
+    if (!match) {
+      return true;
+    }
+    const leftTable = match[1];
+    const leftColumn = match[2];
+    const rightTable = match[3];
+    const rightColumn = match[4];
+    if (!leftTable || !leftColumn || !rightTable || !rightColumn) {
+      return true;
+    }
+    const leftColumns = columnsByTable.get(leftTable);
+    const rightColumns = columnsByTable.get(rightTable);
+    if ((leftColumns && !leftColumns.has(leftColumn)) || (rightColumns && !rightColumns.has(rightColumn))) {
+      return false;
+    }
+  }
+  return true;
+}
+
+/**
+ * Reads the connection's existing manifest shards and collects what a rewrite
+ * must carry over: descriptions per table and column, and `manual` / `inferred`
+ * joins that still point at tables and columns present in the snapshot.
+ *
+ * Loading is best-effort: a missing schema directory yields empty state, and a
+ * shard that cannot be read or parsed is ignored.
+ */
+async function loadExistingManifestState(
+  project: KloLocalProject,
+  connectionId: string,
+  snapshot: KloSchemaSnapshot,
+): Promise<ExistingManifestState> {
+  const descriptions = new Map<string, LiveDatabaseManifestExistingDescriptions>();
+  const preservedJoins = new Map<string, LiveDatabaseManifestJoinEntry[]>();
+  const validTableNames = new Set(snapshot.tables.map((table) => table.name));
+  const columnsByTable = validColumns(snapshot);
+
+  let files: string[];
+  try {
+    files = (await project.fileStore.listFiles(schemaDir(connectionId))).files.filter((file) => file.endsWith('.yaml'));
+  } catch {
+    return { descriptions, preservedJoins };
+  }
+
+  for (const file of files) {
+    try {
+      const { content } = await project.fileStore.readFile(file);
+      const shard = YAML.parse(content) as LiveDatabaseManifestShard | null;
+      if (!shard?.tables) {
+        continue;
+      }
+      for (const [tableName, entry] of Object.entries(shard.tables)) {
+        // Tables that are no longer in the snapshot contribute nothing.
+        if (!validTableNames.has(tableName)) {
+          continue;
+        }
+        descriptions.set(tableName, {
+          table: entry.descriptions ? { ...entry.descriptions } : undefined,
+          columns: new Map(
+            (entry.columns ?? []).flatMap((column) =>
+              column.descriptions ? ([[column.name, { ...column.descriptions }]] as const) : [],
+            ),
+          ),
+        });
+        const joins = (entry.joins ?? []).filter((join) => {
+          return (
+            (join.source === 'manual' || join.source === 'inferred') &&
+            validTableNames.has(join.to) &&
+            joinReferencesExistingColumns(join, columnsByTable)
+          );
+        });
+        if (joins.length > 0) {
+          preservedJoins.set(tableName, joins);
+        }
+      }
+    } catch {
+      // Best-effort: skip shards that cannot be read or parsed.
+      continue;
+    }
+  }
+
+  return { descriptions, preservedJoins };
+}
+
+/** Writes `value` as pretty-printed JSON (with a trailing newline) through the project file store. */
+async function writeJsonArtifact(
+  project: KloLocalProject,
+  path: string,
+  value: unknown,
+  commitMessage: string,
+): Promise<void> {
+  await project.fileStore.writeFile(
+    path,
+    `${JSON.stringify(value, null, 2)}\n`,
+    LOCAL_AUTHOR,
+    LOCAL_AUTHOR_EMAIL,
+    commitMessage,
+  );
+}
+
+/**
+ * Rebuilds and writes the manifest shards for a scanned connection.
+ *
+ * Existing shard state is loaded first and handed to the shard builder together
+ * with the snapshot tables and the manual, formal and generated joins. Shards
+ * are written in shard-key order.
+ *
+ * @returns The written shard paths; empty when `input.dryRun` is true.
+ */
+export async function writeLocalScanManifestShards(
+  input: WriteLocalScanManifestShardsInput,
+): Promise<WriteLocalScanManifestShardsResult> {
+  if (input.dryRun) {
+    return {
+      manifestShards: [],
+      manifestShardsWritten: 0,
+    };
+  }
+
+  const existing = await loadExistingManifestState(input.project, input.connectionId, input.snapshot);
+  const { shards } = buildLiveDatabaseManifestShards({
+    connectionType: input.driver.toUpperCase(),
+    tables: snapshotTablesToManifestData(input.snapshot, input.descriptionUpdates),
+    joins: relationshipJoins(input.snapshot, input.relationshipUpdate),
+    existingDescriptions: existing.descriptions,
+    existingPreservedJoins: existing.preservedJoins,
+    mapColumnType: (dimensionType) => dimensionType,
+  });
+
+  const manifestShards: string[] = [];
+  for (const [shardKey, shard] of [...shards.entries()].sort(([left], [right]) => left.localeCompare(right))) {
+    const path = `${schemaDir(input.connectionId)}/${shardKey}.yaml`;
+    await input.project.fileStore.writeFile(
+      path,
+      YAML.stringify(shard, { indent: 2, lineWidth: 0 }),
+      LOCAL_AUTHOR,
+      LOCAL_AUTHOR_EMAIL,
+      `scan(${LIVE_DATABASE_ADAPTER}): write manifest shard ${shardKey} syncId=${input.syncId}`,
+    );
+    manifestShards.push(path);
+  }
+
+  return {
+    manifestShards,
+    manifestShardsWritten: manifestShards.length,
+  };
+}
+
+/**
+ * Writes the enrichment artifacts of a scan and then the manifest shards.
+ *
+ * `descriptions.json` is written when table or column descriptions completed,
+ * and `embeddings.json` when embeddings completed. The relationships, profile
+ * and diagnostics artifacts are always written; an empty profile artifact
+ * stands in when relationship profiling did not run.
+ *
+ * @returns Artifact and shard paths; all empty when `input.dryRun` is true.
+ */
+export async function writeLocalScanEnrichmentArtifacts(
+  input: WriteLocalScanEnrichmentArtifactsInput,
+): Promise<WriteLocalScanEnrichmentArtifactsResult> {
+  if (input.dryRun) {
+    return {
+      enrichmentArtifacts: [],
+      manifestShards: [],
+      manifestShardsWritten: 0,
+    };
+  }
+
+  const enrichmentRoot = artifactDir(input.connectionId, input.syncId);
+  const descriptionsArtifact = `${enrichmentRoot}/descriptions.json`;
+  const embeddingsArtifact = `${enrichmentRoot}/embeddings.json`;
+  const relationshipsArtifact = `${enrichmentRoot}/relationships.json`;
+  const relationshipProfileArtifact = `${enrichmentRoot}/relationship-profile.json`;
+  const relationshipDiagnosticsArtifact = `${enrichmentRoot}/relationship-diagnostics.json`;
+  const enrichmentArtifacts: string[] = [];
+
+  if (
+    input.enrichment.summary.tableDescriptions === 'completed' ||
+    input.enrichment.summary.columnDescriptions === 'completed'
+  ) {
+    enrichmentArtifacts.push(descriptionsArtifact);
+    await writeJsonArtifact(
+      input.project,
+      descriptionsArtifact,
+      input.enrichment.descriptionUpdates,
+      `scan(${LIVE_DATABASE_ADAPTER}): write enrichment descriptions syncId=${input.syncId}`,
+    );
+  }
+  if (input.enrichment.summary.embeddings === 'completed') {
+    enrichmentArtifacts.push(embeddingsArtifact);
+    await writeJsonArtifact(
+      input.project,
+      embeddingsArtifact,
+      input.enrichment.embeddingUpdates,
+      `scan(${LIVE_DATABASE_ADAPTER}): write enrichment embeddings syncId=${input.syncId}`,
+    );
+  }
+  enrichmentArtifacts.push(relationshipsArtifact, relationshipProfileArtifact, relationshipDiagnosticsArtifact);
+  // An explicit `null` is passed on as `undefined`; a missing value becomes an empty list.
+  const hasResolvedRelationships = input.enrichment.resolvedRelationships !== null;
+  const relationshipArtifacts = buildKloRelationshipArtifacts({
+    connectionId: input.connectionId,
+    resolvedRelationships: hasResolvedRelationships ? (input.enrichment.resolvedRelationships ?? []) : undefined,
+    compositeRelationships: input.enrichment.compositeRelationships ?? undefined,
+    relationshipUpdate: input.enrichment.relationshipUpdate ?? {
+      connectionId: input.connectionId,
+      accepted: [],
+      rejected: [],
+      skipped: [],
+    },
+  });
+  const relationshipProfile =
+    input.enrichment.relationshipProfile ??
+    emptyKloRelationshipProfileArtifact({
+      connectionId: input.connectionId,
+      driver: input.driver,
+      reason: 'relationship_profiling_not_run',
+    });
+  const relationshipDiagnostics = buildKloRelationshipDiagnostics({
+    connectionId: input.connectionId,
+    artifacts: relationshipArtifacts,
+    profile: relationshipProfile,
+    warnings: input.enrichment.warnings,
+    thresholds: input.relationshipSettings
+      ? {
+          acceptThreshold: input.relationshipSettings.acceptThreshold,
+          reviewThreshold: input.relationshipSettings.reviewThreshold,
+        }
+      : undefined,
+    policy: input.relationshipSettings
+      ? {
+          validationRequiredForManifest: input.relationshipSettings.validationRequiredForManifest,
+          maxCandidatesPerColumn: input.relationshipSettings.maxCandidatesPerColumn,
+          profileSampleRows: input.relationshipSettings.profileSampleRows,
+          validationConcurrency: input.relationshipSettings.validationConcurrency,
+        }
+      : undefined,
+  });
+
+  await writeJsonArtifact(
+    input.project,
+    relationshipsArtifact,
+    relationshipArtifacts,
+    `scan(${LIVE_DATABASE_ADAPTER}): write enrichment relationships syncId=${input.syncId}`,
+  );
+  await writeJsonArtifact(
+    input.project,
+    relationshipProfileArtifact,
+    relationshipProfile,
+    `scan(${LIVE_DATABASE_ADAPTER}): write relationship profile syncId=${input.syncId}`,
+  );
+  await writeJsonArtifact(
+    input.project,
+    relationshipDiagnosticsArtifact,
+    relationshipDiagnostics,
+    `scan(${LIVE_DATABASE_ADAPTER}): write relationship diagnostics syncId=${input.syncId}`,
+  );
+
+  // The dry-run case returned above, so the shards are always written here.
+  const manifestResult = await writeLocalScanManifestShards({
+    project: input.project,
+    connectionId: input.connectionId,
+    syncId: input.syncId,
+    driver: input.driver,
+    snapshot: input.enrichment.snapshot,
+    descriptionUpdates: input.enrichment.descriptionUpdates,
+    relationshipUpdate: input.enrichment.relationshipUpdate,
+    dryRun: false,
+  });
+
+  return {
+    enrichmentArtifacts,
+    manifestShards: manifestResult.manifestShards,
+    manifestShardsWritten: manifestResult.manifestShardsWritten,
+  };
+}
diff --git a/packages/context/src/scan/local-enrichment.test.ts b/packages/context/src/scan/local-enrichment.test.ts new file mode 100644 index 00000000..8ac0b59c --- /dev/null +++ b/packages/context/src/scan/local-enrichment.test.ts @@ -0,0 +1,742 @@ +import Database from 'better-sqlite3'; +import { describe, expect, it, vi } from 'vitest'; +import { buildDefaultKloProjectConfig } from '../project/config.js'; +import type { + KloScanEnrichmentCompletedStage, + KloScanEnrichmentFailedStage, + KloScanEnrichmentStageLookup, + KloScanEnrichmentStateStore, +} from './enrichment-state.js'; +import { + createDeterministicLocalScanEnrichmentProviders, + runLocalScanEnrichment, + snapshotToKloEnrichedSchema, +} from './local-enrichment.js'; +import { createLocalScanEnrichmentProvidersFromConfig } from './local-scan.js'; +import { + createKloConnectorCapabilities, + type KloQueryResult, + type KloReadOnlyQueryInput, + type KloScanConnector, + type KloScanContext, + type KloSchemaSnapshot, +} from './types.js'; + +const snapshot: KloSchemaSnapshot = { + connectionId: 'warehouse', + driver: 'postgres', + extractedAt: '2026-04-29T12:00:00.000Z', + scope: { schemas: ['public'] }, + metadata: {}, + tables: [ + { + catalog: null, + db: 'public', + name: 'customers', + kind: 'table', + comment: 'Customer accounts', + estimatedRows: 2, + foreignKeys: [], + columns: [ + { + name: 'id', + nativeType: 'integer', + normalizedType: 'integer', + dimensionType: 'number', + nullable: false, + primaryKey: true, + comment: 'Customer id', + }, + ], + }, + { + catalog: null, + db: 'public', + name: 'orders', + kind: 'table', + comment: 'Customer orders', + estimatedRows: 3, + foreignKeys: [], + columns: [ + { + name: 'id', + nativeType: 'integer', + normalizedType: 'integer', + 
/**
 * Test double that serves read-only SQL from an in-memory SQLite database so
 * relationship discovery can be exercised against real query results.
 */
class InMemorySqliteExecutor {
  /** Backing database; tests seed it directly through `db.exec(...)`. */
  readonly db = new Database(':memory:');

  /**
   * Runs `input.sql` and converts the row objects into the header/row-array
   * result shape.
   *
   * Headers are taken from the keys of the first row, so a query that returns
   * no rows yields an empty `headers` array.
   *
   * @param input read-only query request; only `input.sql` is used here
   * @param _ctx scan context (unused by this test double)
   * @returns a resolved promise carrying headers, positional rows and row counts
   */
  executeReadOnly(input: KloReadOnlyQueryInput, _ctx: KloScanContext): Promise<KloQueryResult> {
    const records = this.db.prepare(input.sql).all() as Record<string, unknown>[];
    const headers = Object.keys(records[0] ?? {});
    const rows = records.map((record) => headers.map((header) => record[header]));
    return Promise.resolve({
      headers,
      rows,
      totalRows: records.length,
      rowCount: records.length,
    });
  }

  /** Releases the in-memory database. */
  close(): void {
    this.db.close();
  }
}
[...records.values()].filter((record) => record.runId === runId); + }, + }; +} + +describe('local scan enrichment', () => { + it('maps a scan snapshot into relationship detector schema', () => { + const schema = snapshotToKloEnrichedSchema(snapshot); + + expect(schema.connectionId).toBe('warehouse'); + expect(schema.tables).toHaveLength(2); + expect(schema.tables[1]?.columns.map((column) => column.name)).toEqual(['id', 'customer_id']); + expect(schema.tables[1]?.columns[1]).toMatchObject({ + id: 'public.orders.customer_id', + tableId: 'public.orders', + primaryKey: false, + sampleValues: null, + embedding: null, + }); + }); + + it('maps snapshot foreign keys into formal schema relationships', () => { + const source = noDeclaredRelationshipSnapshot(); + const snapshotWithForeignKey = { + ...source, + tables: source.tables.map((table) => + table.name === 'orders' + ? { + ...table, + foreignKeys: [ + { + fromColumn: 'account_id', + toCatalog: null, + toDb: null, + toTable: 'accounts', + toColumn: 'id', + constraintName: 'orders_account_id_fkey', + }, + ], + } + : table.name === 'accounts' + ? { + ...table, + columns: table.columns.map((column) => + column.name === 'id' ? 
{ ...column, primaryKey: true } : column, + ), + } + : table, + ), + }; + + const schema = snapshotToKloEnrichedSchema(snapshotWithForeignKey); + + expect(schema.relationships).toEqual([ + { + id: 'orders:(orders.account_id)->accounts:(accounts.id)', + source: 'formal', + from: { + tableId: 'orders', + columnIds: ['orders.account_id'], + table: { catalog: null, db: null, name: 'orders' }, + columns: ['account_id'], + }, + to: { + tableId: 'accounts', + columnIds: ['accounts.id'], + table: { catalog: null, db: null, name: 'accounts' }, + columns: ['id'], + }, + relationshipType: 'many_to_one', + confidence: 1, + isPrimaryKeyReference: true, + }, + ]); + }); + + it('runs deterministic relationship detection for relationship scans', async () => { + const result = await runLocalScanEnrichment({ + connectionId: 'warehouse', + mode: 'relationships', + detectRelationships: true, + connector: connector(), + context: { runId: 'scan-run-1' }, + providers: null, + }); + + expect(result.summary).toMatchObject({ + deterministicRelationships: 'completed', + llmRelationshipValidation: 'skipped', + embeddings: 'skipped', + }); + expect(result.relationships).toEqual({ accepted: 0, review: 1, rejected: 0, skipped: 0 }); + expect(result.summary.statisticalValidation).toBe('skipped'); + expect(result.warnings).toContainEqual({ + code: 'relationship_validation_failed', + message: 'KLO scan connector advertises readOnlySql but does not expose executeReadOnly', + recoverable: true, + metadata: { capability: 'readOnlySql' }, + }); + }); + + it('runs relationship discovery with connector SQL evidence', async () => { + const executor = new InMemorySqliteExecutor(); + try { + executor.db.exec(` + CREATE TABLE accounts (id INTEGER NOT NULL); + CREATE TABLE orders (id INTEGER NOT NULL, account_id INTEGER NOT NULL); + INSERT INTO accounts (id) VALUES (1), (2); + INSERT INTO orders (id, account_id) VALUES (10, 1), (11, 1), (12, 2); + `); + const scanConnector = { + ...connector(), + driver: 
'sqlite' as const, + capabilities: createKloConnectorCapabilities({ readOnlySql: true, columnStats: true }), + introspect: vi.fn(async () => noDeclaredRelationshipSnapshot()), + executeReadOnly: executor.executeReadOnly.bind(executor), + }; + + const result = await runLocalScanEnrichment({ + connectionId: 'warehouse', + mode: 'relationships', + detectRelationships: true, + connector: scanConnector, + context: { runId: 'scan-run-relationship-discovery' }, + providers: null, + }); + + expect(result.relationships).toEqual({ accepted: 1, review: 0, rejected: 0, skipped: 0 }); + expect(result.summary.statisticalValidation).toBe('completed'); + expect(result.relationshipProfile).toMatchObject({ sqlAvailable: true }); + expect(result.resolvedRelationships).toEqual([ + expect.objectContaining({ + status: 'accepted', + from: expect.objectContaining({ table: expect.objectContaining({ name: 'orders' }), columns: ['account_id'] }), + to: expect.objectContaining({ table: expect.objectContaining({ name: 'accounts' }), columns: ['id'] }), + }), + ]); + expect(result.relationshipUpdate?.accepted).toHaveLength(1); + } finally { + executor.close(); + } + }); + + it('honors scan relationship config when LLM proposals are disabled', async () => { + const providers = createDeterministicLocalScanEnrichmentProviders({ embeddingDimensions: 3 }); + const getModel = vi.fn(() => ({ modelId: 'provider/language-model', provider: 'gateway' })); + const result = await runLocalScanEnrichment({ + connectionId: 'warehouse', + mode: 'relationships', + detectRelationships: true, + connector: connector(), + context: { runId: 'scan-run-llm-disabled' }, + providers: { + ...providers, + llm: { + ...providers.llm, + getModel: getModel as never, + }, + }, + relationshipSettings: { + ...buildDefaultKloProjectConfig('warehouse').scan.relationships, + llmProposals: false, + maxLlmTablesPerBatch: 40, + }, + }); + + expect(result.summary.llmRelationshipValidation).toBe('skipped'); + 
expect(getModel).not.toHaveBeenCalledWith('candidateExtraction'); + }); + + it('skips relationship detection when scan relationships are disabled', async () => { + const settings = { + ...buildDefaultKloProjectConfig('warehouse').scan.relationships, + enabled: false, + }; + const result = await runLocalScanEnrichment({ + connectionId: 'warehouse', + mode: 'enriched', + connector: connector(), + context: { runId: 'disabled-relationships' }, + providers: createDeterministicLocalScanEnrichmentProviders(), + relationshipSettings: settings, + }); + + expect(result.summary.deterministicRelationships).toBe('skipped'); + expect(result.summary.statisticalValidation).toBe('skipped'); + expect(result.summary.llmRelationshipValidation).toBe('skipped'); + expect(result.relationships).toEqual({ accepted: 0, review: 0, rejected: 0, skipped: 0 }); + expect(result.relationshipUpdate).toBeNull(); + expect(result.relationshipProfile).toBeNull(); + expect(result.resolvedRelationships).toBeNull(); + }); + + it('runs configured deterministic enrichment with descriptions and embeddings', async () => { + const result = await runLocalScanEnrichment({ + connectionId: 'warehouse', + mode: 'enriched', + detectRelationships: true, + connector: connector(), + context: { runId: 'scan-run-2' }, + providers: createDeterministicLocalScanEnrichmentProviders({ embeddingDimensions: 6 }), + }); + + expect(result.summary).toMatchObject({ + dataDictionary: 'completed', + tableDescriptions: 'completed', + columnDescriptions: 'completed', + embeddings: 'completed', + deterministicRelationships: 'completed', + }); + expect(result.embeddingUpdates).toHaveLength(3); + expect(result.embeddingUpdates[0]?.embedding).toHaveLength(6); + expect(result.snapshot).toEqual(snapshot); + expect(result.relationships).toEqual({ accepted: 0, review: 1, rejected: 0, skipped: 0 }); + }); + + it('reports enrichment progress for countable stages', async () => { + const events: Array<{ progress: number; message?: string; 
transient?: boolean }> = []; + const progress = { + async update(progressValue: number, message?: string, options?: { transient?: boolean }) { + events.push({ progress: progressValue, message, transient: options?.transient }); + }, + startPhase() { + return progress; + }, + }; + + await runLocalScanEnrichment({ + connectionId: 'warehouse', + mode: 'enriched', + detectRelationships: true, + connector: connector(), + context: { runId: 'scan-run-progress', progress }, + providers: createDeterministicLocalScanEnrichmentProviders({ embeddingDimensions: 6 }), + }); + + expect(events).toEqual( + expect.arrayContaining([ + expect.objectContaining({ message: 'Generating descriptions 1/2 tables', transient: true }), + expect.objectContaining({ message: 'Generating descriptions 2/2 tables', transient: true }), + expect.objectContaining({ message: 'Building embeddings 1/1 batches', transient: true }), + expect.objectContaining({ message: 'Detecting relationships' }), + ]), + ); + }); + + it('reports progress before enrichment connector introspection starts', async () => { + const events: Array<{ progress: number; message?: string; transient?: boolean }> = []; + const progress = { + async update(progressValue: number, message?: string, options?: { transient?: boolean }) { + events.push({ progress: progressValue, message, transient: options?.transient }); + }, + startPhase() { + return progress; + }, + }; + const scanConnector = { + ...connector(), + introspect: vi.fn(async () => { + expect(events).toContainEqual(expect.objectContaining({ message: 'Loading enrichment schema snapshot' })); + return snapshot; + }), + }; + + await runLocalScanEnrichment({ + connectionId: 'warehouse', + mode: 'relationships', + detectRelationships: true, + connector: scanConnector, + context: { runId: 'scan-run-progress-before-introspection', progress }, + providers: null, + }); + + expect(scanConnector.introspect).toHaveBeenCalled(); + }); + + it('splits enrichment embedding requests by provider 
batch size', async () => { + const manyColumnSnapshot: KloSchemaSnapshot = { + ...snapshot, + tables: [ + { + catalog: null, + db: 'public', + name: 'wide_orders', + kind: 'table', + comment: 'Wide order facts', + estimatedRows: 3, + foreignKeys: [], + columns: Array.from({ length: 5 }, (_, index) => ({ + name: `metric_${index + 1}`, + nativeType: 'integer', + normalizedType: 'integer', + dimensionType: 'number' as const, + nullable: false, + primaryKey: false, + comment: `Metric ${index + 1}`, + })), + }, + ], + }; + const scanConnector = { + ...connector(), + introspect: vi.fn(async () => manyColumnSnapshot), + }; + const deterministicProviders = createDeterministicLocalScanEnrichmentProviders({ embeddingDimensions: 3 }); + const embedBatch = vi.fn(async (texts: string[]) => { + if (texts.length > 2) { + throw new Error(`Embedding batch size ${texts.length} exceeds maximum 2`); + } + return texts.map((_, index) => [index, index + 1, index + 2]); + }); + + const result = await runLocalScanEnrichment({ + connectionId: 'warehouse', + mode: 'enriched', + detectRelationships: false, + connector: scanConnector, + context: { runId: 'scan-run-batched-embeddings' }, + providers: { + llm: deterministicProviders.llm, + embedding: { + dimensions: 3, + maxBatchSize: 2, + embedBatch, + }, + }, + }); + + expect(result.embeddingUpdates).toHaveLength(5); + expect(embedBatch.mock.calls.map(([texts]) => texts).map((texts) => texts.length)).toEqual([2, 2, 1]); + }); + + it('reuses completed description and embedding stages for the same run id and snapshot hash', async () => { + const stateStore = memoryEnrichmentStateStore(); + const scanConnector = connector(); + const providers = createDeterministicLocalScanEnrichmentProviders({ embeddingDimensions: 6 }); + + const first = await runLocalScanEnrichment({ + connectionId: 'warehouse', + mode: 'enriched', + detectRelationships: true, + connector: scanConnector, + context: { runId: 'scan-run-resume-1' }, + providers, + stateStore, + 
syncId: 'sync-resume-1', + providerIdentity: { provider: 'deterministic', embeddingDimensions: 6 }, + }); + + const getModel = vi.spyOn(providers.llm, 'getModel'); + const embedBatch = vi.spyOn(providers.embedding, 'embedBatch'); + const second = await runLocalScanEnrichment({ + connectionId: 'warehouse', + mode: 'enriched', + detectRelationships: true, + connector: scanConnector, + context: { runId: 'scan-run-resume-1' }, + providers, + stateStore, + syncId: 'sync-resume-1', + providerIdentity: { provider: 'deterministic', embeddingDimensions: 6 }, + }); + + expect(first.state.completedStages).toEqual(['descriptions', 'embeddings', 'relationships']); + expect(first.state.resumedStages).toEqual([]); + expect(second.state.resumedStages).toEqual(['descriptions', 'embeddings', 'relationships']); + expect(second.state.completedStages).toEqual(['descriptions', 'embeddings', 'relationships']); + expect(getModel).not.toHaveBeenCalled(); + expect(embedBatch).not.toHaveBeenCalled(); + expect(second.descriptionUpdates).toEqual(first.descriptionUpdates); + expect(second.embeddingUpdates).toEqual(first.embeddingUpdates); + expect(second.relationships).toEqual(first.relationships); + }); + + it('does not reuse completed stages when the snapshot changes', async () => { + const stateStore = memoryEnrichmentStateStore(); + const providers = createDeterministicLocalScanEnrichmentProviders({ embeddingDimensions: 6 }); + const scanConnector = connector(); + + await runLocalScanEnrichment({ + connectionId: 'warehouse', + mode: 'enriched', + detectRelationships: false, + connector: scanConnector, + context: { runId: 'scan-run-resume-hash' }, + providers, + stateStore, + syncId: 'sync-resume-hash', + providerIdentity: { provider: 'deterministic', embeddingDimensions: 6 }, + }); + + const firstTable = snapshot.tables[0]; + if (!firstTable) { + throw new Error('Expected test snapshot table'); + } + const changedConnector = { + ...connector(), + introspect: vi.fn(async () => ({ + 
...snapshot, + tables: [{ ...firstTable, name: 'customers' }], + })), + }; + const getModel = vi.spyOn(providers.llm, 'getModel'); + + const result = await runLocalScanEnrichment({ + connectionId: 'warehouse', + mode: 'enriched', + detectRelationships: false, + connector: changedConnector, + context: { runId: 'scan-run-resume-hash' }, + providers, + stateStore, + syncId: 'sync-resume-hash', + providerIdentity: { provider: 'deterministic', embeddingDimensions: 6 }, + }); + + expect(result.state.resumedStages).toEqual([]); + expect(result.state.completedStages).toEqual(['descriptions', 'embeddings', 'relationships']); + expect(getModel).toHaveBeenCalled(); + }); + + it('runs providerless enriched scans as relationship-only discovery enrichment', async () => { + const executor = new InMemorySqliteExecutor(); + try { + executor.db.exec(` + CREATE TABLE accounts (id INTEGER NOT NULL); + CREATE TABLE orders (id INTEGER NOT NULL, account_id INTEGER NOT NULL); + INSERT INTO accounts (id) VALUES (1), (2); + INSERT INTO orders (id, account_id) VALUES (10, 1), (11, 1), (12, 2); + `); + const scanConnector = { + ...connector(), + driver: 'sqlite' as const, + capabilities: createKloConnectorCapabilities({ readOnlySql: true, columnStats: true }), + introspect: vi.fn(async () => noDeclaredRelationshipSnapshot()), + executeReadOnly: executor.executeReadOnly.bind(executor), + }; + + const result = await runLocalScanEnrichment({ + connectionId: 'warehouse', + mode: 'enriched', + detectRelationships: false, + connector: scanConnector, + context: { runId: 'scan-run-providerless-enriched' }, + providers: null, + }); + + expect(result.summary).toEqual({ + dataDictionary: 'skipped', + tableDescriptions: 'skipped', + columnDescriptions: 'skipped', + embeddings: 'skipped', + deterministicRelationships: 'completed', + llmRelationshipValidation: 'skipped', + statisticalValidation: 'completed', + }); + expect(result.descriptionUpdates).toEqual([]); + 
expect(result.embeddingUpdates).toEqual([]); + expect(result.relationships).toEqual({ accepted: 1, review: 0, rejected: 0, skipped: 0 }); + expect(result.relationshipUpdate?.accepted).toHaveLength(1); + expect(result.relationshipProfile).toMatchObject({ sqlAvailable: true }); + expect(result.resolvedRelationships).toEqual([ + expect.objectContaining({ + status: 'accepted', + from: expect.objectContaining({ table: expect.objectContaining({ name: 'orders' }), columns: ['account_id'] }), + to: expect.objectContaining({ table: expect.objectContaining({ name: 'accounts' }), columns: ['id'] }), + }), + ]); + expect(result.warnings).toContainEqual({ + code: 'scan_enrichment_backend_not_configured', + message: + 'Skipping description and embedding enrichment because scan.enrichment.mode is not configured; relationship discovery still ran.', + recoverable: true, + metadata: { + skippedStages: ['descriptions', 'embeddings'], + relationshipDetection: true, + }, + }); + } finally { + executor.close(); + } + }); + + it('resolves gateway LLM providers and OpenAI embeddings from local scan config', () => { + const createKloLlmProvider = vi.fn(() => ({ + getModel: vi.fn().mockReturnValue({ modelId: 'provider/language-model', provider: 'gateway' }), + })); + const createKloEmbeddingProvider = vi.fn(() => ({ + dimensions: 1536, + maxBatchSize: 8, + embed: vi.fn(), + [['embed', 'Many'].join('')]: vi.fn(), + })); + + const providers = createLocalScanEnrichmentProvidersFromConfig( + { + mode: 'llm', + embeddings: { + backend: 'openai', + model: 'provider/embedding-model', + dimensions: 1536, + batchSize: 8, + openai: { api_key: 'env:OPENAI_API_KEY' }, + }, + }, + { + provider: { + backend: 'gateway', + gateway: {}, + }, + models: { default: 'provider/language-model' }, + }, + { + createKloLlmProvider: createKloLlmProvider as any, + createKloEmbeddingProvider: createKloEmbeddingProvider as any, + env: { OPENAI_API_KEY: 'openai-key' }, + }, + ); + + 
expect(providers?.embedding.dimensions).toBe(1536); + expect(providers?.embedding.maxBatchSize).toBe(8); + expect(createKloLlmProvider).toHaveBeenCalledWith( + expect.objectContaining({ backend: 'gateway', modelSlots: { default: 'provider/language-model' } }), + ); + expect(createKloEmbeddingProvider).toHaveBeenCalledWith( + expect.objectContaining({ backend: 'openai', model: 'provider/embedding-model' }), + ); + }); +}); diff --git a/packages/context/src/scan/local-enrichment.ts b/packages/context/src/scan/local-enrichment.ts new file mode 100644 index 00000000..51b425c8 --- /dev/null +++ b/packages/context/src/scan/local-enrichment.ts @@ -0,0 +1,659 @@ +import type { KloLlmProvider } from '@klo/llm'; +import { buildDefaultKloProjectConfig, type KloScanRelationshipConfig } from '../project/config.js'; +import { type KloDescriptionColumnTable, KloDescriptionGenerator } from './description-generation.js'; +import { buildKloColumnEmbeddingText } from './embedding-text.js'; +import { + completedKloScanEnrichmentStateSummary, + computeKloScanEnrichmentInputHash, + type KloScanEnrichmentStateStore, + summarizeKloScanEnrichmentState, +} from './enrichment-state.js'; +import { skippedKloScanEnrichmentSummary } from './enrichment-summary.js'; +import type { + KloEmbeddingUpdate, + KloEnrichedColumn, + KloEnrichedRelationship, + KloEnrichedSchema, + KloEnrichedTable, + KloRelationshipEndpoint, + KloRelationshipUpdate, +} from './enrichment-types.js'; +import type { KloCompositeRelationshipCandidate } from './relationship-composite-candidates.js'; +import type { KloResolvedRelationshipDiscoveryCandidate } from './relationship-graph-resolver.js'; +import { discoverKloRelationships } from './relationship-discovery.js'; +import type { KloRelationshipProfileArtifact } from './relationship-profiling.js'; +import type { + KloEmbeddingPort, + KloProgressPort, + KloScanConnector, + KloScanContext, + KloScanEnrichmentStage, + KloScanEnrichmentStateSummary, + KloScanEnrichmentSummary, 
/**
 * Joins the non-empty catalog, db and name parts of a snapshot table with dots
 * (e.g. `public.orders`, or just `orders` when catalog and db are null).
 */
function tableId(table: KloSchemaTable): string {
  const parts = [table.catalog, table.db, table.name];
  return parts.filter((part): part is string => Boolean(part)).join('.');
}

/** Column identifier: the owning table's id followed by the column name. */
function columnId(table: KloSchemaTable, column: KloSchemaColumn): string {
  return `${tableId(table)}.${column.name}`;
}

/** Extracts the catalog/db/name triple that identifies a snapshot table. */
function tableRef(table: KloSchemaTable): KloTableRef {
  const { catalog, db, name } = table;
  return { catalog, db, name };
}

/** Builds a single-column relationship endpoint for an enriched table/column pair. */
function endpoint(table: KloEnrichedTable, column: KloEnrichedColumn): KloRelationshipEndpoint {
  return {
    tableId: table.id,
    columnIds: [column.id],
    table: table.ref,
    columns: [column.name],
  };
}

/** Renders the relationship id as `<fromTable>:(<fromCols>)-><toTable>:(<toCols>)`. */
function relationshipId(from: KloRelationshipEndpoint, to: KloRelationshipEndpoint): string {
  const render = (side: KloRelationshipEndpoint): string => `${side.tableId}:(${side.columnIds.join(',')})`;
  return `${render(from)}->${render(to)}`;
}

/**
 * Reports whether `table` can be the target of `foreignKey`: the table name must
 * match, and catalog/db must match whenever the foreign key specifies them.
 */
function targetMatchesForeignKey(table: KloEnrichedTable, foreignKey: KloSchemaForeignKey): boolean {
  if (table.ref.name !== foreignKey.toTable) {
    return false;
  }
  const catalogMatches = foreignKey.toCatalog === null || table.ref.catalog === foreignKey.toCatalog;
  const dbMatches = foreignKey.toDb === null || table.ref.db === foreignKey.toDb;
  return catalogMatches && dbMatches;
}

/**
 * Picks the table a foreign key points at.
 *
 * When the foreign key leaves catalog and/or db unspecified and several tables
 * share the target name, the table living in the same namespace as the source
 * table is preferred; otherwise the first match wins (the previous behaviour).
 */
function resolveForeignKeyTarget(
  sourceTable: KloEnrichedTable,
  foreignKey: KloSchemaForeignKey,
  tables: readonly KloEnrichedTable[],
): KloEnrichedTable | undefined {
  const candidates = tables.filter((table) => targetMatchesForeignKey(table, foreignKey));
  if (candidates.length <= 1) {
    return candidates[0];
  }
  const sameNamespace = candidates.find(
    (candidate) =>
      (foreignKey.toCatalog !== null || candidate.ref.catalog === sourceTable.ref.catalog) &&
      (foreignKey.toDb !== null || candidate.ref.db === sourceTable.ref.db),
  );
  return sameNamespace ?? candidates[0];
}

/**
 * Converts the declared foreign keys of a schema snapshot into `formal`
 * relationships between the already-built enriched tables.
 *
 * Foreign keys whose source column, target table or target column cannot be
 * found among `tables` are skipped. The result is sorted by relationship id.
 *
 * @param snapshot schema snapshot carrying per-table `foreignKeys`
 * @param tables enriched tables derived from the same snapshot
 * @returns one single-column `many_to_one` relationship per resolvable foreign key
 */
function formalRelationshipsFromSnapshot(
  snapshot: KloSchemaSnapshot,
  tables: readonly KloEnrichedTable[],
): KloEnrichedRelationship[] {
  const tableById = new Map(tables.map((table) => [table.id, table]));
  const relationships: KloEnrichedRelationship[] = [];

  for (const sourceTableSnapshot of snapshot.tables) {
    const sourceTable = tableById.get(tableId(sourceTableSnapshot));
    if (!sourceTable) {
      continue;
    }

    for (const foreignKey of sourceTableSnapshot.foreignKeys) {
      const sourceColumn = sourceTable.columns.find((column) => column.name === foreignKey.fromColumn);
      const targetTable = resolveForeignKeyTarget(sourceTable, foreignKey, tables);
      const targetColumn = targetTable?.columns.find((column) => column.name === foreignKey.toColumn);
      if (!sourceColumn || !targetTable || !targetColumn) {
        continue;
      }

      const from = endpoint(sourceTable, sourceColumn);
      const to = endpoint(targetTable, targetColumn);
      relationships.push({
        id: relationshipId(from, to),
        source: 'formal',
        from,
        to,
        relationshipType: 'many_to_one',
        confidence: 1,
        // NOTE(review): always true, even when the target column is not flagged as a
        // primary key in the snapshot — confirm whether this should follow targetColumn.primaryKey.
        isPrimaryKeyReference: true,
      });
    }
  }

  return relationships.sort((left, right) => left.id.localeCompare(right.id));
}
/**
 * Warning emitted when an enriched scan runs without LLM/embedding providers:
 * description and embedding stages are skipped.
 *
 * @param relationshipDetection recorded verbatim in the warning metadata
 */
function providerlessEnrichedWarning(relationshipDetection: boolean): KloScanWarning {
  return {
    code: 'scan_enrichment_backend_not_configured',
    message:
      'Skipping description and embedding enrichment because scan.enrichment.mode is not configured; relationship discovery still ran.',
    recoverable: true,
    metadata: {
      skippedStages: ['descriptions', 'embeddings'],
      relationshipDetection,
    },
  };
}

/**
 * Deterministic pseudo-embedding: for every dimension a small rolling hash over
 * the text is folded into a value in [-1, 0.99] with four decimals.
 */
function hashEmbedding(text: string, dimensions: number): number[] {
  return Array.from({ length: dimensions }, (_, index) => {
    let hash = index + 17;
    for (const char of text) {
      hash = (hash * 31 + char.charCodeAt(0) + index) % 1009;
    }
    return Number(((hash % 200) / 100 - 1).toFixed(4));
  });
}

/**
 * Creates offline providers for scan enrichment: a stub LLM provider and an
 * embedding port backed by {@link hashEmbedding}.
 *
 * @param options `embeddingDimensions` defaults to 8, `maxBatchSize` to 64
 */
export function createDeterministicLocalScanEnrichmentProviders(
  options: DeterministicLocalScanEnrichmentProviderOptions = {},
): KloLocalScanEnrichmentProviders {
  const dimensions = options.embeddingDimensions ?? 8;
  const maxBatchSize = options.maxBatchSize ?? 64;
  return {
    llm: deterministicLlmProvider(),
    embedding: {
      dimensions,
      maxBatchSize,
      async embedBatch(texts) {
        return texts.map((text) => hashEmbedding(text, dimensions));
      },
    },
  };
}

/**
 * Stub LLM provider: every model lookup returns the same placeholder model,
 * prompt caching is disabled and tool-call repair is unsupported.
 */
function deterministicLlmProvider(): KloLlmProvider {
  const model = { modelId: 'deterministic-scan', provider: 'deterministic' };
  return {
    getModel() {
      // The placeholder is not a real language model; the cast satisfies the port's return type.
      return model as ReturnType<KloLlmProvider['getModel']>;
    },
    getModelByName() {
      return model as ReturnType<KloLlmProvider['getModelByName']>;
    },
    cacheMarker() {
      return undefined;
    },
    repairToolCallHandler() {
      throw new Error('deterministic scan provider does not support tool-call repair');
    },
    thinkingProviderOptions() {
      return {};
    },
    telemetryConfig() {
      return undefined;
    },
    promptCachingConfig() {
      return {
        enabled: false,
        systemTtl: '1h',
        toolsTtl: '1h',
        historyTtl: '5m',
        cacheSystem: true,
        cacheTools: true,
        cacheHistory: true,
        vertexFallbackTo5m: false,
      };
    },
    activeBackend() {
      return 'gateway';
    },
  };
}

/**
 * Maps a raw schema snapshot into the enriched schema consumed by relationship
 * detection.
 *
 * Database comments become the `db` description of tables and columns, declared
 * foreign keys become formal relationships, and sample values / cardinality are
 * left unset (`null`).
 *
 * @param snapshot schema snapshot produced by connector introspection
 * @param embeddingsByColumnId optional embeddings keyed by column id; columns
 *   without an entry get `embedding: null`
 */
export function snapshotToKloEnrichedSchema(
  snapshot: KloSchemaSnapshot,
  embeddingsByColumnId: ReadonlyMap<string, number[]> = new Map(),
): KloEnrichedSchema {
  const tables: KloEnrichedTable[] = snapshot.tables.map((table) => {
    const id = tableId(table);
    const ref = tableRef(table);
    const columns: KloEnrichedColumn[] = table.columns.map((column) => {
      const idForColumn = columnId(table, column);
      return {
        id: idForColumn,
        tableId: id,
        tableRef: ref,
        name: column.name,
        nativeType: column.nativeType,
        normalizedType: column.normalizedType,
        dimensionType: column.dimensionType,
        nullable: column.nullable,
        primaryKey: column.primaryKey,
        parentColumnId: null,
        descriptions: {
          ...(column.comment ? { db: column.comment } : {}),
        },
        embedding: embeddingsByColumnId.get(idForColumn) ?? null,
        sampleValues: null,
        cardinality: null,
      };
    });
    return {
      id,
      ref,
      enabled: true,
      descriptions: {
        ...(table.comment ? { db: table.comment } : {}),
      },
      columns,
    };
  });

  return {
    connectionId: snapshot.connectionId,
    tables,
    relationships: formalRelationshipsFromSnapshot(snapshot, tables),
  };
}

/**
 * Shapes a snapshot table into the input expected by the description generator.
 */
function descriptionTable(table: KloSchemaTable): KloDescriptionColumnTable {
  return {
    catalog: table.catalog,
    db: table.db,
    name: table.name,
    columns: table.columns.map((column) => ({
      name: column.name,
      // NOTE(review): the column comment is passed both as a sample value and as the raw
      // `db` description — confirm that treating a comment as a sample value is intended.
      ...(column.comment ? { sampleValues: [column.comment], rawDescriptions: { db: column.comment } } : {}),
    })),
  };
}

/** Returns the provider batch size when it is a positive integer, otherwise 100. */
function embeddingBatchSize(maxBatchSize: number): number {
  return Number.isInteger(maxBatchSize) && maxBatchSize > 0 ? maxBatchSize : 100;
}
/**
 * Generates column and table descriptions for every table in the snapshot,
 * one table at a time, reporting progress along the way.
 *
 * For each table a transient progress update is sent first, then column
 * descriptions and the table description are requested from the generator.
 * An empty snapshot short-circuits with a single "No tables to describe" update.
 *
 * @param input snapshot to describe, the connector and scan context handed to
 *   the generator, the providers whose LLM drives generation, and an optional
 *   progress port
 * @returns one description update per snapshot table, in snapshot order
 */
async function generateDescriptions(input: {
  snapshot: KloSchemaSnapshot;
  connector: KloScanConnector;
  context: KloScanContext;
  providers: KloLocalScanEnrichmentProviders;
  progress?: KloProgressPort;
}): Promise<KloLocalScanEnrichmentResult['descriptionUpdates']> {
  const generator = new KloDescriptionGenerator({
    llmProvider: input.providers.llm,
    settings: {
      columnMaxWords: 16,
      tableMaxWords: 24,
      dataSourceMaxWords: 32,
      concurrencyLimit: 4,
    },
  });

  const updates: KloLocalScanEnrichmentResult['descriptionUpdates'] = [];
  const totalTables = input.snapshot.tables.length;
  if (totalTables === 0) {
    await input.progress?.update(1, 'No tables to describe');
    return updates;
  }

  for (const [index, table] of input.snapshot.tables.entries()) {
    const position = index + 1;
    // Progress is reported before the table is processed, using its 1-based position.
    await input.progress?.update(position / totalTables, `Generating descriptions ${position}/${totalTables} tables`, {
      transient: true,
    });

    const columnResult = await generator.generateColumnDescriptions({
      connectionId: input.snapshot.connectionId,
      connector: input.connector,
      context: input.context,
      dataSourceType: input.snapshot.driver,
      supportsNestedAnalysis: input.connector.capabilities.nestedAnalysis,
      table: descriptionTable(table),
    });
    const tableDescription = await generator.generateTableDescription({
      connectionId: input.snapshot.connectionId,
      connector: input.connector,
      context: input.context,
      dataSourceType: input.snapshot.driver,
      table: {
        catalog: table.catalog,
        db: table.db,
        name: table.name,
        rawDescriptions: table.comment ? { db: table.comment } : {},
      },
    });

    updates.push({
      table: tableRef(table),
      tableDescription,
      columnDescriptions: Object.fromEntries(columnResult.columnDescriptions),
    });
  }

  await input.progress?.update(1, `Generated descriptions for ${totalTables} tables`);
  return updates;
}
[]) + .filter((foreignKey) => foreignKey.fromColumn === column.name) + .map((foreignKey) => ({ toTable: foreignKey.toTable, toColumn: foreignKey.toColumn })), + incoming: [], + }, + }); + texts.push({ columnId: id, text }); + } + } + + const embeddings: number[][] = []; + const maxBatchSize = embeddingBatchSize(input.providers.embedding.maxBatchSize); + const embeddingTexts = texts.map((item) => item.text); + const batchCount = Math.ceil(embeddingTexts.length / maxBatchSize); + if (batchCount === 0) { + await input.progress?.update(1, 'No embeddings to build'); + } + for (let offset = 0; offset < embeddingTexts.length; offset += maxBatchSize) { + const batchIndex = Math.floor(offset / maxBatchSize) + 1; + await input.progress?.update(batchIndex / batchCount, `Building embeddings ${batchIndex}/${batchCount} batches`, { + transient: true, + }); + const batch = embeddingTexts.slice(offset, offset + maxBatchSize); + const batchEmbeddings = await input.providers.embedding.embedBatch(batch); + if (batchEmbeddings.length !== batch.length) { + throw new Error(`expected ${batch.length} embeddings, received ${batchEmbeddings.length}`); + } + embeddings.push(...batchEmbeddings); + } + + const byColumnId = new Map(); + const updates = texts.map((item, index) => { + const embedding = embeddings[index] ?? 
[]; + byColumnId.set(item.columnId, embedding); + return { + columnId: item.columnId, + text: item.text, + embedding, + }; + }); + if (batchCount > 0) { + await input.progress?.update(1, `Built embeddings for ${updates.length} columns`); + } + return { updates, byColumnId }; +} + +async function runEnrichmentStage(input: { + stateStore: KloScanEnrichmentStateStore | null | undefined; + runId: string; + connectionId: string; + syncId: string; + mode: KloScanMode; + stage: KloScanEnrichmentStage; + inputHash: string; + now: () => Date; + resumedStages: KloScanEnrichmentStage[]; + completedStages: KloScanEnrichmentStage[]; + failedStages: KloScanEnrichmentStage[]; + compute: () => Promise; +}): Promise { + const existing = await input.stateStore?.findCompletedStage({ + runId: input.runId, + stage: input.stage, + inputHash: input.inputHash, + }); + if (existing) { + input.resumedStages.push(input.stage); + input.completedStages.push(input.stage); + return existing.output; + } + + try { + const output = await input.compute(); + input.completedStages.push(input.stage); + await input.stateStore?.saveCompletedStage({ + runId: input.runId, + connectionId: input.connectionId, + syncId: input.syncId, + mode: input.mode, + stage: input.stage, + inputHash: input.inputHash, + output, + updatedAt: input.now().toISOString(), + }); + return output; + } catch (error) { + input.failedStages.push(input.stage); + await input.stateStore?.saveFailedStage({ + runId: input.runId, + connectionId: input.connectionId, + syncId: input.syncId, + mode: input.mode, + stage: input.stage, + inputHash: input.inputHash, + errorMessage: error instanceof Error ? 
error.message : String(error), + updatedAt: input.now().toISOString(), + }); + throw error; + } +} + +function embeddingsByColumnId(updates: KloEmbeddingUpdate[]): Map { + return new Map(updates.map((update) => [update.columnId, update.embedding])); +} + +export async function runLocalScanEnrichment( + input: KloLocalScanEnrichmentInput, +): Promise { + const progress = input.context.progress; + await progress?.update(0, 'Loading enrichment schema snapshot'); + const snapshot = await input.connector.introspect( + { + connectionId: input.connectionId, + driver: input.connector.driver, + mode: input.mode, + detectRelationships: input.detectRelationships, + }, + input.context, + ); + await progress?.update(0.05, `Loaded schema snapshot with ${snapshot.tables.length} tables`); + + const now = input.now ?? (() => new Date()); + const state = completedKloScanEnrichmentStateSummary(); + const syncId = input.syncId ?? input.context.runId; + const relationshipSettings = + input.relationshipSettings ?? buildDefaultKloProjectConfig(input.connectionId).scan.relationships; + const inputHash = computeKloScanEnrichmentInputHash({ + snapshot, + mode: input.mode, + detectRelationships: input.detectRelationships ?? false, + providerIdentity: input.providerIdentity ?? {}, + relationshipSettings, + }); + const warnings: KloScanWarning[] = []; + let descriptions: KloLocalScanEnrichmentResult['descriptionUpdates'] = []; + let embeddingUpdates: KloEmbeddingUpdate[] = []; + let schema = snapshotToKloEnrichedSchema(snapshot); + const summary: KloScanEnrichmentSummary = { ...skippedKloScanEnrichmentSummary }; + const relationshipDetectionEnabled = relationshipSettings.enabled; + const shouldDetectRelationships = + relationshipDetectionEnabled && + (input.mode === 'relationships' || input.mode === 'enriched' || (input.detectRelationships ?? 
false)); + + if (input.mode === 'enriched' && !input.providers) { + warnings.push(providerlessEnrichedWarning(shouldDetectRelationships)); + } + + if (input.mode === 'enriched' && input.providers) { + const providers = input.providers; + const descriptionProgress = progress?.startPhase(0.45); + descriptions = await runEnrichmentStage({ + stateStore: input.stateStore, + runId: input.context.runId, + connectionId: input.connectionId, + syncId, + mode: input.mode, + stage: 'descriptions', + inputHash, + now, + resumedStages: state.resumedStages, + completedStages: state.completedStages, + failedStages: state.failedStages, + compute: () => + generateDescriptions({ + snapshot, + connector: input.connector, + context: input.context, + providers, + progress: descriptionProgress, + }), + }); + const embeddingProgress = progress?.startPhase(0.2); + embeddingUpdates = await runEnrichmentStage({ + stateStore: input.stateStore, + runId: input.context.runId, + connectionId: input.connectionId, + syncId, + mode: input.mode, + stage: 'embeddings', + inputHash, + now, + resumedStages: state.resumedStages, + completedStages: state.completedStages, + failedStages: state.failedStages, + compute: async () => { + const embeddings = await buildEmbeddings({ + snapshot, + providers, + descriptions, + progress: embeddingProgress, + }); + return embeddings.updates; + }, + }); + schema = snapshotToKloEnrichedSchema(snapshot, embeddingsByColumnId(embeddingUpdates)); + summary.dataDictionary = input.connector.sampleColumn ? 
'completed' : 'skipped'; + summary.tableDescriptions = 'completed'; + summary.columnDescriptions = 'completed'; + summary.embeddings = 'completed'; + } + + let relationshipUpdate: KloRelationshipUpdate | null = null; + let relationshipProfile: KloRelationshipProfileArtifact | null = null; + let resolvedRelationships: KloResolvedRelationshipDiscoveryCandidate[] | null = null; + let compositeRelationships: KloCompositeRelationshipCandidate[] | null = null; + let relationships: KloScanRelationshipSummary = { accepted: 0, review: 0, rejected: 0, skipped: 0 }; + if (shouldDetectRelationships) { + const relationshipProgress = progress?.startPhase(0.25); + const relationshipStage = await runEnrichmentStage({ + stateStore: input.stateStore, + runId: input.context.runId, + connectionId: input.connectionId, + syncId, + mode: input.mode, + stage: 'relationships', + inputHash, + now, + resumedStages: state.resumedStages, + completedStages: state.completedStages, + failedStages: state.failedStages, + compute: async () => { + await relationshipProgress?.update(0, 'Detecting relationships'); + const detection = await discoverKloRelationships({ + connectionId: input.connectionId, + driver: snapshot.driver, + connector: input.connector, + schema, + context: input.context, + settings: relationshipSettings, + llmProvider: input.providers?.llm ?? 
null, + }); + + await relationshipProgress?.update( + 1, + `Relationship detection found ${detection.relationships.accepted} accepted, ${detection.relationships.review} review`, + ); + return { + relationshipUpdate: detection.relationshipUpdate, + relationshipProfile: detection.profile, + resolvedRelationships: detection.resolvedRelationships, + compositeRelationships: detection.compositeRelationships, + relationships: detection.relationships, + statisticalValidation: detection.statisticalValidation, + llmRelationshipValidation: detection.llmRelationshipValidation, + warnings: detection.warnings, + }; + }, + }); + + summary.deterministicRelationships = 'completed'; + summary.llmRelationshipValidation = relationshipStage.llmRelationshipValidation; + summary.statisticalValidation = relationshipStage.statisticalValidation; + relationshipUpdate = relationshipStage.relationshipUpdate; + relationshipProfile = relationshipStage.relationshipProfile; + resolvedRelationships = relationshipStage.resolvedRelationships; + compositeRelationships = relationshipStage.compositeRelationships; + relationships = relationshipStage.relationships; + warnings.push(...relationshipStage.warnings); + } + + await progress?.update(1, 'Enrichment complete'); + return { + snapshot, + summary, + relationships, + state: summarizeKloScanEnrichmentState(state), + warnings, + descriptionUpdates: descriptions, + embeddingUpdates, + relationshipUpdate, + relationshipProfile, + resolvedRelationships, + compositeRelationships, + }; +} diff --git a/packages/context/src/scan/local-scan.test.ts b/packages/context/src/scan/local-scan.test.ts new file mode 100644 index 00000000..54688794 --- /dev/null +++ b/packages/context/src/scan/local-scan.test.ts @@ -0,0 +1,1494 @@ +import { mkdir, mkdtemp, readFile, rm, writeFile } from 'node:fs/promises'; +import { tmpdir } from 'node:os'; +import { join } from 'node:path'; +import type { KloLlmProvider } from '@klo/llm'; +import { afterEach, beforeEach, describe, 
expect, it, vi } from 'vitest'; +import YAML from 'yaml'; +import type { SourceAdapter } from '../ingest/index.js'; +import { initKloProject, type KloLocalProject, loadKloProject } from '../project/index.js'; +import { getLocalScanReport, getLocalScanStatus, runLocalScan } from './local-scan.js'; +import type { KloQueryResult, KloReadOnlyQueryInput } from './types.js'; +

/**
 * Canned read-only query results for relationship-detection tests.
 *
 * The result is chosen by substring-matching the SQL text, checked in this order:
 * `child_values`, ` AS column_name`, a plain `COUNT(*) AS row_count`, `COUNT(DISTINCT`, and
 * ` AS value`. SQL mentioning `orders` (and, for the column-profile query, `users`) reports 1000
 * rows; everything else reports 100.
 *
 * @param options.throwOnCoverage When set, the `child_values` query throws an error whose
 *   message contains a credential-like URL.
 * @throws Error for SQL that matches none of the known shapes.
 */
function relationshipSqlResult(
  input: KloReadOnlyQueryInput,
  options: { throwOnCoverage?: boolean } = {},
): KloQueryResult {
  const { sql } = input;
  const mentionsOrders = sql.includes('orders');

  if (sql.includes('child_values')) {
    if (options.throwOnCoverage) {
      throw new Error('validation failed for postgres://reader:secret@example.test/db'); // pragma: allowlist secret
    }
    return {
      headers: ['child_distinct', 'parent_distinct', 'overlap', 'violation_count'],
      rows: [[100, 100, 99, 0]],
      totalRows: 1,
      rowCount: 1,
    };
  }

  if (sql.includes(' AS column_name')) {
    const tableRowCount = mentionsOrders || sql.includes('users') ? 1000 : 100;
    const columnNames = Array.from(sql.matchAll(/'([^']+)' AS column_name/g), (match) => match[1]);
    return {
      headers: [
        'column_name',
        'table_row_count',
        'row_count',
        'null_count',
        'distinct_count',
        'min_text_length',
        'max_text_length',
        'sample_values',
      ],
      rows: columnNames.map((columnName) => [
        columnName,
        tableRowCount,
        tableRowCount,
        0,
        // An `id` column is fully distinct; every other column reports 100 distinct values.
        columnName === 'id' ? tableRowCount : 100,
        1,
        3,
        '1\u001f2',
      ]),
      totalRows: columnNames.length,
      rowCount: columnNames.length,
    };
  }

  if (sql.includes('COUNT(*) AS row_count') && !sql.includes('COUNT(DISTINCT')) {
    return {
      headers: ['row_count'],
      rows: [[mentionsOrders ? 1000 : 100]],
      totalRows: 1,
      rowCount: 1,
    };
  }

  if (sql.includes('COUNT(DISTINCT')) {
    return {
      headers: ['row_count', 'null_count', 'distinct_count', 'min_text_length', 'max_text_length'],
      // The distinct count is 100 regardless of the table (the original ternary had two equal arms).
      rows: [[mentionsOrders ? 1000 : 100, 0, 100, 1, 3]],
      totalRows: 1,
      rowCount: 1,
    };
  }

  if (sql.includes(' AS value')) {
    return {
      headers: ['value'],
      rows: [[1], [2]],
      totalRows: 2,
      rowCount: 2,
    };
  }

  throw new Error(`Unexpected relationship SQL: ${sql}`);
}

/**
 * Stub LLM provider for tests: every model lookup returns the same fixed
 * `deterministic` model descriptor and prompt caching is reported as disabled.
 */
function deterministicLlmProvider(): KloLlmProvider {
  const stubModel = () => ({ provider: 'deterministic', modelId: 'deterministic' }) as never;
  return {
    getModel: stubModel,
    getModelByName: stubModel,
    cacheMarker: () => undefined,
    repairToolCallHandler: (() => undefined) as never,
    thinkingProviderOptions: () => ({}),
    telemetryConfig: () => undefined,
    promptCachingConfig: () => ({
      enabled: false,
      systemTtl: '1h',
      toolsTtl: '1h',
      historyTtl: '5m',
      cacheSystem: true,
      cacheTools: true,
      cacheHistory: true,
      vertexFallbackTo5m: false,
    }),
    activeBackend: () => 'gateway',
  };
}

/**
 * Overwrite `klo.yaml` in `projectDir` with a project that has a single read-only Postgres
 * connection named `warehouse` and the `live-database` ingest adapter.
 */
async function writeLiveDatabaseConfig(projectDir: string): Promise<void> {
  const configLines = [
    'project: warehouse',
    'connections:',
    '  warehouse:',
    '    driver: postgres',
    '    url: env:DATABASE_URL',
    '    readonly: true',
    'ingest:',
    '  adapters:',
    '    - live-database',
    '',
  ];
  await writeFile(join(projectDir, 'klo.yaml'), configLines.join('\n'), 'utf-8');
}

function fetchOnlyAdapter(options: { extractedAt?: () => string } = {}): SourceAdapter { + return { + source: 'live-database', + skillNames: ['live_database_ingest'], + async fetch(_pullConfig, stagedDir) { + await mkdir(join(stagedDir, 'tables'), { recursive: true }); + await writeFile( + join(stagedDir, 'connection.json'), + `${JSON.stringify({ + connectionId: 'warehouse', + driver: 'postgres', + ...(options.extractedAt ?
{ extractedAt: options.extractedAt() } : {}), + scope: { schemas: ['public'] }, + metadata: {}, + })}\n`, + 'utf-8', + ); + await writeFile(join(stagedDir, 'foreign-keys.json'), '{"foreignKeys":[]}\n', 'utf-8'); + await writeFile( + join(stagedDir, 'tables', 'orders.json'), + '{"name":"orders","catalog":null,"db":"public","kind":"table","comment":null,"estimatedRows":null,"columns":[{"name":"id","nativeType":"integer","normalizedType":"integer","dimensionType":"number","nullable":false,"primaryKey":true,"comment":null}],"foreignKeys":[]}\n', + 'utf-8', + ); + }, + async detect() { + return true; + }, + async chunk() { + return { + workUnits: [ + { + unitKey: 'live-database-public-orders', + rawFiles: ['tables/orders.json'], + dependencyPaths: ['connection.json', 'foreign-keys.json'], + peerFileIndex: [], + }, + ], + }; + }, + }; +} + +describe('local scan', () => { + let tempDir: string; + let project: KloLocalProject; + + beforeEach(async () => { + tempDir = await mkdtemp(join(tmpdir(), 'klo-local-scan-')); + const projectDir = join(tempDir, 'project'); + await initKloProject({ projectDir, projectName: 'warehouse' }); + await writeLiveDatabaseConfig(projectDir); + project = await loadKloProject({ projectDir }); + }); + + afterEach(async () => { + await rm(tempDir, { recursive: true, force: true }); + }); + + it('runs a structural live-database scan and writes a durable scan report', async () => { + const result = await runLocalScan({ + project, + adapters: [fetchOnlyAdapter()], + connectionId: 'warehouse', + jobId: 'scan-run-1', + now: () => new Date('2026-04-29T09:00:00.000Z'), + }); + + expect(result.report).toMatchObject({ + connectionId: 'warehouse', + driver: 'postgres', + runId: 'scan-run-1', + syncId: '2026-04-29-090000-scan-run-1', + trigger: 'cli', + mode: 'structural', + dryRun: false, + artifactPaths: { + rawSourcesDir: 'raw-sources/warehouse/live-database/2026-04-29-090000-scan-run-1', + reportPath: 
'raw-sources/warehouse/live-database/2026-04-29-090000-scan-run-1/scan-report.json', + manifestShards: ['semantic-layer/warehouse/_schema/public.yaml'], + enrichmentArtifacts: [], + }, + diffSummary: { + tablesAdded: 1, + tablesModified: 0, + tablesDeleted: 0, + tablesUnchanged: 0, + columnsAdded: 0, + columnsModified: 0, + columnsDeleted: 0, + }, + manifestShardsWritten: 1, + relationships: { accepted: 0, review: 0, rejected: 0, skipped: 0 }, + }); + + await expect( + readFile( + join(project.projectDir, 'raw-sources/warehouse/live-database/2026-04-29-090000-scan-run-1/scan-report.json'), + 'utf-8', + ), + ).resolves.toContain('"runId": "scan-run-1"'); + + const structuralManifest = await readFile( + join(project.projectDir, 'semantic-layer/warehouse/_schema/public.yaml'), + 'utf-8', + ); + expect(structuralManifest).toContain('orders:'); + expect(structuralManifest).toContain('table: public.orders'); + expect(structuralManifest).toContain('name: id'); + expect(structuralManifest).not.toContain('ai:'); + + await expect(getLocalScanStatus(project, 'scan-run-1')).resolves.toMatchObject({ + runId: 'scan-run-1', + status: 'done', + done: true, + connectionId: 'warehouse', + mode: 'structural', + reportPath: 'raw-sources/warehouse/live-database/2026-04-29-090000-scan-run-1/scan-report.json', + }); + await expect(getLocalScanReport(project, 'scan-run-1')).resolves.toMatchObject({ + runId: 'scan-run-1', + connectionId: 'warehouse', + }); + }); + + it('reuses scan report and raw-source paths when the same local scan run id is retried', async () => { + const first = await runLocalScan({ + project, + adapters: [fetchOnlyAdapter()], + connectionId: 'warehouse', + jobId: 'scan-idempotent-run', + now: () => new Date('2026-04-29T09:20:00.000Z'), + }); + + const retry = await runLocalScan({ + project, + adapters: [fetchOnlyAdapter()], + connectionId: 'warehouse', + jobId: 'scan-idempotent-run', + now: () => new Date('2026-04-29T10:20:00.000Z'), + }); + + 
expect(retry.runId).toBe(first.runId); + expect(retry.syncId).toBe(first.syncId); + expect(retry.report.artifactPaths.rawSourcesDir).toBe(first.report.artifactPaths.rawSourcesDir); + expect(retry.report.artifactPaths.reportPath).toBe(first.report.artifactPaths.reportPath); + expect(retry.report.artifactPaths.manifestShards).toEqual(first.report.artifactPaths.manifestShards); + + const status = await getLocalScanStatus(project, 'scan-idempotent-run'); + expect(status?.syncId).toBe(first.syncId); + expect(status?.reportPath).toBe(first.report.artifactPaths.reportPath); + + const files = await project.fileStore.listFiles('raw-sources/warehouse/live-database'); + expect(files.files.every((file) => file.includes('2026-04-29-092000-scan-idempotent-run'))).toBe(true); + expect(files.files.some((file) => file.includes('2026-04-29-102000-scan-idempotent-run'))).toBe(false); + }); + + it('reuses existing scan artifacts without writing when a new scan run has unchanged input', async () => { + let extractedAt = '2026-04-29T09:25:00.000Z'; + const adapter = fetchOnlyAdapter({ extractedAt: () => extractedAt }); + const first = await runLocalScan({ + project, + adapters: [adapter], + connectionId: 'warehouse', + jobId: 'scan-idempotent-new-run-1', + now: () => new Date('2026-04-29T09:25:00.000Z'), + }); + + const writeSpy = vi.spyOn(project.fileStore, 'writeFile'); + const commitSpy = vi.spyOn(project.git, 'commitFiles'); + + extractedAt = '2026-04-29T10:25:00.000Z'; + const second = await runLocalScan({ + project, + adapters: [adapter], + connectionId: 'warehouse', + jobId: 'scan-idempotent-new-run-2', + now: () => new Date('2026-04-29T10:25:00.000Z'), + }); + + expect(second.runId).toBe('scan-idempotent-new-run-2'); + expect(second.syncId).toBe(first.syncId); + expect(second.report.runId).toBe('scan-idempotent-new-run-2'); + expect(second.report.artifactPaths.rawSourcesDir).toBe(first.report.artifactPaths.rawSourcesDir); + 
expect(second.report.artifactPaths.reportPath).toBe(first.report.artifactPaths.reportPath); + expect(second.report.artifactPaths.manifestShards).toEqual(first.report.artifactPaths.manifestShards); + expect(second.report.diffSummary).toMatchObject({ + tablesAdded: 0, + tablesModified: 0, + tablesDeleted: 0, + tablesUnchanged: 1, + }); + expect(writeSpy).not.toHaveBeenCalled(); + expect(commitSpy).not.toHaveBeenCalled(); + + await expect(getLocalScanReport(project, 'scan-idempotent-new-run-2')).resolves.toMatchObject({ + runId: 'scan-idempotent-new-run-2', + syncId: first.syncId, + diffSummary: { + tablesAdded: 0, + tablesModified: 0, + tablesDeleted: 0, + tablesUnchanged: 1, + columnsAdded: 0, + columnsModified: 0, + columnsDeleted: 0, + }, + }); + + const files = await project.fileStore.listFiles('raw-sources/warehouse/live-database'); + expect(files.files.every((file) => file.includes(first.syncId))).toBe(true); + + writeSpy.mockRestore(); + commitSpy.mockRestore(); + }); + + it('returns an in-memory dry-run report without persisting scan status or report files', async () => { + const result = await runLocalScan({ + project, + adapters: [fetchOnlyAdapter()], + connectionId: 'warehouse', + jobId: 'scan-dry-run-1', + now: () => new Date('2026-04-29T09:05:00.000Z'), + dryRun: true, + }); + + expect(result.report).toMatchObject({ + runId: 'scan-dry-run-1', + dryRun: true, + artifactPaths: { + rawSourcesDir: null, + reportPath: null, + manifestShards: [], + enrichmentArtifacts: [], + }, + }); + await expect(getLocalScanStatus(project, 'scan-dry-run-1')).resolves.toBeNull(); + await expect(getLocalScanReport(project, 'scan-dry-run-1')).resolves.toBeNull(); + }); + + it('runs relationship scans through a native scan connector and records relationship summary', async () => { + const connector = { + id: 'test:warehouse', + driver: 'postgres' as const, + capabilities: { + structuralIntrospection: true as const, + tableSampling: false, + columnSampling: false, + columnStats: 
false, + readOnlySql: false, + nestedAnalysis: false, + eventStreamDiscovery: false, + formalForeignKeys: false, + estimatedRowCounts: false, + }, + async introspect() { + return { + connectionId: 'warehouse', + driver: 'postgres' as const, + extractedAt: '2026-04-29T09:00:00.000Z', + scope: { schemas: ['public'] }, + metadata: {}, + tables: [ + { + catalog: null, + db: 'public', + name: 'customers', + kind: 'table' as const, + comment: null, + estimatedRows: null, + foreignKeys: [], + columns: [ + { + name: 'id', + nativeType: 'integer', + normalizedType: 'integer', + dimensionType: 'number' as const, + nullable: false, + primaryKey: true, + comment: null, + }, + ], + }, + { + catalog: null, + db: 'public', + name: 'orders', + kind: 'table' as const, + comment: null, + estimatedRows: null, + foreignKeys: [], + columns: [ + { + name: 'customer_id', + nativeType: 'integer', + normalizedType: 'integer', + dimensionType: 'number' as const, + nullable: false, + primaryKey: false, + comment: null, + }, + ], + }, + ], + }; + }, + }; + + const result = await runLocalScan({ + project, + adapters: [fetchOnlyAdapter()], + connectionId: 'warehouse', + mode: 'relationships', + detectRelationships: true, + connector, + jobId: 'scan-relationships-1', + now: () => new Date('2026-04-29T09:10:00.000Z'), + }); + + expect(result.report.mode).toBe('relationships'); + expect(result.report.enrichment.deterministicRelationships).toBe('completed'); + expect(result.report.relationships).toEqual({ accepted: 0, review: 1, rejected: 0, skipped: 0 }); + }); + + it('records standalone statistical validation results in the scan report', async () => { + const connector = { + id: 'test:warehouse', + driver: 'postgres' as const, + capabilities: { + structuralIntrospection: true as const, + tableSampling: false, + columnSampling: false, + columnStats: true, + readOnlySql: true, + nestedAnalysis: false, + eventStreamDiscovery: false, + formalForeignKeys: false, + estimatedRowCounts: true, + }, + 
async introspect() { + return { + connectionId: 'warehouse', + driver: 'postgres' as const, + extractedAt: '2026-04-29T09:00:00.000Z', + scope: { schemas: ['public'] }, + metadata: {}, + tables: [ + { + catalog: null, + db: 'public', + name: 'customers', + kind: 'table' as const, + comment: null, + estimatedRows: 100, + foreignKeys: [], + columns: [ + { + name: 'id', + nativeType: 'integer', + normalizedType: 'integer', + dimensionType: 'number' as const, + nullable: false, + primaryKey: true, + comment: null, + }, + ], + }, + { + catalog: null, + db: 'public', + name: 'orders', + kind: 'table' as const, + comment: null, + estimatedRows: 1000, + foreignKeys: [], + columns: [ + { + name: 'customer_id', + nativeType: 'integer', + normalizedType: 'integer', + dimensionType: 'number' as const, + nullable: false, + primaryKey: false, + comment: null, + }, + ], + }, + ], + }; + }, + async executeReadOnly(input: KloReadOnlyQueryInput) { + return relationshipSqlResult(input); + }, + }; + + const result = await runLocalScan({ + project, + adapters: [fetchOnlyAdapter()], + connectionId: 'warehouse', + mode: 'relationships', + detectRelationships: true, + connector, + jobId: 'scan-statistical-relationships-1', + now: () => new Date('2026-04-29T09:12:00.000Z'), + }); + + expect(result.report.enrichment.statisticalValidation).toBe('completed'); + expect(result.report.relationships).toEqual({ accepted: 1, review: 0, rejected: 0, skipped: 0 }); + expect(result.report.warnings).toEqual([]); + }); + + it('threads scan relationship settings into relationship-only local scans', async () => { + project.config.scan.enrichment = { mode: 'deterministic' }; + project.config.scan.relationships = { + ...project.config.scan.relationships, + llmProposals: false, + maxLlmTablesPerBatch: 7, + }; + const getModel = vi.fn(() => ({ modelId: 'provider/language-model', provider: 'gateway' })); + const connector = { + id: 'test:warehouse', + driver: 'postgres' as const, + capabilities: { + 
structuralIntrospection: true as const, + tableSampling: false, + columnSampling: false, + columnStats: true, + readOnlySql: true, + nestedAnalysis: false, + eventStreamDiscovery: false, + formalForeignKeys: false, + estimatedRowCounts: true, + }, + async introspect() { + return { + connectionId: 'warehouse', + driver: 'postgres' as const, + extractedAt: '2026-04-29T09:30:00.000Z', + scope: { schemas: ['public'] }, + metadata: {}, + tables: [ + { + catalog: null, + db: 'public', + name: 'customers', + kind: 'table' as const, + comment: null, + estimatedRows: null, + foreignKeys: [], + columns: [ + { + name: 'id', + nativeType: 'integer', + normalizedType: 'integer', + dimensionType: 'number' as const, + nullable: false, + primaryKey: false, + comment: null, + }, + ], + }, + { + catalog: null, + db: 'public', + name: 'orders', + kind: 'table' as const, + comment: null, + estimatedRows: null, + foreignKeys: [], + columns: [ + { + name: 'customer_id', + nativeType: 'integer', + normalizedType: 'integer', + dimensionType: 'number' as const, + nullable: false, + primaryKey: false, + comment: null, + }, + ], + }, + ], + }; + }, + async executeReadOnly(input: KloReadOnlyQueryInput) { + return relationshipSqlResult(input); + }, + }; + + const result = await runLocalScan({ + project, + adapters: [fetchOnlyAdapter()], + connectionId: 'warehouse', + mode: 'relationships', + detectRelationships: true, + connector, + enrichmentProviders: { + llm: { + ...deterministicLlmProvider(), + getModel: getModel as never, + }, + embedding: { + dimensions: 8, + maxBatchSize: 64, + async embedBatch() { + return []; + }, + }, + }, + jobId: 'scan-relationships-llm-disabled-1', + now: () => new Date('2026-04-29T09:30:00.000Z'), + }); + + expect(result.report.relationships.accepted).toBe(1); + expect(result.report.enrichment.llmRelationshipValidation).toBe('skipped'); + expect(getModel).not.toHaveBeenCalledWith('candidateExtraction'); + }); + + it('accepts no-declared-constraint relationships 
and writes relationship artifacts', async () => { + const connector = { + id: 'test:warehouse', + driver: 'postgres' as const, + capabilities: { + structuralIntrospection: true as const, + tableSampling: false, + columnSampling: false, + columnStats: true, + readOnlySql: true, + nestedAnalysis: false, + eventStreamDiscovery: false, + formalForeignKeys: false, + estimatedRowCounts: true, + }, + async introspect() { + return { + connectionId: 'warehouse', + driver: 'postgres' as const, + extractedAt: '2026-05-07T09:00:00.000Z', + scope: {}, + metadata: {}, + tables: [ + { + catalog: null, + db: null, + name: 'accounts', + kind: 'table' as const, + comment: null, + estimatedRows: 2, + foreignKeys: [], + columns: [ + { + name: 'id', + nativeType: 'integer', + normalizedType: 'integer', + dimensionType: 'number' as const, + nullable: false, + primaryKey: false, + comment: null, + }, + ], + }, + { + catalog: null, + db: null, + name: 'orders', + kind: 'table' as const, + comment: null, + estimatedRows: 3, + foreignKeys: [], + columns: [ + { + name: 'id', + nativeType: 'integer', + normalizedType: 'integer', + dimensionType: 'number' as const, + nullable: false, + primaryKey: false, + comment: null, + }, + { + name: 'account_id', + nativeType: 'integer', + normalizedType: 'integer', + dimensionType: 'number' as const, + nullable: false, + primaryKey: false, + comment: null, + }, + ], + }, + ], + }; + }, + async executeReadOnly(input: KloReadOnlyQueryInput) { + return relationshipSqlResult(input); + }, + }; + + const result = await runLocalScan({ + project, + adapters: [fetchOnlyAdapter()], + connectionId: 'warehouse', + mode: 'relationships', + detectRelationships: true, + connector, + jobId: 'scan-relationship-discovery', + now: () => new Date('2026-05-07T09:12:00.000Z'), + }); + + expect(result.report.relationships).toEqual({ accepted: 1, review: 0, rejected: 0, skipped: 0 }); + const enrichmentRoot = 
`raw-sources/warehouse/live-database/${result.report.syncId}/enrichment`; + expect(result.report.artifactPaths.enrichmentArtifacts).toEqual([ + `${enrichmentRoot}/relationships.json`, + `${enrichmentRoot}/relationship-profile.json`, + `${enrichmentRoot}/relationship-diagnostics.json`, + ]); + + const diagnostics = JSON.parse( + (await project.fileStore.readFile(`${enrichmentRoot}/relationship-diagnostics.json`)).content, + ) as { + summary: { accepted: number; review: number; rejected: number; skipped: number }; + noAcceptedReason: string | null; + }; + expect(diagnostics.summary).toEqual({ accepted: 1, review: 0, rejected: 0, skipped: 0 }); + expect(diagnostics.noAcceptedReason).toBeNull(); + + const manifestPath = result.report.artifactPaths.manifestShards[0]; + if (!manifestPath) { + throw new Error('Expected manifest shard path'); + } + const manifest = YAML.parse((await project.fileStore.readFile(manifestPath)).content) as { + tables: { orders: { joins: Array<{ to: string; on: string; source: string }> } }; + }; + expect(manifest.tables.orders.joins).toEqual( + expect.arrayContaining([ + expect.objectContaining({ + to: 'accounts', + on: 'orders.account_id = accounts.id', + source: 'inferred', + }), + ]), + ); + }); + + it('runs enriched relationship discovery without scan enrichment backend', async () => { + const connector = { + id: 'test:warehouse', + driver: 'postgres' as const, + capabilities: { + structuralIntrospection: true as const, + tableSampling: false, + columnSampling: false, + columnStats: true, + readOnlySql: true, + nestedAnalysis: false, + eventStreamDiscovery: false, + formalForeignKeys: false, + estimatedRowCounts: true, + }, + async introspect() { + return { + connectionId: 'warehouse', + driver: 'postgres' as const, + extractedAt: '2026-05-07T09:00:00.000Z', + scope: {}, + metadata: {}, + tables: [ + { + catalog: null, + db: null, + name: 'accounts', + kind: 'table' as const, + comment: null, + estimatedRows: 2, + foreignKeys: [], + 
columns: [ + { + name: 'id', + nativeType: 'integer', + normalizedType: 'integer', + dimensionType: 'number' as const, + nullable: false, + primaryKey: false, + comment: null, + }, + ], + }, + { + catalog: null, + db: null, + name: 'orders', + kind: 'table' as const, + comment: null, + estimatedRows: 3, + foreignKeys: [], + columns: [ + { + name: 'id', + nativeType: 'integer', + normalizedType: 'integer', + dimensionType: 'number' as const, + nullable: false, + primaryKey: false, + comment: null, + }, + { + name: 'account_id', + nativeType: 'integer', + normalizedType: 'integer', + dimensionType: 'number' as const, + nullable: false, + primaryKey: false, + comment: null, + }, + ], + }, + ], + }; + }, + async executeReadOnly(input: KloReadOnlyQueryInput) { + return relationshipSqlResult(input); + }, + }; + + const result = await runLocalScan({ + project, + adapters: [fetchOnlyAdapter()], + connectionId: 'warehouse', + mode: 'enriched', + connector, + jobId: 'scan-providerless-enriched-relationship-discovery', + now: () => new Date('2026-05-07T09:14:00.000Z'), + }); + + expect(result.report.mode).toBe('enriched'); + expect(result.report.enrichment).toEqual({ + dataDictionary: 'skipped', + tableDescriptions: 'skipped', + columnDescriptions: 'skipped', + embeddings: 'skipped', + deterministicRelationships: 'completed', + llmRelationshipValidation: 'skipped', + statisticalValidation: 'completed', + }); + expect(result.report.relationships).toEqual({ accepted: 1, review: 0, rejected: 0, skipped: 0 }); + expect(result.report.warnings).toContainEqual({ + code: 'scan_enrichment_backend_not_configured', + message: + 'Skipping description and embedding enrichment because scan.enrichment.mode is not configured; relationship discovery still ran.', + recoverable: true, + metadata: { + skippedStages: ['descriptions', 'embeddings'], + relationshipDetection: true, + }, + }); + + const enrichmentRoot = `raw-sources/warehouse/live-database/${result.report.syncId}/enrichment`; + 
expect(result.report.artifactPaths.enrichmentArtifacts).toEqual( + expect.arrayContaining([ + `${enrichmentRoot}/relationships.json`, + `${enrichmentRoot}/relationship-profile.json`, + `${enrichmentRoot}/relationship-diagnostics.json`, + ]), + ); + + const manifestPath = result.report.artifactPaths.manifestShards[0]; + if (!manifestPath) { + throw new Error('Expected manifest shard path'); + } + const manifest = YAML.parse((await project.fileStore.readFile(manifestPath)).content) as { + tables: { orders: { joins: Array<{ to: string; on: string; source: string }> } }; + }; + expect(manifest.tables.orders.joins).toEqual( + expect.arrayContaining([ + expect.objectContaining({ + to: 'accounts', + on: 'orders.account_id = accounts.id', + source: 'inferred', + }), + ]), + ); + }); + + it('redacts credential-like warning messages before persisting local scan reports', async () => { + const connector = { + id: 'test:warehouse', + driver: 'postgres' as const, + capabilities: { + structuralIntrospection: true as const, + tableSampling: false, + columnSampling: false, + columnStats: true, + readOnlySql: true, + nestedAnalysis: false, + eventStreamDiscovery: false, + formalForeignKeys: false, + estimatedRowCounts: true, + }, + async introspect() { + return { + connectionId: 'warehouse', + driver: 'postgres' as const, + extractedAt: '2026-04-29T09:00:00.000Z', + scope: { schemas: ['public'] }, + metadata: {}, + tables: [ + { + catalog: null, + db: 'public', + name: 'customers', + kind: 'table' as const, + comment: null, + estimatedRows: 100, + foreignKeys: [], + columns: [ + { + name: 'id', + nativeType: 'integer', + normalizedType: 'integer', + dimensionType: 'number' as const, + nullable: false, + primaryKey: true, + comment: null, + }, + ], + }, + { + catalog: null, + db: 'public', + name: 'orders', + kind: 'table' as const, + comment: null, + estimatedRows: 1000, + foreignKeys: [], + columns: [ + { + name: 'customer_id', + nativeType: 'integer', + normalizedType: 'integer', 
+ dimensionType: 'number' as const, + nullable: false, + primaryKey: false, + comment: null, + }, + ], + }, + ], + }; + }, + async executeReadOnly(input: KloReadOnlyQueryInput) { + return relationshipSqlResult(input, { throwOnCoverage: true }); + }, + }; + + const result = await runLocalScan({ + project, + adapters: [fetchOnlyAdapter()], + connectionId: 'warehouse', + mode: 'relationships', + detectRelationships: true, + connector, + jobId: 'scan-redacted-warning-1', + now: () => new Date('2026-04-29T09:14:00.000Z'), + }); + + expect(result.report.warnings[0]?.message).toContain('postgres://reader:@example.test/db'); + expect(result.report.warnings[0]?.message).not.toContain( + 'postgres://reader:secret@example.test/db', // pragma: allowlist secret + ); + const reportPath = result.report.artifactPaths.reportPath; + if (!reportPath) { + throw new Error('Expected local scan report path'); + } + const persistedReport = await readFile(join(project.projectDir, reportPath), 'utf-8'); + expect(persistedReport).toContain('postgres://reader:@example.test/db'); + expect(persistedReport).not.toContain('postgres://reader:secret@example.test/db'); // pragma: allowlist secret + }); + + it('runs enriched scans when deterministic standalone enrichment is configured', async () => { + await writeFile( + join(project.projectDir, 'klo.yaml'), + [ /* klo.yaml fixture: YAML nesting is indentation-sensitive, so the connection fields must sit under connections.warehouse and the mode under scan.enrichment */ + 'project: warehouse', + 'connections:', + '  warehouse:', + '    driver: postgres', + '    url: env:DATABASE_URL', + '    readonly: true', + 'ingest:', + '  adapters:', + '    - live-database', + 'scan:', + '  enrichment:', + '    mode: deterministic', + '', + ].join('\n'), + 'utf-8', + ); + project = await loadKloProject({ projectDir: project.projectDir }); + + const connector = { + id: 'test:warehouse', + driver: 'postgres' as const, + capabilities: { + structuralIntrospection: true as const, + tableSampling: true, + columnSampling: true, + columnStats: false, + readOnlySql: false, + nestedAnalysis: false, + eventStreamDiscovery: false, +
formalForeignKeys: false, + estimatedRowCounts: false, + }, + async introspect() { + return { + connectionId: 'warehouse', + driver: 'postgres' as const, + extractedAt: '2026-04-29T09:00:00.000Z', + scope: { schemas: ['public'] }, + metadata: {}, + tables: [ + { + catalog: null, + db: 'public', + name: 'orders', + kind: 'table' as const, + comment: 'Orders', + estimatedRows: 1, + foreignKeys: [], + columns: [ + { + name: 'id', + nativeType: 'integer', + normalizedType: 'integer', + dimensionType: 'number' as const, + nullable: false, + primaryKey: true, + comment: 'Order id', + }, + ], + }, + ], + }; + }, + async sampleTable() { + return { headers: ['id'], rows: [[1]], totalRows: 1 }; + }, + async sampleColumn() { + return { values: ['1'], nullCount: 0, distinctCount: 1 }; + }, + }; + + const result = await runLocalScan({ + project, + adapters: [fetchOnlyAdapter()], + connectionId: 'warehouse', + mode: 'enriched', + connector, + jobId: 'scan-enriched-1', + now: () => new Date('2026-04-29T09:15:00.000Z'), + }); + + expect(result.report.mode).toBe('enriched'); + expect(result.report.enrichment.tableDescriptions).toBe('completed'); + expect(result.report.enrichment.columnDescriptions).toBe('completed'); + expect(result.report.enrichment.embeddings).toBe('completed'); + expect(result.report.artifactPaths.enrichmentArtifacts).toEqual([ + 'raw-sources/warehouse/live-database/2026-04-29-091500-scan-enriched-1/enrichment/descriptions.json', + 'raw-sources/warehouse/live-database/2026-04-29-091500-scan-enriched-1/enrichment/embeddings.json', + 'raw-sources/warehouse/live-database/2026-04-29-091500-scan-enriched-1/enrichment/relationships.json', + 'raw-sources/warehouse/live-database/2026-04-29-091500-scan-enriched-1/enrichment/relationship-profile.json', + 'raw-sources/warehouse/live-database/2026-04-29-091500-scan-enriched-1/enrichment/relationship-diagnostics.json', + ]); + 
expect(result.report.artifactPaths.manifestShards).toEqual(['semantic-layer/warehouse/_schema/public.yaml']); + expect(result.report.manifestShardsWritten).toBe(1); + + await expect( + readFile( + join( + project.projectDir, + 'raw-sources/warehouse/live-database/2026-04-29-091500-scan-enriched-1/enrichment/descriptions.json', + ), + 'utf-8', + ), + ).resolves.toContain('Deterministic description'); + + const manifestRaw = await readFile( + join(project.projectDir, 'semantic-layer/warehouse/_schema/public.yaml'), + 'utf-8', + ); + expect(manifestRaw).toContain('ai: "Deterministic description'); + }); + + it('persists structural artifacts and a recoverable warning when standalone enrichment execution fails', async () => { + const connector = { + id: 'test:warehouse', + driver: 'postgres' as const, + capabilities: { + structuralIntrospection: true as const, + tableSampling: true, + columnSampling: true, + columnStats: false, + readOnlySql: false, + nestedAnalysis: false, + eventStreamDiscovery: false, + formalForeignKeys: false, + estimatedRowCounts: false, + }, + async introspect() { + return { + connectionId: 'warehouse', + driver: 'postgres' as const, + extractedAt: '2026-04-29T09:00:00.000Z', + scope: { schemas: ['public'] }, + metadata: {}, + tables: [ + { + catalog: null, + db: 'public', + name: 'orders', + kind: 'table' as const, + comment: 'Orders', + estimatedRows: 1, + foreignKeys: [], + columns: [ + { + name: 'id', + nativeType: 'integer', + normalizedType: 'integer', + dimensionType: 'number' as const, + nullable: false, + primaryKey: true, + comment: 'Order id', + }, + ], + }, + ], + }; + }, + async sampleTable() { + return { headers: ['id'], rows: [[1]], totalRows: 1 }; + }, + async sampleColumn() { + return { values: ['1'], nullCount: 0, distinctCount: 1 }; + }, + }; + + const result = await runLocalScan({ + project, + adapters: [fetchOnlyAdapter()], + connectionId: 'warehouse', + mode: 'enriched', + connector, + enrichmentProviders: { + llm: 
deterministicLlmProvider(), + embedding: { + dimensions: 8, + maxBatchSize: 64, + async embedBatch() { + throw new Error('embedding service timed out'); + }, + }, + }, + jobId: 'scan-enrichment-fails-1', + now: () => new Date('2026-04-29T09:18:00.000Z'), + }); + + expect(result.report.mode).toBe('enriched'); + expect(result.report.enrichment).toEqual({ + dataDictionary: 'failed', + tableDescriptions: 'failed', + columnDescriptions: 'failed', + embeddings: 'failed', + deterministicRelationships: 'failed', + llmRelationshipValidation: 'failed', + statisticalValidation: 'failed', + }); + expect(result.report.warnings).toEqual([ + { + code: 'enrichment_failed', + message: 'KLO scan enrichment failed after structural scan completed: embedding service timed out', + recoverable: true, + metadata: { + mode: 'enriched', + detectRelationships: false, + }, + }, + ]); + expect(result.report.artifactPaths.enrichmentArtifacts).toEqual([]); + expect(result.report.artifactPaths.manifestShards).toEqual(['semantic-layer/warehouse/_schema/public.yaml']); + + const manifestRaw = await readFile( + join(project.projectDir, 'semantic-layer/warehouse/_schema/public.yaml'), + 'utf-8', + ); + expect(manifestRaw).toContain('orders:'); + expect(manifestRaw).toContain('table: public.orders'); + expect(manifestRaw).not.toContain('ai: Generated description'); + + const reportPath = result.report.artifactPaths.reportPath; + if (!reportPath) { + throw new Error('Expected local scan report path'); + } + const persistedReport = await readFile(join(project.projectDir, reportPath), 'utf-8'); + expect(persistedReport).toContain('"code": "enrichment_failed"'); + expect(persistedReport).toContain('embedding service timed out'); + }); + + it('resumes completed local enrichment stages when an enriched scan run is retried', async () => { + let embeddingAttempts = 0; + const connector = { + id: 'test:warehouse', + driver: 'postgres' as const, + capabilities: { + structuralIntrospection: true as const, + 
tableSampling: true, + columnSampling: true, + columnStats: true, + readOnlySql: false, + nestedAnalysis: false, + eventStreamDiscovery: false, + formalForeignKeys: false, + estimatedRowCounts: false, + }, + async introspect() { + return { + connectionId: 'warehouse', + driver: 'postgres' as const, + extractedAt: '2026-04-29T09:21:00.000Z', + scope: { schemas: ['public'] }, + metadata: {}, + tables: [ + { + catalog: null, + db: 'public', + name: 'orders', + kind: 'table' as const, + comment: 'Orders', + estimatedRows: 1, + foreignKeys: [], + columns: [ + { + name: 'id', + nativeType: 'integer', + normalizedType: 'integer', + dimensionType: 'number' as const, + nullable: false, + primaryKey: true, + comment: 'Order id', + }, + ], + }, + ], + }; + }, + async sampleTable() { + return { headers: ['id'], rows: [[1]], totalRows: 1 }; + }, + async sampleColumn() { + return { values: ['1'], nullCount: 0, distinctCount: 1 }; + }, + }; + const llm = deterministicLlmProvider(); + + const first = await runLocalScan({ + project, + adapters: [fetchOnlyAdapter()], + connectionId: 'warehouse', + mode: 'enriched', + connector, + enrichmentProviders: { + llm, + embedding: { + dimensions: 8, + maxBatchSize: 64, + async embedBatch() { + embeddingAttempts += 1; + throw new Error('embedding service timed out once'); + }, + }, + }, + jobId: 'scan-enrichment-resume-1', + now: () => new Date('2026-04-29T09:21:00.000Z'), + }); + + expect(first.report.enrichmentState).toEqual({ + resumedStages: [], + completedStages: ['descriptions'], + failedStages: ['embeddings'], + }); + expect(first.report.enrichment.embeddings).toBe('failed'); + + const getModel = vi.spyOn(llm, 'getModel'); + const retry = await runLocalScan({ + project, + adapters: [fetchOnlyAdapter()], + connectionId: 'warehouse', + mode: 'enriched', + connector, + enrichmentProviders: { + llm, + embedding: { + dimensions: 8, + maxBatchSize: 64, + async embedBatch(texts) { + embeddingAttempts += 1; + return texts.map(() => [0.1, 0.2, 
0.3, 0.4, 0.5, 0.6, 0.7, 0.8]); + }, + }, + }, + jobId: 'scan-enrichment-resume-1', + now: () => new Date('2026-04-29T09:22:00.000Z'), + }); + + expect(retry.syncId).toBe(first.syncId); + expect(retry.report.enrichmentState).toEqual({ + resumedStages: ['descriptions'], + completedStages: ['descriptions', 'embeddings', 'relationships'], + failedStages: [], + }); + expect(retry.report.enrichment.embeddings).toBe('completed'); + expect(getModel).toHaveBeenCalledTimes(1); + expect(getModel).toHaveBeenCalledWith('candidateExtraction'); + expect(embeddingAttempts).toBe(2); + + const reportPath = retry.report.artifactPaths.reportPath; + if (!reportPath) { + throw new Error('Expected local scan report path'); + } + const persistedReport = await readFile(join(project.projectDir, reportPath), 'utf-8'); + expect(persistedReport).toContain('"resumedStages": ['); + expect(persistedReport).toContain('"descriptions"'); + }); + + it('accepts sqlite as a native standalone scan driver when the host supplies a live-database adapter', async () => { + await writeFile( + join(project.projectDir, 'klo.yaml'), + [ /* klo.yaml fixture: connection fields are indented under connections.warehouse so the loader sees a configured "warehouse" connection */ + 'project: warehouse', + 'connections:', + '  warehouse:', + '    driver: sqlite', + '    path: warehouse.db', + '    readonly: true', + 'ingest:', + '  adapters:', + '    - live-database', + '', + ].join('\n'), + 'utf-8', + ); + project = await loadKloProject({ projectDir: project.projectDir }); + + const result = await runLocalScan({ + project, + adapters: [fetchOnlyAdapter()], + connectionId: 'warehouse', + jobId: 'scan-run-sqlite', + now: () => new Date('2026-04-29T11:00:00.000Z'), + }); + + expect(result.report.driver).toBe('sqlite'); + expect(result.report.artifactPaths.reportPath).toBe( + 'raw-sources/warehouse/live-database/2026-04-29-110000-scan-run-sqlite/scan-report.json', + ); + }); + + it('accepts mysql as a native standalone scan driver when the host supplies a live-database adapter', async () => { + await writeFile( + join(project.projectDir, 'klo.yaml'), + [ + 'project:
warehouse', + 'connections:', + '  warehouse:', + '    driver: mysql', + '    url: env:MYSQL_URL', + '    readonly: true', + 'ingest:', + '  adapters:', + '    - live-database', + '', + ].join('\n'), + 'utf-8', + ); + project = await loadKloProject({ projectDir: project.projectDir }); + + const result = await runLocalScan({ + project, + adapters: [fetchOnlyAdapter()], + connectionId: 'warehouse', + jobId: 'scan-run-mysql', + now: () => new Date('2026-04-29T13:00:00.000Z'), + }); + + expect(result.report.driver).toBe('mysql'); + expect(result.report.artifactPaths.reportPath).toBe( + 'raw-sources/warehouse/live-database/2026-04-29-130000-scan-run-mysql/scan-report.json', + ); + }); + + it('accepts clickhouse as a native standalone scan driver when the host supplies a live-database adapter', async () => { + await writeFile( + join(project.projectDir, 'klo.yaml'), + [ /* klo.yaml fixture: connection fields are indented under connections.warehouse */ + 'project: warehouse', + 'connections:', + '  warehouse:', + '    driver: clickhouse', + '    host: env:CLICKHOUSE_HOST', + '    database: analytics', + '    username: reader', + '    password: env:CLICKHOUSE_PASSWORD', + '    readonly: true', + 'ingest:', + '  adapters:', + '    - live-database', + '', + ].join('\n'), + 'utf-8', + ); + project = await loadKloProject({ projectDir: project.projectDir }); + + const result = await runLocalScan({ + project, + adapters: [fetchOnlyAdapter()], + connectionId: 'warehouse', + jobId: 'scan-run-clickhouse', + now: () => new Date('2026-04-29T15:00:00.000Z'), + }); + + expect(result.report.driver).toBe('clickhouse'); + expect(result.report.artifactPaths.reportPath).toBe( + 'raw-sources/warehouse/live-database/2026-04-29-150000-scan-run-clickhouse/scan-report.json', + ); + }); + + it('accepts sqlserver as a native standalone scan driver when the host supplies a live-database adapter', async () => { + await writeFile( + join(project.projectDir, 'klo.yaml'), + [ /* klo.yaml fixture: connection fields are indented under connections.warehouse */ + 'project: warehouse', + 'connections:', + '  warehouse:', + '    driver: sqlserver', + '    host: env:SQLSERVER_HOST', + '    database: analytics',
+ '    username: reader', + '    schema: dbo', + '    readonly: true', + 'ingest:', + '  adapters:', + '    - live-database', + '', + ].join('\n'), + 'utf-8', + ); + project = await loadKloProject({ projectDir: project.projectDir }); + + const result = await runLocalScan({ + project, + adapters: [fetchOnlyAdapter()], + connectionId: 'warehouse', + jobId: 'scan-run-sqlserver', + now: () => new Date('2026-04-29T16:00:00.000Z'), + }); + + expect(result.report.driver).toBe('sqlserver'); + expect(result.report.artifactPaths.reportPath).toBe( + 'raw-sources/warehouse/live-database/2026-04-29-160000-scan-run-sqlserver/scan-report.json', + ); + }); +}); diff --git a/packages/context/src/scan/local-scan.ts b/packages/context/src/scan/local-scan.ts new file mode 100644 index 00000000..e42a0a9c --- /dev/null +++ b/packages/context/src/scan/local-scan.ts @@ -0,0 +1,516 @@ +import type { createKloEmbeddingProvider, createKloLlmProvider } from '@klo/llm'; +import { + createDefaultLocalIngestAdapters, + getLocalStageOnlyIngestStatus, + type LocalIngestRunRecord, + runLocalStageOnlyIngest, + type SourceAdapter, +} from '../ingest/index.js'; +import { + createLocalKloEmbeddingProviderFromConfig, + createLocalKloLlmProviderFromConfig, + KloScanEmbeddingPortAdapter, +} from '../llm/index.js'; +import type { KloProjectLlmConfig, KloScanEnrichmentConfig, KloScanRelationshipConfig } from '../project/config.js'; +import type { KloLocalProject } from '../project/index.js'; +import { kloLocalStateDbPath } from '../project/local-state-db.js'; +import { redactKloScanReport } from './credentials.js'; +import { completedKloScanEnrichmentStateSummary } from './enrichment-state.js'; +import { failedKloScanEnrichmentSummary, kloScanErrorMessage } from './enrichment-summary.js'; +import { + createDeterministicLocalScanEnrichmentProviders, + type KloLocalScanEnrichmentProviders, + runLocalScanEnrichment, +} from './local-enrichment.js'; +import { writeLocalScanEnrichmentArtifacts, writeLocalScanManifestShards }
from './local-enrichment-artifacts.js'; +import { readLocalScanStructuralSnapshot } from './local-structural-artifacts.js'; +import { SqliteLocalScanEnrichmentStateStore } from './sqlite-local-enrichment-state-store.js'; +import type { + KloConnectionDriver, + KloProgressPort, + KloScanConnector, + KloScanEnrichmentStateSummary, + KloScanMode, + KloScanReport, + KloScanTrigger, +} from './types.js'; + +/** Options for runLocalScan. `mode` defaults to 'structural'. A connector (`connector`, else `createConnector`) is required for any other mode or when `detectRelationships` is set. `enrichmentProviders`: undefined derives providers from the project config, null passes no providers. `enrichmentStateStore`: undefined opens the SQLite store at the project's local state DB path, null passes no state store. */ export interface RunLocalScanOptions { + project: KloLocalProject; + connectionId: string; + mode?: KloScanMode; + detectRelationships?: boolean; + dryRun?: boolean; + trigger?: KloScanTrigger; + databaseIntrospectionUrl?: string; + adapters?: SourceAdapter[]; + jobId?: string; + now?: () => Date; + connector?: KloScanConnector; + createConnector?: (connectionId: string) => KloScanConnector | Promise<KloScanConnector>; + enrichmentProviders?: KloLocalScanEnrichmentProviders | null; + enrichmentStateStore?: SqliteLocalScanEnrichmentStateStore | null; + progress?: KloProgressPort; +} + +/** Value resolved by runLocalScan; the scan has already finished when it is returned, so `status` is always 'done'. */ export interface LocalScanRunResult { + runId: string; + status: 'done'; + done: true; + connectionId: string; + mode: KloScanMode; + dryRun: boolean; + syncId: string; + report: KloScanReport; +} + +/** Status view of a scan run that combines the ingest run record with the stored scan report (assembled by getLocalScanStatus). */ export interface LocalScanStatusResponse { + runId: string; + status: LocalIngestRunRecord['status']; + done: boolean; + connectionId: string; + mode: KloScanMode; + dryRun: boolean; + syncId: string; + progress: number; + startedAt: string; + completedAt: string; + reportPath: string | null; + warnings: KloScanReport['warnings']; +} + +/** Option bundle for exposing local scans over MCP. NOTE(review): not referenced in the visible part of this file; confirm the consumer. */ export interface LocalScanMcpOptions { + adapters?: SourceAdapter[]; + databaseIntrospectionUrl?: string; + jobIdFactory?: () => string; + now?: () => Date; + createConnector?: (connectionId: string) => KloScanConnector | Promise<KloScanConnector>; +} + +/** Ingest adapter name; also used as a path segment under raw-sources/{connectionId}/. */ const LIVE_DATABASE_ADAPTER = 'live-database'; +/** File name of the persisted scan report inside a sync directory. */ const SCAN_REPORT_FILE = 'scan-report.json'; +/** Author name passed to fileStore.writeFile when the report is persisted. */ const LOCAL_AUTHOR = 'klo'; +/** Author e-mail passed to fileStore.writeFile when the report is persisted. */ const LOCAL_AUTHOR_EMAIL = 'klo@example.com'; + +/** Lower-cases a configured driver name and validates it against the supported list. 'sqlite3' is returned as 'sqlite'; every other accepted name (including 'postgresql') is returned unchanged. Throws an Error for unsupported or missing drivers. */ function normalizeDriver(driver: string | undefined):
KloConnectionDriver { + const normalized = (driver ?? '').toLowerCase(); + if ( + normalized === 'postgres' || + normalized === 'postgresql' || + normalized === 'sqlite' || + normalized === 'sqlite3' || + normalized === 'mysql' || + normalized === 'clickhouse' || + normalized === 'sqlserver' || + normalized === 'bigquery' || + normalized === 'snowflake' || + normalized === 'posthog' + ) { + return normalized === 'sqlite3' ? 'sqlite' : normalized; + } + throw new Error( + `Standalone klo scan supports postgres/postgresql/sqlite/mysql/clickhouse/sqlserver/bigquery/snowflake/posthog in this phase, received "${driver ?? 'unknown'}"`, + ); +} + +/** Counts the paths that start with 'tables/' and end with '.json'. */ function tablePathCount(paths: string[]): number { + return paths.filter((path) => path.startsWith('tables/') && path.endsWith('.json')).length; +} + +/** Project-relative directory that holds the raw live-database artifacts of one sync. */ function rawSourcesDir(connectionId: string, syncId: string): string { + return `raw-sources/${connectionId}/${LIVE_DATABASE_ADAPTER}/${syncId}`; +} + +/** Project-relative path of the scan report inside the sync's raw-sources directory. */ function scanReportPath(connectionId: string, syncId: string): string { + return `${rawSourcesDir(connectionId, syncId)}/${SCAN_REPORT_FILE}`; +} + +/** Runtime guard: throws for any mode other than 'structural', 'relationships' or 'enriched'. */ function assertSupportedMode(mode: KloScanMode): void { + if (mode !== 'structural' && mode !== 'relationships' && mode !== 'enriched') { + throw new Error(`Unsupported KLO scan mode: ${mode}`); + } +} + +/** Picks the connector for a run: null when a structural scan without relationship detection needs none, otherwise options.connector, otherwise the result of options.createConnector(connectionId). Throws when a connector is needed but neither is supplied. */ async function resolveScanConnector(options: RunLocalScanOptions, mode: KloScanMode): Promise<KloScanConnector | null> { + if (mode === 'structural' && !options.detectRelationships) { + return null; + } + if (options.connector) { + return options.connector; + } + if (options.createConnector) { + return options.createConnector(options.connectionId); + } + throw new Error('klo scan --enrich and --detect-relationships require a native standalone scan connector'); +} + +/** Optional overrides forwarded unchanged as `deps` to the LLM and embedding provider factories. */ interface LocalScanEnrichmentProviderDeps { + createKloLlmProvider?: typeof createKloLlmProvider; + createKloEmbeddingProvider?: typeof createKloEmbeddingProvider; + env?: NodeJS.ProcessEnv; +} + +/** Builds enrichment providers from project config: deterministic providers for mode 'deterministic'; an LLM plus embedding adapter for mode 'llm' with embeddings configured; null for any other configuration or when either provider cannot be created. */ export function
createLocalScanEnrichmentProvidersFromConfig( + config: KloScanEnrichmentConfig, + llmConfig: KloProjectLlmConfig, + deps: LocalScanEnrichmentProviderDeps = {}, +): KloLocalScanEnrichmentProviders | null { + if (config.mode === 'deterministic') { + return createDeterministicLocalScanEnrichmentProviders(); + } + + /* Only the 'llm' mode with an embeddings section can produce real providers. */ if (config.mode !== 'llm' || !config.embeddings) { + return null; + } + + const llm = createLocalKloLlmProviderFromConfig(llmConfig, deps); + const embeddingProvider = createLocalKloEmbeddingProviderFromConfig(config.embeddings, deps); + if (!llm || !embeddingProvider) { + return null; + } + + return { + llm, + embedding: new KloScanEmbeddingPortAdapter(embeddingProvider), + }; +} + +/** Chooses the enrichment state store: none for dry runs; the caller-supplied value (which may be null) when one was passed; otherwise a SQLite store at the project's local state DB path. */ function createLocalScanEnrichmentStateStore(options: RunLocalScanOptions): SqliteLocalScanEnrichmentStateStore | null { + if (options.dryRun) { + return null; + } + if (options.enrichmentStateStore !== undefined) { + return options.enrichmentStateStore; + } + return new SqliteLocalScanEnrichmentStateStore({ dbPath: kloLocalStateDbPath(options.project) }); +} + +/** Summarises the enrichment, LLM and relationship settings handed to runLocalScanEnrichment as `providerIdentity`. Only whether a gateway base URL is configured is recorded, not the URL itself. */ function localScanProviderIdentity( + config: KloScanEnrichmentConfig, + llmConfig: KloProjectLlmConfig, + relationships: KloScanRelationshipConfig, +): Record<string, unknown> { + return { + mode: config.mode, + embeddingDimensions: config.embeddings?.dimensions ?? null, + llmModel: llmConfig.models.default ?? null, + embeddingModel: config.embeddings?.model ?? null, + batchSize: config.embeddings?.batchSize ?? null, + baseUrlConfigured: Boolean(llmConfig.provider.gateway?.base_url), + relationships, + }; +} + +/** Builds the initial scan report for an ingest run: artifact paths are null for dry runs, table diff counts come from the record's diff paths, every enrichment stage starts as 'skipped' and all other counters start at zero. */ function reportFromIngest(input: { + record: LocalIngestRunRecord; + driver: KloConnectionDriver; + mode: KloScanMode; + dryRun: boolean; + trigger: KloScanTrigger; + createdAt: string; +}): KloScanReport { + const reportPath = input.dryRun ?
null : scanReportPath(input.record.connectionId, input.record.syncId); + return { + connectionId: input.record.connectionId, + driver: input.driver, + syncId: input.record.syncId, + runId: input.record.runId, + trigger: input.trigger, + mode: input.mode, + dryRun: input.dryRun, + artifactPaths: { + rawSourcesDir: input.dryRun ? null : rawSourcesDir(input.record.connectionId, input.record.syncId), + reportPath, + manifestShards: [], + enrichmentArtifacts: [], + }, + /* Same table counts as scanDiffSummaryFromRecord(input.record); column-level counts are not computed here. */ diffSummary: { + tablesAdded: tablePathCount(input.record.diffPaths.added), + tablesModified: tablePathCount(input.record.diffPaths.modified), + tablesDeleted: tablePathCount(input.record.diffPaths.deleted), + tablesUnchanged: tablePathCount(input.record.diffPaths.unchanged), + columnsAdded: 0, + columnsModified: 0, + columnsDeleted: 0, + }, + manifestShardsWritten: 0, + structuralSyncStats: { + tablesCreated: 0, + tablesUpdated: 0, + tablesDeleted: 0, + columnsCreated: 0, + columnsUpdated: 0, + columnsDeleted: 0, + }, + enrichment: { + dataDictionary: 'skipped', + tableDescriptions: 'skipped', + columnDescriptions: 'skipped', + embeddings: 'skipped', + deterministicRelationships: 'skipped', + llmRelationshipValidation: 'skipped', + statisticalValidation: 'skipped', + }, + capabilityGaps: [], + warnings: [], + relationships: { accepted: 0, review: 0, rejected: 0, skipped: 0 }, + enrichmentState: completedKloScanEnrichmentStateSummary(), + createdAt: input.createdAt, + }; +} + +/** Persists the report as pretty-printed JSON (2-space indent, trailing newline) through the project file store. Does nothing when the report has no reportPath, which is the case for dry runs. */ async function writeScanReport(project: KloLocalProject, report: KloScanReport): Promise<void> { + if (!report.artifactPaths.reportPath) { + return; + } + await project.fileStore.writeFile( + report.artifactPaths.reportPath, + `${JSON.stringify(report, null, 2)}\n`, + LOCAL_AUTHOR, + LOCAL_AUTHOR_EMAIL, + `scan(${LIVE_DATABASE_ADAPTER}): ${report.runId} syncId=${report.syncId}`, + ); +} + +/** Maps an ingest record's diff paths to table-level counts; column-level counts are always reported as 0. */ function scanDiffSummaryFromRecord(record: LocalIngestRunRecord): KloScanReport['diffSummary'] { + return { + tablesAdded:
tablePathCount(record.diffPaths.added), + tablesModified: tablePathCount(record.diffPaths.modified), + tablesDeleted: tablePathCount(record.diffPaths.deleted), + tablesUnchanged: tablePathCount(record.diffPaths.unchanged), + columnsAdded: 0, + columnsModified: 0, + columnsDeleted: 0, + }; +} + +/** True only when a previous run exists (previousRunId is not null) and the ingest diff reports no added, modified or deleted entries. */ function hasNoContentChanges(record: LocalIngestRunRecord): boolean { + return ( + record.previousRunId !== null && + record.diffSummary.added === 0 && + record.diffSummary.modified === 0 && + record.diffSummary.deleted === 0 + ); +} + +/** Formats the progress message stating how many tables changed out of the total compared, with singular/plural nouns. */ function scanChangeSummary(diffSummary: KloScanReport['diffSummary']): string { + const changedTables = diffSummary.tablesAdded + diffSummary.tablesModified + diffSummary.tablesDeleted; + const totalTables = changedTables + diffSummary.tablesUnchanged; + const changeNoun = changedTables === 1 ? 'change' : 'changes'; + const tableNoun = totalTables === 1 ? 'table' : 'tables'; + return `Semantic layer comparison found ${changedTables} ${changeNoun} across ${totalTables} ${tableNoun}`; +} + +/** Best-effort lookup of a stored scan report: any read or JSON parse failure yields null instead of throwing. */ async function readScanReport( + project: KloLocalProject, + connectionId: string, + syncId: string, +): Promise<KloScanReport | null> { + try { + const raw = await project.fileStore.readFile(scanReportPath(connectionId, syncId)); + return JSON.parse(raw.content) as KloScanReport; + } catch { + return null; + } +} + +/** Runs a local scan for one configured connection: stages a live-database ingest, writes manifest shards, optionally runs enrichment or relationship detection through a connector, then redacts and persists the report. Enrichment errors are downgraded to a recoverable 'enrichment_failed' warning. Throws when the mode is unsupported, a required connector is missing, the connection is not configured, or its driver is unsupported. */ export async function runLocalScan(options: RunLocalScanOptions): Promise<LocalScanRunResult> { + const mode = options.mode ?? 'structural'; + assertSupportedMode(mode); + await options.progress?.update(0.05, 'Preparing scan'); + const connector = await resolveScanConnector(options, mode); + + const connection = options.project.config.connections[options.connectionId]; + if (!connection) { + throw new Error(`Connection "${options.connectionId}" is not configured in klo.yaml`); + } + const driver = normalizeDriver(connection.driver); + const adapters = + options.adapters ??
+ createDefaultLocalIngestAdapters(options.project, { databaseIntrospectionUrl: options.databaseIntrospectionUrl }); + /* An explicit options.enrichmentProviders value (including null) wins; only undefined falls back to the project config. */ const enrichmentProviders = + connector && (mode !== 'structural' || options.detectRelationships) + ? options.enrichmentProviders !== undefined + ? options.enrichmentProviders + : createLocalScanEnrichmentProvidersFromConfig(options.project.config.scan.enrichment, options.project.config.llm) + : null; + + await options.progress?.update(0.15, 'Inspecting database schema'); + const record = await runLocalStageOnlyIngest({ + project: options.project, + adapters, + adapter: LIVE_DATABASE_ADAPTER, + connectionId: options.connectionId, + trigger: 'manual_resync', + jobId: options.jobId, + now: options.now, + dryRun: options.dryRun, + }); + await options.progress?.update(0.55, scanChangeSummary(scanDiffSummaryFromRecord(record))); + let report = reportFromIngest({ + record, + driver, + mode, + dryRun: options.dryRun ?? false, + trigger: options.trigger ?? 'cli', + createdAt: (options.now?.() ?? new Date()).toISOString(), + }); + /* Without a connector, and when nothing was added, modified or deleted since the previous run, the stored report is reused if its mode and dryRun flag match. */ let reusedExistingScanArtifacts = false; + const existingReport = + !report.dryRun && !connector && hasNoContentChanges(record) + ? await readScanReport(options.project, record.connectionId, record.syncId) + : null; + if (existingReport && existingReport.mode === mode && existingReport.dryRun === report.dryRun) { + report.artifactPaths = existingReport.artifactPaths; + report.capabilityGaps = existingReport.capabilityGaps; + report.warnings = existingReport.warnings; + report.relationships = existingReport.relationships; + report.enrichment = existingReport.enrichment; + report.enrichmentState = existingReport.enrichmentState; + reusedExistingScanArtifacts = true; + } + const enrichmentStateStore = connector ?
createLocalScanEnrichmentStateStore(options) : null; + let enrichmentState: KloScanEnrichmentStateSummary = completedKloScanEnrichmentStateSummary(); + /* Fresh, non-dry runs regenerate the manifest shards from the staged structural snapshot. */ if (!reusedExistingScanArtifacts && !report.dryRun && report.artifactPaths.rawSourcesDir) { + await options.progress?.update(0.7, 'Writing schema artifacts'); + const structuralSnapshot = await readLocalScanStructuralSnapshot({ + project: options.project, + connectionId: options.connectionId, + driver, + rawSourcesDir: report.artifactPaths.rawSourcesDir, + extractedAtFallback: report.createdAt, + }); + const manifestArtifacts = await writeLocalScanManifestShards({ + project: options.project, + connectionId: options.connectionId, + syncId: record.syncId, + driver, + snapshot: structuralSnapshot, + dryRun: false, + }); + report.artifactPaths.manifestShards = manifestArtifacts.manifestShards; + report.manifestShardsWritten = manifestArtifacts.manifestShardsWritten; + } + /* Enrichment is best-effort: the try block's catch turns a failure into a warning on the report instead of failing the scan. */ if (connector) { + try { + await options.progress?.update( + 0.82, + mode === 'relationships' || options.detectRelationships + ? 'Detecting relationships' + : 'Enriching schema metadata', + ); + const enrichment = await runLocalScanEnrichment({ + connectionId: options.connectionId, + mode, + detectRelationships: options.detectRelationships, + connector, + context: { runId: record.runId, progress: options.progress?.startPhase(0.18) }, + providers: enrichmentProviders, + stateStore: enrichmentStateStore, + syncId: record.syncId, + providerIdentity: localScanProviderIdentity( + options.project.config.scan.enrichment, + options.project.config.llm, + options.project.config.scan.relationships, + ), + relationshipSettings: options.project.config.scan.relationships, + now: options.now, + }); + /* The enrichment artifact writer returns its own manifest shard list, which replaces the structural one on the report. */ const artifacts = await writeLocalScanEnrichmentArtifacts({ + project: options.project, + connectionId: options.connectionId, + syncId: record.syncId, + driver, + enrichment, + dryRun: options.dryRun ??
false, + relationshipSettings: options.project.config.scan.relationships, + }); + report.enrichment = enrichment.summary; + report.relationships = enrichment.relationships; + enrichmentState = enrichment.state; + report.enrichmentState = enrichmentState; + report.warnings.push(...enrichment.warnings); + report.artifactPaths.enrichmentArtifacts = artifacts.enrichmentArtifacts; + report.artifactPaths.manifestShards = artifacts.manifestShards; + report.manifestShardsWritten = artifacts.manifestShardsWritten; + } catch (error) { + const message = kloScanErrorMessage(error); + report.enrichment = failedKloScanEnrichmentSummary(mode, options.detectRelationships ?? false); + const stages = await enrichmentStateStore?.listRunStages(record.runId); + if (stages) { + enrichmentState = completedKloScanEnrichmentStateSummary(); + for (const stage of stages) { + if (stage.status === 'completed') { + enrichmentState.completedStages.push(stage.stage); + } else { + enrichmentState.failedStages.push(stage.stage); + } + } + report.enrichmentState = enrichmentState; + } + report.warnings.push({ + code: 'enrichment_failed', + message: `KLO scan enrichment failed after structural scan completed: ${message}`, + recoverable: true, + metadata: { mode, detectRelationships: options.detectRelationships ?? false }, + }); + } + } + report = redactKloScanReport(report); + if (!reusedExistingScanArtifacts) { + await writeScanReport(options.project, report); + } + await options.progress?.update(1, 'Scan completed'); + return { + runId: record.runId, + status: 'done', + done: true, + connectionId: record.connectionId, + mode, + dryRun: options.dryRun ?? 
false,
+    syncId: record.syncId,
+    report,
+  };
+}
+
+/**
+ * Loads the persisted scan report for a live-database ingest run.
+ *
+ * Returns `null` when no ingest status exists for `runId`, when the run was
+ * produced by an adapter other than `LIVE_DATABASE_ADAPTER`, or when no report
+ * is stored for the run's connection and sync id. Otherwise the stored report
+ * is returned with `runId`, `syncId` and `diffSummary` taken from the ingest
+ * status record.
+ *
+ * NOTE(review): the `Promise` return annotation carries no type argument here;
+ * confirm the intended result type.
+ */
+export async function getLocalScanReport(project: KloLocalProject, runId: string): Promise {
+  const status = await getLocalStageOnlyIngestStatus(project, runId);
+  if (!status || status.adapter !== LIVE_DATABASE_ADAPTER) {
+    return null;
+  }
+  const report = await readScanReport(project, status.connectionId, status.syncId);
+  if (!report) {
+    return null;
+  }
+  return {
+    ...report,
+    runId: status.runId,
+    syncId: status.syncId,
+    diffSummary: scanDiffSummaryFromRecord(status),
+  };
+}
+
+/**
+ * Summarizes the state of a live-database scan run.
+ *
+ * Run-level fields come from the ingest status record; `mode`, `dryRun`,
+ * `reportPath` and `warnings` come from the stored scan report and fall back to
+ * defaults while no report exists. Returns `null` for unknown runs and for runs
+ * of other adapters.
+ *
+ * NOTE(review): the `Promise` return annotation carries no type argument here;
+ * confirm the intended result type.
+ */
+export async function getLocalScanStatus(
+  project: KloLocalProject,
+  runId: string,
+): Promise {
+  const status = await getLocalStageOnlyIngestStatus(project, runId);
+  if (!status || status.adapter !== LIVE_DATABASE_ADAPTER) {
+    return null;
+  }
+  // Read the stored report directly: going through getLocalScanReport() would load
+  // the ingest status a second time and only override fields that are not used here.
+  const report = await readScanReport(project, status.connectionId, status.syncId);
+  return {
+    runId: status.runId,
+    status: status.status,
+    done: status.done,
+    connectionId: status.connectionId,
+    mode: report?.mode ?? 'structural',
+    dryRun: report?.dryRun ?? false,
+    syncId: status.syncId,
+    progress: status.progress,
+    startedAt: status.startedAt,
+    completedAt: status.completedAt,
+    reportPath: report?.artifactPaths.reportPath ?? null,
+    warnings: report?.warnings ??
[], + }; +} diff --git a/packages/context/src/scan/local-structural-artifacts.test.ts b/packages/context/src/scan/local-structural-artifacts.test.ts new file mode 100644 index 00000000..ce628478 --- /dev/null +++ b/packages/context/src/scan/local-structural-artifacts.test.ts @@ -0,0 +1,196 @@ +import { mkdtemp, rm } from 'node:fs/promises'; +import { tmpdir } from 'node:os'; +import { join } from 'node:path'; +import { afterEach, beforeEach, describe, expect, it } from 'vitest'; +import { initKloProject, type KloLocalProject } from '../project/index.js'; +import { readLocalScanStructuralSnapshot } from './local-structural-artifacts.js'; + +describe('readLocalScanStructuralSnapshot', () => { + let tempDir: string; + let project: KloLocalProject; + + beforeEach(async () => { + tempDir = await mkdtemp(join(tmpdir(), 'klo-local-structural-artifacts-')); + project = await initKloProject({ + projectDir: join(tempDir, 'project'), + projectName: 'warehouse', + }); + }); + + afterEach(async () => { + await rm(tempDir, { recursive: true, force: true }); + }); + + it('rebuilds a canonical snapshot from persisted live-database raw files', async () => { + const rawRoot = 'raw-sources/warehouse/live-database/sync-1'; + await project.fileStore.writeFile( + `${rawRoot}/connection.json`, + `${JSON.stringify( + { + connectionId: 'warehouse', + extractedAt: '2026-04-29T12:00:00.000Z', + metadata: { source: 'sqlite-smoke' }, + tableCount: 2, + }, + null, + 2, + )}\n`, + 'klo', + 'klo@example.com', + 'Seed connection artifact', + ); + await project.fileStore.writeFile( + `${rawRoot}/tables/customers.json`, + `${JSON.stringify( + { + name: 'customers', + catalog: null, + db: 'public', + kind: 'table', + comment: 'Customer table', + estimatedRows: 12, + columns: [ + { + name: 'id', + nativeType: 'INTEGER', + normalizedType: 'integer', + dimensionType: 'number', + nullable: false, + primaryKey: true, + comment: 'Customer id', + }, + ], + foreignKeys: [], + }, + null, + 2, + )}\n`, + 
'klo', + 'klo@example.com', + 'Seed customers artifact', + ); + await project.fileStore.writeFile( + `${rawRoot}/tables/orders.json`, + `${JSON.stringify( + { + name: 'orders', + catalog: null, + db: 'public', + kind: 'table', + comment: null, + estimatedRows: 20, + columns: [ + { + name: 'id', + nativeType: 'INTEGER', + normalizedType: 'integer', + dimensionType: 'number', + nullable: false, + primaryKey: true, + comment: null, + }, + { + name: 'customer_id', + nativeType: 'INTEGER', + normalizedType: 'integer', + dimensionType: 'number', + nullable: false, + primaryKey: false, + comment: null, + }, + ], + foreignKeys: [ + { + fromColumn: 'customer_id', + toCatalog: null, + toDb: 'public', + toTable: 'customers', + toColumn: 'id', + constraintName: null, + }, + ], + }, + null, + 2, + )}\n`, + 'klo', + 'klo@example.com', + 'Seed orders artifact', + ); + + const snapshot = await readLocalScanStructuralSnapshot({ + project, + connectionId: 'warehouse', + driver: 'sqlite', + rawSourcesDir: rawRoot, + extractedAtFallback: '2026-04-29T13:00:00.000Z', + }); + + expect(snapshot).toMatchObject({ + connectionId: 'warehouse', + driver: 'sqlite', + extractedAt: '2026-04-29T12:00:00.000Z', + metadata: { source: 'sqlite-smoke' }, + tables: [ + { + db: 'public', + name: 'customers', + comment: 'Customer table', + columns: [ + { + name: 'id', + nativeType: 'INTEGER', + normalizedType: 'integer', + dimensionType: 'number', + nullable: false, + primaryKey: true, + comment: 'Customer id', + }, + ], + }, + { + db: 'public', + name: 'orders', + foreignKeys: [ + { + fromColumn: 'customer_id', + toCatalog: null, + toDb: 'public', + toTable: 'customers', + toColumn: 'id', + constraintName: null, + }, + ], + }, + ], + }); + }); + + it('uses the scan report timestamp when connection.json omits extractedAt', async () => { + const rawRoot = 'raw-sources/warehouse/live-database/sync-2'; + await project.fileStore.writeFile( + `${rawRoot}/connection.json`, + 
'{"connectionId":"warehouse","metadata":{}}\n',
+      'klo',
+      'klo@example.com',
+      'Seed connection artifact without extractedAt',
+    );
+    await project.fileStore.writeFile(
+      `${rawRoot}/tables/orders.json`,
+      '{"name":"orders","catalog":null,"db":null,"kind":"table","comment":null,"estimatedRows":null,"columns":[{"name":"id","nativeType":"integer","normalizedType":"integer","dimensionType":"number","nullable":false,"primaryKey":true,"comment":null}],"foreignKeys":[]}\n',
+      'klo',
+      'klo@example.com',
+      'Seed orders artifact',
+    );
+
+    const snapshot = await readLocalScanStructuralSnapshot({
+      project,
+      connectionId: 'warehouse',
+      driver: 'postgres',
+      rawSourcesDir: rawRoot,
+      extractedAtFallback: '2026-04-29T13:00:00.000Z',
+    });
+
+    expect(snapshot.extractedAt).toBe('2026-04-29T13:00:00.000Z');
+  });
+});
diff --git a/packages/context/src/scan/local-structural-artifacts.ts b/packages/context/src/scan/local-structural-artifacts.ts
new file mode 100644
index 00000000..afd31ee5
--- /dev/null
+++ b/packages/context/src/scan/local-structural-artifacts.ts
@@ -0,0 +1,125 @@
+import type { KloLocalProject } from '../project/index.js';
+import type {
+  KloConnectionDriver,
+  KloSchemaColumn,
+  KloSchemaForeignKey,
+  KloSchemaSnapshot,
+  KloSchemaTable,
+} from './types.js';
+
+/** Arguments for rebuilding a schema snapshot from persisted raw scan files. */
+export interface ReadLocalScanStructuralSnapshotInput {
+  project: KloLocalProject;
+  connectionId: string;
+  driver: KloConnectionDriver;
+  // Directory that holds `connection.json` and the `tables/` artifacts.
+  rawSourcesDir: string;
+  // Used as `extractedAt` when `connection.json` does not carry a string value.
+  extractedAtFallback: string;
+}
+
+/** Untrusted shape of `connection.json`; every field is validated before use. */
+interface LiveDatabaseConnectionArtifact {
+  connectionId?: unknown;
+  extractedAt?: unknown;
+  metadata?: unknown;
+  scope?: unknown;
+}
+
+/** Type guard: true for non-null, non-array objects. */
+function isRecord(value: unknown): value is Record<string, unknown> {
+  return typeof value === 'object' && value !== null && !Array.isArray(value);
+}
+
+/** Returns `value` when it is a plain record, otherwise an empty object. */
+function metadataRecord(value: unknown): Record<string, unknown> {
+  return isRecord(value) ?
value : {};
+}
+
+/**
+ * Normalizes an optional artifact field: `undefined` stays `undefined`, strings
+ * pass through unchanged, and every other value collapses to `null`.
+ */
+function optionalStringOrNull(value: unknown): string | null | undefined {
+  switch (typeof value) {
+    case 'undefined':
+      return undefined;
+    case 'string':
+      return value;
+    default:
+      return null;
+  }
+}
+
+/**
+ * Validates one raw column entry of a table artifact and converts it to a `KloSchemaColumn`.
+ *
+ * `name`, `nativeType` and `normalizedType` must be strings and `dimensionType` must be one of
+ * `time`, `string`, `number` or `boolean`; otherwise an error naming `path` is thrown.
+ * `nullable` and `primaryKey` are true only when the artifact stores a literal `true`.
+ */
+function parseColumn(rawColumn: unknown, path: string): KloSchemaColumn {
+  if (!isRecord(rawColumn)) {
+    throw new Error(`Invalid KLO schema column artifact: ${path}`);
+  }
+  const { name, nativeType, normalizedType, dimensionType } = rawColumn;
+  if (typeof name !== 'string' || typeof nativeType !== 'string' || typeof normalizedType !== 'string') {
+    throw new Error(`Invalid KLO schema column artifact: ${path}`);
+  }
+  if (
+    dimensionType !== 'time' &&
+    dimensionType !== 'string' &&
+    dimensionType !== 'number' &&
+    dimensionType !== 'boolean'
+  ) {
+    throw new Error(`Invalid KLO schema column artifact: ${path}`);
+  }
+  return {
+    name,
+    nativeType,
+    normalizedType,
+    dimensionType,
+    nullable: rawColumn.nullable === true,
+    primaryKey: rawColumn.primaryKey === true,
+    comment: optionalStringOrNull(rawColumn.comment) ?? null,
+  };
+}
+
+/**
+ * Validates one raw foreign-key entry of a table artifact and converts it to a `KloSchemaForeignKey`.
+ *
+ * `fromColumn`, `toTable` and `toColumn` must be strings, otherwise an error naming `path` is
+ * thrown. The remaining fields become `null` unless the artifact stores a string.
+ */
+function parseForeignKey(rawForeignKey: unknown, path: string): KloSchemaForeignKey {
+  if (!isRecord(rawForeignKey)) {
+    throw new Error(`Invalid KLO schema foreign key artifact: ${path}`);
+  }
+  const { fromColumn, toTable, toColumn } = rawForeignKey;
+  if (typeof fromColumn !== 'string' || typeof toTable !== 'string' || typeof toColumn !== 'string') {
+    throw new Error(`Invalid KLO schema foreign key artifact: ${path}`);
+  }
+  return {
+    fromColumn,
+    toCatalog: optionalStringOrNull(rawForeignKey.toCatalog) ?? null,
+    toDb: optionalStringOrNull(rawForeignKey.toDb) ?? null,
+    toTable,
+    toColumn,
+    constraintName: optionalStringOrNull(rawForeignKey.constraintName) ??
null,
+  };
+}
+
+/**
+ * Parses artifact JSON. A malformed document is rethrown with the artifact path attached,
+ * because the bare `SyntaxError` from `JSON.parse` does not say which file was broken.
+ */
+function parseArtifactJson(raw: string, path: string, label: string): unknown {
+  try {
+    return JSON.parse(raw) as unknown;
+  } catch (error) {
+    const reason = error instanceof Error ? error.message : String(error);
+    throw new Error(`Invalid KLO ${label} artifact: ${path} (${reason})`);
+  }
+}
+
+/**
+ * Parses one persisted table artifact into a `KloSchemaTable`.
+ *
+ * Throws when the JSON is malformed, when the document is not an object, when `name` is not a
+ * string or when `columns` is not an array. `kind` falls back to `'table'` unless it is `view`,
+ * `external` or `event_stream`; a missing or non-array `foreignKeys` becomes an empty list.
+ */
+function parseTable(raw: string, path: string): KloSchemaTable {
+  const parsed = parseArtifactJson(raw, path, 'schema table');
+  if (!isRecord(parsed) || typeof parsed.name !== 'string' || !Array.isArray(parsed.columns)) {
+    throw new Error(`Invalid KLO schema table artifact: ${path}`);
+  }
+  return {
+    catalog: optionalStringOrNull(parsed.catalog) ?? null,
+    db: optionalStringOrNull(parsed.db) ?? null,
+    name: parsed.name,
+    kind:
+      parsed.kind === 'view' || parsed.kind === 'external' || parsed.kind === 'event_stream' ? parsed.kind : 'table',
+    comment: optionalStringOrNull(parsed.comment) ?? null,
+    estimatedRows: typeof parsed.estimatedRows === 'number' ? parsed.estimatedRows : null,
+    columns: parsed.columns.map((column) => parseColumn(column, path)),
+    foreignKeys: Array.isArray(parsed.foreignKeys)
+      ? parsed.foreignKeys.map((foreignKey) => parseForeignKey(foreignKey, path))
+      : [],
+  };
+}
+
+/**
+ * Rebuilds a schema snapshot from the raw files persisted under `input.rawSourcesDir`.
+ *
+ * Reads `connection.json` plus every `.json` file listed under `tables/` (in sorted path order).
+ * Connection fields of the wrong type fall back to the values supplied in `input`.
+ */
+export async function readLocalScanStructuralSnapshot(
+  input: ReadLocalScanStructuralSnapshotInput,
+): Promise<KloSchemaSnapshot> {
+  const connectionPath = `${input.rawSourcesDir}/connection.json`;
+  const connectionRaw = await input.project.fileStore.readFile(connectionPath);
+  const parsedConnection = parseArtifactJson(connectionRaw.content, connectionPath, 'connection');
+  // A non-object document (including JSON `null`, which used to crash on the first property
+  // read) is treated like an object with no usable fields, so the fallbacks below apply.
+  const connection = (isRecord(parsedConnection) ? parsedConnection : {}) as LiveDatabaseConnectionArtifact;
+  const listedTables = await input.project.fileStore.listFiles(`${input.rawSourcesDir}/tables`);
+  const tablePaths = listedTables.files.filter((path) => path.endsWith('.json')).sort();
+
+  const tables: KloSchemaTable[] = [];
+  for (const path of tablePaths) {
+    const tableRaw = await input.project.fileStore.readFile(path);
+    tables.push(parseTable(tableRaw.content, path));
+  }
+
+  return {
+    connectionId: typeof connection.connectionId === 'string' ? connection.connectionId : input.connectionId,
+    driver: input.driver,
+    extractedAt: typeof connection.extractedAt === 'string' ? connection.extractedAt : input.extractedAtFallback,
+    scope: isRecord(connection.scope) ?
connection.scope : {}, + metadata: metadataRecord(connection.metadata), + tables, + }; +} diff --git a/packages/context/src/scan/orchestrator.test.ts b/packages/context/src/scan/orchestrator.test.ts new file mode 100644 index 00000000..401370e4 --- /dev/null +++ b/packages/context/src/scan/orchestrator.test.ts @@ -0,0 +1,376 @@ +import { describe, expect, it, vi } from 'vitest'; +import { + createKloConnectorCapabilities, + type KloScanConnector, + type KloScanContext, + type KloScanEnrichmentStateSummary, + type KloScanInput, + KloScanOrchestrator, + type KloSchemaSnapshot, +} from './index.js'; + +function snapshot(): KloSchemaSnapshot { + return { + connectionId: 'warehouse', + driver: 'postgres', + extractedAt: '2026-04-29T00:00:00.000Z', + scope: { schemas: ['public'] }, + metadata: { source: 'test' }, + tables: [ + { + catalog: null, + db: 'public', + name: 'orders', + kind: 'table', + comment: 'Orders table', + estimatedRows: null, + columns: [ + { + name: 'id', + nativeType: 'integer', + normalizedType: 'integer', + dimensionType: 'number', + nullable: false, + primaryKey: true, + comment: 'Order id', + }, + ], + foreignKeys: [], + }, + ], + }; +} + +function connector( + capabilities = createKloConnectorCapabilities({ tableSampling: true, columnSampling: true }), +): KloScanConnector { + return { + id: 'connector-1', + driver: 'postgres', + capabilities, + introspect: vi.fn(async () => snapshot()), + }; +} + +function context(): KloScanContext { + return { + runId: 'scan-run-1', + logger: { + debug: vi.fn(), + info: vi.fn(), + warn: vi.fn(), + error: vi.fn(), + }, + }; +} + +const input: KloScanInput = { + connectionId: 'warehouse', + driver: 'postgres', + mode: 'structural', +}; + +describe('KloScanOrchestrator', () => { + it('runs structural scans through connector introspection and structural host callback', async () => { + const scanConnector = connector(); + const scanContext = context(); + const runStructural = vi.fn(async (scanSnapshot: 
KloSchemaSnapshot) => ({ + result: { synced: true }, + diffSummary: { tablesAdded: scanSnapshot.tables.length, columnsAdded: 1 }, + structuralSyncStats: { tablesCreated: 1, columnsCreated: 1 }, + artifactPaths: { manifestShards: ['semantic-layer/warehouse/_schema/public.yaml'] }, + })); + + const result = await new KloScanOrchestrator({ + now: () => new Date('2026-04-29T00:10:00.000Z'), + syncIdFactory: () => 'sync-1', + }).run({ + connector: scanConnector, + input, + trigger: 'schema_scan', + context: scanContext, + runStructural, + }); + + expect(scanConnector.introspect).toHaveBeenCalledWith(input, scanContext); + expect(runStructural).toHaveBeenCalledWith(snapshot(), scanContext); + expect(result.snapshot.connectionId).toBe('warehouse'); + expect(result.structural.result).toEqual({ synced: true }); + expect(result.enrichment).toBeNull(); + expect(result.report).toMatchObject({ + connectionId: 'warehouse', + driver: 'postgres', + syncId: 'sync-1', + runId: 'scan-run-1', + trigger: 'schema_scan', + mode: 'structural', + dryRun: false, + diffSummary: { + tablesAdded: 1, + columnsAdded: 1, + }, + structuralSyncStats: { + tablesCreated: 1, + columnsCreated: 1, + }, + manifestShardsWritten: 1, + artifactPaths: { + manifestShards: ['semantic-layer/warehouse/_schema/public.yaml'], + }, + enrichment: { + dataDictionary: 'skipped', + columnDescriptions: 'skipped', + tableDescriptions: 'skipped', + embeddings: 'skipped', + deterministicRelationships: 'skipped', + llmRelationshipValidation: 'skipped', + statisticalValidation: 'skipped', + }, + enrichmentState: { + resumedStages: [], + completedStages: [], + failedStages: [], + }, + createdAt: '2026-04-29T00:10:00.000Z', + }); + }); + + it('runs enriched scans through structural and enrichment host callbacks', async () => { + const scanConnector = connector( + createKloConnectorCapabilities({ + tableSampling: true, + columnSampling: true, + columnStats: true, + readOnlySql: true, + }), + ); + const scanContext = context(); 
+ + const result = await new KloScanOrchestrator({ syncIdFactory: () => 'sync-2' }).run({ + connector: scanConnector, + input: { ...input, mode: 'enriched', detectRelationships: true }, + trigger: 'schema_scan', + context: scanContext, + runStructural: vi.fn(async () => ({ + result: { schemaId: 'schema-1' }, + structuralSyncStats: { tablesCreated: 1 }, + })), + runEnrichment: vi.fn(async () => ({ + result: { enriched: true }, + enrichment: { + dataDictionary: 'completed', + columnDescriptions: 'completed', + tableDescriptions: 'completed', + embeddings: 'completed', + deterministicRelationships: 'completed', + statisticalValidation: 'completed', + } as const, + relationships: { accepted: 2, rejected: 1 }, + })), + }); + + expect(result.enrichment?.result).toEqual({ enriched: true }); + expect(result.report.enrichment.columnDescriptions).toBe('completed'); + expect(result.report.relationships).toEqual({ accepted: 2, review: 0, rejected: 1, skipped: 0 }); + expect(result.report.capabilityGaps).toEqual([]); + expect(result.report.warnings).toEqual([]); + }); + + it('reports host enrichment state summaries from enriched scan phases', async () => { + const scanConnector = connector( + createKloConnectorCapabilities({ + tableSampling: true, + columnSampling: true, + columnStats: true, + readOnlySql: true, + }), + ); + const enrichmentState: Partial = { + resumedStages: ['relationships', 'descriptions', 'descriptions'], + completedStages: ['embeddings', 'descriptions', 'relationships'], + failedStages: [], + }; + + const result = await new KloScanOrchestrator({ syncIdFactory: () => 'sync-state' }).run({ + connector: scanConnector, + input: { ...input, mode: 'enriched', detectRelationships: true }, + trigger: 'schema_scan', + context: context(), + runStructural: vi.fn(async () => ({ result: { synced: true } })), + runEnrichment: vi.fn(async () => ({ + result: { enriched: true }, + enrichmentState, + })), + }); + + expect(result.report.enrichmentState).toEqual({ + 
resumedStages: ['descriptions', 'relationships'], + completedStages: ['descriptions', 'embeddings', 'relationships'], + failedStages: [], + }); + }); + + it('records recoverable warnings for missing optional capabilities during enriched scans', async () => { + const result = await new KloScanOrchestrator({ syncIdFactory: () => 'sync-3' }).run({ + connector: connector(createKloConnectorCapabilities()), + input: { ...input, mode: 'enriched', detectRelationships: true }, + trigger: 'schema_scan', + context: context(), + runStructural: vi.fn(async () => ({ result: {} })), + runEnrichment: vi.fn(async () => ({ result: {} })), + }); + + expect(result.report.capabilityGaps).toEqual(['tableSampling', 'columnSampling', 'columnStats', 'readOnlySql']); + expect(result.report.warnings.map((warning) => warning.code)).toEqual([ + 'connector_capability_missing', + 'connector_capability_missing', + 'connector_capability_missing', + 'connector_capability_missing', + ]); + expect(result.report.warnings.every((warning) => warning.recoverable)).toBe(true); + }); + + it('redacts structural and enrichment warning metadata before returning reports', async () => { + const result = await new KloScanOrchestrator({ syncIdFactory: () => 'sync-redacted' }).run({ + connector: connector( + createKloConnectorCapabilities({ + tableSampling: true, + columnSampling: true, + columnStats: true, + readOnlySql: true, + }), + ), + input: { ...input, mode: 'enriched' }, + trigger: 'schema_scan', + context: context(), + runStructural: vi.fn(async () => ({ + result: {}, + warnings: [ + { + code: 'sampling_failed', + message: 'structural warning', + recoverable: true, + metadata: { + url: 'postgres://reader:secret@example.test/db', // pragma: allowlist secret + table: 'orders', + }, + } as const, + ], + })), + runEnrichment: vi.fn(async () => ({ + result: {}, + warnings: [ + { + code: 'embedding_unavailable', + message: 'enrichment warning', + recoverable: true, + metadata: { + nested: { + api_key: 
'sk_test_123', // pragma: allowlist secret + schema: 'public', + }, + }, + } as const, + ], + })), + }); + + expect(result.report.warnings).toEqual([ + { + code: 'sampling_failed', + message: 'structural warning', + recoverable: true, + metadata: { + url: '', + table: 'orders', + }, + }, + { + code: 'embedding_unavailable', + message: 'enrichment warning', + recoverable: true, + metadata: { + nested: { + api_key: '', + schema: 'public', + }, + }, + }, + ]); + }); + + it('keeps structural results when the enrichment phase fails after structural sync', async () => { + const scanConnector = connector( + createKloConnectorCapabilities({ + tableSampling: true, + columnSampling: true, + columnStats: true, + readOnlySql: true, + }), + ); + const runStructural = vi.fn(async () => ({ + result: { synced: true }, + artifactPaths: { + rawSourcesDir: 'raw-sources/warehouse/live-database/sync-failed-enrichment', + manifestShards: ['semantic-layer/warehouse/_schema/public.yaml'], + }, + manifestShardsWritten: 1, + })); + const runEnrichment = vi.fn(async () => { + throw new Error('AI Gateway timed out'); + }); + + const result = await new KloScanOrchestrator({ + now: () => new Date('2026-04-29T18:00:00.000Z'), + syncIdFactory: () => 'sync-failed-enrichment', + }).run({ + connector: scanConnector, + input: { ...input, mode: 'enriched', detectRelationships: true }, + trigger: 'schema_scan', + context: context(), + runStructural, + runEnrichment, + }); + + expect(result.structural.result).toEqual({ synced: true }); + expect(result.enrichment).toBeNull(); + expect(result.report.artifactPaths.manifestShards).toEqual(['semantic-layer/warehouse/_schema/public.yaml']); + expect(result.report.manifestShardsWritten).toBe(1); + expect(result.report.enrichment).toEqual({ + dataDictionary: 'failed', + tableDescriptions: 'failed', + columnDescriptions: 'failed', + embeddings: 'failed', + deterministicRelationships: 'failed', + llmRelationshipValidation: 'failed', + statisticalValidation: 
'failed',
+    });
+    expect(result.report.warnings).toEqual([
+      {
+        code: 'enrichment_failed',
+        message: 'KLO scan enrichment failed after structural scan completed: AI Gateway timed out',
+        recoverable: true,
+        metadata: {
+          mode: 'enriched',
+          detectRelationships: true,
+        },
+      },
+    ]);
+  });
+
+  it('marks dry-run reports without changing host callback behavior', async () => {
+    const runStructural = vi.fn(async () => ({ result: { planned: true }, manifestShardsWritten: 0 }));
+
+    const result = await new KloScanOrchestrator({ syncIdFactory: () => 'sync-4' }).run({
+      connector: connector(),
+      input: { ...input, dryRun: true },
+      trigger: 'cli',
+      context: context(),
+      runStructural,
+    });
+
+    expect(runStructural).toHaveBeenCalledTimes(1);
+    expect(result.report.dryRun).toBe(true);
+    expect(result.report.trigger).toBe('cli');
+  });
+});
diff --git a/packages/context/src/scan/orchestrator.ts b/packages/context/src/scan/orchestrator.ts
new file mode 100644
index 00000000..9fa4590f
--- /dev/null
+++ b/packages/context/src/scan/orchestrator.ts
@@ -0,0 +1,297 @@
+import { redactKloScanReport } from './credentials.js';
+import { completedKloScanEnrichmentStateSummary, summarizeKloScanEnrichmentState } from './enrichment-state.js';
+import {
+  failedKloScanEnrichmentSummary,
+  kloScanErrorMessage,
+  skippedKloScanEnrichmentSummary,
+} from './enrichment-summary.js';
+import type {
+  KloConnectorCapabilities,
+  KloScanArtifactPaths,
+  KloScanConnector,
+  KloScanContext,
+  KloScanDiffSummary,
+  KloScanEnrichmentSummary,
+  KloScanEnrichmentStateSummary,
+  KloScanInput,
+  KloScanRelationshipSummary,
+  KloScanReport,
+  KloScanTrigger,
+  KloScanWarning,
+  KloSchemaSnapshot,
+  KloStructuralSyncStats,
+} from './types.js';
+
+// Optional connector capabilities this module checks for.
+// NOTE(review): the declaration previously read `keyof Omit;` with its type arguments missing.
+// It is rebuilt here from the four capability names this file actually uses; confirm against
+// the intended definition.
+type CapabilityGap = Extract<
+  keyof KloConnectorCapabilities,
+  'tableSampling' | 'columnSampling' | 'columnStats' | 'readOnlySql'
+>;
+
+/** Outcome of the host's structural phase; `result` is an opaque host-defined payload. */
+export interface KloStructuralScanPhaseResult<TResult = unknown> {
+  result: TResult;
+  diffSummary?: Partial<KloScanDiffSummary>;
+  structuralSyncStats?: Partial<KloStructuralSyncStats>;
+  manifestShardsWritten?: number;
+  artifactPaths?: Partial<KloScanArtifactPaths>;
+  relationships?: Partial<KloScanRelationshipSummary>;
+  warnings?: KloScanWarning[];
+}
+
+/** Outcome of the host's enrichment phase; `result` is an opaque host-defined payload. */
+export interface KloEnrichmentScanPhaseResult<TResult = unknown> {
+  result: TResult;
+  enrichment?: Partial<KloScanEnrichmentSummary>;
+  enrichmentState?: Partial<KloScanEnrichmentStateSummary>;
+  manifestShardsWritten?: number;
+  artifactPaths?: Partial<KloScanArtifactPaths>;
+  relationships?: Partial<KloScanRelationshipSummary>;
+  warnings?: KloScanWarning[];
+}
+
+/** Everything one orchestrated scan needs: the connector, the scan input and the host callbacks. */
+export interface KloScanOrchestratorRunInput<TStructural = unknown, TEnrichment = unknown> {
+  connector: KloScanConnector;
+  input: KloScanInput;
+  trigger: KloScanTrigger;
+  context: KloScanContext;
+  // Overrides the orchestrator's sync id factory when provided.
+  syncId?: string;
+  runStructural: (
+    snapshot: KloSchemaSnapshot,
+    context: KloScanContext,
+  ) => Promise<KloStructuralScanPhaseResult<TStructural>>;
+  runEnrichment?: (
+    snapshot: KloSchemaSnapshot,
+    structural: KloStructuralScanPhaseResult<TStructural>,
+    context: KloScanContext,
+  ) => Promise<KloEnrichmentScanPhaseResult<TEnrichment>>;
+}
+
+/** Result of one orchestrated scan; `enrichment` is `null` when that phase did not produce a result. */
+export interface KloScanOrchestratorRunResult<TStructural = unknown, TEnrichment = unknown> {
+  snapshot: KloSchemaSnapshot;
+  structural: KloStructuralScanPhaseResult<TStructural>;
+  enrichment: KloEnrichmentScanPhaseResult<TEnrichment> | null;
+  report: KloScanReport;
+}
+
+/** Injection points for the clock and for sync id generation. */
+export interface KloScanOrchestratorOptions {
+  now?: () => Date;
+  syncIdFactory?: (input: KloScanInput, context: KloScanContext) => string;
+}
+
+const emptyDiffSummary: KloScanDiffSummary = {
+  tablesAdded: 0,
+  tablesModified: 0,
+  tablesDeleted: 0,
+  tablesUnchanged: 0,
+  columnsAdded: 0,
+  columnsModified: 0,
+  columnsDeleted: 0,
+};
+
+const emptyStructuralSyncStats: KloStructuralSyncStats = {
+  tablesCreated: 0,
+  tablesUpdated: 0,
+  tablesDeleted: 0,
+  columnsCreated: 0,
+  columnsUpdated: 0,
+  columnsDeleted: 0,
+};
+
+const emptyArtifactPaths: KloScanArtifactPaths = {
+  rawSourcesDir: null,
+  reportPath: null,
+  manifestShards: [],
+  enrichmentArtifacts: [],
+};
+
+/** Fills any counters the structural phase did not report with zero. */
+function mergeDiffSummary(input?: Partial<KloScanDiffSummary>): KloScanDiffSummary {
+  return { ...emptyDiffSummary, ...input };
+}
+
+/** Fills any sync statistics the structural phase did not report with zero. */
+function mergeStructuralSyncStats(input?: Partial<KloStructuralSyncStats>): KloStructuralSyncStats {
+  return { ...emptyStructuralSyncStats, ...input };
+}
+
+/** Overlays the reported enrichment stages on the all-skipped summary. */
+function mergeEnrichmentSummary(input?: Partial<KloScanEnrichmentSummary>): KloScanEnrichmentSummary {
+  return { ...skippedKloScanEnrichmentSummary, ...input };
+}
+
+function
mergeEnrichmentState(input?: Partial<KloScanEnrichmentStateSummary>): KloScanEnrichmentStateSummary {
+  // No enrichment state reported: use the default summary from the enrichment-state module.
+  if (!input) {
+    return completedKloScanEnrichmentStateSummary();
+  }
+
+  return summarizeKloScanEnrichmentState({
+    resumedStages: input.resumedStages ?? [],
+    completedStages: input.completedStages ?? [],
+    failedStages: input.failedStages ?? [],
+  });
+}
+
+/**
+ * Combines the artifact paths reported by the structural and enrichment phases.
+ *
+ * List fields are concatenated (structural first). For the scalar paths the enrichment value
+ * wins only when it is actually set, so a `null`/`undefined` from the later phase no longer
+ * erases the path recorded by the structural phase.
+ */
+function mergeArtifactPaths(
+  structural?: Partial<KloScanArtifactPaths>,
+  enrichment?: Partial<KloScanArtifactPaths>,
+): KloScanArtifactPaths {
+  return {
+    ...emptyArtifactPaths,
+    ...structural,
+    ...enrichment,
+    rawSourcesDir: enrichment?.rawSourcesDir ?? structural?.rawSourcesDir ?? null,
+    reportPath: enrichment?.reportPath ?? structural?.reportPath ?? null,
+    manifestShards: [...(structural?.manifestShards ?? []), ...(enrichment?.manifestShards ?? [])],
+    enrichmentArtifacts: [...(structural?.enrichmentArtifacts ?? []), ...(enrichment?.enrichmentArtifacts ?? [])],
+  };
+}
+
+/** Adds up the relationship counters of both phases; missing counters count as zero. */
+function mergeRelationshipSummary(
+  structural?: Partial<KloScanRelationshipSummary>,
+  enrichment?: Partial<KloScanRelationshipSummary>,
+): KloScanRelationshipSummary {
+  return {
+    accepted: (structural?.accepted ?? 0) + (enrichment?.accepted ?? 0),
+    review: (structural?.review ?? 0) + (enrichment?.review ?? 0),
+    rejected: (structural?.rejected ?? 0) + (enrichment?.rejected ?? 0),
+    skipped: (structural?.skipped ?? 0) + (enrichment?.skipped ?? 0),
+  };
+}
+
+/**
+ * Number of manifest shards a phase wrote: the explicit count when given, otherwise the length
+ * of the reported shard list, otherwise zero.
+ */
+function manifestShardsWritten(phase: {
+  manifestShardsWritten?: number;
+  artifactPaths?: Partial<KloScanArtifactPaths>;
+}): number {
+  return phase.manifestShardsWritten ?? phase.artifactPaths?.manifestShards?.length ?? 0;
+}
+
+/**
+ * Lists the optional connector capabilities a scan of the given mode relies on, without
+ * duplicates and in first-required order.
+ */
+function requiredCapabilities(mode: KloScanInput['mode'], detectRelationships: boolean | undefined): CapabilityGap[] {
+  const required = new Set<CapabilityGap>();
+
+  if (mode === 'enriched') {
+    required.add('tableSampling');
+    required.add('columnSampling');
+    required.add('columnStats');
+    required.add('readOnlySql');
+  }
+
+  if (mode === 'relationships' || detectRelationships) {
+    required.add('columnStats');
+    required.add('readOnlySql');
+  }
+
+  return [...required];
+}
+
+/** Returns the required capabilities that the connector does not provide. */
+function capabilityGaps(capabilities: KloConnectorCapabilities, input: KloScanInput): CapabilityGap[] {
+  return requiredCapabilities(input.mode ??
'structural', input.detectRelationships).filter(
+    (capability) => !capabilities[capability],
+  );
+}
+
+/** Builds one recoverable `connector_capability_missing` warning per missing capability. */
+function warningsForCapabilityGaps(gaps: CapabilityGap[]): KloScanWarning[] {
+  return gaps.map((gap) => ({
+    code: 'connector_capability_missing',
+    message: `KLO scan connector is missing optional capability: ${gap}`,
+    recoverable: true,
+    metadata: { capability: gap },
+  }));
+}
+
+/** Throws when the scan context carries an abort signal that has already fired. */
+function assertNotAborted(context: KloScanContext): void {
+  if (context.signal?.aborted) {
+    throw new Error('KLO scan aborted');
+  }
+}
+
+/**
+ * Drives one scan: connector introspection, the host's structural phase and, when requested,
+ * the host's enrichment phase, then assembles a redacted `KloScanReport`.
+ */
+export class KloScanOrchestrator {
+  private readonly now: () => Date;
+  private readonly syncIdFactory: (input: KloScanInput, context: KloScanContext) => string;
+
+  /**
+   * @param options Optional clock and sync id factory. Defaults: the system clock, and the
+   *   context's `runId` as sync id.
+   */
+  constructor(options: KloScanOrchestratorOptions = {}) {
+    this.now = options.now ?? (() => new Date());
+    this.syncIdFactory = options.syncIdFactory ?? ((_, context) => context.runId);
+  }
+
+  /**
+   * Runs a scan and returns the snapshot, both phase results and the report.
+   *
+   * The enrichment phase runs when the mode is not `structural` or relationship detection is
+   * requested. An exception thrown by that phase is not propagated: it is recorded as a
+   * recoverable `enrichment_failed` warning and the structural result is still returned.
+   * Exceptions from introspection or the structural phase propagate to the caller.
+   *
+   * @throws Error `KLO scan aborted` when the context signal is already aborted before a phase starts.
+   */
+  async run<TStructural, TEnrichment>(
+    input: KloScanOrchestratorRunInput<TStructural, TEnrichment>,
+  ): Promise<KloScanOrchestratorRunResult<TStructural, TEnrichment>> {
+    const mode = input.input.mode ?? 'structural';
+    const detectRelationships = input.input.detectRelationships ?? false;
+    const syncId = input.syncId ?? this.syncIdFactory(input.input, input.context);
+    const gaps = capabilityGaps(input.connector.capabilities, input.input);
+    const warnings = warningsForCapabilityGaps(gaps);
+
+    input.context.logger?.info('Starting KLO scan', {
+      connectionId: input.input.connectionId,
+      connectorId: input.connector.id,
+      mode,
+      trigger: input.trigger,
+    });
+
+    assertNotAborted(input.context);
+    const snapshot = await input.connector.introspect(input.input, input.context);
+
+    assertNotAborted(input.context);
+    const structural = await input.runStructural(snapshot, input.context);
+
+    let enrichment: KloEnrichmentScanPhaseResult<TEnrichment> | null = null;
+    let failedEnrichment: KloScanEnrichmentSummary | null = null;
+    if (mode !== 'structural' || detectRelationships) {
+      if (input.runEnrichment) {
+        assertNotAborted(input.context);
+        try {
+          enrichment = await input.runEnrichment(snapshot, structural, input.context);
+        } catch (error) {
+          // Structural work is already done; downgrade the failure to a warning.
+          const message = kloScanErrorMessage(error);
+          failedEnrichment = failedKloScanEnrichmentSummary(mode, detectRelationships);
+          warnings.push({
+            code: 'enrichment_failed',
+            message: `KLO scan enrichment failed after structural scan completed: ${message}`,
+            recoverable: true,
+            metadata: { mode, detectRelationships },
+          });
+          input.context.logger?.warn('KLO scan enrichment failed after structural scan completed', {
+            connectionId: input.input.connectionId,
+            runId: input.context.runId,
+            mode,
+            error: message,
+          });
+        }
+      } else {
+        // Enrichment was requested but the host supplied no enrichment callback.
+        failedEnrichment = failedKloScanEnrichmentSummary(mode, detectRelationships);
+        warnings.push({
+          code: 'connector_capability_missing',
+          message: 'KLO scan requested enrichment or relationship detection, but no enrichment phase was provided',
+          recoverable: true,
+          metadata: { mode, detectRelationships },
+        });
+      }
+    }
+
+    const manifestShardCount = manifestShardsWritten(structural) + (enrichment ? manifestShardsWritten(enrichment) : 0);
+
+    const report: KloScanReport = redactKloScanReport({
+      connectionId: input.input.connectionId,
+      driver: input.input.driver,
+      syncId,
+      runId: input.context.runId,
+      trigger: input.trigger,
+      mode,
+      dryRun: input.input.dryRun ?? false,
+      artifactPaths: mergeArtifactPaths(structural.artifactPaths, enrichment?.artifactPaths),
+      diffSummary: mergeDiffSummary(structural.diffSummary),
+      manifestShardsWritten: manifestShardCount,
+      structuralSyncStats: mergeStructuralSyncStats(structural.structuralSyncStats),
+      enrichment: mergeEnrichmentSummary(enrichment?.enrichment ?? failedEnrichment ?? undefined),
+      capabilityGaps: gaps,
+      warnings: [...warnings, ...(structural.warnings ?? []), ...(enrichment?.warnings ?? [])],
+      relationships: mergeRelationshipSummary(structural.relationships, enrichment?.relationships),
+      enrichmentState: mergeEnrichmentState(enrichment?.enrichmentState),
+      createdAt: this.now().toISOString(),
+    });
+
+    input.context.logger?.info('Completed KLO scan', {
+      connectionId: report.connectionId,
+      runId: report.runId,
+      syncId: report.syncId,
+      warnings: report.warnings.length,
+    });
+
+    return {
+      snapshot,
+      structural,
+      enrichment,
+      report,
+    };
+  }
+}
diff --git a/packages/context/src/scan/relationship-artifacts.test.ts b/packages/context/src/scan/relationship-artifacts.test.ts
new file mode 100644
index 00000000..2c62880b
--- /dev/null
+++ b/packages/context/src/scan/relationship-artifacts.test.ts
@@ -0,0 +1,310 @@
+import { mkdir, mkdtemp, rm, writeFile } from 'node:fs/promises';
+import { tmpdir } from 'node:os';
+import { dirname, join } from 'node:path';
+import { runLocalStageOnlyIngest, type SourceAdapter } from '../ingest/index.js';
+import { initKloProject, loadKloProject } from '../project/index.js';
+import { describe, expect, it } from 'vitest';
+import { readLocalScanRelationshipArtifacts
/**
 * Writes `content` to `relativePath` inside `projectDir`, creating any
 * missing parent directories first.
 *
 * @param projectDir   Root directory the relative path is resolved against.
 * @param relativePath Path of the file to write, relative to `projectDir`.
 * @param content      Text written to the file as UTF-8.
 */
async function writeProjectFile(projectDir: string, relativePath: string, content: string): Promise<void> {
  const absolutePath = join(projectDir, relativePath);
  await mkdir(dirname(absolutePath), { recursive: true });
  await writeFile(absolutePath, content, 'utf-8');
}

/**
 * Returns the `klo.yaml` text used by these tests: one read-only sqlite
 * connection named `warehouse` and the `live-database` ingest adapter.
 *
 * The two- and four-space indentation is significant: it nests `driver`,
 * `path` and `readonly` under `connections.warehouse`, and the adapter list
 * under `ingest.adapters`.
 */
function warehouseConfigYaml(): string {
  return [
    'project: warehouse',
    'connections:',
    '  warehouse:',
    '    driver: sqlite',
    '    path: warehouse.db',
    '    readonly: true',
    'ingest:',
    '  adapters:',
    '    - live-database',
    '',
  ].join('\n');
}

/**
 * Writes the test `klo.yaml` (see `warehouseConfigYaml`) into `projectDir`.
 *
 * @param projectDir Directory that receives `klo.yaml`; it must already exist.
 */
async function writeWarehouseConfig(projectDir: string): Promise<void> {
  await writeFile(join(projectDir, 'klo.yaml'), warehouseConfigYaml(), 'utf-8');
}

/**
 * Builds a stub `live-database` source adapter for the tests.
 *
 * - `fetch` writes `connection.json`, `foreign-keys.json` and
 *   `tables/orders.json` into the staged directory.
 * - `detect` writes `connection.json` into the staged directory and reports
 *   a match.
 * - `chunk` returns a single work unit for `tables/orders.json`.
 */
function liveDatabaseAdapter(): SourceAdapter {
  return {
    source: 'live-database',
    skillNames: ['live_database_ingest'],
    async fetch(_pullConfig, stagedDir) {
      await mkdir(join(stagedDir, 'tables'), { recursive: true });
      await writeFile(join(stagedDir, 'connection.json'), '{"connectionId":"warehouse"}\n', 'utf-8');
      await writeFile(join(stagedDir, 'foreign-keys.json'), '{"foreignKeys":[]}\n', 'utf-8');
      await writeFile(
        join(stagedDir, 'tables', 'orders.json'),
        '{"name":"orders","db":"public","columns":[{"name":"id","type":"integer","nullable":false,"primaryKey":true}]}\n',
        'utf-8',
      );
    },
    async detect(stagedDir) {
      await writeFile(join(stagedDir, 'connection.json'), '{"connectionId":"warehouse"}\n', 'utf-8');
      return true;
    },
    async chunk() {
      return {
        workUnits: [
          {
            unitKey: 'live-database-public-orders',
            rawFiles: ['tables/orders.json'],
            dependencyPaths: ['connection.json', 'foreign-keys.json'],
            peerFileIndex: [],
          },
        ],
      };
    },
  };
}
/**
 * Builds a fixed `KloScanReport` fixture for the `warehouse` sqlite
 * connection in `relationships` mode.
 *
 * Only two things vary: the list of enrichment artifact paths and the sync
 * id, which is embedded in `rawSourcesDir` and `reportPath`.
 *
 * @param enrichmentArtifacts Paths stored in `artifactPaths.enrichmentArtifacts`.
 * @param syncId              Sync id; defaults to the id of the `scan-run-review` run.
 */
function scanReport(enrichmentArtifacts: string[], syncId = '2026-05-07-100000-scan-run-review'): KloScanReport {
  const rawSourcesDir = `raw-sources/warehouse/live-database/${syncId}`;

  const artifactPaths: KloScanReport['artifactPaths'] = {
    rawSourcesDir,
    reportPath: `${rawSourcesDir}/scan-report.json`,
    manifestShards: [],
    enrichmentArtifacts,
  };

  const diffSummary: KloScanReport['diffSummary'] = {
    tablesAdded: 0,
    tablesModified: 0,
    tablesDeleted: 0,
    tablesUnchanged: 2,
    columnsAdded: 0,
    columnsModified: 0,
    columnsDeleted: 0,
  };

  const structuralSyncStats: KloScanReport['structuralSyncStats'] = {
    tablesCreated: 0,
    tablesUpdated: 0,
    tablesDeleted: 0,
    columnsCreated: 0,
    columnsUpdated: 0,
    columnsDeleted: 0,
  };

  const enrichment: KloScanReport['enrichment'] = {
    dataDictionary: 'skipped',
    tableDescriptions: 'skipped',
    columnDescriptions: 'skipped',
    embeddings: 'skipped',
    deterministicRelationships: 'completed',
    llmRelationshipValidation: 'skipped',
    statisticalValidation: 'skipped',
  };

  // Property order is kept identical to the serialized report layout.
  const report: KloScanReport = {
    connectionId: 'warehouse',
    driver: 'sqlite',
    syncId,
    runId: 'scan-run-review',
    trigger: 'cli',
    mode: 'relationships',
    dryRun: false,
    artifactPaths,
    diffSummary,
    manifestShardsWritten: 0,
    structuralSyncStats,
    enrichment,
    capabilityGaps: [],
    warnings: [],
    relationships: { accepted: 0, review: 1, rejected: 1, skipped: 0 },
    enrichmentState: {
      resumedStages: [],
      completedStages: ['relationships'],
      failedStages: [],
    },
    createdAt: '2026-05-07T10:00:00.000Z',
  };
  return report;
}
'orders', + columnIds: ['orders.customer_id'], + table: { catalog: null, db: 'public', name: 'orders' }, + columns: ['customer_id'], + }, + to: { + tableId: 'customers', + columnIds: ['customers.id'], + table: { catalog: null, db: 'public', name: 'customers' }, + columns: ['id'], + }, + relationshipType: 'many_to_one', + confidence: 0.62, + pkScore: 0.91, + fkScore: 0.62, + score: 0.62, + evidence: { sources: ['table_suffix'] }, + validation: { status: 'unavailable' }, + graph: { reasons: ['validation_unavailable_review_only'] }, + reasons: ['validation_unavailable_review_only', 'fk_score_review'], + }, + ], + rejected: [ + { + id: 'orders:orders.note_id->notes:notes.id', + status: 'rejected', + source: 'deterministic_name', + from: { + tableId: 'orders', + columnIds: ['orders.note_id'], + table: { catalog: null, db: 'public', name: 'orders' }, + columns: ['note_id'], + }, + to: { + tableId: 'notes', + columnIds: ['notes.id'], + table: { catalog: null, db: 'public', name: 'notes' }, + columns: ['id'], + }, + relationshipType: 'many_to_one', + confidence: 0.2, + pkScore: 0.4, + fkScore: 0.2, + score: 0.2, + evidence: { sources: ['exact_column_match'] }, + validation: { status: 'failed' }, + graph: { reasons: ['low_source_coverage'] }, + reasons: ['low_source_coverage'], + }, + ], + skipped: [], +}; + +const diagnosticsArtifact: KloRelationshipDiagnosticsArtifact = { + connectionId: 'warehouse', + generatedAt: '2026-05-07T10:00:00.000Z', + summary: { accepted: 0, review: 1, rejected: 1, skipped: 0 }, + noAcceptedReason: 'relationship candidates require review before manifest writes', + candidateCountsBySource: { deterministic_name: 2 }, + validation: { available: false, sqlAvailable: false, queryCount: 0 }, + thresholds: { acceptThreshold: 0.85, reviewThreshold: 0.55 }, + policy: { + validationRequiredForManifest: true, + maxCandidatesPerColumn: 25, + profileSampleRows: 10000, + validationConcurrency: 4, + }, + warnings: [], + profileWarnings: ['KLO scan connector 
cannot run read-only SQL relationship validation'], +}; + +const profileArtifact: KloRelationshipProfileArtifact = { + connectionId: 'warehouse', + driver: 'sqlite', + sqlAvailable: false, + tables: [], + columns: {}, + queryCount: 0, + warnings: ['KLO scan connector cannot run read-only SQL relationship validation'], +}; + +describe('local scan relationship artifact reader', () => { + it('loads relationship, diagnostics, and profile artifacts for a scan run', async () => { + const projectDir = await mkdtemp(join(tmpdir(), 'klo-relationship-artifacts-')); + try { + const project = await createLiveDatabaseRun(projectDir, 'scan-run-review'); + const syncId = '2026-05-07-100000-scan-run-review'; + const report = scanReport( + [ + `raw-sources/warehouse/live-database/${syncId}/enrichment/relationships.json`, + `raw-sources/warehouse/live-database/${syncId}/enrichment/relationship-profile.json`, + `raw-sources/warehouse/live-database/${syncId}/enrichment/relationship-diagnostics.json`, + ], + syncId, + ); + await writeProjectFile(projectDir, report.artifactPaths.reportPath ?? 
'', `${JSON.stringify(report, null, 2)}\n`); + await writeProjectFile( + projectDir, + `raw-sources/warehouse/live-database/${syncId}/enrichment/relationships.json`, + `${JSON.stringify(relationshipArtifact, null, 2)}\n`, + ); + await writeProjectFile( + projectDir, + `raw-sources/warehouse/live-database/${syncId}/enrichment/relationship-diagnostics.json`, + `${JSON.stringify(diagnosticsArtifact, null, 2)}\n`, + ); + await writeProjectFile( + projectDir, + `raw-sources/warehouse/live-database/${syncId}/enrichment/relationship-profile.json`, + `${JSON.stringify(profileArtifact, null, 2)}\n`, + ); + + const result = await readLocalScanRelationshipArtifacts(project, 'scan-run-review'); + + expect(result).toMatchObject({ + runId: 'scan-run-review', + connectionId: 'warehouse', + syncId, + paths: { + relationships: `raw-sources/warehouse/live-database/${syncId}/enrichment/relationships.json`, + diagnostics: `raw-sources/warehouse/live-database/${syncId}/enrichment/relationship-diagnostics.json`, + profile: `raw-sources/warehouse/live-database/${syncId}/enrichment/relationship-profile.json`, + }, + }); + expect(result?.relationships.review[0]).toMatchObject({ + id: 'orders:orders.customer_id->customers:customers.id', + status: 'review', + reasons: ['validation_unavailable_review_only', 'fk_score_review'], + }); + expect(result?.diagnostics?.noAcceptedReason).toBe('relationship candidates require review before manifest writes'); + expect(result?.profile?.sqlAvailable).toBe(false); + } finally { + await rm(projectDir, { recursive: true, force: true }); + } + }); + + it('returns null when the scan run has no report', async () => { + const projectDir = await mkdtemp(join(tmpdir(), 'klo-relationship-artifacts-missing-run-')); + try { + await initKloProject({ projectDir, projectName: 'warehouse' }); + const project = await loadKloProject({ projectDir }); + + await expect(readLocalScanRelationshipArtifacts(project, 'missing-run')).resolves.toBeNull(); + } finally { + await 
rm(projectDir, { recursive: true, force: true }); + } + }); + + it('throws a focused error when a scan report does not reference relationships.json', async () => { + const projectDir = await mkdtemp(join(tmpdir(), 'klo-relationship-artifacts-missing-artifact-')); + try { + const project = await createLiveDatabaseRun(projectDir, 'scan-run-review'); + const report = scanReport([]); + await writeProjectFile(projectDir, report.artifactPaths.reportPath ?? '', `${JSON.stringify(report, null, 2)}\n`); + + await expect(readLocalScanRelationshipArtifacts(project, 'scan-run-review')).rejects.toThrow( + 'Scan report "scan-run-review" does not reference relationships.json', + ); + } finally { + await rm(projectDir, { recursive: true, force: true }); + } + }); +}); diff --git a/packages/context/src/scan/relationship-artifacts.ts b/packages/context/src/scan/relationship-artifacts.ts new file mode 100644 index 00000000..e630e2a8 --- /dev/null +++ b/packages/context/src/scan/relationship-artifacts.ts @@ -0,0 +1,75 @@ +import type { KloLocalProject } from '../project/index.js'; +import { getLocalScanReport } from './local-scan.js'; +import type { KloRelationshipArtifact, KloRelationshipDiagnosticsArtifact } from './relationship-diagnostics.js'; +import type { KloRelationshipProfileArtifact } from './relationship-profiling.js'; +import type { KloScanReport } from './types.js'; + +export type KloRelationshipArtifactStatus = 'accepted' | 'review' | 'rejected' | 'skipped' | 'all'; + +export interface ReadLocalScanRelationshipArtifactsResult { + runId: string; + connectionId: string; + syncId: string; + report: KloScanReport; + relationships: KloRelationshipArtifact; + diagnostics: KloRelationshipDiagnosticsArtifact | null; + profile: KloRelationshipProfileArtifact | null; + paths: { + relationships: string; + diagnostics: string | null; + profile: string | null; + }; +} + +function findArtifactPath(report: KloScanReport, fileName: string): string | null { + return 
/**
 * Reads `path` through `project.fileStore` and parses the file content as JSON.
 *
 * The parsed value is cast to `T`; no runtime shape validation is performed.
 * The returned promise rejects if the read fails or the content is not valid JSON.
 *
 * @param project Project whose `fileStore` is used for the read.
 * @param path    Path passed to `fileStore.readFile`.
 */
async function readJsonArtifact<T>(project: KloLocalProject, path: string): Promise<T> {
  const raw = await project.fileStore.readFile(path);
  return JSON.parse(raw.content) as T;
}

/**
 * Best-effort variant of `readJsonArtifact`.
 *
 * Resolves to `null` when `path` is `null`/empty, and also when reading or
 * parsing fails, so callers can treat the artifact as simply absent.
 *
 * @param project Project whose `fileStore` is used for the read.
 * @param path    Path of the artifact, or `null` when the report does not list one.
 */
async function readOptionalJsonArtifact<T>(project: KloLocalProject, path: string | null): Promise<T | null> {
  if (!path) {
    return null;
  }
  try {
    return await readJsonArtifact<T>(project, path);
  } catch {
    // Deliberate: an unreadable or malformed optional artifact is reported as missing.
    return null;
  }
}

/**
 * Loads the relationship artifacts referenced by the scan report of `runId`.
 *
 * @param project Project to read the report and artifacts from.
 * @param runId   Run id passed to `getLocalScanReport`.
 * @returns `null` when no scan report is found for `runId`; otherwise the
 *          report, the parsed `relationships.json`, and the diagnostics and
 *          profile artifacts (each `null` when not referenced or unreadable),
 *          together with the paths they were resolved from.
 * @throws Error when the report does not reference `relationships.json`, or
 *         when `relationships.json` cannot be read or parsed.
 */
export async function readLocalScanRelationshipArtifacts(
  project: KloLocalProject,
  runId: string,
): Promise<ReadLocalScanRelationshipArtifactsResult | null> {
  const report = await getLocalScanReport(project, runId);
  if (!report) {
    return null;
  }

  const relationshipsPath = findArtifactPath(report, 'relationships.json');
  if (!relationshipsPath) {
    throw new Error(`Scan report "${runId}" does not reference relationships.json`);
  }

  const diagnosticsPath = findArtifactPath(report, 'relationship-diagnostics.json');
  const profilePath = findArtifactPath(report, 'relationship-profile.json');

  return {
    runId,
    connectionId: report.connectionId,
    syncId: report.syncId,
    report,
    relationships: await readJsonArtifact<KloRelationshipArtifact>(project, relationshipsPath),
    diagnostics: await readOptionalJsonArtifact<KloRelationshipDiagnosticsArtifact>(project, diagnosticsPath),
    profile: await readOptionalJsonArtifact<KloRelationshipProfileArtifact>(project, profilePath),
    paths: {
      relationships: relationshipsPath,
      diagnostics: diagnosticsPath,
      profile: profilePath,
    },
  };
}
type { + KloRelationshipBenchmarkCaseResult, + KloRelationshipBenchmarkFixture, + KloRelationshipBenchmarkSuiteResult, +} from './relationship-benchmarks.js'; + +type CaseResultOverrides = Omit, 'metrics'> & { + metrics?: Partial; +}; + +function caseResult(overrides: CaseResultOverrides = {}): KloRelationshipBenchmarkCaseResult { + return { + fixtureId: overrides.fixtureId ?? 'demo_b2b_no_declared_constraints', + mode: overrides.mode ?? 'declared_pks_and_declared_fks_removed', + metrics: { + pkPrecision: 1, + pkRecall: 0.5, + pkF1: 0.6666666666666666, + fkPrecision: 1, + fkRecall: 1, + fkF1: 1, + acceptedFalsePositiveCount: 0, + reviewRecall: 0, + acceptedOrReviewRecall: 1, + runtimeSeconds: 0.012345, + sqlQueries: 14, + llmCalls: 0, + ...(overrides.metrics ?? {}), + }, + expected: overrides.expected ?? { + pk: ['accounts.(id)', 'users.(id)'], + fk: ['users.(account_id)->accounts.(id)'], + }, + predicted: overrides.predicted ?? { + pk: ['accounts.(id)'], + fk: ['users.(account_id)->accounts.(id)'], + acceptedFk: ['users.(account_id)->accounts.(id)'], + reviewFk: [], + }, + falsePositives: overrides.falsePositives ?? { pk: [], fk: [] }, + falseNegatives: overrides.falseNegatives ?? { pk: ['users.(id)'], fk: [] }, + skippedComposite: overrides.skippedComposite ?? { pk: [], fk: [] }, + validationBlocked: overrides.validationBlocked ?? false, + }; +} + +function fixture(overrides: Partial = {}): KloRelationshipBenchmarkFixture { + return { + id: overrides.id ?? 'demo_b2b_no_declared_constraints', + name: overrides.name ?? 'Packaged B2B demo with declared PK and FK metadata masked', + tier: overrides.tier ?? 'smoke', + origin: overrides.origin ?? 'synthetic', + thresholdEligible: overrides.thresholdEligible, + validationBudget: overrides.validationBudget, + snapshot: overrides.snapshot ?? { + connectionId: 'demo_b2b', + driver: 'sqlite', + extractedAt: '2026-05-07T00:00:00.000Z', + scope: {}, + metadata: {}, + tables: [], + }, + expected: overrides.expected ?? 
{ expectedPks: [], expectedLinks: [] }, + defaultModes: overrides.defaultModes ?? ['declared_pks_and_declared_fks_removed', 'validation_disabled'], + dataPath: overrides.dataPath ?? '/tmp/demo.sqlite', + columnEmbeddings: overrides.columnEmbeddings ?? {}, + }; +} + +describe('relationship benchmark report', () => { + it('classifies run, validation-blocked, and not-run benchmark cases', () => { + const suite: KloRelationshipBenchmarkSuiteResult = { + cases: [ + caseResult(), + caseResult({ + mode: 'validation_disabled', + validationBlocked: true, + metrics: { fkRecall: 0, acceptedOrReviewRecall: 1, sqlQueries: 0 }, + predicted: { + pk: ['accounts.(id)'], + fk: ['users.(account_id)->accounts.(id)'], + acceptedFk: [], + reviewFk: ['users.(account_id)->accounts.(id)'], + }, + }), + ], + validationBlockedCases: ['demo_b2b_no_declared_constraints:validation_disabled'], + aggregate: { + caseCount: 2, + headlineCaseCount: 1, + headlinePkRecall: 0.5, + headlineFkRecall: 1, + headlineAcceptedOrReviewRecall: 1, + meanPkRecall: 0.5, + meanFkRecall: 0.5, + meanAcceptedOrReviewRecall: 1, + }, + }; + + const report = buildKloRelationshipBenchmarkReport({ + fixtures: [fixture()], + suite, + modes: ['declared_pks_and_declared_fks_removed', 'validation_disabled', 'profiling_disabled'], + }); + + expect(report.headline).toEqual({ + caseCount: 2, + headlineCaseCount: 1, + headlinePkRecall: 0.5, + headlineFkRecall: 1, + headlineAcceptedOrReviewRecall: 1, + acceptedFalsePositiveCount: 0, + validationBlockedCount: 1, + }); + expect(report.cases.map((item) => `${item.fixtureId}:${item.mode}:${item.status}`)).toEqual([ + 'demo_b2b_no_declared_constraints:declared_pks_and_declared_fks_removed:run', + 'demo_b2b_no_declared_constraints:validation_disabled:validation_blocked', + 'demo_b2b_no_declared_constraints:profiling_disabled:not_run', + ]); + expect(report.cases[2]?.reason).toBe('mode not selected by fixture defaultModes'); + }); + + it('surfaces validation budget review candidates in 
the report reason', () => { + const suite: KloRelationshipBenchmarkSuiteResult = { + cases: [ + caseResult({ + fixtureId: 'scale_stress_no_declared_constraints', + metrics: { fkRecall: 0.5, acceptedOrReviewRecall: 1 }, + predicted: { + pk: ['dim_entity_00.(entity_00_key)'], + fk: [ + 'fact_activity_000.(entity_00_key)->dim_entity_00.(entity_00_key)', + 'fact_activity_001.(entity_00_key)->dim_entity_00.(entity_00_key)', + ], + acceptedFk: ['fact_activity_000.(entity_00_key)->dim_entity_00.(entity_00_key)'], + reviewFk: ['fact_activity_001.(entity_00_key)->dim_entity_00.(entity_00_key)'], + }, + }), + ], + validationBlockedCases: [], + aggregate: { + caseCount: 1, + headlineCaseCount: 0, + headlinePkRecall: 1, + headlineFkRecall: 0.5, + headlineAcceptedOrReviewRecall: 1, + meanPkRecall: 1, + meanFkRecall: 0.5, + meanAcceptedOrReviewRecall: 1, + }, + }; + + const report = buildKloRelationshipBenchmarkReport({ + fixtures: [ + fixture({ + id: 'scale_stress_no_declared_constraints', + name: 'Scale stress fixture', + tier: 'row_bearing', + validationBudget: 800, + defaultModes: ['declared_pks_and_declared_fks_removed'], + }), + ], + suite, + modes: ['declared_pks_and_declared_fks_removed'], + }); + + expect(report.cases[0]?.reason).toBe('review candidate validation reasons: validation_unattempted (1)'); + expect(formatKloRelationshipBenchmarkReportMarkdown(report)).toContain('validation_unattempted'); + }); + + it('uses benchmark suite eligibility for product and smoke report rows', () => { + const productCase = caseResult({ fixtureId: 'product_curated' }); + const productBlocked = caseResult({ + fixtureId: 'product_curated', + mode: 'validation_disabled', + validationBlocked: true, + metrics: { fkRecall: 0, acceptedOrReviewRecall: 1, sqlQueries: 0 }, + }); + const smokeCase = caseResult({ fixtureId: 'smoke_even_if_marked' }); + const suite: KloRelationshipBenchmarkSuiteResult = { + cases: [productCase, productBlocked, smokeCase], + validationBlockedCases: 
['product_curated:validation_disabled'], + aggregate: { + caseCount: 3, + headlineCaseCount: 1, + headlinePkRecall: 0.5, + headlineFkRecall: 1, + headlineAcceptedOrReviewRecall: 1, + meanPkRecall: 0.5, + meanFkRecall: 0.6666666666666666, + meanAcceptedOrReviewRecall: 1, + }, + }; + + const report = buildKloRelationshipBenchmarkReport({ + fixtures: [ + fixture({ + id: 'product_curated', + name: 'Curated product fixture', + tier: 'product', + thresholdEligible: true, + defaultModes: ['declared_pks_and_declared_fks_removed', 'validation_disabled'], + }), + fixture({ + id: 'smoke_even_if_marked', + name: 'Marked smoke fixture', + tier: 'smoke', + thresholdEligible: true, + defaultModes: ['declared_pks_and_declared_fks_removed'], + }), + ], + suite, + modes: ['declared_pks_and_declared_fks_removed', 'validation_disabled'], + }); + + expect(report.cases.map((item) => `${item.fixtureId}:${item.mode}:${item.tuningEligible}`)).toEqual([ + 'product_curated:declared_pks_and_declared_fks_removed:true', + 'product_curated:validation_disabled:false', + 'smoke_even_if_marked:declared_pks_and_declared_fks_removed:false', + 'smoke_even_if_marked:validation_disabled:false', + ]); + expect(formatKloRelationshipBenchmarkReportMarkdown(report)).toContain( + '| product_curated | product | declared_pks_and_declared_fks_removed | run | yes |', + ); + }); + + it('formats a compact Markdown report with false negatives and blocked modes', () => { + const suite: KloRelationshipBenchmarkSuiteResult = { + cases: [ + caseResult({ + metrics: { fkRecall: 0, acceptedOrReviewRecall: 0 }, + falseNegatives: { pk: ['users.(id)'], fk: ['users.(account_id)->accounts.(id)'] }, + }), + ], + validationBlockedCases: [], + aggregate: { + caseCount: 1, + headlineCaseCount: 1, + headlinePkRecall: 0.5, + headlineFkRecall: 0, + headlineAcceptedOrReviewRecall: 0, + meanPkRecall: 0.5, + meanFkRecall: 0, + meanAcceptedOrReviewRecall: 0, + }, + }; + + const markdown = formatKloRelationshipBenchmarkReportMarkdown( + 
buildKloRelationshipBenchmarkReport({ + fixtures: [fixture()], + suite, + modes: ['declared_pks_and_declared_fks_removed'], + }), + ); + + expect(markdown).toContain('# KLO Relationship Discovery Benchmark Evidence'); + expect(markdown).toContain( + '| demo_b2b_no_declared_constraints | smoke | declared_pks_and_declared_fks_removed | run | no | 0.500 | 0.000 | 0.000 | 0 |', + ); + expect(markdown).toContain( + '- `demo_b2b_no_declared_constraints` / `declared_pks_and_declared_fks_removed` / `run`: users.(id)', + ); + expect(markdown).toContain( + '- `demo_b2b_no_declared_constraints` / `declared_pks_and_declared_fks_removed` / `run`: users.(account_id)->accounts.(id)', + ); + }); + + it('keeps headline failures separate from non-headline failure details', () => { + const suite: KloRelationshipBenchmarkSuiteResult = { + cases: [ + caseResult({ + fixtureId: 'product_curated', + falseNegatives: { pk: [], fk: [] }, + metrics: { pkRecall: 1, fkRecall: 1, acceptedOrReviewRecall: 1 }, + }), + caseResult({ + fixtureId: 'product_curated', + mode: 'embeddings_disabled', + falseNegatives: { + pk: ['customers.(id)'], + fk: ['orders.(buyer_ref)->customers.(id)'], + }, + metrics: { pkRecall: 0.5, fkRecall: 0, acceptedOrReviewRecall: 0 }, + }), + ], + validationBlockedCases: [], + aggregate: { + caseCount: 2, + headlineCaseCount: 1, + headlinePkRecall: 1, + headlineFkRecall: 1, + headlineAcceptedOrReviewRecall: 1, + meanPkRecall: 0.75, + meanFkRecall: 0.5, + meanAcceptedOrReviewRecall: 0.5, + }, + }; + + const markdown = formatKloRelationshipBenchmarkReportMarkdown( + buildKloRelationshipBenchmarkReport({ + fixtures: [ + fixture({ + id: 'product_curated', + name: 'Curated product fixture', + tier: 'product', + thresholdEligible: true, + defaultModes: ['declared_pks_and_declared_fks_removed', 'embeddings_disabled'], + }), + ], + suite, + modes: ['declared_pks_and_declared_fks_removed', 'embeddings_disabled'], + }), + ); + + expect(markdown).toContain('## Failure Details'); + 
expect(markdown).toContain('### Headline False Negative FKs\n\n- none'); + expect(markdown).toContain( + '- `product_curated` / `embeddings_disabled` / `run`: orders.(buyer_ref)->customers.(id)', + ); + expect(markdown).toContain('- `product_curated` / `embeddings_disabled` / `run`: customers.(id)'); + }); + + it('formats headline failure context from remaining headline false negatives', () => { + const suite: KloRelationshipBenchmarkSuiteResult = { + cases: [ + caseResult({ + fixtureId: 'public_headline_fixture', + metrics: { pkRecall: 0.5, fkRecall: 0, acceptedOrReviewRecall: 0 }, + falseNegatives: { + pk: ['parent_table.(opaque_key)'], + fk: ['child_table.(parent_table_id)->parent_table.(opaque_key)'], + }, + }), + ], + validationBlockedCases: [], + aggregate: { + caseCount: 1, + headlineCaseCount: 1, + headlinePkRecall: 0.5, + headlineFkRecall: 0, + headlineAcceptedOrReviewRecall: 0, + meanPkRecall: 0.5, + meanFkRecall: 0, + meanAcceptedOrReviewRecall: 0, + }, + }; + + const markdown = formatKloRelationshipBenchmarkReportMarkdown( + buildKloRelationshipBenchmarkReport({ + fixtures: [ + fixture({ + id: 'public_headline_fixture', + name: 'Public headline fixture', + tier: 'row_bearing', + thresholdEligible: true, + defaultModes: ['declared_pks_and_declared_fks_removed'], + }), + ], + suite, + modes: ['declared_pks_and_declared_fks_removed'], + }), + ); + + expect(markdown).toContain('## Headline Failure Context'); + expect(markdown).toContain('- Remaining headline false-negative PKs: 1'); + expect(markdown).toContain('- Remaining headline false-negative FKs: 1'); + expect(markdown).toContain( + '- `public_headline_fixture` / `declared_pks_and_declared_fks_removed` / `run`: parent_table.(opaque_key)', + ); + expect(markdown).toContain( + '- `public_headline_fixture` / `declared_pks_and_declared_fks_removed` / `run`: child_table.(parent_table_id)->parent_table.(opaque_key)', + ); + }); + + it('formats skipped composite ground truth separately from false-negative 
details', () => { + const compositePk = 'order_lines.(order_id,line_number)'; + const compositeFk = 'order_line_allocations.(order_id,line_number)->order_lines.(order_id,line_number)'; + const suite: KloRelationshipBenchmarkSuiteResult = { + cases: [ + caseResult({ + fixtureId: 'composite_keys_no_declared_constraints', + metrics: { pkRecall: 0, fkRecall: 0, acceptedOrReviewRecall: 0 }, + expected: { + pk: [compositePk], + fk: [compositeFk], + }, + predicted: { + pk: [], + fk: [], + acceptedFk: [], + reviewFk: [], + }, + falseNegatives: { + pk: [compositePk], + fk: [compositeFk], + }, + skippedComposite: { + pk: [compositePk], + fk: [compositeFk], + }, + }), + ], + validationBlockedCases: [], + aggregate: { + caseCount: 1, + headlineCaseCount: 1, + headlinePkRecall: 0, + headlineFkRecall: 0, + headlineAcceptedOrReviewRecall: 0, + meanPkRecall: 0, + meanFkRecall: 0, + meanAcceptedOrReviewRecall: 0, + }, + }; + + const report = buildKloRelationshipBenchmarkReport({ + fixtures: [ + fixture({ + id: 'composite_keys_no_declared_constraints', + name: 'Composite key fixture with no declared constraints', + tier: 'row_bearing', + defaultModes: ['declared_pks_and_declared_fks_removed'], + }), + ], + suite, + modes: ['declared_pks_and_declared_fks_removed'], + }); + + expect(report.cases[0]?.skippedComposite).toEqual({ + pk: [compositePk], + fk: [compositeFk], + }); + + const markdown = formatKloRelationshipBenchmarkReportMarkdown(report); + expect(markdown).toContain('## Composite Ground Truth Skips'); + expect(markdown).toContain( + '### Skipped Composite PKs\n\n- `composite_keys_no_declared_constraints` / `declared_pks_and_declared_fks_removed` / `run`: order_lines.(order_id,line_number)', + ); + expect(markdown).toContain( + '### Skipped Composite FKs\n\n- `composite_keys_no_declared_constraints` / `declared_pks_and_declared_fks_removed` / `run`: order_line_allocations.(order_id,line_number)->order_lines.(order_id,line_number)', + ); + expect(markdown).toContain( + '### 
Headline False Negative FKs\n\n- `composite_keys_no_declared_constraints` / `declared_pks_and_declared_fks_removed` / `run`: order_line_allocations.(order_id,line_number)->order_lines.(order_id,line_number)', + ); + }); +}); diff --git a/packages/context/src/scan/relationship-benchmark-report.ts b/packages/context/src/scan/relationship-benchmark-report.ts new file mode 100644 index 00000000..2c2f7275 --- /dev/null +++ b/packages/context/src/scan/relationship-benchmark-report.ts @@ -0,0 +1,363 @@ +import { isKloRelationshipBenchmarkTuningEligible } from './relationship-benchmarks.js'; +import type { + KloRelationshipBenchmarkCaseResult, + KloRelationshipBenchmarkFixture, + KloRelationshipBenchmarkMode, + KloRelationshipBenchmarkSuiteResult, +} from './relationship-benchmarks.js'; + +export type KloRelationshipBenchmarkReportCaseStatus = 'run' | 'validation_blocked' | 'not_run'; + +export interface KloRelationshipBenchmarkReportCase { + fixtureId: string; + fixtureName: string; + tier: string; + mode: KloRelationshipBenchmarkMode; + status: KloRelationshipBenchmarkReportCaseStatus; + reason: string | null; + tuningEligible: boolean; + metrics: { + pkRecall: number | null; + fkRecall: number | null; + acceptedOrReviewRecall: number | null; + acceptedFalsePositiveCount: number | null; + sqlQueries: number | null; + llmCalls: number | null; + runtimeSeconds: number | null; + }; + falsePositives: { + pk: string[]; + fk: string[]; + }; + falseNegatives: { + pk: string[]; + fk: string[]; + }; + skippedComposite: { + pk: string[]; + fk: string[]; + }; +} + +export interface KloRelationshipBenchmarkReport { + generatedAt: string; + headline: { + caseCount: number; + headlineCaseCount: number; + headlinePkRecall: number; + headlineFkRecall: number; + headlineAcceptedOrReviewRecall: number; + acceptedFalsePositiveCount: number; + validationBlockedCount: number; + }; + cases: KloRelationshipBenchmarkReportCase[]; +} + +function key(fixtureId: string, mode: 
/**
 * Formats a metric for the Markdown table: `-` for `null`, otherwise the
 * number with three decimal places.
 */
function fixed(value: number | null): string {
  if (value === null) {
    return '-';
  }
  return value.toFixed(3);
}

/**
 * Derives the human-readable reason shown next to a benchmark case.
 *
 * @returns A fixed message when validation was blocked; a
 *          `validation_unattempted` count when the fixture has a validation
 *          budget and review-only FK candidates remain; otherwise `null`.
 */
function reportCaseReason(input: {
  fixture: KloRelationshipBenchmarkFixture;
  result: KloRelationshipBenchmarkCaseResult;
}): string | null {
  const { fixture, result } = input;

  if (result.validationBlocked) {
    return 'validation unavailable for this benchmark mode';
  }

  // Fixtures without a validation budget never get a budget-related reason.
  if (fixture.validationBudget === undefined) {
    return null;
  }

  const reviewCount = result.predicted.reviewFk.length;
  return reviewCount > 0
    ? `review candidate validation reasons: validation_unattempted (${reviewCount})`
    : null;
}

/**
 * Converts one benchmark case result into its report row.
 *
 * The row is marked `validation_blocked` when the result says so and `run`
 * otherwise; metrics and the failure lists are copied from the result.
 */
function reportCaseFromResult(input: {
  fixture: KloRelationshipBenchmarkFixture;
  mode: KloRelationshipBenchmarkMode;
  result: KloRelationshipBenchmarkCaseResult;
}): KloRelationshipBenchmarkReportCase {
  const { fixture, mode, result } = input;
  const status = result.validationBlocked ? 'validation_blocked' : 'run';
  const reason = reportCaseReason({ fixture, result });
  const tuningEligible = isKloRelationshipBenchmarkTuningEligible({
    fixture,
    mode,
    validationBlocked: result.validationBlocked,
  });
  const source = result.metrics;

  return {
    fixtureId: fixture.id,
    fixtureName: fixture.name,
    tier: fixture.tier,
    mode,
    status,
    reason,
    tuningEligible,
    metrics: {
      pkRecall: source.pkRecall,
      fkRecall: source.fkRecall,
      acceptedOrReviewRecall: source.acceptedOrReviewRecall,
      acceptedFalsePositiveCount: source.acceptedFalsePositiveCount,
      sqlQueries: source.sqlQueries,
      llmCalls: source.llmCalls,
      runtimeSeconds: source.runtimeSeconds,
    },
    falsePositives: result.falsePositives,
    falseNegatives: result.falseNegatives,
    skippedComposite: result.skippedComposite,
  };
}
KloRelationshipBenchmarkMode; + reason: string; +}): KloRelationshipBenchmarkReportCase { + return { + fixtureId: input.fixture.id, + fixtureName: input.fixture.name, + tier: input.fixture.tier, + mode: input.mode, + status: 'not_run', + reason: input.reason, + tuningEligible: false, + metrics: { + pkRecall: null, + fkRecall: null, + acceptedOrReviewRecall: null, + acceptedFalsePositiveCount: null, + sqlQueries: null, + llmCalls: null, + runtimeSeconds: null, + }, + falsePositives: { pk: [], fk: [] }, + falseNegatives: { pk: [], fk: [] }, + skippedComposite: { pk: [], fk: [] }, + }; +} + +export function buildKloRelationshipBenchmarkReport(input: { + fixtures: readonly KloRelationshipBenchmarkFixture[]; + suite: KloRelationshipBenchmarkSuiteResult; + modes: readonly KloRelationshipBenchmarkMode[]; + generatedAt?: string; +}): KloRelationshipBenchmarkReport { + const resultsByKey = new Map(input.suite.cases.map((result) => [key(result.fixtureId, result.mode), result])); + const cases: KloRelationshipBenchmarkReportCase[] = []; + + for (const fixture of input.fixtures) { + const selectedModes = new Set(fixture.defaultModes); + for (const mode of input.modes) { + const result = resultsByKey.get(key(fixture.id, mode)); + if (result) { + cases.push(reportCaseFromResult({ fixture, mode, result })); + continue; + } + cases.push( + notRunCase({ + fixture, + mode, + reason: selectedModes.has(mode) ? 'mode produced no benchmark result' : 'mode not selected by fixture defaultModes', + }), + ); + } + } + + return { + generatedAt: input.generatedAt ?? 
new Date().toISOString(), + headline: { + caseCount: input.suite.aggregate.caseCount, + headlineCaseCount: input.suite.aggregate.headlineCaseCount, + headlinePkRecall: input.suite.aggregate.headlinePkRecall, + headlineFkRecall: input.suite.aggregate.headlineFkRecall, + headlineAcceptedOrReviewRecall: input.suite.aggregate.headlineAcceptedOrReviewRecall, + acceptedFalsePositiveCount: input.suite.cases.reduce( + (sum, result) => sum + result.metrics.acceptedFalsePositiveCount, + 0, + ), + validationBlockedCount: input.suite.validationBlockedCases.length, + }, + cases, + }; +} + /** Picks the list of failure strings to report for one case, for example its false-negative PKs. */ +type KloRelationshipBenchmarkFailureSelector = ( + item: KloRelationshipBenchmarkReportCase, +) => readonly string[]; + /** Flattens the selected failure strings of all cases into markdown bullet lines, ordered by localeCompare on the combined fixtureId:mode:status:value key. Each bullet shows the fixture id, mode and status as inline code, followed by the failure value. */ +function sortedFailureLines(input: { + cases: readonly KloRelationshipBenchmarkReportCase[]; + select: KloRelationshipBenchmarkFailureSelector; +}): string[] { + return input.cases + .flatMap((item) => + input.select(item).map((value) => ({ + fixtureId: item.fixtureId, + mode: item.mode, + status: item.status, + value, + })), + ) + .sort((left, right) => { + const leftKey = `${left.fixtureId}:${left.mode}:${left.status}:${left.value}`; + const rightKey = `${right.fixtureId}:${right.mode}:${right.status}:${right.value}`; + return leftKey.localeCompare(rightKey); + }) + .map((item) => `- \`${item.fixtureId}\` / \`${item.mode}\` / \`${item.status}\`: ${item.value}`); +} + /** Returns the lines of one level-3 markdown section: a blank line, the heading, a blank line, then the sorted failure bullets, or a single none bullet when nothing was selected. */ +function failureBlock(input: { + title: string; + cases: readonly KloRelationshipBenchmarkReportCase[]; + select: KloRelationshipBenchmarkFailureSelector; +}): string[] { + const values = sortedFailureLines({ cases: input.cases, select: input.select }); + return ['', `### ${input.title}`, '', ...(values.length > 0 ? 
values : ['- none'])]; +} + /** Builds the Headline Failure Context section. Only tuning-eligible cases are considered; their false-negative PKs and FKs are reported first as counts and then as sorted bullet lists, with a none bullet when a list is empty. */ +function headlineFailureContextBlocks(report: KloRelationshipBenchmarkReport): string[] { + const headlineCases = report.cases.filter((item) => item.tuningEligible); + const remainingPkMisses = sortedFailureLines({ + cases: headlineCases, + select: (item) => item.falseNegatives.pk, + }); + const remainingFkMisses = sortedFailureLines({ + cases: headlineCases, + select: (item) => item.falseNegatives.fk, + }); + /* The counts below are bullet-line counts, that is one per missed key summed across all headline cases. */ + return [ + '', + '## Headline Failure Context', + '', + 'Remaining headline misses after this run are listed here so recall gains and still-open algorithmic gaps are visible in the regenerated evidence report.', + '', + `- Remaining headline false-negative PKs: ${remainingPkMisses.length}`, + `- Remaining headline false-negative FKs: ${remainingFkMisses.length}`, + '', + '### Remaining Headline False Negative PKs', + '', + ...(remainingPkMisses.length > 0 ? remainingPkMisses : ['- none']), + '', + '### Remaining Headline False Negative FKs', + '', + ...(remainingFkMisses.length > 0 ? 
remainingFkMisses : ['- none']), + ]; +} + /** Builds the Failure Details section: eight subsections covering false-positive and false-negative PKs and FKs, first for tuning-eligible (headline) cases and then for every other case. */ +function failureDetailBlocks(report: KloRelationshipBenchmarkReport): string[] { + const headlineCases = report.cases.filter((item) => item.tuningEligible); + const otherCases = report.cases.filter((item) => !item.tuningEligible); + + return [ + '', + '## Failure Details', + ...failureBlock({ + title: 'Headline False Positive PKs', + cases: headlineCases, + select: (item) => item.falsePositives.pk, + }), + ...failureBlock({ + title: 'Headline False Positive FKs', + cases: headlineCases, + select: (item) => item.falsePositives.fk, + }), + ...failureBlock({ + title: 'Headline False Negative PKs', + cases: headlineCases, + select: (item) => item.falseNegatives.pk, + }), + ...failureBlock({ + title: 'Headline False Negative FKs', + cases: headlineCases, + select: (item) => item.falseNegatives.fk, + }), + ...failureBlock({ + title: 'Other False Positive PKs', + cases: otherCases, + select: (item) => item.falsePositives.pk, + }), + ...failureBlock({ + title: 'Other False Positive FKs', + cases: otherCases, + select: (item) => item.falsePositives.fk, + }), + ...failureBlock({ + title: 'Other False Negative PKs', + cases: otherCases, + select: (item) => item.falseNegatives.pk, + }), + ...failureBlock({ + title: 'Other False Negative FKs', + cases: otherCases, + select: (item) => item.falseNegatives.fk, + }), + ]; +} + /** Builds the Composite Ground Truth Skips section from the skippedComposite PK and FK buckets. Only tuning-eligible cases are included. */ +function compositeSkipBlocks(report: KloRelationshipBenchmarkReport): string[] { + const headlineCases = report.cases.filter((item) => item.tuningEligible); + + return [ + '', + '## Composite Ground Truth Skips', + ...failureBlock({ + title: 'Skipped Composite PKs', + cases: headlineCases, + select: (item) => item.skippedComposite.pk, + }), + ...failureBlock({ + title: 'Skipped Composite FKs', + cases: headlineCases, + select: (item) => item.skippedComposite.fk, + }), + ]; +} + /** Renders the report as markdown: title and generation timestamp, the headline bullet list, one table row per case, then the headline failure context, failure details and composite skip sections. Null metrics are shown as a dash and a null reason as an empty cell. The returned text ends with two newline characters. NOTE(review): fixture ids, modes and reasons are interpolated into the table without escaping pipe characters. */ +export function formatKloRelationshipBenchmarkReportMarkdown(report: KloRelationshipBenchmarkReport): string { + const lines = [ + '# KLO 
Relationship Discovery Benchmark Evidence', + '', + `Generated: ${report.generatedAt}`, + '', + '## Headline', + '', + `- Cases run: ${report.headline.caseCount}`, + `- Headline cases: ${report.headline.headlineCaseCount}`, + `- Headline PK recall: ${fixed(report.headline.headlinePkRecall)}`, + `- Headline FK recall: ${fixed(report.headline.headlineFkRecall)}`, + `- Headline accepted-or-review recall: ${fixed(report.headline.headlineAcceptedOrReviewRecall)}`, + `- Accepted false positives: ${report.headline.acceptedFalsePositiveCount}`, + `- Validation-blocked cases: ${report.headline.validationBlockedCount}`, + '', + '## Cases', + '', + '| Fixture | Tier | Mode | Status | Tuning Eligible | PK Recall | FK Recall | Accepted+Review Recall | Accepted FP | Reason |', + '| --- | --- | --- | --- | --- | ---: | ---: | ---: | ---: | --- |', + ]; + /* One table row per case; the leading and trailing pipes are folded into the first and last cells before the cells are joined. */ + for (const item of report.cases) { + lines.push( + [ + `| ${item.fixtureId}`, + item.tier, + item.mode, + item.status, + item.tuningEligible ? 'yes' : 'no', + fixed(item.metrics.pkRecall), + fixed(item.metrics.fkRecall), + fixed(item.metrics.acceptedOrReviewRecall), + String(item.metrics.acceptedFalsePositiveCount ?? '-'), + `${item.reason ?? 
''} |`, + ].join(' | '), + ); + } + + lines.push(...headlineFailureContextBlocks(report)); + lines.push(...failureDetailBlocks(report)); + lines.push(...compositeSkipBlocks(report)); + lines.push(''); + + return `${lines.join('\n')}\n`; +} diff --git a/packages/context/src/scan/relationship-benchmarks.test.ts b/packages/context/src/scan/relationship-benchmarks.test.ts new file mode 100644 index 00000000..e1f7c1fc --- /dev/null +++ b/packages/context/src/scan/relationship-benchmarks.test.ts @@ -0,0 +1,1269 @@ +import { mkdir, mkdtemp, readdir, readFile, rm, writeFile } from 'node:fs/promises'; +import { tmpdir } from 'node:os'; +import { join } from 'node:path'; +import { describe, expect, it } from 'vitest'; +import type { + KloRelationshipBenchmarkExpectedLinks, + KloRelationshipBenchmarkFixture, +} from './relationship-benchmarks.js'; +import { + currentKloRelationshipBenchmarkDetector, + loadKloRelationshipBenchmarkFixture, + loadKloRelationshipBenchmarkFixtures, + maskKloRelationshipBenchmarkSnapshot, + runKloRelationshipBenchmarkCase, + runKloRelationshipBenchmarkSuite, +} from './relationship-benchmarks.js'; +import type { KloSchemaSnapshot } from './types.js'; + +const EXPECTED_LINKS: KloRelationshipBenchmarkExpectedLinks = { + expectedPks: [ + { table: 'accounts', columns: ['id'] }, + { table: 'users', columns: ['id'] }, + ], + expectedLinks: [ + { + fromTable: 'users', + fromColumns: ['account_id'], + toTable: 'accounts', + toColumns: ['id'], + relationship: 'many_to_one', + }, + ], +}; + +const CHECKED_IN_FIXTURE_ORIGINS = { + abbreviated_legacy_no_declared_constraints: 'synthetic', + adventureworks_oltp_with_declared_metadata: 'public', + adventureworkslt_with_declared_metadata: 'public', + analytical_warehouse_no_naming_convention: 'synthetic', + chinook_with_declared_metadata: 'public', + composite_keys_no_declared_constraints: 'synthetic', + demo_b2b_declared_metadata: 'synthetic', + demo_b2b_no_declared_constraints: 'synthetic', + 
mixed_case_within_schema_no_declared_constraints: 'synthetic', + natural_keys_no_declared_constraints: 'synthetic', + non_english_naming_no_declared_constraints: 'synthetic', + northwind_with_declared_metadata: 'public', + orbit_style_product_no_declared_constraints: 'synthetic', + plan_code_no_declared_constraints: 'synthetic', + polymorphic_partial_overlap_no_declared_constraints: 'synthetic', + sakila_with_declared_metadata: 'public', + scale_stress_no_declared_constraints: 'synthetic', + semantic_embedding_aliases_no_declared_constraints: 'synthetic', +} as const; + /** Returns a freshly built two-table sqlite snapshot for the tests: accounts (id as primary key, name) and users (id as primary key, account_id), with one declared foreign key from users.account_id to accounts.id. A new object is created on every call. */ +function snapshot(): KloSchemaSnapshot { + return { + connectionId: 'warehouse', + driver: 'sqlite', + extractedAt: '2026-05-07T00:00:00.000Z', + scope: {}, + metadata: {}, + tables: [ + { + catalog: null, + db: 'main', + name: 'accounts', + kind: 'table', + comment: null, + estimatedRows: 2, + columns: [ + { + name: 'id', + nativeType: 'INTEGER', + normalizedType: 'integer', + dimensionType: 'number', + nullable: false, + primaryKey: true, + comment: null, + }, + { + name: 'name', + nativeType: 'TEXT', + normalizedType: 'text', + dimensionType: 'string', + nullable: false, + primaryKey: false, + comment: null, + }, + ], + foreignKeys: [], + }, + { + catalog: null, + db: 'main', + name: 'users', + kind: 'table', + comment: null, + estimatedRows: 3, + columns: [ + { + name: 'id', + nativeType: 'INTEGER', + normalizedType: 'integer', + dimensionType: 'number', + nullable: false, + primaryKey: true, + comment: null, + }, + { + name: 'account_id', + nativeType: 'INTEGER', + normalizedType: 'integer', + dimensionType: 'number', + nullable: false, + primaryKey: false, + comment: null, + }, + ], + foreignKeys: [ + { + fromColumn: 'account_id', + toCatalog: null, + toDb: 'main', + toTable: 'accounts', + toColumn: 'id', + constraintName: 'users_account_id_fkey', + }, + ], + }, + ], + }; +} + +describe('relationship benchmarks', () => { + it('keeps the current benchmark detector on the relationship-discovery 
path only', async () => { + const source = await readFile(new URL('relationship-benchmarks.ts', import.meta.url), 'utf-8'); + + expect(source).not.toMatch(/KloRelationshipDetector/); + expect(source).not.toMatch(/relationship-detection\.js/); + expect(source).not.toMatch(/\bacceptedLinks\b/); + expect(source).toMatch(/generateKloRelationshipDiscoveryCandidates/); + expect(source).toMatch(/validateKloRelationshipDiscoveryCandidates/); + expect(source).toMatch(/resolveKloRelationshipGraph/); + }); + + it('scores the current detector with declared metadata present', async () => { + const result = await runKloRelationshipBenchmarkCase({ + fixture: { + id: 'mini_declared', + name: 'Mini declared fixture', + tier: 'unit', + origin: 'synthetic', + snapshot: snapshot(), + expected: EXPECTED_LINKS, + defaultModes: ['metadata_present'], + dataPath: null, + columnEmbeddings: {}, + }, + mode: 'metadata_present', + detector: currentKloRelationshipBenchmarkDetector(), + }); + + expect(result.metrics.pkRecall).toBe(1); + expect(result.metrics.pkPrecision).toBe(1); + expect(result.metrics.fkRecall).toBe(1); + expect(result.metrics.fkPrecision).toBe(1); + expect(result.falseNegatives.fk).toEqual([]); + expect(result.predicted.fk).toEqual(['users.(account_id)->accounts.(id)']); + }); + + it('keeps no-declared-constraint misses in benchmark metrics', async () => { + const result = await runKloRelationshipBenchmarkCase({ + fixture: { + id: 'mini_no_declared', + name: 'Mini no declared fixture', + tier: 'unit', + origin: 'synthetic', + snapshot: snapshot(), + expected: EXPECTED_LINKS, + defaultModes: ['declared_pks_and_declared_fks_removed'], + dataPath: null, + columnEmbeddings: {}, + }, + mode: 'declared_pks_and_declared_fks_removed', + detector: currentKloRelationshipBenchmarkDetector(), + }); + + expect(result.metrics.pkRecall).toBe(0.5); + expect(result.metrics.fkRecall).toBe(0); + expect(result.metrics.reviewRecall).toBe(1); + 
expect(result.metrics.acceptedOrReviewRecall).toBe(1); + expect(result.falseNegatives.pk).toEqual(['users.(id)']); + expect(result.falseNegatives.fk).toEqual([]); + expect(result.predicted.acceptedFk).toEqual([]); + expect(result.predicted.reviewFk).toEqual(['users.(account_id)->accounts.(id)']); + }); + + it('keeps composite ground truth in recall denominators and skipped-composite buckets', async () => { + const compositeExpected: KloRelationshipBenchmarkExpectedLinks = { + expectedPks: [{ table: 'order_lines', columns: ['order_id', 'line_number'] }], + expectedLinks: [ + { + fromTable: 'order_line_allocations', + fromColumns: ['order_id', 'line_number'], + toTable: 'order_lines', + toColumns: ['order_id', 'line_number'], + relationship: 'many_to_one', + }, + ], + }; + const emptyDetector = { + async detect() { + return { + pks: [], + links: [], + validationBlocked: false, + sqlQueries: 0, + llmCalls: 0, + runtimeSeconds: 0.001, + }; + }, + }; + + const result = await runKloRelationshipBenchmarkCase({ + fixture: { + id: 'composite_no_declared', + name: 'Composite relationship fixture without declared constraints', + tier: 'row_bearing', + origin: 'synthetic', + snapshot: snapshot(), + expected: compositeExpected, + defaultModes: ['declared_pks_and_declared_fks_removed'], + dataPath: null, + columnEmbeddings: {}, + }, + mode: 'declared_pks_and_declared_fks_removed', + detector: emptyDetector, + }); + + expect(result.expected.pk).toEqual(['order_lines.(order_id,line_number)']); + expect(result.expected.fk).toEqual([ + 'order_line_allocations.(order_id,line_number)->order_lines.(order_id,line_number)', + ]); + expect(result.metrics.pkRecall).toBe(0); + expect(result.metrics.fkRecall).toBe(0); + expect(result.falseNegatives.pk).toEqual(['order_lines.(order_id,line_number)']); + expect(result.falseNegatives.fk).toEqual([ + 'order_line_allocations.(order_id,line_number)->order_lines.(order_id,line_number)', + ]); + expect(result.skippedComposite).toEqual({ + pk: 
['order_lines.(order_id,line_number)'], + fk: ['order_line_allocations.(order_id,line_number)->order_lines.(order_id,line_number)'], + }); + }); + + it('loads the composite-key fixture and accepts composite ground truth as headline evidence', async () => { + const fixtureRoot = new URL('../../test/fixtures/relationship-benchmarks/', import.meta.url); + const fixture = await loadKloRelationshipBenchmarkFixture( + join(fixtureRoot.pathname, 'composite_keys_no_declared_constraints'), + ); + + expect(fixture.tier).toBe('row_bearing'); + expect(fixture.defaultModes).toEqual([ + 'declared_pks_and_declared_fks_removed', + 'llm_disabled', + 'profiling_disabled', + 'validation_disabled', + 'embeddings_disabled', + ]); + expect(fixture.dataPath).toMatch(/composite_keys_no_declared_constraints\/data\.sqlite$/); + + const suite = await runKloRelationshipBenchmarkSuite({ + fixtures: [fixture], + detector: currentKloRelationshipBenchmarkDetector(), + }); + const headline = suite.cases.find( + (item) => + item.fixtureId === 'composite_keys_no_declared_constraints' && + item.mode === 'declared_pks_and_declared_fks_removed', + ); + const profilingDisabled = suite.cases.find( + (item) => item.fixtureId === 'composite_keys_no_declared_constraints' && item.mode === 'profiling_disabled', + ); + const validationDisabled = suite.cases.find( + (item) => item.fixtureId === 'composite_keys_no_declared_constraints' && item.mode === 'validation_disabled', + ); + const compositePks = [ + 'order_line_allocations.(order_id,line_number,warehouse_code)', + 'order_lines.(order_id,line_number)', + ]; + const compositeFk = ['order_line_allocations.(order_id,line_number)->order_lines.(order_id,line_number)']; + + expect(headline?.expected.pk).toEqual(compositePks); + expect(headline?.expected.fk).toEqual(compositeFk); + expect(headline?.predicted.pk).toEqual(compositePks); + expect(headline?.predicted.acceptedFk).toEqual(compositeFk); + expect(headline?.predicted.reviewFk).toEqual([]); + 
expect(headline?.metrics.pkRecall).toBe(1); + expect(headline?.metrics.fkRecall).toBe(1); + expect(headline?.metrics.acceptedOrReviewRecall).toBe(1); + expect(headline?.metrics.acceptedFalsePositiveCount).toBe(0); + expect(headline?.falseNegatives.pk).toEqual([]); + expect(headline?.falseNegatives.fk).toEqual([]); + expect(headline?.skippedComposite).toEqual({ + pk: [], + fk: [], + }); + expect(profilingDisabled?.validationBlocked).toBe(true); + expect(validationDisabled?.validationBlocked).toBe(true); + expect(suite.validationBlockedCases).toEqual([ + 'composite_keys_no_declared_constraints:profiling_disabled', + 'composite_keys_no_declared_constraints:validation_disabled', + ]); + expect(suite.aggregate.headlineCaseCount).toBe(1); + expect(suite.aggregate.headlinePkRecall).toBe(1); + expect(suite.aggregate.headlineFkRecall).toBe(1); + }); + + it('counts formal metadata links in metadata-present mode without SQL validation', async () => { + const source = snapshot(); + const fixture: KloRelationshipBenchmarkFixture = { + id: 'declared_without_sql', + name: 'Declared relationships without SQL validation', + tier: 'unit', + origin: 'synthetic', + snapshot: { + ...source, + tables: source.tables.map((table) => + table.name === 'accounts' + ? { + ...table, + columns: table.columns.map((column) => + column.name === 'id' ? { ...column, primaryKey: true } : column, + ), + } + : table.name === 'users' + ? 
{ + ...table, + foreignKeys: [ + { + fromColumn: 'account_id', + toCatalog: null, + toDb: null, + toTable: 'accounts', + toColumn: 'id', + constraintName: 'users_account_id_fkey', + }, + ], + } + : table, + ), + }, + expected: EXPECTED_LINKS, + defaultModes: ['metadata_present'], + dataPath: null, + columnEmbeddings: {}, + }; + + const result = await runKloRelationshipBenchmarkCase({ + fixture, + mode: 'metadata_present', + }); + + expect(result.validationBlocked).toBe(false); + expect(result.predicted.acceptedFk).toEqual(['users.(account_id)->accounts.(id)']); + expect(result.metrics.fkRecall).toBe(1); + expect(result.metrics.fkPrecision).toBe(1); + }); + + it('masks primary keys and foreign keys independently', () => { + const pksRemoved = maskKloRelationshipBenchmarkSnapshot(snapshot(), 'declared_pks_removed'); + const fksRemoved = maskKloRelationshipBenchmarkSnapshot(snapshot(), 'declared_fks_removed'); + + expect(pksRemoved.tables.flatMap((table) => table.columns.filter((column) => column.primaryKey))).toEqual([]); + expect(pksRemoved.tables.find((table) => table.name === 'users')?.foreignKeys).toHaveLength(1); + expect(fksRemoved.tables.find((table) => table.name === 'accounts')?.columns[0]?.primaryKey).toBe(true); + expect(fksRemoved.tables.find((table) => table.name === 'users')?.foreignKeys).toEqual([]); + }); + + it('loads fixture.yaml, snapshot.json, and expected-links.yaml from a fixture directory', async () => { + const fixtureDir = await mkdtemp(join(tmpdir(), 'klo-relationship-fixture-')); + try { + await writeFile( + join(fixtureDir, 'fixture.yaml'), + [ + 'id: mini_loaded', + 'name: Mini loaded fixture', + 'tier: unit', + 'origin: synthetic', + 'validationBudget: 3', + 'defaultModes:', + ' - metadata_present', + ' - declared_pks_and_declared_fks_removed', + '', + ].join('\n'), + ); + await writeFile(join(fixtureDir, 'snapshot.json'), `${JSON.stringify(snapshot(), null, 2)}\n`); + await writeFile( + join(fixtureDir, 'column-embeddings.json'), + 
`${JSON.stringify( + { + 'accounts.id': [1, 0, 0], + 'users.account_id': [0.99, 0.01, 0], + }, + null, + 2, + )}\n`, + ); + await writeFile( + join(fixtureDir, 'expected-links.yaml'), + [ + 'expectedPks:', + ' - table: accounts', + ' columns: [id]', + ' - table: users', + ' columns: [id]', + 'expectedLinks:', + ' - fromTable: users', + ' fromColumns: [account_id]', + ' toTable: accounts', + ' toColumns: [id]', + ' relationship: many_to_one', + '', + ].join('\n'), + ); + + await expect(loadKloRelationshipBenchmarkFixture(fixtureDir)).resolves.toMatchObject({ + id: 'mini_loaded', + origin: 'synthetic', + validationBudget: 3, + defaultModes: ['metadata_present', 'declared_pks_and_declared_fks_removed'], + columnEmbeddings: { + 'accounts.id': [1, 0, 0], + 'users.account_id': [0.99, 0.01, 0], + }, + expected: { + expectedLinks: [ + { + fromTable: 'users', + fromColumns: ['account_id'], + toTable: 'accounts', + toColumns: ['id'], + relationship: 'many_to_one', + }, + ], + }, + }); + await expect(readFile(join(fixtureDir, 'snapshot.json'), 'utf-8')).resolves.toContain( + '"connectionId": "warehouse"', + ); + } finally { + await rm(fixtureDir, { recursive: true, force: true }); + } + }); + + it('passes fixture validation budgets into benchmark detectors', async () => { + const seenBudgets: unknown[] = []; + const detector = { + async detect(input: { validationBudget?: number | 'all' }) { + seenBudgets.push(input.validationBudget); + return { + pks: [], + links: [], + validationBlocked: false, + sqlQueries: 0, + llmCalls: 0, + runtimeSeconds: 0.001, + }; + }, + }; + + await runKloRelationshipBenchmarkSuite({ + fixtures: [ + { + id: 'budgeted_fixture', + name: 'Budgeted fixture', + tier: 'row_bearing', + origin: 'synthetic', + validationBudget: 0, + snapshot: snapshot(), + expected: EXPECTED_LINKS, + defaultModes: ['declared_pks_and_declared_fks_removed'], + dataPath: null, + columnEmbeddings: {}, + }, + { + id: 'unbudgeted_fixture', + name: 'Unbudgeted fixture', + tier: 
'row_bearing', + origin: 'synthetic', + snapshot: snapshot(), + expected: EXPECTED_LINKS, + defaultModes: ['metadata_present'], + dataPath: null, + columnEmbeddings: {}, + }, + ], + detector, + }); + + expect(seenBudgets).toEqual([0, undefined]); + }); + + it('requires relationship benchmark fixture origin provenance', async () => { + const fixtureDir = await mkdtemp(join(tmpdir(), 'klo-relationship-missing-origin-')); + try { + await writeFile( + join(fixtureDir, 'fixture.yaml'), + [ + 'id: missing_origin', + 'name: Missing origin fixture', + 'tier: unit', + 'defaultModes:', + ' - metadata_present', + '', + ].join('\n'), + ); + await writeFile(join(fixtureDir, 'snapshot.json'), `${JSON.stringify(snapshot(), null, 2)}\n`); + await writeFile( + join(fixtureDir, 'expected-links.yaml'), + ['expectedPks:', ' - table: accounts', ' columns: [id]', 'expectedLinks: []', ''].join('\n'), + ); + + await expect(loadKloRelationshipBenchmarkFixture(fixtureDir)).rejects.toThrow(/origin/); + } finally { + await rm(fixtureDir, { recursive: true, force: true }); + } + }); + + it('loads all benchmark fixture directories in stable order', async () => { + const fixtureRoot = await mkdtemp(join(tmpdir(), 'klo-relationship-fixture-root-')); + + async function writeFixtureDir(dirName: string, fixtureId: string): Promise { + const fixtureDir = join(fixtureRoot, dirName); + await mkdir(fixtureDir); + await writeFile( + join(fixtureDir, 'fixture.yaml'), + [ + `id: ${fixtureId}`, + `name: ${fixtureId}`, + 'tier: unit', + 'origin: synthetic', + 'defaultModes:', + ' - metadata_present', + '', + ].join('\n'), + ); + await writeFile(join(fixtureDir, 'snapshot.json'), `${JSON.stringify(snapshot(), null, 2)}\n`); + await writeFile( + join(fixtureDir, 'expected-links.yaml'), + [ + 'expectedPks:', + ' - table: accounts', + ' columns: [id]', + ' - table: users', + ' columns: [id]', + 'expectedLinks:', + ' - fromTable: users', + ' fromColumns: [account_id]', + ' toTable: accounts', + ' toColumns: 
[id]', + ' relationship: many_to_one', + '', + ].join('\n'), + ); + } + + try { + await writeFixtureDir('z_fixture', 'z_fixture'); + await writeFixtureDir('a_fixture', 'a_fixture'); + + await expect(loadKloRelationshipBenchmarkFixtures(fixtureRoot)).resolves.toMatchObject([ + { id: 'a_fixture', origin: 'synthetic' }, + { id: 'z_fixture', origin: 'synthetic' }, + ]); + } finally { + await rm(fixtureRoot, { recursive: true, force: true }); + } + }); + + it('loads every checked-in relationship benchmark fixture with explicit provenance', async () => { + const fixtureRoot = new URL('../../test/fixtures/relationship-benchmarks/', import.meta.url); + const fixtureDirs = (await readdir(fixtureRoot, { withFileTypes: true })) + .filter((entry) => entry.isDirectory()) + .map((entry) => entry.name) + .sort((left, right) => left.localeCompare(right)); + + expect(fixtureDirs).toEqual(Object.keys(CHECKED_IN_FIXTURE_ORIGINS).sort()); + + const fixtures = await loadKloRelationshipBenchmarkFixtures(fixtureRoot.pathname); + expect(Object.fromEntries(fixtures.map((fixture) => [fixture.id, fixture.origin]))).toEqual( + CHECKED_IN_FIXTURE_ORIGINS, + ); + }); + + it('loads May 8 evidence-fusion adversarial fixtures as reported synthetic evidence', async () => { + const fixtureRoot = new URL('../../test/fixtures/relationship-benchmarks/', import.meta.url); + const fixtures = await loadKloRelationshipBenchmarkFixtures(fixtureRoot.pathname); + const byId = new Map(fixtures.map((fixture) => [fixture.id, fixture])); + const adversarialIds = [ + 'non_english_naming_no_declared_constraints', + 'abbreviated_legacy_no_declared_constraints', + 'analytical_warehouse_no_naming_convention', + 'mixed_case_within_schema_no_declared_constraints', + 'polymorphic_partial_overlap_no_declared_constraints', + ]; + + for (const fixtureId of adversarialIds) { + const fixture = byId.get(fixtureId); + expect(fixture, fixtureId).toBeDefined(); + expect(fixture?.origin).toBe('synthetic'); + 
expect(fixture?.tier).toBe('row_bearing'); + expect(fixture?.thresholdEligible).toBe(false); + expect(fixture?.defaultModes).toEqual(['declared_pks_and_declared_fks_removed']); + expect(fixture?.dataPath).toMatch(/data\.sqlite$/); + expect(fixture?.expected.expectedPks.length).toBeGreaterThan(0); + expect(fixture?.expected.expectedLinks.length).toBeGreaterThan(0); + } + + expect( + byId + .get('polymorphic_partial_overlap_no_declared_constraints') + ?.expected.expectedLinks.filter( + (link) => link.fromTable === 'activity_events' && link.fromColumns.join(',') === 'entity_id', + ), + ).toHaveLength(2); + }); + + it('loads the May 8 scale stress fixture with bounded benchmark validation', async () => { + const fixtureRoot = new URL('../../test/fixtures/relationship-benchmarks/', import.meta.url); + const fixture = await loadKloRelationshipBenchmarkFixture( + join(fixtureRoot.pathname, 'scale_stress_no_declared_constraints'), + ); + + expect(fixture.origin).toBe('synthetic'); + expect(fixture.tier).toBe('row_bearing'); + expect(fixture.thresholdEligible).toBe(false); + expect(fixture.defaultModes).toEqual(['declared_pks_and_declared_fks_removed']); + expect(fixture.validationBudget).toBe(800); + expect(fixture.snapshot.tables).toHaveLength(400); + expect(fixture.snapshot.tables.every((table) => table.columns.length === 50)).toBe(true); + expect(fixture.expected.expectedPks).toHaveLength(20); + expect(fixture.expected.expectedLinks).toHaveLength(1900); + }); + + it('runs the scale stress fixture inside the benchmark validation budget', async () => { + const fixtureRoot = new URL('../../test/fixtures/relationship-benchmarks/', import.meta.url); + const fixture = await loadKloRelationshipBenchmarkFixture( + join(fixtureRoot.pathname, 'scale_stress_no_declared_constraints'), + ); + + const result = await runKloRelationshipBenchmarkCase({ + fixture, + mode: 'declared_pks_and_declared_fks_removed', + detector: currentKloRelationshipBenchmarkDetector(), + }); + + 
expect(result.metrics.runtimeSeconds).toBeLessThan(60); + expect(result.metrics.sqlQueries).toBeLessThanOrEqual(800); + expect(result.validationBlocked).toBe(false); + }, 60_000); + + it('aggregates suite metrics without hiding validation-blocked cases', async () => { + const suite = await runKloRelationshipBenchmarkSuite({ + fixtures: [ + { + id: 'mini_declared', + name: 'Mini declared fixture', + tier: 'unit', + origin: 'synthetic', + snapshot: snapshot(), + expected: EXPECTED_LINKS, + defaultModes: ['metadata_present'], + dataPath: null, + columnEmbeddings: {}, + }, + { + id: 'mini_no_declared', + name: 'Mini no declared fixture', + tier: 'row_bearing', + origin: 'synthetic', + snapshot: snapshot(), + expected: EXPECTED_LINKS, + defaultModes: ['declared_pks_and_declared_fks_removed', 'validation_disabled'], + dataPath: null, + columnEmbeddings: {}, + }, + ], + detector: currentKloRelationshipBenchmarkDetector(), + }); + + expect(suite.cases.map((item) => `${item.fixtureId}:${item.mode}`)).toEqual([ + 'mini_declared:metadata_present', + 'mini_no_declared:declared_pks_and_declared_fks_removed', + 'mini_no_declared:validation_disabled', + ]); + expect(suite.validationBlockedCases).toEqual(['mini_no_declared:validation_disabled']); + expect(suite.aggregate.caseCount).toBe(3); + expect(suite.aggregate.headlineCaseCount).toBe(1); + expect(suite.aggregate.headlineFkRecall).toBe(0); + expect(suite.aggregate.headlineAcceptedOrReviewRecall).toBe(1); + }); + + it('keeps smoke fixtures out of headline threshold metrics', async () => { + const detector = { + async detect() { + return { + pks: [ + { table: 'accounts', columns: ['id'], score: 1, status: 'accepted' as const }, + { table: 'users', columns: ['id'], score: 1, status: 'accepted' as const }, + ], + links: [ + { + fromTable: 'users', + fromColumns: ['account_id'], + toTable: 'accounts', + toColumns: ['id'], + relationship: 'many_to_one' as const, + score: 1, + status: 'accepted' as const, + source: 'test', + }, + ], 
+ validationBlocked: false, + sqlQueries: 1, + llmCalls: 0, + runtimeSeconds: 0.001, + }; + }, + }; + + const suite = await runKloRelationshipBenchmarkSuite({ + fixtures: [ + { + id: 'smoke_no_declared', + name: 'Smoke no declared fixture', + tier: 'smoke', + origin: 'synthetic', + snapshot: snapshot(), + expected: EXPECTED_LINKS, + defaultModes: ['declared_pks_and_declared_fks_removed'], + dataPath: null, + columnEmbeddings: {}, + }, + { + id: 'row_bearing_no_declared', + name: 'Row-bearing no declared fixture', + tier: 'row_bearing', + origin: 'synthetic', + snapshot: snapshot(), + expected: EXPECTED_LINKS, + defaultModes: ['declared_pks_and_declared_fks_removed'], + dataPath: null, + columnEmbeddings: {}, + }, + ], + detector, + }); + + expect(suite.aggregate.caseCount).toBe(2); + expect(suite.aggregate.headlineCaseCount).toBe(1); + expect(suite.aggregate.headlineFkRecall).toBe(1); + expect(suite.aggregate.headlinePkRecall).toBe(1); + }); + + it('counts product fixtures as headline evidence only when threshold eligible', async () => { + const detector = { + async detect() { + return { + pks: [ + { table: 'accounts', columns: ['id'], score: 1, status: 'accepted' as const }, + { table: 'users', columns: ['id'], score: 1, status: 'accepted' as const }, + ], + links: [ + { + fromTable: 'users', + fromColumns: ['account_id'], + toTable: 'accounts', + toColumns: ['id'], + relationship: 'many_to_one' as const, + score: 1, + status: 'accepted' as const, + source: 'test', + }, + ], + validationBlocked: false, + sqlQueries: 1, + llmCalls: 0, + runtimeSeconds: 0.001, + }; + }, + }; + + const suite = await runKloRelationshipBenchmarkSuite({ + fixtures: [ + { + id: 'product_not_curated', + name: 'Product fixture without curated threshold evidence', + tier: 'product', + origin: 'synthetic', + snapshot: snapshot(), + expected: EXPECTED_LINKS, + defaultModes: ['declared_pks_and_declared_fks_removed'], + dataPath: null, + columnEmbeddings: {}, + }, + { + id: 'product_curated', + 
name: 'Product fixture with curated threshold evidence', + tier: 'product', + origin: 'synthetic', + thresholdEligible: true, + snapshot: snapshot(), + expected: EXPECTED_LINKS, + defaultModes: ['declared_pks_and_declared_fks_removed'], + dataPath: null, + columnEmbeddings: {}, + }, + { + id: 'smoke_even_if_marked', + name: 'Smoke fixture remains excluded', + tier: 'smoke', + origin: 'synthetic', + thresholdEligible: true, + snapshot: snapshot(), + expected: EXPECTED_LINKS, + defaultModes: ['declared_pks_and_declared_fks_removed'], + dataPath: null, + columnEmbeddings: {}, + }, + ], + detector, + }); + + expect(suite.aggregate.caseCount).toBe(3); + expect(suite.aggregate.headlineCaseCount).toBe(1); + expect(suite.aggregate.headlinePkRecall).toBe(1); + expect(suite.aggregate.headlineFkRecall).toBe(1); + }); + + it('loads the packaged B2B demo fixtures and records the current relationship-discovery baseline', async () => { + const fixtureRoot = new URL('../../test/fixtures/relationship-benchmarks/', import.meta.url); + const declared = await loadKloRelationshipBenchmarkFixture( + join(fixtureRoot.pathname, 'demo_b2b_declared_metadata'), + ); + const noDeclared = await loadKloRelationshipBenchmarkFixture( + join(fixtureRoot.pathname, 'demo_b2b_no_declared_constraints'), + ); + + expect(declared.tier).toBe('smoke'); + expect(noDeclared.tier).toBe('smoke'); + expect(declared.defaultModes).toEqual([ + 'metadata_present', + 'declared_fks_removed', + 'declared_pks_removed', + 'declared_pks_and_declared_fks_removed', + 'llm_disabled', + 'profiling_disabled', + 'validation_disabled', + 'embeddings_disabled', + ]); + expect(noDeclared.defaultModes).toEqual([ + 'declared_pks_and_declared_fks_removed', + 'profiling_disabled', + 'validation_disabled', + 'llm_disabled', + 'embeddings_disabled', + ]); + + const suite = await runKloRelationshipBenchmarkSuite({ + fixtures: [declared, noDeclared], + detector: currentKloRelationshipBenchmarkDetector(), + }); + + const declaredCase = 
suite.cases.find( + (item) => item.fixtureId === 'demo_b2b_declared_metadata' && item.mode === 'metadata_present', + ); + const noDeclaredCase = suite.cases.find( + (item) => + item.fixtureId === 'demo_b2b_no_declared_constraints' && item.mode === 'declared_pks_and_declared_fks_removed', + ); + const profilingDisabledCase = suite.cases.find( + (item) => item.fixtureId === 'demo_b2b_no_declared_constraints' && item.mode === 'profiling_disabled', + ); + + expect(declaredCase?.expected.fk).toHaveLength(7); + expect(declaredCase?.metrics.fkRecall).toBe(1); + expect(declaredCase?.metrics.pkRecall).toBe(1); + expect(noDeclaredCase?.expected.fk).toHaveLength(7); + expect(noDeclaredCase?.metrics.fkRecall).toBe(1); + expect(noDeclaredCase?.metrics.fkPrecision).toBe(1); + expect(noDeclaredCase?.metrics.pkRecall).toBe(1); + expect(noDeclaredCase?.falseNegatives.pk).toEqual([]); + expect(noDeclaredCase?.metrics.reviewRecall).toBe(0); + expect(noDeclaredCase?.metrics.acceptedOrReviewRecall).toBe(1); + expect(noDeclaredCase?.metrics.acceptedFalsePositiveCount).toBe(0); + expect(noDeclaredCase?.predicted.acceptedFk).toEqual([ + 'invoices.(account_id)->accounts.(id)', + 'opportunities.(account_id)->accounts.(id)', + 'product_events.(account_id)->accounts.(id)', + 'product_events.(user_id)->users.(id)', + 'subscriptions.(account_id)->accounts.(id)', + 'support_tickets.(account_id)->accounts.(id)', + 'users.(account_id)->accounts.(id)', + ]); + expect(noDeclaredCase?.predicted.reviewFk).toEqual([]); + expect(noDeclaredCase?.falseNegatives.fk).toEqual([]); + expect(profilingDisabledCase?.validationBlocked).toBe(true); + expect(profilingDisabledCase?.metrics.fkRecall).toBe(0); + expect(profilingDisabledCase?.metrics.acceptedOrReviewRecall).toBe(1); + expect(suite.aggregate.headlineCaseCount).toBe(0); + expect(suite.aggregate.headlineFkRecall).toBe(0); + expect(suite.aggregate.headlineAcceptedOrReviewRecall).toBe(0); + expect(suite.validationBlockedCases).toEqual([ + 
'demo_b2b_declared_metadata:profiling_disabled', + 'demo_b2b_declared_metadata:validation_disabled', + 'demo_b2b_no_declared_constraints:profiling_disabled', + 'demo_b2b_no_declared_constraints:validation_disabled', + ]); + }); + + it('loads the public Chinook benchmark fixture with declared metadata', async () => { + const fixtureRoot = new URL('../../test/fixtures/relationship-benchmarks/', import.meta.url); + const fixture = await loadKloRelationshipBenchmarkFixture( + join(fixtureRoot.pathname, 'chinook_with_declared_metadata'), + ); + expect(fixture.tier).toBe('row_bearing'); + expect(fixture.thresholdEligible).toBe(true); + expect(fixture.defaultModes).toContain('metadata_present'); + expect(fixture.defaultModes).toContain('declared_pks_and_declared_fks_removed'); + expect(fixture.snapshot.tables.length).toBeGreaterThanOrEqual(11); + expect(fixture.expected.expectedLinks.length).toBeGreaterThanOrEqual(8); + + const albumArtist = fixture.expected.expectedLinks.find( + (link) => link.fromTable === 'Album' && link.toTable === 'Artist', + ); + expect(albumArtist).toBeDefined(); + }); + + it('loads the public Northwind benchmark fixture with declared metadata', async () => { + const fixtureRoot = new URL('../../test/fixtures/relationship-benchmarks/', import.meta.url); + const fixture = await loadKloRelationshipBenchmarkFixture( + join(fixtureRoot.pathname, 'northwind_with_declared_metadata'), + ); + expect(fixture.tier).toBe('row_bearing'); + expect(fixture.thresholdEligible).toBe(true); + expect(fixture.snapshot.tables.length).toBeGreaterThanOrEqual(13); + expect(fixture.expected.expectedLinks.length).toBeGreaterThanOrEqual(11); + + const orderCustomer = fixture.expected.expectedLinks.find( + (link) => ['Orders', 'orders'].includes(link.fromTable) && ['Customers', 'customers'].includes(link.toTable), + ); + expect(orderCustomer).toBeDefined(); + }); + + it('loads the public Sakila benchmark fixture with declared metadata', async () => { + const fixtureRoot = new 
URL('../../test/fixtures/relationship-benchmarks/', import.meta.url); + const fixture = await loadKloRelationshipBenchmarkFixture( + join(fixtureRoot.pathname, 'sakila_with_declared_metadata'), + ); + expect(fixture.tier).toBe('row_bearing'); + expect(fixture.thresholdEligible).toBe(true); + expect(fixture.snapshot.tables.length).toBeGreaterThanOrEqual(16); + expect(fixture.expected.expectedLinks.length).toBeGreaterThanOrEqual(14); + + const filmLanguage = fixture.expected.expectedLinks.find( + (link) => link.fromTable === 'film' && link.toTable === 'language', + ); + expect(filmLanguage).toBeDefined(); + }); + + it('loads the public AdventureWorksLT benchmark fixture with declared metadata', async () => { + const fixtureRoot = new URL('../../test/fixtures/relationship-benchmarks/', import.meta.url); + const fixture = await loadKloRelationshipBenchmarkFixture( + join(fixtureRoot.pathname, 'adventureworkslt_with_declared_metadata'), + ); + + expect(fixture.id).toBe('adventureworkslt_with_declared_metadata'); + expect(fixture.name).toBe('AdventureWorksLT (SQLite, declared metadata)'); + expect(fixture.tier).toBe('row_bearing'); + expect(fixture.thresholdEligible).toBe(true); + expect(fixture.defaultModes).toEqual([ + 'metadata_present', + 'declared_pks_and_declared_fks_removed', + 'declared_pks_removed', + 'declared_fks_removed', + 'profiling_disabled', + 'validation_disabled', + 'llm_disabled', + 'embeddings_disabled', + ]); + expect(fixture.snapshot.tables).toHaveLength(12); + expect(fixture.expected.expectedPks).toHaveLength(12); + expect(fixture.expected.expectedLinks).toHaveLength(12); + + const customerAddressPk = fixture.expected.expectedPks.find((pk) => pk.table === 'CustomerAddress'); + expect(customerAddressPk?.columns).toEqual(['CustomerID', 'AddressID']); + + const modelDescriptionPk = fixture.expected.expectedPks.find((pk) => pk.table === 'ProductModelProductDescription'); + expect(modelDescriptionPk?.columns).toEqual(['ProductModelID', 
'ProductDescriptionID', 'Culture']); + + expect(fixture.expected.expectedLinks).toContainEqual({ + fromTable: 'CustomerAddress', + fromColumns: ['CustomerID'], + toTable: 'Customer', + toColumns: ['CustomerID'], + relationship: 'many_to_one', + }); + expect(fixture.expected.expectedLinks).toContainEqual({ + fromTable: 'ProductCategory', + fromColumns: ['ParentProductCategoryID'], + toTable: 'ProductCategory', + toColumns: ['ProductCategoryID'], + relationship: 'many_to_one', + }); + expect(fixture.expected.expectedLinks).toContainEqual({ + fromTable: 'SalesOrderDetail', + fromColumns: ['SalesOrderID'], + toTable: 'SalesOrderHeader', + toColumns: ['SalesOrderID'], + relationship: 'many_to_one', + }); + expect(fixture.expected.expectedLinks).toContainEqual({ + fromTable: 'SalesOrderHeader', + fromColumns: ['CustomerID'], + toTable: 'Customer', + toColumns: ['CustomerID'], + relationship: 'many_to_one', + }); + }); + + it('loads the full AdventureWorks OLTP benchmark fixture with declared metadata', async () => { + const fixtureRoot = new URL('../../test/fixtures/relationship-benchmarks/', import.meta.url); + const fixture = await loadKloRelationshipBenchmarkFixture( + join(fixtureRoot.pathname, 'adventureworks_oltp_with_declared_metadata'), + ); + + expect(fixture.id).toBe('adventureworks_oltp_with_declared_metadata'); + expect(fixture.name).toBe('AdventureWorks OLTP (SQL Server 2022, declared metadata)'); + expect(fixture.tier).toBe('row_bearing'); + expect(fixture.thresholdEligible).toBe(true); + expect(fixture.defaultModes).toEqual([ + 'metadata_present', + 'declared_pks_and_declared_fks_removed', + 'declared_pks_removed', + 'declared_fks_removed', + 'profiling_disabled', + 'validation_disabled', + 'llm_disabled', + 'embeddings_disabled', + ]); + expect( + fixture.dataPath === null || fixture.dataPath.endsWith('/adventureworks_oltp_with_declared_metadata/data.sqlite'), + ).toBe(true); + expect(fixture.snapshot.driver).toBe('sqlite'); + 
expect(fixture.snapshot.metadata.source_driver).toBe('sqlserver'); + expect(fixture.snapshot.tables).toHaveLength(71); + expect(fixture.expected.expectedPks).toHaveLength(71); + expect(fixture.expected.expectedLinks).toHaveLength(90); + + expect(fixture.expected.expectedPks).toContainEqual({ + table: 'Sales.SalesOrderDetail', + columns: ['SalesOrderID', 'SalesOrderDetailID'], + }); + expect(fixture.expected.expectedPks).toContainEqual({ + table: 'Sales.SalesOrderHeaderSalesReason', + columns: ['SalesOrderID', 'SalesReasonID'], + }); + expect(fixture.expected.expectedLinks).toContainEqual({ + fromTable: 'Sales.SalesOrderHeader', + fromColumns: ['CustomerID'], + toTable: 'Sales.Customer', + toColumns: ['CustomerID'], + relationship: 'many_to_one', + }); + expect(fixture.expected.expectedLinks).toContainEqual({ + fromTable: 'Sales.SalesOrderDetail', + fromColumns: ['SalesOrderID'], + toTable: 'Sales.SalesOrderHeader', + toColumns: ['SalesOrderID'], + relationship: 'many_to_one', + }); + expect(fixture.expected.expectedLinks).toContainEqual({ + fromTable: 'Production.Product', + fromColumns: ['ProductSubcategoryID'], + toTable: 'Production.ProductSubcategory', + toColumns: ['ProductSubcategoryID'], + relationship: 'many_to_one', + }); + }); + + it('loads the row-bearing natural-key fixture and counts it as headline evidence', async () => { + const fixtureRoot = new URL('../../test/fixtures/relationship-benchmarks/', import.meta.url); + const naturalKeys = await loadKloRelationshipBenchmarkFixture( + join(fixtureRoot.pathname, 'natural_keys_no_declared_constraints'), + ); + + expect(naturalKeys.tier).toBe('row_bearing'); + expect(naturalKeys.defaultModes).toEqual([ + 'declared_pks_and_declared_fks_removed', + 'llm_disabled', + 'profiling_disabled', + 'validation_disabled', + 'embeddings_disabled', + ]); + + const suite = await runKloRelationshipBenchmarkSuite({ + fixtures: [naturalKeys], + detector: currentKloRelationshipBenchmarkDetector(), + }); + const headline = 
suite.cases.find( + (item) => + item.fixtureId === 'natural_keys_no_declared_constraints' && + item.mode === 'declared_pks_and_declared_fks_removed', + ); + + expect(headline?.metrics.pkRecall).toBe(1); + expect(headline?.metrics.fkRecall).toBe(1); + expect(headline?.metrics.acceptedFalsePositiveCount).toBe(0); + expect(headline?.predicted.acceptedFk).toEqual(['fct_accounts.(country_code)->dim_countries.(iso_code)']); + expect(headline?.falseNegatives.fk).toEqual([]); + expect(suite.aggregate.headlineCaseCount).toBe(1); + expect(suite.aggregate.headlineFkRecall).toBe(1); + }); + + it('accepts plan-code suffix relationships only when validation is available', async () => { + const fixtureRoot = new URL('../../test/fixtures/relationship-benchmarks/', import.meta.url); + const fixture = await loadKloRelationshipBenchmarkFixture( + join(fixtureRoot.pathname, 'plan_code_no_declared_constraints'), + ); + + expect(fixture.tier).toBe('row_bearing'); + expect(fixture.defaultModes).toEqual([ + 'declared_pks_and_declared_fks_removed', + 'llm_disabled', + 'profiling_disabled', + 'validation_disabled', + 'embeddings_disabled', + ]); + + const suite = await runKloRelationshipBenchmarkSuite({ + fixtures: [fixture], + detector: currentKloRelationshipBenchmarkDetector(), + }); + const expectedAccepted = [ + 'mart_account_segments.(current_plan_code)->stg_plans.(plan_code)', + 'mart_account_segments.(normalized_plan_code)->stg_plans.(plan_code)', + 'stg_plan_segment_mapping.(canonical_plan_code)->stg_plans.(plan_code)', + 'stg_plans.(canonical_plan_code)->stg_plans.(plan_code)', + ]; + const headline = suite.cases.find( + (item) => + item.fixtureId === 'plan_code_no_declared_constraints' && item.mode === 'declared_pks_and_declared_fks_removed', + ); + const llmDisabled = suite.cases.find( + (item) => item.fixtureId === 'plan_code_no_declared_constraints' && item.mode === 'llm_disabled', + ); + const embeddingsDisabled = suite.cases.find( + (item) => item.fixtureId === 
'plan_code_no_declared_constraints' && item.mode === 'embeddings_disabled', + ); + const validationDisabled = suite.cases.find( + (item) => item.fixtureId === 'plan_code_no_declared_constraints' && item.mode === 'validation_disabled', + ); + const profilingDisabled = suite.cases.find( + (item) => item.fixtureId === 'plan_code_no_declared_constraints' && item.mode === 'profiling_disabled', + ); + + expect(headline?.predicted.acceptedFk).toEqual(expectedAccepted); + expect(headline?.predicted.reviewFk).toEqual([]); + expect(headline?.metrics.fkRecall).toBe(1); + expect(headline?.metrics.fkPrecision).toBe(1); + expect(headline?.metrics.acceptedFalsePositiveCount).toBe(0); + expect(llmDisabled?.predicted.acceptedFk).toEqual(expectedAccepted); + expect(embeddingsDisabled?.predicted.acceptedFk).toEqual(expectedAccepted); + expect(validationDisabled?.predicted.acceptedFk).toEqual([]); + expect(validationDisabled?.predicted.reviewFk).toEqual(expectedAccepted); + expect(validationDisabled?.validationBlocked).toBe(true); + expect(validationDisabled?.metrics.reviewRecall).toBe(1); + expect(validationDisabled?.metrics.acceptedOrReviewRecall).toBe(1); + expect(profilingDisabled?.predicted.acceptedFk).toEqual([]); + expect(profilingDisabled?.validationBlocked).toBe(true); + expect(suite.aggregate.headlineCaseCount).toBe(1); + expect(suite.aggregate.headlineFkRecall).toBe(1); + expect(suite.aggregate.headlineAcceptedOrReviewRecall).toBe(1); + }); + + it('uses embedding fixtures for semantic alias relationship benchmark cases', async () => { + const fixtureRoot = new URL('../../test/fixtures/relationship-benchmarks/', import.meta.url); + const fixture = await loadKloRelationshipBenchmarkFixture( + join(fixtureRoot.pathname, 'semantic_embedding_aliases_no_declared_constraints'), + ); + + expect(fixture.columnEmbeddings).toMatchObject({ + 'customers.id': [1, 0, 0], + 'orders.buyer_ref': [0.995, 0.005, 0], + }); + + const withEmbeddings = await runKloRelationshipBenchmarkCase({ + 
fixture, + mode: 'declared_pks_and_declared_fks_removed', + detector: currentKloRelationshipBenchmarkDetector(), + }); + const withoutEmbeddings = await runKloRelationshipBenchmarkCase({ + fixture, + mode: 'embeddings_disabled', + detector: currentKloRelationshipBenchmarkDetector(), + }); + + expect(withEmbeddings.predicted.acceptedFk).toEqual(['orders.(buyer_ref)->customers.(id)']); + expect(withEmbeddings.metrics.fkRecall).toBe(1); + expect(withEmbeddings.metrics.acceptedFalsePositiveCount).toBe(0); + expect(withEmbeddings.falseNegatives.fk).toEqual([]); + expect(withoutEmbeddings.predicted.acceptedFk).toEqual([]); + expect(withoutEmbeddings.metrics.fkRecall).toBe(0); + expect(withoutEmbeddings.falseNegatives.fk).toEqual(['orders.(buyer_ref)->customers.(id)']); + }); + + it('loads the Orbit-style product fixture as curated relationship-discovery benchmark evidence', async () => { + const fixtureRoot = new URL('../../test/fixtures/relationship-benchmarks/', import.meta.url); + const fixture = await loadKloRelationshipBenchmarkFixture( + join(fixtureRoot.pathname, 'orbit_style_product_no_declared_constraints'), + ); + + expect(fixture.tier).toBe('product'); + expect(fixture.thresholdEligible).toBe(true); + expect(fixture.defaultModes).toEqual([ + 'declared_pks_and_declared_fks_removed', + 'llm_disabled', + 'profiling_disabled', + 'validation_disabled', + 'embeddings_disabled', + ]); + + const suite = await runKloRelationshipBenchmarkSuite({ + fixtures: [fixture], + detector: currentKloRelationshipBenchmarkDetector(), + }); + const headline = suite.cases.find( + (item) => + item.fixtureId === 'orbit_style_product_no_declared_constraints' && + item.mode === 'declared_pks_and_declared_fks_removed', + ); + const validationDisabled = suite.cases.find( + (item) => item.fixtureId === 'orbit_style_product_no_declared_constraints' && item.mode === 'validation_disabled', + ); + + expect(headline?.expected.fk).toHaveLength(9); + expect(headline?.metrics.pkRecall).toBe(1); + 
expect(headline?.metrics.fkRecall).toBe(1); + expect(headline?.metrics.acceptedFalsePositiveCount).toBe(0); + expect(headline?.predicted.acceptedFk).toEqual([ + 'dim_users.(account_id)->dim_accounts.(id)', + 'dim_workspaces.(account_id)->dim_accounts.(id)', + 'dim_workspaces.(user_id)->dim_users.(id)', + 'fct_invoices.(account_id)->dim_accounts.(id)', + 'fct_product_events.(account_id)->dim_accounts.(id)', + 'fct_product_events.(user_id)->dim_users.(id)', + 'fct_product_events.(workspace_id)->dim_workspaces.(id)', + 'support_tickets.(account_id)->dim_accounts.(id)', + 'support_tickets.(user_id)->dim_users.(id)', + ]); + expect(headline?.falseNegatives.fk).toEqual([]); + expect(validationDisabled?.validationBlocked).toBe(true); + expect(suite.aggregate.headlineCaseCount).toBe(1); + expect(suite.aggregate.headlineFkRecall).toBe(1); + expect(suite.aggregate.headlinePkRecall).toBe(1); + }); +}); diff --git a/packages/context/src/scan/relationship-benchmarks.ts b/packages/context/src/scan/relationship-benchmarks.ts new file mode 100644 index 00000000..fca3d15e --- /dev/null +++ b/packages/context/src/scan/relationship-benchmarks.ts @@ -0,0 +1,902 @@ +import { createHash } from 'node:crypto'; +import { mkdtemp, readdir, readFile, stat, writeFile } from 'node:fs/promises'; +import { tmpdir } from 'node:os'; +import { join } from 'node:path'; +import { gunzipSync } from 'node:zlib'; +import Database from 'better-sqlite3'; +import YAML from 'yaml'; +import { z } from 'zod'; +import type { KloEnrichedRelationship, KloEnrichedSchema, KloRelationshipType } from './enrichment-types.js'; +import { snapshotToKloEnrichedSchema } from './local-enrichment.js'; +import type { KloRelationshipDiscoveryCandidate } from './relationship-candidates.js'; +import { + generateKloRelationshipDiscoveryCandidates, + mergeKloRelationshipDiscoveryCandidates, +} from './relationship-candidates.js'; +import type { KloLlmProvider } from '@klo/llm'; +import { proposeKloRelationshipCandidatesWithLlm } 
from './relationship-llm-proposal.js';
+import {
+  discoverKloCompositeRelationships,
+  type KloCompositePrimaryKeyCandidate,
+  type KloCompositeRelationshipCandidate,
+} from './relationship-composite-candidates.js';
+import { emptyKloRelationshipProfileArtifact } from './relationship-diagnostics.js';
+import { collectKloFormalMetadataRelationships } from './relationship-formal-metadata.js';
+import { resolveKloRelationshipGraph } from './relationship-graph-resolver.js';
+import { type KloRelationshipReadOnlyExecutor, profileKloRelationshipSchema } from './relationship-profiling.js';
+import type { KloRelationshipValidationBudget } from './relationship-budget.js';
+import type { KloRelationshipFixtureOrigin } from './relationship-scoring.js';
+import { validateKloRelationshipDiscoveryCandidates } from './relationship-validation.js';
+import type { KloQueryResult, KloReadOnlyQueryInput, KloScanContext, KloSchemaSnapshot } from './types.js';
+/**
+ * Every mode a relationship benchmark case can run in.
+ *
+ * `metadata_present` keeps declared primary and foreign keys in the snapshot.
+ * `declared_fks_removed` / `declared_pks_removed` strip one kind of declared
+ * key, and `declared_pks_and_declared_fks_removed` strips both. The four
+ * `*_disabled` modes also strip both kinds of declared key when the snapshot
+ * is masked, and additionally switch off one stage of the detector.
+ */
+export const KLO_RELATIONSHIP_BENCHMARK_MODES = [
+  'metadata_present',
+  'declared_fks_removed',
+  'declared_pks_removed',
+  'declared_pks_and_declared_fks_removed',
+  'llm_disabled',
+  'profiling_disabled',
+  'validation_disabled',
+  'embeddings_disabled',
+] as const;
+/** Union of the mode strings listed in KLO_RELATIONSHIP_BENCHMARK_MODES. */
+export type KloRelationshipBenchmarkMode = (typeof KLO_RELATIONSHIP_BENCHMARK_MODES)[number];
+/**
+ * Fixture tiers. `smoke` and `schema_only` fixtures are never tuning-eligible;
+ * `unit` and `row_bearing` fixtures are eligible unless the fixture says
+ * otherwise; any other tier (`product`) must opt in explicitly.
+ */
+export const KLO_RELATIONSHIP_BENCHMARK_TIERS = ['unit', 'row_bearing', 'schema_only', 'smoke', 'product'] as const;
+/** Union of the tier strings listed in KLO_RELATIONSHIP_BENCHMARK_TIERS. */
+export type KloRelationshipBenchmarkTier = (typeof KLO_RELATIONSHIP_BENCHMARK_TIERS)[number];
+/** Decision a detector attaches to each primary key or link it reports. */
+export type KloRelationshipBenchmarkStatus = 'accepted' | 'review' | 'rejected';
+/** Ground-truth primary key of one table. */
+export interface KloRelationshipBenchmarkExpectedPk {
+  table: string;
+  columns: string[]; // more than one entry means a composite key
+}
+/** Ground-truth relationship between a column tuple of one table and a column tuple of another. */
+export interface KloRelationshipBenchmarkExpectedLink {
+  fromTable: string;
+  fromColumns: string[];
+  toTable: string;
+  toColumns: string[];
+  relationship: KloRelationshipType;
+}
+/** Ground truth of a fixture: its expected primary keys and expected links. */
+export interface KloRelationshipBenchmarkExpectedLinks {
+  expectedPks:
KloRelationshipBenchmarkExpectedPk[];
+  expectedLinks: KloRelationshipBenchmarkExpectedLink[];
+}
+
+/**
+ * One benchmark fixture: a schema snapshot, the keys and links a detector is
+ * expected to find in it, and the modes it is configured to run in.
+ */
+export interface KloRelationshipBenchmarkFixture {
+  id: string;
+  name: string;
+  tier: KloRelationshipBenchmarkTier;
+  origin: KloRelationshipFixtureOrigin;
+  /**
+   * Explicit tuning-eligibility override. It is honoured for every tier except
+   * `smoke` and `schema_only`, which are never eligible.
+   */
+  thresholdEligible?: boolean;
+  validationBudget?: KloRelationshipValidationBudget;
+  snapshot: KloSchemaSnapshot;
+  expected: KloRelationshipBenchmarkExpectedLinks;
+  /** Modes declared by the fixture configuration (at least one). */
+  defaultModes: KloRelationshipBenchmarkMode[];
+  /** Path of the fixture's SQLite data file, or null when it ships no row data. */
+  dataPath: string | null;
+  /** Embedding vector per column id, e.g. `'customers.id': [1, 0, 0]`; empty when none exist. */
+  columnEmbeddings: Record<string, number[]>;
+}
+
+/** Primary key reported by a detector. */
+export interface KloRelationshipBenchmarkDetectedPk {
+  table: string;
+  columns: string[];
+  score: number;
+  status: KloRelationshipBenchmarkStatus;
+}
+
+/** Relationship reported by a detector, with the stage that produced it in `source`. */
+export interface KloRelationshipBenchmarkDetectedLink {
+  fromTable: string;
+  fromColumns: string[];
+  toTable: string;
+  toColumns: string[];
+  relationship: KloRelationshipType;
+  score: number;
+  status: KloRelationshipBenchmarkStatus;
+  source: string;
+}
+
+/** Everything a detector returns for one fixture/mode pair, including its cost counters. */
+export interface KloRelationshipBenchmarkDetectorResult {
+  pks: KloRelationshipBenchmarkDetectedPk[];
+  links: KloRelationshipBenchmarkDetectedLink[];
+  /** Copied unchanged into the scored case result. */
+  validationBlocked: boolean;
+  sqlQueries: number;
+  llmCalls: number;
+  runtimeSeconds: number;
+}
+
+/** Input handed to a detector for one fixture/mode pair. */
+export interface KloRelationshipBenchmarkDetectorInput {
+  fixtureId: string;
+  mode: KloRelationshipBenchmarkMode;
+  snapshot: KloSchemaSnapshot;
+  schema: KloEnrichedSchema;
+  dataPath: string | null;
+  validationBudget?: KloRelationshipValidationBudget;
+}
+
+/** Pluggable relationship detector evaluated by the benchmark. */
+export interface KloRelationshipBenchmarkDetector {
+  detect(input: KloRelationshipBenchmarkDetectorInput): Promise<KloRelationshipBenchmarkDetectorResult>;
+}
+
+/**
+ * Scores of one benchmark case.
+ *
+ * FK precision, recall and F1 count accepted links only. `reviewRecall` counts
+ * links left in review, and `acceptedOrReviewRecall` counts the union of both.
+ */
+export interface KloRelationshipBenchmarkMetrics {
+  pkPrecision: number;
+  pkRecall: number;
+  pkF1: number;
+  fkPrecision: number;
+  fkRecall: number;
+  fkF1: number;
+  /** Number of accepted links that are not in the expected set. */
+  acceptedFalsePositiveCount: number;
+  reviewRecall: number;
+  acceptedOrReviewRecall: number;
+  runtimeSeconds: number;
+  sqlQueries: number;
+  llmCalls: number;
+}
+
+export interface KloRelationshipBenchmarkCaseResult {
+
fixtureId: string;
+  mode: KloRelationshipBenchmarkMode;
+  metrics: KloRelationshipBenchmarkMetrics;
+  /** Expected keys, rendered as sorted, de-duplicated key strings. */
+  expected: {
+    pk: string[];
+    fk: string[];
+  };
+  /** Detected keys; `fk` holds every status, `acceptedFk` / `reviewFk` are filtered by status. */
+  predicted: {
+    pk: string[];
+    fk: string[];
+    acceptedFk: string[];
+    reviewFk: string[];
+  };
+  /** Detected but not expected; for FKs only accepted links are counted. */
+  falsePositives: {
+    pk: string[];
+    fk: string[];
+  };
+  /** Expected but not detected; an FK left in review does not count as missed. */
+  falseNegatives: {
+    pk: string[];
+    fk: string[];
+  };
+  /** Expected multi-column keys that were not detected. */
+  skippedComposite: {
+    pk: string[];
+    fk: string[];
+  };
+  validationBlocked: boolean;
+}
+
+/** Result of a whole suite run: every case plus aggregate recall figures. */
+export interface KloRelationshipBenchmarkSuiteResult {
+  cases: KloRelationshipBenchmarkCaseResult[];
+  validationBlockedCases: string[];
+  aggregate: {
+    caseCount: number;
+    headlineCaseCount: number;
+    headlinePkRecall: number;
+    headlineFkRecall: number;
+    headlineAcceptedOrReviewRecall: number;
+    meanPkRecall: number;
+    meanFkRecall: number;
+    meanAcceptedOrReviewRecall: number;
+  };
+}
+
+/**
+ * Read-only executor over a fixture's SQLite file.
+ *
+ * The database is opened with `readonly: true` and `fileMustExist: true`, so a
+ * missing file fails in the constructor. `queryCount` is incremented once per
+ * executed statement.
+ */
+class KloRelationshipBenchmarkSqliteExecutor implements KloRelationshipReadOnlyExecutor {
+  private readonly db: Database.Database;
+  queryCount = 0;
+
+  constructor(dataPath: string) {
+    this.db = new Database(dataPath, { readonly: true, fileMustExist: true });
+  }
+
+  /**
+   * Runs `input.sql` and returns the rows as arrays ordered by `headers`.
+   * Headers are taken from the first row, so an empty result has no headers.
+   * The scan context is not used.
+   */
+  async executeReadOnly(input: KloReadOnlyQueryInput, _ctx: KloScanContext): Promise<KloQueryResult> {
+    this.queryCount += 1;
+    const rows = this.db.prepare(input.sql).all() as Record<string, unknown>[];
+    const headers = Object.keys(rows[0] ??
{});
+    return {
+      headers,
+      rows: rows.map((row) => headers.map((header) => row[header])),
+      totalRows: rows.length,
+      rowCount: rows.length,
+    };
+  }
+
+  /** Closes the underlying SQLite handle. */
+  close(): void {
+    this.db.close();
+  }
+}
+
+/**
+ * Reads a fixture text file as UTF-8, falling back to `<fileName>.gz` when the
+ * plain file does not exist.
+ *
+ * @param fixtureDir Directory of the fixture.
+ * @param fileName File name inside the fixture directory.
+ * @returns The decoded file content.
+ * @throws Any read error other than ENOENT on the plain file, and any error
+ *   raised while reading or decompressing the `.gz` fallback.
+ */
+async function fixtureText(fixtureDir: string, fileName: string): Promise<string> {
+  const rawPath = join(fixtureDir, fileName);
+  try {
+    return await readFile(rawPath, 'utf-8');
+  } catch (error) {
+    // Only a missing plain file falls through to the compressed variant.
+    if ((error as NodeJS.ErrnoException).code !== 'ENOENT') {
+      throw error;
+    }
+  }
+
+  const compressed = await readFile(`${rawPath}.gz`);
+  return gunzipSync(compressed).toString('utf-8');
+}
+
+/**
+ * Locates the SQLite data file of a fixture.
+ *
+ * Returns `data.sqlite` when it exists as a regular file. Otherwise, when
+ * `data.sqlite.gz` exists, it is decompressed into a freshly created temporary
+ * directory and the path of the extracted copy is returned.
+ *
+ * @param fixtureDir Directory of the fixture.
+ * @returns Path of a usable SQLite file, or null when the fixture has none.
+ * @throws Any filesystem error other than ENOENT.
+ */
+async function fixtureDataPath(fixtureDir: string): Promise<string | null> {
+  const dataPath = join(fixtureDir, 'data.sqlite');
+  try {
+    const dataStat = await stat(dataPath);
+    return dataStat.isFile() ? dataPath : null;
+  } catch (error) {
+    if ((error as NodeJS.ErrnoException).code !== 'ENOENT') {
+      throw error;
+    }
+  }
+
+  const compressedPath = `${dataPath}.gz`;
+  try {
+    const compressedStat = await stat(compressedPath);
+    if (!compressedStat.isFile()) {
+      return null;
+    }
+    // The digest only makes the temp directory name traceable to its fixture;
+    // mkdtemp still appends a random suffix, so each call extracts a new copy.
+    // NOTE(review): nothing here removes the extracted directory - confirm
+    // whether callers clean it up.
+    const digest = createHash('sha256').update(fixtureDir).digest('hex').slice(0, 16);
+    const tempRoot = await mkdtemp(join(tmpdir(), `klo-relationship-benchmark-${digest}-`));
+    const extractedPath = join(tempRoot, 'data.sqlite');
+    await writeFile(extractedPath, gunzipSync(await readFile(compressedPath)));
+    return extractedPath;
+  } catch (error) {
+    if ((error as NodeJS.ErrnoException).code === 'ENOENT') {
+      return null;
+    }
+    throw error;
+  }
+}
+
+/**
+ * Loads `column-embeddings.json` from a fixture directory.
+ *
+ * Entries whose value is not an array of numbers are dropped.
+ *
+ * @param fixtureDir Directory of the fixture.
+ * @returns Embedding vector per column id.
+ */
+async function fixtureColumnEmbeddings(fixtureDir: string): Promise<Record<string, number[]>> {
+  const embeddingsPath = join(fixtureDir, 'column-embeddings.json');
+  try {
+    const raw = await readFile(embeddingsPath, 'utf-8');
+    const parsed = JSON.parse(raw) as Record<string, unknown>;
+    return Object.fromEntries(
+      Object.entries(parsed).flatMap(([columnId, value]) => {
+        if (!Array.isArray(value) || value.some((item) => typeof item !== 'number')) {
+          return [];
+        }
+        return [[columnId, value as number[]]];
+      }),
+
);
+  } catch {
+    // Embeddings are optional: a missing, unreadable or malformed file yields none.
+    return {};
+  }
+}
+
+// Validation schemas for the fixture configuration and expected-links files.
+const modeSchema = z.enum(KLO_RELATIONSHIP_BENCHMARK_MODES);
+const tierSchema = z.enum(KLO_RELATIONSHIP_BENCHMARK_TIERS);
+const originSchema = z.enum(['synthetic', 'public', 'customer']);
+const validationBudgetSchema = z.union([z.literal('all'), z.number().int().nonnegative()]);
+
+/** Fixture configuration; `tier` falls back to `unit` when omitted. */
+const fixtureConfigSchema = z.object({
+  id: z.string().min(1),
+  name: z.string().min(1),
+  tier: tierSchema.default('unit'),
+  origin: originSchema,
+  thresholdEligible: z.boolean().optional(),
+  validationBudget: validationBudgetSchema.optional(),
+  defaultModes: z.array(modeSchema).min(1),
+});
+
+/** Expected keys and links; every table/column name and every column list must be non-empty. */
+const expectedLinksSchema = z.object({
+  expectedPks: z.array(
+    z.object({
+      table: z.string().min(1),
+      columns: z.array(z.string().min(1)).min(1),
+    }),
+  ),
+  expectedLinks: z.array(
+    z.object({
+      fromTable: z.string().min(1),
+      fromColumns: z.array(z.string().min(1)).min(1),
+      toTable: z.string().min(1),
+      toColumns: z.array(z.string().min(1)).min(1),
+      relationship: z.enum(['many_to_one', 'one_to_many', 'one_to_one']),
+    }),
+  ),
+});
+
+/** Returns the distinct values in `localeCompare` order. */
+function sortedUnique(values: Iterable<string>): string[] {
+  return Array.from(new Set(values)).sort((left, right) => left.localeCompare(right));
+}
+
+/** Renders a column tuple as `(a,b,...)`. */
+function tupleKey(columns: readonly string[]): string {
+  return `(${columns.join(',')})`;
+}
+
+/** Canonical key of a primary key: `table.(col,...)`. Accepts expected and detected keys alike. */
+function pkKey(pk: Pick<KloRelationshipBenchmarkExpectedPk, 'table' | 'columns'>): string {
+  return `${pk.table}.${tupleKey(pk.columns)}`;
+}
+
+/**
+ * Canonical key of a link: `from.(cols)->to.(cols)`. Relationship type, score
+ * and status are not part of the key, so expected and detected links compare
+ * by endpoints only.
+ */
+function fkKey(
+  link: Pick<KloRelationshipBenchmarkExpectedLink, 'fromTable' | 'fromColumns' | 'toTable' | 'toColumns'>,
+): string {
+  return `${link.fromTable}.${tupleKey(link.fromColumns)}->${link.toTable}.${tupleKey(link.toColumns)}`;
+}
+
+/** Canonical key of a detected link; identical to `fkKey`. */
+function relationshipKey(link: KloRelationshipBenchmarkDetectedLink): string {
+  return fkKey(link);
+}
+
+function relationshipToBenchmarkLink(candidate: KloEnrichedRelationship): KloRelationshipBenchmarkDetectedLink {
+  return {
+    fromTable: candidate.from.table.name,
+    fromColumns: candidate.from.columns,
+    toTable: candidate.to.table.name,
+    toColumns: candidate.to.columns,
+    relationship: candidate.relationshipType,
+    score:
candidate.confidence,
+    status: 'accepted',
+    source: candidate.source,
+  };
+}
+
+/**
+ * Converts a discovery candidate into a benchmark link with status `review`.
+ *
+ * NOTE(review): the picked key list was reconstructed from the properties read
+ * below - confirm against the original signature.
+ */
+function broadCandidateToBenchmarkLink(
+  candidate: Pick<KloRelationshipDiscoveryCandidate, 'from' | 'to' | 'relationshipType' | 'confidence' | 'source'>,
+): KloRelationshipBenchmarkDetectedLink {
+  return {
+    fromTable: candidate.from.table.name,
+    fromColumns: candidate.from.columns,
+    toTable: candidate.to.table.name,
+    toColumns: candidate.to.columns,
+    relationship: candidate.relationshipType,
+    score: candidate.confidence,
+    status: 'review',
+    source: candidate.source,
+  };
+}
+
+/** Converts a composite primary-key candidate, keeping its own score and status. */
+function compositePkToBenchmarkPk(candidate: KloCompositePrimaryKeyCandidate): KloRelationshipBenchmarkDetectedPk {
+  return {
+    table: candidate.table.name,
+    columns: candidate.columns,
+    score: candidate.score,
+    status: candidate.status,
+  };
+}
+
+/** Converts a composite relationship candidate, keeping its own confidence and status. */
+function compositeRelationshipToBenchmarkLink(
+  candidate: KloCompositeRelationshipCandidate,
+): KloRelationshipBenchmarkDetectedLink {
+  return {
+    fromTable: candidate.from.table.name,
+    fromColumns: candidate.from.columns,
+    toTable: candidate.to.table.name,
+    toColumns: candidate.to.columns,
+    relationship: candidate.relationshipType,
+    score: candidate.confidence,
+    status: candidate.status,
+    source: candidate.source,
+  };
+}
+
+/** Division that yields 1 for an empty denominator, so "nothing expected" or "nothing predicted" scores as perfect. */
+function ratio(numerator: number, denominator: number): number {
+  return denominator === 0 ? 1 : numerator / denominator;
+}
+
+/** Harmonic mean of precision and recall; 0 when both are 0. */
+function f1(precision: number, recall: number): number {
+  return precision + recall === 0 ?
0 : (2 * precision * recall) / (precision + recall);
+}
+
+/** Items of `left` that do not occur in `right`, in their original order. */
+function difference(left: readonly string[], right: readonly string[]): string[] {
+  const excluded = new Set(right);
+  const remaining: string[] = [];
+  for (const candidate of left) {
+    if (!excluded.has(candidate)) {
+      remaining.push(candidate);
+    }
+  }
+  return remaining;
+}
+
+/** Number of items of `left` that also occur in `right`. */
+function intersectionSize(left: readonly string[], right: readonly string[]): number {
+  const lookup = new Set(right);
+  let shared = 0;
+  for (const candidate of left) {
+    if (lookup.has(candidate)) {
+      shared += 1;
+    }
+  }
+  return shared;
+}
+
+/** Keys of the expected primary keys that span more than one column. */
+function compositePkKeys(expected: KloRelationshipBenchmarkExpectedLinks): string[] {
+  const multiColumnPks = expected.expectedPks.filter((pk) => pk.columns.length > 1);
+  return sortedUnique(multiColumnPks.map((pk) => pkKey(pk)));
+}
+
+/** Keys of the expected links that have more than one column on either side. */
+function compositeFkKeys(expected: KloRelationshipBenchmarkExpectedLinks): string[] {
+  const multiColumnLinks = expected.expectedLinks.filter(
+    (link) => !(link.fromColumns.length <= 1 && link.toColumns.length <= 1),
+  );
+  return sortedUnique(multiColumnLinks.map((link) => fkKey(link)));
+}
+
+/** Keys of every expected primary key, composite ones included. */
+function scalarExpectedPkKeys(expected: KloRelationshipBenchmarkExpectedLinks): string[] {
+  const keys = expected.expectedPks.map((pk) => pkKey(pk));
+  return sortedUnique(keys);
+}
+
+/** Keys of every expected link, composite ones included. */
+function scalarExpectedFkKeys(expected: KloRelationshipBenchmarkExpectedLinks): string[] {
+  const keys = expected.expectedLinks.map((link) => fkKey(link));
+  return sortedUnique(keys);
+}
+
+function scoreBenchmarkCase(input: {
+  fixtureId: string;
+  mode: KloRelationshipBenchmarkMode;
+  expected: KloRelationshipBenchmarkExpectedLinks;
+  detected: KloRelationshipBenchmarkDetectorResult;
+}): KloRelationshipBenchmarkCaseResult {
+  const expectedPk = scalarExpectedPkKeys(input.expected);
+  const expectedFk = scalarExpectedFkKeys(input.expected);
+  const predictedPk = sortedUnique(input.detected.pks.map(pkKey));
+  const predictedFk = sortedUnique(input.detected.links.map(relationshipKey));
+  const acceptedFk = sortedUnique(
+    input.detected.links.filter((link) => link.status === 'accepted').map(relationshipKey),
+  );
+  const reviewFk = sortedUnique(input.detected.links.filter((link) => link.status === 'review').map(relationshipKey));
+  const acceptedOrReviewFk = sortedUnique([...acceptedFk, ...reviewFk]);
+
+  const
truePositivePk = intersectionSize(predictedPk, expectedPk); // PK hits count every detected PK, whatever its status
+  const truePositiveFk = intersectionSize(acceptedFk, expectedFk); // FK hits count accepted links only
+  const acceptedOrReviewTruePositiveFk = intersectionSize(acceptedOrReviewFk, expectedFk);
+  const reviewTruePositiveFk = intersectionSize(reviewFk, expectedFk);
+  const pkPrecision = ratio(truePositivePk, predictedPk.length);
+  const pkRecall = ratio(truePositivePk, expectedPk.length);
+  const fkPrecision = ratio(truePositiveFk, acceptedFk.length);
+  const fkRecall = ratio(truePositiveFk, expectedFk.length);
+  // Only accepted links can be false positives; unexpected review links are not counted.
+  const falsePositiveFk = difference(acceptedFk, expectedFk);
+  return {
+    fixtureId: input.fixtureId,
+    mode: input.mode,
+    metrics: {
+      pkPrecision,
+      pkRecall,
+      pkF1: f1(pkPrecision, pkRecall),
+      fkPrecision,
+      fkRecall,
+      fkF1: f1(fkPrecision, fkRecall),
+      acceptedFalsePositiveCount: falsePositiveFk.length,
+      reviewRecall: ratio(reviewTruePositiveFk, expectedFk.length),
+      acceptedOrReviewRecall: ratio(acceptedOrReviewTruePositiveFk, expectedFk.length),
+      runtimeSeconds: input.detected.runtimeSeconds,
+      sqlQueries: input.detected.sqlQueries,
+      llmCalls: input.detected.llmCalls,
+    },
+    expected: {
+      pk: expectedPk,
+      fk: expectedFk,
+    },
+    predicted: {
+      pk: predictedPk,
+      fk: predictedFk, // every detected link, including rejected ones
+      acceptedFk,
+      reviewFk,
+    },
+    falsePositives: {
+      pk: difference(predictedPk, expectedPk),
+      fk: falsePositiveFk,
+    },
+    falseNegatives: {
+      pk: difference(expectedPk, predictedPk),
+      fk: difference(expectedFk, acceptedOrReviewFk), // a link left in review is not a miss
+    },
+    skippedComposite: {
+      pk: difference(compositePkKeys(input.expected), predictedPk),
+      fk: difference(compositeFkKeys(input.expected), acceptedOrReviewFk),
+    },
+    validationBlocked: input.detected.validationBlocked,
+  };
+}
+/**
+ * Returns a copy of `snapshot` with declared key metadata hidden as `mode`
+ * requires: primary-key flags are cleared and/or foreign-key lists emptied.
+ * The input snapshot is not modified.
+ */
+export function maskKloRelationshipBenchmarkSnapshot(
+  snapshot: KloSchemaSnapshot,
+  mode: KloRelationshipBenchmarkMode,
+): KloSchemaSnapshot {
+  const relationshipDiscoveryMode = // modes that hide both declared PKs and declared FKs
+    mode === 'declared_pks_and_declared_fks_removed' ||
+    mode === 'llm_disabled' ||
+    mode ===
/**
 * Decides whether a scored benchmark case may contribute to the headline (tuning) metrics.
 *
 * A case is eligible only when validation was not blocked and it ran in the
 * `declared_pks_and_declared_fks_removed` mode. `smoke` and `schema_only` fixtures are never
 * eligible. Otherwise an explicit `thresholdEligible` flag on the fixture wins, and without
 * one the `unit` and `row_bearing` tiers are eligible.
 */
export function isKloRelationshipBenchmarkTuningEligible(input: {
  fixture: Pick<KloRelationshipBenchmarkFixture, 'tier' | 'thresholdEligible'>;
  mode: KloRelationshipBenchmarkMode;
  validationBlocked: boolean;
}): boolean {
  if (input.validationBlocked || input.mode !== 'declared_pks_and_declared_fks_removed') {
    return false;
  }

  if (input.fixture.tier === 'smoke' || input.fixture.tier === 'schema_only') {
    return false;
  }

  if (input.fixture.thresholdEligible !== undefined) {
    return input.fixture.thresholdEligible;
  }

  return input.fixture.tier === 'unit' || input.fixture.tier === 'row_bearing';
}

/** Argument accepted by a benchmark detector's `detect` method. */
type RelationshipBenchmarkDetectInput = Parameters<KloRelationshipBenchmarkDetector['detect']>[0];

/** Value a benchmark detector's `detect` method resolves to. */
type RelationshipBenchmarkDetectOutput = Awaited<ReturnType<KloRelationshipBenchmarkDetector['detect']>>;

/**
 * Runs the executor-dependent part of detection: profiling, candidate generation, the
 * optional LLM proposal step, validation and composite discovery.
 *
 * The SQLite executor (opened only when the fixture ships SQLite data and profiling is
 * enabled) is closed in a `finally`, so a failure in any step no longer leaks the handle.
 * The executor is still returned because the caller reads its `queryCount` afterwards.
 *
 * @param llmProvider Provider used to propose extra candidates, or `null` to skip the LLM
 *   step entirely (candidates are then validated without being merged).
 */
async function collectBenchmarkRelationshipEvidence(
  input: RelationshipBenchmarkDetectInput,
  llmProvider: KloLlmProvider | null,
) {
  const sqliteDataAvailable = Boolean(input.dataPath && input.snapshot.driver === 'sqlite');
  const profilingExecutor =
    sqliteDataAvailable && input.mode !== 'profiling_disabled'
      ? new KloRelationshipBenchmarkSqliteExecutor(input.dataPath as string)
      : null;
  const validationExecutor = profilingExecutor && input.mode !== 'validation_disabled' ? profilingExecutor : null;
  const runIdPrefix = `relationship-benchmark:${input.fixtureId}:${input.mode}`;

  try {
    const profiles =
      input.mode === 'profiling_disabled'
        ? emptyKloRelationshipProfileArtifact({
            connectionId: input.snapshot.connectionId,
            driver: input.snapshot.driver,
            reason: 'relationship_benchmark_profiling_disabled',
          })
        : await profileKloRelationshipSchema({
            connectionId: input.snapshot.connectionId,
            driver: input.snapshot.driver,
            schema: input.schema,
            executor: profilingExecutor,
            ctx: { runId: `${runIdPrefix}:profile` },
          });
    const broadRelationshipCandidates = generateKloRelationshipDiscoveryCandidates(input.schema, {
      profiles,
      useEmbeddings: input.mode !== 'embeddings_disabled',
    });

    const llmProposalResult =
      llmProvider === null || input.mode === 'llm_disabled'
        ? { candidates: [], warnings: [], llmCalls: 0, summary: 'skipped' as const }
        : await proposeKloRelationshipCandidatesWithLlm({
            connectionId: input.snapshot.connectionId,
            schema: input.schema,
            profile: profiles,
            llmProvider,
          });
    // Without a provider the heuristic candidates are validated as generated; the merge
    // step only runs for the LLM-backed detector (even when the LLM call itself is skipped).
    const candidates =
      llmProvider === null
        ? broadRelationshipCandidates
        : mergeKloRelationshipDiscoveryCandidates([...broadRelationshipCandidates, ...llmProposalResult.candidates]);

    // A numeric budget is shared with profiling: queries already spent there are deducted.
    const validationBudget =
      input.validationBudget === undefined || input.validationBudget === 'all'
        ? 'all'
        : Math.max(0, input.validationBudget - profiles.queryCount);
    const validatedCandidates = await validateKloRelationshipDiscoveryCandidates({
      connectionId: input.snapshot.connectionId,
      driver: input.snapshot.driver,
      candidates,
      profiles,
      executor: validationExecutor,
      ctx: { runId: `${runIdPrefix}:validate` },
      tableCount: input.schema.tables.length,
      settings: {
        validationBudget,
      },
    });

    // Composite discovery needs an unrestricted budget and a live validation executor.
    const compositeDetection =
      validationBudget === 'all' &&
      validationExecutor &&
      input.mode !== 'profiling_disabled' &&
      input.mode !== 'validation_disabled'
        ? await discoverKloCompositeRelationships({
            connectionId: input.snapshot.connectionId,
            driver: input.snapshot.driver,
            schema: input.schema,
            profiles,
            executor: validationExecutor,
            ctx: { runId: `${runIdPrefix}:composite` },
          })
        : { primaryKeys: [], relationships: [], queryCount: 0, warnings: [] };

    return {
      profilingExecutor,
      profiles,
      broadRelationshipCandidates,
      validatedCandidates,
      compositeDetection,
      llmCalls: llmProposalResult.llmCalls,
    };
  } finally {
    profilingExecutor?.close();
  }
}

/**
 * Shared detection pipeline behind both exported benchmark detectors.
 *
 * Formal (declared) relationships are always reported; discovered links are resolved through
 * the relationship graph and dropped when their key matches a formal link. Primary keys from
 * graph resolution and composite discovery are merged by key and returned in sorted key order.
 */
async function detectBenchmarkRelationships(
  input: RelationshipBenchmarkDetectInput,
  llmProvider: KloLlmProvider | null,
): Promise<RelationshipBenchmarkDetectOutput> {
  const startedAt = performance.now();
  const formalMetadata = collectKloFormalMetadataRelationships(input.schema);
  const formalLinks = formalMetadata.accepted.map((relationship) => relationshipToBenchmarkLink(relationship));
  const acceptedKeys = new Set(formalLinks.map(fkKey));

  const evidence = await collectBenchmarkRelationshipEvidence(input, llmProvider);

  const graph = resolveKloRelationshipGraph({
    schema: input.schema,
    profiles: evidence.profiles,
    candidates: evidence.validatedCandidates,
  });

  // Graph relationships with the given status, minus anything already declared formally.
  const discoveredLinks = <TStatus extends 'accepted' | 'review'>(status: TStatus) =>
    graph.relationships
      .filter((candidate) => candidate.status === status)
      .map((candidate) => ({
        ...broadCandidateToBenchmarkLink(candidate),
        score: candidate.fkScore,
        status,
      }))
      .filter((candidate) => !acceptedKeys.has(fkKey(candidate)));
  const acceptedBroadCandidates = discoveredLinks('accepted');
  const reviewCandidates = discoveredLinks('review');

  const resolvedPks = graph.pks
    .filter((pk) => pk.status !== 'rejected')
    .map((pk) => ({
      table: pk.table,
      columns: pk.columns,
      score: pk.pkScore,
      status: pk.status,
    }));
  const compositePks = evidence.compositeDetection.primaryKeys.map(compositePkToBenchmarkPk);
  // Later entries win on key collisions, so a composite PK replaces a resolved PK with the same key.
  const allPksByKey = new Map(
    [...resolvedPks, ...compositePks].map((candidate) => [pkKey(candidate), candidate] as const),
  );
  const pks = sortedUnique(allPksByKey.keys()).flatMap((key) => {
    const candidate = allPksByKey.get(key);
    return candidate ? [candidate] : [];
  });

  return {
    pks,
    links: [
      ...formalLinks,
      ...acceptedBroadCandidates,
      ...reviewCandidates,
      ...evidence.compositeDetection.relationships
        .map(compositeRelationshipToBenchmarkLink)
        .filter((candidate) => !acceptedKeys.has(fkKey(candidate))),
    ],
    // Blocked when validation/profiling was switched off, or when the fixture has data and
    // candidates but SQL profiling was unavailable.
    validationBlocked:
      input.mode === 'validation_disabled' ||
      input.mode === 'profiling_disabled' ||
      (input.dataPath !== null && evidence.broadRelationshipCandidates.length > 0 && !evidence.profiles.sqlAvailable),
    sqlQueries: evidence.profilingExecutor?.queryCount ?? evidence.profiles.queryCount,
    llmCalls: evidence.llmCalls,
    runtimeSeconds: Number(((performance.now() - startedAt) / 1000).toFixed(6)),
  };
}

/**
 * Benchmark detector that augments heuristic candidates with LLM-proposed ones.
 * The LLM call is skipped (and counted as zero calls) in `llm_disabled` mode.
 */
export function kloRelationshipBenchmarkDetectorWithLlm(
  llmProvider: KloLlmProvider,
): KloRelationshipBenchmarkDetector {
  return {
    async detect(input) {
      return detectBenchmarkRelationships(input, llmProvider);
    },
  };
}

/** Benchmark detector for the current heuristic-only pipeline; it never calls an LLM. */
export function currentKloRelationshipBenchmarkDetector(): KloRelationshipBenchmarkDetector {
  return {
    async detect(input) {
      return detectBenchmarkRelationships(input, null);
    },
  };
}

/**
 * Loads one benchmark fixture directory.
 *
 * Reads `fixture.yaml`, `snapshot.json` and `expected-links.yaml`; the two YAML documents are
 * validated with their schemas. The optional data path and column embeddings are resolved by
 * `fixtureDataPath` and `fixtureColumnEmbeddings`.
 */
export async function loadKloRelationshipBenchmarkFixture(
  fixtureDir: string,
): Promise<KloRelationshipBenchmarkFixture> {
  const [fixtureRaw, snapshotRaw, expectedRaw] = await Promise.all([
    fixtureText(fixtureDir, 'fixture.yaml'),
    fixtureText(fixtureDir, 'snapshot.json'),
    fixtureText(fixtureDir, 'expected-links.yaml'),
  ]);
  const fixture = fixtureConfigSchema.parse(YAML.parse(fixtureRaw));
  const expected = expectedLinksSchema.parse(YAML.parse(expectedRaw));
  // NOTE(review): the snapshot is only cast, not schema-validated like the YAML files.
  const snapshot = JSON.parse(snapshotRaw) as KloSchemaSnapshot;

  return {
    ...fixture,
    snapshot,
    expected,
    dataPath: await fixtureDataPath(fixtureDir),
    columnEmbeddings: await fixtureColumnEmbeddings(fixtureDir),
  };
}

/**
 * Loads every fixture found directly under `fixtureRoot`.
 * Each sub-directory is treated as a fixture; directories are ordered by `localeCompare`
 * on their joined path so the result order is deterministic.
 */
export async function loadKloRelationshipBenchmarkFixtures(
  fixtureRoot: string,
): Promise<KloRelationshipBenchmarkFixture[]> {
  const entries = await readdir(fixtureRoot, { withFileTypes: true });
  const fixtureDirs = entries
    .filter((entry) => entry.isDirectory())
    .map((entry) => join(fixtureRoot, entry.name))
    .sort((left, right) => left.localeCompare(right));

  return Promise.all(fixtureDirs.map((fixtureDir) => loadKloRelationshipBenchmarkFixture(fixtureDir)));
}
/**
 * Runs every fixture in each of its `defaultModes` and aggregates the scored cases.
 *
 * Cases are executed one after another, fixture by fixture and mode by mode. Headline
 * metrics average only the cases accepted by `isKloRelationshipBenchmarkTuningEligible`;
 * the `mean*` metrics average every case. All averages are 0 when there is nothing to average.
 *
 * The return type is inferred from the returned object: the annotation visible in this
 * chunk was a bare `Promise`, which is not a valid type reference.
 * NOTE(review): if the module declares a named suite-result type, re-annotate with it.
 *
 * @param input.fixtures Fixtures to run.
 * @param input.detector Optional detector override, passed through to each case run.
 */
export async function runKloRelationshipBenchmarkSuite(input: {
  fixtures: KloRelationshipBenchmarkFixture[];
  detector?: KloRelationshipBenchmarkDetector;
}) {
  const cases: KloRelationshipBenchmarkCaseResult[] = [];
  for (const fixture of input.fixtures) {
    for (const mode of fixture.defaultModes) {
      const scored = await runKloRelationshipBenchmarkCase({
        fixture,
        mode,
        detector: input.detector,
      });
      cases.push(scored);
    }
  }

  const fixtureById = new Map(input.fixtures.map((fixture) => [fixture.id, fixture] as const));
  const headlineCases = cases.filter((item) => {
    const fixture = fixtureById.get(item.fixtureId);
    if (!fixture) {
      return false;
    }
    return isKloRelationshipBenchmarkTuningEligible({
      fixture,
      mode: item.mode,
      validationBlocked: item.validationBlocked,
    });
  });

  // Average of one recall metric over a set of cases.
  const averageOf = (
    items: KloRelationshipBenchmarkCaseResult[],
    metric: 'pkRecall' | 'fkRecall' | 'acceptedOrReviewRecall',
  ): number => mean(items.map((item) => item.metrics[metric]));

  return {
    cases,
    validationBlockedCases: cases
      .filter((item) => item.validationBlocked)
      .map((item) => `${item.fixtureId}:${item.mode}`),
    aggregate: {
      caseCount: cases.length,
      headlineCaseCount: headlineCases.length,
      headlinePkRecall: averageOf(headlineCases, 'pkRecall'),
      headlineFkRecall: averageOf(headlineCases, 'fkRecall'),
      headlineAcceptedOrReviewRecall: averageOf(headlineCases, 'acceptedOrReviewRecall'),
      meanPkRecall: averageOf(cases, 'pkRecall'),
      meanFkRecall: averageOf(cases, 'fkRecall'),
      meanAcceptedOrReviewRecall: averageOf(cases, 'acceptedOrReviewRecall'),
    },
  };
}

/** Arithmetic mean of `values`; returns 0 for an empty array instead of `NaN`. */
function mean(values: number[]): number {
  if (values.length === 0) {
    return 0;
  }
  let total = 0;
  for (const value of values) {
    total += value;
  }
  return total / values.length;
}
stable tie ordering', () => { + const result = applyKloRelationshipValidationBudget({ + candidates: [ + { id: 'first', confidence: 0.8 }, + { id: 'second', confidence: 0.9 }, + { id: 'third', confidence: 0.9 }, + { id: 'fourth', confidence: 0.2 }, + ], + tableCount: 100, + budget: 2, + score: (candidate) => candidate.confidence, + }); + + expect(result.effectiveBudget).toBe(2); + expect(result.toValidate.map((entry) => entry.candidate.id)).toEqual(['second', 'third']); + expect(result.deferred.map((entry) => entry.candidate.id)).toEqual(['first', 'fourth']); + expect(result.toValidate.map((entry) => entry.originalIndex)).toEqual([1, 2]); + }); + + it('uses the default budget when the budget is omitted', () => { + const candidates = Array.from({ length: 8 }, (_, index) => ({ + id: `candidate-${index}`, + confidence: 1 - index / 10, + })); + + const result = applyKloRelationshipValidationBudget({ + candidates, + tableCount: 2, + score: (candidate) => candidate.confidence, + }); + + expect(result.effectiveBudget).toBe(4); + expect(result.toValidate).toHaveLength(4); + expect(result.deferred).toHaveLength(4); + }); + + it('treats budget zero as disabling SQL validation', () => { + const result = applyKloRelationshipValidationBudget({ + candidates: [ + { id: 'first', confidence: 1 }, + { id: 'second', confidence: 0.5 }, + ], + tableCount: 10, + budget: 0, + score: (candidate) => candidate.confidence, + }); + + expect(result.effectiveBudget).toBe(0); + expect(result.toValidate).toEqual([]); + expect(result.deferred.map((entry) => entry.candidate.id)).toEqual(['first', 'second']); + }); + + it('treats budget all as validating every candidate', () => { + const result = applyKloRelationshipValidationBudget({ + candidates: [ + { id: 'first', confidence: 0.1 }, + { id: 'second', confidence: 0.9 }, + ], + tableCount: 1, + budget: 'all', + score: (candidate) => candidate.confidence, + }); + + expect(result.effectiveBudget).toBe('all'); + expect(result.toValidate.map((entry) => 
/**
 * Validation budget setting: a candidate count, `'all'` to validate everything, or
 * `undefined` to fall back to the table-count based default.
 */
export type KloRelationshipValidationBudget = number | 'all' | undefined;

/** A candidate together with its position in the input list and its computed score. */
export interface KloRelationshipBudgetedCandidate<TCandidate> {
  candidate: TCandidate;
  originalIndex: number;
  score: number;
}

/** Outcome of splitting candidates into those validated now and those deferred. */
export interface KloRelationshipValidationBudgetResult<TCandidate> {
  effectiveBudget: number | 'all';
  toValidate: KloRelationshipBudgetedCandidate<TCandidate>[];
  deferred: KloRelationshipBudgetedCandidate<TCandidate>[];
}

/** Arguments for {@link applyKloRelationshipValidationBudget}. */
export interface ApplyKloRelationshipValidationBudgetInput<TCandidate> {
  candidates: readonly TCandidate[];
  tableCount: number;
  budget?: KloRelationshipValidationBudget;
  score: (candidate: TCandidate) => number;
}

/**
 * Default validation budget: two validations per table, capped at 1000.
 * Non-finite or negative table counts count as 0; fractional counts are floored.
 */
export function defaultKloRelationshipValidationBudget(tableCount: number): number {
  const safeTableCount = Number.isFinite(tableCount) ? Math.max(0, Math.floor(tableCount)) : 0;
  return Math.min(2 * safeTableCount, 1000);
}

/**
 * Splits candidates into the ones to validate within the budget and the ones to defer.
 *
 * - `budget: 'all'` validates every candidate and keeps the input order.
 * - Otherwise candidates are ranked by descending score, ties keeping input order, and the
 *   first `budget` of them are validated. A missing budget uses
 *   `defaultKloRelationshipValidationBudget(tableCount)`.
 * - Negative budgets are clamped to 0, fractional budgets are floored, and `NaN` is
 *   treated as 0 so `effectiveBudget` is never `NaN`.
 *
 * `score` is called exactly once per candidate.
 */
export function applyKloRelationshipValidationBudget<TCandidate>(
  input: ApplyKloRelationshipValidationBudgetInput<TCandidate>,
): KloRelationshipValidationBudgetResult<TCandidate> {
  const scored: KloRelationshipBudgetedCandidate<TCandidate>[] = input.candidates.map(
    (candidate, originalIndex) => ({
      candidate,
      originalIndex,
      score: input.score(candidate),
    }),
  );

  if (input.budget === 'all') {
    // No ranking needed: everything is validated, in the caller's order.
    return {
      effectiveBudget: 'all',
      toValidate: scored,
      deferred: [],
    };
  }

  // `scored` is a fresh local array, so sorting it in place is safe.
  const ranked = scored.sort((left, right) => {
    const scoreDelta = right.score - left.score;
    return scoreDelta === 0 ? left.originalIndex - right.originalIndex : scoreDelta;
  });

  const requestedBudget = input.budget ?? defaultKloRelationshipValidationBudget(input.tableCount);
  const safeBudget = Number.isNaN(requestedBudget) ? 0 : Math.max(0, Math.floor(requestedBudget));
  return {
    effectiveBudget: safeBudget,
    toValidate: ranked.slice(0, safeBudget),
    deferred: ranked.slice(safeBudget),
  };
}
null, + }; +} + +function table(id: string, name: string, columns: KloEnrichedColumn[]): KloEnrichedTable { + const ref = { catalog: null, db: 'public', name }; + return { + id, + ref, + enabled: true, + descriptions: {}, + columns: columns.map((item) => ({ ...item, tableId: id, tableRef: ref })), + }; +} + +function schema(tables: KloEnrichedTable[]): KloEnrichedSchema { + return { + connectionId: 'warehouse', + tables, + relationships: [], + }; +} + +function planCodeProfiles(): KloRelationshipProfileArtifact { + return { + connectionId: 'warehouse', + driver: 'sqlite', + sqlAvailable: true, + queryCount: 0, + tables: [ + { table: { catalog: null, db: 'public', name: 'stg_plans' }, rowCount: 4 }, + { table: { catalog: null, db: 'public', name: 'mart_account_segments' }, rowCount: 4 }, + { table: { catalog: null, db: 'public', name: 'stg_plan_segment_mapping' }, rowCount: 4 }, + ], + warnings: [], + columns: { + 'stg_plans.plan_code': { + table: { catalog: null, db: 'public', name: 'stg_plans' }, + column: 'plan_code', + nativeType: 'TEXT', + normalizedType: 'text', + rowCount: 4, + nullCount: 0, + distinctCount: 4, + uniquenessRatio: 1, + nullRate: 0, + sampleValues: ['basic', 'enterprise', 'free', 'pro'], + minTextLength: 4, + maxTextLength: 10, + }, + 'stg_plans.created_at': { + table: { catalog: null, db: 'public', name: 'stg_plans' }, + column: 'created_at', + nativeType: 'TEXT', + normalizedType: 'text', + rowCount: 4, + nullCount: 0, + distinctCount: 4, + uniquenessRatio: 1, + nullRate: 0, + sampleValues: ['2026-05-01', '2026-05-02', '2026-05-03', '2026-05-04'], + minTextLength: 10, + maxTextLength: 10, + }, + 'stg_plans.email': { + table: { catalog: null, db: 'public', name: 'stg_plans' }, + column: 'email', + nativeType: 'TEXT', + normalizedType: 'text', + rowCount: 4, + nullCount: 0, + distinctCount: 4, + uniquenessRatio: 1, + nullRate: 0, + sampleValues: ['a@example.test', 'b@example.test', 'c@example.test', 'd@example.test'], + minTextLength: 14, + 
maxTextLength: 14, + }, + 'stg_plans.is_deleted': { + table: { catalog: null, db: 'public', name: 'stg_plans' }, + column: 'is_deleted', + nativeType: 'TEXT', + normalizedType: 'text', + rowCount: 4, + nullCount: 0, + distinctCount: 4, + uniquenessRatio: 1, + nullRate: 0, + sampleValues: ['deleted-a', 'deleted-b', 'deleted-c', 'deleted-d'], + minTextLength: 9, + maxTextLength: 9, + }, + 'mart_account_segments.current_plan_code': { + table: { catalog: null, db: 'public', name: 'mart_account_segments' }, + column: 'current_plan_code', + nativeType: 'TEXT', + normalizedType: 'text', + rowCount: 4, + nullCount: 0, + distinctCount: 4, + uniquenessRatio: 1, + nullRate: 0, + sampleValues: ['basic', 'enterprise', 'free', 'pro'], + minTextLength: 4, + maxTextLength: 10, + }, + 'mart_account_segments.normalized_plan_code': { + table: { catalog: null, db: 'public', name: 'mart_account_segments' }, + column: 'normalized_plan_code', + nativeType: 'TEXT', + normalizedType: 'text', + rowCount: 4, + nullCount: 0, + distinctCount: 4, + uniquenessRatio: 1, + nullRate: 0, + sampleValues: ['basic', 'enterprise', 'free', 'pro'], + minTextLength: 4, + maxTextLength: 10, + }, + 'stg_plan_segment_mapping.canonical_plan_code': { + table: { catalog: null, db: 'public', name: 'stg_plan_segment_mapping' }, + column: 'canonical_plan_code', + nativeType: 'TEXT', + normalizedType: 'text', + rowCount: 4, + nullCount: 0, + distinctCount: 4, + uniquenessRatio: 1, + nullRate: 0, + sampleValues: ['basic', 'enterprise', 'free', 'pro'], + minTextLength: 4, + maxTextLength: 10, + }, + 'stg_plans.canonical_plan_code': { + table: { catalog: null, db: 'public', name: 'stg_plans' }, + column: 'canonical_plan_code', + nativeType: 'TEXT', + normalizedType: 'text', + rowCount: 4, + nullCount: 0, + distinctCount: 4, + uniquenessRatio: 1, + nullRate: 0, + sampleValues: ['basic', 'enterprise', 'free', 'pro'], + minTextLength: 4, + maxTextLength: 10, + }, + }, + }; +} + +describe('relationship discovery 
candidates', () => { + it('normalizes warehouse prefixes and emits review candidates without declared primary keys', () => { + const accounts = table('accounts-id', 'dim_accounts', [ + column('accounts-id', 'accounts-id-col', 'id', { primaryKey: false }), + column('accounts-id', 'accounts-name-col', 'account_name', { nativeType: 'TEXT', normalizedType: 'text' }), + ]); + const invoices = table('invoices-id', 'fct_invoices', [ + column('invoices-id', 'invoice-id-col', 'id', { primaryKey: false }), + column('invoices-id', 'account-id-col', 'account_id', { primaryKey: false }), + ]); + + const candidates = generateKloRelationshipDiscoveryCandidates(schema([accounts, invoices])); + + expect(candidates).toHaveLength(1); + expect(candidates[0]).toMatchObject({ + from: { tableId: 'invoices-id', columnIds: ['account-id-col'], columns: ['account_id'] }, + to: { tableId: 'accounts-id', columnIds: ['accounts-id-col'], columns: ['id'] }, + relationshipType: 'many_to_one', + status: 'review', + source: 'normalized_table_match', + evidence: { + sourceColumnBase: 'account', + targetTableBase: 'account', + targetKeyScore: 0.92, + }, + }); + expect(candidates[0]?.confidence).toBeGreaterThanOrEqual(0.8); + expect(candidates[0]?.evidence.signalVector).toMatchObject({ + nameSimilarity: 0.92, + typeCompatibility: 1, + valueOverlap: 0, + embeddingSimilarity: 0, + profileUniqueness: 0.92, + }); + expect(candidates[0]?.evidence.scoreBreakdown?.score).toBe(candidates[0]?.confidence); + expect(candidates[0]?.evidence.scoreBreakdown?.contributions.nameSimilarity).toBeGreaterThan(0); + expect(candidates[0]?.evidence.reasons).toEqual( + expect.arrayContaining(['foreign_key_suffix', 'normalized_table_name', 'target_key_like']), + ); + }); + + it('generates candidates for PascalCase ID columns without declared keys', () => { + const artists = table('artist-id', 'Artist', [ + column('artist-id', 'artist-id-col', 'ArtistId', { primaryKey: false }), + column('artist-id', 'artist-name-col', 'Name', 
{ nativeType: 'TEXT', normalizedType: 'text' }), + ]); + const albums = table('album-id', 'Album', [ + column('album-id', 'album-id-col', 'AlbumId', { primaryKey: false }), + column('album-id', 'artist-id-fk-col', 'ArtistId', { primaryKey: false }), + ]); + + const candidates = generateKloRelationshipDiscoveryCandidates(schema([artists, albums])); + + expect( + candidates.map( + (candidate) => + `${candidate.from.table.name}.${candidate.from.columns[0]}->${candidate.to.table.name}.${candidate.to.columns[0]}`, + ), + ).toEqual(['Album.ArtistId->Artist.ArtistId']); + expect(candidates[0]).toMatchObject({ + source: 'normalized_table_match', + evidence: { + sourceColumnBase: 'artist', + targetTableBase: 'artist', + targetColumnBase: 'artist_id', + targetKeyScore: 0.9, + reasons: expect.arrayContaining(['foreign_key_suffix', 'normalized_table_name', 'target_key_like']), + }, + }); + expect(candidates[0]?.confidence).toBeGreaterThanOrEqual(0.9); + }); + + it('uses the locality cap before scanning parent tables', () => { + const accounts = table('accounts-id', 'accounts', [column('accounts-id', 'accounts-id-col', 'id')]); + const invoices = table('invoices-id', 'invoices', [ + column('invoices-id', 'invoice-id-col', 'id'), + column('invoices-id', 'account-id-col', 'account_id'), + ]); + + const candidates = generateKloRelationshipDiscoveryCandidates(schema([accounts, invoices]), { + maxCandidateParentTables: 0, + }); + + expect(candidates).toEqual([]); + }); + + it('keeps the nearest parent when the locality cap is one', () => { + const artists = table('artist-id', 'Artist', [ + column('artist-id', 'artist-id-col', 'ArtistId', { primaryKey: false }), + column('artist-id', 'artist-name-col', 'Name', { nativeType: 'TEXT', normalizedType: 'text' }), + ]); + const albums = table('album-id', 'Album', [ + column('album-id', 'album-id-col', 'AlbumId', { primaryKey: false }), + column('album-id', 'artist-id-fk-col', 'ArtistId', { primaryKey: false }), + ]); + const fillerTables = 
Array.from({ length: 25 }, (_, index) => + table(`filler-${index}`, `WarehouseFiller${index}`, [ + column(`filler-${index}`, `filler-${index}-id`, 'WarehouseFillerId', { primaryKey: false }), + ]), + ); + + const candidates = generateKloRelationshipDiscoveryCandidates(schema([albums, ...fillerTables, artists]), { + maxCandidateParentTables: 1, + }); + + expect( + candidates.map( + (candidate) => + `${candidate.from.table.name}.${candidate.from.columns[0]}->${candidate.to.table.name}.${candidate.to.columns[0]}`, + ), + ).toEqual(['Album.ArtistId->Artist.ArtistId']); + }); + + it('uses final table tokens from dotted parent table names', () => { + const customers = table('customer-id', 'SalesLT.Customer', [ + column('customer-id', 'customer-id-col', 'CustomerID', { primaryKey: false }), + column('customer-id', 'customer-name-col', 'CustomerName', { nativeType: 'TEXT', normalizedType: 'text' }), + ]); + const orders = table('order-id', 'SalesLT.SalesOrderHeader', [ + column('order-id', 'order-id-col', 'SalesOrderID', { primaryKey: false }), + column('order-id', 'customer-id-fk-col', 'CustomerID', { primaryKey: false }), + ]); + + const candidates = generateKloRelationshipDiscoveryCandidates(schema([customers, orders])); + + expect( + candidates.map( + (candidate) => + `${candidate.from.table.name}.${candidate.from.columns[0]}->${candidate.to.table.name}.${candidate.to.columns[0]}`, + ), + ).toEqual(['SalesLT.SalesOrderHeader.CustomerID->SalesLT.Customer.CustomerID']); + expect(candidates[0]).toMatchObject({ + evidence: { + sourceColumnBase: 'customer', + targetTableBase: 'sales_lt_customer', + targetColumnBase: 'customer_id', + targetKeyScore: 0.9, + reasons: expect.arrayContaining(['foreign_key_suffix', 'inflection', 'target_key_like']), + }, + }); + }); + + it('emits lower-confidence parent-table-name candidates when the target key name differs from the table name', () => { + const customerAccounts = table('customer-account-id', 'crm.CustomerAccount', [ + 
column('customer-account-id', 'business-entity-id-col', 'BusinessEntityID', { primaryKey: true }), + column('customer-account-id', 'account-name-col', 'AccountName', { + nativeType: 'TEXT', + normalizedType: 'text', + dimensionType: 'string', + }), + ]); + const subscriptions = table('subscriptions-id', 'fct_subscriptions', [ + column('subscriptions-id', 'subscription-id-col', 'SubscriptionID', { primaryKey: false }), + column('subscriptions-id', 'customer-account-id-col', 'CustomerAccountID', { primaryKey: false }), + ]); + + const candidates = generateKloRelationshipDiscoveryCandidates(schema([customerAccounts, subscriptions])); + + expect( + candidates.map( + (candidate) => + `${candidate.from.table.name}.${candidate.from.columns[0]}->${candidate.to.table.name}.${candidate.to.columns[0]}`, + ), + ).toEqual(['fct_subscriptions.CustomerAccountID->crm.CustomerAccount.BusinessEntityID']); + expect(candidates[0]).toMatchObject({ + source: 'parent_table_name_match', + relationshipType: 'many_to_one', + status: 'review', + evidence: { + sourceColumnBase: 'customer_account', + targetTableBase: 'crm_customer_account', + targetColumnBase: 'business_entity_id', + targetKeyScore: 1, + nameScore: 0.82, + reasons: expect.arrayContaining(['foreign_key_suffix', 'parent_table_name_match', 'target_key_like']), + }, + }); + expect(candidates[0]?.evidence.signalVector).toMatchObject({ + nameSimilarity: 0.82, + typeCompatibility: 1, + }); + expect(candidates[0]?.evidence.scoreBreakdown?.score).toBe(candidates[0]?.confidence); + }); + + it('does not emit parent-table-name candidates when the target key type is incompatible', () => { + const customerAccounts = table('customer-account-id', 'crm.CustomerAccount', [ + column('customer-account-id', 'business-entity-id-col', 'BusinessEntityID', { + primaryKey: true, + nativeType: 'TEXT', + normalizedType: 'text', + dimensionType: 'string', + }), + ]); + const subscriptions = table('subscriptions-id', 'fct_subscriptions', [ + 
// `CustomerAccountID` lives on `crm.CustomerAccount` itself; the parent-table-name rule
// must not pair that column with itself.
it('does not use parent-table-name matching to create same-table same-column self-links', () => {
  const accountTable = table('customer-account-id', 'crm.CustomerAccount', [
    column('customer-account-id', 'customer-account-id-col', 'CustomerAccountID', { primaryKey: false }),
    column('customer-account-id', 'account-name-col', 'AccountName', {
      nativeType: 'TEXT',
      normalizedType: 'text',
      dimensionType: 'string',
    }),
  ]);

  const found = generateKloRelationshipDiscoveryCandidates(schema([accountTable]));
  // Render every candidate as "<from table>.<from column>-><to table>.<to column>".
  const linkLabels = found.map(
    ({ from, to }) => `${from.table.name}.${from.columns[0]}->${to.table.name}.${to.columns[0]}`,
  );

  expect(linkLabels).not.toContain('crm.CustomerAccount.CustomerAccountID->crm.CustomerAccount.CustomerAccountID');
});
'country-code-col', 'country_code', { + nativeType: 'TEXT', + normalizedType: 'text', + dimensionType: 'string', + }), + ]); + const profiles = { + connectionId: 'warehouse', + driver: 'sqlite', + sqlAvailable: true, + queryCount: 0, + tables: [], + warnings: [], + columns: { + 'dim_countries.iso_code': { + table: { catalog: null, db: 'public', name: 'dim_countries' }, + column: 'iso_code', + nativeType: 'TEXT', + normalizedType: 'text', + rowCount: 3, + nullCount: 0, + distinctCount: 3, + uniquenessRatio: 1, + nullRate: 0, + sampleValues: ['DE', 'FR', 'US'], + minTextLength: 2, + maxTextLength: 2, + }, + 'fct_accounts.country_code': { + table: { catalog: null, db: 'public', name: 'fct_accounts' }, + column: 'country_code', + nativeType: 'TEXT', + normalizedType: 'text', + rowCount: 4, + nullCount: 0, + distinctCount: 3, + uniquenessRatio: 0.75, + nullRate: 0, + sampleValues: ['FR', 'US'], + minTextLength: 2, + maxTextLength: 2, + }, + }, + } satisfies KloRelationshipProfileArtifact; + + const candidates = generateKloRelationshipDiscoveryCandidates(schema([countries, accounts]), { profiles }); + + expect(candidates).toHaveLength(1); + expect(candidates[0]).toMatchObject({ + source: 'profile_match', + from: { tableId: 'accounts-id', columnIds: ['country-code-col'], columns: ['country_code'] }, + to: { tableId: 'countries-id', columnIds: ['countries-code-col'], columns: ['iso_code'] }, + evidence: { + sourceColumnBase: 'country', + targetTableBase: 'country', + targetColumnBase: 'iso_code', + targetKeyScore: 0.86, + }, + }); + expect(candidates[0]?.confidence).toBeGreaterThanOrEqual(0.78); + expect(candidates[0]?.evidence.reasons).toEqual( + expect.arrayContaining([ + 'foreign_key_code_suffix', + 'normalized_table_name', + 'profile_unique_target', + 'profile_sample_overlap', + ]), + ); + }); + + it('drops same-table same-column self-links using ordered endpoint equality', () => { + const accounts = table('accounts-id', 'stg_accounts', [ + column('accounts-id', 
// A hierarchy column (`parent_id`) pointing at a different column (`id`) of the same
// table is a real relationship and must survive the self-link filter.
it('keeps legitimate same-table different-column self-references', () => {
  const employeeTable = table('employees-id', 'employees', [
    column('employees-id', 'employees-id-col', 'id', { primaryKey: false }),
    column('employees-id', 'employees-parent-id-col', 'parent_id', { primaryKey: false }),
  ]);

  const found = generateKloRelationshipDiscoveryCandidates(schema([employeeTable]));
  // Render every candidate as "<from table>.<from column>-><to table>.<to column>".
  const linkLabels = found.map(
    ({ from, to }) => `${from.table.name}.${from.columns[0]}->${to.table.name}.${to.columns[0]}`,
  );
  const [firstLink] = found;

  expect(linkLabels).toContain('employees.parent_id->employees.id');
  expect(firstLink).toMatchObject({
    source: 'self_reference',
    evidence: {
      reasons: expect.arrayContaining(['self_reference']),
    },
  });
});
normalizedType: 'text', + dimensionType: 'string', + }), + column('plans-id', 'plans-is-deleted-col', 'is_deleted', { + nativeType: 'TEXT', + normalizedType: 'text', + dimensionType: 'string', + }), + ]); + const accountSegments = table('account-segments-id', 'mart_account_segments', [ + column('account-segments-id', 'current-plan-code-col', 'current_plan_code', { + nativeType: 'TEXT', + normalizedType: 'text', + dimensionType: 'string', + }), + column('account-segments-id', 'normalized-plan-code-col', 'normalized_plan_code', { + nativeType: 'TEXT', + normalizedType: 'text', + dimensionType: 'string', + }), + column('account-segments-id', 'source-created-at-col', 'source_created_at', { + nativeType: 'TEXT', + normalizedType: 'text', + dimensionType: 'string', + }), + column('account-segments-id', 'billing-email-col', 'billing_email', { + nativeType: 'TEXT', + normalizedType: 'text', + dimensionType: 'string', + }), + column('account-segments-id', 'source-is-deleted-col', 'source_is_deleted', { + nativeType: 'TEXT', + normalizedType: 'text', + dimensionType: 'string', + }), + ]); + const mapping = table('mapping-id', 'stg_plan_segment_mapping', [ + column('mapping-id', 'mapping-canonical-plan-code-col', 'canonical_plan_code', { + nativeType: 'TEXT', + normalizedType: 'text', + dimensionType: 'string', + }), + ]); + + const candidates = generateKloRelationshipDiscoveryCandidates(schema([plans, accountSegments, mapping]), { + profiles: planCodeProfiles(), + }); + const candidateKeys = candidates.map( + (candidate) => + `${candidate.from.table.name}.${candidate.from.columns[0]}->${candidate.to.table.name}.${candidate.to.columns[0]}`, + ); + + expect(candidateKeys).toEqual([ + 'mart_account_segments.current_plan_code->stg_plans.plan_code', + 'mart_account_segments.normalized_plan_code->stg_plans.plan_code', + 'stg_plan_segment_mapping.canonical_plan_code->stg_plans.plan_code', + 'stg_plans.canonical_plan_code->stg_plans.plan_code', + ]); + expect(candidates).toEqual( + 
expect.arrayContaining([ + expect.objectContaining({ + source: 'column_suffix_match', + confidence: expect.any(Number), + evidence: expect.objectContaining({ + nameScore: 0.78, + targetKeyScore: 0.86, + reasons: expect.arrayContaining(['column_suffix_match', 'profile_unique_target']), + }), + }), + ]), + ); + expect(candidateKeys).not.toContain('mart_account_segments.source_created_at->stg_plans.created_at'); + expect(candidateKeys).not.toContain('mart_account_segments.billing_email->stg_plans.email'); + expect(candidateKeys).not.toContain('mart_account_segments.source_is_deleted->stg_plans.is_deleted'); + const suffixCandidate = candidates.find( + (candidate) => candidate.from.table.name === 'mart_account_segments' && candidate.from.columns[0] === 'current_plan_code', + ); + expect(suffixCandidate?.confidence).toBe(suffixCandidate?.evidence.scoreBreakdown?.score); + expect(suffixCandidate?.evidence.signalVector).toMatchObject({ + nameSimilarity: 0.78, + typeCompatibility: 1, + valueOverlap: 1, + profileUniqueness: 1, + profileNullRate: 1, + }); + }); + + it('does not suffix-match bare single-token targets or incompatible target types', () => { + const users = table('users-id', 'users', [ + column('users-id', 'users-id-col', 'id', { primaryKey: false }), + column('users-id', 'users-account-id-col', 'account_id', { primaryKey: false }), + ]); + const plans = table('plans-id', 'plans', [ + column('plans-id', 'plans-plan-code-col', 'plan_code', { + nativeType: 'INTEGER', + normalizedType: 'integer', + dimensionType: 'number', + }), + ]); + const accounts = table('accounts-id', 'accounts', [ + column('accounts-id', 'current-plan-code-col', 'current_plan_code', { + nativeType: 'TEXT', + normalizedType: 'text', + dimensionType: 'string', + }), + ]); + const profiles = { + ...planCodeProfiles(), + columns: { + ...planCodeProfiles().columns, + 'users.id': { + table: { catalog: null, db: 'public', name: 'users' }, + column: 'id', + nativeType: 'INTEGER', + normalizedType: 
'integer', + rowCount: 2, + nullCount: 0, + distinctCount: 2, + uniquenessRatio: 1, + nullRate: 0, + sampleValues: ['1', '2'], + minTextLength: 1, + maxTextLength: 1, + }, + 'plans.plan_code': { + table: { catalog: null, db: 'public', name: 'plans' }, + column: 'plan_code', + nativeType: 'INTEGER', + normalizedType: 'integer', + rowCount: 2, + nullCount: 0, + distinctCount: 2, + uniquenessRatio: 1, + nullRate: 0, + sampleValues: ['1', '2'], + minTextLength: 1, + maxTextLength: 1, + }, + }, + } satisfies KloRelationshipProfileArtifact; + + const candidates = generateKloRelationshipDiscoveryCandidates(schema([users, plans, accounts]), { profiles }); + const candidateKeys = candidates.map( + (candidate) => + `${candidate.from.table.name}.${candidate.from.columns[0]}->${candidate.to.table.name}.${candidate.to.columns[0]}`, + ); + + expect(candidateKeys).not.toContain('users.account_id->users.id'); + expect(candidateKeys).not.toContain('accounts.current_plan_code->plans.plan_code'); + }); + + it('uses column embeddings as a recall source for non-standard source names', () => { + const customers = table('customers-id', 'customers', [ + column('customers-id', 'customers-id-col', 'id', { + primaryKey: false, + embedding: [1, 0, 0], + }), + column('customers-id', 'customers-name-col', 'name', { + nativeType: 'TEXT', + normalizedType: 'text', + dimensionType: 'string', + embedding: [0, 1, 0], + }), + ]); + const orders = table('orders-id', 'orders', [ + column('orders-id', 'orders-id-col', 'id', { + primaryKey: false, + embedding: [0, 0, 1], + }), + column('orders-id', 'buyer-ref-col', 'buyer_ref', { + primaryKey: false, + embedding: [0.995, 0.005, 0], + }), + ]); + + const candidates = generateKloRelationshipDiscoveryCandidates(schema([customers, orders]), { + embeddingSimilarityThreshold: 0.95, + }); + + expect(candidates).toHaveLength(1); + expect(candidates[0]).toMatchObject({ + source: 'embedding_similarity', + from: { tableId: 'orders-id', columnIds: ['buyer-ref-col'], 
// With a cap of one candidate per source column, `account_id` must resolve to `accounts`
// rather than `accounts_archive`, whatever order the tables appear in the schema.
it('singularizes names and caps candidates per source column deterministically', () => {
  const accountTable = table('accounts-id', 'accounts', [column('accounts-id', 'accounts-id-col', 'id')]);
  const archiveTable = table('archived-accounts-id', 'accounts_archive', [
    column('archived-accounts-id', 'archived-accounts-id-col', 'id'),
  ]);
  const eventTable = table('events-id', 'product_events', [
    column('events-id', 'event-id-col', 'id'),
    column('events-id', 'account-id-col', 'account_id'),
  ]);

  // The schema deliberately lists the archive table before the live one.
  const found = generateKloRelationshipDiscoveryCandidates(schema([eventTable, archiveTable, accountTable]), {
    maxCandidatesPerColumn: 1,
  });
  const linkLabels = found.map(
    ({ from, to }) => `${from.table.name}.${from.columns[0]}->${to.table.name}.${to.columns[0]}`,
  );

  expect(linkLabels).toEqual(['product_events.account_id->accounts.id']);
});
// Table-driven check of `normalizeKloRelationshipName`: each case pairs a raw name with
// the subset of the normalized result that must match.
it('normalizes layer prefixes, punctuation, plural forms, and non-plural trailing s words', () => {
  const cases = [
    {
      rawName: 'mart__Sales_Accounts',
      expected: {
        normalized: 'sales_accounts',
        singular: 'sales_account',
        tokens: ['sales', 'accounts'],
      },
    },
    {
      rawName: 'dim_users',
      expected: {
        normalized: 'users',
        singular: 'user',
        tokens: ['users'],
      },
    },
    {
      // "Address" ends in "s" but is already singular.
      rawName: 'Address',
      expected: {
        normalized: 'address',
        singular: 'address',
        plural: 'addresses',
        tokens: ['address'],
      },
    },
  ];

  for (const { rawName, expected } of cases) {
    expect(normalizeKloRelationshipName(rawName)).toMatchObject(expected);
  }
});
'account-id-col', 'account_id')]); + const [deterministic] = generateKloRelationshipDiscoveryCandidates(schema([accounts, invoices])); + if (!deterministic) { + throw new Error('Expected deterministic relationship candidate'); + } + const llmCandidate = { + ...deterministic, + confidence: 0.99, + source: 'llm_proposal' as const, + evidence: { + ...deterministic.evidence, + reasons: ['llm_proposal', 'llm_pk_proposal'], + llmConfidence: 0.89, + llmRationale: 'Invoices point at the owning account dimension.', + }, + }; + + const merged = mergeKloRelationshipDiscoveryCandidates([deterministic, llmCandidate]); + + expect(merged).toHaveLength(1); + expect(merged[0]).toMatchObject({ + id: deterministic.id, + source: 'normalized_table_match', + confidence: 0.99, + evidence: { + llmConfidence: 0.89, + llmRationale: 'Invoices point at the owning account dimension.', + }, + }); + expect(merged[0]?.evidence.reasons).toEqual( + expect.arrayContaining(['foreign_key_suffix', 'normalized_table_name', 'target_key_like', 'llm_proposal']), + ); + }); +}); diff --git a/packages/context/src/scan/relationship-candidates.ts b/packages/context/src/scan/relationship-candidates.ts new file mode 100644 index 00000000..884b8580 --- /dev/null +++ b/packages/context/src/scan/relationship-candidates.ts @@ -0,0 +1,756 @@ +import type { + KloEnrichedColumn, + KloEnrichedSchema, + KloEnrichedTable, + KloRelationshipEndpoint, + KloRelationshipType, +} from './enrichment-types.js'; +import { localCandidateTables } from './relationship-locality.js'; +import { + normalizeKloRelationshipName, + pluralizeKloRelationshipToken, + singularizeKloRelationshipToken, +} from './relationship-name-similarity.js'; +export type { KloRelationshipNormalizedName } from './relationship-name-similarity.js'; +export { normalizeKloRelationshipName } from './relationship-name-similarity.js'; +import type { KloRelationshipProfileArtifact } from './relationship-profiling.js'; +import { + scoreKloRelationshipCandidate, + type 
/**
 * Optional tuning knobs for relationship candidate discovery.
 * Every field may be omitted; defaults noted below are the ones applied in this file.
 */
export interface KloRelationshipDiscoveryCandidateOptions {
  /** Cap on candidates per source column; also the fallback for `maxEmbeddingCandidatesPerColumn`. */
  maxCandidatesPerColumn?: number;
  /**
   * Maximum number of parent tables considered per source column in `candidateParentTables`.
   * Defaults to 20; a value <= 0 yields no parent tables at all.
   */
  maxCandidateParentTables?: number;
  /** Per-column cap for embedding-derived candidates; falls back to `maxCandidatesPerColumn` when unset. */
  maxEmbeddingCandidatesPerColumn?: number;
  /** NOTE(review): presumably the minimum confidence a candidate needs to be kept — its use is not visible here; confirm. */
  minConfidence?: number;
  /** Similarity threshold used by the embedding candidate generator; defaults to 0.92. */
  embeddingSimilarityThreshold?: number;
  /** Embedding-based candidates are skipped only when this is explicitly `false`. */
  useEmbeddings?: boolean;
  /** Optional profiling artifact; per-column and per-table statistics are read from it when present. */
  profiles?: KloRelationshipProfileArtifact;
}
/**
 * Reports whether the normalized source column name is the normalized target column
 * name preceded by at least one extra `_`-separated prefix
 * (for example `current_plan_code` against `plan_code`).
 *
 * Identical names and an empty normalized target never match.
 */
function columnSuffixMatchesTarget(input: { fromColumn: KloEnrichedColumn; toColumn: KloEnrichedColumn }): boolean {
  const { fromColumn, toColumn } = input;
  const sourceName = normalizeKloRelationshipName(fromColumn.name).normalized;
  const targetName = normalizeKloRelationshipName(toColumn.name).normalized;

  if (targetName.length === 0 || sourceName === targetName) {
    return false;
  }
  return sourceName.endsWith(`_${targetName}`);
}
/**
 * Cosine similarity between two embedding vectors.
 *
 * @param left  First vector, or `null` when the column has no embedding.
 * @param right Second vector, or `null` when the column has no embedding.
 * @returns A value in [-1, 1]. Returns 0 whenever the comparison is not meaningful:
 *          a missing or empty vector, mismatched lengths, a zero-magnitude vector,
 *          or a non-finite result (NaN/Infinity components, or overflow while summing).
 */
function cosineSimilarity(left: readonly number[] | null, right: readonly number[] | null): number {
  if (!left || !right || left.length === 0 || left.length !== right.length) {
    return 0;
  }

  let dot = 0;
  let leftMagnitude = 0;
  let rightMagnitude = 0;
  for (let index = 0; index < left.length; index += 1) {
    // Sparse-array holes read as undefined; treat them as 0.
    const leftValue = left[index] ?? 0;
    const rightValue = right[index] ?? 0;
    dot += leftValue * rightValue;
    leftMagnitude += leftValue * leftValue;
    rightMagnitude += rightValue * rightValue;
  }

  if (leftMagnitude === 0 || rightMagnitude === 0) {
    return 0;
  }

  const similarity = dot / (Math.sqrt(leftMagnitude) * Math.sqrt(rightMagnitude));
  // A NaN would poison threshold comparisons and sorting downstream, so map it to the
  // same "no signal" value used by the other guards.
  if (!Number.isFinite(similarity)) {
    return 0;
  }
  // Floating-point rounding can land a hair outside the mathematical range.
  return Math.min(1, Math.max(-1, similarity));
}
/**
 * Collects the names a table may be referred to by: its normalized name, the singular
 * and plural forms of that name, and — for multi-token names — the last token together
 * with that token's singular and plural forms.
 */
function tableAliases(table: KloEnrichedTable): Set<string> {
  const { normalized, singular, plural, tokens } = normalizeKloRelationshipName(table.ref.name);
  const aliases = new Set<string>([normalized, singular, plural]);

  // Single-token names have no separate trailing token to add.
  const trailingToken = tokens.length > 1 ? tokens[tokens.length - 1] : undefined;
  if (!trailingToken) {
    return aliases;
  }

  aliases.add(trailingToken);
  const trailingSingular = singularizeKloRelationshipToken(trailingToken);
  aliases.add(trailingSingular);
  aliases.add(pluralizeKloRelationshipToken(trailingSingular));
  return aliases;
}
/**
 * Name-based score for how much `column` looks like the key of `table`.
 *
 * Rules are checked in this order and the first hit wins:
 * declared primary key → 1, `id` → 0.92, `<table alias>_id` → 0.9,
 * `<table alias>_key` → 0.82, bare `key` or `uuid` → 0.74, anything else → 0.
 */
function targetKeyScore(table: KloEnrichedTable, column: KloEnrichedColumn): number {
  const normalizedColumn = normalizeKloRelationshipName(column.name).normalized;
  const aliasList = Array.from(parentTableNameAliases(table));
  // True when the column is named "<some table alias><suffix>".
  const namedAfterTable = (suffix: string): boolean =>
    aliasList.some((alias) => normalizedColumn === `${alias}${suffix}`);

  if (column.primaryKey) {
    return 1;
  }
  if (normalizedColumn === 'id') {
    return 0.92;
  }
  if (namedAfterTable('_id')) {
    return 0.9;
  }
  if (namedAfterTable('_key')) {
    return 0.82;
  }
  return normalizedColumn === 'key' || normalizedColumn === 'uuid' ? 0.74 : 0;
}
/**
 * Fraction of the source column's profiled sample values that also occur among the
 * target column's sample values, compared case-insensitively.
 *
 * Returns 0 when either column has no profile or an empty sample list.
 */
function profileSampleOverlap(input: {
  profiles: KloRelationshipProfileArtifact | undefined;
  fromTable: KloEnrichedTable;
  fromColumn: KloEnrichedColumn;
  toTable: KloEnrichedTable;
  toColumn: KloEnrichedColumn;
}): number {
  const sourceProfile = profileColumn(input.profiles, input.fromTable.ref.name, input.fromColumn.name);
  const targetProfile = profileColumn(input.profiles, input.toTable.ref.name, input.toColumn.name);
  if (!sourceProfile || !targetProfile) {
    return 0;
  }

  const sourceSamples = sourceProfile.sampleValues;
  const targetSamples = targetProfile.sampleValues;
  if (sourceSamples.length === 0 || targetSamples.length === 0) {
    return 0;
  }

  const targetLookup = new Set<string>();
  for (const sample of targetSamples) {
    targetLookup.add(sample.toLowerCase());
  }

  // Every source sample counts, so repeated source values are counted repeatedly.
  let matched = 0;
  for (const sample of sourceSamples) {
    if (targetLookup.has(sample.toLowerCase())) {
      matched += 1;
    }
  }
  return matched / sourceSamples.length;
}
/**
 * Prior score derived from table identity and profiled row counts.
 *
 * - same table on both sides → 0.72
 * - a row count missing or not positive on either side → 0.5
 * - target/source row ratio within [0.05, 20] → 0.7
 * - otherwise → 0.4
 */
function structuralPriorScore(input: {
  profiles: KloRelationshipProfileArtifact | undefined;
  fromTable: KloEnrichedTable;
  toTable: KloEnrichedTable;
}): number {
  const { profiles, fromTable, toTable } = input;
  if (fromTable.id === toTable.id) {
    return 0.72;
  }

  const sourceRows = tableProfileRowCount(profiles, fromTable.ref.name);
  const targetRows = tableProfileRowCount(profiles, toTable.ref.name);
  if (sourceRows === null || targetRows === null || sourceRows <= 0 || targetRows <= 0) {
    return 0.5;
  }

  const targetToSourceRatio = targetRows / sourceRows;
  return targetToSourceRatio >= 0.05 && targetToSourceRatio <= 20 ? 0.7 : 0.4;
}
/**
 * Scores how key-like a target column is, with the reasons behind the score.
 *
 * The name-based score from `targetKeyScore` wins whenever it is positive. Otherwise
 * the column's profile decides: it must not have a uniqueness ratio below 0.98 or a
 * null rate above 0.05. Code/key-named columns then score 0.86 and any other column
 * 0.78. Without a qualifying profile the score is 0 with no reasons.
 */
function targetKeyEvidence(
  table: KloEnrichedTable,
  column: KloEnrichedColumn,
  profiles: KloRelationshipProfileArtifact | undefined,
): KloRelationshipTargetKeyEvidence {
  const nameBasedScore = targetKeyScore(table, column);
  if (nameBasedScore > 0) {
    return { score: nameBasedScore, reasons: ['target_key_like'] };
  }

  const stats = profileColumn(profiles, table.ref.name, column.name);
  if (!stats || stats.uniquenessRatio < 0.98 || stats.nullRate > 0.05) {
    return { score: 0, reasons: [] };
  }

  // Matches `code`, `key`, or any name ending in `_code` / `_key`.
  const normalizedColumn = normalizeKloRelationshipName(column.name).normalized;
  const codeOrKeyNamed = /(?:^|_)(?:code|key)$/u.test(normalizedColumn);
  return { score: codeOrKeyNamed ? 0.86 : 0.78, reasons: ['profile_unique_target'] };
}
/**
 * Returns the given reason strings without duplicates, in first-seen order.
 * Entries that are empty or whitespace-only are dropped; kept entries are not trimmed.
 */
function uniqueReasons(values: readonly string[]): string[] {
  const seen = new Set<string>();
  for (const reason of values) {
    if (reason.trim().length === 0) {
      continue;
    }
    seen.add(reason);
  }
  return Array.from(seen);
}
/**
 * Maps a candidate's reason list onto a single source label.
 *
 * Reasons are checked in a fixed priority order and the first match wins; an
 * unrecognised or empty list falls back to `normalized_table_match`.
 */
function sourceForEvidence(reasons: string[]): KloRelationshipDiscoveryCandidateSource {
  const priority: ReadonlyArray<readonly [readonly string[], KloRelationshipDiscoveryCandidateSource]> = [
    [['self_reference'], 'self_reference'],
    [['embedding_similarity'], 'embedding_similarity'],
    [['column_suffix_match'], 'column_suffix_match'],
    [['parent_table_name_match'], 'parent_table_name_match'],
    [['profile_sample_overlap', 'profile_unique_target'], 'profile_match'],
    [['normalized_table_name'], 'normalized_table_match'],
    [['exact_column_name'], 'exact_column_match'],
    [['inflection'], 'inflection'],
  ];
  for (const [triggers, source] of priority) {
    if (triggers.some((trigger) => reasons.includes(trigger))) {
      return source;
    }
  }
  return 'normalized_table_match';
}

/**
 * Assembles a `many_to_one` discovery candidate in `review` status.
 *
 * Confidence comes from `scoreKloRelationshipCandidate` applied to the signal vector
 * built by `candidateSignalVector`. `embeddingSimilarity` is recorded in the evidence
 * (rounded to three decimals) only when it was supplied.
 */
function createCandidate(input: {
  fromTable: KloEnrichedTable;
  fromColumn: KloEnrichedColumn;
  toTable: KloEnrichedTable;
  toColumn: KloEnrichedColumn;
  sourceBase: string;
  targetBase: string;
  targetKeyScore: number;
  nameScore: number;
  reasons: string[];
  profiles: KloRelationshipProfileArtifact | undefined;
  valueOverlap: number;
  embeddingSimilarity?: number;
}): KloRelationshipDiscoveryCandidate {
  const fromEndpoint = endpoint(input.fromTable, input.fromColumn);
  const toEndpoint = endpoint(input.toTable, input.toColumn);

  const signalVector = candidateSignalVector({
    profiles: input.profiles,
    fromTable: input.fromTable,
    fromColumn: input.fromColumn,
    toTable: input.toTable,
    toColumn: input.toColumn,
    targetKeyScore: input.targetKeyScore,
    nameScore: input.nameScore,
    valueOverlap: input.valueOverlap,
    embeddingSimilarity: input.embeddingSimilarity,
  });
  const scoreBreakdown = scoreKloRelationshipCandidate(signalVector);

  const similarityEvidence =
    input.embeddingSimilarity !== undefined
      ? { embeddingSimilarity: Number(input.embeddingSimilarity.toFixed(3)) }
      : {};

  return {
    id: relationshipId(fromEndpoint, toEndpoint),
    from: fromEndpoint,
    to: toEndpoint,
    relationshipType: 'many_to_one',
    confidence: scoreBreakdown.score,
    source: sourceForEvidence(input.reasons),
    status: 'review',
    evidence: {
      sourceColumnBase: input.sourceBase,
      targetTableBase: input.targetBase,
      targetColumnBase: normalizeKloRelationshipName(input.toColumn.name).normalized,
      targetKeyScore: input.targetKeyScore,
      nameScore: input.nameScore,
      reasons: input.reasons,
      signalVector,
      scoreBreakdown,
      ...similarityEvidence,
    },
  };
}
/**
 * Proposes relationships from column-embedding similarity.
 *
 * For every non-primary-key column with a usable embedding, each column of each
 * candidate parent table (never the column's own table) is considered when it is
 * type-compatible, has non-zero key evidence, and its cosine similarity reaches the
 * threshold (default 0.92). Candidates below `minConfidence` (default 0.72) are
 * dropped, and at most `maxEmbeddingCandidatesPerColumn` (falling back to
 * `maxCandidatesPerColumn`, then 25) are kept per source column, best first.
 *
 * @returns An empty list when `options.useEmbeddings` is explicitly `false`.
 */
function generateKloEmbeddingRelationshipCandidates(
  schema: KloEnrichedSchema,
  options: KloRelationshipDiscoveryCandidateOptions,
): KloRelationshipDiscoveryCandidate[] {
  if (options.useEmbeddings === false) {
    return [];
  }

  const similarityFloor = options.embeddingSimilarityThreshold ?? 0.92;
  const perColumnLimit = options.maxEmbeddingCandidatesPerColumn ?? options.maxCandidatesPerColumn ?? 25;
  const enabledTableList = schema.tables.filter((table) => table.enabled);
  const bestFirst = (a: KloRelationshipDiscoveryCandidate, b: KloRelationshipDiscoveryCandidate): number =>
    b.confidence - a.confidence || candidateSortKey(a).localeCompare(candidateSortKey(b));

  const collected: KloRelationshipDiscoveryCandidate[] = [];

  for (const fromTable of enabledTableList) {
    for (const fromColumn of fromTable.columns) {
      if (fromColumn.primaryKey || !hasUsableEmbedding(fromColumn)) {
        continue;
      }

      const perColumn: KloRelationshipDiscoveryCandidate[] = [];
      const parents = candidateParentTables({ tables: enabledTableList, fromTable, fromColumn, options });
      for (const toTable of parents) {
        // Self links are left to the name-based generator.
        if (toTable.id === fromTable.id) {
          continue;
        }

        for (const toColumn of toTable.columns) {
          if (!hasUsableEmbedding(toColumn) || !typesCompatible(fromColumn, toColumn)) {
            continue;
          }

          const keyEvidence = targetKeyEvidence(toTable, toColumn, options.profiles);
          if (keyEvidence.score === 0) {
            continue;
          }

          const similarity = cosineSimilarity(fromColumn.embedding, toColumn.embedding);
          if (similarity < similarityFloor) {
            continue;
          }

          const valueOverlap = profileSampleOverlap({
            profiles: options.profiles,
            fromTable,
            fromColumn,
            toTable,
            toColumn,
          });
          const candidate = createCandidate({
            fromTable,
            fromColumn,
            toTable,
            toColumn,
            sourceBase: normalizeKloRelationshipName(fromColumn.name).normalized,
            targetBase: normalizeKloRelationshipName(toTable.ref.name).singular,
            targetKeyScore: keyEvidence.score,
            // The similarity doubles as the name score for embedding-driven candidates.
            nameScore: similarity,
            reasons: ['embedding_similarity', ...keyEvidence.reasons],
            profiles: options.profiles,
            valueOverlap,
            embeddingSimilarity: similarity,
          });
          const confident = candidate.confidence >= (options.minConfidence ?? 0.72);
          if (confident && !isDegenerateSameColumnSelfLink(candidate)) {
            perColumn.push(candidate);
          }
        }
      }

      perColumn.sort(bestFirst);
      for (const kept of perColumn.slice(0, perColumnLimit)) {
        collected.push(kept);
      }
    }
  }

  return collected;
}
/**
 * Generates relationship candidates for every enabled table of `schema`.
 *
 * Name-based candidates are produced first, one source column at a time. A target
 * column is considered only when it has key evidence, is type-compatible, and one of
 * three matchers fires: the source column base matches the target table's strict
 * aliases (or the column is a self reference), it matches the target's parent-table
 * aliases, or the column suffix matches a key-shaped target column. Embedding-based
 * candidates are appended afterwards, and duplicates (same id) are resolved in favour
 * of the higher confidence.
 *
 * @param options `maxCandidatesPerColumn` defaults to 25 and `minConfidence` to 0.72.
 * @returns Candidates sorted by descending confidence, then by sort key.
 */
export function generateKloRelationshipDiscoveryCandidates(
  schema: KloEnrichedSchema,
  options: KloRelationshipDiscoveryCandidateOptions = {},
): KloRelationshipDiscoveryCandidate[] {
  const maxCandidatesPerColumn = options.maxCandidatesPerColumn ?? 25;
  const minConfidence = options.minConfidence ?? 0.72;
  const tables = schema.tables.filter((table) => table.enabled);
  const candidates: KloRelationshipDiscoveryCandidate[] = [];
  const bestFirst = (left: KloRelationshipDiscoveryCandidate, right: KloRelationshipDiscoveryCandidate): number =>
    right.confidence - left.confidence || candidateSortKey(left).localeCompare(candidateSortKey(right));

  for (const fromTable of tables) {
    for (const fromColumn of fromTable.columns) {
      if (fromColumn.primaryKey) {
        continue;
      }
      const sourceReference = sourceColumnReference(fromColumn);
      if (!sourceReference) {
        continue;
      }
      const sourceBase = sourceReference.base;

      const columnCandidates: KloRelationshipDiscoveryCandidate[] = [];
      for (const toTable of candidateParentTables({ tables, fromTable, fromColumn, options })) {
        const strictAliases = tableAliases(toTable);
        const parentAliases = parentTableNameAliases(toTable);
        const targetBase = normalizeKloRelationshipName(toTable.ref.name).singular;
        const sameTable = fromTable.id === toTable.id;
        const nameMatchesTarget = strictAliases.has(sourceBase);
        const parentTableNameMatcher = !sameTable && !nameMatchesTarget && parentAliases.has(sourceBase);
        const selfReference = sameTable && SELF_REFERENCE_NAMES.has(normalizeKloRelationshipName(fromColumn.name).normalized);
        const strictTableMatcher = (!sameTable && nameMatchesTarget) || selfReference;

        for (const toColumn of toTable.columns) {
          const keyEvidence = targetKeyEvidence(toTable, toColumn, options.profiles);
          if (keyEvidence.score === 0 || !typesCompatible(fromColumn, toColumn)) {
            continue;
          }

          // The suffix matcher is only a fallback when neither table-name matcher fired.
          const suffixMatcher =
            !strictTableMatcher &&
            !parentTableNameMatcher &&
            columnSuffixMatchesTarget({ fromColumn, toColumn }) &&
            isRelationshipKeyShapedTarget(toColumn);
          if (!strictTableMatcher && !suffixMatcher && !parentTableNameMatcher) {
            continue;
          }

          const overlap = profileSampleOverlap({
            profiles: options.profiles,
            fromTable,
            fromColumn,
            toTable,
            toColumn,
          });
          // A target that is key-like only by profile must share at least one sampled value.
          if (
            (strictTableMatcher || parentTableNameMatcher) &&
            keyEvidence.reasons.includes('profile_unique_target') &&
            overlap === 0
          ) {
            continue;
          }

          const reasons: string[] = suffixMatcher
            ? ['column_suffix_match', ...keyEvidence.reasons]
            : [sourceReference.reason, ...keyEvidence.reasons];
          if (overlap > 0) {
            reasons.push('profile_sample_overlap');
          }

          let nameScore = suffixMatcher ? 0.78 : 0.88;
          if (parentTableNameMatcher) {
            reasons.push('parent_table_name_match');
            nameScore = 0.82;
          } else if (selfReference) {
            reasons.push('self_reference');
            nameScore = 0.82;
          } else if (!suffixMatcher && targetBase === sourceBase) {
            // `targetBase` is the singular table name computed once per target table.
            reasons.push('normalized_table_name');
            nameScore = 0.92;
          } else if (!suffixMatcher && nameMatchesTarget) {
            reasons.push('inflection');
            nameScore = 0.88;
          }
          if (
            !suffixMatcher &&
            !parentTableNameMatcher &&
            normalizeKloRelationshipName(fromColumn.name).normalized === normalizeKloRelationshipName(toColumn.name).normalized
          ) {
            reasons.push('exact_column_name');
            nameScore = Math.max(nameScore, 0.9);
          }

          const candidate = createCandidate({
            fromTable,
            fromColumn,
            toTable,
            toColumn,
            sourceBase,
            targetBase,
            targetKeyScore: keyEvidence.score,
            nameScore,
            reasons,
            profiles: options.profiles,
            valueOverlap: overlap,
          });
          if (candidate.confidence >= minConfidence && !isDegenerateSameColumnSelfLink(candidate)) {
            columnCandidates.push(candidate);
          }
        }
      }

      columnCandidates.sort(bestFirst);
      for (const kept of columnCandidates.slice(0, maxCandidatesPerColumn)) {
        candidates.push(kept);
      }
    }
  }

  // Appended with a loop rather than `push(...list)`: the embedding list is unbounded,
  // and spreading a very large array into a call can exceed the engine's argument limit.
  for (const embeddingCandidate of generateKloEmbeddingRelationshipCandidates(schema, options)) {
    candidates.push(embeddingCandidate);
  }

  // Type arguments reconstructed: candidate id -> best candidate seen so far.
  const byId = new Map<string, KloRelationshipDiscoveryCandidate>();
  for (const candidate of candidates) {
    const existing = byId.get(candidate.id);
    if (!existing || candidate.confidence > existing.confidence) {
      byId.set(candidate.id, candidate);
    }
  }
  return Array.from(byId.values()).sort(bestFirst);
}
/**
 * Collapses candidates that share an id into one entry each.
 *
 * Later duplicates are folded into the first occurrence via `mergeCandidateEvidence`.
 *
 * @returns The merged candidates ordered by their sort key.
 */
export function mergeKloRelationshipDiscoveryCandidates(
  candidates: readonly KloRelationshipDiscoveryCandidate[],
): KloRelationshipDiscoveryCandidate[] {
  // Type arguments reconstructed: candidate id -> merged candidate.
  const byId = new Map<string, KloRelationshipDiscoveryCandidate>();
  for (const candidate of candidates) {
    const existing = byId.get(candidate.id);
    byId.set(candidate.id, existing ? mergeCandidateEvidence(existing, candidate) : candidate);
  }
  return Array.from(byId.values()).sort((left, right) => candidateSortKey(left).localeCompare(candidateSortKey(right)));
}

/**
 * Infers likely primary-key columns from the targets that candidates point at.
 *
 * Candidates are grouped by `table.column` of their target endpoint. Each group yields
 * one `review` entry whose score is the group's highest confidence, capped at 0.95 and
 * rounded to three decimals.
 *
 * @returns Entries ordered by table name, then column name.
 * @throws Error (from `singleRelationshipColumn`) when a target endpoint has no column.
 */
export function inferKloRelationshipTargetPks(
  candidates: readonly KloRelationshipDiscoveryCandidate[],
): KloRelationshipInferredTargetPk[] {
  // A running maximum and count replace the per-target score array, so no spread call is needed.
  const incoming = new Map<string, { table: string; column: string; maxScore: number; count: number }>();
  for (const candidate of candidates) {
    const toColumn = singleRelationshipColumn(candidate.to);
    const key = `${candidate.to.table.name}.${toColumn}`;
    const item = incoming.get(key) ?? { table: candidate.to.table.name, column: toColumn, maxScore: -Infinity, count: 0 };
    item.maxScore = Math.max(item.maxScore, candidate.confidence);
    item.count += 1;
    incoming.set(key, item);
  }

  return Array.from(incoming.values())
    .map((item) => ({
      table: item.table,
      columns: [item.column],
      score: Number(Math.min(0.95, item.maxScore).toFixed(3)),
      status: 'review' as const,
      incomingCandidateCount: item.count,
    }))
    .sort((left, right) => left.table.localeCompare(right.table) || left.columns[0]!.localeCompare(right.columns[0]!));
}
diff --git a/packages/context/src/scan/relationship-composite-candidates.test.ts b/packages/context/src/scan/relationship-composite-candidates.test.ts new file mode 100644 index 00000000..dbd5037f --- /dev/null +++ b/packages/context/src/scan/relationship-composite-candidates.test.ts @@ -0,0 +1,84 @@
import Database from 'better-sqlite3';
import { join } from 'node:path';
import { describe, expect, it } from 'vitest';
import { snapshotToKloEnrichedSchema } from './local-enrichment.js';
import { loadKloRelationshipBenchmarkFixture, maskKloRelationshipBenchmarkSnapshot } from './relationship-benchmarks.js';
import { discoverKloCompositeRelationships } from './relationship-composite-candidates.js';
import { profileKloRelationshipSchema, type KloRelationshipReadOnlyExecutor } from './relationship-profiling.js';
import type { KloQueryResult, KloReadOnlyQueryInput, KloScanContext } from './types.js';

/**
 * Read-only executor over a SQLite fixture file, used only by this test.
 *
 * The database is opened read-only and must already exist. Callers are responsible
 * for calling `close()`.
 */
class TestSqliteExecutor implements KloRelationshipReadOnlyExecutor {
  private readonly db: Database.Database;

  constructor(dataPath: string) {
    this.db = new Database(dataPath, { readonly: true, fileMustExist: true });
  }

  /**
   * Runs `input.sql` and converts the row objects into a header list plus row arrays.
   *
   * Headers are taken from the first row, so an empty result has no headers.
   * The generic arguments of the return type and the row cast were lost in the
   * source text and are reconstructed here.
   */
  async executeReadOnly(input: KloReadOnlyQueryInput, _ctx: KloScanContext): Promise<KloQueryResult> {
    const rows = this.db.prepare(input.sql).all() as Record<string, unknown>[];
    const headers = Object.keys(rows[0] ?? {});
    return {
      headers,
      rows: rows.map((row) => headers.map((header) => row[header])),
      totalRows: rows.length,
      rowCount: rows.length,
    };
  }

  /** Releases the underlying SQLite handle. */
  close(): void {
    this.db.close();
  }
}
describe('composite relationship discovery detector', () => {
  it('infers composite primary keys and validates composite foreign keys from row evidence', async () => {
    const fixtureRoot = new URL('../../test/fixtures/relationship-benchmarks/', import.meta.url);
    // NOTE(review): `URL#pathname` is percent-encoded; `fileURLToPath` from `node:url`
    // would be more robust for checkout paths containing spaces — confirm before changing.
    const fixture = await loadKloRelationshipBenchmarkFixture(
      join(fixtureRoot.pathname, 'composite_keys_no_declared_constraints'),
    );
    const snapshot = maskKloRelationshipBenchmarkSnapshot(fixture.snapshot, 'declared_pks_and_declared_fks_removed');
    const schema = snapshotToKloEnrichedSchema(snapshot, new Map());
    const executor = new TestSqliteExecutor(fixture.dataPath ?? '');

    // The handle is closed in `finally` so a failing profile/discovery call or a failing
    // expectation cannot leave the SQLite fixture open.
    try {
      const profiles = await profileKloRelationshipSchema({
        connectionId: snapshot.connectionId,
        driver: snapshot.driver,
        schema,
        executor,
        ctx: { runId: 'test:composite-profile' },
      });

      const result = await discoverKloCompositeRelationships({
        connectionId: snapshot.connectionId,
        driver: snapshot.driver,
        schema,
        profiles,
        executor,
        ctx: { runId: 'test:composite-detect' },
      });

      expect(result.primaryKeys.map((item) => `${item.table.name}.(${item.columns.join(',')})`)).toEqual([
        'order_line_allocations.(order_id,line_number,warehouse_code)',
        'order_lines.(order_id,line_number)',
      ]);
      expect(
        result.relationships.map(
          (item) =>
            `${item.from.table.name}.(${item.from.columns.join(',')})->${item.to.table.name}.(${item.to.columns.join(',')})`,
        ),
      ).toEqual(['order_line_allocations.(order_id,line_number)->order_lines.(order_id,line_number)']);
      expect(result.relationships[0]).toMatchObject({
        relationshipType: 'many_to_one',
        status: 'accepted',
        confidence: 0.95,
        validation: {
          targetUniqueness: 1,
          sourceCoverage: 1,
          violationCount: 0,
          violationRatio: 0,
          reasons: ['composite_validation_passed'],
        },
      });
      expect(result.queryCount).toBeGreaterThan(0);
    } finally {
      executor.close();
    }
  });
});
diff --git a/packages/context/src/scan/relationship-composite-candidates.ts b/packages/context/src/scan/relationship-composite-candidates.ts new file mode 100644 index 00000000..fa1d9bae --- /dev/null +++ b/packages/context/src/scan/relationship-composite-candidates.ts @@ -0,0 +1,622 @@
import type { KloEnrichedColumn, KloEnrichedSchema, KloEnrichedTable, KloRelationshipType } from './enrichment-types.js';
import {
  formatKloRelationshipTableRef,
  quoteKloRelationshipIdentifier,
  type KloRelationshipProfileArtifact,
  type KloRelationshipReadOnlyExecutor,
} from './relationship-profiling.js';
import type { KloConnectionDriver, KloQueryResult, KloScanContext, KloTableRef } from './types.js';

/** Outcome assigned to a composite key or composite relationship. */
export type KloCompositeRelationshipStatus = 'accepted' | 'review' | 'rejected';

/** One side of a composite relationship: a table plus an ordered tuple of its columns. */
export interface KloCompositeRelationshipTupleEndpoint {
  tableId: string;
  columnIds: string[];
  table: KloTableRef;
  columns: string[];
}

/**
 * A column tuple proposed as a composite primary key.
 *
 * Shares the table/column fields of a tuple endpoint and adds a score, a status and
 * the uniqueness/null-rate figures the proposal is based on.
 */
export interface KloCompositePrimaryKeyCandidate extends KloCompositeRelationshipTupleEndpoint {
  id: string;
  score: number;
  status: KloCompositeRelationshipStatus;
  evidence: {
    rowCount: number;
    distinctCount: number;
    uniquenessRatio: number;
    nullRate: number;
    reasons: string[];
  };
}

/** Figures gathered while checking a composite foreign key against its target key. */
export interface KloCompositeRelationshipValidationEvidence {
  targetUniqueness: number;
  sourceCoverage: number;
  violationCount: number;
  violationRatio: number;
  childDistinct: number;
  parentDistinct: number;
  overlap: number;
  reasons: string[];
}

/** A composite (multi-column) relationship together with its validation evidence. */
export interface KloCompositeRelationshipCandidate {
  id: string;
  from: KloCompositeRelationshipTupleEndpoint;
  to: KloCompositeRelationshipTupleEndpoint;
  relationshipType: KloRelationshipType;
  confidence: number;
  status: KloCompositeRelationshipStatus;
  source: 'composite_profile_match';
  validation: KloCompositeRelationshipValidationEvidence;
}
/** Input for `discoverKloCompositeRelationships`; the optional fields override the defaults below. */
export interface DiscoverKloCompositeRelationshipsInput {
  connectionId: string;
  driver: KloConnectionDriver;
  schema: KloEnrichedSchema;
  profiles: KloRelationshipProfileArtifact;
  executor: KloRelationshipReadOnlyExecutor | null;
  ctx: KloScanContext;
  maxCompositeWidth?: number;
  maxColumnsPerTable?: number;
  minPrimaryKeyUniqueness?: number;
  minSourceCoverage?: number;
  maxViolationRatio?: number;
}

/** Output of `discoverKloCompositeRelationships`. */
export interface DiscoverKloCompositeRelationshipsResult {
  primaryKeys: KloCompositePrimaryKeyCandidate[];
  relationships: KloCompositeRelationshipCandidate[];
  queryCount: number;
  warnings: string[];
}

// Name fragments that mark a column (or table name part) as key-like.
const KEY_NAME_PARTS = new Set(['id', 'key', 'code', 'number', 'num', 'line', 'warehouse', 'account', 'order']);
const DEFAULT_MAX_COMPOSITE_WIDTH = 3;
const DEFAULT_MAX_COLUMNS_PER_TABLE = 8;
const DEFAULT_MIN_PRIMARY_KEY_UNIQUENESS = 0.98;
const DEFAULT_MIN_SOURCE_COVERAGE = 0.9;
const DEFAULT_MAX_VIOLATION_RATIO = 0.01;

/** Returns the tables of `schema` whose `enabled` flag is truthy. */
function enabledTables(schema: KloEnrichedSchema): KloEnrichedTable[] {
  const active: KloEnrichedTable[] = [];
  for (const table of schema.tables) {
    if (table.enabled) {
      active.push(table);
    }
  }
  return active;
}

/** Looks up the profiled row count of the first table named `tableName`; 0 when unknown. */
function tableRowCount(profiles: KloRelationshipProfileArtifact, tableName: string): number {
  const match = profiles.tables.find((entry) => entry.table.name === tableName);
  return match?.rowCount ?? 0;
}

/** Key under which a column profile is stored: `table.column`. */
function profileKey(tableName: string, columnName: string): string {
  return [tableName, columnName].join('.');
}

/** Profiled null rate of a column; an unprofiled column is treated as fully null (1). */
function profileNullRate(profiles: KloRelationshipProfileArtifact, tableName: string, columnName: string): number {
  const columnProfile = profiles.columns[profileKey(tableName, columnName)];
  return columnProfile?.nullRate ?? 1;
}
/** Lower-cases `name`, collapses every run of non-alphanumerics to `_`, and trims edge underscores. */
function normalizedColumnName(name: string): string {
  return name
    .toLowerCase()
    .replace(/[^a-z0-9]+/gu, '_')
    .replace(/^_+|_+$/gu, '');
}

/** Splits a name into its normalized, non-empty `_`-separated parts. */
function nameParts(name: string): string[] {
  return normalizedColumnName(name).split('_').filter(Boolean);
}

/** Returns 1 when any part of the column name is in `KEY_NAME_PARTS`, otherwise 0. */
function columnNameScore(column: KloEnrichedColumn): number {
  // Uses `nameParts` so column and table names are tokenised identically.
  return nameParts(column.name).some((part) => KEY_NAME_PARTS.has(part)) ? 1 : 0;
}

/** Key-like parts of a table name. The `Set` type argument is reconstructed. */
function keyLikeTableNameParts(tableName: string): Set<string> {
  return new Set(nameParts(tableName).filter((part) => KEY_NAME_PARTS.has(part)));
}

/**
 * True when every key-like part of the table name appears in some column name of the tuple.
 * A table name without key-like parts imposes no requirement.
 */
function tupleCoversTableNameKeyParts(tableName: string, columns: readonly KloEnrichedColumn[]): boolean {
  const required = keyLikeTableNameParts(tableName);
  if (required.size === 0) {
    return true;
  }
  const columnParts = new Set(columns.flatMap((column) => nameParts(column.name)));
  return Array.from(required).every((part) => columnParts.has(part));
}

/**
 * Chooses the columns allowed to take part in composite-key tuples.
 *
 * A column qualifies when it is neither a time nor a boolean dimension, has a profile
 * with null rate <= 0.02, and has a key-like name. At most `maxColumnsPerTable`
 * columns are returned.
 */
function candidateKeyColumns(input: {
  table: KloEnrichedTable;
  profiles: KloRelationshipProfileArtifact;
  maxColumnsPerTable: number;
}): KloEnrichedColumn[] {
  return input.table.columns
    .map((column, index) => ({ column, index }))
    .filter(({ column }) => {
      if (column.dimensionType === 'time' || column.dimensionType === 'boolean') {
        return false;
      }
      const profile = input.profiles.columns[profileKey(input.table.ref.name, column.name)];
      if (!profile) {
        return false;
      }
      return profile.nullRate <= 0.02 && columnNameScore(column) > 0;
    })
    // Every survivor currently scores 1, so this ordering falls back to declaration order.
    .sort(
      (left, right) =>
        columnNameScore(right.column) - columnNameScore(left.column) || left.index - right.index,
    )
    .slice(0, input.maxColumnsPerTable)
    .map(({ column }) => column);
}

/**
 * True when some key-like, non-time, non-boolean column is already unique enough on
 * its own (null rate <= 0.02 and uniqueness >= `minPrimaryKeyUniqueness`).
 */
function hasStrongSingleColumnKey(input: {
  table: KloEnrichedTable;
  profiles: KloRelationshipProfileArtifact;
  minPrimaryKeyUniqueness: number;
}): boolean {
  return input.table.columns.some((column) => {
    if (column.dimensionType === 'time' || column.dimensionType === 'boolean' || columnNameScore(column) === 0) {
      return false;
    }
    const profile = input.profiles.columns[profileKey(input.table.ref.name, column.name)];
    if (!profile) {
      return false;
    }
    return profile.nullRate <= 0.02 && profile.uniquenessRatio >= input.minPrimaryKeyUniqueness;
  });
}
/**
 * Enumerates every `width`-element combination of `values`, preserving input order.
 *
 * The type parameter `T` was lost in the source text and is restored here.
 *
 * @returns `[[]]` when `width <= 0`, `[]` when there are fewer values than `width`.
 */
function combinations<T>(values: readonly T[], width: number): T[][] {
  if (width <= 0) {
    return [[]];
  }
  if (values.length < width) {
    return [];
  }
  const output: T[][] = [];
  values.forEach((value, index) => {
    for (const tail of combinations(values.slice(index + 1), width - 1)) {
      output.push([value, ...tail]);
    }
  });
  return output;
}

/** Formats a column tuple as `table.(col1,col2,...)`. */
function tupleKey(tableName: string, columns: readonly string[]): string {
  return `${tableName}.(${columns.join(',')})`;
}

/** Formats a composite relationship id as `fromTuple->toTuple`. */
function relationshipKey(input: {
  fromTable: string;
  fromColumns: readonly string[];
  toTable: string;
  toColumns: readonly string[];
}): string {
  return `${tupleKey(input.fromTable, input.fromColumns)}->${tupleKey(input.toTable, input.toColumns)}`;
}

/** Builds the tuple endpoint for `columns` of `table`. */
function tupleEndpoint(table: KloEnrichedTable, columns: readonly KloEnrichedColumn[]): KloCompositeRelationshipTupleEndpoint {
  return {
    tableId: table.id,
    columnIds: columns.map((column) => column.id),
    table: table.ref,
    columns: columns.map((column) => column.name),
  };
}

/** First row of a query result, or an empty row when there is none. */
function row(result: KloQueryResult): unknown[] {
  return result.rows[0] ?? [];
}

/**
 * Reads the first-row value under `header` (case-insensitive) as a number.
 *
 * Numbers, bigints and non-blank numeric strings are accepted. A missing header, a
 * missing row, and any value that does not yield a finite number return 0. The
 * finiteness check matters to callers: `NaN` compares false against every threshold,
 * so an unparseable count would otherwise pass `ratio < minimum` rejection guards.
 */
function numberAt(result: KloQueryResult, header: string): number {
  const wanted = header.toLowerCase();
  const index = result.headers.findIndex((candidate) => candidate.toLowerCase() === wanted);
  if (index < 0) {
    return 0;
  }
  const value = row(result)[index];
  let parsed = 0;
  if (typeof value === 'number') {
    parsed = value;
  } else if (typeof value === 'bigint') {
    parsed = Number(value);
  } else if (typeof value === 'string' && value.trim() !== '') {
    parsed = Number(value);
  }
  return Number.isFinite(parsed) ? parsed : 0;
}

/** SQL Server row limiter (` TOP (n)`, n >= 1); empty for every other driver. */
function topSql(driver: KloConnectionDriver, limit: number): string {
  if (driver === 'sqlserver') {
    return ` TOP (${Math.max(1, Math.floor(limit))})`;
  }
  return '';
}

/** ` LIMIT n` (n >= 1) for every driver except SQL Server, which uses `topSql`. */
function limitSql(driver: KloConnectionDriver, limit: number): string {
  if (driver === 'sqlserver') {
    return '';
  }
  return ` LIMIT ${Math.max(1, Math.floor(limit))}`;
}

/** Select list that aliases the quoted columns positionally as `c0`, `c1`, ... */
function aliasedTupleSelect(driver: KloConnectionDriver, columns: readonly string[]): string {
  return columns
    .map((column, index) => `${quoteKloRelationshipIdentifier(driver, column)} AS c${index}`)
    .join(', ');
}

/** Predicate requiring every quoted column to be non-null. */
function nonNullPredicate(driver: KloConnectionDriver, columns: readonly string[]): string {
  return columns.map((column) => `${quoteKloRelationshipIdentifier(driver, column)} IS NOT NULL`).join(' AND ');
}

/** Join condition equating `child_values.cN` with `parent_values.cN` for each position. */
function tupleEquality(columns: number): string {
  return Array.from({ length: columns }, (_, index) => `child_values.c${index} = parent_values.c${index}`).join(
    ' AND ',
  );
}

/** SQL returning `distinct_count`: the number of distinct fully non-null tuples in a table. */
function buildTupleDistinctSql(input: {
  driver: KloConnectionDriver;
  table: KloTableRef;
  columns: readonly string[];
}): string {
  const tableSql = formatKloRelationshipTableRef(input.driver, input.table);
  return [
    'WITH tuple_values AS (',
    `SELECT DISTINCT ${aliasedTupleSelect(input.driver, input.columns)} FROM ${tableSql}`,
    `WHERE ${nonNullPredicate(input.driver, input.columns)}`,
    ')',
    'SELECT COUNT(*) AS distinct_count FROM tuple_values',
  ].join(' ');
}

/**
 * SQL comparing distinct child tuples with distinct parent tuples.
 *
 * The child side is capped at `maxDistinctSourceValues` distinct tuples. The result
 * row carries `child_distinct`, `parent_distinct`, `overlap` (child tuples found in
 * the parent) and `violation_count` (child tuples without a parent match).
 */
function buildCompositeCoverageSql(input: {
  driver: KloConnectionDriver;
  childTable: KloTableRef;
  childColumns: readonly string[];
  parentTable: KloTableRef;
  parentColumns: readonly string[];
  maxDistinctSourceValues: number;
}): string {
  const childTableSql = formatKloRelationshipTableRef(input.driver, input.childTable);
  const parentTableSql = formatKloRelationshipTableRef(input.driver, input.parentTable);
  const top = topSql(input.driver, input.maxDistinctSourceValues);
  const limit = limitSql(input.driver, input.maxDistinctSourceValues);
  return [
    'WITH child_values AS (',
    `SELECT DISTINCT${top} ${aliasedTupleSelect(input.driver, input.childColumns)} FROM ${childTableSql}`,
    `WHERE ${nonNullPredicate(input.driver, input.childColumns)}${limit}`,
    '), parent_values AS (',
    `SELECT DISTINCT ${aliasedTupleSelect(input.driver, input.parentColumns)} FROM ${parentTableSql}`,
    `WHERE ${nonNullPredicate(input.driver, input.parentColumns)}`,
    ')',
    'SELECT',
    '(SELECT COUNT(*) FROM child_values) AS child_distinct,',
    '(SELECT COUNT(*) FROM parent_values) AS parent_distinct,',
    // Parent tuples are non-null by construction, so a NULL c0 after the join means "no match".
    'SUM(CASE WHEN parent_values.c0 IS NOT NULL THEN 1 ELSE 0 END) AS overlap,',
    'SUM(CASE WHEN parent_values.c0 IS NULL THEN 1 ELSE 0 END) AS violation_count',
    'FROM child_values',
    `LEFT JOIN parent_values ON ${tupleEquality(input.childColumns.length)}`,
  ].join(' ');
}
/**
 * Classifies a validated composite relationship.
 *
 * `accepted` requires target uniqueness >= the uniqueness threshold, source coverage
 * >= `minSourceCoverage` and violation ratio <= `maxViolationRatio`. Otherwise the
 * relationship is `review` when source coverage is at least 0.55, else `rejected`.
 *
 * @param input.minPrimaryKeyUniqueness Optional uniqueness threshold. Defaults to
 *   `DEFAULT_MIN_PRIMARY_KEY_UNIQUENESS`, which was previously the only possible value;
 *   callers that detect keys with a custom threshold can now pass the same one here.
 */
function relationshipStatus(input: {
  targetUniqueness: number;
  sourceCoverage: number;
  violationRatio: number;
  minSourceCoverage: number;
  maxViolationRatio: number;
  minPrimaryKeyUniqueness?: number;
}): KloCompositeRelationshipStatus {
  const requiredUniqueness = input.minPrimaryKeyUniqueness ?? DEFAULT_MIN_PRIMARY_KEY_UNIQUENESS;
  if (
    input.targetUniqueness >= requiredUniqueness &&
    input.sourceCoverage >= input.minSourceCoverage &&
    input.violationRatio <= input.maxViolationRatio
  ) {
    return 'accepted';
  }
  if (input.sourceCoverage >= 0.55) {
    return 'review';
  }
  return 'rejected';
}

/**
 * True when an already accepted key of the same table uses a strict subset of `columns`.
 *
 * Used to skip wider tuples that are unique only because a narrower tuple already is.
 */
function hasAcceptedSubset(
  accepted: readonly KloCompositePrimaryKeyCandidate[],
  tableName: string,
  columns: readonly string[],
): boolean {
  const columnSet = new Set(columns);
  return accepted.some(
    (candidate) =>
      candidate.table.name === tableName &&
      candidate.columns.length < columns.length &&
      candidate.columns.every((column) => columnSet.has(column)),
  );
}
/**
 * Searches one table for composite primary keys.
 *
 * Nothing is attempted when the table has no profiled rows or already has a strong
 * single-column key. Otherwise tuples of width 2 up to `maxCompositeWidth` are built
 * from the candidate key columns, narrowest first. A tuple is skipped when it does not
 * cover the key-like parts of the table name or when an accepted narrower tuple is a
 * subset of it. Each remaining tuple costs one distinct-count query and is accepted
 * when distinct tuples / row count reaches `minPrimaryKeyUniqueness`.
 *
 * @returns Accepted keys ordered by their tuple key, plus the number of queries issued.
 */
async function detectCompositePrimaryKeys(input: {
  connectionId: string;
  driver: KloConnectionDriver;
  table: KloEnrichedTable;
  profiles: KloRelationshipProfileArtifact;
  executor: KloRelationshipReadOnlyExecutor;
  ctx: KloScanContext;
  maxCompositeWidth: number;
  maxColumnsPerTable: number;
  minPrimaryKeyUniqueness: number;
}): Promise<{ primaryKeys: KloCompositePrimaryKeyCandidate[]; queryCount: number }> {
  const { table, profiles, minPrimaryKeyUniqueness } = input;
  const tableName = table.ref.name;

  const rowCount = tableRowCount(profiles, tableName);
  if (rowCount === 0 || hasStrongSingleColumnKey({ table, profiles, minPrimaryKeyUniqueness })) {
    return { primaryKeys: [], queryCount: 0 };
  }

  const keyColumns = candidateKeyColumns({ table, profiles, maxColumnsPerTable: input.maxColumnsPerTable });
  const found: KloCompositePrimaryKeyCandidate[] = [];
  let issuedQueries = 0;

  for (let width = 2; width <= input.maxCompositeWidth; width += 1) {
    for (const tuple of combinations(keyColumns, width)) {
      const names = tuple.map((column) => column.name);
      const skip = !tupleCoversTableNameKeyParts(tableName, tuple) || hasAcceptedSubset(found, tableName, names);
      if (skip) {
        continue;
      }

      const sql = buildTupleDistinctSql({ driver: input.driver, table: table.ref, columns: names });
      const queryResult = await input.executor.executeReadOnly(
        { connectionId: input.connectionId, sql, maxRows: 1 },
        input.ctx,
      );
      issuedQueries += 1;

      const distinctCount = numberAt(queryResult, 'distinct_count');
      const uniquenessRatio = rowCount === 0 ? 0 : distinctCount / rowCount;
      if (uniquenessRatio < minPrimaryKeyUniqueness) {
        continue;
      }

      // The tuple is only as null-free as its worst column.
      const nullRate = Math.max(...names.map((name) => profileNullRate(profiles, tableName, name)));
      const rawScore = 0.72 + uniquenessRatio * 0.22 + (1 - nullRate) * 0.06;
      found.push({
        id: tupleKey(tableName, names),
        tableId: table.id,
        table: table.ref,
        columns: names,
        columnIds: tuple.map((column) => column.id),
        score: Number(Math.min(0.99, rawScore).toFixed(3)),
        status: 'accepted',
        evidence: {
          rowCount,
          distinctCount,
          uniquenessRatio,
          nullRate,
          reasons: ['composite_unique_tuple', 'not_null_profile'],
        },
      });
    }
  }

  found.sort((left, right) =>
    tupleKey(left.table.name, left.columns).localeCompare(tupleKey(right.table.name, right.columns)),
  );
  return { primaryKeys: found, queryCount: issuedQueries };
}
/** Indexes a table's columns by name. The `Map` type arguments are reconstructed. */
function columnsByName(table: KloEnrichedTable): Map<string, KloEnrichedColumn> {
  return new Map(table.columns.map((column): [string, KloEnrichedColumn] => [column.name, column]));
}

/** True when both tuples have the same length and matching `dimensionType` at every position. */
function compatibleTuple(sourceColumns: readonly KloEnrichedColumn[], targetColumns: readonly KloEnrichedColumn[]): boolean {
  if (sourceColumns.length !== targetColumns.length) {
    return false;
  }
  return sourceColumns.every((source, index) => {
    const target = targetColumns[index];
    // An explicit undefined check narrows `target`, which `Boolean(target)` does not.
    return target !== undefined && source.dimensionType === target.dimensionType;
  });
}

/**
 * Validates one composite foreign-key hypothesis with a single coverage query.
 *
 * Source coverage is overlap / distinct child tuples and violation ratio is
 * violations / distinct child tuples; with no child tuples these are 0 and 1
 * respectively. Target uniqueness is taken from the target key's own evidence.
 *
 * @returns The relationship (confidence 0.95 when accepted, 0.62 otherwise) and a
 *   query count of 1.
 */
async function validateCompositeRelationship(input: {
  connectionId: string;
  driver: KloConnectionDriver;
  sourceTable: KloEnrichedTable;
  sourceColumns: readonly KloEnrichedColumn[];
  targetKey: KloCompositePrimaryKeyCandidate;
  targetTable: KloEnrichedTable;
  targetColumns: readonly KloEnrichedColumn[];
  executor: KloRelationshipReadOnlyExecutor;
  ctx: KloScanContext;
  minSourceCoverage: number;
  maxViolationRatio: number;
}): Promise<{ relationship: KloCompositeRelationshipCandidate; queryCount: number }> {
  // Upper bound on the distinct child tuples sampled by the coverage query.
  const maxDistinctSourceValues = 10000;

  const result = await input.executor.executeReadOnly(
    {
      connectionId: input.connectionId,
      sql: buildCompositeCoverageSql({
        driver: input.driver,
        childTable: input.sourceTable.ref,
        childColumns: input.sourceColumns.map((column) => column.name),
        parentTable: input.targetTable.ref,
        parentColumns: input.targetColumns.map((column) => column.name),
        maxDistinctSourceValues,
      }),
      maxRows: 1,
    },
    input.ctx,
  );
  const childDistinct = numberAt(result, 'child_distinct');
  const parentDistinct = numberAt(result, 'parent_distinct');
  const overlap = numberAt(result, 'overlap');
  const violationCount = numberAt(result, 'violation_count');
  const sourceCoverage = childDistinct === 0 ? 0 : overlap / childDistinct;
  const violationRatio = childDistinct === 0 ? 1 : violationCount / childDistinct;
  const targetUniqueness = input.targetKey.evidence.uniquenessRatio;
  const status = relationshipStatus({
    targetUniqueness,
    sourceCoverage,
    violationRatio,
    minSourceCoverage: input.minSourceCoverage,
    maxViolationRatio: input.maxViolationRatio,
  });

  const from = tupleEndpoint(input.sourceTable, input.sourceColumns);
  const to = {
    tableId: input.targetKey.tableId,
    columnIds: input.targetKey.columnIds,
    table: input.targetKey.table,
    columns: input.targetKey.columns,
  };
  const reasons =
    status === 'accepted'
      ? ['composite_validation_passed']
      : [
          'composite_validation_failed',
          sourceCoverage < input.minSourceCoverage ? 'low_source_coverage' : '',
          violationRatio > input.maxViolationRatio ? 'excessive_violations' : '',
        ].filter(Boolean);

  return {
    queryCount: 1,
    relationship: {
      id: relationshipKey({
        fromTable: from.table.name,
        fromColumns: from.columns,
        toTable: to.table.name,
        toColumns: to.columns,
      }),
      from,
      to,
      relationshipType: 'many_to_one',
      confidence: status === 'accepted' ? 0.95 : 0.62,
      status,
      source: 'composite_profile_match',
      validation: {
        targetUniqueness,
        sourceCoverage,
        violationCount,
        violationRatio,
        childDistinct,
        parentDistinct,
        overlap,
        reasons,
      },
    },
  };
}
/**
 * Discovers composite primary keys and the composite foreign keys that reference them.
 *
 * Composite keys are detected for every enabled table. Each accepted key is then
 * matched against every other enabled table that has identically named,
 * dimension-compatible columns, and each such pairing is validated with one query.
 * Rejected relationships are dropped.
 *
 * @returns Keys and relationships sorted by id, with the total query count. When there
 *   is no executor or the profiles report SQL as unavailable, the result is empty and
 *   carries the warning `composite_relationship_validation_unavailable`.
 */
export async function discoverKloCompositeRelationships(
  input: DiscoverKloCompositeRelationshipsInput,
): Promise<DiscoverKloCompositeRelationshipsResult> {
  if (!input.executor || !input.profiles.sqlAvailable) {
    return {
      primaryKeys: [],
      relationships: [],
      queryCount: 0,
      warnings: ['composite_relationship_validation_unavailable'],
    };
  }

  const settings = {
    maxCompositeWidth: input.maxCompositeWidth ?? DEFAULT_MAX_COMPOSITE_WIDTH,
    maxColumnsPerTable: input.maxColumnsPerTable ?? DEFAULT_MAX_COLUMNS_PER_TABLE,
    minPrimaryKeyUniqueness: input.minPrimaryKeyUniqueness ?? DEFAULT_MIN_PRIMARY_KEY_UNIQUENESS,
    minSourceCoverage: input.minSourceCoverage ?? DEFAULT_MIN_SOURCE_COVERAGE,
    maxViolationRatio: input.maxViolationRatio ?? DEFAULT_MAX_VIOLATION_RATIO,
  };
  const tables = enabledTables(input.schema);
  // Keyed by table id rather than bare name: two tables with the same name in different
  // schemas would otherwise overwrite each other and a key could resolve to the wrong table.
  const tableById = new Map(tables.map((table): [string, KloEnrichedTable] => [table.id, table]));
  // Built once per table instead of once per (key, table) pairing.
  const columnIndexByTableId = new Map(
    tables.map((table): [string, Map<string, KloEnrichedColumn>] => [table.id, columnsByName(table)]),
  );
  // Resolves names to columns; null when any name is missing from the table.
  const pickColumns = (tableId: string, names: readonly string[]): KloEnrichedColumn[] | null => {
    const index = columnIndexByTableId.get(tableId);
    const picked = names.flatMap((name) => {
      const column = index?.get(name);
      return column ? [column] : [];
    });
    return picked.length === names.length ? picked : null;
  };

  const primaryKeys: KloCompositePrimaryKeyCandidate[] = [];
  let queryCount = 0;

  for (const table of tables) {
    const result = await detectCompositePrimaryKeys({
      connectionId: input.connectionId,
      driver: input.driver,
      table,
      profiles: input.profiles,
      executor: input.executor,
      ctx: input.ctx,
      maxCompositeWidth: settings.maxCompositeWidth,
      maxColumnsPerTable: settings.maxColumnsPerTable,
      minPrimaryKeyUniqueness: settings.minPrimaryKeyUniqueness,
    });
    primaryKeys.push(...result.primaryKeys);
    queryCount += result.queryCount;
  }

  const relationships: KloCompositeRelationshipCandidate[] = [];
  for (const targetKey of primaryKeys) {
    const targetTable = tableById.get(targetKey.tableId);
    if (!targetTable) {
      continue;
    }
    const targetColumns = pickColumns(targetTable.id, targetKey.columns);
    if (!targetColumns) {
      continue;
    }

    for (const sourceTable of tables) {
      if (sourceTable.id === targetTable.id) {
        continue;
      }
      // A source table qualifies only if it has every key column under the same name.
      const sourceColumns = pickColumns(sourceTable.id, targetKey.columns);
      if (!sourceColumns || !compatibleTuple(sourceColumns, targetColumns)) {
        continue;
      }

      const result = await validateCompositeRelationship({
        connectionId: input.connectionId,
        driver: input.driver,
        sourceTable,
        sourceColumns,
        targetKey,
        targetTable,
        targetColumns,
        executor: input.executor,
        ctx: input.ctx,
        minSourceCoverage: settings.minSourceCoverage,
        maxViolationRatio: settings.maxViolationRatio,
      });
      queryCount += result.queryCount;
      if (result.relationship.status !== 'rejected') {
        relationships.push(result.relationship);
      }
    }
  }

  return {
    primaryKeys: primaryKeys.sort((left, right) => left.id.localeCompare(right.id)),
    relationships: relationships.sort((left, right) => left.id.localeCompare(right.id)),
    queryCount,
    warnings: [],
  };
}
diff --git a/packages/context/src/scan/relationship-diagnostics.test.ts b/packages/context/src/scan/relationship-diagnostics.test.ts new file mode 100644 index 00000000..825b2eb4 --- /dev/null +++ b/packages/context/src/scan/relationship-diagnostics.test.ts @@ -0,0 +1,373 @@
import { describe, expect, it } from 'vitest';
import type { KloEnrichedRelationship, KloRelationshipEndpoint } from './enrichment-types.js';
import type { KloResolvedRelationshipDiscoveryCandidate } from './relationship-graph-resolver.js';
import {
  buildKloRelationshipArtifacts,
  buildKloRelationshipDiagnostics,
  emptyKloRelationshipProfileArtifact,
} from './relationship-diagnostics.js';
/**
 * Test helper: builds a single-column relationship endpoint for `table.column`.
 *
 * The table name doubles as the table id, the column id is `<table>.<column>`, and the
 * catalog/db parts of the table reference are left null.
 */
function endpoint(table: string, column: string): KloRelationshipEndpoint {
  const qualifiedColumnId = [table, column].join('.');
  const tableRef = { catalog: null, db: null, name: table };
  return {
    tableId: table,
    columnIds: [qualifiedColumnId],
    table: tableRef,
    columns: [column],
  };
}
8 : 0, + violationRatio: input.status === 'rejected' ? 0.8 : 0, + sourceNullRate: 0, + targetNullRate: 0, + childDistinct: 10, + parentDistinct: 10, + overlap: input.status === 'rejected' ? 2 : 10, + checkedValues: 10, + reasons: input.validationReasons ?? ['validation_passed'], + }, + pkScore: input.pkScore ?? 0.97, + fkScore: input.fkScore ?? 0.94, + graph: { + targetPkScore: input.pkScore ?? 0.97, + incomingCandidateCount: 1, + conflictRank: 1, + reasons: input.graphReasons ?? ['target_pk_score_passed', 'fk_score_passed'], + }, + }; +} + +describe('relationship diagnostics artifacts', () => { + it('groups graph-resolved relationships and preserves evidence reasons', () => { + const artifacts = buildKloRelationshipArtifacts({ + connectionId: 'warehouse', + resolvedRelationships: [ + resolvedRelationship({ id: 'accepted-edge', status: 'accepted', source: 'llm_proposal' }), + resolvedRelationship({ + id: 'review-edge', + status: 'review', + validationReasons: ['validation_unavailable'], + graphReasons: ['validation_unavailable_review_only', 'fk_score_review'], + }), + resolvedRelationship({ + id: 'rejected-edge', + status: 'rejected', + validationReasons: ['low_source_coverage'], + graphReasons: ['fk_score_rejected'], + }), + ], + }); + + expect(artifacts.accepted).toHaveLength(1); + expect(artifacts.accepted[0]).toMatchObject({ + source: 'llm_proposal', + evidence: { + llmConfidence: 0.89, + llmRationale: 'Buyer reference values align with customer identifiers.', + }, + reasons: expect.arrayContaining(['llm_proposal', 'llm_pk_proposal']), + }); + expect(artifacts.review).toHaveLength(1); + expect(artifacts.rejected).toHaveLength(1); + expect(artifacts.review[0]).toMatchObject({ + id: 'review-edge', + status: 'review', + source: 'normalized_table_match', + fkScore: 0.94, + reasons: expect.arrayContaining(['validation_unavailable', 'validation_unavailable_review_only']), + }); + expect(artifacts.rejected[0]?.reasons).toEqual( + 
expect.arrayContaining(['table_name_matches_source_column', 'low_source_coverage', 'fk_score_rejected']), + ); + }); + + it('adapts legacy relationship updates into the richer artifact shape', () => { + const artifacts = buildKloRelationshipArtifacts({ + connectionId: 'warehouse', + relationshipUpdate: { + connectionId: 'warehouse', + accepted: [ + enrichedRelationship({ + id: 'orders-customer', + fromTable: 'orders', + fromColumn: 'customer_id', + toTable: 'customers', + toColumn: 'id', + }), + ], + rejected: [ + enrichedRelationship({ + id: 'orders-account', + fromTable: 'orders', + fromColumn: 'account_id', + toTable: 'accounts', + toColumn: 'id', + confidence: 0.4, + }), + ], + skipped: [{ relationshipId: 'orders-region', reason: 'validation_port_unavailable' }], + }, + }); + + expect(artifacts.accepted[0]).toMatchObject({ + id: 'orders-customer', + status: 'accepted', + source: 'inferred', + reasons: ['accepted_relationship_update'], + }); + expect(artifacts.rejected[0]).toMatchObject({ + id: 'orders-account', + status: 'rejected', + reasons: ['rejected_relationship_update'], + }); + expect(artifacts.skipped).toEqual([{ relationshipId: 'orders-region', reason: 'validation_port_unavailable' }]); + }); + + it('deduplicates resolved and formal relationship update artifacts by edge id', () => { + const artifacts = buildKloRelationshipArtifacts({ + connectionId: 'warehouse', + resolvedRelationships: [ + { + id: 'orders:orders.account_id->accounts:accounts.id', + from: endpoint('orders', 'account_id'), + to: endpoint('accounts', 'id'), + relationshipType: 'many_to_one', + source: 'normalized_table_match', + status: 'accepted', + confidence: 0.92, + score: 0.9, + pkScore: 0.92, + fkScore: 0.9, + evidence: { + sourceColumnBase: 'account', + targetTableBase: 'account', + targetColumnBase: 'id', + targetKeyScore: 0.92, + nameScore: 0.92, + reasons: ['foreign_key_suffix'], + }, + validation: { + targetUniqueness: 1, + sourceCoverage: 1, + violationCount: 0, + 
violationRatio: 0, + sourceNullRate: 0, + targetNullRate: 0, + childDistinct: 2, + parentDistinct: 2, + overlap: 2, + checkedValues: 2, + reasons: ['validation_passed'], + }, + graph: { + targetPkScore: 0.92, + incomingCandidateCount: 1, + conflictRank: 1, + reasons: ['fk_score_passed'], + }, + }, + ], + relationshipUpdate: { + connectionId: 'warehouse', + accepted: [ + { + id: 'orders:orders.account_id->accounts:accounts.id', + source: 'formal', + from: endpoint('orders', 'account_id'), + to: endpoint('accounts', 'id'), + relationshipType: 'many_to_one', + confidence: 1, + isPrimaryKeyReference: true, + }, + ], + rejected: [], + skipped: [], + }, + }); + + expect(artifacts.accepted).toHaveLength(1); + expect(artifacts.accepted[0]).toMatchObject({ + id: 'orders:orders.account_id->accounts:accounts.id', + source: 'normalized_table_match', + reasons: expect.arrayContaining(['foreign_key_suffix', 'validation_passed', 'fk_score_passed']), + }); + }); + + it('explains validation-unavailable review candidates', () => { + const artifacts = buildKloRelationshipArtifacts({ + connectionId: 'warehouse', + resolvedRelationships: [ + resolvedRelationship({ + id: 'review-edge', + status: 'review', + validationReasons: ['validation_unavailable'], + graphReasons: ['validation_unavailable_review_only'], + }), + ], + }); + const profile = emptyKloRelationshipProfileArtifact({ + connectionId: 'warehouse', + driver: 'sqlite', + reason: 'read_only_sql_unavailable', + }); + + const diagnostics = buildKloRelationshipDiagnostics({ + connectionId: 'warehouse', + generatedAt: '2026-05-07T12:00:00.000Z', + artifacts, + profile, + warnings: [ + { + code: 'connector_capability_missing', + message: 'KLO scan connector cannot run standalone statistical relationship validation', + recoverable: true, + metadata: { capability: 'readOnlySql' }, + }, + ], + thresholds: { acceptThreshold: 0.85, reviewThreshold: 0.55 }, + }); + + expect(diagnostics.summary).toEqual({ accepted: 0, review: 1, rejected: 
0, skipped: 0 }); + expect(diagnostics.noAcceptedReason).toBe('validation unavailable; review candidates written'); + expect(diagnostics.candidateCountsBySource).toEqual({ normalized_table_match: 1 }); + expect(diagnostics.validation).toEqual({ + available: false, + sqlAvailable: false, + queryCount: 0, + }); + expect(diagnostics.profileWarnings).toEqual(['read_only_sql_unavailable']); + expect(diagnostics.warnings[0]).toMatchObject({ code: 'connector_capability_missing' }); + }); + + it('explains empty relationship output as a no-candidate outcome', () => { + const artifacts = buildKloRelationshipArtifacts({ connectionId: 'warehouse' }); + const diagnostics = buildKloRelationshipDiagnostics({ + connectionId: 'warehouse', + generatedAt: '2026-05-07T12:00:00.000Z', + artifacts, + profile: emptyKloRelationshipProfileArtifact({ + connectionId: 'warehouse', + driver: 'sqlite', + reason: 'relationship_profiling_not_run', + }), + }); + + expect(diagnostics.summary).toEqual({ accepted: 0, review: 0, rejected: 0, skipped: 0 }); + expect(diagnostics.noAcceptedReason).toBe('no candidate pairs passed type compatibility'); + expect(diagnostics.candidateCountsBySource).toEqual({}); + }); + + it('records composite relationship endpoints in relationship artifacts', () => { + const artifacts = buildKloRelationshipArtifacts({ + connectionId: 'warehouse', + compositeRelationships: [ + { + id: 'order_line_allocations.(order_id,line_number)->order_lines.(order_id,line_number)', + source: 'composite_profile_match', + status: 'accepted', + from: { + tableId: 'order_line_allocations', + columnIds: ['order_line_allocations.order_id', 'order_line_allocations.line_number'], + table: { catalog: null, db: null, name: 'order_line_allocations' }, + columns: ['order_id', 'line_number'], + }, + to: { + tableId: 'order_lines', + columnIds: ['order_lines.order_id', 'order_lines.line_number'], + table: { catalog: null, db: null, name: 'order_lines' }, + columns: ['order_id', 'line_number'], + }, + 
/**
 * One side (`from` or `to`) of a relationship edge as written into relationship artifacts.
 *
 * `columnIds` and `columns` are arrays, so an endpoint can describe a composite
 * (multi-column) key as well as a single column.
 */
export interface KloRelationshipArtifactEndpoint {
  /** Identifier of the table this endpoint belongs to. */
  tableId: string;
  /** Identifiers of the participating columns. */
  columnIds: string[];
  /** Table reference; `catalog` and `db` may be null. */
  table: {
    catalog: string | null;
    db: string | null;
    name: string;
  };
  /** Names of the participating columns. NOTE(review): presumably parallel to `columnIds` — confirm. */
  columns: string[];
}
/**
 * Diagnostics document describing the outcome of one relationship-discovery run.
 *
 * Produced by `buildKloRelationshipDiagnostics` from the relationship artifacts and the
 * relationship profile.
 */
export interface KloRelationshipDiagnosticsArtifact {
  /** Connection the diagnostics belong to. */
  connectionId: string;
  /** Generation timestamp; defaults to `new Date().toISOString()` when the caller supplies none. */
  generatedAt: string;
  /** Number of accepted / review / rejected / skipped entries in the artifacts. */
  summary: KloRelationshipDiagnosticsSummary;
  /** Human-readable explanation of why nothing was accepted; null when at least one edge was accepted. */
  noAcceptedReason: string | null;
  /** Edge count per `source` value across accepted, review and rejected edges. */
  candidateCountsBySource: Record<string, number>;
  /** Validation availability and query count, taken from the relationship profile. */
  validation: KloRelationshipDiagnosticsValidation;
  /** Effective thresholds (defaults merged with caller overrides). */
  thresholds: KloRelationshipDiagnosticsThresholds;
  /** Effective policy (defaults merged with caller overrides). */
  policy: KloRelationshipDiagnosticsPolicy;
  /** Scan warnings passed in by the caller. */
  warnings: KloScanWarning[];
  /** Warnings copied from the relationship profile. */
  profileWarnings: string[];
}
/**
 * Returns the given reason strings without duplicates, keeping first-seen order.
 *
 * Strings that are empty or whitespace-only are dropped; retained strings are not trimmed.
 */
function uniqueReasons(values: readonly string[]): string[] {
  const seen = new Set<string>();
  values.forEach((reason) => {
    if (reason.trim() !== '') {
      seen.add(reason);
    }
  });
  return Array.from(seen);
}
/**
 * Converts a graph-resolved discovery candidate into an artifact edge.
 *
 * Scores, evidence, validation and graph details are carried over unchanged. `reasons` is the
 * de-duplicated concatenation of the evidence, validation and graph reasons, in that order.
 */
function resolvedEdge(candidate: KloResolvedRelationshipDiscoveryCandidate): KloRelationshipArtifactEdge {
  const { evidence, validation, graph } = candidate;
  const fromEndpoint = endpointArtifact(candidate.from);
  const toEndpoint = endpointArtifact(candidate.to);
  const mergedReasons = uniqueReasons([...evidence.reasons, ...validation.reasons, ...graph.reasons]);

  return {
    id: candidate.id,
    status: candidate.status,
    source: candidate.source,
    from: fromEndpoint,
    to: toEndpoint,
    relationshipType: candidate.relationshipType,
    confidence: candidate.confidence,
    pkScore: candidate.pkScore,
    fkScore: candidate.fkScore,
    score: candidate.score,
    evidence,
    validation,
    graph,
    reasons: mergedReasons,
  };
}
/**
 * Assembles the relationship artifact for a connection from up to three inputs, processed in
 * this order: graph-resolved candidates, composite candidates, then the legacy relationship update.
 *
 * Each edge is filed under accepted / review / rejected. Within a bucket the first edge with a
 * given id wins; later edges with the same id are ignored. Skipped entries come from the
 * relationship update only.
 *
 * @param input Connection id plus the optional candidate lists and relationship update.
 * @returns The artifact with edge buckets sorted by `id` and `skipped` sorted by `relationshipId`.
 */
export function buildKloRelationshipArtifacts(input: BuildKloRelationshipArtifactsInput): KloRelationshipArtifact {
  const collected = emptyArtifacts(input.connectionId);

  // Routes an edge into the bucket that matches its status; anything else counts as rejected.
  const fileByStatus = (edge: KloRelationshipArtifactEdge): void => {
    switch (edge.status) {
      case 'accepted':
        pushUniqueEdge(collected.accepted, edge);
        break;
      case 'review':
        pushUniqueEdge(collected.review, edge);
        break;
      default:
        pushUniqueEdge(collected.rejected, edge);
    }
  };

  (input.resolvedRelationships ?? []).forEach((candidate) => fileByStatus(resolvedEdge(candidate)));
  (input.compositeRelationships ?? []).forEach((candidate) => fileByStatus(compositeEdge(candidate)));

  const update = input.relationshipUpdate;
  if (update) {
    // Update entries are filed by the list they arrive in.
    update.accepted.forEach((relationship) => {
      pushUniqueEdge(collected.accepted, relationshipUpdateEdge(relationship, 'accepted'));
    });
    update.rejected.forEach((relationship) => {
      pushUniqueEdge(collected.rejected, relationshipUpdateEdge(relationship, 'rejected'));
    });
    collected.skipped.push(...update.skipped);
  }

  const byEdgeId = (left: KloRelationshipArtifactEdge, right: KloRelationshipArtifactEdge): number =>
    left.id.localeCompare(right.id);

  return {
    connectionId: collected.connectionId,
    accepted: collected.accepted.sort(byEdgeId),
    review: collected.review.sort(byEdgeId),
    rejected: collected.rejected.sort(byEdgeId),
    skipped: [...collected.skipped].sort((left, right) => left.relationshipId.localeCompare(right.relationshipId)),
  };
}
/**
 * Explains why a run produced no accepted relationships.
 *
 * @returns null when at least one edge was accepted; otherwise one fixed message, chosen in this
 *   order: review edges exist and validation was unavailable (profile without SQL, or any edge
 *   carrying a validation-unavailable reason); review edges exist; only rejected edges exist;
 *   no edges at all.
 */
function noAcceptedReason(input: {
  artifacts: KloRelationshipArtifact;
  profile: KloRelationshipProfileArtifact;
}): string | null {
  const { artifacts, profile } = input;

  if (artifacts.accepted.length > 0) {
    return null;
  }

  if (artifacts.review.length > 0) {
    const validationUnavailable =
      !profile.sqlAvailable ||
      hasReason(artifacts, 'validation_unavailable') ||
      hasReason(artifacts, 'validation_unavailable_review_only');
    return validationUnavailable
      ? 'validation unavailable; review candidates written'
      : 'relationship candidates require review before manifest writes';
  }

  return artifacts.rejected.length > 0
    ? 'all candidate pairs were rejected'
    : 'no candidate pairs passed type compatibility';
}
/**
 * Test double that serves read-only queries from an in-memory SQLite database.
 *
 * Tests seed `db` directly and pass `executeReadOnly` to the connector under test.
 */
class InMemorySqliteExecutor {
  /** Backing in-memory database; seeded by each test. */
  readonly db = new Database(':memory:');
  /** Number of `executeReadOnly` calls made so far. */
  queryCount = 0;

  /**
   * Runs `input.sql` and returns the rows in tabular form.
   *
   * Headers are taken from the keys of the first row, so an empty result set has no headers.
   * The scan context is accepted but not used.
   */
  executeReadOnly(input: KloReadOnlyQueryInput, _ctx: KloScanContext): Promise<KloQueryResult> {
    this.queryCount += 1;
    const rows = this.db.prepare(input.sql).all() as Record<string, unknown>[];
    const headers = Object.keys(rows[0] ?? {});
    return Promise.resolve({
      headers,
      rows: rows.map((row) => headers.map((header) => row[header])),
      totalRows: rows.length,
      rowCount: rows.length,
    });
  }

  /** Closes the underlying database. */
  close(): void {
    this.db.close();
  }
}
{ + ...table, + foreignKeys: [ + { + fromColumn: 'account_id', + toCatalog: null, + toDb: null, + toTable: 'accounts', + toColumn: 'id', + constraintName: 'orders_account_id_fkey', + }, + ], + } + : table, + ), + }; +} + +function naturalKeySnapshot(): KloSchemaSnapshot { + return { + connectionId: 'warehouse', + driver: 'sqlite', + extractedAt: '2026-05-07T00:00:00.000Z', + scope: {}, + metadata: {}, + tables: [ + { + catalog: null, + db: null, + name: 'dim_countries', + kind: 'table', + comment: null, + estimatedRows: 3, + foreignKeys: [], + columns: [ + { + name: 'iso_code', + nativeType: 'TEXT', + normalizedType: 'text', + dimensionType: 'string', + nullable: false, + primaryKey: false, + comment: null, + }, + { + name: 'name', + nativeType: 'TEXT', + normalizedType: 'text', + dimensionType: 'string', + nullable: false, + primaryKey: false, + comment: null, + }, + ], + }, + { + catalog: null, + db: null, + name: 'fct_accounts', + kind: 'table', + comment: null, + estimatedRows: 4, + foreignKeys: [], + columns: [ + { + name: 'id', + nativeType: 'INTEGER', + normalizedType: 'integer', + dimensionType: 'number', + nullable: false, + primaryKey: false, + comment: null, + }, + { + name: 'country_code', + nativeType: 'TEXT', + normalizedType: 'text', + dimensionType: 'string', + nullable: false, + primaryKey: false, + comment: null, + }, + ], + }, + ], + }; +} + +function connector(executor: InMemorySqliteExecutor | null): KloScanConnector { + return { + id: 'sqlite:test', + driver: 'sqlite', + capabilities: createKloConnectorCapabilities({ + readOnlySql: executor !== null, + columnStats: executor !== null, + tableSampling: false, + columnSampling: false, + }), + introspect: async () => snapshot(), + executeReadOnly: executor ? 
/**
 * Fixture: a two-table SQLite snapshot with no declared primary or foreign keys.
 *
 * `customers(id)` and `orders(id, buyer_ref)`: all columns are non-nullable integers and the
 * referencing column (`buyer_ref`) does not mention the target table. Every call returns
 * fresh objects.
 */
function llmOnlyRelationshipSnapshot(): KloSchemaSnapshot {
  type SnapshotTable = KloSchemaSnapshot['tables'][number];
  type SnapshotColumn = SnapshotTable['columns'][number];

  // Every column in this fixture is a non-nullable, non-key INTEGER.
  const integerColumns = (...names: string[]): SnapshotColumn[] =>
    names.map(
      (name): SnapshotColumn => ({
        name,
        nativeType: 'INTEGER',
        normalizedType: 'integer',
        dimensionType: 'number',
        nullable: false,
        primaryKey: false,
        comment: null,
      }),
    );

  // Both tables share the same metadata apart from name and columns.
  const twoRowTable = (name: string, columns: SnapshotColumn[]): SnapshotTable => ({
    catalog: null,
    db: null,
    name,
    kind: 'table',
    comment: null,
    estimatedRows: 2,
    foreignKeys: [],
    columns,
  });

  const customers = twoRowTable('customers', integerColumns('id'));
  const orders = twoRowTable('orders', integerColumns('id', 'buyer_ref'));

  return {
    connectionId: 'warehouse',
    driver: 'sqlite',
    extractedAt: '2026-05-07T00:00:00.000Z',
    scope: {},
    metadata: {},
    tables: [customers, orders],
  };
}
executor?.close(); + executor = null; + }); + + it('accepts a validated relationship without declared PK or FK metadata', async () => { + executor = new InMemorySqliteExecutor(); + executor.db.exec(` + CREATE TABLE accounts (id INTEGER NOT NULL, name TEXT NOT NULL); + CREATE TABLE orders (id INTEGER NOT NULL, account_id INTEGER NOT NULL); + INSERT INTO accounts (id, name) VALUES (1, 'Acme'), (2, 'Globex'); + INSERT INTO orders (id, account_id) VALUES (10, 1), (11, 1), (12, 2); + `); + + const result = await discoverKloRelationships({ + connectionId: 'warehouse', + driver: 'sqlite', + connector: connector(executor), + schema: snapshotToKloEnrichedSchema(snapshot()), + context: { runId: 'relationship-run-1' }, + settings: relationshipSettings(), + }); + + expect(result.relationships).toEqual({ accepted: 1, review: 0, rejected: 0, skipped: 0 }); + expect(result.statisticalValidation).toBe('completed'); + expect(result.profile.sqlAvailable).toBe(true); + expect(result.profile.queryCount).toBeGreaterThan(0); + expect(result.relationshipUpdate.accepted).toEqual([ + expect.objectContaining({ + from: expect.objectContaining({ table: expect.objectContaining({ name: 'orders' }), columns: ['account_id'] }), + to: expect.objectContaining({ table: expect.objectContaining({ name: 'accounts' }), columns: ['id'] }), + relationshipType: 'many_to_one', + source: 'inferred', + isPrimaryKeyReference: true, + }), + ]); + expect(result.resolvedRelationships[0]).toMatchObject({ + status: 'accepted', + validation: expect.objectContaining({ reasons: expect.arrayContaining(['validation_passed']) }), + graph: expect.objectContaining({ reasons: expect.arrayContaining(['fk_score_passed']) }), + }); + }); + + it('accepts a profile-driven natural-key relationship without declared metadata', async () => { + executor = new InMemorySqliteExecutor(); + executor.db.exec(` + CREATE TABLE dim_countries (iso_code TEXT NOT NULL, name TEXT NOT NULL); + CREATE TABLE fct_accounts (id INTEGER NOT NULL, 
country_code TEXT NOT NULL); + INSERT INTO dim_countries (iso_code, name) VALUES ('US', 'United States'), ('FR', 'France'), ('DE', 'Germany'); + INSERT INTO fct_accounts (id, country_code) VALUES (1, 'US'), (2, 'FR'), (3, 'US'), (4, 'DE'); + `); + + const schema = naturalKeySnapshot(); + const result = await discoverKloRelationships({ + connectionId: 'warehouse', + driver: 'sqlite', + connector: { + ...connector(executor), + introspect: async () => schema, + }, + schema: snapshotToKloEnrichedSchema(schema), + context: { runId: 'natural-key-relationship-run' }, + settings: relationshipSettings(), + }); + + expect(result.relationships).toEqual({ accepted: 1, review: 0, rejected: 0, skipped: 0 }); + expect(result.relationshipUpdate.accepted).toEqual([ + expect.objectContaining({ + from: expect.objectContaining({ table: expect.objectContaining({ name: 'fct_accounts' }), columns: ['country_code'] }), + to: expect.objectContaining({ table: expect.objectContaining({ name: 'dim_countries' }), columns: ['iso_code'] }), + relationshipType: 'many_to_one', + source: 'inferred', + isPrimaryKeyReference: true, + }), + ]); + expect(result.resolvedRelationships[0]).toMatchObject({ + source: 'profile_match', + status: 'accepted', + validation: expect.objectContaining({ reasons: expect.arrayContaining(['validation_passed']) }), + graph: expect.objectContaining({ reasons: expect.arrayContaining(['fk_score_passed']) }), + }); + }); + + it('accepts an embedding-driven relationship without declared metadata or LLM proposals', async () => { + executor = new InMemorySqliteExecutor(); + executor.db.exec(` + CREATE TABLE customers (id INTEGER NOT NULL, name TEXT NOT NULL); + CREATE TABLE orders (id INTEGER NOT NULL, buyer_ref INTEGER NOT NULL); + INSERT INTO customers (id, name) VALUES (1, 'Acme'), (2, 'Orbit'), (3, 'Globex'); + INSERT INTO orders (id, buyer_ref) VALUES (10, 1), (11, 2), (12, 2), (13, 3); + `); + + const sourceSnapshot = llmOnlyRelationshipSnapshot(); + const schema = 
snapshotToKloEnrichedSchema( + sourceSnapshot, + new Map([ + ['customers.id', [1, 0, 0]], + ['customers.name', [0, 1, 0]], + ['orders.id', [0, 0, 1]], + ['orders.buyer_ref', [0.995, 0.005, 0]], + ]), + ); + + const result = await discoverKloRelationships({ + connectionId: 'warehouse', + driver: 'sqlite', + connector: { + ...connector(executor), + introspect: async () => sourceSnapshot, + }, + schema, + context: { runId: 'embedding-relationship-run' }, + settings: { + ...relationshipSettings(), + llmProposals: false, + }, + }); + + expect(result.llmRelationshipValidation).toBe('skipped'); + expect(result.relationships).toEqual({ accepted: 1, review: 0, rejected: 0, skipped: 0 }); + expect(result.relationshipUpdate.accepted[0]).toMatchObject({ + from: { table: { name: 'orders' }, columns: ['buyer_ref'] }, + to: { table: { name: 'customers' }, columns: ['id'] }, + }); + expect(result.resolvedRelationships[0]).toMatchObject({ + source: 'embedding_similarity', + status: 'accepted', + validation: expect.objectContaining({ reasons: expect.arrayContaining(['validation_passed']) }), + evidence: expect.objectContaining({ + reasons: expect.arrayContaining(['embedding_similarity', 'target_key_like']), + embeddingSimilarity: expect.any(Number), + }), + }); + }); + + it('keeps candidates review-only when read-only SQL is unavailable', async () => { + const result = await discoverKloRelationships({ + connectionId: 'warehouse', + driver: 'sqlite', + connector: connector(null), + schema: snapshotToKloEnrichedSchema(snapshot()), + context: { runId: 'relationship-run-no-sql' }, + settings: relationshipSettings(), + }); + + expect(result.relationships).toEqual({ accepted: 0, review: 1, rejected: 0, skipped: 0 }); + expect(result.statisticalValidation).toBe('skipped'); + expect(result.relationshipUpdate.accepted).toEqual([]); + expect(result.resolvedRelationships[0]).toMatchObject({ + status: 'review', + validation: expect.objectContaining({ reasons: 
expect.arrayContaining(['validation_unavailable']) }), + }); + expect(result.warnings).toContainEqual({ + code: 'connector_capability_missing', + message: 'KLO scan connector cannot run read-only SQL relationship validation', + recoverable: true, + metadata: { capability: 'readOnlySql' }, + }); + }); + + it('accepts formal metadata relationships when read-only SQL is unavailable', async () => { + const sourceSnapshot = declaredForeignKeySnapshot(); + const result = await discoverKloRelationships({ + connectionId: 'warehouse', + driver: 'sqlite', + connector: connector(null), + schema: snapshotToKloEnrichedSchema(sourceSnapshot), + context: { runId: 'formal-metadata-no-sql' }, + settings: relationshipSettings(), + }); + + expect(result.statisticalValidation).toBe('skipped'); + expect(result.relationships).toEqual({ accepted: 1, review: 0, rejected: 0, skipped: 0 }); + expect(result.resolvedRelationships).toEqual([]); + expect(result.relationshipUpdate.accepted).toEqual([ + expect.objectContaining({ + id: 'orders:(orders.account_id)->accounts:(accounts.id)', + source: 'formal', + confidence: 1, + from: expect.objectContaining({ table: expect.objectContaining({ name: 'orders' }), columns: ['account_id'] }), + to: expect.objectContaining({ table: expect.objectContaining({ name: 'accounts' }), columns: ['id'] }), + }), + ]); + expect(result.relationshipUpdate.rejected).toEqual([]); + expect(result.relationshipUpdate.skipped).toEqual([]); + }); + + it('accepts LLM-only relationship proposals only after SQL validation and graph resolution pass', async () => { + executor = new InMemorySqliteExecutor(); + executor.db.exec(` + CREATE TABLE customers (id INTEGER); + CREATE TABLE orders (id INTEGER, buyer_ref INTEGER); + INSERT INTO customers (id) VALUES (1), (2); + INSERT INTO orders (id, buyer_ref) VALUES (10, 1), (11, 2); + `); + const generateText = vi.fn(async () => ({ + output: { + pkCandidates: [{ table: 'customers', column: 'id', confidence: 0.91, rationale: 'Unique 
customer key.' }], + fkCandidates: [ + { + fromTable: 'orders', + fromColumn: 'buyer_ref', + toTable: 'customers', + toColumn: 'id', + confidence: 0.89, + rationale: 'Buyer reference values align with customer identifiers.', + }, + ], + }, + })); + + const result = await discoverKloRelationships({ + connectionId: 'warehouse', + driver: 'sqlite', + connector: connector(executor), + schema: snapshotToKloEnrichedSchema(llmOnlyRelationshipSnapshot()), + context: { runId: 'llm-relationship-orchestrator' }, + settings: relationshipSettings(), + llmProvider: llmProvider(), + generateText, + }); + + expect(result.llmRelationshipValidation).toBe('completed'); + expect(result.relationships).toEqual({ accepted: 1, review: 0, rejected: 0, skipped: 0 }); + expect(result.resolvedRelationships[0]).toMatchObject({ + source: 'llm_proposal', + status: 'accepted', + evidence: { + llmRationale: 'Buyer reference values align with customer identifiers.', + }, + }); + expect(result.relationshipUpdate.accepted[0]).toMatchObject({ + from: { table: { name: 'orders' }, columns: ['buyer_ref'] }, + to: { table: { name: 'customers' }, columns: ['id'] }, + }); + }); + + it('uses configured acceptance thresholds when resolving graph relationships', async () => { + const executor = new InMemorySqliteExecutor(); + executor.db.exec(` + CREATE TABLE accounts (id INTEGER NOT NULL, name TEXT NOT NULL); + CREATE TABLE orders (id INTEGER NOT NULL, account_id INTEGER NOT NULL); + INSERT INTO accounts VALUES (1, 'Acme'), (2, 'Orbit'); + INSERT INTO orders VALUES (10, 1), (11, 1), (12, 2); + `); + + const settings = { + ...buildDefaultKloProjectConfig('warehouse').scan.relationships, + acceptThreshold: 0.99, + reviewThreshold: 0.55, + }; + + const result = await discoverKloRelationships({ + connectionId: 'warehouse', + driver: 'sqlite', + connector: connector(executor), + schema: snapshotToKloEnrichedSchema(snapshot()), + context: { runId: 'configured-thresholds' }, + settings, + }); + + 
expect(result.relationships).toEqual({ accepted: 0, review: 1, rejected: 0, skipped: 0 }); + expect(result.relationshipUpdate.accepted).toEqual([]); + expect(result.resolvedRelationships[0]).toMatchObject({ + status: 'review', + graph: { reasons: expect.arrayContaining(['fk_score_review']) }, + }); + + executor.close(); + }); + + it('passes maxCandidatesPerColumn into broad deterministic candidate generation', async () => { + const executor = new InMemorySqliteExecutor(); + executor.db.exec(` + CREATE TABLE accounts (id INTEGER NOT NULL, name TEXT NOT NULL); + CREATE TABLE account_archive (id INTEGER NOT NULL, name TEXT NOT NULL); + CREATE TABLE orders (id INTEGER NOT NULL, account_id INTEGER NOT NULL); + INSERT INTO accounts VALUES (1, 'Acme'), (2, 'Orbit'); + INSERT INTO account_archive VALUES (99, 'Archive'); + INSERT INTO orders VALUES (10, 1), (11, 1), (12, 2); + `); + + const richSnapshot = snapshot(); + richSnapshot.tables.splice(1, 0, { + catalog: null, + db: null, + name: 'account_archive', + kind: 'table', + comment: null, + estimatedRows: 1, + foreignKeys: [], + columns: [ + { + name: 'id', + nativeType: 'INTEGER', + normalizedType: 'integer', + dimensionType: 'number', + nullable: false, + primaryKey: false, + comment: null, + }, + { + name: 'name', + nativeType: 'TEXT', + normalizedType: 'text', + dimensionType: 'string', + nullable: false, + primaryKey: false, + comment: null, + }, + ], + }); + + const result = await discoverKloRelationships({ + connectionId: 'warehouse', + driver: 'sqlite', + connector: { + ...connector(executor), + introspect: async () => richSnapshot, + }, + schema: snapshotToKloEnrichedSchema(richSnapshot), + context: { runId: 'candidate-cap' }, + settings: { + ...buildDefaultKloProjectConfig('warehouse').scan.relationships, + maxCandidatesPerColumn: 1, + }, + }); + + const sourceTargets = result.resolvedRelationships + .filter((relationship) => relationship.from.columns[0] === 'account_id') + .map((relationship) => 
`${relationship.to.table.name}.${relationship.to.columns[0]}`); + expect(sourceTargets).toHaveLength(1); + expect(sourceTargets).toEqual(['accounts.id']); + + executor.close(); + }); + + it('accepts SQL-validated composite relationships in production relationship-discovery detection', async () => { + const fixtureRoot = new URL( + '../../test/fixtures/relationship-benchmarks/composite_keys_no_declared_constraints', + import.meta.url, + ); + const fixture = await loadKloRelationshipBenchmarkFixture(fixtureRoot.pathname); + const maskedSnapshot = maskKloRelationshipBenchmarkSnapshot(fixture.snapshot, 'declared_pks_and_declared_fks_removed'); + const database = new Database(fixture.dataPath ?? '', { readonly: true, fileMustExist: true }); + const testConnector: KloScanConnector = { + id: 'sqlite:composite', + driver: 'sqlite', + capabilities: createKloConnectorCapabilities({ + readOnlySql: true, + columnStats: true, + tableSampling: false, + columnSampling: false, + }), + introspect: async () => maskedSnapshot, + executeReadOnly: async (input) => { + const rows = database.prepare(input.sql).all() as Record[]; + const headers = Object.keys(rows[0] ?? 
{}); + return { + headers, + rows: rows.map((row) => headers.map((header) => row[header])), + totalRows: rows.length, + rowCount: rows.length, + }; + }, + }; + + const result = await discoverKloRelationships({ + connectionId: maskedSnapshot.connectionId, + driver: maskedSnapshot.driver, + connector: testConnector, + schema: snapshotToKloEnrichedSchema(maskedSnapshot, new Map()), + context: { runId: 'test:production-composite' }, + settings: relationshipSettings(), + }); + database.close(); + + expect( + result.relationshipUpdate.accepted.map( + (relationship) => + `${relationship.from.table.name}.(${relationship.from.columns.join(',')})->${relationship.to.table.name}.(${relationship.to.columns.join(',')})`, + ), + ).toContain('order_line_allocations.(order_id,line_number)->order_lines.(order_id,line_number)'); + expect(result.relationships.accepted).toBeGreaterThanOrEqual(1); + expect(result.compositeRelationships.map((relationship) => relationship.status)).toContain('accepted'); + }); +}); diff --git a/packages/context/src/scan/relationship-discovery.ts b/packages/context/src/scan/relationship-discovery.ts new file mode 100644 index 00000000..8a03ec6c --- /dev/null +++ b/packages/context/src/scan/relationship-discovery.ts @@ -0,0 +1,338 @@ +import type { KloLlmProvider } from '@klo/llm'; +import type { KloScanRelationshipConfig } from '../project/config.js'; +import type { KloEnrichedRelationship, KloEnrichedSchema, KloRelationshipUpdate } from './enrichment-types.js'; +import { + generateKloRelationshipDiscoveryCandidates, + type KloRelationshipDiscoveryCandidate, + mergeKloRelationshipDiscoveryCandidates, +} from './relationship-candidates.js'; +import { + discoverKloCompositeRelationships, + type KloCompositeRelationshipCandidate, +} from './relationship-composite-candidates.js'; +import { collectKloFormalMetadataRelationships } from './relationship-formal-metadata.js'; +import { + type KloResolvedRelationshipDiscoveryCandidate, + resolveKloRelationshipGraph, 
/** Everything `discoverKloRelationships` needs for one connection scan. */
export interface DiscoverKloRelationshipsInput {
  connectionId: string;
  driver: KloConnectionDriver;
  connector: KloScanConnector;
  schema: KloEnrichedSchema;
  context: KloScanContext;
  settings: KloScanRelationshipConfig;
  llmProvider?: KloLlmProvider | null;
  generateText?: KloRelationshipLlmProposalGenerateText;
}

/** Outcome of one relationship-discovery run. */
export interface DiscoverKloRelationshipsResult {
  relationshipUpdate: KloRelationshipUpdate;
  relationships: KloScanRelationshipSummary;
  profile: KloRelationshipProfileArtifact;
  resolvedRelationships: KloResolvedRelationshipDiscoveryCandidate[];
  compositeRelationships: KloCompositeRelationshipCandidate[];
  statisticalValidation: KloScanEnrichmentSummary['statisticalValidation'];
  llmRelationshipValidation: KloScanEnrichmentSummary['llmRelationshipValidation'];
  warnings: KloScanWarning[];
}

/** Minimum `pkScore` at which a resolved candidate is flagged as a primary-key reference. */
const PRIMARY_KEY_REFERENCE_MIN_PK_SCORE = 0.78;

/** Converts a graph-resolved single-column candidate into an `inferred` relationship. */
function relationshipFromResolved(candidate: KloResolvedRelationshipDiscoveryCandidate): KloEnrichedRelationship {
  return {
    id: candidate.id,
    source: 'inferred',
    from: candidate.from,
    to: candidate.to,
    relationshipType: candidate.relationshipType,
    confidence: candidate.fkScore,
    isPrimaryKeyReference: candidate.pkScore >= PRIMARY_KEY_REFERENCE_MIN_PK_SCORE,
  };
}

/** Converts a composite (multi-column) candidate into an `inferred` relationship. */
function relationshipFromComposite(candidate: KloCompositeRelationshipCandidate): KloEnrichedRelationship {
  return {
    id: candidate.id,
    source: 'inferred',
    from: {
      tableId: candidate.from.tableId,
      columnIds: candidate.from.columnIds,
      table: candidate.from.table,
      columns: candidate.from.columns,
    },
    to: {
      tableId: candidate.to.tableId,
      columnIds: candidate.to.columnIds,
      table: candidate.to.table,
      columns: candidate.to.columns,
    },
    relationshipType: candidate.relationshipType,
    confidence: candidate.confidence,
    isPrimaryKeyReference: candidate.status === 'accepted',
  };
}

/**
 * Builds the `fromTable:(cols)->toTable:(cols)` key used to sort accepted relationships.
 *
 * NOTE(review): the `Pick` type arguments were missing in the source text;
 * `'from' | 'to'` is reconstructed from the fields read here — confirm.
 */
function relationshipId(input: Pick<KloEnrichedRelationship, 'from' | 'to'>): string {
  return `${input.from.tableId}:(${input.from.columnIds.join(',')})->${input.to.tableId}:(${input.to.columnIds.join(',')})`;
}

/**
 * Returns accepted graph candidates whose id is not already covered by formal metadata.
 *
 * NOTE(review): the `ReadonlySet` element type was missing in the source text;
 * `string` is assumed because the set is probed with `candidate.id` — confirm.
 */
function nonFormalAcceptedRelationships(input: {
  formalIds: ReadonlySet<string>;
  resolvedRelationships: readonly KloResolvedRelationshipDiscoveryCandidate[];
}): KloEnrichedRelationship[] {
  return input.resolvedRelationships
    .filter((candidate) => candidate.status === 'accepted' && !input.formalIds.has(candidate.id))
    .map(relationshipFromResolved);
}

/**
 * Tallies candidates by status in a single pass. Statuses other than
 * `accepted`, `review` and `rejected` are ignored; `skipped` is always 0.
 */
function countByStatus(candidates: readonly { readonly status: string }[]): KloScanRelationshipSummary {
  const summary: KloScanRelationshipSummary = { accepted: 0, review: 0, rejected: 0, skipped: 0 };
  for (const candidate of candidates) {
    if (candidate.status === 'accepted') {
      summary.accepted += 1;
    } else if (candidate.status === 'review') {
      summary.review += 1;
    } else if (candidate.status === 'rejected') {
      summary.rejected += 1;
    }
  }
  return summary;
}

/** Status counts for graph-resolved single-column candidates. */
function relationshipSummary(
  resolvedRelationships: readonly KloResolvedRelationshipDiscoveryCandidate[],
): KloScanRelationshipSummary {
  return countByStatus(resolvedRelationships);
}

/** Status counts for composite candidates. */
function compositeSummary(relationships: readonly KloCompositeRelationshipCandidate[]): KloScanRelationshipSummary {
  return countByStatus(relationships);
}

/**
 * Runs composite relationship detection when SQL is usable.
 *
 * Returns an empty list when there is no executor or the profile reports SQL
 * as unavailable. Warnings reported by the detector, and any exception it
 * throws, are appended to `input.warnings` as recoverable
 * `relationship_validation_failed` entries; a thrown error yields an empty list.
 *
 * NOTE(review): the `Promise` type argument was missing in the source text;
 * reconstructed from how the result is used by the caller — confirm.
 */
async function detectCompositeRelationships(input: {
  connectionId: string;
  driver: DiscoverKloRelationshipsInput['driver'];
  schema: KloEnrichedSchema;
  profile: KloRelationshipProfileArtifact;
  executor: KloRelationshipReadOnlyExecutor | null;
  context: DiscoverKloRelationshipsInput['context'];
  warnings: KloScanWarning[];
}): Promise<KloCompositeRelationshipCandidate[]> {
  if (!input.executor || !input.profile.sqlAvailable) {
    return [];
  }
  try {
    const compositeDetection = await discoverKloCompositeRelationships({
      connectionId: input.connectionId,
      driver: input.driver,
      schema: input.schema,
      profiles: input.profile,
      executor: input.executor,
      ctx: input.context,
    });
    for (const warning of compositeDetection.warnings) {
      input.warnings.push({
        code: 'relationship_validation_failed',
        message: warning,
        recoverable: true,
        metadata: { source: 'composite_relationship_detection' },
      });
    }
    return compositeDetection.relationships;
  } catch (error) {
    input.warnings.push({
      code: 'relationship_validation_failed',
      message: `KLO composite relationship detection failed: ${error instanceof Error ? error.message : String(error)}`,
      recoverable: true,
      metadata: { source: 'composite_relationship_detection' },
    });
    return [];
  }
}

/** Adds formal-metadata accepted/skipped counts to the graph-resolved status counts. */
function combinedRelationshipSummary(input: {
  formalAccepted: number;
  formalSkipped: number;
  resolvedRelationships: readonly KloResolvedRelationshipDiscoveryCandidate[];
}): KloScanRelationshipSummary {
  const graph = relationshipSummary(input.resolvedRelationships);
  return {
    accepted: input.formalAccepted + graph.accepted,
    review: graph.review,
    rejected: graph.rejected,
    skipped: input.formalSkipped,
  };
}

/**
 * Resolves the read-only SQL executor exposed by the connector.
 *
 * Returns `executor: null` plus one recoverable warning when the connector
 * either lacks the `readOnlySql` capability or advertises it without
 * implementing `executeReadOnly`.
 */
function sqlExecutor(input: DiscoverKloRelationshipsInput): {
  executor: KloRelationshipReadOnlyExecutor | null;
  warnings: KloScanWarning[];
} {
  if (!input.connector.capabilities.readOnlySql) {
    return {
      executor: null,
      warnings: [
        {
          code: 'connector_capability_missing',
          message: 'KLO scan connector cannot run read-only SQL relationship validation',
          recoverable: true,
          metadata: { capability: 'readOnlySql' },
        },
      ],
    };
  }

  if (!input.connector.executeReadOnly) {
    return {
      executor: null,
      warnings: [
        {
          code: 'relationship_validation_failed',
          message: 'KLO scan connector advertises readOnlySql but does not expose executeReadOnly',
          recoverable: true,
          metadata: { capability: 'readOnlySql' },
        },
      ],
    };
  }

  return {
    executor: {
      // Bound so the connector keeps its own `this` when invoked through the wrapper.
      executeReadOnly: input.connector.executeReadOnly.bind(input.connector),
    },
    warnings: [],
  };
}

/**
 * Discovers relationships for one connection.
 *
 * Steps, in order: resolve the SQL executor, collect formal-metadata
 * relationships, profile the schema, generate deterministic candidates,
 * optionally add LLM proposals, validate, resolve the graph, then detect
 * composite relationships. Formal-metadata relationships are placed in the
 * accepted list directly; candidates whose id matches one are dropped before
 * validation.
 *
 * NOTE(review): the `Promise` type argument was missing in the source text;
 * reconstructed from the shape of the returned object — confirm.
 *
 * @param input connection, connector, enriched schema, scan context and settings
 * @returns the relationship update, status counts, profile artifact, resolved
 *   and composite candidates, validation states and accumulated warnings
 */
export async function discoverKloRelationships(
  input: DiscoverKloRelationshipsInput,
): Promise<DiscoverKloRelationshipsResult> {
  // `warnings` is the accumulator for the whole run; later stages push into it.
  const { executor, warnings } = sqlExecutor(input);
  const formalMetadata = collectKloFormalMetadataRelationships(input.schema);
  const profileCache = createKloRelationshipProfileCache();
  const profile = await profileKloRelationshipSchema({
    connectionId: input.connectionId,
    driver: input.driver,
    schema: input.schema,
    executor,
    ctx: input.context,
    profileSampleRows: input.settings.profileSampleRows,
    cache: profileCache,
  });
  const deterministicCandidates: KloRelationshipDiscoveryCandidate[] = generateKloRelationshipDiscoveryCandidates(
    input.schema,
    {
      maxCandidatesPerColumn: input.settings.maxCandidatesPerColumn,
      profiles: profile,
    },
  );
  // With LLM proposals disabled, substitute an empty result marked 'skipped'.
  const llmProposalResult = input.settings.llmProposals
    ? await proposeKloRelationshipCandidatesWithLlm({
        connectionId: input.connectionId,
        schema: input.schema,
        profile,
        llmProvider: input.llmProvider ?? null,
        settings: {
          maxTablesPerBatch: input.settings.maxLlmTablesPerBatch,
        },
        generateText: input.generateText,
      })
    : { candidates: [], warnings: [], llmCalls: 0, summary: 'skipped' as const };
  // Candidates already covered by formal metadata are not validated again.
  const candidates = mergeKloRelationshipDiscoveryCandidates([
    ...deterministicCandidates,
    ...llmProposalResult.candidates,
  ]).filter((candidate) => !formalMetadata.acceptedIds.has(candidate.id));
  warnings.push(...llmProposalResult.warnings);
  const validated = await validateKloRelationshipDiscoveryCandidates({
    connectionId: input.connectionId,
    driver: input.driver,
    candidates,
    profiles: profile,
    executor,
    ctx: input.context,
    tableCount: input.schema.tables.length,
    settings: {
      acceptThreshold: input.settings.acceptThreshold,
      reviewThreshold: input.settings.reviewThreshold,
      maxDistinctSourceValues: input.settings.profileSampleRows,
      concurrency: input.settings.validationConcurrency,
      validationBudget: input.settings.validationBudget,
    },
  });
  const graph = resolveKloRelationshipGraph({
    schema: input.schema,
    profiles: profile,
    candidates: validated,
    settings: {
      acceptThreshold: input.settings.acceptThreshold,
      reviewThreshold: input.settings.reviewThreshold,
      validationRequiredForManifest: input.settings.validationRequiredForManifest,
    },
  });
  const compositeRelationships = await detectCompositeRelationships({
    connectionId: input.connectionId,
    driver: input.driver,
    schema: input.schema,
    profile,
    executor,
    context: input.context,
    warnings,
  });
  const inferredAccepted = nonFormalAcceptedRelationships({
    formalIds: formalMetadata.acceptedIds,
    resolvedRelationships: graph.relationships,
  });
  const compositeAccepted = compositeRelationships
    .filter((candidate) => candidate.status === 'accepted')
    .map(relationshipFromComposite);
  const relationshipsForAcceptance = formalMetadata.accepted.concat(inferredAccepted, compositeAccepted);
  // De-duplicate by relationship id (a later entry replaces an earlier one), then sort for stable output.
  const acceptedById = new Map(relationshipsForAcceptance.map((relationship) => [relationship.id, relationship]));
  const accepted = Array.from(acceptedById.values()).sort((left, right) =>
    relationshipId(left).localeCompare(relationshipId(right)),
  );
  const rejected = graph.relationships
    .filter((candidate) => candidate.status === 'rejected')
    .map(relationshipFromResolved);
  const combined = combinedRelationshipSummary({
    formalAccepted: formalMetadata.accepted.length,
    formalSkipped: formalMetadata.skipped.length,
    resolvedRelationships: graph.relationships,
  });
  const compositeCounts = compositeSummary(compositeRelationships);

  return {
    relationshipUpdate: {
      connectionId: input.connectionId,
      accepted,
      rejected,
      skipped: formalMetadata.skipped,
    },
    relationships: {
      accepted: combined.accepted + compositeCounts.accepted,
      review: combined.review + compositeCounts.review,
      rejected: combined.rejected + compositeCounts.rejected,
      skipped: combined.skipped,
    },
    profile,
    resolvedRelationships: graph.relationships,
    compositeRelationships,
    statisticalValidation: profile.sqlAvailable ? 'completed' : 'skipped',
    llmRelationshipValidation: llmProposalResult.summary,
    warnings,
  };
}
/**
 * Builds a feedback label with fixed defaults; any field in `input` overrides
 * the default because `input` is spread last.
 *
 * NOTE(review): the `Partial`/`Pick` type arguments were missing in the source
 * text. The required keys are reconstructed from the fields every caller
 * passes (`candidateId`, `decision`, `score`) — confirm.
 */
function label(
  input: Partial<KloRelationshipFeedbackLabel> &
    Pick<KloRelationshipFeedbackLabel, 'candidateId' | 'decision' | 'score'>,
): KloRelationshipFeedbackLabel {
  return {
    schemaVersion: 1,
    previousStatus: 'review',
    connectionId: 'warehouse',
    runId: 'scan-run-a',
    syncId: 'sync-a',
    decidedAt: '2026-05-07T12:00:00.000Z',
    reviewer: 'Andrey',
    note: null,
    relationshipType: 'many_to_one',
    source: 'deterministic_name',
    confidence: input.score ?? 0,
    pkScore: input.pkScore ?? null,
    fkScore: input.fkScore ?? input.score,
    fromTable: 'public.orders',
    fromColumns: ['customer_id'],
    toTable: 'public.customers',
    toColumns: ['id'],
    reasons: [],
    artifactPath: 'raw-sources/warehouse/live-database/sync-a/enrichment/relationship-review-decisions.json',
    ...input,
  };
}

/** Wraps labels in an exporter result whose summary counts are derived from the labels. */
function feedback(labels: KloRelationshipFeedbackLabel[]): ExportLocalRelationshipFeedbackLabelsResult {
  const countDecision = (decision: KloRelationshipFeedbackLabel['decision']) =>
    labels.filter((item) => item.decision === decision).length;

  return {
    generatedAt: '2026-05-07T13:00:00.000Z',
    filters: { connectionId: null, decision: 'all' },
    summary: {
      total: labels.length,
      accepted: countDecision('accepted'),
      rejected: countDecision('rejected'),
      connections: new Set(labels.map((item) => item.connectionId)).size,
      runs: new Set(labels.map((item) => `${item.connectionId}:${item.runId}`)).size,
    },
    labels,
    warnings: [],
  };
}

describe('relationship feedback calibration', () => {
  it('builds score buckets and threshold-band summary from feedback labels', () => {
    const report = buildKloRelationshipFeedbackCalibrationReport(
      feedback([
        label({
          candidateId: 'orders:orders.customer_id->customers:customers.id',
          decision: 'accepted',
          score: 0.91,
          pkScore: 0.97,
          fkScore: 0.91,
        }),
        label({
          candidateId: 'orders:orders.account_id->accounts:accounts.id',
          decision: 'accepted',
          score: 0.61,
          pkScore: 0.88,
          fkScore: 0.61,
        }),
        label({
          candidateId: 'orders:orders.note_id->notes:notes.id',
          decision: 'rejected',
          score: 0.21,
          pkScore: 0.4,
          fkScore: 0.21,
        }),
        label({
          candidateId: 'orders:orders.region_id->regions:regions.id',
          decision: 'rejected',
          score: 0.88,
          pkScore: 0.9,
          fkScore: 0.88,
        }),
      ]),
      {
        acceptThreshold: 0.85,
        reviewThreshold: 0.55,
      },
    );

    expect(report.thresholds).toEqual({ accept: 0.85, review: 0.55 });
    expect(report.summary).toEqual({
      total: 4,
      scored: 4,
      unscored: 0,
      acceptedLabels: 2,
      rejectedLabels: 2,
      predictedAccepted: 2,
      predictedReview: 1,
      predictedRejected: 1,
      acceptedBandPrecision: 0.5,
      rejectedBandPrecision: 1,
      reviewBandAcceptedRate: 1,
      meanAcceptedScore: 0.76,
      meanRejectedScore: 0.545,
    });
    expect(report.buckets.map((bucket) => [bucket.label, bucket.total, bucket.accepted, bucket.rejected, bucket.acceptanceRate])).toEqual([
      ['0.00-0.24', 1, 0, 1, 0],
      ['0.25-0.49', 0, 0, 0, null],
      ['0.50-0.74', 1, 1, 0, 1],
      ['0.75-1.00', 2, 1, 1, 0.5],
    ]);
    expect(report.labels.map((item) => [item.candidateId, item.predictedStatus, item.bucket])).toEqual([
      ['orders:orders.account_id->accounts:accounts.id', 'review', '0.50-0.74'],
      ['orders:orders.customer_id->customers:customers.id', 'accepted', '0.75-1.00'],
      ['orders:orders.note_id->notes:notes.id', 'rejected', '0.00-0.24'],
      ['orders:orders.region_id->regions:regions.id', 'accepted', '0.75-1.00'],
    ]);
  });

  it('keeps unscored labels visible without treating them as threshold predictions', () => {
    const report = buildKloRelationshipFeedbackCalibrationReport(
      feedback([
        label({
          candidateId: 'orders:orders.note_id->notes:notes.id',
          decision: 'rejected',
          score: null,
          confidence: 0.2,
          fkScore: null,
        }),
      ]),
      {
        acceptThreshold: 0.85,
        reviewThreshold: 0.55,
      },
    );

    expect(report.summary).toMatchObject({
      total: 1,
      scored: 0,
      unscored: 1,
      predictedAccepted: 0,
      predictedReview: 0,
      predictedRejected: 0,
      acceptedBandPrecision: null,
      rejectedBandPrecision: null,
      reviewBandAcceptedRate: null,
      meanAcceptedScore: null,
      meanRejectedScore: null,
    });
    expect(report.labels[0]).toMatchObject({
      candidateId: 'orders:orders.note_id->notes:notes.id',
      predictedStatus: 'unscored',
      bucket: 'unscored',
    });
  });

  it('formats a stable markdown summary for human CLI output', () => {
    const report = buildKloRelationshipFeedbackCalibrationReport(
      feedback([
        label({ candidateId: 'orders:orders.customer_id->customers:customers.id', decision: 'accepted', score: 0.91 }),
        label({ candidateId: 'orders:orders.note_id->notes:notes.id', decision: 'rejected', score: 0.21 }),
      ]),
      {
        acceptThreshold: 0.85,
        reviewThreshold: 0.55,
      },
    );

    // Render once and check every expected fragment against the same output.
    const markdown = formatKloRelationshipFeedbackCalibrationMarkdown(report);

    expect(markdown).toContain('KLO relationship feedback calibration');
    expect(markdown).toContain('Total labels: 2');
    expect(markdown).toContain('Accepted-band precision: 1.000');
    expect(markdown).toContain('0.75-1.00: total=1 accepted=1 rejected=0 acceptanceRate=1.000');
  });

  it('wraps the feedback exporter and preserves exporter warnings', async () => {
    const project = { projectDir: '/tmp/klo-project' } as KloLocalProject;
    const exporterWarning = {
      path: 'raw-sources/broken/live-database/sync/enrichment/relationship-review-decisions.json',
      message: 'Unexpected token',
    };
    const exportLocalRelationshipFeedbackLabels = vi.fn(async () => ({
      ...feedback([
        label({ candidateId: 'orders:orders.customer_id->customers:customers.id', decision: 'accepted', score: 0.91 }),
      ]),
      warnings: [{ ...exporterWarning }],
    }));

    const report = await calibrateLocalRelationshipFeedbackLabels(project, {
      connectionId: 'warehouse',
      decision: 'all',
      acceptThreshold: 0.9,
      reviewThreshold: 0.5,
      exportLocalRelationshipFeedbackLabels,
    });

    expect(exportLocalRelationshipFeedbackLabels).toHaveBeenCalledWith(project, {
      connectionId: 'warehouse',
      decision: 'all',
    });
    expect(report.thresholds).toEqual({ accept: 0.9, review: 0.5 });
    expect(report.warnings).toEqual([exporterWarning]);
  });
});
KloRelationshipFeedbackCalibrationReport { + generatedAt: string; + filters: ExportLocalRelationshipFeedbackLabelsResult['filters']; + thresholds: Thresholds; + summary: { + total: number; + scored: number; + unscored: number; + acceptedLabels: number; + rejectedLabels: number; + predictedAccepted: number; + predictedReview: number; + predictedRejected: number; + acceptedBandPrecision: number | null; + rejectedBandPrecision: number | null; + reviewBandAcceptedRate: number | null; + meanAcceptedScore: number | null; + meanRejectedScore: number | null; + }; + buckets: KloRelationshipFeedbackCalibrationBucket[]; + labels: KloRelationshipFeedbackCalibrationLabel[]; + warnings: KloRelationshipFeedbackExportWarning[]; +} + +const BUCKETS = [ + { label: '0.00-0.24', minInclusive: 0, maxInclusive: 0.249999 }, + { label: '0.25-0.49', minInclusive: 0.25, maxInclusive: 0.499999 }, + { label: '0.50-0.74', minInclusive: 0.5, maxInclusive: 0.749999 }, + { label: '0.75-1.00', minInclusive: 0.75, maxInclusive: 1 }, +] as const; + +function thresholds(input: BuildKloRelationshipFeedbackCalibrationReportInput): Thresholds { + return { + accept: input.acceptThreshold ?? DEFAULT_ACCEPT_THRESHOLD, + review: input.reviewThreshold ?? DEFAULT_REVIEW_THRESHOLD, + }; +} + +function roundMetric(value: number): number { + return Math.round(value * 1000) / 1000; +} + +function ratio(numerator: number, denominator: number): number | null { + return denominator === 0 ? null : roundMetric(numerator / denominator); +} + +function mean(values: readonly number[]): number | null { + if (values.length === 0) { + return null; + } + return roundMetric(values.reduce((sum, value) => sum + value, 0) / values.length); +} + +function scoreBucket(score: number | null): string { + if (score === null) { + return 'unscored'; + } + return BUCKETS.find((bucket) => score >= bucket.minInclusive && score <= bucket.maxInclusive)?.label ?? 
'unscored'; +} + +function predictedStatus(score: number | null, currentThresholds: Thresholds): CalibrationPredictedStatus { + if (score === null) { + return 'unscored'; + } + if (score >= currentThresholds.accept) { + return 'accepted'; + } + if (score >= currentThresholds.review) { + return 'review'; + } + return 'rejected'; +} + +function calibrationLabel( + label: KloRelationshipFeedbackLabel, + currentThresholds: Thresholds, +): KloRelationshipFeedbackCalibrationLabel { + return { + candidateId: label.candidateId, + decision: label.decision, + previousStatus: label.previousStatus, + predictedStatus: predictedStatus(label.score, currentThresholds), + bucket: scoreBucket(label.score), + score: label.score, + pkScore: label.pkScore, + fkScore: label.fkScore, + connectionId: label.connectionId, + runId: label.runId, + fromTable: label.fromTable, + fromColumns: [...label.fromColumns], + toTable: label.toTable, + toColumns: [...label.toColumns], + source: label.source, + reasons: [...label.reasons], + }; +} + +function summarize( + labels: readonly KloRelationshipFeedbackCalibrationLabel[], +): KloRelationshipFeedbackCalibrationReport['summary'] { + const scored = labels.filter((label) => label.score !== null); + const predictedAccepted = scored.filter((label) => label.predictedStatus === 'accepted'); + const predictedReview = scored.filter((label) => label.predictedStatus === 'review'); + const predictedRejected = scored.filter((label) => label.predictedStatus === 'rejected'); + const acceptedLabels = labels.filter((label) => label.decision === 'accepted'); + const rejectedLabels = labels.filter((label) => label.decision === 'rejected'); + + return { + total: labels.length, + scored: scored.length, + unscored: labels.length - scored.length, + acceptedLabels: acceptedLabels.length, + rejectedLabels: rejectedLabels.length, + predictedAccepted: predictedAccepted.length, + predictedReview: predictedReview.length, + predictedRejected: predictedRejected.length, + 
acceptedBandPrecision: ratio( + predictedAccepted.filter((label) => label.decision === 'accepted').length, + predictedAccepted.length, + ), + rejectedBandPrecision: ratio( + predictedRejected.filter((label) => label.decision === 'rejected').length, + predictedRejected.length, + ), + reviewBandAcceptedRate: ratio( + predictedReview.filter((label) => label.decision === 'accepted').length, + predictedReview.length, + ), + meanAcceptedScore: mean(acceptedLabels.map((label) => label.score).filter((score): score is number => score !== null)), + meanRejectedScore: mean(rejectedLabels.map((label) => label.score).filter((score): score is number => score !== null)), + }; +} + +function buildBuckets( + labels: readonly KloRelationshipFeedbackCalibrationLabel[], +): KloRelationshipFeedbackCalibrationBucket[] { + return BUCKETS.map((bucket) => { + const bucketLabels = labels.filter((label) => label.bucket === bucket.label); + const accepted = bucketLabels.filter((label) => label.decision === 'accepted').length; + const rejected = bucketLabels.filter((label) => label.decision === 'rejected').length; + return { + label: bucket.label, + minInclusive: bucket.minInclusive, + maxInclusive: + bucket.maxInclusive === 0.249999 + ? 0.24 + : bucket.maxInclusive === 0.499999 + ? 0.49 + : bucket.maxInclusive === 0.749999 + ? 
0.74 + : 1, + total: bucketLabels.length, + accepted, + rejected, + acceptanceRate: ratio(accepted, bucketLabels.length), + }; + }); +} + +export function buildKloRelationshipFeedbackCalibrationReport( + feedback: ExportLocalRelationshipFeedbackLabelsResult, + input: BuildKloRelationshipFeedbackCalibrationReportInput = {}, +): KloRelationshipFeedbackCalibrationReport { + const currentThresholds = thresholds(input); + const labels = feedback.labels + .map((label) => calibrationLabel(label, currentThresholds)) + .sort( + (left, right) => + left.connectionId.localeCompare(right.connectionId) || + left.runId.localeCompare(right.runId) || + left.candidateId.localeCompare(right.candidateId), + ); + + return { + generatedAt: feedback.generatedAt, + filters: feedback.filters, + thresholds: currentThresholds, + summary: summarize(labels), + buckets: buildBuckets(labels), + labels, + warnings: [...feedback.warnings], + }; +} + +export async function calibrateLocalRelationshipFeedbackLabels( + project: KloLocalProject, + input: CalibrateLocalRelationshipFeedbackLabelsInput = {}, +): Promise { + const exporter = input.exportLocalRelationshipFeedbackLabels ?? exportLocalRelationshipFeedbackLabels; + const feedback = await exporter(project, { + connectionId: input.connectionId, + decision: input.decision, + }); + return buildKloRelationshipFeedbackCalibrationReport(feedback, input); +} + +function formatMetric(value: number | null): string { + return value === null ? 'n/a' : value.toFixed(3); +} + +export function formatKloRelationshipFeedbackCalibrationMarkdown( + report: KloRelationshipFeedbackCalibrationReport, +): string { + const lines = [ + 'KLO relationship feedback calibration', + `Generated: ${report.generatedAt}`, + `Filter connection: ${report.filters.connectionId ?? 
'all'}`, + `Filter decision: ${report.filters.decision}`, + `Thresholds: accept=${report.thresholds.accept.toFixed(2)} review=${report.thresholds.review.toFixed(2)}`, + `Total labels: ${report.summary.total}`, + `Scored labels: ${report.summary.scored}`, + `Unscored labels: ${report.summary.unscored}`, + `Accepted labels: ${report.summary.acceptedLabels}`, + `Rejected labels: ${report.summary.rejectedLabels}`, + `Predicted accepted: ${report.summary.predictedAccepted}`, + `Predicted review: ${report.summary.predictedReview}`, + `Predicted rejected: ${report.summary.predictedRejected}`, + `Accepted-band precision: ${formatMetric(report.summary.acceptedBandPrecision)}`, + `Rejected-band precision: ${formatMetric(report.summary.rejectedBandPrecision)}`, + `Review-band accepted rate: ${formatMetric(report.summary.reviewBandAcceptedRate)}`, + `Mean accepted score: ${formatMetric(report.summary.meanAcceptedScore)}`, + `Mean rejected score: ${formatMetric(report.summary.meanRejectedScore)}`, + '', + 'Score buckets', + ...report.buckets.map( + (bucket) => + ` - ${bucket.label}: total=${bucket.total} accepted=${bucket.accepted} rejected=${bucket.rejected} acceptanceRate=${formatMetric(bucket.acceptanceRate)}`, + ), + ]; + + if (report.warnings.length > 0) { + lines.push('', 'Warnings'); + for (const warning of report.warnings.slice(0, 5)) { + lines.push(` - ${warning.path}: ${warning.message}`); + } + } + + return `${lines.join('\n')}\n`; +} diff --git a/packages/context/src/scan/relationship-feedback-export.test.ts b/packages/context/src/scan/relationship-feedback-export.test.ts new file mode 100644 index 00000000..93a130c8 --- /dev/null +++ b/packages/context/src/scan/relationship-feedback-export.test.ts @@ -0,0 +1,270 @@ +import type { KloLocalProject } from '../project/index.js'; +import { describe, expect, it, vi } from 'vitest'; +import { + exportLocalRelationshipFeedbackLabels, + formatKloRelationshipFeedbackLabelsJsonl, +} from './relationship-feedback-export.js'; 
+import type { KloRelationshipReviewDecisionArtifact } from './relationship-review-decisions.js'; + +function projectWithFiles(files: Record): KloLocalProject { + const contentByPath = new Map( + Object.entries(files).map(([path, value]) => [ + path, + typeof value === 'string' ? value : `${JSON.stringify(value, null, 2)}\n`, + ]), + ); + return { + projectDir: '/tmp/klo-project', + fileStore: { + async listFiles(path: string) { + return { + files: [...contentByPath.keys()].filter((file) => file.startsWith(`${path}/`)).sort(), + }; + }, + async readFile(path: string) { + const content = contentByPath.get(path); + if (!content) { + throw new Error(`missing file ${path}`); + } + return { content }; + }, + writeFile: vi.fn(), + deleteFile: vi.fn(), + getFileHistory: vi.fn(), + forWorktree: vi.fn(), + }, + } as unknown as KloLocalProject; +} + +function decisionsArtifact(input: { + connectionId: string; + runId: string; + syncId: string; + decisions: KloRelationshipReviewDecisionArtifact['decisions']; +}): KloRelationshipReviewDecisionArtifact { + return { + connectionId: input.connectionId, + runId: input.runId, + syncId: input.syncId, + generatedAt: '2026-05-07T12:00:00.000Z', + decisions: input.decisions, + }; +} + +const acceptedOrderCustomer = { + candidateId: 'orders:orders.customer_id->customers:customers.id', + decision: 'accepted' as const, + previousStatus: 'review' as const, + connectionId: 'warehouse', + runId: 'scan-run-a', + syncId: 'sync-a', + decidedAt: '2026-05-07T12:00:00.000Z', + reviewer: 'Andrey', + note: 'Confirmed in warehouse docs', + from: { + tableId: 'orders', + columnIds: ['orders.customer_id'], + table: { catalog: null, db: 'public', name: 'orders' }, + columns: ['customer_id'], + }, + to: { + tableId: 'customers', + columnIds: ['customers.id'], + table: { catalog: null, db: 'public', name: 'customers' }, + columns: ['id'], + }, + relationshipType: 'many_to_one' as const, + source: 'deterministic_name', + score: 0.62, + confidence: 0.62, + 
pkScore: 0.91, + fkScore: 0.62, + reasons: ['fk_score_review'], +}; + +const rejectedOrderNote = { + candidateId: 'orders:orders.note_id->notes:notes.id', + decision: 'rejected' as const, + previousStatus: 'rejected' as const, + connectionId: 'warehouse', + runId: 'scan-run-a', + syncId: 'sync-a', + decidedAt: '2026-05-07T12:05:00.000Z', + reviewer: 'Andrey', + note: null, + from: { + tableId: 'orders', + columnIds: ['orders.note_id'], + table: { catalog: null, db: 'public', name: 'orders' }, + columns: ['note_id'], + }, + to: { + tableId: 'notes', + columnIds: ['notes.id'], + table: { catalog: null, db: 'public', name: 'notes' }, + columns: ['id'], + }, + relationshipType: 'many_to_one' as const, + source: 'deterministic_name', + score: 0.2, + confidence: 0.2, + pkScore: 0.4, + fkScore: 0.2, + reasons: ['low_source_coverage'], +}; + +const acceptedInvoiceAccount = { + candidateId: 'invoices:invoices.account_id->accounts:accounts.id', + decision: 'accepted' as const, + previousStatus: 'accepted' as const, + connectionId: 'billing', + runId: 'scan-run-b', + syncId: 'sync-b', + decidedAt: '2026-05-07T12:10:00.000Z', + reviewer: 'klo', + note: null, + from: { + tableId: 'invoices', + columnIds: ['invoices.account_id'], + table: { catalog: null, db: 'billing', name: 'invoices' }, + columns: ['account_id'], + }, + to: { + tableId: 'accounts', + columnIds: ['accounts.id'], + table: { catalog: null, db: 'billing', name: 'accounts' }, + columns: ['id'], + }, + relationshipType: 'many_to_one' as const, + source: 'formal_metadata', + score: 1, + confidence: 1, + pkScore: 1, + fkScore: 1, + reasons: ['formal_metadata_relationship'], +}; + +describe('relationship feedback export', () => { + it('exports stable labels from all relationship review decision artifacts', async () => { + const project = projectWithFiles({ + 'raw-sources/warehouse/live-database/sync-a/enrichment/relationship-review-decisions.json': decisionsArtifact({ + connectionId: 'warehouse', + runId: 
'scan-run-a', + syncId: 'sync-a', + decisions: [rejectedOrderNote, acceptedOrderCustomer], + }), + 'raw-sources/billing/live-database/sync-b/enrichment/relationship-review-decisions.json': decisionsArtifact({ + connectionId: 'billing', + runId: 'scan-run-b', + syncId: 'sync-b', + decisions: [acceptedInvoiceAccount], + }), + 'raw-sources/warehouse/live-database/sync-a/enrichment/relationships.json': { accepted: [], review: [], rejected: [] }, + }); + + const result = await exportLocalRelationshipFeedbackLabels(project, { + now: () => new Date('2026-05-07T13:00:00.000Z'), + }); + + expect(result.summary).toEqual({ + total: 3, + accepted: 2, + rejected: 1, + connections: 2, + runs: 2, + }); + expect(result.labels.map((label) => label.candidateId)).toEqual([ + 'invoices:invoices.account_id->accounts:accounts.id', + 'orders:orders.customer_id->customers:customers.id', + 'orders:orders.note_id->notes:notes.id', + ]); + expect(result.labels[0]).toMatchObject({ + schemaVersion: 1, + decision: 'accepted', + connectionId: 'billing', + source: 'formal_metadata', + fromTable: 'billing.invoices', + fromColumns: ['account_id'], + toTable: 'billing.accounts', + toColumns: ['id'], + artifactPath: 'raw-sources/billing/live-database/sync-b/enrichment/relationship-review-decisions.json', + }); + expect(result.warnings).toEqual([]); + }); + + it('filters labels by connection and decision', async () => { + const project = projectWithFiles({ + 'raw-sources/warehouse/live-database/sync-a/enrichment/relationship-review-decisions.json': decisionsArtifact({ + connectionId: 'warehouse', + runId: 'scan-run-a', + syncId: 'sync-a', + decisions: [rejectedOrderNote, acceptedOrderCustomer], + }), + 'raw-sources/billing/live-database/sync-b/enrichment/relationship-review-decisions.json': decisionsArtifact({ + connectionId: 'billing', + runId: 'scan-run-b', + syncId: 'sync-b', + decisions: [acceptedInvoiceAccount], + }), + }); + + const result = await exportLocalRelationshipFeedbackLabels(project, { 
+ connectionId: 'warehouse', + decision: 'rejected', + now: () => new Date('2026-05-07T13:00:00.000Z'), + }); + + expect(result.summary).toMatchObject({ total: 1, accepted: 0, rejected: 1 }); + expect(result.labels).toHaveLength(1); + expect(result.labels[0]?.candidateId).toBe('orders:orders.note_id->notes:notes.id'); + }); + + it('formats JSONL with one stable label object per line', async () => { + const project = projectWithFiles({ + 'raw-sources/warehouse/live-database/sync-a/enrichment/relationship-review-decisions.json': decisionsArtifact({ + connectionId: 'warehouse', + runId: 'scan-run-a', + syncId: 'sync-a', + decisions: [acceptedOrderCustomer], + }), + }); + const result = await exportLocalRelationshipFeedbackLabels(project, { + now: () => new Date('2026-05-07T13:00:00.000Z'), + }); + + const lines = formatKloRelationshipFeedbackLabelsJsonl(result).trim().split('\n').map((line) => JSON.parse(line)); + + expect(lines).toHaveLength(1); + expect(lines[0]).toMatchObject({ + schemaVersion: 1, + candidateId: 'orders:orders.customer_id->customers:customers.id', + decision: 'accepted', + relationshipType: 'many_to_one', + }); + }); + + it('records parse warnings and continues exporting readable decision artifacts', async () => { + const project = projectWithFiles({ + 'raw-sources/warehouse/live-database/sync-a/enrichment/relationship-review-decisions.json': decisionsArtifact({ + connectionId: 'warehouse', + runId: 'scan-run-a', + syncId: 'sync-a', + decisions: [acceptedOrderCustomer], + }), + 'raw-sources/broken/live-database/sync-b/enrichment/relationship-review-decisions.json': '{not-json', + }); + + const result = await exportLocalRelationshipFeedbackLabels(project, { + now: () => new Date('2026-05-07T13:00:00.000Z'), + }); + + expect(result.summary.total).toBe(1); + expect(result.warnings).toEqual([ + { + path: 'raw-sources/broken/live-database/sync-b/enrichment/relationship-review-decisions.json', + message: expect.any(String), + }, + ]); + 
import type { KloLocalProject } from '../project/index.js';
import type {
  KloRelationshipReviewDecisionArtifact,
  KloRelationshipReviewDecisionEntry,
  KloRelationshipReviewDecisionValue,
} from './relationship-review-decisions.js';

/** Path suffix identifying a relationship review decision artifact under `raw-sources`. */
const DECISION_ARTIFACT_SUFFIX = '/enrichment/relationship-review-decisions.json';
/** Schema version stamped on every exported label. */
const FEEDBACK_SCHEMA_VERSION = 1;

export type KloRelationshipFeedbackDecisionFilter = KloRelationshipReviewDecisionValue | 'all';

/** Filters and clock override accepted by {@link exportLocalRelationshipFeedbackLabels}. */
export interface ExportLocalRelationshipFeedbackLabelsInput {
  connectionId?: string | null;
  decision?: KloRelationshipFeedbackDecisionFilter;
  now?: () => Date;
}

/** Flattened form of one review decision, with qualified table names and its source path. */
export interface KloRelationshipFeedbackLabel {
  schemaVersion: 1;
  candidateId: string;
  decision: KloRelationshipReviewDecisionValue;
  previousStatus: KloRelationshipReviewDecisionEntry['previousStatus'];
  connectionId: string;
  runId: string;
  syncId: string;
  decidedAt: string;
  reviewer: string;
  note: string | null;
  relationshipType: KloRelationshipReviewDecisionEntry['relationshipType'];
  source: string;
  score: number | null;
  confidence: number;
  pkScore: number | null;
  fkScore: number | null;
  fromTable: string;
  fromColumns: string[];
  toTable: string;
  toColumns: string[];
  reasons: string[];
  artifactPath: string;
}

/** An artifact that could not be read or parsed, with the reason. */
export interface KloRelationshipFeedbackExportWarning {
  path: string;
  message: string;
}

/** Result of an export: applied filters, summary counts, sorted labels and warnings. */
export interface ExportLocalRelationshipFeedbackLabelsResult {
  generatedAt: string;
  filters: {
    connectionId: string | null;
    decision: KloRelationshipFeedbackDecisionFilter;
  };
  summary: {
    total: number;
    accepted: number;
    rejected: number;
    connections: number;
    runs: number;
  };
  labels: KloRelationshipFeedbackLabel[];
  warnings: KloRelationshipFeedbackExportWarning[];
}

/** Joins the non-empty catalog / db / name parts of an endpoint's table with dots. */
function qualifiedTableName(entry: KloRelationshipReviewDecisionEntry, side: 'from' | 'to'): string {
  const table = entry[side].table;
  return [table.catalog, table.db, table.name].filter((part): part is string => Boolean(part)).join('.');
}

/** Flattens one decision entry into a label; array fields are copied. */
function labelFromDecision(entry: KloRelationshipReviewDecisionEntry, artifactPath: string): KloRelationshipFeedbackLabel {
  return {
    schemaVersion: FEEDBACK_SCHEMA_VERSION,
    candidateId: entry.candidateId,
    decision: entry.decision,
    previousStatus: entry.previousStatus,
    connectionId: entry.connectionId,
    runId: entry.runId,
    syncId: entry.syncId,
    decidedAt: entry.decidedAt,
    reviewer: entry.reviewer,
    note: entry.note,
    relationshipType: entry.relationshipType,
    source: entry.source,
    score: entry.score,
    confidence: entry.confidence,
    pkScore: entry.pkScore,
    fkScore: entry.fkScore,
    fromTable: qualifiedTableName(entry, 'from'),
    fromColumns: [...entry.from.columns],
    toTable: qualifiedTableName(entry, 'to'),
    toColumns: [...entry.to.columns],
    reasons: [...entry.reasons],
    artifactPath,
  };
}

/** Returns a sorted copy ordered by connection id, run id, candidate id, then decision time. */
function sortLabels(labels: KloRelationshipFeedbackLabel[]): KloRelationshipFeedbackLabel[] {
  return [...labels].sort((left, right) => {
    return (
      left.connectionId.localeCompare(right.connectionId) ||
      left.runId.localeCompare(right.runId) ||
      left.candidateId.localeCompare(right.candidateId) ||
      left.decidedAt.localeCompare(right.decidedAt)
    );
  });
}

/** True when the label matches the connection filter (if any) and the decision filter. */
function passesFilters(
  label: KloRelationshipFeedbackLabel,
  filters: { connectionId: string | null; decision: KloRelationshipFeedbackDecisionFilter },
): boolean {
  if (filters.connectionId && label.connectionId !== filters.connectionId) {
    return false;
  }
  return filters.decision === 'all' || label.decision === filters.decision;
}

/** Extracts a printable message from a caught value of unknown type. */
function messageFromUnknownError(error: unknown): string {
  return error instanceof Error ? error.message : String(error);
}

/**
 * Reads one decision artifact and converts its decisions into labels.
 *
 * Throws when the file cannot be read, is not valid JSON, or does not contain a JSON
 * object. An object without a `decisions` array yields no labels.
 */
async function readDecisionLabels(
  project: KloLocalProject,
  artifactPath: string,
): Promise<KloRelationshipFeedbackLabel[]> {
  const raw = await project.fileStore.readFile(artifactPath);
  const parsed = JSON.parse(raw.content) as KloRelationshipReviewDecisionArtifact | null;
  if (parsed === null || typeof parsed !== 'object') {
    // Report a readable reason rather than a property-access TypeError on `null`.
    throw new Error('Relationship review decision artifact is not a JSON object');
  }
  const decisions = Array.isArray(parsed.decisions) ? parsed.decisions : [];
  return decisions.map((entry) => labelFromDecision(entry, artifactPath));
}

/** Counts labels by decision and the distinct connections and (connection, run) pairs. */
function summarize(labels: KloRelationshipFeedbackLabel[]): ExportLocalRelationshipFeedbackLabelsResult['summary'] {
  return {
    total: labels.length,
    accepted: labels.filter((label) => label.decision === 'accepted').length,
    rejected: labels.filter((label) => label.decision === 'rejected').length,
    connections: new Set(labels.map((label) => label.connectionId)).size,
    runs: new Set(labels.map((label) => `${label.connectionId}:${label.runId}`)).size,
  };
}

/**
 * Collects feedback labels from every relationship review decision artifact found under
 * `raw-sources` in the project's file store.
 *
 * An artifact that fails to read or parse is reported in `warnings` and contributes no
 * labels; the remaining artifacts are still exported.
 *
 * @param project Project whose file store is scanned.
 * @param input Optional connection/decision filters and a clock override for `generatedAt`.
 * @returns Filtered, sorted labels with summary counts computed over the filtered set.
 */
export async function exportLocalRelationshipFeedbackLabels(
  project: KloLocalProject,
  input: ExportLocalRelationshipFeedbackLabelsInput = {},
): Promise<ExportLocalRelationshipFeedbackLabelsResult> {
  const filters = {
    connectionId: input.connectionId ?? null,
    decision: input.decision ?? 'all',
  };
  const listed = await project.fileStore.listFiles('raw-sources');
  const artifactPaths = listed.files.filter((path) => path.endsWith(DECISION_ARTIFACT_SUFFIX)).sort();
  const labels: KloRelationshipFeedbackLabel[] = [];
  const warnings: KloRelationshipFeedbackExportWarning[] = [];

  for (const artifactPath of artifactPaths) {
    try {
      const artifactLabels = await readDecisionLabels(project, artifactPath);
      // Append one by one: spreading a very large array into `push` can exceed the
      // engine's argument limit, and that RangeError would be recorded as a warning.
      for (const label of artifactLabels) {
        labels.push(label);
      }
    } catch (error) {
      warnings.push({ path: artifactPath, message: messageFromUnknownError(error) });
    }
  }

  const filtered = sortLabels(labels.filter((label) => passesFilters(label, filters)));
  return {
    generatedAt: (input.now?.() ?? new Date()).toISOString(),
    filters,
    summary: summarize(filtered),
    labels: filtered,
    warnings,
  };
}

/** Serializes the labels as JSON Lines: one object per line, or `''` when there are none. */
export function formatKloRelationshipFeedbackLabelsJsonl(result: ExportLocalRelationshipFeedbackLabelsResult): string {
  if (result.labels.length === 0) {
    return '';
  }
  return `${result.labels.map((label) => JSON.stringify(label)).join('\n')}\n`;
}
+ }, + ], + relationships, + }; +} + +function formalRelationship(overrides: Partial = {}): KloEnrichedRelationship { + return { + id: 'orders:orders.account_id->accounts:accounts.id', + source: 'formal', + from: { + tableId: 'orders', + columnIds: ['orders.account_id'], + table: { catalog: null, db: null, name: 'orders' }, + columns: ['account_id'], + }, + to: { + tableId: 'accounts', + columnIds: ['accounts.id'], + table: { catalog: null, db: null, name: 'accounts' }, + columns: ['id'], + }, + relationshipType: 'many_to_one', + confidence: 0.6, + isPrimaryKeyReference: false, + ...overrides, + }; +} + +describe('formal metadata relationship collection', () => { + it('accepts valid formal relationships with ground-truth confidence', () => { + const result = collectKloFormalMetadataRelationships(schema([formalRelationship()])); + + expect(result.accepted).toEqual([ + expect.objectContaining({ + id: 'orders:orders.account_id->accounts:accounts.id', + source: 'formal', + confidence: 1, + isPrimaryKeyReference: true, + }), + ]); + expect(result.skipped).toEqual([]); + expect(result.acceptedIds).toEqual(new Set(['orders:orders.account_id->accounts:accounts.id'])); + }); + + it('skips duplicate and invalid formal relationships with reasons', () => { + const result = collectKloFormalMetadataRelationships( + schema([ + formalRelationship(), + formalRelationship(), + formalRelationship({ + id: 'orders:orders.missing_account_id->accounts:accounts.id', + from: { + tableId: 'orders', + columnIds: ['orders.missing_account_id'], + table: { catalog: null, db: null, name: 'orders' }, + columns: ['missing_account_id'], + }, + }), + formalRelationship({ + id: 'manual-edge', + source: 'manual', + }), + ]), + ); + + expect(result.accepted).toHaveLength(1); + expect(result.skipped).toEqual([ + { + relationshipId: 'orders:orders.account_id->accounts:accounts.id', + reason: 'formal_metadata_duplicate', + }, + { + relationshipId: 'orders:orders.missing_account_id->accounts:accounts.id', 
import type { KloEnrichedRelationship, KloEnrichedSchema, KloSkippedRelationship } from './enrichment-types.js';

/** Outcome of collecting formal relationships: accepted edges, skipped edges, accepted ids. */
export interface KloFormalMetadataRelationshipCollection {
  accepted: KloEnrichedRelationship[];
  skipped: KloSkippedRelationship[];
  acceptedIds: Set<string>;
}

/**
 * True when one side of a relationship resolves against the schema: its table exists and
 * is enabled, it names at least one column, and every referenced column id belongs to a
 * column of that table whose name is listed on the endpoint.
 */
function endpointResolves(schema: KloEnrichedSchema, endpoint: KloEnrichedRelationship['from']): boolean {
  const table = schema.tables.find((candidate) => candidate.id === endpoint.tableId && candidate.enabled);
  // `every` is vacuously true on an empty list, so an endpoint without columns is rejected
  // explicitly.
  if (!table || endpoint.columnIds.length === 0) {
    return false;
  }
  return endpoint.columnIds.every((columnId) =>
    table.columns.some((column) => column.id === columnId && endpoint.columns.includes(column.name)),
  );
}

/**
 * True when both endpoints resolve. All columns of a composite key must exist; a single
 * matching column is not sufficient.
 */
function relationshipEndpointExists(schema: KloEnrichedSchema, relationship: KloEnrichedRelationship): boolean {
  return endpointResolves(schema, relationship.from) && endpointResolves(schema, relationship.to);
}

/**
 * Collects the relationships of a schema whose `source` is `'formal'`.
 *
 * Relationships with any other source are ignored. A formal relationship whose id was
 * already accepted is skipped as `formal_metadata_duplicate`; one whose endpoints do not
 * resolve is skipped as `formal_metadata_endpoint_not_found`. Accepted relationships are
 * returned as copies with `confidence` set to 1 and `isPrimaryKeyReference` set to true.
 *
 * @param schema Enriched schema providing tables and candidate relationships.
 * @returns Accepted relationships sorted by id, skipped relationships in input order, and
 *          the set of accepted ids.
 */
export function collectKloFormalMetadataRelationships(
  schema: KloEnrichedSchema,
): KloFormalMetadataRelationshipCollection {
  const accepted: KloEnrichedRelationship[] = [];
  const skipped: KloSkippedRelationship[] = [];
  const acceptedIds = new Set<string>();

  for (const relationship of schema.relationships) {
    if (relationship.source !== 'formal') {
      continue;
    }
    if (acceptedIds.has(relationship.id)) {
      skipped.push({
        relationshipId: relationship.id,
        reason: 'formal_metadata_duplicate',
      });
      continue;
    }
    if (!relationshipEndpointExists(schema, relationship)) {
      skipped.push({
        relationshipId: relationship.id,
        reason: 'formal_metadata_endpoint_not_found',
      });
      continue;
    }

    acceptedIds.add(relationship.id);
    accepted.push({
      ...relationship,
      source: 'formal',
      confidence: 1,
      isPrimaryKeyReference: true,
    });
  }

  return {
    accepted: accepted.sort((left, right) => left.id.localeCompare(right.id)),
    skipped,
    acceptedIds,
  };
}
false, + parentColumnId: null, + descriptions: {}, + embedding: null, + sampleValues: null, + cardinality: null, + ...overrides, + }; +} + +function table(name: string, columns: KloEnrichedColumn[]): KloEnrichedTable { + const ref = { catalog: null, db: null, name }; + return { + id: name, + ref, + enabled: true, + descriptions: {}, + columns: columns.map((item) => ({ ...item, tableId: name, tableRef: ref })), + }; +} + +function schema(overrides: { accountsPrimaryKey?: boolean } = {}): KloEnrichedSchema { + return { + connectionId: 'warehouse', + tables: [ + table('accounts', [ + column('accounts', 'id', { nullable: false, primaryKey: overrides.accountsPrimaryKey ?? false }), + column('accounts', 'name', { nativeType: 'TEXT', normalizedType: 'text', dimensionType: 'string' }), + ]), + table('account_archive', [column('account_archive', 'id', { nullable: false })]), + table('users', [ + column('users', 'id', { nullable: false }), + column('users', 'account_id', { nullable: false }), + ]), + ], + relationships: [], + }; +} + +function endpoint(tableName: string, columnName: string): KloRelationshipEndpoint { + return { + tableId: tableName, + columnIds: [`${tableName}.${columnName}`], + table: { catalog: null, db: null, name: tableName }, + columns: [columnName], + }; +} + +function profiles(): KloRelationshipProfileArtifact { + return { + connectionId: 'warehouse', + driver: 'sqlite', + sqlAvailable: true, + queryCount: 0, + tables: [ + { table: { catalog: null, db: null, name: 'accounts' }, rowCount: 3 }, + { table: { catalog: null, db: null, name: 'account_archive' }, rowCount: 3 }, + { table: { catalog: null, db: null, name: 'users' }, rowCount: 3 }, + ], + columns: { + 'accounts.id': { + table: { catalog: null, db: null, name: 'accounts' }, + column: 'id', + nativeType: 'INTEGER', + normalizedType: 'integer', + rowCount: 3, + nullCount: 0, + distinctCount: 3, + uniquenessRatio: 1, + nullRate: 0, + sampleValues: ['1', '2', '3'], + minTextLength: 1, + 
maxTextLength: 1, + }, + 'account_archive.id': { + table: { catalog: null, db: null, name: 'account_archive' }, + column: 'id', + nativeType: 'INTEGER', + normalizedType: 'integer', + rowCount: 3, + nullCount: 0, + distinctCount: 3, + uniquenessRatio: 1, + nullRate: 0, + sampleValues: ['1', '2', '3'], + minTextLength: 1, + maxTextLength: 1, + }, + 'users.account_id': { + table: { catalog: null, db: null, name: 'users' }, + column: 'account_id', + nativeType: 'INTEGER', + normalizedType: 'integer', + rowCount: 3, + nullCount: 0, + distinctCount: 3, + uniquenessRatio: 1, + nullRate: 0, + sampleValues: ['1', '2', '3'], + minTextLength: 1, + maxTextLength: 1, + }, + }, + warnings: [], + }; +} + +function validatedCandidate( + overrides: Partial = {}, +): KloValidatedRelationshipDiscoveryCandidate { + const from = overrides.from ?? endpoint('users', 'account_id'); + const to = overrides.to ?? endpoint('accounts', 'id'); + return { + id: `${from.tableId}:(${from.columnIds.join(',')})->${to.tableId}:(${to.columnIds.join(',')})`, + from, + to, + relationshipType: 'many_to_one', + confidence: overrides.confidence ?? 0.95, + source: overrides.source ?? 'normalized_table_match', + status: overrides.status ?? 'accepted', + score: overrides.score ?? 0.96, + evidence: { + sourceColumnBase: 'account', + targetTableBase: to.table.name, + targetColumnBase: to.columns[0] ?? 
'', + targetKeyScore: 0.92, + nameScore: 0.92, + reasons: ['foreign_key_suffix', 'normalized_table_name', 'target_key_like'], + ...overrides.evidence, + }, + validation: { + targetUniqueness: 1, + sourceCoverage: 1, + violationCount: 0, + violationRatio: 0, + sourceNullRate: 0, + targetNullRate: 0, + childDistinct: 3, + parentDistinct: 3, + overlap: 3, + checkedValues: 3, + reasons: ['validation_passed'], + ...overrides.validation, + }, + ...overrides, + }; +} + +describe('relationship graph resolver', () => { + it('promotes validated relationship discovery references to accepted relationships and inferred PKs', () => { + const result = resolveKloRelationshipGraph({ + schema: schema(), + profiles: profiles(), + candidates: [validatedCandidate()], + }); + + expect(result.pks).toContainEqual({ + table: 'accounts', + columns: ['id'], + pkScore: expect.any(Number), + status: 'accepted', + incomingCandidateCount: 1, + evidence: { + declaredPrimaryKey: false, + targetUniqueness: 1, + incomingAcceptedCount: 1, + incomingReviewCount: 0, + reasons: expect.arrayContaining(['unique_target_column', 'incoming_validated_reference']), + }, + }); + expect(result.pks.find((pk) => pk.table === 'accounts')?.pkScore).toBeGreaterThanOrEqual(0.85); + expect(result.relationships).toHaveLength(1); + expect(result.relationships[0]).toMatchObject({ + from: { table: { name: 'users' }, columns: ['account_id'] }, + to: { table: { name: 'accounts' }, columns: ['id'] }, + status: 'accepted', + pkScore: expect.any(Number), + fkScore: expect.any(Number), + graph: { + reasons: expect.arrayContaining(['target_pk_score_passed', 'fk_score_passed']), + }, + }); + expect(result.relationships[0]?.fkScore).toBeGreaterThanOrEqual(0.85); + }); + + it('keeps validation-unavailable candidates in review even when name evidence is strong', () => { + const result = resolveKloRelationshipGraph({ + schema: schema(), + profiles: { ...profiles(), sqlAvailable: false, columns: {}, warnings: 
['read_only_sql_unavailable'] }, + candidates: [ + validatedCandidate({ + status: 'review', + score: 0.57, + validation: { + targetUniqueness: 0, + sourceCoverage: 0, + violationCount: 0, + violationRatio: 1, + sourceNullRate: 0, + targetNullRate: 0, + childDistinct: 0, + parentDistinct: 0, + overlap: 0, + checkedValues: 0, + reasons: ['validation_unavailable'], + }, + }), + ], + }); + + expect(result.relationships).toHaveLength(1); + expect(result.relationships[0]).toMatchObject({ + status: 'review', + graph: { + reasons: expect.arrayContaining(['validation_unavailable_review_only']), + }, + }); + expect(result.relationships[0]?.fkScore).toBeGreaterThanOrEqual(0.55); + }); + + it('accepts at most one target per source column and rejects the lower-scored conflict loser', () => { + const winner = validatedCandidate({ confidence: 0.95, score: 0.96 }); + const loser = validatedCandidate({ + from: endpoint('users', 'account_id'), + to: endpoint('account_archive', 'id'), + confidence: 0.85, + score: 0.9, + evidence: { + sourceColumnBase: 'account', + targetTableBase: 'account_archive', + targetColumnBase: 'id', + targetKeyScore: 0.92, + nameScore: 0.78, + reasons: ['foreign_key_suffix', 'inflection', 'target_key_like'], + }, + }); + + const result = resolveKloRelationshipGraph({ + schema: schema(), + profiles: profiles(), + candidates: [loser, winner], + }); + + expect(result.relationships.map((relationship) => relationship.status)).toEqual(['accepted', 'rejected']); + expect(result.relationships[0]?.to.table.name).toBe('accounts'); + expect(result.relationships[1]).toMatchObject({ + to: { table: { name: 'account_archive' }, columns: ['id'] }, + status: 'rejected', + graph: { + reasons: expect.arrayContaining(['conflict_lost']), + }, + }); + }); + + it('preserves declared primary keys as accepted even without incoming candidates', () => { + const result = resolveKloRelationshipGraph({ + schema: schema({ accountsPrimaryKey: true }), + profiles: profiles(), + candidates: 
[], + }); + + expect(result.relationships).toEqual([]); + expect(result.pks).toContainEqual({ + table: 'accounts', + columns: ['id'], + pkScore: 1, + status: 'accepted', + incomingCandidateCount: 0, + evidence: { + declaredPrimaryKey: true, + targetUniqueness: 1, + incomingAcceptedCount: 0, + incomingReviewCount: 0, + reasons: ['declared_primary_key'], + }, + }); + }); + + it('infers profile-only key-like columns without incoming relationship candidates', () => { + const baseSchema = schema(); + const invoices = table('invoices', [ + column('invoices', 'id', { nullable: false }), + column('invoices', 'invoice_number', { + nativeType: 'TEXT', + normalizedType: 'text', + dimensionType: 'string', + nullable: false, + }), + column('invoices', 'amount', { + nativeType: 'INTEGER', + normalizedType: 'integer', + dimensionType: 'number', + nullable: false, + }), + ]); + const baseProfiles = profiles(); + const result = resolveKloRelationshipGraph({ + schema: { ...baseSchema, tables: [...baseSchema.tables, invoices] }, + profiles: { + ...baseProfiles, + tables: [...baseProfiles.tables, { table: invoices.ref, rowCount: 3 }], + columns: { + ...baseProfiles.columns, + 'invoices.id': { + table: invoices.ref, + column: 'id', + nativeType: 'INTEGER', + normalizedType: 'integer', + rowCount: 3, + nullCount: 0, + distinctCount: 3, + uniquenessRatio: 1, + nullRate: 0, + sampleValues: ['1', '2', '3'], + minTextLength: 1, + maxTextLength: 1, + }, + 'invoices.invoice_number': { + table: invoices.ref, + column: 'invoice_number', + nativeType: 'TEXT', + normalizedType: 'text', + rowCount: 3, + nullCount: 0, + distinctCount: 3, + uniquenessRatio: 1, + nullRate: 0, + sampleValues: ['INV-1', 'INV-2', 'INV-3'], + minTextLength: 5, + maxTextLength: 5, + }, + 'invoices.amount': { + table: invoices.ref, + column: 'amount', + nativeType: 'INTEGER', + normalizedType: 'integer', + rowCount: 3, + nullCount: 0, + distinctCount: 2, + uniquenessRatio: 2 / 3, + nullRate: 0, + sampleValues: ['100', 
'200'], + minTextLength: 3, + maxTextLength: 3, + }, + }, + }, + candidates: [], + }); + + expect(result.relationships).toEqual([]); + expect(result.pks).toContainEqual({ + table: 'invoices', + columns: ['id'], + pkScore: 1, + status: 'accepted', + incomingCandidateCount: 0, + evidence: { + declaredPrimaryKey: false, + targetUniqueness: 1, + incomingAcceptedCount: 0, + incomingReviewCount: 0, + reasons: expect.arrayContaining([ + 'unique_target_column', + 'profile_key_name', + 'not_null_profile', + 'profile_only_primary_key', + 'no_incoming_references', + ]), + }, + }); + expect(result.pks).toContainEqual( + expect.objectContaining({ + table: 'invoices', + columns: ['invoice_number'], + status: 'review', + evidence: expect.objectContaining({ + reasons: expect.arrayContaining(['profile_only_primary_key', 'weak_name_profile_key']), + }), + }), + ); + expect(result.pks.some((pk) => pk.table === 'invoices' && pk.columns[0] === 'amount')).toBe(false); + }); + + it('pins single-incoming column_suffix_match resolver scores', () => { + const schema = { + connectionId: 'warehouse', + relationships: [], + tables: [ + { + id: 'plans-id', + ref: { catalog: null, db: null, name: 'stg_plans' }, + enabled: true, + descriptions: {}, + columns: [ + { + id: 'plan-code-col', + tableId: 'plans-id', + tableRef: { catalog: null, db: null, name: 'stg_plans' }, + name: 'plan_code', + nativeType: 'TEXT', + normalizedType: 'text', + dimensionType: 'string', + nullable: false, + primaryKey: false, + parentColumnId: null, + descriptions: {}, + embedding: null, + sampleValues: null, + cardinality: null, + }, + ], + }, + { + id: 'segments-id', + ref: { catalog: null, db: null, name: 'mart_account_segments' }, + enabled: true, + descriptions: {}, + columns: [ + { + id: 'current-plan-code-col', + tableId: 'segments-id', + tableRef: { catalog: null, db: null, name: 'mart_account_segments' }, + name: 'current_plan_code', + nativeType: 'TEXT', + normalizedType: 'text', + dimensionType: 'string', + 
nullable: false, + primaryKey: false, + parentColumnId: null, + descriptions: {}, + embedding: null, + sampleValues: null, + cardinality: null, + }, + ], + }, + ], + } satisfies KloEnrichedSchema; + const profiles = { + connectionId: 'warehouse', + driver: 'sqlite' as const, + sqlAvailable: true, + queryCount: 0, + tables: [], + warnings: [], + columns: { + 'stg_plans.plan_code': { + table: { catalog: null, db: null, name: 'stg_plans' }, + column: 'plan_code', + nativeType: 'TEXT', + normalizedType: 'text', + rowCount: 4, + nullCount: 0, + distinctCount: 4, + uniquenessRatio: 1, + nullRate: 0, + sampleValues: ['basic', 'enterprise', 'free', 'pro'], + minTextLength: 4, + maxTextLength: 10, + }, + }, + }; + const result = resolveKloRelationshipGraph({ + schema, + profiles, + candidates: [ + { + id: 'segments:(current_plan_code)->plans:(plan_code)', + from: { + tableId: 'segments-id', + columnIds: ['current-plan-code-col'], + table: { catalog: null, db: null, name: 'mart_account_segments' }, + columns: ['current_plan_code'], + }, + to: { + tableId: 'plans-id', + columnIds: ['plan-code-col'], + table: { catalog: null, db: null, name: 'stg_plans' }, + columns: ['plan_code'], + }, + relationshipType: 'many_to_one', + confidence: 0.902, + source: 'column_suffix_match', + evidence: { + sourceColumnBase: 'current_plan', + targetTableBase: 'plan', + targetColumnBase: 'plan_code', + targetKeyScore: 0.86, + nameScore: 0.78, + reasons: ['column_suffix_match', 'profile_unique_target'], + }, + status: 'accepted', + score: 0.98, + validation: { + targetUniqueness: 1, + sourceCoverage: 1, + violationCount: 0, + violationRatio: 0, + sourceNullRate: 0, + targetNullRate: 0, + childDistinct: 4, + parentDistinct: 4, + overlap: 4, + checkedValues: 4, + reasons: ['validation_passed'], + }, + }, + ], + }); + + expect(result.pks).toEqual([ + expect.objectContaining({ + table: 'stg_plans', + columns: ['plan_code'], + pkScore: 0.922, + status: 'accepted', + }), + ]); + 
expect(result.relationships).toEqual([ + expect.objectContaining({ + source: 'column_suffix_match', + status: 'accepted', + pkScore: 0.922, + fkScore: 0.953, + }), + ]); + }); + + it('keeps strong profile-only primary key evidence when name evidence is weak', () => { + const baseSchema = schema(); + baseSchema.tables.push( + table('events', [ + column('events', 'warehouse_key', { + nullable: false, + primaryKey: false, + nativeType: 'INTEGER', + normalizedType: 'integer', + }), + ]), + ); + + const baseProfiles = profiles(); + baseProfiles.tables.push({ table: { catalog: null, db: null, name: 'events' }, rowCount: 3 }); + baseProfiles.columns['events.warehouse_key'] = { + table: { catalog: null, db: null, name: 'events' }, + column: 'warehouse_key', + nativeType: 'INTEGER', + normalizedType: 'integer', + rowCount: 3, + nullCount: 0, + distinctCount: 3, + uniquenessRatio: 1, + nullRate: 0, + sampleValues: ['100', '101', '102'], + minTextLength: 3, + maxTextLength: 3, + }; + + const result = resolveKloRelationshipGraph({ + schema: baseSchema, + profiles: baseProfiles, + candidates: [], + }); + + expect(result.pks).toEqual( + expect.arrayContaining([ + expect.objectContaining({ + table: 'events', + columns: ['warehouse_key'], + status: 'review', + evidence: expect.objectContaining({ + reasons: expect.arrayContaining(['profile_only_primary_key', 'weak_name_profile_key']), + }), + }), + ]), + ); + }); + + it('keeps strong profile-only primary key evidence when the column is not key-shaped', () => { + const baseSchema = schema(); + baseSchema.tables.push( + table('events', [ + column('events', 'opaque_reference', { + nullable: false, + primaryKey: false, + nativeType: 'INTEGER', + normalizedType: 'integer', + }), + ]), + ); + + const baseProfiles = profiles(); + baseProfiles.tables.push({ table: { catalog: null, db: null, name: 'events' }, rowCount: 3 }); + baseProfiles.columns['events.opaque_reference'] = { + table: { catalog: null, db: null, name: 'events' }, + column: 
'opaque_reference', + nativeType: 'INTEGER', + normalizedType: 'integer', + rowCount: 3, + nullCount: 0, + distinctCount: 3, + uniquenessRatio: 1, + nullRate: 0, + sampleValues: ['100', '101', '102'], + minTextLength: 3, + maxTextLength: 3, + }; + + const result = resolveKloRelationshipGraph({ + schema: baseSchema, + profiles: baseProfiles, + candidates: [], + }); + + const inferredPk = result.pks.find((candidate) => candidate.table === 'events'); + expect(inferredPk).toMatchObject({ + table: 'events', + columns: ['opaque_reference'], + status: 'review', + evidence: expect.objectContaining({ + reasons: expect.arrayContaining(['profile_only_primary_key', 'weak_name_profile_key']), + }), + }); + expect(inferredPk?.pkScore).toBeGreaterThanOrEqual(0.55); + }); +}); diff --git a/packages/context/src/scan/relationship-graph-resolver.ts b/packages/context/src/scan/relationship-graph-resolver.ts new file mode 100644 index 00000000..fc9f361c --- /dev/null +++ b/packages/context/src/scan/relationship-graph-resolver.ts @@ -0,0 +1,508 @@ +import type { + KloEnrichedColumn, + KloEnrichedSchema, + KloEnrichedTable, + KloRelationshipEndpoint, +} from './enrichment-types.js'; +import { normalizeKloRelationshipName } from './relationship-candidates.js'; +import type { KloRelationshipProfileArtifact } from './relationship-profiling.js'; +import { scoreKloRelationshipCandidate } from './relationship-scoring.js'; +import type { KloValidatedRelationshipDiscoveryCandidate } from './relationship-validation.js'; + +export type KloResolvedRelationshipStatus = 'accepted' | 'review' | 'rejected'; + +export interface KloRelationshipGraphResolverSettings { + acceptThreshold: number; + reviewThreshold: number; + minTargetPkScoreForAcceptance: number; + validationRequiredForManifest: boolean; +} + +export interface KloResolvedRelationshipPkEvidence { + declaredPrimaryKey: boolean; + targetUniqueness: number; + incomingAcceptedCount: number; + incomingReviewCount: number; + reasons: string[]; +} 
+ +export interface KloResolvedRelationshipPk { + table: string; + columns: string[]; + pkScore: number; + status: KloResolvedRelationshipStatus; + incomingCandidateCount: number; + evidence: KloResolvedRelationshipPkEvidence; +} + +export interface KloResolvedRelationshipGraphEvidence { + targetPkScore: number; + incomingCandidateCount: number; + conflictRank: number; + reasons: string[]; +} + +export interface KloResolvedRelationshipDiscoveryCandidate + extends Omit { + status: KloResolvedRelationshipStatus; + pkScore: number; + fkScore: number; + graph: KloResolvedRelationshipGraphEvidence; +} + +export interface KloRelationshipGraphResolutionResult { + pks: KloResolvedRelationshipPk[]; + relationships: KloResolvedRelationshipDiscoveryCandidate[]; +} + +export interface ResolveKloRelationshipGraphInput { + schema: KloEnrichedSchema; + profiles: KloRelationshipProfileArtifact; + candidates: readonly KloValidatedRelationshipDiscoveryCandidate[]; + settings?: Partial; +} + +const DEFAULT_SETTINGS: KloRelationshipGraphResolverSettings = { + acceptThreshold: 0.85, + reviewThreshold: 0.55, + minTargetPkScoreForAcceptance: 0.78, + validationRequiredForManifest: true, +}; + +const PROFILE_ONLY_PK_MEASURE_NAME_TOKENS = new Set(['amount', 'count', 'price', 'quantity', 'subtotal', 'total']); + +function mergeSettings( + settings: Partial | undefined, +): KloRelationshipGraphResolverSettings { + return { ...DEFAULT_SETTINGS, ...settings }; +} + +function roundScore(value: number): number { + return Number(Math.max(0, Math.min(1, value)).toFixed(3)); +} + +function endpointKey(endpoint: KloRelationshipEndpoint): string { + return `${endpoint.table.name}.${singleRelationshipColumn(endpoint)}`; +} + +function sourceKey(endpoint: KloRelationshipEndpoint): string { + return `${endpoint.tableId}:${endpoint.columnIds.join(',')}`; +} + +function singleRelationshipColumn(endpoint: KloRelationshipEndpoint): string { + const column = endpoint.columns[0]; + if (!column) { + throw new 
Error(`Expected relationship endpoint ${endpoint.table.name} to contain one column`); + } + return column; +} + +function pkKey(pk: Pick): string { + return `${pk.table}.(${pk.columns.join(',')})`; +} + +function candidateSortKey(candidate: Pick): string { + return `${candidate.from.table.name}.${singleRelationshipColumn(candidate.from)}->${candidate.to.table.name}.${singleRelationshipColumn(candidate.to)}`; +} + +function statusForScore( + score: number, + settings: KloRelationshipGraphResolverSettings, + acceptedAllowed: boolean, +): KloResolvedRelationshipStatus { + if (acceptedAllowed && score >= settings.acceptThreshold) { + return 'accepted'; + } + if (score >= settings.reviewThreshold) { + return 'review'; + } + return 'rejected'; +} + +function candidateHasValidationPassed(candidate: KloValidatedRelationshipDiscoveryCandidate): boolean { + return candidate.validation.reasons.includes('validation_passed'); +} + +function candidateIsValidationUnavailable(candidate: KloValidatedRelationshipDiscoveryCandidate): boolean { + return ( + candidate.validation.reasons.includes('validation_unavailable') || + candidate.validation.reasons.includes('profile_unavailable') + ); +} + +function declaredPrimaryKeys(schema: KloEnrichedSchema): KloResolvedRelationshipPk[] { + const pks: KloResolvedRelationshipPk[] = []; + for (const table of schema.tables.filter((candidate) => candidate.enabled)) { + for (const column of table.columns.filter((candidate) => candidate.primaryKey)) { + pks.push({ + table: table.ref.name, + columns: [column.name], + pkScore: 1, + status: 'accepted', + incomingCandidateCount: 0, + evidence: { + declaredPrimaryKey: true, + targetUniqueness: 1, + incomingAcceptedCount: 0, + incomingReviewCount: 0, + reasons: ['declared_primary_key'], + }, + }); + } + } + return pks; +} + +function schemaTargetColumns(schema: KloEnrichedSchema): Array<{ table: KloEnrichedTable; column: KloEnrichedColumn }> { + return schema.tables + .filter((table) => table.enabled) + 
.flatMap((table) => table.columns.map((column) => ({ table, column }))); +} + +function profileUniqueness(profiles: KloRelationshipProfileArtifact, tableName: string, columnName: string): number { + return profiles.columns[`${tableName}.${columnName}`]?.uniquenessRatio ?? 0; +} + +function profileNullRate(profiles: KloRelationshipProfileArtifact, tableName: string, columnName: string): number { + return profiles.columns[`${tableName}.${columnName}`]?.nullRate ?? 1; +} + +function profileColumnExists(profiles: KloRelationshipProfileArtifact, tableName: string, columnName: string): boolean { + return Boolean(profiles.columns[`${tableName}.${columnName}`]); +} + +function profileOnlyPkNameScore(tableName: string, columnName: string): number { + const table = normalizeKloRelationshipName(tableName).singular; + const column = normalizeKloRelationshipName(columnName).normalized; + if (column === 'id') { + return 1; + } + if (column === `${table}_id`) { + return 0.96; + } + if (column === `${table}_key`) { + return 0.88; + } + if (column === 'key' || column === 'uuid') { + return 0.76; + } + return 0; +} + +function profileOnlyPkTypeCompatibility(columnName: string): number { + const tokens = normalizeKloRelationshipName(columnName).normalized.split('_').filter(Boolean); + return tokens.some((token) => PROFILE_ONLY_PK_MEASURE_NAME_TOKENS.has(token)) ? 
0 : 1; +} + +function profileOnlyPkEvidence(input: { + profiles: KloRelationshipProfileArtifact; + tableName: string; + columnName: string; +}): { nameScore: number; nullRate: number; uniqueness: number; pkScore: number; weakName: boolean } | null { + if (!profileColumnExists(input.profiles, input.tableName, input.columnName)) { + return null; + } + const uniqueness = profileUniqueness(input.profiles, input.tableName, input.columnName); + const nullRate = profileNullRate(input.profiles, input.tableName, input.columnName); + const nameScore = profileOnlyPkNameScore(input.tableName, input.columnName); + if (uniqueness < 0.98 || nullRate > 0.05) { + return null; + } + const typeCompatibility = profileOnlyPkTypeCompatibility(input.columnName); + const scoreBreakdown = scoreKloRelationshipCandidate( + { + nameSimilarity: nameScore, + typeCompatibility, + valueOverlap: 0, + embeddingSimilarity: 0, + profileUniqueness: uniqueness, + profileNullRate: 1 - nullRate, + structuralPrior: 0.65, + }, + { + nameSimilarity: 0.2, + typeCompatibility: 0.08, + valueOverlap: 0, + embeddingSimilarity: 0, + profileUniqueness: 0.48, + profileNullRate: 0.2, + structuralPrior: 0.04, + }, + ); + + if (scoreBreakdown.score < DEFAULT_SETTINGS.reviewThreshold) { + return null; + } + + return { nameScore, nullRate, uniqueness, pkScore: scoreBreakdown.score, weakName: nameScore < 0.74 }; +} + +function resolveTargetPk(input: { + table: string; + column: string; + declared: KloResolvedRelationshipPk | undefined; + profiles: KloRelationshipProfileArtifact; + incoming: readonly KloValidatedRelationshipDiscoveryCandidate[]; + settings: KloRelationshipGraphResolverSettings; + profileOnly?: { nameScore: number; nullRate: number; uniqueness: number; pkScore: number; weakName: boolean } | null; +}): KloResolvedRelationshipPk { + if (input.declared) { + return input.declared; + } + + const targetUniqueness = profileUniqueness(input.profiles, input.table, input.column); + const incomingAccepted = 
input.incoming.filter((candidate) => candidate.status === 'accepted'); + const incomingReview = input.incoming.filter((candidate) => candidate.status === 'review'); + const incomingQuality = Math.max(0, ...input.incoming.map((candidate) => candidate.score)); + const incomingVolume = Math.min(1, incomingAccepted.length * 0.3 + incomingReview.length * 0.15); + const keyEvidence = Math.max(0, ...input.incoming.map((candidate) => candidate.evidence.targetKeyScore)); + const reasons: string[] = []; + + if (targetUniqueness >= 0.9) { + reasons.push('unique_target_column'); + } + if (incomingAccepted.length > 0) { + reasons.push('incoming_validated_reference'); + } + if (incomingReview.length > 0) { + reasons.push('incoming_review_reference'); + } + if (keyEvidence >= 0.8) { + reasons.push('target_key_like'); + } + if (input.incoming.length === 0) { + reasons.push('no_incoming_references'); + } + + if (input.profileOnly) { + reasons.push('not_null_profile', 'profile_only_primary_key'); + if (input.profileOnly.weakName) { + reasons.push('weak_name_profile_key'); + } else { + reasons.push('profile_key_name'); + } + const pkScore = input.profileOnly.pkScore; + return { + table: input.table, + columns: [input.column], + pkScore, + status: statusForScore(pkScore, input.settings, !input.profileOnly.weakName), + incomingCandidateCount: 0, + evidence: { + declaredPrimaryKey: false, + targetUniqueness, + incomingAcceptedCount: 0, + incomingReviewCount: 0, + reasons, + }, + }; + } + + const pkScore = roundScore(0.52 * targetUniqueness + 0.28 * incomingQuality + 0.12 * keyEvidence + 0.08 * incomingVolume); + const acceptedAllowed = incomingAccepted.length > 0 && targetUniqueness >= 0.9; + const status = + incomingReview.length > 0 && pkScore < input.settings.reviewThreshold + ? 
'review' + : statusForScore(pkScore, input.settings, acceptedAllowed); + + return { + table: input.table, + columns: [input.column], + pkScore, + status, + incomingCandidateCount: input.incoming.length, + evidence: { + declaredPrimaryKey: false, + targetUniqueness, + incomingAcceptedCount: incomingAccepted.length, + incomingReviewCount: incomingReview.length, + reasons, + }, + }; +} + +function baseRelationshipResolution(input: { + candidate: KloValidatedRelationshipDiscoveryCandidate; + pk: KloResolvedRelationshipPk; + settings: KloRelationshipGraphResolverSettings; +}): KloResolvedRelationshipDiscoveryCandidate { + const reasons: string[] = []; + if (input.candidate.status === 'rejected') { + reasons.push('candidate_validation_rejected'); + } + if (candidateIsValidationUnavailable(input.candidate)) { + reasons.push('validation_unavailable_review_only'); + } + if (input.pk.pkScore >= input.settings.minTargetPkScoreForAcceptance) { + reasons.push('target_pk_score_passed'); + } else { + reasons.push('target_pk_score_low'); + } + if (candidateHasValidationPassed(input.candidate)) { + reasons.push('validation_passed'); + } + + const validationPassBonus = candidateHasValidationPassed(input.candidate) ? 
1 : 0; + let fkScore = roundScore( + 0.48 * input.candidate.score + + 0.3 * input.pk.pkScore + + 0.14 * input.candidate.confidence + + 0.08 * validationPassBonus, + ); + let status: KloResolvedRelationshipStatus; + + if (input.candidate.status === 'rejected') { + status = 'rejected'; + } else if (candidateIsValidationUnavailable(input.candidate)) { + status = 'review'; + fkScore = Math.max(fkScore, input.settings.reviewThreshold); + } else { + const acceptedAllowed = + input.candidate.status === 'accepted' && + input.pk.pkScore >= input.settings.minTargetPkScoreForAcceptance && + (!input.settings.validationRequiredForManifest || candidateHasValidationPassed(input.candidate)); + status = statusForScore(fkScore, input.settings, acceptedAllowed); + } + + if (status === 'accepted') { + reasons.push('fk_score_passed'); + } else if (status === 'review') { + reasons.push('fk_score_review'); + } else { + reasons.push('fk_score_rejected'); + } + + return { + ...input.candidate, + status, + pkScore: input.pk.pkScore, + fkScore, + graph: { + targetPkScore: input.pk.pkScore, + incomingCandidateCount: input.pk.incomingCandidateCount, + conflictRank: 1, + reasons, + }, + }; +} + +function relationshipRank( + left: KloResolvedRelationshipDiscoveryCandidate, + right: KloResolvedRelationshipDiscoveryCandidate, +): number { + return ( + right.fkScore - left.fkScore || + right.validation.sourceCoverage - left.validation.sourceCoverage || + right.pkScore - left.pkScore || + candidateSortKey(left).localeCompare(candidateSortKey(right)) + ); +} + +function applySourceConflicts( + relationships: readonly KloResolvedRelationshipDiscoveryCandidate[], +): KloResolvedRelationshipDiscoveryCandidate[] { + const bySource = new Map(); + for (const relationship of relationships) { + const key = sourceKey(relationship.from); + bySource.set(key, [...(bySource.get(key) ?? 
[]), relationship]); + } + + const resolved: KloResolvedRelationshipDiscoveryCandidate[] = []; + for (const group of bySource.values()) { + const ranked = [...group].sort(relationshipRank); + let acceptedSeen = false; + ranked.forEach((relationship, index) => { + const conflictRank = index + 1; + if (relationship.status === 'accepted' && acceptedSeen) { + resolved.push({ + ...relationship, + status: 'rejected', + graph: { + ...relationship.graph, + conflictRank, + reasons: [...relationship.graph.reasons.filter((reason) => reason !== 'fk_score_passed'), 'conflict_lost'], + }, + }); + return; + } + if (relationship.status === 'accepted') { + acceptedSeen = true; + } + resolved.push({ + ...relationship, + graph: { + ...relationship.graph, + conflictRank, + }, + }); + }); + } + + return resolved.sort(relationshipRank); +} + +export function resolveKloRelationshipGraph( + input: ResolveKloRelationshipGraphInput, +): KloRelationshipGraphResolutionResult { + const settings = mergeSettings(input.settings); + const declared = declaredPrimaryKeys(input.schema); + const declaredByKey = new Map(declared.map((pk) => [pkKey(pk), pk])); + const incomingByTarget = new Map(); + + for (const candidate of input.candidates) { + const key = endpointKey(candidate.to); + incomingByTarget.set(key, [...(incomingByTarget.get(key) ?? []), candidate]); + } + + const pkCandidates = new Map(); + for (const item of schemaTargetColumns(input.schema)) { + const key = `${item.table.ref.name}.(${item.column.name})`; + const incoming = incomingByTarget.get(`${item.table.ref.name}.${item.column.name}`) ?? []; + const profileOnly = + incoming.length === 0 && !item.column.primaryKey + ? 
profileOnlyPkEvidence({ + profiles: input.profiles, + tableName: item.table.ref.name, + columnName: item.column.name, + }) + : null; + if (incoming.length === 0 && !item.column.primaryKey && !profileOnly) { + continue; + } + const pk = resolveTargetPk({ + table: item.table.ref.name, + column: item.column.name, + declared: declaredByKey.get(key), + profiles: input.profiles, + incoming, + settings, + profileOnly, + }); + pkCandidates.set(key, pk); + } + + const relationships = input.candidates.map((candidate) => { + const toColumn = singleRelationshipColumn(candidate.to); + const key = `${candidate.to.table.name}.(${toColumn})`; + const pk = + pkCandidates.get(key) ?? + resolveTargetPk({ + table: candidate.to.table.name, + column: toColumn, + declared: undefined, + profiles: input.profiles, + incoming: incomingByTarget.get(endpointKey(candidate.to)) ?? [], + settings, + profileOnly: null, + }); + pkCandidates.set(key, pk); + return baseRelationshipResolution({ candidate, pk, settings }); + }); + + return { + pks: Array.from(pkCandidates.values()).sort( + (left, right) => right.pkScore - left.pkScore || pkKey(left).localeCompare(pkKey(right)), + ), + relationships: applySourceConflicts(relationships), + }; +} diff --git a/packages/context/src/scan/relationship-llm-proposal.test.ts b/packages/context/src/scan/relationship-llm-proposal.test.ts new file mode 100644 index 00000000..0a3dbb57 --- /dev/null +++ b/packages/context/src/scan/relationship-llm-proposal.test.ts @@ -0,0 +1,240 @@ +import type { KloLlmProvider } from '@klo/llm'; +import { describe, expect, it, vi } from 'vitest'; +import type { KloEnrichedColumn, KloEnrichedSchema, KloEnrichedTable } from './enrichment-types.js'; +import type { KloRelationshipProfileArtifact } from './relationship-profiling.js'; +import { proposeKloRelationshipCandidatesWithLlm } from './relationship-llm-proposal.js'; + +function llmProvider(provider = 'anthropic'): KloLlmProvider { + const model = { modelId: 'claude-sonnet-4-6', 
provider }; + return { + getModel: vi.fn(() => model as ReturnType), + getModelByName: vi.fn(() => model as ReturnType), + cacheMarker: vi.fn(), + repairToolCallHandler: vi.fn(), + thinkingProviderOptions: vi.fn(() => ({})), + telemetryConfig: vi.fn(() => undefined), + promptCachingConfig: vi.fn( + () => + ({ + enabled: false, + systemTtl: '1h', + toolsTtl: '1h', + historyTtl: '5m', + cacheSystem: true, + cacheTools: true, + cacheHistory: true, + vertexFallbackTo5m: false, + }) as ReturnType, + ), + activeBackend: vi.fn(() => provider as ReturnType), + }; +} + +function column(tableId: string, name: string, overrides: Partial = {}): KloEnrichedColumn { + const tableRef = overrides.tableRef ?? { catalog: null, db: null, name: tableId }; + return { + id: `${tableId}.${name}`, + tableId, + tableRef, + name, + nativeType: overrides.nativeType ?? 'INTEGER', + normalizedType: overrides.normalizedType ?? 'integer', + dimensionType: overrides.dimensionType ?? 'number', + nullable: overrides.nullable ?? true, + primaryKey: overrides.primaryKey ?? 
false, + parentColumnId: null, + descriptions: {}, + embedding: null, + sampleValues: null, + cardinality: null, + ...overrides, + }; +} + +function table(name: string, columns: KloEnrichedColumn[]): KloEnrichedTable { + const ref = { catalog: null, db: null, name }; + return { + id: name, + ref, + enabled: true, + descriptions: {}, + columns: columns.map((item) => ({ ...item, tableId: name, tableRef: ref })), + }; +} + +function schema(): KloEnrichedSchema { + return { + connectionId: 'warehouse', + relationships: [], + tables: [ + table('customers', [ + column('customers', 'id', { nullable: false }), + column('customers', 'email', { nativeType: 'TEXT', normalizedType: 'text', dimensionType: 'string' }), + ]), + table('orders', [ + column('orders', 'id', { nullable: false }), + column('orders', 'buyer_ref'), + ]), + ], + }; +} + +function profile(): KloRelationshipProfileArtifact { + return { + connectionId: 'warehouse', + driver: 'sqlite', + sqlAvailable: true, + queryCount: 4, + warnings: [], + tables: [ + { table: { catalog: null, db: null, name: 'customers' }, rowCount: 2 }, + { table: { catalog: null, db: null, name: 'orders' }, rowCount: 2 }, + ], + columns: { + 'customers.id': { + table: { catalog: null, db: null, name: 'customers' }, + column: 'id', + nativeType: 'INTEGER', + normalizedType: 'integer', + rowCount: 2, + nullCount: 0, + distinctCount: 2, + uniquenessRatio: 1, + nullRate: 0, + sampleValues: ['1', '2'], + minTextLength: 1, + maxTextLength: 1, + }, + 'orders.buyer_ref': { + table: { catalog: null, db: null, name: 'orders' }, + column: 'buyer_ref', + nativeType: 'INTEGER', + normalizedType: 'integer', + rowCount: 2, + nullCount: 0, + distinctCount: 2, + uniquenessRatio: 1, + nullRate: 0, + sampleValues: ['1', '2'], + minTextLength: 1, + maxTextLength: 1, + }, + }, + }; +} + +describe('relationship LLM proposals', () => { + it('maps valid structured FK proposals into review candidates with rationale evidence', async () => { + const generateText = 
vi.fn(async () => ({ + output: { + pkCandidates: [{ table: 'customers', column: 'id', confidence: 0.94, rationale: 'Unique customer identifier.' }], + fkCandidates: [ + { + fromTable: 'orders', + fromColumn: 'buyer_ref', + toTable: 'customers', + toColumn: 'id', + confidence: 0.88, + rationale: 'Buyer reference values match customer identifiers.', + }, + ], + }, + })); + + const result = await proposeKloRelationshipCandidatesWithLlm({ + connectionId: 'warehouse', + schema: schema(), + profile: profile(), + llmProvider: llmProvider(), + generateText, + }); + + expect(result.summary).toBe('completed'); + expect(result.llmCalls).toBe(1); + expect(result.warnings).toEqual([]); + expect(result.candidates).toHaveLength(1); + expect(result.candidates[0]).toMatchObject({ + from: { tableId: 'orders', columnIds: ['orders.buyer_ref'], columns: ['buyer_ref'] }, + to: { tableId: 'customers', columnIds: ['customers.id'], columns: ['id'] }, + source: 'llm_proposal', + status: 'review', + evidence: { + llmConfidence: 0.88, + llmRationale: 'Buyer reference values match customer identifiers.', + reasons: ['llm_proposal', 'llm_pk_proposal'], + }, + }); + expect(generateText).toHaveBeenCalledWith( + expect.objectContaining({ + messages: expect.arrayContaining([ + expect.objectContaining({ + role: 'user', + content: expect.stringContaining('"tables"'), + }), + ]), + }), + ); + }); + + it('skips deterministic providers without calling generateText', async () => { + const generateText = vi.fn(); + + const result = await proposeKloRelationshipCandidatesWithLlm({ + connectionId: 'warehouse', + schema: schema(), + profile: profile(), + llmProvider: llmProvider('deterministic'), + generateText, + }); + + expect(result).toMatchObject({ candidates: [], llmCalls: 0, summary: 'skipped' }); + expect(result.warnings).toEqual([]); + expect(generateText).not.toHaveBeenCalled(); + }); + + it('returns recoverable warnings for invalid references and generation failures', async () => { + const 
invalidReference = await proposeKloRelationshipCandidatesWithLlm({ + connectionId: 'warehouse', + schema: schema(), + profile: profile(), + llmProvider: llmProvider(), + generateText: vi.fn(async () => ({ + output: { + pkCandidates: [], + fkCandidates: [ + { + fromTable: 'orders', + fromColumn: 'missing_column', + toTable: 'customers', + toColumn: 'id', + confidence: 0.7, + rationale: 'Invalid source column.', + }, + ], + }, + })), + }); + expect(invalidReference.candidates).toEqual([]); + expect(invalidReference.summary).toBe('completed'); + expect(invalidReference.warnings[0]).toMatchObject({ + code: 'relationship_llm_invalid_reference', + recoverable: true, + }); + + const failed = await proposeKloRelationshipCandidatesWithLlm({ + connectionId: 'warehouse', + schema: schema(), + profile: profile(), + llmProvider: llmProvider(), + generateText: vi.fn(async () => { + throw new Error('model unavailable'); + }), + }); + expect(failed).toMatchObject({ candidates: [], llmCalls: 1, summary: 'failed' }); + expect(failed.warnings[0]).toMatchObject({ + code: 'relationship_llm_proposal_failed', + message: 'KLO relationship LLM proposal failed: model unavailable', + recoverable: true, + }); + }); +}); diff --git a/packages/context/src/scan/relationship-llm-proposal.ts b/packages/context/src/scan/relationship-llm-proposal.ts new file mode 100644 index 00000000..2f4b37c1 --- /dev/null +++ b/packages/context/src/scan/relationship-llm-proposal.ts @@ -0,0 +1,281 @@ +import type { KloLlmProvider } from '@klo/llm'; +import type { generateText } from 'ai'; +import { z } from 'zod'; +import { generateKloObject } from '../llm/index.js'; +import type { KloEnrichedColumn, KloEnrichedSchema, KloEnrichedTable } from './enrichment-types.js'; +import { + normalizeKloRelationshipName, + type KloRelationshipDiscoveryCandidate, +} from './relationship-candidates.js'; +import type { KloRelationshipColumnProfile, KloRelationshipProfileArtifact } from './relationship-profiling.js'; +import type 
{ KloScanEnrichmentSummary, KloScanWarning, KloTableRef } from './types.js'; + +const relationshipLlmProposalSchema = z.object({ + pkCandidates: z.array( + z.object({ + table: z.string(), + column: z.string(), + confidence: z.number(), + rationale: z.string(), + }), + ), + fkCandidates: z.array( + z.object({ + fromTable: z.string(), + fromColumn: z.string(), + toTable: z.string(), + toColumn: z.string(), + confidence: z.number(), + rationale: z.string(), + }), + ), +}); + +type KloRelationshipLlmProposalOutput = z.infer; +type GenerateTextInput = Parameters[0]; +export type KloRelationshipLlmProposalGenerateText = ( + input: GenerateTextInput, +) => Promise<{ text?: string; output?: unknown }>; + +export interface KloRelationshipLlmProposalSettings { + maxTablesPerBatch: number; + maxColumnsPerTable: number; + maxSampleValuesPerColumn: number; + minConfidence: number; +} + +export interface ProposeKloRelationshipCandidatesWithLlmInput { + connectionId: string; + schema: KloEnrichedSchema; + profile: KloRelationshipProfileArtifact; + llmProvider: KloLlmProvider | null; + settings?: Partial; + generateText?: KloRelationshipLlmProposalGenerateText; +} + +export interface KloRelationshipLlmProposalResult { + candidates: KloRelationshipDiscoveryCandidate[]; + warnings: KloScanWarning[]; + llmCalls: number; + summary: KloScanEnrichmentSummary['llmRelationshipValidation']; +} + +const DEFAULT_SETTINGS: KloRelationshipLlmProposalSettings = { + maxTablesPerBatch: 40, + maxColumnsPerTable: 80, + maxSampleValuesPerColumn: 5, + minConfidence: 0.55, +}; + +function mergeSettings( + settings: Partial | undefined, +): KloRelationshipLlmProposalSettings { + return { ...DEFAULT_SETTINGS, ...settings }; +} + +function clampConfidence(value: number): number { + return Number(Math.max(0, Math.min(1, value)).toFixed(3)); +} + +function modelIsDeterministic(llmProvider: KloLlmProvider): boolean { + const model = llmProvider.getModel('candidateExtraction'); + return (model as { 
provider?: string }).provider === 'deterministic'; +} + +function findTable(schema: KloEnrichedSchema, name: string): KloEnrichedTable | null { + const normalized = name.toLowerCase(); + return schema.tables.find((table) => table.ref.name.toLowerCase() === normalized) ?? null; +} + +function findColumn(table: KloEnrichedTable, name: string): KloEnrichedColumn | null { + const normalized = name.toLowerCase(); + return table.columns.find((column) => column.name.toLowerCase() === normalized) ?? null; +} + +function profileKey(table: KloTableRef, column: KloEnrichedColumn): string { + return `${table.name}.${column.name}`; +} + +function profileForColumn( + profile: KloRelationshipProfileArtifact, + table: KloEnrichedTable, + column: KloEnrichedColumn, +): KloRelationshipColumnProfile | null { + return profile.columns[profileKey(table.ref, column)] ?? null; +} + +function rowCountForTable(profile: KloRelationshipProfileArtifact, table: KloEnrichedTable): number | null { + return profile.tables.find((item) => item.table.name.toLowerCase() === table.ref.name.toLowerCase())?.rowCount ?? null; +} + +function buildEvidencePacket( + schema: KloEnrichedSchema, + profile: KloRelationshipProfileArtifact, + settings: KloRelationshipLlmProposalSettings, +): Record { + return { + connectionId: schema.connectionId, + sqlAvailable: profile.sqlAvailable, + tables: schema.tables + .filter((table) => table.enabled) + .slice(0, settings.maxTablesPerBatch) + .map((table) => ({ + name: table.ref.name, + catalog: table.ref.catalog, + db: table.ref.db, + rowCount: rowCountForTable(profile, table), + columns: table.columns.slice(0, settings.maxColumnsPerTable).map((column) => { + const columnProfile = profileForColumn(profile, table, column); + return { + name: column.name, + nativeType: column.nativeType, + normalizedType: column.normalizedType, + dimensionType: column.dimensionType, + nullable: column.nullable, + declaredPrimaryKey: column.primaryKey, + profile: columnProfile + ? 
{ + rowCount: columnProfile.rowCount, + nullCount: columnProfile.nullCount, + distinctCount: columnProfile.distinctCount, + uniquenessRatio: columnProfile.uniquenessRatio, + nullRate: columnProfile.nullRate, + sampleValues: columnProfile.sampleValues.slice(0, settings.maxSampleValuesPerColumn), + } + : null, + }; + }), + })), + }; +} + +function pkProposalKey(table: string, column: string): string { + return `${table.toLowerCase()}.${column.toLowerCase()}`; +} + +function endpoint(table: KloEnrichedTable, column: KloEnrichedColumn) { + return { + tableId: table.id, + columnIds: [column.id], + table: table.ref, + columns: [column.name], + }; +} + +function relationshipId(fromTable: KloEnrichedTable, fromColumn: KloEnrichedColumn, toTable: KloEnrichedTable, toColumn: KloEnrichedColumn): string { + return `${fromTable.id}:(${fromColumn.id})->${toTable.id}:(${toColumn.id})`; +} + +function invalidReferenceWarning(message: string, metadata: Record): KloScanWarning { + return { + code: 'relationship_llm_invalid_reference', + message, + recoverable: true, + metadata, + }; +} + +function mapValidProposals( + schema: KloEnrichedSchema, + output: KloRelationshipLlmProposalOutput, + settings: KloRelationshipLlmProposalSettings, +): { candidates: KloRelationshipDiscoveryCandidate[]; warnings: KloScanWarning[] } { + const warnings: KloScanWarning[] = []; + const pkProposals = new Set(output.pkCandidates.map((item) => pkProposalKey(item.table, item.column))); + const candidates: KloRelationshipDiscoveryCandidate[] = []; + + for (const item of output.fkCandidates) { + if (item.confidence < settings.minConfidence) { + continue; + } + const fromTable = findTable(schema, item.fromTable); + const toTable = findTable(schema, item.toTable); + const fromColumn = fromTable ? findColumn(fromTable, item.fromColumn) : null; + const toColumn = toTable ? 
findColumn(toTable, item.toColumn) : null; + if (!fromTable || !toTable || !fromColumn || !toColumn) { + warnings.push( + invalidReferenceWarning('KLO relationship LLM proposal referenced a table or column that is not in the schema.', { + proposal: item, + }), + ); + continue; + } + + const pkProposalExists = pkProposals.has(pkProposalKey(toTable.ref.name, toColumn.name)); + candidates.push({ + id: relationshipId(fromTable, fromColumn, toTable, toColumn), + from: endpoint(fromTable, fromColumn), + to: endpoint(toTable, toColumn), + source: 'llm_proposal', + status: 'review', + relationshipType: 'many_to_one', + confidence: clampConfidence(item.confidence), + evidence: { + sourceColumnBase: normalizeKloRelationshipName(fromColumn.name).singular, + targetTableBase: normalizeKloRelationshipName(toTable.ref.name).singular, + targetColumnBase: normalizeKloRelationshipName(toColumn.name).singular, + targetKeyScore: pkProposalExists ? 0.88 : 0.68, + nameScore: 0.45, + reasons: pkProposalExists ? ['llm_proposal', 'llm_pk_proposal'] : ['llm_proposal'], + llmConfidence: clampConfidence(item.confidence), + llmRationale: item.rationale, + }, + }); + } + + return { candidates, warnings }; +} + +function generationFailureWarning(error: unknown): KloScanWarning { + const message = error instanceof Error ? 
error.message : String(error); + return { + code: 'relationship_llm_proposal_failed', + message: `KLO relationship LLM proposal failed: ${message}`, + recoverable: true, + }; +} + +export async function proposeKloRelationshipCandidatesWithLlm( + input: ProposeKloRelationshipCandidatesWithLlmInput, +): Promise { + if (!input.llmProvider || modelIsDeterministic(input.llmProvider)) { + return { candidates: [], warnings: [], llmCalls: 0, summary: 'skipped' }; + } + + const settings = mergeSettings(input.settings); + const evidence = buildEvidencePacket(input.schema, input.profile, settings); + const prompt = [ + 'You are helping KLO review possible SQL relationships before validation.', + 'Use only the compact schema evidence. Propose likely primary keys and foreign keys for later SQL validation.', + 'Return structured output only; never assume a join is accepted.', + JSON.stringify(evidence), + ].join('\n\n'); + + try { + const generated = await generateKloObject< + KloRelationshipLlmProposalOutput, + typeof relationshipLlmProposalSchema + >({ + llmProvider: input.llmProvider, + role: 'candidateExtraction', + prompt, + schema: relationshipLlmProposalSchema, + generateText: input.generateText, + }); + const output = relationshipLlmProposalSchema.parse(generated); + const mapped = mapValidProposals(input.schema, output, settings); + return { + candidates: mapped.candidates, + warnings: mapped.warnings, + llmCalls: 1, + summary: 'completed', + }; + } catch (error) { + return { + candidates: [], + warnings: [generationFailureWarning(error)], + llmCalls: 1, + summary: 'failed', + }; + } +} diff --git a/packages/context/src/scan/relationship-locality.test.ts b/packages/context/src/scan/relationship-locality.test.ts new file mode 100644 index 00000000..2b925984 --- /dev/null +++ b/packages/context/src/scan/relationship-locality.test.ts @@ -0,0 +1,151 @@ +import { describe, expect, it } from 'vitest'; +import type { KloEnrichedColumn, KloEnrichedTable } from 
'./enrichment-types.js'; +import { localCandidateTables } from './relationship-locality.js'; + +function column( + tableId: string, + id: string, + name: string, + options: Partial = {}, +): KloEnrichedColumn { + const tableRef = options.tableRef ?? { catalog: null, db: 'public', name: tableId }; + return { + id, + tableId, + tableRef, + name, + nativeType: options.nativeType ?? 'INTEGER', + normalizedType: options.normalizedType ?? 'integer', + dimensionType: options.dimensionType ?? 'number', + nullable: options.nullable ?? true, + primaryKey: options.primaryKey ?? false, + parentColumnId: options.parentColumnId ?? null, + descriptions: options.descriptions ?? {}, + embedding: options.embedding ?? null, + sampleValues: options.sampleValues ?? null, + cardinality: options.cardinality ?? null, + }; +} + +function table(id: string, name: string, columns: KloEnrichedColumn[]): KloEnrichedTable { + const ref = { catalog: null, db: 'public', name }; + return { + id, + ref, + enabled: true, + descriptions: {}, + columns: columns.map((item) => ({ ...item, tableId: id, tableRef: ref })), + }; +} + +describe('relationship locality', () => { + it('ranks the referenced parent table ahead of the child table for id-like source columns', () => { + const artists = table('artist-id', 'Artist', [column('artist-id', 'artist-pk', 'ArtistId')]); + const albums = table('album-id', 'Album', [ + column('album-id', 'album-pk', 'AlbumId'), + column('album-id', 'artist-fk', 'ArtistId'), + ]); + const unrelated = table('invoice-id', 'Invoice', [column('invoice-id', 'invoice-pk', 'InvoiceId')]); + + const ranked = localCandidateTables({ + childTable: albums, + childColumn: albums.columns[1]!, + parentTables: [albums, unrelated, artists], + maxParentTables: 1, + }); + + expect(ranked.map((item) => item.table.ref.name)).toEqual(['Artist']); + expect(ranked[0]).toMatchObject({ + score: expect.any(Number), + tokenScore: expect.any(Number), + embeddingScore: 0, + reasons: 
expect.arrayContaining(['column_table_token_overlap']), + }); + }); + + it('uses singular and plural variants so plan_code can rank stg_plans', () => { + const plans = table('plans-id', 'stg_plans', [column('plans-id', 'plan-code', 'plan_code')]); + const segments = table('segments-id', 'mart_account_segments', [ + column('segments-id', 'current-plan-code', 'current_plan_code', { + nativeType: 'TEXT', + normalizedType: 'text', + dimensionType: 'string', + }), + ]); + const accounts = table('accounts-id', 'accounts', [column('accounts-id', 'account-id', 'id')]); + + const ranked = localCandidateTables({ + childTable: segments, + childColumn: segments.columns[0]!, + parentTables: [accounts, segments, plans], + maxParentTables: 1, + }); + + expect(ranked.map((item) => item.table.ref.name)).toEqual(['stg_plans']); + expect(ranked[0]?.tokenScore).toBeGreaterThan(0); + }); + + it('returns all tables when the schema is smaller than the default locality cap', () => { + const accounts = table('accounts-id', 'accounts', [column('accounts-id', 'account-id', 'id')]); + const invoices = table('invoices-id', 'invoices', [ + column('invoices-id', 'invoice-id', 'id'), + column('invoices-id', 'account-id', 'account_id'), + ]); + + const ranked = localCandidateTables({ + childTable: invoices, + childColumn: invoices.columns[1]!, + parentTables: [invoices, accounts], + }); + + expect(ranked.map((item) => item.table.ref.name).sort()).toEqual(['accounts', 'invoices']); + }); + + it('supports an explicit zero cap for deterministic tests', () => { + const accounts = table('accounts-id', 'accounts', [column('accounts-id', 'account-id', 'id')]); + const invoices = table('invoices-id', 'invoices', [ + column('invoices-id', 'invoice-id', 'id'), + column('invoices-id', 'account-id', 'account_id'), + ]); + + const ranked = localCandidateTables({ + childTable: invoices, + childColumn: invoices.columns[1]!, + parentTables: [invoices, accounts], + maxParentTables: 0, + }); + + 
expect(ranked).toEqual([]); + }); + + it('uses parent-column embeddings when token locality is weak', () => { + const customers = table('customers-id', 'customers', [ + column('customers-id', 'customers-id-col', 'id', { embedding: [1, 0, 0] }), + column('customers-id', 'customers-name-col', 'name', { + nativeType: 'TEXT', + normalizedType: 'text', + dimensionType: 'string', + embedding: [0, 1, 0], + }), + ]); + const orders = table('orders-id', 'orders', [ + column('orders-id', 'orders-id-col', 'id', { embedding: [0, 0, 1] }), + column('orders-id', 'buyer-ref-col', 'buyer_ref', { embedding: [0.995, 0.005, 0] }), + ]); + const invoices = table('invoices-id', 'invoices', [column('invoices-id', 'invoice-id', 'id')]); + + const ranked = localCandidateTables({ + childTable: orders, + childColumn: orders.columns[1]!, + parentTables: [invoices, customers], + maxParentTables: 1, + }); + + expect(ranked.map((item) => item.table.ref.name)).toEqual(['customers']); + expect(ranked[0]).toMatchObject({ + embeddingScore: expect.any(Number), + reasons: expect.arrayContaining(['embedding_similarity']), + }); + expect(ranked[0]!.embeddingScore).toBeGreaterThan(0.99); + }); +}); diff --git a/packages/context/src/scan/relationship-locality.ts b/packages/context/src/scan/relationship-locality.ts new file mode 100644 index 00000000..b0e851cc --- /dev/null +++ b/packages/context/src/scan/relationship-locality.ts @@ -0,0 +1,164 @@ +import type { KloEnrichedColumn, KloEnrichedTable } from './enrichment-types.js'; +import { normalizeKloRelationshipName, tokenizeKloRelationshipName } from './relationship-name-similarity.js'; + +export interface KloRelationshipLocalityCandidateTable { + table: KloEnrichedTable; + score: number; + tokenScore: number; + embeddingScore: number; + reasons: string[]; +} + +export interface LocalKloRelationshipCandidateTablesInput { + childTable: KloEnrichedTable; + childColumn: KloEnrichedColumn; + parentTables: readonly KloEnrichedTable[]; + maxParentTables?: 
number; +} + +const DEFAULT_MAX_PARENT_TABLES = 20; +const RELATIONSHIP_SUFFIX_TOKENS = new Set(['id', 'ids', 'key', 'keys', 'code', 'codes', 'uuid', 'uuids']); + +function roundedScore(value: number): number { + return Number(Math.max(0, Math.min(1, value)).toFixed(3)); +} + +function normalizedTokenVariants(name: string): string[] { + const normalized = normalizeKloRelationshipName(name); + return Array.from( + new Set([ + ...normalized.tokens, + ...tokenizeKloRelationshipName(normalized.singular), + ...tokenizeKloRelationshipName(normalized.plural), + ]), + ).filter(Boolean); +} + +function childColumnLocalityTokens(column: KloEnrichedColumn): string[] { + const tokens = normalizedTokenVariants(column.name); + const withoutSuffix = tokens.filter((token) => !RELATIONSHIP_SUFFIX_TOKENS.has(token)); + return withoutSuffix.length > 0 ? withoutSuffix : tokens; +} + +function uniqueTokens(values: readonly string[]): string[] { + return Array.from(new Set(values.filter((value) => value.length > 0))); +} + +function jaccard(left: readonly string[], right: readonly string[]): number { + if (left.length === 0 || right.length === 0) { + return 0; + } + const leftSet = new Set(left); + const rightSet = new Set(right); + const intersectionSize = Array.from(leftSet).filter((token) => rightSet.has(token)).length; + const unionSize = new Set([...leftSet, ...rightSet]).size; + return unionSize === 0 ? 0 : intersectionSize / unionSize; +} + +function cosineSimilarity(left: readonly number[] | null, right: readonly number[] | null): number { + if (!left || !right || left.length === 0 || left.length !== right.length) { + return 0; + } + + let dot = 0; + let leftMagnitude = 0; + let rightMagnitude = 0; + for (let index = 0; index < left.length; index += 1) { + const leftValue = left[index] ?? 0; + const rightValue = right[index] ?? 
0; + dot += leftValue * rightValue; + leftMagnitude += leftValue * leftValue; + rightMagnitude += rightValue * rightValue; + } + + if (leftMagnitude === 0 || rightMagnitude === 0) { + return 0; + } + + return dot / (Math.sqrt(leftMagnitude) * Math.sqrt(rightMagnitude)); +} + +/** + * Returns the highest cosine similarity between the child column's embedding and the embedding of any column in `parentTable`, or 0 when the child column has no usable embedding. + * The child column itself is skipped: when `parentTable` is the child's own table the column would otherwise match itself with similarity 1 and inflate that table's locality score. + */ +function parentEmbeddingScore(childColumn: KloEnrichedColumn, parentTable: KloEnrichedTable): number { + if (!Array.isArray(childColumn.embedding) || childColumn.embedding.length === 0) { + return 0; + } + + let best = 0; + for (const parentColumn of parentTable.columns) { + /* A column is not evidence of a relationship to itself; other columns of the same table still count, so self-referencing keys keep working. */ + if (parentColumn.tableId === childColumn.tableId && parentColumn.id === childColumn.id) { + continue; + } + best = Math.max(best, cosineSimilarity(childColumn.embedding, parentColumn.embedding)); + } + return best; +} + +/** + * Scores the name overlap (Jaccard) between the child column's locality tokens and the parent table's name tokens. + * For the child's own table only the column tokens are used; for any other table the result is the larger of the column-only score and 0.6 times the overlap of the combined child-table and column tokens. + */ +function tableTokenScore(input: { + childTable: KloEnrichedTable; + childColumn: KloEnrichedColumn; + parentTable: KloEnrichedTable; +}): number { + const childTableTokens = normalizedTokenVariants(input.childTable.ref.name); + const childColumnTokens = childColumnLocalityTokens(input.childColumn); + const parentTokens = normalizedTokenVariants(input.parentTable.ref.name); + const columnOnlyScore = jaccard(childColumnTokens, parentTokens); + if (input.parentTable.id === input.childTable.id) { + return columnOnlyScore; + } + const columnAndTableScore = jaccard(uniqueTokens([...childTableTokens, ...childColumnTokens]), parentTokens); + return Math.max(columnOnlyScore, columnAndTableScore * 0.6); +} + +function localityScore(input: { + childTable: KloEnrichedTable; + childColumn: KloEnrichedColumn; + parentTable: KloEnrichedTable; +}): Omit { + const tokenScore = roundedScore(tableTokenScore(input)); + const embeddingScore = roundedScore(parentEmbeddingScore(input.childColumn, input.parentTable)); + const score = + embeddingScore > 0 + ? 
roundedScore(Math.max(tokenScore, tokenScore * 0.8 + embeddingScore * 0.2, embeddingScore * 0.65)) + : tokenScore; + const reasons: string[] = []; + if (tokenScore > 0) { + reasons.push('column_table_token_overlap'); + } + if (embeddingScore > 0) { + reasons.push('embedding_similarity'); + } + if (reasons.length === 0) { + reasons.push('locality_tie_breaker'); + } + return { + score, + tokenScore, + embeddingScore, + reasons, + }; +} + +export function localCandidateTables( + input: LocalKloRelationshipCandidateTablesInput, +): KloRelationshipLocalityCandidateTable[] { + const limit = input.maxParentTables ?? DEFAULT_MAX_PARENT_TABLES; + if (!Number.isFinite(limit) || limit <= 0) { + return []; + } + + return input.parentTables + .map((table) => ({ + table, + ...localityScore({ + childTable: input.childTable, + childColumn: input.childColumn, + parentTable: table, + }), + })) + .sort( + (left, right) => + right.score - left.score || + right.tokenScore - left.tokenScore || + right.embeddingScore - left.embeddingScore || + left.table.ref.name.localeCompare(right.table.ref.name) || + left.table.id.localeCompare(right.table.id), + ) + .slice(0, Math.floor(limit)); +} diff --git a/packages/context/src/scan/relationship-name-similarity.test.ts b/packages/context/src/scan/relationship-name-similarity.test.ts new file mode 100644 index 00000000..ef77cc1a --- /dev/null +++ b/packages/context/src/scan/relationship-name-similarity.test.ts @@ -0,0 +1,81 @@ +import { describe, expect, it } from 'vitest'; +import { + normalizeKloRelationshipName, + pluralizeKloRelationshipToken, + singularizeKloRelationshipToken, + tokenSimilarity, + tokenizeKloRelationshipName, +} from './relationship-name-similarity.js'; + +describe('relationship name similarity', () => { + it('tokenizes common warehouse naming styles', () => { + expect(normalizeKloRelationshipName('AlbumId')).toMatchObject({ + normalized: 'album_id', + singular: 'album_id', + plural: 'album_ids', + tokens: ['album', 'id'], + 
}); + expect(normalizeKloRelationshipName('artistID')).toMatchObject({ + normalized: 'artist_id', + tokens: ['artist', 'id'], + }); + expect(normalizeKloRelationshipName('SalesLT.CustomerID')).toMatchObject({ + normalized: 'sales_lt_customer_id', + singular: 'sales_lt_customer_id', + tokens: ['sales', 'lt', 'customer', 'id'], + }); + expect(normalizeKloRelationshipName('SCREAMING_CUSTOMER_UUID')).toMatchObject({ + normalized: 'screaming_customer_uuid', + tokens: ['screaming', 'customer', 'uuid'], + }); + expect(normalizeKloRelationshipName('billing-account-key')).toMatchObject({ + normalized: 'billing_account_key', + tokens: ['billing', 'account', 'key'], + }); + }); + + it('removes only leading warehouse layer prefixes', () => { + expect(normalizeKloRelationshipName('mart__Sales_Accounts')).toMatchObject({ + normalized: 'sales_accounts', + singular: 'sales_account', + plural: 'sales_accounts', + tokens: ['sales', 'accounts'], + }); + expect(normalizeKloRelationshipName('dim_users')).toMatchObject({ + normalized: 'users', + singular: 'user', + plural: 'users', + tokens: ['users'], + }); + expect(normalizeKloRelationshipName('customer_dim_id')).toMatchObject({ + normalized: 'customer_dim_id', + tokens: ['customer', 'dim', 'id'], + }); + }); + + it('folds accents and preserves non-suffix trailing s words', () => { + expect(normalizeKloRelationshipName('KundénID')).toMatchObject({ + normalized: 'kunden_id', + tokens: ['kunden', 'id'], + }); + expect(singularizeKloRelationshipToken('address')).toBe('address'); + expect(singularizeKloRelationshipToken('addresses')).toBe('address'); + expect(singularizeKloRelationshipToken('status')).toBe('status'); + expect(pluralizeKloRelationshipToken('address')).toBe('addresses'); + expect(pluralizeKloRelationshipToken('company')).toBe('companies'); + }); + + it('returns deterministic tokens for direct tokenization calls', () => { + expect(tokenizeKloRelationshipName('HTTPResponseCode')).toEqual(['http', 'response', 'code']); + 
expect(tokenizeKloRelationshipName('customer2AddressID')).toEqual(['customer', '2', 'address', 'id']); + }); + + it('scores token overlap and ordered suffix similarity', () => { + expect(tokenSimilarity('artist_id', 'artist_id')).toBe(1); + expect(tokenSimilarity('Album.ArtistId', 'ArtistID')).toBeGreaterThanOrEqual(0.74); + expect(tokenSimilarity('customer_account_id', 'account_id')).toBeGreaterThan( + tokenSimilarity('customer_account_id', 'invoice_id'), + ); + expect(tokenSimilarity('', 'artist')).toBe(0); + }); +}); diff --git a/packages/context/src/scan/relationship-name-similarity.ts b/packages/context/src/scan/relationship-name-similarity.ts new file mode 100644 index 00000000..19212557 --- /dev/null +++ b/packages/context/src/scan/relationship-name-similarity.ts @@ -0,0 +1,151 @@ +export interface KloRelationshipNormalizedName { + raw: string; + normalized: string; + singular: string; + plural: string; + tokens: string[]; +} + +export type KloRelationshipTokenInput = string | readonly string[] | KloRelationshipNormalizedName; + +const WAREHOUSE_LAYER_PREFIXES = new Set(['stg', 'stage', 'staging', 'dim', 'fct', 'fact', 'int', 'mart']); + +function splitCaseBoundaries(value: string): string { + return value + .replace(/([\p{Lu}]+)([\p{Lu}][\p{Ll}])/gu, '$1_$2') + .replace(/([\p{Ll}\p{N}])([\p{Lu}])/gu, '$1_$2') + .replace(/(\p{L})(\p{N})/gu, '$1_$2') + .replace(/(\p{N})(\p{L})/gu, '$1_$2'); +} + +function foldAccents(value: string): string { + return value + .normalize('NFKD') + .replace(/\p{Mark}+/gu, '') + .replace(/ß/giu, 'ss') + .replace(/æ/giu, 'ae') + .replace(/œ/giu, 'oe'); +} + +export function singularizeKloRelationshipToken(value: string): string { + if (value.length <= 2) { + return value; + } + if (value.endsWith('ies') && value.length > 3) { + return `${value.slice(0, -3)}y`; + } + if (/(ches|shes|sses|xes|zes)$/u.test(value)) { + return value.slice(0, -2); + } + if (value.endsWith('ves') && value.length > 4) { + return `${value.slice(0, -3)}f`; 
+ } + if (value.endsWith('s') && !/(ss|us|is)$/u.test(value)) { + return value.slice(0, -1); + } + return value; +} + +/** + * Pluralizes a single name token: consonant + `y` (or `quy`) becomes `ies`, sibilant endings (`s`, `x`, `z`, `ch`, `sh`) take `es`, everything else takes `s`. + * Vowel + `y` tokens keep the `y` (`key` -> `keys`, not `keies`). The match is case-sensitive; presumably callers pass lowercased tokens as produced by `tokenizeKloRelationshipName`. + */ +export function pluralizeKloRelationshipToken(value: string): string { + if (/([^aeiouy]|qu)y$/u.test(value)) { + return `${value.slice(0, -1)}ies`; + } + if (/(s|x|z|ch|sh)$/u.test(value)) { + return `${value}es`; + } + return `${value}s`; +} + +/** Returns a copy of `tokens` in which only the last token is singularized. */ +function singularizeTokens(tokens: readonly string[]): string[] { + if (tokens.length === 0) { + return []; + } + const result = [...tokens]; + const last = result[result.length - 1]; + if (last) { + result[result.length - 1] = singularizeKloRelationshipToken(last); + } + return result; +} + +/** Returns a copy of `tokens` in which only the last token is pluralized. */ +function pluralizeTokens(tokens: readonly string[]): string[] { + if (tokens.length === 0) { + return []; + } + const result = [...tokens]; + const last = result[result.length - 1]; + if (last) { + result[result.length - 1] = pluralizeKloRelationshipToken(last); + } + return result; +} + +/** + * Splits a table or column name into lowercase tokens: accents are folded, case and letter/digit boundaries are split, and runs of non-alphanumeric characters act as separators. + * A warehouse-layer prefix (`stg`, `dim`, `fct`, ...) is dropped only when it is the first token. + */ +export function tokenizeKloRelationshipName(name: string): string[] { + const boundarySeparated = splitCaseBoundaries(foldAccents(name.trim())); + const tokens = boundarySeparated + .toLowerCase() + .replace(/[^\p{L}\p{N}]+/gu, '_') + .replace(/^_+|_+$/gu, '') + .split('_') + .filter(Boolean); + + return tokens.filter((token, index) => index > 0 || !WAREHOUSE_LAYER_PREFIXES.has(token)); +} + +/** Builds the normalized view of a name: its tokens, their `_`-joined form, and singular/plural variants that differ only in the last token. */ +export function normalizeKloRelationshipName(name: string): KloRelationshipNormalizedName { + const tokens = tokenizeKloRelationshipName(name); + const singularTokens = singularizeTokens(tokens); + const pluralTokens = pluralizeTokens(singularTokens); + + return { + raw: name, + normalized: tokens.join('_'), + singular: singularTokens.join('_'), + plural: pluralTokens.join('_'), + tokens, + }; +} + +function tokensFromInput(input: KloRelationshipTokenInput): string[] { + if (typeof input === 'string') { + return tokenizeKloRelationshipName(input); + } + if ('tokens' in input) { + return input.tokens; + } + return input.map((token) => 
normalizeKloRelationshipName(token).normalized).filter(Boolean); +} + +function longestCommonSuffixLength(left: readonly string[], right: readonly string[]): number { + let count = 0; + while ( + count < left.length && + count < right.length && + left[left.length - 1 - count] === right[right.length - 1 - count] + ) { + count += 1; + } + return count; +} + +function roundedScore(value: number): number { + return Number(Math.max(0, Math.min(1, value)).toFixed(3)); +} + +export function tokenSimilarity(leftInput: KloRelationshipTokenInput, rightInput: KloRelationshipTokenInput): number { + const left = tokensFromInput(leftInput); + const right = tokensFromInput(rightInput); + if (left.length === 0 || right.length === 0) { + return 0; + } + + const leftSet = new Set(left); + const rightSet = new Set(right); + const intersectionSize = Array.from(leftSet).filter((token) => rightSet.has(token)).length; + const unionSize = new Set([...leftSet, ...rightSet]).size; + const jaccard = unionSize === 0 ? 
0 : intersectionSize / unionSize; + const suffixLength = longestCommonSuffixLength(left, right); + const suffixScore = suffixLength / Math.min(left.length, right.length); + + return roundedScore(jaccard * 0.75 + suffixScore * 0.25); +} diff --git a/packages/context/src/scan/relationship-profiling.test.ts b/packages/context/src/scan/relationship-profiling.test.ts new file mode 100644 index 00000000..01d3a07b --- /dev/null +++ b/packages/context/src/scan/relationship-profiling.test.ts @@ -0,0 +1,354 @@ +import { readFile } from 'node:fs/promises'; +import { join } from 'node:path'; +import Database from 'better-sqlite3'; +import { afterEach, describe, expect, it } from 'vitest'; +import type { KloEnrichedColumn, KloEnrichedSchema, KloEnrichedTable } from './enrichment-types.js'; +import { snapshotToKloEnrichedSchema } from './local-enrichment.js'; +import { loadKloRelationshipBenchmarkFixture, maskKloRelationshipBenchmarkSnapshot } from './relationship-benchmarks.js'; +import { + createKloRelationshipProfileCache, + formatKloRelationshipTableRef, + profileKloRelationshipSchema, + quoteKloRelationshipIdentifier, +} from './relationship-profiling.js'; +import type { KloQueryResult, KloReadOnlyQueryInput, KloScanContext } from './types.js'; + +class InMemorySqliteExecutor { + readonly db = new Database(':memory:'); + queryCount = 0; + + executeReadOnly(input: KloReadOnlyQueryInput, _ctx: KloScanContext): Promise { + this.queryCount += 1; + const rows = this.db.prepare(input.sql).all() as Record[]; + const headers = Object.keys(rows[0] ?? 
{}); + return Promise.resolve({ + headers, + rows: rows.map((row) => headers.map((header) => row[header])), + totalRows: rows.length, + rowCount: rows.length, + }); + } + + close(): void { + this.db.close(); + } +} + +class FileSqliteExecutor { + readonly db: Database.Database; + queryCount = 0; + + constructor(dataPath: string) { + this.db = new Database(dataPath, { readonly: true, fileMustExist: true }); + } + + executeReadOnly(input: KloReadOnlyQueryInput, _ctx: KloScanContext): Promise { + this.queryCount += 1; + const rows = this.db.prepare(input.sql).all() as Record[]; + const headers = Object.keys(rows[0] ?? {}); + return Promise.resolve({ + headers, + rows: rows.map((row) => headers.map((header) => row[header])), + totalRows: rows.length, + rowCount: rows.length, + }); + } + + close(): void { + this.db.close(); + } +} + +function column(tableId: string, name: string, overrides: Partial = {}): KloEnrichedColumn { + const tableRef = overrides.tableRef ?? { catalog: null, db: null, name: tableId }; + return { + id: `${tableId}.${name}`, + tableId, + tableRef, + name, + nativeType: overrides.nativeType ?? 'INTEGER', + normalizedType: overrides.normalizedType ?? 'integer', + dimensionType: overrides.dimensionType ?? 'number', + nullable: overrides.nullable ?? true, + primaryKey: overrides.primaryKey ?? 
false, + parentColumnId: null, + descriptions: {}, + embedding: null, + sampleValues: null, + cardinality: null, + ...overrides, + }; +} + +function table(name: string, columns: KloEnrichedColumn[]): KloEnrichedTable { + const ref = { catalog: null, db: null, name }; + return { + id: name, + ref, + enabled: true, + descriptions: {}, + columns: columns.map((item) => ({ ...item, tableId: name, tableRef: ref })), + }; +} + +function schema(tables: KloEnrichedTable[]): KloEnrichedSchema { + return { connectionId: 'warehouse', tables, relationships: [] }; +} + +describe('relationship profiling', () => { + let executor: InMemorySqliteExecutor | null = null; + + afterEach(() => { + executor?.close(); + executor = null; + }); + + it('keeps profiling on the batched table path', async () => { + const source = await readFile(new URL('relationship-profiling.ts', import.meta.url), 'utf-8'); + + expect(source).not.toMatch(new RegExp('queryColumn' + 'Profile')); + expect(source).not.toMatch(/for \(const column of table\.columns\)[\s\S]*executeReadOnly/); + expect(source).toMatch(/queryTableProfile/); + expect(source).toMatch(/UNION ALL/); + }); + + it('quotes identifiers and formats table refs for supported local SQL drivers', () => { + expect(quoteKloRelationshipIdentifier('sqlite', 'odd"name')).toBe('"odd""name"'); + expect(quoteKloRelationshipIdentifier('mysql', 'odd`name')).toBe('`odd``name`'); + expect(quoteKloRelationshipIdentifier('sqlserver', 'odd]name')).toBe('[odd]]name]'); + expect(formatKloRelationshipTableRef('sqlite', { catalog: null, db: null, name: 'accounts' })).toBe('"accounts"'); + expect(formatKloRelationshipTableRef('postgres', { catalog: null, db: 'analytics', name: 'accounts' })).toBe( + '"analytics"."accounts"', + ); + }); + + it('profiles row count, null rate, uniqueness, sample values, and text lengths', async () => { + executor = new InMemorySqliteExecutor(); + executor.db.exec(` + CREATE TABLE accounts (id INTEGER, code TEXT, parent_id INTEGER); + 
INSERT INTO accounts (id, code, parent_id) VALUES + (1, 'A-1', NULL), + (2, 'B-2', 1), + (3, 'C-3', 1), + (4, 'C-3', 2); + `); + + const result = await profileKloRelationshipSchema({ + connectionId: 'warehouse', + driver: 'sqlite', + schema: schema([ + table('accounts', [ + column('accounts', 'id', { primaryKey: false, nullable: false }), + column('accounts', 'code', { nativeType: 'TEXT', normalizedType: 'text', dimensionType: 'string' }), + column('accounts', 'parent_id'), + ]), + ]), + executor, + ctx: { runId: 'profile-test' }, + sampleValuesPerColumn: 3, + }); + + expect(result.sqlAvailable).toBe(true); + expect(result.queryCount).toBe(1); + expect(executor.queryCount).toBe(1); + expect(result.tables).toHaveLength(1); + expect(result.tables[0]).toMatchObject({ table: { name: 'accounts' }, rowCount: 4 }); + expect(result.columns['accounts.id']).toMatchObject({ + table: { name: 'accounts' }, + column: 'id', + rowCount: 4, + nullCount: 0, + distinctCount: 4, + uniquenessRatio: 1, + nullRate: 0, + minTextLength: 1, + maxTextLength: 1, + }); + expect(result.columns['accounts.code']).toMatchObject({ + distinctCount: 3, + uniquenessRatio: 0.75, + sampleValues: ['C-3', 'A-1', 'B-2'], + minTextLength: 3, + maxTextLength: 3, + }); + expect(result.columns['accounts.parent_id']).toMatchObject({ + nullCount: 1, + distinctCount: 2, + uniquenessRatio: 0.5, + nullRate: 0.25, + }); + }); + + it('profiles each enabled table with one read-only SQL query', async () => { + executor = new InMemorySqliteExecutor(); + executor.db.exec(` + CREATE TABLE accounts (id INTEGER, code TEXT, parent_id INTEGER); + CREATE TABLE users (id INTEGER, account_id INTEGER); + INSERT INTO accounts (id, code, parent_id) VALUES + (1, 'A-1', NULL), + (2, 'B-2', 1), + (3, 'C-3', 1), + (4, 'C-3', 2); + INSERT INTO users (id, account_id) VALUES + (10, 1), + (11, 1), + (12, 2); + `); + + const result = await profileKloRelationshipSchema({ + connectionId: 'warehouse', + driver: 'sqlite', + schema: schema([ + 
table('accounts', [ + column('accounts', 'id', { nullable: false }), + column('accounts', 'code', { nativeType: 'TEXT', normalizedType: 'text', dimensionType: 'string' }), + column('accounts', 'parent_id'), + ]), + table('users', [column('users', 'id', { nullable: false }), column('users', 'account_id')]), + ]), + executor, + ctx: { runId: 'profile-batched-query-count' }, + sampleValuesPerColumn: 3, + }); + + expect(result.sqlAvailable).toBe(true); + expect(result.queryCount).toBe(2); + expect(executor.queryCount).toBe(2); + expect(result.tables).toEqual([ + { table: { catalog: null, db: null, name: 'accounts' }, rowCount: 4 }, + { table: { catalog: null, db: null, name: 'users' }, rowCount: 3 }, + ]); + expect(result.columns['accounts.code']).toMatchObject({ + distinctCount: 3, + uniquenessRatio: 0.75, + sampleValues: ['C-3', 'A-1', 'B-2'], + }); + expect(result.columns['users.account_id']).toMatchObject({ + rowCount: 3, + nullCount: 0, + distinctCount: 2, + uniquenessRatio: 2 / 3, + }); + }); + + it('bounds column profile statistics with profileSampleRows', async () => { + const executor = new InMemorySqliteExecutor(); + executor.db.exec(` + CREATE TABLE accounts (id INTEGER NOT NULL, account_code TEXT NOT NULL); + INSERT INTO accounts VALUES (1, 'a1'), (2, 'a2'), (3, 'a3'), (4, 'a4'); + `); + + const profiles = await profileKloRelationshipSchema({ + connectionId: 'warehouse', + driver: 'sqlite', + schema: schema([ + table('accounts', [ + column('accounts', 'id', { nullable: false }), + column('accounts', 'account_code', { + nativeType: 'TEXT', + normalizedType: 'text', + dimensionType: 'string', + nullable: false, + }), + ]), + ]), + executor, + ctx: { runId: 'profile-sample-rows' }, + profileSampleRows: 2, + }); + + expect(profiles.queryCount).toBe(1); + expect(executor.queryCount).toBe(1); + expect(profiles.tables).toEqual([{ table: { catalog: null, db: null, name: 'accounts' }, rowCount: 4 }]); + expect(profiles.columns['accounts.id']).toMatchObject({ + 
rowCount: 2, + distinctCount: 2, + uniquenessRatio: 1, + }); + expect(profiles.columns['accounts.account_code']?.sampleValues).toEqual(['a1', 'a2']); + + executor.close(); + }); + + it('reuses a profile cache inside one scan run but re-queries with a fresh cache', async () => { + executor = new InMemorySqliteExecutor(); + executor.db.exec(` + CREATE TABLE accounts (id INTEGER NOT NULL, account_code TEXT NOT NULL); + INSERT INTO accounts VALUES (1, 'a1'), (2, 'a2'), (3, 'a2'); + `); + const relationshipSchema = schema([ + table('accounts', [ + column('accounts', 'id', { nullable: false }), + column('accounts', 'account_code', { + nativeType: 'TEXT', + normalizedType: 'text', + dimensionType: 'string', + nullable: false, + }), + ]), + ]); + const cache = createKloRelationshipProfileCache(); + + const first = await profileKloRelationshipSchema({ + connectionId: 'warehouse', + driver: 'sqlite', + schema: relationshipSchema, + executor, + ctx: { runId: 'profile-cache-run' }, + cache, + }); + const second = await profileKloRelationshipSchema({ + connectionId: 'warehouse', + driver: 'sqlite', + schema: relationshipSchema, + executor, + ctx: { runId: 'profile-cache-run' }, + cache, + }); + const third = await profileKloRelationshipSchema({ + connectionId: 'warehouse', + driver: 'sqlite', + schema: relationshipSchema, + executor, + ctx: { runId: 'profile-cache-fresh-run' }, + cache: createKloRelationshipProfileCache(), + }); + + expect(first.queryCount).toBe(1); + expect(second.queryCount).toBe(0); + expect(third.queryCount).toBe(1); + expect(executor.queryCount).toBe(2); + expect(second.tables).toEqual(first.tables); + expect(second.columns).toEqual(first.columns); + }); + + it('profiles the checked-in scale stress fixture with one query per table', async () => { + const fixtureRoot = new URL('../../test/fixtures/relationship-benchmarks/', import.meta.url); + const fixture = await loadKloRelationshipBenchmarkFixture(join(fixtureRoot.pathname, 
'scale_stress_no_declared_constraints')); + if (!fixture.dataPath) { + throw new Error('scale_stress_no_declared_constraints is missing data.sqlite'); + } + const maskedSnapshot = maskKloRelationshipBenchmarkSnapshot( + fixture.snapshot, + 'declared_pks_and_declared_fks_removed', + ); + const scaleExecutor = new FileSqliteExecutor(fixture.dataPath); + try { + const result = await profileKloRelationshipSchema({ + connectionId: fixture.snapshot.connectionId, + driver: fixture.snapshot.driver, + schema: snapshotToKloEnrichedSchema(maskedSnapshot, new Map()), + executor: scaleExecutor, + ctx: { runId: 'scale-stress-profile-query-count' }, + profileSampleRows: 3, + }); + + expect(fixture.snapshot.tables).toHaveLength(400); + expect(result.queryCount).toBe(400); + expect(result.queryCount).toBeLessThanOrEqual(2 * fixture.snapshot.tables.length); + expect(scaleExecutor.queryCount).toBe(400); + } finally { + scaleExecutor.close(); + } + }); +}); diff --git a/packages/context/src/scan/relationship-profiling.ts b/packages/context/src/scan/relationship-profiling.ts new file mode 100644 index 00000000..6c5a4b8a --- /dev/null +++ b/packages/context/src/scan/relationship-profiling.ts @@ -0,0 +1,467 @@ +import type { KloEnrichedColumn, KloEnrichedSchema, KloEnrichedTable } from './enrichment-types.js'; +import type { + KloConnectionDriver, + KloQueryResult, + KloReadOnlyQueryInput, + KloScanContext, + KloTableRef, +} from './types.js'; + +export interface KloRelationshipReadOnlyExecutor { + executeReadOnly(input: KloReadOnlyQueryInput, ctx: KloScanContext): Promise; +} + +export interface KloRelationshipColumnProfile { + table: KloTableRef; + column: string; + nativeType: string; + normalizedType: string; + rowCount: number; + nullCount: number; + distinctCount: number; + uniquenessRatio: number; + nullRate: number; + sampleValues: string[]; + minTextLength: number | null; + maxTextLength: number | null; +} + +export interface KloRelationshipTableProfile { + table: KloTableRef; + 
rowCount: number; +} + +export interface KloRelationshipProfileArtifact { + connectionId: string; + driver: KloConnectionDriver; + sqlAvailable: boolean; + queryCount: number; + tables: KloRelationshipTableProfile[]; + columns: Record; + warnings: string[]; +} + +interface KloRelationshipCachedTableProfile { + table: KloRelationshipTableProfile; + columns: Record; + warnings: string[]; +} + +export interface KloRelationshipProfileCache { + readonly tableProfiles: Map; +} + +export interface ProfileKloRelationshipSchemaInput { + connectionId: string; + driver: KloConnectionDriver; + schema: KloEnrichedSchema; + executor: KloRelationshipReadOnlyExecutor | null; + ctx: KloScanContext; + sampleValuesPerColumn?: number; + profileSampleRows?: number; + cache?: KloRelationshipProfileCache; +} + +export function createKloRelationshipProfileCache(): KloRelationshipProfileCache { + return { tableProfiles: new Map() }; +} + +const SAMPLE_VALUE_DELIMITER = '\u001f'; + +type QuoteStyle = 'double' | 'backtick' | 'bracket'; + +function quoteStyle(driver: KloConnectionDriver): QuoteStyle { + if (driver === 'mysql' || driver === 'clickhouse' || driver === 'posthog') { + return 'backtick'; + } + if (driver === 'sqlserver') { + return 'bracket'; + } + return 'double'; +} + +export function quoteKloRelationshipIdentifier(driver: KloConnectionDriver, identifier: string): string { + switch (quoteStyle(driver)) { + case 'backtick': + return `\`${identifier.replace(/`/g, '``')}\``; + case 'bracket': + return `[${identifier.replace(/\]/g, ']]')}]`; + case 'double': + return `"${identifier.replace(/"/g, '""')}"`; + } +} + +export function formatKloRelationshipTableRef(driver: KloConnectionDriver, table: KloTableRef): string { + const parts = + driver === 'sqlite' || driver === 'posthog' + ? 
[table.name] + : [table.catalog, table.db, table.name].filter((value): value is string => Boolean(value)); + return parts.map((part) => quoteKloRelationshipIdentifier(driver, part)).join('.'); +} + +function textLengthExpression(driver: KloConnectionDriver, columnSql: string): string { + if (driver === 'mysql') { + return `CHAR_LENGTH(CAST(${columnSql} AS CHAR))`; + } + if (driver === 'sqlserver') { + return `LEN(CAST(${columnSql} AS NVARCHAR(MAX)))`; + } + if (driver === 'bigquery') { + return `LENGTH(CAST(${columnSql} AS STRING))`; + } + if (driver === 'clickhouse' || driver === 'posthog') { + return `length(toString(${columnSql}))`; + } + return `LENGTH(CAST(${columnSql} AS TEXT))`; +} + +function limitSql(driver: KloConnectionDriver, limit: number): string { + if (driver === 'sqlserver') { + return ''; + } + return ` LIMIT ${Math.max(1, Math.floor(limit))}`; +} + +function topSql(driver: KloConnectionDriver, limit: number): string { + if (driver === 'sqlserver') { + return ` TOP (${Math.max(1, Math.floor(limit))})`; + } + return ''; +} + +function sampledTableSql(driver: KloConnectionDriver, tableSql: string, limit: number): string { + const safeLimit = Math.max(1, Math.floor(limit)); + if (driver === 'sqlserver') { + return `(SELECT TOP (${safeLimit}) * FROM ${tableSql}) AS relationship_profile_sample`; + } + return `(SELECT * FROM ${tableSql}${limitSql(driver, safeLimit)}) AS relationship_profile_sample`; +} + +function firstRow(result: KloQueryResult): unknown[] { + return result.rows[0] ?? 
[]; +} + +function headerIndex(result: KloQueryResult, header: string): number { + return result.headers.findIndex((candidate) => candidate.toLowerCase() === header.toLowerCase()); +} + +function valueAt(result: KloQueryResult, row: unknown[], header: string): unknown { + return row[headerIndex(result, header)]; +} + +function numberFromValue(value: unknown): number { + if (typeof value === 'number') { + return value; + } + if (typeof value === 'bigint') { + return Number(value); + } + if (typeof value === 'string' && value.trim() !== '') { + return Number(value); + } + return 0; +} + +function nullableNumberFromValue(value: unknown): number | null { + if (value === null || value === undefined) { + return null; + } + if (typeof value === 'number') { + return value; + } + if (typeof value === 'bigint') { + return Number(value); + } + if (typeof value === 'string' && value.trim() !== '') { + return Number(value); + } + return null; +} + +function numberAt(result: KloQueryResult, header: string): number { + return numberFromValue(valueAt(result, firstRow(result), header)); +} + +function columnKey(table: KloEnrichedTable, column: KloEnrichedColumn): string { + return `${table.ref.name}.${column.name}`; +} + +function tableProfileCacheKey(input: { + connectionId: string; + driver: KloConnectionDriver; + ctx: KloScanContext; + table: KloTableRef; + sampleValuesPerColumn: number; + profileSampleRows: number; +}): string { + return [ + input.ctx.runId, + input.connectionId, + input.driver, + input.table.catalog ?? '', + input.table.db ?? 
'', + input.table.name, + String(input.sampleValuesPerColumn), + String(input.profileSampleRows), + ].join('\u001e'); +} + +function sqlStringLiteral(value: string): string { + return `'${value.replace(/'/g, "''")}'`; +} + +function sampleAggregateSql(driver: KloConnectionDriver, innerSql: string): string { + if (driver === 'postgres') { + return `(SELECT STRING_AGG(CAST(value AS TEXT), CHR(31)) FROM (${innerSql}) AS relationship_profile_values)`; + } + if (driver === 'bigquery') { + return `(SELECT STRING_AGG(CAST(value AS STRING), '\\u001F') FROM (${innerSql}) AS relationship_profile_values)`; + } + if (driver === 'mysql') { /* MySQL's SEPARATOR clause accepts only a literal, not a function call, so the unit separator (0x1F) is written as a hex literal */ + return `(SELECT GROUP_CONCAT(CAST(value AS CHAR) SEPARATOR 0x1F) FROM (${innerSql}) AS relationship_profile_values)`; + } + if (driver === 'sqlserver') { + return `(SELECT STRING_AGG(CAST(value AS NVARCHAR(MAX)), CHAR(31)) FROM (${innerSql}) AS relationship_profile_values)`; + } + if (driver === 'clickhouse' || driver === 'posthog') { + return `(SELECT arrayStringConcat(groupArray(toString(value)), '\\x1F') FROM (${innerSql}) AS relationship_profile_values)`; + } + return `(SELECT GROUP_CONCAT(CAST(value AS TEXT), char(31)) FROM (${innerSql}) AS relationship_profile_values)`; +} + +function sampleValuesSql(input: { + driver: KloConnectionDriver; + tableSql: string; + columnSql: string; + limit: number; +}): string { + return [ + `SELECT${topSql(input.driver, input.limit)} ${input.columnSql} AS value`, + `FROM ${input.tableSql}`, + `WHERE ${input.columnSql} IS NOT NULL`, + `GROUP BY ${input.columnSql}`, + `ORDER BY COUNT(*) DESC, ${input.columnSql} ASC`, + limitSql(input.driver, input.limit), + ].join(' '); +} + +function columnProfileSelectSql(input: { + connectionDriver: KloConnectionDriver; + tableSql: string; + profileTableSql: string; + column: KloEnrichedColumn; + sampleValuesPerColumn: number; +}): string { + const columnSql = quoteKloRelationshipIdentifier(input.connectionDriver, input.column.name); + const textLengthSql = 
textLengthExpression(input.connectionDriver, columnSql); + const samplesSql = sampleAggregateSql( + input.connectionDriver, + sampleValuesSql({ + driver: input.connectionDriver, + tableSql: input.profileTableSql, + columnSql, + limit: input.sampleValuesPerColumn, + }), + ); + return [ + 'SELECT', + `${sqlStringLiteral(input.column.name)} AS column_name,`, + `(SELECT COUNT(*) FROM ${input.tableSql}) AS table_row_count,`, + 'COUNT(*) AS row_count,', + `SUM(CASE WHEN ${columnSql} IS NULL THEN 1 ELSE 0 END) AS null_count,`, + `COUNT(DISTINCT ${columnSql}) AS distinct_count,`, + `MIN(${textLengthSql}) AS min_text_length,`, + `MAX(${textLengthSql}) AS max_text_length,`, + `${samplesSql} AS sample_values`, + `FROM ${input.profileTableSql}`, + ].join(' '); +} + +function splitSampleValues(value: unknown): string[] { + if (value === null || value === undefined) { + return []; + } + const text = String(value); + if (text === '') { + return []; + } + return text.split(SAMPLE_VALUE_DELIMITER).filter((item) => item !== ''); +} + +async function queryCount(input: { + connectionId: string; + driver: KloConnectionDriver; + table: KloTableRef; + executor: KloRelationshipReadOnlyExecutor; + ctx: KloScanContext; +}): Promise<{ rowCount: number; queryCount: number }> { + const tableSql = formatKloRelationshipTableRef(input.driver, input.table); + const result = await input.executor.executeReadOnly( + { connectionId: input.connectionId, sql: `SELECT COUNT(*) AS row_count FROM ${tableSql}`, maxRows: 1 }, + input.ctx, + ); + return { rowCount: numberAt(result, 'row_count'), queryCount: 1 }; +} + +async function queryTableProfile(input: { + connectionId: string; + driver: KloConnectionDriver; + table: KloEnrichedTable; + executor: KloRelationshipReadOnlyExecutor; + ctx: KloScanContext; + sampleValuesPerColumn: number; + profileSampleRows: number; +}): Promise<{ + table: KloRelationshipTableProfile; + columns: Record; + queryCount: number; +}> { + if (input.table.columns.length === 0) { + 
const rowCount = await queryCount({ + connectionId: input.connectionId, + driver: input.driver, + table: input.table.ref, + executor: input.executor, + ctx: input.ctx, + }); + return { + table: { table: input.table.ref, rowCount: rowCount.rowCount }, + columns: {}, + queryCount: rowCount.queryCount, + }; + } + + const tableSql = formatKloRelationshipTableRef(input.driver, input.table.ref); + const profileTableSql = sampledTableSql(input.driver, tableSql, input.profileSampleRows); + const sql = input.table.columns + .map((column) => + columnProfileSelectSql({ + connectionDriver: input.driver, + tableSql, + profileTableSql, + column, + sampleValuesPerColumn: input.sampleValuesPerColumn, + }), + ) + .join(' UNION ALL '); + const result = await input.executor.executeReadOnly( + { connectionId: input.connectionId, sql, maxRows: input.table.columns.length }, + input.ctx, + ); + const columnsByName = new Map(input.table.columns.map((column) => [column.name, column])); + const profiles: Record = {}; + let tableRowCount = 0; + + for (const row of result.rows) { + const columnName = String(valueAt(result, row, 'column_name')); + const column = columnsByName.get(columnName); + if (!column) { + continue; + } + const rowCount = numberFromValue(valueAt(result, row, 'row_count')); + const nullCount = numberFromValue(valueAt(result, row, 'null_count')); + const distinctCount = numberFromValue(valueAt(result, row, 'distinct_count')); + tableRowCount = Math.max(tableRowCount, numberFromValue(valueAt(result, row, 'table_row_count'))); + profiles[columnKey(input.table, column)] = { + table: input.table.ref, + column: column.name, + nativeType: column.nativeType, + normalizedType: column.normalizedType, + rowCount, + nullCount, + distinctCount, + uniquenessRatio: rowCount === 0 ? 0 : distinctCount / rowCount, + nullRate: rowCount === 0 ? 
0 : nullCount / rowCount, + sampleValues: splitSampleValues(valueAt(result, row, 'sample_values')), + minTextLength: nullableNumberFromValue(valueAt(result, row, 'min_text_length')), + maxTextLength: nullableNumberFromValue(valueAt(result, row, 'max_text_length')), + }; + } + + return { + table: { table: input.table.ref, rowCount: tableRowCount }, + columns: profiles, + queryCount: 1, + }; +} + +export async function profileKloRelationshipSchema( + input: ProfileKloRelationshipSchemaInput, +): Promise { + if (!input.executor) { + return { + connectionId: input.connectionId, + driver: input.driver, + sqlAvailable: false, + queryCount: 0, + tables: [], + columns: {}, + warnings: ['read_only_sql_unavailable'], + }; + } + + let queryTotal = 0; + const tables: KloRelationshipTableProfile[] = []; + const columns: Record = {}; + const warnings: string[] = []; + + for (const table of input.schema.tables.filter((candidate) => candidate.enabled)) { + const sampleValuesPerColumn = input.sampleValuesPerColumn ?? 5; + const profileSampleRows = input.profileSampleRows ?? 
10000; + const cacheKey = tableProfileCacheKey({ + connectionId: input.connectionId, + driver: input.driver, + ctx: input.ctx, + table: table.ref, + sampleValuesPerColumn, + profileSampleRows, + }); + const cached = input.cache?.tableProfiles.get(cacheKey); + if (cached) { + tables.push(cached.table); + Object.assign(columns, cached.columns); + for (const warning of cached.warnings) { + warnings.push(warning); + } + continue; + } + + try { + const tableProfile = await queryTableProfile({ + connectionId: input.connectionId, + driver: input.driver, + table, + executor: input.executor, + ctx: input.ctx, + sampleValuesPerColumn, + profileSampleRows, + }); + queryTotal += tableProfile.queryCount; + tables.push(tableProfile.table); + Object.assign(columns, tableProfile.columns); + input.cache?.tableProfiles.set(cacheKey, { + table: tableProfile.table, + columns: tableProfile.columns, + warnings: [], + }); + } catch (error) { + const failureWarning = `profile_failed:${table.ref.name}:${error instanceof Error ? 
error.message : String(error)}`; + warnings.push(failureWarning); + input.cache?.tableProfiles.set(cacheKey, { + table: { table: table.ref, rowCount: 0 }, + columns: {}, + warnings: [failureWarning], + }); + } + } + + return { + connectionId: input.connectionId, + driver: input.driver, + sqlAvailable: true, + queryCount: queryTotal, + tables, + columns, + warnings, + }; +} diff --git a/packages/context/src/scan/relationship-review-apply.test.ts b/packages/context/src/scan/relationship-review-apply.test.ts new file mode 100644 index 00000000..9b9cb497 --- /dev/null +++ b/packages/context/src/scan/relationship-review-apply.test.ts @@ -0,0 +1,352 @@ +import { mkdtemp, rm } from 'node:fs/promises'; +import { tmpdir } from 'node:os'; +import { join } from 'node:path'; +import type { KloLocalProject } from '../project/index.js'; +import { initKloProject } from '../project/index.js'; +import { describe, expect, it, vi } from 'vitest'; +import { applyLocalScanRelationshipReviewDecisions } from './relationship-review-apply.js'; +import type { KloRelationshipReviewDecisionArtifact } from './relationship-review-decisions.js'; +import type { ReadLocalScanRelationshipArtifactsResult } from './relationship-artifacts.js'; +import type { WriteLocalScanManifestShardsResult } from './local-enrichment-artifacts.js'; +import type { KloSchemaSnapshot } from './types.js'; + +const acceptedDecisionArtifact: KloRelationshipReviewDecisionArtifact = { + connectionId: 'warehouse', + runId: 'scan-run-a', + syncId: 'sync-a', + generatedAt: '2026-05-07T12:00:00.000Z', + decisions: [ + { + candidateId: 'orders:orders.customer_id->customers:customers.id', + decision: 'accepted', + previousStatus: 'review', + connectionId: 'warehouse', + runId: 'scan-run-a', + syncId: 'sync-a', + decidedAt: '2026-05-07T12:01:00.000Z', + reviewer: 'Andrey', + note: 'Customer link is valid.', + from: { + tableId: 'public.orders', + columnIds: ['public.orders.customer_id'], + table: { catalog: null, db: 'public', 
name: 'orders' }, + columns: ['customer_id'], + }, + to: { + tableId: 'public.customers', + columnIds: ['public.customers.id'], + table: { catalog: null, db: 'public', name: 'customers' }, + columns: ['id'], + }, + relationshipType: 'many_to_one', + source: 'deterministic_name', + score: 0.81, + confidence: 0.81, + pkScore: 0.93, + fkScore: 0.81, + reasons: ['review_threshold'], + }, + { + candidateId: 'orders:orders.note_id->notes:notes.id', + decision: 'rejected', + previousStatus: 'review', + connectionId: 'warehouse', + runId: 'scan-run-a', + syncId: 'sync-a', + decidedAt: '2026-05-07T12:02:00.000Z', + reviewer: 'Andrey', + note: null, + from: { + tableId: 'public.orders', + columnIds: ['public.orders.note_id'], + table: { catalog: null, db: 'public', name: 'orders' }, + columns: ['note_id'], + }, + to: { + tableId: 'public.notes', + columnIds: ['public.notes.id'], + table: { catalog: null, db: 'public', name: 'notes' }, + columns: ['id'], + }, + relationshipType: 'many_to_one', + source: 'embedding_similarity', + score: 0.7, + confidence: 0.7, + pkScore: 0.7, + fkScore: 0.7, + reasons: ['review_threshold'], + }, + ], +}; + +const artifacts: ReadLocalScanRelationshipArtifactsResult = { + runId: 'scan-run-a', + connectionId: 'warehouse', + syncId: 'sync-a', + report: { + connectionId: 'warehouse', + driver: 'postgres', + syncId: 'sync-a', + runId: 'scan-run-a', + trigger: 'cli', + mode: 'relationships', + dryRun: false, + artifactPaths: { + rawSourcesDir: 'raw-sources/warehouse/live-database/sync-a', + reportPath: 'raw-sources/warehouse/live-database/sync-a/scan-report.json', + manifestShards: ['semantic-layer/warehouse/_schema/public.yaml'], + enrichmentArtifacts: ['raw-sources/warehouse/live-database/sync-a/enrichment/relationships.json'], + }, + diffSummary: { + tablesAdded: 0, + tablesModified: 0, + tablesDeleted: 0, + tablesUnchanged: 2, + columnsAdded: 0, + columnsModified: 0, + columnsDeleted: 0, + }, + manifestShardsWritten: 1, + structuralSyncStats: { + 
/**
 * Test fixture: initialises a throwaway project inside a fresh temp directory and seeds it
 * with a relationship review decisions file for sync "sync-a".
 *
 * @param decisions - Artifact to serialise into the decisions file (defaults to the shared
 *   `acceptedDecisionArtifact` fixture).
 * @returns The initialised project and the temp directory the caller must remove afterwards.
 */
async function projectWithDecisions(
  decisions = acceptedDecisionArtifact,
): Promise<{ project: KloLocalProject; tempDir: string }> {
  const tempPrefix = join(tmpdir(), 'klo-relationship-review-apply-');
  const tempDir = await mkdtemp(tempPrefix);

  const projectDir = join(tempDir, 'project');
  const project = await initKloProject({ projectDir, projectName: 'warehouse' });

  // The decisions file sits next to the relationships artifact of the same sync.
  const decisionsPath =
    'raw-sources/warehouse/live-database/sync-a/enrichment/relationship-review-decisions.json';
  const serialized = `${JSON.stringify(decisions)}\n`;
  await project.fileStore.writeFile(
    decisionsPath,
    serialized,
    'klo',
    'klo@example.com',
    'Seed relationship review decisions',
  );

  return { project, tempDir };
}
writeLocalScanManifestShards = vi.fn(async () => manifestResult()); + try { + const result = await applyLocalScanRelationshipReviewDecisions(project, { + runId: 'scan-run-a', + candidateIds: ['orders:orders.customer_id->customers:customers.id'], + readLocalScanRelationshipArtifacts: vi.fn(async () => artifacts), + readLocalScanStructuralSnapshot, + writeLocalScanManifestShards, + }); + + expect(readLocalScanStructuralSnapshot).toHaveBeenCalledWith({ + project: expect.any(Object), + connectionId: 'warehouse', + driver: 'postgres', + rawSourcesDir: 'raw-sources/warehouse/live-database/sync-a', + extractedAtFallback: '2026-05-07T12:00:00.000Z', + }); + expect(writeLocalScanManifestShards).toHaveBeenCalledWith({ + project: expect.any(Object), + connectionId: 'warehouse', + syncId: 'sync-a', + driver: 'postgres', + snapshot, + dryRun: false, + relationshipUpdate: { + connectionId: 'warehouse', + accepted: [ + expect.objectContaining({ + id: 'orders:orders.customer_id->customers:customers.id', + source: 'manual', + from: expect.objectContaining({ columns: ['customer_id'] }), + to: expect.objectContaining({ columns: ['id'] }), + }), + ], + rejected: [], + skipped: [], + }, + }); + expect(result.manifestShardsWritten).toBe(1); + } finally { + await rm(tempDir, { recursive: true, force: true }); + } + }); + + it('rejects ambiguous apply selection input', async () => { + const { project, tempDir } = await projectWithDecisions(); + try { + await expect( + applyLocalScanRelationshipReviewDecisions(project, { + runId: 'scan-run-a', + readLocalScanRelationshipArtifacts: vi.fn(async () => artifacts), + }), + ).rejects.toThrow('Pass --all-accepted or at least one --candidate to choose review decisions to apply'); + + await expect( + applyLocalScanRelationshipReviewDecisions(project, { + runId: 'scan-run-a', + applyAllAccepted: true, + candidateIds: ['orders:orders.customer_id->customers:customers.id'], + readLocalScanRelationshipArtifacts: vi.fn(async () => artifacts), + }), + 
).rejects.toThrow('Use either --all-accepted or --candidate, not both'); + } finally { + await rm(tempDir, { recursive: true, force: true }); + } + }); + + it('refuses rejected decisions and missing candidate ids', async () => { + const { project, tempDir } = await projectWithDecisions(); + try { + await expect( + applyLocalScanRelationshipReviewDecisions(project, { + runId: 'scan-run-a', + candidateIds: ['orders:orders.note_id->notes:notes.id'], + readLocalScanRelationshipArtifacts: vi.fn(async () => artifacts), + }), + ).rejects.toThrow('Relationship review decision "orders:orders.note_id->notes:notes.id" is rejected, not accepted'); + + await expect( + applyLocalScanRelationshipReviewDecisions(project, { + runId: 'scan-run-a', + candidateIds: ['missing'], + readLocalScanRelationshipArtifacts: vi.fn(async () => artifacts), + }), + ).rejects.toThrow('Relationship review decision "missing" was not found for scan run "scan-run-a"'); + } finally { + await rm(tempDir, { recursive: true, force: true }); + } + }); +}); diff --git a/packages/context/src/scan/relationship-review-apply.ts b/packages/context/src/scan/relationship-review-apply.ts new file mode 100644 index 00000000..521ea2ed --- /dev/null +++ b/packages/context/src/scan/relationship-review-apply.ts @@ -0,0 +1,231 @@ +import type { KloLocalProject } from '../project/index.js'; +import { + readLocalScanRelationshipArtifacts, + type ReadLocalScanRelationshipArtifactsResult, +} from './relationship-artifacts.js'; +import { + readLocalScanStructuralSnapshot, + type ReadLocalScanStructuralSnapshotInput, +} from './local-structural-artifacts.js'; +import { + writeLocalScanManifestShards, + type WriteLocalScanManifestShardsInput, + type WriteLocalScanManifestShardsResult, +} from './local-enrichment-artifacts.js'; +import type { KloEnrichedRelationship, KloRelationshipUpdate } from './enrichment-types.js'; +import type { + KloRelationshipReviewDecisionArtifact, + KloRelationshipReviewDecisionEntry, +} from 
/**
 * Loads the relationship review decisions artifact stored at `path`.
 *
 * @param project - Project whose file store is read.
 * @param path - Path of the decisions JSON file inside the project file store.
 * @param runId - Scan run id; used only to give error messages context.
 * @returns The artifact's top-level fields, with `decisions` normalised to an array
 *   (an empty array when the stored value is not an array).
 * @throws Error when the file cannot be read, when its content is not valid JSON, or when
 *   the JSON document is not an object.
 */
async function readDecisionArtifact(
  project: KloLocalProject,
  path: string,
  runId: string,
): Promise<KloRelationshipReviewDecisionArtifact> {
  let raw: { content: string };
  try {
    raw = await project.fileStore.readFile(path);
  } catch {
    throw new Error(`Relationship review decisions were not found for scan run "${runId}"`);
  }

  // Parse separately from the read so a corrupt file is reported as such, with the run and
  // path, instead of surfacing a bare SyntaxError.
  let parsed: unknown;
  try {
    parsed = JSON.parse(raw.content);
  } catch (error) {
    const reason = error instanceof Error ? error.message : String(error);
    throw new Error(
      `Relationship review decisions for scan run "${runId}" are not valid JSON (${path}): ${reason}`,
    );
  }

  // `null`, arrays and primitives are valid JSON but cannot be a decisions artifact; reject
  // them here rather than failing later with a TypeError on property access.
  if (typeof parsed !== 'object' || parsed === null || Array.isArray(parsed)) {
    throw new Error(`Relationship review decisions for scan run "${runId}" must be a JSON object (${path})`);
  }

  const artifact = parsed as KloRelationshipReviewDecisionArtifact;
  return {
    connectionId: artifact.connectionId,
    runId: artifact.runId,
    syncId: artifact.syncId,
    generatedAt: artifact.generatedAt,
    decisions: Array.isArray(artifact.decisions) ? artifact.decisions : [],
  };
}
/**
 * Picks the review decisions that should be applied to the manifest.
 *
 * With `applyAllAccepted`, every decision whose value is `'accepted'` is returned. Otherwise
 * each id in `candidateIds` is looked up; ids given more than once are applied once, in
 * first-seen order, so one relationship is never emitted twice.
 *
 * @param artifact - Decisions artifact for the scan run.
 * @param input - Apply options; validated by `assertSelection` before anything is selected.
 * @returns The accepted decisions to apply.
 * @throws Error when the selection options are ambiguous or empty, when a requested candidate
 *   id has no decision, or when its decision is not `'accepted'`.
 */
function selectAcceptedDecisions(
  artifact: KloRelationshipReviewDecisionArtifact,
  input: ApplyLocalScanRelationshipReviewDecisionsInput,
): KloRelationshipReviewDecisionEntry[] {
  assertSelection(input);
  if (input.applyAllAccepted === true) {
    return artifact.decisions.filter((decision) => decision.decision === 'accepted');
  }

  const decisionsById = new Map<string, KloRelationshipReviewDecisionEntry>(
    artifact.decisions.map((decision) => [decision.candidateId, decision]),
  );
  const alreadySelected = new Set<string>();
  const selected: KloRelationshipReviewDecisionEntry[] = [];
  for (const candidateId of input.candidateIds ?? []) {
    // A repeated --candidate must not yield a duplicate relationship.
    if (alreadySelected.has(candidateId)) {
      continue;
    }
    alreadySelected.add(candidateId);

    const decision = decisionsById.get(candidateId);
    if (!decision) {
      throw new Error(`Relationship review decision "${candidateId}" was not found for scan run "${input.runId}"`);
    }
    if (decision.decision !== 'accepted') {
      throw new Error(`Relationship review decision "${candidateId}" is ${decision.decision}, not accepted`);
    }
    selected.push(decision);
  }
  return selected;
}
/**
 * Applies accepted relationship review decisions of a scan run to the manifest shards.
 *
 * Reads the run's relationship artifacts and the decisions file stored beside them, turns
 * the selected accepted decisions into manual relationships and, unless this is a dry run or
 * nothing was selected, rewrites the manifest shards from the run's structural snapshot.
 *
 * @param project - Project to read artifacts from and write manifest shards to.
 * @param input - Run id, selection options, dry-run flag and optional overrides for the
 *   artifact reader, snapshot reader and manifest writer.
 * @returns A summary of the selection plus the manifest shards written (none for a dry run
 *   or an empty selection).
 * @throws Error when the scan run is not found, or when a step it delegates to (reading
 *   decisions, selecting them, resolving the raw sources directory) throws.
 */
export async function applyLocalScanRelationshipReviewDecisions(
  project: KloLocalProject,
  input: ApplyLocalScanRelationshipReviewDecisionsInput,
): Promise<ApplyLocalScanRelationshipReviewDecisionsResult> {
  const loadArtifacts = input.readLocalScanRelationshipArtifacts ?? readLocalScanRelationshipArtifacts;
  const artifacts = await loadArtifacts(project, input.runId);
  if (!artifacts) {
    throw new Error(`Scan run "${input.runId}" was not found`);
  }
  const { runId, connectionId, syncId, report } = artifacts;

  const decisionsPath = decisionsPathFromRelationshipsPath(artifacts.paths.relationships);
  const decisionArtifact = await readDecisionArtifact(project, decisionsPath, input.runId);
  const selected = selectAcceptedDecisions(decisionArtifact, input);
  const relationships = selected.map((entry) => relationshipFromDecision(entry));
  const dryRun = input.dryRun === true;

  // Fields shared by both outcomes; only the manifest fields differ.
  const summary = {
    runId,
    connectionId,
    syncId,
    dryRun,
    decisionsPath,
    selectedDecisions: selected.length,
    appliedRelationships: relationships.length,
    relationships,
  };

  const nothingToWrite = relationships.length === 0;
  if (dryRun || nothingToWrite) {
    return { ...summary, manifestShards: [], manifestShardsWritten: 0 };
  }

  const rawSourcesDir = assertApplyableArtifacts(artifacts);
  const loadSnapshot = input.readLocalScanStructuralSnapshot ?? readLocalScanStructuralSnapshot;
  const persistManifest = input.writeLocalScanManifestShards ?? writeLocalScanManifestShards;

  const snapshot = await loadSnapshot({
    project,
    connectionId,
    driver: report.driver,
    rawSourcesDir,
    extractedAtFallback: report.createdAt,
  });
  const { manifestShards, manifestShardsWritten } = await persistManifest({
    project,
    connectionId,
    syncId,
    driver: report.driver,
    snapshot,
    dryRun: false,
    relationshipUpdate: relationshipUpdate(connectionId, relationships),
  });

  return { ...summary, manifestShards, manifestShardsWritten };
}
/**
 * Test fixture: a minimal `live-database` source adapter that stages a single `orders`
 * table with no foreign keys and reports it as one work unit.
 */
function liveDatabaseAdapter(): SourceAdapter {
  const connectionJson = '{"connectionId":"warehouse"}\n';
  const foreignKeysJson = '{"foreignKeys":[]}\n';
  const ordersTableJson =
    '{"name":"orders","db":"public","columns":[{"name":"id","type":"integer","nullable":false,"primaryKey":true}]}\n';

  // Writes one staged file as UTF-8 below the staged directory.
  const stage = (stagedDir: string, segments: string[], body: string): Promise<void> =>
    writeFile(join(stagedDir, ...segments), body, 'utf-8');

  const adapter: SourceAdapter = {
    source: 'live-database',
    skillNames: ['live_database_ingest'],
    async fetch(_pullConfig, stagedDir) {
      await mkdir(join(stagedDir, 'tables'), { recursive: true });
      await stage(stagedDir, ['connection.json'], connectionJson);
      await stage(stagedDir, ['foreign-keys.json'], foreignKeysJson);
      await stage(stagedDir, ['tables', 'orders.json'], ordersTableJson);
    },
    async detect(stagedDir) {
      await stage(stagedDir, ['connection.json'], connectionJson);
      return true;
    },
    async chunk() {
      const ordersUnit = {
        unitKey: 'live-database-public-orders',
        rawFiles: ['tables/orders.json'],
        dependencyPaths: ['connection.json', 'foreign-keys.json'],
        peerFileIndex: [],
      };
      return { workUnits: [ordersUnit] };
    },
  };
  return adapter;
}
reviewRelationships(): KloRelationshipArtifact { + return { + connectionId: 'warehouse', + accepted: [], + review: [ + { + id: 'orders:orders.customer_id->customers:customers.id', + status: 'review', + source: 'deterministic_name', + from: { + tableId: 'orders', + columnIds: ['orders.customer_id'], + table: { catalog: null, db: 'public', name: 'orders' }, + columns: ['customer_id'], + }, + to: { + tableId: 'customers', + columnIds: ['customers.id'], + table: { catalog: null, db: 'public', name: 'customers' }, + columns: ['id'], + }, + relationshipType: 'many_to_one', + confidence: 0.62, + pkScore: 0.91, + fkScore: 0.62, + score: 0.62, + evidence: { sources: ['table_suffix'] }, + validation: { status: 'passed' }, + graph: { reasons: ['fk_score_review'] }, + reasons: ['fk_score_review'], + }, + ], + rejected: [], + skipped: [], + }; +} + +function diagnostics(): KloRelationshipDiagnosticsArtifact { + return { + connectionId: 'warehouse', + generatedAt: '2026-05-07T10:00:00.000Z', + summary: { accepted: 0, review: 1, rejected: 0, skipped: 0 }, + noAcceptedReason: 'relationship candidates require review before manifest writes', + candidateCountsBySource: { deterministic_name: 1 }, + validation: { available: true, sqlAvailable: true, queryCount: 3 }, + thresholds: { acceptThreshold: 0.85, reviewThreshold: 0.55 }, + policy: { + validationRequiredForManifest: true, + maxCandidatesPerColumn: 25, + profileSampleRows: 10000, + validationConcurrency: 4, + }, + warnings: [], + profileWarnings: [], + }; +} + +function profile(): KloRelationshipProfileArtifact { + return { + connectionId: 'warehouse', + driver: 'sqlite', + sqlAvailable: true, + tables: [], + columns: {}, + queryCount: 3, + warnings: [], + }; +} + +function report(): KloScanReport { + return { + connectionId: 'warehouse', + driver: 'sqlite', + syncId: SYNC_ID, + runId: RUN_ID, + trigger: 'cli', + mode: 'relationships', + dryRun: false, + artifactPaths: { + rawSourcesDir: 
/**
 * Test fixture: writes the scan report and the three relationship enrichment artifacts
 * (relationships, diagnostics, profile) for `SYNC_ID` into the project directory, each as
 * two-space-indented JSON.
 */
async function writeScanArtifacts(projectDir: string): Promise<void> {
  const syncDir = `raw-sources/warehouse/live-database/${SYNC_ID}`;
  const artifactsToWrite: Array<[relativePath: string, payload: unknown]> = [
    [`${syncDir}/scan-report.json`, report()],
    [`${syncDir}/enrichment/relationships.json`, reviewRelationships()],
    [`${syncDir}/enrichment/relationship-diagnostics.json`, diagnostics()],
    [`${syncDir}/enrichment/relationship-profile.json`, profile()],
  ];

  // Written one after another, in the order listed above.
  for (const [relativePath, payload] of artifactsToWrite) {
    await writeProjectFile(projectDir, relativePath, JSON.stringify(payload, null, 2));
  }
}
project = await loadKloProject({ projectDir }); + + await writeLocalScanRelationshipReviewDecision(project, { + runId: 'scan-run-review', + candidateId: 'orders:orders.customer_id->customers:customers.id', + decision: 'accepted', + reviewer: 'Andrey', + note: 'First decision', + decidedAt: '2026-05-07T12:00:00.000Z', + }); + const replacement = await writeLocalScanRelationshipReviewDecision(project, { + runId: 'scan-run-review', + candidateId: 'orders:orders.customer_id->customers:customers.id', + decision: 'rejected', + reviewer: 'Andrey', + note: 'Reviewed against source data and rejected', + decidedAt: '2026-05-07T12:05:00.000Z', + }); + + expect(replacement).not.toBeNull(); + if (!replacement) { + throw new Error('Expected replacement relationship review decision to be written'); + } + expect(replacement.artifact.decisions).toHaveLength(1); + expect(replacement.artifact.decisions[0]).toMatchObject({ + decision: 'rejected', + note: 'Reviewed against source data and rejected', + decidedAt: '2026-05-07T12:05:00.000Z', + }); + } finally { + await rm(projectDir, { recursive: true, force: true }); + } + }); + + it('returns null when the scan run does not exist', async () => { + const projectDir = await mkdtemp(join(tmpdir(), 'klo-relationship-review-missing-run-')); + try { + await createProject(projectDir); + const project = await loadKloProject({ projectDir }); + + await expect( + writeLocalScanRelationshipReviewDecision(project, { + runId: 'missing-run', + candidateId: 'orders:orders.customer_id->customers:customers.id', + decision: 'accepted', + reviewer: 'Andrey', + note: null, + decidedAt: '2026-05-07T12:00:00.000Z', + }), + ).resolves.toBeNull(); + } finally { + await rm(projectDir, { recursive: true, force: true }); + } + }); + + it('rejects unknown candidate ids for an existing scan run', async () => { + const projectDir = await mkdtemp(join(tmpdir(), 'klo-relationship-review-missing-candidate-')); + try { + await createLiveDatabaseRun(projectDir); + await 
/** Input accepted by `writeLocalScanRelationshipReviewDecision`. */
export interface WriteLocalScanRelationshipReviewDecisionInput {
  /** Id of the scan run whose relationship artifacts are looked up. */
  runId: string;
  /** Id of the candidate edge being decided; must match an accepted, review or rejected edge of the run. */
  candidateId: string;
  /** The reviewer's verdict: `'accepted'` or `'rejected'`. */
  decision: KloRelationshipReviewDecisionValue;
  /** Reviewer name, copied onto the decision entry. */
  reviewer: string;
  /** Optional free-text note, copied onto the decision entry; `null` when there is none. */
  note: string | null;
  /** Timestamp recorded on the decision; when omitted, the current time as an ISO string is used. */
  decidedAt?: string;
}
/**
 * Flattens the candidate edges of a scan run into one list: accepted first, then review,
 * then rejected. Entries under `skipped` are not included.
 *
 * @param result - Relationship artifacts of a scan run, or a falsy value when there are none.
 * @returns All accepted, review and rejected edges; an empty list when `result` is falsy.
 */
function allCandidateEdges(
  result: Awaited<ReturnType<typeof readLocalScanRelationshipArtifacts>>,
): KloRelationshipArtifactEdge[] {
  const edges: KloRelationshipArtifactEdge[] = [];
  if (result) {
    const { accepted, review, rejected } = result.relationships;
    edges.push(...accepted, ...review, ...rejected);
  }
  return edges;
}
parsed.decisions : [], + }; + } catch { + return { ...fallback, decisions: [] }; + } +} + +function decisionEntry(input: { + candidate: KloRelationshipArtifactEdge; + connectionId: string; + runId: string; + syncId: string; + decision: KloRelationshipReviewDecisionValue; + reviewer: string; + note: string | null; + decidedAt: string; +}): KloRelationshipReviewDecisionEntry { + return { + candidateId: input.candidate.id, + decision: input.decision, + previousStatus: input.candidate.status, + connectionId: input.connectionId, + runId: input.runId, + syncId: input.syncId, + decidedAt: input.decidedAt, + reviewer: input.reviewer, + note: input.note, + from: input.candidate.from, + to: input.candidate.to, + relationshipType: input.candidate.relationshipType, + source: input.candidate.source, + score: input.candidate.score, + confidence: input.candidate.confidence, + pkScore: input.candidate.pkScore, + fkScore: input.candidate.fkScore, + reasons: [...input.candidate.reasons], + }; +} + +function upsertDecision( + existing: readonly KloRelationshipReviewDecisionEntry[], + next: KloRelationshipReviewDecisionEntry, +): KloRelationshipReviewDecisionEntry[] { + return [...existing.filter((item) => item.candidateId !== next.candidateId), next].sort((left, right) => + left.candidateId.localeCompare(right.candidateId), + ); +} + +export async function writeLocalScanRelationshipReviewDecision( + project: KloLocalProject, + input: WriteLocalScanRelationshipReviewDecisionInput, +): Promise { + const artifacts = await readLocalScanRelationshipArtifacts(project, input.runId); + if (!artifacts) { + return null; + } + + const candidate = allCandidateEdges(artifacts).find((edge) => edge.id === input.candidateId); + if (!candidate) { + throw new Error(`Relationship candidate "${input.candidateId}" was not found in scan run "${input.runId}"`); + } + + const decidedAt = input.decidedAt ?? 
/**
 * Test fixture: builds a signal vector from fixed baseline values, with any field replaced
 * by the matching entry in `overrides`.
 */
function signals(overrides: Partial<KloRelationshipSignalVector> = {}): KloRelationshipSignalVector {
  const baseline: KloRelationshipSignalVector = {
    nameSimilarity: 0.5,
    typeCompatibility: 1,
    valueOverlap: 0,
    embeddingSimilarity: 0,
    profileUniqueness: 0.5,
    profileNullRate: 0.5,
    structuralPrior: 0.5,
  };
  return Object.assign({}, baseline, overrides);
}
const weakNameStrongProfile = scoreKloRelationshipCandidate( + signals({ + nameSimilarity: 0.05, + typeCompatibility: 1, + valueOverlap: 0.7, + profileUniqueness: 1, + profileNullRate: 1, + structuralPrior: 0.7, + }), + ); + const strongNameWeakProfile = scoreKloRelationshipCandidate( + signals({ + nameSimilarity: 0.95, + typeCompatibility: 1, + valueOverlap: 0, + profileUniqueness: 0.3, + profileNullRate: 0.4, + structuralPrior: 0.5, + }), + ); + + expect(weakNameStrongProfile.score).toBeGreaterThan(strongNameWeakProfile.score); + expect(weakNameStrongProfile.contributions.profileUniqueness).toBeGreaterThan(0); + expect(weakNameStrongProfile.contributions.nameSimilarity).toBeLessThan(0.02); + }); + + it('normalizes partial and invalid weights into a usable vector', () => { + const weights = normalizeKloRelationshipScoreWeights({ + nameSimilarity: 3, + typeCompatibility: -1, + valueOverlap: Number.POSITIVE_INFINITY, + profileUniqueness: 1, + }); + + const total = Object.values(weights).reduce((sum, value) => sum + value, 0); + expect(total).toBeCloseTo(1, 6); + expect(weights.nameSimilarity).toBeGreaterThan(weights.profileUniqueness); + expect(weights.typeCompatibility).toBe(0); + expect(weights.valueOverlap).toBe(0); + }); + + it('returns deterministic defaults as a defensive copy', () => { + const first = defaultKloRelationshipScoreWeights(); + const second = defaultKloRelationshipScoreWeights(); + + expect(first).toEqual(second); + expect(first).not.toBe(second); + expect(Object.values(first).reduce((sum, value) => sum + value, 0)).toBeCloseTo(1, 6); + }); + + it('calibrates only from synthetic observations', () => { + expect(() => + calibrateWeightsFromSyntheticFixtures([ + { + fixtureId: 'chinook_with_declared_metadata', + origin: 'public', + expectedRelationship: true, + signals: signals({ nameSimilarity: 1 }), + }, + ]), + ).toThrow(/synthetic/i); + }); + + it('calibrates deterministic weights from positive and negative synthetic observations', () => { + 
/**
 * Relationship-candidate scoring.
 *
 * A candidate is described by a vector of signals, each expected in [0, 1].
 * The score is a weighted sum of the sanitized signals, mapped through a fixed
 * affine transform and clamped back into [0, 1]. Weights can be supplied by the
 * caller or calibrated from labelled synthetic fixtures.
 */

/** Signal names, in the order used whenever the scorer iterates over signals. */
export const KLO_RELATIONSHIP_SCORE_SIGNAL_KEYS = [
  'nameSimilarity',
  'typeCompatibility',
  'valueOverlap',
  'embeddingSimilarity',
  'profileUniqueness',
  'profileNullRate',
  'structuralPrior',
] as const;

/** Union of the signal names listed in {@link KLO_RELATIONSHIP_SCORE_SIGNAL_KEYS}. */
export type KloRelationshipScoreSignal = (typeof KLO_RELATIONSHIP_SCORE_SIGNAL_KEYS)[number];

/** Where a calibration fixture came from; only `'synthetic'` is accepted for calibration. */
export type KloRelationshipFixtureOrigin = 'synthetic' | 'public' | 'customer';

/** One numeric value per signal. Values are clamped to [0, 1] before scoring. */
export interface KloRelationshipSignalVector {
  nameSimilarity: number;
  typeCompatibility: number;
  valueOverlap: number;
  embeddingSimilarity: number;
  profileUniqueness: number;
  profileNullRate: number;
  structuralPrior: number;
}

/** One numeric weight (or per-signal contribution) per signal. */
export type KloRelationshipScoreWeights = Record<KloRelationshipScoreSignal, number>;

/** Result of scoring one candidate, including the inputs that produced the score. */
export interface KloRelationshipScoreBreakdown {
  /** Final score in [0, 1], rounded to three decimals. */
  score: number;
  /** The signals after clamping and rounding. */
  signals: KloRelationshipSignalVector;
  /** The weights after normalization (they sum to 1). */
  weights: KloRelationshipScoreWeights;
  /** `signal * weight` per signal, rounded to six decimals. */
  contributions: KloRelationshipScoreWeights;
}

/** A labelled example used by {@link calibrateWeightsFromSyntheticFixtures}. */
export interface KloRelationshipScoringCalibrationObservation {
  fixtureId: string;
  origin: KloRelationshipFixtureOrigin;
  expectedRelationship: boolean;
  signals: KloRelationshipSignalVector;
}

const DEFAULT_WEIGHTS: KloRelationshipScoreWeights = {
  nameSimilarity: 0.24,
  typeCompatibility: 0.1,
  valueOverlap: 0.22,
  embeddingSimilarity: 0.1,
  profileUniqueness: 0.22,
  profileNullRate: 0.08,
  structuralPrior: 0.04,
};

/** Clamps `value` into [0, 1]; NaN and ±Infinity become 0. */
function clampScore(value: number): number {
  if (!Number.isFinite(value)) {
    return 0;
  }
  return Math.max(0, Math.min(1, value));
}

/** Clamps `value` into [0, 1] and rounds it to three decimals. */
function roundScore(value: number): number {
  return Number(clampScore(value).toFixed(3));
}

/** Returns a copy of `signals` with every entry clamped and rounded via `roundScore`. */
function sanitizeSignalVector(signals: KloRelationshipSignalVector): KloRelationshipSignalVector {
  return {
    nameSimilarity: roundScore(signals.nameSimilarity),
    typeCompatibility: roundScore(signals.typeCompatibility),
    valueOverlap: roundScore(signals.valueOverlap),
    embeddingSimilarity: roundScore(signals.embeddingSimilarity),
    profileUniqueness: roundScore(signals.profileUniqueness),
    profileNullRate: roundScore(signals.profileNullRate),
    structuralPrior: roundScore(signals.structuralPrior),
  };
}

/** Returns a fresh copy of the built-in weights so callers cannot mutate the shared constant. */
export function defaultKloRelationshipScoreWeights(): KloRelationshipScoreWeights {
  return { ...DEFAULT_WEIGHTS };
}

/**
 * Turns a partial or dirty weight map into a complete vector that sums to 1.
 *
 * Missing, negative and non-finite entries count as 0. If nothing usable is
 * left (total is 0), the default weights are returned.
 *
 * @param weights Caller-supplied weights; defaults to the built-in weights.
 * @returns A weight for every signal, each in [0, 1].
 */
export function normalizeKloRelationshipScoreWeights(
  weights: Partial<KloRelationshipScoreWeights> = DEFAULT_WEIGHTS,
): KloRelationshipScoreWeights {
  let entries = KLO_RELATIONSHIP_SCORE_SIGNAL_KEYS.map((key) => {
    const value = weights[key] ?? 0;
    return [key, Number.isFinite(value) ? Math.max(0, value) : 0] as const;
  });
  let total = entries.reduce((sum, [, value]) => sum + value, 0);

  // Several huge-but-finite weights can overflow the sum to Infinity, which
  // would turn every `value / total` into 0. Rescale by the largest entry so
  // the ratios survive; ordinary inputs never take this path.
  if (!Number.isFinite(total)) {
    const largest = Math.max(...entries.map(([, value]) => value));
    entries = entries.map(([key, value]) => [key, value / largest] as const);
    total = entries.reduce((sum, [, value]) => sum + value, 0);
  }

  if (total <= 0) {
    return defaultKloRelationshipScoreWeights();
  }

  return Object.fromEntries(entries.map(([key, value]) => [key, value / total])) as KloRelationshipScoreWeights;
}

/**
 * Scores one relationship candidate.
 *
 * The signals are sanitized, the weights normalized, and the per-signal
 * contributions summed. A candidate whose sanitized `typeCompatibility` is 0
 * scores 0 outright; otherwise the score is `0.56 + weightedSum * 0.65`,
 * clamped to [0, 1] and rounded to three decimals.
 *
 * @param signals Raw signal values for the candidate.
 * @param weights Optional weights; normalized before use.
 */
export function scoreKloRelationshipCandidate(
  signals: KloRelationshipSignalVector,
  weights: Partial<KloRelationshipScoreWeights> = DEFAULT_WEIGHTS,
): KloRelationshipScoreBreakdown {
  const sanitizedSignals = sanitizeSignalVector(signals);
  const normalizedWeights = normalizeKloRelationshipScoreWeights(weights);
  const contributions = Object.fromEntries(
    KLO_RELATIONSHIP_SCORE_SIGNAL_KEYS.map((key) => [
      key,
      Number((sanitizedSignals[key] * normalizedWeights[key]).toFixed(6)),
    ]),
  ) as KloRelationshipScoreWeights;
  const rawWeightedScore = KLO_RELATIONSHIP_SCORE_SIGNAL_KEYS.reduce((sum, key) => sum + contributions[key], 0);
  const scoredConfidence = sanitizedSignals.typeCompatibility <= 0 ? 0 : 0.56 + rawWeightedScore * 0.65;

  return {
    score: roundScore(scoredConfidence),
    signals: sanitizedSignals,
    weights: normalizedWeights,
    contributions,
  };
}

/** Mean of one clamped signal across `observations`; 0 for an empty list. */
function averageSignal(
  observations: readonly KloRelationshipScoringCalibrationObservation[],
  key: KloRelationshipScoreSignal,
): number {
  if (observations.length === 0) {
    return 0;
  }
  return observations.reduce((sum, observation) => sum + clampScore(observation.signals[key]), 0) / observations.length;
}

/**
 * Derives weights from labelled synthetic fixtures.
 *
 * Each signal's raw weight is how far its mean among positive examples exceeds
 * its mean among negative examples (never below 0), plus a quarter of the
 * default weight. The result is then normalized to sum to 1.
 *
 * @param observations Labelled fixtures; every one must have `origin === 'synthetic'`.
 * @returns Calibrated weights, or the defaults when the list is empty or lacks
 *   either positive or negative examples.
 * @throws Error if any observation is not synthetic.
 */
export function calibrateWeightsFromSyntheticFixtures(
  observations: readonly KloRelationshipScoringCalibrationObservation[],
): KloRelationshipScoreWeights {
  const nonSynthetic = observations.find((observation) => observation.origin !== 'synthetic');
  if (nonSynthetic) {
    throw new Error(
      `Relationship score calibration accepts only synthetic fixtures; ${nonSynthetic.fixtureId} is ${nonSynthetic.origin}`,
    );
  }
  if (observations.length === 0) {
    return defaultKloRelationshipScoreWeights();
  }

  const positives = observations.filter((observation) => observation.expectedRelationship);
  const negatives = observations.filter((observation) => !observation.expectedRelationship);
  if (positives.length === 0 || negatives.length === 0) {
    return defaultKloRelationshipScoreWeights();
  }

  const calibrated = Object.fromEntries(
    KLO_RELATIONSHIP_SCORE_SIGNAL_KEYS.map((key) => {
      const positiveAverage = averageSignal(positives, key);
      const negativeAverage = averageSignal(negatives, key);
      const separation = Math.max(0, positiveAverage - negativeAverage);
      return [key, separation + DEFAULT_WEIGHTS[key] * 0.25];
    }),
  ) as KloRelationshipScoreWeights;

  return normalizeKloRelationshipScoreWeights(calibrated);
}
input.score, + fromTable: 'public.orders', + fromColumns: ['customer_id'], + toTable: 'public.customers', + toColumns: ['id'], + reasons: [], + artifactPath: 'raw-sources/warehouse/live-database/sync-a/enrichment/relationship-review-decisions.json', + ...input, + }; +} + +function feedback(labels: KloRelationshipFeedbackLabel[]): ExportLocalRelationshipFeedbackLabelsResult { + return { + generatedAt: '2026-05-07T13:00:00.000Z', + filters: { connectionId: null, decision: 'all' }, + summary: { + total: labels.length, + accepted: labels.filter((item) => item.decision === 'accepted').length, + rejected: labels.filter((item) => item.decision === 'rejected').length, + connections: new Set(labels.map((item) => item.connectionId)).size, + runs: new Set(labels.map((item) => `${item.connectionId}:${item.runId}`)).size, + }, + labels, + warnings: [], + }; +} + +describe('relationship threshold advice', () => { + it('selects the highest-quality threshold candidate when enough labels exist', () => { + const report = buildKloRelationshipThresholdAdviceReport( + feedback([ + label({ + candidateId: 'orders:orders.customer_id->customers:customers.id', + decision: 'accepted', + score: 0.91, + pkScore: 0.97, + fkScore: 0.91, + }), + label({ + candidateId: 'orders:orders.account_id->accounts:accounts.id', + decision: 'accepted', + score: 0.61, + pkScore: 0.88, + fkScore: 0.61, + }), + label({ + candidateId: 'orders:orders.note_id->notes:notes.id', + decision: 'rejected', + score: 0.21, + pkScore: 0.4, + fkScore: 0.21, + }), + label({ + candidateId: 'orders:orders.region_id->regions:regions.id', + decision: 'rejected', + score: 0.88, + pkScore: 0.9, + fkScore: 0.88, + }), + ]), + { + acceptThresholds: [0.9, 0.85], + reviewThresholds: [0.55], + minTotalLabels: 4, + minAcceptedLabels: 2, + minRejectedLabels: 2, + minAcceptedBandPrecision: 0.75, + minAcceptedOrReviewRecall: 0.75, + minRejectedBandPrecision: 0.75, + }, + ); + + expect(report.status).toBe('ready'); + 
expect(report.summary).toMatchObject({ + totalLabels: 4, + scoredLabels: 4, + acceptedLabels: 2, + rejectedLabels: 2, + eligibleCandidates: 1, + }); + expect(report.recommended).toMatchObject({ + acceptThreshold: 0.9, + reviewThreshold: 0.55, + eligible: true, + acceptedBandPrecision: 1, + acceptedRecall: 0.5, + acceptedOrReviewRecall: 1, + rejectedBandPrecision: 1, + rejectedRecall: 1, + falseAcceptedRejectedLabels: 0, + falseRejectedAcceptedLabels: 0, + }); + expect(report.candidates.map((candidate) => [candidate.acceptThreshold, candidate.reviewThreshold, candidate.eligible])).toEqual([ + [0.9, 0.55, true], + [0.85, 0.55, false], + ]); + }); + + it('reports insufficient labels without hiding evaluated candidates', () => { + const report = buildKloRelationshipThresholdAdviceReport( + feedback([ + label({ candidateId: 'orders:orders.customer_id->customers:customers.id', decision: 'accepted', score: 0.91 }), + label({ candidateId: 'orders:orders.note_id->notes:notes.id', decision: 'rejected', score: 0.21 }), + ]), + { + acceptThresholds: [0.9], + reviewThresholds: [0.55], + minTotalLabels: 10, + minAcceptedLabels: 5, + minRejectedLabels: 5, + }, + ); + + expect(report.status).toBe('insufficient_labels'); + expect(report.recommended).toBeNull(); + expect(report.summary).toMatchObject({ + totalLabels: 2, + scoredLabels: 2, + acceptedLabels: 1, + rejectedLabels: 1, + eligibleCandidates: 1, + }); + expect(report.reasons).toEqual([ + 'Need at least 10 scored labels; found 2.', + 'Need at least 5 accepted labels; found 1.', + 'Need at least 5 rejected labels; found 1.', + ]); + expect(report.candidates).toHaveLength(1); + }); + + it('reports no eligible thresholds when label counts pass but quality gates fail', () => { + const report = buildKloRelationshipThresholdAdviceReport( + feedback([ + label({ candidateId: 'a', decision: 'accepted', score: 0.92 }), + label({ candidateId: 'b', decision: 'accepted', score: 0.58 }), + label({ candidateId: 'c', decision: 'rejected', 
score: 0.91 }), + label({ candidateId: 'd', decision: 'rejected', score: 0.2 }), + ]), + { + acceptThresholds: [0.9], + reviewThresholds: [0.55], + minTotalLabels: 4, + minAcceptedLabels: 2, + minRejectedLabels: 2, + minAcceptedBandPrecision: 0.9, + }, + ); + + expect(report.status).toBe('no_eligible_thresholds'); + expect(report.recommended).toBeNull(); + expect(report.reasons).toEqual(['No threshold candidate met the precision and recall gates.']); + expect(report.candidates[0]).toMatchObject({ + acceptThreshold: 0.9, + reviewThreshold: 0.55, + eligible: false, + acceptedBandPrecision: 0.5, + }); + }); + + it('wraps the feedback exporter and preserves warnings', async () => { + const project = { projectDir: '/tmp/klo-project' } as KloLocalProject; + const exportLocalRelationshipFeedbackLabels = vi.fn(async () => ({ + ...feedback([]), + warnings: [ + { + path: 'raw-sources/broken/live-database/sync/enrichment/relationship-review-decisions.json', + message: 'Unexpected token', + }, + ], + })); + + const report = await adviseLocalRelationshipFeedbackThresholds(project, { + connectionId: 'warehouse', + exportLocalRelationshipFeedbackLabels, + minTotalLabels: 1, + }); + + expect(exportLocalRelationshipFeedbackLabels).toHaveBeenCalledWith(project, { + connectionId: 'warehouse', + decision: 'all', + }); + expect(report.warnings).toEqual([ + { + path: 'raw-sources/broken/live-database/sync/enrichment/relationship-review-decisions.json', + message: 'Unexpected token', + }, + ]); + }); + + it('formats a stable human-readable report', () => { + const report = buildKloRelationshipThresholdAdviceReport( + feedback([ + label({ candidateId: 'orders:orders.customer_id->customers:customers.id', decision: 'accepted', score: 0.91 }), + label({ candidateId: 'orders:orders.account_id->accounts:accounts.id', decision: 'accepted', score: 0.61 }), + label({ candidateId: 'orders:orders.note_id->notes:notes.id', decision: 'rejected', score: 0.21 }), + label({ candidateId: 
/**
 * Threshold advice for relationship review.
 *
 * Given reviewer-labelled relationship candidates (accepted / rejected, each
 * with a score), this module evaluates pairs of (accept, review) score
 * thresholds and reports which pair, if any, meets the configured precision
 * and recall gates.
 */
import type { KloLocalProject } from '../project/index.js';
import {
  exportLocalRelationshipFeedbackLabels,
  type ExportLocalRelationshipFeedbackLabelsInput,
  type ExportLocalRelationshipFeedbackLabelsResult,
  type KloRelationshipFeedbackExportWarning,
  type KloRelationshipFeedbackLabel,
} from './relationship-feedback-export.js';
import type { KloResolvedRelationshipStatus } from './relationship-graph-resolver.js';

const DEFAULT_ACCEPT_THRESHOLDS = [0.95, 0.9, 0.85, 0.8, 0.75] as const;
const DEFAULT_REVIEW_THRESHOLDS = [0.65, 0.6, 0.55, 0.5, 0.45] as const;

type AdvicePredictedStatus = KloResolvedRelationshipStatus;
export type KloRelationshipThresholdAdviceStatus = 'ready' | 'insufficient_labels' | 'no_eligible_thresholds';

/** A label that carries a score and can therefore be bucketed by a threshold. */
type ScoredFeedbackLabel = KloRelationshipFeedbackLabel & { score: number };

/** Optional overrides for the threshold grid and the gates; see `resolveInput` for defaults. */
export interface BuildKloRelationshipThresholdAdviceReportInput {
  acceptThresholds?: readonly number[];
  reviewThresholds?: readonly number[];
  minTotalLabels?: number;
  minAcceptedLabels?: number;
  minRejectedLabels?: number;
  minAcceptedBandPrecision?: number;
  minAcceptedOrReviewRecall?: number;
  minRejectedBandPrecision?: number;
}

/**
 * Input for {@link adviseLocalRelationshipFeedbackThresholds}: exporter filters
 * plus report options, with an injectable exporter.
 *
 * NOTE(review): the omitted key is reconstructed as `'decision'` because the
 * wrapper always passes `decision: 'all'` itself — confirm against the exporter's input type.
 */
export interface AdviseLocalRelationshipFeedbackThresholdsInput
  extends Omit<ExportLocalRelationshipFeedbackLabelsInput, 'decision'>,
    BuildKloRelationshipThresholdAdviceReportInput {
  exportLocalRelationshipFeedbackLabels?: typeof exportLocalRelationshipFeedbackLabels;
}

/** Metrics for one (accept, review) threshold pair. Ratios are `null` when their denominator is 0. */
export interface KloRelationshipThresholdAdviceCandidate {
  acceptThreshold: number;
  reviewThreshold: number;
  eligible: boolean;
  predictedAccepted: number;
  predictedReview: number;
  predictedRejected: number;
  acceptedBandPrecision: number | null;
  acceptedRecall: number | null;
  acceptedOrReviewRecall: number | null;
  rejectedBandPrecision: number | null;
  rejectedRecall: number | null;
  falseAcceptedRejectedLabels: number;
  falseRejectedAcceptedLabels: number;
}

/** Full advice report: gates used, label counts, every evaluated pair and the recommendation. */
export interface KloRelationshipThresholdAdviceReport {
  generatedAt: string;
  filters: ExportLocalRelationshipFeedbackLabelsResult['filters'];
  status: KloRelationshipThresholdAdviceStatus;
  gates: {
    minTotalLabels: number;
    minAcceptedLabels: number;
    minRejectedLabels: number;
    minAcceptedBandPrecision: number;
    minAcceptedOrReviewRecall: number;
    minRejectedBandPrecision: number;
  };
  summary: {
    totalLabels: number;
    scoredLabels: number;
    unscoredLabels: number;
    acceptedLabels: number;
    rejectedLabels: number;
    evaluatedCandidates: number;
    eligibleCandidates: number;
  };
  recommended: KloRelationshipThresholdAdviceCandidate | null;
  candidates: KloRelationshipThresholdAdviceCandidate[];
  reasons: string[];
  warnings: KloRelationshipFeedbackExportWarning[];
}

interface ResolvedAdviceInput {
  acceptThresholds: number[];
  reviewThresholds: number[];
  minTotalLabels: number;
  minAcceptedLabels: number;
  minRejectedLabels: number;
  minAcceptedBandPrecision: number;
  minAcceptedOrReviewRecall: number;
  minRejectedBandPrecision: number;
}

/** Fills in defaults and returns descending-sorted copies of the threshold lists. */
function resolveInput(input: BuildKloRelationshipThresholdAdviceReportInput): ResolvedAdviceInput {
  return {
    acceptThresholds: [...(input.acceptThresholds ?? DEFAULT_ACCEPT_THRESHOLDS)].sort((left, right) => right - left),
    reviewThresholds: [...(input.reviewThresholds ?? DEFAULT_REVIEW_THRESHOLDS)].sort((left, right) => right - left),
    minTotalLabels: input.minTotalLabels ?? 20,
    minAcceptedLabels: input.minAcceptedLabels ?? 5,
    minRejectedLabels: input.minRejectedLabels ?? 5,
    minAcceptedBandPrecision: input.minAcceptedBandPrecision ?? 0.9,
    minAcceptedOrReviewRecall: input.minAcceptedOrReviewRecall ?? 0.8,
    minRejectedBandPrecision: input.minRejectedBandPrecision ?? 0.8,
  };
}

/** Keeps only the labels whose `score` is not `null`. */
function scoredLabelsOf(labels: readonly KloRelationshipFeedbackLabel[]): ScoredFeedbackLabel[] {
  return labels.filter((label): label is ScoredFeedbackLabel => label.score !== null);
}

/** Rounds to three decimals. */
function roundMetric(value: number): number {
  return Math.round(value * 1000) / 1000;
}

/** Rounded `numerator / denominator`, or `null` when the denominator is 0. */
function ratio(numerator: number, denominator: number): number | null {
  return denominator === 0 ? null : roundMetric(numerator / denominator);
}

/** Buckets a score: at or above `acceptThreshold` → accepted, at or above `reviewThreshold` → review, else rejected. */
function prediction(score: number, acceptThreshold: number, reviewThreshold: number): AdvicePredictedStatus {
  if (score >= acceptThreshold) {
    return 'accepted';
  }
  if (score >= reviewThreshold) {
    return 'review';
  }
  return 'rejected';
}

/** True when the metric exists and meets the minimum; a `null` metric never passes. */
function isMetricAtLeast(value: number | null, minimum: number): boolean {
  return value !== null && value >= minimum;
}

/**
 * Evaluates one (accept, review) threshold pair against the scored labels.
 *
 * A pair is eligible only if it predicts at least one accepted and one
 * rejected label and meets all three gates in `gates`.
 */
function thresholdCandidate(
  labels: readonly KloRelationshipFeedbackLabel[],
  acceptThreshold: number,
  reviewThreshold: number,
  gates: ResolvedAdviceInput,
): KloRelationshipThresholdAdviceCandidate {
  const scored = scoredLabelsOf(labels);
  const acceptedLabels = scored.filter((label) => label.decision === 'accepted');
  const rejectedLabels = scored.filter((label) => label.decision === 'rejected');
  const predictions = scored.map((label) => ({
    label,
    predictedStatus: prediction(label.score, acceptThreshold, reviewThreshold),
  }));
  const predictedAccepted = predictions.filter((item) => item.predictedStatus === 'accepted');
  const predictedReview = predictions.filter((item) => item.predictedStatus === 'review');
  const predictedRejected = predictions.filter((item) => item.predictedStatus === 'rejected');
  const acceptedBandPrecision = ratio(
    predictedAccepted.filter((item) => item.label.decision === 'accepted').length,
    predictedAccepted.length,
  );
  // Accepted labels that were not pushed into the rejected band.
  const acceptedOrReviewRecall = ratio(
    predictions.filter((item) => item.label.decision === 'accepted' && item.predictedStatus !== 'rejected').length,
    acceptedLabels.length,
  );
  const rejectedBandPrecision = ratio(
    predictedRejected.filter((item) => item.label.decision === 'rejected').length,
    predictedRejected.length,
  );

  return {
    acceptThreshold,
    reviewThreshold,
    eligible:
      predictedAccepted.length > 0 &&
      predictedRejected.length > 0 &&
      isMetricAtLeast(acceptedBandPrecision, gates.minAcceptedBandPrecision) &&
      isMetricAtLeast(acceptedOrReviewRecall, gates.minAcceptedOrReviewRecall) &&
      isMetricAtLeast(rejectedBandPrecision, gates.minRejectedBandPrecision),
    predictedAccepted: predictedAccepted.length,
    predictedReview: predictedReview.length,
    predictedRejected: predictedRejected.length,
    acceptedBandPrecision,
    acceptedRecall: ratio(
      predictedAccepted.filter((item) => item.label.decision === 'accepted').length,
      acceptedLabels.length,
    ),
    acceptedOrReviewRecall,
    rejectedBandPrecision,
    // Counts rejected labels that landed in either the rejected or the review band.
    rejectedRecall: ratio(
      predictions.filter((item) => item.label.decision === 'rejected' && item.predictedStatus !== 'accepted').length,
      rejectedLabels.length,
    ),
    falseAcceptedRejectedLabels: predictedAccepted.filter((item) => item.label.decision === 'rejected').length,
    falseRejectedAcceptedLabels: predictedRejected.filter((item) => item.label.decision === 'accepted').length,
  };
}

/** Sort key for a nullable metric: `null` ranks below every real value. */
function metricRank(value: number | null): number {
  return value ?? -1;
}

/**
 * Returns a sorted copy: eligible pairs first, then by accepted-band precision,
 * accepted-or-review recall, rejected-band precision, accept threshold and
 * review threshold, all descending.
 */
function sortCandidates(
  candidates: readonly KloRelationshipThresholdAdviceCandidate[],
): KloRelationshipThresholdAdviceCandidate[] {
  return [...candidates].sort(
    (left, right) =>
      Number(right.eligible) - Number(left.eligible) ||
      metricRank(right.acceptedBandPrecision) - metricRank(left.acceptedBandPrecision) ||
      metricRank(right.acceptedOrReviewRecall) - metricRank(left.acceptedOrReviewRecall) ||
      metricRank(right.rejectedBandPrecision) - metricRank(left.rejectedBandPrecision) ||
      right.acceptThreshold - left.acceptThreshold ||
      right.reviewThreshold - left.reviewThreshold,
  );
}

/** Lists which label-count gates are not met; an empty list means there are enough labels. */
function labelGateReasons(labels: readonly KloRelationshipFeedbackLabel[], gates: ResolvedAdviceInput): string[] {
  const scored = scoredLabelsOf(labels);
  const accepted = scored.filter((label) => label.decision === 'accepted');
  const rejected = scored.filter((label) => label.decision === 'rejected');
  const reasons: string[] = [];
  if (scored.length < gates.minTotalLabels) {
    reasons.push(`Need at least ${gates.minTotalLabels} scored labels; found ${scored.length}.`);
  }
  if (accepted.length < gates.minAcceptedLabels) {
    reasons.push(`Need at least ${gates.minAcceptedLabels} accepted labels; found ${accepted.length}.`);
  }
  if (rejected.length < gates.minRejectedLabels) {
    reasons.push(`Need at least ${gates.minRejectedLabels} rejected labels; found ${rejected.length}.`);
  }
  return reasons;
}

/**
 * Builds the advice report from exported feedback labels.
 *
 * Every accept threshold is paired with every strictly lower review threshold.
 * Status is `insufficient_labels` when any label-count gate fails, `ready`
 * when at least one pair is eligible, and `no_eligible_thresholds` otherwise.
 * A recommendation is made only when the status is `ready`.
 *
 * @param feedback Exported labels, filters, timestamp and warnings.
 * @param input Optional threshold grid and gate overrides.
 */
export function buildKloRelationshipThresholdAdviceReport(
  feedback: ExportLocalRelationshipFeedbackLabelsResult,
  input: BuildKloRelationshipThresholdAdviceReportInput = {},
): KloRelationshipThresholdAdviceReport {
  const gates = resolveInput(input);
  const scored = scoredLabelsOf(feedback.labels);
  const acceptedLabels = scored.filter((label) => label.decision === 'accepted');
  const rejectedLabels = scored.filter((label) => label.decision === 'rejected');
  const candidates = sortCandidates(
    gates.acceptThresholds.flatMap((acceptThreshold) =>
      gates.reviewThresholds.flatMap((reviewThreshold) =>
        acceptThreshold > reviewThreshold
          ? [thresholdCandidate(feedback.labels, acceptThreshold, reviewThreshold, gates)]
          : [],
      ),
    ),
  );
  const labelReasons = labelGateReasons(feedback.labels, gates);
  const eligibleCandidates = candidates.filter((candidate) => candidate.eligible);
  const status: KloRelationshipThresholdAdviceStatus =
    labelReasons.length > 0 ? 'insufficient_labels' : eligibleCandidates.length > 0 ? 'ready' : 'no_eligible_thresholds';
  const reasons =
    status === 'insufficient_labels'
      ? labelReasons
      : status === 'no_eligible_thresholds'
        ? ['No threshold candidate met the precision and recall gates.']
        : [];

  return {
    generatedAt: feedback.generatedAt,
    filters: feedback.filters,
    status,
    gates: {
      minTotalLabels: gates.minTotalLabels,
      minAcceptedLabels: gates.minAcceptedLabels,
      minRejectedLabels: gates.minRejectedLabels,
      minAcceptedBandPrecision: gates.minAcceptedBandPrecision,
      minAcceptedOrReviewRecall: gates.minAcceptedOrReviewRecall,
      minRejectedBandPrecision: gates.minRejectedBandPrecision,
    },
    summary: {
      totalLabels: feedback.labels.length,
      scoredLabels: scored.length,
      unscoredLabels: feedback.labels.length - scored.length,
      acceptedLabels: acceptedLabels.length,
      rejectedLabels: rejectedLabels.length,
      evaluatedCandidates: candidates.length,
      eligibleCandidates: eligibleCandidates.length,
    },
    // Candidates are already sorted, so the first eligible one is the best-ranked.
    recommended: status === 'ready' ? eligibleCandidates[0] ?? null : null,
    candidates,
    reasons,
    warnings: [...feedback.warnings],
  };
}

/**
 * Exports all feedback labels for the project (optionally for one connection)
 * and builds the advice report from them.
 *
 * @param project Project handle passed through to the exporter.
 * @param input Connection filter, report options and an optional exporter override.
 */
export async function adviseLocalRelationshipFeedbackThresholds(
  project: KloLocalProject,
  input: AdviseLocalRelationshipFeedbackThresholdsInput = {},
): Promise<KloRelationshipThresholdAdviceReport> {
  const exporter = input.exportLocalRelationshipFeedbackLabels ?? exportLocalRelationshipFeedbackLabels;
  const feedback = await exporter(project, {
    connectionId: input.connectionId,
    decision: 'all',
  });
  return buildKloRelationshipThresholdAdviceReport(feedback, input);
}

/** Formats a nullable metric with three decimals, or `n/a` when it is `null`. */
function formatMetric(value: number | null): string {
  return value === null ? 'n/a' : value.toFixed(3);
}

/** One-line `key=value` summary of a threshold pair. */
function candidateLine(candidate: KloRelationshipThresholdAdviceCandidate): string {
  return [
    `accept=${candidate.acceptThreshold.toFixed(2)}`,
    `review=${candidate.reviewThreshold.toFixed(2)}`,
    `eligible=${candidate.eligible ? 'yes' : 'no'}`,
    `acceptedPrecision=${formatMetric(candidate.acceptedBandPrecision)}`,
    `acceptedRecall=${formatMetric(candidate.acceptedRecall)}`,
    `acceptedOrReviewRecall=${formatMetric(candidate.acceptedOrReviewRecall)}`,
    `rejectedPrecision=${formatMetric(candidate.rejectedBandPrecision)}`,
    `rejectedRecall=${formatMetric(candidate.rejectedRecall)}`,
    `falseAcceptedRejected=${candidate.falseAcceptedRejectedLabels}`,
    `falseRejectedAccepted=${candidate.falseRejectedAcceptedLabels}`,
  ].join(' ');
}

/**
 * Renders the report as plain text ending in a newline.
 *
 * The reasons, candidates and warnings sections appear only when non-empty;
 * candidates and warnings are capped at five entries each.
 */
export function formatKloRelationshipThresholdAdviceMarkdown(report: KloRelationshipThresholdAdviceReport): string {
  const lines = [
    'KLO relationship threshold advice',
    `Generated: ${report.generatedAt}`,
    `Filter connection: ${report.filters.connectionId ?? 'all'}`,
    `Status: ${report.status}`,
    `Labels: total=${report.summary.totalLabels} scored=${report.summary.scoredLabels} accepted=${report.summary.acceptedLabels} rejected=${report.summary.rejectedLabels}`,
    `Gates: minTotal=${report.gates.minTotalLabels} minAccepted=${report.gates.minAcceptedLabels} minRejected=${report.gates.minRejectedLabels} acceptedPrecision=${report.gates.minAcceptedBandPrecision.toFixed(3)} acceptedOrReviewRecall=${report.gates.minAcceptedOrReviewRecall.toFixed(3)} rejectedPrecision=${report.gates.minRejectedBandPrecision.toFixed(3)}`,
    `Evaluated candidates: ${report.summary.evaluatedCandidates}`,
    `Eligible candidates: ${report.summary.eligibleCandidates}`,
    `Recommended: ${
      report.recommended
        ? `accept=${report.recommended.acceptThreshold.toFixed(2)} review=${report.recommended.reviewThreshold.toFixed(2)}`
        : 'none'
    }`,
  ];

  if (report.reasons.length > 0) {
    lines.push('', 'Reasons', ...report.reasons.map((reason) => ` - ${reason}`));
  }

  if (report.candidates.length > 0) {
    lines.push('', 'Top candidates', ...report.candidates.slice(0, 5).map((candidate) => ` - ${candidateLine(candidate)}`));
  }

  if (report.warnings.length > 0) {
    lines.push('', 'Warnings');
    for (const warning of report.warnings.slice(0, 5)) {
      lines.push(` - ${warning.path}: ${warning.message}`);
    }
  }

  return `${lines.join('\n')}\n`;
}
KloRelationshipProfileArtifact } from './relationship-profiling.js'; +import { profileKloRelationshipSchema } from './relationship-profiling.js'; +import { validateKloRelationshipDiscoveryCandidates } from './relationship-validation.js'; +import type { KloQueryResult, KloReadOnlyQueryInput, KloScanContext } from './types.js'; + +class InMemorySqliteExecutor { + readonly db = new Database(':memory:'); + queryCount = 0; + + executeReadOnly(input: KloReadOnlyQueryInput, _ctx: KloScanContext): Promise { + this.queryCount += 1; + const rows = this.db.prepare(input.sql).all() as Record[]; + const headers = Object.keys(rows[0] ?? {}); + return Promise.resolve({ + headers, + rows: rows.map((row) => headers.map((header) => row[header])), + totalRows: rows.length, + rowCount: rows.length, + }); + } + + close(): void { + this.db.close(); + } +} + +function column(tableId: string, name: string, overrides: Partial = {}): KloEnrichedColumn { + const tableRef = overrides.tableRef ?? { catalog: null, db: null, name: tableId }; + return { + id: `${tableId}.${name}`, + tableId, + tableRef, + name, + nativeType: overrides.nativeType ?? 'INTEGER', + normalizedType: overrides.normalizedType ?? 'integer', + dimensionType: overrides.dimensionType ?? 'number', + nullable: overrides.nullable ?? true, + primaryKey: overrides.primaryKey ?? false, + parentColumnId: null, + descriptions: {}, + embedding: null, + sampleValues: null, + cardinality: null, + ...overrides, + }; +} + +function table(name: string, columns: KloEnrichedColumn[]): KloEnrichedTable { + const ref = { catalog: null, db: null, name }; + return { + id: name, + ref, + enabled: true, + descriptions: {}, + columns: columns.map((item) => ({ ...item, tableId: name, tableRef: ref })), + }; +} + +function schema(tables?: KloEnrichedTable[]): KloEnrichedSchema { + return { + connectionId: 'warehouse', + tables: tables ?? 
[ + table('accounts', [ + column('accounts', 'id', { nullable: false }), + column('accounts', 'name', { nativeType: 'TEXT', normalizedType: 'text', dimensionType: 'string' }), + ]), + table('users', [column('users', 'id', { nullable: false }), column('users', 'account_id', { nullable: false })]), + table('invoices', [ + column('invoices', 'id', { nullable: false }), + column('invoices', 'account_id', { nullable: false }), + ]), + ], + relationships: [], + }; +} + +describe('relationship validation', () => { + let executor: InMemorySqliteExecutor | null = null; + + afterEach(() => { + executor?.close(); + executor = null; + }); + + it('accepts a relationship-discovery candidate with unique parent values and full source coverage', async () => { + executor = new InMemorySqliteExecutor(); + executor.db.exec(` + CREATE TABLE accounts (id INTEGER, name TEXT); + CREATE TABLE users (id INTEGER, account_id INTEGER); + CREATE TABLE invoices (id INTEGER, account_id INTEGER); + INSERT INTO accounts (id, name) VALUES (1, 'Acme'), (2, 'Globex'), (3, 'Initech'); + INSERT INTO users (id, account_id) VALUES (10, 1), (11, 2), (12, 3); + INSERT INTO invoices (id, account_id) VALUES (20, 1), (21, 2), (22, 999); + `); + const testSchema = schema(); + const profiles = await profileKloRelationshipSchema({ + connectionId: 'warehouse', + driver: 'sqlite', + schema: testSchema, + executor, + ctx: { runId: 'validate-test' }, + }); + const candidates = generateKloRelationshipDiscoveryCandidates(testSchema).filter( + (candidate) => candidate.from.table.name === 'users', + ); + + const validated = await validateKloRelationshipDiscoveryCandidates({ + connectionId: 'warehouse', + driver: 'sqlite', + candidates, + profiles, + executor, + ctx: { runId: 'validate-test' }, + }); + + expect(validated).toHaveLength(1); + expect(validated[0]).toMatchObject({ + from: { table: { name: 'users' }, columns: ['account_id'] }, + to: { table: { name: 'accounts' }, columns: ['id'] }, + status: 'accepted', + 
score: expect.any(Number), + validation: { + targetUniqueness: 1, + sourceCoverage: 1, + violationCount: 0, + violationRatio: 0, + reasons: expect.arrayContaining(['validation_passed']), + }, + }); + expect(validated[0]?.score).toBeGreaterThanOrEqual(0.85); + }); + + it('rejects a candidate with missing parent values and records the deterministic reason', async () => { + executor = new InMemorySqliteExecutor(); + executor.db.exec(` + CREATE TABLE accounts (id INTEGER, name TEXT); + CREATE TABLE users (id INTEGER, account_id INTEGER); + CREATE TABLE invoices (id INTEGER, account_id INTEGER); + INSERT INTO accounts (id, name) VALUES (1, 'Acme'), (2, 'Globex'); + INSERT INTO users (id, account_id) VALUES (10, 1), (11, 2); + INSERT INTO invoices (id, account_id) VALUES (20, 1), (21, 999), (22, 1000); + `); + const testSchema = schema(); + const profiles = await profileKloRelationshipSchema({ + connectionId: 'warehouse', + driver: 'sqlite', + schema: testSchema, + executor, + ctx: { runId: 'validate-test' }, + }); + const candidates = generateKloRelationshipDiscoveryCandidates(testSchema).filter( + (candidate) => candidate.from.table.name === 'invoices', + ); + + const validated = await validateKloRelationshipDiscoveryCandidates({ + connectionId: 'warehouse', + driver: 'sqlite', + candidates, + profiles, + executor, + ctx: { runId: 'validate-test' }, + settings: { + minSourceCoverage: 0.9, + maxViolationRatio: 0.01, + }, + }); + + expect(validated).toHaveLength(1); + expect(validated[0]).toMatchObject({ + from: { table: { name: 'invoices' }, columns: ['account_id'] }, + to: { table: { name: 'accounts' }, columns: ['id'] }, + status: 'rejected', + validation: { + sourceCoverage: 1 / 3, + violationCount: 2, + violationRatio: 2 / 3, + reasons: expect.arrayContaining(['low_source_coverage', 'excessive_violations']), + }, + }); + }); + + it('keeps over-budget candidates review-only without executing coverage SQL for them', async () => { + executor = new 
InMemorySqliteExecutor(); + executor.db.exec(` + CREATE TABLE accounts (id INTEGER, name TEXT); + CREATE TABLE users (id INTEGER, account_id INTEGER); + CREATE TABLE invoices (id INTEGER, account_id INTEGER); + INSERT INTO accounts (id, name) VALUES (1, 'Acme'), (2, 'Globex'), (3, 'Initech'); + INSERT INTO users (id, account_id) VALUES (10, 1), (11, 2), (12, 3); + INSERT INTO invoices (id, account_id) VALUES (20, 1), (21, 2), (22, 3); + `); + const testSchema = schema(); + const profiles = await profileKloRelationshipSchema({ + connectionId: 'warehouse', + driver: 'sqlite', + schema: testSchema, + executor, + ctx: { runId: 'validate-budget-profile' }, + }); + executor.queryCount = 0; + const candidates = generateKloRelationshipDiscoveryCandidates(testSchema).map((candidate) => ({ + ...candidate, + confidence: candidate.from.table.name === 'users' ? 0.99 : 0.5, + })); + + const validated = await validateKloRelationshipDiscoveryCandidates({ + connectionId: 'warehouse', + driver: 'sqlite', + candidates, + profiles, + executor, + ctx: { runId: 'validate-budget' }, + tableCount: testSchema.tables.length, + settings: { + validationBudget: 1, + }, + }); + + expect(executor.queryCount).toBe(1); + expect(validated).toHaveLength(2); + expect(validated.find((candidate) => candidate.from.table.name === 'users')).toMatchObject({ + status: 'accepted', + validation: { reasons: expect.arrayContaining(['validation_passed']) }, + }); + expect(validated.find((candidate) => candidate.from.table.name === 'invoices')).toMatchObject({ + status: 'review', + validation: { + reasons: ['validation_unattempted'], + }, + }); + }); + + it('treats validation budget zero as review-only validation without coverage SQL', async () => { + executor = new InMemorySqliteExecutor(); + executor.db.exec(` + CREATE TABLE accounts (id INTEGER, name TEXT); + CREATE TABLE users (id INTEGER, account_id INTEGER); + INSERT INTO accounts (id, name) VALUES (1, 'Acme'), (2, 'Globex'); + INSERT INTO users (id, 
account_id) VALUES (10, 1), (11, 2); + `); + const testSchema = schema([ + table('accounts', [ + column('accounts', 'id', { nullable: false }), + column('accounts', 'name', { nativeType: 'TEXT', normalizedType: 'text', dimensionType: 'string' }), + ]), + table('users', [column('users', 'id', { nullable: false }), column('users', 'account_id', { nullable: false })]), + ]); + const profiles = await profileKloRelationshipSchema({ + connectionId: 'warehouse', + driver: 'sqlite', + schema: testSchema, + executor, + ctx: { runId: 'validate-zero-budget-profile' }, + }); + executor.queryCount = 0; + const candidates = generateKloRelationshipDiscoveryCandidates(testSchema); + + const validated = await validateKloRelationshipDiscoveryCandidates({ + connectionId: 'warehouse', + driver: 'sqlite', + candidates, + profiles, + executor, + ctx: { runId: 'validate-zero-budget' }, + tableCount: testSchema.tables.length, + settings: { + validationBudget: 0, + }, + }); + + expect(executor.queryCount).toBe(0); + expect(validated).toHaveLength(1); + expect(validated[0]).toMatchObject({ + status: 'review', + score: expect.any(Number), + validation: { + checkedValues: 0, + reasons: ['validation_unattempted'], + }, + }); + }); + + it('marks rejected LLM proposals with the spec rejection reason', async () => { + executor = new InMemorySqliteExecutor(); + executor.db.exec(` + CREATE TABLE customers (id INTEGER); + CREATE TABLE orders (buyer_ref INTEGER); + INSERT INTO customers (id) VALUES (1), (2); + INSERT INTO orders (buyer_ref) VALUES (98), (99); + `); + const testSchema = schema([ + table('customers', [column('customers', 'id', { nullable: false })]), + table('orders', [column('orders', 'buyer_ref')]), + ]); + const profiles = await profileKloRelationshipSchema({ + connectionId: 'warehouse', + driver: 'sqlite', + schema: testSchema, + executor, + ctx: { runId: 'llm-rejected-validation' }, + }); + const [candidate] = generateKloRelationshipDiscoveryCandidates( + schema([ + 
table('customers', [column('customers', 'id', { nullable: false })]), + table('orders', [column('orders', 'customer_id')]), + ]), + ); + if (!candidate) { + throw new Error('Expected base candidate'); + } + const llmCandidate = { + ...candidate, + id: 'orders:(orders.buyer_ref)->customers:(customers.id)', + from: { ...candidate.from, columnIds: ['orders.buyer_ref'], columns: ['buyer_ref'] }, + source: 'llm_proposal' as const, + evidence: { + ...candidate.evidence, + reasons: ['llm_proposal'], + llmConfidence: 0.84, + llmRationale: 'Buyer references should map to customers.', + }, + }; + + const [validated] = await validateKloRelationshipDiscoveryCandidates({ + connectionId: 'warehouse', + driver: 'sqlite', + candidates: [llmCandidate], + profiles, + executor, + ctx: { runId: 'llm-rejected-validation' }, + }); + + expect(validated?.status).toBe('rejected'); + expect(validated?.validation.reasons).toEqual( + expect.arrayContaining(['low_source_coverage', 'llm_proposed_but_validation_failed']), + ); + }); + + it('limits validation query concurrency', async () => { + const executor = new InMemorySqliteExecutor(); + executor.db.exec(` + CREATE TABLE accounts (id INTEGER NOT NULL); + CREATE TABLE orders (id INTEGER NOT NULL, account_id INTEGER NOT NULL); + CREATE TABLE invoices (id INTEGER NOT NULL, account_id INTEGER NOT NULL); + INSERT INTO accounts VALUES (1), (2); + INSERT INTO orders VALUES (10, 1), (11, 2); + INSERT INTO invoices VALUES (20, 1), (21, 2); + `); + + let active = 0; + let maxActive = 0; + const throttled = { + executeReadOnly: async (input: KloReadOnlyQueryInput, ctx: KloScanContext) => { + active += 1; + maxActive = Math.max(maxActive, active); + await new Promise((resolve) => setTimeout(resolve, input.sql.includes('WITH child_values') ? 
10 : 0)); + const result = await executor.executeReadOnly(input, ctx); + active -= 1; + return result; + }, + }; + + const testSchema = schema([ + table('accounts', [column('accounts', 'id', { nullable: false })]), + table('orders', [column('orders', 'id', { nullable: false }), column('orders', 'account_id')]), + table('invoices', [column('invoices', 'id', { nullable: false }), column('invoices', 'account_id')]), + ]); + const profiles = await profileKloRelationshipSchema({ + connectionId: 'warehouse', + driver: 'sqlite', + schema: testSchema, + executor, + ctx: { runId: 'validation-concurrency-profile' }, + }); + const candidates = generateKloRelationshipDiscoveryCandidates(testSchema); + + await validateKloRelationshipDiscoveryCandidates({ + connectionId: 'warehouse', + driver: 'sqlite', + candidates, + profiles, + executor: throttled, + ctx: { runId: 'validation-concurrency' }, + settings: { concurrency: 1 }, + }); + + expect(maxActive).toBe(1); + executor.close(); + }); + + it('pins column_suffix_match validation scoring for plan-code suffix candidates', async () => { + const candidate = { + id: 'mart:(current_plan_code)->plans:(plan_code)', + from: { + tableId: 'mart-account-segments-id', + columnIds: ['current-plan-code-col'], + table: { catalog: null, db: null, name: 'mart_account_segments' }, + columns: ['current_plan_code'], + }, + to: { + tableId: 'plans-id', + columnIds: ['plan-code-col'], + table: { catalog: null, db: null, name: 'stg_plans' }, + columns: ['plan_code'], + }, + relationshipType: 'many_to_one' as const, + confidence: 0.902, + source: 'column_suffix_match' as const, + status: 'review' as const, + evidence: { + sourceColumnBase: 'current_plan', + targetTableBase: 'plan', + targetColumnBase: 'plan_code', + targetKeyScore: 0.86, + nameScore: 0.78, + reasons: ['column_suffix_match', 'profile_unique_target'], + }, + }; + const profiles = { + connectionId: 'warehouse', + driver: 'sqlite', + sqlAvailable: true, + queryCount: 0, + tables: [], + 
warnings: [], + columns: { + 'mart_account_segments.current_plan_code': { + table: { catalog: null, db: null, name: 'mart_account_segments' }, + column: 'current_plan_code', + nativeType: 'TEXT', + normalizedType: 'text', + rowCount: 4, + nullCount: 0, + distinctCount: 4, + uniquenessRatio: 1, + nullRate: 0, + sampleValues: ['basic', 'enterprise', 'free', 'pro'], + minTextLength: 4, + maxTextLength: 10, + }, + 'stg_plans.plan_code': { + table: { catalog: null, db: null, name: 'stg_plans' }, + column: 'plan_code', + nativeType: 'TEXT', + normalizedType: 'text', + rowCount: 4, + nullCount: 0, + distinctCount: 4, + uniquenessRatio: 1, + nullRate: 0, + sampleValues: ['basic', 'enterprise', 'free', 'pro'], + minTextLength: 4, + maxTextLength: 10, + }, + }, + } satisfies KloRelationshipProfileArtifact; + const executor = { + async executeReadOnly() { + return { + headers: ['child_distinct', 'parent_distinct', 'overlap', 'violation_count'], + rows: [[4, 4, 4, 0]], + rowCount: 1, + totalRows: 1, + }; + }, + }; + + const [validated] = await validateKloRelationshipDiscoveryCandidates({ + connectionId: 'warehouse', + driver: 'sqlite', + candidates: [candidate], + profiles, + executor, + ctx: { runId: 'rule-b-validation-score' }, + }); + + expect(validated).toMatchObject({ + status: 'accepted', + score: 0.98, + validation: { + targetUniqueness: 1, + sourceCoverage: 1, + violationRatio: 0, + reasons: ['validation_passed'], + }, + }); + }); +}); diff --git a/packages/context/src/scan/relationship-validation.ts b/packages/context/src/scan/relationship-validation.ts new file mode 100644 index 00000000..209bdab0 --- /dev/null +++ b/packages/context/src/scan/relationship-validation.ts @@ -0,0 +1,370 @@ +import type { KloRelationshipEndpoint } from './enrichment-types.js'; +import { applyKloRelationshipValidationBudget, type KloRelationshipValidationBudget } from './relationship-budget.js'; +import type { KloRelationshipDiscoveryCandidate } from './relationship-candidates.js'; 
/** Baseline thresholds applied to every setting the caller does not provide. */
const DEFAULT_SETTINGS: KloRelationshipValidationSettings = {
  acceptThreshold: 0.85,
  reviewThreshold: 0.55,
  minTargetUniqueness: 0.9,
  minSourceCoverage: 0.9,
  maxViolationRatio: 0.01,
  maxDistinctSourceValues: 10000,
  concurrency: 4,
};

/**
 * Overlays caller-provided settings on top of `DEFAULT_SETTINGS`.
 *
 * Keys whose value is explicitly `undefined` are ignored. A plain object spread
 * would let `{ minSourceCoverage: undefined }` erase the default, and every
 * `x < undefined` threshold comparison would then silently evaluate to false.
 *
 * @param settings Partial overrides, or `undefined` to use the defaults as-is.
 * @returns A fresh settings object; `DEFAULT_SETTINGS` is never mutated.
 */
function mergeSettings(
  settings: Partial<KloRelationshipValidationSettings> | undefined,
): KloRelationshipValidationSettings {
  const merged: KloRelationshipValidationSettings = { ...DEFAULT_SETTINGS };
  for (const [key, value] of Object.entries(settings ?? {})) {
    if (value !== undefined) {
      Object.assign(merged, { [key]: value });
    }
  }
  return merged;
}
/** Returns the position of `header` in the result's headers (case-insensitive), or -1. */
function headerIndex(result: KloQueryResult, header: string): number {
  return result.headers.findIndex((candidate) => candidate.toLowerCase() === header.toLowerCase());
}

/** Returns the first result row, or an empty array when the result has no rows. */
function firstRow(result: KloQueryResult): unknown[] {
  return result.rows[0] ?? [];
}

/**
 * Reads the first-row cell under `header` as a finite number.
 *
 * Accepts numbers, bigints and non-blank numeric strings. Everything else
 * falls back to 0: a missing header, a missing row, `null`, a blank string,
 * or a value that does not parse to a finite number. Returning 0 instead of
 * `NaN` keeps the derived coverage and violation ratios comparable against
 * the configured thresholds.
 */
function numberAt(result: KloQueryResult, header: string): number {
  const index = headerIndex(result, header);
  if (index < 0) {
    return 0;
  }
  const value = firstRow(result)[index];
  let parsed: number;
  if (typeof value === 'number') {
    parsed = value;
  } else if (typeof value === 'bigint') {
    parsed = Number(value);
  } else if (typeof value === 'string' && value.trim() !== '') {
    parsed = Number(value);
  } else {
    return 0;
  }
  return Number.isFinite(parsed) ? parsed : 0;
}
/**
 * Maps `inputs` through the async `mapOne`, running at most `concurrency`
 * calls at a time while preserving input order in the returned array.
 *
 * @param inputs Items to process.
 * @param concurrency Requested number of parallel workers. It is floored and
 *   clamped to at least 1; `NaN` is treated as 1, since it would otherwise
 *   spawn no workers and leave every output slot unfilled.
 * @param mapOne Async mapper invoked once per input.
 * @returns Outputs positioned at the same index as their input. The returned
 *   promise rejects with the first error thrown by `mapOne`.
 */
async function mapWithConcurrency<TInput, TOutput>(
  inputs: readonly TInput[],
  concurrency: number,
  mapOne: (input: TInput) => Promise<TOutput>,
): Promise<TOutput[]> {
  const requested = Math.floor(concurrency);
  const safeConcurrency = Number.isNaN(requested) ? 1 : Math.max(1, requested);
  const outputs: TOutput[] = new Array(inputs.length);
  let nextIndex = 0;

  // Each worker pulls the next unclaimed index until the queue is drained.
  // Claiming the index happens synchronously, so no two workers share an item.
  async function worker(): Promise<void> {
    while (nextIndex < inputs.length) {
      const index = nextIndex;
      nextIndex += 1;
      outputs[index] = await mapOne(inputs[index] as TInput);
    }
  }

  const workerCount = Math.min(safeConcurrency, inputs.length);
  await Promise.all(Array.from({ length: workerCount }, () => worker()));
  return outputs;
}
/**
 * Validates relationship-discovery candidates against the database and returns
 * one validated entry per input candidate, in the original input order.
 *
 * - When there is no executor, or the profiles report SQL as unavailable, every
 *   candidate is returned as `review` with reason `validation_unavailable`.
 * - Candidates selected by the validation budget get one coverage query each,
 *   run with at most `settings.concurrency` queries in flight.
 * - Candidates deferred by the budget are returned as `review` with reason
 *   `validation_unattempted`, without executing any SQL for them.
 *
 * @throws Error if a candidate ends up with neither a validated nor a deferred
 *   result; this guards against a budget split that drops a candidate.
 */
export async function validateKloRelationshipDiscoveryCandidates(
  input: ValidateKloRelationshipDiscoveryCandidatesInput,
): Promise<KloValidatedRelationshipDiscoveryCandidate[]> {
  const settings = mergeSettings(input.settings);
  if (!input.executor || !input.profiles.sqlAvailable) {
    return input.candidates.map((candidate) =>
      reviewWithoutValidation(candidate, input.profiles, 'validation_unavailable'),
    );
  }

  // Captured in a const so the non-null narrowing survives inside the closure below.
  const executor = input.executor;

  /** Runs the coverage query for one candidate and derives its status, score and evidence. */
  async function validateCandidate(
    candidate: KloRelationshipDiscoveryCandidate,
  ): Promise<KloValidatedRelationshipDiscoveryCandidate> {
    const sourceColumn = singleRelationshipColumn(candidate.from);
    const targetColumn = singleRelationshipColumn(candidate.to);
    const sourceProfile = input.profiles.columns[profileKey(candidate.from.table.name, sourceColumn)];
    const targetProfile = input.profiles.columns[profileKey(candidate.to.table.name, targetColumn)];
    if (!sourceProfile || !targetProfile) {
      return reviewWithoutValidation(candidate, input.profiles, 'profile_unavailable');
    }

    const result = await executor.executeReadOnly(
      {
        connectionId: input.connectionId,
        sql: buildCoverageSql({
          driver: input.driver,
          childTable: candidate.from.table.name,
          childColumn: sourceColumn,
          parentTable: candidate.to.table.name,
          parentColumn: targetColumn,
          maxDistinctSourceValues: settings.maxDistinctSourceValues,
        }),
        maxRows: 1,
      },
      input.ctx,
    );
    const childDistinct = numberAt(result, 'child_distinct');
    const parentDistinct = numberAt(result, 'parent_distinct');
    const overlap = numberAt(result, 'overlap');
    const violationCount = numberAt(result, 'violation_count');
    // With no source values to check, report zero coverage and a full violation
    // ratio, so an empty child column fails the thresholds rather than passing.
    const sourceCoverage = childDistinct === 0 ? 0 : overlap / childDistinct;
    const violationRatio = childDistinct === 0 ? 1 : violationCount / childDistinct;
    const targetUniqueness = targetProfile.uniquenessRatio;
    const reasons: string[] = [];

    if (targetUniqueness < settings.minTargetUniqueness) {
      reasons.push('low_target_uniqueness');
    }
    if (sourceCoverage < settings.minSourceCoverage) {
      reasons.push('low_source_coverage');
    }
    if (violationRatio > settings.maxViolationRatio) {
      reasons.push('excessive_violations');
    }
    if (reasons.length === 0) {
      reasons.push('validation_passed');
    }

    const candidateScore = score({
      candidateConfidence: candidate.confidence,
      targetUniqueness,
      sourceCoverage,
      violationRatio,
    });
    const candidateStatus = statusFor({ score: candidateScore, reasons, settings });
    // Appended after the status is computed, so this marker never influences it.
    if (candidate.source === 'llm_proposal' && candidateStatus === 'rejected') {
      reasons.push('llm_proposed_but_validation_failed');
    }
    return {
      ...candidate,
      status: candidateStatus,
      score: candidateScore,
      validation: {
        targetUniqueness,
        sourceCoverage,
        violationCount,
        violationRatio,
        sourceNullRate: sourceProfile.nullRate,
        targetNullRate: targetProfile.nullRate,
        childDistinct,
        parentDistinct,
        overlap,
        checkedValues: childDistinct,
        reasons,
      },
    };
  }

  // Without an explicit budget: validate everything when the caller gave no
  // table count, otherwise pass `undefined` and let the budget helper decide.
  const budgeted = applyKloRelationshipValidationBudget({
    candidates: input.candidates,
    tableCount: input.tableCount ?? 0,
    budget: settings.validationBudget ?? (input.tableCount === undefined ? 'all' : undefined),
    score: (candidate) => candidate.confidence,
  });
  const validated = await mapWithConcurrency(
    budgeted.toValidate.map((entry) => entry.candidate),
    settings.concurrency,
    validateCandidate,
  );

  // Re-key validated and deferred results by their position in `input.candidates`.
  const byOriginalIndex = new Map<number, KloValidatedRelationshipDiscoveryCandidate>();
  for (let index = 0; index < budgeted.toValidate.length; index += 1) {
    const originalIndex = budgeted.toValidate[index]?.originalIndex;
    const candidate = validated[index];
    if (originalIndex !== undefined && candidate) {
      byOriginalIndex.set(originalIndex, candidate);
    }
  }
  for (const entry of budgeted.deferred) {
    byOriginalIndex.set(
      entry.originalIndex,
      reviewWithoutValidation(entry.candidate, input.profiles, 'validation_unattempted'),
    );
  }

  return input.candidates.map((_, index) => {
    const candidate = byOriginalIndex.get(index);
    if (!candidate) {
      throw new Error(`Missing relationship validation result for candidate at index ${index}`);
    }
    return candidate;
  });
}
/**
 * SQLite-backed store for scan enrichment stage results.
 *
 * Each (run_id, stage) pair holds at most one row; saving a stage again
 * overwrites the previous row, whether it was completed or failed.
 */
export class SqliteLocalScanEnrichmentStateStore implements KloScanEnrichmentStateStore {
  private readonly db: Database.Database;

  /** Opens (creating parent directories and the schema if needed) the database at `options.dbPath`. */
  constructor(options: SqliteLocalScanEnrichmentStateStoreOptions) {
    mkdirSync(dirname(options.dbPath), { recursive: true });
    this.db = new Database(options.dbPath);
    this.db.pragma('journal_mode = WAL');
    this.db.exec(`
      CREATE TABLE IF NOT EXISTS local_scan_enrichment_stages (
        run_id TEXT NOT NULL,
        stage TEXT NOT NULL,
        input_hash TEXT NOT NULL,
        connection_id TEXT NOT NULL,
        sync_id TEXT NOT NULL,
        mode TEXT NOT NULL,
        status TEXT NOT NULL,
        output_json TEXT,
        error_message TEXT,
        updated_at TEXT NOT NULL,
        PRIMARY KEY (run_id, stage)
      );

      CREATE INDEX IF NOT EXISTS local_scan_enrichment_stages_run_idx
        ON local_scan_enrichment_stages (run_id, updated_at, stage);
    `);
  }

  /**
   * Looks up the completed row for a run/stage whose stored input hash matches.
   *
   * @returns The completed stage, or `null` when the run id is not a safe
   *   identifier, no row matches, or the stored row is not completed.
   */
  async findCompletedStage<TOutput>(
    input: KloScanEnrichmentStageLookup,
  ): Promise<KloScanEnrichmentCompletedStage<TOutput> | null> {
    if (!isSafeRunId(input.runId)) {
      return null;
    }
    const row = this.db
      .prepare(
        `
          SELECT *
          FROM local_scan_enrichment_stages
          WHERE run_id = ?
            AND stage = ?
            AND input_hash = ?
            AND status = 'completed'
        `,
      )
      .get(input.runId, input.stage, input.inputHash) as StageRow | undefined;

    if (!row) {
      return null;
    }
    const parsed = parseStageRow<TOutput>(row);
    return parsed.status === 'completed' ? parsed : null;
  }

  /** Stores (or overwrites) a completed stage together with its JSON-serialized output. */
  async saveCompletedStage<TOutput>(
    input: Omit<KloScanEnrichmentCompletedStage<TOutput>, 'status' | 'errorMessage'>,
  ): Promise<void> {
    this.upsertStage({
      runId: input.runId,
      stage: input.stage,
      inputHash: input.inputHash,
      connectionId: input.connectionId,
      syncId: input.syncId,
      mode: input.mode,
      status: 'completed',
      // JSON.stringify returns undefined for an undefined output; bind NULL
      // instead so the statement receives a bindable value.
      outputJson: JSON.stringify(input.output) ?? null,
      errorMessage: null,
      updatedAt: input.updatedAt,
    });
  }

  /** Stores (or overwrites) a failed stage together with its error message. */
  // NOTE(review): the omitted keys were lost in extraction; 'status' | 'output'
  // mirrors saveCompletedStage — confirm against KloScanEnrichmentStateStore.
  async saveFailedStage(input: Omit<KloScanEnrichmentFailedStage, 'status' | 'output'>): Promise<void> {
    this.upsertStage({
      runId: input.runId,
      stage: input.stage,
      inputHash: input.inputHash,
      connectionId: input.connectionId,
      syncId: input.syncId,
      mode: input.mode,
      status: 'failed',
      outputJson: null,
      errorMessage: input.errorMessage,
      updatedAt: input.updatedAt,
    });
  }

  /**
   * Lists every stored stage of a run ordered by `updated_at`, then `stage`.
   * Returns an empty list when the run id is not a safe identifier.
   */
  async listRunStages(runId: string): Promise<KloScanEnrichmentStageRecord[]> {
    if (!isSafeRunId(runId)) {
      return [];
    }
    const rows = this.db
      .prepare(
        `
          SELECT *
          FROM local_scan_enrichment_stages
          WHERE run_id = ?
          ORDER BY updated_at ASC, stage ASC
        `,
      )
      .all(runId) as StageRow[];
    return rows.map((row) => parseStageRow(row));
  }

  /** Closes the underlying database handle; the store must not be used afterwards. */
  close(): void {
    this.db.close();
  }

  /** Inserts the row, or replaces every non-key column when (run_id, stage) already exists. */
  private upsertStage(row: {
    runId: string;
    stage: KloScanEnrichmentStage;
    inputHash: string;
    connectionId: string;
    syncId: string;
    mode: KloScanMode;
    status: StageRow['status'];
    outputJson: string | null;
    errorMessage: string | null;
    updatedAt: string;
  }): void {
    this.db
      .prepare(
        `
          INSERT INTO local_scan_enrichment_stages (
            run_id,
            stage,
            input_hash,
            connection_id,
            sync_id,
            mode,
            status,
            output_json,
            error_message,
            updated_at
          )
          VALUES (
            @runId,
            @stage,
            @inputHash,
            @connectionId,
            @syncId,
            @mode,
            @status,
            @outputJson,
            @errorMessage,
            @updatedAt
          )
          ON CONFLICT(run_id, stage) DO UPDATE SET
            input_hash = excluded.input_hash,
            connection_id = excluded.connection_id,
            sync_id = excluded.sync_id,
            mode = excluded.mode,
            status = excluded.status,
            output_json = excluded.output_json,
            error_message = excluded.error_message,
            updated_at = excluded.updated_at
        `,
      )
      .run(row);
  }
}
/** Normalized native type name paired with the dimension type inferred from it. */
export interface KloColumnTypeMapping {
  normalizedType: string;
  dimensionType: KloSchemaDimensionType;
}

/**
 * Canonicalizes a native database type string: lower-cases it, removes
 * parenthesized arguments such as `(12, 2)`, collapses whitespace runs and trims.
 *
 * @returns The normalized name, or `'unknown'` when nothing is left.
 */
export function normalizeKloNativeType(nativeType: string): string {
  const normalized = nativeType.toLowerCase().replace(/\([^)]*\)/g, '').replace(/\s+/g, ' ').trim();
  return normalized.length > 0 ? normalized : 'unknown';
}

// Temporal families, including common suffixed spellings: timestamptz, timetz,
// datetimeoffset, timestamp_ntz/_ltz/_tz, smalldatetime, datetime2, datetime64, date32.
const KLO_TIME_TYPE_PATTERN = /\b(?:small)?(?:date|datetime|time|timestamp)(?:tz|offset|_ntz|_ltz|_tz)?\d*\b/;

// Numeric families, including sized and prefixed integers (int4, int64, uint8,
// tinyint, mediumint, hugeint) and sized floats (float4, float8, float64).
// The trailing word boundary keeps words such as `interval` from matching `int`.
const KLO_NUMBER_TYPE_PATTERN =
  /\b(?:u?(?:tiny|small|medium|big|huge)?int(?:eger)?\d*|decimal|numeric|number|float\d*|double|real)\b/;

/**
 * Infers the dimension type from a native database type.
 *
 * Checks run in order — boolean, time, number — and anything that matches
 * none of them is classified as `'string'`.
 */
export function inferKloDimensionType(nativeType: string): KloSchemaDimensionType {
  const normalized = normalizeKloNativeType(nativeType);
  if (/\b(bool|boolean)\b/.test(normalized)) {
    return 'boolean';
  }
  if (KLO_TIME_TYPE_PATTERN.test(normalized)) {
    return 'time';
  }
  if (KLO_NUMBER_TYPE_PATTERN.test(normalized)) {
    return 'number';
  }
  return 'string';
}

/** Builds the normalized-type / dimension-type pair for a native type string. */
export function kloColumnTypeMappingFromNative(nativeType: string): KloColumnTypeMapping {
  return {
    normalizedType: normalizeKloNativeType(nativeType),
    dimensionType: inferKloDimensionType(nativeType),
  };
}
() => { + expect(createKloConnectorCapabilities()).toEqual({ + structuralIntrospection: true, + tableSampling: false, + columnSampling: false, + columnStats: false, + readOnlySql: false, + nestedAnalysis: false, + eventStreamDiscovery: false, + formalForeignKeys: false, + estimatedRowCounts: false, + }); + }); + + it('keeps structural introspection mandatory when optional capabilities are enabled', () => { + expect( + createKloConnectorCapabilities({ + tableSampling: true, + readOnlySql: true, + eventStreamDiscovery: true, + estimatedRowCounts: true, + }), + ).toEqual({ + structuralIntrospection: true, + tableSampling: true, + columnSampling: false, + columnStats: false, + readOnlySql: true, + nestedAnalysis: false, + eventStreamDiscovery: true, + formalForeignKeys: false, + estimatedRowCounts: true, + }); + }); + + it('describes the connector surface without requiring enrichment methods', async () => { + const snapshot: KloSchemaSnapshot = { + connectionId: 'warehouse', + driver: 'postgres', + extractedAt: '2026-04-29T00:00:00.000Z', + scope: { schemas: ['public'] }, + metadata: { source: 'unit-test' }, + tables: [ + { + catalog: null, + db: 'public', + name: 'orders', + kind: 'table', + comment: 'Customer orders', + estimatedRows: 42, + columns: [ + { + name: 'id', + nativeType: 'integer', + normalizedType: 'integer', + dimensionType: 'number', + nullable: false, + primaryKey: true, + comment: 'Primary key', + }, + ], + foreignKeys: [], + }, + ], + }; + + const connector: KloScanConnector = { + id: 'test-postgres', + driver: 'postgres', + capabilities: createKloConnectorCapabilities({ estimatedRowCounts: true }), + async introspect(input: KloScanInput, ctx: KloScanContext) { + expect(input.connectionId).toBe('warehouse'); + expect(ctx.runId).toBe('scan-run-1'); + return snapshot; + }, + }; + + await expect( + connector.introspect( + { + connectionId: 'warehouse', + driver: 'postgres', + scope: { schemas: ['public'] }, + mode: 'structural', + }, + { runId: 
'scan-run-1' }, + ), + ).resolves.toEqual(snapshot); + }); + + it('models optional event-stream discovery as a connector capability and port', async () => { + const eventTypes: KloEventTypeDiscovery[] = [{ value: '$pageview', count: 42 }]; + const propertyKeys: KloEventPropertyDiscovery[] = [{ key: '$browser', count: 31 }]; + const propertyValues: KloEventPropertyValuesResult = { values: ['Chrome', 'Safari'], cardinality: 2 }; + const discovery: KloEventStreamDiscoveryPort = { + async listEventTypes(input: KloEventTypeDiscoveryInput) { + expect(input).toEqual({ + connectionId: 'product', + table: { catalog: '157881', db: null, name: 'events' }, + eventColumn: 'event', + limit: 2, + minCount: 30, + lookbackDays: 14, + }); + return eventTypes; + }, + async listPropertyKeys(input: KloEventPropertyDiscoveryInput) { + expect(input).toEqual({ + connectionId: 'product', + table: { catalog: '157881', db: null, name: 'events' }, + jsonColumn: 'properties', + sampleSize: 1000, + limit: 5, + lookbackDays: 7, + }); + return propertyKeys; + }, + async listPropertyValues(input: KloEventPropertyValuesInput) { + expect(input).toEqual({ + connectionId: 'product', + table: { catalog: '157881', db: null, name: 'events' }, + jsonColumn: 'properties', + propertyKey: '$browser', + limit: 3, + maxCardinality: 1000, + lookbackDays: 30, + }); + return propertyValues; + }, + }; + + const connector: KloScanConnector = { + id: 'posthog:product', + driver: 'posthog', + capabilities: createKloConnectorCapabilities({ eventStreamDiscovery: true }), + eventStreamDiscovery: discovery, + async introspect() { + return { + connectionId: 'product', + driver: 'posthog', + extractedAt: '2026-04-29T00:00:00.000Z', + scope: { catalogs: ['157881'] }, + metadata: {}, + tables: [], + }; + }, + }; + + await expect( + connector.eventStreamDiscovery?.listEventTypes( + { + connectionId: 'product', + table: { catalog: '157881', db: null, name: 'events' }, + eventColumn: 'event', + limit: 2, + minCount: 30, + 
lookbackDays: 14, + }, + { runId: 'scan-run-1' }, + ), + ).resolves.toEqual([{ value: '$pageview', count: 42 }]); + await expect( + connector.eventStreamDiscovery?.listPropertyKeys( + { + connectionId: 'product', + table: { catalog: '157881', db: null, name: 'events' }, + jsonColumn: 'properties', + sampleSize: 1000, + limit: 5, + lookbackDays: 7, + }, + { runId: 'scan-run-1' }, + ), + ).resolves.toEqual([{ key: '$browser', count: 31 }]); + await expect( + connector.eventStreamDiscovery?.listPropertyValues( + { + connectionId: 'product', + table: { catalog: '157881', db: null, name: 'events' }, + jsonColumn: 'properties', + propertyKey: '$browser', + limit: 3, + maxCardinality: 1000, + lookbackDays: 30, + }, + { runId: 'scan-run-1' }, + ), + ).resolves.toEqual({ values: ['Chrome', 'Safari'], cardinality: 2 }); + }); + + it('keeps read-only query results separate from schema snapshots', () => { + const result: KloQueryResult = { + headers: ['id', 'amount'], + headerTypes: ['integer', 'numeric'], + rows: [[1, 10.5]], + totalRows: 1, + rowCount: 1, + }; + + expect(result).toEqual({ + headers: ['id', 'amount'], + headerTypes: ['integer', 'numeric'], + rows: [[1, 10.5]], + totalRows: 1, + rowCount: 1, + }); + }); + + it('models host-provided network tunnel endpoint resolution without app imports', async () => { + const endpoint: KloNetworkEndpoint = { + host: '127.0.0.1', + port: 15432, + close: async () => undefined, + }; + const tunnelPort: KloNetworkTunnelPort<{ networkProxy?: { type: 'ssh_tunnel' } }> = { + async resolveEndpoint(input) { + expect(input).toEqual({ + connectionId: 'warehouse', + driver: 'postgres', + host: 'db.internal', + port: 5432, + connection: { networkProxy: { type: 'ssh_tunnel' } }, + }); + return endpoint; + }, + }; + + await expect( + tunnelPort.resolveEndpoint({ + connectionId: 'warehouse', + driver: 'postgres', + host: 'db.internal', + port: 5432, + connection: { networkProxy: { type: 'ssh_tunnel' } }, + }), + ).resolves.toBe(endpoint); + 
}); +}); diff --git a/packages/context/src/scan/types.ts b/packages/context/src/scan/types.ts new file mode 100644 index 00000000..bf08a0ab --- /dev/null +++ b/packages/context/src/scan/types.ts @@ -0,0 +1,391 @@ +export type KloConnectionDriver = + | 'sqlite' + | 'postgres' + | 'postgresql' + | 'sqlserver' + | 'bigquery' + | 'snowflake' + | 'posthog' + | 'mysql' + | 'clickhouse'; + +export type KloScanMode = 'structural' | 'relationships' | 'enriched'; + +export type KloScanTrigger = 'cli' | 'mcp' | 'schema_scan' | 'scheduled' | 'manual'; + +export interface KloConnectorCapabilities { + structuralIntrospection: true; + tableSampling: boolean; + columnSampling: boolean; + columnStats: boolean; + readOnlySql: boolean; + nestedAnalysis: boolean; + eventStreamDiscovery: boolean; + formalForeignKeys: boolean; + estimatedRowCounts: boolean; +} + +export type KloOptionalConnectorCapabilities = Partial>; + +export function createKloConnectorCapabilities( + capabilities: KloOptionalConnectorCapabilities = {}, +): KloConnectorCapabilities { + return { + structuralIntrospection: true, + tableSampling: capabilities.tableSampling ?? false, + columnSampling: capabilities.columnSampling ?? false, + columnStats: capabilities.columnStats ?? false, + readOnlySql: capabilities.readOnlySql ?? false, + nestedAnalysis: capabilities.nestedAnalysis ?? false, + eventStreamDiscovery: capabilities.eventStreamDiscovery ?? false, + formalForeignKeys: capabilities.formalForeignKeys ?? false, + estimatedRowCounts: capabilities.estimatedRowCounts ?? 
false, + }; +} + +export interface KloSchemaScope { + catalogs?: string[]; + schemas?: string[]; + datasets?: string[]; +} + +export type KloSchemaTableKind = 'table' | 'view' | 'external' | 'event_stream'; + +export type KloSchemaDimensionType = 'time' | 'string' | 'number' | 'boolean'; + +export interface KloSchemaColumn { + name: string; + nativeType: string; + normalizedType: string; + dimensionType: KloSchemaDimensionType; + nullable: boolean; + primaryKey: boolean; + comment: string | null; +} + +export interface KloSchemaForeignKey { + fromColumn: string; + toCatalog: string | null; + toDb: string | null; + toTable: string; + toColumn: string; + constraintName: string | null; +} + +export interface KloSchemaTable { + catalog: string | null; + db: string | null; + name: string; + kind: KloSchemaTableKind; + comment: string | null; + estimatedRows: number | null; + columns: KloSchemaColumn[]; + foreignKeys: KloSchemaForeignKey[]; +} + +export interface KloSchemaSnapshot { + connectionId: string; + driver: KloConnectionDriver; + extractedAt: string; + scope: KloSchemaScope; + tables: KloSchemaTable[]; + metadata: Record; +} + +export interface KloCredentialEnvReference { + kind: 'env'; + name: string; +} + +export interface KloCredentialFileReference { + kind: 'file'; + path: string; +} + +export interface KloResolvedCredentialEnvelope { + kind: 'resolved'; + source: 'standalone' | 'host'; + values: Record; + redacted?: boolean; +} + +export type KloCredentialEnvelope = + | KloCredentialEnvReference + | KloCredentialFileReference + | KloResolvedCredentialEnvelope; + +export interface KloNetworkEndpoint { + host: string; + port: number; + close?: () => Promise; +} + +export interface KloNetworkTunnelRequest> { + connectionId: string; + driver: KloConnectionDriver; + host: string; + port: number; + connection: TConnection; +} + +export interface KloNetworkTunnelPort> { + resolveEndpoint(input: KloNetworkTunnelRequest): Promise; +} + +export interface KloScanInput 
{ + connectionId: string; + driver: KloConnectionDriver; + scope?: KloSchemaScope; + mode?: KloScanMode; + dryRun?: boolean; + detectRelationships?: boolean; + credentials?: KloCredentialEnvelope; + metadata?: Record; +} + +export interface KloProgressUpdateOptions { + transient?: boolean; +} + +export interface KloProgressPort { + update(progress: number, message?: string, options?: KloProgressUpdateOptions): Promise; + startPhase(weight: number): KloProgressPort; +} + +export interface KloScanLoggerPort { + debug(message: string, metadata?: Record): void; + info(message: string, metadata?: Record): void; + warn(message: string, metadata?: Record): void; + error(message: string, metadata?: Record): void; +} + +export interface KloScanContext { + runId: string; + signal?: AbortSignal; + progress?: KloProgressPort; + logger?: KloScanLoggerPort; +} + +export interface KloTableRef { + catalog: string | null; + db: string | null; + name: string; +} + +export interface KloTableSampleInput { + connectionId: string; + table: KloTableRef; + columns?: string[]; + limit: number; +} + +export interface KloTableSampleResult { + headers: string[]; + rows: unknown[][]; + totalRows: number; +} + +export interface KloColumnSampleInput { + connectionId: string; + table: KloTableRef; + column: string; + limit: number; +} + +export interface KloColumnSampleResult { + values: unknown[]; + nullCount: number | null; + distinctCount: number | null; +} + +export interface KloColumnStatsInput { + connectionId: string; + table: KloTableRef; + column: string; +} + +export interface KloColumnStatsResult { + min: unknown; + max: unknown; + average: number | null; + nullCount: number | null; + distinctCount: number | null; +} + +export interface KloEventTypeDiscoveryInput { + connectionId: string; + table: KloTableRef; + eventColumn: string; + limit: number; + minCount?: number; + lookbackDays?: number; +} + +export interface KloEventTypeDiscovery { + value: string; + count: number; +} + 
+export interface KloEventPropertyDiscoveryInput { + connectionId: string; + table: KloTableRef; + jsonColumn: string; + sampleSize: number; + limit: number; + lookbackDays?: number; +} + +export interface KloEventPropertyDiscovery { + key: string; + count: number; +} + +export interface KloEventPropertyValuesInput { + connectionId: string; + table: KloTableRef; + jsonColumn: string; + propertyKey: string; + limit: number; + maxCardinality?: number; + lookbackDays?: number; +} + +export interface KloEventPropertyValuesResult { + values: string[]; + cardinality: number; +} + +export interface KloEventStreamDiscoveryPort { + listEventTypes(input: KloEventTypeDiscoveryInput, ctx: KloScanContext): Promise; + listPropertyKeys(input: KloEventPropertyDiscoveryInput, ctx: KloScanContext): Promise; + listPropertyValues( + input: KloEventPropertyValuesInput, + ctx: KloScanContext, + ): Promise; +} + +export interface KloReadOnlyQueryInput { + connectionId: string; + sql: string; + maxRows?: number; +} + +export interface KloQueryResult { + headers: string[]; + headerTypes?: string[]; + rows: unknown[][]; + totalRows: number; + rowCount: number | null; +} + +export interface KloScanConnector { + id: string; + driver: KloConnectionDriver; + capabilities: KloConnectorCapabilities; + eventStreamDiscovery?: KloEventStreamDiscoveryPort; + introspect(input: KloScanInput, ctx: KloScanContext): Promise; + sampleColumn?(input: KloColumnSampleInput, ctx: KloScanContext): Promise; + sampleTable?(input: KloTableSampleInput, ctx: KloScanContext): Promise; + columnStats?(input: KloColumnStatsInput, ctx: KloScanContext): Promise; + executeReadOnly?(input: KloReadOnlyQueryInput, ctx: KloScanContext): Promise; + cleanup?(): Promise; +} + +export interface KloEmbeddingPort { + dimensions: number; + maxBatchSize: number; + embedBatch(texts: string[]): Promise; +} + +export interface KloStructuralSyncStats { + tablesCreated: number; + tablesUpdated: number; + tablesDeleted: number; + 
columnsCreated: number; + columnsUpdated: number; + columnsDeleted: number; +} + +export interface KloScanDiffSummary { + tablesAdded: number; + tablesModified: number; + tablesDeleted: number; + tablesUnchanged: number; + columnsAdded: number; + columnsModified: number; + columnsDeleted: number; +} + +export interface KloScanArtifactPaths { + rawSourcesDir: string | null; + reportPath: string | null; + manifestShards: string[]; + enrichmentArtifacts: string[]; +} + +export type KloScanWarningCode = + | 'connector_capability_missing' + | 'sampling_failed' + | 'statistics_failed' + | 'llm_unavailable' + | 'embedding_unavailable' + | 'scan_enrichment_backend_not_configured' + | 'relationship_validation_failed' + | 'relationship_llm_invalid_reference' + | 'relationship_llm_proposal_failed' + | 'credential_redacted' + | 'enrichment_failed'; + +export interface KloScanWarning { + code: KloScanWarningCode; + message: string; + table?: string; + column?: string; + recoverable: boolean; + metadata?: Record; +} + +export interface KloScanEnrichmentSummary { + dataDictionary: 'skipped' | 'completed' | 'failed'; + tableDescriptions: 'skipped' | 'completed' | 'failed'; + columnDescriptions: 'skipped' | 'completed' | 'failed'; + embeddings: 'skipped' | 'completed' | 'failed'; + deterministicRelationships: 'skipped' | 'completed' | 'failed'; + llmRelationshipValidation: 'skipped' | 'completed' | 'failed'; + statisticalValidation: 'skipped' | 'completed' | 'failed'; +} + +export interface KloScanRelationshipSummary { + accepted: number; + review: number; + rejected: number; + skipped: number; +} + +export type KloScanEnrichmentStage = 'descriptions' | 'embeddings' | 'relationships'; + +export interface KloScanEnrichmentStateSummary { + resumedStages: KloScanEnrichmentStage[]; + completedStages: KloScanEnrichmentStage[]; + failedStages: KloScanEnrichmentStage[]; +} + +export interface KloScanReport { + connectionId: string; + driver: KloConnectionDriver; + syncId: string; + runId: 
string; + trigger: KloScanTrigger; + mode: KloScanMode; + dryRun: boolean; + artifactPaths: KloScanArtifactPaths; + diffSummary: KloScanDiffSummary; + manifestShardsWritten: number; + structuralSyncStats: KloStructuralSyncStats; + enrichment: KloScanEnrichmentSummary; + capabilityGaps: Array>; + warnings: KloScanWarning[]; + relationships: KloScanRelationshipSummary; + enrichmentState: KloScanEnrichmentStateSummary; + createdAt: string; +} diff --git a/packages/context/src/search/backend-conformance.test.ts b/packages/context/src/search/backend-conformance.test.ts new file mode 100644 index 00000000..0a378307 --- /dev/null +++ b/packages/context/src/search/backend-conformance.test.ts @@ -0,0 +1,472 @@ +import { mkdtemp, rm } from 'node:fs/promises'; +import { tmpdir } from 'node:os'; +import { join } from 'node:path'; +import { afterEach, beforeEach, describe, it } from 'vitest'; +import { SqliteContextEvidenceStore } from '../ingest/context-evidence/index.js'; +import type { JsonValue } from '../ingest/ports.js'; +import { initKloProject, type KloLocalProject } from '../project/index.js'; +import { type LocalSlSourceSearchResult, searchLocalSlSources, writeLocalSlSource } from '../sl/local-sl.js'; +import type { ContextEvidenceSearchResult } from '../tools/context-evidence-tool-store.js'; +import { + type LocalKnowledgeSearchResult, + searchLocalKnowledgePages, + writeLocalKnowledgePage, +} from '../wiki/local-knowledge.js'; +import { + assertSearchBackendCapabilities, + assertSearchBackendConformanceCase, + type SearchBackendConformanceResult, +} from './backend-conformance.js'; +import type { SearchBackendCapabilities } from './types.js'; + +const SQLITE_SEARCH_CAPABILITIES = { + fts: true, + vector: false, + fuzzy: false, + jsonSearch: true, + arraySearch: false, +} satisfies SearchBackendCapabilities; + +const ORDERS_YAML = [ + 'name: orders', + 'table: public.orders', + 'grain:', + ' - order_id', + 'columns:', + ' - name: order_id', + ' type: string', + ' - 
name: revenue', + ' type: number', + 'measures:', + ' - name: total_revenue', + ' expr: sum(revenue)', + '', +].join('\n'); + +const FINANCE_ORDERS_YAML = [ + 'name: orders', + 'description: Finance orders used for invoice reconciliation.', + 'table: finance.orders', + 'grain:', + ' - order_id', + 'columns:', + ' - name: order_id', + ' type: string', + ' - name: invoice_status', + ' type: string', + '', +].join('\n'); + +class FakeEmbeddingPort { + readonly maxBatchSize = 16; + + async computeEmbedding(text: string): Promise { + return text.toLowerCase().includes('semantic revenue') ? [1, 0] : [0, 1]; + } + + async computeEmbeddingsBulk(texts: string[]): Promise { + return Promise.all(texts.map((text) => this.computeEmbedding(text))); + } +} + +function toSlConformanceResult(result: LocalSlSourceSearchResult): SearchBackendConformanceResult { + return { + id: `${result.connectionId}/${result.name}`, + score: result.score ?? 0, + matchReasons: result.matchReasons ?? [], + lanes: result.lanes, + dictionaryMatches: result.dictionaryMatches, + }; +} + +function toWikiConformanceResult(result: LocalKnowledgeSearchResult): SearchBackendConformanceResult { + return { + id: result.key, + score: result.score, + matchReasons: result.matchReasons, + lanes: result.lanes, + }; +} + +function toContextConformanceResult(result: ContextEvidenceSearchResult): SearchBackendConformanceResult { + return { + id: `${result.externalId}:${result.stableCitationKey}`, + score: result.score, + matchReasons: result.matchReasons ?? 
[], + lanes: result.lanes, + }; +} + +async function seedSemanticLayerProject(project: KloLocalProject): Promise { + await writeLocalSlSource(project, { + connectionId: 'warehouse', + sourceName: 'orders', + yaml: ORDERS_YAML, + }); + await writeLocalSlSource(project, { + connectionId: 'finance', + sourceName: 'orders', + yaml: FINANCE_ORDERS_YAML, + }); + await project.fileStore.writeFile( + 'raw-sources/warehouse/live-database/sync-1/enrichment/relationship-profile.json', + `${JSON.stringify( + { + connectionId: 'warehouse', + driver: 'postgres', + sqlAvailable: true, + queryCount: 2, + tables: [], + columns: { + 'orders.status': { + table: { catalog: null, db: 'public', name: 'orders' }, + column: 'status', + nativeType: 'text', + normalizedType: 'string', + rowCount: 10, + nullCount: 0, + distinctCount: 2, + uniquenessRatio: 0.2, + nullRate: 0, + sampleValues: ['paid', 'refunded'], + minTextLength: 4, + maxTextLength: 8, + }, + }, + warnings: [], + }, + null, + 2, + )}\n`, + 'klo', + 'klo@example.com', + 'Seed dictionary profile', + ); +} + +async function seedWikiProject(project: KloLocalProject): Promise { + await writeLocalKnowledgePage(project, { + key: 'metrics/revenue', + scope: 'GLOBAL', + summary: 'Semantic revenue definition', + content: 'Revenue is recognized when an order is paid.', + tags: ['finance'], + refs: ['semantic-layer/warehouse/orders.yaml'], + slRefs: ['orders'], + }); + await writeLocalKnowledgePage(project, { + key: 'support/escalations', + scope: 'GLOBAL', + summary: 'Support escalation process', + content: 'Escalations move urgent support tickets to the operations queue.', + tags: ['operations'], + }); +} + +async function seedContextDocument( + subject: SqliteContextEvidenceStore, + input: { + runId?: string; + syncId?: string; + externalId?: string; + title?: string; + rawPath?: string; + metadata?: JsonValue; + publishState?: 'pending' | 'published'; + embedding?: number[] | null; + content?: string; + searchText?: string; + } = {}, 
+): Promise<{ documentId: string; chunkId: string }> { + const runId = input.runId ?? 'run-1'; + const syncId = input.syncId ?? 'sync-1'; + const externalId = input.externalId ?? 'page-1'; + const title = input.title ?? 'Revenue Policy'; + const rawPath = input.rawPath ?? `pages/${externalId}/page.md`; + const doc = await subject.upsertDocument({ + runId, + connectionId: 'conn-1', + sourceKey: 'notion', + externalId, + externalParentId: null, + databaseId: null, + dataSourceId: null, + title, + path: `Company Handbook / ${title}`, + url: `https://notion.test/${externalId}`, + objectType: 'page', + lastEditedAt: new Date('2026-04-30T10:00:00.000Z'), + lastEditedBy: 'user-1', + rawPath, + syncId, + contentHash: `hash-${externalId}`, + publishState: input.publishState ?? 'published', + metadata: input.metadata ?? {}, + }); + await subject.replaceChunks(doc.id, [ + { + chunkKey: 'intro', + headingPath: ['Policy'], + ordinal: 0, + content: input.content ?? `${title} requires approval from the accountable owner.`, + searchText: input.searchText ?? `${title} approval accountable owner`, + embedding: input.embedding ?? 
[1, 0, 0], + tokenCount: 8, + citation: { + source: 'notion', + pageId: externalId, + title, + syncId, + rawPath, + }, + stableCitationKey: `notion:${externalId}:intro`, + syncId, + contentHash: `chunk-${externalId}`, + }, + ]); + + const read = await subject.readDocumentByExternalId('conn-1', 'notion', externalId, runId); + if (!read) { + throw new Error(`seeded document ${externalId} was not readable`); + } + + return { documentId: doc.id, chunkId: read.chunks[0].id }; +} + +describe('SQLite hybrid search backend conformance', () => { + let tempDir: string; + let project: KloLocalProject; + let dbPath: string; + + beforeEach(async () => { + tempDir = await mkdtemp(join(tmpdir(), 'klo-search-conformance-')); + project = await initKloProject({ projectDir: join(tempDir, 'project'), projectName: 'warehouse' }); + dbPath = join(tempDir, '.klo', 'db.sqlite'); + }); + + afterEach(async () => { + await rm(tempDir, { recursive: true, force: true }); + }); + + it('documents SQLite search backend capabilities', () => { + assertSearchBackendCapabilities({ + backendName: 'sqlite', + capabilities: SQLITE_SEARCH_CAPABILITIES, + expected: { + fts: true, + vector: false, + fuzzy: false, + jsonSearch: true, + arraySearch: false, + }, + }); + }); + + it('keeps semantic-layer global ranking, dictionary evidence, and token fallback stable', async () => { + await seedSemanticLayerProject(project); + + const global = await searchLocalSlSources(project, { query: 'orders', limit: 5 }); + assertSearchBackendConformanceCase({ + backendName: 'sqlite', + surface: 'semantic-layer', + caseName: 'global source ranking', + results: global.map(toSlConformanceResult), + expectedTopIds: ['finance/orders', 'warehouse/orders'], + expectedReasonsById: { + 'finance/orders': ['lexical'], + 'warehouse/orders': ['lexical'], + }, + expectedLanes: { + lexical: { status: 'available' }, + semantic: { status: 'skipped', reason: 'embedding_unconfigured' }, + }, + }); + + const dictionary = await 
searchLocalSlSources(project, { + connectionId: 'warehouse', + query: 'refunded', + limit: 5, + }); + assertSearchBackendConformanceCase({ + backendName: 'sqlite', + surface: 'semantic-layer', + caseName: 'dictionary source evidence', + results: dictionary.map(toSlConformanceResult), + expectedTopIds: ['warehouse/orders'], + expectedReasonsById: { + 'warehouse/orders': ['dictionary'], + }, + expectedLanes: { + dictionary: { status: 'available' }, + semantic: { status: 'skipped', reason: 'embedding_unconfigured' }, + }, + expectedDictionaryMatchesById: { + 'warehouse/orders': [{ column: 'status', values: ['refunded'] }], + }, + }); + + const token = await searchLocalSlSources(project, { + connectionId: 'warehouse', + query: 'orders---', + limit: 5, + }); + assertSearchBackendConformanceCase({ + backendName: 'sqlite', + surface: 'semantic-layer', + caseName: 'token fallback reason', + results: token.map(toSlConformanceResult), + expectedTopIds: ['warehouse/orders'], + expectedReasonsById: { + 'warehouse/orders': ['token'], + }, + expectedLanes: { + token: { status: 'available' }, + }, + }); + }); + + it('keeps wiki lexical, semantic, and token behavior stable', async () => { + await seedWikiProject(project); + + const lexical = await searchLocalKnowledgePages(project, { + query: 'paid order', + userId: 'local', + limit: 5, + }); + assertSearchBackendConformanceCase({ + backendName: 'sqlite', + surface: 'wiki', + caseName: 'lexical page ranking', + results: lexical.map(toWikiConformanceResult), + expectedTopIds: ['metrics/revenue'], + expectedReasonsById: { + 'metrics/revenue': ['lexical'], + }, + expectedLanes: { + lexical: { status: 'available' }, + semantic: { status: 'skipped', reason: 'embedding_unconfigured' }, + }, + }); + + const semantic = await searchLocalKnowledgePages(project, { + query: 'semantic revenue', + userId: 'local', + limit: 5, + embeddingService: new FakeEmbeddingPort(), + }); + assertSearchBackendConformanceCase({ + backendName: 'sqlite', + 
surface: 'wiki', + caseName: 'semantic page ranking', + results: semantic.map(toWikiConformanceResult), + expectedTopIds: ['metrics/revenue'], + expectedReasonsById: { + 'metrics/revenue': ['semantic'], + }, + expectedLanes: { + semantic: { status: 'available' }, + }, + }); + + const token = await searchLocalKnowledgePages(project, { + query: 'paid---', + userId: 'local', + limit: 5, + }); + assertSearchBackendConformanceCase({ + backendName: 'sqlite', + surface: 'wiki', + caseName: 'token page fallback', + results: token.map(toWikiConformanceResult), + expectedTopIds: ['metrics/revenue'], + expectedReasonsById: { + 'metrics/revenue': ['token'], + }, + expectedLanes: { + token: { status: 'available' }, + }, + }); + }); + + it('keeps context-evidence lane fusion and token fallback stable', async () => { + const subject = new SqliteContextEvidenceStore({ dbPath }); + await seedContextDocument(subject, { + externalId: 'page-discount', + title: 'Enterprise Discount Policy', + content: 'Enterprise discounts require finance approval before quote approval.', + searchText: 'enterprise discount finance approval quote', + embedding: [1, 0, 0], + }); + await seedContextDocument(subject, { + externalId: 'page-owner', + title: 'Accountable Owner Policy', + content: 'Every policy has an accountable owner and review date.', + searchText: 'accountable owner review date', + embedding: [0.95, 0.05, 0], + }); + await seedContextDocument(subject, { + externalId: 'page-expense', + title: 'Expense Policy', + content: 'Expense reimbursement requires receipt review.', + searchText: 'expense reimbursement receipt review', + embedding: [0, 1, 0], + }); + + const fused = await subject.searchRRF({ + connectionId: 'conn-1', + sourceKey: 'notion', + queryEmbedding: [1, 0, 0], + queryText: 'enterprise discount approval', + limit: 2, + includeDeleted: false, + }); + assertSearchBackendConformanceCase({ + backendName: 'sqlite', + surface: 'context-evidence', + caseName: 'chunk lane fusion', + 
/** Outcome one search lane reported for a result. */
export interface SearchBackendConformanceLane {
  lane: string;
  status: SearchLaneStatus;
  reason?: string;
}

/** Dictionary evidence for a single column of a result. */
export interface SearchBackendConformanceDictionaryMatch {
  column: string;
  values: readonly string[];
  overflowCount?: number;
}

/** Backend-neutral view of one search hit; backends map their rows into this before asserting. */
export interface SearchBackendConformanceResult {
  id: string;
  score: number;
  matchReasons: readonly string[];
  lanes?: readonly SearchBackendConformanceLane[];
  dictionaryMatches?: readonly SearchBackendConformanceDictionaryMatch[];
}

/** Expected lane status; `reason` is only compared when it is provided. */
export interface ExpectedSearchBackendConformanceLane {
  status: SearchLaneStatus;
  reason?: string;
}

/** Input for {@link assertSearchBackendConformanceCase}. */
export interface AssertSearchBackendConformanceCaseInput {
  backendName: string;
  surface: string;
  caseName: string;
  results: readonly SearchBackendConformanceResult[];
  /** Ids expected at the head of `results`, in order. */
  expectedTopIds: readonly string[];
  /** Match reasons that must be present (extra reasons are allowed), keyed by result id. */
  expectedReasonsById?: Record<string, readonly string[]>;
  /** Expected lane outcome keyed by lane name. */
  expectedLanes?: Record<string, ExpectedSearchBackendConformanceLane>;
  /** Dictionary evidence that must be present (extra evidence is allowed), keyed by result id. */
  expectedDictionaryMatchesById?: Record<string, readonly SearchBackendConformanceDictionaryMatch[]>;
}

/** Input for {@link assertSearchBackendCapabilities}. */
export interface AssertSearchBackendCapabilitiesInput {
  backendName: string;
  capabilities: SearchBackendCapabilities;
  /** Only the capabilities listed here are checked. */
  expected: Partial<SearchBackendCapabilities>;
}

/** Builds the human-readable prefix used in failure messages for one conformance case. */
function caseLabel(
  input: Pick<AssertSearchBackendConformanceCaseInput, 'backendName' | 'surface' | 'caseName'>,
): string {
  return `${input.backendName} ${input.surface} conformance case "${input.caseName}"`;
}

/** Throws a single Error listing every collected failure as a bullet under `label`. */
function fail(label: string, failures: string[]): never {
  throw new Error([`${label} failed:`, ...failures.map((failure) => `- ${failure}`)].join('\n'));
}

/** Order-insensitive key for one dictionary match: `column:sorted,values:overflowCount`. */
function dictionaryMatchKey(match: SearchBackendConformanceDictionaryMatch): string {
  const values = [...match.values].sort((left, right) => left.localeCompare(right)).join(',');
  return `${match.column}:${values}:${match.overflowCount ?? 0}`;
}

/** Sorted keys for a (possibly absent) list of dictionary matches. */
function dictionaryMatchKeys(matches: readonly SearchBackendConformanceDictionaryMatch[] | undefined): string[] {
  return (matches ?? []).map(dictionaryMatchKey).sort((left, right) => left.localeCompare(right));
}

/**
 * Asserts that a backend's results satisfy one conformance case.
 *
 * Checks, in order: the leading result ids, that each expected top result present in
 * `results` has a positive finite score, required match reasons, lane outcomes, and
 * dictionary evidence. All failures are collected and reported together.
 *
 * @throws Error listing every failed expectation when at least one check fails.
 */
export function assertSearchBackendConformanceCase(input: AssertSearchBackendConformanceCaseInput): void {
  const label = caseLabel(input);
  const failures: string[] = [];
  const topResults = input.results.slice(0, input.expectedTopIds.length);

  input.expectedTopIds.forEach((expectedId, index) => {
    const actualId = topResults[index]?.id;
    if (actualId !== expectedId) {
      // Visible placeholder so the message does not end in a bare "got " when the slot is empty.
      failures.push(`expected result ${index + 1} to be ${expectedId}, got ${actualId ?? '<missing>'}`);
    }
  });

  const byId = new Map(input.results.map((result) => [result.id, result]));

  for (const expectedId of input.expectedTopIds) {
    const result = byId.get(expectedId);
    if (!result) {
      // A missing id was already reported by the ordering check above.
      continue;
    }
    if (!Number.isFinite(result.score) || result.score <= 0) {
      failures.push(`expected ${expectedId} to have a positive finite score, got ${result.score}`);
    }
  }

  for (const [id, expectedReasons] of Object.entries(input.expectedReasonsById ?? {})) {
    const result = byId.get(id);
    if (!result) {
      failures.push(`expected reasons for ${id}, but the result was missing`);
      continue;
    }
    for (const reason of expectedReasons) {
      if (!result.matchReasons.includes(reason)) {
        failures.push(`expected ${id} to include match reason ${reason}, got [${result.matchReasons.join(', ')}]`);
      }
    }
  }

  // Lanes are pooled across all results; the first entry found for a lane name is the one compared.
  const allLanes = input.results.flatMap((result) => result.lanes ?? []);
  for (const [lane, expected] of Object.entries(input.expectedLanes ?? {})) {
    const actual = allLanes.find((entry) => entry.lane === lane);
    if (!actual) {
      failures.push(`expected lane ${lane} to be reported`);
      continue;
    }
    if (actual.status !== expected.status) {
      failures.push(`expected lane ${lane} status ${expected.status}, got ${actual.status}`);
    }
    if (expected.reason !== undefined && actual.reason !== expected.reason) {
      failures.push(`expected lane ${lane} reason ${expected.reason}, got ${actual.reason ?? '<none>'}`);
    }
  }

  for (const [id, expectedMatches] of Object.entries(input.expectedDictionaryMatchesById ?? {})) {
    const result = byId.get(id);
    if (!result) {
      failures.push(`expected dictionary matches for ${id}, but the result was missing`);
      continue;
    }

    const actualKeys = dictionaryMatchKeys(result.dictionaryMatches);
    for (const expectedKey of dictionaryMatchKeys(expectedMatches)) {
      if (!actualKeys.includes(expectedKey)) {
        failures.push(`expected ${id} dictionary evidence ${expectedKey}, got [${actualKeys.join(', ')}]`);
      }
    }
  }

  if (failures.length > 0) {
    fail(label, failures);
  }
}

/**
 * Asserts that the declared backend capabilities match every entry in `expected`.
 *
 * @throws Error listing each capability whose declared value differs from the expectation.
 */
export function assertSearchBackendCapabilities(input: AssertSearchBackendCapabilitiesInput): void {
  const failures: string[] = [];

  for (const [capability, expected] of Object.entries(input.expected) as Array<
    [keyof SearchBackendCapabilities, boolean]
  >) {
    const actual = input.capabilities[capability];
    if (actual !== expected) {
      failures.push(`expected ${capability}=${expected}, got ${actual}`);
    }
  }

  if (failures.length > 0) {
    fail(`${input.backendName} search backend capabilities`, failures);
  }
}
after RRF fusion', async () => { + const calls: Array<{ lane: string; laneCandidatePoolLimit: number; finalLimit: number }> = []; + const core = new HybridSearchCore(); + const result = await core.search({ + queryText: 'gross revenue', + limit: 1, + generators: [ + { + lane: 'lexical', + async generate(args) { + calls.push({ lane: 'lexical', ...args }); + return { + candidates: [ + { id: 'orders', rank: 1, rawScore: 0.8 }, + { id: 'customers', rank: 2, rawScore: 0.7 }, + ], + }; + }, + }, + { + lane: 'semantic', + async generate(args) { + calls.push({ lane: 'semantic', ...args }); + return { candidates: [{ id: 'customers', rank: 1, rawScore: 0.91 }] }; + }, + }, + ], + }); + + expect(calls).toEqual([ + expect.objectContaining({ lane: 'lexical', laneCandidatePoolLimit: 25, finalLimit: 1 }), + expect.objectContaining({ lane: 'semantic', laneCandidatePoolLimit: 25, finalLimit: 1 }), + ]); + expect(result.results.map((candidate) => candidate.id)).toEqual(['customers']); + expect(result.results[0]).toMatchObject({ + matchReasons: ['lexical', 'semantic'], + ranksByLane: { lexical: 2, semantic: 1 }, + rawScoresByLane: { lexical: 0.7, semantic: 0.91 }, + }); + expect(result.lanes).toEqual([ + expect.objectContaining({ lane: 'lexical', status: 'available', returnedCandidateCount: 2, weight: 1.5 }), + expect.objectContaining({ lane: 'semantic', status: 'available', returnedCandidateCount: 1, weight: 2 }), + ]); + }); + + it('keeps available lane results when another lane is skipped or fails', async () => { + const core = new HybridSearchCore(); + const result = await core.search({ + queryText: 'paid', + limit: 5, + generators: [ + generator('lexical', [{ id: 'orders', rank: 1 }]), + { + lane: 'semantic', + async generate() { + return { status: 'skipped', candidates: [], reason: 'embedding_unconfigured' }; + }, + }, + { + lane: 'dictionary', + async generate() { + throw new Error('dictionary index unavailable'); + }, + }, + ], + }); + + expect(result.results.map((candidate) 
=> candidate.id)).toEqual(['orders']); + expect(result.lanes).toEqual([ + expect.objectContaining({ lane: 'lexical', status: 'available', reason: undefined }), + expect.objectContaining({ lane: 'semantic', status: 'skipped', reason: 'embedding_unconfigured' }), + expect.objectContaining({ lane: 'dictionary', status: 'failed', reason: 'dictionary index unavailable' }), + ]); + }); + + it('deduplicates one lane by best rank before fusion', async () => { + const core = new HybridSearchCore(); + const result = await core.search({ + queryText: 'paid status', + limit: 10, + generators: [ + generator('dictionary', [ + { id: 'orders', rank: 4, rawScore: 0.4, evidence: { column: 'state', values: ['paid'] } }, + { id: 'orders', rank: 1, rawScore: 0.9, evidence: { column: 'status', values: ['paid'] } }, + ]), + ], + }); + + expect(result.results).toHaveLength(1); + expect(result.results[0]).toMatchObject({ + id: 'orders', + ranksByLane: { dictionary: 1 }, + rawScoresByLane: { dictionary: 0.9 }, + evidenceByLane: { dictionary: [{ column: 'status', values: ['paid'] }] }, + }); + }); + + it('uses deterministic id ordering when scores and lane counts tie', async () => { + const core = new HybridSearchCore(); + const result = await core.search({ + queryText: 'revenue', + limit: 10, + generators: [generator('lexical', [{ id: 'zebra', rank: 1 }, { id: 'alpha', rank: 1 }])], + }); + + expect(result.results.map((candidate) => candidate.id)).toEqual(['alpha', 'zebra']); + }); +}); diff --git a/packages/context/src/search/hybrid-search-core.ts b/packages/context/src/search/hybrid-search-core.ts new file mode 100644 index 00000000..cef9609b --- /dev/null +++ b/packages/context/src/search/hybrid-search-core.ts @@ -0,0 +1,141 @@ +import { defaultLaneCandidatePoolLimit, normalizeSearchQuery } from './query.js'; +import { compareFusedSearchCandidates, DEFAULT_RRF_K, DEFAULT_SEARCH_LANE_WEIGHTS, rrfContribution } from './rrf.js'; +import type { + FusedSearchCandidate, + HybridSearchOptions, + 
/** A generator paired with the lane result it produced (or a synthesized failure result). */
interface ExecutedLane {
  generator: SearchCandidateGenerator;
  result: SearchLaneResult;
}

/** Resolves a lane's fusion weight: generator override, then per-call option, then default, then 1. */
function laneWeight(options: HybridSearchOptions, lane: SearchLaneName, generatorWeight?: number): number {
  return generatorWeight ?? options.laneWeights?.[lane] ?? DEFAULT_SEARCH_LANE_WEIGHTS[lane] ?? 1;
}

/**
 * Returns a copy of `candidate` with a usable 1-based integer rank.
 *
 * Ranks that are non-finite or below 1 are replaced by `fallbackRank`. The `>= 1` guard
 * matters: a fractional rank in (0, 1) would otherwise floor to 0 and outrank a real rank 1.
 */
function normalizeCandidate(candidate: SearchCandidate, fallbackRank: number): SearchCandidate {
  const rank = Number.isFinite(candidate.rank) && candidate.rank >= 1 ? Math.floor(candidate.rank) : fallbackRank;
  return { ...candidate, rank };
}

/**
 * Collapses duplicate ids within one lane, keeping the best (lowest) rank per id.
 * On a rank tie the first occurrence is kept. The output is sorted by rank, then id.
 */
function bestCandidatesForLane(candidates: SearchCandidate[]): SearchCandidate[] {
  const byId = new Map<string, SearchCandidate>();
  candidates.forEach((candidate, index) => {
    // Position in the lane's list is the fallback rank for candidates without a valid one.
    const normalized = normalizeCandidate(candidate, index + 1);
    const existing = byId.get(normalized.id);
    if (!existing || normalized.rank < existing.rank) {
      byId.set(normalized.id, normalized);
    }
  });

  return [...byId.values()].sort((left, right) => left.rank - right.rank || left.id.localeCompare(right.id));
}

/** Converts a thrown value into a `failed` lane result carrying the error message as the reason. */
function failedLaneResult(error: unknown): SearchLaneResult {
  return {
    status: 'failed',
    candidates: [],
    reason: error instanceof Error ? error.message : String(error),
  };
}

/** Runs every lane generator for a query and fuses their ranked candidates. */
export class HybridSearchCore {
  /**
   * Executes all generators concurrently, then fuses `available` lanes by summing
   * `rrfContribution(weight, rank, rrfK)` per candidate id.
   *
   * A generator that throws is recorded as a `failed` lane and does not abort the search;
   * lanes whose status is not `available` contribute no candidates. The final limit is
   * applied after fusion.
   *
   * @param options Query text, final limit, generators, and optional pool-size/weight/k overrides.
   * @returns Fused results (at most the final limit) plus a per-lane breakdown in generator order.
   */
  async search(options: HybridSearchOptions): Promise<HybridSearchResult> {
    // Math.max(1, NaN) is NaN, which would make the final slice return nothing; fall back to 1.
    const finalLimit = Number.isNaN(options.limit) ? 1 : Math.max(1, options.limit);
    const requestedCandidatePoolLimit = options.candidatePoolLimit ?? defaultLaneCandidatePoolLimit(finalLimit);
    const normalizedQuery = normalizeSearchQuery(options.queryText);

    const executed = await Promise.all(
      options.generators.map(async (generator): Promise<ExecutedLane> => {
        try {
          const result = await generator.generate({
            queryText: options.queryText,
            normalizedQuery,
            finalLimit,
            laneCandidatePoolLimit: requestedCandidatePoolLimit,
          });
          return { generator, result };
        } catch (error) {
          // One lane failing must not take the other lanes down with it.
          return { generator, result: failedLaneResult(error) };
        }
      }),
    );

    const byId = new Map<string, FusedSearchCandidate>();
    const lanes: SearchLaneBreakdown[] = [];
    const rrfK = options.rrfK ?? DEFAULT_RRF_K;

    for (const { generator, result } of executed) {
      const weight = laneWeight(options, generator.lane, generator.weight);
      const status = result.status ?? 'available';
      const effectiveCandidatePoolLimit = result.effectiveCandidatePoolLimit ?? requestedCandidatePoolLimit;
      const laneCandidates = status === 'available' ? bestCandidatesForLane(result.candidates) : [];

      lanes.push({
        lane: generator.lane,
        status,
        requestedCandidatePoolLimit,
        effectiveCandidatePoolLimit,
        returnedCandidateCount: laneCandidates.length,
        weight,
        reason: result.reason,
      });

      if (status !== 'available') {
        continue;
      }

      for (const candidate of laneCandidates) {
        const existing: FusedSearchCandidate = byId.get(candidate.id) ?? {
          id: candidate.id,
          score: 0,
          matchReasons: [],
          ranksByLane: {},
          rawScoresByLane: {},
          evidenceByLane: {},
        };

        existing.score += rrfContribution(weight, candidate.rank, rrfK);
        existing.ranksByLane[generator.lane] = candidate.rank;
        if (candidate.rawScore !== undefined) {
          existing.rawScoresByLane[generator.lane] = candidate.rawScore;
        }
        // A candidate may name its own reason; otherwise the lane name is the reason.
        const reason = candidate.matchReason ?? generator.lane;
        if (!existing.matchReasons.includes(reason)) {
          existing.matchReasons.push(reason);
        }
        if (candidate.evidence !== undefined) {
          existing.evidenceByLane[generator.lane] = [
            ...(existing.evidenceByLane[generator.lane] ?? []),
            candidate.evidence,
          ];
        }

        byId.set(candidate.id, existing);
      }
    }

    const results = [...byId.values()].sort(compareFusedSearchCandidates).slice(0, finalLimit);

    return {
      query: normalizedQuery,
      requestedLimit: finalLimit,
      requestedCandidatePoolLimit,
      results,
      lanes,
    };
  }
}
/**
 * Asks the OS for a free TCP port on 127.0.0.1 by binding to port 0, then releases it.
 *
 * The port is free at the moment this returns; nothing reserves it afterwards.
 *
 * @returns The port number the probe server was bound to.
 * @throws Error if listening fails, if the server reports a non-TCP address, or if closing fails.
 */
async function allocatePort(): Promise<number> {
  const server = createServer();
  await new Promise<void>((resolve, reject) => {
    // Without an 'error' listener a failed listen() would never settle this promise.
    server.once('error', reject);
    server.listen(0, '127.0.0.1', () => {
      server.off('error', reject);
      resolve();
    });
  });
  const address = server.address();
  if (typeof address !== 'object' || address === null) {
    // Do not leave the probe server listening when bailing out.
    server.close();
    throw new Error('Expected TCP server address while allocating a PGlite owner-process port.');
  }
  await new Promise<void>((resolve, reject) => {
    server.close((error) => {
      if (error) {
        reject(error);
        return;
      }
      resolve();
    });
  });
  return address.port;
}

/**
 * Creates the prototype schema through the owner process: a documents table with FTS (GIN)
 * and ivfflat vector indexes, and a dictionary-values table with a trigram GIN index.
 */
async function createHybridSearchFixture(owner: KloPGliteOwnerProcess): Promise<void> {
  await owner.query(`
    CREATE TABLE prototype_documents (
      id TEXT PRIMARY KEY,
      search_text TEXT NOT NULL,
      metadata JSONB NOT NULL DEFAULT '{}'::jsonb,
      embedding vector(3) NOT NULL
    );

    CREATE INDEX prototype_documents_fts_idx
      ON prototype_documents
      USING GIN (to_tsvector('english', search_text));

    CREATE INDEX prototype_documents_vector_idx
      ON prototype_documents
      USING ivfflat (embedding vector_cosine_ops)
      WITH (lists = 1);

    CREATE TABLE prototype_dictionary_values (
      connection_id TEXT NOT NULL,
      source_name TEXT NOT NULL,
      column_name TEXT NOT NULL,
      value TEXT NOT NULL,
      PRIMARY KEY (connection_id, source_name, column_name, value)
    );

    CREATE INDEX prototype_dictionary_values_trgm_idx
      ON prototype_dictionary_values
      USING GIN (value gin_trgm_ops);
  `);
}

/**
 * Inserts three documents (two under connection `warehouse`, one under `finance`) and three
 * dictionary values into the tables created by `createHybridSearchFixture`.
 */
async function seedHybridSearchFixture(owner: KloPGliteOwnerProcess): Promise<void> {
  await owner.query(
    `
      INSERT INTO prototype_documents (id, search_text, metadata, embedding)
      VALUES
        ($1, $2, $3::jsonb, $4::vector),
        ($5, $6, $7::jsonb, $8::vector),
        ($9, $10, $11::jsonb, $12::vector)
    `,
    [
      'warehouse/orders',
      'orders paid revenue refund status customer',
      JSON.stringify({ connectionId: 'warehouse', sourceName: 'orders' }),
      JSON.stringify([1, 0, 0]),
      'finance/orders',
      'orders finance bookings gross margin',
      JSON.stringify({ connectionId: 'finance', sourceName: 'orders' }),
      JSON.stringify([0.72, 0.28, 0]),
      'warehouse/customers',
      'customers accounts lifecycle region',
      JSON.stringify({ connectionId: 'warehouse', sourceName: 'customers' }),
      JSON.stringify([0, 1, 0]),
    ],
  );

  await owner.query(`
    INSERT INTO prototype_dictionary_values (connection_id, source_name, column_name, value)
    VALUES
      ('warehouse', 'orders', 'status', 'refunded'),
      ('warehouse', 'orders', 'status', 'paid'),
      ('warehouse', 'customers', 'region', 'emea')
  `);
}
owner.query(` + CREATE TABLE owner_process_smoke ( + id TEXT PRIMARY KEY, + search_text TEXT NOT NULL, + embedding vector(3) NOT NULL + ); + + INSERT INTO owner_process_smoke (id, search_text, embedding) + VALUES + ('orders', 'orders paid revenue', '[1,0,0]'::vector), + ('customers', 'customers region lifecycle', '[0,1,0]'::vector); + `); + + const client = new Client(owner.connectionConfig()); + await client.connect(); + + try { + const result = await client.query<{ id: string }>(` + SELECT id + FROM owner_process_smoke + ORDER BY embedding <=> '[1,0,0]'::vector, id ASC + LIMIT 1 + `); + + expect(result.rows).toEqual([{ id: 'orders' }]); + } finally { + await client.end(); + } + } finally { + await owner.stop(); + } + }); + + it('runs lexical, semantic, and dictionary conformance probes through socket clients', async () => { + const owner = await KloPGliteOwnerProcess.start({ + dataDir, + host: '127.0.0.1', + port, + }); + + try { + await createHybridSearchFixture(owner); + await seedHybridSearchFixture(owner); + + const lexical = await owner.query<{ id: string; score: number }>( + ` + SELECT + id, + ts_rank_cd(to_tsvector('english', search_text), websearch_to_tsquery('english', $1)) AS score + FROM prototype_documents + WHERE to_tsvector('english', search_text) @@ websearch_to_tsquery('english', $1) + ORDER BY score DESC, id ASC + LIMIT 2 + `, + ['paid orders'], + ); + + assertSearchBackendConformanceCase({ + backendName: 'pglite-owner-process', + surface: 'semantic-layer', + caseName: 'socket postgres fts lexical ranking', + results: lexical.rows.map((row) => ({ + id: row.id, + score: row.score, + matchReasons: ['lexical'], + })), + expectedTopIds: ['warehouse/orders'], + expectedReasonsById: { + 'warehouse/orders': ['lexical'], + }, + }); + + const semantic = await owner.query<{ id: string; similarity: number }>( + ` + SELECT + id, + 1 - (embedding <=> $1::vector) AS similarity + FROM prototype_documents + ORDER BY embedding <=> $1::vector, id ASC + LIMIT 2 + 
`, + [JSON.stringify([1, 0, 0])], + ); + + assertSearchBackendConformanceCase({ + backendName: 'pglite-owner-process', + surface: 'semantic-layer', + caseName: 'socket pgvector semantic ranking', + results: semantic.rows.map((row) => ({ + id: row.id, + score: row.similarity, + matchReasons: ['semantic'], + })), + expectedTopIds: ['warehouse/orders'], + expectedReasonsById: { + 'warehouse/orders': ['semantic'], + }, + }); + + const dictionary = await owner.query<{ id: string; value: string; score: number }>( + ` + SELECT + connection_id || '/' || source_name AS id, + value, + similarity(value, $1) AS score + FROM prototype_dictionary_values + WHERE similarity(value, $1) > 0 + ORDER BY score DESC, id ASC, value ASC + LIMIT 2 + `, + ['refund'], + ); + + assertSearchBackendConformanceCase({ + backendName: 'pglite-owner-process', + surface: 'semantic-layer', + caseName: 'socket pg_trgm dictionary ranking', + results: dictionary.rows.map((row) => ({ + id: row.id, + score: row.score, + matchReasons: ['dictionary'], + dictionaryMatches: [{ column: 'status', values: [row.value] }], + })), + expectedTopIds: ['warehouse/orders'], + expectedReasonsById: { + 'warehouse/orders': ['dictionary'], + }, + expectedDictionaryMatchesById: { + 'warehouse/orders': [{ column: 'status', values: ['refunded'] }], + }, + }); + } finally { + await owner.stop(); + } + }); + + it('persists indexed rows after stopping and restarting the owner process', async () => { + const firstOwner = await KloPGliteOwnerProcess.start({ + dataDir, + host: '127.0.0.1', + port, + }); + + try { + await createHybridSearchFixture(firstOwner); + await seedHybridSearchFixture(firstOwner); + } finally { + await firstOwner.stop(); + } + + const secondOwner = await KloPGliteOwnerProcess.start({ + dataDir, + host: '127.0.0.1', + port, + }); + + try { + const persisted = await secondOwner.query<{ count: number }>( + "SELECT COUNT(*)::int AS count FROM prototype_documents WHERE metadata->>'connectionId' = $1", + 
/** Search capabilities declared for the PGlite owner-process backend. */
export const PGLITE_OWNER_PROCESS_BACKEND_CAPABILITIES = {
  fts: true,
  vector: true,
  fuzzy: true,
  jsonSearch: true,
  arraySearch: false,
} satisfies SearchBackendCapabilities;

/** Options for {@link KloPGliteOwnerProcess.start}. */
export interface KloPGliteOwnerProcessOptions {
  /** Directory passed to PGlite as its data directory. */
  dataDir: string;
  /** Host the socket server binds to. */
  host: string;
  /** Port the socket server binds to. */
  port: number;
  /** Forwarded to the socket server; defaults to `false`. */
  inspect?: boolean;
  /** Forwarded to the socket server; defaults to `100`. */
  maxConnections?: number;
}

/**
 * Owns one PGlite database and exposes it to PostgreSQL clients through a socket server.
 *
 * Instances are created with {@link KloPGliteOwnerProcess.start} and must be released with
 * {@link KloPGliteOwnerProcess.stop}.
 */
export class KloPGliteOwnerProcess {
  readonly dataDir: string;
  readonly host: string;
  readonly port: number;

  #db: PGliteInterface;
  #server: PGLiteSocketServer;
  #stopped = false;

  private constructor(options: KloPGliteOwnerProcessOptions, db: PGliteInterface, server: PGLiteSocketServer) {
    this.dataDir = options.dataDir;
    this.host = options.host;
    this.port = options.port;
    this.#db = db;
    this.#server = server;
  }

  /**
   * Opens the database with the `vector` and `pg_trgm` extensions, ensures both extensions
   * exist, and starts the socket server.
   *
   * If any step after opening the database fails, the server (when created) and the database
   * are closed on a best-effort basis before the original error is rethrown.
   */
  static async start(options: KloPGliteOwnerProcessOptions): Promise<KloPGliteOwnerProcess> {
    const db = await PGlite.create({
      dataDir: options.dataDir,
      extensions: {
        vector,
        pg_trgm,
      },
    });

    let server: PGLiteSocketServer | undefined;

    try {
      await db.exec(`
        CREATE EXTENSION IF NOT EXISTS vector;
        CREATE EXTENSION IF NOT EXISTS pg_trgm;
      `);

      server = new PGLiteSocketServer({
        db,
        host: options.host,
        port: options.port,
        inspect: options.inspect ?? false,
        maxConnections: options.maxConnections ?? 100,
      });

      await server.start();

      return new KloPGliteOwnerProcess(options, db, server);
    } catch (error) {
      // Cleanup errors are swallowed so the caller sees the failure that caused them.
      await server?.stop().catch(() => undefined);
      await db.close().catch(() => undefined);
      throw error;
    }
  }

  /** Client configuration for connecting to this owner's socket server. */
  connectionConfig(): ClientConfig {
    return {
      host: this.host,
      port: this.port,
      user: 'postgres',
      database: 'postgres',
      application_name: 'klo-pglite-owner-prototype',
      connectionTimeoutMillis: 5_000,
    };
  }

  /** Opens a new connected client; the caller is responsible for calling `end()` on it. */
  async connect(): Promise<Client> {
    const client = new Client(this.connectionConfig());
    await client.connect();
    return client;
  }

  /**
   * Runs one statement on a fresh client and closes that client afterwards, whether the
   * query succeeds or fails.
   *
   * @param text SQL text.
   * @param values Optional parameter values; copied before being handed to the client.
   */
  async query<Row extends QueryResultRow = QueryResultRow>(
    text: string,
    values?: readonly unknown[],
  ): Promise<QueryResult<Row>> {
    const client = await this.connect();
    try {
      return await client.query<Row>(text, values ? [...values] : undefined);
    } finally {
      await client.end();
    }
  }

  /**
   * Stops the socket server and closes the database. Calling it again is a no-op.
   *
   * The database is closed even when stopping the server rejects: `#stopped` is already
   * set at that point, so a later retry would never reach `close()`.
   */
  async stop(): Promise<void> {
    if (this.#stopped) {
      return;
    }

    this.#stopped = true;
    try {
      await this.#server.stop();
    } finally {
      await this.#db.close();
    }
  }
}
{}); + + expect(packageExportKeys.filter((key) => key.toLowerCase().includes('pglite'))).toEqual([]); + + const publicExportFiles = [ + 'packages/context/src/index.ts', + 'packages/context/src/search/index.ts', + 'packages/context/src/sl/index.ts', + ]; + + for (const relativePath of publicExportFiles) { + expect(readKloFile(relativePath), relativePath).not.toMatch(/pglite/i); + } + + const productionRoutingFiles = [ + 'packages/cli/src/agent.ts', + 'packages/context/src/mcp/local-project-ports.ts', + 'packages/context/src/wiki/local-knowledge.ts', + 'packages/context/src/ingest/context-evidence/sqlite-context-evidence-store.ts', + ]; + + for (const relativePath of productionRoutingFiles) { + expect(readKloFile(relativePath), relativePath).not.toMatch( + /pglite-owner-prototype|pglite-sl-search-prototype|@electric-sql\/pglite/i, + ); + } + + const localSlSource = readKloFile('packages/context/src/sl/local-sl.ts'); + expect(localSlSource).toContain("input.backend === 'pglite-owner-prototype'"); + expect(localSlSource).toContain('PGlite semantic-layer search prototype requires pglite owner-process options.'); + expect(localSlSource).toContain("await import('./pglite-sl-search-prototype.js')"); + }); +}); diff --git a/packages/context/src/search/pglite-spike.test.ts b/packages/context/src/search/pglite-spike.test.ts new file mode 100644 index 00000000..c3feb709 --- /dev/null +++ b/packages/context/src/search/pglite-spike.test.ts @@ -0,0 +1,302 @@ +import { mkdtemp, rm } from 'node:fs/promises'; +import { tmpdir } from 'node:os'; +import { join } from 'node:path'; +import { PGlite, type PGliteInterface } from '@electric-sql/pglite'; +import { pg_trgm } from '@electric-sql/pglite/contrib/pg_trgm'; +import { vector } from '@electric-sql/pglite/vector'; +import { afterEach, beforeEach, describe, expect, it } from 'vitest'; +import { + assertSearchBackendCapabilities, + assertSearchBackendConformanceCase, + type SearchBackendCapabilities, +} from './index.js'; + +type 
/** Capability matrix this spike claims for the PGlite backend. */
const PGLITE_SPIKE_CAPABILITIES = {
  fts: true,
  vector: true,
  fuzzy: true,
  jsonSearch: true,
  arraySearch: false,
} satisfies SearchBackendCapabilities;

/**
 * Opens (or creates) a PGlite database stored under `dataDir` with the
 * `vector` and `pg_trgm` extensions registered and enabled.
 *
 * If enabling the extensions fails, the freshly opened handle is closed before
 * the error is rethrown so a failed open never leaks a database handle.
 *
 * @param dataDir Filesystem directory PGlite should use for its data files.
 * @returns The opened database handle; the caller owns it and must close it.
 */
async function createSpikeDb(dataDir: string): Promise<PGliteDb> {
  const db = await PGlite.create({
    dataDir,
    extensions: {
      vector,
      pg_trgm,
    },
  });

  try {
    await db.exec(`
      CREATE EXTENSION IF NOT EXISTS vector;
      CREATE EXTENSION IF NOT EXISTS pg_trgm;
    `);
  } catch (error) {
    // Best-effort cleanup: the original failure is what the caller needs to see.
    await db.close().catch(() => undefined);
    throw error;
  }

  return db;
}

/**
 * Creates the spike tables and their search indexes. Every statement uses
 * `IF NOT EXISTS`, so the function can be re-run against an existing database.
 *
 * @param db Open PGlite handle that already has `vector` and `pg_trgm` enabled.
 */
async function createSchema(db: PGliteDb): Promise<void> {
  await db.exec(`
    CREATE TABLE IF NOT EXISTS spike_documents (
      id TEXT PRIMARY KEY,
      search_text TEXT NOT NULL,
      metadata JSONB NOT NULL DEFAULT '{}'::jsonb,
      embedding vector(3) NOT NULL
    );

    CREATE INDEX IF NOT EXISTS spike_documents_fts_idx
      ON spike_documents
      USING GIN (to_tsvector('english', search_text));

    CREATE INDEX IF NOT EXISTS spike_documents_vector_idx
      ON spike_documents
      USING ivfflat (embedding vector_cosine_ops)
      WITH (lists = 1);

    CREATE TABLE IF NOT EXISTS spike_dictionary_values (
      connection_id TEXT NOT NULL,
      source_name TEXT NOT NULL,
      column_name TEXT NOT NULL,
      value TEXT NOT NULL,
      PRIMARY KEY (connection_id, source_name, column_name, value)
    );

    CREATE INDEX IF NOT EXISTS spike_dictionary_values_trgm_idx
      ON spike_dictionary_values
      USING GIN (value gin_trgm_ops);
  `);
}

/**
 * Seeds three documents and three dictionary values used by the search tests.
 * Documents are upserted by id and dictionary rows use `ON CONFLICT DO NOTHING`,
 * so seeding twice leaves the same rows in place.
 *
 * @param db Open PGlite handle on which `createSchema` has already run.
 */
async function seedSearchFixture(db: PGliteDb): Promise<void> {
  await db.query(
    `
      INSERT INTO spike_documents (id, search_text, metadata, embedding)
      VALUES
        ($1, $2, $3::jsonb, $4::vector),
        ($5, $6, $7::jsonb, $8::vector),
        ($9, $10, $11::jsonb, $12::vector)
      ON CONFLICT (id) DO UPDATE
        SET search_text = EXCLUDED.search_text,
            metadata = EXCLUDED.metadata,
            embedding = EXCLUDED.embedding
    `,
    [
      'warehouse/orders',
      'orders paid revenue refund status customer',
      JSON.stringify({ connectionId: 'warehouse', sourceName: 'orders' }),
      JSON.stringify([1, 0, 0]),
      'finance/orders',
      'orders finance bookings gross margin',
      JSON.stringify({ connectionId: 'finance', sourceName: 'orders' }),
      JSON.stringify([0.72, 0.28, 0]),
      'warehouse/customers',
      'customers accounts lifecycle region',
      JSON.stringify({ connectionId: 'warehouse', sourceName: 'customers' }),
      JSON.stringify([0, 1, 0]),
    ],
  );

  await db.query(
    `
      INSERT INTO spike_dictionary_values (connection_id, source_name, column_name, value)
      VALUES
        ('warehouse', 'orders', 'status', 'refunded'),
        ('warehouse', 'orders', 'status', 'paid'),
        ('warehouse', 'customers', 'region', 'emea')
      ON CONFLICT DO NOTHING
    `,
  );
}

/** Closes a database handle opened by `createSpikeDb`. */
async function closeDb(db: PGliteDb): Promise<void> {
  await db.close();
}
results: lexical.rows.map((row) => ({ + id: row.id, + score: row.score, + matchReasons: ['lexical'], + })), + expectedTopIds: ['warehouse/orders'], + expectedReasonsById: { + 'warehouse/orders': ['lexical'], + }, + }); + + const semantic = await db.query<{ id: string; similarity: number }>( + ` + SELECT + id, + 1 - (embedding <=> $1::vector) AS similarity + FROM spike_documents + ORDER BY embedding <=> $1::vector, id ASC + LIMIT 2 + `, + [JSON.stringify([1, 0, 0])], + ); + + assertSearchBackendConformanceCase({ + backendName: 'pglite-spike', + surface: 'semantic-layer', + caseName: 'pgvector cosine ranking', + results: semantic.rows.map((row) => ({ + id: row.id, + score: row.similarity, + matchReasons: ['semantic'], + })), + expectedTopIds: ['warehouse/orders'], + expectedReasonsById: { + 'warehouse/orders': ['semantic'], + }, + }); + + const dictionary = await db.query<{ id: string; value: string; score: number }>( + ` + SELECT + connection_id || '/' || source_name AS id, + value, + similarity(value, $1) AS score + FROM spike_dictionary_values + WHERE similarity(value, $1) > 0 + ORDER BY score DESC, id ASC, value ASC + LIMIT 2 + `, + ['refund'], + ); + + assertSearchBackendConformanceCase({ + backendName: 'pglite-spike', + surface: 'semantic-layer', + caseName: 'pg_trgm dictionary ranking', + results: dictionary.rows.map((row) => ({ + id: row.id, + score: row.score, + matchReasons: ['dictionary'], + dictionaryMatches: [{ column: 'status', values: [row.value] }], + })), + expectedTopIds: ['warehouse/orders'], + expectedReasonsById: { + 'warehouse/orders': ['dictionary'], + }, + expectedDictionaryMatchesById: { + 'warehouse/orders': [{ column: 'status', values: ['refunded'] }], + }, + }); + } finally { + await closeDb(db); + } + }); + + it('persists indexed rows after reopening the filesystem database', async () => { + const first = await createSpikeDb(dataDir); + + try { + await createSchema(first); + await seedSearchFixture(first); + } finally { + await 
closeDb(first); + } + + const second = await createSpikeDb(dataDir); + + try { + const persisted = await second.query<{ count: number }>( + "SELECT COUNT(*)::int AS count FROM spike_documents WHERE metadata->>'connectionId' = $1", + ['warehouse'], + ); + + expect(persisted.rows[0]).toEqual({ count: 2 }); + } finally { + await closeDb(second); + } + }); + + it('records direct concurrency behavior without assuming Postgres server parity', async () => { + const db = await createSpikeDb(dataDir); + + try { + await createSchema(db); + await seedSearchFixture(db); + + const reads = await Promise.all( + Array.from({ length: 4 }, () => + db.query<{ count: number }>('SELECT COUNT(*)::int AS count FROM spike_documents'), + ), + ); + + expect(reads.map((result) => result.rows[0]?.count)).toEqual([3, 3, 3, 3]); + + let secondOpenStatus: 'opened' | 'blocked' = 'opened'; + let second: PGliteDb | undefined; + + try { + second = await createSpikeDb(dataDir); + await second.query('SELECT 1'); + } catch { + secondOpenStatus = 'blocked'; + } finally { + if (second) { + await closeDb(second); + } + } + + expect(['opened', 'blocked']).toContain(secondOpenStatus); + } finally { + await closeDb(db); + } + }); +}); diff --git a/packages/context/src/search/query.test.ts b/packages/context/src/search/query.test.ts new file mode 100644 index 00000000..64f1fd0b --- /dev/null +++ b/packages/context/src/search/query.test.ts @@ -0,0 +1,26 @@ +import { describe, expect, it } from 'vitest'; +import { defaultLaneCandidatePoolLimit, normalizeSearchQuery } from './query.js'; + +describe('search query helpers', () => { + it('normalizes punctuation and duplicate terms into stable lowercase tokens', () => { + expect(normalizeSearchQuery(' Gross-Revenue, gross_revenue! Paid orders ')).toEqual({ + raw: ' Gross-Revenue, gross_revenue! 
/**
 * Anything that is not a Unicode letter, combining mark, number, or underscore
 * separates two search terms.
 */
const SEARCH_TERM_SEPARATOR = /[^\p{L}\p{M}\p{N}_]+/u;

/**
 * Lowercases `queryText` and splits it into search terms.
 *
 * Terms keep underscores (`gross_revenue` stays one term) and, unlike an
 * ASCII-only split, keep non-English letters and digits intact ("café" is one
 * term rather than "caf"). ASCII input produces the same terms as before.
 *
 * @param queryText Raw query text as typed by the caller.
 * @returns The untouched raw text, the space-joined terms, and the term list
 *          (empty when the input holds no letters, digits, or underscores).
 */
export function normalizeSearchQuery(queryText: string): NormalizedSearchQuery {
  // Splitting can leave empty strings at either end; whitespace is itself a
  // separator, so the surviving terms never need trimming.
  const terms = queryText
    .toLowerCase()
    .split(SEARCH_TERM_SEPARATOR)
    .filter((term) => term.length > 0);

  return {
    raw: queryText,
    normalized: terms.join(' '),
    terms,
  };
}

/**
 * Sizes the per-lane candidate pool: three candidates per requested result,
 * never fewer than 25.
 *
 * @param finalLimit Number of results the caller ultimately wants. `NaN` and
 *                   values below 1 are treated as 1; fractional values are
 *                   rounded up so the pool size is always a whole number.
 * @returns The candidate pool size to request from each lane.
 */
export function defaultLaneCandidatePoolLimit(finalLimit: number): number {
  // NaN would otherwise propagate through Math.max and come back as the limit.
  const requested = Number.isNaN(finalLimit) ? 1 : Math.ceil(finalLimit);
  return Math.max(25, Math.max(1, requested) * 3);
}
/** Default smoothing constant `k` in the reciprocal-rank-fusion formula. */
export const DEFAULT_RRF_K = 60;

/** Per-lane weights applied to each lane's RRF contribution. */
export const DEFAULT_SEARCH_LANE_WEIGHTS: Record<SearchLaneName, number> = {
  semantic: 2,
  dictionary: 2,
  lexical: 1.5,
  token: 0.75,
};

/**
 * Weighted reciprocal-rank-fusion contribution of one lane for one candidate.
 *
 * @param weight Lane weight.
 * @param rank   Rank of the candidate within the lane.
 * @param rrfK   Smoothing constant; defaults to `DEFAULT_RRF_K`.
 * @returns `weight / (rrfK + rank)`.
 */
export function rrfContribution(weight: number, rank: number, rrfK = DEFAULT_RRF_K): number {
  return weight / (rrfK + rank);
}

/**
 * Sort comparator for fused candidates: higher score first, then the candidate
 * matched by more lanes, then ascending id as the final tie-break.
 *
 * @returns A negative number when `left` sorts first, positive when `right` does.
 */
export function compareFusedSearchCandidates(left: FusedSearchCandidate, right: FusedSearchCandidate): number {
  const scoreDelta = right.score - left.score;
  if (scoreDelta) {
    return scoreDelta;
  }

  const laneCountDelta = right.matchReasons.length - left.matchReasons.length;
  if (laneCountDelta) {
    return laneCountDelta;
  }

  return left.id.localeCompare(right.id);
}
/** Name of a search lane; the built-in lanes are listed, and any other string is accepted. */
export type SearchLaneName = 'lexical' | 'semantic' | 'dictionary' | 'token' | string;

/** Outcome of running one lane. */
export type SearchLaneStatus = 'available' | 'skipped' | 'failed';

/** A query after normalization: the raw text, the space-joined terms, and the terms themselves. */
export interface NormalizedSearchQuery {
  raw: string;
  normalized: string;
  terms: string[];
}

/** One candidate produced by a single lane. */
export interface SearchCandidate {
  id: string;
  rank: number;
  rawScore?: number;
  matchReason?: string;
  evidence?: unknown;
}

/** Arguments handed to every candidate generator. */
export interface SearchCandidateGeneratorArgs {
  queryText: string;
  normalizedQuery: NormalizedSearchQuery;
  finalLimit: number;
  laneCandidatePoolLimit: number;
}

/** What a lane returns: its candidates plus optional status details. */
export interface SearchLaneResult {
  status?: SearchLaneStatus;
  candidates: SearchCandidate[];
  effectiveCandidatePoolLimit?: number;
  reason?: string;
}

/** A lane implementation that produces candidates for a query. */
export interface SearchCandidateGenerator {
  lane: SearchLaneName;
  weight?: number;
  // NOTE(review): the type argument was lost in SOURCE; SearchLaneResult is the
  // only result shape declared in this file — confirm against the generators.
  generate(args: SearchCandidateGeneratorArgs): Promise<SearchLaneResult>;
}

/** Options for one hybrid search run. */
export interface HybridSearchOptions {
  queryText: string;
  limit: number;
  candidatePoolLimit?: number;
  rrfK?: number;
  laneWeights?: Partial<Record<SearchLaneName, number>>;
  generators: SearchCandidateGenerator[];
}

/** Per-lane diagnostics reported alongside the fused results. */
export interface SearchLaneBreakdown {
  lane: SearchLaneName;
  status: SearchLaneStatus;
  requestedCandidatePoolLimit: number;
  effectiveCandidatePoolLimit: number;
  returnedCandidateCount: number;
  weight: number;
  reason?: string;
}

/** A candidate after fusion across lanes. */
export interface FusedSearchCandidate {
  id: string;
  score: number;
  matchReasons: SearchLaneName[];
  // NOTE(review): value types of the three maps below were lost in SOURCE and
  // are reconstructed from SearchCandidate (rank/rawScore are numbers, evidence
  // is unknown) — confirm.
  ranksByLane: Record<SearchLaneName, number>;
  rawScoresByLane: Record<SearchLaneName, number>;
  evidenceByLane: Record<SearchLaneName, unknown>;
}

/**
 * Turns fused candidates into caller-facing results.
 *
 * NOTE(review): the resolved payload type was lost in SOURCE. It is modelled as
 * a defaulted type parameter so non-generic references keep compiling — confirm
 * the intended shape against the implementations.
 */
export interface SearchResultHydrator<TResult = unknown> {
  hydrate(candidates: FusedSearchCandidate[]): Promise<TResult[]>;
}

/** Full result of a hybrid search run. */
export interface HybridSearchResult {
  query: NormalizedSearchQuery;
  requestedLimit: number;
  requestedCandidatePoolLimit: number;
  results: FusedSearchCandidate[];
  lanes: SearchLaneBreakdown[];
}

/** Feature flags a search backend declares. */
export interface SearchBackendCapabilities {
  fts: boolean;
  vector: boolean;
  fuzzy: boolean;
  jsonSearch: boolean;
  arraySearch: boolean;
}
b/packages/context/src/skills/index.ts new file mode 100644 index 00000000..13f06853 --- /dev/null +++ b/packages/context/src/skills/index.ts @@ -0,0 +1,2 @@ +export type { FrontmatterFields, SkillCaller, SkillMetadata, SkillsRegistryServiceOptions } from './skills-registry.service.js'; +export { SkillsRegistryService } from './skills-registry.service.js'; diff --git a/packages/context/src/skills/skills-registry.service.test.ts b/packages/context/src/skills/skills-registry.service.test.ts new file mode 100644 index 00000000..82c7c8ab --- /dev/null +++ b/packages/context/src/skills/skills-registry.service.test.ts @@ -0,0 +1,212 @@ +import { mkdtemp, mkdir, rm, writeFile } from 'node:fs/promises'; +import { tmpdir } from 'node:os'; +import { join } from 'node:path'; +import { afterEach, beforeEach, describe, expect, it } from 'vitest'; +import { SkillsRegistryService } from './skills-registry.service.js'; + +describe('SkillsRegistryService', () => { + let service: SkillsRegistryService; + let tempDir: string; + + beforeEach(async () => { + tempDir = await mkdtemp(join(tmpdir(), 'skills-registry-')); + service = new SkillsRegistryService({ skillsDir: tempDir }); + }); + + afterEach(async () => { + await rm(tempDir, { recursive: true, force: true }); + }); + + const writeSkill = async (dirName: string, body: string) => { + const dir = join(tempDir, dirName); + await mkdir(dir, { recursive: true }); + await writeFile(join(dir, 'SKILL.md'), body, 'utf-8'); + }; + + describe('parseFrontmatter', () => { + it('parses name and description', () => { + const frontmatter = service.parseFrontmatter('---\nname: foo\ndescription: Bar baz\n---\n\n# body'); + expect(frontmatter).toEqual({ name: 'foo', description: 'Bar baz' }); + }); + + it('supports wrapped description continuation lines', () => { + const frontmatter = service.parseFrontmatter( + ['---', 'name: sl', 'description: Line one', ' continuation of the description.', '---', '', '# body'].join( + '\n', + ), + ); + 
expect(frontmatter.name).toBe('sl'); + expect(frontmatter.description).toContain('Line one'); + expect(frontmatter.description).toContain('continuation'); + }); + + it('returns empty fields when no frontmatter block', () => { + expect(service.parseFrontmatter('# just a heading')).toEqual({}); + }); + }); + + describe('stripFrontmatter', () => { + it('removes the frontmatter block and leading blank line', () => { + const body = '---\nname: x\ndescription: y\n---\n\n# Hello\n\nparagraph'; + expect(service.stripFrontmatter(body)).toBe('# Hello\n\nparagraph'); + }); + + it('is a no-op when no frontmatter exists', () => { + expect(service.stripFrontmatter('# hello')).toBe('# hello'); + }); + }); + + describe('discoverSkills', () => { + it('returns an empty map when the directory does not exist', async () => { + const catalog = await service.discoverSkills(join(tempDir, 'missing')); + expect(catalog.size).toBe(0); + }); + + it('discovers valid skills and skips invalid ones', async () => { + await writeSkill('sl', '---\nname: sl\ndescription: Semantic layer.\n---\n\n# SL'); + await writeSkill('knowledge_capture', '---\nname: knowledge_capture\ndescription: Wiki capture.\n---\n\n# KC'); + await writeSkill('broken', '# no frontmatter at all'); + await mkdir(join(tempDir, 'not_a_skill'), { recursive: true }); + + const catalog = await service.discoverSkills(tempDir); + expect(catalog.size).toBe(2); + expect(catalog.get('sl')?.name).toBe('sl'); + expect(catalog.get('knowledge_capture')?.description).toContain('Wiki capture'); + expect(catalog.has('broken')).toBe(false); + }); + }); + + describe('buildSkillsPrompt', () => { + it('formats bullet list with name and description', () => { + const output = service.buildSkillsPrompt([ + { name: 'sl', description: 'Semantic layer.', path: '/tmp/sl' }, + { name: 'knowledge_capture', description: 'Wiki capture.', path: '/tmp/kc' }, + ]); + expect(output).toContain('- sl: Semantic layer.'); + expect(output).toContain('- 
knowledge_capture: Wiki capture.'); + expect(output).toContain('Use the `load_skill` tool'); + }); + + it('returns empty string when no skills are available', () => { + expect(service.buildSkillsPrompt([])).toBe(''); + }); + + it('appends the async capture note for the research caller', () => { + const output = service.buildSkillsPrompt( + [{ name: 'sl', description: 'Semantic layer.', path: '/tmp/sl' }], + 'research', + ); + expect(output).toContain('captured automatically by a post-turn memory agent'); + expect(output).toContain('Focus on answering, not on saving'); + }); + + it('does not append the note for memory_agent caller', () => { + const output = service.buildSkillsPrompt( + [{ name: 'sl_capture', description: 'Capture skill.', path: '/tmp/cap' }], + 'memory_agent', + ); + expect(output).not.toContain('captured automatically by a post-turn memory agent'); + }); + }); + + describe('parseFrontmatter callers field', () => { + it('parses inline-array form', () => { + const frontmatter = service.parseFrontmatter('---\nname: x\ndescription: y\ncallers: [memory_agent]\n---\n'); + expect(frontmatter.callers).toEqual(['memory_agent']); + }); + + it('parses comma-separated form', () => { + const frontmatter = service.parseFrontmatter('---\nname: x\ndescription: y\ncallers: research, memory_agent\n---\n'); + expect(frontmatter.callers).toEqual(['research', 'memory_agent']); + }); + + it('returns undefined when callers is absent', () => { + const frontmatter = service.parseFrontmatter('---\nname: x\ndescription: y\n---\n'); + expect(frontmatter.callers).toBeUndefined(); + }); + + it('drops unknown caller names with a warning', () => { + const frontmatter = service.parseFrontmatter('---\nname: x\ndescription: y\ncallers: [bogus, memory_agent]\n---\n'); + expect(frontmatter.callers).toEqual(['memory_agent']); + }); + + it('returns undefined when the value is empty', () => { + const frontmatter = service.parseFrontmatter('---\nname: x\ndescription: y\ncallers:\n---\n'); 
+ expect(frontmatter.callers).toBeUndefined(); + }); + }); + + describe('listSkills and getSkill caller filter', () => { + beforeEach(async () => { + await writeSkill('sl', '---\nname: sl\ndescription: Open to all.\n---\n\n# SL'); + await writeSkill( + 'sl_capture', + '---\nname: sl_capture\ndescription: Memory-only capture skill.\ncallers: [memory_agent]\n---\n\n# Capture', + ); + await writeSkill( + 'knowledge_capture', + '---\nname: knowledge_capture\ndescription: Wiki capture.\ncallers: [memory_agent]\n---\n\n# KC', + ); + service = new SkillsRegistryService({ skillsDir: tempDir }); + }); + + it('research caller sees only open skills', async () => { + const skills = await service.listSkills('research'); + expect(skills.map((skill) => skill.name).sort()).toEqual(['sl']); + }); + + it('memory_agent caller sees memory-only and open skills', async () => { + const skills = await service.listSkills('memory_agent'); + expect(skills.map((skill) => skill.name).sort()).toEqual(['knowledge_capture', 'sl', 'sl_capture']); + }); + + it('listSkills with names and caller intersects both filters', async () => { + const skills = await service.listSkills(['sl', 'sl_capture'], 'research'); + expect(skills.map((skill) => skill.name)).toEqual(['sl']); + }); + + it('getSkill returns null for memory-only skill when caller is research', async () => { + const skill = await service.getSkill('sl_capture', 'research'); + expect(skill).toBeNull(); + }); + + it('getSkill returns the skill when caller has access', async () => { + const skill = await service.getSkill('sl_capture', 'memory_agent'); + expect(skill?.name).toBe('sl_capture'); + }); + + it('getSkill without caller returns the skill regardless of callers field', async () => { + const skill = await service.getSkill('sl_capture'); + expect(skill?.name).toBe('sl_capture'); + }); + + }); + + it('discovers skills from additional directories when the primary directory misses', async () => { + const extraDir = await mkdtemp(join(tmpdir(), 
/** Agents that may load skills. */
export type SkillCaller = 'research' | 'memory_agent';

/** A discovered skill: its frontmatter fields plus the directory it lives in. */
export interface SkillMetadata {
  name: string;
  description: string;
  path: string;
  callers?: SkillCaller[];
}

/** Fields read from a SKILL.md frontmatter block. */
export interface FrontmatterFields {
  name?: string;
  description?: string;
  callers?: SkillCaller[];
}

/** Constructor options for {@link SkillsRegistryService}. */
export interface SkillsRegistryServiceOptions {
  skillsDir: string;
  additionalSkillDirs?: string[];
  logger?: KloLogger;
}

const SKILL_FILENAME = 'SKILL.md';
const VALID_CALLERS: ReadonlySet<SkillCaller> = new Set<SkillCaller>(['research', 'memory_agent']);

/** Drops a leading UTF-8 byte-order mark, which would otherwise hide the `---` fence. */
function stripByteOrderMark(content: string): string {
  return content.charCodeAt(0) === 0xfeff ? content.slice(1) : content;
}

/**
 * Discovers skills (`<dir>/<skill>/SKILL.md`) on disk, parses their
 * frontmatter, and serves them filtered by name and caller.
 *
 * The catalog is discovered once, on first use, and cached for the lifetime of
 * the instance. The primary `skillsDir` wins over `additionalSkillDirs` when
 * two skills share a name.
 */
export class SkillsRegistryService {
  private readonly logger: KloLogger;
  private readonly skillsDir: string;
  private readonly additionalSkillDirs: string[];
  private catalogPromise: Promise<Map<string, SkillMetadata>> | null = null;

  constructor(options: SkillsRegistryServiceOptions) {
    this.logger = options.logger ?? noopLogger;
    this.skillsDir = options.skillsDir;
    this.additionalSkillDirs = options.additionalSkillDirs ?? [];
  }

  /** Returns the cached catalog, discovering it on first call. */
  private async loadCatalog(): Promise<Map<string, SkillMetadata>> {
    if (!this.catalogPromise) {
      // A rejected promise must not be cached, or every later call would fail
      // with the same stale error and never retry discovery.
      const pending: Promise<Map<string, SkillMetadata>> = this.discoverAllSkills().catch((error: unknown) => {
        if (this.catalogPromise === pending) {
          this.catalogPromise = null;
        }
        throw error;
      });
      this.catalogPromise = pending;
    }
    return this.catalogPromise;
  }

  /**
   * Scans the immediate subdirectories of `rootDir` for `SKILL.md` files.
   *
   * Unreadable roots, non-directories, directories without a readable
   * `SKILL.md`, and frontmatter missing `name` or `description` are skipped
   * (with a warning where the cause is known). Entries are visited in sorted
   * order and the first skill seen for a name wins.
   *
   * @param rootDir Directory whose children are candidate skill directories.
   * @returns Skills keyed by lower-cased name; empty when nothing is found.
   */
  async discoverSkills(rootDir: string): Promise<Map<string, SkillMetadata>> {
    const catalog = new Map<string, SkillMetadata>();

    let entries: string[];
    try {
      entries = await readdir(rootDir);
    } catch (error) {
      const message = error instanceof Error ? error.message : String(error);
      this.logger.warn(`Skills directory not found or unreadable at ${rootDir}: ${message}`);
      return catalog;
    }

    for (const entry of entries.sort()) {
      const dir = join(rootDir, entry);
      const skillFile = join(dir, SKILL_FILENAME);
      let isDir = false;
      try {
        isDir = (await stat(dir)).isDirectory();
      } catch {
        continue;
      }
      if (!isDir) {
        continue;
      }

      let content: string;
      try {
        content = await readFile(skillFile, 'utf-8');
      } catch {
        this.logger.warn(`Skipping skill directory '${entry}': missing ${SKILL_FILENAME}`);
        continue;
      }

      const frontmatter = this.parseFrontmatter(content);
      if (!frontmatter.name || !frontmatter.description) {
        this.logger.warn(`Skipping skill '${entry}': frontmatter missing name or description`);
        continue;
      }

      const key = frontmatter.name.toLowerCase();
      if (catalog.has(key)) {
        this.logger.warn(`Duplicate skill name '${frontmatter.name}' in '${entry}'; first found wins`);
        continue;
      }
      catalog.set(key, {
        name: frontmatter.name,
        description: frontmatter.description,
        path: dir,
        callers: frontmatter.callers,
      });
    }

    this.logger.log(`Discovered ${catalog.size} skill(s): ${[...catalog.values()].map((skill) => skill.name).join(', ')}`);
    return catalog;
  }

  /** Merges the primary directory with the additional ones; earlier directories win on name clashes. */
  private async discoverAllSkills(): Promise<Map<string, SkillMetadata>> {
    const catalog = new Map<string, SkillMetadata>();
    for (const rootDir of [this.skillsDir, ...this.additionalSkillDirs]) {
      const discovered = await this.discoverSkills(rootDir);
      for (const [key, skill] of discovered) {
        if (!catalog.has(key)) {
          catalog.set(key, skill);
        }
      }
    }
    return catalog;
  }

  /**
   * Parses the `---` fenced frontmatter at the top of a skill file.
   *
   * Only `name`, `description`, and `callers` are read. Indented lines that
   * follow a key are joined onto its value with single spaces, and a value
   * wrapped in matching quotes is unquoted. A leading byte-order mark is ignored.
   *
   * @param content Full text of a SKILL.md file.
   * @returns The recognised fields; `{}` when there is no complete frontmatter block.
   */
  parseFrontmatter(content: string): FrontmatterFields {
    const text = stripByteOrderMark(content);
    if (!text.startsWith('---')) {
      return {};
    }
    const end = text.indexOf('\n---', 3);
    if (end === -1) {
      return {};
    }

    const lines = text.slice(3, end).trim().split(/\r?\n/);
    const fields: FrontmatterFields = {};
    let index = 0;
    while (index < lines.length) {
      const match = /^([A-Za-z_][\w-]*):\s*(.*)$/.exec(lines[index] ?? '');
      if (!match) {
        index += 1;
        continue;
      }

      const key = match[1] ?? '';
      let value = (match[2] ?? '').trim();

      // Collect indented continuation lines; stop at a blank line, the next
      // key, or any line that is not indented.
      const continuation: string[] = [];
      let nextIndex = index + 1;
      while (nextIndex < lines.length) {
        const next = lines[nextIndex] ?? '';
        if (!next.trim() || /^[A-Za-z_][\w-]*:/.test(next) || !/^\s/.test(next)) {
          break;
        }
        continuation.push(next.trim());
        nextIndex += 1;
      }
      if (continuation.length > 0) {
        value = [value, ...continuation].filter(Boolean).join(' ');
      }
      if ((value.startsWith('"') && value.endsWith('"')) || (value.startsWith("'") && value.endsWith("'"))) {
        value = value.slice(1, -1);
      }

      if (key === 'name' || key === 'description') {
        fields[key] = value;
      } else if (key === 'callers') {
        fields.callers = this.parseCallersValue(value);
      }
      index = nextIndex;
    }
    return fields;
  }

  /**
   * Returns `content` without its frontmatter block and without the blank
   * lines that follow it. Content with no complete frontmatter block is
   * returned unchanged.
   */
  stripFrontmatter(content: string): string {
    const text = stripByteOrderMark(content);
    if (!text.startsWith('---')) {
      return content;
    }
    const end = text.indexOf('\n---', 3);
    if (end === -1) {
      return content;
    }
    return text.slice(end + 4).replace(/^(?:\r?\n)+/, '');
  }

  /**
   * Lists skills sorted by name.
   *
   * Accepts either `(names, caller?)` or `(caller)`. When a caller is given,
   * skills restricted to other callers are dropped; when names are given, only
   * those names (case-insensitive) are returned.
   */
  async listSkills(namesOrCaller?: string[] | SkillCaller, caller?: SkillCaller): Promise<SkillMetadata[]> {
    let names: string[] | undefined;
    let resolvedCaller: SkillCaller | undefined;
    if (Array.isArray(namesOrCaller)) {
      names = namesOrCaller;
      resolvedCaller = caller;
    } else if (typeof namesOrCaller === 'string') {
      resolvedCaller = namesOrCaller;
    }

    const catalog = await this.loadCatalog();
    let skills = [...catalog.values()].sort((left, right) => left.name.localeCompare(right.name));
    if (resolvedCaller) {
      const allowedCaller = resolvedCaller;
      skills = skills.filter((skill) => this.isAllowedFor(skill, allowedCaller));
    }
    if (!names || names.length === 0) {
      return skills;
    }
    const requested = new Set(names.map((name) => name.toLowerCase()));
    return skills.filter((skill) => requested.has(skill.name.toLowerCase()));
  }

  /**
   * Looks up one skill by name (case-insensitive).
   *
   * @returns The skill, or `null` when it does not exist or `caller` is given
   *          and is not allowed to use it.
   */
  async getSkill(name: string, caller?: SkillCaller): Promise<SkillMetadata | null> {
    const catalog = await this.loadCatalog();
    const skill = catalog.get(name.toLowerCase()) ?? null;
    if (!skill) {
      return null;
    }
    if (caller && !this.isAllowedFor(skill, caller)) {
      return null;
    }
    return skill;
  }

  /** A skill with no `callers` list (or an empty one) is open to every caller. */
  isAllowedFor(skill: SkillMetadata, caller: SkillCaller): boolean {
    if (!skill.callers || skill.callers.length === 0) {
      return true;
    }
    return skill.callers.includes(caller);
  }

  /**
   * Renders the "Skills" prompt section listing each skill as `- name: description`.
   *
   * @param skills Skills to advertise; an empty list yields an empty string.
   * @param caller When `'research'`, a note is added explaining that capture is
   *               handled by the post-turn memory agent.
   */
  buildSkillsPrompt(skills: SkillMetadata[], caller?: SkillCaller): string {
    if (skills.length === 0) {
      return '';
    }
    const list = skills.map((skill) => `- ${skill.name}: ${skill.description}`).join('\n');
    const captureNote =
      caller === 'research'
        ? '\n\nKnowledge pages and semantic-layer sources are captured automatically by a post-turn memory agent. Focus on answering, not on saving. Use `knowledge_read`/`knowledge_search` and `sl_read_source` to consult what already exists; the memory agent will write any new conventions or measures the turn surfaces.'
        : '';
    return `\n## Skills\n\nUse the \`load_skill\` tool to load a skill when the task benefits from specialized instructions.${captureNote}\n\nAvailable skills:\n${list}\n`;
  }

  /**
   * Parses a `callers:` value given as `[a, b]` or `a, b`.
   *
   * Unknown caller names are dropped with a warning. When nothing valid
   * remains the result is `undefined`, which leaves the skill open to every
   * caller — so that case gets its own warning.
   */
  private parseCallersValue(raw: string): SkillCaller[] | undefined {
    const trimmed = raw.trim();
    if (!trimmed) {
      return undefined;
    }
    const inner = trimmed.startsWith('[') && trimmed.endsWith(']') ? trimmed.slice(1, -1) : trimmed;
    const parts = inner
      .split(',')
      .map((part) => part.trim().replace(/^['"]|['"]$/g, ''))
      .filter(Boolean);
    if (parts.length === 0) {
      return undefined;
    }

    const valid: SkillCaller[] = [];
    for (const part of parts) {
      if (VALID_CALLERS.has(part as SkillCaller)) {
        valid.push(part as SkillCaller);
      } else {
        this.logger.warn(`Unknown caller '${part}' in skill frontmatter; ignoring`);
      }
    }
    if (valid.length === 0) {
      // NOTE(review): a typo in `callers` silently un-restricts the skill;
      // consider failing closed once callers of isAllowedFor can be audited.
      this.logger.warn(`No valid callers in skill frontmatter value '${trimmed}'; skill will not be caller-restricted`);
      return undefined;
    }
    return valid;
  }
}
/**
 * Picks the description to surface for a source or column.
 *
 * Sources named in `config.priority` are tried in order and the first one with
 * usable text wins. If none match, the first usable value under any other key
 * is returned, so descriptions from sources not in the priority list still
 * surface.
 *
 * "Usable" means a string with at least one non-whitespace character. Only the
 * object's own keys are consulted, so a priority entry such as `constructor`
 * can never resolve to an inherited prototype member.
 *
 * @param descriptions Description text keyed by source; may be undefined.
 * @param config       Resolution settings; `priority` lists source keys, most preferred first.
 * @returns The chosen description exactly as stored, or `null` when none is usable.
 */
export function resolveDescription(
  descriptions: DescriptionSources | undefined,
  config: DescriptionResolutionConfig,
): string | null {
  if (!descriptions) {
    return null;
  }

  const bySource = descriptions as Record<string, unknown>;
  const isUsable = (value: unknown): value is string => typeof value === 'string' && value.trim().length > 0;

  for (const source of config.priority) {
    if (!Object.prototype.hasOwnProperty.call(bySource, source)) {
      continue;
    }
    const text = bySource[source];
    if (isUsable(text)) {
      return text;
    }
  }

  // Fallback: first usable value under a key the priority list does not name.
  return Object.values(bySource).find(isUsable) ?? null;
}
100644 index 00000000..334e5fc5 --- /dev/null +++ b/packages/context/src/sl/local-query.test.ts @@ -0,0 +1,260 @@ +import { mkdtemp, rm } from 'node:fs/promises'; +import { tmpdir } from 'node:os'; +import { join } from 'node:path'; +import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'; +import type { KloSemanticLayerComputePort } from '../daemon/index.js'; +import { initKloProject, type KloLocalProject } from '../project/index.js'; +import { compileLocalSlQuery } from './local-query.js'; + +describe('compileLocalSlQuery', () => { + let tempDir: string; + let project: KloLocalProject; + let compute: KloSemanticLayerComputePort; + + beforeEach(async () => { + tempDir = await mkdtemp(join(tmpdir(), 'klo-local-query-')); + project = await initKloProject({ projectDir: join(tempDir, 'project'), projectName: 'warehouse' }); + project.config.connections.warehouse = { driver: 'postgres', readonly: true }; + await project.fileStore.writeFile( + 'semantic-layer/warehouse/orders.yaml', + `name: orders +table: public.orders +grain: + - id +columns: + - name: id + type: number + - name: status + type: string +measures: + - name: order_count + expr: count(*) +joins: [] +`, + 'klo', + 'klo@example.com', + 'Add orders source', + ); + await project.fileStore.writeFile( + 'semantic-layer/warehouse/orders_overlay.yaml', + `name: orders_overlay +inherits_columns_from: orders +columns: + - name: paid_at + type: timestamp +joins: [] +measures: [] +grain: [] +`, + 'klo', + 'klo@example.com', + 'Add overlay source', + ); + + compute = { + query: vi.fn(async (input) => ({ + sql: 'select status, count(*) as order_count from public.orders group by status', + dialect: input.dialect, + columns: [{ name: 'orders.status' }, { name: 'orders.order_count' }], + plan: { measures: input.query.measures, dimensions: input.query.dimensions }, + })), + validateSources: vi.fn(), + generateSources: vi.fn(), + }; + }); + + afterEach(async () => { + await rm(tempDir, { recursive: true, 
force: true }); + }); + + it('compiles a local semantic-layer query with computable sources only', async () => { + const result = await compileLocalSlQuery(project, { + connectionId: 'warehouse', + query: { + measures: ['orders.order_count'], + dimensions: ['orders.status'], + limit: 25, + }, + compute, + }); + + expect(compute.query).toHaveBeenCalledWith({ + sources: [ + { + name: 'orders', + table: 'public.orders', + grain: ['id'], + columns: [ + { name: 'id', type: 'number' }, + { name: 'status', type: 'string' }, + ], + measures: [{ name: 'order_count', expr: 'count(*)' }], + joins: [], + }, + ], + dialect: 'postgres', + query: { + measures: ['orders.order_count'], + dimensions: ['orders.status'], + limit: 25, + }, + }); + expect(result).toEqual({ + connectionId: 'warehouse', + dialect: 'postgres', + sql: 'select status, count(*) as order_count from public.orders group by status', + headers: ['orders.status', 'orders.order_count'], + rows: [], + totalRows: 0, + plan: { + measures: ['orders.order_count'], + dimensions: ['orders.status'], + execution: { + mode: 'compile_only', + reason: 'Local semantic-layer query compiled SQL but no data-source execution adapter is configured.', + }, + }, + }); + }); + + it('compiles a local semantic-layer query from manifest-backed scan sources', async () => { + await project.fileStore.writeFile( + 'semantic-layer/warehouse/_schema/public.yaml', + `tables: + payments: + table: public.payments + columns: + - name: payment_id + type: number + pk: true + - name: amount + type: number +`, + 'klo', + 'klo@example.com', + 'Add manifest shard', + ); + + await compileLocalSlQuery(project, { + connectionId: 'warehouse', + query: { + measures: ['sum(payments.amount)'], + dimensions: [], + }, + compute, + }); + + expect(compute.query).toHaveBeenLastCalledWith({ + sources: expect.arrayContaining([ + { + name: 'payments', + table: 'public.payments', + grain: ['payment_id'], + columns: [ + { + name: 'payment_id', + type: 'number', + role: 
undefined, + descriptions: undefined, + constraints: undefined, + enum_values: undefined, + tests: undefined, + }, + { + name: 'amount', + type: 'number', + role: undefined, + descriptions: undefined, + constraints: undefined, + enum_values: undefined, + tests: undefined, + }, + ], + joins: [], + measures: [], + }, + ]), + dialect: 'postgres', + query: { + measures: ['sum(payments.amount)'], + dimensions: [], + }, + }); + }); + + it('resolves the only configured connection when connectionId is omitted', async () => { + await compileLocalSlQuery(project, { + query: { measures: ['orders.order_count'], dimensions: [] }, + compute, + }); + + expect(compute.query).toHaveBeenCalledWith( + expect.objectContaining({ + dialect: 'postgres', + }), + ); + }); + + it('executes compiled SQL through a local query executor when requested', async () => { + const queryExecutor = { + execute: vi.fn(async () => ({ + headers: ['status', 'order_count'], + rows: [['paid', 2]], + totalRows: 1, + command: 'SELECT', + rowCount: 1, + })), + }; + + const result = await compileLocalSlQuery(project, { + connectionId: 'warehouse', + query: { + measures: ['orders.order_count'], + dimensions: ['orders.status'], + limit: 25, + }, + compute, + execute: true, + maxRows: 10, + queryExecutor, + }); + + expect(queryExecutor.execute).toHaveBeenCalledWith({ + connectionId: 'warehouse', + projectDir: project.projectDir, + connection: { driver: 'postgres', readonly: true }, + sql: 'select status, count(*) as order_count from public.orders group by status', + maxRows: 10, + }); + expect(result.rows).toEqual([['paid', 2]]); + expect(result.totalRows).toBe(1); + expect(result.plan.execution).toEqual({ + mode: 'executed', + driver: 'postgres', + maxRows: 10, + rowCount: 1, + }); + }); + + it('requires a query executor for executed mode', async () => { + await expect( + compileLocalSlQuery(project, { + connectionId: 'warehouse', + query: { measures: ['orders.order_count'], dimensions: [] }, + compute, + execute: 
true, + }), + ).rejects.toThrow('Local semantic-layer execution requires a query executor.'); + }); + + it('requires connectionId when multiple connections are configured', async () => { + project.config.connections.analytics = { driver: 'bigquery', readonly: true }; + + await expect( + compileLocalSlQuery(project, { + query: { measures: ['orders.order_count'], dimensions: [] }, + compute, + }), + ).rejects.toThrow('connectionId is required when the local project has zero or multiple connections.'); + }); +}); diff --git a/packages/context/src/sl/local-query.ts b/packages/context/src/sl/local-query.ts new file mode 100644 index 00000000..4e7af76c --- /dev/null +++ b/packages/context/src/sl/local-query.ts @@ -0,0 +1,150 @@
+import type { KloSqlQueryExecutorPort } from '../connections/index.js';
+import type { KloSemanticLayerComputePort } from '../daemon/index.js';
+import type { KloLocalProject } from '../project/index.js';
+import { loadLocalSlSourceRecords } from './local-sl.js';
+import type { SemanticLayerQueryExecutionResult, SemanticLayerQueryInput } from './types.js';
+
+const COMPILE_ONLY_REASON =
+  'Local semantic-layer query compiled SQL but no data-source execution adapter is configured.';
+
+/** Upper-cased connection driver name -> dialect string handed to the compute port. */
+const DIALECT_BY_DRIVER: Record<string, string> = {
+  POSTGRESQL: 'postgres',
+  POSTGRES: 'postgres',
+  BIGQUERY: 'bigquery',
+  SNOWFLAKE: 'snowflake',
+  MYSQL: 'mysql',
+  SQLSERVER: 'tsql',
+  MSSQL: 'tsql',
+  SQLITE: 'sqlite',
+  DUCKDB: 'duckdb',
+  CLICKHOUSE: 'clickhouse',
+  REDSHIFT: 'redshift',
+  DATABRICKS: 'databricks',
+};
+
+/** Options accepted by {@link compileLocalSlQuery}. */
+export interface CompileLocalSlQueryOptions {
+  /** Connection to compile against; may be omitted when the project has exactly one. */
+  connectionId?: string;
+  query: SemanticLayerQueryInput;
+  compute: KloSemanticLayerComputePort;
+  /** When true the compiled SQL is also run through `queryExecutor`. */
+  execute?: boolean;
+  /** Row cap forwarded to the executor; falls back to `query.limit`. */
+  maxRows?: number;
+  queryExecutor?: KloSqlQueryExecutorPort;
+}
+
+export interface CompileLocalSlQueryResult extends SemanticLayerQueryExecutionResult {
+  connectionId: string;
+  dialect: string;
+}
+
+/**
+ * Rejects values that are blank or could escape a directory when used as a path segment.
+ *
+ * @throws Error `Unsafe <kind>: <value>` when the value is empty, contains `..`, `\` or `//`,
+ *   or starts with `/` or `.`.
+ */
+function assertSafePathToken(kind: string, value: string): string {
+  if (
+    value.trim().length === 0 ||
+    value.includes('..') ||
+    value.includes('\\') ||
+    value.startsWith('/') ||
+    value.startsWith('.') ||
+    value.includes('//')
+  ) {
+    throw new Error(`Unsafe ${kind}: ${value}`);
+  }
+  return value;
+}
+
+/** Validates a connection id (alphanumeric first character, then alphanumerics, `_` or `-`). */
+function assertSafeConnectionId(connectionId: string): string {
+  if (!/^[a-zA-Z0-9][a-zA-Z0-9_-]*$/.test(connectionId)) {
+    throw new Error(`Unsafe connection id: ${connectionId}`);
+  }
+  return assertSafePathToken('connection id', connectionId);
+}
+
+/**
+ * Maps a connection driver name to a SQL dialect, case-insensitively.
+ * A missing or unrecognised driver resolves to `'postgres'`.
+ */
+function dialectForDriver(driver: string | undefined): string {
+  const normalized = (driver ?? 'postgres').toUpperCase();
+  return DIALECT_BY_DRIVER[normalized] ?? 'postgres';
+}
+
+/**
+ * Picks the connection id to use: the requested one when given, otherwise the project's
+ * only configured connection.
+ *
+ * @throws Error when no id is requested and the project has zero or several connections,
+ *   or when the chosen id fails the safety checks.
+ */
+function resolveLocalConnectionId(project: KloLocalProject, requested: string | undefined): string {
+  if (requested) {
+    return assertSafeConnectionId(requested);
+  }
+  const ids = Object.keys(project.config.connections).sort();
+  if (ids.length === 1) {
+    return assertSafeConnectionId(ids[0]);
+  }
+  throw new Error('connectionId is required when the local project has zero or multiple connections.');
+}
+
+/**
+ * Loads the connection's semantic-layer sources and keeps only those that define a
+ * `table` or `sql`, i.e. the ones the compute port can turn into SQL.
+ */
+async function loadComputableSources(
+  project: KloLocalProject,
+  connectionId: string,
+): Promise<Record<string, unknown>[]> {
+  return (await loadLocalSlSourceRecords(project, { connectionId: assertSafeConnectionId(connectionId) }))
+    .map((record) => ({ ...record.source }))
+    .filter((source) => source.table || source.sql);
+}
+
+/** Extracts the non-empty string `name` of each column descriptor, preserving order. */
+function headersFromColumns(columns: Array<Record<string, unknown>>): string[] {
+  return columns
+    .map((column) => column.name)
+    .filter((name): name is string => typeof name === 'string' && name.length > 0);
+}
+
+/**
+ * Compiles a semantic-layer query to SQL for a local project and optionally executes it.
+ *
+ * Without `options.execute` the result carries the SQL, the headers reported by the compute
+ * port, no rows, and a `compile_only` execution plan. With `options.execute` the SQL is run
+ * through `options.queryExecutor` and the executor's headers, rows and counts are returned.
+ *
+ * @throws Error when the connection cannot be resolved, or when `execute` is set without a
+ *   `queryExecutor`.
+ */
+export async function compileLocalSlQuery(
+  project: KloLocalProject,
+  options: CompileLocalSlQueryOptions,
+): Promise<CompileLocalSlQueryResult> {
+  const connectionId = resolveLocalConnectionId(project, options.connectionId);
+  // Looked up once; may be undefined when the requested id is not configured.
+  const connection = project.config.connections[connectionId];
+  const dialect = dialectForDriver(connection?.driver);
+  const response = await options.compute.query({
+    sources: await loadComputableSources(project, connectionId),
+    dialect,
+    query: options.query,
+  });
+
+  if (!options.execute) {
+    return {
+      connectionId,
+      dialect: response.dialect,
+      sql: response.sql,
+      headers: headersFromColumns(response.columns),
+      rows: [],
+      totalRows: 0,
+      plan: {
+        ...response.plan,
+        execution: {
+          mode: 'compile_only',
+          reason: COMPILE_ONLY_REASON,
+        },
+      },
+    };
+  }
+
+  if (!options.queryExecutor) {
+    throw new Error('Local semantic-layer execution requires a query executor.');
+  }
+
+  const maxRows = options.maxRows ?? options.query.limit;
+  const execution = await options.queryExecutor.execute({
+    connectionId,
+    projectDir: project.projectDir,
+    connection,
+    sql: response.sql,
+    maxRows,
+  });
+
+  return {
+    connectionId,
+    dialect: response.dialect,
+    sql: response.sql,
+    headers: execution.headers,
+    rows: execution.rows,
+    totalRows: execution.totalRows,
+    plan: {
+      ...response.plan,
+      execution: {
+        mode: 'executed',
+        driver: connection?.driver ?? 'unknown',
+        maxRows,
+        rowCount: execution.rowCount,
+      },
+    },
+  };
+}
diff --git a/packages/context/src/sl/local-sl.test.ts b/packages/context/src/sl/local-sl.test.ts new file mode 100644 index 00000000..63feb061 --- /dev/null +++ b/packages/context/src/sl/local-sl.test.ts @@ -0,0 +1,321 @@ +import { access, mkdtemp, rm } from 'node:fs/promises'; +import { tmpdir } from 'node:os'; +import { join } from 'node:path'; +import { afterEach, beforeEach, describe, expect, it } from 'vitest'; +import { initKloProject, type KloLocalProject } from '../project/index.js'; +import { + listLocalSlSources, + readLocalSlSource, + searchLocalSlSources, + validateLocalSlSource, + writeLocalSlSource, +} from './local-sl.js'; + +const ORDERS_YAML = [ + 'name: orders', + 'table: public.orders', + 'grain:', + ' - order_id', + 'columns:', + ' - name: order_id', + ' type: string', + ' - name: revenue', + ' type: number', + 'measures:', + ' - name: total_revenue', + ' expr: sum(revenue)', + '', +].join('\n'); +
+const SUPPORT_YAML = [ + 'name: tickets', + 'description: Support tickets grouped by priority.', + 'table: public.tickets', + 'grain:', + ' - ticket_id', + 'columns:', + ' - name: ticket_id', + ' type: string', + ' - name: priority', + ' type: string', + 'measures:', + ' - name: ticket_count', + ' expr: count(*)', + '', +].join('\n'); + +describe('local semantic-layer helpers', () => { + let tempDir: string; + let project: KloLocalProject; + + beforeEach(async () => { + tempDir = await mkdtemp(join(tmpdir(), 'klo-local-sl-')); + project = await initKloProject({ projectDir: join(tempDir, 'project'), projectName: 'warehouse' }); + }); + + afterEach(async () => { + await rm(tempDir, { recursive: true, force: true }); + }); + + it('writes, reads, lists, and validates semantic-layer sources', async () => { + const write = await writeLocalSlSource(project, { + connectionId: 'warehouse', + sourceName: 'orders', + yaml: ORDERS_YAML, + }); + + expect(write.path).toBe('semantic-layer/warehouse/orders.yaml'); + + await expect( + readLocalSlSource(project, { connectionId: 'warehouse', sourceName: 'orders' }), + ).resolves.toMatchObject({ + connectionId: 'warehouse', + name: 'orders', + path: 'semantic-layer/warehouse/orders.yaml', + yaml: ORDERS_YAML, + }); + + await expect(listLocalSlSources(project, { connectionId: 'warehouse' })).resolves.toEqual([ + { + columnCount: 2, + connectionId: 'warehouse', + joinCount: 0, + measureCount: 1, + name: 'orders', + path: 'semantic-layer/warehouse/orders.yaml', + }, + ]); + + await expect(validateLocalSlSource(ORDERS_YAML)).resolves.toEqual({ valid: true, errors: [] }); + }); + + it('lists and reads manifest-backed scan sources as queryable sources', async () => { + await project.fileStore.writeFile( + 'semantic-layer/warehouse/_schema/public.yaml', + `tables: + payments: + table: public.payments + columns: + - name: payment_id + type: number + pk: true + - name: amount + type: number +`, + 'klo', + 'klo@example.com', + 'Add manifest 
shard', + ); + + await expect(listLocalSlSources(project, { connectionId: 'warehouse' })).resolves.toEqual([ + { + columnCount: 2, + connectionId: 'warehouse', + joinCount: 0, + measureCount: 0, + name: 'payments', + path: 'semantic-layer/warehouse/_schema/public.yaml#payments', + }, + ]); + + await expect(readLocalSlSource(project, { connectionId: 'warehouse', sourceName: 'payments' })).resolves.toEqual( + expect.objectContaining({ + columnCount: 2, + connectionId: 'warehouse', + joinCount: 0, + measureCount: 0, + name: 'payments', + path: 'semantic-layer/warehouse/_schema/public.yaml#payments', + yaml: expect.stringContaining('table: public.payments'), + }), + ); + }); + + it('expands manifest-backed scan sources when listing all connections', async () => { + await project.fileStore.writeFile( + 'semantic-layer/warehouse/_schema/public.yaml', + `tables: + payments: + table: public.payments + columns: + - name: payment_id + type: number + pk: true + - name: amount + type: number +`, + 'klo', + 'klo@example.com', + 'Add manifest shard', + ); + + await expect(listLocalSlSources(project)).resolves.toEqual([ + { + columnCount: 2, + connectionId: 'warehouse', + joinCount: 0, + measureCount: 0, + name: 'payments', + path: 'semantic-layer/warehouse/_schema/public.yaml#payments', + }, + ]); + }); + + it('searches local semantic-layer source text through SQLite FTS', async () => { + await writeLocalSlSource(project, { + connectionId: 'warehouse', + sourceName: 'orders', + yaml: ORDERS_YAML, + }); + await writeLocalSlSource(project, { + connectionId: 'warehouse', + sourceName: 'tickets', + yaml: SUPPORT_YAML, + }); + + const results = await searchLocalSlSources(project, { connectionId: 'warehouse', query: 'total revenue' }); + + expect(results).toEqual([ + expect.objectContaining({ + connectionId: 'warehouse', + name: 'orders', + path: 'semantic-layer/warehouse/orders.yaml', + score: expect.any(Number), + }), + ]); + expect(results[0]?.score).toBeGreaterThan(0); + await 
expect(access(join(project.projectDir, '.klo/db.sqlite'))).resolves.toBeUndefined(); + }); + + it('searches all connections with one global hybrid ranking pass', async () => { + await writeLocalSlSource(project, { + connectionId: 'warehouse', + sourceName: 'orders', + yaml: ORDERS_YAML, + }); + await writeLocalSlSource(project, { + connectionId: 'finance', + sourceName: 'orders', + yaml: [ + 'name: orders', + 'description: Finance orders used for invoice reconciliation.', + 'table: finance.orders', + 'grain:', + ' - order_id', + 'columns:', + ' - name: order_id', + ' type: string', + ' - name: invoice_status', + ' type: string', + '', + ].join('\n'), + }); + + const results = await searchLocalSlSources(project, { query: 'orders' }); + + expect(results.map((result) => `${result.connectionId}/${result.name}`)).toEqual([ + 'finance/orders', + 'warehouse/orders', + ]); + expect(results[0]).toMatchObject({ + score: expect.any(Number), + matchReasons: expect.arrayContaining(['lexical']), + lanes: expect.arrayContaining([expect.objectContaining({ lane: 'lexical', status: 'available' })]), + }); + }); + + it('returns dictionary evidence when collected sample values explain a match', async () => { + await writeLocalSlSource(project, { + connectionId: 'warehouse', + sourceName: 'orders', + yaml: ORDERS_YAML, + }); + await project.fileStore.writeFile( + 'raw-sources/warehouse/live-database/sync-1/enrichment/relationship-profile.json', + `${JSON.stringify( + { + connectionId: 'warehouse', + driver: 'postgres', + sqlAvailable: true, + queryCount: 2, + tables: [], + columns: { + 'orders.status': { + table: { catalog: null, db: 'public', name: 'orders' }, + column: 'status', + nativeType: 'text', + normalizedType: 'string', + rowCount: 10, + nullCount: 0, + distinctCount: 2, + uniquenessRatio: 0.2, + nullRate: 0, + sampleValues: ['paid', 'refunded'], + minTextLength: 4, + maxTextLength: 8, + }, + }, + warnings: [], + }, + null, + 2, + )}\n`, + 'klo', + 'klo@example.com', + 'Seed 
dictionary profile', + ); + + const results = await searchLocalSlSources(project, { connectionId: 'warehouse', query: 'refunded' }); + + expect(results).toEqual([ + expect.objectContaining({ + connectionId: 'warehouse', + name: 'orders', + matchReasons: ['dictionary'], + dictionaryMatches: [{ column: 'status', values: ['refunded'] }], + }), + ]); + }); + + it('adds the token lane alongside lexical matches for normalized query terms', async () => { + await writeLocalSlSource(project, { + connectionId: 'warehouse', + sourceName: 'orders', + yaml: ORDERS_YAML, + }); + + const results = await searchLocalSlSources(project, { connectionId: 'warehouse', query: 'orders---' }); + + expect(results[0]).toMatchObject({ + connectionId: 'warehouse', + name: 'orders', + matchReasons: expect.arrayContaining(['token']), + }); + }); + + it('reports schema validation errors without writing invalid YAML', async () => { + const invalidYaml = ['name: broken', 'table: public.orders', 'columns: []', ''].join('\n'); + + await expect(validateLocalSlSource(invalidYaml)).resolves.toMatchObject({ + valid: false, + errors: [expect.stringContaining('grain')], + }); + + await expect( + writeLocalSlSource(project, { + connectionId: 'warehouse', + sourceName: 'broken', + yaml: invalidYaml, + }), + ).rejects.toThrow('Invalid semantic-layer source'); + }); + + it('rejects unsafe source paths', async () => { + await expect( + readLocalSlSource(project, { + connectionId: 'warehouse', + sourceName: '../orders', + }), + ).rejects.toThrow('Unsafe semantic-layer source name'); + }); +}); diff --git a/packages/context/src/sl/local-sl.ts b/packages/context/src/sl/local-sl.ts new file mode 100644 index 00000000..ff32ee65 --- /dev/null +++ b/packages/context/src/sl/local-sl.ts @@ -0,0 +1,595 @@ +import { join } from 'node:path'; +import YAML from 'yaml'; +import { z } from 'zod'; +import type { KloEmbeddingPort, KloFileWriteResult } from '../core/index.js'; +import type { KloLocalProject } from 
'../project/index.js';
+import { HybridSearchCore, type SearchCandidateGenerator } from '../search/index.js';
+import { DEFAULT_PRIORITY, resolveDescription } from './descriptions.js';
+import { sourceDefinitionSchema, sourceOverlaySchema } from './schemas.js';
+import { composeOverlay, type ManifestTableEntry, projectManifestEntry } from './semantic-layer.service.js';
+import type { PgliteSlSearchPrototypeOwnerOptions } from './pglite-sl-search-prototype.js';
+import { loadLatestSlDictionaryEntries } from './sl-dictionary-profile.js';
+import { buildSemanticLayerSourceSearchText, SlSearchService } from './sl-search.service.js';
+import { SqliteSlSourcesIndex } from './sqlite-sl-sources-index.js';
+import type { SemanticLayerSource, SlDictionaryMatch, SlSearchLaneSummary, SlSearchMatchReason } from './types.js';
+
+/** Listing-level description of one semantic-layer source. */
+export interface LocalSlSourceSummary {
+  connectionId: string;
+  name: string;
+  path: string;
+  description?: string;
+  columnCount: number;
+  measureCount: number;
+  joinCount: number;
+}
+
+/** A source summary annotated with search ranking details. */
+export interface LocalSlSourceSearchResult extends LocalSlSourceSummary {
+  score: number;
+  matchReasons?: SlSearchMatchReason[];
+  dictionaryMatches?: SlDictionaryMatch[];
+  lanes?: SlSearchLaneSummary[];
+}
+
+export interface LocalSlSearchInput {
+  connectionId?: string;
+  query: string;
+  embeddingService?: KloEmbeddingPort | null;
+  limit?: number;
+  backend?: 'pglite-owner-prototype';
+  pglite?: PgliteSlSearchPrototypeOwnerOptions;
+}
+
+/** A source summary together with its YAML text. */
+export interface LocalSlSource extends LocalSlSourceSummary {
+  yaml: string;
+}
+
+/** A source with both its YAML text and its parsed definition. */
+export interface LocalSlSourceRecord extends LocalSlSource {
+  source: SemanticLayerSource;
+}
+
+export interface LocalSlValidationResult {
+  valid: boolean;
+  errors: string[];
+}
+
+const LOCAL_AUTHOR = 'klo';
+const LOCAL_AUTHOR_EMAIL = 'klo@example.com';
+
+/** Connection ids: alphanumeric first character, then alphanumerics, `_` or `-`. */
+const SAFE_CONNECTION_ID_PATTERN = /^[a-zA-Z0-9][a-zA-Z0-9_-]*$/;
+
+/**
+ * Rejects values that are blank or could escape a directory when used as a path segment.
+ *
+ * @throws Error `Unsafe <kind>: <value>` when the value is empty, contains `..`, `\` or `//`,
+ *   or starts with `/` or `.`.
+ */
+function assertSafePathToken(kind: string, value: string): string {
+  if (
+    value.trim().length === 0 ||
+    value.includes('..') ||
+    value.includes('\\') ||
+    value.startsWith('/') ||
+    value.startsWith('.') ||
+    value.includes('//')
+  ) {
+    throw new Error(`Unsafe ${kind}: ${value}`);
+  }
+  return value;
+}
+
+/** Returns the connection id unchanged, or throws when it is not a safe path segment. */
+function assertSafeConnectionId(connectionId: string): string {
+  if (!SAFE_CONNECTION_ID_PATTERN.test(connectionId)) {
+    throw new Error(`Unsafe connection id: ${connectionId}`);
+  }
+  return assertSafePathToken('connection id', connectionId);
+}
+
+/** Non-throwing counterpart of the connection-id pattern check, usable as a filter predicate. */
+function isSafeConnectionId(connectionId: string | undefined): connectionId is string {
+  return typeof connectionId === 'string' && SAFE_CONNECTION_ID_PATTERN.test(connectionId);
+}
+
+/** Source names are lower-case alphanumerics and `_`, starting with an alphanumeric. */
+function assertSafeSourceName(sourceName: string): string {
+  if (!/^[a-z0-9][a-z0-9_]*$/.test(sourceName)) {
+    throw new Error(`Unsafe semantic-layer source name: ${sourceName}`);
+  }
+  return assertSafePathToken('semantic-layer source name', sourceName);
+}
+
+/** True for plain (non-null, non-array) objects. */
+function isRecord(value: unknown): value is Record<string, unknown> {
+  return typeof value === 'object' && value !== null && !Array.isArray(value);
+}
+
+/** Builds the file-store path of a standalone source, validating both segments. */
+function slPath(connectionId: string, sourceName: string): string {
+  return `semantic-layer/${assertSafeConnectionId(connectionId)}/${assertSafeSourceName(sourceName)}.yaml`;
+}
+
+/** Derives a source name from the last path segment, dropping a `.yaml`/`.yml` suffix. */
+function sourceNameFromPath(path: string): string {
+  return (
+    path
+      .split('/')
+      .at(-1)
+      ?.replace(/\.ya?ml$/, '') ?? path
+  );
+}
+
+/**
+ * Parses YAML text and requires the document root to be a mapping.
+ *
+ * @throws Error when the parsed root is not a plain object.
+ */
+function parseYamlRecord(raw: string): Record<string, unknown> {
+  const parsed = YAML.parse(raw) as unknown;
+  if (!isRecord(parsed)) {
+    throw new Error('Semantic-layer source YAML must contain an object');
+  }
+  return parsed;
+}
+
+/**
+ * Collects the non-blank string entries of `value.descriptions`, and uses a flat
+ * `value.description` as the `user` entry when no `user` entry is present.
+ * Returns `undefined` when nothing usable was found.
+ */
+function descriptionMap(value: Record<string, unknown>): Record<string, string> | undefined {
+  const result: Record<string, string> = {};
+  const descriptions = value.descriptions;
+  if (isRecord(descriptions)) {
+    for (const [key, text] of Object.entries(descriptions)) {
+      if (typeof text === 'string' && text.trim().length > 0) {
+        result[key] = text;
+      }
+    }
+  }
+
+  const flatDescription = value.description;
+  if (!result.user && typeof flatDescription === 'string' && flatDescription.trim().length > 0) {
+    result.user = flatDescription;
+  }
+
+  return Object.keys(result).length > 0 ? result : undefined;
+}
+
+/**
+ * Flattens a thrown value into human-readable messages: one `path: message` line per
+ * Zod issue, or the single error message otherwise.
+ */
+function validationErrors(error: unknown): string[] {
+  if (error instanceof z.ZodError) {
+    // Root-level issues have an empty path; label them so the line does not start with a bare colon.
+    return error.issues.map((issue) => `${issue.path.join('.') || '<root>'}: ${issue.message}`);
+  }
+  return [error instanceof Error ? error.message : String(error)];
+}
+
+function summarizeSource(args: { connectionId: string; path: string; raw: string }): LocalSlSourceSummary {
+  const parsed = parseYamlRecord(args.raw);
+  const name = typeof parsed.name === 'string' && parsed.name.length > 0 ? parsed.name : sourceNameFromPath(args.path);
+  const description = resolveDescription(descriptionMap(parsed), { priority: DEFAULT_PRIORITY }) ?? undefined;
+  return {
+    connectionId: args.connectionId,
+    name,
+    path: args.path,
+    ...(description ? { description } : {}),
+    columnCount: Array.isArray(parsed.columns) ? parsed.columns.length : 0,
+    measureCount: Array.isArray(parsed.measures) ? parsed.measures.length : 0,
+    joinCount: Array.isArray(parsed.joins) ? 
parsed.joins.length : 0,
+  };
+}
+
+/** Serialises a source definition to YAML with 2-space indentation and no line folding. */
+function sourceToYaml(source: SemanticLayerSource): string {
+  return YAML.stringify(source, { indent: 2, lineWidth: 0 });
+}
+
+/** Builds the listing summary for an already-parsed source definition. */
+function summarizeSemanticSource(args: {
+  connectionId: string;
+  path: string;
+  source: SemanticLayerSource;
+}): LocalSlSourceSummary {
+  const description = resolveDescription(args.source.descriptions, { priority: DEFAULT_PRIORITY }) ?? undefined;
+  return {
+    connectionId: args.connectionId,
+    name: args.source.name,
+    path: args.path,
+    ...(description ? { description } : {}),
+    columnCount: args.source.columns.length,
+    measureCount: args.source.measures.length,
+    joinCount: args.source.joins.length,
+  };
+}
+
+/** Returns the `tables` mapping of a manifest shard, or null when the shard has none. */
+function manifestTables(value: Record<string, unknown>): Record<string, ManifestTableEntry> | null {
+  return isRecord(value.tables) ? (value.tables as Record<string, ManifestTableEntry>) : null;
+}
+
+/**
+ * Coerces parsed standalone YAML into a source definition, forcing `name` and defaulting
+ * `grain`, `columns`, `joins` and `measures` to empty arrays when they are not arrays.
+ */
+function parsedStandaloneSource(parsed: Record<string, unknown>, name: string): SemanticLayerSource {
+  const source = parsed as Partial<SemanticLayerSource>;
+  return {
+    ...source,
+    name,
+    grain: Array.isArray(parsed.grain) ? (parsed.grain.filter((item) => typeof item === 'string') as string[]) : [],
+    columns: Array.isArray(parsed.columns) ? (parsed.columns as SemanticLayerSource['columns']) : [],
+    joins: Array.isArray(parsed.joins) ? (parsed.joins as SemanticLayerSource['joins']) : [],
+    measures: Array.isArray(parsed.measures) ? (parsed.measures as SemanticLayerSource['measures']) : [],
+  };
+}
+
+/** Drops the parsed definition and YAML text from a record, leaving only its summary fields. */
+function toSummary({ source: _source, yaml: _yaml, ...summary }: LocalSlSourceRecord): LocalSlSourceSummary {
+  return summary;
+}
+
+/**
+ * Loads every semantic-layer source of one connection, sorted by name.
+ *
+ * Manifest shards under `_schema/` are expanded first (one source per table entry, with the
+ * path reported as `<shard>#<table>`). Standalone YAML files are applied afterwards: a file
+ * that defines `table` or `sql` replaces any entry of the same name, while any other file is
+ * treated as an overlay and composed onto the already-loaded entry of the same name. Overlays
+ * with no matching entry are ignored.
+ */
+export async function loadLocalSlSourceRecords(
+  project: KloLocalProject,
+  input: { connectionId: string },
+): Promise<LocalSlSourceRecord[]> {
+  const connectionId = assertSafeConnectionId(input.connectionId);
+  const dir = `semantic-layer/${connectionId}`;
+  const schemaDir = `${dir}/_schema`;
+  const listed = await project.fileStore.listFiles(dir);
+  const paths = listed.files.filter((file) => file.endsWith('.yaml') || file.endsWith('.yml')).sort();
+  const sources = new Map<string, LocalSlSourceRecord>();
+
+  for (const path of paths.filter((file) => file.startsWith(`${schemaDir}/`))) {
+    const raw = await project.fileStore.readFile(path);
+    const tables = manifestTables(parseYamlRecord(raw.content));
+    if (!tables) {
+      continue;
+    }
+    for (const [name, entry] of Object.entries(tables)) {
+      const source = projectManifestEntry(name, entry);
+      const projectedPath = `${path}#${name}`;
+      sources.set(name, {
+        ...summarizeSemanticSource({ connectionId, path: projectedPath, source }),
+        yaml: sourceToYaml(source),
+        source,
+      });
+    }
+  }
+
+  for (const path of paths.filter((file) => !file.startsWith(`${schemaDir}/`))) {
+    const raw = await project.fileStore.readFile(path);
+    const parsed = parseYamlRecord(raw.content);
+    const name = typeof parsed.name === 'string' && parsed.name.length > 0 ? parsed.name : sourceNameFromPath(path);
+    if (parsed.table || parsed.sql) {
+      const source = parsedStandaloneSource(parsed, name);
+      sources.set(name, { ...summarizeSource({ connectionId, path, raw: raw.content }), yaml: raw.content, source });
+      continue;
+    }
+
+    // No table/sql: this file is an overlay and needs a base entry with the same name.
+    const base = sources.get(name);
+    if (!base) {
+      continue;
+    }
+    const source = composeOverlay(base.source, parsed);
+    sources.set(name, {
+      ...summarizeSemanticSource({ connectionId, path, source }),
+      yaml: sourceToYaml(source),
+      source,
+    });
+  }
+
+  return [...sources.values()].sort((left, right) => left.name.localeCompare(right.name));
+}
+
+/**
+ * Validates source YAML against the definition schema (when it has `table` or `sql`) or the
+ * overlay schema (otherwise). Never throws; failures are reported in `errors`.
+ */
+export async function validateLocalSlSource(rawYaml: string): Promise<LocalSlValidationResult> {
+  try {
+    const parsed = parseYamlRecord(rawYaml);
+    const schema = parsed.table || parsed.sql ? sourceDefinitionSchema : sourceOverlaySchema;
+    schema.parse(parsed);
+    return { valid: true, errors: [] };
+  } catch (error) {
+    return { valid: false, errors: validationErrors(error) };
+  }
+}
+
+/**
+ * Validates and writes a standalone source file, appending a trailing newline when missing.
+ *
+ * @throws Error when the YAML is invalid, when its `name` differs from `sourceName`, or when
+ *   either path segment is unsafe.
+ */
+export async function writeLocalSlSource(
+  project: KloLocalProject,
+  input: { connectionId: string; sourceName: string; yaml: string },
+): Promise<KloFileWriteResult> {
+  const validation = await validateLocalSlSource(input.yaml);
+  if (!validation.valid) {
+    throw new Error(`Invalid semantic-layer source: ${validation.errors.join('; ')}`);
+  }
+
+  const parsed = parseYamlRecord(input.yaml);
+  if (typeof parsed.name === 'string' && parsed.name !== input.sourceName) {
+    throw new Error(`Semantic-layer source name "${parsed.name}" does not match requested path "${input.sourceName}"`);
+  }
+
+  const path = slPath(input.connectionId, input.sourceName);
+  return project.fileStore.writeFile(
+    path,
+    input.yaml.endsWith('\n') ? input.yaml : `${input.yaml}\n`,
+    LOCAL_AUTHOR,
+    LOCAL_AUTHOR_EMAIL,
+    `Write semantic-layer source: ${input.connectionId}/${input.sourceName}`,
+  );
+}
+
+/**
+ * Reads one source by name. The standalone file is tried first; if reading or summarising it
+ * fails for any reason, the composed records of the connection (including manifest-backed
+ * tables) are searched instead. Resolves to null when no record has that name.
+ *
+ * @throws Error when `connectionId` or `sourceName` is unsafe.
+ */
+export async function readLocalSlSource(
+  project: KloLocalProject,
+  input: { connectionId: string; sourceName: string },
+): Promise<LocalSlSource | null> {
+  const path = slPath(input.connectionId, input.sourceName);
+  try {
+    const result = await project.fileStore.readFile(path);
+    return {
+      ...summarizeSource({ connectionId: input.connectionId, path, raw: result.content }),
+      yaml: result.content,
+    };
+  } catch {
+    const records = await loadLocalSlSourceRecords(project, {
+      connectionId: input.connectionId,
+    });
+    const record = records.find((source) => source.name === input.sourceName);
+    return record ? { ...record } : null;
+  }
+}
+
+/**
+ * Lists source summaries for one connection, or for every connection directory found under
+ * `semantic-layer/` when no connection id is given. Results are ordered by connection id,
+ * then by source name.
+ */
+export async function listLocalSlSources(
+  project: KloLocalProject,
+  input: { connectionId?: string } = {},
+): Promise<LocalSlSourceSummary[]> {
+  if (input.connectionId) {
+    return (await loadLocalSlSourceRecords(project, { connectionId: input.connectionId })).map((record) =>
+      toSummary(record),
+    );
+  }
+  const listed = await project.fileStore.listFiles('semantic-layer');
+  // The second path segment is the connection directory; unsafe names are skipped, not rejected.
+  const connectionIds = [...new Set(listed.files.map((path) => path.split('/')[1]).filter(isSafeConnectionId))].sort();
+  const summaries: LocalSlSourceSummary[] = [];
+  for (const connectionId of connectionIds) {
+    const records = await loadLocalSlSourceRecords(project, { connectionId });
+    summaries.push(...records.map((record) => toSummary(record)));
+  }
+  return summaries.sort(
+    (left, right) => left.connectionId.localeCompare(right.connectionId) || left.name.localeCompare(right.name),
+  );
+}
+
+/** A source prepared for search: its summary, parsed definition and flattened search text. */
+interface LocalSlSearchCandidate {
+  summary: LocalSlSourceSummary;
+  source: SemanticLayerSource;
+  searchText: string;
+}
+
+/** Location of the project's SQLite database (`<projectDir>/.klo/db.sqlite`). */
+function sqliteSlDbPath(project: KloLocalProject): string {
+  return join(project.projectDir, '.klo', 'db.sqlite');
+}
+
+async function 
loadLocalSlSearchCandidates(
+  project: KloLocalProject,
+  input: { connectionId?: string } = {},
+): Promise<LocalSlSearchCandidate[]> {
+  // Builds search candidates for one connection, or for every safe connection directory under
+  // `semantic-layer/` when no id is given (ordered by connection id, then source name).
+  if (input.connectionId) {
+    return (await loadLocalSlSourceRecords(project, { connectionId: input.connectionId })).map((record) => ({
+      summary: {
+        connectionId: record.connectionId,
+        name: record.name,
+        path: record.path,
+        ...(record.description ? { description: record.description } : {}),
+        columnCount: record.columnCount,
+        measureCount: record.measureCount,
+        joinCount: record.joinCount,
+      },
+      source: record.source,
+      searchText: buildSemanticLayerSourceSearchText(record.source),
+    }));
+  }
+
+  const listed = await project.fileStore.listFiles('semantic-layer');
+  const connectionIds = [...new Set(listed.files.map((path) => path.split('/')[1]).filter(isSafeConnectionId))].sort();
+  const candidates: LocalSlSearchCandidate[] = [];
+  for (const connectionId of connectionIds) {
+    candidates.push(...(await loadLocalSlSearchCandidates(project, { connectionId })));
+  }
+  return candidates.sort(
+    (left, right) =>
+      left.summary.connectionId.localeCompare(right.summary.connectionId) ||
+      left.summary.name.localeCompare(right.summary.name),
+  );
+}
+
+/** Identifier of a candidate across connections: `<connectionId>/<name>`. */
+function candidateKey(summary: LocalSlSourceSummary): string {
+  return `${summary.connectionId}/${summary.name}`;
+}
+
+/**
+ * Scores each candidate by the fraction of `terms` found as substrings of its lower-cased
+ * search text, drops candidates with no hit, and orders by score (descending), then
+ * connection id, then name. Terms are compared as given, so callers pass them lower-cased.
+ */
+function tokenLaneCandidates(candidates: LocalSlSearchCandidate[], terms: readonly string[]) {
+  if (terms.length === 0) {
+    return [];
+  }
+  return candidates
+    .map((candidate) => {
+      const haystack = candidate.searchText.toLowerCase();
+      const matchedTerms = terms.filter((term) => haystack.includes(term));
+      return {
+        candidate,
+        score: matchedTerms.length / terms.length,
+      };
+    })
+    .filter((result) => result.score > 0)
+    .sort(
+      (left, right) =>
+        right.score - left.score ||
+        left.candidate.summary.connectionId.localeCompare(right.candidate.summary.connectionId) ||
+        left.candidate.summary.name.localeCompare(right.candidate.summary.name),
+    );
+}
+
+async function 
refreshHybridSlIndexes(input: {
+  index: SqliteSlSourcesIndex;
+  project: KloLocalProject;
+  candidates: LocalSlSearchCandidate[];
+  embeddingService?: KloEmbeddingPort | null;
+}): Promise<void> {
+  // Brings the SQLite index in line with the given candidates, one connection at a time:
+  // source rows first (with embeddings when an embedding service is supplied), then the
+  // dictionary entries loaded for those same connections.
+  const candidatesByConnection = new Map<string, LocalSlSearchCandidate[]>();
+  for (const candidate of input.candidates) {
+    // Append in place; rebuilding the array on every insert made grouping quadratic.
+    const group = candidatesByConnection.get(candidate.summary.connectionId);
+    if (group) {
+      group.push(candidate);
+    } else {
+      candidatesByConnection.set(candidate.summary.connectionId, [candidate]);
+    }
+  }
+
+  for (const [connectionId, group] of candidatesByConnection) {
+    if (input.embeddingService) {
+      const service = new SlSearchService(input.embeddingService, input.index);
+      await service.indexSources(
+        connectionId,
+        group.map((candidate) => candidate.source),
+      );
+    } else {
+      // No embedding service: store text-only rows, then remove rows for sources that are gone.
+      await input.index.upsertSources(
+        connectionId,
+        group.map((candidate) => ({
+          sourceName: candidate.summary.name,
+          searchText: candidate.searchText,
+          embedding: null,
+        })),
+      );
+      await input.index.deleteStale(
+        connectionId,
+        group.map((candidate) => candidate.summary.name),
+      );
+    }
+  }
+
+  const dictionaryEntries = await loadLatestSlDictionaryEntries(input.project, [...candidatesByConnection.keys()]);
+  for (const connectionId of candidatesByConnection.keys()) {
+    await input.index.replaceDictionaryEntries(
+      connectionId,
+      dictionaryEntries.filter((entry) => entry.connectionId === connectionId),
+    );
+  }
+}
+
+export async function searchLocalSlSources( + project: KloLocalProject, + input: LocalSlSearchInput, +): Promise { + const query = input.query.trim(); + if (!query) { + return (await listLocalSlSources(project, { connectionId: input.connectionId })).map((source) => ({ + ...source, + score: 1, + })); + } + + if (input.backend === 'pglite-owner-prototype') { + if (!input.pglite) { + throw new Error('PGlite semantic-layer search prototype requires pglite owner-process options.'); + } + const { searchLocalSlSourcesWithPglitePrototype } = await import('./pglite-sl-search-prototype.js'); + return 
searchLocalSlSourcesWithPglitePrototype(project, { + connectionId: input.connectionId, + query, + embeddingService: input.embeddingService ?? null, + limit: input.limit, + pglite: input.pglite, + }); + } + + const candidates = await loadLocalSlSearchCandidates(project, { connectionId: input.connectionId }); + if (project.config.storage.search !== 'sqlite-fts5') { + /* Token-only fallback. The query is tokenized once because it is identical for every candidate. */ + const terms = query + .toLowerCase() + .split(/\s+/) + .map((term) => term.trim()) + .filter(Boolean); + return candidates + .map((candidate) => { + return { + candidate, + score: + terms.length === 0 + ? 0 + : terms.filter((term) => candidate.searchText.toLowerCase().includes(term)).length / terms.length, + }; + }) + .filter((result) => result.score > 0) + .map((result) => ({ + ...result.candidate.summary, + score: result.score, + matchReasons: ['token'], + })) + .sort( + (left, right) => + right.score - left.score || + left.connectionId.localeCompare(right.connectionId) || + left.path.localeCompare(right.path), + ) + /* Honor the caller's limit here as well, mirroring the default used by the hybrid path below. */ + .slice(0, Math.max(0, input.limit ?? candidates.length)); + } + + /* Hybrid path: refresh the SQLite index from the current sources before querying it. */ + const index = new SqliteSlSourcesIndex({ dbPath: sqliteSlDbPath(project) }); + await refreshHybridSlIndexes({ index, project, candidates, embeddingService: input.embeddingService ?? null }); + + const candidateById = new Map(candidates.map((candidate) => [candidateKey(candidate.summary), candidate])); + const connectionIds = input.connectionId ? [input.connectionId] : undefined; + const finalLimit = input.limit ??
candidates.length; + const core = new HybridSearchCore(); + const dictionaryEvidence = new Map(); + + const generators: SearchCandidateGenerator[] = [ + { + lane: 'lexical', + async generate(args) { + const rows = await index.searchLexicalCandidates({ + connectionIds, + queryText: args.queryText, + limit: args.laneCandidatePoolLimit, + }); + return { + candidates: rows.map((row) => ({ id: row.id, rank: row.rank, rawScore: row.rawScore })), + }; + }, + }, + { + lane: 'dictionary', + async generate(args) { + const rows = await index.searchDictionaryCandidates({ + connectionIds, + queryText: args.queryText, + limit: args.laneCandidatePoolLimit, + }); + for (const row of rows) { + dictionaryEvidence.set(row.id, row.matches); + } + return { + candidates: rows.map((row) => ({ + id: row.id, + rank: row.rank, + rawScore: row.rawScore, + evidence: row.matches, + })), + }; + }, + }, + { + lane: 'token', + async generate(args) { + const rows = tokenLaneCandidates(candidates, args.normalizedQuery.terms).slice(0, args.laneCandidatePoolLimit); + return { + candidates: rows.map((row, index) => ({ + id: candidateKey(row.candidate.summary), + rank: index + 1, + rawScore: row.score, + })), + }; + }, + }, + { + lane: 'semantic', + async generate(args) { + if (!input.embeddingService) { + return { status: 'skipped', candidates: [], reason: 'embedding_unconfigured' }; + } + try { + const queryEmbedding = await input.embeddingService.computeEmbedding(args.queryText); + const rows = await index.searchSemanticCandidates({ + connectionIds, + queryEmbedding, + limit: args.laneCandidatePoolLimit, + }); + return { + candidates: rows.map((row) => ({ id: row.id, rank: row.rank, rawScore: row.rawScore })), + }; + } catch (error) { + return { + status: 'skipped', + candidates: [], + reason: `embedding_unhealthy:${error instanceof Error ? 
error.message : String(error)}`, + }; + } + }, + }, + ]; + + const result = await core.search({ queryText: query, limit: finalLimit, generators }); + const hydrated: LocalSlSourceSearchResult[] = []; + for (const fused of result.results) { + const candidate = candidateById.get(fused.id); + if (!candidate) { + continue; + } + const dictionaryMatches = dictionaryEvidence.get(fused.id); + hydrated.push({ + ...candidate.summary, + score: fused.score, + matchReasons: fused.matchReasons as SlSearchMatchReason[], + ...(dictionaryMatches && dictionaryMatches.length > 0 ? { dictionaryMatches } : {}), + lanes: result.lanes, + }); + } + return hydrated; +} diff --git a/packages/context/src/sl/pglite-sl-search-prototype.test.ts b/packages/context/src/sl/pglite-sl-search-prototype.test.ts new file mode 100644 index 00000000..5288eb2f --- /dev/null +++ b/packages/context/src/sl/pglite-sl-search-prototype.test.ts @@ -0,0 +1,268 @@ +import { mkdtemp, rm } from 'node:fs/promises'; +import { createServer } from 'node:net'; +import { tmpdir } from 'node:os'; +import { join } from 'node:path'; +import { afterEach, beforeEach, describe, expect, it } from 'vitest'; +import { initKloProject, type KloLocalProject } from '../project/index.js'; +import { assertSearchBackendConformanceCase } from '../search/index.js'; +import { searchLocalSlSources, writeLocalSlSource, type LocalSlSourceSearchResult } from './local-sl.js'; +import { searchLocalSlSourcesWithPglitePrototype } from './pglite-sl-search-prototype.js'; + +const ORDERS_YAML = [ + 'name: orders', + 'description: Orders with paid revenue and refund status.', + 'table: public.orders', + 'grain:', + ' - order_id', + 'columns:', + ' - name: order_id', + ' type: string', + ' - name: status', + ' type: string', + ' - name: revenue', + ' type: number', + 'measures:', + ' - name: total_revenue', + ' expr: sum(revenue)', + '', +].join('\n'); + +const FINANCE_ORDERS_YAML = [ + 'name: orders', + 'description: Finance orders used for invoice 
reconciliation.', + 'table: finance.orders', + 'grain:', + ' - order_id', + 'columns:', + ' - name: order_id', + ' type: string', + ' - name: invoice_status', + ' type: string', + '', +].join('\n'); + +const CUSTOMERS_YAML = [ + 'name: customers', + 'description: Customer lifecycle accounts by region.', + 'table: public.customers', + 'grain:', + ' - customer_id', + 'columns:', + ' - name: customer_id', + ' type: string', + ' - name: region', + ' type: string', + '', +].join('\n'); + +class FakeEmbeddingPort { + readonly maxBatchSize = 16; + + async computeEmbedding(text: string): Promise { + const normalized = text.toLowerCase(); + if (normalized.includes('semantic revenue') || normalized.includes('orders with paid revenue')) { + return [1, 0, 0]; + } + if (normalized.includes('finance orders')) { + return [0.72, 0.28, 0]; + } + return [0, 1, 0]; + } + + async computeEmbeddingsBulk(texts: string[]): Promise { + return Promise.all(texts.map((text) => this.computeEmbedding(text))); + } +} + +async function allocatePort(): Promise { + const server = createServer(); + await new Promise((resolve) => server.listen(0, '127.0.0.1', resolve)); + const address = server.address(); + if (typeof address !== 'object' || address === null) { + throw new Error('Expected TCP server address while allocating a PGlite SL prototype port.'); + } + await new Promise((resolve, reject) => { + server.close((error) => { + if (error) { + reject(error); + return; + } + resolve(); + }); + }); + return address.port; +} + +function toConformanceResult(result: LocalSlSourceSearchResult) { + return { + id: `${result.connectionId}/${result.name}`, + score: result.score, + matchReasons: result.matchReasons ?? 
[], + lanes: result.lanes, + dictionaryMatches: result.dictionaryMatches, + }; +} + +async function seedSemanticLayerProject(project: KloLocalProject): Promise { + await writeLocalSlSource(project, { connectionId: 'warehouse', sourceName: 'orders', yaml: ORDERS_YAML }); + await writeLocalSlSource(project, { connectionId: 'finance', sourceName: 'orders', yaml: FINANCE_ORDERS_YAML }); + await writeLocalSlSource(project, { connectionId: 'warehouse', sourceName: 'customers', yaml: CUSTOMERS_YAML }); + + await project.fileStore.writeFile( + 'raw-sources/warehouse/live-database/sync-1/enrichment/relationship-profile.json', + `${JSON.stringify( + { + connectionId: 'warehouse', + driver: 'postgres', + sqlAvailable: true, + queryCount: 2, + tables: [], + columns: { + 'orders.status': { + table: { catalog: null, db: 'public', name: 'orders' }, + column: 'status', + nativeType: 'text', + normalizedType: 'string', + rowCount: 10, + nullCount: 0, + distinctCount: 2, + uniquenessRatio: 0.2, + nullRate: 0, + sampleValues: ['paid', 'refunded'], + minTextLength: 4, + maxTextLength: 8, + }, + 'customers.region': { + table: { catalog: null, db: 'public', name: 'customers' }, + column: 'region', + nativeType: 'text', + normalizedType: 'string', + rowCount: 10, + nullCount: 0, + distinctCount: 3, + uniquenessRatio: 0.3, + nullRate: 0, + sampleValues: ['emea', 'amer', 'apac'], + minTextLength: 4, + maxTextLength: 4, + }, + }, + warnings: [], + }, + null, + 2, + )}\n`, + 'klo', + 'klo@example.com', + 'Seed PGlite dictionary profile', + ); +} + +describe('PGlite semantic-layer search prototype', () => { + let tempDir: string; + let project: KloLocalProject; + let pgliteDataDir: string; + let port: number; + + beforeEach(async () => { + tempDir = await mkdtemp(join(tmpdir(), 'klo-pglite-sl-prototype-')); + project = await initKloProject({ projectDir: join(tempDir, 'project'), projectName: 'warehouse' }); + project.config.ingest.embeddings.dimensions = 3; + pgliteDataDir = join(tempDir, 
'pglite-search'); + port = await allocatePort(); + await seedSemanticLayerProject(project); + }); + + afterEach(async () => { + await rm(tempDir, { recursive: true, force: true }); + }); + + it('returns lexical semantic-layer matches through PGlite FTS', async () => { + const results = await searchLocalSlSourcesWithPglitePrototype(project, { + query: 'paid revenue', + limit: 5, + pglite: { dataDir: pgliteDataDir, host: '127.0.0.1', port }, + }); + + assertSearchBackendConformanceCase({ + backendName: 'pglite-owner-prototype', + surface: 'semantic-layer', + caseName: 'pglite lexical source ranking', + results: results.map(toConformanceResult), + expectedTopIds: ['warehouse/orders'], + expectedReasonsById: { + 'warehouse/orders': ['lexical'], + }, + expectedLanes: { + lexical: { status: 'available' }, + semantic: { status: 'skipped', reason: 'embedding_unconfigured' }, + }, + }); + }); + + it('returns dictionary evidence through PGlite pg_trgm and exact matching', async () => { + const results = await searchLocalSlSourcesWithPglitePrototype(project, { + connectionId: 'warehouse', + query: 'refund', + limit: 5, + pglite: { dataDir: pgliteDataDir, host: '127.0.0.1', port }, + }); + + assertSearchBackendConformanceCase({ + backendName: 'pglite-owner-prototype', + surface: 'semantic-layer', + caseName: 'pglite dictionary source evidence', + results: results.map(toConformanceResult), + expectedTopIds: ['warehouse/orders'], + expectedReasonsById: { + 'warehouse/orders': ['dictionary'], + }, + expectedLanes: { + dictionary: { status: 'available' }, + semantic: { status: 'skipped', reason: 'embedding_unconfigured' }, + }, + expectedDictionaryMatchesById: { + 'warehouse/orders': [{ column: 'status', values: ['refunded'] }], + }, + }); + }); + + it('returns semantic matches through PGlite vector ordering when embeddings are configured', async () => { + const results = await searchLocalSlSourcesWithPglitePrototype(project, { + query: 'semantic revenue', + limit: 5, + 
embeddingService: new FakeEmbeddingPort(), + pglite: { dataDir: pgliteDataDir, host: '127.0.0.1', port }, + }); + + assertSearchBackendConformanceCase({ + backendName: 'pglite-owner-prototype', + surface: 'semantic-layer', + caseName: 'pglite semantic source ranking', + results: results.map(toConformanceResult), + expectedTopIds: ['warehouse/orders'], + expectedReasonsById: { + 'warehouse/orders': ['semantic'], + }, + expectedLanes: { + semantic: { status: 'available' }, + }, + }); + }); + + it('routes through PGlite only when the private local search input opts in', async () => { + const results = await searchLocalSlSources(project, { + query: 'refnd', + limit: 5, + backend: 'pglite-owner-prototype', + pglite: { dataDir: pgliteDataDir, host: '127.0.0.1', port }, + }); + + expect(results[0]).toMatchObject({ + connectionId: 'warehouse', + name: 'orders', + matchReasons: expect.arrayContaining(['dictionary']), + dictionaryMatches: [{ column: 'status', values: ['refunded'] }], + }); + }); +}); diff --git a/packages/context/src/sl/pglite-sl-search-prototype.ts b/packages/context/src/sl/pglite-sl-search-prototype.ts new file mode 100644 index 00000000..551ad5d8 --- /dev/null +++ b/packages/context/src/sl/pglite-sl-search-prototype.ts @@ -0,0 +1,569 @@ +import { mkdir } from 'node:fs/promises'; +import { join } from 'node:path'; +import type { KloEmbeddingPort } from '../core/index.js'; +import type { KloLocalProject } from '../project/index.js'; +import { HybridSearchCore, type SearchCandidateGenerator } from '../search/index.js'; +import { KloPGliteOwnerProcess } from '../search/pglite-owner-process.js'; +import { + listLocalSlSources, + loadLocalSlSourceRecords, + type LocalSlSourceSearchResult, + type LocalSlSourceSummary, +} from './local-sl.js'; +import { loadLatestSlDictionaryEntries, type SlDictionaryEntry } from './sl-dictionary-profile.js'; +import { buildSemanticLayerSourceSearchText } from './sl-search.service.js'; +import type { SemanticLayerSource, 
SlDictionaryMatch, SlSearchMatchReason } from './types.js'; + +/** Options handed to the PGlite owner process. `dataDir` falls back to a directory under the project's `.klo` folder when omitted. */ +export interface PgliteSlSearchPrototypeOwnerOptions { + dataDir?: string; + host: string; + port: number; +} + +/** Input for the prototype search: optional connection scope, query text, optional embedding service, optional result limit, and owner-process options. */ +export interface PgliteSlSearchPrototypeInput { + connectionId?: string; + query: string; + embeddingService?: KloEmbeddingPort | null; + limit?: number; + pglite: PgliteSlSearchPrototypeOwnerOptions; +} + +/** A loaded semantic-layer source together with its summary and the text used for matching. */ +interface LocalSlSearchCandidate { + summary: LocalSlSourceSummary; + source: SemanticLayerSource; + searchText: string; +} + +/** Row shape returned by the lane queries; callers coerce `score` with Number(). */ +interface PgliteLaneRow { + id: string; + connection_id: string; + source_name: string; + score: number | string; +} + +/** Lane row extended with the matched dictionary column and value. */ +interface PgliteDictionaryRow extends PgliteLaneRow { + column_name: string; + value: string; +} + +/** Builds the `connectionId/name` key; the SQL lanes build the same id with `connection_id || '/' || source_name`. */ +function candidateKey(summary: LocalSlSourceSummary): string { + return `${summary.connectionId}/${summary.name}`; +} + +/** Resolves the PGlite data directory: the explicit `dataDir`, or `<projectDir>/.klo/pglite-search-prototype`. */ +function pgliteDataDir(project: KloLocalProject, input: PgliteSlSearchPrototypeOwnerOptions): string { + return input.dataDir ?? join(project.projectDir, '.klo', 'pglite-search-prototype'); +} + +/** Reads the configured embedding dimension and throws unless it is a positive integer. */ +function vectorDimensions(project: KloLocalProject): number { + const dimensions = project.config.ingest.embeddings.dimensions; + if (!Number.isInteger(dimensions) || dimensions <= 0) { + throw new Error(`PGlite SL search prototype needs a positive embedding dimension, got ${String(dimensions)}.`); + } + return dimensions; +} + +/** Returns a one-element connection filter, or null when the search is not scoped; the SQL lanes treat null as "no filter". */ +function connectionIdsForSearch(input: { connectionId?: string }): string[] | null { + return input.connectionId ? [input.connectionId] : null; +} + +/** Loads search candidates. With a `connectionId`, maps that connection's source records to candidates. Without one, derives connection ids from the second path segment of files under 'semantic-layer', keeps ids matching the safe-id pattern, loads each connection, and sorts by connection id then name. */ +async function loadCandidates( + project: KloLocalProject, + input: { connectionId?: string } = {}, +): Promise { + if (input.connectionId) { + return (await loadLocalSlSourceRecords(project, { connectionId: input.connectionId })).map((record) => ({ + summary: { + connectionId: record.connectionId, + name: record.name, + path: record.path, + ...(record.description ?
{ description: record.description } : {}), + columnCount: record.columnCount, + measureCount: record.measureCount, + joinCount: record.joinCount, + }, + source: record.source, + searchText: buildSemanticLayerSourceSearchText(record.source), + })); + } + + /* No connection given: discover connection ids from the stored file paths. */ + const listed = await project.fileStore.listFiles('semantic-layer'); + const connectionIds = [ + ...new Set( + listed.files + .map((path) => path.split('/')[1]) + .filter((connectionId): connectionId is string => + typeof connectionId === 'string' && /^[a-zA-Z0-9][a-zA-Z0-9_-]*$/.test(connectionId), + ), + ), + ].sort(); + const candidates: LocalSlSearchCandidate[] = []; + for (const connectionId of connectionIds) { + candidates.push(...(await loadCandidates(project, { connectionId }))); + } + return candidates.sort( + (left, right) => + left.summary.connectionId.localeCompare(right.summary.connectionId) || + left.summary.name.localeCompare(right.summary.name), + ); +} + +/** Token lane: scores each candidate by the fraction of `terms` found as substrings of its lowercased search text, drops candidates with no match, and orders by score (descending), then connection id, then name. Returns an empty list when `terms` is empty. Terms are compared as given, so this assumes callers pass lowercase terms (TODO confirm). */ +function tokenLaneCandidates(candidates: LocalSlSearchCandidate[], terms: readonly string[]) { + if (terms.length === 0) { + return []; + } + return candidates + .map((candidate) => { + const haystack = candidate.searchText.toLowerCase(); + const matchedTerms = terms.filter((term) => haystack.includes(term)); + return { + candidate, + score: matchedTerms.length / terms.length, + }; + }) + .filter((result) => result.score > 0) + .sort( + (left, right) => + right.score - left.score || + left.candidate.summary.connectionId.localeCompare(right.candidate.summary.connectionId) || + left.candidate.summary.name.localeCompare(right.candidate.summary.name), + ); +} + +/** Turns free text into a tsquery string that ORs its terms: lowercases, splits on runs of characters outside a-z, 0-9 and underscore, removes duplicates, and joins with ' | '. Returns an empty string when no term survives. */ +function postgresqlOrTsQuery(query: string): string { + const terms = query + .toLowerCase() + .split(/[^a-z0-9_]+/u) + .map((term) => term.trim()) + .filter(Boolean); + + return [...new Set(terms)].join(' | '); +} + +/** Drops and recreates the prototype source and dictionary tables with their indexes; the embedding column is sized to `dimensions`. */ +async function resetPrototypeSchema(owner: KloPGliteOwnerProcess, dimensions: number): Promise { + await owner.query(` + DROP TABLE IF EXISTS prototype_sl_dictionary_values; +
DROP TABLE IF EXISTS prototype_sl_sources; + + CREATE TABLE prototype_sl_sources ( + connection_id TEXT NOT NULL, + source_name TEXT NOT NULL, + path TEXT NOT NULL, + description TEXT, + column_count INTEGER NOT NULL, + measure_count INTEGER NOT NULL, + join_count INTEGER NOT NULL, + search_text TEXT NOT NULL, + embedding vector(${dimensions}), + PRIMARY KEY (connection_id, source_name) + ); + + CREATE INDEX prototype_sl_sources_fts_idx + ON prototype_sl_sources + USING GIN (to_tsvector('english', search_text)); + + CREATE INDEX prototype_sl_sources_vector_idx + ON prototype_sl_sources + USING ivfflat (embedding vector_cosine_ops) + WITH (lists = 1); + + CREATE TABLE prototype_sl_dictionary_values ( + connection_id TEXT NOT NULL, + source_name TEXT NOT NULL, + column_name TEXT NOT NULL, + value TEXT NOT NULL, + value_lower TEXT NOT NULL, + cardinality INTEGER, + PRIMARY KEY (connection_id, source_name, column_name, value) + ); + + CREATE INDEX prototype_sl_dictionary_values_trgm_idx + ON prototype_sl_dictionary_values + USING GIN (value gin_trgm_ops); + `); +} + +/** Embeds every candidate's search text in one bulk call and returns the vectors keyed by `connectionId/name`. Returns null when no embedding service is configured. Throws if any returned vector's length differs from `dimensions`. */ +async function sourceEmbeddings(input: { + candidates: LocalSlSearchCandidate[]; + embeddingService?: KloEmbeddingPort | null; + dimensions: number; +}): Promise | null> { + if (!input.embeddingService) { + return null; + } + + const texts = input.candidates.map((candidate) => candidate.searchText); + const embeddings = await input.embeddingService.computeEmbeddingsBulk(texts); + const byId = new Map(); + embeddings.forEach((embedding, index) => { + if (embedding.length !== input.dimensions) { + throw new Error( + `PGlite SL search prototype expected ${input.dimensions} embedding dimensions, got ${embedding.length}.`, + ); + } + /* Embeddings are paired with candidates by position; an index with no candidate is ignored. */ + const candidate = input.candidates[index]; + if (candidate) { + byId.set(candidateKey(candidate.summary), embedding); + } + }); + return byId; +} + +/** Inserts one row per candidate into prototype_sl_sources; the embedding is sent as JSON text cast to vector, or NULL when none is available. */ +async function insertSourceRows(input: { + owner: KloPGliteOwnerProcess; + candidates: LocalSlSearchCandidate[]; + embeddings: Map |
null; +}): Promise { + for (const candidate of input.candidates) { + const summary = candidate.summary; + /* `embeddings` is null when no embedding service is configured, so the lookup yields undefined. */ + const embedding = input.embeddings?.get(candidateKey(summary)); + await input.owner.query( + ` + INSERT INTO prototype_sl_sources ( + connection_id, + source_name, + path, + description, + column_count, + measure_count, + join_count, + search_text, + embedding + ) + VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9::vector) + `, + [ + summary.connectionId, + summary.name, + summary.path, + summary.description ?? null, + summary.columnCount, + summary.measureCount, + summary.joinCount, + candidate.searchText, + embedding ? JSON.stringify(embedding) : null, + ], + ); + } +} + +/** Inserts one row per dictionary entry into prototype_sl_dictionary_values; `value_lower` is computed in SQL with lower(). */ +async function insertDictionaryRows(owner: KloPGliteOwnerProcess, entries: SlDictionaryEntry[]): Promise { + for (const entry of entries) { + await owner.query( + ` + INSERT INTO prototype_sl_dictionary_values ( + connection_id, + source_name, + column_name, + value, + value_lower, + cardinality + ) + VALUES ($1, $2, $3, $4, lower($4), $5) + `, + [entry.connectionId, entry.sourceName, entry.columnName, entry.value, entry.cardinality ?? null], + ); + } +} + +/** Collapses per-value dictionary rows into one ranked candidate per source id. Within a source, rows are sorted by column then value, and each column keeps at most 5 values with the remainder reported as `overflowCount`. */ +function groupDictionaryRows(rows: PgliteDictionaryRow[], limit: number) { + const grouped = new Map(); + for (const row of rows) { + grouped.set(row.id, [...(grouped.get(row.id) ?? []), row]); + } + + return [...grouped.entries()] + .map(([id, group]) => { + const first = group[0]; + const byColumn = new Map(); + /* `sort` reorders the group array in place before values are bucketed by column. */ + for (const row of group.sort( + (left, right) => left.column_name.localeCompare(right.column_name) || left.value.localeCompare(right.value), + )) { + byColumn.set(row.column_name, [...(byColumn.get(row.column_name) ?? []), row.value]); + } + const matches: SlDictionaryMatch[] = [...byColumn.entries()].map(([column, values]) => ({ + column, + values: values.slice(0, 5), + ...(values.length > 5 ? { overflowCount: values.length - 5 } : {}), + })); + return { + id, + connectionId: first?.connection_id ?? 
'', + sourceName: first?.source_name ?? '', + /* rawScore counts only the values kept after the 5-per-column cap, not the overflow. */ + rawScore: matches.reduce((total, match) => total + match.values.length, 0), + matches, + }; + }) + .sort( + (left, right) => + right.rawScore - left.rawScore || + right.matches.length - left.matches.length || + left.connectionId.localeCompare(right.connectionId) || + left.sourceName.localeCompare(right.sourceName), + ) + /* Keep at least one group even when `limit` is zero or negative, then assign 1-based ranks. */ + .slice(0, Math.max(1, limit)) + .map((candidate, index) => ({ ...candidate, rank: index + 1 })); +} + +/** Lexical lane: full-text search over `search_text` using an OR tsquery built from the query text, ranked with ts_rank_cd. Returns an empty list when the query yields no terms. A null `connectionIds` disables the connection filter. Ranks are 1-based in result order. */ +async function queryLexicalCandidates(input: { + owner: KloPGliteOwnerProcess; + queryText: string; + connectionIds: string[] | null; + limit: number; +}) { + const tsQuery = postgresqlOrTsQuery(input.queryText); + if (!tsQuery) { + return []; + } + + const result = await input.owner.query( + ` + SELECT + connection_id || '/' || source_name AS id, + connection_id, + source_name, + ts_rank_cd(to_tsvector('english', search_text), to_tsquery('english', $1)) AS score + FROM prototype_sl_sources + WHERE to_tsvector('english', search_text) @@ to_tsquery('english', $1) + AND ($2::text[] IS NULL OR connection_id = ANY($2::text[])) + ORDER BY score DESC, connection_id ASC, source_name ASC + LIMIT $3 + `, + [tsQuery, input.connectionIds, Math.max(1, input.limit)], + ); + + return result.rows.map((row, index) => ({ + id: row.id, + connectionId: row.connection_id, + sourceName: row.source_name, + rank: index + 1, + rawScore: Number(row.score), + })); +} + +/** Semantic lane: embeds the query and orders sources by vector distance, reporting `1 - distance` as the score. Returns a 'skipped' lane result instead of throwing when no embedding service is configured, when the query embedding has the wrong dimension, or when embedding or querying fails. */ +async function querySemanticCandidates(input: { + owner: KloPGliteOwnerProcess; + queryText: string; + connectionIds: string[] | null; + embeddingService?: KloEmbeddingPort | null; + dimensions: number; + limit: number; +}) { + if (!input.embeddingService) { + return { status: 'skipped' as const, candidates: [], reason: 'embedding_unconfigured' }; + } + + try { + const queryEmbedding = await input.embeddingService.computeEmbedding(input.queryText); + if (queryEmbedding.length !== input.dimensions) { + return { + status: 'skipped' as const, + candidates: [], +
reason: `embedding_unhealthy:expected ${input.dimensions} dimensions, got ${queryEmbedding.length}`, + }; + } + + const result = await input.owner.query( + ` + SELECT + connection_id || '/' || source_name AS id, + connection_id, + source_name, + 1 - (embedding <=> $1::vector) AS score + FROM prototype_sl_sources + WHERE embedding IS NOT NULL + AND ($2::text[] IS NULL OR connection_id = ANY($2::text[])) + ORDER BY embedding <=> $1::vector, connection_id ASC, source_name ASC + LIMIT $3 + `, + [JSON.stringify(queryEmbedding), input.connectionIds, Math.max(1, input.limit)], + ); + + return { + candidates: result.rows.map((row, index) => ({ + id: row.id, + connectionId: row.connection_id, + sourceName: row.source_name, + rank: index + 1, + rawScore: Number(row.score), + })), + }; + } catch (error) { + /* Embedding or query failures degrade the lane to 'skipped' rather than failing the whole search. */ + return { + status: 'skipped' as const, + candidates: [], + reason: `embedding_unhealthy:${error instanceof Error ? error.message : String(error)}`, + }; + } +} + +/** Dictionary lane: matches the trimmed query against stored dictionary values. A value scores the greatest of its trigram similarity, 1 for a case-insensitive exact match, and 0.75 for a case-insensitive substring match. Fetches up to max(25, limit * 4) value rows and groups them per source with groupDictionaryRows. Returns an empty list for a blank query. Substring matching uses strpos so that `%`, `_` and backslash in the query are matched literally instead of acting as LIKE wildcards or escapes. */ +async function queryDictionaryCandidates(input: { + owner: KloPGliteOwnerProcess; + queryText: string; + connectionIds: string[] | null; + limit: number; +}) { + const query = input.queryText.trim(); + if (!query) { + return []; + } + + const result = await input.owner.query( + ` + SELECT + connection_id || '/' || source_name AS id, + connection_id, + source_name, + column_name, + value, + GREATEST( + similarity(value, $1), + CASE WHEN value_lower = lower($1) THEN 1 ELSE 0 END, + CASE WHEN strpos(value_lower, lower($1)) > 0 THEN 0.75 ELSE 0 END + ) AS score + FROM prototype_sl_dictionary_values + WHERE ( + similarity(value, $1) > 0 + OR value_lower = lower($1) + OR strpos(value_lower, lower($1)) > 0 + ) + AND ($2::text[] IS NULL OR connection_id = ANY($2::text[])) + ORDER BY score DESC, connection_id ASC, source_name ASC, column_name ASC, value ASC + LIMIT $3 + `, + [query, input.connectionIds, Math.max(25, input.limit * 4)], + ); + + return groupDictionaryRows(result.rows, input.limit); +} + +/** Prototype search backed by a PGlite owner process. A blank query returns every listed source with score 1. Otherwise it starts the owner process, rebuilds the prototype tables from the current sources and dictionary entries, fuses the lexical, dictionary, token and semantic lanes through HybridSearchCore, and stops the owner process in a finally block. */ +export
async function searchLocalSlSourcesWithPglitePrototype( + project: KloLocalProject, + input: PgliteSlSearchPrototypeInput, +): Promise { + const query = input.query.trim(); + if (!query) { + return (await listLocalSlSources(project, { connectionId: input.connectionId })).map((source) => ({ + ...source, + score: 1, + })); + } + + const candidates = await loadCandidates(project, { connectionId: input.connectionId }); + const dimensions = vectorDimensions(project); + const dataDir = pgliteDataDir(project, input.pglite); + await mkdir(dataDir, { recursive: true }); + + const owner = await KloPGliteOwnerProcess.start({ + dataDir, + host: input.pglite.host, + port: input.pglite.port, + }); + + try { + const embeddings = await sourceEmbeddings({ + candidates, + embeddingService: input.embeddingService ?? null, + dimensions, + }); + await resetPrototypeSchema(owner, dimensions); + await insertSourceRows({ owner, candidates, embeddings }); + + const candidateConnectionIds = [...new Set(candidates.map((candidate) => candidate.summary.connectionId))].sort(); + const dictionaryEntries = await loadLatestSlDictionaryEntries(project, candidateConnectionIds); + await insertDictionaryRows(owner, dictionaryEntries); + + const candidateById = new Map(candidates.map((candidate) => [candidateKey(candidate.summary), candidate])); + const connectionIds = connectionIdsForSearch(input); + const finalLimit = input.limit ?? 
candidates.length; + const dictionaryEvidence = new Map(); + const core = new HybridSearchCore(); + + const generators: SearchCandidateGenerator[] = [ + { + lane: 'lexical', + async generate(args) { + const rows = await queryLexicalCandidates({ + owner, + queryText: args.queryText, + connectionIds, + limit: args.laneCandidatePoolLimit, + }); + return { + candidates: rows.map((row) => ({ id: row.id, rank: row.rank, rawScore: row.rawScore })), + }; + }, + }, + { + lane: 'dictionary', + async generate(args) { + const rows = await queryDictionaryCandidates({ + owner, + queryText: args.queryText, + connectionIds, + limit: args.laneCandidatePoolLimit, + }); + for (const row of rows) { + dictionaryEvidence.set(row.id, row.matches); + } + return { + candidates: rows.map((row) => ({ + id: row.id, + rank: row.rank, + rawScore: row.rawScore, + evidence: row.matches, + })), + }; + }, + }, + { + lane: 'token', + async generate(args) { + const rows = tokenLaneCandidates(candidates, args.normalizedQuery.terms).slice( + 0, + args.laneCandidatePoolLimit, + ); + return { + candidates: rows.map((row, index) => ({ + id: candidateKey(row.candidate.summary), + rank: index + 1, + rawScore: row.score, + })), + }; + }, + }, + { + lane: 'semantic', + async generate(args) { + return querySemanticCandidates({ + owner, + queryText: args.queryText, + connectionIds, + embeddingService: input.embeddingService ?? null, + dimensions, + limit: args.laneCandidatePoolLimit, + }); + }, + }, + ]; + + const fused = await core.search({ queryText: query, limit: finalLimit, generators }); + const hydrated: LocalSlSourceSearchResult[] = []; + for (const result of fused.results) { + const candidate = candidateById.get(result.id); + if (!candidate) { + continue; + } + const dictionaryMatches = dictionaryEvidence.get(result.id); + hydrated.push({ + ...candidate.summary, + score: result.score, + matchReasons: result.matchReasons as SlSearchMatchReason[], + ...(dictionaryMatches && dictionaryMatches.length > 0 ? 
{ dictionaryMatches } : {}), + lanes: fused.lanes, + }); + } + return hydrated; + } finally { + await owner.stop(); + } +} diff --git a/packages/context/src/sl/ports.ts b/packages/context/src/sl/ports.ts new file mode 100644 index 00000000..dbad7d02 --- /dev/null +++ b/packages/context/src/sl/ports.ts @@ -0,0 +1,53 @@ +import type { SemanticLayerQueryInput, SemanticLayerSource } from './types.js'; + +export interface KloConnectionInfo { + id: string; + name: string; + connectionType: string; +} + +export interface KloQueryResult { + headers?: string[]; + rows?: unknown[][]; + totalRows?: number; +} + +export interface SlConnectionCatalogPort { + listEnabledConnections(ids: string[]): Promise; + getConnectionById(connectionId: string): Promise; + executeQuery(connectionId: string, sql: string): Promise; +} + +export interface SlPythonPort { + validateSources(input: { + sources: SemanticLayerSource[]; + dialect: string; + recently_touched?: string[]; + }): Promise<{ + data?: { errors?: string[]; warnings?: string[]; per_source_warnings?: Record } | null; + error?: unknown; + }>; + query(input: { + sources: SemanticLayerSource[]; + query: SemanticLayerQueryInput; + dialect: string; + }): Promise<{ data?: { sql?: string; plan?: Record } | null; error?: unknown }>; +} + +export interface SlSourcesIndexPort { + upsertSources( + connectionId: string, + sources: Array<{ sourceName: string; searchText: string; embedding: number[] | null; contentHash?: string | null }>, + ): Promise; + getExistingSearchTexts(connectionId: string): Promise>; + deleteStale(connectionId: string, keepNames: string[]): Promise; + deleteByConnection(connectionId: string): Promise; + deleteByConnectionAndName(connectionId: string, sourceName: string): Promise; + search( + connectionId: string, + queryEmbedding: number[] | null, + queryText: string, + limit: number, + minRrfScore?: number, + ): Promise>; +} diff --git a/packages/context/src/sl/schemas.ts b/packages/context/src/sl/schemas.ts new file 
// Literal vocabularies. These mirror the Python Pydantic model at
// python-service/klo-sl/semantic_layer/models.py (SourceColumn / ColumnRole /
// ColumnVisibility / JoinDeclaration) and must be changed together with it:
// if the two drift apart, a YAML can pass TypeScript validation at ingest time
// and then fail Python loading at query time.
const columnTypeValues = ['string', 'number', 'time', 'boolean'] as const;
const columnRoleValues = ['time', 'default'] as const;
const columnVisibilityValues = ['public', 'internal', 'hidden'] as const;
const joinRelationshipValues = ['many_to_one', 'one_to_many', 'one_to_one'] as const;

/** A string that must contain at least one character. */
const nonEmptyString = z.string().min(1);
/** A free-form string field that may be omitted. */
const optionalString = z.string().optional();

/** A named aggregate expression, optionally filtered and/or restricted to named segments. */
const slMeasureDefinitionSchema = z.object({
  name: nonEmptyString,
  expr: nonEmptyString,
  filter: optionalString,
  segments: z.array(nonEmptyString).optional(),
  description: optionalString,
});

/** A named boolean expression that measures can reference by name. */
const segmentDefinitionSchema = z.object({
  name: nonEmptyString,
  expr: nonEmptyString,
  description: optionalString,
});

/** Default time dimension, keyed by the metadata source that declared it (`dbt`). */
const defaultTimeDimensionDbtSchema = z.object({
  dbt: optionalString,
});

/** Column constraints carried over from dbt. */
const dbtColumnConstraintsSchema = z.object({
  not_null: z.boolean().optional(),
  unique: z.boolean().optional(),
});

/** Reference to a single dbt data test: test name, owning package, optional keyword arguments. */
const dbtDataTestRefSchema = z.object({
  name: nonEmptyString,
  package: nonEmptyString,
  kwargs: z.record(z.string(), z.unknown()).optional(),
});

/** dbt tests attached to a column, as full references and/or as test names grouped by package. */
const dbtColumnTestsSchema = z.object({
  dbt: z.array(dbtDataTestRefSchema).optional(),
  dbt_by_package: z.record(z.string(), z.array(nonEmptyString)).optional(),
});

/** List of non-empty strings keyed by metadata source (used for `enum_values` and `tags`). */
const sourceKeyedStringArraySchema = z.object({
  dbt: z.array(nonEmptyString).optional(),
});

/** Column constraints keyed by metadata source. */
const sourceKeyedColumnConstraintsSchema = z.object({
  dbt: dbtColumnConstraintsSchema.optional(),
});

/** dbt freshness block: the untyped raw config plus the (nullable) loaded-at column name. */
const freshnessDbtSchema = z.object({
  raw: z.unknown().optional(),
  loaded_at_field: z.string().nullable().optional(),
});

/** Freshness metadata keyed by metadata source. */
const sourceFreshnessSchema = z.object({
  dbt: freshnessDbtSchema.optional(),
});

/** Join from this source to another one (`to`) on a condition (`on`) with a declared cardinality. */
const joinDeclarationSchema = z.object({
  to: nonEmptyString,
  on: nonEmptyString,
  relationship: z.enum(joinRelationshipValues),
  alias: optionalString,
});

/** Column of a standalone source. */
const sourceColumnSchema = z.object({
  name: nonEmptyString,
  // `type` and `description` may be left out on standalone sources: compose-time
  // enrichment fills them from the manifest entry named in `inherits_columns_from`.
  // When `inherits_columns_from` is unset, or the column is missing from the
  // manifest, `type` has to be present — that case is surfaced by sl_validate.
  type: z.enum(columnTypeValues).optional(),
  role: z.enum(columnRoleValues).optional(),
  visibility: z.enum(columnVisibilityValues).optional(),
  description: optionalString,
  expr: optionalString,
  constraints: sourceKeyedColumnConstraintsSchema.optional(),
  enum_values: sourceKeyedStringArraySchema.optional(),
  tests: dbtColumnTestsSchema.optional(),
});

/**
 * Column of an overlay. A `type` is only accepted together with an `expr`:
 * structural column types are inherited from the manifest, so only computed
 * columns may declare one.
 */
const overlayColumnSchema = z
  .object({
    name: nonEmptyString,
    type: z.enum(columnTypeValues).optional(),
    role: z.enum(columnRoleValues).optional(),
    visibility: z.enum(columnVisibilityValues).optional(),
    description: optionalString,
    expr: optionalString,
  })
  .refine((column) => column.type === undefined || Boolean(column.expr), {
    message: "Overlay column with 'type' must also have 'expr' (only computed columns may specify a type)",
  });

/**
 * Standalone source: backed by exactly one of `table` or `sql`, with a
 * non-empty `grain`. Unknown top-level keys are rejected (`.strict()`).
 */
export const sourceDefinitionSchema = z
  .object({
    name: nonEmptyString,
    description: optionalString,
    // Accepted for documentation parity with the Python spec; behavior is driven
    // by the `table` / `sql` fields, not by this discriminator.
    source_type: z.enum(['table', 'sql']).optional(),
    table: optionalString,
    sql: optionalString,
    // Manifest key (e.g. "CONSIGNMENTS") whose column metadata fills any blank
    // type/descriptions/role on this source's columns at compose time, so the
    // agent can write `columns: [{name: FOO}]` instead of redeclaring known fields.
    // Lookup is fuzzy: bare key, fully-qualified table path, or any suffix all match.
    inherits_columns_from: optionalString,
    grain: z.array(z.string()).min(1),
    columns: z.array(sourceColumnSchema).default([]),
    joins: z.array(joinDeclarationSchema).default([]),
    measures: z.array(slMeasureDefinitionSchema).default([]),
    segments: z.array(segmentDefinitionSchema).optional(),
    default_time_dimension: defaultTimeDimensionDbtSchema.optional(),
    tags: sourceKeyedStringArraySchema.optional(),
    freshness: sourceFreshnessSchema.optional(),
  })
  .strict()
  // Exactly one of the two backing fields must be set (empty strings count as unset).
  .refine((source) => Boolean(source.table) !== Boolean(source.sql), {
    message: "Standalone source must have exactly one of 'table' or 'sql' (not both)",
  });

/**
 * Overlay source: has no `table`/`sql`; every field except `name` is optional.
 * Unknown top-level keys are rejected (`.strict()`).
 */
export const sourceOverlaySchema = z
  .object({
    name: nonEmptyString,
    description: optionalString,
    descriptions: z.record(z.string(), z.string()).optional(),
    grain: z.array(z.string()).optional(),
    columns: z.array(overlayColumnSchema).optional(),
    joins: z.array(joinDeclarationSchema).optional(),
    measures: z.array(slMeasureDefinitionSchema).optional(),
    segments: z.array(segmentDefinitionSchema).optional(),
    exclude_columns: z.array(z.string()).optional(),
    disable_joins: z.array(z.string()).optional(),
    default_time_dimension: defaultTimeDimensionDbtSchema.optional(),
  })
  .strict();
/**
 * Returns true if the source data is an overlay, i.e. it has neither a `table`
 * nor a `sql` field.
 *
 * The check is truthiness-based, so an empty-string `table`/`sql` counts as
 * absent.
 *
 * @param source Parsed source data keyed by field name. Typed as
 *   `Record<string, unknown>` so that any parsed YAML object or spread source
 *   can be passed without a cast.
 * @returns `true` for an overlay, `false` for a standalone (`table`/`sql`) source.
 */
export function isOverlaySource(source: Record<string, unknown>): boolean {
  return !source.table && !source.sql;
}
'byol', expr: "lab_type = 'byol'" }], + measures: [ + { + name: 'byol_subscriber_count', + expr: 'count(distinct admin_user_id)', + segments: ['byol'], + description: 'BYOL subscribers', + }, + ], + }; + const composed = composeOverlay(baseTable, overlay); + expect(composed.measures).toHaveLength(1); + expect(composed.measures[0].segments).toEqual(['byol']); + }); + + it('leaves base segments unchanged when overlay does not specify segments', () => { + const baseWithSegments: SemanticLayerSource = { + ...baseTable, + segments: [{ name: 'pre_existing', expr: 'is_paid = true' }], + }; + const overlay = { name: 'fct_labs', description: 'no segments here' }; + const composed = composeOverlay(baseWithSegments, overlay); + expect(composed.segments).toEqual([{ name: 'pre_existing', expr: 'is_paid = true' }]); + }); + + it('replaces base segments when overlay provides its own (even an empty array)', () => { + const baseWithSegments: SemanticLayerSource = { + ...baseTable, + segments: [{ name: 'pre_existing', expr: 'is_paid = true' }], + }; + const overlay = { name: 'fct_labs', segments: [] }; + const composed = composeOverlay(baseWithSegments, overlay); + expect(composed.segments).toEqual([]); + }); + + it('throws on unknown top-level overlay keys with a pointed error', () => { + const overlay = { name: 'fct_labs', frobnicate: true }; + expect(() => composeOverlay(baseTable, overlay)).toThrow( + /overlay for 'fct_labs' has unhandled keys \[frobnicate\]/, + ); + }); + + it('lists every unknown key in the error message, not just the first', () => { + const overlay = { name: 'fct_labs', foo: 1, bar: 2 }; + expect(() => composeOverlay(baseTable, overlay)).toThrow(/foo, bar/); + }); + + it('still handles existing known keys without regression', () => { + const overlay = { + name: 'fct_labs', + description: 'patient lab orders', + exclude_columns: ['admin_user_id'], + columns: [{ name: 'is_byol', type: 'boolean', expr: "lab_type = 'byol'" }], + measures: [{ name: 'count_all', 
expr: 'count(*)' }], + }; + const composed = composeOverlay(baseTable, overlay); + expect(composed.columns.find((c) => c.name === 'admin_user_id')).toBeUndefined(); + expect(composed.columns.find((c) => c.name === 'is_byol')).toBeDefined(); + expect(composed.measures).toHaveLength(1); + }); + + it('merges overlay descriptions (plural) with base descriptions keyed by source', () => { + const baseWithDescriptions: SemanticLayerSource = { + ...baseTable, + descriptions: { db: 'scan-derived description', ai: 'AI description' }, + }; + const overlay = { + name: 'fct_labs', + descriptions: { dbt: 'dbt description', ai: 'AI description (overridden)' }, + }; + const composed = composeOverlay(baseWithDescriptions, overlay); + expect(composed.descriptions).toEqual({ + db: 'scan-derived description', + ai: 'AI description (overridden)', + dbt: 'dbt description', + }); + }); +}); + +describe('enrichColumnsFromManifest', () => { + const manifest: SemanticLayerSource = { + name: 'CONSIGNMENTS', + table: 'ANALYTICS.MARTS.CONSIGNMENTS', + grain: ['CONSIGNED_ITEM_ID'], + columns: [ + { + name: 'CONSIGNED_ITEM_ID', + type: 'string', + descriptions: { ai: 'Unique identifier for the consigned item record.' }, + }, + { + name: 'CASH_ADV_AMOUNT', + type: 'number', + descriptions: { ai: 'Amount of cash advance disbursed to consigners.' }, + }, + { + name: 'CONSIGNMENT_CREATED_AT', + type: 'time', + role: 'time', + descriptions: { ai: 'Timestamp when the consignment was created.' 
}, + }, + ], + joins: [], + measures: [], + }; + + it('fills blank type and descriptions on source columns from the manifest', () => { + const source: SemanticLayerSource = { + name: 'aav_consignments', + sql: 'SELECT CONSIGNED_ITEM_ID, CASH_ADV_AMOUNT FROM MARTS.CONSIGNMENTS WHERE ...', + inherits_columns_from: 'CONSIGNMENTS', + grain: ['CONSIGNED_ITEM_ID'], + columns: [ + { name: 'CONSIGNED_ITEM_ID', type: '' }, + { name: 'CASH_ADV_AMOUNT', type: '' }, + ], + joins: [], + measures: [], + }; + const enriched = enrichColumnsFromManifest(source, manifest); + expect(enriched.columns[0]).toEqual({ + name: 'CONSIGNED_ITEM_ID', + type: 'string', + descriptions: { ai: 'Unique identifier for the consigned item record.' }, + }); + expect(enriched.columns[1]).toEqual({ + name: 'CASH_ADV_AMOUNT', + type: 'number', + descriptions: { ai: 'Amount of cash advance disbursed to consigners.' }, + }); + }); + + it('preserves a local description if the source already declared one', () => { + const source: SemanticLayerSource = { + name: 'aav_consignments', + sql: 'SELECT CONSIGNED_ITEM_ID FROM ...', + inherits_columns_from: 'CONSIGNMENTS', + grain: ['CONSIGNED_ITEM_ID'], + columns: [ + { + name: 'CONSIGNED_ITEM_ID', + type: 'string', + descriptions: { ai: 'AAV-specific note: always non-null in this filtered view.' 
}, + }, + ], + joins: [], + measures: [], + }; + const enriched = enrichColumnsFromManifest(source, manifest); + expect(enriched.columns[0].descriptions).toEqual({ + ai: 'AAV-specific note: always non-null in this filtered view.', + }); + }); + + it('passes through columns absent from the manifest unchanged', () => { + const source: SemanticLayerSource = { + name: 'aav_consignments', + sql: 'SELECT ALT_VALUE_COMBINED, my_derived FROM ...', + inherits_columns_from: 'CONSIGNMENTS', + grain: ['CONSIGNED_ITEM_ID'], + columns: [{ name: 'my_derived', type: 'number', expr: 'CASH_ADV_AMOUNT * 2' }], + joins: [], + measures: [], + }; + const enriched = enrichColumnsFromManifest(source, manifest); + expect(enriched.columns[0]).toEqual({ + name: 'my_derived', + type: 'number', + expr: 'CASH_ADV_AMOUNT * 2', + }); + }); + + it('copies role from the manifest when the source omits it', () => { + const source: SemanticLayerSource = { + name: 'aav_consignments', + sql: 'SELECT CONSIGNMENT_CREATED_AT FROM ...', + inherits_columns_from: 'CONSIGNMENTS', + grain: ['CONSIGNED_ITEM_ID'], + columns: [{ name: 'CONSIGNMENT_CREATED_AT', type: '' }], + joins: [], + measures: [], + }; + const enriched = enrichColumnsFromManifest(source, manifest); + expect(enriched.columns[0].role).toBe('time'); + expect(enriched.columns[0].type).toBe('time'); + }); + + it('returns the source unchanged when manifestEntry is null/undefined', () => { + const source: SemanticLayerSource = { + name: 'aav_consignments', + sql: 'SELECT FOO FROM ...', + grain: ['FOO'], + columns: [{ name: 'FOO', type: '' }], + joins: [], + measures: [], + }; + const enriched = enrichColumnsFromManifest(source, null); + expect(enriched).toEqual(source); + }); +}); + +describe('sourceDefinitionSchema', () => { + it('preserves dbt structural metadata fields used by manifest-backed SL readers', () => { + const result = sourceDefinitionSchema.safeParse({ + name: 'orders', + table: 'public.orders', + grain: ['id'], + columns: [ + { + 
name: 'status', + type: 'string', + constraints: { dbt: { not_null: true, unique: true } }, + enum_values: { dbt: ['placed', 'shipped'] }, + tests: { + dbt: [{ name: 'accepted_values', package: 'dbt' }], + dbt_by_package: { dbt: ['accepted_values'] }, + }, + }, + ], + joins: [], + measures: [], + tags: { dbt: ['mart', 'finance'] }, + freshness: { dbt: { loaded_at_field: 'updated_at', raw: { warn_after: { count: 12, period: 'hour' } } } }, + default_time_dimension: { dbt: 'updated_at' }, + }); + + expect(result.success).toBe(true); + if (!result.success) { + return; + } + expect(result.data.columns[0]).toMatchObject({ + constraints: { dbt: { not_null: true, unique: true } }, + enum_values: { dbt: ['placed', 'shipped'] }, + tests: { + dbt: [{ name: 'accepted_values', package: 'dbt' }], + dbt_by_package: { dbt: ['accepted_values'] }, + }, + }); + expect(result.data.tags).toEqual({ dbt: ['mart', 'finance'] }); + expect(result.data.freshness).toEqual({ + dbt: { loaded_at_field: 'updated_at', raw: { warn_after: { count: 12, period: 'hour' } } }, + }); + }); +}); + +describe('findManifestEntryByTableRef', () => { + let configService: { + listFiles: Mock<(dir: string, recursive?: boolean) => Promise<{ files: string[] }>>; + readFile: Mock<(path: string) => Promise<{ content: string }>>; + }; + let service: SemanticLayerService; + + beforeEach(() => { + configService = { + listFiles: vi.fn<(dir: string, recursive?: boolean) => Promise<{ files: string[] }>>().mockResolvedValue({ + files: ['semantic-layer/conn-1/_schema/marts.yaml'], + }), + readFile: vi.fn<(path: string) => Promise<{ content: string }>>().mockResolvedValue({ + content: [ + 'tables:', + ' CONSIGNMENTS:', + ' table: ANALYTICS.MARTS.CONSIGNMENTS', + ' columns:', + ' - { name: CONSIGNED_ITEM_ID, type: string, pk: true }', + ].join('\n'), + }), + }; + service = new SemanticLayerService(configService as never, connectionCatalog(), pythonPort); + }); + + it('finds by exact bare manifest key', async () => { + const 
entry = await service.findManifestEntryByTableRef('conn-1', 'CONSIGNMENTS'); + expect(entry?.name).toBe('CONSIGNMENTS'); + }); + + it('finds by fully-qualified table path', async () => { + const entry = await service.findManifestEntryByTableRef('conn-1', 'ANALYTICS.MARTS.CONSIGNMENTS'); + expect(entry?.name).toBe('CONSIGNMENTS'); + }); + + it('finds by schema-qualified suffix', async () => { + const entry = await service.findManifestEntryByTableRef('conn-1', 'MARTS.CONSIGNMENTS'); + expect(entry?.name).toBe('CONSIGNMENTS'); + }); + + it('matches case-insensitively on table path', async () => { + const entry = await service.findManifestEntryByTableRef('conn-1', 'analytics.marts.consignments'); + expect(entry?.name).toBe('CONSIGNMENTS'); + }); + + it('returns null when nothing matches', async () => { + const entry = await service.findManifestEntryByTableRef('conn-1', 'NOT_A_TABLE'); + expect(entry).toBeNull(); + }); +}); + +describe('loadAllSources — standalone enrichment via inherits_columns_from', () => { + let configService: { + listFiles: Mock<(dir: string, recursive?: boolean) => Promise<{ files: string[] }>>; + readFile: Mock<(path: string) => Promise<{ content: string }>>; + }; + let service: SemanticLayerService; + + beforeEach(() => { + configService = { + listFiles: vi.fn<(dir: string, recursive?: boolean) => Promise<{ files: string[] }>>(), + readFile: vi.fn<(path: string) => Promise<{ content: string }>>(), + }; + service = new SemanticLayerService(configService as never, connectionCatalog(), pythonPort); + }); + + it('preserves dbt metadata when projecting manifest-backed sources', async () => { + const schemaPath = 'semantic-layer/conn-1/_schema/marts.yaml'; + configService.listFiles.mockImplementation((dir: string) => { + if (dir === 'semantic-layer/conn-1' || dir === 'semantic-layer/conn-1/_schema') { + return Promise.resolve({ files: [schemaPath] }); + } + return Promise.resolve({ files: [] }); + }); + configService.readFile.mockResolvedValue({ + 
content: [ + 'tables:', + ' orders:', + ' table: public.orders', + ' tags: { dbt: [mart] }', + ' freshness:', + ' dbt:', + ' loaded_at_field: updated_at', + ' columns:', + ' - name: status', + ' type: string', + ' constraints: { dbt: { not_null: true } }', + ' enum_values: { dbt: [placed, shipped] }', + ' tests:', + ' dbt:', + ' - { name: accepted_values, package: dbt }', + ].join('\n'), + }); + + const sources = await service.loadAllSources('conn-1'); + + expect(sources[0]).toMatchObject({ + name: 'orders', + tags: { dbt: ['mart'] }, + freshness: { dbt: { loaded_at_field: 'updated_at' } }, + columns: [ + { + name: 'status', + constraints: { dbt: { not_null: true } }, + enum_values: { dbt: ['placed', 'shipped'] }, + tests: { dbt: [{ name: 'accepted_values', package: 'dbt' }] }, + }, + ], + }); + }); + + it('fills blank columns on a standalone source from the manifest entry it points at', async () => { + const schemaPath = 'semantic-layer/conn-1/_schema/marts.yaml'; + const standalonePath = 'semantic-layer/conn-1/aav_consignments.yaml'; + + configService.listFiles.mockImplementation((dir: string) => { + if (dir === 'semantic-layer/conn-1') { + return Promise.resolve({ files: [schemaPath, standalonePath] }); + } + if (dir === 'semantic-layer/conn-1/_schema') { + return Promise.resolve({ files: [schemaPath] }); + } + return Promise.resolve({ files: [] }); + }); + configService.readFile.mockImplementation((path: string) => { + if (path === schemaPath) { + return Promise.resolve({ + content: [ + 'tables:', + ' CONSIGNMENTS:', + ' table: ANALYTICS.MARTS.CONSIGNMENTS', + ' columns:', + ' - name: CONSIGNED_ITEM_ID', + ' type: string', + ' descriptions: { ai: "Unique consigned-item id." }', + ' - name: CASH_ADV_AMOUNT', + ' type: number', + ' descriptions: { ai: "Cash advance amount." 
}', + ].join('\n'), + }); + } + if (path === standalonePath) { + return Promise.resolve({ + content: [ + 'name: aav_consignments', + 'sql: |', + ' SELECT CONSIGNED_ITEM_ID, CASH_ADV_AMOUNT FROM ANALYTICS.MARTS.CONSIGNMENTS WHERE x', + 'inherits_columns_from: CONSIGNMENTS', + 'grain: [CONSIGNED_ITEM_ID]', + 'columns:', + ' - { name: CONSIGNED_ITEM_ID }', + ' - { name: CASH_ADV_AMOUNT }', + ].join('\n'), + }); + } + return Promise.reject(new Error(`Unexpected readFile: ${path}`)); + }); + + const sources = await service.loadAllSources('conn-1'); + const aav = sources.find((s) => s.name === 'aav_consignments'); + expect(aav).toBeDefined(); + expect(aav?.columns).toEqual([ + { name: 'CONSIGNED_ITEM_ID', type: 'string', descriptions: { ai: 'Unique consigned-item id.' } }, + { name: 'CASH_ADV_AMOUNT', type: 'number', descriptions: { ai: 'Cash advance amount.' } }, + ]); + }); + + it('accepts a fully-qualified path in inherits_columns_from', async () => { + const schemaPath = 'semantic-layer/conn-1/_schema/marts.yaml'; + const standalonePath = 'semantic-layer/conn-1/aav_consignments.yaml'; + configService.listFiles.mockImplementation((dir: string) => { + if (dir === 'semantic-layer/conn-1') { + return Promise.resolve({ files: [schemaPath, standalonePath] }); + } + if (dir === 'semantic-layer/conn-1/_schema') { + return Promise.resolve({ files: [schemaPath] }); + } + return Promise.resolve({ files: [] }); + }); + configService.readFile.mockImplementation((path: string) => { + if (path === schemaPath) { + return Promise.resolve({ + content: [ + 'tables:', + ' CONSIGNMENTS:', + ' table: ANALYTICS.MARTS.CONSIGNMENTS', + ' columns:', + ' - { name: CONSIGNED_ITEM_ID, type: string }', + ].join('\n'), + }); + } + return Promise.resolve({ + content: [ + 'name: aav_consignments', + 'sql: SELECT 1', + 'inherits_columns_from: ANALYTICS.MARTS.CONSIGNMENTS', + 'grain: [CONSIGNED_ITEM_ID]', + 'columns:', + ' - { name: CONSIGNED_ITEM_ID }', + ].join('\n'), + }); + }); + + const sources = 
await service.loadAllSources('conn-1'); + const aav = sources.find((s) => s.name === 'aav_consignments'); + expect(aav?.columns[0].type).toBe('string'); + }); + + it('passes the source through unchanged if inherits_columns_from misses', async () => { + const standalonePath = 'semantic-layer/conn-1/aav_consignments.yaml'; + configService.listFiles.mockImplementation((dir: string) => { + if (dir === 'semantic-layer/conn-1') { + return Promise.resolve({ files: [standalonePath] }); + } + return Promise.resolve({ files: [] }); + }); + configService.readFile.mockResolvedValue({ + content: [ + 'name: aav_consignments', + 'sql: SELECT 1', + 'inherits_columns_from: NO_SUCH_TABLE', + 'grain: [FOO]', + 'columns:', + ' - { name: FOO, type: string }', + ].join('\n'), + }); + + const sources = await service.loadAllSources('conn-1'); + const aav = sources.find((s) => s.name === 'aav_consignments'); + expect(aav?.columns).toEqual([{ name: 'FOO', type: 'string' }]); + }); +}); + +describe('validateWithProposedSource', () => { + let configService: { + listFiles: Mock<(dir: string, recursive?: boolean) => Promise<{ files: string[] }>>; + readFile: Mock<(path: string) => Promise<{ content: string }>>; + }; + let service: SemanticLayerService; + + beforeEach(() => { + pythonPort.validateSources.mockReset(); + configService = { + listFiles: vi.fn<(dir: string, recursive?: boolean) => Promise<{ files: string[] }>>().mockResolvedValue({ + files: [], + }), + readFile: vi.fn<(path: string) => Promise<{ content: string }>>(), + }; + service = new SemanticLayerService(configService as never, connectionCatalog('BIGQUERY'), pythonPort); + }); + + it('uses the connection warehouse dialect, not hardcoded postgres', async () => { + pythonPort.validateSources.mockResolvedValue({ + data: { errors: [], warnings: [] }, + }); + + await service.validateWithProposedSource('conn-1', { + name: 'std', + table: 'analytics.std', + grain: ['id'], + columns: [{ name: 'id', type: 'number' }], + joins: [], + 
measures: [], + }); + + expect(pythonPort.validateSources).toHaveBeenCalledWith( + expect.objectContaining({ + dialect: 'bigquery', + }), + ); + }); + + it('composes a bare overlay with its manifest base before validating', async () => { + const schemaPath = 'semantic-layer/conn-1/_schema/core.yaml'; + const listFilesImpl = (dir: string): Promise<{ files: string[] }> => { + if (dir === 'semantic-layer/conn-1') { + return Promise.resolve({ files: [schemaPath, 'semantic-layer/conn-1/fct_orders.yaml'] }); + } + if (dir === 'semantic-layer/conn-1/_schema') { + return Promise.resolve({ files: [schemaPath] }); + } + return Promise.resolve({ files: [] }); + }; + const readFileImpl = (path: string): Promise<{ content: string }> => { + if (path === schemaPath) { + return Promise.resolve({ + content: [ + 'tables:', + ' fct_orders:', + ' table: analytics.fct_orders', + ' columns:', + ' - { name: id, type: string, pk: true }', + ' - { name: amount, type: number }', + ].join('\n'), + }); + } + if (path === 'semantic-layer/conn-1/fct_orders.yaml') { + return Promise.resolve({ content: 'name: fct_orders\nmeasures: []\n' }); + } + return Promise.reject(new Error(`Unexpected readFile: ${path}`)); + }; + configService.listFiles.mockImplementation(listFilesImpl); + configService.readFile.mockImplementation(readFileImpl); + + pythonPort.validateSources.mockResolvedValue({ + data: { errors: [], warnings: [] }, + }); + + const overlay: SemanticLayerSource = { + name: 'fct_orders', + grain: ['id'], + columns: [], + joins: [], + measures: [{ name: 'total_amount', expr: 'sum(amount)' }], + }; + + await service.validateWithProposedSource('conn-1', overlay); + + expect(pythonPort.validateSources).toHaveBeenCalledTimes(1); + const sources = (pythonPort.validateSources.mock.calls[0][0]?.sources ?? 
[]) as Array>; + const composed = sources.find((s) => s.name === 'fct_orders'); + expect(composed).toBeDefined(); + expect(composed?.table).toBe('analytics.fct_orders'); + expect(composed?.measures).toEqual([{ name: 'total_amount', expr: 'sum(amount)' }]); + }); + + it('returns a pointed error when a bare overlay has no manifest base', async () => { + configService.listFiles.mockResolvedValue({ files: [] }); + + const overlay: SemanticLayerSource = { + name: 'orphan', + grain: [], + columns: [], + joins: [], + measures: [], + }; + + const result = await service.validateWithProposedSource('conn-1', overlay); + expect(result.errors[0]).toMatch(/Overlay 'orphan' has no matching manifest entry/); + expect(pythonPort.validateSources).not.toHaveBeenCalled(); + }); +}); + +describe('findDanglingSegmentRefs', () => { + it('returns empty when every measure segment resolves', () => { + const source = { + segments: [{ name: 'byol' }, { name: 'paid' }], + measures: [ + { name: 'byol_count', segments: ['byol'] }, + { name: 'paid_count', segments: ['paid', 'byol'] }, + ], + }; + expect(findDanglingSegmentRefs(source)).toEqual([]); + }); + + it('flags measures whose segment reference does not exist on the source', () => { + const source = { + segments: [{ name: 'byol' }], + measures: [{ name: 'broken', segments: ['byol', 'missing'] }], + }; + const refs = findDanglingSegmentRefs(source); + expect(refs).toHaveLength(1); + expect(refs[0]).toMatch(/measure 'broken' references unknown segment 'missing'/); + }); + + it('flags when a source has zero segments but measures reference one', () => { + const source = { + measures: [{ name: 'broken', segments: ['byol'] }], + }; + const refs = findDanglingSegmentRefs(source); + expect(refs).toHaveLength(1); + expect(refs[0]).toMatch(/unknown segment 'byol'/); + }); + + it('is a no-op for sources with no measures or no segment references', () => { + expect(findDanglingSegmentRefs({ measures: [{ name: 'simple', expr: 'count(*)' }] 
/**
 * Turn an error value of unknown shape into a single human-readable message.
 *
 * Resolution order:
 *  1. a string is returned as-is;
 *  2. an object with a string `detail` returns that string;
 *  3. an object with an array `detail` returns the entries joined with `"; "`,
 *     each rendered as `"<loc joined by '.'>: <msg>"` — or just `<msg>` when the
 *     entry has no (or an empty) `loc`, so the message never starts with a
 *     dangling `": "`;
 *  4. an `Error` instance returns its `message` (or `fallback` if that is empty).
 *     `Error` has no enumerable own properties, so serializing it would
 *     otherwise yield the useless string `"{}"`;
 *  5. any other object is JSON-serialized; if serialization throws (e.g. a
 *     circular structure) or yields nothing, `fallback` is returned;
 *  6. everything else (null, undefined, numbers, …) returns `fallback`.
 *
 * NOTE(review): the `loc`/`msg` entry shape looks like a validation-error list
 * coming from the Python service — confirm against the port implementation.
 *
 * @param error    The thrown or returned error value.
 * @param fallback Message to use when nothing better can be extracted.
 * @returns A message string; this function itself never throws.
 */
function formatPortError(error: unknown, fallback: string): string {
  if (typeof error === 'string') {
    return error;
  }
  if (error && typeof error === 'object') {
    const detail = 'detail' in error ? error.detail : undefined;
    if (typeof detail === 'string') {
      return detail;
    }
    if (Array.isArray(detail)) {
      return detail
        .map((entry) => {
          const isRecord = Boolean(entry) && typeof entry === 'object';
          const loc: unknown[] = isRecord && 'loc' in entry && Array.isArray(entry.loc) ? entry.loc : [];
          const msg = isRecord && 'msg' in entry ? String(entry.msg) : String(entry);
          // Only prefix with the location when there is one.
          return loc.length > 0 ? `${loc.join('.')}: ${msg}` : msg;
        })
        .join('; ');
    }
    if (error instanceof Error) {
      return error.message || fallback;
    }
    try {
      // JSON.stringify can return undefined at runtime (e.g. a toJSON() that
      // returns undefined) even though its declared return type is string.
      const serialized: string | undefined = JSON.stringify(error);
      return serialized ?? fallback;
    } catch {
      // Circular references / BigInt values: fall back rather than throw
      // from inside an error formatter.
      return fallback;
    }
  }
  return fallback;
}
Used by the memory agent so SL tool reads inside the LLM loop see + * session-branch state (otherwise `sl_edit`/`sl_validate` would race against main). + */ + forWorktree(workdir: string): SemanticLayerService { + return new SemanticLayerService( + this.configService.forWorktree(workdir) as KloFileStorePort, + this.connections, + this.python, + this.logger, + ); + } + + async listConnectionIds(): Promise { + try { + const result = await this.configService.listFiles(SL_DIR_PREFIX); + // Directories under semantic-layer/ are connectionIds (UUIDs) + const uuidPattern = /^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$/i; + return result.files + .map((f) => f.replace(`${SL_DIR_PREFIX}/`, '').split('/')[0]) + .filter((name, i, arr) => uuidPattern.test(name) && arr.indexOf(name) === i); + } catch { + return []; + } + } + + async listConnectionIdsWithNames(): Promise> { + const ids = await this.listConnectionIds(); + if (ids.length === 0) { + return []; + } + return this.connections.listEnabledConnections(ids); + } + + // ── YAML File Operations ──────────────────────────────── + + private sourcePath(connectionId: string, sourceName: string): string { + return `${SL_DIR_PREFIX}/${connectionId}/${sourceName}.yaml`; + } + + async writeSource( + connectionId: string, + source: SemanticLayerSource, + author: string, + authorEmail: string, + commitMessage?: string, + options?: WriteSourceOptions & { skipLock?: boolean }, + ) { + // Writes are intentionally permissive — the agent must be able to save broken files so + // it can iterate on them with punctual edits (Claude-Code-style). Validation happens on + // demand via `sl_validate` and at query time (where invalid sources should be skipped + // rather than poisoning the whole connection's catalog). Issues found here are logged + // as warnings so the caller can surface them without blocking the save. 
The same + // warnings are returned to the caller so tool-facing wrappers can surface them to the + // LLM and enable same-turn self-correction. + const warnings: string[] = []; + + if (!options?.skipValidation) { + const sourceData: Record = { ...source }; + + if ((sourceData.table || sourceData.sql) && (await this.isManifestBacked(connectionId, source.name))) { + const msg = + `standalone source '${source.name}' shadows an existing manifest entry and ` + + `will drop the manifest's columns and joins. Rewrite as an overlay: remove ` + + `"sql:", "table:", "grain:", "columns:", "joins:"; keep only "name:" plus ` + + `"measures:"/"segments:"/"description:"`; + warnings.push(msg); + this.logger.warn(`[writeSource] ${msg}. Saving anyway.`); + } + + const schema = isOverlaySource(sourceData) ? sourceOverlaySchema : sourceDefinitionSchema; + const parsed = schema.safeParse(source); + if (!parsed.success) { + const issues = parsed.error.issues.map((i) => `${i.path.join('.')}: ${i.message}`).join('; '); + warnings.push(`schema: ${issues}`); + this.logger.warn(`[writeSource] schema validation warnings for '${source.name}': ${issues}. Saving anyway.`); + } + + const danglingRefs = findDanglingSegmentRefs(sourceData); + if (danglingRefs.length > 0) { + warnings.push(...danglingRefs); + this.logger.warn(`[writeSource] '${source.name}': ${danglingRefs.join('; ')}. Saving anyway.`); + } + } + + const path = this.sourcePath(connectionId, source.name); + const content = YAML.stringify(source, { indent: 2, lineWidth: 0 }); + const message = commitMessage ?? 
`Update semantic layer source: ${source.name}`; + const result = await this.configService.writeFile(path, content, author, authorEmail, message, { + skipLock: options?.skipLock, + }); + return { ...result, warnings }; + } + + async readSourceFile(connectionId: string, sourceName: string): Promise<{ content: string; path: string }> { + const path = this.sourcePath(connectionId, sourceName); + const result = await this.configService.readFile(path); + return { content: result.content, path }; + } + + async loadSource(connectionId: string, sourceName: string): Promise { + try { + const { content } = await this.readSourceFile(connectionId, sourceName); + return YAML.parse(content) as SemanticLayerSource; + } catch { + return null; + } + } + + async loadAllSources(connectionId: string): Promise { + const dir = `${SL_DIR_PREFIX}/${connectionId}`; + const schemaDir = `${dir}/_schema`; + + let allFiles: string[]; + try { + const result = await this.configService.listFiles(dir); + allFiles = result.files.filter((f) => f.endsWith('.yaml')); + } catch { + return []; + } + + // 1. Load manifest shards from _schema/*.yaml → project to sources + const sources = new Map(); + const schemaFiles = allFiles.filter((f) => f.startsWith(`${schemaDir}/`)); + + for (const filePath of schemaFiles) { + try { + const { content } = await this.configService.readFile(filePath); + const shard = YAML.parse(content) as { tables?: Record }; + if (shard?.tables) { + for (const [name, entry] of Object.entries(shard.tables)) { + sources.set(name, projectManifestEntry(name, entry)); + } + } + } catch (e) { + this.logger.warn(`Failed to parse manifest shard ${filePath}: ${e}`); + } + } + + // 2. 
Load files outside _schema/ + const nonSchemaFiles = allFiles.filter((f) => !f.startsWith(`${schemaDir}/`)); + for (const filePath of nonSchemaFiles) { + try { + const { content } = await this.configService.readFile(filePath); + const data = YAML.parse(content) as Record; + if (!data?.name) { + continue; + } + + const name = data.name as string; + + if (data.sql || data.table) { + // Standalone source — warn if it conflicts with a manifest entry + if (sources.has(name)) { + this.logger.warn(`Standalone source '${name}' in ${filePath} overrides manifest entry of the same name`); + } + let standalone: SemanticLayerSource = { + ...(data as Partial), + name, + grain: Array.isArray(data.grain) ? (data.grain as string[]) : [], + columns: Array.isArray(data.columns) ? (data.columns as SemanticLayerSource['columns']) : [], + joins: Array.isArray(data.joins) ? (data.joins as SemanticLayerSource['joins']) : [], + measures: Array.isArray(data.measures) ? (data.measures as SemanticLayerSource['measures']) : [], + }; + // If the source declares `inherits_columns_from`, fill any blank + // type/descriptions/role from the matching manifest entry. Lets the + // agent write `columns: [{name: FOO}]` without redeclaring known fields. + const inheritFrom = typeof data.inherits_columns_from === 'string' ? 
data.inherits_columns_from : null; + if (inheritFrom) { + const base = await this.findManifestEntryByTableRef(connectionId, inheritFrom); + if (base) { + standalone = enrichColumnsFromManifest(standalone, base); + } else { + this.logger.warn( + `Standalone source '${name}': inherits_columns_from "${inheritFrom}" did not match any manifest entry; columns left as-authored`, + ); + } + } + sources.set(name, standalone); + } else { + // Overlay — compose with manifest entry if present + const base = sources.get(name); + if (base) { + sources.set(name, composeOverlay(base, data)); + } else { + this.logger.warn(`Orphan overlay '${name}' in ${filePath}: no matching manifest entry`); + } + } + } catch (e) { + this.logger.warn(`Failed to parse YAML file ${filePath}: ${e}`); + } + } + + return Array.from(sources.values()); + } + + /** + * Return the union of all source names visible to this connection, each tagged with + * whether it appears in the manifest and whether an overlay YAML exists for it. + * Includes "orphan overlays" (overlay file present, no manifest entry) — these are + * absent from `loadAllSources` because they can't be composed, but the UI still + * needs to surface them as warnings when referenced elsewhere. 
+ */ + async getSourceStatuses( + connectionId: string, + ): Promise> { + const dir = `${SL_DIR_PREFIX}/${connectionId}`; + const schemaDir = `${dir}/_schema`; + const result = new Map(); + + let allFiles: string[]; + try { + const listing = await this.configService.listFiles(dir); + allFiles = listing.files.filter((f) => f.endsWith('.yaml')); + } catch { + return result; + } + + const getOrCreate = (name: string) => { + let entry = result.get(name); + if (!entry) { + entry = { inManifest: false, overlayExists: false, standalone: false }; + result.set(name, entry); + } + return entry; + }; + + const schemaFiles = allFiles.filter((f) => f.startsWith(`${schemaDir}/`)); + for (const filePath of schemaFiles) { + try { + const { content } = await this.configService.readFile(filePath); + const shard = YAML.parse(content) as { tables?: Record }; + if (shard?.tables) { + for (const name of Object.keys(shard.tables)) { + getOrCreate(name).inManifest = true; + } + } + } catch { + // Skip unparseable shards + } + } + + const nonSchemaFiles = allFiles.filter((f) => !f.startsWith(`${schemaDir}/`)); + for (const filePath of nonSchemaFiles) { + try { + const { content } = await this.configService.readFile(filePath); + const data = YAML.parse(content) as Record; + if (!data?.name || typeof data.name !== 'string') { + continue; + } + const entry = getOrCreate(data.name); + if (data.sql || data.table) { + entry.standalone = true; + } else { + entry.overlayExists = true; + } + } catch { + // Skip unparseable files + } + } + + return result; + } + + /** + * Return all manifest-backed source names for a connection — the set the agent may + * legitimately target with an overlay. Drives the `sl_write` orphan-overlay guardrail + * so the agent is steered toward a standalone-with-`sql:` rewrite when the name it + * picked has no base table. 
+ */ + async listManifestSourceNames(connectionId: string): Promise { + const statuses = await this.getSourceStatuses(connectionId); + return [...statuses.entries()].filter(([, s]) => s.inManifest).map(([name]) => name); + } + + async isManifestBacked(connectionId: string, sourceName: string): Promise { + return (await this.getManifestEntry(connectionId, sourceName)) !== null; + } + + async getManifestEntry(connectionId: string, sourceName: string): Promise { + const schemaDir = `${SL_DIR_PREFIX}/${connectionId}/_schema`; + try { + const result = await this.configService.listFiles(schemaDir); + const yamlFiles = result.files.filter((f) => f.endsWith('.yaml')); + for (const filePath of yamlFiles) { + try { + const { content } = await this.configService.readFile(filePath); + const shard = YAML.parse(content) as { tables?: Record }; + const entry = shard?.tables?.[sourceName]; + if (entry) { + return projectManifestEntry(sourceName, entry); + } + } catch { + // skip unparseable shards + } + } + } catch { + // no schema dir + } + return null; + } + + /** + * Resolve a table reference to its manifest entry. Accepts: + * - the bare manifest key (`CONSIGNMENTS`) + * - the fully-qualified `table:` value (`ANALYTICS.MARTS.CONSIGNMENTS`) + * - any dot-suffix of the table value (`MARTS.CONSIGNMENTS`) + * + * Case-insensitive on the path comparison. Returns the projected source or null. + */ + async findManifestEntryByTableRef(connectionId: string, ref: string): Promise { + // Try exact key match first (cheap, hits the by-name index). 
+ const exact = await this.getManifestEntry(connectionId, ref); + if (exact) { + return exact; + } + + const lowered = ref.toLowerCase(); + const dotSuffix = `.${lowered}`; + const schemaDir = `${SL_DIR_PREFIX}/${connectionId}/_schema`; + + let yamlFiles: string[]; + try { + const result = await this.configService.listFiles(schemaDir); + yamlFiles = result.files.filter((f) => f.endsWith('.yaml')); + } catch { + return null; + } + + for (const filePath of yamlFiles) { + try { + const { content } = await this.configService.readFile(filePath); + const shard = YAML.parse(content) as { tables?: Record }; + if (!shard?.tables) { + continue; + } + for (const [name, entry] of Object.entries(shard.tables)) { + const tablePath = entry.table?.toLowerCase() ?? ''; + if (tablePath === lowered || tablePath.endsWith(dotSuffix)) { + return projectManifestEntry(name, entry); + } + } + } catch { + // skip unparseable shards + } + } + return null; + } + + async getDialectForConnection(connectionId: string): Promise { + const connection = await this.connections.getConnectionById(connectionId); + if (!connection) { + throw new Error(`Data source not found: ${connectionId}`); + } + return SemanticLayerService.mapDialect(connection.connectionType); + } + + async listSourceNames(connectionId: string): Promise { + const dir = `${SL_DIR_PREFIX}/${connectionId}`; + try { + const result = await this.configService.listFiles(dir); + return result.files.filter((f) => f.endsWith('.yaml')).map((f) => f.replace(`${dir}/`, '').replace('.yaml', '')); + } catch { + return []; + } + } + + async listFilesForConnection(connectionId: string): Promise { + const dir = `${SL_DIR_PREFIX}/${connectionId}`; + try { + const result = await this.configService.listFiles(dir, true); + return result.files.filter((f) => f.endsWith('.yaml')); + } catch { + return []; + } + } + + async readFileByPath(connectionId: string, relativePath: string): Promise<{ content: string; readOnly: boolean }> { + const fullPath = 
`${SL_DIR_PREFIX}/${connectionId}/${relativePath}`; + const result = await this.configService.readFile(fullPath); + return { + content: result.content, + readOnly: relativePath.startsWith('_schema/'), + }; + } + + async deleteSource(connectionId: string, sourceName: string, author: string, authorEmail: string) { + const path = this.sourcePath(connectionId, sourceName); + return this.configService.deleteFile(path, author, authorEmail, `Delete semantic layer source: ${sourceName}`); + } + + async getSourceHistory(connectionId: string, sourceName: string) { + const path = this.sourcePath(connectionId, sourceName); + return this.configService.getFileHistory(path); + } + + /** + * Validate the semantic layer state that *would* exist if `proposedSource` + * were written, without persisting anything. Used by write/edit tools to + * block invalid commits before they hit git. + */ + async validateWithProposedSource( + connectionId: string, + proposedSource: SemanticLayerSource, + ): Promise<{ errors: string[]; warnings: string[]; perSourceWarnings: Record }> { + const existing = await this.loadAllSources(connectionId); + const merged = existing.filter((s) => s.name !== proposedSource.name); + + // Overlays (no table/sql) must be composed with their manifest base before + // validation, otherwise the filter below drops them and the edited source + // escapes validation entirely. + let toPush: SemanticLayerSource = proposedSource; + if (proposedSource.table == null && proposedSource.sql == null) { + const base = await this.getManifestEntry(connectionId, proposedSource.name); + if (!base) { + return { + errors: [ + `Overlay '${proposedSource.name}' has no matching manifest entry — cannot validate. 
` + + `Rewrite as a standalone source with 'table:' or 'sql:', or add a manifest shard under _schema/.`, + ], + warnings: [], + perSourceWarnings: {}, + }; + } + toPush = composeOverlay(base, { ...proposedSource }); + } else if (proposedSource.inherits_columns_from) { + const base = await this.findManifestEntryByTableRef(connectionId, proposedSource.inherits_columns_from); + if (base) { + toPush = enrichColumnsFromManifest(proposedSource, base); + } + // Miss is non-fatal — the source ships unenriched, validator will surface + // any column-without-type errors via the warehouse probe. + } + merged.push(toPush); + + const validatable = merged.filter((s) => s.table != null || s.sql != null); + if (validatable.length === 0) { + return { errors: [], warnings: [], perSourceWarnings: {} }; + } + + const dialect = await this.getDialectForConnection(connectionId); + + try { + const { data, error } = await this.python.validateSources({ + sources: validatable, + dialect, + recently_touched: [proposedSource.name], + }); + if (error) { + const errorMsg = formatPortError(error, 'Unknown validation error'); + return { errors: [errorMsg], warnings: [], perSourceWarnings: {} }; + } + if (!data) { + return { errors: [], warnings: [], perSourceWarnings: {} }; + } + return { + errors: data.errors ?? [], + warnings: data.warnings ?? [], + perSourceWarnings: data.per_source_warnings ?? {}, + }; + } catch (e) { + return { + errors: [`Validation call failed: ${e instanceof Error ? 
e.message : String(e)}`], + warnings: [], + perSourceWarnings: {}, + }; + } + } + + async validateSourcesForConnection(connectionId: string): Promise<{ errors: string[]; warnings: string[] }> { + const allSources = await this.loadAllSources(connectionId); + const sources = allSources.filter((source) => source.table != null || source.sql != null); + if (sources.length === 0) { + return { errors: [], warnings: [] }; + } + + const dialect = await this.getDialectForConnection(connectionId); + const { data, error } = await this.python.validateSources({ sources, dialect }); + if (error) { + return { errors: [formatPortError(error, 'Unknown validation error')], warnings: [] }; + } + if (!data) { + return { errors: [], warnings: [] }; + } + return { + errors: data.errors ?? [], + warnings: data.warnings ?? [], + }; + } + + /** + * Validate overlays and standalone sources against the current manifest. + * Returns warnings for stale references (non-blocking). + */ + async validateOverlaysAfterScan(connectionId: string): Promise { + const dir = `${SL_DIR_PREFIX}/${connectionId}`; + const schemaDir = `${dir}/_schema`; + const warnings: string[] = []; + + let allFiles: string[]; + try { + const result = await this.configService.listFiles(dir); + allFiles = result.files.filter((f) => f.endsWith('.yaml')); + } catch { + return warnings; + } + + // Load manifest entries to know what columns/joins/tables exist + const manifestColumns = new Map>(); // sourceName → column names + const manifestJoins = new Map>(); // sourceName → normalized join on clauses + const allSourceNames = new Set(); + + const schemaFiles = allFiles.filter((f) => f.startsWith(`${schemaDir}/`)); + for (const filePath of schemaFiles) { + try { + const { content } = await this.configService.readFile(filePath); + const shard = YAML.parse(content) as { + tables?: Record; joins?: Array<{ on: string }> }>; + }; + if (shard?.tables) { + for (const [name, entry] of Object.entries(shard.tables)) { + 
allSourceNames.add(name); + manifestColumns.set(name, new Set((entry.columns ?? []).map((c) => c.name))); + manifestJoins.set(name, new Set((entry.joins ?? []).map((j) => j.on.replace(/\s+/g, ' ').trim()))); + } + } + } catch { + // Skip unparseable shards + } + } + + // Check overlays and standalone sources + const nonSchemaFiles = allFiles.filter((f) => !f.startsWith(`${schemaDir}/`)); + for (const filePath of nonSchemaFiles) { + try { + const { content } = await this.configService.readFile(filePath); + const data = YAML.parse(content) as Record; + if (!data?.name) { + continue; + } + const name = data.name as string; + + if (data.sql || data.table) { + // Standalone source — check join targets exist + const joins = (data.joins as Array<{ to: string }>) ?? []; + for (const join of joins) { + if (!allSourceNames.has(join.to)) { + warnings.push(`${name}: join target '${join.to}' does not exist`); + } + } + allSourceNames.add(name); + } else { + // Overlay — check references against manifest + const excludeColumns = (data.exclude_columns as string[]) ?? []; + const disableJoins = (data.disable_joins as string[]) ?? []; + const cols = manifestColumns.get(name); + const joins = manifestJoins.get(name); + + if (!cols) { + warnings.push(`${name}: overlay has no matching manifest entry`); + continue; + } + + for (const col of excludeColumns) { + if (!cols.has(col)) { + warnings.push(`${name}: exclude_columns references non-existent column '${col}'`); + } + } + + for (const joinOn of disableJoins) { + const normalized = joinOn.replace(/\s+/g, ' ').trim(); + if (!joins?.has(normalized)) { + warnings.push(`${name}: disable_joins references non-existent join '${joinOn}'`); + } + } + + // Check computed column expressions for stale column references + const overlayColumns = (data.columns as Array<{ name: string; expr?: string }>) ?? 
[]; + for (const col of overlayColumns) { + if (col.expr) { + for (const ref of extractColumnReferences(col.expr)) { + if (!cols.has(ref)) { + warnings.push(`${name}: computed column '${col.name}' references non-existent column '${ref}'`); + } + } + } + } + + // Check measure expressions for stale column references + const overlayMeasures = (data.measures as Array<{ name: string; expr: string }>) ?? []; + for (const measure of overlayMeasures) { + if (measure.expr) { + for (const ref of extractColumnReferences(measure.expr)) { + if (!cols.has(ref)) { + warnings.push(`${name}: measure '${measure.name}' references non-existent column '${ref}'`); + } + } + } + } + } + } catch { + // Skip unparseable files + } + } + + return warnings; + } + + /** + * Build FK context from composed entities for a connection. + * Returns a map keyed by `tableName.columnName` with outgoing and incoming FK relationships. + * This replaces direct column_links DB queries for FK context. + */ + buildForeignKeyContext(sources: SemanticLayerSource[]): Map< + string, + { + outgoing: Array<{ toTable: string; toColumn: string }>; + incoming: Array<{ fromTable: string; fromColumn: string }>; + } + > { + const fkMap = new Map< + string, + { + outgoing: Array<{ toTable: string; toColumn: string }>; + incoming: Array<{ fromTable: string; fromColumn: string }>; + } + >(); + + const getOrCreate = (key: string) => { + let ctx = fkMap.get(key); + if (!ctx) { + ctx = { outgoing: [], incoming: [] }; + fkMap.set(key, ctx); + } + return ctx; + }; + + for (const source of sources) { + for (const join of source.joins) { + // Parse the `on` clause: "orders.customer_id = customers.id" + const parsed = parseJoinOn(join.on, source.name, join.to); + if (!parsed) { + continue; + } + + // Outgoing: source column → target table.column + const fromKey = `${source.name}.${parsed.fromColumn}`; + getOrCreate(fromKey).outgoing.push({ toTable: join.to, toColumn: parsed.toColumn }); + + // Incoming: target column ← source 
table.column + const toKey = `${join.to}.${parsed.toColumn}`; + getOrCreate(toKey).incoming.push({ fromTable: source.name, fromColumn: parsed.fromColumn }); + } + } + + return fkMap; + } + + /** + * Build a column metadata lookup from manifest YAML for a connection. + * Returns a map keyed by `tableName.columnName` with type and descriptions map. + * Used by embedding refresh and other consumers that need column metadata after it was + * removed from source_columns DB table. + */ + async buildColumnMetadataMap(connectionId: string): Promise<{ + columns: Map; nullable?: boolean; pk?: boolean }>; + tables: Map }>; + }> { + const dir = `${SL_DIR_PREFIX}/${connectionId}/_schema`; + const columns = new Map< + string, + { type: string; descriptions: Record; nullable?: boolean; pk?: boolean } + >(); + const tables = new Map }>(); + + try { + const result = await this.configService.listFiles(dir); + const yamlFiles = result.files.filter((f) => f.endsWith('.yaml')); + + for (const filePath of yamlFiles) { + try { + const { content } = await this.configService.readFile(filePath); + const shard = YAML.parse(content) as { + tables?: Record< + string, + { + descriptions?: Record; + description?: string; + db_description?: string; + columns?: Array<{ + name: string; + type: string; + pk?: boolean; + nullable?: boolean; + descriptions?: Record; + description?: string; + db_description?: string; + }>; + } + >; + }; + if (shard?.tables) { + for (const [tableName, entry] of Object.entries(shard.tables)) { + tables.set(tableName, { + descriptions: migrateDescriptions(entry.descriptions, entry.description, entry.db_description) ?? {}, + }); + for (const col of entry.columns ?? []) { + columns.set(`${tableName}.${col.name}`, { + type: col.type, + descriptions: migrateDescriptions(col.descriptions, col.description, col.db_description) ?? 
{}, + nullable: col.nullable, + pk: col.pk, + }); + } + } + } + } catch { + // Skip unparseable shards + } + } + } catch { + // Schema dir may not exist + } + + return { columns, tables }; + } + + /** + * All callers should use this instead of maintaining their own dialect maps. + */ + static mapDialect(connectionType: string): string { + const normalized = connectionType.toUpperCase(); + const map: Record = { + POSTGRESQL: 'postgres', + POSTGRES: 'postgres', + BIGQUERY: 'bigquery', + SNOWFLAKE: 'snowflake', + MYSQL: 'mysql', + SQLSERVER: 'tsql', + MSSQL: 'tsql', + SQLITE: 'sqlite', + DUCKDB: 'duckdb', + CLICKHOUSE: 'clickhouse', + REDSHIFT: 'redshift', + DATABRICKS: 'databricks', + }; + return map[normalized] ?? 'postgres'; + } + + /** + * Execute a semantic layer query: load composed sources, generate SQL via + * the python SL engine, and execute the generated SQL against the data source. + */ + async executeQuery(connectionId: string, query: SemanticLayerQueryInput): Promise { + // 1. Load sources, filtering out sources with no table or sql + const allSources = await this.loadAllSources(connectionId); + const sources = allSources.filter((s) => { + if (!s.table && !s.sql) { + this.logger.warn(`Skipping source "${s.name}" with no table or sql defined`); + return false; + } + return true; + }); + + if (sources.length === 0) { + throw new Error('No semantic layer sources found for this connection'); + } + + // 2. Resolve dialect + const connection = await this.connections.getConnectionById(connectionId); + if (!connection) { + throw new Error(`Data source not found: ${connectionId}`); + } + const dialect = SemanticLayerService.mapDialect(connection.connectionType); + + // 3. 
Generate SQL via python SL engine + const { data: slResult, error: slError } = await this.python.query({ + sources, + query, + dialect, + }); + + if (slError || !slResult?.sql) { + const errorMsg = formatPortError(slError, 'Unknown error generating SQL from semantic layer'); + throw new Error(`Semantic layer query failed: ${errorMsg}`); + } + + // 4. Execute the generated SQL + const result = await this.connections.executeQuery(connectionId, slResult.sql); + + return { + sql: slResult.sql, + headers: result.headers ?? [], + rows: result.rows ?? [], + totalRows: result.totalRows ?? (result.rows ?? []).length, + plan: (slResult.plan as Record) ?? {}, + }; + } +} + +// ── Manifest types and helpers ──────────────────────────────────── + +interface ManifestColumnEntry { + name: string; + type: string; + pk?: boolean; + nullable?: boolean; + // New format: descriptions map + descriptions?: Record; + // Legacy format: flat fields (read-only backwards compat) + description?: string; + db_description?: string; + constraints?: { dbt?: { not_null?: boolean; unique?: boolean } }; + enum_values?: { dbt?: string[] }; + tests?: { + dbt?: Array<{ name: string; package: string }>; + dbt_by_package?: Record; + }; +} + +interface ManifestJoinEntry { + to: string; + on: string; + relationship: string; + source?: string; +} + +export interface ManifestTableEntry { + table: string; + // New format: descriptions map + descriptions?: Record; + // Legacy format: flat fields (read-only backwards compat) + description?: string; + db_description?: string; + columns: ManifestColumnEntry[]; + joins?: ManifestJoinEntry[]; + tags?: { dbt?: string[] }; + freshness?: { dbt?: { raw?: unknown; loaded_at_field?: string | null } }; +} + +/** Migrate legacy flat description/db_description fields to a descriptions map. 
*/ +function migrateDescriptions( + descriptions?: Record, + description?: string, + dbDescription?: string, +): Record | undefined { + if (descriptions && Object.keys(descriptions).length > 0) { + return descriptions; + } + const result: Record = {}; + if (description) { + result.ai = description; + } + if (dbDescription) { + result.db = dbDescription; + } + return Object.keys(result).length > 0 ? result : undefined; +} + +export function projectManifestEntry(name: string, entry: ManifestTableEntry): SemanticLayerSource { + const columns = entry.columns.map((c) => ({ + name: c.name, + type: c.type, + role: c.type === 'time' ? 'time' : undefined, + descriptions: migrateDescriptions(c.descriptions, c.description, c.db_description), + constraints: c.constraints, + enum_values: c.enum_values, + tests: c.tests, + })); + + const pkColumns = entry.columns.filter((c) => c.pk).map((c) => c.name); + const grain = pkColumns.length > 0 ? pkColumns : entry.columns.map((c) => c.name); + + // Table-level dbt config from manifest shards is surfaced on the source for search / tools. + return { + name, + table: entry.table, + descriptions: migrateDescriptions(entry.descriptions, entry.description, entry.db_description), + grain, + columns, + joins: (entry.joins ?? []).map((j) => ({ to: j.to, on: j.on, relationship: j.relationship, source: j.source })), + measures: [], + ...(entry.tags?.dbt?.length ? { tags: entry.tags } : {}), + ...(entry.freshness?.dbt ? 
{ freshness: entry.freshness } : {}), + }; +} + +function normalizeWs(s: string): string { + return s.replace(/\s+/g, ' ').trim(); +} + +const SQL_KEYWORDS = new Set([ + 'sum', + 'count', + 'avg', + 'min', + 'max', + 'distinct', + 'case', + 'when', + 'then', + 'else', + 'end', + 'and', + 'or', + 'not', + 'is', + 'null', + 'as', + 'in', + 'between', + 'like', + 'cast', + 'coalesce', + 'nullif', + 'if', + 'true', + 'false', + 'asc', + 'desc', +]); + +function extractColumnReferences(expr: string): string[] { + const cleaned = expr.replace(/'[^']*'/g, '').replace(/\b\d+(\.\d+)?\b/g, ''); + const tokens = cleaned.match(/\b[a-zA-Z_]\w*\b/g) ?? []; + return [...new Set(tokens.filter((t) => !SQL_KEYWORDS.has(t.toLowerCase())))]; +} + +/** + * Returns one message per measure-level segment reference that doesn't resolve to + * a segment defined on the source. Array is empty when every reference checks out. + */ +export function findDanglingSegmentRefs(source: Record): string[] { + const segmentDefs = (source.segments as Array<{ name: string }> | undefined) ?? []; + const segmentNames = new Set(segmentDefs.map((s) => s.name)); + const measures = (source.measures as Array<{ name: string; segments?: string[] }> | undefined) ?? []; + const problems: string[] = []; + for (const m of measures) { + for (const ref of m.segments ?? 
[]) { + if (!segmentNames.has(ref)) { + problems.push(`measure '${m.name}' references unknown segment '${ref}' (not in source.segments)`); + } + } + } + return problems; +} + +const COMPOSE_KNOWN_KEYS = new Set([ + 'name', + 'description', + 'descriptions', + 'grain', + 'columns', + 'joins', + 'measures', + 'segments', + 'exclude_columns', + 'disable_joins', + 'default_time_dimension', +]); + +export function composeOverlay(base: SemanticLayerSource, overlay: Record): SemanticLayerSource { + const unknownKeys = Object.keys(overlay).filter((k) => !COMPOSE_KNOWN_KEYS.has(k)); + if (unknownKeys.length > 0) { + throw new Error( + `composeOverlay: overlay for '${base.name}' has unhandled keys [${unknownKeys.join(', ')}]. ` + + `Add a compose branch or remove from the schema.`, + ); + } + + const result = { ...base }; + + if (overlay.description) { + result.descriptions = { ...(result.descriptions ?? {}), user: overlay.description as string }; + } + + // Descriptions (plural) merge keyed by source (e.g. `dbt`, `ai`, `db`). Overlay keys + // win over matching base keys but unrelated base keys are preserved. + if (overlay.descriptions) { + result.descriptions = { + ...(result.descriptions ?? {}), + ...(overlay.descriptions as Record), + }; + } + + // Filter out excluded columns + const excluded = new Set((overlay.exclude_columns as string[] | undefined) ?? []); + let columns = result.columns.filter((c) => !excluded.has(c.name)); + + // Append overlay computed columns + const overlayColumns = (overlay.columns as SemanticLayerSource['columns'] | undefined) ?? []; + columns = [...columns, ...overlayColumns]; + result.columns = columns; + + // Measures from overlay only + result.measures = (overlay.measures as SemanticLayerSource['measures'] | undefined) ?? []; + + // Segments: overlay-replaces semantics. Manifest tables don't carry segments today; + // if that changes, add a union branch here. 
+ if (overlay.segments !== undefined) { + result.segments = overlay.segments as SemanticLayerSource['segments']; + } + + // Override grain + if (overlay.grain) { + result.grain = overlay.grain as string[]; + } + + if (overlay.default_time_dimension !== undefined) { + result.default_time_dimension = overlay.default_time_dimension as SemanticLayerSource['default_time_dimension']; + } + + // Union + dedupe joins, apply suppressions + const disabled = new Set(((overlay.disable_joins as string[] | undefined) ?? []).map(normalizeWs)); + const manifestJoins = result.joins.filter((j) => !disabled.has(normalizeWs(j.on))); + const overlayJoins = (overlay.joins as SemanticLayerSource['joins'] | undefined) ?? []; + const existingKeys = new Set(manifestJoins.map((j) => `${j.to}::${normalizeWs(j.on)}`)); + const newJoins = overlayJoins.filter((j) => !existingKeys.has(`${j.to}::${normalizeWs(j.on)}`)); + result.joins = [...manifestJoins, ...newJoins]; + + return result; +} + +/** + * Parse a join `on` clause like "orders.customer_id = customers.id" + * into { fromColumn, toColumn } relative to the source and target tables. + */ +function parseJoinOn( + on: string, + sourceName: string, + targetName: string, +): { fromColumn: string; toColumn: string } | null { + // Match: table.column = table.column (with optional whitespace) + const match = on.match(/^(\w+)\.(\w+)\s*=\s*(\w+)\.(\w+)$/); + if (!match) { + return null; + } + + const [, leftTable, leftCol, rightTable, rightCol] = match; + + if (leftTable === sourceName && rightTable === targetName) { + return { fromColumn: leftCol, toColumn: rightCol }; + } + if (leftTable === targetName && rightTable === sourceName) { + return { fromColumn: rightCol, toColumn: leftCol }; + } + + // Fallback: left side is "from", right side is "to" + return { fromColumn: leftCol, toColumn: rightCol }; +} + +/** + * Fill any blank `type`, `descriptions`, or `role` on the source's columns from the + * matching manifest column (by name). 
Local values always win. Columns absent from + * the manifest pass through unchanged. Returns a new source; does not mutate input. + */ +export function enrichColumnsFromManifest( + source: SemanticLayerSource, + manifestEntry: SemanticLayerSource | null | undefined, +): SemanticLayerSource { + if (!manifestEntry?.columns?.length) { + return source; + } + const manifestByName = new Map(manifestEntry.columns.map((c) => [c.name, c])); + const enrichedColumns = source.columns.map((col) => { + const base = manifestByName.get(col.name); + if (!base) { + return col; + } + const merged: typeof col = { ...col }; + if (!merged.type) { + merged.type = base.type; + } + if (!merged.descriptions || Object.keys(merged.descriptions).length === 0) { + if (base.descriptions && Object.keys(base.descriptions).length > 0) { + merged.descriptions = { ...base.descriptions }; + } + } + if (!merged.role && base.role) { + merged.role = base.role; + } + return merged; + }); + return { ...source, columns: enrichedColumns }; +} diff --git a/packages/context/src/sl/sl-dictionary-profile.test.ts b/packages/context/src/sl/sl-dictionary-profile.test.ts new file mode 100644 index 00000000..64b1e454 --- /dev/null +++ b/packages/context/src/sl/sl-dictionary-profile.test.ts @@ -0,0 +1,115 @@ +import { mkdtemp, rm } from 'node:fs/promises'; +import { tmpdir } from 'node:os'; +import { join } from 'node:path'; +import { afterEach, beforeEach, describe, expect, it } from 'vitest'; +import { initKloProject, type KloLocalProject } from '../project/index.js'; +import { loadLatestSlDictionaryEntries } from './sl-dictionary-profile.js'; + +describe('loadLatestSlDictionaryEntries', () => { + let tempDir: string; + let project: KloLocalProject; + + beforeEach(async () => { + tempDir = await mkdtemp(join(tmpdir(), 'klo-sl-dictionary-profile-')); + project = await initKloProject({ projectDir: join(tempDir, 'project'), projectName: 'warehouse' }); + }); + + afterEach(async () => { + await rm(tempDir, { recursive: 
true, force: true }); + }); + + it('loads latest relationship-profile sample values for dictionary candidate columns', async () => { + await project.fileStore.writeFile( + 'raw-sources/warehouse/live-database/sync-1/enrichment/relationship-profile.json', + `${JSON.stringify( + { + connectionId: 'warehouse', + driver: 'postgres', + sqlAvailable: true, + queryCount: 4, + tables: [], + columns: { + 'orders.status': { + table: { catalog: null, db: 'public', name: 'orders' }, + column: 'status', + nativeType: 'text', + normalizedType: 'string', + rowCount: 20, + nullCount: 0, + distinctCount: 3, + uniquenessRatio: 0.15, + nullRate: 0, + sampleValues: ['paid', 'refunded', 'pending'], + minTextLength: 4, + maxTextLength: 8, + }, + 'orders.customer_id': { + table: { catalog: null, db: 'public', name: 'orders' }, + column: 'customer_id', + nativeType: 'text', + normalizedType: 'string', + rowCount: 20, + nullCount: 0, + distinctCount: 20, + uniquenessRatio: 1, + nullRate: 0, + sampleValues: ['cus_1'], + minTextLength: 5, + maxTextLength: 5, + }, + }, + warnings: [], + }, + null, + 2, + )}\n`, + 'klo', + 'klo@example.com', + 'Seed profile', + ); + + await project.fileStore.writeFile( + 'raw-sources/warehouse/live-database/sync-2/enrichment/relationship-profile.json', + `${JSON.stringify( + { + connectionId: 'warehouse', + driver: 'postgres', + sqlAvailable: true, + queryCount: 4, + tables: [], + columns: { + 'orders.status': { + table: { catalog: null, db: 'public', name: 'orders' }, + column: 'status', + nativeType: 'text', + normalizedType: 'string', + rowCount: 20, + nullCount: 0, + distinctCount: 2, + uniquenessRatio: 0.1, + nullRate: 0, + sampleValues: ['settled', 'voided'], + minTextLength: 6, + maxTextLength: 7, + }, + }, + warnings: [], + }, + null, + 2, + )}\n`, + 'klo', + 'klo@example.com', + 'Seed newer profile', + ); + + await expect(loadLatestSlDictionaryEntries(project, ['warehouse'])).resolves.toEqual([ + { connectionId: 'warehouse', sourceName: 'orders', 
import type { KloLocalProject } from '../project/index.js';
import { defaultKloDataDictionarySettings, isKloDataDictionaryCandidate } from '../scan/index.js';

/** One sampled value of a dictionary-candidate column, in the shape the dictionary index stores. */
export interface SlDictionaryEntry {
  connectionId: string;
  sourceName: string;
  columnName: string;
  value: string;
  cardinality: number | null;
}

/**
 * Per-column section of a relationship-profile artifact.
 * Every field is optional because the artifact is parsed JSON that is not schema-validated here.
 */
interface RelationshipProfileColumn {
  table?: { name?: string };
  column?: string;
  nativeType?: string;
  normalizedType?: string;
  distinctCount?: number;
  sampleValues?: unknown[];
}

/** Top-level shape of a relationship-profile artifact (only the fields this module reads). */
interface RelationshipProfileArtifact {
  connectionId?: string;
  columns?: Record<string, RelationshipProfileColumn>;
}

/** Type guard: true for plain (non-null, non-array) objects. */
function isRecord(value: unknown): value is Record<string, unknown> {
  return typeof value === 'object' && value !== null && !Array.isArray(value);
}

/**
 * Parses a relationship-profile artifact.
 *
 * @returns the parsed object, or `null` when the text is not valid JSON or the
 *          top-level value is not an object. Malformed JSON is treated the same
 *          way as a missing profile instead of rejecting the whole load.
 */
function parseProfile(raw: string): RelationshipProfileArtifact | null {
  let parsed: unknown;
  try {
    parsed = JSON.parse(raw);
  } catch {
    return null;
  }
  return isRecord(parsed) ? (parsed as RelationshipProfileArtifact) : null;
}

/**
 * Converts raw sample values to trimmed strings, drops blanks and
 * case-insensitive duplicates (first spelling wins), and sorts the result.
 *
 * `null`, `undefined` and object/array samples are skipped: stringifying them
 * would index the literal text "null" or "[object Object]" as a column value.
 */
function normalizedValues(values: unknown): string[] {
  if (!Array.isArray(values)) {
    return [];
  }
  const seen = new Set<string>();
  const result: string[] = [];
  for (const value of values) {
    if (value === null || value === undefined || typeof value === 'object') {
      continue;
    }
    const text = String(value).trim();
    const key = text.toLowerCase();
    if (text.length === 0 || seen.has(key)) {
      continue;
    }
    seen.add(key);
    result.push(text);
  }
  return result.sort((left, right) => left.localeCompare(right));
}

/**
 * Builds dictionary entries for one profiled column.
 *
 * Returns an empty list when the column lacks a table or column name, is not a
 * dictionary candidate according to `isKloDataDictionaryCandidate`, or has a
 * known distinct count above the default cardinality threshold.
 */
function columnEntries(connectionId: string, column: RelationshipProfileColumn): SlDictionaryEntry[] {
  const sourceName = column.table?.name;
  const columnName = column.column;
  // Names feed localeCompare() during the final sort, so they must really be strings.
  if (typeof sourceName !== 'string' || !sourceName || typeof columnName !== 'string' || !columnName) {
    return [];
  }

  const columnType = column.normalizedType ?? column.nativeType ?? '';
  if (!isKloDataDictionaryCandidate(columnType, columnName)) {
    return [];
  }

  const cardinality = typeof column.distinctCount === 'number' ? column.distinctCount : null;
  if (cardinality !== null && cardinality > defaultKloDataDictionarySettings.cardinalityThreshold) {
    return [];
  }

  return normalizedValues(column.sampleValues).map((value) => ({
    connectionId,
    sourceName,
    columnName,
    value,
    cardinality,
  }));
}

/**
 * Finds the relationship-profile artifact with the highest-sorting path under
 * `raw-sources/<connectionId>/live-database`.
 *
 * Paths are compared with numeric-aware collation so a directory such as
 * `sync-10` ranks after `sync-2`.
 * NOTE(review): this assumes later syncs sort after earlier ones by directory
 * name; confirm against the code that names sync directories.
 *
 * @returns the chosen path, or `null` when listing fails or no profile exists.
 */
async function latestProfilePath(project: KloLocalProject, connectionId: string): Promise<string | null> {
  const root = `raw-sources/${connectionId}/live-database`;
  let files: string[];
  try {
    files = (await project.fileStore.listFiles(root)).files;
  } catch {
    // Best effort: a listing failure is treated as "no profile yet".
    return null;
  }

  return (
    files
      .filter((path) => path.endsWith('/enrichment/relationship-profile.json'))
      .sort((left, right) => left.localeCompare(right, undefined, { numeric: true }))
      .at(-1) ?? null
  );
}

/**
 * Loads dictionary entries from the latest relationship profile of each connection.
 *
 * Connection ids are de-duplicated. Connections without a readable, well-formed
 * profile contribute nothing. Entries carry the profile's own `connectionId`
 * when it is a string, otherwise the requested id.
 *
 * @returns entries sorted by connection, source, column, then value.
 */
export async function loadLatestSlDictionaryEntries(
  project: KloLocalProject,
  connectionIds: readonly string[],
): Promise<SlDictionaryEntry[]> {
  const entries: SlDictionaryEntry[] = [];
  for (const connectionId of [...new Set(connectionIds)].sort()) {
    const path = await latestProfilePath(project, connectionId);
    if (!path) {
      continue;
    }
    const raw = await project.fileStore.readFile(path);
    const profile = parseProfile(raw.content);
    if (!profile) {
      continue;
    }
    const profileConnectionId = typeof profile.connectionId === 'string' ? profile.connectionId : connectionId;
    const columns = isRecord(profile.columns) ? Object.values(profile.columns) : [];
    for (const column of columns) {
      // Skip null / scalar column entries instead of dereferencing them.
      if (isRecord(column)) {
        entries.push(...columnEntries(profileConnectionId, column as RelationshipProfileColumn));
      }
    }
  }
  return entries.sort(
    (left, right) =>
      left.connectionId.localeCompare(right.connectionId) ||
      left.sourceName.localeCompare(right.sourceName) ||
      left.columnName.localeCompare(right.columnName) ||
      left.value.localeCompare(right.value),
  );
}
expect(service.buildSearchText(source)).toContain('measure: revenue sum(amount) Gross revenue'); + expect(service.buildSearchText(source)).toContain('join: customers (many_to_one)'); + }); + + it('exports the same canonical search text builder used by SlSearchService', () => { + const service = new SlSearchService( + { maxBatchSize: 16, computeEmbedding: vi.fn(), computeEmbeddingsBulk: vi.fn() }, + { + upsertSources: vi.fn(), + getExistingSearchTexts: vi.fn(), + deleteStale: vi.fn(), + deleteByConnection: vi.fn(), + deleteByConnectionAndName: vi.fn(), + search: vi.fn(), + }, + ); + const source: SemanticLayerSource = { + name: 'orders', + descriptions: { user: 'Customer orders' }, + table: 'public.orders', + grain: ['id'], + columns: [ + { + name: 'status', + type: 'string', + enum_values: { dbt: ['paid', 'refunded'] }, + constraints: { dbt: { not_null: true } }, + }, + ], + joins: [{ to: 'customers', on: 'orders.customer_id = customers.id', relationship: 'many_to_one' }], + measures: [{ name: 'total_revenue', expr: 'sum(revenue)', description: 'Gross revenue' }], + tags: { dbt: ['finance'] }, + }; + + expect(buildSemanticLayerSourceSearchText(source)).toBe(service.buildSearchText(source)); + expect(buildSemanticLayerSourceSearchText(source)).toContain('dbt values: paid, refunded'); + expect(buildSemanticLayerSourceSearchText(source)).toContain('measure: total_revenue sum(revenue) Gross revenue'); + expect(buildSemanticLayerSourceSearchText(source)).toContain('dbt tags: finance'); + }); + + it('includes dbt enum, not_null, and unique tokens for columns', () => { + const service = new SlSearchService( + { maxBatchSize: 16, computeEmbedding: vi.fn(), computeEmbeddingsBulk: vi.fn() }, + { + upsertSources: vi.fn(), + getExistingSearchTexts: vi.fn(), + deleteStale: vi.fn(), + deleteByConnection: vi.fn(), + deleteByConnectionAndName: vi.fn(), + search: vi.fn(), + }, + ); + const source: SemanticLayerSource = { + name: 'src_orders', + table: 'public.orders', + grain: [], 
+ columns: [ + { + name: 'status', + type: 'string', + descriptions: {}, + enum_values: { dbt: ['a', 'b'] }, + constraints: { dbt: { not_null: true, unique: true } }, + }, + ], + joins: [], + measures: [], + }; + const text = service.buildSearchText(source); + expect(text).toContain('dbt values: a, b'); + expect(text).toContain('not_null'); + expect(text).toContain('unique'); + }); + + it('includes dbt default time token for MetricFlow agg_time_dimension', () => { + const service = new SlSearchService( + { maxBatchSize: 16, computeEmbedding: vi.fn(), computeEmbeddingsBulk: vi.fn() }, + { + upsertSources: vi.fn(), + getExistingSearchTexts: vi.fn(), + deleteStale: vi.fn(), + deleteByConnection: vi.fn(), + deleteByConnectionAndName: vi.fn(), + search: vi.fn(), + }, + ); + const source: SemanticLayerSource = { + name: 'orders', + table: 'public.orders', + grain: ['id'], + columns: [{ name: 'id', type: 'number' }], + joins: [], + measures: [], + default_time_dimension: { dbt: 'order_date' }, + }; + expect(service.buildSearchText(source)).toContain('dbt default time: order_date'); + }); + + it('includes dbt table tags and freshness from manifest-backed source', () => { + const service = new SlSearchService( + { maxBatchSize: 16, computeEmbedding: vi.fn(), computeEmbeddingsBulk: vi.fn() }, + { + upsertSources: vi.fn(), + getExistingSearchTexts: vi.fn(), + deleteStale: vi.fn(), + deleteByConnection: vi.fn(), + deleteByConnectionAndName: vi.fn(), + search: vi.fn(), + }, + ); + const source: SemanticLayerSource = { + name: 'customers', + table: 'jaffle.customers', + grain: ['id'], + columns: [{ name: 'id', type: 'number' }], + joins: [], + measures: [], + tags: { dbt: ['raw', 'core'] }, + freshness: { + dbt: { + loaded_at_field: 'updated_at', + raw: { warn_after: { count: 12, period: 'hour' } }, + }, + }, + }; + const text = service.buildSearchText(source); + expect(text).toContain('dbt tags: raw, core'); + expect(text).toContain('dbt freshness:'); + 
import type { KloEmbeddingPort, KloLogger } from '../core/index.js';
import { noopLogger } from '../core/index.js';
import { DEFAULT_PRIORITY, resolveDescription } from './descriptions.js';
import type { SlSourcesIndexPort } from './ports.js';
import type { SemanticLayerSource } from './types.js';

/**
 * Builds the canonical text that is indexed (and embedded) for a semantic-layer source.
 *
 * The text is a `. `-joined list of: the source name with underscores replaced
 * by spaces, the resolved source description, the table, the dbt default time
 * dimension, one fragment per column, measure and join, dbt tags, and a dbt
 * freshness summary.
 *
 * @param source   source to describe
 * @param priority description-provider priority passed to `resolveDescription`
 */
export function buildSemanticLayerSourceSearchText(
  source: SemanticLayerSource,
  priority: string[] = DEFAULT_PRIORITY,
): string {
  const config = { priority };
  const parts: string[] = [source.name.replace(/_/g, ' ')];

  const sourceDesc = resolveDescription(source.descriptions, config);
  if (sourceDesc) {
    parts.push(sourceDesc);
  }

  if (source.table) {
    parts.push(`table: ${source.table}`);
  }

  if (source.default_time_dimension?.dbt) {
    parts.push(`dbt default time: ${source.default_time_dimension.dbt}`);
  }

  for (const col of source.columns ?? []) {
    const colDesc = resolveDescription(col.descriptions, config);
    let extra = '';
    if (col.enum_values?.dbt?.length) {
      extra += ` [dbt values: ${col.enum_values.dbt.join(', ')}]`;
    }
    if (col.constraints?.dbt?.not_null) {
      extra += ' not_null';
    }
    if (col.constraints?.dbt?.unique) {
      extra += ' unique';
    }
    // Omit the type fragment when the column has no type, so the literal
    // text "(undefined)" is never indexed.
    const typeText = col.type ? ` (${col.type})` : '';
    parts.push(`${col.name}${typeText}${colDesc ? ` ${colDesc}` : ''}${extra}`);
  }

  for (const m of source.measures ?? []) {
    parts.push(`measure: ${m.name} ${m.expr}${m.description ? ` ${m.description}` : ''}`);
  }

  for (const j of source.joins ?? []) {
    parts.push(`join: ${j.to} (${j.relationship})`);
  }

  if (source.tags?.dbt?.length) {
    parts.push(`dbt tags: ${source.tags.dbt.join(', ')}`);
  }

  if (source.freshness?.dbt) {
    const fd = source.freshness.dbt;
    const bits: string[] = [];
    if (fd.loaded_at_field) {
      bits.push(`loaded_at=${fd.loaded_at_field}`);
    }
    if (fd.raw !== undefined) {
      // Cap the serialized freshness config at 120 characters.
      let rawStr = JSON.stringify(fd.raw);
      if (rawStr.length > 120) {
        rawStr = `${rawStr.slice(0, 117)}...`;
      }
      bits.push(rawStr);
    }
    if (bits.length > 0) {
      parts.push(`dbt freshness: ${bits.join(' ')}`);
    }
  }

  return parts.join('. ');
}

/** Indexes semantic-layer sources for search and runs queries against that index. */
export class SlSearchService {
  constructor(
    private readonly embeddingService: KloEmbeddingPort,
    private readonly slSourcesRepository: SlSourcesIndexPort,
    private readonly logger: KloLogger = noopLogger,
  ) {}

  /**
   * Synchronizes the index of one connection with `sources`.
   *
   * An empty list removes every indexed source of the connection. Otherwise
   * only sources whose search text changed, or that have no stored embedding,
   * are re-embedded and upserted; sources no longer present are deleted.
   * If embedding fails, the changed rows are still upserted with a `null` embedding.
   */
  async indexSources(connectionId: string, sources: SemanticLayerSource[]): Promise<void> {
    if (sources.length === 0) {
      await this.slSourcesRepository.deleteByConnection(connectionId);
      return;
    }

    // Detect which sources actually changed by comparing search_text
    const existing = await this.slSourcesRepository.getExistingSearchTexts(connectionId);
    const searchTexts = sources.map((s) => this.buildSearchText(s));
    const keepNames = sources.map((s) => s.name);

    const changedIndices: number[] = [];
    for (let i = 0; i < sources.length; i++) {
      const prev = existing.get(sources[i].name);
      if (!prev || prev.searchText !== searchTexts[i] || !prev.hasEmbedding) {
        changedIndices.push(i);
      }
    }

    if (changedIndices.length === 0) {
      // Still clean up stale sources even if nothing changed
      await this.slSourcesRepository.deleteStale(connectionId, keepNames);
      this.logger.log(`SL sources for connection ${connectionId}: all ${sources.length} up to date, 0 reindexed`);
      return;
    }

    // Compute embeddings only for changed sources
    const changedEmbeddings = await this.computeEmbeddingsOrNull(changedIndices.map((i) => searchTexts[i]));

    const rows = changedIndices.map((srcIdx, i) => ({
      sourceName: sources[srcIdx].name,
      searchText: searchTexts[srcIdx],
      // A short response from the embedder must not leak `undefined` into the repository.
      embedding: changedEmbeddings[i] ?? null,
    }));

    await this.slSourcesRepository.upsertSources(connectionId, rows);

    // Remove sources that no longer exist in YAML
    await this.slSourcesRepository.deleteStale(connectionId, keepNames);

    this.logger.log(
      `SL sources for connection ${connectionId}: ${changedIndices.length}/${sources.length} reindexed, ${sources.length - changedIndices.length} unchanged`,
    );
  }

  /**
   * Searches the indexed sources of one connection.
   *
   * When the query embedding cannot be computed, a warning is logged and the
   * repository is queried with a `null` embedding.
   *
   * @returns source names with the repository's RRF score exposed as `score`
   */
  async search(
    connectionId: string,
    query: string,
    limit = 15,
    minRrfScore = 0,
  ): Promise<Array<{ sourceName: string; score: number }>> {
    let queryEmbedding: number[] | null = null;
    try {
      queryEmbedding = await this.embeddingService.computeEmbedding(query);
    } catch (error) {
      this.logger.warn(
        `Failed to compute query embedding, falling back to FTS + trigram: ${error instanceof Error ? error.message : String(error)}`,
      );
    }

    const results = await this.slSourcesRepository.search(connectionId, queryEmbedding, query, limit, minRrfScore);
    return results.map((r) => ({ sourceName: r.sourceName, score: r.rrfScore }));
  }

  /** Instance-level alias of {@link buildSemanticLayerSourceSearchText}. */
  buildSearchText(source: SemanticLayerSource, priority: string[] = DEFAULT_PRIORITY): string {
    return buildSemanticLayerSourceSearchText(source, priority);
  }

  /**
   * Embeds `texts` in batches of at most `embeddingService.maxBatchSize`.
   *
   * Any failure is logged and yields an all-`null` result so indexing can
   * continue without embeddings.
   */
  private async computeEmbeddingsOrNull(texts: string[]): Promise<(number[] | null)[]> {
    // A zero, negative or non-finite batch size would stall or skip the loop
    // below; fall back to a single batch in that case.
    const configured = Math.floor(this.embeddingService.maxBatchSize);
    const batchSize = Number.isFinite(configured) && configured > 0 ? configured : Math.max(1, texts.length);
    try {
      const allEmbeddings: number[][] = [];
      for (let i = 0; i < texts.length; i += batchSize) {
        const batch = texts.slice(i, i + batchSize);
        const batchEmbeddings = await this.embeddingService.computeEmbeddingsBulk(batch);
        allEmbeddings.push(...batchEmbeddings);
      }
      return allEmbeddings;
    } catch (error) {
      this.logger.warn(
        `Failed to compute SL source embeddings: ${error instanceof Error ? error.message : String(error)}`,
      );
      return texts.map(() => null);
    }
  }
}
revenue', + embedding: null, + }, + { + sourceName: 'tickets', + searchText: 'tickets table: public.tickets measure: ticket_count count(*) support queue', + embedding: null, + }, + ]); + + await expect(access(dbPath)).resolves.toBeUndefined(); + expect(await index.search('warehouse', null, 'gross revenue', 10)).toEqual([ + expect.objectContaining({ + sourceName: 'orders', + rrfScore: expect.any(Number), + }), + ]); + }); + + it('reports existing search text and embedding presence', async () => { + const index = new SqliteSlSourcesIndex({ dbPath }); + + await index.upsertSources('warehouse', [ + { + sourceName: 'orders', + searchText: 'orders gross revenue', + embedding: [0.1, 0.2, 0.3], + }, + { + sourceName: 'tickets', + searchText: 'tickets support queue', + embedding: null, + }, + ]); + + await expect(index.getExistingSearchTexts('warehouse')).resolves.toEqual( + new Map([ + ['orders', { searchText: 'orders gross revenue', hasEmbedding: true }], + ['tickets', { searchText: 'tickets support queue', hasEmbedding: false }], + ]), + ); + }); + + it('deletes stale, named, and connection-scoped rows from the FTS index', async () => { + const index = new SqliteSlSourcesIndex({ dbPath }); + + await index.upsertSources('warehouse', [ + { sourceName: 'orders', searchText: 'orders revenue', embedding: null }, + { sourceName: 'tickets', searchText: 'tickets support', embedding: null }, + ]); + await index.upsertSources('finance', [{ sourceName: 'invoices', searchText: 'invoices revenue', embedding: null }]); + + await index.deleteStale('warehouse', ['orders']); + expect(await index.search('warehouse', null, 'support', 10)).toEqual([]); + expect(await index.search('warehouse', null, 'revenue', 10)).toEqual([ + expect.objectContaining({ sourceName: 'orders' }), + ]); + expect(await index.search('finance', null, 'revenue', 10)).toEqual([ + expect.objectContaining({ sourceName: 'invoices' }), + ]); + + await index.deleteByConnectionAndName('warehouse', 'orders'); + expect(await 
index.search('warehouse', null, 'revenue', 10)).toEqual([]); + + await index.deleteByConnection('finance'); + expect(await index.search('finance', null, 'revenue', 10)).toEqual([]); + }); + + it('returns lane candidates with stable connection-scoped IDs', async () => { + const index = new SqliteSlSourcesIndex({ dbPath }); + + await index.upsertSources('warehouse', [ + { sourceName: 'orders', searchText: 'orders gross revenue paid status', embedding: [1, 0] }, + ]); + await index.upsertSources('finance', [ + { sourceName: 'orders', searchText: 'finance orders invoices', embedding: [0, 1] }, + ]); + + await expect(index.searchLexicalCandidates({ queryText: 'gross revenue', limit: 25 })).resolves.toEqual([ + expect.objectContaining({ + id: 'warehouse/orders', + connectionId: 'warehouse', + sourceName: 'orders', + rank: 1, + rawScore: expect.any(Number), + }), + ]); + + await expect(index.searchSemanticCandidates({ queryEmbedding: [0, 1], limit: 25 })).resolves.toEqual([ + expect.objectContaining({ id: 'finance/orders', connectionId: 'finance', sourceName: 'orders', rank: 1 }), + expect.objectContaining({ id: 'warehouse/orders', connectionId: 'warehouse', sourceName: 'orders', rank: 2 }), + ]); + }); + + it('aggregates dictionary matches to one source-level lane candidate', async () => { + const index = new SqliteSlSourcesIndex({ dbPath }); + + await index.replaceDictionaryEntries('warehouse', [ + { connectionId: 'warehouse', sourceName: 'orders', columnName: 'status', value: 'paid', cardinality: 3 }, + { connectionId: 'warehouse', sourceName: 'orders', columnName: 'status', value: 'refunded', cardinality: 3 }, + { connectionId: 'warehouse', sourceName: 'orders', columnName: 'channel', value: 'paid search', cardinality: 4 }, + { + connectionId: 'warehouse', + sourceName: 'tickets', + columnName: 'priority', + value: 'paid support', + cardinality: 5, + }, + ]); + + await expect(index.searchDictionaryCandidates({ queryText: 'paid', limit: 25 })).resolves.toEqual([ + 
expect.objectContaining({ + id: 'warehouse/orders', + connectionId: 'warehouse', + sourceName: 'orders', + rank: 1, + matches: [ + { column: 'channel', values: ['paid search'] }, + { column: 'status', values: ['paid'] }, + ], + }), + expect.objectContaining({ + id: 'warehouse/tickets', + connectionId: 'warehouse', + sourceName: 'tickets', + rank: 2, + matches: [{ column: 'priority', values: ['paid support'] }], + }), + ]); + }); + + it('returns an empty result for blank or punctuation-only queries', async () => { + const index = new SqliteSlSourcesIndex({ dbPath }); + await index.upsertSources('warehouse', [{ sourceName: 'orders', searchText: 'orders revenue', embedding: null }]); + + expect(await index.search('warehouse', null, ' ', 10)).toEqual([]); + expect(await index.search('warehouse', null, '---', 10)).toEqual([]); + }); +}); diff --git a/packages/context/src/sl/sqlite-sl-sources-index.ts b/packages/context/src/sl/sqlite-sl-sources-index.ts new file mode 100644 index 00000000..a5000976 --- /dev/null +++ b/packages/context/src/sl/sqlite-sl-sources-index.ts @@ -0,0 +1,549 @@ +import { mkdirSync } from 'node:fs'; +import { dirname } from 'node:path'; +import Database from 'better-sqlite3'; +import type { SlSourcesIndexPort } from './ports.js'; +import type { SlDictionaryEntry } from './sl-dictionary-profile.js'; +import type { SlDictionaryMatch } from './types.js'; + +export interface SqliteSlSourcesIndexOptions { + dbPath: string; +} + +type ExistingRow = { + source_name: string; + search_text: string; + embedding_json: string | null; +}; + +type SearchRow = { + connection_id?: string; + source_name: string; + rank: number; +}; + +export interface SlSqliteLaneCandidate { + id: string; + connectionId: string; + sourceName: string; + rank: number; + rawScore: number; +} + +export interface SlSqliteDictionaryCandidate extends SlSqliteLaneCandidate { + matches: SlDictionaryMatch[]; +} + +type IndexedSourceRow = { + connection_id: string; + source_name: string; + 
/** Builds the `<connectionId>/<sourceName>` identifier used for lane candidates. */
function candidateId(connectionId: string, sourceName: string): string {
  return `${connectionId}/${sourceName}`;
}

/**
 * Cosine similarity of two vectors.
 *
 * @returns 0 when the vectors are empty, differ in length, either has zero
 *          norm, or the result is not finite (for example a NaN component),
 *          so callers can sort on the value safely.
 */
function cosineSimilarity(left: number[], right: number[]): number {
  if (left.length === 0 || left.length !== right.length) {
    return 0;
  }
  let dot = 0;
  let leftNorm = 0;
  let rightNorm = 0;
  for (let i = 0; i < left.length; i++) {
    const l = left[i] ?? 0;
    const r = right[i] ?? 0;
    dot += l * r;
    leftNorm += l * l;
    rightNorm += r * r;
  }
  if (leftNorm === 0 || rightNorm === 0) {
    return 0;
  }
  const similarity = dot / (Math.sqrt(leftNorm) * Math.sqrt(rightNorm));
  return Number.isFinite(similarity) ? similarity : 0;
}

/**
 * Turns free text into an FTS5 MATCH expression: lower-cased, de-duplicated
 * terms, each double-quoted and joined with `OR`.
 *
 * Terms are runs of Unicode letters, combining marks, digits and underscores,
 * so non-ASCII words such as "café" stay intact instead of being cut at the
 * first non-ASCII character.
 *
 * @returns the MATCH expression, or `''` when the query contains no terms
 */
function normalizeFtsQuery(query: string): string {
  const terms = query
    .toLowerCase()
    .split(/[^\p{L}\p{M}\p{N}_]+/u)
    .map((term) => term.trim())
    .filter(Boolean);

  // Doubling embedded quotes is defensive: the split above already removes them.
  return [...new Set(terms)].map((term) => `"${term.replaceAll('"', '""')}"`).join(' OR ');
}

/** Maps a bm25 rank to `1 / (1 + |rank|)`, rounded to six decimal places. */
function scoreFromRank(rank: number): number {
  return Number((1 / (1 + Math.abs(rank))).toFixed(6));
}
connection_id TEXT NOT NULL, + source_name TEXT NOT NULL, + column_name TEXT NOT NULL, + value TEXT NOT NULL, + value_lower TEXT NOT NULL, + cardinality INTEGER, + updated_at TEXT NOT NULL, + PRIMARY KEY (connection_id, source_name, column_name, value) + ); + + CREATE VIRTUAL TABLE IF NOT EXISTS local_sl_dictionary_values_fts USING fts5( + connection_id UNINDEXED, + source_name UNINDEXED, + column_name UNINDEXED, + value + ); + `); + } + + async upsertSources( + connectionId: string, + sources: Array<{ sourceName: string; searchText: string; embedding: number[] | null; contentHash?: string | null }>, + ): Promise { + if (sources.length === 0) { + return; + } + + const upsertRow = this.db.prepare(` + INSERT INTO local_sl_sources ( + connection_id, + source_name, + search_text, + embedding_json, + content_hash, + updated_at + ) + VALUES ( + @connectionId, + @sourceName, + @searchText, + @embeddingJson, + @contentHash, + @updatedAt + ) + ON CONFLICT(connection_id, source_name) DO UPDATE SET + search_text = excluded.search_text, + embedding_json = excluded.embedding_json, + content_hash = COALESCE(excluded.content_hash, local_sl_sources.content_hash), + updated_at = excluded.updated_at + `); + const deleteFts = this.db.prepare(` + DELETE FROM local_sl_sources_fts + WHERE connection_id = @connectionId + AND source_name = @sourceName + `); + const insertFts = this.db.prepare(` + INSERT INTO local_sl_sources_fts (connection_id, source_name, search_text) + VALUES (@connectionId, @sourceName, @searchText) + `); + + const transaction = this.db.transaction( + ( + rows: Array<{ + sourceName: string; + searchText: string; + embedding: number[] | null; + contentHash?: string | null; + }>, + ) => { + const updatedAt = new Date().toISOString(); + for (const source of rows) { + const row = { + connectionId, + sourceName: source.sourceName, + searchText: source.searchText, + embeddingJson: source.embedding ? 
JSON.stringify(source.embedding) : null, + contentHash: source.contentHash ?? null, + updatedAt, + }; + upsertRow.run(row); + deleteFts.run(row); + insertFts.run(row); + } + }, + ); + + transaction(sources); + } + + async getExistingSearchTexts( + connectionId: string, + ): Promise> { + const rows = this.db + .prepare( + ` + SELECT source_name, search_text, embedding_json + FROM local_sl_sources + WHERE connection_id = ? + ORDER BY source_name ASC + `, + ) + .all(connectionId) as ExistingRow[]; + + return new Map( + rows.map((row) => [row.source_name, { searchText: row.search_text, hasEmbedding: row.embedding_json !== null }]), + ); + } + + async deleteStale(connectionId: string, keepNames: string[]): Promise { + if (keepNames.length === 0) { + await this.deleteByConnection(connectionId); + return; + } + + const placeholders = keepNames.map(() => '?').join(', '); + const stale = this.db + .prepare( + ` + SELECT source_name + FROM local_sl_sources + WHERE connection_id = ? + AND source_name NOT IN (${placeholders}) + `, + ) + .all(connectionId, ...keepNames) as Array<{ source_name: string }>; + + const deleteFts = this.db.prepare(` + DELETE FROM local_sl_sources_fts + WHERE connection_id = ? + AND source_name = ? + `); + const deleteRow = this.db.prepare(` + DELETE FROM local_sl_sources + WHERE connection_id = ? + AND source_name = ? 
/**
 * Replaces every dictionary value stored for `connectionId` with `entries`.
 *
 * Entries whose own `connectionId` differs from the argument are ignored.
 * The delete and the inserts run inside one transaction, so a failing insert
 * (for example a primary-key collision between two entries) rolls back and
 * leaves the previously stored values in place instead of an emptied dictionary.
 */
async replaceDictionaryEntries(connectionId: string, entries: SlDictionaryEntry[]): Promise<void> {
  const deleteFts = this.db.prepare('DELETE FROM local_sl_dictionary_values_fts WHERE connection_id = ?');
  const deleteRows = this.db.prepare('DELETE FROM local_sl_dictionary_values WHERE connection_id = ?');
  const insertRow = this.db.prepare(`
    INSERT INTO local_sl_dictionary_values (
      connection_id,
      source_name,
      column_name,
      value,
      value_lower,
      cardinality,
      updated_at
    )
    VALUES (
      @connectionId,
      @sourceName,
      @columnName,
      @value,
      @valueLower,
      @cardinality,
      @updatedAt
    )
  `);
  const insertFts = this.db.prepare(`
    INSERT INTO local_sl_dictionary_values_fts (connection_id, source_name, column_name, value)
    VALUES (@connectionId, @sourceName, @columnName, @value)
  `);

  const replace = this.db.transaction((rows: SlDictionaryEntry[]) => {
    deleteFts.run(connectionId);
    deleteRows.run(connectionId);

    const updatedAt = new Date().toISOString();
    for (const entry of rows) {
      if (entry.connectionId !== connectionId) {
        continue;
      }
      const row = {
        connectionId: entry.connectionId,
        sourceName: entry.sourceName,
        columnName: entry.columnName,
        value: entry.value,
        valueLower: entry.value.toLowerCase(),
        cardinality: entry.cardinality,
        updatedAt,
      };
      // Keep the base table and its FTS mirror in step.
      insertRow.run(row);
      insertFts.run(row);
    }
  });

  replace(entries);
}
`WHERE connection_id IN (${connectionIds.map(() => '?').join(', ')})` : ''; + const rows = this.db + .prepare( + ` + SELECT connection_id, source_name, embedding_json + FROM local_sl_sources + ${connectionPredicate} + ORDER BY connection_id ASC, source_name ASC + `, + ) + .all(...connectionIds) as IndexedSourceRow[]; + + return rows + .flatMap((row) => { + if (!row.embedding_json) { + return []; + } + try { + const embedding = JSON.parse(row.embedding_json) as unknown; + if (!Array.isArray(embedding) || !embedding.every((value) => typeof value === 'number')) { + return []; + } + return [ + { + id: candidateId(row.connection_id, row.source_name), + connectionId: row.connection_id, + sourceName: row.source_name, + rank: 0, + rawScore: cosineSimilarity(input.queryEmbedding, embedding), + }, + ]; + } catch { + return []; + } + }) + .sort( + (left, right) => + right.rawScore - left.rawScore || + left.connectionId.localeCompare(right.connectionId) || + left.sourceName.localeCompare(right.sourceName), + ) + .slice(0, Math.max(1, input.limit)) + .map((candidate, index) => ({ ...candidate, rank: index + 1 })); + } + + async searchDictionaryCandidates(input: { + connectionIds?: readonly string[]; + queryText: string; + limit: number; + }): Promise { + const ftsQuery = normalizeFtsQuery(input.queryText); + const normalizedQuery = input.queryText.trim().toLowerCase(); + if (!ftsQuery && !normalizedQuery) { + return []; + } + + const connectionIds = [...new Set(input.connectionIds ?? [])].sort(); + const connectionPredicate = + connectionIds.length > 0 ? `AND connection_id IN (${connectionIds.map(() => '?').join(', ')})` : ''; + const ftsRows = ftsQuery + ? (this.db + .prepare( + ` + SELECT connection_id, source_name, column_name, value, bm25(local_sl_dictionary_values_fts) AS rank + FROM local_sl_dictionary_values_fts + WHERE local_sl_dictionary_values_fts MATCH ? 
+ ${connectionPredicate} + ORDER BY rank ASC, connection_id ASC, source_name ASC, column_name ASC, value ASC + LIMIT ? + `, + ) + .all(ftsQuery, ...connectionIds, Math.max(25, input.limit * 4)) as DictionarySearchRow[]) + : []; + + const substringRows = normalizedQuery + ? (this.db + .prepare( + ` + SELECT connection_id, source_name, column_name, value, NULL AS rank + FROM local_sl_dictionary_values + WHERE value_lower LIKE ? ESCAPE '\\' + ${connectionPredicate} + ORDER BY connection_id ASC, source_name ASC, column_name ASC, value ASC + LIMIT ? + `, + ) + .all(`%${normalizedQuery.replace(/[\\%_]/g, '\\$&')}%`, ...connectionIds, Math.max(25, input.limit * 4)) as DictionarySearchRow[]) /* %, _ and the escape character in the query are escaped so they match literally instead of acting as LIKE wildcards */ + : []; + + const rowsByKey = new Map(); + for (const row of [...ftsRows, ...substringRows]) { + const key = `${row.connection_id}/${row.source_name}/${row.column_name}/${row.value}`; + if (!rowsByKey.has(key)) { + rowsByKey.set(key, row); + } + } + + const grouped = new Map(); + for (const row of rowsByKey.values()) { + const key = candidateId(row.connection_id, row.source_name); + grouped.set(key, [...(grouped.get(key) ?? []), row]); + } + + return [...grouped.entries()] + .map(([id, rows]) => { + const [first] = rows; + const byColumn = new Map(); + for (const row of rows.sort( + (left, right) => left.column_name.localeCompare(right.column_name) || left.value.localeCompare(right.value), + )) { + byColumn.set(row.column_name, [...(byColumn.get(row.column_name) ?? []), row.value]); + } + const matches = [...byColumn.entries()].map(([column, values]) => ({ column, values: values.slice(0, 5) })); + return { + id, + connectionId: first?.connection_id ?? '', + sourceName: first?.source_name ??
'', + rank: 0, + rawScore: matches.reduce((total, match) => total + match.values.length, 0), + matches, + }; + }) + .sort( + (left, right) => + right.rawScore - left.rawScore || + right.matches.length - left.matches.length || + left.connectionId.localeCompare(right.connectionId) || + left.sourceName.localeCompare(right.sourceName), + ) + .slice(0, Math.max(1, input.limit)) + .map((candidate, index) => ({ ...candidate, rank: index + 1 })); + } + + async search( + connectionId: string, + _queryEmbedding: number[] | null, + queryText: string, + limit: number, + minRrfScore = 0, + ): Promise> { + const ftsQuery = normalizeFtsQuery(queryText); + if (!ftsQuery) { + return []; + } + + const rows = this.db + .prepare( + ` + SELECT source_name, bm25(local_sl_sources_fts) AS rank + FROM local_sl_sources_fts + WHERE connection_id = ? + AND local_sl_sources_fts MATCH ? + ORDER BY rank ASC, source_name ASC + LIMIT ? + `, + ) + .all(connectionId, ftsQuery, Math.max(1, limit)) as SearchRow[]; + + return rows + .map((row) => ({ sourceName: row.source_name, rrfScore: scoreFromRank(row.rank) })) + .filter((row) => row.rrfScore >= minRrfScore); + } + + private deleteByConnectionAndNameSync(connectionId: string, sourceName: string): void { + const remove = this.db.transaction(() => { + this.db + .prepare( + ` + DELETE FROM local_sl_sources_fts + WHERE connection_id = ? + AND source_name = ? + `, + ) + .run(connectionId, sourceName); + this.db + .prepare( + ` + DELETE FROM local_sl_sources + WHERE connection_id = ? + AND source_name = ? 
+ `, + ) + .run(connectionId, sourceName); + }); + remove(); + } +} diff --git a/packages/context/src/sl/tools/base-semantic-layer.tool.ts b/packages/context/src/sl/tools/base-semantic-layer.tool.ts new file mode 100644 index 00000000..2ec6891f --- /dev/null +++ b/packages/context/src/sl/tools/base-semantic-layer.tool.ts @@ -0,0 +1,154 @@ +import type { ZodType } from 'zod'; +import type { GitAuthorResolverPort, ToolContext, ToolOutput } from '../../tools/index.js'; +import { BaseTool } from '../../tools/index.js'; +import { sourceDefinitionSchema } from '../schemas.js'; +import { SemanticLayerService } from '../semantic-layer.service.js'; +import { SlSearchService } from '../sl-search.service.js'; + +export { sourceDefinitionSchema }; + +// ── Shared output types ── + +export interface SemanticLayerStructured { + success: boolean; + sourceName: string; + yaml?: string; + commitHash?: string; + errors?: string[]; + validationErrors?: string[]; + validationWarnings?: string[]; + actionRequiredWarnings?: string[]; +} + +export interface BaseSemanticLayerToolDeps { + semanticLayerService: SemanticLayerService; + slSearchService: SlSearchService; + authorResolver: GitAuthorResolverPort; +} + +// ── Abstract base class ── + +export abstract class BaseSemanticLayerTool extends BaseTool { + protected readonly semanticLayerService: SemanticLayerService; + protected readonly slSearchService: SlSearchService; + protected readonly authorResolver: GitAuthorResolverPort; + + constructor(deps: BaseSemanticLayerToolDeps) { + super(); + this.semanticLayerService = deps.semanticLayerService; + this.slSearchService = deps.slSearchService; + this.authorResolver = deps.authorResolver; + } + + protected async readSourceYaml( + connectionId: string, + sourceName: string, + context?: ToolContext, + ): Promise { + const semanticLayerService = context?.session?.semanticLayerService ?? 
this.semanticLayerService; + + try { + const { content } = await semanticLayerService.readSourceFile(connectionId, sourceName); + return content; + } catch { + return null; + } + } + + protected buildMarkdown( + success: boolean, + errors: string[], + sourceName: string, + extra?: { + yaml?: string; + commitHash?: string; + validationErrors?: string[]; + validationWarnings?: string[]; + actionRequiredWarnings?: string[]; + editCount?: number; + }, + ): string { + const parts: string[] = []; + + if (success) { + const verb = extra?.editCount != null ? `applied ${extra.editCount} edit(s) to` : 'saved'; + parts.push(`Source **${sourceName}** ${verb} successfully.`); + } else { + parts.push(`Source **${sourceName}** update completed with ${errors.length} error(s):`); + for (const err of errors) { + parts.push(`- ${err}`); + } + } + + if (extra?.commitHash) { + parts.push(`Commit: \`${extra.commitHash}\``); + } + + if (extra?.actionRequiredWarnings && extra.actionRequiredWarnings.length > 0) { + parts.push('\n**Action required:**'); + for (const warning of extra.actionRequiredWarnings) { + parts.push(`- ${warning}`); + } + } + + if (extra?.validationErrors && extra.validationErrors.length > 0) { + parts.push('\n**Validation errors:**'); + for (const ve of extra.validationErrors) { + parts.push(`- ${ve}`); + } + } + + if (extra?.validationWarnings && extra.validationWarnings.length > 0) { + parts.push('\n**Validation warnings:**'); + for (const vw of extra.validationWarnings) { + parts.push(`- ${vw}`); + } + } + + if (extra?.yaml) { + const yaml = extra.yaml; + const MAX_YAML = 2000; + if (yaml.length > MAX_YAML) { + parts.push(`\n**YAML** (${yaml.length} chars, truncated):\n\`\`\`yaml\n${yaml.slice(0, MAX_YAML)}...\n\`\`\``); + } else { + parts.push(`\n**YAML**:\n\`\`\`yaml\n${yaml}\n\`\`\``); + } + } + + return parts.join('\n'); + } + + protected buildOutput( + success: boolean, + errors: string[], + sourceName: string, + extra?: { + yaml?: string; + commitHash?: 
string; + validationErrors?: string[]; + validationWarnings?: string[]; + actionRequiredWarnings?: string[]; + editCount?: number; + }, + ): ToolOutput { + return { + markdown: this.buildMarkdown(success, errors, sourceName, extra), + structured: { + success, + sourceName, + yaml: extra?.yaml, + commitHash: extra?.commitHash, + ...(errors.length > 0 ? { errors } : {}), + ...(extra?.validationErrors && extra.validationErrors.length > 0 + ? { validationErrors: extra.validationErrors } + : {}), + ...(extra?.validationWarnings && extra.validationWarnings.length > 0 + ? { validationWarnings: extra.validationWarnings } + : {}), + ...(extra?.actionRequiredWarnings && extra.actionRequiredWarnings.length > 0 + ? { actionRequiredWarnings: extra.actionRequiredWarnings } + : {}), + }, + }; + } +} diff --git a/packages/context/src/sl/tools/connection-id-schema.test.ts b/packages/context/src/sl/tools/connection-id-schema.test.ts new file mode 100644 index 00000000..48e023e5 --- /dev/null +++ b/packages/context/src/sl/tools/connection-id-schema.test.ts @@ -0,0 +1,18 @@ +import { describe, expect, it } from 'vitest'; +import { slToolConnectionIdSchema } from './connection-id-schema.js'; + +describe('slToolConnectionIdSchema', () => { + it('accepts app UUIDs and local project connection ids', () => { + expect(slToolConnectionIdSchema.parse('00000000-0000-4000-8000-000000000001')).toBe( + '00000000-0000-4000-8000-000000000001', + ); + expect(slToolConnectionIdSchema.parse('warehouse')).toBe('warehouse'); + expect(slToolConnectionIdSchema.parse('warehouse_prod-1')).toBe('warehouse_prod-1'); + }); + + it('rejects empty, path-like, and hidden connection ids', () => { + for (const value of ['', '../warehouse', 'warehouse/prod', '.warehouse', 'warehouse prod']) { + expect(() => slToolConnectionIdSchema.parse(value)).toThrow(); + } + }); +}); diff --git a/packages/context/src/sl/tools/connection-id-schema.ts b/packages/context/src/sl/tools/connection-id-schema.ts new file mode 100644 
index 00000000..a4047128 --- /dev/null +++ b/packages/context/src/sl/tools/connection-id-schema.ts @@ -0,0 +1,6 @@ +import { z } from 'zod'; + +export const slToolConnectionIdSchema = z + .string() + .min(1) + .regex(/^[a-zA-Z0-9][a-zA-Z0-9_-]*$/, 'Connection id must be alphanumeric and may contain _ or -'); diff --git a/packages/context/src/sl/tools/index.ts b/packages/context/src/sl/tools/index.ts new file mode 100644 index 00000000..915f91ad --- /dev/null +++ b/packages/context/src/sl/tools/index.ts @@ -0,0 +1,11 @@ +export type { BaseSemanticLayerToolDeps, SemanticLayerStructured } from './base-semantic-layer.tool.js'; +export { BaseSemanticLayerTool, sourceDefinitionSchema } from './base-semantic-layer.tool.js'; +export type { SlDiscoverySettings } from './sl-discover.tool.js'; +export { SlDiscoverTool } from './sl-discover.tool.js'; +export { SlEditSourceTool } from './sl-edit-source.tool.js'; +export { SlReadSourceTool } from './sl-read-source.tool.js'; +export { SlRollbackTool } from './sl-rollback.tool.js'; +export { SlValidateTool, validateSemanticLayerEndpoint } from './sl-validate.tool.js'; +export { SlWriteSourceTool } from './sl-write-source.tool.js'; +export type { SlValidationDeps, SourceValidationResult } from './sl-warehouse-validation.js'; +export { revertSourceToPreHead, validateSingleSource } from './sl-warehouse-validation.js'; diff --git a/packages/context/src/sl/tools/sl-discover.tool.ts b/packages/context/src/sl/tools/sl-discover.tool.ts new file mode 100644 index 00000000..ed7c1854 --- /dev/null +++ b/packages/context/src/sl/tools/sl-discover.tool.ts @@ -0,0 +1,337 @@ +import { z } from 'zod'; +import { DEFAULT_PRIORITY, resolveDescription } from '../descriptions.js'; +import type { SemanticLayerSource } from '../types.js'; +import type { ToolContext, ToolOutput } from '../../tools/index.js'; +import { BaseSemanticLayerTool, type BaseSemanticLayerToolDeps } from './base-semantic-layer.tool.js'; +import { slToolConnectionIdSchema } from 
'./connection-id-schema.js'; + +export interface SlDiscoverySettings { + maxSources: number; + minRrfScore: number; + maxDetailedSources: number; +} + +const slDiscoverInputSchema = z.object({ + connectionId: slToolConnectionIdSchema + .optional() + .describe('Data source connection ID (omit to discover across all data sources)'), + query: z.string().optional().describe('Search query to filter sources/columns/measures by name or description'), + sourceName: z + .string() + .optional() + .describe('Inspect a specific source in full detail (requires connectionId if multiple data sources)'), +}); + +type SlDiscoverInput = z.infer; + +interface SlDiscoverStructured { + sources: Array<{ + connectionId: string; + connectionName: string; + name: string; + description?: string; + columnCount: number; + measureCount: number; + joinCount: number; + }>; + detail?: Record; + totalSources: number; +} + +export class SlDiscoverTool extends BaseSemanticLayerTool { + readonly name = 'sl_discover'; + + constructor( + deps: BaseSemanticLayerToolDeps, + private readonly discoverySettings: SlDiscoverySettings, + ) { + super(deps); + } + + get description(): string { + return ` +Discover available semantic layer sources, columns, measures, and joins. +When called without a connectionId, discovers sources across ALL data sources — grouped by data source name and ID. +Use this to understand what data is available before writing a semantic_query. 
+ + + +- Before querying: understand available sources across all data sources +- To inspect a specific source in detail (columns, joins, measures, grain) — requires connectionId when multiple data sources exist +- To search for sources related to a concept (e.g., "revenue", "customers") across all data sources +`; + } + + get inputSchema() { + return slDiscoverInputSchema; + } + + async call(input: SlDiscoverInput, _context: ToolContext): Promise> { + const { query, sourceName } = input; + + // Resolve connectionId: use provided value, or auto-detect + let connectionId = input.connectionId; + if (!connectionId) { + const connections = await this.semanticLayerService.listConnectionIdsWithNames(); + if (connections.length === 0) { + return { + markdown: 'No semantic layer sources found. Run a schema scan first.', + structured: { sources: [], totalSources: 0 }, + }; + } + if (connections.length === 1) { + connectionId = connections[0].id; + } else { + // Multiple connections — aggregate or prompt depending on operation + if (sourceName) { + const connectionList = connections + .map((c) => `- **${c.name}** (${c.connectionType}): \`${c.id}\``) + .join('\n'); + return { + markdown: `Multiple data sources have semantic layer sources. Specify a connectionId to inspect source "${sourceName}":\n\n${connectionList}`, + structured: { sources: [], totalSources: 0 }, + }; + } + return this.discoverAcrossConnections(connections, query); + } + } + + // If inspecting a specific source — show the SL interface (columns, measures, joins) + // without the raw SQL. Use `sl_read_source` to see the full YAML including SQL. 
+ if (sourceName) { + const sources = await this.semanticLayerService.loadAllSources(connectionId); + const source = sources.find((s) => s.name === sourceName); + if (!source) { + return { + markdown: `Source **${sourceName}** not found for this connection.`, + structured: { sources: [], totalSources: 0 }, + }; + } + + const parts: string[] = []; + this.appendSourceDetail(parts, source); + + if (source.grain?.length) { + parts.push(`Grain: ${source.grain.join(', ')}`); + } + + return { + markdown: parts.join('\n'), + structured: { + sources: [ + { + connectionId, + connectionName: connectionId, + name: source.name, + description: + resolveDescription(source.descriptions, { priority: DEFAULT_PRIORITY }) ?? undefined, + columnCount: source.columns.length, + measureCount: source.measures.length, + joinCount: source.joins.length, + }, + ], + totalSources: 1, + }, + }; + } + + // Single connection: list all sources + const connections = await this.semanticLayerService.listConnectionIdsWithNames(); + const connInfo = connections.find((c) => c.id === connectionId); + return this.discoverForConnection(connectionId, connInfo?.name ?? 
connectionId, query); + } + + private async discoverAcrossConnections( + connections: Array<{ id: string; name: string; connectionType: string }>, + query?: string, + ): Promise> { + // Load sources from all connections in parallel + const results = await Promise.all( + connections.map(async (conn) => { + const sources = await this.semanticLayerService.loadAllSources(conn.id); + let filtered = sources; + if (query) { + filtered = await this.filterByQuery(conn.id, sources, query); + } + return { conn, sources: filtered }; + }), + ); + + const allSummaries: SlDiscoverStructured['sources'] = []; + const parts: string[] = []; + let totalSources = 0; + + for (const { conn, sources } of results) { + if (sources.length === 0) { + continue; + } + totalSources += sources.length; + + parts.push(`## ${conn.name} (${conn.connectionType}) — \`${conn.id}\``); + parts.push(''); + + const config = { priority: DEFAULT_PRIORITY }; + for (const s of sources) { + allSummaries.push({ + connectionId: conn.id, + connectionName: conn.name, + name: s.name, + description: resolveDescription(s.descriptions, config) ?? undefined, + columnCount: (s.columns ?? []).length, + measureCount: (s.measures ?? []).length, + joinCount: (s.joins ?? []).length, + }); + } + + this.appendTieredSources(parts, sources, !!query); + } + + if (totalSources === 0) { + return { + markdown: query + ? `No semantic layer sources found matching "${query}".` + : 'No semantic layer sources found. Run a schema scan first, or create sources with sl_write_source.', + structured: { sources: [], totalSources: 0 }, + }; + } + + const header = `**${totalSources} source(s) found across ${results.filter((r) => r.sources.length > 0).length} data source(s)**${query ? 
` matching "${query}"` : ''}:\n`; + parts.unshift(header); + + return { + markdown: parts.join('\n'), + structured: { sources: allSummaries, totalSources }, + }; + } + + private async discoverForConnection( + connectionId: string, + connectionName: string, + query?: string, + ): Promise> { + const sources = await this.semanticLayerService.loadAllSources(connectionId); + + if (sources.length === 0) { + return { + markdown: 'No semantic layer sources found. Run a schema scan first, or create sources with sl_write_source.', + structured: { sources: [], totalSources: 0 }, + }; + } + + const filtered = query ? await this.filterByQuery(connectionId, sources, query) : sources; + + const config = { priority: DEFAULT_PRIORITY }; + const summaries = filtered.map((s) => ({ + connectionId, + connectionName, + name: s.name, + description: resolveDescription(s.descriptions, config) ?? undefined, + columnCount: (s.columns ?? []).length, + measureCount: (s.measures ?? []).length, + joinCount: (s.joins ?? []).length, + })); + + const parts: string[] = [`**${filtered.length} source(s) found**${query ? ` matching "${query}"` : ''}:\n`]; + + this.appendTieredSources(parts, filtered, !!query); + + return { + markdown: parts.join('\n'), + structured: { sources: summaries, totalSources: filtered.length }, + }; + } + + private async filterByQuery( + connectionId: string, + sources: SemanticLayerSource[], + query: string, + ): Promise { + const config = this.discoverySettings; + const searchResults = await this.slSearchService.search(connectionId, query, config.maxSources, config.minRrfScore); + if (searchResults.length > 0) { + const rankedNames = new Set(searchResults.map((r) => r.sourceName)); + const nameOrder = new Map(searchResults.map((r, i) => [r.sourceName, i])); + return sources + .filter((s) => rankedNames.has(s.name)) + .sort((a, b) => (nameOrder.get(a.name) ?? 0) - (nameOrder.get(b.name) ?? 
0)); + } + return this.fallbackTermMatch(sources, query); + } + + private fallbackTermMatch(sources: SemanticLayerSource[], query: string): SemanticLayerSource[] { + const config = { priority: DEFAULT_PRIORITY }; + const terms = query.toLowerCase().split(/\s+/).filter(Boolean); + const scored = sources + .map((s) => { + const searchText = [ + s.name, + resolveDescription(s.descriptions, config) ?? '', + ...s.columns.map((c) => `${c.name} ${resolveDescription(c.descriptions, config) ?? ''}`), + ...s.measures.map((m) => `${m.name} ${m.description ?? ''}`), + ] + .join(' ') + .toLowerCase(); + const matchCount = terms.filter((term) => searchText.includes(term)).length; + return { source: s, matchCount }; + }) + .filter((x) => x.matchCount > 0) + .sort((a, b) => b.matchCount - a.matchCount); + return scored.map((x) => x.source); + } + + /** + * Render sources in two tiers: + * - Top N (ranked by relevance when query is present) get full detail + * - Remaining sources get a one-liner with name, description, and measure count + */ + private appendTieredSources(parts: string[], sources: SemanticLayerSource[], hasQuery: boolean): void { + const maxDetailed = this.discoverySettings.maxDetailedSources; + const detailLimit = hasQuery ? maxDetailed : 0; + const detailed = sources.slice(0, detailLimit); + const rest = sources.slice(detailLimit); + + for (const s of detailed) { + this.appendSourceDetail(parts, s); + } + + if (rest.length > 0) { + if (detailed.length > 0) { + parts.push('**Other sources** (pass `sourceName` to inspect):'); + } + const defaultConfig = { priority: DEFAULT_PRIORITY }; + for (const s of rest) { + const resolvedDesc = resolveDescription(s.descriptions, defaultConfig); + const desc = resolvedDesc ? ` — ${resolvedDesc}` : ''; + const stats = [s.measures.length > 0 ? 
`${s.measures.length} measures` : null, `${s.columns.length} cols`] + .filter(Boolean) + .join(', '); + parts.push(`- **${s.name}**${desc} (${stats})`); + } + parts.push(''); + } + } + + /** Full detail for a single source: metadata, measures, joins, all public columns. */ + private appendSourceDetail(parts: string[], s: SemanticLayerSource): void { + const detailDesc = resolveDescription(s.descriptions, { priority: DEFAULT_PRIORITY }); + parts.push(`### ${s.name}${detailDesc ? ` — ${detailDesc}` : ''}`); + parts.push( + `Type: ${s.sql ? 'sql' : 'table'} | Columns: ${s.columns.length} | Measures: ${s.measures.length} | Joins: ${s.joins.length}`, + ); + + if (s.measures.length > 0) { + parts.push(`Measures: ${s.measures.map((m) => `\`${m.name}\` (${m.expr})`).join(', ')}`); + } + + if (s.joins.length > 0) { + parts.push(`Joins: ${s.joins.map((j) => `→ ${j.to} (${j.relationship})`).join(', ')}`); + } + + const publicCols = s.columns.filter((c) => c.visibility !== 'hidden'); + if (publicCols.length > 0) { + parts.push(`Columns: ${publicCols.map((c) => `\`${s.name}.${c.name}\` (${c.type})`).join(', ')}`); + } + + parts.push(''); + } +} diff --git a/packages/context/src/sl/tools/sl-edit-source.tool.test.ts b/packages/context/src/sl/tools/sl-edit-source.tool.test.ts new file mode 100644 index 00000000..5165112a --- /dev/null +++ b/packages/context/src/sl/tools/sl-edit-source.tool.test.ts @@ -0,0 +1,187 @@ +import { describe, expect, it, vi } from 'vitest'; +import type { ToolSession } from '../../tools/index.js'; +import { createTouchedSlSources, hasTouchedSlSource, type ToolContext } from '../../tools/index.js'; +import { SlEditSourceTool } from './sl-edit-source.tool.js'; + +function makeTool(overrides: any = {}) { + const semanticLayerService = { + readSourceFile: vi.fn().mockResolvedValue({ + content: + 'name: orders\ntable: public.orders\ngrain: [id]\ncolumns:\n - name: id\n type: string\nmeasures: []\njoins: []\n', + }), + validateWithProposedSource: 
vi.fn().mockResolvedValue({ errors: [], warnings: [] }), + writeSource: vi.fn().mockResolvedValue({ commitHash: 'c1' }), + loadAllSources: vi.fn().mockResolvedValue([]), + deleteSource: vi.fn().mockResolvedValue(undefined), + isManifestBacked: vi.fn().mockResolvedValue(false), + ...overrides.semanticLayerService, + }; + const slSearchService = { + indexSources: vi.fn().mockResolvedValue(undefined), + ...overrides.slSearchService, + }; + const tool = new SlEditSourceTool({ + semanticLayerService: semanticLayerService as never, + slSearchService: slSearchService as never, + authorResolver: { resolve: vi.fn().mockResolvedValue({ name: 'T U', email: 't@u.com' }) }, + }); + return { tool, semanticLayerService, slSearchService }; +} + +const baseContext: ToolContext = { sourceId: 's', messageId: 'm', userId: 'u' }; + +function makeSession(overrides: Partial = {}): ToolSession { + return { + connectionId: '11111111-1111-1111-1111-111111111111', + isWorktreeScoped: true, + preHead: 'base', + touchedSlSources: createTouchedSlSources(), + actions: [], + semanticLayerService: { + readSourceFile: vi.fn().mockResolvedValue({ + content: + 'name: orders\ntable: public.orders\ngrain: [id]\ncolumns:\n - name: id\n type: string\nmeasures: []\njoins: []\n', + }), + validateWithProposedSource: vi.fn().mockResolvedValue({ errors: [], warnings: [] }), + writeSource: vi.fn().mockResolvedValue({ commitHash: 'c1' }), + loadAllSources: vi.fn().mockResolvedValue([]), + } as any, + wikiService: {} as any, + configService: {} as any, + gitService: {} as any, + ...overrides, + }; +} + +describe('SlEditSourceTool — session gating', () => { + it('skips slSearchService.indexSources when session is worktree-scoped', async () => { + const { tool, slSearchService } = makeTool(); + const session = makeSession(); + const context: ToolContext = { ...baseContext, session }; + const result = await tool.call( + { + connectionId: session.connectionId, + sourceName: 'orders', + yaml_edits: [{ oldText: 
'measures: []', newText: 'measures: []' }], + } as any, + context, + ); + expect(result.structured.success).toBe(true); + expect(slSearchService.indexSources).not.toHaveBeenCalled(); + expect(hasTouchedSlSource(session.touchedSlSources, session.connectionId!, 'orders')).toBe(true); + expect(session.actions).toContainEqual(expect.objectContaining({ target: 'sl', key: 'orders' })); + }); + + it('records cross-connection SL edits with targetConnectionId', async () => { + const { tool } = makeTool(); + const session = makeSession({ connectionId: '11111111-1111-4111-8111-111111111111' }); + const warehouseConnectionId = '22222222-2222-4222-8222-222222222222'; + const context: ToolContext = { ...baseContext, session }; + + const result = await tool.call( + { + connectionId: warehouseConnectionId, + sourceName: 'orders', + yaml_edits: [{ oldText: 'measures: []', newText: 'measures: []' }], + } as any, + context, + ); + + expect(result.structured.success).toBe(true); + expect(hasTouchedSlSource(session.touchedSlSources, warehouseConnectionId, 'orders')).toBe(true); + expect(session.actions).toContainEqual( + expect.objectContaining({ + target: 'sl', + type: 'updated', + key: 'orders', + targetConnectionId: warehouseConnectionId, + }), + ); + }); + + it('indexes normally when no session is present', async () => { + const { tool, slSearchService } = makeTool(); + const result = await tool.call( + { + connectionId: '11111111-1111-1111-1111-111111111111', + sourceName: 'orders', + yaml_edits: [{ oldText: 'measures: []', newText: 'measures: []' }], + } as any, + baseContext, + ); + expect(result.structured.success).toBe(true); + expect(slSearchService.indexSources).toHaveBeenCalledTimes(1); + }); + + it('uses session.semanticLayerService when session is present', async () => { + const { tool } = makeTool(); + const session = makeSession(); + const context: ToolContext = { ...baseContext, session }; + await tool.call( + { + connectionId: session.connectionId, + sourceName: 
'orders', + yaml_edits: [{ oldText: 'measures: []', newText: 'measures: []' }], + } as any, + context, + ); + expect((session.semanticLayerService as any).writeSource).toHaveBeenCalled(); + }); +}); + +describe('SlEditSourceTool — manifest-backed source without overlay', () => { + it('returns a directed hint pointing at sl_write_source + overlay shape', async () => { + const { tool, semanticLayerService } = makeTool({ + semanticLayerService: { + readSourceFile: vi.fn().mockRejectedValue(new Error('ENOENT')), + isManifestBacked: vi.fn().mockResolvedValue(true), + }, + }); + const result = await tool.call( + { + connectionId: '11111111-1111-1111-1111-111111111111', + sourceName: 'CONSIGNMENTS', + yaml_edits: [{ oldText: 'measures: []', newText: 'measures:\n - name: aav_count\n expr: count(*)' }], + } as any, + baseContext, + ); + + expect(result.structured.success).toBe(false); + expect(semanticLayerService.isManifestBacked).toHaveBeenCalledWith( + '11111111-1111-1111-1111-111111111111', + 'CONSIGNMENTS', + ); + expect(semanticLayerService.writeSource).not.toHaveBeenCalled(); + + const joinedErrors = (result.structured.errors ?? 
[]).join('\n'); + expect(joinedErrors).toContain('CONSIGNMENTS'); + expect(joinedErrors).toContain('manifest'); + expect(joinedErrors).toContain('sl_write_source'); + expect(joinedErrors).toContain('overlay'); + // Overlay shape: only name + measures/segments/description + expect(joinedErrors).toContain('measures'); + expect(joinedErrors).toContain('segments'); + }); + + it('still returns the plain "Source not found" error for truly-missing names', async () => { + const { tool, semanticLayerService } = makeTool({ + semanticLayerService: { + readSourceFile: vi.fn().mockRejectedValue(new Error('ENOENT')), + isManifestBacked: vi.fn().mockResolvedValue(false), + }, + }); + const result = await tool.call( + { + connectionId: '11111111-1111-1111-1111-111111111111', + sourceName: 'does_not_exist', + yaml_edits: [{ oldText: 'x', newText: 'y' }], + } as any, + baseContext, + ); + + expect(result.structured.success).toBe(false); + expect(result.structured.errors).toEqual(['Source not found. Use sl_write_source to create it.']); + expect(semanticLayerService.isManifestBacked).toHaveBeenCalledTimes(1); + expect(semanticLayerService.writeSource).not.toHaveBeenCalled(); + }); +}); diff --git a/packages/context/src/sl/tools/sl-edit-source.tool.ts b/packages/context/src/sl/tools/sl-edit-source.tool.ts new file mode 100644 index 00000000..29fa275d --- /dev/null +++ b/packages/context/src/sl/tools/sl-edit-source.tool.ts @@ -0,0 +1,200 @@ +import YAML from 'yaml'; +import { z } from 'zod'; +import { addTouchedSlSource, type ToolContext, type ToolOutput } from '../../tools/index.js'; +import { applySqlEdits } from '../../tools/sql-edit-replacer.js'; +import type { SemanticLayerSource } from '../types.js'; +import { + BaseSemanticLayerTool, + type BaseSemanticLayerToolDeps, + type SemanticLayerStructured, +} from './base-semantic-layer.tool.js'; +import { slToolConnectionIdSchema } from './connection-id-schema.js'; + +const slEditSourceInputSchema = z.object({ + connectionId: 
/**
 * Decide whether a recorded session action needs an explicit target connection.
 *
 * @param runConnectionId - Connection the session itself is scoped to (may be absent).
 * @param actionConnectionId - Connection the action was actually applied to.
 * @returns `actionConnectionId` when the session has a connection and it differs from
 *          the action's connection; otherwise `null`.
 */
function actionTargetConnectionId(
  runConnectionId: string | null | undefined,
  actionConnectionId: string,
): string | null {
  return runConnectionId && runConnectionId !== actionConnectionId ? actionConnectionId : null;
}

/**
 * Tool `sl_edit_source`: applies exact-match search/replace edits to an existing
 * semantic layer source's YAML, or deletes the source when `delete` is set.
 *
 * Reads and writes go through `context.session.semanticLayerService` when a session is
 * present, falling back to the tool's own service otherwise.
 */
export class SlEditSourceTool extends BaseSemanticLayerTool {
  readonly name = 'sl_edit_source';

  constructor(deps: BaseSemanticLayerToolDeps) {
    super(deps);
  }

  get description(): string {
    return `
Make targeted edits to an existing semantic layer source using exact-match search/replace on YAML content.
If no source exists yet, use sl_write_source instead — this tool will reject the call.



- Adding/removing a measure on an existing source
- Adding/updating a join relationship
- Updating column descriptions
- Removing an obsolete source (set delete: true)
- Consolidation: delete redundant sources, edit the surviving one



- yaml_edits: exact-match search/replace on raw YAML. oldText must match byte-for-byte (no whitespace normalization or fuzzy matching).
 Include enough surrounding context in oldText for a unique match.
- Read the source first with sl_read_source to copy the exact text you want to replace.
- Keep edits scoped to the user's request — don't proactively regenerate all measures.
`;
  }

  get inputSchema() {
    return slEditSourceInputSchema;
  }

  /**
   * Execute the edit (or delete) and report the outcome.
   *
   * Flow for an edit: read the current YAML, apply `yaml_edits`, parse, validate the
   * proposed source, write it, refresh the search index (unless the session is
   * worktree-scoped), then record the change on the session.
   *
   * @param input - Connection id, source name, optional `yaml_edits`, optional `delete`.
   * @param context - Tool context; `context.session` (if any) supplies the scoped service
   *                  and receives the touched-source / action bookkeeping.
   * @returns The structured tool output produced by `buildOutput`. `success` is false when
   *          the source is missing, parsing/validation/writing fails, or any individual
   *          edit could not be applied.
   */
  async call(input: SlEditSourceInput, context: ToolContext): Promise<ToolOutput<SemanticLayerStructured>> {
    const { connectionId, sourceName } = input;
    const { name: author, email: authorEmail } = await this.authorResolver.resolve(context.userId);

    const semanticLayerService = context.session?.semanticLayerService ?? this.semanticLayerService;
    const skipIndex = context.session?.isWorktreeScoped === true;

    // Handle delete
    if (input.delete) {
      try {
        await semanticLayerService.deleteSource(connectionId, sourceName, author, authorEmail);
        if (context.session) {
          addTouchedSlSource(context.session.touchedSlSources, connectionId, sourceName);
          context.session.actions.push({
            target: 'sl',
            type: 'removed',
            key: sourceName,
            detail: 'Deleted source',
            targetConnectionId: actionTargetConnectionId(context.session.connectionId, connectionId),
          });
        }
        return this.buildOutput(true, [], sourceName, { yaml: undefined, commitHash: undefined });
      } catch (error) {
        return this.buildOutput(false, [error instanceof Error ? error.message : String(error)], sourceName);
      }
    }

    // Read existing source; a read failure is treated the same as "no file".
    let currentYaml: string | null = null;
    try {
      const { content } = await semanticLayerService.readSourceFile(connectionId, sourceName);
      currentYaml = content;
    } catch {
      currentYaml = null;
    }
    if (!currentYaml) {
      const manifestBacked = await semanticLayerService.isManifestBacked(connectionId, sourceName);
      if (manifestBacked) {
        return this.buildOutput(
          false,
          [
            [
              `Source "${sourceName}" exists in the schema manifest but has no overlay file yet — sl_edit_source cannot edit it directly.`,
              `Bootstrap an overlay with sl_write_source, then re-run sl_edit_source on subsequent changes:`,
              ` name: ${sourceName}`,
              ` measures:`,
              ` - name: `,
              ` expr: ""`,
              ` description: ""`,
              `Overlay shape: "name:" plus any of "measures:", "segments:", "description:". Do NOT include "sql:", "table:", "grain:", "columns:", or "joins:" — those are inherited from the manifest.`,
            ].join('\n'),
          ],
          sourceName,
        );
      }
      return this.buildOutput(false, ['Source not found. Use sl_write_source to create it.'], sourceName);
    }

    const errors: string[] = [];
    let yaml = currentYaml;
    let editCount = 0;

    // Apply yaml_edits (text-level search/replace, exact-match only).
    // Edit failures are collected rather than aborting: whatever did apply is still
    // validated and written below, and the final output reports success = false.
    if (input.yaml_edits && input.yaml_edits.length > 0) {
      const editResult = applySqlEdits(yaml, input.yaml_edits, { exactOnly: true });
      yaml = editResult.sql;
      editCount = editResult.appliedEdits;
      if (!editResult.success) {
        errors.push(...editResult.errors);
      }
    }

    // Parse resulting YAML
    let source: SemanticLayerSource;
    try {
      source = YAML.parse(yaml) as SemanticLayerSource;
    } catch (e) {
      return this.buildOutput(false, [`YAML parse error after edits: ${e}`], sourceName);
    }

    // Re-serialize for the response payload
    const updatedYaml = YAML.stringify(source, { indent: 2, lineWidth: 0 });

    const { errors: validationErrors, warnings: validationWarnings } =
      await semanticLayerService.validateWithProposedSource(connectionId, source);
    if (validationErrors.length > 0) {
      return this.buildOutput(
        false,
        [...errors, 'Validation failed — edits were NOT saved:', ...validationErrors],
        sourceName,
        { yaml: updatedYaml, editCount, validationErrors, validationWarnings },
      );
    }

    const commitMessage = `Edit source ${sourceName}: ${
      input.yaml_edits ? `${input.yaml_edits.length} YAML edit(s)` : 'update'
    }`;

    try {
      const result = await semanticLayerService.writeSource(connectionId, source, author, authorEmail, commitMessage);

      if (!skipIndex) {
        // The search index refresh is best-effort. The write above has already been
        // committed, so a failure while reloading sources or indexing must not be
        // reported as a failed edit, and must not skip the session bookkeeping below.
        try {
          const allSources = await semanticLayerService.loadAllSources(connectionId);
          await this.slSearchService.indexSources(connectionId, allSources);
        } catch {
          // Intentionally ignored: the saved source is the source of truth.
        }
      }

      if (context.session) {
        addTouchedSlSource(context.session.touchedSlSources, connectionId, sourceName);
        context.session.actions.push({
          target: 'sl',
          type: 'updated',
          key: sourceName,
          detail: `Applied ${editCount} edit(s)`,
          targetConnectionId: actionTargetConnectionId(context.session.connectionId, connectionId),
        });
      }

      return this.buildOutput(errors.length === 0, errors, sourceName, {
        yaml: updatedYaml,
        commitHash: result.commitHash ?? undefined,
        editCount,
        validationErrors,
        validationWarnings,
      });
    } catch (error) {
      errors.push(error instanceof Error ? error.message : String(error));
      return this.buildOutput(false, errors, sourceName, { yaml: updatedYaml, editCount });
    }
  }
}
never, + authorResolver: { resolve: vi.fn() }, + }); + return { tool, semanticLayerService }; +} + +function makeContext(overrides: Partial = {}): ToolContext { + return { + sourceId: 'src', + messageId: 'msg', + userId: 'user', + ...overrides, + }; +} + +function makeSession(overrides: Partial = {}): ToolSession { + return { + connectionId: '11111111-1111-1111-1111-111111111111', + isWorktreeScoped: true, + preHead: 'base', + touchedSlSources: createTouchedSlSources(), + actions: [], + semanticLayerService: { + readSourceFile: vi.fn().mockResolvedValue({ content: 'name: foo_session\n', path: 'session' }), + } as any, + wikiService: {} as any, + configService: {} as any, + gitService: {} as any, + ...overrides, + }; +} + +describe('SlReadSourceTool - session-scoped reads', () => { + it('reads through context.session.semanticLayerService when a session is present', async () => { + const { tool, semanticLayerService } = makeTool(); + const session = makeSession(); + + const result = await tool.call( + { connectionId: '11111111-1111-1111-1111-111111111111', sourceName: 'foo' }, + makeContext({ session }), + ); + + expect((session.semanticLayerService as any).readSourceFile).toHaveBeenCalledWith( + '11111111-1111-1111-1111-111111111111', + 'foo', + ); + expect(semanticLayerService.readSourceFile).not.toHaveBeenCalled(); + expect(result.structured.yaml).toContain('foo_session'); + }); + + it('reads through the default service when no session is present', async () => { + const { tool, semanticLayerService } = makeTool(); + + const result = await tool.call( + { connectionId: '11111111-1111-1111-1111-111111111111', sourceName: 'foo' }, + makeContext(), + ); + + expect(semanticLayerService.readSourceFile).toHaveBeenCalledWith('11111111-1111-1111-1111-111111111111', 'foo'); + expect(result.structured.yaml).toContain('foo_default'); + }); +}); diff --git a/packages/context/src/sl/tools/sl-read-source.tool.ts b/packages/context/src/sl/tools/sl-read-source.tool.ts new file mode 
/**
 * Tool `sl_read_source`: returns the raw YAML of one semantic layer source.
 *
 * The YAML is obtained through the base class's `readSourceYaml`, which receives the
 * tool context alongside the connection id and source name.
 */
export class SlReadSourceTool extends BaseSemanticLayerTool {
  readonly name = 'sl_read_source';

  constructor(deps: BaseSemanticLayerToolDeps) {
    super(deps);
  }

  get description(): string {
    return `
Read the raw YAML definition of a semantic layer source, including its SQL implementation.
Use this when you need to understand how a source is built — e.g., before editing it with sl_edit_source or sl_write_source.



- Before editing a source: understand its full definition (SQL, columns, measures, joins)
- When debugging a source: see the underlying SQL query
- When creating a new source based on an existing one



- To discover what sources/measures/dimensions are available for querying — use sl_discover instead
- To query data — use semantic_query or create_widget with slQuery
`;
  }

  get inputSchema() {
    return slReadSourceInputSchema;
  }

  /**
   * Look up the source and render it as a fenced YAML block.
   *
   * @param input - Connection id and name of the source to read.
   * @param context - Tool context forwarded to `readSourceYaml`.
   * @returns Markdown plus `{ sourceName, yaml }`; `yaml` is the empty string when the
   *          lookup yields nothing.
   */
  async call(input: SlReadSourceInput, context: ToolContext): Promise<ToolOutput<SlReadSourceStructured>> {
    const sourceName = input.sourceName;
    const connectionId = input.connectionId;

    const sourceYaml = await this.readSourceYaml(connectionId, sourceName, context);

    if (sourceYaml) {
      const fenced = `\`\`\`yaml\n${sourceYaml}\n\`\`\``;
      return {
        markdown: `## Source: ${sourceName}\n\n${fenced}`,
        structured: { sourceName, yaml: sourceYaml },
      };
    }

    // Nothing to show: report the miss and hand back an empty YAML payload.
    return {
      markdown: `Source **${sourceName}** not found for connection ${connectionId}.`,
      structured: { sourceName, yaml: '' },
    };
  }
}
gitService: { getFileAtCommit: vi.fn().mockResolvedValue('pre: content') } as any, + ...overrides, + }; +} + +describe('SlRollbackTool', () => { + const connections = { + getConnectionById: vi.fn(), + listEnabledConnections: vi.fn(), + executeQuery: vi.fn(), + }; + + it('errors when context.session is absent', async () => { + const tool = new SlRollbackTool({} as never, connections as never, 1); + const context: ToolContext = { sourceId: 's', messageId: 'm', userId: 'u' }; + const result = await tool.call({ sourceName: 'orders' } as any, context); + expect(result.structured.success).toBe(false); + expect(result.markdown).toMatch(/session/i); + }); + + it('errors when session has no connectionId (wiki-only turn)', async () => { + const tool = new SlRollbackTool({} as never, connections as never, 1); + const session = makeSession({ connectionId: null }); + const context: ToolContext = { sourceId: 's', messageId: 'm', userId: 'u', session }; + const result = await tool.call({ sourceName: 'orders' } as any, context); + expect(result.structured.success).toBe(false); + expect(result.markdown).toMatch(/connection-scoped session/i); + // Session state untouched + expect(hasTouchedSlSource(session.touchedSlSources, 'conn-1', 'orders')).toBe(true); + expect((session.gitService as any).getFileAtCommit).not.toHaveBeenCalled(); + }); + + it('restores the source content from preHead, clears touched set, prunes actions', async () => { + const slSourcesRepository = { deleteByConnectionAndName: vi.fn().mockResolvedValue(undefined) }; + const tool = new SlRollbackTool(slSourcesRepository as never, connections as never, 1); + const session = makeSession(); + const context: ToolContext = { sourceId: 's', messageId: 'm', userId: 'u', session }; + const result = await tool.call({ sourceName: 'orders' } as any, context); + + expect(result.structured.success).toBe(true); + expect((session.gitService as any).getFileAtCommit).toHaveBeenCalledWith( + expect.stringContaining('orders.yaml'), + 
/**
 * Tool `sl_rollback`: discards this session's changes to one source by restoring it to
 * the session's `preHead` state, then forgets the source in the session bookkeeping.
 */
export class SlRollbackTool extends BaseTool {
  readonly name = 'sl_rollback';

  constructor(
    private readonly slSourcesRepository: SlSourcesIndexPort,
    private readonly connections: SlConnectionCatalogPort,
    private readonly probeRowCount: number,
  ) {
    super();
  }

  get description(): string {
    return `
Abandon this-session changes to a source and restore it to its pre-session state.
Use when a write/edit failed validation in a way you cannot fix in-session (e.g. the source requires elevated warehouse permissions).
`;
  }

  get inputSchema() {
    return slRollbackInputSchema;
  }

  /**
   * Roll the named source back to its pre-session state.
   *
   * @param input - Name of the source to roll back.
   * @param context - Tool context; must carry a session that has a `connectionId`.
   * @returns A failure output (session state untouched) when there is no session or the
   *          session has no connection; otherwise a success output whose `outcome` is the
   *          description returned by `revertSourceToPreHead`.
   */
  async call(input: SlRollbackInput, context: ToolContext): Promise<ToolOutput<SlRollbackStructured>> {
    const { sourceName } = input;
    const { session } = context;

    const failure = (markdown: string): ToolOutput<SlRollbackStructured> => ({
      markdown,
      structured: { success: false, sourceName },
    });

    if (!session) {
      return failure(
        'Error: sl_rollback requires an active session (ingest WU or memory-agent). Use git revert for interactive rollback.',
      );
    }

    const connectionId = session.connectionId;
    if (!connectionId) {
      return failure(
        'Error: sl_rollback requires a connection-scoped session; this session has no warehouse connection.',
      );
    }

    const outcome = await revertSourceToPreHead(
      {
        semanticLayerService: session.semanticLayerService,
        connections: this.connections,
        configService: session.configService,
        gitService: session.gitService,
        slSourcesRepository: this.slSourcesRepository,
        probeRowCount: this.probeRowCount,
      },
      connectionId,
      session.preHead,
      sourceName,
    );

    deleteTouchedSlSource(session.touchedSlSources, connectionId, sourceName);

    // Drop every recorded SL action for this source on this connection. Actions without an
    // explicit target connection are attributed to the session's own connection. The array
    // is rewritten in place so other holders of `session.actions` see the pruned list.
    const survivors = session.actions.filter((action) => {
      const sameSource = action.target === 'sl' && action.key === sourceName;
      const sameConnection = (action.targetConnectionId ?? connectionId) === connectionId;
      return !(sameSource && sameConnection);
    });
    session.actions.splice(0, session.actions.length, ...survivors);

    return {
      markdown: `Source "${sourceName}" rolled back: ${outcome}.`,
      structured: { success: true, sourceName, outcome },
    };
  }
}
connection warehouse dialect, not hardcoded postgres', async () => { + const serviceMock = { + validateSourcesForConnection: vi.fn().mockResolvedValue({ errors: [], warnings: [] }), + }; + + await validateSemanticLayerEndpoint('conn-1', serviceMock as unknown as SemanticLayerService); + + expect(serviceMock.validateSourcesForConnection).toHaveBeenCalledWith('conn-1'); + }); + + it('short-circuits when there are no validatable sources', async () => { + const serviceMock = { + validateSourcesForConnection: vi.fn().mockResolvedValue({ errors: [], warnings: [] }), + }; + + const result = await validateSemanticLayerEndpoint('conn-1', serviceMock as unknown as SemanticLayerService); + + expect(result).toEqual({ errors: [], warnings: [] }); + }); +}); + +describe('SlValidateTool — session-aware touched-set filtering', () => { + it('when session present, only returns errors/warnings that mention touched sources', async () => { + const sources: SemanticLayerSource[] = [ + { name: 'orders', table: 'x.orders', grain: ['id'], columns: [], joins: [], measures: [] }, + { name: 'customers', table: 'x.customers', grain: ['id'], columns: [], joins: [], measures: [] }, + ]; + const serviceMock = { + loadAllSources: vi.fn().mockResolvedValue(sources), + validateSourcesForConnection: vi.fn().mockResolvedValue({ + errors: ['orders: missing join target', 'customers: invalid grain'], + warnings: ['orders: disconnected-components warning'], + }), + }; + + const tool = new SlValidateTool({ + semanticLayerService: serviceMock as never, + slSearchService: {} as never, + authorResolver: { resolve: vi.fn() }, + }); + + const session: ToolSession = { + connectionId: 'conn-1', + isWorktreeScoped: true, + preHead: null, + touchedSlSources: createTouchedSlSources([{ connectionId: 'conn-1', sourceName: 'orders' }]), + actions: [], + semanticLayerService: serviceMock as any, + wikiService: {} as any, + configService: {} as any, + gitService: {} as any, + }; + const context: ToolContext = { sourceId: 
/**
 * Run connection-wide semantic layer validation without letting a thrown error escape.
 *
 * @param connectionId - Connection whose sources are validated.
 * @param semanticLayerService - Service that performs the validation.
 * @returns The service's `{ errors, warnings }` report, or a single-error report
 *          (`Validation call failed: …`) when the service call throws.
 */
export async function validateSemanticLayerEndpoint(
  connectionId: string,
  semanticLayerService: SemanticLayerService,
): Promise<ValidationReport> {
  try {
    return await semanticLayerService.validateSourcesForConnection(connectionId);
  } catch (e) {
    return {
      errors: [`Validation call failed: ${e instanceof Error ? e.message : String(e)}`],
      warnings: [],
    };
  }
}

/**
 * True when `message` mentions `sourceName` as a whole identifier.
 *
 * A plain substring test would let a touched source named `orders` pull in diagnostics
 * for `customer_orders` or `orders_v2`; requiring that the match is not flanked by
 * identifier characters keeps the filter to the sources that were actually touched.
 */
function mentionsSourceName(message: string, sourceName: string): boolean {
  if (!sourceName) {
    return false;
  }
  const escaped = sourceName.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
  return new RegExp(`(?<![A-Za-z0-9_])${escaped}(?![A-Za-z0-9_])`).test(message);
}

/**
 * Tool `sl_validate`: validates all sources of a connection and summarizes the result.
 *
 * When the session has touched sources on this connection, only diagnostics that mention
 * one of those sources are reported.
 */
export class SlValidateTool extends BaseSemanticLayerTool {
  readonly name = 'sl_validate';

  constructor(deps: BaseSemanticLayerToolDeps) {
    super(deps);
  }

  get description(): string {
    return `
Validate that all semantic layer sources for a connection form a consistent model.
Checks: all join targets exist, grain is valid, no missing references.



- After making edits with sl_write_source
- Before querying, to ensure the model is healthy
- When troubleshooting query failures
`;
  }

  get inputSchema() {
    return slValidateInputSchema;
  }

  /**
   * Validate the connection's sources and build a markdown + structured report.
   *
   * @param input - Connection id to validate.
   * @param context - Tool context; a session (if any) supplies the scoped service and the
   *                  touched-source set used to filter diagnostics.
   * @returns `success` is true when no errors remain after filtering. With zero sources the
   *          output is built via `buildOutput` with a "No sources found" validation entry.
   */
  async call(input: SlValidateInput, context: ToolContext): Promise<ToolOutput<SemanticLayerStructured>> {
    const { connectionId } = input;

    const semanticLayerService = context.session?.semanticLayerService ?? this.semanticLayerService;

    const sources = await semanticLayerService.loadAllSources(connectionId);
    if (sources.length === 0) {
      return this.buildOutput(true, [], '(all)', {
        validationErrors: ['No sources found for this connection.'],
      });
    }

    let { errors, warnings } = await validateSemanticLayerEndpoint(connectionId, semanticLayerService);

    // Narrow the report to sources touched in this session (if any were touched on this
    // connection); otherwise the full report is returned unchanged.
    const touched = context.session?.touchedSlSources;
    if (touched && touched.size > 0) {
      const touchedArr = touchedSlSourceNamesForConnection(touched, connectionId);
      if (touchedArr.length > 0) {
        const concernsTouched = (message: string): boolean =>
          touchedArr.some((name) => mentionsSourceName(message, name));
        errors = errors.filter(concernsTouched);
        warnings = warnings.filter(concernsTouched);
      }
    }

    const valid = errors.length === 0;
    const parts: string[] = [];
    parts.push(`**Semantic layer validation** for ${sources.length} source(s):`);

    if (valid && warnings.length === 0) {
      parts.push('All sources are valid. Join graph is consistent.');
    } else {
      const summary: string[] = [];
      if (errors.length > 0) {
        summary.push(`${errors.length} error(s)`);
      }
      if (warnings.length > 0) {
        summary.push(`${warnings.length} warning(s)`);
      }
      parts.push(`Found ${summary.join(' and ')}:`);
      if (errors.length > 0) {
        parts.push('', '**Errors:**');
        for (const err of errors) {
          parts.push(`- ${err}`);
        }
      }
      if (warnings.length > 0) {
        parts.push('', '**Warnings:**');
        for (const warn of warnings) {
          parts.push(`- ${warn}`);
        }
      }
    }

    // List sources summary
    parts.push('\n**Sources:**');
    for (const s of sources) {
      parts.push(
        `- **${s.name}** (${s.sql ? 'sql' : 'table'}): ${s.columns.length} cols, ${s.measures.length} measures, ${s.joins.length} joins`,
      );
    }

    return {
      markdown: parts.join('\n'),
      structured: {
        success: valid,
        sourceName: '(all)',
        validationErrors: errors.length > 0 ? errors : undefined,
        validationWarnings: warnings.length > 0 ? warnings : undefined,
      },
    };
  }
}
connectionType: 'bigquery' }), + listEnabledConnections: vi.fn().mockResolvedValue([]), + } as never, + configService: {} as never, + gitService: {} as never, + slSourcesRepository: { deleteByConnectionAndName: vi.fn().mockResolvedValue(undefined) } as never, + probeRowCount: 1, + }; +} + +describe('validateSingleSource warehouse dry-run', () => { + it('surfaces warehouse error when dry-run fails on unknown column', async () => { + const yaml = `name: fct_arr_delta +source_type: sql +sql: | + SELECT * FROM analytics.fct_arr_delta WHERE date_date < CURRENT_DATE() +grain: [date_date] +columns: + - name: date_date + type: time +measures: + - name: count_delta_events + expr: count(*) +joins: [] +`; + const executeQuery = vi.fn().mockRejectedValue(new Error('Unrecognized name: date_date at [1:42]')); + const deps = makeDeps({ sourceYaml: yaml, executeQuery }); + const result = await validateSingleSource(deps, 'conn-1', 'fct_arr_delta'); + expect(result.errors.join('\n')).toMatch(/Unrecognized name: date_date/); + expect(result.errors.join('\n')).toMatch(/embedded sql dry-run failed/); + }); + + it('flags declared columns missing from the dry-run result', async () => { + const yaml = `name: fct_arr_delta +source_type: sql +sql: | + SELECT date, customer_id FROM analytics.fct_arr_delta +columns: + - name: date_date + type: time + - name: customer_id + type: string +measures: + - name: count_delta + expr: count(*) +joins: [] +grain: [customer_id] +`; + const executeQuery = vi.fn().mockResolvedValue({ + headers: ['date', 'customer_id'], + rows: [], + totalRows: 0, + error: null, + }); + const deps = makeDeps({ sourceYaml: yaml, executeQuery }); + const result = await validateSingleSource(deps, 'conn-1', 'fct_arr_delta'); + expect(result.errors.join('\n')).toMatch(/declared columns absent from sql result — date_date/); + expect(result.errors.join('\n')).toMatch(/warehouse returned:/); + }); + + it('passes cleanly when dry-run succeeds and declared columns match', async () => 
{ + const yaml = `name: lab_results +source_type: sql +sql: | + SELECT lab_order_id, admin_user_id FROM analytics.raw_lab_results +grain: [lab_order_id] +columns: + - name: lab_order_id + type: string + - name: admin_user_id + type: string +measures: + - name: count_lab_results + expr: count(lab_order_id) +joins: [] +`; + const executeQuery = vi.fn().mockResolvedValue({ + headers: ['lab_order_id', 'admin_user_id'], + rows: [], + totalRows: 0, + error: null, + }); + const deps = makeDeps({ sourceYaml: yaml, executeQuery }); + const result = await validateSingleSource(deps, 'conn-1', 'lab_results'); + expect(result.errors).toEqual([]); + }); + + it('uses LIMIT 1 (not LIMIT 0) so runtime policies fire', async () => { + const yaml = `name: foo +source_type: sql +sql: | + SELECT a FROM analytics.bar +grain: [a] +columns: + - {name: a, type: string} +measures: [] +joins: [] +`; + const executeQuery = vi.fn().mockResolvedValue({ headers: ['a'], rows: [], totalRows: 0, error: null }); + const deps = makeDeps({ sourceYaml: yaml, executeQuery }); + await validateSingleSource(deps, 'conn-1', 'foo'); + const probeSql = executeQuery.mock.calls[0][1] as string; + expect(probeSql).toMatch(/LIMIT 1\b/); + expect(probeSql).not.toMatch(/LIMIT 0\b/); + }); +}); diff --git a/packages/context/src/sl/tools/sl-warehouse-validation.ts b/packages/context/src/sl/tools/sl-warehouse-validation.ts new file mode 100644 index 00000000..cb16d60b --- /dev/null +++ b/packages/context/src/sl/tools/sl-warehouse-validation.ts @@ -0,0 +1,325 @@ +import YAML from 'yaml'; +import type { GitService, KloFileStorePort } from '../../core/index.js'; +import { SYSTEM_GIT_AUTHOR } from '../../tools/index.js'; +import type { SlConnectionCatalogPort, SlSourcesIndexPort } from '../ports.js'; +import { sourceOverlaySchema } from '../schemas.js'; +import { SemanticLayerService } from '../semantic-layer.service.js'; +import { sourceDefinitionSchema } from './base-semantic-layer.tool.js'; + +export interface 
/** Repository-relative path of the YAML file backing a source on a connection. */
const slSourcePath = (connectionId: string, sourceName: string): string =>
  ['semantic-layer', connectionId, `${sourceName}.yaml`].join('/');

/**
 * Translate a warehouse type into a SQL dialect name via `SemanticLayerService.mapDialect`.
 * Returns `null` when no warehouse type is known.
 */
function resolveDialect(warehouse: string | null): string | null {
  return warehouse ? SemanticLayerService.mapDialect(warehouse) : null;
}

/** Wrap `sql` as a subquery that returns no rows (`TOP 0` for tsql, `LIMIT 0` otherwise). */
function wrapWithZeroRowQuery(sql: string, dialect: string): string {
  return dialect === 'tsql'
    ? `SELECT TOP 0 * FROM (${sql}) AS _discovery`
    : `SELECT * FROM (${sql}) AS _discovery LIMIT 0`;
}

/** Wrap `sql` as a subquery capped at one row (`TOP 1` for tsql, `LIMIT 1` otherwise). */
function wrapWithSingleRowQuery(sql: string, dialect: string): string {
  return dialect === 'tsql'
    ? `SELECT TOP 1 * FROM (${sql}) AS _base`
    : `SELECT * FROM (${sql}) AS _base LIMIT 1`;
}
/**
 * Check a single semantic layer source from file to warehouse.
 *
 * Stages, in order (each failing stage returns immediately unless noted):
 *  1. read the source file;
 *  2. parse it as YAML and require a top-level object;
 *  3. reject a standalone source (has `table` or `sql`) that shadows a manifest entry;
 *  4. validate against the overlay or full-definition schema, adding format hints as
 *     warnings for failing `joins` / `columns` / `measures`;
 *  5. report duplicate measure names (does not return early);
 *  6. probe the warehouse: for embedded `sql`, run a wrapped probe query and compare the
 *     returned headers (case-insensitively) with the declared columns; for overlays,
 *     probe each measure via `probeOverlayMeasures`.
 *
 * @returns Collected `errors` and hint-style `warnings`.
 */
export async function validateSingleSource(
  deps: SlValidationDeps,
  connectionId: string,
  sourceName: string,
): Promise<SourceValidationResult> {
  const errors: string[] = [];
  const warnings: string[] = [];
  const finish = (): SourceValidationResult => ({ errors, warnings });
  const messageOf = (e: unknown): string => (e instanceof Error ? e.message : String(e));

  // 1. Read
  let rawYaml: string;
  try {
    const file = await deps.semanticLayerService.readSourceFile(connectionId, sourceName);
    rawYaml = file.content;
  } catch {
    errors.push(`${sourceName}.yaml: file not found`);
    return finish();
  }

  // 2. Parse
  let doc: Record<string, unknown>;
  try {
    doc = YAML.parse(rawYaml);
  } catch (e) {
    errors.push(`${sourceName}.yaml: invalid YAML — ${messageOf(e)}`);
    return finish();
  }
  if (!doc || typeof doc !== 'object') {
    errors.push(`${sourceName}.yaml: top-level content is not an object`);
    return finish();
  }

  // 3. Standalone sources must not shadow a manifest entry
  const isOverlay = !doc.table && !doc.sql;
  if (!isOverlay && (await deps.semanticLayerService.isManifestBacked(connectionId, sourceName))) {
    errors.push(
      [
        `${sourceName}.yaml: standalone source shadows an existing manifest entry — `,
        `writing it as-is drops the manifest's columns and joins. `,
        `Remove "sql:", "table:", "grain:", "columns:", and "joins:" and keep only `,
        `"name:" plus "measures:"/"segments:"/"description:" to write an overlay `,
        `that inherits the manifest schema. Call sl_describe_table to see it first.`,
      ].join(''),
    );
    return finish();
  }

  // 4. Schema
  const schema = isOverlay ? sourceOverlaySchema : sourceDefinitionSchema;
  const schemaCheck = schema.safeParse(doc);
  if (!schemaCheck.success) {
    const issueList = schemaCheck.error.issues;
    const rendered = issueList.map((issue) => `${issue.path.join('.')}: ${issue.message}`).join('; ');
    errors.push(`${sourceName}.yaml: schema — ${rendered}`);

    const failingFields = new Set(issueList.map((issue) => String(issue.path[0])));
    const hints: Array<[field: string, hint: string]> = [
      [
        'joins',
        `${sourceName}.yaml: hint — join format: {to, on: 'local_col = TARGET.col', relationship: 'many_to_one|one_to_many|one_to_one'}`,
      ],
      [
        'columns',
        `${sourceName}.yaml: hint — overlay columns must be computed: {name, expr, type}. Do NOT include base table columns.`,
      ],
      [
        'measures',
        `${sourceName}.yaml: hint — measure format: {name, expr, description (optional), filter (optional)}`,
      ],
    ];
    for (const [field, hint] of hints) {
      if (failingFields.has(field)) {
        warnings.push(hint);
      }
    }
    return finish();
  }

  // 5. Duplicate measure names
  const declaredMeasures = (doc.measures as Array<{ name: string }> | undefined) ?? [];
  const seenNames = new Set<string>();
  for (const declared of declaredMeasures) {
    if (seenNames.has(declared.name)) {
      errors.push(`${sourceName}.yaml: duplicate measure name "${declared.name}"`);
    }
    seenNames.add(declared.name);
  }

  // Warehouse type is optional context: a failed lookup just means "unknown".
  let warehouse: string | null = null;
  try {
    const connection = await deps.connections.getConnectionById(connectionId);
    warehouse = connection?.connectionType ?? null;
  } catch {
    warehouse = null;
  }

  // 6. Warehouse probe
  const embeddedSql = doc.sql;
  if (typeof embeddedSql === 'string' && embeddedSql.trim().length > 0) {
    // Strip trailing semicolons so the statement can be nested as a subquery.
    const innerSql = embeddedSql.trim().replace(/;+\s*$/, '');
    const { probeRowCount } = deps;
    const dialect = resolveDialect(warehouse);

    let probeSql: string;
    if (!dialect) {
      probeSql = `SELECT * FROM (${innerSql}) AS _probe LIMIT ${probeRowCount}`;
    } else if (probeRowCount === 0) {
      probeSql = wrapWithZeroRowQuery(innerSql, dialect);
    } else {
      probeSql = wrapWithSingleRowQuery(innerSql, dialect);
    }

    const declaredColumns = (doc.columns as Array<{ name?: string; type?: string }> | undefined) ?? [];
    const sourceColumns = declaredColumns
      .map((column) => ({ name: column.name ?? '', type: column.type ?? '' }))
      .filter((column) => column.name);

    try {
      const probe = await deps.connections.executeQuery(connectionId, probeSql);
      const returned = new Set((probe.headers ?? []).map((header) => header.toLowerCase()));
      const missing = sourceColumns
        .map((column) => column.name)
        .filter((name) => !returned.has(name.toLowerCase()));
      if (missing.length > 0) {
        const preview = [...returned].slice(0, 10).join(', ');
        const ellipsis = returned.size > 10 ? ', …' : '';
        errors.push(
          `${sourceName}.yaml: declared columns absent from sql result — ${missing.join(', ')} (warehouse returned: ${preview}${ellipsis})`,
        );
      }
    } catch (e) {
      errors.push(
        formatProbeError({
          sourceName,
          measureName: null,
          probeSql,
          warehouse,
          sourceColumns,
          error: e,
          headline: 'embedded sql dry-run failed',
        }),
      );
    }
  } else if (isOverlay) {
    const measureErrors = await probeOverlayMeasures(deps, connectionId, sourceName, warehouse);
    errors.push(...measureErrors);
  }

  return finish();
}
`${sourceName}.yaml: measure "${measureName}" ${headline}.` : `${sourceName}.yaml: ${headline}.`, + ]; + if (warehouse) { + lines.push(` Warehouse: ${warehouse}`); + } + lines.push(` Probe SQL: ${probeSql}`); + if (refColumns.length > 0) { + lines.push(` Referenced columns: ${refColumns.map((c) => `${c.name} (${c.type || '?'})`).join(', ')}`); + } + lines.push(` Error: ${errMsg}`); + return lines.join('\n'); +} + +function referencesColumn(sql: string, columnName: string): boolean { + if (!columnName) { + return false; + } + const escaped = columnName.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'); + return new RegExp(`\\b${escaped}\\b`).test(sql); +} + +async function probeOverlayMeasures( + deps: SlValidationDeps, + connectionId: string, + sourceName: string, + warehouse: string | null, +): Promise { + const errors: string[] = []; + let composed: + | { + name: string; + table?: string; + sql?: string; + columns?: Array<{ name?: string; type?: string }>; + measures: Array<{ name: string; expr: string; filter?: string; segments?: string[] }>; + segments?: Array<{ name: string; expr: string }>; + } + | undefined; + try { + const all = await deps.semanticLayerService.loadAllSources(connectionId); + composed = all.find((s) => s.name === sourceName); + } catch (e) { + errors.push( + `${sourceName}.yaml: failed to load composed source for probe — ${e instanceof Error ? e.message : String(e)}`, + ); + return errors; + } + if (!composed?.table || composed.measures.length === 0) { + return errors; + } + + const sourceColumns = (composed.columns ?? []) + .map((c) => ({ name: c.name ?? '', type: c.type ?? '' })) + .filter((c) => c.name); + + for (const measure of composed.measures) { + const measureRef = `${sourceName}.${measure.name}`; + let probeSql = ``; + try { + const result = await deps.semanticLayerService.executeQuery(connectionId, { + measures: [measureRef], + dimensions: [], + filters: [], + limit: 1, + }); + probeSql = result.sql ?? 
probeSql; + } catch (e) { + errors.push( + formatProbeError({ + sourceName, + measureName: measure.name, + probeSql, + warehouse, + sourceColumns, + error: e, + headline: 'dry-run failed', + }), + ); + } + } + return errors; +} + +/** + * Restore `sourceName` to the content it had at `preHead`, or delete it if it didn't + * exist then. Used by sl_rollback (agent-driven) and the pre-squash revert gate + * (automatic). Returns a short human-readable description of what happened. + */ +export async function revertSourceToPreHead( + deps: SlValidationDeps, + connectionId: string, + preHead: string | null, + sourceName: string, +): Promise { + const relPath = slSourcePath(connectionId, sourceName); + let preContent: string | null = null; + if (preHead) { + try { + preContent = await deps.gitService.getFileAtCommit(relPath, preHead); + } catch { + preContent = null; + } + } + + if (preContent !== null) { + await deps.configService.writeFile( + relPath, + preContent, + SYSTEM_GIT_AUTHOR.name, + SYSTEM_GIT_AUTHOR.email, + `Revert SL source to pre-session state: ${sourceName}`, + { skipLock: true }, + ); + return 'restored to pre-session content'; + } + + try { + await deps.configService.deleteFile( + relPath, + SYSTEM_GIT_AUTHOR.name, + SYSTEM_GIT_AUTHOR.email, + `Drop SL source (not present at session start): ${sourceName}`, + { skipLock: true }, + ); + await deps.slSourcesRepository.deleteByConnectionAndName(connectionId, sourceName); + return 'deleted (did not exist at session start)'; + } catch { + await deps.slSourcesRepository.deleteByConnectionAndName(connectionId, sourceName); + return 'no-op (already absent)'; + } +} diff --git a/packages/context/src/sl/tools/sl-write-source.tool.test.ts b/packages/context/src/sl/tools/sl-write-source.tool.test.ts new file mode 100644 index 00000000..4ad6bf53 --- /dev/null +++ b/packages/context/src/sl/tools/sl-write-source.tool.test.ts @@ -0,0 +1,267 @@ +import { describe, expect, it, vi } from 'vitest'; +import type { 
ToolSession } from '../../tools/index.js'; +import { createTouchedSlSources, hasTouchedSlSource, type ToolContext } from '../../tools/index.js'; +import { SlWriteSourceTool } from './sl-write-source.tool.js'; + +function makeTool(overrides: Partial> = {}) { + const semanticLayerService = { + listManifestSourceNames: vi.fn().mockResolvedValue(['ACCOUNTS', 'ORDERS']), + isManifestBacked: vi.fn().mockResolvedValue(false), + loadSource: vi.fn().mockResolvedValue(null), + loadAllSources: vi.fn().mockResolvedValue([]), + validateWithProposedSource: vi.fn().mockResolvedValue({ errors: [], warnings: [] }), + writeSource: vi.fn().mockResolvedValue({ commitHash: 'c1' }), + deleteSource: vi.fn().mockResolvedValue(undefined), + readSourceFile: vi.fn().mockRejectedValue(new Error('not found')), + ...overrides.semanticLayerService, + }; + const slSearchService = { + indexSources: vi.fn().mockResolvedValue(undefined), + ...overrides.slSearchService, + }; + const tool = new SlWriteSourceTool({ + semanticLayerService: semanticLayerService as never, + slSearchService: slSearchService as never, + authorResolver: { resolve: vi.fn().mockResolvedValue({ name: 'T U', email: 't@u.com' }) }, + }); + return { tool, semanticLayerService, slSearchService }; +} + +const baseContext: ToolContext = { sourceId: 's', messageId: 'm', userId: 'u' }; + +describe('SlWriteSourceTool — orphan overlay guard', () => { + it('rejects overlay YAMLs targeting a name absent from the manifest', async () => { + const { tool } = makeTool(); + const result = await tool.call( + { + connectionId: '11111111-1111-1111-1111-111111111111', + sourceName: 'does_not_exist', + source: { + name: 'does_not_exist', + measures: [{ name: 'count_rows', expr: 'count(*)' }], + } as any, + } as any, + baseContext, + ); + expect(result.structured.success).toBe(false); + expect(result.markdown).toMatch(/no manifest entry with that name exists/i); + expect(result.markdown).toMatch(/ACCOUNTS|ORDERS/); + }); +}); + 
+describe('SlWriteSourceTool — session gating', () => { + function makeSession(overrides: Partial = {}): ToolSession { + return { + connectionId: '11111111-1111-1111-1111-111111111111', + isWorktreeScoped: true, + preHead: 'base', + touchedSlSources: createTouchedSlSources(), + actions: [], + semanticLayerService: { + loadSource: vi.fn().mockResolvedValue(null), + loadAllSources: vi.fn().mockResolvedValue([]), + validateWithProposedSource: vi.fn().mockResolvedValue({ errors: [], warnings: [] }), + writeSource: vi.fn().mockResolvedValue({ commitHash: 'c1' }), + deleteSource: vi.fn().mockResolvedValue(undefined), + listManifestSourceNames: vi.fn().mockResolvedValue([]), + isManifestBacked: vi.fn().mockResolvedValue(false), + readSourceFile: vi.fn().mockRejectedValue(new Error('not found')), + findManifestEntryByTableRef: vi.fn().mockResolvedValue(null), + } as any, + wikiService: {} as any, + configService: {} as any, + gitService: {} as any, + ...overrides, + }; + } + + it('skips slSearchService.indexSources when session is worktree-scoped', async () => { + const { tool, slSearchService } = makeTool(); + const session = makeSession(); + const context: ToolContext = { ...baseContext, session }; + const result = await tool.call( + { + connectionId: session.connectionId, + sourceName: 'my_source', + source: { + name: 'my_source', + sql: 'select 1 as id', + grain: ['id'], + columns: [{ name: 'id', type: 'string' }], + measures: [], + joins: [], + } as any, + } as any, + context, + ); + expect(result.structured.success).toBe(true); + expect(slSearchService.indexSources).not.toHaveBeenCalled(); + expect(hasTouchedSlSource(session.touchedSlSources, session.connectionId!, 'my_source')).toBe(true); + expect(session.actions).toContainEqual(expect.objectContaining({ target: 'sl', key: 'my_source' })); + }); + + it('records cross-connection SL writes with targetConnectionId', async () => { + const { tool } = makeTool(); + const session = makeSession({ connectionId: 
'11111111-1111-4111-8111-111111111111' }); + const warehouseConnectionId = '22222222-2222-4222-8222-222222222222'; + const context: ToolContext = { ...baseContext, session }; + + const result = await tool.call( + { + connectionId: warehouseConnectionId, + sourceName: 'mapped_orders', + source: { + name: 'mapped_orders', + table: 'public.orders', + grain: ['id'], + columns: [{ name: 'id', type: 'string' }], + measures: [], + joins: [], + } as any, + } as any, + context, + ); + + expect(result.structured.success).toBe(true); + expect(hasTouchedSlSource(session.touchedSlSources, warehouseConnectionId, 'mapped_orders')).toBe(true); + expect(session.actions).toContainEqual( + expect.objectContaining({ + target: 'sl', + key: 'mapped_orders', + targetConnectionId: warehouseConnectionId, + }), + ); + }); + + it('indexes normally when no session is present', async () => { + const { tool, slSearchService } = makeTool(); + const result = await tool.call( + { + connectionId: '11111111-1111-1111-1111-111111111111', + sourceName: 'my_source', + source: { + name: 'my_source', + sql: 'select 1 as id', + grain: ['id'], + columns: [{ name: 'id', type: 'string' }], + measures: [], + joins: [], + } as any, + } as any, + baseContext, + ); + expect(result.structured.success).toBe(true); + expect(slSearchService.indexSources).toHaveBeenCalledTimes(1); + }); + + it('uses session.semanticLayerService when session is present', async () => { + const { tool } = makeTool(); + const session = makeSession(); + const context: ToolContext = { ...baseContext, session }; + await tool.call( + { + connectionId: session.connectionId, + sourceName: 'my_source', + source: { + name: 'my_source', + sql: 'select 1 as id', + grain: ['id'], + columns: [{ name: 'id', type: 'string' }], + measures: [], + joins: [], + } as any, + } as any, + context, + ); + expect((session.semanticLayerService as any).writeSource).toHaveBeenCalled(); + }); +}); + +describe('SlWriteSourceTool — disconnected-components warning in 
markdown', () => { + it('surfaces validation warnings (including disconnected-components) in the markdown body', async () => { + const { tool } = makeTool({ + semanticLayerService: { + validateWithProposedSource: vi.fn().mockResolvedValue({ + errors: [], + warnings: ['orders: disconnected-components — no join path to ACCOUNTS'], + }), + }, + }); + const result = await tool.call( + { + connectionId: '11111111-1111-1111-1111-111111111111', + sourceName: 'orders', + source: { + name: 'orders', + sql: 'select 1 as id', + grain: ['id'], + columns: [{ name: 'id', type: 'string' }], + measures: [], + joins: [], + } as any, + } as any, + baseContext, + ); + expect(result.markdown).toMatch(/disconnected-components/i); + }); + + it('renders per-source warnings prominently when the just-written source becomes a singleton component', async () => { + const { tool } = makeTool({ + semanticLayerService: { + validateWithProposedSource: vi.fn().mockResolvedValue({ + errors: [], + warnings: ['Model has 2 disconnected components.'], + perSourceWarnings: { + foo: ["Source 'foo' is now a singleton component (no joins to any other source)."], + }, + }), + }, + }); + + const result = await tool.call( + { + connectionId: '11111111-1111-1111-1111-111111111111', + sourceName: 'foo', + source: { + name: 'foo', + sql: 'select 1 as id', + grain: ['id'], + columns: [{ name: 'id', type: 'string' }], + measures: [], + joins: [], + } as any, + } as any, + baseContext, + ); + + expect(result.markdown).toMatch(/Action required/i); + expect(result.markdown).toContain("Source 'foo' is now a singleton component"); + }); +}); + +describe('SlWriteSourceTool — standalone shadow guard', () => { + it('rejects standalone YAMLs that shadow a manifest entry', async () => { + const { tool } = makeTool({ + semanticLayerService: { + isManifestBacked: vi.fn().mockResolvedValue(true), + }, + }); + const result = await tool.call( + { + connectionId: '11111111-1111-1111-1111-111111111111', + sourceName: 'ACCOUNTS', + 
source: { + name: 'ACCOUNTS', + table: 'raw.accounts', + grain: ['id'], + columns: [{ name: 'id', type: 'string' }], + measures: [], + joins: [], + } as any, + } as any, + baseContext, + ); + expect(result.structured.success).toBe(false); + expect(result.markdown).toMatch(/shadows an existing manifest entry|already exists/i); + }); +}); diff --git a/packages/context/src/sl/tools/sl-write-source.tool.ts b/packages/context/src/sl/tools/sl-write-source.tool.ts new file mode 100644 index 00000000..39a5ad5e --- /dev/null +++ b/packages/context/src/sl/tools/sl-write-source.tool.ts @@ -0,0 +1,380 @@ +import YAML from 'yaml'; +import { z } from 'zod'; +import { addTouchedSlSource, type ToolContext, type ToolOutput } from '../../tools/index.js'; +import { sourceOverlaySchema } from '../schemas.js'; +import type { SemanticLayerService } from '../semantic-layer.service.js'; +import type { SemanticLayerSource } from '../types.js'; +import { + BaseSemanticLayerTool, + type BaseSemanticLayerToolDeps, + type SemanticLayerStructured, + sourceDefinitionSchema, +} from './base-semantic-layer.tool.js'; +import { slToolConnectionIdSchema } from './connection-id-schema.js'; + +const sourceInputSchema = z.union([sourceDefinitionSchema, sourceOverlaySchema]); + +const slWriteSourceInputSchema = z.object({ + connectionId: slToolConnectionIdSchema.describe('Data source connection ID'), + sourceName: z + .string() + .regex(/^[a-z0-9][a-z0-9_]*$/, 'Source name must be snake_case (lowercase alphanumeric and underscores)') + .describe('Name of the source to create, edit, or delete'), + source: sourceInputSchema + .optional() + .describe('Source definition (standalone with table/sql) or overlay (measures, computed columns, etc.)'), + delete: z.boolean().optional().describe('Set to true to delete this source entirely'), +}); + +type SlWriteSourceInput = z.infer; + +function actionTargetConnectionId( + runConnectionId: string | null | undefined, + actionConnectionId: string, +): string | null { + 
return runConnectionId && runConnectionId !== actionConnectionId ? actionConnectionId : null; +} + +export class SlWriteSourceTool extends BaseSemanticLayerTool { + readonly name = 'sl_write_source'; + + constructor(deps: BaseSemanticLayerToolDeps) { + super(deps); + } + + get description(): string { + return ` +Create a new semantic layer source or fully rewrite an existing one. +If the source already exists, this tool will overwrite it with the new definition. + + + +- First time creating a source definition +- When modeling a new SQL-backed source (e.g., churn risk view, ARR calculation) +- When the user asks to start over / fully rewrite a source +- Consolidating multiple sources into one (write merged definition) +- For targeted edits to existing sources (add/remove measures, update joins), prefer sl_edit_source instead + + + +- New source: provide \`source\` with full definition +- Full rewrite: provide \`source\` (overwrites existing) +- Targeted edits on an existing source: use sl_edit_source instead +- Delete: set \`delete: true\` + + + +- name: Unique identifier for the source +- table: For physical table/view sources (e.g., "public.orders"). Mutually exclusive with sql. +- sql: For SQL-based sources (the SQL query). Mutually exclusive with table. +- grain: What one row represents (e.g., ["id"], ["customer_id", "product_id"]) +- columns: All columns with type (string/number/time/boolean) and optional descriptions +- joins: Relationships to other sources (to, on, relationship: many_to_one/one_to_many/one_to_one) +- measures: Pre-defined aggregations (name, expr like "sum(amount)", optional filter, optional segments — bare names of segments defined on the same source, optional description) +- segments: Named, reusable boolean predicates scoped to this source (name, expr — a SQL boolean over this source's columns, optional description). A measure references one with \`segments: [name]\`; a query references one with the dotted form \`source.segment_name\`. 
Use when the same predicate appears on 3+ measures — e.g. extract \`is_paid = true and is_refunded = '0'\` as \`segments: [{name: paid_non_refunded, expr: "..."}]\` and have each measure use \`segments: [paid_non_refunded]\` instead of re-typing the predicate inside \`sum(case when ... then x end)\`. Segments are predicates only — they cannot be selected as dimensions or grouped by; if you need to group by the predicate, add a \`columns[]\` entry instead. + + + +Sources with joins: [] are disconnected from the semantic layer join graph and cannot be composed with other sources in semantic queries. +Before writing, use discover_data to check existing sources and their grain columns. +For each grain/key column in your source (e.g., account_id, item_id), find the matching dimension source (e.g., ACCOUNTS, ITEMS) and declare a many_to_one join. +Example: a source graining on [account_id] should declare: + joins: + - to: ACCOUNTS + on: source_name.account_id = ACCOUNTS.ACCOUNT_ID + relationship: many_to_one +The on condition format: local_column = TARGET_SOURCE.target_column (right side must include target source name). +Do NOT join back to a table that the SQL already aggregates from if the grain column is not in the output (the relationship is already baked into the SQL). +`; + } + + get inputSchema() { + return slWriteSourceInputSchema; + } + + async call(input: SlWriteSourceInput, context: ToolContext): Promise> { + const { connectionId, sourceName } = input; + const { name: author, email: authorEmail } = await this.authorResolver.resolve(context.userId); + + const semanticLayerService = context.session?.semanticLayerService ?? 
this.semanticLayerService; + const skipIndex = context.session?.isWorktreeScoped === true; + + // Handle delete + if (input.delete) { + try { + await semanticLayerService.deleteSource(connectionId, sourceName, author, authorEmail); + if (!skipIndex) { + const allSources = await semanticLayerService.loadAllSources(connectionId); + await this.slSearchService.indexSources(connectionId, allSources).catch(() => {}); + } + if (context.session) { + addTouchedSlSource(context.session.touchedSlSources, connectionId, sourceName); + context.session.actions.push({ + target: 'sl', + type: 'removed', + key: sourceName, + detail: 'Deleted source', + targetConnectionId: actionTargetConnectionId(context.session.connectionId, connectionId), + }); + } + return this.buildOutput(true, [], sourceName, { yaml: undefined, commitHash: undefined }); + } catch (error) { + return this.buildOutput(false, [error instanceof Error ? error.message : String(error)], sourceName); + } + } + + // Require source for create/rewrite + if (!input.source) { + return this.buildOutput( + false, + ['Provide `source` to create or rewrite. For targeted edits, use sl_edit_source.'], + sourceName, + ); + } + + return this.writeFullSource( + connectionId, + input.source, + sourceName, + author, + authorEmail, + context, + semanticLayerService, + skipIndex, + ); + } + + private async writeFullSource( + connectionId: string, + source: z.infer, + sourceName: string, + author: string, + authorEmail: string, + context: ToolContext, + semanticLayerService: SemanticLayerService, + skipIndex: boolean, + ): Promise> { + const isOverlay = !('table' in source && source.table) && !('sql' in source && source.sql); + + const existing = await this.readSourceYamlFromService(semanticLayerService, connectionId, sourceName); + const commitMessage = existing + ? `${isOverlay ? 'Update overlay' : 'Rewrite source'}: ${sourceName}` + : `${isOverlay ? 
'Create overlay' : 'Create source'}: ${sourceName}`; + + const yamlContent = YAML.stringify(source); + + const orphanError = await this.rejectOrphanOverlay(semanticLayerService, connectionId, sourceName, yamlContent); + if (orphanError) { + return this.buildOutput(false, [orphanError], sourceName, { yaml: yamlContent }); + } + const shadowError = await this.rejectStandaloneShadow(semanticLayerService, connectionId, sourceName, yamlContent); + if (shadowError) { + return this.buildOutput(false, [shadowError], sourceName, { yaml: yamlContent }); + } + + const validatedSource = source as SemanticLayerSource; + const validationResult = await semanticLayerService.validateWithProposedSource(connectionId, validatedSource); + const validationErrors = validationResult.errors; + const validationWarnings = [...validationResult.warnings]; + const actionRequiredWarnings = validationResult.perSourceWarnings?.[sourceName] ?? []; + if (validationErrors.length > 0) { + return this.buildOutput(false, ['Validation failed — source was NOT saved:', ...validationErrors], sourceName, { + yaml: yamlContent, + validationErrors, + validationWarnings, + actionRequiredWarnings, + }); + } + + try { + const result = await semanticLayerService.writeSource( + connectionId, + validatedSource, + author, + authorEmail, + commitMessage, + ); + + if (!skipIndex) { + const allSources = await semanticLayerService.loadAllSources(connectionId); + await this.slSearchService.indexSources(connectionId, allSources).catch(() => {}); + } + + if (context.session) { + addTouchedSlSource(context.session.touchedSlSources, connectionId, sourceName); + context.session.actions.push({ + target: 'sl', + type: existing ? 'updated' : 'created', + key: sourceName, + detail: existing ? 
`Rewrote source` : `Created source`, + targetConnectionId: actionTargetConnectionId(context.session.connectionId, connectionId), + }); + } + + return this.buildOutput(true, [], sourceName, { + yaml: yamlContent, + commitHash: result.commitHash ?? undefined, + validationErrors, + validationWarnings, + actionRequiredWarnings, + }); + } catch (error) { + return this.buildOutput(false, [error instanceof Error ? error.message : String(error)], sourceName); + } + } + + private async readSourceYamlFromService( + service: SemanticLayerService, + connectionId: string, + sourceName: string, + ): Promise { + try { + const { content } = await service.readSourceFile(connectionId, sourceName); + return content; + } catch { + return null; + } + } + + private async rejectOrphanOverlay( + semanticLayerService: SemanticLayerService, + connectionId: string, + sourceName: string, + content: string, + ): Promise { + let parsed: Record; + try { + parsed = YAML.parse(content) as Record; + } catch { + return null; + } + if (!parsed || typeof parsed !== 'object') { + return null; + } + const isOverlay = !('table' in parsed && parsed.table) && !('sql' in parsed && parsed.sql); + if (!isOverlay) { + return null; + } + + const manifestNames = await semanticLayerService.listManifestSourceNames(connectionId); + if (manifestNames.includes(sourceName)) { + return null; + } + + const suggestions = this.nearestMatches(sourceName, manifestNames, 3); + return [ + `Error: cannot write "${sourceName}" as an overlay — no manifest entry with that name exists.`, + suggestions.length > 0 + ? 
` Nearest manifest matches: ${suggestions.join(', ')}.` + : ` No manifest entries resemble "${sourceName}".`, + `To customize an existing base table, retarget the overlay at one of the nearest matches.`, + `For a LookML derived_table or any source backed by inline SQL, rewrite as a standalone`, + `curated source with a top-level "sql:" block plus explicit "grain:" and "columns:".`, + ].join('\n'); + } + + private async rejectStandaloneShadow( + semanticLayerService: SemanticLayerService, + connectionId: string, + sourceName: string, + content: string, + ): Promise { + let parsed: Record; + try { + parsed = YAML.parse(content) as Record; + } catch { + return null; + } + if (!parsed || typeof parsed !== 'object') { + return null; + } + const isOverlay = !('table' in parsed && parsed.table) && !('sql' in parsed && parsed.sql); + if (isOverlay) { + return null; + } + + const isManifestBacked = await semanticLayerService.isManifestBacked(connectionId, sourceName); + if (!isManifestBacked) { + return null; + } + + return [ + `Error: cannot write "${sourceName}" as a standalone source — a manifest entry with that name already exists.`, + ` Writing standalone would drop the manifest's columns and joins, leaving only what you list here.`, + `To add measures/segments on top of the manifest, rewrite this YAML as an overlay:`, + ` - Remove "sql:", "table:", "grain:", "columns:", and "joins:".`, + ` - Keep only "name:", plus "measures:", "segments:", and/or "description:".`, + ` - The manifest's schema is inherited automatically.`, + `If you really need a different base table, use a different source name.`, + ].join('\n'); + } + + private nearestMatches(needle: string, haystack: string[], limit: number): string[] { + if (haystack.length === 0) { + return []; + } + const lowerNeedle = needle.toLowerCase(); + const scored = haystack.map((candidate) => { + const lower = candidate.toLowerCase(); + const prefixBoost = lower.startsWith(lowerNeedle) || lowerNeedle.startsWith(lower) ? 
0.2 : 0; + const substringBoost = lower.includes(lowerNeedle) || lowerNeedle.includes(lower) ? 0.1 : 0; + const score = jaroWinkler(lowerNeedle, lower) + prefixBoost + substringBoost; + return { candidate, score }; + }); + scored.sort((a, b) => b.score - a.score); + return scored + .filter((s) => s.score > 0.4) + .slice(0, limit) + .map((s) => s.candidate); + } +} + +function jaroWinkler(a: string, b: string): number { + if (a === b) { + return 1; + } + const matchDistance = Math.max(0, Math.floor(Math.max(a.length, b.length) / 2) - 1); + const aMatches = new Array(a.length).fill(false); + const bMatches = new Array(b.length).fill(false); + let matches = 0; + for (let i = 0; i < a.length; i++) { + const start = Math.max(0, i - matchDistance); + const end = Math.min(i + matchDistance + 1, b.length); + for (let j = start; j < end; j++) { + if (bMatches[j]) { + continue; + } + if (a[i] !== b[j]) { + continue; + } + aMatches[i] = true; + bMatches[j] = true; + matches++; + break; + } + } + if (matches === 0) { + return 0; + } + let transpositions = 0; + let k = 0; + for (let i = 0; i < a.length; i++) { + if (!aMatches[i]) { + continue; + } + while (!bMatches[k]) { + k++; + } + if (a[i] !== b[k]) { + transpositions++; + } + k++; + } + const jaro = (matches / a.length + matches / b.length + (matches - transpositions / 2) / matches) / 3; + let prefix = 0; + const maxPrefix = Math.min(4, a.length, b.length); + while (prefix < maxPrefix && a[prefix] === b[prefix]) { + prefix++; + } + return jaro + prefix * 0.1 * (1 - jaro); +} diff --git a/packages/context/src/sl/types.ts b/packages/context/src/sl/types.ts new file mode 100644 index 00000000..ff0334c1 --- /dev/null +++ b/packages/context/src/sl/types.ts @@ -0,0 +1,88 @@ +export interface SemanticLayerSource { + name: string; + descriptions?: Record; + table?: string; + sql?: string; + inherits_columns_from?: string; + grain: string[]; + columns: Array<{ + name: string; + type: string; + role?: string; + visibility?: string; 
+ descriptions?: Record; + expr?: string; + natural_granularity?: string; + constraints?: { dbt?: { not_null?: boolean; unique?: boolean } }; + enum_values?: { dbt?: string[] }; + tests?: { + dbt?: Array<{ name: string; package: string; kwargs?: Record }>; + dbt_by_package?: Record; + }; + }>; + joins: Array<{ + to: string; + on: string; + relationship: string; + alias?: string; + source?: string; + }>; + measures: Array<{ + name: string; + expr: string; + filter?: string; + segments?: string[]; + description?: string; + }>; + segments?: Array<{ + name: string; + expr: string; + description?: string; + }>; + default_time_dimension?: { dbt?: string }; + tags?: { dbt?: string[] }; + freshness?: { dbt?: { raw?: unknown; loaded_at_field?: string | null } }; +} + +export interface SemanticLayerQueryInput { + measures: Array; + dimensions: Array; + filters?: string[]; + segments?: string[]; + order_by?: Array; + limit?: number; + include_empty?: boolean; +} + +export interface SemanticLayerQueryExecutionResult { + sql: string; + headers: string[]; + rows: unknown[][]; + totalRows: number; + plan: Record; +} + +export type SlSearchMatchReason = 'lexical' | 'semantic' | 'dictionary' | 'token' | (string & {}); + +export interface SlDictionaryMatch { + column: string; + values: string[]; + overflowCount?: number; +} + +export interface SlSearchLaneSummary { + lane: string; + status: 'available' | 'skipped' | 'failed'; + requestedCandidatePoolLimit: number; + effectiveCandidatePoolLimit: number; + returnedCandidateCount: number; + weight: number; + reason?: string; +} + +export interface SlSearchMetadata { + score: number; + matchReasons: SlSearchMatchReason[]; + dictionaryMatches?: SlDictionaryMatch[]; + lanes?: SlSearchLaneSummary[]; +} diff --git a/packages/context/src/sql-analysis/http-sql-analysis-port.test.ts b/packages/context/src/sql-analysis/http-sql-analysis-port.test.ts new file mode 100644 index 00000000..f6cdd3fe --- /dev/null +++ 
b/packages/context/src/sql-analysis/http-sql-analysis-port.test.ts @@ -0,0 +1,61 @@ +import { describe, expect, it, vi } from 'vitest'; +import { createHttpSqlAnalysisPort } from './http-sql-analysis-port.js'; + +describe('createHttpSqlAnalysisPort', () => { + it('calls the python-service fingerprint endpoint and maps snake_case response fields', async () => { + const requestJson = vi.fn(async () => ({ + fingerprint: 'fingerprint-template', + normalized_sql: 'SELECT * FROM analytics.orders WHERE status = ?', + tables_touched: ['analytics.orders'], + literal_slots: [{ position: 1, type: 'string', example_value: 'paid' }], + })); + const port = createHttpSqlAnalysisPort({ baseUrl: 'http://python.test', requestJson }); + + await expect( + port.analyzeForFingerprint("SELECT * FROM analytics.orders WHERE status = 'paid'", 'postgres'), + ).resolves.toEqual({ + fingerprint: 'fingerprint-template', + normalizedSql: 'SELECT * FROM analytics.orders WHERE status = ?', + tablesTouched: ['analytics.orders'], + literalSlots: [{ position: 1, type: 'string', exampleValue: 'paid' }], + }); + + expect(requestJson).toHaveBeenCalledWith('/api/sql/analyze-for-fingerprint', { + sql: "SELECT * FROM analytics.orders WHERE status = 'paid'", + dialect: 'postgres', + }); + }); + + it('preserves python-service parse errors in the mapped result', async () => { + const requestJson = vi.fn(async () => ({ + fingerprint: '', + normalized_sql: '', + tables_touched: [], + literal_slots: [], + error: 'Invalid expression / Unexpected token', + })); + const port = createHttpSqlAnalysisPort({ baseUrl: 'http://python.test', requestJson }); + + await expect(port.analyzeForFingerprint('SELECT * FROM WHERE', 'postgres')).resolves.toEqual({ + fingerprint: '', + normalizedSql: '', + tablesTouched: [], + literalSlots: [], + error: 'Invalid expression / Unexpected token', + }); + }); + + it('rejects malformed daemon responses instead of inventing defaults', async () => { + const requestJson = vi.fn(async () => 
import { request as httpRequest } from 'node:http';
import { request as httpsRequest } from 'node:https';
import { URL } from 'node:url';
import type {
  SqlAnalysisDialect,
  SqlAnalysisFingerprintResult,
  SqlAnalysisLiteralSlot,
  SqlAnalysisLiteralSlotType,
  SqlAnalysisPort,
} from './ports.js';

/**
 * Transport used by the port: sends `payload` to `path` and resolves with the
 * JSON object returned by the service.
 */
export type KloSqlAnalysisHttpJsonRunner = (
  path: string,
  payload: Record<string, unknown>,
) => Promise<Record<string, unknown>>;

export interface HttpSqlAnalysisPortOptions {
  /** Service base URL; consumed only by the built-in transport. */
  baseUrl: string;
  /** Replaces the built-in node:http(s) transport when provided. */
  requestJson?: KloSqlAnalysisHttpJsonRunner;
}

/**
 * Guarantees a trailing slash so that resolving a relative path against the
 * base appends to it instead of replacing its last path segment.
 */
function normalizedBaseUrl(baseUrl: string): string {
  return baseUrl.endsWith('/') ? baseUrl : `${baseUrl}/`;
}

/**
 * Decodes a response body and insists on a JSON object (not an array, scalar or null).
 *
 * @throws Error naming the endpoint when the body is not valid JSON or not an object.
 */
function parseJsonObject(raw: string, path: string): Record<string, unknown> {
  let parsed: unknown;
  try {
    parsed = JSON.parse(raw);
  } catch (error) {
    // Re-throw with the endpoint in the message; a bare SyntaxError gives no hint
    // about which call produced the unparsable body.
    throw new Error(
      `sql analysis HTTP ${path} returned invalid JSON: ${error instanceof Error ? error.message : String(error)}`,
    );
  }
  if (!parsed || typeof parsed !== 'object' || Array.isArray(parsed)) {
    throw new Error(`sql analysis HTTP ${path} returned non-object JSON`);
  }
  return parsed as Record<string, unknown>;
}

/**
 * Built-in transport: POSTs JSON with node:http / node:https (chosen from the URL
 * protocol) and resolves with the decoded JSON object.
 *
 * Rejects on request errors, on response-stream errors, on non-2xx status codes
 * (the response text is included in the message) and on undecodable bodies.
 */
function postJson(baseUrl: string): KloSqlAnalysisHttpJsonRunner {
  return async (path, payload) =>
    new Promise<Record<string, unknown>>((resolve, reject) => {
      // Strip the leading slash so the path resolves relative to the base URL's path.
      const target = new URL(path.replace(/^\//, ''), normalizedBaseUrl(baseUrl));
      const body = JSON.stringify(payload);
      const client = target.protocol === 'https:' ? httpsRequest : httpRequest;
      const request = client(
        target,
        {
          method: 'POST',
          headers: {
            accept: 'application/json',
            'content-type': 'application/json',
            'content-length': Buffer.byteLength(body),
          },
        },
        (response) => {
          const chunks: Buffer[] = [];
          response.on('data', (chunk: Buffer) => chunks.push(chunk));
          // A connection that drops after the headers arrived is reported on the
          // response stream, not on the request; turn it into a rejection.
          response.on('error', reject);
          response.on('end', () => {
            const text = Buffer.concat(chunks).toString('utf8');
            const statusCode = response.statusCode ?? 0;
            if (statusCode < 200 || statusCode >= 300) {
              reject(new Error(`sql analysis HTTP ${path} failed with ${statusCode}: ${text}`));
              return;
            }
            try {
              resolve(parseJsonObject(text, path));
            } catch (error) {
              reject(error);
            }
          });
        },
      );
      request.on('error', reject);
      request.end(body);
    });
}

/** Reads a mandatory string field; throws when it is absent or not a string. */
function requiredString(raw: Record<string, unknown>, field: string): string {
  const value = raw[field];
  if (typeof value !== 'string') {
    throw new Error(`sql analysis response is missing string field ${field}`);
  }
  return value;
}

/** Reads a field that may be a string, null or absent; any other type throws. */
function optionalString(raw: Record<string, unknown>, field: string): string | null | undefined {
  const value = raw[field];
  if (value === null || value === undefined || typeof value === 'string') {
    return value;
  }
  throw new Error(`sql analysis response has invalid optional string field ${field}`);
}

/** Reads a mandatory array whose items are all strings; throws otherwise. */
function requiredStringArray(raw: Record<string, unknown>, field: string): string[] {
  const value = raw[field];
  if (!Array.isArray(value) || value.some((item) => typeof item !== 'string')) {
    throw new Error(`sql analysis response is missing string[] field ${field}`);
  }
  return value;
}

/** Type guard for the literal-slot type names accepted by this port. */
function isLiteralSlotType(value: unknown): value is SqlAnalysisLiteralSlotType {
  return (
    value === 'string' ||
    value === 'number' ||
    value === 'timestamp' ||
    value === 'date' ||
    value === 'boolean' ||
    value === 'null' ||
    value === 'unknown'
  );
}

/**
 * Validates `literal_slots` and maps every entry from the snake_case wire shape
 * to the camelCase port shape. Any malformed entry throws; no defaults are invented.
 */
function literalSlots(raw: Record<string, unknown>): SqlAnalysisLiteralSlot[] {
  const value = raw.literal_slots;
  if (!Array.isArray(value)) {
    throw new Error('sql analysis response is missing literal_slots array');
  }
  return value.map((item) => {
    if (!item || typeof item !== 'object' || Array.isArray(item)) {
      throw new Error('sql analysis response contains invalid literal slot');
    }
    const slot = item as Record<string, unknown>;
    if (typeof slot.position !== 'number') {
      throw new Error('sql analysis response literal slot is missing numeric position');
    }
    if (!isLiteralSlotType(slot.type)) {
      throw new Error('sql analysis response literal slot is missing valid type');
    }
    if (typeof slot.example_value !== 'string') {
      throw new Error('sql analysis response literal slot is missing example_value');
    }
    return {
      position: slot.position,
      type: slot.type,
      exampleValue: slot.example_value,
    };
  });
}

/**
 * Maps the raw service response to `SqlAnalysisFingerprintResult`.
 * The `error` key is only present in the result when the response carried it
 * (a string or an explicit null).
 */
function mapResult(raw: Record<string, unknown>): SqlAnalysisFingerprintResult {
  const error = optionalString(raw, 'error');
  return {
    fingerprint: requiredString(raw, 'fingerprint'),
    normalizedSql: requiredString(raw, 'normalized_sql'),
    tablesTouched: requiredStringArray(raw, 'tables_touched'),
    literalSlots: literalSlots(raw),
    ...(error !== undefined ? { error } : {}),
  };
}

/**
 * Creates a `SqlAnalysisPort` that posts to `/api/sql/analyze-for-fingerprint`
 * and validates the response before returning it.
 *
 * @param options `baseUrl` for the built-in transport, or a custom `requestJson`.
 */
export function createHttpSqlAnalysisPort(options: HttpSqlAnalysisPortOptions): SqlAnalysisPort {
  const requestJson = options.requestJson ?? postJson(options.baseUrl);

  return {
    async analyzeForFingerprint(sql: string, dialect: SqlAnalysisDialect) {
      const raw = await requestJson('/api/sql/analyze-for-fingerprint', {
        sql,
        dialect,
      });
      return mapResult(raw);
    },
  };
}
import { cp, mkdir, rm, writeFile } from 'node:fs/promises';
import { join } from 'node:path';
import type { SimpleGit } from 'simple-git';
import { createSimpleGit } from '../ingest/git-env.js';

/** Handle to a throwaway git repository built from a fixture directory. */
export interface LocalGitRepo {
  repoDir: string;
  repoUrl: string;
  git: SimpleGit;
  commit: (message: string) => Promise<string>;
  writeFile: (relPath: string, content: string) => Promise<void>;
  deleteFile: (relPath: string) => Promise<void>;
}

/**
 * Copies `fixtureDir` into `<destRoot>/repo`, initialises a git repository on
 * branch `main` there and records an "initial" commit of the copied files.
 *
 * @returns helpers for editing the working tree and committing the result.
 */
export async function makeLocalGitRepo(fixtureDir: string, destRoot: string): Promise<LocalGitRepo> {
  const repoDir = join(destRoot, 'repo');
  await mkdir(repoDir, { recursive: true });
  await cp(fixtureDir, repoDir, { recursive: true });

  const git = createSimpleGit(repoDir);
  await git.init();
  // `-B` creates (or resets) `main` and switches to it.
  await git.raw(['checkout', '-B', 'main']);
  await git.addConfig('user.email', 'test@klo.local');
  await git.addConfig('user.name', 'KLO Test');
  await git.add('.');
  await git.commit('initial');

  /** Stages everything, commits, and returns the newest commit hash ('' if the log is empty). */
  async function commit(message: string): Promise<string> {
    await git.add('.');
    await git.commit(message);
    const { latest } = await git.log({ maxCount: 1 });
    return latest?.hash ?? '';
  }

  /** Writes a file inside the repository, creating parent directories first. */
  async function writeRepoFile(relPath: string, content: string): Promise<void> {
    const target = join(repoDir, relPath);
    await mkdir(join(target, '..'), { recursive: true });
    await writeFile(target, content, 'utf-8');
  }

  /** Removes a file from the working tree; a missing file is not an error (`force`). */
  async function deleteRepoFile(relPath: string): Promise<void> {
    await rm(join(repoDir, relPath), { force: true });
  }

  return {
    repoDir,
    repoUrl: `file://${repoDir}`,
    git,
    commit,
    writeFile: writeRepoFile,
    deleteFile: deleteRepoFile,
  };
}
authToken?: string; + currentUserMessage?: string; + toolCallId?: string; + toolCallHistory?: string[]; + timingTracker?: ToolTimingTrackerPort; + source?: ChatSource; + dashboardId?: string; + methodologyEntries?: MethodologyEntry[]; + progressRelay?: ToolProgressRelayPort; + connectionId?: string; + ingest?: IngestToolMetadata; + /** + * Per-session state (ingest WU, memory-agent post-turn). When present, SL/wiki + * tools use session-scoped services and emit touched-set entries instead of + * writing to shared indexes immediately. Non-session callers leave this unset. + */ + session?: ToolSession; + currentDefinition?: { + sql: string; + measures: unknown[]; + dimensions: unknown[]; + parameters: unknown[]; + segments: unknown[]; + name?: string; + description?: string; + }; +} + +export interface MethodologyEntry { + key: string; + toolName: string; + label: string; + args: Record; + result?: unknown; +} + +/** + * SECURITY: All tools require authentication. userId must always be provided in ToolContext. 
+ */ +export abstract class BaseTool { + protected readonly logger: KloLogger; + + abstract readonly name: string; + + constructor(logger: KloLogger = noopLogger) { + this.logger = logger; + } + + abstract get description(): string; + + abstract get inputSchema(): TInput; + + abstract call(input: z.infer, context: ToolContext): Promise; + + getParametersSchema(): { + type: 'object'; + properties: Record; + required?: string[]; + } { + const jsonSchema = z.toJSONSchema(this.inputSchema, { + target: 'draft-7', + }); + + return jsonSchema as any; + } + + toAnthropicFormat(): { + name: string; + description: string; + input_schema: { + type: 'object'; + properties: Record; + required?: string[]; + }; + } { + return { + name: this.name, + description: this.description, + input_schema: this.getParametersSchema(), + }; + } + + toAiSdkTool(context: ToolContext): any { + const toolName = this.name; + const logger = this.logger; + + return tool({ + description: this.description, + inputSchema: this.inputSchema, + execute: async (params, { toolCallId }) => { + // Create context copy with current toolCallId (safe for parallel execution) + const callContext = { ...context, toolCallId }; + + // Record tool execution start (input generation has already been tracked via onChunk) + if (callContext.timingTracker && toolCallId) { + callContext.timingTracker.recordToolExecutionStart(callContext.messageId, toolName, toolCallId); + } + + let state = 'completed'; + try { + if (!callContext.userId) { + throw new Error('Authentication required: userId must be provided in ToolContext'); + } + const parsedInput = this.parseInput(params as Record); + const result = await this.call(parsedInput, callContext); + return result; + } catch (error) { + state = 'error'; + this.logger.error( + `Tool ${this.name} execution failed: ${error instanceof Error ? 
error.message : String(error)}`, + ); + throw error; + } finally { + // Record tool execution end + if (callContext.timingTracker && toolCallId) { + callContext.timingTracker.recordToolExecutionEnd(callContext.messageId, toolName, toolCallId, state); + } + } + }, + // Send only markdown to LLM - frontend still receives full { markdown, structured } via stream + toModelOutput: ({ output }) => { + if (output && typeof output === 'object' && 'markdown' in output) { + return { type: 'content', value: [{ type: 'text', text: output.markdown as string }] }; + } + if (typeof output !== 'string') { + logger.warn(`Tool ${toolName} returned unexpected output type: ${typeof output}. Coercing to string.`); + } + return { type: 'content', value: [{ type: 'text', text: String(output) }] }; + }, + }); + } + + parseInput(input: Record): z.infer { + return this.inputSchema.parse(input); + } + + protected getCurrentUserQuery(context: ToolContext): string | null { + return context.currentUserMessage ?? null; + } +} diff --git a/packages/context/src/tools/context-candidate-mark.tool.ts b/packages/context/src/tools/context-candidate-mark.tool.ts new file mode 100644 index 00000000..d6f2439e --- /dev/null +++ b/packages/context/src/tools/context-candidate-mark.tool.ts @@ -0,0 +1,64 @@ +import { z } from 'zod'; +import { BaseTool, type ToolContext, type ToolOutput } from './base-tool.js'; +import type { ContextEvidenceToolStorePort } from './context-evidence-tool-store.js'; +import { ingestMetadataRequired, resolveIngestMetadata, type ToolFailure } from './context-ingest-metadata.js'; + +const contextCandidateMarkInputSchema = z.object({ + candidateKey: z.string().min(1), + status: z.enum(['pending', 'promoted', 'merged', 'rejected', 'conflict']), + rejectionReason: z.string().max(500).nullable().default(null), +}); + +type ContextCandidateMarkInput = z.infer; + +interface ContextCandidateMarkStructured { + success: boolean; + error?: string; + candidateKey?: string; + status?: string; +} 
/**
 * Tool that records the outcome of curator reconciliation on a context candidate
 * by updating its status (and optional rejection reason) for the current ingest run.
 */
export class ContextCandidateMarkTool extends BaseTool<typeof contextCandidateMarkInputSchema> {
  readonly name = 'context_candidate_mark';

  constructor(private readonly store: ContextEvidenceToolStorePort) {
    super();
  }

  get description(): string {
    return 'Mark a context knowledge candidate after curator reconciliation promotes, merges, rejects, or keeps it as a conflict.';
  }

  get inputSchema() {
    return contextCandidateMarkInputSchema;
  }

  /**
   * Updates the candidate identified by `candidateKey` within the current ingest run.
   *
   * @returns a failure output when ingest metadata is missing or no candidate
   *   matches the key; otherwise the stored key and status.
   */
  async call(
    input: ContextCandidateMarkInput,
    context: ToolContext,
  ): Promise<ToolOutput<ContextCandidateMarkStructured | ToolFailure>> {
    const ingest = resolveIngestMetadata(context);
    if (!ingest) {
      return ingestMetadataRequired();
    }

    const { candidateKey, status, rejectionReason } = input;
    const updated = await this.store.updateCandidateStatus({
      runId: ingest.runId,
      candidateKey,
      status,
      rejectionReason,
    });

    if (updated) {
      return {
        markdown: `Candidate "${updated.candidate_key}" marked ${updated.status}.`,
        structured: { success: true, candidateKey: updated.candidate_key, status: updated.status },
      };
    }

    return {
      markdown: `No candidate found with key "${candidateKey}".`,
      structured: { success: false, error: 'CANDIDATE_NOT_FOUND', candidateKey },
    };
  }
}
// Each score is an integer from 0 to 3.
const scoreSchema = z.number().int().min(0).max(3);

const contextCandidateWriteInputSchema = z.object({
  candidateKey: z.string().min(1).max(160),
  topic: z.string().min(1).max(200),
  assertion: z.string().min(1).max(500),
  rationale: z.string().min(1).max(1000),
  evidenceChunkIds: z.array(chunkIdSchema).min(1),
  suggestedPageKey: z.string().min(1).max(120).optional(),
  actionHint: z.enum(['create', 'update', 'merge', 'conflict', 'skip']),
  durabilityScore: scoreSchema,
  authorityScore: scoreSchema,
  reuseScore: scoreSchema,
  noveltyScore: scoreSchema,
  riskScore: scoreSchema,
});

type ContextCandidateWriteInput = z.infer<typeof contextCandidateWriteInputSchema>;

interface ContextCandidateWriteStructured {
  success: boolean;
  error?: string;
  message?: string;
  candidateKey?: string;
  promotionScore?: number;
  status?: string;
}

/**
 * Tool that stores a knowledge candidate backed by indexed evidence chunks.
 *
 * The candidate is rejected up front when ingest metadata or a connection id is
 * missing, or when any referenced evidence chunk cannot be read for the current
 * connection / source / run.
 */
export class ContextCandidateWriteTool extends BaseTool<typeof contextCandidateWriteInputSchema> {
  readonly name = 'context_candidate_write';

  constructor(
    private readonly store: ContextEvidenceToolStorePort,
    private readonly embeddingService: Pick<KloEmbeddingPort, 'computeEmbedding'>,
  ) {
    super();
  }

  get description(): string {
    return 'Write a durable knowledge candidate from indexed context evidence. Use this during ingest candidate extraction instead of wiki_write.';
  }

  get inputSchema() {
    return contextCandidateWriteInputSchema;
  }

  /**
   * Validates the evidence, scores the candidate and persists it.
   *
   * @returns `success: true` with the stored key, promotion score and status, or
   *   a `success: false` payload whose `error` names the failing precondition
   *   (`CONNECTION_REQUIRED`, `EVIDENCE_REQUIRED`, `EVIDENCE_NOT_FOUND`,
   *   `CANDIDATE_WRITE_FAILED`). Store errors are reported, not thrown.
   */
  async call(
    input: ContextCandidateWriteInput,
    context: ToolContext,
  ): Promise<ToolOutput<ContextCandidateWriteStructured | ToolFailure>> {
    const ingest = resolveIngestMetadata(context);
    if (!ingest) {
      return ingestMetadataRequired();
    }

    const connectionId = context.connectionId ?? context.session?.connectionId;
    if (!connectionId) {
      return {
        markdown: 'Error: no connectionId is available for candidate write.',
        structured: {
          success: false,
          error: 'CONNECTION_REQUIRED',
          message: 'Run this inside an ingest session with a connectionId.',
        },
      };
    }

    // Collapse repeated ids: the same chunk cited twice is still one piece of
    // evidence and must not be mistaken for a missing chunk below.
    const evidenceChunkIds = Array.from(new Set(input.evidenceChunkIds));

    // Defensive: the schema already enforces min(1), but call() can be invoked
    // with input that never went through parseInput().
    if (evidenceChunkIds.length === 0) {
      return {
        markdown: 'Error: candidates require at least one evidence chunk.',
        structured: { success: false, error: 'EVIDENCE_REQUIRED', message: 'Provide one or more evidenceChunkIds.' },
      };
    }

    const chunks = await this.store.readChunksByIds(evidenceChunkIds, connectionId, ingest.sourceKey, ingest.runId);

    // Decide by id membership rather than by comparing array lengths, so the
    // result does not depend on how the store treats duplicate ids.
    const found = new Set(chunks.map((chunk) => chunk.chunkId));
    const missing = evidenceChunkIds.filter((id) => !found.has(id));
    if (missing.length > 0) {
      return {
        markdown: `Error: evidence chunks not found or not visible: ${missing.join(', ')}`,
        structured: {
          success: false,
          error: 'EVIDENCE_NOT_FOUND',
          message: `Missing evidence chunk ids: ${missing.join(', ')}`,
        },
      };
    }

    // Risk counts against promotion; the other four scores count for it.
    const promotionScore =
      input.durabilityScore + input.authorityScore + input.reuseScore + input.noveltyScore - input.riskScore;
    const status = input.actionHint === 'conflict' ? 'conflict' : input.actionHint === 'skip' ? 'rejected' : 'pending';
    const evidenceRefs = chunks.map((chunk) => ({
      chunkId: chunk.chunkId,
      stableCitationKey: chunk.stableCitationKey,
      syncId: chunk.syncId,
      rawPath: chunk.rawPath,
      title: chunk.title,
      path: chunk.path,
      url: chunk.url,
      lastEditedAt: chunk.lastEditedAt?.toISOString() ?? null,
      // Hash of the chunk text as it was when the candidate was written.
      snippetHash: createHash('sha256').update(chunk.content).digest('hex'),
      citation: chunk.citation,
    }));
    const embedding = await this.computeCandidateEmbedding(input);

    try {
      const candidate = await this.store.insertCandidate({
        runId: ingest.runId,
        connectionId,
        sourceKey: ingest.sourceKey,
        candidateKey: input.candidateKey,
        topic: input.topic,
        assertion: input.assertion,
        rationale: input.rationale,
        evidenceChunkIds,
        evidenceRefs,
        suggestedPageKey: input.suggestedPageKey ?? null,
        actionHint: input.actionHint,
        durabilityScore: input.durabilityScore,
        authorityScore: input.authorityScore,
        reuseScore: input.reuseScore,
        noveltyScore: input.noveltyScore,
        riskScore: input.riskScore,
        promotionScore,
        status,
        rejectionReason: input.actionHint === 'skip' ? 'Extractor marked this candidate as skip.' : null,
        embedding,
      });

      return {
        markdown: `Candidate "${candidate.candidate_key}" saved with promotion score ${candidate.promotion_score}.`,
        structured: {
          success: true,
          candidateKey: candidate.candidate_key,
          promotionScore: candidate.promotion_score,
          status: candidate.status,
        },
      };
    } catch (error) {
      return {
        markdown: `Error: candidate "${input.candidateKey}" could not be saved.`,
        structured: {
          success: false,
          error: 'CANDIDATE_WRITE_FAILED',
          message: error instanceof Error ? error.message : String(error),
        },
      };
    }
  }

  /**
   * Best-effort embedding of the candidate text. A failure is logged and yields
   * `null` so the candidate can still be stored without an embedding.
   */
  private async computeCandidateEmbedding(input: ContextCandidateWriteInput): Promise<number[] | null> {
    try {
      return await this.embeddingService.computeEmbedding(buildContextCandidateEmbeddingText(input));
    } catch (error) {
      this.logger.warn(
        `Candidate embedding generation failed for topic "${input.topic}": ${
          error instanceof Error ? error.message : String(error)
        }`,
      );
      return null;
    }
  }
}
"ctxdoc-".'); diff --git a/packages/context/src/tools/context-evidence-neighbors.tool.ts b/packages/context/src/tools/context-evidence-neighbors.tool.ts new file mode 100644 index 00000000..551859f7 --- /dev/null +++ b/packages/context/src/tools/context-evidence-neighbors.tool.ts @@ -0,0 +1,99 @@ +import { z } from 'zod'; +import { BaseTool, type ToolContext, type ToolOutput } from './base-tool.js'; +import { documentIdSchema } from './context-evidence-ids.js'; +import type { ContextEvidenceToolStorePort } from './context-evidence-tool-store.js'; +import { ingestMetadataRequired, resolveIngestMetadata, type ToolFailure } from './context-ingest-metadata.js'; + +const contextEvidenceNeighborsInputSchema = z.object({ + documentId: documentIdSchema, + relation: z.enum(['parent', 'children', 'linked', 'backlinked', 'same_path']), + limit: z.number().int().min(1).max(25).default(10), +}); + +type ContextEvidenceNeighborsInput = z.infer; + +interface ContextEvidenceNeighborsStructured { + success: true; + results: Array<{ + documentId: string; + externalId: string; + title: string; + path: string; + relation: string; + url: string | null; + lastEditedAt: string | null; + }>; + totalFound: number; +} + +export class ContextEvidenceNeighborsTool extends BaseTool { + readonly name = 'context_evidence_neighbors'; + + constructor(private readonly store: ContextEvidenceToolStorePort) { + super(); + } + + get description(): string { + return 'Find parent, child, linked, backlinked, or same-folder evidence documents for the current ingest source.'; + } + + get inputSchema() { + return contextEvidenceNeighborsInputSchema; + } + + async call( + input: ContextEvidenceNeighborsInput, + context: ToolContext, + ): Promise> { + const ingest = resolveIngestMetadata(context); + if (!ingest) { + return ingestMetadataRequired(); + } + + const connectionId = context.connectionId ?? 
context.session?.connectionId; + if (!connectionId) { + return { + markdown: 'Error: no connectionId is available for context evidence neighbors.', + structured: { + success: false, + error: 'CONNECTION_REQUIRED', + message: 'Run this inside an ingest session with a connectionId.', + }, + }; + } + + const results = await this.store.findNeighborDocuments({ + connectionId, + sourceKey: ingest.sourceKey, + documentId: input.documentId, + relation: input.relation, + limit: input.limit, + currentRunId: ingest.runId, + }); + + if (results.length === 0) { + return { + markdown: `No ${input.relation} evidence documents found.`, + structured: { success: true, results: [], totalFound: 0 }, + }; + } + + return { + markdown: [ + `Found ${results.length} ${input.relation} evidence document(s):`, + '', + ...results.map( + (result, index) => `${index + 1}. **${result.title}** (${result.path}) documentId=${result.documentId}`, + ), + ].join('\n'), + structured: { + success: true, + totalFound: results.length, + results: results.map((result) => ({ + ...result, + lastEditedAt: result.lastEditedAt?.toISOString() ?? 
null, + })), + }, + }; + } +} diff --git a/packages/context/src/tools/context-evidence-read.tool.ts b/packages/context/src/tools/context-evidence-read.tool.ts new file mode 100644 index 00000000..d8edd441 --- /dev/null +++ b/packages/context/src/tools/context-evidence-read.tool.ts @@ -0,0 +1,153 @@ +import { z } from 'zod'; +import { BaseTool, type ToolContext, type ToolOutput } from './base-tool.js'; +import { chunkIdSchema, documentIdSchema } from './context-evidence-ids.js'; +import type { ContextEvidenceToolStorePort } from './context-evidence-tool-store.js'; +import { ingestMetadataRequired, resolveIngestMetadata, type ToolFailure } from './context-ingest-metadata.js'; + +const contextEvidenceReadInputSchema = z + .object({ + chunkId: chunkIdSchema.optional(), + documentId: documentIdSchema.optional(), + externalId: z.string().min(1).optional(), + includeNeighborChunks: z.boolean().default(false), + }) + .refine((input) => [input.chunkId, input.documentId, input.externalId].filter(Boolean).length === 1, { + message: 'Provide exactly one of chunkId, documentId, or externalId.', + }); + +type ContextEvidenceReadInput = z.infer; + +interface ContextEvidenceReadStructured { + success: true; + found: boolean; + documentId?: string; + chunkId?: string; + externalId?: string; + title?: string; + path?: string; + url?: string | null; + content?: string; + citation?: unknown; +} + +export class ContextEvidenceReadTool extends BaseTool { + readonly name = 'context_evidence_read'; + + constructor(private readonly store: ContextEvidenceToolStorePort) { + super(); + } + + get description(): string { + return 'Read a context evidence chunk or document by chunkId, documentId, or externalId.'; + } + + get inputSchema() { + return contextEvidenceReadInputSchema; + } + + async call( + input: ContextEvidenceReadInput, + context: ToolContext, + ): Promise> { + const ingest = resolveIngestMetadata(context); + if (!ingest) { + return ingestMetadataRequired(); + } + + if 
(input.chunkId) { + const connectionId = context.connectionId ?? context.session?.connectionId; + if (!connectionId) { + return { + markdown: 'Error: no connectionId is available for evidence read.', + structured: { success: false, error: 'CONNECTION_REQUIRED', message: 'Run inside an ingest session.' }, + }; + } + const found = await this.store.readChunkById(input.chunkId, connectionId, ingest.sourceKey, ingest.runId); + if (!found) { + return { + markdown: `No evidence chunk found for ${input.chunkId}.`, + structured: { success: true, found: false }, + }; + } + if (input.includeNeighborChunks) { + const document = await this.store.readDocumentById( + found.document.id, + connectionId, + ingest.sourceKey, + ingest.runId, + ); + const content = document?.chunks.map((chunk) => chunk.content).join('\n\n') ?? found.chunk.content; + return { + markdown: `## ${found.document.title}\n\n${content}`, + structured: { + success: true, + found: true, + documentId: found.document.id, + chunkId: found.chunk.id, + externalId: found.document.external_id, + title: found.document.title, + path: found.document.path, + url: found.document.url, + content, + citation: found.chunk.citation, + }, + }; + } + return { + markdown: `## ${found.document.title}\n\n${found.chunk.content}`, + structured: { + success: true, + found: true, + documentId: found.document.id, + chunkId: found.chunk.id, + externalId: found.document.external_id, + title: found.document.title, + path: found.document.path, + url: found.document.url, + content: found.chunk.content, + citation: found.chunk.citation, + }, + }; + } + + const connectionId = context.connectionId ?? context.session?.connectionId; + if (!connectionId) { + return { + markdown: 'Error: no connectionId is available for evidence read.', + structured: { success: false, error: 'CONNECTION_REQUIRED', message: 'Run inside an ingest session.' 
import { z } from 'zod';
import type { KloEmbeddingPort } from '../core/index.js';
import { BaseTool, type ToolContext, type ToolOutput } from './base-tool.js';
import type { ContextEvidenceToolStorePort } from './context-evidence-tool-store.js';
import { ingestMetadataRequired, resolveIngestMetadata, type ToolFailure } from './context-ingest-metadata.js';

const contextEvidenceSearchInputSchema = z.object({
  query: z.string().min(1),
  connectionId: z.string().uuid().optional(),
  sourceKey: z.string().min(1).optional(),
  limit: z.number().int().min(1).max(25).default(10),
  includeDeleted: z.boolean().default(false),
});

type ContextEvidenceSearchInput = z.infer<typeof contextEvidenceSearchInputSchema>;

interface ContextEvidenceSearchStructured {
  success: true;
  results: Array<{
    chunkId: string;
    documentId: string;
    externalId: string;
    title: string;
    path: string;
    url: string | null;
    snippet: string;
    score: number;
    matchReasons?: string[];
    lanes?: Array<{
      lane: string;
      status: 'available' | 'skipped' | 'failed';
      requestedCandidatePoolLimit: number;
      effectiveCandidatePoolLimit: number;
      returnedCandidateCount: number;
      weight: number;
      reason?: string;
    }>;
    citation: unknown;
    stableCitationKey: string;
    syncId: string;
    lastEditedAt: string | null;
  }>;
  totalFound: number;
}

/**
 * Tool that searches the context evidence index for the current ingest source.
 *
 * The query text is always passed to the store; a query embedding is passed as
 * well when the embedding service succeeds, and `null` otherwise.
 */
export class ContextEvidenceSearchTool extends BaseTool<typeof contextEvidenceSearchInputSchema> {
  readonly name = 'context_evidence_search';

  constructor(
    private readonly store: ContextEvidenceToolStorePort,
    private readonly embeddingService: Pick<KloEmbeddingPort, 'computeEmbedding'>,
  ) {
    super();
  }

  get description(): string {
    return (
      'Search the internal context evidence index for the current ingest source. ' +
      'Use this to research indexed evidence before writing candidates or curating wiki knowledge.'
    );
  }

  get inputSchema() {
    return contextEvidenceSearchInputSchema;
  }

  /**
   * Runs the search and renders the hits as a numbered markdown list plus a
   * structured payload (dates serialised as ISO strings).
   *
   * @returns a failure output when ingest metadata or a connection id is missing;
   *   otherwise `success: true` with zero or more results.
   */
  async call(
    input: ContextEvidenceSearchInput,
    context: ToolContext,
  ): Promise<ToolOutput<ContextEvidenceSearchStructured | ToolFailure>> {
    const ingest = resolveIngestMetadata(context);
    if (!ingest) {
      return ingestMetadataRequired();
    }

    // NOTE(review): an explicit input.connectionId takes precedence over the
    // session's connection; confirm the store scopes access appropriately.
    const connectionId = input.connectionId ?? context.connectionId ?? context.session?.connectionId;
    if (!connectionId) {
      return {
        markdown: 'Error: no connectionId is available for context evidence search.',
        structured: {
          success: false,
          error: 'CONNECTION_REQUIRED',
          message: 'Provide connectionId or run this inside an ingest session with a connectionId.',
        },
      };
    }

    // Computed only after the preconditions pass, so a request that is going to
    // be refused does not trigger an embedding call.
    const queryEmbedding = await this.computeQueryEmbedding(input.query);

    const results = await this.store.searchRRF({
      connectionId,
      sourceKey: input.sourceKey ?? ingest.sourceKey,
      queryEmbedding,
      queryText: input.query,
      limit: input.limit,
      includeDeleted: input.includeDeleted,
      currentRunId: ingest.runId,
    });

    if (results.length === 0) {
      return {
        markdown: `No context evidence found for "${input.query}".`,
        structured: { success: true, results: [], totalFound: 0 },
      };
    }

    return {
      markdown: [
        `Found ${results.length} evidence chunk(s):`,
        '',
        ...results.map((result, index) => {
          const reasonLine =
            result.matchReasons && result.matchReasons.length > 0
              ? ` matchReasons: ${result.matchReasons.join(', ')}\n`
              : '';
          return (
            `${index + 1}. **${result.title}** (${result.path})\n` +
            ` chunkId: ${result.chunkId}\n` +
            ` stableCitationKey: ${result.stableCitationKey}\n` +
            reasonLine +
            ` snippet: ${result.snippet}`
          );
        }),
      ].join('\n'),
      structured: {
        success: true,
        totalFound: results.length,
        // The spread already carries matchReasons and lanes when present; only
        // the Date needs converting for the JSON payload.
        results: results.map((result) => ({
          ...result,
          lastEditedAt: result.lastEditedAt?.toISOString() ?? null,
        })),
      },
    };
  }

  /**
   * Best-effort query embedding. A failure is logged and yields `null`, which is
   * passed to the store in place of a vector.
   */
  private async computeQueryEmbedding(query: string): Promise<number[] | null> {
    try {
      return await this.embeddingService.computeEmbedding(query);
    } catch (error) {
      this.logger.warn(
        `Query embedding generation failed for context evidence search: ${
          error instanceof Error ? error.message : String(error)
        }`,
      );
      return null;
    }
  }
}
ContextEvidenceNeighborResult { + documentId: string; + externalId: string; + title: string; + path: string; + relation: 'parent' | 'children' | 'linked' | 'backlinked' | 'same_path'; + url: string | null; + lastEditedAt: Date | null; +} + +export interface ContextEvidenceChunkForCandidate { + chunkId: string; + documentId: string; + externalId: string; + title: string; + path: string; + url: string | null; + rawPath: string; + content: string; + citation: JsonValue; + stableCitationKey: string; + syncId: string; + lastEditedAt: Date | null; +} + +export interface ContextCandidateInsertResult { + id: string; + candidate_key: string; + promotion_score: number; + status: string; +} + +export interface ContextCandidateStatusResult { + candidate_key: string; + status: string; +} + +export interface ContextEvidenceToolStorePort { + searchRRF(args: ContextEvidenceSearchArgs): Promise; + readChunkById( + chunkId: string, + connectionId: string, + sourceKey: string, + currentRunId?: string, + ): Promise; + readDocumentById( + documentId: string, + connectionId: string, + sourceKey: string, + currentRunId?: string, + ): Promise; + readDocumentByExternalId( + connectionId: string, + sourceKey: string, + externalId: string, + currentRunId?: string, + ): Promise; + findNeighborDocuments(args: { + connectionId: string; + sourceKey: string; + documentId: string; + relation: 'parent' | 'children' | 'linked' | 'backlinked' | 'same_path'; + limit: number; + currentRunId?: string; + }): Promise; + readChunksByIds( + chunkIds: string[], + connectionId: string, + sourceKey: string, + currentRunId?: string, + ): Promise; + insertCandidate(input: InsertContextCandidateInput): Promise; + updateCandidateStatus(args: { + runId: string; + candidateKey: string; + status: 'pending' | 'promoted' | 'merged' | 'rejected' | 'conflict'; + rejectionReason: string | null; + }): Promise; +} diff --git a/packages/context/src/tools/context-evidence-tools.test.ts 
b/packages/context/src/tools/context-evidence-tools.test.ts new file mode 100644 index 00000000..d3f11fea --- /dev/null +++ b/packages/context/src/tools/context-evidence-tools.test.ts @@ -0,0 +1,598 @@ +import { createHash } from 'node:crypto'; +import { mkdtemp, rm } from 'node:fs/promises'; +import { tmpdir } from 'node:os'; +import { join } from 'node:path'; +import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'; +import type { KloEmbeddingPort } from '../core/index.js'; +import { SqliteContextEvidenceStore } from '../ingest/context-evidence/sqlite-context-evidence-store.js'; +import { ContextCandidateMarkTool } from './context-candidate-mark.tool.js'; +import { ContextCandidateWriteTool } from './context-candidate-write.tool.js'; +import { ContextEvidenceNeighborsTool } from './context-evidence-neighbors.tool.js'; +import { ContextEvidenceReadTool } from './context-evidence-read.tool.js'; +import { ContextEvidenceSearchTool } from './context-evidence-search.tool.js'; +import type { ContextEvidenceToolStorePort } from './context-evidence-tool-store.js'; +import { createTouchedSlSources, type ToolContext, type ToolSession } from './index.js'; + +const ingestContext = (): ToolContext => ({ + sourceId: 'ingest', + messageId: 'job-1-wu-unit-1', + userId: 'system', + connectionId: '00000000-0000-0000-0000-000000000001', + ingest: { + runId: '10000000-0000-0000-0000-000000000001', + jobId: 'job-1', + syncId: 'sync-1', + sourceKey: 'notion', + }, + session: { + connectionId: '00000000-0000-0000-0000-000000000001', + isWorktreeScoped: true, + preHead: 'abc123', + touchedSlSources: createTouchedSlSources(), + actions: [], + ingest: { + runId: '10000000-0000-0000-0000-000000000001', + jobId: 'job-1', + syncId: 'sync-1', + sourceKey: 'notion', + }, + } as unknown as ToolSession, +}); + +const makeEmbeddingService = (overrides: Partial = {}) => + ({ + computeEmbedding: vi.fn().mockResolvedValue([0.25, 0.5, 0.75]), + ...overrides, + }) as Partial as 
KloEmbeddingPort; + +describe('context evidence tools', () => { + it('searches context evidence with ingest defaults', async () => { + const repository = { + searchRRF: vi.fn().mockResolvedValue([ + { + chunkId: 'chunk-1', + documentId: 'doc-1', + externalId: 'page-1', + title: 'Revenue Recognition', + path: 'Company Handbook / Finance / Revenue Recognition', + url: 'https://notion.example/page-1', + snippet: 'Booked revenue excludes refunds and test accounts.', + score: 0.35, + citation: { source: 'notion', pageId: 'page-1', rawPath: 'pages/page-1/page.md' }, + stableCitationKey: 'notion:page-1:policy:abc', + syncId: 'sync-1', + lastEditedAt: new Date('2026-04-12T10:15:00.000Z'), + matchReasons: ['lexical', 'semantic'], + lanes: [ + { + lane: 'lexical', + status: 'available', + requestedCandidatePoolLimit: 25, + effectiveCandidatePoolLimit: 25, + returnedCandidateCount: 1, + weight: 1.5, + }, + { + lane: 'semantic', + status: 'available', + requestedCandidatePoolLimit: 25, + effectiveCandidatePoolLimit: 25, + returnedCandidateCount: 1, + weight: 2, + }, + ], + }, + ]), + } as Partial as ContextEvidenceToolStorePort; + const embeddings = { + computeEmbedding: vi.fn().mockResolvedValue([0.1, ...Array.from({ length: 383 }, () => 0)]), + } as Partial as KloEmbeddingPort; + + const tool = new ContextEvidenceSearchTool(repository, embeddings); + const result = await tool.call({ query: 'revenue refunds', limit: 5, includeDeleted: false }, ingestContext()); + + expect(repository.searchRRF).toHaveBeenCalledWith({ + connectionId: '00000000-0000-0000-0000-000000000001', + sourceKey: 'notion', + queryEmbedding: [0.1, ...Array.from({ length: 383 }, () => 0)], + queryText: 'revenue refunds', + limit: 5, + includeDeleted: false, + currentRunId: '10000000-0000-0000-0000-000000000001', + }); + expect(result.markdown).toContain('Revenue Recognition'); + expect(result.markdown).toContain('matchReasons: lexical, semantic'); + expect(result.structured.success).toBe(true); + if 
(result.structured.success) { + expect(result.structured.results[0]).toMatchObject({ + chunkId: 'chunk-1', + stableCitationKey: 'notion:page-1:policy:abc', + matchReasons: ['lexical', 'semantic'], + lanes: expect.arrayContaining([expect.objectContaining({ lane: 'lexical', status: 'available' })]), + }); + } + }); + + it('returns a structured ingest metadata error outside ingest sessions', async () => { + const tool = new ContextEvidenceSearchTool( + { searchRRF: vi.fn() } as Partial as ContextEvidenceToolStorePort, + { computeEmbedding: vi.fn() } as Partial as KloEmbeddingPort, + ); + + const result = await tool.call( + { query: 'revenue', limit: 5, includeDeleted: false }, + { sourceId: 'research', messageId: 'm1', userId: 'user-1' }, + ); + + expect(result.structured).toMatchObject({ success: false, error: 'INGEST_METADATA_REQUIRED' }); + }); + + it('reads a full document by external id', async () => { + const repository = { + readDocumentByExternalId: vi.fn().mockResolvedValue({ + document: { + id: 'doc-1', + title: 'Onboarding SOP', + path: 'Ops / Onboarding SOP', + external_id: 'page-ops', + raw_path: 'pages/page-ops/page.md', + url: 'https://notion.example/page-ops', + }, + chunks: [ + { + id: 'chunk-1', + heading_path: ['Onboarding SOP', 'Checklist'], + content: 'Create account, invite to workspace, confirm dashboard access.', + citation: { source: 'notion', pageId: 'page-ops' }, + }, + ], + }), + } as Partial as ContextEvidenceToolStorePort; + + const tool = new ContextEvidenceReadTool(repository); + const result = await tool.call({ externalId: 'page-ops', includeNeighborChunks: false }, ingestContext()); + + expect(repository.readDocumentByExternalId).toHaveBeenCalledWith( + '00000000-0000-0000-0000-000000000001', + 'notion', + 'page-ops', + '10000000-0000-0000-0000-000000000001', + ); + expect(result.markdown).toContain('## Onboarding SOP'); + expect(result.markdown).toContain('Create account'); + expect(result.structured.success).toBe(true); + if 
(result.structured.success) { + expect(result.structured.found).toBe(true); + } + }); + + it('reads documents and chunks by id with connection and source scope', async () => { + const repository = { + readDocumentById: vi.fn().mockResolvedValue({ + document: { + id: '00000000-0000-0000-0000-000000000201', + title: 'Scoped Document', + path: 'Scoped Document', + external_id: 'page-scoped', + url: null, + }, + chunks: [{ id: 'chunk-1', content: 'Scoped content.' }], + }), + readChunkById: vi.fn().mockResolvedValue({ + document: { + id: '00000000-0000-0000-0000-000000000201', + title: 'Scoped Document', + path: 'Scoped Document', + external_id: 'page-scoped', + url: null, + }, + chunk: { + id: '00000000-0000-0000-0000-000000000301', + content: 'Scoped chunk.', + citation: { source: 'notion' }, + }, + }), + } as Partial as ContextEvidenceToolStorePort; + + const tool = new ContextEvidenceReadTool(repository); + await tool.call( + { documentId: '00000000-0000-0000-0000-000000000201', includeNeighborChunks: false }, + ingestContext(), + ); + await tool.call({ chunkId: '00000000-0000-0000-0000-000000000301', includeNeighborChunks: false }, ingestContext()); + + expect(repository.readDocumentById).toHaveBeenCalledWith( + '00000000-0000-0000-0000-000000000201', + '00000000-0000-0000-0000-000000000001', + 'notion', + '10000000-0000-0000-0000-000000000001', + ); + expect(repository.readChunkById).toHaveBeenCalledWith( + '00000000-0000-0000-0000-000000000301', + '00000000-0000-0000-0000-000000000001', + 'notion', + '10000000-0000-0000-0000-000000000001', + ); + }); + + it('lists evidence neighbors', async () => { + const repository = { + findNeighborDocuments: vi.fn().mockResolvedValue([ + { + documentId: 'doc-child', + externalId: 'page-child', + title: 'Revenue Caveats', + path: 'Company Handbook / Finance / Revenue Caveats', + relation: 'children', + url: null, + lastEditedAt: null, + }, + ]), + } as Partial as ContextEvidenceToolStorePort; + + const tool = new 
ContextEvidenceNeighborsTool(repository); + const result = await tool.call({ documentId: 'doc-1', relation: 'children', limit: 10 }, ingestContext()); + + expect(repository.findNeighborDocuments).toHaveBeenCalledWith({ + connectionId: '00000000-0000-0000-0000-000000000001', + sourceKey: 'notion', + documentId: 'doc-1', + relation: 'children', + limit: 10, + currentRunId: '10000000-0000-0000-0000-000000000001', + }); + expect(result.markdown).toContain('Revenue Caveats'); + }); + + it('writes a cited candidate with durable evidence refs', async () => { + const repository = { + readChunksByIds: vi.fn().mockResolvedValue([ + { + chunkId: '00000000-0000-0000-0000-000000000101', + documentId: 'doc-1', + externalId: 'page-1', + title: 'Revenue Recognition', + path: 'Company Handbook / Finance / Revenue Recognition', + url: 'https://notion.example/page-1', + rawPath: 'pages/page-1/page.md', + content: 'Booked revenue excludes refunds and test accounts.', + citation: { source: 'notion', pageId: 'page-1', rawPath: 'pages/page-1/page.md' }, + stableCitationKey: 'notion:page-1:policy:abc', + syncId: 'sync-1', + lastEditedAt: new Date('2026-04-12T10:15:00.000Z'), + }, + ]), + insertCandidate: vi.fn().mockResolvedValue({ + id: 'candidate-1', + candidate_key: 'revenue-definition', + promotion_score: 10, + status: 'pending', + }), + } as Partial as ContextEvidenceToolStorePort; + + const embeddings = makeEmbeddingService(); + const tool = new ContextCandidateWriteTool(repository, embeddings); + const result = await tool.call( + { + candidateKey: 'revenue-definition', + topic: 'Revenue Recognition', + assertion: 'Booked revenue excludes refunds and test accounts.', + rationale: 'Finance handbook is the source of truth and describes the reusable revenue rule.', + evidenceChunkIds: ['00000000-0000-0000-0000-000000000101'], + suggestedPageKey: 'revenue-definition', + actionHint: 'create', + durabilityScore: 3, + authorityScore: 3, + reuseScore: 3, + noveltyScore: 2, + riskScore: 1, + 
}, + ingestContext(), + ); + + expect(repository.readChunksByIds).toHaveBeenCalledWith( + ['00000000-0000-0000-0000-000000000101'], + '00000000-0000-0000-0000-000000000001', + 'notion', + '10000000-0000-0000-0000-000000000001', + ); + + expect(repository.insertCandidate).toHaveBeenCalledWith( + expect.objectContaining({ + runId: '10000000-0000-0000-0000-000000000001', + connectionId: '00000000-0000-0000-0000-000000000001', + sourceKey: 'notion', + candidateKey: 'revenue-definition', + promotionScore: 10, + status: 'pending', + evidenceRefs: [ + expect.objectContaining({ + chunkId: '00000000-0000-0000-0000-000000000101', + stableCitationKey: 'notion:page-1:policy:abc', + snippetHash: createHash('sha256') + .update('Booked revenue excludes refunds and test accounts.') + .digest('hex'), + }), + ], + }), + ); + expect(embeddings.computeEmbedding).toHaveBeenCalledWith( + 'Revenue Recognition - Booked revenue excludes refunds and test accounts.', + ); + expect(repository.insertCandidate).toHaveBeenCalledWith( + expect.objectContaining({ + embedding: [0.25, 0.5, 0.75], + }), + ); + expect(result.structured).toMatchObject({ success: true, candidateKey: 'revenue-definition', promotionScore: 10 }); + }); + + it('saves candidate writes with a null embedding when embedding generation fails', async () => { + const repository = { + readChunksByIds: vi.fn().mockResolvedValue([ + { + chunkId: '00000000-0000-0000-0000-000000000101', + documentId: 'doc-1', + externalId: 'page-1', + title: 'Revenue Recognition', + path: 'Company Handbook / Finance / Revenue Recognition', + url: 'https://notion.example/page-1', + rawPath: 'pages/page-1/page.md', + content: 'Booked revenue excludes refunds and test accounts.', + citation: { source: 'notion', pageId: 'page-1', rawPath: 'pages/page-1/page.md' }, + stableCitationKey: 'notion:page-1:policy:abc', + syncId: 'sync-1', + lastEditedAt: new Date('2026-04-12T10:15:00.000Z'), + }, + ]), + insertCandidate: vi.fn().mockResolvedValue({ + id: 
'candidate-1', + candidate_key: 'revenue-definition', + promotion_score: 10, + status: 'pending', + }), + } as Partial as ContextEvidenceToolStorePort; + const embeddings = makeEmbeddingService({ + computeEmbedding: vi.fn().mockRejectedValue(new Error('embedding provider unavailable')), + }); + + const tool = new ContextCandidateWriteTool(repository, embeddings); + const result = await tool.call( + { + candidateKey: 'revenue-definition', + topic: 'Revenue Recognition', + assertion: 'Booked revenue excludes refunds and test accounts.', + rationale: 'Finance handbook is the source of truth and describes the reusable revenue rule.', + evidenceChunkIds: ['00000000-0000-0000-0000-000000000101'], + suggestedPageKey: 'revenue-definition', + actionHint: 'create', + durabilityScore: 3, + authorityScore: 3, + reuseScore: 3, + noveltyScore: 2, + riskScore: 1, + }, + ingestContext(), + ); + + expect(embeddings.computeEmbedding).toHaveBeenCalledWith( + 'Revenue Recognition - Booked revenue excludes refunds and test accounts.', + ); + expect(repository.insertCandidate).toHaveBeenCalledWith( + expect.objectContaining({ + embedding: null, + }), + ); + expect(result.structured).toMatchObject({ success: true, candidateKey: 'revenue-definition', promotionScore: 10 }); + }); + + it('rejects candidate writes without evidence chunks', async () => { + const embeddings = makeEmbeddingService(); + const tool = new ContextCandidateWriteTool( + { + insertCandidate: vi.fn(), + } as Partial as ContextEvidenceToolStorePort, + embeddings, + ); + + const result = await tool.call( + { + candidateKey: 'uncited', + topic: 'Uncited', + assertion: 'This has no evidence.', + rationale: 'No evidence was provided.', + evidenceChunkIds: [], + actionHint: 'create', + durabilityScore: 1, + authorityScore: 1, + reuseScore: 1, + noveltyScore: 1, + riskScore: 1, + }, + ingestContext(), + ); + + expect(result.structured).toMatchObject({ success: false, error: 'EVIDENCE_REQUIRED' }); + 
expect(embeddings.computeEmbedding).not.toHaveBeenCalled(); + }); + + it('marks a candidate status during reconciliation', async () => { + const repository = { + updateCandidateStatus: vi.fn().mockResolvedValue({ + id: 'candidate-1', + candidate_key: 'revenue-definition', + status: 'promoted', + }), + } as Partial as ContextEvidenceToolStorePort; + + const tool = new ContextCandidateMarkTool(repository); + const result = await tool.call( + { candidateKey: 'revenue-definition', status: 'promoted', rejectionReason: null }, + ingestContext(), + ); + + expect(repository.updateCandidateStatus).toHaveBeenCalledWith({ + runId: '10000000-0000-0000-0000-000000000001', + candidateKey: 'revenue-definition', + status: 'promoted', + rejectionReason: null, + }); + expect(result.structured).toMatchObject({ success: true, candidateKey: 'revenue-definition', status: 'promoted' }); + }); +}); + +describe('context evidence tools against real SqliteContextEvidenceStore', () => { + let tempDir: string; + let dbPath: string; + + beforeEach(async () => { + tempDir = await mkdtemp(join(tmpdir(), 'klo-context-tools-sqlite-')); + dbPath = join(tempDir, '.klo', 'db.sqlite'); + }); + + afterEach(async () => { + await rm(tempDir, { recursive: true, force: true }); + }); + + const realStoreContext = (): ToolContext => ({ + sourceId: 'ingest', + messageId: 'job-1-wu-unit-1', + userId: 'system', + connectionId: 'conn-1', + ingest: { + runId: 'run-1', + jobId: 'job-1', + syncId: 'sync-1', + sourceKey: 'notion', + }, + session: { + connectionId: 'conn-1', + isWorktreeScoped: true, + preHead: 'abc123', + touchedSlSources: createTouchedSlSources(), + actions: [], + ingest: { + runId: 'run-1', + jobId: 'job-1', + syncId: 'sync-1', + sourceKey: 'notion', + }, + } as unknown as ToolSession, + }); + + async function seedChunk(store: SqliteContextEvidenceStore): Promise { + const doc = await store.upsertDocument({ + runId: 'run-1', + connectionId: 'conn-1', + sourceKey: 'notion', + externalId: 'page-1', + 
externalParentId: null, + databaseId: null, + dataSourceId: null, + title: 'Revenue Recognition', + path: 'Company Handbook / Finance / Revenue Recognition', + url: 'https://notion.test/page-1', + objectType: 'page', + lastEditedAt: new Date('2026-04-30T10:00:00.000Z'), + lastEditedBy: 'user-1', + rawPath: 'pages/page-1/page.md', + syncId: 'sync-1', + contentHash: 'hash-page-1', + publishState: 'published', + metadata: {}, + }); + await store.replaceChunks(doc.id, [ + { + chunkKey: 'intro', + headingPath: ['Revenue'], + ordinal: 0, + content: 'Booked revenue excludes refunds and test accounts.', + searchText: 'booked revenue excludes refunds test accounts', + embedding: [1, 0, 0], + tokenCount: 8, + citation: { source: 'notion', pageId: 'page-1', rawPath: 'pages/page-1/page.md' }, + stableCitationKey: 'notion:page-1:intro', + syncId: 'sync-1', + contentHash: 'chunk-page-1', + }, + ]); + const read = await store.readDocumentByExternalId('conn-1', 'notion', 'page-1', 'run-1'); + if (!read) { + throw new Error('seeded chunk not readable'); + } + return read.chunks[0].id; + } + + it('candidate write accepts the prefixed chunkId returned by the real store and persists', async () => { + const store = new SqliteContextEvidenceStore({ dbPath }); + const chunkId = await seedChunk(store); + expect(chunkId).toMatch(/^ctxchunk-[0-9a-f-]{36}$/); + + const tool = new ContextCandidateWriteTool(store, { + computeEmbedding: vi.fn().mockResolvedValue([0.1, 0.2, 0.3]), + } as Partial as KloEmbeddingPort); + + const parsed = tool.parseInput({ + candidateKey: 'revenue-definition', + topic: 'Revenue Recognition', + assertion: 'Booked revenue excludes refunds and test accounts.', + rationale: 'The Finance handbook is the source of truth.', + evidenceChunkIds: [chunkId], + actionHint: 'create', + durabilityScore: 3, + authorityScore: 3, + reuseScore: 3, + noveltyScore: 2, + riskScore: 1, + }); + + const result = await tool.call(parsed, realStoreContext()); + 
expect(result.structured).toMatchObject({ + success: true, + candidateKey: 'revenue-definition', + promotionScore: 10, + status: 'pending', + }); + }); + + it('candidate write schema rejects a bare UUID without the ctxchunk- prefix', () => { + const tool = new ContextCandidateWriteTool( + {} as ContextEvidenceToolStorePort, + { computeEmbedding: vi.fn() } as Partial as KloEmbeddingPort, + ); + + expect(() => + tool.parseInput({ + candidateKey: 'revenue-definition', + topic: 'Revenue Recognition', + assertion: 'Booked revenue excludes refunds and test accounts.', + rationale: 'Finance handbook is the source of truth.', + evidenceChunkIds: ['00000000-0000-0000-0000-000000000101'], + actionHint: 'create', + durabilityScore: 3, + authorityScore: 3, + reuseScore: 3, + noveltyScore: 2, + riskScore: 1, + }), + ).toThrow(/ctxchunk/); + }); + + it('evidence read schema rejects bare UUIDs for chunkId and documentId', () => { + const tool = new ContextEvidenceReadTool({} as ContextEvidenceToolStorePort); + + expect(() => + tool.parseInput({ chunkId: '00000000-0000-0000-0000-000000000301', includeNeighborChunks: false }), + ).toThrow(/ctxchunk/); + expect(() => + tool.parseInput({ documentId: '00000000-0000-0000-0000-000000000201', includeNeighborChunks: false }), + ).toThrow(/ctxdoc/); + }); + + it('evidence neighbors schema rejects bare UUIDs for documentId', () => { + const tool = new ContextEvidenceNeighborsTool({} as ContextEvidenceToolStorePort); + expect(() => + tool.parseInput({ documentId: '00000000-0000-0000-0000-000000000201', relation: 'children', limit: 10 }), + ).toThrow(/ctxdoc/); + }); +}); diff --git a/packages/context/src/tools/context-ingest-metadata.ts b/packages/context/src/tools/context-ingest-metadata.ts new file mode 100644 index 00000000..09659075 --- /dev/null +++ b/packages/context/src/tools/context-ingest-metadata.ts @@ -0,0 +1,23 @@ +import type { ToolContext, ToolOutput } from './base-tool.js'; +import type { IngestToolMetadata } from 
'./tool-session.js'; + +export interface ToolFailure { + success: false; + error: string; + message: string; +} + +export function resolveIngestMetadata(context: ToolContext): IngestToolMetadata | null { + return context.session?.ingest ?? context.ingest ?? null; +} + +export function ingestMetadataRequired(): ToolOutput { + return { + markdown: 'Error: this tool is only available inside an ingest WorkUnit or ingest reconciliation session.', + structured: { + success: false, + error: 'INGEST_METADATA_REQUIRED', + message: 'This tool requires ingest metadata on ToolContext or ToolSession.', + } as T, + }; +} diff --git a/packages/context/src/tools/index.ts b/packages/context/src/tools/index.ts new file mode 100644 index 00000000..7116b54c --- /dev/null +++ b/packages/context/src/tools/index.ts @@ -0,0 +1,43 @@ +export type { GitAuthor, GitAuthorResolverPort } from './authors.js'; +export { SYSTEM_GIT_AUTHOR } from './authors.js'; +export type { + MethodologyEntry, + ToolContext, + ToolOutput, + ToolProgressRelayPort, + ToolTimingTrackerPort, +} from './base-tool.js'; +export { BaseTool } from './base-tool.js'; +export { ContextCandidateMarkTool } from './context-candidate-mark.tool.js'; +export { ContextCandidateWriteTool } from './context-candidate-write.tool.js'; +export { ContextEvidenceNeighborsTool } from './context-evidence-neighbors.tool.js'; +export { ContextEvidenceReadTool } from './context-evidence-read.tool.js'; +export { ContextEvidenceSearchTool } from './context-evidence-search.tool.js'; +export type { + ContextCandidateInsertResult, + ContextCandidateStatusResult, + ContextEvidenceChunkForCandidate, + ContextEvidenceChunkForRead, + ContextEvidenceChunkReadResult, + ContextEvidenceDocumentForRead, + ContextEvidenceNeighborResult, + ContextEvidenceReadResult, + ContextEvidenceSearchArgs, + ContextEvidenceSearchResult, + ContextEvidenceToolStorePort, +} from './context-evidence-tool-store.js'; +export type { ToolFailure } from 
'./context-ingest-metadata.js'; +export { ingestMetadataRequired, resolveIngestMetadata } from './context-ingest-metadata.js'; +export type { SqlEdit } from './sql-edit-replacer.js'; +export { applySqlEdits } from './sql-edit-replacer.js'; +export type { IngestToolMetadata, MemoryAction, ToolSession } from './tool-session.js'; +export type { TouchedSlSource, TouchedSlSourceSet } from './touched-sl-sources.js'; +export { + addTouchedSlSource, + createTouchedSlSources, + deleteTouchedSlSource, + hasTouchedSlSource, + listTouchedSlSources, + touchedSlSourceCount, + touchedSlSourceNamesForConnection, +} from './touched-sl-sources.js';
diff --git a/packages/context/src/tools/sql-edit-replacer.ts b/packages/context/src/tools/sql-edit-replacer.ts
new file mode 100644
index 00000000..e8577f0f
--- /dev/null
+++ b/packages/context/src/tools/sql-edit-replacer.ts
@@ -0,0 +1,251 @@
+/** A single textual replacement to apply to a SQL string. */
+export interface SqlEdit {
+  /** Text to locate in the current SQL; it must identify exactly one location. */
+  oldText: string;
+  /** Text written in place of the matched span. */
+  newText: string;
+  /** Optional explanation, echoed in error messages for this edit. */
+  reason?: string;
+}
+
+/** Outcome of `applySqlEdits`: the edited SQL plus one message per failed edit. */
+interface SqlEditResult {
+  success: boolean;
+  sql: string;
+  appliedEdits: number;
+  errors: string[];
+}
+
+/**
+ * Result of one matching strategy: the rewritten SQL (optionally with a note),
+ * an error that stops further strategies for the edit, or `null` when nothing
+ * matched and the next strategy should be tried.
+ */
+type ReplacerResult = { sql: string; note?: string } | { error: string } | null;
+
+// Fuzzy matching: candidate window lengths relative to `oldText.length`, and the
+// minimum similarity (1 - distance / longer length) required to accept a match.
+const FUZZY_MIN_WINDOW_RATIO = 0.85;
+const FUZZY_MAX_WINDOW_RATIO = 1.15;
+const FUZZY_MIN_SIMILARITY = 0.85;
+
+/**
+ * Counts occurrences of `needle` in `haystack` (overlapping matches included)
+ * and reports the index of the last one. `needle` must be non-empty.
+ */
+function countOccurrences(haystack: string, needle: string): { count: number; lastIndex: number } {
+  let count = 0;
+  let lastIndex = -1;
+  let found = haystack.indexOf(needle);
+  while (found !== -1) {
+    count++;
+    lastIndex = found;
+    found = haystack.indexOf(needle, found + 1);
+  }
+  return { count, lastIndex };
+}
+
+/** Replaces `oldText` when it occurs exactly once; more than one occurrence is an error. */
+function exactReplacer(sql: string, oldText: string, newText: string): ReplacerResult {
+  if (oldText.length === 0) {
+    return null;
+  }
+
+  const { count, lastIndex } = countOccurrences(sql, oldText);
+  if (count === 0) {
+    return null;
+  }
+  if (count > 1) {
+    return { error: `Found ${count} matches for text, expected 1. Add more surrounding context.` };
+  }
+
+  return { sql: sql.slice(0, lastIndex) + newText + sql.slice(lastIndex + oldText.length) };
+}
+
+/**
+ * Maps each index of the whitespace-collapsed string (every whitespace run
+ * replaced by one space) to the index in `original` where that character starts.
+ */
+function buildCharacterMap(original: string): number[] {
+  const map: number[] = [];
+  for (let i = 0; i < original.length; i++) {
+    if (/\s/.test(original[i])) {
+      // Only the first character of a whitespace run survives normalization.
+      if (map.length === 0 || !/\s/.test(original[i - 1])) {
+        map.push(i);
+      }
+    } else {
+      map.push(i);
+    }
+  }
+  return map;
+}
+
+/**
+ * Matches `oldText` ignoring differences in whitespace amount, then replaces the
+ * corresponding span of the original SQL. Ambiguous matches are reported as an
+ * error, mirroring `exactReplacer`, instead of falling through to fuzzy matching
+ * (which would silently pick one of the candidates).
+ */
+function whitespaceNormalizedReplacer(sql: string, oldText: string, newText: string): ReplacerResult {
+  const normalizedSql = sql.replace(/\s+/g, ' ');
+  const normalizedOldText = oldText.replace(/\s+/g, ' ');
+
+  if (normalizedOldText.length === 0) {
+    return null;
+  }
+
+  const { count, lastIndex: matchIdx } = countOccurrences(normalizedSql, normalizedOldText);
+  if (count === 0) {
+    return null;
+  }
+  if (count > 1) {
+    return {
+      error: `Found ${count} whitespace-insensitive matches for text, expected 1. Add more surrounding context.`,
+    };
+  }
+
+  const charMap = buildCharacterMap(sql);
+  const originalStart = charMap[matchIdx];
+  const normalizedEnd = matchIdx + normalizedOldText.length;
+  // A match that reaches the end of the normalized string extends to the end of the original.
+  const originalEnd = normalizedEnd >= charMap.length ? sql.length : charMap[normalizedEnd];
+
+  return { sql: sql.slice(0, originalStart) + newText + sql.slice(originalEnd) };
+}
+
+/** Levenshtein edit distance between `a` and `b`, computed with two rolling rows. */
+function levenshteinDistance(a: string, b: string): number {
+  const m = a.length;
+  const n = b.length;
+
+  if (m === 0) {
+    return n;
+  }
+  if (n === 0) {
+    return m;
+  }
+
+  let previous: number[] = Array.from({ length: n + 1 }, (_, j) => j);
+  let current: number[] = Array.from({ length: n + 1 }, () => 0);
+
+  for (let i = 1; i <= m; i++) {
+    current[0] = i;
+    for (let j = 1; j <= n; j++) {
+      const cost = a[i - 1] === b[j - 1] ? 0 : 1;
+      current[j] = Math.min(previous[j] + 1, current[j - 1] + 1, previous[j - 1] + cost);
+    }
+    [previous, current] = [current, previous];
+  }
+
+  return previous[n];
+}
+
+/**
+ * Finds the substring of `sql` closest to `oldText` by edit distance, trying
+ * windows between 85% and 115% of its length, and replaces it when similarity
+ * is at least 85%. On ties the shortest, then leftmost, window wins.
+ */
+function fuzzyReplacer(sql: string, oldText: string, newText: string): ReplacerResult {
+  if (oldText.length === 0) {
+    return null;
+  }
+
+  const targetLen = oldText.length;
+  const minWindow = Math.max(1, Math.floor(targetLen * FUZZY_MIN_WINDOW_RATIO));
+  const maxWindow = Math.ceil(targetLen * FUZZY_MAX_WINDOW_RATIO);
+
+  let bestDistance = Infinity;
+  let bestStart = -1;
+  let bestEnd = -1;
+
+  for (let windowLen = minWindow; windowLen <= maxWindow; windowLen++) {
+    if (windowLen > sql.length) {
+      break;
+    }
+    // Edit distance is at least the length difference, so this length cannot beat the current best.
+    if (Math.abs(windowLen - targetLen) >= bestDistance) {
+      continue;
+    }
+
+    for (let start = 0; start <= sql.length - windowLen; start++) {
+      const distance = levenshteinDistance(sql.slice(start, start + windowLen), oldText);
+      if (distance < bestDistance) {
+        bestDistance = distance;
+        bestStart = start;
+        bestEnd = start + windowLen;
+      }
+    }
+  }
+
+  if (bestStart === -1) {
+    return null;
+  }
+
+  const maxLen = Math.max(oldText.length, bestEnd - bestStart);
+  const similarity = 1 - bestDistance / maxLen;
+
+  if (similarity < FUZZY_MIN_SIMILARITY) {
+    return null;
+  }
+
+  const matchedText = sql.slice(bestStart, bestEnd);
+  return {
+    sql: sql.slice(0, bestStart) + newText + sql.slice(bestEnd),
+    note: `Fuzzy match used (similarity: ${(similarity * 100).toFixed(1)}%, matched: "${matchedText}")`,
+  };
+}
+
+interface ApplySqlEditsOptions {
+  /** When true, only exact matching is attempted (no whitespace-normalized or fuzzy fallback). */
+  exactOnly?: boolean;
+}
+
+/**
+ * Applies `edits` to `sql` in order, each against the result of the previous ones.
+ * For every edit the strategies run from strictest to loosest, and the first one
+ * that matches or reports an error decides the outcome. Failed edits are
+ * recorded in `errors` and skipped; later edits are still attempted.
+ */
+export function applySqlEdits(sql: string, edits: SqlEdit[], options?: ApplySqlEditsOptions): SqlEditResult {
+  const replacers = options?.exactOnly
+    ? [exactReplacer]
+    : [exactReplacer, whitespaceNormalizedReplacer, fuzzyReplacer];
+  let currentSql = sql;
+  let appliedEdits = 0;
+  const errors: string[] = [];
+
+  for (const edit of edits) {
+    const context = edit.reason ? ` (reason: ${edit.reason})` : '';
+    let handled = false;
+
+    for (const replacer of replacers) {
+      const result = replacer(currentSql, edit.oldText, edit.newText);
+      if (result === null) {
+        continue;
+      }
+
+      if ('error' in result) {
+        errors.push(`${result.error}${context}`);
+      } else {
+        currentSql = result.sql;
+        appliedEdits++;
+      }
+      handled = true;
+      break;
+    }
+
+    if (!handled) {
+      errors.push(
+        `No match found for edit${context}: "${edit.oldText.slice(0, 80)}${edit.oldText.length > 80 ? '...' : ''}"`,
+      );
+    }
+  }
+
+  return {
+    success: errors.length === 0,
+    sql: currentSql,
+    appliedEdits,
+    errors,
+  };
+}
diff --git a/packages/context/src/tools/tool-session.ts b/packages/context/src/tools/tool-session.ts new file mode 100644 index 00000000..ec03eb4f --- /dev/null +++ b/packages/context/src/tools/tool-session.ts @@ -0,0 +1,54 @@ +import type { GitService, KloFileStorePort } from '../core/index.js'; +import type { SemanticLayerService } from '../sl/index.js'; +import type { KnowledgeWikiService } from '../wiki/index.js'; +import type { TouchedSlSourceSet } from './touched-sl-sources.js'; + +export interface IngestToolMetadata { + runId: string; + jobId: string; + syncId: string; + sourceKey: string; +} + +export interface MemoryAction { + target: 'wiki' | 'sl'; + type: 'created' | 'updated' | 'removed'; + key: string; + detail: string; + targetConnectionId?: string | null; +} + +interface EvictionDecisionRecord { + rawPath: string; + artifactKind: 'wiki' | 'sl'; + artifactKey: string; + action: 'removed' | 'retained_deprecated' | 'retained_supported'; + reason: string; +} + +/** + * Per-WU (or per-memory-agent) state threaded through ToolContext.
When present, + * SL/wiki tools read session-scoped services and emit touched-set entries / actions + * instead of hitting shared services. When absent, tools behave as they do for + * interactive research/workshop callers. + */ +export interface ToolSession { + /** + * Warehouse connection targeted by SL tools. `null` when the session has no + * warehouse connection (wiki-only memory-agent turns) — SL tools must guard + * for this and return a structured error rather than execute against a + * blank connection. + */ + connectionId: string | null; + /** When true, worktree-scoped service writes bypass DB index updates. */ + isWorktreeScoped: boolean; + preHead: string | null; + touchedSlSources: TouchedSlSourceSet; + actions: MemoryAction[]; + semanticLayerService: SemanticLayerService; + wikiService: KnowledgeWikiService; + configService: KloFileStorePort; + gitService: GitService; + ingest?: IngestToolMetadata; + evictionDecisions?: EvictionDecisionRecord[]; +} diff --git a/packages/context/src/tools/touched-sl-sources.test.ts b/packages/context/src/tools/touched-sl-sources.test.ts new file mode 100644 index 00000000..818676d2 --- /dev/null +++ b/packages/context/src/tools/touched-sl-sources.test.ts @@ -0,0 +1,45 @@ +import { describe, expect, it } from 'vitest'; +import { + addTouchedSlSource, + createTouchedSlSources, + deleteTouchedSlSource, + hasTouchedSlSource, + listTouchedSlSources, + touchedSlSourceCount, + touchedSlSourceNamesForConnection, +} from './touched-sl-sources.js'; + +describe('target-aware touched SL source helpers', () => { + it('deduplicates by connectionId and sourceName while preserving target identity', () => { + const touched = createTouchedSlSources(); + + addTouchedSlSource(touched, 'warehouse-a', 'orders'); + addTouchedSlSource(touched, 'warehouse-a', 'orders'); + addTouchedSlSource(touched, 'warehouse-b', 'orders'); + + expect(listTouchedSlSources(touched)).toEqual([ + { connectionId: 'warehouse-a', sourceName: 'orders' }, + { 
/** One semantic-layer source, identified by the connection it belongs to. */
export interface TouchedSlSource {
  connectionId: string;
  sourceName: string;
}

/** Touched sources grouped by connection: connectionId → set of source names. */
export type TouchedSlSourceSet = Map<string, Set<string>>;

/** Create a touched-set, optionally pre-populated from `entries` (duplicates collapse). */
export function createTouchedSlSources(entries: TouchedSlSource[] = []): TouchedSlSourceSet {
  const touched: TouchedSlSourceSet = new Map();
  for (const entry of entries) {
    addTouchedSlSource(touched, entry.connectionId, entry.sourceName);
  }
  return touched;
}

/** Record `sourceName` as touched for `connectionId`; adding the same pair twice is a no-op. */
export function addTouchedSlSource(touched: TouchedSlSourceSet, connectionId: string, sourceName: string): void {
  let bucket = touched.get(connectionId);
  if (!bucket) {
    bucket = new Set<string>();
    touched.set(connectionId, bucket);
  }
  bucket.add(sourceName);
}

/**
 * Remove one connection/source pair. The connection's entry is dropped
 * entirely once its last source is removed.
 */
export function deleteTouchedSlSource(touched: TouchedSlSourceSet, connectionId: string, sourceName: string): void {
  const bucket = touched.get(connectionId);
  if (!bucket) {
    return;
  }
  bucket.delete(sourceName);
  if (bucket.size === 0) {
    touched.delete(connectionId);
  }
}

/** Whether the given connection/source pair has been recorded. */
export function hasTouchedSlSource(touched: TouchedSlSourceSet, connectionId: string, sourceName: string): boolean {
  return touched.get(connectionId)?.has(sourceName) ?? false;
}

/** Flatten the set into a list sorted by connectionId, then sourceName (both via `localeCompare`). */
export function listTouchedSlSources(touched: TouchedSlSourceSet): TouchedSlSource[] {
  const out: TouchedSlSource[] = [];
  for (const [connectionId, sources] of touched) {
    for (const sourceName of sources) {
      out.push({ connectionId, sourceName });
    }
  }
  return out.sort((left, right) => {
    const byConnection = left.connectionId.localeCompare(right.connectionId);
    return byConnection === 0 ? left.sourceName.localeCompare(right.sourceName) : byConnection;
  });
}

/** Total number of distinct connection/source pairs. */
export function touchedSlSourceCount(touched: TouchedSlSourceSet): number {
  let total = 0;
  for (const sources of touched.values()) {
    total += sources.size;
  }
  return total;
}
/**
 * Compose the text that is indexed for a knowledge page: key, summary and
 * content on separate lines, followed by one line of space-separated tags
 * when at least one tag is present.
 */
export function buildKnowledgeSearchText(blockKey: string, summary: string, content: string, tags?: string[]): string {
  const tagLine = tags?.length ? [tags.join(' ')] : [];
  return [blockKey, summary, content, ...tagLine].join('\n');
}
+function makeService() { + const pagesRepository: Record> = { + upsertPage: vi.fn().mockResolvedValue(undefined), + deleteByKey: vi.fn().mockResolvedValue(undefined), + deleteByScope: vi.fn().mockResolvedValue(undefined), + deleteStale: vi.fn().mockResolvedValue(undefined), + getExistingSearchTexts: vi.fn().mockResolvedValue(new Map()), + applyDiffTransactional: vi.fn().mockResolvedValue(undefined), + }; + const embeddingService = { + computeEmbedding: vi.fn().mockResolvedValue([0.1, 0.2, 0.3]), + computeEmbeddingsBulk: vi.fn().mockResolvedValue([]), + maxBatchSize: 16, + }; + const configService = { + forWorktree: vi.fn().mockReturnValue({ + writeFile: vi.fn(), + readFile: vi.fn(), + deleteFile: vi.fn(), + listFiles: vi.fn(), + getFileHistory: vi.fn(), + }), + writeFile: vi.fn(), + readFile: vi.fn(), + deleteFile: vi.fn(), + listFiles: vi.fn(), + getFileHistory: vi.fn(), + }; + const gitService = { + diffNameStatus: vi.fn().mockResolvedValue([]), + getFileAtCommit: vi.fn().mockResolvedValue(''), + }; + const service = new KnowledgeWikiService( + configService as any, + embeddingService as any, + pagesRepository as any, + gitService as any, + ); + return { service, pagesRepository, embeddingService, configService, gitService }; +} + +const fm: WikiFrontmatter = { summary: 'sum', usage_mode: 'auto' }; + +describe('KnowledgeWikiService.forWorktree isolation', () => { + it('syncSinglePage in worktree scope does not call pagesRepository.upsertPage', async () => { + const { service, pagesRepository, embeddingService } = makeService(); + const scoped = service.forWorktree('/tmp/fake-worktree'); + + await scoped.syncSinglePage('GLOBAL', null, 'key', fm, 'body'); + + expect(pagesRepository.upsertPage).not.toHaveBeenCalled(); + expect(embeddingService.computeEmbedding).not.toHaveBeenCalled(); + }); + + it('deleteFromIndex in worktree scope does not call pagesRepository.deleteByKey', async () => { + const { service, pagesRepository } = makeService(); + const scoped = 
service.forWorktree('/tmp/fake-worktree'); + + await scoped.deleteFromIndex('GLOBAL', null, 'key'); + + expect(pagesRepository.deleteByKey).not.toHaveBeenCalled(); + }); + + it('syncSinglePage in main scope still calls pagesRepository.upsertPage', async () => { + const { service, pagesRepository } = makeService(); + + await service.syncSinglePage('GLOBAL', null, 'key', fm, 'body'); + + expect(pagesRepository.upsertPage).toHaveBeenCalledTimes(1); + }); +}); + +describe('KnowledgeWikiService.syncFromCommit', () => { + it('applies upserts for added/modified files and deletes for removed files in a single transactional batch', async () => { + const { service, pagesRepository, gitService } = makeService(); + + gitService.diffNameStatus.mockResolvedValue([ + { status: 'A', path: 'knowledge/global/new-page.md' }, + { status: 'M', path: 'knowledge/global/changed-page.md' }, + { status: 'D', path: 'knowledge/global/gone-page.md' }, + ]); + gitService.getFileAtCommit.mockImplementation((path: string) => { + if (path.endsWith('new-page.md')) { + return Promise.resolve('---\nsummary: new\nusage_mode: auto\n---\n\nbody-new\n'); + } + if (path.endsWith('changed-page.md')) { + return Promise.resolve('---\nsummary: changed\nusage_mode: auto\n---\n\nbody-changed\n'); + } + return Promise.reject(new Error(`unexpected getFileAtCommit path: ${path}`)); + }); + + await service.syncFromCommit('sha-before', 'sha-after', 'run-uuid'); + + expect(pagesRepository.applyDiffTransactional).toHaveBeenCalledTimes(1); + const call = pagesRepository.applyDiffTransactional.mock.calls[0][0]; + expect(call.runId).toBe('run-uuid'); + expect(call.upserts).toHaveLength(2); + expect(call.upserts).toEqual( + expect.arrayContaining([ + expect.objectContaining({ scope: 'GLOBAL', pageKey: 'new-page', summary: 'new' }), + expect.objectContaining({ scope: 'GLOBAL', pageKey: 'changed-page', summary: 'changed' }), + ]), + ); + expect(call.deletes).toEqual([{ scope: 'GLOBAL', scopeId: null, pageKey: 'gone-page' 
}]); + }); + + it('is a no-op when the diff between shas has no knowledge changes', async () => { + const { service, pagesRepository, gitService } = makeService(); + gitService.diffNameStatus.mockResolvedValue([]); + + await service.syncFromCommit('sha-before', 'sha-after', 'run-uuid'); + + expect(pagesRepository.applyDiffTransactional).not.toHaveBeenCalled(); + }); +}); diff --git a/packages/context/src/wiki/knowledge-wiki.service.ts b/packages/context/src/wiki/knowledge-wiki.service.ts new file mode 100644 index 00000000..467f8df7 --- /dev/null +++ b/packages/context/src/wiki/knowledge-wiki.service.ts @@ -0,0 +1,437 @@ +import { createHash } from 'node:crypto'; +import YAML from 'yaml'; +import type { KloEmbeddingPort, KloFileStorePort, KloLogger } from '../core/index.js'; +import { noopLogger } from '../core/index.js'; +import { buildKnowledgeSearchText } from './knowledge-search-text.js'; +import type { KnowledgeGitDiffPort, KnowledgeIndexPort, UpsertPageParams } from './ports.js'; +import type { WikiFrontmatter, WikiPage, WikiPageWithScope } from './types.js'; + +const WIKI_PREFIX = 'knowledge'; + +export type { WikiFrontmatter }; + +export class KnowledgeWikiService { + private isWorktreeScoped = false; + + constructor( + private readonly configService: KloFileStorePort, + private readonly embeddingService: KloEmbeddingPort, + private readonly pagesRepository: KnowledgeIndexPort, + private readonly gitService: KnowledgeGitDiffPort, + private readonly logger: KloLogger = noopLogger, + ) {} + + /** + * Return a clone of this service whose disk writes go through a worktree-scoped + * ConfigService AND whose DB-index writes are no-ops. Used by memory-agent + * session worktrees so wiki tool calls during the LLM loop land on the session + * branch. The shared `knowledge` table is only touched once per run, atomically, + * via `syncFromCommit` after Stage 6 squashes the branch into main. 
/**
 * Split a raw wiki page into its YAML frontmatter and Markdown body.
 *
 * The page must start with a `---` fenced frontmatter block. A leading
 * UTF-8 BOM and CRLF line endings are tolerated so pages edited on Windows
 * parse the same as LF-only pages. The body is returned trimmed.
 *
 * @throws Error when the frontmatter fence is missing, when the YAML is
 *   invalid (raised by `YAML.parse`), or when the frontmatter is not a
 *   mapping (empty block, scalar, or list).
 */
parsePage(raw: string): { frontmatter: WikiFrontmatter; content: string } {
  const text = raw.charCodeAt(0) === 0xfeff ? raw.slice(1) : raw;
  const match = text.match(/^---\r?\n([\s\S]*?)\r?\n---\r?\n?([\s\S]*)$/);
  if (!match) {
    throw new Error('Invalid wiki page: missing YAML frontmatter');
  }

  // YAML.parse returns null for an empty document and primitives/arrays for
  // non-mapping documents; none of those can act as WikiFrontmatter, so fail
  // here with a clear message instead of letting callers hit a TypeError.
  const frontmatter: unknown = YAML.parse(match[1]);
  if (frontmatter === null || typeof frontmatter !== 'object' || Array.isArray(frontmatter)) {
    throw new Error('Invalid wiki page: YAML frontmatter must be a mapping');
  }

  return { frontmatter: frontmatter as WikiFrontmatter, content: match[2].trim() };
}
/**
 * Read and parse one wiki page.
 *
 * Returns `null` both when the file cannot be read and when it cannot be
 * parsed, exactly as before — but a parse failure is now logged, so a
 * malformed page is no longer indistinguishable from a missing one.
 */
async readPage(scope: string, scopeId: string | null | undefined, pageKey: string): Promise<WikiPage | null> {
  const path = this.pagePath(scope, scopeId, pageKey);

  let raw: string;
  try {
    raw = (await this.configService.readFile(path)).content;
  } catch {
    // Read failures are treated as "page not present": callers such as
    // readPageForUser() probe paths that are expected to be absent.
    return null;
  }

  try {
    const { frontmatter, content } = this.parsePage(raw);
    return { pageKey, frontmatter, content };
  } catch (err) {
    this.logger.warn(
      `Ignoring malformed wiki page at ${path}: ${err instanceof Error ? err.message : String(err)}`,
    );
    return null;
  }
}
/**
 * Write a page verbatim from raw `.md` text (frontmatter + body) and sync it
 * to the search index.
 *
 * The raw text is parsed and validated first (`summary` must be non-blank,
 * `usage_mode` must be one of always/auto/never) so nothing is written when
 * validation fails. The file is stored exactly as given, preserving the
 * author's formatting, and the index entry is tagged with the SHA-256 of
 * that exact text — the same value `syncFromCommit` derives from the
 * committed file.
 *
 * @returns the parsed frontmatter and trimmed body.
 * @throws Error when the page cannot be parsed or fails validation.
 */
async writeRawPageAndSync(
  scope: string,
  scopeId: string | null | undefined,
  pageKey: string,
  rawContent: string,
  author: string,
  authorEmail: string,
  commitMessage?: string,
): Promise<{ frontmatter: WikiFrontmatter; content: string }> {
  const parsed = this.parsePage(rawContent);

  // An empty or non-mapping frontmatter block has no summary either; report
  // it as the same validation error rather than a TypeError on `.summary`.
  const frontmatter: WikiFrontmatter | null | undefined = parsed.frontmatter;
  if (!frontmatter || typeof frontmatter !== 'object') {
    throw new Error('Front-matter field "summary" is required');
  }
  if (!frontmatter.summary || String(frontmatter.summary).trim().length === 0) {
    throw new Error('Front-matter field "summary" is required');
  }
  const validModes = ['always', 'auto', 'never'];
  if (!validModes.includes(frontmatter.usage_mode)) {
    throw new Error(`Front-matter field "usage_mode" must be one of: ${validModes.join(', ')}`);
  }

  const path = this.pagePath(scope, scopeId, pageKey);
  await this.configService.writeFile(
    path,
    rawContent,
    author,
    authorEmail,
    commitMessage ?? `Update knowledge page (raw): ${pageKey}`,
  );

  // Hash the bytes that were actually written, not a re-serialization.
  const contentHash = createHash('sha256').update(rawContent).digest('hex');
  await this.syncSinglePage(scope, scopeId, pageKey, frontmatter, parsed.content, contentHash);
  return parsed;
}
/**
 * Push one page into the DB search index.
 *
 * Builds the page's search text, asks the embedding service for a vector
 * (a failure is logged and the page is indexed with a null embedding), and
 * upserts the row. Does nothing on a worktree-scoped instance.
 */
async syncSinglePage(
  scope: string,
  scopeId: string | null | undefined,
  pageKey: string,
  frontmatter: WikiFrontmatter,
  content: string,
  contentHash?: string | null,
): Promise<void> {
  // Worktree-scoped instances never touch the shared index; it is updated
  // later from the commit diff by syncFromCommit().
  if (this.isWorktreeScoped) {
    return;
  }

  const searchText = buildKnowledgeSearchText(pageKey, frontmatter.summary, content, frontmatter.tags);

  let vector: number[] | null;
  try {
    vector = await this.embeddingService.computeEmbedding(searchText);
  } catch (err) {
    const reason = err instanceof Error ? err.message : String(err);
    this.logger.warn(`Embedding failed for page "${pageKey}": ${reason}`);
    vector = null;
  }

  const row: UpsertPageParams = {
    scope,
    scopeId: scopeId ?? null,
    pageKey,
    summary: frontmatter.summary,
    usageMode: frontmatter.usage_mode,
    sortOrder: frontmatter.sort_order ?? 0,
    searchText,
    embedding: vector,
    contentHash: contentHash ?? null,
  };
  await this.pagesRepository.upsertPage(row);
}
/**
 * Full sync for one scope: load every page from disk, re-embed and upsert
 * the pages whose search text changed (or that have no stored embedding),
 * and delete index rows whose page no longer exists on disk.
 *
 * Does nothing on a worktree-scoped instance, matching syncSinglePage() and
 * deleteFromIndex(): such instances must not write to the shared index.
 *
 * If any embedding batch fails, the failure is logged and all changed pages
 * are upserted with a null embedding.
 */
async syncIndex(scope: string, scopeId?: string | null): Promise<void> {
  if (this.isWorktreeScoped) {
    return;
  }

  const pageKeys = await this.listPageKeys(scope, scopeId);
  if (pageKeys.length === 0) {
    await this.pagesRepository.deleteByScope(scope, scopeId ?? null);
    return;
  }

  // Load and parse all pages
  const pages: Array<{ pageKey: string; frontmatter: WikiFrontmatter; content: string; searchText: string }> = [];
  for (const key of pageKeys) {
    const page = await this.readPage(scope, scopeId, key);
    if (page) {
      const searchText = buildKnowledgeSearchText(key, page.frontmatter.summary, page.content, page.frontmatter.tags);
      pages.push({ pageKey: key, frontmatter: page.frontmatter, content: page.content, searchText });
    }
  }

  // Detect changes
  const existing = await this.pagesRepository.getExistingSearchTexts(scope, scopeId ?? null);
  const changedPages = pages.filter((p) => {
    const ex = existing.get(p.pageKey);
    return !ex || ex.searchText !== p.searchText || !ex.hasEmbedding;
  });

  if (changedPages.length === 0) {
    // Still clean up stale
    await this.pagesRepository.deleteStale(scope, scopeId ?? null, pageKeys);
    this.logger.log(`Wiki sync ${scope}: all ${pages.length} pages up to date`);
    return;
  }

  // Compute embeddings for changed pages (batched)
  const changedTexts = changedPages.map((p) => p.searchText);
  let embeddings: (number[] | null)[];
  try {
    // Clamp to at least 1: a zero batch size would never advance the loop.
    const batchSize = Math.max(1, Math.floor(this.embeddingService.maxBatchSize) || 1);
    const all: number[][] = [];
    for (let i = 0; i < changedTexts.length; i += batchSize) {
      const batch = changedTexts.slice(i, i + batchSize);
      const batchEmb = await this.embeddingService.computeEmbeddingsBulk(batch);
      all.push(...batchEmb);
    }
    embeddings = all;
  } catch (err) {
    this.logger.warn(`Embedding batch failed during sync: ${err instanceof Error ? err.message : String(err)}`);
    embeddings = changedPages.map(() => null);
  }

  // Upsert changed pages
  for (let i = 0; i < changedPages.length; i++) {
    const p = changedPages[i];
    await this.pagesRepository.upsertPage({
      scope,
      scopeId: scopeId ?? null,
      pageKey: p.pageKey,
      summary: p.frontmatter.summary,
      usageMode: p.frontmatter.usage_mode,
      sortOrder: p.frontmatter.sort_order ?? 0,
      searchText: p.searchText,
      // If the backend returned fewer vectors than texts, store null rather
      // than `undefined` so the row is picked up again on the next sync.
      embedding: embeddings[i] ?? null,
    });
  }

  // Clean stale entries
  await this.pagesRepository.deleteStale(scope, scopeId ?? null, pageKeys);

  this.logger.log(
    `Wiki sync ${scope}: ${changedPages.length}/${pages.length} reindexed, ${pages.length - changedPages.length} unchanged`,
  );
}
/**
 * Apply the knowledge-file changes between two commits to the index in one
 * `applyDiffTransactional` call.
 *
 * Entries with status `D` become deletes; every other entry is read at
 * `toSha`, parsed, embedded (an embedding failure is logged and stored as
 * null) and queued as an upsert carrying the SHA-256 of the file content.
 * Paths that do not look like knowledge pages are skipped with a warning.
 * A page that fails to parse throws before anything is written. When the
 * diff is empty the repository is not called at all.
 */
async syncFromCommit(fromSha: string, toSha: string, runId: string): Promise<void> {
  const changes = await this.gitService.diffNameStatus(fromSha, toSha, 'knowledge/');
  if (changes.length === 0) {
    return;
  }

  const upserts: UpsertPageParams[] = [];
  const deletes: Array<{ scope: string; scopeId: string | null; pageKey: string }> = [];

  for (const change of changes) {
    const target = parseKnowledgePath(change.path);
    if (!target) {
      this.logger.warn(`[knowledge.sync] skipping unparseable path: ${change.path}`);
      continue;
    }

    if (change.status === 'D') {
      deletes.push(target);
      continue;
    }

    const raw = await this.gitService.getFileAtCommit(change.path, toSha);
    const page = this.parsePage(raw);
    const searchText = buildKnowledgeSearchText(
      target.pageKey,
      page.frontmatter.summary,
      page.content,
      page.frontmatter.tags,
    );

    let embedding: number[] | null = null;
    try {
      embedding = await this.embeddingService.computeEmbedding(searchText);
    } catch (err) {
      const reason = err instanceof Error ? err.message : String(err);
      this.logger.warn(`[knowledge.sync] embedding failed for ${target.pageKey}: ${reason}`);
    }

    upserts.push({
      scope: target.scope,
      scopeId: target.scopeId,
      pageKey: target.pageKey,
      summary: page.frontmatter.summary,
      usageMode: page.frontmatter.usage_mode,
      sortOrder: page.frontmatter.sort_order ?? 0,
      searchText,
      embedding,
      contentHash: createHash('sha256').update(raw).digest('hex'),
    });
  }

  await this.pagesRepository.applyDiffTransactional({ runId, upserts, deletes });
  this.logger.log(`[knowledge.sync] run=${runId} applied ${upserts.length} upsert(s), ${deletes.length} delete(s)`);
}
/**
 * Parse a knowledge file path into its scope and page key.
 *   `knowledge/global/foo.md`        → { scope: 'GLOBAL', scopeId: null, pageKey: 'foo' }
 *   `knowledge/user/{userId}/bar.md` → { scope: 'USER', scopeId: '{userId}', pageKey: 'bar' }
 * Anything else — other extensions, other roots, deeper nesting — yields null.
 */
function parseKnowledgePath(path: string): { scope: string; scopeId: string | null; pageKey: string } | null {
  const extension = '.md';
  if (!path.endsWith(extension)) {
    return null;
  }

  const [root, bucket, ...tail] = path.split('/');
  if (root !== 'knowledge') {
    return null;
  }

  // The path ends with ".md" and the extension holds no "/", so the final
  // segment always carries the extension.
  const withoutExtension = (fileName: string): string => fileName.slice(0, -extension.length);

  if (bucket === 'global' && tail.length === 1) {
    return { scope: 'GLOBAL', scopeId: null, pageKey: withoutExtension(tail[0]) };
  }
  if (bucket === 'user' && tail.length === 2) {
    return { scope: 'USER', scopeId: tail[0], pageKey: withoutExtension(tail[1]) };
  }
  return null;
}
[1, 0] : [0, 1]; + } + + async computeEmbeddingsBulk(texts: string[]): Promise { + return Promise.all(texts.map((text) => this.computeEmbedding(text))); + } +} + +describe('local knowledge helpers', () => { + let tempDir: string; + let project: KloLocalProject; + + beforeEach(async () => { + tempDir = await mkdtemp(join(tmpdir(), 'klo-local-knowledge-')); + project = await initKloProject({ projectDir: join(tempDir, 'project'), projectName: 'warehouse' }); + }); + + afterEach(async () => { + await rm(tempDir, { recursive: true, force: true }); + }); + + it('writes, reads, lists, and searches global knowledge pages', async () => { + const write = await writeLocalKnowledgePage(project, { + key: 'metrics/revenue', + scope: 'GLOBAL', + summary: 'Revenue metric definition', + content: 'Revenue is recognized when an order is paid.', + tags: ['finance'], + refs: ['semantic-layer/warehouse/orders.yaml'], + slRefs: ['orders'], + }); + + expect(write.path).toBe('knowledge/global/metrics/revenue.md'); + expect(write.operation).toBe('write'); + + await expect(readLocalKnowledgePage(project, { key: 'metrics/revenue', userId: 'local' })).resolves.toMatchObject({ + key: 'metrics/revenue', + scope: 'GLOBAL', + summary: 'Revenue metric definition', + content: 'Revenue is recognized when an order is paid.', + tags: ['finance'], + refs: ['semantic-layer/warehouse/orders.yaml'], + slRefs: ['orders'], + }); + + await expect(listLocalKnowledgePages(project, { userId: 'local' })).resolves.toEqual([ + { + key: 'metrics/revenue', + path: 'knowledge/global/metrics/revenue.md', + scope: 'GLOBAL', + summary: 'Revenue metric definition', + }, + ]); + + const search = await searchLocalKnowledgePages(project, { query: 'paid order', userId: 'local' }); + expect(search).toEqual([ + expect.objectContaining({ + key: 'metrics/revenue', + path: 'knowledge/global/metrics/revenue.md', + scope: 'GLOBAL', + score: expect.any(Number), + matchReasons: expect.arrayContaining(['lexical']), + lanes: 
expect.arrayContaining([expect.objectContaining({ lane: 'lexical', status: 'available' })]), + }), + ]); + expect(search[0]?.score).toBeGreaterThan(0); + await expect(access(join(project.projectDir, '.klo', 'db.sqlite'))).resolves.toBeUndefined(); + }); + + it('adds the token lane alongside lexical wiki matches', async () => { + await writeLocalKnowledgePage(project, { + key: 'metrics/revenue', + scope: 'GLOBAL', + summary: 'Revenue metric definition', + content: 'Revenue is recognized when an order is paid.', + tags: ['finance'], + }); + + const search = await searchLocalKnowledgePages(project, { query: 'paid---', userId: 'local', limit: 5 }); + + expect(search[0]).toMatchObject({ + key: 'metrics/revenue', + matchReasons: expect.arrayContaining(['token']), + lanes: expect.arrayContaining([expect.objectContaining({ lane: 'token', status: 'available' })]), + }); + }); + + it('uses stored page embeddings when a wiki embedding backend is configured', async () => { + await writeLocalKnowledgePage(project, { + key: 'metrics/revenue', + scope: 'GLOBAL', + summary: 'Semantic revenue definition', + content: 'Revenue search text.', + tags: ['finance'], + }); + await writeLocalKnowledgePage(project, { + key: 'support/escalations', + scope: 'GLOBAL', + summary: 'Support escalation process', + content: 'Support search text.', + tags: ['operations'], + }); + + const search = await searchLocalKnowledgePages(project, { + query: 'semantic revenue', + userId: 'local', + limit: 5, + embeddingService: new FakeEmbeddingPort(), + }); + + expect(search[0]).toMatchObject({ + key: 'metrics/revenue', + matchReasons: expect.arrayContaining(['semantic']), + lanes: expect.arrayContaining([expect.objectContaining({ lane: 'semantic', status: 'available' })]), + }); + }); + + it('reports semantic lane as skipped when wiki embeddings are not configured', async () => { + await writeLocalKnowledgePage(project, { + key: 'metrics/revenue', + scope: 'GLOBAL', + summary: 'Revenue metric definition', + 
content: 'Revenue is recognized when an order is paid.', + tags: ['finance'], + }); + + const search = await searchLocalKnowledgePages(project, { query: 'revenue', userId: 'local', limit: 5 }); + + expect(search[0]?.lanes).toEqual( + expect.arrayContaining([ + expect.objectContaining({ lane: 'semantic', status: 'skipped', reason: 'embedding_unconfigured' }), + ]), + ); + }); + + it('prefers user knowledge over global pages with the same key', async () => { + await writeLocalKnowledgePage(project, { + key: 'handoff', + scope: 'GLOBAL', + summary: 'Global handoff', + content: 'Global context.', + }); + await writeLocalKnowledgePage(project, { + key: 'handoff', + scope: 'USER', + userId: 'agent-1', + summary: 'User handoff', + content: 'User context.', + }); + + await expect(readLocalKnowledgePage(project, { key: 'handoff', userId: 'agent-1' })).resolves.toMatchObject({ + scope: 'USER', + summary: 'User handoff', + }); + }); + + it('serializes historic-SQL frontmatter fields for global pages', async () => { + await writeLocalKnowledgePage(project, { + key: 'queries/monthly-paid-orders', + scope: 'GLOBAL', + summary: 'Monthly paid orders', + content: '## Monthly paid order count', + tags: ['historic-sql', 'query-pattern'], + slRefs: ['analytics.orders'], + source: 'historic-sql', + intent: 'Monthly paid order count', + tables: ['analytics.orders'], + representativeSql: "SELECT count(*) FROM analytics.orders WHERE status = 'paid'", + usage: { + executions: 42, + distinct_users: 3, + first_seen: '2026-02-01', + last_seen: '2026-05-04', + p50_runtime_ms: 100, + p95_runtime_ms: 200, + error_rate: 0, + rows_produced: 42, + }, + fingerprints: ['fp_paid_orders'], + }); + + const raw = await project.fileStore.readFile('knowledge/global/queries/monthly-paid-orders.md'); + expect(raw.content).toContain('source: historic-sql'); + expect(raw.content).toContain('intent: Monthly paid order count'); + expect(raw.content).toContain(['tables:', ' - analytics.orders'].join('\n')); + 
expect(raw.content).toContain("representative_sql: SELECT count(*) FROM analytics.orders WHERE status = 'paid'"); + expect(raw.content).toContain(['usage:', ' executions: 42', ' distinct_users: 3'].join('\n')); + expect(raw.content).toContain(['fingerprints:', ' - fp_paid_orders'].join('\n')); + }); + + it('falls back to Markdown scanning when the config does not select sqlite-fts5', async () => { + project.config.storage.search = 'postgres-hybrid'; + await writeLocalKnowledgePage(project, { + key: 'metrics/revenue', + scope: 'GLOBAL', + summary: 'Revenue metric definition', + content: 'Revenue is recognized when an order is paid.', + tags: ['finance'], + }); + + await expect(searchLocalKnowledgePages(project, { query: 'paid order', userId: 'local' })).resolves.toEqual([ + expect.objectContaining({ + key: 'metrics/revenue', + score: 3, + matchReasons: ['token'], + }), + ]); + }); + + it('rejects unsafe knowledge keys', async () => { + await expect( + writeLocalKnowledgePage(project, { + key: '../secret', + scope: 'GLOBAL', + summary: 'bad', + content: 'bad', + }), + ).rejects.toThrow('Unsafe knowledge key'); + }); +}); diff --git a/packages/context/src/wiki/local-knowledge.ts b/packages/context/src/wiki/local-knowledge.ts new file mode 100644 index 00000000..95a17ea1 --- /dev/null +++ b/packages/context/src/wiki/local-knowledge.ts @@ -0,0 +1,391 @@ +import { join } from 'node:path'; +import YAML from 'yaml'; +import type { KloEmbeddingPort, KloFileWriteResult } from '../core/index.js'; +import type { KloLocalProject } from '../project/index.js'; +import { HybridSearchCore, type SearchCandidateGenerator } from '../search/index.js'; +import { buildKnowledgeSearchText } from './knowledge-search-text.js'; +import { SqliteKnowledgeIndex, type SqliteKnowledgeIndexPage } from './sqlite-knowledge-index.js'; +import type { HistoricSqlWikiUsageFrontmatter, WikiSearchLaneSummary, WikiSearchMatchReason } from './types.js'; + +export type LocalKnowledgeScope = 'GLOBAL' | 
'USER'; + +export interface LocalKnowledgePage { + key: string; + path: string; + scope: LocalKnowledgeScope; + summary: string; + content: string; + tags: string[]; + refs: string[]; + slRefs: string[]; +} + +export interface LocalKnowledgeSummary { + key: string; + path: string; + scope: LocalKnowledgeScope; + summary: string; +} + +export interface LocalKnowledgeSearchResult extends LocalKnowledgeSummary { + score: number; + matchReasons: WikiSearchMatchReason[]; + lanes?: WikiSearchLaneSummary[]; +} + +export interface WriteLocalKnowledgePageInput { + key: string; + scope: LocalKnowledgeScope; + userId?: string; + summary: string; + content: string; + tags?: string[]; + refs?: string[]; + slRefs?: string[]; + source?: string; + intent?: string; + tables?: string[]; + representativeSql?: string; + usage?: HistoricSqlWikiUsageFrontmatter; + fingerprints?: string[]; +} + +const LOCAL_AUTHOR = 'klo'; +const LOCAL_AUTHOR_EMAIL = 'klo@example.com'; + +function assertSafePathToken(kind: string, value: string): string { + if ( + value.trim().length === 0 || + value.includes('..') || + value.includes('\\') || + value.startsWith('/') || + value.startsWith('.') || + value.includes('//') + ) { + throw new Error(`Unsafe ${kind}: ${value}`); + } + return value; +} + +function assertSafeKnowledgeKey(key: string): string { + if (!/^[a-zA-Z0-9][a-zA-Z0-9_/-]*$/.test(key)) { + throw new Error(`Unsafe knowledge key: ${key}`); + } + return assertSafePathToken('knowledge key', key); +} + +function stringArray(value: unknown): string[] { + return Array.isArray(value) ? value.filter((item): item is string => typeof item === 'string') : []; +} + +function knowledgePath(scope: LocalKnowledgeScope, userId: string | undefined, key: string): string { + const safeKey = assertSafeKnowledgeKey(key); + if (scope === 'GLOBAL') { + return `knowledge/global/${safeKey}.md`; + } + return `knowledge/user/${assertSafePathToken('user id', userId ?? 
'local')}/${safeKey}.md`; +} + +function keyFromKnowledgePath(path: string, scope: LocalKnowledgeScope, userId: string): string { + const prefix = scope === 'GLOBAL' ? 'knowledge/global/' : `knowledge/user/${assertSafePathToken('user id', userId)}/`; + return path.slice(prefix.length).replace(/\.md$/, ''); +} + +function parseKnowledgePage(key: string, path: string, scope: LocalKnowledgeScope, raw: string): LocalKnowledgePage { + const match = raw.match(/^---\n([\s\S]*?)\n---\n?([\s\S]*)$/); + if (!match) { + return { + key, + path, + scope, + summary: '', + content: raw.trim(), + tags: [], + refs: [], + slRefs: [], + }; + } + + const frontmatter = (YAML.parse(match[1]) ?? {}) as Record; + return { + key, + path, + scope, + summary: typeof frontmatter.summary === 'string' ? frontmatter.summary : '', + content: match[2].trim(), + tags: stringArray(frontmatter.tags), + refs: stringArray(frontmatter.refs), + slRefs: stringArray(frontmatter.sl_refs), + }; +} + +function serializeKnowledgePage(input: WriteLocalKnowledgePageInput): string { + const frontmatter = { + summary: input.summary, + tags: input.tags ?? [], + refs: input.refs ?? [], + sl_refs: input.slRefs ?? [], + usage_mode: 'auto', + ...(input.source === undefined ? {} : { source: input.source }), + ...(input.intent === undefined ? {} : { intent: input.intent }), + ...(input.tables === undefined ? {} : { tables: input.tables }), + ...(input.representativeSql === undefined ? {} : { representative_sql: input.representativeSql }), + ...(input.usage === undefined ? {} : { usage: input.usage }), + ...(input.fingerprints === undefined ? 
{} : { fingerprints: input.fingerprints }), + }; + return `---\n${YAML.stringify(frontmatter, { indent: 2, lineWidth: 0 }).trimEnd()}\n---\n\n${input.content.trim()}\n`; +} + +async function readPageAtPath( + project: KloLocalProject, + key: string, + path: string, + scope: LocalKnowledgeScope, +): Promise { + try { + const result = await project.fileStore.readFile(path); + return parseKnowledgePage(key, path, scope, result.content); + } catch { + return null; + } +} + +export async function writeLocalKnowledgePage( + project: KloLocalProject, + input: WriteLocalKnowledgePageInput, +): Promise { + const path = knowledgePath(input.scope, input.userId, input.key); + return project.fileStore.writeFile( + path, + serializeKnowledgePage(input), + LOCAL_AUTHOR, + LOCAL_AUTHOR_EMAIL, + `Write knowledge page: ${input.key}`, + ); +} + +export async function readLocalKnowledgePage( + project: KloLocalProject, + input: { key: string; userId?: string }, +): Promise { + const userPath = knowledgePath('USER', input.userId, input.key); + const userPage = await readPageAtPath(project, input.key, userPath, 'USER'); + if (userPage) { + return userPage; + } + return readPageAtPath(project, input.key, knowledgePath('GLOBAL', undefined, input.key), 'GLOBAL'); +} + +export async function listLocalKnowledgePages( + project: KloLocalProject, + input: { userId?: string } = {}, +): Promise { + const userId = input.userId ?? 'local'; + const pages: LocalKnowledgeSummary[] = []; + for (const scope of ['GLOBAL', 'USER'] as const) { + const root = scope === 'GLOBAL' ? 
'knowledge/global' : `knowledge/user/${assertSafePathToken('user id', userId)}`; + const listed = await project.fileStore.listFiles(root); + for (const path of listed.files.filter((file) => file.endsWith('.md')).sort()) { + const key = keyFromKnowledgePath(path, scope, userId); + const page = await readPageAtPath(project, key, path, scope); + if (page) { + pages.push({ key, path, scope, summary: page.summary }); + } + } + } + return pages.sort((left, right) => left.path.localeCompare(right.path)); +} + +function scorePage(page: LocalKnowledgePage, terms: string[]): number { + const haystack = buildKnowledgeSearchText(page.key, page.summary, page.content, page.tags).toLowerCase(); + return terms.some((term) => haystack.includes(term)) ? 3 : 0; +} + +function sqliteKnowledgeDbPath(project: KloLocalProject): string { + return join(project.projectDir, '.klo', 'db.sqlite'); +} + +function pageSearchText(page: LocalKnowledgePage): string { + return buildKnowledgeSearchText(page.key, page.summary, page.content, page.tags); +} + +async function embeddingForPageSearchText( + searchText: string, + embeddingService: KloEmbeddingPort | null, +): Promise { + if (!embeddingService) { + return null; + } + return embeddingService.computeEmbedding(searchText); +} + +function tokenLaneCandidates(pages: LocalKnowledgePage[], terms: string[]) { + if (terms.length === 0) { + return []; + } + return pages + .map((page) => { + const haystack = pageSearchText(page).toLowerCase(); + const matched = terms.filter((term) => haystack.includes(term)).length; + return { page, score: matched / terms.length }; + }) + .filter((result) => result.score > 0) + .sort((left, right) => right.score - left.score || left.page.path.localeCompare(right.page.path)); +} + +async function loadAllKnowledgePages( + project: KloLocalProject, + input: { userId?: string } = {}, +): Promise { + const summaries = await listLocalKnowledgePages(project, { userId: input.userId }); + const pages: LocalKnowledgePage[] = []; 
+ for (const summary of summaries) { + const page = await readPageAtPath(project, summary.key, summary.path, summary.scope); + if (page) { + pages.push(page); + } + } + return pages; +} + +async function searchLocalKnowledgePagesWithSqlite( + project: KloLocalProject, + input: { query: string; userId?: string; embeddingService?: KloEmbeddingPort | null; limit?: number }, +): Promise { + const pages = await loadAllKnowledgePages(project, { userId: input.userId }); + const byPath = new Map(pages.map((page) => [page.path, page])); + const embeddingService = input.embeddingService ?? null; + const index = new SqliteKnowledgeIndex({ dbPath: sqliteKnowledgeDbPath(project) }); + const existingPages = index.getExistingPages(); + const indexPages: SqliteKnowledgeIndexPage[] = []; + for (const page of pages) { + const searchText = pageSearchText(page); + const existing = existingPages.get(page.path); + const embedding = + existing?.searchText === searchText && existing.embedding + ? existing.embedding + : await embeddingForPageSearchText(searchText, embeddingService).catch(() => null); + indexPages.push({ + path: page.path, + key: page.key, + scope: page.scope, + summary: page.summary, + content: page.content, + tags: page.tags, + embedding, + }); + } + + index.sync(indexPages); + + const finalLimit = input.limit ?? 
Math.max(1, indexPages.length); + const core = new HybridSearchCore(); + const generators: SearchCandidateGenerator[] = [ + { + lane: 'lexical', + async generate(args) { + const rows = index.searchLexicalCandidates({ + queryText: args.queryText, + limit: args.laneCandidatePoolLimit, + }); + return { + candidates: rows.map((row) => ({ id: row.id, rank: row.rank, rawScore: row.rawScore })), + }; + }, + }, + { + lane: 'token', + async generate(args) { + const rows = tokenLaneCandidates(pages, args.normalizedQuery.terms).slice(0, args.laneCandidatePoolLimit); + return { + candidates: rows.map((row, index) => ({ + id: row.page.path, + rank: index + 1, + rawScore: row.score, + })), + }; + }, + }, + { + lane: 'semantic', + async generate(args) { + if (!embeddingService) { + return { status: 'skipped', candidates: [], reason: 'embedding_unconfigured' }; + } + try { + const queryEmbedding = await embeddingService.computeEmbedding(args.queryText); + const rows = index.searchSemanticCandidates({ + queryEmbedding, + limit: args.laneCandidatePoolLimit, + }); + return { + candidates: rows.map((row) => ({ id: row.id, rank: row.rank, rawScore: row.rawScore })), + }; + } catch (error) { + return { + status: 'skipped', + candidates: [], + reason: `embedding_unhealthy:${error instanceof Error ? error.message : String(error)}`, + }; + } + }, + }, + ]; + + const result = await core.search({ queryText: input.query, limit: finalLimit, generators }); + return result.results + .map((fused): LocalKnowledgeSearchResult | null => { + const page = byPath.get(fused.id); + return page + ? 
{ + key: page.key, + path: page.path, + scope: page.scope, + summary: page.summary, + score: fused.score, + matchReasons: fused.matchReasons as WikiSearchMatchReason[], + lanes: result.lanes, + } + : null; + }) + .filter((result): result is LocalKnowledgeSearchResult => result !== null); +} + +async function searchLocalKnowledgePagesWithScan( + project: KloLocalProject, + input: { query: string; userId?: string; limit?: number }, +): Promise { + const terms = input.query + .toLowerCase() + .split(/\s+/) + .map((term) => term.trim()) + .filter(Boolean); + const pages = await loadAllKnowledgePages(project, { userId: input.userId }); + const results: LocalKnowledgeSearchResult[] = []; + for (const page of pages) { + const score = scorePage(page, terms); + if (score > 0) { + results.push({ + key: page.key, + path: page.path, + scope: page.scope, + summary: page.summary, + score, + matchReasons: ['token' as const], + }); + } + } + return results + .sort((left, right) => right.score - left.score || left.path.localeCompare(right.path)) + .slice(0, input.limit ?? 
results.length); +} + +export async function searchLocalKnowledgePages( + project: KloLocalProject, + input: { query: string; userId?: string; embeddingService?: KloEmbeddingPort | null; limit?: number }, +): Promise { + if (project.config.storage.search === 'sqlite-fts5') { + return searchLocalKnowledgePagesWithSqlite(project, input); + } + return searchLocalKnowledgePagesWithScan(project, input); +} diff --git a/packages/context/src/wiki/ports.ts b/packages/context/src/wiki/ports.ts new file mode 100644 index 00000000..21a2fa87 --- /dev/null +++ b/packages/context/src/wiki/ports.ts @@ -0,0 +1,68 @@ +import type { KloFileStorePort } from '../core/file-store.js'; + +export interface UpsertPageParams { + scope: string; + scopeId: string | null; + pageKey: string; + summary: string; + usageMode: string; + sortOrder: number; + searchText: string; + embedding: number[] | null; + contentHash?: string | null; + sourceRunId?: string | null; +} + +export interface KnowledgeIndexPort { + upsertPage(params: UpsertPageParams): Promise; + applyDiffTransactional(params: { + runId: string; + upserts: UpsertPageParams[]; + deletes: Array<{ scope: string; scopeId: string | null; pageKey: string }>; + }): Promise; + getExistingSearchTexts( + scope: string, + scopeId: string | null, + ): Promise>; + deleteStale(scope: string, scopeId: string | null, keepKeys: string[]): Promise; + deleteByScope(scope: string, scopeId: string | null): Promise; + deleteByKey(scope: string, scopeId: string | null, pageKey: string): Promise; + findPageByKey( + scope: string, + scopeId: string | null, + pageKey: string, + ): Promise<{ id?: string; page_key: string } | null | undefined>; + listPagesForUser( + userId: string, + ): Promise>; + getUserPageCount(userId: string): Promise; + incrementUsageCount(pageIds: string[]): Promise; + searchRRF( + userId: string, + queryEmbedding: number[] | null, + queryText: string, + limit: number, + ): Promise>; +} + +export interface KnowledgeEventPort { + 
createEvent(params: { + blockId: string | null; + eventType: string; + actorId: string; + chatId?: string | null; + messageId?: string | null; + payload: Record; + }): Promise; +} + +export interface KnowledgeGitDiffPort { + diffNameStatus( + fromSha: string, + toSha: string, + pathPrefix?: string, + ): Promise>; + getFileAtCommit(path: string, sha: string): Promise; +} + +export type WikiFileStorePort = KloFileStorePort; diff --git a/packages/context/src/wiki/sqlite-knowledge-index.test.ts b/packages/context/src/wiki/sqlite-knowledge-index.test.ts new file mode 100644 index 00000000..05509e12 --- /dev/null +++ b/packages/context/src/wiki/sqlite-knowledge-index.test.ts @@ -0,0 +1,115 @@ +import { access, mkdtemp, rm } from 'node:fs/promises'; +import { tmpdir } from 'node:os'; +import { join } from 'node:path'; +import { afterEach, beforeEach, describe, expect, it } from 'vitest'; +import { SqliteKnowledgeIndex, type SqliteKnowledgeIndexPage } from './sqlite-knowledge-index.js'; + +describe('SqliteKnowledgeIndex', () => { + let tempDir: string; + let dbPath: string; + + beforeEach(async () => { + tempDir = await mkdtemp(join(tmpdir(), 'klo-sqlite-knowledge-index-')); + dbPath = join(tempDir, 'db.sqlite'); + }); + + afterEach(async () => { + await rm(tempDir, { recursive: true, force: true }); + }); + + function page(overrides: Partial = {}): SqliteKnowledgeIndexPage { + return { + path: 'knowledge/global/revenue.md', + key: 'revenue', + scope: 'GLOBAL', + summary: 'Revenue definition', + content: 'Revenue is the sum of paid order amounts.', + tags: ['finance'], + embedding: null, + ...overrides, + }; + } + + it('creates a SQLite FTS5 index and returns lexical lane candidates', async () => { + const index = new SqliteKnowledgeIndex({ dbPath }); + + index.sync([ + page(), + page({ + path: 'knowledge/global/support.md', + key: 'support', + summary: 'Support queue', + content: 'Tickets are grouped by priority.', + tags: ['operations'], + }), + ]); + + await 
expect(access(dbPath)).resolves.toBeUndefined(); + expect(index.searchLexicalCandidates({ queryText: 'paid order', limit: 10 })).toEqual([ + expect.objectContaining({ + id: 'knowledge/global/revenue.md', + path: 'knowledge/global/revenue.md', + rank: 1, + rawScore: expect.any(Number), + }), + ]); + }); + + it('removes stale rows when the Markdown source list changes', () => { + const index = new SqliteKnowledgeIndex({ dbPath }); + index.rebuild([page(), page({ path: 'knowledge/global/churn.md', key: 'churn', content: 'Churn risk.' })]); + expect(index.search('churn', 10)).toHaveLength(1); + + index.rebuild([page()]); + + expect(index.search('churn', 10)).toEqual([]); + }); + + it('exposes existing search text and embedding state for incremental refresh', () => { + const index = new SqliteKnowledgeIndex({ dbPath }); + index.sync([page({ path: 'knowledge/global/revenue.md', key: 'revenue', embedding: [1, 0] })]); + + expect(index.getExistingPages()).toEqual( + new Map([ + [ + 'knowledge/global/revenue.md', + expect.objectContaining({ + searchText: expect.stringContaining('Revenue definition'), + embedding: [1, 0], + }), + ], + ]), + ); + }); + + it('returns semantic lane candidates from stored page embeddings', () => { + const index = new SqliteKnowledgeIndex({ dbPath }); + index.sync([ + page({ path: 'knowledge/global/revenue.md', key: 'revenue', embedding: [1, 0] }), + page({ path: 'knowledge/global/support.md', key: 'support', summary: 'Support queue', embedding: [0, 1] }), + ]); + + expect(index.searchSemanticCandidates({ queryEmbedding: [1, 0], limit: 10 })).toEqual([ + expect.objectContaining({ + id: 'knowledge/global/revenue.md', + path: 'knowledge/global/revenue.md', + rank: 1, + rawScore: 1, + }), + expect.objectContaining({ + id: 'knowledge/global/support.md', + path: 'knowledge/global/support.md', + rank: 2, + rawScore: 0, + }), + ]); + }); + + it('returns an empty result for blank or punctuation-only queries', () => { + const index = new 
SqliteKnowledgeIndex({ dbPath }); + index.rebuild([page()]); + + expect(index.search(' ', 10)).toEqual([]); + expect(index.search('---', 10)).toEqual([]); + }); +}); diff --git a/packages/context/src/wiki/sqlite-knowledge-index.ts b/packages/context/src/wiki/sqlite-knowledge-index.ts new file mode 100644 index 00000000..acadc02e --- /dev/null +++ b/packages/context/src/wiki/sqlite-knowledge-index.ts @@ -0,0 +1,276 @@ +import { mkdirSync } from 'node:fs'; +import { dirname } from 'node:path'; +import Database from 'better-sqlite3'; +import { buildKnowledgeSearchText } from './knowledge-search-text.js'; +import type { LocalKnowledgeScope } from './local-knowledge.js'; + +export interface SqliteKnowledgeIndexOptions { + dbPath: string; +} + +export interface SqliteKnowledgeIndexPage { + path: string; + key: string; + scope: LocalKnowledgeScope; + summary: string; + content: string; + tags: string[]; + embedding?: number[] | null; +} + +export interface SqliteKnowledgeIndexSearchResult { + path: string; + score: number; +} + +export interface WikiSqliteLaneCandidate { + id: string; + path: string; + rank: number; + rawScore: number; +} + +export interface ExistingKnowledgeIndexPage { + searchText: string; + embedding: number[] | null; +} + +interface SearchRow { + path: string; + rank: number; +} + +type IndexedPageRow = { + path: string; + embedding_json: string | null; +}; + +function cosineSimilarity(left: number[], right: number[]): number { + if (left.length === 0 || left.length !== right.length) { + return 0; + } + let dot = 0; + let leftNorm = 0; + let rightNorm = 0; + for (let i = 0; i < left.length; i++) { + const l = left[i] ?? 0; + const r = right[i] ?? 
0; + dot += l * r; + leftNorm += l * l; + rightNorm += r * r; + } + if (leftNorm === 0 || rightNorm === 0) { + return 0; + } + return dot / (Math.sqrt(leftNorm) * Math.sqrt(rightNorm)); +} + +function scoreFromRank(rank: number): number { + return Number((1 / (1 + Math.abs(rank))).toFixed(6)); +} + +function parseEmbedding(raw: string | null): number[] | null { + if (!raw) { + return null; + } + try { + const embedding = JSON.parse(raw) as unknown; + return Array.isArray(embedding) && embedding.every((value) => typeof value === 'number') ? embedding : null; + } catch { + return null; + } +} + +function normalizeFtsQuery(query: string): string { + const terms = query + .toLowerCase() + .split(/[^a-z0-9_]+/u) + .map((term) => term.trim()) + .filter(Boolean); + + return [...new Set(terms)].map((term) => `"${term.replaceAll('"', '""')}"`).join(' OR '); +} + +export class SqliteKnowledgeIndex { + private readonly db: Database.Database; + + constructor(options: SqliteKnowledgeIndexOptions) { + mkdirSync(dirname(options.dbPath), { recursive: true }); + this.db = new Database(options.dbPath); + this.db.pragma('journal_mode = WAL'); + this.db.pragma('foreign_keys = ON'); + this.db.exec(` + CREATE TABLE IF NOT EXISTS knowledge_pages ( + path TEXT PRIMARY KEY, + key TEXT NOT NULL, + scope TEXT NOT NULL, + summary TEXT NOT NULL, + content TEXT NOT NULL, + tags TEXT NOT NULL, + search_text TEXT NOT NULL, + embedding_json TEXT + ); + + CREATE VIRTUAL TABLE IF NOT EXISTS knowledge_pages_fts USING fts5( + path UNINDEXED, + key, + summary, + content, + tags + ); + `); + const columns = this.db.prepare('PRAGMA table_info(knowledge_pages)').all() as Array<{ name: string }>; + const columnNames = new Set(columns.map((column) => column.name)); + if (!columnNames.has('search_text')) { + this.db.exec("ALTER TABLE knowledge_pages ADD COLUMN search_text TEXT NOT NULL DEFAULT ''"); + } + if (!columnNames.has('embedding_json')) { + this.db.exec('ALTER TABLE knowledge_pages ADD COLUMN 
embedding_json TEXT'); + } + } + + sync(pages: SqliteKnowledgeIndexPage[]): void { + const keepPaths = pages.map((page) => page.path); + const clearPages = + keepPaths.length === 0 + ? this.db.prepare('DELETE FROM knowledge_pages') + : this.db.prepare(`DELETE FROM knowledge_pages WHERE path NOT IN (${keepPaths.map(() => '?').join(', ')})`); + const clearFts = + keepPaths.length === 0 + ? this.db.prepare('DELETE FROM knowledge_pages_fts') + : this.db.prepare(`DELETE FROM knowledge_pages_fts WHERE path NOT IN (${keepPaths.map(() => '?').join(', ')})`); + const upsertPage = this.db.prepare(` + INSERT INTO knowledge_pages (path, key, scope, summary, content, tags, search_text, embedding_json) + VALUES (@path, @key, @scope, @summary, @content, @tags, @searchText, @embeddingJson) + ON CONFLICT(path) DO UPDATE SET + key = excluded.key, + scope = excluded.scope, + summary = excluded.summary, + content = excluded.content, + tags = excluded.tags, + search_text = excluded.search_text, + embedding_json = excluded.embedding_json + `); + const deleteFts = this.db.prepare('DELETE FROM knowledge_pages_fts WHERE path = @path'); + const insertFts = this.db.prepare(` + INSERT INTO knowledge_pages_fts (path, key, summary, content, tags) + VALUES (@path, @key, @summary, @content, @tags) + `); + + const transaction = this.db.transaction((items: SqliteKnowledgeIndexPage[]) => { + clearPages.run(...keepPaths); + clearFts.run(...keepPaths); + for (const page of items) { + const searchText = buildKnowledgeSearchText(page.key, page.summary, page.content, page.tags); + const row = { + path: page.path, + key: page.key, + scope: page.scope, + summary: page.summary, + content: searchText, + tags: page.tags.join(' '), + searchText, + embeddingJson: page.embedding ? 
JSON.stringify(page.embedding) : null, + }; + upsertPage.run(row); + deleteFts.run(row); + insertFts.run(row); + } + }); + + transaction(pages); + } + + rebuild(pages: SqliteKnowledgeIndexPage[]): void { + this.sync(pages); + } + + getExistingPages(): Map { + const rows = this.db + .prepare( + ` + SELECT path, search_text, embedding_json + FROM knowledge_pages + ORDER BY path ASC + `, + ) + .all() as Array<{ path: string; search_text: string; embedding_json: string | null }>; + + return new Map( + rows.map((row) => [ + row.path, + { + searchText: row.search_text, + embedding: parseEmbedding(row.embedding_json), + }, + ]), + ); + } + + searchLexicalCandidates(input: { queryText: string; limit: number }): WikiSqliteLaneCandidate[] { + const ftsQuery = normalizeFtsQuery(input.queryText); + if (!ftsQuery) { + return []; + } + + const rows = this.db + .prepare( + ` + SELECT path, bm25(knowledge_pages_fts) AS rank + FROM knowledge_pages_fts + WHERE knowledge_pages_fts MATCH ? + ORDER BY rank ASC, path ASC + LIMIT ? 
+ `, + ) + .all(ftsQuery, Math.max(1, input.limit)) as SearchRow[]; + + return rows.map((row, index) => ({ + id: row.path, + path: row.path, + rank: index + 1, + rawScore: Number(row.rank), + })); + } + + searchSemanticCandidates(input: { queryEmbedding: number[]; limit: number }): WikiSqliteLaneCandidate[] { + const rows = this.db + .prepare( + ` + SELECT path, embedding_json + FROM knowledge_pages + ORDER BY path ASC + `, + ) + .all() as IndexedPageRow[]; + + return rows + .flatMap((row) => { + if (!row.embedding_json) { + return []; + } + const embedding = parseEmbedding(row.embedding_json); + if (!embedding) { + return []; + } + return [ + { + id: row.path, + path: row.path, + rank: 0, + rawScore: cosineSimilarity(input.queryEmbedding, embedding), + }, + ]; + }) + .sort((left, right) => right.rawScore - left.rawScore || left.path.localeCompare(right.path)) + .slice(0, Math.max(1, input.limit)) + .map((candidate, index) => ({ ...candidate, rank: index + 1 })); + } + + search(query: string, limit: number): SqliteKnowledgeIndexSearchResult[] { + return this.searchLexicalCandidates({ queryText: query, limit }).map((row) => ({ + path: row.path, + score: scoreFromRank(row.rawScore), + })); + } +} diff --git a/packages/context/src/wiki/tools/index.ts b/packages/context/src/wiki/tools/index.ts new file mode 100644 index 00000000..8cc5ac9c --- /dev/null +++ b/packages/context/src/wiki/tools/index.ts @@ -0,0 +1,5 @@ +export { WikiListTagsTool } from './wiki-list-tags.tool.js'; +export { WikiReadTool } from './wiki-read.tool.js'; +export { WikiRemoveTool } from './wiki-remove.tool.js'; +export { WikiSearchTool } from './wiki-search.tool.js'; +export { WikiWriteTool } from './wiki-write.tool.js'; diff --git a/packages/context/src/wiki/tools/wiki-list-tags.tool.test.ts b/packages/context/src/wiki/tools/wiki-list-tags.tool.test.ts new file mode 100644 index 00000000..a47b5912 --- /dev/null +++ b/packages/context/src/wiki/tools/wiki-list-tags.tool.test.ts @@ -0,0 +1,42 @@ 
+import { describe, expect, it, vi } from 'vitest'; +import type { ToolContext } from '../../tools/index.js'; +import { WikiListTagsTool } from './wiki-list-tags.tool.js'; + +describe('WikiListTagsTool', () => { + const baseContext: ToolContext = { sourceId: 's', messageId: 'm', userId: 'u' }; + + it("returns distinct sorted tags across the user's visible pages", async () => { + const pagesRepository = { + listPagesForUser: vi.fn().mockResolvedValue([ + { scope: 'GLOBAL', scope_id: null, page_key: 'k1' }, + { scope: 'USER', scope_id: 'u', page_key: 'k2' }, + ]), + }; + const wikiService = { + readPage: vi.fn().mockImplementation((_scope, _scopeId, key) => { + if (key === 'k1') { + return Promise.resolve({ frontmatter: { tags: ['metrics', 'finance'] }, content: '' }); + } + if (key === 'k2') { + return Promise.resolve({ frontmatter: { tags: ['metrics'] }, content: '' }); + } + return Promise.resolve(null); + }), + }; + const tool = new WikiListTagsTool(wikiService as any, pagesRepository as any); + + const result = await tool.call({}, baseContext); + expect(result.markdown).toContain('finance'); + expect(result.markdown).toContain('metrics'); + expect(result.structured.tags).toEqual(['finance', 'metrics']); + }); + + it('returns a friendly message when no pages have tags', async () => { + const pagesRepository = { listPagesForUser: vi.fn().mockResolvedValue([]) }; + const wikiService = { readPage: vi.fn() }; + const tool = new WikiListTagsTool(wikiService as any, pagesRepository as any); + + const result = await tool.call({}, baseContext); + expect(result.markdown).toMatch(/no tags/i); + }); +}); diff --git a/packages/context/src/wiki/tools/wiki-list-tags.tool.ts b/packages/context/src/wiki/tools/wiki-list-tags.tool.ts new file mode 100644 index 00000000..cd3c5aac --- /dev/null +++ b/packages/context/src/wiki/tools/wiki-list-tags.tool.ts @@ -0,0 +1,49 @@ +import { z } from 'zod'; +import type { KnowledgeIndexPort } from '../ports.js'; +type BlockScope = 'GLOBAL' | 
'USER'; +import { KnowledgeWikiService } from '../index.js'; +import { BaseTool, type ToolContext, type ToolOutput } from '../../tools/index.js'; + +const wikiListTagsInputSchema = z.object({}); + +type WikiListTagsInput = z.infer; + +export class WikiListTagsTool extends BaseTool { + readonly name = 'wiki_list_tags'; + + constructor( + private readonly wikiService: KnowledgeWikiService, + private readonly pagesRepository: KnowledgeIndexPort, + ) { + super(); + } + + get description(): string { + return ` +List distinct topic tags across all wiki pages visible to the user. +Call before writing a new page so you can reuse existing tags consistently instead of coining near-duplicates. +`; + } + + get inputSchema() { + return wikiListTagsInputSchema; + } + + async call(_input: WikiListTagsInput, context: ToolContext): Promise> { + const pages = await this.pagesRepository.listPagesForUser(context.userId); + const set = new Set(); + for (const p of pages) { + const scope = p.scope as BlockScope; + const scopeId = scope === 'USER' ? p.scope_id : null; + const page = await this.wikiService.readPage(scope, scopeId, p.page_key); + for (const t of page?.frontmatter.tags ?? []) { + set.add(t); + } + } + const tags = [...set].sort(); + return { + markdown: tags.length === 0 ? '(no tags in use yet)' : tags.join(', '), + structured: { tags }, + }; + } +} diff --git a/packages/context/src/wiki/tools/wiki-read.tool.ts b/packages/context/src/wiki/tools/wiki-read.tool.ts new file mode 100644 index 00000000..73eb7e40 --- /dev/null +++ b/packages/context/src/wiki/tools/wiki-read.tool.ts @@ -0,0 +1,82 @@ +import { z } from 'zod'; +import type { KnowledgeIndexPort } from '../ports.js'; +import { KnowledgeWikiService } from '../index.js'; +import { BaseTool, type ToolContext, type ToolOutput } from '../../tools/index.js'; + +const WikiReadInputSchema = z.object({ + key: z + .string() + .describe('The block_key to read. 
/**
 * Tool that loads one wiki page by key for the calling user and renders it as markdown.
 *
 * NOTE(review): generic arguments on `BaseTool` are not visible in this chunk, and the return type
 * of `call` is reconstructed from the `WikiReadStructured` interface — confirm upstream.
 */
export class WikiReadTool extends BaseTool {
  readonly name = 'wiki_read';

  constructor(
    private readonly wikiService: KnowledgeWikiService,
    private readonly pagesRepository: KnowledgeIndexPort,
  ) {
    super();
  }

  get description(): string {
    // NOTE(review): the second sentence reads "listed in the ." — a name looks to be missing
    // after "the". The text is reproduced as-is; restore it from the upstream file.
    return (
      'Load the full content of a knowledge block by its key. ' +
      'Use this to retrieve detailed rules, preferences, or definitions listed in the . ' +
      'Call this when the user query relates to a topic covered by an available knowledge block.'
    );
  }

  get inputSchema() {
    return WikiReadInputSchema;
  }

  /**
   * Reads the page, bumps its usage counter in the background, and formats the result.
   *
   * @param input    Carries `key`, the page key to load.
   * @param context  Supplies the `userId` used for the page lookup and for USER-scoped index lookups.
   * @returns `found: false` with empty content when no page matches; otherwise the page body
   *          (plus a "See also" line when the page has refs) and its scope, tags and refs.
   */
  async call(input: WikiReadInput, context: ToolContext): Promise<ToolOutput<WikiReadStructured>> {
    const page = await this.wikiService.readPageForUser(context.userId, input.key);

    if (!page) {
      return {
        markdown: `No knowledge block found with key "${input.key}".`,
        structured: { blockKey: input.key, content: '', scope: '', found: false },
      };
    }

    const indexEntry = await this.pagesRepository.findPageByKey(
      page.scope,
      page.scope === 'USER' ? context.userId : null,
      input.key,
    );
    if (indexEntry?.id) {
      // Usage counting is deliberately not awaited. Without a rejection handler a failed counter
      // write would surface as an unhandled promise rejection, so the failure is absorbed here.
      // Promise.resolve() tolerates implementations (e.g. test stubs) that return a non-promise.
      void Promise.resolve(this.pagesRepository.incrementUsageCount([indexEntry.id])).catch(() => {
        // Best-effort bookkeeping: the page was read successfully, so the read must still succeed.
      });
    }

    let md = `## ${page.pageKey}\n\n${page.content}`;
    const refs = page.frontmatter.refs;
    if (refs && refs.length > 0) {
      md += `\n\nSee also: ${refs.map((r) => `[[${r}]]`).join(', ')}`;
    }

    return {
      markdown: md,
      structured: {
        blockKey: page.pageKey,
        content: page.content,
        scope: page.scope,
        found: true,
        tags: page.frontmatter.tags,
        refs: page.frontmatter.refs,
      },
    };
  }
}
async () => { + const wikiService = { + deletePage: vi.fn().mockResolvedValue(undefined), + deleteFromIndex: vi.fn().mockResolvedValue(undefined), + }; + const pagesRepository = { findPageByKey: vi.fn().mockResolvedValue({ page_key: 'old' }) }; + const knowledgeRepository = { createEvent: vi.fn().mockResolvedValue(undefined) }; + const tool = new WikiRemoveTool(wikiService as any, pagesRepository as any, knowledgeRepository as any); + const session: ToolSession = { + connectionId: 'c', + isWorktreeScoped: true, + preHead: null, + touchedSlSources: createTouchedSlSources(), + actions: [], + semanticLayerService: {} as any, + wikiService: wikiService as any, + configService: {} as any, + gitService: {} as any, + }; + await tool.call({ key: 'old' } as any, { ...baseContext, session }); + expect(wikiService.deletePage).toHaveBeenCalledTimes(1); + expect(wikiService.deleteFromIndex).not.toHaveBeenCalled(); + expect(session.actions).toContainEqual(expect.objectContaining({ target: 'wiki', type: 'removed', key: 'old' })); + }); + + it('returns a friendly message when the page does not exist', async () => { + const wikiService = { deletePage: vi.fn(), deleteFromIndex: vi.fn() }; + const pagesRepository = { findPageByKey: vi.fn().mockResolvedValue(null) }; + const knowledgeRepository = { createEvent: vi.fn() }; + const tool = new WikiRemoveTool(wikiService as any, pagesRepository as any, knowledgeRepository as any); + const result = await tool.call({ key: 'missing' } as any, baseContext); + expect(result.structured.success).toBe(false); + expect(result.markdown).toMatch(/not found/i); + }); +}); diff --git a/packages/context/src/wiki/tools/wiki-remove.tool.ts b/packages/context/src/wiki/tools/wiki-remove.tool.ts new file mode 100644 index 00000000..0e5905ec --- /dev/null +++ b/packages/context/src/wiki/tools/wiki-remove.tool.ts @@ -0,0 +1,85 @@ +import { z } from 'zod'; +import type { KnowledgeIndexPort } from '../ports.js'; +import type { KnowledgeEventPort } from 
/**
 * Tool that deletes a wiki page, optionally removes it from the index, and records the removal.
 *
 * NOTE(review): generic arguments on `BaseTool` are not visible in this chunk, and the return type
 * of `call` is reconstructed from the `WikiRemoveStructured` interface — confirm upstream.
 */
export class WikiRemoveTool extends BaseTool {
  readonly name = 'wiki_remove';

  constructor(
    private readonly wikiService: KnowledgeWikiService,
    private readonly pagesRepository: KnowledgeIndexPort,
    private readonly knowledgeRepository: KnowledgeEventPort,
  ) {
    super();
  }

  get description(): string {
    return 'Remove a knowledge page that is no longer relevant.';
  }

  get inputSchema() {
    return wikiRemoveInputSchema;
  }

  /**
   * Removes the page identified by `input.key`.
   *
   * With a session on the context the page is addressed in GLOBAL scope through the session's wiki
   * service; without one it is addressed in USER scope for `context.userId`.
   *
   * @returns `success: false` when the index has no entry for the key, otherwise `success: true`.
   */
  async call(input: WikiRemoveInput, context: ToolContext): Promise<ToolOutput<WikiRemoveStructured>> {
    const { session, userId } = context;
    const { key } = input;

    const service = session?.wikiService ?? this.wikiService;
    const scope: BlockScope = session ? 'GLOBAL' : 'USER';
    const scopeId = session ? null : userId;

    // NOTE(review): existence is checked against the index, yet worktree-scoped sessions skip
    // index updates below — confirm the index is expected to know about such pages.
    const indexed = await this.pagesRepository.findPageByKey(scope, scopeId, key);
    if (!indexed) {
      return {
        markdown: `Page "${key}" not found.`,
        structured: { success: false, key },
      };
    }

    await service.deletePage(scope, scopeId, key, SYSTEM_AUTHOR, SYSTEM_EMAIL);

    // Worktree-scoped sessions leave the index untouched.
    const touchIndex = session?.isWorktreeScoped !== true;
    if (touchIndex) {
      await service.deleteFromIndex(scope, scopeId, key);
    }

    await this.knowledgeRepository.createEvent({
      blockId: null,
      eventType: 'BLOCK_REMOVED',
      actorId: userId,
      chatId: null,
      messageId: null,
      payload: { removedKey: key, blockKey: key },
    });

    // Record the action on the session, when there is one.
    session?.actions.push({
      target: 'wiki',
      type: 'removed',
      key,
      detail: `Removed page "${key}"`,
    });

    return {
      markdown: `Page "${key}" removed.`,
      structured: { success: true, key },
    };
  }
}
/**
 * Tool that forwards a query to the injected search adapter and reshapes the hits for the agent.
 *
 * NOTE(review): generic arguments on `BaseTool` are not visible in this chunk, and the return type
 * of `call` is reconstructed from the `WikiSearchStructured` interface — confirm upstream.
 */
export class WikiSearchTool extends BaseTool {
  readonly name = 'wiki_search';

  constructor(private readonly searchAdapter: WikiSearchAdapterPort) {
    super();
  }

  get description(): string {
    const sentences = [
      'Search knowledge blocks by hybrid lexical, semantic, and token matching. ',
      'Use this when you need to find knowledge on a topic not visible in the discovery index. ',
      'Returns ranked summaries — use wiki_read to load the full content of specific results.',
    ];
    return sentences.join('');
  }

  get inputSchema() {
    return WikiSearchInputSchema;
  }

  /**
   * Runs the search for `context.userId` and returns a numbered markdown list plus structured hits.
   *
   * @param input    `query` and `limit`, passed through to the adapter unchanged.
   * @param context  Supplies the `userId` sent to the adapter.
   * @returns An empty result set with `totalFound: 0` when the adapter returns no hits; otherwise
   *          one entry per hit (adapter `key` exposed as `blockKey`) and the adapter's `totalFound`.
   */
  async call(input: WikiSearchInput, context: ToolContext): Promise<ToolOutput<WikiSearchStructured>> {
    const { results: hits, totalFound } = await this.searchAdapter.search({
      userId: context.userId,
      query: input.query,
      limit: input.limit,
    });

    if (hits.length === 0) {
      return {
        markdown: `No knowledge blocks found matching "${input.query}".`,
        structured: { results: [], totalFound: 0 },
      };
    }

    const numbered: string[] = [];
    const results: WikiSearchResult[] = [];
    hits.forEach(({ key, path, summary, score, matchReasons, lanes }, position) => {
      numbered.push(`${position + 1}. **${key}**: ${summary}`);
      results.push({ blockKey: key, path, summary, score, matchReasons, lanes });
    });

    const structured: WikiSearchStructured = { results, totalFound };

    return {
      markdown: `Found ${hits.length} knowledge block(s):\n\n${numbered.join('\n')}`,
      structured,
    };
  }
}
describe('WikiWriteTool', () => {
  const ctx: ToolContext = { sourceId: 's', messageId: 'm', userId: 'u' };

  const QUERY_PAGE_KEY = 'queries/monthly-paid-orders';
  const PAID_ORDERS_SQL = "SELECT count(*) FROM analytics.orders WHERE status = 'paid'";

  // Returns a fresh object on every call so inputs and expectations never share a reference.
  const usageStats = () => ({
    executions: 42,
    distinct_users: 3,
    first_seen: '2026-02-01',
    last_seen: '2026-05-04',
    p50_runtime_ms: 100,
    p95_runtime_ms: 200,
    error_rate: 0,
    rows_produced: 42,
  });

  it('creates a new page and indexes it when no session is present', async () => {
    const { tool, wikiService } = makeTool();
    const input = { key: 'leads-source', summary: 'Lead source definitions', content: '# Leads' };

    const outcome = await tool.call(input as any, ctx);

    expect(wikiService.writePage).toHaveBeenCalledTimes(1);
    expect(wikiService.syncSinglePage).toHaveBeenCalledTimes(1);
    expect(outcome.markdown).toMatch(/created/i);
  });

  it('skips syncSinglePage when session is worktree-scoped', async () => {
    const { tool, wikiService } = makeTool();
    const session: ToolSession = {
      connectionId: 'conn-1',
      isWorktreeScoped: true,
      preHead: null,
      touchedSlSources: createTouchedSlSources(),
      actions: [],
      semanticLayerService: {} as any,
      wikiService: wikiService as any,
      configService: {} as any,
      gitService: {} as any,
    };

    await tool.call({ key: 'k', summary: 's', content: '# x' } as any, { ...ctx, session });

    expect(wikiService.writePage).toHaveBeenCalledTimes(1);
    expect(wikiService.syncSinglePage).not.toHaveBeenCalled();
    expect(session.actions).toContainEqual(expect.objectContaining({ target: 'wiki', type: 'created', key: 'k' }));
  });

  it('requires either content or replacements', async () => {
    const { tool } = makeTool();

    const outcome = await tool.call({ key: 'k', summary: 's' } as any, ctx);

    expect(outcome.structured.success).toBe(false);
    expect(outcome.markdown).toMatch(/content.*or.*replacements/i);
  });

  it('writes historic-SQL frontmatter fields', async () => {
    const { tool, wikiService } = makeTool();
    const input = {
      key: QUERY_PAGE_KEY,
      summary: 'Monthly paid orders',
      tags: ['historic-sql', 'query-pattern'],
      sl_refs: ['analytics.orders'],
      source: 'historic-sql',
      intent: 'Monthly paid order count',
      tables: ['analytics.orders'],
      representative_sql: PAID_ORDERS_SQL,
      usage: usageStats(),
      fingerprints: ['fp_paid_orders'],
      content: '## Monthly paid order count',
    };

    await tool.call(input as any, ctx);

    // Argument index 3 of writePage is the frontmatter object.
    const writtenFrontmatter = wikiService.writePage.mock.calls[0][3];
    expect(writtenFrontmatter).toEqual({
      summary: 'Monthly paid orders',
      usage_mode: 'auto',
      sort_order: 0,
      tags: ['historic-sql', 'query-pattern'],
      refs: undefined,
      sl_refs: ['analytics.orders'],
      source: 'historic-sql',
      intent: 'Monthly paid order count',
      tables: ['analytics.orders'],
      representative_sql: PAID_ORDERS_SQL,
      usage: usageStats(),
      fingerprints: ['fp_paid_orders'],
    });
  });

  it('preserves historic-SQL frontmatter fields when update omits them', async () => {
    const existingFrontmatter = {
      summary: 'Monthly paid orders',
      usage_mode: 'auto' as const,
      sort_order: 0,
      tags: ['historic-sql'],
      sl_refs: ['analytics.orders'],
      source: 'historic-sql',
      intent: 'Monthly paid order count',
      tables: ['analytics.orders'],
      representative_sql: PAID_ORDERS_SQL,
      usage: usageStats(),
      fingerprints: ['fp_paid_orders'],
    };
    const storedPage = { pageKey: QUERY_PAGE_KEY, frontmatter: existingFrontmatter, content: 'old body' };
    const { tool, wikiService } = makeTool({
      wikiService: { readPage: vi.fn().mockResolvedValue(storedPage) },
    });

    await tool.call(
      {
        key: QUERY_PAGE_KEY,
        summary: 'Monthly paid orders updated',
        content: '## Monthly paid order count updated',
      } as any,
      ctx,
    );

    // Only the summary was supplied, so every other stored field must carry over.
    const writtenFrontmatter = wikiService.writePage.mock.calls[0][3];
    expect(writtenFrontmatter).toEqual({
      ...existingFrontmatter,
      summary: 'Monthly paid orders updated',
    });
  });
});
/**
 * Tool that creates a wiki page or updates an existing one, either by replacing the whole body
 * (`content`) or by applying targeted text replacements (`replacements`).
 *
 * NOTE(review): generic arguments on `BaseTool` are not visible in this chunk, and the return type
 * of `call` is reconstructed from the `WikiWriteStructured` interface — confirm upstream.
 */
export class WikiWriteTool extends BaseTool {
  readonly name = 'wiki_write';

  constructor(
    private readonly wikiService: KnowledgeWikiService,
    private readonly pagesRepository: KnowledgeIndexPort,
    private readonly knowledgeRepository: KnowledgeEventPort,
  ) {
    super();
  }

  /** Prompt-facing description; the text starts and ends with a newline. */
  get description(): string {
    return [
      '',
      'Create or update a knowledge page. Provide content for create/rewrite, or replacements for targeted edits.',
      'tags/refs/sl_refs use REPLACE semantics: omit to keep existing on update, [] to clear, [values] to set.',
      '',
    ].join('\n');
  }

  get inputSchema() {
    return wikiWriteInputSchema;
  }

  /**
   * Writes the page identified by `input.key`.
   *
   * With a session on the context the page is addressed in GLOBAL scope through the session's wiki
   * service; without one it is addressed in USER scope for `context.userId`, where new pages are
   * refused once the user already has `MAX_USER_BLOCKS` pages.
   *
   * @returns `success: false` with an explanatory message when neither content nor replacements
   *          are given, when the page is missing and no content is given, when the user page limit
   *          is reached, or when the replacements fail to apply; otherwise `success: true` with
   *          `action` set to `'created'` or `'updated'`.
   */
  async call(input: WikiWriteInput, context: ToolContext): Promise<ToolOutput<WikiWriteStructured>> {
    const { session, userId } = context;
    const { key } = input;

    // Every rejection path returns the same shape; only the message differs.
    const failure = (markdown: string): ToolOutput<WikiWriteStructured> => ({
      markdown,
      structured: { success: false, key },
    });

    const hasReplacements = (input.replacements?.length ?? 0) > 0;
    if (!input.content && !hasReplacements) {
      return failure('Error: provide either content (for create/rewrite) or replacements (for edits).');
    }

    const service = session?.wikiService ?? this.wikiService;
    const scope: BlockScope = session ? 'GLOBAL' : 'USER';
    const scopeId = session ? null : userId;

    const existing = await service.readPage(scope, scopeId, key);

    if (!existing && !input.content) {
      return failure(`Page "${key}" does not exist. Provide content to create it.`);
    }

    if (scope === 'USER' && !existing) {
      const pageCount = await this.pagesRepository.getUserPageCount(userId);
      if (pageCount >= MAX_USER_BLOCKS) {
        return failure(`Cannot create "${key}": user has reached the limit of ${MAX_USER_BLOCKS} pages.`);
      }
    }

    // "Omitted" means strictly undefined: an explicit value (including []) replaces the stored one.
    const keep = <T>(incoming: T | undefined, stored: T | undefined): T | undefined =>
      incoming === undefined ? stored : incoming;

    const stored = existing?.frontmatter;
    const finalFm: WikiFrontmatter = {
      summary: input.summary,
      usage_mode: stored?.usage_mode ?? 'auto',
      sort_order: stored?.sort_order ?? 0,
      tags: keep(input.tags, stored?.tags),
      refs: keep(input.refs, stored?.refs),
      sl_refs: keep(input.sl_refs, stored?.sl_refs),
      source: keep(input.source, stored?.source),
      intent: keep(input.intent, stored?.intent),
      tables: keep(input.tables, stored?.tables),
      representative_sql: keep(input.representative_sql, stored?.representative_sql),
      usage: keep(input.usage, stored?.usage),
      fingerprints: keep(input.fingerprints, stored?.fingerprints),
    };

    let finalContent: string;
    if (input.content) {
      // A non-empty content wins outright; replacements are not consulted in that case.
      finalContent = input.content;
    } else {
      const edit = applySqlEdits(existing?.content ?? '', input.replacements ?? []);
      if (!edit.success) {
        return failure(`Edit errors: ${edit.errors.join('; ')}`);
      }
      finalContent = edit.sql;
    }

    await service.writePage(scope, scopeId, key, finalFm, finalContent, SYSTEM_AUTHOR, SYSTEM_EMAIL);

    // Worktree-scoped sessions leave the index untouched.
    const touchIndex = session?.isWorktreeScoped !== true;
    if (touchIndex) {
      await service.syncSinglePage(scope, scopeId, key, finalFm, finalContent);
    }

    const action = existing ? 'updated' : 'created';

    await this.knowledgeRepository.createEvent({
      blockId: null,
      eventType: existing ? 'BLOCK_UPDATED' : 'BLOCK_CREATED',
      actorId: userId,
      chatId: null,
      messageId: null,
      payload: {
        pageKey: key,
        // Only the first 500 characters of the previous body are kept in the event.
        previousContent: existing?.content.slice(0, 500) ?? null,
      },
    });

    session?.actions.push({ target: 'wiki', type: action, key, detail: input.summary });

    return {
      markdown: `Page "${key}" ${action}.`,
      structured: { success: true, key, action },
    };
  }
}
/** A `WikiPage` together with the scope (`'GLOBAL'` or `'USER'`) it belongs to. */
export interface WikiPageWithScope extends WikiPage {
  scope: WikiScope;
}

/**
 * Label explaining why a search hit matched.
 *
 * The `(string & {})` member keeps the union open to arbitrary strings while still letting editors
 * suggest the three known literals.
 */
export type WikiSearchMatchReason = 'lexical' | 'semantic' | 'token' | (string & {});

/**
 * Per-lane report attached to search results.
 *
 * NOTE(review): field meanings below are read off the names only — confirm against the search
 * implementation before relying on them.
 */
export interface WikiSearchLaneSummary {
  /** Lane identifier (free-form string). */
  lane: string;
  /** Whether the lane ran, was skipped, or failed. */
  status: 'available' | 'skipped' | 'failed';
  /** Presumably the candidate pool size that was asked for — TODO confirm. */
  requestedCandidatePoolLimit: number;
  /** Presumably the candidate pool size actually applied — TODO confirm. */
  effectiveCandidatePoolLimit: number;
  /** Presumably how many candidates the lane returned — TODO confirm. */
  returnedCandidateCount: number;
  /** Numeric weight associated with the lane. */
  weight: number;
  /** Optional free-text explanation accompanying `status`. */
  reason?: string;
}

/** Ranking metadata for a search hit: its score, the reasons it matched, and optional lane reports. */
export interface WikiSearchMetadata {
  score: number;
  matchReasons: WikiSearchMatchReason[];
  lanes?: WikiSearchLaneSummary[];
}
a/packages/context/test/fixtures/lookml/extends-chain/views/orders_ext.view.lkml b/packages/context/test/fixtures/lookml/extends-chain/views/orders_ext.view.lkml new file mode 100644 index 00000000..f58a2226 --- /dev/null +++ b/packages/context/test/fixtures/lookml/extends-chain/views/orders_ext.view.lkml @@ -0,0 +1,7 @@ +view: orders_ext { + extends: [orders] + measure: refund { + type: sum + sql: ${TABLE}.refund_amount ;; + } +} diff --git a/packages/context/test/fixtures/lookml/multi-model/marketing.model.lkml b/packages/context/test/fixtures/lookml/multi-model/marketing.model.lkml new file mode 100644 index 00000000..1ec26085 --- /dev/null +++ b/packages/context/test/fixtures/lookml/multi-model/marketing.model.lkml @@ -0,0 +1,11 @@ +connection: "my_bq" + +include: "views/shared_dims.view.lkml" +include: "views/campaigns.view.lkml" + +explore: campaigns { + join: shared_dims { + relationship: many_to_one + sql_on: ${campaigns.region_id} = ${shared_dims.id} ;; + } +} diff --git a/packages/context/test/fixtures/lookml/multi-model/orders.model.lkml b/packages/context/test/fixtures/lookml/multi-model/orders.model.lkml new file mode 100644 index 00000000..65a89082 --- /dev/null +++ b/packages/context/test/fixtures/lookml/multi-model/orders.model.lkml @@ -0,0 +1,11 @@ +connection: "my_bq" + +include: "views/shared_dims.view.lkml" +include: "views/orders.view.lkml" + +explore: orders { + join: shared_dims { + relationship: many_to_one + sql_on: ${orders.region_id} = ${shared_dims.id} ;; + } +} diff --git a/packages/context/test/fixtures/lookml/multi-model/views/campaigns.view.lkml b/packages/context/test/fixtures/lookml/multi-model/views/campaigns.view.lkml new file mode 100644 index 00000000..e1481086 --- /dev/null +++ b/packages/context/test/fixtures/lookml/multi-model/views/campaigns.view.lkml @@ -0,0 +1,16 @@ +view: campaigns { + sql_table_name: public.campaigns ;; + dimension: id { + type: number + primary_key: yes + sql: ${TABLE}.id ;; + } + dimension: region_id 
{ + type: number + sql: ${TABLE}.region_id ;; + } + measure: spend { + type: sum + sql: ${TABLE}.spend_cents ;; + } +} diff --git a/packages/context/test/fixtures/lookml/multi-model/views/orders.view.lkml b/packages/context/test/fixtures/lookml/multi-model/views/orders.view.lkml new file mode 100644 index 00000000..4cc9240f --- /dev/null +++ b/packages/context/test/fixtures/lookml/multi-model/views/orders.view.lkml @@ -0,0 +1,15 @@ +view: orders { + sql_table_name: public.orders ;; + dimension: id { + type: number + primary_key: yes + sql: ${TABLE}.id ;; + } + dimension: region_id { + type: number + sql: ${TABLE}.region_id ;; + } + measure: count { + type: count + } +} diff --git a/packages/context/test/fixtures/lookml/multi-model/views/shared_dims.view.lkml b/packages/context/test/fixtures/lookml/multi-model/views/shared_dims.view.lkml new file mode 100644 index 00000000..a57de93f --- /dev/null +++ b/packages/context/test/fixtures/lookml/multi-model/views/shared_dims.view.lkml @@ -0,0 +1,12 @@ +view: shared_dims { + sql_table_name: public.regions ;; + dimension: id { + type: number + primary_key: yes + sql: ${TABLE}.id ;; + } + dimension: name { + type: string + sql: ${TABLE}.name ;; + } +} diff --git a/packages/context/test/fixtures/lookml/single-model/orders.model.lkml b/packages/context/test/fixtures/lookml/single-model/orders.model.lkml new file mode 100644 index 00000000..e976c794 --- /dev/null +++ b/packages/context/test/fixtures/lookml/single-model/orders.model.lkml @@ -0,0 +1,10 @@ +connection: "my_bq" + +include: "views/*.view.lkml" + +explore: orders { + join: customers { + relationship: many_to_one + sql_on: ${orders.customer_id} = ${customers.id} ;; + } +} diff --git a/packages/context/test/fixtures/lookml/single-model/views/customers.view.lkml b/packages/context/test/fixtures/lookml/single-model/views/customers.view.lkml new file mode 100644 index 00000000..8c5d8d70 --- /dev/null +++ 
b/packages/context/test/fixtures/lookml/single-model/views/customers.view.lkml @@ -0,0 +1,12 @@ +view: customers { + sql_table_name: public.customers ;; + dimension: id { + type: number + primary_key: yes + sql: ${TABLE}.id ;; + } + dimension: name { + type: string + sql: ${TABLE}.name ;; + } +} diff --git a/packages/context/test/fixtures/lookml/single-model/views/orders.view.lkml b/packages/context/test/fixtures/lookml/single-model/views/orders.view.lkml new file mode 100644 index 00000000..2f07431e --- /dev/null +++ b/packages/context/test/fixtures/lookml/single-model/views/orders.view.lkml @@ -0,0 +1,20 @@ +view: orders { + sql_table_name: public.orders ;; + dimension: id { + type: number + primary_key: yes + sql: ${TABLE}.id ;; + } + dimension: customer_id { + type: number + sql: ${TABLE}.customer_id ;; + } + dimension: amount { + type: number + sql: ${TABLE}.amount ;; + } + measure: gross_amount { + type: sum + sql: ${amount} ;; + } +} diff --git a/packages/context/test/fixtures/lookml/three-churn/billing.model.lkml b/packages/context/test/fixtures/lookml/three-churn/billing.model.lkml new file mode 100644 index 00000000..63ff7ef3 --- /dev/null +++ b/packages/context/test/fixtures/lookml/three-churn/billing.model.lkml @@ -0,0 +1,5 @@ +connection: "my_pg" + +include: "views/billing/billing_churn_risk.view.lkml" + +explore: billing {} diff --git a/packages/context/test/fixtures/lookml/three-churn/customers.model.lkml b/packages/context/test/fixtures/lookml/three-churn/customers.model.lkml new file mode 100644 index 00000000..d3832b19 --- /dev/null +++ b/packages/context/test/fixtures/lookml/three-churn/customers.model.lkml @@ -0,0 +1,5 @@ +connection: "my_pg" + +include: "views/customers/customer_churn_risk.view.lkml" + +explore: customers {} diff --git a/packages/context/test/fixtures/lookml/three-churn/support.model.lkml b/packages/context/test/fixtures/lookml/three-churn/support.model.lkml new file mode 100644 index 00000000..da764a78 --- /dev/null +++ 
b/packages/context/test/fixtures/lookml/three-churn/support.model.lkml @@ -0,0 +1,5 @@ +connection: "my_pg" + +include: "views/support/support_churn_risk.view.lkml" + +explore: support {} diff --git a/packages/context/test/fixtures/lookml/three-churn/views/billing/billing_churn_risk.view.lkml b/packages/context/test/fixtures/lookml/three-churn/views/billing/billing_churn_risk.view.lkml new file mode 100644 index 00000000..47620fe4 --- /dev/null +++ b/packages/context/test/fixtures/lookml/three-churn/views/billing/billing_churn_risk.view.lkml @@ -0,0 +1,16 @@ +view: billing { + sql_table_name: billing ;; + dimension: id { + type: number + primary_key: yes + sql: ${TABLE}.id ;; + } + dimension: past_due_days { + type: number + sql: ${TABLE}.past_due_days ;; + } + measure: churn_risk_score { + type: average + sql: LEAST(1.0, ${past_due_days} / 90.0) ;; + } +} diff --git a/packages/context/test/fixtures/lookml/three-churn/views/customers/customer_churn_risk.view.lkml b/packages/context/test/fixtures/lookml/three-churn/views/customers/customer_churn_risk.view.lkml new file mode 100644 index 00000000..c2f1a9b6 --- /dev/null +++ b/packages/context/test/fixtures/lookml/three-churn/views/customers/customer_churn_risk.view.lkml @@ -0,0 +1,16 @@ +view: customers { + sql_table_name: customers ;; + dimension: id { + type: number + primary_key: yes + sql: ${TABLE}.id ;; + } + dimension: engagement_score { + type: number + sql: ${TABLE}.engagement_score ;; + } + measure: churn_risk_score { + type: average + sql: 1 - ${engagement_score} / 100.0 ;; + } +} diff --git a/packages/context/test/fixtures/lookml/three-churn/views/support/support_churn_risk.view.lkml b/packages/context/test/fixtures/lookml/three-churn/views/support/support_churn_risk.view.lkml new file mode 100644 index 00000000..0cf633f0 --- /dev/null +++ b/packages/context/test/fixtures/lookml/three-churn/views/support/support_churn_risk.view.lkml @@ -0,0 +1,16 @@ +view: support { + sql_table_name: support ;; + 
dimension: id { + type: number + primary_key: yes + sql: ${TABLE}.id ;; + } + dimension: tickets_open { + type: number + sql: ${TABLE}.tickets_open ;; + } + measure: churn_risk_pct { + type: average + sql: CASE WHEN ${tickets_open} > 5 THEN 1.0 ELSE 0.1 * ${tickets_open} END ;; + } +} diff --git a/packages/context/test/fixtures/metabase/card-ref/cards/10.json b/packages/context/test/fixtures/metabase/card-ref/cards/10.json new file mode 100644 index 00000000..002a7569 --- /dev/null +++ b/packages/context/test/fixtures/metabase/card-ref/cards/10.json @@ -0,0 +1,15 @@ +{ + "metabaseId": 10, + "name": "Base revenue", + "description": null, + "type": "model", + "databaseId": 42, + "collectionId": 5, + "archived": false, + "resolvedSql": "SELECT day, SUM(amount_cents) AS revenue_cents FROM public.orders GROUP BY 1", + "templateTags": [], + "resultMetadata": [], + "collectionPath": ["Orders Team"], + "referencedCardIds": [], + "resolutionStatus": "resolved" +} diff --git a/packages/context/test/fixtures/metabase/card-ref/cards/11.json b/packages/context/test/fixtures/metabase/card-ref/cards/11.json new file mode 100644 index 00000000..e4639fb2 --- /dev/null +++ b/packages/context/test/fixtures/metabase/card-ref/cards/11.json @@ -0,0 +1,15 @@ +{ + "metabaseId": 11, + "name": "Revenue filtered", + "description": null, + "type": "question", + "databaseId": 42, + "collectionId": 5, + "archived": false, + "resolvedSql": "SELECT day, revenue_cents FROM ({{#10}}) WHERE revenue_cents > 0", + "templateTags": [{ "name": "base", "type": "card", "defaultValue": null, "cardReference": 10 }], + "resultMetadata": [], + "collectionPath": ["Orders Team"], + "referencedCardIds": [10], + "resolutionStatus": "resolved" +} diff --git a/packages/context/test/fixtures/metabase/card-ref/collections/5.json b/packages/context/test/fixtures/metabase/card-ref/collections/5.json new file mode 100644 index 00000000..0fcd409c --- /dev/null +++ 
b/packages/context/test/fixtures/metabase/card-ref/collections/5.json @@ -0,0 +1,5 @@ +{ + "metabaseId": 5, + "name": "Orders Team", + "parentId": "root" +} diff --git a/packages/context/test/fixtures/metabase/card-ref/databases/42.json b/packages/context/test/fixtures/metabase/card-ref/databases/42.json new file mode 100644 index 00000000..49b8604a --- /dev/null +++ b/packages/context/test/fixtures/metabase/card-ref/databases/42.json @@ -0,0 +1,6 @@ +{ + "metabaseDatabaseId": 42, + "metabaseDatabaseName": "Analytics", + "metabaseEngine": "postgres", + "targetConnectionId": "b2c3d4e5-f6a7-4890-abcd-ef0123456789" +} diff --git a/packages/context/test/fixtures/metabase/card-ref/sync-config.json b/packages/context/test/fixtures/metabase/card-ref/sync-config.json new file mode 100644 index 00000000..bb1bacae --- /dev/null +++ b/packages/context/test/fixtures/metabase/card-ref/sync-config.json @@ -0,0 +1,13 @@ +{ + "metabaseConnectionId": "a1b2c3d4-e5f6-4789-9abc-def012345678", + "metabaseDatabaseId": 42, + "syncMode": "ALL", + "selections": [], + "defaultTagNames": [], + "mapping": { + "metabaseDatabaseId": 42, + "metabaseDatabaseName": "Analytics", + "metabaseEngine": "postgres", + "targetConnectionId": "b2c3d4e5-f6a7-4890-abcd-ef0123456789" + } +} diff --git a/packages/context/test/fixtures/metabase/multi-collection/cards/1.json b/packages/context/test/fixtures/metabase/multi-collection/cards/1.json new file mode 100644 index 00000000..8671f160 --- /dev/null +++ b/packages/context/test/fixtures/metabase/multi-collection/cards/1.json @@ -0,0 +1,32 @@ +{ + "metabaseId": 1, + "name": "Daily orders", + "description": "Orders rolled up by day", + "type": "model", + "databaseId": 42, + "collectionId": 5, + "archived": false, + "resolvedSql": "SELECT date_trunc('day', created_at) AS day, COUNT(*) AS order_count FROM public.orders GROUP BY 1", + "templateTags": [], + "resultMetadata": [ + { + "name": "day", + "display_name": "Day", + "base_type": "type/DateTime", + 
"semantic_type": "type/CreationTimestamp", + "description": null, + "fk_target_field_id": null + }, + { + "name": "order_count", + "display_name": "Count", + "base_type": "type/Integer", + "semantic_type": null, + "description": null, + "fk_target_field_id": null + } + ], + "collectionPath": ["Orders Team"], + "referencedCardIds": [], + "resolutionStatus": "resolved" +} diff --git a/packages/context/test/fixtures/metabase/multi-collection/cards/2.json b/packages/context/test/fixtures/metabase/multi-collection/cards/2.json new file mode 100644 index 00000000..504896c2 --- /dev/null +++ b/packages/context/test/fixtures/metabase/multi-collection/cards/2.json @@ -0,0 +1,32 @@ +{ + "metabaseId": 2, + "name": "Revenue by day", + "description": null, + "type": "model", + "databaseId": 42, + "collectionId": 5, + "archived": false, + "resolvedSql": "SELECT date_trunc('day', created_at) AS day, SUM(amount_cents) AS revenue_cents FROM public.orders GROUP BY 1", + "templateTags": [], + "resultMetadata": [ + { + "name": "day", + "display_name": "Day", + "base_type": "type/DateTime", + "semantic_type": "type/CreationTimestamp", + "description": null, + "fk_target_field_id": null + }, + { + "name": "revenue_cents", + "display_name": "Revenue (cents)", + "base_type": "type/Integer", + "semantic_type": null, + "description": null, + "fk_target_field_id": null + } + ], + "collectionPath": ["Orders Team"], + "referencedCardIds": [], + "resolutionStatus": "resolved" +} diff --git a/packages/context/test/fixtures/metabase/multi-collection/cards/3.json b/packages/context/test/fixtures/metabase/multi-collection/cards/3.json new file mode 100644 index 00000000..2ef2921e --- /dev/null +++ b/packages/context/test/fixtures/metabase/multi-collection/cards/3.json @@ -0,0 +1,32 @@ +{ + "metabaseId": 3, + "name": "Campaign clicks", + "description": "Daily campaign click-through", + "type": "model", + "databaseId": 42, + "collectionId": 6, + "archived": false, + "resolvedSql": "SELECT 
date_trunc('day', click_time) AS day, COUNT(*) AS click_count FROM public.campaign_clicks GROUP BY 1", + "templateTags": [], + "resultMetadata": [ + { + "name": "day", + "display_name": "Day", + "base_type": "type/DateTime", + "semantic_type": "type/CreationTimestamp", + "description": null, + "fk_target_field_id": null + }, + { + "name": "click_count", + "display_name": "Clicks", + "base_type": "type/Integer", + "semantic_type": null, + "description": null, + "fk_target_field_id": null + } + ], + "collectionPath": ["Marketing"], + "referencedCardIds": [], + "resolutionStatus": "resolved" +} diff --git a/packages/context/test/fixtures/metabase/multi-collection/collections/5.json b/packages/context/test/fixtures/metabase/multi-collection/collections/5.json new file mode 100644 index 00000000..65b3ca3f --- /dev/null +++ b/packages/context/test/fixtures/metabase/multi-collection/collections/5.json @@ -0,0 +1 @@ +{ "metabaseId": 5, "name": "Orders Team", "parentId": "root" } diff --git a/packages/context/test/fixtures/metabase/multi-collection/collections/6.json b/packages/context/test/fixtures/metabase/multi-collection/collections/6.json new file mode 100644 index 00000000..a9f2041b --- /dev/null +++ b/packages/context/test/fixtures/metabase/multi-collection/collections/6.json @@ -0,0 +1 @@ +{ "metabaseId": 6, "name": "Marketing", "parentId": "root" } diff --git a/packages/context/test/fixtures/metabase/multi-collection/databases/42.json b/packages/context/test/fixtures/metabase/multi-collection/databases/42.json new file mode 100644 index 00000000..49b8604a --- /dev/null +++ b/packages/context/test/fixtures/metabase/multi-collection/databases/42.json @@ -0,0 +1,6 @@ +{ + "metabaseDatabaseId": 42, + "metabaseDatabaseName": "Analytics", + "metabaseEngine": "postgres", + "targetConnectionId": "b2c3d4e5-f6a7-4890-abcd-ef0123456789" +} diff --git a/packages/context/test/fixtures/metabase/multi-collection/sync-config.json 
b/packages/context/test/fixtures/metabase/multi-collection/sync-config.json new file mode 100644 index 00000000..e2df52e0 --- /dev/null +++ b/packages/context/test/fixtures/metabase/multi-collection/sync-config.json @@ -0,0 +1,16 @@ +{ + "metabaseConnectionId": "a1b2c3d4-e5f6-4789-9abc-def012345678", + "metabaseDatabaseId": 42, + "syncMode": "ONLY", + "selections": [ + { "selectionType": "collection", "metabaseObjectId": 5 }, + { "selectionType": "collection", "metabaseObjectId": 6 } + ], + "defaultTagNames": [], + "mapping": { + "metabaseDatabaseId": 42, + "metabaseDatabaseName": "Analytics", + "metabaseEngine": "postgres", + "targetConnectionId": "b2c3d4e5-f6a7-4890-abcd-ef0123456789" + } +} diff --git a/packages/context/test/fixtures/metabase/simple/cards/1.json b/packages/context/test/fixtures/metabase/simple/cards/1.json new file mode 100644 index 00000000..8671f160 --- /dev/null +++ b/packages/context/test/fixtures/metabase/simple/cards/1.json @@ -0,0 +1,32 @@ +{ + "metabaseId": 1, + "name": "Daily orders", + "description": "Orders rolled up by day", + "type": "model", + "databaseId": 42, + "collectionId": 5, + "archived": false, + "resolvedSql": "SELECT date_trunc('day', created_at) AS day, COUNT(*) AS order_count FROM public.orders GROUP BY 1", + "templateTags": [], + "resultMetadata": [ + { + "name": "day", + "display_name": "Day", + "base_type": "type/DateTime", + "semantic_type": "type/CreationTimestamp", + "description": null, + "fk_target_field_id": null + }, + { + "name": "order_count", + "display_name": "Count", + "base_type": "type/Integer", + "semantic_type": null, + "description": null, + "fk_target_field_id": null + } + ], + "collectionPath": ["Orders Team"], + "referencedCardIds": [], + "resolutionStatus": "resolved" +} diff --git a/packages/context/test/fixtures/metabase/simple/cards/2.json b/packages/context/test/fixtures/metabase/simple/cards/2.json new file mode 100644 index 00000000..504896c2 --- /dev/null +++ 
b/packages/context/test/fixtures/metabase/simple/cards/2.json @@ -0,0 +1,32 @@ +{ + "metabaseId": 2, + "name": "Revenue by day", + "description": null, + "type": "model", + "databaseId": 42, + "collectionId": 5, + "archived": false, + "resolvedSql": "SELECT date_trunc('day', created_at) AS day, SUM(amount_cents) AS revenue_cents FROM public.orders GROUP BY 1", + "templateTags": [], + "resultMetadata": [ + { + "name": "day", + "display_name": "Day", + "base_type": "type/DateTime", + "semantic_type": "type/CreationTimestamp", + "description": null, + "fk_target_field_id": null + }, + { + "name": "revenue_cents", + "display_name": "Revenue (cents)", + "base_type": "type/Integer", + "semantic_type": null, + "description": null, + "fk_target_field_id": null + } + ], + "collectionPath": ["Orders Team"], + "referencedCardIds": [], + "resolutionStatus": "resolved" +} diff --git a/packages/context/test/fixtures/metabase/simple/collections/5.json b/packages/context/test/fixtures/metabase/simple/collections/5.json new file mode 100644 index 00000000..0fcd409c --- /dev/null +++ b/packages/context/test/fixtures/metabase/simple/collections/5.json @@ -0,0 +1,5 @@ +{ + "metabaseId": 5, + "name": "Orders Team", + "parentId": "root" +} diff --git a/packages/context/test/fixtures/metabase/simple/databases/42.json b/packages/context/test/fixtures/metabase/simple/databases/42.json new file mode 100644 index 00000000..49b8604a --- /dev/null +++ b/packages/context/test/fixtures/metabase/simple/databases/42.json @@ -0,0 +1,6 @@ +{ + "metabaseDatabaseId": 42, + "metabaseDatabaseName": "Analytics", + "metabaseEngine": "postgres", + "targetConnectionId": "b2c3d4e5-f6a7-4890-abcd-ef0123456789" +} diff --git a/packages/context/test/fixtures/metabase/simple/sync-config.json b/packages/context/test/fixtures/metabase/simple/sync-config.json new file mode 100644 index 00000000..bb1bacae --- /dev/null +++ b/packages/context/test/fixtures/metabase/simple/sync-config.json @@ -0,0 +1,13 @@ +{ + 
"metabaseConnectionId": "a1b2c3d4-e5f6-4789-9abc-def012345678", + "metabaseDatabaseId": 42, + "syncMode": "ALL", + "selections": [], + "defaultTagNames": [], + "mapping": { + "metabaseDatabaseId": 42, + "metabaseDatabaseName": "Analytics", + "metabaseEngine": "postgres", + "targetConnectionId": "b2c3d4e5-f6a7-4890-abcd-ef0123456789" + } +} diff --git a/packages/context/test/fixtures/metricflow/dbt-mixed/dbt_project.yml b/packages/context/test/fixtures/metricflow/dbt-mixed/dbt_project.yml new file mode 100644 index 00000000..a29b2726 --- /dev/null +++ b/packages/context/test/fixtures/metricflow/dbt-mixed/dbt_project.yml @@ -0,0 +1,5 @@ +name: my_proj +version: "1.0.0" +config-version: 2 +profile: my_proj +model-paths: ["models"] diff --git a/packages/context/test/fixtures/metricflow/dbt-mixed/models/orders.yml b/packages/context/test/fixtures/metricflow/dbt-mixed/models/orders.yml new file mode 100644 index 00000000..5f9adb5f --- /dev/null +++ b/packages/context/test/fixtures/metricflow/dbt-mixed/models/orders.yml @@ -0,0 +1,7 @@ +semantic_models: + - name: orders + model: ref('orders') + entities: + - {name: order_id, type: primary} + measures: + - {name: order_count, agg: count, expr: order_id} diff --git a/packages/context/test/fixtures/metricflow/extends-chain/metrics/orders_final.yml b/packages/context/test/fixtures/metricflow/extends-chain/metrics/orders_final.yml new file mode 100644 index 00000000..95612ae5 --- /dev/null +++ b/packages/context/test/fixtures/metricflow/extends-chain/metrics/orders_final.yml @@ -0,0 +1,9 @@ +metrics: + - name: revenue + type: derived + description: Net revenue (gross minus refunds). 
+ type_params: + expr: gross_amount - refund_amount + metrics: + - name: gross_amount + - name: refund_amount diff --git a/packages/context/test/fixtures/metricflow/extends-chain/models/orders.yml b/packages/context/test/fixtures/metricflow/extends-chain/models/orders.yml new file mode 100644 index 00000000..34952c89 --- /dev/null +++ b/packages/context/test/fixtures/metricflow/extends-chain/models/orders.yml @@ -0,0 +1,19 @@ +semantic_models: + - name: orders + description: Base order fact table. + model: ref('orders') + entities: + - name: order_id + type: primary + dimensions: + - name: ordered_at + type: time + type_params: + time_granularity: day + measures: + - name: order_count + agg: count + expr: order_id + - name: gross_amount + agg: sum + expr: amount diff --git a/packages/context/test/fixtures/metricflow/extends-chain/models/orders_ext.yml b/packages/context/test/fixtures/metricflow/extends-chain/models/orders_ext.yml new file mode 100644 index 00000000..40142905 --- /dev/null +++ b/packages/context/test/fixtures/metricflow/extends-chain/models/orders_ext.yml @@ -0,0 +1,9 @@ +semantic_models: + - name: orders_ext + description: Orders with refund handling added. 
+ model: ref('orders_ext') + extends: orders + measures: + - name: refund_amount + agg: sum + expr: refund_amt diff --git a/packages/context/test/fixtures/metricflow/multi-component/models/marketing/campaigns.yml b/packages/context/test/fixtures/metricflow/multi-component/models/marketing/campaigns.yml new file mode 100644 index 00000000..37449b1c --- /dev/null +++ b/packages/context/test/fixtures/metricflow/multi-component/models/marketing/campaigns.yml @@ -0,0 +1,7 @@ +semantic_models: + - name: campaigns + model: ref('campaigns') + entities: + - {name: campaign_id, type: primary} + measures: + - {name: spend, agg: sum, expr: spend_cents} diff --git a/packages/context/test/fixtures/metricflow/multi-component/models/sales/orders.yml b/packages/context/test/fixtures/metricflow/multi-component/models/sales/orders.yml new file mode 100644 index 00000000..5f9adb5f --- /dev/null +++ b/packages/context/test/fixtures/metricflow/multi-component/models/sales/orders.yml @@ -0,0 +1,7 @@ +semantic_models: + - name: orders + model: ref('orders') + entities: + - {name: order_id, type: primary} + measures: + - {name: order_count, agg: count, expr: order_id} diff --git a/packages/context/test/fixtures/metricflow/single-model/models/orders.yml b/packages/context/test/fixtures/metricflow/single-model/models/orders.yml new file mode 100644 index 00000000..2fffe5af --- /dev/null +++ b/packages/context/test/fixtures/metricflow/single-model/models/orders.yml @@ -0,0 +1,33 @@ +semantic_models: + - name: orders + description: Order fact table. 
+ model: ref('orders') + entities: + - name: order_id + type: primary + - name: customer_id + type: foreign + dimensions: + - name: ordered_at + type: time + type_params: + time_granularity: day + - name: status + type: categorical + measures: + - name: order_count + agg: count + expr: order_id + - name: gross_amount + agg: sum + expr: amount + +metrics: + - name: total_orders + type: simple + type_params: + measure: order_count + - name: revenue + type: simple + type_params: + measure: gross_amount diff --git a/packages/context/test/fixtures/relationship-benchmarks/abbreviated_legacy_no_declared_constraints/data.sqlite b/packages/context/test/fixtures/relationship-benchmarks/abbreviated_legacy_no_declared_constraints/data.sqlite new file mode 100644 index 0000000000000000000000000000000000000000..6faa32a7beb34c9eb16944c1480628b4a8cf1cf5 GIT binary patch literal 20480 zcmeI&&uZH+90zd8c1U+<)KQ!ea?x(dXols$poc-d&g%kR(xoo!7F@?$OY^5oT-c6z zh_NRb^bvOH*b{6!?pF2&lWY<&Zg%$JD^TowE%K8-ISKjXjdu}8{M~Fm31Z$MJA_hl zz&RmgOZJlNt7ga>)mM$KZvJ!KB2PYl+m-{2N}I4R>~ZC*yg`Ei1Rwwb2tWV=5P$## zAOL}43oJ`UxmK^!WfTWz<7hS?4ad{;vvfFc9pB}?^W1ZJn(^j%I=mQh-+k-zUSIZ8 z&)Z8L=F_YBY%~mWgFnQdc(>=fNA6(#6Rm|_t~DBT`D{In^U++@loum65gGEPdYUg9 zUN7Q%Vl{XaXXo9z)XFu}q|5!JsG6?odZwI8cyq&2l6X4F{_GwPgE+VD)_a;36G<^O zP1UtbF_rMoo#BErV4};P9i7W|KAdJ$9}Tg!Y>dLga8B} z009U<00Izz00bZa0SG{V>Bg>U9t+zGCReZeuU)zOPXh5GT3o(79rPGY1M4ITVilzO z{~N;oupjIOgb;uL1Rwwb2tWV=5P$##AOHaf6iJ{ePx{SA{e8<;XZyBgKW$s0ZFQ>h zyg$i>I^tjFc2ynwtMOJQ7wh-`@7OQA|6ink7;}LD1Rwwb2tWV=5P$##AOHafY!YC4 j>7ltxy^h@Ii(?^-il%}Go~?HN^2Go~(*Yv=ZfXAjJ8{kz literal 0 HcmV?d00001 diff --git a/packages/context/test/fixtures/relationship-benchmarks/abbreviated_legacy_no_declared_constraints/expected-links.yaml b/packages/context/test/fixtures/relationship-benchmarks/abbreviated_legacy_no_declared_constraints/expected-links.yaml new file mode 100644 index 00000000..f6b97166 --- /dev/null +++ 
b/packages/context/test/fixtures/relationship-benchmarks/abbreviated_legacy_no_declared_constraints/expected-links.yaml @@ -0,0 +1,32 @@ +expectedPks: + - table: cust + columns: + - cust_id + - table: ord_hdr + columns: + - ord_id + - table: prod + columns: + - prod_cd +expectedLinks: + - fromTable: ord_hdr + fromColumns: + - cust_id + toTable: cust + toColumns: + - cust_id + relationship: many_to_one + - fromTable: ord_ln + fromColumns: + - ord_id + toTable: ord_hdr + toColumns: + - ord_id + relationship: many_to_one + - fromTable: ord_ln + fromColumns: + - prod_cd + toTable: prod + toColumns: + - prod_cd + relationship: many_to_one diff --git a/packages/context/test/fixtures/relationship-benchmarks/abbreviated_legacy_no_declared_constraints/fixture.yaml b/packages/context/test/fixtures/relationship-benchmarks/abbreviated_legacy_no_declared_constraints/fixture.yaml new file mode 100644 index 00000000..275a1008 --- /dev/null +++ b/packages/context/test/fixtures/relationship-benchmarks/abbreviated_legacy_no_declared_constraints/fixture.yaml @@ -0,0 +1,7 @@ +id: abbreviated_legacy_no_declared_constraints +name: Abbreviated legacy naming fixture with no declared constraints +tier: row_bearing +origin: synthetic +thresholdEligible: false +defaultModes: + - declared_pks_and_declared_fks_removed diff --git a/packages/context/test/fixtures/relationship-benchmarks/abbreviated_legacy_no_declared_constraints/snapshot.json b/packages/context/test/fixtures/relationship-benchmarks/abbreviated_legacy_no_declared_constraints/snapshot.json new file mode 100644 index 00000000..ac3840e2 --- /dev/null +++ b/packages/context/test/fixtures/relationship-benchmarks/abbreviated_legacy_no_declared_constraints/snapshot.json @@ -0,0 +1,170 @@ +{ + "connectionId": "abbreviated_legacy_no_declared_constraints", + "driver": "sqlite", + "extractedAt": "2026-05-07T00:00:00.000Z", + "scope": {}, + "metadata": {}, + "tables": [ + { + "catalog": null, + "db": "main", + "name": "cust", + "kind": 
"table", + "comment": null, + "estimatedRows": 3, + "columns": [ + { + "name": "cust_id", + "nativeType": "TEXT", + "normalizedType": "text", + "dimensionType": "string", + "nullable": false, + "primaryKey": false, + "comment": null + }, + { + "name": "nm", + "nativeType": "TEXT", + "normalizedType": "text", + "dimensionType": "string", + "nullable": false, + "primaryKey": false, + "comment": null + }, + { + "name": "stat_cd", + "nativeType": "TEXT", + "normalizedType": "text", + "dimensionType": "string", + "nullable": false, + "primaryKey": false, + "comment": null + } + ], + "foreignKeys": [] + }, + { + "catalog": null, + "db": "main", + "name": "ord_hdr", + "kind": "table", + "comment": null, + "estimatedRows": 3, + "columns": [ + { + "name": "ord_id", + "nativeType": "TEXT", + "normalizedType": "text", + "dimensionType": "string", + "nullable": false, + "primaryKey": false, + "comment": null + }, + { + "name": "cust_id", + "nativeType": "TEXT", + "normalizedType": "text", + "dimensionType": "string", + "nullable": false, + "primaryKey": false, + "comment": null + }, + { + "name": "ord_dt", + "nativeType": "TEXT", + "normalizedType": "text", + "dimensionType": "string", + "nullable": false, + "primaryKey": false, + "comment": null + } + ], + "foreignKeys": [] + }, + { + "catalog": null, + "db": "main", + "name": "ord_ln", + "kind": "table", + "comment": null, + "estimatedRows": 3, + "columns": [ + { + "name": "ln_id", + "nativeType": "TEXT", + "normalizedType": "text", + "dimensionType": "string", + "nullable": false, + "primaryKey": false, + "comment": null + }, + { + "name": "ord_id", + "nativeType": "TEXT", + "normalizedType": "text", + "dimensionType": "string", + "nullable": false, + "primaryKey": false, + "comment": null + }, + { + "name": "prod_cd", + "nativeType": "TEXT", + "normalizedType": "text", + "dimensionType": "string", + "nullable": false, + "primaryKey": false, + "comment": null + }, + { + "name": "qty", + "nativeType": "INTEGER", + 
"normalizedType": "integer", + "dimensionType": "number", + "nullable": false, + "primaryKey": false, + "comment": null + } + ], + "foreignKeys": [] + }, + { + "catalog": null, + "db": "main", + "name": "prod", + "kind": "table", + "comment": null, + "estimatedRows": 3, + "columns": [ + { + "name": "prod_cd", + "nativeType": "TEXT", + "normalizedType": "text", + "dimensionType": "string", + "nullable": false, + "primaryKey": false, + "comment": null + }, + { + "name": "prod_nm", + "nativeType": "TEXT", + "normalizedType": "text", + "dimensionType": "string", + "nullable": false, + "primaryKey": false, + "comment": null + }, + { + "name": "cat_cd", + "nativeType": "TEXT", + "normalizedType": "text", + "dimensionType": "string", + "nullable": false, + "primaryKey": false, + "comment": null + } + ], + "foreignKeys": [] + } + ] +} diff --git a/packages/context/test/fixtures/relationship-benchmarks/adventureworks_oltp_with_declared_metadata/expected-links.yaml b/packages/context/test/fixtures/relationship-benchmarks/adventureworks_oltp_with_declared_metadata/expected-links.yaml new file mode 100644 index 00000000..4d3f3189 --- /dev/null +++ b/packages/context/test/fixtures/relationship-benchmarks/adventureworks_oltp_with_declared_metadata/expected-links.yaml @@ -0,0 +1,878 @@ +expectedPks: + - table: dbo.AWBuildVersion + columns: + - SystemInformationID + - table: dbo.DatabaseLog + columns: + - DatabaseLogID + - table: dbo.ErrorLog + columns: + - ErrorLogID + - table: HumanResources.Department + columns: + - DepartmentID + - table: HumanResources.Employee + columns: + - BusinessEntityID + - table: HumanResources.EmployeeDepartmentHistory + columns: + - BusinessEntityID + - DepartmentID + - ShiftID + - StartDate + - table: HumanResources.EmployeePayHistory + columns: + - BusinessEntityID + - RateChangeDate + - table: HumanResources.JobCandidate + columns: + - JobCandidateID + - table: HumanResources.Shift + columns: + - ShiftID + - table: Person.Address + columns: + - 
AddressID + - table: Person.AddressType + columns: + - AddressTypeID + - table: Person.BusinessEntity + columns: + - BusinessEntityID + - table: Person.BusinessEntityAddress + columns: + - BusinessEntityID + - AddressID + - AddressTypeID + - table: Person.BusinessEntityContact + columns: + - BusinessEntityID + - PersonID + - ContactTypeID + - table: Person.ContactType + columns: + - ContactTypeID + - table: Person.CountryRegion + columns: + - CountryRegionCode + - table: Person.EmailAddress + columns: + - BusinessEntityID + - EmailAddressID + - table: Person.Password + columns: + - BusinessEntityID + - table: Person.Person + columns: + - BusinessEntityID + - table: Person.PersonPhone + columns: + - BusinessEntityID + - PhoneNumber + - PhoneNumberTypeID + - table: Person.PhoneNumberType + columns: + - PhoneNumberTypeID + - table: Person.StateProvince + columns: + - StateProvinceID + - table: Production.BillOfMaterials + columns: + - BillOfMaterialsID + - table: Production.Culture + columns: + - CultureID + - table: Production.Document + columns: + - DocumentNode + - table: Production.Illustration + columns: + - IllustrationID + - table: Production.Location + columns: + - LocationID + - table: Production.Product + columns: + - ProductID + - table: Production.ProductCategory + columns: + - ProductCategoryID + - table: Production.ProductCostHistory + columns: + - ProductID + - StartDate + - table: Production.ProductDescription + columns: + - ProductDescriptionID + - table: Production.ProductDocument + columns: + - ProductID + - DocumentNode + - table: Production.ProductInventory + columns: + - ProductID + - LocationID + - table: Production.ProductListPriceHistory + columns: + - ProductID + - StartDate + - table: Production.ProductModel + columns: + - ProductModelID + - table: Production.ProductModelIllustration + columns: + - ProductModelID + - IllustrationID + - table: Production.ProductModelProductDescriptionCulture + columns: + - ProductModelID + - 
ProductDescriptionID + - CultureID + - table: Production.ProductPhoto + columns: + - ProductPhotoID + - table: Production.ProductProductPhoto + columns: + - ProductID + - ProductPhotoID + - table: Production.ProductReview + columns: + - ProductReviewID + - table: Production.ProductSubcategory + columns: + - ProductSubcategoryID + - table: Production.ScrapReason + columns: + - ScrapReasonID + - table: Production.TransactionHistory + columns: + - TransactionID + - table: Production.TransactionHistoryArchive + columns: + - TransactionID + - table: Production.UnitMeasure + columns: + - UnitMeasureCode + - table: Production.WorkOrder + columns: + - WorkOrderID + - table: Production.WorkOrderRouting + columns: + - WorkOrderID + - ProductID + - OperationSequence + - table: Purchasing.ProductVendor + columns: + - ProductID + - BusinessEntityID + - table: Purchasing.PurchaseOrderDetail + columns: + - PurchaseOrderID + - PurchaseOrderDetailID + - table: Purchasing.PurchaseOrderHeader + columns: + - PurchaseOrderID + - table: Purchasing.ShipMethod + columns: + - ShipMethodID + - table: Purchasing.Vendor + columns: + - BusinessEntityID + - table: Sales.CountryRegionCurrency + columns: + - CountryRegionCode + - CurrencyCode + - table: Sales.CreditCard + columns: + - CreditCardID + - table: Sales.Currency + columns: + - CurrencyCode + - table: Sales.CurrencyRate + columns: + - CurrencyRateID + - table: Sales.Customer + columns: + - CustomerID + - table: Sales.PersonCreditCard + columns: + - BusinessEntityID + - CreditCardID + - table: Sales.SalesOrderDetail + columns: + - SalesOrderID + - SalesOrderDetailID + - table: Sales.SalesOrderHeader + columns: + - SalesOrderID + - table: Sales.SalesOrderHeaderSalesReason + columns: + - SalesOrderID + - SalesReasonID + - table: Sales.SalesPerson + columns: + - BusinessEntityID + - table: Sales.SalesPersonQuotaHistory + columns: + - BusinessEntityID + - QuotaDate + - table: Sales.SalesReason + columns: + - SalesReasonID + - table: 
Sales.SalesTaxRate + columns: + - SalesTaxRateID + - table: Sales.SalesTerritory + columns: + - TerritoryID + - table: Sales.SalesTerritoryHistory + columns: + - BusinessEntityID + - TerritoryID + - StartDate + - table: Sales.ShoppingCartItem + columns: + - ShoppingCartItemID + - table: Sales.SpecialOffer + columns: + - SpecialOfferID + - table: Sales.SpecialOfferProduct + columns: + - SpecialOfferID + - ProductID + - table: Sales.Store + columns: + - BusinessEntityID +expectedLinks: + - fromTable: HumanResources.Employee + fromColumns: + - BusinessEntityID + toTable: Person.Person + toColumns: + - BusinessEntityID + relationship: many_to_one + - fromTable: HumanResources.EmployeeDepartmentHistory + fromColumns: + - BusinessEntityID + toTable: HumanResources.Employee + toColumns: + - BusinessEntityID + relationship: many_to_one + - fromTable: HumanResources.EmployeeDepartmentHistory + fromColumns: + - DepartmentID + toTable: HumanResources.Department + toColumns: + - DepartmentID + relationship: many_to_one + - fromTable: HumanResources.EmployeeDepartmentHistory + fromColumns: + - ShiftID + toTable: HumanResources.Shift + toColumns: + - ShiftID + relationship: many_to_one + - fromTable: HumanResources.EmployeePayHistory + fromColumns: + - BusinessEntityID + toTable: HumanResources.Employee + toColumns: + - BusinessEntityID + relationship: many_to_one + - fromTable: HumanResources.JobCandidate + fromColumns: + - BusinessEntityID + toTable: HumanResources.Employee + toColumns: + - BusinessEntityID + relationship: many_to_one + - fromTable: Person.Address + fromColumns: + - StateProvinceID + toTable: Person.StateProvince + toColumns: + - StateProvinceID + relationship: many_to_one + - fromTable: Person.BusinessEntityAddress + fromColumns: + - AddressID + toTable: Person.Address + toColumns: + - AddressID + relationship: many_to_one + - fromTable: Person.BusinessEntityAddress + fromColumns: + - AddressTypeID + toTable: Person.AddressType + toColumns: + - AddressTypeID 
+ relationship: many_to_one + - fromTable: Person.BusinessEntityAddress + fromColumns: + - BusinessEntityID + toTable: Person.BusinessEntity + toColumns: + - BusinessEntityID + relationship: many_to_one + - fromTable: Person.BusinessEntityContact + fromColumns: + - BusinessEntityID + toTable: Person.BusinessEntity + toColumns: + - BusinessEntityID + relationship: many_to_one + - fromTable: Person.BusinessEntityContact + fromColumns: + - ContactTypeID + toTable: Person.ContactType + toColumns: + - ContactTypeID + relationship: many_to_one + - fromTable: Person.BusinessEntityContact + fromColumns: + - PersonID + toTable: Person.Person + toColumns: + - BusinessEntityID + relationship: many_to_one + - fromTable: Person.EmailAddress + fromColumns: + - BusinessEntityID + toTable: Person.Person + toColumns: + - BusinessEntityID + relationship: many_to_one + - fromTable: Person.Password + fromColumns: + - BusinessEntityID + toTable: Person.Person + toColumns: + - BusinessEntityID + relationship: many_to_one + - fromTable: Person.Person + fromColumns: + - BusinessEntityID + toTable: Person.BusinessEntity + toColumns: + - BusinessEntityID + relationship: many_to_one + - fromTable: Person.PersonPhone + fromColumns: + - BusinessEntityID + toTable: Person.Person + toColumns: + - BusinessEntityID + relationship: many_to_one + - fromTable: Person.PersonPhone + fromColumns: + - PhoneNumberTypeID + toTable: Person.PhoneNumberType + toColumns: + - PhoneNumberTypeID + relationship: many_to_one + - fromTable: Person.StateProvince + fromColumns: + - CountryRegionCode + toTable: Person.CountryRegion + toColumns: + - CountryRegionCode + relationship: many_to_one + - fromTable: Person.StateProvince + fromColumns: + - TerritoryID + toTable: Sales.SalesTerritory + toColumns: + - TerritoryID + relationship: many_to_one + - fromTable: Production.BillOfMaterials + fromColumns: + - ComponentID + toTable: Production.Product + toColumns: + - ProductID + relationship: many_to_one + - fromTable: 
Production.BillOfMaterials + fromColumns: + - ProductAssemblyID + toTable: Production.Product + toColumns: + - ProductID + relationship: many_to_one + - fromTable: Production.BillOfMaterials + fromColumns: + - UnitMeasureCode + toTable: Production.UnitMeasure + toColumns: + - UnitMeasureCode + relationship: many_to_one + - fromTable: Production.Document + fromColumns: + - Owner + toTable: HumanResources.Employee + toColumns: + - BusinessEntityID + relationship: many_to_one + - fromTable: Production.Product + fromColumns: + - ProductModelID + toTable: Production.ProductModel + toColumns: + - ProductModelID + relationship: many_to_one + - fromTable: Production.Product + fromColumns: + - ProductSubcategoryID + toTable: Production.ProductSubcategory + toColumns: + - ProductSubcategoryID + relationship: many_to_one + - fromTable: Production.Product + fromColumns: + - SizeUnitMeasureCode + toTable: Production.UnitMeasure + toColumns: + - UnitMeasureCode + relationship: many_to_one + - fromTable: Production.Product + fromColumns: + - WeightUnitMeasureCode + toTable: Production.UnitMeasure + toColumns: + - UnitMeasureCode + relationship: many_to_one + - fromTable: Production.ProductCostHistory + fromColumns: + - ProductID + toTable: Production.Product + toColumns: + - ProductID + relationship: many_to_one + - fromTable: Production.ProductDocument + fromColumns: + - DocumentNode + toTable: Production.Document + toColumns: + - DocumentNode + relationship: many_to_one + - fromTable: Production.ProductDocument + fromColumns: + - ProductID + toTable: Production.Product + toColumns: + - ProductID + relationship: many_to_one + - fromTable: Production.ProductInventory + fromColumns: + - LocationID + toTable: Production.Location + toColumns: + - LocationID + relationship: many_to_one + - fromTable: Production.ProductInventory + fromColumns: + - ProductID + toTable: Production.Product + toColumns: + - ProductID + relationship: many_to_one + - fromTable: 
Production.ProductListPriceHistory + fromColumns: + - ProductID + toTable: Production.Product + toColumns: + - ProductID + relationship: many_to_one + - fromTable: Production.ProductModelIllustration + fromColumns: + - IllustrationID + toTable: Production.Illustration + toColumns: + - IllustrationID + relationship: many_to_one + - fromTable: Production.ProductModelIllustration + fromColumns: + - ProductModelID + toTable: Production.ProductModel + toColumns: + - ProductModelID + relationship: many_to_one + - fromTable: Production.ProductModelProductDescriptionCulture + fromColumns: + - CultureID + toTable: Production.Culture + toColumns: + - CultureID + relationship: many_to_one + - fromTable: Production.ProductModelProductDescriptionCulture + fromColumns: + - ProductDescriptionID + toTable: Production.ProductDescription + toColumns: + - ProductDescriptionID + relationship: many_to_one + - fromTable: Production.ProductModelProductDescriptionCulture + fromColumns: + - ProductModelID + toTable: Production.ProductModel + toColumns: + - ProductModelID + relationship: many_to_one + - fromTable: Production.ProductProductPhoto + fromColumns: + - ProductID + toTable: Production.Product + toColumns: + - ProductID + relationship: many_to_one + - fromTable: Production.ProductProductPhoto + fromColumns: + - ProductPhotoID + toTable: Production.ProductPhoto + toColumns: + - ProductPhotoID + relationship: many_to_one + - fromTable: Production.ProductReview + fromColumns: + - ProductID + toTable: Production.Product + toColumns: + - ProductID + relationship: many_to_one + - fromTable: Production.ProductSubcategory + fromColumns: + - ProductCategoryID + toTable: Production.ProductCategory + toColumns: + - ProductCategoryID + relationship: many_to_one + - fromTable: Production.TransactionHistory + fromColumns: + - ProductID + toTable: Production.Product + toColumns: + - ProductID + relationship: many_to_one + - fromTable: Production.WorkOrder + fromColumns: + - ProductID + toTable: 
Production.Product + toColumns: + - ProductID + relationship: many_to_one + - fromTable: Production.WorkOrder + fromColumns: + - ScrapReasonID + toTable: Production.ScrapReason + toColumns: + - ScrapReasonID + relationship: many_to_one + - fromTable: Production.WorkOrderRouting + fromColumns: + - LocationID + toTable: Production.Location + toColumns: + - LocationID + relationship: many_to_one + - fromTable: Production.WorkOrderRouting + fromColumns: + - WorkOrderID + toTable: Production.WorkOrder + toColumns: + - WorkOrderID + relationship: many_to_one + - fromTable: Purchasing.ProductVendor + fromColumns: + - BusinessEntityID + toTable: Purchasing.Vendor + toColumns: + - BusinessEntityID + relationship: many_to_one + - fromTable: Purchasing.ProductVendor + fromColumns: + - ProductID + toTable: Production.Product + toColumns: + - ProductID + relationship: many_to_one + - fromTable: Purchasing.ProductVendor + fromColumns: + - UnitMeasureCode + toTable: Production.UnitMeasure + toColumns: + - UnitMeasureCode + relationship: many_to_one + - fromTable: Purchasing.PurchaseOrderDetail + fromColumns: + - ProductID + toTable: Production.Product + toColumns: + - ProductID + relationship: many_to_one + - fromTable: Purchasing.PurchaseOrderDetail + fromColumns: + - PurchaseOrderID + toTable: Purchasing.PurchaseOrderHeader + toColumns: + - PurchaseOrderID + relationship: many_to_one + - fromTable: Purchasing.PurchaseOrderHeader + fromColumns: + - EmployeeID + toTable: HumanResources.Employee + toColumns: + - BusinessEntityID + relationship: many_to_one + - fromTable: Purchasing.PurchaseOrderHeader + fromColumns: + - ShipMethodID + toTable: Purchasing.ShipMethod + toColumns: + - ShipMethodID + relationship: many_to_one + - fromTable: Purchasing.PurchaseOrderHeader + fromColumns: + - VendorID + toTable: Purchasing.Vendor + toColumns: + - BusinessEntityID + relationship: many_to_one + - fromTable: Purchasing.Vendor + fromColumns: + - BusinessEntityID + toTable: 
Person.BusinessEntity + toColumns: + - BusinessEntityID + relationship: many_to_one + - fromTable: Sales.CountryRegionCurrency + fromColumns: + - CountryRegionCode + toTable: Person.CountryRegion + toColumns: + - CountryRegionCode + relationship: many_to_one + - fromTable: Sales.CountryRegionCurrency + fromColumns: + - CurrencyCode + toTable: Sales.Currency + toColumns: + - CurrencyCode + relationship: many_to_one + - fromTable: Sales.CurrencyRate + fromColumns: + - FromCurrencyCode + toTable: Sales.Currency + toColumns: + - CurrencyCode + relationship: many_to_one + - fromTable: Sales.CurrencyRate + fromColumns: + - ToCurrencyCode + toTable: Sales.Currency + toColumns: + - CurrencyCode + relationship: many_to_one + - fromTable: Sales.Customer + fromColumns: + - PersonID + toTable: Person.Person + toColumns: + - BusinessEntityID + relationship: many_to_one + - fromTable: Sales.Customer + fromColumns: + - StoreID + toTable: Sales.Store + toColumns: + - BusinessEntityID + relationship: many_to_one + - fromTable: Sales.Customer + fromColumns: + - TerritoryID + toTable: Sales.SalesTerritory + toColumns: + - TerritoryID + relationship: many_to_one + - fromTable: Sales.PersonCreditCard + fromColumns: + - BusinessEntityID + toTable: Person.Person + toColumns: + - BusinessEntityID + relationship: many_to_one + - fromTable: Sales.PersonCreditCard + fromColumns: + - CreditCardID + toTable: Sales.CreditCard + toColumns: + - CreditCardID + relationship: many_to_one + - fromTable: Sales.SalesOrderDetail + fromColumns: + - ProductID + - SpecialOfferID + toTable: Sales.SpecialOfferProduct + toColumns: + - ProductID + - SpecialOfferID + relationship: many_to_one + - fromTable: Sales.SalesOrderDetail + fromColumns: + - SalesOrderID + toTable: Sales.SalesOrderHeader + toColumns: + - SalesOrderID + relationship: many_to_one + - fromTable: Sales.SalesOrderHeader + fromColumns: + - BillToAddressID + toTable: Person.Address + toColumns: + - AddressID + relationship: many_to_one + - 
fromTable: Sales.SalesOrderHeader + fromColumns: + - CreditCardID + toTable: Sales.CreditCard + toColumns: + - CreditCardID + relationship: many_to_one + - fromTable: Sales.SalesOrderHeader + fromColumns: + - CurrencyRateID + toTable: Sales.CurrencyRate + toColumns: + - CurrencyRateID + relationship: many_to_one + - fromTable: Sales.SalesOrderHeader + fromColumns: + - CustomerID + toTable: Sales.Customer + toColumns: + - CustomerID + relationship: many_to_one + - fromTable: Sales.SalesOrderHeader + fromColumns: + - SalesPersonID + toTable: Sales.SalesPerson + toColumns: + - BusinessEntityID + relationship: many_to_one + - fromTable: Sales.SalesOrderHeader + fromColumns: + - ShipMethodID + toTable: Purchasing.ShipMethod + toColumns: + - ShipMethodID + relationship: many_to_one + - fromTable: Sales.SalesOrderHeader + fromColumns: + - ShipToAddressID + toTable: Person.Address + toColumns: + - AddressID + relationship: many_to_one + - fromTable: Sales.SalesOrderHeader + fromColumns: + - TerritoryID + toTable: Sales.SalesTerritory + toColumns: + - TerritoryID + relationship: many_to_one + - fromTable: Sales.SalesOrderHeaderSalesReason + fromColumns: + - SalesOrderID + toTable: Sales.SalesOrderHeader + toColumns: + - SalesOrderID + relationship: many_to_one + - fromTable: Sales.SalesOrderHeaderSalesReason + fromColumns: + - SalesReasonID + toTable: Sales.SalesReason + toColumns: + - SalesReasonID + relationship: many_to_one + - fromTable: Sales.SalesPerson + fromColumns: + - BusinessEntityID + toTable: HumanResources.Employee + toColumns: + - BusinessEntityID + relationship: many_to_one + - fromTable: Sales.SalesPerson + fromColumns: + - TerritoryID + toTable: Sales.SalesTerritory + toColumns: + - TerritoryID + relationship: many_to_one + - fromTable: Sales.SalesPersonQuotaHistory + fromColumns: + - BusinessEntityID + toTable: Sales.SalesPerson + toColumns: + - BusinessEntityID + relationship: many_to_one + - fromTable: Sales.SalesTaxRate + fromColumns: + - 
StateProvinceID + toTable: Person.StateProvince + toColumns: + - StateProvinceID + relationship: many_to_one + - fromTable: Sales.SalesTerritory + fromColumns: + - CountryRegionCode + toTable: Person.CountryRegion + toColumns: + - CountryRegionCode + relationship: many_to_one + - fromTable: Sales.SalesTerritoryHistory + fromColumns: + - BusinessEntityID + toTable: Sales.SalesPerson + toColumns: + - BusinessEntityID + relationship: many_to_one + - fromTable: Sales.SalesTerritoryHistory + fromColumns: + - TerritoryID + toTable: Sales.SalesTerritory + toColumns: + - TerritoryID + relationship: many_to_one + - fromTable: Sales.ShoppingCartItem + fromColumns: + - ProductID + toTable: Production.Product + toColumns: + - ProductID + relationship: many_to_one + - fromTable: Sales.SpecialOfferProduct + fromColumns: + - ProductID + toTable: Production.Product + toColumns: + - ProductID + relationship: many_to_one + - fromTable: Sales.SpecialOfferProduct + fromColumns: + - SpecialOfferID + toTable: Sales.SpecialOffer + toColumns: + - SpecialOfferID + relationship: many_to_one + - fromTable: Sales.Store + fromColumns: + - BusinessEntityID + toTable: Person.BusinessEntity + toColumns: + - BusinessEntityID + relationship: many_to_one + - fromTable: Sales.Store + fromColumns: + - SalesPersonID + toTable: Sales.SalesPerson + toColumns: + - BusinessEntityID + relationship: many_to_one diff --git a/packages/context/test/fixtures/relationship-benchmarks/adventureworks_oltp_with_declared_metadata/fixture.yaml b/packages/context/test/fixtures/relationship-benchmarks/adventureworks_oltp_with_declared_metadata/fixture.yaml new file mode 100644 index 00000000..4449a236 --- /dev/null +++ b/packages/context/test/fixtures/relationship-benchmarks/adventureworks_oltp_with_declared_metadata/fixture.yaml @@ -0,0 +1,14 @@ +id: adventureworks_oltp_with_declared_metadata +name: AdventureWorks OLTP (SQL Server 2022, declared metadata) +tier: row_bearing +origin: public +thresholdEligible: true 
+defaultModes: + - metadata_present + - declared_pks_and_declared_fks_removed + - declared_pks_removed + - declared_fks_removed + - profiling_disabled + - validation_disabled + - llm_disabled + - embeddings_disabled diff --git a/packages/context/test/fixtures/relationship-benchmarks/adventureworks_oltp_with_declared_metadata/snapshot.json b/packages/context/test/fixtures/relationship-benchmarks/adventureworks_oltp_with_declared_metadata/snapshot.json new file mode 100644 index 00000000..e274130b --- /dev/null +++ b/packages/context/test/fixtures/relationship-benchmarks/adventureworks_oltp_with_declared_metadata/snapshot.json @@ -0,0 +1,5961 @@ +{ + "connectionId": "adventureworks_oltp_with_declared_metadata", + "driver": "sqlite", + "extractedAt": "2026-05-07T00:00:00.000Z", + "scope": { + "catalogs": [ + "main" + ], + "schemas": [ + "main" + ] + }, + "metadata": { + "database": "AdventureWorks", + "schemas": [ + "dbo", + "HumanResources", + "Person", + "Production", + "Purchasing", + "Sales" + ], + "host": "127.0.0.1", + "table_count": 91, + "total_columns": 744, + "source_driver": "sqlserver", + "source_connection_id": "adventureworks_oltp_with_declared_metadata", + "source_database": "AdventureWorks" + }, + "tables": [ + { + "catalog": null, + "db": "main", + "name": "dbo.AWBuildVersion", + "kind": "table", + "comment": "Current version number of the AdventureWorks 2016 sample database. ", + "estimatedRows": 1, + "columns": [ + { + "name": "SystemInformationID", + "nativeType": "tinyint", + "normalizedType": "integer", + "dimensionType": "number", + "nullable": false, + "primaryKey": true, + "comment": "Clustered index created by a primary key constraint." + }, + { + "name": "Database Version", + "nativeType": "nvarchar", + "normalizedType": "text", + "dimensionType": "string", + "nullable": false, + "primaryKey": false, + "comment": "Version number of the database in 9.yy.mm.dd.00 format." 
+ }, + { + "name": "VersionDate", + "nativeType": "datetime", + "normalizedType": "text", + "dimensionType": "time", + "nullable": false, + "primaryKey": false, + "comment": "Date and time the record was last updated." + }, + { + "name": "ModifiedDate", + "nativeType": "datetime", + "normalizedType": "text", + "dimensionType": "time", + "nullable": false, + "primaryKey": false, + "comment": "Date and time the record was last updated." + } + ], + "foreignKeys": [] + }, + { + "catalog": null, + "db": "main", + "name": "dbo.DatabaseLog", + "kind": "table", + "comment": "Audit table tracking all DDL changes made to the AdventureWorks database. Data is captured by the database trigger ddlDatabaseTriggerLog.", + "estimatedRows": 1596, + "columns": [ + { + "name": "DatabaseLogID", + "nativeType": "int", + "normalizedType": "integer", + "dimensionType": "number", + "nullable": false, + "primaryKey": true, + "comment": "Primary key for DatabaseLog records." + }, + { + "name": "PostTime", + "nativeType": "datetime", + "normalizedType": "text", + "dimensionType": "time", + "nullable": false, + "primaryKey": false, + "comment": "Nonclustered index created by a primary key constraint." + }, + { + "name": "DatabaseUser", + "nativeType": "nvarchar", + "normalizedType": "text", + "dimensionType": "string", + "nullable": false, + "primaryKey": false, + "comment": "The user who implemented the DDL change." + }, + { + "name": "Event", + "nativeType": "nvarchar", + "normalizedType": "text", + "dimensionType": "string", + "nullable": false, + "primaryKey": false, + "comment": "The type of DDL statement that was executed." + }, + { + "name": "Schema", + "nativeType": "nvarchar", + "normalizedType": "text", + "dimensionType": "string", + "nullable": true, + "primaryKey": false, + "comment": "The schema to which the changed object belongs." 
+ }, + { + "name": "Object", + "nativeType": "nvarchar", + "normalizedType": "text", + "dimensionType": "string", + "nullable": true, + "primaryKey": false, + "comment": "The object that was changed by the DDL statment." + }, + { + "name": "TSQL", + "nativeType": "nvarchar", + "normalizedType": "text", + "dimensionType": "string", + "nullable": false, + "primaryKey": false, + "comment": "The exact Transact-SQL statement that was executed." + }, + { + "name": "XmlEvent", + "nativeType": "xml", + "normalizedType": "text", + "dimensionType": "string", + "nullable": false, + "primaryKey": false, + "comment": "The raw XML data generated by database trigger." + } + ], + "foreignKeys": [] + }, + { + "catalog": null, + "db": "main", + "name": "dbo.ErrorLog", + "kind": "table", + "comment": "Audit table tracking errors in the the AdventureWorks database that are caught by the CATCH block of a TRY...CATCH construct. Data is inserted by stored procedure dbo.uspLogError when it is executed from inside the CATCH block of a TRY...CATCH construct.", + "estimatedRows": 0, + "columns": [ + { + "name": "ErrorLogID", + "nativeType": "int", + "normalizedType": "integer", + "dimensionType": "number", + "nullable": false, + "primaryKey": true, + "comment": "Clustered index created by a primary key constraint." + }, + { + "name": "ErrorTime", + "nativeType": "datetime", + "normalizedType": "text", + "dimensionType": "time", + "nullable": false, + "primaryKey": false, + "comment": "The date and time at which the error occurred." + }, + { + "name": "UserName", + "nativeType": "nvarchar", + "normalizedType": "text", + "dimensionType": "string", + "nullable": false, + "primaryKey": false, + "comment": "The user who executed the batch in which the error occurred." + }, + { + "name": "ErrorNumber", + "nativeType": "int", + "normalizedType": "integer", + "dimensionType": "number", + "nullable": false, + "primaryKey": false, + "comment": "The error number of the error that occurred." 
+ }, + { + "name": "ErrorSeverity", + "nativeType": "int", + "normalizedType": "integer", + "dimensionType": "number", + "nullable": true, + "primaryKey": false, + "comment": "The severity of the error that occurred." + }, + { + "name": "ErrorState", + "nativeType": "int", + "normalizedType": "integer", + "dimensionType": "number", + "nullable": true, + "primaryKey": false, + "comment": "The state number of the error that occurred." + }, + { + "name": "ErrorProcedure", + "nativeType": "nvarchar", + "normalizedType": "text", + "dimensionType": "string", + "nullable": true, + "primaryKey": false, + "comment": "The name of the stored procedure or trigger where the error occurred." + }, + { + "name": "ErrorLine", + "nativeType": "int", + "normalizedType": "integer", + "dimensionType": "number", + "nullable": true, + "primaryKey": false, + "comment": "The line number at which the error occurred." + }, + { + "name": "ErrorMessage", + "nativeType": "nvarchar", + "normalizedType": "text", + "dimensionType": "string", + "nullable": false, + "primaryKey": false, + "comment": "The message text of the error that occurred." + } + ], + "foreignKeys": [] + }, + { + "catalog": null, + "db": "main", + "name": "HumanResources.Department", + "kind": "table", + "comment": "Lookup table containing the departments within the Adventure Works Cycles company.", + "estimatedRows": 16, + "columns": [ + { + "name": "DepartmentID", + "nativeType": "smallint", + "normalizedType": "integer", + "dimensionType": "number", + "nullable": false, + "primaryKey": true, + "comment": "Clustered index created by a primary key constraint." + }, + { + "name": "Name", + "nativeType": "nvarchar", + "normalizedType": "text", + "dimensionType": "string", + "nullable": false, + "primaryKey": false, + "comment": "Unique nonclustered index." 
+ }, + { + "name": "GroupName", + "nativeType": "nvarchar", + "normalizedType": "text", + "dimensionType": "string", + "nullable": false, + "primaryKey": false, + "comment": "Name of the group to which the department belongs." + }, + { + "name": "ModifiedDate", + "nativeType": "datetime", + "normalizedType": "text", + "dimensionType": "time", + "nullable": false, + "primaryKey": false, + "comment": "Date and time the record was last updated." + } + ], + "foreignKeys": [] + }, + { + "catalog": null, + "db": "main", + "name": "HumanResources.Employee", + "kind": "table", + "comment": "Employee information such as salary, department, and title.", + "estimatedRows": 290, + "columns": [ + { + "name": "BusinessEntityID", + "nativeType": "int", + "normalizedType": "integer", + "dimensionType": "number", + "nullable": false, + "primaryKey": true, + "comment": "Clustered index created by a primary key constraint." + }, + { + "name": "NationalIDNumber", + "nativeType": "nvarchar", + "normalizedType": "text", + "dimensionType": "string", + "nullable": false, + "primaryKey": false, + "comment": "Unique nonclustered index." + }, + { + "name": "LoginID", + "nativeType": "nvarchar", + "normalizedType": "text", + "dimensionType": "string", + "nullable": false, + "primaryKey": false, + "comment": "Unique nonclustered index." + }, + { + "name": "OrganizationNode", + "nativeType": "hierarchyid", + "normalizedType": "text", + "dimensionType": "string", + "nullable": true, + "primaryKey": false, + "comment": "Unique nonclustered index." + }, + { + "name": "OrganizationLevel", + "nativeType": "smallint", + "normalizedType": "integer", + "dimensionType": "number", + "nullable": true, + "primaryKey": false, + "comment": "Unique nonclustered index." + }, + { + "name": "JobTitle", + "nativeType": "nvarchar", + "normalizedType": "text", + "dimensionType": "string", + "nullable": false, + "primaryKey": false, + "comment": "Unique nonclustered index. Used to support replication samples." 
+ }, + { + "name": "BirthDate", + "nativeType": "date", + "normalizedType": "text", + "dimensionType": "time", + "nullable": false, + "primaryKey": false, + "comment": "Date of birth." + }, + { + "name": "MaritalStatus", + "nativeType": "nchar", + "normalizedType": "text", + "dimensionType": "string", + "nullable": false, + "primaryKey": false, + "comment": "M = Married, S = Single" + }, + { + "name": "Gender", + "nativeType": "nchar", + "normalizedType": "text", + "dimensionType": "string", + "nullable": false, + "primaryKey": false, + "comment": "M = Male, F = Female" + }, + { + "name": "HireDate", + "nativeType": "date", + "normalizedType": "text", + "dimensionType": "time", + "nullable": false, + "primaryKey": false, + "comment": "Employee hired on this date." + }, + { + "name": "SalariedFlag", + "nativeType": "bit", + "normalizedType": "text", + "dimensionType": "string", + "nullable": false, + "primaryKey": false, + "comment": "Job classification. 0 = Hourly, not exempt from collective bargaining. 1 = Salaried, exempt from collective bargaining." + }, + { + "name": "VacationHours", + "nativeType": "smallint", + "normalizedType": "integer", + "dimensionType": "number", + "nullable": false, + "primaryKey": false, + "comment": "Number of available vacation hours." + }, + { + "name": "SickLeaveHours", + "nativeType": "smallint", + "normalizedType": "integer", + "dimensionType": "number", + "nullable": false, + "primaryKey": false, + "comment": "Number of available sick leave hours." + }, + { + "name": "CurrentFlag", + "nativeType": "bit", + "normalizedType": "text", + "dimensionType": "string", + "nullable": false, + "primaryKey": false, + "comment": "0 = Inactive, 1 = Active" + }, + { + "name": "rowguid", + "nativeType": "uniqueidentifier", + "normalizedType": "text", + "dimensionType": "string", + "nullable": false, + "primaryKey": false, + "comment": "ROWGUIDCOL number uniquely identifying the record. Used to support a merge replication sample." 
+ }, + { + "name": "ModifiedDate", + "nativeType": "datetime", + "normalizedType": "text", + "dimensionType": "time", + "nullable": false, + "primaryKey": false, + "comment": "Date and time the record was last updated." + } + ], + "foreignKeys": [ + { + "fromColumn": "BusinessEntityID", + "toCatalog": null, + "toDb": "main", + "toTable": "Person.Person", + "toColumn": "BusinessEntityID", + "constraintName": "FK_Employee_Person_BusinessEntityID" + } + ] + }, + { + "catalog": null, + "db": "main", + "name": "HumanResources.EmployeeDepartmentHistory", + "kind": "table", + "comment": "Employee department transfers.", + "estimatedRows": 296, + "columns": [ + { + "name": "BusinessEntityID", + "nativeType": "int", + "normalizedType": "integer", + "dimensionType": "number", + "nullable": false, + "primaryKey": true, + "comment": "Clustered index created by a primary key constraint." + }, + { + "name": "DepartmentID", + "nativeType": "smallint", + "normalizedType": "integer", + "dimensionType": "number", + "nullable": false, + "primaryKey": true, + "comment": "Nonclustered index." + }, + { + "name": "ShiftID", + "nativeType": "tinyint", + "normalizedType": "integer", + "dimensionType": "number", + "nullable": false, + "primaryKey": true, + "comment": "Nonclustered index." + }, + { + "name": "StartDate", + "nativeType": "date", + "normalizedType": "text", + "dimensionType": "time", + "nullable": false, + "primaryKey": true, + "comment": "Date the employee started work in the department." + }, + { + "name": "EndDate", + "nativeType": "date", + "normalizedType": "text", + "dimensionType": "time", + "nullable": true, + "primaryKey": false, + "comment": "Date the employee left the department. NULL = Current department." + }, + { + "name": "ModifiedDate", + "nativeType": "datetime", + "normalizedType": "text", + "dimensionType": "time", + "nullable": false, + "primaryKey": false, + "comment": "Date and time the record was last updated." 
+ } + ], + "foreignKeys": [ + { + "fromColumn": "BusinessEntityID", + "toCatalog": null, + "toDb": "main", + "toTable": "HumanResources.Employee", + "toColumn": "BusinessEntityID", + "constraintName": "FK_EmployeeDepartmentHistory_Employee_BusinessEntityID" + }, + { + "fromColumn": "DepartmentID", + "toCatalog": null, + "toDb": "main", + "toTable": "HumanResources.Department", + "toColumn": "DepartmentID", + "constraintName": "FK_EmployeeDepartmentHistory_Department_DepartmentID" + }, + { + "fromColumn": "ShiftID", + "toCatalog": null, + "toDb": "main", + "toTable": "HumanResources.Shift", + "toColumn": "ShiftID", + "constraintName": "FK_EmployeeDepartmentHistory_Shift_ShiftID" + } + ] + }, + { + "catalog": null, + "db": "main", + "name": "HumanResources.EmployeePayHistory", + "kind": "table", + "comment": "Employee pay history.", + "estimatedRows": 316, + "columns": [ + { + "name": "BusinessEntityID", + "nativeType": "int", + "normalizedType": "integer", + "dimensionType": "number", + "nullable": false, + "primaryKey": true, + "comment": "Clustered index created by a primary key constraint." + }, + { + "name": "RateChangeDate", + "nativeType": "datetime", + "normalizedType": "text", + "dimensionType": "time", + "nullable": false, + "primaryKey": true, + "comment": "Date the change in pay is effective" + }, + { + "name": "Rate", + "nativeType": "money", + "normalizedType": "text", + "dimensionType": "string", + "nullable": false, + "primaryKey": false, + "comment": "Salary hourly rate." + }, + { + "name": "PayFrequency", + "nativeType": "tinyint", + "normalizedType": "integer", + "dimensionType": "number", + "nullable": false, + "primaryKey": false, + "comment": "1 = Salary received monthly, 2 = Salary received biweekly" + }, + { + "name": "ModifiedDate", + "nativeType": "datetime", + "normalizedType": "text", + "dimensionType": "time", + "nullable": false, + "primaryKey": false, + "comment": "Date and time the record was last updated." 
+ } + ], + "foreignKeys": [ + { + "fromColumn": "BusinessEntityID", + "toCatalog": null, + "toDb": "main", + "toTable": "HumanResources.Employee", + "toColumn": "BusinessEntityID", + "constraintName": "FK_EmployeePayHistory_Employee_BusinessEntityID" + } + ] + }, + { + "catalog": null, + "db": "main", + "name": "HumanResources.JobCandidate", + "kind": "table", + "comment": "Résumés submitted to Human Resources by job applicants.", + "estimatedRows": 13, + "columns": [ + { + "name": "JobCandidateID", + "nativeType": "int", + "normalizedType": "integer", + "dimensionType": "time", + "nullable": false, + "primaryKey": true, + "comment": "Clustered index created by a primary key constraint." + }, + { + "name": "BusinessEntityID", + "nativeType": "int", + "normalizedType": "integer", + "dimensionType": "number", + "nullable": true, + "primaryKey": false, + "comment": "Nonclustered index." + }, + { + "name": "Resume", + "nativeType": "xml", + "normalizedType": "text", + "dimensionType": "string", + "nullable": true, + "primaryKey": false, + "comment": "Résumé in XML format." + }, + { + "name": "ModifiedDate", + "nativeType": "datetime", + "normalizedType": "text", + "dimensionType": "time", + "nullable": false, + "primaryKey": false, + "comment": "Date and time the record was last updated." + } + ], + "foreignKeys": [ + { + "fromColumn": "BusinessEntityID", + "toCatalog": null, + "toDb": "main", + "toTable": "HumanResources.Employee", + "toColumn": "BusinessEntityID", + "constraintName": "FK_JobCandidate_Employee_BusinessEntityID" + } + ] + }, + { + "catalog": null, + "db": "main", + "name": "HumanResources.Shift", + "kind": "table", + "comment": "Work shift lookup table.", + "estimatedRows": 3, + "columns": [ + { + "name": "ShiftID", + "nativeType": "tinyint", + "normalizedType": "integer", + "dimensionType": "number", + "nullable": false, + "primaryKey": true, + "comment": "Clustered index created by a primary key constraint." 
+ }, + { + "name": "Name", + "nativeType": "nvarchar", + "normalizedType": "text", + "dimensionType": "string", + "nullable": false, + "primaryKey": false, + "comment": "Unique nonclustered index." + }, + { + "name": "StartTime", + "nativeType": "time", + "normalizedType": "text", + "dimensionType": "time", + "nullable": false, + "primaryKey": false, + "comment": "Unique nonclustered index." + }, + { + "name": "EndTime", + "nativeType": "time", + "normalizedType": "text", + "dimensionType": "time", + "nullable": false, + "primaryKey": false, + "comment": "Shift end time." + }, + { + "name": "ModifiedDate", + "nativeType": "datetime", + "normalizedType": "text", + "dimensionType": "time", + "nullable": false, + "primaryKey": false, + "comment": "Date and time the record was last updated." + } + ], + "foreignKeys": [] + }, + { + "catalog": null, + "db": "main", + "name": "Person.Address", + "kind": "table", + "comment": "Street address information for customers, employees, and vendors.", + "estimatedRows": 19614, + "columns": [ + { + "name": "AddressID", + "nativeType": "int", + "normalizedType": "integer", + "dimensionType": "number", + "nullable": false, + "primaryKey": true, + "comment": "Clustered index created by a primary key constraint." + }, + { + "name": "AddressLine1", + "nativeType": "nvarchar", + "normalizedType": "text", + "dimensionType": "string", + "nullable": false, + "primaryKey": false, + "comment": "Unique nonclustered index. Used to support replication samples." + }, + { + "name": "AddressLine2", + "nativeType": "nvarchar", + "normalizedType": "text", + "dimensionType": "string", + "nullable": true, + "primaryKey": false, + "comment": "Nonclustered index." + }, + { + "name": "City", + "nativeType": "nvarchar", + "normalizedType": "text", + "dimensionType": "string", + "nullable": false, + "primaryKey": false, + "comment": "Nonclustered index." 
+ }, + { + "name": "StateProvinceID", + "nativeType": "int", + "normalizedType": "integer", + "dimensionType": "number", + "nullable": false, + "primaryKey": false, + "comment": "Unique identification number for the state or province. Foreign key to StateProvince table." + }, + { + "name": "PostalCode", + "nativeType": "nvarchar", + "normalizedType": "text", + "dimensionType": "string", + "nullable": false, + "primaryKey": false, + "comment": "Postal code for the street address." + }, + { + "name": "SpatialLocation", + "nativeType": "geography", + "normalizedType": "text", + "dimensionType": "string", + "nullable": true, + "primaryKey": false, + "comment": "Latitude and longitude of this address." + }, + { + "name": "rowguid", + "nativeType": "uniqueidentifier", + "normalizedType": "text", + "dimensionType": "string", + "nullable": false, + "primaryKey": false, + "comment": "ROWGUIDCOL number uniquely identifying the record. Used to support a merge replication sample." + }, + { + "name": "ModifiedDate", + "nativeType": "datetime", + "normalizedType": "text", + "dimensionType": "time", + "nullable": false, + "primaryKey": false, + "comment": "Date and time the record was last updated." + } + ], + "foreignKeys": [ + { + "fromColumn": "StateProvinceID", + "toCatalog": null, + "toDb": "main", + "toTable": "Person.StateProvince", + "toColumn": "StateProvinceID", + "constraintName": "FK_Address_StateProvince_StateProvinceID" + } + ] + }, + { + "catalog": null, + "db": "main", + "name": "Person.AddressType", + "kind": "table", + "comment": "Types of addresses stored in the Address table. ", + "estimatedRows": 6, + "columns": [ + { + "name": "AddressTypeID", + "nativeType": "int", + "normalizedType": "integer", + "dimensionType": "number", + "nullable": false, + "primaryKey": true, + "comment": "Clustered index created by a primary key constraint." 
+ }, + { + "name": "Name", + "nativeType": "nvarchar", + "normalizedType": "text", + "dimensionType": "string", + "nullable": false, + "primaryKey": false, + "comment": "Unique nonclustered index. Used to support replication samples." + }, + { + "name": "rowguid", + "nativeType": "uniqueidentifier", + "normalizedType": "text", + "dimensionType": "string", + "nullable": false, + "primaryKey": false, + "comment": "Unique nonclustered index." + }, + { + "name": "ModifiedDate", + "nativeType": "datetime", + "normalizedType": "text", + "dimensionType": "time", + "nullable": false, + "primaryKey": false, + "comment": "Date and time the record was last updated." + } + ], + "foreignKeys": [] + }, + { + "catalog": null, + "db": "main", + "name": "Person.BusinessEntity", + "kind": "table", + "comment": "Source of the ID that connects vendors, customers, and employees with address and contact information.", + "estimatedRows": 20777, + "columns": [ + { + "name": "BusinessEntityID", + "nativeType": "int", + "normalizedType": "integer", + "dimensionType": "number", + "nullable": false, + "primaryKey": true, + "comment": "Clustered index created by a primary key constraint." + }, + { + "name": "rowguid", + "nativeType": "uniqueidentifier", + "normalizedType": "text", + "dimensionType": "string", + "nullable": false, + "primaryKey": false, + "comment": "Unique nonclustered index. Used to support replication samples." + }, + { + "name": "ModifiedDate", + "nativeType": "datetime", + "normalizedType": "text", + "dimensionType": "time", + "nullable": false, + "primaryKey": false, + "comment": "Date and time the record was last updated." 
+ } + ], + "foreignKeys": [] + }, + { + "catalog": null, + "db": "main", + "name": "Person.BusinessEntityAddress", + "kind": "table", + "comment": "Cross-reference table mapping customers, vendors, and employees to their addresses.", + "estimatedRows": 19614, + "columns": [ + { + "name": "BusinessEntityID", + "nativeType": "int", + "normalizedType": "integer", + "dimensionType": "number", + "nullable": false, + "primaryKey": true, + "comment": "Clustered index created by a primary key constraint." + }, + { + "name": "AddressID", + "nativeType": "int", + "normalizedType": "integer", + "dimensionType": "number", + "nullable": false, + "primaryKey": true, + "comment": "Unique nonclustered index. Used to support replication samples." + }, + { + "name": "AddressTypeID", + "nativeType": "int", + "normalizedType": "integer", + "dimensionType": "number", + "nullable": false, + "primaryKey": true, + "comment": "Nonclustered index." + }, + { + "name": "rowguid", + "nativeType": "uniqueidentifier", + "normalizedType": "text", + "dimensionType": "string", + "nullable": false, + "primaryKey": false, + "comment": "Nonclustered index." + }, + { + "name": "ModifiedDate", + "nativeType": "datetime", + "normalizedType": "text", + "dimensionType": "time", + "nullable": false, + "primaryKey": false, + "comment": "Date and time the record was last updated." 
+ } + ], + "foreignKeys": [ + { + "fromColumn": "AddressID", + "toCatalog": null, + "toDb": "main", + "toTable": "Person.Address", + "toColumn": "AddressID", + "constraintName": "FK_BusinessEntityAddress_Address_AddressID" + }, + { + "fromColumn": "AddressTypeID", + "toCatalog": null, + "toDb": "main", + "toTable": "Person.AddressType", + "toColumn": "AddressTypeID", + "constraintName": "FK_BusinessEntityAddress_AddressType_AddressTypeID" + }, + { + "fromColumn": "BusinessEntityID", + "toCatalog": null, + "toDb": "main", + "toTable": "Person.BusinessEntity", + "toColumn": "BusinessEntityID", + "constraintName": "FK_BusinessEntityAddress_BusinessEntity_BusinessEntityID" + } + ] + }, + { + "catalog": null, + "db": "main", + "name": "Person.BusinessEntityContact", + "kind": "table", + "comment": "Cross-reference table mapping stores, vendors, and employees to people", + "estimatedRows": 909, + "columns": [ + { + "name": "BusinessEntityID", + "nativeType": "int", + "normalizedType": "integer", + "dimensionType": "number", + "nullable": false, + "primaryKey": true, + "comment": "Clustered index created by a primary key constraint." + }, + { + "name": "PersonID", + "nativeType": "int", + "normalizedType": "integer", + "dimensionType": "number", + "nullable": false, + "primaryKey": true, + "comment": "Unique nonclustered index. Used to support replication samples." + }, + { + "name": "ContactTypeID", + "nativeType": "int", + "normalizedType": "integer", + "dimensionType": "number", + "nullable": false, + "primaryKey": true, + "comment": "Nonclustered index." + }, + { + "name": "rowguid", + "nativeType": "uniqueidentifier", + "normalizedType": "text", + "dimensionType": "string", + "nullable": false, + "primaryKey": false, + "comment": "Nonclustered index." + }, + { + "name": "ModifiedDate", + "nativeType": "datetime", + "normalizedType": "text", + "dimensionType": "time", + "nullable": false, + "primaryKey": false, + "comment": "Date and time the record was last updated." 
+ } + ], + "foreignKeys": [ + { + "fromColumn": "BusinessEntityID", + "toCatalog": null, + "toDb": "main", + "toTable": "Person.BusinessEntity", + "toColumn": "BusinessEntityID", + "constraintName": "FK_BusinessEntityContact_BusinessEntity_BusinessEntityID" + }, + { + "fromColumn": "ContactTypeID", + "toCatalog": null, + "toDb": "main", + "toTable": "Person.ContactType", + "toColumn": "ContactTypeID", + "constraintName": "FK_BusinessEntityContact_ContactType_ContactTypeID" + }, + { + "fromColumn": "PersonID", + "toCatalog": null, + "toDb": "main", + "toTable": "Person.Person", + "toColumn": "BusinessEntityID", + "constraintName": "FK_BusinessEntityContact_Person_PersonID" + } + ] + }, + { + "catalog": null, + "db": "main", + "name": "Person.ContactType", + "kind": "table", + "comment": "Lookup table containing the types of business entity contacts.", + "estimatedRows": 20, + "columns": [ + { + "name": "ContactTypeID", + "nativeType": "int", + "normalizedType": "integer", + "dimensionType": "number", + "nullable": false, + "primaryKey": true, + "comment": "Clustered index created by a primary key constraint." + }, + { + "name": "Name", + "nativeType": "nvarchar", + "normalizedType": "text", + "dimensionType": "string", + "nullable": false, + "primaryKey": false, + "comment": "Unique nonclustered index." + }, + { + "name": "ModifiedDate", + "nativeType": "datetime", + "normalizedType": "text", + "dimensionType": "time", + "nullable": false, + "primaryKey": false, + "comment": "Date and time the record was last updated." 
+ } + ], + "foreignKeys": [] + }, + { + "catalog": null, + "db": "main", + "name": "Person.CountryRegion", + "kind": "table", + "comment": "Lookup table containing the ISO standard codes for countries and regions.", + "estimatedRows": 238, + "columns": [ + { + "name": "CountryRegionCode", + "nativeType": "nvarchar", + "normalizedType": "text", + "dimensionType": "string", + "nullable": false, + "primaryKey": true, + "comment": "Clustered index created by a primary key constraint." + }, + { + "name": "Name", + "nativeType": "nvarchar", + "normalizedType": "text", + "dimensionType": "string", + "nullable": false, + "primaryKey": false, + "comment": "Unique nonclustered index." + }, + { + "name": "ModifiedDate", + "nativeType": "datetime", + "normalizedType": "text", + "dimensionType": "time", + "nullable": false, + "primaryKey": false, + "comment": "Date and time the record was last updated." + } + ], + "foreignKeys": [] + }, + { + "catalog": null, + "db": "main", + "name": "Person.EmailAddress", + "kind": "table", + "comment": "Where to send a person email.", + "estimatedRows": 19972, + "columns": [ + { + "name": "BusinessEntityID", + "nativeType": "int", + "normalizedType": "integer", + "dimensionType": "number", + "nullable": false, + "primaryKey": true, + "comment": "Clustered index created by a primary key constraint." + }, + { + "name": "EmailAddressID", + "nativeType": "int", + "normalizedType": "integer", + "dimensionType": "number", + "nullable": false, + "primaryKey": true, + "comment": "Nonclustered index." + }, + { + "name": "EmailAddress", + "nativeType": "nvarchar", + "normalizedType": "text", + "dimensionType": "string", + "nullable": true, + "primaryKey": false, + "comment": "E-mail address for the person." + }, + { + "name": "rowguid", + "nativeType": "uniqueidentifier", + "normalizedType": "text", + "dimensionType": "string", + "nullable": false, + "primaryKey": false, + "comment": "ROWGUIDCOL number uniquely identifying the record. 
Used to support a merge replication sample." + }, + { + "name": "ModifiedDate", + "nativeType": "datetime", + "normalizedType": "text", + "dimensionType": "time", + "nullable": false, + "primaryKey": false, + "comment": "Date and time the record was last updated." + } + ], + "foreignKeys": [ + { + "fromColumn": "BusinessEntityID", + "toCatalog": null, + "toDb": "main", + "toTable": "Person.Person", + "toColumn": "BusinessEntityID", + "constraintName": "FK_EmailAddress_Person_BusinessEntityID" + } + ] + }, + { + "catalog": null, + "db": "main", + "name": "Person.Password", + "kind": "table", + "comment": "One way hashed authentication information", + "estimatedRows": 19972, + "columns": [ + { + "name": "BusinessEntityID", + "nativeType": "int", + "normalizedType": "integer", + "dimensionType": "number", + "nullable": false, + "primaryKey": true, + "comment": "Clustered index created by a primary key constraint." + }, + { + "name": "PasswordHash", + "nativeType": "varchar", + "normalizedType": "text", + "dimensionType": "string", + "nullable": false, + "primaryKey": false, + "comment": "Password for the e-mail account." + }, + { + "name": "PasswordSalt", + "nativeType": "varchar", + "normalizedType": "text", + "dimensionType": "string", + "nullable": false, + "primaryKey": false, + "comment": "Random value concatenated with the password string before the password is hashed." + }, + { + "name": "rowguid", + "nativeType": "uniqueidentifier", + "normalizedType": "text", + "dimensionType": "string", + "nullable": false, + "primaryKey": false, + "comment": "ROWGUIDCOL number uniquely identifying the record. Used to support a merge replication sample." + }, + { + "name": "ModifiedDate", + "nativeType": "datetime", + "normalizedType": "text", + "dimensionType": "time", + "nullable": false, + "primaryKey": false, + "comment": "Date and time the record was last updated." 
+ } + ], + "foreignKeys": [ + { + "fromColumn": "BusinessEntityID", + "toCatalog": null, + "toDb": "main", + "toTable": "Person.Person", + "toColumn": "BusinessEntityID", + "constraintName": "FK_Password_Person_BusinessEntityID" + } + ] + }, + { + "catalog": null, + "db": "main", + "name": "Person.Person", + "kind": "table", + "comment": "Human beings involved with AdventureWorks: employees, customer contacts, and vendor contacts.", + "estimatedRows": 19972, + "columns": [ + { + "name": "BusinessEntityID", + "nativeType": "int", + "normalizedType": "integer", + "dimensionType": "number", + "nullable": false, + "primaryKey": true, + "comment": "Clustered index created by a primary key constraint." + }, + { + "name": "PersonType", + "nativeType": "nchar", + "normalizedType": "text", + "dimensionType": "string", + "nullable": false, + "primaryKey": false, + "comment": "Primary type of person: SC = Store Contact, IN = Individual (retail) customer, SP = Sales person, EM = Employee (non-sales), VC = Vendor contact, GC = General contact" + }, + { + "name": "NameStyle", + "nativeType": "bit", + "normalizedType": "text", + "dimensionType": "string", + "nullable": false, + "primaryKey": false, + "comment": "Unique nonclustered index. Used to support replication samples." + }, + { + "name": "Title", + "nativeType": "nvarchar", + "normalizedType": "text", + "dimensionType": "string", + "nullable": true, + "primaryKey": false, + "comment": "A courtesy title. For example, Mr. or Ms." + }, + { + "name": "FirstName", + "nativeType": "nvarchar", + "normalizedType": "text", + "dimensionType": "string", + "nullable": false, + "primaryKey": false, + "comment": "First name of the person." + }, + { + "name": "MiddleName", + "nativeType": "nvarchar", + "normalizedType": "text", + "dimensionType": "string", + "nullable": true, + "primaryKey": false, + "comment": "Middle name or middle initial of the person." 
+ }, + { + "name": "LastName", + "nativeType": "nvarchar", + "normalizedType": "text", + "dimensionType": "string", + "nullable": false, + "primaryKey": false, + "comment": "Last name of the person." + }, + { + "name": "Suffix", + "nativeType": "nvarchar", + "normalizedType": "text", + "dimensionType": "string", + "nullable": true, + "primaryKey": false, + "comment": "Surname suffix. For example, Sr. or Jr." + }, + { + "name": "EmailPromotion", + "nativeType": "int", + "normalizedType": "integer", + "dimensionType": "number", + "nullable": false, + "primaryKey": false, + "comment": "0 = Contact does not wish to receive e-mail promotions, 1 = Contact does wish to receive e-mail promotions from AdventureWorks, 2 = Contact does wish to receive e-mail promotions from AdventureWorks and selected partners. " + }, + { + "name": "AdditionalContactInfo", + "nativeType": "xml", + "normalizedType": "text", + "dimensionType": "string", + "nullable": true, + "primaryKey": false, + "comment": "Additional contact information about the person stored in xml format. " + }, + { + "name": "Demographics", + "nativeType": "xml", + "normalizedType": "text", + "dimensionType": "string", + "nullable": true, + "primaryKey": false, + "comment": "Personal information such as hobbies, and income collected from online shoppers. Used for sales analysis." + }, + { + "name": "rowguid", + "nativeType": "uniqueidentifier", + "normalizedType": "text", + "dimensionType": "string", + "nullable": false, + "primaryKey": false, + "comment": "ROWGUIDCOL number uniquely identifying the record. Used to support a merge replication sample." + }, + { + "name": "ModifiedDate", + "nativeType": "datetime", + "normalizedType": "text", + "dimensionType": "time", + "nullable": false, + "primaryKey": false, + "comment": "Date and time the record was last updated." 
+ } + ], + "foreignKeys": [ + { + "fromColumn": "BusinessEntityID", + "toCatalog": null, + "toDb": "main", + "toTable": "Person.BusinessEntity", + "toColumn": "BusinessEntityID", + "constraintName": "FK_Person_BusinessEntity_BusinessEntityID" + } + ] + }, + { + "catalog": null, + "db": "main", + "name": "Person.PersonPhone", + "kind": "table", + "comment": "Telephone number and type of a person.", + "estimatedRows": 19972, + "columns": [ + { + "name": "BusinessEntityID", + "nativeType": "int", + "normalizedType": "integer", + "dimensionType": "number", + "nullable": false, + "primaryKey": true, + "comment": "Clustered index created by a primary key constraint." + }, + { + "name": "PhoneNumber", + "nativeType": "nvarchar", + "normalizedType": "text", + "dimensionType": "string", + "nullable": false, + "primaryKey": true, + "comment": "Nonclustered index." + }, + { + "name": "PhoneNumberTypeID", + "nativeType": "int", + "normalizedType": "integer", + "dimensionType": "number", + "nullable": false, + "primaryKey": true, + "comment": "Kind of phone number. Foreign key to PhoneNumberType.PhoneNumberTypeID." + }, + { + "name": "ModifiedDate", + "nativeType": "datetime", + "normalizedType": "text", + "dimensionType": "time", + "nullable": false, + "primaryKey": false, + "comment": "Date and time the record was last updated." 
+ } + ], + "foreignKeys": [ + { + "fromColumn": "BusinessEntityID", + "toCatalog": null, + "toDb": "main", + "toTable": "Person.Person", + "toColumn": "BusinessEntityID", + "constraintName": "FK_PersonPhone_Person_BusinessEntityID" + }, + { + "fromColumn": "PhoneNumberTypeID", + "toCatalog": null, + "toDb": "main", + "toTable": "Person.PhoneNumberType", + "toColumn": "PhoneNumberTypeID", + "constraintName": "FK_PersonPhone_PhoneNumberType_PhoneNumberTypeID" + } + ] + }, + { + "catalog": null, + "db": "main", + "name": "Person.PhoneNumberType", + "kind": "table", + "comment": "Type of phone number of a person.", + "estimatedRows": 3, + "columns": [ + { + "name": "PhoneNumberTypeID", + "nativeType": "int", + "normalizedType": "integer", + "dimensionType": "number", + "nullable": false, + "primaryKey": true, + "comment": "Clustered index created by a primary key constraint." + }, + { + "name": "Name", + "nativeType": "nvarchar", + "normalizedType": "text", + "dimensionType": "string", + "nullable": false, + "primaryKey": false, + "comment": "Name of the telephone number type" + }, + { + "name": "ModifiedDate", + "nativeType": "datetime", + "normalizedType": "text", + "dimensionType": "time", + "nullable": false, + "primaryKey": false, + "comment": "Date and time the record was last updated." + } + ], + "foreignKeys": [] + }, + { + "catalog": null, + "db": "main", + "name": "Person.StateProvince", + "kind": "table", + "comment": "State and province lookup table.", + "estimatedRows": 181, + "columns": [ + { + "name": "StateProvinceID", + "nativeType": "int", + "normalizedType": "integer", + "dimensionType": "number", + "nullable": false, + "primaryKey": true, + "comment": "Clustered index created by a primary key constraint." + }, + { + "name": "StateProvinceCode", + "nativeType": "nchar", + "normalizedType": "text", + "dimensionType": "string", + "nullable": false, + "primaryKey": false, + "comment": "Unique nonclustered index." 
+ }, + { + "name": "CountryRegionCode", + "nativeType": "nvarchar", + "normalizedType": "text", + "dimensionType": "string", + "nullable": false, + "primaryKey": false, + "comment": "Unique nonclustered index." + }, + { + "name": "IsOnlyStateProvinceFlag", + "nativeType": "bit", + "normalizedType": "text", + "dimensionType": "string", + "nullable": false, + "primaryKey": false, + "comment": "Unique nonclustered index. Used to support replication samples." + }, + { + "name": "Name", + "nativeType": "nvarchar", + "normalizedType": "text", + "dimensionType": "string", + "nullable": false, + "primaryKey": false, + "comment": "State or province description." + }, + { + "name": "TerritoryID", + "nativeType": "int", + "normalizedType": "integer", + "dimensionType": "number", + "nullable": false, + "primaryKey": false, + "comment": "ID of the territory in which the state or province is located. Foreign key to SalesTerritory.SalesTerritoryID." + }, + { + "name": "rowguid", + "nativeType": "uniqueidentifier", + "normalizedType": "text", + "dimensionType": "string", + "nullable": false, + "primaryKey": false, + "comment": "ROWGUIDCOL number uniquely identifying the record. Used to support a merge replication sample." + }, + { + "name": "ModifiedDate", + "nativeType": "datetime", + "normalizedType": "text", + "dimensionType": "time", + "nullable": false, + "primaryKey": false, + "comment": "Date and time the record was last updated." 
+ } + ], + "foreignKeys": [ + { + "fromColumn": "CountryRegionCode", + "toCatalog": null, + "toDb": "main", + "toTable": "Person.CountryRegion", + "toColumn": "CountryRegionCode", + "constraintName": "FK_StateProvince_CountryRegion_CountryRegionCode" + }, + { + "fromColumn": "TerritoryID", + "toCatalog": null, + "toDb": "main", + "toTable": "Sales.SalesTerritory", + "toColumn": "TerritoryID", + "constraintName": "FK_StateProvince_SalesTerritory_TerritoryID" + } + ] + }, + { + "catalog": null, + "db": "main", + "name": "Production.BillOfMaterials", + "kind": "table", + "comment": "Items required to make bicycles and bicycle subassemblies. It identifies the heirarchical relationship between a parent product and its components.", + "estimatedRows": 2679, + "columns": [ + { + "name": "BillOfMaterialsID", + "nativeType": "int", + "normalizedType": "integer", + "dimensionType": "number", + "nullable": false, + "primaryKey": true, + "comment": "Clustered index." + }, + { + "name": "ProductAssemblyID", + "nativeType": "int", + "normalizedType": "integer", + "dimensionType": "number", + "nullable": true, + "primaryKey": false, + "comment": "Nonclustered index created by a primary key constraint." + }, + { + "name": "ComponentID", + "nativeType": "int", + "normalizedType": "integer", + "dimensionType": "number", + "nullable": false, + "primaryKey": false, + "comment": "Nonclustered index." + }, + { + "name": "StartDate", + "nativeType": "datetime", + "normalizedType": "text", + "dimensionType": "time", + "nullable": false, + "primaryKey": false, + "comment": "Date the component started being used in the assembly item." + }, + { + "name": "EndDate", + "nativeType": "datetime", + "normalizedType": "text", + "dimensionType": "time", + "nullable": true, + "primaryKey": false, + "comment": "Date the component stopped being used in the assembly item." 
+ }, + { + "name": "UnitMeasureCode", + "nativeType": "nchar", + "normalizedType": "text", + "dimensionType": "string", + "nullable": false, + "primaryKey": false, + "comment": "Standard code identifying the unit of measure for the quantity." + }, + { + "name": "BOMLevel", + "nativeType": "smallint", + "normalizedType": "integer", + "dimensionType": "number", + "nullable": false, + "primaryKey": false, + "comment": "Indicates the depth the component is from its parent (AssemblyID)." + }, + { + "name": "PerAssemblyQty", + "nativeType": "decimal", + "normalizedType": "real", + "dimensionType": "number", + "nullable": false, + "primaryKey": false, + "comment": "Quantity of the component needed to create the assembly." + }, + { + "name": "ModifiedDate", + "nativeType": "datetime", + "normalizedType": "text", + "dimensionType": "time", + "nullable": false, + "primaryKey": false, + "comment": "Date and time the record was last updated." + } + ], + "foreignKeys": [ + { + "fromColumn": "ComponentID", + "toCatalog": null, + "toDb": "main", + "toTable": "Production.Product", + "toColumn": "ProductID", + "constraintName": "FK_BillOfMaterials_Product_ComponentID" + }, + { + "fromColumn": "ProductAssemblyID", + "toCatalog": null, + "toDb": "main", + "toTable": "Production.Product", + "toColumn": "ProductID", + "constraintName": "FK_BillOfMaterials_Product_ProductAssemblyID" + }, + { + "fromColumn": "UnitMeasureCode", + "toCatalog": null, + "toDb": "main", + "toTable": "Production.UnitMeasure", + "toColumn": "UnitMeasureCode", + "constraintName": "FK_BillOfMaterials_UnitMeasure_UnitMeasureCode" + } + ] + }, + { + "catalog": null, + "db": "main", + "name": "Production.Culture", + "kind": "table", + "comment": "Lookup table containing the languages in which some AdventureWorks data is stored.", + "estimatedRows": 8, + "columns": [ + { + "name": "CultureID", + "nativeType": "nchar", + "normalizedType": "text", + "dimensionType": "string", + "nullable": false, + "primaryKey": true, 
+ "comment": "Clustered index created by a primary key constraint." + }, + { + "name": "Name", + "nativeType": "nvarchar", + "normalizedType": "text", + "dimensionType": "string", + "nullable": false, + "primaryKey": false, + "comment": "Unique nonclustered index." + }, + { + "name": "ModifiedDate", + "nativeType": "datetime", + "normalizedType": "text", + "dimensionType": "time", + "nullable": false, + "primaryKey": false, + "comment": "Date and time the record was last updated." + } + ], + "foreignKeys": [] + }, + { + "catalog": null, + "db": "main", + "name": "Production.Document", + "kind": "table", + "comment": "Product maintenance documents.", + "estimatedRows": 13, + "columns": [ + { + "name": "DocumentNode", + "nativeType": "hierarchyid", + "normalizedType": "text", + "dimensionType": "string", + "nullable": false, + "primaryKey": true, + "comment": "Clustered index created by a primary key constraint." + }, + { + "name": "DocumentLevel", + "nativeType": "smallint", + "normalizedType": "integer", + "dimensionType": "number", + "nullable": true, + "primaryKey": false, + "comment": "Depth in the document hierarchy." + }, + { + "name": "Title", + "nativeType": "nvarchar", + "normalizedType": "text", + "dimensionType": "string", + "nullable": false, + "primaryKey": false, + "comment": "Unique nonclustered index." + }, + { + "name": "Owner", + "nativeType": "int", + "normalizedType": "integer", + "dimensionType": "number", + "nullable": false, + "primaryKey": false, + "comment": "Unique nonclustered index. Used to support FileStream." + }, + { + "name": "FolderFlag", + "nativeType": "bit", + "normalizedType": "text", + "dimensionType": "string", + "nullable": false, + "primaryKey": false, + "comment": "Unique nonclustered index." 
+ }, + { + "name": "FileName", + "nativeType": "nvarchar", + "normalizedType": "text", + "dimensionType": "string", + "nullable": false, + "primaryKey": false, + "comment": "File name of the document" + }, + { + "name": "FileExtension", + "nativeType": "nvarchar", + "normalizedType": "text", + "dimensionType": "string", + "nullable": false, + "primaryKey": false, + "comment": "File extension indicating the document type. For example, .doc or .txt." + }, + { + "name": "Revision", + "nativeType": "nchar", + "normalizedType": "text", + "dimensionType": "string", + "nullable": false, + "primaryKey": false, + "comment": "Revision number of the document. " + }, + { + "name": "ChangeNumber", + "nativeType": "int", + "normalizedType": "integer", + "dimensionType": "number", + "nullable": false, + "primaryKey": false, + "comment": "Engineering change approval number." + }, + { + "name": "Status", + "nativeType": "tinyint", + "normalizedType": "integer", + "dimensionType": "number", + "nullable": false, + "primaryKey": false, + "comment": "1 = Pending approval, 2 = Approved, 3 = Obsolete" + }, + { + "name": "DocumentSummary", + "nativeType": "nvarchar", + "normalizedType": "text", + "dimensionType": "string", + "nullable": true, + "primaryKey": false, + "comment": "Document abstract." + }, + { + "name": "Document", + "nativeType": "varbinary", + "normalizedType": "text", + "dimensionType": "string", + "nullable": true, + "primaryKey": false, + "comment": "Complete document." + }, + { + "name": "rowguid", + "nativeType": "uniqueidentifier", + "normalizedType": "text", + "dimensionType": "string", + "nullable": false, + "primaryKey": false, + "comment": "ROWGUIDCOL number uniquely identifying the record. Required for FileStream." + }, + { + "name": "ModifiedDate", + "nativeType": "datetime", + "normalizedType": "text", + "dimensionType": "time", + "nullable": false, + "primaryKey": false, + "comment": "Date and time the record was last updated." 
+ } + ], + "foreignKeys": [ + { + "fromColumn": "Owner", + "toCatalog": null, + "toDb": "main", + "toTable": "HumanResources.Employee", + "toColumn": "BusinessEntityID", + "constraintName": "FK_Document_Employee_Owner" + } + ] + }, + { + "catalog": null, + "db": "main", + "name": "Production.Illustration", + "kind": "table", + "comment": "Bicycle assembly diagrams.", + "estimatedRows": 5, + "columns": [ + { + "name": "IllustrationID", + "nativeType": "int", + "normalizedType": "integer", + "dimensionType": "number", + "nullable": false, + "primaryKey": true, + "comment": "Clustered index created by a primary key constraint." + }, + { + "name": "Diagram", + "nativeType": "xml", + "normalizedType": "text", + "dimensionType": "string", + "nullable": true, + "primaryKey": false, + "comment": "Illustrations used in manufacturing instructions. Stored as XML." + }, + { + "name": "ModifiedDate", + "nativeType": "datetime", + "normalizedType": "text", + "dimensionType": "time", + "nullable": false, + "primaryKey": false, + "comment": "Date and time the record was last updated." + } + ], + "foreignKeys": [] + }, + { + "catalog": null, + "db": "main", + "name": "Production.Location", + "kind": "table", + "comment": "Product inventory and manufacturing locations.", + "estimatedRows": 14, + "columns": [ + { + "name": "LocationID", + "nativeType": "smallint", + "normalizedType": "integer", + "dimensionType": "number", + "nullable": false, + "primaryKey": true, + "comment": "Clustered index created by a primary key constraint." + }, + { + "name": "Name", + "nativeType": "nvarchar", + "normalizedType": "text", + "dimensionType": "string", + "nullable": false, + "primaryKey": false, + "comment": "Unique nonclustered index." + }, + { + "name": "CostRate", + "nativeType": "smallmoney", + "normalizedType": "text", + "dimensionType": "string", + "nullable": false, + "primaryKey": false, + "comment": "Standard hourly cost of the manufacturing location." 
+ }, + { + "name": "Availability", + "nativeType": "decimal", + "normalizedType": "real", + "dimensionType": "number", + "nullable": false, + "primaryKey": false, + "comment": "Work capacity (in hours) of the manufacturing location." + }, + { + "name": "ModifiedDate", + "nativeType": "datetime", + "normalizedType": "text", + "dimensionType": "time", + "nullable": false, + "primaryKey": false, + "comment": "Date and time the record was last updated." + } + ], + "foreignKeys": [] + }, + { + "catalog": null, + "db": "main", + "name": "Production.Product", + "kind": "table", + "comment": "Products sold or used in the manfacturing of sold products.", + "estimatedRows": 504, + "columns": [ + { + "name": "ProductID", + "nativeType": "int", + "normalizedType": "integer", + "dimensionType": "number", + "nullable": false, + "primaryKey": true, + "comment": "Clustered index created by a primary key constraint." + }, + { + "name": "Name", + "nativeType": "nvarchar", + "normalizedType": "text", + "dimensionType": "string", + "nullable": false, + "primaryKey": false, + "comment": "Unique nonclustered index." + }, + { + "name": "ProductNumber", + "nativeType": "nvarchar", + "normalizedType": "text", + "dimensionType": "string", + "nullable": false, + "primaryKey": false, + "comment": "Unique nonclustered index." + }, + { + "name": "MakeFlag", + "nativeType": "bit", + "normalizedType": "text", + "dimensionType": "string", + "nullable": false, + "primaryKey": false, + "comment": "Unique nonclustered index. Used to support replication samples." + }, + { + "name": "FinishedGoodsFlag", + "nativeType": "bit", + "normalizedType": "text", + "dimensionType": "string", + "nullable": false, + "primaryKey": false, + "comment": "0 = Product is not a salable item. 1 = Product is salable." + }, + { + "name": "Color", + "nativeType": "nvarchar", + "normalizedType": "text", + "dimensionType": "string", + "nullable": true, + "primaryKey": false, + "comment": "Product color." 
+ }, + { + "name": "SafetyStockLevel", + "nativeType": "smallint", + "normalizedType": "integer", + "dimensionType": "number", + "nullable": false, + "primaryKey": false, + "comment": "Minimum inventory quantity. " + }, + { + "name": "ReorderPoint", + "nativeType": "smallint", + "normalizedType": "integer", + "dimensionType": "number", + "nullable": false, + "primaryKey": false, + "comment": "Inventory level that triggers a purchase order or work order. " + }, + { + "name": "StandardCost", + "nativeType": "money", + "normalizedType": "text", + "dimensionType": "string", + "nullable": false, + "primaryKey": false, + "comment": "Standard cost of the product." + }, + { + "name": "ListPrice", + "nativeType": "money", + "normalizedType": "text", + "dimensionType": "string", + "nullable": false, + "primaryKey": false, + "comment": "Selling price." + }, + { + "name": "Size", + "nativeType": "nvarchar", + "normalizedType": "text", + "dimensionType": "string", + "nullable": true, + "primaryKey": false, + "comment": "Product size." + }, + { + "name": "SizeUnitMeasureCode", + "nativeType": "nchar", + "normalizedType": "text", + "dimensionType": "string", + "nullable": true, + "primaryKey": false, + "comment": "Unit of measure for Size column." + }, + { + "name": "WeightUnitMeasureCode", + "nativeType": "nchar", + "normalizedType": "text", + "dimensionType": "string", + "nullable": true, + "primaryKey": false, + "comment": "Unit of measure for Weight column." + }, + { + "name": "Weight", + "nativeType": "decimal", + "normalizedType": "real", + "dimensionType": "number", + "nullable": true, + "primaryKey": false, + "comment": "Product weight." + }, + { + "name": "DaysToManufacture", + "nativeType": "int", + "normalizedType": "integer", + "dimensionType": "number", + "nullable": false, + "primaryKey": false, + "comment": "Number of days required to manufacture the product." 
+ }, + { + "name": "ProductLine", + "nativeType": "nchar", + "normalizedType": "text", + "dimensionType": "string", + "nullable": true, + "primaryKey": false, + "comment": "R = Road, M = Mountain, T = Touring, S = Standard" + }, + { + "name": "Class", + "nativeType": "nchar", + "normalizedType": "text", + "dimensionType": "string", + "nullable": true, + "primaryKey": false, + "comment": "H = High, M = Medium, L = Low" + }, + { + "name": "Style", + "nativeType": "nchar", + "normalizedType": "text", + "dimensionType": "string", + "nullable": true, + "primaryKey": false, + "comment": "W = Womens, M = Mens, U = Universal" + }, + { + "name": "ProductSubcategoryID", + "nativeType": "int", + "normalizedType": "integer", + "dimensionType": "number", + "nullable": true, + "primaryKey": false, + "comment": "Product is a member of this product subcategory. Foreign key to ProductSubCategory.ProductSubCategoryID. " + }, + { + "name": "ProductModelID", + "nativeType": "int", + "normalizedType": "integer", + "dimensionType": "number", + "nullable": true, + "primaryKey": false, + "comment": "Product is a member of this product model. Foreign key to ProductModel.ProductModelID." + }, + { + "name": "SellStartDate", + "nativeType": "datetime", + "normalizedType": "text", + "dimensionType": "time", + "nullable": false, + "primaryKey": false, + "comment": "Date the product was available for sale." + }, + { + "name": "SellEndDate", + "nativeType": "datetime", + "normalizedType": "text", + "dimensionType": "time", + "nullable": true, + "primaryKey": false, + "comment": "Date the product was no longer available for sale." + }, + { + "name": "DiscontinuedDate", + "nativeType": "datetime", + "normalizedType": "text", + "dimensionType": "time", + "nullable": true, + "primaryKey": false, + "comment": "Date the product was discontinued." 
+ }, + { + "name": "rowguid", + "nativeType": "uniqueidentifier", + "normalizedType": "text", + "dimensionType": "string", + "nullable": false, + "primaryKey": false, + "comment": "ROWGUIDCOL number uniquely identifying the record. Used to support a merge replication sample." + }, + { + "name": "ModifiedDate", + "nativeType": "datetime", + "normalizedType": "text", + "dimensionType": "time", + "nullable": false, + "primaryKey": false, + "comment": "Date and time the record was last updated." + } + ], + "foreignKeys": [ + { + "fromColumn": "ProductModelID", + "toCatalog": null, + "toDb": "main", + "toTable": "Production.ProductModel", + "toColumn": "ProductModelID", + "constraintName": "FK_Product_ProductModel_ProductModelID" + }, + { + "fromColumn": "ProductSubcategoryID", + "toCatalog": null, + "toDb": "main", + "toTable": "Production.ProductSubcategory", + "toColumn": "ProductSubcategoryID", + "constraintName": "FK_Product_ProductSubcategory_ProductSubcategoryID" + }, + { + "fromColumn": "SizeUnitMeasureCode", + "toCatalog": null, + "toDb": "main", + "toTable": "Production.UnitMeasure", + "toColumn": "UnitMeasureCode", + "constraintName": "FK_Product_UnitMeasure_SizeUnitMeasureCode" + }, + { + "fromColumn": "WeightUnitMeasureCode", + "toCatalog": null, + "toDb": "main", + "toTable": "Production.UnitMeasure", + "toColumn": "UnitMeasureCode", + "constraintName": "FK_Product_UnitMeasure_WeightUnitMeasureCode" + } + ] + }, + { + "catalog": null, + "db": "main", + "name": "Production.ProductCategory", + "kind": "table", + "comment": "High-level product categorization.", + "estimatedRows": 4, + "columns": [ + { + "name": "ProductCategoryID", + "nativeType": "int", + "normalizedType": "integer", + "dimensionType": "number", + "nullable": false, + "primaryKey": true, + "comment": "Clustered index created by a primary key constraint." 
+ }, + { + "name": "Name", + "nativeType": "nvarchar", + "normalizedType": "text", + "dimensionType": "string", + "nullable": false, + "primaryKey": false, + "comment": "Unique nonclustered index." + }, + { + "name": "rowguid", + "nativeType": "uniqueidentifier", + "normalizedType": "text", + "dimensionType": "string", + "nullable": false, + "primaryKey": false, + "comment": "Unique nonclustered index. Used to support replication samples." + }, + { + "name": "ModifiedDate", + "nativeType": "datetime", + "normalizedType": "text", + "dimensionType": "time", + "nullable": false, + "primaryKey": false, + "comment": "Date and time the record was last updated." + } + ], + "foreignKeys": [] + }, + { + "catalog": null, + "db": "main", + "name": "Production.ProductCostHistory", + "kind": "table", + "comment": "Changes in the cost of a product over time.", + "estimatedRows": 395, + "columns": [ + { + "name": "ProductID", + "nativeType": "int", + "normalizedType": "integer", + "dimensionType": "number", + "nullable": false, + "primaryKey": true, + "comment": "Clustered index created by a primary key constraint." + }, + { + "name": "StartDate", + "nativeType": "datetime", + "normalizedType": "text", + "dimensionType": "time", + "nullable": false, + "primaryKey": true, + "comment": "Product cost start date." + }, + { + "name": "EndDate", + "nativeType": "datetime", + "normalizedType": "text", + "dimensionType": "time", + "nullable": true, + "primaryKey": false, + "comment": "Product cost end date." + }, + { + "name": "StandardCost", + "nativeType": "money", + "normalizedType": "text", + "dimensionType": "string", + "nullable": false, + "primaryKey": false, + "comment": "Standard cost of the product." + }, + { + "name": "ModifiedDate", + "nativeType": "datetime", + "normalizedType": "text", + "dimensionType": "time", + "nullable": false, + "primaryKey": false, + "comment": "Date and time the record was last updated." 
+ } + ], + "foreignKeys": [ + { + "fromColumn": "ProductID", + "toCatalog": null, + "toDb": "main", + "toTable": "Production.Product", + "toColumn": "ProductID", + "constraintName": "FK_ProductCostHistory_Product_ProductID" + } + ] + }, + { + "catalog": null, + "db": "main", + "name": "Production.ProductDescription", + "kind": "table", + "comment": "Product descriptions in several languages.", + "estimatedRows": 762, + "columns": [ + { + "name": "ProductDescriptionID", + "nativeType": "int", + "normalizedType": "integer", + "dimensionType": "number", + "nullable": false, + "primaryKey": true, + "comment": "Clustered index created by a primary key constraint." + }, + { + "name": "Description", + "nativeType": "nvarchar", + "normalizedType": "text", + "dimensionType": "string", + "nullable": false, + "primaryKey": false, + "comment": "Unique nonclustered index. Used to support replication samples." + }, + { + "name": "rowguid", + "nativeType": "uniqueidentifier", + "normalizedType": "text", + "dimensionType": "string", + "nullable": false, + "primaryKey": false, + "comment": "ROWGUIDCOL number uniquely identifying the record. Used to support a merge replication sample." + }, + { + "name": "ModifiedDate", + "nativeType": "datetime", + "normalizedType": "text", + "dimensionType": "time", + "nullable": false, + "primaryKey": false, + "comment": "Date and time the record was last updated." + } + ], + "foreignKeys": [] + }, + { + "catalog": null, + "db": "main", + "name": "Production.ProductDocument", + "kind": "table", + "comment": "Cross-reference table mapping products to related product documents.", + "estimatedRows": 32, + "columns": [ + { + "name": "ProductID", + "nativeType": "int", + "normalizedType": "integer", + "dimensionType": "number", + "nullable": false, + "primaryKey": true, + "comment": "Clustered index created by a primary key constraint." 
+ }, + { + "name": "DocumentNode", + "nativeType": "hierarchyid", + "normalizedType": "text", + "dimensionType": "string", + "nullable": false, + "primaryKey": true, + "comment": "Document identification number. Foreign key to Document.DocumentNode." + }, + { + "name": "ModifiedDate", + "nativeType": "datetime", + "normalizedType": "text", + "dimensionType": "time", + "nullable": false, + "primaryKey": false, + "comment": "Date and time the record was last updated." + } + ], + "foreignKeys": [ + { + "fromColumn": "DocumentNode", + "toCatalog": null, + "toDb": "main", + "toTable": "Production.Document", + "toColumn": "DocumentNode", + "constraintName": "FK_ProductDocument_Document_DocumentNode" + }, + { + "fromColumn": "ProductID", + "toCatalog": null, + "toDb": "main", + "toTable": "Production.Product", + "toColumn": "ProductID", + "constraintName": "FK_ProductDocument_Product_ProductID" + } + ] + }, + { + "catalog": null, + "db": "main", + "name": "Production.ProductInventory", + "kind": "table", + "comment": "Product inventory information.", + "estimatedRows": 1069, + "columns": [ + { + "name": "ProductID", + "nativeType": "int", + "normalizedType": "integer", + "dimensionType": "number", + "nullable": false, + "primaryKey": true, + "comment": "Clustered index created by a primary key constraint." + }, + { + "name": "LocationID", + "nativeType": "smallint", + "normalizedType": "integer", + "dimensionType": "number", + "nullable": false, + "primaryKey": true, + "comment": "Inventory location identification number. Foreign key to Location.LocationID. " + }, + { + "name": "Shelf", + "nativeType": "nvarchar", + "normalizedType": "text", + "dimensionType": "string", + "nullable": false, + "primaryKey": false, + "comment": "Storage compartment within an inventory location." 
+ }, + { + "name": "Bin", + "nativeType": "tinyint", + "normalizedType": "integer", + "dimensionType": "number", + "nullable": false, + "primaryKey": false, + "comment": "Storage container on a shelf in an inventory location." + }, + { + "name": "Quantity", + "nativeType": "smallint", + "normalizedType": "integer", + "dimensionType": "number", + "nullable": false, + "primaryKey": false, + "comment": "Quantity of products in the inventory location." + }, + { + "name": "rowguid", + "nativeType": "uniqueidentifier", + "normalizedType": "text", + "dimensionType": "string", + "nullable": false, + "primaryKey": false, + "comment": "ROWGUIDCOL number uniquely identifying the record. Used to support a merge replication sample." + }, + { + "name": "ModifiedDate", + "nativeType": "datetime", + "normalizedType": "text", + "dimensionType": "time", + "nullable": false, + "primaryKey": false, + "comment": "Date and time the record was last updated." + } + ], + "foreignKeys": [ + { + "fromColumn": "LocationID", + "toCatalog": null, + "toDb": "main", + "toTable": "Production.Location", + "toColumn": "LocationID", + "constraintName": "FK_ProductInventory_Location_LocationID" + }, + { + "fromColumn": "ProductID", + "toCatalog": null, + "toDb": "main", + "toTable": "Production.Product", + "toColumn": "ProductID", + "constraintName": "FK_ProductInventory_Product_ProductID" + } + ] + }, + { + "catalog": null, + "db": "main", + "name": "Production.ProductListPriceHistory", + "kind": "table", + "comment": "Changes in the list price of a product over time.", + "estimatedRows": 395, + "columns": [ + { + "name": "ProductID", + "nativeType": "int", + "normalizedType": "integer", + "dimensionType": "number", + "nullable": false, + "primaryKey": true, + "comment": "Clustered index created by a primary key constraint." 
+ }, + { + "name": "StartDate", + "nativeType": "datetime", + "normalizedType": "text", + "dimensionType": "time", + "nullable": false, + "primaryKey": true, + "comment": "List price start date." + }, + { + "name": "EndDate", + "nativeType": "datetime", + "normalizedType": "text", + "dimensionType": "time", + "nullable": true, + "primaryKey": false, + "comment": "List price end date" + }, + { + "name": "ListPrice", + "nativeType": "money", + "normalizedType": "text", + "dimensionType": "string", + "nullable": false, + "primaryKey": false, + "comment": "Product list price." + }, + { + "name": "ModifiedDate", + "nativeType": "datetime", + "normalizedType": "text", + "dimensionType": "time", + "nullable": false, + "primaryKey": false, + "comment": "Date and time the record was last updated." + } + ], + "foreignKeys": [ + { + "fromColumn": "ProductID", + "toCatalog": null, + "toDb": "main", + "toTable": "Production.Product", + "toColumn": "ProductID", + "constraintName": "FK_ProductListPriceHistory_Product_ProductID" + } + ] + }, + { + "catalog": null, + "db": "main", + "name": "Production.ProductModel", + "kind": "table", + "comment": "Product model classification.", + "estimatedRows": 128, + "columns": [ + { + "name": "ProductModelID", + "nativeType": "int", + "normalizedType": "integer", + "dimensionType": "number", + "nullable": false, + "primaryKey": true, + "comment": "Clustered index created by a primary key constraint." + }, + { + "name": "Name", + "nativeType": "nvarchar", + "normalizedType": "text", + "dimensionType": "string", + "nullable": false, + "primaryKey": false, + "comment": "Unique nonclustered index." + }, + { + "name": "CatalogDescription", + "nativeType": "xml", + "normalizedType": "text", + "dimensionType": "string", + "nullable": true, + "primaryKey": false, + "comment": "Unique nonclustered index. Used to support replication samples." 
+ }, + { + "name": "Instructions", + "nativeType": "xml", + "normalizedType": "text", + "dimensionType": "string", + "nullable": true, + "primaryKey": false, + "comment": "Manufacturing instructions in xml format." + }, + { + "name": "rowguid", + "nativeType": "uniqueidentifier", + "normalizedType": "text", + "dimensionType": "string", + "nullable": false, + "primaryKey": false, + "comment": "ROWGUIDCOL number uniquely identifying the record. Used to support a merge replication sample." + }, + { + "name": "ModifiedDate", + "nativeType": "datetime", + "normalizedType": "text", + "dimensionType": "time", + "nullable": false, + "primaryKey": false, + "comment": "Date and time the record was last updated." + } + ], + "foreignKeys": [] + }, + { + "catalog": null, + "db": "main", + "name": "Production.ProductModelIllustration", + "kind": "table", + "comment": "Cross-reference table mapping product models and illustrations.", + "estimatedRows": 7, + "columns": [ + { + "name": "ProductModelID", + "nativeType": "int", + "normalizedType": "integer", + "dimensionType": "number", + "nullable": false, + "primaryKey": true, + "comment": "Clustered index created by a primary key constraint." + }, + { + "name": "IllustrationID", + "nativeType": "int", + "normalizedType": "integer", + "dimensionType": "number", + "nullable": false, + "primaryKey": true, + "comment": "Primary key. Foreign key to Illustration.IllustrationID." + }, + { + "name": "ModifiedDate", + "nativeType": "datetime", + "normalizedType": "text", + "dimensionType": "time", + "nullable": false, + "primaryKey": false, + "comment": "Date and time the record was last updated." 
+ } + ], + "foreignKeys": [ + { + "fromColumn": "IllustrationID", + "toCatalog": null, + "toDb": "main", + "toTable": "Production.Illustration", + "toColumn": "IllustrationID", + "constraintName": "FK_ProductModelIllustration_Illustration_IllustrationID" + }, + { + "fromColumn": "ProductModelID", + "toCatalog": null, + "toDb": "main", + "toTable": "Production.ProductModel", + "toColumn": "ProductModelID", + "constraintName": "FK_ProductModelIllustration_ProductModel_ProductModelID" + } + ] + }, + { + "catalog": null, + "db": "main", + "name": "Production.ProductModelProductDescriptionCulture", + "kind": "table", + "comment": "Cross-reference table mapping product descriptions and the language the description is written in.", + "estimatedRows": 762, + "columns": [ + { + "name": "ProductModelID", + "nativeType": "int", + "normalizedType": "integer", + "dimensionType": "number", + "nullable": false, + "primaryKey": true, + "comment": "Clustered index created by a primary key constraint." + }, + { + "name": "ProductDescriptionID", + "nativeType": "int", + "normalizedType": "integer", + "dimensionType": "number", + "nullable": false, + "primaryKey": true, + "comment": "Primary key. Foreign key to ProductDescription.ProductDescriptionID." + }, + { + "name": "CultureID", + "nativeType": "nchar", + "normalizedType": "text", + "dimensionType": "string", + "nullable": false, + "primaryKey": true, + "comment": "Culture identification number. Foreign key to Culture.CultureID." + }, + { + "name": "ModifiedDate", + "nativeType": "datetime", + "normalizedType": "text", + "dimensionType": "time", + "nullable": false, + "primaryKey": false, + "comment": "Date and time the record was last updated." 
+ } + ], + "foreignKeys": [ + { + "fromColumn": "CultureID", + "toCatalog": null, + "toDb": "main", + "toTable": "Production.Culture", + "toColumn": "CultureID", + "constraintName": "FK_ProductModelProductDescriptionCulture_Culture_CultureID" + }, + { + "fromColumn": "ProductDescriptionID", + "toCatalog": null, + "toDb": "main", + "toTable": "Production.ProductDescription", + "toColumn": "ProductDescriptionID", + "constraintName": "FK_ProductModelProductDescriptionCulture_ProductDescription_ProductDescriptionID" + }, + { + "fromColumn": "ProductModelID", + "toCatalog": null, + "toDb": "main", + "toTable": "Production.ProductModel", + "toColumn": "ProductModelID", + "constraintName": "FK_ProductModelProductDescriptionCulture_ProductModel_ProductModelID" + } + ] + }, + { + "catalog": null, + "db": "main", + "name": "Production.ProductPhoto", + "kind": "table", + "comment": "Product images.", + "estimatedRows": 101, + "columns": [ + { + "name": "ProductPhotoID", + "nativeType": "int", + "normalizedType": "integer", + "dimensionType": "number", + "nullable": false, + "primaryKey": true, + "comment": "Clustered index created by a primary key constraint." + }, + { + "name": "ThumbNailPhoto", + "nativeType": "varbinary", + "normalizedType": "text", + "dimensionType": "string", + "nullable": true, + "primaryKey": false, + "comment": "Small image of the product." + }, + { + "name": "ThumbnailPhotoFileName", + "nativeType": "nvarchar", + "normalizedType": "text", + "dimensionType": "string", + "nullable": true, + "primaryKey": false, + "comment": "Small image file name." + }, + { + "name": "LargePhoto", + "nativeType": "varbinary", + "normalizedType": "text", + "dimensionType": "string", + "nullable": true, + "primaryKey": false, + "comment": "Large image of the product." + }, + { + "name": "LargePhotoFileName", + "nativeType": "nvarchar", + "normalizedType": "text", + "dimensionType": "string", + "nullable": true, + "primaryKey": false, + "comment": "Large image file name." 
+ }, + { + "name": "ModifiedDate", + "nativeType": "datetime", + "normalizedType": "text", + "dimensionType": "time", + "nullable": false, + "primaryKey": false, + "comment": "Date and time the record was last updated." + } + ], + "foreignKeys": [] + }, + { + "catalog": null, + "db": "main", + "name": "Production.ProductProductPhoto", + "kind": "table", + "comment": "Cross-reference table mapping products and product photos.", + "estimatedRows": 504, + "columns": [ + { + "name": "ProductID", + "nativeType": "int", + "normalizedType": "integer", + "dimensionType": "number", + "nullable": false, + "primaryKey": true, + "comment": "Product identification number. Foreign key to Product.ProductID." + }, + { + "name": "ProductPhotoID", + "nativeType": "int", + "normalizedType": "integer", + "dimensionType": "number", + "nullable": false, + "primaryKey": true, + "comment": "Nonclustered index created by a primary key constraint." + }, + { + "name": "Primary", + "nativeType": "bit", + "normalizedType": "text", + "dimensionType": "string", + "nullable": false, + "primaryKey": false, + "comment": "0 = Photo is not the principal image. 1 = Photo is the principal image." + }, + { + "name": "ModifiedDate", + "nativeType": "datetime", + "normalizedType": "text", + "dimensionType": "time", + "nullable": false, + "primaryKey": false, + "comment": "Date and time the record was last updated." 
+ } + ], + "foreignKeys": [ + { + "fromColumn": "ProductID", + "toCatalog": null, + "toDb": "main", + "toTable": "Production.Product", + "toColumn": "ProductID", + "constraintName": "FK_ProductProductPhoto_Product_ProductID" + }, + { + "fromColumn": "ProductPhotoID", + "toCatalog": null, + "toDb": "main", + "toTable": "Production.ProductPhoto", + "toColumn": "ProductPhotoID", + "constraintName": "FK_ProductProductPhoto_ProductPhoto_ProductPhotoID" + } + ] + }, + { + "catalog": null, + "db": "main", + "name": "Production.ProductReview", + "kind": "table", + "comment": "Customer reviews of products they have purchased.", + "estimatedRows": 4, + "columns": [ + { + "name": "ProductReviewID", + "nativeType": "int", + "normalizedType": "integer", + "dimensionType": "number", + "nullable": false, + "primaryKey": true, + "comment": "Clustered index created by a primary key constraint." + }, + { + "name": "ProductID", + "nativeType": "int", + "normalizedType": "integer", + "dimensionType": "number", + "nullable": false, + "primaryKey": false, + "comment": "Nonclustered index." + }, + { + "name": "ReviewerName", + "nativeType": "nvarchar", + "normalizedType": "text", + "dimensionType": "string", + "nullable": false, + "primaryKey": false, + "comment": "Name of the reviewer." + }, + { + "name": "ReviewDate", + "nativeType": "datetime", + "normalizedType": "text", + "dimensionType": "time", + "nullable": false, + "primaryKey": false, + "comment": "Date review was submitted." + }, + { + "name": "EmailAddress", + "nativeType": "nvarchar", + "normalizedType": "text", + "dimensionType": "string", + "nullable": false, + "primaryKey": false, + "comment": "Reviewer's e-mail address." + }, + { + "name": "Rating", + "nativeType": "int", + "normalizedType": "integer", + "dimensionType": "number", + "nullable": false, + "primaryKey": false, + "comment": "Product rating given by the reviewer. Scale is 1 to 5 with 5 as the highest rating." 
+ }, + { + "name": "Comments", + "nativeType": "nvarchar", + "normalizedType": "text", + "dimensionType": "string", + "nullable": true, + "primaryKey": false, + "comment": "Reviewer's comments" + }, + { + "name": "ModifiedDate", + "nativeType": "datetime", + "normalizedType": "text", + "dimensionType": "time", + "nullable": false, + "primaryKey": false, + "comment": "Date and time the record was last updated." + } + ], + "foreignKeys": [ + { + "fromColumn": "ProductID", + "toCatalog": null, + "toDb": "main", + "toTable": "Production.Product", + "toColumn": "ProductID", + "constraintName": "FK_ProductReview_Product_ProductID" + } + ] + }, + { + "catalog": null, + "db": "main", + "name": "Production.ProductSubcategory", + "kind": "table", + "comment": "Product subcategories. See ProductCategory table.", + "estimatedRows": 37, + "columns": [ + { + "name": "ProductSubcategoryID", + "nativeType": "int", + "normalizedType": "integer", + "dimensionType": "number", + "nullable": false, + "primaryKey": true, + "comment": "Clustered index created by a primary key constraint." + }, + { + "name": "ProductCategoryID", + "nativeType": "int", + "normalizedType": "integer", + "dimensionType": "number", + "nullable": false, + "primaryKey": false, + "comment": "Unique nonclustered index." + }, + { + "name": "Name", + "nativeType": "nvarchar", + "normalizedType": "text", + "dimensionType": "string", + "nullable": false, + "primaryKey": false, + "comment": "Unique nonclustered index. Used to support replication samples." + }, + { + "name": "rowguid", + "nativeType": "uniqueidentifier", + "normalizedType": "text", + "dimensionType": "string", + "nullable": false, + "primaryKey": false, + "comment": "ROWGUIDCOL number uniquely identifying the record. Used to support a merge replication sample." 
+ }, + { + "name": "ModifiedDate", + "nativeType": "datetime", + "normalizedType": "text", + "dimensionType": "time", + "nullable": false, + "primaryKey": false, + "comment": "Date and time the record was last updated." + } + ], + "foreignKeys": [ + { + "fromColumn": "ProductCategoryID", + "toCatalog": null, + "toDb": "main", + "toTable": "Production.ProductCategory", + "toColumn": "ProductCategoryID", + "constraintName": "FK_ProductSubcategory_ProductCategory_ProductCategoryID" + } + ] + }, + { + "catalog": null, + "db": "main", + "name": "Production.ScrapReason", + "kind": "table", + "comment": "Manufacturing failure reasons lookup table.", + "estimatedRows": 16, + "columns": [ + { + "name": "ScrapReasonID", + "nativeType": "smallint", + "normalizedType": "integer", + "dimensionType": "number", + "nullable": false, + "primaryKey": true, + "comment": "Clustered index created by a primary key constraint." + }, + { + "name": "Name", + "nativeType": "nvarchar", + "normalizedType": "text", + "dimensionType": "string", + "nullable": false, + "primaryKey": false, + "comment": "Unique nonclustered index." + }, + { + "name": "ModifiedDate", + "nativeType": "datetime", + "normalizedType": "text", + "dimensionType": "time", + "nullable": false, + "primaryKey": false, + "comment": "Date and time the record was last updated." + } + ], + "foreignKeys": [] + }, + { + "catalog": null, + "db": "main", + "name": "Production.TransactionHistory", + "kind": "table", + "comment": "Record of each purchase order, sales order, or work order transaction year to date.", + "estimatedRows": 113443, + "columns": [ + { + "name": "TransactionID", + "nativeType": "int", + "normalizedType": "integer", + "dimensionType": "number", + "nullable": false, + "primaryKey": true, + "comment": "Clustered index created by a primary key constraint." 
+ }, + { + "name": "ProductID", + "nativeType": "int", + "normalizedType": "integer", + "dimensionType": "number", + "nullable": false, + "primaryKey": false, + "comment": "Nonclustered index." + }, + { + "name": "ReferenceOrderID", + "nativeType": "int", + "normalizedType": "integer", + "dimensionType": "number", + "nullable": false, + "primaryKey": false, + "comment": "Nonclustered index." + }, + { + "name": "ReferenceOrderLineID", + "nativeType": "int", + "normalizedType": "integer", + "dimensionType": "number", + "nullable": false, + "primaryKey": false, + "comment": "Line number associated with the purchase order, sales order, or work order." + }, + { + "name": "TransactionDate", + "nativeType": "datetime", + "normalizedType": "text", + "dimensionType": "time", + "nullable": false, + "primaryKey": false, + "comment": "Date and time of the transaction." + }, + { + "name": "TransactionType", + "nativeType": "nchar", + "normalizedType": "text", + "dimensionType": "string", + "nullable": false, + "primaryKey": false, + "comment": "W = WorkOrder, S = SalesOrder, P = PurchaseOrder" + }, + { + "name": "Quantity", + "nativeType": "int", + "normalizedType": "integer", + "dimensionType": "number", + "nullable": false, + "primaryKey": false, + "comment": "Product quantity." + }, + { + "name": "ActualCost", + "nativeType": "money", + "normalizedType": "text", + "dimensionType": "string", + "nullable": false, + "primaryKey": false, + "comment": "Product cost." + }, + { + "name": "ModifiedDate", + "nativeType": "datetime", + "normalizedType": "text", + "dimensionType": "time", + "nullable": false, + "primaryKey": false, + "comment": "Date and time the record was last updated." 
+ } + ], + "foreignKeys": [ + { + "fromColumn": "ProductID", + "toCatalog": null, + "toDb": "main", + "toTable": "Production.Product", + "toColumn": "ProductID", + "constraintName": "FK_TransactionHistory_Product_ProductID" + } + ] + }, + { + "catalog": null, + "db": "main", + "name": "Production.TransactionHistoryArchive", + "kind": "table", + "comment": "Transactions for previous years.", + "estimatedRows": 89253, + "columns": [ + { + "name": "TransactionID", + "nativeType": "int", + "normalizedType": "integer", + "dimensionType": "number", + "nullable": false, + "primaryKey": true, + "comment": "Clustered index created by a primary key constraint." + }, + { + "name": "ProductID", + "nativeType": "int", + "normalizedType": "integer", + "dimensionType": "number", + "nullable": false, + "primaryKey": false, + "comment": "Nonclustered index." + }, + { + "name": "ReferenceOrderID", + "nativeType": "int", + "normalizedType": "integer", + "dimensionType": "number", + "nullable": false, + "primaryKey": false, + "comment": "Nonclustered index." + }, + { + "name": "ReferenceOrderLineID", + "nativeType": "int", + "normalizedType": "integer", + "dimensionType": "number", + "nullable": false, + "primaryKey": false, + "comment": "Line number associated with the purchase order, sales order, or work order." + }, + { + "name": "TransactionDate", + "nativeType": "datetime", + "normalizedType": "text", + "dimensionType": "time", + "nullable": false, + "primaryKey": false, + "comment": "Date and time of the transaction." + }, + { + "name": "TransactionType", + "nativeType": "nchar", + "normalizedType": "text", + "dimensionType": "string", + "nullable": false, + "primaryKey": false, + "comment": "W = Work Order, S = Sales Order, P = Purchase Order" + }, + { + "name": "Quantity", + "nativeType": "int", + "normalizedType": "integer", + "dimensionType": "number", + "nullable": false, + "primaryKey": false, + "comment": "Product quantity." 
+ }, + { + "name": "ActualCost", + "nativeType": "money", + "normalizedType": "text", + "dimensionType": "string", + "nullable": false, + "primaryKey": false, + "comment": "Product cost." + }, + { + "name": "ModifiedDate", + "nativeType": "datetime", + "normalizedType": "text", + "dimensionType": "time", + "nullable": false, + "primaryKey": false, + "comment": "Date and time the record was last updated." + } + ], + "foreignKeys": [] + }, + { + "catalog": null, + "db": "main", + "name": "Production.UnitMeasure", + "kind": "table", + "comment": "Unit of measure lookup table.", + "estimatedRows": 38, + "columns": [ + { + "name": "UnitMeasureCode", + "nativeType": "nchar", + "normalizedType": "text", + "dimensionType": "string", + "nullable": false, + "primaryKey": true, + "comment": "Clustered index created by a primary key constraint." + }, + { + "name": "Name", + "nativeType": "nvarchar", + "normalizedType": "text", + "dimensionType": "string", + "nullable": false, + "primaryKey": false, + "comment": "Unique nonclustered index." + }, + { + "name": "ModifiedDate", + "nativeType": "datetime", + "normalizedType": "text", + "dimensionType": "time", + "nullable": false, + "primaryKey": false, + "comment": "Date and time the record was last updated." + } + ], + "foreignKeys": [] + }, + { + "catalog": null, + "db": "main", + "name": "Production.WorkOrder", + "kind": "table", + "comment": "Manufacturing work orders.", + "estimatedRows": 72591, + "columns": [ + { + "name": "WorkOrderID", + "nativeType": "int", + "normalizedType": "integer", + "dimensionType": "number", + "nullable": false, + "primaryKey": true, + "comment": "Clustered index created by a primary key constraint." + }, + { + "name": "ProductID", + "nativeType": "int", + "normalizedType": "integer", + "dimensionType": "number", + "nullable": false, + "primaryKey": false, + "comment": "Nonclustered index." 
+ }, + { + "name": "OrderQty", + "nativeType": "int", + "normalizedType": "integer", + "dimensionType": "number", + "nullable": false, + "primaryKey": false, + "comment": "Nonclustered index." + }, + { + "name": "StockedQty", + "nativeType": "int", + "normalizedType": "integer", + "dimensionType": "number", + "nullable": false, + "primaryKey": false, + "comment": "Quantity built and put in inventory." + }, + { + "name": "ScrappedQty", + "nativeType": "smallint", + "normalizedType": "integer", + "dimensionType": "number", + "nullable": false, + "primaryKey": false, + "comment": "Quantity that failed inspection." + }, + { + "name": "StartDate", + "nativeType": "datetime", + "normalizedType": "text", + "dimensionType": "time", + "nullable": false, + "primaryKey": false, + "comment": "Work order start date." + }, + { + "name": "EndDate", + "nativeType": "datetime", + "normalizedType": "text", + "dimensionType": "time", + "nullable": true, + "primaryKey": false, + "comment": "Work order end date." + }, + { + "name": "DueDate", + "nativeType": "datetime", + "normalizedType": "text", + "dimensionType": "time", + "nullable": false, + "primaryKey": false, + "comment": "Work order due date." + }, + { + "name": "ScrapReasonID", + "nativeType": "smallint", + "normalizedType": "integer", + "dimensionType": "number", + "nullable": true, + "primaryKey": false, + "comment": "Reason for inspection failure." + }, + { + "name": "ModifiedDate", + "nativeType": "datetime", + "normalizedType": "text", + "dimensionType": "time", + "nullable": false, + "primaryKey": false, + "comment": "Date and time the record was last updated." 
+ } + ], + "foreignKeys": [ + { + "fromColumn": "ProductID", + "toCatalog": null, + "toDb": "main", + "toTable": "Production.Product", + "toColumn": "ProductID", + "constraintName": "FK_WorkOrder_Product_ProductID" + }, + { + "fromColumn": "ScrapReasonID", + "toCatalog": null, + "toDb": "main", + "toTable": "Production.ScrapReason", + "toColumn": "ScrapReasonID", + "constraintName": "FK_WorkOrder_ScrapReason_ScrapReasonID" + } + ] + }, + { + "catalog": null, + "db": "main", + "name": "Production.WorkOrderRouting", + "kind": "table", + "comment": "Work order details.", + "estimatedRows": 67131, + "columns": [ + { + "name": "WorkOrderID", + "nativeType": "int", + "normalizedType": "integer", + "dimensionType": "number", + "nullable": false, + "primaryKey": true, + "comment": "Clustered index created by a primary key constraint." + }, + { + "name": "ProductID", + "nativeType": "int", + "normalizedType": "integer", + "dimensionType": "number", + "nullable": false, + "primaryKey": true, + "comment": "Nonclustered index." + }, + { + "name": "OperationSequence", + "nativeType": "smallint", + "normalizedType": "integer", + "dimensionType": "number", + "nullable": false, + "primaryKey": true, + "comment": "Primary key. Indicates the manufacturing process sequence." + }, + { + "name": "LocationID", + "nativeType": "smallint", + "normalizedType": "integer", + "dimensionType": "number", + "nullable": false, + "primaryKey": false, + "comment": "Manufacturing location where the part is processed. Foreign key to Location.LocationID." + }, + { + "name": "ScheduledStartDate", + "nativeType": "datetime", + "normalizedType": "text", + "dimensionType": "time", + "nullable": false, + "primaryKey": false, + "comment": "Planned manufacturing start date." + }, + { + "name": "ScheduledEndDate", + "nativeType": "datetime", + "normalizedType": "text", + "dimensionType": "time", + "nullable": false, + "primaryKey": false, + "comment": "Planned manufacturing end date." 
+ }, + { + "name": "ActualStartDate", + "nativeType": "datetime", + "normalizedType": "text", + "dimensionType": "time", + "nullable": true, + "primaryKey": false, + "comment": "Actual start date." + }, + { + "name": "ActualEndDate", + "nativeType": "datetime", + "normalizedType": "text", + "dimensionType": "time", + "nullable": true, + "primaryKey": false, + "comment": "Actual end date." + }, + { + "name": "ActualResourceHrs", + "nativeType": "decimal", + "normalizedType": "real", + "dimensionType": "number", + "nullable": true, + "primaryKey": false, + "comment": "Number of manufacturing hours used." + }, + { + "name": "PlannedCost", + "nativeType": "money", + "normalizedType": "text", + "dimensionType": "string", + "nullable": false, + "primaryKey": false, + "comment": "Estimated manufacturing cost." + }, + { + "name": "ActualCost", + "nativeType": "money", + "normalizedType": "text", + "dimensionType": "string", + "nullable": true, + "primaryKey": false, + "comment": "Actual manufacturing cost." + }, + { + "name": "ModifiedDate", + "nativeType": "datetime", + "normalizedType": "text", + "dimensionType": "time", + "nullable": false, + "primaryKey": false, + "comment": "Date and time the record was last updated." 
+ } + ], + "foreignKeys": [ + { + "fromColumn": "LocationID", + "toCatalog": null, + "toDb": "main", + "toTable": "Production.Location", + "toColumn": "LocationID", + "constraintName": "FK_WorkOrderRouting_Location_LocationID" + }, + { + "fromColumn": "WorkOrderID", + "toCatalog": null, + "toDb": "main", + "toTable": "Production.WorkOrder", + "toColumn": "WorkOrderID", + "constraintName": "FK_WorkOrderRouting_WorkOrder_WorkOrderID" + } + ] + }, + { + "catalog": null, + "db": "main", + "name": "Purchasing.ProductVendor", + "kind": "table", + "comment": "Cross-reference table mapping vendors with the products they supply.", + "estimatedRows": 460, + "columns": [ + { + "name": "ProductID", + "nativeType": "int", + "normalizedType": "integer", + "dimensionType": "number", + "nullable": false, + "primaryKey": true, + "comment": "Clustered index created by a primary key constraint." + }, + { + "name": "BusinessEntityID", + "nativeType": "int", + "normalizedType": "integer", + "dimensionType": "number", + "nullable": false, + "primaryKey": true, + "comment": "Nonclustered index." + }, + { + "name": "AverageLeadTime", + "nativeType": "int", + "normalizedType": "integer", + "dimensionType": "time", + "nullable": false, + "primaryKey": false, + "comment": "Nonclustered index." + }, + { + "name": "StandardPrice", + "nativeType": "money", + "normalizedType": "text", + "dimensionType": "string", + "nullable": false, + "primaryKey": false, + "comment": "The vendor's usual selling price." + }, + { + "name": "LastReceiptCost", + "nativeType": "money", + "normalizedType": "text", + "dimensionType": "string", + "nullable": true, + "primaryKey": false, + "comment": "The selling price when last purchased." + }, + { + "name": "LastReceiptDate", + "nativeType": "datetime", + "normalizedType": "text", + "dimensionType": "time", + "nullable": true, + "primaryKey": false, + "comment": "Date the product was last received by the vendor." 
+ }, + { + "name": "MinOrderQty", + "nativeType": "int", + "normalizedType": "integer", + "dimensionType": "number", + "nullable": false, + "primaryKey": false, + "comment": "The maximum quantity that should be ordered." + }, + { + "name": "MaxOrderQty", + "nativeType": "int", + "normalizedType": "integer", + "dimensionType": "number", + "nullable": false, + "primaryKey": false, + "comment": "The minimum quantity that should be ordered." + }, + { + "name": "OnOrderQty", + "nativeType": "int", + "normalizedType": "integer", + "dimensionType": "number", + "nullable": true, + "primaryKey": false, + "comment": "The quantity currently on order." + }, + { + "name": "UnitMeasureCode", + "nativeType": "nchar", + "normalizedType": "text", + "dimensionType": "string", + "nullable": false, + "primaryKey": false, + "comment": "The product's unit of measure." + }, + { + "name": "ModifiedDate", + "nativeType": "datetime", + "normalizedType": "text", + "dimensionType": "time", + "nullable": false, + "primaryKey": false, + "comment": "Date and time the record was last updated." + } + ], + "foreignKeys": [ + { + "fromColumn": "BusinessEntityID", + "toCatalog": null, + "toDb": "main", + "toTable": "Purchasing.Vendor", + "toColumn": "BusinessEntityID", + "constraintName": "FK_ProductVendor_Vendor_BusinessEntityID" + }, + { + "fromColumn": "ProductID", + "toCatalog": null, + "toDb": "main", + "toTable": "Production.Product", + "toColumn": "ProductID", + "constraintName": "FK_ProductVendor_Product_ProductID" + }, + { + "fromColumn": "UnitMeasureCode", + "toCatalog": null, + "toDb": "main", + "toTable": "Production.UnitMeasure", + "toColumn": "UnitMeasureCode", + "constraintName": "FK_ProductVendor_UnitMeasure_UnitMeasureCode" + } + ] + }, + { + "catalog": null, + "db": "main", + "name": "Purchasing.PurchaseOrderDetail", + "kind": "table", + "comment": "Individual products associated with a specific purchase order. 
See PurchaseOrderHeader.", + "estimatedRows": 8845, + "columns": [ + { + "name": "PurchaseOrderID", + "nativeType": "int", + "normalizedType": "integer", + "dimensionType": "number", + "nullable": false, + "primaryKey": true, + "comment": "Clustered index created by a primary key constraint." + }, + { + "name": "PurchaseOrderDetailID", + "nativeType": "int", + "normalizedType": "integer", + "dimensionType": "number", + "nullable": false, + "primaryKey": true, + "comment": "Nonclustered index." + }, + { + "name": "DueDate", + "nativeType": "datetime", + "normalizedType": "text", + "dimensionType": "time", + "nullable": false, + "primaryKey": false, + "comment": "Date the product is expected to be received." + }, + { + "name": "OrderQty", + "nativeType": "smallint", + "normalizedType": "integer", + "dimensionType": "number", + "nullable": false, + "primaryKey": false, + "comment": "Quantity ordered." + }, + { + "name": "ProductID", + "nativeType": "int", + "normalizedType": "integer", + "dimensionType": "number", + "nullable": false, + "primaryKey": false, + "comment": "Product identification number. Foreign key to Product.ProductID." + }, + { + "name": "UnitPrice", + "nativeType": "money", + "normalizedType": "text", + "dimensionType": "string", + "nullable": false, + "primaryKey": false, + "comment": "Vendor's selling price of a single product." + }, + { + "name": "LineTotal", + "nativeType": "money", + "normalizedType": "text", + "dimensionType": "string", + "nullable": false, + "primaryKey": false, + "comment": "Per product subtotal. Computed as OrderQty * UnitPrice." + }, + { + "name": "ReceivedQty", + "nativeType": "decimal", + "normalizedType": "real", + "dimensionType": "number", + "nullable": false, + "primaryKey": false, + "comment": "Quantity actually received from the vendor." 
+ }, + { + "name": "RejectedQty", + "nativeType": "decimal", + "normalizedType": "real", + "dimensionType": "number", + "nullable": false, + "primaryKey": false, + "comment": "Quantity rejected during inspection." + }, + { + "name": "StockedQty", + "nativeType": "decimal", + "normalizedType": "real", + "dimensionType": "number", + "nullable": false, + "primaryKey": false, + "comment": "Quantity accepted into inventory. Computed as ReceivedQty - RejectedQty." + }, + { + "name": "ModifiedDate", + "nativeType": "datetime", + "normalizedType": "text", + "dimensionType": "time", + "nullable": false, + "primaryKey": false, + "comment": "Date and time the record was last updated." + } + ], + "foreignKeys": [ + { + "fromColumn": "ProductID", + "toCatalog": null, + "toDb": "main", + "toTable": "Production.Product", + "toColumn": "ProductID", + "constraintName": "FK_PurchaseOrderDetail_Product_ProductID" + }, + { + "fromColumn": "PurchaseOrderID", + "toCatalog": null, + "toDb": "main", + "toTable": "Purchasing.PurchaseOrderHeader", + "toColumn": "PurchaseOrderID", + "constraintName": "FK_PurchaseOrderDetail_PurchaseOrderHeader_PurchaseOrderID" + } + ] + }, + { + "catalog": null, + "db": "main", + "name": "Purchasing.PurchaseOrderHeader", + "kind": "table", + "comment": "General purchase order information. See PurchaseOrderDetail.", + "estimatedRows": 4012, + "columns": [ + { + "name": "PurchaseOrderID", + "nativeType": "int", + "normalizedType": "integer", + "dimensionType": "number", + "nullable": false, + "primaryKey": true, + "comment": "Clustered index created by a primary key constraint." + }, + { + "name": "RevisionNumber", + "nativeType": "tinyint", + "normalizedType": "integer", + "dimensionType": "number", + "nullable": false, + "primaryKey": false, + "comment": "Nonclustered index." 
+ }, + { + "name": "Status", + "nativeType": "tinyint", + "normalizedType": "integer", + "dimensionType": "number", + "nullable": false, + "primaryKey": false, + "comment": "Nonclustered index." + }, + { + "name": "EmployeeID", + "nativeType": "int", + "normalizedType": "integer", + "dimensionType": "number", + "nullable": false, + "primaryKey": false, + "comment": "Employee who created the purchase order. Foreign key to Employee.BusinessEntityID." + }, + { + "name": "VendorID", + "nativeType": "int", + "normalizedType": "integer", + "dimensionType": "number", + "nullable": false, + "primaryKey": false, + "comment": "Vendor with whom the purchase order is placed. Foreign key to Vendor.BusinessEntityID." + }, + { + "name": "ShipMethodID", + "nativeType": "int", + "normalizedType": "integer", + "dimensionType": "number", + "nullable": false, + "primaryKey": false, + "comment": "Shipping method. Foreign key to ShipMethod.ShipMethodID." + }, + { + "name": "OrderDate", + "nativeType": "datetime", + "normalizedType": "text", + "dimensionType": "time", + "nullable": false, + "primaryKey": false, + "comment": "Purchase order creation date." + }, + { + "name": "ShipDate", + "nativeType": "datetime", + "normalizedType": "text", + "dimensionType": "time", + "nullable": true, + "primaryKey": false, + "comment": "Estimated shipment date from the vendor." + }, + { + "name": "SubTotal", + "nativeType": "money", + "normalizedType": "text", + "dimensionType": "string", + "nullable": false, + "primaryKey": false, + "comment": "Purchase order subtotal. Computed as SUM(PurchaseOrderDetail.LineTotal)for the appropriate PurchaseOrderID." + }, + { + "name": "TaxAmt", + "nativeType": "money", + "normalizedType": "text", + "dimensionType": "string", + "nullable": false, + "primaryKey": false, + "comment": "Tax amount." 
+ }, + { + "name": "Freight", + "nativeType": "money", + "normalizedType": "text", + "dimensionType": "string", + "nullable": false, + "primaryKey": false, + "comment": "Shipping cost." + }, + { + "name": "TotalDue", + "nativeType": "money", + "normalizedType": "text", + "dimensionType": "string", + "nullable": false, + "primaryKey": false, + "comment": "Total due to vendor. Computed as Subtotal + TaxAmt + Freight." + }, + { + "name": "ModifiedDate", + "nativeType": "datetime", + "normalizedType": "text", + "dimensionType": "time", + "nullable": false, + "primaryKey": false, + "comment": "Date and time the record was last updated." + } + ], + "foreignKeys": [ + { + "fromColumn": "EmployeeID", + "toCatalog": null, + "toDb": "main", + "toTable": "HumanResources.Employee", + "toColumn": "BusinessEntityID", + "constraintName": "FK_PurchaseOrderHeader_Employee_EmployeeID" + }, + { + "fromColumn": "ShipMethodID", + "toCatalog": null, + "toDb": "main", + "toTable": "Purchasing.ShipMethod", + "toColumn": "ShipMethodID", + "constraintName": "FK_PurchaseOrderHeader_ShipMethod_ShipMethodID" + }, + { + "fromColumn": "VendorID", + "toCatalog": null, + "toDb": "main", + "toTable": "Purchasing.Vendor", + "toColumn": "BusinessEntityID", + "constraintName": "FK_PurchaseOrderHeader_Vendor_VendorID" + } + ] + }, + { + "catalog": null, + "db": "main", + "name": "Purchasing.ShipMethod", + "kind": "table", + "comment": "Shipping company lookup table.", + "estimatedRows": 5, + "columns": [ + { + "name": "ShipMethodID", + "nativeType": "int", + "normalizedType": "integer", + "dimensionType": "number", + "nullable": false, + "primaryKey": true, + "comment": "Clustered index created by a primary key constraint." + }, + { + "name": "Name", + "nativeType": "nvarchar", + "normalizedType": "text", + "dimensionType": "string", + "nullable": false, + "primaryKey": false, + "comment": "Unique nonclustered index." 
+ }, + { + "name": "ShipBase", + "nativeType": "money", + "normalizedType": "text", + "dimensionType": "string", + "nullable": false, + "primaryKey": false, + "comment": "Unique nonclustered index. Used to support replication samples." + }, + { + "name": "ShipRate", + "nativeType": "money", + "normalizedType": "text", + "dimensionType": "string", + "nullable": false, + "primaryKey": false, + "comment": "Shipping charge per pound." + }, + { + "name": "rowguid", + "nativeType": "uniqueidentifier", + "normalizedType": "text", + "dimensionType": "string", + "nullable": false, + "primaryKey": false, + "comment": "ROWGUIDCOL number uniquely identifying the record. Used to support a merge replication sample." + }, + { + "name": "ModifiedDate", + "nativeType": "datetime", + "normalizedType": "text", + "dimensionType": "time", + "nullable": false, + "primaryKey": false, + "comment": "Date and time the record was last updated." + } + ], + "foreignKeys": [] + }, + { + "catalog": null, + "db": "main", + "name": "Purchasing.Vendor", + "kind": "table", + "comment": "Companies from whom Adventure Works Cycles purchases parts or other goods.", + "estimatedRows": 104, + "columns": [ + { + "name": "BusinessEntityID", + "nativeType": "int", + "normalizedType": "integer", + "dimensionType": "number", + "nullable": false, + "primaryKey": true, + "comment": "Clustered index created by a primary key constraint." + }, + { + "name": "AccountNumber", + "nativeType": "nvarchar", + "normalizedType": "text", + "dimensionType": "string", + "nullable": false, + "primaryKey": false, + "comment": "Unique nonclustered index." + }, + { + "name": "Name", + "nativeType": "nvarchar", + "normalizedType": "text", + "dimensionType": "string", + "nullable": false, + "primaryKey": false, + "comment": "Company name." 
+ }, + { + "name": "CreditRating", + "nativeType": "tinyint", + "normalizedType": "integer", + "dimensionType": "number", + "nullable": false, + "primaryKey": false, + "comment": "1 = Superior, 2 = Excellent, 3 = Above average, 4 = Average, 5 = Below average" + }, + { + "name": "PreferredVendorStatus", + "nativeType": "bit", + "normalizedType": "text", + "dimensionType": "string", + "nullable": false, + "primaryKey": false, + "comment": "0 = Do not use if another vendor is available. 1 = Preferred over other vendors supplying the same product." + }, + { + "name": "ActiveFlag", + "nativeType": "bit", + "normalizedType": "text", + "dimensionType": "string", + "nullable": false, + "primaryKey": false, + "comment": "0 = Vendor no longer used. 1 = Vendor is actively used." + }, + { + "name": "PurchasingWebServiceURL", + "nativeType": "nvarchar", + "normalizedType": "text", + "dimensionType": "string", + "nullable": true, + "primaryKey": false, + "comment": "Vendor URL." + }, + { + "name": "ModifiedDate", + "nativeType": "datetime", + "normalizedType": "text", + "dimensionType": "time", + "nullable": false, + "primaryKey": false, + "comment": "Date and time the record was last updated." + } + ], + "foreignKeys": [ + { + "fromColumn": "BusinessEntityID", + "toCatalog": null, + "toDb": "main", + "toTable": "Person.BusinessEntity", + "toColumn": "BusinessEntityID", + "constraintName": "FK_Vendor_BusinessEntity_BusinessEntityID" + } + ] + }, + { + "catalog": null, + "db": "main", + "name": "Sales.CountryRegionCurrency", + "kind": "table", + "comment": "Cross-reference table mapping ISO currency codes to a country or region.", + "estimatedRows": 109, + "columns": [ + { + "name": "CountryRegionCode", + "nativeType": "nvarchar", + "normalizedType": "text", + "dimensionType": "string", + "nullable": false, + "primaryKey": true, + "comment": "Clustered index created by a primary key constraint." 
+ }, + { + "name": "CurrencyCode", + "nativeType": "nchar", + "normalizedType": "text", + "dimensionType": "string", + "nullable": false, + "primaryKey": true, + "comment": "Nonclustered index." + }, + { + "name": "ModifiedDate", + "nativeType": "datetime", + "normalizedType": "text", + "dimensionType": "time", + "nullable": false, + "primaryKey": false, + "comment": "Date and time the record was last updated." + } + ], + "foreignKeys": [ + { + "fromColumn": "CountryRegionCode", + "toCatalog": null, + "toDb": "main", + "toTable": "Person.CountryRegion", + "toColumn": "CountryRegionCode", + "constraintName": "FK_CountryRegionCurrency_CountryRegion_CountryRegionCode" + }, + { + "fromColumn": "CurrencyCode", + "toCatalog": null, + "toDb": "main", + "toTable": "Sales.Currency", + "toColumn": "CurrencyCode", + "constraintName": "FK_CountryRegionCurrency_Currency_CurrencyCode" + } + ] + }, + { + "catalog": null, + "db": "main", + "name": "Sales.CreditCard", + "kind": "table", + "comment": "Customer credit card information.", + "estimatedRows": 19118, + "columns": [ + { + "name": "CreditCardID", + "nativeType": "int", + "normalizedType": "integer", + "dimensionType": "number", + "nullable": false, + "primaryKey": true, + "comment": "Clustered index created by a primary key constraint." + }, + { + "name": "CardType", + "nativeType": "nvarchar", + "normalizedType": "text", + "dimensionType": "string", + "nullable": false, + "primaryKey": false, + "comment": "Unique nonclustered index." + }, + { + "name": "CardNumber", + "nativeType": "nvarchar", + "normalizedType": "text", + "dimensionType": "string", + "nullable": false, + "primaryKey": false, + "comment": "Credit card number." + }, + { + "name": "ExpMonth", + "nativeType": "tinyint", + "normalizedType": "integer", + "dimensionType": "number", + "nullable": false, + "primaryKey": false, + "comment": "Credit card expiration month." 
+ }, + { + "name": "ExpYear", + "nativeType": "smallint", + "normalizedType": "integer", + "dimensionType": "number", + "nullable": false, + "primaryKey": false, + "comment": "Credit card expiration year." + }, + { + "name": "ModifiedDate", + "nativeType": "datetime", + "normalizedType": "text", + "dimensionType": "time", + "nullable": false, + "primaryKey": false, + "comment": "Date and time the record was last updated." + } + ], + "foreignKeys": [] + }, + { + "catalog": null, + "db": "main", + "name": "Sales.Currency", + "kind": "table", + "comment": "Lookup table containing standard ISO currencies.", + "estimatedRows": 105, + "columns": [ + { + "name": "CurrencyCode", + "nativeType": "nchar", + "normalizedType": "text", + "dimensionType": "string", + "nullable": false, + "primaryKey": true, + "comment": "Clustered index created by a primary key constraint." + }, + { + "name": "Name", + "nativeType": "nvarchar", + "normalizedType": "text", + "dimensionType": "string", + "nullable": false, + "primaryKey": false, + "comment": "Unique nonclustered index." + }, + { + "name": "ModifiedDate", + "nativeType": "datetime", + "normalizedType": "text", + "dimensionType": "time", + "nullable": false, + "primaryKey": false, + "comment": "Date and time the record was last updated." + } + ], + "foreignKeys": [] + }, + { + "catalog": null, + "db": "main", + "name": "Sales.CurrencyRate", + "kind": "table", + "comment": "Currency exchange rates.", + "estimatedRows": 13532, + "columns": [ + { + "name": "CurrencyRateID", + "nativeType": "int", + "normalizedType": "integer", + "dimensionType": "number", + "nullable": false, + "primaryKey": true, + "comment": "Clustered index created by a primary key constraint." + }, + { + "name": "CurrencyRateDate", + "nativeType": "datetime", + "normalizedType": "text", + "dimensionType": "time", + "nullable": false, + "primaryKey": false, + "comment": "Unique nonclustered index." 
+ }, + { + "name": "FromCurrencyCode", + "nativeType": "nchar", + "normalizedType": "text", + "dimensionType": "string", + "nullable": false, + "primaryKey": false, + "comment": "Exchange rate was converted from this currency code." + }, + { + "name": "ToCurrencyCode", + "nativeType": "nchar", + "normalizedType": "text", + "dimensionType": "string", + "nullable": false, + "primaryKey": false, + "comment": "Exchange rate was converted to this currency code." + }, + { + "name": "AverageRate", + "nativeType": "money", + "normalizedType": "text", + "dimensionType": "string", + "nullable": false, + "primaryKey": false, + "comment": "Average exchange rate for the day." + }, + { + "name": "EndOfDayRate", + "nativeType": "money", + "normalizedType": "text", + "dimensionType": "string", + "nullable": false, + "primaryKey": false, + "comment": "Final exchange rate for the day." + }, + { + "name": "ModifiedDate", + "nativeType": "datetime", + "normalizedType": "text", + "dimensionType": "time", + "nullable": false, + "primaryKey": false, + "comment": "Date and time the record was last updated." + } + ], + "foreignKeys": [ + { + "fromColumn": "FromCurrencyCode", + "toCatalog": null, + "toDb": "main", + "toTable": "Sales.Currency", + "toColumn": "CurrencyCode", + "constraintName": "FK_CurrencyRate_Currency_FromCurrencyCode" + }, + { + "fromColumn": "ToCurrencyCode", + "toCatalog": null, + "toDb": "main", + "toTable": "Sales.Currency", + "toColumn": "CurrencyCode", + "constraintName": "FK_CurrencyRate_Currency_ToCurrencyCode" + } + ] + }, + { + "catalog": null, + "db": "main", + "name": "Sales.Customer", + "kind": "table", + "comment": "Current customer information. Also see the Person and Store tables.", + "estimatedRows": 19820, + "columns": [ + { + "name": "CustomerID", + "nativeType": "int", + "normalizedType": "integer", + "dimensionType": "number", + "nullable": false, + "primaryKey": true, + "comment": "Clustered index created by a primary key constraint." 
+ }, + { + "name": "PersonID", + "nativeType": "int", + "normalizedType": "integer", + "dimensionType": "number", + "nullable": true, + "primaryKey": false, + "comment": "Unique nonclustered index. Used to support replication samples." + }, + { + "name": "StoreID", + "nativeType": "int", + "normalizedType": "integer", + "dimensionType": "number", + "nullable": true, + "primaryKey": false, + "comment": "Unique nonclustered index." + }, + { + "name": "TerritoryID", + "nativeType": "int", + "normalizedType": "integer", + "dimensionType": "number", + "nullable": true, + "primaryKey": false, + "comment": "Nonclustered index." + }, + { + "name": "AccountNumber", + "nativeType": "varchar", + "normalizedType": "text", + "dimensionType": "string", + "nullable": false, + "primaryKey": false, + "comment": "Unique number identifying the customer assigned by the accounting system." + }, + { + "name": "rowguid", + "nativeType": "uniqueidentifier", + "normalizedType": "text", + "dimensionType": "string", + "nullable": false, + "primaryKey": false, + "comment": "ROWGUIDCOL number uniquely identifying the record. Used to support a merge replication sample." + }, + { + "name": "ModifiedDate", + "nativeType": "datetime", + "normalizedType": "text", + "dimensionType": "time", + "nullable": false, + "primaryKey": false, + "comment": "Date and time the record was last updated." 
+ } + ], + "foreignKeys": [ + { + "fromColumn": "PersonID", + "toCatalog": null, + "toDb": "main", + "toTable": "Person.Person", + "toColumn": "BusinessEntityID", + "constraintName": "FK_Customer_Person_PersonID" + }, + { + "fromColumn": "StoreID", + "toCatalog": null, + "toDb": "main", + "toTable": "Sales.Store", + "toColumn": "BusinessEntityID", + "constraintName": "FK_Customer_Store_StoreID" + }, + { + "fromColumn": "TerritoryID", + "toCatalog": null, + "toDb": "main", + "toTable": "Sales.SalesTerritory", + "toColumn": "TerritoryID", + "constraintName": "FK_Customer_SalesTerritory_TerritoryID" + } + ] + }, + { + "catalog": null, + "db": "main", + "name": "Sales.PersonCreditCard", + "kind": "table", + "comment": "Cross-reference table mapping people to their credit card information in the CreditCard table. ", + "estimatedRows": 19118, + "columns": [ + { + "name": "BusinessEntityID", + "nativeType": "int", + "normalizedType": "integer", + "dimensionType": "number", + "nullable": false, + "primaryKey": true, + "comment": "Clustered index created by a primary key constraint." + }, + { + "name": "CreditCardID", + "nativeType": "int", + "normalizedType": "integer", + "dimensionType": "number", + "nullable": false, + "primaryKey": true, + "comment": "Credit card identification number. Foreign key to CreditCard.CreditCardID." + }, + { + "name": "ModifiedDate", + "nativeType": "datetime", + "normalizedType": "text", + "dimensionType": "time", + "nullable": false, + "primaryKey": false, + "comment": "Date and time the record was last updated." 
+ } + ], + "foreignKeys": [ + { + "fromColumn": "BusinessEntityID", + "toCatalog": null, + "toDb": "main", + "toTable": "Person.Person", + "toColumn": "BusinessEntityID", + "constraintName": "FK_PersonCreditCard_Person_BusinessEntityID" + }, + { + "fromColumn": "CreditCardID", + "toCatalog": null, + "toDb": "main", + "toTable": "Sales.CreditCard", + "toColumn": "CreditCardID", + "constraintName": "FK_PersonCreditCard_CreditCard_CreditCardID" + } + ] + }, + { + "catalog": null, + "db": "main", + "name": "Sales.SalesOrderDetail", + "kind": "table", + "comment": "Individual products associated with a specific sales order. See SalesOrderHeader.", + "estimatedRows": 121317, + "columns": [ + { + "name": "SalesOrderID", + "nativeType": "int", + "normalizedType": "integer", + "dimensionType": "number", + "nullable": false, + "primaryKey": true, + "comment": "Clustered index created by a primary key constraint." + }, + { + "name": "SalesOrderDetailID", + "nativeType": "int", + "normalizedType": "integer", + "dimensionType": "number", + "nullable": false, + "primaryKey": true, + "comment": "Unique nonclustered index. Used to support replication samples." + }, + { + "name": "CarrierTrackingNumber", + "nativeType": "nvarchar", + "normalizedType": "text", + "dimensionType": "string", + "nullable": true, + "primaryKey": false, + "comment": "Nonclustered index." + }, + { + "name": "OrderQty", + "nativeType": "smallint", + "normalizedType": "integer", + "dimensionType": "number", + "nullable": false, + "primaryKey": false, + "comment": "Quantity ordered per product." + }, + { + "name": "ProductID", + "nativeType": "int", + "normalizedType": "integer", + "dimensionType": "number", + "nullable": false, + "primaryKey": false, + "comment": "Product sold to customer. Foreign key to Product.ProductID." 
+ }, + { + "name": "SpecialOfferID", + "nativeType": "int", + "normalizedType": "integer", + "dimensionType": "number", + "nullable": false, + "primaryKey": false, + "comment": "Promotional code. Foreign key to SpecialOffer.SpecialOfferID." + }, + { + "name": "UnitPrice", + "nativeType": "money", + "normalizedType": "text", + "dimensionType": "string", + "nullable": false, + "primaryKey": false, + "comment": "Selling price of a single product." + }, + { + "name": "UnitPriceDiscount", + "nativeType": "money", + "normalizedType": "text", + "dimensionType": "string", + "nullable": false, + "primaryKey": false, + "comment": "Discount amount." + }, + { + "name": "LineTotal", + "nativeType": "numeric", + "normalizedType": "real", + "dimensionType": "number", + "nullable": false, + "primaryKey": false, + "comment": "Per product subtotal. Computed as UnitPrice * (1 - UnitPriceDiscount) * OrderQty." + }, + { + "name": "rowguid", + "nativeType": "uniqueidentifier", + "normalizedType": "text", + "dimensionType": "string", + "nullable": false, + "primaryKey": false, + "comment": "ROWGUIDCOL number uniquely identifying the record. Used to support a merge replication sample." + }, + { + "name": "ModifiedDate", + "nativeType": "datetime", + "normalizedType": "text", + "dimensionType": "time", + "nullable": false, + "primaryKey": false, + "comment": "Date and time the record was last updated." 
+ } + ], + "foreignKeys": [ + { + "fromColumn": "ProductID", + "toCatalog": null, + "toDb": "main", + "toTable": "Sales.SpecialOfferProduct", + "toColumn": "ProductID", + "constraintName": "FK_SalesOrderDetail_SpecialOfferProduct_SpecialOfferIDProductID" + }, + { + "fromColumn": "SalesOrderID", + "toCatalog": null, + "toDb": "main", + "toTable": "Sales.SalesOrderHeader", + "toColumn": "SalesOrderID", + "constraintName": "FK_SalesOrderDetail_SalesOrderHeader_SalesOrderID" + }, + { + "fromColumn": "SpecialOfferID", + "toCatalog": null, + "toDb": "main", + "toTable": "Sales.SpecialOfferProduct", + "toColumn": "SpecialOfferID", + "constraintName": "FK_SalesOrderDetail_SpecialOfferProduct_SpecialOfferIDProductID" + } + ] + }, + { + "catalog": null, + "db": "main", + "name": "Sales.SalesOrderHeader", + "kind": "table", + "comment": "General sales order information.", + "estimatedRows": 31465, + "columns": [ + { + "name": "SalesOrderID", + "nativeType": "int", + "normalizedType": "integer", + "dimensionType": "number", + "nullable": false, + "primaryKey": true, + "comment": "Clustered index created by a primary key constraint." + }, + { + "name": "RevisionNumber", + "nativeType": "tinyint", + "normalizedType": "integer", + "dimensionType": "number", + "nullable": false, + "primaryKey": false, + "comment": "Unique nonclustered index. Used to support replication samples." + }, + { + "name": "OrderDate", + "nativeType": "datetime", + "normalizedType": "text", + "dimensionType": "time", + "nullable": false, + "primaryKey": false, + "comment": "Unique nonclustered index." + }, + { + "name": "DueDate", + "nativeType": "datetime", + "normalizedType": "text", + "dimensionType": "time", + "nullable": false, + "primaryKey": false, + "comment": "Nonclustered index." + }, + { + "name": "ShipDate", + "nativeType": "datetime", + "normalizedType": "text", + "dimensionType": "time", + "nullable": true, + "primaryKey": false, + "comment": "Nonclustered index." 
+ }, + { + "name": "Status", + "nativeType": "tinyint", + "normalizedType": "integer", + "dimensionType": "number", + "nullable": false, + "primaryKey": false, + "comment": "Order current status. 1 = In process; 2 = Approved; 3 = Backordered; 4 = Rejected; 5 = Shipped; 6 = Cancelled" + }, + { + "name": "OnlineOrderFlag", + "nativeType": "bit", + "normalizedType": "text", + "dimensionType": "string", + "nullable": false, + "primaryKey": false, + "comment": "0 = Order placed by sales person. 1 = Order placed online by customer." + }, + { + "name": "SalesOrderNumber", + "nativeType": "nvarchar", + "normalizedType": "text", + "dimensionType": "string", + "nullable": false, + "primaryKey": false, + "comment": "Unique sales order identification number." + }, + { + "name": "PurchaseOrderNumber", + "nativeType": "nvarchar", + "normalizedType": "text", + "dimensionType": "string", + "nullable": true, + "primaryKey": false, + "comment": "Customer purchase order number reference. " + }, + { + "name": "AccountNumber", + "nativeType": "nvarchar", + "normalizedType": "text", + "dimensionType": "string", + "nullable": true, + "primaryKey": false, + "comment": "Financial accounting number reference." + }, + { + "name": "CustomerID", + "nativeType": "int", + "normalizedType": "integer", + "dimensionType": "number", + "nullable": false, + "primaryKey": false, + "comment": "Customer identification number. Foreign key to Customer.BusinessEntityID." + }, + { + "name": "SalesPersonID", + "nativeType": "int", + "normalizedType": "integer", + "dimensionType": "number", + "nullable": true, + "primaryKey": false, + "comment": "Sales person who created the sales order. Foreign key to SalesPerson.BusinessEntityID." + }, + { + "name": "TerritoryID", + "nativeType": "int", + "normalizedType": "integer", + "dimensionType": "number", + "nullable": true, + "primaryKey": false, + "comment": "Territory in which the sale was made. Foreign key to SalesTerritory.SalesTerritoryID." 
+ }, + { + "name": "BillToAddressID", + "nativeType": "int", + "normalizedType": "integer", + "dimensionType": "number", + "nullable": false, + "primaryKey": false, + "comment": "Customer billing address. Foreign key to Address.AddressID." + }, + { + "name": "ShipToAddressID", + "nativeType": "int", + "normalizedType": "integer", + "dimensionType": "number", + "nullable": false, + "primaryKey": false, + "comment": "Customer shipping address. Foreign key to Address.AddressID." + }, + { + "name": "ShipMethodID", + "nativeType": "int", + "normalizedType": "integer", + "dimensionType": "number", + "nullable": false, + "primaryKey": false, + "comment": "Shipping method. Foreign key to ShipMethod.ShipMethodID." + }, + { + "name": "CreditCardID", + "nativeType": "int", + "normalizedType": "integer", + "dimensionType": "number", + "nullable": true, + "primaryKey": false, + "comment": "Credit card identification number. Foreign key to CreditCard.CreditCardID." + }, + { + "name": "CreditCardApprovalCode", + "nativeType": "varchar", + "normalizedType": "text", + "dimensionType": "string", + "nullable": true, + "primaryKey": false, + "comment": "Approval code provided by the credit card company." + }, + { + "name": "CurrencyRateID", + "nativeType": "int", + "normalizedType": "integer", + "dimensionType": "number", + "nullable": true, + "primaryKey": false, + "comment": "Currency exchange rate used. Foreign key to CurrencyRate.CurrencyRateID." + }, + { + "name": "SubTotal", + "nativeType": "money", + "normalizedType": "text", + "dimensionType": "string", + "nullable": false, + "primaryKey": false, + "comment": "Sales subtotal. Computed as SUM(SalesOrderDetail.LineTotal)for the appropriate SalesOrderID." + }, + { + "name": "TaxAmt", + "nativeType": "money", + "normalizedType": "text", + "dimensionType": "string", + "nullable": false, + "primaryKey": false, + "comment": "Tax amount." 
+ }, + { + "name": "Freight", + "nativeType": "money", + "normalizedType": "text", + "dimensionType": "string", + "nullable": false, + "primaryKey": false, + "comment": "Shipping cost." + }, + { + "name": "TotalDue", + "nativeType": "money", + "normalizedType": "text", + "dimensionType": "string", + "nullable": false, + "primaryKey": false, + "comment": "Total due from customer. Computed as Subtotal + TaxAmt + Freight." + }, + { + "name": "Comment", + "nativeType": "nvarchar", + "normalizedType": "text", + "dimensionType": "string", + "nullable": true, + "primaryKey": false, + "comment": "Sales representative comments." + }, + { + "name": "rowguid", + "nativeType": "uniqueidentifier", + "normalizedType": "text", + "dimensionType": "string", + "nullable": false, + "primaryKey": false, + "comment": "ROWGUIDCOL number uniquely identifying the record. Used to support a merge replication sample." + }, + { + "name": "ModifiedDate", + "nativeType": "datetime", + "normalizedType": "text", + "dimensionType": "time", + "nullable": false, + "primaryKey": false, + "comment": "Date and time the record was last updated." 
+ } + ], + "foreignKeys": [ + { + "fromColumn": "BillToAddressID", + "toCatalog": null, + "toDb": "main", + "toTable": "Person.Address", + "toColumn": "AddressID", + "constraintName": "FK_SalesOrderHeader_Address_BillToAddressID" + }, + { + "fromColumn": "CreditCardID", + "toCatalog": null, + "toDb": "main", + "toTable": "Sales.CreditCard", + "toColumn": "CreditCardID", + "constraintName": "FK_SalesOrderHeader_CreditCard_CreditCardID" + }, + { + "fromColumn": "CurrencyRateID", + "toCatalog": null, + "toDb": "main", + "toTable": "Sales.CurrencyRate", + "toColumn": "CurrencyRateID", + "constraintName": "FK_SalesOrderHeader_CurrencyRate_CurrencyRateID" + }, + { + "fromColumn": "CustomerID", + "toCatalog": null, + "toDb": "main", + "toTable": "Sales.Customer", + "toColumn": "CustomerID", + "constraintName": "FK_SalesOrderHeader_Customer_CustomerID" + }, + { + "fromColumn": "SalesPersonID", + "toCatalog": null, + "toDb": "main", + "toTable": "Sales.SalesPerson", + "toColumn": "BusinessEntityID", + "constraintName": "FK_SalesOrderHeader_SalesPerson_SalesPersonID" + }, + { + "fromColumn": "ShipMethodID", + "toCatalog": null, + "toDb": "main", + "toTable": "Purchasing.ShipMethod", + "toColumn": "ShipMethodID", + "constraintName": "FK_SalesOrderHeader_ShipMethod_ShipMethodID" + }, + { + "fromColumn": "ShipToAddressID", + "toCatalog": null, + "toDb": "main", + "toTable": "Person.Address", + "toColumn": "AddressID", + "constraintName": "FK_SalesOrderHeader_Address_ShipToAddressID" + }, + { + "fromColumn": "TerritoryID", + "toCatalog": null, + "toDb": "main", + "toTable": "Sales.SalesTerritory", + "toColumn": "TerritoryID", + "constraintName": "FK_SalesOrderHeader_SalesTerritory_TerritoryID" + } + ] + }, + { + "catalog": null, + "db": "main", + "name": "Sales.SalesOrderHeaderSalesReason", + "kind": "table", + "comment": "Cross-reference table mapping sales orders to sales reason codes.", + "estimatedRows": 27647, + "columns": [ + { + "name": "SalesOrderID", + "nativeType": 
"int", + "normalizedType": "integer", + "dimensionType": "number", + "nullable": false, + "primaryKey": true, + "comment": "Clustered index created by a primary key constraint." + }, + { + "name": "SalesReasonID", + "nativeType": "int", + "normalizedType": "integer", + "dimensionType": "number", + "nullable": false, + "primaryKey": true, + "comment": "Primary key. Foreign key to SalesReason.SalesReasonID." + }, + { + "name": "ModifiedDate", + "nativeType": "datetime", + "normalizedType": "text", + "dimensionType": "time", + "nullable": false, + "primaryKey": false, + "comment": "Date and time the record was last updated." + } + ], + "foreignKeys": [ + { + "fromColumn": "SalesOrderID", + "toCatalog": null, + "toDb": "main", + "toTable": "Sales.SalesOrderHeader", + "toColumn": "SalesOrderID", + "constraintName": "FK_SalesOrderHeaderSalesReason_SalesOrderHeader_SalesOrderID" + }, + { + "fromColumn": "SalesReasonID", + "toCatalog": null, + "toDb": "main", + "toTable": "Sales.SalesReason", + "toColumn": "SalesReasonID", + "constraintName": "FK_SalesOrderHeaderSalesReason_SalesReason_SalesReasonID" + } + ] + }, + { + "catalog": null, + "db": "main", + "name": "Sales.SalesPerson", + "kind": "table", + "comment": "Sales representative current information.", + "estimatedRows": 17, + "columns": [ + { + "name": "BusinessEntityID", + "nativeType": "int", + "normalizedType": "integer", + "dimensionType": "number", + "nullable": false, + "primaryKey": true, + "comment": "Clustered index created by a primary key constraint." + }, + { + "name": "TerritoryID", + "nativeType": "int", + "normalizedType": "integer", + "dimensionType": "number", + "nullable": true, + "primaryKey": false, + "comment": "Unique nonclustered index. Used to support replication samples." + }, + { + "name": "SalesQuota", + "nativeType": "money", + "normalizedType": "text", + "dimensionType": "string", + "nullable": true, + "primaryKey": false, + "comment": "Projected yearly sales." 
+ }, + { + "name": "Bonus", + "nativeType": "money", + "normalizedType": "text", + "dimensionType": "string", + "nullable": false, + "primaryKey": false, + "comment": "Bonus due if quota is met." + }, + { + "name": "CommissionPct", + "nativeType": "smallmoney", + "normalizedType": "text", + "dimensionType": "string", + "nullable": false, + "primaryKey": false, + "comment": "Commision percent received per sale." + }, + { + "name": "SalesYTD", + "nativeType": "money", + "normalizedType": "text", + "dimensionType": "string", + "nullable": false, + "primaryKey": false, + "comment": "Sales total year to date." + }, + { + "name": "SalesLastYear", + "nativeType": "money", + "normalizedType": "text", + "dimensionType": "string", + "nullable": false, + "primaryKey": false, + "comment": "Sales total of previous year." + }, + { + "name": "rowguid", + "nativeType": "uniqueidentifier", + "normalizedType": "text", + "dimensionType": "string", + "nullable": false, + "primaryKey": false, + "comment": "ROWGUIDCOL number uniquely identifying the record. Used to support a merge replication sample." + }, + { + "name": "ModifiedDate", + "nativeType": "datetime", + "normalizedType": "text", + "dimensionType": "time", + "nullable": false, + "primaryKey": false, + "comment": "Date and time the record was last updated." 
+ } + ], + "foreignKeys": [ + { + "fromColumn": "BusinessEntityID", + "toCatalog": null, + "toDb": "main", + "toTable": "HumanResources.Employee", + "toColumn": "BusinessEntityID", + "constraintName": "FK_SalesPerson_Employee_BusinessEntityID" + }, + { + "fromColumn": "TerritoryID", + "toCatalog": null, + "toDb": "main", + "toTable": "Sales.SalesTerritory", + "toColumn": "TerritoryID", + "constraintName": "FK_SalesPerson_SalesTerritory_TerritoryID" + } + ] + }, + { + "catalog": null, + "db": "main", + "name": "Sales.SalesPersonQuotaHistory", + "kind": "table", + "comment": "Sales performance tracking.", + "estimatedRows": 163, + "columns": [ + { + "name": "BusinessEntityID", + "nativeType": "int", + "normalizedType": "integer", + "dimensionType": "number", + "nullable": false, + "primaryKey": true, + "comment": "Clustered index created by a primary key constraint." + }, + { + "name": "QuotaDate", + "nativeType": "datetime", + "normalizedType": "text", + "dimensionType": "time", + "nullable": false, + "primaryKey": true, + "comment": "Unique nonclustered index. Used to support replication samples." + }, + { + "name": "SalesQuota", + "nativeType": "money", + "normalizedType": "text", + "dimensionType": "string", + "nullable": false, + "primaryKey": false, + "comment": "Sales quota amount." + }, + { + "name": "rowguid", + "nativeType": "uniqueidentifier", + "normalizedType": "text", + "dimensionType": "string", + "nullable": false, + "primaryKey": false, + "comment": "ROWGUIDCOL number uniquely identifying the record. Used to support a merge replication sample." + }, + { + "name": "ModifiedDate", + "nativeType": "datetime", + "normalizedType": "text", + "dimensionType": "time", + "nullable": false, + "primaryKey": false, + "comment": "Date and time the record was last updated." 
+ } + ], + "foreignKeys": [ + { + "fromColumn": "BusinessEntityID", + "toCatalog": null, + "toDb": "main", + "toTable": "Sales.SalesPerson", + "toColumn": "BusinessEntityID", + "constraintName": "FK_SalesPersonQuotaHistory_SalesPerson_BusinessEntityID" + } + ] + }, + { + "catalog": null, + "db": "main", + "name": "Sales.SalesReason", + "kind": "table", + "comment": "Lookup table of customer purchase reasons.", + "estimatedRows": 10, + "columns": [ + { + "name": "SalesReasonID", + "nativeType": "int", + "normalizedType": "integer", + "dimensionType": "number", + "nullable": false, + "primaryKey": true, + "comment": "Clustered index created by a primary key constraint." + }, + { + "name": "Name", + "nativeType": "nvarchar", + "normalizedType": "text", + "dimensionType": "string", + "nullable": false, + "primaryKey": false, + "comment": "Sales reason description." + }, + { + "name": "ReasonType", + "nativeType": "nvarchar", + "normalizedType": "text", + "dimensionType": "string", + "nullable": false, + "primaryKey": false, + "comment": "Category the sales reason belongs to." + }, + { + "name": "ModifiedDate", + "nativeType": "datetime", + "normalizedType": "text", + "dimensionType": "time", + "nullable": false, + "primaryKey": false, + "comment": "Date and time the record was last updated." + } + ], + "foreignKeys": [] + }, + { + "catalog": null, + "db": "main", + "name": "Sales.SalesTaxRate", + "kind": "table", + "comment": "Tax rate lookup table.", + "estimatedRows": 29, + "columns": [ + { + "name": "SalesTaxRateID", + "nativeType": "int", + "normalizedType": "integer", + "dimensionType": "number", + "nullable": false, + "primaryKey": true, + "comment": "Clustered index created by a primary key constraint." + }, + { + "name": "StateProvinceID", + "nativeType": "int", + "normalizedType": "integer", + "dimensionType": "number", + "nullable": false, + "primaryKey": false, + "comment": "Unique nonclustered index." 
+ }, + { + "name": "TaxType", + "nativeType": "tinyint", + "normalizedType": "integer", + "dimensionType": "number", + "nullable": false, + "primaryKey": false, + "comment": "Unique nonclustered index. Used to support replication samples." + }, + { + "name": "TaxRate", + "nativeType": "smallmoney", + "normalizedType": "text", + "dimensionType": "string", + "nullable": false, + "primaryKey": false, + "comment": "Tax rate amount." + }, + { + "name": "Name", + "nativeType": "nvarchar", + "normalizedType": "text", + "dimensionType": "string", + "nullable": false, + "primaryKey": false, + "comment": "Tax rate description." + }, + { + "name": "rowguid", + "nativeType": "uniqueidentifier", + "normalizedType": "text", + "dimensionType": "string", + "nullable": false, + "primaryKey": false, + "comment": "ROWGUIDCOL number uniquely identifying the record. Used to support a merge replication sample." + }, + { + "name": "ModifiedDate", + "nativeType": "datetime", + "normalizedType": "text", + "dimensionType": "time", + "nullable": false, + "primaryKey": false, + "comment": "Date and time the record was last updated." + } + ], + "foreignKeys": [ + { + "fromColumn": "StateProvinceID", + "toCatalog": null, + "toDb": "main", + "toTable": "Person.StateProvince", + "toColumn": "StateProvinceID", + "constraintName": "FK_SalesTaxRate_StateProvince_StateProvinceID" + } + ] + }, + { + "catalog": null, + "db": "main", + "name": "Sales.SalesTerritory", + "kind": "table", + "comment": "Sales territory lookup table.", + "estimatedRows": 10, + "columns": [ + { + "name": "TerritoryID", + "nativeType": "int", + "normalizedType": "integer", + "dimensionType": "number", + "nullable": false, + "primaryKey": true, + "comment": "Clustered index created by a primary key constraint." + }, + { + "name": "Name", + "nativeType": "nvarchar", + "normalizedType": "text", + "dimensionType": "string", + "nullable": false, + "primaryKey": false, + "comment": "Unique nonclustered index." 
+ }, + { + "name": "CountryRegionCode", + "nativeType": "nvarchar", + "normalizedType": "text", + "dimensionType": "string", + "nullable": false, + "primaryKey": false, + "comment": "Unique nonclustered index. Used to support replication samples." + }, + { + "name": "Group", + "nativeType": "nvarchar", + "normalizedType": "text", + "dimensionType": "string", + "nullable": false, + "primaryKey": false, + "comment": "Geographic area to which the sales territory belong." + }, + { + "name": "SalesYTD", + "nativeType": "money", + "normalizedType": "text", + "dimensionType": "string", + "nullable": false, + "primaryKey": false, + "comment": "Sales in the territory year to date." + }, + { + "name": "SalesLastYear", + "nativeType": "money", + "normalizedType": "text", + "dimensionType": "string", + "nullable": false, + "primaryKey": false, + "comment": "Sales in the territory the previous year." + }, + { + "name": "CostYTD", + "nativeType": "money", + "normalizedType": "text", + "dimensionType": "string", + "nullable": false, + "primaryKey": false, + "comment": "Business costs in the territory year to date." + }, + { + "name": "CostLastYear", + "nativeType": "money", + "normalizedType": "text", + "dimensionType": "string", + "nullable": false, + "primaryKey": false, + "comment": "Business costs in the territory the previous year." + }, + { + "name": "rowguid", + "nativeType": "uniqueidentifier", + "normalizedType": "text", + "dimensionType": "string", + "nullable": false, + "primaryKey": false, + "comment": "ROWGUIDCOL number uniquely identifying the record. Used to support a merge replication sample." + }, + { + "name": "ModifiedDate", + "nativeType": "datetime", + "normalizedType": "text", + "dimensionType": "time", + "nullable": false, + "primaryKey": false, + "comment": "Date and time the record was last updated." 
+ } + ], + "foreignKeys": [ + { + "fromColumn": "CountryRegionCode", + "toCatalog": null, + "toDb": "main", + "toTable": "Person.CountryRegion", + "toColumn": "CountryRegionCode", + "constraintName": "FK_SalesTerritory_CountryRegion_CountryRegionCode" + } + ] + }, + { + "catalog": null, + "db": "main", + "name": "Sales.SalesTerritoryHistory", + "kind": "table", + "comment": "Sales representative transfers to other sales territories.", + "estimatedRows": 17, + "columns": [ + { + "name": "BusinessEntityID", + "nativeType": "int", + "normalizedType": "integer", + "dimensionType": "number", + "nullable": false, + "primaryKey": true, + "comment": "Clustered index created by a primary key constraint." + }, + { + "name": "TerritoryID", + "nativeType": "int", + "normalizedType": "integer", + "dimensionType": "number", + "nullable": false, + "primaryKey": true, + "comment": "Unique nonclustered index. Used to support replication samples." + }, + { + "name": "StartDate", + "nativeType": "datetime", + "normalizedType": "text", + "dimensionType": "time", + "nullable": false, + "primaryKey": true, + "comment": "Primary key. Date the sales representive started work in the territory." + }, + { + "name": "EndDate", + "nativeType": "datetime", + "normalizedType": "text", + "dimensionType": "time", + "nullable": true, + "primaryKey": false, + "comment": "Date the sales representative left work in the territory." + }, + { + "name": "rowguid", + "nativeType": "uniqueidentifier", + "normalizedType": "text", + "dimensionType": "string", + "nullable": false, + "primaryKey": false, + "comment": "ROWGUIDCOL number uniquely identifying the record. Used to support a merge replication sample." + }, + { + "name": "ModifiedDate", + "nativeType": "datetime", + "normalizedType": "text", + "dimensionType": "time", + "nullable": false, + "primaryKey": false, + "comment": "Date and time the record was last updated." 
+ } + ], + "foreignKeys": [ + { + "fromColumn": "BusinessEntityID", + "toCatalog": null, + "toDb": "main", + "toTable": "Sales.SalesPerson", + "toColumn": "BusinessEntityID", + "constraintName": "FK_SalesTerritoryHistory_SalesPerson_BusinessEntityID" + }, + { + "fromColumn": "TerritoryID", + "toCatalog": null, + "toDb": "main", + "toTable": "Sales.SalesTerritory", + "toColumn": "TerritoryID", + "constraintName": "FK_SalesTerritoryHistory_SalesTerritory_TerritoryID" + } + ] + }, + { + "catalog": null, + "db": "main", + "name": "Sales.ShoppingCartItem", + "kind": "table", + "comment": "Contains online customer orders until the order is submitted or cancelled.", + "estimatedRows": 3, + "columns": [ + { + "name": "ShoppingCartItemID", + "nativeType": "int", + "normalizedType": "integer", + "dimensionType": "number", + "nullable": false, + "primaryKey": true, + "comment": "Clustered index created by a primary key constraint." + }, + { + "name": "ShoppingCartID", + "nativeType": "nvarchar", + "normalizedType": "text", + "dimensionType": "string", + "nullable": false, + "primaryKey": false, + "comment": "Nonclustered index." + }, + { + "name": "Quantity", + "nativeType": "int", + "normalizedType": "integer", + "dimensionType": "number", + "nullable": false, + "primaryKey": false, + "comment": "Product quantity ordered." + }, + { + "name": "ProductID", + "nativeType": "int", + "normalizedType": "integer", + "dimensionType": "number", + "nullable": false, + "primaryKey": false, + "comment": "Product ordered. Foreign key to Product.ProductID." + }, + { + "name": "DateCreated", + "nativeType": "datetime", + "normalizedType": "text", + "dimensionType": "time", + "nullable": false, + "primaryKey": false, + "comment": "Date the time the record was created." + }, + { + "name": "ModifiedDate", + "nativeType": "datetime", + "normalizedType": "text", + "dimensionType": "time", + "nullable": false, + "primaryKey": false, + "comment": "Date and time the record was last updated." 
+ } + ], + "foreignKeys": [ + { + "fromColumn": "ProductID", + "toCatalog": null, + "toDb": "main", + "toTable": "Production.Product", + "toColumn": "ProductID", + "constraintName": "FK_ShoppingCartItem_Product_ProductID" + } + ] + }, + { + "catalog": null, + "db": "main", + "name": "Sales.SpecialOffer", + "kind": "table", + "comment": "Sale discounts lookup table.", + "estimatedRows": 16, + "columns": [ + { + "name": "SpecialOfferID", + "nativeType": "int", + "normalizedType": "integer", + "dimensionType": "number", + "nullable": false, + "primaryKey": true, + "comment": "Clustered index created by a primary key constraint." + }, + { + "name": "Description", + "nativeType": "nvarchar", + "normalizedType": "text", + "dimensionType": "string", + "nullable": false, + "primaryKey": false, + "comment": "Unique nonclustered index. Used to support replication samples." + }, + { + "name": "DiscountPct", + "nativeType": "smallmoney", + "normalizedType": "text", + "dimensionType": "string", + "nullable": false, + "primaryKey": false, + "comment": "Discount precentage." + }, + { + "name": "Type", + "nativeType": "nvarchar", + "normalizedType": "text", + "dimensionType": "string", + "nullable": false, + "primaryKey": false, + "comment": "Discount type category." + }, + { + "name": "Category", + "nativeType": "nvarchar", + "normalizedType": "text", + "dimensionType": "string", + "nullable": false, + "primaryKey": false, + "comment": "Group the discount applies to such as Reseller or Customer." + }, + { + "name": "StartDate", + "nativeType": "datetime", + "normalizedType": "text", + "dimensionType": "time", + "nullable": false, + "primaryKey": false, + "comment": "Discount start date." + }, + { + "name": "EndDate", + "nativeType": "datetime", + "normalizedType": "text", + "dimensionType": "time", + "nullable": false, + "primaryKey": false, + "comment": "Discount end date." 
+ }, + { + "name": "MinQty", + "nativeType": "int", + "normalizedType": "integer", + "dimensionType": "number", + "nullable": false, + "primaryKey": false, + "comment": "Minimum discount percent allowed." + }, + { + "name": "MaxQty", + "nativeType": "int", + "normalizedType": "integer", + "dimensionType": "number", + "nullable": true, + "primaryKey": false, + "comment": "Maximum discount percent allowed." + }, + { + "name": "rowguid", + "nativeType": "uniqueidentifier", + "normalizedType": "text", + "dimensionType": "string", + "nullable": false, + "primaryKey": false, + "comment": "ROWGUIDCOL number uniquely identifying the record. Used to support a merge replication sample." + }, + { + "name": "ModifiedDate", + "nativeType": "datetime", + "normalizedType": "text", + "dimensionType": "time", + "nullable": false, + "primaryKey": false, + "comment": "Date and time the record was last updated." + } + ], + "foreignKeys": [] + }, + { + "catalog": null, + "db": "main", + "name": "Sales.SpecialOfferProduct", + "kind": "table", + "comment": "Cross-reference table mapping products to special offer discounts.", + "estimatedRows": 538, + "columns": [ + { + "name": "SpecialOfferID", + "nativeType": "int", + "normalizedType": "integer", + "dimensionType": "number", + "nullable": false, + "primaryKey": true, + "comment": "Clustered index created by a primary key constraint." + }, + { + "name": "ProductID", + "nativeType": "int", + "normalizedType": "integer", + "dimensionType": "number", + "nullable": false, + "primaryKey": true, + "comment": "Unique nonclustered index. Used to support replication samples." + }, + { + "name": "rowguid", + "nativeType": "uniqueidentifier", + "normalizedType": "text", + "dimensionType": "string", + "nullable": false, + "primaryKey": false, + "comment": "Nonclustered index." 
+ }, + { + "name": "ModifiedDate", + "nativeType": "datetime", + "normalizedType": "text", + "dimensionType": "time", + "nullable": false, + "primaryKey": false, + "comment": "Date and time the record was last updated." + } + ], + "foreignKeys": [ + { + "fromColumn": "ProductID", + "toCatalog": null, + "toDb": "main", + "toTable": "Production.Product", + "toColumn": "ProductID", + "constraintName": "FK_SpecialOfferProduct_Product_ProductID" + }, + { + "fromColumn": "SpecialOfferID", + "toCatalog": null, + "toDb": "main", + "toTable": "Sales.SpecialOffer", + "toColumn": "SpecialOfferID", + "constraintName": "FK_SpecialOfferProduct_SpecialOffer_SpecialOfferID" + } + ] + }, + { + "catalog": null, + "db": "main", + "name": "Sales.Store", + "kind": "table", + "comment": "Customers (resellers) of Adventure Works products.", + "estimatedRows": 701, + "columns": [ + { + "name": "BusinessEntityID", + "nativeType": "int", + "normalizedType": "integer", + "dimensionType": "number", + "nullable": false, + "primaryKey": true, + "comment": "Clustered index created by a primary key constraint." + }, + { + "name": "Name", + "nativeType": "nvarchar", + "normalizedType": "text", + "dimensionType": "string", + "nullable": false, + "primaryKey": false, + "comment": "Unique nonclustered index. Used to support replication samples." + }, + { + "name": "SalesPersonID", + "nativeType": "int", + "normalizedType": "integer", + "dimensionType": "number", + "nullable": true, + "primaryKey": false, + "comment": "Nonclustered index." + }, + { + "name": "Demographics", + "nativeType": "xml", + "normalizedType": "text", + "dimensionType": "string", + "nullable": true, + "primaryKey": false, + "comment": "Demographic informationg about the store such as the number of employees, annual sales and store type." 
+ }, + { + "name": "rowguid", + "nativeType": "uniqueidentifier", + "normalizedType": "text", + "dimensionType": "string", + "nullable": false, + "primaryKey": false, + "comment": "ROWGUIDCOL number uniquely identifying the record. Used to support a merge replication sample." + }, + { + "name": "ModifiedDate", + "nativeType": "datetime", + "normalizedType": "text", + "dimensionType": "time", + "nullable": false, + "primaryKey": false, + "comment": "Date and time the record was last updated." + } + ], + "foreignKeys": [ + { + "fromColumn": "BusinessEntityID", + "toCatalog": null, + "toDb": "main", + "toTable": "Person.BusinessEntity", + "toColumn": "BusinessEntityID", + "constraintName": "FK_Store_BusinessEntity_BusinessEntityID" + }, + { + "fromColumn": "SalesPersonID", + "toCatalog": null, + "toDb": "main", + "toTable": "Sales.SalesPerson", + "toColumn": "BusinessEntityID", + "constraintName": "FK_Store_SalesPerson_SalesPersonID" + } + ] + } + ] +} diff --git a/packages/context/test/fixtures/relationship-benchmarks/adventureworkslt_with_declared_metadata/expected-links.yaml b/packages/context/test/fixtures/relationship-benchmarks/adventureworkslt_with_declared_metadata/expected-links.yaml new file mode 100644 index 00000000..de743ea2 --- /dev/null +++ b/packages/context/test/fixtures/relationship-benchmarks/adventureworkslt_with_declared_metadata/expected-links.yaml @@ -0,0 +1,126 @@ +expectedPks: + - table: Address + columns: + - AddressID + - table: BuildVersion + columns: + - SystemInformationID + - table: Customer + columns: + - CustomerID + - table: CustomerAddress + columns: + - CustomerID + - AddressID + - table: ErrorLog + columns: + - ErrorLogID + - table: Product + columns: + - ProductID + - table: ProductCategory + columns: + - ProductCategoryID + - table: ProductDescription + columns: + - ProductDescriptionID + - table: ProductModel + columns: + - ProductModelID + - table: ProductModelProductDescription + columns: + - ProductModelID + - 
ProductDescriptionID + - Culture + - table: SalesOrderDetail + columns: + - SalesOrderID + - SalesOrderDetailID + - table: SalesOrderHeader + columns: + - SalesOrderID +expectedLinks: + - fromTable: CustomerAddress + fromColumns: + - AddressID + toTable: Address + toColumns: + - AddressID + relationship: many_to_one + - fromTable: CustomerAddress + fromColumns: + - CustomerID + toTable: Customer + toColumns: + - CustomerID + relationship: many_to_one + - fromTable: Product + fromColumns: + - ProductCategoryID + toTable: ProductCategory + toColumns: + - ProductCategoryID + relationship: many_to_one + - fromTable: Product + fromColumns: + - ProductModelID + toTable: ProductModel + toColumns: + - ProductModelID + relationship: many_to_one + - fromTable: ProductCategory + fromColumns: + - ParentProductCategoryID + toTable: ProductCategory + toColumns: + - ProductCategoryID + relationship: many_to_one + - fromTable: ProductModelProductDescription + fromColumns: + - ProductDescriptionID + toTable: ProductDescription + toColumns: + - ProductDescriptionID + relationship: many_to_one + - fromTable: ProductModelProductDescription + fromColumns: + - ProductModelID + toTable: ProductModel + toColumns: + - ProductModelID + relationship: many_to_one + - fromTable: SalesOrderDetail + fromColumns: + - ProductID + toTable: Product + toColumns: + - ProductID + relationship: many_to_one + - fromTable: SalesOrderDetail + fromColumns: + - SalesOrderID + toTable: SalesOrderHeader + toColumns: + - SalesOrderID + relationship: many_to_one + - fromTable: SalesOrderHeader + fromColumns: + - BillToAddressID + toTable: Address + toColumns: + - AddressID + relationship: many_to_one + - fromTable: SalesOrderHeader + fromColumns: + - CustomerID + toTable: Customer + toColumns: + - CustomerID + relationship: many_to_one + - fromTable: SalesOrderHeader + fromColumns: + - ShipToAddressID + toTable: Address + toColumns: + - AddressID + relationship: many_to_one diff --git 
a/packages/context/test/fixtures/relationship-benchmarks/adventureworkslt_with_declared_metadata/fixture.yaml b/packages/context/test/fixtures/relationship-benchmarks/adventureworkslt_with_declared_metadata/fixture.yaml new file mode 100644 index 00000000..f73a7842 --- /dev/null +++ b/packages/context/test/fixtures/relationship-benchmarks/adventureworkslt_with_declared_metadata/fixture.yaml @@ -0,0 +1,14 @@ +id: adventureworkslt_with_declared_metadata +name: AdventureWorksLT (SQLite, declared metadata) +tier: row_bearing +origin: public +thresholdEligible: true +defaultModes: + - metadata_present + - declared_pks_and_declared_fks_removed + - declared_pks_removed + - declared_fks_removed + - profiling_disabled + - validation_disabled + - llm_disabled + - embeddings_disabled diff --git a/packages/context/test/fixtures/relationship-benchmarks/adventureworkslt_with_declared_metadata/snapshot.json b/packages/context/test/fixtures/relationship-benchmarks/adventureworkslt_with_declared_metadata/snapshot.json new file mode 100644 index 00000000..b2e7d960 --- /dev/null +++ b/packages/context/test/fixtures/relationship-benchmarks/adventureworkslt_with_declared_metadata/snapshot.json @@ -0,0 +1,1224 @@ +{ + "connectionId": "adventureworkslt_with_declared_metadata", + "driver": "sqlite", + "extractedAt": "2026-05-07T00:00:00.000Z", + "scope": {}, + "metadata": {}, + "tables": [ + { + "catalog": null, + "db": "main", + "name": "Address", + "kind": "table", + "comment": null, + "estimatedRows": 450, + "columns": [ + { + "name": "AddressID", + "nativeType": "INTEGER", + "normalizedType": "integer", + "dimensionType": "number", + "nullable": true, + "primaryKey": true, + "comment": null + }, + { + "name": "AddressLine1", + "nativeType": "TEXT", + "normalizedType": "text", + "dimensionType": "string", + "nullable": false, + "primaryKey": false, + "comment": null + }, + { + "name": "AddressLine2", + "nativeType": "TEXT", + "normalizedType": "text", + "dimensionType": "string", + 
"nullable": true, + "primaryKey": false, + "comment": null + }, + { + "name": "City", + "nativeType": "TEXT", + "normalizedType": "text", + "dimensionType": "string", + "nullable": false, + "primaryKey": false, + "comment": null + }, + { + "name": "StateProvince", + "nativeType": "TEXT", + "normalizedType": "text", + "dimensionType": "string", + "nullable": false, + "primaryKey": false, + "comment": null + }, + { + "name": "CountryRegion", + "nativeType": "TEXT", + "normalizedType": "text", + "dimensionType": "string", + "nullable": false, + "primaryKey": false, + "comment": null + }, + { + "name": "PostalCode", + "nativeType": "TEXT", + "normalizedType": "text", + "dimensionType": "string", + "nullable": false, + "primaryKey": false, + "comment": null + }, + { + "name": "rowguid", + "nativeType": "TEXT", + "normalizedType": "text", + "dimensionType": "string", + "nullable": false, + "primaryKey": false, + "comment": null + }, + { + "name": "ModifiedDate", + "nativeType": "DATETIME", + "normalizedType": "text", + "dimensionType": "time", + "nullable": false, + "primaryKey": false, + "comment": null + } + ], + "foreignKeys": [] + }, + { + "catalog": null, + "db": "main", + "name": "BuildVersion", + "kind": "table", + "comment": null, + "estimatedRows": 1, + "columns": [ + { + "name": "SystemInformationID", + "nativeType": "INTEGER", + "normalizedType": "integer", + "dimensionType": "number", + "nullable": true, + "primaryKey": true, + "comment": null + }, + { + "name": "Database Version", + "nativeType": "TEXT", + "normalizedType": "text", + "dimensionType": "string", + "nullable": false, + "primaryKey": false, + "comment": null + }, + { + "name": "VersionDate", + "nativeType": "DATETIME", + "normalizedType": "text", + "dimensionType": "time", + "nullable": false, + "primaryKey": false, + "comment": null + }, + { + "name": "ModifiedDate", + "nativeType": "DATETIME", + "normalizedType": "text", + "dimensionType": "time", + "nullable": false, + "primaryKey": false, + 
"comment": null + } + ], + "foreignKeys": [] + }, + { + "catalog": null, + "db": "main", + "name": "Customer", + "kind": "table", + "comment": null, + "estimatedRows": 847, + "columns": [ + { + "name": "CustomerID", + "nativeType": "INTEGER", + "normalizedType": "integer", + "dimensionType": "number", + "nullable": true, + "primaryKey": true, + "comment": null + }, + { + "name": "NameStyle", + "nativeType": "INTEGER", + "normalizedType": "integer", + "dimensionType": "number", + "nullable": false, + "primaryKey": false, + "comment": null + }, + { + "name": "Title", + "nativeType": "TEXT", + "normalizedType": "text", + "dimensionType": "string", + "nullable": true, + "primaryKey": false, + "comment": null + }, + { + "name": "FirstName", + "nativeType": "TEXT", + "normalizedType": "text", + "dimensionType": "string", + "nullable": false, + "primaryKey": false, + "comment": null + }, + { + "name": "MiddleName", + "nativeType": "TEXT", + "normalizedType": "text", + "dimensionType": "string", + "nullable": true, + "primaryKey": false, + "comment": null + }, + { + "name": "LastName", + "nativeType": "TEXT", + "normalizedType": "text", + "dimensionType": "string", + "nullable": false, + "primaryKey": false, + "comment": null + }, + { + "name": "Suffix", + "nativeType": "TEXT", + "normalizedType": "text", + "dimensionType": "string", + "nullable": true, + "primaryKey": false, + "comment": null + }, + { + "name": "CompanyName", + "nativeType": "TEXT", + "normalizedType": "text", + "dimensionType": "string", + "nullable": true, + "primaryKey": false, + "comment": null + }, + { + "name": "SalesPerson", + "nativeType": "TEXT", + "normalizedType": "text", + "dimensionType": "string", + "nullable": true, + "primaryKey": false, + "comment": null + }, + { + "name": "EmailAddress", + "nativeType": "TEXT", + "normalizedType": "text", + "dimensionType": "string", + "nullable": true, + "primaryKey": false, + "comment": null + }, + { + "name": "Phone", + "nativeType": "TEXT", + 
"normalizedType": "text", + "dimensionType": "string", + "nullable": true, + "primaryKey": false, + "comment": null + }, + { + "name": "PasswordHash", + "nativeType": "TEXT", + "normalizedType": "text", + "dimensionType": "string", + "nullable": false, + "primaryKey": false, + "comment": null + }, + { + "name": "PasswordSalt", + "nativeType": "TEXT", + "normalizedType": "text", + "dimensionType": "string", + "nullable": false, + "primaryKey": false, + "comment": null + }, + { + "name": "rowguid", + "nativeType": "TEXT", + "normalizedType": "text", + "dimensionType": "string", + "nullable": false, + "primaryKey": false, + "comment": null + }, + { + "name": "ModifiedDate", + "nativeType": "DATETIME", + "normalizedType": "text", + "dimensionType": "time", + "nullable": false, + "primaryKey": false, + "comment": null + } + ], + "foreignKeys": [] + }, + { + "catalog": null, + "db": "main", + "name": "CustomerAddress", + "kind": "table", + "comment": null, + "estimatedRows": 417, + "columns": [ + { + "name": "CustomerID", + "nativeType": "INTEGER", + "normalizedType": "integer", + "dimensionType": "number", + "nullable": false, + "primaryKey": true, + "comment": null + }, + { + "name": "AddressID", + "nativeType": "INTEGER", + "normalizedType": "integer", + "dimensionType": "number", + "nullable": false, + "primaryKey": true, + "comment": null + }, + { + "name": "AddressType", + "nativeType": "TEXT", + "normalizedType": "text", + "dimensionType": "string", + "nullable": false, + "primaryKey": false, + "comment": null + }, + { + "name": "rowguid", + "nativeType": "TEXT", + "normalizedType": "text", + "dimensionType": "string", + "nullable": false, + "primaryKey": false, + "comment": null + }, + { + "name": "ModifiedDate", + "nativeType": "DATETIME", + "normalizedType": "text", + "dimensionType": "time", + "nullable": false, + "primaryKey": false, + "comment": null + } + ], + "foreignKeys": [ + { + "fromColumn": "AddressID", + "toCatalog": null, + "toDb": "main", + 
"toTable": "Address", + "toColumn": "AddressID", + "constraintName": "CustomerAddress_AddressID_fkey" + }, + { + "fromColumn": "CustomerID", + "toCatalog": null, + "toDb": "main", + "toTable": "Customer", + "toColumn": "CustomerID", + "constraintName": "CustomerAddress_CustomerID_fkey" + } + ] + }, + { + "catalog": null, + "db": "main", + "name": "ErrorLog", + "kind": "table", + "comment": null, + "estimatedRows": 0, + "columns": [ + { + "name": "ErrorLogID", + "nativeType": "INTEGER", + "normalizedType": "integer", + "dimensionType": "number", + "nullable": true, + "primaryKey": true, + "comment": null + }, + { + "name": "ErrorTime", + "nativeType": "DATETIME", + "normalizedType": "text", + "dimensionType": "time", + "nullable": false, + "primaryKey": false, + "comment": null + }, + { + "name": "UserName", + "nativeType": "TEXT", + "normalizedType": "text", + "dimensionType": "string", + "nullable": false, + "primaryKey": false, + "comment": null + }, + { + "name": "ErrorNumber", + "nativeType": "INTEGER", + "normalizedType": "integer", + "dimensionType": "number", + "nullable": false, + "primaryKey": false, + "comment": null + }, + { + "name": "ErrorSeverity", + "nativeType": "INTEGER", + "normalizedType": "integer", + "dimensionType": "number", + "nullable": true, + "primaryKey": false, + "comment": null + }, + { + "name": "ErrorState", + "nativeType": "INTEGER", + "normalizedType": "integer", + "dimensionType": "number", + "nullable": true, + "primaryKey": false, + "comment": null + }, + { + "name": "ErrorProcedure", + "nativeType": "TEXT", + "normalizedType": "text", + "dimensionType": "string", + "nullable": true, + "primaryKey": false, + "comment": null + }, + { + "name": "ErrorLine", + "nativeType": "INTEGER", + "normalizedType": "integer", + "dimensionType": "number", + "nullable": true, + "primaryKey": false, + "comment": null + }, + { + "name": "ErrorMessage", + "nativeType": "TEXT", + "normalizedType": "text", + "dimensionType": "string", + "nullable": 
false, + "primaryKey": false, + "comment": null + } + ], + "foreignKeys": [] + }, + { + "catalog": null, + "db": "main", + "name": "Product", + "kind": "table", + "comment": null, + "estimatedRows": 295, + "columns": [ + { + "name": "ProductID", + "nativeType": "INTEGER", + "normalizedType": "integer", + "dimensionType": "number", + "nullable": true, + "primaryKey": true, + "comment": null + }, + { + "name": "Name", + "nativeType": "TEXT", + "normalizedType": "text", + "dimensionType": "string", + "nullable": false, + "primaryKey": false, + "comment": null + }, + { + "name": "ProductNumber", + "nativeType": "TEXT", + "normalizedType": "text", + "dimensionType": "string", + "nullable": false, + "primaryKey": false, + "comment": null + }, + { + "name": "Color", + "nativeType": "TEXT", + "normalizedType": "text", + "dimensionType": "string", + "nullable": true, + "primaryKey": false, + "comment": null + }, + { + "name": "StandardCost", + "nativeType": "INTEGER", + "normalizedType": "integer", + "dimensionType": "number", + "nullable": false, + "primaryKey": false, + "comment": null + }, + { + "name": "ListPrice", + "nativeType": "INTEGER", + "normalizedType": "integer", + "dimensionType": "number", + "nullable": false, + "primaryKey": false, + "comment": null + }, + { + "name": "Size", + "nativeType": "TEXT", + "normalizedType": "text", + "dimensionType": "string", + "nullable": true, + "primaryKey": false, + "comment": null + }, + { + "name": "Weight", + "nativeType": "INTEGER", + "normalizedType": "integer", + "dimensionType": "number", + "nullable": true, + "primaryKey": false, + "comment": null + }, + { + "name": "ProductCategoryID", + "nativeType": "INTEGER", + "normalizedType": "integer", + "dimensionType": "number", + "nullable": true, + "primaryKey": false, + "comment": null + }, + { + "name": "ProductModelID", + "nativeType": "INTEGER", + "normalizedType": "integer", + "dimensionType": "number", + "nullable": true, + "primaryKey": false, + "comment": null + 
}, + { + "name": "SellStartDate", + "nativeType": "DATETIME", + "normalizedType": "text", + "dimensionType": "time", + "nullable": false, + "primaryKey": false, + "comment": null + }, + { + "name": "SellEndDate", + "nativeType": "DATETIME", + "normalizedType": "text", + "dimensionType": "time", + "nullable": true, + "primaryKey": false, + "comment": null + }, + { + "name": "DiscontinuedDate", + "nativeType": "DATETIME", + "normalizedType": "text", + "dimensionType": "time", + "nullable": true, + "primaryKey": false, + "comment": null + }, + { + "name": "ThumbNailPhoto", + "nativeType": "BLOB", + "normalizedType": "blob", + "dimensionType": "string", + "nullable": true, + "primaryKey": false, + "comment": null + }, + { + "name": "ThumbnailPhotoFileName", + "nativeType": "TEXT", + "normalizedType": "text", + "dimensionType": "string", + "nullable": true, + "primaryKey": false, + "comment": null + }, + { + "name": "rowguid", + "nativeType": "TEXT", + "normalizedType": "text", + "dimensionType": "string", + "nullable": false, + "primaryKey": false, + "comment": null + }, + { + "name": "ModifiedDate", + "nativeType": "DATETIME", + "normalizedType": "text", + "dimensionType": "time", + "nullable": false, + "primaryKey": false, + "comment": null + } + ], + "foreignKeys": [ + { + "fromColumn": "ProductCategoryID", + "toCatalog": null, + "toDb": "main", + "toTable": "ProductCategory", + "toColumn": "ProductCategoryID", + "constraintName": "Product_ProductCategoryID_fkey" + }, + { + "fromColumn": "ProductModelID", + "toCatalog": null, + "toDb": "main", + "toTable": "ProductModel", + "toColumn": "ProductModelID", + "constraintName": "Product_ProductModelID_fkey" + } + ] + }, + { + "catalog": null, + "db": "main", + "name": "ProductCategory", + "kind": "table", + "comment": null, + "estimatedRows": 41, + "columns": [ + { + "name": "ProductCategoryID", + "nativeType": "INTEGER", + "normalizedType": "integer", + "dimensionType": "number", + "nullable": true, + "primaryKey": 
true, + "comment": null + }, + { + "name": "ParentProductCategoryID", + "nativeType": "INTEGER", + "normalizedType": "integer", + "dimensionType": "number", + "nullable": true, + "primaryKey": false, + "comment": null + }, + { + "name": "Name", + "nativeType": "TEXT", + "normalizedType": "text", + "dimensionType": "string", + "nullable": false, + "primaryKey": false, + "comment": null + }, + { + "name": "rowguid", + "nativeType": "TEXT", + "normalizedType": "text", + "dimensionType": "string", + "nullable": false, + "primaryKey": false, + "comment": null + }, + { + "name": "ModifiedDate", + "nativeType": "DATETIME", + "normalizedType": "text", + "dimensionType": "time", + "nullable": false, + "primaryKey": false, + "comment": null + } + ], + "foreignKeys": [ + { + "fromColumn": "ParentProductCategoryID", + "toCatalog": null, + "toDb": "main", + "toTable": "ProductCategory", + "toColumn": "ProductCategoryID", + "constraintName": "ProductCategory_ParentProductCategoryID_fkey" + } + ] + }, + { + "catalog": null, + "db": "main", + "name": "ProductDescription", + "kind": "table", + "comment": null, + "estimatedRows": 762, + "columns": [ + { + "name": "ProductDescriptionID", + "nativeType": "INTEGER", + "normalizedType": "integer", + "dimensionType": "number", + "nullable": true, + "primaryKey": true, + "comment": null + }, + { + "name": "Description", + "nativeType": "TEXT", + "normalizedType": "text", + "dimensionType": "string", + "nullable": false, + "primaryKey": false, + "comment": null + }, + { + "name": "rowguid", + "nativeType": "TEXT", + "normalizedType": "text", + "dimensionType": "string", + "nullable": false, + "primaryKey": false, + "comment": null + }, + { + "name": "ModifiedDate", + "nativeType": "DATETIME", + "normalizedType": "text", + "dimensionType": "time", + "nullable": false, + "primaryKey": false, + "comment": null + } + ], + "foreignKeys": [] + }, + { + "catalog": null, + "db": "main", + "name": "ProductModel", + "kind": "table", + "comment": 
null, + "estimatedRows": 128, + "columns": [ + { + "name": "ProductModelID", + "nativeType": "INTEGER", + "normalizedType": "integer", + "dimensionType": "number", + "nullable": true, + "primaryKey": true, + "comment": null + }, + { + "name": "Name", + "nativeType": "TEXT", + "normalizedType": "text", + "dimensionType": "string", + "nullable": false, + "primaryKey": false, + "comment": null + }, + { + "name": "CatalogDescription", + "nativeType": "TEXT", + "normalizedType": "text", + "dimensionType": "string", + "nullable": true, + "primaryKey": false, + "comment": null + }, + { + "name": "rowguid", + "nativeType": "TEXT", + "normalizedType": "text", + "dimensionType": "string", + "nullable": false, + "primaryKey": false, + "comment": null + }, + { + "name": "ModifiedDate", + "nativeType": "DATETIME", + "normalizedType": "text", + "dimensionType": "time", + "nullable": false, + "primaryKey": false, + "comment": null + } + ], + "foreignKeys": [] + }, + { + "catalog": null, + "db": "main", + "name": "ProductModelProductDescription", + "kind": "table", + "comment": null, + "estimatedRows": 762, + "columns": [ + { + "name": "ProductModelID", + "nativeType": "INTEGER", + "normalizedType": "integer", + "dimensionType": "number", + "nullable": false, + "primaryKey": true, + "comment": null + }, + { + "name": "ProductDescriptionID", + "nativeType": "INTEGER", + "normalizedType": "integer", + "dimensionType": "number", + "nullable": false, + "primaryKey": true, + "comment": null + }, + { + "name": "Culture", + "nativeType": "TEXT", + "normalizedType": "text", + "dimensionType": "string", + "nullable": false, + "primaryKey": true, + "comment": null + }, + { + "name": "rowguid", + "nativeType": "TEXT", + "normalizedType": "text", + "dimensionType": "string", + "nullable": false, + "primaryKey": false, + "comment": null + }, + { + "name": "ModifiedDate", + "nativeType": "DATETIME", + "normalizedType": "text", + "dimensionType": "time", + "nullable": false, + "primaryKey": 
false, + "comment": null + } + ], + "foreignKeys": [ + { + "fromColumn": "ProductModelID", + "toCatalog": null, + "toDb": "main", + "toTable": "ProductModel", + "toColumn": "ProductModelID", + "constraintName": "ProductModelProductDescription_ProductModelID_fkey" + }, + { + "fromColumn": "ProductDescriptionID", + "toCatalog": null, + "toDb": "main", + "toTable": "ProductDescription", + "toColumn": "ProductDescriptionID", + "constraintName": "ProductModelProductDescription_ProductDescriptionID_fkey" + } + ] + }, + { + "catalog": null, + "db": "main", + "name": "SalesOrderDetail", + "kind": "table", + "comment": null, + "estimatedRows": 542, + "columns": [ + { + "name": "SalesOrderID", + "nativeType": "INTEGER", + "normalizedType": "integer", + "dimensionType": "number", + "nullable": false, + "primaryKey": true, + "comment": null + }, + { + "name": "SalesOrderDetailID", + "nativeType": "INTEGER IDENTITY (1, 1)", + "normalizedType": "integer", + "dimensionType": "number", + "nullable": false, + "primaryKey": true, + "comment": null + }, + { + "name": "OrderQty", + "nativeType": "INTEGER", + "normalizedType": "integer", + "dimensionType": "number", + "nullable": false, + "primaryKey": false, + "comment": null + }, + { + "name": "ProductID", + "nativeType": "INTEGER", + "normalizedType": "integer", + "dimensionType": "number", + "nullable": false, + "primaryKey": false, + "comment": null + }, + { + "name": "UnitPrice", + "nativeType": "INTEGER", + "normalizedType": "integer", + "dimensionType": "number", + "nullable": false, + "primaryKey": false, + "comment": null + }, + { + "name": "UnitPriceDiscount", + "nativeType": "INTEGER", + "normalizedType": "integer", + "dimensionType": "number", + "nullable": false, + "primaryKey": false, + "comment": null + }, + { + "name": "LineTotal", + "nativeType": "INTEGER", + "normalizedType": "integer", + "dimensionType": "number", + "nullable": false, + "primaryKey": false, + "comment": null + }, + { + "name": "rowguid", + 
"nativeType": "TEXT", + "normalizedType": "text", + "dimensionType": "string", + "nullable": false, + "primaryKey": false, + "comment": null + }, + { + "name": "ModifiedDate", + "nativeType": "DATETIME", + "normalizedType": "text", + "dimensionType": "time", + "nullable": false, + "primaryKey": false, + "comment": null + } + ], + "foreignKeys": [ + { + "fromColumn": "ProductID", + "toCatalog": null, + "toDb": "main", + "toTable": "Product", + "toColumn": "ProductID", + "constraintName": "SalesOrderDetail_ProductID_fkey" + }, + { + "fromColumn": "SalesOrderID", + "toCatalog": null, + "toDb": "main", + "toTable": "SalesOrderHeader", + "toColumn": "SalesOrderID", + "constraintName": "SalesOrderDetail_SalesOrderID_fkey" + } + ] + }, + { + "catalog": null, + "db": "main", + "name": "SalesOrderHeader", + "kind": "table", + "comment": null, + "estimatedRows": 32, + "columns": [ + { + "name": "SalesOrderID", + "nativeType": "INTEGER", + "normalizedType": "integer", + "dimensionType": "number", + "nullable": true, + "primaryKey": true, + "comment": null + }, + { + "name": "RevisionNumber", + "nativeType": "INTEGER", + "normalizedType": "integer", + "dimensionType": "number", + "nullable": false, + "primaryKey": false, + "comment": null + }, + { + "name": "OrderDate", + "nativeType": "DATETIME", + "normalizedType": "text", + "dimensionType": "time", + "nullable": false, + "primaryKey": false, + "comment": null + }, + { + "name": "DueDate", + "nativeType": "DATETIME", + "normalizedType": "text", + "dimensionType": "time", + "nullable": false, + "primaryKey": false, + "comment": null + }, + { + "name": "ShipDate", + "nativeType": "DATETIME", + "normalizedType": "text", + "dimensionType": "time", + "nullable": true, + "primaryKey": false, + "comment": null + }, + { + "name": "Status", + "nativeType": "INTEGER", + "normalizedType": "integer", + "dimensionType": "number", + "nullable": false, + "primaryKey": false, + "comment": null + }, + { + "name": "OnlineOrderFlag", + 
"nativeType": "INTEGER", + "normalizedType": "integer", + "dimensionType": "number", + "nullable": false, + "primaryKey": false, + "comment": null + }, + { + "name": "SalesOrderNumber", + "nativeType": "TEXT", + "normalizedType": "text", + "dimensionType": "string", + "nullable": false, + "primaryKey": false, + "comment": null + }, + { + "name": "PurchaseOrderNumber", + "nativeType": "INTEGER", + "normalizedType": "integer", + "dimensionType": "number", + "nullable": true, + "primaryKey": false, + "comment": null + }, + { + "name": "AccountNumber", + "nativeType": "TEXT", + "normalizedType": "text", + "dimensionType": "string", + "nullable": true, + "primaryKey": false, + "comment": null + }, + { + "name": "CustomerID", + "nativeType": "INTEGER", + "normalizedType": "integer", + "dimensionType": "number", + "nullable": false, + "primaryKey": false, + "comment": null + }, + { + "name": "ShipToAddressID", + "nativeType": "INT", + "normalizedType": "integer", + "dimensionType": "number", + "nullable": true, + "primaryKey": false, + "comment": null + }, + { + "name": "BillToAddressID", + "nativeType": "INT", + "normalizedType": "integer", + "dimensionType": "number", + "nullable": true, + "primaryKey": false, + "comment": null + }, + { + "name": "ShipMethod", + "nativeType": "TEXT", + "normalizedType": "text", + "dimensionType": "string", + "nullable": false, + "primaryKey": false, + "comment": null + }, + { + "name": "CreditCardApprovalCode", + "nativeType": "TEXT", + "normalizedType": "text", + "dimensionType": "string", + "nullable": true, + "primaryKey": false, + "comment": null + }, + { + "name": "SubTotal", + "nativeType": "INTEGER", + "normalizedType": "integer", + "dimensionType": "number", + "nullable": false, + "primaryKey": false, + "comment": null + }, + { + "name": "TaxAmt", + "nativeType": "INTEGER", + "normalizedType": "integer", + "dimensionType": "number", + "nullable": false, + "primaryKey": false, + "comment": null + }, + { + "name": "Freight", + 
"nativeType": "INTEGER", + "normalizedType": "integer", + "dimensionType": "number", + "nullable": false, + "primaryKey": false, + "comment": null + }, + { + "name": "TotalDue", + "nativeType": "INTEGER", + "normalizedType": "integer", + "dimensionType": "number", + "nullable": false, + "primaryKey": false, + "comment": null + }, + { + "name": "Comment", + "nativeType": "TEXT", + "normalizedType": "text", + "dimensionType": "string", + "nullable": true, + "primaryKey": false, + "comment": null + }, + { + "name": "rowguid", + "nativeType": "TEXT", + "normalizedType": "text", + "dimensionType": "string", + "nullable": false, + "primaryKey": false, + "comment": null + }, + { + "name": "ModifiedDate", + "nativeType": "DATETIME", + "normalizedType": "text", + "dimensionType": "time", + "nullable": false, + "primaryKey": false, + "comment": null + } + ], + "foreignKeys": [ + { + "fromColumn": "BillToAddressID", + "toCatalog": null, + "toDb": "main", + "toTable": "Address", + "toColumn": "AddressID", + "constraintName": "SalesOrderHeader_BillToAddressID_fkey" + }, + { + "fromColumn": "ShipToAddressID", + "toCatalog": null, + "toDb": "main", + "toTable": "Address", + "toColumn": "AddressID", + "constraintName": "SalesOrderHeader_ShipToAddressID_fkey" + }, + { + "fromColumn": "CustomerID", + "toCatalog": null, + "toDb": "main", + "toTable": "Customer", + "toColumn": "CustomerID", + "constraintName": "SalesOrderHeader_CustomerID_fkey" + } + ] + } + ] +} diff --git a/packages/context/test/fixtures/relationship-benchmarks/analytical_warehouse_no_naming_convention/data.sqlite b/packages/context/test/fixtures/relationship-benchmarks/analytical_warehouse_no_naming_convention/data.sqlite new file mode 100644 index 0000000000000000000000000000000000000000..09f6ac1f837771e1b019d36fa50ccdb3139ee54f GIT binary patch literal 20480 zcmeI(O=}ZD7zgluE9t>x3?&9y)F~8fK!li6Z`O1zmRM_EKm}Q*n@PKLU&79$&|`{U z!S7N00$x3Ms0Ys?UcBkWyR)07vd!XATKXRtb~7`~KF_avnI~!IMaz?%4x%_NWytu(x 
zoNAW2=>~#6-R|0(cKiG(wef4SP_5VXW9M&vF+b$th&z4e`A2EonGtI?ugpAMoqkOA zk$I^JZ!a7Tr&+0MpI;2#je>y3uE+c|x7;K1!|kj zwmCy)CcT^mbTz@w4Je%$ut2$8%ttVv3nz$@*Csnx8?Wf;-(W1x!^{FDL1aganINLot&x*8U!E!0SG_<0uX=z1Rwwb2tWV= zmrsD0R=HZ)+Sy`57OL)+4XeCV*|0ac`a(ZE;sIw%x>de+yVvQ3>YM&P?MNoMP(^~+ zbD5Z&|4%gXgM1<40*j`#uzc6v?y4VQ@OT(| zLY?K4A0ha>zi3#MO08?JKa-pVlVK)=C)Bb3magWOpR{dz5OXfLKX3$(5BcQ$f2xt6 zc=zqN}E<)(fUO z`CKn(uQTATlZC76ah1R(Y+@{_3jzWVfB*y_009U<00Izz00jPlz-5OOtMxkVW~tX~ z%P96`?6!kYx?Z~-wY)Tl!leJKwz?}!M{vh%Sc3P@;j<%L#oNQc=Nq;o*2FHiw;XQo zTh{#Rw0SW10s?sAzH zt1~mSE56nHsmO%=Qd#m><($TmpS4mqIm!CfGO&VtZ>|~F#CS69)C~av2tWV=5P$## zAOHafKmY;|fWQz7RI~ysO;hc0Z*#w9THEWUUSf>ER=Z<@*&)Sxk%p8~uO%oplrtLFd! literal 0 HcmV?d00001 diff --git a/packages/context/test/fixtures/relationship-benchmarks/composite_keys_no_declared_constraints/expected-links.yaml b/packages/context/test/fixtures/relationship-benchmarks/composite_keys_no_declared_constraints/expected-links.yaml new file mode 100644 index 00000000..41efcf66 --- /dev/null +++ b/packages/context/test/fixtures/relationship-benchmarks/composite_keys_no_declared_constraints/expected-links.yaml @@ -0,0 +1,11 @@ +expectedPks: + - table: order_lines + columns: [order_id, line_number] + - table: order_line_allocations + columns: [order_id, line_number, warehouse_code] +expectedLinks: + - fromTable: order_line_allocations + fromColumns: [order_id, line_number] + toTable: order_lines + toColumns: [order_id, line_number] + relationship: many_to_one diff --git a/packages/context/test/fixtures/relationship-benchmarks/composite_keys_no_declared_constraints/fixture.yaml b/packages/context/test/fixtures/relationship-benchmarks/composite_keys_no_declared_constraints/fixture.yaml new file mode 100644 index 00000000..d0fd925a --- /dev/null +++ b/packages/context/test/fixtures/relationship-benchmarks/composite_keys_no_declared_constraints/fixture.yaml @@ -0,0 +1,10 @@ +id: composite_keys_no_declared_constraints +name: Composite-key warehouse fixture with no declared 
constraints +tier: row_bearing +origin: synthetic +defaultModes: + - declared_pks_and_declared_fks_removed + - llm_disabled + - profiling_disabled + - validation_disabled + - embeddings_disabled diff --git a/packages/context/test/fixtures/relationship-benchmarks/composite_keys_no_declared_constraints/snapshot.json b/packages/context/test/fixtures/relationship-benchmarks/composite_keys_no_declared_constraints/snapshot.json new file mode 100644 index 00000000..75bce26a --- /dev/null +++ b/packages/context/test/fixtures/relationship-benchmarks/composite_keys_no_declared_constraints/snapshot.json @@ -0,0 +1,103 @@ +{ + "connectionId": "composite_keys_no_declared_constraints", + "driver": "sqlite", + "extractedAt": "2026-05-07T00:00:00.000Z", + "scope": {}, + "metadata": {}, + "tables": [ + { + "catalog": null, + "db": null, + "name": "order_lines", + "kind": "table", + "comment": null, + "estimatedRows": 3, + "columns": [ + { + "name": "order_id", + "nativeType": "INTEGER", + "normalizedType": "integer", + "dimensionType": "number", + "nullable": false, + "primaryKey": false, + "comment": null + }, + { + "name": "line_number", + "nativeType": "INTEGER", + "normalizedType": "integer", + "dimensionType": "number", + "nullable": false, + "primaryKey": false, + "comment": null + }, + { + "name": "product_sku", + "nativeType": "TEXT", + "normalizedType": "text", + "dimensionType": "string", + "nullable": false, + "primaryKey": false, + "comment": null + }, + { + "name": "quantity", + "nativeType": "INTEGER", + "normalizedType": "integer", + "dimensionType": "number", + "nullable": false, + "primaryKey": false, + "comment": null + } + ], + "foreignKeys": [] + }, + { + "catalog": null, + "db": null, + "name": "order_line_allocations", + "kind": "table", + "comment": null, + "estimatedRows": 4, + "columns": [ + { + "name": "order_id", + "nativeType": "INTEGER", + "normalizedType": "integer", + "dimensionType": "number", + "nullable": false, + "primaryKey": false, + "comment": 
null + }, + { + "name": "line_number", + "nativeType": "INTEGER", + "normalizedType": "integer", + "dimensionType": "number", + "nullable": false, + "primaryKey": false, + "comment": null + }, + { + "name": "warehouse_code", + "nativeType": "TEXT", + "normalizedType": "text", + "dimensionType": "string", + "nullable": false, + "primaryKey": false, + "comment": null + }, + { + "name": "allocated_quantity", + "nativeType": "INTEGER", + "normalizedType": "integer", + "dimensionType": "number", + "nullable": false, + "primaryKey": false, + "comment": null + } + ], + "foreignKeys": [] + } + ] +} diff --git a/packages/context/test/fixtures/relationship-benchmarks/demo_b2b_declared_metadata/data.sqlite b/packages/context/test/fixtures/relationship-benchmarks/demo_b2b_declared_metadata/data.sqlite new file mode 100644 index 0000000000000000000000000000000000000000..dc092a21e9e0120036affa4362df94302df1b83f GIT binary patch literal 139264 zcmeFa378z!buT`>*3#WoEsds2qv`39Pz&``snK-L^vsAwh(#bF0RjOMt29W1G@uPF zAa;5k?AS5(a~u{g5aXC6*v7FPl@ghD-Q7BBAU z=~3bTPk{gL{88W^(awLs|AxrFMgLTLPI}{ODUd4KNRL@G&QGmQu1PG_hu}Yt|2gh~ z;~qHff#V)H?t$YTIPQV}D?Cu$qbSzkV5sgKp1O9=^vu4S_Rs8?KGgYN^{k86o^i?A z#h09M=Ek*)J3n1q&=<2mGdmZrzu=O!=dQhY@urK{pMS>1moMJ1_VN|3bZW3_sH*Knf$e$PPgcM14S$#>3! 
zyZ}BscsL;Low3vVcKXuxPN`lURxHa3)%%${*ji(MML7EGNB{k_r}s|H>e^H3BBnyI`>e z%l6D%H@)MQ9ebv?1tq^WCG5>}i8uF{-!eaFzRBEWo?#9e|6zRDc%RWQt~O3K7NmZX zdOG!R>bBID)JRHA{xJCu$v;gVPF|2)mW(96o%je0KK|#p2abE-xCf4V;J62ld*HYS zj(gy^2abE-|C49pukqkR~(cyxp{Vn+__o0>j)U~1nXDEIS&fLYSl-@%ku zL4PoP@W8>DL(@|SrgpTOm=m)MHRz!x_s;Ar?wvXaM>o@Z0eEo#p##%94lf>HGY)vT z32(-!!`ndT4a*lXO`c8VaehAEu>LuQRro2}hAmmhCiD#(^Cs-rw9I0Pe49r389Rn$ z<``Dy=kp93>SI&-9xHiMy3Sg_H2F4-@DqB5_40<5R`D~shMh2<&F6WnH0jOf*_5Bh zRCzX)Cip2`!_vHArEz}3j$w;UHlydUQpKCG<1B+|@@*R9=kpCq%`t40pVEzDl1=D& ztW@?U?AWv{!4&y6mG~JuhUs$*8{y~k3>(tel%B^%R(Vsp&Qh5s&!&+{enQVMg*R+u zf}gPy#W)Jo>=?E#+R zU6CH!Rgf$*O2>To*rz0gNOv!TmFPS5Xpo{=S1=dyTDXoIEA$3v#l zNmK1CTAExleROK?fj!eF!I}Kjo|&CfJNM4)TX89Z`_*{}u6W>yx!}Fid#{~7xZ)Cq z));#1nCQ&}J$g*^#ROeG7TUQ8(N^0MpJbQaZIYXqYBkN192bzhux$uK`y@r^3kcdL zDMFu5&^}24+S!O`OJkP!B)iaUk{g(6)#OQ51SHRE8^X{&NfCNILHi^{=ye3`lN6wx za}jN6%o3ku7rIUI9Hv?|c#>lRl55+BFtkrngg%>~eUc*dSp@Bq6ri0m5p8MA5}#xj zx=r#7rdmz$Bu51#*R%~`XrH7AeL6w=Bt_`e1nrX)pqJn8neVF*@bSCoM5We1W&RgAUWPPgrR+s zB6NkIeUc*d7(x3a1!!j!(U!(6@kw@}+a$|OwW{+ZM+78GZ9^E^Cn-XY5VTKHgkDL| zK1l)Eflpv0$t>|ncA*(brwHKn{i?>1TougGS;4>>1A81rV26M`iXyO0z#c;Z*d9i( z)i%Q;*adF$D=@{X%JJ)}8QNulv%GBu1AF{L;1dbh<0k?yBVdo80Bq+GY-!94k6#zK z&2K4FtSTJ8u5!WUcS73=2KM-gz)J|&<0k?yCSZ@B0BjE-*wUC89=|Sdn;-PdxIW_? 
zzphHb=I_P`!L0oaC)8~HIaJbqo^Ha}>-kzbVK*Hs_5{GbCz zD;U`0Cjvts?twjiA}}=M9@yh20Nc=)BR^(_$FB?A<_Enw@{4f%x=I6=AGGOc1p|Bh zL||yuJ+Q}51cr{?1AF`gU>kaO|5us_`A; z#-jY0$yfitP%)+U__Y)sW zyd!Z(VtWGa_$L(o$NHD`_v>%b59;UXww}^n*1oQNRC}9tt9H3|iZ-DBPW>nK3H3qs zR`qK2R5hpkPI*rGwDO2Dt6Zg=uH=-S_;c}3#UF{c;xqAeaVM_Fz8Cvc?A@_eY!E@ND4%#qd-#%UXZQ!6@}#4kf0ZYPHoJI{9$?f%fAvp|a8v50~9JmCSxvS+%(Go9fiE|sA(+~FBA{Ey%2 z4gZkdKjkhfHWFtf4?%?@kFbPa)QNBe=QeDcH*92{A)B;9d` zACc0k4z4_zTj}aa`+9E9cjTqXMJItA{gQ55xOclJo5&U2v8F%P4+)H2;f^)iV-KC` zj~#G8hHf!k`(}=M!tq76JJ4tk^n+2F7#Ma38t%aTm-qv71rVZF4Bw*Zdp&t&V7WUu z-lZ`k7xUbGD4=!#U^s;Ngk*;pap9=jyzk0s%#bdcrAMN3^R$+nhV zjgG7|HgE#S&?!mRysJOy2}c(#aR(;a14$&t1{S*m6YjtrZ}$i0hCql;N%*#1{;Vf2 z4?t@thj?oC`NJ%+`)=Jc=K=l!Har9j!se9_MJEB$tH92-LY|htRIb$Rg31i zW8>|yn-=(E2ht!zr^H>O&e`M%Cl;CRz*u{rABmC40mB^_a|do*;SbEEK!~o0`4*kI z+mnwkOST6uiMoURP>hfd&0NA&6ZO@sJKI;YNC!DOCEB)c_1&JVlGEI=5q~U+#`q%D z9UEznJ^Lzu?0^C?bV|fE@8tVD;nj$Z)Jb^wc-_L;FJ@Mn|+wDnI3k%H;Wf7M}mV-rQ$yGE3$> z(>ztCT&-=JThm*9TDFSKt!|9_u$3+w-L>hIO()K91ns||Hp zU8^ovHRUDcOUnC{Hz@~|OO|2_KE=!c?ji{28wB6@N(8~J_Y`N*dt4@c^e>muhymPeHEkHTLJzc2j8@c!^c z;jwUE=#|hvhyFJ7j?kTRte`spY?mIo4uSB-BXF(Yx|!*n*Z!F&t>kV4=~7=ayi9cXs(afun8Vf{t1KCkNDhH-iw&THu-Gr@?Y1cN0j#tOYJ%G|Nx1 z<*~sdAP0wZN_R!p!{cTk)We>Kcy`EjfnU&m)AO*X6iCx5< zV}l1k4t90qU>EV`=)isuL!f#)9wELgXZOvWnK-kQ+uJ?!hO+1Ek=&lPd_J*^cyw&= z29Seg9XVJ=96CC1J&3`w`5nuMJImP_5JIfxdp;rlEai50&rCczGPtX2W~G1nVC3qGJECRnf*_#(nR)(w$$|OBF-%3E(a;tWfHrHJ4Xh$f*kDX$iXh(n_$-# z5JRBMjz@?uC$o6z2|VJ@Oq@B9!&^>sWhinZUA+94zx4 z3YOv3C9ursSVr7AnZ=t*V430hg!pqJhgXy4%uGBwK8Tl*_?i8WwNhmeZy$j;<*#ku zqhJ_bHv+>_9m9w}C$o6F2nEUJ`_ll3wXcn*VfHdLRMVvX2!#g`*SAy6@+&Mmow{*a+jvVYF-mDDZ zwH&Z3(eViJ7GxBKSy(TRc6l2#G~awydA^OJn{|C@X{b&hXJwXuWjPd z$^hPf0mHP8VZ@(fS-kE7hH0K*#G#`(yxszabu{DJCN3=x;*A!5X5!J(0A69CGZ$a* ztR5M_8!I4GJ7y7Qj%D$J3YevOW)W|W=I}xan5B|P5r38k@eT@@)saJ_h&xLIc%+Va)BLTY55H=M&$bU>RQIm@_l+=;R>Y-QZ_lmh}vu7{u!uAddNKn|QP| zfEP5tuvo`1;?I#RUdRB$VxD2XL%}e-fdPhfG~?PPE}b02OBeji#G?}fc+Y~)e8Ned z)#C$ruL6Y8j#$e~=U5gm2XM0zXO1qy>i}fd%uUQ*cxpCq 
zC-(pI%m?ua+6mDAzheH}{E_*6^Iy$>Hot0q!Tg;03G-v-hs^hx?=>GbA51=x{G;Ty z2HHB|4hA4`;qop?SAcmc9v$Tzfhl4->KdRz5Qb4 z73GV{yOdj$^OdFXU&Wt{KO8?2-w@Bneja-|_Kw)0*x4})dil>qAB^sao*wOw{50~J z$Xg@3B4m71{mOF zA6~$UKD4Y7*5I82_!7mP+wi`Hr?})JZLIA>!xOVOKjTrHMdO8yg@jj`I5&@7Mj)>;@hN+mLB^|0+*-u$ zL(Z#AJX^qKM98a5ylZ1$BI8Azo5waKm>2OWdw)U7i@3Fj?aMKEk)2S$ZbrzfM0SIX zjg5p?iN*{2oIqYB8ZYd3WV}i=UfBM~d6j6qup1KdqVdAUNXCoC3;U#CUNm0VFG+dP zcwzH&3|=%|*h>j{(d=boTP5K|lUE+QtUz8gXR#L(WV~qFDq`y;=S4GD0b4OKFB&iG z%4ED~ys$|N=0)R$&6<=KiPv)I+%zyW7W8>kCR&G~htt5$Xz<5Tndn>!9i0Yd;x$n2 z)2K`$Z$oFNp_QT!S+ZC0#RE-SSrLE4lFhmtnm!>Tx?hK({gW`FyLBlvgMp0b9<`w- zlrkbwcc4R*Ga_MM4(+0t5seWvjxt6xM$kqEGomqqZc@gG?oS6gODQ9|L6<|DDP%|6-W+eyuXDK6^mzF~}Eo4M<(=c?_GDc+CFNHQc zm=TQ;^x9HJG)B;O%NfxaK^HD&L}LUUxr7l-O-rFY4`f7Bk_|n&j1kRB4mRwv&m)?b zmSg`e`h=yqX&9S%@h9vEjS;r?L7%YD*e>~_E%5i^Kff2ci-$r_S$^zwI;PPwJ`ag$rqAeNxnb%j^rK5J;_UwW67n-MB*ojZzVpR`0K<2@Ld5j ziHi~=iNy&OP79vVKcK%;pVhC^FV$D;gL+bXN&B|;8SUNLTj1S*E41}mNn4_6>Wk{L z>L=BAsc%wmRxeXeS95Ag`48or%4gw@z#Ej?lq;3(m|{Gs^0@uTrA z@pI!V<3{YiVlTj5f%nJW7`r_-4d)A^v264=(H}=YAN^SL;b=8_Ai6a=8C@PV;m*Jd zk&AdeE6yGpNHQXzCC`gQ0nQr+3aA*~`9e_NM8bS+j~~ioJcI zc`mX)SH*+GzR{j}yWxP~<|+6d)qT6R!NcSoM-Cm{zjyjzriy2Y-MV_sDjp&dUFlpE z&kd!z@a(W#mu^<^*pTQ_a#cJkl;|?6cvc8Q(k_+kP!*2{`$j`)0hRu~DxMMc5B8*a zznE1#9VEJXhpKoUDAASftKx}ZkS;t5B)asWDxUvIbSZsRJOk|3HB`m3KNx8W7-ch6 z$R_AflV{X#R3USK89k=!o}nsa2$`7aZ`(bm^IDPoOTvxQkbn zxwDNkc>A;F9Sj#TyoGu@`~cR2A#g?Jwm?O_=B>POHg!*mcZPY8SJXRnqg+wC??AAk{k$T5XrEk>(ziEI(a@gu zxTJtdHgf|T7fSL>`i<-1=ddc1omDv_SJa!?9jGX6?3$zKI=Lb}GaaZ%F?RBbGCSJ1 z1aEoPyq4iY32#A8!4Dw(34sqX+XEH#n%j6qX?B4`rs(QGMY?$vuSm(^*|S8Ec}07G z?k&j6*?^EPpwQok=g5$kblw@}7G6>B&}DK(>Ap*Y6- zFv(^%v2h`dXVPz62tS7fsdX0Q1#(5bnezh`rHzeq6m5_z(lh4;DpHK~yrRsyHcsU& z&zk2lTuAj6yI2)`=uL1t~BqF(cCUQs%C)-e>}StWet%&|x}@mLZ*`8pOUIXrlj zSY+aHBMeZy1&L>h@Oh^QDD?N?aUv{8g?EOD2Zuya?+_jhN))C0P7YSIidUo$P0AH1 zeG`F-hVTpz42TPuWHS{uE)?gP^c!RFb6Ak^&Vn43E9%XZ0~Mu>(i}x2az%P(WuPL( zIEh!3DYkJj-tw%ug5g3jZ$Ucn0|1_hNvQa 
z2#$bcij+P$_z6%n1V=qME+Sx(&0rG&yE?)%={K;KfCU-pEJ$o2B#L@7*im#VN*mZn zP(|23NEGQAY$v)EDF${CL{SEt2f&4S%d;l75D=}q6C5O#|M3Dilg9ZaS>k%&tXy7vef&K;1FlewGc~2P7BoHmV zIXqPmYe_GFRv|zOvnL z8{pdm`?VhRhw7KrkEm}~>+1FDMe3wFq{fw>D&J84PWdzCb;LdVc(o#ai=l-;&<+69=Yd+&crK#gTD~_2wusU)}>3 z3))ZgK~k|;kIp0oJSy-cV^$Tn6-tcloVHPmb>}Ea-N$JwTdYe*N%9#^TghVWIY{Qu z<0X?8YtxxT<>r~NSbL6=^crp+;DZf1O3b6&JirIH8cK}EIc>lPHycV)FLK&|5AHf9 zS2cIKCly+B>5<(4fwzwWRk@5oHpQt4LVqmPVaXUS%+Lf z@@`JmsD&*%jz~PgsVZC8x}!jUnNwA=uw@5Ybg zj)K&E+#D5ak1I$%!>JmxZs-V9PSvP&y(>tp;Z&8a8CRem~bY z&do7lUFQl?FLHB?TiCJ#k5pq{#~=E$rV>U`%pzOjy^rg46+Sj&TbccpQ;@l$)bsVF!A9c&>!Mdjat~o;|T3pPF2~$MxF`MoT`$A4Lk^RgBMI% z*ukS9wT_!(!opS_1pW)`1u+HfSRBn!O3mbaip(WOEb5yLeU4ed-Q#EFt zbmWV~+ymQBGCK!Ui7%N|F~$ zTG-g5An`mm$ApEgJrgYC<`}oIrAL9jiJPNhVNZ_&?H*3mm^JDO)K7A%MlEdWafI?J zr>bmWSI-0^oT`$A?L7$MNnS8%VOx)a#BOem2@Csr6zC6ebBtTq;G;l$mYbtut>_5S zoT@R)aRur+PSvPoy8`8IPF2|&b_MY#I8`O9&=$lJykOE=?h5pmxj81R6FY(eH^;cO z%oS+cxj8CU-W90#ajM3wrLI7EhEp|aozM}eoT{?5#1+KXaH>icHvHhkXo43^TG;5L zKtIaOF=1h=j{@y+ZjNyaTYeO%FLHBKENu0eU@@m^%)(|L1>05?a)!bTqj%A=gB zG0Sa9BJt-rRihTR{5T@MkW*E*u;)iXY!j!dWMS_Qg0RjDCM|6GQJ~$!%`stN*N+1A zliVERmfMVkl~=hrDwf-egq0Ca)tKeBBH{RMPSvR8b|T@}L!7EId{1Y4^60ais!|5} z5Dl!-_@7T^pbKF?PjfR&WS|GZpR4P*3C1(ff#A=TyE*-p4D=uP^Y{~-p0NycANcdw z%bcFk4D=rC=LJqrIRl*s{ye(9qX#30|-0-iNTj05W$oQu5xY0DO zGFHO3{{7F?hu}N^u1=NUHvoM1`2Bx!7v}i=|Ks=nN3J-2|6h7PncgKne*d4nW>mmZ zU3j0oM6QO}Si4K^lhc=%@>tFX-Y2K;EV18GAiGaqBKP--Sntd4$(P72z5;duLSFO| z-|_qZkT4$JdHweI{eSVQJAVJ)eMkHMq5Jm_g4aWw{3tqw`FBs>m zcxVj8VmCSZ@|IJ@BV-vl`u88%Ji%$@e z2>xn5?rn6smmPKsN=A-u=OViqB_~IBbb;NM5|X34+GZE1WaP-w$g?|D!Q{w_aM%Sa zDLJwzitN(WG01U|!v`Q@a$Mx_k%)vG-PCz@T`Q0r-OCQUyCow>w{wx*=aQ47JG#Jb zc?rqUU2U^#UovuBXJ9M9H!V`TCRM?Z+fW%ap=x2p8rOvNdeHtR{&WO=5iAWDO=bnW_9o;-^O=qJ;3T|gHp?GIY!EM+!<<8ilJKHgtauQ?a$aON z6mCSgC|>l$+CG3tN$MxtA-^B7fxJlia`qug#*4;lFM_4KXbLOrLAZz)&0_Wqh!pdp zIV^uYVuN_m%;n4=O2UiguHtS4%X!gw?LxSa7u{diAyUGN#%mg}fxJk(iaXn;O1^N7 zZrZ|*wyD8ixJEaweQn!R(HE}KFR#f@wN35*!Zo^Go$ZK{eUTjb+M41v1k1ljj`V_s 
zYY;B{B016s+E*h|@j(y$_J2+XQdTl4KIK|G~)w;@|Wh+VvQ9=D=YCu!t$49*Fud|M$0PkTu}K- zbE{p0>mwM@_@JQtB|9}=gF7Vxa%7u2HMn9TCr37Fu?81UaIMP7ku6)O!G#oDtO9Z* z!P+%e7m3Ny^qQ~130E*Vnp>S3D~lwNquI4s>p3Plnr91DRvL-O(JX6MS!E<4MWoB#j)c5bdg4AI18E*BVW=`KntW#TL%pNHotnRkuQlS%Qs3 zGi|ZzmPoOTcq5Uo3M*9I8Y$KnxRFT4wX1HCBqB#QbiV3VN!{e=K6a{ZnIt4fw{o%S z)=4sQWK9&RZlNR~M|QMbbt@$yIkGJBRku_MAV>DGQ*~=4F*&l8i&eK+l8~djxlnbh zrC@S&N844mToRF^8#-Tg>!ogTbRRoaw_p;Iqg%OHbt@(rIWBT;$s{1hMb536gygu$ zxkXa|IWBUnYH}(TvGDyy92tTCsW|qNU_$Rx4C5K)i?? z&1`n{e1wY0(ZrUoZbWb(Ig-zu>IQ_#$Z?T757BaR+@4#Hcp*6wxqNk9+unf3TtuT& zJ-2PI@EIO0QHs@b+V;wx;gJ8~hB9?q|DtHbTYE@aT5VSIp_p@C+9ib9(WKra+s}USXj*HxB2$PW`%b{33 z7145XT;xtcyoem#&vx}>go?@0?VPW!LU0f{x{ICaB*G-*=teG9ClDli*zS}RF_$TQ9Yl$t1<@&Fo|9_8utG-cRroF0tNqdL3 zUt6aQs4v4e_q|2EL0zr(D&JQ=qWrNk1;5cR9)C9e!FVHnd3-qbo7gvE?}az?H^y?& z|AcSldslRS^o-~t_~!plN8S?I896PI4*wwhk??GIdw4XghMo<5Fw_WL9vbfXP0u$1 z+kf|m`5LQ`1}kcQZu3Gz;M7=Yk)W#hP0h$;gqUTBy0Dk$@ao6n4!mjfCV#I?dPI(x{spO{GrF zEsccaXf7?*+|o!wjwaPY%`J_B$*WA)bM2_y}e9bM50?2WZb4w#JIWBT;X(S`Z zMb0ga1mw8LvC?R`A|gk7zI=@pNQ0GbavWO9PzDFW0jGN9C2^4#_A$DIkGnjHC7yn$kEJZ*H~#J zCPx!nzQziqAaXRHIW<-pNyyQZR;;nQNJ@@owL*;*M?!LBf7mrv8cE1;kz)l?AUQ5_ ztTK|3<08lEA~`uOa;!KKk)xT-u8pwc)n0;%7kQD7siHAX<9ZPB(I)wjA-ICkb>j+qDxBDt?kc_j10r z48cK966h9oYI%f7o+Qv#uUK1(XgN8ukqfmG5HBQ0mV{kff=~%LvLf=e#Rv{0N4Bt2 z8$y_b9NopmS`N`ta&#jXYJ-Ruk)xa0u4NG_CP(*jzBYj1AaZmIJGDg!laV94xLE5) zw459lxrK-qlH($0Ayh(+i(Cf5LF8y*;?(*OCLu@blVWWFqNU_$g;J>XB3?v}7AJOX zK0?LhXnB&a%|mb?IpSNVmPVM29C2^4W+GZnj(E6GGY~H%NA`zZOCeNDj+Q6w{{OT8 z`~ClUdKRAlKL!2&UhQ;v{{Iu`|L;|=RV%8ld`o#uX)0TlV*GdUufTWp-4@>zUmW{I z>2nLn|v1ax|9~>Z~SORtY9Y6RBNi713}-M2=?Be4W)rgOzS_G=n;IRuDNAAvu~t zi*;5G$;lB97wW7g5|Sfcw(G1Sl8_@#&evI86iANj3a8ErA_+N~LW^})4oS&zwPBsr zL?UurZCGa&k(eA;8`fD}6hw~ZGpEi9A{jZdF^Y9o4#~-Jkz+NHkQ^5|RuM_aagk$n zQ6M=ka;zYdkfSNBSZC#slpM`!g*vN=MC52@v+JxP5|g8eEnjDKQ4l$r&zw3dh-Bor z$gy%rPL7KltBHi`0Tn{ z7m1G9>88%tlUPIr9kbKD?9>wolRQJ9+qqcR5iNa&KzDSZt|4CZ41w-yyRITs{0xEC zDfzmB;6QR@ML6|1!er#gqA1p5h?bKh>!MJPB3?+2>}tCnL8zD<-PHMd7{Nj0=w5c} 
zA%scD(d}HU_aItIj_&9}?JmTN$kAPG*X~59gdEw_`Pv-_4kSk-SGyfyGIBI>wc8La zCr2Y!yA|;wa0vN1U9m9YSy*IpSNVb`W7Qa%5u^Yd0cVN{(i=LhS(JMdWB^vupbiDkeu0 zTfVjr!9nC`K67e&5hfu=Q(CdM2hnnJWN#E|Hy~a}j*Hy&2$hiIA~%EJKyqBNCnt5d|NoK1J&CImMY#X}to~Q}-TEfD|Nk59%kVq= z4#EBZocizTKdNto`~PRC3zQ!zAA|e&TE+t@c_kH+qZogW*D zz8d{P^db0-d}l@nB0r0KCh|Yv{{QO8yzmR*zk&PzQ{l0&7Wx;s|KA9039ab)t?2&0 z({L-K#F*eDfIgxxHrxs+F)BO>pvT~ahFc*eCW1}^=u>*T;Z{hA@!*pHdNVuUU=`9} zB~h7s5TY$Uj|lQR zZTO0Hw>r{S3iCVN&4s#K9qA_p<#)QH?YdhXiOA6nov*vqQ8zidkDa<(9SOpxLqr16KcdMgd za&$-Ab+esah4^I^0XZ&m z+Yvfk5t8E~w++F8TBH=~S0h?Vj+QBf`c;S*k)sugUB42cVsf-b$=9zy za1c3KojCQ&5hf!?+*_<~MYNn8@o=HO1@S_1#LIU5GK5OVk?oPMUy9%$ax|Yg^-B;Y zAxBeMvA!A6QgSq_73vowUPO*&HoJZiLdE20V$0VzAvlm6*%eOxLWIf4agnkvj|VLUJT>`TCh{djlSI5sgm$jJCbvM_oi| zvA(8luk1M1}(1pX1T}Y}Z#KRDA4Cw{yOJ8iIqKDbQW)-T$Zk|5uDhjKlE$ z|4`}|sVCt#{mnrC-aNFofs(eM3wUEd7%|Nl$- zs`lsFO>qBziTW$`Y4v`%|9`e>DK9A>hx`A#l#>-R{v6!@e?9!BzH&Sa_y6A)yE}Gi zY#H4DeXp~0#nAvv-!?FOrjB;?2*$~Rbr6iAM29;d-d zBN;ieb&3sE7)i;|tXgQW>PSS6rc}GZY9lc@nosi$Rv`tEqq)>+u+m6EjwaJ$gB3<{ za$MwCbtELmMUK@*5^`MRScMcwjzq55a7&})slr`E8@@urEsc_=1nnaFh|g}grBU+a z;9W#}zI?+ijgqUnb`iPH=``HZC^;!e5j2+;8*XVNAxF1zq2ZQB!Q|+6wi|9~BqB$5 zbH3r0Mgio=Qg9k>X(T2`RztDjmPRsiWI+@fZfPVSM;3+Ma7!a0IkKDc4YxGvCPz21 z({M{8AvwB_iw(Cll8~cYxzKP+qhNA$JKGJnG!l`cyE)%*OQQgCT;$x+NKB54oLd^n z$Z?T#OCtd}E^@3i8m@@Q(Vj2gU!<*7#J5i46okph5%(4wCnH);j_i#>V-?~>TLsFC<6yhus)OsDvCBxiW$S z$#Ib@AxuV&i`)pJ<>a`?twg+t9L;QY<0OQN$Q7x3(0Yj8%C&v92dC)f&@qdE*|5f<*|KZrL;r{<4v76!j z|18}9e=_<2y#IeX-2eY^X`~M^1Sm>Lf_lE9<@Bb_GyxQ}5 zdjG%uZ>PzsA*W)l7nJnW?Zqamg;rL~b;5py`o7R)1<|sKd2Ud@LOpLcSvfRZF*gMF zE7bq_CM${tE9QA~`xSas!D+H;NJfrqm|~OFLUM9s*A$wpAQFihR>8hXTlvwcs?}a!5>$EQey# zEr(>}NG>fj-Et_H98IKl(=CTYptZAaW`~ahjwG(yGXXkyDZO#}x;j^;C`X&_8Sj*DCh(QA^-|6an<%h~g;kW-?t5g&{{w?_J zf6e&Tcro_7*jM2G|822NvBl9}!2SPsMh`{LiuS?%|4&8U7P$_-|9@Wi-@+dYzYf0t ze=Mwoz7u*3zW;wq$nN=l&sVzpfA*7N%dL&l8-<-D&8LNyTN|Y}1$L6;w=~!-w>C;& zDCi`~mki`vZf%slU~ZdCv#8T@Yoqk}f;O3^(PGQ3jb!A=dMUKr+DJf-tQNcF)?*ax{$=TW)P6AxHCRq2<;_!Q^N%wOej&BqB#MX};ywM&0CS 
z7Ij)~Z6qc~(rB^e)T2=`rN0X`DVpY*_MMREf(tL~6MuFsrZ=Dt^ie%)7dy6er63NLC z4;NajE)tR>`@?Rrsz^+ZHhuXPtBr!l(R}7KSy3b*$Jd9fB$ATj>qAx-iOA8+W;ajHCag{Cr2aKWOb2{9F1I)RYhWQG_mEItTqZFNAsD} zWJQsL98GD(CM$`g~8>?!od&a^Ox|y<+nQL`x6c>25AGuSdM-z@6@ByE%hU z@zVslq4Uk%2o8FhK=-lJ+=Va+Il7gL&Fc^?Cr8#qp*f9sAvv-x?B-5{O30CAk#Fum za3DFd8l2{}2$PW`Te;YrLbQ||-OYvOcEpRw(H(6!w;@zaj&A6D^BM#Pk)!+AX9_8IpW@8^I}BH$q^41ninBnNRD{fZf-)T zgdEu(`R0WP4kAbMnbW)gVG?pQr4^g!BU(z1X0<|dBjQEmXlApU8xSfcM-yAVc^-lT z$&p>*G}j|cMvjZzIz-FKagjS0@j`N3|UzvBBo6Tj$ zf5H3ze{S4ltTUFt`~Od;?oZv2Ivd{qe+lmY|8a5`y#H^){r?XpUJvj8m*M{ZH}vX-= zHRuq4W>mZ77Dw~91Rny>hxYlFTO7^5Z0;cd{aOL1ng%PPU-XhyYLtTY;~h{(}&ns2cJX|U2wj;2zl#VRAGA|yw1X|cuXqLmd1Ihs@p zEmj-}$&r0&w^(T;AxAc4zQqcpKyt*lPK#AWGIC@`7F(oKpm63!T&85W_tBd61NK!4dSaBpI$3>2nMiO#dV?kQ^5|RvJmjagk#MQXn}Jxnj$$jOJY`OzyPbE419oXx=43$({CmcFV1d=4}p6 z?zH90x7^BT-onz4 zIvWcpry?Xr_i?dx7NS>HB;@E;F0{@>{IW_gIl7(g))@#Lu87Fd-JEZ&LGWNDfE*XO z(-9^k$3<>6qUGec$eo6GAvrE`ry^8Ljut5S)+q=MB1cOTr*$&IB;;s?Qf#e4w3HmJ zQ3|a|#EZz$^2BaUAXGw*I62=MM{poH;#;RxL70pjac{9ThG;oCvNsB?QN)YL(adJI z$_N#cqlqowDj_(C9L;A=YXo5uax|qCTPqPQB}cPbp>-1Ch2+TouvQ0z$>)XkyE^mLoWb9L;A=>qLY}$kCKmY%N2y zlpM`!g;pN%LULT>mLgO_j*Hw02o5C2MQ#bgWaLQX3a!O$V+GH*h+?}n)HYW5e2eJK zw{mS`10LRyh2XRX+r|nX-qBr*{r_b>&-9qzH2=c91HS)nnel7mi^jW*gT`8;KlM`T z6REePrcej%zfLxjmnT;wexLYS;ysC55*rgk`Y-g)>G$h<^wahE+7GmkX>ZWB zX=N>@{)_sUdY5{!x=eXh`MmOWWuLM}>5cy&{?YjB;#bCDBgVc4xBqX7t&a^ve;WNn z^v%(o(ebDn`Bvny$X$_(B1^-sgr5vQ5Z)7B9ZrY77y3wO7Jm1i)AKtpR`SQeVs?Y2 z&4;4wVCkf(9Xs|P*>|`!xn}z4)ZPPortzDdruNM2oZ7i}X5R{|WH194wke)l3C)D?6SK}5{p_OndV83 z3rJ#B3x+VXPf~=&;^&{*Vl&^}24+QAYRNHR-&l3nOFNi1@Kq`{LM6OhCz7Yt!&pQH$l z#cl`flN6z`*zKTwk^;1Yr7n;lOYPjXa1602P>grR+sA~Y7g9kfqU zgvO$`gZ4=Z&<>WoK$2PFlk7sbNn+6pB$GVJvVbI3yI9?;ulCJc#0%z#cylI77f5KLOb8L$IYWGdzA>;5NSnOtBi}_;u9>F2CNk6%6e06M^Rw zu*XjXo=3nQKLOZIBiPcI86LkbaGRgW6sr-AUsq}1@-y02FtEo@1WpmK$4>-K60pZl z0Jakdwlrpj$FB?A=BG2oYMA5KRTa4Ww6+xt?C}$URRZ?-iNFd0d;A1oJC0yWV`g~# zy1%uZ>*#EDI z9_cZ^W$No6BEmn$!qhF6c0tOuabKC>RJ@EgA2Ub+El7Ox$$7R&fuI{6q 
zQdd;5mVnlV!M%?RF&xW@4&2MDVmKBQ9k`cTMR2EzB?WX&%os1bcEeeET~WoF0+-(9=>X5Pt2gN| z#YzKgVQ`T zTNvDDDunkCxX)Aszl)%Kq5`yYC!(#kDL&6GbercLOtsq2)9h+iT$;DH4Pj`Xr3if+ zLHi^{=vxWeCn-QXw;G-ioUvJ2fNd5Ec2EuLgo+v1Wu*fxZreUc*djRftJ6rm3gv`H=&K0YBPl{VS0dWdm?a*`ZgiXE6->3to;&-GlDjGhce!5PHiV&lk|Ok0 zg7!&@&|3)FCn-R;@BhR1|8?&FFEw5 z`TsS^QZkNcP3;HAa&H6??tNl!SQhPw#ubr;VQ-7@fz4{jQT6IiK zDbFh(f$#s@qKqh!__Of+f49ar$4`v?2EPCA9qa}`4!-~IlhOO5`=YC(>Bx^FACJ5_ zG8Gw(B*V{zKOAm^FAJ{>heQ7i-~V?@=;F|_p4WoD|F1Z^5s1T`k_A(L*u~J<4TxV> zS+M<>7ei;yL+Eg2!OlP6V(9F81P@jg?0^NobKBH`nes?^yf{)W(u)_)>^g)wl?B)S zAs0hu&qcJ99KA?Ym^}yaB69Q=l|8!_p<;6M3RQmgYy=0ABbRWT*|QKPBS&uG6lc#w zw45BdHB*>91Mxy~V5SEhTB8Lxzk3o)$96l2klOvJK&*B4N zu(x;H+-*d!cRREABpB@Ny+*K&=ne1UEItVadwZ`I=LoVQ3bXhu80_u6Dkw+L{cO+T zlVAu&@0Gzhg1Zhmiw}YW$pEI#;^kfZm!^RxKiH;^2?-tEldBVQRgdc(Uoi_d(IL5_I1 zFpH0U#pH;W?OA;4D3K67UAd9REdO=-nhJf@YCqgkymi|4dLax}Bq zvv@`;Ax9Hieilz?1If{R=FH;Btdtzt7{ysUnLP$ME^>G@D<;Q94v%GJZY zE^>GnD3Lnk;B7R895R;XBLlM!D{iVtweQk7EfNmYT2u;M0;Tt&tAc5!F#)8 zZP>GT?h00m-rJ>{IzNl2t`L&$_jc)Cc3ODcDkDd?bFqcTt#Wd7M;BUn;wmIZceUNZ z<5n3tvNZB7JZ%joM^=Q>!edq`IU2bZ9ueG z=w5bOc)ThjN4GQf{||&V_n1F6KX3jm+yQ8qhs|ru)#gd2Vf@bcf$=%x!^Yc<+l?EH z&Bj@VW9X^ZQr}B`D)m7)7dV=lO07w)Of5|QXYz&QSCa2fz9V@D+!44WIhI_SOeB7i z_*UZ6iN8)f0PhOSBrbyA7O*&>>Oa(<(LbQSQ=iqZgKrI3tqDa%5BP(%6UpznXiQ6 zKZ-vce$|p_rO!(KM%h(e0%tc@VfBw za9`-Rp>Kzt4m}=vQ|OM+PHG|xr$=c@Pt-8b4ZZ}-fu-8WCo9G=;?Yuo;P+u$tW@czBi z2Q%!^X}7LklRYFQy3)BSe#T05;pb|%F5P6$IEgML$DU(KbeUEBc!VKomr8c1il2gg zqoK5bN`GG!Kl=Iyd(ymLO!nZB=;|G+;^&z}SGuo?pI||{@Z*Z;(ub<}nIzGr^s&c* z-MWUV_`w4sO#!29rivdm;8Bxj)NfSrg9X-v>AGjAiXR>lUA-ChsIFUA+Nk0u1E&k~ zzeJawsbbRa)}cZpBfQGy+in5 zNTMj+htGln6yZZ3qDUXY2Rjl)N*_MU=~gtfr#&tyV3N(;z{Z7=Jd=LodiXhnKiOH8 zGjc_}8GOzH(U_wsZS0z(=sLL~Ju@AsNHKQuiZb}j113!HmS@dt87`FY7UUHC0K%UT z_#m@AP*JbBjaQV;U2_aYR|hK6&8v7tO72RzBJ+y&0Nq=Vm$LyOT|l9~Zz}*G{5tOp za|^Gicjz*?qIBP-!HO>773o8p<%*QPivtx6UDO_@37BLvo7lLJ#xvAsVL6|LeG=|hupMM~d9prWDi_PDr!Nj6hq<3e$sNxv}$KZgYw?<~ks zxuV_-9s|Srn4>6d;E69)G$L1|XI2I(QjC*$MHxJrg$ZN4}qR4>OL4yIE^_YWmD-E14 
zAke=68U_uPBku_VngpVyHwV3eL`!-Bvy3huN&PPMoz&-{_rDk33Ybn^ z0`~&?Q)=?($uGfo1HL<1P0l3GO;(bFiQgoCl=v$Aj=*;$Zckj#I{#cErvDrKuE3A! zkLqvI_v%;aXX#7yG<;{^H{rd32esE}H)kAVXViaC-=j9vgX-n#qpN;)v?0vB} z#BPmE!S5EBi1o#y(Vs-0jy@567Z5%E=eP%sd*J^J4-C~XlSBT@2@+^FQ^V8_sXND~ z(0-$aIUPQAa-Gk?8YXjzuHH-yvwOF$v{8e!&g+7_F4LuFYLM9jbSXv+k~`IvsX=x} zvLKs2%fDscFRCqeMpnC1OqR`EcH=z_yPJOYyF zg2O*N3<}bPXF^059RA@ckVF?8{^7Aux2~Zoo(93l0Rba8{KJDG@aO=~2oC@7fC$#= zKxeI1@jytT3l9JAY^Ylo9RBT_t81@Z7aacW3Dl(+)f;$SndWF0(q-9@6ixRXiGk zX!HxHz*@yaB3KjsykE>Jo)y9R>ga;Cil;*oU9eX1oG3^a9v2Z^uvYPaNTLhYDxMg1 z>l&)!K@p5xC}0F@70--deJ$h}!CJ+$BUq~oy|s#GMG{@GR`J-VTNkWVJU-%d;c<~f z7pzr0IqKF0YZXt7h^|Z(kBR_k@eE+CUcitc%U`SK!%twXS^{@usv84!!CKwG>w>j< z-m!G857Y%~bset@*6O)(U1s&1_K=LfR@bs2p^ShEtktst0c#?|`^BuD#p{B#dZt_# ztkp9Fy6_|k)@sKtSgUw;1d$cl1#1-#le+C1s^S?EjO-IIg0+grNf7Bio)N57JXV4= z(dVsIJVuh}g0+e#O5M6(t>VcNrwdP#B)VX&;-ONvE?BF0m_&4Cs(6Y7$OSwDSgUxP z1dd$buT?xhg0;Fp;Eqfc50bid!CJ)=C87(~Djp$8b)6Kb3)X6p*9B{JgU2*rwb36B)VX& z;(1fIE(QDlw}m$Jm@k^om`|7wnfI6n%&q1cb2&W!f5rHY@fqVU;oAU?8r$G~fTFP= z^&5El|8(l{)St2wfQ_lq)S^@<`64|1{~LJv-$>q&yg0ckxj3mLewO%V;^T>T!|w^W zF|jpqdg8=HO8=GqZT-{wU+DM38w1zqYxNa+ulAbuUF|9D1KQiPJG7nJ2Ke^CeyvCS zq55U@BkJ4Lx_Z5O5u5=Gsd43}$~Tn1Q~nHom*4?-n_#uF41S-$FX1-|ehS_xcvJkQ z_|@^V;|@Cmcs}+dJo*04oz{2S7qc23i6#a1Y!DuZy6WtV@h~}cP$V-v0 zNB%bQaAY>JH?ldhDl!y_g?|$MYWSn!cZBQVnec_-N_Zd~3jHwjrO<~$_lNEZO@p(7 z|5)tNe7J8(Z~BP?Q!_i;xBf?qBjXl(HjfhXHr$zo0$@WI9mCCROvHsFID8c62N z<0ZfcJ2RA|@8#wJKG>R}#C(dI2l!x*#w0!5JirH=G?W;VoHpQt%^FHl2RLoO2fH+s zBp>Cp0UvDBP?C6_(*}I7T?0uv%}XXNY}1%vAvev0g>4!N%uU=Z;}&*nC@}8fCaGB1 zrJ*48Nlw+6h0PiYlCN^AMlI~om|%ocRkpB6LqTFUr>bOOuLc4$%?l&92W+>2K z=2Vp|?9f19n7m-p!p4jV3fvqM7B*%mFt&4Zj9b{Cp&)f1H%G<7z6=G)XE;@37WQRK zpmM54E$q)wkXXa1DqGl}p+G;%sVZ66rhy=3@`6bV+cOjxk8^WOSlFJSAoU_Q$GC-E z8WSw$=BQZMouMGPl~XllVRwdt#J!xVQ45&8NXH3w;sVZ66qk$l4@`6bV zTQd|GliVB=7WQT+NFCtj7`L!RLqYOUZjOqDof!%e&vUBAENsn~U?Hb!)WQx81^OmV zRoTME3XY0Y6BagPC`i7_%`t9aW5xs{+#D4P`!N(G zc5|x6EbPZnpg+W^8nv)5LxJ`zr>bmWJH`ZQPF2amb_@i%!3!oWY|BuPTF1>XVPRW_ 
zg5=%Y9OD*tXDCQK!Oc;zuq#7>{xYX(%)+jW2@0I5Q45mCG8CxyajHrd zHf12tQoLZ&!rlx8$!EAZCM@j9m_X&`7`L!BLqTE~5Q$W)y8_276Ge9mlpEn&EUaiX($eAKHWV$YJ{Qv~%wlFCu?0^LsD*%%6R-4*!t% z#(Vd7?|V0X_kQntAq}-;a7o63JqtYBbgX=I)5RisiGPn+7L2^|ZYSG|&j0K4s zrJ=e8mtic3KPe5>F}MsPK|CW1Rt+x4Sde~Mnq$S_D$EniN^>k5T!pb9wMUwxXK*#f zf}|x4wPbKL#)8DN(ol;A7iBDnzaKa^#u^{%0 zG*rjnN{j^2v@BRPxFBOe>J4d*6@%+APf(NQST?v2V?lDiG)K?idW;2$N2Q^b3@*c1 z5dVQR)S|(4m?ua`Lv;-@3qkB5X{fF-Cj`->(oh{^b|46+WWlOo z37Gt6f}zNZ(omflszZ_#+NAvV)fuWnyuXi0Gpx)|4Z?p0S<=n%$J9GP~`fuhYvTtR7L^lF_ ziJt%8mc1l^x*xETNu}SQ8v?$O{zBSHAEa9XPM~xD zvDEAI^#2(;_y0iZ+SH!Za;lgLC0|Q^KlwD>8E{|n>g3r}`OhZ)De+3;yNRa~k0kD< z+XHqd+KH+7KhO;V-;V!5{FCv+@k`^U#~bmn*qdbZ*1s(eY&A4DT^J zed(1Anc+~AP;_z1Gcv`YCZTBKl&58mLrp@_$0<+CB!`-WqLEXclF1A;2}LKTJSnr8 zMZdcN3?t9CM{_m!Xx@Ua}&=>^AH}<@R=KbOB#*vh>p+P_<}SV;SnvLxv>M% zXoN@feCG0xNuv=S(e#;}k|l&kbbV$gUzFw{JfiJ0J2@uJLwH2rXLe$@G!Nksji1@^ z`=rqbkLdi&jy)rdMtDT)XEy(aG#cR%y`R~9O&X2xh~|$mB})j8=>8a!`=xmZk7)lG z6OT&s5FXM0F~)x&%|m!Z1IQRpNTU%R(E&2Xc1fcV9?=3a@`t6-2#@Fi8M&vV(Fl)d z0?kay62c?8Kr<7sN%IgM(FU5C*e1)4rO^nF=m<^c_ei4=9?=q-&RNoEgh%v*rnApVqY)m_6q=flC4@(Gg{H>e zlI9^iqAfHvz97v*ctl@lYV3eC58)Awp{e|1(rAQ7bcUvKFG`~k9?=?_%8f~*5gySS zn#%5$Mk73;IW##TO9+qX4o!~VC(X0M_5a?$Uv7T?-&E#jbjJTdy8my5>iZwl+5V5D z_oZ9jF91B5x{E6MZOLCI|8w$l$y<`|NKPkyp7i`w@XNvb=r;hH!5|r|`q#2}@z8AzGY6Gdd`TBYS-hm^wuTw{LBQLtIYc-1 z@jaEpEBo*QhK9v!jBaa~x!(VZFT94KV)3%0+ZtxBTYrBU-oId4yawsEhM8*>_m|;) zDlLoGr5+q!tWvXhwW`6v)fo+om#+>ST%b|0ct!hzgPSu~XWbE{Z)<*(=(A%_lkC3s`S1Tfc*Qm5CZmUuxrms>j zv(@uE-4hlT=)Zk6i@U8#aCn(Y!{Yv{1{~g`Qn9!v>kAHVQ86v<&}zZKT4-6^w%r6A zEQgxK-CQ*|SP~73`@1@Dup%lJ_k8`q!7VDL#T{TBINZf8i`&9_aJZ3c7I%x4;BYfH zEbbp`z~Nr5SlmA-<| zTm2)ftp^7luJ!L`eI+>9AC3N9tf~fwiLKJVla>9zN%uqb?_f0zI814+{$bYEg2Sv< z>)+1$DsY(D8vWZ?RRd0n+^wwa3r>pMAy(6YlOlHuYwN*Dk-M4omEfev-NdSDaG2OC z{To@?4;g|^x!b7)%x5eQ-Z_H*64HFOb1Si9Cyn0;>0k;Te;ppm4s`rNc5i#3;A z<8H3?uVj7YRmqTx8~rO-RSga|bfy16R`vsj``GMX&T1NPxRqP|{j9AA2Wz6%zl`;j z;9y-e`j@h*2AmYR53sT?I9Ls4{}NWyfrG8w>R-&-T5!0VYyEw!uL6fVy3xOgRn_2d 
zLs$BHS=kR9?qjq6epb_f!>!!va~E?Ha8l&Bx2XmvMUI=DI&f0txc})7PKq2iLv`Tr zLfTfJ8>D)0NKAwy~OxsR#=hd0w!`rKOe1&3GCntkrGYQcefx&FT=7zyNG z$$vZlMBd3C% z`#`IE>akvk&$A}2(ObeG@D z;cwE3{`6#=YFp~DlOuTlijZEVoy5!p<`BI6wmOS%?e>)N{7MNq<%cF3+tunXL|RAPcdb`9lZ7kG2A zz{G~^njCrreAz8BkCA2uXLicWTGq{cR(E9gLYb)w$tS=v@aR&B*$G)Tl#^xP(8U6? z60&S!Xc@S(TVxVKmQ9R&0{-ljnR2X~89cgBVwNG#y!-T_;hp14%q~bgzP7f(ql*Qm z6J*%<&@k|4x5(Uq3>zOA1`h3%nJ$oF!;yIrflC)k%n;<6*F4%ezQD`?XFlV?k=3gO zCITcJ8=3{q>=yZ3pUfH?nFZeLc-Oa)Sz{0>@aIB_zuU>Ip`0SM=1ww;zteGMaOP@} zzrv-NPdhL&dZox;*d)!5>;h+Y%KRNocICk?aOXmazn00ap`7em^CsEFU&mxue&`YK ziuP9X0LDxi78O&p+~@%OU1XZpBbFF zSU!2<%!_|Evb$S8X&_I5W#G}}(upJ|%Z75Y3>?}koIql-EH$(Y+__Xdo`e*u)W|2` z&&BfU#+kvR-O|d2nHQcP8Qv)^55&o}wGAHa6?!Bl!;(Y8z@JOSB@&Wh$&q2;(8cm1 zNhw&vk$G){OS`4+hMB>mokC~b%&k{PRxcD52ExS9EO6#haVH7Mti;GH@aAH2D+iWIoBQ)sT68JxLLG}q7Ecx!0%!tuq%KpG#}11if>y#^O`dkj`wcITQ_qpHL`oPTp3sv1Ixgp z-O`SMWkWex1`h2MjvZJQ8(IeLTqtfISQZ=kWX+#s*|v={gGX0O$84CnS{xa^QYsI` z(Y3V=9_NFp&RA{@MKJ z^Y`bk$e)(qo)6_-$vv0*-P{AYt8!=Nj?2ZeKclz)e?RMHug#v7ZBh0AcbV^H{xI{2 z%ng}yGp$UHPW=CX&ICL}@A{vYUQAD<{yz15>MN;7Qn#iqOs%F2y65i&s{MZ}d6*{K z`nTnQEe~vYV9Ntr9@z50mIwaDdEglDgz=8>_A(H*fc4CXcdeGalg8WEG>E|le&eN) z!b-tAa7%s;$GQ1c}#o}=f7$9NMWhu9Y7|YI#i(P z*Pb6NES9}v$Yfq=jmQQ*`rXk0WFFV!WL{}#-n~B`Ny$7e$H~0X$h-XY0GiBj%ZQxk4jjMICZ5HCT z=32HbJFso2Kyd@ll5JeD}!u1x=Us6iZC*72FzP? zE}KWiJDWFDAoIYtWFA%TY~IY!Jn$@;N7XxT$5Th>{C_q1tK@f*kJ0)6-Si&+FA~osek*ZJ;zW9X|7Y=U z(C_tK8efU0Vn2@kQS2kJeX&+76#ZfJ@#qn{qrV#YRpdL7$0D~xc1LEze;fW!;YY&P zg-;A;L$8Lu7W#PT($E5(|9>g?$H9*T&kHsKzYaXFqYO3cZo*6lFpQgg50b;FoQ8E5 ztJZtQjs6G8@i0}zx|5YlJ>!P;2g#+Qk=7loX7-F*R0qjd)=r+bti!CmvuE6_K1ja2 zreCx8(Y_KKo~LS9x3Q`Q93081ShupWFE}_aVp@k-O$QFnSX&Pc4(8OXn^|8K zIec%UVco>4YH)a%s$%iU0bg)FGI4N=mSYHJW&r>z553#Bm93G~sSXZ;MA2>Wn zWm;FUng$%6q-t4LvbGi+9;vEXSFpYk9C*25eUMc(;K0ch>vC511qZ%0t^KT~0|zOs zWnISFT5y=vYSyK!uL6getzq%u2@N>uh^Tc5EBk?yj)+Zj0*9HcVV%dSYH*m? 
zD%Sg0*)MXK&rIvRtfq+^rnHvDhdnj{Cq?c(tgi$oMebZy)qs;CcMdE2f`iDltg{E^ zYF;D6H_~d>Sp#$ZUn9g5-wo^C19Mfc5#oXGiuJC6xf@?2#7%5kyID>18X@lEmUSj; z>)(+Cj#9JU$@;2yByl@8tTR|u{f;E==8APXEBn18*@2~CTJK;r9XMDGE$cMa)`Nou zQL|2EeI+dEo+grwcwDW)T}P+t0IT=q+xYfRUNr5Y>$e?XLS6*!LBf^ovfw< z2kvcIZPwO+4ij6&;>MycIHV?~#qC8MI813RtIpb5aG2F< zR*m&lk;BZ^u(%be11Cj}8m%=8=%|&zmUI`ZvLCky`KA4?$O+b zb0_EW+1IjvN_YM3&vvuv%#SjElDVJG`nNMdy4~-K>BIE9ewEb!N&R{1GpR$VGgHOn z>&gF+{B-i#(YJVK-dCq>SqLmF^Wx|R!C_Xb*&ZQMfy2z!usu4Y28W5QVtb^>7n~G1j}7U-Ns;rokRF^AIgb!2 z!9nCIwxC0^hu3F!EbmNP(4pDe71?2biV%@%ZM_Ex{IF5J%zThO7| zL;hb~xScDuphFwM;Vw39L5Gy!a3i;DL5DQpa4*+vL5CFJU{y41L5Gy!U{O?TL5F<6 z!CEkFL5I}fU^%pGes)V-PX`Y6a?Res`qiF4INZ+-`&d@3_f+6;J6G)OtX%4C1c$rW zw70RE*;9hUjoh-2VQnoq+{-nap9ZVJNs;42+d6Pkn$fz@>2q{z*)wiXqeoo9U&ILvGfJIAVOaG2OCc9xa> zz+pZ!?F_5wz`@37*=g3+gOehcVtpkzDRN0x)qs;CmtbXIa8l&rtfm2nDXnG4SX&DY zvs%rLvc3u&X10bMVO2FaOl%c9%*uY?FrS%rh}Cr9q{szXTMtf(T!8hJ;4pI5y{xJM z2az+adj?i3A15PGX<0`GR%;$7BUR!0|NVjQ4C?>8^5^Dv(3}6?%6%?(6W#bfL-qeR zvkzyl%r4VC|1W2rrr-Cwkm~D$uhrnk{A`aMUd{12vfrKXavB)^jUcyfPo zDH%z;koa=qzQp;7O8m|E_v4Sn57E8-GqIn?o{c>myE3*M%SB&~J{^4^dSSF34Md)g z{9fcpeLT#%4|6B0i1djyI4wk6;|IUW$-*lMi zgzCLQ;G_P$2zy{vhY3%pMA{GNSYOj&suMCv@BV)V?Q1*CbV55xz5mbReRYTVPN+&B z_|46TpXo5&(ILbfS#_B8_#?yw*>sriXc1zHY&%SMHbIEVvhFbH(IA8kXFAMzvA%pA0ej7ro+rf zix87#+hO9f2|`Spb%*JX2B8!~Wz~zFb~q1ps`iC4Gp(&Cfr zygN6+hjm`nzC(=}Hnu{YKXIqnx3aD#e`2Pq+J{)% zH-BP=Gwoa0VhujbaV`61*4E;~ELXR0Vk?yRq)cbu$htawuv4n`4Xo{tPm13`wpfQx zirTaQnQ-}P*T8lM!u>sVKV4>MiWzLvFp@nMED?Q7U#4L;0qE&Bj#Yw=;0tJ@!9 zE0p-8OlM!sx;lJP{H|hce|%E>u4IdK_@wwyXk?8{l%ui=h$ zo@wtNSgd}62dkoGUpBB<^8^p~bEu33RS4ZfUArR zCbzmH2um^G_{9dFwV93}EX9EB9~(?;RYwq(V!&D-8_a4=hY5=oA?CHV!;EDUgqY## zj-V_GgqYt$iXqb$ zJwhpl%v&}?D8-P83lTE~La9V^n70ryQzFC?t?Dp+p&aRh5KFVB!^B045UaDc!@Ok^ zgjk@}9cC{Ygy3z{Ve+Cy2+poL%wZG=!N*O9*^3?_Y?Zdd8|aZ&+7Vb3*>Wk=RE#^a+P+Z7`~5nHLDcg3aL8p zWo_S8+L2n+j$SG>k(qct2^iN2b2gg>zU3utgAtYNw4ah&Dy>QG1oPn zvsh1u5Yt`Tc{i(Xf)FxZ-FX*(K#fp};cnK|A(ZyvnXK)P5MtPN-Z?N?`>+QFt~+N8 
zOjbPXK|*6Xrw>e4KhAJUmXJf5|E5#rWvI;*UwLx{V(?X0l69wBbg=qvbHZmBr{E?&3Zb7$Z6V6i`DfA zk=4|lCVxPQ5DCqo{(m8me=+~1{5|<|sQ!O5cQp4qx$CI@pUS?P{d(4>`oEivWM0fX zLG}Nh%(3Y=)8C@{|6qDodW!1*r&AB6E=_f*{{La}OUb*Fd#L_@BXKnGJ5>LlL~sAU z8vi=g|NG+IcqH~B)&Ga-^#8HZH>v)ABziEqD>_B>|I?8NBbP?HRR8}l{H5^S;XPFU zzY#hb`t8uc(8-}(@YUcm!3TpE({KMp0x$TL|MK5VR}hnO;1NZU2d}qPT|rFBfq%WB z$b+2KbOkXf2Od_Gv6!^lt{^7mz^9aDEM~8|D~L%s@JYWiRw5>@ASP;ru;#0-ASQkY zVZAq9K}<9VA!)T;K}>WAF?-crK}-|~F?E@)ASOzLn768~ASONtF=sVhK}^&LF=@43 zK}@s=A$!$bK}=K#A$6IqASP;rQVaz#@k1!Z&?6>hPmK_hR@?Px$xc#NuTsojb=Tu2 zRr-MMD#g@gx*jvBldfVPV&1B{9zQ9O_J)0kIjiY<#6*h_+}w6OTCxd3@O0hvxQQAe zc-wS6W}-s~8>Z@d{KOZb#ME4mnCK8nOwILZi5{WE)Lf67C=p^gX1X3T(ICX6S9Lvp z;*SvaNz?U+i58(0Lywkhf>4T~$4%4-r5Jk5M2Ap{p~p{r5n`@ux*jpnA;fgoc0F36 zM~E4(?t0uri4e1%>3YmWgAkKm)%Ey^KSC*n9x>4(lw#=7l1&gwF*FA}Mv-Y2Acj@9 z!MeUpbnGoR-8$>(9ur_8)^=;Gu5Y4ad%5l&H?Udpm;iUV>AsD1HBEHf*)~U_HMg7SY3}0cX-`BmOr3;On{8Wbhopv4k0X%s=JM~{Sm^tXu8L+o)#f2 zjka56^-U1M4zIf<{(ur8?sC&DvaSXpZt|*IU~ON7xVM|`JnQKY;`VO4bF8jMh&#OQ z&hiJ;2w|6-uEDxGgi;J=Slb_=6vJuO(;}2&IK}FEgh*@Z?j(Oei4Yl$=}xe&1|f2q zsyoiwz6g=cG~F@Q(;-As({}T$z6nC`bluJI2h<3`+oqdkT^&MjcGbVL6-W5|o?k(2-gLzgKFt$4DAC)lIKroU SLWhl(x+{+GDf;HvWBGqzhcRye literal 0 HcmV?d00001 diff --git a/packages/context/test/fixtures/relationship-benchmarks/demo_b2b_declared_metadata/expected-links.yaml b/packages/context/test/fixtures/relationship-benchmarks/demo_b2b_declared_metadata/expected-links.yaml new file mode 100644 index 00000000..ef9cc235 --- /dev/null +++ b/packages/context/test/fixtures/relationship-benchmarks/demo_b2b_declared_metadata/expected-links.yaml @@ -0,0 +1,51 @@ +expectedPks: + - table: accounts + columns: [id] + - table: users + columns: [id] + - table: subscriptions + columns: [id] + - table: opportunities + columns: [id] + - table: product_events + columns: [id] + - table: support_tickets + columns: [id] + - table: invoices + columns: [id] +expectedLinks: + - fromTable: users + fromColumns: [account_id] + toTable: accounts + toColumns: [id] + 
relationship: many_to_one + - fromTable: subscriptions + fromColumns: [account_id] + toTable: accounts + toColumns: [id] + relationship: many_to_one + - fromTable: opportunities + fromColumns: [account_id] + toTable: accounts + toColumns: [id] + relationship: many_to_one + - fromTable: product_events + fromColumns: [account_id] + toTable: accounts + toColumns: [id] + relationship: many_to_one + - fromTable: product_events + fromColumns: [user_id] + toTable: users + toColumns: [id] + relationship: many_to_one + - fromTable: support_tickets + fromColumns: [account_id] + toTable: accounts + toColumns: [id] + relationship: many_to_one + - fromTable: invoices + fromColumns: [account_id] + toTable: accounts + toColumns: [id] + relationship: many_to_one diff --git a/packages/context/test/fixtures/relationship-benchmarks/demo_b2b_declared_metadata/fixture.yaml b/packages/context/test/fixtures/relationship-benchmarks/demo_b2b_declared_metadata/fixture.yaml new file mode 100644 index 00000000..67956b2b --- /dev/null +++ b/packages/context/test/fixtures/relationship-benchmarks/demo_b2b_declared_metadata/fixture.yaml @@ -0,0 +1,13 @@ +id: demo_b2b_declared_metadata +name: Packaged B2B demo with declared metadata +tier: smoke +origin: synthetic +defaultModes: + - metadata_present + - declared_fks_removed + - declared_pks_removed + - declared_pks_and_declared_fks_removed + - llm_disabled + - profiling_disabled + - validation_disabled + - embeddings_disabled diff --git a/packages/context/test/fixtures/relationship-benchmarks/demo_b2b_declared_metadata/snapshot.json b/packages/context/test/fixtures/relationship-benchmarks/demo_b2b_declared_metadata/snapshot.json new file mode 100644 index 00000000..894bc4b6 --- /dev/null +++ b/packages/context/test/fixtures/relationship-benchmarks/demo_b2b_declared_metadata/snapshot.json @@ -0,0 +1,137 @@ +{ + "connectionId": "demo_b2b", + "driver": "sqlite", + "extractedAt": "2026-05-07T00:00:00.000Z", + "scope": {}, + "tables": [ + { + 
"catalog": null, + "db": "main", + "name": "accounts", + "kind": "table", + "comment": null, + "estimatedRows": 30, + "columns": [ + { "name": "id", "nativeType": "INTEGER", "normalizedType": "integer", "dimensionType": "number", "nullable": false, "primaryKey": true, "comment": null }, + { "name": "name", "nativeType": "TEXT", "normalizedType": "text", "dimensionType": "string", "nullable": false, "primaryKey": false, "comment": null }, + { "name": "segment", "nativeType": "TEXT", "normalizedType": "text", "dimensionType": "string", "nullable": false, "primaryKey": false, "comment": null }, + { "name": "region", "nativeType": "TEXT", "normalizedType": "text", "dimensionType": "string", "nullable": false, "primaryKey": false, "comment": null }, + { "name": "lifecycle_status", "nativeType": "TEXT", "normalizedType": "text", "dimensionType": "string", "nullable": false, "primaryKey": false, "comment": null } + ], + "foreignKeys": [] + }, + { + "catalog": null, + "db": "main", + "name": "users", + "kind": "table", + "comment": null, + "estimatedRows": 150, + "columns": [ + { "name": "id", "nativeType": "INTEGER", "normalizedType": "integer", "dimensionType": "number", "nullable": false, "primaryKey": true, "comment": null }, + { "name": "account_id", "nativeType": "INTEGER", "normalizedType": "integer", "dimensionType": "number", "nullable": false, "primaryKey": false, "comment": null }, + { "name": "email", "nativeType": "TEXT", "normalizedType": "text", "dimensionType": "string", "nullable": false, "primaryKey": false, "comment": null }, + { "name": "role", "nativeType": "TEXT", "normalizedType": "text", "dimensionType": "string", "nullable": false, "primaryKey": false, "comment": null }, + { "name": "active", "nativeType": "INTEGER", "normalizedType": "integer", "dimensionType": "boolean", "nullable": false, "primaryKey": false, "comment": null } + ], + "foreignKeys": [ + { "fromColumn": "account_id", "toCatalog": null, "toDb": "main", "toTable": "accounts", 
"toColumn": "id", "constraintName": "users_account_id_fkey" } + ] + }, + { + "catalog": null, + "db": "main", + "name": "subscriptions", + "kind": "table", + "comment": null, + "estimatedRows": 90, + "columns": [ + { "name": "id", "nativeType": "INTEGER", "normalizedType": "integer", "dimensionType": "number", "nullable": false, "primaryKey": true, "comment": null }, + { "name": "account_id", "nativeType": "INTEGER", "normalizedType": "integer", "dimensionType": "number", "nullable": false, "primaryKey": false, "comment": null }, + { "name": "plan", "nativeType": "TEXT", "normalizedType": "text", "dimensionType": "string", "nullable": false, "primaryKey": false, "comment": null }, + { "name": "arr", "nativeType": "INTEGER", "normalizedType": "integer", "dimensionType": "number", "nullable": false, "primaryKey": false, "comment": null }, + { "name": "started_at", "nativeType": "TEXT", "normalizedType": "text", "dimensionType": "time", "nullable": false, "primaryKey": false, "comment": null }, + { "name": "ended_at", "nativeType": "TEXT", "normalizedType": "text", "dimensionType": "time", "nullable": true, "primaryKey": false, "comment": null } + ], + "foreignKeys": [ + { "fromColumn": "account_id", "toCatalog": null, "toDb": "main", "toTable": "accounts", "toColumn": "id", "constraintName": "subscriptions_account_id_fkey" } + ] + }, + { + "catalog": null, + "db": "main", + "name": "opportunities", + "kind": "table", + "comment": null, + "estimatedRows": 180, + "columns": [ + { "name": "id", "nativeType": "INTEGER", "normalizedType": "integer", "dimensionType": "number", "nullable": false, "primaryKey": true, "comment": null }, + { "name": "account_id", "nativeType": "INTEGER", "normalizedType": "integer", "dimensionType": "number", "nullable": false, "primaryKey": false, "comment": null }, + { "name": "stage", "nativeType": "TEXT", "normalizedType": "text", "dimensionType": "string", "nullable": false, "primaryKey": false, "comment": null }, + { "name": "amount", 
"nativeType": "INTEGER", "normalizedType": "integer", "dimensionType": "number", "nullable": false, "primaryKey": false, "comment": null }, + { "name": "close_date", "nativeType": "TEXT", "normalizedType": "text", "dimensionType": "time", "nullable": false, "primaryKey": false, "comment": null } + ], + "foreignKeys": [ + { "fromColumn": "account_id", "toCatalog": null, "toDb": "main", "toTable": "accounts", "toColumn": "id", "constraintName": "opportunities_account_id_fkey" } + ] + }, + { + "catalog": null, + "db": "main", + "name": "product_events", + "kind": "table", + "comment": null, + "estimatedRows": 1500, + "columns": [ + { "name": "id", "nativeType": "INTEGER", "normalizedType": "integer", "dimensionType": "number", "nullable": false, "primaryKey": true, "comment": null }, + { "name": "account_id", "nativeType": "INTEGER", "normalizedType": "integer", "dimensionType": "number", "nullable": false, "primaryKey": false, "comment": null }, + { "name": "user_id", "nativeType": "INTEGER", "normalizedType": "integer", "dimensionType": "number", "nullable": false, "primaryKey": false, "comment": null }, + { "name": "event_name", "nativeType": "TEXT", "normalizedType": "text", "dimensionType": "string", "nullable": false, "primaryKey": false, "comment": null }, + { "name": "occurred_at", "nativeType": "TEXT", "normalizedType": "text", "dimensionType": "time", "nullable": false, "primaryKey": false, "comment": null } + ], + "foreignKeys": [ + { "fromColumn": "account_id", "toCatalog": null, "toDb": "main", "toTable": "accounts", "toColumn": "id", "constraintName": "product_events_account_id_fkey" }, + { "fromColumn": "user_id", "toCatalog": null, "toDb": "main", "toTable": "users", "toColumn": "id", "constraintName": "product_events_user_id_fkey" } + ] + }, + { + "catalog": null, + "db": "main", + "name": "support_tickets", + "kind": "table", + "comment": null, + "estimatedRows": 270, + "columns": [ + { "name": "id", "nativeType": "INTEGER", "normalizedType": 
"integer", "dimensionType": "number", "nullable": false, "primaryKey": true, "comment": null }, + { "name": "account_id", "nativeType": "INTEGER", "normalizedType": "integer", "dimensionType": "number", "nullable": false, "primaryKey": false, "comment": null }, + { "name": "severity", "nativeType": "TEXT", "normalizedType": "text", "dimensionType": "string", "nullable": false, "primaryKey": false, "comment": null }, + { "name": "status", "nativeType": "TEXT", "normalizedType": "text", "dimensionType": "string", "nullable": false, "primaryKey": false, "comment": null }, + { "name": "resolution_hours", "nativeType": "INTEGER", "normalizedType": "integer", "dimensionType": "number", "nullable": true, "primaryKey": false, "comment": null } + ], + "foreignKeys": [ + { "fromColumn": "account_id", "toCatalog": null, "toDb": "main", "toTable": "accounts", "toColumn": "id", "constraintName": "support_tickets_account_id_fkey" } + ] + }, + { + "catalog": null, + "db": "main", + "name": "invoices", + "kind": "table", + "comment": null, + "estimatedRows": 240, + "columns": [ + { "name": "id", "nativeType": "INTEGER", "normalizedType": "integer", "dimensionType": "number", "nullable": false, "primaryKey": true, "comment": null }, + { "name": "account_id", "nativeType": "INTEGER", "normalizedType": "integer", "dimensionType": "number", "nullable": false, "primaryKey": false, "comment": null }, + { "name": "amount", "nativeType": "INTEGER", "normalizedType": "integer", "dimensionType": "number", "nullable": false, "primaryKey": false, "comment": null }, + { "name": "status", "nativeType": "TEXT", "normalizedType": "text", "dimensionType": "string", "nullable": false, "primaryKey": false, "comment": null }, + { "name": "issued_at", "nativeType": "TEXT", "normalizedType": "text", "dimensionType": "time", "nullable": false, "primaryKey": false, "comment": null } + ], + "foreignKeys": [ + { "fromColumn": "account_id", "toCatalog": null, "toDb": "main", "toTable": "accounts", 
"toColumn": "id", "constraintName": "invoices_account_id_fkey" } + ] + } + ], + "metadata": { + "fixture": "packaged-b2b-demo" + } +} diff --git a/packages/context/test/fixtures/relationship-benchmarks/demo_b2b_no_declared_constraints/data.sqlite b/packages/context/test/fixtures/relationship-benchmarks/demo_b2b_no_declared_constraints/data.sqlite new file mode 100644 index 0000000000000000000000000000000000000000..dc092a21e9e0120036affa4362df94302df1b83f GIT binary patch literal 139264 zcmeFa378z!buT`>*3#WoEsds2qv`39Pz&``snK-L^vsAwh(#bF0RjOMt29W1G@uPF zAa;5k?AS5(a~u{g5aXC6*v7FPl@ghD-Q7BBAU z=~3bTPk{gL{88W^(awLs|AxrFMgLTLPI}{ODUd4KNRL@G&QGmQu1PG_hu}Yt|2gh~ z;~qHff#V)H?t$YTIPQV}D?Cu$qbSzkV5sgKp1O9=^vu4S_Rs8?KGgYN^{k86o^i?A z#h09M=Ek*)J3n1q&=<2mGdmZrzu=O!=dQhY@urK{pMS>1moMJ1_VN|3bZW3_sH*Knf$e$PPgcM14S$#>3! zyZ}BscsL;Low3vVcKXuxPN`lURxHa3)%%${*ji(MML7EGNB{k_r}s|H>e^H3BBnyI`>e z%l6D%H@)MQ9ebv?1tq^WCG5>}i8uF{-!eaFzRBEWo?#9e|6zRDc%RWQt~O3K7NmZX zdOG!R>bBID)JRHA{xJCu$v;gVPF|2)mW(96o%je0KK|#p2abE-xCf4V;J62ld*HYS zj(gy^2abE-|C49pukqkR~(cyxp{Vn+__o0>j)U~1nXDEIS&fLYSl-@%ku zL4PoP@W8>DL(@|SrgpTOm=m)MHRz!x_s;Ar?wvXaM>o@Z0eEo#p##%94lf>HGY)vT z32(-!!`ndT4a*lXO`c8VaehAEu>LuQRro2}hAmmhCiD#(^Cs-rw9I0Pe49r389Rn$ z<``Dy=kp93>SI&-9xHiMy3Sg_H2F4-@DqB5_40<5R`D~shMh2<&F6WnH0jOf*_5Bh zRCzX)Cip2`!_vHArEz}3j$w;UHlydUQpKCG<1B+|@@*R9=kpCq%`t40pVEzDl1=D& ztW@?U?AWv{!4&y6mG~JuhUs$*8{y~k3>(tel%B^%R(Vsp&Qh5s&!&+{enQVMg*R+u zf}gPy#W)Jo>=?E#+R zU6CH!Rgf$*O2>To*rz0gNOv!TmFPS5Xpo{=S1=dyTDXoIEA$3v#l zNmK1CTAExleROK?fj!eF!I}Kjo|&CfJNM4)TX89Z`_*{}u6W>yx!}Fid#{~7xZ)Cq z));#1nCQ&}J$g*^#ROeG7TUQ8(N^0MpJbQaZIYXqYBkN192bzhux$uK`y@r^3kcdL zDMFu5&^}24+S!O`OJkP!B)iaUk{g(6)#OQ51SHRE8^X{&NfCNILHi^{=ye3`lN6wx za}jN6%o3ku7rIUI9Hv?|c#>lRl55+BFtkrngg%>~eUc*dSp@Bq6ri0m5p8MA5}#xj zx=r#7rdmz$Bu51#*R%~`XrH7AeL6w=Bt_`e1nrX)pqJn8neVF*@bSCoM5We1W&RgAUWPPgrR+s zB6NkIeUc*d7(x3a1!!j!(U!(6@kw@}+a$|OwW{+ZM+78GZ9^E^Cn-XY5VTKHgkDL| zK1l)Eflpv0$t>|ncA*(brwHKn{i?>1TougGS;4>>1A81rV26M`iXyO0z#c;Z*d9i( 
z)i%Q;*adF$D=@{X%JJ)}8QNulv%GBu1AF{L;1dbh<0k?yBVdo80Bq+GY-!94k6#zK z&2K4FtSTJ8u5!WUcS73=2KM-gz)J|&<0k?yCSZ@B0BjE-*wUC89=|Sdn;-PdxIW_? zzphHb=I_P`!L0oaC)8~HIaJbqo^Ha}>-kzbVK*Hs_5{GbCz zD;U`0Cjvts?twjiA}}=M9@yh20Nc=)BR^(_$FB?A<_Enw@{4f%x=I6=AGGOc1p|Bh zL||yuJ+Q}51cr{?1AF`gU>kaO|5us_`A; z#-jY0$yfitP%)+U__Y)sW zyd!Z(VtWGa_$L(o$NHD`_v>%b59;UXww}^n*1oQNRC}9tt9H3|iZ-DBPW>nK3H3qs zR`qK2R5hpkPI*rGwDO2Dt6Zg=uH=-S_;c}3#UF{c;xqAeaVM_Fz8Cvc?A@_eY!E@ND4%#qd-#%UXZQ!6@}#4kf0ZYPHoJI{9$?f%fAvp|a8v50~9JmCSxvS+%(Go9fiE|sA(+~FBA{Ey%2 z4gZkdKjkhfHWFtf4?%?@kFbPa)QNBe=QeDcH*92{A)B;9d` zACc0k4z4_zTj}aa`+9E9cjTqXMJItA{gQ55xOclJo5&U2v8F%P4+)H2;f^)iV-KC` zj~#G8hHf!k`(}=M!tq76JJ4tk^n+2F7#Ma38t%aTm-qv71rVZF4Bw*Zdp&t&V7WUu z-lZ`k7xUbGD4=!#U^s;Ngk*;pap9=jyzk0s%#bdcrAMN3^R$+nhV zjgG7|HgE#S&?!mRysJOy2}c(#aR(;a14$&t1{S*m6YjtrZ}$i0hCql;N%*#1{;Vf2 z4?t@thj?oC`NJ%+`)=Jc=K=l!Har9j!se9_MJEB$tH92-LY|htRIb$Rg31i zW8>|yn-=(E2ht!zr^H>O&e`M%Cl;CRz*u{rABmC40mB^_a|do*;SbEEK!~o0`4*kI z+mnwkOST6uiMoURP>hfd&0NA&6ZO@sJKI;YNC!DOCEB)c_1&JVlGEI=5q~U+#`q%D z9UEznJ^Lzu?0^C?bV|fE@8tVD;nj$Z)Jb^wc-_L;FJ@Mn|+wDnI3k%H;Wf7M}mV-rQ$yGE3$> z(>ztCT&-=JThm*9TDFSKt!|9_u$3+w-L>hIO()K91ns||Hp zU8^ovHRUDcOUnC{Hz@~|OO|2_KE=!c?ji{28wB6@N(8~J_Y`N*dt4@c^e>muhymPeHEkHTLJzc2j8@c!^c z;jwUE=#|hvhyFJ7j?kTRte`spY?mIo4uSB-BXF(Yx|!*n*Z!F&t>kV4=~7=ayi9cXs(afun8Vf{t1KCkNDhH-iw&THu-Gr@?Y1cN0j#tOYJ%G|Nx1 z<*~sdAP0wZN_R!p!{cTk)We>Kcy`EjfnU&m)AO*X6iCx5< zV}l1k4t90qU>EV`=)isuL!f#)9wELgXZOvWnK-kQ+uJ?!hO+1Ek=&lPd_J*^cyw&= z29Seg9XVJ=96CC1J&3`w`5nuMJImP_5JIfxdp;rlEai50&rCczGPtX2W~G1nVC3qGJECRnf*_#(nR)(w$$|OBF-%3E(a;tWfHrHJ4Xh$f*kDX$iXh(n_$-# z5JRBMjz@?uC$o6z2|VJ@Oq@B9!&^>sWhinZUA+94zx4 z3YOv3C9ursSVr7AnZ=t*V430hg!pqJhgXy4%uGBwK8Tl*_?i8WwNhmeZy$j;<*#ku zqhJ_bHv+>_9m9w}C$o6F2nEUJ`_ll3wXcn*VfHdLRMVvX2!#g`*SAy6@+&Mmow{*a+jvVYF-mDDZ zwH&Z3(eViJ7GxBKSy(TRc6l2#G~awydA^OJn{|C@X{b&hXJwXuWjPd z$^hPf0mHP8VZ@(fS-kE7hH0K*#G#`(yxszabu{DJCN3=x;*A!5X5!J(0A69CGZ$a* 
ztR5M_8!I4GJ7y7Qj%D$J3YevOW)W|W=I}xan5B|P5r38k@eT@@)saJ_h&xLIc%+Va)BLTY55H=M&$bU>RQIm@_l+=;R>Y-QZ_lmh}vu7{u!uAddNKn|QP| zfEP5tuvo`1;?I#RUdRB$VxD2XL%}e-fdPhfG~?PPE}b02OBeji#G?}fc+Y~)e8Ned z)#C$ruL6Y8j#$e~=U5gm2XM0zXO1qy>i}fd%uUQ*cxpCq zC-(pI%m?ua+6mDAzheH}{E_*6^Iy$>Hot0q!Tg;03G-v-hs^hx?=>GbA51=x{G;Ty z2HHB|4hA4`;qop?SAcmc9v$Tzfhl4->KdRz5Qb4 z73GV{yOdj$^OdFXU&Wt{KO8?2-w@Bneja-|_Kw)0*x4})dil>qAB^sao*wOw{50~J z$Xg@3B4m71{mOF zA6~$UKD4Y7*5I82_!7mP+wi`Hr?})JZLIA>!xOVOKjTrHMdO8yg@jj`I5&@7Mj)>;@hN+mLB^|0+*-u$ zL(Z#AJX^qKM98a5ylZ1$BI8Azo5waKm>2OWdw)U7i@3Fj?aMKEk)2S$ZbrzfM0SIX zjg5p?iN*{2oIqYB8ZYd3WV}i=UfBM~d6j6qup1KdqVdAUNXCoC3;U#CUNm0VFG+dP zcwzH&3|=%|*h>j{(d=boTP5K|lUE+QtUz8gXR#L(WV~qFDq`y;=S4GD0b4OKFB&iG z%4ED~ys$|N=0)R$&6<=KiPv)I+%zyW7W8>kCR&G~htt5$Xz<5Tndn>!9i0Yd;x$n2 z)2K`$Z$oFNp_QT!S+ZC0#RE-SSrLE4lFhmtnm!>Tx?hK({gW`FyLBlvgMp0b9<`w- zlrkbwcc4R*Ga_MM4(+0t5seWvjxt6xM$kqEGomqqZc@gG?oS6gODQ9|L6<|DDP%|6-W+eyuXDK6^mzF~}Eo4M<(=c?_GDc+CFNHQc zm=TQ;^x9HJG)B;O%NfxaK^HD&L}LUUxr7l-O-rFY4`f7Bk_|n&j1kRB4mRwv&m)?b zmSg`e`h=yqX&9S%@h9vEjS;r?L7%YD*e>~_E%5i^Kff2ci-$r_S$^zwI;PPwJ`ag$rqAeNxnb%j^rK5J;_UwW67n-MB*ojZzVpR`0K<2@Ld5j ziHi~=iNy&OP79vVKcK%;pVhC^FV$D;gL+bXN&B|;8SUNLTj1S*E41}mNn4_6>Wk{L z>L=BAsc%wmRxeXeS95Ag`48or%4gw@z#Ej?lq;3(m|{Gs^0@uTrA z@pI!V<3{YiVlTj5f%nJW7`r_-4d)A^v264=(H}=YAN^SL;b=8_Ai6a=8C@PV;m*Jd zk&AdeE6yGpNHQXzCC`gQ0nQr+3aA*~`9e_NM8bS+j~~ioJcI zc`mX)SH*+GzR{j}yWxP~<|+6d)qT6R!NcSoM-Cm{zjyjzriy2Y-MV_sDjp&dUFlpE z&kd!z@a(W#mu^<^*pTQ_a#cJkl;|?6cvc8Q(k_+kP!*2{`$j`)0hRu~DxMMc5B8*a zznE1#9VEJXhpKoUDAASftKx}ZkS;t5B)asWDxUvIbSZsRJOk|3HB`m3KNx8W7-ch6 z$R_AflV{X#R3USK89k=!o}nsa2$`7aZ`(bm^IDPoOTvxQkbn zxwDNkc>A;F9Sj#TyoGu@`~cR2A#g?Jwm?O_=B>POHg!*mcZPY8SJXRnqg+wC??AAk{k$T5XrEk>(ziEI(a@gu zxTJtdHgf|T7fSL>`i<-1=ddc1omDv_SJa!?9jGX6?3$zKI=Lb}GaaZ%F?RBbGCSJ1 z1aEoPyq4iY32#A8!4Dw(34sqX+XEH#n%j6qX?B4`rs(QGMY?$vuSm(^*|S8Ec}07G z?k&j6*?^EPpwQok=g5$kblw@}7G6>B&}DK(>Ap*Y6- zFv(^%v2h`dXVPz62tS7fsdX0Q1#(5bnezh`rHzeq6m5_z(lh4;DpHK~yrRsyHcsU& 
z&zk2lTuAj6yI2)`=uL1t~BqF(cCUQs%C)-e>}StWet%&|x}@mLZ*`8pOUIXrlj zSY+aHBMeZy1&L>h@Oh^QDD?N?aUv{8g?EOD2Zuya?+_jhN))C0P7YSIidUo$P0AH1 zeG`F-hVTpz42TPuWHS{uE)?gP^c!RFb6Ak^&Vn43E9%XZ0~Mu>(i}x2az%P(WuPL( zIEh!3DYkJj-tw%ug5g3jZ$Ucn0|1_hNvQa z2#$bcij+P$_z6%n1V=qME+Sx(&0rG&yE?)%={K;KfCU-pEJ$o2B#L@7*im#VN*mZn zP(|23NEGQAY$v)EDF${CL{SEt2f&4S%d;l75D=}q6C5O#|M3Dilg9ZaS>k%&tXy7vef&K;1FlewGc~2P7BoHmV zIXqPmYe_GFRv|zOvnL z8{pdm`?VhRhw7KrkEm}~>+1FDMe3wFq{fw>D&J84PWdzCb;LdVc(o#ai=l-;&<+69=Yd+&crK#gTD~_2wusU)}>3 z3))ZgK~k|;kIp0oJSy-cV^$Tn6-tcloVHPmb>}Ea-N$JwTdYe*N%9#^TghVWIY{Qu z<0X?8YtxxT<>r~NSbL6=^crp+;DZf1O3b6&JirIH8cK}EIc>lPHycV)FLK&|5AHf9 zS2cIKCly+B>5<(4fwzwWRk@5oHpQt4LVqmPVaXUS%+Lf z@@`JmsD&*%jz~PgsVZC8x}!jUnNwA=uw@5Ybg zj)K&E+#D5ak1I$%!>JmxZs-V9PSvP&y(>tp;Z&8a8CRem~bY z&do7lUFQl?FLHB?TiCJ#k5pq{#~=E$rV>U`%pzOjy^rg46+Sj&TbccpQ;@l$)bsVF!A9c&>!Mdjat~o;|T3pPF2~$MxF`MoT`$A4Lk^RgBMI% z*ukS9wT_!(!opS_1pW)`1u+HfSRBn!O3mbaip(WOEb5yLeU4ed-Q#EFt zbmWV~+ymQBGCK!Ui7%N|F~$ zTG-g5An`mm$ApEgJrgYC<`}oIrAL9jiJPNhVNZ_&?H*3mm^JDO)K7A%MlEdWafI?J zr>bmWSI-0^oT`$A?L7$MNnS8%VOx)a#BOem2@Csr6zC6ebBtTq;G;l$mYbtut>_5S zoT@R)aRur+PSvPoy8`8IPF2|&b_MY#I8`O9&=$lJykOE=?h5pmxj81R6FY(eH^;cO z%oS+cxj8CU-W90#ajM3wrLI7EhEp|aozM}eoT{?5#1+KXaH>icHvHhkXo43^TG;5L zKtIaOF=1h=j{@y+ZjNyaTYeO%FLHBKENu0eU@@m^%)(|L1>05?a)!bTqj%A=gB zG0Sa9BJt-rRihTR{5T@MkW*E*u;)iXY!j!dWMS_Qg0RjDCM|6GQJ~$!%`stN*N+1A zliVERmfMVkl~=hrDwf-egq0Ca)tKeBBH{RMPSvR8b|T@}L!7EId{1Y4^60ais!|5} z5Dl!-_@7T^pbKF?PjfR&WS|GZpR4P*3C1(ff#A=TyE*-p4D=uP^Y{~-p0NycANcdw z%bcFk4D=rC=LJqrIRl*s{ye(9qX#30|-0-iNTj05W$oQu5xY0DO zGFHO3{{7F?hu}N^u1=NUHvoM1`2Bx!7v}i=|Ks=nN3J-2|6h7PncgKne*d4nW>mmZ zU3j0oM6QO}Si4K^lhc=%@>tFX-Y2K;EV18GAiGaqBKP--Sntd4$(P72z5;duLSFO| z-|_qZkT4$JdHweI{eSVQJAVJ)eMkHMq5Jm_g4aWw{3tqw`FBs>m zcxVj8VmCSZ@|IJ@BV-vl`u88%Ji%$@e z2>xn5?rn6smmPKsN=A-u=OViqB_~IBbb;NM5|X34+GZE1WaP-w$g?|D!Q{w_aM%Sa zDLJwzitN(WG01U|!v`Q@a$Mx_k%)vG-PCz@T`Q0r-OCQUyCow>w{wx*=aQ47JG#Jb 
zc?rqUU2U^#UovuBXJ9M9H!V`TCRM?Z+fW%ap=x2p8rOvNdeHtR{&WO=5iAWDO=bnW_9o;-^O=qJ;3T|gHp?GIY!EM+!<<8ilJKHgtauQ?a$aON z6mCSgC|>l$+CG3tN$MxtA-^B7fxJlia`qug#*4;lFM_4KXbLOrLAZz)&0_Wqh!pdp zIV^uYVuN_m%;n4=O2UiguHtS4%X!gw?LxSa7u{diAyUGN#%mg}fxJk(iaXn;O1^N7 zZrZ|*wyD8ixJEaweQn!R(HE}KFR#f@wN35*!Zo^Go$ZK{eUTjb+M41v1k1ljj`V_s zYY;B{B016s+E*h|@j(y$_J2+XQdTl4KIK|G~)w;@|Wh+VvQ9=D=YCu!t$49*Fud|M$0PkTu}K- zbE{p0>mwM@_@JQtB|9}=gF7Vxa%7u2HMn9TCr37Fu?81UaIMP7ku6)O!G#oDtO9Z* z!P+%e7m3Ny^qQ~130E*Vnp>S3D~lwNquI4s>p3Plnr91DRvL-O(JX6MS!E<4MWoB#j)c5bdg4AI18E*BVW=`KntW#TL%pNHotnRkuQlS%Qs3 zGi|ZzmPoOTcq5Uo3M*9I8Y$KnxRFT4wX1HCBqB#QbiV3VN!{e=K6a{ZnIt4fw{o%S z)=4sQWK9&RZlNR~M|QMbbt@$yIkGJBRku_MAV>DGQ*~=4F*&l8i&eK+l8~djxlnbh zrC@S&N844mToRF^8#-Tg>!ogTbRRoaw_p;Iqg%OHbt@(rIWBT;$s{1hMb536gygu$ zxkXa|IWBUnYH}(TvGDyy92tTCsW|qNU_$Rx4C5K)i?? z&1`n{e1wY0(ZrUoZbWb(Ig-zu>IQ_#$Z?T757BaR+@4#Hcp*6wxqNk9+unf3TtuT& zJ-2PI@EIO0QHs@b+V;wx;gJ8~hB9?q|DtHbTYE@aT5VSIp_p@C+9ib9(WKra+s}USXj*HxB2$PW`%b{33 z7145XT;xtcyoem#&vx}>go?@0?VPW!LU0f{x{ICaB*G-*=teG9ClDli*zS}RF_$TQ9Yl$t1<@&Fo|9_8utG-cRroF0tNqdL3 zUt6aQs4v4e_q|2EL0zr(D&JQ=qWrNk1;5cR9)C9e!FVHnd3-qbo7gvE?}az?H^y?& z|AcSldslRS^o-~t_~!plN8S?I896PI4*wwhk??GIdw4XghMo<5Fw_WL9vbfXP0u$1 z+kf|m`5LQ`1}kcQZu3Gz;M7=Yk)W#hP0h$;gqUTBy0Dk$@ao6n4!mjfCV#I?dPI(x{spO{GrF zEsccaXf7?*+|o!wjwaPY%`J_B$*WA)bM2_y}e9bM50?2WZb4w#JIWBT;X(S`Z zMb0ga1mw8LvC?R`A|gk7zI=@pNQ0GbavWO9PzDFW0jGN9C2^4#_A$DIkGnjHC7yn$kEJZ*H~#J zCPx!nzQziqAaXRHIW<-pNyyQZR;;nQNJ@@owL*;*M?!LBf7mrv8cE1;kz)l?AUQ5_ ztTK|3<08lEA~`uOa;!KKk)xT-u8pwc)n0;%7kQD7siHAX<9ZPB(I)wjA-ICkb>j+qDxBDt?kc_j10r z48cK966h9oYI%f7o+Qv#uUK1(XgN8ukqfmG5HBQ0mV{kff=~%LvLf=e#Rv{0N4Bt2 z8$y_b9NopmS`N`ta&#jXYJ-Ruk)xa0u4NG_CP(*jzBYj1AaZmIJGDg!laV94xLE5) zw459lxrK-qlH($0Ayh(+i(Cf5LF8y*;?(*OCLu@blVWWFqNU_$g;J>XB3?v}7AJOX zK0?LhXnB&a%|mb?IpSNVmPVM29C2^4W+GZnj(E6GGY~H%NA`zZOCeNDj+Q6w{{OT8 z`~ClUdKRAlKL!2&UhQ;v{{Iu`|L;|=RV%8ld`o#uX)0TlV*GdUufTWp-4@>zUmW{I z>2nLn|v1ax|9~>Z~SORtY9Y6RBNi713}-M2=?Be4W)rgOzS_G=n;IRuDNAAvu~t 
zi*;5G$;lB97wW7g5|Sfcw(G1Sl8_@#&evI86iANj3a8ErA_+N~LW^})4oS&zwPBsr zL?UurZCGa&k(eA;8`fD}6hw~ZGpEi9A{jZdF^Y9o4#~-Jkz+NHkQ^5|RuM_aagk$n zQ6M=ka;zYdkfSNBSZC#slpM`!g*vN=MC52@v+JxP5|g8eEnjDKQ4l$r&zw3dh-Bor z$gy%rPL7KltBHi`0Tn{ z7m1G9>88%tlUPIr9kbKD?9>wolRQJ9+qqcR5iNa&KzDSZt|4CZ41w-yyRITs{0xEC zDfzmB;6QR@ML6|1!er#gqA1p5h?bKh>!MJPB3?+2>}tCnL8zD<-PHMd7{Nj0=w5c} zA%scD(d}HU_aItIj_&9}?JmTN$kAPG*X~59gdEw_`Pv-_4kSk-SGyfyGIBI>wc8La zCr2Y!yA|;wa0vN1U9m9YSy*IpSNVb`W7Qa%5u^Yd0cVN{(i=LhS(JMdWB^vupbiDkeu0 zTfVjr!9nC`K67e&5hfu=Q(CdM2hnnJWN#E|Hy~a}j*Hy&2$hiIA~%EJKyqBNCnt5d|NoK1J&CImMY#X}to~Q}-TEfD|Nk59%kVq= z4#EBZocizTKdNto`~PRC3zQ!zAA|e&TE+t@c_kH+qZogW*D zz8d{P^db0-d}l@nB0r0KCh|Yv{{QO8yzmR*zk&PzQ{l0&7Wx;s|KA9039ab)t?2&0 z({L-K#F*eDfIgxxHrxs+F)BO>pvT~ahFc*eCW1}^=u>*T;Z{hA@!*pHdNVuUU=`9} zB~h7s5TY$Uj|lQR zZTO0Hw>r{S3iCVN&4s#K9qA_p<#)QH?YdhXiOA6nov*vqQ8zidkDa<(9SOpxLqr16KcdMgd za&$-Ab+esah4^I^0XZ&m z+Yvfk5t8E~w++F8TBH=~S0h?Vj+QBf`c;S*k)sugUB42cVsf-b$=9zy za1c3KojCQ&5hf!?+*_<~MYNn8@o=HO1@S_1#LIU5GK5OVk?oPMUy9%$ax|Yg^-B;Y zAxBeMvA!A6QgSq_73vowUPO*&HoJZiLdE20V$0VzAvlm6*%eOxLWIf4agnkvj|VLUJT>`TCh{djlSI5sgm$jJCbvM_oi| zvA(8luk1M1}(1pX1T}Y}Z#KRDA4Cw{yOJ8iIqKDbQW)-T$Zk|5uDhjKlE$ z|4`}|sVCt#{mnrC-aNFofs(eM3wUEd7%|Nl$- zs`lsFO>qBziTW$`Y4v`%|9`e>DK9A>hx`A#l#>-R{v6!@e?9!BzH&Sa_y6A)yE}Gi zY#H4DeXp~0#nAvv-!?FOrjB;?2*$~Rbr6iAM29;d-d zBN;ieb&3sE7)i;|tXgQW>PSS6rc}GZY9lc@nosi$Rv`tEqq)>+u+m6EjwaJ$gB3<{ za$MwCbtELmMUK@*5^`MRScMcwjzq55a7&})slr`E8@@urEsc_=1nnaFh|g}grBU+a z;9W#}zI?+ijgqUnb`iPH=``HZC^;!e5j2+;8*XVNAxF1zq2ZQB!Q|+6wi|9~BqB$5 zbH3r0Mgio=Qg9k>X(T2`RztDjmPRsiWI+@fZfPVSM;3+Ma7!a0IkKDc4YxGvCPz21 z({M{8AvwB_iw(Cll8~cYxzKP+qhNA$JKGJnG!l`cyE)%*OQQgCT;$x+NKB54oLd^n z$Z?T#OCtd}E^@3i8m@@Q(Vj2gU!<*7#J5i46okph5%(4wCnH);j_i#>V-?~>TLsFC<6yhus)OsDvCBxiW$S z$#Ib@AxuV&i`)pJ<>a`?twg+t9L;QY<0OQN$Q7x3(0Yj8%C&v92dC)f&@qdE*|5f<*|KZrL;r{<4v76!j z|18}9e=_<2y#IeX-2eY^X`~M^1Sm>Lf_lE9<@Bb_GyxQ}5 zdjG%uZ>PzsA*W)l7nJnW?Zqamg;rL~b;5py`o7R)1<|sKd2Ud@LOpLcSvfRZF*gMF 
zE7bq_CM${tE9QA~`xSas!D+H;NJfrqm|~OFLUM9s*A$wpAQFihR>8hXTlvwcs?}a!5>$EQey# zEr(>}NG>fj-Et_H98IKl(=CTYptZAaW`~ahjwG(yGXXkyDZO#}x;j^;C`X&_8Sj*DCh(QA^-|6an<%h~g;kW-?t5g&{{w?_J zf6e&Tcro_7*jM2G|822NvBl9}!2SPsMh`{LiuS?%|4&8U7P$_-|9@Wi-@+dYzYf0t ze=Mwoz7u*3zW;wq$nN=l&sVzpfA*7N%dL&l8-<-D&8LNyTN|Y}1$L6;w=~!-w>C;& zDCi`~mki`vZf%slU~ZdCv#8T@Yoqk}f;O3^(PGQ3jb!A=dMUKr+DJf-tQNcF)?*ax{$=TW)P6AxHCRq2<;_!Q^N%wOej&BqB#MX};ywM&0CS z7Ij)~Z6qc~(rB^e)T2=`rN0X`DVpY*_MMREf(tL~6MuFsrZ=Dt^ie%)7dy6er63NLC z4;NajE)tR>`@?Rrsz^+ZHhuXPtBr!l(R}7KSy3b*$Jd9fB$ATj>qAx-iOA8+W;ajHCag{Cr2aKWOb2{9F1I)RYhWQG_mEItTqZFNAsD} zWJQsL98GD(CM$`g~8>?!od&a^Ox|y<+nQL`x6c>25AGuSdM-z@6@ByE%hU z@zVslq4Uk%2o8FhK=-lJ+=Va+Il7gL&Fc^?Cr8#qp*f9sAvv-x?B-5{O30CAk#Fum za3DFd8l2{}2$PW`Te;YrLbQ||-OYvOcEpRw(H(6!w;@zaj&A6D^BM#Pk)!+AX9_8IpW@8^I}BH$q^41ninBnNRD{fZf-)T zgdEu(`R0WP4kAbMnbW)gVG?pQr4^g!BU(z1X0<|dBjQEmXlApU8xSfcM-yAVc^-lT z$&p>*G}j|cMvjZzIz-FKagjS0@j`N3|UzvBBo6Tj$ zf5H3ze{S4ltTUFt`~Od;?oZv2Ivd{qe+lmY|8a5`y#H^){r?XpUJvj8m*M{ZH}vX-= zHRuq4W>mZ77Dw~91Rny>hxYlFTO7^5Z0;cd{aOL1ng%PPU-XhyYLtTY;~h{(}&ns2cJX|U2wj;2zl#VRAGA|yw1X|cuXqLmd1Ihs@p zEmj-}$&r0&w^(T;AxAc4zQqcpKyt*lPK#AWGIC@`7F(oKpm63!T&85W_tBd61NK!4dSaBpI$3>2nMiO#dV?kQ^5|RvJmjagk#MQXn}Jxnj$$jOJY`OzyPbE419oXx=43$({CmcFV1d=4}p6 z?zH90x7^BT-onz4 zIvWcpry?Xr_i?dx7NS>HB;@E;F0{@>{IW_gIl7(g))@#Lu87Fd-JEZ&LGWNDfE*XO z(-9^k$3<>6qUGec$eo6GAvrE`ry^8Ljut5S)+q=MB1cOTr*$&IB;;s?Qf#e4w3HmJ zQ3|a|#EZz$^2BaUAXGw*I62=MM{poH;#;RxL70pjac{9ThG;oCvNsB?QN)YL(adJI z$_N#cqlqowDj_(C9L;A=YXo5uax|qCTPqPQB}cPbp>-1Ch2+TouvQ0z$>)XkyE^mLoWb9L;A=>qLY}$kCKmY%N2y zlpM`!g;pN%LULT>mLgO_j*Hw02o5C2MQ#bgWaLQX3a!O$V+GH*h+?}n)HYW5e2eJK zw{mS`10LRyh2XRX+r|nX-qBr*{r_b>&-9qzH2=c91HS)nnel7mi^jW*gT`8;KlM`T z6REePrcej%zfLxjmnT;wexLYS;ysC55*rgk`Y-g)>G$h<^wahE+7GmkX>ZWB zX=N>@{)_sUdY5{!x=eXh`MmOWWuLM}>5cy&{?YjB;#bCDBgVc4xBqX7t&a^ve;WNn z^v%(o(ebDn`Bvny$X$_(B1^-sgr5vQ5Z)7B9ZrY77y3wO7Jm1i)AKtpR`SQeVs?Y2 z&4;4wVCkf(9Xs|P*>|`!xn}z4)ZPPortzDdruNM2oZ7i}X5R{|WH194wke)l3C)D?6SK}5{p_OndV83 
z3rJ#B3x+VXPf~=&;^&{*Vl&^}24+QAYRNHR-&l3nOFNi1@Kq`{LM6OhCz7Yt!&pQH$l z#cl`flN6z`*zKTwk^;1Yr7n;lOYPjXa1602P>grR+sA~Y7g9kfqU zgvO$`gZ4=Z&<>WoK$2PFlk7sbNn+6pB$GVJvVbI3yI9?;ulCJc#0%z#cylI77f5KLOb8L$IYWGdzA>;5NSnOtBi}_;u9>F2CNk6%6e06M^Rw zu*XjXo=3nQKLOZIBiPcI86LkbaGRgW6sr-AUsq}1@-y02FtEo@1WpmK$4>-K60pZl z0Jakdwlrpj$FB?A=BG2oYMA5KRTa4Ww6+xt?C}$URRZ?-iNFd0d;A1oJC0yWV`g~# zy1%uZ>*#EDI z9_cZ^W$No6BEmn$!qhF6c0tOuabKC>RJ@EgA2Ub+El7Ox$$7R&fuI{6q zQdd;5mVnlV!M%?RF&xW@4&2MDVmKBQ9k`cTMR2EzB?WX&%os1bcEeeET~WoF0+-(9=>X5Pt2gN| z#YzKgVQ`T zTNvDDDunkCxX)Aszl)%Kq5`yYC!(#kDL&6GbercLOtsq2)9h+iT$;DH4Pj`Xr3if+ zLHi^{=vxWeCn-QXw;G-ioUvJ2fNd5Ec2EuLgo+v1Wu*fxZreUc*djRftJ6rm3gv`H=&K0YBPl{VS0dWdm?a*`ZgiXE6->3to;&-GlDjGhce!5PHiV&lk|Ok0 zg7!&@&|3)FCn-R;@BhR1|8?&FFEw5 z`TsS^QZkNcP3;HAa&H6??tNl!SQhPw#ubr;VQ-7@fz4{jQT6IiK zDbFh(f$#s@qKqh!__Of+f49ar$4`v?2EPCA9qa}`4!-~IlhOO5`=YC(>Bx^FACJ5_ zG8Gw(B*V{zKOAm^FAJ{>heQ7i-~V?@=;F|_p4WoD|F1Z^5s1T`k_A(L*u~J<4TxV> zS+M<>7ei;yL+Eg2!OlP6V(9F81P@jg?0^NobKBH`nes?^yf{)W(u)_)>^g)wl?B)S zAs0hu&qcJ99KA?Ym^}yaB69Q=l|8!_p<;6M3RQmgYy=0ABbRWT*|QKPBS&uG6lc#w zw45BdHB*>91Mxy~V5SEhTB8Lxzk3o)$96l2klOvJK&*B4N zu(x;H+-*d!cRREABpB@Ny+*K&=ne1UEItVadwZ`I=LoVQ3bXhu80_u6Dkw+L{cO+T zlVAu&@0Gzhg1Zhmiw}YW$pEI#;^kfZm!^RxKiH;^2?-tEldBVQRgdc(Uoi_d(IL5_I1 zFpH0U#pH;W?OA;4D3K67UAd9REdO=-nhJf@YCqgkymi|4dLax}Bq zvv@`;Ax9Hieilz?1If{R=FH;Btdtzt7{ysUnLP$ME^>G@D<;Q94v%GJZY zE^>GnD3Lnk;B7R895R;XBLlM!D{iVtweQk7EfNmYT2u;M0;Tt&tAc5!F#)8 zZP>GT?h00m-rJ>{IzNl2t`L&$_jc)Cc3ODcDkDd?bFqcTt#Wd7M;BUn;wmIZceUNZ z<5n3tvNZB7JZ%joM^=Q>!edq`IU2bZ9ueG z=w5bOc)ThjN4GQf{||&V_n1F6KX3jm+yQ8qhs|ru)#gd2Vf@bcf$=%x!^Yc<+l?EH z&Bj@VW9X^ZQr}B`D)m7)7dV=lO07w)Of5|QXYz&QSCa2fz9V@D+!44WIhI_SOeB7i z_*UZ6iN8)f0PhOSBrbyA7O*&>>Oa(<(LbQSQ=iqZgKrI3tqDa%5BP(%6UpznXiQ6 zKZ-vce$|p_rO!(KM%h(e0%tc@VfBw za9`-Rp>Kzt4m}=vQ|OM+PHG|xr$=c@Pt-8b4ZZ}-fu-8WCo9G=;?Yuo;P+u$tW@czBi z2Q%!^X}7LklRYFQy3)BSe#T05;pb|%F5P6$IEgML$DU(KbeUEBc!VKomr8c1il2gg zqoK5bN`GG!Kl=Iyd(ymLO!nZB=;|G+;^&z}SGuo?pI||{@Z*Z;(ub<}nIzGr^s&c* 
z-MWUV_`w4sO#!29rivdm;8Bxj)NfSrg9X-v>AGjAiXR>lUA-ChsIFUA+Nk0u1E&k~ zzeJawsbbRa)}cZpBfQGy+in5 zNTMj+htGln6yZZ3qDUXY2Rjl)N*_MU=~gtfr#&tyV3N(;z{Z7=Jd=LodiXhnKiOH8 zGjc_}8GOzH(U_wsZS0z(=sLL~Ju@AsNHKQuiZb}j113!HmS@dt87`FY7UUHC0K%UT z_#m@AP*JbBjaQV;U2_aYR|hK6&8v7tO72RzBJ+y&0Nq=Vm$LyOT|l9~Zz}*G{5tOp za|^Gicjz*?qIBP-!HO>773o8p<%*QPivtx6UDO_@37BLvo7lLJ#xvAsVL6|LeG=|hupMM~d9prWDi_PDr!Nj6hq<3e$sNxv}$KZgYw?<~ks zxuV_-9s|Srn4>6d;E69)G$L1|XI2I(QjC*$MHxJrg$ZN4}qR4>OL4yIE^_YWmD-E14 zAke=68U_uPBku_VngpVyHwV3eL`!-Bvy3huN&PPMoz&-{_rDk33Ybn^ z0`~&?Q)=?($uGfo1HL<1P0l3GO;(bFiQgoCl=v$Aj=*;$Zckj#I{#cErvDrKuE3A! zkLqvI_v%;aXX#7yG<;{^H{rd32esE}H)kAVXViaC-=j9vgX-n#qpN;)v?0vB} z#BPmE!S5EBi1o#y(Vs-0jy@567Z5%E=eP%sd*J^J4-C~XlSBT@2@+^FQ^V8_sXND~ z(0-$aIUPQAa-Gk?8YXjzuHH-yvwOF$v{8e!&g+7_F4LuFYLM9jbSXv+k~`IvsX=x} zvLKs2%fDscFRCqeMpnC1OqR`EcH=z_yPJOYyF zg2O*N3<}bPXF^059RA@ckVF?8{^7Aux2~Zoo(93l0Rba8{KJDG@aO=~2oC@7fC$#= zKxeI1@jytT3l9JAY^Ylo9RBT_t81@Z7aacW3Dl(+)f;$SndWF0(q-9@6ixRXiGk zX!HxHz*@yaB3KjsykE>Jo)y9R>ga;Cil;*oU9eX1oG3^a9v2Z^uvYPaNTLhYDxMg1 z>l&)!K@p5xC}0F@70--deJ$h}!CJ+$BUq~oy|s#GMG{@GR`J-VTNkWVJU-%d;c<~f z7pzr0IqKF0YZXt7h^|Z(kBR_k@eE+CUcitc%U`SK!%twXS^{@usv84!!CKwG>w>j< z-m!G857Y%~bset@*6O)(U1s&1_K=LfR@bs2p^ShEtktst0c#?|`^BuD#p{B#dZt_# ztkp9Fy6_|k)@sKtSgUw;1d$cl1#1-#le+C1s^S?EjO-IIg0+grNf7Bio)N57JXV4= z(dVsIJVuh}g0+e#O5M6(t>VcNrwdP#B)VX&;-ONvE?BF0m_&4Cs(6Y7$OSwDSgUxP z1dd$buT?xhg0;Fp;Eqfc50bid!CJ)=C87(~Djp$8b)6Kb3)X6p*9B{JgU2*rwb36B)VX& z;(1fIE(QDlw}m$Jm@k^om`|7wnfI6n%&q1cb2&W!f5rHY@fqVU;oAU?8r$G~fTFP= z^&5El|8(l{)St2wfQ_lq)S^@<`64|1{~LJv-$>q&yg0ckxj3mLewO%V;^T>T!|w^W zF|jpqdg8=HO8=GqZT-{wU+DM38w1zqYxNa+ulAbuUF|9D1KQiPJG7nJ2Ke^CeyvCS zq55U@BkJ4Lx_Z5O5u5=Gsd43}$~Tn1Q~nHom*4?-n_#uF41S-$FX1-|ehS_xcvJkQ z_|@^V;|@Cmcs}+dJo*04oz{2S7qc23i6#a1Y!DuZy6WtV@h~}cP$V-v0 zNB%bQaAY>JH?ldhDl!y_g?|$MYWSn!cZBQVnec_-N_Zd~3jHwjrO<~$_lNEZO@p(7 z|5)tNe7J8(Z~BP?Q!_i;xBf?qBjXl(HjfhXHr$zo0$@WI9mCCROvHsFID8c62N z<0ZfcJ2RA|@8#wJKG>R}#C(dI2l!x*#w0!5JirH=G?W;VoHpQt%^FHl2RLoO2fH+s 
zBp>Cp0UvDBP?C6_(*}I7T?0uv%}XXNY}1%vAvev0g>4!N%uU=Z;}&*nC@}8fCaGB1 zrJ*48Nlw+6h0PiYlCN^AMlI~om|%ocRkpB6LqTFUr>bOOuLc4$%?l&92W+>2K z=2Vp|?9f19n7m-p!p4jV3fvqM7B*%mFt&4Zj9b{Cp&)f1H%G<7z6=G)XE;@37WQRK zpmM54E$q)wkXXa1DqGl}p+G;%sVZ66rhy=3@`6bV+cOjxk8^WOSlFJSAoU_Q$GC-E z8WSw$=BQZMouMGPl~XllVRwdt#J!xVQ45&8NXH3w;sVZ66qk$l4@`6bV zTQd|GliVB=7WQT+NFCtj7`L!RLqYOUZjOqDof!%e&vUBAENsn~U?Hb!)WQx81^OmV zRoTME3XY0Y6BagPC`i7_%`t9aW5xs{+#D4P`!N(G zc5|x6EbPZnpg+W^8nv)5LxJ`zr>bmWJH`ZQPF2amb_@i%!3!oWY|BuPTF1>XVPRW_ zg5=%Y9OD*tXDCQK!Oc;zuq#7>{xYX(%)+jW2@0I5Q45mCG8CxyajHrd zHf12tQoLZ&!rlx8$!EAZCM@j9m_X&`7`L!BLqTE~5Q$W)y8_276Ge9mlpEn&EUaiX($eAKHWV$YJ{Qv~%wlFCu?0^LsD*%%6R-4*!t% z#(Vd7?|V0X_kQntAq}-;a7o63JqtYBbgX=I)5RisiGPn+7L2^|ZYSG|&j0K4s zrJ=e8mtic3KPe5>F}MsPK|CW1Rt+x4Sde~Mnq$S_D$EniN^>k5T!pb9wMUwxXK*#f zf}|x4wPbKL#)8DN(ol;A7iBDnzaKa^#u^{%0 zG*rjnN{j^2v@BRPxFBOe>J4d*6@%+APf(NQST?v2V?lDiG)K?idW;2$N2Q^b3@*c1 z5dVQR)S|(4m?ua`Lv;-@3qkB5X{fF-Cj`->(oh{^b|46+WWlOo z37Gt6f}zNZ(omflszZ_#+NAvV)fuWnyuXi0Gpx)|4Z?p0S<=n%$J9GP~`fuhYvTtR7L^lF_ ziJt%8mc1l^x*xETNu}SQ8v?$O{zBSHAEa9XPM~xD zvDEAI^#2(;_y0iZ+SH!Za;lgLC0|Q^KlwD>8E{|n>g3r}`OhZ)De+3;yNRa~k0kD< z+XHqd+KH+7KhO;V-;V!5{FCv+@k`^U#~bmn*qdbZ*1s(eY&A4DT^J zed(1Anc+~AP;_z1Gcv`YCZTBKl&58mLrp@_$0<+CB!`-WqLEXclF1A;2}LKTJSnr8 zMZdcN3?t9CM{_m!Xx@Ua}&=>^AH}<@R=KbOB#*vh>p+P_<}SV;SnvLxv>M% zXoN@feCG0xNuv=S(e#;}k|l&kbbV$gUzFw{JfiJ0J2@uJLwH2rXLe$@G!Nksji1@^ z`=rqbkLdi&jy)rdMtDT)XEy(aG#cR%y`R~9O&X2xh~|$mB})j8=>8a!`=xmZk7)lG z6OT&s5FXM0F~)x&%|m!Z1IQRpNTU%R(E&2Xc1fcV9?=3a@`t6-2#@Fi8M&vV(Fl)d z0?kay62c?8Kr<7sN%IgM(FU5C*e1)4rO^nF=m<^c_ei4=9?=q-&RNoEgh%v*rnApVqY)m_6q=flC4@(Gg{H>e zlI9^iqAfHvz97v*ctl@lYV3eC58)Awp{e|1(rAQ7bcUvKFG`~k9?=?_%8f~*5gySS zn#%5$Mk73;IW##TO9+qX4o!~VC(X0M_5a?$Uv7T?-&E#jbjJTdy8my5>iZwl+5V5D z_oZ9jF91B5x{E6MZOLCI|8w$l$y<`|NKPkyp7i`w@XNvb=r;hH!5|r|`q#2}@z8AzGY6Gdd`TBYS-hm^wuTw{LBQLtIYc-1 z@jaEpEBo*QhK9v!jBaa~x!(VZFT94KV)3%0+ZtxBTYrBU-oId4yawsEhM8*>_m|;) zDlLoGr5+q!tWvXhwW`6v)fo+om#+>ST%b|0ct!hzgPSu~XWbE{Z)<*(=(A%_lkC3s`S1Tfc*Qm5CZmUuxrms>j 
zv(@uE-4hlT=)Zk6i@U8#aCn(Y!{Yv{1{~g`Qn9!v>kAHVQ86v<&}zZKT4-6^w%r6A zEQgxK-CQ*|SP~73`@1@Dup%lJ_k8`q!7VDL#T{TBINZf8i`&9_aJZ3c7I%x4;BYfH zEbbp`z~Nr5SlmA-<| zTm2)ftp^7luJ!L`eI+>9AC3N9tf~fwiLKJVla>9zN%uqb?_f0zI814+{$bYEg2Sv< z>)+1$DsY(D8vWZ?RRd0n+^wwa3r>pMAy(6YlOlHuYwN*Dk-M4omEfev-NdSDaG2OC z{To@?4;g|^x!b7)%x5eQ-Z_H*64HFOb1Si9Cyn0;>0k;Te;ppm4s`rNc5i#3;A z<8H3?uVj7YRmqTx8~rO-RSga|bfy16R`vsj``GMX&T1NPxRqP|{j9AA2Wz6%zl`;j z;9y-e`j@h*2AmYR53sT?I9Ls4{}NWyfrG8w>R-&-T5!0VYyEw!uL6fVy3xOgRn_2d zLs$BHS=kR9?qjq6epb_f!>!!va~E?Ha8l&Bx2XmvMUI=DI&f0txc})7PKq2iLv`Tr zLfTfJ8>D)0NKAwy~OxsR#=hd0w!`rKOe1&3GCntkrGYQcefx&FT=7zyNG z$$vZlMBd3C% z`#`IE>akvk&$A}2(ObeG@D z;cwE3{`6#=YFp~DlOuTlijZEVoy5!p<`BI6wmOS%?e>)N{7MNq<%cF3+tunXL|RAPcdb`9lZ7kG2A zz{G~^njCrreAz8BkCA2uXLicWTGq{cR(E9gLYb)w$tS=v@aR&B*$G)Tl#^xP(8U6? z60&S!Xc@S(TVxVKmQ9R&0{-ljnR2X~89cgBVwNG#y!-T_;hp14%q~bgzP7f(ql*Qm z6J*%<&@k|4x5(Uq3>zOA1`h3%nJ$oF!;yIrflC)k%n;<6*F4%ezQD`?XFlV?k=3gO zCITcJ8=3{q>=yZ3pUfH?nFZeLc-Oa)Sz{0>@aIB_zuU>Ip`0SM=1ww;zteGMaOP@} zzrv-NPdhL&dZox;*d)!5>;h+Y%KRNocICk?aOXmazn00ap`7em^CsEFU&mxue&`YK ziuP9X0LDxi78O&p+~@%OU1XZpBbFF zSU!2<%!_|Evb$S8X&_I5W#G}}(upJ|%Z75Y3>?}koIql-EH$(Y+__Xdo`e*u)W|2` z&&BfU#+kvR-O|d2nHQcP8Qv)^55&o}wGAHa6?!Bl!;(Y8z@JOSB@&Wh$&q2;(8cm1 zNhw&vk$G){OS`4+hMB>mokC~b%&k{PRxcD52ExS9EO6#haVH7Mti;GH@aAH2D+iWIoBQ)sT68JxLLG}q7Ecx!0%!tuq%KpG#}11if>y#^O`dkj`wcITQ_qpHL`oPTp3sv1Ixgp z-O`SMWkWex1`h2MjvZJQ8(IeLTqtfISQZ=kWX+#s*|v={gGX0O$84CnS{xa^QYsI` z(Y3V=9_NFp&RA{@MKJ z^Y`bk$e)(qo)6_-$vv0*-P{AYt8!=Nj?2ZeKclz)e?RMHug#v7ZBh0AcbV^H{xI{2 z%ng}yGp$UHPW=CX&ICL}@A{vYUQAD<{yz15>MN;7Qn#iqOs%F2y65i&s{MZ}d6*{K z`nTnQEe~vYV9Ntr9@z50mIwaDdEglDgz=8>_A(H*fc4CXcdeGalg8WEG>E|le&eN) z!b-tAa7%s;$GQ1c}#o}=f7$9NMWhu9Y7|YI#i(P z*Pb6NES9}v$Yfq=jmQQ*`rXk0WFFV!WL{}#-n~B`Ny$7e$H~0X$h-XY0GiBj%ZQxk4jjMICZ5HCT z=32HbJFso2Kyd@ll5JeD}!u1x=Us6iZC*72FzP? 
zE}KWiJDWFDAoIYtWFA%TY~IY!Jn$@;N7XxT$5Th>{C_q1tK@f*kJ0)6-Si&+FA~osek*ZJ;zW9X|7Y=U z(C_tK8efU0Vn2@kQS2kJeX&+76#ZfJ@#qn{qrV#YRpdL7$0D~xc1LEze;fW!;YY&P zg-;A;L$8Lu7W#PT($E5(|9>g?$H9*T&kHsKzYaXFqYO3cZo*6lFpQgg50b;FoQ8E5 ztJZtQjs6G8@i0}zx|5YlJ>!P;2g#+Qk=7loX7-F*R0qjd)=r+bti!CmvuE6_K1ja2 zreCx8(Y_KKo~LS9x3Q`Q93081ShupWFE}_aVp@k-O$QFnSX&Pc4(8OXn^|8K zIec%UVco>4YH)a%s$%iU0bg)FGI4N=mSYHJW&r>z553#Bm93G~sSXZ;MA2>Wn zWm;FUng$%6q-t4LvbGi+9;vEXSFpYk9C*25eUMc(;K0ch>vC511qZ%0t^KT~0|zOs zWnISFT5y=vYSyK!uL6getzq%u2@N>uh^Tc5EBk?yj)+Zj0*9HcVV%dSYH*m? zD%Sg0*)MXK&rIvRtfq+^rnHvDhdnj{Cq?c(tgi$oMebZy)qs;CcMdE2f`iDltg{E^ zYF;D6H_~d>Sp#$ZUn9g5-wo^C19Mfc5#oXGiuJC6xf@?2#7%5kyID>18X@lEmUSj; z>)(+Cj#9JU$@;2yByl@8tTR|u{f;E==8APXEBn18*@2~CTJK;r9XMDGE$cMa)`Nou zQL|2EeI+dEo+grwcwDW)T}P+t0IT=q+xYfRUNr5Y>$e?XLS6*!LBf^ovfw< z2kvcIZPwO+4ij6&;>MycIHV?~#qC8MI813RtIpb5aG2F< zR*m&lk;BZ^u(%be11Cj}8m%=8=%|&zmUI`ZvLCky`KA4?$O+b zb0_EW+1IjvN_YM3&vvuv%#SjElDVJG`nNMdy4~-K>BIE9ewEb!N&R{1GpR$VGgHOn z>&gF+{B-i#(YJVK-dCq>SqLmF^Wx|R!C_Xb*&ZQMfy2z!usu4Y28W5QVtb^>7n~G1j}7U-Ns;rokRF^AIgb!2 z!9nCIwxC0^hu3F!EbmNP(4pDe71?2biV%@%ZM_Ex{IF5J%zThO7| zL;hb~xScDuphFwM;Vw39L5Gy!a3i;DL5DQpa4*+vL5CFJU{y41L5Gy!U{O?TL5F<6 z!CEkFL5I}fU^%pGes)V-PX`Y6a?Res`qiF4INZ+-`&d@3_f+6;J6G)OtX%4C1c$rW zw70RE*;9hUjoh-2VQnoq+{-nap9ZVJNs;42+d6Pkn$fz@>2q{z*)wiXqeoo9U&ILvGfJIAVOaG2OCc9xa> zz+pZ!?F_5wz`@37*=g3+gOehcVtpkzDRN0x)qs;CmtbXIa8l&rtfm2nDXnG4SX&DY zvs%rLvc3u&X10bMVO2FaOl%c9%*uY?FrS%rh}Cr9q{szXTMtf(T!8hJ;4pI5y{xJM z2az+adj?i3A15PGX<0`GR%;$7BUR!0|NVjQ4C?>8^5^Dv(3}6?%6%?(6W#bfL-qeR zvkzyl%r4VC|1W2rrr-Cwkm~D$uhrnk{A`aMUd{12vfrKXavB)^jUcyfPo zDH%z;koa=qzQp;7O8m|E_v4Sn57E8-GqIn?o{c>myE3*M%SB&~J{^4^dSSF34Md)g z{9fcpeLT#%4|6B0i1djyI4wk6;|IUW$-*lMi zgzCLQ;G_P$2zy{vhY3%pMA{GNSYOj&suMCv@BV)V?Q1*CbV55xz5mbReRYTVPN+&B z_|46TpXo5&(ILbfS#_B8_#?yw*>sriXc1zHY&%SMHbIEVvhFbH(IA8kXFAMzvA%pA0ej7ro+rf zix87#+hO9f2|`Spb%*JX2B8!~Wz~zFb~q1ps`iC4Gp(&Cfr zygN6+hjm`nzC(=}Hnu{YKXIqnx3aD#e`2Pq+J{)% zH-BP=Gwoa0VhujbaV`61*4E;~ELXR0Vk?yRq)cbu$htawuv4n`4Xo{tPm13`wpfQx 
zirTaQnQ-}P*T8lM!u>sVKV4>MiWzLvFp@nMED?Q7U#4L;0qE&Bj#Yw=;0tJ@!9 zE0p-8OlM!sx;lJP{H|hce|%E>u4IdK_@wwyXk?8{l%ui=h$ zo@wtNSgd}62dkoGUpBB<^8^p~bEu33RS4ZfUArR zCbzmH2um^G_{9dFwV93}EX9EB9~(?;RYwq(V!&D-8_a4=hY5=oA?CHV!;EDUgqY## zj-V_GgqYt$iXqb$ zJwhpl%v&}?D8-P83lTE~La9V^n70ryQzFC?t?Dp+p&aRh5KFVB!^B045UaDc!@Ok^ zgjk@}9cC{Ygy3z{Ve+Cy2+poL%wZG=!N*O9*^3?_Y?Zdd8|aZ&+7Vb3*>Wk=RE#^a+P+Z7`~5nHLDcg3aL8p zWo_S8+L2n+j$SG>k(qct2^iN2b2gg>zU3utgAtYNw4ah&Dy>QG1oPn zvsh1u5Yt`Tc{i(Xf)FxZ-FX*(K#fp};cnK|A(ZyvnXK)P5MtPN-Z?N?`>+QFt~+N8 zOjbPXK|*6Xrw>e4KhAJUmXJf5|E5#rWvI;*UwLx{V(?X0l69wBbg=qvbHZmBr{E?&3Zb7$Z6V6i`DfA zk=4|lCVxPQ5DCqo{(m8me=+~1{5|<|sQ!O5cQp4qx$CI@pUS?P{d(4>`oEivWM0fX zLG}Nh%(3Y=)8C@{|6qDodW!1*r&AB6E=_f*{{La}OUb*Fd#L_@BXKnGJ5>LlL~sAU z8vi=g|NG+IcqH~B)&Ga-^#8HZH>v)ABziEqD>_B>|I?8NBbP?HRR8}l{H5^S;XPFU zzY#hb`t8uc(8-}(@YUcm!3TpE({KMp0x$TL|MK5VR}hnO;1NZU2d}qPT|rFBfq%WB z$b+2KbOkXf2Od_Gv6!^lt{^7mz^9aDEM~8|D~L%s@JYWiRw5>@ASP;ru;#0-ASQkY zVZAq9K}<9VA!)T;K}>WAF?-crK}-|~F?E@)ASOzLn768~ASONtF=sVhK}^&LF=@43 zK}@s=A$!$bK}=K#A$6IqASP;rQVaz#@k1!Z&?6>hPmK_hR@?Px$xc#NuTsojb=Tu2 zRr-MMD#g@gx*jvBldfVPV&1B{9zQ9O_J)0kIjiY<#6*h_+}w6OTCxd3@O0hvxQQAe zc-wS6W}-s~8>Z@d{KOZb#ME4mnCK8nOwILZi5{WE)Lf67C=p^gX1X3T(ICX6S9Lvp z;*SvaNz?U+i58(0Lywkhf>4T~$4%4-r5Jk5M2Ap{p~p{r5n`@ux*jpnA;fgoc0F36 zM~E4(?t0uri4e1%>3YmWgAkKm)%Ey^KSC*n9x>4(lw#=7l1&gwF*FA}Mv-Y2Acj@9 z!MeUpbnGoR-8$>(9ur_8)^=;Gu5Y4ad%5l&H?Udpm;iUV>AsD1HBEHf*)~U_HMg7SY3}0cX-`BmOr3;On{8Wbhopv4k0X%s=JM~{Sm^tXu8L+o)#f2 zjka56^-U1M4zIf<{(ur8?sC&DvaSXpZt|*IU~ON7xVM|`JnQKY;`VO4bF8jMh&#OQ z&hiJ;2w|6-uEDxGgi;J=Slb_=6vJuO(;}2&IK}FEgh*@Z?j(Oei4Yl$=}xe&1|f2q zsyoiwz6g=cG~F@Q(;-As({}T$z6nC`bluJI2h<3`+oqdkT^&MjcGbVL6-W5|o?k(2-gLzgKFt$4DAC)lIKroU SLWhl(x+{+GDf;HvWBGqzhcRye literal 0 HcmV?d00001 diff --git a/packages/context/test/fixtures/relationship-benchmarks/demo_b2b_no_declared_constraints/expected-links.yaml b/packages/context/test/fixtures/relationship-benchmarks/demo_b2b_no_declared_constraints/expected-links.yaml new file mode 100644 index 00000000..ef9cc235 --- 
/dev/null +++ b/packages/context/test/fixtures/relationship-benchmarks/demo_b2b_no_declared_constraints/expected-links.yaml @@ -0,0 +1,51 @@ +expectedPks: + - table: accounts + columns: [id] + - table: users + columns: [id] + - table: subscriptions + columns: [id] + - table: opportunities + columns: [id] + - table: product_events + columns: [id] + - table: support_tickets + columns: [id] + - table: invoices + columns: [id] +expectedLinks: + - fromTable: users + fromColumns: [account_id] + toTable: accounts + toColumns: [id] + relationship: many_to_one + - fromTable: subscriptions + fromColumns: [account_id] + toTable: accounts + toColumns: [id] + relationship: many_to_one + - fromTable: opportunities + fromColumns: [account_id] + toTable: accounts + toColumns: [id] + relationship: many_to_one + - fromTable: product_events + fromColumns: [account_id] + toTable: accounts + toColumns: [id] + relationship: many_to_one + - fromTable: product_events + fromColumns: [user_id] + toTable: users + toColumns: [id] + relationship: many_to_one + - fromTable: support_tickets + fromColumns: [account_id] + toTable: accounts + toColumns: [id] + relationship: many_to_one + - fromTable: invoices + fromColumns: [account_id] + toTable: accounts + toColumns: [id] + relationship: many_to_one diff --git a/packages/context/test/fixtures/relationship-benchmarks/demo_b2b_no_declared_constraints/fixture.yaml b/packages/context/test/fixtures/relationship-benchmarks/demo_b2b_no_declared_constraints/fixture.yaml new file mode 100644 index 00000000..c5284723 --- /dev/null +++ b/packages/context/test/fixtures/relationship-benchmarks/demo_b2b_no_declared_constraints/fixture.yaml @@ -0,0 +1,10 @@ +id: demo_b2b_no_declared_constraints +name: Packaged B2B demo with declared PK and FK metadata masked +tier: smoke +origin: synthetic +defaultModes: + - declared_pks_and_declared_fks_removed + - profiling_disabled + - validation_disabled + - llm_disabled + - embeddings_disabled diff --git 
a/packages/context/test/fixtures/relationship-benchmarks/demo_b2b_no_declared_constraints/snapshot.json b/packages/context/test/fixtures/relationship-benchmarks/demo_b2b_no_declared_constraints/snapshot.json new file mode 100644 index 00000000..894bc4b6 --- /dev/null +++ b/packages/context/test/fixtures/relationship-benchmarks/demo_b2b_no_declared_constraints/snapshot.json @@ -0,0 +1,137 @@ +{ + "connectionId": "demo_b2b", + "driver": "sqlite", + "extractedAt": "2026-05-07T00:00:00.000Z", + "scope": {}, + "tables": [ + { + "catalog": null, + "db": "main", + "name": "accounts", + "kind": "table", + "comment": null, + "estimatedRows": 30, + "columns": [ + { "name": "id", "nativeType": "INTEGER", "normalizedType": "integer", "dimensionType": "number", "nullable": false, "primaryKey": true, "comment": null }, + { "name": "name", "nativeType": "TEXT", "normalizedType": "text", "dimensionType": "string", "nullable": false, "primaryKey": false, "comment": null }, + { "name": "segment", "nativeType": "TEXT", "normalizedType": "text", "dimensionType": "string", "nullable": false, "primaryKey": false, "comment": null }, + { "name": "region", "nativeType": "TEXT", "normalizedType": "text", "dimensionType": "string", "nullable": false, "primaryKey": false, "comment": null }, + { "name": "lifecycle_status", "nativeType": "TEXT", "normalizedType": "text", "dimensionType": "string", "nullable": false, "primaryKey": false, "comment": null } + ], + "foreignKeys": [] + }, + { + "catalog": null, + "db": "main", + "name": "users", + "kind": "table", + "comment": null, + "estimatedRows": 150, + "columns": [ + { "name": "id", "nativeType": "INTEGER", "normalizedType": "integer", "dimensionType": "number", "nullable": false, "primaryKey": true, "comment": null }, + { "name": "account_id", "nativeType": "INTEGER", "normalizedType": "integer", "dimensionType": "number", "nullable": false, "primaryKey": false, "comment": null }, + { "name": "email", "nativeType": "TEXT", "normalizedType": 
"text", "dimensionType": "string", "nullable": false, "primaryKey": false, "comment": null }, + { "name": "role", "nativeType": "TEXT", "normalizedType": "text", "dimensionType": "string", "nullable": false, "primaryKey": false, "comment": null }, + { "name": "active", "nativeType": "INTEGER", "normalizedType": "integer", "dimensionType": "boolean", "nullable": false, "primaryKey": false, "comment": null } + ], + "foreignKeys": [ + { "fromColumn": "account_id", "toCatalog": null, "toDb": "main", "toTable": "accounts", "toColumn": "id", "constraintName": "users_account_id_fkey" } + ] + }, + { + "catalog": null, + "db": "main", + "name": "subscriptions", + "kind": "table", + "comment": null, + "estimatedRows": 90, + "columns": [ + { "name": "id", "nativeType": "INTEGER", "normalizedType": "integer", "dimensionType": "number", "nullable": false, "primaryKey": true, "comment": null }, + { "name": "account_id", "nativeType": "INTEGER", "normalizedType": "integer", "dimensionType": "number", "nullable": false, "primaryKey": false, "comment": null }, + { "name": "plan", "nativeType": "TEXT", "normalizedType": "text", "dimensionType": "string", "nullable": false, "primaryKey": false, "comment": null }, + { "name": "arr", "nativeType": "INTEGER", "normalizedType": "integer", "dimensionType": "number", "nullable": false, "primaryKey": false, "comment": null }, + { "name": "started_at", "nativeType": "TEXT", "normalizedType": "text", "dimensionType": "time", "nullable": false, "primaryKey": false, "comment": null }, + { "name": "ended_at", "nativeType": "TEXT", "normalizedType": "text", "dimensionType": "time", "nullable": true, "primaryKey": false, "comment": null } + ], + "foreignKeys": [ + { "fromColumn": "account_id", "toCatalog": null, "toDb": "main", "toTable": "accounts", "toColumn": "id", "constraintName": "subscriptions_account_id_fkey" } + ] + }, + { + "catalog": null, + "db": "main", + "name": "opportunities", + "kind": "table", + "comment": null, + 
"estimatedRows": 180, + "columns": [ + { "name": "id", "nativeType": "INTEGER", "normalizedType": "integer", "dimensionType": "number", "nullable": false, "primaryKey": true, "comment": null }, + { "name": "account_id", "nativeType": "INTEGER", "normalizedType": "integer", "dimensionType": "number", "nullable": false, "primaryKey": false, "comment": null }, + { "name": "stage", "nativeType": "TEXT", "normalizedType": "text", "dimensionType": "string", "nullable": false, "primaryKey": false, "comment": null }, + { "name": "amount", "nativeType": "INTEGER", "normalizedType": "integer", "dimensionType": "number", "nullable": false, "primaryKey": false, "comment": null }, + { "name": "close_date", "nativeType": "TEXT", "normalizedType": "text", "dimensionType": "time", "nullable": false, "primaryKey": false, "comment": null } + ], + "foreignKeys": [ + { "fromColumn": "account_id", "toCatalog": null, "toDb": "main", "toTable": "accounts", "toColumn": "id", "constraintName": "opportunities_account_id_fkey" } + ] + }, + { + "catalog": null, + "db": "main", + "name": "product_events", + "kind": "table", + "comment": null, + "estimatedRows": 1500, + "columns": [ + { "name": "id", "nativeType": "INTEGER", "normalizedType": "integer", "dimensionType": "number", "nullable": false, "primaryKey": true, "comment": null }, + { "name": "account_id", "nativeType": "INTEGER", "normalizedType": "integer", "dimensionType": "number", "nullable": false, "primaryKey": false, "comment": null }, + { "name": "user_id", "nativeType": "INTEGER", "normalizedType": "integer", "dimensionType": "number", "nullable": false, "primaryKey": false, "comment": null }, + { "name": "event_name", "nativeType": "TEXT", "normalizedType": "text", "dimensionType": "string", "nullable": false, "primaryKey": false, "comment": null }, + { "name": "occurred_at", "nativeType": "TEXT", "normalizedType": "text", "dimensionType": "time", "nullable": false, "primaryKey": false, "comment": null } + ], + "foreignKeys": [ 
+ { "fromColumn": "account_id", "toCatalog": null, "toDb": "main", "toTable": "accounts", "toColumn": "id", "constraintName": "product_events_account_id_fkey" }, + { "fromColumn": "user_id", "toCatalog": null, "toDb": "main", "toTable": "users", "toColumn": "id", "constraintName": "product_events_user_id_fkey" } + ] + }, + { + "catalog": null, + "db": "main", + "name": "support_tickets", + "kind": "table", + "comment": null, + "estimatedRows": 270, + "columns": [ + { "name": "id", "nativeType": "INTEGER", "normalizedType": "integer", "dimensionType": "number", "nullable": false, "primaryKey": true, "comment": null }, + { "name": "account_id", "nativeType": "INTEGER", "normalizedType": "integer", "dimensionType": "number", "nullable": false, "primaryKey": false, "comment": null }, + { "name": "severity", "nativeType": "TEXT", "normalizedType": "text", "dimensionType": "string", "nullable": false, "primaryKey": false, "comment": null }, + { "name": "status", "nativeType": "TEXT", "normalizedType": "text", "dimensionType": "string", "nullable": false, "primaryKey": false, "comment": null }, + { "name": "resolution_hours", "nativeType": "INTEGER", "normalizedType": "integer", "dimensionType": "number", "nullable": true, "primaryKey": false, "comment": null } + ], + "foreignKeys": [ + { "fromColumn": "account_id", "toCatalog": null, "toDb": "main", "toTable": "accounts", "toColumn": "id", "constraintName": "support_tickets_account_id_fkey" } + ] + }, + { + "catalog": null, + "db": "main", + "name": "invoices", + "kind": "table", + "comment": null, + "estimatedRows": 240, + "columns": [ + { "name": "id", "nativeType": "INTEGER", "normalizedType": "integer", "dimensionType": "number", "nullable": false, "primaryKey": true, "comment": null }, + { "name": "account_id", "nativeType": "INTEGER", "normalizedType": "integer", "dimensionType": "number", "nullable": false, "primaryKey": false, "comment": null }, + { "name": "amount", "nativeType": "INTEGER", "normalizedType": 
"integer", "dimensionType": "number", "nullable": false, "primaryKey": false, "comment": null }, + { "name": "status", "nativeType": "TEXT", "normalizedType": "text", "dimensionType": "string", "nullable": false, "primaryKey": false, "comment": null }, + { "name": "issued_at", "nativeType": "TEXT", "normalizedType": "text", "dimensionType": "time", "nullable": false, "primaryKey": false, "comment": null } + ], + "foreignKeys": [ + { "fromColumn": "account_id", "toCatalog": null, "toDb": "main", "toTable": "accounts", "toColumn": "id", "constraintName": "invoices_account_id_fkey" } + ] + } + ], + "metadata": { + "fixture": "packaged-b2b-demo" + } +} diff --git a/packages/context/test/fixtures/relationship-benchmarks/mixed_case_within_schema_no_declared_constraints/data.sqlite b/packages/context/test/fixtures/relationship-benchmarks/mixed_case_within_schema_no_declared_constraints/data.sqlite new file mode 100644 index 0000000000000000000000000000000000000000..3262cad36b20f168eafcb708f6d5067dcdc4eb6f GIT binary patch literal 24576 zcmeI&(NEJr90%}r?U}L+yawq`v)AeV&j4nM)Ykd+aV2IOWF?Mar!s- z&+uqs!apDn@@gUv8WZD_PhPtL>o#5ugm05e@A|tozx%W=zuPum)eMi(ZQJSS9xV&A zf+z}4C=~=D&f64k=SP%xYD~#d&%2ot+qrBYu`3n{9$7eceh)$+bn!TzIR`aDUTkBy>~$(TL1RvOm8zM zG>C@M3595GFw~xgpT0z^UDvZa%&D|mcGvR8Cm4f#J=@bws;xEDRdxMhJ^3D$V#>mT z_+ARqvmKsRlkGFhbBFtKD7oPX%>}b3c<6(a#-R30-I_GNj(IpI^2_OZ!QUZx#+zkZtY7~h z3*;yHN{(+aCzcKY2tWV=5P$##AOHafKmY;|xB&vQQZ$*$+)>N?xBY&FpN#M;N<=O3 zzX1fh1kzbj^e2gGF_`4<4jlCB|5JhdA>YaA4Ol2rga8B}009U<00Izz00bZa0SG`~ zSRgLR$&{%1I|BFaMtDD>`FjDQTOh$p|75yW)=C>MHp>ZK{PR&sEAmk(!K;5hDh8wCKc})-s{jB1 literal 0 HcmV?d00001 diff --git a/packages/context/test/fixtures/relationship-benchmarks/mixed_case_within_schema_no_declared_constraints/expected-links.yaml b/packages/context/test/fixtures/relationship-benchmarks/mixed_case_within_schema_no_declared_constraints/expected-links.yaml new file mode 100644 index 00000000..b1bfb359 --- /dev/null +++ 
b/packages/context/test/fixtures/relationship-benchmarks/mixed_case_within_schema_no_declared_constraints/expected-links.yaml @@ -0,0 +1,39 @@ +expectedPks: + - table: CustomerAccount + columns: + - AccountID + - table: InvoiceHeader + columns: + - InvoiceID + - table: subscriptionPlans + columns: + - planId +expectedLinks: + - fromTable: InvoiceHeader + fromColumns: + - CustomerAccountID + toTable: CustomerAccount + toColumns: + - AccountID + relationship: many_to_one + - fromTable: line_items + fromColumns: + - invoice_id + toTable: InvoiceHeader + toColumns: + - InvoiceID + relationship: many_to_one + - fromTable: order_events + fromColumns: + - accountId + toTable: CustomerAccount + toColumns: + - AccountID + relationship: many_to_one + - fromTable: order_events + fromColumns: + - plan_id + toTable: subscriptionPlans + toColumns: + - planId + relationship: many_to_one diff --git a/packages/context/test/fixtures/relationship-benchmarks/mixed_case_within_schema_no_declared_constraints/fixture.yaml b/packages/context/test/fixtures/relationship-benchmarks/mixed_case_within_schema_no_declared_constraints/fixture.yaml new file mode 100644 index 00000000..69000395 --- /dev/null +++ b/packages/context/test/fixtures/relationship-benchmarks/mixed_case_within_schema_no_declared_constraints/fixture.yaml @@ -0,0 +1,7 @@ +id: mixed_case_within_schema_no_declared_constraints +name: Mixed case within schema fixture with no declared constraints +tier: row_bearing +origin: synthetic +thresholdEligible: false +defaultModes: + - declared_pks_and_declared_fks_removed diff --git a/packages/context/test/fixtures/relationship-benchmarks/mixed_case_within_schema_no_declared_constraints/snapshot.json b/packages/context/test/fixtures/relationship-benchmarks/mixed_case_within_schema_no_declared_constraints/snapshot.json new file mode 100644 index 00000000..de5847f6 --- /dev/null +++ 
b/packages/context/test/fixtures/relationship-benchmarks/mixed_case_within_schema_no_declared_constraints/snapshot.json @@ -0,0 +1,208 @@ +{ + "connectionId": "mixed_case_within_schema_no_declared_constraints", + "driver": "sqlite", + "extractedAt": "2026-05-07T00:00:00.000Z", + "scope": {}, + "metadata": {}, + "tables": [ + { + "catalog": null, + "db": "main", + "name": "CustomerAccount", + "kind": "table", + "comment": null, + "estimatedRows": 3, + "columns": [ + { + "name": "AccountID", + "nativeType": "TEXT", + "normalizedType": "text", + "dimensionType": "string", + "nullable": false, + "primaryKey": false, + "comment": null + }, + { + "name": "AccountName", + "nativeType": "TEXT", + "normalizedType": "text", + "dimensionType": "string", + "nullable": false, + "primaryKey": false, + "comment": null + }, + { + "name": "accountTier", + "nativeType": "TEXT", + "normalizedType": "text", + "dimensionType": "string", + "nullable": false, + "primaryKey": false, + "comment": null + } + ], + "foreignKeys": [] + }, + { + "catalog": null, + "db": "main", + "name": "InvoiceHeader", + "kind": "table", + "comment": null, + "estimatedRows": 3, + "columns": [ + { + "name": "InvoiceID", + "nativeType": "TEXT", + "normalizedType": "text", + "dimensionType": "string", + "nullable": false, + "primaryKey": false, + "comment": null + }, + { + "name": "CustomerAccountID", + "nativeType": "TEXT", + "normalizedType": "text", + "dimensionType": "string", + "nullable": false, + "primaryKey": false, + "comment": null + }, + { + "name": "invoice_total", + "nativeType": "INTEGER", + "normalizedType": "integer", + "dimensionType": "number", + "nullable": false, + "primaryKey": false, + "comment": null + } + ], + "foreignKeys": [] + }, + { + "catalog": null, + "db": "main", + "name": "line_items", + "kind": "table", + "comment": null, + "estimatedRows": 3, + "columns": [ + { + "name": "line_item_id", + "nativeType": "TEXT", + "normalizedType": "text", + "dimensionType": "string", + 
"nullable": false, + "primaryKey": false, + "comment": null + }, + { + "name": "invoice_id", + "nativeType": "TEXT", + "normalizedType": "text", + "dimensionType": "string", + "nullable": false, + "primaryKey": false, + "comment": null + }, + { + "name": "skuCode", + "nativeType": "TEXT", + "normalizedType": "text", + "dimensionType": "string", + "nullable": false, + "primaryKey": false, + "comment": null + } + ], + "foreignKeys": [] + }, + { + "catalog": null, + "db": "main", + "name": "order_events", + "kind": "table", + "comment": null, + "estimatedRows": 3, + "columns": [ + { + "name": "event_id", + "nativeType": "TEXT", + "normalizedType": "text", + "dimensionType": "string", + "nullable": false, + "primaryKey": false, + "comment": null + }, + { + "name": "accountId", + "nativeType": "TEXT", + "normalizedType": "text", + "dimensionType": "string", + "nullable": false, + "primaryKey": false, + "comment": null + }, + { + "name": "plan_id", + "nativeType": "TEXT", + "normalizedType": "text", + "dimensionType": "string", + "nullable": false, + "primaryKey": false, + "comment": null + }, + { + "name": "amount", + "nativeType": "INTEGER", + "normalizedType": "integer", + "dimensionType": "number", + "nullable": false, + "primaryKey": false, + "comment": null + } + ], + "foreignKeys": [] + }, + { + "catalog": null, + "db": "main", + "name": "subscriptionPlans", + "kind": "table", + "comment": null, + "estimatedRows": 3, + "columns": [ + { + "name": "planId", + "nativeType": "TEXT", + "normalizedType": "text", + "dimensionType": "string", + "nullable": false, + "primaryKey": false, + "comment": null + }, + { + "name": "display_name", + "nativeType": "TEXT", + "normalizedType": "text", + "dimensionType": "string", + "nullable": false, + "primaryKey": false, + "comment": null + }, + { + "name": "BillingCadence", + "nativeType": "TEXT", + "normalizedType": "text", + "dimensionType": "string", + "nullable": false, + "primaryKey": false, + "comment": null + } + ], + 
"foreignKeys": [] + } + ] +} diff --git a/packages/context/test/fixtures/relationship-benchmarks/natural_keys_no_declared_constraints/data.sqlite b/packages/context/test/fixtures/relationship-benchmarks/natural_keys_no_declared_constraints/data.sqlite new file mode 100644 index 0000000000000000000000000000000000000000..5d77017a6f1d48505c599be98f45501aeca5d181 GIT binary patch literal 12288 zcmeI%&r8EF6bJC6sT-ma>t)D50y3sBy!a0|T{eU=u|L4usBH%Y*Gk(RcHDp9#sA5x ze}Q-uym<8H+iay?Yo_19K%2R#cwp(g`L_QgWSrjWz z-_ZQ`+HuOOwPuq~>+_CLGNMh?FHhoP<;Pg*xw1q}i|m)PTPh7kKl{z@=0ZL*`7U25 zAs_$&2tWV=5P$##AOHafKmY;|SOcK+(9+FJ zBw@(!SQG=slOdN~9WE}WFVh~6CG+mGOtU2G?Cz4Ob_o(O;9?T9Fq|7^7EU!6pd~%| z^O=UeRDXtG;Sxrfssw|0@re8B-Q2X_8}?`Wqw4TL00Izz00bZa0SG_<0uX=z1R$^) zfgRIwc6VM>s?|n2k^_3gRJCAy#KK-8y@weUETqBuAscdf!h%GyCYNJh6p7_Ld6dR# zO~(5Hm)xf>6IB7>Qgxy~;zHm5uMGQ#ssLQA<_97x1Rwwb2tWV=5P$##AOHafKmY>w z7O+*Fz#+{_ty-Vm`7d&+HsEB^h1#q@K*~8)5zs5u*%tCa-~TTS`@6dTU*0*olLI45~fB*y_009U<00Izz00bcLPXZgJwVfxYl}38k UU)#v&Sv@TUm>WdTYH2L{3!qZ_mjD0& literal 0 HcmV?d00001 diff --git a/packages/context/test/fixtures/relationship-benchmarks/non_english_naming_no_declared_constraints/expected-links.yaml b/packages/context/test/fixtures/relationship-benchmarks/non_english_naming_no_declared_constraints/expected-links.yaml new file mode 100644 index 00000000..64dbb692 --- /dev/null +++ b/packages/context/test/fixtures/relationship-benchmarks/non_english_naming_no_declared_constraints/expected-links.yaml @@ -0,0 +1,22 @@ +expectedPks: + - table: kundenstamm + columns: + - kundennummer + - table: seihin + columns: + - seihin_bango +expectedLinks: + - fromTable: bestellungen + fromColumns: + - kaeufer_nummer + toTable: kundenstamm + toColumns: + - kundennummer + relationship: many_to_one + - fromTable: uriage + fromColumns: + - hinban + toTable: seihin + toColumns: + - seihin_bango + relationship: many_to_one diff --git 
a/packages/context/test/fixtures/relationship-benchmarks/non_english_naming_no_declared_constraints/fixture.yaml b/packages/context/test/fixtures/relationship-benchmarks/non_english_naming_no_declared_constraints/fixture.yaml new file mode 100644 index 00000000..1bf2559b --- /dev/null +++ b/packages/context/test/fixtures/relationship-benchmarks/non_english_naming_no_declared_constraints/fixture.yaml @@ -0,0 +1,7 @@ +id: non_english_naming_no_declared_constraints +name: Non-English naming fixture with no declared constraints +tier: row_bearing +origin: synthetic +thresholdEligible: false +defaultModes: + - declared_pks_and_declared_fks_removed diff --git a/packages/context/test/fixtures/relationship-benchmarks/non_english_naming_no_declared_constraints/snapshot.json b/packages/context/test/fixtures/relationship-benchmarks/non_english_naming_no_declared_constraints/snapshot.json new file mode 100644 index 00000000..422fc709 --- /dev/null +++ b/packages/context/test/fixtures/relationship-benchmarks/non_english_naming_no_declared_constraints/snapshot.json @@ -0,0 +1,161 @@ +{ + "connectionId": "non_english_naming_no_declared_constraints", + "driver": "sqlite", + "extractedAt": "2026-05-07T00:00:00.000Z", + "scope": {}, + "metadata": {}, + "tables": [ + { + "catalog": null, + "db": "main", + "name": "bestellungen", + "kind": "table", + "comment": null, + "estimatedRows": 3, + "columns": [ + { + "name": "bestellnummer", + "nativeType": "TEXT", + "normalizedType": "text", + "dimensionType": "string", + "nullable": false, + "primaryKey": false, + "comment": null + }, + { + "name": "kaeufer_nummer", + "nativeType": "TEXT", + "normalizedType": "text", + "dimensionType": "string", + "nullable": false, + "primaryKey": false, + "comment": null + }, + { + "name": "betrag", + "nativeType": "INTEGER", + "normalizedType": "integer", + "dimensionType": "number", + "nullable": false, + "primaryKey": false, + "comment": null + } + ], + "foreignKeys": [] + }, + { + "catalog": null, + 
"db": "main", + "name": "kundenstamm", + "kind": "table", + "comment": null, + "estimatedRows": 3, + "columns": [ + { + "name": "kundennummer", + "nativeType": "TEXT", + "normalizedType": "text", + "dimensionType": "string", + "nullable": false, + "primaryKey": false, + "comment": null + }, + { + "name": "firmenname", + "nativeType": "TEXT", + "normalizedType": "text", + "dimensionType": "string", + "nullable": false, + "primaryKey": false, + "comment": null + }, + { + "name": "stadt", + "nativeType": "TEXT", + "normalizedType": "text", + "dimensionType": "string", + "nullable": false, + "primaryKey": false, + "comment": null + } + ], + "foreignKeys": [] + }, + { + "catalog": null, + "db": "main", + "name": "seihin", + "kind": "table", + "comment": null, + "estimatedRows": 3, + "columns": [ + { + "name": "seihin_bango", + "nativeType": "TEXT", + "normalizedType": "text", + "dimensionType": "string", + "nullable": false, + "primaryKey": false, + "comment": null + }, + { + "name": "bezeichnung", + "nativeType": "TEXT", + "normalizedType": "text", + "dimensionType": "string", + "nullable": false, + "primaryKey": false, + "comment": null + }, + { + "name": "kategorie", + "nativeType": "TEXT", + "normalizedType": "text", + "dimensionType": "string", + "nullable": false, + "primaryKey": false, + "comment": null + } + ], + "foreignKeys": [] + }, + { + "catalog": null, + "db": "main", + "name": "uriage", + "kind": "table", + "comment": null, + "estimatedRows": 3, + "columns": [ + { + "name": "verkauf_nr", + "nativeType": "TEXT", + "normalizedType": "text", + "dimensionType": "string", + "nullable": false, + "primaryKey": false, + "comment": null + }, + { + "name": "hinban", + "nativeType": "TEXT", + "normalizedType": "text", + "dimensionType": "string", + "nullable": false, + "primaryKey": false, + "comment": null + }, + { + "name": "menge", + "nativeType": "INTEGER", + "normalizedType": "integer", + "dimensionType": "number", + "nullable": false, + "primaryKey": false, + 
"comment": null + } + ], + "foreignKeys": [] + } + ] +} diff --git a/packages/context/test/fixtures/relationship-benchmarks/northwind_with_declared_metadata/expected-links.yaml b/packages/context/test/fixtures/relationship-benchmarks/northwind_with_declared_metadata/expected-links.yaml new file mode 100644 index 00000000..83db9ed3 --- /dev/null +++ b/packages/context/test/fixtures/relationship-benchmarks/northwind_with_declared_metadata/expected-links.yaml @@ -0,0 +1,135 @@ +expectedPks: + - table: Categories + columns: + - CategoryID + - table: CustomerCustomerDemo + columns: + - CustomerID + - CustomerTypeID + - table: CustomerDemographics + columns: + - CustomerTypeID + - table: Customers + columns: + - CustomerID + - table: Employees + columns: + - EmployeeID + - table: EmployeeTerritories + columns: + - EmployeeID + - TerritoryID + - table: Order Details + columns: + - OrderID + - ProductID + - table: Orders + columns: + - OrderID + - table: Products + columns: + - ProductID + - table: Regions + columns: + - RegionID + - table: Shippers + columns: + - ShipperID + - table: Suppliers + columns: + - SupplierID + - table: Territories + columns: + - TerritoryID +expectedLinks: + - fromTable: CustomerCustomerDemo + fromColumns: + - CustomerID + toTable: Customers + toColumns: + - CustomerID + relationship: many_to_one + - fromTable: CustomerCustomerDemo + fromColumns: + - CustomerTypeID + toTable: CustomerDemographics + toColumns: + - CustomerTypeID + relationship: many_to_one + - fromTable: Employees + fromColumns: + - ReportsTo + toTable: Employees + toColumns: + - EmployeeID + relationship: many_to_one + - fromTable: EmployeeTerritories + fromColumns: + - EmployeeID + toTable: Employees + toColumns: + - EmployeeID + relationship: many_to_one + - fromTable: EmployeeTerritories + fromColumns: + - TerritoryID + toTable: Territories + toColumns: + - TerritoryID + relationship: many_to_one + - fromTable: Order Details + fromColumns: + - OrderID + toTable: Orders + 
toColumns: + - OrderID + relationship: many_to_one + - fromTable: Order Details + fromColumns: + - ProductID + toTable: Products + toColumns: + - ProductID + relationship: many_to_one + - fromTable: Orders + fromColumns: + - CustomerID + toTable: Customers + toColumns: + - CustomerID + relationship: many_to_one + - fromTable: Orders + fromColumns: + - EmployeeID + toTable: Employees + toColumns: + - EmployeeID + relationship: many_to_one + - fromTable: Orders + fromColumns: + - ShipVia + toTable: Shippers + toColumns: + - ShipperID + relationship: many_to_one + - fromTable: Products + fromColumns: + - CategoryID + toTable: Categories + toColumns: + - CategoryID + relationship: many_to_one + - fromTable: Products + fromColumns: + - SupplierID + toTable: Suppliers + toColumns: + - SupplierID + relationship: many_to_one + - fromTable: Territories + fromColumns: + - RegionID + toTable: Regions + toColumns: + - RegionID + relationship: many_to_one diff --git a/packages/context/test/fixtures/relationship-benchmarks/northwind_with_declared_metadata/fixture.yaml b/packages/context/test/fixtures/relationship-benchmarks/northwind_with_declared_metadata/fixture.yaml new file mode 100644 index 00000000..c445ec95 --- /dev/null +++ b/packages/context/test/fixtures/relationship-benchmarks/northwind_with_declared_metadata/fixture.yaml @@ -0,0 +1,14 @@ +id: northwind_with_declared_metadata +name: Northwind (SQLite, declared metadata) +tier: row_bearing +origin: public +thresholdEligible: true +defaultModes: + - metadata_present + - declared_pks_and_declared_fks_removed + - declared_pks_removed + - declared_fks_removed + - profiling_disabled + - validation_disabled + - llm_disabled + - embeddings_disabled diff --git a/packages/context/test/fixtures/relationship-benchmarks/northwind_with_declared_metadata/snapshot.json b/packages/context/test/fixtures/relationship-benchmarks/northwind_with_declared_metadata/snapshot.json new file mode 100644 index 00000000..a2387244 --- /dev/null +++ 
b/packages/context/test/fixtures/relationship-benchmarks/northwind_with_declared_metadata/snapshot.json @@ -0,0 +1,1055 @@ +{ + "connectionId": "northwind_with_declared_metadata", + "driver": "sqlite", + "extractedAt": "2026-05-07T00:00:00.000Z", + "scope": {}, + "metadata": {}, + "tables": [ + { + "catalog": null, + "db": "main", + "name": "Categories", + "kind": "table", + "comment": null, + "estimatedRows": 8, + "columns": [ + { + "name": "CategoryID", + "nativeType": "INTEGER", + "normalizedType": "integer", + "dimensionType": "number", + "nullable": true, + "primaryKey": true, + "comment": null + }, + { + "name": "CategoryName", + "nativeType": "TEXT", + "normalizedType": "text", + "dimensionType": "string", + "nullable": true, + "primaryKey": false, + "comment": null + }, + { + "name": "Description", + "nativeType": "TEXT", + "normalizedType": "text", + "dimensionType": "string", + "nullable": true, + "primaryKey": false, + "comment": null + }, + { + "name": "Picture", + "nativeType": "BLOB", + "normalizedType": "blob", + "dimensionType": "string", + "nullable": true, + "primaryKey": false, + "comment": null + } + ], + "foreignKeys": [] + }, + { + "catalog": null, + "db": "main", + "name": "CustomerCustomerDemo", + "kind": "table", + "comment": null, + "estimatedRows": 0, + "columns": [ + { + "name": "CustomerID", + "nativeType": "TEXT", + "normalizedType": "text", + "dimensionType": "string", + "nullable": false, + "primaryKey": true, + "comment": null + }, + { + "name": "CustomerTypeID", + "nativeType": "TEXT", + "normalizedType": "text", + "dimensionType": "string", + "nullable": false, + "primaryKey": true, + "comment": null + } + ], + "foreignKeys": [ + { + "fromColumn": "CustomerTypeID", + "toCatalog": null, + "toDb": "main", + "toTable": "CustomerDemographics", + "toColumn": "CustomerTypeID", + "constraintName": "CustomerCustomerDemo_CustomerTypeID_fkey" + }, + { + "fromColumn": "CustomerID", + "toCatalog": null, + "toDb": "main", + "toTable": 
"Customers", + "toColumn": "CustomerID", + "constraintName": "CustomerCustomerDemo_CustomerID_fkey" + } + ] + }, + { + "catalog": null, + "db": "main", + "name": "CustomerDemographics", + "kind": "table", + "comment": null, + "estimatedRows": 0, + "columns": [ + { + "name": "CustomerTypeID", + "nativeType": "TEXT", + "normalizedType": "text", + "dimensionType": "string", + "nullable": false, + "primaryKey": true, + "comment": null + }, + { + "name": "CustomerDesc", + "nativeType": "TEXT", + "normalizedType": "text", + "dimensionType": "string", + "nullable": true, + "primaryKey": false, + "comment": null + } + ], + "foreignKeys": [] + }, + { + "catalog": null, + "db": "main", + "name": "Customers", + "kind": "table", + "comment": null, + "estimatedRows": 93, + "columns": [ + { + "name": "CustomerID", + "nativeType": "TEXT", + "normalizedType": "text", + "dimensionType": "string", + "nullable": true, + "primaryKey": true, + "comment": null + }, + { + "name": "CompanyName", + "nativeType": "TEXT", + "normalizedType": "text", + "dimensionType": "string", + "nullable": true, + "primaryKey": false, + "comment": null + }, + { + "name": "ContactName", + "nativeType": "TEXT", + "normalizedType": "text", + "dimensionType": "string", + "nullable": true, + "primaryKey": false, + "comment": null + }, + { + "name": "ContactTitle", + "nativeType": "TEXT", + "normalizedType": "text", + "dimensionType": "string", + "nullable": true, + "primaryKey": false, + "comment": null + }, + { + "name": "Address", + "nativeType": "TEXT", + "normalizedType": "text", + "dimensionType": "string", + "nullable": true, + "primaryKey": false, + "comment": null + }, + { + "name": "City", + "nativeType": "TEXT", + "normalizedType": "text", + "dimensionType": "string", + "nullable": true, + "primaryKey": false, + "comment": null + }, + { + "name": "Region", + "nativeType": "TEXT", + "normalizedType": "text", + "dimensionType": "string", + "nullable": true, + "primaryKey": false, + "comment": null + }, 
+ { + "name": "PostalCode", + "nativeType": "TEXT", + "normalizedType": "text", + "dimensionType": "string", + "nullable": true, + "primaryKey": false, + "comment": null + }, + { + "name": "Country", + "nativeType": "TEXT", + "normalizedType": "text", + "dimensionType": "string", + "nullable": true, + "primaryKey": false, + "comment": null + }, + { + "name": "Phone", + "nativeType": "TEXT", + "normalizedType": "text", + "dimensionType": "string", + "nullable": true, + "primaryKey": false, + "comment": null + }, + { + "name": "Fax", + "nativeType": "TEXT", + "normalizedType": "text", + "dimensionType": "string", + "nullable": true, + "primaryKey": false, + "comment": null + } + ], + "foreignKeys": [] + }, + { + "catalog": null, + "db": "main", + "name": "EmployeeTerritories", + "kind": "table", + "comment": null, + "estimatedRows": 49, + "columns": [ + { + "name": "EmployeeID", + "nativeType": "INTEGER", + "normalizedType": "integer", + "dimensionType": "number", + "nullable": false, + "primaryKey": true, + "comment": null + }, + { + "name": "TerritoryID", + "nativeType": "TEXT", + "normalizedType": "text", + "dimensionType": "string", + "nullable": false, + "primaryKey": true, + "comment": null + } + ], + "foreignKeys": [ + { + "fromColumn": "TerritoryID", + "toCatalog": null, + "toDb": "main", + "toTable": "Territories", + "toColumn": "TerritoryID", + "constraintName": "EmployeeTerritories_TerritoryID_fkey" + }, + { + "fromColumn": "EmployeeID", + "toCatalog": null, + "toDb": "main", + "toTable": "Employees", + "toColumn": "EmployeeID", + "constraintName": "EmployeeTerritories_EmployeeID_fkey" + } + ] + }, + { + "catalog": null, + "db": "main", + "name": "Employees", + "kind": "table", + "comment": null, + "estimatedRows": 9, + "columns": [ + { + "name": "EmployeeID", + "nativeType": "INTEGER", + "normalizedType": "integer", + "dimensionType": "number", + "nullable": true, + "primaryKey": true, + "comment": null + }, + { + "name": "LastName", + "nativeType": 
"TEXT", + "normalizedType": "text", + "dimensionType": "string", + "nullable": true, + "primaryKey": false, + "comment": null + }, + { + "name": "FirstName", + "nativeType": "TEXT", + "normalizedType": "text", + "dimensionType": "string", + "nullable": true, + "primaryKey": false, + "comment": null + }, + { + "name": "Title", + "nativeType": "TEXT", + "normalizedType": "text", + "dimensionType": "string", + "nullable": true, + "primaryKey": false, + "comment": null + }, + { + "name": "TitleOfCourtesy", + "nativeType": "TEXT", + "normalizedType": "text", + "dimensionType": "string", + "nullable": true, + "primaryKey": false, + "comment": null + }, + { + "name": "BirthDate", + "nativeType": "DATE", + "normalizedType": "text", + "dimensionType": "time", + "nullable": true, + "primaryKey": false, + "comment": null + }, + { + "name": "HireDate", + "nativeType": "DATE", + "normalizedType": "text", + "dimensionType": "time", + "nullable": true, + "primaryKey": false, + "comment": null + }, + { + "name": "Address", + "nativeType": "TEXT", + "normalizedType": "text", + "dimensionType": "string", + "nullable": true, + "primaryKey": false, + "comment": null + }, + { + "name": "City", + "nativeType": "TEXT", + "normalizedType": "text", + "dimensionType": "string", + "nullable": true, + "primaryKey": false, + "comment": null + }, + { + "name": "Region", + "nativeType": "TEXT", + "normalizedType": "text", + "dimensionType": "string", + "nullable": true, + "primaryKey": false, + "comment": null + }, + { + "name": "PostalCode", + "nativeType": "TEXT", + "normalizedType": "text", + "dimensionType": "string", + "nullable": true, + "primaryKey": false, + "comment": null + }, + { + "name": "Country", + "nativeType": "TEXT", + "normalizedType": "text", + "dimensionType": "string", + "nullable": true, + "primaryKey": false, + "comment": null + }, + { + "name": "HomePhone", + "nativeType": "TEXT", + "normalizedType": "text", + "dimensionType": "string", + "nullable": true, + 
"primaryKey": false, + "comment": null + }, + { + "name": "Extension", + "nativeType": "TEXT", + "normalizedType": "text", + "dimensionType": "string", + "nullable": true, + "primaryKey": false, + "comment": null + }, + { + "name": "Photo", + "nativeType": "BLOB", + "normalizedType": "blob", + "dimensionType": "string", + "nullable": true, + "primaryKey": false, + "comment": null + }, + { + "name": "Notes", + "nativeType": "TEXT", + "normalizedType": "text", + "dimensionType": "string", + "nullable": true, + "primaryKey": false, + "comment": null + }, + { + "name": "ReportsTo", + "nativeType": "INTEGER", + "normalizedType": "integer", + "dimensionType": "number", + "nullable": true, + "primaryKey": false, + "comment": null + }, + { + "name": "PhotoPath", + "nativeType": "TEXT", + "normalizedType": "text", + "dimensionType": "string", + "nullable": true, + "primaryKey": false, + "comment": null + } + ], + "foreignKeys": [ + { + "fromColumn": "ReportsTo", + "toCatalog": null, + "toDb": "main", + "toTable": "Employees", + "toColumn": "EmployeeID", + "constraintName": "Employees_ReportsTo_fkey" + } + ] + }, + { + "catalog": null, + "db": "main", + "name": "Order Details", + "kind": "table", + "comment": null, + "estimatedRows": 609283, + "columns": [ + { + "name": "OrderID", + "nativeType": "INTEGER", + "normalizedType": "integer", + "dimensionType": "number", + "nullable": false, + "primaryKey": true, + "comment": null + }, + { + "name": "ProductID", + "nativeType": "INTEGER", + "normalizedType": "integer", + "dimensionType": "number", + "nullable": false, + "primaryKey": true, + "comment": null + }, + { + "name": "UnitPrice", + "nativeType": "NUMERIC", + "normalizedType": "real", + "dimensionType": "number", + "nullable": false, + "primaryKey": false, + "comment": null + }, + { + "name": "Quantity", + "nativeType": "INTEGER", + "normalizedType": "integer", + "dimensionType": "number", + "nullable": false, + "primaryKey": false, + "comment": null + }, + { + "name": 
"Discount", + "nativeType": "REAL", + "normalizedType": "real", + "dimensionType": "number", + "nullable": false, + "primaryKey": false, + "comment": null + } + ], + "foreignKeys": [ + { + "fromColumn": "ProductID", + "toCatalog": null, + "toDb": "main", + "toTable": "Products", + "toColumn": "ProductID", + "constraintName": "Order Details_ProductID_fkey" + }, + { + "fromColumn": "OrderID", + "toCatalog": null, + "toDb": "main", + "toTable": "Orders", + "toColumn": "OrderID", + "constraintName": "Order Details_OrderID_fkey" + } + ] + }, + { + "catalog": null, + "db": "main", + "name": "Orders", + "kind": "table", + "comment": null, + "estimatedRows": 16282, + "columns": [ + { + "name": "OrderID", + "nativeType": "INTEGER", + "normalizedType": "integer", + "dimensionType": "number", + "nullable": false, + "primaryKey": true, + "comment": null + }, + { + "name": "CustomerID", + "nativeType": "TEXT", + "normalizedType": "text", + "dimensionType": "string", + "nullable": true, + "primaryKey": false, + "comment": null + }, + { + "name": "EmployeeID", + "nativeType": "INTEGER", + "normalizedType": "integer", + "dimensionType": "number", + "nullable": true, + "primaryKey": false, + "comment": null + }, + { + "name": "OrderDate", + "nativeType": "DATETIME", + "normalizedType": "text", + "dimensionType": "time", + "nullable": true, + "primaryKey": false, + "comment": null + }, + { + "name": "RequiredDate", + "nativeType": "DATETIME", + "normalizedType": "text", + "dimensionType": "time", + "nullable": true, + "primaryKey": false, + "comment": null + }, + { + "name": "ShippedDate", + "nativeType": "DATETIME", + "normalizedType": "text", + "dimensionType": "time", + "nullable": true, + "primaryKey": false, + "comment": null + }, + { + "name": "ShipVia", + "nativeType": "INTEGER", + "normalizedType": "integer", + "dimensionType": "number", + "nullable": true, + "primaryKey": false, + "comment": null + }, + { + "name": "Freight", + "nativeType": "NUMERIC", + "normalizedType": 
"real", + "dimensionType": "number", + "nullable": true, + "primaryKey": false, + "comment": null + }, + { + "name": "ShipName", + "nativeType": "TEXT", + "normalizedType": "text", + "dimensionType": "string", + "nullable": true, + "primaryKey": false, + "comment": null + }, + { + "name": "ShipAddress", + "nativeType": "TEXT", + "normalizedType": "text", + "dimensionType": "string", + "nullable": true, + "primaryKey": false, + "comment": null + }, + { + "name": "ShipCity", + "nativeType": "TEXT", + "normalizedType": "text", + "dimensionType": "string", + "nullable": true, + "primaryKey": false, + "comment": null + }, + { + "name": "ShipRegion", + "nativeType": "TEXT", + "normalizedType": "text", + "dimensionType": "string", + "nullable": true, + "primaryKey": false, + "comment": null + }, + { + "name": "ShipPostalCode", + "nativeType": "TEXT", + "normalizedType": "text", + "dimensionType": "string", + "nullable": true, + "primaryKey": false, + "comment": null + }, + { + "name": "ShipCountry", + "nativeType": "TEXT", + "normalizedType": "text", + "dimensionType": "string", + "nullable": true, + "primaryKey": false, + "comment": null + } + ], + "foreignKeys": [ + { + "fromColumn": "ShipVia", + "toCatalog": null, + "toDb": "main", + "toTable": "Shippers", + "toColumn": "ShipperID", + "constraintName": "Orders_ShipVia_fkey" + }, + { + "fromColumn": "CustomerID", + "toCatalog": null, + "toDb": "main", + "toTable": "Customers", + "toColumn": "CustomerID", + "constraintName": "Orders_CustomerID_fkey" + }, + { + "fromColumn": "EmployeeID", + "toCatalog": null, + "toDb": "main", + "toTable": "Employees", + "toColumn": "EmployeeID", + "constraintName": "Orders_EmployeeID_fkey" + } + ] + }, + { + "catalog": null, + "db": "main", + "name": "Products", + "kind": "table", + "comment": null, + "estimatedRows": 77, + "columns": [ + { + "name": "ProductID", + "nativeType": "INTEGER", + "normalizedType": "integer", + "dimensionType": "number", + "nullable": false, + "primaryKey": 
true, + "comment": null + }, + { + "name": "ProductName", + "nativeType": "TEXT", + "normalizedType": "text", + "dimensionType": "string", + "nullable": false, + "primaryKey": false, + "comment": null + }, + { + "name": "SupplierID", + "nativeType": "INTEGER", + "normalizedType": "integer", + "dimensionType": "number", + "nullable": true, + "primaryKey": false, + "comment": null + }, + { + "name": "CategoryID", + "nativeType": "INTEGER", + "normalizedType": "integer", + "dimensionType": "number", + "nullable": true, + "primaryKey": false, + "comment": null + }, + { + "name": "QuantityPerUnit", + "nativeType": "TEXT", + "normalizedType": "text", + "dimensionType": "string", + "nullable": true, + "primaryKey": false, + "comment": null + }, + { + "name": "UnitPrice", + "nativeType": "NUMERIC", + "normalizedType": "real", + "dimensionType": "number", + "nullable": true, + "primaryKey": false, + "comment": null + }, + { + "name": "UnitsInStock", + "nativeType": "INTEGER", + "normalizedType": "integer", + "dimensionType": "number", + "nullable": true, + "primaryKey": false, + "comment": null + }, + { + "name": "UnitsOnOrder", + "nativeType": "INTEGER", + "normalizedType": "integer", + "dimensionType": "number", + "nullable": true, + "primaryKey": false, + "comment": null + }, + { + "name": "ReorderLevel", + "nativeType": "INTEGER", + "normalizedType": "integer", + "dimensionType": "number", + "nullable": true, + "primaryKey": false, + "comment": null + }, + { + "name": "Discontinued", + "nativeType": "TEXT", + "normalizedType": "text", + "dimensionType": "string", + "nullable": false, + "primaryKey": false, + "comment": null + } + ], + "foreignKeys": [ + { + "fromColumn": "SupplierID", + "toCatalog": null, + "toDb": "main", + "toTable": "Suppliers", + "toColumn": "SupplierID", + "constraintName": "Products_SupplierID_fkey" + }, + { + "fromColumn": "CategoryID", + "toCatalog": null, + "toDb": "main", + "toTable": "Categories", + "toColumn": "CategoryID", + 
"constraintName": "Products_CategoryID_fkey" + } + ] + }, + { + "catalog": null, + "db": "main", + "name": "Regions", + "kind": "table", + "comment": null, + "estimatedRows": 4, + "columns": [ + { + "name": "RegionID", + "nativeType": "INTEGER", + "normalizedType": "integer", + "dimensionType": "number", + "nullable": false, + "primaryKey": true, + "comment": null + }, + { + "name": "RegionDescription", + "nativeType": "TEXT", + "normalizedType": "text", + "dimensionType": "string", + "nullable": false, + "primaryKey": false, + "comment": null + } + ], + "foreignKeys": [] + }, + { + "catalog": null, + "db": "main", + "name": "Shippers", + "kind": "table", + "comment": null, + "estimatedRows": 3, + "columns": [ + { + "name": "ShipperID", + "nativeType": "INTEGER", + "normalizedType": "integer", + "dimensionType": "number", + "nullable": false, + "primaryKey": true, + "comment": null + }, + { + "name": "CompanyName", + "nativeType": "TEXT", + "normalizedType": "text", + "dimensionType": "string", + "nullable": false, + "primaryKey": false, + "comment": null + }, + { + "name": "Phone", + "nativeType": "TEXT", + "normalizedType": "text", + "dimensionType": "string", + "nullable": true, + "primaryKey": false, + "comment": null + } + ], + "foreignKeys": [] + }, + { + "catalog": null, + "db": "main", + "name": "Suppliers", + "kind": "table", + "comment": null, + "estimatedRows": 29, + "columns": [ + { + "name": "SupplierID", + "nativeType": "INTEGER", + "normalizedType": "integer", + "dimensionType": "number", + "nullable": false, + "primaryKey": true, + "comment": null + }, + { + "name": "CompanyName", + "nativeType": "TEXT", + "normalizedType": "text", + "dimensionType": "string", + "nullable": false, + "primaryKey": false, + "comment": null + }, + { + "name": "ContactName", + "nativeType": "TEXT", + "normalizedType": "text", + "dimensionType": "string", + "nullable": true, + "primaryKey": false, + "comment": null + }, + { + "name": "ContactTitle", + "nativeType": 
"TEXT", + "normalizedType": "text", + "dimensionType": "string", + "nullable": true, + "primaryKey": false, + "comment": null + }, + { + "name": "Address", + "nativeType": "TEXT", + "normalizedType": "text", + "dimensionType": "string", + "nullable": true, + "primaryKey": false, + "comment": null + }, + { + "name": "City", + "nativeType": "TEXT", + "normalizedType": "text", + "dimensionType": "string", + "nullable": true, + "primaryKey": false, + "comment": null + }, + { + "name": "Region", + "nativeType": "TEXT", + "normalizedType": "text", + "dimensionType": "string", + "nullable": true, + "primaryKey": false, + "comment": null + }, + { + "name": "PostalCode", + "nativeType": "TEXT", + "normalizedType": "text", + "dimensionType": "string", + "nullable": true, + "primaryKey": false, + "comment": null + }, + { + "name": "Country", + "nativeType": "TEXT", + "normalizedType": "text", + "dimensionType": "string", + "nullable": true, + "primaryKey": false, + "comment": null + }, + { + "name": "Phone", + "nativeType": "TEXT", + "normalizedType": "text", + "dimensionType": "string", + "nullable": true, + "primaryKey": false, + "comment": null + }, + { + "name": "Fax", + "nativeType": "TEXT", + "normalizedType": "text", + "dimensionType": "string", + "nullable": true, + "primaryKey": false, + "comment": null + }, + { + "name": "HomePage", + "nativeType": "TEXT", + "normalizedType": "text", + "dimensionType": "string", + "nullable": true, + "primaryKey": false, + "comment": null + } + ], + "foreignKeys": [] + }, + { + "catalog": null, + "db": "main", + "name": "Territories", + "kind": "table", + "comment": null, + "estimatedRows": 53, + "columns": [ + { + "name": "TerritoryID", + "nativeType": "TEXT", + "normalizedType": "text", + "dimensionType": "string", + "nullable": false, + "primaryKey": true, + "comment": null + }, + { + "name": "TerritoryDescription", + "nativeType": "TEXT", + "normalizedType": "text", + "dimensionType": "string", + "nullable": false, + 
"primaryKey": false, + "comment": null + }, + { + "name": "RegionID", + "nativeType": "INTEGER", + "normalizedType": "integer", + "dimensionType": "number", + "nullable": false, + "primaryKey": false, + "comment": null + } + ], + "foreignKeys": [ + { + "fromColumn": "RegionID", + "toCatalog": null, + "toDb": "main", + "toTable": "Regions", + "toColumn": "RegionID", + "constraintName": "Territories_RegionID_fkey" + } + ] + } + ] +} diff --git a/packages/context/test/fixtures/relationship-benchmarks/orbit_style_product_no_declared_constraints/data.sqlite b/packages/context/test/fixtures/relationship-benchmarks/orbit_style_product_no_declared_constraints/data.sqlite new file mode 100644 index 0000000000000000000000000000000000000000..aee39a2ded2ea615815a39c63347843405f33b31 GIT binary patch literal 28672 zcmeI)y>HV@90zdUof9i@(&Md)LWi26(o+?JP!}fDf}#pk6#+qFtFHDnF(kIJofZa= z0)GPw0~0#~3oi>u46yOGARz|OqldQvAwAMFK4+&14WZAYM}_Jcz9&UF->*IY?sM^Q zXD6pl%)7C)mi@3LVryF4rV*meSeB+~1@$u2t5>qBqW2EeJ6TuXs9Mk}*B%#DG;6-o zcyr|S$id=V;ifu3fdB*`009U<00Izz00e%8K<7}_C{0a~E7P%9YRae`1b!GV#;$!% z#?g0&#*vfN*;>`A%^sewTHgh&-}06!u45fts8#2xC#{9!HEZGY{QTZDAZ*)jd-39i zf6$I(xN$U!Mcj_8TJ?Shm#}ycOTIWwgGY9Y}qBWYc+z zRyr$D9GuV7@S|d({umMS-y8 z#`BS8zUYaT{K1^;VMb|kl5{Sm19sfjV!y{|@JJsR=wR%Z4KkgSA~j0m!l!)~=W|rCyy5Wz~!PTVlgg)x9o*rtsADF2muR zlC?SGk2L<7zfx}~5P$##AOHafKmY;|fB*y_009X6PX%@|os5mKqn^5xZ#UE~S{S#3 z?Nle_GM#JsOY%ZJ^jG7?4x;Bq_srTYsdl7f7=&&lll8yOZ)p4jf6ni!3JL@u009U< z00Izz00bZa0SG_<0$VIl)^$Rri_DR7#_cZeEAoP91x;B}i+-}dK<$e&^>mHj3yi9T ze{P?-B)u7-miql>k~HUqBL;E?z|BHX&fABY3JV2Zd0uX=z1Rwwb2tWV=5P$## zAOL|)3mD1%0jaa1k?aSoCN|9fB*y_009U<00Izz00bZa zf&V11P0y&u1jfw!thg%N*!Ai~mb8qT_h`Wnq&Gq{>KTKwdDpa?ek7d&NsdR&JNYE~ EH~FQ-%m4rY literal 0 HcmV?d00001 diff --git a/packages/context/test/fixtures/relationship-benchmarks/orbit_style_product_no_declared_constraints/expected-links.yaml b/packages/context/test/fixtures/relationship-benchmarks/orbit_style_product_no_declared_constraints/expected-links.yaml new file mode 100644 
index 00000000..185991eb --- /dev/null +++ b/packages/context/test/fixtures/relationship-benchmarks/orbit_style_product_no_declared_constraints/expected-links.yaml @@ -0,0 +1,59 @@ +expectedPks: + - table: dim_accounts + columns: [id] + - table: dim_users + columns: [id] + - table: dim_workspaces + columns: [id] + - table: fct_product_events + columns: [id] + - table: fct_invoices + columns: [id] + - table: support_tickets + columns: [id] +expectedLinks: + - fromTable: dim_users + fromColumns: [account_id] + toTable: dim_accounts + toColumns: [id] + relationship: many_to_one + - fromTable: dim_workspaces + fromColumns: [account_id] + toTable: dim_accounts + toColumns: [id] + relationship: many_to_one + - fromTable: dim_workspaces + fromColumns: [user_id] + toTable: dim_users + toColumns: [id] + relationship: many_to_one + - fromTable: fct_product_events + fromColumns: [account_id] + toTable: dim_accounts + toColumns: [id] + relationship: many_to_one + - fromTable: fct_product_events + fromColumns: [user_id] + toTable: dim_users + toColumns: [id] + relationship: many_to_one + - fromTable: fct_product_events + fromColumns: [workspace_id] + toTable: dim_workspaces + toColumns: [id] + relationship: many_to_one + - fromTable: fct_invoices + fromColumns: [account_id] + toTable: dim_accounts + toColumns: [id] + relationship: many_to_one + - fromTable: support_tickets + fromColumns: [account_id] + toTable: dim_accounts + toColumns: [id] + relationship: many_to_one + - fromTable: support_tickets + fromColumns: [user_id] + toTable: dim_users + toColumns: [id] + relationship: many_to_one diff --git a/packages/context/test/fixtures/relationship-benchmarks/orbit_style_product_no_declared_constraints/fixture.yaml b/packages/context/test/fixtures/relationship-benchmarks/orbit_style_product_no_declared_constraints/fixture.yaml new file mode 100644 index 00000000..752fb48e --- /dev/null +++ 
b/packages/context/test/fixtures/relationship-benchmarks/orbit_style_product_no_declared_constraints/fixture.yaml @@ -0,0 +1,11 @@ +id: orbit_style_product_no_declared_constraints +name: Orbit-style product warehouse fixture with no declared constraints +tier: product +origin: synthetic +thresholdEligible: true +defaultModes: + - declared_pks_and_declared_fks_removed + - llm_disabled + - profiling_disabled + - validation_disabled + - embeddings_disabled diff --git a/packages/context/test/fixtures/relationship-benchmarks/orbit_style_product_no_declared_constraints/snapshot.json b/packages/context/test/fixtures/relationship-benchmarks/orbit_style_product_no_declared_constraints/snapshot.json new file mode 100644 index 00000000..9ccf6e5e --- /dev/null +++ b/packages/context/test/fixtures/relationship-benchmarks/orbit_style_product_no_declared_constraints/snapshot.json @@ -0,0 +1,264 @@ +{ + "connectionId": "orbit_style_product_no_declared_constraints", + "driver": "sqlite", + "extractedAt": "2026-05-07T00:00:00.000Z", + "scope": {}, + "metadata": {}, + "tables": [ + { + "catalog": null, + "db": null, + "name": "dim_accounts", + "kind": "table", + "comment": "Customer account dimension", + "estimatedRows": 3, + "columns": [ + { + "name": "id", + "nativeType": "INTEGER", + "normalizedType": "integer", + "dimensionType": "number", + "nullable": false, + "primaryKey": false, + "comment": null + }, + { + "name": "name", + "nativeType": "TEXT", + "normalizedType": "text", + "dimensionType": "string", + "nullable": false, + "primaryKey": false, + "comment": null + }, + { + "name": "plan_tier", + "nativeType": "TEXT", + "normalizedType": "text", + "dimensionType": "string", + "nullable": false, + "primaryKey": false, + "comment": null + } + ], + "foreignKeys": [] + }, + { + "catalog": null, + "db": null, + "name": "dim_users", + "kind": "table", + "comment": "User dimension", + "estimatedRows": 4, + "columns": [ + { + "name": "id", + "nativeType": "INTEGER", + 
"normalizedType": "integer", + "dimensionType": "number", + "nullable": false, + "primaryKey": false, + "comment": null + }, + { + "name": "account_id", + "nativeType": "INTEGER", + "normalizedType": "integer", + "dimensionType": "number", + "nullable": false, + "primaryKey": false, + "comment": null + }, + { + "name": "email", + "nativeType": "TEXT", + "normalizedType": "text", + "dimensionType": "string", + "nullable": false, + "primaryKey": false, + "comment": null + } + ], + "foreignKeys": [] + }, + { + "catalog": null, + "db": null, + "name": "dim_workspaces", + "kind": "table", + "comment": "Workspace dimension", + "estimatedRows": 4, + "columns": [ + { + "name": "id", + "nativeType": "INTEGER", + "normalizedType": "integer", + "dimensionType": "number", + "nullable": false, + "primaryKey": false, + "comment": null + }, + { + "name": "account_id", + "nativeType": "INTEGER", + "normalizedType": "integer", + "dimensionType": "number", + "nullable": false, + "primaryKey": false, + "comment": null + }, + { + "name": "user_id", + "nativeType": "INTEGER", + "normalizedType": "integer", + "dimensionType": "number", + "nullable": false, + "primaryKey": false, + "comment": "Workspace owner user" + } + ], + "foreignKeys": [] + }, + { + "catalog": null, + "db": null, + "name": "fct_product_events", + "kind": "table", + "comment": "Product event fact table", + "estimatedRows": 5, + "columns": [ + { + "name": "id", + "nativeType": "INTEGER", + "normalizedType": "integer", + "dimensionType": "number", + "nullable": false, + "primaryKey": false, + "comment": null + }, + { + "name": "account_id", + "nativeType": "INTEGER", + "normalizedType": "integer", + "dimensionType": "number", + "nullable": false, + "primaryKey": false, + "comment": null + }, + { + "name": "user_id", + "nativeType": "INTEGER", + "normalizedType": "integer", + "dimensionType": "number", + "nullable": false, + "primaryKey": false, + "comment": null + }, + { + "name": "workspace_id", + "nativeType": 
"INTEGER", + "normalizedType": "integer", + "dimensionType": "number", + "nullable": false, + "primaryKey": false, + "comment": null + }, + { + "name": "event_name", + "nativeType": "TEXT", + "normalizedType": "text", + "dimensionType": "string", + "nullable": false, + "primaryKey": false, + "comment": null + } + ], + "foreignKeys": [] + }, + { + "catalog": null, + "db": null, + "name": "fct_invoices", + "kind": "table", + "comment": "Invoice fact table", + "estimatedRows": 3, + "columns": [ + { + "name": "id", + "nativeType": "INTEGER", + "normalizedType": "integer", + "dimensionType": "number", + "nullable": false, + "primaryKey": false, + "comment": null + }, + { + "name": "account_id", + "nativeType": "INTEGER", + "normalizedType": "integer", + "dimensionType": "number", + "nullable": false, + "primaryKey": false, + "comment": null + }, + { + "name": "amount_cents", + "nativeType": "INTEGER", + "normalizedType": "integer", + "dimensionType": "number", + "nullable": false, + "primaryKey": false, + "comment": null + } + ], + "foreignKeys": [] + }, + { + "catalog": null, + "db": null, + "name": "support_tickets", + "kind": "table", + "comment": "Support ticket fact-like table", + "estimatedRows": 4, + "columns": [ + { + "name": "id", + "nativeType": "INTEGER", + "normalizedType": "integer", + "dimensionType": "number", + "nullable": false, + "primaryKey": false, + "comment": null + }, + { + "name": "account_id", + "nativeType": "INTEGER", + "normalizedType": "integer", + "dimensionType": "number", + "nullable": false, + "primaryKey": false, + "comment": null + }, + { + "name": "user_id", + "nativeType": "INTEGER", + "normalizedType": "integer", + "dimensionType": "number", + "nullable": false, + "primaryKey": false, + "comment": null + }, + { + "name": "status", + "nativeType": "TEXT", + "normalizedType": "text", + "dimensionType": "string", + "nullable": false, + "primaryKey": false, + "comment": null + } + ], + "foreignKeys": [] + } + ] +} diff --git 
a/packages/context/test/fixtures/relationship-benchmarks/plan_code_no_declared_constraints/data.sqlite b/packages/context/test/fixtures/relationship-benchmarks/plan_code_no_declared_constraints/data.sqlite new file mode 100644 index 0000000000000000000000000000000000000000..a8ce8a3c488af7154bd8d23c45f7cc50f17c9dca GIT binary patch literal 16384 zcmeI(Z)?*)90%~E2?>b9_jwaQNc}*LQq#x6_TD$qYtvxR7hT9`BXKK0XxHLa`{GyqcIk z!FVPFxe1xLI?~Aa>NCxUtcfsX3QT|YJU5rgoIrk%&*TLw;D7)GAOHafKmY;|fB*y_ z009X6=K_yprBo{IRM;v6_4~dR(41eV>o@Au7zv?X|%RdPMRx{ zvanDp-|G7wd&L86vYXUMR$tF4g+i&a(lG+djNam~YIK_#7`4pZ_V4ujD=1Wd$4%fB*y_009U<00Izz00bZafom!7QdU=^w4VKx;aql} zm_NIgj~HJX0uX=z1Rwwb2tWV=5P$##Adn$&C@XAbV5h?VqLI2VaE0Z7nxri8 vJ%2vra5W$vBx~QlkyDC%-=BviD*fgM7zc2t3G0bvWhz-U4UBkMrz5Ua8gZ5y0IIYkp7lZ$9?Q4?(b&TdG%r-Qce$|IAAI5ku{Td-h3tO%c3pN*SNDnjarL*XP#^#S2tWV=5P$##AOHaf zK;XX=n0M5w(P}C0>nR)goO!7@7U|68$2?4vrKa}erPUu=blBe?Sac~!oB0@5jOoyN zJ*3ZVc{zhYo61aC%D)EE*_1Dj$&yE*8?s<2v@_qRRE_OzWgg`-j^YUm#XFXDPf887 zG}}^$HjB**W>yNHpG2DmEI#CEdDc6TQZ)=inLo~Fc9ifq$v-NkndD#5W>#JBWF9c# zm-Du%FAlgj^ZaFtd~L3OCi)NktE^BU009U<00Izz00bZa0SG_<0uZ<&fm^Cpd${HF zn9ssJe!_yO&v(5j5MjQ7!nZ0~tF&IcFk|5Kv>)W7Se zSM&q%SO`D>0uX=z1Rwwb2tWV=5P$##E*Dsnn**EccJEmz&+&V2l3;XGZU;2(*yd9| z8u1f(bb`mzSR{O1?gP|s+ugnw@IZ_g=MS>=zovg6`Y-wa{}*|K0s#m>00Izz00bZa z0SG_<0uX?}KM}a6U8~jZx2#9DS$u0?Bz#|l6ZdF3W+@+UsIpi6uGMqQ?BKr$b~#i`#jI<`+B~A-2L^NYhK5>u5+0)*E#R={=7e*&l&!xeZPHw zaW0Lr_+xA-5QdFLwYO&SGdrh>y~%~)2{~r#qvSYUxsl$4qa(dHWbY=pp6hV<^5qWC zz9(uO*PB9~JlqGqTh(_fUjyIQW=;!wa{MmSrViE^ac*b=5C9B@(@O!y#3w9TWp&In zF~Ce)F;P1MUy1*m9%X>wU9)FX-2BYtM0fTu`Jmq+h{9SWB~$I&?{)xR0*zOi?Jfr2 zr}WP-6@MHZTU1^YYUX4rR0K0sDtharjkYpC7L1k0iOyM} znb6TivIftcqMhD#ET8v_ulm5nZ$K-UT7HTaH81k7n@V~LF_1T2yWb4s^3xmD_t0(j z;WE%OHTJM@U52~I@;cC$fDx4DKvdAAoS-rLTWc8r=TJs9m7?+&E$9rL-kGSOY z`}Lho?&w|2&Z^03qVVIGyZurTau%M90m|Erm~Q4YvMhX>7Qb<_G_4~ov|Ayqw%aV#rTbG{nr3$(&eXo#>s{Zhn3t9h4YMZE z0^x!C(Yn@-wPf(Z-D%@%CM^hFls9d3O}9mx2jWTlyr$6N$Ak4uo!F|HR%=P*WtP>K 
z(ZBkb2P(78TP-8YBPWw}Xm;P#3cvoVc5>%=wOTGml+&`;;Rz@s#x2Umtac*so;JCT zjmPQ*n@}`o(eV~~!4D|mvp7bNfglBOYoY%eKp|IE>b}z(K%4Yb6q&`!+_%9FFdvHH z`RGd=5sFYc(B2>2bU{vHxEW=GBQ;lV;p(9qG}a9U@*n}?IU^6eG$WZ$5%4|qGW1mQ zb4GyZV3C5i&s=f2P-CV}y`Nsa-}{Mb(^WIG)iL*aKcPL?=SPF2^;gIIGr_j5q^@S{t%(@^EsQK1{9sx+Wf zbtd@EIR4YqQKzY^sw?Hdr-e#%bIY(7qMA1m1LS)1CQ?;`Znb#mdLp8tJLs~1*yfg4 zdm_55RdMFo)~d7S#G%8L22#HMD?ar@Bco3Dm%ffVH4X_>qL*6^wbyoY%bg+{OIDmA zFLGlf$g)te6lj`Mh1*KGr&t`+CuSKeqI0|ueJu&I=DE*vJP{QowO)w95}TFNt8GIs zNAarGk)wF3HFU-tkQq#b!l2F`E5xpn^ijN%b@iON#R+_SZQa$uBq%KSg93SbX(($H z-)CJ|i5B^Cbj}>OJeUfFd5a}LVg6z-p|Jb6aprL8o0Vwjmm8Jn>MuuU%n8SxI%~(T zT!D}SI}DXpi0?{xJrUB+24>9>+$o9BTD#5+Xsx4j^(cPUx^5JoW9{XMI2IG&g*X=7 z)n1zpnQN;}m#ffRAue6?TOoFqjBcA~^LG@Yh#2ciBC?2kr<$@;O~0C80>^voRQcp~ zs_2_ucsB*d%Sgr`;@R9Fw#s*>D)Ahw27YyLr593TcCKP~szTY|zuE2l1211!Ao!Uo%XSvbKjlzstt!=x{2xr9(yU-0vG~M8nY?H z++Nx?^UlSq;6+}rf@%a?oWKf!zY76WfVX5wf=&70ebAd5c!>376oU)ADgid5VBm@H z&MlX~4m;pG-;LrAK0yN8PBp-7r|S2;>aefyb&I3N`IT zHDQ;hle1BpiIv6`WJ5xH<(`VAI1`JG_0e_XyOjgw2j`BWOHU&!5)`~NoEAC?KjcUw zixbSe@Q%a6$PWo5T7JdsF(1od&1s78^BIFz8!RW=5%#*lGB}(;nIh`752ZXc#iD(A zUDPPeLd`9h*M2V1BZYRpYCUSc6=7)T9{b5FPcN|~g?D9jaa_@861!8@)X5RSrowdm z$f+Y|kNk1u>XBhX4I26H7?==NtD0irK*n2aoP$nNHjVmZ9PG4fidJ z|1iA#<4`(*gE;gyJu{vG&jQ)TS`L7-l;t$WM4d@#j`a3h!*N>$TgTB5akA zXJlm;*%%@C`@sY32aX&#et_q|sRO4EoIN16`)C94w|{-4-QOQ(t>gEQg&h-K8%?cds{Izq z6vh11B zTh_I#hgmx#X3F**>3m`ep7{DJ|zoeJdMuI;@` z>hI^yHQz7-@=wRAT@hX<13(TOa}Ewje18K#o#>A`G;HZ`scI{q0JI4pK*K-;zXZ}> zCETU~!0;7PHLp@q#Ts^bh&C2I>nBGU(+Wn$VV7t8stafRs(rk2U7cf2ovnCIIBc_W z`1HSg)*$$qG}I2woHjY#+zo7O(IAM7Q&RsZuMDSB;CvZChJVfTtj-&DmSqlQVO<;- zHTuOamNiW(#s0Q;&%Qke_8i=EXb<}yjy*^AfU9gW@ZsOwwKJnT_}`j+_$PPm%qZ*k zMpGiVUB z7|v8+b&C!(O}iYiJ$i{HMI}YpIpK*>mR)3$@FD)EyZ<`lzAb{#1 zu=l|~{Af6!_1EATZu1N1eTP#)&-GhrBn3|WVkeialggd3{4BXF?M~A2t6{C5#i+5J z9J(v5qC`QEDJKYY1b?vnce-~LhlY&;8mB=-m#W-dqQkvu3`iAYZ`(gCB36Lx zY4R_ywWEM)3*n#V=OiFe4ID$8sGOkjhy^^(!f1G**H-7MQhsYd| z>jv`kq2}7P;qFf|m=lSAq50KtccE>C*Vt(3^X0=A&vJ2Ng{`lrm`XaJHxKUy 
zDlJC8z(X(pvu5tu27F~rf9{{Qawl-W=rr#o{(o4s?T`Ya`tD-tYrxOhrb74J*?8`> z<<6Mn41%-0tLlsY1J*V>uwDmYEpS%yN}Xz0r-IHs>tT;hfv) z+y0}#4PL@jx)ym^Lw8?aa}J6*{S3NK{6@b*KMVaL z{T%xXsaX`%&&c5+XZs^h-KIfj(5pi7d|BEh%(bMG$?aBxF_mR}(?fT68l60qCxaDRylvJ>D)0<}i^^Df^n znf=Z|6_T#qzF+>1)`8BTQ_`}jG}6-=s(i4$;{SID|Dl`~jX;Ap%SSn>H1k%%)px>0 zI{|%;$QhQDUYFeUrTm}|cJGd88Js=hnAN>@EFPXoW5HQ?b832YuJ@n|tIs6ZvdOf2 z*391P7*2>1Gca~79(AqkSuf$2mJyi|7GIXui3Kq?N%FtXMqUco16QLcAqC7pa{(yGWjCuO$OIy@3|#js_j z4wVh-UL7t8P;q#%yh}jaI$MY8AV)!mEXLT$rSxW@oC&lLY7|jeP-syR+_lY$_xpe| zhC9<2@_MS$?E%zmxNM8XJx2JtL@K;jGjkz*`J{s-n(sY~O}62ZosqpvhiAyFX9qFC zM4W8MS0Zm4(7T>6t5ncsY)Lf68F3dL#tS;d_(V#%jP(TO5gqW6gMA-xh7uL#3W@+c zBxfn}{ju8$)Ux2g(;VuGIeK^;EyYf=;K1HO`$T%rL3`ik_VUI`xITY6f9t{U{=Jab z5oM{|br~5r`JkCM<&6d8G5328^tbkEnu-fDYgf2}Q&>=iEGXV|+VU>dLB6WMS>W00 zl|iIU;))qZTy4RDI*mQp*++!MM})^Lb=>E+Tt7}!mpmeTVFBBU;=w&6`4h(PA35dx zmy`d#yX3EJC;xqS$zR(}a#)Ju!3`%>6UGbbgN`jenvyKb$1+)iCnRbt!mhZW_Hi*X zHhZk(V(B+XEW;8us2B9Cufb~&GSJN?RuuA7!xMP91GOKBYa2}HrEsFRGd#KI@NtcT zHn&YVrO7kN+l&2vxxrRtn2HjLv)s^)3OkeB!k!AGxV7A8-%rbin6?T@XB_52nVC~{ zvoCU40K-{v%-L;dTq5c2)Y&AxDTySDEDd7~Uvc$UVxnycJa;|swXF%~CZsCJ!f%xf zsUyf6hEl}%_k`PYq)W2PDip?CUNy~{9 z)y`}q3}@&#scP9mc<|-=%OqhbVYvkNHZ9>XVQ9j^_-C(pOg(MdJcM&!1-W_Gl^ZU- zO{51nJ1lnSV$-)v3j60Ox=IFyj}BQ?_+qR~zE#AyJa;KvK8{hZ=ySmhUnnuNt{(Qy zUH%<2U4e5(43CSJyt*y1_R87do6Zek7LBS~q_gVrOD3K=ZHI-eh4Ht#UvB(}BK$~U z%%0n(X~*%?<&f$4 z%&>#B1eeeqsn1DrOJodx8N>-bz$l`}U4jl@v8^lRZNFjLWt(OTvxO$wwjVkxR|D>` z0du&NUPz2yA746Y<|9fI>NUbldheTC+pdV_D~?!pA&o8VBF&QYND{*+M@T)xg-TwA zlv>yV;dC?!I}A7gQFJb<90{JNDVlOe->d>=R4isbc&7q7e1G_9DZS&6C^fJb2T!KW zXS6(ir*L^W@`1!{sPzbybj~*dmN4d);f5yZQ;SGpup|X1NK^1Dxrt+ zLVlGOaa#>tX6f--f7j*Z8oQqni4+RQMz_o7c6$9R>@RKrdf_CNkqPGVa5BrtNBUIR zR%o&Iu+|RL|HS;y(8PC~QC;7C67DUd z%PY+*Z=w9;lVoG^#g(|x-0B`Li>bOVurjI^dDlwISWl&e7fQPElJ)k%o@dK;mpk8_ zV6pe^8p@vEK&9_M?L0cXNYoVKH#S#=CNiV@iV{r$ci7AYkLuB~vGSEOe*Lv&)#%zp z=uqtl-Ik0V#k#{=U`5oHm5eGkPv8q3O3oOC`v7BfZ8F^MRaahW$?~)96OK1Ko|)Ng 
z9)mH^;PTuUdVyy`&lcg$kX0)l3R8jY#4NlxI>I%jx?$O;$4k0OXhmg(kIdmWQky~5 zA@{Cuj^$SNcvvjejli<0hsgAmmeHOn3m+7oVe=UF<%(pGr0?s)$-VN~UNXp8BC(v{5a?gqJ31-%CoJ?cNm7YqKGg~6#(OkiD7dBEa^_=@zG=q0u_-HZD8eFV2 zNx$JX61-+ayFQfbEK+LG8Z1B!UpfX>e#Vf$Gfhy4?j{}Xzg_@z>(oS^*DjLJUZ=Hp zoJSPaN1V^d3(8!7-cDA~l|R%Y7K4?C^JL{&l#@uU_Fe zTslUc9LSgm7}wQHwd?D2=mk@yZ3xE3kj|`ne6B&dL4f+KRJ^po zxDn;r>~wsOUY$NAy=5wCWzD~0_lSD1ZgS+`dLj;Uuh9j|~4 zTv?N!3TWTR!io1pNV-Uh+6CHorwm_}6#NftLoQKzQM%#Dd5d53vkSC7$QCZ9=0_D2 z6;+iiM(0xt_zD6m76ao!pMw=!fO1~-ODS>u~f7=?Zy+YnBSZ`39R zAc*5rwHWFP@_qu#xO5GnC;bxUB6QFh-U}v6XH)@seLdLw#R5H%d$8(7svac2Du3~q z9;#^DY=FOjahG^&Ku!-JX>D|`>O+y_y|o^05q^GzH-6!MaDwGH0*YFo7m=CAHEJOn zjBn6H@^wN+Z4cCf%y6pk@ue&!l{r0C1XRyJws9qy`AYfG`LcD3{rP1D7xJr07a!+i z3RVgNKQ1QaCl-tsZ4a3&`ap61_!^d)i*D6^CDIFHE!AJD2cgf%rwI$=8#Rgkh_&&h z+CJ(-vIoIy99=``s_0Bt#7IKrc0%!r@N;9nV6ex)AziYno{b5Fc=k_-qfv!7!m;{) z1N>5tQ6~-VlSvmWt!MKUtFcf9h02x~mO^&sWIMCngO*J}}gYJ_D$Hh6}NAKwBRu-0oUr3(|#YS;V^)NldU%M}n#;dxJV) z1wj_;umQhE1WX-)8az|oPG-R(V5kjxFZmc&w9YRG822^)v!fv$;`l();;wu)gkr_^ z%%Ws|52EhF_Q;}1KA|W$Z##1FSUxr{7%Sb3TkTq>z&2nb=tayx;;Q(DEi7UydYzXb zH{e%65X8pUaR(u(D;o-n&@bdtf(y06hj-A3bV+j_E77LKC z02h!4m@p*ikWHWoo(32&u-p73$O4c8bp6?pbDrHDP3^9i$=+STQW1kZW?oiZ52SGY zrlwv#UC*Wb`bRA`47=(iYVS?BZe5;;Ido74>W|L4;?RpD zgCk~6oaG&kW#tk5e0tEJ<@e?GX!Cne+hx{rBzoL{>aZ+XE`hEyqk5PjXP_6M=I_5SFwdMM9(7`c)-dZL*N_?ozBH_xB1LG#th>=_;i+sB3vZ{};Pf^vdPq~k#Pj6Kk zJ`hjJ=PoiaBhOYL24LAMaz400`lpp#AL#-5yA`qz{5-g96~Sf8$DQwzmopMxZdMNM zf9FUpRLCPe87Ud@DYqXm*MSBs6U&i<xj=962LX(`TH_to7U|Rxci%=8R)Z^}U)`Z4IABc|c)UaD z13h_&B5a+aAwPnUhpi1R4pyNvh!F$uT1E;HTL@gI=MW(UfGypO$TkR6GAP?s``I{a?q5?HhF()f($rRj=@%-ETFLPhN2Tlr@QCMRGwI{1Bp>|7MT1 z5^-f)hUi0NS+piD6Ql5PL~1|cy@d(Vi_S2BtJ9^3(rBy^aE(4q>={7m1LmPQ zmVu$Umer3R?{zdB`Z`dos+4bueH?l++Tpm9A=+KPaSv;vv?zi8aH~0lg8za~m5Av7#FI>;5(tLwk#4m1Ff{|;?&l8XS*^SZ$ zmhJZbt4w7~jL)=(hd%E0Wj)Q*!8H6?AS9cqg;|R=mG{$GciV}_pO-_ZOfh@Y_t)_F z%dL7?Kgh23(6Tn0e)PrA8ah4o#b#RZxzkDX6}~?IyVDELaZ=|0DO)l?Bkz~5#wqUa zTJOX%&SLziFEo9^xBl^|J;(M|KZ5Sot%rp&?}^zLy$8Lg=YTnTjl4f>b#OwVUiFdo 
z9*cu@T(m!ISoFebG6OQd=$!s|wx83O<5mQTh4-Yathd$t$G<#G7Dm+sfrU{y0jV#& zZ|jb^57XIW@~C<@HB=*1G?aNSii3wWSk&KcHKIOaH-d$6ehV|vST7O^XQ?|uJ9SxV z=Ad5q++(Ip_dM~BCJv`Q%6~k+ne){^NhGY!Y?HF{9H3a)({sV!r@op+asNmtW;d*! z_fh73uVTreW217f_w%vK%lkX8&P<%B-v~|IgJ2!!sFB;!S&f`vsh564hzz!{Pix=V)e+=jrzf=!d z^L@LDxs!x@6`=LA5VK>zT{G#Au5A6zH7?=MM~dp1Vrx*e{nv2Hji1}?en#w4v%rKa zsKm9nTm-9P_;T#$Z39yj=kEG$@Dxq&;hFX|?^GX;K(87rKGvSikFBeiO7y%EmS2T1Gkt~2{2rufVo9*oDZ&1O@S6`%J^Mo}E= zVhIye%EjOWi&;Mkrwbb6kTHHe!n6R9=aeKN29geq-i&E4BCB9aV4re*QH6f!@LSqF!Zhqs!b1rvvY8Zq2{%Ez3Tp zf_OI0UaJsjvDrH>*_)W{rV5FU|1w%4Q&Khb$WUyM&xA+g3ki&|AVhXxbAG+7RYJ&5 zh3^?nW<6+z*ow*;bO`Xa`*^0x`c<}^N_95$W<-`yZ)mpa&6uck4!Zef!#r1ShN@fC zI5%Bsb75Yi7p@F>8sB7EGF&oZs4I37BQwa25xrvi!|4SXH`Yl|T6N`_JbQrry6H@L zEA)Cmm<$Kp} zba0CkBvMPimbeez02(MP#D>A}YLEwi`iWgwH^!X}y)Kjk!)Jq#3=Gi{=M0ju4;X$O z9}`Z94~S!syG^FVdBv-d2P-}}45Ma?6A;Ltve{~Jh(qPaS$lDmb7jG7eF6k2k6jP! zKpizcmLQQ@S!_2M%@Q!F#Znzr2kbl+7zIi0ajnavw9j!TySddND7|y`$ri4`)s*Rk zZ%6$)QWL5Y_K24xkP@23+Y(^LgC9S*$h@0*sK#>a!qR_6_xO`&^Vkyyr0(Di@l}Im#u5&-HN@>f6BEJ%*jwWJ z!DA*^M|yGBpnmV1OA2~$=;#BJ{=vSAx8h?J7MM&IRb<+buQ5i(q15< z43eBN1uldNSZQmWLyAhBik#R1E>N`J4hC($9^L_uH12VxTjI5i5l(bHyqgKXE3B00 zR_Im{{PjwC*RcM;6{PsrqpnU)m_nB~E&-VQ3W;)9`Epo=8xn?ER&_Bbr)MprT~JO` zYmCK+`2%R~GB)a+cydWzU@s}^kxB+2^gSw~gD)u*L8kWx2IFlD(G}z00{!qxh1wtM zzHZ;dQ(bgNcwEXxXf7KVZxbg>kxOKSt;=wQ(nz$UT-|biMH%LTV|D5B;|dIB#R>Xx zIjJHMGww?Dzq24n&XS2*JSzSIjLOYyB!1hDqyW4Jy1Z^mZ(aAA4CtQ!8SEpvMf?}P z5g>(=qFVPto`8fbm{MU8bQI1`I){n;9u)Zz9k~Sj3kg|F909SIwiE7qo16#Jq>hlR zN3JKVjkglgBQ`uNn}|u-aa{bUX0IWZ=IHg=%Q!^*}J?{U!Hys z?L5ni+pEq@xx%jPFPyoQXHmQ$Qh9a&iVup_H?ykS0WM=XN=m6VOqc7_zv zQ%O-6ge$6q9!;WR_?_St^Z}9|2C`7>=iE1mAIcQ!?;Y(O?nRgQslJt<;rKLsnX-5wpZ8oJ(0)fXI) zTWE6Cg{7)2JkAX1Gln5X6y;W%FK(-%RmX(KidKfl%EvUvdRNk_q1EI?z7_K^`s@nA z2cAup8^iempQyQG(mudDDtQdP$Z#TKtASy9#@~LVQ%uo4(4}Jb;q8?gEHA=;C}OF) zK&Yz24;DeI_!a{5@y$iZD#fJ=9|Lb_HO@!cSGDSx57}E{LCcSThB=UX$3%QH24Py{ z<}m|5><~5B1Nyq4N43t)C8_oYJVWLYG3C z0!#r)wh25mFH?gXN{@qf&&!~2rRh#k*M-VzTxPmRp!s}2HjX}it~PqysFpEKsjWhm 
z`=Q5`38mxC1c6#rA|Dp&NEog?Ml>IWDiM}z$wb8w>TN=Y3KnSjE>85cYjU&g$?5g! zTnbb&&)^0z-HjB4#4#%*l4u`ri3)y1`JoM5Iu2@1pco*^ku`pHZkmMX+R72Jz>F2x`*|v`HDdq-IOmBB_KlZ)5*aSj z?nzEBS7_Nx?e_}o2~bGfH0|^9>Pb^z+ms*QFzslIyky*`)}Cno6&gSw)*?yceN;7qZmq<>$4sD%+^=B7uKR^+R$Tz>IV`qL z1*QPg*!~8i5_w1bys?q6Tw+?Ep9gjTCQTaa^@C%rVLhbU5wa^*gid0(Y{Tesn-SY7 zR5>bjo4nGq$^RAZ50ubF$QixxV9*@|(=gxQet;6V;q2UxWDd-W$^@INtEd~>-n*SZ zE?lJE#Trnx=Bd6|GBvp0PY)}x9Xv*^u7i;1IfVE+ziq}Cq!4}+40n_0^$dTwDzHKi zrHgKp36Ku}o{n7tTmiOiiDkeDkf0J4;l)E6knkUhTM>?>DUP8>kWLm^ZblXrgco($ z+tCEMa>7zw)OOKQbtZvSN8RRIhQ1@r)*-2iD-z3oP$ zX1%Sw2&n~X7#iD%1ym)_&0zkS)Vch}MF4{OEHCmNjBMvk$lC{BaN(SjL3Zv2nlM$X z7|VZG-Yq{|$&W|JC2st$Ouysg5e|l@oV}t^_@g-J{=nzNy>+tg4WE(B zb&%EUkb=FzcQ)y$Rvn6E#^74`H;a7GiHum9eYc6MI7Xja;JTMzssJ@ z^s@WxXI4+f_g_~02tO6dgM9m7JsWO+_M0-awieye_U)~ljEGU9>5FH=n7F98wi(4X zskZVq>z5+J+S=)fHKzkK*XL%^+L&L)h-7HgB?P&By>u1!Rr1r={l-(Q7kHLVj#~dA z^@%WXJyi8zG;0yJywmEVznn=Z-5L?|Qdr%<)3@}>@T+HM{Ip1C$hQuO7>J-!X~yf^ zZG_W3nvAQIz8TFn>}hxgO?Ms9h7tjiXg&9b@6S$Dgc3s?i*4N#>lyVqp`y%eJhb+S z`3dCS9v+(IU+GFxpc8^EuEIv;TS{u8^X;22qh2bs3AcHBqGmGNvg1U~Xlta$)m#nm zS^qM_+eVFx5zf%8OAeBzw7r43r(DNQ916{>pR4b#udAss~WK@qf9KuL8QQ=73#4(v@<@{vSQcZk>;j35q3~ z;W=(%Fo=}!tXkJCitdK4>_GB}RJ@wFhOKVI+vr=o8M0D7zKdD?@$b3sB6C+X6p{na z5Iq-{`&*Q9lvXljlzfy}9$P6bD@8@TRH7a%JYg*7-T74=r16a%Zv9}qjmzngfWa0} zcL(nUINIf(DYt&AE0xW%?C?LGgsS4rgG+51PDPfn@ijB#+wIyDheK^vY$T!8yg zL>@o`Q$9-6D#^Zb)$)5s{RYjIT-fz3A~!}eBUMod7Q9`$S-xQM%`cb#79=)UfPQnE zK6$P&deW$oF-d6*{N&@0o@`nzom^TK02dGOdsasrAg*J@f$4RW7~Ga3*8!xZC#1&~ zX}M(_dN;igy}7bbzxi~NvC#O$s;gOGRYeJRnI?NvgW}?s)@}b_|&rDBy#%SGdjANV778K zw7Y>CKK?)E;ek~Ua%q|VoQ0o)nu(&WW*$9HRrTe|r<%Fk3b+2S?zm@DlN6Bj<@V{a zvwd+!FV*cy9X!c0vR+nmA3J4|q9hIMT{}l(DR?Q+~^4~Z&? 
zrWp7ZEtypMfnw-;I?=FvN<_5CWUYV;Wwy5ZRK$Sh(wriy*m}F`s-#|0Wzw}XsSi4q z5>B*=^?Ya#I=3;zi`sfwQCt-?a+XQT3-xYk`lF){Me>m#jDBMslc!ML#tm=`%q4bBF> zG0x|!-AHei4iA?sRV^6}vy>*Qy+nyb8Looa`7fzCsWx%YBnBfLc?ACHeGZ6p)Lek%_k-bkG?Cfmp ze!gUF+g=LKsHyGS_vFO!<3q)kN)d!P!Gi~#hyI|oQ(&|XdIBmf zqoo@M9gD`*#Gr{iMb?Ckt34Y*H9>~HX62jjRV@=b=w3+$xA)FL3q(q^K#pGyMb=KFM6u_!}@8adX@Q> z1H*cUjI@m);9jMD!-h#n=Va#u=Q`&|=bYODm{Tl?&21I!A4|tB#VzH1xaNCHU&!fY zTAWYeN^Rih#moZLNQwpf?(5&*lW-X%4$hCuMj`sGQzNjKP*TCSP|t(!1g`~SXx_n4 zdS`G8?J2FDzDPHwb$38psqM^d5gje9wKKZ2lCv8#QL~~mPP5n7HP;OTvVy8jB_OAe ztuY-MGkWWN>sKh@s>$=ITUmGD2B1nD(5rFG6UebIha_i)okoB0^i|Z0I@7K-N>FYcc^12m8 zV*Ms1W4)W~8c6W{@g#6^O#GHOF^*EW!9t2!#VjQ*Asc%p{91b!{TdLRh(W@$472E( zn4mX7uY*30Dk-WcDk&P&snwZYNJJz?v}ezn%$%fXRo)3!c8$w;Wzfz&V?P^B4y;3K zZvuNJQMrk@Sq3ViKoR#=^pIWkikDVZl_E~+!Ky_a$sM+QZaX!r=g(qg6UZrAZ@@hb zH7eR@z%8y%1qQK`Bs_K^!r`qfvBv}@l!3n|1F&)he_bqNL8snQXbITrSqJm7P4FC4 z13VP|37!h*gE?APcpPvC%s=Zgq!^kE9Ig6DE!VCiN?h4TIGSz35mwPwnN~?w&u_dR z%6oFRNwnge_tV+~WmB)S)b14J8GJn3U0n-g%mfj4CWFyKK`jA~focYW!M`0rfzM^k z0Ojz)aI?3#I>Lb=$^@#)^aD_`h;eJy`VyxCFZ;gpcd+bJ)V zr&HW2?*vWO(z0~c%CfZ9BC;Bn>A-1R0kFa^x|Xp3>4BF32Y_zGkgc{YWcL2-zFET@q-U~cf@hs)q-W0E z%+WUo4ZfU1o-c17yA`*dS9&ermeKSIjd0FGaoZ0teDLt{#M(o9Ci8>-;uFiip&qg^ z-C$B=Rol09{*=Ofi_en}Yaa6NI?oi$;=sCP5|F)qWp=Ec`Blt$k+T{16mx@&D3908 za!~6^wy)PEZC|cC**?CcK`FdsL5V9J8KYozr-C+jIN&$+&ojSaAIkjBvMXWFhy6Nx zoew@a@PuvX=o^+H?)!(X9@ROb!>7almBZb9SZUZmMoq?4=H^09YJTeb)LhdKrkSP% z2k*Mx{c@f&9uprp`+JT`fT>Z|2M#*?`W9h9YSI(HcBo|Z=>|5}M_r(%j+qq{%zK%q z>;-0>B+zJ1PJUm63ITTt`D)MND#9{giwZW_bZz!26_Hh)sX9w4v->3M+V z(W!mk-!zNPWjd;#vWPX1wMC~g`-K+;s9=$BvD&%nM>$5kN*P|XKFt>#cGKcR~VRqU7 zZTIrQOM7|umsZvSiLzie6`Yd^g%GyA(JZOQx9&}Bgdld;odC0%^lN$(A|Z>PC3f{O zk$3YQ>|~`06klFNf1dk{Vv^W9#^G)M6?UDCSv{1GE%3~I^BeS|@k8q@+o#T+p6`1T z_6G6jTPSG{@1b@tC?MhUgPA|qhX^sX>?;O(|OB}-; z3(u>St8*3Hy60lBpvQdg=zxb_Wd1E59dR5Th%iVJ|`hF;80~N8+i=wvN(v!|})f>yHvLHTb*CmE68VzWGdyNQrU47QeczJ`Jnvx(p6a3UPU^w)WkcN#)G2JGPcVnJ?9o0Le13~EVeklh(431F 
zTzq~j?!M1zV*P`9=x92ikQxKVYxJ{j45&)!NZpn)p3O|oO3q9!Fn()X^s->E;D%mr zzWDj)j=Al5vRS3?wervFvF4YZS9R~12(AweRoxxKN;;H&R9@6Sc=hE(eEri2*80hM zk9w2P^7?8OkVYBMK+T#_fb2Vf+pL+opN?0|#SIW;5bB}8dN~r0kNlgMvw9Y^0QjSV zn$=HJ_lxl0pQ@TqeEeZ_jnu!u+!_TZ0L<>xM*5uTHvgYgvwITI7GSwR79a(`AtA+{|S{RVK3Ukwm2?Dya*2y6S83yxOL+S2ZxH9 zyZ~lj@8Qx!c1eMjedh$f_XmW$ zFxVdu^h;b5jknAoTKu?QJ6oFC+N!DanAWRy1P=kOWUp+YBy{jzVf>GaM*L<&XnQ@VO@gGik2GtOw?aL zDrOA}=RB%9q~Gu{Z23&5^3a`z1N-Du3|TET!_tI4M6Hg8v7HJ!JDI&NkzFy#-J$`< zUiV_vHVh$Ps=jp-R1*@oY&f~`t@C!QCYST}bIn5M?UZ8f{ngKltM@}67OU=0xi%CR zrl3015mtMBs5*?oNfNNM41?V?d8Zfig%O3F z*)1FQb1S_*W~sYBMSV!V;V5UtqvGx>GzVh~phAs5W^TY!vm`4~Q*=S1sD;@Q;1B_|%%7G36iR=tmm70sSz;fcg1h9KN*(Z8fXV??YUg5O zI4K8XB{(Q2W8F9xRA-o#AG1@=#qL)QRA&Edm+ z$B&cv+npE=sgOL8$iV2l#K^$VJios?4co_gST*wOsi`A}rIarp_K1FRK@iF*^RP8j zIa=`UjTsw3SY)cw%niZB!HmTKG`RHLIEgOYcapiQ8 zeC9i_$dk(b?pa^hXI1*|W!Z5^#pE?B7qb70rI6Zz&2rEtIbQ8T7to7|9OQ&ObYSUi ziM$|C^>n^Z8F?5M^4h#g4Jvxfj-vnLTP0Ut(4=JI6>&`kcrWv~y#QdUak>bR^Y1q0C1_LX zrtXJ;Hl^;r*c6st;BQ*f8U>LGdVAo!wFIyqQ!$nIz&zL>Cy5;RIn)ZQ{a^G z6S)z>6R$wDq^^58tRCvPzITsE4BlD2igV~A?M;>ioUF7K3 z%hS$wRf2_d)eenjbSVwVzwDYGUYaH`;8`03JDk>}!aBJK` z<<$63oK5kE4KzkiOfBVIA7dt7>u=PFxw#w?cwq5F9xlr~EKtT#W|5eubVoIN{E=~J zpopW;qEMboo_1D|^4P;|F-PVUXlW&X3UgV&?v$3&VUteaPw}}A{Q}fAF&n*DgDH;( zj2HuURLGY5)btbVt(t_bv8I$KkXu2&ujM{L2-HPxc{TaPeoYDc{q#X$%Om?aIL^dP zXf*I}9DP~*EKKV3S@j{c8%Or9opu(wuG(PE(RrFjDDz2}C&!|?gTSP`Nnmk~gvs6# z+fP4wL{Tm8PSGQQ7h8W!L4*_|w-Av%rxc^M+#-8K7$Q>zkwDs(?-U>~X~(g0YSNs8 zPjK=g$C=pG%ljo>te)TR7F(Pg_8W(ypuo$p9(6T=Np6ly>aAxcT5zC zhKGcipWYO)+_zsy{kZlcw*7tTuQeN(I1*l-60+poUw>NpMR8J?4TtZG`==(^_v>?L zz0kRNLwvuzT5;K)icQOoYZV7Me^?7*(<}oxVgTz_2aUi=RgcWV;&v+fZ9SYa(nu`N zo%edG_-9eo2f#=5%(p8sK1K)l-{Jnk*)s!QTClt zO=bPNV*x~Z?>!JeI!NyXNGPGI6cH&(P!Op~S9<6PD7}ZKpnw6TtF(ZkBy@ZdB!i4q|&{gz^=8QEx7ib4m z&I1dFv1<{m&`q#RJ%Si|J_Q?rc>)rFj%k`J!aPClf-pnEJ+u*sOR)SA#sV~$JOCjm zp&LfV#{a`qqhYY1g!b34ZGrFS=yjk+sqN?KTluAP+JiydLefd?Jg^Gh?+Wy`u%j#t z2dp6h(+CTAgwcm(Y7$aFYWjo^pk^&XBWOQmn-Asy3f42HhuNp3YMC>`rqHP-1QXDe 
zlx-E*8tAxfDC-n0fV(`!`SwVVgn%a%(tut8&8q9DiX4Xypx&fGWRYD^G5+f8_5gZY zsH%xM4f+5r7lWAwCGd5}Vah?hAXjc!9Lz?GV1o{WjzP@DU`UWV7-m8^gBAmEfy*N> zqG))^fF6OQZkPUlpasBV1pyv=XwU?}MRz|e*Ld?RIt?u*;Lp|l4y2#llriUwevs_O zyHDGF6>XAyUC&Fr+XY&ea#kc5$_DL`GS^F)(duC|znv_Tma2;p1M9+j0?f5xNTFL4 zBJAdVXh+c33nKF7aI`th;GBrdL=LeT^a=21f$umZc5VR{5;>;;yP>;z2zm}ZiEzH$ zZI8~@+vG;mptB7(X+UY{qf6brAfoQ3uK7K_uW<8qfv*nc7>p{sM;6lr?YWC-GLGdD z;Yp^~?kUB9k~f(|mN2UVU!_F+F-K4X5|LIEVfpu6p5n!(9NGn?nl~p1dI_@D+9XA{ z!4d`hFLm3XAEdbP?+bU6fb#h3qvnLrpHW|Q4Lro}ncy+U%;2ztDmev!WS9|Y|5{!i46}@F;^Ke)Kudi%*H)fr8P;;5j zyb_bkTl<^BpT5)NlJdVpw$5j|;F}`medRXt{b`JOGn4S``TJ*$MKkRXyG7kvTWXn( za;^EUkS%Z~+}wkI$IpC&Z%50#3#E)2D1`>+4qSrhl#d^Vmj6U35JIo0etScnq?c088!x8)iZg+;H$bicQHXcbD$;tzkU-Z< zCnqll##=t4;|4QLRjQmA+ z)iHO3FJiCoa*h<%;&aMSIJN~t#qX}@>IB{5cNcY)g=OiFITEfW*FDAZ z668_LhBo}J3l*z3L*dK`~-1+XZD zhJIGqE(bOP@d_UjT}Qk28L`rL^0+P>%K>eCf^CF0=3(`rTc9;^LayMNAmIjhjf*fT zAfAj(g2nNzQ4q{w{Tqd?`c7$ZXju8jvRDFXH1h2g_6&dqalxI|9?r>q+ua1vP1E%DozF_&Od&;Z7z?-&mOyM^78 zss8=t)2-05&oPm(cu(hnLIYW!TI{{E4Pd-l^!hCykNiNE9E}DVd}P5Pr5%04OZ=_; z588X;4M9E{1<&>NR2uwzh{++cOAnGmB$sGVC#*{;(9q0n7HH^`?blFyw)Mu=#z%Zfe6Nj(Ve9de}+hDxth!-pz zPlP9}|DjCLFuQ2bccnlj;RV$M2_XH;3T%pRKJ-77E#fW?81FyqFK%4=UN1-7cy?=;&2JPR-a>Qmcrm(wCt>p3G2^yu z-)G&{3&&pDUfV19=)xquR+47YWB9h-wtnAuUmnCB*>dLCdYf7%0F6(If9D*}>m1-t6#LTwUZKd65Aj9bF#w(B=ulP6BUOjkX*SmTUXUBq{3Oj*s?-hiyHZva2F6L#5v`a)yX&m3P{?8k5Dgt7*6AMoI>Y*_E={7Zr>%m2ZSe5o%>jRTX{PWpM~eqv5#os3|2c}-d^Zl)Y7w;Aw@}N z$4fguL`gr1&v5=I;3Um<31?-fN6VIlivf-nI8|GBwt=b12bh}30bqDAv5g9wsx(%ubhN)S zmx49GVKK?!LKKR%)9KqgDXum*N(w2l4&JsZY8|Pl3zS*XAg7VX?;cIll3t2pVC|6_ za8wL`2N!bQ<@4svXv1+Dmyl&x;Ig&mugkEY>a&fMGFJX3yQykZuS(O^TB}M?YN0iy z3~k1&_qy)WM3G6yw@J~pS+M4IeY+T?AYJkPn>r~|l!;WCbP$q{s*RhHOnT1o#~B@w zD0^u+rymq;x-uSoTX2~Tfh`A_VrO;uysUEzbpEb$i*X502G1LSNc{PWNCdjADs|@? 
z?8;Rwhy`-81~@K8J4i**v35Fs>y)Bu6QkUc7JFAkAAYutlyaJNM#`-tzoUvHoV?A9 zl7toRQ1-4bVwy8bLMAXp>Cwjm=R|%B?RjUJ;O=ald2N|ARDG0D@jtAE7kt3e>2Ye1 zS$esiOubGEzP-!kWKVb4GkWcF3-fMH*T^|1`}lAv>MljnB9_i_Ru{+YXiw=*Ce~p` z%J*-`oy0mJ@#E6V(%f$m?;dhGk#~y5mrB<;G`&5!vAXg3gm0Cn`{euTo&(Wm_w#My z*vk{rexv zoe!j1qNQnPyI7s1LLN&q%}T@{O3&Xs;&zhmw3m{1JYsMHA9^?v*H&Ajk7;T`IS;`# zM3G&M&nMSLh$R3Ty*`zJ9q4!2MjpfjU){~QY0q|eXY@f-uwVSm)o@SVP6t+fr|q|j zB(s$9-qI59-*6Uj&IZI6uog+*dsp`E4MP##EM2^OJQz?9+}=K9E($+Z8=Xoz4jcu@{FeCtN39LI{F7^+h>2S_cFUlAQ!CEaz)@Fpt}<9J;D9&}NBeSoaEQxgJyo>LJjH4(o=ZM5eY zaG~ERUC1GTy`LieM#naD$zb|fso)3^`u>GrO>2s}C$;OM zR&$&=y4AJo5>`{3uc0S2cdcu<(gPjWqy44k>C(F#cRN+7w!}VeNlU(KqJMREi}d4k z`mB^kM@2^y$1CzJvyUX{CJry&eTmrToR^^9oB#F)HO<#=!qkSq;PRW~qPetpOP#^V z$4sM3&Km-UQltLPM5rIhXe(;MV0S9rqdkaYzEN7^(Z1P71}P2XzS2iYr;u}7jp@hI zp)pF7TQ2D-QlSY-=e(KcJ){?$PRPA4&reCeb~=e}Q}F&lO+EEtI$H54{Vys{U!#EL z()Cjv9mfr!n$z55N4szm{EgYG0X4Qb^`v^T`IdAt2ku0F%G;73Eu=rUtI}*ae!L|0 zvV$|ipJCqWBQpK9(~y(;qtUQ^#d-RVSJF+K?zIIyZfjQjlGmoAsETfz{cT;S1p=c| z8z8|Fn8;*?Pr3E*LX!~H$%kR?7cbR*n{YqvIKYV`%KoyIpZqtrva)9&QKS3RiCb3s z6wb+}w1OiS0*7Vy&Vf>1g>#-$ipEu=y~DC)yzX=>=_GPdSRLGCi@C-66mO+kW%N$w z+=uI8aaCen{9QHNhQdz~Vz$6h@2l;O?Ox~Os6N2No>;M0Wp~W{Ii4Fdp5#IHm%%J0 zqVyc+pU26hl$Ts2v|#8(z>VsB((>}^-hZ5J?iC_fnvva#tB?80EcU>$p5{ZF6~>CY zYm@RROLbG!S9h@xEA_YZVP7r8$gNu5qZ4oOJZ-butU8J*KKsGL%BK3L_qD?2ntM>- z>+wS(?9(+Opn@d7LUWQRap1_o;Kc08IYjrUaUwFiXpS-2J2so$D-)-r4`+EgX+?UI zE%p{8Gx^hQtDx$S?`mj2$cWvo60UY@r;ilm{va==EEZUuUd`EN9#PC`sP>duY+=8S zY-;!GxYqylC31*h=uczfr6F+Iu&NhJN@LiU+Bs5GBl3%t7en_1{6+4Ub8|L0HTXU9 z`YUs}La!6PP*?>ztR?wg7(!WfIqket=2~&9ps14Q5N&IqzAH6!p(3kV$Z6v7;G-Sd ztCxlttUgo?IjV&ZetPgmg=mrt?7T)EA;CRCFMIGFm)DelLh_fS?gYL)3NMT93ElM& zFK^s&?iWX|P`=lwFB;uz?Ewr!2SkU&)zni zxvMy&SYcazr+U5f&ieqyyJvFn;~cyF{$C|HG6f z^H1sC67-<^ z^(w)1mW8~^h+c@zH<3euLBlOkoT_s(9e`Jx2d%6jZJgds_`)wKn zRzwqQPR)3dVy@X>(sr&=Nb*rr5-tw3;k{{&JB(^F+N|k5Ol;EL3=w*n*rb75dFyws z&bE3YaxKIy>E>4Z?)f^G>gI?wU$>2$fi$+fbxGA7HxJL+iq_eU>^gmTx2sm?=+=7k 
zuFqc!MyT>^mZT_THIdAT3!3OnDsP5$$MJf|`ghN*@qX5v-_Ci7w>C?p5J%s-V+LC-+zV6GdZE(yoP?s8+vYkdaeuA`Xoqvzp+~Z6`Fc< zcGCo9uM@(szku5)%vT^eC(LT_PWU2-(WvBYT9^`}80ID$$z5Sn^D0!B@WmOC5xvKW zVa<%`B8qxisJ{vHCJ!&sY!jSJ1nqxD5oPv0l8F-gCv&0D5Z3)XR37AHc+Mp`Pxs{Y z+(HVGVSjLr2oF)&Cz{@uC3#?aUy_8z>>DdditxkCFc#s5Pr_acTN!KpLBeB33wyV~ zh%x;}MUuhjZ1IhYB=Xl?o}xUC2Y$l3*^J9GI5FB>K((eHG0nXc)QCAUp6f#AK~MZP zcjkC_r?gKzHw|$w`GEtg+~@4`^I1raMOd}pK`(Y%{HgQK=i+UPsz+hPj5uK{eJv>x zI!3n=-i$CVMpj`($m1tr4249=3q-Z%+}1C|ysytbFZp-8vl$MCfBx-3e7)-dq%{7G z4c-7Y%*LLp2j-;zldb)~SrCE16me((PTb0`qKP~w!l^fnxF~t-dofa0INNZV|6-bj zaOr)u$O1+^i#L(lRFNtoa^?>sv^gTdA{Cgb$M0V;Dhn^e%rs(2E#$%qs~P<*59PIVlXYvI0>5$W>hztE^bwaPs=j4 zK&S6Awip-iMDm!jYEPFk3Yq?3idCboqbs&jsH5)OQo?@JU;@uf1jPd6f z)eP|GV{#wzxULp(xR&tKLP%_#1*UZI(Z4AK;3WeYy9L5gXiY*2 zi}1RxXJ~A+sVaKrJL93LYR=3kqh$U=kq(*IVKYu6&+{EwBJh%jWF4fjG-htaKhAgD zVjMOPigHvDG$ll%AE#0lIm-!hry3PFlL~r^SVOwK4CYcRN42gqp#V97ss4?6JD&n7wb`kbRp@)V2v*Ya2W@tBIl)6yy*= z7J_oo?!Pz3jW@vNSbO$aX*xE1&u&i%eBCZ@EAv!tJ$%!W3;MU*`rxN`{R5C&U;Z~} zK|Ft|F)d*{xwAzVEY(VF6n%3E+^M_8KD5PcB3wGr!Rd zZmkGYl!(0thy<|Y=_^KQN49iXafhr_S-ANz!B(U>j&LXv9AR!^ke9!0ZD3!x?Q1Yq zu&o9t?8oT@R)f}jxhDj>)b&!iXu^hwc*!=40f)$O3IX18uW;3`XT4xmtEbaU9#vB+ z9GF{kO*k;6=1OX_Nk(jq$*B?f%QBC!xRw3v>i`>kR3wt1o=T1JVJ$8a*~$00SS%}? zW4Od`kY*uT>Z4X%kg8|V5NS?TtRiyRJRriHqZlkwiNQW5yh>FTUV&L_^pIM}ht*W4 z`di2+)L5tXmb#JOx+G#@TKBl-{=cDP@-*gzoFg3Bhoo+v?J+UCq2tf+bwPNeuuy^N zTp6oDhp;Hf(x~KpTC9?#7{-x}>28^+c{M6lSaha*L@zQiw%IbhTv1P#j_IrAfCXN= z!(TMebTq9;%aXd(i}J}0OL(!D(37pQ>qcKMiVioZ5d&3zj#0%&wmn-SMC9h3p3u^! 
zoXya(HyN=iWpC1Aqstz~$HFXCjXNkTn+!T^Eh|d+AH^n>#Q=dvQS-7NQL#p%HRV4N zW3@#?N(vKWHOdzAH%Uc3iZ>-hH%d3@MAgcJ!g^fFgOEMLAiv@ z#jk>?*UUmPH|2XIP4RKDCzg~2jv%Jl@^1x>%1p1ytu#B#MV&1xiuluGFNn%ppo}`G zMJX*g3*Q@bkc;+|l@-rWGWm%X)X+RRSI(*T6(OotmaYAjQgp$*KBniZWzTO~(fkmg z6{pFL8749VfL0)X(+Xn!Q|NVj!+&E31%MrgvEjY|uQ>dF#Z`au3j8Ur;OAP<8iYTK zx8y45(Hn$46D_}!zbtlz`dOg)MAGs*%cT4OvB618lcE6T!L9Ne+B*i%m@GZ=m9v)> zpS?D(EDlf@6nd6qZl}2;^6YNey}|>|LA!zNlG}e=)+uMxO^C?s9{6D{8=V>XH|I6D zqI~7BOxfrP_Z1EED_VmdmbKP`+Dw zp!2M)>?D2}^sKc+Ieb~{S!=O!-f~{qixQwpFkXHVotaf$AD$Ul?jMucRh9wi?H_2; z?0x?CqS{XqpEX-%lq>69p}X?ca?oN>d(i(`py@=~vX&)HX#nNm4NH^a0HMLHvKvM_ z7oQEAb3t~5o;j7nF##%r0ncbm-3tyB2L&y!nFVJq%RiGe?T^bm8K5k<4Z1vA{=ML~ z^5s|M*1Z3gVu9>5AjQ6sBM#pykL-!t!zLoT!GJ0k(1#?VN-Lz=Sa$)|{|j<{a3= zwD9bNm-A>HTP-)2=gk9IlG&dTq*R;Bwf5#&dTZQwXw0(=*JuWwnjc=O?HxGLUDI8@ zSFjFWUN2gASjG&f!6#$~nxPYS2bzs@czk(GS+plg2Lw&`n0%K8){54peEkQGp>`y` zt!Bdp6H^0I&|~+3DdXdF0~!X$=a=(NqaCNgT0qKh=d86YA2e^wvSFyzFBi|>WvsnC zAa53!I3}?iV4?7+p4xsHB@n`hAm-kXv|A^B_8nZv_UX{d2cjLA zzs4$>;o;nWB|C_^*ZilwJKmA~|55H4fO1FivdiOX4R!db#$EgR?>{1?pz3lJE+~kN z>X*af??3LmA-7xVc_Bf-mD8k%CZXRo+j#aYdm2Hw$dfDKot$3Zn~Y9p_6L24{O`00 zSJ_Satn{436I>8jZ+7_J^tL9PW_J6up^yT^tN?0Q%$FRM~pcR9hH zkghk&%}zr|H=L!BdrJ6mDWO;HM0ZvfKJ|V{MJbr4ln0#Is36`;~bRN?QVqYT*D5sHT4;hnj1yG5Q-2?7XC!y<)*enhtJY!G`|-y5_ZNdgCg zvQ#>;vr_Jk>lfo9WM`2abKk?*PImSzgsQ$1OKG5Yz#pa2UID59W2$-p2(!qaj-9XT ziHD6|KYM1XoI6G&w&Y=E9UfEUDrtN>GZAkIu~0JJk*n?V$(`qx+bi~`?QL`A)_KL! 
zYvGy;+fvPB>2C^~uazUiHzDV(G8tSa;jeP%&&k;$)ZyOby;tP&O6n8lDYBP3GEUY% z5hv+?r2^%eb^GsRo<$tQlt44pOGP3}JP5;#GHFKkI zKO~c<4DXxt>JM>tzx# z^(nFnp7Vac@ALFq&U{J$6}fU5zmwDL!)BB?XZrOy=P%J3U(JN~S?XDc8@nKC`YC&Z z5!}!%sZ9O;Rjn_hRzRkXYhsB%MK62i1A-f7|3a^PrU~IX%==ug zi>u%L60pkw5-^;P_O=7|y@;W?aq6Y7w`0SpfN=OE;ra6aJsVCme6gwR)uEOAUu467 z`W3~1V$*^qpH)R<=za*YdD>TlW+oBmK#-RTCO)miB3ws}-kfap`Y(G*pc3LD=cvm8zt}-YdDBF~o&s zN2o!bKN(`cKE>M@uCUpx{ZD1;SK_*beghDu#)~y95LsH|*T-7q3BOI~A4JzqSc&cxL) zf9=v3!F61J++l1JafqtBI2KY=`J}GZwH38Hj%dx=B}0U!?vi6QV29q=R-;1>Y^!0T z#u^WSTkp^X3&x*ZUSq?$;7`QY_OV1rVC`RP(gSN|7-dPidn0V2<1%AIeI*!o0e3TO zM}J*{JePZ*t8Bktd>#K-kL&Z&%E-Dc>|TDeDpbC%JYQkn8dF> zxFstSePJ%Vk6VF`P8VD`-iNcGZT@dldv)(-DD9!*DnqqKVDMUE;{{ccy-cSb>>EeUDpv8hrFa<^4%vROd|?6 zX}Y+hgD?4s?@o*uk0d!tz74q;NVO}px={51kIzax<3m~O#K=XjEYv@3*u|;LOsI}e zyzC>qGy>IUwfzTw7r@un!D=^Co^Z4XX>A2?>*V|IT9&+b_-u=5tr@e}_pN5(_r^26an7Zr~*So%7 zpf=$&`ST%jm!aZ~uJ6=|d-u=J{9px63Xr^ynHh<6(n#dwoSGHx{7#Xm#~C`ah}jZS z_9)%rR_4KYbLKEC1;u_^SPF{#q_A{dcj3_I+2Tce_eZEJkRYFVWIF5%nBCU^8-oO}G1OnY1S6G8%e-TMUpRJ% z-(SADNcr*-^~j>K6c8ns`WYohgZZT(H)Gz?KIS{C^M8zz3j$H{Zzc@H?AbhthJgOd zk6sk`BorEI4?8ymEoJHqYd1W_*G5SQ+How!7GBDI;qJ4vIA)lZ@xsZ634aBCP0dc+ zCuhNtMq>Bt46g+B!VIfKfTyP@i;I^hHw%lmCke~2mvl^A)*IFj9gyGobzrdinm4sy zniG0buyVU$J%8m!19#zybb}CnCv@a>TUp7<9&t91I!F0dPpY8d)8ypjB6pcDr)|{# zt)&|Ccfa+2|B3D7>ru<3SLo&{XVSz&4eAF$q4(^%I9__^tkjMR_Ku?G>Cp4l=y`tF zRxo;A6t>ldp6?Ru{S3{tPnByGat*O196iSDTfnxiq31UQdt0HI#MJAWUIj&S+L*cb zRA>rvG)1r=3jxHko_rmLy3Q(CFLlGKz{+bnq&w24J5nFSl1sUN;z1d5;K61D-d`x2 z%d${fr(u*}1UIxCO{&~DsJBh9_a1Bu0`o4%_-A73%?PT71XX)N z6NJ!YPH2J?nyd*;I)tWcgdiv(=sF=t&r6HHo;U6KqELN4>iVL1{mta-&?nbfLG?G& zuCt2N|B1Su4yrems5cd;m$LJ6NF4b`pP(>#kC^)B^H$xL{p>ecR-*;((|_V~54U`b zRUuemf|E;ljrEmT%Ym05<_0DOb(OzTUzuS^6%&Vgq1nK^B!n49t!o9)E=glbQSuB! 
zxl)1-Y8T?bM*3Ev@5Lgh_&&l$6yFC$81q#q&mC(B{#c%pN#|A%e>qL|gXu(@JxY#q z$rtLBJQ+U_vn`LYhDP&oC17cnnxU7H)g!jeFeZG#+)J0BQ;-90LJ}(E@wP2SgYTGY zNh6|GKrF0kg*;ARmHGY0hzYoryo1064nr$ChdsKhOq(Zp^g%no7fz<)z{45EaG$xH z(G`pl#i*f#_>J|H|0ri#GQ~(>7?KZAa9x{dA5ua+l!ebO(T9qV0Da9jk+A&-p$KZi zr)H8KUnLPyPQIcG^iYJ}lhWT7a8q=+_g;wps=%tzMq~G{v=MhG{4yS9ALJ<=nMaSp)4k96xP0l)~{-rj|beuT(`b?J?w@&6cvA#cqKv{T6lBiT zSUQI%kD?MF^IG!c)=D+(&i$xftvcFK8RzFHcg;GcQ3>Zm6qlAC{ixDdZ9z_$--IiR zVWT?Mu)t1vjkN!hR--g_D=LV%!E&?>{Hb#5Jn*~0V)KDFEB+W&!}TPq#fUvxC3pVJA4opBf}7OZ<*WIe9GtL9=G@AE z80Y7Jr6`^7#l{s(^kQWR>+)l_@b_5P^!i^&%^ile{#ie=tgy{X zxuT#=PuZ=g?Vd7cew&$cc43?UoXnhE@^jRlc5cL%h#+1sj?HP@1Poi@*A2BPxwI z^bh}Yn_bDksR<-=FZn|$_78CyS|@^=8_C4@Bbqsu#gWH}c1x_rU0I7|rL&bjA2G9b zd>h1)Kc9zwxT&lxhG~;gt|)0ERdy?GlTcnQZKG4p#|)IJbl& z6L`SO6aFPGNM_RjI1tl?KQB5+dXot^%s&+oq_U}oqv6+xI+EMGtz1`HgicgjWHr&} z;yUcdYj^SeRFf~)n57*M1#F*tXG#Vp%hlajkh8bW*?I=3$-(m2llqr`qgR=C6oL5p zuDFy@ni_Q3mWHzb2}x(|58aX0IphUOI@aG9_7C&%E@X6sJGa zp72k)oQy@L@%m=*Ga-z% z!rWTlL9q*_P4P2Sj2neNE_5siw`hGAh@CP$GG3_jSvOh`R-S^oi02$e1`!p=G>%_W zmIySVzF9|n1_zN(Um~~H|8e{M1KO6fZ#oe?;3LY5w4}K@I2kF-#B{3JJ4hLA%~rLmc{=DA!DiV--WG{4(c-({hG&aovIEDhc@jU-AvY!vS;jlV$ImS|Bf_5xo?+sj1kI@Ns|(Fg@V5xg zfcZnfGd!tlrh55J+_yM;Y)v!GCKF{izFtUmGGi`iqPfM?6K$rRxXG3(Sr|n1bzE3O zb6=$ArD;ggCQE84fvLd{a~EXialx0kQA*KraQgMWH)*%zUye!FYQN6UOjn&9a~a|W za^#?EL@H{woAJZxJh_W0Z?UA_No_V2Dv&GSj3GpMDH~i+C(s!Xt-d-zt35B0Tma~6f*bIoFmUkZ)G_GoG-d_PeYu>(LZoP1rrb>4@%bmgm z9qZ{v`(4zzpAZp590r~k+iktd$2b%uASN5)tc1D@frGuMQ54AQdUJR96=>_9;DG1> zc=H0#|5J^AMkPw}Y`x6UY)9)!m?+oaO-pJ^CsThjV?Chz%spsixn(M@S9^BQ#xl#) zUB8xWP|LE*G)Hgnqwktd3vnep@5Gk0pCLov`3!20)>>~a96}ls6ozv_1|f07$e?H( z4e}@+_Y!&Z7-x?>qPW8P?7ro2L9CXqtf;`VanoA`>``~*o@ra|6+YA4p?#)rrWU#E zZ}~DmfMGDOOkHn>$ebHL3vGQW z?h51u*oy)vIDXNebKp0j^=E#<2>gc(QA;&5!9oMQ%YQs!8Z@<(uw*dZH#63~5`BmC zS$!FEflcBas%Hshbp>My%YQs8DuWj&fA*F6A3Fkge@30oNqJtL#3XiTYC_pGq#l_G zwG=VykGTW7!)&&Nw9y_gC>)}z%_!r7tO+jDnsp(S!)?NB5*s+~P%TfEQI%Pk(P+Et ztI^kz?SKZpmfbZq(N`ne(H{6<`ckj?%+A$;M$--b;PX4($$OzX-v&=#HfC&)37DmX 
z**;{#*pl3ghIUYMzzL#eJU{_yHW;vlG@A^7A;GYL2asc<0Un5*=END_YXkT)5A90* z;7iL!0}l#5>DWCw;2yYD#slHf1QcQyv(|_M-+_(%6WSe@vR1w0bIbNRlSHa_*G};l zQp|8UU6{d!;W9E4HlSL_%upLvRts_GzoTLn7^4gwkS>HX48}L8-4R^o zE}J!5G2PR5*HSxQ%dn$3pgv$>dadZ5wi?II*#Sedm)gzTJIn(rW*a)etUKhtr>2wQ z6Jq+%a&|C)4j&QCpGNn3fW1(Bx)%&q@(%w=Z(M|riTFyAsOcKA?(Ly-ZTD|I_7o4H z+m-a^|IbbUAmSgVf3TD9pG+zIgoo10v^h{88T`4JI(oHK1Ph)aW|u+j$)a+h3$0q( zqAo-d96*KVKu+py-z%h{4NhIOfaK{5==75$up3Lt!4O6W(!%Njw#3ea+x(g4a{YaB zg&C2z)iVVVR|~D9Z_8$KA$Ct^CXQb-)8hHUL;4)OZ7#?Hty=8jcL)nZum-*sqM^N= z0@2Xl{s3v!+HQn|Lbewmp*q`15Tf4p6k@D!DPHJeZvp~~I4J1W^af{Mm)l3I>sf@& zpOK?NaO-TzWwP`&#msU;Vo+aLo*N80z(NVzT=}bV_aC<`v3i<&r)| zR@2Hxu!5y$W4_bLMyB6!%#8Wni+vTFzSHi;cN;8y7^7tLk=*eU{UyOVhH6pAK>94G zn@sh&m|A+3R4bNhhL{}&l_w+0r^VQ`5#4qPupd=52k>~Ngugry4_`tedFKNhXJYCj zS!IDvY>M&63yz)i7Lbp@?Ur;~QqoBbTa?n0^jqxG9t>M&r1R*vn53r|wkV`S=_#tY zkO`n&Qb&fEHA6S{>Ibx0I;+BtT@1EHtHAmf%ytgH# zqhpldAK4rQ+k22}dKFP4lkMy=#)5vd)qalp^ecRR^wnS6hiFxDMwHqmKz?l1&yf?n zzab(JfQWmQpIn=O*S^(?FMRgNpwdf&a3aCQVkNKlUjU)e3NVQl;5QK~k5NJY&89dd zqOTWwZO%1+q8pAme;2wr`K5V}XD1ZSSCcAnU0&b5B4b?u|oxg3ycPH-X6Kyz6{3+=A;O zGr8b(rkP35y4sAo&^pCTi{LtVCIr0BGc!)dqjj{?-bEKm@%{FU2UtDp2VHxTU`x!8 z!xYGFysm(I~FKC3eu+;efa}8y^~7tlkS)EGe_;lf-e%^ zu(kIJ-lOyUfV9>5qBm1V_d4s%xppqWbwU8dlOy_n@5n*V1o8t*KW$eB zl_aObXKn%&%|t%D;fyo=C0GU6^ER*-4SqpIb8FGw=k1g5Nfzj$ruH@ZLf9MRc{TFa z=w#S?JJ1};RP((WC>8aMm;4NUF7LE8{SE$UU;0VDX&riXf$6*SA-vPq=uh~iF@mU7+2&sHLq+ zu1tTBa+43}#=_}4QGW_JTqGX^$)TjOB1k}Df(}gNE3gsm_i*}M)cy1b22i%3nSmpi z{t9RpB#ml!rYB2A2zm-5CFsG)i(pStq%Y{xY3T6BOBB=fWn$9Z%1@&k01B)Qtz;j+ zsQBc6h5d+lSkYg5VlDaC=mZQ=1>3}~I%ZZ8fKS}6_fA!{R{vzv^KqV7NFA z%R3QQZXVLibKf2{&O5iQWN><3Ptw(>ItrW1V>b{feVKjKkh?l6oRu7F5Nt7`w7?yr zLa`QvHSBuAV?>VXK9Ng_(X4<+1&+ehVXzq*Bz&ECmmD4&lmRU2P%xwV`=i;H35me z6yNe__X^gdAed(jjA(@%a*gHTPaf}{g>gZb4b0_W3MdSC=_*UviTEA>?v z)>N@^_!pXu%xgl}as0YgAnlqowiGX)GLQ=rgsDMe!7zREP@pJF#n%lZD58Tv#&m~& z7cH+aZk_-q+Z@>XM8zGp-eYRmS**(+N8aOUXj_EmtHj-iZD9ytI>=pQz=Ml;9!XG`HVEb*?#SP88 
z@DRp>@4&xSm`ZA4J{DNagAh3n((tX1PwcjN3W%J?3X7o-BF90iG`6FUO@}dxLmDBy z-In}#8ctcyx(VQt@vpII@oJ%M^gY=IuEpI#m56o42H8cy#jlHv1;hCp36qc1nT~n4 z{k8@0fqf?0_K7W|$3ENMeUgw?%HuoR9DN?TtrW*0K-nt~5=yPZL3xpKLb}~?F`SP? zM>*;pD<^B`X>;{x7}sgPOboR7#i&(B19JLG-Uw;y?8^5&AIL^=nbRIi|Ao=6LM}1w86K}+ua3KH};b4++}^j8hA6ltE_E= zr-)>hHXa^7D|H|Re`l3YO+M=uPa!4I@#Z4XX0er8Z+}BG>kto?$`#Z#FaVodVzxno zs8na56YNL@sG`(ARZ){o3y_A%<|R-oY%>rPYP87$3N_p`0TJPw8m!|E@oyihbt=Zc zzA5L_#N{O4NypmdDE9s#577OgaSGy`m1VVW|H0n*S~}#>cZ+y?ha>J;Nme3=#9SFo z59^@&#rPtnLdHU&$ zz|uKwUr1k_^?vZNI{ok_{9SgUztnBAdFk}#bgcE-2c=Sy6qnblv&_KI%HJ^hswO`9saH{X1q^tGEI7q*Jh^WGn+&R>&iMeeJ8 zW=(FB{x-{`$sY~jo^g;Fx6$TEfO&xH>$fK3x2KF%$ ze7=I^E6@p}Woq+1bU@6&EGC!FSUWQtdk^CZT`|sljI|_?qHcrTfkOodLa3{78!Fd+ zSeDM1HsL9%E)z?IaTj!FaXo~oXpg}NpQQ7t&M{Iha&I2n^Za5E1C20ozMV7X9MItS zfl^rT%jcSh+P&(Jx8wo(w0&Wu^nq1{K z%)u!(usdqyvalTeWk>UC$qi2jj4}7XzBJ`zFg3kpSMw~`U6V{SmK)QNQX){P;mS;i zN}dO~bGoVn73QnPHVm$>V78iL=7g)Lx?HR{W)0-d;~EW9(HgTRg|hV_&xrneSo%q- z(*RO!!eh`7w@r>Ygyk6oa4pF~^9&F8327-UQQHYXWjI)2i43Y?a3DhXoWdtiM8S4P zCIE$kL?PoJJCER4;Za%fNas|vbV9tI%x#o(Uc8`l1;~kuEvtJ(tMi5oHOezPUY0U7 zqKU{6_LE1zKJx)3on>Cba z5twz6IpLkv#vQiu{nWM+eN>hNV0+2x$ZapeU1%QsKkev2j0bP7W_*d;WA-`m%pZe=R&Q7>Yzj@s1@vXmAci80aTLMoU$p>07xY%sf51`{@o={Bw z!A*>!O01*i;)ejB;k4RbL-WBw3|y5hXkl<0EOW(q7bJ`N=qy8)>?-IjoFO3tPF@0g zi)Q#bf8zC)$cV#ThtIRiD4^>TK2A7u^KKz^y|tzmQ>q-sjAaOxY*R*g)^oW#pyoQ)OuUP3WoP$UkU`$&0mBzo|BA zH;=u={!~xwRn^C?n)4qF#8#@@I&02-xFHr@#o6T*WkjCgCDVv1=X<4)K`LX>%?T;V z`G|53z-2>jD`i}EhJ(GSGbp;R>&@ShQAnw3gRJFVF{;?AlIUzWyJAz3RduiHXWgi)Dz|gY=xSh1s`C!- zrCc#C>*Ma4|6$s!tsa7goF~?pEDvZ=hPaS3y?A-4Sqc#lObc z*_Y4I_^Mb^gbs<-%%{{wR6KF66u2vEt@0Z!bWhO& za3a_Ntx3$S8KBoS$_rP37Sw^;o#R*514_C%7QZh#OT?)Oo0}l}U0&LywFTD>>w;>Ly1dPGp(JdENZn1gib3jNe~PtRx-xar3ScZ?B^qzvISH<=#<8 zF&^TkUzOADm$)kLd_Y)fp#=-eia#r6ttKhhG>e5!bf&QnR$*xh&0;)NEmC5<# z>D=_oO-`yjSNCG1uzDh5ZAQbR<0*1Y!p(o=V%0==CFOYd$k~zHx4U9>X;saMYg}$T zj)#=v!y_SYcQ4ho;;02wvR7AdiF}8*YZ9EN^!%7d+i={*ete^QXgjT49cK-LT?=Oo 
zlif?st!$geQ>;IciP|>M&A?5qs>7-A$LEs~v5}~o-i{p6z0vL$>+ZN2kKA^$edI1u zcV%SAX*|pwTo*h-?WFQ}O>ES>Mg>*L-v}N>I)hWx9=Tl_)#>&_sXcbn7`5nrArMG9 z>fL<z0CCuEh&v?OmA+X~1u{DlBZDJ>4kqvKy;rAgq&mF6 zdJo9iq&enAHPao6t!`CIzH6aBR9?-h-gw)>a>%#ZRh`GDubr5keQyqdTQW|3oNehv zigE(GfRwnsgisc6IF(X=cZSZV_rJ_=z$6P;KS*di>6~N^bXheU@o0b1F-a3hx~o#X z;P|;KL}8bHmFA|$+Y{E^((2Yn$5&Q8971^x!9Y{)gljjin)vwmtP&S)$p9{=q<|^{ zFI@GK>8_8MlEQ{R-#*qU^1sqT0W(aj%L9NJvO` z&Cn$bt-{bSFd!%)0xF?McL_)hJs=Izr6M2*lG1_#Qqnb`ba%|0-$A|cz3=+1_r9)K z2mTm2Yhr))exCj8{p^c_nw?sU=(Z_*FT#CiP}Cz`8r`(sCP(YVx$n6uTGrdrKyyO4 z`nb%lxq?iAt_@>YxV#)7%}M=Xbd~ZuO8krp-sNcPRHzIu>_YvssEu z68#Q@0}w=?fB<6utgYa^;?m=5pubSZLyQA_zO5Jnuc`l)7T|C2l)4_0q8AKFBjS09 zA|7vrm-=;5{MC~4X9anyKi8}dtAOXUFL+L0tH1a7{)k*K)Ibwzl%;DvZ1z1DV7JJmR{i)_tBS+~t zdiZ+eyiMnt^3zKn+|a4&Pvt(i#TngNzc4QjHdual#-q%$Yu!Pw=kro%C-oRXEN22F0N5#Qet^#B$4Ms{9Q7gj|0pHRxtCree}7Lcn=;pg=8G$B-_Ie zbHhx8>tRbfn0?)RB-GR4AQ!j;PkTG- zO}~H-lW#ks>e+C>1?(WUS>CdH#LiFNxOd*er&FDMgJj$B6Fq#&eH-;Rkd7Gn6lxnW z(j9ss0@f9(8v%>f4Sq+Yb-m{vEY3c~o}|^fMUtU_enBxU4^}Yyq*an=!8=XezNGa7 z$-<*FY5VmykIM@VX_lYjVz{~A(tFTtd&XI^2)?MhIX~J-ubL53r~11)jZwz}s&9rQ zyFmbvQ1O53PB#|(!qRsqJntu!@ClW)34#lo5r1i`E(ZT&VY4wiNY8hl1%i+18-Jb8 zG?ghj^fZ+(bZTiTU+O5+xS(|w>?@n{J`OP&@Ri6_Bkl9%=psJ*KfVHP0PU>iaTuCQ z346>`(pIfMV;KIJqGYXle?}^Fg{EYnI)!#Jqfbrg;J&j=j!VRPuN6^Fh|t*CAj&i+yDK?a-Bb z##Gfz&3lj%a56D^h053t<4HQ)QQh!ykH6%lJvM5ES*gmx`C`snOJ{~0JF|-9zA90v z)IN4lm~zG!fQ*u`4Kznz*hV(zy^sxe&c5)nP>z<+GM@mnM5|g4+`FjO`Xb`-IRW01 ztZL(Gu9@w*6x1t{;VZ^Y4*boP48wHB^bWQymGr}Z7%MxZH@jr?i72_5RiyOQ)1Wjo^0sc{?sY{3EYUc|~wqw09<9 zCSh(p*eI}8;^~ETy@^2kafj&EwI*2tH_3Ini2;Y+rh2jik%?Chb|34h4g@An9GIJY zz;%x8@6BEZlugpoe;$#G&@_AES$NWTm-?4#17|TWuZbg?Z&bq>%{L-&-1B~_IJbE~ z1IIjX`f>6PH-m}3YP;q)9X`D48WV1_JuN2BPM9VhRx`IUe#Bl}w{&QHe&RB*F@qyJ zu&i!u$6i=BbpYB%gxZ5e_odp~h4vq{)6sev{e4U2f6m4FQ5=nTPAlMUZb!%ZrJO#R zSaaZYIQYmIZ4^=~|CDH*wK~1JV|ITwBcPVfjbeSYntJ9QxNv@buexexD$I*{y`>s4 zBlr0Z)IwTVzilKVLi52B5exj^4AXl3AaGEHTo79UOQpI&oAiO#joDHQLua*8+QhMC 
z&c9R}f=~H{oWTgX_e566`9HC&U<=nTe5&xew$O_JDZ)qELWu!zO1^^o#CY`JP;%2# zLK1EJ_yAqXv<~)`0la8BgrJP7yN3HEOxY}LR7a#GTLw~QU{ef}9Aqh%!RfA#xf^0B zn!)J4kCA-70Lu`KNEEu)bNDo3Sg676(}ADQ=D=A>CtNScObAehxT6CWAnwTl5)k*q zfN02JT7VtoFg9Qsa!8Lbl1blEoc(Y$`WixBhSq((H7=Z+xp&fCZByo1!5_UA?iuYn z=Q9kp>Vg)0WM0l3k@vc}`?l}kXAnhfd^{|Ph<Ry3dOD z8JvS|xMs)aFUSSf<=yd1V8k|cG zvt-I>bJw4_8*WLFvF5%%BN@6tlQH0)LOY$YrPh9M-@EQ#z~QWGJe`gBPz_K=<$P71 zr6ha>eUfc29z+B&R-5G^90rGuo6~;{0CPiR!VOSW6!)m6GTbS)xaJt24Wry3_o$}` z+_CNjm^Urb7dDLR!Z!svYA*hw_0Z3dt>yQoT!V;R< z2sd&{QOzMx@3dx1h<9xBFvL5xnG|vw-~1MGn$c_uIi(PEm&Qc4dR>^hSm(2qKU;sn zj$}&5y4%931r#-LQ_#Ld}zgrqv|Y0aYUobKPekN% z3QGEpMS%Q=N-4`}kURQp^ww#={!iqP)_=U<`!B{7MBL|F>_-D1++FcEtNt&FjkC6~ zUEjd~9fVFJHZWV5(gT{Jp~DH^qwLPLVuFW54R7`tqQrG7(z9hL8FLqD-~~{d_F<-% z=@*j8=!~9Ti0h(BklfZ$UD@ZHHx+)YDhoapOuc-xY+RS z@1Ai1Bs|-UGBUp~Cff^IoVmz~V#-^DqB05=sZdk7i?S%W{6!*EL*61c%C}&V*?Vmh zK2ytN%Ftl6nY=X_B}hMg+1p}^YPOctl&T?QOEzkQ)>{zcMLa!zDEILI+VFPMCwhb0 z+wX4}9mU2ASAaU|i21Y0!*!v>IaZh8PEbR|K1b9y?TXlJTgtWk#dC0Fs1&@-uc73S=>p&YdfP^wBmZf82DZL;WiVl+CbTp#CourKjdQWM~1{|TT zdx0^b+$aDQS=>;$kVax(Jly)B#nwU7%K&*s;QlGesl}$(7NXrWtdR-OINjM4Y;Qad z7@nqX+I2MI15F@!08#tdNrMu^O{zI?wYlk}DLeepcA#ynv=vB+5AhAaSVDcnFvC#a z5DY047lC;T#rb1Qp*SKx8l3&<@N9IGpQtZ8ZhX^nmO0o@40r4F0Q2@EK!#J^k`Dqr zPT$V>Q2<_t^6dZ~&hGD?IHcil_@y~fn*ziYDHCPR+@=dD$*&Ika}53! 
z7maD5e3^O3=L^MY9o|0l!XR2-M);lc<;7(knjCV^9L&8$HIe{VPMx*{n;RJbx>Ncs zyOu_J;E&&&k`D-@^T3iR^6bF3{lCHJ#ENS)ooCOqmK8$3{Vyd`*$yLqjk2$I0d!0c{uHKw&gfqYHxFLPU~!o za{6gI`uCH>yXJ|d^ru3GbUk_VlH;e~)=YVL@ly)cw0WdRy3AbywU>FjoNA^8yNqh` z|7cFDx~Z8%mlRY9IhW$EiCE@?l?djfeV(8Ze8LznpO%qId5 z!y!gmqjX*3$oG-ulktq1tB{8U5NX7Rh;4!RSI`Z(2l;odE_~$E9Bf+MT z*SbsmZ~&UBFnX&iLFau+IdQyg-V^GFCs65IqpDp!I#a=pc;6?X(Rp>G-=XmZx^hX5 z_}?uc2S~Z(ZMCiy=u6cjnJ#MGhCkDUE0`woz%+3t0e@u(rit2LX#&`Ir!{Pk92AWZU&ERbI+xvRJmx7uW5}AmpTyEetKVr40Au;R8qBn^oIj zJl=X5S=lKlE}loaWzSG~datw1WvPq`M^xM7ct~^|0#NVt)QVOW^H5zGdR)k4jqm4p z-sq9!^~1kkiif%$GMbupuZ{Jw5>-`n_zZ<8Iz*v1iVlNN@KcT z^f+|Pvdvkj#Cd{sXYQIb+Iul*O<9g1oyLClS`B zqHFm1*shAkLc$!LwBnUQRMzq%a0$Sq((P-vTMFqkZ?oP$IGfE^(34dg($$ki4DssK zs}6bT`DhGL>-ne;A@s0XL-Kl6IZ^@3D@9Q`$JdIx^`^C4#D~}mnbcjb4jt&2fP)cd zop>-hEXK6**DS`&^3(5+-7U`5bIDeN-$oRZ7pdjjoV%@G9E|*)ZFBy%Z*e~Iw(`)G z;vdM$(BkiUZ?bIIZ;uy_X%78S9EEg274PVME?mD=nX-IOk6Xt!+mmHTyf{R6E#H%E zC{GXZbI1AH*h#-4z;wzG>{gEk&PRgEjUT+PobJ821myJwgRKave{oI^!B)f)sPR|< zTM`nfnAbFZ1>}ZA=UH8Excq_eE_Q^Y`&>{POk%mzxWT3f85T z%?oRjrPy7nbQ0l{Ld#0J1~h-c`>2%4!6(^|q_2AwN~&}V+i2+yXxMt$ycpci6wfeZ zti%>oYD+&in z@GN^G6IDz^TrL#8LrTO;QM*w5Z{DZ|Rsg$|M(@3j6z=&W0u|n%bxB@vb7u}`)`#EMX;-_?6{n~0RVsv!YN$?hLx`eN#=wKJI zC;4Xh#9r1g#TEy;p0AKB9W4t9y6a;5`F%Gmcp?L>DnkCcDR>|tuug-G8d&I!niZUfr4S}ur*echxXMhtA8 zCw|+%d5w5uV3UB@w11P4Sbks=pP1Te^@fE3U5<5Kn38N{RezEyI4Q9X90TLWf8PC17B&kN*fgzmP0o z;YpqUDtRh=NmWAvoM1T9ql2eS!ID`DQ^68#%DvpB>nT(DOBYh)@|Jj08VZ)EQ*a#T z{G%?i5lq}*@xNhu-umJ?ON_#OO8>2kD1r$@X2M7Iiy0i74E~7(bvI{B&NE*8vcQEZkosp9SBrJAdhX(Zx$wODSknKPucO_jf#h{bD{vlEQsf|A!a#DMoJG=k+(a zxK9DQHN)ZWq+!?M+y+=oljLNV3;;myj$^!zQu6cXtzTR|+nVs-P;@pDwJ;j~VrwGm z^y|_6#aL8#_{$f(3*bNcCtr1x86U>0a)JNnITiW&^A|2qgENhVtoYc^N4VuEr%$2_ z?yG6Qi9_AoiPMS{Q}2rLf+un6tr4*#&6W|$ynR2o4&~2BJpHEytUS0^hMD4;iwh8K za%)x1e3+s(dE|k)8`)#l{mW%q<_K~lr6*!4nAS@Hzvq1&7t%P##bET z!cw&+7%bYn+ke&2Cg z@(!%5m*PaE?Cn*0Lmz)_N1y)kkn6+7z9 zo4T>-0Yq;$9J`TRQ(84QHz2Ex8o6U$MgEw>m9$LUJo>89ja6&&Ub68Ut8jDK);*S< 
z8^(T^b&ho=Om-MOVuAyce^ny28n2a~^C^`BeP}rMQ+fw3%sz!B_d3i0-ui;VEuoqV z)9~(E7u_7};~Y;NauZLl4gM%{u7~zf>-g zDkyd;{(TX|0*SSR5ob@mYl5%Tx`)@^4Gu3M=EDfDP|@%R>D@2FK?b2W2mn-rX5zc+Of&S9o=~nYq@-&V#j;x61bmTp8 zGi(h7JMVMPAOXDHBX&d8x>FKFw-13Tg*V{H2m}?=3N+&bZCx|=WfUV ztyb)~v*-!xRWEbRQgfUJgNoG&L^Ei7MMU7oKFiD@gJ}NDQKxi^fnXyhMho0H^=<$^J>TOE2EK^HrPf|nU)HV9p{AOrN|-9Lmujb4m^ZD= zoO02Jx~&G}rpl*ym`RSy1fJx_u|lEGvp5vdLJ7|aI5b$zxCnMs?r1aFJ$5Zw`MT{@`8V3`1S!k;xc`tfI5mUust+EV9lBr%$tx3(j zj9zD7Vns7?T3=FiQZ-cUcSL_fSA^v%qTk0o;94S6B|&S&=-on6sV=D4Y=Pg_-Jjo9 zEzAWDISq^jhny-#l%wGWhL@v33u6R|cn{^mPE@|IZLyORsh&o(8iKwkWs$ACpz#Vc zCU;!*m`Zh)1DlXL9k~g8v#l~nQj-gcQr$L*mq-gXzqkqIy#N`MX50B={XX0(CdBJg&(xJPn31wS%pMQY*G$}6X-m`ecZ%bZ%AjNy>v?R4GW z7oR1dvH$jc#d|xMl4CC5Pt*Go5#ZFTVg?u){#oZ0@PY}i3;_ne4mV2p2i3E$&aUrq zGy%P=Ou&2=hca3wVV-~k!Ro>Fmo_n$L48{v9SN!vN0qjP(L|s=ahn!RuhO7W9um)# zOQW)c=t|F>P*4+t&Z#CVc8MV$#g#vc&&z$y_V5C9EB6U|n_`zQay6HMJ1;ce@#O}< zUjGs&jZegVG|Umra^%JV_|MD7of@Feq=>3G>P~>`NS=c`_LK?K4EAswDWG>$aq;su zXh7MKw=+>?NlWb<|ohbt#X|wdziJy%r5dStrZ&>!!K^+=4N=#*kdriN8 zon>t}H*fTLWBc^0}O``%KogXWBT0mj?i3`S(fS~EpUgTM{kM0=W7ToRpROdLajv0a9l#ziy|xN8j@y3j z#zkjA)S|DSY6GS@OJK|_r{HU9fh!$!hQXB%tYG{7KvL%QO7CoOPtkJP@5_=p01TC! 
zX9u6#9RlW+!T*$3AZK}Hb944;-GQtUf4dxwf)GhN?1}|ty8{ikc^c|(4XZ4uVT}`B zKU<}LF^6JKHR2)7W1)4!$%%#cn{r}_ z`zciX5_hkFL7G$dGf3xTC6t;9J4Q(7SRxdJmCPB!9nQBy|6v(m7SUEs1uoKO0vG9b z2wl4`6S;Mz{Za(Au&2t#%^Lq*T<)~$v23n#oLt7PYA%BPx0%N=PrpPA6XICBU(@Cv zCzAi!*S7vGPI&gJ!2xj4yX{M{;TNtNSVW!AqH#fZG7@@zz$b~Rc^8sToAkrn@VT{Q zer1xlnObWFV;t=Os|E=ufqikmxY?ORmneUu^DDY9B!W!jQk-x^1Vp4gRya1I=)o5@ z3prZ#Pj^(?Jw?P%zy#)HiUrT31+3u|fI1j=2MF^slFkcHMlBg6H49^}4QvQw2?i*V zGTL_;@)L#LH0^TbR|)5tjRh^SsmCX0lFtq^tvh=j|otL@hs2`jLN5npu zK(sS=9um>|vClC-kU_M6eo&{Rl-7IVS8*@|exO2AZ)dSPbsHG$=OpK(V{^J^)|c~z zG%SSL?x#dpOZ~as|2nwa|NVWnwhD?kssSAhk0dR#;#R$orSn`8w6}sYB77fQVY7hH z#y*JPv^YdvdJ4ya@-|q_i2&iXzT!Xy>B$kY& z<-GOy%CJ$>EqQ!AmL`$=5olQmEs>Abw$&Q?q4)a&%Hba`@(5>xPleTLMJ%qUN2G!Z zm6w+(U(oOf>zNm}YKLVV1Am@&5Q#AGwCw{=Tj5#uVCB+R&rAwNU4l)BS8F~Akp5sw zB2GH7XrS1UP0A6D?vNqa5&QB^Sny*VG)9`cW*f6OI?3nf~}g?}K} zsapM3T1#CUI3gP}>Gn=KVDg~sgw<8PYn^sA{Zowaaq2V72(s zFfcOIRid6$YeDi6+_G^~AIEx&N#3 zGT|P%-CV+R=F37<+tBg4Z35rv@{gvZ5VNRe8Im1EPtr6`>y74VIQtI5&zgpGD#AAXVkIYu*&ptopqJO;(*4rqtr{Jw&0x?NiB7`SMnG@ z^(QoW%3^u|fDrnv30QlvgSD5}n^oDaDVhBYb30D2l@Txh`loyW@aQ~d(A72AJgimZ z+UJXWx}{%QMMyuJ^YNCxZXpkU$a{@FLH5p*J5grPkB)SoY59on2$*k`zQsSevbxgS zAU*0vdXhaVk>L|JA`$8H&Sw%hqbOX@%AmA*{E?GCyqrc)O!xIm57hU?-yKNv-QVhP zGh)i8B0a2!Bam1XU z&zCqdobL0-YHg%XS%PxemyaUUpV|06m%VN)Cs1cgSiAG(4vCq=eM$ek zi=dp<>5gHkcds1($#JRRbDzLbi3lH^Rr-#Gu+`MAhPyxU39sxoXP^vtyO!Ne%jDY~ z_+w6BJT0BLVa3X6D(S2MV+RW`WToWoRNdbfpJl*pg-5u##N+F{YdVeDa3YPw9JiVL zRmfjqLawm)j}MU?JZNWs1xkprJxlThY1sYQk5@@xlJ@8e4;cz)KYEZn18D+oeFmm`-Wd87qI8>8e!&hZ6*AUY1{FoZx>&0n0J5lJhy^>$EgfQ z{TT0?V41iDHY(8m>&w6@$4wFvTuLB2>`Gjr~DzocmlhH%q zLa1+SufN~vHMea(=EuLzu-ewdLP9b>GszG!GvmYHt+MuylPCr}YM&nr$#cmpFXLW* zNcf`pfD8R0T|+cE`xiBvJD9oV_Ut>C7e>Nyr%-a{rl4 zxSfcT?a)W!$<`0n*>+fxM<4iVJQ4V z1+3YP;QZG-o(Nia5gReQcPq`a7!O|)de89W<$d0^eZ2QdPp0p$%#<>R*v;BQLN+j) z^#`T{lJ@G0XEoE%cf3_y}UUY=6)TH&ttUW0S{h9ZNo8uu9 z2p-~JzF>NM>bnXYEEac5V!#UP=CCa|<~sc2tiocJQ+C06#M?L(ix>R991j8;Q@70( zjX_k>=K}sTHJ^xh38IqBpLv5^fiqa`|3oDqpp7PAzAh8+8V_vGJ*K)#{|UwYnEozT 
z`F`9(?iJWw_=6X5E(x!>${X5Bnx=@n$oBn5Hrko|v2v@VW`3050p?UD*xbNyVz`=G z%56R!H5q{=bH(4M7*Q`ZFq03d$2&Rj&io!I=S_wIbI0>vx#NMX!Z)G^^|!vQJgA53 z6W-m3tBiilS1!l)D&}?Xr((_(-tw_eH39Y9`!`mrEC8x~`_;xaoY$&fGwvcNRFx0G za;`ouZ4ATOu3C?{naPLB(w}sUgqQh6)ia$8kBlE>(lhV=2)uuWf_@cP?2qN{s(&(6 z{pMLN9`{1y8%a6e14h=*s^3)0yFL}OuE^XKC>O98HrojIIk(R`(gKQAnF4)8zy*5? z_f126X!nOkB+RD+WT{THR`pkv%NES!gX+n>825=rCcRm_GZzAV|6O3Q|17ZjzyfO( z`kIhu6(ze<;go1i+?orlNN`U^ys5<(%3bYlZ&IA4-8^`qE@e!H+E#T z_2DaDj%tDh&xSS&lXNJ8s@X5i!pt4y47?7nJIZL4MWPsCe6Xk)dkicr=ZZ3M)DFxT zCh!W^n0@+s_s+mh2d=T?mH5IZke>=NsghK_XPUYbEOzL3$S}FL&{y1DG?w=JvZM=4 zzkSsjTKD7Tj_PpORS%yq2T@=0eG32VU(xWhtO87bvyhbExwb$E(0{Td&*iSnJRiH|bxHcp(OX(PlG2@^FHSrtg4T-FRn=mYr2gbEN15iChP5YY zmZY@g=qZD7&!h!7arJn>sOOCRofv!Wq!~HTIjlEXw|HE!{G(?8);seA%o1m}J3}0f zN^v_ZlF>0cFiAx04xgkTdWT2SE++0qPwo7J7^i1xq@OxD7aHgFVsV_nEG=M$syx}swNS~`5iK9W^1xUO?vllxDf7MpExbX8ED-nJ{+l5+#%;W3}S z4N_R!zbLGA!B1G?5A2=wFDK;DC)AFB(z^U7pNxsbPM^pLnBz37MEXSiJQJ5h$OPgO zKPTv!L*I!Fo*>yi{Zt!Nr@g(-wsCJ;Dei#9BRb{)=7ESk;PVhfAMkkC#U$RCs-1rr z=i4J<{zf^LyMITHEWaf+=-d^O|gSbRO?Ijc!aKxAkMI{C_&p zShCs4d$?{##Tn#+m|*)|zeOYvjRo$RR7xtFfH`2uCDmzguTs)ZB@h67NQhRBovQzx zFGj(90X`SHEG3*>A}IW{9ntK|ru`dfoW%&pj{ONYCOFPmR}Xn?$MYc4XqHCmqnq=j z9Kh4pmym3Uox{rU;%iq*UAfj9WhA0KoUM zb3@ga<)|tS;61xz)jW`KDjy5}VMH9ja3mPp76Z%b@N$dwi#dI=Ezbp7uk?1l z^6w!QMH@kT@H`K%j=oAQL0bcEsqKwB52kbeM_@=NZ0CcJRyotsTkgyzr@HUHQE zS;~?BgX#s5vPCmn$vVI-=-97dMoLIbxnHoUMYxeAl+wIKsg-R{7xWHld}9_#XrNQ= z-|S9UrDK(CFCXMe7_ECp(M&SP64Xj=8ll7}_p%|O+5Vs(iq1v&fzB=&JQ^t$nEip0 zKif(Y{unx@sKW*y_xVK!fGn0%`HQ$j$f}mvxu7bYLv_LsT^OnvAL5CIITjjT-{!ULTo_^dMJHRK^}P$i`=SkVO;oSD9`z^in3c~Q;KV}5vJ!6JfE z_GaGzrL1zFI^~%swKF12vIZLCx%xQcxLHGuoi*p!>>K`0PWJGv!{M)j7BVgfPeI z3S5{{Gv7)I?gTMZ=@UWe>w=m*e#*S!KaPM)@{e)C|zpU_xwAMZ@LrFn46TO zc`!7lj1rOQPS_?2%~0MlM;U0>MPcrss*q1|-AUWTQPDcaH@2)%z1kBuw%{n)_}$Fb z8>)V4>$&qxYS~a)L?;KR9urALNX(%P#)Z#cFNRN3+>50R6y-Oaq$=%6rk+D+m;Af z(o#LgS*5eBt`@8-f%uUKs4Cqj`G;g}Bq)To@ht}`aC6+# ztV1ugium0u&^D$Cw6Wnpp&jBiav$(;zJnwqI?2`ak>&UP2XnkV@l#AX108(Z%Q(NW 
z&ZW3CfjMX5f?lVwwx!SU%aAqorE_W_+H^`wG-?)*bd4ovwFT{(nEr?HCHcTA@xC^K42qz_nw-v?lDVVwDvJ95216+mnVok z=E<|uPPwsE8~;e#^I1PBq&si7FOzf z&=cN=eg&&98`goVX1~w}%qL<1FHNp{Ni^;0%r0EZ3$k{z$iE*KKc%`P3p%ezW%R4Y zYvnkL=Mh0n)RuTaaUdOHNnA}6w0{}YT5?behwvBd^5tp9`>H1gm*0!W(-Ds;uZ4zZ zuF7>WWv}XVJ<=9WEQkLI%`cvg02>4FR8h85Faqu-+V=G5kGWJx+Gzv=A-%CCeoTKC zT;OkVdw`eQ3lNgaSZTTaKS6Ud_7G+5r}?L3-$}X<+V^gGP<>y3*kzr1@l@#TX0bg- zHR)Z+VMC!D^fYqKd8pnMCL!9$dGwKG8K`bO4&AaqLLw^VLVKhG8YRMGQwT!@g`+;} zEf=n;gQ~C)ZCa)8G&~lCX&T?1f0F^=VW-;=z8x5Esf!c--WKntY`2U(Ll8eFR(>*aa~@85&4cXFX9T*^8a0V{ncH6iz=|8zs*&=ulo=*5xNc6 zd!g%+P|Q`_kY}jiB3K+)$fUA2si&;%!#qT)gM|!<7G|jLu@xuk)+a1$>Tx6M(``hD zLJD2AeFB!ri(eIjNR()3+DYkivRoR$?3epJSLQS*E!+P_=mpFC#mGX>m&HDM4|Vpg z-VW09QM%24+f-*rX^09mf?-tIThjwj%l5^7>U$eSIEo>JHdbt?R|iWz^tjMBZn>&R zE60<12*0Q&|Jb59ya<=J> z3;*`D;wbI!xyu~4AL&^0H9r|2& zPjgSHxJo-+ZBMKCjUIEJRP?g0o&nNC8Qk7CnRc~jQF0(!G!P~lFB+Rh`|5Y))y*%z z4-lVKULg7|S_Elc%ukxP38Z-oL7Er&Pn!4TG4O{8r&8;6(v;pBt%%T{G;iGhh336_ z#$K7_1S@O53rKMpY(>6%LB|ftrpk1jB2vzS!YQZVluI@O$hoZd_?4GzPISGvD-X2k zHP_s2xOJo>$3_PQGF_pSuL>t`*veFn7hcz?eF>_^I*RzS-=~ee1C3Np_${xet+kD^ z^pMQ2PX43A-rZPuByk+()&v%CQTXx}R%3ZVr!nc6YVd82T*C32LHeR;h5E>2 zi$Q8_zm#LqK?!ZYSWnUgPy3#kzna}?H3fxU2{qJ3UN34S3e_{$O_oIprEg5)4}M1; zm@e}ao}gQLm{7ARzSxw&Ho9w3-T($;nb9xjf zeV11U84F56YaEw*ki&|TRfCTUH{d>Gdl!ltQ;#n%Z{*-umznc?FYP%MnJQu(mxqy! zVaJNgRl0Wh;0{KTWrVirEg!1Ag+jY5oL7&vqz;?d;<8?+BijlfL!+s zp3MJ-Cn3FkU>k(gxSDeh#pAs#{MUY_pIS<(X2!oooU8Ktosj%-X18? 
zV@(cb6uV;uyA>E0948jvr>)T9L1!#xoDVA3OzH5y!k)Ja(RF;KQJ8x`S1hMcTtph7P#E6YDPT+H2S^=6KLJ5Fp$bxu}HX#kedt}3RL z;~*YMWM!;2$3rLal?E{>~<-ys#Vm+ndB+@Ny_5Q2(^GDp>+|~j)AKJk)9lYfk zm9A+3oF?Q?5A1TMWMnQ`r)1BgHiT=z7YWm2b!o=ahqf@1!3LDe_q# z;(rMK-PZ4O-s`1FF}F(57;sMc@wFl*|Do8Md~0@1BP+E$P-m#064f8CJOEAMu`bS8 zI+ zfi^-%5kF-pU{AdYHv*6@gN*N4pPc%P-!l2dZvoQ_o24c4A+P=yn*bj{Y;x9rcLA}9 z%YVWqV2>N*>Al@A@?)((#3guHh2=k#;g7RQR{H$J@Y>h%_=MXPPck_(5^O7ytVR*x z=?QZclzm98&v!wuoudSO;y3kH#M$7!9=8Fyn=3vQa*8WC6+M|Q0&l{tOte-E4JGk<z3QsIH7x*Nsl{dl_ zTKO(gZ=9@5R8~Yllb@Q{Z~PVc{WWp=yl<4PyLC?*e`&R{Azj4t;K9D;vlmSeU0nmz zUQA>%kT(vrCvKw(d(2ltP@O;X_ds+A@3molVFiBfzh}L{ z2N&iY54dvM9(=u~Iqp8j)h8bLezbx=(-vy`%4+H?wGs8Z}wQC1ej54-eW46tQK+b3FT!TE~qYcz>*s9 zmZ(>_6!1TLKW=rxY1?8=4ww4(mTIB!M0*0@nc-q(FO+mqk=0Wed~ z_RlN_@m?OjVeB?nb8P3%ToPh0i+X&iBF4$b^!wjuyzj2y9C#ln9{y)| zG=X*2iOUcO*ug&G6R;uD_jg0Y8SC-1ftr-lgFX9gQ0!ZHCH zJv-^)8v=9RX4k{L&Vhr-E%wn(jDcPv>r2%OGxtKhXxE3TB`{L~(o_dp6Z#X%4hyq# zLAB&=jO#?zlNCg3o@Z7`q5D}C%KdT)pmh-<%ZF=i$I4XWgm7%Qk4gPR|12_oX|xt< zl^RZhH>B)OW*hL_AnUdE)8SrX>-Y}x0VfUCB58w>O0Id^wYk+zU$j8c8a36s= zYwc&wqM5$FISFsrnOXNAe^x8MF6}TKC{1_ZJ`rb+XpyBqI6tB8@V2@B4>y^0+iK0~ zgSq!}a#6LH!BO*f)v5M=>b1f9_bTxF@78~Q+jwhB+ZWVQVIQKYJb*N<6x&S#JTSW!&R_@p5O3U+|qJjSqhYzQQ)Ti~ZvtYFCJ=*+fEYv&l$w`?Bcr8Elx4y80 z=Umkk?7JBDDAQ{Fn&xw>b&76#B zWJ9lNzOd>E1IvbKBEzg`GPWz%f(dk*VP@Gvx05)(9Q@rvHz=>(zG^`DVVl;#=EF9X z!C18;Tuc^D0ZAE$@9_3L5C2h>eN=CS<4ECF+^Mk2py*c5zRR8>0pEFV)vx$YT?`){ z$Da}n*X2v;h3g8YyoC=4I8%5MH8l%9pT5i2a;yCCM^RSSP>+(W^ur(JSiKMBx-BLR zx1j3Q>6;Jx5pTr<*X2$j%61@PTMY7TW-%)ygGNwsUDkb`+${5N>Ec;~Icc+S_G*7O zWO83cBGp-%YmzW`+*4+EF$k)XBdakXV z?-QU}u4p7M_Ac9&e66bAgniOW^?+qbPP=BBf08}- z#HSz2x`ZqI)xJNAlcoI(=W+f?k-YJcety#$AD+qLym27c=#_IV$Jlx{G1;1JHa5>H zB-}OLikWP!CEG`%0)g76?Q38eGf+IKv6?2gMOjT#ugUd23fpJzyN22q?fZt>=k0qR zwvTe&fKFb>J6Y?;8uZH=*8uu8z`Yuv^Do$*tt`>70QY$ zlxwPe31RP*Di+`?6*?RiUuq-1KHB-HJAEKDASca_hT|HEsdIbPC;ciVl(>w#_3M=r*dSC##<+E4{wJ~Ct`;Ae5b=m`i{ca_4_7SYfQtRK40|Zu8|L2@6A4WwTKg_ z@k6bfW}j3o;@E3A`s%rBCbK_NPKIQErk*UwesygUP#or|QSX0OyL%-jqe`{cRIPWm 
z-zolYJRdfFX)Bz%(H3J#-iXCmQa8?F3^;thW8CylQ(8BS`m7a))tzr<(?e=%;nb`( zCyEC#i%e9LSNgiCCfWMj@h6D;xMLMxa%0dd#5HC zzq`9qeFs$7bA6;&eUcVCLgkcp}jaytHo%P9AdXX z(G+?t)n`gpZ{!)i`%Qhrx8Lhb74r*f+i!t?*z_7O+$yhiI(-#z-kRpcg|x{lpUPha zkXyj5Q~EZ5ertoQ{^g?GRiDDe4RUOKHZ%V*J{$;LWTwXIX8Y;y3Zihg`x;q~FZMO^ z9$)EWCd1-q_fmmhVN=#)LHIP^@kXvh|G25=G*A7{S(9JteJ)&hn_tl3TMFRL)-=ds zZcVe}#JA+RaCjI$!_(eSzraINO26nuuh1!$|Bte_fQou;+rE_^nxVU9Xl3XI8M;#i ziy9RWK>_KIt^sCf5NRn<0R`z61_ep!2I-J4VctLL-uJin_q^YCKf_)u_gXCf3!Ibd zJkI01e#gmqPNT`*VYuS0y?Heyt-S$NrQ3TXs*R?5uP;dd+d0!bUh&I^QBzk>=5vI> zzSYY;kqugV)7nnadz;YupuEFtf9}*I4`?EEzx*F$ei#!Jw=mWY=g|C|nVbdU6w z$R3qSI0HeHRSH8+8%&>+`V;Ir>H9XA1*uM3sWIu=r&4p$PU{g)f+&WV@Miwdcxygt zm1u^qQO(fMFV0c_4*?FH0f9Vj;bg==Pli}V0#7tto2b8!#Pi=fB3AYNS#pVa2q9WsCygg zYG}Q+v`{FEh!0n&v4{_6sJe&`cPM%Fr=I_)e~4ubUKjd#2)I-i=Y~#`I%tIJauK-H z1g8Z2#1v--GLwL3s(tMx-W}AXj|{4$ULt$ctsEkxv?_TF?U7c24EfkHmP;VB3xi$QGP;$4Z$`VRhZBT!PyFbD5(GRJ#7);NwZyqBmfFsL{#%LP0* z2WYGBzix=!!C^-=g+YIafP9Q`GSN-+kRP-lpSw8IaZNNJ9}ApAEz2a}YF$7G_$U=` zNPFfcc!Ve5CZwVQ9}2TmnqeT4Gq z1-fbVDuX^JWyL}Z%>osZv%EkjV}H*hQt)Epn-sa0LiLpVZ?yCN&{I z6Py8bAbG9;eULnVzM2( zN3#jUBOk9Pe&!Rn4jwqH^`r#PDbX?)Zwqbt0*^g@1_z{grTHPGN;yziyHo@2r5Uaa zKheRw1UttDtLk7pK&cSU0SKZpSO|rB1uoZyOKNJ?1v|u3#s{w_Q`QEvB%`=M{6ORP z*B(CTyaF#4H8ZXCgB^0z1O9mvbY6>xH3`H*o3TkVSj{JKc#XWxwNPo3K&1rWpWxgy z@HONJO=ORfEcfG=0X-OgFMaPYye6Py1)qXREFO}qlTdiS3p{5 z)FBj+8H~`vXn>9RI5RIq()V8?DFtu(3cy64+>D6tO`#DI~&74|KE23vVEbh&x6U)y)Y0fceV&v)*N1- zv&;gz%a{EH;#v{xn9zF#oF^tWhTprAGeo@PO(iC1?x-ZXZMJ->Fh9_RWegC-;!8-b~B;%8=zXuCd!Bbq> z*iETX^MV%Efdy-b`T3*nsW z6pk2Dhu+d`l`#EO(fgfoPzVHq*G5DNyc|Y)vsDWI-&vOyVq)(z(|2FXZK56m*GSv{ zx`+-E5Gj&6aKu4}+w?GKqqz)=F z>y${F%T`>On&>lR*(i`0q%FeHl}MW_(iS}%0z=`C{t>20LjC^1V@9Zmh zFd*wK-ac2MlCl*ZKZr>im62tHXHJv`HzZdU0}1dJhGu!iK-2^jh_W$p;y=-=D;?&KvXU+rq2Yvs-&M zc-6ZSp+Kmd)<`2>q$VkvflEZyI=t;7T!Iqs*MGr}ATy^}N^AjEwb~vPcfN#nh4-^+ zu|sfCsvJh#jG=AM5u{+jxFXFR6b*N3NFI5o59A(15M82D&e%x~@kX=2omn~=A@rb4 z_!EP`AR;!+()1a#Kx&XASU$s&xp{!-Dn}J^W)x3~vhKO1Mbjac^V|UYIEa54D;Kwh 
znm{g)L)^L1oQy~aTW64_F*=XilR@ih952e0%acxvI_{I^J@}4noIA=Vd_fFi!F_zW z6N~z(^@9v=TKkDo;1;KJ0_rzVxw(IkY|fcv9R=fS~LIS>RNrx#qE%9;~F%qbJx1<6ZC zF~Jw9<~R{DoR5e`H8GNKH)`kSsNCRmt_At*mqZ)-w(M|1N+#vPv|g)VCL&gq!o=R9 z;6zimv!$rL2(#U(1-M;d z+1tU2nzjPjE5VAowpX(aC~s*kzYo~N_Bmr;`;S{X6R=n}JtauDYEpyOQ1-{8IN$_e zE?!6yN)?U_M)J0)W5nUaMEu+cNlvn0A&xc;j6GbEvOE!WH+Y?QfryhAB%{F>?8f~; zD!Vy2l$uG;76_RJNl?k64dt>2Kv%dv1oq1OLRs>^e94L3!xDZNSVZ2qbI?1hTl@HH z&L&72p$qKI0lYZ8icS0g>KN$c#OT%9! z5y1&of&7zad3hR33%pe$cAA3{NGHBFDUixznrukryPHTf<+wKjJ_z?4=AnTs*Hu)^ zHG6eg#vV`*u81Vl)wOX*bfr|zfVnDj7KE6*lS)2{kKt55#o3& z9C=K6hulD%E8PkN2#;k~xfQr)m$$EJacwMe@BRf-oqk0_Q%U25te+iy9=Rvn;s{xn zY}w?})lLb7U9qB|OoE#+Sy50W>5a_)`34^cbuv~(2h=ipJP*)d^oR^tLc77oSnb&J za8c?U*ST3k2cNr=8VmLpY3`!vxL<~RAYbtT!f^%BZ&hj-SIG0c(QI%pmKDZ4de9o& z%%CNZh*>iwz0|A)3=#uNX26)O28aYH{}GS!w_AS;i? zRC10-uU{(9y&z;8sPb~vQ1^v}UshG6%;vFXpd#REr?di;o$`^v%6i>W=DEMEH_+1E zr(pGa)Yx5AQ!aS{{KlS{Po6)Fwsnb=Eco+@U=T}TCoGZx6vI))S)1WKbL%@%z%2ls zEZ}tbT?(52OytFBz`h24jZ%M4Q`Y8MqF#YIL(yyD6S@oJT!&!KaLl}F2Imf!(cSP> zEmKM(+=w`8S{+j^BlVni>igXGOh)*siBu9V3JG%PD8F;tOBgYSk7*BH%{icc7F-yw z`cj2_#}VVrO`*;fBhTc?vNH?3H?RLbC&C@LoJG*WRL8RDEy`m~bSBlY1X_vmScwY@ zStZDEgch?K9a7S&4N~TS!pdD}YYt(d@JpasT<7ezj<71c%`YIPaRdpbyLe-JWSq|+T zF0Z*D!ev118Bq9Db)TbH*eF|74NR+&5K~B{ni`R&n-EmUsG11At~PilXF~M~r#-FF zTErN9P(SB@>RC!5wdzZ?smq=>7wS=m8f>9=B7FFq>;D0=e13?v1%8R<$S?MvzZ23h z4{~PHruh7wZ#Zir+?o0FBp*c<00X1>yjv)ny&6+YQGB_O%dp6Z3~16M2!M=7Q&Z2v zNTY5zknpG|AY7bSIFvnxv?naCFC@>Fjaelvj#72!tl{lbR;7U4V*3FYPrnKxpg#hL z4UOib&$$!2b3MT++lT!-FJGhTG3)0{qw|eG9f1DNZ)5k6C3kWzRVw{We_acWqzE)F14N+xHPXc`-1EA@%@n<~5#t zB;(OA4|SIOOprigcw+cgz54R^8ws_Bn;M>Y-?b8!3C?$jY4uG)oH`*r3I*q zBMR3G^K%8@*8}Xy1?Y>_3U6fHin6CJ-Y7iCk_}&FC>|)hN;#Rj%%*w>l}%XI{6+0c z_^Q96bcPMssE{GF01gMA)5l+%Zw@V9K))N6IzC`M!~azcjRio{)BrS1)d$qvC!fJU z76fG70!IxJ`ghy(k1U8EXa1$w0skBy3~`#yvB1_h1Y zsnj#>lMi|uvBP~?_8AB1`&^X=sP!n*0&Ew6XhlelNDgCcd*}nEG#v5!whwacuD!n$ zP0?*H&fM#u_MW8MPh6unUDYLqA$&z$*XbWTvdddo`->8Q``KY*C8*@kW^!c%5m&e> 
z1A)xIn((`->%`W>;M$0hjkZnB1CFwea0R_}{0}et4z7*UKjs_L{2*fc%vK{yBKIY7 zD8P+i%=p7~+ohamxpxEIXvg+#KW0rvxUrA5+8SgjeCZ|s^SE(KVrva*xquN}Yj$WQ zYNR0i4fMd%`gXW4dSp1<_r(ZB_*ufpyYRD&5sUCM+>L!(CuMKmG0G1?z4pCRv674% zW*_2vA7uNIJ_Z3PWkb2Xmmdp`CHFN19NhiDo_)r$A!X~YdMY^9h%<%#_NzHtXp7+* zg0It9yM({ga*!#LX#2`Nq_Xa8Jxp1P9(mlh$#Zb2Od~=;ZQb+3%U(B)al(yDAE<1< z*@k6HWU&SfMY;)$$$$7{`!M^BiX8hJ`VH+5YrT%@wYVGiK7{qMX?PQCsQsyfuin@d zY*@wc;^0M-)N?SNo+kL{7l#b%e*-g)Eunm@-?vEf`_tFwC)oZ2u!`@fN|@4^0qOpK zBOV&tu`5bO*$jD*t4UR=bI7)6-mmHt;&a5|g4~XhPs!S@aD3I6u%DBp{FqpEw{3%W zljtdLgq((an>%-d)OK@Q7&W_|56gCIgcOw$+Cy%8AVQd{A@E41tv1|3^(XP%Ft{-S zz{6~Do^xz>0LRdyU){V3rBrYg-kdsEDh8nGuyJ0^tm2J$&9e#Ifs44UJt5duuQlP_ z=BqnF-{z|`VbFG_?xQj1#C7gA2W^|ysfz>x6b-oi#J8!#dm!gVb0iV|kw>@YGI<+9 zj_4!&qmMpDV4oiuL|`M2ws8M?W;`{$N_tEcRapmI;CqHcQ$ZNS~W8?mM4<3HCLeiN*e;GwXc8xG_~h8(c~18xg_AF}P; z@Iy)ff+Nx91o30uu8mN-_S5L!MIi#bX;edYfOCQQ`Bj{kOp`EI0#}<}^FBSbZBo|EFMrLd)r8Z7z2YcUTdsi@AZ>9d%|LYUHe^II zMIC+_bRW{ilN|s=6dq}s>9r7mE8|K9XU+i9>MGEK;!I91BbEtrqcjdVYD`^Mmf6&k zqc%aViOU&k!@-UsHOam8F`KNe>^Y~Tlf%n?*AA6j{o|hC0@;hbvBBOKv>qZ52!ezF z&>lvJJRGhFGKe$OYjJ^y$CoAq-_-Po4?fg9lK%UFEO%iwPh9(!=X%LB9>S%lUb0TQ zx~eS;^mZ#R!9NmB61zU?dkp``I?3ir*vATh$R_c@vhlq^!41GBIHeYx6bNBdM_KY% ziGs+%4^W2Ma9WUy){_fR(De&Y5D7?54M@v9H<6XQ4=9J$@kcJ?fJ(Lr18PF$ZAte< zs-nODjY_OJ^xv}Kv9O5sRqy2|WoLk}E^n)%Qv#hPU!(e~PeT7Qk{S;f3(aBoG7AmK zjB~ak8|Y8Yms@)+PPPp9y!LW?Z}%D~0}*^YC(e5k%S+jZSr##VWGB(fAG4>U&o7_!q>J`8kx=r z_HOrW!5i7nsP=gKGPyx0mv4^%Y=vzwbI1|Rz#!W;moI}FUB@j=(POmr(v;(In zai0j-7t0*pY#pGz4jjjq3$9+!Lu%;)0Rh6t?wAlxsKK2oVbs##=U|FY7WKy zywu4LDM0_E>>PzC>6gbZJk)fAcv@whVlAt&-!3{(ij2ADT?SdJ<(O=W(8~;5utE{i zR?8+BK@e@LDXAwzq)1nbVVIR7fpBOd!;sYo(uy#&o`Kv-I%bwMGz#L*30#R&hEPDv z^__(}BOuILYzF1g3w98C)E)I52LKp!?^pV6X8)+nGN2n&<$w+;5|}UD8+Lr*QaTqo zb`jBzJzQEoP@CZ)|=W`iIL=;2H=m1*FZCFG2wgl42=I5pP0*1zxBZYPg*tzoO z{k#b{fK&=biH6gV@gK&=>3%jd=DA=Q+2kdlc3hhd{X>9O5Ey@`Z`v$asb^R&m?K$X zZl6X-Z2XZ{KyqZ5^`H$)NMGPn;&v9Km`IXMNwYLXF5$d0@)69jt%_{k5lIMR`{Y49 z?`bTH?tht}m`BP14WyCaZ`Bk*iH6b$@|$RNa=%JRF#ks&1kg|<{yG~@`p=cT#B$wX 
zUc$K)A}7+hJ+Kq5{HrNm*2FF1l<_PkLiwt?>bmnHqcFWz)8|E`Kw2SL3Z3PS)vahd z-268wZdPNlD@6J6DPPgVMU$EvdiJoQ**x`lCXLl6#(F>+;5Yr%z2}GiuYSEb@pGrG z_QH_o$UY0P6GC-tY@fe%&EOF?7NK@*hC_K2_|84 zD{_Ta*U8$8VxS|Xp=s|*ekSZp-Co1kiEH1@7@9YwQye*Xs;S5IOuUF%vm3JC_xBTJ za-~GP`$Yl5|r~{-0#h3bnX#SqH zM*K4^2za+7UI+)cm}ck5cjVpHk!a*(`^Y3xp=l%+sqkrJH$~2BGp3qi@p1|`?6n9* zt3#7K!F$?W(=N}HD1cgWC(Kjvf$(Bt%COZG(vxtpK83Ux})W|KOX)%CmuI_pr>h zsh?IySm>-Q*viP7F;Xq|J`Ej762F-+Qgq1ZXqV4TCX~yo$I3Q9CEp!BsnU^nr}sH0prc zt}*U_K8d+(BJ}%JA!8w8p<6-}mRikmq2D=$M$Ond=Gt~hfqXrZGNINTfuxV6b+O;6 zg)FR($-nfN`8{lTrdyQ?a)-@2@?cV6DWGjYxAgs%5qaOxl9}Sdc7j^BHO3zFyP+zR zKd9uf@@|mY(U-Y4_fzXoCIy%+m|2%WtVOIDV?TugL4G(w_st&I*fo`3ilX>pFT`Bx z@Tr{Si=U82X?mM;3<#FZOzkDcnF5;C4SxVn2LgUt{Iwqz%w?W8)&Um83~)qDjQ|$J zkH0Ml5n$5F^{6G2DdKvXC z`Wa~Gy4kSJWZ*O2Azw2J8-;*z!}o8N_=~Kl^Gvqq)uuqm{-dc^#I%HY>qGZE z^Sm`J^iP=jrQ#Z*_a^h$InMZ23N)2sE2xJy%}y;@pG)ApW?F-l5C9>Fx61TCrG-^6 ztO=DoGh4FRx4s+kjCH8n%-wP_YM*jw-mJ&69+P)fceOK*gNKaf-?LU=_`>`7JJ2hsaA<8}RZDRj zT-i6mWBH}+fUgqnU|{w7(}8>?orAEoH`)03h+@+Lf91PU-`EX$2mc?v`|_?jO{a}x zzcfoakQ6G7Nl{kkXS|N+g0||lgGFzk%QBEx5bF>dcE+$yY2|iv2iDSOpN2?o`#V^T zB#eYw57;n=bO!=)ek=}RWr;Sgn>p!O=a>g z4RIaN5Q)HiNdg$5t`GUWFmbv6`0Hzcm5S|g_B_p`{g3eg@QMHeUf(_ILSgUak&9{T ze^n3{GMD;$q_Vas86t*rKzq?tbQdGU<3e|l7|J`Kv#2dPjIK`YVnu91cPUdlI^|Us z)zz`h;~+bpAwkz3coL z(1k@G^ByplPDj?WA9$B)Sg{8xTt3xYpD%rA>5u2Fc)(kB)5f3NTkPP~h(+@`&%t4- zKk|%ZecBoeI&}gxgv77uZf7Tpettw(1Y)c+UH<`9DlOtG=YZNGm*`E@FFcSVuM0&g zVW+(MqMfL)Q(hyN(8Y zfWLBD%HHmWb87xS>gj4gPiH<`nKmrnLqv0Zfe+D?J7dzcZpfHRAiBA~s0=xm51}g1 zC65ssJZqQCH9BS0hCEqbK5^~nBkZ`uLM1&2ut{Ju3_-1tHUcs7ET5zw%oetau!qri zXtS(6acD2Y?YPGh22{gu+n66yvuB_)go}8EN>m?GLEMkump0cg&)|HGE(t+g;aTdv5trc>U)z4ox&uj46e%!{WJ0rQS5qAWpA+vYteH3VW@St#Z)pSJO0v{`!Q zY+K40I&`G(b!?NzP(1YsonHgy$PB;DxbmxKQBXm&5V!e!XE|CTx&#M6*re&*z%?fU zl2I$tO2`n8p^xazPdjDd0N&>7>?I)5ul_0CJW&T#G&gz26BZs8>Zc=5bvulLb8$LY zSgx`}Jt=7_qwC;d8GpLo{M7IwE=cLsdzT-;tabTdEL{Z<$sRhutd;Zcj!6*cn85$) zn2Ha;+>7O5F;X8h8ANU-F$~Y~T4BL_f4u1nRSo>be;L$o(SHnF>hHzM+U2KltlDmn 
zQ4z!%hE2IL6^IIz!(=GC@sD0d^gze;4#4F%(e)YYR}fneZ}!o!PDSMdb7xu9Kw|nm zASRL*$pV#8>FkRai)0~@F~M|lKID=4zA%Uqi)EpZ`O;~w@>Sir692b=zcAv~m8V@oTql>Yn^@p>o>|RcuvX6r=h3Zow9Kfo`L{?-g1O7*ZWG~Tp9U~ z$Wd49%`a!~&Leh~bx!@SF1T@jl&WrC2#sgfb7ZM*?UX>vq3_654|HDP`WQGN10YuJ zs%{cH569I;Ol&M{avpM2cVHCsHt~04y`I$h#F2G?B&EMxoMIv2m1wX;5AV!SgEot2=6g3gV+hX9?0W6NvDkrP0mz%k6K8KHHjojN>DLZ0!M zFI-z9JK7lUv^uYydmQf)>ymLNvETmK)Xs9K@x+OH3kSSX$O#?)*6fAVPFA(IArt%y zix>12XXi(lml|)wCcZDc<#gw+9>+}TZC-Ycjq{EK2s6*P-pP6TVwkVp!}GM?2}M8T za3b}*v6CN{tZ`4o^F8J@x09mhy`39>t#8<$uV7z~+qo3eUK|ypjxS8!ROLusq|0H? zWNOZOg8r;-{957w_BXIIf8;gW|4Lr-u>NLFg30-){<*#sj8#AqSuDC-H?;rT{J&kKtmLPsT z-?D)+1HLwKsBu<-B@I3ualRo+rCFEwGk;;WWyZfW#f9)yGI|Rva`a4ecj0!)f9^8r zk~gs8C9X9Y32V7bqi$H3|AZlgfVjiZ_SF-C5KkI96X~cHS>h#=*Oo~QpC%Dd6@oxT0$VaE#~`sr z#G-rG&(zdL6A#u9DH9LE5O)*4+$XS*Z5l;QD}j&|8bw{Jt04x4W?HlFX(WxRQd*KP z*emXQKT$0`d_U7#*!h04hxi-2;;MZ#9*oWL25{lFL~0i6DqUH#IZ&mX)Po)&+-ci)qMGhU-8A|FaYqC=q!wGA0m>4raP<9^snOs z0~{YI!0}OhAsD8BR_fWGR_gRUP##Qw+Y^9ML0X7?_s6%cd-K2cts{p`>O$sT(g3s( z*e6~-MI9|d?Hc3$sJQclV56Ko1RvB(TiwVy8$k?}N$ASUOOIpHUNoA^M#w-P@r`PB zN@}~AI6sfeO-RpKkbm`(Z$saXU7OI5Nx3Mk-ztHLk5#28vA^g)&NZK3(40H}dH7rp z|9R*fJO6p&Tq9pKI&Pe=Itt7WB?dSqK-v~D)Gk3@RZ9{YYciL&z^Ki{Z?4%Xs(tU( z!WHeQf(2^rhP(wiZNK~lckOq%dk?g?vi2Oc?uS>XgyW{=odIo}VmVJiXsmd!CO~zbK+VB4O;GW7*#d znEJa2pfAeR65eDvT)F>(Z(M(r?K8Qt8?t;L;cn9?<3oyc->0E~Kg1Yh66S)Je8X)C zkxrg2_9SVpq(z1vh_?l&zkc$zqZ)rnKmGIxTe~ODlKE#j!>PobdkKdH@pP7X4QdpCe4i ziqiG(Qzg=xe$DPz`_3hB)37?}v!1DAfip!SVUfK0cRK-L*lV%(Ua;4~@8PMmoh2#% zF|iGe^gg+mikp-rMBXg)h>tuhJW7cKC@M*jnT1EuKx)>M>h}#(?ZUcbZQ8^`qg2)J zwgS9{saoH2P3eq}X%Z_;6%Fo_BnlfJGbK8i9A8e%G(M(HoHjWoN%V)Uc|DkHDUnO1 zZ7EUi>c8+})_?6(rAG<~EE)qrS|Hljg=Pky=mgx1%VHy2P1CdHgc4Z`*R}5p3!IuT zs=tc+oNc;V#G>{pQfGxGCp*J@kI zX4h(K$>pv{%cNWXSONU4Ofw5n7Gv1gJB0%I_H@^unRXWdNm|;)Lq-*;#jK_quU09q zcNEF0d{Zl~HJprJZ|vS!t^Y*SgEKXVD)yNQdy8EEw9?@Fd_qC$6^zToj{#uMZG{FAacybn!_9(2fkASFRqD5KrYZ~xU~JYsBf@F3gmQU%puaS^Nfn}9Kq z!CL@x1jxhsH25{AGG^>}(5vXK)`qOTlu2#SSp6Vd&Vh8r!@_z@`qjT>F@K>}?joms 
zd68#26R$*%;ecj0+&;-@SK#02Ta6~(tQArw-h>tICW5&D=45+NQS-4t#mb=pi^OQAqvG6DO)fbwzrowe4m(hcW}L6kw}8KNJird5HD6KRna^cYQn7dn5Ck%NWw@C z4RB%xE;uoPV={v^g?Cjqi0y}sYa_-s1~;=0I4U{{74$an7hUQ8HJf6e_Xplie8L;p zXR@p6h7a={MW2zcJ}|tgeV6u`OOcDo$KaalgKr8PNuJ3SjcZI=t?C=PC!LTFy1#0m z-#;s&$i1g^W(W{gVsob8XEduWg_&BuGW!yPJ-JgV7vK@YM)b+!!L9uBOZyr{O0>W) zbKr>=qrb_sL8ysTQjON&eWOE$XV5_fy}Js}gbjzaC%+8VD^J!A`l?PY4f?7~#txoo zPg)F4Yo0wGoK~w(@WS7}waT8Q7;+-M%ATY6;-sU<-55){A3Z3X=f}AJ&KOI#zh#02 z?OPZF@D(M(I&8<$#f&)KL<%X3`^@Gdb1bn&el@b_h1y-#XS+qa#vkKqo((?EcO-n~ zT_mSE`E>QGp*#BI>fm9XV)#kG;FqG+QIr@XaYI10Xx2&?7&bN@hV$0)mCDrq5jf{&<*kR>{$-7 z4qIuxR(vM8fsvs02Jz5-1}z)XXjvwWUfcYqqS53`4|~nDZD^xi>|6(`g4?f)zb$x5 z0)SN}=z*oH@Lx_FG1KNNWwlmcaY!F5Efge<=>Ne-79uXMrgDX>XP_6}8YNz*-a0Ps zbFH;N+()hTQ2Zynm0kRv!Tc2fZDKv58O)Q2 zSN0!?MmzN$U5#e%JM#Q{U%k_gD0jAh`Y|7w7n6FQWwsSDnT&W>UUqsY6VqbfOtzH_ z*(2K#%~nY!w}B_mLvzK`b7tkEU)rwg+pq)N7zX9sw00|TCR-Mj+{E@G@x?wf-FYFV zHQRK9+~oE|@wd58#iHeGCj|cy=-Yc{!3{XjyaG8ueZ0-63VcG=2JN7Y5~NC|8uqv& zd1v4W?TkRH1oKVYxA;{)RX66xo<9FIJX+M#&*Z3Ds)D>Ndi19 zeTT7%tvNAvP3z9Ls*Kie$VdEdZ$7DySW~#eK7WEU{ISZfWuIA()vhKYf7~vv*Hu_g zAZ;_3KO(70vO6;0phZ}Pp>ydzTO zTBSZCI-iF85&!@qjPG2pTAXv9zZLk1_U(RE<;+;bBlfqgRr)isIwpi^HT{E8`RC#x zeR+9ye0pAK>V1f8J7PT<@$S5$^n9kY#lD4XI~l!4wqtxJ*uWov;q8e91c%ke2N{VA z!+UBHP~^G)pNiby$2)?vaXmXX@2m~=Ukz^sFNU|YL4^FCv+|qzRkib*K>!?rXu^J? 
z^$9ZohwhQPE+y$o_z|v^A5Ol}(rW@?Wi{Bjh$dlC%jjDqAt1Wi}yvZHuH`S_c&fJUgrrzABI-F4qKV{e)s1oj-NL^$T zKYS&Vu&9}4V>{F*ANWA5JKoN%S3YRbIPJA$9Ye(1KEJTD;aagZnTpqtsAHIIIXoAJiOZFV$vi6|r9 z<7*$zl}XzbJssGU*$=7%U=4t!qx(TO@00!)*ISXcMOpV_{`h(iTa?cH@BY~tR~dJ5 zDgMtU)JLp2_rGktEvMf*<1B!!$M;_~P!~Jg5yagb(5R2^Jt(3m%8wYL4Bj8%Onvq6 zp4o_!qH%&uG1SS{asNo$)^A^HlEZc4pAx8Z*{<2X+FVP#vanGdXf&^g&`JJPXR zZX3~MT5o&Sz5L)-Q**E2SG?w4MN2wmwgfLg&S1UxfF>ILS6}`OqFM z&bZ9p4tBzCs5p|G9glIMXb3urnU#y)!ucZmzJ&10Nx4wMeY8-NRcg$AZnFmuMi>6< zrvuxvNewOiQsyUYI!X^#=!MRbUMZFBN%^pDPX=$f`G9>6=S*g6DJ;MZeMdYqU)m1& z8sI(#v#&oL$~VyY2+w(wO^+WbHXZUey!+uByG8HgU%Kv%{ zP%>s&*VUgFsg~_$$rslV=k}#1j?VAY$I$oe%D!77n0d{4WDjXz;yv@{JU5{s+5B06 z&0KubZCAD2yrsVNSikjEg^aMy#q}lrDWwAK7baur{fh~b1s`O9ER*TkD4L|Z5j4hy#e4gKdu+qKxz|dhUVgoetw&UP@L~&&4w6S@2rO_A0TIn17 zIMZg_&EZR14*#YuK9=#xz&0ELRz#`QMU^*G=`PowOd#^Jl{587%!cq7=PDtKs+1V1q#8$#R5Fh{mu_6Dyl#2>N+s(jNy5sPAx4*NZZfo5Ap^?j zk^C_7;HP}ZTuZ=BqhWa1B2Cc_g}n`a%8GnpCEi>`*e=fyWhGB!qk!zNbhY*-bhv5l zE#(l~b|CFw&~o56qNsV`|LbSX8C-<~C|B_c@6PRRZG@aD`EvFrbNWj3OL6!L_IGPe zNA2O{C3Bup_A{gXSXQp4$hPDK7Rl-M!JsV(qyv{yLYu_F`A=bbp>_niE>^cl2Z&RK zT0VgCSz+duV}t-;HW9`e+DR|~N|_iLWt0mldr#=rZe1`0+_@dyU7c1;AzvXuolEvB zT{|hz#1gMCYGUbDjy18M6$wo&>&g{PB{FI&;^38ADLIxkARb)ZkIk-9x)e=&e7XkB zd-A&Pn)V2Ew_5h7bTeB&u;%NgGz1k<=4Vgm0^7y`d6(0TUkn&+b`bts8RTVrpD8KufFN_G-vvWJv4v4e{RnESl`a&PsQvN zVf_m}wrMsjHw%CDwuV^Tvc41X>vof-_1_;@i{Hh<`IZs@Z28J(o)3JG)dP#>bJ*~R zE^^v{L~n4}u!>4@w+15>Fc=ke5m9okTsFoI)NF7yZkKjSZ8HnVg{4aib{OPI+0|;r zhP$hpQqU~Vychx#4)T=fVz!(je$TjA52J6oL$J%4!fd5T{+`6by=_>)XA+=d`?L2H zU=NyIF&<2e!W{G05p@yBL)5m2Xu~yiSqWWC(@?Do<7Pu~W zT~aM2LC16{TfxVQDeR%gpp?vzV`vI}(@)7R_NJfwT^7yhmv=wHmRf&OcS*pou+2TTeQ$%sjbLVm)S1t2RhBf&^kOeu#AyJ#fJC<57vDdn8U6PR zyk}iBIbu4hA!#5_Jp1b>eU%%CAED`a?Vl#O(sG_DgOh55mI0*>pk)YCqfpf>sgF?A zRH^ESP4x%lq7)qOaZ)QV@{ziPqCG7+&tC}&&6c(`^^+vgm=Uyy(j*y`BE#+z3yE2! 
z5`F=j-DoK%PwFmBu^h$KVH5gP$`bsFEy<=-dRve1&qGfwWf4=cT^gukh-jwD;unk$ zYw8xqt_s30wCgeECsXPu=I7r2IO3|xqCLX#+TsmF8oHW36(1v{vdD>$MG}{pfRzks6j>5=Y1J#NaGQUm zb@SxK%~Y$4mY8GG zVoP3OZiSTWW5xnY?qbTh9x0V;qpwGoaAV53*Jw(fSCEusmmUO5T)v@Da%RO!x<+Ky z(IN{PS}FY+@QkYDi`8A+HFdM-(#=o_!Xez!8%?)J){KDiEXpvzZUn8M5+h6GP|t+R z+2BL-7%u{IVXid^M14rfA$lKF-k>_9kGRjd=8Z_?SffWsaI6_1Dw9cWcZ29QM+``QyO1+F~?vTIdptT`WD(`ZtXlXXTRJ_nDhylHx)vrQW?cqzFL?bs=3dZxG)9ukTdLdz<$GzA7V}o^ zzz89S@}-j@UT{^@mv**B4Uow2V;IBJvoVYz>D!pp!1Pp1YFIiHV;+(I3^R&kp>{st z;H8lH1iZiJl+?;*GJtWJO5Fz;6IY@bnWHp+vrE?oA(yzI6jhAhoQ#9C+0EFINx#am}1sq z>TDOFT*8c=xF{(+Pj0jnxo13D3fI#G{gJ^_5cQGH^9AZ7t|uK(I~ zo6(s1A3e8G-n`CRDx0ksU!3Y5u0!a;)oYvMn6pdOnOuj03lBIBZ!auzAM!2?aU8lY zRB{}O?}XP~Teurl$G0#UERVb6s4_K)@xJ64%eBS7pb%Sk6Lm8|LU%;iq&Jk0G*;_S{j z9651@xuWJM;T+2`95Z2p!QH9h5T@}|Kqo(!SJ*k>c7pAwb2tg@@NhcG?ksWuz=9-h zC$b$$uAcC^$@0H{;`afzYUYCHzQF*G#R&FLt`sY@LyOsp{P|>|0|=$rF+l-0h*i)^ z#K-e!CH!MCv=YU!4yqA+yoPFIJmy*0Qubrpi9-8R9`9q6LVb9NWwWkD1UmACt17b) z#~Jog;0GDPSnzEOVO017hA?J)5~EU5{MSaM%|Kt)`MWPW#kQP_VGcFUzhNj;&dD&t z9E}t^P8#P5|A(^gj%sS(x>ZnW0O<);dH@kZk={cnQWA<4r9;353MgF&1Oid2Qlusb z0ToaI5k*Rnru3kwbdVAekY0qmjULZE_m20y`#$0@|75U9ve$2|IoDcqwgJC3QH13Mp@~H7%Ya5MjYp#vPu=TQy#<}ItMq}IRY}3d6T!AtfG^#=43>xo1dkV(* zc!qFMJIEy^mRy6z=g?cshgp$JCs_0h8jAr#A@khRKt)kG7$LNCp=~iPly~`+gucKQ zuFVFqcVLRcaL;Ti(Rj~&s@q`CaY_-f$1+uJvIo3dgExn_c9NW;e_Bk5BHplV$s{cz z483>`5%U>YTv7QZhE6<%1M+oK_` zUFjOx4~g}W>G?44D_!q^fykX3&o;Wl3iq?kUlp1jAjRA~teH2cx0CroWUsmSyt-hG1M$->y_#R@M*YIVDF>vAM!4fKDSh$fSQ@~`7 z9WlO~T!`nm(fD~U=Gai2j4Hb@A%wIC*3CA{Ff&LVT`2_uWFy<@t zCvS`u^e1bK7ji0*@CXI`<$}mULzF1b@LfU(a`kAuh;eT}H29!NIbqi7UzeU}Fcc!S zM!IQ?!Ce@6fA@poa${O@jRLTYDQ(&IJ!UAP`Cq)AKlsLs>5!rVf%4Xti zlO~;SpV+qt6~^~*LWL9h%%M38D|_V|zY>7)kDJir@E_gKGUFej(5ogtG}HWz%#T=< z8RG<`)QOV3QaAIbE~e>3r?jG$K^7O`?U$fItWt}_DekmTTuKhK-Kc#7Dr?xT2=z8- z&x3j!mxx-F8+kKXl$(0%ScDK)Po<5-^zERwnJn_)b2otC2?nz5j`KO5nf%QI@$yK= zgb5-vFn{xy1rKyCKE*e!IlqwK!WOA~M9LVY%px_3Qa&uDf>P#?I*EM7CG`#Y3?e0+ 
zHg9MyViAwJene^ws)^;2vCz)9$Ms!>-hq1yS)?LAb3DW#wS*qJA+!OAnBkGYpUv3sMmQu9}>wjVwH#e1~65iaA1UQFIdi0iDP z0<5h0mx_rb?9T)Et$%%ur{lUeA~5^)+6jl$=x0x$D#Xi*9W2OG*k{JjOYl#AX)lo@ zoL}YAZkh(Ec5tI|@yiFHqEXAqP|?`sc4%JY@=K@_ewi2Ql(1|C9gV!s+Objan(6C3 zs6HMLC9ugKx)a|;VnUd3*f$f565`*G%#7bE+2hx@_O303ZUZne_E|ade&n-jsQZb} z@{p%6&qh%nqMr3bWjg3l;R+o@WH@&R8?>J7>rLX%gK7Hl%f-k&oen3%EkS4m|5wq1 zpEuI3nFdLB)EfthbwnV7gga;kLGm5l1q5!nD5RgT9EDT^;VmK6(DQB(Cd7caVfdBr zDxqJZo?U=GCtTL=ut1%<^GpKzoH){M{ zkib&^DB6I#s2woiz?EJl z#lszhN>h*n91a+ynvjDVQjOE0AE_qjz>2B@IXpsD9dtO4@<0q+wCBw)1v$hb2jWU6 zN!Q1nO@-{ooo@=^TN>d)mMx73|GF*!CP|?j_e6=ytoB93WkLIC!m_FTse)y7`xC@v zNx5#K+yRG=`Eqm)>I93}((}jwnbHO00Wv9=&Y_z?m9keb82CckKH!i?pvv2O<99xH|?wPsjBQ;jP!m>=Y;T0jSqZ#Zev~m{w*>g7k7a(l5qX7+(*K7Zn>NJ zeNi1@q+>=NQgR%`K5Pd!$)8xNQW}LCpm%sm>LZjYmi8glKn_Hd+Cc|9lp3qU3aX07 z;SsV*#Nj-$iq+wi{dZFb+0s%}z#)e~`l| zixVjMcTS-A|H296{pJM96E^^Ept8A!>(?G{a&yw3$KK!Nw-}&^y7P-c^UspB?>1Z$ zw^uY>6R^Jqui_kdh04KD=}Bb<0ScvwgwK)G=Ok2qk-Se6iW`%4kz{VzX+Qy)bV4b3 z(@q}Bdw8b~<(Ofo1?91Erza%?`J7QLh$M^XG^9|7(_o)$4a&jTGz|({thgT(kd6F9 z3y{Jg1LeeRVf%7 zzMyV!_=vze-AF+KYKr;O4wTn`9Sd>==JUqDO3UYsq1ADpaJUt(Pq?v_ijOs6T8I7c zSdW?pPo%N>b|?*KS*-4tYy;0xbw>u?A?pMN1d(;T1D^JaCIJUa-yy@82JVpb?)vci zXb})GSqmf_!b;pH1*LL8%^0b|r8bFFfvBklK8gllQ0jC6ZYXu;fPR!ZV?YA(g;2mH z3G2U%b*qISEKZ+-IPT=B2)lSWx)8UhA9QXVmI} z=y^HYSY8f9&&&TFJ$69p)bC%XW=;}~+D;~k#%{Nh@*=lilAQ3{ydqG+C7p!K3I@ylRt6?+l6LA7;~sL6ATluZ_sAOWF=KRf_zBT(u90W^}Q+CR`szF zIau|wA^EuKncfOfwufjEg3PHp*z4Qz7d-(`J(STq+l)vWlnPkwtdJrOuU*UEyFiJE z-tI=#3$JC8XzXh-BpPHbfJCETdqUEW#ag48jK~3KByrqRwU98*t~!)}jqO>|5Xdzo zC!lY!v0W<|4^s_|!YcMwaI>97Z;G%sHXYh$EY#| z;ab$0{BTZcIx(D$s!kY*+a4x$8}1&YoX&s4w`Pi3G}t|>3hn9PWgADw85%397UsJg z6~Gc)cm!IAE-V6`1Q!v3`}rF+98c{zY) zGZ1`nx8Hx8bthsnral}k6x}k87Q(dP(An6QPiV)u77w)JofbCqP&`6KV1w}Vz#R{? 
zPIL=GRm7CYY9-uTXk>R&)r1I0uD7EFZWoE61#m@v=q!8@1f7*o^cHDVZq5}BPRWEM7I(+9Q;fJ289==7UXY-hVlhGud>YF<)96g(| z0`*323mzEi~5vkJ}1HR@kM z2OqY&mOpnvH7vTQySGx&oYVD}PI9QB`F(o9blDFBjP#43 zD>sU$$4fecg7#%3K7`ONC++ub{wmx6*X@Xg^R8&c;H6gsA;9y0FWYkV%eJn3^C$(O zYVcri<9)g>27}n96D-v~Dy$-}NMBxYK2P_>V(>#_HRJ^M5OhK& zOFRE8Zu%M<=F7N2t@SM~Abszeh)~lePCXsPj?p>w}%=doLqaE~@ea zJ)@67zq%+G1yHr1bAQ7J#_BIFuNbQtT|z+B#V#QSt53Uxa93|Q?|`cnolnMo7aY9q zvcp*|HyDn20;x`|$T7WfAgkQ)#-Xfh@aOEEFY>pnRYzl|V|#xd%E~iYIf=e_d-_H1 z&x2X|CM&1WT?Q*J&|QFzYx&BrT9Nhq%|U|8$!G;g^}!0Z$SabUC!HUFzJLaSeM4|H zZ^e_CE8Lg0ou4;Ub5x5D%DHHR9Uf^;^b7b2(6s9x$F;_n2+jP>G1*M$Z-8ZI6TQJ6P%x!DlTr@&I_Z*o%O zoNG@XtAZ>V5_i7<4T-t`1DzRlKNFo9dtVBD7I!}Y{Rz*P3Y_}DHTU~u^WhavheMd= zKaxc_Laq&7i`hLsxEQ<3IS6zsGzXoqy8(lvk-Nr&`j{}mc_(r%+YU!XI(bii-l~Fj zcn4CsN&Mc0&$8;diw5r=p@X;;77M<*Rfb01_Xh}R z$=?uXP`jZlx=wIDxuUf(e4JJxI*)M{W#0i0$~5V*t$(Uej1Gg$KXMs8uybm#vtlu( z?!db8AV=c}(~jTZ1@v4TAMd$ZqLhFF4sDKjcc2nv_|B{nZ~V@^^1Z=3$I4@fcPy2U zP2NdY&L{mzHM>Bn`TC^u(++{LJ`$+Z>J)!>Zz3YB*Y{o~{v9a45e{aAOJWXq<-rkd ziMjvY>RkN3Ppy3;nm0L}_!&SK@p60?WU3^jI~EsX%xdbS(KiNW0EW(_8(_S5&-%WP zb(0l(Gs$*&MCEqDnp`C|e=WGOg;=G2 z?wZNB3+Ou0CyDuM=9PM}AJTibwa$$iev1XNOi0M@PN$RUk97KooEYPSrl&QU#^WN1 z{6ez6plCO!5OEC5(aNLY*#zUbE9Lt*+D%yK!MIZ6Q^vbG8u8;AOs2X*tz*7~D*AKx z(BH8i%qm+7)R>>__GmFZA@^!AJmK!uVtewuxAw@BcRjV|{=Lz;whwPEbj0k^+#%K? zP3?$mox7>WHME;bqMmDK_xegVWrAi7Xv8*Xa&`InT4Qf8WZxKf1kJE$d~ABj+$HRL zZfuCri!=M4uL!oEVTM^_82t@j?OeHLaQ@imO7ypcYSl_M<1gq+D}yigm7PXkLi08v z>V8Ly^^+ki#=pz5-*f*}7hhhaTuM9S>#cni&s0+@%go z_ID+Y{S@l@?(5mKC7d1Oy9e?T&W>pK#4@8<8DMn&$mcTS2Le~ri3+?|Zsr5pS-R2H zt-b4@&llleF8zxS$UgSA-6vZoDm5@$=djG!Je?ObTX)>@wjGC8ga#v7ldkKjuXWT7 zt?V0Pj)!KhYkX{Y$!RT-{c8MuYzU~^f*966YHup*#LTMc&-kZ4*0u!^LA&({_id)Fc)C6ON)Ii zh1~-}Mf>&tZYn^djWCQ=VE+iN zk#16BEBoZD5Pb>a_-Op|fsdzZ-ur%!d32ymxrV)Qfa#-OjiW|CrQ)op1bC&#a|rJJSUgE$5#BnZ(GtjFIN6Q*`=}g+usm1 z{=UO>A9H+%89$PJbq{`g4ydGmq^lXhext9^i}@yBqdd0FR~A|m9`%R;@T~HQ{HEG< z#EZ^v`dl`yDf5AE%WahxO&~ z;@IfBc3YnBxPoKYxCQ1C*pWD%b>O3Yjc22pD6q@)Bc75+PIk0`? 
zteN-CX@ZPd@@F%@&CJzEy{!_QJqH+l`gtDe!oSTl{|^EhXqDUa`zX%SW=>3GbIt@# zI z?)jEuLR5yNYc?8k7;nOc7{|+Ecfdo|*^9SVEV37|E6Ez7u`A{pqZ~7g<3D|YL*D&& zF_ygpnyDTi`lyk9=Z5~6qh{A`{0%FO4gRj2$SusLr`tOYHKWlxx;6UP9oCxu@jZqG$C`-fS1b#oV^^^|l0!3odIx688@HIU>p5ov zG$L5LMsX_}{~`h=h8VKH#NY7D{*rLxu*PZZjac6@&Y8QD*{ln9eIJc2u-{Z2!uiUO zbpf|5UqC8MXef0&_uz(O4L`Y#ZlTKeT2u(*5MRwky1Vpc7^b@I7)=joK6;O!%u=uZN@I+xPm)v%*bhy^2R= z>EQh0X|v{%M}i6Zune#vUP_v?AE((Y?yC6|J7@;R4a3>}Zj+As;E2%}jgxY1W$qdf z<8^U6z?FOB$FBm&wEo5HhTg7T!D#yG7xVgK+hI2KT-&=w^$gp)=Jh9<5^Lg_!~n%v z+(`42<_pcc%@WP2%{Q9gHGgZ?ZjNpCYCbV>N&LZPBvd9`Dl4u&PNh&kNDYD*G4N^A z`-{&2T5AtSxwfAaj%saRDaylzhs>s%*1K*C7dq+g-tFx+_dl8jR~Ju-lSqjRZ$9d( z0mEjPSxR_vv*J9OeO&3E-c#ZZ!O;=$8gNyvR%ZTB$@a;^?P%p>%lrRBn}1Osvv*^z zK4VXQw*LO!gV}n_9^dvNF@kefel8+o@5FXqe*J^^F8f1vy_onj`!3E8#qY$s*yD>_ z>3Ho*GEClv%H6kp8Xv!Af4RVwpZAp9x*f{GL=9L_s<88m&uOr}Fesar6Z|g}-%AVH zt~a8o&(eJw-)tE1L5%k2nLn$b4RlBTsyGo+o`&La}Ic@6CyO=Gg+AC#CqoM4w* zcsL;@CsI5Ck-Jzpp(Zy%m>x0p{mE5~E+|4=+T9xNA( zN;Z0HAO{$C#K-@zw=8g#=3OD3F?wqu_W|`ZlZBf%gw#ePWW;0aDRzE%78&gTXBjE& z0JmZ@ps)BNk8oyb78qlGI`TQYWIs_%)&V&F?~Y5#Mt9xcoG~y(26S9RfsTvp{wu%h z!fcQK_q*O6;89htT(jb$Z&7j zA(-K^OT%%7GmwhBgAYL@&jwD0L5Od>1@hCrb$5turoqG9jy-$!qU(q5xs_zYzxvC~ zAYaD)SrkLa-L%Uz23E!7Nsk{cL+`Q6buY`s%>O;72idBj$?RVHR(}h~6I38iMAAbv z(jtjqjoe5(utsX+3b-aZQXW!sH}U}l?K&pHFb%87h-8GE=>~+z{cQ&#<)j>jN%4hp zcS|Kpw^3?wrQ`N@3Vg&J`bk+<14gCyQB_%`Z1z_kc32KvEM+0d5rJj1JNCjGel(ly z(GL+n_6NxBM!ry45@FD5YiI>;s?s|-k39l~rbaHpmfRS^8yox?!pXn@(_UMH9K&8q zgC|3-`&dvZ9jO*u%2ax_(86>;!G73$Ak1FZa)8@j_av}trjSp)QmU6vwJUu?ki(Q- zE9oev%9Q$$wsT7PL^o)Kv@5&(<$K>T-Vlx3k;Y(+l*mc2MrNc6SR*|$0a6ncc?nX3 zi_`|AUB>hn_+S+&k+Bf8+ZZ!LZ3)%VLCHav6#vlTq=QpwA4vc)fG(9rs@*F+3oO?8 z$T~bCowFI>D-A?>JoM3bcxdln7Z5G?>JMn22S9rbE#iTS`J-1rvdDOG!pk<~%MY&4 z+3EXocV2ZH-323uzB_{G2HR!kz}y)>Jeg4aP7Iy@WiDD!2p3&J}}e-X*J}tFbpWq zLNP)ht#{6cF|U+0vKwKbg|}lo3>@xDNQOk$rCSU(E=!gS-EK=<4CU@iS`61bD{ze= z;Oy2hR|e75zcF-c{w;is+62Q5m03;(-S+$$@TxNAqF0YLXgkR;4~7=RyzxYy zz=S&^H8J5HNN&uYYY&8BM`hE8Ar`jDz;FpX6x;YhWmAMfziq4){8JU<5W;easJ( 
z`+f`p%6&KX7Id+3tO_*JJSGgPR=OpMNdP;_{(H}5zwNDhWf_>hl*|y*1#`VSqF}tO zEO|$a3|7t(;|43=6MGI;&J#Ncc@{7B4e~5T>=oG6qb(eprd*gMW(s!oYLmrA6mN=K45_`0I8h-=#6R1G3C){Kmb_8tV?53c;o+P4Qz@ zm8NX4l`u+T!D*Okbiqdz)!c$QNU*%|qU-D~C@i|bMtM~e`{sSmN60q2vA+AP7ihSp zryP>?{r70YHPQJdn(b+9w2CBFtPY&gloSbqbR?lbknSW|P-c5l5a?`E(ml}G)}&4l z4Cc;ZybQK|pM(LGc(uW>8j#PC1s2#qH?*YjBsi9Qob1$J4P?5TA5IGIo7@?r7^<;&dUK&AIh6=5KG*E3RN zc9r)n72Kd3Dz8<@7{~}_{3+<8@@qwMKUfbxPK4-1j@v=>62?~`Z_>sef#2kgpWiRR z)_uh7EoHddU%=D6LG!7{>rYGEHi!d1w2kZ?xwqQ|B&C?b2jTj{3>w!UTQ z1$a-7tnad!{x3l9v0(?IC)X++OLNhM+;33QC2y--Rw5&mE*~RH!!B!)rqlJBcr zW+R_ae&1Fx40`7>co6HL^!UDuDP-Dt@GMpyJaTV*9F*vCOA%YF6nc~#s~pNhZdD0o zA$uu@ijdQlLZ!$bVWH3enSox@zU|L0e@+8fb5dWmY~ruT7mc&IZRBEb$LBy+Yx#$DY_5vAZy zImx|vOLuo=2}Y&sOcJKb*M%fxRIW2f%qU-nNt_14XaBmuP~YIB`a0S(F?HTAm-8UL z2FuBbuSw?ALJT#f93&{Q=s87?4^2z9fh`9i-ZZtTgSX0lPc{H~@4hbk z^VmDO))lc2ICCJJ@8d-9aURk4Il~~yEpaY*aAzDdKBFVBzJvt?)fp#&uTYubk!Wl?jt6Gi`~yZY-RI9ykK5s5CVaDgB&%?*8*z^da}3&y>%9 zYR-*#Is-v$sZf0oF)%6v!W%pgi+Z6#6+!5>ky^oPs>F-Vlt=jBRMaAj>V^nwB>5x4 z$RtO^ZW~DsvD-rOMC7K+X4tvrELV_8#s)Wxahtbh7Zr^wNctWi>L<%G)KpN zH7xJzvJO7S>vBBxk_Zv;vf(FcDOji(L?aUN>fm=5DzyBGvXy=IJ37AztA{)25|{8oZjn=7ij@B`*Ry+ zOYph>Q@daEc>?TI%ew2C7U2%f3rX239werfpg(QQuA19PDF}VY&l9>^F+GPn9kb zw!?vS{%!NpJmR)TsZ;T`aH)Rbwo2&>)UMjVCwrPffaXBG?J#`J$v)5JvA$cm=Vh?l zHRs3Jr(xg_ZMQ|2c?>>m-{I@`w!wXvbj!SprHETR%UiI9(K;z(P+%qaLJ)SV!{B_@z(zbbqsX8^NRYtQ@}6o`#k`^!1(dCe0Ou?oRovy z%^EG^qwVner2~l5oqoJ{j!#@=9+HE ziyXg9{LWp!82nDAUjTjw1sz8M6pIi9N`wsyp@eR?xST( zBot|@^-?5lQ!P6W_+XJLDOr9L%OXz(wHqa8Ejw9!KG;6TWeHQxkUp7OajM;;2o(YP z1T^wu1<_T_n=ZRv=pYb?|<)(NQZ$t}v)A+OBY_<=H*Y$+Gl$ zLo7*;#6o z%Y`jwomYLXlQa#i4y#pBgdVmScAj0gtvG#poyDshyiWbID3iOtDARuafyx>XFfgDz zD@g~8Pmb*mG0OzehR**-yqE*=avg{l^dj*5-_cER@cvd=@TC+|j?s5ZpBdDvXh2W7 zj2Ng=CGE3;d|-M7R>fHIq<9@%WlhyW78jNY1|(D7qSE0P^r;Le_3T<0#j5bCK)~~o z^Jc@!)KhjZB5R*eGE2Ukyl^nUgCa~QORk-yyv(2^o6AWD0L{TX>aQdI?&so?u*lD+ zOTtn=Z;*B~J}Z)}iq{2wZj|gMf0px!AXaC7PNn1)AHQ9DiIkAPfS~$QhK+`ED8r`1 
zWQwlg@N-HXe3*_pQnWx%)qA)gPgN%E+^-F#h7-!LwWB4M3KmpT+X@Y&FMRvg+<#SS zNsi%nM<0H>S5cpsW}D^gXlB~_@9sB`K8IVG$`WsCt&Dc&XDPF@fQ zAd>{Gh7VDTY}FHLw^kX3Z9k>h5W=*U^7+U6J40ud-4kkIM`nF`*N^x^euteZ2Fhz*vk8sq}djGmtbs8JxSJNb=9f9 zB?&eM)t?As6*d`fjm3XlR$+}?B5J$~rxv61A8g0$yV|I&36e%F>N2=M-NuTk;tO4b*iyYS49SiHeEk(s*ecb_1Q$j&K7sXK zB-~hvE!MlD*Efpc#UB!28z2GP6|Y$5EC3;53Jm&x5e!Z5`JX5K^}qg}^w*uxg;oCd z4Vn6`{b6OV9!c0kMZ3NXWxOmgQ#n%BE>k%{b|v#!n(U*DXSuTHGoB^Lo+_I6vXHea z&Ac8h8(5U>Dwpv&rli2~dpqS&5jFF3UI`{sE2_jTQwvwppQ#mFl8{lGU2-X-Hn{{* zwBYH(ZpWci6;)DYsZ0HP-u|v^2`w>9JKm>ukEU^4uk-z%9}0tb6r11)|JfedKdY-k z2Vt%(Er0Aat%2p&b}ryY`L`|S@Bdd$M`+GCssA?EoLJEqS=zc-m&jkfqPO^wDJ-Q2 zZRExrmeu2KP33SrH=+u3&smfAqzp zkA+=wn77Z-Vi?@b2zngp1>m=EeT zv2zC+{X@g)WAf-uMAG7blL+{_2Q7WwRO^s8lP#q0huMIcm$;RY7Kb-`A6?5g0sFflpYtyl*C1Ao&fx z^3dG%!b?e~#KKs;CHgp$!sA!v3r8h(56{t<>NA(Lnd;M*VwocFOG-_7k~&#SXPWY) zbuyRunbbM=SjHblmrI7~jt@lFTisF_7u~9(F>YPlJ*yoTy4r$|eoMG~CH@KyXB4mH>P?8-gM-AxqnRL_lQ<+dEERASoaRh6 zh9BfS8CSvi^gi&Bp5Fz;#5cmW_rJ1F3T+GUK+$Kh$rrX({M?h1>$MBMCOLaPAwrM7PNsrk97eSf;F}9wdJh8;uO*SjMy^is|3^NtgUR zZr;+JLwtuzGp_BZ_(S+zDO-~%koX$BOG-JRO-G!c+gs}TJs|Wd76NCxtOBq8}=>1woA$N2|tae^2BHY zTO4f>CVTwJFOCGsByVtTDJ4h3^{m|`lT+iet=;L8yE!w>r|85J;zD!USn4-u&ZCba zzBzN{>by%$Pr7L^!W*PGwVTXgC=%s}4b{1>ww&3Pn$&Hai+O!ui)-q{)TQKC$=6Iq zqC?e}>Dv;;^<#eOEbHUu;qEuZcaCT#nK365DP!8U;Cx&!)68D| zmf&bT+zrge*z5&L$z3MuNd* zaajn!22m6Kko(?Aig^7ZO(HaJ?>r-``_K%22kWP=yr9l=5ZK`GaU_5n^@2Nq1I59m zmPSg3n;cP#B$dG5a<3gjF%T|_cg%0>Mno_6?|*46!tO(`mK@Ka!#v-S{)k?B6DJ-F z@=!q%0i0kIyq@OhVM~pMXriUY z+vpX`O>PgFv|-e`q2`f?hrUA7uBUOMe2`s6nNiC0UqjPkpy`B4;?V6r46sMRmllE& zF$xm;3Q6O~UA6J&Cxq-9CpN+vxplM9hkNbfK5*$im{kRC&m+HX&qzzOdlRGOW*V~- zY&=BzXHzt-;5Fo{Fm%ym>qti{5?c_;<)PYfCe4OuiP)mE;O;n$yiah%UDxgKNxMe8 zjM(Dr=thNOmQB;@IfV^YpTI{=R(Ik0MytLAGET&F)txXI7ml5^|NDwu(wDuOj!Vyb zHE~L3yEkb{J0d=DdZ?$>q1X*HK_1W!81x>rxPVfSjZqf%INo6bRe+w(_a_YDpRs>s zPYXecz=J?vz0>&PuA2IL5kig(rvpZ#2wq2?$OM=DiHH)QpFdMQFf9}` zVB9dj9}(rhBZ5XdEAG8>BmX}_BBBh)4=<@=2BU$l(T?X`hvI%M`~MGCriMT{k+>SR 
zoqm!omI2oOwot*+$GOj1`ny}7i*$~2-&yGkMgs>OPTSWZ*G$z8IWWmVN`H_jBu|2c zks5~sdnuj$N#rNw6O*rqDqaWf(qR&NzE(Vyr*wt1=YCyA`li=)QE3kMXVLA%jALHQ zaOq2bT@c6+N#^$~gHz4dEQ1rxMOEflrNiD8=4GsE%OQuc)T{O!`N2E`O!hYU@kYED zs;oVZlxyk%UsEpSvVVtqV(1V!m-ioR0usMy?E~Po~(sfd8J{4CgTG|L0 zZr(vWG*CNyk8j|zy*KF@N&)VG_kqYUmRgZsld6fIV|?^WB>-wiE0(I9s;&G|Ff~La z=s;>LOf#+BE#tBKvg&`vMZtbtNX~ZbiMok-9)?-Fp5V4Keb4T?e&j|{hFf#C^fVnc zkfcpqh^$ql8j>)`lc+?KCq}yTRU&JSl(T^Aq$1*qNgYcS>lz=0jdYVR61%NLS!j5-8DDruw~fol0@ZzLw0#fKq6eL+ukooJ(*!ATt9G4N_)z~teWSg zrw_nPahfLYgIxFE!^{CUDPc$sxPqZBOO?`ecXyL4Ro2{imGt3PMEoYp<0^Cj?s z%KYcg*&iS7+jQku>vQn`LucQ*ew$WuMG7(Nz;O_FMFufjCDn50|K$TO*u1zCk+Ilc z@2DJ#KJxTa^Kr@Qj6siPt+-+M1eb=JT4aip06NBEhT?G($cFE6vjh z25e3HduDsy8>Zfs=I<0&Iwbs=x%f-gkH)Ngn((4?m>IE-_J9W__ za21|TGgVHsbbe;Mv`FW3Gzfjs$PKX~kab=4ThHsrBEHU7Xg&A}B&)pl=k4j!su8eV zzM|3g)t!vJiqQ?4(r7X5L!)G~C+7pxc~{pb&1K>rOy>Dq1>=Uz=k;7UxI>}_pEqAL z>OW|8TD7ic)l}t>6_WrY>j#>G_9U1asc-hK>_g! z^o0Jqbmhg$FTD#!uSCCdR?76|8i!;SYc1QWM{LlJ;(pgo6S>mIGi&(cR5&+S{2n$x zk-VzwH_%*X<*4E(%DJKGH+p14!mqx7%^*bn^DDs7be*%3+>0d&#|A2Y)~M{xw=&rg z{~TIbPEas@CGZ_lsfHdk+|m1N(-V$c*RIqj?qZ6sPLzDv9j0L<-<CVMHFm}}P zd)cfY-eokzGLhYUMY`;6jd1gKv4x|4t#R5`Lx(HB_XHxo&|h%^j8qv4?h%9&PQ?T= zoYVdM68*eqy5IsHusP*D$NyOYE!eXFf5q|LrBbFR*CYg2thHo+`VW!-?Wc+=Kp33m zHC62B!_E_4m*l?-08#PFyl$*oKk=Uir~6US{o)=t<&g;8bY%`W<Fm;Pbi+%2IOyGb{Ze&m`&xWbRC6EC<%;XkiVa2$z9+q7%W zli9QJ&*=#;{m3|M;Lqhx>}l_{f)^XAvtD4S1^KV^@)FL+cip{8uf zzor$uf(#4KE|_c_scr3vDG1@Zpjvyz--dV+u|cQBU3CvnmHwu59zHh%O9ABqePmU6b+Y$SZ#O(II$UI1 zX!Y=Hdj5+7r-YF^;qu$ktFL-R4I(H&j%fRxBbZO`=~^2}GN+nlSR2tXcPA&AH_>s$ zo88KZX5o@E2Yg9`xke<$-MHQy9Y4<%EoXs;3jnQM+wVUPc z)lVFW+w)kJnoTD9fbn!tCo_6#!bh#Y|Yw^y67>- zNFI}Fv<6I9btErK6vRZ=NeYRN10&65HF0lrw~x<;5Ef&@mA0K%iM{fGVQ9IDz98V` zd3lw!m!If_9f=I*4p!X$K6|rZG;uHPPu-*lEOgmi`wgpaQ-HdO{8!xsAPMiC6|;Y8 zhOa)NT|)y%q6|P1WdM?>{r^G|3jmTx#|sI-U9WL5CTZZCkzB1wBuPI3oEFziQiiqb zaa@8~Wl~KPTn6{R{IP@|BQC^D$7}+|Ws`aT64Eq-R!Z{j|*J-l0Q_q>ZN}$az#k}P)xZhHfNo(AOTpJ z(wPTm7n-F=C#&WZ9!klTkWDQ-3>c2CYMJv&I;5;ho3BaM%>Zbk<~N$)3P~Q!YZjBl 
zCP5|qbZ|agUMU&y2_3E*X2*{DG2>2feN35^_LIk1NiInLIDj)X*D;w0;-X2GaGMXC zQ;qu!QY0nsh?`sY!BbQvJH=&V3PFHrtG!vDZOXjlm80#(eJ4`R{ZrIWc>ku^xw!i^ zrAK}z;(AERtE8dSsc=aSY0ZqOUcfgr*XY^(sZW4!Xtvd|QA&dOMZ`1Al$B(N`H1!M z@sy3!(CDdD$vlZIWm#eKIfTVETZSZMd>YcWC5a&EAt0+|JDrqbZE;*S!R%?$vnU=J z*$2Bo5QdfS?SH~T2Fp(*M@^P@CH0M#eN)KhBBsmkDU)X5*eUxICrQn`snh1iC4$u2 z1Ls_dQzE3dIBc(qe+{%<5c_Iqi%n5ZNn^5=PEx*?cFQ(b>a9V? zNx-5r=uX;o+Yiah*gvRViMjo|*@ESDj+C7gowzA^TC8nKYszEsAO^sWbtWa-v(tm_VZ>}j|R!K@0jUXE;FZer-b6t*bDuo9`eiXnnR2%m}Hf0TT%(q9@4Vr zh3_S#V$P^{|aZPN3*fH zF33*jdM)G?28vf{mxP%Ko0UdWP=y`lIcB9;%GpAoZDdFdwPiKGU{RVwVJi$a8%FrB zmV}t=!l{0?-ATd8mFtB8Vozkecg+MX&nbBm&Fzv-#HcEGKQ})qq41zGrjR$a`u*Y# zjpS5UR7ymXw>3*fleQ&F>XZ2lK0hfOHu=0;sB84ux39@u(Dbu=--KBRw$i@u zqNGAzx z^DQ1S6UfFnuIUJ7To!}_=e9aGFDYZWK<{9W8<8>N3ZOe+y@n-OY`Ics;y6EuCb8?; zw|I2-VxOq^F07Ai7Ll@w?sGGb$Xa#pb2>Ua_2+_?O|R?TYd621e6LM@odEvqHXBtS zgAE5WxG}XT31H~RJfL%+=dePTww8(`#xSf0SO|Q=Wyy8$t_weQ7kt-Q12-;&eOGu> zvQBWW&Ro=BTdB|3T-3NWWz`!nd(JZs)|z`speZuZWQF-(Z`#oGA3ZrS(*~497tyae zHJAUw6|_8hsVfoef7!$THi`#Is7wG@v;n{ZTg@#o^_goD@mq=VZ!*?34iM;Piu(1h ziERQnQVRiKp`+xBJ^~4FpFirx0by*{K(u%(A5Z#p_kEZ9`qAf#nV2?^xShrH8D$6e zuA_D~(~io|+ulpufMuRiKAk)#euFiW&3!@ghG=GNyN==XS!KoKfyl>Y?rY+Y^v@c% zv51{Se7NX3qOV%6SqhsqYWF^CfR-0Y-GEQnsWs*WBNjPPe*;8&1D)--Aytx*LAD z;y!Hkf!0n{^rY2=n!u z?ljAFQSh~V{c$QhWY2PV1jC-9eGwXmyi z2c!MeTj;@wt~xP4bXxS==it^iuWu)*BndHV65EnfRZzc;i6-eL0lYo`7v6TS8lDEd zsy;Cj@JmOJ^u3$>Pv5To@;zF@Uw9kxzrx!(zMDNqud}vauqewJVw(zX8%A7ZeHqfO z3t#ZN-klV}X%}L7L20d-!qM++*_eTFvvtmjGEeY6QKV}5DTB{duB8%`O5ZPSdC~F< z?0ZwCXg{Z=3@nH2pSdNRIeKTSE>r*Z7CA$d`Y*hF-AU|4+R(kJGMN`yLoeE|NN-47 z*O%E4ydELBp>BCFXl2r>&>>R|hB;#UpTH>68rANjox6g4U z4E@>}Bdx3dhq5=1hqCeh|0^L&6l33#FeK|>?AasBjG<^DG+8Q3We*`EV+&(nXT~z5 zLdlZsyX;eB-__W;?aTZw)xCV~@AvoqzCZrRKRudmNB4=YuF+IZSSgL@?3|2L`da{wfL8I0?wDboYp?X^lObetuegx zz*Hip(}b6e*Oa2i()LxS8829A9D>(orj0LHX&#ELAzHf5`K#^(PfG}n868rssZ2Wy zR0r>Wn&vxChVIz4RJYV^iY?UDrHd`hEg!1g`kgK+=UQcAwxqoTr!NURpY}4F{-kXw 
z;KlcHOT^16dP~Kt`sJ3|nzIm@VQpEE%)iDbpccL_+!`gM7Qdg{YAi&iTr-@m*6?gb996M%ks5|X%bSOVfzH;a+Mt^7GwOD0X@Ydc(GGoH_PXq5|f{vw}7hUT` zA!iPNxhEGrC+Mu?^|58hc=YU=QH)1qW&5d1Yb>1BTMxt-<~HN!(n)(^JUcC4FstgZg6r!0X%lZ-x{ zl*gXqKrU#K8%lj``0Bp>W6O-A3lc#?hsql775!oMq7iTRK_iT9s z6;8gRU-$g%_*q6u*g8E#f%@_BS;54IiTyV2wxj5Bnsuko)RKISPifb^KKmCFVt*-p zpnq{fB+*Fy1Z5ZerC#j`S~S=*Es&O0RKznj(3^G?KWGhFB z;OHuN_H1HC4R^2t)jA@PSxPPH+lw9KXTih-Tg#ww+Vz`>TvGneb`YPftPgqDB@)Ta zxD>!Q{OxLEFKgC{(VBo;GViET27|=1Ym92CXq7<^@R`AZpFrHWaVVu7xa>u_%B*;z zkIhnum*ni7&v>belE$?Kjmlh)IcFbyrjuf`wM2aj@L~q&;ERbK)}v87r)FalyR54| z<0AiW(tEAuC7?4V*lFWHMtB%-GL5!R#*-xE;7MxpruS;BavD}S!cq5 zRsu$h#C3u;G6@peL10?k6@B2Gf+>JQ;v+Vh_ zLqrtSdn|y$?c6Wt0wCo<1M2cXO^9tV0GDTl>o%CU4~j>WMb^oK3$j;k)2d~y$k0~j ztWeYXXRk=0ov`1H&`bF5*60%=-l@1NXdyTx4R;cqjt9r$C_yanKNPiDKxQU?2x)JC zKI!%}gM8I{N6-U z?1;6O&<+7B8uf7e85RaWN9ysA<70+uKs%H!%FV6Z2d=6l9L0p@;Gwd~zha`Ac+KYI z?Ts@p``m%=|BZKXWHu*IM*<>z^$t2ngx9bra8d+xUnJeom9z01NFu8&aS{mvY&5N+ zgvmwFGR{-2Q5USLx2k{!#!;eUoG^dn+&v~(52ne!OE0JoVBm20y9|OqFw-W*FPP{A zFM?f7ij!g#_1$5`Au&OIW8{#%qu~qIE2{z^#$0IXq$|ic2O2qP2R6#y)<&<3_{B|1 zqpu?>qbE7hW7ut9v>JB%D!LlKZHFdjP7CQfobM0o@W}bAC6^#qO#4kSLx9kpss9a*rYO= z4e=n`lciA{T?!k%ylRBrfR$d^Wn{-~8c6Lzv4 zEcQr@xyZiob6rfJAVvFAv0(Ffnrmz_Yu~u`-8k@Q`+ISPOi$w*^CJ{3a#X8JY>(&(FoeTG0%*g9gfIr3ceguEY z=+y-XeGbszQIrA*Ekxk^qJ{8f| zNYh^h$4i-G%yX=JtdoKSd9I4bYnzLiU%TUK^DR7^p`Ag@r;xkh5q-OZ7#sMZu7jPv z4{R`@R~4V9I6@%KO!70WT{K%^!?+_nq|5n~!2Ob4?KAoN)G3Tq<1%Ewjo>9XYEK4qlCya{LH(N|F zeDeab2h(V>2|>1EK3z@*0=-z9jW+?dpnd577N{) zzEa$@dJt4dX&r0B6=oRxkV;PFL-{~);f(do@DB7t7CE*Lmj*Zs7aKk>4p~1CE=+55 z4_#xxcw|?y%^ZmEX?UHQk#1{7yvcbgG^2;+_OGkypdxGj%&Sj)7U*iwz$@DLg||4|pF86_(r? 
z3R^t$VQQfB&c`cWK5ZXiqp@p#7{(ms#I+vGMR0~jdHmWpOfEd#a7Uvp>^IwScGZ|% z?R)cKXEgADi#_-sdRs8&gBmqjF86PT8_*z?&pt2-Oas#)mH#WjeTxYSc@g)~ygVaL z*W4>6ZqD3`!$)^vq;RWA_1K=|0(PMJ&UKp%)a?*ohRr7nxJ!xyS@V(;VvqNv%{pY`MMy{iKq16K8xk-j{e z@`e32{%m_818Vh$vYUK`Wb}cAs1tV5NOTE5X)Ssp&odQ$MN|kbm4-eknvOr;_qW(C z7;bIzEl~d4#$2G>-gb3B>CQ6x6B~e|2)V;p3v6lee&pOUSkN1ov+k<3eCA8z&b3)G zFfveT;|v_}**m_VTa;1f$Gewd`UHnlV(}%gUkQGWhMTtJC0+?ep)E|aVA1^lLs=~U zG@B(7PTu}TzV2iGDdNkeg3)%8W`SxaNv5Fc1zsgm9O^y!J;YWr@(wrN73^YAi*1zR z(&dbNz)j~0nleY#2iP(*NJK6|-6sa3rcOo1LOUnQJ|{;O{AfxZE#Rn0wl1JScCdd1 zTgXJnDguuv>{snn@aEpeP=V9E4c&sn)GuR&ySmrND1l?muc1qIu4<9GF7(Z3BW<}O zTcMLK%0U7w&0)|r*Ge>=zS)tR%au6pN!`2$RjpkWj9hlv51(TEI^2*vWbv5sKJVQQ z*w^s8h9~a}72XrQKMCX{&7W;yJ@ZdVU)%~tYe^~vsx2hj0@V%@9dvNwdpMptvIIc&?wc-<@qtiVzQ}s0(Ks}0is`;$Va~<- z-bFcT_fdsH&t;{*hTBCX%gX#27bAZFfuRWmMj_CM`uwm|q5khlD962Zmw)*e#y3OH zdIP=O_WzaM{^@VeKPb4_t|&kMq4-Ux+eZtJXKuO{9%#2Yi!I?!Bk0Geg`mghjA%OH z&>5)kByl`7SccM$s&fZQ=PKUl&|H2zmwHYv^%8VzQu+Bhv{MEuQCy`m|FN)2cz&h$WBI9E$~oFpqtq1FuM@)oj~P3IpiuzHZvDjkP*Ii4yiZ}3 z#5{G8>WdANuTd=jqi_ODZn3{i?lxsnHiSd=BbGRE(NfsA%QHrzTX3($HfGcG>@}u0 z3}qFQq~KR-Z}`hJTv}ZsnzNf^nXfaw@hW=+Ank4hwxjM}5|@ATx-8jcvB~XNKm1m% zOIq`WFqcwMkrLNlq3by=!NVnTEz_^iFcr@%e$m#SSCCm8(SzzH?n2^d>e?47#SEyiJ%yO8vJTR*^YJqnnLLV;BqqnE_Y_(k<0D# z{Xe?gNA=T@%gtKyzmeKMUGDr@Se%iWIKXc$hLr@03Y(?m<>MWx&3p)oSH4K)8zJ0| zdngU3xo*{rux*_iRxisgYo5F|ksfd}%13-jY}l)eZR*3MW$;k258V>gu#)R?y)XL~ z&9I%T)no)iWlx;E_g}8F*vovoJZablli9s|OMHH6zMJVMb+b!m%O)HAW*P7eSu-`* z#ziy#i7Lviw6f*;1IeY!!%71iuWDG2t+Vv`jcmBB`wmKc90~-WvBh(*;u&Sk)R&dS)k@?y1~0DK+jz6UEVW zAAso<)UQ*e-%wtj+&R-_=ULUXPLXc4x=|U7r)WhdpEtB{Fr5p&1V@t#H#3Q=>$f69qc!0BElUQ2nr4YZTqei)hM_IvoSbN=VG z`71*iy8oksvN|BA4C#kw0^xuLAi}Keqn#oCAo@;p-QGmA3|yb%<;$vGQ_6 zX!3P}NcKHJ+4;P@oY{wt$$2|3lDqPDP|19GJExtGjow@x4c_vrj2h>Ol;>WCR>rP2 zP54qdi;l9b&QAHVICG4C@syu@UAxCHy0z*TxOKI1?5JP^ocyEVz+{+X6AYhqk5g9B zaTXS9WGF5hMks>43k0ykDZpymJ!)AWg2I&!DMus3lC$10yN%?{ 
zYmQtdT_8r_jEqLKNDyKNPCU4Ro-~WNer3djv_O2A*{L}qN4h~6I@+fI#Qf3dIxYx7po{P1p=?i@6yi5)4M@h)D#W;0hDcHIveGV6AI3=S@;Y)X`+i zuj+}gsJnQ_F({A6@O9vz2+=Vw&F~>-$v2{S)`!GFBvFwtqV+oA2#}0(S1ZvasOT*! zI7%S;qbr<<8#zwLfK4*GY$tj*eNqlQ7w^R2_JcTMQu;!X&g~-Mu}NuCg(ArvR;v4F zmUw-XB}#uw?3n+j#IDZ%WpHPf{jUvfT$Rj*3Ss%`!8y{~eUWTGu?_TWrlRZR|{1&`-gzXEERBT)|wCz$xtZFrdY^KCF9{AaqF^P;l+m^S+I zj57}vNHTd=mwnG}yhT{4`wDJ2n^+ki_z?XK4%mqPR}YMdWFrd5uulC5MEP<(9ul58 zLBol&e}SD4;bd~~Os5h!Ezt+lsDH%ywHu~ge-mgt>2CkrG{ROfZ;a$EYaUz%enbVd zx55TA&I_$ z=q4w~W{&HzhayQYZsN1wk87&7uL#aja;S*P32~s!`r(><-XU45?@2O~y?11eK{9K%saD@S z#OyDLA6s}JHYCwA5YY4nvWY8Fi_o;E$HH}{JUPEWIi-R764m^dFo-emU} zPF>?nu^%>^I?fr@>xph!haBo5gp(`!h#@%v5IuWN?by_uP>6$lvNnR&kOR_@oA)v& z8}jWhaj^-Ai>tp?H|8j1c>PWp-h&;IbiyLz72ap{#awOwxohG3YuBQAZebh6qgFg6pG;w26*v3QzA9r@*Phqt zF|=dKKHcCkcSl!us{VQyWUg;tv;zrwo8u+WAq%P2T;GHEYpf?g{B_s6AY}D+!}r5R z?JDn;ue8H%==3tFx1YR`)5~Ps{th7IdWEvS1EMneXH@D9xW43R9&kkv)0>n_Ok67R5p%=J_z-n-cG%HF-FG8mv1d-PUD&DKLQGIzu} z61LSgmn8Ck?2;+AS9T2IU%nLJ*A0J`7*OnAHc^PB2r){Mj{9?k@$-Ii(!akr&YWQf zz6J`hnjs8n(lf$y`=lnGwFT~|+m(FHTw@up`d25tEax_cSC6@7$}nh6T;^u@qvp5@ z!yk>uFEY?K2S`CB&r;`1)I8ueKlcx*e;+)4pw>YqWn}kQj*{26_^VItguJv0+n;LL zI!9)r%>sW9473Mbz+H|wAwXLUyQ5iq1r)AXdl&Tia_udUm`3d*P=R`_4(NeqtrLj* zvbqQ@1HL98Fc!RmE2N;+gb%O;7l92ltVL-J!Q9U@lxTJFE+=Snsep@eJ4dwmzH?-s zX>#~J?KC-HpK3Zo{ZJbq@{HdDwZL7dX-&c3BLWRT)0yK=AmwL)y6_bcZMtS{JJ^$g zav3)+0D2l3=nPvCp!HK%XQ4gRQWv1@#7dqD<_A|&2GfE@Lp6A4mo=>AXdlBpC209H zCZ>)rlN}DPlRwlFTgJlnHKr2;$fwrQTGS$*^3f{d&T&(Yz+OHJ1OX(Vn()9{aEoTG zImkex_6g{-daVXXOtaPz^x$&sHPC`atse-JnaM<{3m@00wE*eE255rMg2UCVJwT4| z0qS5@5QFvv7!;{H0s39ht=Ap?AbU8y1ug`mLo!(wOBH;qZl|m#A2Co09+KY~{nIv~ z0AQ#@OPj3Qt7qO<9yDC}SkX|%`=7R;(oO7cAz^aipjA#k07CJ^89IB&tUY`h-!d;*fl*5G6Zv)^W@{j6=lBdzCIz4RQ zT~=*1EP-CT8c*QW&V@bUZM?i{2#(ZRT?8BHxJrQRGCj|sL$KSbAhqyGVc5PH+DdEn zKG=_|kuP%_0rC%?T!!z<0Bo9lYBU$@DODre5r-!QeqOhg2Mot%$?HqQV}fEnQXJhg z`pE>3z(s%uQMeJX;xn8Hh({-yLR%Ot$OC7JjBx_%UDl)3?}gdu zwBEr?haUXBgULzzguI-+S-orLU?Q?;rQ24+J&i4v&^m!m~a!R08CGH^LYWElLz3FLkF2O8uM`~x*|2dwm> 
z*b&2{nZ$);b9o6J;Go94NHS3SjA!yv?@?8dw@mhahEL`nZkVc|UPkyXhkmur4@U4M zogdEN*P1_QFmm|!EJ%5r0jhWimZOzKh5P}Yk1FN|f22k-;ufSZDiOtV@J-sbQLS%2 z;5~MvRp!D?%)#?wY0N=nu{S17!Dkk-9lYe{?k^iA`O6jU-jSZ>ivV1r@bSF>C)X{!o1bmQN%dw>@Z-KwE;j+JKXQ& zj1eY7r<{?62YxmZ?L@e8S({e07iO)~c*k@aYa=Eqgtbu=oz1l27mdf-$coBk+FTTM z$J(fg>cilnXgt9MJ1NGX=R#m+&>n};Gq^GS`dnu&gP=7#*x`vbEA5?|V1K@ujLiM3 z7=OSvz>v{voEUQOnh>TmbCSyRId<}zX#*Z7EJ{!K9)eaUum=E}hpVSV)3qAOuvI2e zOWj5#!uA=K4D956)BR^?8`FIhTG2H1vbK!q4cG5F+%Xf~520B=_#XYlz$ZtnC4n~O>y9pRj)BK|xri!0%K0IiDXF0KxZo*=LX zp+gCYDD*di`7?Aj!8{nfK^O=`rx6B1(C1CLHMB2@M&j#&&{v@s8nNO~P=sdaKb?9M zz@RSyUddt+hiivc3Jh_va0QCE$MEt~aqaMOsyHrq`RTZiaIa%=*#zZqeQHr-TyI!D z1A$f>SYK$MP2loDumngk*HS1tB>qUyyJkG~bOd7@p5V7!1qTFpa~L zPUWW(6eC3W7(5y#RsaL1NnU3Im9jv2reom&NQ2s2| zQ7ZphrsJi2Ijo~n{${2qpBXjWEF#~Q@CZ*j!N44%^n1f9Rd_F#JeK`QE_Vq?LSKL+ zRP0CI@@;Ge4`ozHg3@}&vvgBN)Nua#u555Bq@KL4LqN&58wT6`M0&p*DY z8xiJsF+T+FO`k7}^XAOA!+RgkPr!MzGC~r-hBCCN35NaBi0UMz64eWoh<%~ zwL>IG;e_a9<%r>09Z;fyP6s{lvv$XMqL^+6C-H$+ha7Q1r{g#ggPWCfRt6+{z}L?@s_J&g6L!wZ zR%hnW%KGDS&dSc60Ip%~156Yu1m62t{vv@lff@LVljW8Nc;vtNN zku*#b@Vlq-(+H{&j(h-JNQhf@1Qz=&8AQy&HiakG5?VDo%!vjX9Z!g#)jKqZVwxR} z#0QrHu=uB@(gbRnyrTdz9Gp3*-dxL2C6r{kgB^9T?#hn2 zneIZ4b69sN$LpEymmKA>?n;hZnR0w3)bN`TgSG@W{N@SLi;7M{n8s~xl6R(2R7Dlx z9qxbPb~2qrNY}7VDx?pYPQs*3?3+_=A+YOQE5O|v|Ba)U)?mATFMNovL=)!2=*XwF zXiME7m z_t}X$TVg>d!Ma~?Y{t4vJKANsi#xJm-4z`BGu;IoOR?_qj*OWfKqVpY`OAxXL~nTi z@gtTJy22b_3yHJ12@y|UPUCUI!{v{=A>i_KZtL*UlWrH`r3`K<@KOplANchb6_1EK zXAk_5m*au+c4rq$6nucv><`Pmhn<#x1yF7YxxDmmThsnA6_K1!lE9vKUHRmv zAwQce%iA`$qW;t3t`YD&ZLR%GK}&hTcrI=lm#ms-st}IIT9@fsDb`3m7lRO625!0u zbE;-Xs9a4EOXO{rg%JG7NPTEz-9+G&81U3BKi@q?Y|gAwB9#!9g8%?g2+j{gYR3D~ zlI(DPyd*ZfA0??D=f_Mc#rrXm){kl_;6s65B}MaVmxlTtW(ErGgK_qg&8MJCAya3e z>CdP9T`EsD&%0DoG>5o6rE2bX**~T$#a&$sr*5W!CQsOFA4Cumu!nNQi}1JVzKWzh zyx%F(vid;~!JmG^7=P$RtPb3jhm(0pYB~pO1TyP}8YNJ%&XE5YlS|}`1VbmszaJ0b zjjVDppI~QbW`_#ZNu7@T<|6j&A1I5uBF4z`P=;DKU9&4x+2xkTC(y=i7+=J$7V#C_ zS<9D|^pL<8y6Z~F2;b!(WQ6T%6RaY4WeH<1b{`S|S`x*-Gtb6R;FRpH9WE~%1DFT5 
zC&GheXgW=yhAs@x`WWWyp~5u^ES(lEogt5DQZG*2qMo}4^{6Rl>(qd@PbiySo`r2X#xHIK73RI15n@MUk-+f{@Bclt=dp zKc_04C%OM;!HB3A33$hxde)_><{M9EI~nCW{=P;5)CqTq40%kODl?HvJ!b>GUQ^E6 zY3Sk<@|ZDo$A$O#JI+p9mx_tXXH+<>3@5JpObCkCz8jB~;4H9bk3>n?C%!;A*e6Dy zDDBPBC^LKWDAc7tYc_4*xZU!gWlEn*qh@!-=1?l?6)-CnOGPI8Uwbt~E;##-jozb# zQ~ExEeHTbLUq589@BUW_C+H~Q+~`AuF*m!`1u-X#$JBkHd}Ck6G+|PYe14Hmoa>gr z1SfU>-LiW%vTSd(T`q;(KYfd9Vvwresf?kNqQVDaqOwt?OdVZ1pP0B<}y&Lr{ z(K7G)IGXDWw>;`Mj@^o?+W_6_s@;&jKZf3{tOq{IyZ(0 zb$fdCsY~{znX5(+Qnx1?=ewT7uqL0L?K7Nwx>GXtPmf2u#jU=625=t%7$6{Bp%V?E zeb9-ZR|2;tT-<^#9)ANW>m8S_mt}vWT-H=4OY`Q3OGm(c{#)(iH%|9cm3^oyp*zAm z)R@uq!^>{Dq-o<%Mx=GK2Q;xm+;#D+5x(8_>QgXCq{fsd#7KS00Me;BWdnKh&yg`% z5b(O6_21|+xeth)T!u+gStFi$5qYEA&r)1GP)f8*&@!Kz*C8JzhOd={)T>Y}g}N9A z`N$2^ja$-giI(-(f1+GUb6F1hcxp-0rN7RX+lg^Fq5Fb1g6$WL%MrrSg9#SmO4h+S za3>4l9jKCL|85hEn{pT(L?SkzZnU|b^{3061C^MTh9-09j$6`fDVE9Cf1+6WKKrGL)F$btDxhV=Qx)i< zPNo~JXw@)N7<+iWU-=`T`9v4YQ>RvjH(g}q^K(01<**5h<6SpA_6bVpvlddUN$vb8Lp`Ps@NgL(kEm zIa|-FXLC0_{Q`68)5j+ALgqlqb3OEsvFEc_AY+lUcOhFEkExQWabE1n%bM%;#{q`W zp4tH*Q=5{~pzj#%&@QuQI*F5?T~k-bKMQ(htvm@3=1kY}B(AVev<5a)bzSjns+HkL z=UDk1YA|~4#1N{)$S}E!9rA_&b2$(fF-hb$uCXQ`6=pg#`tgobc?5xWqkN8^m@=^ z|A|}7oh3Cs6qVL1qqNS-l`0dmHA9S}>MN1Yw){N#0=D!#$<)hNSE?A6l~(*tE!(a5 zonB^KVXfces&rdP2Pg%j*N%uNNT@$o=S5e`p!FZnF}}HwlNIFoFTdJq&{Ebpxrt%- z)S;lqJ5Y}^bNQ}I?49MVOD8Iym&#PaSDI>vC@fVfeJ4ull(Ldc~E>p&l=T@QSEkJ`yK8}W0^C$@Am`aMc?m-#_POK4U#xjb!OWs!1(xB z+RBgdk91WJwjWJ=WUC4p_u;Ox8}~V0b$!Bzu4-#y?_`z4#2!P{iEY2&w^UWtUtK@7D&14f2`8KguxkK!$nU*R9C)ny$M7^!inI1@G&d z-4)EQ_quO9|Eg*KT6^C0y?3{FD3gfbZE$GLg=i*OK^^^Ka1mTOCdL(GkaL$x@Ue)6 zR_i1Eudw3in1`5!oatkN*%J1{LGXFn{YcnXjrZ~RjZ-($>{TMJ@d@&X(Bx)u3DRJ) zU@Gi_))n`2pQL&W0mGKxHwRNIfDll1sP4%7SRpqjd(=R)7(28J`nP)dE&o-W)TG{D zPhjt#61FBYfzM0*Wf1|+?60GA>Sx}>0yT=_&kTdjjc3;m7tT2&6YAAqV?$XKKcMwM zc=OCogE}r5s#6w9_naGE>@*XI`EZqTc{>V8hM_03(eB(0nnp-0CY zgmq(?Tk@vG9;uc;zoqv^Kt5Kf<}aE4PsXgtlg5?K1-$6I!>0_7Nn}~9QR<~=MPe|UFoMk45)3SKa8k#qgSmR zzeiPS;w_LZt?!IC)yqTmKGEMpC_eA?0sFq}Z3g=$_s)s<2KUN~_(t_ci5vv>I*S~H 
z_Dbvf=e|$xMf~ne0FOESQv02YK_N1p^g+VXo}58;GM>kS5~Mv@g8F4V>4H3@e=;wg zzNjssSM9vR7$gA<-M^>H?!gF|Fb&(M)tO1Ix~SG{4nrsqaU35@+K7# zy(1Xj%=ehydW<4iMP!#8xRTN9nm!3sQU*XJWeHSL4}r)0+dK(K3Ry=<;XRNPSpS_A zeyj9{z;kZ$R$S^$^H1RXUr+f3)IW^STf#^6#g5v6U7pVtQTjqCe;z#mpE#zuK+nF1?WF~MQrAmCdW+VVZuH+izvQ9+*8Wlhg^`&)rI|{v&~b)OQ%M?eMjBLI?L3LVx*FcHees@GU-T8hEwPf?%4;3%@UK*(2Bz|fD2z_}h zzanb5@#O>5wDhzkO1a@>GshaersbWMSM*+C{L3=a%&4cWFPGU(YF=7#nAE?lX76oy z$%-1S>6S!|wsm`=Ht!Mwum|jlF8~IJx0)uO_4io%13pbFoA2}Vz9CpX=|5ypejVL> z>^DAlkNxgT04;)!mrL~J`^#TL_*qf($QCdv+^z+L`h2%V86|eFg%(wSY!N~|uxsH( zP0LK5)6D016wdF2x@Es7a@pPvr6UE2#Hz68+u2*8Zg423Vr|(Kqp^MLiYZt+_6iL4 z6?;WG_9lBpEOw9mMnkve|5dO@0D_(GYp`_>{nfjP$<{h3q!fU|h^FUiE^MVwsVyw0 zzu8bYLw~crFoJ%l3BX7w)fAebc>uovG8&*tFDeS7xCfrd` z_qI=>jFH=7s4ly0Ce+)z+lnaXd)o}C<Z|W=djJ9CO@8G+bSHrvSI6-+ z!kUKus>5AFNG%&WXYAe8;&V4`+Y^S8?e0$Kw3j2U5 zHp9HmeL#m)5*gC9IExGczsg>KB6-`yr8L@Jh=;$pr*&R{7%nM&KJ2WVPWashsE)zC z26~;($cFPeVs;IjIu8K5!+GUrqpMzgPJ{v}4`_% zgMH$Q*+hIIic3ZIo)sI5?4gU7MfQ@4Y1Z8joTabj`O%t#9G zJA_e#QGP(;pR;*zJw3_|9ku&d6C!IKBgnHo=2y#bJVu5 z>$uxx#Yg+HJ6KP{bRuo0J#~z%rww#EZKrK?-rSkKq2pvT?XI(AeOFb0QMx!ex|#jQ zz3Jc&WNyiP89a`irJ2AR-Yb(YfiDQ>>?BBqC$|tB!jn4)l;Jlxf3*yNcSijiw*~LO z+bQ2l;-ck&GbSxV?0I*yP|-ia7rI*b!z)9g8Ku{xbW}T9<~Vj}1;*}9`-JZ&Mq5d) z-PAd3Xpz=A1Ww%Qq}xn;=>)*#r9@)@(1vId{1bazEg>MBvx%_Bz9ETEze9W%?i~$( zOZn$`0pi{LOCNddt}}iFtaMEDZFY{17A>884u`vUSp^=3bGNp*hNpgR;RsJ{Z_y6F z)!8B&KHSpsFnqY9g(6%GIc=?@$nF&rZK>mS#}tgOkQR-H>xJthF`;k@cBCEAUdQkb z(Nw3yhWJ$H(w|WyaR{WITfas8LuVlM?ERT~>>loAxcw_=$eWu?0*inb^YDKs;v?SV z#*ff`d!0HGS(p+#V)QO0YAq5If*6fhJK2T+8{>x!cJ8#TgZZxRTxiQhsD`WsVYlXzS9k+hy0I$PH7gL^bNRM0(B94f&* zGJkvR^-h!vn-@QRDIBs|_}buf2{U%ti+!flgxA>1xa~A%3;*=Yi~zVgV@((AAG1b= zA2Q9nMI18B@R=NP%vd1~DP|fmWF0S_8DX$3)ecFng=>MoY=s^Gif4GIOWJSUd+76^40RV zwFyiV_UZYVK5&NN4zOfpQiWdIz&K-{3eW6;C2;#^W=b%8xx<<}f;v|@MuK~K!j0~} zPwvstv6C8X`W_w5+x)#XJpG?rF!#aT2VRcL&CfDp%8jkl zYjhBO3LrrST$n7qIoSHej(m zylWs}+X*ZtX`f;#g<&&+y`d+j!`j+%Eaj%dR~t1=m5mx@Oe-%pzA~-UZrn3{s?nHW 
z`czl@I7_t&T(i;26aw$58{lH+Y8tr7!PPa89xf~SfzOs&%KfD6GUF4nJmHt4!tpLn z>LdHR3@O3zyt|*_O=awl<7sJF(g-gy&!f;wruKQ?HG_lESkrKvjWP>@kf_(FEXs-~ z3PhKfKET;19U&caC(p5bG3CbB88%Wj#T2D+a5WAOzhq!;;e+($0BG)4XyEX$EV zilJG2uW&jU&?8ljr5z64x6D%32akqgQU?#`EdLcQYE_>aU{-+hg4_d~{r_etrhgkK z)EvAwh|*3hQ(tAA=%(DIo%p8E>>e*%OMqL!P{t{7Da*EP@$jg7PtQ+%373~v4O^9K zin_ZGZ8E;MPv6vq+&|wWVYkoO|1hEs&|HS!}rjxV=_y2O=glyj7_qVOY~FX(yEE8@-mj4 z+e++jZJs(e*-5L0ta`{C9&b`>+sB zWa?@7a?7?$lZWlo6H^HGj3!T+DQX$js8w4Iwb0c*4z-X~I*#h_)mI$V0joDTs>4?I zH~=1seiI+EGHlf@>72EOu#lD{KZ<4~DHEx|CuIK517D|sckg}RQL^AXm;v}Fyv%Ge zu9VFE(yq+Rr82IJ%FORq&OecB?N~yk~>a?Kty~?(7za0pU15^qa~UP zS4e52G!;MEt(ngo*7&(uW1%&}FM2;u*$CK0xw;^Z+!9~l?N|z3c*6h`XuCh>VI)_* zgsSQq9|jX&zXnA)-X=5bpW7^&l_S4%kfQ6eiMo}(;vgVAoJLA2rlyA2?mplqT zijE%~^(&4_;)ZgATMZ2}2Dj=PA`FI`8b%D1YZ^?1c<GH&K*G|P5i5eq**ltBG z@OCbZ45$Q*ApUH6Apldw_`zF1yaPS2>w6E4M*iv(?{2Db{i{z*=2-;rjzEe@TNFlZ zl9;Bv|I;xeH>Q;XtGS~=GO0g^(hf|qIWi6mv8>V#9I@AA94KNxNIS5_Zp!362k!hT zwq0P0?Uc5tn>I3tyz_`Y_M?nN`?RXT2T3n*tge)oa%`@omr(4Sl$TWObxE&FvGP)0 zO0ip#3K^}`Nw+$tZ4KOIc24-bR8f4VYx=i|FQihJC7kq!n$V|)tp21w7-CiL_SzZj zChMt5<|XP?OXWrC2`3UiXI&E-Yq`r2>ug&b)5{c{kbQEO1viFJ7-X2{<$?D0DSxIJmXP!02vqa;uJzoz%Cc z>1YGq=IK_0^xEli1Ivc#83W7u=?H_-rs)v_)tYG&A-;RXL9N0`3&<#jg|ltExBt0= zd=xdgz!Urq;*hXK1}PzQWRNEiInqd0#HFmoRP?Z zNM#4-a}j9qq-*(;c_h8KfP6pG(>TYIW?P7jQ)b2{8*2HMIFeibD3+v|uZkz#&-csB zPs|@9VzaGww{wvp5 zeFLgSOCZ-6?uABs|2NZPW9{T0YFGO{=EmO;D6t?85_t3$p^m}`rJ%ubqBzc7q2xYc zt8D$g2ZbkKA0P9C3bnSu>R1BQzRS0yirfggIM~XQ9hOZa7zZpIjYn9_ z_^upCl9us@)+Au%!-8a$btp?3%Q>VVsbwEtBvt1g+L102$U(qnNEggM8g+-$DM#I? zp63YbW2h-~S^+X_SWf{_)~`oEly&RdA(iU&A0bb*>gOO&HS41w`%Ei`Z?~#axy$bpXNq*&R+e z8nDa*hQlU+Z1D*Qkkh|sTgc>sYo%lhfxqNFApDPSke44wNc{mc2NoUFfbaiJd-%x0 z7Awigjc+o7=ybDd+InzGkV_aNSUE#_q3EVnh!NIRC}Q3ImcbM)XMb;5XwxEO2`iNW ztbJVT^9B2%oTM7@<6TnKvmaNJs?a~? 
zld6J$C;=7GQI`Q2HShKmh>+%V#>4fdW~y`g1~QuXA)}g8K@e4)sXmCR@zg~~mEM#O z#Lr-g4dQoo$`}&gv#k`d0ZGTLD)-g?k&?*U`o~aG`sY6^4Z7~^Ud~~(-Ssu_PyAsl zd0>%K-SS62fO`M`C_C$@DAz9R8z>?@bk`6=Hw-D=jS8qBAuxnUNQX2-my}4Sf*_0_ zeSo2)2P6e_=oXNU`5x5wob#Ubt@k_MTCDLO*P8Xfv+sNFeeG-iMr0qK6l-EQzn-?V z{)|kK7=7x7AxbSv=9pBBU4cN9GEEE%j8jhq2lnBC^$Ip*C+W3P&VN=6%OP1)d@7DTvk*s2J|ujM5iQwRfTjleP# zM&vV6h7lP>_`;5bFbb_x7-w#2Vo|+K;Oy$9*@uoe|uYWZXabw z^iE$|TXcF~T32*OpHXY{hd+gL;l{Ucj}Jp`*R{-rXVeX(w4`BzL!)tkPbVg4)jxwi zcU3LpLS;llxJi3NQrJ&*#75XpYlKjkY~%-x)^p+4Ii=}E_pHBk<53AcwG2z7qP8v& z%9j-ihnyZ+WxE9-{TeAuD?S9ThlZk)p8cz&r#DcFv}iF$`Br5;mn`K&%yR`=)SJT#G(~S3u!YG;d#owKB8N zXdk~IOB9u;b+lEdxV5HrC!TfRlTK!9ZktZ(|IG7yUyim6n*u65FpVsWm;-RZzx!jS z)ivjX5`Xvd_kFiB-6wz>BW`6>L^+&ol!U##?-Rt6w`qXJo_D zTE#j{sG3HIBrUzIPuMym9h#Qz)koXgr`DG_&y~`w)Vz;zC{guFOKk0C%yt^Xbw`zn4u)eFn70WcR5|*k!R#fS6v=9QmKcev8eo z@qOEX|F2@d)MtOxsJVYln$3|6MRh7#Hm;$ zeasYLJtPr7W{#LuS#L+k$gNi+npM~B5zSib*a(01_4^2ao%KP)y`dBAa(qNq?mjd2 z4cDQ5TJUm?q_}E_ToaTdDcD7PWebo)seejAqqH9ruGcz$eM%tpM_?hXXWDf-TF=Dm zO$hfKk>s&=h)z_7qCIChn+f)iUGa0_auVCu>g(Jl%Omw6V=oc!@}CQpOWMw8*fW(c zBIt8JtF6OK{AfoLg4>`aG}x@qbZ=u9I>)C6-*nZ@`mY%mATI*u=Yiv?r4h5tZ8>1K zeDfZ9@~Z(QC3IlDHLp7XPRhg zD|b_Fiavi6HkDPpNj5cGuqizyQ?!XY)m*sAKK04=^mUE*l=iTDmck!QV$Q1I@Gk>Q zJ+!DO1;M}6`aa`NnRok)2Qzv4CVMjd`XCkqOiQDJ%i^w5eRYbjl52PoN+T(%uQEhg z9E|~n1&!LY_!QfSXmCv(f+x=r>S%8}tvW3_wgbV&Smd!28c%dZHbc2o%4XZK)5skZkJ-f z?c(!qE|`ivVANQj#@jAkGOBJby&cbJW2(B6Y}A?No2- z5h8n$A1C}dy7Cr5)VInl;HZE8t235%isx$fc_i^f6tfgvL)Do}t*6pQtPqx^Wp4x2D zFD(fjGP=G^_ld51x@4O38sSkS9eE%=c3-YCSsr6|*$%1=bPqVR^ zwO!JBJZrerZ!zn;#A80IzEo~GYrVAjc-C}@Ot2!;OC@dDY8JM1?CGI0OGp3blzuxg z;WK1D?GCAmR2MOyE0TtGhgZcHiLsm`$j^8;hlz`l{)>tE0iU?Fudf?0r6&L6Wx8Nt zA@QP#Co0WnrDakHNvDvtGy1DEJ z<2#+}50||iTd}!0EL)4YE33b9k-JrWRUto9&Y=htBl6@5^dR<>4J5hcoAw9$E#H(s zC~o1ge7}3`C04W2V>V!w%@_H{R|6S9wk+R;uF+xnYkl>=y3qW}AQiwfIJdBKu|4m< zzfG;#7SzEVVI@b4d$00Ne~<__f8}FI+%oQo_8>cQ;rCy!;6CIQ9{^^q4;U*61Fz%I zDu1=T)`XKO{}u1rGKhoa8BU-c78@u{Jtyps=UCJ@U*sMO1p45Vu~er25sXF8^c@_y 
z>`9-s&Hk&K!1cUM_B>#FYbuk3&+EJ0Msq)OXLl;|{^f{)P0N<-Y1H3>vsmRk0OkSz z=Gqf{-|rCk*9-0}^1rNToI6Jsvcu=AvUkw31Fv$7vHICnwTJYBEO6ABZCT|^f<%d| zm~GkR3bF2jMpboj<-%`CC=bDccp2AY9IjO5b}KU_F}{lCzA{N`g_{t{{2+|`s`2fr zyWMWxqAWrz)1hUw#`sl!-Ba;&%I?=DjpMd4Bs19;4L6fOKfrQ>#H5--y;vPYKW@sc zVl`>#63BUBHRetJmHBiU+T{4l(d89n7O(wObi#LsCKH z#H8A~!g9Sh>MXV*a^68TAS)(YF*!1<&m@R!Ms_(B90{c%FRP~beHBrxNmZ-)gkyj^ z=^$El$0{m8i8)H4=2|-SB(>E|tLX&egiX17F=aTDSFLz2agfA++u%fY`M#|C?-RzW z^8%}tC@@7tQo&#AuzGKq%0DA*g<}Hh`cubt;qeG(BIFd8GRynK|#mbQXn(K8 z!YF>A$@G%`1pQ7Hj{9~Q0lF_L?;rtobq!_!moxN%FnSNB8K8{%J+eI?>S#>vFFEO z3dW(5>o$`2#YtE0=9XU~VP?02Fue}}6IUe?@i5Wc5+JThir1It#DReeLgN)eXjvA7 znJ8{;YOP$64+lv^>f+=k;ovj65#-|H;Iq0><>G`C5!FZP(&fIw5h1CM8gh}h|GQ>$ z!>bWc(wTu&t`vT3W)>zN`3PW(P8%${T3L)$0n8dUA*l+ z(K|L@{BJkgYDu2%dtweI zSv8d>Ky7)qG{D?1`SH!yZwY~Ff-5cB4nX%*jD*awaU|s@)Y=A!3BH}*V$eLg)z$^t z(l`=svmutreg^%fffL6n1^@ms#2#nv&PVw7zrq86pB`?$>t783PJh=!M{P_m;DAZE z32aMlYQRXz{b^VRtjp4C7@ZAXfiqsO?^EMIbUgYu(Q(=!dycKj9=&J1Dv*+8y?TS$ zn?&G6M6O~qv%EagiuqbZngUXuSsrertayzWqFBZ(uY$B@?o-@g&dZFTWw8Lo?3J4Pn;F)R*0IHUlO4~{}p%<>n;v$mZ4O|u%zs_#s6KRbA z^VMv{hbqW^#V>NmCPgXL5Lx0l;vHe)K@wtlB$pzRWQUEIO0kU9f`s^3(f5ub6>&Dm zoE^=@ECD}Ch?tA`!MuBu*@<1$ZA2f~rK#&M zAmFeV^R5-lE&T~)z%8w#$j5v_A`lXhpcu{G78kLq*vG~kAJL(x2qIQN-ew+EEMscB zgLG#uQ`}(6%ZOlOCLsF8j20)hiP(^{VqnGwL8H5e6}?HIq1{vflviJQ0mPgip$Qgz z34mw zaVrr=G2?WolbVpy=%xYqRlUeOkP=HIIcc~qtpc#a`bM5NiO7qRT*Vk>b@@@NylW8| z3ZwFQ@^BMn!)tnwtS!t1=PQo0TxX5Dg*=sWm0F2HLNc~9Ba2=caA z*W~if;NHKq=BT!dN9bA&m}6N>ZJg zCSj2QhpP4wTWEt^V~rv&H?GXvYk?i3K$RQgQql41M;d;b-1Ok)D z1<)S7fD+PIQ*-owrG{LFv^Z^Sjh*_Ad1qH4m`k zq!#yx^I5Eg1gtIG0j#JdzQAb&jAe;mfIR}L;hEB01 z;8XlFq1wDms7_43VCHK;HcU^&oXL=%Noupj*_~D3l1g8^&_2X-*+)#sIIzFejz3a| z-CuMk5Xr|TlXxVKzE1=Y2l01qqH9T~z6p6#&rvNj!gih&ec# zt@Tw11l-4H{;un^rS|kQa92W{A4G65ZYiK0kbcUU!5=H&r?DTi;ithz_V53-kf#z6t=C#D+`Ce^EmZ5EJg4 zM=)rf-9mSPwl&U#(Kf`g*`ClH4R{=@4E*RNz#W{s(+EF$jZ6iZsrj<(@E{qOFDQ44 z;VpOMoj^C^`IAHRz^bZgn(|k0ep|x43uSmd~jsrMAe9)}`RmQhJtBkoVS)IST 
zTeRa&Fc(009I_Ic^31S@9WJtVe*RyUu(!zhQw$bxs$N+waGnLfW7bnaTZ8(+Tg>k> zk<_5Wgb8J|IY=koll+NJvY#G-Jd}2kGFgVBI4=|KLJt6t+x3+5AI1veFWM!0CqK|L z(cMxu4y>YI^iS4KR?*9n_sUvqGMdd^R4|fBT%0%Fy3DMCeq~l5A=QT(Xg1KE+Sv=F zAV`*VSQ~vG?4EeU0&mIvA@6nl(*wQPP+-$EiLOlaE>7}bp6}Hsl%MYFaq2aP3X&`W z-=|BSb4FB%zi7ij$I+sEO!p~U@1k|r-q@d{*U9Esnwz0(SB-uS!~RkB97r^n^t8~_ zAbjvs_F;82J?O2P12cfM;|0YfdER_NnCz!Zpa{iVv`wDnD9*_Ybm3K}`ozkSugjLF zsZ=UgM^H!%FrGU&4)e+sCZl&5Ke2G=}hY32xB^Bj87jbz(i&Rq54tITBgH zI&9Dv3s5j07@EBB1PlOze=&02sosspt24hrr zoyxvjbl{hLd`aeUnL2(nI|q`gcv3BtOCn0uL@P0?pd4(hI;Ew51Kb`Fy>EEzcY4b5co`0$DV$Y*nCS_Rh=LwMT}~l3m@VxHnk9LQ-m7 zKlq#LpASp?<@VW2$Gva11Av)S(cOf(@3! znB?|?OvXj-H(4bodDJ0x;%@{>e08;yL`XhM8Aj=b=0_@jpuJ1|=5xu9lA64#oT->M z*X~NcIWNg9iOrJ;U6U$p`tL*vcwOp&N&}o2bs8|`hO$l~SDmszF{?$fINpHSp~m8^ z8672I_fQA%_q0q%VA!gz3GSH)p@Kukp9DJw>PlLhrg^)wT457o@HiH-7S-Q+`6u%DBow!!BzsKNqS#gq90Sp{#RRlbCG zjOWi$cMDn+yhT^*3R+aWiB}ntOf^m@3Tl%scvfXEV*+@{%a{<2K^PbGvukNj>7y*v zG?*P(VJ0Zicxz@yc35G--9$hZz=egYN-9smP`vT8uYO!DAY}C;T?L`olj!f9(4r_3 z>D5mJQ6#E5Onzjm5-72xr)+){tBVEW@y&5RL{WrE{_?-%mHTTfz<5is`}k6(TTgIm z^{MD`!S6x?3GJ|#(b86iNyCbrSAi-c>C4ZSR9-W~KGrB< zzW=rT>Jqa%bYUlVN7YB2E|KQ+06P6fiL9cd!a0>4_F9ErK=_HxXh6I(50zMgvn17F zyg+QUnw6A#r-2nWntDp&xMP2q8fZj=v zxA0mN#>qf;@QyUa2fTgIs9p()UUA-7j%oN*gg5q@#i)|OB?ZW;RyoE|)+8Ce!VEbOg*^zhWJbl1Ti!8o zzmt`Bg4gx*+C(pk^ur`*U|)ETZ&062k8dC#nwzm2s4YFPvQDha>f^B@k3VKg7mYg% zko{TBP+(F|@~MKX!2oRhY)Df@h(?Psqr;YY7@2Dvs2pRBPklXO)3(g^Ag5G$BxLCd zHATj}?LhowaG#770oau?J-_D%o9pd#q3%qLqUb(WE9HcARcp?4YQgaY7}ym%y~ApY z?AM}}mnSQL+6bHZ06ho#vnNHTqQ#g+z)Y`tX{Mi@3CLpnM^}nr^;g}!wju-6-~T2W z&Vii!I+&!>Yi|pR$k_~c=bs|B?aJF%lEmNSM(hoL8s=4^i4w)xR7aQ~XofNX#`lBB z8jvH)6geV%h+e^mdhzCe@K{!y9qz?9I6cg}k7Q^tDp73KUr zWw9GGcy6x4JwrTl6rsGtk2odp>z}Vguej%MDc4dwqN}%d!ycxS^8n32tC$)XCWCwE zN`F(DYo^VD~jxs&lJzJ0*s&5E*f+?oBA zr*a!_pyJ&TfM_xIlwY;zs@WaOqG}FfYv`-ulBKGQg|$h^L-!=tKS~`Haqr zKl>?ll@s_>v}&8}^lBA*;`yyfoy2q6$yTctO?SLWr})>}2MvEcCXk$(Z!(S1TW-R} zOz(`-%v^C(7>B-Jk1SMyu9VC3Y|6x5jp(=kYY*6c 
zIMA&r1>T+)R%mgjvGHnu+4a|DvY$>2%4Fl3DNZg{!gCnl{!-quKSqWJl^G)Z1x{3d zIIrB#xeq|bXJ7bBdUKsfue===%awgG8+svrBDC;>v6v8gy@yJ9z}BU)CtZF3&t)l* zg~bL3s<#04{#E7!zsM)&Hf0`_7xrTTT_@Sb0DaDWcrN6^{l{SrL%2WaL}geu)c@)U z#jr|F0KT8#N$s#r?p%aF&58UEhTIBy4D{sQkGUMo)$@uKfYIsg@|0;=+hvPIMSVbF zK}vf-XaOr>4+OpKVtROV!T0>!J@%*FPT{SiDIcSn}IiSGoq zVw0O4XBz7dIWhX7wsJpjJa6vBY=}SmiT;n507<gbw@?>O%FMi}z=G6r}RvS_% z$b~J5BP+RKLo&b!LtPo2^ZW!9D?G=!DI z9i8(IQo*^{VKS6AEhxjxg>O3H8SwZ0!A}v4*ABwdqXY>QSPVTj3U7@t#}2c=^AQ*| zx9xmJ)J}}YE|6(S;;`0L)apRpTJ+vw_sNK`XurjXuV~MokpU>vvN;W+%ORJQ!utQS z*p7RiN;!Mkx0?2|@HG23ty-ODmqLan^|IF1O&6_&EieEnRd=E3ab!9qs_W{O*4nxe z-6TNphdAjxM>HrpN#N$=%wT7xPTlBUel~!O`tf!?!Mt^}b#w!Nie*{rM?y=Q&UQ+j z$5Op1Sq`hcDRK@}11SOyJLbKHHGMw-O`-LtrU0pfE-WRS%G9wg_{%@NUT2m#Ew^>J za3Q~yOK0 zva^EU-QtXR-pOIdq8Hzx?j?`M`$hj6&Bwj%4qk1)`w2joc44h&M*Usrmzjlo@eg#i z`~fa1J?kN9-Lk+z+XTZrQ8bG{Y}ZP-vqa}4<#hqxPD;1{!TjxLyJ&lUYW}BPwjCd_ zEk!!nqOtkw`RThn!+4}8P~kk+Ck3N<@+Z;#DG-PBzuUF`s<(H5dV9Grv6#t;&P#dM zJ^sGttm*Drw>{f7qnApzq}nW0^YL0ZjK7FDt?HJu)- zcm3ckqSf>QD6@t^t0bEEY0b#dz*Qg3md7Vaj^pn)t!rjlYkVdTa)3f6?q?x`;{{nL zXVy`fwchPp*krEAY^2^jNElb1HQ&{a_aHaW(ex9*Zxf~+q12Q~D*~F9F!~~;v^D`H zOOsd#{)nBXx8?%@WB$!f!@yGd5r0i>O%;LHZEjhPO%r3;jtUc^iH`GsMg+j|3Qz_v z2>50D=`tezr^t=zYWP)V$GT@z%A}@J-v6KbB0#@o=j-5$`Np0&ux#uZ*}~f%Glk~B zv(1GEJ#z{G7glS5fpJdh!g+V_@m63R;Zk5)xH)~yB+VXapAi3G2X}iTmMiI!Vqa;R zNZPo$sKsnx9p#>B-}AIr0t`rBtUa~9WlzG{^)8>_%LL!;=b5eUk9Ay;kga(!yDD zSl%=D$95H6df2yZ&f9h-uw>F__V{Vn8P8HSKW4yZ{QGQ7qsQM=?rt-6f9_+og$l5l z2Ef?WTG$4+B%i78()2hnVTgVMX#1K32>*~G1^Atyz9a$c$63K%OiQ|bz5MKtwObo+ zR}<`6_gVKf@TXe7Yi%TSrCDlE(|Ih@>m%(|!MBYM`e#@6PnrA19*2>locN7lQto2L zn4r(3E078Gz|f!>QbS!`omT(bnWVIZUrBE)zyHyOV(2B}W_cgog${orkuqs??LBuV z2+m5-dz;&i+rE;r(xJ?i+l%M9QsiNjV^% zQ9+uw#|$b%gRb3{DfmOV)rM4@<0vC&o8u@wNVf7})FK;qtz{i{uQ;IbNkuL~xN$2R zBUrEX+8ZUlZ;WBZ?N#QUt|YZMzNJByGWZ^0aeT9v(8#}IH}%i59_NNDBwnvFB#7fS z2%=gDU}hz5N0V;k33ZCG(UN6? 
zrEw%HxdtfZr4hnN^s#{XB)pJRgEd8&o5zU292&MLoPzrwnLGQe7$aK^+zti6?cjeo z@Ui;46&sLUn7AdY@fQeK>D5U71u#*;8nzAeVrAX30loX*1jA*)d%XbVJWHxw#Q&lV zO3GWnpWOC@lub8H{u}4*=cG@I0G2H8?OsS(?lXo^M^gHt?^vNW?+=hu6zhy-?eAk- z*MgsOj-?wdn?DK>p|uwtTQnM}eAoG=tDfxbjrFHxWJc@nx4PDl-lM=#4gM*p6_B17 z98req7#vB3GLbgRn@fc{>8HtkGr29Mm!|U#n>0o52omakyCttOsf_`0hX0mgjHYrn za-X;C(DL<*ee*J%$A0ANxJK=j&5?KE`#fdoM&p($+oZkaUwFd{aePr z98Jh&2pxfBYMC>JMnR$r_+#710Cep`h1|flO;Quc7X@>XP%TpWoFS!e&bJ32Zw7uK z91AoGH>ZpFOtVf}CR91l!P#DqWluV$SXOEzVl-|pVlfl=fpXon?77ja_w-0_#SP4V zs0Fv~#{=(5_UUKYJ8OQ<7(Qw_favQB$xt5Vj$AST7u!OV-jlNGArhH+lxR2v^$ccan(szaxBk6Nj)Gm3xX?dxT$lMkDNEF07HIFuO zm%}?d4-_erTN0SZ2U*Km@rm@$UD1iG$Um}<+|D^Njbtd8$VPW@_-CM(Aq2TcWRdd_ z|0FbC{b_+?%?X@9;gvC=nEB?Cle zo^a$ngMwmPE+z6vcLPH?J!z#=h>N~ka-IR{>*9|T9p?~w`5o2BE`tq~9k)mu16tJ` zi%1qoY`*7>4!g*4U56Y`$&P!Ghq@27oRfYFc%#L@Qv1QRU+VUF&(?J2#qZ&d7Fiz; zj91X#7YccgVgjlfkl!1CszVVNgYx({V^G`M7e5#nU-J*VN|Z^)E3l$u&4<(-4MLLp za;$h+1=Uk@K}u@Otfk4e9L6f}ghaYzQiG~gfbE+BfvWZpt844K#OiZws;oX%SJ9P7 z(Kk~s){}@r9VdT+JW_{o7pf({D=^epy{WF1yj6ez!s2(`tFQj9c!6Aft6)J!o%dpo zNu9oUfl57y!wU|&!CGOEm$yKtUIpnXT##0e+G5qd6Zi6qIHg$VXS zKM0-T>NE8&-8F?(Lv(kxVYvI)lw&wmLOBj=ca2bjXQ z0WU_E(HAYtq&@?e`!Ft&@pB!(Pv#+e`3@3Ksv)=q4gyaSk~{Qjf&@jL*mDT!*1Q(v zdg8XVP|Z1`?qGGV^2@ZxN;POk6GMOR3wUUSGU$0rhqPT@m^ zV~I~OfWVEJKH+y+Z>$9O@2t8Q3Q5z%qUEXe`3=+;CP~=SMvf`!V#6TA@V5^ z;`6GC`;(}CO-z;iCz-sm$f`kzec=YpC;yy{`=2iI&b&W4<(_GO+RlHdeyorzt9>k# zjD5L&;Pxl8;)hDdlF2p(S?jA8n3i%3>QXwy@zJhYrhTG27FX4p9Fya~{iHcrEC1&P zjwAzM*ND-lM2-h)=_ytC$>|)^YQ~T!FOx;|$K)rOKE>*}XH?-P9~SvuSyX^9DAk4R z_9SEL{#>yTuHPK}a{+^yQeEtB|DSiq1#ovXyb>)o^zI2B=bNd4HceG{ODY(}oU0@B7

mqR)K{8S%M)_?|sni55Eh1U_jx(zhhSW;G%nWuF}iA;p%kx#8#WTYz+>;vOU16 zv3cM0f(LK&qAbgXk6U$ZX!qLP zrc&xVpLO3T>#^qA3EsVG*q>|1bT`+KC!aI$4JlwrEzGwQ0?2>vdUv(&;@>?jvCCiM zzsnBzZ=dVDIn|erSo>xempc`{R%$p{;7$KIs)V3eVhYd>r_a|ehX_9DlsM$y3Pgcjz0>H?swt!EZhoI_7a^zacE) z%wIEjvtj6`beH|Eto}aDXH&zuJNxB^b4vT?hB`SX=5MldPK@7}bZ#h7Dy6;%;T$yh9QEeH5I@%re0RQdH{VVIFx=iQuoJkO0N8N>e4@x*`w~Ij z53f170D$6~qBnrlbm=hG{!I18M|UQCEzpoK-&^?etf5Y>_s!3)r73z%A!{7JTvVo| zg8=6)d*sn8*ES$kvZ;4+)&AHhx=ePtk#4f~bTH}fX7{WZZR&O(MxW-4DQ;@pX_S^1)Fe!i+Y##dL{E*3n&!L{1Dv`)O|0Nits>qR_0UnC zzg68L+j<6y{avG@`g=D-Hb;Fm`gKOnv^PaYXH_>HM`yJ*ua9aM91z=O7aTwVgc{)2 zrL>#N5q&-7QQDsixM~ked2$a(>}X2P6Q>C5T8a-S06N^3#M;xMErPYJ!Y#SAvr?bv z`r=Zb*}n?!(X`S@B`v6k$mm9XQA|erXlBk5_Xp|HO^r=PyKn=^FcSv5VtvV^DKEg0 zESfoGYnN*9N_o@HZh!P?;h6m9Q#-Db@?rqirfnCe?-Mz-H@ctqPWHJ%360u}=h|7x zEA1JFwQGMLZ~v)Z03z`90)L<-bs{oKQ!=OB^zuY{^uEEI;`!CJN23hI7kK#6wkrzl zcc{aL`7>@h0lcuEhTCpiBY6c6|hED`U_zQ#xNBt<}fi@5}09xb_2vpGo7_ z4n1SXKOAN({KiC0U;K@VI>^=&e)9%?#Yj%xH#+JnSa;z!Y3fMZqqHy+>gJr>*I`bu zBY4;xtTC*U2G$tU*#Pr>(FqrpLI3%k@=Ylx_@l2%=G6Qk`!7eu^ylQynbzX;0y0i; zg?|*8U%^*^Q7Q$8g!Kqx4e^JEg$jqDDx<@&sP%1)l!9Z!`i0YA6G{qDd=cS|{M?wZ zcHzvNZ`?7`unmoQM(Xep@i1iu>f*@%DBTBu(*5PP(tXkd$a=^>=LgU0F&ljU&@r+B z9m5ys)Si@|BESEu38_mh7?@LFSk@YG$w1kfXeDJ8l?xl&PH7q3(ApQ?EbdVo(aX>t zcBA*<+cxH6=GscOa(2J@cN4R*XG1B_dohf<=LUYNm zx1CjSrRfic$;j;)(@H!4=n%No^^Df?+0oauNK3X@FLCm3x4MX9ETOT&v~QhPoExL}<&K ze{7JkDIB2W%HA)FI-?=rqZRx5k*w>H_*R#|)vk8&tUq?RV=116eDfp=2X#FV7j!P` zdVfRZYAm7i#4}$2={;{xJNNB~s*7QxCFNY~FPVF&F>Of|Q0h$Osxy{EglTMNwX0tO zp3M6rUljMQW;_yRC_cuEr58W>N*Q5E89OZA=&ac)aJ}mxtst$&GsSPJ$b?9RU`sjz zmRLIRNau|Xz3_ahE;(A2XPHRn*z?Mk?Eh37%a_&0`ma&*ioKayDElMRS9_7bwxI25 zMHH}*@t;{#4Nz`eiX0Dta>M7}lpB{**s^z+>6W#6xlNWvn2Vo4c-|wx!&QmoJb?32 zV7MwNMc;%I1}i8GO;JG5<}V8KP}pv2tzR(-M@dB9!zoXK;paRjD94527e1#d$3YYg z)kof=D}MzO8L5vNb1|{Eol&u;DG!H9%dKA;FMg}ss9iA6;4nzDw-zgf=MY}2<|-A%oJObl#xW` zUOST?2#3NB`QE1{oVKOKPh@$tO=3|lktutKgS-#2EH2mNt!X7qvrvVPwvQkl|YCbHRJP3x}KgI6CP4t`sLRx!T2ytWh z0OWXk%CuNZX<8ETWF$pn+6O^4d?4WX0pTb2K+xUxwC 
z3kW%2hXuqQJf1Quk`6l%v(+g+e>CN4JD7j&H>F{V2|2hq)l$&Z4y12`Khw96ny7#z z7;cUufg>&qx6qNw5eHE?R1+ym=lBXHG*S~)<1%GuJFW77#xWcwDK~9A<%60{J&;2X z7W?6C${=(K{n$45Y|RvX#5ac#e8ti+2cHq^usQXH(1RJo{UKAGQ~#+y2hAxiGpV^p zuSPJ5*cB~6CUs)=J2);jX3m#!{=VL1RS$!y+XA(31OHVP$fR2SO(yko0{co&i1Vy? z_*YuEtCY5E=7{q^Csl;2z<_F|v-th5hVu`DM_#x}YI6-p13Y)pZ&EKtLNEF8Za;GJ z3F)q{QYv@ZN;Iyf$#>yNETNe4Eph1d+`y;=_3*IR=Wdj%6LKm7bUv#kdq)?Sl|PUJ zlP+(HZ?{4Tmcf~EXy4n**E0Fg(SICM4_IY7&G)s{5d)n1P;uEd#@>Z#F83&xWbV)Zz*91 zbcauYL3%(#Y=u%T-^#RfK0#-OxsneYi!X|&qN6*~LtI(4c?R$SV!a2NyTce>O>BcZ zpzxaJ?Fdhk4xW7Lbzz}3%^_ZhA**`A8*#U020__X6W#k*5ytv~kp-?2ZFj#|I*(EOumM0|5ii2m^DCK(%lU3HO3T+I z%nF2p2F6!3i}oBPs#m_KE{jUw0<+~!zh}!odVgIDwr6~2{!w|p9&ldV7{`0)>f8f3 zpU41I{e4be{CCd*T)q2O&dv`M_vzkQKg9mcjhD!+W_TUEN-MW)B;mK3sl3cBu{;7P zc7?otkBRe>BzRiij1kwKKc#{dzPx8LJvB!Dz~Oys0hl$E^cvPJ;UQI)as*19tK89% zi;-E-3U64TOIMGWjLtF^s;JZzFYxn+$+A0MKv9z^^h)4OYS`IuL-vKjJD0lo?{S0k zN8y8q6^HwA<4xP=k-pIT%@4t5yf69gcc;Ex?0x^8l$VORJ_?$zF!@KMXT!^TCm+6U;}R|da7+5ybt%p;2xKA)i@{0C4@yGCghG8C1bak(a6U(Lav6|v%@Vw3 zuU}S3boe!WwCwPU`dE$hEB=U!l-B<;-|>iZ>l8V|xE0=U{_$e~=hiav{LRMzmMxEt za}{UI-7gr@!vN%Y;YXdw(?*1{>N-A(ny0DV#Xk*SSGPE&I)^qeE zM^-s{5|ZVIhH}%#c@DtYIUJ=6d4L(gg7R#{YN2B#VzR(72=P*F+7M=+Tw{z9mEAa( z<@<5j5s-@w;Ovr3oj2>*F>q;AkQ#@xn@EX8u`5VWms(>4Yfzf1D%jM9amCjp zphQil)C)IdVD|Af;f0$!m4}$dp-VT4H?;XdEJOUf%HqfSv1UYZ%2G|pB*rYaogWl& zdQu7Y+DT zW`%dGqr`L?$#-JOD-0TwIv%jhvh56zZ(rMy;hCjCp4NNLk#3)&&vhFaqCDa}2_wgO zW=GKY-H-$W0T)g^?fsVqbuOG{+Qy5c;?TuJ1A_&_M-zvbz}!n0EY-pSw#zOH^cbFb zrm5}E#6G=X)sT6n+?arFQfn+lH$7^+xkEs_&9Orsbx6N+FX~WkM}~QpA?nZu?PuCp z0Y9uo`{^_WfVSt5r=^~<6x;XF(yX%t(C4o+BU22-dQ0)3&%-kXdrHk+?8-7Z6ATVN zl}-U?$#sRxsj`vYo){OGx0zoc?1w!)?Jg)5_Q9T>98aG}G3`bOdXwF>$rC$joNX2d z%|o(nxFZ_rk>a@=A(ctM3b@;+FE^?e}7S~0crP#Zt6bt&2 zxA>$NlAlSG@Qt>;eB3;Nrn=mvA6&IpS$Eb?{>zA3vTsETJ={zph z;hB?MHUvHCmM)#_HoTEKT+Op+4RW)hqni}E?1(%J%CM4r`8Nh~`w#r;5JW@NL|k5AcddzH{A^s!*S# zOs$vIwY|o{nRLuHDm|0v%EJK5mCJT?Q>NFWfkueUY^o5$r}VhRK6zJ{XjQ zC_92r*%yvp=W2eqvlunV+3bvVg*!zats%u8`MP%0s`_el)XMw5?WooErRk`>>nqt& 
ztK%Equ}nM%-MO*opaZ30w=r07kcGZYtqxu+(;Cp$Jr7VimK)~fz`VoA)-{M0uU27e z4Fd8_5f@-ie0dX7o83PXz^m)1{dvjo*T+ER3FZ*6?soRl3r}s4w0k~*JM#c0cIm)8 zkiunHy`%&#EsWXktBQ1y8t3%F>Gox4;ktRDnvJGX)lSu6W-OcQg?`ltr__TPl>eQn zIQ546Q+yF-EElF|9io8xqtmE4j*DD$ZB#(s5f@Tc>x>7T&2kZnHeZNrKP(%s$N-Q683sKhykmQKkdAl)I|4I%;}ozh5y z(jeU+-JEZu-hAhO|NG9|xifc0XJ2uzz1FjySW9Oiu$;7szKeNBas5H_zKi>g^Q1tt z!u{2(rlHKcLrPcgMsGf9Xx@#Ar3BONdM5?dElCFj$L;e@;5oia z9JnyTg9W-<$AblqTeAZR=G)9**!mJw>$(LOw%-BsJb>w6LkVCl9WbMVnowByc}v1k z&jrb!~olQn&V| z9u?WUd`P;pb$sOe(Fz^mQ>AXu<`2bgq0L9iS%vURw9(YQKG?%5#^vzV*h!SW+(@c# zC3E4;l#H9<@l}kI;h!iQN5FqmG5!c&tzujQkFNN=2)+@!meT0u%C_O5i=S6IInXG9 zN8gnd8HQ3fWc1*T^R=|FVQi$4Qup)G1H{eqFz0I-;afH`SX|`tsKXOsVLJOJcrcxP z7z9f0V)- z#WTimxlAXs1NcYqcb{-`7m}@06v;kD0??$Kp;M_)*d#+ zNFf2h78=*wPF0c_^h~jKkq^=q8`~)nWH1Wrrn7Bj*Ik7$-W+bUD-Yc zK2g~|0RE|peLB2=vi&=FH)Z=wcnpkF)II^RcA#GZMSb^In|`Kf{>fRKF+ECg zJu3gmeDtExxtTt_GW9If@sf^;4`DYbI_j0!b9oWYG}_9k#6PuVA0=O=*}gO$P>ToBf0ho zOaytuD1KG8q41YHrxIs=<#pE0G>-JJe5YjRviL#1(KYrN$NHs>=%q}bl5N0+a6=TU};0co~3Rh z!=7bnlff3Hm&%0a=o5JM>2E)NInX|AZTMZwKy=|38ZbN-IyF6(mYl2@7Eef$LWhp5 zs0v_wZj|Z}k+r4~4jao`-hq@Jf=dO$5sy@W$8PP2qUJsoVputdTeV__>D7rN%ki@+;094@UBzDCab z)q-61=urwq?gbGb`ELwdtFf4uIbz_20z&_kp%JfuHQmXp7NMULXT~EItmR;PEUtiH zVd?@LuEbzDSynpmL#%fpdI}x#)YaG(alziQr1YObqKw{OvQsB%BCv=-dzT-9OCYlZ9ulntW=A>%C_@fU=*=f+738 zG$3!MpLVy%l)hq}emdOjdrDT7I&3S`fU1zFb~yM1i6u>TBPaiLE_L-}s`>b_x+z@qm# zrg)&3J?5-EF`UIi8`NppD({iCoB@TGaw5P|au>JC#iB4bJr?O zP0n11Yv%OdfM$+8Fq>Z9z2nA>RWOAvtYm2u3F;GBbox>G6?p0|!D-4SG%+88(==AV z)QMPiu|0gUWekoyF*6~vYAg4tqp&yW4&k(jb3Cx#O6m%0-Gp#s^TjiYXyJsEP+Add zvB?flSy5@J1V2+BXU_SC{mHk8ZE+9w;3WIbgN`cOh3$?Kqc1zJWO6@-CWKVOgem3> zYf5sZT~=IuMI*#3vgc{YHCoI*_Jsm9OnF5j~z` zgqhX~Rm%w%N3YC<3LAea|N z$WIY5CN8*?x-z3DS$2@-kcm2z?huc9f%1@yS^%pqqQ^jXkKyo93`6iS&R72;#+*yc zxAMCDTJEy?!Nh9uaYendUopSRj$7y8W1jG8abTVZYuRC+h-*=l+_vOgXIT);o*BSe zN>cgPCEI~EF=;!KW}xa_J7BXzRtI5u=1q$BJ(g_s5k$IdzH*iFG3`LZ7z3dWps34#YwA5fR8&+>AU3n;lX{u$;zrUTpxpzHTuEe 
z1gwv-Nqj(GLi<$%8(*c4U1Sw-ym>~5WEZ5WLN-9MQFQfhHatVh=q{;pC}=K`a-dSj zDzdQ9jhf)w2P06bOA1+7>IMwUJ8k2mtQ)qI7*|Edth6dVmm9W@WWOS~3r9z}-@Ahs z^N91#JFf%Ipjmqi_QCmcHIZRR5c~~uE^GrnXAH;}s*2gjmYoU7P;`DcG8Sx0<$*qe z5@INQ?3nWut0`dvP1cUe198MNL|pcmE5}>bh*n-j)eamR(o1W053t0`V=hygVRJq1 z7{}p>OLFfJmFxf5(J0+dV)Zuw8Txr1b|ekJ7n$4qo0c;ykIbx^%t`VNd90bBNnY^B zkO7)+(mNL5^bU3U>cB~QusKe{>&e0pZ{Nv@5bwCjvyhwU$*7PynH?$cTt@~yzhoLF zfy@hP2k($CbnKaeEDp=TPFMl~wZcmUI0A{aavf}Fp0FX5CLle+TdX(6?sjtIK4H?^K2@96fZmdI)Lkd}O~e(gR0{hr2t z`|;`U6={=y$0;p;-|*yarv^Zu-xwwSCGE}?to3f628w2n5*OL{m?$2!;S??|e))pdP{qq` zDK2D)*5}@V*^&b0KBW)#!Q+kz9ErGd_YMiU>yI5R($^#%Ei%{iOEp+6Vdpm;0G5D1 z@WJ+o#*~w$z#diXp{I(1M?uO}vXA9VenX6%qL@T>dIL4yxQiD;xV%mmhvf`GH^-DS zd5$kqUIn+roYOAZ(HJURQ!WMJ7|CAGbv(s}-8>&q+>&5ZE7gX)(Z#l9Xp{Vmi7g+asrp%rRyXbkB~sKIU~D4%35YVp zDaOn(ywOLy)iTG9kuW(rx-l~qy^H`zIm9rpd@Z_ePNnKkl zO<)24YwtVuX?-XTfGpwI2Jh$IcZFl+qHw`YivK`ey@^OQd878f;I*HiGwy&dCMuUS$WG& z(K~TmBKGpBHcV*?s(2GS9+#KFz~)y)q8P@o`()Hnkwozsl_dn2tWu?auEOFf)8ZfJ zmC~6$V0x}NCDmuBjUM9|j!VHrp!79%I(6Jcn@5REvLtyND^FN4UDgV(_#|(T)`h%S zBj%E|_(Sw1Lh*ideaN^?-X67!M6rURm{dt-6?vXq%(tK_-qA{>FEj(v(-!}R6BpS} zp{a&YOPGd{V|UDc2pydoKmJIn2IJQyxC+WHbw*R zOFNj<_9RKmlM#*eCaH8$1D@jwzwoanr8E#4qUXJnN?qdI=y8iw%cMk7m#2Alq~1Ck zGf~Z#z)tu2SA~p1RWFAWp&7>$iZHg#{g_*ZQJDN?VIEZd@@ZZsb(4Sic26+5?%KmG8nax%LZB>^a9HvWq!D3lf?4 zZshHs97p8uvmFoQ?56{T3H(X#HY0gM4BlLuUlfg$E{R7c;+m6>ck}k?Z|-d>D4oUn z_@bf_HT5V`(xivqnh^F9DhH;AhnVO_b47&XbFW2LM2FLvurfgKh+g(iN_OFMyGB4n6rxhHD_HlsDxhw1r-9ck=y@LCa|oZv`p3)nvWU%$tHs|M@-zAbVqpH3zSD> z+yXIm5&Z@xdkja9k{NoA^JL5{OA*2 zeU9i8VST&U6LEct*b^>&&mLjLgw)b8lZP7&$6?t(ONj0U?W2k=0TsaZjF{x*=`ns@InON~;noeLB?Z6SHF!dm~Z_)i}-Uy%m{) zQD3!~)WmuR?tJL=O(W@5qF(2BbY4NFHWw6_QhSdc8ynkzr{}E8{Ud>1lNa0@j6rY%3<-7f|NHYjDK1MZKB zuNQma0nd*)0sY$H|kV9wAvXqXJ;>h=!s1FV_vES#f4tVqVJw&Olc zjqhwwuXoyPX6zMd-G|sK62}F)v#6EZx3fU(BZXrN?HO&|`iNXs!+itM`sX97StP+_ zm~({AZrLmfZW4gAJSyi3sXjB;6KjrRT}ZeaxQi$g~~0YoJSfVl3)4I#5OD|^_JkaVp3%k1{ls-8<$xs zGKmBEEiHq7w!Fd&^}QQb!+hDP5w`d13rCt>@3}bJ<#f&jY}Di|g!oWg 
zwr_}S3}s^g)?q4i8RS48!ec@NNI59!bv7zsAdVn310_X)xB^WHpMdrJ0XOdeVKrou zZ12`?{O3E$lQkp>MfAYA1Z{NZa7aNpE}tD^WJ6JJa5kX6w{*uX(lGo>i7Xh(B{T~r za0$znh`eRJR7ahF1b5n_1hS&Kff-vlvD;sv<`eF<55Hanp+-T@nF7~28$%Zb22h`BX|_?A&x1?0 z4Y#a?uv^Sa$cFa@2Y48~5kW;HLmzmbu%A%g*xoqlO*XQOSYR^Y8t4^r`$s~2 zKmyloANKMy;MkTYoZO!T3AhNr{(msaeAvIBtKC?tvpZof=V@C~AQ#DUKI|E1WYKK0fuFsxWGcW*dIEVCWC@u(gUb8@9^o;BcAt|%4_w0&Z$;n%uW={g zew$tdU;&OYzUJ!61bLA!za)f)9f*ZPKc5SPXM(Rum!A@9fOcA)$qFn~1t5K`&WQ9l z1t7pg%iQM7QSeL^1LT1^hLhyuyGf_o^7$CQKmq_Aze)SBrUWxA|3Gq2Fv|!z&MK3?}O~2i76*Vfjr9CQ$QI7 zhw?C&$zGc?*^d}Gg)vF34GuY@;QK>lLySN1xbZL%f_DT_zW`YMvlodO*_u>R5QP1+ zm%y5GKZbFJ?}b8AX2&(^S|S&W_-H0cqV*bUO@k8}c!APylQhExm64)-D4-Rs+_sBL z$Ve#N#)ursR2$@Hg-o9J9Shkb2%1C$4o! zHJChTD!?6iO}KxIYpApF(UEpP3+IQfM)V{3{^wl#9gP@ApnmfHbxuU44J9ekhfKJ~ zxsn=f#>m#Vs##g#e#N-F+MHp27r3ki#zYTU9{(mZ$gj&_3zGn$@g3My&D;?hjeqWD zTds!ub^jE#{M^k#W!yqy+@}2LbFVsj8Y(+#@qwVm3`wm!LCddNxKqshCNvx)e&bWS zF^thwu9bT-k=z|$qEYmC^*lgIt+A2$68JO;*?K#A^x)!FZZs8?1R*_qu)d$EP{L!q zZ{VrdrYH$ydZ%EaH>-sB6bWItR!lo;Ant%unOrRfSEeD5xyAc^;OJ9H$7zmfTM<1x*CWz~emYYC&+re$z#9=sJOj!B!Qp)?gD(`(6 z(=%*66iyJRXpCBwVWogf`L9yq3nYx`!QmnYaN*KxPbPkTfqcYNr7ajKO%R~NSek1u z9N@(CBJbZb6F_m-Yz_c4Am}+!bsXr5&@~A}9+(c%!+fbx>Y%4ne%fBrEll_hZ5zJS zRIdmw17l{MUz^~xi$B5=L7v%#n zOl#8@afpB~s3of1-RuIu{RRXc&K(ZaE5!EOfpQqvj$r#BcPY;Hi5{e4-j5GuJ6#W@ zJhDTW{3X-3t8#v0Z^G?mJ!t5OQ0WKo4ca%iid(F0ql#OcZD_eq$O-MYBh)D4b2QW_ z^YizpL!fiXKmu+r)Vm|vLhbwJubln@-=OD&)&3w)BG+J0H{lv&Qm*t_X;TF~WS~R% z-aDf0N2Tbcapg}y@4JO@wna-dK!A6ZMQ@??68)Q+UJYn2z(wL4jh=?_2~Q(pdoJO8 z(4r;kkl8Epb~aRs0egma2A;;iMYTWVUITzMB^7`bDF9({4EWd;79S#n^ElddvIt^h6QW1;ro|0autUYZx9H4bWm~TFAEI&oa=c{G<7xX5)crM@E-6O$uXcvARACt**$O4>SHbJc3E|-He z5Iskz7Aa2#+9W^zbEIm|imzbLM>CMgjMh^WYYxyAUUg0R&HWjp@|y=UEfqHyGY;0~ z6)xOc9`c>sTln&(TwCh$c$`+Wxm`RZygOHS3kE;}Ax1O7=_p1cz~d-FF@#Ht~%xfEzYO={w zu*jQTLVk+su<%jt5`o-UqPM+8_rtk)tl)D=!&ClU84?o5TXsAffSe1GVcO3U?(WaE z&uNAOspc{xfSmHIJ%Xrnc11NIo-*!umluMDx-x?1{JeQx8BTjAUzQ7iG9$e2pB7fM 
zyuNYSu>(UXu7V~dx|$KMq;_m~3M&u^S;8Yg*3^(IZvMoIi||i>^3pt-MWP)L&S5GH3K>Nl!F5wYoI|JbZ#ye!;d|a-2lUGompCS`6 zNCLOnL&y^zGcpK2w;2n>mD`L0g3i;`p5zpc1l|u3L-#~J^_e`GM1<6UGG8~LUg_>c zOqzo-qrACJO(&m%jr_gEP6sAyAPW&U_fB&snzd%44{YKn1ZkdD^|49hxe}^Ry z{Mn;&gmBsNERf{BFg=qSc^1mASrw2&hP#`;fhpI`c&O!!6@;%~N7@TG8cy8kNwVw@?34&lo zE((U)&qm~8cRvs``+klU$>1RBzidj{(sjTeiDB>cW*LVxbMYKbfh5wEbb(Vm_CSD3 z{Ov)_UtD)a&J-?N)`n%MVb+FA=zQLWcc^&Y1|;-U(>a=2K(3Gp`8y^lmmoIaABhR0 zp$Nk379?dS)v+I6Op1iCN6HB%Ea~||_##~d`Ija`Aq z%Kr3o@@pP4b7-1;nvBD=gL;R)uM_bgewvd#dq*Aox?|cq`?Aiu`|J2=BKCWo?vK{F z(!|pU>QufMG_}ve27WPrxC;sn4>~b)t_3?K^3R1rjq=Y0a9dOkD41?pI$?Q$0tTvd zpcFc^9Ny~wVDMF1%~F4x5T-fRAeZ3&XZ^?{pR@r%ip~q1G>PsD>OrHWYqG(G#Oy=2Il|LhJM=pirJ%4jbjc&sTw641sgThpVd*KO51_AMoR_^5 zx@k3LF45p-W=%$YL>y#H^R_Qo?C(kr8uN4FRc9FN6?|#WznB)`bbqkWq51WZOPbx1 z7sYyzphQ<4;<{9tZCarNLajT}6~@Fzy1jwVS^w?iJVnUy57iH^mM6KU3UV z+>zJ>bUKX1xhBGNPE7i;z1qr8G%HhLUE;0tOjzhdn5GnZLCT`>X}Pq)?-lp5C-j z{``19po1_mWv>`2TQRu@$96Ni`^6e*9%9qA5jq^E;w%$lI=1-!Y!fOv6-IsaUS{QQ+LgYsZ1Fo;CLDA@j6(7u zQL$mQRka?=%3W_mUfyHpDZ{EIt)*Wq`TRKQC4HHDZN}#l*0QJeO}^V?FH-Go+CQO8 z_`FcCPj6lQr=SWzyrXwjfv^lveaE8`DEYZeF51G$qr}RojOl8daLSnl#}Ac_3Xj#4 z7YdJMlqCwBG4c_aTy#8AM(>$Q>!X^6#GmA|CRTd&mBvTp_RCG-_H8kys!zS>!`A{_ z=Xmk&3#`dZNtkAor_}pSwOj>fZx-?}esO)rqb*eNkvUaKqn?cK3y!6fDYB2d<8Q-9 z*O|N{Mi;a%$@8H($9C~P%A4!)K60B($|8&nk(CyGwgs&Il_HEhTKwUa74dI1T{%oY z#f!;KLE^=vrZ^ZqLn@85PVr5lSvwr@H~y7SozoToYRCP8+8LuNFa#Z|TxYm^(|Gm# zs+dvz_`yPFuKX{I21=X6$`d;E$)mgRdj-e$@)ej)v^{*e(M+2dRkB#qfeZm6CKBc3 z^vaO<$C~^RmH2&Yn$^*jbn$G3)_A5bnPw!X@cUdfs}n0_{_Gmi3E&oivKgs9Y^1kGXCLgNyA!^zd zc)_ot_c3T0%DIcJ@-Ss8T8cd7hc=di95P^(AJ*YYLMFk(DPqh8Gvh_*_UC}b@xqGrkxr?xw!FdA1O1Uw!Y#jA@hDtWm?}=v30?r7Bf_Ob?ENQ zyOH*V2s2b;HM96y7+GD!M7@BQ!tMe^%oqDE(}YrS}>>UsuxrS_R%w&>(ssO zD=hZ%?VBj}it9TozK-sTDxQ@Ae#Ub^hyb_bTTMKf$JD@4>%M&EOu$9IT;!<57f>Qx zQlP_^SR$v(mT&d&@w?(@g}*8BA3Z=1(Fb}6qTy*)?_Da_k3OJ8@_`arzuuF;_#Z1N zAn4Z}p!|;i@055kc|iumDtaqJw_7V++C`$|L>HrATK)vS|)b7}L$6N~WS6c=Zi5%aN) z_h*|?@u?K)1J;s181Vvf^DX@XS8Z>xuNArr(k}R4-LwrPPZUeKbqthGymbXMZ#ch3 
z>D=g)WaeHzE`iFNH0Z)&wpz4rG)iDHCyBb5d6%MmSmU#jYNC8K+Ho=ik^nDBBr{+z z9)e=Nan*tzI8@+@<%RtUW z>?==khQf)Cu7t`7i*Ae5iHNRUjt9xthN5|m6RZ*q&E3KRL_V$Y<81nf6l<%)q6$r( z6k-OxxZ>jMf{>J{an`IgODn^F@2Kohn=WTi*Ho@xRbr|lJ-h;A>O5?c?=?weY^XnZ z7Z2p~XFg;3v|Qw(B@!?#Y*nZuk~l3l&Yti3@YB0u1BG3As~YWwxM}Zk@`7(NyFh}# z_(>jnWIgWIrB-2~EvF0r7QNv-~fA{w8UdLJ?k8jsAcelzV^{67Db%8DZQ6v+xIwW~6I8#-N6bo1a7H2aid{5EwO zrsbJ7{c+q?+fB@+0yrdYfbZg_{Zw*J5xD35RC&&OoKN$N(vEi`7DN-w}miQ!Z6{P*L(3ScV-ne>hb-=XHID4Ke(x)%Q26DU4tOPV0 z0;b8v_wr9MKXn(KXnDp6v$-@Cs}!)u)!!daE+&ym_petPf2{RXV@`Ezt@yXFO7s#? zjkvnmsE?Uq5bFR(cgB9xeKSLAdlCfyX8RO?c=%;tujLCYZ4ds_(iYI5+VS9S92Nly z2mD)ZS}mEEG~=0B^BOW3#5q9FfebO)6z|H2aW;q-O`Vv2OjAY%2koP!o@DzNZ=+=U zHgBU+`xUR3VtY65PBxHpU`wn%hMA>h5uVKz)78woHkzc7`?kx0x85R68Uh$apgO-c zw_6ffw=`Q4*|$7fEm_SGm#$ka*;KKCsTPX)i>}0|yr|hAT(nN!Ss58VG>p+7kI*Cv z7R`A@HQHbDUMB>eS)AjbL30;%h{X#RS%}p~hNYHWc;&}l@k9{u&P(Nnbb)vy^DEwT zd0LjD{ZjAWF&Q`VTQ%A(ahKlX6b17#H$q$fyvKP|k-&BF%aN=?5GNWxG5xTne7i0& zv1O86yB9J37!Y{ymbWw;B!-r1IjP=mON>5jACJI+Mqn{MJQ>ynPdsM1qmdiamCZYC znWhMN81X+mA$J098;=`c(z3);ducf>IfkF&I$V`lAj3%F_o4abWz zz{SfjI-9umBIU@kdQ>#hj9@^=qAb^4c)-chQ1(bW<>_!^%9;zWU7kD3fQaS1!VxG% zj5jk^NKr-O*^-4)c0L80$1{X6aIUG}AC4*ru(n!vo>(Jwp7QVd4{W%?Z)|??e`doW zm*4^p$fa_@2hgQQft}fvWanSV07tYZ{q( zHxbG6YU`;=m+C=v^TLl~u0Maz&)?ZR=;FP7)x_SQUS^P}0&TQ~rJrqoSYLG%D_cMD zDzz+EMK;PUfAd)x*{E<>V^IXRi|IP}^AOW@;LohQ&4-E{QXX`p+tKsk&i6)VV&?si zrShVvyoona6pf@ypRn!|WQj&bsgsBh3WhZJGP?EOvQbx(QD5Y|p1 zwzkt}u-WlKSV0>tkO~Oww2_ewaLWogr9ol=sf@Ggi{!`{+OJS8QSxk}U1g`yEba32 z6%lF3BAro3Llg_C(j%%K6(ej?=i%qQi}r}Nm3-#U>+6g%nyz?8B^G`R@ARCzhRjKX zrv}mKid#V8Z=U^i&FL$s;nvp{$ygT;SRx*EYLIG>H4-;VC0~zMPf7Gws+nMGlFGRr zbV`xX9o)ljJtCC*p^XHNNQY8ADA7^!;`c9fS3X96i5^n0m25_>L%Vpka)0G(4@tD0 zeDi|~!<#76G8KTkQE9pqK%Jf?dI~8sL%i4>@eTue26~=Sx=|TL ziTX8?U(m12#OzD?U|w*AKGPfz%!$Nx!TFg?Z<+6Y0lcG)Ytv~L=3YIu?=SexZ>K&9 z4JiT1^y{T-nTY>5>-+2mYU%_qDP2L~*K0U5`#adnUI3$L1SdxusE8UvNX;b+4hEuP z9{mbD{OlfkdLtomve_cZon?d{8p2qRofMJtC;*|88o1K+PV<$fwWO9mwJ#VYG@bD@ 
zTQGbHzAu4sCVMBm60xtD(Mc0sP6&HdFg{LeP*OkGkzAP!ID_+^aU$zDyHr{~$dRVc zhS4oo^feIYrqlm2{rAIwzb)LToL9)5M~o|shEgEl;2D?>Ss=7VQ&y`nY6*gCly#2Z zsln)@;7--2qGcFx=n-F&MH+mF9)FhO^=NhZ4h2yw(*gH=@N-!e>WK3RR%aL?j1rO; zOno0TeWDHt;-TF8IaPpss8|E0=C7owD#?WhoLl1t4z3YuvdVyDD)8m*$?=6E;HGNr zNLCPZQsH)MnPpjrRpRvRF_{`fmhPfa^X1Ak#(50tI+qO)xAw7uI*8l}5;G9hG3UH2^8gYYX(QHC6y+zB_c zSV?{nv!;+%QwVeTmY6O@&L!&Ci6S4#6PA(|O{4Hjs?&DGcRff`V|tAQP$2L68b&HUL7EHPCDwU>y38$kfbpZ};5e)PZP&#x;1 zAIpB=V+q&W>Z5OQnbj-L13&PH7c!zM%|kXo%8Sg4GMqHbs<15i5nb((8e57y@AHuz z>5mixSiB4))zUNMHUw2-Bc}?fL9T?;S{B{8>3)Fiqt4>71wjs9u7U5j09Uc;N7Ej6 z-Vcqd+stG@Wx}gRpOQL)D(NJW;yElDyvD_=k?VecK*gfU0za2Y2BL6;kV2W_HbOh% z<+rB7yO4cBG(ce?yXXX-)&_9N&hmtQ$+c7a+iCpqQiC$dElFt%qgn z{jyoEbX%xb~DaoMnwm$-McyRk0?qrB`M zmWa50c+8PKkz*9zjM_wYL$cw&v7iR@0Oh-SbGLr&2sX>;&Aguvo(IMy+Q!R2DuCm>0rtUW}7`@eJnsN~id)R(r#2>(NR>oB&A2x>alaqBRF z40>QTp*)x{2*e#oM2HMr2gqtKhxf;8Cq@-VeN4E;Y25)Zd$y>EoTQ!h$dH%eWSp*@ zlPJ0H%aq{{2^An|uN|1X^a)J~*CD*dF9Jsb9c+MdyZ`gEYHuNR1DB7(smv~!;Y+uAIv~5|0>}V$Pe%<5*gHXL_+b-A9#WLT-Sb!8sP+> zM$%o?DnT9a4ZZ$QFdWX03nXFL@fwODgdQK6hDeX7^7CjSAECn&S}%C>(AtCiBgcB zbP5s<--PdEW3pAai?7g}O;R%PL;<7x_E3f=Tp zrGQ+*jnyRid_ETGB+&L^>U^r@CDvJ^DH^>b!q}X+6vXJ0wq(xe69cB}{Gk0J4cOhK zdz{z-_tcl>SjAhHa#_U#iunrAlj^}+0ZM+Bm<4dExm;?kym&xxJ~t4ZPsGXAZqi>=1GrGN zU-_u-s|bJ-(nA&iSNJ0wgXrNvjSymnLF!Q&kn80$`0`fB=Avk)-g-r zM7{<}QUH-h0qU9Lq;>EUkYVstxcfshg0+(*nxt(myU&eeN96s8Tok!^59w?eHx-ox zls$A}<$8^k19i9qV?*Lqb4~c^EU1}^MqkDX8A)|W+wmVRQzP~0Ty%gh3{DeJ|P4NVBvrVLa zHm;iXSt2Kles8XIWAadU+BJtH21tf)9p;f}zi___=X0(PZ9DBR0_yG*LNlEEU0(N& zwE7DfE!C6|mFu-f!?2O){27_DOnB*V7?YKIS(Q!x7LY0!G&&D+0>0J{lViuOpq8&k zt&~X^=TtY*mCv+2f=8G2xXY_LN0C(ASiJ$A3z45Gd_sjDJt#TS6dnIz!b3e(y#$DZ zv3?NUn|yo~nuPayuAr%?)kg_&@t<-^#Y_ztx0UpveTR%B@>7g`T{`@9yEhUHCtG*@ zHs42iA)8lyQz70*eN!O*R&mo#kG<6qDS@TpMbN4l>x1&y$W?My46r-uFEI@^K3hVk zUSIiSMWZiW@Tvi;5^JXM1@;#=J)f>aL{m*yBGacjt`AIa7{BE2sQ2yV?tuC-6oK~e za~z}3b{5OWQr>2olihSxKGXDw9);*VGq&{4r%ABY^MLez<|h1N&k8A5?chfHV$CWC z=$G(86~_7jo&Kf|5{OxmSf7{gbW~z|0kJYLzEsi|=@ZuZ=3m(yzpBX}Ul{{1Le`bP 
zqV8DOfC=e2|HN9q*s@HQ7*G*TLuCW*>crQuGa;SPYo~(0C*2i%g|(E|b@T`Uzjkhb zL4I(GuIu7|yZm`XyL$z-1^R=OnOO^K-~0Fv*|Q4nc&je)c6oPn2NN)OYxQY>Y$85A z_*SOp33_Wx_67)awNKY~a;)0vkmp`M=#$95X6b7wye8;_b$DQX z`%+=(eAq0vbkUtv?YzfqTey?~K!v(r1;J>fUgv-45+B6)A-^%^@O}J#j+ex$ z0;5F8C%kW`dJV?AA)h3^iI%pPM}8J2ze|{}80;+hT#pH;g^)|x0r8W3`8UYi8yAsp zVtN>0F?l%@=4s85m{42Y;{xii&^BGvzhA@^iU&TSi5@2$J^jK>u%}i+{wnFF1s+PMDkqZHK={tZtrkEJy3)$DR?ij;k1GeG+U#&QDfN zH(5IQ&G-oP?(>r%8=7jHNmq!->$`~NQtN>}^wOQaKEu-ZJRs~K-|!|C;Y;2j9&?0l zf_7c3vcOwAEu#EGZ)S;tLlowg{6hw2Xtyg>wMvCSz^;dNjWJ2^F1q#Eop5J)GNbL3 znxC%vX$2mHrMsFqm0)IO5 zhjf?&al~hb22FaT8Q+#>;18{P9k=wSoTq2y(BK=M9U}NEZ7SVaxQS$%3}+ z4g!&g_M*TY0Meo5=;c=rgRm5sBHs$m3eJDV>O2sSjB@buU4Bj~v3Q>Tm7wxc`-#!p zKVUj1(kam-O2trnS$i{l1Kt(!j`+^w$?bBxj%Ktg+a1JmicTM7g_opc_RgInC_(Vu zl%x*@<(HFQLzpA=1x*CkmN~!c1-1u5*dp&mT9Kj&@-I*NLO3F21S3BR`+v33pHuqB zyaeFpvOC-yo$T!abBpX0LRxnQqrY9tP7`6rF{}4aj@;ILs|DpQ?SqI z_=^10^M!#u5QInZl>9C!Zjm_ZCGuSm-jep>v*0h`FCVfK09piUApg$h^Cz+i^1G0@ z%13wXW;!EvuZNgGX>uSddeHmxjnZ0-LHqQ&A<;(jM zA8K!v-C#p+^X}>{?mMZ!6ZN2_p!qWE=i;`yM}-g9nbXMANbJ9TSN%Tg=fp7REr_u! 
z>idqa%(Yf{dCT`5M;Yl08Hb1tKJ*Eb7d-ZOODRDrK`)WmS<3ioy=qJBzb;M(za%M} zt404V9J(F8qCRE0lSO1A{mFj$RYr8?YAMpC2B~1?>BH zMSb_gUT`|U4bq6%=kuaFXFh0(9E#c}^K$T_$p7PUao=|%YcS@m8)be(wu<2I(>9JB z47R;%ZMgRVY2UFCy>a_T;E!)hHXRLk`|OQZ2jp#@*hVZ~y9X|96F5dtFZc7nMUntJ z(PpM|v!&E9jQ~3;ah_(p^B3pemczck>#kAyb>#N#X)JKM&?W`&@d*M2kOVY=2ZO)n zBB;bVFD#Aa?`haT6X37~fWloDs0X@$@PB##?|>!ny_(@bfs$O49k{hGkia%25SJ+; zp{TbZ=Y@-z5iO5e5Mb2T_}A}#c>b_!HS(A6g`(W1pPwyCgu~!{6X2(@HdM0^eVgGO zAEDB5#gD-{uhWP`lNcrkkf)d!&0?X5Dg5?%C*r#*5&(O=E znI8)gu>03hOHgFk(fR7RQJYRAR*i-qcAia?L z?g3YWNvQZ89qtVKRdDMvdicxQPmyqO7#Ye_UMIHB+<}t3J5WO3wm=L;yf!#E zyL+6}Im(ZyE{Xdv-RqA(zWf;Kwrg!cZ9#)QyS{(@^_=8~U02HknBjHM4+@58sVp{M zb@+Om4H?SRR!lgW|5w?yI5l;rQ7fo7P)c%xAfiB$o5T(v+VCb6@*of74T@Ee8c_ii z@d32Tqr?o{3xW`8DUt$-sDM^MM3ILyA**FwC}>oC0K%}8jR=VfV#H)G)vD9knce*h zesgDj=YGHQo$osbI7(iF{Bm&N0bmv2X#X{_Uh>_MU%VC)J__`duo9*W_iLjJ!hf{g zer?Dh?e^f;FO|<2w_+#w^}cetuoEaGdQ?2N5YjL`NMMKgFY0J=fTmH0vt2 z1M~qJXQmatMcCUUN+N#GA9rJ>;SYV+ds0+Ed_gC8N}~X4%yDp2*$29u)4AJ&QBRhq z8hhg>81>Hb6+#xK5b`K|94Dj!mY{?i%1y>F9OSOX8OCRUySPWZWE=T_u{o9=ps|qN z6?tLR0X0?xjFcfV0SYKC*}aj>MxYDmrpDP{I$9L;Cj@TIL!camzCki(2uMERS*xRzU)ek?Dx$ zpyB}F065$4CbmiHk7&FU1Vxj`2D6vkmaoZFaDhtbRvzy#F$R7C-Kyl-7H?&9aMmGl zCdFm!F)$4G)sOR|ALwZ)F^J3vzT+N=u8{8)IoqdWh8-}m+}RxpBVZ_a)>dH-W**ht z7RB1nfm@x1|0cQ#Jj8S`Jb1G)3cjKes~2n1L|K%%B{(>mGnzVOVLB@B(K?Kcu>>`m{aHy%`ycZ6GEbw zC*!u74f~6nL{YdI?4VzBPO?yAsqn^3kPIKz6gteEejUw$%=`8y0vc0irOt4iLrjNlkg$pTO z+2fJ2M)>b=4>i%gB$TI^QZ?L|unWcepM~stb`>;o2qRuI#vVEyKvpzdS&*@&Iie`2nS4DHXyH%f7jin|| zO2-(9wkPMuTRx)6EkCHy?+b6wSHFq0lj@+p4eW>br_4*#K0W(6{29qBZ~Pls1l$98 zmDc2qGi5tu4sZ*6p1Pc{DdZr&?$k9yQbV)AI3RyL#h&8C1IR%Zdf5; zAqDf6C~*QByc8_w26A^});ct}D$|v9@EzPk9=eG!p!^K$9WY^$@^kGIznCcXp)jg6 z37fJ=Rd70qiN1;6s=Qy+rbN89Yzj*pIy18^v7>5cb7GckCQV=kwtnBgUU{CHYTA&k zoU~7MZpejwf+ws6&R_(1`1r=P4f_RWk#CXqAV#9h*1rM43N;OaNr#xT7P)3py`gvEwr`l{m2cn9lLAYIef#UQir`saTE0e zf^A3;(e0=H6#^51DO?6SQOE2r7jA?%IyG4?Xu+l%8lm6vCBkgwO9_2mI9D9K>=$B5@u2jARec5v!X4Wp3PD$}*0}1G`k((Jy zZ_pcF>Gu)_F-wpX 
zhzTX~UBvwX0xRSVR9NJ}zqqt(25K#u;9vCZqKi9ug+$MaQ42AR-$Sk?UM;FF8#NKH z7C+&0cy~)4vG;p-`9S7kI%k{d@`t_s#n+QuW`a?1t@5q7hLrN+`Hqw@jv{!(3}k>) zVxD+bq%4K5k+d|#m&bheg*d9DEj!Lvd=;94zP0Ph<$uX|_LVqX>|CNMi4M3ZtS%WX zhdQAl(knvOZ@kQ)XG_E#;<^0m`3Y^p>$?)#K1bVBnSWFx5cG#!Aj{(VqK*CvmcR&E z4r0K~ya}k`m~tyfkIEv07z($fPwy%rPZ9KNKZ`C3Y; zOs|<4*U@6+S5X~Be2Ka3cB9H9(0Jul5vI0ULyADJ*Tlz}T1m@(6;Y!l+8d>%ZW@WX zpp;71Dij!R~6Z*wPdNNPKA&R7Mhz>dP*as z76?gspv2gy6*d{y^Mz!XIZH!z>ZmlC7xgmPp`)sFSuzPJYSxzYR3KHF zrIb;r(_hn2f?}#I$43@8lay1PM7||UN`El$ibb1384Zf#hGc4UlEQQ+mV7n7)2H2s zj4>NU%KbzhrIb` z&=EI8L~~qQrBp?XR*=_}mNH@d(at$<%P!XWg~b0MJ>z0sRAy(w@w~C=`tRzV&Mw}0 zV_LvJD;wrS%X_={xU%7;X!-E$;<@z4db#k+E-s<}l3dyFTC}{Ei=17R4KGH^U9*cP z-I!*7k^9BUwj)%Ta0k(nq%gL$l1i~qDADCS*>Oa%u|;a2J`$)!79W+z&gQ|#_8r*Y zJormwJo0>%%Yx)e?>@%s^Hm;)-d(f5ppWtGSfzPv$C}*g5#mF~E@h7rZ$idZ$FxJ{ zP0cQ@>0_K5lRMzpxU7&q#$|hG!XBjwD{&HG`3zxCIEOI)<1}F>Ylh69I$glbnr*z# z*~ve;Q!wlDjcJK}jK^CNj43y7ZcxVLZay|n7r{(AH{S1;_+MtY>S{zEW5R-}$+F_2 z=7jwl1}=ELqOF)E)Grz##1?Gzkv5@Ge%X~L#EhcMwtny^JqBJAlJny>}Vk!UJ2mzJ`f&XF>C zEG=cjogiiO<{~_wq8*auoXzDxEUoWvJApgQnx+IzZhU)FZ8K$QGRb1{t1l53$w40~ z^?Ze?R%r^pLnIN}R`Pc-o;?*94(Sd`PsmLr!^hPLjhP&eNrkEXD?ZM=Iqsj?u#>~FIpU3nsc(;^W>x~T}*k@(BPCx!-e{(()6On*jugp zl$UR|<+i_HTwS+j?w?c*BQ+efvrx4Ug*Z{OB+ zd%=>m9rdBnM5oX~Me+4EeIU&b&QjRdrqUY_9m^VKswFVLTCT{W_o$G1H)``~vQ7j9r4I3-ja7Pm4jL_Kk& z{6}h&VQ|>Gs#%43r!$xH-aYfYkfHv4e`Geh5n+2|_QCHqo@(lv{(_F5o|mU4Pl))M z8lW5UT|@5X-SLs(D(@dw#YhmE*x95Xj(tRSe9<+DS=Kd)eT?r^cTIm{^XZrf-;@_@ z95TVQ;VW3u6l0e2O#D?H6-<3z8aKeH6NGTdsQ1owPxn}Y2b zC;SqP8~?~o_I4xWWBvk{o;^dU8GGnvZ{+wa;UE0^g~Zo zjlN@3+apDxk-W7}6nM7@ujg(~O%v?O)E{q1-TKtYl2S(1uu|5gy1Ho$PCxXbs!`?5 zscql9Z%Vvtr+mn9h)XzsC@g6P_Z2p+^^EBqSQSYGeP^A>0rW&3$_xS~O%;xk_76bP z!Xuh*yvavp4R{w-%s^%BIapaUCWIkdO*!zYB~3y1?B#%~M(^2KOHmfN0;~Jyeu{21 zX|Pt^qQN!}InbhWb~Pr>@Vit+43 zy3k0>yo&L_q-pI)gLLVq$j|H(PDPo-9hkP1@u=w^4y&{@^H2@>6R;8a#svgH5CdyS z9%`{_7Q$-CzuI9;a#&d_8pCnOfZg%_z+Sd!7pjjxgH`_KnUAvo!qRnBn+gMgkwJb6 zN}3#yv`_XHw3?50(Z-_wz{`fg5yu>8U~t4Sr+FA0xvL}g3y^WOcXVyg4%8aQ;0Ojs 
zQt?rZ?S$Tur`ZoLII=o`c-Q8Q?#lFmb$xN10VXz zZTbO?f$sU5i;8Ar)bXLOW5QG(EW3-+j_vky_!|$DW=(}Of(Ihm$0{E;_ zbPzSi_rMxW^U;nt@9%NNvkk!|f%U`^A^0^I+V$;r@2eR%BSyU{0 z3N`%`Vf_qq(QpJC&jZev7TaaHuY@%ssR<}nEr2v4>BL)C)M6C_A4B~KO)Ss=2VjN| z<2b~nFwhg&%l5fw^P%2s`wy(cMjq_SV4fTxo)s})#w z1ei7qjyR?c2nI(S6A_HT5l0l1h~NnfI6tpjO(c9&gTWD)B2W);eqI{R&vQ_U296xT zkt27tHCepk;myyxhV%0rVb{@BB60B|hm@qWFJ7d?#f$nK5$U|*MTOpTgUGV4G0N(Q zTA~NDqI7c!>`hTaB%e5H5fH7u0XTdA?}y z8T;B1A!sOa-P@uifSuCR}|y= z6>7lpMp84b`6U{Xm`d_LcbO#N5SzHhgCPf)E_+=6Oa)w$iYON5FE=YNOiKHRd!rLxo;6w#S|VcF}a@OzFO#JFHsZ65Zk&_l20#G)xj7*B3Vn zcp|lQt4@yl3E+uj3NCWoFNWoQBNOe){g?t)7t5JWogDKEVJX|o)LY~jLS8{V!pAkB{yVV7VtU%pi^i5)^nqP8V+^{dfoiM>p3D%+ zRBh#K1Go-4R@;DJ8g+6EnFOn`a+$W_9E-pu0$VpVYmqeW$I4+v?Hi^~C%4VOHS)Gn zX6+izF$l0&Gw4azCbKD%?*=Ss15rs60-LIOrnnKkXx66T7Pxj!)pD+wpPV7#^PiRSU*~;l5jr;SGLT`Bi1xvPO#xx zRarGZ(-(_#)PSwWNT$+amq9nHqErgV{fvjP8FdR~u;!y-JrfE_FPgP2776KL(5}3Q zX^X{e*F}JvT2w@HDY2Yxi)G^xtVC=jVtS%n=sPyV%PMMEi6n-gSSyFD-gkJ86@al* zB1u?@^rO3yl^va{ME(_s-I?BN=b|Ff^ED?ViP)Wy_bjvj%j+>%Emb>rMuy!Pq~v8! 
z7DV6a#*WT&XUf$zGD_i&4H2|@9X3R|nOMJ_H-in4i4TS$B4(O%av((|pzlOu?_{&h z1olA_STuYL-F)ct2IylarobWxWQkw_!HO-Yzbk{q4PN^cWsv2tCm7ZqHZ0cT zQYZ-oKD~2WfnsK-z&gQM{R;)t3yTIRT;ztq^V=YyP6q5bi-J8OKtWsVmxvE%f`4@| z74~L;ycv8r6Z$}SGx3rr)Hcunsw^C8<}l#k|0LARfJ4ptkv(dSd~|B4*+Cp?_G{ZE zSOYC_s99F(Aor78ft9U1uU^cPduh>LLd`Y+$E4s;vtO4PRUFQvOd85mN>Mt+(u4A{ Jf#Y9y`5(}5IsyOy literal 0 HcmV?d00001 diff --git a/packages/context/test/fixtures/relationship-benchmarks/semantic_embedding_aliases_no_declared_constraints/column-embeddings.json b/packages/context/test/fixtures/relationship-benchmarks/semantic_embedding_aliases_no_declared_constraints/column-embeddings.json new file mode 100644 index 00000000..454be240 --- /dev/null +++ b/packages/context/test/fixtures/relationship-benchmarks/semantic_embedding_aliases_no_declared_constraints/column-embeddings.json @@ -0,0 +1,6 @@ +{ + "customers.id": [1, 0, 0], + "customers.name": [0, 1, 0], + "orders.id": [0, 0, 1], + "orders.buyer_ref": [0.995, 0.005, 0] +} diff --git a/packages/context/test/fixtures/relationship-benchmarks/semantic_embedding_aliases_no_declared_constraints/data.sqlite b/packages/context/test/fixtures/relationship-benchmarks/semantic_embedding_aliases_no_declared_constraints/data.sqlite new file mode 100644 index 0000000000000000000000000000000000000000..256acb3e285b329ee3be75931767ecd47c3f8efb GIT binary patch literal 12288 zcmeI$F-yZh6bJBocL|}@a4r%u^+pTg;s?+up$MVH8o|kuv{xuJ2GSIC?1ymn>-aHT zy16+BPF}1ji0B^pKe)Tc<;Y8Z+r6vHXjW=9&5JB4)iK*+MC?>4#+YS>FryA`l4`FG zs*`_Kiyb{ayXF9wCrmz>2>}5JKmY;|fB*y_009U<00Izbp1>`4Jm064S0?FP=fy-9 zi_I*~hhY$hDh|$~P;K_q!EB-~`f=C`hpIn_RsTAQ4pq8*(8Z|G(_hcJBXYb>hgR;_ z`^U>gnP;ZNHsL?2Y`6a?beCjW#o>)9{9U|T-OYu3Ve(zRngsy?2tWV=5P$##AOHaf zKmY;|fItHU+Cq>odhGOLaEk=B+MjoPQ#=3w literal 0 HcmV?d00001 diff --git a/packages/context/test/fixtures/relationship-benchmarks/semantic_embedding_aliases_no_declared_constraints/expected-links.yaml 
b/packages/context/test/fixtures/relationship-benchmarks/semantic_embedding_aliases_no_declared_constraints/expected-links.yaml new file mode 100644 index 00000000..150d4e05 --- /dev/null +++ b/packages/context/test/fixtures/relationship-benchmarks/semantic_embedding_aliases_no_declared_constraints/expected-links.yaml @@ -0,0 +1,11 @@ +expectedPks: + - table: customers + columns: [id] + - table: orders + columns: [id] +expectedLinks: + - fromTable: orders + fromColumns: [buyer_ref] + toTable: customers + toColumns: [id] + relationship: many_to_one diff --git a/packages/context/test/fixtures/relationship-benchmarks/semantic_embedding_aliases_no_declared_constraints/fixture.yaml b/packages/context/test/fixtures/relationship-benchmarks/semantic_embedding_aliases_no_declared_constraints/fixture.yaml new file mode 100644 index 00000000..0132b1d3 --- /dev/null +++ b/packages/context/test/fixtures/relationship-benchmarks/semantic_embedding_aliases_no_declared_constraints/fixture.yaml @@ -0,0 +1,8 @@ +id: semantic_embedding_aliases_no_declared_constraints +name: Semantic embedding alias fixture with no declared constraints +tier: row_bearing +origin: synthetic +defaultModes: + - declared_pks_and_declared_fks_removed + - llm_disabled + - embeddings_disabled diff --git a/packages/context/test/fixtures/relationship-benchmarks/semantic_embedding_aliases_no_declared_constraints/snapshot.json b/packages/context/test/fixtures/relationship-benchmarks/semantic_embedding_aliases_no_declared_constraints/snapshot.json new file mode 100644 index 00000000..9ce50f7e --- /dev/null +++ b/packages/context/test/fixtures/relationship-benchmarks/semantic_embedding_aliases_no_declared_constraints/snapshot.json @@ -0,0 +1,67 @@ +{ + "connectionId": "semantic_embedding_aliases_no_declared_constraints", + "driver": "sqlite", + "extractedAt": "2026-05-07T00:00:00.000Z", + "scope": {}, + "metadata": {}, + "tables": [ + { + "catalog": null, + "db": null, + "name": "customers", + "kind": "table", + 
"comment": null, + "estimatedRows": 3, + "columns": [ + { + "name": "id", + "nativeType": "INTEGER", + "normalizedType": "integer", + "dimensionType": "number", + "nullable": false, + "primaryKey": false, + "comment": null + }, + { + "name": "name", + "nativeType": "TEXT", + "normalizedType": "text", + "dimensionType": "string", + "nullable": false, + "primaryKey": false, + "comment": null + } + ], + "foreignKeys": [] + }, + { + "catalog": null, + "db": null, + "name": "orders", + "kind": "table", + "comment": null, + "estimatedRows": 4, + "columns": [ + { + "name": "id", + "nativeType": "INTEGER", + "normalizedType": "integer", + "dimensionType": "number", + "nullable": false, + "primaryKey": false, + "comment": null + }, + { + "name": "buyer_ref", + "nativeType": "INTEGER", + "normalizedType": "integer", + "dimensionType": "number", + "nullable": false, + "primaryKey": false, + "comment": null + } + ], + "foreignKeys": [] + } + ] +} diff --git a/packages/context/tsconfig.json b/packages/context/tsconfig.json new file mode 100644 index 00000000..965e6978 --- /dev/null +++ b/packages/context/tsconfig.json @@ -0,0 +1,9 @@ +{ + "extends": "../../tsconfig.base.json", + "compilerOptions": { + "outDir": "./dist", + "rootDir": "./src" + }, + "include": ["src/**/*.ts"], + "exclude": ["dist", "node_modules"] +} diff --git a/packages/context/vitest.config.ts b/packages/context/vitest.config.ts new file mode 100644 index 00000000..2339ffd3 --- /dev/null +++ b/packages/context/vitest.config.ts @@ -0,0 +1,8 @@ +import { defineConfig } from 'vitest/config'; + +export default defineConfig({ + test: { + root: '.', + include: ['src/**/*.test.ts'], + }, +}); diff --git a/packages/llm/package.json b/packages/llm/package.json new file mode 100644 index 00000000..54920022 --- /dev/null +++ b/packages/llm/package.json @@ -0,0 +1,49 @@ +{ + "name": "@klo/llm", + "version": "0.0.0-private", + "description": "Canonical KLO LLM and embedding provider package", + "private": true, + "type": 
"module", + "engines": { + "node": ">=22.0.0" + }, + "main": "dist/index.js", + "types": "dist/index.d.ts", + "exports": { + ".": { + "types": "./dist/index.d.ts", + "import": "./dist/index.js", + "default": "./dist/index.js" + }, + "./package.json": "./package.json" + }, + "files": [ + "dist" + ], + "scripts": { + "build": "tsc -p tsconfig.json", + "test": "vitest run", + "type-check": "tsc -p tsconfig.json --noEmit" + }, + "dependencies": { + "@ai-sdk/anthropic": "3.0.71", + "@ai-sdk/google-vertex": "^4.0.112", + "ai": "^6.0.168", + "openai": "^6.25.0" + }, + "devDependencies": { + "@types/node": "^24.3.0", + "typescript": "^5.9.3", + "vitest": "^4.0.18" + }, + "license": "Apache-2.0", + "repository": { + "type": "git", + "url": "git+https://github.com/kaelio/ktx.git", + "directory": "packages/llm" + }, + "bugs": { + "url": "https://github.com/kaelio/ktx/issues" + }, + "homepage": "https://github.com/kaelio/ktx#readme" +} diff --git a/packages/llm/src/embedding-health.test.ts b/packages/llm/src/embedding-health.test.ts new file mode 100644 index 00000000..70e15515 --- /dev/null +++ b/packages/llm/src/embedding-health.test.ts @@ -0,0 +1,106 @@ +import { describe, expect, it, vi } from 'vitest'; +import { runKloEmbeddingHealthCheck } from './embedding-health.js'; + +describe('KLO embedding health check', () => { + it('runs a one-shot OpenAI embedding check through the configured provider', async () => { + const createOpenAIClient = vi.fn(() => ({ + embeddings: { + create: vi.fn().mockResolvedValue({ + data: [{ index: 0, embedding: [0.1, 0.2, 0.3] }], + }), + }, + })); + + await expect( + runKloEmbeddingHealthCheck( + { + backend: 'openai', + model: 'text-embedding-3-small', + dimensions: 3, + openai: { apiKey: 'sk-openai-test' }, + }, + { deps: { createOpenAIClient } }, + ), + ).resolves.toEqual({ ok: true }); + + expect(createOpenAIClient).toHaveBeenCalledWith({ apiKey: 'sk-openai-test', baseURL: undefined }); + }); + + it('returns failed when the provider returns 
the wrong dimensions', async () => { + const createOpenAIClient = vi.fn(() => ({ + embeddings: { + create: vi.fn().mockResolvedValue({ + data: [{ index: 0, embedding: [0.1, 0.2] }], + }), + }, + })); + + await expect( + runKloEmbeddingHealthCheck( + { + backend: 'openai', + model: 'text-embedding-3-small', + dimensions: 3, + openai: { apiKey: 'sk-openai-test' }, + }, + { deps: { createOpenAIClient } }, + ), + ).resolves.toEqual({ + ok: false, + message: 'Embedding provider openai returned vector with 2 dimensions; expected 3', + }); + }); + + it('redacts credential values from health-check failures', async () => { + const createOpenAIClient = vi.fn(() => ({ + embeddings: { + create: vi.fn(async () => { + throw new Error('401 invalid api key sk-openai-secret'); + }), + }, + })); + + await expect( + runKloEmbeddingHealthCheck( + { + backend: 'openai', + model: 'text-embedding-3-small', + dimensions: 3, + openai: { apiKey: 'sk-openai-secret' }, + }, + { deps: { createOpenAIClient } }, + ), + ).resolves.toEqual({ + ok: false, + message: '401 invalid api key [redacted]', + }); + }); + + it('returns failed when the health check times out', async () => { + const createOpenAIClient = vi.fn(() => ({ + embeddings: { + create: vi.fn( + () => + new Promise<{ data: Array<{ index?: number; embedding: number[] }>; usage?: { total_tokens?: number } }>( + () => undefined, + ), + ), + }, + })); + + await expect( + runKloEmbeddingHealthCheck( + { + backend: 'openai', + model: 'text-embedding-3-small', + dimensions: 3, + openai: { apiKey: 'sk-openai-test' }, + }, + { timeoutMs: 1, deps: { createOpenAIClient } }, + ), + ).resolves.toEqual({ + ok: false, + message: 'Embedding health check timed out after 1ms', + }); + }); +}); diff --git a/packages/llm/src/embedding-health.ts b/packages/llm/src/embedding-health.ts new file mode 100644 index 00000000..4662169e --- /dev/null +++ b/packages/llm/src/embedding-health.ts @@ -0,0 +1,54 @@ +import { createKloEmbeddingProvider, type 
KloEmbeddingProviderDeps } from './embedding-provider.js'; +import type { KloEmbeddingConfig } from './types.js'; + +export type KloEmbeddingHealthCheckResult = { ok: true } | { ok: false; message: string }; + +export interface KloEmbeddingHealthCheckOptions { + text?: string; + timeoutMs?: number; + deps?: KloEmbeddingProviderDeps; +} + +function redactHealthCheckMessage(message: string, config: KloEmbeddingConfig): string { + const secrets = [config.openai?.apiKey].filter( + (value): value is string => typeof value === 'string' && value.length > 0, + ); + return secrets.reduce((current, secret) => current.split(secret).join('[redacted]'), message); +} + +async function withTimeout(promise: Promise, timeoutMs: number): Promise { + let timeout: NodeJS.Timeout | undefined; + const timeoutPromise = new Promise((_resolve, reject) => { + timeout = setTimeout(() => reject(new Error(`Embedding health check timed out after ${timeoutMs}ms`)), timeoutMs); + }); + try { + return await Promise.race([promise, timeoutPromise]); + } finally { + if (timeout) { + clearTimeout(timeout); + } + } +} + +export async function runKloEmbeddingHealthCheck( + config: KloEmbeddingConfig, + options: KloEmbeddingHealthCheckOptions = {}, +): Promise { + try { + const provider = createKloEmbeddingProvider(config, options.deps); + const embedding = await withTimeout( + provider.embed(options.text ?? 'KLO embedding health check'), + options.timeoutMs ?? 15_000, + ); + if (embedding.length !== config.dimensions) { + return { + ok: false, + message: `Embedding provider ${config.backend} returned vector with ${embedding.length} dimensions; expected ${config.dimensions}`, + }; + } + return { ok: true }; + } catch (error) { + const message = error instanceof Error ? 
error.message : String(error); + return { ok: false, message: redactHealthCheckMessage(message, config) }; + } +} diff --git a/packages/llm/src/embedding-provider.test.ts b/packages/llm/src/embedding-provider.test.ts new file mode 100644 index 00000000..f6f63522 --- /dev/null +++ b/packages/llm/src/embedding-provider.test.ts @@ -0,0 +1,146 @@ +import { describe, expect, it, vi } from 'vitest'; +import { createKloEmbeddingProvider } from './embedding-provider.js'; +import type { KloEmbeddingConfig } from './types.js'; + +describe('createKloEmbeddingProvider', () => { + it('creates deterministic embeddings with stable dimensions', async () => { + const provider = createKloEmbeddingProvider({ + backend: 'deterministic', + model: 'sha256', + dimensions: 6, + batchSize: 4, + }); + + await expect(provider.embed('Revenue policy')).resolves.toHaveLength(6); + await expect(provider.embed('Revenue policy')).resolves.toEqual(await provider.embed('Revenue policy')); + await expect(provider.embed('Revenue policy')).resolves.not.toEqual(await provider.embed('Approval policy')); + await expect(provider.embedMany(['a', 'b'])).resolves.toHaveLength(2); + expect(provider.maxBatchSize).toBe(4); + }); + + it('rejects gateway embeddings', () => { + const config = JSON.parse( + JSON.stringify({ + backend: 'gateway', + model: 'provider/text-embedding', + dimensions: 2, + gateway: { apiKey: 'gateway-key' }, // pragma: allowlist secret + }), + ) as KloEmbeddingConfig; + + expect(() => createKloEmbeddingProvider(config)).toThrow('Unsupported KLO embedding backend: gateway'); + }); + + it('uses OpenAI embeddings with configured dimensions', async () => { + const createOpenAIClient = vi.fn(() => ({ + embeddings: { + create: vi.fn().mockResolvedValue({ + data: [{ index: 0, embedding: [0.1, 0.2] }], + usage: { total_tokens: 7 }, + }), + }, + })); + + const provider = createKloEmbeddingProvider( + { + backend: 'openai', + model: 'text-embedding-3-small', + dimensions: 2, + openai: { apiKey: 
'openai-key', baseURL: 'https://openai.test/v1' }, // pragma: allowlist secret + }, + { createOpenAIClient }, + ); + + await expect(provider.embed('hello')).resolves.toEqual([0.1, 0.2]); + expect(createOpenAIClient).toHaveBeenCalledWith({ + apiKey: 'openai-key', // pragma: allowlist secret + baseURL: 'https://openai.test/v1', + }); + }); + + it('supports sentence-transformers pathPrefix defaults and explicit empty prefix', async () => { + const fetch = vi + .fn() + .mockResolvedValueOnce(new Response(JSON.stringify({ embedding: [0.1, 0.2] }), { status: 200 })) + .mockResolvedValueOnce(new Response(JSON.stringify({ embedding: [0.3, 0.4] }), { status: 200 })); + + const provider = createKloEmbeddingProvider( + { + backend: 'sentence-transformers', + model: 'all-MiniLM-L6-v2', + dimensions: 2, + sentenceTransformers: { baseURL: 'https://python.test/' }, + }, + { fetch }, + ); + + await expect(provider.embed('hello')).resolves.toEqual([0.3, 0.4]); + expect(fetch).toHaveBeenNthCalledWith( + 1, + 'https://python.test/api/embeddings/compute', + expect.objectContaining({ method: 'POST' }), + ); + expect(fetch).toHaveBeenNthCalledWith( + 2, + 'https://python.test/api/embeddings/compute', + expect.objectContaining({ method: 'POST' }), + ); + + const daemonFetch = vi + .fn() + .mockResolvedValueOnce(new Response(JSON.stringify({ embedding: [0.1, 0.2] }), { status: 200 })) + .mockResolvedValueOnce(new Response(JSON.stringify({ embeddings: [[0.5, 0.6]] }), { status: 200 })); + + const daemonProvider = createKloEmbeddingProvider( + { + backend: 'sentence-transformers', + model: 'all-MiniLM-L6-v2', + dimensions: 2, + sentenceTransformers: { baseURL: 'https://daemon.test/base/', pathPrefix: '' }, + }, + { fetch: daemonFetch }, + ); + + await expect(daemonProvider.embedMany(['hello'])).resolves.toEqual([[0.5, 0.6]]); + expect(daemonFetch).toHaveBeenNthCalledWith( + 1, + 'https://daemon.test/base/embeddings/compute', + expect.objectContaining({ method: 'POST' }), + ); + 
expect(daemonFetch).toHaveBeenNthCalledWith( + 2, + 'https://daemon.test/base/embeddings/compute-bulk', + expect.objectContaining({ method: 'POST' }), + ); + }); + + it('falls back to one-shot klo-daemon inference when the local HTTP daemon is unavailable', async () => { + const fetch = vi.fn().mockRejectedValue(new TypeError('fetch failed')); + const runSentenceTransformersJson = vi + .fn() + .mockResolvedValueOnce({ embedding: [0.1, 0.2] }) + .mockResolvedValueOnce({ embeddings: [[0.3, 0.4], [0.5, 0.6]] }); + + const provider = createKloEmbeddingProvider( + { + backend: 'sentence-transformers', + model: 'all-MiniLM-L6-v2', + dimensions: 2, + sentenceTransformers: { baseURL: 'http://127.0.0.1:8765', pathPrefix: '' }, + }, + { fetch, runSentenceTransformersJson }, + ); + + await expect(provider.embedMany(['hello', 'world'])).resolves.toEqual([ + [0.3, 0.4], + [0.5, 0.6], + ]); + expect(fetch).toHaveBeenCalledTimes(1); + expect(runSentenceTransformersJson).toHaveBeenNthCalledWith(1, 'embedding-compute', { + text: '__klo_embedding_probe__', + }); + expect(runSentenceTransformersJson).toHaveBeenNthCalledWith(2, 'embedding-compute-bulk', { + texts: ['hello', 'world'], + }); + }); +}); diff --git a/packages/llm/src/embedding-provider.ts b/packages/llm/src/embedding-provider.ts new file mode 100644 index 00000000..804dbdbc --- /dev/null +++ b/packages/llm/src/embedding-provider.ts @@ -0,0 +1,379 @@ +import { createHash } from 'node:crypto'; +import { spawn } from 'node:child_process'; +import { join } from 'node:path'; +import OpenAI from 'openai'; +import type { KloEmbeddingConfig, KloEmbeddingProvider } from './types.js'; + +type FetchFn = typeof fetch; +type SentenceTransformersCommand = 'embedding-compute' | 'embedding-compute-bulk'; +type SentenceTransformersJsonRunner = ( + subcommand: SentenceTransformersCommand, + payload: Record, +) => Promise>; +type SentenceTransformersProcessCommand = { command: string; args: string[] }; + +export interface 
KloEmbeddingProviderDeps { + createOpenAIClient?: (options: { apiKey?: string; baseURL?: string }) => { + embeddings: { + create(input: { + model: string; + input: string | string[]; + dimensions: number; + encoding_format: 'float'; + }): Promise<{ data: Array<{ index?: number; embedding: number[] }>; usage?: { total_tokens?: number } }>; + }; + }; + fetch?: FetchFn; + runSentenceTransformersJson?: SentenceTransformersJsonRunner; + sentenceTransformersCommand?: string; + sentenceTransformersArgs?: string[]; + sentenceTransformersCwd?: string; + sentenceTransformersEnv?: NodeJS.ProcessEnv; +} + +const DEFAULT_BATCH_SIZE = 100; + +function deterministicVector(text: string, dimensions: number): number[] { + const digest = createHash('sha256').update(text).digest(); + return Array.from({ length: dimensions }, (_, index) => { + const byte = digest[index % digest.length]; + return Number(((byte / 255) * 2 - 1).toFixed(6)); + }); +} + +function assertNonEmptyText(text: string): void { + if (!text.trim()) { + throw new Error('Embedding text must be non-empty'); + } +} + +function assertBatchSize(texts: string[], maxBatchSize: number): void { + if (texts.length === 0) { + throw new Error('Embedding text batch must not be empty'); + } + if (texts.length > maxBatchSize) { + throw new Error(`Embedding batch size ${texts.length} exceeds maximum ${maxBatchSize}`); + } + for (const text of texts) { + assertNonEmptyText(text); + } +} + +function assertVectorDimensions(vector: number[], expected: number, backend: string): number[] { + if (!Array.isArray(vector) || vector.some((item) => typeof item !== 'number')) { + throw new Error(`Embedding provider ${backend} returned a malformed vector`); + } + if (vector.length !== expected) { + throw new Error( + `Embedding provider ${backend} returned vector with ${vector.length} dimensions; expected ${expected}`, + ); + } + return vector; +} + +function joinUrl(baseURL: string, pathPrefix: string, path: string): string { + const base = 
baseURL.replace(/\/+$/, ''); + const prefix = pathPrefix.replace(/^\/+|\/+$/g, ''); + const suffix = path.replace(/^\/+/, ''); + return prefix ? `${base}/${prefix}/${suffix}` : `${base}/${suffix}`; +} + +function errorText(error: unknown): string { + if (error instanceof Error) { + return error.cause + ? `${error.name}: ${error.message}; cause: ${errorText(error.cause)}` + : `${error.name}: ${error.message}`; + } + return String(error); +} + +function parseJsonObject(raw: string, subcommand: string): Record { + const parsed = JSON.parse(raw) as unknown; + if (!parsed || typeof parsed !== 'object' || Array.isArray(parsed)) { + throw new Error(`klo-daemon ${subcommand} returned non-object JSON`); + } + return parsed as Record; +} + +function isCommandNotFound(error: unknown): boolean { + return ( + error instanceof Error && + ('code' in error || 'errno' in error) && + ((error as { code?: unknown }).code === 'ENOENT' || (error as { errno?: unknown }).errno === 'ENOENT') + ); +} + +function defaultSentenceTransformersProcessCommands(): SentenceTransformersProcessCommand[] { + const venvBin = + process.platform === 'win32' ? join('.venv', 'Scripts', 'klo-daemon.exe') : join('.venv', 'bin', 'klo-daemon'); + const repoVenvBin = + process.platform === 'win32' + ? 
join('klo', '.venv', 'Scripts', 'klo-daemon.exe') + : join('klo', '.venv', 'bin', 'klo-daemon'); + return [ + { command: 'klo-daemon', args: [] }, + { command: venvBin, args: [] }, + { command: repoVenvBin, args: [] }, + ]; +} + +function runSentenceTransformersProcessCommand( + options: SentenceTransformersProcessCommand & { + cwd?: string; + env?: NodeJS.ProcessEnv; + }, +): SentenceTransformersJsonRunner { + return async ( + subcommand: SentenceTransformersCommand, + payload: Record, + ): Promise> => + new Promise((resolve, reject) => { + const child = spawn(options.command, [...options.args, subcommand], { + cwd: options.cwd, + env: { ...process.env, ...options.env }, + stdio: ['pipe', 'pipe', 'pipe'], + }); + const stdout: Buffer[] = []; + const stderr: Buffer[] = []; + + child.stdout.on('data', (chunk: Buffer) => stdout.push(chunk)); + child.stderr.on('data', (chunk: Buffer) => stderr.push(chunk)); + child.on('error', reject); + child.on('close', (code) => { + const stdoutText = Buffer.concat(stdout).toString('utf8').trim(); + const stderrText = Buffer.concat(stderr).toString('utf8').trim(); + if (code !== 0) { + reject(new Error(`klo-daemon ${subcommand} failed: ${stderrText || `exit code ${code}`}`)); + return; + } + try { + resolve(parseJsonObject(stdoutText, subcommand)); + } catch (error) { + reject(error); + } + }); + child.stdin.end(`${JSON.stringify(payload)}\n`); + }); +} + +function runSentenceTransformersProcessJson(options: { + commands: SentenceTransformersProcessCommand[]; + cwd?: string; + env?: NodeJS.ProcessEnv; +}): SentenceTransformersJsonRunner { + return async ( + subcommand: SentenceTransformersCommand, + payload: Record, + ): Promise> => { + const errors: string[] = []; + for (const command of options.commands) { + try { + return await runSentenceTransformersProcessCommand({ + ...command, + cwd: options.cwd, + env: options.env, + })(subcommand, payload); + } catch (error) { + errors.push(`${command.command}: ${errorText(error)}`); + if 
(!isCommandNotFound(error)) { + break; + } + } + } + throw new Error(`klo-daemon ${subcommand} failed: ${errors.join('; ')}`); + }; +} + +class DeterministicEmbeddingProvider implements KloEmbeddingProvider { + readonly maxBatchSize: number; + + constructor(readonly dimensions: number, batchSize = DEFAULT_BATCH_SIZE) { + this.maxBatchSize = batchSize; + } + + async embed(text: string): Promise { + assertNonEmptyText(text); + return deterministicVector(text, this.dimensions); + } + + async embedMany(texts: string[]): Promise { + assertBatchSize(texts, this.maxBatchSize); + return texts.map((text) => deterministicVector(text, this.dimensions)); + } +} + +class OpenAIEmbeddingProvider implements KloEmbeddingProvider { + readonly dimensions: number; + readonly maxBatchSize: number; + private readonly client: ReturnType>; + + constructor( + private readonly config: KloEmbeddingConfig, + deps: KloEmbeddingProviderDeps, + ) { + this.dimensions = config.dimensions; + this.maxBatchSize = config.batchSize ?? DEFAULT_BATCH_SIZE; + if (!config.openai?.apiKey) { + throw new Error('openai.apiKey is required when KLO embedding backend is openai'); + } + this.client = deps.createOpenAIClient + ? deps.createOpenAIClient({ apiKey: config.openai.apiKey, baseURL: config.openai.baseURL }) + : new OpenAI({ + apiKey: config.openai.apiKey, + ...(config.openai.baseURL ? { baseURL: config.openai.baseURL } : {}), + }); + } + + async embed(text: string): Promise { + const [embedding] = await this.embedMany([text]); + if (!embedding) { + throw new Error('Embedding provider openai returned no embedding'); + } + return embedding; + } + + async embedMany(texts: string[]): Promise { + assertBatchSize(texts, this.maxBatchSize); + const response = await this.client.embeddings.create({ + model: this.config.model, + input: texts.length === 1 ? texts[0] : texts, + dimensions: this.dimensions, + encoding_format: 'float', + }); + const sorted = [...response.data].sort((a, b) => (a.index ?? 
0) - (b.index ?? 0)); + const embeddings = sorted.map((item) => item.embedding); + if (embeddings.length !== texts.length) { + throw new Error(`Embedding provider openai returned ${embeddings.length} embeddings for ${texts.length} texts`); + } + return embeddings.map((embedding) => assertVectorDimensions(embedding, this.dimensions, 'openai')); + } +} + +class SentenceTransformersEmbeddingProvider implements KloEmbeddingProvider { + readonly dimensions: number; + readonly maxBatchSize: number; + private readonly fetch: FetchFn; + private readonly baseURL: string; + private readonly pathPrefix: string; + private readonly runJson: SentenceTransformersJsonRunner; + private readonly startupProbe: Promise; + private useProcessRunner = false; + + constructor(config: KloEmbeddingConfig, deps: KloEmbeddingProviderDeps) { + if (!config.sentenceTransformers?.baseURL) { + throw new Error('sentenceTransformers.baseURL is required when KLO embedding backend is sentence-transformers'); + } + this.dimensions = config.dimensions; + this.maxBatchSize = config.batchSize ?? DEFAULT_BATCH_SIZE; + this.fetch = deps.fetch ?? fetch; + this.baseURL = config.sentenceTransformers.baseURL; + this.pathPrefix = config.sentenceTransformers.pathPrefix ?? '/api'; + this.runJson = + deps.runSentenceTransformersJson ?? + runSentenceTransformersProcessJson({ + commands: deps.sentenceTransformersCommand + ? [{ command: deps.sentenceTransformersCommand, args: deps.sentenceTransformersArgs ?? 
[] }] + : defaultSentenceTransformersProcessCommands(), + cwd: deps.sentenceTransformersCwd, + env: deps.sentenceTransformersEnv, + }); + this.startupProbe = this.requestSingle('__klo_embedding_probe__').then((embedding) => { + assertVectorDimensions(embedding, this.dimensions, 'sentence-transformers'); + }); + } + + async embed(text: string): Promise { + assertNonEmptyText(text); + await this.startupProbe; + return assertVectorDimensions(await this.requestSingle(text), this.dimensions, 'sentence-transformers'); + } + + async embedMany(texts: string[]): Promise { + assertBatchSize(texts, this.maxBatchSize); + await this.startupProbe; + const response = await this.requestJson('embedding-compute-bulk', '/embeddings/compute-bulk', { texts }); + if ( + !response || + typeof response !== 'object' || + !('embeddings' in response) || + !Array.isArray(response.embeddings) + ) { + throw new Error('Embedding provider sentence-transformers returned malformed bulk response'); + } + if (response.embeddings.length !== texts.length) { + const count = response.embeddings.length; + throw new Error( + `Embedding provider sentence-transformers returned ${count} embeddings for ${texts.length} texts`, + ); + } + return response.embeddings.map((embedding: unknown) => + assertVectorDimensions(embedding as number[], this.dimensions, 'sentence-transformers'), + ); + } + + private async requestSingle(text: string): Promise { + const response = await this.requestJson('embedding-compute', '/embeddings/compute', { text }); + if (!response || typeof response !== 'object' || !('embedding' in response) || !Array.isArray(response.embedding)) { + throw new Error('Embedding provider sentence-transformers returned malformed single response'); + } + return response.embedding; + } + + private async requestJson( + command: SentenceTransformersCommand, + path: string, + body: Record, + ): Promise> { + if (this.useProcessRunner) { + return this.runJson(command, body); + } + + try { + return await 
this.postJson(path, body); + } catch (httpError) { + try { + const response = await this.runJson(command, body); + this.useProcessRunner = true; + return response; + } catch (processError) { + throw new Error( + `Embedding provider sentence-transformers local HTTP request failed (${errorText( + httpError, + )}) and klo-daemon fallback failed (${errorText(processError)})`, + ); + } + } + } + + private async postJson(path: string, body: Record): Promise> { + const response = await this.fetch(joinUrl(this.baseURL, this.pathPrefix, path), { + method: 'POST', + headers: { 'content-type': 'application/json' }, + body: JSON.stringify(body), + }); + if (!response.ok) { + throw new Error(`Embedding provider sentence-transformers request failed with HTTP ${response.status}`); + } + const parsed = (await response.json()) as unknown; + if (!parsed || typeof parsed !== 'object' || Array.isArray(parsed)) { + throw new Error('Embedding provider sentence-transformers returned non-object JSON'); + } + return parsed as Record; + } +} + +export function createKloEmbeddingProvider( + config: KloEmbeddingConfig, + deps: KloEmbeddingProviderDeps = {}, +): KloEmbeddingProvider { + switch (config.backend) { + case 'deterministic': + return new DeterministicEmbeddingProvider(config.dimensions, config.batchSize); + case 'openai': + return new OpenAIEmbeddingProvider(config, deps); + case 'sentence-transformers': + return new SentenceTransformersEmbeddingProvider(config, deps); + default: + throw new Error(`Unsupported KLO embedding backend: ${String((config as { backend?: string }).backend)}`); + } +} diff --git a/packages/llm/src/index.ts b/packages/llm/src/index.ts new file mode 100644 index 00000000..bcd9e8e4 --- /dev/null +++ b/packages/llm/src/index.ts @@ -0,0 +1,30 @@ +export { createKloEmbeddingProvider } from './embedding-provider.js'; +export { runKloEmbeddingHealthCheck } from './embedding-health.js'; +export { KloMessageBuilder } from './message-builder.js'; +export type { 
KloEmbeddingHealthCheckOptions, KloEmbeddingHealthCheckResult } from './embedding-health.js'; +export type { KloEmbeddingProviderDeps } from './embedding-provider.js'; +export type { KloLlmHealthCheckDeps, KloLlmHealthCheckOptions, KloLlmHealthCheckResult } from './model-health.js'; +export { runKloLlmHealthCheck } from './model-health.js'; +export { + createKloLlmProvider, + isAnthropicProtocolModel, + modelIdFromLanguageModel, + type KloLlmProviderFactoryDeps, +} from './model-provider.js'; +export type { + KloEmbeddingBackend, + KloEmbeddingConfig, + KloEmbeddingProvider, + KloEmbeddingTokenUsageEvent, + KloJsonValue, + KloLlmBackend, + KloLlmConfig, + KloLlmProvider, + KloModelRole, + KloPromptCacheTtl, + KloPromptCachingConfig, + KloPromptParts, + KloProviderOptions, + KloTokenUsageEvent, +} from './types.js'; +export { KLO_MODEL_ROLES } from './types.js'; diff --git a/packages/llm/src/message-builder.test.ts b/packages/llm/src/message-builder.test.ts new file mode 100644 index 00000000..d5c2e002 --- /dev/null +++ b/packages/llm/src/message-builder.test.ts @@ -0,0 +1,113 @@ +import type { ModelMessage } from 'ai'; +import { describe, expect, it } from 'vitest'; +import { KloMessageBuilder } from './message-builder.js'; +import { createKloLlmProvider } from './model-provider.js'; + +function makeBuilder(overrides: Parameters[0]['promptCaching'] = {}) { + const provider = createKloLlmProvider({ + backend: 'gateway', + gateway: { baseURL: 'https://gateway.test' }, + modelSlots: { default: 'anthropic/claude-sonnet-4-6' }, + promptCaching: { enabled: true, ...overrides }, + }); + return new KloMessageBuilder(provider); +} + +describe('KloMessageBuilder.build', () => { + it('caches static system, last sorted tool, and last history message', () => { + const builder = makeBuilder(); + + const out = builder.build({ + parts: { staticSystem: 'STATIC', dynamicSystem: 'DYNAMIC' }, + history: [ + { role: 'user', content: 'first' }, + { role: 'assistant', content: [{ type: 
'text', text: 'reply A' }, { type: 'text', text: 'reply B' }] } as ModelMessage, + ], + currentMessage: { role: 'user', content: 'now' }, + tools: { + zoo: { description: 'z' }, + apple: { description: 'a' }, + }, + model: 'anthropic/claude-sonnet-4-6', + }); + + expect(out.messages[0]).toMatchObject({ + role: 'system', + content: 'STATIC', + providerOptions: { anthropic: { cacheControl: { type: 'ephemeral', ttl: '1h' } } }, + }); + expect(out.messages[1]).toMatchObject({ role: 'system', content: 'DYNAMIC' }); + expect((out.messages[1] as { providerOptions?: unknown }).providerOptions).toBeUndefined(); + expect((out.messages[3] as { content: Array<{ providerOptions?: unknown }> }).content[1].providerOptions).toEqual({ + anthropic: { cacheControl: { type: 'ephemeral', ttl: '5m' } }, + }); + expect(Object.keys(out.tools)).toEqual(['apple', 'zoo']); + expect((out.tools.zoo as { providerOptions?: unknown }).providerOptions).toEqual({ + anthropic: { cacheControl: { type: 'ephemeral', ttl: '1h' } }, + }); + }); + + it('wraps leading user context onto currentMessage as a system reminder part', () => { + const builder = makeBuilder(); + + const out = builder.build({ + parts: { staticSystem: 'STATIC', leadingUserContext: 'current_date: 2026-05-04' }, + history: [], + currentMessage: { role: 'user', content: 'question' }, + tools: {}, + model: 'anthropic/claude-sonnet-4-6', + }); + + expect(out.messages[out.messages.length - 1]).toMatchObject({ + role: 'user', + content: [ + { type: 'text', text: '\ncurrent_date: 2026-05-04\n' }, + { type: 'text', text: 'question' }, + ], + }); + }); + + it('omits cache markers for non-Anthropic protocol models', () => { + const builder = makeBuilder(); + + const out = builder.wrapSimple({ + system: 'SYS', + messages: [{ role: 'user', content: 'q' }], + tools: { z: {} }, + model: 'gpt-5', + }); + + expect((out.messages[0] as { providerOptions?: unknown }).providerOptions).toBeUndefined(); + expect((out.tools.z as { providerOptions?: unknown 
}).providerOptions).toBeUndefined(); + }); + + it('clamps every TTL to 5m for Vertex when vertexFallbackTo5m is enabled', () => { + const provider = createKloLlmProvider({ + backend: 'vertex', + vertex: { project: 'klo-test', location: 'us-east5' }, + modelSlots: { default: 'claude-sonnet-4-6' }, + promptCaching: { + enabled: true, + systemTtl: '1h', + toolsTtl: '1h', + historyTtl: '1h', + vertexFallbackTo5m: true, + }, + }); + const builder = new KloMessageBuilder(provider); + + const out = builder.build({ + parts: { staticSystem: 'STATIC' }, + history: [{ role: 'user', content: 'history' }], + currentMessage: { role: 'user', content: 'now' }, + tools: { z: {} }, + model: 'claude-sonnet-4-6', + }); + + expect((out.messages[0] as { providerOptions: any }).providerOptions.anthropic.cacheControl.ttl).toBe('5m'); + expect((out.messages[1] as { content: Array<{ providerOptions: any }> }).content[0].providerOptions.anthropic.cacheControl.ttl).toBe( + '5m', + ); + expect((out.tools.z as { providerOptions: any }).providerOptions.anthropic.cacheControl.ttl).toBe('5m'); + }); +}); diff --git a/packages/llm/src/message-builder.ts b/packages/llm/src/message-builder.ts new file mode 100644 index 00000000..920e647e --- /dev/null +++ b/packages/llm/src/message-builder.ts @@ -0,0 +1,197 @@ +import type { LanguageModel, ModelMessage, ToolSet } from 'ai'; +import { isAnthropicProtocolModel } from './model-provider.js'; +import type { KloLlmProvider, KloPromptCacheTtl, KloPromptParts } from './types.js'; + +type ToolMap = ToolSet | Record>; + +interface KloMessageBuilderOptions { + cacheSystem?: boolean; + cacheTools?: boolean; + cacheLastHistory?: boolean; +} + +interface KloBuildInput { + parts: KloPromptParts; + history: ModelMessage[]; + currentMessage: ModelMessage; + tools: ToolMap; + model: LanguageModel | string; +} + +interface KloWrapSimpleInput { + system?: string; + messages?: ModelMessage[]; + tools?: ToolMap; + model: LanguageModel | string; +} + +interface 
KloBuildOutput { + messages: ModelMessage[]; + tools: ToolMap; +} + +export class KloMessageBuilder { + constructor( + private readonly provider: KloLlmProvider, + private readonly options: KloMessageBuilderOptions = {}, + ) {} + + build(input: KloBuildInput): KloBuildOutput { + const cfg = this.provider.promptCachingConfig(); + const cachingActive = cfg.enabled && isAnthropicProtocolModel(input.model); + const ttls = this.resolveTtls(input.model); + const messages: ModelMessage[] = []; + + const systemMessage: ModelMessage & { providerOptions?: unknown } = { + role: 'system', + content: input.parts.staticSystem, + }; + if (cachingActive && this.cacheSystemEnabled()) { + systemMessage.providerOptions = this.provider.cacheMarker(ttls.systemTtl, input.model); + } + messages.push(systemMessage); + + if (input.parts.dynamicSystem) { + messages.push({ role: 'system', content: input.parts.dynamicSystem }); + } + + const historyToEmit = + cachingActive && this.cacheHistoryEnabled() + ? this.markLastHistoryMessage(input.history, ttls.historyTtl, input.model) + : input.history; + messages.push(...historyToEmit); + messages.push(this.wrapLeading(input.currentMessage, input.parts.leadingUserContext)); + + return { + messages, + tools: this.sortAndMarkTools(input.tools, cachingActive, this.cacheToolsEnabled(), ttls.toolsTtl, input.model), + }; + } + + wrapSimple(input: KloWrapSimpleInput): KloBuildOutput { + const cfg = this.provider.promptCachingConfig(); + const cachingActive = cfg.enabled && isAnthropicProtocolModel(input.model); + const ttls = this.resolveTtls(input.model); + const messages: ModelMessage[] = []; + + if (input.system) { + const systemMessage: ModelMessage & { providerOptions?: unknown } = { + role: 'system', + content: input.system, + }; + if (cachingActive && this.cacheSystemEnabled()) { + systemMessage.providerOptions = this.provider.cacheMarker(ttls.systemTtl, input.model); + } + messages.push(systemMessage); + } + + if (input.messages) { + 
messages.push( + ...(cachingActive && this.cacheHistoryEnabled() + ? this.markLastHistoryMessage(input.messages, ttls.historyTtl, input.model) + : input.messages), + ); + } + + return { + messages, + tools: this.sortAndMarkTools(input.tools ?? {}, cachingActive, this.cacheToolsEnabled(), ttls.toolsTtl, input.model), + }; + } + + private cacheSystemEnabled(): boolean { + return this.options.cacheSystem ?? this.provider.promptCachingConfig().cacheSystem; + } + + private cacheToolsEnabled(): boolean { + return this.options.cacheTools ?? this.provider.promptCachingConfig().cacheTools; + } + + private cacheHistoryEnabled(): boolean { + return this.options.cacheLastHistory ?? this.provider.promptCachingConfig().cacheHistory; + } + + private resolveTtls(model: LanguageModel | string): { + systemTtl: KloPromptCacheTtl; + toolsTtl: KloPromptCacheTtl; + historyTtl: KloPromptCacheTtl; + } { + const cfg = this.provider.promptCachingConfig(); + if (cfg.vertexFallbackTo5m && this.provider.activeBackend() === 'vertex' && isAnthropicProtocolModel(model)) { + return { systemTtl: '5m', toolsTtl: '5m', historyTtl: '5m' }; + } + return { systemTtl: cfg.systemTtl, toolsTtl: cfg.toolsTtl, historyTtl: cfg.historyTtl }; + } + + private wrapLeading(currentMessage: ModelMessage, leadingUserContext?: string): ModelMessage { + if (!leadingUserContext) { + return currentMessage; + } + const reminderPart = { + type: 'text' as const, + text: `\n${leadingUserContext}\n`, + }; + if (typeof currentMessage.content === 'string') { + return { + ...currentMessage, + content: [reminderPart, { type: 'text' as const, text: currentMessage.content }], + } as ModelMessage; + } + if (Array.isArray(currentMessage.content)) { + return { ...currentMessage, content: [reminderPart, ...currentMessage.content] } as ModelMessage; + } + return currentMessage; + } + + private markLastHistoryMessage( + history: ModelMessage[], + ttl: KloPromptCacheTtl, + model: LanguageModel | string, + ): ModelMessage[] { + if 
(history.length === 0) { + return history; + } + const out = [...history]; + const last = out[out.length - 1]; + const marker = this.provider.cacheMarker(ttl, model); + if (!marker) { + return history; + } + if (typeof last.content === 'string') { + out[out.length - 1] = { + ...last, + content: [{ type: 'text', text: last.content, providerOptions: marker }], + } as ModelMessage; + return out; + } + if (Array.isArray(last.content) && last.content.length > 0) { + const parts = [...last.content]; + const lastPart = parts[parts.length - 1]; + parts[parts.length - 1] = Object.assign({}, lastPart, { providerOptions: marker }); + out[out.length - 1] = { ...last, content: parts } as ModelMessage; + } + return out; + } + + private sortAndMarkTools( + tools: ToolMap, + cachingActive: boolean, + cacheTools: boolean, + ttl: KloPromptCacheTtl, + model: LanguageModel | string, + ): ToolMap { + const keys = Object.keys(tools).sort(); + const sorted: Record = {}; + for (const key of keys) { + sorted[key] = tools[key as keyof typeof tools]; + } + if (cachingActive && cacheTools && keys.length > 0) { + const lastKey = keys[keys.length - 1]; + const marker = this.provider.cacheMarker(ttl, model); + if (marker) { + sorted[lastKey] = { ...(sorted[lastKey] as Record), providerOptions: marker }; + } + } + return sorted as ToolMap; + } +} diff --git a/packages/llm/src/model-health.test.ts b/packages/llm/src/model-health.test.ts new file mode 100644 index 00000000..3305886d --- /dev/null +++ b/packages/llm/src/model-health.test.ts @@ -0,0 +1,61 @@ +import { describe, expect, it, vi } from 'vitest'; +import { runKloLlmHealthCheck } from './model-health.js'; + +const anthropicModel = { modelId: 'claude-sonnet-4-6' } as never; + +describe('KLO LLM health check', () => { + it('runs a minimal non-streaming model call through the configured provider', async () => { + const generateText = vi.fn(async () => ({ text: 'ok' })); + const createAnthropic = vi.fn(() => vi.fn(() => anthropicModel)); + + 
await expect( + runKloLlmHealthCheck( + { + backend: 'anthropic', + anthropic: { apiKey: 'sk-ant-test' }, + modelSlots: { default: 'claude-sonnet-4-6' }, + }, + { deps: { createAnthropic, generateText } }, + ), + ).resolves.toEqual({ ok: true }); + + expect(createAnthropic).toHaveBeenCalledWith( + expect.objectContaining({ + apiKey: 'sk-ant-test', + }), + ); + expect(generateText).toHaveBeenCalledWith( + expect.objectContaining({ + model: anthropicModel, + prompt: 'Reply with exactly: ok', + temperature: 0, + maxOutputTokens: 8, + }), + ); + }); + + it('returns a failed result without exposing secret values', async () => { + const generateText = vi.fn(async () => { + throw new Error('401 invalid x-api-key sk-ant-secret'); + }); + + await expect( + runKloLlmHealthCheck( + { + backend: 'anthropic', + anthropic: { apiKey: 'sk-ant-secret' }, + modelSlots: { default: 'claude-sonnet-4-6' }, + }, + { + deps: { + createAnthropic: vi.fn(() => vi.fn(() => anthropicModel)), + generateText, + }, + }, + ), + ).resolves.toEqual({ + ok: false, + message: '401 invalid x-api-key [redacted]', + }); + }); +}); diff --git a/packages/llm/src/model-health.ts b/packages/llm/src/model-health.ts new file mode 100644 index 00000000..78454b17 --- /dev/null +++ b/packages/llm/src/model-health.ts @@ -0,0 +1,60 @@ +import { generateText } from 'ai'; +import { createKloLlmProvider, type KloLlmProviderFactoryDeps } from './model-provider.js'; +import type { KloLlmConfig } from './types.js'; + +export type KloLlmHealthCheckResult = { ok: true } | { ok: false; message: string }; + +export interface KloLlmHealthCheckDeps extends Omit { + generateText?: (options: Parameters[0]) => Promise; +} + +export interface KloLlmHealthCheckOptions { + prompt?: string; + timeoutMs?: number; + deps?: KloLlmHealthCheckDeps; +} + +function redactHealthCheckMessage(message: string, config: KloLlmConfig): string { + const secrets = [config.anthropic?.apiKey, config.gateway?.apiKey].filter( + (value): value is string 
=> typeof value === 'string' && value.length > 0, + ); + return secrets.reduce((current, secret) => current.split(secret).join('[redacted]'), message); +} + +async function withTimeout(promise: Promise, timeoutMs: number): Promise { + let timeout: NodeJS.Timeout | undefined; + const timeoutPromise = new Promise((_resolve, reject) => { + timeout = setTimeout(() => reject(new Error(`LLM health check timed out after ${timeoutMs}ms`)), timeoutMs); + }); + try { + return await Promise.race([promise, timeoutPromise]); + } finally { + if (timeout) { + clearTimeout(timeout); + } + } +} + +export async function runKloLlmHealthCheck( + config: KloLlmConfig, + options: KloLlmHealthCheckOptions = {}, +): Promise { + try { + const { generateText: runGenerateTextOverride, ...providerDeps } = options.deps ?? {}; + const provider = createKloLlmProvider(config, providerDeps); + const runGenerateText = runGenerateTextOverride ?? generateText; + await withTimeout( + runGenerateText({ + model: provider.getModel('default'), + prompt: options.prompt ?? 'Reply with exactly: ok', + temperature: 0, + maxOutputTokens: 8, + }), + options.timeoutMs ?? 15_000, + ); + return { ok: true }; + } catch (error) { + const message = error instanceof Error ? 
error.message : String(error); + return { ok: false, message: redactHealthCheckMessage(message, config) }; + } +} diff --git a/packages/llm/src/model-provider.test.ts b/packages/llm/src/model-provider.test.ts new file mode 100644 index 00000000..2af58f4c --- /dev/null +++ b/packages/llm/src/model-provider.test.ts @@ -0,0 +1,173 @@ +import type { LanguageModel } from 'ai'; +import { describe, expect, it, vi } from 'vitest'; +import { createKloLlmProvider } from './model-provider.js'; + +const languageModel = (modelId: string, provider = 'test'): LanguageModel => ({ modelId, provider }) as LanguageModel; + +describe('createKloLlmProvider', () => { + it('uses direct Anthropic with both beta headers', () => { + const anthropicModel = languageModel('claude-sonnet-4-6', 'anthropic'); + const anthropic = vi.fn(() => anthropicModel); + const createAnthropic = vi.fn(() => anthropic); + + const provider = createKloLlmProvider( + { + backend: 'anthropic', + anthropic: { apiKey: 'test-anthropic-key', baseURL: 'https://anthropic.test' }, // pragma: allowlist secret + modelSlots: { default: 'claude-sonnet-4-6' }, + promptCaching: { enabled: false }, + }, + { createAnthropic }, + ); + + expect(provider.getModel('default')).toBe(anthropicModel); + expect(createAnthropic).toHaveBeenCalledWith({ + apiKey: 'test-anthropic-key', // pragma: allowlist secret + baseURL: 'https://anthropic.test', + headers: { + 'anthropic-beta': 'interleaved-thinking-2025-05-14,extended-cache-ttl-2025-04-11', + }, + }); + expect(anthropic).toHaveBeenCalledWith('claude-sonnet-4-6'); + }); + + it('uses Vertex Anthropic without the direct-Anthropic beta header', () => { + const vertexModel = languageModel('claude-sonnet-4-6', 'vertex'); + const vertex = vi.fn(() => vertexModel); + const createVertexAnthropic = vi.fn(() => vertex); + + const provider = createKloLlmProvider( + { + backend: 'vertex', + vertex: { project: 'klo-test', location: 'us-east5' }, + modelSlots: { default: 'claude-sonnet-4-6' }, + 
promptCaching: { enabled: false }, + }, + { createVertexAnthropic }, + ); + + expect(provider.getModel('default')).toBe(vertexModel); + expect(createVertexAnthropic).toHaveBeenCalledWith({ project: 'klo-test', location: 'us-east5' }); + expect(vertex).toHaveBeenCalledWith('claude-sonnet-4-6'); + }); + + it('uses Gateway and supports role fallback to default', () => { + const gatewayModel = languageModel('anthropic/claude-sonnet-4-6', 'gateway'); + const gateway = vi.fn(() => gatewayModel); + const createGateway = vi.fn(() => gateway); + + const provider = createKloLlmProvider( + { + backend: 'gateway', + gateway: { apiKey: 'gateway-key', baseURL: 'https://gateway.test/v1' }, // pragma: allowlist secret + modelSlots: { default: 'anthropic/claude-sonnet-4-6' }, + promptCaching: { enabled: false }, + }, + { createGateway }, + ); + + expect(provider.getModel('curator')).toBe(gatewayModel); + expect(createGateway).toHaveBeenCalledWith({ + apiKey: 'gateway-key', // pragma: allowlist secret + baseURL: 'https://gateway.test/v1', + }); + expect(gateway).toHaveBeenCalledWith('anthropic/claude-sonnet-4-6'); + }); + + it('uses explicit role overrides before default', () => { + const anthropic = vi.fn((modelId: string) => languageModel(modelId, 'anthropic')); + + const provider = createKloLlmProvider( + { + backend: 'anthropic', + anthropic: { apiKey: 'test-anthropic-key' }, // pragma: allowlist secret + modelSlots: { + default: 'claude-sonnet-4-6', + triage: 'claude-haiku-4-5', + repair: 'claude-opus-4-7', + }, + promptCaching: { enabled: false }, + }, + { createAnthropic: vi.fn(() => anthropic) }, + ); + + expect((provider.getModel('triage') as { modelId: string }).modelId).toBe('claude-haiku-4-5'); + expect((provider.getModel('repair') as { modelId: string }).modelId).toBe('claude-opus-4-7'); + expect((provider.getModel('reconcile') as { modelId: string }).modelId).toBe('claude-sonnet-4-6'); + }); + + it('emits cache markers only when enabled and the model speaks Anthropic 
protocol', () => { + const provider = createKloLlmProvider( + { + backend: 'gateway', + gateway: { baseURL: 'https://gateway.test/v1' }, + modelSlots: { default: 'anthropic/claude-sonnet-4-6' }, + promptCaching: { enabled: true }, + }, + { createGateway: vi.fn(() => vi.fn((modelId: string) => languageModel(modelId, 'gateway'))) }, + ); + + expect(provider.cacheMarker('1h', 'anthropic/claude-sonnet-4-6')).toEqual({ + anthropic: { cacheControl: { type: 'ephemeral', ttl: '1h' } }, + }); + expect(provider.cacheMarker('1h', 'gpt-5')).toBeUndefined(); + }); + + it('returns Anthropic thinking provider options', () => { + const provider = createKloLlmProvider( + { + backend: 'anthropic', + anthropic: { apiKey: 'test-anthropic-key' }, // pragma: allowlist secret + modelSlots: { default: 'claude-sonnet-4-6' }, + promptCaching: { enabled: false }, + }, + { createAnthropic: vi.fn(() => vi.fn((modelId: string) => languageModel(modelId, 'anthropic'))) }, + ); + + expect(provider.thinkingProviderOptions('default', 12000)).toEqual({ + anthropic: { + thinking: { type: 'enabled', budgetTokens: 12000 }, + }, + }); + }); + + it('defaults prompt caching to enabled with canonical TTLs', () => { + const provider = createKloLlmProvider( + { + backend: 'gateway', + gateway: { baseURL: 'https://gateway.test/v1' }, + modelSlots: { default: 'anthropic/claude-sonnet-4-6' }, + }, + { createGateway: vi.fn(() => vi.fn((modelId: string) => languageModel(modelId, 'gateway'))) }, + ); + + expect(provider.promptCachingConfig()).toEqual({ + enabled: true, + systemTtl: '1h', + toolsTtl: '1h', + historyTtl: '5m', + cacheSystem: true, + cacheTools: true, + cacheHistory: true, + vertexFallbackTo5m: false, + }); + expect(provider.cacheMarker('1h', 'anthropic/claude-sonnet-4-6')).toEqual({ + anthropic: { cacheControl: { type: 'ephemeral', ttl: '1h' } }, + }); + }); + + it('preserves explicit prompt caching opt-out', () => { + const provider = createKloLlmProvider( + { + backend: 'anthropic', + anthropic: { 
apiKey: 'test-anthropic-key' }, // pragma: allowlist secret + modelSlots: { default: 'claude-sonnet-4-6' }, + promptCaching: { enabled: false }, + }, + { createAnthropic: vi.fn(() => vi.fn((modelId: string) => languageModel(modelId, 'anthropic'))) }, + ); + + expect(provider.promptCachingConfig().enabled).toBe(false); + expect(provider.cacheMarker('1h', 'claude-sonnet-4-6')).toBeUndefined(); + }); +}); diff --git a/packages/llm/src/model-provider.ts b/packages/llm/src/model-provider.ts new file mode 100644 index 00000000..a240480d --- /dev/null +++ b/packages/llm/src/model-provider.ts @@ -0,0 +1,152 @@ +import { createAnthropic } from '@ai-sdk/anthropic'; +import { createVertexAnthropic } from '@ai-sdk/google-vertex/anthropic'; +import { createGateway, generateText, type LanguageModel } from 'ai'; +import { createKloToolCallRepairHandler } from './repair.js'; +import type { + KloLlmConfig, + KloLlmProvider, + KloModelRole, + KloPromptCacheTtl, + KloPromptCachingConfig, + KloProviderOptions, +} from './types.js'; + +type AnthropicFactory = typeof createAnthropic; +type AnthropicModelFactory = (modelId: string) => LanguageModel; +type VertexAnthropicFactory = (options?: Parameters[0]) => AnthropicModelFactory; +type GatewayFactory = (options?: Parameters[0]) => AnthropicModelFactory; + +export interface KloLlmProviderFactoryDeps { + createAnthropic?: (options?: Parameters[0]) => AnthropicModelFactory; + createVertexAnthropic?: VertexAnthropicFactory; + createGateway?: GatewayFactory; + generateText?: typeof generateText; +} + +const DEFAULT_PROMPT_CACHING: KloPromptCachingConfig = { + enabled: true, + systemTtl: '1h', + toolsTtl: '1h', + historyTtl: '5m', + cacheSystem: true, + cacheTools: true, + cacheHistory: true, + vertexFallbackTo5m: false, +}; + +const DIRECT_ANTHROPIC_BETA_HEADER = 'interleaved-thinking-2025-05-14,extended-cache-ttl-2025-04-11'; + +function resolvePromptCaching(config: KloLlmConfig): KloPromptCachingConfig { + return { 
...DEFAULT_PROMPT_CACHING, ...config.promptCaching }; +} + +export function modelIdFromLanguageModel(model: LanguageModel | string): string { + return typeof model === 'string' ? model : ((model as { modelId?: string }).modelId ?? ''); +} + +export function isAnthropicProtocolModel(model: LanguageModel | string): boolean { + const modelId = modelIdFromLanguageModel(model); + return modelId.startsWith('claude-') || modelId.startsWith('anthropic/') || modelId.includes('/claude-'); +} + +class DefaultKloLlmProvider implements KloLlmProvider { + private readonly promptCaching: KloPromptCachingConfig; + private readonly getModelByResolvedName: (modelId: string) => LanguageModel; + private readonly runGenerateText: typeof generateText; + + constructor( + private readonly config: KloLlmConfig, + deps: KloLlmProviderFactoryDeps, + ) { + this.promptCaching = resolvePromptCaching(config); + this.runGenerateText = deps.generateText ?? generateText; + this.getModelByResolvedName = this.createModelFactory(config, deps); + } + + getModel(role: KloModelRole): LanguageModel { + return this.getModelByName(this.resolveRole(role)); + } + + getModelByName(modelId: string): LanguageModel { + return this.getModelByResolvedName(modelId); + } + + cacheMarker(ttl: KloPromptCacheTtl, model?: LanguageModel | string) { + if (!this.promptCaching.enabled) { + return undefined; + } + if (model && !isAnthropicProtocolModel(model)) { + return undefined; + } + return { anthropic: { cacheControl: { type: 'ephemeral' as const, ttl } } }; + } + + repairToolCallHandler(options: { source?: string } = {}) { + return createKloToolCallRepairHandler({ + source: options.source ?? 
'klo-llm', + getRepairModel: () => this.getModel('repair'), + generateText: this.runGenerateText, + }); + } + + thinkingProviderOptions(_role: KloModelRole, budgetTokens: number): KloProviderOptions { + return { + anthropic: { + thinking: { type: 'enabled', budgetTokens }, + }, + }; + } + + telemetryConfig() { + return this.config.telemetry?.experimentalTelemetry; + } + + promptCachingConfig(): KloPromptCachingConfig { + return this.promptCaching; + } + + activeBackend() { + return this.config.backend; + } + + private resolveRole(role: KloModelRole): string { + return this.config.modelSlots[role] ?? this.config.modelSlots.default; + } + + private createModelFactory(config: KloLlmConfig, deps: KloLlmProviderFactoryDeps): (modelId: string) => LanguageModel { + if (config.backend === 'anthropic') { + const anthropic = (deps.createAnthropic ?? createAnthropic)({ + ...(config.anthropic?.apiKey ? { apiKey: config.anthropic.apiKey } : {}), + ...(config.anthropic?.baseURL ? { baseURL: config.anthropic.baseURL } : {}), + headers: { + 'anthropic-beta': DIRECT_ANTHROPIC_BETA_HEADER, + }, + }); + return (modelId) => anthropic(modelId); + } + + if (config.backend === 'vertex') { + if (!config.vertex?.location) { + throw new Error('vertex.location is required when KLO LLM backend is vertex'); + } + const vertex = (deps.createVertexAnthropic ?? createVertexAnthropic)({ + ...(config.vertex.project ? { project: config.vertex.project } : {}), + location: config.vertex.location, + }); + return (modelId) => vertex(modelId); + } + + const gateway = (deps.createGateway ?? createGateway)({ + ...(config.gateway?.apiKey ? { apiKey: config.gateway.apiKey } : {}), + ...(config.gateway?.baseURL ? 
{ baseURL: config.gateway.baseURL } : {}), + }); + return (modelId) => gateway(modelId); + } +} + +export function createKloLlmProvider(config: KloLlmConfig, deps: KloLlmProviderFactoryDeps = {}): KloLlmProvider { + if (!config.modelSlots.default) { + throw new Error('modelSlots.default is required'); + } + return new DefaultKloLlmProvider(config, deps); +} diff --git a/packages/llm/src/package-exports.test.ts b/packages/llm/src/package-exports.test.ts new file mode 100644 index 00000000..37485e1a --- /dev/null +++ b/packages/llm/src/package-exports.test.ts @@ -0,0 +1,19 @@ +import { describe, expect, it } from 'vitest'; + +describe('@klo/llm package exports', () => { + it('exports the canonical LLM and embedding surfaces', async () => { + const llm = await import('./index.js'); + + expect(llm.KLO_MODEL_ROLES).toEqual([ + 'default', + 'triage', + 'candidateExtraction', + 'curator', + 'reconcile', + 'repair', + ]); + expect(llm.createKloLlmProvider).toBeTypeOf('function'); + expect(llm.KloMessageBuilder).toBeTypeOf('function'); + expect(llm.createKloEmbeddingProvider).toBeTypeOf('function'); + }); +}); diff --git a/packages/llm/src/repair.test.ts b/packages/llm/src/repair.test.ts new file mode 100644 index 00000000..6b0ad850 --- /dev/null +++ b/packages/llm/src/repair.test.ts @@ -0,0 +1,93 @@ +import { NoSuchToolError, type LanguageModel } from 'ai'; +import { describe, expect, it, vi } from 'vitest'; +import { createKloToolCallRepairHandler } from './repair.js'; + +const repairModel = { modelId: 'claude-repair', provider: 'anthropic' } as LanguageModel; + +describe('createKloToolCallRepairHandler', () => { + it('returns null for NoSuchToolError', async () => { + const handler = createKloToolCallRepairHandler({ + source: 'unit', + getRepairModel: () => repairModel, + generateText: vi.fn(), + }); + + await expect( + handler({ + system: undefined, + messages: [], + toolCall: { type: 'tool-call', toolName: 'missing', toolCallId: 'tc_1', input: '{}' }, + tools: {}, + 
inputSchema: async () => ({}), + error: new NoSuchToolError({ toolName: 'missing' }), + }), + ).resolves.toBeNull(); + }); + + it('repairs string input by local JSON extraction without an LLM call', async () => { + const generateText = vi.fn(); + const handler = createKloToolCallRepairHandler({ + source: 'unit', + getRepairModel: () => repairModel, + generateText, + }); + + await expect( + handler({ + system: undefined, + messages: [], + toolCall: { + type: 'tool-call', + toolName: 'write_source', + toolCallId: 'tc_2', + input: 'prefix {"path":"orders.yaml"} suffix', + }, + tools: { write_source: {} as never }, + inputSchema: async () => ({ type: 'object' }), + error: new Error('Invalid tool input') as never, + }), + ).resolves.toEqual({ + type: 'tool-call', + toolName: 'write_source', + toolCallId: 'tc_2', + input: '{"path":"orders.yaml"}', + }); + expect(generateText).not.toHaveBeenCalled(); + }); + + it('falls back to the repair model when local extraction fails', async () => { + const generateText = vi.fn().mockResolvedValue({ text: '{"path":"customers.yaml"}' }); + const handler = createKloToolCallRepairHandler({ + source: 'unit', + getRepairModel: () => repairModel, + generateText, + }); + + await expect( + handler({ + system: undefined, + messages: [], + toolCall: { + type: 'tool-call', + toolName: 'write_source', + toolCallId: 'tc_3', + input: 'not json', + }, + tools: { write_source: {} as never }, + inputSchema: async () => ({ type: 'object', properties: { path: { type: 'string' } } }), + error: new Error('Invalid tool input') as never, + }), + ).resolves.toEqual({ + type: 'tool-call', + toolName: 'write_source', + toolCallId: 'tc_3', + input: '{"path":"customers.yaml"}', + }); + expect(generateText).toHaveBeenCalledWith( + expect.objectContaining({ + model: repairModel, + prompt: expect.stringContaining('The model tried to call the tool "write_source"'), + }), + ); + }); +}); diff --git a/packages/llm/src/repair.ts b/packages/llm/src/repair.ts new file 
mode 100644 index 00000000..4d0cd0ab --- /dev/null +++ b/packages/llm/src/repair.ts @@ -0,0 +1,88 @@ +import { NoSuchToolError, type LanguageModel, type ToolCallRepairFunction, type ToolSet, generateText } from 'ai'; + +interface KloToolCallRepairHandlerInput { + source: string; + getRepairModel: () => LanguageModel; + generateText?: typeof generateText; +} + +function extractJsonFromText(text: string): string | null { + const trimmed = text.trim(); + if (!trimmed) { + return null; + } + try { + JSON.parse(trimmed); + return trimmed; + } catch {} + + let start = trimmed.indexOf('{'); + while (start >= 0) { + let end = trimmed.lastIndexOf('}'); + while (end > start) { + const candidate = trimmed.slice(start, end + 1); + try { + JSON.parse(candidate); + return candidate; + } catch {} + end = trimmed.lastIndexOf('}', end - 1); + } + start = trimmed.indexOf('{', start + 1); + } + return null; +} + +export function createKloToolCallRepairHandler( + input: KloToolCallRepairHandlerInput, +): ToolCallRepairFunction { + const runGenerateText = input.generateText ?? generateText; + + return async ({ toolCall, tools, inputSchema, error }) => { + if (NoSuchToolError.isInstance(error)) { + return null; + } + + if (typeof toolCall.input === 'string') { + const extracted = extractJsonFromText(toolCall.input); + if (extracted) { + return { + type: 'tool-call', + toolName: toolCall.toolName, + toolCallId: toolCall.toolCallId, + input: extracted, + }; + } + } + + if (!(toolCall.toolName in tools)) { + return null; + } + + try { + const schema = await inputSchema({ toolName: toolCall.toolName }); + const { text } = await runGenerateText({ + model: input.getRepairModel(), + prompt: `The model tried to call the tool "${toolCall.toolName}" with the following inputs: +${JSON.stringify(toolCall.input)} + +However, this caused a validation error: ${error.message} + +The tool accepts the following schema: +${JSON.stringify(schema)} + +Please generate corrected inputs that match the schema. 
Return ONLY valid JSON, no explanation or markdown formatting.`, + }); + + const cleaned = extractJsonFromText(text) ?? text.trim(); + const parsed = JSON.parse(cleaned); + return { + type: 'tool-call', + toolName: toolCall.toolName, + toolCallId: toolCall.toolCallId, + input: JSON.stringify(parsed), + }; + } catch { + return null; + } + }; +} diff --git a/packages/llm/src/types.ts b/packages/llm/src/types.ts new file mode 100644 index 00000000..09486643 --- /dev/null +++ b/packages/llm/src/types.ts @@ -0,0 +1,95 @@ +import type { LanguageModel, TelemetrySettings, ToolCallRepairFunction, ToolSet } from 'ai'; + +export const KLO_MODEL_ROLES = ['default', 'triage', 'candidateExtraction', 'curator', 'reconcile', 'repair'] as const; + +export type KloModelRole = (typeof KLO_MODEL_ROLES)[number]; +export type KloLlmBackend = 'anthropic' | 'vertex' | 'gateway'; +export type KloPromptCacheTtl = '5m' | '1h'; + +export type KloJsonValue = + | null + | string + | number + | boolean + | KloJsonValue[] + | { [key: string]: KloJsonValue | undefined }; + +export type KloProviderOptions = Record; + +export interface KloPromptCachingConfig { + enabled: boolean; + systemTtl: KloPromptCacheTtl; + toolsTtl: KloPromptCacheTtl; + historyTtl: KloPromptCacheTtl; + cacheSystem: boolean; + cacheTools: boolean; + cacheHistory: boolean; + vertexFallbackTo5m: boolean; +} + +export interface KloTokenUsageEvent { + source?: string; + modelId?: string; + inputTokens?: number; + outputTokens?: number; + totalTokens?: number; +} + +export interface KloLlmConfig { + backend: KloLlmBackend; + vertex?: { project?: string; location: string }; + anthropic?: { apiKey?: string; baseURL?: string }; + gateway?: { baseURL?: string; apiKey?: string }; + modelSlots: { default: string } & Partial>; + promptCaching?: Partial; + telemetry?: { + experimentalTelemetry?: TelemetrySettings; + onTokenUsage?: (event: KloTokenUsageEvent) => void; + }; +} + +export interface KloLlmProvider { + getModel(role: 
KloModelRole): LanguageModel; + getModelByName(modelId: string): LanguageModel; + cacheMarker( + ttl: KloPromptCacheTtl, + model?: LanguageModel | string, + ): { anthropic: { cacheControl: { type: 'ephemeral'; ttl: KloPromptCacheTtl } } } | undefined; + repairToolCallHandler(options?: { source?: string }): ToolCallRepairFunction; + thinkingProviderOptions(role: KloModelRole, budgetTokens: number): KloProviderOptions; + telemetryConfig(): TelemetrySettings | undefined; + promptCachingConfig(): KloPromptCachingConfig; + activeBackend(): KloLlmBackend; +} + +export type KloEmbeddingBackend = 'openai' | 'deterministic' | 'sentence-transformers'; + +export interface KloEmbeddingTokenUsageEvent { + backend: KloEmbeddingBackend; + model: string; + inputCount: number; + totalTokens?: number; +} + +export interface KloEmbeddingConfig { + backend: KloEmbeddingBackend; + model: string; + dimensions: number; + openai?: { apiKey?: string; baseURL?: string }; + sentenceTransformers?: { baseURL: string; pathPrefix?: string }; + batchSize?: number; + telemetry?: { onTokenUsage?: (event: KloEmbeddingTokenUsageEvent) => void }; +} + +export interface KloEmbeddingProvider { + readonly dimensions: number; + readonly maxBatchSize: number; + embed(text: string): Promise; + embedMany(texts: string[]): Promise; +} + +export interface KloPromptParts { + staticSystem: string; + dynamicSystem?: string; + leadingUserContext?: string; +} diff --git a/packages/llm/tsconfig.json b/packages/llm/tsconfig.json new file mode 100644 index 00000000..965e6978 --- /dev/null +++ b/packages/llm/tsconfig.json @@ -0,0 +1,9 @@ +{ + "extends": "../../tsconfig.base.json", + "compilerOptions": { + "outDir": "./dist", + "rootDir": "./src" + }, + "include": ["src/**/*.ts"], + "exclude": ["dist", "node_modules"] +} diff --git a/packages/llm/vitest.config.ts b/packages/llm/vitest.config.ts new file mode 100644 index 00000000..2339ffd3 --- /dev/null +++ b/packages/llm/vitest.config.ts @@ -0,0 +1,8 @@ +import { 
defineConfig } from 'vitest/config'; + +export default defineConfig({ + test: { + root: '.', + include: ['src/**/*.test.ts'], + }, +}); diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml new file mode 100644 index 00000000..db51390e --- /dev/null +++ b/pnpm-lock.yaml @@ -0,0 +1,6313 @@ +lockfileVersion: '9.0' + +settings: + autoInstallPeers: true + excludeLinksFromLockfile: false + injectWorkspacePackages: true + +overrides: + '@types/node': ^24.3.0 + +importers: + + .: + devDependencies: + '@types/node': + specifier: ^24.3.0 + version: 24.12.2 + typescript: + specifier: ^5.9.3 + version: 5.9.3 + vitest: + specifier: ^4.0.18 + version: 4.1.5(@opentelemetry/api@1.9.0)(@types/node@24.12.2)(vite@8.0.10(@types/node@24.12.2)(esbuild@0.27.7)(yaml@2.8.3)) + + packages/cli: + dependencies: + '@clack/prompts': + specifier: 1.3.0 + version: 1.3.0 + '@commander-js/extra-typings': + specifier: 14.0.0 + version: 14.0.0(commander@14.0.3) + '@klo/connector-bigquery': + specifier: workspace:* + version: file:packages/connector-bigquery(ws@8.20.0) + '@klo/connector-clickhouse': + specifier: workspace:* + version: file:packages/connector-clickhouse(ws@8.20.0) + '@klo/connector-mysql': + specifier: workspace:* + version: file:packages/connector-mysql(@types/node@24.12.2)(ws@8.20.0) + '@klo/connector-postgres': + specifier: workspace:* + version: file:packages/connector-postgres(ws@8.20.0) + '@klo/connector-posthog': + specifier: workspace:* + version: file:packages/connector-posthog(ws@8.20.0) + '@klo/connector-snowflake': + specifier: workspace:* + version: file:packages/connector-snowflake(asn1.js@5.4.1)(ws@8.20.0) + '@klo/connector-sqlite': + specifier: workspace:* + version: file:packages/connector-sqlite(ws@8.20.0) + '@klo/connector-sqlserver': + specifier: workspace:* + version: file:packages/connector-sqlserver(ws@8.20.0) + '@klo/context': + specifier: workspace:* + version: file:packages/context(ws@8.20.0) + '@klo/llm': + specifier: workspace:* + version: link:../llm + 
'@modelcontextprotocol/sdk': + specifier: ^1.27.1 + version: 1.29.0(zod@4.4.3) + commander: + specifier: 14.0.3 + version: 14.0.3 + ink: + specifier: ^7.0.1 + version: 7.0.1(@types/react@19.2.14)(react@19.2.5) + react: + specifier: ^19.2.5 + version: 19.2.5 + zod: + specifier: ^4.4.3 + version: 4.4.3 + devDependencies: + '@types/better-sqlite3': + specifier: ^7.6.13 + version: 7.6.13 + '@types/node': + specifier: ^24.3.0 + version: 24.12.2 + '@types/react': + specifier: ^19.2.14 + version: 19.2.14 + better-sqlite3: + specifier: ^12.6.2 + version: 12.9.0 + ink-testing-library: + specifier: ^4.0.0 + version: 4.0.0(@types/react@19.2.14) + typescript: + specifier: ^5.9.3 + version: 5.9.3 + vitest: + specifier: ^4.0.18 + version: 4.1.5(@opentelemetry/api@1.9.0)(@types/node@24.12.2)(vite@8.0.10(@types/node@24.12.2)(esbuild@0.27.7)(yaml@2.8.3)) + + packages/connector-bigquery: + dependencies: + '@google-cloud/bigquery': + specifier: ^8.1.1 + version: 8.3.0 + '@klo/context': + specifier: workspace:* + version: file:packages/context + devDependencies: + '@types/node': + specifier: ^24.3.0 + version: 24.12.2 + typescript: + specifier: ^5.9.3 + version: 5.9.3 + vitest: + specifier: ^4.0.18 + version: 4.1.5(@opentelemetry/api@1.9.0)(@types/node@24.12.2)(vite@8.0.10(@types/node@24.12.2)(esbuild@0.27.7)(yaml@2.8.3)) + + packages/connector-clickhouse: + dependencies: + '@clickhouse/client': + specifier: ^1.18.2 + version: 1.18.3 + '@klo/context': + specifier: workspace:* + version: file:packages/context + devDependencies: + '@types/node': + specifier: ^24.3.0 + version: 24.12.2 + typescript: + specifier: ^5.9.3 + version: 5.9.3 + vitest: + specifier: ^4.0.18 + version: 4.1.5(@opentelemetry/api@1.9.0)(@types/node@24.12.2)(vite@8.0.10(@types/node@24.12.2)(esbuild@0.27.7)(yaml@2.8.3)) + + packages/connector-mysql: + dependencies: + '@klo/context': + specifier: workspace:* + version: file:packages/context + mysql2: + specifier: ^3.18.1 + version: 3.22.3(@types/node@24.12.2) + 
devDependencies: + '@types/node': + specifier: ^24.3.0 + version: 24.12.2 + typescript: + specifier: ^5.9.3 + version: 5.9.3 + vitest: + specifier: ^4.0.18 + version: 4.1.5(@opentelemetry/api@1.9.0)(@types/node@24.12.2)(vite@8.0.10(@types/node@24.12.2)(esbuild@0.27.7)(yaml@2.8.3)) + + packages/connector-postgres: + dependencies: + '@klo/context': + specifier: workspace:* + version: file:packages/context + pg: + specifier: ^8.19.0 + version: 8.20.0 + devDependencies: + '@types/node': + specifier: ^24.3.0 + version: 24.12.2 + '@types/pg': + specifier: ^8.16.0 + version: 8.20.0 + typescript: + specifier: ^5.9.3 + version: 5.9.3 + vitest: + specifier: ^4.0.18 + version: 4.1.5(@opentelemetry/api@1.9.0)(@types/node@24.12.2)(vite@8.0.10(@types/node@24.12.2)(esbuild@0.27.7)(yaml@2.8.3)) + + packages/connector-posthog: + dependencies: + '@klo/context': + specifier: workspace:* + version: file:packages/context + devDependencies: + '@types/node': + specifier: ^24.3.0 + version: 24.12.2 + typescript: + specifier: ^5.9.3 + version: 5.9.3 + vitest: + specifier: ^4.0.18 + version: 4.1.5(@opentelemetry/api@1.9.0)(@types/node@24.12.2)(vite@8.0.10(@types/node@24.12.2)(esbuild@0.27.7)(yaml@2.8.3)) + + packages/connector-snowflake: + dependencies: + '@klo/context': + specifier: workspace:* + version: file:packages/context + snowflake-sdk: + specifier: ^2.3.4 + version: 2.4.0(asn1.js@5.4.1) + devDependencies: + '@types/node': + specifier: ^24.3.0 + version: 24.12.2 + typescript: + specifier: ^5.9.3 + version: 5.9.3 + vitest: + specifier: ^4.0.18 + version: 4.1.5(@opentelemetry/api@1.9.0)(@types/node@24.12.2)(vite@8.0.10(@types/node@24.12.2)(esbuild@0.27.7)(yaml@2.8.3)) + + packages/connector-sqlite: + dependencies: + '@klo/context': + specifier: workspace:* + version: file:packages/context + better-sqlite3: + specifier: ^12.6.2 + version: 12.9.0 + devDependencies: + '@types/better-sqlite3': + specifier: ^7.6.13 + version: 7.6.13 + '@types/node': + specifier: ^24.3.0 + version: 24.12.2 
+ typescript: + specifier: ^5.9.3 + version: 5.9.3 + vitest: + specifier: ^4.0.18 + version: 4.1.5(@opentelemetry/api@1.9.0)(@types/node@24.12.2)(vite@8.0.10(@types/node@24.12.2)(esbuild@0.27.7)(yaml@2.8.3)) + + packages/connector-sqlserver: + dependencies: + '@klo/context': + specifier: workspace:* + version: file:packages/context + mssql: + specifier: ^12.2.0 + version: 12.5.0(@azure/core-client@1.10.1) + devDependencies: + '@types/mssql': + specifier: ^9.1.8 + version: 9.1.11(@azure/core-client@1.10.1) + '@types/node': + specifier: ^24.3.0 + version: 24.12.2 + typescript: + specifier: ^5.9.3 + version: 5.9.3 + vitest: + specifier: ^4.0.18 + version: 4.1.5(@opentelemetry/api@1.9.0)(@types/node@24.12.2)(vite@8.0.10(@types/node@24.12.2)(esbuild@0.27.7)(yaml@2.8.3)) + + packages/context: + dependencies: + '@klo/llm': + specifier: workspace:* + version: file:packages/llm(ws@8.20.0)(zod@4.3.6) + '@looker/sdk': + specifier: ^26.6.1 + version: 26.6.1 + '@looker/sdk-node': + specifier: ^26.6.1 + version: 26.6.1 + '@looker/sdk-rtl': + specifier: ^21.6.5 + version: 21.6.5 + '@modelcontextprotocol/sdk': + specifier: ^1.27.1 + version: 1.29.0(zod@4.3.6) + '@notionhq/client': + specifier: ^5.20.0 + version: 5.20.0 + ai: + specifier: ^6.0.168 + version: 6.0.168(zod@4.3.6) + better-sqlite3: + specifier: ^12.6.2 + version: 12.9.0 + handlebars: + specifier: ^4.7.8 + version: 4.7.9 + lookml-parser: + specifier: 7.1.0 + version: 7.1.0 + minimatch: + specifier: ^10.2.4 + version: 10.2.5 + p-limit: + specifier: ^7.3.0 + version: 7.3.0 + pg: + specifier: ^8.19.0 + version: 8.20.0 + simple-git: + specifier: 3.32.2 + version: 3.32.2 + yaml: + specifier: ^2.8.2 + version: 2.8.3 + zod: + specifier: ^4.1.13 + version: 4.3.6 + devDependencies: + '@electric-sql/pglite': + specifier: ^0.4.5 + version: 0.4.5 + '@electric-sql/pglite-socket': + specifier: ^0.1.5 + version: 0.1.5(@electric-sql/pglite@0.4.5) + '@types/better-sqlite3': + specifier: ^7.6.13 + version: 7.6.13 + '@types/node': + 
specifier: ^24.3.0 + version: 24.12.2 + '@types/pg': + specifier: ^8.16.0 + version: 8.20.0 + typescript: + specifier: ^5.9.3 + version: 5.9.3 + vitest: + specifier: ^4.0.18 + version: 4.1.5(@opentelemetry/api@1.9.0)(@types/node@24.12.2)(vite@8.0.10(@types/node@24.12.2)(esbuild@0.27.7)(yaml@2.8.3)) + + packages/llm: + dependencies: + '@ai-sdk/anthropic': + specifier: 3.0.71 + version: 3.0.71(zod@4.4.3) + '@ai-sdk/google-vertex': + specifier: ^4.0.112 + version: 4.0.118(zod@4.4.3) + ai: + specifier: ^6.0.168 + version: 6.0.168(zod@4.4.3) + openai: + specifier: ^6.25.0 + version: 6.35.0(ws@8.20.0)(zod@4.4.3) + devDependencies: + '@types/node': + specifier: ^24.3.0 + version: 24.12.2 + typescript: + specifier: ^5.9.3 + version: 5.9.3 + vitest: + specifier: ^4.0.18 + version: 4.1.5(@opentelemetry/api@1.9.0)(@types/node@24.12.2)(vite@8.0.10(@types/node@24.12.2)(esbuild@0.27.7)(yaml@2.8.3)) + +packages: + + '@ai-sdk/anthropic@3.0.71': + resolution: {integrity: sha512-bUWOzrzR0gJKJO/PLGMR4uH2dqEgqGhrsCV+sSpk4KtOEnUQlfjZI/F7BFlqSvVpFbjdgYRRLysAeEZpJ6S1lg==} + engines: {node: '>=18'} + peerDependencies: + zod: ^3.25.76 || ^4.1.8 + + '@ai-sdk/anthropic@3.0.74': + resolution: {integrity: sha512-Xew9rfz9WWhDSyF8rNhjT/XWOWelNfJrMlmG0Ahw210hStisRpQZ1s+7VeI9JTJOZ5y5tXqBi5kfPwYnCfyRTA==} + engines: {node: '>=18'} + peerDependencies: + zod: ^3.25.76 || ^4.1.8 + + '@ai-sdk/gateway@3.0.104': + resolution: {integrity: sha512-ZKX5n74io8VIRlhIMSLWVlvT3sXC8Z7cZ9GHuWBWZDVi96+62AIsWuLGvMfcBA1STYuSoDrp6rIziZmvrTq0TA==} + engines: {node: '>=18'} + peerDependencies: + zod: ^3.25.76 || ^4.1.8 + + '@ai-sdk/google-vertex@4.0.118': + resolution: {integrity: sha512-jmXAi5n+vixA3ig/RhkjV8k/YuFD8uTnd3+yr2pnYEQVe3Ocp3hn0mS99S5yKgn+6cGNGbG0CWJH44oSwSOs8w==} + engines: {node: '>=18'} + peerDependencies: + zod: ^3.25.76 || ^4.1.8 + + '@ai-sdk/google@3.0.67': + resolution: {integrity: sha512-Qeq+SidYtzMrcf0fdw3L0QLmtXK+ErwdBzbxS4+0Q/2UP85Ges8RJJcbAj7SO8e2JbeJoM35BLqkeNy1o3wJvQ==} + engines: {node: 
'>=18'} + peerDependencies: + zod: ^3.25.76 || ^4.1.8 + + '@ai-sdk/openai-compatible@2.0.45': + resolution: {integrity: sha512-5YBvurNL7Oj7mT3srws4Rh4cQidoorfEGObAOb5jV40eld8IC7EkXWARZjnWYqgYzabUs6Sn6muiXfQVkgOyOQ==} + engines: {node: '>=18'} + peerDependencies: + zod: ^3.25.76 || ^4.1.8 + + '@ai-sdk/provider-utils@4.0.23': + resolution: {integrity: sha512-z8GlDaCmRSDlqkMF2f4/RFgWxdarvIbyuk+m6WXT1LYgsnGiXRJGTD2Z1+SDl3LqtFuRtGX1aghYvQLoHL/9pg==} + engines: {node: '>=18'} + peerDependencies: + zod: ^3.25.76 || ^4.1.8 + + '@ai-sdk/provider-utils@4.0.26': + resolution: {integrity: sha512-CsKNLKsOpvPujRlIYvoz+Ybw+kGn7J4/fIZa/58+R7iWLLfwn6ifE2G6Yq8K9XvH/I/3bzaDAJ3NhRwEMsLBKQ==} + engines: {node: '>=18'} + peerDependencies: + zod: ^3.25.76 || ^4.1.8 + + '@ai-sdk/provider@3.0.10': + resolution: {integrity: sha512-Q3BZ27qfpYqnCYGvE3vt+Qi6LGOF9R5Nmzn+9JoM1lCRsD9mYaIhfJLkSunN48nfGXJ6n+XNV0J/XVpqGQl7Dw==} + engines: {node: '>=18'} + + '@ai-sdk/provider@3.0.8': + resolution: {integrity: sha512-oGMAgGoQdBXbZqNG0Ze56CHjDZ1IDYOwGYxYjO5KLSlz5HiNQ9udIXsPZ61VWaHGZ5XW/jyjmr6t2xz2jGVwbQ==} + engines: {node: '>=18'} + + '@alcalzone/ansi-tokenize@0.3.0': + resolution: {integrity: sha512-p+CMKJ93HFmLkjXKlXiVGlMQEuRb6H0MokBSwUsX+S6BRX8eV5naFZpQJFfJHjRZY0Hmnqy1/r6UWl3x+19zYA==} + engines: {node: '>=18'} + + '@aws-crypto/crc32@5.2.0': + resolution: {integrity: sha512-nLbCWqQNgUiwwtFsen1AdzAtvuLRsQS8rYgMuxCrdKf9kOssamGLuPwyTY9wyYblNr9+1XM8v6zoDTPPSIeANg==} + engines: {node: '>=16.0.0'} + + '@aws-crypto/crc32c@5.2.0': + resolution: {integrity: sha512-+iWb8qaHLYKrNvGRbiYRHSdKRWhto5XlZUEBwDjYNf+ly5SVYG6zEoYIdxvf5R3zyeP16w4PLBn3rH1xc74Rag==} + + '@aws-crypto/sha1-browser@5.2.0': + resolution: {integrity: sha512-OH6lveCFfcDjX4dbAvCFSYUjJZjDr/3XJ3xHtjn3Oj5b9RjojQo8npoLeA/bNwkOkrSQ0wgrHzXk4tDRxGKJeg==} + + '@aws-crypto/sha256-browser@5.2.0': + resolution: {integrity: sha512-AXfN/lGotSQwu6HNcEsIASo7kWXZ5HYWvfOmSNKDsEqC4OashTp8alTmaz+F7TC2L083SFv5RdB+qU3Vs1kZqw==} + + '@aws-crypto/sha256-js@5.2.0': + 
resolution: {integrity: sha512-FFQQyu7edu4ufvIZ+OadFpHHOt+eSTBaYaki44c+akjg7qZg9oOQeLlk77F6tSYqjDAFClrHJk9tMf0HdVyOvA==} + engines: {node: '>=16.0.0'} + + '@aws-crypto/supports-web-crypto@5.2.0': + resolution: {integrity: sha512-iAvUotm021kM33eCdNfwIN//F77/IADDSs58i+MDaOqFrVjZo9bAal0NK7HurRuWLLpF1iLX7gbWrjHjeo+YFg==} + + '@aws-crypto/util@5.2.0': + resolution: {integrity: sha512-4RkU9EsI6ZpBve5fseQlGNUWKMa1RLPQ1dnjnQoe07ldfIzcsGb5hC5W0Dm7u423KWzawlrpbjXBrXCEv9zazQ==} + + '@aws-sdk/client-s3@3.1039.0': + resolution: {integrity: sha512-PVH9v0pHYBQnBADSR/m88NgcuJcYqPXfpmkcME66vRF75Y4swwbEVVFbTBFuvxu0YcZiLFXu3lw0FDK00vEa3A==} + engines: {node: '>=20.0.0'} + + '@aws-sdk/client-sts@3.1039.0': + resolution: {integrity: sha512-FKwfa4agOd6rctrp6D+W/sUKT+c+wI5m23oYR0VyE01oad6ZYff+113JP/X2xbrxIFJtVz7GieQZdVUFqOuMuQ==} + engines: {node: '>=20.0.0'} + + '@aws-sdk/core@3.974.7': + resolution: {integrity: sha512-YhRC90ofz5oolTJZlA8voU/oUrCB2azi8Usx51k8hhB5LpWbYQMMXKUqSqkoL0Cru+RQJgWTHpAfEDDIwfUhJw==} + engines: {node: '>=20.0.0'} + + '@aws-sdk/crc64-nvme@3.972.7': + resolution: {integrity: sha512-QUagVVBbC8gODCF6e1aV0mE2TXWB9Opz4k8EJFdNrujUVQm5R4AjJa1mpOqzwOuROBzqJU9zawzig7M96L8Ejg==} + engines: {node: '>=20.0.0'} + + '@aws-sdk/credential-provider-env@3.972.33': + resolution: {integrity: sha512-bJV7eViSJV6GSuuN+VIdNVPdwPsNSf75BiC2v5alPrjR/OCcqgKwSZInKbDFz9mNeizldsyf67jt6YSIiv53Cw==} + engines: {node: '>=20.0.0'} + + '@aws-sdk/credential-provider-http@3.972.35': + resolution: {integrity: sha512-x/BQGEIdq0oI+4WxLjKmnQvT7CnF9r8ezdGt7wXwxb7ckHXQz0Zmgxt8v3Ne0JaT3R5YefmuybHX6E8EnsDXyA==} + engines: {node: '>=20.0.0'} + + '@aws-sdk/credential-provider-ini@3.972.37': + resolution: {integrity: sha512-eUTpmWfd/BKsq9medhCRcu+GRAhFP2Zrn7/2jKDHHOOjCkhrMoTp/t4cEthqFoG7gE0VGp5wUxrXTdvBCmSmJg==} + engines: {node: '>=20.0.0'} + + '@aws-sdk/credential-provider-login@3.972.37': + resolution: {integrity: sha512-Ty68y8ISSC+g5Q3D0K8uAaoINwvfaOslnNpsF/LgVUxyosYXHawcK2yV4HLXDVugiTTYLQfJfcw0ce5meAGkKw==} 
+ engines: {node: '>=20.0.0'} + + '@aws-sdk/credential-provider-node@3.972.38': + resolution: {integrity: sha512-BQ9XYnBDVxR2HuV5huXYQYF/PZMTsY+EnwfGnCU2cA8Zw63XpkOtPY8WqiMIZMQCrKPQQEiFURS/o9CIolRLqg==} + engines: {node: '>=20.0.0'} + + '@aws-sdk/credential-provider-process@3.972.33': + resolution: {integrity: sha512-yfjGksI9WQbdMObb0VeLXqzTLI+a0qXLJT9gCDiv0+X/xjPpI3mTz6a5FibrhpuEKIe0gSgvs3MaoFZy5cx4WA==} + engines: {node: '>=20.0.0'} + + '@aws-sdk/credential-provider-sso@3.972.37': + resolution: {integrity: sha512-fpwE+20ntpp3i9Xb9vUuQfXLDKYHH+5I2V+ZG96SX1nBzrruhy10RXDgmN7t1etOz3c55stlA3TeQASUA451NQ==} + engines: {node: '>=20.0.0'} + + '@aws-sdk/credential-provider-web-identity@3.972.37': + resolution: {integrity: sha512-aryawqyebf+3WhAFNHfF62rekFpYtVcVN7dQ89qnAWsa4n5hJst8qBG6gXC24WHtW7Nnhkf9ScYnjwo0Brn3bw==} + engines: {node: '>=20.0.0'} + + '@aws-sdk/ec2-metadata-service@3.1039.0': + resolution: {integrity: sha512-GJN627v8DHyXSy1/Y8YNlLnc7cbeON2tDxGFt+DyyDjFKnRX6U1RdV6/bt/i102QNGIxpYSLh9BDDZVESd4rzw==} + engines: {node: '>=20.0.0'} + + '@aws-sdk/middleware-bucket-endpoint@3.972.10': + resolution: {integrity: sha512-Vbc2frZH7wXlMNd+ZZSXUEs/l1Sv8Jj4zUnIfwrYF5lwaLdXHZ9xx4U3rjUcaye3HRhFVc+E5DbBxpRAbB16BA==} + engines: {node: '>=20.0.0'} + + '@aws-sdk/middleware-expect-continue@3.972.10': + resolution: {integrity: sha512-2Yn0f1Qiq/DjxYR3wfI3LokXnjOhFM7Ssn4LTdFDIxRMCE6I32MAsVnhPX1cUZsuVA9tiZtwwhlSLAtFGxAZlQ==} + engines: {node: '>=20.0.0'} + + '@aws-sdk/middleware-flexible-checksums@3.974.15': + resolution: {integrity: sha512-j4Zp7rA1HfhDTteICnx/tPax4N/v5wmytgguXExUGyEwQ8Ug4EBA4kjp9puFAN1UZoBVpxoiXMiuTFvjaHjeEw==} + engines: {node: '>=20.0.0'} + + '@aws-sdk/middleware-host-header@3.972.10': + resolution: {integrity: sha512-IJSsIMeVQ8MMCPbuh1AbltkFhLBLXn7aejzfX5YKT/VLDHn++Dcz8886tXckE+wQssyPUhaXrJhdakO2VilRhg==} + engines: {node: '>=20.0.0'} + + '@aws-sdk/middleware-location-constraint@3.972.10': + resolution: {integrity: 
sha512-rI3NZvJcEvjoD0+0PI0iUAwlPw2IlSlhyvgBK/3WkKJQE/YiKFedd9dMN2lVacdNxPNhxL/jzQaKQdrGtQagjQ==} + engines: {node: '>=20.0.0'} + + '@aws-sdk/middleware-logger@3.972.10': + resolution: {integrity: sha512-OOuGvvz1Dm20SjZo5oEBePFqxt5nf8AwkNDSyUHvD9/bfNASmstcYxFAHUowy4n6Io7mWUZ04JURZwSBvyQanQ==} + engines: {node: '>=20.0.0'} + + '@aws-sdk/middleware-recursion-detection@3.972.11': + resolution: {integrity: sha512-+zz6f79Kj9V5qFK2P+D8Ehjnw4AhphAlCAsPjUqEcInA9umtSSKMrHbSagEeOIsDNuvVrH98bjRHcyQukTrhaQ==} + engines: {node: '>=20.0.0'} + + '@aws-sdk/middleware-sdk-s3@3.972.36': + resolution: {integrity: sha512-YhPix+0x/MdQrb1Ug1GDKeS5fqylIy+naz800asX8II4jqfTk2KY2KhmmYCwZcky8YWtRQQwWCGdoqeAnip8Uw==} + engines: {node: '>=20.0.0'} + + '@aws-sdk/middleware-ssec@3.972.10': + resolution: {integrity: sha512-Gli9A0u8EVVb+5bFDGS/QbSVg28w/wpEidg1ggVcSj65BDTdGR6punsOcVjqdiu1i42WHWo51MCvARPIIz9juw==} + engines: {node: '>=20.0.0'} + + '@aws-sdk/middleware-user-agent@3.972.37': + resolution: {integrity: sha512-N1oNpdiLoVAWYD3WFBnUi3LlfoDA06ZHo4ozyjbsJNLvILzvt//0CnR8N+CZ0NWeYgVB/5V59ivixHCWCx2ALw==} + engines: {node: '>=20.0.0'} + + '@aws-sdk/nested-clients@3.997.5': + resolution: {integrity: sha512-jGFr6DxtcMTmzOkG/a0jCZYv4BBDmeNYVeO+/memSoDkYCJu4Y58xviYmzwJfYyIVSts+X/BVjJm1uGBnwHEMg==} + engines: {node: '>=20.0.0'} + + '@aws-sdk/region-config-resolver@3.972.13': + resolution: {integrity: sha512-CvJ2ZIjK/jVD/lbOpowBVElJyC1YxLTIJ13yM0AEo0t2v7swOzGjSA6lJGH+DwZXQhcjUjoYwc8bVYCX5MDr1A==} + engines: {node: '>=20.0.0'} + + '@aws-sdk/signature-v4-multi-region@3.996.24': + resolution: {integrity: sha512-amP7tLikppN940wbBFISYqiuzVmpzMS9U3mcgtmVLjX4fdWI/SNCvrXv6ZxfVzTT4cT0rPKOLhFah2xLwzREWw==} + engines: {node: '>=20.0.0'} + + '@aws-sdk/token-providers@3.1039.0': + resolution: {integrity: sha512-NMSFL2HwkAOoCeLCQiqoOq5pT3vVbSjww2QZTuYgYknVwhhv125PSDzZIcL5EYnlxuPWjEOdauZK+FspkZDVdw==} + engines: {node: '>=20.0.0'} + + '@aws-sdk/types@3.973.8': + resolution: {integrity: 
sha512-gjlAdtHMbtR9X5iIhVUvbVcy55KnznpC6bkDUWW9z915bi0ckdUr5cjf16Kp6xq0bP5HBD2xzgbL9F9Quv5vUw==} + engines: {node: '>=20.0.0'} + + '@aws-sdk/util-arn-parser@3.972.3': + resolution: {integrity: sha512-HzSD8PMFrvgi2Kserxuff5VitNq2sgf3w9qxmskKDiDTThWfVteJxuCS9JXiPIPtmCrp+7N9asfIaVhBFORllA==} + engines: {node: '>=20.0.0'} + + '@aws-sdk/util-endpoints@3.996.8': + resolution: {integrity: sha512-oOZHcRDihk5iEe5V25NVWg45b3qEA8OpHWVdU/XQh8Zj4heVPAJqWvMphQnU7LkufmUo10EpvFPZuQMiFLJK3g==} + engines: {node: '>=20.0.0'} + + '@aws-sdk/util-locate-window@3.965.5': + resolution: {integrity: sha512-WhlJNNINQB+9qtLtZJcpQdgZw3SCDCpXdUJP7cToGwHbCWCnRckGlc6Bx/OhWwIYFNAn+FIydY8SZ0QmVu3xTQ==} + engines: {node: '>=20.0.0'} + + '@aws-sdk/util-user-agent-browser@3.972.10': + resolution: {integrity: sha512-FAzqXvfEssGdSIz8ejatan0bOdx1qefBWKF/gWmVBXIP1HkS7v/wjjaqrAGGKvyihrXTXW00/2/1nTJtxpXz7g==} + + '@aws-sdk/util-user-agent-node@3.973.23': + resolution: {integrity: sha512-gGwq8L2Euw0aNG6Ey4EktiAo3fSCVoDy1CaBIthd+oeaKHPXUrNaApMewQ6La5Hv0lcznOtECZaNvYyc5LXXfA==} + engines: {node: '>=20.0.0'} + peerDependencies: + aws-crt: '>=1.0.0' + peerDependenciesMeta: + aws-crt: + optional: true + + '@aws-sdk/xml-builder@3.972.22': + resolution: {integrity: sha512-PMYKKtJd70IsSG0yHrdAbxBr+ZWBKLvzFZfD3/urxgf6hXVMzuU5M+3MJ5G67RpOmLBu1fAUN65SbWuKUCOlAA==} + engines: {node: '>=20.0.0'} + + '@aws/lambda-invoke-store@0.2.4': + resolution: {integrity: sha512-iY8yvjE0y651BixKNPgmv1WrQc+GZ142sb0z4gYnChDDY2YqI4P/jsSopBWrKfAt7LOJAkOXt7rC/hms+WclQQ==} + engines: {node: '>=18.0.0'} + + '@azure-rest/core-client@2.6.0': + resolution: {integrity: sha512-iuFKDm8XPzNxPfRjhyU5/xKZmcRDzSuEghXDHHk4MjBV/wFL34GmYVBZnn9wmuoLBeS1qAw9ceMdaeJBPcB1QQ==} + engines: {node: '>=20.0.0'} + + '@azure/abort-controller@2.1.2': + resolution: {integrity: sha512-nBrLsEWm4J2u5LpAPjxADTlq3trDgVZZXHNKabeXZtpq3d3AbN/KGO82R87rdDz5/lYB024rtEf10/q0urNgsA==} + engines: {node: '>=18.0.0'} + + '@azure/core-auth@1.10.1': + resolution: {integrity: 
sha512-ykRMW8PjVAn+RS6ww5cmK9U2CyH9p4Q88YJwvUslfuMmN98w/2rdGRLPqJYObapBCdzBVeDgYWdJnFPFb7qzpg==} + engines: {node: '>=20.0.0'} + + '@azure/core-client@1.10.1': + resolution: {integrity: sha512-Nh5PhEOeY6PrnxNPsEHRr9eimxLwgLlpmguQaHKBinFYA/RU9+kOYVOQqOrTsCL+KSxrLLl1gD8Dk5BFW/7l/w==} + engines: {node: '>=20.0.0'} + + '@azure/core-http-compat@2.4.0': + resolution: {integrity: sha512-f1P96IB399YiN2ARYHP7EpZi3Bf3wH4SN2lGzrw7JVwm7bbsVYtf2iKSBwTywD2P62NOPZGHFSZi+6jjb75JuA==} + engines: {node: '>=20.0.0'} + peerDependencies: + '@azure/core-client': ^1.10.0 + '@azure/core-rest-pipeline': ^1.22.0 + + '@azure/core-lro@2.7.2': + resolution: {integrity: sha512-0YIpccoX8m/k00O7mDDMdJpbr6mf1yWo2dfmxt5A8XVZVVMz2SSKaEbMCeJRvgQ0IaSlqhjT47p4hVIRRy90xw==} + engines: {node: '>=18.0.0'} + + '@azure/core-paging@1.6.2': + resolution: {integrity: sha512-YKWi9YuCU04B55h25cnOYZHxXYtEvQEbKST5vqRga7hWY9ydd3FZHdeQF8pyh+acWZvppw13M/LMGx0LABUVMA==} + engines: {node: '>=18.0.0'} + + '@azure/core-rest-pipeline@1.23.0': + resolution: {integrity: sha512-Evs1INHo+jUjwHi1T6SG6Ua/LHOQBCLuKEEE6efIpt4ZOoNonaT1kP32GoOcdNDbfqsD2445CPri3MubBy5DEQ==} + engines: {node: '>=20.0.0'} + + '@azure/core-tracing@1.3.1': + resolution: {integrity: sha512-9MWKevR7Hz8kNzzPLfX4EAtGM2b8mr50HPDBvio96bURP/9C+HjdH3sBlLSNNrvRAr5/k/svoH457gB5IKpmwQ==} + engines: {node: '>=20.0.0'} + + '@azure/core-util@1.13.1': + resolution: {integrity: sha512-XPArKLzsvl0Hf0CaGyKHUyVgF7oDnhKoP85Xv6M4StF/1AhfORhZudHtOyf2s+FcbuQ9dPRAjB8J2KvRRMUK2A==} + engines: {node: '>=20.0.0'} + + '@azure/core-xml@1.5.1': + resolution: {integrity: sha512-xcNRHqCoSp4AunOALEae6A8f3qATb83gSrm31Iqb01OzblvC3/W/bfXozcq78EzIdzZzuH1bZ2NvRR0TdX709w==} + engines: {node: '>=20.0.0'} + + '@azure/identity@4.13.1': + resolution: {integrity: sha512-5C/2WD5Vb1lHnZS16dNQRPMjN6oV/Upba+C9nBIs15PmOi6A3ZGs4Lr2u60zw4S04gi+u3cEXiqTVP7M4Pz3kw==} + engines: {node: '>=20.0.0'} + + '@azure/keyvault-common@2.1.0': + resolution: {integrity: 
sha512-aCDidWuKY06LWQ4x7/8TIXK6iRqTaRWRL3t7T+LC+j1b07HtoIsOxP/tU90G4jCSBn5TAyUTCtA4MS/y5Hudaw==} + engines: {node: '>=20.0.0'} + + '@azure/keyvault-keys@4.10.0': + resolution: {integrity: sha512-eDT7iXoBTRZ2n3fLiftuGJFD+yjkiB1GNqzU2KbY1TLYeXeSPVTVgn2eJ5vmRTZ11978jy2Kg2wI7xa9Tyr8ag==} + engines: {node: '>=18.0.0'} + + '@azure/logger@1.3.0': + resolution: {integrity: sha512-fCqPIfOcLE+CGqGPd66c8bZpwAji98tZ4JI9i/mlTNTlsIWslCfpg48s/ypyLxZTump5sypjrKn2/kY7q8oAbA==} + engines: {node: '>=20.0.0'} + + '@azure/msal-browser@5.9.0': + resolution: {integrity: sha512-CzE+4PefDSJWj26zU7G1bKchlGRRHMBFreG4tAlGuzyI8hAPiYGobaJvZBgZBf6L63iphX7VH+ityL8VgEQz9Q==} + engines: {node: '>=0.8.0'} + + '@azure/msal-common@16.5.2': + resolution: {integrity: sha512-GkDEL6TYo3HgT3UuqakdgE9PZfc1hMki6+Hwgy1uddb/EauvAKfu85vVhuofRSo22D1xTnWt8Ucwfg4vSCVwvA==} + engines: {node: '>=0.8.0'} + + '@azure/msal-node@5.1.5': + resolution: {integrity: sha512-ObTeMoNPmq19X3z40et9Xvs4ZoWVeJg43PZMRLG5iwVL+2nCtAerG3YTDItqPp1CfXNwmCXBbg8jn1DOx65c3g==} + engines: {node: '>=20'} + + '@azure/storage-blob@12.26.0': + resolution: {integrity: sha512-SriLPKezypIsiZ+TtlFfE46uuBIap2HeaQVS78e1P7rz5OSbq0rsd52WE1mC5f7vAeLiXqv7I7oRhL3WFZEw3Q==} + engines: {node: '>=18.0.0'} + + '@clack/core@1.3.0': + resolution: {integrity: sha512-xJPHpAmEQUBrXSLx0gF+q5K/IyihXpsHZcha+jB+tyahsKRK3Dxo4D0coZDewHo12NhiuzC3dTtMPbm53GEAAA==} + engines: {node: '>= 20.12.0'} + + '@clack/prompts@1.3.0': + resolution: {integrity: sha512-GgcWwRCs/xPtaqlMy8qRhPnZf9vlWcWZNHAitnVQ3yk7JmSralSiq5q07yaffYE8SogtDm7zFeKccx1QNVARpw==} + engines: {node: '>= 20.12.0'} + + '@clickhouse/client-common@1.18.3': + resolution: {integrity: sha512-3axzO3zvrsGT5PzDenxgWscltYCNRDbhaHWUgdsmcM9OnW/VnZn9EarOcZogr9P82Z0mQh+Jd2x+p2K4TFD2fA==} + + '@clickhouse/client@1.18.3': + resolution: {integrity: sha512-340ngdYktL8PLUBK2QKSwe0o02tYfZSz1mSn1uXCEU8TxHvwh9pnQxElf9YHumDGj5gX/IdgxPsJTGMs82Hgug==} + engines: {node: '>=16'} + + '@colors/colors@1.6.0': + resolution: {integrity: 
sha512-Ir+AOibqzrIsL6ajt3Rz3LskB7OiMVHqltZmspbW/TJuTVuyOMirVqAkjfY6JISiLHgyNqicAC8AyHHGzNd/dA==} + engines: {node: '>=0.1.90'} + + '@commander-js/extra-typings@14.0.0': + resolution: {integrity: sha512-hIn0ncNaJRLkZrxBIp5AsW/eXEHNKYQBh0aPdoUqNgD+Io3NIykQqpKFyKcuasZhicGaEZJX/JBSIkZ4e5x8Dg==} + peerDependencies: + commander: ~14.0.0 + + '@dabh/diagnostics@2.0.8': + resolution: {integrity: sha512-R4MSXTVnuMzGD7bzHdW2ZhhdPC/igELENcq5IjEverBvq5hn1SXCWcsi6eSsdWP0/Ur+SItRRjAktmdoX/8R/Q==} + + '@electric-sql/pglite-socket@0.1.5': + resolution: {integrity: sha512-/RAye+3EPKfO9nY4tljzxXmkT7yIpFDm0L3F+c28b+Z6uxPOjy/Zz/QEHYHXcrfuUC88/a9S72EO0+3E0j97wQ==} + hasBin: true + peerDependencies: + '@electric-sql/pglite': 0.4.5 + + '@electric-sql/pglite@0.4.5': + resolution: {integrity: sha512-aGG2zGEyZzGWKy8P+9ZoNUV0jxt1+hgbeTf+bVAYyxVZZLXg3/9aFlfLxb08AYZVAfAkQlQIysmWjhc5hwDG8g==} + + '@emnapi/core@1.10.0': + resolution: {integrity: sha512-yq6OkJ4p82CAfPl0u9mQebQHKPJkY7WrIuk205cTYnYe+k2Z8YBh11FrbRG/H6ihirqcacOgl2BIO8oyMQLeXw==} + + '@emnapi/runtime@1.10.0': + resolution: {integrity: sha512-ewvYlk86xUoGI0zQRNq/mC+16R1QeDlKQy21Ki3oSYXNgLb45GV1P6A0M+/s6nyCuNDqe5VpaY84BzXGwVbwFA==} + + '@emnapi/wasi-threads@1.2.1': + resolution: {integrity: sha512-uTII7OYF+/Mes/MrcIOYp5yOtSMLBWSIoLPpcgwipoiKbli6k322tcoFsxoIIxPDqW01SQGAgko4EzZi2BNv2w==} + + '@esbuild/aix-ppc64@0.27.7': + resolution: {integrity: sha512-EKX3Qwmhz1eMdEJokhALr0YiD0lhQNwDqkPYyPhiSwKrh7/4KRjQc04sZ8db+5DVVnZ1LmbNDI1uAMPEUBnQPg==} + engines: {node: '>=18'} + cpu: [ppc64] + os: [aix] + + '@esbuild/android-arm64@0.27.7': + resolution: {integrity: sha512-62dPZHpIXzvChfvfLJow3q5dDtiNMkwiRzPylSCfriLvZeq0a1bWChrGx/BbUbPwOrsWKMn8idSllklzBy+dgQ==} + engines: {node: '>=18'} + cpu: [arm64] + os: [android] + + '@esbuild/android-arm@0.27.7': + resolution: {integrity: sha512-jbPXvB4Yj2yBV7HUfE2KHe4GJX51QplCN1pGbYjvsyCZbQmies29EoJbkEc+vYuU5o45AfQn37vZlyXy4YJ8RQ==} + engines: {node: '>=18'} + cpu: [arm] + os: [android] + + 
'@esbuild/android-x64@0.27.7': + resolution: {integrity: sha512-x5VpMODneVDb70PYV2VQOmIUUiBtY3D3mPBG8NxVk5CogneYhkR7MmM3yR/uMdITLrC1ml/NV1rj4bMJuy9MCg==} + engines: {node: '>=18'} + cpu: [x64] + os: [android] + + '@esbuild/darwin-arm64@0.27.7': + resolution: {integrity: sha512-5lckdqeuBPlKUwvoCXIgI2D9/ABmPq3Rdp7IfL70393YgaASt7tbju3Ac+ePVi3KDH6N2RqePfHnXkaDtY9fkw==} + engines: {node: '>=18'} + cpu: [arm64] + os: [darwin] + + '@esbuild/darwin-x64@0.27.7': + resolution: {integrity: sha512-rYnXrKcXuT7Z+WL5K980jVFdvVKhCHhUwid+dDYQpH+qu+TefcomiMAJpIiC2EM3Rjtq0sO3StMV/+3w3MyyqQ==} + engines: {node: '>=18'} + cpu: [x64] + os: [darwin] + + '@esbuild/freebsd-arm64@0.27.7': + resolution: {integrity: sha512-B48PqeCsEgOtzME2GbNM2roU29AMTuOIN91dsMO30t+Ydis3z/3Ngoj5hhnsOSSwNzS+6JppqWsuhTp6E82l2w==} + engines: {node: '>=18'} + cpu: [arm64] + os: [freebsd] + + '@esbuild/freebsd-x64@0.27.7': + resolution: {integrity: sha512-jOBDK5XEjA4m5IJK3bpAQF9/Lelu/Z9ZcdhTRLf4cajlB+8VEhFFRjWgfy3M1O4rO2GQ/b2dLwCUGpiF/eATNQ==} + engines: {node: '>=18'} + cpu: [x64] + os: [freebsd] + + '@esbuild/linux-arm64@0.27.7': + resolution: {integrity: sha512-RZPHBoxXuNnPQO9rvjh5jdkRmVizktkT7TCDkDmQ0W2SwHInKCAV95GRuvdSvA7w4VMwfCjUiPwDi0ZO6Nfe9A==} + engines: {node: '>=18'} + cpu: [arm64] + os: [linux] + + '@esbuild/linux-arm@0.27.7': + resolution: {integrity: sha512-RkT/YXYBTSULo3+af8Ib0ykH8u2MBh57o7q/DAs3lTJlyVQkgQvlrPTnjIzzRPQyavxtPtfg0EopvDyIt0j1rA==} + engines: {node: '>=18'} + cpu: [arm] + os: [linux] + + '@esbuild/linux-ia32@0.27.7': + resolution: {integrity: sha512-GA48aKNkyQDbd3KtkplYWT102C5sn/EZTY4XROkxONgruHPU72l+gW+FfF8tf2cFjeHaRbWpOYa/uRBz/Xq1Pg==} + engines: {node: '>=18'} + cpu: [ia32] + os: [linux] + + '@esbuild/linux-loong64@0.27.7': + resolution: {integrity: sha512-a4POruNM2oWsD4WKvBSEKGIiWQF8fZOAsycHOt6JBpZ+JN2n2JH9WAv56SOyu9X5IqAjqSIPTaJkqN8F7XOQ5Q==} + engines: {node: '>=18'} + cpu: [loong64] + os: [linux] + + '@esbuild/linux-mips64el@0.27.7': + resolution: {integrity: 
sha512-KabT5I6StirGfIz0FMgl1I+R1H73Gp0ofL9A3nG3i/cYFJzKHhouBV5VWK1CSgKvVaG4q1RNpCTR2LuTVB3fIw==} + engines: {node: '>=18'} + cpu: [mips64el] + os: [linux] + + '@esbuild/linux-ppc64@0.27.7': + resolution: {integrity: sha512-gRsL4x6wsGHGRqhtI+ifpN/vpOFTQtnbsupUF5R5YTAg+y/lKelYR1hXbnBdzDjGbMYjVJLJTd2OFmMewAgwlQ==} + engines: {node: '>=18'} + cpu: [ppc64] + os: [linux] + + '@esbuild/linux-riscv64@0.27.7': + resolution: {integrity: sha512-hL25LbxO1QOngGzu2U5xeXtxXcW+/GvMN3ejANqXkxZ/opySAZMrc+9LY/WyjAan41unrR3YrmtTsUpwT66InQ==} + engines: {node: '>=18'} + cpu: [riscv64] + os: [linux] + + '@esbuild/linux-s390x@0.27.7': + resolution: {integrity: sha512-2k8go8Ycu1Kb46vEelhu1vqEP+UeRVj2zY1pSuPdgvbd5ykAw82Lrro28vXUrRmzEsUV0NzCf54yARIK8r0fdw==} + engines: {node: '>=18'} + cpu: [s390x] + os: [linux] + + '@esbuild/linux-x64@0.27.7': + resolution: {integrity: sha512-hzznmADPt+OmsYzw1EE33ccA+HPdIqiCRq7cQeL1Jlq2gb1+OyWBkMCrYGBJ+sxVzve2ZJEVeePbLM2iEIZSxA==} + engines: {node: '>=18'} + cpu: [x64] + os: [linux] + + '@esbuild/netbsd-arm64@0.27.7': + resolution: {integrity: sha512-b6pqtrQdigZBwZxAn1UpazEisvwaIDvdbMbmrly7cDTMFnw/+3lVxxCTGOrkPVnsYIosJJXAsILG9XcQS+Yu6w==} + engines: {node: '>=18'} + cpu: [arm64] + os: [netbsd] + + '@esbuild/netbsd-x64@0.27.7': + resolution: {integrity: sha512-OfatkLojr6U+WN5EDYuoQhtM+1xco+/6FSzJJnuWiUw5eVcicbyK3dq5EeV/QHT1uy6GoDhGbFpprUiHUYggrw==} + engines: {node: '>=18'} + cpu: [x64] + os: [netbsd] + + '@esbuild/openbsd-arm64@0.27.7': + resolution: {integrity: sha512-AFuojMQTxAz75Fo8idVcqoQWEHIXFRbOc1TrVcFSgCZtQfSdc1RXgB3tjOn/krRHENUB4j00bfGjyl2mJrU37A==} + engines: {node: '>=18'} + cpu: [arm64] + os: [openbsd] + + '@esbuild/openbsd-x64@0.27.7': + resolution: {integrity: sha512-+A1NJmfM8WNDv5CLVQYJ5PshuRm/4cI6WMZRg1by1GwPIQPCTs1GLEUHwiiQGT5zDdyLiRM/l1G0Pv54gvtKIg==} + engines: {node: '>=18'} + cpu: [x64] + os: [openbsd] + + '@esbuild/openharmony-arm64@0.27.7': + resolution: {integrity: 
sha512-+KrvYb/C8zA9CU/g0sR6w2RBw7IGc5J2BPnc3dYc5VJxHCSF1yNMxTV5LQ7GuKteQXZtspjFbiuW5/dOj7H4Yw==} + engines: {node: '>=18'} + cpu: [arm64] + os: [openharmony] + + '@esbuild/sunos-x64@0.27.7': + resolution: {integrity: sha512-ikktIhFBzQNt/QDyOL580ti9+5mL/YZeUPKU2ivGtGjdTYoqz6jObj6nOMfhASpS4GU4Q/Clh1QtxWAvcYKamA==} + engines: {node: '>=18'} + cpu: [x64] + os: [sunos] + + '@esbuild/win32-arm64@0.27.7': + resolution: {integrity: sha512-7yRhbHvPqSpRUV7Q20VuDwbjW5kIMwTHpptuUzV+AA46kiPze5Z7qgt6CLCK3pWFrHeNfDd1VKgyP4O+ng17CA==} + engines: {node: '>=18'} + cpu: [arm64] + os: [win32] + + '@esbuild/win32-ia32@0.27.7': + resolution: {integrity: sha512-SmwKXe6VHIyZYbBLJrhOoCJRB/Z1tckzmgTLfFYOfpMAx63BJEaL9ExI8x7v0oAO3Zh6D/Oi1gVxEYr5oUCFhw==} + engines: {node: '>=18'} + cpu: [ia32] + os: [win32] + + '@esbuild/win32-x64@0.27.7': + resolution: {integrity: sha512-56hiAJPhwQ1R4i+21FVF7V8kSD5zZTdHcVuRFMW0hn753vVfQN8xlx4uOPT4xoGH0Z/oVATuR82AiqSTDIpaHg==} + engines: {node: '>=18'} + cpu: [x64] + os: [win32] + + '@google-cloud/bigquery@8.3.0': + resolution: {integrity: sha512-aAOWE/tGQkcnbsmglMW4fz7wpT0PnD3kTcBp7C8KPhLgYKjtVn/i0Ya/mHzfdRQdeMGQO4ApzQBiW24ZBa49Xw==} + engines: {node: '>=18'} + + '@google-cloud/common@6.0.0': + resolution: {integrity: sha512-IXh04DlkLMxWgYLIUYuHHKXKOUwPDzDgke1ykkkJPe48cGIS9kkL2U/o0pm4ankHLlvzLF/ma1eO86n/bkumIA==} + engines: {node: '>=18'} + + '@google-cloud/paginator@6.0.0': + resolution: {integrity: sha512-g5nmMnzC+94kBxOKkLGpK1ikvolTFCC3s2qtE4F+1EuArcJ7HHC23RDQVt3Ra3CqpUYZ+oXNKZ8n5Cn5yug8DA==} + engines: {node: '>=18'} + + '@google-cloud/precise-date@5.0.0': + resolution: {integrity: sha512-9h0Gvw92EvPdE8AK8AgZPbMnH5ftDyPtKm7/KUfcJVaPEPjwGDsJd1QV0H8esBDV4II41R/2lDWH1epBqIoKUw==} + engines: {node: '>=18'} + + '@google-cloud/projectify@4.0.0': + resolution: {integrity: sha512-MmaX6HeSvyPbWGwFq7mXdo0uQZLGBYCwziiLIGq5JVX+/bdI3SAq6bP98trV5eTWfLuvsMcIC1YJOF2vfteLFA==} + engines: {node: '>=14.0.0'} + + '@google-cloud/promisify@4.1.0': + resolution: {integrity: 
sha512-G/FQx5cE/+DqBbOpA5jKsegGwdPniU6PuIEMt+qxWgFxvxuFOzVmp6zYchtYuwAWV5/8Dgs0yAmjvNZv3uXLQg==} + engines: {node: '>=18'} + + '@google-cloud/promisify@5.0.0': + resolution: {integrity: sha512-N8qS6dlORGHwk7WjGXKOSsLjIjNINCPicsOX6gyyLiYk7mq3MtII96NZ9N2ahwA2vnkLmZODOIH9rlNniYWvCQ==} + engines: {node: '>=18'} + + '@hono/node-server@1.19.14': + resolution: {integrity: sha512-GwtvgtXxnWsucXvbQXkRgqksiH2Qed37H9xHZocE5sA3N8O8O8/8FA3uclQXxXVzc9XBZuEOMK7+r02FmSpHtw==} + engines: {node: '>=18.14.1'} + peerDependencies: + hono: ^4 + + '@jridgewell/sourcemap-codec@1.5.5': + resolution: {integrity: sha512-cYQ9310grqxueWbl+WuIUIaiUaDcj7WOq5fVhEljNVgRfOUhY9fy2zTvfoqWsnebh8Sl70VScFbICvJnLKB0Og==} + + '@js-joda/core@5.7.0': + resolution: {integrity: sha512-WBu4ULVVxySLLzK1Ppq+OdfP+adRS4ntmDQT915rzDJ++i95gc2jZkM5B6LWEAwN3lGXpfie3yPABozdD3K3Vg==} + + '@klo/connector-bigquery@file:packages/connector-bigquery': + resolution: {directory: packages/connector-bigquery, type: directory} + engines: {node: '>=22.0.0'} + + '@klo/connector-clickhouse@file:packages/connector-clickhouse': + resolution: {directory: packages/connector-clickhouse, type: directory} + engines: {node: '>=22.0.0'} + + '@klo/connector-mysql@file:packages/connector-mysql': + resolution: {directory: packages/connector-mysql, type: directory} + engines: {node: '>=22.0.0'} + + '@klo/connector-postgres@file:packages/connector-postgres': + resolution: {directory: packages/connector-postgres, type: directory} + engines: {node: '>=22.0.0'} + + '@klo/connector-posthog@file:packages/connector-posthog': + resolution: {directory: packages/connector-posthog, type: directory} + engines: {node: '>=22.0.0'} + + '@klo/connector-snowflake@file:packages/connector-snowflake': + resolution: {directory: packages/connector-snowflake, type: directory} + engines: {node: '>=22.0.0'} + + '@klo/connector-sqlite@file:packages/connector-sqlite': + resolution: {directory: packages/connector-sqlite, type: directory} + engines: {node: '>=22.0.0'} + + 
'@klo/connector-sqlserver@file:packages/connector-sqlserver': + resolution: {directory: packages/connector-sqlserver, type: directory} + engines: {node: '>=22.0.0'} + + '@klo/context@file:packages/context': + resolution: {directory: packages/context, type: directory} + engines: {node: '>=22.0.0'} + + '@klo/llm@file:packages/llm': + resolution: {directory: packages/llm, type: directory} + engines: {node: '>=22.0.0'} + + '@kwsites/file-exists@1.1.1': + resolution: {integrity: sha512-m9/5YGR18lIwxSFDwfE3oA7bWuq9kdau6ugN4H2rJeyhFQZcG9AgSHkQtSD15a8WvTgfz9aikZMrKPHvbpqFiw==} + + '@kwsites/promise-deferred@1.1.1': + resolution: {integrity: sha512-GaHYm+c0O9MjZRu0ongGBRbinu8gVAMd2UZjji6jVmqKtZluZnptXGWhz1E8j8D2HJ3f/yMxKAUC0b+57wncIw==} + + '@looker/sdk-node@26.6.1': + resolution: {integrity: sha512-KxjVY0j23BfBs5t1uZcJmvz/PRxto9AvrVElZZ8EkswIwugxOdhLkLBkY8bb2Ki1PFtwbJDVp1nx1xWDdlcATg==} + engines: {node: '>=12', npm: '>=5.5.1'} + + '@looker/sdk-rtl@21.6.5': + resolution: {integrity: sha512-gQLUpMQD0O6o/rPt19rxudOWfpCnvBrQ7j1KhPBV//RqJCQanFNkzxuO+BaRiMzRp1Qx2bSvwKnr47/HyY5TKg==} + engines: {node: '>=12', npm: '>=5.5.1'} + + '@looker/sdk@26.6.1': + resolution: {integrity: sha512-uTYkic5Au52vMPAJDBGd55IPCJlbiM6NvByryfKqBiCD6X/lDi5qRPbxCYJKxm7xZrLC8i4azNkSHscx4U9Lgw==} + engines: {node: '>=12', npm: '>=5.5.1'} + + '@modelcontextprotocol/sdk@1.29.0': + resolution: {integrity: sha512-zo37mZA9hJWpULgkRpowewez1y6ML5GsXJPY8FI0tBBCd77HEvza4jDqRKOXgHNn867PVGCyTdzqpz0izu5ZjQ==} + engines: {node: '>=18'} + peerDependencies: + '@cfworker/json-schema': ^4.1.1 + zod: ^3.25 || ^4.0 + peerDependenciesMeta: + '@cfworker/json-schema': + optional: true + + '@napi-rs/wasm-runtime@1.1.4': + resolution: {integrity: sha512-3NQNNgA1YSlJb/kMH1ildASP9HW7/7kYnRI2szWJaofaS1hWmbGI4H+d3+22aGzXXN9IJ+n+GiFVcGipJP18ow==} + peerDependencies: + '@emnapi/core': ^1.7.1 + '@emnapi/runtime': ^1.7.1 + + '@nodable/entities@2.1.0': + resolution: {integrity: 
sha512-nyT7T3nbMyBI/lvr6L5TyWbFJAI9FTgVRakNoBqCD+PmID8DzFrrNdLLtHMwMszOtqZa8PAOV24ZqDnQrhQINA==} + + '@notionhq/client@5.20.0': + resolution: {integrity: sha512-MS0DFSfHPLZ0wi+e9mOP16gCCYbWAkaiMuu/rVK9KxDlKac4oAgHReOfTglcd77g/nlg78snp+KB7fNWP75bEw==} + engines: {node: '>=18'} + + '@opentelemetry/api@1.9.0': + resolution: {integrity: sha512-3giAOQvZiH5F9bMlMiv8+GSPMeqg0dbaeo58/0SlA9sxSqZhnUtxzX9/2FzyhS9sWQf5S0GJE0AKBrFqjpeYcg==} + engines: {node: '>=8.0.0'} + + '@oxc-project/types@0.127.0': + resolution: {integrity: sha512-aIYXQBo4lCbO4z0R3FHeucQHpF46l2LbMdxRvqvuRuW2OxdnSkcng5B8+K12spgLDj93rtN3+J2Vac/TIO+ciQ==} + + '@rolldown/binding-android-arm64@1.0.0-rc.17': + resolution: {integrity: sha512-s70pVGhw4zqGeFnXWvAzJDlvxhlRollagdCCKRgOsgUOH3N1l0LIxf83AtGzmb5SiVM4Hjl5HyarMRfdfj3DaQ==} + engines: {node: ^20.19.0 || >=22.12.0} + cpu: [arm64] + os: [android] + + '@rolldown/binding-darwin-arm64@1.0.0-rc.17': + resolution: {integrity: sha512-4ksWc9n0mhlZpZ9PMZgTGjeOPRu8MB1Z3Tz0Mo02eWfWCHMW1zN82Qz/pL/rC+yQa+8ZnutMF0JjJe7PjwasYw==} + engines: {node: ^20.19.0 || >=22.12.0} + cpu: [arm64] + os: [darwin] + + '@rolldown/binding-darwin-x64@1.0.0-rc.17': + resolution: {integrity: sha512-SUSDOI6WwUVNcWxd02QEBjLdY1VPHvlEkw6T/8nYG322iYWCTxRb1vzk4E+mWWYehTp7ERibq54LSJGjmouOsw==} + engines: {node: ^20.19.0 || >=22.12.0} + cpu: [x64] + os: [darwin] + + '@rolldown/binding-freebsd-x64@1.0.0-rc.17': + resolution: {integrity: sha512-hwnz3nw9dbJ05EDO/PvcjaaewqqDy7Y1rn1UO81l8iIK1GjenME75dl16ajbvSSMfv66WXSRCYKIqfgq2KCfxw==} + engines: {node: ^20.19.0 || >=22.12.0} + cpu: [x64] + os: [freebsd] + + '@rolldown/binding-linux-arm-gnueabihf@1.0.0-rc.17': + resolution: {integrity: sha512-IS+W7epTcwANmFSQFrS1SivEXHtl1JtuQA9wlxrZTcNi6mx+FDOYrakGevvvTwgj2JvWiK8B29/qD9BELZPyXQ==} + engines: {node: ^20.19.0 || >=22.12.0} + cpu: [arm] + os: [linux] + + '@rolldown/binding-linux-arm64-gnu@1.0.0-rc.17': + resolution: {integrity: 
sha512-e6usGaHKW5BMNZOymS1UcEYGowQMWcgZ71Z17Sl/h2+ZziNJ1a9n3Zvcz6LdRyIW5572wBCTH/Z+bKuZouGk9Q==} + engines: {node: ^20.19.0 || >=22.12.0} + cpu: [arm64] + os: [linux] + + '@rolldown/binding-linux-arm64-musl@1.0.0-rc.17': + resolution: {integrity: sha512-b/CgbwAJpmrRLp02RPfhbudf5tZnN9nsPWK82znefso832etkem8H7FSZwxrOI9djcdTP7U6YfNhbRnh7djErg==} + engines: {node: ^20.19.0 || >=22.12.0} + cpu: [arm64] + os: [linux] + + '@rolldown/binding-linux-ppc64-gnu@1.0.0-rc.17': + resolution: {integrity: sha512-4EII1iNGRUN5WwGbF/kOh/EIkoDN9HsupgLQoXfY+D1oyJm7/F4t5PYU5n8SWZgG0FEwakyM8pGgwcBYruGTlA==} + engines: {node: ^20.19.0 || >=22.12.0} + cpu: [ppc64] + os: [linux] + + '@rolldown/binding-linux-s390x-gnu@1.0.0-rc.17': + resolution: {integrity: sha512-AH8oq3XqQo4IibpVXvPeLDI5pzkpYn0WiZAfT05kFzoJ6tQNzwRdDYQ45M8I/gslbodRZwW8uxLhbSBbkv96rA==} + engines: {node: ^20.19.0 || >=22.12.0} + cpu: [s390x] + os: [linux] + + '@rolldown/binding-linux-x64-gnu@1.0.0-rc.17': + resolution: {integrity: sha512-cLnjV3xfo7KslbU41Z7z8BH/E1y5mzUYzAqih1d1MDaIGZRCMqTijqLv76/P7fyHuvUcfGsIpqCdddbxLLK9rA==} + engines: {node: ^20.19.0 || >=22.12.0} + cpu: [x64] + os: [linux] + + '@rolldown/binding-linux-x64-musl@1.0.0-rc.17': + resolution: {integrity: sha512-0phclDw1spsL7dUB37sIARuis2tAgomCJXAHZlpt8PXZ4Ba0dRP1e+66lsRqrfhISeN9bEGNjQs+T/Fbd7oYGw==} + engines: {node: ^20.19.0 || >=22.12.0} + cpu: [x64] + os: [linux] + + '@rolldown/binding-openharmony-arm64@1.0.0-rc.17': + resolution: {integrity: sha512-0ag/hEgXOwgw4t8QyQvUCxvEg+V0KBcA6YuOx9g0r02MprutRF5dyljgm3EmR02O292UX7UeS6HzWHAl6KgyhA==} + engines: {node: ^20.19.0 || >=22.12.0} + cpu: [arm64] + os: [openharmony] + + '@rolldown/binding-wasm32-wasi@1.0.0-rc.17': + resolution: {integrity: sha512-LEXei6vo0E5wTGwpkJ4KoT3OZJRnglwldt5ziLzOlc6qqb55z4tWNq2A+PFqCJuvWWdP53CVhG1Z9NtToDPJrA==} + engines: {node: ^20.19.0 || >=22.12.0} + cpu: [wasm32] + + '@rolldown/binding-win32-arm64-msvc@1.0.0-rc.17': + resolution: {integrity: 
sha512-gUmyzBl3SPMa6hrqFUth9sVfcLBlYsbMzBx5PlexMroZStgzGqlZ26pYG89rBb45Mnia+oil6YAIFeEWGWhoZA==} + engines: {node: ^20.19.0 || >=22.12.0} + cpu: [arm64] + os: [win32] + + '@rolldown/binding-win32-x64-msvc@1.0.0-rc.17': + resolution: {integrity: sha512-3hkiolcUAvPB9FLb3UZdfjVVNWherN1f/skkGWJP/fgSQhYUZpSIRr0/I8ZK9TkF3F7kxvJAk0+IcKvPHk9qQg==} + engines: {node: ^20.19.0 || >=22.12.0} + cpu: [x64] + os: [win32] + + '@rolldown/pluginutils@1.0.0-rc.17': + resolution: {integrity: sha512-n8iosDOt6Ig1UhJ2AYqoIhHWh/isz0xpicHTzpKBeotdVsTEcxsSA/i3EVM7gQAj0rU27OLAxCjzlj15IWY7bg==} + + '@smithy/chunked-blob-reader-native@4.2.3': + resolution: {integrity: sha512-jA5k5Udn7Y5717L86h4EIv06wIr3xn8GM1qHRi/Nf31annXcXHJjBKvgztnbn2TxH3xWrPBfgwHsOwZf0UmQWw==} + engines: {node: '>=18.0.0'} + + '@smithy/chunked-blob-reader@5.2.2': + resolution: {integrity: sha512-St+kVicSyayWQca+I1rGitaOEH6uKgE8IUWoYnnEX26SWdWQcL6LvMSD19Lg+vYHKdT9B2Zuu7rd3i6Wnyb/iw==} + engines: {node: '>=18.0.0'} + + '@smithy/config-resolver@4.4.17': + resolution: {integrity: sha512-TzDZcAnhTyAHbXVxWZo7/tEcrIeFq20IBk8So3OLOetWpR8EwY/yEqBMBFaJMeyEiREDq4NfEl+qO3OAUD+vbQ==} + engines: {node: '>=18.0.0'} + + '@smithy/core@3.23.17': + resolution: {integrity: sha512-x7BlLbUFL8NWCGjMF9C+1N5cVCxcPa7g6Tv9B4A2luWx3be3oU8hQ96wIwxe/s7OhIzvoJH73HAUSg5JXVlEtQ==} + engines: {node: '>=18.0.0'} + + '@smithy/credential-provider-imds@4.2.14': + resolution: {integrity: sha512-Au28zBN48ZAoXdooGUHemuVBrkE+Ie6RPmGNIAJsFqj33Vhb6xAgRifUydZ2aY+M+KaMAETAlKk5NC5h1G7wpg==} + engines: {node: '>=18.0.0'} + + '@smithy/eventstream-codec@4.2.14': + resolution: {integrity: sha512-erZq0nOIpzfeZdCyzZjdJb4nVSKLUmSkaQUVkRGQTXs30gyUGeKnrYEg+Xe1W5gE3aReS7IgsvANwVPxSzY6Pw==} + engines: {node: '>=18.0.0'} + + '@smithy/eventstream-serde-browser@4.2.14': + resolution: {integrity: sha512-8IelTCtTctWRbb+0Dcy+C0aICh1qa0qWXqgjcXDmMuCvPJRnv26hiDZoAau2ILOniki65mCPKqOQs/BaWvO4CQ==} + engines: {node: '>=18.0.0'} + + '@smithy/eventstream-serde-config-resolver@4.3.14': + 
resolution: {integrity: sha512-sqHiHpYRYo3FJlaIxD1J8PhbcmJAm7IuM16mVnwSkCToD7g00IBZzKuiLNMGmftULmEUX6/UAz8/NN5uMP8bVA==} + engines: {node: '>=18.0.0'} + + '@smithy/eventstream-serde-node@4.2.14': + resolution: {integrity: sha512-Ht/8BuGlKfFTy0H3+8eEu0vdpwGztCnaLLXtpXNdQqiR7Hj4vFScU3T436vRAjATglOIPjJXronY+1WxxNLSiw==} + engines: {node: '>=18.0.0'} + + '@smithy/eventstream-serde-universal@4.2.14': + resolution: {integrity: sha512-lWyt4T2XQZUZgK3tQ3Wn0w3XBvZsK/vjTuJl6bXbnGZBHH0ZUSONTYiK9TgjTTzU54xQr3DRFwpjmhp0oLm3gg==} + engines: {node: '>=18.0.0'} + + '@smithy/fetch-http-handler@5.3.17': + resolution: {integrity: sha512-bXOvQzaSm6MnmLaWA1elgfQcAtN4UP3vXqV97bHuoOrHQOJiLT3ds6o9eo5bqd0TJfRFpzdGnDQdW3FACiAVdw==} + engines: {node: '>=18.0.0'} + + '@smithy/hash-blob-browser@4.2.15': + resolution: {integrity: sha512-0PJ4Al3fg2nM4qKrAIxyNcApgqHAXcBkN8FeizOz69z0rb26uZ6lMESYtxegaTlXB5Hj84JfwMPavMrwDMjucA==} + engines: {node: '>=18.0.0'} + + '@smithy/hash-node@4.2.14': + resolution: {integrity: sha512-8ZBDY2DD4wr+GGjTpPtiglEsqr0lUP+KHqgZcWczFf6qeZ/YRjMIOoQWVQlmwu7EtxKTd8YXD8lblmYcpBIA1g==} + engines: {node: '>=18.0.0'} + + '@smithy/hash-stream-node@4.2.14': + resolution: {integrity: sha512-tw4GANWkZPb6+BdD4Fgucqzey2+r73Z/GRo9zklsCdwrnxxumUV83ZIaBDdudV4Ylazw3EPTiJZhpX42105ruQ==} + engines: {node: '>=18.0.0'} + + '@smithy/invalid-dependency@4.2.14': + resolution: {integrity: sha512-c21qJiTSb25xvvOp+H2TNZzPCngrvl5vIPqPB8zQ/DmJF4QWXO19x1dWfMJZ6wZuuWUPPm0gV8C0cU3+ifcWuw==} + engines: {node: '>=18.0.0'} + + '@smithy/is-array-buffer@2.2.0': + resolution: {integrity: sha512-GGP3O9QFD24uGeAXYUjwSTXARoqpZykHadOmA8G5vfJPK0/DC67qa//0qvqrJzL1xc8WQWX7/yc7fwudjPHPhA==} + engines: {node: '>=14.0.0'} + + '@smithy/is-array-buffer@4.2.2': + resolution: {integrity: sha512-n6rQ4N8Jj4YTQO3YFrlgZuwKodf4zUFs7EJIWH86pSCWBaAtAGBFfCM7Wx6D2bBJ2xqFNxGBSrUWswT3M0VJow==} + engines: {node: '>=18.0.0'} + + '@smithy/md5-js@4.2.14': + resolution: {integrity: 
sha512-V2v0vx+h0iUSNG1Alt+GNBMSLGCrl9iVsdd+Ap67HPM9PN479x12V8LkuMoKImNZxn3MXeuyUjls+/7ZACZghA==} + engines: {node: '>=18.0.0'} + + '@smithy/middleware-content-length@4.2.14': + resolution: {integrity: sha512-xhHq7fX4/3lv5NHxLUk3OeEvl0xZ+Ek3qIbWaCL4f9JwgDZEclPBElljaZCAItdGPQl/kSM4LPMOpy1MYgprpw==} + engines: {node: '>=18.0.0'} + + '@smithy/middleware-endpoint@4.4.32': + resolution: {integrity: sha512-ZZkgyjnJppiZbIm6Qbx92pbXYi1uzenIvGhBSCDlc7NwuAkiqSgS75j1czAD25ZLs2FjMjYy1q7gyRVWG6JA0Q==} + engines: {node: '>=18.0.0'} + + '@smithy/middleware-retry@4.5.7': + resolution: {integrity: sha512-bRt6ZImqVSeTk39Nm81K20ObIiAZ3WefY7G6+iz/0tZjs4dgRRjvRX2sgsH+zi6iDCRR/aQvQofLKxxz4rPBZg==} + engines: {node: '>=18.0.0'} + + '@smithy/middleware-serde@4.2.20': + resolution: {integrity: sha512-Lx9JMO9vArPtiChE3wbEZ5akMIDQpWQtlu90lhACQmNOXcGXRbaDywMHDzuDZ2OkZzP+9wQfZi3YJT9F67zTQQ==} + engines: {node: '>=18.0.0'} + + '@smithy/middleware-stack@4.2.14': + resolution: {integrity: sha512-2dvkUKLuFdKsCRmOE4Mn63co0Djtsm+JMh0bYZQupN1pJwMeE8FmQmRLLzzEMN0dnNi7CDCYYH8F0EVwWiPBeA==} + engines: {node: '>=18.0.0'} + + '@smithy/node-config-provider@4.3.14': + resolution: {integrity: sha512-S+gFjyo/weSVL0P1b9Ts8C/CwIfNCgUPikk3sl6QVsfE/uUuO+QsF+NsE/JkpvWqqyz1wg7HFdiaZuj5CoBMRg==} + engines: {node: '>=18.0.0'} + + '@smithy/node-http-handler@4.6.1': + resolution: {integrity: sha512-iB+orM4x3xrr57X3YaXazfKnntl0LHlZB1kcXSGzMV1Tt0+YwEjGlbjk/44qEGtBzXAz6yFDzkYTKSV6Pj2HUg==} + engines: {node: '>=18.0.0'} + + '@smithy/property-provider@4.2.14': + resolution: {integrity: sha512-WuM31CgfsnQ/10i7NYr0PyxqknD72Y5uMfUMVSniPjbEPceiTErb4eIqJQ+pdxNEAUEWrewrGjIRjVbVHsxZiQ==} + engines: {node: '>=18.0.0'} + + '@smithy/protocol-http@5.3.14': + resolution: {integrity: sha512-dN5F8kHx8RNU0r+pCwNmFZyz6ChjMkzShy/zup6MtkRmmix4vZzJdW+di7x//b1LiynIev88FM18ie+wwPcQtQ==} + engines: {node: '>=18.0.0'} + + '@smithy/querystring-builder@4.2.14': + resolution: {integrity: 
sha512-XYA5Z0IqTeF+5XDdh4BBmSA0HvbgVZIyv4cmOoUheDNR57K1HgBp9ukUMx3Cr3XpDHHpLBnexPE3LAtDsZkj2A==} + engines: {node: '>=18.0.0'} + + '@smithy/querystring-parser@4.2.14': + resolution: {integrity: sha512-hr+YyqBD23GVvRxGGrcc/oOeNlK3PzT5Fu4dzrDXxzS1LpFiuL2PQQqKPs87M79aW7ziMs+nvB3qdw77SqE7Lw==} + engines: {node: '>=18.0.0'} + + '@smithy/service-error-classification@4.3.1': + resolution: {integrity: sha512-aUQuDGh760ts/8MU+APjIZhlLPKhIIfqyzZaJikLEIMrdxFvxuLYD0WxWzaYWpmLbQlXDe9p7EWM3HsBe0K6Gw==} + engines: {node: '>=18.0.0'} + + '@smithy/shared-ini-file-loader@4.4.9': + resolution: {integrity: sha512-495/V2I15SHgedSJoDPD23JuSfKAp726ZI1V0wtjB07Wh7q/0tri/0e0DLefZCHgxZonrGKt/OCTpAtP1wE1kQ==} + engines: {node: '>=18.0.0'} + + '@smithy/signature-v4@5.3.14': + resolution: {integrity: sha512-1D9Y/nmlVjCeSivCbhZ7hgEpmHyY1h0GvpSZt3l0xcD9JjmjVC1CHOozS6+Gh+/ldMH8JuJ6cujObQqfayAVFA==} + engines: {node: '>=18.0.0'} + + '@smithy/smithy-client@4.12.13': + resolution: {integrity: sha512-y/Pcj1V9+qG98gyu1gvftHB7rDpdh+7kIBIggs55yGm3JdtBV8GT8IFF3a1qxZ79QnaJHX9GXzvBG6tAd+czJA==} + engines: {node: '>=18.0.0'} + + '@smithy/types@4.14.1': + resolution: {integrity: sha512-59b5HtSVrVR/eYNei3BUj3DCPKD/G7EtDDe7OEJE7i7FtQFugYo6MxbotS8mVJkLNVf8gYaAlEBwwtJ9HzhWSg==} + engines: {node: '>=18.0.0'} + + '@smithy/url-parser@4.2.14': + resolution: {integrity: sha512-p06BiBigJ8bTA3MgnOfCtDUWnAMY0YfedO/GRpmc7p+wg3KW8vbXy1xwSu5ASy0wV7rRYtlfZOIKH4XqfhjSQQ==} + engines: {node: '>=18.0.0'} + + '@smithy/util-base64@4.3.2': + resolution: {integrity: sha512-XRH6b0H/5A3SgblmMa5ErXQ2XKhfbQB+Fm/oyLZ2O2kCUrwgg55bU0RekmzAhuwOjA9qdN5VU2BprOvGGUkOOQ==} + engines: {node: '>=18.0.0'} + + '@smithy/util-body-length-browser@4.2.2': + resolution: {integrity: sha512-JKCrLNOup3OOgmzeaKQwi4ZCTWlYR5H4Gm1r2uTMVBXoemo1UEghk5vtMi1xSu2ymgKVGW631e2fp9/R610ZjQ==} + engines: {node: '>=18.0.0'} + + '@smithy/util-body-length-node@4.2.3': + resolution: {integrity: 
sha512-ZkJGvqBzMHVHE7r/hcuCxlTY8pQr1kMtdsVPs7ex4mMU+EAbcXppfo5NmyxMYi2XU49eqaz56j2gsk4dHHPG/g==} + engines: {node: '>=18.0.0'} + + '@smithy/util-buffer-from@2.2.0': + resolution: {integrity: sha512-IJdWBbTcMQ6DA0gdNhh/BwrLkDR+ADW5Kr1aZmd4k3DIF6ezMV4R2NIAmT08wQJ3yUK82thHWmC/TnK/wpMMIA==} + engines: {node: '>=14.0.0'} + + '@smithy/util-buffer-from@4.2.2': + resolution: {integrity: sha512-FDXD7cvUoFWwN6vtQfEta540Y/YBe5JneK3SoZg9bThSoOAC/eGeYEua6RkBgKjGa/sz6Y+DuBZj3+YEY21y4Q==} + engines: {node: '>=18.0.0'} + + '@smithy/util-config-provider@4.2.2': + resolution: {integrity: sha512-dWU03V3XUprJwaUIFVv4iOnS1FC9HnMHDfUrlNDSh4315v0cWyaIErP8KiqGVbf5z+JupoVpNM7ZB3jFiTejvQ==} + engines: {node: '>=18.0.0'} + + '@smithy/util-defaults-mode-browser@4.3.49': + resolution: {integrity: sha512-a5bNrdiONYB/qE2BuKegvUMd/+ZDwdg4vsNuuSzYE8qs2EYAdK9CynL+Rzn29PbPiUqoz/cbpRbcLzD5lEevHw==} + engines: {node: '>=18.0.0'} + + '@smithy/util-defaults-mode-node@4.2.54': + resolution: {integrity: sha512-g1cvrJvOnzeJgEdf7AE4luI7gp6L8weE0y9a9wQUSGtjb8QRHDbCJYuE4Sy0SD9N8RrnNPFsPltAz/OSoBR9Zw==} + engines: {node: '>=18.0.0'} + + '@smithy/util-endpoints@3.4.2': + resolution: {integrity: sha512-a55Tr+3OKld4TTtnT+RhKOQHyPxm3j/xL4OR83WBUhLJaKDS9dnJ7arRMOp3t31dcLhApwG9bgvrRXBHlLdIkg==} + engines: {node: '>=18.0.0'} + + '@smithy/util-hex-encoding@4.2.2': + resolution: {integrity: sha512-Qcz3W5vuHK4sLQdyT93k/rfrUwdJ8/HZ+nMUOyGdpeGA1Wxt65zYwi3oEl9kOM+RswvYq90fzkNDahPS8K0OIg==} + engines: {node: '>=18.0.0'} + + '@smithy/util-middleware@4.2.14': + resolution: {integrity: sha512-1Su2vj9RYNDEv/V+2E+jXkkwGsgR7dc4sfHn9Z7ruzQHJIEni9zzw5CauvRXlFJfmgcqYP8fWa0dkh2Q2YaQyw==} + engines: {node: '>=18.0.0'} + + '@smithy/util-retry@4.3.6': + resolution: {integrity: sha512-p6/FO1n2KxMeQyna067i0uJ6TSbb165ZhnRtCpWh4Foxqbfc6oW+XITaL8QkFJj3KFnDe2URt4gOhgU06EP9ew==} + engines: {node: '>=18.0.0'} + + '@smithy/util-stream@4.5.25': + resolution: {integrity: 
sha512-/PFpG4k8Ze8Ei+mMKj3oiPICYekthuzePZMgZbCqMiXIHHf4n2aZ4Ps0aSRShycFTGuj/J6XldmC0x0DwednIA==} + engines: {node: '>=18.0.0'} + + '@smithy/util-uri-escape@4.2.2': + resolution: {integrity: sha512-2kAStBlvq+lTXHyAZYfJRb/DfS3rsinLiwb+69SstC9Vb0s9vNWkRwpnj918Pfi85mzi42sOqdV72OLxWAISnw==} + engines: {node: '>=18.0.0'} + + '@smithy/util-utf8@2.3.0': + resolution: {integrity: sha512-R8Rdn8Hy72KKcebgLiv8jQcQkXoLMOGGv5uI1/k0l+snqkOzQ1R0ChUBCxWMlBsFMekWjq0wRudIweFs7sKT5A==} + engines: {node: '>=14.0.0'} + + '@smithy/util-utf8@4.2.2': + resolution: {integrity: sha512-75MeYpjdWRe8M5E3AW0O4Cx3UadweS+cwdXjwYGBW5h/gxxnbeZ877sLPX/ZJA9GVTlL/qG0dXP29JWFCD1Ayw==} + engines: {node: '>=18.0.0'} + + '@smithy/util-waiter@4.3.0': + resolution: {integrity: sha512-JyjYmLAfS+pdxF92o4yLgEoy0zhayKTw73FU1aofLWwLcJw7iSqIY2exGmMTrl/lmZugP5p/zxdFSippJDfKWA==} + engines: {node: '>=18.0.0'} + + '@smithy/uuid@1.1.2': + resolution: {integrity: sha512-O/IEdcCUKkubz60tFbGA7ceITTAJsty+lBjNoorP4Z6XRqaFb/OjQjZODophEcuq68nKm6/0r+6/lLQ+XVpk8g==} + engines: {node: '>=18.0.0'} + + '@so-ric/colorspace@1.1.6': + resolution: {integrity: sha512-/KiKkpHNOBgkFJwu9sh48LkHSMYGyuTcSFK/qMBdnOAlrRJzRSXAOFB5qwzaVQuDl8wAvHVMkaASQDReTahxuw==} + + '@standard-schema/spec@1.1.0': + resolution: {integrity: sha512-l2aFy5jALhniG5HgqrD6jXLi/rUWrKvqN/qJx6yoJsgKhblVd+iqqU4RCXavm/jPityDo5TCvKMnpjKnOriy0w==} + + '@techteamer/ocsp@1.0.1': + resolution: {integrity: sha512-q4pW5wAC6Pc3JI8UePwE37CkLQ5gDGZMgjSX4MEEm4D4Di59auDQ8UNIDzC4gRnPNmmcwjpPxozq8p5pjiOmOw==} + + '@tediousjs/connection-string@1.1.0': + resolution: {integrity: sha512-z9ZBWEG+8pIB5V1zYzlRPXx0oRJ5H7coPnMQK8EZOw03UTPI9Umn6viL36f5w+CuqkKsnCM50RVStpjZmR0Bng==} + + '@tybys/wasm-util@0.10.1': + resolution: {integrity: sha512-9tTaPJLSiejZKx+Bmog4uSubteqTvFrVrURwkmHixBo0G4seD0zUxp98E1DzUBJxLQ3NPwXrGKDiVjwx/DpPsg==} + + '@types/better-sqlite3@7.6.13': + resolution: {integrity: sha512-NMv9ASNARoKksWtsq/SHakpYAYnhBrQgGD8zkLYk/jaK8jUGn08CfEdTRgYhMypUQAfzSP8W6gNLe0q19/t4VA==} + + 
'@types/chai@5.2.3': + resolution: {integrity: sha512-Mw558oeA9fFbv65/y4mHtXDs9bPnFMZAL/jxdPFUpOHHIXX91mcgEHbS5Lahr+pwZFR8A7GQleRWeI6cGFC2UA==} + + '@types/deep-eql@4.0.2': + resolution: {integrity: sha512-c9h9dVVMigMPc4bwTvC5dxqtqJZwQPePsWjPlpSOnojbor6pGqdk541lfA7AqFQr5pB1BRdq0juY9db81BwyFw==} + + '@types/estree@1.0.8': + resolution: {integrity: sha512-dWHzHa2WqEXI/O1E9OjrocMTKJl2mSrEolh1Iomrv6U+JuNwaHXsXx9bLu5gG7BUWFIN0skIQJQ/L1rIex4X6w==} + + '@types/mssql@9.1.11': + resolution: {integrity: sha512-vcujgrDbDezCxNDO4KY6gjwduLYOKfrexpRUwhoysRvcXZ3+IgZ/PMYFDgh8c3cQIxZ6skAwYo+H6ibMrBWPjQ==} + + '@types/node@24.12.2': + resolution: {integrity: sha512-A1sre26ke7HDIuY/M23nd9gfB+nrmhtYyMINbjI1zHJxYteKR6qSMX56FsmjMcDb3SMcjJg5BiRRgOCC/yBD0g==} + + '@types/pg@8.20.0': + resolution: {integrity: sha512-bEPFOaMAHTEP1EzpvHTbmwR8UsFyHSKsRisLIHVMXnpNefSbGA1bD6CVy+qKjGSqmZqNqBDV2azOBo8TgkcVow==} + + '@types/react@19.2.14': + resolution: {integrity: sha512-ilcTH/UniCkMdtexkoCN0bI7pMcJDvmQFPvuPvmEaYA/NSfFTAgdUSLAoVjaRJm7+6PvcM+q1zYOwS4wTYMF9w==} + + '@types/readable-stream@4.0.23': + resolution: {integrity: sha512-wwXrtQvbMHxCbBgjHaMGEmImFTQxxpfMOR/ZoQnXxB1woqkUbdLGFDgauo00Py9IudiaqSeiBiulSV9i6XIPig==} + + '@types/triple-beam@1.3.5': + resolution: {integrity: sha512-6WaYesThRMCl19iryMYP7/x2OVgCtbIVflDGFpWnb9irXI3UjYE4AzmYuiUKY1AJstGijoY+MgUszMgRxIYTYw==} + + '@typespec/ts-http-runtime@0.3.5': + resolution: {integrity: sha512-yURCknZhvywvQItHMMmFSo+fq5arCUIyz/CVk7jD89MSai7dkaX8ufjCWp3NttLojoTVbcE72ri+be/TnEbMHw==} + engines: {node: '>=20.0.0'} + + '@vercel/oidc@3.2.0': + resolution: {integrity: sha512-UycprH3T6n3jH0k44NHMa7pnFHGu/N05MjojYr+Mc6I7obkoLIJujSWwin1pCvdy/eOxrI/l3uDLQsmcrOb4ug==} + engines: {node: '>= 20'} + + '@vitest/expect@4.1.5': + resolution: {integrity: sha512-PWBaRY5JoKuRnHlUHfpV/KohFylaDZTupcXN1H9vYryNLOnitSw60Mw9IAE2r67NbwwzBw/Cc/8q9BK3kIX8Kw==} + + '@vitest/mocker@4.1.5': + resolution: {integrity: 
sha512-/x2EmFC4mT4NNzqvC3fmesuV97w5FC903KPmey4gsnJiMQ3Be1IlDKVaDaG8iqaLFHqJ2FVEkxZk5VmeLjIItw==} + peerDependencies: + msw: ^2.4.9 + vite: ^6.0.0 || ^7.0.0 || ^8.0.0 + peerDependenciesMeta: + msw: + optional: true + vite: + optional: true + + '@vitest/pretty-format@4.1.5': + resolution: {integrity: sha512-7I3q6l5qr03dVfMX2wCo9FxwSJbPdwKjy2uu/YPpU3wfHvIL4QHwVRp57OfGrDFeUJ8/8QdfBKIV12FTtLn00g==} + + '@vitest/runner@4.1.5': + resolution: {integrity: sha512-2D+o7Pr82IEO46YPpoA/YU0neeyr6FTerQb5Ro7BUnBuv6NQtT/kmVnczngiMEBhzgqz2UZYl5gArejsyERDSQ==} + + '@vitest/snapshot@4.1.5': + resolution: {integrity: sha512-zypXEt4KH/XgKGPUz4eC2AvErYx0My5hfL8oDb1HzGFpEk1P62bxSohdyOmvz+d9UJwanI68MKwr2EquOaOgMQ==} + + '@vitest/spy@4.1.5': + resolution: {integrity: sha512-2lNOsh6+R2Idnf1TCZqSwYlKN2E/iDlD8sgU59kYVl+OMDmvldO1VDk39smRfpUNwYpNRVn3w4YfuC7KfbBnkQ==} + + '@vitest/utils@4.1.5': + resolution: {integrity: sha512-76wdkrmfXfqGjueGgnb45ITPyUi1ycZ4IHgC2bhPDUfWHklY/q3MdLOAB+TF1e6xfl8NxNY0ZYaPCFNWSsw3Ug==} + + abort-controller@3.0.0: + resolution: {integrity: sha512-h8lQ8tacZYnR3vNQTgibj+tODHI5/+l06Au2Pcriv/Gmet0eaj4TwWH41sO9wnHDiQsEj19q0drzdWdeAHtweg==} + engines: {node: '>=6.5'} + + accepts@2.0.0: + resolution: {integrity: sha512-5cvg6CtKwfgdmVqY1WIiXKc3Q1bkRqGLi+2W/6ao+6Y7gu/RCwRuAhGEzh5B4KlszSuTLgZYuqFqo5bImjNKng==} + engines: {node: '>= 0.6'} + + agent-base@7.1.4: + resolution: {integrity: sha512-MnA+YT8fwfJPgBx3m60MNqakm30XOkyIoH1y6huTQvC0PwZG7ki8NacLBcrPbNoo8vEZy7Jpuk7+jMO+CUovTQ==} + engines: {node: '>= 14'} + + ai@6.0.168: + resolution: {integrity: sha512-2HqCJuO+1V2aV7vfYs5LFEUfxbkGX+5oa54q/gCCTL7KLTdbxcCu5D7TdLA5kwsrs3Szgjah9q6D9tpjHM3hUQ==} + engines: {node: '>=18'} + peerDependencies: + zod: ^3.25.76 || ^4.1.8 + + ajv-formats@3.0.1: + resolution: {integrity: sha512-8iUql50EUR+uUcdRQ3HDqa6EVyo3docL8g5WJ3FNcWmu62IbkGUue/pEyLBW8VGKKucTPgqeks4fIU1DA4yowQ==} + peerDependencies: + ajv: ^8.0.0 + peerDependenciesMeta: + ajv: + optional: true + + ajv@8.20.0: + resolution: {integrity: 
sha512-Thbli+OlOj+iMPYFBVBfJ3OmCAnaSyNn4M1vz9T6Gka5Jt9ba/HIR56joy65tY6kx/FCF5VXNB819Y7/GUrBGA==} + + ansi-escapes@7.3.0: + resolution: {integrity: sha512-BvU8nYgGQBxcmMuEeUEmNTvrMVjJNSH7RgW24vXexN4Ven6qCvy4TntnvlnwnMLTVlcRQQdbRY8NKnaIoeWDNg==} + engines: {node: '>=18'} + + ansi-regex@6.2.2: + resolution: {integrity: sha512-Bq3SmSpyFHaWjPk8If9yc6svM8c56dB5BAtW4Qbw5jHTwwXXcTLoRMkpDJp6VL0XzlWaCHTXrkFURMYmD0sLqg==} + engines: {node: '>=12'} + + ansi-styles@6.2.3: + resolution: {integrity: sha512-4Dj6M28JB+oAH8kFkTLUo+a2jwOFkuqb3yucU0CANcRRUbxS0cP0nZYCGjcc3BNXwRIsUVmDGgzawme7zvJHvg==} + engines: {node: '>=12'} + + arrify@2.0.1: + resolution: {integrity: sha512-3duEwti880xqi4eAMN8AyR4a0ByT90zoYdLlevfrvU43vb0YZwZVfxOgxWrLXXXpyugL0hNZc9G6BiB5B3nUug==} + engines: {node: '>=8'} + + arrify@3.0.0: + resolution: {integrity: sha512-tLkvA81vQG/XqE2mjDkGQHoOINtMHtysSnemrmoGe6PydDPMRbVugqyk4A6V/WDWEfm3l+0d8anA9r8cv/5Jaw==} + engines: {node: '>=12'} + + asn1.js-rfc2560@5.0.1: + resolution: {integrity: sha512-1PrVg6kuBziDN3PGFmRk3QrjpKvP9h/Hv5yMrFZvC1kpzP6dQRzf5BpKstANqHBkaOUmTpakJWhicTATOA/SbA==} + peerDependencies: + asn1.js: ^5.0.0 + + asn1.js-rfc5280@3.0.0: + resolution: {integrity: sha512-Y2LZPOWeZ6qehv698ZgOGGCZXBQShObWnGthTrIFlIQjuV1gg2B8QOhWFRExq/MR1VnPpIIe7P9vX2vElxv+Pg==} + + asn1.js@5.4.1: + resolution: {integrity: sha512-+I//4cYPccV8LdmBLiX8CYvf9Sp3vQsrqu2QNXRcrbiWvcx/UdlFiqUJJzxRQxgsZmvhXhn4cSKeSmoFjVdupA==} + + assertion-error@2.0.1: + resolution: {integrity: sha512-Izi8RQcffqCeNVgFigKli1ssklIbpHnCYc6AknXGYoB6grJqyeby7jv12JUQgmTAnIDnbck1uxksT4dzN3PWBA==} + engines: {node: '>=12'} + + async@3.2.6: + resolution: {integrity: sha512-htCUDlxyyCLMgaM3xXg0C0LW2xqfuQ6p05pCEIsXuyQ+a1koYKTuBMzRNwmybfLgvJDMd0r1LTn4+E0Ti6C2AA==} + + asynckit@0.4.0: + resolution: {integrity: sha512-Oei9OH4tRh0YqU3GxhX79dM/mwVgvbZJaSNaRk+bshkj0S5cfHcgYakreBjrHwatXKbz+IoIdYLxrKim2MjW0Q==} + + auto-bind@5.0.1: + resolution: {integrity: 
sha512-ooviqdwwgfIfNmDwo94wlshcdzfO64XV0Cg6oDsDYBJfITDz1EngD2z7DkbvCWn+XIMsIqW27sEVF6qcpJrRcg==} + engines: {node: ^12.20.0 || ^14.13.1 || >=16.0.0} + + aws-ssl-profiles@1.1.2: + resolution: {integrity: sha512-NZKeq9AfyQvEeNlN0zSYAaWrmBffJh3IELMZfRpJVWgrpEbtEpnjvzqBPf+mxoI287JohRDoa+/nsfqqiZmF6g==} + engines: {node: '>= 6.0.0'} + + axios@1.15.2: + resolution: {integrity: sha512-wLrXxPtcrPTsNlJmKjkPnNPK2Ihe0hn0wGSaTEiHRPxwjvJwT3hKmXF4dpqxmPO9SoNb2FsYXj/xEo0gHN+D5A==} + + balanced-match@1.0.2: + resolution: {integrity: sha512-3oSeUO0TMV67hN1AmbXsK4yaqU7tjiHlbxRDZOpH0KW9+CeX4bRAaX0Anxt0tx2MrpRpWwQaPwIlISEJhYU5Pw==} + + balanced-match@4.0.4: + resolution: {integrity: sha512-BLrgEcRTwX2o6gGxGOCNyMvGSp35YofuYzw9h1IMTRmKqttAZZVU67bdb9Pr2vUHA8+j3i2tJfjO6C6+4myGTA==} + engines: {node: 18 || 20 || >=22} + + base64-js@1.5.1: + resolution: {integrity: sha512-AKpaYlHn8t4SVbOHCy+b5+KKgvR4vrsD8vbvrbiQJps7fKDTkjkDry6ji0rUJjC0kzbNePLwzxq8iypo41qeWA==} + + better-sqlite3@12.9.0: + resolution: {integrity: sha512-wqUv4Gm3toFpHDQmaKD4QhZm3g1DjUBI0yzS4UBl6lElUmXFYdTQmmEDpAFa5o8FiFiymURypEnfVHzILKaxqQ==} + engines: {node: 20.x || 22.x || 23.x || 24.x || 25.x} + + big-integer@1.6.52: + resolution: {integrity: sha512-QxD8cf2eVqJOOz63z6JIN9BzvVs/dlySa5HGSBH5xtR8dPteIRQnBxxKqkNTiT6jbDTF6jAfrd4oMcND9RGbQg==} + engines: {node: '>=0.6'} + + big.js@7.0.1: + resolution: {integrity: sha512-iFgV784tD8kq4ccF1xtNMZnXeZzVuXWWM+ERFzKQjv+A5G9HC8CY3DuV45vgzFFcW+u2tIvmF95+AzWgs6BjCg==} + + bignumber.js@9.3.1: + resolution: {integrity: sha512-Ko0uX15oIUS7wJ3Rb30Fs6SkVbLmPBAKdlm7q9+ak9bbIeFf0MwuBsQV6z7+X768/cHsfg+WlysDWJcmthjsjQ==} + + bindings@1.5.0: + resolution: {integrity: sha512-p2q/t/mhvuOj/UeLlV6566GD/guowlr0hHxClI0W9m7MWYkL1F0hLo+0Aexs9HSPCtR1SXQ0TD3MMKrXZajbiQ==} + + bl@4.1.0: + resolution: {integrity: sha512-1W07cM9gS6DcLperZfFSj+bWLtaPGSOHWhPiGzXmvVJbRLdG82sH/Kn8EtW1VqWVA54AKf2h5k5BbnIbwF3h6w==} + + bl@6.1.6: + resolution: {integrity: 
sha512-jLsPgN/YSvPUg9UX0Kd73CXpm2Psg9FxMeCSXnk3WBO3CMT10JMwijubhGfHCnFu6TPn1ei3b975dxv7K2pWVg==} + + bluebird@3.7.2: + resolution: {integrity: sha512-XpNj6GDQzdfW+r2Wnn7xiSAd7TM3jzkxGXBGTtWKuSXv1xUV+azxAm8jdWZN06QTQk+2N2XB9jRDkvbmQmcRtg==} + + bn.js@4.12.3: + resolution: {integrity: sha512-fGTi3gxV/23FTYdAoUtLYp6qySe2KE3teyZitipKNRuVYcBkoP/bB3guXN/XVKUe9mxCHXnc9C4ocyz8OmgN0g==} + + body-parser@2.2.2: + resolution: {integrity: sha512-oP5VkATKlNwcgvxi0vM0p/D3n2C3EReYVX+DNYs5TjZFn/oQt2j+4sVJtSMr18pdRr8wjTcBl6LoV+FUwzPmNA==} + engines: {node: '>=18'} + + bowser@2.14.1: + resolution: {integrity: sha512-tzPjzCxygAKWFOJP011oxFHs57HzIhOEracIgAePE4pqB3LikALKnSzUyU4MGs9/iCEUuHlAJTjTc5M+u7YEGg==} + + brace-expansion@1.1.14: + resolution: {integrity: sha512-MWPGfDxnyzKU7rNOW9SP/c50vi3xrmrua/+6hfPbCS2ABNWfx24vPidzvC7krjU/RTo235sV776ymlsMtGKj8g==} + + brace-expansion@5.0.5: + resolution: {integrity: sha512-VZznLgtwhn+Mact9tfiwx64fA9erHH/MCXEUfB/0bX/6Fz6ny5EGTXYltMocqg4xFAQZtnO3DHWWXi8RiuN7cQ==} + engines: {node: 18 || 20 || >=22} + + browser-request@0.3.3: + resolution: {integrity: sha512-YyNI4qJJ+piQG6MMEuo7J3Bzaqssufx04zpEKYfSrl/1Op59HWali9zMtBpXnkmqMcOuWJPZvudrm9wISmnCbg==} + engines: {'0': node} + + buffer-equal-constant-time@1.0.1: + resolution: {integrity: sha512-zRpUiDwd/xk6ADqPMATG8vc9VPrkck7T07OIx0gnjmJAnHnTVXNQG3vfvWNuiZIkwu9KrKdA1iJKfsfTVxE6NA==} + + buffer@5.7.1: + resolution: {integrity: sha512-EHcyIPBQ4BSGlvjB16k5KgAJ27CIsHY/2JBmCRReo48y9rQ3MaUzWX3KVlBa4U7MyX02HdVj0K7C3WaB3ju7FQ==} + + buffer@6.0.3: + resolution: {integrity: sha512-FTiCpNxtwiZZHEZbcbTIcZjERVICn9yq/pDFkTl95/AxzD1naBctN7YO68riM/gLSDY7sdrMby8hofADYuuqOA==} + + bundle-name@4.1.0: + resolution: {integrity: sha512-tjwM5exMg6BGRI+kNmTntNsvdZS1X8BFYS6tnJ2hdH0kVxM6/eVZ2xy+FqStSWvYmtfFMDLIxurorHwDKfDz5Q==} + engines: {node: '>=18'} + + bytes@3.1.2: + resolution: {integrity: sha512-/Nf7TyzTx6S3yRJObOAV7956r8cr2+Oj8AC5dt8wSP3BQAoeX58NoHyCU8P8zGkNXStjTSi6fzO6F0pBdcYbEg==} + engines: {node: '>= 0.8'} + + 
call-bind-apply-helpers@1.0.2: + resolution: {integrity: sha512-Sp1ablJ0ivDkSzjcaJdxEunN5/XvksFJ2sMBFfq6x0ryhQV/2b/KwFe21cMpmHtPOSij8K99/wSfoEuTObmuMQ==} + engines: {node: '>= 0.4'} + + call-bound@1.0.4: + resolution: {integrity: sha512-+ys997U96po4Kx/ABpBCqhA9EuxJaQWDQg7295H4hBphv3IZg0boBKuwYpt4YXp6MZ5AmZQnU/tyMTlRpaSejg==} + engines: {node: '>= 0.4'} + + chai@6.2.2: + resolution: {integrity: sha512-NUPRluOfOiTKBKvWPtSD4PhFvWCqOi0BGStNWs57X9js7XGTprSmFoz5F0tWhR4WPjNeR9jXqdC7/UpSJTnlRg==} + engines: {node: '>=18'} + + chalk@5.6.2: + resolution: {integrity: sha512-7NzBL0rN6fMUW+f7A6Io4h40qQlG+xGmtMxfbnH/K7TAtt8JQWVQK+6g0UXKMeVJoyV5EkkNsErQ8pVD3bLHbA==} + engines: {node: ^12.17.0 || ^14.13 || >=16.0.0} + + chownr@1.1.4: + resolution: {integrity: sha512-jJ0bqzaylmJtVnNgzTeSOs8DPavpbYgEr/b0YL8/2GO3xJEhInFmhKMUnEJQjZumK7KXGFhUy89PrsJWlakBVg==} + + cli-boxes@4.0.1: + resolution: {integrity: sha512-5IOn+jcCEHEraYolBPs/sT4BxYCe2nHg374OPiItB1O96KZFseS2gthU4twyYzeDcFew4DaUM/xwc5BQf08JJw==} + engines: {node: '>=18.20 <19 || >=20.10'} + + cli-cursor@4.0.0: + resolution: {integrity: sha512-VGtlMu3x/4DOtIUwEkRezxUZ2lBacNJCHash0N0WeZDBS+7Ux1dm3XWAgWYxLJFMMdOeXMHXorshEFhbMSGelg==} + engines: {node: ^12.20.0 || ^14.13.1 || >=16.0.0} + + cli-truncate@6.0.0: + resolution: {integrity: sha512-3+YKIUFsohD9MIoOFPFBldjAlnfCmCDcqe6aYGFqlDTRKg80p4wg35L+j83QQ63iOlKRccEkbn8IuM++HsgEjA==} + engines: {node: '>=22'} + + code-excerpt@4.0.0: + resolution: {integrity: sha512-xxodCmBen3iy2i0WtAK8FlFNrRzjUqjRsMfho58xT/wvZU1YTM3fCnRjcy1gJPMepaRlgm/0e6w8SpWHpn3/cA==} + engines: {node: ^12.20.0 || ^14.13.1 || >=16.0.0} + + color-convert@3.1.3: + resolution: {integrity: sha512-fasDH2ont2GqF5HpyO4w0+BcewlhHEZOFn9c1ckZdHpJ56Qb7MHhH/IcJZbBGgvdtwdwNbLvxiBEdg336iA9Sg==} + engines: {node: '>=14.6'} + + color-name@2.1.0: + resolution: {integrity: sha512-1bPaDNFm0axzE4MEAzKPuqKWeRaT43U/hyxKPBdqTfmPF+d6n7FSoTFxLVULUJOmiLp01KjhIPPH+HrXZJN4Rg==} + engines: {node: '>=12.20'} + + color-string@2.1.4: + resolution: 
{integrity: sha512-Bb6Cq8oq0IjDOe8wJmi4JeNn763Xs9cfrBcaylK1tPypWzyoy2G3l90v9k64kjphl/ZJjPIShFztenRomi8WTg==} + engines: {node: '>=18'} + + color@5.0.3: + resolution: {integrity: sha512-ezmVcLR3xAVp8kYOm4GS45ZLLgIE6SPAFoduLr6hTDajwb3KZ2F46gulK3XpcwRFb5KKGCSezCBAY4Dw4HsyXA==} + engines: {node: '>=18'} + + combined-stream@1.0.8: + resolution: {integrity: sha512-FQN4MRfuJeHf7cBbBMJFXhKSDq+2kAArBlmRBvcvFE5BB1HZKXtSFASDhdlz9zOYwxh8lDdnvmMOe/+5cdoEdg==} + engines: {node: '>= 0.8'} + + commander@11.1.0: + resolution: {integrity: sha512-yPVavfyCcRhmorC7rWlkHn15b4wDVgVmBA7kV4QVBsF7kv/9TKJAbAXVTxvTnwP8HHKjRCJDClKbciiYS7p0DQ==} + engines: {node: '>=16'} + + commander@14.0.3: + resolution: {integrity: sha512-H+y0Jo/T1RZ9qPP4Eh1pkcQcLRglraJaSLoyOtHxu6AapkjWVCy2Sit1QQ4x3Dng8qDlSsZEet7g5Pq06MvTgw==} + engines: {node: '>=20'} + + concat-map@0.0.1: + resolution: {integrity: sha512-/Srv4dswyQNBfohGpz9o6Yb3Gz3SrUDqBH5rTuhGR7ahtlbYKnVxw2bCFMRljaA7EXHaXZ8wsHdodFvbkhKmqg==} + + content-disposition@1.1.0: + resolution: {integrity: sha512-5jRCH9Z/+DRP7rkvY83B+yGIGX96OYdJmzngqnw2SBSxqCFPd0w2km3s5iawpGX8krnwSGmF0FW5Nhr0Hfai3g==} + engines: {node: '>=18'} + + content-type@1.0.5: + resolution: {integrity: sha512-nTjqfcBFEipKdXCv4YDQWCfmcLZKm81ldF0pAopTvyrFGVbcR6P/VAAd5G7N+0tTr8QqiU0tFadD6FK4NtJwOA==} + engines: {node: '>= 0.6'} + + convert-source-map@2.0.0: + resolution: {integrity: sha512-Kvp459HrV2FEJ1CAsi1Ku+MY3kasH19TFykTz2xWmMeq6bk2NU3XXvfJ+Q61m0xktWwt+1HSYf3JZsTms3aRJg==} + + convert-to-spaces@2.0.1: + resolution: {integrity: sha512-rcQ1bsQO9799wq24uE5AM2tAILy4gXGIK/njFWcVQkGNZ96edlpY+A7bjwvzjYvLDyzmG1MmMLZhpcsb+klNMQ==} + engines: {node: ^12.20.0 || ^14.13.1 || >=16.0.0} + + cookie-signature@1.2.2: + resolution: {integrity: sha512-D76uU73ulSXrD1UXF4KE2TMxVVwhsnCgfAyTg9k8P6KGZjlXKrOLe4dJQKI3Bxi5wjesZoFXJWElNWBjPZMbhg==} + engines: {node: '>=6.6.0'} + + cookie@0.7.2: + resolution: {integrity: sha512-yki5XnKuf750l50uGTllt6kKILY4nQ1eNIQatoXEByZ5dWgnKqbnqmTrBE5B4N7lrMJKQ2ytWMiTO2o0v6Ew/w==} 
+ engines: {node: '>= 0.6'} + + cors@2.8.6: + resolution: {integrity: sha512-tJtZBBHA6vjIAaF6EnIaq6laBBP9aq/Y3ouVJjEfoHbRBcHBAHYcMh/w8LDrk2PvIMMq8gmopa5D4V8RmbrxGw==} + engines: {node: '>= 0.10'} + + cross-spawn@7.0.6: + resolution: {integrity: sha512-uV2QOWP2nWzsy2aMp8aRibhi9dlzF5Hgh5SHaB9OiTGEyDTiJJyx0uy51QXdyWbtAHNua4XJzUKca3OzKUd3vA==} + engines: {node: '>= 8'} + + csstype@3.2.3: + resolution: {integrity: sha512-z1HGKcYy2xA8AGQfwrn0PAy+PB7X/GSj3UVJW9qKyn43xWa+gl5nXmU4qqLMRzWVLFC8KusUX8T/0kCiOYpAIQ==} + + data-uri-to-buffer@4.0.1: + resolution: {integrity: sha512-0R9ikRb668HB7QDxT1vkpuUBtqc53YyAwMwGeUFKRojY/NWKvdZ+9UYtRfGmhqNbRkTSVpMbmyhXipFFv2cb/A==} + engines: {node: '>= 12'} + + debug@4.4.3: + resolution: {integrity: sha512-RGwwWnwQvkVfavKVt22FGLw+xYSdzARwm0ru6DhTVA3umU5hZc28V3kO4stgYryrTlLpuvgI9GiijltAjNbcqA==} + engines: {node: '>=6.0'} + peerDependencies: + supports-color: '*' + peerDependenciesMeta: + supports-color: + optional: true + + decompress-response@6.0.0: + resolution: {integrity: sha512-aW35yZM6Bb/4oJlZncMH2LCoZtJXTRxES17vE3hoRiowU2kWHaJKFkSBDnDR+cm9J+9QhXmREyIfv0pji9ejCQ==} + engines: {node: '>=10'} + + deep-extend@0.6.0: + resolution: {integrity: sha512-LOHxIOaPYdHlJRtCQfDIVZtfw/ufM8+rVj649RIHzcm/vGwQRXFt6OPqIFWsm2XEMrNIEtWR64sY1LEKD2vAOA==} + engines: {node: '>=4.0.0'} + + default-browser-id@5.0.1: + resolution: {integrity: sha512-x1VCxdX4t+8wVfd1so/9w+vQ4vx7lKd2Qp5tDRutErwmR85OgmfX7RlLRMWafRMY7hbEiXIbudNrjOAPa/hL8Q==} + engines: {node: '>=18'} + + default-browser@5.5.0: + resolution: {integrity: sha512-H9LMLr5zwIbSxrmvikGuI/5KGhZ8E2zH3stkMgM5LpOWDutGM2JZaj460Udnf1a+946zc7YBgrqEWwbk7zHvGw==} + engines: {node: '>=18'} + + define-lazy-prop@3.0.0: + resolution: {integrity: sha512-N+MeXYoqr3pOgn8xfyRPREN7gHakLYjhsHhWGT3fWAiL4IkAt0iDw14QiiEm2bE30c5XX5q0FtAA3CK5f9/BUg==} + engines: {node: '>=12'} + + delayed-stream@1.0.0: + resolution: {integrity: sha512-ZySD7Nf91aLB0RxL4KGrKHBXl7Eds1DAmEdcoVawXnLD7SDhpNgtuII2aAkg7a7QS41jxPSZ17p4VdGnMHk3MQ==} + 
engines: {node: '>=0.4.0'} + + denque@2.1.0: + resolution: {integrity: sha512-HVQE3AAb/pxF8fQAoiqpvg9i3evqug3hoiwakOyZAwJm+6vZehbkYXZ0l4JxS+I3QxM97v5aaRNhj8v5oBhekw==} + engines: {node: '>=0.10'} + + depd@2.0.0: + resolution: {integrity: sha512-g7nH6P6dyDioJogAAGprGpCtVImJhpPk/roCzdb3fIh61/s/nPsfR6onyMwkCAR/OlC3yBC0lESvUoQEAssIrw==} + engines: {node: '>= 0.8'} + + detect-libc@2.1.2: + resolution: {integrity: sha512-Btj2BOOO83o3WyH59e8MgXsxEQVcarkUOpEYrubB0urwnN10yQ364rsiByU11nZlqWYZm05i/of7io4mzihBtQ==} + engines: {node: '>=8'} + + dunder-proto@1.0.1: + resolution: {integrity: sha512-KIN/nDJBQRcXw0MLVhZE9iQHmG68qAVIBg9CqmUYjmQIhgij9U5MFvrqkUL5FbtyyzZuOeOt0zdeRe4UY7ct+A==} + engines: {node: '>= 0.4'} + + duplexify@4.1.3: + resolution: {integrity: sha512-M3BmBhwJRZsSx38lZyhE53Csddgzl5R7xGJNk7CVddZD6CcmwMCH8J+7AprIrQKH7TonKxaCjcv27Qmf+sQ+oA==} + + ecdsa-sig-formatter@1.0.11: + resolution: {integrity: sha512-nagl3RYrbNv6kQkeJIpt6NJZy8twLB/2vtz6yN9Z4vRKHN4/QZJIEbqohALSgwKdnksuY3k5Addp5lg8sVoVcQ==} + + ee-first@1.1.1: + resolution: {integrity: sha512-WMwm9LhRUo+WUaRN+vRuETqG89IgZphVSNkdFgeb6sS/E4OrDIN7t48CAewSHXc6C8lefD8KKfr5vY61brQlow==} + + enabled@2.0.0: + resolution: {integrity: sha512-AKrN98kuwOzMIdAizXGI86UFBoo26CL21UM763y1h/GMSJ4/OHU9k2YlsmBpyScFo/wbLzWQJBMCW4+IO3/+OQ==} + + encodeurl@2.0.0: + resolution: {integrity: sha512-Q0n9HRi4m6JuGIV1eFlmvJB7ZEVxu93IrMyiMsGC0lrMJMWzRgx6WGquyfQgZVb31vhGgXnfmPNNXmxnOkRBrg==} + engines: {node: '>= 0.8'} + + end-of-stream@1.4.5: + resolution: {integrity: sha512-ooEGc6HP26xXq/N+GCGOT0JKCLDGrq2bQUZrQ7gyrJiZANJ/8YDTxTpQBXGMn+WbIQXNVpyWymm7KYVICQnyOg==} + + environment@1.1.0: + resolution: {integrity: sha512-xUtoPkMggbz0MPyPiIWr1Kp4aeWJjDZ6SMvURhimjdZgsRuDplF5/s9hcgGhyXMhs+6vpnuoiZ2kFiu3FMnS8Q==} + engines: {node: '>=18'} + + es-define-property@1.0.1: + resolution: {integrity: sha512-e3nRfgfUZ4rNGL232gUgX06QNyyez04KdjFrF+LTRoOXmrOgFKDg4BCdsjW8EnT69eqdYGmRpJwiPVYNrCaW3g==} + engines: {node: '>= 0.4'} + + es-errors@1.3.0: + 
resolution: {integrity: sha512-Zf5H2Kxt2xjTvbJvP2ZWLEICxA6j+hAmMzIlypy4xcBg1vKVnx89Wy0GbS+kf5cwCVFFzdCFh2XSCFNULS6csw==} + engines: {node: '>= 0.4'} + + es-module-lexer@2.1.0: + resolution: {integrity: sha512-n27zTYMjYu1aj4MjCWzSP7G9r75utsaoc8m61weK+W8JMBGGQybd43GstCXZ3WNmSFtGT9wi59qQTW6mhTR5LQ==} + + es-object-atoms@1.1.1: + resolution: {integrity: sha512-FGgH2h8zKNim9ljj7dankFPcICIK9Cp5bm+c2gQSYePhpaG5+esrLODihIorn+Pe6FGJzWhXQotPv73jTaldXA==} + engines: {node: '>= 0.4'} + + es-set-tostringtag@2.1.0: + resolution: {integrity: sha512-j6vWzfrGVfyXxge+O0x5sh6cvxAog0a/4Rdd2K36zCMV5eJ+/+tOAngRO8cODMNWbVRdVlmGZQL2YS3yR8bIUA==} + engines: {node: '>= 0.4'} + + es-toolkit@1.46.1: + resolution: {integrity: sha512-5eNtXOs3tbfxXOj04tjjseeWkRWaoCjdEI+96DgwzZoe6c9juL49pXlzAFTI72aWC9Y8p7168g6XIKjh7k6pyQ==} + + esbuild@0.27.7: + resolution: {integrity: sha512-IxpibTjyVnmrIQo5aqNpCgoACA/dTKLTlhMHihVHhdkxKyPO1uBBthumT0rdHmcsk9uMonIWS0m4FljWzILh3w==} + engines: {node: '>=18'} + hasBin: true + + escape-html@1.0.3: + resolution: {integrity: sha512-NiSupZ4OeuGwr68lGIeym/ksIZMJodUGOSCZ/FSnTxcrekbvqrgdUxlJOMpijaKZVjAJrWrGs/6Jy8OMuyj9ow==} + + escape-string-regexp@2.0.0: + resolution: {integrity: sha512-UpzcLCXolUWcNu5HtVMHYdXJjArjsF9C0aNnquZYY4uW/Vu0miy5YoWvbV345HauVvcAUnpRuhMMcqTcGOY2+w==} + engines: {node: '>=8'} + + estree-walker@3.0.3: + resolution: {integrity: sha512-7RUKfXgSMMkzt6ZuXmqapOurLGPPfgj6l9uRZ7lRGolvk0y2yocc35LdcxKC5PQZdn2DMqioAQ2NoWcrTKmm6g==} + + etag@1.8.1: + resolution: {integrity: sha512-aIL5Fx7mawVa300al2BnEE4iNvo1qETxLrPI/o05L7z6go7fCw1J6EQmbK4FmJ2AS7kgVF/KEZWufBfdClMcPg==} + engines: {node: '>= 0.6'} + + event-target-shim@5.0.1: + resolution: {integrity: sha512-i/2XbnSz/uxRCU6+NdVJgKWDTM427+MqYbkQzD321DuCQJUqOuJKIA0IM2+W2xtYHdKOmZ4dR6fExsd4SXL+WQ==} + engines: {node: '>=6'} + + events@3.3.0: + resolution: {integrity: sha512-mQw+2fkQbALzQ7V0MY0IqdnXNOeTtP4r0lN9z7AAawCXgqea7bDii20AYrIBrFd/Hx0M2Ocz6S111CaFkUcb0Q==} + engines: {node: '>=0.8.x'} + + 
eventsource-parser@3.0.8: + resolution: {integrity: sha512-70QWGkr4snxr0OXLRWsFLeRBIRPuQOvt4s8QYjmUlmlkyTZkRqS7EDVRZtzU3TiyDbXSzaOeF0XUKy8PchzukQ==} + engines: {node: '>=18.0.0'} + + eventsource@3.0.7: + resolution: {integrity: sha512-CRT1WTyuQoD771GW56XEZFQ/ZoSfWid1alKGDYMmkt2yl8UXrVR4pspqWNEcqKvVIzg6PAltWjxcSSPrboA4iA==} + engines: {node: '>=18.0.0'} + + expand-template@2.0.3: + resolution: {integrity: sha512-XYfuKMvj4O35f/pOXLObndIRvyQ+/+6AhODh+OKWj9S9498pHHn/IMszH+gt0fBCRWMNfk1ZSp5x3AifmnI2vg==} + engines: {node: '>=6'} + + expand-tilde@2.0.2: + resolution: {integrity: sha512-A5EmesHW6rfnZ9ysHQjPdJRni0SRar0tjtG5MNtm9n5TUvsYU8oozprtRD4AqHxcZWWlVuAmQo2nWKfN9oyjTw==} + engines: {node: '>=0.10.0'} + + expect-type@1.3.0: + resolution: {integrity: sha512-knvyeauYhqjOYvQ66MznSMs83wmHrCycNEN6Ao+2AeYEfxUIkuiVxdEa1qlGEPK+We3n0THiDciYSsCcgW/DoA==} + engines: {node: '>=12.0.0'} + + express-rate-limit@8.4.1: + resolution: {integrity: sha512-NGVYwQSAyEQgzxX1iCM978PP9AdO/hW93gMcF6ZwQCm+rFvLsBH6w4xcXWTcliS8La5EPRN3p9wzItqBwJrfNw==} + engines: {node: '>= 16'} + peerDependencies: + express: '>= 4.11' + + express@5.2.1: + resolution: {integrity: sha512-hIS4idWWai69NezIdRt2xFVofaF4j+6INOpJlVOLDO8zXGpUVEVzIYk12UUi2JzjEzWL3IOAxcTubgz9Po0yXw==} + engines: {node: '>= 18'} + + extend@3.0.2: + resolution: {integrity: sha512-fjquC59cD7CyW6urNXK0FBufkZcoiGG80wTuPujX590cB5Ttln20E2UB4S/WARVqhXffZl2LNgS+gQdPIIim/g==} + + fast-deep-equal@3.1.3: + resolution: {integrity: sha512-f3qQ9oQy9j2AhBe/H9VC91wLmKBCCU/gDOnKNAYG5hswO7BLKj09Hc5HYNz9cGI++xlpDCIgDaitVs03ATR84Q==} + + fast-string-truncated-width@3.0.3: + resolution: {integrity: sha512-0jjjIEL6+0jag3l2XWWizO64/aZVtpiGE3t0Zgqxv0DPuxiMjvB3M24fCyhZUO4KomJQPj3LTSUnDP3GpdwC0g==} + + fast-string-width@3.0.2: + resolution: {integrity: sha512-gX8LrtNEI5hq8DVUfRQMbr5lpaS4nMIWV+7XEbXk2b8kiQIizgnlr12B4dA3ZEx3308ze0O4Q1R+cHts8kyUJg==} + + fast-uri@3.1.0: + resolution: {integrity: 
sha512-iPeeDKJSWf4IEOasVVrknXpaBV0IApz/gp7S2bb7Z4Lljbl2MGJRqInZiUrQwV16cpzw/D3S5j5Julj/gT52AA==} + + fast-wrap-ansi@0.2.0: + resolution: {integrity: sha512-rLV8JHxTyhVmFYhBJuMujcrHqOT2cnO5Zxj37qROj23CP39GXubJRBUFF0z8KFK77Uc0SukZUf7JZhsVEQ6n8w==} + + fast-xml-builder@1.1.5: + resolution: {integrity: sha512-4TJn/8FKLeslLAH3dnohXqE3QSoxkhvaMzepOIZytwJXZO69Bfz0HBdDHzOTOon6G59Zrk6VQ2bEiv1t61rfkA==} + + fast-xml-parser@5.7.2: + resolution: {integrity: sha512-P7oW7tLbYnhOLQk/Gv7cZgzgMPP/XN03K02/Jy6Y/NHzyIAIpxuZIM/YqAkfiXFPxA2CTm7NtCijK9EDu09u2w==} + hasBin: true + + fastest-levenshtein@1.0.16: + resolution: {integrity: sha512-eRnCtTTtGZFpQCwhJiUOuxPQWRXVKYDn0b2PeHfXL6/Zi53SLAzAHfVhVWK2AryC/WH05kGfxhFIPvTF0SXQzg==} + engines: {node: '>= 4.9.1'} + + fdir@6.5.0: + resolution: {integrity: sha512-tIbYtZbucOs0BRGqPJkshJUYdL+SDH7dVM8gjy+ERp3WAUjLEFJE+02kanyHtwjWOnwrKYBiwAmM0p4kLJAnXg==} + engines: {node: '>=12.0.0'} + peerDependencies: + picomatch: ^3 || ^4 + peerDependenciesMeta: + picomatch: + optional: true + + fecha@4.2.3: + resolution: {integrity: sha512-OP2IUU6HeYKJi3i0z4A19kHMQoLVs4Hc+DPqqxI2h/DPZHTm/vjsfC6P0b4jCMy14XizLBqvndQ+UilD7707Jw==} + + fetch-blob@3.2.0: + resolution: {integrity: sha512-7yAQpD2UMJzLi1Dqv7qFYnPbaPx7ZfFK6PiIxQ4PfkGPyNyl2Ugx+a/umUonmKqjhM4DnfbMvdX6otXq83soQQ==} + engines: {node: ^12.20 || >= 14.13} + + file-uri-to-path@1.0.0: + resolution: {integrity: sha512-0Zt+s3L7Vf1biwWZ29aARiVYLx7iMGnEUl9x33fbB/j3jR81u/O2LbqK+Bm1CDSNDKVtJ/YjwY7TUd5SkeLQLw==} + + finalhandler@2.1.1: + resolution: {integrity: sha512-S8KoZgRZN+a5rNwqTxlZZePjT/4cnm0ROV70LedRHZ0p8u9fRID0hJUZQpkKLzro8LfmC8sx23bY6tVNxv8pQA==} + engines: {node: '>= 18.0.0'} + + fn.name@1.1.0: + resolution: {integrity: sha512-GRnmB5gPyJpAhTQdSZTSp9uaPSvl09KoYcMQtsB9rQoOmzs9dH6ffeccH+Z+cv6P68Hu5bC6JjRh4Ah/mHSNRw==} + + follow-redirects@1.16.0: + resolution: {integrity: sha512-y5rN/uOsadFT/JfYwhxRS5R7Qce+g3zG97+JrtFZlC9klX/W5hD7iiLzScI4nZqUS7DNUdhPgw4xI8W2LuXlUw==} + engines: {node: '>=4.0'} + 
peerDependencies: + debug: '*' + peerDependenciesMeta: + debug: + optional: true + + form-data@4.0.5: + resolution: {integrity: sha512-8RipRLol37bNs2bhoV67fiTEvdTrbMUYcFTiy3+wuuOnUog2QBHCZWXDRijWQfAkhBj2Uf5UnVaiWwA5vdd82w==} + engines: {node: '>= 6'} + + formdata-polyfill@4.0.10: + resolution: {integrity: sha512-buewHzMvYL29jdeQTVILecSaZKnt/RJWjoZCF5OW60Z67/GmSLBkOFM7qh1PI3zFNtJbaZL5eQu1vLfazOwj4g==} + engines: {node: '>=12.20.0'} + + forwarded@0.2.0: + resolution: {integrity: sha512-buRG0fpBtRHSTCOASe6hD258tEubFoRLb4ZNA6NxMVHNw2gOcwHo9wyablzMzOA5z9xA9L1KNjk/Nt6MT9aYow==} + engines: {node: '>= 0.6'} + + fresh@2.0.0: + resolution: {integrity: sha512-Rx/WycZ60HOaqLKAi6cHRKKI7zxWbJ31MhntmtwMoaTeF7XFH9hhBp8vITaMidfljRQ6eYWCKkaTK+ykVJHP2A==} + engines: {node: '>= 0.8'} + + fs-constants@1.0.0: + resolution: {integrity: sha512-y6OAwoSIf7FyjMIv94u+b5rdheZEjzR63GTyZJm5qh4Bi+2YgwLCcI/fPFZkL5PSixOt6ZNKm+w+Hfp/Bciwow==} + + fs.realpath@1.0.0: + resolution: {integrity: sha512-OO0pH2lK6a0hZnAdau5ItzHPI6pUlvI7jMVnxUQRtw4owF2wk8lOSabtGDCTP4Ggrg2MbGnWO9X8K1t4+fGMDw==} + + fsevents@2.3.3: + resolution: {integrity: sha512-5xoDfX+fL7faATnagmWPpbFtwh/R77WmMMqqHGS65C3vvB0YHrgF+B1YmZ3441tMj5n63k0212XNoJwzlhffQw==} + engines: {node: ^8.16.0 || ^10.6.0 || >=11.0.0} + os: [darwin] + + function-bind@1.1.2: + resolution: {integrity: sha512-7XHNxH7qX9xG5mIwxkhumTox/MIRNcOgDrxWsMt2pAr23WHp6MrRlN7FBSFpCpr+oVO0F744iUgR82nJMfG2SA==} + + gaxios@7.1.4: + resolution: {integrity: sha512-bTIgTsM2bWn3XklZISBTQX7ZSddGW+IO3bMdGaemHZ3tbqExMENHLx6kKZ/KlejgrMtj8q7wBItt51yegqalrA==} + engines: {node: '>=18'} + + gcp-metadata@8.1.2: + resolution: {integrity: sha512-zV/5HKTfCeKWnxG0Dmrw51hEWFGfcF2xiXqcA3+J90WDuP0SvoiSO5ORvcBsifmx/FoIjgQN3oNOGaQ5PhLFkg==} + engines: {node: '>=18'} + + generate-function@2.3.1: + resolution: {integrity: sha512-eeB5GfMNeevm/GRYq20ShmsaGcmI81kIX2K9XQx5miC8KdHaC6Jm0qQ8ZNeGOi7wYB8OsdxKs+Y2oVuTFuVwKQ==} + + generic-pool@3.9.0: + resolution: {integrity: 
sha512-hymDOu5B53XvN4QT9dBmZxPX4CWhBPPLguTZ9MMFeFa/Kg0xWVfylOVNlJji/E7yTZWFd/q9GO5TxDLq156D7g==} + engines: {node: '>= 4'} + + get-east-asian-width@1.5.0: + resolution: {integrity: sha512-CQ+bEO+Tva/qlmw24dCejulK5pMzVnUOFOijVogd3KQs07HnRIgp8TGipvCCRT06xeYEbpbgwaCxglFyiuIcmA==} + engines: {node: '>=18'} + + get-intrinsic@1.3.0: + resolution: {integrity: sha512-9fSjSaos/fRIVIp+xSJlE6lfwhES7LNtKaCBIamHsjr2na1BiABJPo0mOjjz8GJDURarmCPGqaiVg5mfjb98CQ==} + engines: {node: '>= 0.4'} + + get-proto@1.0.1: + resolution: {integrity: sha512-sTSfBjoXBp89JvIKIefqw7U2CCebsc74kiY6awiGogKtoSGbgjYE/G/+l9sF3MWFPNc9IcoOC4ODfKHfxFmp0g==} + engines: {node: '>= 0.4'} + + github-from-package@0.0.0: + resolution: {integrity: sha512-SyHy3T1v2NUXn29OsWdxmK6RwHD+vkj3v8en8AOBZ1wBQ/hCAQ5bAQTD02kW4W9tUp/3Qh6J8r9EvntiyCmOOw==} + + glob@7.2.3: + resolution: {integrity: sha512-nFR0zLpU2YCaRxwoCJvL6UvCH2JFyFVIvwTLsIf21AuHlMskA1hhTdk+LlYJtOlYt9v6dvszD2BGRqBL+iQK9Q==} + deprecated: Old versions of glob are not supported, and contain widely publicized security vulnerabilities, which have been fixed in the current version. Please update. 
Support for old versions may be purchased (at exorbitant rates) by contacting i@izs.me + + google-auth-library@10.6.2: + resolution: {integrity: sha512-e27Z6EThmVNNvtYASwQxose/G57rkRuaRbQyxM2bvYLLX/GqWZ5chWq2EBoUchJbCc57eC9ArzO5wMsEmWftCw==} + engines: {node: '>=18'} + + google-logging-utils@1.1.3: + resolution: {integrity: sha512-eAmLkjDjAFCVXg7A1unxHsLf961m6y17QFqXqAXGj/gVkKFrEICfStRfwUlGNfeCEjNRa32JEWOUTlYXPyyKvA==} + engines: {node: '>=14'} + + gopd@1.2.0: + resolution: {integrity: sha512-ZUKRh6/kUFoAiTAtTYPZJ3hw9wNxx+BIBOijnlG9PnrJsCcSjs1wyyD6vJpaYtgnzDrKYRSqf3OO6Rfa93xsRg==} + engines: {node: '>= 0.4'} + + handlebars@4.7.9: + resolution: {integrity: sha512-4E71E0rpOaQuJR2A3xDZ+GM1HyWYv1clR58tC8emQNeQe3RH7MAzSbat+V0wG78LQBo6m6bzSG/L4pBuCsgnUQ==} + engines: {node: '>=0.4.7'} + hasBin: true + + has-symbols@1.1.0: + resolution: {integrity: sha512-1cDNdwJ2Jaohmb3sg4OmKaMBwuC48sYni5HUw2DvsC8LjGTLK9h+eb1X6RyuOHe4hT0ULCW68iomhjUoKUqlPQ==} + engines: {node: '>= 0.4'} + + has-tostringtag@1.0.2: + resolution: {integrity: sha512-NqADB8VjPFLM2V0VvHUewwwsw0ZWBaIdgo+ieHtK3hasLz4qeCRjYcqfB6AQrBggRKppKF8L52/VqdVsO47Dlw==} + engines: {node: '>= 0.4'} + + hasown@2.0.3: + resolution: {integrity: sha512-ej4AhfhfL2Q2zpMmLo7U1Uv9+PyhIZpgQLGT1F9miIGmiCJIoCgSmczFdrc97mWT4kVY72KA+WnnhJ5pghSvSg==} + engines: {node: '>= 0.4'} + + homedir-polyfill@1.0.3: + resolution: {integrity: sha512-eSmmWE5bZTK2Nou4g0AI3zZ9rswp7GRKoKXS1BLUkvPviOqs4YTN1djQIqrXy9k5gEtdLPy86JjRwsNM9tnDcA==} + engines: {node: '>=0.10.0'} + + hono@4.12.15: + resolution: {integrity: sha512-qM0jDhFEaCBb4TxoW7f53Qrpv9RBiayUHo0S52JudprkhvpjIrGoU1mnnr29Fvd1U335ZFPZQY1wlkqgfGXyLg==} + engines: {node: '>=16.9.0'} + + html-entities@2.6.0: + resolution: {integrity: sha512-kig+rMn/QOVRvr7c86gQ8lWXq+Hkv6CbAH1hLu+RG338StTpE8Z0b44SDVaqVu7HGKf27frdmUYEs9hTUX/cLQ==} + + http-errors@2.0.1: + resolution: {integrity: sha512-4FbRdAX+bSdmo4AUFuS0WNiPz8NgFt+r8ThgNWmlrjQjt1Q7ZR9+zTlce2859x4KSXrwIsaeTqDoKQmtP8pLmQ==} + engines: {node: '>= 0.8'} 
+ + http-proxy-agent@7.0.2: + resolution: {integrity: sha512-T1gkAiYYDWYx3V5Bmyu7HcfcvL7mUrTWiM6yOfa3PIphViJ/gFPbvidQ+veqSOHci/PxBcDabeUNCzpOODJZig==} + engines: {node: '>= 14'} + + https-proxy-agent@7.0.6: + resolution: {integrity: sha512-vK9P5/iUfdl95AI+JVyUuIcVtd4ofvtrOr3HNtM2yxC9bnMbEdp3x01OhQNnjb8IJYi38VlTE3mBXwcfvywuSw==} + engines: {node: '>= 14'} + + iconv-lite@0.7.2: + resolution: {integrity: sha512-im9DjEDQ55s9fL4EYzOAv0yMqmMBSZp6G0VvFyTMPKWxiSBHUj9NW/qqLmXUwXrrM7AvqSlTCfvqRb0cM8yYqw==} + engines: {node: '>=0.10.0'} + + ieee754@1.2.1: + resolution: {integrity: sha512-dcyqhDvX1C46lXZcVqCpK+FtMRQVdIMN6/Df5js2zouUsqG7I6sFxitIC+7KYK29KdXOLHdu9zL4sFnoVQnqaA==} + + indent-string@5.0.0: + resolution: {integrity: sha512-m6FAo/spmsW2Ab2fU35JTYwtOKa2yAwXSwgjSv1TJzh4Mh7mC3lzAOVLBprb72XsTrgkEIsl7YrFNAiDiRhIGg==} + engines: {node: '>=12'} + + inflight@1.0.6: + resolution: {integrity: sha512-k92I/b08q4wvFscXCLvqfsHCrjrF7yiXsQuIVvVE7N82W3+aqpzuUdBbfhWcy/FZR3/4IgflMgKLOsvPDrGCJA==} + deprecated: This module is not supported, and leaks memory. Do not use it. Check out lru-cache if you want a good and tested way to coalesce async requests by a key value, which is much more comprehensive and powerful. 
+ + inherits@2.0.4: + resolution: {integrity: sha512-k/vGaX4/Yla3WzyMCvTQOXYeIHvqOKtnqBduzTHpzpQZzAskKMhZ2K+EnBiSM9zGSoIFeMpXKxa4dYeZIQqewQ==} + + ini@1.3.8: + resolution: {integrity: sha512-JV/yugV2uzW5iMRSiZAyDtQd+nxtUnjeLt0acNdw98kKLrvuRVyB80tsREOE7yvGVgalhZ6RNXCmEHkUKBKxew==} + + ini@5.0.0: + resolution: {integrity: sha512-+N0ngpO3e7cRUWOJAS7qw0IZIVc6XPrW4MlFBdD066F2L4k1L6ker3hLqSq7iXxU5tgS4WGkIUElWn5vogAEnw==} + engines: {node: ^18.17.0 || >=20.5.0} + + ink-testing-library@4.0.0: + resolution: {integrity: sha512-yF92kj3pmBvk7oKbSq5vEALO//o7Z9Ck/OaLNlkzXNeYdwfpxMQkSowGTFUCS5MSu9bWfSZMewGpp7bFc66D7Q==} + engines: {node: '>=18'} + peerDependencies: + '@types/react': '>=18.0.0' + peerDependenciesMeta: + '@types/react': + optional: true + + ink@7.0.1: + resolution: {integrity: sha512-o6LAC268PLawlGVYrXTyaTfke4VtJftEheuwbgkQf7yvSXyWp1nRwBbAyKEkWXFZZsW/la5wrMuNbuBvZK2C1w==} + engines: {node: '>=22'} + peerDependencies: + '@types/react': '>=19.2.0' + react: '>=19.2.0' + react-devtools-core: '>=6.1.2' + peerDependenciesMeta: + '@types/react': + optional: true + react-devtools-core: + optional: true + + ip-address@10.1.0: + resolution: {integrity: sha512-XXADHxXmvT9+CRxhXg56LJovE+bmWnEWB78LB83VZTprKTmaC5QfruXocxzTZ2Kl0DNwKuBdlIhjL8LeY8Sf8Q==} + engines: {node: '>= 12'} + + ipaddr.js@1.9.1: + resolution: {integrity: sha512-0KI/607xoxSToH7GjN1FfSbLoU0+btTicjsQSWQlh/hZykN8KpmMf7uYwPW3R+akZ6R/w18ZlXSHBYXiYUPO3g==} + engines: {node: '>= 0.10'} + + is-docker@2.2.1: + resolution: {integrity: sha512-F+i2BKsFrH66iaUFc0woD8sLy8getkwTwtOBjvs56Cx4CgJDeKQeqfz8wAYiSb8JOprWhHH5p77PbmYCvvUuXQ==} + engines: {node: '>=8'} + hasBin: true + + is-docker@3.0.0: + resolution: {integrity: sha512-eljcgEDlEns/7AXFosB5K/2nCM4P7FQPkGc/DWLy5rmFEWvZayGrik1d9/QIY5nJ4f9YsVvBkA6kJpHn9rISdQ==} + engines: {node: ^12.20.0 || ^14.13.1 || >=16.0.0} + hasBin: true + + is-fullwidth-code-point@5.1.0: + resolution: {integrity: 
sha512-5XHYaSyiqADb4RnZ1Bdad6cPp8Toise4TzEjcOYDHZkTCbKgiUl7WTUCpNWHuxmDt91wnsZBc9xinNzopv3JMQ==} + engines: {node: '>=18'} + + is-in-ci@2.0.0: + resolution: {integrity: sha512-cFeerHriAnhrQSbpAxL37W1wcJKUUX07HyLWZCW1URJT/ra3GyUTzBgUnh24TMVfNTV2Hij2HLxkPHFZfOZy5w==} + engines: {node: '>=20'} + hasBin: true + + is-inside-container@1.0.0: + resolution: {integrity: sha512-KIYLCCJghfHZxqjYBE7rEy0OBuTd5xCHS7tHVgvCLkx7StIoaxwNW3hCALgEUjFfeRk+MG/Qxmp/vtETEF3tRA==} + engines: {node: '>=14.16'} + hasBin: true + + is-promise@4.0.0: + resolution: {integrity: sha512-hvpoI6korhJMnej285dSg6nu1+e6uxs7zG3BYAm5byqDsgJNWwxzM6z6iZiAgQR4TJ30JmBTOwqZUw3WlyH3AQ==} + + is-property@1.0.2: + resolution: {integrity: sha512-Ks/IoX00TtClbGQr4TWXemAnktAQvYB7HzcCxDGqEZU6oCmb2INHuOoKxbtR+HFkmYWBKv/dOZtGRiAjDhj92g==} + + is-stream@2.0.1: + resolution: {integrity: sha512-hFoiJiTl63nn+kstHGBtewWSKnQLpyb155KHheA1l39uvtO9nWIop1p3udqPcUd/xbF1VLMO4n7OI6p7RbngDg==} + engines: {node: '>=8'} + + is-wsl@2.2.0: + resolution: {integrity: sha512-fKzAra0rGJUUBwGBgNkHZuToZcn+TtXHpeCgmkMJMMYx1sQDYaCSyjJBSCa2nH1DGm7s3n1oBnohoVTBaN7Lww==} + engines: {node: '>=8'} + + is-wsl@3.1.1: + resolution: {integrity: sha512-e6rvdUCiQCAuumZslxRJWR/Doq4VpPR82kqclvcS0efgt430SlGIk05vdCN58+VrzgtIcfNODjozVielycD4Sw==} + engines: {node: '>=16'} + + isexe@2.0.0: + resolution: {integrity: sha512-RHxMLp9lnKHGHRng9QFhRCMbYAcVpn69smSGcq3f36xjgVVWThj4qqLbTLlq7Ssj8B+fIQ1EuCEGI2lKsyQeIw==} + + jose@6.2.2: + resolution: {integrity: sha512-d7kPDd34KO/YnzaDOlikGpOurfF0ByC2sEV4cANCtdqLlTfBlw2p14O/5d/zv40gJPbIQxfES3nSx1/oYNyuZQ==} + + js-md4@0.3.2: + resolution: {integrity: sha512-/GDnfQYsltsjRswQhN9fhv3EMw2sCpUdrdxyWDOUK7eyD++r3gRhzgiQgc/x4MAv2i1iuQ4lxO5mvqM3vj4bwA==} + + json-bigint@1.0.0: + resolution: {integrity: sha512-SiPv/8VpZuWbvLSMtTDU8hEfrZWg/mH/nV/b4o0CYbSxu1UIQPLdwKOCIyLQX+VIPO5vrLX3i8qtqFyhdPSUSQ==} + + json-schema-traverse@1.0.0: + resolution: {integrity: 
sha512-NM8/P9n3XjXhIZn1lLhkFaACTOURQXjWhV4BA/RnOv8xvgqtqpAX9IO4mRQxSx1Rlo4tqzeqb0sOlruaOy3dug==} + + json-schema-typed@8.0.2: + resolution: {integrity: sha512-fQhoXdcvc3V28x7C7BMs4P5+kNlgUURe2jmUT1T//oBRMDrqy1QPelJimwZGo7Hg9VPV3EQV5Bnq4hbFy2vetA==} + + json-schema@0.4.0: + resolution: {integrity: sha512-es94M3nTIfsEPisRafak+HDLfHXnKBhV3vU5eqPcS3flIWqcxJWgXHXiey3YrpaNsanY5ei1VoYEbOzijuq9BA==} + + jsonwebtoken@9.0.3: + resolution: {integrity: sha512-MT/xP0CrubFRNLNKvxJ2BYfy53Zkm++5bX9dtuPbqAeQpTVe0MQTFhao8+Cp//EmJp244xt6Drw/GVEGCUj40g==} + engines: {node: '>=12', npm: '>=6'} + + jwa@2.0.1: + resolution: {integrity: sha512-hRF04fqJIP8Abbkq5NKGN0Bbr3JxlQ+qhZufXVr0DvujKy93ZCbXZMHDL4EOtodSbCWxOqR8MS1tXA5hwqCXDg==} + + jws@4.0.1: + resolution: {integrity: sha512-EKI/M/yqPncGUUh44xz0PxSidXFr/+r0pA70+gIYhjv+et7yxM+s29Y+VGDkovRofQem0fs7Uvf4+YmAdyRduA==} + + kuler@2.0.0: + resolution: {integrity: sha512-Xq9nH7KlWZmXAtodXDDRE7vs6DU1gTU8zYDHDiWLSip45Egwq3plLHzPn27NgvzL2r1LMPC1vdqh98sQxtqj4A==} + + lightningcss-android-arm64@1.32.0: + resolution: {integrity: sha512-YK7/ClTt4kAK0vo6w3X+Pnm0D2cf2vPHbhOXdoNti1Ga0al1P4TBZhwjATvjNwLEBCnKvjJc2jQgHXH0NEwlAg==} + engines: {node: '>= 12.0.0'} + cpu: [arm64] + os: [android] + + lightningcss-darwin-arm64@1.32.0: + resolution: {integrity: sha512-RzeG9Ju5bag2Bv1/lwlVJvBE3q6TtXskdZLLCyfg5pt+HLz9BqlICO7LZM7VHNTTn/5PRhHFBSjk5lc4cmscPQ==} + engines: {node: '>= 12.0.0'} + cpu: [arm64] + os: [darwin] + + lightningcss-darwin-x64@1.32.0: + resolution: {integrity: sha512-U+QsBp2m/s2wqpUYT/6wnlagdZbtZdndSmut/NJqlCcMLTWp5muCrID+K5UJ6jqD2BFshejCYXniPDbNh73V8w==} + engines: {node: '>= 12.0.0'} + cpu: [x64] + os: [darwin] + + lightningcss-freebsd-x64@1.32.0: + resolution: {integrity: sha512-JCTigedEksZk3tHTTthnMdVfGf61Fky8Ji2E4YjUTEQX14xiy/lTzXnu1vwiZe3bYe0q+SpsSH/CTeDXK6WHig==} + engines: {node: '>= 12.0.0'} + cpu: [x64] + os: [freebsd] + + lightningcss-linux-arm-gnueabihf@1.32.0: + resolution: {integrity: 
sha512-x6rnnpRa2GL0zQOkt6rts3YDPzduLpWvwAF6EMhXFVZXD4tPrBkEFqzGowzCsIWsPjqSK+tyNEODUBXeeVHSkw==} + engines: {node: '>= 12.0.0'} + cpu: [arm] + os: [linux] + + lightningcss-linux-arm64-gnu@1.32.0: + resolution: {integrity: sha512-0nnMyoyOLRJXfbMOilaSRcLH3Jw5z9HDNGfT/gwCPgaDjnx0i8w7vBzFLFR1f6CMLKF8gVbebmkUN3fa/kQJpQ==} + engines: {node: '>= 12.0.0'} + cpu: [arm64] + os: [linux] + + lightningcss-linux-arm64-musl@1.32.0: + resolution: {integrity: sha512-UpQkoenr4UJEzgVIYpI80lDFvRmPVg6oqboNHfoH4CQIfNA+HOrZ7Mo7KZP02dC6LjghPQJeBsvXhJod/wnIBg==} + engines: {node: '>= 12.0.0'} + cpu: [arm64] + os: [linux] + + lightningcss-linux-x64-gnu@1.32.0: + resolution: {integrity: sha512-V7Qr52IhZmdKPVr+Vtw8o+WLsQJYCTd8loIfpDaMRWGUZfBOYEJeyJIkqGIDMZPwPx24pUMfwSxxI8phr/MbOA==} + engines: {node: '>= 12.0.0'} + cpu: [x64] + os: [linux] + + lightningcss-linux-x64-musl@1.32.0: + resolution: {integrity: sha512-bYcLp+Vb0awsiXg/80uCRezCYHNg1/l3mt0gzHnWV9XP1W5sKa5/TCdGWaR/zBM2PeF/HbsQv/j2URNOiVuxWg==} + engines: {node: '>= 12.0.0'} + cpu: [x64] + os: [linux] + + lightningcss-win32-arm64-msvc@1.32.0: + resolution: {integrity: sha512-8SbC8BR40pS6baCM8sbtYDSwEVQd4JlFTOlaD3gWGHfThTcABnNDBda6eTZeqbofalIJhFx0qKzgHJmcPTnGdw==} + engines: {node: '>= 12.0.0'} + cpu: [arm64] + os: [win32] + + lightningcss-win32-x64-msvc@1.32.0: + resolution: {integrity: sha512-Amq9B/SoZYdDi1kFrojnoqPLxYhQ4Wo5XiL8EVJrVsB8ARoC1PWW6VGtT0WKCemjy8aC+louJnjS7U18x3b06Q==} + engines: {node: '>= 12.0.0'} + cpu: [x64] + os: [win32] + + lightningcss@1.32.0: + resolution: {integrity: sha512-NXYBzinNrblfraPGyrbPoD19C1h9lfI/1mzgWYvXUTe414Gz/X1FD2XBZSZM7rRTrMA8JL3OtAaGifrIKhQ5yQ==} + engines: {node: '>= 12.0.0'} + + lodash.includes@4.3.0: + resolution: {integrity: sha512-W3Bx6mdkRTGtlJISOvVD/lbqjTlPPUDTMnlXZFnVwi9NKJ6tiAk6LVdlhZMm17VZisqhKcgzpO5Wz91PCt5b0w==} + + lodash.isboolean@3.0.3: + resolution: {integrity: sha512-Bz5mupy2SVbPHURB98VAcw+aHh4vRV5IPNhILUCsOzRmsTmSQ17jIuqopAentWoehktxGd9e/hbIXq980/1QJg==} + + lodash.isinteger@4.0.4: 
+ resolution: {integrity: sha512-DBwtEWN2caHQ9/imiNeEA5ys1JoRtRfY3d7V9wkqtbycnAmTvRRmbHKDV4a0EYc678/dia0jrte4tjYwVBaZUA==} + + lodash.isnumber@3.0.3: + resolution: {integrity: sha512-QYqzpfwO3/CWf3XP+Z+tkQsfaLL/EnUlXWVkIk5FUPc4sBdTehEqZONuyRt2P67PXAk+NXmTBcc97zw9t1FQrw==} + + lodash.isplainobject@4.0.6: + resolution: {integrity: sha512-oSXzaWypCMHkPC3NvBEaPHf0KsA5mvPrOPgQWDsbg8n7orZ290M0BmC/jgRZ4vcJ6DTAhjrsSYgdsW/F+MFOBA==} + + lodash.isstring@4.0.1: + resolution: {integrity: sha512-0wJxfxH1wgO3GrbuP+dTTk7op+6L41QCXbGINEmD+ny/G/eCqGzxyCsh7159S+mgDDcoarnBw6PC1PS5+wUGgw==} + + lodash.once@4.1.1: + resolution: {integrity: sha512-Sb487aTOCr9drQVL8pIxOzVhafOjZN9UU54hiN8PU3uAiSV7lx1yYNpbNmex2PK6dSJoNTSJUUswT651yww3Mg==} + + logform@2.7.0: + resolution: {integrity: sha512-TFYA4jnP7PVbmlBIfhlSe+WKxs9dklXMTEGcBCIvLhE/Tn3H6Gk1norupVW7m5Cnd4bLcr08AytbyV/xj7f/kQ==} + engines: {node: '>= 12.0.0'} + + long@5.3.2: + resolution: {integrity: sha512-mNAgZ1GmyNhD7AuqnTG3/VQ26o760+ZYBPKjPvugO8+nLbYfX6TVpJPseBvopbdY+qpZ/lKUnmEc1LeZYS3QAA==} + + lookml-parser@7.1.0: + resolution: {integrity: sha512-gaHQ8h3ixOar8OPrv1rY4rZ5S3Tm8+SGn4oQjP7Km1/16u0BOaK4vXeguqsYLQHlmbaxv7yr3jUXINxFNZ1r/Q==} + hasBin: true + peerDependencies: + js-yaml: ^4.1.0 + peerDependenciesMeta: + js-yaml: + optional: true + + lru.min@1.1.4: + resolution: {integrity: sha512-DqC6n3QQ77zdFpCMASA1a3Jlb64Hv2N2DciFGkO/4L9+q/IpIAuRlKOvCXabtRW6cQf8usbmM6BE/TOPysCdIA==} + engines: {bun: '>=1.0.0', deno: '>=1.30.0', node: '>=8.0.0'} + + magic-string@0.30.21: + resolution: {integrity: sha512-vd2F4YUyEXKGcLHoq+TEyCjxueSeHnFxyyjNp80yg0XV4vUhnDer/lvvlqM/arB5bXQN5K2/3oinyCRyx8T2CQ==} + + math-intrinsics@1.1.0: + resolution: {integrity: sha512-/IXtbwEk5HTPyEwyKX6hGkYXxM9nbj64B+ilVJnC/R6B0pH5G4V3b0pVbL7DBj4tkhBAppbQUlf6F6Xl9LHu1g==} + engines: {node: '>= 0.4'} + + media-typer@1.1.0: + resolution: {integrity: sha512-aisnrDP4GNe06UcKFnV5bfMNPBUw4jsLGaWwWfnH3v02GnBuXX2MCVn5RbrWo0j3pczUilYblq7fQ7Nw2t5XKw==} + engines: {node: '>= 0.8'} + + 
merge-descriptors@2.0.0: + resolution: {integrity: sha512-Snk314V5ayFLhp3fkUREub6WtjBfPdCPY1Ln8/8munuLuiYhsABgBVWsozAG+MWMbVEvcdcpbi9R7ww22l9Q3g==} + engines: {node: '>=18'} + + mime-db@1.52.0: + resolution: {integrity: sha512-sPU4uV7dYlvtWJxwwxHD0PuihVNiE7TyAbQ5SWxDCB9mUYvOgroQOwYQQOKPJ8CIbE+1ETVlOoK1UC2nU3gYvg==} + engines: {node: '>= 0.6'} + + mime-db@1.54.0: + resolution: {integrity: sha512-aU5EJuIN2WDemCcAp2vFBfp/m4EAhWJnUNSSw0ixs7/kXbd6Pg64EmwJkNdFhB8aWt1sH2CTXrLxo/iAGV3oPQ==} + engines: {node: '>= 0.6'} + + mime-types@2.1.35: + resolution: {integrity: sha512-ZDY+bPm5zTTF+YpCrAU9nK0UgICYPT0QtT1NZWFv4s++TNkcgVaT0g6+4R2uI4MjQjzysHB1zxuWL50hzaeXiw==} + engines: {node: '>= 0.6'} + + mime-types@3.0.2: + resolution: {integrity: sha512-Lbgzdk0h4juoQ9fCKXW4by0UJqj+nOOrI9MJ1sSj4nI8aI2eo1qmvQEie4VD1glsS250n15LsWsYtCugiStS5A==} + engines: {node: '>=18'} + + mimic-fn@2.1.0: + resolution: {integrity: sha512-OqbOk5oEQeAZ8WXWydlu9HJjz9WVdEIvamMCcXmuqUYjTknH/sqsWvhQ3vgwKFRR1HpjvNBKQ37nbJgYzGqGcg==} + engines: {node: '>=6'} + + mimic-response@3.1.0: + resolution: {integrity: sha512-z0yWI+4FDrrweS8Zmt4Ej5HdJmky15+L2e6Wgn3+iK5fWzb6T3fhNFq2+MeTRb064c6Wr4N/wv0DzQTjNzHNGQ==} + engines: {node: '>=10'} + + minimalistic-assert@1.0.1: + resolution: {integrity: sha512-UtJcAD4yEaGtjPezWuO9wC4nwUnVH/8/Im3yEHQP4b67cXlD/Qr9hdITCU1xDbSEXg2XKNaP8jsReV7vQd00/A==} + + minimatch@10.2.5: + resolution: {integrity: sha512-MULkVLfKGYDFYejP07QOurDLLQpcjk7Fw+7jXS2R2czRQzR56yHRveU5NDJEOviH+hETZKSkIk5c+T23GjFUMg==} + engines: {node: 18 || 20 || >=22} + + minimatch@3.1.5: + resolution: {integrity: sha512-VgjWUsnnT6n+NUk6eZq77zeFdpW2LWDzP6zFGrCbHXiYNul5Dzqk2HHQ5uFH2DNW5Xbp8+jVzaeNt94ssEEl4w==} + + minimist@1.2.8: + resolution: {integrity: sha512-2yyAR8qBkN3YuheJanUpWC5U3bb5osDywNB8RzDVlDwDHbocAJveqqj1u8+SVD7jkWT4yvsHCpWqqWqAxb0zCA==} + + mkdirp-classic@0.5.3: + resolution: {integrity: sha512-gKLcREMhtuZRwRAfqP3RFW+TK4JqApVBtOIftVgjuABpAtpxhPGaDcfvbhNvD0B8iD1oUr/txX35NjcaY6Ns/A==} + + 
moment-timezone@0.5.48: + resolution: {integrity: sha512-f22b8LV1gbTO2ms2j2z13MuPogNoh5UzxL3nzNAYKGraILnbGc9NEE6dyiiiLv46DGRb8A4kg8UKWLjPthxBHw==} + + moment@2.30.1: + resolution: {integrity: sha512-uEmtNhbDOrWPFS+hdjFCBfy9f2YoyzRpwcl+DqpC6taX21FzsTLQVbMV/W7PzNSX6x/bhC1zA3c2UQ5NzH6how==} + + ms@2.1.3: + resolution: {integrity: sha512-6FlzubTLZG3J2a/NVCAleEhjzq5oxgHyaCU9yYXvcLsvoVaHJq/s5xXI6/XXP6tz7R9xAOtHnSO/tXtF3WRTlA==} + + mssql@12.5.0: + resolution: {integrity: sha512-nTbhxS1qi5SPwuKygwfRzmp2p6e/2v37ZFzvwvMf27wRSI+09J7J2pP7zaAUzqT4znMyHYBrcUyxkjSeeNyDTg==} + engines: {node: '>=18.19.0'} + hasBin: true + + mysql2@3.22.3: + resolution: {integrity: sha512-uWWxvZSRvRhtBdh2CdcuK83YcOfPdmEeEYB069bAmPnV93QApDGVPuvCQOLjlh7tYHEWdgQPrn6kosDxHBVLkA==} + engines: {node: '>= 8.0'} + peerDependencies: + '@types/node': ^24.3.0 + + named-placeholders@1.1.6: + resolution: {integrity: sha512-Tz09sEL2EEuv5fFowm419c1+a/jSMiBjI9gHxVLrVdbUkkNUUfjsVYs9pVZu5oCon/kmRh9TfLEObFtkVxmY0w==} + engines: {node: '>=8.0.0'} + + nanoid@3.3.11: + resolution: {integrity: sha512-N8SpfPUnUp1bK+PMYW8qSWdl9U+wwNWI4QKxOYDy9JAro3WMX7p2OeVRF9v+347pnakNevPmiHhNmZ2HbFA76w==} + engines: {node: ^10 || ^12 || ^13.7 || ^14 || >=15.0.1} + hasBin: true + + napi-build-utils@2.0.0: + resolution: {integrity: sha512-GEbrYkbfF7MoNaoh2iGG84Mnf/WZfB0GdGEsM8wz7Expx/LlWf5U8t9nvJKXSp3qr5IsEbK04cBGhol/KwOsWA==} + + native-duplexpair@1.0.0: + resolution: {integrity: sha512-E7QQoM+3jvNtlmyfqRZ0/U75VFgCls+fSkbml2MpgWkWyz3ox8Y58gNhfuziuQYGNNQAbFZJQck55LHCnCK6CA==} + + negotiator@1.0.0: + resolution: {integrity: sha512-8Ofs/AUQh8MaEcrlq5xOX0CQ9ypTF5dl78mjlMNfOK08fzpgTHQRQPBxcPlEtIw0yRpws+Zo/3r+5WRby7u3Gg==} + engines: {node: '>= 0.6'} + + neo-async@2.6.2: + resolution: {integrity: sha512-Yd3UES5mWCSqR+qNT93S3UoYUkqAZ9lLg8a7g9rimsWmYGK8cVToA4/sF3RrshdyV3sAGMXVUmpMYOw+dLpOuw==} + + node-abi@3.89.0: + resolution: {integrity: sha512-6u9UwL0HlAl21+agMN3YAMXcKByMqwGx+pq+P76vii5f7hTPtKDp08/H9py6DY+cfDw7kQNTGEj/rly3IgbNQA==} + engines: 
{node: '>=10'} + + node-domexception@1.0.0: + resolution: {integrity: sha512-/jKZoMpw0F8GRwl4/eLROPA3cfcXtLApP0QzLmUT/HuPCZWyB7IY9ZrMeKw2O/nFIqPQB3PVM9aYm0F312AXDQ==} + engines: {node: '>=10.5.0'} + deprecated: Use your platform's native DOMException instead + + node-fetch@3.3.2: + resolution: {integrity: sha512-dRB78srN/l6gqWulah9SrxeYnxeddIG30+GOqK/9OlLVyLg3HPnr6SqOWTWOXKRwC2eGYCkZ59NNuSgvSrpgOA==} + engines: {node: ^12.20.0 || ^14.13.1 || >=16.0.0} + + oauth4webapi@3.8.6: + resolution: {integrity: sha512-iwemM91xz8nryHti2yTmg5fhyEMVOkOXwHNqbvcATjyajb5oQxCQzrNOA6uElRHuMhQQTKUyFKV9y/CNyg25BQ==} + + object-assign@4.1.1: + resolution: {integrity: sha512-rJgTQnkUnH1sFw8yT6VSU3zD3sWmu6sZhIseY8VX+GRu3P6F7Fu+JNDoXfklElbLJSnc3FUQHVe4cU5hj+BcUg==} + engines: {node: '>=0.10.0'} + + object-inspect@1.13.4: + resolution: {integrity: sha512-W67iLl4J2EXEGTbfeHCffrjDfitvLANg0UlX3wFUUSTx92KXRFegMHUVgSqE+wvhAbi4WqjGg9czysTV2Epbew==} + engines: {node: '>= 0.4'} + + obug@2.1.1: + resolution: {integrity: sha512-uTqF9MuPraAQ+IsnPf366RG4cP9RtUi7MLO1N3KEc+wb0a6yKpeL0lmk2IB1jY5KHPAlTc6T/JRdC/YqxHNwkQ==} + + on-finished@2.4.1: + resolution: {integrity: sha512-oVlzkg3ENAhCk2zdv7IJwd/QUD4z2RxRwpkcGY8psCVcCYZNq4wYnVWALHM+brtuJjePWiYF/ClmuDr8Ch5+kg==} + engines: {node: '>= 0.8'} + + once@1.4.0: + resolution: {integrity: sha512-lNaJgI+2Q5URQBkccEKHTQOPaXdUxnZZElQTZY0MFUAuaEqe1E+Nyvgdz/aIyNi6Z9MzO5dv1H8n58/GELp3+w==} + + one-time@1.0.0: + resolution: {integrity: sha512-5DXOiRKwuSEcQ/l0kGCF6Q3jcADFv5tSmRaJck/OqkVFcOzutB134KRSfF0xDrL39MNnqxbHBbUUcjZIhTgb2g==} + + onetime@5.1.2: + resolution: {integrity: sha512-kbpaSSGJTWdAY5KPVeMOKXSrPtr8C8C7wodJbcsd51jRnmD+GZu8Y0VoU6Dm5Z4vWr0Ig/1NKuWRKf7j5aaYSg==} + engines: {node: '>=6'} + + open@10.2.0: + resolution: {integrity: sha512-YgBpdJHPyQ2UE5x+hlSXcnejzAvD0b22U2OuAP+8OnlJT+PjWPxtgmGqKKc+RgTM63U9gN0YzrYc71R2WT/hTA==} + engines: {node: '>=18'} + + open@7.4.2: + resolution: {integrity: 
sha512-MVHddDVweXZF3awtlAS+6pgKLlm/JgxZ90+/NBurBoQctVOOB/zDdVjcyPzQ+0laDGbsWgrRkflI65sQeOgT9Q==} + engines: {node: '>=8'} + + openai@6.35.0: + resolution: {integrity: sha512-L/skwIGnt5xQZHb0UfTu9uAUKbis3ehKypOuJKi20QvG7UStV6C8IC3myGYHcdiF4kms/bAvOJ9UqqNWqi8x/Q==} + hasBin: true + peerDependencies: + ws: ^8.18.0 + zod: ^3.25 || ^4.0 + peerDependenciesMeta: + ws: + optional: true + zod: + optional: true + + p-limit@7.3.0: + resolution: {integrity: sha512-7cIXg/Z0M5WZRblrsOla88S4wAK+zOQQWeBYfV3qJuJXMr+LnbYjaadrFaS0JILfEDPVqHyKnZ1Z/1d6J9VVUw==} + engines: {node: '>=20'} + + parse-passwd@1.0.0: + resolution: {integrity: sha512-1Y1A//QUXEZK7YKz+rD9WydcE1+EuPr6ZBgKecAB8tmoW6UFv0NREVJe1p+jRxtThkcbbKkfwIbWJe/IeE6m2Q==} + engines: {node: '>=0.10.0'} + + parseurl@1.3.3: + resolution: {integrity: sha512-CiyeOxFT/JZyN5m0z9PfXw4SCBJ6Sygz1Dpl0wqjlhDEGGBP1GnsUVEL0p63hoG1fcj3fHynXi9NYO4nWOL+qQ==} + engines: {node: '>= 0.8'} + + patch-console@2.0.0: + resolution: {integrity: sha512-0YNdUceMdaQwoKce1gatDScmMo5pu/tfABfnzEqeG0gtTmd7mh/WcwgUjtAeOU7N8nFFlbQBnFK2gXW5fGvmMA==} + engines: {node: ^12.20.0 || ^14.13.1 || >=16.0.0} + + path-expression-matcher@1.5.0: + resolution: {integrity: sha512-cbrerZV+6rvdQrrD+iGMcZFEiiSrbv9Tfdkvnusy6y0x0GKBXREFg/Y65GhIfm0tnLntThhzCnfKwp1WRjeCyQ==} + engines: {node: '>=14.0.0'} + + path-is-absolute@1.0.1: + resolution: {integrity: sha512-AVbw3UJ2e9bq64vSaS9Am0fje1Pa8pbGqTTsmXfaIiMpnr5DlDhfJOuLj9Sf95ZPVDAUerDfEk88MPmPe7UCQg==} + engines: {node: '>=0.10.0'} + + path-key@3.1.1: + resolution: {integrity: sha512-ojmeN0qd+y0jszEtoY48r0Peq5dwMEkIlCOu6Q5f41lfkswXuKtYrhgoTpLnyIcHm24Uhqx+5Tqm2InSwLhE6Q==} + engines: {node: '>=8'} + + path-to-regexp@8.4.2: + resolution: {integrity: sha512-qRcuIdP69NPm4qbACK+aDogI5CBDMi1jKe0ry5rSQJz8JVLsC7jV8XpiJjGRLLol3N+R5ihGYcrPLTno6pAdBA==} + + pathe@2.0.3: + resolution: {integrity: sha512-WUjGcAqP1gQacoQe+OBJsFA7Ld4DyXuUIjZ5cc75cLHvJ7dtNsTugphxIADwspS+AraAUePCKrSVtPLFj/F88w==} + + pegjs@0.10.0: + resolution: {integrity: 
sha512-qI5+oFNEGi3L5HAxDwN2LA4Gg7irF70Zs25edhjld9QemOgp0CbvMtbFcMvFtEo1OityPrcCzkQFB8JP/hxgow==} + engines: {node: '>=0.10'} + hasBin: true + + pg-cloudflare@1.3.0: + resolution: {integrity: sha512-6lswVVSztmHiRtD6I8hw4qP/nDm1EJbKMRhf3HCYaqud7frGysPv7FYJ5noZQdhQtN2xJnimfMtvQq21pdbzyQ==} + + pg-connection-string@2.12.0: + resolution: {integrity: sha512-U7qg+bpswf3Cs5xLzRqbXbQl85ng0mfSV/J0nnA31MCLgvEaAo7CIhmeyrmJpOr7o+zm0rXK+hNnT5l9RHkCkQ==} + + pg-int8@1.0.1: + resolution: {integrity: sha512-WCtabS6t3c8SkpDBUlb1kjOs7l66xsGdKpIPZsg4wR+B3+u9UAum2odSsF9tnvxg80h4ZxLWMy4pRjOsFIqQpw==} + engines: {node: '>=4.0.0'} + + pg-pool@3.13.0: + resolution: {integrity: sha512-gB+R+Xud1gLFuRD/QgOIgGOBE2KCQPaPwkzBBGC9oG69pHTkhQeIuejVIk3/cnDyX39av2AxomQiyPT13WKHQA==} + peerDependencies: + pg: '>=8.0' + + pg-protocol@1.13.0: + resolution: {integrity: sha512-zzdvXfS6v89r6v7OcFCHfHlyG/wvry1ALxZo4LqgUoy7W9xhBDMaqOuMiF3qEV45VqsN6rdlcehHrfDtlCPc8w==} + + pg-types@2.2.0: + resolution: {integrity: sha512-qTAAlrEsl8s4OiEQY69wDvcMIdQN6wdz5ojQiOy6YRMuynxenON0O5oCpJI6lshc6scgAY8qvJ2On/p+CXY0GA==} + engines: {node: '>=4'} + + pg@8.20.0: + resolution: {integrity: sha512-ldhMxz2r8fl/6QkXnBD3CR9/xg694oT6DZQ2s6c/RI28OjtSOpxnPrUCGOBJ46RCUxcWdx3p6kw/xnDHjKvaRA==} + engines: {node: '>= 16.0.0'} + peerDependencies: + pg-native: '>=3.0.1' + peerDependenciesMeta: + pg-native: + optional: true + + pgpass@1.0.5: + resolution: {integrity: sha512-FdW9r/jQZhSeohs1Z3sI1yxFQNFvMcnmfuj4WBMUTxOrAyLMaTcE1aAMBiTlbMNaXvBCQuVi0R7hd8udDSP7ug==} + + picocolors@1.1.1: + resolution: {integrity: sha512-xceH2snhtb5M9liqDsmEw56le376mTZkEX/jEb/RxNFyegNul7eNslCXP9FDj/Lcu0X8KEyMceP2ntpaHrDEVA==} + + picomatch@4.0.4: + resolution: {integrity: sha512-QP88BAKvMam/3NxH6vj2o21R6MjxZUAd6nlwAS/pnGvN9IVLocLHxGYIzFhg6fUQ+5th6P4dv4eW9jX3DSIj7A==} + engines: {node: '>=12'} + + pkce-challenge@5.0.1: + resolution: {integrity: sha512-wQ0b/W4Fr01qtpHlqSqspcj3EhBvimsdh0KlHhH8HRZnMsEa0ea2fTULOXOS9ccQr3om+GcGRk4e+isrZWV8qQ==} + engines: {node: 
'>=16.20.0'} + + postcss@8.5.12: + resolution: {integrity: sha512-W62t/Se6rA0Az3DfCL0AqJwXuKwBeYg6nOaIgzP+xZ7N5BFCI7DYi1qs6ygUYT6rvfi6t9k65UMLJC+PHZpDAA==} + engines: {node: ^10 || ^12 || >=14} + + postgres-array@2.0.0: + resolution: {integrity: sha512-VpZrUqU5A69eQyW2c5CA1jtLecCsN2U/bD6VilrFDWq5+5UIEVO7nazS3TEcHf1zuPYO/sqGvUvW62g86RXZuA==} + engines: {node: '>=4'} + + postgres-bytea@1.0.1: + resolution: {integrity: sha512-5+5HqXnsZPE65IJZSMkZtURARZelel2oXUEO8rH83VS/hxH5vv1uHquPg5wZs8yMAfdv971IU+kcPUczi7NVBQ==} + engines: {node: '>=0.10.0'} + + postgres-date@1.0.7: + resolution: {integrity: sha512-suDmjLVQg78nMK2UZ454hAG+OAW+HQPZ6n++TNDUX+L0+uUlLywnoxJKDou51Zm+zTCjrCl0Nq6J9C5hP9vK/Q==} + engines: {node: '>=0.10.0'} + + postgres-interval@1.2.0: + resolution: {integrity: sha512-9ZhXKM/rw350N1ovuWHbGxnGh/SNJ4cnxHiM0rxE4VN41wsg8P8zWn9hv/buK00RP4WvlOyr/RBDiptyxVbkZQ==} + engines: {node: '>=0.10.0'} + + prebuild-install@7.1.3: + resolution: {integrity: sha512-8Mf2cbV7x1cXPUILADGI3wuhfqWvtiLA1iclTDbFRZkgRQS0NqsPZphna9V+HyTEadheuPmjaJMsbzKQFOzLug==} + engines: {node: '>=10'} + deprecated: No longer maintained. Please contact the author of the relevant native addon; alternatives are available. 
+ hasBin: true + + process@0.11.10: + resolution: {integrity: sha512-cdGef/drWFoydD1JsMzuFf8100nZl+GT+yacc2bEced5f9Rjk4z+WtFUTBu9PhOi9j/jfmBPu0mMEY4wIdAF8A==} + engines: {node: '>= 0.6.0'} + + proxy-addr@2.0.7: + resolution: {integrity: sha512-llQsMLSUDUPT44jdrU/O37qlnifitDP+ZwrmmZcoSKyLKvtZxpyV0n2/bD/N4tBAAZ/gJEdZU7KMraoK1+XYAg==} + engines: {node: '>= 0.10'} + + proxy-from-env@2.1.0: + resolution: {integrity: sha512-cJ+oHTW1VAEa8cJslgmUZrc+sjRKgAKl3Zyse6+PV38hZe/V6Z14TbCuXcan9F9ghlz4QrFr2c92TNF82UkYHA==} + engines: {node: '>=10'} + + pump@3.0.4: + resolution: {integrity: sha512-VS7sjc6KR7e1ukRFhQSY5LM2uBWAUPiOPa/A3mkKmiMwSmRFUITt0xuj+/lesgnCv+dPIEYlkzrcyXgquIHMcA==} + + qs@6.15.1: + resolution: {integrity: sha512-6YHEFRL9mfgcAvql/XhwTvf5jKcOiiupt2FiJxHkiX1z4j7WL8J/jRHYLluORvc1XxB5rV20KoeK00gVJamspg==} + engines: {node: '>=0.6'} + + range-parser@1.2.1: + resolution: {integrity: sha512-Hrgsx+orqoygnmhFbKaHE6c296J+HTAQXoxEF6gNupROmmGJRoyzfG3ccAveqCBrwr/2yxQ5BVd/GTl5agOwSg==} + engines: {node: '>= 0.6'} + + raw-body@3.0.2: + resolution: {integrity: sha512-K5zQjDllxWkf7Z5xJdV0/B0WTNqx6vxG70zJE4N0kBs4LovmEYWJzQGxC9bS9RAKu3bgM40lrd5zoLJ12MQ5BA==} + engines: {node: '>= 0.10'} + + rc@1.2.8: + resolution: {integrity: sha512-y3bGgqKj3QBdxLbLkomlohkvsA8gdAiUQlSBJnBhfn+BPxg4bc62d8TcBW15wavDfgexCgccckhcZvywyQYPOw==} + hasBin: true + + react-reconciler@0.33.0: + resolution: {integrity: sha512-KetWRytFv1epdpJc3J4G75I4WrplZE5jOL7Yq0p34+OVOKF4Se7WrdIdVC45XsSSmUTlht2FM/fM1FZb1mfQeA==} + engines: {node: '>=0.10.0'} + peerDependencies: + react: ^19.2.0 + + react@19.2.5: + resolution: {integrity: sha512-llUJLzz1zTUBrskt2pwZgLq59AemifIftw4aB7JxOqf1HY2FDaGDxgwpAPVzHU1kdWabH7FauP4i1oEeer2WCA==} + engines: {node: '>=0.10.0'} + + readable-stream@3.6.2: + resolution: {integrity: sha512-9u/sniCrY3D5WdsERHzHE4G2YCXqoG5FTHUiCC4SIbr6XcLZBY05ya9EKjYek9O5xOAwjGq+1JdGBAS7Q9ScoA==} + engines: {node: '>= 6'} + + readable-stream@4.7.0: + resolution: {integrity: 
sha512-oIGGmcpTLwPga8Bn6/Z75SVaH1z5dUut2ibSyAMVhmUggWpmDn2dapB0n7f8nwaSiRtepAsfJyfXIO5DCVAODg==} + engines: {node: ^12.22.0 || ^14.17.0 || >=16.0.0} + + require-from-string@2.0.2: + resolution: {integrity: sha512-Xf0nWe6RseziFMu+Ap9biiUbmplq6S9/p+7w7YXP/JBHhrUDDUhwa+vANyubuqfZWTveU//DYVGsDG7RKL/vEw==} + engines: {node: '>=0.10.0'} + + restore-cursor@4.0.0: + resolution: {integrity: sha512-I9fPXU9geO9bHOt9pHHOhOkYerIMsmVaWB0rA2AI9ERh/+x/i7MV5HKBNrg+ljO5eoPVgCcnFuRjJ9uH6I/3eg==} + engines: {node: ^12.20.0 || ^14.13.1 || >=16.0.0} + + retry-request@8.0.2: + resolution: {integrity: sha512-JzFPAfklk1kjR1w76f0QOIhoDkNkSqW8wYKT08n9yysTmZfB+RQ2QoXoTAeOi1HD9ZipTyTAZg3c4pM/jeqgSw==} + engines: {node: '>=18'} + + rolldown@1.0.0-rc.17: + resolution: {integrity: sha512-ZrT53oAKrtA4+YtBWPQbtPOxIbVDbxT0orcYERKd63VJTF13zPcgXTvD4843L8pcsI7M6MErt8QtON6lrB9tyA==} + engines: {node: ^20.19.0 || >=22.12.0} + hasBin: true + + router@2.2.0: + resolution: {integrity: sha512-nLTrUKm2UyiL7rlhapu/Zl45FwNgkZGaCpZbIHajDYgwlJCOzLSk+cIPAnsEqV955GjILJnKbdQC1nVPz+gAYQ==} + engines: {node: '>= 18'} + + run-applescript@7.1.0: + resolution: {integrity: sha512-DPe5pVFaAsinSaV6QjQ6gdiedWDcRCbUuiQfQa2wmWV7+xC9bGulGI8+TdRmoFkAPaBXk8CrAbnlY2ISniJ47Q==} + engines: {node: '>=18'} + + safe-buffer@5.2.1: + resolution: {integrity: sha512-rp3So07KcdmmKbGvgaNxQSJr7bGVSVk5S9Eq1F+ppbRo70+YeaDxkw5Dd8NPN+GD6bjnYm2VuPuCXmpuYvmCXQ==} + + safe-stable-stringify@2.5.0: + resolution: {integrity: sha512-b3rppTKm9T+PsVCBEOUR46GWI7fdOs00VKZ1+9c1EWDaDMvjQc6tUwuFyIprgGgTcWoVHSKrU8H31ZHA2e0RHA==} + engines: {node: '>=10'} + + safer-buffer@2.1.2: + resolution: {integrity: sha512-YZo3K82SD7Riyi0E1EQPojLz7kpepnSQI9IyPbHHg1XXXevb5dJI7tpyN2ADxGcQbHG7vcyRHk0cbwqcQriUtg==} + + scheduler@0.27.0: + resolution: {integrity: sha512-eNv+WrVbKu1f3vbYJT/xtiF5syA5HPIMtf9IgY/nKg0sWqzAUEvqY/xm7OcZc/qafLx/iO9FgOmeSAp4v5ti/Q==} + + semver@7.7.4: + resolution: {integrity: 
sha512-vFKC2IEtQnVhpT78h1Yp8wzwrf8CM+MzKMHGJZfBtzhZNycRFnXsHk6E5TxIkkMsgNS7mdX3AGB7x2QM2di4lA==} + engines: {node: '>=10'} + hasBin: true + + send@1.2.1: + resolution: {integrity: sha512-1gnZf7DFcoIcajTjTwjwuDjzuz4PPcY2StKPlsGAQ1+YH20IRVrBaXSWmdjowTJ6u8Rc01PoYOGHXfP1mYcZNQ==} + engines: {node: '>= 18'} + + serve-static@2.2.1: + resolution: {integrity: sha512-xRXBn0pPqQTVQiC8wyQrKs2MOlX24zQ0POGaj0kultvoOCstBQM5yvOhAVSUwOMjQtTvsPWoNCHfPGwaaQJhTw==} + engines: {node: '>= 18'} + + setprototypeof@1.2.0: + resolution: {integrity: sha512-E5LDX7Wrp85Kil5bhZv46j8jOeboKq5JMmYM3gVGdGH8xFpPWXUMsNrlODCrkoxMEeNi/XZIwuRvY4XNwYMJpw==} + + shebang-command@2.0.0: + resolution: {integrity: sha512-kHxr2zZpYtdmrN1qDjrrX/Z1rR1kG8Dx+gkpK1G4eXmvXswmcE1hTWBWYUzlraYw1/yZp6YuDY77YtvbN0dmDA==} + engines: {node: '>=8'} + + shebang-regex@3.0.0: + resolution: {integrity: sha512-7++dFhtcx3353uBaq8DDR4NuxBetBzC7ZQOhmTQInHEd6bSrXdiEyzCvG07Z44UYdLShWUyXt5M/yhz8ekcb1A==} + engines: {node: '>=8'} + + side-channel-list@1.0.1: + resolution: {integrity: sha512-mjn/0bi/oUURjc5Xl7IaWi/OJJJumuoJFQJfDDyO46+hBWsfaVM65TBHq2eoZBhzl9EchxOijpkbRC8SVBQU0w==} + engines: {node: '>= 0.4'} + + side-channel-map@1.0.1: + resolution: {integrity: sha512-VCjCNfgMsby3tTdo02nbjtM/ewra6jPHmpThenkTYh8pG9ucZ/1P8So4u4FGBek/BjpOVsDCMoLA/iuBKIFXRA==} + engines: {node: '>= 0.4'} + + side-channel-weakmap@1.0.2: + resolution: {integrity: sha512-WPS/HvHQTYnHisLo9McqBHOJk2FkHO/tlpvldyrnem4aeQp4hai3gythswg6p01oSoTl58rcpiFAjF2br2Ak2A==} + engines: {node: '>= 0.4'} + + side-channel@1.1.0: + resolution: {integrity: sha512-ZX99e6tRweoUXqR+VBrslhda51Nh5MTQwou5tnUDgbtyM0dBgmhEDtWGP/xbKn6hqfPRHujUNwz5fy/wbbhnpw==} + engines: {node: '>= 0.4'} + + siginfo@2.0.0: + resolution: {integrity: sha512-ybx0WO1/8bSBLEWXZvEd7gMW3Sn3JFlW3TvX1nREbDLRNQNaeNN8WK0meBwPdAaOI7TtRRRJn/Es1zhrrCHu7g==} + + signal-exit@3.0.7: + resolution: {integrity: sha512-wnD2ZE+l+SPC/uoS0vXeE9L1+0wuaMqKlfz9AMUo38JsyLSBWSFcHR1Rri62LZc12vLr1gb3jl7iwQhgwpAbGQ==} + + 
simple-concat@1.0.1: + resolution: {integrity: sha512-cSFtAPtRhljv69IK0hTVZQ+OfE9nePi/rtJmw5UjHeVyVroEqJXP1sFztKUy1qU+xvz3u/sfYJLa947b7nAN2Q==} + + simple-get@4.0.1: + resolution: {integrity: sha512-brv7p5WgH0jmQJr1ZDDfKDOSeWWg+OVypG99A/5vYGPqJ6pxiaHLy8nxtFjBA7oMa01ebA9gfh1uMCFqOuXxvA==} + + simple-git@3.32.2: + resolution: {integrity: sha512-n/jhNmvYh8dwyfR6idSfpXrFazuyd57jwNMzgjGnKZV/1lTh0HKvPq20v4AQ62rP+l19bWjjXPTCdGHMt0AdrQ==} + + simple-lru-cache@0.0.2: + resolution: {integrity: sha512-uEv/AFO0ADI7d99OHDmh1QfYzQk/izT1vCmu/riQfh7qjBVUUgRT87E5s5h7CxWCA/+YoZerykpEthzVrW3LIw==} + + sisteransi@1.0.5: + resolution: {integrity: sha512-bLGGlR1QxBcynn2d5YmDX4MGjlZvy2MRBDRNHLJ8VI6l6+9FUiyTFNJ0IveOSP0bcXgVDPRcfGqA0pjaqUpfVg==} + + slice-ansi@9.0.0: + resolution: {integrity: sha512-SO/3iYL5S3W57LLEniscOGPZgOqZUPCx6d3dB+52B80yJ0XstzsC/eV8gnA4tM3MHDrKz+OCFSLNjswdSC+/bA==} + engines: {node: '>=22'} + + snowflake-sdk@2.4.0: + resolution: {integrity: sha512-0nEQoGMPpCpe1Rvj9tlBp0z4QbOCxfyUdRXyFPKRDneR3ok7qNnlgUXEgldvryolUwSRNbsqyjRC4AyPdIyezg==} + engines: {node: '>=18'} + peerDependencies: + asn1.js: ^5.4.1 + + source-map-js@1.2.1: + resolution: {integrity: sha512-UXWMKhLOwVKb728IUtQPXxfYU+usdybtUrK/8uGE8CQMvrhOpwvzDBwj0QhSL7MQc7vIsISBG8VQ8+IDQxpfQA==} + engines: {node: '>=0.10.0'} + + source-map@0.6.1: + resolution: {integrity: sha512-UjgapumWlbMhkBgzT7Ykc5YXUT46F0iKu8SGXq0bcwP5dz/h0Plj6enJqjz1Zbq2l5WaqYnrVbwWOWMyF3F47g==} + engines: {node: '>=0.10.0'} + + split2@4.2.0: + resolution: {integrity: sha512-UcjcJOWknrNkF6PLX83qcHM6KHgVKNkV62Y8a5uYDVv9ydGQVwAHMKqHdJje1VTWpljG0WYpCDhrCdAOYH4TWg==} + engines: {node: '>= 10.x'} + + sprintf-js@1.1.3: + resolution: {integrity: sha512-Oo+0REFV59/rz3gfJNKQiBlwfHaSESl1pcGyABQsnnIfWOFt6JNj5gCog2U6MLZ//IGYD+nA8nI+mTShREReaA==} + + sql-escaper@1.3.3: + resolution: {integrity: sha512-BsTCV265VpTp8tm1wyIm1xqQCS+Q9NHx2Sr+WcnUrgLrQ6yiDIvHYJV5gHxsj1lMBy2zm5twLaZao8Jd+S8JJw==} + engines: {bun: '>=1.0.0', deno: '>=2.0.0', node: '>=12.0.0'} + + 
stack-trace@0.0.10: + resolution: {integrity: sha512-KGzahc7puUKkzyMt+IqAep+TVNbKP+k2Lmwhub39m1AsTSkaDutx56aDCo+HLDzf/D26BIHTJWNiTG1KAJiQCg==} + + stack-utils@2.0.6: + resolution: {integrity: sha512-XlkWvfIm6RmsWtNJx+uqtKLS8eqFbxUg0ZzLXqY0caEy9l7hruX8IpiDnjsLavoBgqCCR71TqWO8MaXYheJ3RQ==} + engines: {node: '>=10'} + + stackback@0.0.2: + resolution: {integrity: sha512-1XMJE5fQo1jGH6Y/7ebnwPOBEkIEnT4QF32d5R1+VXdXveM0IBMJt8zfaxX1P3QhVwrYe+576+jkANtSS2mBbw==} + + statuses@2.0.2: + resolution: {integrity: sha512-DvEy55V3DB7uknRo+4iOGT5fP1slR8wQohVdknigZPMpMstaKJQWhwiYBACJE3Ul2pTnATihhBYnRhZQHGBiRw==} + engines: {node: '>= 0.8'} + + std-env@4.1.0: + resolution: {integrity: sha512-Rq7ybcX2RuC55r9oaPVEW7/xu3tj8u4GeBYHBWCychFtzMIr86A7e3PPEBPT37sHStKX3+TiX/Fr/ACmJLVlLQ==} + + stream-events@1.0.5: + resolution: {integrity: sha512-E1GUzBSgvct8Jsb3v2X15pjzN1tYebtbLaMg+eBOUOAxgbLoSbT2NS91ckc5lJD1KfLjId+jXJRgo0qnV5Nerg==} + + stream-shift@1.0.3: + resolution: {integrity: sha512-76ORR0DO1o1hlKwTbi/DM3EXWGf3ZJYO8cXX5RJwnul2DEg2oyoZyjLNoQM8WsvZiFKCRfC1O0J7iCvie3RZmQ==} + + string-width@8.2.1: + resolution: {integrity: sha512-IIaP0g3iy9Cyy18w3M9YcaDudujEAVHKt3a3QJg1+sr/oX96TbaGUubG0hJyCjCBThFH+tFpcIyoUHUn1ogaLA==} + engines: {node: '>=20'} + + string_decoder@1.3.0: + resolution: {integrity: sha512-hkRX8U1WjJFd8LsDJ2yQ/wWWxaopEsABU1XfkM8A+j0+85JAGppt16cr1Whg6KIbb4okU6Mql6BOj+uup/wKeA==} + + strip-ansi@7.1.2: + resolution: {integrity: sha512-gmBGslpoQJtgnMAvOVqGZpEz9dyoKTCzy2nfz/n8aIFhN/jCE/rCmcxabB6jOOHV+0WNnylOxaxBQPSvcWklhA==} + engines: {node: '>=12'} + + strip-json-comments@2.0.1: + resolution: {integrity: sha512-4gB8na07fecVVkOI6Rs4e7T6NOTki5EmL7TUduTs6bu3EdnSycntVJ4re8kgZA+wx9IueI2Y11bfbgwtzuE0KQ==} + engines: {node: '>=0.10.0'} + + strnum@2.2.3: + resolution: {integrity: sha512-oKx6RUCuHfT3oyVjtnrmn19H1SiCqgJSg+54XqURKp5aCMbrXrhLjRN9TjuwMjiYstZ0MzDrHqkGZ5dFTKd+zg==} + + stubs@3.0.0: + resolution: {integrity: 
sha512-PdHt7hHUJKxvTCgbKX9C1V/ftOcjJQgz8BZwNfV5c4B6dcGqlpelTbJ999jBGZ2jYiPAwcX5dP6oBwVlBlUbxw==} + + tagged-tag@1.0.0: + resolution: {integrity: sha512-yEFYrVhod+hdNyx7g5Bnkkb0G6si8HJurOoOEgC8B/O0uXLHlaey/65KRv6cuWBNhBgHKAROVpc7QyYqE5gFng==} + engines: {node: '>=20'} + + tar-fs@2.1.4: + resolution: {integrity: sha512-mDAjwmZdh7LTT6pNleZ05Yt65HC3E+NiQzl672vQG38jIrehtJk/J3mNwIg+vShQPcLF/LV7CMnDW6vjj6sfYQ==} + + tar-stream@2.2.0: + resolution: {integrity: sha512-ujeqbceABgwMZxEJnk2HDY2DlnUZ+9oEcb1KzTVfYHio0UE6dG71n60d8D2I4qNvleWrrXpmjpt7vZeF1LnMZQ==} + engines: {node: '>=6'} + + tarn@3.0.2: + resolution: {integrity: sha512-51LAVKUSZSVfI05vjPESNc5vwqqZpbXCsU+/+wxlOrUjk2SnFTt97v9ZgQrD4YmxYW1Px6w2KjaDitCfkvgxMQ==} + engines: {node: '>=8.0.0'} + + tedious@19.2.1: + resolution: {integrity: sha512-pk1Q16Yl62iocuQB+RWbg6rFUFkIyzqOFQ6NfysCltRvQqKwfurgj8v/f2X+CKvDhSL4IJ0cCOfCHDg9PWEEYA==} + engines: {node: '>=18.17'} + + teeny-request@10.1.2: + resolution: {integrity: sha512-Xj0ZAQ0CeuQn6UxCDPLbFRlgcSTUEyO3+wiepr2grjIjyL/lMMs1Z4OwXn8kLvn/V1OuaEP0UY7Na6UDNNsYrQ==} + engines: {node: '>=18'} + + terminal-size@4.0.1: + resolution: {integrity: sha512-avMLDQpUI9I5XFrklECw1ZEUPJhqzcwSWsyyI8blhRLT+8N1jLJWLWWYQpB2q2xthq8xDvjZPISVh53T/+CLYQ==} + engines: {node: '>=18'} + + text-hex@1.0.0: + resolution: {integrity: sha512-uuVGNWzgJ4yhRaNSiubPY7OjISw4sw4E5Uv0wbjp+OzcbmVU/rsT8ujgcXJhn9ypzsgr5vlzpPqP+MBBKcGvbg==} + + tinybench@2.9.0: + resolution: {integrity: sha512-0+DUvqWMValLmha6lr4kD8iAMK1HzV0/aKnCtWb9v9641TnP/MFb7Pc2bxoxQjTXAErryXVgUOfv2YqNllqGeg==} + + tinyexec@1.1.1: + resolution: {integrity: sha512-VKS/ZaQhhkKFMANmAOhhXVoIfBXblQxGX1myCQ2faQrfmobMftXeJPcZGp0gS07ocvGJWDLZGyOZDadDBqYIJg==} + engines: {node: '>=18'} + + tinyglobby@0.2.16: + resolution: {integrity: sha512-pn99VhoACYR8nFHhxqix+uvsbXineAasWm5ojXoN8xEwK5Kd3/TrhNn1wByuD52UxWRLy8pu+kRMniEi6Eq9Zg==} + engines: {node: '>=12.0.0'} + + tinyrainbow@3.1.0: + resolution: {integrity: 
sha512-Bf+ILmBgretUrdJxzXM0SgXLZ3XfiaUuOj/IKQHuTXip+05Xn+uyEYdVg0kYDipTBcLrCVyUzAPz7QmArb0mmw==} + engines: {node: '>=14.0.0'} + + toidentifier@1.0.1: + resolution: {integrity: sha512-o5sSPKEkg/DIQNmH43V0/uerLrpzVedkUh8tGNvaeXpfpuwjKenlSox/2O/BTlZUtEe+JG7s5YhEz608PlAHRA==} + engines: {node: '>=0.6'} + + toml@3.0.0: + resolution: {integrity: sha512-y/mWCZinnvxjTKYhJ+pYxwD0mRLVvOtdS2Awbgxln6iEnt4rk0yBxeSBHkGJcPucRiG0e55mwWp+g/05rsrd6w==} + + triple-beam@1.4.1: + resolution: {integrity: sha512-aZbgViZrg1QNcG+LULa7nhZpJTZSLm/mXnHXnbAbjmN5aSa0y7V+wvv6+4WaBtpISJzThKy+PIPxc1Nq1EJ9mg==} + engines: {node: '>= 14.0.0'} + + tslib@2.8.1: + resolution: {integrity: sha512-oJFu94HQb+KVduSUQL7wnpmqnfmLsOA/nAh6b6EH0wCEoK0/mPeXU6c3wKDV83MkOuHPRHtSXKKU99IBazS/2w==} + + tunnel-agent@0.6.0: + resolution: {integrity: sha512-McnNiV1l8RYeY8tBgEpuodCC1mLUdbSN+CYBL7kJsJNInOP8UjDDEwdk6Mw60vdLLrr5NHKZhMAOSrR2NZuQ+w==} + + type-fest@5.6.0: + resolution: {integrity: sha512-8ZiHFm91orbSAe2PSAiSVBVko18pbhbiB3U9GglSzF/zCGkR+rxpHx6sEMCUm4kxY4LjDIUGgCfUMtwfZfjfUA==} + engines: {node: '>=20'} + + type-is@2.0.1: + resolution: {integrity: sha512-OZs6gsjF4vMp32qrCbiVSkrFmXtG/AZhY3t0iAMrMBiAZyV9oALtXO8hsrHbMXF9x6L3grlFuwW2oAz7cav+Gw==} + engines: {node: '>= 0.6'} + + typescript@5.9.3: + resolution: {integrity: sha512-jl1vZzPDinLr9eUt3J/t7V6FgNEw9QjvBPdysz9KfQDD41fQrC2Y4vKQdiaUpFT4bXlb1RHhLpp8wtm6M5TgSw==} + engines: {node: '>=14.17'} + hasBin: true + + uglify-js@3.19.3: + resolution: {integrity: sha512-v3Xu+yuwBXisp6QYTcH4UbH+xYJXqnq2m/LtQVWKWzYc1iehYnLixoQDN9FH6/j9/oybfd6W9Ghwkl8+UMKTKQ==} + engines: {node: '>=0.8.0'} + hasBin: true + + undici-types@7.16.0: + resolution: {integrity: sha512-Zz+aZWSj8LE6zoxD+xrjh4VfkIG8Ya6LvYkZqtUQGJPZjYl53ypCaUwWqo7eI0x66KBGeRo+mlBEkMSeSZ38Nw==} + + unpipe@1.0.0: + resolution: {integrity: sha512-pjy2bYhSsufwWlKwPc+l3cN7+wuJlK6uz0YdJEOlQDbl6jo/YlPi4mb8agUkVC8BF7V8NuzeyPNqRksA3hztKQ==} + engines: {node: '>= 0.8'} + + util-deprecate@1.0.2: + resolution: {integrity: 
sha512-EPD5q1uXyFxJpCrLnCc1nHnq3gOa6DZBocAIiI2TaSCA7VCJ1UJDMagCzIkXNsUYfD1daK//LTEQ8xiIbrHtcw==} + + uuid@8.3.2: + resolution: {integrity: sha512-+NYs2QeMWy+GWFOEm9xnn6HCDp0l7QBD7ml8zLUmJ+93Q5NF0NocErnwkTkXVFNiX3/fpC6afS8Dhb/gz7R7eg==} + deprecated: uuid@10 and below is no longer supported. For ESM codebases, update to uuid@latest. For CommonJS codebases, use uuid@11 (but be aware this version will likely be deprecated in 2028). + hasBin: true + + vary@1.1.2: + resolution: {integrity: sha512-BNGbWLfd0eUPabhkXUVm0j8uuvREyTh5ovRa/dyow/BqAbZJyC+5fU+IzQOzmAKzYqYRAISoRhdQr3eIZ/PXqg==} + engines: {node: '>= 0.8'} + + vite@8.0.10: + resolution: {integrity: sha512-rZuUu9j6J5uotLDs+cAA4O5H4K1SfPliUlQwqa6YEwSrWDZzP4rhm00oJR5snMewjxF5V/K3D4kctsUTsIU9Mw==} + engines: {node: ^20.19.0 || >=22.12.0} + hasBin: true + peerDependencies: + '@types/node': ^24.3.0 + '@vitejs/devtools': ^0.1.0 + esbuild: ^0.27.0 || ^0.28.0 + jiti: '>=1.21.0' + less: ^4.0.0 + sass: ^1.70.0 + sass-embedded: ^1.70.0 + stylus: '>=0.54.8' + sugarss: ^5.0.0 + terser: ^5.16.0 + tsx: ^4.8.1 + yaml: ^2.4.2 + peerDependenciesMeta: + '@types/node': + optional: true + '@vitejs/devtools': + optional: true + esbuild: + optional: true + jiti: + optional: true + less: + optional: true + sass: + optional: true + sass-embedded: + optional: true + stylus: + optional: true + sugarss: + optional: true + terser: + optional: true + tsx: + optional: true + yaml: + optional: true + + vitest@4.1.5: + resolution: {integrity: sha512-9Xx1v3/ih3m9hN+SbfkUyy0JAs72ap3r7joc87XL6jwF0jGg6mFBvQ1SrwaX+h8BlkX6Hz9shdd1uo6AF+ZGpg==} + engines: {node: ^20.0.0 || ^22.0.0 || >=24.0.0} + hasBin: true + peerDependencies: + '@edge-runtime/vm': '*' + '@opentelemetry/api': ^1.9.0 + '@types/node': ^24.3.0 + '@vitest/browser-playwright': 4.1.5 + '@vitest/browser-preview': 4.1.5 + '@vitest/browser-webdriverio': 4.1.5 + '@vitest/coverage-istanbul': 4.1.5 + '@vitest/coverage-v8': 4.1.5 + '@vitest/ui': 4.1.5 + happy-dom: '*' + jsdom: '*' + vite: ^6.0.0 || 
^7.0.0 || ^8.0.0 + peerDependenciesMeta: + '@edge-runtime/vm': + optional: true + '@opentelemetry/api': + optional: true + '@types/node': + optional: true + '@vitest/browser-playwright': + optional: true + '@vitest/browser-preview': + optional: true + '@vitest/browser-webdriverio': + optional: true + '@vitest/coverage-istanbul': + optional: true + '@vitest/coverage-v8': + optional: true + '@vitest/ui': + optional: true + happy-dom: + optional: true + jsdom: + optional: true + + web-streams-polyfill@3.3.3: + resolution: {integrity: sha512-d2JWLCivmZYTSIoge9MsgFCZrt571BikcWGYkjC1khllbTeDlGqZ2D8vD8E/lJa8WGWbb7Plm8/XJYV7IJHZZw==} + engines: {node: '>= 8'} + + which@2.0.2: + resolution: {integrity: sha512-BLI3Tl1TW3Pvl70l3yq3Y64i+awpwXqsGBYWkkqMtnbXgrMD+yj7rhW0kuEDxzJaYXGjEW5ogapKNMEKNMjibA==} + engines: {node: '>= 8'} + hasBin: true + + why-is-node-running@2.3.0: + resolution: {integrity: sha512-hUrmaWBdVDcxvYqnyh09zunKzROWjbZTiNy8dBEjkS7ehEDQibXJ7XvlmtbwuTclUiIyN+CyXQD4Vmko8fNm8w==} + engines: {node: '>=8'} + hasBin: true + + widest-line@6.0.0: + resolution: {integrity: sha512-U89AsyEeAsyoF0zVJBkG9zBgekjgjK7yk9sje3F4IQpXBJ10TF6ByLlIfjMhcmHMJgHZI4KHt4rdNfktzxIAMA==} + engines: {node: '>=20'} + + winston-transport@4.9.0: + resolution: {integrity: sha512-8drMJ4rkgaPo1Me4zD/3WLfI/zPdA9o2IipKODunnGDcuqbHwjsbB79ylv04LCGGzU0xQ6vTznOMpQGaLhhm6A==} + engines: {node: '>= 12.0.0'} + + winston@3.19.0: + resolution: {integrity: sha512-LZNJgPzfKR+/J3cHkxcpHKpKKvGfDZVPS4hfJCc4cCG0CgYzvlD6yE/S3CIL/Yt91ak327YCpiF/0MyeZHEHKA==} + engines: {node: '>= 12.0.0'} + + wordwrap@1.0.0: + resolution: {integrity: sha512-gvVzJFlPycKc5dZN4yPkP8w7Dc37BtP1yczEneOb4uq34pXZcvrtRTmWV8W+Ume+XCxKgbjM+nevkyFPMybd4Q==} + + wrap-ansi@10.0.0: + resolution: {integrity: sha512-SGcvg80f0wUy2/fXES19feHMz8E0JoXv2uNgHOu4Dgi2OrCy1lqwFYEJz1BLbDI0exjPMe/ZdzZ/YpGECBG/aQ==} + engines: {node: '>=20'} + + wrappy@1.0.2: + resolution: {integrity: 
sha512-l4Sp/DRseor9wL6EvV2+TuQn63dMkPjZ/sp9XkghTEbV9KlPS1xUsZ3u7/IQO4wxtcFB4bgpQPRcR3QCvezPcQ==} + + ws@8.20.0: + resolution: {integrity: sha512-sAt8BhgNbzCtgGbt2OxmpuryO63ZoDk/sqaB/znQm94T4fCEsy/yV+7CdC1kJhOU9lboAEU7R3kquuycDoibVA==} + engines: {node: '>=10.0.0'} + peerDependencies: + bufferutil: ^4.0.1 + utf-8-validate: '>=5.0.2' + peerDependenciesMeta: + bufferutil: + optional: true + utf-8-validate: + optional: true + + wsl-utils@0.1.0: + resolution: {integrity: sha512-h3Fbisa2nKGPxCpm89Hk33lBLsnaGBvctQopaBSOW/uIs6FTe1ATyAnKFJrzVs9vpGdsTe73WF3V4lIsk4Gacw==} + engines: {node: '>=18'} + + xtend@4.0.2: + resolution: {integrity: sha512-LKYU1iAXJXUgAXn9URjiu+MWhyUXHsvfp7mcuYm9dSUKK0/CjtrUwFAxD82/mCWbtLsGjFIad0wIsod4zrTAEQ==} + engines: {node: '>=0.4'} + + yaml@2.8.3: + resolution: {integrity: sha512-AvbaCLOO2Otw/lW5bmh9d/WEdcDFdQp2Z2ZUH3pX9U2ihyUY0nvLv7J6TrWowklRGPYbB/IuIMfYgxaCPg5Bpg==} + engines: {node: '>= 14.6'} + hasBin: true + + yocto-queue@1.2.2: + resolution: {integrity: sha512-4LCcse/U2MHZ63HAJVE+v71o7yOdIe4cZ70Wpf8D/IyjDKYQLV5GD46B+hSTjJsvV5PztjvHoU580EftxjDZFQ==} + engines: {node: '>=12.20'} + + yoga-layout@3.2.1: + resolution: {integrity: sha512-0LPOt3AxKqMdFBZA3HBAt/t/8vIKq7VaQYbuA8WxCgung+p9TVyKRYdpvCb80HcdTN2NkbIKbhNwKUfm3tQywQ==} + + zod-to-json-schema@3.25.2: + resolution: {integrity: sha512-O/PgfnpT1xKSDeQYSCfRI5Gy3hPf91mKVDuYLUHZJMiDFptvP41MSnWofm8dnCm0256ZNfZIM7DSzuSMAFnjHA==} + peerDependencies: + zod: ^3.25.28 || ^4 + + zod@4.3.6: + resolution: {integrity: sha512-rftlrkhHZOcjDwkGlnUtZZkvaPHCsDATp4pGpuOOMDaTdDDXF91wuVDJoWoPsKX/3YPQ5fHuF3STjcYyKr+Qhg==} + + zod@4.4.3: + resolution: {integrity: sha512-ytENFjIJFl2UwYglde2jchW2Hwm4GJFLDiSXWdTrJQBIN9Fcyp7n4DhxJEiWNAJMV1/BqWfW/kkg71UDcHJyTQ==} + +snapshots: + + '@ai-sdk/anthropic@3.0.71(zod@4.3.6)': + dependencies: + '@ai-sdk/provider': 3.0.8 + '@ai-sdk/provider-utils': 4.0.23(zod@4.3.6) + zod: 4.3.6 + + '@ai-sdk/anthropic@3.0.71(zod@4.4.3)': + dependencies: + '@ai-sdk/provider': 3.0.8 + 
'@ai-sdk/provider-utils': 4.0.23(zod@4.4.3) + zod: 4.4.3 + + '@ai-sdk/anthropic@3.0.74(zod@4.3.6)': + dependencies: + '@ai-sdk/provider': 3.0.10 + '@ai-sdk/provider-utils': 4.0.26(zod@4.3.6) + zod: 4.3.6 + + '@ai-sdk/anthropic@3.0.74(zod@4.4.3)': + dependencies: + '@ai-sdk/provider': 3.0.10 + '@ai-sdk/provider-utils': 4.0.26(zod@4.4.3) + zod: 4.4.3 + + '@ai-sdk/gateway@3.0.104(zod@4.3.6)': + dependencies: + '@ai-sdk/provider': 3.0.8 + '@ai-sdk/provider-utils': 4.0.23(zod@4.3.6) + '@vercel/oidc': 3.2.0 + zod: 4.3.6 + + '@ai-sdk/gateway@3.0.104(zod@4.4.3)': + dependencies: + '@ai-sdk/provider': 3.0.8 + '@ai-sdk/provider-utils': 4.0.23(zod@4.4.3) + '@vercel/oidc': 3.2.0 + zod: 4.4.3 + + '@ai-sdk/google-vertex@4.0.118(zod@4.3.6)': + dependencies: + '@ai-sdk/anthropic': 3.0.74(zod@4.3.6) + '@ai-sdk/google': 3.0.67(zod@4.3.6) + '@ai-sdk/openai-compatible': 2.0.45(zod@4.3.6) + '@ai-sdk/provider': 3.0.10 + '@ai-sdk/provider-utils': 4.0.26(zod@4.3.6) + google-auth-library: 10.6.2 + zod: 4.3.6 + transitivePeerDependencies: + - supports-color + + '@ai-sdk/google-vertex@4.0.118(zod@4.4.3)': + dependencies: + '@ai-sdk/anthropic': 3.0.74(zod@4.4.3) + '@ai-sdk/google': 3.0.67(zod@4.4.3) + '@ai-sdk/openai-compatible': 2.0.45(zod@4.4.3) + '@ai-sdk/provider': 3.0.10 + '@ai-sdk/provider-utils': 4.0.26(zod@4.4.3) + google-auth-library: 10.6.2 + zod: 4.4.3 + transitivePeerDependencies: + - supports-color + + '@ai-sdk/google@3.0.67(zod@4.3.6)': + dependencies: + '@ai-sdk/provider': 3.0.10 + '@ai-sdk/provider-utils': 4.0.26(zod@4.3.6) + zod: 4.3.6 + + '@ai-sdk/google@3.0.67(zod@4.4.3)': + dependencies: + '@ai-sdk/provider': 3.0.10 + '@ai-sdk/provider-utils': 4.0.26(zod@4.4.3) + zod: 4.4.3 + + '@ai-sdk/openai-compatible@2.0.45(zod@4.3.6)': + dependencies: + '@ai-sdk/provider': 3.0.10 + '@ai-sdk/provider-utils': 4.0.26(zod@4.3.6) + zod: 4.3.6 + + '@ai-sdk/openai-compatible@2.0.45(zod@4.4.3)': + dependencies: + '@ai-sdk/provider': 3.0.10 + '@ai-sdk/provider-utils': 4.0.26(zod@4.4.3) + zod: 
4.4.3 + + '@ai-sdk/provider-utils@4.0.23(zod@4.3.6)': + dependencies: + '@ai-sdk/provider': 3.0.8 + '@standard-schema/spec': 1.1.0 + eventsource-parser: 3.0.8 + zod: 4.3.6 + + '@ai-sdk/provider-utils@4.0.23(zod@4.4.3)': + dependencies: + '@ai-sdk/provider': 3.0.8 + '@standard-schema/spec': 1.1.0 + eventsource-parser: 3.0.8 + zod: 4.4.3 + + '@ai-sdk/provider-utils@4.0.26(zod@4.3.6)': + dependencies: + '@ai-sdk/provider': 3.0.10 + '@standard-schema/spec': 1.1.0 + eventsource-parser: 3.0.8 + zod: 4.3.6 + + '@ai-sdk/provider-utils@4.0.26(zod@4.4.3)': + dependencies: + '@ai-sdk/provider': 3.0.10 + '@standard-schema/spec': 1.1.0 + eventsource-parser: 3.0.8 + zod: 4.4.3 + + '@ai-sdk/provider@3.0.10': + dependencies: + json-schema: 0.4.0 + + '@ai-sdk/provider@3.0.8': + dependencies: + json-schema: 0.4.0 + + '@alcalzone/ansi-tokenize@0.3.0': + dependencies: + ansi-styles: 6.2.3 + is-fullwidth-code-point: 5.1.0 + + '@aws-crypto/crc32@5.2.0': + dependencies: + '@aws-crypto/util': 5.2.0 + '@aws-sdk/types': 3.973.8 + tslib: 2.8.1 + + '@aws-crypto/crc32c@5.2.0': + dependencies: + '@aws-crypto/util': 5.2.0 + '@aws-sdk/types': 3.973.8 + tslib: 2.8.1 + + '@aws-crypto/sha1-browser@5.2.0': + dependencies: + '@aws-crypto/supports-web-crypto': 5.2.0 + '@aws-crypto/util': 5.2.0 + '@aws-sdk/types': 3.973.8 + '@aws-sdk/util-locate-window': 3.965.5 + '@smithy/util-utf8': 2.3.0 + tslib: 2.8.1 + + '@aws-crypto/sha256-browser@5.2.0': + dependencies: + '@aws-crypto/sha256-js': 5.2.0 + '@aws-crypto/supports-web-crypto': 5.2.0 + '@aws-crypto/util': 5.2.0 + '@aws-sdk/types': 3.973.8 + '@aws-sdk/util-locate-window': 3.965.5 + '@smithy/util-utf8': 2.3.0 + tslib: 2.8.1 + + '@aws-crypto/sha256-js@5.2.0': + dependencies: + '@aws-crypto/util': 5.2.0 + '@aws-sdk/types': 3.973.8 + tslib: 2.8.1 + + '@aws-crypto/supports-web-crypto@5.2.0': + dependencies: + tslib: 2.8.1 + + '@aws-crypto/util@5.2.0': + dependencies: + '@aws-sdk/types': 3.973.8 + '@smithy/util-utf8': 2.3.0 + tslib: 2.8.1 + + 
'@aws-sdk/client-s3@3.1039.0': + dependencies: + '@aws-crypto/sha1-browser': 5.2.0 + '@aws-crypto/sha256-browser': 5.2.0 + '@aws-crypto/sha256-js': 5.2.0 + '@aws-sdk/core': 3.974.7 + '@aws-sdk/credential-provider-node': 3.972.38 + '@aws-sdk/middleware-bucket-endpoint': 3.972.10 + '@aws-sdk/middleware-expect-continue': 3.972.10 + '@aws-sdk/middleware-flexible-checksums': 3.974.15 + '@aws-sdk/middleware-host-header': 3.972.10 + '@aws-sdk/middleware-location-constraint': 3.972.10 + '@aws-sdk/middleware-logger': 3.972.10 + '@aws-sdk/middleware-recursion-detection': 3.972.11 + '@aws-sdk/middleware-sdk-s3': 3.972.36 + '@aws-sdk/middleware-ssec': 3.972.10 + '@aws-sdk/middleware-user-agent': 3.972.37 + '@aws-sdk/region-config-resolver': 3.972.13 + '@aws-sdk/signature-v4-multi-region': 3.996.24 + '@aws-sdk/types': 3.973.8 + '@aws-sdk/util-endpoints': 3.996.8 + '@aws-sdk/util-user-agent-browser': 3.972.10 + '@aws-sdk/util-user-agent-node': 3.973.23 + '@smithy/config-resolver': 4.4.17 + '@smithy/core': 3.23.17 + '@smithy/eventstream-serde-browser': 4.2.14 + '@smithy/eventstream-serde-config-resolver': 4.3.14 + '@smithy/eventstream-serde-node': 4.2.14 + '@smithy/fetch-http-handler': 5.3.17 + '@smithy/hash-blob-browser': 4.2.15 + '@smithy/hash-node': 4.2.14 + '@smithy/hash-stream-node': 4.2.14 + '@smithy/invalid-dependency': 4.2.14 + '@smithy/md5-js': 4.2.14 + '@smithy/middleware-content-length': 4.2.14 + '@smithy/middleware-endpoint': 4.4.32 + '@smithy/middleware-retry': 4.5.7 + '@smithy/middleware-serde': 4.2.20 + '@smithy/middleware-stack': 4.2.14 + '@smithy/node-config-provider': 4.3.14 + '@smithy/node-http-handler': 4.6.1 + '@smithy/protocol-http': 5.3.14 + '@smithy/smithy-client': 4.12.13 + '@smithy/types': 4.14.1 + '@smithy/url-parser': 4.2.14 + '@smithy/util-base64': 4.3.2 + '@smithy/util-body-length-browser': 4.2.2 + '@smithy/util-body-length-node': 4.2.3 + '@smithy/util-defaults-mode-browser': 4.3.49 + '@smithy/util-defaults-mode-node': 4.2.54 + 
'@smithy/util-endpoints': 3.4.2 + '@smithy/util-middleware': 4.2.14 + '@smithy/util-retry': 4.3.6 + '@smithy/util-stream': 4.5.25 + '@smithy/util-utf8': 4.2.2 + '@smithy/util-waiter': 4.3.0 + tslib: 2.8.1 + transitivePeerDependencies: + - aws-crt + + '@aws-sdk/client-sts@3.1039.0': + dependencies: + '@aws-crypto/sha256-browser': 5.2.0 + '@aws-crypto/sha256-js': 5.2.0 + '@aws-sdk/core': 3.974.7 + '@aws-sdk/credential-provider-node': 3.972.38 + '@aws-sdk/middleware-host-header': 3.972.10 + '@aws-sdk/middleware-logger': 3.972.10 + '@aws-sdk/middleware-recursion-detection': 3.972.11 + '@aws-sdk/middleware-user-agent': 3.972.37 + '@aws-sdk/region-config-resolver': 3.972.13 + '@aws-sdk/signature-v4-multi-region': 3.996.24 + '@aws-sdk/types': 3.973.8 + '@aws-sdk/util-endpoints': 3.996.8 + '@aws-sdk/util-user-agent-browser': 3.972.10 + '@aws-sdk/util-user-agent-node': 3.973.23 + '@smithy/config-resolver': 4.4.17 + '@smithy/core': 3.23.17 + '@smithy/fetch-http-handler': 5.3.17 + '@smithy/hash-node': 4.2.14 + '@smithy/invalid-dependency': 4.2.14 + '@smithy/middleware-content-length': 4.2.14 + '@smithy/middleware-endpoint': 4.4.32 + '@smithy/middleware-retry': 4.5.7 + '@smithy/middleware-serde': 4.2.20 + '@smithy/middleware-stack': 4.2.14 + '@smithy/node-config-provider': 4.3.14 + '@smithy/node-http-handler': 4.6.1 + '@smithy/protocol-http': 5.3.14 + '@smithy/smithy-client': 4.12.13 + '@smithy/types': 4.14.1 + '@smithy/url-parser': 4.2.14 + '@smithy/util-base64': 4.3.2 + '@smithy/util-body-length-browser': 4.2.2 + '@smithy/util-body-length-node': 4.2.3 + '@smithy/util-defaults-mode-browser': 4.3.49 + '@smithy/util-defaults-mode-node': 4.2.54 + '@smithy/util-endpoints': 3.4.2 + '@smithy/util-middleware': 4.2.14 + '@smithy/util-retry': 4.3.6 + '@smithy/util-utf8': 4.2.2 + tslib: 2.8.1 + transitivePeerDependencies: + - aws-crt + + '@aws-sdk/core@3.974.7': + dependencies: + '@aws-sdk/types': 3.973.8 + '@aws-sdk/xml-builder': 3.972.22 + '@smithy/core': 3.23.17 + 
'@smithy/node-config-provider': 4.3.14 + '@smithy/property-provider': 4.2.14 + '@smithy/protocol-http': 5.3.14 + '@smithy/signature-v4': 5.3.14 + '@smithy/smithy-client': 4.12.13 + '@smithy/types': 4.14.1 + '@smithy/util-base64': 4.3.2 + '@smithy/util-middleware': 4.2.14 + '@smithy/util-retry': 4.3.6 + '@smithy/util-utf8': 4.2.2 + tslib: 2.8.1 + + '@aws-sdk/crc64-nvme@3.972.7': + dependencies: + '@smithy/types': 4.14.1 + tslib: 2.8.1 + + '@aws-sdk/credential-provider-env@3.972.33': + dependencies: + '@aws-sdk/core': 3.974.7 + '@aws-sdk/types': 3.973.8 + '@smithy/property-provider': 4.2.14 + '@smithy/types': 4.14.1 + tslib: 2.8.1 + + '@aws-sdk/credential-provider-http@3.972.35': + dependencies: + '@aws-sdk/core': 3.974.7 + '@aws-sdk/types': 3.973.8 + '@smithy/fetch-http-handler': 5.3.17 + '@smithy/node-http-handler': 4.6.1 + '@smithy/property-provider': 4.2.14 + '@smithy/protocol-http': 5.3.14 + '@smithy/smithy-client': 4.12.13 + '@smithy/types': 4.14.1 + '@smithy/util-stream': 4.5.25 + tslib: 2.8.1 + + '@aws-sdk/credential-provider-ini@3.972.37': + dependencies: + '@aws-sdk/core': 3.974.7 + '@aws-sdk/credential-provider-env': 3.972.33 + '@aws-sdk/credential-provider-http': 3.972.35 + '@aws-sdk/credential-provider-login': 3.972.37 + '@aws-sdk/credential-provider-process': 3.972.33 + '@aws-sdk/credential-provider-sso': 3.972.37 + '@aws-sdk/credential-provider-web-identity': 3.972.37 + '@aws-sdk/nested-clients': 3.997.5 + '@aws-sdk/types': 3.973.8 + '@smithy/credential-provider-imds': 4.2.14 + '@smithy/property-provider': 4.2.14 + '@smithy/shared-ini-file-loader': 4.4.9 + '@smithy/types': 4.14.1 + tslib: 2.8.1 + transitivePeerDependencies: + - aws-crt + + '@aws-sdk/credential-provider-login@3.972.37': + dependencies: + '@aws-sdk/core': 3.974.7 + '@aws-sdk/nested-clients': 3.997.5 + '@aws-sdk/types': 3.973.8 + '@smithy/property-provider': 4.2.14 + '@smithy/protocol-http': 5.3.14 + '@smithy/shared-ini-file-loader': 4.4.9 + '@smithy/types': 4.14.1 + tslib: 2.8.1 + 
transitivePeerDependencies: + - aws-crt + + '@aws-sdk/credential-provider-node@3.972.38': + dependencies: + '@aws-sdk/credential-provider-env': 3.972.33 + '@aws-sdk/credential-provider-http': 3.972.35 + '@aws-sdk/credential-provider-ini': 3.972.37 + '@aws-sdk/credential-provider-process': 3.972.33 + '@aws-sdk/credential-provider-sso': 3.972.37 + '@aws-sdk/credential-provider-web-identity': 3.972.37 + '@aws-sdk/types': 3.973.8 + '@smithy/credential-provider-imds': 4.2.14 + '@smithy/property-provider': 4.2.14 + '@smithy/shared-ini-file-loader': 4.4.9 + '@smithy/types': 4.14.1 + tslib: 2.8.1 + transitivePeerDependencies: + - aws-crt + + '@aws-sdk/credential-provider-process@3.972.33': + dependencies: + '@aws-sdk/core': 3.974.7 + '@aws-sdk/types': 3.973.8 + '@smithy/property-provider': 4.2.14 + '@smithy/shared-ini-file-loader': 4.4.9 + '@smithy/types': 4.14.1 + tslib: 2.8.1 + + '@aws-sdk/credential-provider-sso@3.972.37': + dependencies: + '@aws-sdk/core': 3.974.7 + '@aws-sdk/nested-clients': 3.997.5 + '@aws-sdk/token-providers': 3.1039.0 + '@aws-sdk/types': 3.973.8 + '@smithy/property-provider': 4.2.14 + '@smithy/shared-ini-file-loader': 4.4.9 + '@smithy/types': 4.14.1 + tslib: 2.8.1 + transitivePeerDependencies: + - aws-crt + + '@aws-sdk/credential-provider-web-identity@3.972.37': + dependencies: + '@aws-sdk/core': 3.974.7 + '@aws-sdk/nested-clients': 3.997.5 + '@aws-sdk/types': 3.973.8 + '@smithy/property-provider': 4.2.14 + '@smithy/shared-ini-file-loader': 4.4.9 + '@smithy/types': 4.14.1 + tslib: 2.8.1 + transitivePeerDependencies: + - aws-crt + + '@aws-sdk/ec2-metadata-service@3.1039.0': + dependencies: + '@aws-sdk/types': 3.973.8 + '@smithy/node-config-provider': 4.3.14 + '@smithy/node-http-handler': 4.6.1 + '@smithy/protocol-http': 5.3.14 + '@smithy/types': 4.14.1 + '@smithy/util-stream': 4.5.25 + tslib: 2.8.1 + + '@aws-sdk/middleware-bucket-endpoint@3.972.10': + dependencies: + '@aws-sdk/types': 3.973.8 + '@aws-sdk/util-arn-parser': 3.972.3 + 
'@smithy/node-config-provider': 4.3.14 + '@smithy/protocol-http': 5.3.14 + '@smithy/types': 4.14.1 + '@smithy/util-config-provider': 4.2.2 + tslib: 2.8.1 + + '@aws-sdk/middleware-expect-continue@3.972.10': + dependencies: + '@aws-sdk/types': 3.973.8 + '@smithy/protocol-http': 5.3.14 + '@smithy/types': 4.14.1 + tslib: 2.8.1 + + '@aws-sdk/middleware-flexible-checksums@3.974.15': + dependencies: + '@aws-crypto/crc32': 5.2.0 + '@aws-crypto/crc32c': 5.2.0 + '@aws-crypto/util': 5.2.0 + '@aws-sdk/core': 3.974.7 + '@aws-sdk/crc64-nvme': 3.972.7 + '@aws-sdk/types': 3.973.8 + '@smithy/is-array-buffer': 4.2.2 + '@smithy/node-config-provider': 4.3.14 + '@smithy/protocol-http': 5.3.14 + '@smithy/types': 4.14.1 + '@smithy/util-middleware': 4.2.14 + '@smithy/util-stream': 4.5.25 + '@smithy/util-utf8': 4.2.2 + tslib: 2.8.1 + + '@aws-sdk/middleware-host-header@3.972.10': + dependencies: + '@aws-sdk/types': 3.973.8 + '@smithy/protocol-http': 5.3.14 + '@smithy/types': 4.14.1 + tslib: 2.8.1 + + '@aws-sdk/middleware-location-constraint@3.972.10': + dependencies: + '@aws-sdk/types': 3.973.8 + '@smithy/types': 4.14.1 + tslib: 2.8.1 + + '@aws-sdk/middleware-logger@3.972.10': + dependencies: + '@aws-sdk/types': 3.973.8 + '@smithy/types': 4.14.1 + tslib: 2.8.1 + + '@aws-sdk/middleware-recursion-detection@3.972.11': + dependencies: + '@aws-sdk/types': 3.973.8 + '@aws/lambda-invoke-store': 0.2.4 + '@smithy/protocol-http': 5.3.14 + '@smithy/types': 4.14.1 + tslib: 2.8.1 + + '@aws-sdk/middleware-sdk-s3@3.972.36': + dependencies: + '@aws-sdk/core': 3.974.7 + '@aws-sdk/types': 3.973.8 + '@aws-sdk/util-arn-parser': 3.972.3 + '@smithy/core': 3.23.17 + '@smithy/node-config-provider': 4.3.14 + '@smithy/protocol-http': 5.3.14 + '@smithy/signature-v4': 5.3.14 + '@smithy/smithy-client': 4.12.13 + '@smithy/types': 4.14.1 + '@smithy/util-config-provider': 4.2.2 + '@smithy/util-middleware': 4.2.14 + '@smithy/util-stream': 4.5.25 + '@smithy/util-utf8': 4.2.2 + tslib: 2.8.1 + + 
'@aws-sdk/middleware-ssec@3.972.10': + dependencies: + '@aws-sdk/types': 3.973.8 + '@smithy/types': 4.14.1 + tslib: 2.8.1 + + '@aws-sdk/middleware-user-agent@3.972.37': + dependencies: + '@aws-sdk/core': 3.974.7 + '@aws-sdk/types': 3.973.8 + '@aws-sdk/util-endpoints': 3.996.8 + '@smithy/core': 3.23.17 + '@smithy/protocol-http': 5.3.14 + '@smithy/types': 4.14.1 + '@smithy/util-retry': 4.3.6 + tslib: 2.8.1 + + '@aws-sdk/nested-clients@3.997.5': + dependencies: + '@aws-crypto/sha256-browser': 5.2.0 + '@aws-crypto/sha256-js': 5.2.0 + '@aws-sdk/core': 3.974.7 + '@aws-sdk/middleware-host-header': 3.972.10 + '@aws-sdk/middleware-logger': 3.972.10 + '@aws-sdk/middleware-recursion-detection': 3.972.11 + '@aws-sdk/middleware-user-agent': 3.972.37 + '@aws-sdk/region-config-resolver': 3.972.13 + '@aws-sdk/signature-v4-multi-region': 3.996.24 + '@aws-sdk/types': 3.973.8 + '@aws-sdk/util-endpoints': 3.996.8 + '@aws-sdk/util-user-agent-browser': 3.972.10 + '@aws-sdk/util-user-agent-node': 3.973.23 + '@smithy/config-resolver': 4.4.17 + '@smithy/core': 3.23.17 + '@smithy/fetch-http-handler': 5.3.17 + '@smithy/hash-node': 4.2.14 + '@smithy/invalid-dependency': 4.2.14 + '@smithy/middleware-content-length': 4.2.14 + '@smithy/middleware-endpoint': 4.4.32 + '@smithy/middleware-retry': 4.5.7 + '@smithy/middleware-serde': 4.2.20 + '@smithy/middleware-stack': 4.2.14 + '@smithy/node-config-provider': 4.3.14 + '@smithy/node-http-handler': 4.6.1 + '@smithy/protocol-http': 5.3.14 + '@smithy/smithy-client': 4.12.13 + '@smithy/types': 4.14.1 + '@smithy/url-parser': 4.2.14 + '@smithy/util-base64': 4.3.2 + '@smithy/util-body-length-browser': 4.2.2 + '@smithy/util-body-length-node': 4.2.3 + '@smithy/util-defaults-mode-browser': 4.3.49 + '@smithy/util-defaults-mode-node': 4.2.54 + '@smithy/util-endpoints': 3.4.2 + '@smithy/util-middleware': 4.2.14 + '@smithy/util-retry': 4.3.6 + '@smithy/util-utf8': 4.2.2 + tslib: 2.8.1 + transitivePeerDependencies: + - aws-crt + + 
'@aws-sdk/region-config-resolver@3.972.13': + dependencies: + '@aws-sdk/types': 3.973.8 + '@smithy/config-resolver': 4.4.17 + '@smithy/node-config-provider': 4.3.14 + '@smithy/types': 4.14.1 + tslib: 2.8.1 + + '@aws-sdk/signature-v4-multi-region@3.996.24': + dependencies: + '@aws-sdk/middleware-sdk-s3': 3.972.36 + '@aws-sdk/types': 3.973.8 + '@smithy/protocol-http': 5.3.14 + '@smithy/signature-v4': 5.3.14 + '@smithy/types': 4.14.1 + tslib: 2.8.1 + + '@aws-sdk/token-providers@3.1039.0': + dependencies: + '@aws-sdk/core': 3.974.7 + '@aws-sdk/nested-clients': 3.997.5 + '@aws-sdk/types': 3.973.8 + '@smithy/property-provider': 4.2.14 + '@smithy/shared-ini-file-loader': 4.4.9 + '@smithy/types': 4.14.1 + tslib: 2.8.1 + transitivePeerDependencies: + - aws-crt + + '@aws-sdk/types@3.973.8': + dependencies: + '@smithy/types': 4.14.1 + tslib: 2.8.1 + + '@aws-sdk/util-arn-parser@3.972.3': + dependencies: + tslib: 2.8.1 + + '@aws-sdk/util-endpoints@3.996.8': + dependencies: + '@aws-sdk/types': 3.973.8 + '@smithy/types': 4.14.1 + '@smithy/url-parser': 4.2.14 + '@smithy/util-endpoints': 3.4.2 + tslib: 2.8.1 + + '@aws-sdk/util-locate-window@3.965.5': + dependencies: + tslib: 2.8.1 + + '@aws-sdk/util-user-agent-browser@3.972.10': + dependencies: + '@aws-sdk/types': 3.973.8 + '@smithy/types': 4.14.1 + bowser: 2.14.1 + tslib: 2.8.1 + + '@aws-sdk/util-user-agent-node@3.973.23': + dependencies: + '@aws-sdk/middleware-user-agent': 3.972.37 + '@aws-sdk/types': 3.973.8 + '@smithy/node-config-provider': 4.3.14 + '@smithy/types': 4.14.1 + '@smithy/util-config-provider': 4.2.2 + tslib: 2.8.1 + + '@aws-sdk/xml-builder@3.972.22': + dependencies: + '@nodable/entities': 2.1.0 + '@smithy/types': 4.14.1 + fast-xml-parser: 5.7.2 + tslib: 2.8.1 + + '@aws/lambda-invoke-store@0.2.4': {} + + '@azure-rest/core-client@2.6.0': + dependencies: + '@azure/abort-controller': 2.1.2 + '@azure/core-auth': 1.10.1 + '@azure/core-rest-pipeline': 1.23.0 + '@azure/core-tracing': 1.3.1 + '@typespec/ts-http-runtime': 
0.3.5 + tslib: 2.8.1 + transitivePeerDependencies: + - supports-color + + '@azure/abort-controller@2.1.2': + dependencies: + tslib: 2.8.1 + + '@azure/core-auth@1.10.1': + dependencies: + '@azure/abort-controller': 2.1.2 + '@azure/core-util': 1.13.1 + tslib: 2.8.1 + transitivePeerDependencies: + - supports-color + + '@azure/core-client@1.10.1': + dependencies: + '@azure/abort-controller': 2.1.2 + '@azure/core-auth': 1.10.1 + '@azure/core-rest-pipeline': 1.23.0 + '@azure/core-tracing': 1.3.1 + '@azure/core-util': 1.13.1 + '@azure/logger': 1.3.0 + tslib: 2.8.1 + transitivePeerDependencies: + - supports-color + + '@azure/core-http-compat@2.4.0(@azure/core-client@1.10.1)(@azure/core-rest-pipeline@1.23.0)': + dependencies: + '@azure/abort-controller': 2.1.2 + '@azure/core-client': 1.10.1 + '@azure/core-rest-pipeline': 1.23.0 + + '@azure/core-http-compat@2.4.0(@azure/core-rest-pipeline@1.23.0)': + dependencies: + '@azure/abort-controller': 2.1.2 + '@azure/core-rest-pipeline': 1.23.0 + + '@azure/core-lro@2.7.2': + dependencies: + '@azure/abort-controller': 2.1.2 + '@azure/core-util': 1.13.1 + '@azure/logger': 1.3.0 + tslib: 2.8.1 + transitivePeerDependencies: + - supports-color + + '@azure/core-paging@1.6.2': + dependencies: + tslib: 2.8.1 + + '@azure/core-rest-pipeline@1.23.0': + dependencies: + '@azure/abort-controller': 2.1.2 + '@azure/core-auth': 1.10.1 + '@azure/core-tracing': 1.3.1 + '@azure/core-util': 1.13.1 + '@azure/logger': 1.3.0 + '@typespec/ts-http-runtime': 0.3.5 + tslib: 2.8.1 + transitivePeerDependencies: + - supports-color + + '@azure/core-tracing@1.3.1': + dependencies: + tslib: 2.8.1 + + '@azure/core-util@1.13.1': + dependencies: + '@azure/abort-controller': 2.1.2 + '@typespec/ts-http-runtime': 0.3.5 + tslib: 2.8.1 + transitivePeerDependencies: + - supports-color + + '@azure/core-xml@1.5.1': + dependencies: + fast-xml-parser: 5.7.2 + tslib: 2.8.1 + + '@azure/identity@4.13.1': + dependencies: + '@azure/abort-controller': 2.1.2 + '@azure/core-auth': 1.10.1 
+ '@azure/core-client': 1.10.1 + '@azure/core-rest-pipeline': 1.23.0 + '@azure/core-tracing': 1.3.1 + '@azure/core-util': 1.13.1 + '@azure/logger': 1.3.0 + '@azure/msal-browser': 5.9.0 + '@azure/msal-node': 5.1.5 + open: 10.2.0 + tslib: 2.8.1 + transitivePeerDependencies: + - supports-color + + '@azure/keyvault-common@2.1.0': + dependencies: + '@azure-rest/core-client': 2.6.0 + '@azure/abort-controller': 2.1.2 + '@azure/core-auth': 1.10.1 + '@azure/core-rest-pipeline': 1.23.0 + '@azure/core-tracing': 1.3.1 + '@azure/core-util': 1.13.1 + '@azure/logger': 1.3.0 + tslib: 2.8.1 + transitivePeerDependencies: + - supports-color + + '@azure/keyvault-keys@4.10.0': + dependencies: + '@azure-rest/core-client': 2.6.0 + '@azure/abort-controller': 2.1.2 + '@azure/core-auth': 1.10.1 + '@azure/core-http-compat': 2.4.0(@azure/core-rest-pipeline@1.23.0) + '@azure/core-lro': 2.7.2 + '@azure/core-paging': 1.6.2 + '@azure/core-rest-pipeline': 1.23.0 + '@azure/core-tracing': 1.3.1 + '@azure/core-util': 1.13.1 + '@azure/keyvault-common': 2.1.0 + '@azure/logger': 1.3.0 + tslib: 2.8.1 + transitivePeerDependencies: + - '@azure/core-client' + - supports-color + + '@azure/keyvault-keys@4.10.0(@azure/core-client@1.10.1)': + dependencies: + '@azure-rest/core-client': 2.6.0 + '@azure/abort-controller': 2.1.2 + '@azure/core-auth': 1.10.1 + '@azure/core-http-compat': 2.4.0(@azure/core-client@1.10.1)(@azure/core-rest-pipeline@1.23.0) + '@azure/core-lro': 2.7.2 + '@azure/core-paging': 1.6.2 + '@azure/core-rest-pipeline': 1.23.0 + '@azure/core-tracing': 1.3.1 + '@azure/core-util': 1.13.1 + '@azure/keyvault-common': 2.1.0 + '@azure/logger': 1.3.0 + tslib: 2.8.1 + transitivePeerDependencies: + - '@azure/core-client' + - supports-color + + '@azure/logger@1.3.0': + dependencies: + '@typespec/ts-http-runtime': 0.3.5 + tslib: 2.8.1 + transitivePeerDependencies: + - supports-color + + '@azure/msal-browser@5.9.0': + dependencies: + '@azure/msal-common': 16.5.2 + + '@azure/msal-common@16.5.2': {} + + 
'@azure/msal-node@5.1.5': + dependencies: + '@azure/msal-common': 16.5.2 + jsonwebtoken: 9.0.3 + + '@azure/storage-blob@12.26.0': + dependencies: + '@azure/abort-controller': 2.1.2 + '@azure/core-auth': 1.10.1 + '@azure/core-client': 1.10.1 + '@azure/core-http-compat': 2.4.0(@azure/core-client@1.10.1)(@azure/core-rest-pipeline@1.23.0) + '@azure/core-lro': 2.7.2 + '@azure/core-paging': 1.6.2 + '@azure/core-rest-pipeline': 1.23.0 + '@azure/core-tracing': 1.3.1 + '@azure/core-util': 1.13.1 + '@azure/core-xml': 1.5.1 + '@azure/logger': 1.3.0 + events: 3.3.0 + tslib: 2.8.1 + transitivePeerDependencies: + - supports-color + + '@clack/core@1.3.0': + dependencies: + fast-wrap-ansi: 0.2.0 + sisteransi: 1.0.5 + + '@clack/prompts@1.3.0': + dependencies: + '@clack/core': 1.3.0 + fast-string-width: 3.0.2 + fast-wrap-ansi: 0.2.0 + sisteransi: 1.0.5 + + '@clickhouse/client-common@1.18.3': {} + + '@clickhouse/client@1.18.3': + dependencies: + '@clickhouse/client-common': 1.18.3 + + '@colors/colors@1.6.0': {} + + '@commander-js/extra-typings@14.0.0(commander@14.0.3)': + dependencies: + commander: 14.0.3 + + '@dabh/diagnostics@2.0.8': + dependencies: + '@so-ric/colorspace': 1.1.6 + enabled: 2.0.0 + kuler: 2.0.0 + + '@electric-sql/pglite-socket@0.1.5(@electric-sql/pglite@0.4.5)': + dependencies: + '@electric-sql/pglite': 0.4.5 + + '@electric-sql/pglite@0.4.5': {} + + '@emnapi/core@1.10.0': + dependencies: + '@emnapi/wasi-threads': 1.2.1 + tslib: 2.8.1 + optional: true + + '@emnapi/runtime@1.10.0': + dependencies: + tslib: 2.8.1 + optional: true + + '@emnapi/wasi-threads@1.2.1': + dependencies: + tslib: 2.8.1 + optional: true + + '@esbuild/aix-ppc64@0.27.7': + optional: true + + '@esbuild/android-arm64@0.27.7': + optional: true + + '@esbuild/android-arm@0.27.7': + optional: true + + '@esbuild/android-x64@0.27.7': + optional: true + + '@esbuild/darwin-arm64@0.27.7': + optional: true + + '@esbuild/darwin-x64@0.27.7': + optional: true + + '@esbuild/freebsd-arm64@0.27.7': + optional: true 
+ + '@esbuild/freebsd-x64@0.27.7': + optional: true + + '@esbuild/linux-arm64@0.27.7': + optional: true + + '@esbuild/linux-arm@0.27.7': + optional: true + + '@esbuild/linux-ia32@0.27.7': + optional: true + + '@esbuild/linux-loong64@0.27.7': + optional: true + + '@esbuild/linux-mips64el@0.27.7': + optional: true + + '@esbuild/linux-ppc64@0.27.7': + optional: true + + '@esbuild/linux-riscv64@0.27.7': + optional: true + + '@esbuild/linux-s390x@0.27.7': + optional: true + + '@esbuild/linux-x64@0.27.7': + optional: true + + '@esbuild/netbsd-arm64@0.27.7': + optional: true + + '@esbuild/netbsd-x64@0.27.7': + optional: true + + '@esbuild/openbsd-arm64@0.27.7': + optional: true + + '@esbuild/openbsd-x64@0.27.7': + optional: true + + '@esbuild/openharmony-arm64@0.27.7': + optional: true + + '@esbuild/sunos-x64@0.27.7': + optional: true + + '@esbuild/win32-arm64@0.27.7': + optional: true + + '@esbuild/win32-ia32@0.27.7': + optional: true + + '@esbuild/win32-x64@0.27.7': + optional: true + + '@google-cloud/bigquery@8.3.0': + dependencies: + '@google-cloud/common': 6.0.0 + '@google-cloud/paginator': 6.0.0 + '@google-cloud/precise-date': 5.0.0 + '@google-cloud/promisify': 5.0.0 + arrify: 3.0.0 + big.js: 7.0.1 + duplexify: 4.1.3 + extend: 3.0.2 + stream-events: 1.0.5 + teeny-request: 10.1.2 + transitivePeerDependencies: + - supports-color + + '@google-cloud/common@6.0.0': + dependencies: + '@google-cloud/projectify': 4.0.0 + '@google-cloud/promisify': 4.1.0 + arrify: 2.0.1 + duplexify: 4.1.3 + extend: 3.0.2 + google-auth-library: 10.6.2 + html-entities: 2.6.0 + retry-request: 8.0.2 + teeny-request: 10.1.2 + transitivePeerDependencies: + - supports-color + + '@google-cloud/paginator@6.0.0': + dependencies: + extend: 3.0.2 + + '@google-cloud/precise-date@5.0.0': {} + + '@google-cloud/projectify@4.0.0': {} + + '@google-cloud/promisify@4.1.0': {} + + '@google-cloud/promisify@5.0.0': {} + + '@hono/node-server@1.19.14(hono@4.12.15)': + dependencies: + hono: 4.12.15 + + 
'@jridgewell/sourcemap-codec@1.5.5': {} + + '@js-joda/core@5.7.0': {} + + '@klo/connector-bigquery@file:packages/connector-bigquery(ws@8.20.0)': + dependencies: + '@google-cloud/bigquery': 8.3.0 + '@klo/context': file:packages/context(ws@8.20.0) + transitivePeerDependencies: + - '@cfworker/json-schema' + - js-yaml + - pg-native + - supports-color + - ws + + '@klo/connector-clickhouse@file:packages/connector-clickhouse(ws@8.20.0)': + dependencies: + '@clickhouse/client': 1.18.3 + '@klo/context': file:packages/context(ws@8.20.0) + transitivePeerDependencies: + - '@cfworker/json-schema' + - js-yaml + - pg-native + - supports-color + - ws + + '@klo/connector-mysql@file:packages/connector-mysql(@types/node@24.12.2)(ws@8.20.0)': + dependencies: + '@klo/context': file:packages/context(ws@8.20.0) + mysql2: 3.22.3(@types/node@24.12.2) + transitivePeerDependencies: + - '@cfworker/json-schema' + - '@types/node' + - js-yaml + - pg-native + - supports-color + - ws + + '@klo/connector-postgres@file:packages/connector-postgres(ws@8.20.0)': + dependencies: + '@klo/context': file:packages/context(ws@8.20.0) + pg: 8.20.0 + transitivePeerDependencies: + - '@cfworker/json-schema' + - js-yaml + - pg-native + - supports-color + - ws + + '@klo/connector-posthog@file:packages/connector-posthog(ws@8.20.0)': + dependencies: + '@klo/context': file:packages/context(ws@8.20.0) + transitivePeerDependencies: + - '@cfworker/json-schema' + - js-yaml + - pg-native + - supports-color + - ws + + '@klo/connector-snowflake@file:packages/connector-snowflake(asn1.js@5.4.1)(ws@8.20.0)': + dependencies: + '@klo/context': file:packages/context(ws@8.20.0) + snowflake-sdk: 2.4.0(asn1.js@5.4.1) + transitivePeerDependencies: + - '@cfworker/json-schema' + - asn1.js + - aws-crt + - debug + - js-yaml + - pg-native + - supports-color + - ws + + '@klo/connector-sqlite@file:packages/connector-sqlite(ws@8.20.0)': + dependencies: + '@klo/context': file:packages/context(ws@8.20.0) + better-sqlite3: 12.9.0 + 
transitivePeerDependencies: + - '@cfworker/json-schema' + - js-yaml + - pg-native + - supports-color + - ws + + '@klo/connector-sqlserver@file:packages/connector-sqlserver(ws@8.20.0)': + dependencies: + '@klo/context': file:packages/context(ws@8.20.0) + mssql: 12.5.0 + transitivePeerDependencies: + - '@azure/core-client' + - '@cfworker/json-schema' + - js-yaml + - pg-native + - supports-color + - ws + + '@klo/context@file:packages/context': + dependencies: + '@klo/llm': file:packages/llm(zod@4.4.3) + '@looker/sdk': 26.6.1 + '@looker/sdk-node': 26.6.1 + '@looker/sdk-rtl': 21.6.5 + '@modelcontextprotocol/sdk': 1.29.0(zod@4.4.3) + '@notionhq/client': 5.20.0 + ai: 6.0.168(zod@4.4.3) + better-sqlite3: 12.9.0 + handlebars: 4.7.9 + lookml-parser: 7.1.0 + minimatch: 10.2.5 + p-limit: 7.3.0 + pg: 8.20.0 + simple-git: 3.32.2 + yaml: 2.8.3 + zod: 4.4.3 + transitivePeerDependencies: + - '@cfworker/json-schema' + - js-yaml + - pg-native + - supports-color + - ws + + '@klo/context@file:packages/context(ws@8.20.0)': + dependencies: + '@klo/llm': file:packages/llm(ws@8.20.0)(zod@4.4.3) + '@looker/sdk': 26.6.1 + '@looker/sdk-node': 26.6.1 + '@looker/sdk-rtl': 21.6.5 + '@modelcontextprotocol/sdk': 1.29.0(zod@4.4.3) + '@notionhq/client': 5.20.0 + ai: 6.0.168(zod@4.4.3) + better-sqlite3: 12.9.0 + handlebars: 4.7.9 + lookml-parser: 7.1.0 + minimatch: 10.2.5 + p-limit: 7.3.0 + pg: 8.20.0 + simple-git: 3.32.2 + yaml: 2.8.3 + zod: 4.4.3 + transitivePeerDependencies: + - '@cfworker/json-schema' + - js-yaml + - pg-native + - supports-color + - ws + + '@klo/llm@file:packages/llm(ws@8.20.0)(zod@4.3.6)': + dependencies: + '@ai-sdk/anthropic': 3.0.71(zod@4.3.6) + '@ai-sdk/google-vertex': 4.0.118(zod@4.3.6) + ai: 6.0.168(zod@4.3.6) + openai: 6.35.0(ws@8.20.0)(zod@4.3.6) + transitivePeerDependencies: + - supports-color + - ws + - zod + + '@klo/llm@file:packages/llm(ws@8.20.0)(zod@4.4.3)': + dependencies: + '@ai-sdk/anthropic': 3.0.71(zod@4.4.3) + '@ai-sdk/google-vertex': 4.0.118(zod@4.4.3) + ai: 
6.0.168(zod@4.4.3) + openai: 6.35.0(ws@8.20.0)(zod@4.4.3) + transitivePeerDependencies: + - supports-color + - ws + - zod + + '@klo/llm@file:packages/llm(zod@4.4.3)': + dependencies: + '@ai-sdk/anthropic': 3.0.71(zod@4.4.3) + '@ai-sdk/google-vertex': 4.0.118(zod@4.4.3) + ai: 6.0.168(zod@4.4.3) + openai: 6.35.0(zod@4.4.3) + transitivePeerDependencies: + - supports-color + - ws + - zod + + '@kwsites/file-exists@1.1.1': + dependencies: + debug: 4.4.3 + transitivePeerDependencies: + - supports-color + + '@kwsites/promise-deferred@1.1.1': {} + + '@looker/sdk-node@26.6.1': + dependencies: + '@looker/sdk': 26.6.1 + '@looker/sdk-rtl': 21.6.5 + ini: 5.0.0 + + '@looker/sdk-rtl@21.6.5': {} + + '@looker/sdk@26.6.1': + dependencies: + '@looker/sdk-rtl': 21.6.5 + + '@modelcontextprotocol/sdk@1.29.0(zod@4.3.6)': + dependencies: + '@hono/node-server': 1.19.14(hono@4.12.15) + ajv: 8.20.0 + ajv-formats: 3.0.1(ajv@8.20.0) + content-type: 1.0.5 + cors: 2.8.6 + cross-spawn: 7.0.6 + eventsource: 3.0.7 + eventsource-parser: 3.0.8 + express: 5.2.1 + express-rate-limit: 8.4.1(express@5.2.1) + hono: 4.12.15 + jose: 6.2.2 + json-schema-typed: 8.0.2 + pkce-challenge: 5.0.1 + raw-body: 3.0.2 + zod: 4.3.6 + zod-to-json-schema: 3.25.2(zod@4.3.6) + transitivePeerDependencies: + - supports-color + + '@modelcontextprotocol/sdk@1.29.0(zod@4.4.3)': + dependencies: + '@hono/node-server': 1.19.14(hono@4.12.15) + ajv: 8.20.0 + ajv-formats: 3.0.1(ajv@8.20.0) + content-type: 1.0.5 + cors: 2.8.6 + cross-spawn: 7.0.6 + eventsource: 3.0.7 + eventsource-parser: 3.0.8 + express: 5.2.1 + express-rate-limit: 8.4.1(express@5.2.1) + hono: 4.12.15 + jose: 6.2.2 + json-schema-typed: 8.0.2 + pkce-challenge: 5.0.1 + raw-body: 3.0.2 + zod: 4.4.3 + zod-to-json-schema: 3.25.2(zod@4.4.3) + transitivePeerDependencies: + - supports-color + + '@napi-rs/wasm-runtime@1.1.4(@emnapi/core@1.10.0)(@emnapi/runtime@1.10.0)': + dependencies: + '@emnapi/core': 1.10.0 + '@emnapi/runtime': 1.10.0 + '@tybys/wasm-util': 0.10.1 + optional: 
true + + '@nodable/entities@2.1.0': {} + + '@notionhq/client@5.20.0': {} + + '@opentelemetry/api@1.9.0': {} + + '@oxc-project/types@0.127.0': {} + + '@rolldown/binding-android-arm64@1.0.0-rc.17': + optional: true + + '@rolldown/binding-darwin-arm64@1.0.0-rc.17': + optional: true + + '@rolldown/binding-darwin-x64@1.0.0-rc.17': + optional: true + + '@rolldown/binding-freebsd-x64@1.0.0-rc.17': + optional: true + + '@rolldown/binding-linux-arm-gnueabihf@1.0.0-rc.17': + optional: true + + '@rolldown/binding-linux-arm64-gnu@1.0.0-rc.17': + optional: true + + '@rolldown/binding-linux-arm64-musl@1.0.0-rc.17': + optional: true + + '@rolldown/binding-linux-ppc64-gnu@1.0.0-rc.17': + optional: true + + '@rolldown/binding-linux-s390x-gnu@1.0.0-rc.17': + optional: true + + '@rolldown/binding-linux-x64-gnu@1.0.0-rc.17': + optional: true + + '@rolldown/binding-linux-x64-musl@1.0.0-rc.17': + optional: true + + '@rolldown/binding-openharmony-arm64@1.0.0-rc.17': + optional: true + + '@rolldown/binding-wasm32-wasi@1.0.0-rc.17': + dependencies: + '@emnapi/core': 1.10.0 + '@emnapi/runtime': 1.10.0 + '@napi-rs/wasm-runtime': 1.1.4(@emnapi/core@1.10.0)(@emnapi/runtime@1.10.0) + optional: true + + '@rolldown/binding-win32-arm64-msvc@1.0.0-rc.17': + optional: true + + '@rolldown/binding-win32-x64-msvc@1.0.0-rc.17': + optional: true + + '@rolldown/pluginutils@1.0.0-rc.17': {} + + '@smithy/chunked-blob-reader-native@4.2.3': + dependencies: + '@smithy/util-base64': 4.3.2 + tslib: 2.8.1 + + '@smithy/chunked-blob-reader@5.2.2': + dependencies: + tslib: 2.8.1 + + '@smithy/config-resolver@4.4.17': + dependencies: + '@smithy/node-config-provider': 4.3.14 + '@smithy/types': 4.14.1 + '@smithy/util-config-provider': 4.2.2 + '@smithy/util-endpoints': 3.4.2 + '@smithy/util-middleware': 4.2.14 + tslib: 2.8.1 + + '@smithy/core@3.23.17': + dependencies: + '@smithy/protocol-http': 5.3.14 + '@smithy/types': 4.14.1 + '@smithy/url-parser': 4.2.14 + '@smithy/util-base64': 4.3.2 + 
'@smithy/util-body-length-browser': 4.2.2 + '@smithy/util-middleware': 4.2.14 + '@smithy/util-stream': 4.5.25 + '@smithy/util-utf8': 4.2.2 + '@smithy/uuid': 1.1.2 + tslib: 2.8.1 + + '@smithy/credential-provider-imds@4.2.14': + dependencies: + '@smithy/node-config-provider': 4.3.14 + '@smithy/property-provider': 4.2.14 + '@smithy/types': 4.14.1 + '@smithy/url-parser': 4.2.14 + tslib: 2.8.1 + + '@smithy/eventstream-codec@4.2.14': + dependencies: + '@aws-crypto/crc32': 5.2.0 + '@smithy/types': 4.14.1 + '@smithy/util-hex-encoding': 4.2.2 + tslib: 2.8.1 + + '@smithy/eventstream-serde-browser@4.2.14': + dependencies: + '@smithy/eventstream-serde-universal': 4.2.14 + '@smithy/types': 4.14.1 + tslib: 2.8.1 + + '@smithy/eventstream-serde-config-resolver@4.3.14': + dependencies: + '@smithy/types': 4.14.1 + tslib: 2.8.1 + + '@smithy/eventstream-serde-node@4.2.14': + dependencies: + '@smithy/eventstream-serde-universal': 4.2.14 + '@smithy/types': 4.14.1 + tslib: 2.8.1 + + '@smithy/eventstream-serde-universal@4.2.14': + dependencies: + '@smithy/eventstream-codec': 4.2.14 + '@smithy/types': 4.14.1 + tslib: 2.8.1 + + '@smithy/fetch-http-handler@5.3.17': + dependencies: + '@smithy/protocol-http': 5.3.14 + '@smithy/querystring-builder': 4.2.14 + '@smithy/types': 4.14.1 + '@smithy/util-base64': 4.3.2 + tslib: 2.8.1 + + '@smithy/hash-blob-browser@4.2.15': + dependencies: + '@smithy/chunked-blob-reader': 5.2.2 + '@smithy/chunked-blob-reader-native': 4.2.3 + '@smithy/types': 4.14.1 + tslib: 2.8.1 + + '@smithy/hash-node@4.2.14': + dependencies: + '@smithy/types': 4.14.1 + '@smithy/util-buffer-from': 4.2.2 + '@smithy/util-utf8': 4.2.2 + tslib: 2.8.1 + + '@smithy/hash-stream-node@4.2.14': + dependencies: + '@smithy/types': 4.14.1 + '@smithy/util-utf8': 4.2.2 + tslib: 2.8.1 + + '@smithy/invalid-dependency@4.2.14': + dependencies: + '@smithy/types': 4.14.1 + tslib: 2.8.1 + + '@smithy/is-array-buffer@2.2.0': + dependencies: + tslib: 2.8.1 + + '@smithy/is-array-buffer@4.2.2': + dependencies: 
+ tslib: 2.8.1 + + '@smithy/md5-js@4.2.14': + dependencies: + '@smithy/types': 4.14.1 + '@smithy/util-utf8': 4.2.2 + tslib: 2.8.1 + + '@smithy/middleware-content-length@4.2.14': + dependencies: + '@smithy/protocol-http': 5.3.14 + '@smithy/types': 4.14.1 + tslib: 2.8.1 + + '@smithy/middleware-endpoint@4.4.32': + dependencies: + '@smithy/core': 3.23.17 + '@smithy/middleware-serde': 4.2.20 + '@smithy/node-config-provider': 4.3.14 + '@smithy/shared-ini-file-loader': 4.4.9 + '@smithy/types': 4.14.1 + '@smithy/url-parser': 4.2.14 + '@smithy/util-middleware': 4.2.14 + tslib: 2.8.1 + + '@smithy/middleware-retry@4.5.7': + dependencies: + '@smithy/core': 3.23.17 + '@smithy/node-config-provider': 4.3.14 + '@smithy/protocol-http': 5.3.14 + '@smithy/service-error-classification': 4.3.1 + '@smithy/smithy-client': 4.12.13 + '@smithy/types': 4.14.1 + '@smithy/util-middleware': 4.2.14 + '@smithy/util-retry': 4.3.6 + '@smithy/uuid': 1.1.2 + tslib: 2.8.1 + + '@smithy/middleware-serde@4.2.20': + dependencies: + '@smithy/core': 3.23.17 + '@smithy/protocol-http': 5.3.14 + '@smithy/types': 4.14.1 + tslib: 2.8.1 + + '@smithy/middleware-stack@4.2.14': + dependencies: + '@smithy/types': 4.14.1 + tslib: 2.8.1 + + '@smithy/node-config-provider@4.3.14': + dependencies: + '@smithy/property-provider': 4.2.14 + '@smithy/shared-ini-file-loader': 4.4.9 + '@smithy/types': 4.14.1 + tslib: 2.8.1 + + '@smithy/node-http-handler@4.6.1': + dependencies: + '@smithy/protocol-http': 5.3.14 + '@smithy/querystring-builder': 4.2.14 + '@smithy/types': 4.14.1 + tslib: 2.8.1 + + '@smithy/property-provider@4.2.14': + dependencies: + '@smithy/types': 4.14.1 + tslib: 2.8.1 + + '@smithy/protocol-http@5.3.14': + dependencies: + '@smithy/types': 4.14.1 + tslib: 2.8.1 + + '@smithy/querystring-builder@4.2.14': + dependencies: + '@smithy/types': 4.14.1 + '@smithy/util-uri-escape': 4.2.2 + tslib: 2.8.1 + + '@smithy/querystring-parser@4.2.14': + dependencies: + '@smithy/types': 4.14.1 + tslib: 2.8.1 + + 
'@smithy/service-error-classification@4.3.1': + dependencies: + '@smithy/types': 4.14.1 + + '@smithy/shared-ini-file-loader@4.4.9': + dependencies: + '@smithy/types': 4.14.1 + tslib: 2.8.1 + + '@smithy/signature-v4@5.3.14': + dependencies: + '@smithy/is-array-buffer': 4.2.2 + '@smithy/protocol-http': 5.3.14 + '@smithy/types': 4.14.1 + '@smithy/util-hex-encoding': 4.2.2 + '@smithy/util-middleware': 4.2.14 + '@smithy/util-uri-escape': 4.2.2 + '@smithy/util-utf8': 4.2.2 + tslib: 2.8.1 + + '@smithy/smithy-client@4.12.13': + dependencies: + '@smithy/core': 3.23.17 + '@smithy/middleware-endpoint': 4.4.32 + '@smithy/middleware-stack': 4.2.14 + '@smithy/protocol-http': 5.3.14 + '@smithy/types': 4.14.1 + '@smithy/util-stream': 4.5.25 + tslib: 2.8.1 + + '@smithy/types@4.14.1': + dependencies: + tslib: 2.8.1 + + '@smithy/url-parser@4.2.14': + dependencies: + '@smithy/querystring-parser': 4.2.14 + '@smithy/types': 4.14.1 + tslib: 2.8.1 + + '@smithy/util-base64@4.3.2': + dependencies: + '@smithy/util-buffer-from': 4.2.2 + '@smithy/util-utf8': 4.2.2 + tslib: 2.8.1 + + '@smithy/util-body-length-browser@4.2.2': + dependencies: + tslib: 2.8.1 + + '@smithy/util-body-length-node@4.2.3': + dependencies: + tslib: 2.8.1 + + '@smithy/util-buffer-from@2.2.0': + dependencies: + '@smithy/is-array-buffer': 2.2.0 + tslib: 2.8.1 + + '@smithy/util-buffer-from@4.2.2': + dependencies: + '@smithy/is-array-buffer': 4.2.2 + tslib: 2.8.1 + + '@smithy/util-config-provider@4.2.2': + dependencies: + tslib: 2.8.1 + + '@smithy/util-defaults-mode-browser@4.3.49': + dependencies: + '@smithy/property-provider': 4.2.14 + '@smithy/smithy-client': 4.12.13 + '@smithy/types': 4.14.1 + tslib: 2.8.1 + + '@smithy/util-defaults-mode-node@4.2.54': + dependencies: + '@smithy/config-resolver': 4.4.17 + '@smithy/credential-provider-imds': 4.2.14 + '@smithy/node-config-provider': 4.3.14 + '@smithy/property-provider': 4.2.14 + '@smithy/smithy-client': 4.12.13 + '@smithy/types': 4.14.1 + tslib: 2.8.1 + + 
'@smithy/util-endpoints@3.4.2': + dependencies: + '@smithy/node-config-provider': 4.3.14 + '@smithy/types': 4.14.1 + tslib: 2.8.1 + + '@smithy/util-hex-encoding@4.2.2': + dependencies: + tslib: 2.8.1 + + '@smithy/util-middleware@4.2.14': + dependencies: + '@smithy/types': 4.14.1 + tslib: 2.8.1 + + '@smithy/util-retry@4.3.6': + dependencies: + '@smithy/service-error-classification': 4.3.1 + '@smithy/types': 4.14.1 + tslib: 2.8.1 + + '@smithy/util-stream@4.5.25': + dependencies: + '@smithy/fetch-http-handler': 5.3.17 + '@smithy/node-http-handler': 4.6.1 + '@smithy/types': 4.14.1 + '@smithy/util-base64': 4.3.2 + '@smithy/util-buffer-from': 4.2.2 + '@smithy/util-hex-encoding': 4.2.2 + '@smithy/util-utf8': 4.2.2 + tslib: 2.8.1 + + '@smithy/util-uri-escape@4.2.2': + dependencies: + tslib: 2.8.1 + + '@smithy/util-utf8@2.3.0': + dependencies: + '@smithy/util-buffer-from': 2.2.0 + tslib: 2.8.1 + + '@smithy/util-utf8@4.2.2': + dependencies: + '@smithy/util-buffer-from': 4.2.2 + tslib: 2.8.1 + + '@smithy/util-waiter@4.3.0': + dependencies: + '@smithy/types': 4.14.1 + tslib: 2.8.1 + + '@smithy/uuid@1.1.2': + dependencies: + tslib: 2.8.1 + + '@so-ric/colorspace@1.1.6': + dependencies: + color: 5.0.3 + text-hex: 1.0.0 + + '@standard-schema/spec@1.1.0': {} + + '@techteamer/ocsp@1.0.1': + dependencies: + asn1.js: 5.4.1 + asn1.js-rfc2560: 5.0.1(asn1.js@5.4.1) + asn1.js-rfc5280: 3.0.0 + async: 3.2.6 + simple-lru-cache: 0.0.2 + + '@tediousjs/connection-string@1.1.0': {} + + '@tybys/wasm-util@0.10.1': + dependencies: + tslib: 2.8.1 + optional: true + + '@types/better-sqlite3@7.6.13': + dependencies: + '@types/node': 24.12.2 + + '@types/chai@5.2.3': + dependencies: + '@types/deep-eql': 4.0.2 + assertion-error: 2.0.1 + + '@types/deep-eql@4.0.2': {} + + '@types/estree@1.0.8': {} + + '@types/mssql@9.1.11(@azure/core-client@1.10.1)': + dependencies: + '@types/node': 24.12.2 + tarn: 3.0.2 + tedious: 19.2.1(@azure/core-client@1.10.1) + transitivePeerDependencies: + - '@azure/core-client' + - 
supports-color + + '@types/node@24.12.2': + dependencies: + undici-types: 7.16.0 + + '@types/pg@8.20.0': + dependencies: + '@types/node': 24.12.2 + pg-protocol: 1.13.0 + pg-types: 2.2.0 + + '@types/react@19.2.14': + dependencies: + csstype: 3.2.3 + + '@types/readable-stream@4.0.23': + dependencies: + '@types/node': 24.12.2 + + '@types/triple-beam@1.3.5': {} + + '@typespec/ts-http-runtime@0.3.5': + dependencies: + http-proxy-agent: 7.0.2 + https-proxy-agent: 7.0.6 + tslib: 2.8.1 + transitivePeerDependencies: + - supports-color + + '@vercel/oidc@3.2.0': {} + + '@vitest/expect@4.1.5': + dependencies: + '@standard-schema/spec': 1.1.0 + '@types/chai': 5.2.3 + '@vitest/spy': 4.1.5 + '@vitest/utils': 4.1.5 + chai: 6.2.2 + tinyrainbow: 3.1.0 + + '@vitest/mocker@4.1.5(vite@8.0.10(@types/node@24.12.2)(esbuild@0.27.7)(yaml@2.8.3))': + dependencies: + '@vitest/spy': 4.1.5 + estree-walker: 3.0.3 + magic-string: 0.30.21 + optionalDependencies: + vite: 8.0.10(@types/node@24.12.2)(esbuild@0.27.7)(yaml@2.8.3) + + '@vitest/pretty-format@4.1.5': + dependencies: + tinyrainbow: 3.1.0 + + '@vitest/runner@4.1.5': + dependencies: + '@vitest/utils': 4.1.5 + pathe: 2.0.3 + + '@vitest/snapshot@4.1.5': + dependencies: + '@vitest/pretty-format': 4.1.5 + '@vitest/utils': 4.1.5 + magic-string: 0.30.21 + pathe: 2.0.3 + + '@vitest/spy@4.1.5': {} + + '@vitest/utils@4.1.5': + dependencies: + '@vitest/pretty-format': 4.1.5 + convert-source-map: 2.0.0 + tinyrainbow: 3.1.0 + + abort-controller@3.0.0: + dependencies: + event-target-shim: 5.0.1 + + accepts@2.0.0: + dependencies: + mime-types: 3.0.2 + negotiator: 1.0.0 + + agent-base@7.1.4: {} + + ai@6.0.168(zod@4.3.6): + dependencies: + '@ai-sdk/gateway': 3.0.104(zod@4.3.6) + '@ai-sdk/provider': 3.0.8 + '@ai-sdk/provider-utils': 4.0.23(zod@4.3.6) + '@opentelemetry/api': 1.9.0 + zod: 4.3.6 + + ai@6.0.168(zod@4.4.3): + dependencies: + '@ai-sdk/gateway': 3.0.104(zod@4.4.3) + '@ai-sdk/provider': 3.0.8 + '@ai-sdk/provider-utils': 4.0.23(zod@4.4.3) + 
'@opentelemetry/api': 1.9.0 + zod: 4.4.3 + + ajv-formats@3.0.1(ajv@8.20.0): + optionalDependencies: + ajv: 8.20.0 + + ajv@8.20.0: + dependencies: + fast-deep-equal: 3.1.3 + fast-uri: 3.1.0 + json-schema-traverse: 1.0.0 + require-from-string: 2.0.2 + + ansi-escapes@7.3.0: + dependencies: + environment: 1.1.0 + + ansi-regex@6.2.2: {} + + ansi-styles@6.2.3: {} + + arrify@2.0.1: {} + + arrify@3.0.0: {} + + asn1.js-rfc2560@5.0.1(asn1.js@5.4.1): + dependencies: + asn1.js: 5.4.1 + asn1.js-rfc5280: 3.0.0 + + asn1.js-rfc5280@3.0.0: + dependencies: + asn1.js: 5.4.1 + + asn1.js@5.4.1: + dependencies: + bn.js: 4.12.3 + inherits: 2.0.4 + minimalistic-assert: 1.0.1 + safer-buffer: 2.1.2 + + assertion-error@2.0.1: {} + + async@3.2.6: {} + + asynckit@0.4.0: {} + + auto-bind@5.0.1: {} + + aws-ssl-profiles@1.1.2: {} + + axios@1.15.2: + dependencies: + follow-redirects: 1.16.0 + form-data: 4.0.5 + proxy-from-env: 2.1.0 + transitivePeerDependencies: + - debug + + balanced-match@1.0.2: {} + + balanced-match@4.0.4: {} + + base64-js@1.5.1: {} + + better-sqlite3@12.9.0: + dependencies: + bindings: 1.5.0 + prebuild-install: 7.1.3 + + big-integer@1.6.52: {} + + big.js@7.0.1: {} + + bignumber.js@9.3.1: {} + + bindings@1.5.0: + dependencies: + file-uri-to-path: 1.0.0 + + bl@4.1.0: + dependencies: + buffer: 5.7.1 + inherits: 2.0.4 + readable-stream: 3.6.2 + + bl@6.1.6: + dependencies: + '@types/readable-stream': 4.0.23 + buffer: 6.0.3 + inherits: 2.0.4 + readable-stream: 4.7.0 + + bluebird@3.7.2: {} + + bn.js@4.12.3: {} + + body-parser@2.2.2: + dependencies: + bytes: 3.1.2 + content-type: 1.0.5 + debug: 4.4.3 + http-errors: 2.0.1 + iconv-lite: 0.7.2 + on-finished: 2.4.1 + qs: 6.15.1 + raw-body: 3.0.2 + type-is: 2.0.1 + transitivePeerDependencies: + - supports-color + + bowser@2.14.1: {} + + brace-expansion@1.1.14: + dependencies: + balanced-match: 1.0.2 + concat-map: 0.0.1 + + brace-expansion@5.0.5: + dependencies: + balanced-match: 4.0.4 + + browser-request@0.3.3: {} + + 
buffer-equal-constant-time@1.0.1: {} + + buffer@5.7.1: + dependencies: + base64-js: 1.5.1 + ieee754: 1.2.1 + + buffer@6.0.3: + dependencies: + base64-js: 1.5.1 + ieee754: 1.2.1 + + bundle-name@4.1.0: + dependencies: + run-applescript: 7.1.0 + + bytes@3.1.2: {} + + call-bind-apply-helpers@1.0.2: + dependencies: + es-errors: 1.3.0 + function-bind: 1.1.2 + + call-bound@1.0.4: + dependencies: + call-bind-apply-helpers: 1.0.2 + get-intrinsic: 1.3.0 + + chai@6.2.2: {} + + chalk@5.6.2: {} + + chownr@1.1.4: {} + + cli-boxes@4.0.1: {} + + cli-cursor@4.0.0: + dependencies: + restore-cursor: 4.0.0 + + cli-truncate@6.0.0: + dependencies: + slice-ansi: 9.0.0 + string-width: 8.2.1 + + code-excerpt@4.0.0: + dependencies: + convert-to-spaces: 2.0.1 + + color-convert@3.1.3: + dependencies: + color-name: 2.1.0 + + color-name@2.1.0: {} + + color-string@2.1.4: + dependencies: + color-name: 2.1.0 + + color@5.0.3: + dependencies: + color-convert: 3.1.3 + color-string: 2.1.4 + + combined-stream@1.0.8: + dependencies: + delayed-stream: 1.0.0 + + commander@11.1.0: {} + + commander@14.0.3: {} + + concat-map@0.0.1: {} + + content-disposition@1.1.0: {} + + content-type@1.0.5: {} + + convert-source-map@2.0.0: {} + + convert-to-spaces@2.0.1: {} + + cookie-signature@1.2.2: {} + + cookie@0.7.2: {} + + cors@2.8.6: + dependencies: + object-assign: 4.1.1 + vary: 1.1.2 + + cross-spawn@7.0.6: + dependencies: + path-key: 3.1.1 + shebang-command: 2.0.0 + which: 2.0.2 + + csstype@3.2.3: {} + + data-uri-to-buffer@4.0.1: {} + + debug@4.4.3: + dependencies: + ms: 2.1.3 + + decompress-response@6.0.0: + dependencies: + mimic-response: 3.1.0 + + deep-extend@0.6.0: {} + + default-browser-id@5.0.1: {} + + default-browser@5.5.0: + dependencies: + bundle-name: 4.1.0 + default-browser-id: 5.0.1 + + define-lazy-prop@3.0.0: {} + + delayed-stream@1.0.0: {} + + denque@2.1.0: {} + + depd@2.0.0: {} + + detect-libc@2.1.2: {} + + dunder-proto@1.0.1: + dependencies: + call-bind-apply-helpers: 1.0.2 + es-errors: 1.3.0 + 
gopd: 1.2.0 + + duplexify@4.1.3: + dependencies: + end-of-stream: 1.4.5 + inherits: 2.0.4 + readable-stream: 3.6.2 + stream-shift: 1.0.3 + + ecdsa-sig-formatter@1.0.11: + dependencies: + safe-buffer: 5.2.1 + + ee-first@1.1.1: {} + + enabled@2.0.0: {} + + encodeurl@2.0.0: {} + + end-of-stream@1.4.5: + dependencies: + once: 1.4.0 + + environment@1.1.0: {} + + es-define-property@1.0.1: {} + + es-errors@1.3.0: {} + + es-module-lexer@2.1.0: {} + + es-object-atoms@1.1.1: + dependencies: + es-errors: 1.3.0 + + es-set-tostringtag@2.1.0: + dependencies: + es-errors: 1.3.0 + get-intrinsic: 1.3.0 + has-tostringtag: 1.0.2 + hasown: 2.0.3 + + es-toolkit@1.46.1: {} + + esbuild@0.27.7: + optionalDependencies: + '@esbuild/aix-ppc64': 0.27.7 + '@esbuild/android-arm': 0.27.7 + '@esbuild/android-arm64': 0.27.7 + '@esbuild/android-x64': 0.27.7 + '@esbuild/darwin-arm64': 0.27.7 + '@esbuild/darwin-x64': 0.27.7 + '@esbuild/freebsd-arm64': 0.27.7 + '@esbuild/freebsd-x64': 0.27.7 + '@esbuild/linux-arm': 0.27.7 + '@esbuild/linux-arm64': 0.27.7 + '@esbuild/linux-ia32': 0.27.7 + '@esbuild/linux-loong64': 0.27.7 + '@esbuild/linux-mips64el': 0.27.7 + '@esbuild/linux-ppc64': 0.27.7 + '@esbuild/linux-riscv64': 0.27.7 + '@esbuild/linux-s390x': 0.27.7 + '@esbuild/linux-x64': 0.27.7 + '@esbuild/netbsd-arm64': 0.27.7 + '@esbuild/netbsd-x64': 0.27.7 + '@esbuild/openbsd-arm64': 0.27.7 + '@esbuild/openbsd-x64': 0.27.7 + '@esbuild/openharmony-arm64': 0.27.7 + '@esbuild/sunos-x64': 0.27.7 + '@esbuild/win32-arm64': 0.27.7 + '@esbuild/win32-ia32': 0.27.7 + '@esbuild/win32-x64': 0.27.7 + optional: true + + escape-html@1.0.3: {} + + escape-string-regexp@2.0.0: {} + + estree-walker@3.0.3: + dependencies: + '@types/estree': 1.0.8 + + etag@1.8.1: {} + + event-target-shim@5.0.1: {} + + events@3.3.0: {} + + eventsource-parser@3.0.8: {} + + eventsource@3.0.7: + dependencies: + eventsource-parser: 3.0.8 + + expand-template@2.0.3: {} + + expand-tilde@2.0.2: + dependencies: + homedir-polyfill: 1.0.3 + + 
expect-type@1.3.0: {} + + express-rate-limit@8.4.1(express@5.2.1): + dependencies: + express: 5.2.1 + ip-address: 10.1.0 + + express@5.2.1: + dependencies: + accepts: 2.0.0 + body-parser: 2.2.2 + content-disposition: 1.1.0 + content-type: 1.0.5 + cookie: 0.7.2 + cookie-signature: 1.2.2 + debug: 4.4.3 + depd: 2.0.0 + encodeurl: 2.0.0 + escape-html: 1.0.3 + etag: 1.8.1 + finalhandler: 2.1.1 + fresh: 2.0.0 + http-errors: 2.0.1 + merge-descriptors: 2.0.0 + mime-types: 3.0.2 + on-finished: 2.4.1 + once: 1.4.0 + parseurl: 1.3.3 + proxy-addr: 2.0.7 + qs: 6.15.1 + range-parser: 1.2.1 + router: 2.2.0 + send: 1.2.1 + serve-static: 2.2.1 + statuses: 2.0.2 + type-is: 2.0.1 + vary: 1.1.2 + transitivePeerDependencies: + - supports-color + + extend@3.0.2: {} + + fast-deep-equal@3.1.3: {} + + fast-string-truncated-width@3.0.3: {} + + fast-string-width@3.0.2: + dependencies: + fast-string-truncated-width: 3.0.3 + + fast-uri@3.1.0: {} + + fast-wrap-ansi@0.2.0: + dependencies: + fast-string-width: 3.0.2 + + fast-xml-builder@1.1.5: + dependencies: + path-expression-matcher: 1.5.0 + + fast-xml-parser@5.7.2: + dependencies: + '@nodable/entities': 2.1.0 + fast-xml-builder: 1.1.5 + path-expression-matcher: 1.5.0 + strnum: 2.2.3 + + fastest-levenshtein@1.0.16: {} + + fdir@6.5.0(picomatch@4.0.4): + optionalDependencies: + picomatch: 4.0.4 + + fecha@4.2.3: {} + + fetch-blob@3.2.0: + dependencies: + node-domexception: 1.0.0 + web-streams-polyfill: 3.3.3 + + file-uri-to-path@1.0.0: {} + + finalhandler@2.1.1: + dependencies: + debug: 4.4.3 + encodeurl: 2.0.0 + escape-html: 1.0.3 + on-finished: 2.4.1 + parseurl: 1.3.3 + statuses: 2.0.2 + transitivePeerDependencies: + - supports-color + + fn.name@1.1.0: {} + + follow-redirects@1.16.0: {} + + form-data@4.0.5: + dependencies: + asynckit: 0.4.0 + combined-stream: 1.0.8 + es-set-tostringtag: 2.1.0 + hasown: 2.0.3 + mime-types: 2.1.35 + + formdata-polyfill@4.0.10: + dependencies: + fetch-blob: 3.2.0 + + forwarded@0.2.0: {} + + fresh@2.0.0: {} + + 
fs-constants@1.0.0: {} + + fs.realpath@1.0.0: {} + + fsevents@2.3.3: + optional: true + + function-bind@1.1.2: {} + + gaxios@7.1.4: + dependencies: + extend: 3.0.2 + https-proxy-agent: 7.0.6 + node-fetch: 3.3.2 + transitivePeerDependencies: + - supports-color + + gcp-metadata@8.1.2: + dependencies: + gaxios: 7.1.4 + google-logging-utils: 1.1.3 + json-bigint: 1.0.0 + transitivePeerDependencies: + - supports-color + + generate-function@2.3.1: + dependencies: + is-property: 1.0.2 + + generic-pool@3.9.0: {} + + get-east-asian-width@1.5.0: {} + + get-intrinsic@1.3.0: + dependencies: + call-bind-apply-helpers: 1.0.2 + es-define-property: 1.0.1 + es-errors: 1.3.0 + es-object-atoms: 1.1.1 + function-bind: 1.1.2 + get-proto: 1.0.1 + gopd: 1.2.0 + has-symbols: 1.1.0 + hasown: 2.0.3 + math-intrinsics: 1.1.0 + + get-proto@1.0.1: + dependencies: + dunder-proto: 1.0.1 + es-object-atoms: 1.1.1 + + github-from-package@0.0.0: {} + + glob@7.2.3: + dependencies: + fs.realpath: 1.0.0 + inflight: 1.0.6 + inherits: 2.0.4 + minimatch: 3.1.5 + once: 1.4.0 + path-is-absolute: 1.0.1 + + google-auth-library@10.6.2: + dependencies: + base64-js: 1.5.1 + ecdsa-sig-formatter: 1.0.11 + gaxios: 7.1.4 + gcp-metadata: 8.1.2 + google-logging-utils: 1.1.3 + jws: 4.0.1 + transitivePeerDependencies: + - supports-color + + google-logging-utils@1.1.3: {} + + gopd@1.2.0: {} + + handlebars@4.7.9: + dependencies: + minimist: 1.2.8 + neo-async: 2.6.2 + source-map: 0.6.1 + wordwrap: 1.0.0 + optionalDependencies: + uglify-js: 3.19.3 + + has-symbols@1.1.0: {} + + has-tostringtag@1.0.2: + dependencies: + has-symbols: 1.1.0 + + hasown@2.0.3: + dependencies: + function-bind: 1.1.2 + + homedir-polyfill@1.0.3: + dependencies: + parse-passwd: 1.0.0 + + hono@4.12.15: {} + + html-entities@2.6.0: {} + + http-errors@2.0.1: + dependencies: + depd: 2.0.0 + inherits: 2.0.4 + setprototypeof: 1.2.0 + statuses: 2.0.2 + toidentifier: 1.0.1 + + http-proxy-agent@7.0.2: + dependencies: + agent-base: 7.1.4 + debug: 4.4.3 + 
transitivePeerDependencies: + - supports-color + + https-proxy-agent@7.0.6: + dependencies: + agent-base: 7.1.4 + debug: 4.4.3 + transitivePeerDependencies: + - supports-color + + iconv-lite@0.7.2: + dependencies: + safer-buffer: 2.1.2 + + ieee754@1.2.1: {} + + indent-string@5.0.0: {} + + inflight@1.0.6: + dependencies: + once: 1.4.0 + wrappy: 1.0.2 + + inherits@2.0.4: {} + + ini@1.3.8: {} + + ini@5.0.0: {} + + ink-testing-library@4.0.0(@types/react@19.2.14): + optionalDependencies: + '@types/react': 19.2.14 + + ink@7.0.1(@types/react@19.2.14)(react@19.2.5): + dependencies: + '@alcalzone/ansi-tokenize': 0.3.0 + ansi-escapes: 7.3.0 + ansi-styles: 6.2.3 + auto-bind: 5.0.1 + chalk: 5.6.2 + cli-boxes: 4.0.1 + cli-cursor: 4.0.0 + cli-truncate: 6.0.0 + code-excerpt: 4.0.0 + es-toolkit: 1.46.1 + indent-string: 5.0.0 + is-in-ci: 2.0.0 + patch-console: 2.0.0 + react: 19.2.5 + react-reconciler: 0.33.0(react@19.2.5) + scheduler: 0.27.0 + signal-exit: 3.0.7 + slice-ansi: 9.0.0 + stack-utils: 2.0.6 + string-width: 8.2.1 + terminal-size: 4.0.1 + type-fest: 5.6.0 + widest-line: 6.0.0 + wrap-ansi: 10.0.0 + ws: 8.20.0 + yoga-layout: 3.2.1 + optionalDependencies: + '@types/react': 19.2.14 + transitivePeerDependencies: + - bufferutil + - utf-8-validate + + ip-address@10.1.0: {} + + ipaddr.js@1.9.1: {} + + is-docker@2.2.1: {} + + is-docker@3.0.0: {} + + is-fullwidth-code-point@5.1.0: + dependencies: + get-east-asian-width: 1.5.0 + + is-in-ci@2.0.0: {} + + is-inside-container@1.0.0: + dependencies: + is-docker: 3.0.0 + + is-promise@4.0.0: {} + + is-property@1.0.2: {} + + is-stream@2.0.1: {} + + is-wsl@2.2.0: + dependencies: + is-docker: 2.2.1 + + is-wsl@3.1.1: + dependencies: + is-inside-container: 1.0.0 + + isexe@2.0.0: {} + + jose@6.2.2: {} + + js-md4@0.3.2: {} + + json-bigint@1.0.0: + dependencies: + bignumber.js: 9.3.1 + + json-schema-traverse@1.0.0: {} + + json-schema-typed@8.0.2: {} + + json-schema@0.4.0: {} + + jsonwebtoken@9.0.3: + dependencies: + jws: 4.0.1 + lodash.includes: 
4.3.0 + lodash.isboolean: 3.0.3 + lodash.isinteger: 4.0.4 + lodash.isnumber: 3.0.3 + lodash.isplainobject: 4.0.6 + lodash.isstring: 4.0.1 + lodash.once: 4.1.1 + ms: 2.1.3 + semver: 7.7.4 + + jwa@2.0.1: + dependencies: + buffer-equal-constant-time: 1.0.1 + ecdsa-sig-formatter: 1.0.11 + safe-buffer: 5.2.1 + + jws@4.0.1: + dependencies: + jwa: 2.0.1 + safe-buffer: 5.2.1 + + kuler@2.0.0: {} + + lightningcss-android-arm64@1.32.0: + optional: true + + lightningcss-darwin-arm64@1.32.0: + optional: true + + lightningcss-darwin-x64@1.32.0: + optional: true + + lightningcss-freebsd-x64@1.32.0: + optional: true + + lightningcss-linux-arm-gnueabihf@1.32.0: + optional: true + + lightningcss-linux-arm64-gnu@1.32.0: + optional: true + + lightningcss-linux-arm64-musl@1.32.0: + optional: true + + lightningcss-linux-x64-gnu@1.32.0: + optional: true + + lightningcss-linux-x64-musl@1.32.0: + optional: true + + lightningcss-win32-arm64-msvc@1.32.0: + optional: true + + lightningcss-win32-x64-msvc@1.32.0: + optional: true + + lightningcss@1.32.0: + dependencies: + detect-libc: 2.1.2 + optionalDependencies: + lightningcss-android-arm64: 1.32.0 + lightningcss-darwin-arm64: 1.32.0 + lightningcss-darwin-x64: 1.32.0 + lightningcss-freebsd-x64: 1.32.0 + lightningcss-linux-arm-gnueabihf: 1.32.0 + lightningcss-linux-arm64-gnu: 1.32.0 + lightningcss-linux-arm64-musl: 1.32.0 + lightningcss-linux-x64-gnu: 1.32.0 + lightningcss-linux-x64-musl: 1.32.0 + lightningcss-win32-arm64-msvc: 1.32.0 + lightningcss-win32-x64-msvc: 1.32.0 + + lodash.includes@4.3.0: {} + + lodash.isboolean@3.0.3: {} + + lodash.isinteger@4.0.4: {} + + lodash.isnumber@3.0.3: {} + + lodash.isplainobject@4.0.6: {} + + lodash.isstring@4.0.1: {} + + lodash.once@4.1.1: {} + + logform@2.7.0: + dependencies: + '@colors/colors': 1.6.0 + '@types/triple-beam': 1.3.5 + fecha: 4.2.3 + ms: 2.1.3 + safe-stable-stringify: 2.5.0 + triple-beam: 1.4.1 + + long@5.3.2: {} + + lookml-parser@7.1.0: + dependencies: + bluebird: 3.7.2 + glob: 7.2.3 + 
minimist: 1.2.8 + pegjs: 0.10.0 + + lru.min@1.1.4: {} + + magic-string@0.30.21: + dependencies: + '@jridgewell/sourcemap-codec': 1.5.5 + + math-intrinsics@1.1.0: {} + + media-typer@1.1.0: {} + + merge-descriptors@2.0.0: {} + + mime-db@1.52.0: {} + + mime-db@1.54.0: {} + + mime-types@2.1.35: + dependencies: + mime-db: 1.52.0 + + mime-types@3.0.2: + dependencies: + mime-db: 1.54.0 + + mimic-fn@2.1.0: {} + + mimic-response@3.1.0: {} + + minimalistic-assert@1.0.1: {} + + minimatch@10.2.5: + dependencies: + brace-expansion: 5.0.5 + + minimatch@3.1.5: + dependencies: + brace-expansion: 1.1.14 + + minimist@1.2.8: {} + + mkdirp-classic@0.5.3: {} + + moment-timezone@0.5.48: + dependencies: + moment: 2.30.1 + + moment@2.30.1: {} + + ms@2.1.3: {} + + mssql@12.5.0: + dependencies: + '@tediousjs/connection-string': 1.1.0 + commander: 11.1.0 + debug: 4.4.3 + tarn: 3.0.2 + tedious: 19.2.1 + transitivePeerDependencies: + - '@azure/core-client' + - supports-color + + mssql@12.5.0(@azure/core-client@1.10.1): + dependencies: + '@tediousjs/connection-string': 1.1.0 + commander: 11.1.0 + debug: 4.4.3 + tarn: 3.0.2 + tedious: 19.2.1(@azure/core-client@1.10.1) + transitivePeerDependencies: + - '@azure/core-client' + - supports-color + + mysql2@3.22.3(@types/node@24.12.2): + dependencies: + '@types/node': 24.12.2 + aws-ssl-profiles: 1.1.2 + denque: 2.1.0 + generate-function: 2.3.1 + iconv-lite: 0.7.2 + long: 5.3.2 + lru.min: 1.1.4 + named-placeholders: 1.1.6 + sql-escaper: 1.3.3 + + named-placeholders@1.1.6: + dependencies: + lru.min: 1.1.4 + + nanoid@3.3.11: {} + + napi-build-utils@2.0.0: {} + + native-duplexpair@1.0.0: {} + + negotiator@1.0.0: {} + + neo-async@2.6.2: {} + + node-abi@3.89.0: + dependencies: + semver: 7.7.4 + + node-domexception@1.0.0: {} + + node-fetch@3.3.2: + dependencies: + data-uri-to-buffer: 4.0.1 + fetch-blob: 3.2.0 + formdata-polyfill: 4.0.10 + + oauth4webapi@3.8.6: {} + + object-assign@4.1.1: {} + + object-inspect@1.13.4: {} + + obug@2.1.1: {} + + 
on-finished@2.4.1: + dependencies: + ee-first: 1.1.1 + + once@1.4.0: + dependencies: + wrappy: 1.0.2 + + one-time@1.0.0: + dependencies: + fn.name: 1.1.0 + + onetime@5.1.2: + dependencies: + mimic-fn: 2.1.0 + + open@10.2.0: + dependencies: + default-browser: 5.5.0 + define-lazy-prop: 3.0.0 + is-inside-container: 1.0.0 + wsl-utils: 0.1.0 + + open@7.4.2: + dependencies: + is-docker: 2.2.1 + is-wsl: 2.2.0 + + openai@6.35.0(ws@8.20.0)(zod@4.3.6): + optionalDependencies: + ws: 8.20.0 + zod: 4.3.6 + + openai@6.35.0(ws@8.20.0)(zod@4.4.3): + optionalDependencies: + ws: 8.20.0 + zod: 4.4.3 + + openai@6.35.0(zod@4.4.3): + optionalDependencies: + zod: 4.4.3 + + p-limit@7.3.0: + dependencies: + yocto-queue: 1.2.2 + + parse-passwd@1.0.0: {} + + parseurl@1.3.3: {} + + patch-console@2.0.0: {} + + path-expression-matcher@1.5.0: {} + + path-is-absolute@1.0.1: {} + + path-key@3.1.1: {} + + path-to-regexp@8.4.2: {} + + pathe@2.0.3: {} + + pegjs@0.10.0: {} + + pg-cloudflare@1.3.0: + optional: true + + pg-connection-string@2.12.0: {} + + pg-int8@1.0.1: {} + + pg-pool@3.13.0(pg@8.20.0): + dependencies: + pg: 8.20.0 + + pg-protocol@1.13.0: {} + + pg-types@2.2.0: + dependencies: + pg-int8: 1.0.1 + postgres-array: 2.0.0 + postgres-bytea: 1.0.1 + postgres-date: 1.0.7 + postgres-interval: 1.2.0 + + pg@8.20.0: + dependencies: + pg-connection-string: 2.12.0 + pg-pool: 3.13.0(pg@8.20.0) + pg-protocol: 1.13.0 + pg-types: 2.2.0 + pgpass: 1.0.5 + optionalDependencies: + pg-cloudflare: 1.3.0 + + pgpass@1.0.5: + dependencies: + split2: 4.2.0 + + picocolors@1.1.1: {} + + picomatch@4.0.4: {} + + pkce-challenge@5.0.1: {} + + postcss@8.5.12: + dependencies: + nanoid: 3.3.11 + picocolors: 1.1.1 + source-map-js: 1.2.1 + + postgres-array@2.0.0: {} + + postgres-bytea@1.0.1: {} + + postgres-date@1.0.7: {} + + postgres-interval@1.2.0: + dependencies: + xtend: 4.0.2 + + prebuild-install@7.1.3: + dependencies: + detect-libc: 2.1.2 + expand-template: 2.0.3 + github-from-package: 0.0.0 + minimist: 1.2.8 + 
mkdirp-classic: 0.5.3 + napi-build-utils: 2.0.0 + node-abi: 3.89.0 + pump: 3.0.4 + rc: 1.2.8 + simple-get: 4.0.1 + tar-fs: 2.1.4 + tunnel-agent: 0.6.0 + + process@0.11.10: {} + + proxy-addr@2.0.7: + dependencies: + forwarded: 0.2.0 + ipaddr.js: 1.9.1 + + proxy-from-env@2.1.0: {} + + pump@3.0.4: + dependencies: + end-of-stream: 1.4.5 + once: 1.4.0 + + qs@6.15.1: + dependencies: + side-channel: 1.1.0 + + range-parser@1.2.1: {} + + raw-body@3.0.2: + dependencies: + bytes: 3.1.2 + http-errors: 2.0.1 + iconv-lite: 0.7.2 + unpipe: 1.0.0 + + rc@1.2.8: + dependencies: + deep-extend: 0.6.0 + ini: 1.3.8 + minimist: 1.2.8 + strip-json-comments: 2.0.1 + + react-reconciler@0.33.0(react@19.2.5): + dependencies: + react: 19.2.5 + scheduler: 0.27.0 + + react@19.2.5: {} + + readable-stream@3.6.2: + dependencies: + inherits: 2.0.4 + string_decoder: 1.3.0 + util-deprecate: 1.0.2 + + readable-stream@4.7.0: + dependencies: + abort-controller: 3.0.0 + buffer: 6.0.3 + events: 3.3.0 + process: 0.11.10 + string_decoder: 1.3.0 + + require-from-string@2.0.2: {} + + restore-cursor@4.0.0: + dependencies: + onetime: 5.1.2 + signal-exit: 3.0.7 + + retry-request@8.0.2: + dependencies: + extend: 3.0.2 + teeny-request: 10.1.2 + transitivePeerDependencies: + - supports-color + + rolldown@1.0.0-rc.17: + dependencies: + '@oxc-project/types': 0.127.0 + '@rolldown/pluginutils': 1.0.0-rc.17 + optionalDependencies: + '@rolldown/binding-android-arm64': 1.0.0-rc.17 + '@rolldown/binding-darwin-arm64': 1.0.0-rc.17 + '@rolldown/binding-darwin-x64': 1.0.0-rc.17 + '@rolldown/binding-freebsd-x64': 1.0.0-rc.17 + '@rolldown/binding-linux-arm-gnueabihf': 1.0.0-rc.17 + '@rolldown/binding-linux-arm64-gnu': 1.0.0-rc.17 + '@rolldown/binding-linux-arm64-musl': 1.0.0-rc.17 + '@rolldown/binding-linux-ppc64-gnu': 1.0.0-rc.17 + '@rolldown/binding-linux-s390x-gnu': 1.0.0-rc.17 + '@rolldown/binding-linux-x64-gnu': 1.0.0-rc.17 + '@rolldown/binding-linux-x64-musl': 1.0.0-rc.17 + '@rolldown/binding-openharmony-arm64': 1.0.0-rc.17 
+ '@rolldown/binding-wasm32-wasi': 1.0.0-rc.17 + '@rolldown/binding-win32-arm64-msvc': 1.0.0-rc.17 + '@rolldown/binding-win32-x64-msvc': 1.0.0-rc.17 + + router@2.2.0: + dependencies: + debug: 4.4.3 + depd: 2.0.0 + is-promise: 4.0.0 + parseurl: 1.3.3 + path-to-regexp: 8.4.2 + transitivePeerDependencies: + - supports-color + + run-applescript@7.1.0: {} + + safe-buffer@5.2.1: {} + + safe-stable-stringify@2.5.0: {} + + safer-buffer@2.1.2: {} + + scheduler@0.27.0: {} + + semver@7.7.4: {} + + send@1.2.1: + dependencies: + debug: 4.4.3 + encodeurl: 2.0.0 + escape-html: 1.0.3 + etag: 1.8.1 + fresh: 2.0.0 + http-errors: 2.0.1 + mime-types: 3.0.2 + ms: 2.1.3 + on-finished: 2.4.1 + range-parser: 1.2.1 + statuses: 2.0.2 + transitivePeerDependencies: + - supports-color + + serve-static@2.2.1: + dependencies: + encodeurl: 2.0.0 + escape-html: 1.0.3 + parseurl: 1.3.3 + send: 1.2.1 + transitivePeerDependencies: + - supports-color + + setprototypeof@1.2.0: {} + + shebang-command@2.0.0: + dependencies: + shebang-regex: 3.0.0 + + shebang-regex@3.0.0: {} + + side-channel-list@1.0.1: + dependencies: + es-errors: 1.3.0 + object-inspect: 1.13.4 + + side-channel-map@1.0.1: + dependencies: + call-bound: 1.0.4 + es-errors: 1.3.0 + get-intrinsic: 1.3.0 + object-inspect: 1.13.4 + + side-channel-weakmap@1.0.2: + dependencies: + call-bound: 1.0.4 + es-errors: 1.3.0 + get-intrinsic: 1.3.0 + object-inspect: 1.13.4 + side-channel-map: 1.0.1 + + side-channel@1.1.0: + dependencies: + es-errors: 1.3.0 + object-inspect: 1.13.4 + side-channel-list: 1.0.1 + side-channel-map: 1.0.1 + side-channel-weakmap: 1.0.2 + + siginfo@2.0.0: {} + + signal-exit@3.0.7: {} + + simple-concat@1.0.1: {} + + simple-get@4.0.1: + dependencies: + decompress-response: 6.0.0 + once: 1.4.0 + simple-concat: 1.0.1 + + simple-git@3.32.2: + dependencies: + '@kwsites/file-exists': 1.1.1 + '@kwsites/promise-deferred': 1.1.1 + debug: 4.4.3 + transitivePeerDependencies: + - supports-color + + simple-lru-cache@0.0.2: {} + + 
sisteransi@1.0.5: {} + + slice-ansi@9.0.0: + dependencies: + ansi-styles: 6.2.3 + is-fullwidth-code-point: 5.1.0 + + snowflake-sdk@2.4.0(asn1.js@5.4.1): + dependencies: + '@aws-crypto/sha256-js': 5.2.0 + '@aws-sdk/client-s3': 3.1039.0 + '@aws-sdk/client-sts': 3.1039.0 + '@aws-sdk/credential-provider-node': 3.972.38 + '@aws-sdk/ec2-metadata-service': 3.1039.0 + '@azure/identity': 4.13.1 + '@azure/storage-blob': 12.26.0 + '@smithy/node-http-handler': 4.6.1 + '@smithy/protocol-http': 5.3.14 + '@smithy/signature-v4': 5.3.14 + '@techteamer/ocsp': 1.0.1 + asn1.js: 5.4.1 + asn1.js-rfc2560: 5.0.1(asn1.js@5.4.1) + asn1.js-rfc5280: 3.0.0 + axios: 1.15.2 + big-integer: 1.6.52 + bignumber.js: 9.3.1 + browser-request: 0.3.3 + expand-tilde: 2.0.2 + fast-xml-parser: 5.7.2 + fastest-levenshtein: 1.0.16 + generic-pool: 3.9.0 + google-auth-library: 10.6.2 + https-proxy-agent: 7.0.6 + jsonwebtoken: 9.0.3 + mime-types: 2.1.35 + moment: 2.30.1 + moment-timezone: 0.5.48 + oauth4webapi: 3.8.6 + open: 7.4.2 + simple-lru-cache: 0.0.2 + toml: 3.0.0 + uuid: 8.3.2 + winston: 3.19.0 + transitivePeerDependencies: + - aws-crt + - debug + - supports-color + + source-map-js@1.2.1: {} + + source-map@0.6.1: {} + + split2@4.2.0: {} + + sprintf-js@1.1.3: {} + + sql-escaper@1.3.3: {} + + stack-trace@0.0.10: {} + + stack-utils@2.0.6: + dependencies: + escape-string-regexp: 2.0.0 + + stackback@0.0.2: {} + + statuses@2.0.2: {} + + std-env@4.1.0: {} + + stream-events@1.0.5: + dependencies: + stubs: 3.0.0 + + stream-shift@1.0.3: {} + + string-width@8.2.1: + dependencies: + get-east-asian-width: 1.5.0 + strip-ansi: 7.1.2 + + string_decoder@1.3.0: + dependencies: + safe-buffer: 5.2.1 + + strip-ansi@7.1.2: + dependencies: + ansi-regex: 6.2.2 + + strip-json-comments@2.0.1: {} + + strnum@2.2.3: {} + + stubs@3.0.0: {} + + tagged-tag@1.0.0: {} + + tar-fs@2.1.4: + dependencies: + chownr: 1.1.4 + mkdirp-classic: 0.5.3 + pump: 3.0.4 + tar-stream: 2.2.0 + + tar-stream@2.2.0: + dependencies: + bl: 4.1.0 + 
end-of-stream: 1.4.5 + fs-constants: 1.0.0 + inherits: 2.0.4 + readable-stream: 3.6.2 + + tarn@3.0.2: {} + + tedious@19.2.1: + dependencies: + '@azure/core-auth': 1.10.1 + '@azure/identity': 4.13.1 + '@azure/keyvault-keys': 4.10.0 + '@js-joda/core': 5.7.0 + '@types/node': 24.12.2 + bl: 6.1.6 + iconv-lite: 0.7.2 + js-md4: 0.3.2 + native-duplexpair: 1.0.0 + sprintf-js: 1.1.3 + transitivePeerDependencies: + - '@azure/core-client' + - supports-color + + tedious@19.2.1(@azure/core-client@1.10.1): + dependencies: + '@azure/core-auth': 1.10.1 + '@azure/identity': 4.13.1 + '@azure/keyvault-keys': 4.10.0(@azure/core-client@1.10.1) + '@js-joda/core': 5.7.0 + '@types/node': 24.12.2 + bl: 6.1.6 + iconv-lite: 0.7.2 + js-md4: 0.3.2 + native-duplexpair: 1.0.0 + sprintf-js: 1.1.3 + transitivePeerDependencies: + - '@azure/core-client' + - supports-color + + teeny-request@10.1.2: + dependencies: + http-proxy-agent: 7.0.2 + https-proxy-agent: 7.0.6 + node-fetch: 3.3.2 + stream-events: 1.0.5 + transitivePeerDependencies: + - supports-color + + terminal-size@4.0.1: {} + + text-hex@1.0.0: {} + + tinybench@2.9.0: {} + + tinyexec@1.1.1: {} + + tinyglobby@0.2.16: + dependencies: + fdir: 6.5.0(picomatch@4.0.4) + picomatch: 4.0.4 + + tinyrainbow@3.1.0: {} + + toidentifier@1.0.1: {} + + toml@3.0.0: {} + + triple-beam@1.4.1: {} + + tslib@2.8.1: {} + + tunnel-agent@0.6.0: + dependencies: + safe-buffer: 5.2.1 + + type-fest@5.6.0: + dependencies: + tagged-tag: 1.0.0 + + type-is@2.0.1: + dependencies: + content-type: 1.0.5 + media-typer: 1.1.0 + mime-types: 3.0.2 + + typescript@5.9.3: {} + + uglify-js@3.19.3: + optional: true + + undici-types@7.16.0: {} + + unpipe@1.0.0: {} + + util-deprecate@1.0.2: {} + + uuid@8.3.2: {} + + vary@1.1.2: {} + + vite@8.0.10(@types/node@24.12.2)(esbuild@0.27.7)(yaml@2.8.3): + dependencies: + lightningcss: 1.32.0 + picomatch: 4.0.4 + postcss: 8.5.12 + rolldown: 1.0.0-rc.17 + tinyglobby: 0.2.16 + optionalDependencies: + '@types/node': 24.12.2 + esbuild: 0.27.7 + 
fsevents: 2.3.3 + yaml: 2.8.3 + + vitest@4.1.5(@opentelemetry/api@1.9.0)(@types/node@24.12.2)(vite@8.0.10(@types/node@24.12.2)(esbuild@0.27.7)(yaml@2.8.3)): + dependencies: + '@vitest/expect': 4.1.5 + '@vitest/mocker': 4.1.5(vite@8.0.10(@types/node@24.12.2)(esbuild@0.27.7)(yaml@2.8.3)) + '@vitest/pretty-format': 4.1.5 + '@vitest/runner': 4.1.5 + '@vitest/snapshot': 4.1.5 + '@vitest/spy': 4.1.5 + '@vitest/utils': 4.1.5 + es-module-lexer: 2.1.0 + expect-type: 1.3.0 + magic-string: 0.30.21 + obug: 2.1.1 + pathe: 2.0.3 + picomatch: 4.0.4 + std-env: 4.1.0 + tinybench: 2.9.0 + tinyexec: 1.1.1 + tinyglobby: 0.2.16 + tinyrainbow: 3.1.0 + vite: 8.0.10(@types/node@24.12.2)(esbuild@0.27.7)(yaml@2.8.3) + why-is-node-running: 2.3.0 + optionalDependencies: + '@opentelemetry/api': 1.9.0 + '@types/node': 24.12.2 + transitivePeerDependencies: + - msw + + web-streams-polyfill@3.3.3: {} + + which@2.0.2: + dependencies: + isexe: 2.0.0 + + why-is-node-running@2.3.0: + dependencies: + siginfo: 2.0.0 + stackback: 0.0.2 + + widest-line@6.0.0: + dependencies: + string-width: 8.2.1 + + winston-transport@4.9.0: + dependencies: + logform: 2.7.0 + readable-stream: 3.6.2 + triple-beam: 1.4.1 + + winston@3.19.0: + dependencies: + '@colors/colors': 1.6.0 + '@dabh/diagnostics': 2.0.8 + async: 3.2.6 + is-stream: 2.0.1 + logform: 2.7.0 + one-time: 1.0.0 + readable-stream: 3.6.2 + safe-stable-stringify: 2.5.0 + stack-trace: 0.0.10 + triple-beam: 1.4.1 + winston-transport: 4.9.0 + + wordwrap@1.0.0: {} + + wrap-ansi@10.0.0: + dependencies: + ansi-styles: 6.2.3 + string-width: 8.2.1 + strip-ansi: 7.1.2 + + wrappy@1.0.2: {} + + ws@8.20.0: {} + + wsl-utils@0.1.0: + dependencies: + is-wsl: 3.1.1 + + xtend@4.0.2: {} + + yaml@2.8.3: {} + + yocto-queue@1.2.2: {} + + yoga-layout@3.2.1: {} + + zod-to-json-schema@3.25.2(zod@4.3.6): + dependencies: + zod: 4.3.6 + + zod-to-json-schema@3.25.2(zod@4.4.3): + dependencies: + zod: 4.4.3 + + zod@4.3.6: {} + + zod@4.4.3: {} diff --git a/pnpm-workspace.yaml 
b/pnpm-workspace.yaml new file mode 100644 index 00000000..c3874c84 --- /dev/null +++ b/pnpm-workspace.yaml @@ -0,0 +1,13 @@ +packages: + - "packages/*" + +overrides: + "@types/node": ^24.3.0 + +dedupePeerDependents: false +preferWorkspacePackages: true +injectWorkspacePackages: true +syncInjectedDepsAfterScripts: + - build +shamefullyHoist: false +verifyDepsBeforeRun: install diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 00000000..9987127d --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,43 @@ +[project] +name = "klo-workspace" +version = "0.0.0" +description = "Workspace root for klo Python packages" +requires-python = ">=3.13" +license = "Apache-2.0" +dependencies = [] + +[project.urls] +Homepage = "https://github.com/kaelio/ktx" +Repository = "https://github.com/kaelio/ktx" +Issues = "https://github.com/kaelio/ktx/issues" + +[dependency-groups] +dev = [ + "pytest>=9.0.2", + "ruff>=0.8.4", +] + +[tool.uv] +required-version = "0.11.11" + +[[tool.uv.index]] +name = "pytorch-cpu" +url = "https://download.pytorch.org/whl/cpu" +explicit = true + +[tool.uv.sources] +torch = { index = "pytorch-cpu" } + +[tool.uv.workspace] +members = [ + "python/klo-sl", + "python/klo-daemon", +] + +[tool.pytest.ini_options] +addopts = ["--import-mode=importlib"] +pythonpath = ["python/klo-sl/tests"] +testpaths = [ + "python/klo-sl/tests", + "python/klo-daemon/tests", +] diff --git a/python/klo-daemon/README.md b/python/klo-daemon/README.md new file mode 100644 index 00000000..f886bde1 --- /dev/null +++ b/python/klo-daemon/README.md @@ -0,0 +1,104 @@ +# klo-daemon + +`klo-daemon` is the portable Python compute package for KLO. + +It supports portable compute in two modes: + +- One-shot commands, used by default by `@klo/context`. +- An explicit HTTP server for long-running local MCP sessions. 
+ +## One-shot semantic query + +```bash +printf '%s\n' '{"sources":[],"query":{"measures":[],"dimensions":[]},"dialect":"postgres"}' \ + | klo-daemon semantic-query +``` + +## One-shot source generation + +Generate semantic-layer sources from schema scan data: + +```bash +printf '%s\n' '{"tables":[{"name":"orders","db":"public","columns":[{"name":"id","type":"integer","primary_key":true}]}],"links":[],"dialect":"postgres"}' \ + | klo-daemon semantic-generate-sources +``` + +## One-shot database introspection + +Introspect a Postgres database schema: + +```bash +printf '%s\n' '{"connection_id":"warehouse","driver":"postgres","url":"postgresql://readonly@example.test/warehouse","schemas":["public"]}' \ + | klo-daemon database-introspect +``` + +## One-shot LookML parsing + +Parse LookML projects into resolved, KSL-ready structures: + +```bash +printf '%s\n' '{"files":[{"path":"views/orders.view.lkml","content":"view: orders { sql_table_name: public.orders ;; measure: order_count { type: count } }"}],"dialect":"postgres"}' \ + | klo-daemon lookml-parse +``` + +## One-shot embeddings + +Compute text embeddings locally: + +```bash +printf '%s\n' '{"text":"hello"}' \ + | klo-daemon embedding-compute +``` + +Compute text embeddings locally in bulk: + +```bash +printf '%s\n' '{"texts":["hello","world"]}' \ + | klo-daemon embedding-compute-bulk +``` + +## One-shot code execution + +Execute Python code with the current in-process boundary: + +```bash +printf '%s\n' '{"code":"result = 1 + 2"}' \ + | klo-daemon code-execute +``` + +## HTTP compute server + +Start the HTTP compute server with code execution disabled: + +```bash +klo-daemon serve-http --host 127.0.0.1 --port 8765 +``` + +Enable HTTP code execution explicitly: + +```bash +klo-daemon serve-http --host 127.0.0.1 --port 8765 --enable-code-execution +``` + +Available HTTP endpoints: + +- `GET /health` +- `POST /database/introspect` +- `POST /embeddings/compute` +- `POST /embeddings/compute-bulk` +- `POST 
/lookml/parse` +- `POST /semantic-layer/generate-sources` +- `POST /semantic-layer/query` +- `POST /semantic-layer/validate` +- `POST /code/execute` when `--enable-code-execution` is passed + +The HTTP server exposes Postgres database introspection, LookML parsing, local +embedding compute, and semantic-layer compute for source generation, query +compilation, and validation. +Code execution is off by default. When enabled, it runs Python `exec` in the +daemon process with the same in-process boundary as the one-shot +`code-execute` command and does not provide OS-level sandboxing. + +HTTP code execution uses the standalone KLO boundary. It does not forward +caller authorization headers to a host app and does not connect scratchpad or +visualization helpers to host application APIs. diff --git a/python/klo-daemon/pyproject.toml b/python/klo-daemon/pyproject.toml new file mode 100644 index 00000000..10ed78d6 --- /dev/null +++ b/python/klo-daemon/pyproject.toml @@ -0,0 +1,50 @@ +[project] +name = "klo-daemon" +version = "0.1.0" +description = "Portable compute package for KLO semantic-layer operations" +readme = "README.md" +requires-python = ">=3.13" +license = "Apache-2.0" +dependencies = [ + "fastapi>=0.115.0", + "klo-sl", + "lkml>=1.3.7", + "numpy>=2.2.6", + "orjson>=3.11.4", + "pandas>=2.2.3", + "psycopg[binary]>=3.2.0", + "pydantic>=2.9.0", + "requests>=2.32.0", + "sentence-transformers>=5.1.1", + "sqlglot>=26", + "torch>=2.2.0", + "uvicorn[standard]>=0.32.0", +] + +[project.scripts] +klo-daemon = "klo_daemon.__main__:main" + +[project.urls] +Homepage = "https://github.com/kaelio/ktx" +Repository = "https://github.com/kaelio/ktx" +Issues = "https://github.com/kaelio/ktx/issues" + +[build-system] +requires = ["hatchling"] +build-backend = "hatchling.build" + +[tool.hatch.build.targets.wheel] +packages = ["src/klo_daemon"] + +[dependency-groups] +dev = [ + "httpx>=0.28.1", + "pytest>=9.0.2", +] + +[tool.uv.sources] +klo-sl = { workspace = true } + 
+[tool.pytest.ini_options] +testpaths = ["tests"] +pythonpath = ["src"] diff --git a/python/klo-daemon/src/klo_daemon/__init__.py b/python/klo-daemon/src/klo_daemon/__init__.py new file mode 100644 index 00000000..5fa3a0a8 --- /dev/null +++ b/python/klo-daemon/src/klo_daemon/__init__.py @@ -0,0 +1,6 @@ +"""Portable compute package for KLO.""" + +PACKAGE_NAME = "klo-daemon" +VERSION = "0.1.0" + +__all__ = ["PACKAGE_NAME", "VERSION"] diff --git a/python/klo-daemon/src/klo_daemon/__main__.py b/python/klo-daemon/src/klo_daemon/__main__.py new file mode 100644 index 00000000..a6bac682 --- /dev/null +++ b/python/klo-daemon/src/klo_daemon/__main__.py @@ -0,0 +1,172 @@ +"""Command entry point for one-shot KLO daemon compute operations.""" + +from __future__ import annotations + +import argparse +import json +import sys +from typing import Any + +from pydantic import ValidationError + +from klo_daemon.code_execution import ExecuteCodeRequest, execute_code_response +from klo_daemon.database_introspection import ( + DatabaseIntrospectionRequest, + introspect_database_response, +) +from klo_daemon.embeddings import ( + ComputeEmbeddingBulkRequest, + ComputeEmbeddingRequest, + compute_embedding_bulk_response, + compute_embedding_response, +) +from klo_daemon.lookml import ParseLookMLRequest, parse_lookml_project +from klo_daemon.semantic_layer import ( + SemanticLayerQueryRequest, + ValidateSourcesRequest, + query_semantic_layer, + validate_semantic_layer, +) +from klo_daemon.source_generation import ( + GenerateSourcesRequest, + generate_sources_response, +) + + +def build_parser() -> argparse.ArgumentParser: + parser = argparse.ArgumentParser(prog="klo-daemon") + subcommands = parser.add_subparsers(dest="command", required=True) + subcommands.add_parser("semantic-query", help="Compile a semantic-layer query") + subcommands.add_parser("semantic-validate", help="Validate semantic-layer sources") + subcommands.add_parser( + "semantic-generate-sources", + help="Generate 
semantic-layer sources from schema scan data", + ) + subcommands.add_parser( + "database-introspect", + help="Introspect a Postgres database schema", + ) + subcommands.add_parser( + "lookml-parse", + help="Parse LookML files into KSL-ready structures", + ) + subcommands.add_parser( + "embedding-compute", + help="Compute one local text embedding", + ) + subcommands.add_parser( + "embedding-compute-bulk", + help="Compute local text embeddings in bulk", + ) + subcommands.add_parser( + "code-execute", + help="Execute Python code with the current in-process boundary", + ) + serve_http = subcommands.add_parser( + "serve-http", + help="Run the KLO daemon portable compute HTTP server", + ) + serve_http.add_argument("--host", default="127.0.0.1") + serve_http.add_argument("--port", type=int, default=8765) + serve_http.add_argument( + "--log-level", + default="info", + choices=["critical", "error", "warning", "info", "debug", "trace"], + ) + serve_http.add_argument( + "--enable-code-execution", + action="store_true", + help="Expose POST /code/execute on the HTTP server", + ) + return parser + + +def _read_stdin_json() -> dict[str, Any]: + raw = sys.stdin.read() + parsed = json.loads(raw) + if not isinstance(parsed, dict): + raise ValueError("stdin JSON must be an object") + return parsed + + +def run_http_server( + *, + host: str, + port: int, + log_level: str, + enable_code_execution: bool, +) -> None: + import uvicorn + + from klo_daemon.app import create_app + + uvicorn.run( + create_app(enable_code_execution=enable_code_execution), + host=host, + port=port, + log_level=log_level, + ) + + +def main(argv: list[str] | None = None) -> int: + parser = build_parser() + args = parser.parse_args(argv) + + if args.command == "serve-http": + run_http_server( + host=args.host, + port=args.port, + log_level=args.log_level, + enable_code_execution=args.enable_code_execution, + ) + return 0 + + try: + payload = _read_stdin_json() + if args.command == "semantic-query": + response = 
query_semantic_layer( + SemanticLayerQueryRequest.model_validate(payload) + ) + elif args.command == "semantic-validate": + response = validate_semantic_layer( + ValidateSourcesRequest.model_validate(payload) + ) + elif args.command == "semantic-generate-sources": + response = generate_sources_response( + GenerateSourcesRequest.model_validate(payload) + ) + elif args.command == "database-introspect": + response = introspect_database_response( + DatabaseIntrospectionRequest.model_validate(payload) + ) + elif args.command == "lookml-parse": + response = parse_lookml_project(ParseLookMLRequest.model_validate(payload)) + elif args.command == "embedding-compute": + response = compute_embedding_response( + ComputeEmbeddingRequest.model_validate(payload) + ) + elif args.command == "embedding-compute-bulk": + response = compute_embedding_bulk_response( + ComputeEmbeddingBulkRequest.model_validate(payload) + ) + elif args.command == "code-execute": + response = execute_code_response( + ExecuteCodeRequest.model_validate(payload), + nest_api_url=None, + auth_header=None, + ) + else: + parser.error(f"Unknown command: {args.command}") + return 2 + sys.stdout.write(response.model_dump_json() + "\n") + return 0 + except (json.JSONDecodeError, ValidationError, ValueError) as error: + sys.stderr.write(f"{error}\n") + return 1 + except Exception as error: + sys.stderr.write(f"{type(error).__name__}: {error}\n") + return 1 + + +if __name__ == "__main__": + raise SystemExit(main()) diff --git a/python/klo-daemon/src/klo_daemon/app.py b/python/klo-daemon/src/klo_daemon/app.py new file mode 100644 index 00000000..fcf7a298 --- /dev/null +++ b/python/klo-daemon/src/klo_daemon/app.py @@ -0,0 +1,228 @@ +"""FastAPI app factory for the KLO daemon semantic compute server.""" + +from __future__ import annotations + +import logging +from collections.abc import Callable +from typing import Any + +from fastapi import FastAPI, HTTPException +from fastapi.responses import Response + +from 
klo_daemon.code_execution import ( + ExecuteCodeRequest, + ExecuteCodeResponse, + dumps_numpy_json, + execute_code_response, +) +from klo_daemon.database_introspection import ( + DatabaseIntrospectionRequest, + DatabaseIntrospectionResponse, + introspect_database_response, +) +from klo_daemon.embeddings import ( + ComputeEmbeddingBulkRequest, + ComputeEmbeddingBulkResponse, + ComputeEmbeddingRequest, + ComputeEmbeddingResponse, + EmbeddingProvider, + compute_embedding_bulk_response, + compute_embedding_response, +) +from klo_daemon.lookml import ( + ParseLookMLRequest, + ParseLookMLResponse, + parse_lookml_project, +) +from klo_daemon.semantic_layer import ( + SemanticLayerQueryRequest, + SemanticLayerQueryResponse, + ValidateSourcesRequest, + ValidateSourcesResponse, + query_semantic_layer, + validate_semantic_layer, +) +from klo_daemon.source_generation import ( + GenerateSourcesRequest, + GenerateSourcesResponse, + generate_sources_response, +) +from klo_daemon.table_identifier import ( + ParseTableIdentifierBatchRequest, + ParseTableIdentifierBatchResponse, + parse_table_identifier_response, +) + +logger = logging.getLogger(__name__) + + +class NumpyORJSONResponse(Response): + media_type = "application/json" + + def render(self, content: Any) -> bytes: + return dumps_numpy_json(content) + + +def create_app( + *, + embedding_provider: EmbeddingProvider | None = None, + database_introspector: Callable[ + [DatabaseIntrospectionRequest], DatabaseIntrospectionResponse + ] + | None = None, + enable_code_execution: bool = False, +) -> FastAPI: + app = FastAPI( + title="KLO Daemon", + description="Stateless portable compute server for KLO.", + version="0.1.0", + ) + + @app.get("/health") + async def health() -> dict[str, str]: + return {"status": "healthy"} + + @app.post("/database/introspect", response_model=DatabaseIntrospectionResponse) + async def database_introspect( + request: DatabaseIntrospectionRequest, + ) -> DatabaseIntrospectionResponse: + try: + 
introspector = database_introspector or introspect_database_response + return introspector(request) + except ValueError as error: + logger.warning("Database introspection rejected: %s", error) + raise HTTPException(status_code=400, detail=str(error)) from error + except Exception as error: + logger.exception("Database introspection failed: %s", error) + raise HTTPException( + status_code=500, + detail=f"Database introspection failed: {error}", + ) from error + + @app.post("/embeddings/compute", response_model=ComputeEmbeddingResponse) + async def embedding_compute( + request: ComputeEmbeddingRequest, + ) -> ComputeEmbeddingResponse: + try: + return compute_embedding_response( + request, + provider=embedding_provider, + ) + except ValueError as error: + logger.warning("Embedding compute rejected: %s", error) + raise HTTPException(status_code=400, detail=str(error)) from error + except Exception as error: + logger.exception("Embedding compute failed: %s", error) + raise HTTPException( + status_code=500, + detail=f"Embedding compute failed: {error}", + ) from error + + @app.post( + "/embeddings/compute-bulk", + response_model=ComputeEmbeddingBulkResponse, + ) + async def embedding_compute_bulk( + request: ComputeEmbeddingBulkRequest, + ) -> ComputeEmbeddingBulkResponse: + try: + return compute_embedding_bulk_response( + request, + provider=embedding_provider, + ) + except ValueError as error: + logger.warning("Bulk embedding compute rejected: %s", error) + raise HTTPException(status_code=400, detail=str(error)) from error + except Exception as error: + logger.exception("Bulk embedding compute failed: %s", error) + raise HTTPException( + status_code=500, + detail=f"Bulk embedding compute failed: {error}", + ) from error + + if enable_code_execution: + + @app.post( + "/code/execute", + response_model=ExecuteCodeResponse, + response_class=NumpyORJSONResponse, + ) + async def code_execute(request: ExecuteCodeRequest) -> ExecuteCodeResponse: + try: + return 
execute_code_response( + request, + nest_api_url=None, + auth_header=None, + ) + except Exception as error: + logger.exception("Code execution failed: %s", error) + raise HTTPException( + status_code=500, + detail=f"Code execution failed: {error}", + ) from error + + @app.post("/lookml/parse", response_model=ParseLookMLResponse) + async def lookml_parse(request: ParseLookMLRequest) -> ParseLookMLResponse: + try: + return parse_lookml_project(request) + except Exception as error: + logger.exception("LookML parsing failed: %s", error) + raise HTTPException( + status_code=500, + detail=f"LookML parsing failed: {error}", + ) from error + + @app.post( + "/sql/parse-table-identifier", + response_model=ParseTableIdentifierBatchResponse, + ) + async def sql_parse_table_identifier( + request: ParseTableIdentifierBatchRequest, + ) -> ParseTableIdentifierBatchResponse: + try: + return parse_table_identifier_response(request) + except Exception as error: + logger.exception("Table identifier parsing failed: %s", error) + raise HTTPException( + status_code=500, + detail=f"Table identifier parsing failed: {error}", + ) from error + + @app.post( + "/semantic-layer/generate-sources", response_model=GenerateSourcesResponse + ) + async def semantic_generate_sources( + request: GenerateSourcesRequest, + ) -> GenerateSourcesResponse: + try: + return generate_sources_response(request) + except Exception as error: + logger.exception("Semantic source generation failed: %s", error) + raise HTTPException( + status_code=500, + detail=f"Semantic source generation failed: {error}", + ) from error + + @app.post("/semantic-layer/query", response_model=SemanticLayerQueryResponse) + async def semantic_query( + request: SemanticLayerQueryRequest, + ) -> SemanticLayerQueryResponse: + try: + return query_semantic_layer(request) + except ValueError as error: + logger.warning("Semantic query rejected: %s", error) + raise HTTPException(status_code=400, detail=str(error)) from error + except Exception as 
error: + logger.exception("Semantic query failed: %s", error) + raise HTTPException( + status_code=500, + detail=f"Semantic layer query failed: {error}", + ) from error + + @app.post("/semantic-layer/validate", response_model=ValidateSourcesResponse) + async def semantic_validate( + request: ValidateSourcesRequest, + ) -> ValidateSourcesResponse: + return validate_semantic_layer(request) + + return app diff --git a/python/klo-daemon/src/klo_daemon/code_execution.py b/python/klo-daemon/src/klo_daemon/code_execution.py new file mode 100644 index 00000000..f8d1a425 --- /dev/null +++ b/python/klo-daemon/src/klo_daemon/code_execution.py @@ -0,0 +1,333 @@ +"""Portable in-process code execution helpers for KLO daemon. + +This module preserves the host application's current Python execution behavior. +It runs code with Python ``exec`` in the current process and does not provide +OS-level sandboxing. +""" + +from __future__ import annotations + +import json +import logging +import re +import sys +from collections.abc import Callable +from io import BytesIO, StringIO +from typing import Any + +import numpy as np +import orjson +import pandas as pd +import requests +from pydantic import BaseModel, Field + +logger = logging.getLogger(__name__) + +VALID_VISUALIZATION_TYPES = ["pie", "bar", "line", "area", "table", "boxplot"] + + +class ExecuteCodeRequest(BaseModel): + """Request schema for executing Python code.""" + + code: str = Field(..., description="Python code to execute") + source_id: str | None = Field( + None, + description="Chat/dashboard ID for scratchpad file access", + ) + message_id: str | None = Field( + None, + description="Message ID for visualization association", + ) + + +class VisualizationSpec(BaseModel): + """Specification for a visualization to be saved by the host application.""" + + type: str = Field(..., description="Type marker, always 'visualization'") + vis_type: str = Field( + ..., + description="Visualization type: pie, bar, line, area, table", + 
) + config: dict[str, Any] = Field( + ..., + description="Visualization configuration", + ) + data: list[dict[str, Any]] = Field( + ..., + description="Visualization data", + ) + title: str | None = Field(None, description="Optional title") + + +class ExecuteCodeResponse(BaseModel): + """Response schema for code execution.""" + + formatted_result: str = Field( + ..., + description="Formatted execution result for display", + ) + result: Any | None = Field( + None, + description="The value of the 'result' variable if set", + ) + console_output: str | None = Field( + None, + description="Captured stdout from print statements", + ) + error: str | None = Field(None, description="Error message if execution failed") + message: str | None = Field( + None, + description="Message if no clear result was returned", + ) + visualizations: list[VisualizationSpec] | None = Field( + None, + description="List of visualizations detected in the result", + ) + + +ScratchpadHelpers = tuple[ + Callable[[pd.DataFrame, str | None], str], + Callable[[str], pd.DataFrame], + Callable[[str, dict[str, Any], list[dict[str, Any]]], str], +] + + +def dumps_numpy_json(content: Any) -> bytes: + """Serialize JSON response content with numpy scalar and array support.""" + + return orjson.dumps(content, option=orjson.OPT_SERIALIZE_NUMPY) + + +def _strip_ansi_sequences(text: str) -> str: + ansi_escape = re.compile( + r"\x1b\[[0-9;]*[a-zA-Z]|\x1b\([0-9;]*[a-zA-Z]|\x1b\[[0-9;]*~" + ) + return ansi_escape.sub("", text) + + +def create_scratchpad_helpers( + nest_api_url: str | None, + auth_header: str | None, + source_id: str | None, + message_id: str | None = None, + http_client: Any = requests, +) -> ScratchpadHelpers: + """Create scratchpad and visualization helpers that call host app APIs.""" + + def save_df_to_scratchpad(df: pd.DataFrame, filename: str | None = None) -> str: + if not nest_api_url or not auth_header or not source_id: + raise ValueError( + "nest_api_url, Authorization header, and 
source_id are required " + "for scratchpad operations" + ) + + data_json = df.to_dict(orient="records") + url = f"{nest_api_url}/private_api/scratchpad/{source_id}/files" + response = http_client.post( + url, + data=dumps_numpy_json( + {"filename": filename, "data": data_json, "format": "json"} + ), + headers={"Authorization": auth_header, "Content-Type": "application/json"}, + timeout=30, + ) + response.raise_for_status() + + saved_filename = response.json()["filename"] + rows, _cols = df.shape + return f"{rows} rows saved to {saved_filename}" + + def read_scratchpad_file(filename: str) -> pd.DataFrame: + if not nest_api_url or not auth_header or not source_id: + raise ValueError( + "nest_api_url, Authorization header, and source_id are required " + "for scratchpad operations" + ) + + url = f"{nest_api_url}/private_api/scratchpad/{source_id}/files/{filename}?format=raw" + response = http_client.get( + url, + headers={"Authorization": auth_header, "Accept": "text/csv"}, + timeout=30, + ) + response.raise_for_status() + + content_type = response.headers.get("content-type", "") + if "text/csv" in content_type: + return pd.read_csv(BytesIO(response.content)) + + data = response.json()["data"] + return pd.DataFrame(data) + + def save_visualization( + vis_type: str, + config: dict[str, Any], + data: list[dict[str, Any]], + ) -> str: + if not nest_api_url or not auth_header or not source_id: + raise ValueError( + "nest_api_url, Authorization header, and source_id are required " + "for visualization operations" + ) + + if not message_id: + raise ValueError("message_id is required for visualization operations") + + if vis_type not in VALID_VISUALIZATION_TYPES: + raise ValueError( + f"Invalid visualization type: {vis_type}. 
def detect_visualizations(result: Any) -> list[dict[str, Any]]:
    """Collect visualization specs found in a code execution result.

    A spec is any ``dict`` whose ``"type"`` entry equals ``"visualization"``.
    ``result`` may be a single spec or a list that contains specs; any other
    value yields an empty list. Nested containers are not searched.
    """

    visualizations = []

    if isinstance(result, dict) and result.get("type") == "visualization":
        visualizations.append(result)
    elif isinstance(result, list):
        for item in result:
            if isinstance(item, dict) and item.get("type") == "visualization":
                visualizations.append(item)

    return visualizations


def execute_code(
    code: str,
    nest_api_url: str | None = None,
    auth_header: str | None = None,
    source_id: str | None = None,
    message_id: str | None = None,
    scratchpad_helpers: ScratchpadHelpers | None = None,
) -> dict[str, Any]:
    """Execute Python code with the current in-process execution boundary.

    Args:
        code: Python source to run.
        nest_api_url: Base URL handed to ``create_scratchpad_helpers``.
        auth_header: Authorization header handed to ``create_scratchpad_helpers``.
        source_id: Identifier handed to ``create_scratchpad_helpers``.
        message_id: Identifier handed to ``create_scratchpad_helpers``.
        scratchpad_helpers: Optional ``(save_df, read_file, save_viz)`` triple.
            When given, it is used as-is and no helpers are created.

    Returns:
        A dict holding exactly one of ``"result"`` (the value of the snippet's
        ``result`` variable), ``"message"`` (no ``result`` variable was set) or
        ``"error"`` (``str`` of the exception the snippet raised). It also
        holds ``"console_output"`` when the snippet printed anything, and
        ``"visualizations"`` when ``result`` contains visualization specs.
    """

    logger.info("Starting code execution")
    save_df, read_file, save_viz = scratchpad_helpers or create_scratchpad_helpers(
        nest_api_url,
        auth_header,
        source_id,
        message_id,
    )

    namespace = {
        "pd": pd,
        "np": np,
        "json": json,
        "requests": requests,
        "save_df_to_scratchpad": save_df,
        "read_scratchpad_file": read_file,
        "save_visualization": save_viz,
    }

    # sys.stdout is swapped for the whole process while the snippet runs, so
    # print() output lands in the buffer; the finally clause restores it.
    stdout_capture = StringIO()
    original_stdout = sys.stdout
    sys.stdout = stdout_capture

    result: dict[str, Any]
    try:
        logger.info("Executing code in current process namespace")
        # SECURITY: exec runs caller-supplied code inside this process with no
        # sandboxing. Only expose this entry point to trusted callers.
        exec(code, namespace)

        console_output = stdout_capture.getvalue()
        if "result" in namespace:
            logger.info("Code execution complete, 'result' variable found")
            result_value = namespace["result"]
            visualizations = detect_visualizations(result_value)

            result = {"result": result_value}
            if console_output:
                result["console_output"] = console_output
            if visualizations:
                result["visualizations"] = visualizations

            return result

        logger.info("No result variable found")
        result = {
            "message": "Code executed successfully but no result variable was set"
        }
        if console_output:
            result["console_output"] = console_output
        return result

    except Exception as error:
        logger.exception("Error executing code: %s", error)
        # Read the buffer here as well: whatever the snippet printed before it
        # raised is often the most useful debugging context, and it would
        # otherwise be lost because the success-path read never ran.
        console_output = stdout_capture.getvalue()
        result = {"error": str(error)}
        if console_output:
            result["console_output"] = console_output
        return result

    finally:
        sys.stdout = original_stdout


def format_execution_result(result: dict[str, Any]) -> str:
    """Format execution output for display in host chat responses.

    Emits an optional ``=== Console Output ===`` section (with ANSI escape
    sequences stripped) followed by exactly one of the ``Result``, ``Message``
    or ``Error`` sections, chosen in that order of precedence.
    """

    formatted_result = ""
    if "console_output" in result:
        formatted_result += "=== Console Output ===\n\n"
        formatted_result += _strip_ansi_sequences(result["console_output"])

    if "result" in result:
        formatted_result += "\n\n=== Result ===\n\n"
        formatted_result += str(result["result"])
    elif "message" in result:
        formatted_result += "\n\n=== Message ===\n\n"
        formatted_result += result["message"]
    elif "error" in result:
        formatted_result += "\n\n=== Error ===\n\n"
        formatted_result += result["error"]

    return formatted_result
formatted_result=format_execution_result(result), + result=result.get("result"), + console_output=result.get("console_output"), + error=result.get("error"), + message=result.get("message"), + visualizations=result.get("visualizations"), + ) diff --git a/python/klo-daemon/src/klo_daemon/database_introspection.py b/python/klo-daemon/src/klo_daemon/database_introspection.py new file mode 100644 index 00000000..abb58e37 --- /dev/null +++ b/python/klo-daemon/src/klo_daemon/database_introspection.py @@ -0,0 +1,284 @@ +"""Portable database introspection helpers for KLO daemon.""" + +from __future__ import annotations + +from collections.abc import Callable, Mapping, Sequence +from dataclasses import dataclass +from datetime import datetime, timezone +from typing import Any + +from pydantic import BaseModel, Field, field_validator + + +TABLES_SQL = """ +select + t.table_catalog, + t.table_schema, + t.table_name, + obj_description(c.oid) as table_comment +from information_schema.tables t +join pg_catalog.pg_namespace n + on n.nspname = t.table_schema +join pg_catalog.pg_class c + on c.relnamespace = n.oid + and c.relname = t.table_name +where t.table_schema = any(%s) + and t.table_type = 'BASE TABLE' +order by t.table_schema, t.table_name +""" + +COLUMNS_SQL = """ +select + current_database() as table_catalog, + n.nspname as table_schema, + c.relname as table_name, + a.attname as column_name, + pg_catalog.format_type(a.atttypid, a.atttypmod) as formatted_type, + not a.attnotnull as is_nullable, + exists ( + select 1 + from pg_catalog.pg_index i + where i.indrelid = c.oid + and i.indisprimary + and a.attnum = any(i.indkey) + ) as is_primary_key, + pg_catalog.col_description(c.oid, a.attnum) as column_comment +from pg_catalog.pg_attribute a +join pg_catalog.pg_class c + on c.oid = a.attrelid +join pg_catalog.pg_namespace n + on n.oid = c.relnamespace +where n.nspname = any(%s) + and c.relkind in ('r', 'p') + and a.attnum > 0 + and not a.attisdropped +order by n.nspname, 
c.relname, a.attnum +""" + +FOREIGN_KEYS_SQL = """ +select + current_database() as table_catalog, + source_constraint.table_schema, + source_constraint.table_name, + source_key.column_name as from_column, + target_key.table_name as to_table, + target_key.column_name as to_column, + source_constraint.constraint_name +from information_schema.table_constraints source_constraint +join information_schema.key_column_usage source_key + on source_key.constraint_catalog = source_constraint.constraint_catalog + and source_key.constraint_schema = source_constraint.constraint_schema + and source_key.constraint_name = source_constraint.constraint_name +join information_schema.referential_constraints ref_constraint + on ref_constraint.constraint_catalog = source_constraint.constraint_catalog + and ref_constraint.constraint_schema = source_constraint.constraint_schema + and ref_constraint.constraint_name = source_constraint.constraint_name +join information_schema.key_column_usage target_key + on target_key.constraint_catalog = ref_constraint.unique_constraint_catalog + and target_key.constraint_schema = ref_constraint.unique_constraint_schema + and target_key.constraint_name = ref_constraint.unique_constraint_name + and target_key.ordinal_position = source_key.position_in_unique_constraint +where source_constraint.constraint_type = 'FOREIGN KEY' + and source_constraint.table_schema = any(%s) +order by source_constraint.table_schema, source_constraint.table_name, source_constraint.constraint_name, source_key.ordinal_position +""" + + +class LiveDatabaseColumn(BaseModel): + name: str + type: str + nullable: bool = True + primary_key: bool = False + comment: str | None = None + + +class LiveDatabaseForeignKey(BaseModel): + from_column: str + to_table: str + to_column: str + constraint_name: str | None = None + + +class LiveDatabaseTable(BaseModel): + catalog: str | None = None + db: str | None = None + name: str + comment: str | None = None + columns: list[LiveDatabaseColumn] = 
# Request accepted by the database introspection entry points.
class DatabaseIntrospectionRequest(BaseModel):
    connection_id: str
    driver: str = "postgres"
    url: str
    schemas: list[str] = Field(default_factory=lambda: ["public"])
    statement_timeout_ms: int = Field(default=30_000, ge=1)
    connection_timeout_seconds: int = Field(default=5, ge=1)

    @field_validator("schemas")
    @classmethod
    def _schemas_must_not_be_empty(cls, value: list[str]) -> list[str]:
        # An explicitly empty list is rejected; omitting the field falls back
        # to the ["public"] default above.
        if not value:
            raise ValueError("database introspection requires at least one schema")
        return value


# Response returned by the database introspection entry points.
class DatabaseIntrospectionResponse(BaseModel):
    connection_id: str
    extracted_at: str
    metadata: dict[str, Any]
    tables: list[LiveDatabaseTable]


# Raw catalog rows, one sequence per query (tables, columns, foreign keys).
@dataclass(frozen=True)
class DatabaseIntrospectionRows:
    table_rows: Sequence[Mapping[str, Any]]
    column_rows: Sequence[Mapping[str, Any]]
    foreign_key_rows: Sequence[Mapping[str, Any]]


DatabaseRowsLoader = Callable[[DatabaseIntrospectionRequest], DatabaseIntrospectionRows]
NowProvider = Callable[[], str]


def _driver_name(driver: str) -> str:
    """Normalize a driver name by trimming whitespace and lower-casing it."""
    return driver.strip().lower()


def _table_key(catalog: str | None, db: str | None, name: str) -> str:
    """Build a lookup/sort key from catalog, schema and table name.

    The parts are joined with NUL characters; missing parts become ``""``.
    """
    return f"{catalog or ''}\u0000{db or ''}\u0000{name}"


def _optional_string(row: Mapping[str, Any], key: str) -> str | None:
    """Return ``row[key]`` when it is a string, otherwise ``None``."""
    value = row.get(key)
    return value if isinstance(value, str) else None


def _required_string(row: Mapping[str, Any], key: str) -> str:
    """Return ``row[key]`` when it is a non-empty string.

    Raises:
        ValueError: If the value is missing, empty, or not a string.
    """
    value = row.get(key)
    if not isinstance(value, str) or not value:
        raise ValueError(f"database introspection row is missing string field {key}")
    return value


def _statement_timeout_config(statement_timeout_ms: int) -> tuple[str, tuple[str]]:
    """Return the SQL and parameters that set a transaction-local timeout.

    The third ``set_config`` argument is ``true``, so the setting applies only
    to the current transaction.
    """
    return (
        "SELECT set_config('statement_timeout', %s, true)",
        (f"{int(statement_timeout_ms)}ms",),
    )


def _load_postgres_rows(
    request: DatabaseIntrospectionRequest,
) -> DatabaseIntrospectionRows:
    """Run the three catalog queries inside one explicit read-only transaction.

    Args:
        request: Supplies the connection URL, connect timeout, statement
            timeout and the schema list bound to each query.

    Returns:
        The table, column and foreign-key rows, each materialized as a list.

    Raises:
        RuntimeError: If ``psycopg`` cannot be imported.
        Exception: Any error raised while connecting or querying is re-raised
            after a ROLLBACK has been issued; the connection is always closed.
    """
    try:
        import psycopg
        from psycopg.rows import dict_row
    except ImportError as error:
        raise RuntimeError(
            "psycopg is required for Postgres database introspection"
        ) from error

    connection = psycopg.connect(
        request.url,
        # On a non-autocommit connection psycopg sends its own BEGIN before
        # the first statement, which turns the explicit BEGIN READ ONLY below
        # into a nested BEGIN (server warning "there is already a transaction
        # in progress"). Autocommit leaves transaction control entirely to the
        # explicit BEGIN/COMMIT/ROLLBACK statements in this function.
        autocommit=True,
        connect_timeout=request.connection_timeout_seconds,
        application_name="klo-daemon-database-introspection",
        row_factory=dict_row,
    )
    try:
        connection.execute("BEGIN READ ONLY")
        try:
            # The timeout is transaction-local, so it must be set after BEGIN.
            connection.execute(*_statement_timeout_config(request.statement_timeout_ms))
            # Each query takes the schema list as its single bound parameter.
            params = (request.schemas,)
            table_rows = list(connection.execute(TABLES_SQL, params))
            column_rows = list(connection.execute(COLUMNS_SQL, params))
            foreign_key_rows = list(connection.execute(FOREIGN_KEYS_SQL, params))
            connection.execute("COMMIT")
        except Exception:
            connection.execute("ROLLBACK")
            raise
    finally:
        connection.close()

    return DatabaseIntrospectionRows(
        table_rows=table_rows,
        column_rows=column_rows,
        foreign_key_rows=foreign_key_rows,
    )
def introspect_database_response(
    request: DatabaseIntrospectionRequest,
    *,
    load_rows: DatabaseRowsLoader | None = None,
    now: NowProvider | None = None,
) -> DatabaseIntrospectionResponse:
    """Introspect the database described by ``request`` and build the response.

    Args:
        request: Connection settings and the schemas to introspect.
        load_rows: Optional replacement for the live Postgres loader; it
            receives ``request`` and returns the raw catalog rows.
        now: Optional callable returning the ``extracted_at`` timestamp string.
            When omitted, the current UTC time in ISO-8601 form is used.

    Returns:
        The response carrying the connection id, the extraction timestamp, the
        normalized driver and schema list as metadata, and the mapped tables.

    Raises:
        ValueError: If the normalized driver is neither ``postgres`` nor
            ``postgresql``.
    """
    normalized_driver = _driver_name(request.driver)
    if normalized_driver != "postgres" and normalized_driver != "postgresql":
        raise ValueError('database introspection supports only driver "postgres"')

    # Rows are loaded before the timestamp is taken, so ``extracted_at``
    # reflects the moment the catalog read finished.
    loader = load_rows if load_rows else _load_postgres_rows
    loaded_rows = loader(request)

    if now:
        extracted_at = now()
    else:
        extracted_at = datetime.now(timezone.utc).isoformat()

    response_metadata = {
        "driver": normalized_driver,
        "schemas": [*request.schemas],
    }
    return DatabaseIntrospectionResponse(
        connection_id=request.connection_id,
        extracted_at=extracted_at,
        metadata=response_metadata,
        tables=_map_rows_to_tables(loaded_rows),
    )
class EmbeddingProvider(Protocol):
    """Structural interface that a local embedding backend must satisfy."""

    @property
    def name(self) -> str:
        """Identifier of the provider implementation."""
        ...

    @property
    def dimensions(self) -> int:
        """Length of each embedding vector the provider produces."""
        ...

    @property
    def max_batch_size(self) -> int:
        """Largest number of texts accepted in a single batch."""
        ...

    def encode(self, texts: list[str]) -> list[list[float]]:
        """Return one embedding vector per input text, in input order."""
        ...
sentence_transformers import SentenceTransformer + + logger.info("Loading SentenceTransformer model: %s", self.model_name) + self._model = SentenceTransformer(self.model_name) + logger.info("SentenceTransformer model loaded successfully") + + return self._model + + def encode(self, texts: list[str]) -> list[list[float]]: + model = self._get_model() + if len(texts) == 1: + raw_single = model.encode(texts[0]).tolist() + return [[float(value) for value in raw_single]] + + raw_bulk = model.encode(texts).tolist() + return [[float(value) for value in embedding] for embedding in raw_bulk] + + +_default_provider: SentenceTransformersEmbeddingProvider | None = None +_default_provider_lock = threading.Lock() + + +def get_default_embedding_provider() -> SentenceTransformersEmbeddingProvider: + """Return the process-wide default embedding provider.""" + + global _default_provider + + if _default_provider is not None: + return _default_provider + + with _default_provider_lock: + if _default_provider is None: + _default_provider = SentenceTransformersEmbeddingProvider() + + return _default_provider + + +def _validate_texts(texts: list[str], max_batch_size: int) -> None: + if not texts: + raise ValueError("Texts array must not be empty") + if len(texts) > max_batch_size: + raise ValueError(f"Maximum {max_batch_size} texts allowed per batch") + + empty_indices = [ + index for index, text in enumerate(texts) if not text or not text.strip() + ] + if empty_indices: + joined_indices = ", ".join(str(index) for index in empty_indices) + raise ValueError(f"Empty texts found at indices: {joined_indices}") + + +def compute_embedding_response( + request: ComputeEmbeddingRequest, + provider: EmbeddingProvider | None = None, +) -> ComputeEmbeddingResponse: + """Compute one embedding from a request model.""" + + selected_provider = provider or get_default_embedding_provider() + _validate_texts([request.text], selected_provider.max_batch_size) + return ComputeEmbeddingResponse( + 
# Patterns used to recognise LookML substitution syntax inside SQL snippets.

# ``@{constant_name}`` — reference to a project constant.
CONSTANT_RE = re.compile(r"@\{(\w+)\}")
# A SQL snippet that is exactly ``${TABLE}.column`` (surrounding whitespace
# allowed); group 1 is the column name.
TABLE_REF_RE = re.compile(r"^\s*\$\{TABLE\}\.(\w+)\s*$")
# ``${field_name}`` — reference to a field of the same view. This also matches
# the ``${TABLE}`` token itself.
FIELD_REF_RE = re.compile(r"\$\{(\w+)\}")
# ``${view_name.field_name}`` — reference to a field of another view.
VIEW_FIELD_REF_RE = re.compile(r"\$\{(\w+)\.(\w+)\}")
# Liquid templating. Both tag forms are detected: ``{% ... %}`` logic tags and
# ``{{ ... }}`` output tags. DOTALL lets a tag span several lines, which is
# common in hand-formatted LookML SQL blocks. Without these two additions a
# field using only ``{{ value }}`` or a multi-line tag is not recognised as
# templated and its raw Liquid text is passed on as if it were SQL.
LIQUID_RE = re.compile(r"\{%.*?%\}|\{\{.*?\}\}", re.DOTALL)
# An aggregate function call such as ``max(`` or ``COUNT (``.
AGGREGATE_FUNC_RE = re.compile(
    r"\b(min|max|sum|avg|count|count_distinct|median)\s*\(", re.IGNORECASE
)
# LookML measure ``type`` -> aggregate used when building the measure
# expression. ``count`` maps to a complete expression; every other entry is a
# function name that is wrapped around the resolved column.
# NOTE(review): ``sum_distinct`` / ``average_distinct`` map to the plain
# ``sum`` / ``avg`` functions, so the DISTINCT qualifier is not carried over —
# confirm this is intended.
MEASURE_TYPE_MAP: dict[str, str] = dict(
    count="count(*)",
    count_distinct="count_distinct",
    sum="sum",
    sum_distinct="sum",
    average="avg",
    average_distinct="avg",
    min="min",
    max="max",
    median="median",
)
data.get("label") + self.description: str | None = data.get("description") + + self.extends: list[str] = _flatten_all(data.get("extends__all")) + self.derived_table: dict[str, Any] | None = data.get("derived_table") + + self.dimensions: dict[str, dict[str, Any]] = { + d["name"]: d for d in data.get("dimensions", []) + } + self.dimension_groups: dict[str, dict[str, Any]] = { + dg["name"]: dg for dg in data.get("dimension_groups", []) + } + self.measures: dict[str, dict[str, Any]] = { + m["name"]: m for m in data.get("measures", []) + } + + +# ── Main entry point ────────────────────────────────────────────────── + + +def parse_lookml_project(request: ParseLookMLRequest) -> ParseLookMLResponse: + """Parse and resolve a LookML project into KSL-ready structures.""" + constants = _collect_constants(request.files, request.constant_overrides or {}) + raw_views, raw_explores = _parse_all_files(request.files, constants) + resolved = _resolve_inheritance(raw_views) + views, skipped_views, warnings = _build_parsed_views(resolved, request.dialect) + all_joins = _build_parsed_joins(raw_explores, resolved) + + # Filter out joins that reference skipped views (as source or target) + emitted_view_names = {v.name for v in views} + joins = [ + j + for j in all_joins + if j.source_view in emitted_view_names + and (j.to in emitted_view_names or j.alias) + ] + + return ParseLookMLResponse( + views=views, + joins=joins, + skipped_views=skipped_views, + warnings=warnings, + ) + + +# ── Parsing ──────────────────────────────────────────────────────────── + + +def _collect_constants( + files: list[LookMLFileInput], overrides: dict[str, str] +) -> dict[str, str]: + """Extract constants from manifest.lkml and apply overrides.""" + constants: dict[str, str] = {} + for f in files: + if "manifest" in f.path.lower(): + parsed = lkml.load(f.content) + for c in parsed.get("constants", []): + constants[c["name"]] = c.get("value", "") + constants.update(overrides) + return constants + + +def 
_substitute_constants(text: str | None, constants: dict[str, str]) -> str | None: + if text is None: + return None + return CONSTANT_RE.sub(lambda m: constants.get(m.group(1), m.group(0)), text) + + +def _substitute_constants_in_view( + view_data: dict[str, Any], constants: dict[str, str] +) -> None: + """In-place constant substitution across all SQL fields in a view dict.""" + for key in ("sql_table_name", "sql"): + if key in view_data: + view_data[key] = _substitute_constants(view_data[key], constants) + + dt = view_data.get("derived_table") + if dt and "sql" in dt: + dt["sql"] = _substitute_constants(dt["sql"], constants) + + for dim in view_data.get("dimensions", []): + if "sql" in dim: + dim["sql"] = _substitute_constants(dim["sql"], constants) + for dg in view_data.get("dimension_groups", []): + if "sql" in dg: + dg["sql"] = _substitute_constants(dg["sql"], constants) + for m in view_data.get("measures", []): + if "sql" in m: + m["sql"] = _substitute_constants(m["sql"], constants) + + +def _parse_all_files( + files: list[LookMLFileInput], constants: dict[str, str] +) -> tuple[dict[str, _RawView], list[dict[str, Any]]]: + """Parse all .lkml files and return raw views + explores.""" + all_views: dict[str, _RawView] = {} + all_explores: list[dict[str, Any]] = [] + + for f in files: + try: + parsed = lkml.load(f.content) + except Exception: + logger.warning("Failed to parse %s, skipping", f.path) + continue + + for view_data in parsed.get("views", []): + _substitute_constants_in_view(view_data, constants) + name = view_data["name"] + rv = _RawView(name, view_data, f.path) + all_views[name] = rv + + for explore_data in parsed.get("explores", []): + # Substitute constants in sql_on for joins + for j in explore_data.get("joins", []): + if "sql_on" in j: + j["sql_on"] = _substitute_constants(j["sql_on"], constants) + all_explores.append(explore_data) + + return all_views, all_explores + + +# ── Inheritance resolution ───────────────────────────────────────────── + + 
+def _flatten_all(val: Any) -> list[str]: + """Flatten lkml's nested extends__all / filters__all structures.""" + if val is None: + return [] + if isinstance(val, list): + result: list[str] = [] + for item in val: + if isinstance(item, list): + result.extend(_flatten_all(item)) + elif isinstance(item, str): + result.append(item) + return result + return [] + + +def _merge_view(parent: _RawView, child: _RawView) -> None: + """Merge parent fields into child (child takes precedence).""" + # Dimensions: parent first, child overrides + merged_dims = dict(parent.dimensions) + merged_dims.update(child.dimensions) + child.dimensions = merged_dims + + merged_dgs = dict(parent.dimension_groups) + merged_dgs.update(child.dimension_groups) + child.dimension_groups = merged_dgs + + merged_measures = dict(parent.measures) + merged_measures.update(child.measures) + child.measures = merged_measures + + # Inherit sql_table_name and derived_table if child doesn't have them + if child.sql_table_name is None and parent.sql_table_name is not None: + child.sql_table_name = parent.sql_table_name + if child.derived_table is None and parent.derived_table is not None: + child.derived_table = parent.derived_table + if child.description is None and parent.description is not None: + child.description = parent.description + + +def _apply_refinement(target: _RawView, refinement: _RawView) -> None: + """Apply refinement fields to the target view. 
Metadata-only merge for existing fields.""" + for name, dim in refinement.dimensions.items(): + if name in target.dimensions: + # Merge metadata: label, description, hidden, tags, group_label + for key in ("label", "description", "hidden", "tags", "group_label"): + if key in dim: + target.dimensions[name][key] = dim[key] + else: + target.dimensions[name] = dim + + for name, dg in refinement.dimension_groups.items(): + if name in target.dimension_groups: + for key in ("label", "description", "hidden", "tags", "group_label"): + if key in dg: + target.dimension_groups[name][key] = dg[key] + else: + target.dimension_groups[name] = dg + + for name, m in refinement.measures.items(): + if name in target.measures: + for key in ("label", "description", "hidden", "tags", "group_label"): + if key in m: + target.measures[name][key] = m[key] + else: + target.measures[name] = m + + if refinement.label: + target.label = refinement.label + if refinement.description: + target.description = refinement.description + + +def _resolve_inheritance(raw_views: dict[str, _RawView]) -> dict[str, _RawView]: + """Resolve extends and refinements. 
Returns only concrete views.""" + # Separate refinements from regular views + refinements: list[_RawView] = [] + views: dict[str, _RawView] = {} + + for name, rv in raw_views.items(): + if rv.is_refinement: + refinements.append(rv) + else: + views[name] = rv + + # Resolve extends (topological order via iterative resolution) + resolved: set[str] = set() + max_passes = len(views) + 1 + for _ in range(max_passes): + progress = False + for name, view in views.items(): + if name in resolved: + continue + if not view.extends: + resolved.add(name) + progress = True + continue + # Check if all parents are resolved + parents_ready = all(p in resolved for p in view.extends) + if parents_ready: + for parent_name in view.extends: + parent = views.get(parent_name) + if parent: + _merge_view(parent, view) + resolved.add(name) + progress = True + if not progress: + break + + # Apply refinements + for ref in refinements: + target_name = ref.name.lstrip("+") + target = views.get(target_name) + if target: + _apply_refinement(target, ref) + else: + logger.warning( + "Refinement target '%s' not found for '%s'", target_name, ref.name + ) + + return views + + +# ── Alias view detection ────────────────────────────────────────────── + + +def _detect_alias_views(resolved: dict[str, _RawView]) -> dict[str, str]: + """Detect views that are aliases of another view (same table_ref, extends parent). + + Returns dict of {alias_name: parent_name}. + """ + # Build table_ref → first view name map (canonical view per table) + table_to_canonical: dict[str, str] = {} + for name, rv in resolved.items(): + if rv.extension_required or rv.is_refinement: + continue + if rv.sql_table_name and not rv.extends: + table_ref = rv.sql_table_name.strip() + if table_ref not in table_to_canonical: + table_to_canonical[table_ref] = name + + # Find views that extend another, share the same table_ref, + # and add no new fields (pure aliases used only for join renaming). 
+ alias_views: dict[str, str] = {} + for name, rv in resolved.items(): + if rv.extension_required or rv.is_refinement: + continue + if rv.extends and rv.sql_table_name: + table_ref = rv.sql_table_name.strip() + canonical = table_to_canonical.get(table_ref) + if canonical and canonical != name and _is_pure_alias(rv, resolved): + alias_views[name] = canonical + + return alias_views + + +def _is_pure_alias(rv: _RawView, resolved: dict[str, _RawView]) -> bool: + """A view is a pure alias if it adds no new fields and does not override any + inherited field's definition. Compare dict values, not just names — a child + that redefines a measure/dimension must not be classified as alias-only.""" + parent_dims: dict[str, dict] = {} + parent_dgs: dict[str, dict] = {} + parent_meas: dict[str, dict] = {} + for parent_name in rv.extends: + parent = resolved.get(parent_name) + if parent: + parent_dims.update(parent.dimensions) + parent_dgs.update(parent.dimension_groups) + parent_meas.update(parent.measures) + + for name, d in rv.dimensions.items(): + if parent_dims.get(name) != d: + return False + for name, d in rv.dimension_groups.items(): + if parent_dgs.get(name) != d: + return False + for name, d in rv.measures.items(): + if parent_meas.get(name) != d: + return False + return True + + +# ── View → ParsedLookMLView conversion ──────────────────────────────── + + +def _build_parsed_views( + resolved: dict[str, _RawView], + dialect: str, +) -> tuple[list[ParsedLookMLView], list[SkippedItem], list[str]]: + views: list[ParsedLookMLView] = [] + skipped: list[SkippedItem] = [] + warnings: list[str] = [] + + # Detect aliased views: views that extend another and share the same table_ref. + # These are used only as join aliases (e.g., customer_nation extends nation). 
+ alias_view_names = _detect_alias_views(resolved) + + for name, rv in resolved.items(): + # Skip abstract base views + if rv.extension_required: + skipped.append( + SkippedItem( + name=name, + item_type="view", + reason="abstract base view (extension: required)", + ) + ) + continue + + # Skip alias-only views (handled via join aliases) + if name in alias_view_names: + skipped.append( + SkippedItem( + name=name, + item_type="view", + reason=f"alias view (same table as '{alias_view_names[name]}', used as join alias)", + ) + ) + continue + + # Determine source type + has_explore_source = rv.derived_table and "explore_source" in rv.derived_table + has_sql_dt = rv.derived_table and "sql" in rv.derived_table + has_table_ref = rv.sql_table_name is not None + + if has_explore_source: + skipped.append( + SkippedItem( + name=name, + item_type="view", + reason="native derived table (explore_source not supported)", + ) + ) + continue + if not has_table_ref and not has_sql_dt: + skipped.append( + SkippedItem( + name=name, + item_type="view", + reason="no sql_table_name or derived_table.sql", + ) + ) + continue + + source_type: Literal["table", "sql"] = "sql" if has_sql_dt else "table" + table_ref = rv.sql_table_name.strip() if rv.sql_table_name else None + raw_sql = rv.derived_table["sql"].strip() if has_sql_dt else None + + # Build field→column lookup for this view + field_to_col = _build_field_column_map(rv) + # For sql sources, extract columns (always parse as postgres — LookML source dialect) + # then transpile to target dialect for the output SQL + sqlglot_columns: dict[str, str] = {} + sql_text = raw_sql + if raw_sql: + sqlglot_columns = _extract_sql_columns(raw_sql, "postgres", warnings, name) + sql_text = _transpile_sql(raw_sql, "postgres", dialect, warnings, name) + + # Build columns + columns: list[ParsedColumn] = [] + skipped_dims: list[SkippedItem] = [] + grain: list[str] = [] + + for dim_name, dim in rv.dimensions.items(): + col = _convert_dimension(dim_name, dim, 
source_type, field_to_col) + if isinstance(col, SkippedItem): + skipped_dims.append(col) + elif col is not None: + columns.append(col) + if dim.get("primary_key") == "yes": + grain.append(col.name) + + for dg_name, dg in rv.dimension_groups.items(): + col = _convert_dimension_group(dg_name, dg) + if col is not None: + columns.append(col) + + # For sql sources, fill in any columns from sqlglot that aren't already declared + if source_type == "sql" and sqlglot_columns: + existing_names = {c.name for c in columns} + for col_name, col_type in sqlglot_columns.items(): + if col_name not in existing_names: + columns.append( + ParsedColumn( + name=col_name, + lookml_name=col_name, + type=col_type, + role="default", + visibility="public", + ) + ) + + # Build measures + measures: list[ParsedMeasure] = [] + skipped_measures: list[SkippedItem] = [] + + for m_name, m in rv.measures.items(): + result = _convert_measure(m_name, m, field_to_col, rv) + if isinstance(result, SkippedItem): + skipped_measures.append(result) + elif result is not None: + measures.append(result) + + views.append( + ParsedLookMLView( + name=name, + source_type=source_type, + table_ref=table_ref, + sql=sql_text, + grain=grain, + columns=columns, + measures=measures, + description=rv.description or rv.label, + skipped_dimensions=skipped_dims, + skipped_measures=skipped_measures, + ) + ) + + return views, skipped, warnings + + +def _build_field_column_map(rv: _RawView) -> dict[str, str]: + """Build a map from LookML field name → actual DB column name or resolved expression. + + For simple ${TABLE}.col dimensions, maps to the column name. + For computed dimensions, resolves ${TABLE}.col refs inline so measures can reference them. 
+ """ + # First pass: collect direct column references + field_to_col: dict[str, str] = {} + for dim_name, dim in rv.dimensions.items(): + sql = dim.get("sql", "") + match = TABLE_REF_RE.match(sql) + if match: + field_to_col[dim_name] = match.group(1) + + for dg_name, dg in rv.dimension_groups.items(): + sql = dg.get("sql", "") + match = TABLE_REF_RE.match(sql) + if match: + field_to_col[dg_name] = match.group(1) + for tf in dg.get("timeframes", []): + field_to_col[f"{dg_name}_{tf}"] = match.group(1) + + # Second pass: resolve computed dimensions to their SQL with ${TABLE}.col replaced + for dim_name, dim in rv.dimensions.items(): + if dim_name in field_to_col: + continue + sql = dim.get("sql", "") + if sql: + # Replace ${TABLE}.col → col + resolved = re.sub(r"\$\{TABLE\}\.(\w+)", r"\1", sql.strip()) + # Replace ${field_name} with already-resolved column names + resolved = FIELD_REF_RE.sub( + lambda m: field_to_col.get(m.group(1), m.group(1)), resolved + ) + field_to_col[dim_name] = resolved + + return field_to_col + + +def _convert_dimension( + name: str, + dim: dict[str, Any], + source_type: str, + field_to_col: dict[str, str], +) -> ParsedColumn | SkippedItem | None: + """Convert a LookML dimension to a ParsedColumn, or skip it.""" + sql = dim.get("sql", "") + + # Skip Liquid templating + if LIQUID_RE.search(sql): + return SkippedItem( + name=name, item_type="dimension", reason="Liquid templating not supported" + ) + + is_direct = TABLE_REF_RE.match(sql) is not None + + if is_direct: + col_name = field_to_col.get(name, name) + expr = None + else: + # Computed dimension: use LookML dim name, store resolved expression in expr + col_name = name + expr = field_to_col.get(name) + if expr is None: + expr = re.sub(r"\$\{TABLE\}\.(\w+)", r"\1", sql.strip()) + + lookml_type = dim.get("type", "string") + ksl_type = LOOKML_TYPE_MAP.get(lookml_type, "string") + + return ParsedColumn( + name=col_name, + lookml_name=name, + type=ksl_type, + role="default", + 
visibility="hidden" if dim.get("hidden") == "yes" else "public", + description=dim.get("description") or dim.get("label"), + is_computed=not is_direct, + expr=expr, + ) + + +def _convert_dimension_group( + name: str, + dg: dict[str, Any], +) -> ParsedColumn | None: + """Convert a dimension_group to a single time column.""" + if dg.get("type") != "time": + return None + + sql = dg.get("sql", "") + match = TABLE_REF_RE.match(sql) + col_name = match.group(1) if match else name + + return ParsedColumn( + name=col_name, + lookml_name=name, + type="time", + role="time", + visibility="hidden" if dg.get("hidden") == "yes" else "public", + description=dg.get("description") or dg.get("label"), + ) + + +def _convert_measure( + name: str, + m: dict[str, Any], + field_to_col: dict[str, str], + rv: _RawView, +) -> ParsedMeasure | SkippedItem | None: + """Convert a LookML measure to a ParsedMeasure, or skip it.""" + measure_type = m.get("type", "") + sql = m.get("sql", "") + + # Skip Liquid templating + if sql and LIQUID_RE.search(sql): + return SkippedItem( + name=name, item_type="measure", reason="Liquid templating not supported" + ) + + # Skip cross-view references + if sql and VIEW_FIELD_REF_RE.search(sql): + return SkippedItem( + name=name, item_type="measure", reason="cross-view measure reference" + ) + + # Handle count (no sql needed) + if measure_type == "count" and not sql: + expr = "count(*)" + elif measure_type in MEASURE_TYPE_MAP: + if measure_type == "count": + # count with sql + expr = "count(*)" + else: + func = MEASURE_TYPE_MAP[measure_type] + # Resolve ${field_name} to column name + resolved_col = _resolve_measure_sql(sql, field_to_col) + if resolved_col is None: + return SkippedItem( + name=name, + item_type="measure", + reason=f"could not resolve sql reference: {sql}", + ) + expr = f"{func}({resolved_col})" + elif measure_type == "date": + resolved = _resolve_measure_sql(sql, field_to_col) + if resolved is None: + return SkippedItem( + name=name, + 
item_type="measure", + reason=f"could not resolve sql reference for date measure: {sql}", + ) + if _sql_contains_aggregate(resolved): + expr = resolved + else: + func = "max" if "max" in name.lower() else "min" + expr = f"{func}({resolved})" + elif measure_type == "number": + resolved = _resolve_derived_measure_sql(sql, field_to_col, rv) + if resolved is None: + return SkippedItem( + name=name, + item_type="measure", + reason="computed measure (type: number) with unresolvable references", + ) + expr = resolved + else: + return SkippedItem( + name=name, + item_type="measure", + reason=f"unsupported measure type: {measure_type}", + ) + + # Handle filters + filter_str = _resolve_measure_filter(m, field_to_col, rv) + + return ParsedMeasure( + name=name, + expr=expr, + filter=filter_str, + description=m.get("description") or m.get("label"), + ) + + +SINGLE_FIELD_REF_RE = re.compile(r"^\s*\$\{(\w+)\}\s*$") + + +def _resolve_measure_sql(sql: str, field_to_col: dict[str, str]) -> str | None: + """Resolve ${field_name} and ${TABLE}.col references in measure SQL to column expressions.""" + if not sql: + return None + sql = sql.strip() + + # Simple ${TABLE}.col → col (exact match, entire string) + table_match = TABLE_REF_RE.match(sql) + if table_match: + return table_match.group(1) + + # Simple ${field_name} → resolved column (exact match, entire string) + field_match = SINGLE_FIELD_REF_RE.match(sql) + if field_match: + field_name = field_match.group(1) + return field_to_col.get(field_name, field_name) + + # Complex expression with ${TABLE}.col or ${field} refs — resolve all inline + resolved = _resolve_field_refs_in_sql(sql, field_to_col) + if resolved != sql: + return resolved + + return None + + +def _sql_contains_aggregate(sql: str) -> bool: + """Check if an SQL expression already contains an aggregate function call.""" + return bool(AGGREGATE_FUNC_RE.search(sql)) + + +def _resolve_derived_measure_sql( + sql: str, + field_to_col: dict[str, str], + rv: _RawView, +) -> str 
| None: + """Resolve a type:number measure's SQL, replacing ${field} refs with + measure names (for measure refs) or column expressions (for dimension refs). + + Returns the resolved expression, or None if any reference is unresolvable. + """ + if not sql: + return None + + if VIEW_FIELD_REF_RE.search(sql): + return None + + def _replace_ref(match: re.Match[str]) -> str: + field_name = match.group(1) + if field_name == "TABLE": + return match.group(0) + if field_name in rv.measures: + return field_name + if field_name in field_to_col: + return field_to_col[field_name] + return match.group(0) + + resolved = re.sub(r"\$\{TABLE\}\.(\w+)", r"\1", sql.strip()) + resolved = FIELD_REF_RE.sub(_replace_ref, resolved) + + if FIELD_REF_RE.search(resolved): + return None + + return resolved + + +def _resolve_measure_filter( + m: dict[str, Any], + field_to_col: dict[str, str], + rv: _RawView, +) -> str | None: + """Convert LookML measure filters to a KSL filter string.""" + filters_all = m.get("filters__all") + if not filters_all: + return None + + filter_parts: list[str] = [] + for group in _iter_filter_groups(filters_all): + for field_name, value in group.items(): + # Try to resolve the filter field to its underlying SQL + dim = rv.dimensions.get(field_name) + if dim and dim.get("type") == "yesno": + # yesno: the sql IS the boolean expression — resolve ${field} refs in it + dim_sql = _resolve_field_refs_in_sql( + dim.get("sql", "").strip(), field_to_col + ) + if value == "yes": + filter_parts.append(dim_sql) + else: + filter_parts.append(f"NOT ({dim_sql})") + else: + col = field_to_col.get(field_name, field_name) + filter_parts.append(f"{col} = '{value}'") + + return " AND ".join(filter_parts) if filter_parts else None + + +def _resolve_field_refs_in_sql(sql: str, field_to_col: dict[str, str]) -> str: + """Replace ${field_name} and ${TABLE}.col references in SQL with actual column names.""" + # First replace ${TABLE}.col + sql = re.sub(r"\$\{TABLE\}\.(\w+)", r"\1", sql) + # 
Then replace ${field_name} + sql = FIELD_REF_RE.sub(lambda m: field_to_col.get(m.group(1), m.group(1)), sql) + return sql + + +def _iter_filter_groups(filters_all: Any) -> list[dict[str, str]]: + """Iterate through lkml's filters__all nested structure.""" + result: list[dict[str, str]] = [] + if isinstance(filters_all, list): + for item in filters_all: + if isinstance(item, dict): + result.append(item) + elif isinstance(item, list): + for sub in item: + if isinstance(sub, dict): + result.append(sub) + return result + + +# ── sqlglot: transpile and extract columns from derived table SQL ───── + + +def _transpile_sql( + sql: str, + source_dialect: str, + target_dialect: str, + warnings: list[str], + view_name: str, +) -> str: + """Transpile SQL from source dialect to target dialect using sqlglot.""" + if source_dialect == target_dialect: + return sql + try: + results = sqlglot.transpile(sql, read=source_dialect, write=target_dialect) + return results[0] if results else sql + except Exception as e: + warnings.append( + f"sqlglot transpile failed for view '{view_name}' " + f"({source_dialect} → {target_dialect}): {e}" + ) + return sql + + +def _extract_sql_columns( + sql: str, dialect: str, warnings: list[str], view_name: str +) -> dict[str, str]: + """Extract output column names and inferred types from a SELECT statement.""" + try: + tree = sqlglot.parse_one(sql, read=dialect) + except Exception as e: + warnings.append(f"sqlglot parse failed for view '{view_name}': {e}") + return {} + + columns: dict[str, str] = {} + for expr_node in tree.expressions: + alias = expr_node.alias + if not alias and isinstance(expr_node, exp.Column): + alias = expr_node.name + if alias: + col_type = _infer_column_type(expr_node) + columns[alias.lower()] = col_type + + return columns + + +def _infer_column_type(node: exp.Expression) -> str: + """Infer KSL column type from a sqlglot AST node.""" + # Check if it's an aggregate + inner = node.unalias() if hasattr(node, "unalias") else node + 
def _build_parsed_joins(
    raw_explores: list[dict[str, Any]],
    resolved_views: dict[str, _RawView],
) -> list[ParsedJoin]:
    """Turn the ``joins`` of every explore into a de-duplicated ParsedJoin list.

    For each join, ``from:`` (when present) names the real target view and the
    join's own name becomes its alias; otherwise the join name is the target
    and there is no alias. The source view and the on-clause come from
    ``_parse_sql_on``. Joins are de-duplicated on
    ``(source view, target view, alias)``; the first occurrence wins.
    """
    emitted: set[tuple[str, str, str | None]] = set()  # (source, target, alias)
    parsed: list[ParsedJoin] = []

    for explore in raw_explores:
        base_view = explore.get("view_name") or explore.get("name", "")

        for spec in explore.get("joins", []):
            name = spec.get("name", "")
            from_view = spec.get("from")
            if from_view:
                target, alias = from_view, name
            else:
                target, alias = name, None

            # sql_on tells us which view the join hangs off and how.
            origin, condition = _parse_sql_on(
                spec.get("sql_on", ""), base_view, name, resolved_views
            )

            identity = (origin, target, alias)
            if identity in emitted:
                continue
            emitted.add(identity)

            parsed.append(
                ParsedJoin(
                    source_view=origin,
                    to=target,
                    alias=alias,
                    on=condition,
                    relationship=spec.get("relationship", "many_to_one"),
                )
            )

    return parsed
resolved_views: dict[str, _RawView], +) -> tuple[str, str]: + """Parse a sql_on expression to extract the source view and KSL-format on clause. + + Returns (source_view, on_clause). + """ + refs = VIEW_FIELD_REF_RE.findall(sql_on) + if not refs: + return explore_base, sql_on.strip() + + # Resolve each ${view.field} to view.actual_column + def resolve_ref(match: re.Match[str]) -> str: + view_name = match.group(1) + field_name = match.group(2) + rv = resolved_views.get(view_name) + if rv: + # Look up actual column name + dim = rv.dimensions.get(field_name) + if dim: + table_match = TABLE_REF_RE.match(dim.get("sql", "")) + if table_match: + return f"{view_name}.{table_match.group(1)}" + # Check dimension_groups + for dg_name, dg in rv.dimension_groups.items(): + if field_name == dg_name or field_name.startswith(f"{dg_name}_"): + dg_match = TABLE_REF_RE.match(dg.get("sql", "")) + if dg_match: + return f"{view_name}.{dg_match.group(1)}" + return f"{view_name}.{field_name}" + + on_clause = VIEW_FIELD_REF_RE.sub(resolve_ref, sql_on).strip() + + # Determine source view: first referenced view that isn't the join target + source_view = explore_base + for view_name, _ in refs: + if view_name != join_name: + source_view = view_name + break + return source_view, on_clause diff --git a/python/klo-daemon/src/klo_daemon/semantic_layer.py b/python/klo-daemon/src/klo_daemon/semantic_layer.py new file mode 100644 index 00000000..56b25886 --- /dev/null +++ b/python/klo-daemon/src/klo_daemon/semantic_layer.py @@ -0,0 +1,136 @@ +"""Semantic-layer compute helpers for the KLO daemon package.""" + +from __future__ import annotations + +from typing import Any + +from pydantic import BaseModel, Field +from semantic_layer.duplicate_check import validate_measure_duplicates +from semantic_layer.engine import SemanticEngine +from semantic_layer.models import QueryResult, SourceDefinition + + +class SemanticLayerQueryRequest(BaseModel): + sources: list[dict[str, Any]] + query: dict[str, Any] + 
dialect: str = "postgres" + + +class SemanticLayerQueryResponse(BaseModel): + sql: str + dialect: str + columns: list[dict[str, Any]] + plan: dict[str, Any] + + +class ValidateSourcesRequest(BaseModel): + sources: list[dict[str, Any]] + dialect: str = "postgres" + recently_touched: list[str] | None = None + + +class ValidateSourcesResponse(BaseModel): + valid: bool + errors: list[str] = Field(default_factory=list) + warnings: list[str] = Field(default_factory=list) + per_source_warnings: dict[str, list[str]] = Field(default_factory=dict) + + +def _load_sources(raw_sources: list[dict[str, Any]]) -> dict[str, SourceDefinition]: + sources: dict[str, SourceDefinition] = {} + for raw_source in raw_sources: + source = SourceDefinition(**raw_source) + if source.name in sources: + raise ValueError(f"Duplicate source name '{source.name}'") + sources[source.name] = source + return sources + + +def _validate_duplicate_measure_names(source: SourceDefinition) -> list[str]: + errors: list[str] = [] + seen: set[str] = set() + for measure in source.measures: + if measure.name in seen: + errors.append( + f"Duplicate measure '{measure.name}' on source '{source.name}'" + ) + continue + seen.add(measure.name) + return errors + + +def _response_columns(result: QueryResult) -> list[dict[str, Any]]: + measure_names = { + measure.name: measure.qualified_ref + for measure in result.resolved_plan.measures + if measure.qualified_ref + } + columns: list[dict[str, Any]] = [] + for column in result.columns: + dumped = column.model_dump(mode="json") + if column.provenance.value == "dimension" and column.expr: + dumped["name"] = column.expr + elif column.name in measure_names: + dumped["name"] = measure_names[column.name] + columns.append(dumped) + return columns + + +def query_semantic_layer( + request: SemanticLayerQueryRequest, +) -> SemanticLayerQueryResponse: + sources = _load_sources(request.sources) + engine = SemanticEngine.from_sources(sources, dialect=request.dialect) + result = 
def validate_semantic_layer(request: ValidateSourcesRequest) -> ValidateSourcesResponse:
    """Validate raw source definitions and collect every problem found.

    Phase 1 parses each raw source individually: parse failures and duplicate
    source names are recorded as errors and the source is skipped; accepted
    sources are checked for duplicate measure names.

    Phase 2 (only if at least one source was accepted) builds a
    ``SemanticEngine`` over the accepted sources, merges its validation report
    and appends the result of ``validate_measure_duplicates``. Any exception
    in this phase is reported as a single "Validation failed" error instead
    of propagating.

    Returns:
        A response whose ``valid`` flag is true only when no errors were
        collected.
    """
    problems: list[str] = []
    notices: list[str] = []
    notices_by_source: dict[str, list[str]] = {}
    accepted: dict[str, SourceDefinition] = {}

    for raw in request.sources:
        # Grab the name up front so a parse failure can still be labelled.
        declared_name = raw.get("name") if isinstance(raw, dict) else None
        try:
            definition = SourceDefinition(**raw)
        except Exception as exc:
            shown = declared_name or ""
            problems.append(f"Source '{shown}' failed to parse: {exc}")
            continue

        if definition.name in accepted:
            problems.append(f"Duplicate source name '{definition.name}'")
            continue

        accepted[definition.name] = definition
        problems.extend(_validate_duplicate_measure_names(definition))

    if accepted:
        try:
            engine = SemanticEngine.from_sources(accepted, dialect=request.dialect)
            touched = (
                set(request.recently_touched) if request.recently_touched else None
            )
            report = engine.validate(recently_touched=touched)
            problems.extend(report.errors)
            notices.extend(report.warnings)
            notices_by_source.update(report.per_source_warnings)
            problems.extend(
                validate_measure_duplicates(accepted, dialect=request.dialect)
            )
        except Exception as exc:
            problems.append(f"Validation failed: {exc}")

    return ValidateSourcesResponse(
        valid=not problems,
        errors=problems,
        warnings=notices,
        per_source_warnings=notices_by_source,
    )
def _map_column_type(db_type: str) -> str:
    """Classify a database column type string as a semantic-layer type.

    The module-level patterns are tried in a fixed order -- boolean, then
    time, then number -- and the first one found anywhere in ``db_type``
    wins. Types matching none of them are treated as ``"string"``.
    """
    ordered_rules = (
        (_BOOLEAN_PATTERN, "boolean"),
        (_TIME_PATTERN, "time"),
        (_NUMBER_PATTERN, "number"),
    )
    for pattern, semantic_type in ordered_rules:
        if pattern.search(db_type):
            return semantic_type
    return "string"
def _assign_join_aliases(
    join_entries: list[tuple[JoinDeclaration, str, str]],
) -> None:
    """Give joins that share a target table distinct aliases (in place).

    Each entry is ``(join, local_column, remote_column)``. Joins whose target
    is unique are left without an alias. For the others the preferred alias
    is ``<target>_<local column>``. When several joins would get the same
    preferred alias (e.g. two reverse links that both hang off the local
    primary key) the remote column is used instead, with a numeric suffix as
    a last resort.
    """
    target_counts: dict[str, int] = {}
    for join, _local, _remote in join_entries:
        target_counts[join.to] = target_counts.get(join.to, 0) + 1

    contested = [entry for entry in join_entries if target_counts[entry[0].to] > 1]
    if not contested:
        return

    preferred = [
        f"{join.to}_{local.strip().lower()}" for join, local, _remote in contested
    ]
    preferred_counts: dict[str, int] = {}
    for alias in preferred:
        preferred_counts[alias] = preferred_counts.get(alias, 0) + 1

    # Reserve every alias that is already unique so fallbacks never steal it.
    taken = {alias for alias, count in preferred_counts.items() if count == 1}

    for (join, _local, remote), alias in zip(contested, preferred):
        if preferred_counts[alias] == 1:
            join.alias = alias
            continue
        base = f"{join.to}_{remote.strip().lower()}"
        candidate = base
        suffix = 2
        while candidate in taken:
            candidate = f"{base}_{suffix}"
            suffix += 1
        taken.add(candidate)
        join.alias = candidate


def generate_sources(request: GenerateSourcesRequest) -> list[dict[str, Any]]:
    """Build one klo-sl source definition per scanned table.

    For every table this derives:

    * ``grain`` -- the primary-key columns, else the first column, else
      ``["id"]`` for a table without columns;
    * ``columns`` -- each column with its mapped type (time columns get the
      TIME role);
    * ``joins`` -- a forward join for every link starting at the table and a
      reverse join (with the inverse relationship) for every link ending at
      it; links whose other side is not part of the scan are skipped with a
      warning;
    * ``measures`` -- via ``_generate_measures``.

    When a table has several joins to the same target, each of them receives
    a distinct alias.

    Returns:
        The source definitions dumped to plain dicts (``None`` values
        omitted), in the order of ``request.tables``.
    """
    links_by_from: dict[str, list[LinkInput]] = {}
    links_by_to: dict[str, list[LinkInput]] = {}
    for link in request.links:
        links_by_from.setdefault(link.from_table, []).append(link)
        links_by_to.setdefault(link.to_table, []).append(link)

    table_names = {table.name for table in request.tables}
    sources: list[dict[str, Any]] = []

    for table in request.tables:
        pk_columns = [column.name for column in table.columns if column.primary_key]
        if pk_columns:
            grain = pk_columns
        elif table.columns:
            grain = [table.columns[0].name]
        else:
            grain = ["id"]

        sl_columns: list[SourceColumn] = []
        for column in table.columns:
            sl_type = _map_column_type(column.type)
            role = ColumnRole.TIME if sl_type == "time" else ColumnRole.DEFAULT
            sl_columns.append(
                SourceColumn(
                    name=column.name,
                    type=sl_type,
                    role=role,
                    description=column.comment,
                )
            )

        # (join, local column, remote column) -- the columns are kept next to
        # the join so aliasing does not have to re-parse the on-clause.
        join_entries: list[tuple[JoinDeclaration, str, str]] = []

        for link in links_by_from.get(table.name, []):
            if link.to_table not in table_names:
                logger.warning(
                    "Skipping link from %s.%s to %s.%s: target table not in scan",
                    link.from_table,
                    link.from_column,
                    link.to_table,
                    link.to_column,
                )
                continue

            relationship = _RELATIONSHIP_MAP.get(link.relationship_type, "many_to_one")
            join_entries.append(
                (
                    JoinDeclaration(
                        to=link.to_table,
                        on=f"{link.from_column} = {link.to_table}.{link.to_column}",
                        relationship=relationship,
                    ),
                    link.from_column,
                    link.to_column,
                )
            )

        for link in links_by_to.get(table.name, []):
            if link.from_table not in table_names:
                logger.warning(
                    "Skipping reverse link from %s.%s to %s.%s: source table not in scan",
                    link.from_table,
                    link.from_column,
                    link.to_table,
                    link.to_column,
                )
                continue

            forward_relationship = _RELATIONSHIP_MAP.get(
                link.relationship_type, "many_to_one"
            )
            reverse_relationship = _RELATIONSHIP_INVERSE.get(
                forward_relationship, "one_to_many"
            )
            join_entries.append(
                (
                    JoinDeclaration(
                        to=link.from_table,
                        on=f"{link.to_column} = {link.from_table}.{link.from_column}",
                        relationship=reverse_relationship,
                    ),
                    link.to_column,
                    link.from_column,
                )
            )

        _assign_join_aliases(join_entries)
        joins = [join for join, _local, _remote in join_entries]

        source = SourceDefinition(
            name=table.name,
            description=table.comment,
            table=_build_table_ref(table),
            grain=grain,
            columns=sl_columns,
            joins=joins,
            measures=_generate_measures(table.name, table.columns, pk_columns),
        )
        sources.append(source.model_dump(exclude_none=True))

    logger.info("Generated %d klo-sl source definitions", len(sources))
    return sources
def parse_table_identifier_response(
    request: ParseTableIdentifierBatchRequest,
) -> ParseTableIdentifierBatchResponse:
    """Parse a batch of ``sql_table_name`` values via the shared parser.

    Each request item is copied into the shared parser's item type, the whole
    batch is parsed in one call, and every result (converted with
    ``dataclasses.asdict``) is validated into a ``ParsedIdentifier`` keyed by
    the item's ``key``.
    """
    shared_items = []
    for item in request.items:
        shared_items.append(
            SharedParseTableIdentifierItem(
                key=item.key,
                sql_table_name=item.sql_table_name,
                dialect=item.dialect,
            )
        )

    parsed_by_key = {}
    for key, outcome in parse_table_identifier_batch(shared_items).items():
        parsed_by_key[key] = ParsedIdentifier.model_validate(asdict(outcome))

    return ParseTableIdentifierBatchResponse(results=parsed_by_key)
class FakeEmbeddingProvider:
    """Deterministic in-memory embedding provider used by the endpoint tests.

    ``encode`` records every batch it receives and returns, for each text,
    the vector ``[len(text), position in batch, 1.0]``.
    """

    name = "fake"
    dimensions = 3
    max_batch_size = 2

    def __init__(self) -> None:
        # One entry per encode() call, holding a copy of the texts passed in.
        self.calls: list[list[str]] = []

    def encode(self, texts: list[str]) -> list[list[float]]:
        """Record the batch and return one 3-element vector per text."""
        self.calls.append(list(texts))
        vectors: list[list[float]] = []
        for position, text in enumerate(texts):
            vectors.append([float(len(text)), float(position), 1.0])
        return vectors
def test_embedding_compute_bulk_endpoint_returns_embeddings() -> None:
    """The bulk endpoint encodes all texts in a single provider call."""
    texts = ["one", "three"]
    provider = FakeEmbeddingProvider()
    app = create_app(embedding_provider=provider)

    response = TestClient(app).post(
        "/embeddings/compute-bulk",
        json={"texts": texts},
    )

    assert response.status_code == 200
    # FakeEmbeddingProvider yields [len(text), index, 1.0] per text.
    expected_vectors = [[3.0, 0.0, 1.0], [5.0, 1.0, 1.0]]
    assert response.json() == {"embeddings": expected_vectors}
    assert provider.calls == [texts]
def test_code_execute_endpoint_returns_result_when_enabled() -> None:
    """With code execution enabled, the endpoint returns result and console output."""
    app = create_app(enable_code_execution=True)
    payload = {"code": 'print("ran")\nresult = {"value": 7}'}

    response = TestClient(app).post("/code/execute", json=payload)

    assert response.status_code == 200
    body = response.json()
    assert body["result"] == {"value": 7}
    assert body["console_output"] == "ran\n"
    # Fields that must be present but empty on a successful run.
    for empty_field in ("error", "message", "visualizations"):
        assert body[empty_field] is None
    formatted = body["formatted_result"]
    assert "=== Console Output ===" in formatted
    assert "=== Result ===" in formatted
def test_semantic_query_endpoint_returns_sql() -> None:
    """A valid semantic query compiles to SQL against the source table."""
    query = {
        "measures": ["orders.order_count"],
        "dimensions": ["orders.status"],
    }
    payload = {
        "sources": [ORDERS_SOURCE],
        "dialect": "postgres",
        "query": query,
    }

    response = TestClient(create_app()).post("/semantic-layer/query", json=payload)

    assert response.status_code == 200
    body = response.json()
    assert body["dialect"] == "postgres"
    assert "public.orders" in body["sql"]
    first_column = body["columns"][0]
    assert first_column["name"] == "orders.status"
def test_semantic_validate_endpoint_returns_structured_validation() -> None:
    """Duplicate measure names are reported as errors with HTTP 200."""
    duplicated_measure = {"name": "revenue", "expr": "sum(amount)"}
    invalid_source = dict(ORDERS_SOURCE)
    invalid_source["measures"] = [dict(duplicated_measure), dict(duplicated_measure)]
    client = TestClient(create_app())

    response = client.post(
        "/semantic-layer/validate",
        json={"sources": [invalid_source], "dialect": "postgres"},
    )

    assert response.status_code == 200
    body = response.json()
    assert body["valid"] is False
    duplicate_errors = [error for error in body["errors"] if "Duplicate measure" in error]
    assert duplicate_errors
    assert body["warnings"] == []
    assert body["per_source_warnings"] == {}
def test_lookml_parse_endpoint_returns_resolved_views() -> None:
    """Parsing a single LookML view yields one fully resolved table view."""
    lookml_file = {
        "path": "views/orders.view.lkml",
        "content": LOOKML_ORDER_VIEW,
    }

    response = TestClient(create_app()).post(
        "/lookml/parse",
        json={"files": [lookml_file], "dialect": "postgres"},
    )

    assert response.status_code == 200
    body = response.json()
    # Nothing besides the view itself is expected in the result.
    for empty_key in ("joins", "skipped_views", "warnings"):
        assert body[empty_key] == []
    assert len(body["views"]) == 1

    view = body["views"][0]
    assert view["name"] == "orders"
    assert view["source_type"] == "table"
    assert view["table_ref"] == "public.orders"
    assert view["grain"] == ["id"]
    column_names = [column["name"] for column in view["columns"]]
    assert column_names == ["id", "status"]
    expected_measure = {
        "name": "order_count",
        "expr": "count(*)",
        "filter": None,
        "description": None,
    }
    assert view["measures"] == [expected_measure]
def test_command_returns_nonzero_for_invalid_json() -> None:
    """Malformed JSON on stdin makes the command exit 1 with the decoder error."""
    package_src = str(Path(__file__).resolve().parents[1] / "src")
    env = os.environ.copy()
    # Put the package sources first so the subprocess imports this checkout.
    env["PYTHONPATH"] = package_src + os.pathsep + env.get("PYTHONPATH", "")
    argv = [sys.executable, "-m", "klo_daemon", "semantic-query"]

    result = subprocess.run(
        argv,
        input="{",
        text=True,
        capture_output=True,
        check=False,
        env=env,
    )

    assert result.returncode == 1
    assert "Expecting property name enclosed in double quotes" in result.stderr
def test_serve_http_command_defaults_to_loopback(monkeypatch) -> None:
    """Without flags, serve-http binds 127.0.0.1:8765 with code execution off."""
    from klo_daemon import __main__ as daemon_main

    recorded: list[dict[str, object]] = []

    def fake_run_http_server(
        *,
        host: str,
        port: int,
        log_level: str,
        enable_code_execution: bool,
    ) -> None:
        # Capture the keyword arguments instead of starting a server.
        settings: dict[str, object] = {
            "host": host,
            "port": port,
            "log_level": log_level,
            "enable_code_execution": enable_code_execution,
        }
        recorded.append(settings)

    monkeypatch.setattr(daemon_main, "run_http_server", fake_run_http_server)

    exit_code = daemon_main.main(["serve-http"])

    assert exit_code == 0
    expected_settings = {
        "host": "127.0.0.1",
        "port": 8765,
        "log_level": "info",
        "enable_code_execution": False,
    }
    assert recorded == [expected_settings]
test_lookml_parse_command_reads_stdin_and_writes_json() -> None: + result = run_daemon_command( + "lookml-parse", + { + "files": [ + { + "path": "views/orders.view.lkml", + "content": """ +view: orders { + sql_table_name: public.orders ;; + + dimension: id { + primary_key: yes + type: number + sql: ${TABLE}.id ;; + } + + measure: order_count { + type: count + } +} +""", + } + ], + "dialect": "postgres", + }, + ) + + assert result.returncode == 0, result.stderr + parsed = json.loads(result.stdout) + assert parsed["views"][0]["name"] == "orders" + assert parsed["views"][0]["table_ref"] == "public.orders" + assert parsed["views"][0]["measures"][0]["expr"] == "count(*)" + assert parsed["joins"] == [] + assert parsed["skipped_views"] == [] + assert parsed["warnings"] == [] + + +def test_semantic_generate_sources_command_reads_stdin_and_writes_json() -> None: + result = run_daemon_command( + "semantic-generate-sources", + { + "tables": [ + { + "name": "orders", + "db": "public", + "columns": [ + {"name": "id", "type": "integer", "primary_key": True}, + {"name": "amount", "type": "decimal"}, + ], + } + ], + "links": [], + "dialect": "postgres", + }, + ) + + assert result.returncode == 0, result.stderr + parsed = json.loads(result.stdout) + assert parsed["source_count"] == 1 + assert parsed["sources"][0]["name"] == "orders" + assert parsed["sources"][0]["table"] == "public.orders" + assert parsed["sources"][0]["measures"] == [ + { + "name": "record_count", + "expr": "count(id)", + "segments": [], + "description": "Count of orders records", + }, + { + "name": "total_amount", + "expr": "sum(amount)", + "segments": [], + "description": "Sum of amount", + }, + { + "name": "avg_amount", + "expr": "avg(amount)", + "segments": [], + "description": "Average of amount", + }, + ] + + +def test_database_introspect_command_reads_stdin_and_writes_json( + monkeypatch, capsys +) -> None: + from klo_daemon import __main__ as daemon_main + from klo_daemon.database_introspection import ( + 
DatabaseIntrospectionResponse, + LiveDatabaseColumn, + LiveDatabaseTable, + ) + + def fake_introspect(request): + assert request.connection_id == "warehouse" + assert request.driver == "postgres" + assert request.schemas == ["public"] + return DatabaseIntrospectionResponse( + connection_id="warehouse", + extracted_at="2026-04-28T10:00:00+00:00", + metadata={"driver": "postgres", "schemas": ["public"]}, + tables=[ + LiveDatabaseTable( + catalog="warehouse", + db="public", + name="orders", + columns=[ + LiveDatabaseColumn( + name="id", + type="integer", + nullable=False, + primary_key=True, + ) + ], + ) + ], + ) + + monkeypatch.setattr(daemon_main, "introspect_database_response", fake_introspect) + monkeypatch.setattr( + sys, + "stdin", + io.StringIO( + '{"connection_id":"warehouse","driver":"postgres","url":"postgresql://readonly@example.test/warehouse","schemas":["public"]}' + ), + ) + + assert daemon_main.main(["database-introspect"]) == 0 + captured = capsys.readouterr() + parsed = json.loads(captured.out) + assert parsed["connection_id"] == "warehouse" + assert parsed["metadata"] == {"driver": "postgres", "schemas": ["public"]} + assert parsed["tables"][0]["name"] == "orders" + assert captured.err == "" + + +def test_embedding_compute_command_reads_stdin_and_writes_json( + monkeypatch, capsys +) -> None: + from klo_daemon import __main__ as daemon_main + from klo_daemon.embeddings import ComputeEmbeddingResponse + + def fake_compute(request): + assert request.text == "hello" + return ComputeEmbeddingResponse(embedding=[1.0, 2.0, 3.0]) + + monkeypatch.setattr(daemon_main, "compute_embedding_response", fake_compute) + monkeypatch.setattr(sys, "stdin", io.StringIO('{"text": "hello"}')) + + assert daemon_main.main(["embedding-compute"]) == 0 + captured = capsys.readouterr() + assert json.loads(captured.out) == {"embedding": [1.0, 2.0, 3.0]} + assert captured.err == "" + + +def test_embedding_compute_bulk_command_reads_stdin_and_writes_json( + monkeypatch, capsys +) 
-> None: + from klo_daemon import __main__ as daemon_main + from klo_daemon.embeddings import ComputeEmbeddingBulkResponse + + def fake_compute(request): + assert request.texts == ["hello", "world"] + return ComputeEmbeddingBulkResponse(embeddings=[[1.0, 2.0], [3.0, 4.0]]) + + monkeypatch.setattr(daemon_main, "compute_embedding_bulk_response", fake_compute) + monkeypatch.setattr(sys, "stdin", io.StringIO('{"texts": ["hello", "world"]}')) + + assert daemon_main.main(["embedding-compute-bulk"]) == 0 + captured = capsys.readouterr() + assert json.loads(captured.out) == {"embeddings": [[1.0, 2.0], [3.0, 4.0]]} + assert captured.err == "" + + +def test_code_execute_command_reads_stdin_and_writes_json(monkeypatch, capsys) -> None: + from klo_daemon import __main__ as daemon_main + from klo_daemon.code_execution import ExecuteCodeResponse + + calls: list[dict[str, Any]] = [] + + def fake_execute(request, *, nest_api_url, auth_header): + calls.append( + { + "request": request, + "nest_api_url": nest_api_url, + "auth_header": auth_header, + } + ) + return ExecuteCodeResponse( + formatted_result="\n\n=== Result ===\n\n7", + result=7, + ) + + monkeypatch.setattr(daemon_main, "execute_code_response", fake_execute) + monkeypatch.setattr(sys, "stdin", io.StringIO('{"code": "result = 7"}')) + + assert daemon_main.main(["code-execute"]) == 0 + captured = capsys.readouterr() + assert json.loads(captured.out) == { + "formatted_result": "\n\n=== Result ===\n\n7", + "result": 7, + "console_output": None, + "error": None, + "message": None, + "visualizations": None, + } + assert captured.err == "" + assert calls[0]["request"].code == "result = 7" + assert calls[0]["nest_api_url"] is None + assert calls[0]["auth_header"] is None diff --git a/python/klo-daemon/tests/test_code_execution.py b/python/klo-daemon/tests/test_code_execution.py new file mode 100644 index 00000000..d50eaa26 --- /dev/null +++ b/python/klo-daemon/tests/test_code_execution.py @@ -0,0 +1,210 @@ +from __future__ 
import annotations + +import json +from dataclasses import dataclass +from typing import Any + +import numpy as np +import orjson +import pandas as pd +import pytest + +from klo_daemon.code_execution import ( + ExecuteCodeRequest, + create_scratchpad_helpers, + detect_visualizations, + dumps_numpy_json, + execute_code_response, +) + + +@dataclass +class FakeResponse: + json_payload: dict[str, Any] | None = None + content: bytes = b"" + headers: dict[str, str] | None = None + + def raise_for_status(self) -> None: + return None + + def json(self) -> dict[str, Any]: + return self.json_payload or {} + + +class FakeHttpClient: + def __init__(self) -> None: + self.posts: list[dict[str, Any]] = [] + self.gets: list[dict[str, Any]] = [] + + def post( + self, + url: str, + data: bytes, + headers: dict[str, str], + timeout: int, + ) -> FakeResponse: + self.posts.append( + { + "url": url, + "data": orjson.loads(data), + "headers": headers, + "timeout": timeout, + } + ) + return FakeResponse(json_payload={"filename": "saved.json"}) + + def get( + self, + url: str, + headers: dict[str, str], + timeout: int, + ) -> FakeResponse: + self.gets.append({"url": url, "headers": headers, "timeout": timeout}) + return FakeResponse( + content=b"value,name\n1.25,alpha\n", + headers={"content-type": "text/csv; charset=utf-8"}, + ) + + +def test_execute_code_response_captures_console_result_and_strips_ansi() -> None: + response = execute_code_response( + ExecuteCodeRequest( + code='print("\\x1b[31mhello\\x1b[0m")\nresult = {"value": 3}', + ), + nest_api_url=None, + auth_header=None, + ) + + assert response.result == {"value": 3} + assert response.console_output == "\x1b[31mhello\x1b[0m\n" + assert "=== Console Output ===" in response.formatted_result + assert "hello" in response.formatted_result + assert "\x1b" not in response.formatted_result + assert "=== Result ===" in response.formatted_result + + +def test_execute_code_response_returns_message_when_result_is_absent() -> None: + response 
= execute_code_response( + ExecuteCodeRequest(code='print("ran")'), + nest_api_url=None, + auth_header=None, + ) + + assert response.result is None + assert ( + response.message == "Code executed successfully but no result variable was set" + ) + assert response.console_output == "ran\n" + assert "=== Message ===" in response.formatted_result + + +def test_execute_code_response_detects_visualization_records() -> None: + response = execute_code_response( + ExecuteCodeRequest( + code="result = " + + json.dumps( + { + "type": "visualization", + "vis_type": "bar", + "config": {"title": "Revenue"}, + "data": [{"month": "Jan", "revenue": 10}], + "title": "Revenue", + } + ), + ), + nest_api_url=None, + auth_header=None, + ) + + assert response.visualizations is not None + assert len(response.visualizations) == 1 + assert response.visualizations[0].vis_type == "bar" + assert response.visualizations[0].title == "Revenue" + + +def test_detect_visualizations_filters_mixed_lists() -> None: + visualizations = detect_visualizations( + [ + {"type": "note", "text": "skip"}, + { + "type": "visualization", + "vis_type": "table", + "config": {"title": "Rows"}, + "data": [{"row": 1}], + }, + ] + ) + + assert visualizations == [ + { + "type": "visualization", + "vis_type": "table", + "config": {"title": "Rows"}, + "data": [{"row": 1}], + } + ] + + +def test_scratchpad_and_visualization_helpers_serialize_numpy_scalars() -> None: + client = FakeHttpClient() + save_df, read_file, save_viz = create_scratchpad_helpers( + nest_api_url="http://nest", + auth_header="Bearer token", + source_id="source_123", + message_id="message_456", + http_client=client, + ) + + df = pd.DataFrame({"value": [np.float64(1.25)]}) + assert save_df(df, filename="df.json") == "1 rows saved to saved.json" + + read_df = read_file("input.csv") + assert read_df.to_dict(orient="records") == [{"value": 1.25, "name": "alpha"}] + + viz_ref = save_viz( + vis_type="bar", + config={"title": "Test", "x": "a", "y": 
np.float64(2.5)}, + data=[{"a": "row1", "b": np.float64(3.75)}], + ) + assert viz_ref == "![viz](saved.json)" + + assert ( + client.posts[0]["url"] == "http://nest/private_api/scratchpad/source_123/files" + ) + assert client.posts[0]["data"]["data"][0]["value"] == 1.25 + assert ( + client.gets[0]["url"] + == "http://nest/private_api/scratchpad/source_123/files/input.csv?format=raw" + ) + assert client.posts[1]["url"] == "http://nest/private_api/visualizations/source_123" + assert client.posts[1]["data"]["config"]["y"] == 2.5 + assert client.posts[1]["data"]["data"][0]["b"] == 3.75 + + +def test_scratchpad_helpers_require_app_context_only_when_called() -> None: + save_df, read_file, save_viz = create_scratchpad_helpers( + nest_api_url=None, + auth_header=None, + source_id=None, + message_id=None, + ) + + with pytest.raises(ValueError, match="required for scratchpad operations"): + save_df(pd.DataFrame({"value": [1]}), filename="df.json") + + with pytest.raises(ValueError, match="required for scratchpad operations"): + read_file("df.csv") + + with pytest.raises(ValueError, match="required for visualization operations"): + save_viz("bar", {"title": "Chart"}, [{"value": 1}]) + + +def test_dumps_numpy_json_serializes_numpy_values() -> None: + rendered = dumps_numpy_json( + { + "scalar": np.float64(1.5), + "array": np.array([1, 2, 3]), + } + ) + + assert orjson.loads(rendered) == {"scalar": 1.5, "array": [1, 2, 3]} diff --git a/python/klo-daemon/tests/test_database_introspection.py b/python/klo-daemon/tests/test_database_introspection.py new file mode 100644 index 00000000..3d51b698 --- /dev/null +++ b/python/klo-daemon/tests/test_database_introspection.py @@ -0,0 +1,153 @@ +from __future__ import annotations + +import pytest + +from klo_daemon.database_introspection import ( + DatabaseIntrospectionRequest, + DatabaseIntrospectionRows, + _statement_timeout_config, + introspect_database_response, +) + + +def test_introspect_database_response_maps_postgres_catalog_rows() 
-> None: + def fake_load_rows( + request: DatabaseIntrospectionRequest, + ) -> DatabaseIntrospectionRows: + assert request.connection_id == "warehouse" + assert request.driver == "postgres" + assert request.schemas == ["public"] + return DatabaseIntrospectionRows( + table_rows=[ + { + "table_catalog": "warehouse", + "table_schema": "public", + "table_name": "customers", + "table_comment": None, + }, + { + "table_catalog": "warehouse", + "table_schema": "public", + "table_name": "orders", + "table_comment": "Orders table", + }, + ], + column_rows=[ + { + "table_catalog": "warehouse", + "table_schema": "public", + "table_name": "orders", + "column_name": "id", + "formatted_type": "integer", + "is_nullable": False, + "is_primary_key": True, + "column_comment": "Order ID", + }, + { + "table_catalog": "warehouse", + "table_schema": "public", + "table_name": "orders", + "column_name": "customer_id", + "formatted_type": "integer", + "is_nullable": False, + "is_primary_key": False, + "column_comment": None, + }, + { + "table_catalog": "warehouse", + "table_schema": "public", + "table_name": "customers", + "column_name": "id", + "formatted_type": "integer", + "is_nullable": False, + "is_primary_key": True, + "column_comment": None, + }, + ], + foreign_key_rows=[ + { + "table_catalog": "warehouse", + "table_schema": "public", + "table_name": "orders", + "from_column": "customer_id", + "to_table": "customers", + "to_column": "id", + "constraint_name": "orders_customer_id_fkey", + } + ], + ) + + response = introspect_database_response( + DatabaseIntrospectionRequest( + connection_id="warehouse", + driver="postgres", + url="postgresql://readonly@example.test/warehouse", + schemas=["public"], + ), + load_rows=fake_load_rows, + now=lambda: "2026-04-28T10:00:00+00:00", + ) + + assert response.connection_id == "warehouse" + assert response.extracted_at == "2026-04-28T10:00:00+00:00" + assert response.metadata == {"driver": "postgres", "schemas": ["public"]} + assert [table.name for 
table in response.tables] == ["customers", "orders"] + orders = response.tables[1] + assert orders.model_dump(exclude_none=True) == { + "catalog": "warehouse", + "db": "public", + "name": "orders", + "comment": "Orders table", + "columns": [ + { + "name": "id", + "type": "integer", + "nullable": False, + "primary_key": True, + "comment": "Order ID", + }, + { + "name": "customer_id", + "type": "integer", + "nullable": False, + "primary_key": False, + }, + ], + "foreign_keys": [ + { + "from_column": "customer_id", + "to_table": "customers", + "to_column": "id", + "constraint_name": "orders_customer_id_fkey", + } + ], + } + + +def test_introspect_database_response_rejects_non_postgres_driver() -> None: + with pytest.raises(ValueError, match='supports only driver "postgres"'): + introspect_database_response( + DatabaseIntrospectionRequest( + connection_id="warehouse", + driver="snowflake", + url="snowflake://example", + ), + load_rows=lambda request: DatabaseIntrospectionRows([], [], []), + ) + + +def test_database_introspection_request_rejects_empty_schema_list() -> None: + with pytest.raises(ValueError, match="at least one schema"): + DatabaseIntrospectionRequest( + connection_id="warehouse", + driver="postgres", + url="postgresql://readonly@example.test/warehouse", + schemas=[], + ) + + +def test_statement_timeout_config_uses_parameterized_set_config() -> None: + assert _statement_timeout_config(30_000) == ( + "SELECT set_config('statement_timeout', %s, true)", + ("30000ms",), + ) diff --git a/python/klo-daemon/tests/test_embeddings.py b/python/klo-daemon/tests/test_embeddings.py new file mode 100644 index 00000000..f923c997 --- /dev/null +++ b/python/klo-daemon/tests/test_embeddings.py @@ -0,0 +1,107 @@ +from __future__ import annotations + +import pytest + +from klo_daemon.embeddings import ( + ComputeEmbeddingBulkRequest, + ComputeEmbeddingRequest, + SentenceTransformersEmbeddingProvider, + compute_embedding_bulk_response, + compute_embedding_response, +) + + 
+class FakeEmbeddingProvider: + name = "fake" + dimensions = 3 + max_batch_size = 2 + + def __init__(self) -> None: + self.calls: list[list[str]] = [] + + def encode(self, texts: list[str]) -> list[list[float]]: + self.calls.append(list(texts)) + return [ + [float(len(text)), float(index), 1.0] for index, text in enumerate(texts) + ] + + +class ArrayLike: + def __init__(self, value: list[float] | list[list[float]]) -> None: + self.value = value + + def tolist(self) -> list[float] | list[list[float]]: + return self.value + + +class FakeSentenceTransformerModel: + def __init__(self) -> None: + self.calls: list[str | list[str]] = [] + + def encode(self, value: str | list[str]) -> ArrayLike: + self.calls.append(value) + if isinstance(value, str): + return ArrayLike([0.1, 0.2, 0.3]) + return ArrayLike( + [[float(index), float(len(text)), 0.5] for index, text in enumerate(value)] + ) + + +def test_compute_embedding_response_uses_injected_provider() -> None: + provider = FakeEmbeddingProvider() + + response = compute_embedding_response( + ComputeEmbeddingRequest(text="hello"), + provider=provider, + ) + + assert response.embedding == [5.0, 0.0, 1.0] + assert provider.calls == [["hello"]] + + +def test_compute_embedding_bulk_response_uses_injected_provider() -> None: + provider = FakeEmbeddingProvider() + + response = compute_embedding_bulk_response( + ComputeEmbeddingBulkRequest(texts=["one", "three"]), + provider=provider, + ) + + assert response.embeddings == [[3.0, 0.0, 1.0], [5.0, 1.0, 1.0]] + assert provider.calls == [["one", "three"]] + + +def test_compute_embedding_bulk_rejects_empty_texts() -> None: + provider = FakeEmbeddingProvider() + + with pytest.raises(ValueError, match="Empty texts found at indices: 1"): + compute_embedding_bulk_response( + ComputeEmbeddingBulkRequest(texts=["valid", " "]), + provider=provider, + ) + + assert provider.calls == [] + + +def test_compute_embedding_bulk_respects_provider_batch_size() -> None: + provider = 
FakeEmbeddingProvider() + + with pytest.raises(ValueError, match="Maximum 2 texts allowed per batch"): + compute_embedding_bulk_response( + ComputeEmbeddingBulkRequest(texts=["one", "two", "three"]), + provider=provider, + ) + + assert provider.calls == [] + + +def test_sentence_transformers_provider_normalizes_single_and_bulk_outputs() -> None: + model = FakeSentenceTransformerModel() + provider = SentenceTransformersEmbeddingProvider(model=model) + + assert provider.encode(["hello"]) == [[0.1, 0.2, 0.3]] + assert provider.encode(["one", "three"]) == [ + [0.0, 3.0, 0.5], + [1.0, 5.0, 0.5], + ] + assert model.calls == ["hello", ["one", "three"]] diff --git a/python/klo-daemon/tests/test_lookml.py b/python/klo-daemon/tests/test_lookml.py new file mode 100644 index 00000000..cb41664a --- /dev/null +++ b/python/klo-daemon/tests/test_lookml.py @@ -0,0 +1,134 @@ +from __future__ import annotations + +from klo_daemon.lookml import ( + LookMLFileInput, + ParseLookMLRequest, + parse_lookml_project, +) + + +ORDER_VIEW = """ +view: orders { + sql_table_name: public.orders ;; + + dimension: id { + primary_key: yes + type: number + sql: ${TABLE}.id ;; + } + + dimension: user_id { + type: number + sql: ${TABLE}.user_id ;; + } + + dimension: status { + type: string + sql: ${TABLE}.status ;; + } + + measure: order_count { + type: count + } + + measure: revenue { + type: sum + sql: ${TABLE}.amount ;; + } +} +""" + + +USER_VIEW = """ +view: users { + sql_table_name: public.users ;; + + dimension: id { + primary_key: yes + type: number + sql: ${TABLE}.id ;; + } +} +""" + + +ORDER_MODEL = """ +explore: orders { + join: users { + relationship: many_to_one + sql_on: ${orders.user_id} = ${users.id} ;; + } +} +""" + + +DERIVED_VIEW = """ +view: order_rollup { + derived_table: { + sql: + SELECT status, SUM(amount) AS total_amount + FROM public.orders + GROUP BY status ;; + } + + dimension: status { + type: string + sql: ${TABLE}.status ;; + } +} +""" + + +def 
test_parse_lookml_project_returns_views_and_joins() -> None: + response = parse_lookml_project( + ParseLookMLRequest( + files=[ + LookMLFileInput(path="views/orders.view.lkml", content=ORDER_VIEW), + LookMLFileInput(path="views/users.view.lkml", content=USER_VIEW), + LookMLFileInput( + path="models/ecommerce.model.lkml", content=ORDER_MODEL + ), + ], + dialect="postgres", + ) + ) + + views = {view.name: view for view in response.views} + assert sorted(views) == ["orders", "users"] + assert views["orders"].source_type == "table" + assert views["orders"].table_ref == "public.orders" + assert views["orders"].grain == ["id"] + assert [measure.name for measure in views["orders"].measures] == [ + "order_count", + "revenue", + ] + assert views["orders"].measures[0].expr == "count(*)" + assert views["orders"].measures[1].expr == "sum(amount)" + assert response.joins[0].source_view == "orders" + assert response.joins[0].to == "users" + assert response.joins[0].relationship == "many_to_one" + assert response.joins[0].on == "orders.user_id = users.id" + assert response.skipped_views == [] + assert response.warnings == [] + + +def test_parse_lookml_project_extracts_derived_table_columns() -> None: + response = parse_lookml_project( + ParseLookMLRequest( + files=[ + LookMLFileInput( + path="views/order_rollup.view.lkml", content=DERIVED_VIEW + ) + ], + dialect="postgres", + ) + ) + + assert len(response.views) == 1 + view = response.views[0] + assert view.name == "order_rollup" + assert view.source_type == "sql" + assert "SELECT status, SUM(amount) AS total_amount" in (view.sql or "") + assert [column.name for column in view.columns] == ["status", "total_amount"] + assert response.skipped_views == [] + assert response.warnings == [] diff --git a/python/klo-daemon/tests/test_package.py b/python/klo-daemon/tests/test_package.py new file mode 100644 index 00000000..790c59b0 --- /dev/null +++ b/python/klo-daemon/tests/test_package.py @@ -0,0 +1,6 @@ +from klo_daemon import 
PACKAGE_NAME, VERSION + + +def test_package_metadata() -> None: + assert PACKAGE_NAME == "klo-daemon" + assert VERSION == "0.1.0" diff --git a/python/klo-daemon/tests/test_semantic_layer.py b/python/klo-daemon/tests/test_semantic_layer.py new file mode 100644 index 00000000..cd51e197 --- /dev/null +++ b/python/klo-daemon/tests/test_semantic_layer.py @@ -0,0 +1,64 @@ +from __future__ import annotations + +from klo_daemon.semantic_layer import ( + SemanticLayerQueryRequest, + ValidateSourcesRequest, + query_semantic_layer, + validate_semantic_layer, +) + + +ORDERS_SOURCE = { + "name": "orders", + "table": "public.orders", + "grain": ["id"], + "columns": [ + {"name": "id", "type": "number"}, + {"name": "status", "type": "string"}, + {"name": "amount", "type": "number"}, + ], + "joins": [], + "measures": [ + {"name": "order_count", "expr": "count(*)"}, + {"name": "revenue", "expr": "sum(amount)"}, + ], +} + + +def test_query_semantic_layer_generates_sql_and_plan() -> None: + response = query_semantic_layer( + SemanticLayerQueryRequest( + sources=[ORDERS_SOURCE], + dialect="postgres", + query={ + "measures": ["orders.order_count"], + "dimensions": ["orders.status"], + "limit": 25, + }, + ) + ) + + assert response.dialect == "postgres" + assert "public.orders" in response.sql + assert "orders.status" in response.sql + assert response.columns[0]["name"] == "orders.status" + assert response.columns[1]["name"] == "orders.order_count" + assert response.plan["sources_used"] == ["orders"] + + +def test_validate_semantic_layer_reports_duplicate_measure_names() -> None: + invalid_source = { + **ORDERS_SOURCE, + "measures": [ + {"name": "revenue", "expr": "sum(amount)"}, + {"name": "revenue", "expr": "sum(amount)"}, + ], + } + + response = validate_semantic_layer( + ValidateSourcesRequest(sources=[invalid_source], dialect="postgres") + ) + + assert response.valid is False + assert any("Duplicate measure" in error for error in response.errors) + assert response.warnings == [] diff 
--git a/python/klo-daemon/tests/test_source_generation.py b/python/klo-daemon/tests/test_source_generation.py new file mode 100644 index 00000000..7f4ed877 --- /dev/null +++ b/python/klo-daemon/tests/test_source_generation.py @@ -0,0 +1,161 @@ +from __future__ import annotations + +from klo_daemon.source_generation import ( + ColumnInput, + GenerateSourcesRequest, + LinkInput, + TableInput, + generate_sources, + generate_sources_response, +) + + +def test_generate_sources_maps_tables_columns_measures_and_joins() -> None: + response = generate_sources_response( + GenerateSourcesRequest( + tables=[ + TableInput( + name="orders", + db="public", + comment="Orders table", + columns=[ + ColumnInput( + name="id", + type="integer", + primary_key=True, + nullable=False, + comment="Order ID", + ), + ColumnInput(name="customer_id", type="integer"), + ColumnInput( + name="amount", type="decimal", comment="Order amount" + ), + ColumnInput(name="created_at", type="timestamp"), + ColumnInput(name="status", type="varchar"), + ], + ), + TableInput( + name="customers", + db="public", + columns=[ + ColumnInput(name="id", type="integer", primary_key=True), + ColumnInput(name="email", type="varchar"), + ], + ), + ], + links=[ + LinkInput( + from_table="orders", + from_column="customer_id", + to_table="customers", + to_column="id", + relationship_type="MANY_TO_ONE", + ) + ], + ) + ) + + assert response.source_count == 2 + sources = {source["name"]: source for source in response.sources} + assert sources["orders"]["description"] == "Orders table" + assert sources["orders"]["table"] == "public.orders" + assert sources["orders"]["grain"] == ["id"] + assert sources["orders"]["columns"] == [ + { + "name": "id", + "type": "number", + "visibility": "public", + "role": "default", + "description": "Order ID", + }, + { + "name": "customer_id", + "type": "number", + "visibility": "public", + "role": "default", + }, + { + "name": "amount", + "type": "number", + "visibility": "public", + "role": 
"default", + "description": "Order amount", + }, + {"name": "created_at", "type": "time", "visibility": "public", "role": "time"}, + {"name": "status", "type": "string", "visibility": "public", "role": "default"}, + ] + assert sources["orders"]["joins"] == [ + { + "to": "customers", + "on": "customer_id = customers.id", + "relationship": "many_to_one", + } + ] + assert [measure["name"] for measure in sources["orders"]["measures"]] == [ + "record_count", + "total_amount", + "avg_amount", + ] + assert sources["orders"]["measures"][0]["expr"] == "count(id)" + assert sources["orders"]["measures"][1]["expr"] == "sum(amount)" + assert sources["orders"]["measures"][2]["expr"] == "avg(amount)" + assert sources["customers"]["joins"] == [ + { + "to": "orders", + "on": "id = orders.customer_id", + "relationship": "one_to_many", + } + ] + + +def test_generate_sources_aliases_multiple_joins_to_same_table() -> None: + sources = generate_sources( + GenerateSourcesRequest( + tables=[ + TableInput( + name="orders", + columns=[ + ColumnInput(name="id", type="integer", primary_key=True), + ColumnInput(name="buyer_id", type="integer"), + ColumnInput(name="seller_id", type="integer"), + ], + ), + TableInput( + name="users", + columns=[ColumnInput(name="id", type="integer", primary_key=True)], + ), + ], + links=[ + LinkInput( + from_table="orders", + from_column="buyer_id", + to_table="users", + to_column="id", + relationship_type="many_to_one", + ), + LinkInput( + from_table="orders", + from_column="seller_id", + to_table="users", + to_column="id", + relationship_type="many_to_one", + ), + ], + ) + ) + + orders = next(source for source in sources if source["name"] == "orders") + assert orders["joins"] == [ + { + "to": "users", + "on": "buyer_id = users.id", + "relationship": "many_to_one", + "alias": "users_buyer_id", + }, + { + "to": "users", + "on": "seller_id = users.id", + "relationship": "many_to_one", + "alias": "users_seller_id", + }, + ] diff --git a/python/klo-sl/AGENTS.md 
b/python/klo-sl/AGENTS.md new file mode 100644 index 00000000..591ed9da --- /dev/null +++ b/python/klo-sl/AGENTS.md @@ -0,0 +1,161 @@ +# Semantic Layer Engine + +Python semantic layer that generates SQL from structured JSON queries. No `from` clause — sources are inferred from fully-qualified field names (`source.column`). + +## Quick Start + +```bash +uv run pytest -q # run all tests +uv run python -m semantic_layer.cli --help +``` + +## Testing Corner Cases via CLI + +Use `--model` to pass a self-contained YAML model (list of source definitions) instead of a directory. This lets you test any join topology or edge case without creating files. + +### 1. Create an inline model file + +```yaml +# /tmp/model.yaml — a YAML list of source definitions +- name: orders + table: public.orders + grain: [id] + columns: + - {name: id, type: number} + - {name: amount, type: number} + - {name: status, type: string} + joins: + - to: customers + "on": "customer_id = customers.id" + relationship: many_to_one + measures: + - {name: revenue, expr: "sum(amount)", filter: "status != 'refunded'"} + +- name: customers + table: public.customers + grain: [id] + columns: + - {name: id, type: number} + - {name: segment, type: string} +``` + +### 2. 
Run queries against it + +```bash +# Basic query +uv run python -m semantic_layer.cli --model /tmp/model.yaml \ + -q '{"measures":["sum(orders.amount)"],"dimensions":["customers.segment"]}' + +# Pre-defined measure + filter +uv run python -m semantic_layer.cli --model /tmp/model.yaml \ + -q '{"measures":["orders.revenue"],"dimensions":["orders.status"],"filters":["orders.status != '"'"'cancelled'"'"'"]}' + +# Show resolved plan alongside SQL +uv run python -m semantic_layer.cli --model /tmp/model.yaml \ + -q '{"measures":["orders.revenue"],"dimensions":["customers.segment"]}' --plan + +# Validate without generating SQL +uv run python -m semantic_layer.cli --model /tmp/model.yaml \ + -q '{"measures":["orders.revenue"],"dimensions":["customers.segment"]}' --suggest +``` + +### 3. Test fan-out / chasm traps + +Add multiple measure sources that fan out from a shared dimension hub: + +```yaml +# /tmp/chasm.yaml — two independent fact tables joining to the same dimension +- name: hub + table: public.hub + grain: [id] + columns: [{name: id, type: number}, {name: segment, type: string}] + +- name: fact_a + table: public.fact_a + grain: [id] + columns: [{name: id, type: number}, {name: hub_id, type: number}, {name: val, type: number}] + joins: [{to: hub, "on": "hub_id = hub.id", relationship: many_to_one}] + +- name: fact_b + table: public.fact_b + grain: [id] + columns: [{name: id, type: number}, {name: hub_id, type: number}, {name: val, type: number}] + joins: [{to: hub, "on": "hub_id = hub.id", relationship: many_to_one}] +``` + +```bash +# This triggers aggregate locality (separate CTEs per fact table, FULL JOIN) +uv run python -m semantic_layer.cli --model /tmp/chasm.yaml \ + -q '{"measures":["sum(fact_a.val)","sum(fact_b.val)"],"dimensions":["hub.segment"]}' +``` + +### 4. 
Test derived measures + +```bash +uv run python -m semantic_layer.cli --model /tmp/model.yaml \ + -q '{"measures":[{"expr":"sum(orders.amount)","name":"total"},{"expr":"count(orders.id)","name":"cnt"},{"expr":"total / cnt","name":"avg_order"}],"dimensions":["customers.segment"]}' +``` + +### 5. Test dialects + +```bash +uv run python -m semantic_layer.cli --model /tmp/model.yaml \ + -q '{"measures":["sum(orders.amount)"],"dimensions":["customers.segment"]}' --dialect bigquery +``` + +### 6. Useful flags + +| Flag | Purpose | +|------|---------| +| `--model FILE` | Single YAML file with all sources (alternative to `--sources DIR`) | +| `--plan` | Show resolved plan + SQL | +| `--plan-only` | Show plan without SQL | +| `--suggest` | Validate query, show suggestions on failure | +| `--list-sources` | Print all sources, columns, measures, joins | +| `--dialect X` | postgres (default), bigquery, snowflake, duckdb, mysql | +| `--compact` | SQL without header comment | +| `-q JSON` | Pass query as JSON string | +| `--json` | Read JSON query from stdin | + +## Coding Guidelines + +### Expression handling — always use sqlglot AST, never regex on SQL + +- **Parse expressions** with `sqlglot.parse_one(f"SELECT {expr}")` and walk/transform the AST. Never use `str.replace()`, `re.sub()`, or string splitting on SQL fragments — these corrupt string literals, aliases, and nested expressions. +- **Quote reserved words first**: always call `quote_reserved_identifiers(expr)` before passing to `sqlglot.parse_one()`. Column/source names like `group`, `key`, `order` will fail to parse otherwise. +- **Use the parse cache** in `parser.py` (`ExpressionParser._parse_as_select()`) for read-only AST walks. Direct `sqlglot.parse_one()` calls are fine when you need to `.transform()` the tree. +- **Regex is fine for non-SQL tasks**: sanitizing alias names, masking string literals before parse, etc. The rule is: don't use regex to interpret SQL structure. 
+ +### Error handling + +- Never use bare `except Exception: pass`. At minimum add `logger.debug(...)` so failures are observable. Prefer catching `sqlglot.errors.ParseError` specifically. +- Regex fallback paths in generator.py exist for edge cases where sqlglot can't parse user-provided SQL sources. These are acceptable as last-resort fallbacks with logging, not as primary code paths. + +### SQL generation strategy + +- **Write postgres, transpile on output.** All SQL is generated as postgres dialect. `_transpile()` converts to the target dialect at the very end. Never add dialect-specific SQL generation logic. +- **f-strings for SQL skeleton** (`SELECT/FROM/JOIN/GROUP BY`) are fine and readable. Use sqlglot AST only for expression-level transformations (substitution, function translation, filter rewriting). +- **Don't build SQL via sqlglot node construction** (`exp.Select().from_(...)`). It's harder to read and debug than f-strings for structural SQL. + +### Testing + +- Run `uv run pytest -q` after every change. All tests must pass. +- Test CLI queries with `--model /tmp/model.yaml` for quick iteration on edge cases (see examples above). +- When adding expression handling logic, test with reserved-word identifiers (`group.key`, `order.select`) and string literals containing dots (`status = 'group.value'`). 
+ +## Project Structure + +``` +semantic_layer/ + models.py # Pydantic data models (sources, queries, plans, results) + loader.py # YAML source file loader + graph.py # Bidirectional join graph with Dijkstra + Steiner tree + parser.py # Expression parser (source refs, aggregate detection) + planner.py # 12-step query planning pipeline + generator.py # SQL generation (simple path + aggregate locality) + engine.py # Orchestrator tying loader/graph/planner/generator + cli.py # CLI entry point +sources/ + ecommerce/ # Test fixtures (6 YAML source definitions) +tests/ # 353 tests +``` diff --git a/python/klo-sl/CLAUDE.md b/python/klo-sl/CLAUDE.md new file mode 120000 index 00000000..47dc3e3d --- /dev/null +++ b/python/klo-sl/CLAUDE.md @@ -0,0 +1 @@ +AGENTS.md \ No newline at end of file diff --git a/python/klo-sl/README.md b/python/klo-sl/README.md new file mode 100644 index 00000000..e69de29b diff --git a/python/klo-sl/demos/complex_cte_join.yaml b/python/klo-sl/demos/complex_cte_join.yaml new file mode 100644 index 00000000..64be4cfc --- /dev/null +++ b/python/klo-sl/demos/complex_cte_join.yaml @@ -0,0 +1,222 @@ +# Complex CTE Runtime Join Demo +# +# Demonstrates: +# 1. Two SQL sources with internal CTEs (customer_lifetime_value, churn_risk) +# 2. Both join to `customers` at the source level (many_to_one) +# 3. `customers` joins to `regions` (many_to_one) +# 4. 
A query requesting measures from BOTH SQL sources + dimensions from `regions` +# triggers chasm trap detection and aggregate locality +# +# Join graph: +# customer_lifetime_value --m2o--> customers --m2o--> regions +# churn_risk --m2o--> customers --m2o--> regions + +# --- Table sources --- + +- name: regions + table: public.regions + grain: [id] + columns: + - name: id + type: number + - name: name + type: string + - name: continent + type: string + +- name: customers + table: public.customers + grain: [id] + columns: + - name: id + type: number + - name: name + type: string + - name: segment + type: string + - name: region_id + type: number + - name: signed_at + type: time + role: time + - name: arr + type: number + joins: + - to: regions + "on": region_id = regions.id + relationship: many_to_one + +- name: orders + table: public.orders + grain: [id] + columns: + - name: id + type: number + - name: customer_id + type: number + - name: amount + type: number + - name: created_at + type: time + role: time + joins: + - to: customers + "on": customer_id = customers.id + relationship: many_to_one + +- name: order_items + table: public.order_items + grain: [id] + columns: + - name: id + type: number + - name: order_id + type: number + - name: quantity + type: number + - name: unit_price + type: number + joins: + - to: orders + "on": order_id = orders.id + relationship: many_to_one + +# --- SQL source: Customer Lifetime Value (uses internal CTEs) --- + +- name: customer_lifetime_value + description: | + Customer lifetime value estimate using monthly revenue cohort analysis. + Internal CTEs aggregate orders+order_items by month, then compute + active_months and avg_mrr per customer before estimating LTV. 
+ sql: | + WITH monthly_revenue AS ( + SELECT + o.customer_id, + DATE_TRUNC('month', o.created_at) AS month, + SUM(oi.quantity * oi.unit_price) AS mrr + FROM orders o + JOIN order_items oi ON o.id = oi.order_id + GROUP BY o.customer_id, DATE_TRUNC('month', o.created_at) + ), + cohort_stats AS ( + SELECT + customer_id, + MIN(month) AS first_month, + COUNT(DISTINCT month) AS active_months, + AVG(mrr) AS avg_mrr + FROM monthly_revenue + GROUP BY customer_id + ) + SELECT + cs.customer_id, + cs.first_month, + cs.active_months, + cs.avg_mrr, + cs.avg_mrr * cs.active_months * 1.2 AS ltv_estimate + FROM cohort_stats cs + grain: [customer_id] + columns: + - name: customer_id + type: number + - name: first_month + type: time + - name: active_months + type: number + - name: avg_mrr + type: number + - name: ltv_estimate + type: number + joins: + - to: customers + "on": customer_id = customers.id + relationship: many_to_one + measures: + - name: avg_ltv + expr: avg(ltv_estimate) + description: "Average customer lifetime value" + - name: total_ltv + expr: sum(ltv_estimate) + description: "Total lifetime value across customers" + - name: avg_active_months + expr: avg(active_months) + description: "Average number of active months per customer" + +# --- SQL source: Churn Risk (uses internal CTEs) --- + +- name: churn_risk + description: | + Customer churn risk score combining recency, frequency, and support burden. + Internal CTEs compute rfm_scores from orders and ticket_counts from a + support table before producing a weighted composite score. 
+ sql: | + WITH rfm_scores AS ( + SELECT + customer_id, + EXTRACT(DAY FROM NOW() - MAX(created_at)) AS days_since_last_order, + COUNT(*) AS order_frequency, + AVG(amount) AS avg_order_value + FROM orders + GROUP BY customer_id + ), + ticket_counts AS ( + SELECT + customer_id, + COUNT(*) AS open_tickets, + AVG(EXTRACT(DAY FROM resolved_at - created_at)) AS avg_resolution_days + FROM support_tickets + WHERE status = 'open' + GROUP BY customer_id + ) + SELECT + r.customer_id, + r.days_since_last_order, + r.order_frequency, + COALESCE(t.open_tickets, 0) AS open_tickets, + CASE + WHEN r.days_since_last_order > 180 THEN 0.9 + WHEN r.days_since_last_order > 90 THEN 0.6 + ELSE 0.2 + END * 0.4 + + CASE + WHEN r.order_frequency < 2 THEN 0.8 + WHEN r.order_frequency < 5 THEN 0.4 + ELSE 0.1 + END * 0.3 + + CASE + WHEN COALESCE(t.open_tickets, 0) > 3 THEN 0.9 + WHEN COALESCE(t.open_tickets, 0) > 1 THEN 0.5 + ELSE 0.1 + END * 0.3 AS score, + CASE + WHEN r.avg_order_value < 100 THEN 'SMB' + WHEN r.avg_order_value < 1000 THEN 'Mid-Market' + ELSE 'Enterprise' + END AS customer_type + FROM rfm_scores r + LEFT JOIN ticket_counts t ON r.customer_id = t.customer_id + grain: [customer_id] + columns: + - name: customer_id + type: number + - name: days_since_last_order + type: number + - name: order_frequency + type: number + - name: open_tickets + type: number + - name: score + type: number + - name: customer_type + type: string + joins: + - to: customers + "on": customer_id = customers.id + relationship: many_to_one + measures: + - name: avg_risk + expr: avg(score) + description: "Average churn risk score" + - name: high_risk_count + expr: count(customer_id) + filter: "score > 0.7" + description: "Number of high-risk customers" diff --git a/python/klo-sl/demos/run_complex_cte_join.sh b/python/klo-sl/demos/run_complex_cte_join.sh new file mode 100755 index 00000000..752e921b --- /dev/null +++ b/python/klo-sl/demos/run_complex_cte_join.sh @@ -0,0 +1,60 @@ +#!/usr/bin/env bash +# Complex 
CTE Runtime Join Demo +# +# Shows how two SQL sources with internal CTEs (customer_lifetime_value, churn_risk) +# are joined at runtime through the join graph to a dimension table (regions), +# triggering chasm trap detection and aggregate locality. + +set -euo pipefail +cd "$(dirname "$0")/.." + +MODEL="demos/complex_cte_join.yaml" + +echo "============================================" +echo " Demo 1: Chasm Trap — Two CTE metrics + regions dimension" +echo "============================================" +echo "" +echo "Query: Average LTV and average churn risk by region," +echo " for customers with churn score > 0.7" +echo "" + +echo '{ + "measures": ["customer_lifetime_value.avg_ltv", "churn_risk.avg_risk"], + "dimensions": ["regions.name"], + "filters": ["churn_risk.score > 0.7"] +}' | uv run python -m semantic_layer.cli --model "$MODEL" --json --plan + +echo "" +echo "============================================" +echo " Demo 2: Single CTE metric enriched with regions" +echo "============================================" +echo "" +echo "Query: LTV breakdown by region and customer segment," +echo " only customers with 6+ active months" +echo "" + +echo '{ + "measures": [ + "customer_lifetime_value.avg_ltv", + "customer_lifetime_value.avg_active_months", + {"expr": "count(customer_lifetime_value.customer_id)", "name": "customer_count"} + ], + "dimensions": ["regions.name", "customers.segment"], + "filters": ["customer_lifetime_value.active_months >= 6"] +}' | uv run python -m semantic_layer.cli --model "$MODEL" --json --plan + +echo "" +echo "============================================" +echo " Demo 3: Runtime aggregation on CTE columns + cross-source join" +echo "============================================" +echo "" +echo "Query: P90 churn score and max LTV by region continent" +echo "" + +echo '{ + "measures": [ + {"expr": "percentile(churn_risk.score, 0.9)", "name": "p90_churn"}, + {"expr": "max(customer_lifetime_value.ltv_estimate)", "name": "max_ltv"} + ], 
+ "dimensions": ["regions.continent"] +}' | uv run python -m semantic_layer.cli --model "$MODEL" --json --plan diff --git a/python/klo-sl/pyproject.toml b/python/klo-sl/pyproject.toml new file mode 100644 index 00000000..ccd46e25 --- /dev/null +++ b/python/klo-sl/pyproject.toml @@ -0,0 +1,59 @@ +[project] +name = "klo-sl" +version = "0.1.0" +description = "Agent-first semantic layer engine with aggregate locality" +readme = "README.md" +requires-python = ">=3.13" +license = "Apache-2.0" +dependencies = [ + "sqlglot>=26", + "pydantic>=2", + "pyyaml>=6", +] + +[project.urls] +Homepage = "https://github.com/kaelio/ktx" +Repository = "https://github.com/kaelio/ktx" +Issues = "https://github.com/kaelio/ktx/issues" + +[project.optional-dependencies] +dev = [ + "pytest>=8", + "pytest-cov", + "ruff", + "pre-commit", +] +tpch = [ + "duckdb>=1.0", +] + +[tool.pytest.ini_options] +testpaths = ["tests"] +pythonpath = ["tests"] +addopts = "--cov=semantic_layer --cov-report=term-missing --cov-report=html" + +[tool.coverage.run] +source = ["semantic_layer"] +branch = true + +[tool.coverage.report] +show_missing = true +skip_empty = true +exclude_lines = [ + "pragma: no cover", + "if __name__ == .__main__.", + "if TYPE_CHECKING:", +] + +[build-system] +requires = ["hatchling"] +build-backend = "hatchling.build" + +[tool.hatch.build.targets.wheel] +packages = ["semantic_layer"] + +[dependency-groups] +dev = [ + "pytest>=9.0.2", + "pytest-cov>=7.1.0", +] diff --git a/python/klo-sl/scripts/gen_b2b_saas_model.py b/python/klo-sl/scripts/gen_b2b_saas_model.py new file mode 100644 index 00000000..5a6721d4 --- /dev/null +++ b/python/klo-sl/scripts/gen_b2b_saas_model.py @@ -0,0 +1,219 @@ +#!/usr/bin/env python3 +"""Generate semantic layer YAML sources from demo DB metadata. 
# Connection (row in the demo metadata DB) whose tables are exported.
CONNECTION_ID = "256bc76b-cc47-4d5d-a9fc-5bcfb0364d44"
# Generated YAML files land in <repo>/sources/b2b_saas, next to this scripts/ dir.
OUTPUT_DIR = os.path.join(os.path.dirname(__file__), "..", "sources", "b2b_saas")

# psycopg2 connection parameters; every value can be overridden via environment.
DB_PARAMS = {
    "host": os.environ.get("KLO_DEMO_DB_HOST", "127.0.0.1"),
    "port": int(os.environ.get("KLO_DEMO_DB_PORT", "5433")),
    "user": os.environ.get("KLO_DEMO_DB_USER", "klo-demo-user"),
    "password": os.environ.get("KLO_DEMO_DB_PASSWORD", ""),
    "dbname": os.environ.get("KLO_DEMO_DB_NAME", "klo-demo-db"),
}

# Map DB types to semantic layer types
TYPE_MAP = {
    "INTEGER": "number",
    "FLOAT": "number",
    "NUMERIC": "number",
    "DECIMAL": "number",
    "BIGINT": "number",
    "SMALLINT": "number",
    "DOUBLE": "number",
    "REAL": "number",
    "VARCHAR": "string",
    "TEXT": "string",
    "CHAR": "string",
    "DATE": "time",
    "TIMESTAMP": "time",
    "TIMESTAMPTZ": "time",
    "DATETIME": "time",
    "TIME": "time",
    "BOOLEAN": "boolean",
    "BOOL": "boolean",
}

# Columns whose names suggest a time role (matched as substrings of the
# lower-cased column name, so the heuristic is deliberately loose).
TIME_PATTERNS = {"_at", "_date", "date", "timestamp", "created", "updated"}


def _base_sl_type(db_type: str) -> str:
    """Return the semantic-layer type for a raw DB type string.

    The lookup ignores case, a trailing precision/length such as ``(10,2)``
    and trailing modifiers such as ``WITHOUT TIME ZONE``; only the first
    word of the type name is looked up in ``TYPE_MAP``. Anything that is
    not in the map (or is empty/None) is treated as ``"string"``.
    """
    words = (db_type or "").upper().split("(", 1)[0].split()
    if not words:
        return "string"
    return TYPE_MAP.get(words[0], "string")


def is_time_column(name: str, db_type: str) -> bool:
    """Tell whether a column should carry ``role: time``.

    True when the DB type maps to ``"time"``, or when the type maps to
    ``"string"`` and the column name contains one of ``TIME_PATTERNS``
    (e.g. ``created_at`` stored as VARCHAR).
    """
    sl_type = _base_sl_type(db_type)
    if sl_type == "time":
        return True
    if sl_type != "string":
        return False
    lower = name.lower()
    return any(p in lower for p in TIME_PATTERNS)


def map_type(db_type: str, col_name: str) -> str:
    """Return the semantic-layer type for a column.

    Uses the same normalisation as ``is_time_column`` so the two can never
    disagree: every column that gets ``role: time`` also gets ``type: time``.
    """
    if is_time_column(col_name, db_type):
        return "time"
    return _base_sl_type(db_type)


def _fetch_metadata(cur):
    """Read tables, columns and column links for ``CONNECTION_ID``.

    Returns ``(tables, columns_by_table, col_id_to_info, joins_by_table)``
    where ``tables`` maps table id -> table name and the other three are
    keyed by table id / column id as their names say.
    """
    # 1. Fetch tables
    cur.execute(
        "SELECT id, name FROM source_tables WHERE connection_id = %s ORDER BY name",
        (CONNECTION_ID,),
    )
    tables = {row[0]: row[1] for row in cur.fetchall()}
    table_ids = list(tables)

    # 2. Fetch columns
    cur.execute(
        """
        SELECT id, name, type, nullable, primary_key, table_id
        FROM source_columns
        WHERE table_id = ANY(%s::uuid[])
        ORDER BY table_id, primary_key DESC, name
        """,
        (table_ids,),
    )
    columns_by_table: dict[str, list] = {}
    col_id_to_info: dict[str, dict] = {}
    for col_id, col_name, col_type, nullable, is_pk, table_id in cur.fetchall():
        info = {
            "id": col_id,
            "name": col_name,
            "type": col_type,
            "nullable": nullable,
            "primary_key": is_pk,
            "table_id": table_id,
        }
        col_id_to_info[col_id] = info
        columns_by_table.setdefault(table_id, []).append(info)

    # 3. Fetch links (joins)
    cur.execute(
        """
        SELECT from_table_id, from_column_id, to_table_id, to_column_id, relationship_type
        FROM column_links
        WHERE from_table_id = ANY(%s::uuid[]) OR to_table_id = ANY(%s::uuid[])
        """,
        (table_ids, table_ids),
    )
    # Group links by from_table
    joins_by_table: dict[str, list] = {}
    for from_table_id, from_col_id, to_table_id, to_col_id, rel_type in cur.fetchall():
        # Only include joins where both sides are in our connection
        if from_table_id not in tables or to_table_id not in tables:
            continue
        joins_by_table.setdefault(from_table_id, []).append(
            {
                "from_col_id": from_col_id,
                "to_table_id": to_table_id,
                "to_col_id": to_col_id,
                "relationship_type": rel_type,
            }
        )

    return tables, columns_by_table, col_id_to_info, joins_by_table


def _build_source(table_name, cols, joins, tables, col_id_to_info) -> dict:
    """Build the YAML-ready source definition for one table."""
    # Grain: primary key columns, else row_id, else first column, else <table>_id.
    pk_names = [c["name"] for c in cols if c["primary_key"]]
    if pk_names:
        grain = pk_names
    elif any(c["name"] == "row_id" for c in cols):
        grain = ["row_id"]
    elif cols:
        grain = [cols[0]["name"]]
    else:
        grain = [table_name + "_id"]

    # Build column definitions
    yaml_columns = []
    for c in cols:
        col_def: dict = {"name": c["name"], "type": map_type(c["type"], c["name"])}
        if is_time_column(c["name"], c["type"]):
            col_def["role"] = "time"
        yaml_columns.append(col_def)

    # Resolve links first so that alias numbering only counts joins that are
    # actually emitted (links with an unknown column/table are dropped).
    resolved = []
    for j in joins:
        to_name = tables.get(j["to_table_id"])
        from_col = col_id_to_info.get(j["from_col_id"], {}).get("name")
        to_col = col_id_to_info.get(j["to_col_id"], {}).get("name")
        if not (to_name and from_col and to_col):
            continue
        resolved.append((to_name, from_col, to_col, j["relationship_type"].lower()))

    target_counts: dict[str, int] = {}
    for to_name, _from_col, _to_col, _rel in resolved:
        target_counts[to_name] = target_counts.get(to_name, 0) + 1

    # Build join definitions
    yaml_joins = []
    target_seen: dict[str, int] = {}
    for to_name, from_col, to_col, rel in resolved:
        join_def: dict = {
            "to": to_name,
            "on": f"{from_col} = {to_name}.{to_col}",
            "relationship": rel,
        }
        # Add alias if multiple joins to same target
        target_seen[to_name] = target_seen.get(to_name, 0) + 1
        if target_counts[to_name] > 1:
            join_def["alias"] = f"{to_name}_{target_seen[to_name]}"
        yaml_joins.append(join_def)

    # Key order matters: the file is dumped with sort_keys=False.
    source: dict = {
        "name": table_name,
        "table": table_name,
        "grain": grain,
        "columns": yaml_columns,
    }
    if yaml_joins:
        source["joins"] = yaml_joins
    return source


def main():
    """Export one ``<table>.yaml`` source file per table into ``OUTPUT_DIR``."""
    conn = psycopg2.connect(**DB_PARAMS)
    try:
        tables, columns_by_table, col_id_to_info, joins_by_table = _fetch_metadata(
            conn.cursor()
        )
    finally:
        # Release the connection even when a metadata query fails.
        conn.close()

    # 4. Generate YAML files
    os.makedirs(OUTPUT_DIR, exist_ok=True)

    for table_id, table_name in sorted(tables.items(), key=lambda x: x[1]):
        source = _build_source(
            table_name,
            columns_by_table.get(table_id, []),
            joins_by_table.get(table_id, []),
            tables,
            col_id_to_info,
        )

        # Write YAML; explicit UTF-8 because allow_unicode=True emits raw
        # non-ASCII characters that a locale encoding may not be able to encode.
        filepath = os.path.join(OUTPUT_DIR, f"{table_name}.yaml")
        with open(filepath, "w", encoding="utf-8") as f:
            yaml.dump(
                source, f, default_flow_style=False, sort_keys=False, allow_unicode=True
            )

    print(f"Generated {len(tables)} source files in {OUTPUT_DIR}")


if __name__ == "__main__":
    main()
# A column reference as it appears in generated SQL: one or more dot-separated
# parts, each either a bare word or a double-quoted identifier
# (e.g. col, t.col, "t"."col").
_IDENT = r'(?:"[^"]+"|\w+)(?:\.(?:"[^"]+"|\w+))*'

# Ordered (pattern, replacement) pairs applied by fixup_sqlite(). Order matters:
# the DATE(...) form must be rewritten before the generic CURRENT_DATE - col form.
_SQLITE_FIXUPS = [
    # GROUP_CONCAT(DISTINCT x, sep) → GROUP_CONCAT(DISTINCT x) — sqlite
    # only allows 1 arg with DISTINCT
    (
        rf"GROUP_CONCAT\(DISTINCT ({_IDENT}),\s*'[^']*'\)",
        r"GROUP_CONCAT(DISTINCT \1)",
    ),
    # CURRENT_DATE - col → integer days via julianday
    (
        rf"CURRENT_DATE - DATE\(({_IDENT})\)",
        r"CAST(julianday('now') - julianday(\1) AS INTEGER)",
    ),
    (
        rf"CURRENT_DATE - ({_IDENT})",
        r"CAST(julianday('now') - julianday(\1) AS INTEGER)",
    ),
    # col - CURRENT_DATE → integer days via julianday
    (
        rf"({_IDENT}) - CURRENT_DATE",
        r"CAST(julianday(\1) - julianday('now') AS INTEGER)",
    ),
    # CURRENT_DATE > col → julianday comparison
    (rf"CURRENT_DATE > ({_IDENT})", r"julianday('now') > julianday(\1)"),
    # NULLS LAST — not supported in sqlite
    (r"\s+NULLS LAST", ""),
]


def fixup_sqlite(sql: str) -> str:
    """Patch the few constructs sqlglot's sqlite output leaves unsupported.

    Applies every entry of ``_SQLITE_FIXUPS`` in order with ``re.sub``.
    Column operands may be bare, table-qualified or double-quoted; the whole
    reference is carried into the rewritten expression.

    NOTE(review): this is still text-level rewriting, so a string literal that
    happens to contain one of these patterns is rewritten too.
    """
    for pattern, repl in _SQLITE_FIXUPS:
        sql = re.sub(pattern, repl, sql)
    return sql


def main() -> None:
    """Parse CLI arguments, generate SQL for the query and run it on SQLite.

    The query comes from the positional argument or, with ``--stdin``, from
    standard input. With ``--sql-only`` the patched SQL is printed and nothing
    is executed. Otherwise the rows are printed as a padded text table; on
    ``sqlite3.OperationalError`` the error and the SQL go to stderr and the
    process exits with status 1.
    """
    p = argparse.ArgumentParser(description="Run SL query against b2b_saas SQLite DB")
    p.add_argument("query", nargs="?", help="JSON query string")
    p.add_argument("--stdin", action="store_true", help="Read JSON from stdin")
    p.add_argument(
        "--sql-only", action="store_true", help="Print SQL without executing"
    )
    p.add_argument("--db", default=str(DB_PATH), help="Path to SQLite database")
    p.add_argument(
        "--sources", default=str(SOURCES_DIR), help="Path to sources directory"
    )
    args = p.parse_args()

    if args.stdin:
        query_dict = json.loads(sys.stdin.read())
    elif args.query:
        query_dict = json.loads(args.query)
    else:
        # argparse's error() prints usage and exits, so query_dict is always bound below.
        p.error("Provide a JSON query string or use --stdin")

    # Use sqlite dialect — sqlglot handles STRING_AGG→GROUP_CONCAT,
    # DECIMAL→REAL, ::DATE→DATE(), etc.
    engine = SemanticEngine(args.sources, dialect="sqlite")
    result = engine.query(query_dict)
    sql = fixup_sqlite(result.sql)

    if args.sql_only:
        print(sql)
        return

    conn = sqlite3.connect(args.db)
    conn.row_factory = sqlite3.Row  # rows become addressable by column name
    try:
        rows = conn.execute(sql).fetchall()
    except sqlite3.OperationalError as e:
        print(f"SQL error: {e}", file=sys.stderr)
        print(f"\nGenerated SQL:\n{sql}", file=sys.stderr)
        sys.exit(1)
    finally:
        conn.close()

    if not rows:
        print("(no rows)")
        return

    # rows is non-empty here, so the inner max() always has at least one value.
    cols = rows[0].keys()
    widths = [max(len(str(c)), max(len(str(r[c])) for r in rows)) for c in cols]
    header = " ".join(str(c).ljust(w) for c, w in zip(cols, widths))
    sep = " ".join("-" * w for w in widths)
    print(header)
    print(sep)
    for r in rows:
        print(" ".join(str(r[c]).ljust(w) for c, w in zip(cols, widths)))


if __name__ == "__main__":
    main()

def format_table(col_names: list, rows: list) -> list[str]:
    """Render a result set as padded text lines: header, dashes, then rows.

    Each column is as wide as its longest cell or its name, whichever is
    longer. An empty ``rows`` list is valid and yields just the header and
    the separator line.
    """
    # Build one list per column so max() always receives an iterable with at
    # least the header width in it, even when there are no rows.
    widths = [
        max([len(str(c))] + [len(str(r[i])) for r in rows])
        for i, c in enumerate(col_names)
    ]
    lines = [
        " ".join(str(c).ljust(w) for c, w in zip(col_names, widths)),
        " ".join("-" * w for w in widths),
    ]
    lines.extend(" ".join(str(v).ljust(w) for v, w in zip(row, widths)) for row in rows)
    return lines


def run_query(
    conn: duckdb.DuckDBPyConnection,
    engine: SemanticEngine,
    title: str,
    query_dict: dict,
) -> None:
    """Generate SQL via semantic layer, execute it, and print results.

    Prints, in order: a banner with ``title``, the request as indented JSON,
    the generated SQL pretty-printed by sqlglot, the result table and the
    row count. The SQL that is executed is ``result.sql`` itself, not the
    pretty-printed form.
    """
    print(f"\n{'=' * 60}")
    print(f" {title}")
    print(f"{'=' * 60}")

    print("\n>> Request:")
    print(json.dumps(query_dict, indent=2))

    result = engine.query(query_dict)
    # Same dialect on both sides: this only reformats, it does not translate.
    formatted_sql = sqlglot.transpile(
        result.sql, read=result.dialect, write=result.dialect, pretty=True
    )[0]
    print(f"\n-- dialect: {result.dialect}")
    print(formatted_sql)

    cursor = conn.execute(result.sql)
    col_names = [desc[0] for desc in cursor.description]
    rows = cursor.fetchall()

    # Simple table formatting
    header, sep, *body = format_table(col_names, rows)
    print(f"\n{header}")
    print(sep)
    for line in body:
        print(line)
    print(f"\n({len(rows)} rows)")


def main() -> None:
    """Load TPC-H into DuckDB and run the sample semantic-layer queries."""
    conn = setup_tpch()
    engine = SemanticEngine("sources/tpch", dialect="duckdb")

    # Q1: Pricing summary by return flag / line status
    run_query(
        conn,
        engine,
        "Q1: Pricing Summary",
        {
            "measures": [
                "lineitem.revenue",
                "lineitem.total_quantity",
                "lineitem.avg_discount",
                "lineitem.line_count",
            ],
            "dimensions": ["lineitem.l_returnflag", "lineitem.l_linestatus"],
        },
    )

    # Q5-style: Revenue by nation (4-hop join) with ASIA filter
    run_query(
        conn,
        engine,
        "Q5: Revenue by Nation (ASIA)",
        {
            "measures": ["lineitem.revenue"],
            "dimensions": ["nation.n_name"],
            "filters": ["region.r_name = 'ASIA'"],
        },
    )

    # Q3-style: Revenue by order month for BUILDING segment
    run_query(
        conn,
        engine,
        "Q3: Revenue by Month (BUILDING)",
        {
            "measures": ["lineitem.revenue"],
            "dimensions": [{"field": "orders.o_orderdate", "granularity": "month"}],
            "filters": ["customer.c_mktsegment = 'BUILDING'"],
            "limit": 12,
        },
    )

    # Q10-style: Returned revenue by customer (filtered measure)
    run_query(
        conn,
        engine,
        "Q10: Returned Revenue by Customer",
        {
            "measures": ["lineitem.returned_revenue"],
            "dimensions": ["customer.c_name"],
            "order_by": [{"field": "lineitem.returned_revenue", "direction": "desc"}],
            "limit": 10,
        },
    )

    # Multi-measure: revenue + charge + counts
    run_query(
        conn,
        engine,
        "Multi-measure: Revenue, Charge, Counts",
        {
            "measures": [
                "lineitem.revenue",
                "lineitem.charge",
                "orders.order_count",
            ],
            "dimensions": ["customer.c_mktsegment"],
        },
    )

    # Supply cost by nation (through partsupp bridge)
    run_query(
        conn,
        engine,
        "Supply Cost by Nation",
        {
            "measures": ["partsupp.total_supply_cost"],
            "dimensions": ["nation.n_name"],
            "limit": 10,
        },
    )


if __name__ == "__main__":
    main()
def build_parser() -> argparse.ArgumentParser:
    """Build the argument parser for the ``semantic-layer`` command."""
    p = argparse.ArgumentParser(
        prog="semantic-layer",
        description="Query the semantic layer engine and generate SQL",
    )
    p.add_argument(
        "--sources",
        "-s",
        help="Path to the sources directory (e.g. sources/ecommerce)",
    )
    p.add_argument(
        "--model",
        help="Path to a single YAML file containing all source definitions as a list",
    )
    p.add_argument(
        "--dialect",
        "-d",
        default="postgres",
        help="SQL dialect (postgres, bigquery, snowflake, etc.)",
    )

    # Query input
    p.add_argument(
        "--query",
        "-q",
        help='Raw JSON query string (e.g. \'{"measures": ["orders.revenue"], "dimensions": ["orders.status"]}\')',
    )
    p.add_argument(
        "--json",
        action="store_true",
        dest="json_input",
        help="Read query as JSON from stdin",
    )

    # Output modes
    p.add_argument(
        "--plan-only",
        action="store_true",
        help="Show the resolved plan instead of SQL",
    )
    p.add_argument(
        "--plan",
        action="store_true",
        help="Show the resolved plan alongside SQL",
    )
    p.add_argument(
        "--compact",
        action="store_true",
        # main() prints the same SQL either way; only the "-- dialect:" line is dropped.
        help="Output SQL without the dialect header comment",
    )

    # Info commands
    p.add_argument(
        "--list-sources",
        action="store_true",
        help="List all available sources and exit",
    )
    p.add_argument(
        "--suggest",
        action="store_true",
        help="Validate the query and suggest fixes if it fails",
    )

    return p


def list_sources(engine: SemanticEngine) -> None:
    """Print every source of ``engine`` with its columns, measures and joins."""
    for name, src in sorted(engine.sources.items()):
        print(f"\n{'─' * 40}")
        print(f" {name}")
        src_type = "sql" if src.is_sql_source else "table"
        # type / table / grain share one output line, hence end="".
        print(f" type: {src_type}", end="")
        if src.table:
            print(f" table: {src.table}", end="")
        print(f" grain: {src.grain}")
        if src.description:
            print(f" {src.description.strip()}")

        if src.columns:
            print(" columns:")
            for col in src.columns:
                role_tag = f" [{col.role.value}]" if col.role.value != "default" else ""
                print(f" {col.name}: {col.type}{role_tag}")

        if src.measures:
            print(" measures:")
            for m in src.measures:
                filt = f" (filter: {m.filter})" if m.filter else ""
                print(f" {m.name}: {m.expr}{filt}")

        if src.joins:
            print(" joins:")
            for j in src.joins:
                print(f" → {j.to} ({j.relationship}) on {j.on}")


def print_plan(plan) -> None:
    """Print a resolved plan: sources, anchor, joins, fan-out, filters, columns."""
    print("\n── Resolved Plan ──")
    print(f" Sources: {', '.join(plan.sources_used)}")
    print(f" Anchor: {plan.anchor_source}")
    if plan.join_paths:
        print(" Joins:")
        for jp in plan.join_paths:
            print(f" {jp}")
    print(f" Fan-out: {plan.fan_out_description}")
    if plan.aggregate_locality:
        print(" Locality:")
        for al in plan.aggregate_locality:
            print(f" {al}")
    if plan.where_filters:
        print(f" WHERE: {' AND '.join(plan.where_filters)}")
    if plan.having_filters:
        print(f" HAVING: {' AND '.join(plan.having_filters)}")
    print(" Columns:")
    for col in plan.columns:
        prov = col.provenance.value
        gran = f" ({col.granularity})" if col.granularity else ""
        print(f" {col.name} [{prov}]{gran}")


def _load_model_file(path: str) -> dict[str, SourceDefinition]:
    """Load a YAML file containing a list of source definitions.

    Returns the sources keyed by name.

    Raises:
        ValueError: if the document is not a list, if an entry is not a
            mapping, or if two entries share a name.
    """
    # YAML streams are UTF-8 by convention; do not depend on the locale encoding.
    with open(path, encoding="utf-8") as f:
        data = yaml.safe_load(f)
    if not isinstance(data, list):
        raise ValueError("Model file must contain a YAML list of source definitions")
    sources: dict[str, SourceDefinition] = {}
    for index, item in enumerate(data):
        if not isinstance(item, dict):
            # SourceDefinition(**item) would otherwise fail with an opaque TypeError.
            raise ValueError(
                f"Model file entry {index} must be a mapping, got {type(item).__name__}"
            )
        src = SourceDefinition(**item)
        if src.name in sources:
            raise ValueError(f"Duplicate source name: '{src.name}'")
        sources[src.name] = src
    return sources


def main(argv: list[str] | None = None) -> None:
    """Run the CLI.

    ``argv`` defaults to ``sys.argv[1:]`` (argparse behaviour). Modes are
    checked in this order: ``--list-sources``, ``--suggest``, ``--plan-only``,
    then the full query. ``--model`` takes precedence over ``--sources`` when
    both are given.
    """
    parser = build_parser()
    args = parser.parse_args(argv)

    if args.model:
        sources = _load_model_file(args.model)
        engine = SemanticEngine.from_sources(sources, dialect=args.dialect)
    elif args.sources:
        engine = SemanticEngine(args.sources, dialect=args.dialect)
    else:
        parser.error("Provide --sources or --model")

    # List sources mode
    if args.list_sources:
        list_sources(engine)
        return

    # Build query
    if args.query:
        query_dict = json.loads(args.query)
    elif args.json_input:
        raw = sys.stdin.read()
        query_dict = json.loads(raw)
    else:
        parser.error("Provide --query or --json")
        return

    # Suggest mode
    if args.suggest:
        result = engine.suggest(query_dict)
        if result["success"]:
            print("Query is valid.")
            print_plan(result["plan"])
        else:
            print(f"Query failed: {result['error']}")
            if result.get("graph_errors"):
                for err in result["graph_errors"]:
                    print(f" Graph error: {err}")
            for s in result.get("suggestions", []):
                if isinstance(s, dict):
                    print(f" Suggestion: {s.get('description', '')}")
                    for src in s.get("required_sources", []):
                        print(f" - Define source: {src}")
                    for j in s.get("required_joins", []):
                        print(
                            f" - Add join: {j['source']}.{j['on']} ({j['relationship']})"
                        )
                    for note in s.get("notes", []):
                        print(f" Note: {note}")
                else:
                    print(f" Suggestion: {s}")
        return

    # Plan-only mode
    if args.plan_only:
        plan = engine.plan_only(query_dict)
        print_plan(plan)
        return

    # Full query
    result = engine.query(query_dict)

    if args.plan:
        print_plan(result.resolved_plan)
        print()

    if args.compact:
        print(result.sql)
    else:
        print(f"-- dialect: {result.dialect}")
        print(result.sql)


if __name__ == "__main__":
    main()
def validate_measure_duplicates(
    sources: dict[str, SourceDefinition],
    *,
    dialect: str = "postgres",
) -> list[str]:
    """
    Report measures on the same source that share a structurally equivalent
    `expr`. The goal is to stop capture-time churn such as:

    - name: active_subscription_count
      expr: count(*)
      filter: is_active = true
    - name: new_subscription_count
      expr: count(*)   # same base aggregation — should be query-time filter

    Every pair of measures inside one source is compared; measures on
    different sources are never compared with each other. Pairs whose
    `segments` differ are treated as intentionally distinct.

    Returns human-readable error strings; an empty list means no duplicates.
    """

    def _parse(measure_expr: str) -> exp.Expression | None:
        # Unparseable expressions are left for the caller's normal validation
        # to surface; a parse failure must not block this check.
        try:
            select = sqlglot.parse_one(
                f"SELECT {quote_reserved_identifiers(measure_expr)}", read=dialect
            )
            return select.expressions[0] if select.expressions else None
        except Exception:
            return None

    problems: list[str] = []
    for source_name, source in sources.items():
        if len(source.measures) < 2:
            continue

        candidates: list[tuple[str, exp.Expression | None, str | None, frozenset[str]]] = [
            (measure.name, _parse(measure.expr), measure.filter, frozenset(measure.segments))
            for measure in source.measures
        ]

        for first_idx in range(len(candidates)):
            first_name, first_expr, first_filter, first_segments = candidates[first_idx]
            if first_expr is None:
                continue
            for second_idx in range(first_idx + 1, len(candidates)):
                second_name, second_expr, second_filter, second_segments = candidates[second_idx]
                if second_expr is None:
                    continue
                if not _expressions_equivalent(first_expr, second_expr):
                    continue
                # Segments are named, reusable filter predicates; the same
                # expr under different segments is distinct by design.
                if first_segments != second_segments:
                    continue

                if (first_filter or "").strip() == (second_filter or "").strip():
                    message = (
                        f"{source_name}: measures '{first_name}' and '{second_name}' have the same "
                        f"expression and filter — remove one or differentiate them."
                    )
                else:
                    message = (
                        f"{source_name}: measure '{second_name}' has the same expression as "
                        f"'{first_name}' — differs only by `filter`. Use query-time filtering "
                        f"on '{first_name}' (via semantic_query filters), or, if the filter "
                        f"encodes a named business segment, add a segments[] entry on this "
                        f"source and reference it instead."
                    )
                problems.append(message)
    return problems


def _expressions_equivalent(a: exp.Expression, b: exp.Expression) -> bool:
    """
    Compare two sqlglot ASTs structurally.

    Both nodes must be of the same class and render to the same text through
    `.sql(dialect="postgres")`. Operands are never reordered, so
    `safe_divide(a, b)` and `safe_divide(b, a)` differ, as do `a - b` and
    `b - a`: the check targets accidental redundancy, not mathematical
    equivalence.
    """
    same_node_class = type(a) is type(b)
    return same_node_class and a.sql(dialect="postgres") == b.sql(dialect="postgres")
class SemanticEngine:
    """Entry point that wires source definitions to the join graph, the
    query planner and the SQL generator."""

    def __init__(self, sources_dir: str, dialect: str = "postgres"):
        """Load every source found under ``sources_dir`` and build the engine."""
        self.loader = SourceLoader(sources_dir)
        self.sources = self.loader.load_all()
        self._init_engine(dialect)

    @classmethod
    def from_sources(
        cls, sources: dict[str, SourceDefinition], dialect: str = "postgres"
    ) -> SemanticEngine:
        """Build an engine around source definitions that are already in memory."""
        engine = object.__new__(cls)  # bypass __init__, which loads from disk
        engine.loader = None
        engine.sources = sources
        engine._init_engine(dialect)
        return engine

    def _init_engine(self, dialect: str) -> None:
        """Create graph, planner and generator for ``self.sources``."""
        # Constructing a throw-away generator first surfaces the user-facing
        # "Unknown SQL dialect" error before JoinGraph.build() reaches
        # sqlglot's parser.
        SqlGenerator(dialect)
        self.graph = JoinGraph(self.sources, dialect=dialect)
        self.graph.build()
        self.planner = QueryPlanner(self.sources, self.graph, dialect=dialect)
        self.generator = SqlGenerator(dialect, alias_map=self.graph.alias_map)

    def query(self, query: dict | SemanticQuery) -> QueryResult:
        """Plan ``query`` and render SQL; refuses to run while any join targets an undefined source."""
        semantic_query = SemanticQuery(**query) if isinstance(query, dict) else query
        problems = self._collect_orphan_join_target_errors()
        if problems:
            raise ValueError("Cannot query semantic layer: " + "; ".join(problems))
        resolved = self.planner.plan(semantic_query)
        rendered = self.generator.generate(resolved, self.sources)
        return QueryResult(
            resolved_plan=resolved,
            sql=rendered,
            dialect=self.generator.dialect,
            columns=resolved.columns,
        )

    def validate(self, recently_touched: set[str] | None = None) -> ValidationReport:
        """Run all model checks and return the collected report."""
        report = ValidationReport()
        self._check_orphan_join_targets(report)
        self._check_invalid_grain(report)
        self._check_sql_join_coverage(report, recently_touched=recently_touched)
        self._check_disconnected_components(report, recently_touched=recently_touched)
        return report

    def _collect_orphan_join_target_errors(self) -> list[str]:
        """Describe every join whose ``to`` is not a key of ``self.sources``."""
        defined = set(self.sources.keys())
        return [
            f"Source '{source.name}' joins to '{join.to}', "
            f"but '{join.to}' is not defined"
            for source in self.sources.values()
            for join in source.joins
            if join.to not in defined
        ]

    def _check_orphan_join_targets(self, report: ValidationReport) -> None:
        """Append orphan-join errors to ``report.errors``."""
        report.errors.extend(self._collect_orphan_join_target_errors())

    def _check_invalid_grain(self, report: ValidationReport) -> None:
        """Flag grain columns that are absent from the source's column list."""
        for source in self.sources.values():
            available = {column.name for column in source.columns}
            report.errors.extend(
                f"Source '{source.name}' has grain column '{grain_col}' "
                f"that is not in its columns list"
                for grain_col in source.grain
                if grain_col not in available
            )

    def _check_sql_join_coverage(
        self,
        report: ValidationReport,
        recently_touched: set[str] | None = None,
    ) -> None:
        """Block writes whose SQL references another known source's base
        table without declaring a join to that source.

        Only the sources named in `recently_touched` are inspected, so
        existing fragmentation is not re-flagged on every write, and only
        sources that carry `sql:` are checked.
        """
        if not recently_touched:
            return

        tables_by_source: list[tuple[SourceDefinition, str]] = [
            (candidate, candidate.table)
            for candidate in self.sources.values()
            if candidate.table is not None
        ]
        if not tables_by_source:
            return

        dialect = getattr(self.generator, "dialect", "postgres")

        for touched_name in sorted(recently_touched):
            source = self.sources.get(touched_name)
            if source is None or not source.is_sql_source or not source.sql:
                continue

            declared_targets = {join.to.lower() for join in source.joins}
            undeclared: list[str] = []
            for ref in extract_table_refs(source.sql, dialect=dialect):
                # First other source whose base table matches this reference.
                owner = next(
                    (
                        candidate.name
                        for candidate, table_value in tables_by_source
                        if candidate.name != source.name
                        and ref_matches_source_table(ref, table_value)
                    ),
                    None,
                )
                if owner is None or owner.lower() in declared_targets:
                    continue
                if owner not in undeclared:
                    undeclared.append(owner)

            if not undeclared:
                continue

            ref_list = ", ".join(undeclared)
            example = undeclared[0]
            example_grain = self.sources[example].grain
            grain_col = example_grain[0] if example_grain else "id"
            # NOTE(review): the "on: '<source>. = ...'" fragment reads as if a
            # column placeholder was lost; reproduced exactly as found.
            report.errors.append(
                f"Source '{source.name}' SQL joins manifest table(s) [{ref_list}] "
                f"that are not declared in joins[]. Add a join entry for each, "
                f"e.g. {{to: {example}, on: '{source.name}. = "
                f"{example}.{grain_col}', relationship: many_to_one}}. If a "
                f"reference is intentionally absent, document it with a "
                f"`unmapped-table-*` wiki note and remove the SQL reference."
            )

    def _check_disconnected_components(
        self,
        report: ValidationReport,
        recently_touched: set[str] | None = None,
    ) -> None:
        """Warn when the join graph has more than one component, and add a
        per-source warning for each recently touched source that ended up alone."""
        components = self.graph.find_components()
        if len(components) <= 1:
            return

        # Largest component first; ties broken by the smallest member name.
        ordered = sorted(
            components, key=lambda c: (-len(c), sorted(c)[0] if c else "")
        )
        summary = [
            f"Model has {len(components)} disconnected components. "
            f"Queries that span components will fail with 'No join path' errors:"
        ]
        for position, members in enumerate(ordered, start=1):
            names = sorted(members)
            if len(names) <= 3:
                summary.append(
                    f" - Component {position} ({len(names)} sources): {', '.join(names)}"
                )
                continue
            sample = ", ".join(names[:2])
            summary.append(
                f" - Component {position} ({len(names)} sources): {sample}, ... (+{len(names) - 2} more)"
            )
        report.warnings.append("\n".join(summary))

        if not recently_touched:
            return
        isolated = {next(iter(c)) for c in components if len(c) == 1}
        for source_name in sorted(recently_touched & isolated):
            report.per_source_warnings.setdefault(source_name, []).append(
                f"Source '{source_name}' is now a singleton component (no joins to any "
                f"other source). Queries that combine '{source_name}' with anything else "
                f"will fail with 'No join path' errors. Run sl_discover for each table "
                f"named in this source's SQL and add joins via sl_edit_source."
            )

    def plan_only(self, query: dict | SemanticQuery) -> ResolvedPlan:
        """Resolve ``query`` to a plan without generating SQL."""
        semantic_query = SemanticQuery(**query) if isinstance(query, dict) else query
        return self.planner.plan(semantic_query)

    def suggest(self, query: dict | SemanticQuery) -> dict:
        """Try to plan. If it fails, suggest config extensions with structured info."""
        if isinstance(query, dict):
            query = SemanticQuery(**query)
        try:
            plan = self.planner.plan(query)
            # Planning alone is not enough: SQL generation must succeed too.
            try:
                self.generator.generate(plan, self.sources)
            except Exception as gen_err:
                return self._generation_failure_report(plan, gen_err)
            return {
                "success": True,
                "plan": plan,
                "suggestions": [],
            }
        except Exception as plan_err:
            return self._planning_failure_report(query, plan_err)

    def _generation_failure_report(self, plan: ResolvedPlan, gen_err: Exception) -> dict:
        """Result payload for a plan that resolved but could not be rendered to SQL."""
        return {
            "success": False,
            "error": f"SQL generation failed: {gen_err}",
            "plan": plan,
            "referenced_sources": sorted(set(plan.sources_used)),
            "missing_sources": [],
            "graph_errors": [],
            "suggestions": [
                {
                    "description": f"SQL generation error: {gen_err}",
                    "required_sources": [],
                    "required_joins": [],
                    "notes": [
                        "The query plan was valid but the SQL generator encountered an error.",
                        "This may indicate a limitation in the aggregate locality system.",
                    ],
                }
            ],
        }

    def _planning_failure_report(self, query: SemanticQuery, error: Exception) -> dict:
        """Result payload for a query that could not be planned, with
        suggestions for missing sources or missing join paths."""
        from semantic_layer.parser import ExpressionParser

        ref_parser = ExpressionParser()

        # Gather every expression string the query mentions.
        expressions: list[str] = []
        for measure in query.measures:
            if isinstance(measure, str):
                expressions.append(measure)
            elif isinstance(measure, dict):
                expressions.append(measure.get("expr", ""))
        for dimension in query.dimensions:
            if isinstance(dimension, str):
                expressions.append(dimension)
            elif isinstance(dimension, dict):
                expressions.append(dimension.get("field", ""))
        expressions.extend(query.filters)

        referenced: set[str] = set()
        for expression in expressions:
            referenced.update(ref_parser.extract_source_refs(expression))

        known = set(self.sources.keys())
        missing = sorted(referenced - known)
        graph_errors = _format_component_errors(self.graph.find_components())

        suggestions = []
        if missing:
            suggestions.append(self._missing_source_suggestion(missing))
        elif len(referenced) > 1:
            suggestions.append(self._join_path_suggestion(referenced, known))

        return {
            "success": False,
            "error": str(error),
            "referenced_sources": sorted(referenced),
            "missing_sources": missing,
            "graph_errors": graph_errors,
            "suggestions": suggestions,
        }

    def _missing_source_suggestion(self, missing: list[str]) -> dict:
        """Suggest defining the missing sources, with candidate joins.

        A join is proposed from each known source that has a column named
        ``<known_source>_id``.
        NOTE(review): keying the candidate on the known source's own name
        rather than the missing one looks unusual — confirm intent.
        """
        required_joins = [
            {
                "source": known_name,
                "to": missing_name,
                "on": f"{known_name}_id = {missing_name}.id",
                "relationship": "many_to_one",
            }
            for missing_name in missing
            for known_name, known_src in self.sources.items()
            if any(col.name == f"{known_name}_id" for col in known_src.columns)
        ]
        return {
            "description": f"Define missing source(s): {', '.join(missing)}",
            "required_sources": missing,
            "required_joins": required_joins,
            "notes": [
                f"Create YAML definition(s) for: {', '.join(missing)}",
                "Each source needs at minimum: name, table (or sql), grain, and columns",
            ],
        }

    def _join_path_suggestion(self, referenced: set[str], known: set[str]) -> dict:
        """Suggest joins for each referenced pair with no path in the graph."""
        present = sorted(referenced & known)
        unlinked = [
            (left, right)
            for idx, left in enumerate(present)
            for right in present[idx + 1 :]
            if self.graph.find_path(left, right) is None
        ]
        required_joins = [
            {
                "source": left,
                "to": right,
                "on": f"{right}_id = {right}.id",
                "relationship": "many_to_one",
            }
            for left, right in unlinked
        ]
        if unlinked:
            notes = [f"Disconnected pairs: {[f'{a} ↔ {b}' for a, b in unlinked]}"]
        else:
            notes = ["All sources are connected; check the error message for details"]
        return {
            "description": f"Add join path(s) connecting: {', '.join(present)}",
            "required_sources": [],
            "required_joins": required_joins,
            "notes": notes,
        }
+def _format_component_errors(components: list[set[str]]) -> list[str]: + """Render multi-component topology as graph_error strings for `suggest()` / CLI.""" + if len(components) <= 1: + return [] + sorted_components = sorted( + components, key=lambda c: (-len(c), sorted(c)[0] if c else "") + ) + lines = [] + for i, component in enumerate(sorted_components, start=1): + names = sorted(component) + if len(names) > 3: + sample = ", ".join(names[:2]) + lines.append( + f"Component {i} ({len(names)} sources): {sample}, ... (+{len(names) - 2} more)" + ) + else: + lines.append(f"Component {i} ({len(names)} sources): {', '.join(names)}") + return [f"Disconnected components: {len(components)}"] + lines diff --git a/python/klo-sl/semantic_layer/generator.py b/python/klo-sl/semantic_layer/generator.py new file mode 100644 index 00000000..4e1ec891 --- /dev/null +++ b/python/klo-sl/semantic_layer/generator.py @@ -0,0 +1,1419 @@ +from __future__ import annotations + +import logging +from collections import Counter + +import sqlglot +from sqlglot import exp + +from semantic_layer.graph import RELATIONSHIP_INVERSE +from semantic_layer.models import ( + MeasureGroup, + QueryDimension, + ResolvedJoin, + ResolvedMeasure, + ResolvedPlan, + SourceDefinition, +) +from semantic_layer.parser import ExpressionParser, quote_reserved_identifiers + +# DIALECT CONVENTION: +# User-authored SQL fragments (measure `expr`, segment `expr`, filter, +# computed-column `expr`, `sql:` source bodies, join `on:` clauses) must +# be parsed with `read=self.dialect`. The `sl_capture` skill instructs +# authors to write in the connection's native dialect; parsing as postgres +# silently drops dialect-specific tokens (e.g. BigQuery `INTERVAL 30 DAY`). +# Source CTE bodies stay verbatim; the outer scaffold is written in +# postgres-compatible form with dialect-specific helpers where needed +# (see `_time_trunc`), and `_transpile()` round-trips it through +# `self.dialect` so embedded user exprs survive intact. 
+ +logger = logging.getLogger(__name__) + + +def _qi(name: str) -> str: + """Quote an identifier if it is a SQL reserved word.""" + from semantic_layer.parser import _SQL_RESERVED + + if name.lower() in _SQL_RESERVED: + return f'"{name}"' + return name + + +def _build_on_clause( + from_source: str, from_column: str, to_source: str, to_column: str +) -> str: + """Build ON clause supporting composite keys (comma-separated columns).""" + from_cols = [c.strip() for c in from_column.split(",")] + to_cols = [c.strip() for c in to_column.split(",")] + conditions = [ + f"{_qi(from_source)}.{_qi(fc)} = {_qi(to_source)}.{_qi(tc)}" + for fc, tc in zip(from_cols, to_cols) + ] + return " AND ".join(conditions) + + +class SqlGenerator: + def __init__( + self, dialect: str = "postgres", alias_map: dict[str, str] | None = None + ): + if dialect != "postgres": + from sqlglot import Dialect + + try: + Dialect.get_or_raise(dialect) + except ValueError: + raise ValueError( + f"Unknown SQL dialect '{dialect}'. Use a dialect supported by sqlglot " + f"(e.g., postgres, bigquery, snowflake, mysql, duckdb)." + ) + self.dialect = dialect + self._parser = ExpressionParser(dialect=dialect) + self._alias_map: dict[str, str] = alias_map or {} + + def generate(self, plan: ResolvedPlan, sources: dict[str, SourceDefinition]) -> str: + native_source_ctes = self._build_source_ctes(plan, sources) + if plan.has_fan_out and plan.measure_groups: + outer_sql = self._generate_with_locality(plan, sources) + else: + outer_sql = self._generate_simple(plan, sources) + + outer_transpiled = self._transpile(outer_sql) + + if not native_source_ctes: + return outer_transpiled + + source_header = ",\n".join(native_source_ctes) + stripped = outer_transpiled.lstrip() + if stripped[:5].upper() == "WITH ": + # Outer scaffold already has a WITH clause (e.g. locality CTEs). + # Merge the native source CTEs into the same WITH clause. 
+ rest = stripped[5:].lstrip() + return "WITH " + source_header + ",\n" + rest + return "WITH " + source_header + "\n" + outer_transpiled + + # ── Path A: Simple (no fan-out) ──────────────────────────────────── + + def _generate_simple( + self, plan: ResolvedPlan, sources: dict[str, SourceDefinition] + ) -> str: + parts: list[str] = [] + + # SELECT — use DISTINCT when no measures (dimension-only query) + has_measures = any(not m.is_derived for m in plan.measures) + select_cols = self._build_select_columns(plan, sources) + if not has_measures and plan.dimensions: + parts.append("SELECT DISTINCT\n " + ",\n ".join(select_cols)) + else: + parts.append("SELECT\n " + ",\n ".join(select_cols)) + + # FROM + anchor = plan.anchor_source + if anchor: + from_ref = self._source_ref(anchor, sources) + parts.append(f"FROM {from_ref}") + + # JOINs + for join in plan.joins: + join_sql = self._build_join(join, sources, plan) + parts.append(join_sql) + + # WHERE + if plan.where_filters: + where_clauses = [ + self._qualify_filter(f, sources, plan) for f in plan.where_filters + ] + parts.append("WHERE " + " AND ".join(where_clauses)) + + # GROUP BY (skip for dimension-only queries — DISTINCT handles dedup) + dim_exprs = self._build_group_by_exprs(plan, sources) + if dim_exprs and has_measures: + parts.append("GROUP BY " + ", ".join(dim_exprs)) + + # HAVING — expand predefined measure references to aggregate expressions + if plan.having_filters: + having_clauses = [ + self._expand_having_filter(f, plan, sources) + for f in plan.having_filters + ] + parts.append("HAVING " + " AND ".join(having_clauses)) + + # ORDER BY + if plan.order_by: + order_parts = [] + for ob in plan.order_by: + field = self._resolve_order_field(ob.field, plan) + direction = ( + ob.direction.upper() if ob.direction.lower() != "asc" else "" + ) + order_parts.append(f"{field} {direction}".strip()) + parts.append("ORDER BY " + ", ".join(order_parts)) + elif dim_exprs: + parts.append( + "ORDER BY " + ", ".join(str(i) 
    # ── Path B: Aggregate locality ─────────────────────────────────────

    def _generate_with_locality(
        self, plan: ResolvedPlan, sources: dict[str, SourceDefinition]
    ) -> str:
        """Build the outer query for plans with fan-out.

        One pre-aggregation CTE is emitted per measure group, grouped by the
        dimensions that group can reach; the CTEs are then joined on the
        dimensions they all share.

        Raises:
            ValueError: If a dimension is not reachable from any measure
                group, or if measure groups reach different dimension sets
                (asymmetric grain).
        """
        parts: list[str] = []
        # Only locality CTEs — source CTEs are concatenated by generate() so
        # the native-dialect source body never reaches the postgres transpile.
        cte_parts: list[str] = []

        # All dimension key expressions
        all_dim_keys = self._build_dim_key_exprs(plan, sources)

        # Compute per-CTE reachable dimensions
        safe_adj = self._build_safe_adjacency(plan)
        per_cte_dims: dict[str, list[dict]] = {}
        for group in plan.measure_groups:
            reachable = []
            for dk in all_dim_keys:
                dim_sources = self._parser.extract_source_refs(dk["expr"])
                can_reach = True
                for ds in dim_sources:
                    # A dimension on the group's own source is always reachable.
                    if ds == group.source_name:
                        continue
                    path = self._find_join_path_steps(group.source_name, ds, safe_adj)
                    if not path and ds != group.source_name:
                        can_reach = False
                        break
                if can_reach:
                    reachable.append(dk)
            per_cte_dims[group.source_name] = reachable

        # Validate: every dimension must be reachable from at least one CTE
        for dk in all_dim_keys:
            reachable_from_any = any(
                any(d["alias"] == dk["alias"] for d in per_cte_dims[g.source_name])
                for g in plan.measure_groups
            )
            if not reachable_from_any:
                dim_sources = self._parser.extract_source_refs(dk["expr"])
                source_names = [g.source_name for g in plan.measure_groups]
                raise ValueError(
                    f"Aggregate locality cannot safely reach '{', '.join(dim_sources)}' from "
                    f"any measure source ({', '.join(source_names)}) without traversing one_to_many edges"
                )

        # Shared dimensions: reachable from ALL CTEs (used for JOIN condition)
        shared_dim_aliases = None
        for group in plan.measure_groups:
            aliases = {dk["alias"] for dk in per_cte_dims[group.source_name]}
            if shared_dim_aliases is None:
                shared_dim_aliases = aliases
            else:
                shared_dim_aliases &= aliases
        shared_dim_aliases = shared_dim_aliases or set()
        shared_dims = [dk for dk in all_dim_keys if dk["alias"] in shared_dim_aliases]

        # Validate grain consistency: asymmetric dims cause FULL JOIN fan-out
        if len(plan.measure_groups) > 1:
            for group in plan.measure_groups:
                cte_dim_aliases = {
                    dk["alias"] for dk in per_cte_dims[group.source_name]
                }
                non_shared = cte_dim_aliases - shared_dim_aliases
                if non_shared:
                    for other_group in plan.measure_groups:
                        if other_group.source_name == group.source_name:
                            continue
                        other_aliases = {
                            dk["alias"] for dk in per_cte_dims[other_group.source_name]
                        }
                        missing_from_other = non_shared - other_aliases
                        if missing_from_other:
                            raise ValueError(
                                f"Asymmetric dimension grain in chasm trap: "
                                f"'{group.source_name}' groups by {sorted(cte_dim_aliases)} "
                                f"but '{other_group.source_name}' cannot reach "
                                f"{sorted(missing_from_other)}. "
                                f"FULL JOIN on shared dimensions ({sorted(shared_dim_aliases)}) "
                                f"would fan out '{other_group.source_name}' measures across "
                                f"the extra dimensions, producing incorrect results. "
                                f"Remove the asymmetric dimensions or query each measure "
                                f"source separately."
                            )

        # Collect all names that could collide with CTE aliases
        reserved_names: set[str] = set(sources.keys())
        for name in plan.sources_used:
            src = sources.get(name)
            if src and src.is_sql_source:
                reserved_names.add(name)
        assigned_aliases: set[str] = set()

        # Pre-aggregation CTEs for each measure group
        cte_aliases: list[str] = []
        for group in plan.measure_groups:
            alias = f"{group.source_name}_agg"
            # Resolve collisions with existing source/CTE names
            if alias in reserved_names or alias in assigned_aliases:
                # Try <source>_agg_1, _agg_2, ... until a free name is found.
                suffix = 1
                while (
                    f"{group.source_name}_agg_{suffix}" in reserved_names
                    or f"{group.source_name}_agg_{suffix}" in assigned_aliases
                ):
                    suffix += 1
                alias = f"{group.source_name}_agg_{suffix}"
            cte_aliases.append(alias)
            assigned_aliases.add(alias)
            cte_dim_keys = per_cte_dims[group.source_name]
            cte_sql = self._build_agg_cte(group, plan, sources, cte_dim_keys)
            cte_parts.append(f"{alias} AS (\n{cte_sql}\n)")

        if cte_parts:
            parts.append("WITH " + ",\n".join(cte_parts))

        # Final SELECT combining CTEs
        select_cols, derived_inline_map = self._build_locality_select(
            plan, cte_aliases, all_dim_keys, per_cte_dims
        )
        parts.append("SELECT\n " + ",\n ".join(select_cols))

        # FROM + JOINs between CTEs
        cte_join_type = "FULL JOIN" if plan.include_empty else "JOIN"
        if cte_aliases:
            parts.append(f"FROM {cte_aliases[0]}")
            for i, alias in enumerate(cte_aliases[1:], 1):
                join_conditions = []
                for dk in shared_dims:
                    if i == 1:
                        lhs = f"{cte_aliases[0]}.{dk['alias']}"
                    else:
                        # From the third CTE on, match against the first
                        # non-NULL key among all previously joined CTEs.
                        coalesce_args = ", ".join(
                            f"{cte_aliases[j]}.{dk['alias']}" for j in range(i)
                        )
                        lhs = f"COALESCE({coalesce_args})"
                    join_conditions.append(f"{lhs} = {alias}.{dk['alias']}")
                if join_conditions:
                    parts.append(
                        f"{cte_join_type} {alias} ON " + " AND ".join(join_conditions)
                    )
                else:
                    # No shared dimensions: nothing to join on.
                    parts.append(f"CROSS JOIN {alias}")

        # HAVING filters applied as WHERE on outer query (no GROUP BY at this level)
        if plan.having_filters:
            measure_cte_map: dict[str, str] = {}
            for i, plan_group in enumerate(plan.measure_groups):
                for m in plan_group.measures:
                    measure_cte_map[m.name] = cte_aliases[i]

            having_clauses = []
            for f in plan.having_filters:
                resolved_f = self._resolve_having_for_locality(
                    f, plan, measure_cte_map, derived_inline_map
                )
                having_clauses.append(resolved_f)
            parts.append("WHERE " + " AND ".join(having_clauses))

        # ORDER BY
        if plan.order_by:
            order_parts = []
            for ob in plan.order_by:
                field = self._resolve_order_field(ob.field, plan)
                direction = (
                    ob.direction.upper() if ob.direction.lower() != "asc" else ""
                )
                order_parts.append(f"{field} {direction}".strip())
            parts.append("ORDER BY " + ", ".join(order_parts))
        elif all_dim_keys:
            parts.append(
                "ORDER BY " + ", ".join(str(i) for i in range(1, len(all_dim_keys) + 1))
            )

        # LIMIT
        if plan.limit is not None:
            parts.append(f"LIMIT {plan.limit}")

        return "\n".join(parts)
    def _build_agg_cte(
        self,
        group: MeasureGroup,
        plan: ResolvedPlan,
        sources: dict[str, SourceDefinition],
        dim_keys: list[dict],
    ) -> str:
        """Build a pre-aggregation CTE for one measure group.

        Args:
            group: Measure group whose source anchors the CTE.
            plan: Resolved plan supplying joins and WHERE filters.
            sources: All source definitions by name.
            dim_keys: ``{"expr", "alias"}`` dicts for the dimensions this CTE
                groups by.

        Returns:
            The CTE body (SELECT … FROM … [JOIN …] [WHERE …] [GROUP BY …]).
        """
        parts: list[str] = []

        # SELECT: dimension keys + aggregated measures
        select_cols: list[str] = []
        for dk in dim_keys:
            select_cols.append(f"{dk['expr']} AS {dk['alias']}")

        for m in group.measures:
            measure_expr = self._build_measure_expr(m, sources)
            select_cols.append(f"{measure_expr} AS {m.name}")

        parts.append(" SELECT\n " + ",\n ".join(select_cols))

        # FROM the measure's source
        from_ref = self._source_ref(group.source_name, sources)
        parts.append(f" FROM {from_ref}")

        joined_sources = {group.source_name}
        target_sources = self._collect_cte_target_sources(group, plan, dim_keys)
        join_steps = self._build_group_join_steps(
            group.source_name, target_sources, plan
        )
        for join, next_source in join_steps:
            # Each source is joined at most once.
            if next_source in joined_sources:
                continue
            join_ref = self._source_ref(next_source, sources)
            on_clause = _build_on_clause(
                join.from_source, join.from_column, join.to_source, join.to_column
            )
            parts.append(f" JOIN {join_ref} ON {on_clause}")
            joined_sources.add(next_source)

        # WHERE filters — only push down filters whose sources are within this CTE
        if plan.where_filters:
            relevant_where = []
            for f in plan.where_filters:
                filter_sources = self._parser.extract_source_refs(f)
                # Filters with no source reference are always pushed down.
                if not filter_sources or filter_sources <= joined_sources:
                    relevant_where.append(f)
            if relevant_where:
                where_clauses = [
                    self._qualify_filter(f, sources, plan) for f in relevant_where
                ]
                parts.append(" WHERE " + " AND ".join(where_clauses))

        # GROUP BY dimension keys
        if dim_keys:
            group_by = ", ".join(dk["expr"] for dk in dim_keys)
            parts.append(f" GROUP BY {group_by}")

        # HAVING filters are NOT placed here — they go on the outer query
        # after the FULL JOIN to ensure correct semantics across CTEs

        return "\n".join(parts)

    def _build_dim_key_exprs(
        self, plan: ResolvedPlan, sources: dict[str, SourceDefinition]
    ) -> list[dict]:
        """Build dimension key expressions for aggregate locality CTEs.

        Returns one ``{"expr": ..., "alias": ...}`` dict per plan dimension,
        in plan order.
        """
        colliding = self._colliding_dim_leaves(plan.dimensions)
        result = []
        for dim in plan.dimensions:
            expr = self._dim_expr(dim, sources)
            result.append(
                {"expr": expr, "alias": self._dimension_alias(dim, colliding)}
            )
        return result

    def _build_locality_select(
        self,
        plan: ResolvedPlan,
        cte_aliases: list[str],
        dim_keys: list[dict],
        per_cte_dims: dict[str, list[dict]] | None = None,
    ) -> tuple[list[str], dict[str, str]]:
        """Build SELECT columns for the final query combining pre-aggregated CTEs.

        Args:
            plan: Resolved plan; ``cte_aliases[i]`` corresponds to
                ``plan.measure_groups[i]``.
            cte_aliases: Alias of each pre-aggregation CTE.
            dim_keys: ``{"expr", "alias"}`` dicts for every dimension.
            per_cte_dims: Dimensions available in each CTE, keyed by source
                name; when falsy, every CTE is assumed to expose every dimension.

        Returns:
            ``(columns, derived_inline_map)`` — the SELECT column strings and a
            map from derived-measure name to its fully inlined expression.
        """
        cols: list[str] = []

        # Build mapping from CTE alias to source name
        cte_source_names = [g.source_name for g in plan.measure_groups]

        # Dimensions: COALESCE across CTEs that have the dim
        for dk in dim_keys:
            if per_cte_dims:
                available_ctes = [
                    alias
                    for alias, src_name in zip(cte_aliases, cte_source_names)
                    if any(
                        d["alias"] == dk["alias"]
                        for d in per_cte_dims.get(src_name, [])
                    )
                ]
            else:
                available_ctes = cte_aliases

            if len(available_ctes) > 1:
                coalesce_args = ", ".join(f"{a}.{dk['alias']}" for a in available_ctes)
                cols.append(f"COALESCE({coalesce_args}) AS {dk['alias']}")
            elif len(available_ctes) == 1:
                cols.append(f"{available_ctes[0]}.{dk['alias']} AS {dk['alias']}")
            else:
                cols.append(f"NULL AS {dk['alias']}")

        # Non-derived measures from CTEs
        measure_cte_map: dict[str, str] = {}
        for i, plan_group in enumerate(plan.measure_groups):
            alias = cte_aliases[i]
            for m in plan_group.measures:
                cols.append(f"{alias}.{m.name}")
                measure_cte_map[m.name] = alias

        # Derived measures — wrap cross-CTE refs in COALESCE for FULL JOIN NULL safety.
        # Process in order (topological) so that derived-of-derived gets inlined.
        derived_inline_map: dict[str, str] = {}  # measure_name → fully inlined expr
        for m in plan.measures:
            if m.is_derived:
                # Collect all transitive CTE aliases used by this derived measure
                dep_ctes: set[str | None] = set()
                for d in m.depends_on:
                    if d in measure_cte_map:
                        dep_ctes.add(measure_cte_map.get(d))
                    elif d in derived_inline_map:
                        # Derived dep — inherit its CTE references
                        # (cross-CTE if the inlined dep already has multiple CTEs)
                        dep_ctes.add("__derived__")
                use_coalesce = len(dep_ctes - {None}) > 1
                # Detect which deps are used as divisors
                divisor_deps = self._find_divisor_deps(m.expr, m.depends_on)
                replacements = {}
                for dep_name in m.depends_on:
                    if dep_name in measure_cte_map:
                        ref = f"{measure_cte_map[dep_name]}.{dep_name}"
                        if use_coalesce:
                            if dep_name in divisor_deps:
                                # NULLIF(..., 0) turns a zero divisor into NULL
                                # instead of raising a division-by-zero error.
                                ref = f"NULLIF(COALESCE({measure_cte_map[dep_name]}.{dep_name}, 0), 0)"
                            else:
                                ref = f"COALESCE({ref}, 0)"
                        replacements[dep_name] = ref
                    elif dep_name in derived_inline_map:
                        # Derived dependency — inline its already-resolved expression
                        ref = derived_inline_map[dep_name]
                        if dep_name in divisor_deps:
                            ref = f"NULLIF({ref}, 0)"
                        replacements[dep_name] = ref
                expr = self._substitute_measure_refs(m.expr, replacements)
                derived_inline_map[m.name] = expr
                cols.append(f"{expr} AS {m.name}")

        return cols, derived_inline_map
in divisor_deps: + ref = f"NULLIF({ref}, 0)" + replacements[dep_name] = ref + expr = self._substitute_measure_refs(m.expr, replacements) + derived_inline_map[m.name] = expr + cols.append(f"{expr} AS {m.name}") + + return cols, derived_inline_map + + def _find_divisor_deps(self, expr: str, depends_on: list[str]) -> set[str]: + """Find which dependency names appear as divisors in the expression.""" + divisors: set[str] = set() + try: + tree = sqlglot.parse_one(f"SELECT {expr}", read=self.dialect) + for div_node in tree.find_all(exp.Div): + rhs = div_node.right + if isinstance(rhs, exp.Column) and not rhs.table: + if rhs.name in depends_on: + divisors.add(rhs.name) + except Exception: + logger.debug("Failed to parse expression for divisor detection: %s", expr) + return divisors + + # ── Shared helpers ───────────────────────────────────────────────── + + def _build_source_ctes( + self, plan: ResolvedPlan, sources: dict[str, SourceDefinition] + ) -> list[str]: + """Build CTEs for SQL-based sources, flattening inner WITH clauses.""" + ctes = [] + for name in plan.sources_used: + src = sources.get(name) + if src and src.is_sql_source and src.sql: + sql_text = src.sql.strip() + inner_ctes, final_select = self._extract_inner_ctes(sql_text) + if inner_ctes: + # Promote inner CTEs with prefixed names + renames: list[tuple[str, str]] = [] + for inner_name, inner_body in inner_ctes: + prefixed = f"{name}__{inner_name}" + renames.append((inner_name, prefixed)) + + # Apply all renames to inner CTE bodies and final SELECT + promoted: list[tuple[str, str]] = [] + for inner_name, inner_body in inner_ctes: + renamed_body = inner_body + for old, new in renames: + renamed_body = self._rename_table_ref( + renamed_body, old, new + ) + prefixed = f"{name}__{inner_name}" + promoted.append((prefixed, renamed_body)) + + renamed_final = final_select + for old, new in renames: + renamed_final = self._rename_table_ref(renamed_final, old, new) + + for prefixed, body in promoted: + 
ctes.append(f"{prefixed} AS (\n{body}\n)") + ctes.append(f"{name} AS (\n{renamed_final}\n)") + else: + ctes.append(f"{name} AS (\n{sql_text}\n)") + return ctes + + def _extract_inner_ctes(self, sql_text: str) -> tuple[list[tuple[str, str]], str]: + """Parse SQL and extract CTEs if present. + + Source SQL is user-provided in the target dialect, so we parse and + serialize using ``self.dialect`` to avoid lossy cross-dialect + conversion (e.g. Snowflake DATEDIFF → postgres AGE). + """ + try: + tree = sqlglot.parse_one(sql_text, read=self.dialect) + with_clause = tree.find(exp.With) + if not with_clause: + return [], sql_text + cte_list = [] + for cte in with_clause.expressions: + cte_name = cte.alias + cte_body = cte.this.sql(dialect=self.dialect) + cte_list.append((cte_name, cte_body)) + # Get the main query without the WITH clause + tree_copy = tree.copy() + w = tree_copy.find(exp.With) + if w: + w.pop() + final_select = tree_copy.sql(dialect=self.dialect) + return cte_list, final_select + except Exception: + logger.debug( + "Failed to extract inner CTEs from SQL source, treating as raw SQL" + ) + return [], sql_text + + def _rename_table_ref(self, sql_text: str, old_name: str, new_name: str) -> str: + """Rename table references in SQL text. + + Uses ``self.dialect`` for parsing/serialization to preserve + dialect-specific constructs in user-provided source SQL. 
+ """ + try: + tree = sqlglot.parse_one(sql_text, read=self.dialect) + + def _rename(node): + if ( + isinstance(node, exp.Table) + and node.name == old_name + and not node.db + ): + alias = node.args.get("alias") or exp.TableAlias( + this=exp.to_identifier(old_name) + ) + return exp.Table(this=exp.to_identifier(new_name), alias=alias) + return node + + transformed = tree.transform(_rename) + return transformed.sql(dialect=self.dialect) + except Exception: + logger.debug( + "AST-based table rename failed for '%s' -> '%s', falling back to regex", + old_name, + new_name, + ) + import re + + return re.sub(rf"\b{re.escape(old_name)}\b", new_name, sql_text) + + def _build_select_columns( + self, plan: ResolvedPlan, sources: dict[str, SourceDefinition] + ) -> list[str]: + """Build SELECT columns for simple (non-locality) path.""" + colliding = self._colliding_dim_leaves(plan.dimensions) + cols: list[str] = [] + + # Dimensions + for dim in plan.dimensions: + expr = self._dim_expr(dim, sources) + cols.append(f"{expr} AS {self._dimension_alias(dim, colliding)}") + + # Build map of measure names to their expressions (for derived measures) + measure_expr_map: dict[str, str] = {} + for m in plan.measures: + if not m.is_derived: + measure_expr = self._build_measure_expr(m, sources) + cols.append(f"{measure_expr} AS {m.name}") + measure_expr_map[m.name] = measure_expr + else: + # Derived: substitute dependencies with their expressions + expr = self._substitute_measure_refs( + m.expr, + { + dep: measure_expr_map[dep] + for dep in m.depends_on + if dep in measure_expr_map + }, + ) + cols.append(f"{expr} AS {m.name}") + measure_expr_map[m.name] = expr + + return cols + + def _build_measure_expr( + self, m: ResolvedMeasure, sources: dict[str, SourceDefinition] + ) -> str: + """Build the SQL expression for a single measure.""" + expr = self._qualify_expr(m.expr, sources) + + # Translate custom functions (median, percentile, count_distinct) + expr = self._translate_custom_funcs(expr) + 
+ if m.filter: + filter_sql = self._qualify_expr(m.filter, sources) + return self._apply_measure_filter(expr, filter_sql) + + return expr + + def _apply_measure_filter(self, expr: str, filter_sql: str) -> str: + """Apply a measure-level filter by injecting CASE WHEN into each aggregate.""" + try: + tree = sqlglot.parse_one( + f"SELECT {quote_reserved_identifiers(expr)}", read=self.dialect + ) + select_expr = tree.expressions[0] + if isinstance(select_expr, exp.Alias): + select_expr = select_expr.this + + filter_cond = sqlglot.parse_one( + f"SELECT {filter_sql}", read=self.dialect + ).expressions[0] + + def _make_case(inner_node): + return exp.Case( + ifs=[exp.If(this=filter_cond.copy(), true=inner_node.copy())] + ) + + def _inject_filter(node): + """Walk the AST and inject CASE WHEN filter into each aggregate's argument.""" + if isinstance(node, exp.AggFunc): + if isinstance(node, exp.Count): + count_arg = node.this + if ( + isinstance(count_arg, exp.Distinct) + and count_arg.expressions + ): + inner = count_arg.expressions[0] + node.set( + "this", exp.Distinct(expressions=[_make_case(inner)]) + ) + return node + if node.this is not None: + node.set("this", _make_case(node.this)) + return node + return node + + transformed = select_expr.transform(_inject_filter) + result = transformed.sql(dialect=self.dialect) + if result != expr: + return result + except Exception: + logger.debug( + "Failed to inject filter into aggregates for measure: %s", expr + ) + + return f"CASE WHEN {filter_sql} THEN {expr} END" + + def _translate_custom_funcs(self, expr: str) -> str: + """Translate custom functions: median(), percentile(), count_distinct().""" + tree = sqlglot.parse_one( + f"SELECT {quote_reserved_identifiers(expr)}", read=self.dialect + ) + + has_custom = False + has_custom = has_custom or any(True for _ in tree.find_all(exp.Median)) + for node in tree.find_all(exp.Anonymous): + if node.name.lower() in ("percentile", "count_distinct"): + has_custom = True + break + if not 
has_custom: + return expr + + def _replace(node): + if isinstance(node, exp.Median): + col_sql = node.this.sql(dialect=self.dialect) + return sqlglot.parse_one( + f"SELECT PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY {col_sql})", + read=self.dialect, + ).expressions[0] + if isinstance(node, exp.Anonymous) and node.name.lower() == "percentile": + if len(node.expressions) >= 2: + col_sql = node.expressions[0].sql(dialect=self.dialect) + p_sql = node.expressions[1].sql(dialect=self.dialect) + return sqlglot.parse_one( + f"SELECT PERCENTILE_CONT({p_sql}) WITHIN GROUP (ORDER BY {col_sql})", + read=self.dialect, + ).expressions[0] + if ( + isinstance(node, exp.Anonymous) + and node.name.lower() == "count_distinct" + ): + if node.expressions: + col_sql = node.expressions[0].sql(dialect=self.dialect) + return sqlglot.parse_one( + f"SELECT COUNT(DISTINCT {col_sql})", read=self.dialect + ).expressions[0] + return node + + transformed = tree.transform(_replace) + return transformed.expressions[0].sql(dialect=self.dialect) + + def _extract_outer_aggregate(self, expr: str) -> tuple[str | None, str | None]: + """Use AST to extract the outer aggregate function name and inner expression.""" + tree = sqlglot.parse_one( + f"SELECT {quote_reserved_identifiers(expr)}", read=self.dialect + ) + select_expr = tree.expressions[0] + if isinstance(select_expr, exp.Alias): + select_expr = select_expr.this + if isinstance(select_expr, exp.AggFunc): + func_name = select_expr.sql_name() + inner = select_expr.this.sql(dialect=self.dialect) + return func_name, inner + if isinstance(select_expr, exp.Anonymous): + func_name = select_expr.name + if select_expr.expressions: + inner = select_expr.expressions[0].sql(dialect=self.dialect) + return func_name, inner + return None, None + + def _substitute_measure_refs(self, expr: str, replacements: dict[str, str]) -> str: + """Replace bare measure name references in an expression using AST transform.""" + if not replacements: + return expr + tree = 
sqlglot.parse_one( + f"SELECT {quote_reserved_identifiers(expr)}", read=self.dialect + ) + + def _replace(node): + if ( + isinstance(node, exp.Column) + and not node.table + and node.name in replacements + ): + replacement_sql = replacements[node.name] + return sqlglot.parse_one( + f"SELECT {replacement_sql}", read=self.dialect + ).expressions[0] + return node + + transformed = tree.transform(_replace) + return transformed.expressions[0].sql(dialect=self.dialect) + + def _build_join( + self, + join: ResolvedJoin, + sources: dict[str, SourceDefinition], + plan: ResolvedPlan, + ) -> str: + join_type = "LEFT JOIN" if plan.include_empty else "JOIN" + # If to_source is an alias, resolve the actual source for the table ref + actual_source = self._alias_map.get(join.to_source, join.to_source) + if actual_source != join.to_source: + # This is an aliased join: JOIN actual_table AS alias + src = sources.get(actual_source) + if src and src.is_table_source and src.table: + join_ref = f"{src.table} AS {join.to_source}" + else: + join_ref = f"{actual_source} AS {join.to_source}" + else: + join_ref = self._source_ref(join.to_source, sources) + on_clause = _build_on_clause( + join.from_source, join.from_column, join.to_source, join.to_column + ) + return f"{join_type} {join_ref} ON {on_clause}" + + def _build_group_by_exprs( + self, plan: ResolvedPlan, sources: dict[str, SourceDefinition] + ) -> list[str]: + exprs = [] + for dim in plan.dimensions: + exprs.append(self._dim_expr(dim, sources)) + return exprs + + # SQLite strftime format strings for time truncation. + # None entries require special date arithmetic (handled in _sqlite_time_trunc). 
+ _SQLITE_STRFTIME: dict[str, str | None] = { + "year": "%Y-01-01", + "month": "%Y-%m-01", + "day": "%Y-%m-%d", + "hour": "%Y-%m-%d %H:00:00", + "quarter": None, + "week": None, + } + + def _dim_expr( + self, dim: QueryDimension, sources: dict[str, SourceDefinition] + ) -> str: + """Build dimension expression, including time truncation and computed column expansion.""" + field = self._expand_computed_columns(dim.field, sources) + if dim.granularity: + return self._time_trunc(dim.granularity, field) + return field + + def _time_trunc(self, granularity: str, field: str) -> str: + """Generate dialect-appropriate time truncation expression.""" + g = granularity.lower() + if self.dialect == "sqlite": + return self._sqlite_time_trunc(g, field) + if self.dialect == "bigquery": + return f"DATE_TRUNC({field}, {g.upper()})" + if self.dialect == "mysql": + return self._mysql_time_trunc(g, field) + return f"DATE_TRUNC('{g}', {field})" + + def _sqlite_time_trunc(self, granularity: str, field: str) -> str: + """SQLite time truncation using strftime / date arithmetic.""" + fmt = self._SQLITE_STRFTIME.get(granularity) + if fmt is not None: + return f"DATE(STRFTIME('{fmt}', {field}))" + if granularity == "quarter": + return ( + f"DATE(STRFTIME('%Y', {field}) || '-' || " + f"PRINTF('%02d', ((CAST(STRFTIME('%m', {field}) AS INTEGER) - 1) / 3) * 3 + 1) || '-01')" + ) + if granularity == "week": + return f"DATE({field}, 'weekday 1', '-7 days')" + logger.warning( + "Unsupported SQLite granularity '%s', returning raw field", granularity + ) + return field + + _MYSQL_DATE_FORMAT: dict[str, str] = { + "year": "%Y-01-01", + "quarter": "%Y-01-01", + "month": "%Y-%m-01", + "week": "%Y-%m-%d", + "day": "%Y-%m-%d", + "hour": "%Y-%m-%d %H:00:00", + } + + def _mysql_time_trunc(self, granularity: str, field: str) -> str: + """MySQL time truncation using DATE_FORMAT / quarter arithmetic.""" + if granularity == "quarter": + return ( + f"DATE(CONCAT(YEAR({field}), '-', " + f"LPAD((QUARTER({field}) - 
1) * 3 + 1, 2, '0'), '-01'))" + ) + if granularity == "week": + return f"DATE(DATE_SUB({field}, INTERVAL WEEKDAY({field}) DAY))" + fmt = self._MYSQL_DATE_FORMAT.get(granularity) + if fmt is not None: + return f"DATE(DATE_FORMAT({field}, '{fmt}'))" + logger.warning( + "Unsupported MySQL granularity '%s', returning raw field", granularity + ) + return field + + def _colliding_dim_leaves(self, dims: list[QueryDimension]) -> set[str]: + leaves = [d.field.split(".")[-1] if "." in d.field else d.field for d in dims] + return {leaf for leaf, count in Counter(leaves).items() if count > 1} + + def _dimension_alias( + self, dim: QueryDimension, colliding_leaves: set[str] | None = None + ) -> str: + leaf = dim.field.split(".")[-1] if "." in dim.field else dim.field + if colliding_leaves and leaf in colliding_leaves: + alias = dim.field.replace(".", "_") + else: + alias = leaf + if dim.granularity: + alias = f"{alias}_{dim.granularity}" + return alias + + def _resolve_order_field(self, field: str, plan: ResolvedPlan) -> str: + colliding = self._colliding_dim_leaves(plan.dimensions) + field_lower = field.lower() + for measure in plan.measures: + if field_lower == measure.name.lower(): + return measure.name + if field_lower == measure.expr.lower(): + return measure.name + if measure.qualified_ref and field_lower == measure.qualified_ref.lower(): + return measure.name + if ( + measure.source_name not in {"__derived__", ""} + and field_lower == f"{measure.source_name}.{measure.name}".lower() + ): + return measure.name + + for dim in plan.dimensions: + alias = self._dimension_alias(dim, colliding) + if field_lower == dim.field.lower() or field_lower == alias.lower(): + return alias + + raise ValueError( + f"ORDER BY field '{field}' is not a recognized measure or dimension in this query" + ) + + def _collect_cte_target_sources( + self, + group: MeasureGroup, + plan: ResolvedPlan, + dim_keys: list[dict] | None = None, + ) -> set[str]: + """Collect sources needed for this CTE — only 
safely reachable ones.""" + safe_adj = self._build_safe_adjacency(plan) + target_sources: set[str] = set() + + # Only include dimension sources reachable via safe edges + dims_to_check = ( + dim_keys + if dim_keys is not None + else [{"expr": dim.field} for dim in plan.dimensions] + ) + for dk in dims_to_check: + dim_sources = self._parser.extract_source_refs(dk["expr"]) + for ds in dim_sources: + if ds == group.source_name: + target_sources.add(ds) + continue + path = self._find_join_path_steps(group.source_name, ds, safe_adj) + if path is not None: + target_sources.add(ds) + + known_sources = set(target_sources) + known_sources.add(group.source_name) + for filter_expr in plan.where_filters: + filter_sources = self._parser.extract_source_refs(filter_expr) + if not filter_sources or filter_sources & known_sources: + for fs in filter_sources: + if fs == group.source_name: + target_sources.add(fs) + continue + path = self._find_join_path_steps(group.source_name, fs, safe_adj) + if path is not None: + target_sources.add(fs) + known_sources.add(fs) + + # Include sources from measure-level filters + for m in group.measures: + if m.filter: + filter_sources = self._parser.extract_source_refs(m.filter) + for fs in filter_sources: + if fs == group.source_name: + target_sources.add(fs) + continue + path = self._find_join_path_steps(group.source_name, fs, safe_adj) + if path is not None: + target_sources.add(fs) + known_sources.add(fs) + + # Include sources from measure expressions themselves + for m in group.measures: + measure_sources = self._parser.extract_source_refs(m.expr) + for ms in measure_sources: + if ms == group.source_name or ms in known_sources: + target_sources.add(ms) + continue + path = self._find_join_path_steps(group.source_name, ms, safe_adj) + if path is not None: + target_sources.add(ms) + known_sources.add(ms) + + return target_sources + + def _build_safe_adjacency( + self, plan: ResolvedPlan + ) -> dict[str, list[tuple[str, ResolvedJoin]]]: + """Build 
adjacency graph using only many_to_one and one_to_one edges.""" + adjacency: dict[str, list[tuple[str, ResolvedJoin]]] = {} + for join in plan.joins: + if join.relationship in ("many_to_one", "one_to_one"): + adjacency.setdefault(join.from_source, []).append( + (join.to_source, join) + ) + if RELATIONSHIP_INVERSE[join.relationship] in ("many_to_one", "one_to_one"): + adjacency.setdefault(join.to_source, []).append( + (join.from_source, join) + ) + return adjacency + + def _resolve_having_for_locality( + self, + filter_expr: str, + plan: ResolvedPlan, + measure_cte_map: dict[str, str], + derived_expr_map: dict[str, str] | None = None, + ) -> str: + """Rewrite HAVING filter to reference CTE output columns. + + Handles: raw aggregates (sum(orders.amount)), predefined measure refs + (orders.revenue), bare measure names, derived measure names (inlined), + and case-insensitive matching. + """ + # Build comprehensive replacement map + replacement_map: dict[str, str] = {} + for m in plan.measures: + if m.is_derived: + # For derived measures, inline the full expression so the outer + # WHERE clause doesn't reference a SELECT alias (which is illegal). + if derived_expr_map and m.name in derived_expr_map: + replacement_map[m.name.lower()] = f"({derived_expr_map[m.name]})" + continue + cte_alias = measure_cte_map.get(m.name) + if not cte_alias: + continue + # In multi-CTE (FULL JOIN) mode, NULL from unmatched rows should + # be treated as 0 so that filters like "count(x) = 0" work. 
+ if len(plan.measure_groups) > 1: + cte_ref = f"COALESCE({cte_alias}.{m.name}, 0)" + else: + cte_ref = f"{cte_alias}.{m.name}" + replacement_map[m.expr.lower()] = cte_ref + if m.qualified_ref: + replacement_map[m.qualified_ref.lower()] = cte_ref + elif m.source_name and m.source_name not in ("__derived__", ""): + replacement_map[f"{m.source_name}.{m.name}".lower()] = cte_ref + replacement_map[m.name.lower()] = cte_ref + + # AST-based rewriting for robustness + try: + tree = sqlglot.parse_one( + f"SELECT {quote_reserved_identifiers(filter_expr)}", + read=self.dialect, + ) + + def _rewrite(node): + if isinstance(node, (exp.AggFunc, exp.Anonymous)): + node_sql = node.sql(dialect=self.dialect).lower() + if node_sql in replacement_map: + return sqlglot.parse_one( + f"SELECT {replacement_map[node_sql]}", read=self.dialect + ).expressions[0] + if isinstance(node, exp.Column): + if node.table: + ref = f"{node.table}.{node.name}".lower() + if ref in replacement_map: + return sqlglot.parse_one( + f"SELECT {replacement_map[ref]}", read=self.dialect + ).expressions[0] + if not node.table and node.name.lower() in replacement_map: + return sqlglot.parse_one( + f"SELECT {replacement_map[node.name.lower()]}", + read=self.dialect, + ).expressions[0] + return node + + transformed = tree.transform(_rewrite) + return transformed.expressions[0].sql(dialect=self.dialect) + except Exception: + logger.debug( + "AST-based HAVING rewrite failed for locality filter, falling back to regex: %s", + filter_expr, + ) + import re as _re + + result = filter_expr + for pattern, replacement in sorted( + replacement_map.items(), key=lambda x: -len(x[0]) + ): + result = _re.sub( + _re.escape(pattern), replacement, result, flags=_re.IGNORECASE + ) + return result + + def _build_group_join_steps( + self, + source_name: str, + target_sources: set[str], + plan: ResolvedPlan, + ) -> list[tuple[ResolvedJoin, str]]: + if not target_sources: + return [] + + adjacency: dict[str, list[tuple[str, ResolvedJoin]]] 
= {} + for join in plan.joins: + if join.relationship in ("many_to_one", "one_to_one"): + adjacency.setdefault(join.from_source, []).append( + (join.to_source, join) + ) + if RELATIONSHIP_INVERSE[join.relationship] in ("many_to_one", "one_to_one"): + adjacency.setdefault(join.to_source, []).append( + (join.from_source, join) + ) + + steps: list[tuple[ResolvedJoin, str]] = [] + seen_edges: set[tuple[str, str, str, str]] = set() + + for target in sorted(target_sources - {source_name}): + path_steps = self._find_join_path_steps(source_name, target, adjacency) + if not path_steps: + raise ValueError( + f"Aggregate locality cannot safely reach '{target}' from " + f"'{source_name}' without traversing one_to_many edges" + ) + for join, next_source in path_steps: + edge_key = ( + join.from_source, + join.to_source, + join.from_column, + join.to_column, + ) + if edge_key in seen_edges: + continue + seen_edges.add(edge_key) + steps.append((join, next_source)) + + return steps + + def _find_join_path_steps( + self, + start: str, + target: str, + adjacency: dict[str, list[tuple[str, ResolvedJoin]]], + ) -> list[tuple[ResolvedJoin, str]]: + if start == target: + return [] + + queue = [start] + parents: dict[str, tuple[str | None, ResolvedJoin | None]] = { + start: (None, None) + } + + while queue: + current = queue.pop(0) + if current == target: + break + + for next_source, join in adjacency.get(current, []): + if next_source in parents: + continue + parents[next_source] = (current, join) + queue.append(next_source) + + if target not in parents: + return [] + + steps: list[tuple[ResolvedJoin, str]] = [] + current = target + while current != start: + parent, join = parents[current] + if parent is None or join is None: + break + steps.append((join, current)) + current = parent + + steps.reverse() + return steps + + def _source_ref(self, name: str, sources: dict[str, SourceDefinition]) -> str: + """Get the FROM reference for a source (table or CTE name).""" + qname = _qi(name) + 
src = sources.get(name) + if not src and name in self._alias_map: + actual_name = self._alias_map[name] + actual_src = sources.get(actual_name) + if actual_src is None: + raise ValueError( + f"Cannot generate SQL: alias '{name}' refers to source " + f"'{actual_name}', which is not defined" + ) + if actual_src.is_table_source and actual_src.table: + return f"{actual_src.table} AS {qname}" + if actual_src.is_sql_source: + return f"{_qi(actual_name)} AS {qname}" + return f"{_qi(actual_name)} AS {qname}" + if not src: + raise ValueError(f"Cannot generate SQL: source '{name}' is not defined") + if src.is_sql_source: + return qname # references the CTE + return f"{src.table} AS {qname}" if src.table else qname + + # ── Computed column expansion ───────────────────────────────────── + + def _get_computed_col_map( + self, sources: dict[str, SourceDefinition] + ) -> dict[str, str]: + """Get or build the computed column map: {"source.col": "(qualified_expr)"}.""" + cache_key = id(sources) + if getattr(self, "_computed_cache_key", None) != cache_key: + self._computed_col_map = self._build_computed_col_map(sources) + self._computed_cache_key = cache_key + return self._computed_col_map + + def _build_computed_col_map( + self, sources: dict[str, SourceDefinition] + ) -> dict[str, str]: + """Build a lookup from 'source.column' to qualified expression for computed columns.""" + result: dict[str, str] = {} + for src_name, src in sources.items(): + col_names = {c.name for c in src.columns} + for col in src.columns: + if col.expr is None: + continue + qualified = self._qualify_bare_refs_in_expr( + col.expr, src_name, col_names + ) + result[f"{src_name}.{col.name}"] = f"({qualified})" + return result + + def _qualify_bare_refs_in_expr( + self, expr: str, source_name: str, col_names: set[str] + ) -> str: + """Qualify bare column references in a computed column expression with the source name.""" + try: + tree = sqlglot.parse_one( + f"SELECT {quote_reserved_identifiers(expr)}", 
read=self.dialect + ) + + def _qualify(node: exp.Expression) -> exp.Expression: + if ( + isinstance(node, exp.Column) + and not node.table + and node.name in col_names + ): + return exp.Column( + this=node.this.copy(), + table=exp.to_identifier(source_name), + ) + return node + + transformed = tree.transform(_qualify) + return transformed.expressions[0].sql(dialect=self.dialect) + except Exception: + logger.debug( + "AST-based bare ref qualification failed for expr '%s' on source '%s'", + expr, + source_name, + ) + return expr + + def _expand_computed_columns( + self, expr: str, sources: dict[str, SourceDefinition] + ) -> str: + """Expand computed column references to their underlying expressions.""" + computed_map = self._get_computed_col_map(sources) + if not computed_map: + return expr + + try: + tree = sqlglot.parse_one( + f"SELECT {quote_reserved_identifiers(expr)}", read=self.dialect + ) + + changed = False + + def _replace(node: exp.Expression) -> exp.Expression: + nonlocal changed + if isinstance(node, exp.Column) and node.table: + qualified = f"{node.table}.{node.name}" + if qualified in computed_map: + changed = True + return sqlglot.parse_one( + f"SELECT {computed_map[qualified]}", read=self.dialect + ).expressions[0] + return node + + transformed = tree.transform(_replace) + if changed: + return transformed.expressions[0].sql(dialect=self.dialect) + except Exception: + logger.debug("AST-based computed column expansion failed for: %s", expr) + + return expr + + def _qualify_expr(self, expr: str, sources: dict[str, SourceDefinition]) -> str: + """Expand computed column references in expressions.""" + return self._expand_computed_columns(expr, sources) + + def _qualify_filter( + self, f: str, sources: dict[str, SourceDefinition], plan: ResolvedPlan + ) -> str: + """Expand computed column references in WHERE filters.""" + return self._expand_computed_columns(f, sources) + + def _expand_having_filter( + self, f: str, plan: ResolvedPlan, sources: dict[str, 
SourceDefinition] + ) -> str: + """Expand predefined measure references in HAVING filters to aggregate expressions. + + e.g., 'orders.revenue > 1000' → 'SUM(orders.amount) > 1000' + when revenue is a predefined measure with expr='sum(amount)'. + """ + # Build a map of qualified measure ref → SQL aggregate expression + measure_expr_map: dict[str, str] = {} + for m in plan.measures: + if m.source_name and m.source_name != "__derived__": + if not m.is_derived: + if m.qualified_ref: + measure_expr_map[m.qualified_ref] = self._build_measure_expr( + m, sources + ) + qualified_ref = f"{m.source_name}.{m.name}" + measure_expr_map[qualified_ref] = self._build_measure_expr( + m, sources + ) + # Also map bare measure name for unqualified references + if not m.is_derived: + measure_expr_map[m.name] = self._build_measure_expr(m, sources) + + if not measure_expr_map: + return f + + # Use AST to find and replace column references matching measure names + try: + tree = sqlglot.parse_one( + f"SELECT * WHERE {quote_reserved_identifiers(f)}", + dialect=self.dialect, + ) + where = tree.find(exp.Where) + if not where: + return f + + changed = False + + def _replace(node): + nonlocal changed + if isinstance(node, exp.Column): + table = node.table + col_name = node.name + if table: + qualified = f"{table}.{col_name}" + if qualified in measure_expr_map: + changed = True + return sqlglot.parse_one( + f"SELECT {measure_expr_map[qualified]}", + read=self.dialect, + ).expressions[0] + elif col_name in measure_expr_map: + changed = True + return sqlglot.parse_one( + f"SELECT {measure_expr_map[col_name]}", + read=self.dialect, + ).expressions[0] + return node + + new_where = where.this.transform(_replace) + if changed: + return new_where.sql(dialect=self.dialect) + except Exception: + logger.debug( + "AST-based HAVING expansion failed, returning filter unchanged: %s", f + ) + return f + + def _transpile(self, outer_sql: str) -> str: + """Normalize the outer scaffold for the target dialect. 
+ + Source CTEs are concatenated by generate() verbatim, so only the + engine-generated outer scaffold (which embeds user-authored expr: + fragments already in self.dialect) reaches this function. Reading and + writing in self.dialect preserves dialect-specific constructs + (TIMESTAMP_SUB, DATEADD, APPROX_COUNT_DISTINCT, etc.) that a + postgres-round-trip would mangle. + """ + if self.dialect == "postgres": + return outer_sql + try: + # Quote reserved-word identifiers so target dialect parsers do not + # confuse them with keywords (e.g. Snowflake's SAMPLE, QUALIFY). + quoted_outer = quote_reserved_identifiers(outer_sql) + results = sqlglot.transpile( + quoted_outer, read=self.dialect, write=self.dialect + ) + return results[0] if results else outer_sql + except Exception: + logger.debug( + "Outer transpile in '%s' failed; returning un-normalized outer", + self.dialect, + ) + return outer_sql diff --git a/python/klo-sl/semantic_layer/graph.py b/python/klo-sl/semantic_layer/graph.py new file mode 100644 index 00000000..b37b54d7 --- /dev/null +++ b/python/klo-sl/semantic_layer/graph.py @@ -0,0 +1,285 @@ +from __future__ import annotations + +import heapq +import logging +from dataclasses import dataclass, field + +from semantic_layer.models import SourceDefinition + +# DIALECT CONVENTION: +# YAML-authored join `on:` clauses may contain dialect-specific casts +# (e.g. BigQuery `SAFE_CAST(x AS INT64)`). `_parse_on` parses them with +# `read=self.dialect` so the AST reflects the author's intent. 
@dataclass
class JoinPath:
    # Ordered edges of the path, from the first source to the last.
    edges: list[JoinEdge]
    # True when at least one edge on the path is a one_to_many hop.
    has_one_to_many: bool = False
    # True when another equal-cost path to the same destination exists.
    is_ambiguous: bool = False

    @property
    def source_names(self) -> list[str]:
        """Sources visited by the path, in order; ``[]`` for an edgeless path."""
        hops = self.edges
        if hops:
            return [hops[0].from_source, *(hop.to_source for hop in hops)]
        return []
self.adjacency[target_name].append(rev) + + def find_path(self, from_source: str, to_source: str) -> JoinPath | None: + """Dijkstra shortest path between two sources. + + Also detects ambiguity: if multiple equal-cost paths exist to the + destination, the returned ``JoinPath`` has ``is_ambiguous=True``. + """ + if from_source == to_source: + return JoinPath(edges=[], has_one_to_many=False) + if from_source not in self.adjacency or to_source not in self.adjacency: + return None + + # (cost, counter, current_node, path_edges) + counter = 0 + heap: list[tuple[int, int, str, list[JoinEdge]]] = [ + (0, counter, from_source, []) + ] + visited: set[str] = set() + first_path: JoinPath | None = None + first_cost: int | None = None + + while heap: + cost, _, current, path = heapq.heappop(heap) + + # All equal-cost alternatives exhausted — stop. + if first_cost is not None and cost > first_cost: + break + + if current == to_source: + has_o2m = any(e.relationship == "one_to_many" for e in path) + if first_path is None: + first_path = JoinPath(edges=path, has_one_to_many=has_o2m) + first_cost = cost + continue # don't visit dest — keep looking for alternatives + else: + first_path.is_ambiguous = True + return first_path + + if current in visited: + continue + visited.add(current) + + for edge in self.adjacency.get(current, []): + if edge.to_source not in visited: + counter += 1 + # Prefer safe (many_to_one / one_to_one) paths over one_to_many + edge_cost = ( + 1 if edge.relationship in ("many_to_one", "one_to_one") else 10 + ) + heapq.heappush( + heap, (cost + edge_cost, counter, edge.to_source, path + [edge]) + ) + + return first_path + + def resolve_join_tree( + self, source_names: set[str], root: str | None = None + ) -> JoinTree: + """ + Steiner tree approximation: pick root source, + find shortest path to each other source, merge paths. 
+ """ + if len(source_names) <= 1: + return JoinTree(sources=source_names) + + if root is not None and root in source_names: + names = [root] + sorted(source_names - {root}) + else: + names = sorted(source_names) + root = names[0] + tree = JoinTree(sources={root}) + + for target in names[1:]: + if target in tree.sources: + continue + path = self.find_path(root, target) + if path is not None and path.is_ambiguous: + logger.warning( + "Ambiguous join path from '%s' to '%s': multiple equal-cost " + "paths exist. The engine picked one arbitrarily. Use join " + "aliases to disambiguate.", + root, + target, + ) + if path is None: + raise ValueError( + f"No join path from '{root}' to '{target}'. " + f"These sources are not connected in the join graph." + ) + for edge in path.edges: + if not any( + e.from_source == edge.from_source and e.to_source == edge.to_source + for e in tree.edges + ): + tree.edges.append(edge) + if edge.relationship == "one_to_many": + tree.has_one_to_many = True + tree.sources.add(edge.from_source) + tree.sources.add(edge.to_source) + + return tree + + def find_components(self) -> list[set[str]]: + """Partition the graph into connected components. + + Returns one set per component. For an empty graph, returns []. For a + fully connected graph, returns a single-element list. Used both for + validation (multi-component → warning) and for suggest(). + + Aliases and their base source are treated as belonging to the same + component, since alias-scoped queries resolve back to the base table. 
+ """ + # Bidirectional alias↔base adjacency so BFS treats them as one node + alias_neighbors: dict[str, list[str]] = {} + for alias, base in self.alias_map.items(): + alias_neighbors.setdefault(alias, []).append(base) + alias_neighbors.setdefault(base, []).append(alias) + + components: list[set[str]] = [] + unvisited = set(self.adjacency) + while unvisited: + start = next(iter(unvisited)) + component: set[str] = set() + queue = [start] + while queue: + node = queue.pop() + if node in component: + continue + component.add(node) + for edge in self.adjacency.get(node, []): + if edge.to_source not in component: + queue.append(edge.to_source) + for neighbor in alias_neighbors.get(node, []): + if neighbor not in component: + queue.append(neighbor) + components.append(component) + unvisited -= component + return components + + def _parse_on(self, on_clause: str, target_source: str) -> tuple[str, str]: + """ + Parse join conditions into (from_columns, to_columns) using sqlglot AST. + + Single key: "customer_id = customers.id" → ("customer_id", "id") + Composite: "a = t.x AND b = t.y" → ("a,b", "x,y") + + Composite keys are stored as comma-separated strings. 
+ """ + import sqlglot + from sqlglot import exp as _exp + from semantic_layer.parser import quote_reserved_identifiers + + quoted = quote_reserved_identifiers(on_clause) + tree = sqlglot.parse_one( + f"SELECT 1 FROM _a JOIN _b ON {quoted}", read=self.dialect + ) + + from_cols: list[str] = [] + to_cols: list[str] = [] + + for eq_node in tree.find_all(_exp.EQ): + left = eq_node.left + right = eq_node.right + + # Reject nested equality (e.g., "a = b = c") + if isinstance(left, _exp.EQ) or isinstance(right, _exp.EQ): + raise ValueError(f"Invalid join condition: '{on_clause}'") + + # Extract column name, stripping any source qualifier + def _col_name(node: _exp.Expression) -> str: + if isinstance(node, _exp.Column): + return node.name + return node.sql(dialect="postgres") + + from_cols.append(_col_name(left)) + to_cols.append(_col_name(right)) + + if not from_cols: + raise ValueError(f"Invalid join condition: '{on_clause}'") + + return ",".join(from_cols), ",".join(to_cols) diff --git a/python/klo-sl/semantic_layer/loader.py b/python/klo-sl/semantic_layer/loader.py new file mode 100644 index 00000000..c6956c19 --- /dev/null +++ b/python/klo-sl/semantic_layer/loader.py @@ -0,0 +1,210 @@ +from __future__ import annotations + +import logging +import re +from copy import deepcopy +from pathlib import Path + +import yaml + +from semantic_layer.manifest import ( + Manifest, + _description_sources, + _resolve_description, + project_manifest_entry, + validate_overlay, +) +from semantic_layer.models import ( + JoinDeclaration, + MeasureDefinition, + Segment, + SourceColumn, + SourceDefinition, +) + +logger = logging.getLogger(__name__) + +_SCHEMA_DIR = "_schema" + + +def _normalize_ws(s: str) -> str: + """Collapse whitespace for join deduplication.""" + return re.sub(r"\s+", " ", s.strip()) + + +class SourceLoader: + def __init__(self, sources_dir: str | Path): + self.sources_dir = Path(sources_dir) + + def load_all(self) -> dict[str, SourceDefinition]: + """Load all sources 
using two-tier architecture. + + 1. Load _schema/*.yaml manifest shards → project to SourceDefinitions + 2. Load *.yaml files outside _schema/ + - Has `sql` or `table` → standalone source (load directly) + - Otherwise → overlay (compose with matching manifest entry) + 3. Validate cross-references + """ + sources: dict[str, SourceDefinition] = {} + description_sources: dict[str, dict[str, str] | None] = {} + + # 1. Load manifest shards + schema_dir = self.sources_dir / _SCHEMA_DIR + if schema_dir.is_dir(): + for path in sorted(schema_dir.glob("*.yaml")): + manifest = self._load_manifest_shard(path) + for name, entry in manifest.tables.items(): + if name in sources: + raise ValueError( + f"Duplicate source name '{name}' in manifest shard {path}" + ) + sources[name] = project_manifest_entry(name, entry) + description_sources[name] = _description_sources( + entry.descriptions, entry.description, entry.db_description + ) + + # 2. Load files outside _schema/ + for path in sorted(self.sources_dir.rglob("*.yaml")): + # Skip manifest shards + if _is_in_schema_dir(path, self.sources_dir): + continue + + with open(path) as f: + data = yaml.safe_load(f) + + if not isinstance(data, dict): + continue + + name = data.get("name") + if not name: + continue + + if data.get("sql") or data.get("table"): + # Standalone source — load directly + if name in sources: + raise ValueError( + f"Duplicate source name '{name}': standalone file {path} " + f"conflicts with manifest entry" + ) + sources[name] = SourceDefinition(**data) + else: + # Overlay — validate and compose with matching manifest entry + errors = validate_overlay(data) + if errors: + raise ValueError( + f"Invalid overlay '{name}' in {path}: {'; '.join(errors)}" + ) + base = sources.get(name) + if base: + ( + sources[name], + description_sources[name], + ) = self._compose(base, data, description_sources.get(name)) + else: + logger.warning( + "Orphan overlay '%s' in %s: no matching manifest entry, skipping", + name, + path, + ) + 
+ self._validate_cross_references(sources) + return sources + + def load_file(self, path: str | Path) -> SourceDefinition: + """Load and validate a single standalone YAML source definition.""" + path = Path(path) + with open(path) as f: + data = yaml.safe_load(f) + source = SourceDefinition(**data) + if not source.table and not source.sql: + raise ValueError( + f"Standalone source '{source.name}' in {path} must have 'table' or 'sql'" + ) + return source + + def _load_manifest_shard(self, path: Path) -> Manifest: + """Load a single manifest shard file.""" + with open(path) as f: + data = yaml.safe_load(f) + return Manifest(**data) + + def _compose( + self, + base: SourceDefinition, + overlay: dict, + base_description_sources: dict[str, str] | None = None, + ) -> tuple[SourceDefinition, dict[str, str] | None]: + """Compose a manifest-projected SourceDefinition with an overlay.""" + source = deepcopy(base) + description_sources = dict(base_description_sources or {}) + + # Overlay description semantics match the server: `description` writes the + # `user` source key, and `descriptions` merges keyed sources before a single + # visible description is resolved from the full map. 
+ if overlay.get("description"): + description_sources["user"] = overlay["description"] + if overlay.get("descriptions"): + description_sources.update( + { + source_name: text + for source_name, text in overlay["descriptions"].items() + if text + } + ) + if overlay.get("description") or overlay.get("descriptions"): + source.description = _resolve_description( + description_sources or None, + ) + + # Filter columns + excluded = set(overlay.get("exclude_columns", [])) + source.columns = [c for c in source.columns if c.name not in excluded] + + # Append computed columns (overlay columns with expr) + for col in overlay.get("columns", []): + source.columns.append(SourceColumn(**col)) + + # Set measures + source.measures = [MeasureDefinition(**m) for m in overlay.get("measures", [])] + + # Set segments + source.segments = [Segment(**s) for s in overlay.get("segments", [])] + + # Override grain + if overlay.get("grain"): + source.grain = overlay["grain"] + + # Union + dedupe joins, apply suppressions + disabled = {_normalize_ws(j) for j in overlay.get("disable_joins", [])} + manifest_joins = [ + j for j in source.joins if _normalize_ws(j.on) not in disabled + ] + overlay_joins = [JoinDeclaration(**j) for j in overlay.get("joins", [])] + existing_keys = {f"{j.to}::{_normalize_ws(j.on)}" for j in manifest_joins} + new_joins = [ + j + for j in overlay_joins + if f"{j.to}::{_normalize_ws(j.on)}" not in existing_keys + ] + source.joins = manifest_joins + new_joins + + return source, (description_sources or None) + + def _validate_cross_references(self, sources: dict[str, SourceDefinition]) -> None: + """Validate that all join targets reference existing sources.""" + for source in sources.values(): + for join in source.joins: + if join.to not in sources: + raise ValueError( + f"Source '{source.name}' joins to '{join.to}', " + f"but '{join.to}' is not defined" + ) + + +def _is_in_schema_dir(path: Path, sources_dir: Path) -> bool: + """Check if a path is inside the _schema/ 
directory.""" + try: + path.relative_to(sources_dir / _SCHEMA_DIR) + return True + except ValueError: + return False diff --git a/python/klo-sl/semantic_layer/manifest.py b/python/klo-sl/semantic_layer/manifest.py new file mode 100644 index 00000000..3023cf58 --- /dev/null +++ b/python/klo-sl/semantic_layer/manifest.py @@ -0,0 +1,233 @@ +"""Manifest models and projection for the two-tier schema architecture. + +The manifest (`_schema/*.yaml`) stores physical table catalog data with DB-native +types, PK flags, and join provenance. This module handles: + - Manifest-specific data models (ManifestColumn, ManifestJoin, ManifestEntry) + - DB-native → semantic type mapping + - Projection from ManifestEntry → SourceDefinition +""" + +from __future__ import annotations + +from typing import Literal + +from pydantic import BaseModel + +from semantic_layer.models import ( + ColumnRole, + DefaultTimeDimensionDbt, + FreshnessDbt, + JoinDeclaration, + SourceColumn, + SourceColumnTests, + SourceDefinition, +) + +# ── Type mapping (DB-native → semantic) ───────────────────────────── + +_TYPE_MAP: dict[str, str] = { + # number family + "integer": "number", + "bigint": "number", + "smallint": "number", + "numeric": "number", + "decimal": "number", + "float": "number", + "double": "number", + "real": "number", + "int": "number", + "int2": "number", + "int4": "number", + "int8": "number", + "float4": "number", + "float8": "number", + "double precision": "number", + "number": "number", + "tinyint": "number", + "mediumint": "number", + # time family + "timestamp": "time", + "timestamptz": "time", + "timestamp with time zone": "time", + "timestamp without time zone": "time", + "timestamp_ntz": "time", + "timestamp_ltz": "time", + "timestamp_tz": "time", + "datetime": "time", + "date": "time", + "time": "time", + "timetz": "time", + # boolean family + "boolean": "boolean", + "bool": "boolean", + # fallback → 'string' +} + + +def map_column_type(db_type: str) -> str: + """Map a DB-native 
column type to a semantic type (string/number/time/boolean).""" + normalized = db_type.lower().split("(")[0].strip() + return _TYPE_MAP.get(normalized, "string") + + +# ── Manifest data models ──────────────────────────────────────────── + + +_DEFAULT_PRIORITY = ["user", "ai", "dbt", "db"] + + +def _description_sources( + descriptions: dict[str, str] | None, + description: str | None = None, + db_description: str | None = None, +) -> dict[str, str] | None: + """Normalize multi-source descriptions to a keyed map.""" + if descriptions: + result = {source: text for source, text in descriptions.items() if text} + if result: + return result + + result: dict[str, str] = {} + if description: + result["ai"] = description + if db_description: + result["db"] = db_description + return result or None + + +def _resolve_description( + descriptions: dict[str, str] | None, + description: str | None = None, + db_description: str | None = None, +) -> str | None: + """Resolve a single description from a multi-source map or legacy flat fields.""" + if descriptions: + for source in _DEFAULT_PRIORITY: + if text := descriptions.get(source): + return text + # Fallback: first available + for text in descriptions.values(): + if text: + return text + # Legacy flat fields + if description: + return description + if db_description: + return db_description + return None + + +class ManifestColumn(BaseModel): + name: str + type: str # DB-native type (e.g., "integer", "varchar", "timestamp") + pk: bool = False + nullable: bool = True + descriptions: dict[str, str] | None = None + # Legacy flat fields (backwards-compatible YAML parsing) + description: str | None = None + db_description: str | None = None + constraints: dict | None = None + enum_values: dict[str, list[str]] | None = None + tests: SourceColumnTests | None = None + + @property + def resolved_description(self) -> str | None: + return _resolve_description( + self.descriptions, self.description, self.db_description + ) + + +class 
ManifestJoin(BaseModel): + to: str + on: str + relationship: Literal["many_to_one", "one_to_many", "one_to_one"] + source: Literal["formal", "inferred", "manual"] = "formal" + + +class ManifestEntry(BaseModel): + table: str + descriptions: dict[str, str] | None = None + # Legacy flat fields (backwards-compatible YAML parsing) + description: str | None = None + db_description: str | None = None + columns: list[ManifestColumn] + joins: list[ManifestJoin] = [] + default_time_dimension: DefaultTimeDimensionDbt | None = None + tags: dict[str, list[str]] | None = None + freshness: dict[str, FreshnessDbt] | None = None + + @property + def resolved_description(self) -> str | None: + return _resolve_description( + self.descriptions, self.description, self.db_description + ) + + +class Manifest(BaseModel): + """A single manifest shard file (`_schema/{schema}.yaml`).""" + + tables: dict[str, ManifestEntry] + + +# ── Projection ────────────────────────────────────────────────────── + + +def validate_overlay(data: dict) -> list[str]: + """Validate that overlay data doesn't contain structural fields. + + Returns a list of error messages (empty if valid). + """ + errors: list[str] = [] + if "table" in data: + errors.append("Overlay must not contain 'table' (owned by manifest)") + if "sql" in data: + errors.append( + "Overlay must not contain 'sql' (that makes it a standalone source)" + ) + for col in data.get("columns", []): + if "type" in col and "expr" not in col: + errors.append( + f"Overlay column '{col.get('name', '?')}' specifies 'type' without 'expr' " + f"(structural types are inherited from manifest — only computed columns may specify a type)" + ) + return errors + + +def project_manifest_entry(name: str, entry: ManifestEntry) -> SourceDefinition: + """Convert a raw manifest entry into a valid SourceDefinition. 
+ + - Maps DB-native column types to semantic types + - Auto-derives grain from PK columns (or all columns if no PKs) + - Strips join provenance (source field) + """ + columns = [ + SourceColumn( + name=c.name, + type=map_column_type(c.type), + role=ColumnRole.TIME + if map_column_type(c.type) == "time" + else ColumnRole.DEFAULT, + description=c.resolved_description, + constraints=c.constraints, + enum_values=c.enum_values, + tests=c.tests, + ) + for c in entry.columns + ] + pk_columns = [c.name for c in entry.columns if c.pk] + grain = pk_columns if pk_columns else [c.name for c in entry.columns] + + return SourceDefinition( + name=name, + table=entry.table, + description=entry.resolved_description, + grain=grain, + columns=columns, + joins=[ + JoinDeclaration(to=j.to, on=j.on, relationship=j.relationship) + for j in entry.joins + ], + default_time_dimension=entry.default_time_dimension, + tags=entry.tags, + freshness=entry.freshness, + ) diff --git a/python/klo-sl/semantic_layer/models.py b/python/klo-sl/semantic_layer/models.py new file mode 100644 index 00000000..9a6a514f --- /dev/null +++ b/python/klo-sl/semantic_layer/models.py @@ -0,0 +1,235 @@ +from __future__ import annotations + +from enum import Enum +from typing import Any, Literal + +from pydantic import BaseModel, Field, model_validator + + +# ── Source Definition Models ────────────────────────────────────────── + + +class ColumnVisibility(str, Enum): + PUBLIC = "public" + INTERNAL = "internal" + HIDDEN = "hidden" + + +class ColumnRole(str, Enum): + TIME = "time" + DEFAULT = "default" + + +class ColumnDbtConstraints(BaseModel): + not_null: bool | None = None + unique: bool | None = None + + +class DbtDataTestRef(BaseModel): + name: str + package: str + kwargs: dict[str, Any] | None = None + + +class SourceColumnTests(BaseModel): + dbt: list[DbtDataTestRef] | None = None + dbt_by_package: dict[str, list[str]] | None = None + + +class FreshnessDbt(BaseModel): + raw: Any | None = None + loaded_at_field: 
str | None = None + + +class SourceColumn(BaseModel): + name: str + type: Literal["string", "number", "time", "boolean"] + visibility: ColumnVisibility = ColumnVisibility.PUBLIC + role: ColumnRole = ColumnRole.DEFAULT + description: str | None = None + expr: str | None = None + natural_granularity: str | None = None + constraints: dict[str, ColumnDbtConstraints] | None = None + enum_values: dict[str, list[str]] | None = None + tests: SourceColumnTests | None = None + + +class JoinDeclaration(BaseModel): + to: str + on: str # e.g. "customer_id = customers.id" + relationship: Literal["many_to_one", "one_to_many", "one_to_one"] + alias: str | None = None + + +class MeasureDefinition(BaseModel): + name: str + expr: str # e.g. "sum(amount)" + filter: str | None = None # e.g. "status != 'refunded'" + segments: list[str] = [] # bare segment names defined on the measure's own source + description: str | None = None + + +class Segment(BaseModel): + """A named, reusable boolean predicate scoped to a single source.""" + + name: str + expr: str # e.g. 
"is_paid = true and is_refunded = '0'" + description: str | None = None + + +class DefaultTimeDimensionDbt(BaseModel): + dbt: str | None = None + + +class SourceDefinition(BaseModel): + name: str + description: str | None = None + table: str | None = None + sql: str | None = None + grain: list[str] + columns: list[SourceColumn] + joins: list[JoinDeclaration] = [] + measures: list[MeasureDefinition] = [] + segments: list[Segment] = [] + default_time_dimension: DefaultTimeDimensionDbt | None = None + tags: dict[str, list[str]] | None = None + freshness: dict[str, FreshnessDbt] | None = None + + @model_validator(mode="after") + def validate_source(self) -> SourceDefinition: + if self.table and self.sql: + raise ValueError("'table' and 'sql' are mutually exclusive") + if not self.grain: + raise ValueError("grain must be non-empty") + return self + + @property + def is_sql_source(self) -> bool: + return self.sql is not None + + @property + def is_table_source(self) -> bool: + return self.table is not None + + +# ── Query Models ────────────────────────────────────────────────────── + + +class QueryMeasure(BaseModel): + """Either a pre-defined name ('orders.revenue') or runtime expr.""" + + ref: str | None = None + expr: str | None = None + name: str | None = None + + +class QueryDimension(BaseModel): + """Either a column ref or a time granularity.""" + + field: str + granularity: str | None = None + + +class SemanticQuery(BaseModel): + measures: list[str | dict[str, Any]] + dimensions: list[str | dict[str, Any]] = [] + filters: list[str] = [] + # dotted "source.segment" names; AND-ed into matching measures + segments: list[str] = [] + order_by: list[str | dict[str, Any]] = [] + limit: int = 1000 + include_empty: bool = True + + @model_validator(mode="after") + def _validate_limit(self) -> SemanticQuery: + if self.limit is not None and self.limit < 0: + raise ValueError(f"limit must be non-negative, got {self.limit}") + return self + + +# ── Plan & Result Models 
────────────────────────────────────────────── + + +class Provenance(str, Enum): + VERIFIED = "verified" + COMPOSED = "composed" + DIMENSION = "dimension" + + +class ResolvedColumn(BaseModel): + name: str + provenance: Provenance + expr: str | None = None + description: str | None = None + granularity: str | None = None + + +class ResolvedMeasure(BaseModel): + name: str + expr: str # the aggregate expression, e.g. "sum(amount)" + source_name: str + original_name: str | None = None + qualified_ref: str | None = None + filter: str | None = None + provenance: Provenance = Provenance.COMPOSED + is_derived: bool = False + depends_on: list[str] = [] # names of other measures this depends on + description: str | None = None + + +class MeasureGroup(BaseModel): + """A group of measures from the same source, for aggregate locality.""" + + source_name: str + measures: list[ResolvedMeasure] + join_path_to_dims: list[str] = [] + + +class ResolvedJoin(BaseModel): + from_source: str + to_source: str + from_column: str + to_column: str + relationship: str + + +class OrderByClause(BaseModel): + field: str + direction: str = "asc" + + +class ResolvedPlan(BaseModel): + sources_used: list[str] + join_paths: list[str] # human-readable descriptions + joins: list[ResolvedJoin] = [] # structured join info for generator + anchor_source: str | None = None # the primary FROM source + anchor_grain: list[str] + fan_out_description: str + has_fan_out: bool = False + measure_groups: list[MeasureGroup] = [] + aggregate_locality: list[str] # human-readable CTE descriptions + where_filters: list[str] + having_filters: list[str] + columns: list[ResolvedColumn] + measures: list[ResolvedMeasure] = [] + dimensions: list[QueryDimension] = [] + order_by: list[OrderByClause] = [] + limit: int | None = None + include_empty: bool = True + + +class QueryResult(BaseModel): + resolved_plan: ResolvedPlan + sql: str + dialect: str + columns: list[ResolvedColumn] + + +class ValidationReport(BaseModel): + errors: 
list[str] = Field(default_factory=list) + warnings: list[str] = Field(default_factory=list) + per_source_warnings: dict[str, list[str]] = Field(default_factory=dict) + + @property + def valid(self) -> bool: + return len(self.errors) == 0 diff --git a/python/klo-sl/semantic_layer/parser.py b/python/klo-sl/semantic_layer/parser.py new file mode 100644 index 00000000..39da6813 --- /dev/null +++ b/python/klo-sl/semantic_layer/parser.py @@ -0,0 +1,303 @@ +from __future__ import annotations + +import functools +import re +from dataclasses import dataclass, field + +import sqlglot +from sqlglot import exp + +# DIALECT CONVENTION: +# `ExpressionParser` wraps read-only AST walks over user-authored +# expressions. Callers must construct it with the connection's native +# dialect (per sl_capture). The parse cache is keyed on (sql, dialect) +# so engines with different dialects do not share AST collisions. + +AGGREGATE_FUNCTIONS = frozenset( + { + "sum", + "avg", + "count", + "count_distinct", + "min", + "max", + "median", + "percentile", + } +) + +# Maps sqlglot AggFunc subclasses to our canonical names +_AGG_NODE_MAP: dict[type, str] = { + exp.Sum: "sum", + exp.Avg: "avg", + exp.Count: "count", + exp.Min: "min", + exp.Max: "max", +} + +# Custom aggregates that sqlglot parses as Anonymous (not standard SQL) +_CUSTOM_AGG_NAMES = frozenset({"count_distinct", "percentile", "median"}) + +# SQL reserved words that cause parse failures when used as identifiers +_SQL_RESERVED = frozenset( + { + "select", + "from", + "where", + "group", + "order", + "by", + "having", + "limit", + "join", + "on", + "as", + "and", + "or", + "not", + "in", + "is", + "null", + "true", + "false", + "between", + "like", + "case", + "when", + "then", + "else", + "end", + "insert", + "update", + "delete", + "create", + "drop", + "alter", + "table", + "index", + "view", + "union", + "all", + "distinct", + "into", + "values", + "set", + "with", + "exists", + "any", + "some", + "offset", + "fetch", + "for", + 
"grant", + "revoke", + "primary", + "key", + "foreign", + "references", + "check", + "constraint", + "default", + "column", + "cross", + "full", + "inner", + "left", + "right", + "outer", + "natural", + "using", + "except", + "intersect", + # Snowflake / cross-dialect reserved words + "glob", + "ilike", + "lateral", + "match_recognize", + "notnull", + "out", + "qualify", + "regexp", + "returning", + "rlike", + "rollback", + "sample", + "tablesample", + "top", + "uncache", + "xor", + } +) + +# Regex pattern for source.column references (word.word) +_DOTTED_IDENT_RE = re.compile(r"\b(\w+)\.(\w+)\b") + +# Matches single-quoted SQL string literals (including escaped quotes '') +_STRING_LITERAL_RE = re.compile(r"'(?:[^']|'')*'") + + +@dataclass +class ParsedExpression: + original: str + source_refs: set[str] = field(default_factory=set) + column_refs: set[str] = field(default_factory=set) # "source.column" format + is_aggregate: bool = False + aggregate_function: str | None = None + has_window_function: bool = False + depends_on_measures: set[str] = field(default_factory=set) + + +def _strip_quotes(name: str) -> str: + """Strip surrounding double quotes from an identifier.""" + if name.startswith('"') and name.endswith('"'): + return name[1:-1] + return name + + +def quote_reserved_identifiers(expr: str) -> str: + """Quote source.column references where either part is a SQL reserved word. + + String literals are masked before processing to prevent matching + dotted identifiers inside quoted strings like 'group.value'. 
+ """ + # Mask string literals to avoid matching inside them + literals: list[str] = [] + + def _mask_literal(m: re.Match) -> str: + literals.append(m.group(0)) + return f"__SL_LIT_{len(literals) - 1}__" + + masked = _STRING_LITERAL_RE.sub(_mask_literal, expr) + + def _quote_match(m: re.Match) -> str: + source, col = m.group(1), m.group(2) + start = m.start() + if start > 0 and masked[start - 1] == '"': + return m.group(0) + needs_quote = False + source_q = source + col_q = col + if source.lower() in _SQL_RESERVED: + source_q = f'"{source}"' + needs_quote = True + if col.lower() in _SQL_RESERVED: + col_q = f'"{col}"' + needs_quote = True + if needs_quote: + return f"{source_q}.{col_q}" + return m.group(0) + + result = _DOTTED_IDENT_RE.sub(_quote_match, masked) + + # Restore string literals + for i, lit in enumerate(literals): + result = result.replace(f"__SL_LIT_{i}__", lit) + + return result + + +@functools.lru_cache(maxsize=256) +def _cached_parse_select(sql: str, dialect: str) -> exp.Expression: + """Cache parsed SELECT wrapper trees keyed by (sql, dialect). + + Each (sql, dialect) pair gets its own entry, so engines using different + dialects don't share AST cache collisions. + """ + return sqlglot.parse_one(sql, read=dialect) + + +class ExpressionParser: + """Parses user-authored SQL expressions for AST walks. + + Must be constructed with the connection's native dialect. User-authored + `expr:`, `filter:`, and segment predicates from YAML are written in that + dialect (per the sl_capture skill contract) and parsing them as postgres + silently drops dialect-specific tokens (e.g. BigQuery `INTERVAL 30 DAY`). 
+ """ + + def __init__(self, dialect: str = "postgres") -> None: + self.dialect = dialect + + def _quote_reserved_identifiers(self, expr: str) -> str: + return quote_reserved_identifiers(expr) + + def _parse_as_select(self, quoted_expr: str) -> exp.Expression: + """Parse expression wrapped in SELECT, using cache for repeated expressions.""" + return _cached_parse_select(f"SELECT {quoted_expr}", self.dialect) + + def parse( + self, + expr: str, + known_measure_names: set[str] | None = None, + ) -> ParsedExpression: + known_measure_names = known_measure_names or set() + result = ParsedExpression(original=expr) + + if not expr or not expr.strip(): + return result + + quoted_expr = self._quote_reserved_identifiers(expr) + tree = self._parse_as_select(quoted_expr) + + # Extract source.column references + for col in tree.find_all(exp.Column): + if col.table: + source_name = _strip_quotes(col.table) + col_name = _strip_quotes(col.name) + result.source_refs.add(source_name) + result.column_refs.add(f"{source_name}.{col_name}") + + # Detect aggregate functions (built-in AggFunc subclasses). + # Aggregates nested inside scalar/correlated subqueries do NOT make the + # outer expression aggregate — e.g. `col = (SELECT MAX(col) FROM t)` is a + # plain column predicate, not a HAVING candidate. 
+ def _inside_subquery(node: exp.Expression) -> bool: + parent = node.parent + while parent is not None: + if isinstance(parent, exp.Subquery): + return True + parent = parent.parent + return False + + agg_names: list[str] = [] + for node in tree.find_all(exp.AggFunc): + if _inside_subquery(node): + continue + name = _AGG_NODE_MAP.get(type(node)) + if name: + agg_names.append(name) + else: + agg_names.append(node.key.lower()) + + # Detect custom aggregates parsed as Anonymous (count_distinct, percentile, median) + for node in tree.find_all(exp.Anonymous): + if _inside_subquery(node): + continue + if node.name.lower() in _CUSTOM_AGG_NAMES: + agg_names.append(node.name.lower()) + + if agg_names: + result.is_aggregate = True + result.aggregate_function = agg_names[0] + + # Detect window functions (OVER clause) + if tree.find(exp.Window): + result.has_window_function = True + + # Detect dependencies on named measures (bare identifiers without table qualifier) + if known_measure_names: + for col in tree.find_all(exp.Column): + if not col.table and col.name in known_measure_names: + result.depends_on_measures.add(col.name) + + return result + + def extract_source_refs(self, expr: str) -> set[str]: + """Quick extraction of source names from an expression.""" + if not expr or not expr.strip(): + return set() + quoted_expr = self._quote_reserved_identifiers(expr) + tree = self._parse_as_select(quoted_expr) + return { + _strip_quotes(col.table) for col in tree.find_all(exp.Column) if col.table + } diff --git a/python/klo-sl/semantic_layer/planner.py b/python/klo-sl/semantic_layer/planner.py new file mode 100644 index 00000000..bfd1d74f --- /dev/null +++ b/python/klo-sl/semantic_layer/planner.py @@ -0,0 +1,1445 @@ +from __future__ import annotations + +import logging +import re +from collections import Counter + +import sqlglot +from sqlglot import exp + +from semantic_layer.graph import JoinGraph +from semantic_layer.models import ( + ColumnVisibility, + MeasureDefinition, + 
MeasureGroup, + OrderByClause, + Provenance, + QueryDimension, + ResolvedColumn, + ResolvedJoin, + ResolvedMeasure, + ResolvedPlan, + SemanticQuery, + SourceDefinition, +) +from semantic_layer.parser import ExpressionParser, quote_reserved_identifiers + +# DIALECT CONVENTION: +# User-authored measure `expr`, `filter`, and computed-column fragments must +# be parsed with `read=self.dialect`. Authors write in the connection's +# native dialect (per sl_capture); parsing as postgres silently drops +# dialect-specific tokens. When re-emitting ASTs as strings for later +# composition, use `sql(dialect=self.dialect)` so dialect-specific +# functions (e.g. BigQuery `TIMESTAMP_SUB`, Snowflake `DATEADD`) survive. + +logger = logging.getLogger(__name__) + + +class QueryPlanner: + def __init__( + self, + sources: dict[str, SourceDefinition], + graph: JoinGraph, + *, + dialect: str = "postgres", + ): + self.sources = sources + self.graph = graph + self.dialect = dialect + self.parser = ExpressionParser(dialect=dialect) + + def plan(self, query: SemanticQuery) -> ResolvedPlan: + # 0. Validate column visibility + self._validate_visibility(query) + + # 1. Resolve dimensions + dimensions = self._resolve_dimensions(query.dimensions) + + # 2. Resolve measures (parse, look up pre-defined, classify) + raw_measures = self._resolve_measures(query.measures) + + # 3. Topological sort for derived measures + measures = self._topological_sort_measures(raw_measures) + + # 3a. Apply query-time segments (AND each into matching measures' filter) + measures = self._apply_query_segments(measures, query.segments) + + # 3b. Validate column references exist + self._validate_column_refs(measures, dimensions, query.filters) + + # 4. 
Collect all referenced sources + source_refs: set[str] = set() + for m in measures: + if not m.is_derived: + source_refs.add(m.source_name) + source_refs.update(self.parser.extract_source_refs(m.expr)) + if m.filter: + source_refs.update(self.parser.extract_source_refs(m.filter)) + for d in dimensions: + refs = self.parser.extract_source_refs(d.field) + source_refs.update(refs) + for f in query.filters: + source_refs.update(self.parser.extract_source_refs(f)) + + if not source_refs: + raise ValueError("Query does not reference any sources") + + # 5. Determine anchor source (must happen BEFORE resolve_join_tree) + anchor_source = self._pick_anchor( + measures, + dimensions, + source_refs, + include_empty=query.include_empty, + ) + + # 6. Resolve join tree, rooted at the anchor + tree = self.graph.resolve_join_tree(source_refs, root=anchor_source) + + # 7. Build structured joins from tree edges + joins = [ + ResolvedJoin( + from_source=e.from_source, + to_source=e.to_source, + from_column=e.from_column, + to_column=e.to_column, + relationship=e.relationship, + ) + for e in tree.edges + ] + + # 8. Detect fan-out / chasm trap + has_fan_out, measure_groups, fan_out_desc, locality_descs = ( + self._detect_fan_out(measures, dimensions, tree, filters=query.filters) + ) + + # 9. Classify filters + where_filters, having_filters = self._classify_filters(query.filters, measures) + + # 10. Compute anchor grain + dim_sources = set() + for d in dimensions: + refs = self.parser.extract_source_refs(d.field) + dim_sources.update(refs) + anchor_grain = [] + for d in dimensions: + anchor_grain.append(d.field) + + # 11. Build resolved columns + columns = self._build_columns(measures, dimensions) + + # 12. 
Build join path descriptions + join_paths = [] + for j in joins: + from_cols = [c.strip() for c in j.from_column.split(",")] + to_cols = [c.strip() for c in j.to_column.split(",")] + conditions = " AND ".join( + f"{j.from_source}.{fc} = {j.to_source}.{tc}" + for fc, tc in zip(from_cols, to_cols) + ) + join_paths.append(f"{conditions} ({j.relationship})") + + # 13. Resolve order_by + order_by_clauses = [] + for ob in query.order_by: + if isinstance(ob, dict): + order_by_clauses.append(OrderByClause(**ob)) + elif isinstance(ob, str): + order_by_clauses.append(OrderByClause(field=ob)) + else: + order_by_clauses.append(ob) + + return ResolvedPlan( + sources_used=sorted(tree.sources), + join_paths=join_paths, + joins=joins, + anchor_source=anchor_source, + anchor_grain=anchor_grain, + fan_out_description=fan_out_desc, + has_fan_out=has_fan_out, + measure_groups=measure_groups, + aggregate_locality=locality_descs, + where_filters=where_filters, + having_filters=having_filters, + columns=columns, + measures=measures, + dimensions=dimensions, + order_by=order_by_clauses, + limit=query.limit, + include_empty=query.include_empty, + ) + + def _resolve_dimensions(self, dims: list[str | dict]) -> list[QueryDimension]: + result = [] + seen: set[tuple[str, str | None]] = set() + for d in dims: + if isinstance(d, str): + dim = QueryDimension(field=self._qualify_bare_column(d)) + elif isinstance(d, dict): + field = d.get("field", "") + dim = QueryDimension(**{**d, "field": self._qualify_bare_column(field)}) + else: + continue + key = (dim.field, dim.granularity) + if key not in seen: + seen.add(key) + result.append(dim) + return result + + def _qualify_bare_column(self, field: str) -> str: + """Qualify a bare column name to source.column if unambiguous.""" + if "." 
in field or not field.strip().isidentifier(): + return field + bare = field.strip() + matches: list[str] = [] + for source_name, source in self.sources.items(): + if any(c.name == bare for c in source.columns): + matches.append(source_name) + if len(matches) == 1: + return f"{matches[0]}.{bare}" + if len(matches) > 1: + raise ValueError( + f"Column '{bare}' is ambiguous: it exists in multiple sources " + f"({', '.join(sorted(matches))}). Use a qualified name like " + f"'{matches[0]}.{bare}' to disambiguate." + ) + return field # not found — leave as-is, downstream will error + + def _resolve_measures(self, raw: list[str | dict]) -> list[ResolvedMeasure]: + measures: list[ResolvedMeasure] = [] + # Collect all named measures for dependency detection + named: set[str] = set() + for m in raw: + if isinstance(m, dict) and m.get("name"): + named.add(m["name"]) + colliding_predefined_names = self._collect_colliding_predefined_names(raw) + + for m in raw: + if isinstance(m, str): + measures.append( + self._resolve_measure_str(m, colliding_predefined_names) + ) + elif isinstance(m, dict): + measures.append( + self._resolve_measure_dict( + m, + named, + colliding_predefined_names, + ) + ) + + # Expand pre-defined measure chains (e.g., profit = revenue - total_cost) + measures = self._expand_predefined_chains(measures) + # Auto-add predefined measures referenced by derived measures + measures = self._auto_add_predefined_deps(measures) + # Qualify duplicate measure names across sources + measures = self._qualify_duplicate_names(measures) + return measures + + def _collect_colliding_predefined_names(self, raw: list[str | dict]) -> set[str]: + counts: Counter[str] = Counter() + for item in raw: + if isinstance(item, str): + ref = self._match_predefined_ref(item) + if ref: + _, measure_name = ref + counts[measure_name] += 1 + else: + bare = item.strip() + if bare.isidentifier(): + try: + unq = self._resolve_unqualified_measure(bare) + if unq: + counts[unq[1]] += 1 + except 
ValueError: + pass # ambiguous — caught later during resolution + elif isinstance(item, dict): + expr = item.get("expr", "") + for _, measure_name in self._extract_predefined_refs(expr): + counts[measure_name] += 1 + return {name for name, count in counts.items() if count > 1} + + def _extract_predefined_refs(self, expr: str) -> list[tuple[str, str]]: + refs: list[tuple[str, str]] = [] + parsed = self.parser.parse(expr) + for ref in parsed.column_refs: + parts = ref.split(".", 1) + if len(parts) != 2: + continue + src_name, measure_name = parts + actual_src_name = self.graph.alias_map.get(src_name, src_name) + src = self.sources.get(actual_src_name) + if not src: + continue + if any(md.name == measure_name for md in src.measures) and not any( + c.name == measure_name for c in src.columns + ): + refs.append((src_name, measure_name)) + return refs + + def _match_predefined_ref(self, expr: str) -> tuple[str, str] | None: + parsed = self.parser.parse(expr) + if parsed.is_aggregate or len(parsed.column_refs) != 1: + return None + ref = next(iter(parsed.column_refs)) + parts = ref.split(".", 1) + if len(parts) != 2: + return None + source_name, measure_name = parts + actual_source_name = self.graph.alias_map.get(source_name, source_name) + source = self.sources.get(actual_source_name) + if not source: + return None + if any(md.name == measure_name for md in source.measures): + return source_name, measure_name + return None + + def _resolve_unqualified_measure(self, bare_name: str) -> tuple[str, str] | None: + """Find a unique predefined measure matching a bare (unqualified) name. + + Returns (source_name, measure_name) if exactly one source defines it. + Raises ValueError if ambiguous (multiple sources). 
+ """ + matches: list[str] = [] + for source_name, source in self.sources.items(): + if any(md.name == bare_name for md in source.measures): + matches.append(source_name) + if len(matches) == 0: + return None + if len(matches) == 1: + return matches[0], bare_name + raise ValueError( + f"Measure '{bare_name}' is ambiguous: it exists in multiple sources " + f"({', '.join(sorted(matches))}). Use a qualified name like " + f"'{matches[0]}.{bare_name}' to disambiguate." + ) + + @staticmethod + def _qualified_measure_name(source_name: str, measure_name: str) -> str: + return f"{source_name}_{measure_name}" + + @staticmethod + def _auto_measure_name(expr: str) -> str: + normalized = expr.replace(".", "_").strip().lower() + normalized = re.sub(r"[^a-z0-9_]+", "_", normalized) + normalized = re.sub(r"_+", "_", normalized).strip("_") + if not normalized: + return "measure" + if normalized[0].isdigit(): + return f"m_{normalized}" + return normalized + + def _measure_definition_for_resolved( + self, + source: SourceDefinition, + source_name: str, + resolved_name: str, + original_name: str | None = None, + ): + for candidate in (original_name, resolved_name): + if not candidate: + continue + mdef = next((md for md in source.measures if md.name == candidate), None) + if mdef: + return mdef + for mdef in source.measures: + if resolved_name == self._qualified_measure_name(source_name, mdef.name): + return mdef + return None + + def _split_qualified_dep_token(self, token: str) -> tuple[str, str] | None: + for source_name, source in self.sources.items(): + prefix = f"{source_name}_" + if not token.startswith(prefix): + continue + measure_name = token[len(prefix) :] + if any(md.name == measure_name for md in source.measures): + return source_name, measure_name + return None + + def _auto_add_predefined_deps( + self, measures: list[ResolvedMeasure] + ) -> list[ResolvedMeasure]: + """Auto-add predefined measures that derived measures depend on but aren't in the list.""" + existing_names 
= {m.name for m in measures} + extra: list[ResolvedMeasure] = [] + for m in measures: + if not m.is_derived: + continue + for dep in m.depends_on: + if dep in existing_names: + continue + exact = self._split_qualified_dep_token(dep) + if exact: + src_name, measure_name = exact + resolved = self._resolve_measure_str( + f"{src_name}.{measure_name}", + set(), + ) + if resolved.name != dep: + resolved = resolved.model_copy(update={"name": dep}) + extra.append(resolved) + existing_names.add(dep) + continue + # Try to resolve as a predefined measure from any source + for src in self.sources.values(): + mdef = next((md for md in src.measures if md.name == dep), None) + if mdef: + extra.append( + self._resolve_measure_str(f"{src.name}.{dep}", set()) + ) + existing_names.add(dep) + break + if extra: + # Prepend extras so dependencies come before derived measures + measures = extra + measures + return measures + + def _qualify_duplicate_names( + self, measures: list[ResolvedMeasure] + ) -> list[ResolvedMeasure]: + """Qualify measure names that collide across different sources.""" + name_counts = Counter(m.name for m in measures) + colliding = {name for name, count in name_counts.items() if count > 1} + if not colliding: + return measures + result = [] + for m in measures: + if m.name in colliding and m.source_name != "__derived__": + result.append( + m.model_copy(update={"name": f"{m.source_name}_{m.name}"}) + ) + else: + result.append(m) + return result + + def _expand_predefined_chains( + self, measures: list[ResolvedMeasure] + ) -> list[ResolvedMeasure]: + """Expand pre-defined measures that reference other pre-defined measures. + + Fully recursive: handles chains of arbitrary depth (e.g., + margin = net_profit / revenue, where net_profit = gross_profit - tax, + where gross_profit = revenue - cost). 
+ """ + existing_names = {m.name for m in measures} + extra_measures: list[ResolvedMeasure] = [] + updated: list[ResolvedMeasure] = [] + # Track already-expanded deps to avoid duplicates + expanded: set[str] = set() + + def _ensure_dep( + dep_name: str, source: SourceDefinition, source_name: str + ) -> None: + """Recursively ensure a dependency measure is added.""" + if dep_name in existing_names or dep_name in expanded: + return + dep_mdef = next((md for md in source.measures if md.name == dep_name), None) + if not dep_mdef: + return + + dep_other = {md.name for md in source.measures if md.name != dep_name} + dep_parsed = self.parser.parse(dep_mdef.expr, known_measure_names=dep_other) + + if dep_parsed.depends_on_measures: + # Recursively add sub-dependencies first + for sub_dep in sorted(dep_parsed.depends_on_measures): + _ensure_dep(sub_dep, source, source_name) + # This dependency is itself derived + extra_measures.append( + ResolvedMeasure( + name=dep_name, + original_name=dep_name, + expr=dep_mdef.expr, + source_name="__derived__", + provenance=Provenance.VERIFIED, + is_derived=True, + depends_on=sorted(dep_parsed.depends_on_measures), + description=dep_mdef.description, + ) + ) + else: + # Leaf dependency: qualify and add as concrete measure + extra_measures.append( + ResolvedMeasure( + name=dep_name, + original_name=dep_name, + qualified_ref=f"{source_name}.{dep_name}", + expr=self._qualify_predefined_expr(dep_mdef.expr, source_name), + source_name=source_name, + filter=self._compose_measure_filter(dep_mdef, source_name), + provenance=Provenance.VERIFIED, + description=dep_mdef.description, + ) + ) + existing_names.add(dep_name) + expanded.add(dep_name) + + for m in measures: + if m.provenance != Provenance.VERIFIED or m.is_derived: + updated.append(m) + continue + + actual_source_name = self.graph.alias_map.get(m.source_name, m.source_name) + source = self.sources.get(actual_source_name) + if not source: + updated.append(m) + continue + + mdef = 
self._measure_definition_for_resolved( + source, m.source_name, m.name, m.original_name + ) + if not mdef: + updated.append(m) + continue + + other_measure_names = { + md.name for md in source.measures if md.name != mdef.name + } + parsed = self.parser.parse( + mdef.expr, known_measure_names=other_measure_names + ) + + if not parsed.depends_on_measures: + updated.append(m) + continue + + # Recursively add all dependencies + for dep_name in sorted(parsed.depends_on_measures): + _ensure_dep(dep_name, source, m.source_name) + + # Convert this measure to derived + updated.append( + m.model_copy( + update={ + "expr": mdef.expr, + "source_name": "__derived__", + "is_derived": True, + "depends_on": sorted(parsed.depends_on_measures), + "filter": None, + } + ) + ) + + return extra_measures + updated + + def _resolve_measure_str( + self, + s: str, + colliding_predefined_names: set[str], + ) -> ResolvedMeasure: + """ + "orders.revenue" → pre-defined lookup + "sum(orders.amount)" → runtime expression + """ + parsed = self.parser.parse(s) + + # Reject window functions in measures + if parsed.has_window_function: + raise ValueError( + f"Window functions (OVER clause) are not supported in measures: '{s}'. " + f"Window functions require row-level context and cannot be combined with " + f"GROUP BY aggregation." + ) + + predefined_ref = self._match_predefined_ref(s) + + # Try unqualified resolution for bare identifiers (e.g. 
"revenue" → "orders.revenue") + if predefined_ref is None and not parsed.is_aggregate: + bare = s.strip() + if bare.isidentifier(): + unqualified = self._resolve_unqualified_measure(bare) + if unqualified: + source_name, measure_name = unqualified + qualified = f"{source_name}.{measure_name}" + logger.info( + "Resolved unqualified measure '%s' to '%s'", + bare, + qualified, + ) + return self._resolve_measure_str( + qualified, colliding_predefined_names + ) + + if predefined_ref: + source_name, measure_name = predefined_ref + actual_source_name = self.graph.alias_map.get(source_name, source_name) + source = self.sources[actual_source_name] + for mdef in source.measures: + if mdef.name == measure_name: + resolved_name = measure_name + if measure_name in colliding_predefined_names: + resolved_name = self._qualified_measure_name( + source_name, measure_name + ) + return ResolvedMeasure( + name=resolved_name, + original_name=measure_name, + qualified_ref=f"{source_name}.{measure_name}", + expr=self._qualify_predefined_expr(mdef.expr, source_name), + source_name=source_name, + filter=self._compose_measure_filter(mdef, source_name), + provenance=Provenance.VERIFIED, + description=mdef.description, + ) + + # Bare column reference without aggregation — invalid as a measure + if not parsed.is_aggregate: + if parsed.column_refs: + ref = next(iter(parsed.column_refs)) + src, col = ref.split(".", 1) + raise ValueError( + f"Measure '{s}' is not a pre-defined measure on source '{src}' " + f"and has no aggregate function. Use an aggregate like " + f"sum({s}), count({s}), avg({s}), etc." 
+ ) + raise ValueError(f"Measure '{s}' does not reference any source") + + # Runtime expression + if not parsed.source_refs: + raise ValueError(f"Measure '{s}' does not reference any source") + + # Reject nested aggregation (e.g., avg(sum(orders.amount))) + self._check_nested_aggregation(s) + + source_name = sorted(parsed.source_refs)[0] + name = self._auto_measure_name(s) + return ResolvedMeasure( + name=name, + original_name=name, + expr=s, + source_name=source_name, + provenance=Provenance.COMPOSED, + ) + + def _resolve_measure_dict( + self, + d: dict, + named: set[str], + colliding_predefined_names: set[str], + ) -> ResolvedMeasure: + expr = d.get("expr", "") + name = d.get("name", expr) + parsed = self.parser.parse(expr, known_measure_names=named) + + # Reject window functions + if parsed.has_window_function: + raise ValueError( + f"Window functions (OVER clause) are not supported in measures: '{expr}'. " + f"Window functions require row-level context and cannot be combined with " + f"GROUP BY aggregation." 
+ ) + + # Check if any column_refs match predefined measures (e.g., "orders.revenue") + predefined_deps: list[tuple[str, str, str]] = [] + for src_name, measure_name in self._extract_predefined_refs(expr): + predefined_deps.append( + (f"{src_name}.{measure_name}", src_name, measure_name) + ) + + # Merge bare measure deps + qualified predefined deps + all_dep_names: set[str] = set(parsed.depends_on_measures) + rewritten_expr = expr + + if predefined_deps: + replacement_map: dict[str, str] = {} + for ref, src_name, measure_name in predefined_deps: + dep_name = measure_name + if measure_name in colliding_predefined_names: + dep_name = self._qualified_measure_name(src_name, measure_name) + replacement_map[ref] = dep_name + all_dep_names.add(dep_name) + named.add(dep_name) + tree = sqlglot.parse_one( + f"SELECT {quote_reserved_identifiers(expr)}", dialect=self.dialect + ) + + def _replace(node): + if isinstance(node, exp.Column) and node.table: + ref = f"{node.table}.{node.name}" + if ref in replacement_map: + return exp.Column(this=exp.to_identifier(replacement_map[ref])) + return node + + rewritten_expr = ( + tree.transform(_replace).expressions[0].sql(dialect=self.dialect) + ) + + if all_dep_names: + return ResolvedMeasure( + name=name, + original_name=name, + expr=rewritten_expr, + source_name="__derived__", + provenance=Provenance.COMPOSED, + is_derived=True, + depends_on=sorted(all_dep_names), + ) + + if not parsed.source_refs: + raise ValueError(f"Measure expr '{expr}' does not reference any source") + + # Reject nested aggregation (e.g., avg(sum(orders.amount))) + self._check_nested_aggregation(expr) + + source_name = sorted(parsed.source_refs)[0] + return ResolvedMeasure( + name=name, + original_name=name, + expr=expr, + source_name=source_name, + provenance=Provenance.COMPOSED, + ) + + def _check_nested_aggregation(self, expr: str) -> None: + """Reject expressions with nested aggregate functions (e.g., avg(sum(x))).""" + try: + tree = sqlglot.parse_one( + 
f"SELECT {quote_reserved_identifiers(expr)}", dialect=self.dialect + ) + for agg_node in tree.find_all(exp.AggFunc): + # Check if this aggregate contains another aggregate inside + for inner in agg_node.find_all(exp.AggFunc): + if inner is not agg_node: + raise ValueError( + f"Nested aggregation is not supported: '{expr}'. " + f"Use a derived measure to combine aggregates " + f"(e.g., define sum_amount first, then avg it as a derived measure)." + ) + except ValueError: + raise + except Exception: + logger.debug("Failed to check nested aggregation for: %s", expr) + + def _topological_sort_measures( + self, measures: list[ResolvedMeasure] + ) -> list[ResolvedMeasure]: + by_name = {m.name: m for m in measures} + visited: set[str] = set() + in_stack: set[str] = set() + result: list[ResolvedMeasure] = [] + + def visit(m: ResolvedMeasure) -> None: + if m.name in in_stack: + raise ValueError(f"Circular dependency detected: {m.name}") + if m.name in visited: + return + in_stack.add(m.name) + for dep_name in m.depends_on: + if dep_name in by_name: + visit(by_name[dep_name]) + in_stack.discard(m.name) + visited.add(m.name) + result.append(m) + + for m in measures: + visit(m) + return result + + def _pick_anchor( + self, + measures: list[ResolvedMeasure], + dimensions: list[QueryDimension], + source_refs: set[str], + include_empty: bool, + ) -> str: + if include_empty: + for d in dimensions: + refs = self.parser.extract_source_refs(d.field) + if refs: + return sorted(refs)[0] + # Prefer the first non-derived measure's source + for m in measures: + if not m.is_derived and m.source_name in self.sources: + return m.source_name + # Fallback to first dimension's source + for d in dimensions: + refs = self.parser.extract_source_refs(d.field) + if refs: + return sorted(refs)[0] + return sorted(source_refs)[0] + + def _compose_measure_filter( + self, mdef: MeasureDefinition, source_name: str + ) -> str | None: + """Compose mdef.filter with mdef.segments[*].expr into a single AND-ed, 
+ qualified predicate. Returns None if neither contributes. + + Segments are bare names resolved against the measure's own source. + Unknown names raise at plan time. + """ + parts: list[str] = [] + if mdef.filter: + parts.append(self._qualify_predefined_expr(mdef.filter, source_name)) + if mdef.segments: + actual_source_name = self.graph.alias_map.get(source_name, source_name) + source = self.sources.get(actual_source_name) + seg_by_name = {s.name: s for s in (source.segments if source else [])} + for seg_name in mdef.segments: + seg = seg_by_name.get(seg_name) + if not seg: + available = ", ".join(sorted(seg_by_name)) or "(none)" + raise ValueError( + f"Measure '{mdef.name}' on source '{actual_source_name}' " + f"references unknown segment '{seg_name}'. " + f"Available segments: {available}." + ) + parts.append(self._qualify_predefined_expr(seg.expr, source_name)) + if not parts: + return None + if len(parts) == 1: + return parts[0] + return " AND ".join(f"({p})" for p in parts) + + def _apply_query_segments( + self, + measures: list[ResolvedMeasure], + query_segments: list[str], + ) -> list[ResolvedMeasure]: + """AND each query-time segment into the filter of every measure whose + base source matches the segment's source. + + Errors: + - Segment string isn't dotted source.name + - Source or segment doesn't exist + - No measure in the query has the segment's source as its base source + """ + if not query_segments: + return measures + + segs_by_source: dict[str, list[str]] = {} + for raw in query_segments: + if "." not in raw: + raise ValueError( + f"Query-time segment '{raw}' must be a dotted " + f"'source.segment_name' reference." + ) + src_name, seg_name = raw.split(".", 1) + actual = self.graph.alias_map.get(src_name, src_name) + source = self.sources.get(actual) + if not source: + raise ValueError( + f"Query-time segment '{raw}' references unknown source " + f"'{src_name}'." 
+ ) + seg = next((s for s in source.segments if s.name == seg_name), None) + if not seg: + avail = ", ".join(sorted(s.name for s in source.segments)) or "(none)" + raise ValueError( + f"Query-time segment '{raw}' references unknown segment " + f"'{seg_name}' on source '{src_name}'. Available: {avail}." + ) + qualified = self._qualify_predefined_expr(seg.expr, src_name) + segs_by_source.setdefault(src_name, []).append(qualified) + + updated: list[ResolvedMeasure] = [] + matched_sources: set[str] = set() + for m in measures: + if m.is_derived or m.source_name not in segs_by_source: + updated.append(m) + continue + matched_sources.add(m.source_name) + new_parts: list[str] = [] + if m.filter: + new_parts.append(m.filter) + new_parts.extend(segs_by_source[m.source_name]) + composed = ( + new_parts[0] + if len(new_parts) == 1 + else " AND ".join(f"({p})" for p in new_parts) + ) + updated.append(m.model_copy(update={"filter": composed})) + + for src in segs_by_source: + if src not in matched_sources: + raise ValueError( + f"Query-time segment(s) on source '{src}' have no matching " + f"measure in the query. A query-time segment only applies to " + f"measures whose base source matches the segment's source." + ) + + return updated + + def _qualify_predefined_expr(self, expr: str, source_name: str) -> str: + """Qualify bare column references in predefined measure expressions using sqlglot AST. + + BFS-traverses many_to_one/one_to_one joins from the measure's source to find + columns on transitively reachable sources. This handles measure filters that + reference joined-source columns (e.g., filter: "level = 'premium'" where + 'level' is on a 'tiers' table reachable via orders → customers → tiers). 
def _detect_fan_out(
    self,
    measures: list[ResolvedMeasure],
    dimensions: list[QueryDimension],
    tree,
    filters: list[str] | None = None,
) -> tuple[bool, list[MeasureGroup], str, list[str]]:
    """
    Detect fan-out and chasm traps. Group measures by source.
    If multiple measure sources exist, each needs its own pre-aggregation CTE.
    Also checks filter sources — a filter forcing a one_to_many join from the
    measure source is an error (cannot be safely pre-aggregated).

    Args:
        measures: Resolved measures; derived ones are ignored for grouping.
        dimensions: Query dimensions; the sources referenced by their fields
            are the targets checked for one_to_many paths.
        tree: Not referenced anywhere in this method's body.
        filters: Raw filter expressions; ``None`` is treated as no filters.

    Returns:
        A 4-tuple ``(has_fan_out, measure_groups, description, locality_descs)``.
        When no fan-out is found this is ``(False, [], "No fan-out", [])``.

    Raises:
        ValueError: if a non-derived measure references a source belonging to
            another measure group, if (single-group case) a filter source is
            only reachable through a one_to_many path, or if (chasm case) a
            filter source is not reachable from any measure group without a
            one_to_many edge.
    """
    # Group non-derived measures by source
    groups: dict[str, list[ResolvedMeasure]] = {}
    for m in measures:
        if m.is_derived:
            continue
        groups.setdefault(m.source_name, []).append(m)

    # Validate multi-source aggregate expressions: if a non-derived measure
    # references sources from multiple groups, it can't be safely placed in
    # a single CTE (the other source won't be available in the CTE scope).
    if len(groups) > 1:
        for m in measures:
            if m.is_derived:
                continue
            measure_source_refs = self.parser.extract_source_refs(m.expr)
            other_group_refs = measure_source_refs - {m.source_name}
            for ref in other_group_refs:
                # References to an alias of the measure's own underlying
                # source are not a cross-group reference.
                ref_actual = self.graph.alias_map.get(ref, ref)
                source_actual = self.graph.alias_map.get(
                    m.source_name, m.source_name
                )
                if ref_actual == source_actual:
                    continue
                if ref in groups and ref != m.source_name:
                    raise ValueError(
                        f"Measure '{m.name}' references multiple independent "
                        f"sources ({m.source_name}, {ref}) that are in separate "
                        f"measure groups. In aggregate locality mode, each CTE "
                        f"can only access its own source's tables. Decompose "
                        f"the expression into separate named measures and combine "
                        f"as a derived measure: e.g., "
                        f'{{"expr": "part1", "name": "a"}}, '
                        f'{{"expr": "part2", "name": "b"}}, '
                        f'{{"expr": "a / b", "name": "{m.name}"}}'
                    )

    # Collect dimension sources
    dim_sources: set[str] = set()
    for d in dimensions:
        refs = self.parser.extract_source_refs(d.field)
        dim_sources.update(refs)

    # Collect filter sources
    filter_sources: set[str] = set()
    for f in filters or []:
        filter_sources.update(self.parser.extract_source_refs(f))

    if len(groups) <= 1:
        # Single measure group: check the path FROM measure source TO dimension sources.
        # Only flag fan-out if those specific paths have one_to_many edges.
        if groups:
            source_name = next(iter(groups))
            source_actual = self.graph.alias_map.get(source_name, source_name)
            has_o2m = False
            for dim_src in dim_sources:
                if dim_src == source_name:
                    continue
                # Skip alias siblings (same underlying source — no fan-out)
                dim_actual = self.graph.alias_map.get(dim_src, dim_src)
                if dim_actual == source_actual:
                    continue
                path = self.graph.find_path(source_name, dim_src)
                if path and path.has_one_to_many:
                    has_o2m = True
                    break

            # Also check filter sources for one_to_many fan-out
            # (only sources that are neither a dimension source nor the
            # measure source itself; skipped once dimension fan-out was found).
            if not has_o2m:
                for filter_src in filter_sources - dim_sources - {source_name}:
                    filter_actual = self.graph.alias_map.get(filter_src, filter_src)
                    if filter_actual == source_actual:
                        continue
                    path = self.graph.find_path(source_name, filter_src)
                    if path and path.has_one_to_many:
                        raise ValueError(
                            f"Filter on '{filter_src}' requires a one_to_many join "
                            f"from measure source '{source_name}', which would cause "
                            f"incorrect aggregation (fan-out). Consider rewriting the "
                            f"filter as a subquery or adding the filter source as a "
                            f"dimension source."
                        )

            if has_o2m:
                measure_groups = [
                    MeasureGroup(
                        source_name=source_name, measures=groups[source_name]
                    )
                ]
                return (
                    True,
                    measure_groups,
                    f"Fan-out detected: one_to_many edges from {source_name} to dimensions",
                    [f"Pre-aggregate {source_name} measures before joining"],
                )
        return False, [], "No fan-out", []

    # Multiple measure sources. Only merge groups that are provably row-safe
    # (alias siblings or pure one_to_one chains). many_to_one chains are not
    # safe to flatten because the "one" side measure is duplicated by the
    # "many" side rows.
    merged_groups = self._merge_safe_measure_groups(groups, dim_sources)

    if len(merged_groups) <= 1:
        # All measure sources are on the same safe join chain
        if merged_groups:
            mg_name, mg_measures = next(iter(merged_groups.items()))
            # Still check if there's fan-out to dimension sources
            # NOTE(review): unlike the single-group branch above, alias
            # siblings of mg_name are not skipped here — confirm intended.
            has_o2m = False
            for dim_src in dim_sources:
                if dim_src == mg_name:
                    continue
                path = self.graph.find_path(mg_name, dim_src)
                if path and path.has_one_to_many:
                    has_o2m = True
                    break
            if has_o2m:
                return (
                    True,
                    [MeasureGroup(source_name=mg_name, measures=mg_measures)],
                    f"Fan-out detected: one_to_many edges from {mg_name} to dimensions",
                    [f"Pre-aggregate {mg_name} measures before joining"],
                )
        return False, [], "No fan-out", []

    # True chasm trap — independent measure sources that can't be safely merged.
    # Before building groups, validate that all filter sources are reachable
    # from at least one measure source without traversing one_to_many edges.
    # If not, the filter would be silently dropped during CTE generation.
    for filter_src in filter_sources - dim_sources:
        reachable_from_any = False
        for source_name in merged_groups:
            # Reachable if the filter source is the group itself, an alias
            # of the same underlying source, or joined without one_to_many.
            if filter_src == source_name:
                reachable_from_any = True
                break
            filter_actual = self.graph.alias_map.get(filter_src, filter_src)
            source_actual = self.graph.alias_map.get(source_name, source_name)
            if filter_actual == source_actual:
                reachable_from_any = True
                break
            path = self.graph.find_path(source_name, filter_src)
            if path and not path.has_one_to_many:
                reachable_from_any = True
                break
        if not reachable_from_any:
            raise ValueError(
                f"Filter on '{filter_src}' is not reachable via many_to_one/one_to_one "
                f"edges from any measure source ({', '.join(merged_groups.keys())}). "
                f"The filter would be silently dropped in aggregate locality mode. "
                f"Consider moving the filter condition into a SQL source or removing it."
            )

    # One MeasureGroup (and one human-readable locality note) per merged group.
    measure_groups = []
    locality_descs = []
    for source_name, group_measures in merged_groups.items():
        mg = MeasureGroup(source_name=source_name, measures=group_measures)
        measure_groups.append(mg)
        measure_names = ", ".join(m.name for m in group_measures)
        locality_descs.append(
            f"Pre-aggregate {source_name} ({measure_names}) by dimension keys"
        )

    return (
        True,
        measure_groups,
        f"Chasm trap: {len(merged_groups)} independent measure sources ({', '.join(merged_groups.keys())})",
        locality_descs,
    )
+ # Pick one representative per underlying source. + alias_groups: dict[str, list[str]] = {} + for name in names: + actual = self.graph.alias_map.get(name, name) + alias_groups.setdefault(actual, []).append(name) + + merged: dict[str, list[ResolvedMeasure]] = {} + assigned: dict[str, str] = {} # source_name → merged anchor + + # Merge alias siblings into the first alias name + for actual, siblings in alias_groups.items(): + anchor = siblings[0] + merged[anchor] = [] + for sib in siblings: + merged[anchor].extend(groups[sib]) + assigned[sib] = anchor + + def _edge_is_grain_safe(edge) -> bool: + if edge.relationship == "one_to_one": + return True + if edge.relationship != "many_to_one": + return False + actual_source = self.graph.alias_map.get(edge.from_source, edge.from_source) + source = self.sources.get(actual_source) + if not source: + return False + from_cols = {c.strip() for c in edge.from_column.split(",")} + grain_cols = {c.strip() for c in source.grain} + return from_cols == grain_cols + + def _path_is_grain_safe(path) -> bool: + return bool(path) and all(_edge_is_grain_safe(edge) for edge in path.edges) + + # Second pass: check pairwise one_to_one reachability between merged groups + merged_names = list(merged.keys()) + final: dict[str, list[ResolvedMeasure]] = {} + final_assigned: dict[str, str] = {} + + for name in merged_names: + if name in final_assigned: + continue + final[name] = list(merged[name]) + final_assigned[name] = name + + for other in merged_names: + if other == name or other in final_assigned: + continue + path_fwd = self.graph.find_path(name, other) + path_rev = self.graph.find_path(other, name) + if _path_is_grain_safe(path_fwd): + final[name].extend(merged[other]) + final_assigned[other] = name + elif _path_is_grain_safe(path_rev): + final.setdefault(other, []).extend(final.pop(name, [])) + final[other].extend(merged[other]) + for k, v in final_assigned.items(): + if v == name: + final_assigned[k] = other + final_assigned[name] = other + 
final_assigned[other] = other + break + + return final + + def _classify_filter_clause( + self, + clause: str, + measure_names: set[str], + predefined_refs: set[str], + ) -> str: + """Classify a single filter clause as 'where' or 'having'.""" + parsed = self.parser.parse(clause, known_measure_names=measure_names) + if parsed.is_aggregate or parsed.depends_on_measures: + return "having" + if parsed.column_refs & predefined_refs: + matching_refs = parsed.column_refs & predefined_refs + all_are_columns = True + for ref in matching_refs: + src_name, col_name = ref.split(".", 1) + src = self.sources.get(src_name) + if not src or not any(c.name == col_name for c in src.columns): + all_are_columns = False + break + return "where" if all_are_columns else "having" + return "where" + + def _classify_filters( + self, filters: list[str], measures: list[ResolvedMeasure] + ) -> tuple[list[str], list[str]]: + measure_names = {m.name for m in measures} + where_filters = [] + having_filters = [] + + # Build set of qualified pre-defined measure refs (e.g. "orders.revenue") + predefined_refs: set[str] = set() + for src in self.sources.values(): + for mdef in src.measures: + predefined_refs.add(f"{src.name}.{mdef.name}") + + for f in filters: + if not f or not f.strip(): + continue + # Split compound AND expressions so each clause is classified independently. + # e.g. "sum(x) > 100 AND status = 'active'" → HAVING + WHERE + clauses = self._split_top_level_and(f) + for clause in clauses: + kind = self._classify_filter_clause( + clause, measure_names, predefined_refs + ) + if kind == "having": + # Validate: if an OR expression mixes aggregate and non-aggregate + # sub-expressions, it cannot be split and would produce invalid SQL. 
+ self._validate_or_filter_consistency( + clause, measure_names, predefined_refs + ) + having_filters.append(clause) + else: + where_filters.append(clause) + + return where_filters, having_filters + + def _validate_or_filter_consistency( + self, + clause: str, + measure_names: set[str], + predefined_refs: set[str], + ) -> None: + """Raise an error if an OR expression mixes WHERE and HAVING conditions.""" + try: + tree = sqlglot.parse_one( + f"SELECT * WHERE {quote_reserved_identifiers(clause)}", + dialect=self.dialect, + ) + where = tree.find(exp.Where) + if not where: + return + inner = where.this + # Only check if the top level contains OR + or_parts: list[str] = [] + + def _collect_or(node): + if isinstance(node, exp.Or): + _collect_or(node.left) + _collect_or(node.right) + else: + or_parts.append(node.sql(dialect=self.dialect)) + + _collect_or(inner) + if len(or_parts) <= 1: + return + # Classify each OR branch independently + kinds = set() + for part in or_parts: + kinds.add( + self._classify_filter_clause(part, measure_names, predefined_refs) + ) + if kinds == {"where", "having"}: + raise ValueError( + f"Filter '{clause}' mixes aggregate and non-aggregate conditions " + f"with OR, which cannot be split into WHERE and HAVING. " + f"Rewrite as separate filters or use a subquery." 
def _split_top_level_and(self, expr: str) -> list[str]:
    """Break a filter expression into its top-level AND operands.

    The expression is wrapped as ``SELECT * WHERE <expr>`` (after
    ``quote_reserved_identifiers``) and parsed with sqlglot in
    ``self.dialect``. Nested ``exp.And`` nodes are flattened left to right;
    every other node is emitted whole, re-serialized in ``self.dialect``.

    Returns:
        The operand SQL strings when there are at least two. Otherwise, and
        on any parsing or serialization failure, ``[expr]`` with the original
        text untouched.
    """

    def _operands(node):
        # Depth-first, left before right, so operands keep source order.
        if isinstance(node, exp.And):
            yield from _operands(node.left)
            yield from _operands(node.right)
        else:
            yield node.sql(dialect=self.dialect)

    try:
        wrapped_sql = f"SELECT * WHERE {quote_reserved_identifiers(expr)}"
        where_node = sqlglot.parse_one(wrapped_sql, dialect=self.dialect).find(
            exp.Where
        )
        if not where_node:
            return [expr]
        operands: list[str] = list(_operands(where_node.this))
        if len(operands) > 1:
            return operands
    except Exception:
        # Best effort: an unparseable filter is passed through as one clause.
        logger.debug("Failed to split top-level AND in filter: %s", expr)
    return [expr]
def _validate_visibility(self, query: SemanticQuery) -> None:
    """Reject queries that reference hidden columns.

    Collects the columns marked ``ColumnVisibility.HIDDEN`` for each source,
    then parses every dimension, measure and filter expression of ``query``
    and checks its qualified ``source.column`` references. Source aliases are
    resolved through ``self.graph.alias_map``.

    Raises:
        ValueError: if an expression references a hidden column.
    """
    # Hidden columns per source: {source_name: {col_name, ...}}
    hidden: dict[str, set[str]] = {}
    for source in self.sources.values():
        for col in source.columns:
            if col.visibility == ColumnVisibility.HIDDEN:
                hidden.setdefault(source.name, set()).add(col.name)

    if not hidden:
        return

    # Gather the raw expressions of the query. Entries that are neither str
    # nor dict are ignored here.
    # NOTE(review): confirm no other dimension/measure shapes reach this
    # method, otherwise they bypass the visibility check.
    all_exprs: list[str] = []
    for d in query.dimensions:
        if isinstance(d, str):
            all_exprs.append(d)
        elif isinstance(d, dict):
            all_exprs.append(d.get("field", ""))
    for m in query.measures:
        if isinstance(m, str):
            all_exprs.append(m)
        elif isinstance(m, dict):
            all_exprs.append(m.get("expr", ""))
    all_exprs.extend(query.filters)

    for expr in all_exprs:
        # A dict without "field"/"expr" contributes an empty string; there is
        # nothing to check, so do not hand it to the parser.
        if not expr or not expr.strip():
            continue
        parsed = self.parser.parse(expr)
        for col_ref in parsed.column_refs:
            source_name, dot, col_name = col_ref.partition(".")
            if not dot:
                # Unqualified reference: it cannot be attributed to a source
                # here. Skip it instead of failing on tuple unpacking.
                continue
            resolved = self.graph.alias_map.get(source_name, source_name)
            if resolved in hidden and col_name in hidden[resolved]:
                raise ValueError(
                    f"Column '{source_name}.{col_name}' is hidden and cannot be queried"
                )
def extract_table_refs(sql: str, dialect: str = "postgres") -> list[tuple[str, ...]]:
    """Return a deduped list of warehouse-table refs found in `sql` as
    tuples of normalized (lowercase, unquoted) name parts.

    Skips references to CTEs. Returns refs in the order they first appear
    so callers can present consistent error messages. Each tuple is the
    fully-qualified name as written in the SQL: `("staging", "shipments")`,
    `("analytics", "marts", "listings")`, or `("listings",)`.

    On parse failure returns []; coverage check is best-effort and must
    not break source writes when the SQL has unusual syntax.
    """
    try:
        tree = sqlglot.parse_one(sql, dialect=dialect)
    except Exception as e:
        logger.debug("sql_table_extractor: parse failed (%s); skipping coverage", e)
        return []

    cte_names = {cte.alias_or_name.lower() for cte in tree.find_all(exp.CTE)}

    seen: set[tuple[str, ...]] = set()
    out: list[tuple[str, ...]] = []
    for t in tree.find_all(exp.Table):
        name = (t.name or "").lower()
        if not name:
            continue
        qualifiers = [
            node.name.lower()
            for node in (t.args.get("catalog"), t.args.get("db"))
            if node and getattr(node, "name", None)
        ]
        # Only an unqualified name can refer to a CTE. `staging.orders` is a
        # real table even when the query also defines a CTE called `orders`
        # (e.g. `WITH orders AS (SELECT * FROM staging.orders) ...`).
        # NOTE(review): an unqualified table read inside a same-named CTE body
        # is still skipped; telling those apart needs scope analysis.
        if not qualifiers and name in cte_names:
            continue
        ref = (*qualifiers, name)
        if ref not in seen:
            seen.add(ref)
            out.append(ref)
    return out


def normalize_table(value: str) -> tuple[str, ...]:
    """Split a `table:` field value into normalized, lowercased parts.

    Parts are separated by `.`. A part may be wrapped in double quotes,
    backticks or square brackets; the wrapper is removed and a dot inside it
    does not split the part. Whitespace around a part is dropped and empty
    parts are ignored. Escaped quote characters inside an identifier are not
    interpreted.
    """
    closers = {'"': '"', "`": "`", "[": "]"}
    parts: list[str] = []
    current: list[str] = []
    closing: str | None = None  # expected closing char while inside quotes
    for ch in value:
        if closing is not None:
            if ch == closing:
                closing = None
            else:
                current.append(ch)
        elif ch in closers:
            closing = closers[ch]
        elif ch == ".":
            parts.append("".join(current))
            current = []
        else:
            current.append(ch)
    parts.append("".join(current))
    return tuple(p.strip().lower() for p in parts if p.strip())
def parse_table_identifier_one(sql_table_name: str, dialect: str) -> ParsedIdentifier:
    """Parse a single `sql_table_name` value into catalog / schema / name.

    The value is embedded in ``SELECT * FROM <value>`` and parsed with sqlglot
    using the lower-cased ``dialect``. The result is ``ok=True`` only when the
    FROM target is a table and the statement references exactly one table.
    Otherwise ``ok=False`` is returned with one of these reasons:

    - ``unsupported_dialect``: dialect not in
      ``SUPPORTED_TABLE_IDENTIFIER_DIALECTS``.
    - ``looker_template_unresolved``: the value contains ``${`` or ``@{``.
    - ``no_physical_table``: no FROM target or no table found.
    - ``derived_table_not_supported``: the FROM target is not a table.
    - ``multiple_table_references``: more than one table in the statement.
    - ``parse_error``: sqlglot raised; ``detail`` carries the message.
    """

    def _rejected(reason, detail=None):
        return ParsedIdentifier(ok=False, reason=reason, detail=detail)

    dialect_key = dialect.lower()
    if dialect_key not in SUPPORTED_TABLE_IDENTIFIER_DIALECTS:
        return _rejected(
            "unsupported_dialect",
            f"Unsupported sqlglot dialect for table identifier parsing: {dialect}",
        )

    # Unresolved template markers would not parse as a table name.
    if any(marker in sql_table_name for marker in ("${", "@{")):
        return _rejected("looker_template_unresolved")

    try:
        statement = sqlglot.parse_one(
            f"SELECT * FROM {sql_table_name}",
            read=dialect_key,
        )
        from_node = statement.args.get("from_")
        if from_node is None or from_node.this is None:
            return _rejected("no_physical_table")

        target = from_node.this
        is_derived = isinstance(target, (exp.Subquery, exp.Values, exp.Lateral))
        if is_derived or not isinstance(target, exp.Table):
            return _rejected("derived_table_not_supported")

        referenced = list(statement.find_all(exp.Table))
        if not referenced:
            return _rejected("no_physical_table")
        if len(referenced) > 1:
            return _rejected("multiple_table_references")

        only = referenced[0]
        # Rebuild the table from its bare parts so the canonical form carries
        # no alias or other decoration from the original text.
        name_id = exp.to_identifier(only.name)
        schema_id = exp.to_identifier(only.db) if only.db else None
        catalog_id = exp.to_identifier(only.catalog) if only.catalog else None
        canonical = exp.Table(this=name_id, db=schema_id, catalog=catalog_id).sql(
            dialect=dialect_key
        )

        return ParsedIdentifier(
            ok=True,
            catalog=only.catalog or None,
            schema_=only.db or None,
            name=only.name,
            canonical_table=canonical,
        )
    except sqlglot.errors.ParseError as exc:
        return _rejected("parse_error", str(exc))
    except Exception as exc:
        logger.exception("Unexpected failure while parsing Looker sql_table_name")
        return _rejected("parse_error", str(exc))
index 00000000..c517b461 --- /dev/null +++ b/python/klo-sl/sources/b2b_saas/abm_engagements.yaml @@ -0,0 +1,15 @@ +name: abm_engagements +table: abm_engagements +grain: +- row_id +columns: +- name: account_id + type: number +- name: engagement_month + type: string +- name: row_id + type: number +joins: +- to: accounts + 'on': account_id = accounts.account_id + relationship: many_to_one diff --git a/python/klo-sl/sources/b2b_saas/account_intent_signals.yaml b/python/klo-sl/sources/b2b_saas/account_intent_signals.yaml new file mode 100644 index 00000000..dee1cb16 --- /dev/null +++ b/python/klo-sl/sources/b2b_saas/account_intent_signals.yaml @@ -0,0 +1,18 @@ +name: account_intent_signals +table: account_intent_signals +grain: +- signal_id +columns: +- name: signal_id + type: number +- name: account_id + type: number +- name: signal_date + type: time + role: time +- name: topic + type: string +joins: +- to: accounts + 'on': account_id = accounts.account_id + relationship: many_to_one diff --git a/python/klo-sl/sources/b2b_saas/accounts.yaml b/python/klo-sl/sources/b2b_saas/accounts.yaml new file mode 100644 index 00000000..0eb39fe8 --- /dev/null +++ b/python/klo-sl/sources/b2b_saas/accounts.yaml @@ -0,0 +1,23 @@ +name: accounts +table: accounts +grain: +- account_id +columns: +- name: account_id + type: number +- name: account_name + type: string +- name: csm_rep_id + type: number +- name: industry + type: string +- name: is_customer + type: string +- name: region + type: string +- name: segment + type: string +joins: +- to: sales_reps + 'on': csm_rep_id = sales_reps.rep_id + relationship: many_to_one diff --git a/python/klo-sl/sources/b2b_saas/activities.yaml b/python/klo-sl/sources/b2b_saas/activities.yaml new file mode 100644 index 00000000..4b6a1bff --- /dev/null +++ b/python/klo-sl/sources/b2b_saas/activities.yaml @@ -0,0 +1,36 @@ +name: activities +table: activities +grain: +- activity_id +columns: +- name: activity_id + type: number +- name: account_id + type: 
number +- name: activity_date + type: time + role: time +- name: activity_type + type: string +- name: channel + type: string +- name: direction + type: string +- name: duration_minutes + type: number +- name: opportunity_id + type: number +- name: rep_id + type: number +- name: subject + type: string +joins: +- to: accounts + 'on': account_id = accounts.account_id + relationship: many_to_one +- to: opportunities + 'on': opportunity_id = opportunities.opportunity_id + relationship: many_to_one +- to: sales_reps + 'on': rep_id = sales_reps.rep_id + relationship: many_to_one diff --git a/python/klo-sl/sources/b2b_saas/ad_accounts.yaml b/python/klo-sl/sources/b2b_saas/ad_accounts.yaml new file mode 100644 index 00000000..39c047ad --- /dev/null +++ b/python/klo-sl/sources/b2b_saas/ad_accounts.yaml @@ -0,0 +1,13 @@ +name: ad_accounts +table: ad_accounts +grain: +- ad_account_id +columns: +- name: ad_account_id + type: number +- name: account_name + type: string +- name: currency + type: string +- name: platform + type: string diff --git a/python/klo-sl/sources/b2b_saas/ad_ad_stats.yaml b/python/klo-sl/sources/b2b_saas/ad_ad_stats.yaml new file mode 100644 index 00000000..13f889d0 --- /dev/null +++ b/python/klo-sl/sources/b2b_saas/ad_ad_stats.yaml @@ -0,0 +1,24 @@ +name: ad_ad_stats +table: ad_ad_stats +grain: +- row_id +columns: +- name: ad_id + type: number +- name: clicks + type: number +- name: conversions + type: number +- name: impressions + type: number +- name: row_id + type: number +- name: spend + type: number +- name: stat_date + type: time + role: time +joins: +- to: ads + 'on': ad_id = ads.ad_id + relationship: many_to_one diff --git a/python/klo-sl/sources/b2b_saas/ad_campaigns.yaml b/python/klo-sl/sources/b2b_saas/ad_campaigns.yaml new file mode 100644 index 00000000..2e9ff43a --- /dev/null +++ b/python/klo-sl/sources/b2b_saas/ad_campaigns.yaml @@ -0,0 +1,28 @@ +name: ad_campaigns +table: ad_campaigns +grain: +- ad_campaign_id +columns: +- name: 
ad_campaign_id + type: number +- name: ad_account_id + type: number +- name: campaign_name + type: string +- name: channel + type: string +- name: end_date + type: time + role: time +- name: objective + type: string +- name: start_date + type: time + role: time +joins: +- to: ad_accounts + 'on': ad_account_id = ad_accounts.ad_account_id + relationship: many_to_one +- to: accounts + 'on': ad_account_id = accounts.account_id + relationship: many_to_one diff --git a/python/klo-sl/sources/b2b_saas/ad_creative_stats.yaml b/python/klo-sl/sources/b2b_saas/ad_creative_stats.yaml new file mode 100644 index 00000000..f9150f4c --- /dev/null +++ b/python/klo-sl/sources/b2b_saas/ad_creative_stats.yaml @@ -0,0 +1,24 @@ +name: ad_creative_stats +table: ad_creative_stats +grain: +- row_id +columns: +- name: clicks + type: number +- name: conversions + type: number +- name: creative_id + type: number +- name: impressions + type: number +- name: row_id + type: number +- name: spend + type: number +- name: stat_date + type: time + role: time +joins: +- to: ad_creatives + 'on': creative_id = ad_creatives.creative_id + relationship: many_to_one diff --git a/python/klo-sl/sources/b2b_saas/ad_creatives.yaml b/python/klo-sl/sources/b2b_saas/ad_creatives.yaml new file mode 100644 index 00000000..7929031c --- /dev/null +++ b/python/klo-sl/sources/b2b_saas/ad_creatives.yaml @@ -0,0 +1,20 @@ +name: ad_creatives +table: ad_creatives +grain: +- creative_id +columns: +- name: creative_id + type: number +- name: ad_campaign_id + type: number +- name: created_at + type: time + role: time +- name: format + type: string +- name: name + type: string +joins: +- to: ad_campaigns + 'on': ad_campaign_id = ad_campaigns.ad_campaign_id + relationship: many_to_one diff --git a/python/klo-sl/sources/b2b_saas/ad_groups.yaml b/python/klo-sl/sources/b2b_saas/ad_groups.yaml new file mode 100644 index 00000000..c804a0fe --- /dev/null +++ b/python/klo-sl/sources/b2b_saas/ad_groups.yaml @@ -0,0 +1,17 @@ +name: 
ad_groups +table: ad_groups +grain: +- ad_group_id +columns: +- name: ad_group_id + type: number +- name: ad_campaign_id + type: number +- name: name + type: string +- name: status + type: string +joins: +- to: ad_campaigns + 'on': ad_campaign_id = ad_campaigns.ad_campaign_id + relationship: many_to_one diff --git a/python/klo-sl/sources/b2b_saas/ad_stats.yaml b/python/klo-sl/sources/b2b_saas/ad_stats.yaml new file mode 100644 index 00000000..f9b1798e --- /dev/null +++ b/python/klo-sl/sources/b2b_saas/ad_stats.yaml @@ -0,0 +1,24 @@ +name: ad_stats +table: ad_stats +grain: +- stat_id +columns: +- name: stat_id + type: number +- name: ad_campaign_id + type: number +- name: clicks + type: number +- name: conversions + type: number +- name: impressions + type: number +- name: spend + type: number +- name: stat_date + type: time + role: time +joins: +- to: ad_campaigns + 'on': ad_campaign_id = ad_campaigns.ad_campaign_id + relationship: many_to_one diff --git a/python/klo-sl/sources/b2b_saas/ads.yaml b/python/klo-sl/sources/b2b_saas/ads.yaml new file mode 100644 index 00000000..da4e00af --- /dev/null +++ b/python/klo-sl/sources/b2b_saas/ads.yaml @@ -0,0 +1,20 @@ +name: ads +table: ads +grain: +- ad_id +columns: +- name: ad_id + type: number +- name: ad_group_id + type: number +- name: created_at + type: time + role: time +- name: name + type: string +- name: status + type: string +joins: +- to: ad_groups + 'on': ad_group_id = ad_groups.ad_group_id + relationship: many_to_one diff --git a/python/klo-sl/sources/b2b_saas/ap_bills.yaml b/python/klo-sl/sources/b2b_saas/ap_bills.yaml new file mode 100644 index 00000000..a0e5b26e --- /dev/null +++ b/python/klo-sl/sources/b2b_saas/ap_bills.yaml @@ -0,0 +1,23 @@ +name: ap_bills +table: ap_bills +grain: +- bill_id +columns: +- name: bill_id + type: number +- name: amount + type: number +- name: bill_date + type: time + role: time +- name: due_date + type: time + role: time +- name: status + type: string +- name: vendor_id + type: 
number +joins: +- to: vendors + 'on': vendor_id = vendors.vendor_id + relationship: many_to_one diff --git a/python/klo-sl/sources/b2b_saas/approvals.yaml b/python/klo-sl/sources/b2b_saas/approvals.yaml new file mode 100644 index 00000000..7989708f --- /dev/null +++ b/python/klo-sl/sources/b2b_saas/approvals.yaml @@ -0,0 +1,26 @@ +name: approvals +table: approvals +grain: +- approval_id +columns: +- name: approval_id + type: number +- name: approved_at + type: time + role: time +- name: approver_rep_id + type: number +- name: quote_id + type: number +- name: requested_at + type: time + role: time +- name: status + type: string +joins: +- to: sales_reps + 'on': approver_rep_id = sales_reps.rep_id + relationship: many_to_one +- to: quotes + 'on': quote_id = quotes.quote_id + relationship: many_to_one diff --git a/python/klo-sl/sources/b2b_saas/attribution_credits.yaml b/python/klo-sl/sources/b2b_saas/attribution_credits.yaml new file mode 100644 index 00000000..9ac2a1db --- /dev/null +++ b/python/klo-sl/sources/b2b_saas/attribution_credits.yaml @@ -0,0 +1,22 @@ +name: attribution_credits +table: attribution_credits +grain: +- credit_id +columns: +- name: credit_id + type: number +- name: credit + type: string +- name: model + type: string +- name: opportunity_id + type: number +- name: touchpoint_id + type: number +joins: +- to: touchpoints + 'on': touchpoint_id = touchpoints.touchpoint_id + relationship: many_to_one +- to: opportunities + 'on': opportunity_id = opportunities.opportunity_id + relationship: many_to_one diff --git a/python/klo-sl/sources/b2b_saas/budgets.yaml b/python/klo-sl/sources/b2b_saas/budgets.yaml new file mode 100644 index 00000000..2fa761df --- /dev/null +++ b/python/klo-sl/sources/b2b_saas/budgets.yaml @@ -0,0 +1,15 @@ +name: budgets +table: budgets +grain: +- budget_id +columns: +- name: budget_id + type: number +- name: department + type: string +- name: period_end + type: string +- name: period_start + type: string +- name: planned_amount 
# Sales call log: one row per call, linked to an opportunity and a sales rep.
name: calls
table: calls
grain:
- call_id
columns:
- name: call_id
  type: number
- name: call_date
  type: time
  role: time
- name: duration_minutes
  type: number
- name: opportunity_id
  type: number
- name: rep_id
  type: number
# sentiment is not a timestamp, so it must not carry `type: time` / `role: time`.
# NOTE(review): switch to `number` if the warehouse stores a numeric score.
- name: sentiment
  type: string
- name: transcript_url
  type: string
joins:
- to: opportunities
  'on': opportunity_id = opportunities.opportunity_id
  relationship: many_to_one
- to: sales_reps
  'on': rep_id = sales_reps.rep_id
  relationship: many_to_one
# Corporate card transactions: one row per card transaction.
name: card_transactions
table: card_transactions
# The grain must identify a row. `amount` repeats across transactions, so the
# transaction id is used instead.
# NOTE(review): confirm card_txn_id is unique in the warehouse table.
grain:
- card_txn_id
columns:
- name: amount
  type: number
- name: card_txn_id
  type: number
- name: department
  type: string
- name: employee_email
  type: string
- name: txn_date
  type: time
  role: time
- name: vendor_id
  type: number
joins:
- to: vendors
  'on': vendor_id = vendors.vendor_id
  relationship: many_to_one
churn_risk +description: | + Per-account churn risk scoring for B2B SaaS customers. Combines signals from + subscriptions (cancellation history), support tickets (severity, SLA breaches), + product usage (adoption decline), contracts (renewal proximity), CSM activities + (engagement recency), and invoices (payment issues) into a weighted composite + risk_score (0-1) and risk_tier (High/Medium/Low). One row per customer account. +sql: | + WITH sub_signals AS ( + SELECT + account_id, + MAX(CASE WHEN canceled_at IS NOT NULL THEN 1 ELSE 0 END) AS has_canceled, + COUNT(CASE WHEN canceled_at IS NOT NULL THEN 1 END) AS canceled_count, + STRING_AGG(DISTINCT churn_reason, ', ') AS churn_reasons + FROM subscriptions + GROUP BY account_id + ), + ticket_signals AS ( + SELECT + account_id, + COUNT(*) AS total_tickets, + COUNT(CASE WHEN status = 'Open' THEN 1 END) AS open_tickets, + COUNT(CASE WHEN severity = 'High' THEN 1 END) AS high_severity_tickets, + COUNT(CASE WHEN sla_breached = '1' OR sla_breached = 'true' THEN 1 END) AS sla_breaches + FROM support_tickets + GROUP BY account_id + ), + usage_signals AS ( + SELECT + account_id, + AVG(CASE WHEN CURRENT_DATE - usage_date <= 90 + THEN CAST(active_users AS NUMERIC) END) AS recent_active_users, + AVG(CASE WHEN CURRENT_DATE - usage_date > 90 + AND CURRENT_DATE - usage_date <= 180 + THEN CAST(active_users AS NUMERIC) END) AS prior_active_users, + AVG(CASE WHEN CURRENT_DATE - usage_date <= 90 + THEN CAST(events_count AS NUMERIC) END) AS recent_events, + AVG(CASE WHEN CURRENT_DATE - usage_date > 90 + AND CURRENT_DATE - usage_date <= 180 + THEN CAST(events_count AS NUMERIC) END) AS prior_events + FROM product_usage + GROUP BY account_id + ), + contract_signals AS ( + SELECT + account_id, + MAX(arr) AS current_arr, + MIN(CASE WHEN status = 'Active' + THEN end_date - CURRENT_DATE END) AS days_to_renewal, + COUNT(CASE WHEN status = 'Active' THEN 1 END) AS active_contracts + FROM contracts + GROUP BY account_id + ), + activity_signals 
AS ( + SELECT + account_id, + COUNT(CASE WHEN CURRENT_DATE - activity_date::date <= 90 + THEN 1 END) AS recent_activities, + MIN(CURRENT_DATE - activity_date::date) AS days_since_last_activity + FROM activities + GROUP BY account_id + ), + invoice_signals AS ( + SELECT + account_id, + COUNT(CASE WHEN status = 'Partial' THEN 1 END) AS partial_invoices, + COUNT(CASE WHEN CURRENT_DATE > due_date + AND status != 'Paid' THEN 1 END) AS overdue_invoices + FROM invoices + GROUP BY account_id + ), + scored AS ( + SELECT + a.account_id, + COALESCE(s.has_canceled, 0) AS has_canceled, + COALESCE(s.canceled_count, 0) AS canceled_count, + s.churn_reasons, + COALESCE(t.open_tickets, 0) AS open_tickets, + COALESCE(t.high_severity_tickets, 0) AS high_severity_tickets, + COALESCE(t.sla_breaches, 0) AS sla_breaches, + COALESCE(u.recent_active_users, 0) AS recent_active_users, + COALESCE(u.prior_active_users, 0) AS prior_active_users, + COALESCE(u.recent_events, 0) AS recent_events, + COALESCE(c.current_arr, 0) AS current_arr, + COALESCE(c.days_to_renewal, 999) AS days_to_renewal, + COALESCE(c.active_contracts, 0) AS active_contracts, + COALESCE(act.recent_activities, 0) AS recent_activities, + COALESCE(act.days_since_last_activity, 999) AS days_since_last_activity, + COALESCE(inv.partial_invoices, 0) AS partial_invoices, + COALESCE(inv.overdue_invoices, 0) AS overdue_invoices, + CASE WHEN COALESCE(s.has_canceled, 0) = 1 THEN 1.0 + WHEN COALESCE(s.canceled_count, 0) > 0 THEN 0.7 + ELSE 0.1 END AS subscription_risk, + CASE WHEN COALESCE(t.high_severity_tickets, 0) >= 3 THEN 0.9 + WHEN COALESCE(t.sla_breaches, 0) >= 2 THEN 0.8 + WHEN COALESCE(t.open_tickets, 0) >= 3 THEN 0.7 + WHEN COALESCE(t.open_tickets, 0) >= 1 THEN 0.4 + ELSE 0.1 END AS support_risk, + CASE WHEN COALESCE(u.recent_active_users, 0) = 0 THEN 0.9 + WHEN COALESCE(u.prior_active_users, 0) > 0 + AND COALESCE(u.recent_active_users, 0) < COALESCE(u.prior_active_users, 0) * 0.5 + THEN 0.8 + WHEN COALESCE(u.prior_active_users, 
0) > 0 + AND COALESCE(u.recent_active_users, 0) < COALESCE(u.prior_active_users, 0) * 0.8 + THEN 0.5 + ELSE 0.1 END AS usage_risk, + CASE WHEN COALESCE(c.days_to_renewal, 999) <= 30 THEN 0.9 + WHEN COALESCE(c.days_to_renewal, 999) <= 60 THEN 0.7 + WHEN COALESCE(c.days_to_renewal, 999) <= 90 THEN 0.5 + WHEN COALESCE(c.active_contracts, 0) = 0 THEN 0.8 + ELSE 0.1 END AS contract_risk, + CASE WHEN COALESCE(act.days_since_last_activity, 999) > 90 THEN 0.9 + WHEN COALESCE(act.days_since_last_activity, 999) > 60 THEN 0.7 + WHEN COALESCE(act.recent_activities, 0) <= 2 THEN 0.6 + WHEN COALESCE(act.days_since_last_activity, 999) > 30 THEN 0.4 + ELSE 0.1 END AS engagement_risk, + CASE WHEN COALESCE(inv.overdue_invoices, 0) >= 2 THEN 0.9 + WHEN COALESCE(inv.overdue_invoices, 0) >= 1 THEN 0.7 + WHEN COALESCE(inv.partial_invoices, 0) >= 2 THEN 0.6 + WHEN COALESCE(inv.partial_invoices, 0) >= 1 THEN 0.3 + ELSE 0.1 END AS payment_risk + FROM accounts a + LEFT JOIN sub_signals s ON a.account_id = s.account_id + LEFT JOIN ticket_signals t ON a.account_id = t.account_id + LEFT JOIN usage_signals u ON a.account_id = u.account_id + LEFT JOIN contract_signals c ON a.account_id = c.account_id + LEFT JOIN activity_signals act ON a.account_id = act.account_id + LEFT JOIN invoice_signals inv ON a.account_id = inv.account_id + WHERE a.is_customer = '1' + ) + SELECT + account_id, + has_canceled, + canceled_count, + churn_reasons, + open_tickets, + high_severity_tickets, + sla_breaches, + recent_active_users, + prior_active_users, + recent_events, + current_arr, + days_to_renewal, + active_contracts, + recent_activities, + days_since_last_activity, + partial_invoices, + overdue_invoices, + subscription_risk, + support_risk, + usage_risk, + contract_risk, + engagement_risk, + payment_risk, + ROUND( + subscription_risk * 0.20 + + support_risk * 0.20 + + usage_risk * 0.20 + + contract_risk * 0.15 + + engagement_risk * 0.15 + + payment_risk * 0.10, + 3 + ) AS risk_score, + CASE + WHEN 
(subscription_risk * 0.20 + + support_risk * 0.20 + + usage_risk * 0.20 + + contract_risk * 0.15 + + engagement_risk * 0.15 + + payment_risk * 0.10) >= 0.7 THEN 'High' + WHEN (subscription_risk * 0.20 + + support_risk * 0.20 + + usage_risk * 0.20 + + contract_risk * 0.15 + + engagement_risk * 0.15 + + payment_risk * 0.10) >= 0.4 THEN 'Medium' + ELSE 'Low' + END AS risk_tier + FROM scored +grain: + - account_id +columns: + - name: account_id + type: number + - name: has_canceled + type: number + description: "1 if the account has any canceled subscription" + - name: canceled_count + type: number + description: "Number of canceled subscriptions" + - name: churn_reasons + type: string + description: "Comma-separated distinct churn reasons from subscriptions" + - name: open_tickets + type: number + description: "Count of currently open support tickets" + - name: high_severity_tickets + type: number + description: "Count of high-severity support tickets" + - name: sla_breaches + type: number + description: "Count of support tickets with SLA breaches" + - name: recent_active_users + type: number + description: "Average active users in the last 90 days" + - name: prior_active_users + type: number + description: "Average active users 90-180 days ago (for trend comparison)" + - name: recent_events + type: number + description: "Average event count in the last 90 days" + - name: current_arr + type: number + description: "Highest ARR from active contracts" + - name: days_to_renewal + type: number + description: "Days until the nearest active contract expires" + - name: active_contracts + type: number + description: "Count of active contracts" + - name: recent_activities + type: number + description: "CSM activities (calls, meetings, emails, tasks) in the last 90 days" + - name: days_since_last_activity + type: number + description: "Days since the most recent CSM activity" + - name: partial_invoices + type: number + description: "Count of invoices with Partial payment status" 
+ - name: overdue_invoices + type: number + description: "Count of overdue unpaid invoices" + - name: subscription_risk + type: number + description: "Subscription cancellation risk sub-score (0.0-1.0)" + - name: support_risk + type: number + description: "Support burden risk sub-score (0.0-1.0)" + - name: usage_risk + type: number + description: "Product usage decline risk sub-score (0.0-1.0)" + - name: contract_risk + type: number + description: "Contract renewal proximity risk sub-score (0.0-1.0)" + - name: engagement_risk + type: number + description: "CSM engagement gap risk sub-score (0.0-1.0)" + - name: payment_risk + type: number + description: "Payment issues risk sub-score (0.0-1.0)" + - name: risk_score + type: number + description: "Weighted composite churn risk score (0.0-1.0); higher = riskier" + - name: risk_tier + type: string + description: "Churn risk tier: High (>=0.7), Medium (>=0.4), Low (<0.4)" +joins: + - to: accounts + "on": account_id = accounts.account_id + relationship: one_to_one +measures: + - name: avg_risk_score + expr: avg(risk_score) + description: "Average churn risk score across accounts" + - name: high_risk_accounts + expr: count(account_id) + filter: "risk_tier = 'High'" + description: "Number of accounts in the High risk tier" + - name: medium_risk_accounts + expr: count(account_id) + filter: "risk_tier = 'Medium'" + description: "Number of accounts in the Medium risk tier" + - name: low_risk_accounts + expr: count(account_id) + filter: "risk_tier = 'Low'" + description: "Number of accounts in the Low risk tier" + - name: total_arr_at_risk + expr: sum(current_arr) + filter: "risk_tier = 'High'" + description: "Total ARR from accounts in the High risk tier" + - name: avg_support_risk + expr: avg(support_risk) + description: "Average support burden risk sub-score" + - name: avg_usage_risk + expr: avg(usage_risk) + description: "Average usage decline risk sub-score" + - name: accounts_expiring_90d + expr: count(account_id) + 
filter: "days_to_renewal <= 90" + description: "Accounts with contracts expiring within 90 days" diff --git a/python/klo-sl/sources/b2b_saas/contacts.yaml b/python/klo-sl/sources/b2b_saas/contacts.yaml new file mode 100644 index 00000000..a3ddd45d --- /dev/null +++ b/python/klo-sl/sources/b2b_saas/contacts.yaml @@ -0,0 +1,23 @@ +name: contacts +table: contacts +grain: +- contact_id +columns: +- name: contact_id + type: number +- name: account_id + type: number +- name: email + type: string +- name: first_name + type: string +- name: last_name + type: string +- name: phone + type: string +- name: title + type: string +joins: +- to: accounts + 'on': account_id = accounts.account_id + relationship: many_to_one diff --git a/python/klo-sl/sources/b2b_saas/content_assets.yaml b/python/klo-sl/sources/b2b_saas/content_assets.yaml new file mode 100644 index 00000000..26d872f4 --- /dev/null +++ b/python/klo-sl/sources/b2b_saas/content_assets.yaml @@ -0,0 +1,16 @@ +name: content_assets +table: content_assets +grain: +- asset_id +columns: +- name: asset_id + type: number +- name: content_type + type: string +- name: publish_date + type: time + role: time +- name: title + type: string +- name: url + type: string diff --git a/python/klo-sl/sources/b2b_saas/content_touches.yaml b/python/klo-sl/sources/b2b_saas/content_touches.yaml new file mode 100644 index 00000000..d60a92d2 --- /dev/null +++ b/python/klo-sl/sources/b2b_saas/content_touches.yaml @@ -0,0 +1,33 @@ +name: content_touches +table: content_touches +grain: +- touch_id +columns: +- name: touch_id + type: number +- name: account_id + type: number +- name: action + type: string +- name: asset_id + type: number +- name: lead_id + type: number +- name: opportunity_id + type: number +- name: touched_at + type: time + role: time +joins: +- to: leads + 'on': lead_id = leads.lead_id + relationship: many_to_one +- to: opportunities + 'on': opportunity_id = opportunities.opportunity_id + relationship: many_to_one +- to: 
content_assets + 'on': asset_id = content_assets.asset_id + relationship: many_to_one +- to: accounts + 'on': account_id = accounts.account_id + relationship: many_to_one diff --git a/python/klo-sl/sources/b2b_saas/contracts.yaml b/python/klo-sl/sources/b2b_saas/contracts.yaml new file mode 100644 index 00000000..9bfa9bdc --- /dev/null +++ b/python/klo-sl/sources/b2b_saas/contracts.yaml @@ -0,0 +1,30 @@ +name: contracts +table: contracts +grain: +- contract_id +columns: +- name: contract_id + type: number +- name: account_id + type: number +- name: arr + type: number +- name: contract_number + type: string +- name: end_date + type: time + role: time +- name: opportunity_id + type: number +- name: start_date + type: time + role: time +- name: status + type: string +joins: +- to: accounts + 'on': account_id = accounts.account_id + relationship: many_to_one +- to: opportunities + 'on': opportunity_id = opportunities.opportunity_id + relationship: many_to_one diff --git a/python/klo-sl/sources/b2b_saas/crm_notes.yaml b/python/klo-sl/sources/b2b_saas/crm_notes.yaml new file mode 100644 index 00000000..cd95cfd2 --- /dev/null +++ b/python/klo-sl/sources/b2b_saas/crm_notes.yaml @@ -0,0 +1,23 @@ +name: crm_notes +table: crm_notes +grain: +- note_id +columns: +- name: note_id + type: number +- name: created_at + type: time + role: time +- name: note_text + type: string +- name: opportunity_id + type: number +- name: rep_id + type: number +joins: +- to: opportunities + 'on': opportunity_id = opportunities.opportunity_id + relationship: many_to_one +- to: sales_reps + 'on': rep_id = sales_reps.rep_id + relationship: many_to_one diff --git a/python/klo-sl/sources/b2b_saas/currencies.yaml b/python/klo-sl/sources/b2b_saas/currencies.yaml new file mode 100644 index 00000000..1187ed7c --- /dev/null +++ b/python/klo-sl/sources/b2b_saas/currencies.yaml @@ -0,0 +1,9 @@ +name: currencies +table: currencies +grain: +- currency_code +columns: +- name: currency_code + type: string +- 
name: currency_name + type: string diff --git a/python/klo-sl/sources/b2b_saas/departments_hr.yaml b/python/klo-sl/sources/b2b_saas/departments_hr.yaml new file mode 100644 index 00000000..9bddf5c7 --- /dev/null +++ b/python/klo-sl/sources/b2b_saas/departments_hr.yaml @@ -0,0 +1,9 @@ +name: departments_hr +table: departments_hr +grain: +- dept_id +columns: +- name: dept_id + type: number +- name: dept_name + type: string diff --git a/python/klo-sl/sources/b2b_saas/disputes.yaml b/python/klo-sl/sources/b2b_saas/disputes.yaml new file mode 100644 index 00000000..c59f25e5 --- /dev/null +++ b/python/klo-sl/sources/b2b_saas/disputes.yaml @@ -0,0 +1,23 @@ +name: disputes +table: disputes +grain: +- dispute_id +columns: +- name: dispute_id + type: number +- name: charge_id + type: number +- name: created_at + type: time + role: time +- name: reason + type: string +- name: resolved_at + type: time + role: time +- name: status + type: string +joins: +- to: charges + 'on': charge_id = charges.charge_id + relationship: many_to_one diff --git a/python/klo-sl/sources/b2b_saas/email_events.yaml b/python/klo-sl/sources/b2b_saas/email_events.yaml new file mode 100644 index 00000000..d161eb59 --- /dev/null +++ b/python/klo-sl/sources/b2b_saas/email_events.yaml @@ -0,0 +1,18 @@ +name: email_events +table: email_events +grain: +- event_id +columns: +- name: event_id + type: number +- name: event_at + type: time + role: time +- name: event_type + type: string +- name: send_id + type: number +joins: +- to: email_sends + 'on': send_id = email_sends.send_id + relationship: many_to_one diff --git a/python/klo-sl/sources/b2b_saas/email_sends.yaml b/python/klo-sl/sources/b2b_saas/email_sends.yaml new file mode 100644 index 00000000..0fd214cc --- /dev/null +++ b/python/klo-sl/sources/b2b_saas/email_sends.yaml @@ -0,0 +1,33 @@ +name: email_sends +table: email_sends +grain: +- send_id +columns: +- name: send_id + type: number +- name: campaign_id + type: number +- name: email_id + type: number 
+- name: lead_id + type: number +- name: rep_id + type: number +- name: sent_at + type: time + role: time +- name: sequence_id + type: number +joins: +- to: campaigns + 'on': campaign_id = campaigns.campaign_id + relationship: many_to_one +- to: leads + 'on': lead_id = leads.lead_id + relationship: many_to_one +- to: sequences + 'on': sequence_id = sequences.sequence_id + relationship: many_to_one +- to: sales_reps + 'on': rep_id = sales_reps.rep_id + relationship: many_to_one diff --git a/python/klo-sl/sources/b2b_saas/employees.yaml b/python/klo-sl/sources/b2b_saas/employees.yaml new file mode 100644 index 00000000..7f64202f --- /dev/null +++ b/python/klo-sl/sources/b2b_saas/employees.yaml @@ -0,0 +1,33 @@ +name: employees +table: employees +grain: +- employee_id +columns: +- name: employee_id + type: number +- name: base_salary + type: number +- name: benefits_cost + type: number +- name: dept_id + type: number +- name: email + type: string +- name: first_name + type: string +- name: hire_date + type: time + role: time +- name: last_name + type: string +- name: region + type: string +- name: role + type: string +- name: termination_date + type: time + role: time +joins: +- to: departments_hr + 'on': dept_id = departments_hr.dept_id + relationship: many_to_one diff --git a/python/klo-sl/sources/b2b_saas/etl_runs.yaml b/python/klo-sl/sources/b2b_saas/etl_runs.yaml new file mode 100644 index 00000000..dcc1d91a --- /dev/null +++ b/python/klo-sl/sources/b2b_saas/etl_runs.yaml @@ -0,0 +1,21 @@ +name: etl_runs +table: etl_runs +grain: +- run_id +columns: +- name: run_id + type: number +- name: destination + type: string +- name: ended_at + type: time + role: time +- name: rows_processed + type: number +- name: source + type: string +- name: started_at + type: time + role: time +- name: status + type: string diff --git a/python/klo-sl/sources/b2b_saas/fiscal_calendar.yaml b/python/klo-sl/sources/b2b_saas/fiscal_calendar.yaml new file mode 100644 index 00000000..73737bd8 
--- /dev/null +++ b/python/klo-sl/sources/b2b_saas/fiscal_calendar.yaml @@ -0,0 +1,17 @@ +name: fiscal_calendar +table: fiscal_calendar +grain: +- calendar_date +columns: +- name: calendar_date + type: time +- name: fiscal_month + type: string +- name: fiscal_quarter + type: string +- name: fiscal_year + type: string +- name: is_month_start + type: string +- name: is_quarter_start + type: string diff --git a/python/klo-sl/sources/b2b_saas/forecast_snapshots.yaml b/python/klo-sl/sources/b2b_saas/forecast_snapshots.yaml new file mode 100644 index 00000000..94acd3e5 --- /dev/null +++ b/python/klo-sl/sources/b2b_saas/forecast_snapshots.yaml @@ -0,0 +1,23 @@ +name: forecast_snapshots +table: forecast_snapshots +grain: +- snapshot_id +columns: +- name: snapshot_id + type: number +- name: category + type: string +- name: rep_id + type: number +- name: snapshot_date + type: time + role: time +- name: team_id + type: number +joins: +- to: sales_teams + 'on': team_id = sales_teams.team_id + relationship: many_to_one +- to: sales_reps + 'on': rep_id = sales_reps.rep_id + relationship: many_to_one diff --git a/python/klo-sl/sources/b2b_saas/fx_rates.yaml b/python/klo-sl/sources/b2b_saas/fx_rates.yaml new file mode 100644 index 00000000..088547d7 --- /dev/null +++ b/python/klo-sl/sources/b2b_saas/fx_rates.yaml @@ -0,0 +1,14 @@ +name: fx_rates +table: fx_rates +grain: +- from_currency +columns: +- name: from_currency + type: string +- name: rate + type: string +- name: rate_date + type: time + role: time +- name: to_currency + type: string diff --git a/python/klo-sl/sources/b2b_saas/ga4_event_params.yaml b/python/klo-sl/sources/b2b_saas/ga4_event_params.yaml new file mode 100644 index 00000000..72ec7560 --- /dev/null +++ b/python/klo-sl/sources/b2b_saas/ga4_event_params.yaml @@ -0,0 +1,23 @@ +name: ga4_event_params +table: ga4_event_params +grain: +- param_id +columns: +- name: param_id + type: number +- name: ga4_event_id + type: number +- name: key + type: string +- name: 
value + type: string +joins: +- to: ga4_events + 'on': ga4_event_id = ga4_events.ga4_event_id + relationship: many_to_one +- to: email_events + 'on': ga4_event_id = email_events.event_id + relationship: many_to_one +- to: web_events + 'on': ga4_event_id = web_events.event_id + relationship: many_to_one diff --git a/python/klo-sl/sources/b2b_saas/ga4_events.yaml b/python/klo-sl/sources/b2b_saas/ga4_events.yaml new file mode 100644 index 00000000..c9d6a24f --- /dev/null +++ b/python/klo-sl/sources/b2b_saas/ga4_events.yaml @@ -0,0 +1,25 @@ +name: ga4_events +table: ga4_events +grain: +- ga4_event_id +columns: +- name: ga4_event_id + type: number +- name: account_id + type: number +- name: event_name + type: string +- name: event_time + type: time + role: time +- name: session_id + type: number +- name: user_id + type: number +joins: +- to: web_sessions + 'on': session_id = web_sessions.session_id + relationship: many_to_one +- to: accounts + 'on': account_id = accounts.account_id + relationship: many_to_one diff --git a/python/klo-sl/sources/b2b_saas/gl_accounts.yaml b/python/klo-sl/sources/b2b_saas/gl_accounts.yaml new file mode 100644 index 00000000..08806283 --- /dev/null +++ b/python/klo-sl/sources/b2b_saas/gl_accounts.yaml @@ -0,0 +1,13 @@ +name: gl_accounts +table: gl_accounts +grain: +- gl_account_id +columns: +- name: gl_account_id + type: number +- name: account_code + type: string +- name: name + type: string +- name: type + type: string diff --git a/python/klo-sl/sources/b2b_saas/identities.yaml b/python/klo-sl/sources/b2b_saas/identities.yaml new file mode 100644 index 00000000..bae5f96d --- /dev/null +++ b/python/klo-sl/sources/b2b_saas/identities.yaml @@ -0,0 +1,22 @@ +name: identities +table: identities +grain: +- identity_id +columns: +- name: identity_id + type: number +- name: account_id + type: number +- name: created_at + type: time + role: time +- name: device_id + type: number +- name: email + type: string +- name: user_id + type: number +joins: 
+- to: accounts + 'on': account_id = accounts.account_id + relationship: many_to_one diff --git a/python/klo-sl/sources/b2b_saas/identity_links.yaml b/python/klo-sl/sources/b2b_saas/identity_links.yaml new file mode 100644 index 00000000..c7ea481f --- /dev/null +++ b/python/klo-sl/sources/b2b_saas/identity_links.yaml @@ -0,0 +1,25 @@ +name: identity_links +table: identity_links +grain: +- link_id +columns: +- name: link_id + type: number +- name: child_identity_id + type: number +- name: linked_at + type: time + role: time +- name: link_source + type: string +- name: parent_identity_id + type: number +joins: +- to: identities + 'on': child_identity_id = identities.identity_id + relationship: many_to_one + alias: identities_1 +- to: identities + 'on': parent_identity_id = identities.identity_id + relationship: many_to_one + alias: identities_2 diff --git a/python/klo-sl/sources/b2b_saas/invoice_lines.yaml b/python/klo-sl/sources/b2b_saas/invoice_lines.yaml new file mode 100644 index 00000000..e9e03839 --- /dev/null +++ b/python/klo-sl/sources/b2b_saas/invoice_lines.yaml @@ -0,0 +1,24 @@ +name: invoice_lines +table: invoice_lines +grain: +- invoice_line_id +columns: +- name: invoice_line_id + type: number +- name: amount + type: number +- name: invoice_id + type: number +- name: product_id + type: number +- name: quantity + type: string +- name: unit_price + type: number +joins: +- to: products + 'on': product_id = products.product_id + relationship: many_to_one +- to: invoices + 'on': invoice_id = invoices.invoice_id + relationship: many_to_one diff --git a/python/klo-sl/sources/b2b_saas/invoices.yaml b/python/klo-sl/sources/b2b_saas/invoices.yaml new file mode 100644 index 00000000..1e55eca5 --- /dev/null +++ b/python/klo-sl/sources/b2b_saas/invoices.yaml @@ -0,0 +1,28 @@ +name: invoices +table: invoices +grain: +- invoice_id +columns: +- name: invoice_id + type: number +- name: account_id + type: number +- name: contract_id + type: number +- name: currency + type: 
string +- name: due_date + type: time + role: time +- name: invoice_date + type: time + role: time +- name: status + type: string +joins: +- to: contracts + 'on': contract_id = contracts.contract_id + relationship: many_to_one +- to: accounts + 'on': account_id = accounts.account_id + relationship: many_to_one diff --git a/python/klo-sl/sources/b2b_saas/journal_entries.yaml b/python/klo-sl/sources/b2b_saas/journal_entries.yaml new file mode 100644 index 00000000..8b347777 --- /dev/null +++ b/python/klo-sl/sources/b2b_saas/journal_entries.yaml @@ -0,0 +1,12 @@ +name: journal_entries +table: journal_entries +grain: +- journal_entry_id +columns: +- name: journal_entry_id + type: number +- name: entry_date + type: time + role: time +- name: memo + type: string diff --git a/python/klo-sl/sources/b2b_saas/journal_lines.yaml b/python/klo-sl/sources/b2b_saas/journal_lines.yaml new file mode 100644 index 00000000..95eae846 --- /dev/null +++ b/python/klo-sl/sources/b2b_saas/journal_lines.yaml @@ -0,0 +1,25 @@ +name: journal_lines +table: journal_lines +grain: +- journal_line_id +columns: +- name: journal_line_id + type: number +- name: amount + type: number +- name: dr_cr + type: string +- name: gl_account_id + type: number +- name: journal_entry_id + type: number +joins: +- to: gl_accounts + 'on': gl_account_id = gl_accounts.gl_account_id + relationship: many_to_one +- to: accounts + 'on': gl_account_id = accounts.account_id + relationship: many_to_one +- to: journal_entries + 'on': journal_entry_id = journal_entries.journal_entry_id + relationship: many_to_one diff --git a/python/klo-sl/sources/b2b_saas/keyword_rankings.yaml b/python/klo-sl/sources/b2b_saas/keyword_rankings.yaml new file mode 100644 index 00000000..3191e35e --- /dev/null +++ b/python/klo-sl/sources/b2b_saas/keyword_rankings.yaml @@ -0,0 +1,20 @@ +name: keyword_rankings +table: keyword_rankings +grain: +- row_id +columns: +- name: domain + type: string +- name: is_competitor + type: string +- name: keyword 
+ type: string +- name: rank + type: string +- name: row_id + type: number +- name: search_volume + type: string +- name: stat_date + type: time + role: time diff --git a/python/klo-sl/sources/b2b_saas/lead_status_history.yaml b/python/klo-sl/sources/b2b_saas/lead_status_history.yaml new file mode 100644 index 00000000..11ae14b8 --- /dev/null +++ b/python/klo-sl/sources/b2b_saas/lead_status_history.yaml @@ -0,0 +1,18 @@ +name: lead_status_history +table: lead_status_history +grain: +- row_id +columns: +- name: changed_at + type: time + role: time +- name: lead_id + type: number +- name: row_id + type: number +- name: status + type: string +joins: +- to: leads + 'on': lead_id = leads.lead_id + relationship: many_to_one diff --git a/python/klo-sl/sources/b2b_saas/leads.yaml b/python/klo-sl/sources/b2b_saas/leads.yaml new file mode 100644 index 00000000..4305db0c --- /dev/null +++ b/python/klo-sl/sources/b2b_saas/leads.yaml @@ -0,0 +1,43 @@ +name: leads +table: leads +grain: +- lead_id +columns: +- name: lead_id + type: number +- name: account_id + type: number +- name: converted_at + type: time + role: time +- name: converted_opportunity_id + type: number +- name: created_at + type: time + role: time +- name: first_touch_at + type: time + role: time +- name: last_touch_at + type: time + role: time +- name: owner_rep_id + type: number +- name: source + type: string +- name: utm_campaign + type: string +- name: utm_medium + type: string +- name: utm_source + type: string +joins: +- to: accounts + 'on': account_id = accounts.account_id + relationship: many_to_one +- to: sales_reps + 'on': owner_rep_id = sales_reps.rep_id + relationship: many_to_one +- to: opportunities + 'on': converted_opportunity_id = opportunities.opportunity_id + relationship: many_to_one diff --git a/python/klo-sl/sources/b2b_saas/meeting_bookings.yaml b/python/klo-sl/sources/b2b_saas/meeting_bookings.yaml new file mode 100644 index 00000000..10b62b25 --- /dev/null +++ 
b/python/klo-sl/sources/b2b_saas/meeting_bookings.yaml @@ -0,0 +1,22 @@ +name: meeting_bookings +table: meeting_bookings +grain: +- meeting_id +columns: +- name: meeting_date + type: time +- name: meeting_id + type: number +- name: opportunity_id + type: number +- name: rep_id + type: number +- name: source + type: string +joins: +- to: sales_reps + 'on': rep_id = sales_reps.rep_id + relationship: many_to_one +- to: opportunities + 'on': opportunity_id = opportunities.opportunity_id + relationship: many_to_one diff --git a/python/klo-sl/sources/b2b_saas/open_roles.yaml b/python/klo-sl/sources/b2b_saas/open_roles.yaml new file mode 100644 index 00000000..2a236ee9 --- /dev/null +++ b/python/klo-sl/sources/b2b_saas/open_roles.yaml @@ -0,0 +1,22 @@ +name: open_roles +table: open_roles +grain: +- req_id +columns: +- name: budgeted_salary + type: number +- name: dept_id + type: number +- name: opened_date + type: time + role: time +- name: req_id + type: number +- name: status + type: string +- name: title + type: string +joins: +- to: departments_hr + 'on': dept_id = departments_hr.dept_id + relationship: many_to_one diff --git a/python/klo-sl/sources/b2b_saas/opportunities.yaml b/python/klo-sl/sources/b2b_saas/opportunities.yaml new file mode 100644 index 00000000..6c3f7e30 --- /dev/null +++ b/python/klo-sl/sources/b2b_saas/opportunities.yaml @@ -0,0 +1,40 @@ +name: opportunities +table: opportunities +grain: +- opportunity_id +columns: +- name: opportunity_id + type: number +- name: account_id + type: number +- name: close_date + type: time + role: time +- name: created_date + type: time + role: time +- name: currency + type: string +- name: lead_source + type: string +- name: owner_rep_id + type: number +- name: parent_opportunity_id + type: number +- name: primary_competitor + type: string +- name: region + type: string +- name: risk_reason + type: string +- name: stage + type: string +- name: type + type: string +joins: +- to: accounts + 'on': account_id
= accounts.account_id + relationship: many_to_one +- to: sales_reps + 'on': owner_rep_id = sales_reps.rep_id + relationship: many_to_one diff --git a/python/klo-sl/sources/b2b_saas/opportunity_contact_roles.yaml b/python/klo-sl/sources/b2b_saas/opportunity_contact_roles.yaml new file mode 100644 index 00000000..d61cef49 --- /dev/null +++ b/python/klo-sl/sources/b2b_saas/opportunity_contact_roles.yaml @@ -0,0 +1,20 @@ +name: opportunity_contact_roles +table: opportunity_contact_roles +grain: +- ocr_id +columns: +- name: contact_id + type: number +- name: ocr_id + type: number +- name: opportunity_id + type: number +- name: role + type: string +joins: +- to: opportunities + 'on': opportunity_id = opportunities.opportunity_id + relationship: many_to_one +- to: contacts + 'on': contact_id = contacts.contact_id + relationship: many_to_one diff --git a/python/klo-sl/sources/b2b_saas/opportunity_line_items.yaml b/python/klo-sl/sources/b2b_saas/opportunity_line_items.yaml new file mode 100644 index 00000000..8c472c00 --- /dev/null +++ b/python/klo-sl/sources/b2b_saas/opportunity_line_items.yaml @@ -0,0 +1,24 @@ +name: opportunity_line_items +table: opportunity_line_items +grain: +- line_item_id +columns: +- name: discount_pct + type: string +- name: line_item_id + type: number +- name: opportunity_id + type: number +- name: product_id + type: number +- name: quantity + type: string +- name: unit_price + type: number +joins: +- to: products + 'on': product_id = products.product_id + relationship: many_to_one +- to: opportunities + 'on': opportunity_id = opportunities.opportunity_id + relationship: many_to_one diff --git a/python/klo-sl/sources/b2b_saas/opportunity_stage_history.yaml b/python/klo-sl/sources/b2b_saas/opportunity_stage_history.yaml new file mode 100644 index 00000000..a2929fd2 --- /dev/null +++ b/python/klo-sl/sources/b2b_saas/opportunity_stage_history.yaml @@ -0,0 +1,21 @@ +name: opportunity_stage_history +table: opportunity_stage_history +grain: +-
history_id +columns: +- name: history_id + type: number +- name: entered_at + type: time + role: time +- name: exited_at + type: time + role: time +- name: opportunity_id + type: number +- name: stage + type: string +joins: +- to: opportunities + 'on': opportunity_id = opportunities.opportunity_id + relationship: many_to_one diff --git a/python/klo-sl/sources/b2b_saas/payment_intents.yaml b/python/klo-sl/sources/b2b_saas/payment_intents.yaml new file mode 100644 index 00000000..69fe0252 --- /dev/null +++ b/python/klo-sl/sources/b2b_saas/payment_intents.yaml @@ -0,0 +1,22 @@ +name: payment_intents +table: payment_intents +grain: +- payment_intent_id +columns: +- name: payment_intent_id + type: number +- name: amount + type: number +- name: created_at + type: time + role: time +- name: currency + type: string +- name: invoice_id + type: number +- name: status + type: string +joins: +- to: invoices + 'on': invoice_id = invoices.invoice_id + relationship: many_to_one diff --git a/python/klo-sl/sources/b2b_saas/payments.yaml b/python/klo-sl/sources/b2b_saas/payments.yaml new file mode 100644 index 00000000..63a08a1d --- /dev/null +++ b/python/klo-sl/sources/b2b_saas/payments.yaml @@ -0,0 +1,20 @@ +name: payments +table: payments +grain: +- payment_id +columns: +- name: payment_id + type: number +- name: amount + type: number +- name: invoice_id + type: number +- name: method + type: string +- name: payment_date + type: time + role: time +joins: +- to: invoices + 'on': invoice_id = invoices.invoice_id + relationship: many_to_one diff --git a/python/klo-sl/sources/b2b_saas/payroll_runs.yaml b/python/klo-sl/sources/b2b_saas/payroll_runs.yaml new file mode 100644 index 00000000..e6197124 --- /dev/null +++ b/python/klo-sl/sources/b2b_saas/payroll_runs.yaml @@ -0,0 +1,17 @@ +name: payroll_runs +table: payroll_runs +grain: +- run_id +columns: +- name: run_id + type: number +- name: benefits + type: number +- name: gross_pay + type: string +- name: pay_period_end + type: string 
+- name: pay_period_start + type: string +- name: taxes + type: string diff --git a/python/klo-sl/sources/b2b_saas/pricebook_entries.yaml b/python/klo-sl/sources/b2b_saas/pricebook_entries.yaml new file mode 100644 index 00000000..ca910432 --- /dev/null +++ b/python/klo-sl/sources/b2b_saas/pricebook_entries.yaml @@ -0,0 +1,20 @@ +name: pricebook_entries +table: pricebook_entries +grain: +- pricebook_entry_id +columns: +- name: pricebook_entry_id + type: number +- name: list_price + type: number +- name: pricebook_id + type: number +- name: product_id + type: number +joins: +- to: products + 'on': product_id = products.product_id + relationship: many_to_one +- to: pricebooks + 'on': pricebook_id = pricebooks.pricebook_id + relationship: many_to_one diff --git a/python/klo-sl/sources/b2b_saas/pricebooks.yaml b/python/klo-sl/sources/b2b_saas/pricebooks.yaml new file mode 100644 index 00000000..ae5717cc --- /dev/null +++ b/python/klo-sl/sources/b2b_saas/pricebooks.yaml @@ -0,0 +1,13 @@ +name: pricebooks +table: pricebooks +grain: +- pricebook_id +columns: +- name: pricebook_id + type: number +- name: currency + type: string +- name: name + type: string +- name: region + type: string diff --git a/python/klo-sl/sources/b2b_saas/product_costs.yaml b/python/klo-sl/sources/b2b_saas/product_costs.yaml new file mode 100644 index 00000000..6605ce5d --- /dev/null +++ b/python/klo-sl/sources/b2b_saas/product_costs.yaml @@ -0,0 +1,15 @@ +name: product_costs +table: product_costs +grain: +- cogs_per_unit +columns: +- name: cogs_per_unit + type: number +- name: product_id + type: number +- name: region + type: string +joins: +- to: products + 'on': product_id = products.product_id + relationship: many_to_one diff --git a/python/klo-sl/sources/b2b_saas/product_usage.yaml b/python/klo-sl/sources/b2b_saas/product_usage.yaml new file mode 100644 index 00000000..7afd3438 --- /dev/null +++ b/python/klo-sl/sources/b2b_saas/product_usage.yaml @@ -0,0 +1,20 @@ +name: product_usage +table: 
product_usage +grain: +- usage_id +columns: +- name: usage_id + type: number +- name: account_id + type: number +- name: active_users + type: string +- name: events_count + type: string +- name: usage_date + type: time + role: time +joins: +- to: accounts + 'on': account_id = accounts.account_id + relationship: many_to_one diff --git a/python/klo-sl/sources/b2b_saas/products.yaml b/python/klo-sl/sources/b2b_saas/products.yaml new file mode 100644 index 00000000..f154e608 --- /dev/null +++ b/python/klo-sl/sources/b2b_saas/products.yaml @@ -0,0 +1,13 @@ +name: products +table: products +grain: +- product_id +columns: +- name: product_id + type: number +- name: list_price + type: number +- name: product_name + type: string +- name: sku + type: string diff --git a/python/klo-sl/sources/b2b_saas/quotas.yaml b/python/klo-sl/sources/b2b_saas/quotas.yaml new file mode 100644 index 00000000..243e9d87 --- /dev/null +++ b/python/klo-sl/sources/b2b_saas/quotas.yaml @@ -0,0 +1,19 @@ +name: quotas +table: quotas +grain: +- quota_id +columns: +- name: quota_id + type: number +- name: period_end + type: string +- name: period_start + type: string +- name: quota_arr + type: number +- name: rep_id + type: number +joins: +- to: sales_reps + 'on': rep_id = sales_reps.rep_id + relationship: many_to_one diff --git a/python/klo-sl/sources/b2b_saas/quote_line_items.yaml b/python/klo-sl/sources/b2b_saas/quote_line_items.yaml new file mode 100644 index 00000000..dc350157 --- /dev/null +++ b/python/klo-sl/sources/b2b_saas/quote_line_items.yaml @@ -0,0 +1,24 @@ +name: quote_line_items +table: quote_line_items +grain: +- quote_line_item_id +columns: +- name: quote_line_item_id + type: number +- name: discount_pct + type: string +- name: product_id + type: number +- name: quantity + type: string +- name: quote_id + type: number +- name: unit_price + type: number +joins: +- to: quotes + 'on': quote_id = quotes.quote_id + relationship: many_to_one +- to: products + 'on': product_id = 
products.product_id + relationship: many_to_one diff --git a/python/klo-sl/sources/b2b_saas/quotes.yaml b/python/klo-sl/sources/b2b_saas/quotes.yaml new file mode 100644 index 00000000..7abccafa --- /dev/null +++ b/python/klo-sl/sources/b2b_saas/quotes.yaml @@ -0,0 +1,28 @@ +name: quotes +table: quotes +grain: +- quote_id +columns: +- name: quote_id + type: number +- name: created_at + type: time + role: time +- name: opportunity_id + type: number +- name: pricebook_id + type: number +- name: rep_id + type: number +- name: status + type: string +joins: +- to: sales_reps + 'on': rep_id = sales_reps.rep_id + relationship: many_to_one +- to: pricebooks + 'on': pricebook_id = pricebooks.pricebook_id + relationship: many_to_one +- to: opportunities + 'on': opportunity_id = opportunities.opportunity_id + relationship: many_to_one diff --git a/python/klo-sl/sources/b2b_saas/refunds.yaml b/python/klo-sl/sources/b2b_saas/refunds.yaml new file mode 100644 index 00000000..82a66bf5 --- /dev/null +++ b/python/klo-sl/sources/b2b_saas/refunds.yaml @@ -0,0 +1,20 @@ +name: refunds +table: refunds +grain: +- refund_id +columns: +- name: refund_id + type: number +- name: amount + type: number +- name: charge_id + type: number +- name: created_at + type: time + role: time +- name: reason + type: string +joins: +- to: charges + 'on': charge_id = charges.charge_id + relationship: many_to_one diff --git a/python/klo-sl/sources/b2b_saas/revenue_schedules.yaml b/python/klo-sl/sources/b2b_saas/revenue_schedules.yaml new file mode 100644 index 00000000..c07d8b52 --- /dev/null +++ b/python/klo-sl/sources/b2b_saas/revenue_schedules.yaml @@ -0,0 +1,28 @@ +name: revenue_schedules +table: revenue_schedules +grain: +- schedule_id +columns: +- name: schedule_id + type: number +- name: account_id + type: number +- name: amount + type: number +- name: contract_id + type: number +- name: end_date + type: time + role: time +- name: recognition_rule + type: string +- name: start_date + type: time + 
role: time +joins: +- to: accounts + 'on': account_id = accounts.account_id + relationship: many_to_one +- to: contracts + 'on': contract_id = contracts.contract_id + relationship: many_to_one diff --git a/python/klo-sl/sources/b2b_saas/reverse_etl_jobs.yaml b/python/klo-sl/sources/b2b_saas/reverse_etl_jobs.yaml new file mode 100644 index 00000000..8af96580 --- /dev/null +++ b/python/klo-sl/sources/b2b_saas/reverse_etl_jobs.yaml @@ -0,0 +1,16 @@ +name: reverse_etl_jobs +table: reverse_etl_jobs +grain: +- job_id +columns: +- name: job_id + type: number +- name: last_run_at + type: time + role: time +- name: last_status + type: string +- name: name + type: string +- name: target_system + type: string diff --git a/python/klo-sl/sources/b2b_saas/sales_reps.yaml b/python/klo-sl/sources/b2b_saas/sales_reps.yaml new file mode 100644 index 00000000..b9ca3047 --- /dev/null +++ b/python/klo-sl/sources/b2b_saas/sales_reps.yaml @@ -0,0 +1,27 @@ +name: sales_reps +table: sales_reps +grain: +- rep_id +columns: +- name: rep_id + type: number +- name: email + type: string +- name: first_name + type: string +- name: last_name + type: string +- name: manager_rep_id + type: number +- name: region + type: string +- name: role + type: string +- name: segment_focus + type: string +- name: team_id + type: number +joins: +- to: sales_teams + 'on': team_id = sales_teams.team_id + relationship: many_to_one diff --git a/python/klo-sl/sources/b2b_saas/sales_teams.yaml b/python/klo-sl/sources/b2b_saas/sales_teams.yaml new file mode 100644 index 00000000..4daf4d6f --- /dev/null +++ b/python/klo-sl/sources/b2b_saas/sales_teams.yaml @@ -0,0 +1,11 @@ +name: sales_teams +table: sales_teams +grain: +- team_id +columns: +- name: team_id + type: number +- name: region + type: string +- name: team_name + type: string diff --git a/python/klo-sl/sources/b2b_saas/search_console_stats.yaml b/python/klo-sl/sources/b2b_saas/search_console_stats.yaml new file mode 100644 index 00000000..1db12af4 --- /dev/null 
+++ b/python/klo-sl/sources/b2b_saas/search_console_stats.yaml @@ -0,0 +1,20 @@ +name: search_console_stats +table: search_console_stats +grain: +- row_id +columns: +- name: clicks + type: number +- name: impressions + type: number +- name: page + type: string +- name: position + type: number +- name: query + type: string +- name: row_id + type: number +- name: stat_date + type: time + role: time diff --git a/python/klo-sl/sources/b2b_saas/sequence_enrollments.yaml b/python/klo-sl/sources/b2b_saas/sequence_enrollments.yaml new file mode 100644 index 00000000..8dfbd356 --- /dev/null +++ b/python/klo-sl/sources/b2b_saas/sequence_enrollments.yaml @@ -0,0 +1,28 @@ +name: sequence_enrollments +table: sequence_enrollments +grain: +- enrollment_id +columns: +- name: enrollment_id + type: number +- name: enrolled_at + type: time + role: time +- name: lead_id + type: number +- name: rep_id + type: number +- name: sequence_id + type: number +- name: status + type: string +joins: +- to: sequences + 'on': sequence_id = sequences.sequence_id + relationship: many_to_one +- to: sales_reps + 'on': rep_id = sales_reps.rep_id + relationship: many_to_one +- to: leads + 'on': lead_id = leads.lead_id + relationship: many_to_one diff --git a/python/klo-sl/sources/b2b_saas/sequence_steps.yaml b/python/klo-sl/sources/b2b_saas/sequence_steps.yaml new file mode 100644 index 00000000..6322dd1a --- /dev/null +++ b/python/klo-sl/sources/b2b_saas/sequence_steps.yaml @@ -0,0 +1,21 @@ +name: sequence_steps +table: sequence_steps +grain: +- step_id +columns: +- name: step_id + type: number +- name: content + type: string +- name: offset_days + type: string +- name: sequence_id + type: number +- name: step_order + type: string +- name: step_type + type: string +joins: +- to: sequences + 'on': sequence_id = sequences.sequence_id + relationship: many_to_one diff --git a/python/klo-sl/sources/b2b_saas/sequence_touches.yaml b/python/klo-sl/sources/b2b_saas/sequence_touches.yaml new file mode 100644 
index 00000000..d38c6444
--- /dev/null
+++ b/python/klo-sl/sources/b2b_saas/sequence_touches.yaml
@@ -0,0 +1,25 @@
+name: sequence_touches
+table: sequence_touches
+grain:
+- touch_id
+columns:
+- name: touch_id
+  type: number
+- name: enrollment_id
+  type: number
+- name: rep_id
+  type: number
+- name: status
+  type: string
+- name: touch_date
+  type: time
+  role: time
+- name: touch_type
+  type: string
+joins:
+- to: sequence_enrollments
+  'on': enrollment_id = sequence_enrollments.enrollment_id
+  relationship: many_to_one
+- to: sales_reps
+  'on': rep_id = sales_reps.rep_id
+  relationship: many_to_one
diff --git a/python/klo-sl/sources/b2b_saas/sequences.yaml b/python/klo-sl/sources/b2b_saas/sequences.yaml
new file mode 100644
index 00000000..e9c147fd
--- /dev/null
+++ b/python/klo-sl/sources/b2b_saas/sequences.yaml
@@ -0,0 +1,14 @@
+name: sequences
+table: sequences
+grain:
+- sequence_id
+columns:
+- name: sequence_id
+  type: number
+- name: channel
+  type: string
+- name: created_at
+  type: time
+  role: time
+- name: name
+  type: string
diff --git a/python/klo-sl/sources/b2b_saas/stage_weights.yaml b/python/klo-sl/sources/b2b_saas/stage_weights.yaml
new file mode 100644
index 00000000..15b67a51
--- /dev/null
+++ b/python/klo-sl/sources/b2b_saas/stage_weights.yaml
@@ -0,0 +1,9 @@
+name: stage_weights
+table: stage_weights
+grain:
+- stage
+columns:
+- name: stage
+  type: string
+- name: weight
+  type: string
diff --git a/python/klo-sl/sources/b2b_saas/subscription_items.yaml b/python/klo-sl/sources/b2b_saas/subscription_items.yaml
new file mode 100644
index 00000000..07d1745a
--- /dev/null
+++ b/python/klo-sl/sources/b2b_saas/subscription_items.yaml
@@ -0,0 +1,19 @@
+name: subscription_items
+table: subscription_items
+# Grain is the item's own id; product_id is a foreign key to products and is joined many_to_one below.
+grain:
+- sub_item_id
+columns:
+- name: product_id
+  type: number
+- name: sub_item_id
+  type: number
+- name: subscription_id
+  type: number
+joins:
+- to: products
+  'on': product_id = products.product_id
+  relationship: many_to_one
+- to:
subscriptions + 'on': subscription_id = subscriptions.subscription_id + relationship: many_to_one diff --git a/python/klo-sl/sources/b2b_saas/subscriptions.yaml b/python/klo-sl/sources/b2b_saas/subscriptions.yaml new file mode 100644 index 00000000..8a746983 --- /dev/null +++ b/python/klo-sl/sources/b2b_saas/subscriptions.yaml @@ -0,0 +1,24 @@ +name: subscriptions +table: subscriptions +grain: +- subscription_id +columns: +- name: subscription_id + type: number +- name: account_id + type: number +- name: canceled_at + type: time + role: time +- name: churn_reason + type: string +- name: end_date + type: time + role: time +- name: start_date + type: time + role: time +joins: +- to: accounts + 'on': account_id = accounts.account_id + relationship: many_to_one diff --git a/python/klo-sl/sources/b2b_saas/support_tickets.yaml b/python/klo-sl/sources/b2b_saas/support_tickets.yaml new file mode 100644 index 00000000..43b11792 --- /dev/null +++ b/python/klo-sl/sources/b2b_saas/support_tickets.yaml @@ -0,0 +1,31 @@ +name: support_tickets +table: support_tickets +grain: +- ticket_id +columns: +- name: ticket_id + type: number +- name: account_id + type: number +- name: closed_at + type: time + role: time +- name: created_at + type: time + role: time +- name: first_response_at + type: time + role: time +- name: resolved_at + type: time + role: time +- name: severity + type: string +- name: sla_breached + type: string +- name: status + type: string +joins: +- to: accounts + 'on': account_id = accounts.account_id + relationship: many_to_one diff --git a/python/klo-sl/sources/b2b_saas/target_accounts.yaml b/python/klo-sl/sources/b2b_saas/target_accounts.yaml new file mode 100644 index 00000000..7ad9b832 --- /dev/null +++ b/python/klo-sl/sources/b2b_saas/target_accounts.yaml @@ -0,0 +1,16 @@ +name: target_accounts +table: target_accounts +grain: +- account_id +columns: +- name: account_id + type: number +- name: start_date + type: time + role: time +- name: target_tier + type: 
string +joins: +- to: accounts + 'on': account_id = accounts.account_id + relationship: many_to_one diff --git a/python/klo-sl/sources/b2b_saas/touchpoints.yaml b/python/klo-sl/sources/b2b_saas/touchpoints.yaml new file mode 100644 index 00000000..39fdd07a --- /dev/null +++ b/python/klo-sl/sources/b2b_saas/touchpoints.yaml @@ -0,0 +1,34 @@ +name: touchpoints +table: touchpoints +grain: +- touchpoint_id +columns: +- name: touchpoint_id + type: number +- name: account_id + type: number +- name: channel + type: string +- name: lead_id + type: number +- name: occurred_at + type: time + role: time +- name: opportunity_id + type: number +- name: source_id + type: number +- name: source_object + type: string +- name: subchannel + type: string +joins: +- to: leads + 'on': lead_id = leads.lead_id + relationship: many_to_one +- to: opportunities + 'on': opportunity_id = opportunities.opportunity_id + relationship: many_to_one +- to: accounts + 'on': account_id = accounts.account_id + relationship: many_to_one diff --git a/python/klo-sl/sources/b2b_saas/vendors.yaml b/python/klo-sl/sources/b2b_saas/vendors.yaml new file mode 100644 index 00000000..5947e63e --- /dev/null +++ b/python/klo-sl/sources/b2b_saas/vendors.yaml @@ -0,0 +1,11 @@ +name: vendors +table: vendors +grain: +- vendor_id +columns: +- name: vendor_id + type: number +- name: category + type: string +- name: vendor_name + type: string diff --git a/python/klo-sl/sources/b2b_saas/web_events.yaml b/python/klo-sl/sources/b2b_saas/web_events.yaml new file mode 100644 index 00000000..ada83f7e --- /dev/null +++ b/python/klo-sl/sources/b2b_saas/web_events.yaml @@ -0,0 +1,22 @@ +name: web_events +table: web_events +grain: +- event_id +columns: +- name: event_id + type: number +- name: event_name + type: string +- name: event_time + type: time + role: time +- name: page + type: string +- name: session_id + type: number +- name: value + type: string +joins: +- to: web_sessions + 'on': session_id = web_sessions.session_id + 
relationship: many_to_one diff --git a/python/klo-sl/sources/b2b_saas/web_sessions.yaml b/python/klo-sl/sources/b2b_saas/web_sessions.yaml new file mode 100644 index 00000000..80696890 --- /dev/null +++ b/python/klo-sl/sources/b2b_saas/web_sessions.yaml @@ -0,0 +1,30 @@ +name: web_sessions +table: web_sessions +grain: +- session_id +columns: +- name: session_id + type: number +- name: account_id + type: number +- name: landing_page + type: string +- name: lead_id + type: number +- name: session_start + type: string +- name: utm_campaign + type: string +- name: utm_medium + type: string +- name: utm_source + type: string +- name: visitor_id + type: number +joins: +- to: leads + 'on': lead_id = leads.lead_id + relationship: many_to_one +- to: accounts + 'on': account_id = accounts.account_id + relationship: many_to_one diff --git a/python/klo-sl/sources/b2b_saas/webinar_attendance.yaml b/python/klo-sl/sources/b2b_saas/webinar_attendance.yaml new file mode 100644 index 00000000..dcc382ba --- /dev/null +++ b/python/klo-sl/sources/b2b_saas/webinar_attendance.yaml @@ -0,0 +1,22 @@ +name: webinar_attendance +table: webinar_attendance +grain: +- attendance_id +columns: +- name: attendance_id + type: number +- name: attended + type: string +- name: duration_minutes + type: number +- name: lead_id + type: number +- name: webinar_id + type: number +joins: +- to: leads + 'on': lead_id = leads.lead_id + relationship: many_to_one +- to: webinars + 'on': webinar_id = webinars.webinar_id + relationship: many_to_one diff --git a/python/klo-sl/sources/b2b_saas/webinar_registrations.yaml b/python/klo-sl/sources/b2b_saas/webinar_registrations.yaml new file mode 100644 index 00000000..a6c35023 --- /dev/null +++ b/python/klo-sl/sources/b2b_saas/webinar_registrations.yaml @@ -0,0 +1,21 @@ +name: webinar_registrations +table: webinar_registrations +grain: +- registration_id +columns: +- name: registration_id + type: number +- name: lead_id + type: number +- name: registered_at + type: 
time + role: time +- name: webinar_id + type: number +joins: +- to: leads + 'on': lead_id = leads.lead_id + relationship: many_to_one +- to: webinars + 'on': webinar_id = webinars.webinar_id + relationship: many_to_one diff --git a/python/klo-sl/sources/b2b_saas/webinars.yaml b/python/klo-sl/sources/b2b_saas/webinars.yaml new file mode 100644 index 00000000..b420175f --- /dev/null +++ b/python/klo-sl/sources/b2b_saas/webinars.yaml @@ -0,0 +1,19 @@ +name: webinars +table: webinars +grain: +- webinar_id +columns: +- name: webinar_id + type: number +- name: cost + type: string +- name: end_time + type: time + role: time +- name: host_platform + type: string +- name: start_time + type: time + role: time +- name: title + type: string diff --git a/python/klo-sl/sources/ecommerce/churn_risk.yaml b/python/klo-sl/sources/ecommerce/churn_risk.yaml new file mode 100644 index 00000000..32e919ed --- /dev/null +++ b/python/klo-sl/sources/ecommerce/churn_risk.yaml @@ -0,0 +1,35 @@ +name: churn_risk +description: | + Customer churn risk score combining tenure, + usage trends, and support burden. 
+sql: | + SELECT + c.id AS customer_id, + c.name AS customer_name, + calculate_churn_score(c.id) AS score, + CASE + WHEN c.arr < 50000 THEN 'SMB' + WHEN c.arr < 500000 THEN 'Mid-Market' + ELSE 'Enterprise' + END AS customer_type + FROM customers c + JOIN usage_summary u ON c.id = u.customer_id + JOIN ticket_summary t ON c.id = t.customer_id +grain: [customer_id] +columns: + - name: customer_id + type: number + - name: customer_name + type: string + - name: score + type: number + - name: customer_type + type: string +joins: + - to: customers + "on": customer_id = customers.id + relationship: many_to_one +measures: + - name: avg_risk + expr: avg(score) + description: "Average churn risk score" diff --git a/python/klo-sl/sources/ecommerce/customers.yaml b/python/klo-sl/sources/ecommerce/customers.yaml new file mode 100644 index 00000000..c02c1537 --- /dev/null +++ b/python/klo-sl/sources/ecommerce/customers.yaml @@ -0,0 +1,19 @@ +name: customers +table: public.customers +grain: [id] +columns: + - name: id + type: number + - name: name + type: string + - name: segment + type: string + - name: region_id + type: number + - name: created_at + type: time + role: time +joins: + - to: regions + "on": region_id = regions.id + relationship: many_to_one diff --git a/python/klo-sl/sources/ecommerce/order_items.yaml b/python/klo-sl/sources/ecommerce/order_items.yaml new file mode 100644 index 00000000..1f4a5279 --- /dev/null +++ b/python/klo-sl/sources/ecommerce/order_items.yaml @@ -0,0 +1,21 @@ +name: order_items +table: public.order_items +grain: [id] +columns: + - name: id + type: number + - name: order_id + type: number + - name: product_id + type: number + - name: quantity + type: number + - name: price + type: number +joins: + - to: orders + "on": order_id = orders.id + relationship: many_to_one + - to: products + "on": product_id = products.id + relationship: many_to_one diff --git a/python/klo-sl/sources/ecommerce/orders.yaml b/python/klo-sl/sources/ecommerce/orders.yaml 
new file mode 100644 index 00000000..5e7c75b9 --- /dev/null +++ b/python/klo-sl/sources/ecommerce/orders.yaml @@ -0,0 +1,39 @@ +name: orders +table: public.orders +grain: [id] +columns: + - name: id + type: number + - name: customer_id + type: number + - name: amount + type: number + - name: cost + type: number + - name: status + type: string + - name: created_at + type: time + role: time +joins: + - to: customers + "on": customer_id = customers.id + relationship: many_to_one +measures: + - name: revenue + expr: sum(amount) + filter: "status != 'refunded'" + description: "Net revenue excluding refunds" + - name: order_count + expr: count(id) + - name: total_amount + expr: sum(amount) + description: "Total order amount across all statuses" + - name: paid_amount + expr: sum(amount) + filter: "status = 'paid'" + description: "Total amount from paid orders only" + - name: refunded_amount + expr: sum(amount) + filter: "status = 'refunded'" + description: "Total amount from refunded orders" diff --git a/python/klo-sl/sources/ecommerce/products.yaml b/python/klo-sl/sources/ecommerce/products.yaml new file mode 100644 index 00000000..21254949 --- /dev/null +++ b/python/klo-sl/sources/ecommerce/products.yaml @@ -0,0 +1,12 @@ +name: products +table: public.products +grain: [id] +columns: + - name: id + type: number + - name: name + type: string + - name: category + type: string + - name: price + type: number diff --git a/python/klo-sl/sources/ecommerce/regions.yaml b/python/klo-sl/sources/ecommerce/regions.yaml new file mode 100644 index 00000000..0fec327a --- /dev/null +++ b/python/klo-sl/sources/ecommerce/regions.yaml @@ -0,0 +1,8 @@ +name: regions +table: public.regions +grain: [id] +columns: + - name: id + type: number + - name: name + type: string diff --git a/python/klo-sl/sources/tpch/customer.yaml b/python/klo-sl/sources/tpch/customer.yaml new file mode 100644 index 00000000..24d6fc28 --- /dev/null +++ b/python/klo-sl/sources/tpch/customer.yaml @@ -0,0 +1,27 @@ 
+name: customer +table: public.customer +grain: [c_custkey] +columns: + - name: c_custkey + type: number + - name: c_name + type: string + - name: c_address + type: string + - name: c_nationkey + type: number + - name: c_phone + type: string + - name: c_acctbal + type: number + - name: c_mktsegment + type: string + - name: c_comment + type: string +joins: + - to: nation + "on": c_nationkey = nation.n_nationkey + relationship: many_to_one +measures: + - name: customer_count + expr: count(c_custkey) diff --git a/python/klo-sl/sources/tpch/lineitem.yaml b/python/klo-sl/sources/tpch/lineitem.yaml new file mode 100644 index 00000000..644a0803 --- /dev/null +++ b/python/klo-sl/sources/tpch/lineitem.yaml @@ -0,0 +1,69 @@ +name: lineitem +table: public.lineitem +grain: [l_orderkey, l_linenumber] +columns: + - name: l_orderkey + type: number + - name: l_partkey + type: number + - name: l_suppkey + type: number + - name: l_linenumber + type: number + - name: l_quantity + type: number + - name: l_extendedprice + type: number + - name: l_discount + type: number + - name: l_tax + type: number + - name: l_returnflag + type: string + - name: l_linestatus + type: string + - name: l_shipdate + type: time + role: time + - name: l_commitdate + type: time + - name: l_receiptdate + type: time + - name: l_shipinstruct + type: string + - name: l_shipmode + type: string + - name: l_comment + type: string +joins: + - to: orders + "on": l_orderkey = orders.o_orderkey + relationship: many_to_one + - to: part + "on": l_partkey = part.p_partkey + relationship: many_to_one + - to: supplier + "on": l_suppkey = supplier.s_suppkey + relationship: many_to_one +measures: + - name: revenue + expr: sum(l_extendedprice * (1 - l_discount)) + description: "Net revenue (TPC-H Q1 pricing)" + - name: charge + expr: sum(l_extendedprice * (1 - l_discount) * (1 + l_tax)) + description: "Charge including tax" + - name: total_quantity + expr: sum(l_quantity) + - name: avg_quantity + expr: avg(l_quantity) + - 
name: avg_price + expr: avg(l_extendedprice) + - name: avg_discount + expr: avg(l_discount) + - name: line_count + expr: count(l_orderkey) + description: "Count of line items" + - name: returned_revenue + expr: sum(l_extendedprice * (1 - l_discount)) + filter: "l_returnflag = 'R'" + description: "Revenue from returned items" diff --git a/python/klo-sl/sources/tpch/nation.yaml b/python/klo-sl/sources/tpch/nation.yaml new file mode 100644 index 00000000..7ac59b3e --- /dev/null +++ b/python/klo-sl/sources/tpch/nation.yaml @@ -0,0 +1,16 @@ +name: nation +table: public.nation +grain: [n_nationkey] +columns: + - name: n_nationkey + type: number + - name: n_name + type: string + - name: n_regionkey + type: number + - name: n_comment + type: string +joins: + - to: region + "on": n_regionkey = region.r_regionkey + relationship: many_to_one diff --git a/python/klo-sl/sources/tpch/orders.yaml b/python/klo-sl/sources/tpch/orders.yaml new file mode 100644 index 00000000..18714413 --- /dev/null +++ b/python/klo-sl/sources/tpch/orders.yaml @@ -0,0 +1,36 @@ +name: orders +table: public.orders +grain: [o_orderkey] +columns: + - name: o_orderkey + type: number + - name: o_custkey + type: number + - name: o_orderstatus + type: string + - name: o_totalprice + type: number + - name: o_orderdate + type: time + role: time + - name: o_orderpriority + type: string + - name: o_clerk + type: string + - name: o_shippriority + type: number + - name: o_comment + type: string +joins: + - to: customer + "on": o_custkey = customer.c_custkey + relationship: many_to_one +measures: + - name: order_count + expr: count(o_orderkey) + - name: total_price + expr: sum(o_totalprice) + description: "Total order value" + - name: avg_order_value + expr: avg(o_totalprice) + description: "Average order value" diff --git a/python/klo-sl/sources/tpch/part.yaml b/python/klo-sl/sources/tpch/part.yaml new file mode 100644 index 00000000..21f8edda --- /dev/null +++ b/python/klo-sl/sources/tpch/part.yaml @@ -0,0 +1,22 
@@ +name: part +table: public.part +grain: [p_partkey] +columns: + - name: p_partkey + type: number + - name: p_name + type: string + - name: p_mfgr + type: string + - name: p_brand + type: string + - name: p_type + type: string + - name: p_size + type: number + - name: p_container + type: string + - name: p_retailprice + type: number + - name: p_comment + type: string diff --git a/python/klo-sl/sources/tpch/partsupp.yaml b/python/klo-sl/sources/tpch/partsupp.yaml new file mode 100644 index 00000000..ccb719ac --- /dev/null +++ b/python/klo-sl/sources/tpch/partsupp.yaml @@ -0,0 +1,27 @@ +name: partsupp +table: public.partsupp +grain: [ps_partkey, ps_suppkey] +columns: + - name: ps_partkey + type: number + - name: ps_suppkey + type: number + - name: ps_availqty + type: number + - name: ps_supplycost + type: number + - name: ps_comment + type: string +joins: + - to: part + "on": ps_partkey = part.p_partkey + relationship: many_to_one + - to: supplier + "on": ps_suppkey = supplier.s_suppkey + relationship: many_to_one +measures: + - name: total_supply_cost + expr: sum(ps_supplycost * ps_availqty) + description: "Total value of parts in stock" + - name: avg_supply_cost + expr: avg(ps_supplycost) diff --git a/python/klo-sl/sources/tpch/region.yaml b/python/klo-sl/sources/tpch/region.yaml new file mode 100644 index 00000000..d46d4634 --- /dev/null +++ b/python/klo-sl/sources/tpch/region.yaml @@ -0,0 +1,10 @@ +name: region +table: public.region +grain: [r_regionkey] +columns: + - name: r_regionkey + type: number + - name: r_name + type: string + - name: r_comment + type: string diff --git a/python/klo-sl/sources/tpch/supplier.yaml b/python/klo-sl/sources/tpch/supplier.yaml new file mode 100644 index 00000000..6de7c08e --- /dev/null +++ b/python/klo-sl/sources/tpch/supplier.yaml @@ -0,0 +1,22 @@ +name: supplier +table: public.supplier +grain: [s_suppkey] +columns: + - name: s_suppkey + type: number + - name: s_name + type: string + - name: s_address + type: string + - name: 
s_nationkey
+    type: number
+  - name: s_phone
+    type: string
+  - name: s_acctbal
+    type: number
+  - name: s_comment
+    type: string
+joins:
+  - to: nation
+    "on": s_nationkey = nation.n_nationkey
+    relationship: many_to_one
diff --git a/python/klo-sl/tests/__init__.py b/python/klo-sl/tests/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/python/klo-sl/tests/conftest.py b/python/klo-sl/tests/conftest.py
new file mode 100644
index 00000000..b4f6ba13
--- /dev/null
+++ b/python/klo-sl/tests/conftest.py
@@ -0,0 +1,98 @@
+from __future__ import annotations
+
+import atexit
+import shutil
+import tempfile
+from pathlib import Path
+
+import pytest
+import sqlglot
+import yaml
+
+from semantic_layer.engine import SemanticEngine
+from semantic_layer.loader import SourceLoader
+from semantic_layer.models import SourceDefinition
+
+SOURCES_DIR = Path(__file__).parent.parent / "sources" / "ecommerce"
+TPCH_DIR = Path(__file__).parent.parent / "sources" / "tpch"
+
+
+@pytest.fixture
+def ecommerce_sources() -> dict[str, SourceDefinition]:
+    loader = SourceLoader(SOURCES_DIR)
+    return loader.load_all()
+
+
+@pytest.fixture
+def tpch_sources() -> dict[str, SourceDefinition]:
+    loader = SourceLoader(TPCH_DIR)
+    return loader.load_all()
+
+
+# ── Shared test helpers ──────────────────────────────────────────────
+
+
+def make_engine(
+    sources_dict: dict[str, dict], dialect: str = "postgres"
+) -> SemanticEngine:
+    """Build a SemanticEngine from inline source dicts (writes temp YAML files).
+
+    Each ``sources_dict`` entry is dumped to ``{name}.yaml`` in a fresh temporary
+    directory, which is handed to ``SemanticEngine`` and removed at process exit.
+    """
+    tmpdir = tempfile.mkdtemp()
+    # mkdtemp() never removes the directory; delete it when the test process exits.
+    atexit.register(shutil.rmtree, tmpdir, ignore_errors=True)
+    for name, data in sources_dict.items():
+        with open(Path(tmpdir) / f"{name}.yaml", "w") as f:
+            yaml.dump(data, f)
+    return SemanticEngine(tmpdir, dialect=dialect)
+
+
+def assert_valid_sql(sql: str):
+    try:
+        sqlglot.parse(sql)
+    except Exception as e:
+        pytest.fail(f"Generated SQL is not valid: {e}\n\nSQL:\n{sql}")
+
+
+@pytest.fixture
+def make_bq_fct_orders_engine() -> SemanticEngine:
+    """BigQuery-dialect engine with fct_orders source mirroring the production
YAML.""" + source = { + "name": "fct_orders", + "table": "analytics.fct_orders", + "grain": ["order_id"], + "columns": [ + {"name": "order_id", "type": "number"}, + {"name": "status", "type": "string"}, + {"name": "transaction_date", "type": "time"}, + ], + "segments": [ + {"name": "non_cancelled", "expr": "status != 'cancelled'"}, + { + "name": "last_30_days", + "expr": "transaction_date >= timestamp(date_sub(current_date(), interval 30 day))", + }, + ], + "measures": [ + { + "name": "daily_active_orders", + "expr": "count(distinct order_id)", + "segments": ["non_cancelled", "last_30_days"], + }, + ], + } + return make_engine({"fct_orders": source}, dialect="bigquery") + + +@pytest.fixture +def make_engine_factory(): + """Factory fixture: pass a sources-dict + dialect, get a SemanticEngine.""" + + def _make( + sources_dict: dict[str, dict], dialect: str = "postgres" + ) -> SemanticEngine: + return make_engine(sources_dict, dialect=dialect) + + return _make diff --git a/python/klo-sl/tests/test_aggregate_locality.py b/python/klo-sl/tests/test_aggregate_locality.py new file mode 100644 index 00000000..9080d608 --- /dev/null +++ b/python/klo-sl/tests/test_aggregate_locality.py @@ -0,0 +1,1735 @@ +"""Dedicated tests for aggregate locality (fan-out/chasm trap correctness).""" + +import pytest +import sqlglot + +from semantic_layer.generator import SqlGenerator +from semantic_layer.graph import JoinGraph +from semantic_layer.models import ( + JoinDeclaration, + MeasureDefinition, + SemanticQuery, + SourceColumn, + SourceDefinition, +) +from semantic_layer.planner import QueryPlanner + + +def _build_chasm_sources(): + """Build a classic chasm trap: orders and tickets both join m2o to customers.""" + customers = SourceDefinition( + name="customers", + table="public.customers", + grain=["id"], + columns=[ + SourceColumn(name="id", type="number"), + SourceColumn(name="segment", type="string"), + SourceColumn(name="region", type="string"), + ], + ) + orders = 
SourceDefinition( + name="orders", + table="public.orders", + grain=["id"], + columns=[ + SourceColumn(name="id", type="number"), + SourceColumn(name="customer_id", type="number"), + SourceColumn(name="amount", type="number"), + ], + joins=[ + JoinDeclaration( + to="customers", + on="customer_id = customers.id", + relationship="many_to_one", + ) + ], + measures=[MeasureDefinition(name="revenue", expr="sum(amount)")], + ) + tickets = SourceDefinition( + name="tickets", + table="public.tickets", + grain=["id"], + columns=[ + SourceColumn(name="id", type="number"), + SourceColumn(name="customer_id", type="number"), + SourceColumn(name="priority", type="string"), + ], + joins=[ + JoinDeclaration( + to="customers", + on="customer_id = customers.id", + relationship="many_to_one", + ) + ], + measures=[MeasureDefinition(name="ticket_count", expr="count(id)")], + ) + return {"customers": customers, "orders": orders, "tickets": tickets} + + +def _build_chasm_sources_with_regions(): + """Chasm trap with an extra regions dimension table: orders/tickets -> customers -> regions.""" + regions = SourceDefinition( + name="regions", + table="public.regions", + grain=["id"], + columns=[ + SourceColumn(name="id", type="number"), + SourceColumn(name="name", type="string"), + ], + ) + customers = SourceDefinition( + name="customers", + table="public.customers", + grain=["id"], + columns=[ + SourceColumn(name="id", type="number"), + SourceColumn(name="segment", type="string"), + SourceColumn(name="region_id", type="number"), + ], + joins=[ + JoinDeclaration( + to="regions", on="region_id = regions.id", relationship="many_to_one" + ) + ], + ) + orders = SourceDefinition( + name="orders", + table="public.orders", + grain=["id"], + columns=[ + SourceColumn(name="id", type="number"), + SourceColumn(name="customer_id", type="number"), + SourceColumn(name="amount", type="number"), + ], + joins=[ + JoinDeclaration( + to="customers", + on="customer_id = customers.id", + 
relationship="many_to_one", + ) + ], + measures=[MeasureDefinition(name="revenue", expr="sum(amount)")], + ) + tickets = SourceDefinition( + name="tickets", + table="public.tickets", + grain=["id"], + columns=[ + SourceColumn(name="id", type="number"), + SourceColumn(name="customer_id", type="number"), + ], + joins=[ + JoinDeclaration( + to="customers", + on="customer_id = customers.id", + relationship="many_to_one", + ) + ], + measures=[MeasureDefinition(name="ticket_count", expr="count(id)")], + ) + return { + "regions": regions, + "customers": customers, + "orders": orders, + "tickets": tickets, + } + + +def _make_engine(sources): + graph = JoinGraph(sources) + graph.build() + planner = QueryPlanner(sources, graph) + generator = SqlGenerator(dialect="postgres", alias_map=graph.alias_map) + return planner, generator, sources + + +class TestChasmTrapLocality: + def test_two_measure_sources_get_separate_ctes(self): + planner, gen, sources = _make_engine(_build_chasm_sources()) + query = SemanticQuery( + measures=["sum(orders.amount)", "count(tickets.id)"], + dimensions=["customers.segment"], + ) + plan = planner.plan(query) + sql = gen.generate(plan, sources) + + # Verify CTE structure + assert "orders_agg" in sql + assert "tickets_agg" in sql + assert sql.count("_agg") >= 2 + + # Verify FULL JOIN + assert "FULL JOIN" in sql.upper() + + # Verify COALESCE for dimension keys + assert "COALESCE" in sql.upper() + + # Verify SQL is valid + sqlglot.parse(sql) + + def test_chasm_with_multiple_dimensions(self): + planner, gen, sources = _make_engine(_build_chasm_sources()) + query = SemanticQuery( + measures=["sum(orders.amount)", "count(tickets.id)"], + dimensions=["customers.segment", "customers.region"], + ) + plan = planner.plan(query) + sql = gen.generate(plan, sources) + + assert "orders_agg" in sql + assert "tickets_agg" in sql + # Both dimensions should appear in COALESCE + assert sql.upper().count("COALESCE") >= 2 + sqlglot.parse(sql) + + def 
test_pre_defined_measures_in_chasm(self): + planner, gen, sources = _make_engine(_build_chasm_sources()) + query = SemanticQuery( + measures=["orders.revenue", "tickets.ticket_count"], + dimensions=["customers.segment"], + ) + plan = planner.plan(query) + sql = gen.generate(plan, sources) + + assert "orders_agg" in sql + assert "tickets_agg" in sql + sqlglot.parse(sql) + + +class TestNoFanOut: + def test_single_source_no_ctes(self): + sources = _build_chasm_sources() + planner, gen, sources = _make_engine(sources) + query = SemanticQuery( + measures=["sum(orders.amount)"], + dimensions=["orders.customer_id"], + ) + plan = planner.plan(query) + sql = gen.generate(plan, sources) + + # No aggregate locality needed + assert "_agg" not in sql + assert "FULL JOIN" not in sql.upper() + sqlglot.parse(sql) + + def test_m2o_join_no_ctes(self, ecommerce_sources): + """orders → customers is m2o, no fan-out.""" + graph = JoinGraph(ecommerce_sources) + graph.build() + planner = QueryPlanner(ecommerce_sources, graph) + gen = SqlGenerator(dialect="postgres") + + query = SemanticQuery( + measures=["sum(orders.amount)"], + dimensions=["customers.segment"], + ) + plan = planner.plan(query) + sql = gen.generate(plan, ecommerce_sources) + + assert "_agg" not in sql + assert "FULL JOIN" not in sql.upper() + sqlglot.parse(sql) + + +class TestMultiHopDimensionInChasm: + def test_chasm_with_regions_dimension(self): + """Both CTEs must join through customers to reach regions.""" + planner, gen, sources = _make_engine(_build_chasm_sources_with_regions()) + query = SemanticQuery( + measures=["sum(orders.amount)", "count(tickets.id)"], + dimensions=["regions.name"], + ) + plan = planner.plan(query) + sql = gen.generate(plan, sources) + + assert "orders_agg" in sql + assert "tickets_agg" in sql + + # Both CTEs must join regions (via customers) + # Extract CTE blocks to verify each one joins regions + orders_cte_start = sql.index("orders_agg AS") + tickets_cte_start = sql.index("tickets_agg AS") 
+ # The orders CTE ends where the tickets CTE begins + orders_cte = sql[orders_cte_start:tickets_cte_start] + tickets_cte = sql[tickets_cte_start:] + + assert "public.regions" in orders_cte or "regions" in orders_cte + assert "public.regions" in tickets_cte or "regions" in tickets_cte + assert "public.customers" in tickets_cte or "customers" in tickets_cte + + sqlglot.parse(sql) + + +class TestFanOutWithFilters: + def test_where_filter_in_chasm(self): + planner, gen, sources = _make_engine(_build_chasm_sources()) + query = SemanticQuery( + measures=["sum(orders.amount)", "count(tickets.id)"], + dimensions=["customers.segment"], + filters=["customers.region = 'US'"], + ) + plan = planner.plan(query) + sql = gen.generate(plan, sources) + + # WHERE should appear inside CTEs + assert "WHERE" in sql.upper() + assert "US" in sql + sqlglot.parse(sql) + + def test_having_filter_in_chasm(self): + """HAVING filters appear as WHERE on the outer query (no GROUP BY at outer level).""" + planner, gen, sources = _make_engine(_build_chasm_sources()) + query = SemanticQuery( + measures=["sum(orders.amount)", "count(tickets.id)"], + dimensions=["customers.segment"], + filters=["sum(orders.amount) > 10000"], + ) + plan = planner.plan(query) + sql = gen.generate(plan, sources) + + assert "10000" in sql + + # Filter should NOT be inside any CTE + orders_cte_start = sql.index("orders_agg AS") + tickets_cte_start = sql.index("tickets_agg AS") + orders_cte = sql[orders_cte_start:tickets_cte_start] + # Find the outer SELECT (after all CTEs) + outer_select_start = sql.index( + "SELECT", tickets_cte_start + len("tickets_agg AS") + ) + + assert "HAVING" not in orders_cte.upper() + outer_query = sql[outer_select_start:] + # Outer query has no GROUP BY, so HAVING filters become WHERE + assert "WHERE" in outer_query.upper() + assert "10000" in outer_query + + def test_source_specific_where_filter_in_chasm_stays_in_relevant_cte(self): + planner, gen, sources = _make_engine(_build_chasm_sources()) 
+ query = SemanticQuery( + measures=["sum(orders.amount)", "count(tickets.id)"], + dimensions=["customers.segment"], + filters=["tickets.priority = 'high'"], + ) + plan = planner.plan(query) + sql = gen.generate(plan, sources) + + orders_cte_start = sql.index("orders_agg AS") + tickets_cte_start = sql.index("tickets_agg AS") + orders_cte = sql[orders_cte_start:tickets_cte_start] + filter_index = sql.index("tickets.priority = 'high'") + + assert "tickets.priority = 'high'" not in orders_cte + assert filter_index > tickets_cte_start + assert sql.count("tickets.priority = 'high'") == 1 + sqlglot.parse(sql) + + def test_locality_order_by_dimension_uses_output_alias(self): + planner, gen, sources = _make_engine(_build_chasm_sources()) + query = SemanticQuery( + measures=["sum(orders.amount)", "count(tickets.id)"], + dimensions=["customers.segment"], + order_by=[{"field": "customers.segment", "direction": "asc"}], + ) + plan = planner.plan(query) + sql = gen.generate(plan, sources) + + assert "ORDER BY segment" in sql + assert "ORDER BY customers.segment" not in sql + sqlglot.parse(sql) + + sqlglot.parse(sql) + + +class TestThreeWayChasmTrap: + """Three independent measure sources → three _agg CTEs.""" + + def test_three_measure_sources(self): + customers = SourceDefinition( + name="customers", + table="public.customers", + grain=["id"], + columns=[ + SourceColumn(name="id", type="number"), + SourceColumn(name="segment", type="string"), + ], + ) + orders = SourceDefinition( + name="orders", + table="public.orders", + grain=["id"], + columns=[ + SourceColumn(name="id", type="number"), + SourceColumn(name="customer_id", type="number"), + SourceColumn(name="amount", type="number"), + ], + joins=[ + JoinDeclaration( + to="customers", + on="customer_id = customers.id", + relationship="many_to_one", + ) + ], + ) + tickets = SourceDefinition( + name="tickets", + table="public.tickets", + grain=["id"], + columns=[ + SourceColumn(name="id", type="number"), + 
SourceColumn(name="customer_id", type="number"), + ], + joins=[ + JoinDeclaration( + to="customers", + on="customer_id = customers.id", + relationship="many_to_one", + ) + ], + ) + payments = SourceDefinition( + name="payments", + table="public.payments", + grain=["id"], + columns=[ + SourceColumn(name="id", type="number"), + SourceColumn(name="customer_id", type="number"), + SourceColumn(name="amount", type="number"), + ], + joins=[ + JoinDeclaration( + to="customers", + on="customer_id = customers.id", + relationship="many_to_one", + ) + ], + ) + sources = { + "customers": customers, + "orders": orders, + "tickets": tickets, + "payments": payments, + } + planner, gen, sources = _make_engine(sources) + + query = SemanticQuery( + measures=[ + "sum(orders.amount)", + "count(tickets.id)", + "sum(payments.amount)", + ], + dimensions=["customers.segment"], + ) + plan = planner.plan(query) + sql = gen.generate(plan, sources) + + assert "orders_agg" in sql + assert "tickets_agg" in sql + assert "payments_agg" in sql + assert sql.upper().count("FULL JOIN") >= 2 + assert sql.upper().count("COALESCE") >= 1 + sqlglot.parse(sql) + + +class TestChasmWithPreDefinedFilters: + """Chasm trap where a measure has a pre-defined filter (CASE WHEN inside CTE).""" + + def test_filtered_measure_in_chasm_cte(self): + sources = _build_chasm_sources() + planner, gen, sources = _make_engine(sources) + + query = SemanticQuery( + measures=["orders.revenue", "tickets.ticket_count"], + dimensions=["customers.segment"], + ) + plan = planner.plan(query) + sql = gen.generate(plan, sources) + + assert "orders_agg" in sql + assert "tickets_agg" in sql + sqlglot.parse(sql) + + +class TestChasmWithTimeGranularity: + """Chasm trap with time dimension.""" + + def test_time_dimension_in_chasm(self): + customers = SourceDefinition( + name="customers", + table="public.customers", + grain=["id"], + columns=[ + SourceColumn(name="id", type="number"), + SourceColumn(name="created_at", type="time", 
role="time"), + ], + ) + orders = SourceDefinition( + name="orders", + table="public.orders", + grain=["id"], + columns=[ + SourceColumn(name="id", type="number"), + SourceColumn(name="customer_id", type="number"), + SourceColumn(name="amount", type="number"), + ], + joins=[ + JoinDeclaration( + to="customers", + on="customer_id = customers.id", + relationship="many_to_one", + ) + ], + ) + tickets = SourceDefinition( + name="tickets", + table="public.tickets", + grain=["id"], + columns=[ + SourceColumn(name="id", type="number"), + SourceColumn(name="customer_id", type="number"), + ], + joins=[ + JoinDeclaration( + to="customers", + on="customer_id = customers.id", + relationship="many_to_one", + ) + ], + ) + sources = {"customers": customers, "orders": orders, "tickets": tickets} + planner, gen, sources = _make_engine(sources) + + query = SemanticQuery( + measures=["sum(orders.amount)", "count(tickets.id)"], + dimensions=[{"field": "customers.created_at", "granularity": "month"}], + ) + plan = planner.plan(query) + sql = gen.generate(plan, sources) + + assert "orders_agg" in sql + assert "tickets_agg" in sql + assert "DATE_TRUNC" in sql.upper() + sqlglot.parse(sql) + + +class TestChasmWithDerivedMeasures: + """Chasm trap with derived measures referencing measures from different CTEs.""" + + def test_derived_across_chasm_ctes(self): + sources = _build_chasm_sources() + planner, gen, sources = _make_engine(sources) + + query = SemanticQuery( + measures=[ + {"expr": "sum(orders.amount)", "name": "total_rev"}, + {"expr": "count(tickets.id)", "name": "ticket_count"}, + {"expr": "total_rev / ticket_count", "name": "rev_per_ticket"}, + ], + dimensions=["customers.segment"], + ) + plan = planner.plan(query) + sql = gen.generate(plan, sources) + + assert "orders_agg" in sql + assert "tickets_agg" in sql + assert "rev_per_ticket" in sql + sqlglot.parse(sql) + + +class TestFactSideDimensionsInChasm: + """LIMIT 1: Fact-side dimensions in chasm trap (local to one CTE only).""" 
+ + def test_fact_side_dimension_in_chasm_raises_error(self): + """Asymmetric dim from fact_a only → raises error (would cause FULL JOIN fan-out).""" + hub = SourceDefinition( + name="hub", + table="public.hub", + grain=["id"], + columns=[ + SourceColumn(name="id", type="number"), + SourceColumn(name="segment", type="string"), + ], + ) + fact_a = SourceDefinition( + name="fact_a", + table="public.fact_a", + grain=["id"], + columns=[ + SourceColumn(name="id", type="number"), + SourceColumn(name="hub_id", type="number"), + SourceColumn(name="val", type="number"), + SourceColumn(name="category", type="string"), + ], + joins=[ + JoinDeclaration( + to="hub", on="hub_id = hub.id", relationship="many_to_one" + ) + ], + ) + fact_b = SourceDefinition( + name="fact_b", + table="public.fact_b", + grain=["id"], + columns=[ + SourceColumn(name="id", type="number"), + SourceColumn(name="hub_id", type="number"), + SourceColumn(name="val", type="number"), + ], + joins=[ + JoinDeclaration( + to="hub", on="hub_id = hub.id", relationship="many_to_one" + ) + ], + ) + sources = {"hub": hub, "fact_a": fact_a, "fact_b": fact_b} + planner, gen, sources = _make_engine(sources) + + query = SemanticQuery( + measures=["sum(fact_a.val)", "sum(fact_b.val)"], + dimensions=["fact_a.category"], + ) + plan = planner.plan(query) + with pytest.raises(ValueError, match="Asymmetric dimension grain"): + gen.generate(plan, sources) + + def test_shared_and_local_dims_in_chasm_raises_error(self): + """hub.segment (shared) + fact_a.category (local) → raises error.""" + hub = SourceDefinition( + name="hub", + table="public.hub", + grain=["id"], + columns=[ + SourceColumn(name="id", type="number"), + SourceColumn(name="segment", type="string"), + ], + ) + fact_a = SourceDefinition( + name="fact_a", + table="public.fact_a", + grain=["id"], + columns=[ + SourceColumn(name="id", type="number"), + SourceColumn(name="hub_id", type="number"), + SourceColumn(name="val", type="number"), + 
SourceColumn(name="category", type="string"), + ], + joins=[ + JoinDeclaration( + to="hub", on="hub_id = hub.id", relationship="many_to_one" + ) + ], + ) + fact_b = SourceDefinition( + name="fact_b", + table="public.fact_b", + grain=["id"], + columns=[ + SourceColumn(name="id", type="number"), + SourceColumn(name="hub_id", type="number"), + SourceColumn(name="val", type="number"), + ], + joins=[ + JoinDeclaration( + to="hub", on="hub_id = hub.id", relationship="many_to_one" + ) + ], + ) + sources = {"hub": hub, "fact_a": fact_a, "fact_b": fact_b} + planner, gen, sources = _make_engine(sources) + + query = SemanticQuery( + measures=["sum(fact_a.val)", "sum(fact_b.val)"], + dimensions=["hub.segment", "fact_a.category"], + ) + plan = planner.plan(query) + with pytest.raises(ValueError, match="Asymmetric dimension grain"): + gen.generate(plan, sources) + + +class TestHavingNotInIndividualCtes: + """LIMIT 3: HAVING clause should NOT appear inside individual CTEs in chasm trap.""" + + def test_having_not_in_individual_ctes(self): + """Verify aggregate filters are NOT inside any individual CTE but on the outer query.""" + sources = _build_chasm_sources() + planner, gen, sources = _make_engine(sources) + query = SemanticQuery( + measures=["sum(orders.amount)", "count(tickets.id)"], + dimensions=["customers.segment"], + filters=["sum(orders.amount) > 10000"], + ) + plan = planner.plan(query) + sql = gen.generate(plan, sources) + + # Extract individual CTEs + orders_cte_start = sql.index("orders_agg AS") + tickets_cte_start = sql.index("tickets_agg AS") + orders_cte = sql[orders_cte_start:tickets_cte_start] + + # Find the outer SELECT (after all CTEs) + outer_select_start = sql.index( + "SELECT", tickets_cte_start + len("tickets_agg AS") + ) + tickets_cte = sql[tickets_cte_start:outer_select_start] + + # Filter should NOT appear inside either CTE + assert "HAVING" not in orders_cte.upper(), ( + f"HAVING found in orders CTE: {orders_cte}" + ) + assert "HAVING" not in 
tickets_cte.upper(), ( + f"HAVING found in tickets CTE: {tickets_cte}" + ) + + # Filter should appear as WHERE on the outer query (no GROUP BY at this level) + outer_query = sql[outer_select_start:] + assert "WHERE" in outer_query.upper() + assert "10000" in outer_query + + sqlglot.parse(sql) + + +class TestMeasureFilterInCTE: + """Fix 6: Measure-level filter sources must be included in CTE joins.""" + + def test_measure_filter_source_included_in_cte_joins(self): + customers = SourceDefinition( + name="customers", + table="public.customers", + grain=["id"], + columns=[ + SourceColumn(name="id", type="number"), + SourceColumn(name="segment", type="string"), + ], + ) + orders = SourceDefinition( + name="orders", + table="public.orders", + grain=["id"], + columns=[ + SourceColumn(name="id", type="number"), + SourceColumn(name="customer_id", type="number"), + SourceColumn(name="amount", type="number"), + ], + joins=[ + JoinDeclaration( + to="customers", + on="customer_id = customers.id", + relationship="many_to_one", + ) + ], + measures=[ + MeasureDefinition( + name="vip_revenue", + expr="sum(amount)", + filter="customers.segment = 'VIP'", + ) + ], + ) + tickets = SourceDefinition( + name="tickets", + table="public.tickets", + grain=["id"], + columns=[ + SourceColumn(name="id", type="number"), + SourceColumn(name="customer_id", type="number"), + ], + joins=[ + JoinDeclaration( + to="customers", + on="customer_id = customers.id", + relationship="many_to_one", + ) + ], + ) + sources = {"customers": customers, "orders": orders, "tickets": tickets} + planner, gen, sources = _make_engine(sources) + + query = SemanticQuery( + measures=["orders.vip_revenue", "count(tickets.id)"], + dimensions=["customers.segment"], + ) + plan = planner.plan(query) + sql = gen.generate(plan, sources) + + # The orders_agg CTE must join customers (for the measure filter) + orders_cte_start = sql.index("orders_agg AS") + tickets_cte_start = sql.index("tickets_agg AS") + orders_cte = 
sql[orders_cte_start:tickets_cte_start] + assert "customers" in orders_cte.lower() + assert "VIP" in sql + sqlglot.parse(sql) + + +class TestDerivedHavingInLocality: + """Fix 7: Derived HAVING filters must appear in the outer query.""" + + def test_derived_having_filter_applied(self): + planner, gen, sources = _make_engine(_build_chasm_sources()) + query = SemanticQuery( + measures=[ + {"expr": "sum(orders.amount)", "name": "total_rev"}, + {"expr": "count(tickets.id)", "name": "ticket_count"}, + {"expr": "total_rev / ticket_count", "name": "rev_per_ticket"}, + ], + dimensions=["customers.segment"], + filters=["rev_per_ticket > 10"], + ) + plan = planner.plan(query) + sql = gen.generate(plan, sources) + + # The derived filter should appear in the outer query, inlined + # (not as a bare alias which would be invalid in WHERE) + assert "> 10" in sql + + # It should NOT be inside any CTE + orders_cte_start = sql.index("orders_agg AS") + tickets_cte_start = sql.index("tickets_agg AS") + orders_cte = sql[orders_cte_start:tickets_cte_start] + assert "rev_per_ticket" not in orders_cte + + # Find outer query (after last CTE) + outer_start = sql.index("SELECT", tickets_cte_start + len("tickets_agg AS")) + outer_query = sql[outer_start:] + assert "WHERE" in outer_query.upper() + # The derived filter is inlined (not as bare alias) for valid SQL + assert "> 10" in outer_query + sqlglot.parse(sql) + + +# ── Bug regression tests (bugs 11-17) ───────────────────────────────── + + +def _build_alias_sources(): + """orders with two aliased joins to the same customers table.""" + customers = SourceDefinition( + name="customers", + table="public.customers", + grain=["id"], + columns=[ + SourceColumn(name="id", type="number"), + SourceColumn(name="name", type="string"), + SourceColumn(name="lifetime_value", type="number"), + ], + measures=[MeasureDefinition(name="total_ltv", expr="sum(lifetime_value)")], + ) + orders = SourceDefinition( + name="orders", + table="public.orders", + 
grain=["id"], + columns=[ + SourceColumn(name="id", type="number"), + SourceColumn(name="billing_customer_id", type="number"), + SourceColumn(name="shipping_customer_id", type="number"), + SourceColumn(name="amount", type="number"), + ], + joins=[ + JoinDeclaration( + to="customers", + on="billing_customer_id = customers.id", + relationship="many_to_one", + alias="billing_customer", + ), + JoinDeclaration( + to="customers", + on="shipping_customer_id = customers.id", + relationship="many_to_one", + alias="shipping_customer", + ), + ], + ) + return {"customers": customers, "orders": orders} + + +def _build_m2o_chain_sources(): + """churn_risk → customers via m2o — NOT a chasm trap.""" + customers = SourceDefinition( + name="customers", + table="public.customers", + grain=["id"], + columns=[ + SourceColumn(name="id", type="number"), + SourceColumn(name="name", type="string"), + SourceColumn(name="segment", type="string"), + ], + ) + churn_risk = SourceDefinition( + name="churn_risk", + sql="SELECT customer_id, score FROM ml_scores", + grain=["customer_id"], + columns=[ + SourceColumn(name="customer_id", type="number"), + SourceColumn(name="score", type="number"), + ], + joins=[ + JoinDeclaration( + to="customers", + on="customer_id = customers.id", + relationship="many_to_one", + ) + ], + ) + return {"customers": customers, "churn_risk": churn_risk} + + +class TestBug11_PredefinedMeasureViaAlias: + """Predefined measures on aliased sources should resolve correctly.""" + + def test_predefined_measure_via_alias(self): + sources = _build_alias_sources() + planner, gen, sources = _make_engine(sources) + query = SemanticQuery( + measures=["billing_customer.total_ltv"], + dimensions=["shipping_customer.name"], + ) + plan = planner.plan(query) + sql = gen.generate(plan, sources) + + # Should resolve the predefined measure + assert "sum" in sql.lower() + assert "lifetime_value" in sql.lower() + sqlglot.parse(sql) + + def test_runtime_aggregate_on_alias(self): + sources = 
_build_alias_sources() + planner, gen, sources = _make_engine(sources) + query = SemanticQuery( + measures=["sum(orders.amount)"], + dimensions=["billing_customer.name"], + ) + plan = planner.plan(query) + sql = gen.generate(plan, sources) + + assert "sum(orders.amount)" in sql.lower() + sqlglot.parse(sql) + + +class TestBug12_MixedAggNonaggFilter: + """Compound filter with both aggregate and non-aggregate parts should be split.""" + + def test_mixed_filter_split(self): + sources = { + "orders": SourceDefinition( + name="orders", + table="public.orders", + grain=["id"], + columns=[ + SourceColumn(name="id", type="number"), + SourceColumn(name="amount", type="number"), + SourceColumn(name="status", type="string"), + SourceColumn(name="category", type="string"), + ], + ), + } + planner, gen, sources = _make_engine(sources) + query = SemanticQuery( + measures=["sum(orders.amount)"], + dimensions=["orders.category"], + filters=["sum(orders.amount) > 100 AND orders.status = 'active'"], + ) + plan = planner.plan(query) + sql = gen.generate(plan, sources) + + # Non-aggregate part should be in WHERE, aggregate in HAVING + assert "WHERE" in sql + assert "HAVING" in sql + assert ( + "status" in sql.split("WHERE")[1].split("GROUP BY")[0] + ) # in WHERE section + assert "sum" in sql.split("HAVING")[1].lower() # in HAVING section + sqlglot.parse(sql) + + def test_separate_filters_still_work(self): + sources = { + "orders": SourceDefinition( + name="orders", + table="public.orders", + grain=["id"], + columns=[ + SourceColumn(name="id", type="number"), + SourceColumn(name="amount", type="number"), + SourceColumn(name="status", type="string"), + SourceColumn(name="category", type="string"), + ], + ), + } + planner, gen, sources = _make_engine(sources) + query = SemanticQuery( + measures=["sum(orders.amount)"], + dimensions=["orders.category"], + filters=["orders.status = 'active'", "sum(orders.amount) > 100"], + ) + plan = planner.plan(query) + sql = gen.generate(plan, sources) + 
+ assert "WHERE" in sql + assert "HAVING" in sql + sqlglot.parse(sql) + + +class TestBug13_FalseChasm_AliasAggregate: + """Runtime aggregate on aliased source should not trigger false chasm detection.""" + + def test_no_false_chasm_between_aliases(self): + sources = _build_alias_sources() + planner, gen, sources = _make_engine(sources) + query = SemanticQuery( + measures=["sum(orders.amount)"], + dimensions=["billing_customer.name", "shipping_customer.name"], + ) + plan = planner.plan(query) + assert not plan.has_fan_out, "Should not detect fan-out between alias siblings" + sql = gen.generate(plan, sources) + sqlglot.parse(sql) + + +class TestBug14_HavingDerivedCrossCTE: + """HAVING on derived cross-CTE measure must inline the expression, not use alias.""" + + def test_having_derived_inlined(self): + planner, gen, sources = _make_engine(_build_chasm_sources()) + query = SemanticQuery( + measures=[ + {"expr": "sum(orders.amount)", "name": "rev"}, + {"expr": "count(tickets.id)", "name": "cnt"}, + {"expr": "rev - cnt", "name": "net"}, + ], + dimensions=["customers.segment"], + filters=["net > 1000"], + ) + plan = planner.plan(query) + sql = gen.generate(plan, sources) + + # The outer WHERE should NOT reference 'net' directly (that's a SELECT alias) + outer_idx = sql.rindex("SELECT") + outer = sql[outer_idx:] + where_section = outer[outer.index("WHERE") :] + # Should contain the inlined expression, not bare 'net' + assert "COALESCE" in where_section or "orders_agg" in where_section + assert "> 1000" in where_section + sqlglot.parse(sql) + + +class TestBug15_DivisionByZeroCrossCTE: + """Cross-CTE derived division should use NULLIF to prevent division by zero.""" + + def test_nullif_on_denominator(self): + hub = SourceDefinition( + name="hub", + table="public.hub", + grain=["id"], + columns=[ + SourceColumn(name="id", type="number"), + SourceColumn(name="segment", type="string"), + ], + ) + orders = SourceDefinition( + name="orders", + table="public.orders", + 
grain=["id"], + columns=[ + SourceColumn(name="id", type="number"), + SourceColumn(name="hub_id", type="number"), + SourceColumn(name="amount", type="number"), + ], + joins=[ + JoinDeclaration( + to="hub", on="hub_id = hub.id", relationship="many_to_one" + ) + ], + measures=[MeasureDefinition(name="revenue", expr="sum(amount)")], + ) + returns = SourceDefinition( + name="returns", + table="public.returns", + grain=["id"], + columns=[ + SourceColumn(name="id", type="number"), + SourceColumn(name="hub_id", type="number"), + SourceColumn(name="refund", type="number"), + ], + joins=[ + JoinDeclaration( + to="hub", on="hub_id = hub.id", relationship="many_to_one" + ) + ], + measures=[MeasureDefinition(name="total_refunds", expr="sum(refund)")], + ) + sources = {"hub": hub, "orders": orders, "returns": returns} + planner, gen, sources = _make_engine(sources) + query = SemanticQuery( + measures=[ + "orders.revenue", + "returns.total_refunds", + { + "expr": "returns.total_refunds / orders.revenue", + "name": "refund_rate", + }, + ], + dimensions=["hub.segment"], + ) + plan = planner.plan(query) + sql = gen.generate(plan, sources) + + # Denominator should be wrapped in NULLIF to prevent division by zero + assert "NULLIF" in sql + sqlglot.parse(sql) + + +class TestBug16_FalseChasmM2OChain: + """Measures from sources on the same m2o chain should not trigger chasm detection.""" + + def test_m2o_chain_no_chasm(self): + sources = _build_m2o_chain_sources() + planner, gen, sources = _make_engine(sources) + query = SemanticQuery( + measures=["avg(churn_risk.score)", "count(customers.id)"], + dimensions=["customers.segment"], + ) + plan = planner.plan(query) + # Should NOT be a chasm — churn_risk → customers is m2o + assert not plan.has_fan_out, "m2o chain should not be detected as chasm trap" + sql = gen.generate(plan, sources) + + # Should be simple path, not locality CTEs + assert "_agg" not in sql + assert "FULL JOIN" not in sql + sqlglot.parse(sql) + + +class 
TestBug17_EmptyMeasuresSelectDistinct: + """Dimension-only queries should use SELECT DISTINCT, not GROUP BY.""" + + def test_select_distinct_no_group_by(self): + sources = { + "orders": SourceDefinition( + name="orders", + table="public.orders", + grain=["id"], + columns=[ + SourceColumn(name="id", type="number"), + SourceColumn(name="status", type="string"), + ], + ), + } + planner, gen, sources = _make_engine(sources) + query = SemanticQuery( + measures=[], + dimensions=["orders.status"], + ) + plan = planner.plan(query) + sql = gen.generate(plan, sources) + + assert "SELECT DISTINCT" in sql + assert "GROUP BY" not in sql + sqlglot.parse(sql) + + +class TestDerivedChain3LevelLocality: + """3-level derived measure chains in locality mode must fully inline.""" + + def _build_sources(self): + hub = SourceDefinition( + name="hub", + table="public.hub", + grain=["id"], + columns=[ + SourceColumn(name="id", type="number"), + SourceColumn(name="segment", type="string"), + ], + ) + sales = SourceDefinition( + name="sales", + table="public.sales", + grain=["id"], + columns=[ + SourceColumn(name="id", type="number"), + SourceColumn(name="hub_id", type="number"), + SourceColumn(name="amount", type="number"), + ], + joins=[ + JoinDeclaration( + to="hub", on="hub_id = hub.id", relationship="many_to_one" + ) + ], + ) + costs = SourceDefinition( + name="costs", + table="public.costs", + grain=["id"], + columns=[ + SourceColumn(name="id", type="number"), + SourceColumn(name="hub_id", type="number"), + SourceColumn(name="cost_amount", type="number"), + ], + joins=[ + JoinDeclaration( + to="hub", on="hub_id = hub.id", relationship="many_to_one" + ) + ], + ) + return {"hub": hub, "sales": sales, "costs": costs} + + def test_3_level_derived_inlines_fully(self): + """profit = revenue - cost, margin = profit / revenue — both must inline.""" + planner, gen, sources = _make_engine(self._build_sources()) + query = SemanticQuery( + measures=[ + {"expr": "sum(sales.amount)", "name": 
"revenue"}, + {"expr": "sum(costs.cost_amount)", "name": "total_cost"}, + {"expr": "revenue - total_cost", "name": "profit"}, + {"expr": "profit / revenue", "name": "margin"}, + ], + dimensions=["hub.segment"], + ) + plan = planner.plan(query) + sql = gen.generate(plan, sources) + + # margin must NOT reference 'profit' as a bare alias + assert "CAST(profit " not in sql + # Should reference CTE columns with COALESCE + assert "COALESCE(sales_agg.revenue" in sql + assert "COALESCE(costs_agg.total_cost" in sql + # Denominator should use NULLIF + assert "NULLIF" in sql + sqlglot.parse(sql) + + def test_having_on_3_level_derived(self): + """HAVING on margin must recursively inline profit and revenue.""" + planner, gen, sources = _make_engine(self._build_sources()) + query = SemanticQuery( + measures=[ + {"expr": "sum(sales.amount)", "name": "revenue"}, + {"expr": "sum(costs.cost_amount)", "name": "total_cost"}, + {"expr": "revenue - total_cost", "name": "profit"}, + {"expr": "profit / revenue", "name": "margin"}, + ], + dimensions=["hub.segment"], + filters=["margin > 0.1"], + ) + plan = planner.plan(query) + sql = gen.generate(plan, sources) + + # WHERE clause must not reference 'profit' or 'margin' as bare aliases + where_idx = sql.upper().find("WHERE") + where_clause = sql[where_idx:] if where_idx >= 0 else "" + assert ( + "margin" not in where_clause.split("AS")[-1] + if "AS" in where_clause + else True + ) + # Must contain inlined CTE references + assert "sales_agg.revenue" in sql + assert "costs_agg.total_cost" in sql + sqlglot.parse(sql) + + +class TestDerivedWithPredefinedQualifiedRef: + """Derived measures referencing predefined by source.name must expand both.""" + + def test_mixed_bare_and_qualified_deps(self): + sources = { + "orders": SourceDefinition( + name="orders", + table="public.orders", + grain=["id"], + columns=[ + SourceColumn(name="id", type="number"), + SourceColumn(name="amount", type="number"), + SourceColumn(name="status", type="string"), + ], 
+ measures=[ + MeasureDefinition( + name="revenue", + expr="sum(amount)", + filter="status != 'refunded'", + ) + ], + ), + } + planner, gen, sources = _make_engine(sources) + query = SemanticQuery( + measures=[ + "orders.revenue", + {"expr": "sum(orders.amount)", "name": "gross"}, + {"expr": "gross - orders.revenue", "name": "refund_amount"}, + ], + dimensions=["orders.status"], + ) + plan = planner.plan(query) + sql = gen.generate(plan, sources) + + # refund_amount must inline both gross and revenue + assert ( + "orders.revenue" not in sql.split("AS refund_amount")[0].split(",")[-1] + if "AS refund_amount" in sql + else True + ) + # Should contain actual SUM expressions + assert "SUM(" in sql + assert "CASE WHEN" in sql + sqlglot.parse(sql) + + +class TestWindowFunctionRejection: + """Window functions in measures must be rejected with a clear error.""" + + def test_window_function_in_named_measure(self): + sources = { + "orders": SourceDefinition( + name="orders", + table="public.orders", + grain=["id"], + columns=[ + SourceColumn(name="id", type="number"), + SourceColumn(name="amount", type="number"), + SourceColumn(name="created_at", type="time"), + ], + ), + } + planner, gen, sources = _make_engine(sources) + query = SemanticQuery( + measures=[ + { + "expr": "sum(orders.amount) OVER (ORDER BY orders.created_at)", + "name": "running", + } + ], + dimensions=["orders.id"], + ) + with pytest.raises(ValueError, match="Window functions"): + planner.plan(query) + + def test_row_number_rejected(self): + sources = { + "orders": SourceDefinition( + name="orders", + table="public.orders", + grain=["id"], + columns=[ + SourceColumn(name="id", type="number"), + SourceColumn(name="amount", type="number"), + ], + ), + } + planner, gen, sources = _make_engine(sources) + query = SemanticQuery( + measures=[ + "row_number() OVER (PARTITION BY orders.id ORDER BY orders.amount)" + ], + dimensions=["orders.id"], + ) + with pytest.raises(ValueError, match="Window functions"): + 
planner.plan(query) + + +class TestCompositeJoinKeySqlGeneration: + """Composite join keys generate multi-condition ON clauses.""" + + def test_composite_key_in_sql(self): + items = SourceDefinition( + name="items", + table="public.items", + grain=["order_id", "product_id"], + columns=[ + SourceColumn(name="order_id", type="number"), + SourceColumn(name="product_id", type="number"), + SourceColumn(name="warehouse_id", type="number"), + SourceColumn(name="qty", type="number"), + ], + joins=[ + JoinDeclaration( + to="inventory", + on="product_id = inventory.product_id AND warehouse_id = inventory.warehouse_id", + relationship="many_to_one", + ) + ], + ) + inv = SourceDefinition( + name="inventory", + table="public.inventory", + grain=["product_id", "warehouse_id"], + columns=[ + SourceColumn(name="product_id", type="number"), + SourceColumn(name="warehouse_id", type="number"), + SourceColumn(name="stock", type="number"), + ], + ) + planner, gen, sources = _make_engine({"items": items, "inventory": inv}) + query = SemanticQuery( + measures=["sum(items.qty)"], + dimensions=["inventory.stock"], + ) + plan = planner.plan(query) + sql = gen.generate(plan, sources) + + assert ( + "items.product_id = inventory.product_id" in sql + or "inventory.product_id = items.product_id" in sql + ) + assert ( + "items.warehouse_id = inventory.warehouse_id" in sql + or "inventory.warehouse_id = items.warehouse_id" in sql + ) + assert " AND " in sql + sqlglot.parse(sql) + + +class TestFilterUnreachableInLocality: + """BUG 22: Filters on sources unreachable via safe edges from all measure + sources in a chasm trap should raise an error, not be silently dropped.""" + + def _build_sources_with_details(self): + hub = SourceDefinition( + name="hub", + table="public.hub", + grain=["id"], + columns=[ + SourceColumn(name="id", type="number"), + SourceColumn(name="segment", type="string"), + ], + ) + details = SourceDefinition( + name="details", + table="public.details", + grain=["id"], + 
columns=[ + SourceColumn(name="id", type="number"), + SourceColumn(name="hub_id", type="number"), + SourceColumn(name="tag", type="string"), + ], + joins=[ + JoinDeclaration( + to="hub", on="hub_id = hub.id", relationship="many_to_one" + ) + ], + ) + fact_a = SourceDefinition( + name="fact_a", + table="public.fact_a", + grain=["id"], + columns=[ + SourceColumn(name="id", type="number"), + SourceColumn(name="hub_id", type="number"), + SourceColumn(name="val_a", type="number"), + ], + joins=[ + JoinDeclaration( + to="hub", on="hub_id = hub.id", relationship="many_to_one" + ) + ], + ) + fact_b = SourceDefinition( + name="fact_b", + table="public.fact_b", + grain=["id"], + columns=[ + SourceColumn(name="id", type="number"), + SourceColumn(name="hub_id", type="number"), + SourceColumn(name="val_b", type="number"), + ], + joins=[ + JoinDeclaration( + to="hub", on="hub_id = hub.id", relationship="many_to_one" + ) + ], + ) + return {"hub": hub, "details": details, "fact_a": fact_a, "fact_b": fact_b} + + def test_filter_on_unreachable_source_raises_error(self): + """Filter on 'details' (reachable only via o2m from hub) should error.""" + planner, gen, sources = _make_engine(self._build_sources_with_details()) + query = SemanticQuery( + measures=["sum(fact_a.val_a)", "sum(fact_b.val_b)"], + dimensions=["hub.segment"], + filters=["details.tag = 'important'"], + ) + with pytest.raises(ValueError, match="not reachable via many_to_one"): + planner.plan(query) + + def test_filter_on_reachable_source_works(self): + """Filter on 'hub' (reachable from both facts via m2o) should work.""" + planner, gen, sources = _make_engine(self._build_sources_with_details()) + query = SemanticQuery( + measures=["sum(fact_a.val_a)", "sum(fact_b.val_b)"], + dimensions=["hub.segment"], + filters=["hub.segment != 'x'"], + ) + plan = planner.plan(query) + sql = gen.generate(plan, sources) + # Filter should be pushed into both CTEs + assert sql.count("hub.segment != 'x'") >= 1 + sqlglot.parse(sql) + + 
+class TestOrderByCaseInsensitive: + """BUG 23: ORDER BY field matching should be case-insensitive.""" + + def test_order_by_measure_different_case(self): + sources = { + "orders": SourceDefinition( + name="orders", + table="public.orders", + grain=["id"], + columns=[ + SourceColumn(name="id", type="number"), + SourceColumn(name="amount", type="number"), + SourceColumn(name="status", type="string"), + ], + measures=[MeasureDefinition(name="revenue", expr="sum(amount)")], + ), + } + planner, gen, sources = _make_engine(sources) + query = SemanticQuery( + measures=["orders.revenue"], + dimensions=["orders.status"], + order_by=[{"field": "Revenue", "direction": "desc"}], + ) + plan = planner.plan(query) + sql = gen.generate(plan, sources) + assert "ORDER BY revenue DESC" in sql + sqlglot.parse(sql) + + def test_order_by_dimension_different_case(self): + sources = { + "orders": SourceDefinition( + name="orders", + table="public.orders", + grain=["id"], + columns=[ + SourceColumn(name="id", type="number"), + SourceColumn(name="amount", type="number"), + SourceColumn(name="status", type="string"), + ], + measures=[MeasureDefinition(name="revenue", expr="sum(amount)")], + ), + } + planner, gen, sources = _make_engine(sources) + query = SemanticQuery( + measures=["orders.revenue"], + dimensions=["orders.status"], + order_by=[{"field": "STATUS", "direction": "asc"}], + ) + plan = planner.plan(query) + sql = gen.generate(plan, sources) + assert "ORDER BY" in sql + sqlglot.parse(sql) + + +class TestMultiSourceMeasureInLocality: + """BUG 24: Non-derived measures referencing sources from different measure + groups should be rejected, not silently placed in one CTE.""" + + def test_multi_source_aggregate_raises_error(self): + sources = { + "hub": SourceDefinition( + name="hub", + table="public.hub", + grain=["id"], + columns=[ + SourceColumn(name="id", type="number"), + SourceColumn(name="segment", type="string"), + ], + ), + "fact_a": SourceDefinition( + name="fact_a", + 
table="public.fact_a", + grain=["id"], + columns=[ + SourceColumn(name="id", type="number"), + SourceColumn(name="hub_id", type="number"), + SourceColumn(name="val", type="number"), + ], + joins=[ + JoinDeclaration( + to="hub", on="hub_id = hub.id", relationship="many_to_one" + ) + ], + ), + "fact_b": SourceDefinition( + name="fact_b", + table="public.fact_b", + grain=["id"], + columns=[ + SourceColumn(name="id", type="number"), + SourceColumn(name="hub_id", type="number"), + SourceColumn(name="val", type="number"), + ], + joins=[ + JoinDeclaration( + to="hub", on="hub_id = hub.id", relationship="many_to_one" + ) + ], + ), + } + planner, gen, sources = _make_engine(sources) + query = SemanticQuery( + measures=[ + "sum(fact_a.val)", + "sum(fact_b.val)", + {"expr": "sum(fact_a.val) / count(fact_b.val)", "name": "ratio"}, + ], + dimensions=["hub.segment"], + ) + with pytest.raises(ValueError, match="references multiple independent sources"): + planner.plan(query) + + def test_decomposed_cross_source_works(self): + """When decomposed into separate measures + derived, should work.""" + sources = { + "hub": SourceDefinition( + name="hub", + table="public.hub", + grain=["id"], + columns=[ + SourceColumn(name="id", type="number"), + SourceColumn(name="segment", type="string"), + ], + ), + "fact_a": SourceDefinition( + name="fact_a", + table="public.fact_a", + grain=["id"], + columns=[ + SourceColumn(name="id", type="number"), + SourceColumn(name="hub_id", type="number"), + SourceColumn(name="val", type="number"), + ], + joins=[ + JoinDeclaration( + to="hub", on="hub_id = hub.id", relationship="many_to_one" + ) + ], + ), + "fact_b": SourceDefinition( + name="fact_b", + table="public.fact_b", + grain=["id"], + columns=[ + SourceColumn(name="id", type="number"), + SourceColumn(name="hub_id", type="number"), + SourceColumn(name="val", type="number"), + ], + joins=[ + JoinDeclaration( + to="hub", on="hub_id = hub.id", relationship="many_to_one" + ) + ], + ), + } + planner, gen, 
sources = _make_engine(sources) + query = SemanticQuery( + measures=[ + {"expr": "sum(fact_a.val)", "name": "total_a"}, + {"expr": "count(fact_b.val)", "name": "count_b"}, + {"expr": "total_a / count_b", "name": "ratio"}, + ], + dimensions=["hub.segment"], + ) + plan = planner.plan(query) + sql = gen.generate(plan, sources) + # Derived measure should be in outer SELECT, not in CTE + assert "ratio" in sql + assert "COALESCE" in sql or "NULLIF" in sql + sqlglot.parse(sql) + + +# ── 12.4 Fix: Multi-source measure expressions in CTE builder ─────── + + +class TestMultiSourceMeasureExprInLocality: + """Verify that _collect_cte_target_sources includes sources from + measure expressions, not just dimensions and filters.""" + + @staticmethod + def _sources(): + hub = SourceDefinition( + name="hub", + table="public.hub", + grain=["id"], + columns=[ + SourceColumn(name="id", type="number"), + SourceColumn(name="segment", type="string"), + SourceColumn(name="weight", type="number"), + ], + ) + fact_a = SourceDefinition( + name="fact_a", + table="public.fact_a", + grain=["id"], + columns=[ + SourceColumn(name="id", type="number"), + SourceColumn(name="hub_id", type="number"), + SourceColumn(name="val", type="number"), + ], + joins=[ + JoinDeclaration( + to="hub", on="hub_id = hub.id", relationship="many_to_one" + ) + ], + ) + return {"hub": hub, "fact_a": fact_a} + + def test_measure_referencing_secondary_source_generates_join(self): + """sum(fact_a.val * hub.weight) should JOIN hub inside the CTE.""" + planner, gen, sources = _make_engine(self._sources()) + query = SemanticQuery( + measures=["sum(fact_a.val * hub.weight)"], + dimensions=["hub.segment"], + ) + plan = planner.plan(query) + sql = gen.generate(plan, sources) + # The CTE must join hub to compute the measure expression + assert "JOIN" in sql + assert "hub.weight" in sql or "hub" in sql + sqlglot.parse(sql) + + +# ── 12.10 Fix: HAVING count(x) = 0 in aggregate locality ──────────── + + +class 
TestHavingCountZeroInLocality: + """COALESCE wrapping in HAVING filters for multi-CTE FULL JOIN mode.""" + + def test_count_zero_filter_uses_coalesce(self): + """count(tickets.id) = 0 should use COALESCE so NULL → 0.""" + planner, gen, sources = _make_engine(_build_chasm_sources()) + query = SemanticQuery( + measures=["sum(orders.amount)", "count(tickets.id)"], + dimensions=["customers.segment"], + filters=["count(tickets.id) = 0"], + ) + plan = planner.plan(query) + sql = gen.generate(plan, sources) + assert "COALESCE" in sql + # The filter should match NULL rows (segments with no tickets) + assert "= 0" in sql + sqlglot.parse(sql) + + def test_sum_gt_filter_uses_coalesce(self): + """sum(orders.amount) > 1000 with COALESCE: NULL → 0 > 1000 → false (correct).""" + planner, gen, sources = _make_engine(_build_chasm_sources()) + query = SemanticQuery( + measures=["sum(orders.amount)", "count(tickets.id)"], + dimensions=["customers.segment"], + filters=["sum(orders.amount) > 1000"], + ) + plan = planner.plan(query) + sql = gen.generate(plan, sources) + assert "COALESCE" in sql + assert "> 1000" in sql + sqlglot.parse(sql) + + def test_single_group_no_coalesce_in_having(self): + """Single measure group → no FULL JOIN → no COALESCE needed.""" + planner, gen, sources = _make_engine(_build_chasm_sources()) + query = SemanticQuery( + measures=["sum(orders.amount)"], + dimensions=["customers.segment"], + filters=["sum(orders.amount) > 100"], + ) + plan = planner.plan(query) + sql = gen.generate(plan, sources) + # Single-CTE locality: no COALESCE in HAVING filter + assert "HAVING" in sql.upper() + assert "COALESCE" not in sql.split("HAVING")[1] if "HAVING" in sql else True + sqlglot.parse(sql) diff --git a/python/klo-sl/tests/test_cli.py b/python/klo-sl/tests/test_cli.py new file mode 100644 index 00000000..c78eb1e4 --- /dev/null +++ b/python/klo-sl/tests/test_cli.py @@ -0,0 +1,447 @@ +"""Tests for the CLI interface (semantic_layer.cli).""" + +from __future__ import 
annotations + +import json +from io import StringIO +from pathlib import Path +from unittest.mock import patch + +import pytest + +from semantic_layer.cli import main, print_plan +from semantic_layer.graph import JoinGraph +from semantic_layer.models import ( + JoinDeclaration, + SemanticQuery, + SourceColumn, + SourceDefinition, +) +from semantic_layer.planner import QueryPlanner + +SOURCES_DIR = str(Path(__file__).parent.parent / "sources" / "ecommerce") + + +# ── From test_edge_cases.py: TestCliParserArgs ─────────────────────── + + +class TestCliParserArgs: + def test_no_args_errors(self): + with pytest.raises(SystemExit): + main([]) + + def test_sources_only_no_query(self, capsys): + with pytest.raises(SystemExit): + main(["--sources", SOURCES_DIR]) + + def test_list_sources_no_measures_needed(self, capsys): + main(["--sources", SOURCES_DIR, "--list-sources"]) + output = capsys.readouterr().out + assert "orders" in output + assert "customers" in output + + def test_plan_only_mode(self, capsys): + main( + [ + "--sources", + SOURCES_DIR, + "-q", + json.dumps( + { + "measures": ["sum(orders.amount)"], + "dimensions": ["orders.status"], + } + ), + "--plan-only", + ] + ) + output = capsys.readouterr().out + assert "Resolved Plan" in output + assert "Anchor" in output + + def test_plan_and_sql(self, capsys): + main( + [ + "--sources", + SOURCES_DIR, + "-q", + json.dumps( + { + "measures": ["sum(orders.amount)"], + "dimensions": ["orders.status"], + } + ), + "--plan", + ] + ) + output = capsys.readouterr().out + assert "Resolved Plan" in output + assert "SELECT" in output + + def test_compact_mode(self, capsys): + main( + [ + "--sources", + SOURCES_DIR, + "-q", + json.dumps( + { + "measures": ["sum(orders.amount)"], + "dimensions": ["orders.status"], + } + ), + "--compact", + ] + ) + output = capsys.readouterr().out + assert "SELECT" in output + assert "-- dialect:" not in output + + def test_json_input(self, capsys): + query_json = json.dumps( + { + "measures": 
["sum(orders.amount)"], + "dimensions": ["orders.status"], + } + ) + with patch("sys.stdin", StringIO(query_json)): + main(["--sources", SOURCES_DIR, "--json"]) + output = capsys.readouterr().out + assert "SELECT" in output + + def test_json_input_with_filters(self, capsys): + query_json = json.dumps( + { + "measures": ["sum(orders.amount)"], + "dimensions": ["orders.status"], + "filters": ["orders.status = 'completed'"], + } + ) + with patch("sys.stdin", StringIO(query_json)): + main(["--sources", SOURCES_DIR, "--json"]) + output = capsys.readouterr().out + assert "completed" in output + + def test_json_input_with_order_by(self, capsys): + query_json = json.dumps( + { + "measures": ["sum(orders.amount)"], + "dimensions": ["orders.status"], + "order_by": [{"field": "orders.status", "direction": "desc"}], + } + ) + with patch("sys.stdin", StringIO(query_json)): + main(["--sources", SOURCES_DIR, "--json"]) + output = capsys.readouterr().out + assert "SELECT" in output + + def test_measures_with_alias(self, capsys): + main( + [ + "--sources", + SOURCES_DIR, + "-q", + json.dumps( + { + "measures": [ + {"expr": "sum(orders.amount)", "name": "total_rev"} + ], + "dimensions": ["orders.status"], + } + ), + ] + ) + output = capsys.readouterr().out + assert "total_rev" in output + + def test_dimension_with_granularity_cli(self, capsys): + main( + [ + "--sources", + SOURCES_DIR, + "-q", + json.dumps( + { + "measures": ["sum(orders.amount)"], + "dimensions": [ + {"field": "orders.created_at", "granularity": "month"} + ], + } + ), + ] + ) + output = capsys.readouterr().out + assert "DATE_TRUNC" in output + + def test_multiple_filters_cli(self, capsys): + main( + [ + "--sources", + SOURCES_DIR, + "-q", + json.dumps( + { + "measures": ["sum(orders.amount)"], + "dimensions": ["orders.status"], + "filters": [ + "orders.status = 'completed'", + "orders.amount > 100", + ], + } + ), + ] + ) + output = capsys.readouterr().out + assert "WHERE" in output + + def test_limit_cli(self, 
capsys): + main( + [ + "--sources", + SOURCES_DIR, + "-q", + json.dumps( + { + "measures": ["sum(orders.amount)"], + "dimensions": ["orders.status"], + "limit": 50, + } + ), + ] + ) + output = capsys.readouterr().out + assert "LIMIT 50" in output + + def test_dialect_cli(self, capsys): + main( + [ + "--sources", + SOURCES_DIR, + "-q", + json.dumps( + { + "measures": ["sum(orders.amount)"], + "dimensions": ["orders.status"], + } + ), + "--dialect", + "bigquery", + ] + ) + output = capsys.readouterr().out + assert "bigquery" in output + + +# ── From test_edge_cases.py: TestCLISuggest ────────────────────────── + + +class TestCliSuggest: + def test_suggest_valid_query(self, capsys): + main( + [ + "--sources", + SOURCES_DIR, + "-q", + json.dumps( + { + "measures": ["sum(orders.amount)"], + "dimensions": ["orders.status"], + } + ), + "--suggest", + ] + ) + output = capsys.readouterr().out + assert "valid" in output.lower() + + def test_suggest_invalid_query(self, capsys): + main( + [ + "--sources", + SOURCES_DIR, + "-q", + json.dumps( + { + "measures": ["sum(nonexistent.amount)"], + "dimensions": ["orders.status"], + } + ), + "--suggest", + ] + ) + output = capsys.readouterr().out + assert "failed" in output.lower() or "Suggestion" in output + + +# ── From test_edge_cases.py: TestCLIOrderBy ────────────────────────── + + +class TestCliOrderBy: + def test_order_by_desc(self, capsys): + main( + [ + "--sources", + SOURCES_DIR, + "-q", + json.dumps( + { + "measures": ["sum(orders.amount)"], + "dimensions": ["orders.status"], + "order_by": [ + {"field": "sum(orders.amount)", "direction": "desc"} + ], + } + ), + ] + ) + output = capsys.readouterr().out + assert "DESC" in output + + def test_order_by_asc(self, capsys): + main( + [ + "--sources", + SOURCES_DIR, + "-q", + json.dumps( + { + "measures": ["sum(orders.amount)"], + "dimensions": ["orders.status"], + "order_by": [{"field": "orders.status", "direction": "asc"}], + } + ), + ] + ) + output = capsys.readouterr().out + 
assert "ORDER BY" in output + + +# ── From test_brainstorm_cases.py: TestBrainstormCliOutput ─────────── + + +def _build_chasm_sources() -> dict[str, SourceDefinition]: + customers = SourceDefinition( + name="customers", + table="public.customers", + grain=["id"], + columns=[ + SourceColumn(name="id", type="number"), + SourceColumn(name="segment", type="string"), + ], + ) + orders = SourceDefinition( + name="orders", + table="public.orders", + grain=["id"], + columns=[ + SourceColumn(name="id", type="number"), + SourceColumn(name="customer_id", type="number"), + SourceColumn(name="amount", type="number"), + ], + joins=[ + JoinDeclaration( + to="customers", + on="customer_id = customers.id", + relationship="many_to_one", + ) + ], + ) + tickets = SourceDefinition( + name="tickets", + table="public.tickets", + grain=["id"], + columns=[ + SourceColumn(name="id", type="number"), + SourceColumn(name="customer_id", type="number"), + SourceColumn(name="cost", type="number"), + ], + joins=[ + JoinDeclaration( + to="customers", + on="customer_id = customers.id", + relationship="many_to_one", + ) + ], + ) + return {"customers": customers, "orders": orders, "tickets": tickets} + + +def _write_sources(sources_dict: dict[str, dict]) -> str: + import tempfile + import yaml + + tmpdir = tempfile.mkdtemp() + for name, data in sources_dict.items(): + with open(Path(tmpdir) / f"{name}.yaml", "w") as f: + yaml.dump(data, f) + return tmpdir + + +class TestCliPlanOutput: + def test_print_plan_includes_join_locality_where_and_having(self, capsys): + sources = _build_chasm_sources() + graph = JoinGraph(sources) + graph.build() + planner = QueryPlanner(sources, graph) + + query = SemanticQuery( + measures=["sum(orders.amount)", "count(tickets.id)"], + dimensions=["customers.segment"], + filters=["customers.segment = 'SMB'", "sum(orders.amount) > 10000"], + ) + plan = planner.plan(query) + + print_plan(plan) + output = capsys.readouterr().out + + assert "Resolved Plan" in output + assert 
"Joins:" in output + assert "Locality:" in output + assert "WHERE:" in output + assert "HAVING:" in output + assert "customers.segment" in output + + def test_suggest_cli_surfaces_graph_errors(self, capsys): + tmpdir = _write_sources( + { + "a": { + "name": "a", + "table": "t", + "grain": ["id"], + "columns": [{"name": "id", "type": "number"}], + }, + "b": { + "name": "b", + "table": "t2", + "grain": ["id"], + "columns": [ + {"name": "id", "type": "number"}, + {"name": "val", "type": "number"}, + ], + }, + } + ) + + main( + [ + "--sources", + tmpdir, + "-q", + json.dumps({"measures": ["sum(a.id)"], "dimensions": ["b.val"]}), + "--suggest", + ] + ) + output = capsys.readouterr().out + + assert "Query failed:" in output + assert "Graph error:" in output + assert "Disconnected components" in output + assert "Suggestion:" in output + + def test_list_sources_includes_join_and_filtered_measure_details(self, capsys): + main(["--sources", SOURCES_DIR, "--list-sources"]) + output = capsys.readouterr().out + + assert "joins:" in output + assert "→ customers (many_to_one) on customer_id = customers.id" in output + assert "revenue: sum(amount) (filter: status != 'refunded')" in output diff --git a/python/klo-sl/tests/test_computed_columns.py b/python/klo-sl/tests/test_computed_columns.py new file mode 100644 index 00000000..f3ee773d --- /dev/null +++ b/python/klo-sl/tests/test_computed_columns.py @@ -0,0 +1,313 @@ +"""Tests for computed column (expr) support on table sources.""" + +from __future__ import annotations + + +from semantic_layer.models import SourceColumn + +from .conftest import assert_valid_sql, make_engine + + +def _lineitem_source(**overrides): + base = { + "name": "lineitem", + "table": "public.lineitem", + "grain": ["l_orderkey", "l_linenumber"], + "columns": [ + {"name": "l_orderkey", "type": "number"}, + {"name": "l_linenumber", "type": "number"}, + {"name": "l_extendedprice", "type": "number"}, + {"name": "l_discount", "type": "number"}, + {"name": 
"l_quantity", "type": "number"}, + {"name": "l_returnflag", "type": "string"}, + { + "name": "net_price", + "type": "number", + "expr": "l_extendedprice * (1 - l_discount)", + }, + ], + } + base.update(overrides) + return base + + +class TestComputedColumnDimension: + def test_computed_column_in_select_and_group_by(self): + engine = make_engine({"lineitem": _lineitem_source()}) + result = engine.query( + { + "measures": ["sum(lineitem.l_quantity)"], + "dimensions": ["lineitem.net_price"], + } + ) + assert_valid_sql(result.sql) + assert "l_extendedprice" in result.sql + assert "l_discount" in result.sql + assert "AS net_price" in result.sql + + def test_date_trunc_on_computed_column(self): + engine = make_engine( + { + "events": { + "name": "events", + "table": "public.events", + "grain": ["id"], + "columns": [ + {"name": "id", "type": "number"}, + {"name": "created_at", "type": "time", "role": "time"}, + {"name": "offset_hours", "type": "number"}, + { + "name": "local_time", + "type": "time", + "role": "time", + "expr": "created_at + offset_hours * INTERVAL '1 hour'", + }, + {"name": "value", "type": "number"}, + ], + } + } + ) + result = engine.query( + { + "measures": ["sum(events.value)"], + "dimensions": [{"field": "events.local_time", "granularity": "month"}], + } + ) + assert_valid_sql(result.sql) + assert "DATE_TRUNC" in result.sql + assert "created_at" in result.sql + assert "offset_hours" in result.sql + + +class TestComputedColumnInMeasure: + def test_runtime_aggregate_on_computed_column(self): + engine = make_engine({"lineitem": _lineitem_source()}) + result = engine.query( + { + "measures": ["sum(lineitem.net_price)"], + "dimensions": [], + } + ) + assert_valid_sql(result.sql) + assert "SUM" in result.sql.upper() + assert "l_extendedprice" in result.sql + assert "l_discount" in result.sql + + def test_predefined_measure_referencing_computed_column(self): + source = _lineitem_source( + measures=[ + {"name": "total_net", "expr": "sum(net_price)"}, + ] + ) 
+ engine = make_engine({"lineitem": source}) + result = engine.query( + { + "measures": ["lineitem.total_net"], + "dimensions": [], + } + ) + assert_valid_sql(result.sql) + assert "l_extendedprice" in result.sql + assert "l_discount" in result.sql + + +class TestComputedColumnInFilter: + def test_computed_column_in_where_filter(self): + engine = make_engine({"lineitem": _lineitem_source()}) + result = engine.query( + { + "measures": ["sum(lineitem.l_extendedprice)"], + "dimensions": [], + "filters": ["lineitem.net_price > 100"], + } + ) + assert_valid_sql(result.sql) + assert "WHERE" in result.sql + assert "l_extendedprice" in result.sql + assert "l_discount" in result.sql + + +class TestComputedColumnWithJoins: + def test_join_on_uses_physical_columns(self): + engine = make_engine( + { + "orders": { + "name": "orders", + "table": "public.orders", + "grain": ["id"], + "columns": [ + {"name": "id", "type": "number"}, + {"name": "customer_id", "type": "number"}, + {"name": "amount", "type": "number"}, + {"name": "discount", "type": "number"}, + { + "name": "net_amount", + "type": "number", + "expr": "amount * (1 - discount)", + }, + ], + "joins": [ + { + "to": "customers", + "on": "customer_id = customers.id", + "relationship": "many_to_one", + } + ], + }, + "customers": { + "name": "customers", + "table": "public.customers", + "grain": ["id"], + "columns": [ + {"name": "id", "type": "number"}, + {"name": "segment", "type": "string"}, + ], + }, + } + ) + result = engine.query( + { + "measures": ["sum(orders.net_amount)"], + "dimensions": ["customers.segment"], + } + ) + assert_valid_sql(result.sql) + # JOIN ON should use physical columns + assert "orders.customer_id" in result.sql + assert "customers.id" in result.sql + # Measure should be expanded + assert "orders.amount" in result.sql + assert "orders.discount" in result.sql + + +class TestComputedColumnLocality: + def test_computed_column_in_aggregate_locality(self): + engine = make_engine( + { + "hub": { + 
"name": "hub", + "table": "public.hub", + "grain": ["id"], + "columns": [ + {"name": "id", "type": "number"}, + {"name": "segment", "type": "string"}, + ], + }, + "fact_a": { + "name": "fact_a", + "table": "public.fact_a", + "grain": ["id"], + "columns": [ + {"name": "id", "type": "number"}, + {"name": "hub_id", "type": "number"}, + {"name": "price", "type": "number"}, + {"name": "qty", "type": "number"}, + { + "name": "total", + "type": "number", + "expr": "price * qty", + }, + ], + "joins": [ + { + "to": "hub", + "on": "hub_id = hub.id", + "relationship": "many_to_one", + } + ], + }, + "fact_b": { + "name": "fact_b", + "table": "public.fact_b", + "grain": ["id"], + "columns": [ + {"name": "id", "type": "number"}, + {"name": "hub_id", "type": "number"}, + {"name": "val", "type": "number"}, + ], + "joins": [ + { + "to": "hub", + "on": "hub_id = hub.id", + "relationship": "many_to_one", + } + ], + }, + } + ) + result = engine.query( + { + "measures": ["sum(fact_a.total)", "sum(fact_b.val)"], + "dimensions": ["hub.segment"], + } + ) + assert_valid_sql(result.sql) + assert "_agg" in result.sql + assert "fact_a.price" in result.sql + assert "fact_a.qty" in result.sql + + +class TestComputedColumnModel: + def test_source_column_with_expr(self): + col = SourceColumn( + name="net_price", type="number", expr="price * (1 - discount)" + ) + assert col.expr == "price * (1 - discount)" + + def test_source_column_without_expr(self): + col = SourceColumn(name="price", type="number") + assert col.expr is None + + def test_source_column_expr_in_yaml_roundtrip(self): + engine = make_engine( + { + "t": { + "name": "t", + "table": "public.t", + "grain": ["id"], + "columns": [ + {"name": "id", "type": "number"}, + {"name": "a", "type": "number"}, + {"name": "b", "type": "number"}, + { + "name": "c", + "type": "number", + "expr": "a + b", + }, + ], + } + } + ) + src = engine.sources["t"] + c_col = next(c for c in src.columns if c.name == "c") + assert c_col.expr == "a + b" + + +def 
test_bigquery_computed_column_with_timestamp_add(make_engine_factory): + """Computed column authored with BigQuery-native TIMESTAMP_ADD must survive.""" + source = { + "name": "events", + "table": "events", + "grain": ["id"], + "columns": [ + {"name": "id", "type": "number"}, + {"name": "event_at", "type": "time"}, + {"name": "tz_offset", "type": "number"}, + { + "name": "local_hour", + "type": "time", + "expr": "TIMESTAMP_ADD(event_at, INTERVAL tz_offset HOUR)", + }, + ], + "measures": [{"name": "cnt", "expr": "count(*)"}], + } + engine = make_engine_factory({"events": source}, dialect="bigquery") + result = engine.query( + { + "measures": ["events.cnt"], + "dimensions": ["events.local_hour"], + "filters": [], + } + ) + assert "TIMESTAMP_ADD" in result.sql.upper() + assert "HOUR" in result.sql.upper() diff --git a/python/klo-sl/tests/test_corner_case_regressions.py b/python/klo-sl/tests/test_corner_case_regressions.py new file mode 100644 index 00000000..cb99d446 --- /dev/null +++ b/python/klo-sl/tests/test_corner_case_regressions.py @@ -0,0 +1,288 @@ +from __future__ import annotations + +from conftest import assert_valid_sql, make_engine + + +def _duplicate_predefined_sources() -> dict[str, dict]: + return { + "customers": { + "name": "customers", + "table": "public.customers", + "grain": ["id"], + "columns": [ + {"name": "id", "type": "number"}, + {"name": "segment", "type": "string"}, + ], + }, + "orders": { + "name": "orders", + "table": "public.orders", + "grain": ["id"], + "columns": [ + {"name": "id", "type": "number"}, + {"name": "customer_id", "type": "number"}, + {"name": "amount", "type": "number"}, + ], + "joins": [ + { + "to": "customers", + "on": "customer_id = customers.id", + "relationship": "many_to_one", + } + ], + "measures": [{"name": "revenue", "expr": "sum(amount)"}], + }, + "refunds": { + "name": "refunds", + "table": "public.refunds", + "grain": ["id"], + "columns": [ + {"name": "id", "type": "number"}, + {"name": "customer_id", "type": 
"number"}, + {"name": "amount", "type": "number"}, + ], + "joins": [ + { + "to": "customers", + "on": "customer_id = customers.id", + "relationship": "many_to_one", + } + ], + "measures": [{"name": "revenue", "expr": "sum(amount)"}], + }, + } + + +def _include_empty_sources() -> dict[str, dict]: + return { + "customers": { + "name": "customers", + "table": "public.customers", + "grain": ["id"], + "columns": [ + {"name": "id", "type": "number"}, + {"name": "segment", "type": "string"}, + ], + }, + "orders": { + "name": "orders", + "table": "public.orders", + "grain": ["id"], + "columns": [ + {"name": "id", "type": "number"}, + {"name": "customer_id", "type": "number"}, + {"name": "amount", "type": "number"}, + ], + "joins": [ + { + "to": "customers", + "on": "customer_id = customers.id", + "relationship": "many_to_one", + } + ], + }, + } + + +def _alias_measure_sources() -> dict[str, dict]: + return { + "customers": { + "name": "customers", + "table": "public.customers", + "grain": ["id"], + "columns": [ + {"name": "id", "type": "number"}, + {"name": "lifetime_value", "type": "number"}, + ], + "measures": [{"name": "total_ltv", "expr": "sum(lifetime_value)"}], + }, + "orders": { + "name": "orders", + "table": "public.orders", + "grain": ["id"], + "columns": [ + {"name": "id", "type": "number"}, + {"name": "billing_customer_id", "type": "number"}, + {"name": "status", "type": "string"}, + ], + "joins": [ + { + "to": "customers", + "on": "billing_customer_id = customers.id", + "relationship": "many_to_one", + "alias": "billing_customer", + } + ], + }, + } + + +def test_duplicate_predefined_names_stay_distinct_in_derived_measure(): + engine = make_engine(_duplicate_predefined_sources()) + result = engine.query( + { + "measures": [ + "orders.revenue", + "refunds.revenue", + {"expr": "orders.revenue - refunds.revenue", "name": "net"}, + ], + "dimensions": ["customers.segment"], + } + ) + + assert result.resolved_plan.has_fan_out + assert "orders_agg.orders_revenue" in 
result.sql + assert "refunds_agg.refunds_revenue" in result.sql + assert "revenue - revenue" not in result.sql + assert_valid_sql(result.sql) + + +def test_duplicate_predefined_names_expand_having_filters_in_locality_mode(): + engine = make_engine(_duplicate_predefined_sources()) + result = engine.query( + { + "measures": ["orders.revenue", "refunds.revenue"], + "dimensions": ["customers.segment"], + "filters": ["orders.revenue > 100"], + } + ) + + # In multi-CTE mode, HAVING refs are wrapped in COALESCE for FULL JOIN NULL safety + assert "WHERE COALESCE(orders_agg.orders_revenue, 0) > 100" in result.sql + assert_valid_sql(result.sql) + + +def test_include_empty_anchors_the_dimension_side(): + engine = make_engine(_include_empty_sources()) + result = engine.query( + { + "measures": ["sum(orders.amount)"], + "dimensions": ["customers.segment"], + "include_empty": True, + } + ) + + assert result.resolved_plan.anchor_source == "customers" + assert "FROM public.customers AS customers" in result.sql + assert "LEFT JOIN public.orders AS orders" in result.sql + assert_valid_sql(result.sql) + + +def test_cross_grain_measures_on_same_chain_use_aggregate_locality(): + engine = make_engine( + { + "customers": { + "name": "customers", + "table": "public.customers", + "grain": ["id"], + "columns": [ + {"name": "id", "type": "number"}, + {"name": "segment", "type": "string"}, + {"name": "credit_limit", "type": "number"}, + ], + }, + "orders": { + "name": "orders", + "table": "public.orders", + "grain": ["id"], + "columns": [ + {"name": "id", "type": "number"}, + {"name": "customer_id", "type": "number"}, + {"name": "amount", "type": "number"}, + ], + "joins": [ + { + "to": "customers", + "on": "customer_id = customers.id", + "relationship": "many_to_one", + } + ], + }, + } + ) + result = engine.query( + { + "measures": ["sum(orders.amount)", "sum(customers.credit_limit)"], + "dimensions": ["customers.segment"], + } + ) + + assert result.resolved_plan.has_fan_out + assert 
"orders_agg" in result.sql + assert "customers_agg" in result.sql + assert_valid_sql(result.sql) + + +def test_filtered_count_distinct_keeps_distinct_inside_count(): + engine = make_engine( + { + "orders": { + "name": "orders", + "table": "public.orders", + "grain": ["id"], + "columns": [ + {"name": "id", "type": "number"}, + {"name": "customer_id", "type": "number"}, + {"name": "status", "type": "string"}, + ], + "measures": [ + { + "name": "paid_customers", + "expr": "count_distinct(customer_id)", + "filter": "status = 'paid'", + } + ], + } + } + ) + result = engine.query( + {"measures": ["orders.paid_customers"], "dimensions": ["orders.status"]} + ) + + assert "COUNT(DISTINCT CASE WHEN orders.status = 'paid'" in result.sql + assert_valid_sql(result.sql) + + +def test_predefined_measure_via_alias_uses_real_table_and_alias_qualification(): + engine = make_engine(_alias_measure_sources()) + result = engine.query( + { + "measures": ["billing_customer.total_ltv"], + "dimensions": ["billing_customer.id"], + } + ) + + assert "FROM public.customers AS billing_customer" in result.sql + assert "SUM(billing_customer.lifetime_value)" in result.sql + assert_valid_sql(result.sql) + + +def test_runtime_case_measure_gets_a_safe_auto_alias(): + engine = make_engine( + { + "orders": { + "name": "orders", + "table": "public.orders", + "grain": ["id"], + "columns": [ + {"name": "id", "type": "number"}, + {"name": "amount", "type": "number"}, + {"name": "status", "type": "string"}, + ], + } + } + ) + result = engine.query( + { + "measures": [ + "sum(CASE WHEN orders.status = 'paid' THEN orders.amount ELSE 0 END)" + ], + "dimensions": ["orders.status"], + } + ) + + assert ( + "sum_case_when_orders_status_paid_then_orders_amount_else_0_end" in result.sql + ) + assert "=" not in result.resolved_plan.measures[0].name + assert_valid_sql(result.sql) diff --git a/python/klo-sl/tests/test_coverage_gaps.py b/python/klo-sl/tests/test_coverage_gaps.py new file mode 100644 index 
def _plan_and_generate(sources: dict[str, SourceDefinition], query_dict: dict) -> str:
    """Plan ``query_dict`` against ``sources`` and render it as Postgres SQL.

    The rendered SQL is run through ``assert_valid_sql`` before it is
    returned, so every caller gets that check without repeating it.
    """
    planner = _make_planner(sources)
    renderer = SqlGenerator(dialect="postgres")
    plan = planner.plan(SemanticQuery(**query_dict))
    rendered = renderer.generate(plan, sources)
    assert_valid_sql(rendered)
    return rendered
def _chasm_sources() -> dict[str, SourceDefinition]:
    """Return a hub with two fact tables: orders and tickets both join many_to_one to customers."""

    def _number(label: str) -> SourceColumn:
        return SourceColumn(name=label, type="number")

    def _text(label: str) -> SourceColumn:
        return SourceColumn(name=label, type="string")

    def _to_customers() -> JoinDeclaration:
        # Each fact table gets its own JoinDeclaration instance.
        return JoinDeclaration(
            to="customers",
            on="customer_id = customers.id",
            relationship="many_to_one",
        )

    hub = SourceDefinition(
        name="customers",
        table="public.customers",
        grain=["id"],
        columns=[_number("id"), _text("segment")],
    )
    order_facts = SourceDefinition(
        name="orders",
        table="public.orders",
        grain=["id"],
        columns=[_number("id"), _number("customer_id"), _number("amount")],
        joins=[_to_customers()],
        measures=[MeasureDefinition(name="revenue", expr="sum(amount)")],
    )
    ticket_facts = SourceDefinition(
        name="tickets",
        table="public.tickets",
        grain=["id"],
        columns=[_number("id"), _number("customer_id"), _text("priority")],
        joins=[_to_customers()],
        measures=[MeasureDefinition(name="ticket_count", expr="count(id)")],
    )
    return {"customers": hub, "orders": order_facts, "tickets": ticket_facts}
class TestNestedAggregation:
    """The planner must refuse an aggregate wrapped inside another aggregate."""

    @staticmethod
    def _expect_rejection(measure_expr: str) -> None:
        """Plan ``measure_expr`` grouped by ``orders.status`` and require the nested-aggregation error."""
        planner = _make_planner(_simple_sources())
        with pytest.raises(ValueError, match="Nested aggregation is not supported"):
            planner.plan(
                SemanticQuery(
                    measures=[measure_expr],
                    dimensions=["orders.status"],
                )
            )

    def test_nested_aggregation_raises(self):
        """avg(sum(orders.amount)) should be rejected."""
        self._expect_rejection("avg(sum(orders.amount))")

    def test_nested_max_count_raises(self):
        """max(count(orders.id)) should be rejected."""
        self._expect_rejection("max(count(orders.id))")
class TestEmptySourceRef:
    """Planner behaviour when a query names no source at all."""

    def test_no_source_refs_raises(self):
        """``sum(1)`` with no dimensions references no source and must raise."""
        planner = _make_planner(_simple_sources())
        rejection = pytest.raises(ValueError, match="does not reference any source")
        with rejection:
            constant_only = SemanticQuery(measures=["sum(1)"], dimensions=[])
            planner.plan(constant_only)
class TestFanOutEdgeCases:
    """Planner handling of one_to_many edges between measure sources and dimensions or filters."""

    @staticmethod
    def _cols(**kinds: str) -> list[SourceColumn]:
        """Turn ``column="type"`` keyword pairs into SourceColumn objects, keeping their order."""
        return [SourceColumn(name=label, type=kind) for label, kind in kinds.items()]

    def test_single_source_fan_out_to_dimension(self):
        """Measuring ``hub`` while grouping by its one_to_many child ``detail`` flags fan-out."""
        hub = SourceDefinition(
            name="hub",
            table="public.hub",
            grain=["id"],
            columns=self._cols(id="number", name="string"),
            joins=[
                JoinDeclaration(
                    to="detail", on="id = detail.hub_id", relationship="one_to_many"
                )
            ],
        )
        detail = SourceDefinition(
            name="detail",
            table="public.detail",
            grain=["id"],
            columns=self._cols(id="number", hub_id="number", category="string"),
        )
        planner = _make_planner({"hub": hub, "detail": detail})

        query = SemanticQuery(
            measures=["sum(hub.id)"],
            dimensions=["detail.category"],
        )
        assert planner.plan(query).has_fan_out

    def test_merged_groups_fan_out_to_dimension(self):
        """child -> parent is many_to_one and parent -> dim is one_to_many; grouping by dim flags fan-out."""
        dim = SourceDefinition(
            name="dim",
            table="public.dim",
            grain=["id"],
            columns=self._cols(id="number", label="string"),
        )
        parent = SourceDefinition(
            name="parent",
            table="public.parent",
            grain=["id"],
            columns=self._cols(id="number", val="number"),
            joins=[
                JoinDeclaration(to="dim", on="id = dim.id", relationship="one_to_many")
            ],
        )
        child = SourceDefinition(
            name="child",
            table="public.child",
            grain=["id"],
            columns=self._cols(id="number", parent_id="number", amount="number"),
            joins=[
                JoinDeclaration(
                    to="parent", on="parent_id = parent.id", relationship="many_to_one"
                )
            ],
        )
        planner = _make_planner({"dim": dim, "parent": parent, "child": child})

        query = SemanticQuery(
            measures=["sum(child.amount)"],
            dimensions=["dim.label"],
        )
        assert planner.plan(query).has_fan_out

    def test_filter_fan_out_one_to_many_raises(self):
        """Filtering on a source reached only through a one_to_many join from the measure source raises."""
        parent = SourceDefinition(
            name="parent",
            table="public.parent",
            grain=["id"],
            columns=self._cols(id="number", val="number"),
            joins=[
                JoinDeclaration(
                    to="child", on="id = child.parent_id", relationship="one_to_many"
                )
            ],
        )
        child = SourceDefinition(
            name="child",
            table="public.child",
            grain=["id"],
            columns=self._cols(id="number", parent_id="number", category="string"),
        )
        planner = _make_planner({"parent": parent, "child": child})

        with pytest.raises(ValueError, match="one_to_many join"):
            planner.plan(
                SemanticQuery(
                    measures=["sum(parent.val)"],
                    dimensions=[],
                    filters=["child.category = 'A'"],
                )
            )
class TestCTEAliasCollision:
    """SQL generation when a real source is literally named ``orders_agg``."""

    def test_alias_collision_resolved(self):
        """Two facts joined to a hub named ``orders_agg`` must still yield valid SQL.

        The hub's name is chosen to clash with the alias the generator would
        presumably pick for the ``orders`` aggregate CTE; only the validity of
        the resulting SQL is asserted here.
        """

        def _to_hub() -> JoinDeclaration:
            # A separate declaration per fact table, both pointing at the hub.
            return JoinDeclaration(
                to="orders_agg",
                on="customer_id = orders_agg.id",
                relationship="many_to_one",
            )

        hub = SourceDefinition(
            name="orders_agg",
            table="public.orders_agg",
            grain=["id"],
            columns=[
                SourceColumn(name="id", type="number"),
                SourceColumn(name="segment", type="string"),
            ],
        )
        order_facts = SourceDefinition(
            name="orders",
            table="public.orders",
            grain=["id"],
            columns=[
                SourceColumn(name="id", type="number"),
                SourceColumn(name="customer_id", type="number"),
                SourceColumn(name="amount", type="number"),
            ],
            joins=[_to_hub()],
        )
        ticket_facts = SourceDefinition(
            name="tickets",
            table="public.tickets",
            grain=["id"],
            columns=[
                SourceColumn(name="id", type="number"),
                SourceColumn(name="customer_id", type="number"),
                SourceColumn(name="priority", type="string"),
            ],
            joins=[_to_hub()],
        )
        catalog = {"orders_agg": hub, "orders": order_facts, "tickets": ticket_facts}
        request = {
            "measures": ["sum(orders.amount)", "count(tickets.id)"],
            "dimensions": ["orders_agg.segment"],
        }

        generated = _plan_and_generate(catalog, request)

        # Should still produce valid SQL even with the collision
        assert_valid_sql(generated)
class TestMeasureNoSourceRef:
    # Exercises the planner path for a measure that references no source.

    def test_bare_column_no_aggregate_raises(self):
        """A measure that references no source (``sum(1)``) should raise.

        The dimension ``orders.status`` does name a valid source, so the
        failure has to come from the measure itself. Either of the two error
        messages in the ``match`` pattern is accepted.

        NOTE(review): the method name suggests a bare, non-predefined column
        such as ``orders.nonexistent``, but the query below sends ``sum(1)`` —
        confirm which input was intended.
        """
        sources = _simple_sources()
        planner = _make_planner(sources)
        with pytest.raises(
            ValueError, match="does not reference any source|not a pre-defined measure"
        ):
            planner.plan(
                SemanticQuery(
                    measures=["sum(1)"],
                    dimensions=["orders.status"],
                )
            )
class TestChasmTrapEndToEnd:
    """Planner + generator checks for two fact tables (orders, tickets) sharing the customers hub."""

    def test_two_fact_tables_produce_valid_sql(self):
        """Measures from both facts should be combined through CTEs and a FULL JOIN."""
        sources = _chasm_sources()
        sql = _plan_and_generate(
            sources,
            {
                "measures": ["sum(orders.amount)", "count(tickets.id)"],
                "dimensions": ["customers.segment"],
            },
        )
        upper = sql.upper()
        assert "WITH" in upper
        # The previous check (`"FULL" in upper or "JOIN" in upper`) passed for
        # any join at all. Require the FULL JOIN itself, as the engine-level
        # chasm test does for this same topology.
        assert "FULL JOIN" in upper

    def test_chasm_with_filter_on_hub(self):
        """A filter on the hub dimension must survive into the generated SQL."""
        sources = _chasm_sources()
        sql = _plan_and_generate(
            sources,
            {
                "measures": ["sum(orders.amount)", "count(tickets.id)"],
                "dimensions": ["customers.segment"],
                "filters": ["customers.segment = 'enterprise'"],
            },
        )
        assert "enterprise" in sql
        assert_valid_sql(sql)
def test_different_expr_is_not_flagged() -> None:
    """Measures with different expressions on one source are not duplicates."""
    specs = (
        ("order_count", "count(*)"),
        ("total_revenue", "sum(amount)"),
        ("avg_revenue", "avg(amount)"),
    )
    distinct_measures = [
        MeasureDefinition(name=label, expr=expression) for label, expression in specs
    ]
    source = _make_source("fct_orders", distinct_measures)

    assert validate_measure_duplicates({"fct_orders": source}) == []
def test_non_commutative_args_not_treated_as_equivalent() -> None:
    """Swapping the arguments of safe_divide must not count as a duplicate."""
    count_over_sum = MeasureDefinition(
        name="ratio_ab", expr="safe_divide(count(*), sum(amount))"
    )
    sum_over_count = MeasureDefinition(
        name="ratio_ba", expr="safe_divide(sum(amount), count(*))"
    )
    source = _make_source("fct_orders", [count_over_sum, sum_over_count])

    findings = validate_measure_duplicates({"fct_orders": source})

    assert findings == []
def test_bigquery_native_exprs_compared_correctly() -> None:
    """Two measures with identical BigQuery-native exprs must be flagged as duplicates.

    Uses the module-level imports of ``validate_measure_duplicates`` and the
    model classes; the function-local re-imports were redundant.
    """
    source = SourceDefinition(
        name="fct_orders",
        table="fct_orders",
        grain=["id"],
        columns=[
            SourceColumn(name="id", type="number"),
            SourceColumn(name="amount", type="number"),
        ],
        measures=[
            MeasureDefinition(
                name="safe_ratio_a",
                expr="SAFE_DIVIDE(sum(amount), count(*))",
            ),
            MeasureDefinition(
                name="safe_ratio_b",
                expr="SAFE_DIVIDE(sum(amount), count(*))",
            ),
        ],
    )
    # The expressions are parsed with the BigQuery dialect selected.
    errors = validate_measure_duplicates({"fct_orders": source}, dialect="bigquery")
    assert any("safe_ratio_a" in e and "safe_ratio_b" in e for e in errors), (
        f"Duplicate detection missed identical BigQuery-native exprs: {errors}"
    )
class TestChasmTrapEndToEnd:
    """Engine-level chasm-trap check using YAML sources written to a temp directory."""

    def test_chasm_trap_full_pipeline(self):
        """Two fact sources (orders and tickets) joined to customers → aggregate locality.

        Each fact is expected to be aggregated in its own ``*_agg`` CTE and the
        CTEs combined with a FULL JOIN.
        """
        # `tempfile` is not imported at module level, so it stays local.
        # SemanticEngine, yaml and Path come from the module-level imports.
        import tempfile

        with tempfile.TemporaryDirectory() as tmpdir:
            customers = {
                "name": "customers",
                "table": "public.customers",
                "grain": ["id"],
                "columns": [
                    {"name": "id", "type": "number"},
                    {"name": "segment", "type": "string"},
                ],
            }
            orders = {
                "name": "orders",
                "table": "public.orders",
                "grain": ["id"],
                "columns": [
                    {"name": "id", "type": "number"},
                    {"name": "customer_id", "type": "number"},
                    {"name": "amount", "type": "number"},
                ],
                "joins": [
                    {
                        "to": "customers",
                        "on": "customer_id = customers.id",
                        "relationship": "many_to_one",
                    }
                ],
            }
            tickets = {
                "name": "tickets",
                "table": "public.tickets",
                "grain": ["id"],
                "columns": [
                    {"name": "id", "type": "number"},
                    {"name": "customer_id", "type": "number"},
                ],
                "joins": [
                    {
                        "to": "customers",
                        "on": "customer_id = customers.id",
                        "relationship": "many_to_one",
                    }
                ],
            }
            # One YAML file per source, named after the source.
            for name, data in [
                ("customers", customers),
                ("orders", orders),
                ("tickets", tickets),
            ]:
                with open(Path(tmpdir) / f"{name}.yaml", "w") as f:
                    yaml.dump(data, f)

            engine = SemanticEngine(tmpdir, dialect="postgres")
            result = engine.query(
                {
                    "measures": ["sum(orders.amount)", "count(tickets.id)"],
                    "dimensions": ["customers.segment"],
                }
            )
            assert result.resolved_plan.has_fan_out
            assert "orders_agg" in result.sql
            assert "tickets_agg" in result.sql
            assert "FULL JOIN" in result.sql.upper()
            sqlglot.parse(result.sql)
class TestPlanOnly:
    """``plan_only`` should return plan metadata for a single-source query."""

    def test_plan_returns_metadata(self, engine):
        """A single-source query plans onto ``orders`` with no fan-out, one measure and one dimension."""
        request = {
            "measures": ["sum(orders.amount)"],
            "dimensions": ["orders.status"],
        }
        resolved = engine.plan_only(request)

        assert "orders" in resolved.sources_used
        assert resolved.anchor_source == "orders"
        assert not resolved.has_fan_out
        for collection in (resolved.measures, resolved.dimensions):
            assert len(collection) == 1
engine.suggest( + { + "measures": ["sum(nonexistent.amount)"], + "dimensions": ["orders.status"], + } + ) + assert result["success"] is False + assert "error" in result + assert len(result["suggestions"]) > 0 + + +class TestSuggestDetailed: + def test_suggest_disconnected_sources(self): + """Suggest should report error when sources can't be connected.""" + import tempfile + import yaml + from pathlib import Path + + with tempfile.TemporaryDirectory() as tmpdir: + src_a = { + "name": "a", + "table": "t", + "grain": ["id"], + "columns": [{"name": "id", "type": "number"}], + } + src_b = { + "name": "b", + "table": "t2", + "grain": ["id"], + "columns": [ + {"name": "id", "type": "number"}, + {"name": "val", "type": "number"}, + ], + } + for name, data in [("a", src_a), ("b", src_b)]: + with open(Path(tmpdir) / f"{name}.yaml", "w") as f: + yaml.dump(data, f) + + engine = SemanticEngine(tmpdir, dialect="postgres") + result = engine.suggest( + { + "measures": ["sum(a.id)"], + "dimensions": ["b.val"], + } + ) + assert result["success"] is False + assert "error" in result + assert len(result["suggestions"]) > 0 + + +class TestDialects: + def test_bigquery(self): + engine = SemanticEngine(SOURCES_DIR, dialect="bigquery") + result = engine.query( + { + "measures": ["sum(orders.amount)"], + "dimensions": ["orders.status"], + } + ) + assert result.dialect == "bigquery" + assert result.sql + + def test_snowflake(self): + engine = SemanticEngine(SOURCES_DIR, dialect="snowflake") + result = engine.query( + { + "measures": ["sum(orders.amount)"], + "dimensions": ["orders.status"], + } + ) + assert result.dialect == "snowflake" + assert result.sql + + def test_bigquery_time_granularity(self): + engine = SemanticEngine(SOURCES_DIR, dialect="bigquery") + result = engine.query( + { + "measures": ["sum(orders.amount)"], + "dimensions": [{"field": "orders.created_at", "granularity": "month"}], + } + ) + assert result.dialect == "bigquery" + assert result.sql + # BigQuery should transpile 
the SQL + sqlglot.parse(result.sql) + + +# ── From test_edge_cases.py: engine edge cases ────────────────────── + + +class TestEngineEdgeCases: + @pytest.fixture + def _engine(self): + return SemanticEngine(SOURCES_DIR, dialect="postgres") + + def test_query_with_dict_input(self, _engine): + result = _engine.query( + { + "measures": ["sum(orders.amount)"], + "dimensions": ["orders.status"], + } + ) + assert result.sql + + def test_query_with_semantic_query_input(self, _engine): + from semantic_layer.models import SemanticQuery + + q = SemanticQuery( + measures=["sum(orders.amount)"], + dimensions=["orders.status"], + ) + result = _engine.query(q) + assert result.sql + + def test_plan_only_with_dict(self, _engine): + plan = _engine.plan_only( + { + "measures": ["sum(orders.amount)"], + "dimensions": ["orders.status"], + } + ) + assert plan.anchor_source == "orders" + + def test_suggest_with_valid_query(self, _engine): + result = _engine.suggest( + { + "measures": ["sum(orders.amount)"], + "dimensions": ["orders.status"], + } + ) + assert result["success"] is True + + def test_suggest_with_invalid_source(self, _engine): + result = _engine.suggest( + { + "measures": ["sum(nonexistent.amount)"], + "dimensions": ["orders.status"], + } + ) + assert result["success"] is False + + def test_complex_cross_source_query(self, _engine): + result = _engine.query( + { + "measures": ["sum(order_items.quantity)"], + "dimensions": ["regions.name"], + } + ) + assert "regions" in result.sql.lower() + assert "order_items" in result.sql.lower() + sqlglot.parse(result.sql) + + def test_filter_only_sources(self, _engine): + result = _engine.query( + { + "measures": ["sum(orders.amount)"], + "dimensions": ["orders.status"], + "filters": ["customers.segment = 'Enterprise'"], + } + ) + assert "customers" in result.sql.lower() + assert "Enterprise" in result.sql + sqlglot.parse(result.sql) + + def test_predefined_measure_with_runtime_same_source(self, _engine): + result = _engine.query( + { + 
"measures": ["orders.revenue", "avg(orders.amount)"], + "dimensions": ["orders.status"], + } + ) + sqlglot.parse(result.sql) + assert "CASE WHEN" in result.sql + + def test_churn_risk_cross_source_latam(self, _engine): + result = _engine.query( + { + "measures": ["churn_risk.avg_risk"], + "dimensions": ["churn_risk.customer_type", "regions.name"], + "filters": ["regions.name = 'LATAM'"], + } + ) + assert "LATAM" in result.sql + assert "churn_risk" in result.sql + assert "regions" in result.sql.lower() + sqlglot.parse(result.sql) + + def test_products_dimension_with_order_items_measure(self, _engine): + result = _engine.query( + { + "measures": ["sum(order_items.price)"], + "dimensions": ["products.category", "products.name"], + } + ) + assert "products" in result.sql.lower() + sqlglot.parse(result.sql) + + def test_all_ecommerce_sources_loaded(self, _engine): + assert "orders" in _engine.sources + assert "customers" in _engine.sources + assert "regions" in _engine.sources + assert "products" in _engine.sources + assert "order_items" in _engine.sources + assert "churn_risk" in _engine.sources + + +# ── From test_edge_cases.py: structured suggest ────────────────────── + + +class TestStructuredSuggest: + def test_missing_source_returns_structured_suggestion(self): + engine = SemanticEngine(SOURCES_DIR, dialect="postgres") + result = engine.suggest( + { + "measures": ["sum(nonexistent.val)"], + "dimensions": ["orders.status"], + } + ) + assert not result["success"] + assert "nonexistent" in result["missing_sources"] + assert len(result["suggestions"]) > 0 + suggestion = result["suggestions"][0] + assert "required_sources" in suggestion + assert "required_joins" in suggestion + assert "notes" in suggestion + assert "nonexistent" in suggestion["required_sources"] + + def test_disconnected_sources_returns_structured_suggestion(self): + from semantic_layer.models import SourceColumn, SourceDefinition + + sources = { + "src_a": SourceDefinition( + name="src_a", + 
table="public.src_a", + grain=["id"], + columns=[ + SourceColumn(name="id", type="number"), + SourceColumn(name="val", type="number"), + ], + ), + "src_b": SourceDefinition( + name="src_b", + table="public.src_b", + grain=["id"], + columns=[ + SourceColumn(name="id", type="number"), + SourceColumn(name="name", type="string"), + ], + ), + } + engine = SemanticEngine.from_sources(sources) + result = engine.suggest( + { + "measures": ["sum(src_a.val)"], + "dimensions": ["src_b.name"], + } + ) + assert not result["success"] + assert len(result["suggestions"]) > 0 + suggestion = result["suggestions"][0] + assert "required_joins" in suggestion + assert "notes" in suggestion + + def test_valid_query_returns_empty_suggestions(self): + engine = SemanticEngine(SOURCES_DIR, dialect="postgres") + result = engine.suggest( + { + "measures": ["sum(orders.amount)"], + "dimensions": ["orders.status"], + } + ) + assert result["success"] + assert result["suggestions"] == [] + + +# ── From test_brainstorm_cases.py ──────────────────────────────────── + + +class TestGlobalAggregatesChasm: + def test_cross_source_global_aggregates_use_cross_join_locality(self): + import tempfile + + tmpdir = tempfile.mkdtemp() + sources_dict = { + "customers": { + "name": "customers", + "table": "public.customers", + "grain": ["id"], + "columns": [ + {"name": "id", "type": "number"}, + {"name": "segment", "type": "string"}, + ], + }, + "orders": { + "name": "orders", + "table": "public.orders", + "grain": ["id"], + "columns": [ + {"name": "id", "type": "number"}, + {"name": "customer_id", "type": "number"}, + {"name": "amount", "type": "number"}, + ], + "joins": [ + { + "to": "customers", + "on": "customer_id = customers.id", + "relationship": "many_to_one", + } + ], + }, + "tickets": { + "name": "tickets", + "table": "public.tickets", + "grain": ["id"], + "columns": [ + {"name": "id", "type": "number"}, + {"name": "customer_id", "type": "number"}, + {"name": "cost", "type": "number"}, + ], + "joins": [ 
+ { + "to": "customers", + "on": "customer_id = customers.id", + "relationship": "many_to_one", + } + ], + }, + } + for name, data in sources_dict.items(): + with open(Path(tmpdir) / f"{name}.yaml", "w") as f: + yaml.dump(data, f) + + engine = SemanticEngine(tmpdir, dialect="postgres") + result = engine.query( + { + "measures": ["sum(orders.amount)", "sum(tickets.cost)"], + } + ) + + assert result.resolved_plan.has_fan_out + assert "orders_agg" in result.sql + assert "tickets_agg" in result.sql + assert "CROSS JOIN" in result.sql.upper() + assert "FULL JOIN" not in result.sql.upper() + assert "GROUP BY" not in result.sql.upper() + sqlglot.parse(result.sql) + + def test_support_cost_pct_matches_cross_source_example(self): + from semantic_layer.models import ( + JoinDeclaration, + SemanticQuery, + SourceColumn, + SourceDefinition, + ) + from semantic_layer.graph import JoinGraph + from semantic_layer.planner import QueryPlanner + from semantic_layer.generator import SqlGenerator + + customers = SourceDefinition( + name="customers", + table="public.customers", + grain=["id"], + columns=[ + SourceColumn(name="id", type="number"), + SourceColumn(name="segment", type="string"), + ], + ) + orders = SourceDefinition( + name="orders", + table="public.orders", + grain=["id"], + columns=[ + SourceColumn(name="id", type="number"), + SourceColumn(name="customer_id", type="number"), + SourceColumn(name="amount", type="number"), + ], + joins=[ + JoinDeclaration( + to="customers", + on="customer_id = customers.id", + relationship="many_to_one", + ) + ], + ) + tickets = SourceDefinition( + name="tickets", + table="public.tickets", + grain=["id"], + columns=[ + SourceColumn(name="id", type="number"), + SourceColumn(name="customer_id", type="number"), + SourceColumn(name="cost", type="number"), + ], + joins=[ + JoinDeclaration( + to="customers", + on="customer_id = customers.id", + relationship="many_to_one", + ) + ], + ) + sources = {"customers": customers, "orders": orders, 
"tickets": tickets} + graph = JoinGraph(sources) + graph.build() + planner = QueryPlanner(sources, graph) + generator = SqlGenerator(dialect="postgres") + + query = SemanticQuery( + measures=[ + {"expr": "sum(orders.amount)", "name": "total_revenue"}, + {"expr": "sum(tickets.cost)", "name": "total_support_cost"}, + { + "expr": "total_support_cost / total_revenue * 100", + "name": "support_cost_pct", + }, + ], + dimensions=["customers.segment"], + order_by=[{"field": "support_cost_pct", "direction": "desc"}], + ) + plan = planner.plan(query) + sql = generator.generate(plan, sources) + + assert plan.has_fan_out + assert "orders_agg" in sql + assert "tickets_agg" in sql + assert "FULL JOIN" in sql.upper() + assert "support_cost_pct" in sql + assert "ORDER BY support_cost_pct DESC" in sql + sqlglot.parse(sql) + + +class TestBrainstormExamples: + def test_high_churn_risk_customers_from_latam(self): + engine = SemanticEngine(SOURCES_DIR, dialect="postgres") + result = engine.query( + { + "measures": [ + "churn_risk.avg_risk", + {"expr": "count(churn_risk.customer_id)", "name": "customer_count"}, + ], + "dimensions": ["churn_risk.customer_type", "regions.name"], + "filters": ["regions.name = 'LATAM'", "churn_risk.score > 0.7"], + "order_by": [{"field": "churn_risk.avg_risk", "direction": "desc"}], + "limit": 100, + } + ) + assert not result.resolved_plan.has_fan_out + assert result.resolved_plan.sources_used == [ + "churn_risk", + "customers", + "regions", + ] + assert "COUNT(CHURN_RISK.CUSTOMER_ID) AS CUSTOMER_COUNT" in result.sql.upper() + assert "WHERE regions.name = 'LATAM' AND churn_risk.score > 0.7" in result.sql + assert "ORDER BY avg_risk DESC" in result.sql + assert "ORDER BY churn_risk.avg_risk DESC" not in result.sql + avg_risk = next(col for col in result.columns if col.name == "avg_risk") + customer_count = next( + col for col in result.columns if col.name == "customer_count" + ) + assert avg_risk.provenance == Provenance.VERIFIED + assert 
customer_count.provenance == Provenance.COMPOSED + sqlglot.parse(result.sql) + + def test_median_order_value_by_region(self): + engine = SemanticEngine(SOURCES_DIR, dialect="postgres") + result = engine.query( + { + "measures": [ + {"expr": "median(orders.amount)", "name": "median_order"}, + "orders.revenue", + ], + "dimensions": ["regions.name"], + "order_by": [{"field": "median_order", "direction": "desc"}], + } + ) + assert not result.resolved_plan.has_fan_out + assert result.resolved_plan.sources_used == ["customers", "orders", "regions"] + assert "ORDER BY median_order DESC" in result.sql + median_order = next(col for col in result.columns if col.name == "median_order") + revenue = next(col for col in result.columns if col.name == "revenue") + assert median_order.provenance == Provenance.COMPOSED + assert revenue.provenance == Provenance.VERIFIED + sqlglot.parse(result.sql) + + def test_revenue_trend_by_month(self): + engine = SemanticEngine(SOURCES_DIR, dialect="postgres") + result = engine.query( + { + "measures": [ + "orders.revenue", + {"expr": "count(orders.id)", "name": "order_count"}, + ], + "dimensions": [{"field": "orders.created_at", "granularity": "month"}], + "filters": ["orders.created_at >= '2025-01-01'"], + "order_by": [{"field": "orders.created_at", "direction": "asc"}], + } + ) + assert ( + "DATE_TRUNC('month', orders.created_at) AS created_at_month" in result.sql + ) + assert "WHERE orders.created_at >= '2025-01-01'" in result.sql + assert "ORDER BY created_at_month" in result.sql + assert "ORDER BY orders.created_at" not in result.sql + revenue = next(col for col in result.columns if col.name == "revenue") + order_count = next(col for col in result.columns if col.name == "order_count") + assert revenue.provenance == Provenance.VERIFIED + assert order_count.provenance == Provenance.COMPOSED + sqlglot.parse(result.sql) + + def test_single_source_fanout_to_product_category_is_rejected(self): + engine = SemanticEngine(SOURCES_DIR, 
dialect="postgres") + with pytest.raises(ValueError, match="cannot safely reach 'products'"): + engine.query( + { + "measures": ["churn_risk.avg_risk"], + "dimensions": ["products.category"], + "filters": ["churn_risk.score > 0.5"], + } + ) + + +# ── From test_spec_gaps.py ─────────────────────────────────────────── + + +class TestCountDistinctPK: + def test_count_with_pk_in_simple_join(self): + from conftest import make_engine, assert_valid_sql + + chasm = { + "customers": { + "name": "customers", + "table": "public.customers", + "grain": ["id"], + "columns": [ + {"name": "id", "type": "number"}, + {"name": "segment", "type": "string"}, + ], + }, + "orders": { + "name": "orders", + "table": "public.orders", + "grain": ["id"], + "columns": [ + {"name": "id", "type": "number"}, + {"name": "customer_id", "type": "number"}, + {"name": "amount", "type": "number"}, + ], + "joins": [ + { + "to": "customers", + "on": "customer_id = customers.id", + "relationship": "many_to_one", + } + ], + }, + } + engine = make_engine(chasm) + result = engine.query( + { + "measures": ["count(orders.id)"], + "dimensions": ["customers.segment"], + } + ) + assert_valid_sql(result.sql) + + def test_count_distinct_pk_in_aggregate_locality(self): + from conftest import make_engine, assert_valid_sql + + chasm = { + "customers": { + "name": "customers", + "table": "public.customers", + "grain": ["id"], + "columns": [ + {"name": "id", "type": "number"}, + {"name": "segment", "type": "string"}, + ], + }, + "orders": { + "name": "orders", + "table": "public.orders", + "grain": ["id"], + "columns": [ + {"name": "id", "type": "number"}, + {"name": "customer_id", "type": "number"}, + {"name": "amount", "type": "number"}, + ], + "joins": [ + { + "to": "customers", + "on": "customer_id = customers.id", + "relationship": "many_to_one", + } + ], + }, + "tickets": { + "name": "tickets", + "table": "public.tickets", + "grain": ["id"], + "columns": [ + {"name": "id", "type": "number"}, + {"name": 
"customer_id", "type": "number"}, + ], + "joins": [ + { + "to": "customers", + "on": "customer_id = customers.id", + "relationship": "many_to_one", + } + ], + }, + } + engine = make_engine(chasm) + result = engine.query( + { + "measures": ["sum(orders.amount)", "count(tickets.id)"], + "dimensions": ["customers.segment"], + } + ) + assert result.resolved_plan.has_fan_out + assert "orders_agg" in result.sql + assert "tickets_agg" in result.sql + assert_valid_sql(result.sql) + + +class TestSuggestMode: + def test_suggest_disconnected_returns_referenced_sources(self): + from conftest import make_engine + + sources = { + "a": { + "name": "a", + "table": "t", + "grain": ["id"], + "columns": [ + {"name": "id", "type": "number"}, + {"name": "val", "type": "number"}, + ], + }, + "b": { + "name": "b", + "table": "t2", + "grain": ["id"], + "columns": [ + {"name": "id", "type": "number"}, + {"name": "val", "type": "number"}, + ], + }, + } + engine = make_engine(sources) + result = engine.suggest( + { + "measures": ["sum(a.val)"], + "dimensions": ["b.val"], + } + ) + assert result["success"] is False + assert "error" in result + assert "referenced_sources" in result + assert set(result["referenced_sources"]) == {"a", "b"} + + def test_suggest_missing_source_reports_name(self): + from conftest import make_engine + + sources = { + "a": { + "name": "a", + "table": "t", + "grain": ["id"], + "columns": [{"name": "id", "type": "number"}], + }, + } + engine = make_engine(sources) + result = engine.suggest( + { + "measures": ["sum(nonexistent.val)"], + "dimensions": ["a.id"], + } + ) + assert result["success"] is False + assert "nonexistent" in result["error"] + assert "missing_sources" in result + assert "nonexistent" in result["missing_sources"] + + def test_suggest_success_returns_plan(self): + from conftest import make_engine + + chasm = { + "customers": { + "name": "customers", + "table": "public.customers", + "grain": ["id"], + "columns": [ + {"name": "id", "type": "number"}, + 
{"name": "segment", "type": "string"}, + ], + }, + "orders": { + "name": "orders", + "table": "public.orders", + "grain": ["id"], + "columns": [ + {"name": "id", "type": "number"}, + {"name": "customer_id", "type": "number"}, + {"name": "amount", "type": "number"}, + ], + "joins": [ + { + "to": "customers", + "on": "customer_id = customers.id", + "relationship": "many_to_one", + } + ], + }, + } + engine = make_engine(chasm) + result = engine.suggest( + { + "measures": ["sum(orders.amount)"], + "dimensions": ["customers.segment"], + } + ) + assert result["success"] is True + assert result["suggestions"] == [] + + +class TestPredefinedMeasureChains: + """BUG 2: Pre-defined measures that reference other pre-defined measures.""" + + def test_predefined_chain_profit(self): + """Query orders.profit where profit=revenue-total_cost, both pre-defined.""" + from conftest import make_engine, assert_valid_sql + + sources = { + "orders": { + "name": "orders", + "table": "public.orders", + "grain": ["id"], + "columns": [ + {"name": "id", "type": "number"}, + {"name": "amount", "type": "number"}, + {"name": "cost", "type": "number"}, + {"name": "status", "type": "string"}, + ], + "measures": [ + { + "name": "revenue", + "expr": "sum(amount)", + "filter": "status != 'refunded'", + }, + {"name": "total_cost", "expr": "sum(cost)"}, + {"name": "profit", "expr": "revenue - total_cost"}, + ], + }, + } + engine = make_engine(sources) + result = engine.query( + { + "measures": ["orders.profit"], + "dimensions": ["orders.status"], + } + ) + assert_valid_sql(result.sql) + # profit should appear as a derived measure + profit_measure = next( + m for m in result.resolved_plan.measures if m.name == "profit" + ) + assert profit_measure.is_derived + # The dependencies (revenue, total_cost) should be auto-added + measure_names = {m.name for m in result.resolved_plan.measures} + assert "revenue" in measure_names + assert "total_cost" in measure_names + + def test_predefined_chain_margin(self): + 
"""Multi-level chain: margin = profit / revenue, profit = revenue - total_cost.""" + from conftest import make_engine, assert_valid_sql + + sources = { + "orders": { + "name": "orders", + "table": "public.orders", + "grain": ["id"], + "columns": [ + {"name": "id", "type": "number"}, + {"name": "amount", "type": "number"}, + {"name": "cost", "type": "number"}, + {"name": "status", "type": "string"}, + ], + "measures": [ + { + "name": "revenue", + "expr": "sum(amount)", + "filter": "status != 'refunded'", + }, + {"name": "total_cost", "expr": "sum(cost)"}, + {"name": "profit", "expr": "revenue - total_cost"}, + {"name": "margin", "expr": "profit / revenue"}, + ], + }, + } + engine = make_engine(sources) + result = engine.query( + { + "measures": ["orders.margin"], + "dimensions": ["orders.status"], + } + ) + assert_valid_sql(result.sql) + # margin should be derived + margin_measure = next( + m for m in result.resolved_plan.measures if m.name == "margin" + ) + assert margin_measure.is_derived + # profit, revenue, total_cost should all be present + measure_names = {m.name for m in result.resolved_plan.measures} + assert "margin" in measure_names + assert "profit" in measure_names + assert "revenue" in measure_names + assert "total_cost" in measure_names + + +class TestSuggestValidation: + """BUG 4: Suggest mode should also validate SQL generation.""" + + def test_suggest_catches_generator_error(self): + """Create a scenario where plan succeeds but generator fails --> suggest returns failure.""" + from conftest import make_engine + + # Two fact tables joining to same hub, but dim from a source that's unreachable + # via safe (m2o) edges from any measure source --> generator should fail + sources = { + "hub": { + "name": "hub", + "table": "public.hub", + "grain": ["id"], + "columns": [ + {"name": "id", "type": "number"}, + {"name": "segment", "type": "string"}, + ], + }, + "fact_a": { + "name": "fact_a", + "table": "public.fact_a", + "grain": ["id"], + "columns": [ + 
{"name": "id", "type": "number"}, + {"name": "hub_id", "type": "number"}, + {"name": "val", "type": "number"}, + ], + "joins": [ + { + "to": "hub", + "on": "hub_id = hub.id", + "relationship": "many_to_one", + } + ], + }, + "fact_b": { + "name": "fact_b", + "table": "public.fact_b", + "grain": ["id"], + "columns": [ + {"name": "id", "type": "number"}, + {"name": "hub_id", "type": "number"}, + {"name": "val", "type": "number"}, + ], + "joins": [ + { + "to": "hub", + "on": "hub_id = hub.id", + "relationship": "many_to_one", + } + ], + }, + "leaf": { + "name": "leaf", + "table": "public.leaf", + "grain": ["id"], + "columns": [ + {"name": "id", "type": "number"}, + {"name": "fact_a_id", "type": "number"}, + {"name": "label", "type": "string"}, + ], + "joins": [ + { + "to": "fact_a", + "on": "fact_a_id = fact_a.id", + "relationship": "many_to_one", + } + ], + }, + } + engine = make_engine(sources) + # This query has two measure sources (chasm trap) and a dimension from 'leaf' + # which is only reachable from fact_a (not from fact_b) via safe edges + # The planner will plan it, but the generator should fail for the leaf dimension + result = engine.suggest( + { + "measures": ["sum(fact_a.val)", "sum(fact_b.val)"], + "dimensions": ["leaf.label"], + } + ) + assert result["success"] is False + assert "error" in result + assert len(result["suggestions"]) > 0 + + def test_suggest_success_includes_generation(self): + """Valid query -- suggest returns success=True after both planning and generation.""" + from conftest import make_engine + + sources = { + "hub": { + "name": "hub", + "table": "public.hub", + "grain": ["id"], + "columns": [ + {"name": "id", "type": "number"}, + {"name": "segment", "type": "string"}, + ], + }, + "fact_a": { + "name": "fact_a", + "table": "public.fact_a", + "grain": ["id"], + "columns": [ + {"name": "id", "type": "number"}, + {"name": "hub_id", "type": "number"}, + {"name": "val", "type": "number"}, + ], + "joins": [ + { + "to": "hub", + "on": 
"hub_id = hub.id", + "relationship": "many_to_one", + } + ], + }, + } + engine = make_engine(sources) + result = engine.suggest( + { + "measures": ["sum(fact_a.val)"], + "dimensions": ["hub.segment"], + } + ) + assert result["success"] is True + assert result["suggestions"] == [] + + +class TestInvalidDialect: + def test_invalid_dialect_on_engine(self): + with pytest.raises(ValueError, match="Unknown SQL dialect"): + SemanticEngine(SOURCES_DIR, dialect="not_a_real_dialect") + + def test_invalid_dialect_from_sources(self): + sources = { + "orders": SourceDefinition( + name="orders", + table="public.orders", + grain=["id"], + columns=[SourceColumn(name="id", type="number")], + ), + } + with pytest.raises(ValueError, match="Unknown SQL dialect"): + SemanticEngine.from_sources(sources, dialect="foobar") + + +class TestCrossReferenceValidation: + def test_validate_reports_bad_join_target_as_error(self): + sources = { + "orders": SourceDefinition( + name="orders", + table="public.orders", + grain=["id"], + columns=[SourceColumn(name="id", type="number")], + joins=[ + JoinDeclaration( + to="nonexistent", + on="fk = nonexistent.id", + relationship="many_to_one", + ) + ], + ), + } + # from_sources no longer hard-raises on orphan targets; the validator surfaces it. 
+ engine = SemanticEngine.from_sources(sources) + report = engine.validate() + assert not report.valid + assert any("'nonexistent'" in e and "not defined" in e for e in report.errors) + + def test_from_sources_accepts_valid_join_target(self): + sources = { + "orders": SourceDefinition( + name="orders", + table="public.orders", + grain=["id"], + columns=[ + SourceColumn(name="id", type="number"), + SourceColumn(name="customer_id", type="number"), + ], + joins=[ + JoinDeclaration( + to="customers", + on="customer_id = customers.id", + relationship="many_to_one", + ) + ], + ), + "customers": SourceDefinition( + name="customers", + table="public.customers", + grain=["id"], + columns=[SourceColumn(name="id", type="number")], + ), + } + engine = SemanticEngine.from_sources(sources) + assert "customers" in engine.sources + + +class TestUnqualifiedMeasureResolution: + """Bare measure names (e.g. 'revenue') should auto-resolve when unambiguous.""" + + def test_bare_name_resolves_uniquely(self, engine): + result = engine.query( + {"measures": ["revenue"], "dimensions": ["orders.status"]} + ) + assert result.sql + assert "CASE WHEN" in result.sql.upper() # revenue has a filter + sqlglot.parse(result.sql) + + def test_bare_name_with_dimensions(self, engine): + result = engine.query( + {"measures": ["revenue"], "dimensions": ["customers.segment"]} + ) + assert result.sql + sqlglot.parse(result.sql) + + def test_bare_and_qualified_coexist(self, engine): + result = engine.query( + { + "measures": ["revenue", "sum(orders.amount)"], + "dimensions": ["orders.status"], + } + ) + assert result.sql + sqlglot.parse(result.sql) + + def test_bare_name_ambiguous_raises(self): + from conftest import make_engine + + sources = { + "store_a": { + "name": "store_a", + "table": "public.store_a", + "grain": ["id"], + "columns": [ + {"name": "id", "type": "number"}, + {"name": "amount", "type": "number"}, + ], + "measures": [{"name": "revenue", "expr": "sum(amount)"}], + }, + "store_b": { + 
"name": "store_b", + "table": "public.store_b", + "grain": ["id"], + "columns": [ + {"name": "id", "type": "number"}, + {"name": "amount", "type": "number"}, + ], + "measures": [{"name": "revenue", "expr": "sum(amount)"}], + }, + } + engine = make_engine(sources) + with pytest.raises(ValueError, match="ambiguous"): + engine.query({"measures": ["revenue"], "dimensions": []}) + + def test_bare_name_not_found_raises(self, engine): + with pytest.raises(ValueError, match="does not reference any source"): + engine.query({"measures": ["nonexistent_measure"], "dimensions": []}) + + def test_bare_aggregate_not_resolved(self, engine): + with pytest.raises(ValueError, match="does not reference any source"): + engine.query({"measures": ["sum(amount)"], "dimensions": []}) diff --git a/python/klo-sl/tests/test_generator.py b/python/klo-sl/tests/test_generator.py new file mode 100644 index 00000000..9ef147ea --- /dev/null +++ b/python/klo-sl/tests/test_generator.py @@ -0,0 +1,2302 @@ +from pathlib import Path + +import pytest +import sqlglot + +from semantic_layer.engine import SemanticEngine +from semantic_layer.generator import SqlGenerator +from semantic_layer.graph import JoinGraph +from semantic_layer.models import ( + JoinDeclaration, + MeasureDefinition, + SemanticQuery, + SourceColumn, + SourceDefinition, +) +from semantic_layer.planner import QueryPlanner + + +@pytest.fixture +def planner(ecommerce_sources): + graph = JoinGraph(ecommerce_sources) + graph.build() + return QueryPlanner(ecommerce_sources, graph) + + +@pytest.fixture +def generator(): + return SqlGenerator(dialect="postgres") + + +def generate_sql(planner, generator, query_dict, sources): + query = SemanticQuery(**query_dict) + plan = planner.plan(query) + return generator.generate(plan, sources) + + +def assert_valid_sql(sql: str): + """Assert that the SQL is syntactically valid.""" + try: + sqlglot.parse(sql) + except Exception as e: + pytest.fail(f"Generated SQL is not valid: {e}\n\nSQL:\n{sql}") + + 
class TestSimpleSingleSource:
    """Test 1: Simple single source."""

    def test_basic_aggregation(self, planner, generator, ecommerce_sources):
        """A sum over orders grouped by status yields a grouped query on public.orders."""
        sql = generate_sql(
            planner,
            generator,
            {
                "measures": ["sum(orders.amount)"],
                "dimensions": ["orders.status"],
            },
            ecommerce_sources,
        )

        assert_valid_sql(sql)
        # Case-insensitive match: the needle must be upper-cased as well, otherwise
        # it can never be found inside sql.upper().
        assert "SUM(ORDERS.AMOUNT)" in sql.upper()
        assert "status" in sql.lower()
        assert "GROUP BY" in sql.upper()
        assert "public.orders" in sql


class TestCrossSourceM2O:
    """Test 2: Cross-source, all m2o (the LATAM query)."""

    def test_churn_risk_by_region(self, planner, generator, ecommerce_sources):
        """A pre-defined churn_risk measure filtered and grouped across regions."""
        sql = generate_sql(
            planner,
            generator,
            {
                "measures": ["churn_risk.avg_risk"],
                "dimensions": ["churn_risk.customer_type", "regions.name"],
                "filters": ["regions.name = 'LATAM'", "churn_risk.score > 0.7"],
            },
            ecommerce_sources,
        )

        assert_valid_sql(sql)
        # Should have CTE for churn_risk (SQL source)
        assert "churn_risk AS" in sql
        assert "calculate_churn_score" in sql  # SQL source content
        assert "AVG" in sql.upper()
        assert "WHERE" in sql.upper()
        assert "LATAM" in sql
        assert "GROUP BY" in sql.upper()


class TestFanOut:
    """Test 3: Fan-out (aggregate locality)."""

    def test_orders_by_region_no_fanout(self, planner, generator, ecommerce_sources):
        """orders → customers → regions is all m2o. No fan-out needed."""
        sql = generate_sql(
            planner,
            generator,
            {
                "measures": ["sum(orders.amount)"],
                "dimensions": ["regions.name"],
            },
            ecommerce_sources,
        )

        assert_valid_sql(sql)
        assert "SUM" in sql.upper()
        assert "JOIN" in sql.upper()
        # Should NOT have aggregate locality CTEs
        assert "_agg" not in sql


class TestChasmTrap:
    """Test 4: Chasm trap (two o2m from same dimension source)."""

    def test_chasm_trap_generates_locality(self):
        """Measures from orders and tickets, both many_to_one to customers.

        Expects one ``<source>_agg`` CTE per measure source, combined with a
        FULL JOIN and COALESCE.
        """
        customers = SourceDefinition(
            name="customers",
            table="public.customers",
            grain=["id"],
            columns=[
                SourceColumn(name="id", type="number"),
                SourceColumn(name="segment", type="string"),
            ],
        )
        orders = SourceDefinition(
            name="orders",
            table="public.orders",
            grain=["id"],
            columns=[
                SourceColumn(name="id", type="number"),
                SourceColumn(name="customer_id", type="number"),
                SourceColumn(name="amount", type="number"),
            ],
            joins=[
                JoinDeclaration(
                    to="customers",
                    on="customer_id = customers.id",
                    relationship="many_to_one",
                )
            ],
        )
        tickets = SourceDefinition(
            name="tickets",
            table="public.tickets",
            grain=["id"],
            columns=[
                SourceColumn(name="id", type="number"),
                SourceColumn(name="customer_id", type="number"),
            ],
            joins=[
                JoinDeclaration(
                    to="customers",
                    on="customer_id = customers.id",
                    relationship="many_to_one",
                )
            ],
        )

        sources = {"customers": customers, "orders": orders, "tickets": tickets}
        graph = JoinGraph(sources)
        graph.build()
        planner = QueryPlanner(sources, graph)
        generator = SqlGenerator(dialect="postgres")

        query = SemanticQuery(
            measures=["sum(orders.amount)", "count(tickets.id)"],
            dimensions=["customers.segment"],
        )
        plan = planner.plan(query)
        sql = generator.generate(plan, sources)

        assert_valid_sql(sql)
        # Should have pre-aggregation CTEs
        assert "orders_agg" in sql
        assert "tickets_agg" in sql
        assert "FULL JOIN" in sql.upper()
        assert "COALESCE" in sql.upper()


class TestDerivedExpression:
    """Test 5: Derived expression."""

    def test_profit_calculation(self, planner, generator, ecommerce_sources):
        """A runtime measure defined in terms of two other named runtime measures."""
        sql = generate_sql(
            planner,
            generator,
            {
                "measures": [
                    {"expr": "sum(orders.amount)", "name": "total_rev"},
                    {"expr": "sum(orders.cost)", "name": "total_cost"},
                    {"expr": "total_rev - total_cost", "name": "profit"},
                ],
                "dimensions": ["orders.status"],
            },
            ecommerce_sources,
        )

        assert_valid_sql(sql)
        assert "total_rev" in sql
        assert "total_cost" in sql
        assert "profit" in sql
        # The derived expression should inline the aggregate expressions
        assert "GROUP BY" in sql.upper()


class TestAutoHaving:
    """Test 6: Auto-HAVING."""

    def test_having_filter(self, planner, generator, ecommerce_sources):
        """A filter written on an aggregate expression is emitted as HAVING."""
        sql = generate_sql(
            planner,
            generator,
            {
                "measures": ["sum(orders.amount)"],
                "dimensions": ["orders.status"],
                "filters": ["sum(orders.amount) > 10000"],
            },
            ecommerce_sources,
        )

        assert_valid_sql(sql)
        assert "HAVING" in sql.upper()
        assert "10000" in sql


class TestTimeGranularity:
    """Test 7: Time granularity."""

    def test_month_truncation(self, planner, generator, ecommerce_sources):
        """A dimension with granularity "month" is truncated with DATE_TRUNC."""
        sql = generate_sql(
            planner,
            generator,
            {
                "measures": ["sum(orders.amount)"],
                "dimensions": [{"field": "orders.created_at", "granularity": "month"}],
            },
            ecommerce_sources,
        )

        assert_valid_sql(sql)
        assert "DATE_TRUNC" in sql.upper()
        assert "month" in sql.lower()


class TestPreDefinedMeasureWithFilter:
    """Test 8: Pre-defined measure with filter."""

    def test_revenue_filter(self, planner, generator, ecommerce_sources):
        """The filter on the pre-defined revenue measure is folded into the aggregate."""
        sql = generate_sql(
            planner,
            generator,
            {
                "measures": ["orders.revenue"],
                "dimensions": ["orders.status"],
            },
            ecommerce_sources,
        )

        assert_valid_sql(sql)
        # Revenue has filter: status != 'refunded'
        # Should generate: SUM(CASE WHEN status != 'refunded' THEN amount END)
        upper = sql.upper()
        assert "CASE WHEN" in upper
        assert "REFUNDED" in upper
        assert "SUM" in upper


class TestDialectValidation:
    """SqlGenerator rejects unknown dialect names and records known ones."""

    def test_invalid_dialect_raises(self):
        """An unrecognised dialect raises ValueError at construction time."""
        with pytest.raises(ValueError, match="Unknown SQL dialect"):
            SqlGenerator(dialect="not_real")

    def test_valid_dialect_postgres(self):
        """The postgres dialect is accepted and exposed on ``dialect``."""
        gen = SqlGenerator(dialect="postgres")
        assert gen.dialect == "postgres"

    def test_valid_dialect_bigquery(self):
        """The bigquery dialect is accepted and exposed on ``dialect``."""
        gen = SqlGenerator(dialect="bigquery")
        assert gen.dialect == "bigquery"

    def test_valid_dialect_snowflake(self):
        """The snowflake dialect is accepted and exposed on ``dialect``."""
        gen = SqlGenerator(dialect="snowflake")
        assert gen.dialect == "snowflake"


class TestDialectTranspilation:
    """Test 9: Dialect transpilation."""

    def test_bigquery(self, planner, ecommerce_sources):
        gen = SqlGenerator(dialect="bigquery")
        sql = generate_sql(
            planner,
            gen,
            {
                "measures": ["sum(orders.amount)"],
                "dimensions": [{"field": "orders.created_at", "granularity": "month"}],
            },
            ecommerce_sources,
        )

        assert sql
        # BigQuery: col is a timestamp, so sqlglot emits TIMESTAMP_TRUNC(col, MONTH).
        # Either form is valid BQ; both must have MONTH as an unquoted part.
+ assert "DATE_TRUNC(" in sql or "TIMESTAMP_TRUNC(" in sql + assert ", MONTH)" in sql + assert "DATE_TRUNC('month'" not in sql + assert "TIMESTAMP_TRUNC('month'" not in sql + + def test_snowflake(self, planner, ecommerce_sources): + gen = SqlGenerator(dialect="snowflake") + sql = generate_sql( + planner, + gen, + { + "measures": ["sum(orders.amount)"], + "dimensions": ["orders.status"], + }, + ecommerce_sources, + ) + + assert sql + + +class TestSqlSourceAsCte: + """Test 10: SQL source as CTE.""" + + def test_churn_risk_cte(self, planner, generator, ecommerce_sources): + sql = generate_sql( + planner, + generator, + { + "measures": ["churn_risk.avg_risk"], + "dimensions": ["churn_risk.customer_type"], + }, + ecommerce_sources, + ) + + assert_valid_sql(sql) + # churn_risk is a SQL source, should appear as CTE + assert "WITH" in sql.upper() + assert "churn_risk AS" in sql + assert "customer_type" in sql + assert "AVG" in sql.upper() or "avg" in sql + + +class TestLimitClause: + """Test: LIMIT appears in generated SQL.""" + + def test_limit_in_sql(self, planner, generator, ecommerce_sources): + sql = generate_sql( + planner, + generator, + { + "measures": ["sum(orders.amount)"], + "dimensions": ["orders.status"], + "limit": 50, + }, + ecommerce_sources, + ) + + assert_valid_sql(sql) + assert "LIMIT 50" in sql.upper() + + def test_default_limit(self, planner, generator, ecommerce_sources): + sql = generate_sql( + planner, + generator, + { + "measures": ["sum(orders.amount)"], + "dimensions": ["orders.status"], + }, + ecommerce_sources, + ) + + assert_valid_sql(sql) + assert "LIMIT 1000" in sql.upper() + + +class TestOrderByClause: + """Test: ORDER BY appears in generated SQL.""" + + def test_order_by_dimensions(self, planner, generator, ecommerce_sources): + sql = generate_sql( + planner, + generator, + { + "measures": ["sum(orders.amount)"], + "dimensions": ["orders.status"], + }, + ecommerce_sources, + ) + + assert_valid_sql(sql) + assert "ORDER BY" in sql.upper() + 
class TestMultipleWhereFilters:
    """Test: Multiple WHERE filters combined with AND."""

    def test_two_where_filters(self, planner, generator, ecommerce_sources):
        """Two non-aggregate filters should both land in a WHERE clause."""
        sql = generate_sql(
            planner,
            generator,
            {
                "measures": ["sum(orders.amount)"],
                "dimensions": ["orders.status"],
                "filters": ["orders.status = 'completed'", "orders.amount > 100"],
            },
            ecommerce_sources,
        )

        assert_valid_sql(sql)
        upper_sql = sql.upper()
        assert "WHERE" in upper_sql
        assert "completed" in sql
        assert "100" in sql
        assert "AND" in upper_sql


class TestCombinedWhereHaving:
    """Test: Both WHERE and HAVING in same query."""

    def test_where_and_having(self, planner, generator, ecommerce_sources):
        """A row-level filter plus an aggregate filter yields WHERE and HAVING."""
        sql = generate_sql(
            planner,
            generator,
            {
                "measures": ["sum(orders.amount)"],
                "dimensions": ["orders.status"],
                "filters": [
                    "orders.status = 'completed'",
                    "sum(orders.amount) > 10000",
                ],
            },
            ecommerce_sources,
        )

        assert_valid_sql(sql)
        upper_sql = sql.upper()
        assert "WHERE" in upper_sql
        assert "HAVING" in upper_sql
        assert "completed" in sql
        assert "10000" in sql


class TestMultiplePreDefinedMeasures:
    """Test: Multiple pre-defined measures from same source."""

    def test_revenue_and_order_count(self, planner, generator, ecommerce_sources):
        """Both pre-defined measures must appear in the generated SQL."""
        sql = generate_sql(
            planner,
            generator,
            {
                "measures": ["orders.revenue", "orders.order_count"],
                "dimensions": ["orders.status"],
            },
            ecommerce_sources,
        )

        assert_valid_sql(sql)
        lower_sql = sql.lower()
        assert "revenue" in lower_sql
        assert "order_count" in lower_sql
        # Revenue should have CASE WHEN (filtered measure)
        assert "CASE WHEN" in sql.upper()


class TestRuntimeAggregationCrossSource:
    """Test: Runtime aggregation across joined sources."""

    def test_runtime_agg_by_region(self, planner, generator, ecommerce_sources):
        """A runtime count grouped by a dimension on another source needs a JOIN."""
        sql = generate_sql(
            planner,
            generator,
            {
                "measures": [{"expr": "count(orders.id)", "name": "order_count"}],
                "dimensions": ["regions.name"],
            },
            ecommerce_sources,
        )

        assert_valid_sql(sql)
        upper_sql = sql.upper()
        assert "COUNT" in upper_sql
        assert "regions" in sql.lower()
        assert "JOIN" in upper_sql


class TestChasmTrapWithDerived:
    """Test: Chasm trap with derived measures referencing different CTEs."""

    def test_derived_across_ctes(self):
        """A derived measure combining two fact sources keeps both ``_agg`` CTEs."""

        def to_customers():
            # Fresh declaration per source so the two facts never share an object.
            return JoinDeclaration(
                to="customers",
                on="customer_id = customers.id",
                relationship="many_to_one",
            )

        customers = SourceDefinition(
            name="customers",
            table="public.customers",
            grain=["id"],
            columns=[
                SourceColumn(name="id", type="number"),
                SourceColumn(name="segment", type="string"),
            ],
        )
        orders = SourceDefinition(
            name="orders",
            table="public.orders",
            grain=["id"],
            columns=[
                SourceColumn(name="id", type="number"),
                SourceColumn(name="customer_id", type="number"),
                SourceColumn(name="amount", type="number"),
            ],
            joins=[to_customers()],
        )
        tickets = SourceDefinition(
            name="tickets",
            table="public.tickets",
            grain=["id"],
            columns=[
                SourceColumn(name="id", type="number"),
                SourceColumn(name="customer_id", type="number"),
                SourceColumn(name="cost", type="number"),
            ],
            joins=[to_customers()],
        )
        sources = {"customers": customers, "orders": orders, "tickets": tickets}

        # JoinGraph is the module-level name the sibling chasm-trap tests in this
        # file already use; no function-scope re-import is needed here.
        graph = JoinGraph(sources)
        graph.build()
        planner = QueryPlanner(sources, graph)
        gen = SqlGenerator(dialect="postgres")

        query = SemanticQuery(
            measures=[
                {"expr": "sum(orders.amount)", "name": "total_rev"},
                {"expr": "sum(tickets.cost)", "name": "total_cost"},
                {"expr": "total_rev - total_cost", "name": "profit"},
            ],
            dimensions=["customers.segment"],
        )
        plan = planner.plan(query)
        sql = gen.generate(plan, sources)

        assert_valid_sql(sql)
        assert "orders_agg" in sql
        assert "tickets_agg" in sql
        assert "profit" in sql


# Directory of the on-disk ecommerce source definitions used by engine-level tests.
SOURCES_DIR = str(Path(__file__).parent.parent / "sources" / "ecommerce")


# ── From
# test_edge_cases.py: generator edge cases ───────────────────


def _query_ecommerce(payload, dialect="postgres"):
    """Build an engine over ``SOURCES_DIR`` for ``dialect`` and run ``payload``."""
    return SemanticEngine(SOURCES_DIR, dialect=dialect).query(payload)


def _m2o_join_dict(target, fk):
    """Return a dict-style many-to-one join from column ``fk`` to ``target.id``."""
    return {
        "to": target,
        "on": f"{fk} = {target}.id",
        "relationship": "many_to_one",
    }


def _chasm_customers_dict():
    """Return a fresh ``customers`` source dict (id + segment)."""
    return {
        "name": "customers",
        "table": "public.customers",
        "grain": ["id"],
        "columns": [
            {"name": "id", "type": "number"},
            {"name": "segment", "type": "string"},
        ],
    }


def _chasm_orders_dict():
    """Return a fresh ``orders`` source dict joined many-to-one to customers."""
    return {
        "name": "orders",
        "table": "public.orders",
        "grain": ["id"],
        "columns": [
            {"name": "id", "type": "number"},
            {"name": "customer_id", "type": "number"},
            {"name": "amount", "type": "number"},
        ],
        "joins": [_m2o_join_dict("customers", "customer_id")],
    }


def _chasm_tickets_dict():
    """Return a fresh ``tickets`` source dict joined many-to-one to customers."""
    return {
        "name": "tickets",
        "table": "public.tickets",
        "grain": ["id"],
        "columns": [
            {"name": "id", "type": "number"},
            {"name": "customer_id", "type": "number"},
        ],
        "joins": [_m2o_join_dict("customers", "customer_id")],
    }


def _hub_fact_definition(name):
    """Return a ``SourceDefinition`` for fact table ``name`` hanging off ``hub``."""
    return SourceDefinition(
        name=name,
        table=f"public.{name}",
        grain=["id"],
        columns=[
            SourceColumn(name="id", type="number"),
            SourceColumn(name="hub_id", type="number"),
            SourceColumn(name="val", type="number"),
        ],
        joins=[
            JoinDeclaration(
                to="hub", on="hub_id = hub.id", relationship="many_to_one"
            )
        ],
    )


def _vip_revenue_sources():
    """Return sources where ``orders.vip_revenue`` is filtered on a customers column."""
    return {
        "orders": {
            "name": "orders",
            "table": "public.orders",
            "grain": ["id"],
            "columns": [
                {"name": "id", "type": "number"},
                {"name": "amount", "type": "number"},
                {"name": "status", "type": "string"},
                {"name": "customer_id", "type": "number"},
            ],
            "joins": [_m2o_join_dict("customers", "customer_id")],
            "measures": [
                {
                    "name": "vip_revenue",
                    "expr": "sum(amount)",
                    "filter": "customers.segment = 'VIP'",
                },
            ],
        },
        "customers": _chasm_customers_dict(),
    }


class TestGeneratorEdgeCases:
    """Generator edge cases: no dimensions, limits, chasm traps, dialects."""

    def test_no_dimensions_no_group_by(self):
        result = _query_ecommerce({"measures": ["sum(orders.amount)"]})
        assert "GROUP BY" not in result.sql
        assert_valid_sql(result.sql)

    def test_multiple_time_dimensions(self):
        result = _query_ecommerce(
            {
                "measures": ["sum(orders.amount)"],
                "dimensions": [
                    {"field": "orders.created_at", "granularity": "month"},
                    {"field": "customers.created_at", "granularity": "year"},
                ],
            }
        )
        sql = result.sql
        assert "DATE_TRUNC('month'" in sql
        assert "DATE_TRUNC('year'" in sql
        assert_valid_sql(sql)

    def test_limit_zero(self):
        result = _query_ecommerce(
            {
                "measures": ["sum(orders.amount)"],
                "dimensions": ["orders.status"],
                "limit": 0,
            }
        )
        assert_valid_sql(result.sql)

    def test_very_large_limit(self):
        result = _query_ecommerce(
            {
                "measures": ["sum(orders.amount)"],
                "dimensions": ["orders.status"],
                "limit": 999999,
            }
        )
        assert "LIMIT 999999" in result.sql
        assert_valid_sql(result.sql)

    def test_chasm_trap_no_dimensions(self):
        from conftest import make_engine

        # Same chasm shape as the shared fixture, but with short table names and
        # a customers source that exposes only its id column.
        customers = {
            **_chasm_customers_dict(),
            "table": "t",
            "columns": [{"name": "id", "type": "number"}],
        }
        orders = {**_chasm_orders_dict(), "table": "t2"}
        tickets = {**_chasm_tickets_dict(), "table": "t3"}
        engine = make_engine(
            {"customers": customers, "orders": orders, "tickets": tickets}
        )
        result = engine.query({"measures": ["sum(orders.amount)", "count(tickets.id)"]})
        assert_valid_sql(result.sql)

    def test_sql_source_with_chasm_trap(self):
        from conftest import make_engine

        # orders is declared through raw SQL instead of a table reference.
        orders = {
            "name": "orders",
            "sql": "SELECT id, customer_id, amount FROM raw_orders WHERE amount > 0",
            "grain": ["id"],
            "columns": [
                {"name": "id", "type": "number"},
                {"name": "customer_id", "type": "number"},
                {"name": "amount", "type": "number"},
            ],
            "joins": [_m2o_join_dict("customers", "customer_id")],
        }
        engine = make_engine(
            {
                "customers": _chasm_customers_dict(),
                "orders": orders,
                "tickets": _chasm_tickets_dict(),
            }
        )
        result = engine.query(
            {
                "measures": ["sum(orders.amount)", "count(tickets.id)"],
                "dimensions": ["customers.segment"],
            }
        )
        assert "orders AS" in result.sql
        assert "orders_agg" in result.sql
        assert_valid_sql(result.sql)

    def test_dialect_duckdb(self):
        result = _query_ecommerce(
            {
                "measures": ["sum(orders.amount)"],
                "dimensions": ["orders.status"],
            },
            dialect="duckdb",
        )
        assert result.dialect == "duckdb"
        assert result.sql

    def test_dialect_mysql(self):
        result = _query_ecommerce(
            {
                "measures": ["sum(orders.amount)"],
                "dimensions": ["orders.status"],
            },
            dialect="mysql",
        )
        assert result.dialect == "mysql"
        assert result.sql

    def test_pre_defined_measure_cross_source_join(self):
        result = _query_ecommerce(
            {
                "measures": ["orders.revenue"],
                "dimensions": ["regions.name"],
            }
        )
        assert "CASE WHEN" in result.sql
        assert "regions" in result.sql.lower()
        assert_valid_sql(result.sql)


# ── From test_edge_cases.py: duplicate aliases, granularity, ORDER BY


class TestDuplicateColumnAliases:
    """Same column name on two sources must yield distinct output aliases."""

    def test_same_column_name_different_sources(self):
        result = _query_ecommerce(
            {
                "measures": ["sum(orders.amount)"],
                "dimensions": ["orders.created_at", "customers.created_at"],
            }
        )
        sql = result.sql
        assert_valid_sql(sql)
        assert "orders_created_at" in sql
        assert "customers_created_at" in sql

    def test_same_column_name_one_with_granularity(self):
        result = _query_ecommerce(
            {
                "measures": ["sum(orders.amount)"],
                "dimensions": [
                    {"field": "orders.created_at", "granularity": "month"},
                    "customers.created_at",
                ],
            }
        )
        sql = result.sql
        assert_valid_sql(sql)
        assert "orders_created_at_month" in sql
        assert "customers_created_at" in sql


class TestEmptyGranularity:
    def test_empty_granularity_treated_as_no_granularity(self):
        """An empty granularity string must not produce a DATE_TRUNC call."""
        result = _query_ecommerce(
            {
                "measures": ["sum(orders.amount)"],
                "dimensions": [{"field": "orders.created_at", "granularity": ""}],
            }
        )
        sql = result.sql
        assert "DATE_TRUNC" not in sql
        assert "orders.created_at" in sql
        assert_valid_sql(sql)


class TestOrderBySupported:
    """Explicit ``order_by`` entries and the default ordering."""

    def test_order_by_desc(self):
        result = _query_ecommerce(
            {
                "measures": ["sum(orders.amount)"],
                "dimensions": ["orders.status"],
                "order_by": [{"field": "sum(orders.amount)", "direction": "desc"}],
            }
        )
        sql = result.sql
        assert "ORDER BY" in sql
        assert "DESC" in sql.upper()
        assert_valid_sql(sql)

    def test_order_by_multiple_fields(self):
        result = _query_ecommerce(
            {
                "measures": ["sum(orders.amount)"],
                "dimensions": ["orders.status"],
                "order_by": [
                    {"field": "orders.status", "direction": "asc"},
                    {"field": "sum(orders.amount)", "direction": "desc"},
                ],
            }
        )
        sql = result.sql
        assert "ORDER BY" in sql
        assert "DESC" in sql.upper()
        assert_valid_sql(sql)

    def test_default_order_by_when_not_specified(self):
        result = _query_ecommerce(
            {
                "measures": ["sum(orders.amount)"],
                "dimensions": ["orders.status"],
            }
        )
        assert "ORDER BY 1" in result.sql


class TestMeasureNameCollision:
    """Queries whose measures would collide on their output name."""

    def test_two_measures_same_auto_name(self):
        result = _query_ecommerce(
            {
                "measures": ["sum(orders.amount)", "sum(orders.amount)"],
                "dimensions": ["orders.status"],
            }
        )
        sql = result.sql
        assert_valid_sql(sql)
        assert "sum_orders_amount" in sql.lower() or "sum(orders.amount)" in sql.lower()

    def test_runtime_name_matches_predefined(self):
        result = _query_ecommerce(
            {
                "measures": [
                    "orders.revenue",
                    {"expr": "sum(orders.cost)", "name": "revenue"},
                ],
                "dimensions": ["orders.status"],
            }
        )
        sql = result.sql
        assert_valid_sql(sql)


class TestChainedJoins:
    """Measures and dimensions separated by several join hops."""

    def test_four_hop_join(self):
        result = _query_ecommerce(
            {
                "measures": ["sum(order_items.quantity)"],
                "dimensions": ["regions.name"],
            }
        )
        sql = result.sql
        assert_valid_sql(sql)
        lowered = sql.lower()
        assert "order_items" in lowered
        assert "orders" in lowered
        assert "customers" in lowered
        assert "regions" in lowered

    def test_measure_from_leaf_dim_from_root(self):
        result = _query_ecommerce(
            {
                "measures": ["sum(order_items.price)"],
                "dimensions": ["products.category"],
            }
        )
        assert_valid_sql(result.sql)
        assert "products" in result.sql.lower()


# ── From test_edge_cases.py: locality CTE filters, join types ────────


class TestWhereFilterInLocalityCTE:
    def test_where_filter_in_both_ctes(self):
        """A dimension filter must show up at least twice in chasm-trap SQL."""
        from conftest import make_engine

        engine = make_engine(
            {
                "customers": _chasm_customers_dict(),
                "orders": _chasm_orders_dict(),
                "tickets": _chasm_tickets_dict(),
            }
        )
        result = engine.query(
            {
                "measures": ["sum(orders.amount)", "count(tickets.id)"],
                "dimensions": ["customers.segment"],
                "filters": ["customers.segment = 'Enterprise'"],
            }
        )
        sql = result.sql
        assert_valid_sql(sql)
        assert "Enterprise" in sql
        assert sql.count("Enterprise") >= 2


class TestThreeCteFullJoinCoalesce:
    """FULL JOIN / COALESCE behaviour when several aggregate CTEs are combined."""

    def test_three_cte_join_uses_coalesce(self):
        sources = {
            "hub": SourceDefinition(
                name="hub",
                table="public.hub",
                grain=["id"],
                columns=[
                    SourceColumn(name="id", type="number"),
                    SourceColumn(name="segment", type="string"),
                ],
            ),
        }
        # Three structurally identical fact tables, each many-to-one to hub.
        for fact_name in ("fact_a", "fact_b", "fact_c"):
            sources[fact_name] = _hub_fact_definition(fact_name)

        engine = SemanticEngine.from_sources(sources)
        result = engine.query(
            {
                "measures": ["sum(fact_a.val)", "sum(fact_b.val)", "sum(fact_c.val)"],
                "dimensions": ["hub.segment"],
            }
        )
        sql_upper = result.sql.upper()
        assert "COALESCE(" in result.sql
        assert sql_upper.count("FULL JOIN") == 2

    def test_two_cte_join_no_coalesce_needed(self):
        result = _query_ecommerce(
            {
                "measures": ["sum(orders.amount)", "avg(churn_risk.score)"],
                "dimensions": ["customers.segment"],
            }
        )
        sql = result.sql
        full_join_lines = [
            line.strip() for line in sql.split("\n") if "FULL JOIN" in line.upper()
        ]
        # NOTE(review): if no line contains FULL JOIN this loop does not run and
        # the test passes vacuously — confirm that is intended.
        for line in full_join_lines:
            assert "COALESCE" not in line


# ── From test_bug_fixes.py ───────────────────────────────────────────


BUG_FIX_SOURCES = {
    "orders": {
        "name": "orders",
        "table": "public.orders",
        "grain": ["id"],
        "columns": [
            {"name": "id", "type": "number"},
            {"name": "amount", "type": "number"},
            {"name": "cost", "type": "number"},
            {"name": "status", "type": "string"},
            {"name": "customer_id", "type": "number"},
            {"name": "created_at", "type": "time", "role": "time"},
        ],
        "joins": [
            {
                "to": "customers",
                "on": "customer_id = customers.id",
                "relationship": "many_to_one",
            },
        ],
        "measures": [
            {
                "name": "revenue",
                "expr": "sum(amount)",
                "filter": "status != 'refunded'",
            },
            {"name": "order_count", "expr": "count(id)"},
        ],
    },
    "customers": {
        "name": "customers",
        "table": "public.customers",
        "grain": ["id"],
        "columns": [
            {"name": "id", "type": "number"},
            {"name": "name", "type": "string"},
            {"name": "segment", "type": "string"},
        ],
    },
}


class TestPercentileAlias:
    """Auto-generated measure names must not contain commas."""

    def test_percentile_alias_has_no_comma(self):
        from conftest import make_engine

        engine = make_engine(BUG_FIX_SOURCES)
        result = engine.query(
            {
                "measures": ["percentile(orders.amount, 0.9)"],
                "dimensions": ["orders.status"],
            }
        )
        assert_valid_sql(result.sql)
        measure_name = result.resolved_plan.measures[0].name
        assert "," not in measure_name
        assert "percentile_orders_amount_0_9" == measure_name

    def test_median_alias_clean(self):
        from conftest import make_engine

        engine = make_engine(BUG_FIX_SOURCES)
        result = engine.query(
            {
                "measures": ["median(orders.amount)"],
                "dimensions": ["orders.status"],
            }
        )
        assert_valid_sql(result.sql)
        assert "," not in result.resolved_plan.measures[0].name


class TestCountDistinct:
    """``count_distinct(...)`` must be emitted as ``COUNT(DISTINCT ...)``."""

    def test_count_distinct_translated(self):
        from conftest import make_engine

        engine = make_engine(BUG_FIX_SOURCES)
        result = engine.query(
            {
                "measures": ["count_distinct(orders.customer_id)"],
                "dimensions": ["orders.status"],
            }
        )
        assert_valid_sql(result.sql)
        assert "COUNT(DISTINCT" in result.sql.upper()
        assert "count_distinct(" not in result.sql.lower()

    def test_count_distinct_in_chasm_cte(self):
        from conftest import make_engine

        sources = {
            "hub": {
                "name": "hub",
                "table": "public.hub",
                "grain": ["id"],
                "columns": [
                    {"name": "id", "type": "number"},
                    {"name": "segment", "type": "string"},
                ],
            },
            "fact_a": {
                "name": "fact_a",
                "table": "public.fact_a",
                "grain": ["id"],
                "columns": [
                    {"name": "id", "type": "number"},
                    {"name": "hub_id", "type": "number"},
                    {"name": "val", "type": "number"},
                ],
                "joins": [_m2o_join_dict("hub", "hub_id")],
            },
            "fact_b": {
                "name": "fact_b",
                "table": "public.fact_b",
                "grain": ["id"],
                "columns": [
                    {"name": "id", "type": "number"},
                    {"name": "hub_id", "type": "number"},
                    {"name": "user_id", "type": "number"},
                ],
                "joins": [_m2o_join_dict("hub", "hub_id")],
            },
        }
        engine = make_engine(sources)
        result = engine.query(
            {
                "measures": ["sum(fact_a.val)", "count_distinct(fact_b.user_id)"],
                "dimensions": ["hub.segment"],
            }
        )
        assert_valid_sql(result.sql)
        assert "COUNT(DISTINCT" in result.sql.upper()


class TestColumnValidation:
    """Unknown columns must raise ``ValueError``; known columns must pass."""

    def test_nonexistent_column_in_measure(self):
        from conftest import make_engine

        engine = make_engine(BUG_FIX_SOURCES)
        with pytest.raises(ValueError, match="does not exist in source"):
            engine.query(
                {
                    "measures": ["sum(orders.nonexistent_column)"],
                    "dimensions": ["orders.status"],
                }
            )

    def test_nonexistent_column_in_dimension(self):
        from conftest import make_engine

        engine = make_engine(BUG_FIX_SOURCES)
        with pytest.raises(ValueError, match="does not exist in source"):
            engine.query(
                {
                    "measures": ["sum(orders.amount)"],
                    "dimensions": ["orders.nonexistent_dim"],
                }
            )

    def test_nonexistent_column_in_filter(self):
        from conftest import make_engine

        engine = make_engine(BUG_FIX_SOURCES)
        with pytest.raises(ValueError, match="does not exist in source"):
            engine.query(
                {
                    "measures": ["sum(orders.amount)"],
                    "dimensions": ["orders.status"],
                    "filters": ["orders.nonexistent_col = 'x'"],
                }
            )

    def test_valid_columns_pass(self):
        from conftest import make_engine

        engine = make_engine(BUG_FIX_SOURCES)
        result = engine.query(
            {
                "measures": ["sum(orders.amount)"],
                "dimensions": ["orders.status"],
                "filters": ["orders.status = 'completed'"],
            }
        )
        assert_valid_sql(result.sql)

    def test_error_lists_available_columns(self):
        from conftest import make_engine

        engine = make_engine(BUG_FIX_SOURCES)
        with pytest.raises(ValueError, match="Available:.*amount"):
            engine.query(
                {
                    "measures": ["sum(orders.bogus)"],
                    "dimensions": ["orders.status"],
                }
            )


class TestCrossSourceMeasureFilter:
    """A measure whose filter references another source."""

    def test_measure_filter_adds_join(self):
        from conftest import make_engine

        engine = make_engine(_vip_revenue_sources())
        result = engine.query(
            {
                "measures": ["orders.vip_revenue"],
                "dimensions": ["orders.status"],
            }
        )
        assert_valid_sql(result.sql)
        assert "customers" in result.resolved_plan.sources_used
        assert "JOIN" in result.sql.upper()
        assert "customers" in result.sql.lower()

    def test_measure_filter_produces_case_when(self):
        from conftest import make_engine

        engine = make_engine(_vip_revenue_sources())
        result = engine.query(
            {
                "measures": ["orders.vip_revenue"],
                "dimensions": ["orders.status"],
            }
        )
        sql_upper = result.sql.upper()
        assert "CASE WHEN" in sql_upper
        assert "VIP" in result.sql


# ── From test_brainstorm_cases.py ────────────────────────────────────


class TestPredefinedMeasureWithFilterWrapping:
    def test_non_aggregate_predefined_formula_with_filter_wraps_entire_expr(self):
        """The measure filter must wrap the whole non-aggregate expression."""
        orders = SourceDefinition(
            name="orders",
            table="public.orders",
            grain=["id"],
            columns=[
                SourceColumn(name="id", type="number"),
                SourceColumn(name="amount", type="number"),
                SourceColumn(name="status", type="string"),
            ],
            measures=[
                MeasureDefinition(
                    name="completed_amount_twice",
                    expr="amount * 2",
                    filter="status = 'completed'",
                )
            ],
        )
        sources = {"orders": orders}
        graph = JoinGraph(sources)
        graph.build()
        planner_local = QueryPlanner(sources, graph)
        gen = SqlGenerator(dialect="postgres")

        plan = planner_local.plan(
            SemanticQuery(measures=["orders.completed_amount_twice"])
        )
        sql = gen.generate(plan, sources)

        assert "CASE WHEN orders.status = 'completed' THEN orders.amount * 2 END" in sql
        sqlglot.parse(sql)


# ── From
test_spec_gaps.py ─────────────────────────────────────────── + + +class TestIncludeEmpty: + def test_include_empty_true_uses_left_join(self): + from conftest import make_engine + + sources = { + "customers": { + "name": "customers", + "table": "public.customers", + "grain": ["id"], + "columns": [ + {"name": "id", "type": "number"}, + {"name": "segment", "type": "string"}, + ], + }, + "orders": { + "name": "orders", + "table": "public.orders", + "grain": ["id"], + "columns": [ + {"name": "id", "type": "number"}, + {"name": "customer_id", "type": "number"}, + {"name": "amount", "type": "number"}, + ], + "joins": [ + { + "to": "customers", + "on": "customer_id = customers.id", + "relationship": "many_to_one", + } + ], + }, + } + engine = make_engine(sources) + result = engine.query( + { + "measures": ["sum(orders.amount)"], + "dimensions": ["customers.segment"], + "include_empty": True, + } + ) + assert "LEFT JOIN" in result.sql.upper() + assert_valid_sql(result.sql) + + def test_include_empty_false_uses_inner_join(self): + from conftest import make_engine + + sources = { + "customers": { + "name": "customers", + "table": "public.customers", + "grain": ["id"], + "columns": [ + {"name": "id", "type": "number"}, + {"name": "segment", "type": "string"}, + ], + }, + "orders": { + "name": "orders", + "table": "public.orders", + "grain": ["id"], + "columns": [ + {"name": "id", "type": "number"}, + {"name": "customer_id", "type": "number"}, + {"name": "amount", "type": "number"}, + ], + "joins": [ + { + "to": "customers", + "on": "customer_id = customers.id", + "relationship": "many_to_one", + } + ], + }, + } + engine = make_engine(sources) + result = engine.query( + { + "measures": ["sum(orders.amount)"], + "dimensions": ["customers.segment"], + "include_empty": False, + } + ) + sql_upper = result.sql.upper() + assert "LEFT JOIN" not in sql_upper + assert "JOIN" in sql_upper + assert_valid_sql(result.sql) + + def test_include_empty_true_different_from_false(self): + from 
conftest import make_engine + + sources = { + "customers": { + "name": "customers", + "table": "public.customers", + "grain": ["id"], + "columns": [ + {"name": "id", "type": "number"}, + {"name": "segment", "type": "string"}, + ], + }, + "orders": { + "name": "orders", + "table": "public.orders", + "grain": ["id"], + "columns": [ + {"name": "id", "type": "number"}, + {"name": "customer_id", "type": "number"}, + {"name": "amount", "type": "number"}, + ], + "joins": [ + { + "to": "customers", + "on": "customer_id = customers.id", + "relationship": "many_to_one", + } + ], + }, + } + engine = make_engine(sources) + result_true = engine.query( + { + "measures": ["sum(orders.amount)"], + "dimensions": ["customers.segment"], + "include_empty": True, + } + ) + result_false = engine.query( + { + "measures": ["sum(orders.amount)"], + "dimensions": ["customers.segment"], + "include_empty": False, + } + ) + assert result_true.sql != result_false.sql + + def test_include_empty_in_resolved_plan(self): + from conftest import make_engine + + sources = { + "customers": { + "name": "customers", + "table": "public.customers", + "grain": ["id"], + "columns": [ + {"name": "id", "type": "number"}, + {"name": "segment", "type": "string"}, + ], + }, + "orders": { + "name": "orders", + "table": "public.orders", + "grain": ["id"], + "columns": [ + {"name": "id", "type": "number"}, + {"name": "customer_id", "type": "number"}, + {"name": "amount", "type": "number"}, + ], + "joins": [ + { + "to": "customers", + "on": "customer_id = customers.id", + "relationship": "many_to_one", + } + ], + }, + } + engine = make_engine(sources) + plan = engine.plan_only( + { + "measures": ["sum(orders.amount)"], + "dimensions": ["customers.segment"], + "include_empty": True, + } + ) + assert plan.include_empty is True + + def test_include_empty_locality_uses_full_join(self): + from conftest import make_engine + + chasm = { + "customers": { + "name": "customers", + "table": "public.customers", + "grain": 
["id"], + "columns": [ + {"name": "id", "type": "number"}, + {"name": "segment", "type": "string"}, + ], + }, + "orders": { + "name": "orders", + "table": "public.orders", + "grain": ["id"], + "columns": [ + {"name": "id", "type": "number"}, + {"name": "customer_id", "type": "number"}, + {"name": "amount", "type": "number"}, + ], + "joins": [ + { + "to": "customers", + "on": "customer_id = customers.id", + "relationship": "many_to_one", + } + ], + }, + "tickets": { + "name": "tickets", + "table": "public.tickets", + "grain": ["id"], + "columns": [ + {"name": "id", "type": "number"}, + {"name": "customer_id", "type": "number"}, + ], + "joins": [ + { + "to": "customers", + "on": "customer_id = customers.id", + "relationship": "many_to_one", + } + ], + }, + } + engine = make_engine(chasm) + result = engine.query( + { + "measures": ["sum(orders.amount)", "count(tickets.id)"], + "dimensions": ["customers.segment"], + "include_empty": True, + } + ) + assert "FULL JOIN" in result.sql.upper() + assert_valid_sql(result.sql) + + def test_include_empty_false_locality_uses_inner_join_between_ctes(self): + from conftest import make_engine + + chasm = { + "customers": { + "name": "customers", + "table": "public.customers", + "grain": ["id"], + "columns": [ + {"name": "id", "type": "number"}, + {"name": "segment", "type": "string"}, + ], + }, + "orders": { + "name": "orders", + "table": "public.orders", + "grain": ["id"], + "columns": [ + {"name": "id", "type": "number"}, + {"name": "customer_id", "type": "number"}, + {"name": "amount", "type": "number"}, + ], + "joins": [ + { + "to": "customers", + "on": "customer_id = customers.id", + "relationship": "many_to_one", + } + ], + }, + "tickets": { + "name": "tickets", + "table": "public.tickets", + "grain": ["id"], + "columns": [ + {"name": "id", "type": "number"}, + {"name": "customer_id", "type": "number"}, + ], + "joins": [ + { + "to": "customers", + "on": "customer_id = customers.id", + "relationship": "many_to_one", + } + ], + 
}, + } + engine = make_engine(chasm) + result = engine.query( + { + "measures": ["sum(orders.amount)", "count(tickets.id)"], + "dimensions": ["customers.segment"], + "include_empty": False, + } + ) + sql_upper = result.sql.upper() + assert "FULL JOIN" not in sql_upper + assert_valid_sql(result.sql) + + +class TestFilterPushDown: + def test_where_filter_only_pushed_to_relevant_cte(self): + from conftest import make_engine + + chasm = { + "customers": { + "name": "customers", + "table": "public.customers", + "grain": ["id"], + "columns": [ + {"name": "id", "type": "number"}, + {"name": "segment", "type": "string"}, + ], + }, + "orders": { + "name": "orders", + "table": "public.orders", + "grain": ["id"], + "columns": [ + {"name": "id", "type": "number"}, + {"name": "customer_id", "type": "number"}, + {"name": "amount", "type": "number"}, + ], + "joins": [ + { + "to": "customers", + "on": "customer_id = customers.id", + "relationship": "many_to_one", + } + ], + }, + "tickets": { + "name": "tickets", + "table": "public.tickets", + "grain": ["id"], + "columns": [ + {"name": "id", "type": "number"}, + {"name": "customer_id", "type": "number"}, + ], + "joins": [ + { + "to": "customers", + "on": "customer_id = customers.id", + "relationship": "many_to_one", + } + ], + }, + } + engine = make_engine(chasm) + result = engine.query( + { + "measures": ["sum(orders.amount)", "count(tickets.id)"], + "dimensions": ["customers.segment"], + "filters": ["orders.amount > 100"], + } + ) + sql = result.sql + assert_valid_sql(sql) + + tickets_start = sql.find("tickets_agg AS (") + assert tickets_start >= 0 + depth = 0 + tickets_body_start = tickets_start + len("tickets_agg AS (") + tickets_end = tickets_body_start + for i, c in enumerate(sql[tickets_body_start:], tickets_body_start): + if c == "(": + depth += 1 + elif c == ")": + if depth == 0: + tickets_end = i + break + depth -= 1 + tickets_body = sql[tickets_body_start:tickets_end] + assert "orders.amount" not in tickets_body + + 
orders_start = sql.find("orders_agg AS (") + assert orders_start >= 0 + depth = 0 + orders_body_start = orders_start + len("orders_agg AS (") + orders_end = orders_body_start + for i, c in enumerate(sql[orders_body_start:], orders_body_start): + if c == "(": + depth += 1 + elif c == ")": + if depth == 0: + orders_end = i + break + depth -= 1 + orders_body = sql[orders_body_start:orders_end] + assert "orders.amount > 100" in orders_body + + def test_filter_on_shared_dimension_pushed_to_all_ctes(self): + from conftest import make_engine + + chasm = { + "customers": { + "name": "customers", + "table": "public.customers", + "grain": ["id"], + "columns": [ + {"name": "id", "type": "number"}, + {"name": "segment", "type": "string"}, + ], + }, + "orders": { + "name": "orders", + "table": "public.orders", + "grain": ["id"], + "columns": [ + {"name": "id", "type": "number"}, + {"name": "customer_id", "type": "number"}, + {"name": "amount", "type": "number"}, + ], + "joins": [ + { + "to": "customers", + "on": "customer_id = customers.id", + "relationship": "many_to_one", + } + ], + }, + "tickets": { + "name": "tickets", + "table": "public.tickets", + "grain": ["id"], + "columns": [ + {"name": "id", "type": "number"}, + {"name": "customer_id", "type": "number"}, + ], + "joins": [ + { + "to": "customers", + "on": "customer_id = customers.id", + "relationship": "many_to_one", + } + ], + }, + } + engine = make_engine(chasm) + result = engine.query( + { + "measures": ["sum(orders.amount)", "count(tickets.id)"], + "dimensions": ["customers.segment"], + "filters": ["customers.segment = 'Enterprise'"], + } + ) + sql = result.sql + assert_valid_sql(sql) + count = sql.count("customers.segment = 'Enterprise'") + assert count >= 2 + + +class TestJoinAliases: + def test_alias_used_in_sql_generation(self): + from conftest import make_engine + + sources = { + "orders": { + "name": "orders", + "table": "public.orders", + "grain": ["id"], + "columns": [ + {"name": "id", "type": "number"}, + 
{"name": "billing_customer_id", "type": "number"}, + {"name": "shipping_customer_id", "type": "number"}, + {"name": "amount", "type": "number"}, + ], + "joins": [ + { + "to": "customers", + "on": "billing_customer_id = customers.id", + "relationship": "many_to_one", + "alias": "billing_customer", + }, + { + "to": "customers", + "on": "shipping_customer_id = customers.id", + "relationship": "many_to_one", + "alias": "shipping_customer", + }, + ], + }, + "customers": { + "name": "customers", + "table": "public.customers", + "grain": ["id"], + "columns": [ + {"name": "id", "type": "number"}, + {"name": "name", "type": "string"}, + ], + }, + } + engine = make_engine(sources) + result = engine.query( + { + "measures": ["sum(orders.amount)"], + "dimensions": ["billing_customer.name"], + } + ) + sql = result.sql + assert_valid_sql(sql) + assert "billing_customer" in sql + + def test_self_referencing_alias(self): + from conftest import make_engine + + sources = { + "employees": { + "name": "employees", + "table": "public.employees", + "grain": ["id"], + "columns": [ + {"name": "id", "type": "number"}, + {"name": "name", "type": "string"}, + {"name": "manager_id", "type": "number"}, + ], + "joins": [ + { + "to": "employees", + "on": "manager_id = employees.id", + "relationship": "many_to_one", + "alias": "manager", + }, + ], + }, + } + engine = make_engine(sources) + result = engine.query( + { + "measures": ["count(employees.id)"], + "dimensions": ["manager.name"], + } + ) + sql = result.sql + assert_valid_sql(sql) + assert "manager" in sql + + +class TestMedianPercentile: + def test_median_generates_percentile_cont(self): + from conftest import make_engine + + sources = { + "scores": { + "name": "scores", + "table": "public.scores", + "grain": ["id"], + "columns": [ + {"name": "id", "type": "number"}, + {"name": "value", "type": "number"}, + {"name": "category", "type": "string"}, + ], + }, + } + engine = make_engine(sources) + result = engine.query( + { + "measures": 
[{"expr": "median(scores.value)", "name": "med_val"}], + "dimensions": ["scores.category"], + } + ) + sql_upper = result.sql.upper() + assert "PERCENTILE_CONT" in sql_upper + assert "0.5" in result.sql + assert_valid_sql(result.sql) + + def test_percentile_generates_percentile_cont(self): + from conftest import make_engine + + sources = { + "scores": { + "name": "scores", + "table": "public.scores", + "grain": ["id"], + "columns": [ + {"name": "id", "type": "number"}, + {"name": "value", "type": "number"}, + {"name": "category", "type": "string"}, + ], + }, + } + engine = make_engine(sources) + result = engine.query( + { + "measures": [{"expr": "percentile(scores.value, 0.9)", "name": "p90"}], + "dimensions": ["scores.category"], + } + ) + sql_upper = result.sql.upper() + assert "PERCENTILE_CONT" in sql_upper + assert "0.9" in result.sql + assert_valid_sql(result.sql) + + +class TestSqlSourceWithClause: + """BUG 3: SQL sources with inner WITH clauses should not produce nested WITH.""" + + def test_sql_source_with_inner_with_clause(self): + """SQL source containing WITH base AS (...) SELECT ... should not produce nested WITH.""" + from conftest import make_engine + + sources = { + "enriched_orders": { + "name": "enriched_orders", + "sql": "WITH base AS (SELECT id, amount FROM raw_orders WHERE amount > 0) SELECT id, amount FROM base", + "grain": ["id"], + "columns": [ + {"name": "id", "type": "number"}, + {"name": "amount", "type": "number"}, + ], + }, + } + engine = make_engine(sources) + result = engine.query( + { + "measures": ["sum(enriched_orders.amount)"], + "dimensions": [], + } + ) + sql = result.sql + assert_valid_sql(sql) + # Should NOT have nested WITH (WITH ... WITH ...) + # The inner WITH should be promoted/flattened + upper_sql = sql.upper() + # Only one WITH keyword should appear at the top level + with_count = upper_sql.count("WITH ") + assert with_count == 1, f"Expected 1 WITH, got {with_count}. 
SQL:\n{sql}" + # The inner CTE name should be promoted with prefix + assert "enriched_orders__base" in sql + + def test_sql_source_without_with_unchanged(self): + """Regular SQL source (no inner WITH) should work as before.""" + from conftest import make_engine + + sources = { + "simple_view": { + "name": "simple_view", + "sql": "SELECT id, amount FROM raw_orders WHERE amount > 0", + "grain": ["id"], + "columns": [ + {"name": "id", "type": "number"}, + {"name": "amount", "type": "number"}, + ], + }, + } + engine = make_engine(sources) + result = engine.query( + { + "measures": ["sum(simple_view.amount)"], + "dimensions": [], + } + ) + sql = result.sql + assert_valid_sql(sql) + assert "simple_view AS" in sql + assert "raw_orders" in sql + + def test_sql_source_inner_cte_unaliased_references(self): + """Inner CTEs referenced without explicit aliases should get AS old_name after promotion.""" + from conftest import make_engine + + sources = { + "analysis": { + "name": "analysis", + "sql": ( + "WITH q AS (SELECT id, amount, status FROM raw_data), " + "filtered AS (SELECT q.id, q.amount FROM q WHERE q.status = 'active') " + "SELECT filtered.id, filtered.amount FROM filtered" + ), + "grain": ["id"], + "columns": [ + {"name": "id", "type": "number"}, + {"name": "amount", "type": "number"}, + ], + }, + } + engine = make_engine(sources) + result = engine.query( + { + "measures": ["sum(analysis.amount)"], + "dimensions": [], + } + ) + sql = result.sql + assert_valid_sql(sql) + assert "analysis__q" in sql + assert "analysis__filtered" in sql + # Unaliased CTE refs should get AS old_name so column qualifiers work + assert "analysis__q AS q" in sql or 'analysis__q AS "q"' in sql + assert ( + "analysis__filtered AS filtered" in sql + or 'analysis__filtered AS "filtered"' in sql + ) + + +class TestSqliteTimeGranularity: + """SQLite uses strftime/date arithmetic instead of DATE_TRUNC.""" + + @pytest.fixture + def sqlite_generator(self): + return SqlGenerator(dialect="sqlite") + + 
@pytest.mark.parametrize( + "granularity,expected_fragment", + [ + ("year", "STRFTIME('%Y-01-01'"), + ("month", "STRFTIME('%Y-%m-01'"), + ("day", "STRFTIME('%Y-%m-%d'"), + ("hour", "STRFTIME('%Y-%m-%d %H:00:00'"), + ("quarter", "PRINTF('%02d'"), + ("week", "weekday 1"), + ], + ) + def test_granularity( + self, + ecommerce_sources, + sqlite_generator, + granularity, + expected_fragment, + ): + graph = JoinGraph(ecommerce_sources) + graph.build() + planner = QueryPlanner(ecommerce_sources, graph) + plan = planner.plan( + SemanticQuery( + measures=["count(orders.id)"], + dimensions=[{"field": "orders.created_at", "granularity": granularity}], + ) + ) + sql = sqlite_generator.generate(plan, ecommerce_sources) + assert expected_fragment in sql, f"Expected '{expected_fragment}' in:\n{sql}" + assert "DATE_TRUNC" not in sql + + def test_postgres_unchanged(self, ecommerce_sources): + """Postgres still generates DATE_TRUNC as before.""" + gen = SqlGenerator(dialect="postgres") + graph = JoinGraph(ecommerce_sources) + graph.build() + planner = QueryPlanner(ecommerce_sources, graph) + plan = planner.plan( + SemanticQuery( + measures=["count(orders.id)"], + dimensions=[{"field": "orders.created_at", "granularity": "month"}], + ) + ) + sql = gen.generate(plan, ecommerce_sources) + assert "DATE_TRUNC" in sql + + +class TestTranspileWithNativeCtes: + def test_bigquery_hyphenated_project_ref_survives_in_sql_source( + self, make_engine_factory + ): + """sql: source body with BigQuery-specific hyphenated project ref must survive verbatim.""" + # A project ref like `my-project.dataset.table` is not valid postgres, + # so feeding it to a postgres parser at transpile time would fail. 
+ source = { + "name": "raw_events", + "sql": "SELECT id, user_id FROM `my-project.analytics.events`", + "grain": ["id"], + "columns": [ + {"name": "id", "type": "number"}, + {"name": "user_id", "type": "number"}, + ], + "measures": [{"name": "event_count", "expr": "count(*)"}], + } + engine = make_engine_factory({"raw_events": source}, dialect="bigquery") + result = engine.query( + {"measures": ["raw_events.event_count"], "dimensions": [], "filters": []} + ) + assert "my-project.analytics.events" in result.sql, ( + f"Hyphenated BigQuery project ref was rewritten:\n{result.sql}" + ) + + def test_postgres_only_idiom_in_outer_gets_translated(self, make_engine_factory): + """Postgres-only idioms in the engine scaffold are translated to target.""" + source = { + "name": "events", + "table": "events", + "grain": ["id"], + "columns": [ + {"name": "id", "type": "number"}, + {"name": "amount", "type": "number"}, + {"name": "created_at", "type": "time"}, + ], + "measures": [{"name": "total", "expr": "sum(amount)"}], + } + engine = make_engine_factory({"events": source}, dialect="bigquery") + result = engine.query( + { + "measures": ["events.total"], + "dimensions": [{"field": "events.created_at", "granularity": "month"}], + "filters": [], + } + ) + # BigQuery's DATE_TRUNC (or TIMESTAMP_TRUNC) uses (col, MONTH) order. + sql = result.sql.upper() + assert "DATE_TRUNC(" in sql or "TIMESTAMP_TRUNC(" in sql + assert ", MONTH)" in sql + + +class TestNativeDialectExprPreservation: + """User-authored expr: in native dialect must survive composition intact. + + Regression coverage for the fct_orders.daily_active_orders 400 bug: + BigQuery segments authored with `INTERVAL 30 DAY` were being parsed as + postgres, silently dropping the `DAY` unit. 
+ """ + + def test_bigquery_segment_with_interval_day_preserves_unit( + self, make_bq_fct_orders_engine + ): + """Production repro: segment with `interval 30 day` must emit `INTERVAL 30 DAY`.""" + engine = make_bq_fct_orders_engine + result = engine.query( + { + "measures": ["fct_orders.daily_active_orders"], + "dimensions": [], + "filters": [], + } + ) + sql = result.sql + assert "INTERVAL '30'" not in sql or "INTERVAL '30' DAY" in sql, ( + f"BigQuery INTERVAL unit was dropped.\nSQL:\n{sql}" + ) + # More specific: the DAY unit must be present somewhere + assert "DAY" in sql.upper(), f"DAY unit missing from emitted SQL:\n{sql}" + + def test_bigquery_measure_filter_with_timestamp_sub(self, make_engine_factory): + """Measure filter using TIMESTAMP_SUB(INTERVAL 7 DAY) must preserve unit.""" + source = { + "name": "events", + "table": "events", + "grain": ["id"], + "columns": [ + {"name": "id", "type": "number"}, + {"name": "user_id", "type": "number"}, + {"name": "ts", "type": "time"}, + ], + "measures": [ + { + "name": "recent_users", + "expr": "count(distinct user_id)", + "filter": "ts >= TIMESTAMP_SUB(CURRENT_TIMESTAMP(), INTERVAL 7 DAY)", + } + ], + } + engine = make_engine_factory({"events": source}, dialect="bigquery") + result = engine.query( + {"measures": ["events.recent_users"], "dimensions": [], "filters": []} + ) + sql = result.sql.upper() + assert "TIMESTAMP_SUB" in sql + assert "INTERVAL 7 DAY" in sql or "INTERVAL '7' DAY" in sql + + def test_snowflake_segment_with_dateadd(self, make_engine_factory): + """Snowflake DATEADD(day, -30, CURRENT_TIMESTAMP()) must survive.""" + source = { + "name": "orders", + "table": "orders", + "grain": ["id"], + "columns": [ + {"name": "id", "type": "number"}, + {"name": "placed_at", "type": "time"}, + ], + "segments": [ + { + "name": "recent", + "expr": "placed_at >= DATEADD(day, -30, CURRENT_TIMESTAMP())", + } + ], + "measures": [{"name": "cnt", "expr": "count(*)", "segments": ["recent"]}], + } + engine = 
make_engine_factory({"orders": source}, dialect="snowflake") + result = engine.query( + {"measures": ["orders.cnt"], "dimensions": [], "filters": []} + ) + assert "DATEADD" in result.sql.upper() + + def test_postgres_interval_baseline_still_works(self, make_engine_factory): + """Baseline: postgres INTERVAL '30 days' round-trips correctly.""" + source = { + "name": "orders", + "table": "orders", + "grain": ["id"], + "columns": [ + {"name": "id", "type": "number"}, + {"name": "placed_at", "type": "time"}, + ], + "segments": [ + { + "name": "recent", + "expr": "placed_at >= current_date - interval '30 days'", + } + ], + "measures": [{"name": "cnt", "expr": "count(*)", "segments": ["recent"]}], + } + engine = make_engine_factory({"orders": source}, dialect="postgres") + result = engine.query( + {"measures": ["orders.cnt"], "dimensions": [], "filters": []} + ) + sql_upper = result.sql.upper() + assert "INTERVAL" in sql_upper + assert "30 DAYS" in sql_upper or "30' DAY" in sql_upper diff --git a/python/klo-sl/tests/test_graph.py b/python/klo-sl/tests/test_graph.py new file mode 100644 index 00000000..002a1f47 --- /dev/null +++ b/python/klo-sl/tests/test_graph.py @@ -0,0 +1,731 @@ +import pytest + +from semantic_layer.graph import JoinGraph +from semantic_layer.models import SourceDefinition, SourceColumn, JoinDeclaration + + +@pytest.fixture +def graph(ecommerce_sources): + g = JoinGraph(ecommerce_sources) + g.build() + return g + + +class TestJoinGraphBuild: + def test_all_sources_in_adjacency(self, graph, ecommerce_sources): + assert set(graph.adjacency.keys()) == set(ecommerce_sources.keys()) + + def test_bidirectional_edges(self, graph): + # orders declares join to customers → both directions exist + orders_edges = graph.adjacency["orders"] + assert any(e.to_source == "customers" for e in orders_edges) + + customers_edges = graph.adjacency["customers"] + assert any(e.to_source == "orders" for e in customers_edges) + + def test_relationship_inversion(self, graph): + # 
orders → customers is many_to_one + fwd = next(e for e in graph.adjacency["orders"] if e.to_source == "customers") + assert fwd.relationship == "many_to_one" + + # customers → orders is one_to_many (reverse) + rev = next(e for e in graph.adjacency["customers"] if e.to_source == "orders") + assert rev.relationship == "one_to_many" + + def test_on_parsing(self, graph): + fwd = next(e for e in graph.adjacency["orders"] if e.to_source == "customers") + assert fwd.from_column == "customer_id" + assert fwd.to_column == "id" + + +class TestFindPath: + def test_direct_join(self, graph): + path = graph.find_path("orders", "customers") + assert path is not None + assert len(path.edges) == 1 + assert path.edges[0].from_source == "orders" + assert path.edges[0].to_source == "customers" + assert not path.has_one_to_many + + def test_two_hop_m2o(self, graph): + # orders → customers → regions (all m2o) + path = graph.find_path("orders", "regions") + assert path is not None + assert len(path.edges) == 2 + assert path.source_names == ["orders", "customers", "regions"] + assert not path.has_one_to_many + + def test_reverse_path_flagged(self, graph): + # regions → customers (o2m) → orders (o2m) + path = graph.find_path("regions", "orders") + assert path is not None + assert len(path.edges) == 2 + assert path.has_one_to_many + + def test_through_bridge(self, graph): + # orders → order_items is reverse (o2m), order_items → products is m2o + # But shortest may be: orders ← order_items → products + path = graph.find_path("orders", "products") + assert path is not None + assert "order_items" in path.source_names + + def test_churn_risk_to_regions(self, graph): + path = graph.find_path("churn_risk", "regions") + assert path is not None + assert "customers" in path.source_names + + def test_same_source(self, graph): + path = graph.find_path("orders", "orders") + assert path is not None + assert len(path.edges) == 0 + assert not path.has_one_to_many + + def test_source_names_property(self, 
graph): + path = graph.find_path("orders", "regions") + assert path.source_names == ["orders", "customers", "regions"] + + def test_empty_path_source_names(self, graph): + path = graph.find_path("orders", "orders") + assert path.source_names == [] + + +class TestResolveJoinTree: + def test_single_source(self, graph): + tree = graph.resolve_join_tree({"orders"}) + assert tree.sources == {"orders"} + assert tree.edges == [] + + def test_two_sources(self, graph): + tree = graph.resolve_join_tree({"orders", "customers"}) + assert "orders" in tree.sources + assert "customers" in tree.sources + assert len(tree.edges) >= 1 + + def test_three_sources_via_customers(self, graph): + tree = graph.resolve_join_tree({"churn_risk", "regions", "orders"}) + assert "customers" in tree.sources # intermediate node added + assert len(tree.sources) >= 4 + + def test_disconnected_raises(self): + from semantic_layer.models import SourceDefinition, SourceColumn + + src_a = SourceDefinition( + name="a", + table="t", + grain=["id"], + columns=[SourceColumn(name="id", type="number")], + ) + src_b = SourceDefinition( + name="b", + table="t2", + grain=["id"], + columns=[SourceColumn(name="id", type="number")], + ) + g = JoinGraph({"a": src_a, "b": src_b}) + g.build() + with pytest.raises(ValueError, match="No join path"): + g.resolve_join_tree({"a", "b"}) + + +class TestOneToOneRelationship: + def test_one_to_one_no_fan_out(self): + """one_to_one joins should not flag has_one_to_many.""" + from semantic_layer.models import ( + SourceDefinition, + SourceColumn, + JoinDeclaration, + ) + + users = SourceDefinition( + name="users", + table="t", + grain=["id"], + columns=[SourceColumn(name="id", type="number")], + ) + profiles = SourceDefinition( + name="profiles", + table="t2", + grain=["user_id"], + columns=[SourceColumn(name="user_id", type="number")], + joins=[ + JoinDeclaration( + to="users", on="user_id = users.id", relationship="one_to_one" + ) + ], + ) + g = JoinGraph({"users": users, 
"profiles": profiles}) + g.build() + + path = g.find_path("profiles", "users") + assert path is not None + assert not path.has_one_to_many + + # Reverse should also be one_to_one + rev_path = g.find_path("users", "profiles") + assert rev_path is not None + assert not rev_path.has_one_to_many + + def test_one_to_one_inverse(self): + """one_to_one inverted should stay one_to_one.""" + from semantic_layer.models import ( + SourceDefinition, + SourceColumn, + JoinDeclaration, + ) + + a = SourceDefinition( + name="a", + table="t", + grain=["id"], + columns=[SourceColumn(name="id", type="number")], + ) + b = SourceDefinition( + name="b", + table="t2", + grain=["a_id"], + columns=[SourceColumn(name="a_id", type="number")], + joins=[ + JoinDeclaration(to="a", on="a_id = a.id", relationship="one_to_one") + ], + ) + g = JoinGraph({"a": a, "b": b}) + g.build() + + fwd = next(e for e in g.adjacency["b"] if e.to_source == "a") + assert fwd.relationship == "one_to_one" + rev = next(e for e in g.adjacency["a"] if e.to_source == "b") + assert rev.relationship == "one_to_one" + + +class TestMultipleJoinsFromSource: + def test_order_items_two_joins(self, graph): + """order_items has joins to both orders and products.""" + oi_edges = graph.adjacency["order_items"] + targets = {e.to_source for e in oi_edges} + assert "orders" in targets + assert "products" in targets + + def test_path_through_bridge(self, graph): + """Can find path from orders to products through order_items.""" + path = graph.find_path("orders", "products") + assert path is not None + assert "order_items" in path.source_names + + +class TestResolveJoinTreeRoot: + def test_root_is_respected(self, graph): + """When root is specified, it should be the anchor of the tree.""" + tree = graph.resolve_join_tree({"orders", "regions"}, root="orders") + assert "orders" in tree.sources + assert "regions" in tree.sources + assert "customers" in tree.sources # intermediate + + def test_root_not_in_sources_uses_default(self, 
graph): + """When root is not in source_names, falls back to sorted order.""" + tree = graph.resolve_join_tree({"orders", "customers"}, root="nonexistent") + assert "orders" in tree.sources + assert "customers" in tree.sources + + +class TestFindComponents: + def test_connected_graph(self, graph): + components = graph.find_components() + assert len(components) == 1 + assert components[0] == set(graph.adjacency.keys()) + + def test_disconnected_graph(self): + from semantic_layer.models import SourceDefinition, SourceColumn + + src_a = SourceDefinition( + name="a", + table="t", + grain=["id"], + columns=[SourceColumn(name="id", type="number")], + ) + src_b = SourceDefinition( + name="b", + table="t2", + grain=["id"], + columns=[SourceColumn(name="id", type="number")], + ) + g = JoinGraph({"a": src_a, "b": src_b}) + g.build() + components = g.find_components() + assert len(components) == 2 + assert {frozenset(c) for c in components} == { + frozenset({"a"}), + frozenset({"b"}), + } + + +# ── From test_edge_cases.py ────────────────────────────────────────── + + +class TestGraphEdgeCases: + def test_self_referencing_join(self): + emp_with_join = SourceDefinition( + name="employees", + table="t", + grain=["id"], + columns=[ + SourceColumn(name="id", type="number"), + SourceColumn(name="manager_id", type="number"), + SourceColumn(name="salary", type="number"), + ], + joins=[ + JoinDeclaration( + to="employees", + on="manager_id = employees.id", + relationship="many_to_one", + ) + ], + ) + sources = {"employees": emp_with_join} + graph = JoinGraph(sources) + graph.build() + path = graph.find_path("employees", "employees") + assert path is not None + assert len(path.edges) == 0 + + def test_no_sources(self): + graph = JoinGraph({}) + graph.build() + components = graph.find_components() + assert components == [] + + def test_single_source_no_joins(self): + src = SourceDefinition( + name="a", + table="t", + grain=["id"], + columns=[SourceColumn(name="id", type="number")], + ) 
+ graph = JoinGraph({"a": src}) + graph.build() + assert graph.find_path("a", "a") is not None + assert graph.find_path("a", "nonexistent") is None + + def test_two_disconnected_sources(self): + a = SourceDefinition( + name="a", + table="t", + grain=["id"], + columns=[SourceColumn(name="id", type="number")], + ) + b = SourceDefinition( + name="b", + table="t2", + grain=["id"], + columns=[SourceColumn(name="id", type="number")], + ) + graph = JoinGraph({"a": a, "b": b}) + graph.build() + assert graph.find_path("a", "b") is None + + def test_on_clause_with_spaces(self): + g = JoinGraph({}) + result = g._parse_on(" customer_id = customers.id ", "customers") + assert result == ("customer_id", "id") + + def test_on_clause_without_prefix(self): + g = JoinGraph({}) + result = g._parse_on("customer_id = id", "customers") + assert result == ("customer_id", "id") + + def test_on_clause_invalid(self): + g = JoinGraph({}) + with pytest.raises(ValueError, match="Invalid join condition"): + g._parse_on("customer_id", "customers") + + def test_on_clause_three_parts(self): + g = JoinGraph({}) + with pytest.raises(ValueError, match="Invalid join condition"): + g._parse_on("a = b = c", "target") + + def test_composite_join_key(self): + """Composite join: 'a = t.x AND b = t.y' → comma-separated columns.""" + g = JoinGraph({}) + from_col, to_col = g._parse_on( + "product_id = inventory.product_id AND warehouse_id = inventory.warehouse_id", + "inventory", + ) + assert from_col == "product_id,warehouse_id" + assert to_col == "product_id,warehouse_id" + + def test_composite_join_key_with_source_prefix(self): + """Composite join with source prefix on left side.""" + g = JoinGraph({}) + from_col, to_col = g._parse_on( + "items.product_id = inventory.product_id AND items.warehouse_id = inventory.warehouse_id", + "inventory", + ) + assert from_col == "product_id,warehouse_id" + assert to_col == "product_id,warehouse_id" + + def test_composite_join_generates_correct_sql(self): + 
"""End-to-end: composite join keys produce multi-condition ON clause.""" + items = SourceDefinition( + name="items", + table="public.items", + grain=["order_id", "product_id"], + columns=[ + SourceColumn(name="order_id", type="number"), + SourceColumn(name="product_id", type="number"), + SourceColumn(name="warehouse_id", type="number"), + SourceColumn(name="qty", type="number"), + ], + joins=[ + JoinDeclaration( + to="inventory", + on="product_id = inventory.product_id AND warehouse_id = inventory.warehouse_id", + relationship="many_to_one", + ) + ], + ) + inv = SourceDefinition( + name="inventory", + table="public.inventory", + grain=["product_id", "warehouse_id"], + columns=[ + SourceColumn(name="product_id", type="number"), + SourceColumn(name="warehouse_id", type="number"), + SourceColumn(name="stock", type="number"), + ], + ) + graph = JoinGraph({"items": items, "inventory": inv}) + graph.build() + path = graph.find_path("items", "inventory") + assert path is not None + assert len(path.edges) == 1 + assert path.edges[0].from_column == "product_id,warehouse_id" + assert path.edges[0].to_column == "product_id,warehouse_id" + + def test_resolve_join_tree_empty_set(self): + graph = JoinGraph({}) + graph.build() + tree = graph.resolve_join_tree(set()) + assert tree.sources == set() + assert tree.edges == [] + + +# ── From test_brainstorm_cases.py ──────────────────────────────────── + + +class TestJoinTreeReusesIntermediates: + def test_resolve_join_tree_reuses_intermediate_sources(self): + a = SourceDefinition( + name="a", + table="public.a", + grain=["id"], + columns=[SourceColumn(name="id", type="number")], + joins=[ + JoinDeclaration(to="z", on="z_id = z.id", relationship="many_to_one") + ], + ) + z = SourceDefinition( + name="z", + table="public.z", + grain=["id"], + columns=[ + SourceColumn(name="id", type="number"), + SourceColumn(name="m_id", type="number"), + ], + joins=[ + JoinDeclaration(to="m", on="m_id = m.id", relationship="many_to_one") + ], + ) + m 
= SourceDefinition( + name="m", + table="public.m", + grain=["id"], + columns=[SourceColumn(name="id", type="number")], + ) + + graph = JoinGraph({"a": a, "z": z, "m": m}) + graph.build() + + tree = graph.resolve_join_tree({"a", "m", "z"}, root="a") + + assert tree.sources == {"a", "z", "m"} + assert len(tree.edges) == 2 + assert {(edge.from_source, edge.to_source) for edge in tree.edges} == { + ("a", "z"), + ("z", "m"), + } + + +class TestDijkstraEdgeWeightPreference: + """LIMIT 2: Dijkstra prefers safe (m2o) paths over one_to_many paths.""" + + def test_dijkstra_prefers_safe_path(self): + """1-hop o2m path vs 2-hop all-m2o path: Dijkstra should pick the 2-hop m2o path.""" + # A --o2m--> C (direct, 1-hop, but unsafe) + # A --m2o--> B --m2o--> C (2-hop, all safe) + a = SourceDefinition( + name="a", + table="t", + grain=["id"], + columns=[SourceColumn(name="id", type="number")], + joins=[ + JoinDeclaration(to="c", on="c_id = c.id", relationship="one_to_many"), + JoinDeclaration(to="b", on="b_id = b.id", relationship="many_to_one"), + ], + ) + b = SourceDefinition( + name="b", + table="t2", + grain=["id"], + columns=[ + SourceColumn(name="id", type="number"), + SourceColumn(name="c_id", type="number"), + ], + joins=[ + JoinDeclaration(to="c", on="c_id = c.id", relationship="many_to_one"), + ], + ) + c = SourceDefinition( + name="c", + table="t3", + grain=["id"], + columns=[ + SourceColumn(name="id", type="number"), + SourceColumn(name="c_id", type="number"), + ], + ) + g = JoinGraph({"a": a, "b": b, "c": c}) + g.build() + + path = g.find_path("a", "c") + assert path is not None + # Should pick the 2-hop safe path (a -> b -> c) over the 1-hop o2m (a -> c) + assert len(path.edges) == 2 + assert path.source_names == ["a", "b", "c"] + assert not path.has_one_to_many + + def test_dijkstra_uses_unsafe_when_only_option(self): + """When only an o2m path exists, it should still be returned.""" + a = SourceDefinition( + name="a", + table="t", + grain=["id"], + 
columns=[SourceColumn(name="id", type="number")], + joins=[ + JoinDeclaration(to="b", on="b_id = b.id", relationship="one_to_many"), + ], + ) + b = SourceDefinition( + name="b", + table="t2", + grain=["id"], + columns=[SourceColumn(name="id", type="number")], + ) + g = JoinGraph({"a": a, "b": b}) + g.build() + + path = g.find_path("a", "b") + assert path is not None + assert len(path.edges) == 1 + assert path.has_one_to_many + + +class TestAmbiguousPathDetection: + """Tests for 12.1 fix: diamond graph ambiguity detection.""" + + @staticmethod + def _diamond_sources(): + """Diamond: A →(m2o) B →(m2o) D, A →(m2o) C →(m2o) D. Two equal-cost paths.""" + return { + "a": SourceDefinition( + name="a", + table="t_a", + grain=["id"], + columns=[SourceColumn(name="id", type="number")], + joins=[ + JoinDeclaration( + to="b", on="b_id = b.id", relationship="many_to_one" + ), + JoinDeclaration( + to="c", on="c_id = c.id", relationship="many_to_one" + ), + ], + ), + "b": SourceDefinition( + name="b", + table="t_b", + grain=["id"], + columns=[SourceColumn(name="id", type="number")], + joins=[ + JoinDeclaration( + to="d", on="d_id = d.id", relationship="many_to_one" + ) + ], + ), + "c": SourceDefinition( + name="c", + table="t_c", + grain=["id"], + columns=[SourceColumn(name="id", type="number")], + joins=[ + JoinDeclaration( + to="d", on="d_id = d.id", relationship="many_to_one" + ) + ], + ), + "d": SourceDefinition( + name="d", + table="t_d", + grain=["id"], + columns=[SourceColumn(name="id", type="number")], + ), + } + + def test_diamond_graph_is_ambiguous(self): + g = JoinGraph(self._diamond_sources()) + g.build() + path = g.find_path("a", "d") + assert path is not None + assert path.is_ambiguous is True + + def test_linear_graph_not_ambiguous(self): + """A → B → C: single path, no ambiguity.""" + sources = { + "a": SourceDefinition( + name="a", + table="t_a", + grain=["id"], + columns=[SourceColumn(name="id", type="number")], + joins=[ + JoinDeclaration( + to="b", on="b_id = 
b.id", relationship="many_to_one" + ) + ], + ), + "b": SourceDefinition( + name="b", + table="t_b", + grain=["id"], + columns=[SourceColumn(name="id", type="number")], + joins=[ + JoinDeclaration( + to="c", on="c_id = c.id", relationship="many_to_one" + ) + ], + ), + "c": SourceDefinition( + name="c", + table="t_c", + grain=["id"], + columns=[SourceColumn(name="id", type="number")], + ), + } + g = JoinGraph(sources) + g.build() + path = g.find_path("a", "c") + assert path is not None + assert path.is_ambiguous is False + + def test_different_cost_paths_not_ambiguous(self): + """A →(m2o) B →(m2o) D and A →(o2m) C →(m2o) D: costs differ.""" + sources = { + "a": SourceDefinition( + name="a", + table="t_a", + grain=["id"], + columns=[SourceColumn(name="id", type="number")], + joins=[ + JoinDeclaration( + to="b", on="b_id = b.id", relationship="many_to_one" + ), + JoinDeclaration( + to="c", on="id = c.a_id", relationship="one_to_many" + ), + ], + ), + "b": SourceDefinition( + name="b", + table="t_b", + grain=["id"], + columns=[SourceColumn(name="id", type="number")], + joins=[ + JoinDeclaration( + to="d", on="d_id = d.id", relationship="many_to_one" + ) + ], + ), + "c": SourceDefinition( + name="c", + table="t_c", + grain=["id"], + columns=[ + SourceColumn(name="id", type="number"), + SourceColumn(name="a_id", type="number"), + ], + joins=[ + JoinDeclaration( + to="d", on="d_id = d.id", relationship="many_to_one" + ) + ], + ), + "d": SourceDefinition( + name="d", + table="t_d", + grain=["id"], + columns=[SourceColumn(name="id", type="number")], + ), + } + g = JoinGraph(sources) + g.build() + path = g.find_path("a", "d") + assert path is not None + # Safe path (cost 2) vs unsafe path (cost 11) — not ambiguous + assert path.is_ambiguous is False + assert path.has_one_to_many is False + + def test_ambiguous_path_warning_in_resolve_join_tree(self, caplog): + """resolve_join_tree logs a warning for ambiguous paths.""" + import logging + + g = 
JoinGraph(self._diamond_sources()) + g.build() + with caplog.at_level(logging.WARNING, logger="semantic_layer.graph"): + g.resolve_join_tree({"a", "d"}, root="a") + assert any("Ambiguous join path" in r.message for r in caplog.records) + + +def test_bigquery_native_on_clause_extracts_column_pair(): + """Join on: with BigQuery-specific casts must parse and yield column pairs.""" + orders = SourceDefinition( + name="orders", + table="orders", + grain=["id"], + columns=[ + SourceColumn(name="id", type="number"), + SourceColumn(name="user_id", type="number"), + ], + joins=[ + JoinDeclaration( + to="users", + on="user_id = SAFE_CAST(users.id AS INT64)", + relationship="many_to_one", + ) + ], + ) + users = SourceDefinition( + name="users", + table="users", + grain=["id"], + columns=[SourceColumn(name="id", type="number")], + ) + graph = JoinGraph({"orders": orders, "users": users}, dialect="bigquery") + graph.build() + # The graph must have recorded the compatibility edge + orders_edges = graph.adjacency.get("orders", []) + assert any(e.to_source == "users" for e in orders_edges), ( + f"orders → users edge missing after BigQuery-native on: parse:\n{orders_edges}" + ) + + +def test_joingraph_dialect_defaults_to_postgres(): + """Default keeps existing test ergonomics unchanged.""" + g = JoinGraph({}) + assert g.dialect == "postgres" diff --git a/python/klo-sl/tests/test_loader.py b/python/klo-sl/tests/test_loader.py new file mode 100644 index 00000000..73c5c23a --- /dev/null +++ b/python/klo-sl/tests/test_loader.py @@ -0,0 +1,171 @@ +import pytest +from pathlib import Path +import tempfile + +import yaml + +from semantic_layer.loader import SourceLoader +from semantic_layer.models import SourceDefinition + +SOURCES_DIR = Path(__file__).parent.parent / "sources" / "ecommerce" + + +class TestSourceLoader: + def test_load_all_ecommerce(self, ecommerce_sources): + assert len(ecommerce_sources) == 6 + assert set(ecommerce_sources.keys()) == { + "customers", + "orders", + 
"regions", + "products", + "order_items", + "churn_risk", + } + + def test_orders_source(self, ecommerce_sources): + orders = ecommerce_sources["orders"] + assert orders.is_table_source + assert orders.table == "public.orders" + assert orders.grain == ["id"] + assert len(orders.columns) == 6 + assert len(orders.measures) == 5 + assert len(orders.joins) == 1 + assert orders.joins[0].to == "customers" + assert orders.joins[0].relationship == "many_to_one" + + def test_churn_risk_sql_source(self, ecommerce_sources): + churn = ecommerce_sources["churn_risk"] + assert churn.is_sql_source + assert churn.sql is not None + assert "calculate_churn_score" in churn.sql + assert churn.grain == ["customer_id"] + assert len(churn.measures) == 1 + assert churn.measures[0].name == "avg_risk" + + def test_regions_no_joins(self, ecommerce_sources): + regions = ecommerce_sources["regions"] + assert regions.joins == [] + assert regions.measures == [] + + def test_order_items_bridge(self, ecommerce_sources): + oi = ecommerce_sources["order_items"] + assert len(oi.joins) == 2 + targets = {j.to for j in oi.joins} + assert targets == {"orders", "products"} + + def test_revenue_measure_has_filter(self, ecommerce_sources): + orders = ecommerce_sources["orders"] + revenue = next(m for m in orders.measures if m.name == "revenue") + assert revenue.filter == "status != 'refunded'" + assert revenue.expr == "sum(amount)" + + def test_load_single_file(self): + loader = SourceLoader(SOURCES_DIR) + src = loader.load_file(SOURCES_DIR / "regions.yaml") + assert src.name == "regions" + assert isinstance(src, SourceDefinition) + + def test_invalid_join_target(self): + with tempfile.TemporaryDirectory() as tmpdir: + data = { + "name": "bad_source", + "table": "t", + "grain": ["id"], + "columns": [{"name": "id", "type": "number"}], + "joins": [ + { + "to": "nonexistent", + "on": "id = nonexistent.id", + "relationship": "many_to_one", + } + ], + } + path = Path(tmpdir) / "bad.yaml" + with open(path, "w") 
as f: + yaml.dump(data, f) + + loader = SourceLoader(tmpdir) + with pytest.raises(ValueError, match="nonexistent"): + loader.load_all() + + def test_duplicate_source_name(self): + with tempfile.TemporaryDirectory() as tmpdir: + data = { + "name": "dupe", + "table": "t", + "grain": ["id"], + "columns": [{"name": "id", "type": "number"}], + } + for fname in ["a.yaml", "b.yaml"]: + with open(Path(tmpdir) / fname, "w") as f: + yaml.dump(data, f) + + loader = SourceLoader(tmpdir) + with pytest.raises(ValueError, match="Duplicate source name"): + loader.load_all() + + def test_source_description_loads(self, ecommerce_sources): + churn = ecommerce_sources["churn_risk"] + assert churn.description is not None + assert "churn" in churn.description.lower() + + def test_column_role_loads(self, ecommerce_sources): + orders = ecommerce_sources["orders"] + time_col = next(c for c in orders.columns if c.name == "created_at") + assert time_col.role == "time" + + def test_source_without_description(self, ecommerce_sources): + regions = ecommerce_sources["regions"] + assert regions.description is None + + +# ── From test_edge_cases.py ────────────────────────────────────────── + + +class TestLoaderEdgeCases: + def test_empty_directory(self): + with tempfile.TemporaryDirectory() as tmpdir: + loader = SourceLoader(tmpdir) + sources = loader.load_all() + assert sources == {} + + def test_non_yaml_files_ignored(self): + with tempfile.TemporaryDirectory() as tmpdir: + (Path(tmpdir) / "readme.txt").write_text("not a yaml file") + loader = SourceLoader(tmpdir) + sources = loader.load_all() + assert sources == {} + + def test_yaml_with_extra_fields(self): + with tempfile.TemporaryDirectory() as tmpdir: + data = { + "name": "test", + "table": "t", + "grain": ["id"], + "columns": [{"name": "id", "type": "number"}], + "unknown_field": "should be rejected", + } + with open(Path(tmpdir) / "test.yaml", "w") as f: + yaml.dump(data, f) + loader = SourceLoader(tmpdir) + try: + sources = 
loader.load_all() + except Exception: + return # rejecting the unknown field at load time is an accepted outcome + assert "test" in sources # outside the try so a failed assertion is no longer swallowed + + def test_subdirectory_sources(self): + with tempfile.TemporaryDirectory() as tmpdir: + subdir = Path(tmpdir) / "sub" + subdir.mkdir() + data = { + "name": "nested", + "table": "t", + "grain": ["id"], + "columns": [{"name": "id", "type": "number"}], + } + with open(subdir / "nested.yaml", "w") as f: + yaml.dump(data, f) + loader = SourceLoader(tmpdir) + sources = loader.load_all() + assert "nested" in sources diff --git a/python/klo-sl/tests/test_manifest.py b/python/klo-sl/tests/test_manifest.py new file mode 100644 index 00000000..e025c3da --- /dev/null +++ b/python/klo-sl/tests/test_manifest.py @@ -0,0 +1,619 @@ +"""Tests for manifest models, projection, overlay validation, and two-tier loading.""" + +from __future__ import annotations + +from pathlib import Path + +import pytest +import yaml + +from semantic_layer.loader import SourceLoader +from semantic_layer.manifest import ( + ManifestColumn, + ManifestEntry, + ManifestJoin, + map_column_type, + project_manifest_entry, + validate_overlay, +) +from semantic_layer.models import ColumnRole + + +# ── Type Mapping Tests ────────────────────────────────────────────── + + +class TestMapColumnType: + def test_map_column_type_numbers(self): + number_types = [ + "integer", + "bigint", + "smallint", + "numeric", + "decimal", + "float", + "double", + "real", + "int", + "int2", + "int4", + "int8", + "float4", + "float8", + "double precision", + "number", + "tinyint", + "mediumint", + ] + for db_type in number_types: + assert map_column_type(db_type) == "number", ( + f"{db_type} should map to 'number'" + ) + + def test_map_column_type_time(self): + time_types = [ + "timestamp", + "timestamptz", + "timestamp with time zone", + "timestamp without time zone", + "TIMESTAMP_NTZ", + "TIMESTAMP_LTZ", + "TIMESTAMP_TZ", + "datetime", + "date", + "time", + "timetz", + ] + for db_type in time_types: + assert map_column_type(db_type) == "time", f"{db_type} 
should map to 'time'" + + def test_map_column_type_boolean(self): + for db_type in ["boolean", "bool"]: + assert map_column_type(db_type) == "boolean", ( + f"{db_type} should map to 'boolean'" + ) + + def test_map_column_type_string_fallback(self): + string_types = ["varchar", "text", "char", "unknown", "jsonb", "xml"] + for db_type in string_types: + assert map_column_type(db_type) == "string", ( + f"{db_type} should map to 'string'" + ) + + def test_map_column_type_strips_precision(self): + assert map_column_type("numeric(10,2)") == "number" + assert map_column_type("varchar(255)") == "string" + assert map_column_type("decimal(18,4)") == "number" + assert map_column_type("timestamp(6)") == "time" + assert map_column_type("char(1)") == "string" + + +# ── Manifest Projection Tests ────────────────────────────────────── + + +class TestProjectManifestEntry: + @pytest.fixture() + def orders_entry(self) -> ManifestEntry: + return ManifestEntry( + table="public.orders", + description="Customer orders", + columns=[ + ManifestColumn(name="id", type="integer", pk=True), + ManifestColumn(name="customer_id", type="integer"), + ManifestColumn(name="total", type="numeric"), + ManifestColumn(name="status", type="varchar"), + ManifestColumn(name="created_at", type="timestamp"), + ], + joins=[ + ManifestJoin( + to="customers", + on="orders.customer_id = customers.id", + relationship="many_to_one", + source="formal", + ), + ], + ) + + def test_project_manifest_entry_basic(self, orders_entry: ManifestEntry): + src = project_manifest_entry("orders", orders_entry) + assert src.name == "orders" + assert src.table == "public.orders" + assert src.description == "Customer orders" + assert len(src.columns) == 5 + assert src.measures == [] + col_names = [c.name for c in src.columns] + assert col_names == ["id", "customer_id", "total", "status", "created_at"] + + def test_project_manifest_entry_type_mapping(self, orders_entry: ManifestEntry): + src = project_manifest_entry("orders", 
orders_entry) + col_types = {c.name: c.type for c in src.columns} + assert col_types["id"] == "number" + assert col_types["customer_id"] == "number" + assert col_types["total"] == "number" + assert col_types["status"] == "string" + assert col_types["created_at"] == "time" + + def test_project_manifest_entry_grain_from_pk(self, orders_entry: ManifestEntry): + src = project_manifest_entry("orders", orders_entry) + assert src.grain == ["id"] + + def test_project_manifest_entry_grain_all_columns_no_pk(self): + entry = ManifestEntry( + table="public.events", + columns=[ + ManifestColumn(name="user_id", type="integer"), + ManifestColumn(name="event_type", type="varchar"), + ManifestColumn(name="ts", type="timestamp"), + ], + ) + src = project_manifest_entry("events", entry) + assert src.grain == ["user_id", "event_type", "ts"] + + def test_project_manifest_entry_joins_stripped(self, orders_entry: ManifestEntry): + src = project_manifest_entry("orders", orders_entry) + assert len(src.joins) == 1 + join = src.joins[0] + assert join.to == "customers" + assert join.on == "orders.customer_id = customers.id" + assert join.relationship == "many_to_one" + assert not hasattr(join, "source") or getattr(join, "source", None) is None + + def test_project_manifest_entry_time_role(self, orders_entry: ManifestEntry): + src = project_manifest_entry("orders", orders_entry) + time_cols = [c for c in src.columns if c.role == ColumnRole.TIME] + assert len(time_cols) == 1 + assert time_cols[0].name == "created_at" + non_time = [c for c in src.columns if c.role == ColumnRole.DEFAULT] + assert len(non_time) == 4 + + def test_project_manifest_entry_preserves_dbt_metadata(self): + entry = ManifestEntry( + table="public.orders", + columns=[ + ManifestColumn( + name="status", + type="varchar", + constraints={"dbt": {"not_null": True}}, + enum_values={"dbt": ["placed", "shipped"]}, + tests={"dbt": [{"name": "accepted_values", "package": "dbt"}]}, + ) + ], + tags={"dbt": ["mart"]}, + 
freshness={"dbt": {"loaded_at_field": "updated_at"}}, + ) + + src = project_manifest_entry("orders", entry) + + assert src.columns[0].constraints is not None + assert src.columns[0].constraints["dbt"].not_null is True + assert src.columns[0].enum_values == {"dbt": ["placed", "shipped"]} + assert src.columns[0].tests is not None + assert src.columns[0].tests.model_dump(mode="python", exclude_none=True) == { + "dbt": [{"name": "accepted_values", "package": "dbt"}] + } + assert src.tags == {"dbt": ["mart"]} + assert src.freshness is not None + assert src.freshness["dbt"].loaded_at_field == "updated_at" + + +# ── Overlay Validation Tests ─────────────────────────────────────── + + +class TestValidateOverlay: + def test_validate_overlay_valid(self): + data = { + "name": "orders", + "description": "Revenue-bearing orders", + "grain": ["id"], + "measures": [{"name": "revenue", "expr": "sum(total)"}], + "columns": [ + {"name": "is_high_value", "expr": "total > 1000", "type": "boolean"} + ], + "exclude_columns": ["status"], + } + errors = validate_overlay(data) + assert errors == [] + + def test_validate_overlay_rejects_table(self): + data = {"name": "orders", "table": "public.orders"} + errors = validate_overlay(data) + assert len(errors) == 1 + assert "table" in errors[0].lower() + + def test_validate_overlay_rejects_sql(self): + data = {"name": "orders", "sql": "SELECT * FROM orders"} + errors = validate_overlay(data) + assert len(errors) == 1 + assert "sql" in errors[0].lower() + + def test_validate_overlay_rejects_type_without_expr(self): + data = { + "name": "orders", + "columns": [{"name": "status", "type": "string"}], + } + errors = validate_overlay(data) + assert len(errors) == 1 + assert "type" in errors[0].lower() + assert "expr" in errors[0].lower() + + def test_validate_overlay_allows_type_with_expr(self): + data = { + "name": "orders", + "columns": [{"name": "is_big", "type": "boolean", "expr": "total > 1000"}], + } + errors = validate_overlay(data) + assert 
errors == [] + + +# ── Two-Tier Loading Tests ───────────────────────────────────────── + + +def _write_yaml(path: Path, data: dict | list) -> None: + path.parent.mkdir(parents=True, exist_ok=True) + with open(path, "w") as f: + yaml.dump(data, f, default_flow_style=False) + + +def _manifest_tables() -> dict: + """Manifest shard with orders + customers tables.""" + return { + "tables": { + "orders": { + "table": "public.orders", + "description": "Customer orders", + "columns": [ + {"name": "id", "type": "integer", "pk": True}, + {"name": "customer_id", "type": "integer"}, + {"name": "total", "type": "numeric"}, + {"name": "status", "type": "varchar"}, + {"name": "created_at", "type": "timestamp"}, + ], + "joins": [ + { + "to": "customers", + "on": "orders.customer_id = customers.id", + "relationship": "many_to_one", + "source": "formal", + }, + ], + }, + "customers": { + "table": "public.customers", + "description": "Customer accounts", + "columns": [ + {"name": "id", "type": "integer", "pk": True}, + {"name": "name", "type": "varchar"}, + ], + "joins": [ + { + "to": "orders", + "on": "customers.id = orders.customer_id", + "relationship": "one_to_many", + "source": "formal", + }, + ], + }, + }, + } + + +class TestTwoTierLoading: + def test_load_manifest_shard(self, tmp_path: Path): + schema_dir = tmp_path / "_schema" + _write_yaml(schema_dir / "public.yaml", _manifest_tables()) + + loader = SourceLoader(tmp_path) + sources = loader.load_all() + + assert "orders" in sources + assert "customers" in sources + assert sources["orders"].table == "public.orders" + assert sources["orders"].grain == ["id"] + assert sources["customers"].table == "public.customers" + + def test_load_standalone_source(self, tmp_path: Path): + standalone = { + "name": "regions", + "table": "public.regions", + "grain": ["id"], + "columns": [ + {"name": "id", "type": "number"}, + {"name": "name", "type": "string"}, + ], + } + _write_yaml(tmp_path / "regions.yaml", standalone) + + loader = 
SourceLoader(tmp_path) + sources = loader.load_all() + + assert "regions" in sources + assert sources["regions"].table == "public.regions" + assert sources["regions"].is_table_source + + def test_overlay_descriptions_do_not_promote_base_description_to_user_source( + self, tmp_path: Path + ): + standalone = { + "name": "regions", + "description": "Standalone description", + "table": "public.regions", + "grain": ["id"], + "columns": [ + {"name": "id", "type": "number"}, + ], + } + _write_yaml(tmp_path / "a_regions.yaml", standalone) + + overlay = {"name": "regions", "descriptions": {"dbt": "dbt description"}} + _write_yaml(tmp_path / "z_regions_overlay.yaml", overlay) + + loader = SourceLoader(tmp_path) + sources = loader.load_all() + + assert sources["regions"].description == "dbt description" + + def test_load_sql_source(self, tmp_path: Path): + sql_source = { + "name": "active_users", + "sql": "SELECT id, email FROM users WHERE active = true", + "grain": ["id"], + "columns": [ + {"name": "id", "type": "number"}, + {"name": "email", "type": "string"}, + ], + } + _write_yaml(tmp_path / "active_users.yaml", sql_source) + + loader = SourceLoader(tmp_path) + sources = loader.load_all() + + assert "active_users" in sources + assert sources["active_users"].is_sql_source + assert "SELECT" in sources["active_users"].sql + + def test_load_overlay_composition(self, tmp_path: Path): + schema_dir = tmp_path / "_schema" + _write_yaml(schema_dir / "public.yaml", _manifest_tables()) + + overlay = { + "name": "orders", + "description": "Revenue-bearing orders", + "grain": ["id"], + "measures": [{"name": "revenue", "expr": "sum(total)"}], + } + _write_yaml(tmp_path / "orders.yaml", overlay) + + # Customers overlay (empty, just name match) to avoid cross-ref error + _write_yaml(tmp_path / "customers.yaml", {"name": "customers"}) + + loader = SourceLoader(tmp_path) + sources = loader.load_all() + + orders = sources["orders"] + assert orders.table == "public.orders" + assert 
orders.description == "Revenue-bearing orders" + assert len(orders.measures) == 1 + assert orders.measures[0].name == "revenue" + + def test_overlay_description_override(self, tmp_path: Path): + schema_dir = tmp_path / "_schema" + _write_yaml(schema_dir / "public.yaml", _manifest_tables()) + + overlay = {"name": "orders", "description": "Overridden description"} + _write_yaml(tmp_path / "orders.yaml", overlay) + _write_yaml(tmp_path / "customers.yaml", {"name": "customers"}) + + loader = SourceLoader(tmp_path) + sources = loader.load_all() + assert sources["orders"].description == "Overridden description" + + def test_overlay_descriptions_map_preserves_higher_priority_manifest_description( + self, tmp_path: Path + ): + schema_dir = tmp_path / "_schema" + _write_yaml(schema_dir / "public.yaml", _manifest_tables()) + + overlay = { + "name": "orders", + "descriptions": { + "db": "DB description", + "dbt": "dbt description", + }, + } + _write_yaml(tmp_path / "orders.yaml", overlay) + _write_yaml(tmp_path / "customers.yaml", {"name": "customers"}) + + loader = SourceLoader(tmp_path) + sources = loader.load_all() + assert sources["orders"].description == "Customer orders" + + def test_overlay_descriptions_map_overrides_lower_priority_db_description( + self, tmp_path: Path + ): + schema_dir = tmp_path / "_schema" + _write_yaml( + schema_dir / "public.yaml", + { + "tables": { + "orders": { + "table": "public.orders", + "descriptions": {"db": "DB description"}, + "columns": [{"name": "id", "type": "integer", "pk": True}], + }, + "customers": { + "table": "public.customers", + "columns": [{"name": "id", "type": "integer", "pk": True}], + }, + } + }, + ) + + overlay = { + "name": "orders", + "descriptions": { + "dbt": "dbt description", + }, + } + _write_yaml(tmp_path / "orders.yaml", overlay) + _write_yaml(tmp_path / "customers.yaml", {"name": "customers"}) + + loader = SourceLoader(tmp_path) + sources = loader.load_all() + assert sources["orders"].description == "dbt 
description" + + def test_overlay_exclude_columns(self, tmp_path: Path): + schema_dir = tmp_path / "_schema" + _write_yaml(schema_dir / "public.yaml", _manifest_tables()) + + overlay = {"name": "orders", "exclude_columns": ["status"]} + _write_yaml(tmp_path / "orders.yaml", overlay) + _write_yaml(tmp_path / "customers.yaml", {"name": "customers"}) + + loader = SourceLoader(tmp_path) + sources = loader.load_all() + + col_names = [c.name for c in sources["orders"].columns] + assert "status" not in col_names + assert "id" in col_names + assert "total" in col_names + + def test_overlay_computed_columns_appended(self, tmp_path: Path): + schema_dir = tmp_path / "_schema" + _write_yaml(schema_dir / "public.yaml", _manifest_tables()) + + overlay = { + "name": "orders", + "columns": [ + {"name": "is_high_value", "expr": "total > 1000", "type": "boolean"}, + ], + } + _write_yaml(tmp_path / "orders.yaml", overlay) + _write_yaml(tmp_path / "customers.yaml", {"name": "customers"}) + + loader = SourceLoader(tmp_path) + sources = loader.load_all() + + col_names = [c.name for c in sources["orders"].columns] + assert "is_high_value" in col_names + # Original columns still present + assert "id" in col_names + assert "total" in col_names + # Computed column keeps the overlay's expr and type (its position in the list is not asserted) + hv = next(c for c in sources["orders"].columns if c.name == "is_high_value") + assert hv.expr == "total > 1000" + assert hv.type == "boolean" + + def test_overlay_measures_set(self, tmp_path: Path): + schema_dir = tmp_path / "_schema" + _write_yaml(schema_dir / "public.yaml", _manifest_tables()) + + overlay = { + "name": "orders", + "measures": [ + {"name": "revenue", "expr": "sum(total)"}, + {"name": "order_count", "expr": "count(id)"}, + ], + } + _write_yaml(tmp_path / "orders.yaml", overlay) + _write_yaml(tmp_path / "customers.yaml", {"name": "customers"}) + + loader = SourceLoader(tmp_path) + sources = loader.load_all() + + assert len(sources["orders"].measures) == 2 + measure_names = {m.name for m in 
sources["orders"].measures} + assert measure_names == {"revenue", "order_count"} + + def test_overlay_grain_override(self, tmp_path: Path): + schema_dir = tmp_path / "_schema" + _write_yaml(schema_dir / "public.yaml", _manifest_tables()) + + overlay = {"name": "orders", "grain": ["id", "customer_id"]} + _write_yaml(tmp_path / "orders.yaml", overlay) + _write_yaml(tmp_path / "customers.yaml", {"name": "customers"}) + + loader = SourceLoader(tmp_path) + sources = loader.load_all() + assert sources["orders"].grain == ["id", "customer_id"] + + def test_overlay_join_union_and_dedupe(self, tmp_path: Path): + schema_dir = tmp_path / "_schema" + _write_yaml(schema_dir / "public.yaml", _manifest_tables()) + + # Add a "regions" standalone so the join target exists + _write_yaml( + tmp_path / "regions.yaml", + { + "name": "regions", + "table": "public.regions", + "grain": ["id"], + "columns": [ + {"name": "id", "type": "number"}, + {"name": "name", "type": "string"}, + ], + }, + ) + + overlay = { + "name": "orders", + "joins": [ + # Duplicate of manifest join (should be deduped) + { + "to": "customers", + "on": "orders.customer_id = customers.id", + "relationship": "many_to_one", + }, + # New join + { + "to": "regions", + "on": "orders.region_id = regions.id", + "relationship": "many_to_one", + }, + ], + } + _write_yaml(tmp_path / "orders.yaml", overlay) + _write_yaml(tmp_path / "customers.yaml", {"name": "customers"}) + + loader = SourceLoader(tmp_path) + sources = loader.load_all() + + joins = sources["orders"].joins + # Manifest had 1 join to customers, overlay adds 1 new (regions), duplicate deduped + assert len(joins) == 2 + join_targets = [j.to for j in joins] + assert "customers" in join_targets + assert "regions" in join_targets + + def test_overlay_disable_joins(self, tmp_path: Path): + schema_dir = tmp_path / "_schema" + _write_yaml(schema_dir / "public.yaml", _manifest_tables()) + + overlay = { + "name": "orders", + "disable_joins": ["orders.customer_id = 
customers.id"], + } + _write_yaml(tmp_path / "orders.yaml", overlay) + + # Customers still needs to exist since the customers manifest entry has + # a join back to orders that is NOT disabled + _write_yaml(tmp_path / "customers.yaml", {"name": "customers"}) + + loader = SourceLoader(tmp_path) + sources = loader.load_all() + + assert len(sources["orders"].joins) == 0 + + def test_overlay_rejects_invalid(self, tmp_path: Path): + schema_dir = tmp_path / "_schema" + _write_yaml(schema_dir / "public.yaml", _manifest_tables()) + + # An overlay with a column that has type but no expr is invalid + overlay = { + "name": "orders", + "columns": [{"name": "status", "type": "string"}], + } + _write_yaml(tmp_path / "orders.yaml", overlay) + _write_yaml(tmp_path / "customers.yaml", {"name": "customers"}) + + loader = SourceLoader(tmp_path) + with pytest.raises(ValueError, match="Invalid overlay"): + loader.load_all() diff --git a/python/klo-sl/tests/test_models.py b/python/klo-sl/tests/test_models.py new file mode 100644 index 00000000..b6468462 --- /dev/null +++ b/python/klo-sl/tests/test_models.py @@ -0,0 +1,373 @@ +import pytest +from pydantic import ValidationError + +from semantic_layer.models import ( + ColumnRole, + ColumnVisibility, + ColumnDbtConstraints, + DefaultTimeDimensionDbt, + FreshnessDbt, + MeasureGroup, + Provenance, + QueryResult, + ResolvedColumn, + ResolvedMeasure, + ResolvedPlan, + SemanticQuery, + SourceColumn, + SourceDefinition, +) + + +class TestSourceColumn: + def test_defaults(self): + col = SourceColumn(name="id", type="number") + assert col.visibility == ColumnVisibility.PUBLIC + assert col.role == ColumnRole.DEFAULT + assert col.description is None + + def test_all_fields(self): + col = SourceColumn( + name="id", type="number", visibility="hidden", role="time", description="PK" + ) + assert col.visibility == ColumnVisibility.HIDDEN + assert col.role == ColumnRole.TIME + + def test_invalid_type(self): + with pytest.raises(ValidationError): + 
SourceColumn(name="id", type="integer") + + +class TestSourceDefinition: + def test_table_source(self): + src = SourceDefinition( + name="orders", + table="public.orders", + grain=["id"], + columns=[SourceColumn(name="id", type="number")], + ) + assert src.table == "public.orders" + assert src.sql is None + assert src.is_table_source + assert not src.is_sql_source + + def test_sql_source(self): + src = SourceDefinition( + name="churn", + sql="SELECT * FROM x", + grain=["customer_id"], + columns=[SourceColumn(name="customer_id", type="number")], + ) + assert src.sql == "SELECT * FROM x" + assert src.table is None + assert src.is_sql_source + assert not src.is_table_source + + def test_table_and_sql_mutually_exclusive(self): + with pytest.raises(ValidationError, match="mutually exclusive"): + SourceDefinition( + name="bad", + table="t", + sql="SELECT 1", + grain=["id"], + columns=[SourceColumn(name="id", type="number")], + ) + + def test_empty_grain_rejected(self): + with pytest.raises(ValidationError, match="grain must be non-empty"): + SourceDefinition( + name="bad", + table="t", + grain=[], + columns=[SourceColumn(name="id", type="number")], + ) + + def test_measures_and_joins(self): + src = SourceDefinition( + name="orders", + table="public.orders", + grain=["id"], + columns=[SourceColumn(name="id", type="number")], + joins=[ + { + "to": "customers", + "on": "cid = customers.id", + "relationship": "many_to_one", + } + ], + measures=[{"name": "revenue", "expr": "sum(amount)"}], + ) + assert len(src.joins) == 1 + assert src.joins[0].to == "customers" + assert len(src.measures) == 1 + assert src.measures[0].name == "revenue" + + def test_default_time_dimension_optional_and_dump(self): + minimal = SourceDefinition( + name="orders", + table="t", + grain=["id"], + columns=[SourceColumn(name="id", type="number")], + ) + assert minimal.default_time_dimension is None + + src = SourceDefinition( + name="orders", + table="t", + grain=["id"], + 
columns=[SourceColumn(name="id", type="number")], + default_time_dimension=DefaultTimeDimensionDbt(dbt="order_date"), + ) + dumped = src.model_dump(mode="python", exclude_none=True) + assert dumped["default_time_dimension"] == {"dbt": "order_date"} + + round_tripped = SourceDefinition.model_validate(dumped) + assert round_tripped.default_time_dimension == DefaultTimeDimensionDbt( + dbt="order_date" + ) + + def test_dbt_structural_metadata_round_trips(self): + src = SourceDefinition( + name="orders", + table="public.orders", + grain=["id"], + columns=[ + SourceColumn( + name="status", + type="string", + constraints={"dbt": {"not_null": True, "unique": True}}, + enum_values={"dbt": ["placed", "shipped"]}, + tests={ + "dbt": [{"name": "accepted_values", "package": "dbt"}], + "dbt_by_package": {"dbt": ["accepted_values"]}, + }, + ) + ], + tags={"dbt": ["mart", "finance"]}, + freshness={ + "dbt": { + "loaded_at_field": "updated_at", + "raw": {"warn_after": {"count": 12, "period": "hour"}}, + } + }, + default_time_dimension=DefaultTimeDimensionDbt(dbt="updated_at"), + ) + + assert src.columns[0].constraints == { + "dbt": ColumnDbtConstraints(not_null=True, unique=True) + } + assert src.columns[0].enum_values == {"dbt": ["placed", "shipped"]} + assert src.columns[0].tests is not None + assert src.columns[0].tests.model_dump(mode="python", exclude_none=True) == { + "dbt": [{"name": "accepted_values", "package": "dbt"}], + "dbt_by_package": {"dbt": ["accepted_values"]}, + } + assert src.tags == {"dbt": ["mart", "finance"]} + assert src.freshness == { + "dbt": FreshnessDbt( + loaded_at_field="updated_at", + raw={"warn_after": {"count": 12, "period": "hour"}}, + ) + } + + dumped = src.model_dump(mode="python", exclude_none=True) + round_tripped = SourceDefinition.model_validate(dumped) + assert round_tripped.columns[0].constraints == src.columns[0].constraints + assert round_tripped.columns[0].enum_values == src.columns[0].enum_values + assert round_tripped.columns[0].tests 
== src.columns[0].tests + assert round_tripped.tags == src.tags + assert round_tripped.freshness == src.freshness + + +class TestSemanticQuery: + def test_minimal(self): + q = SemanticQuery(measures=["sum(orders.amount)"]) + assert q.dimensions == [] + assert q.filters == [] + assert q.limit == 1000 + + def test_mixed_measures(self): + q = SemanticQuery( + measures=[ + "orders.revenue", + {"expr": "sum(orders.amount)", "name": "total"}, + ] + ) + assert isinstance(q.measures[0], str) + assert isinstance(q.measures[1], dict) + + def test_with_dimensions(self): + q = SemanticQuery( + measures=["sum(orders.amount)"], + dimensions=[ + "orders.status", + {"field": "orders.created_at", "granularity": "month"}, + ], + ) + assert len(q.dimensions) == 2 + + +class TestResolvedModels: + def test_resolved_column(self): + col = ResolvedColumn( + name="revenue", provenance=Provenance.VERIFIED, expr="sum(amount)" + ) + assert col.provenance == Provenance.VERIFIED + + def test_resolved_measure(self): + m = ResolvedMeasure(name="revenue", expr="sum(amount)", source_name="orders") + assert m.provenance == Provenance.COMPOSED + assert not m.is_derived + + def test_measure_group(self): + m = ResolvedMeasure(name="rev", expr="sum(amount)", source_name="orders") + g = MeasureGroup(source_name="orders", measures=[m]) + assert g.source_name == "orders" + + def test_resolved_plan(self): + plan = ResolvedPlan( + sources_used=["orders"], + join_paths=[], + anchor_grain=["id"], + fan_out_description="none", + aggregate_locality=[], + where_filters=[], + having_filters=[], + columns=[ResolvedColumn(name="revenue", provenance=Provenance.COMPOSED)], + ) + assert plan.has_fan_out is False + assert plan.measure_groups == [] + + def test_query_result(self): + plan = ResolvedPlan( + sources_used=["orders"], + join_paths=[], + anchor_grain=["id"], + fan_out_description="none", + aggregate_locality=[], + where_filters=[], + having_filters=[], + columns=[], + ) + result = QueryResult( + 
resolved_plan=plan, sql="SELECT 1", dialect="postgres", columns=[] + ) + assert result.dialect == "postgres" + + +class TestJoinDeclaration: + def test_with_alias(self): + from semantic_layer.models import JoinDeclaration + + j = JoinDeclaration( + to="customers", + on="billing_customer_id = customers.id", + relationship="many_to_one", + alias="billing_customer", + ) + assert j.alias == "billing_customer" + assert j.to == "customers" + + def test_without_alias(self): + from semantic_layer.models import JoinDeclaration + + j = JoinDeclaration( + to="customers", + on="customer_id = customers.id", + relationship="many_to_one", + ) + assert j.alias is None + + +class TestMeasureDefinition: + def test_with_filter_and_description(self): + from semantic_layer.models import MeasureDefinition + + m = MeasureDefinition( + name="revenue", + expr="sum(amount)", + filter="status != 'refunded'", + description="Net revenue excluding refunds", + ) + assert m.filter == "status != 'refunded'" + assert m.description == "Net revenue excluding refunds" + + def test_minimal(self): + from semantic_layer.models import MeasureDefinition + + m = MeasureDefinition(name="total", expr="count(id)") + assert m.filter is None + assert m.description is None + + +class TestSemanticQueryExtended: + def test_include_empty_default(self): + q = SemanticQuery(measures=["sum(orders.amount)"]) + assert q.include_empty is True + + def test_include_empty_false(self): + q = SemanticQuery(measures=["sum(orders.amount)"], include_empty=False) + assert q.include_empty is False + + def test_with_order_by(self): + q = SemanticQuery( + measures=["sum(orders.amount)"], + order_by=[{"field": "orders.amount", "direction": "desc"}], + ) + assert len(q.order_by) == 1 + assert q.order_by[0]["direction"] == "desc" + + def test_custom_limit(self): + q = SemanticQuery(measures=["sum(orders.amount)"], limit=50) + assert q.limit == 50 + + +# ── From test_edge_cases.py ────────────────────────────────────────── + + +class 
TestModelEdgeCases: + def test_semantic_query_empty_measures(self): + q = SemanticQuery(measures=[]) + assert q.measures == [] + + def test_semantic_query_defaults(self): + q = SemanticQuery(measures=["sum(x.y)"]) + assert q.dimensions == [] + assert q.filters == [] + assert q.order_by == [] + assert q.limit == 1000 + assert q.include_empty is True + + def test_semantic_query_with_order_by(self): + q = SemanticQuery( + measures=["sum(orders.amount)"], + order_by=[{"field": "orders.status", "direction": "desc"}], + ) + assert len(q.order_by) == 1 + + def test_table_and_sql_mutually_exclusive(self): + with pytest.raises(ValidationError, match="mutually exclusive"): + SourceDefinition( + name="bad", + table="t", + sql="SELECT 1", + grain=["id"], + columns=[SourceColumn(name="id", type="number")], + ) + + def test_empty_grain_rejected(self): + with pytest.raises(ValidationError, match="grain must be non-empty"): + SourceDefinition( + name="bad", + table="t", + grain=[], + columns=[SourceColumn(name="id", type="number")], + ) + + def test_measure_definition_with_filter(self): + from semantic_layer.models import MeasureDefinition + + m = MeasureDefinition( + name="rev", expr="sum(amount)", filter="status != 'refunded'" + ) + assert m.filter == "status != 'refunded'" diff --git a/python/klo-sl/tests/test_parser.py b/python/klo-sl/tests/test_parser.py new file mode 100644 index 00000000..db9d10c9 --- /dev/null +++ b/python/klo-sl/tests/test_parser.py @@ -0,0 +1,279 @@ +from semantic_layer.parser import ExpressionParser + + +parser = ExpressionParser() + + +class TestAggregateDetection: + def test_sum(self): + r = parser.parse("sum(orders.amount)") + assert r.is_aggregate + assert r.aggregate_function == "sum" + + def test_avg(self): + r = parser.parse("avg(score)") + assert r.is_aggregate + assert r.aggregate_function == "avg" + + def test_count(self): + r = parser.parse("count(orders.id)") + assert r.is_aggregate + assert r.aggregate_function == "count" + + def 
test_count_distinct(self): + r = parser.parse("count_distinct(orders.customer_id)") + assert r.is_aggregate + assert r.aggregate_function == "count_distinct" + + def test_non_aggregate(self): + r = parser.parse("orders.revenue") + assert not r.is_aggregate + assert r.aggregate_function is None + + def test_multiple_aggregates(self): + r = parser.parse("sum(orders.amount) / count(orders.id)") + assert r.is_aggregate + # first aggregate found + assert r.aggregate_function == "sum" + + def test_aggregate_in_scalar_subquery_not_aggregate(self): + # `col = (SELECT MAX(col) FROM t)` is a plain column predicate, not HAVING-bound + r = parser.parse("orders.created_at = (SELECT MAX(created_at) FROM orders)") + assert not r.is_aggregate + assert r.aggregate_function is None + + def test_aggregate_in_in_subquery_not_aggregate(self): + r = parser.parse("orders.id IN (SELECT COUNT(id) FROM orders)") + assert not r.is_aggregate + + def test_custom_agg_in_subquery_not_aggregate(self): + r = parser.parse( + "orders.customer_id = (SELECT count_distinct(customer_id) FROM orders)" + ) + assert not r.is_aggregate + + def test_outer_aggregate_with_inner_subquery_still_aggregate(self): + # Outer SUM on a plain column, even if subquery appears elsewhere + r = parser.parse("sum(orders.amount) > (SELECT AVG(amount) FROM orders)") + assert r.is_aggregate + assert r.aggregate_function == "sum" + + +class TestSourceRefs: + def test_single_ref(self): + r = parser.parse("sum(orders.amount)") + assert r.source_refs == {"orders"} + assert r.column_refs == {"orders.amount"} + + def test_multiple_refs(self): + r = parser.parse("sum(orders.revenue) / count(customers.id)") + assert r.source_refs == {"orders", "customers"} + assert r.column_refs == {"orders.revenue", "customers.id"} + + def test_pre_defined_ref(self): + r = parser.parse("orders.revenue") + assert r.source_refs == {"orders"} + assert r.column_refs == {"orders.revenue"} + + def test_no_refs(self): + r = parser.parse("total_rev - 
total_cost") + assert r.source_refs == set() + assert r.column_refs == set() + + def test_mixed_refs(self): + r = parser.parse("sum(orders.amount) + churn_risk.score") + assert r.source_refs == {"orders", "churn_risk"} + + +class TestDerivedMeasures: + def test_depends_on_known_measures(self): + r = parser.parse( + "total_rev - total_cost", + known_measure_names={"total_rev", "total_cost"}, + ) + assert r.depends_on_measures == {"total_rev", "total_cost"} + assert not r.is_aggregate + + def test_no_false_positives(self): + # "sum" should not be detected as a measure dependency + r = parser.parse( + "sum(orders.amount)", + known_measure_names={"sum"}, + ) + assert r.depends_on_measures == set() + + def test_mixed_ref_and_derived(self): + r = parser.parse( + "total_rev / count(orders.id)", + known_measure_names={"total_rev"}, + ) + assert r.depends_on_measures == {"total_rev"} + assert r.is_aggregate + + def test_empty_known_measures(self): + r = parser.parse("total_rev - total_cost") + assert r.depends_on_measures == set() + + +class TestExtractSourceRefs: + def test_basic(self): + refs = parser.extract_source_refs("sum(orders.amount)") + assert refs == {"orders"} + + def test_multiple(self): + refs = parser.extract_source_refs("orders.amount + customers.score") + assert refs == {"orders", "customers"} + + def test_no_refs(self): + refs = parser.extract_source_refs("count(*)") + assert refs == set() + + +class TestEdgeCases: + def test_percentile(self): + r = parser.parse("percentile(churn_risk.score, 0.9)") + assert r.is_aggregate + assert r.aggregate_function == "percentile" + assert r.source_refs == {"churn_risk"} + + def test_string_literal_not_detected(self): + # "status != 'refunded'" — 'refunded' should not be a source ref + r = parser.parse("status != 'refunded'") + assert r.source_refs == set() + + def test_complex_expression(self): + r = parser.parse("sum(orders.amount) / count(orders.id) * 100") + assert r.is_aggregate + assert r.source_refs == {"orders"} 
+ assert r.column_refs == {"orders.amount", "orders.id"} + + +class TestAdditionalAggregates: + def test_min(self): + r = parser.parse("min(orders.amount)") + assert r.is_aggregate + assert r.aggregate_function == "min" + + def test_max(self): + r = parser.parse("max(orders.amount)") + assert r.is_aggregate + assert r.aggregate_function == "max" + + def test_median(self): + r = parser.parse("median(orders.amount)") + assert r.is_aggregate + assert r.aggregate_function == "median" + + def test_nested_function_not_aggregate(self): + """abs() is not an aggregate function, but sum() wrapping it is.""" + r = parser.parse("sum(orders.amount)") + assert r.is_aggregate + assert r.source_refs == {"orders"} + + def test_comparison_operators(self): + """Filter-like expression with comparison.""" + r = parser.parse("orders.status = 'completed'") + assert not r.is_aggregate + assert r.source_refs == {"orders"} + + def test_multiple_source_column_refs(self): + """Expression referencing columns from 3 different sources.""" + r = parser.parse( + "sum(orders.amount) + count(customers.id) - avg(tickets.score)" + ) + assert r.is_aggregate + assert r.source_refs == {"orders", "customers", "tickets"} + + +# ── From test_edge_cases.py: TestExpressionParserEdgeCases ─────────── + + +class TestExpressionParserEdgeCases: + def test_empty_string(self): + result = parser.parse("") + assert result.source_refs == set() + assert result.column_refs == set() + assert not result.is_aggregate + + def test_count_star(self): + result = parser.parse("count(*)") + assert result.is_aggregate + assert result.aggregate_function == "count" + assert result.source_refs == set() + + def test_multiple_aggregate_functions(self): + result = parser.parse("sum(orders.amount) + avg(orders.cost)") + assert result.is_aggregate + assert result.aggregate_function == "sum" + assert result.source_refs == {"orders"} + + def test_nested_function_not_aggregate(self): + result = parser.parse("lower(orders.status)") + assert 
not result.is_aggregate + assert result.source_refs == {"orders"} + + def test_source_ref_in_string_literal(self): + result = parser.parse("'orders.amount'") + assert "orders" not in result.source_refs + assert len(result.column_refs) == 0 + + def test_underscore_names(self): + result = parser.parse("sum(order_items.unit_price)") + assert "order_items" in result.source_refs + assert "order_items.unit_price" in result.column_refs + + def test_extract_source_refs_multi(self): + refs = parser.extract_source_refs("orders.amount + customers.score") + assert refs == {"orders", "customers"} + + +class TestReservedWordHandling: + """LIMIT 4: Reserved SQL keywords as source or column names.""" + + def test_reserved_word_source_name(self): + """Parse 'sum(where.value)' where 'where' is a source name.""" + r = parser.parse("sum(where.value)") + assert r.source_refs == {"where"} + assert r.column_refs == {"where.value"} + assert r.is_aggregate + + def test_reserved_word_column_name(self): + """Parse 'select.from' where both are reserved words.""" + r = parser.parse("select.from") + assert r.source_refs == {"select"} + assert r.column_refs == {"select.from"} + + def test_reserved_word_in_extract_source_refs(self): + """extract_source_refs should handle reserved words in expressions.""" + refs = parser.extract_source_refs("where.value > 10") + assert refs == {"where"} + + +def test_extract_source_refs_bigquery_native(): + """BigQuery-native filter must not drop source refs due to mis-parse.""" + from semantic_layer.parser import ExpressionParser + + parser = ExpressionParser(dialect="bigquery") + refs = parser.extract_source_refs( + "SAFE_DIVIDE(orders.revenue, customers.count) > 0" + ) + assert refs == {"orders", "customers"} + + +def test_expression_parser_dialect_defaults_to_postgres(): + """Constructor default is postgres — keeps existing tests working.""" + from semantic_layer.parser import ExpressionParser + + parser = ExpressionParser() + assert parser.dialect == 
"postgres" + + +def test_extract_source_refs_postgres_baseline(): + """Postgres-dialect parser continues to work on postgres syntax.""" + from semantic_layer.parser import ExpressionParser + + parser = ExpressionParser(dialect="postgres") + refs = parser.extract_source_refs( + "orders.created_at >= current_date - interval '30 days'" + ) + assert refs == {"orders"} diff --git a/python/klo-sl/tests/test_planner.py b/python/klo-sl/tests/test_planner.py new file mode 100644 index 00000000..dd6483b7 --- /dev/null +++ b/python/klo-sl/tests/test_planner.py @@ -0,0 +1,1509 @@ +import pytest + +from semantic_layer.graph import JoinGraph +from semantic_layer.models import ( + ColumnVisibility, + MeasureDefinition, + Provenance, + SemanticQuery, + SourceColumn, + SourceDefinition, + JoinDeclaration, +) +from semantic_layer.planner import QueryPlanner +from semantic_layer.engine import SemanticEngine + + +@pytest.fixture +def planner(ecommerce_sources): + graph = JoinGraph(ecommerce_sources) + graph.build() + return QueryPlanner(ecommerce_sources, graph) + + +class TestSingleSource: + def test_simple_aggregation(self, planner): + query = SemanticQuery( + measures=["sum(orders.amount)"], + dimensions=["orders.status"], + ) + plan = planner.plan(query) + assert "orders" in plan.sources_used + assert len(plan.measures) == 1 + assert plan.measures[0].expr == "sum(orders.amount)" + assert len(plan.dimensions) == 1 + assert plan.dimensions[0].field == "orders.status" + assert not plan.has_fan_out + + def test_pre_defined_measure(self, planner): + query = SemanticQuery( + measures=["orders.revenue"], + dimensions=["orders.status"], + ) + plan = planner.plan(query) + revenue = plan.measures[0] + assert revenue.name == "revenue" + assert revenue.provenance == Provenance.VERIFIED + assert revenue.expr == "SUM(orders.amount)" + assert revenue.filter == "orders.status <> 'refunded'" + + def test_multiple_pre_defined_measures(self, planner): + """Both orders.revenue and orders.order_count 
are pre-defined.""" + query = SemanticQuery( + measures=["orders.revenue", "orders.order_count"], + dimensions=["orders.status"], + ) + plan = planner.plan(query) + assert len(plan.measures) == 2 + names = {m.name for m in plan.measures} + assert names == {"revenue", "order_count"} + assert all(m.provenance == Provenance.VERIFIED for m in plan.measures) + + def test_pre_defined_and_runtime_coexist(self, planner): + """Pre-defined orders.revenue alongside runtime sum(orders.amount).""" + query = SemanticQuery( + measures=["orders.revenue", "sum(orders.amount)"], + dimensions=["orders.status"], + ) + plan = planner.plan(query) + assert len(plan.measures) == 2 + revenue = next(m for m in plan.measures if m.name == "revenue") + runtime = next(m for m in plan.measures if m.name != "revenue") + assert revenue.provenance == Provenance.VERIFIED + assert runtime.provenance == Provenance.COMPOSED + + def test_global_aggregates_no_dimensions(self, planner): + """Measures without any dimensions — global aggregate.""" + query = SemanticQuery( + measures=["sum(orders.amount)", "orders.order_count"], + ) + plan = planner.plan(query) + assert len(plan.measures) == 2 + assert len(plan.dimensions) == 0 + assert "orders" in plan.sources_used + + +class TestCrossSource: + def test_m2o_joins(self, planner): + query = SemanticQuery( + measures=["churn_risk.avg_risk"], + dimensions=["churn_risk.customer_type", "regions.name"], + filters=["regions.name = 'LATAM'"], + ) + plan = planner.plan(query) + assert "churn_risk" in plan.sources_used + assert "regions" in plan.sources_used + assert "customers" in plan.sources_used # intermediate join + assert plan.where_filters == ["regions.name = 'LATAM'"] + + def test_orders_to_regions(self, planner): + query = SemanticQuery( + measures=["sum(orders.amount)"], + dimensions=["regions.name"], + ) + plan = planner.plan(query) + assert "orders" in plan.sources_used + assert "customers" in plan.sources_used # intermediate + assert "regions" in 
plan.sources_used + + def test_runtime_aggregation_cross_source(self, planner): + """Runtime median(orders.amount) grouped by regions.name — not pre-defined.""" + query = SemanticQuery( + measures=[{"expr": "median(orders.amount)", "name": "median_order"}], + dimensions=["regions.name"], + ) + plan = planner.plan(query) + assert "orders" in plan.sources_used + assert "regions" in plan.sources_used + median_m = next(m for m in plan.measures if m.name == "median_order") + assert median_m.provenance == Provenance.COMPOSED + + def test_dimensions_from_multiple_sources(self, planner): + """Dimensions from churn_risk and regions in same query.""" + query = SemanticQuery( + measures=["churn_risk.avg_risk"], + dimensions=["churn_risk.customer_type", "regions.name"], + ) + plan = planner.plan(query) + assert len(plan.dimensions) == 2 + dim_fields = {d.field for d in plan.dimensions} + assert dim_fields == {"churn_risk.customer_type", "regions.name"} + + def test_filter_adds_source_to_graph(self, planner): + """Filter on regions.name when measures/dimensions don't reference regions.""" + query = SemanticQuery( + measures=["sum(orders.amount)"], + dimensions=["orders.status"], + filters=["regions.name = 'LATAM'"], + ) + plan = planner.plan(query) + assert "regions" in plan.sources_used + assert "customers" in plan.sources_used # intermediate to reach regions + + +class TestFanOutDetection: + def test_chasm_trap(self): + """Two independent sources joining m2o to same dimension source.""" + customers = SourceDefinition( + name="customers", + table="t", + grain=["id"], + columns=[SourceColumn(name="id", type="number")], + ) + orders = SourceDefinition( + name="orders", + table="t", + grain=["id"], + columns=[ + SourceColumn(name="id", type="number"), + SourceColumn(name="customer_id", type="number"), + SourceColumn(name="amount", type="number"), + ], + joins=[ + JoinDeclaration( + to="customers", + on="customer_id = customers.id", + relationship="many_to_one", + ) + ], + ) + 
tickets = SourceDefinition( + name="tickets", + table="t", + grain=["id"], + columns=[ + SourceColumn(name="id", type="number"), + SourceColumn(name="customer_id", type="number"), + ], + joins=[ + JoinDeclaration( + to="customers", + on="customer_id = customers.id", + relationship="many_to_one", + ) + ], + ) + sources = {"customers": customers, "orders": orders, "tickets": tickets} + graph = JoinGraph(sources) + graph.build() + planner = QueryPlanner(sources, graph) + + query = SemanticQuery( + measures=["sum(orders.amount)", "count(tickets.id)"], + dimensions=["customers.id"], + ) + plan = planner.plan(query) + assert plan.has_fan_out + assert len(plan.measure_groups) == 2 + group_sources = {g.source_name for g in plan.measure_groups} + assert group_sources == {"orders", "tickets"} + + +class TestFanOutSingleSource: + """Fan-out when a single measure source has o2m path to dimension source.""" + + def test_reverse_path_fan_out(self): + """Querying from customers (dimension) with measures from orders triggers fan-out + when the path from the measure source (orders) to the dimension source (customers) + is m2o — so no fan-out. 
But reversed: measure on customers, dim on orders.""" + customers = SourceDefinition( + name="customers", + table="t", + grain=["id"], + columns=[ + SourceColumn(name="id", type="number"), + SourceColumn(name="score", type="number"), + ], + ) + orders = SourceDefinition( + name="orders", + table="t", + grain=["id"], + columns=[ + SourceColumn(name="id", type="number"), + SourceColumn(name="customer_id", type="number"), + SourceColumn(name="status", type="string"), + ], + joins=[ + JoinDeclaration( + to="customers", + on="customer_id = customers.id", + relationship="many_to_one", + ) + ], + ) + sources = {"customers": customers, "orders": orders} + from semantic_layer.graph import JoinGraph + + graph = JoinGraph(sources) + graph.build() + planner = QueryPlanner(sources, graph) + + # measure on customers, dimension on orders — path from customers to orders is o2m + query = SemanticQuery( + measures=["avg(customers.score)"], + dimensions=["orders.status"], + ) + plan = planner.plan(query) + assert plan.has_fan_out + + def test_m2o_multi_hop_no_fan_out(self, planner): + """orders → customers → regions is all m2o. 
No fan-out.""" + query = SemanticQuery( + measures=["sum(orders.amount)"], + dimensions=["regions.name"], + ) + plan = planner.plan(query) + assert not plan.has_fan_out + + +class TestTripleChasmTrap: + """Three independent measure sources joining to same dimension source.""" + + def test_three_measure_sources(self): + customers = SourceDefinition( + name="customers", + table="t", + grain=["id"], + columns=[ + SourceColumn(name="id", type="number"), + SourceColumn(name="segment", type="string"), + ], + ) + orders = SourceDefinition( + name="orders", + table="t", + grain=["id"], + columns=[ + SourceColumn(name="id", type="number"), + SourceColumn(name="customer_id", type="number"), + SourceColumn(name="amount", type="number"), + ], + joins=[ + JoinDeclaration( + to="customers", + on="customer_id = customers.id", + relationship="many_to_one", + ) + ], + ) + tickets = SourceDefinition( + name="tickets", + table="t", + grain=["id"], + columns=[ + SourceColumn(name="id", type="number"), + SourceColumn(name="customer_id", type="number"), + ], + joins=[ + JoinDeclaration( + to="customers", + on="customer_id = customers.id", + relationship="many_to_one", + ) + ], + ) + payments = SourceDefinition( + name="payments", + table="t", + grain=["id"], + columns=[ + SourceColumn(name="id", type="number"), + SourceColumn(name="customer_id", type="number"), + SourceColumn(name="amount", type="number"), + ], + joins=[ + JoinDeclaration( + to="customers", + on="customer_id = customers.id", + relationship="many_to_one", + ) + ], + ) + sources = { + "customers": customers, + "orders": orders, + "tickets": tickets, + "payments": payments, + } + from semantic_layer.graph import JoinGraph + + graph = JoinGraph(sources) + graph.build() + planner = QueryPlanner(sources, graph) + + query = SemanticQuery( + measures=[ + "sum(orders.amount)", + "count(tickets.id)", + "sum(payments.amount)", + ], + dimensions=["customers.segment"], + ) + plan = planner.plan(query) + assert plan.has_fan_out + 
assert len(plan.measure_groups) == 3 + group_sources = {g.source_name for g in plan.measure_groups} + assert group_sources == {"orders", "tickets", "payments"} + + +class TestFilterClassification: + def test_where_filter(self, planner): + query = SemanticQuery( + measures=["sum(orders.amount)"], + dimensions=["orders.status"], + filters=["orders.status = 'completed'"], + ) + plan = planner.plan(query) + assert "orders.status = 'completed'" in plan.where_filters + assert plan.having_filters == [] + + def test_having_filter(self, planner): + query = SemanticQuery( + measures=["sum(orders.amount)"], + dimensions=["orders.status"], + filters=["sum(orders.amount) > 10000"], + ) + plan = planner.plan(query) + assert plan.where_filters == [] + assert "sum(orders.amount) > 10000" in plan.having_filters + + def test_mixed_filters(self, planner): + query = SemanticQuery( + measures=["sum(orders.amount)"], + dimensions=["orders.status"], + filters=["orders.status = 'completed'", "sum(orders.amount) > 10000"], + ) + plan = planner.plan(query) + assert len(plan.where_filters) == 1 + assert len(plan.having_filters) == 1 + + +class TestDerivedMeasures: + def test_topological_order(self, planner): + query = SemanticQuery( + measures=[ + {"expr": "sum(orders.amount)", "name": "total_rev"}, + {"expr": "sum(orders.cost)", "name": "total_cost"}, + {"expr": "total_rev - total_cost", "name": "profit"}, + ], + dimensions=["orders.status"], + ) + plan = planner.plan(query) + names = [m.name for m in plan.measures] + assert names.index("profit") > names.index("total_rev") + assert names.index("profit") > names.index("total_cost") + + profit = next(m for m in plan.measures if m.name == "profit") + assert profit.is_derived + assert set(profit.depends_on) == {"total_rev", "total_cost"} + + def test_chained_derivation(self, planner): + """profit = rev - cost, margin = profit / rev — 3-level chain.""" + query = SemanticQuery( + measures=[ + {"expr": "sum(orders.amount)", "name": "total_rev"}, + 
{"expr": "sum(orders.cost)", "name": "total_cost"}, + {"expr": "total_rev - total_cost", "name": "profit"}, + {"expr": "profit / total_rev", "name": "margin"}, + ], + dimensions=["orders.status"], + ) + plan = planner.plan(query) + names = [m.name for m in plan.measures] + # margin depends on profit which depends on total_rev and total_cost + assert names.index("margin") > names.index("profit") + assert names.index("profit") > names.index("total_rev") + assert names.index("profit") > names.index("total_cost") + + margin = next(m for m in plan.measures if m.name == "margin") + assert margin.is_derived + assert "profit" in margin.depends_on + assert "total_rev" in margin.depends_on + + def test_cross_source_derived(self, planner): + """Derived measure referencing measures from different sources.""" + query = SemanticQuery( + measures=[ + {"expr": "sum(orders.amount)", "name": "total_rev"}, + {"expr": "count(customers.id)", "name": "cust_count"}, + {"expr": "total_rev / cust_count", "name": "rev_per_customer"}, + ], + dimensions=["customers.segment"], + ) + plan = planner.plan(query) + rev_per = next(m for m in plan.measures if m.name == "rev_per_customer") + assert rev_per.is_derived + assert set(rev_per.depends_on) == {"total_rev", "cust_count"} + + +class TestDimensions: + def test_time_granularity(self, planner): + query = SemanticQuery( + measures=["sum(orders.amount)"], + dimensions=[{"field": "orders.created_at", "granularity": "month"}], + ) + plan = planner.plan(query) + assert len(plan.dimensions) == 1 + assert plan.dimensions[0].granularity == "month" + + def test_string_dimension(self, planner): + query = SemanticQuery( + measures=["sum(orders.amount)"], + dimensions=["orders.status"], + ) + plan = planner.plan(query) + assert plan.dimensions[0].field == "orders.status" + assert plan.dimensions[0].granularity is None + + +class TestAnchorSelection: + def test_anchor_prefers_dimension_source_for_include_empty_queries(self, planner): + """Dimension-side 
anchor preserves empty dimension rows by default.""" + query = SemanticQuery( + measures=["sum(orders.amount)"], + dimensions=["customers.segment"], + ) + plan = planner.plan(query) + assert plan.anchor_source == "customers" + + def test_anchor_fallback_to_dimension(self, planner): + """When all measures are derived, anchor falls back to dimension source.""" + query = SemanticQuery( + measures=[ + {"expr": "sum(orders.amount)", "name": "rev"}, + {"expr": "sum(orders.cost)", "name": "cost"}, + {"expr": "rev - cost", "name": "profit"}, + ], + dimensions=["orders.status"], + ) + plan = planner.plan(query) + # rev and cost are non-derived, so anchor should be orders + assert plan.anchor_source == "orders" + + +class TestFilterEdgeCases: + def test_filter_referencing_named_measure(self, planner): + """Filter on a named measure → HAVING.""" + query = SemanticQuery( + measures=[ + {"expr": "sum(orders.amount)", "name": "total_rev"}, + ], + dimensions=["orders.status"], + filters=["total_rev > 10000"], + ) + plan = planner.plan(query) + assert "total_rev > 10000" in plan.having_filters + + def test_filter_on_joined_dimension(self, planner): + """Filter on a dimension from a joined source → WHERE.""" + query = SemanticQuery( + measures=["sum(orders.amount)"], + dimensions=["orders.status"], + filters=["customers.segment = 'Enterprise'"], + ) + plan = planner.plan(query) + assert "customers.segment = 'Enterprise'" in plan.where_filters + assert "customers" in plan.sources_used + + +class TestErrors: + def test_no_source_refs(self, planner): + query = SemanticQuery(measures=["count(*)"]) + with pytest.raises(ValueError, match="does not reference any source"): + planner.plan(query) + + def test_missing_source(self, planner): + query = SemanticQuery( + measures=["sum(nonexistent.amount)"], + dimensions=["orders.status"], + ) + # This should fail because nonexistent is not in the graph + with pytest.raises(ValueError): + planner.plan(query) + + +# ── From test_edge_cases.py: 
planner edge cases ───────────────────── + + +class TestPlannerEdgeCases: + @pytest.fixture + def _planner(self, ecommerce_sources): + graph = JoinGraph(ecommerce_sources) + graph.build() + return QueryPlanner(ecommerce_sources, graph) + + def test_nonexistent_predefined_measure(self, _planner): + query = SemanticQuery( + measures=["orders.nonexistent"], + dimensions=["orders.status"], + ) + with pytest.raises(ValueError, match="not a pre-defined measure"): + _planner.plan(query) + + def test_duplicate_dimension_names(self, _planner): + query = SemanticQuery( + measures=["sum(orders.amount)"], + dimensions=["orders.status", "orders.status"], + ) + plan = _planner.plan(query) + # Duplicate dimensions are deduplicated + assert len(plan.dimensions) == 1 + assert plan.dimensions[0].field == "orders.status" + + def test_dimension_only_query(self, _planner): + query = SemanticQuery( + measures=[], + dimensions=["orders.status"], + ) + plan = _planner.plan(query) + assert "orders" in plan.sources_used + + def test_many_dimensions_one_measure(self, _planner): + query = SemanticQuery( + measures=["sum(orders.amount)"], + dimensions=[ + "orders.status", + "orders.created_at", + "customers.segment", + "regions.name", + ], + ) + plan = _planner.plan(query) + assert len(plan.dimensions) == 4 + assert "orders" in plan.sources_used + assert "customers" in plan.sources_used + assert "regions" in plan.sources_used + + def test_filter_with_aggregate_and_named_measure(self, _planner): + query = SemanticQuery( + measures=[ + {"expr": "sum(orders.amount)", "name": "total"}, + {"expr": "count(orders.id)", "name": "cnt"}, + ], + dimensions=["orders.status"], + filters=["total > 100", "cnt > 5"], + ) + plan = _planner.plan(query) + assert len(plan.having_filters) == 2 + assert len(plan.where_filters) == 0 + + +# ── From test_edge_cases.py: qualify predefined expressions ────────── + + +class TestQualifyPredefinedExpr: + @pytest.fixture + def _planner(self, ecommerce_sources): + graph = 
JoinGraph(ecommerce_sources) + graph.build() + return QueryPlanner(ecommerce_sources, graph) + + def test_revenue_qualified(self, _planner): + result = _planner._qualify_predefined_expr("sum(amount)", "orders") + assert "orders" in result.lower() + assert "amount" in result.lower() + + def test_already_qualified_stays(self, _planner): + result = _planner._qualify_predefined_expr("sum(orders.amount)", "orders") + assert "amount" in result.lower() + + def test_nonexistent_source_passthrough(self, _planner): + result = _planner._qualify_predefined_expr("sum(amount)", "nonexistent") + assert result == "sum(amount)" + + def test_filter_expression_qualified(self, _planner): + result = _planner._qualify_predefined_expr("status != 'refunded'", "orders") + assert "orders" in result.lower() + + +# ── From test_bug_fixes.py ─────────────────────────────────────────── + + +ECOMMERCE_SOURCES_DICT = { + "orders": { + "name": "orders", + "table": "public.orders", + "grain": ["id"], + "columns": [ + {"name": "id", "type": "number"}, + {"name": "amount", "type": "number"}, + {"name": "cost", "type": "number"}, + {"name": "status", "type": "string"}, + {"name": "customer_id", "type": "number"}, + {"name": "created_at", "type": "time", "role": "time"}, + ], + "joins": [ + { + "to": "customers", + "on": "customer_id = customers.id", + "relationship": "many_to_one", + }, + ], + "measures": [ + { + "name": "revenue", + "expr": "sum(amount)", + "filter": "status != 'refunded'", + }, + {"name": "order_count", "expr": "count(id)"}, + ], + }, + "customers": { + "name": "customers", + "table": "public.customers", + "grain": ["id"], + "columns": [ + {"name": "id", "type": "number"}, + {"name": "name", "type": "string"}, + {"name": "segment", "type": "string"}, + ], + }, +} + + +class TestPreDefinedMeasureFilterClassification: + """Filters referencing pre-defined measure names should be HAVING, not WHERE.""" + + def test_predefined_measure_in_filter_goes_to_having(self): + from conftest import 
make_engine, assert_valid_sql + + engine = make_engine(ECOMMERCE_SOURCES_DICT) + result = engine.query( + { + "measures": ["orders.revenue"], + "dimensions": ["orders.status"], + "filters": ["orders.revenue > 1000"], + } + ) + assert_valid_sql(result.sql) + assert "HAVING" in result.sql.upper() + assert "orders.revenue > 1000" in result.resolved_plan.having_filters + assert result.resolved_plan.where_filters == [] + + def test_regular_column_filter_stays_in_where(self): + from conftest import make_engine, assert_valid_sql + + engine = make_engine(ECOMMERCE_SOURCES_DICT) + result = engine.query( + { + "measures": ["orders.revenue"], + "dimensions": ["orders.status"], + "filters": ["orders.status = 'completed'"], + } + ) + assert_valid_sql(result.sql) + assert "WHERE" in result.sql.upper() + assert "orders.status = 'completed'" in result.resolved_plan.where_filters + + def test_mixed_where_and_having(self): + from conftest import make_engine, assert_valid_sql + + engine = make_engine(ECOMMERCE_SOURCES_DICT) + result = engine.query( + { + "measures": ["orders.revenue"], + "dimensions": ["orders.status"], + "filters": ["orders.status != 'cancelled'", "orders.revenue > 500"], + } + ) + assert_valid_sql(result.sql) + assert "orders.status != 'cancelled'" in result.resolved_plan.where_filters + assert "orders.revenue > 500" in result.resolved_plan.having_filters + + def test_explicit_aggregate_filter_still_having(self): + from conftest import make_engine, assert_valid_sql + + engine = make_engine(ECOMMERCE_SOURCES_DICT) + result = engine.query( + { + "measures": ["sum(orders.amount)"], + "dimensions": ["orders.status"], + "filters": ["sum(orders.amount) > 1000"], + } + ) + assert_valid_sql(result.sql) + assert "HAVING" in result.sql.upper() + + +class TestBareColumnInMeasures: + """Bare column references in measures (no aggregate) should error.""" + + def test_bare_column_rejected(self): + from conftest import make_engine + + engine = make_engine(ECOMMERCE_SOURCES_DICT) + 
with pytest.raises(ValueError, match="not a pre-defined measure.*no aggregate"): + engine.query( + { + "measures": ["orders.amount"], + "dimensions": ["orders.status"], + } + ) + + def test_aggregate_column_accepted(self): + from conftest import make_engine, assert_valid_sql + + engine = make_engine(ECOMMERCE_SOURCES_DICT) + result = engine.query( + { + "measures": ["sum(orders.amount)"], + "dimensions": ["orders.status"], + } + ) + assert_valid_sql(result.sql) + + def test_predefined_measure_accepted(self): + from conftest import make_engine, assert_valid_sql + + engine = make_engine(ECOMMERCE_SOURCES_DICT) + result = engine.query( + { + "measures": ["orders.revenue"], + "dimensions": ["orders.status"], + } + ) + assert_valid_sql(result.sql) + assert result.resolved_plan.measures[0].provenance == Provenance.VERIFIED + + +class TestOrderByValidation: + """ORDER BY on non-existent fields should error.""" + + def test_order_by_unknown_field_rejected(self): + from conftest import make_engine + + engine = make_engine(ECOMMERCE_SOURCES_DICT) + with pytest.raises(ValueError, match="not a recognized measure or dimension"): + engine.query( + { + "measures": ["sum(orders.amount)"], + "dimensions": ["orders.status"], + "order_by": [{"field": "orders.created_at", "direction": "desc"}], + } + ) + + def test_order_by_measure_name_accepted(self): + from conftest import make_engine, assert_valid_sql + + engine = make_engine(ECOMMERCE_SOURCES_DICT) + result = engine.query( + { + "measures": [{"expr": "sum(orders.amount)", "name": "total"}], + "dimensions": ["orders.status"], + "order_by": [{"field": "total", "direction": "desc"}], + } + ) + assert_valid_sql(result.sql) + assert "total DESC" in result.sql or "total desc" in result.sql.lower() + + def test_order_by_dimension_field_accepted(self): + from conftest import make_engine, assert_valid_sql + + engine = make_engine(ECOMMERCE_SOURCES_DICT) + result = engine.query( + { + "measures": ["sum(orders.amount)"], + "dimensions": 
["orders.status"], + "order_by": [{"field": "orders.status", "direction": "asc"}], + } + ) + assert_valid_sql(result.sql) + + +class TestEmptyFilter: + """Empty filter strings should be silently skipped.""" + + def test_empty_string_filter_ignored(self): + from conftest import make_engine, assert_valid_sql + + engine = make_engine(ECOMMERCE_SOURCES_DICT) + result = engine.query( + { + "measures": ["sum(orders.amount)"], + "dimensions": ["orders.status"], + "filters": [""], + } + ) + assert_valid_sql(result.sql) + assert "WHERE \n" not in result.sql + assert "WHERE " not in result.sql + + def test_whitespace_only_filter_ignored(self): + from conftest import make_engine, assert_valid_sql + + engine = make_engine(ECOMMERCE_SOURCES_DICT) + result = engine.query( + { + "measures": ["sum(orders.amount)"], + "dimensions": ["orders.status"], + "filters": [" "], + } + ) + assert_valid_sql(result.sql) + + def test_empty_mixed_with_real_filter(self): + from conftest import make_engine, assert_valid_sql + + engine = make_engine(ECOMMERCE_SOURCES_DICT) + result = engine.query( + { + "measures": ["sum(orders.amount)"], + "dimensions": ["orders.status"], + "filters": ["", "orders.status = 'completed'", " "], + } + ) + assert_valid_sql(result.sql) + assert "WHERE" in result.sql.upper() + assert "completed" in result.sql + assert len(result.resolved_plan.where_filters) == 1 + + +class TestNonexistentMeasure: + """Referencing a nonexistent pre-defined measure should error.""" + + def test_nonexistent_measure_errors(self): + from conftest import make_engine + + engine = make_engine(ECOMMERCE_SOURCES_DICT) + with pytest.raises(ValueError, match="not a pre-defined measure"): + engine.query( + { + "measures": ["orders.nonexistent_measure"], + "dimensions": ["orders.status"], + } + ) + + def test_existing_measure_works(self): + from conftest import make_engine, assert_valid_sql + + engine = make_engine(ECOMMERCE_SOURCES_DICT) + result = engine.query( + { + "measures": ["orders.revenue"], 
+ "dimensions": ["orders.status"], + } + ) + assert_valid_sql(result.sql) + assert result.resolved_plan.measures[0].name == "revenue" + + +# ── From test_spec_gaps.py: column visibility ──────────────────────── + + +class TestColumnVisibility: + """Querying hidden columns should raise errors.""" + + def test_hidden_column_rejected_in_dimension(self): + from conftest import make_engine + + sources = { + "users": { + "name": "users", + "table": "public.users", + "grain": ["id"], + "columns": [ + {"name": "id", "type": "number"}, + {"name": "name", "type": "string"}, + {"name": "ssn", "type": "string", "visibility": "hidden"}, + ], + }, + } + engine = make_engine(sources) + with pytest.raises(ValueError, match="[Hh]idden"): + engine.query( + { + "measures": ["count(users.id)"], + "dimensions": ["users.ssn"], + } + ) + + def test_hidden_column_rejected_in_measure(self): + from conftest import make_engine + + sources = { + "users": { + "name": "users", + "table": "public.users", + "grain": ["id"], + "columns": [ + {"name": "id", "type": "number"}, + {"name": "secret_score", "type": "number", "visibility": "hidden"}, + ], + }, + } + engine = make_engine(sources) + with pytest.raises(ValueError, match="[Hh]idden"): + engine.query( + { + "measures": ["sum(users.secret_score)"], + "dimensions": ["users.id"], + } + ) + + def test_hidden_column_rejected_in_filter(self): + from conftest import make_engine + + sources = { + "users": { + "name": "users", + "table": "public.users", + "grain": ["id"], + "columns": [ + {"name": "id", "type": "number"}, + {"name": "name", "type": "string"}, + { + "name": "internal_flag", + "type": "boolean", + "visibility": "hidden", + }, + ], + }, + } + engine = make_engine(sources) + with pytest.raises(ValueError, match="[Hh]idden"): + engine.query( + { + "measures": ["count(users.id)"], + "dimensions": ["users.name"], + "filters": ["users.internal_flag = true"], + } + ) + + def test_internal_column_allowed(self): + from conftest import 
make_engine, assert_valid_sql + + sources = { + "users": { + "name": "users", + "table": "public.users", + "grain": ["id"], + "columns": [ + {"name": "id", "type": "number"}, + {"name": "debug_col", "type": "string", "visibility": "internal"}, + ], + }, + } + engine = make_engine(sources) + result = engine.query( + { + "measures": ["count(users.id)"], + "dimensions": ["users.debug_col"], + } + ) + assert result.sql + assert_valid_sql(result.sql) + + +# ── From test_edge_cases.py: derived measure cycles ───────────────── + + +class TestCyclicDerivedMeasures: + def test_direct_cycle_a_b(self): + from pathlib import Path + + engine = SemanticEngine( + str(Path(__file__).parent.parent / "sources" / "ecommerce"), + dialect="postgres", + ) + with pytest.raises(ValueError, match="Circular dependency"): + engine.query( + { + "measures": [ + {"expr": "b * 2", "name": "a"}, + {"expr": "a + 1", "name": "b"}, + ], + "dimensions": ["orders.status"], + } + ) + + def test_three_way_cycle(self): + from pathlib import Path + + engine = SemanticEngine( + str(Path(__file__).parent.parent / "sources" / "ecommerce"), + dialect="postgres", + ) + with pytest.raises(ValueError, match="Circular dependency"): + engine.query( + { + "measures": [ + {"expr": "c + sum(orders.amount)", "name": "a"}, + {"expr": "a + 1", "name": "b"}, + {"expr": "b + 1", "name": "c"}, + ], + "dimensions": ["orders.status"], + } + ) + + def test_self_referencing_measure(self): + from pathlib import Path + + engine = SemanticEngine( + str(Path(__file__).parent.parent / "sources" / "ecommerce"), + dialect="postgres", + ) + with pytest.raises(ValueError, match="Circular dependency"): + engine.query( + { + "measures": [ + {"expr": "x + sum(orders.amount)", "name": "x"}, + ], + "dimensions": ["orders.status"], + } + ) + + def test_non_circular_derived_still_works(self): + from conftest import assert_valid_sql + from pathlib import Path + + engine = SemanticEngine( + str(Path(__file__).parent.parent / "sources" / 
"ecommerce"), + dialect="postgres", + ) + result = engine.query( + { + "measures": [ + {"expr": "sum(orders.amount)", "name": "total"}, + {"expr": "sum(orders.cost)", "name": "cost"}, + {"expr": "total - cost", "name": "profit"}, + ], + "dimensions": ["orders.status"], + } + ) + assert "profit" in result.sql.lower() + assert_valid_sql(result.sql) + + +# ── From test_edge_cases.py: derived measure edge cases ────────────── + + +class TestDerivedMeasureEdgeCases: + def test_derived_measure_with_no_dependencies(self): + from pathlib import Path + + engine = SemanticEngine( + str(Path(__file__).parent.parent / "sources" / "ecommerce"), + dialect="postgres", + ) + with pytest.raises(ValueError): + engine.query( + { + "measures": [{"expr": "42", "name": "constant"}], + "dimensions": ["orders.status"], + } + ) + + def test_multi_level_derived_chain(self): + from conftest import assert_valid_sql + from pathlib import Path + + engine = SemanticEngine( + str(Path(__file__).parent.parent / "sources" / "ecommerce"), + dialect="postgres", + ) + result = engine.query( + { + "measures": [ + {"expr": "sum(orders.amount)", "name": "base"}, + {"expr": "base * 2", "name": "doubled"}, + {"expr": "doubled + base", "name": "combined"}, + ], + "dimensions": ["orders.status"], + } + ) + assert "combined" in result.sql + assert_valid_sql(result.sql) + + +# ── From test_edge_cases.py: filter fan-out detection ──────────────── + + +class TestFilterFanOutDetection: + def test_filter_only_fan_out_raises(self): + from pathlib import Path + + engine = SemanticEngine( + str(Path(__file__).parent.parent / "sources" / "ecommerce"), + dialect="postgres", + ) + with pytest.raises(ValueError, match="one_to_many join"): + engine.query( + { + "measures": ["sum(orders.amount)"], + "dimensions": ["orders.status"], + "filters": ["products.category = 'Electronics'"], + } + ) + + def test_filter_on_dimension_source_ok(self): + from conftest import assert_valid_sql + from pathlib import Path + + engine = 
SemanticEngine( + str(Path(__file__).parent.parent / "sources" / "ecommerce"), + dialect="postgres", + ) + result = engine.query( + { + "measures": ["sum(orders.amount)"], + "dimensions": ["orders.status"], + "filters": ["customers.segment = 'enterprise'"], + } + ) + assert "customers" in result.sql.lower() + assert_valid_sql(result.sql) + + def test_filter_on_same_source_ok(self): + from conftest import assert_valid_sql + from pathlib import Path + + engine = SemanticEngine( + str(Path(__file__).parent.parent / "sources" / "ecommerce"), + dialect="postgres", + ) + result = engine.query( + { + "measures": ["sum(orders.amount)"], + "dimensions": ["orders.status"], + "filters": ["orders.status != 'cancelled'"], + } + ) + assert_valid_sql(result.sql) + + +# ── From test_edge_cases.py: filter on dimension not in query ──────── + + +class TestFilterOnDimensionNotInQuery: + def test_filter_brings_in_new_source(self): + from conftest import assert_valid_sql + from pathlib import Path + + engine = SemanticEngine( + str(Path(__file__).parent.parent / "sources" / "ecommerce"), + dialect="postgres", + ) + result = engine.query( + { + "measures": ["sum(orders.amount)"], + "dimensions": ["orders.status"], + "filters": ["regions.name = 'LATAM'"], + } + ) + sql = result.sql + assert "regions" in sql.lower() + assert "LATAM" in sql + assert "customers" in sql.lower() + assert_valid_sql(sql) + + def test_filter_on_products_with_order_measures_raises(self): + from pathlib import Path + + engine = SemanticEngine( + str(Path(__file__).parent.parent / "sources" / "ecommerce"), + dialect="postgres", + ) + with pytest.raises(ValueError, match="one_to_many join"): + engine.query( + { + "measures": ["sum(orders.amount)"], + "dimensions": ["orders.status"], + "filters": ["products.category = 'Electronics'"], + } + ) + + +class TestMeasureNameCollision: + """BUG 1: Same measure name across different sources gets qualified.""" + + def test_same_measure_name_qualified(self): + """Two sources 
each with 'revenue' measure -- names become source-qualified.""" + from conftest import make_engine + + sources = { + "hub": { + "name": "hub", + "table": "public.hub", + "grain": ["id"], + "columns": [ + {"name": "id", "type": "number"}, + {"name": "segment", "type": "string"}, + ], + }, + "online_sales": { + "name": "online_sales", + "table": "public.online_sales", + "grain": ["id"], + "columns": [ + {"name": "id", "type": "number"}, + {"name": "hub_id", "type": "number"}, + {"name": "amount", "type": "number"}, + ], + "joins": [ + { + "to": "hub", + "on": "hub_id = hub.id", + "relationship": "many_to_one", + } + ], + "measures": [{"name": "revenue", "expr": "sum(amount)"}], + }, + "store_sales": { + "name": "store_sales", + "table": "public.store_sales", + "grain": ["id"], + "columns": [ + {"name": "id", "type": "number"}, + {"name": "hub_id", "type": "number"}, + {"name": "amount", "type": "number"}, + ], + "joins": [ + { + "to": "hub", + "on": "hub_id = hub.id", + "relationship": "many_to_one", + } + ], + "measures": [{"name": "revenue", "expr": "sum(amount)"}], + }, + } + engine = make_engine(sources) + plan = engine.plan_only( + { + "measures": ["online_sales.revenue", "store_sales.revenue"], + "dimensions": ["hub.segment"], + } + ) + measure_names = {m.name for m in plan.measures} + assert "online_sales_revenue" in measure_names + assert "store_sales_revenue" in measure_names + assert "revenue" not in measure_names + + def test_no_qualification_when_no_collision(self): + """Single source with 'revenue' -- name stays 'revenue'.""" + from conftest import make_engine + + sources = { + "orders": { + "name": "orders", + "table": "public.orders", + "grain": ["id"], + "columns": [ + {"name": "id", "type": "number"}, + {"name": "amount", "type": "number"}, + {"name": "status", "type": "string"}, + ], + "measures": [{"name": "revenue", "expr": "sum(amount)"}], + }, + } + engine = make_engine(sources) + plan = engine.plan_only( + { + "measures": ["orders.revenue"], + 
"dimensions": ["orders.status"], + } + ) + assert plan.measures[0].name == "revenue" + + +class TestMeasureNameAsDimension: + """Fix 3: Measure names should not be accepted as dimensions.""" + + def test_measure_name_rejected_as_dimension(self): + sources = { + "orders": SourceDefinition( + name="orders", + table="public.orders", + grain=["id"], + columns=[ + SourceColumn(name="id", type="number"), + SourceColumn(name="amount", type="number"), + SourceColumn(name="status", type="string"), + ], + measures=[MeasureDefinition(name="revenue", expr="sum(amount)")], + ), + } + engine = SemanticEngine.from_sources(sources) + with pytest.raises(ValueError, match="does not exist"): + engine.query( + { + "measures": ["sum(orders.amount)"], + "dimensions": ["orders.revenue"], + } + ) + + def test_measure_name_accepted_in_filter(self): + """Measure names in filters should still work (HAVING path).""" + from conftest import assert_valid_sql + + sources = { + "orders": SourceDefinition( + name="orders", + table="public.orders", + grain=["id"], + columns=[ + SourceColumn(name="id", type="number"), + SourceColumn(name="amount", type="number"), + SourceColumn(name="status", type="string"), + ], + measures=[MeasureDefinition(name="revenue", expr="sum(amount)")], + ), + } + engine = SemanticEngine.from_sources(sources) + result = engine.query( + { + "measures": ["orders.revenue"], + "dimensions": ["orders.status"], + "filters": ["orders.revenue > 1000"], + } + ) + assert_valid_sql(result.sql) + + +class TestAliasValidation: + """Fix 4: Alias refs should be validated for column existence and visibility.""" + + def _build_alias_sources(self): + return { + "orders": SourceDefinition( + name="orders", + table="public.orders", + grain=["id"], + columns=[ + SourceColumn(name="id", type="number"), + SourceColumn(name="billing_customer_id", type="number"), + SourceColumn(name="amount", type="number"), + ], + joins=[ + JoinDeclaration( + to="customers", + on="billing_customer_id = 
customers.id", + relationship="many_to_one", + alias="billing_customer", + ) + ], + ), + "customers": SourceDefinition( + name="customers", + table="public.customers", + grain=["id"], + columns=[ + SourceColumn(name="id", type="number"), + SourceColumn(name="name", type="string"), + SourceColumn( + name="secret_code", + type="string", + visibility=ColumnVisibility.HIDDEN, + ), + ], + ), + } + + def test_alias_nonexistent_column_rejected(self): + engine = SemanticEngine.from_sources(self._build_alias_sources()) + with pytest.raises(ValueError, match="does not exist"): + engine.query( + { + "measures": ["sum(orders.amount)"], + "dimensions": ["billing_customer.nonexistent_col"], + } + ) + + def test_alias_hidden_column_rejected(self): + engine = SemanticEngine.from_sources(self._build_alias_sources()) + with pytest.raises(ValueError, match="hidden"): + engine.query( + { + "measures": ["sum(orders.amount)"], + "dimensions": ["billing_customer.secret_code"], + } + ) + + def test_alias_valid_column_accepted(self): + from conftest import assert_valid_sql + + engine = SemanticEngine.from_sources(self._build_alias_sources()) + result = engine.query( + { + "measures": ["sum(orders.amount)"], + "dimensions": ["billing_customer.name"], + } + ) + assert_valid_sql(result.sql) + + +class TestMultiSourceMeasure: + """Fix 5: Multi-source measure expressions should include all source refs.""" + + def test_multi_source_expr_includes_all_source_refs(self): + sources = { + "customers": SourceDefinition( + name="customers", + table="public.customers", + grain=["id"], + columns=[ + SourceColumn(name="id", type="number"), + SourceColumn(name="segment", type="string"), + ], + ), + "orders": SourceDefinition( + name="orders", + table="public.orders", + grain=["id"], + columns=[ + SourceColumn(name="id", type="number"), + SourceColumn(name="customer_id", type="number"), + SourceColumn(name="amount", type="number"), + ], + joins=[ + JoinDeclaration( + to="customers", + on="customer_id = 
customers.id", + relationship="many_to_one", + ) + ], + ), + } + engine = SemanticEngine.from_sources(sources) + result = engine.query( + { + "measures": [ + { + "expr": "sum(orders.amount) / count(customers.id)", + "name": "amt_per_cust", + } + ], + "dimensions": ["customers.segment"], + } + ) + assert "orders" in result.resolved_plan.sources_used + assert "customers" in result.resolved_plan.sources_used + assert "JOIN" in result.sql.upper() + + +def test_derived_measure_with_bigquery_native_dependency(make_engine_factory): + """Derived measure referencing a BigQuery-native base measure must not degrade.""" + source = { + "name": "events", + "table": "events", + "grain": ["id"], + "columns": [ + {"name": "id", "type": "number"}, + {"name": "user_id", "type": "number"}, + ], + "measures": [ + {"name": "distinct_users", "expr": "APPROX_COUNT_DISTINCT(user_id)"}, + { + "name": "thousand_users", + "expr": "distinct_users / 1000.0", + }, + ], + } + engine = make_engine_factory({"events": source}, dialect="bigquery") + result = engine.query( + {"measures": ["events.thousand_users"], "dimensions": [], "filters": []} + ) + assert "APPROX_COUNT_DISTINCT" in result.sql.upper(), ( + f"APPROX_COUNT_DISTINCT was rewritten away:\n{result.sql}" + ) diff --git a/python/klo-sl/tests/test_segments.py b/python/klo-sl/tests/test_segments.py new file mode 100644 index 00000000..6eddc937 --- /dev/null +++ b/python/klo-sl/tests/test_segments.py @@ -0,0 +1,293 @@ +"""Tests for named segments — reusable boolean predicates on a source. + +Segments are AND-ed into the measure's effective filter via the same CASE WHEN +pathway used by `measure.filter`. They never become a global WHERE clause. 
+""" + +from __future__ import annotations + +import pytest + +from .conftest import assert_valid_sql, make_engine + + +def _orders_source(**overrides): + base = { + "name": "orders", + "table": "public.orders", + "grain": ["id"], + "columns": [ + {"name": "id", "type": "number"}, + {"name": "amount", "type": "number"}, + {"name": "is_paid", "type": "boolean"}, + {"name": "is_refunded", "type": "string"}, + {"name": "customer_id", "type": "number"}, + ], + "segments": [ + { + "name": "paid_non_refunded", + "expr": "is_paid = true and is_refunded = '0'", + "description": "Settled, not reversed.", + }, + ], + "measures": [ + { + "name": "total_revenue", + "expr": "sum(amount)", + "segments": ["paid_non_refunded"], + }, + ], + } + base.update(overrides) + return base + + +def _customers_source(**overrides): + base = { + "name": "customers", + "table": "public.customers", + "grain": ["id"], + "columns": [ + {"name": "id", "type": "number"}, + {"name": "is_vip", "type": "boolean"}, + ], + "measures": [ + {"name": "customer_count", "expr": "count(distinct id)"}, + ], + } + base.update(overrides) + return base + + +# ── Composition + golden SQL shape ─────────────────────────────────── + + +class TestSegmentComposition: + def test_measure_segment_lands_in_case_when_wrap(self): + engine = make_engine({"orders": _orders_source()}) + result = engine.query({"measures": ["orders.total_revenue"]}) + assert_valid_sql(result.sql) + sql_upper = result.sql.upper() + # Filter must be inside CASE WHEN (the measure-filter pathway) + assert "CASE WHEN" in sql_upper + assert "is_paid" in result.sql.lower() + assert "is_refunded" in result.sql.lower() + # Should NOT show up as a global WHERE + # (a WHERE clause may exist for other reasons — assert no segment expr in it) + # Easiest: assert WHERE doesn't contain the segment's exact predicate. + # Split before/after first WHERE keyword if any. 
+ assert "WHERE IS_PAID" not in sql_upper.replace(" = ", " = ") + + def test_measure_filter_and_segment_both_applied(self): + src = _orders_source() + src["measures"][0]["filter"] = "amount > 0" + engine = make_engine({"orders": src}) + result = engine.query({"measures": ["orders.total_revenue"]}) + assert_valid_sql(result.sql) + sql_lower = result.sql.lower() + # Both predicates appear inside the measure's CASE WHEN wrap + assert "amount > 0" in sql_lower + assert "is_paid" in sql_lower + assert "is_refunded" in sql_lower + # AND composition: ensure both halves are joined + assert " and " in sql_lower + + def test_query_time_segment_applies_to_measure(self): + # Measure has no measure-bound segment; segment is applied at query time. + src = _orders_source() + src["measures"] = [{"name": "raw_revenue", "expr": "sum(amount)"}] + engine = make_engine({"orders": src}) + result = engine.query( + { + "measures": ["orders.raw_revenue"], + "segments": ["orders.paid_non_refunded"], + } + ) + assert_valid_sql(result.sql) + sql_lower = result.sql.lower() + assert "case when" in sql_lower + assert "is_paid" in sql_lower + assert "is_refunded" in sql_lower + + def test_measure_and_query_segments_compose(self): + # Measure has paid_non_refunded; query adds 'high_value'. + src = _orders_source() + src["segments"].append( + {"name": "high_value", "expr": "amount >= 100"}, + ) + engine = make_engine({"orders": src}) + result = engine.query( + { + "measures": ["orders.total_revenue"], + "segments": ["orders.high_value"], + } + ) + assert_valid_sql(result.sql) + sql_lower = result.sql.lower() + # All three predicates present + assert "is_paid" in sql_lower + assert "is_refunded" in sql_lower + assert "amount >= 100" in sql_lower + + +# ── Multi-source query: scope is per-measure, not global ───────────── + + +class TestSegmentMultiSourceScope: + def test_segment_does_not_apply_to_other_source_measures(self): + # Query touches both orders and customers; segment is on orders only. 
+ # Assert that the segment predicate does NOT show up in the + # customers CTE / WHERE on customers. + engine = make_engine( + { + "orders": _orders_source( + joins=[ + { + "to": "customers", + "on": "customer_id = customers.id", + "relationship": "many_to_one", + } + ], + measures=[ + {"name": "raw_revenue", "expr": "sum(amount)"}, + ], + ), + "customers": _customers_source(), + } + ) + result = engine.query( + { + "measures": [ + "orders.raw_revenue", + "customers.customer_count", + ], + "segments": ["orders.paid_non_refunded"], + } + ) + assert_valid_sql(result.sql) + sql_lower = result.sql.lower() + # Segment predicate appears (it landed on orders) + assert "is_paid" in sql_lower + # The customers measure's pre-aggregation CTE / clause must not be filtered by the segment. + # Heuristic: find each line that references count(distinct ... id) and assert no + # "is_paid" or "is_refunded" in the same CASE WHEN block. The simpler assertion + # is that there's no global WHERE applying the segment. + # We assert the segment doesn't appear inside an aggregate against the customers source. + # Concretely: count(...customers...) should not contain is_paid/is_refunded. + # Walk the SQL and find COUNT(DISTINCT ... ID) — that aggregate must be unfiltered. + import re + + count_aggs = re.findall( + r"COUNT\s*\(\s*DISTINCT[^()]*\)", result.sql, flags=re.IGNORECASE + ) + assert count_aggs, "expected at least one COUNT(DISTINCT ...) 
aggregate" + for agg in count_aggs: + assert "is_paid" not in agg.lower(), ( + f"customer_count aggregate must not be filtered by segment: {agg}" + ) + + +# ── Error cases ────────────────────────────────────────────────────── + + +class TestSegmentErrors: + def test_unknown_bare_name_in_measure_segments(self): + src = _orders_source() + src["measures"][0]["segments"] = ["does_not_exist"] + engine = make_engine({"orders": src}) + with pytest.raises(ValueError, match="unknown segment 'does_not_exist'"): + engine.query({"measures": ["orders.total_revenue"]}) + + def test_unknown_query_time_segment_name(self): + engine = make_engine({"orders": _orders_source()}) + with pytest.raises(ValueError, match="unknown segment 'does_not_exist'"): + engine.query( + { + "measures": ["orders.total_revenue"], + "segments": ["orders.does_not_exist"], + } + ) + + def test_unknown_query_time_segment_source(self): + engine = make_engine({"orders": _orders_source()}) + with pytest.raises(ValueError, match="unknown source 'no_such_source'"): + engine.query( + { + "measures": ["orders.total_revenue"], + "segments": ["no_such_source.foo"], + } + ) + + def test_query_time_segment_must_be_dotted(self): + engine = make_engine({"orders": _orders_source()}) + with pytest.raises(ValueError, match="dotted"): + engine.query( + { + "measures": ["orders.total_revenue"], + "segments": ["paid_non_refunded"], # missing source prefix + } + ) + + def test_no_op_query_time_segment_errors(self): + # Segment on customers, but no customers measure in the query. 
+ engine = make_engine( + { + "orders": _orders_source( + joins=[ + { + "to": "customers", + "on": "customer_id = customers.id", + "relationship": "many_to_one", + } + ], + measures=[{"name": "raw_revenue", "expr": "sum(amount)"}], + ), + "customers": _customers_source( + segments=[{"name": "vips", "expr": "is_vip = true"}] + ), + } + ) + with pytest.raises(ValueError, match="no matching"): + engine.query( + { + "measures": ["orders.raw_revenue"], + "segments": ["customers.vips"], + } + ) + + +def test_bigquery_native_segment_referenced_by_measure(make_engine_factory): + """Segment authored in BigQuery dialect, referenced by a measure, + must not degrade the segment's native syntax when composed.""" + source = { + "name": "fct_orders", + "table": "fct_orders", + "grain": ["id"], + "columns": [ + {"name": "id", "type": "number"}, + {"name": "status", "type": "string"}, + {"name": "ts", "type": "time"}, + ], + "segments": [ + {"name": "non_cancelled", "expr": "status != 'cancelled'"}, + { + "name": "last_30", + "expr": "ts >= timestamp(date_sub(current_date(), interval 30 day))", + }, + ], + "measures": [ + { + "name": "dau", + "expr": "count(distinct id)", + "segments": ["non_cancelled", "last_30"], + } + ], + } + engine = make_engine_factory({"fct_orders": source}, dialect="bigquery") + result = engine.query( + {"measures": ["fct_orders.dau"], "dimensions": [], "filters": []} + ) + sql = result.sql + assert "INTERVAL '30'" not in sql or "DAY" in sql.upper(), ( + f"INTERVAL unit lost in segment reference:\n{sql}" + ) diff --git a/python/klo-sl/tests/test_snowflake.py b/python/klo-sl/tests/test_snowflake.py new file mode 100644 index 00000000..6a9db403 --- /dev/null +++ b/python/klo-sl/tests/test_snowflake.py @@ -0,0 +1,470 @@ +"""Comprehensive Snowflake dialect tests covering all major SQL generation code paths.""" + +from __future__ import annotations + +from pathlib import Path + +import pytest +import sqlglot + +from semantic_layer.engine import SemanticEngine 
+from semantic_layer.models import SourceColumn, SourceDefinition + +SOURCES_DIR = str(Path(__file__).parent.parent / "sources" / "ecommerce") + + +def assert_valid_snowflake_sql(sql: str): + """Assert SQL parses as valid Snowflake SQL.""" + try: + result = sqlglot.parse(sql, read="snowflake") + assert result and all(r is not None for r in result) + except Exception as e: + pytest.fail(f"SQL is not valid Snowflake: {e}\n\nSQL:\n{sql}") + + +@pytest.fixture +def sf_engine(): + return SemanticEngine(SOURCES_DIR, dialect="snowflake") + + +@pytest.fixture +def chasm_engine(): + """Engine with hub + two fact tables for chasm trap / aggregate locality tests.""" + sources = { + "hub": SourceDefinition( + name="hub", + table="public.hub", + grain=["id"], + columns=[ + SourceColumn(name="id", type="number"), + SourceColumn(name="segment", type="string"), + ], + ), + "fact_a": SourceDefinition( + name="fact_a", + table="public.fact_a", + grain=["id"], + columns=[ + SourceColumn(name="id", type="number"), + SourceColumn(name="hub_id", type="number"), + SourceColumn(name="val", type="number"), + SourceColumn(name="created_at", type="time"), + ], + joins=[ + {"to": "hub", "on": "hub_id = hub.id", "relationship": "many_to_one"} + ], + ), + "fact_b": SourceDefinition( + name="fact_b", + table="public.fact_b", + grain=["id"], + columns=[ + SourceColumn(name="id", type="number"), + SourceColumn(name="hub_id", type="number"), + SourceColumn(name="val", type="number"), + ], + joins=[ + {"to": "hub", "on": "hub_id = hub.id", "relationship": "many_to_one"} + ], + measures=[{"name": "total_val", "expr": "sum(val)", "filter": "val > 0"}], + ), + } + return SemanticEngine.from_sources(sources, dialect="snowflake") + + +# ── Basic query patterns ───────────────────────────────────────────── + + +class TestSnowflakeBasic: + def test_simple_single_source(self, sf_engine): + result = sf_engine.query( + { + "measures": ["sum(orders.amount)"], + "dimensions": ["orders.status"], + } + ) + sql = 
result.sql + assert result.dialect == "snowflake" + assert_valid_snowflake_sql(sql) + assert "GROUP BY" in sql + + def test_cross_source_m2o(self, sf_engine): + result = sf_engine.query( + { + "measures": ["sum(orders.amount)"], + "dimensions": ["customers.segment", "regions.name"], + } + ) + sql = result.sql + assert_valid_snowflake_sql(sql) + assert "JOIN" in sql + + def test_predefined_measure_with_filter(self, sf_engine): + result = sf_engine.query( + { + "measures": ["orders.revenue"], + "dimensions": ["orders.status"], + } + ) + sql = result.sql + assert_valid_snowflake_sql(sql) + assert "CASE WHEN" in sql + assert "<>" in sql # sqlglot transpiles != to <> + assert "'refunded'" in sql + + def test_derived_measures(self, sf_engine): + result = sf_engine.query( + { + "measures": [ + {"expr": "sum(orders.amount)", "name": "total_rev"}, + {"expr": "sum(orders.cost)", "name": "total_cost"}, + {"expr": "total_rev - total_cost", "name": "profit"}, + ], + "dimensions": ["customers.segment"], + } + ) + sql = result.sql + assert_valid_snowflake_sql(sql) + assert "profit" in sql.lower() + assert "total_rev" in sql + assert "total_cost" in sql + + def test_include_empty_false(self, sf_engine): + result_left = sf_engine.query( + { + "measures": ["sum(orders.amount)"], + "dimensions": ["customers.segment"], + "include_empty": True, + } + ) + result_inner = sf_engine.query( + { + "measures": ["sum(orders.amount)"], + "dimensions": ["customers.segment"], + "include_empty": False, + } + ) + assert_valid_snowflake_sql(result_left.sql) + assert_valid_snowflake_sql(result_inner.sql) + assert "LEFT JOIN" in result_left.sql.upper() + assert "LEFT JOIN" not in result_inner.sql.upper() + + +# ── Time granularity ───────────────────────────────────────────────── + + +class TestSnowflakeTimeGranularity: + @pytest.mark.parametrize("granularity", ["day", "week", "month", "quarter", "year"]) + def test_date_trunc_uppercase(self, sf_engine, granularity): + result = sf_engine.query( + { + 
"measures": ["sum(orders.amount)"], + "dimensions": [ + {"field": "orders.created_at", "granularity": granularity} + ], + } + ) + sql = result.sql + assert_valid_snowflake_sql(sql) + # Snowflake DATE_TRUNC uses uppercase granularity + assert f"DATE_TRUNC('{granularity.upper()}'" in sql + + +# ── Filters ────────────────────────────────────────────────────────── + + +class TestSnowflakeFilters: + def test_having_filter(self, sf_engine): + result = sf_engine.query( + { + "measures": ["sum(orders.amount)"], + "dimensions": ["orders.status"], + "filters": ["sum(orders.amount) > 10000"], + } + ) + sql = result.sql + assert_valid_snowflake_sql(sql) + assert "HAVING" in sql + assert "10000" in sql + + def test_where_and_having(self, sf_engine): + result = sf_engine.query( + { + "measures": ["sum(orders.amount)"], + "dimensions": ["orders.status"], + "filters": [ + "orders.status != 'cancelled'", + "sum(orders.amount) > 1000", + ], + } + ) + sql = result.sql + assert_valid_snowflake_sql(sql) + assert "WHERE" in sql + assert "HAVING" in sql + + +# ── SQL sources / CTEs ─────────────────────────────────────────────── + + +class TestSnowflakeCTE: + def test_sql_source_as_cte(self, sf_engine): + result = sf_engine.query( + { + "measures": ["avg(churn_risk.score)"], + "dimensions": ["churn_risk.customer_type"], + } + ) + sql = result.sql + assert_valid_snowflake_sql(sql) + assert "WITH" in sql + assert "churn_risk" in sql + + def test_cross_source_with_sql_source(self, sf_engine): + result = sf_engine.query( + { + "measures": ["avg(churn_risk.score)"], + "dimensions": ["regions.name"], + } + ) + sql = result.sql + assert_valid_snowflake_sql(sql) + assert "WITH" in sql + assert "JOIN" in sql + + def test_sql_source_with_datediff(self): + """DATEDIFF in SQL source must survive transpilation (not become AGE).""" + sources = { + "cohorts": SourceDefinition( + name="cohorts", + sql="SELECT id, DATEDIFF(WEEK, start_date, end_date) AS n FROM t", + grain=["id"], + columns=[ + 
SourceColumn(name="id", type="number"), + SourceColumn(name="n", type="number"), + ], + ), + } + engine = SemanticEngine.from_sources(sources, dialect="snowflake") + result = engine.query({"measures": ["sum(cohorts.n)"], "dimensions": []}) + assert_valid_snowflake_sql(result.sql) + assert "DATEDIFF" in result.sql.upper() + assert "AGE" not in result.sql.upper() + + def test_sql_source_with_datediff_in_ctes(self): + """DATEDIFF inside inner CTEs must survive CTE promotion.""" + sources = { + "retention": SourceDefinition( + name="retention", + sql=( + "WITH spine AS (" + " SELECT DISTINCT cohort_week," + " DATEDIFF(WEEK, cohort_week, period_week) AS n" + " FROM adopters" + ") SELECT cohort_week, n, COUNT(*) AS cnt FROM spine GROUP BY 1, 2" + ), + grain=["cohort_week", "n"], + columns=[ + SourceColumn(name="cohort_week", type="time"), + SourceColumn(name="n", type="number"), + SourceColumn(name="cnt", type="number"), + ], + ), + } + engine = SemanticEngine.from_sources(sources, dialect="snowflake") + result = engine.query( + {"measures": ["sum(retention.cnt)"], "dimensions": ["retention.n"]} + ) + assert_valid_snowflake_sql(result.sql) + assert "DATEDIFF" in result.sql.upper() + # Inner CTE should be promoted with prefix + assert "retention__spine" in result.sql + + +# ── Aggregate functions ────────────────────────────────────────────── + + +class TestSnowflakeAggregateFunctions: + def test_median_percentile_cont(self, sf_engine): + result = sf_engine.query( + { + "measures": [{"expr": "median(orders.amount)", "name": "median_order"}], + "dimensions": ["orders.status"], + } + ) + sql = result.sql + assert_valid_snowflake_sql(sql) + assert "PERCENTILE_CONT" in sql + assert "WITHIN GROUP" in sql + + def test_percentile(self, sf_engine): + result = sf_engine.query( + { + "measures": [{"expr": "percentile(orders.amount, 0.9)", "name": "p90"}], + "dimensions": ["orders.status"], + } + ) + sql = result.sql + assert_valid_snowflake_sql(sql) + assert "PERCENTILE_CONT" in 
sql + assert "0.9" in sql + + def test_count_distinct(self, sf_engine): + result = sf_engine.query( + { + "measures": ["count_distinct(orders.customer_id)"], + "dimensions": ["orders.status"], + } + ) + sql = result.sql + assert_valid_snowflake_sql(sql) + assert "COUNT(DISTINCT" in sql + + +# ── Aggregate locality / chasm traps ───────────────────────────────── + + +class TestSnowflakeAggregateLocality: + def test_chasm_trap_full_join(self, chasm_engine): + result = chasm_engine.query( + { + "measures": ["sum(fact_a.val)", "sum(fact_b.val)"], + "dimensions": ["hub.segment"], + } + ) + sql = result.sql + assert_valid_snowflake_sql(sql) + assert "FULL JOIN" in sql.upper() + assert "COALESCE" in sql.upper() + assert "fact_a_agg" in sql + assert "fact_b_agg" in sql + + def test_chasm_trap_predefined_filtered_measure(self, chasm_engine): + result = chasm_engine.query( + { + "measures": ["sum(fact_a.val)", "fact_b.total_val"], + "dimensions": ["hub.segment"], + } + ) + sql = result.sql + assert_valid_snowflake_sql(sql) + assert "CASE WHEN" in sql + assert "total_val" in sql + + def test_chasm_trap_derived_measure(self, chasm_engine): + result = chasm_engine.query( + { + "measures": [ + {"expr": "sum(fact_a.val)", "name": "total_a"}, + {"expr": "sum(fact_b.val)", "name": "total_b"}, + {"expr": "total_a + total_b", "name": "grand_total"}, + ], + "dimensions": ["hub.segment"], + } + ) + sql = result.sql + assert_valid_snowflake_sql(sql) + assert "grand_total" in sql + assert "COALESCE" in sql.upper() + + def test_chasm_trap_derived_ratio_nullif(self, chasm_engine): + result = chasm_engine.query( + { + "measures": [ + {"expr": "sum(fact_a.val)", "name": "total_a"}, + {"expr": "sum(fact_b.val)", "name": "total_b"}, + {"expr": "total_a / total_b", "name": "ratio"}, + ], + "dimensions": ["hub.segment"], + } + ) + sql = result.sql + assert_valid_snowflake_sql(sql) + assert "NULLIF" in sql.upper() + assert "ratio" in sql + + def test_chasm_trap_having(self, chasm_engine): + 
result = chasm_engine.query( + { + "measures": ["sum(fact_a.val)", "sum(fact_b.val)"], + "dimensions": ["hub.segment"], + "filters": ["sum(fact_a.val) > 100"], + } + ) + sql = result.sql + assert_valid_snowflake_sql(sql) + assert "100" in sql + # HAVING in locality mode becomes WHERE on outer query + assert "WHERE" in sql + + +# ── ORDER BY + LIMIT ───────────────────────────────────────────────── + + +class TestSnowflakeOrderByLimit: + def test_order_by_desc_with_limit(self, sf_engine): + result = sf_engine.query( + { + "measures": ["sum(orders.amount)"], + "dimensions": ["orders.status"], + "order_by": [{"field": "sum(orders.amount)", "direction": "desc"}], + "limit": 10, + } + ) + sql = result.sql + assert_valid_snowflake_sql(sql) + assert "DESC" in sql.upper() + assert "LIMIT 10" in sql + + +# ── Snowflake reserved words as identifiers ────────────────────────── + + +class TestSnowflakeReservedWords: + """Snowflake-specific reserved words (sample, qualify) must be quoted.""" + + @pytest.mark.parametrize("source_name", ["sample", "qualify"]) + def test_snowflake_reserved_word_as_source_name(self, source_name): + sources = { + source_name: SourceDefinition( + name=source_name, + table=f"public.{source_name}s", + grain=["id"], + columns=[ + SourceColumn(name="id", type="number"), + SourceColumn(name="val", type="number"), + ], + ), + } + engine = SemanticEngine.from_sources(sources, dialect="snowflake") + result = engine.query( + { + "measures": [f"sum({source_name}.val)"], + "dimensions": [], + } + ) + assert_valid_snowflake_sql(result.sql) + assert "SUM" in result.sql.upper() + + @pytest.mark.parametrize("col_name", ["sample", "qualify"]) + def test_snowflake_reserved_word_as_column_name(self, col_name): + sources = { + "orders": SourceDefinition( + name="orders", + table="public.orders", + grain=["id"], + columns=[ + SourceColumn(name="id", type="number"), + SourceColumn(name=col_name, type="number"), + ], + ), + } + engine = 
SemanticEngine.from_sources(sources, dialect="snowflake") + result = engine.query( + { + "measures": [f"sum(orders.{col_name})"], + "dimensions": [], + } + ) + assert_valid_snowflake_sql(result.sql) + assert "SUM" in result.sql.upper() diff --git a/python/klo-sl/tests/test_sql_join_coverage.py b/python/klo-sl/tests/test_sql_join_coverage.py new file mode 100644 index 00000000..739bd345 --- /dev/null +++ b/python/klo-sl/tests/test_sql_join_coverage.py @@ -0,0 +1,296 @@ +from __future__ import annotations + +from semantic_layer.engine import SemanticEngine +from semantic_layer.models import ( + JoinDeclaration, + SourceColumn, + SourceDefinition, +) +from semantic_layer.sql_table_extractor import ( + extract_table_refs, + normalize_table, + ref_matches_source_table, +) + + +def _table_src( + name: str, table: str, columns: list[str] | None = None +) -> SourceDefinition: + cols = columns or ["id"] + return SourceDefinition( + name=name, + table=table, + grain=["id"], + columns=[SourceColumn(name=c, type="number") for c in cols], + ) + + +def _sql_src( + name: str, + sql: str, + columns: list[str] | None = None, + joins: list[JoinDeclaration] | None = None, +) -> SourceDefinition: + cols = columns or ["id"] + return SourceDefinition( + name=name, + sql=sql, + grain=["id"], + columns=[SourceColumn(name=c, type="number") for c in cols], + joins=joins or [], + ) + + +class TestExtractTableRefs: + def test_simple_select(self): + refs = extract_table_refs("select id from analytics.marts.listings") + assert refs == [("analytics", "marts", "listings")] + + def test_join_clause(self): + sql = """ + select l.id from analytics.marts.listings l + join analytics.marts.accounts a on l.account_id = a.id + """ + assert extract_table_refs(sql) == [ + ("analytics", "marts", "listings"), + ("analytics", "marts", "accounts"), + ] + + def test_cte_alias_skipped(self): + sql = """ + with d as (select id from staging.shipments) + select * from d join staging.items_shipments i on d.id = 
i.shipment_id + """ + # `d` is a CTE — must not appear. `staging.shipments` and + # `staging.items_shipments` both should. + refs = extract_table_refs(sql) + assert ("staging", "shipments") in refs + assert ("staging", "items_shipments") in refs + assert all(ref != ("d",) for ref in refs) + + def test_dedup(self): + sql = """ + select * from analytics.marts.listings l1 + join analytics.marts.listings l2 on l1.id = l2.id + """ + assert extract_table_refs(sql) == [("analytics", "marts", "listings")] + + def test_unparseable_returns_empty(self): + assert extract_table_refs("not valid sql !!!") == [] + + +class TestRefMatching: + def test_normalize_strips_quotes_and_lowercases(self): + assert normalize_table('"ANALYTICS"."MARTS"."LISTINGS"') == ( + "analytics", + "marts", + "listings", + ) + + def test_full_match(self): + assert ref_matches_source_table( + ("analytics", "marts", "listings"), "ANALYTICS.MARTS.LISTINGS" + ) + + def test_two_part_suffix_matches_three_part_table(self): + assert ref_matches_source_table( + ("marts", "listings"), "ANALYTICS.MARTS.LISTINGS" + ) + + def test_bare_name_matches_three_part_table(self): + assert ref_matches_source_table(("listings",), "ANALYTICS.MARTS.LISTINGS") + + def test_db_mismatch_blocks_match(self): + assert not ref_matches_source_table( + ("staging", "listings"), "ANALYTICS.MARTS.LISTINGS" + ) + + def test_longer_ref_does_not_match_shorter_table(self): + assert not ref_matches_source_table( + ("analytics", "marts", "listings"), "marts.listings" + ) + + +class TestSqlJoinCoverage: + def _build_engine( + self, + listings_table: str = "ANALYTICS.MARTS.LISTINGS", + accounts_table: str = "ANALYTICS.MARTS.ACCOUNTS", + new_source_sql: str | None = None, + new_source_joins: list[JoinDeclaration] | None = None, + ) -> SemanticEngine: + listings = _table_src("LISTINGS", listings_table) + accounts = _table_src("ACCOUNTS", accounts_table) + sources = {"LISTINGS": listings, "ACCOUNTS": accounts} + if new_source_sql is not None: + 
sources["my_source"] = _sql_src( + "my_source", + sql=new_source_sql, + joins=new_source_joins, + ) + return SemanticEngine.from_sources(sources) + + def test_coverage_gap_emitted_as_error(self): + sql = """ + select l.id, a.name + from ANALYTICS.MARTS.LISTINGS l + join ANALYTICS.MARTS.ACCOUNTS a on l.account_id = a.id + """ + engine = self._build_engine(new_source_sql=sql, new_source_joins=[]) + + report = engine.validate(recently_touched={"my_source"}) + + assert not report.valid + coverage_errors = [e for e in report.errors if "my_source" in e] + assert any("LISTINGS" in e and "ACCOUNTS" in e for e in coverage_errors), ( + f"Expected coverage error mentioning LISTINGS and ACCOUNTS, got: {report.errors}" + ) + + def test_declared_join_satisfies_coverage(self): + sql = """ + select l.id, a.name + from ANALYTICS.MARTS.LISTINGS l + join ANALYTICS.MARTS.ACCOUNTS a on l.account_id = a.id + """ + joins = [ + JoinDeclaration( + to="LISTINGS", + on="my_source.listing_id = LISTINGS.id", + relationship="many_to_one", + ), + JoinDeclaration( + to="ACCOUNTS", + on="my_source.account_id = ACCOUNTS.id", + relationship="many_to_one", + ), + ] + engine = self._build_engine(new_source_sql=sql, new_source_joins=joins) + + report = engine.validate(recently_touched={"my_source"}) + + coverage_errors = [ + e for e in report.errors if "my_source" in e and "joins[]" in e + ] + assert coverage_errors == [] + + def test_partial_coverage_lists_only_missing(self): + sql = """ + select l.id, a.name + from ANALYTICS.MARTS.LISTINGS l + join ANALYTICS.MARTS.ACCOUNTS a on l.account_id = a.id + """ + joins = [ + JoinDeclaration( + to="LISTINGS", + on="my_source.listing_id = LISTINGS.id", + relationship="many_to_one", + ), + ] + engine = self._build_engine(new_source_sql=sql, new_source_joins=joins) + + report = engine.validate(recently_touched={"my_source"}) + + coverage_errors = [ + e for e in report.errors if "my_source" in e and "ACCOUNTS" in e + ] + assert coverage_errors, f"Expected 
ACCOUNTS gap, got: {report.errors}" + assert all("LISTINGS]" not in e for e in coverage_errors), ( + f"LISTINGS should be satisfied: {report.errors}" + ) + + def test_unmapped_table_does_not_trigger_coverage_error(self): + # SQL references staging.foo which has no manifest entry — the + # check is silent. (The agent is still expected to write a wiki + # note, but that's outside the validator's scope.) + sql = "select id from staging.foo" + engine = self._build_engine(new_source_sql=sql) + + report = engine.validate(recently_touched={"my_source"}) + + assert not any("my_source" in e and "joins[]" in e for e in report.errors), ( + f"Unmapped table must not be flagged: {report.errors}" + ) + + def test_quoted_identifiers_match(self): + sql = ( + 'select * from "ANALYTICS"."MARTS"."LISTINGS" l ' + 'join "ANALYTICS"."MARTS"."ACCOUNTS" a on l.account_id = a.id' + ) + engine = self._build_engine(new_source_sql=sql, new_source_joins=[]) + + report = engine.validate(recently_touched={"my_source"}) + + assert any( + "my_source" in e and "LISTINGS" in e and "ACCOUNTS" in e + for e in report.errors + ), f"Quoted identifiers should match: {report.errors}" + + def test_cte_self_reference_not_flagged(self): + sql = """ + with d as (select id from ANALYTICS.MARTS.LISTINGS) + select * from d + """ + # LISTINGS is referenced inside the CTE — that still counts and + # must be flagged (the manifest entry exists). `d` itself must + # NOT be flagged as missing. 
+ engine = self._build_engine(new_source_sql=sql, new_source_joins=[]) + + report = engine.validate(recently_touched={"my_source"}) + + coverage_errors = [e for e in report.errors if "my_source" in e] + assert any("LISTINGS" in e for e in coverage_errors) + assert not any("'d'" in e or " d " in e for e in coverage_errors), ( + f"CTE alias 'd' must not be flagged: {coverage_errors}" + ) + + def test_two_part_suffix_match(self): + # Source's SQL references `MARTS.LISTINGS` (2-part) — should match + # the 3-part manifest entry `ANALYTICS.MARTS.LISTINGS`. + sql = "select id from MARTS.LISTINGS" + engine = self._build_engine(new_source_sql=sql, new_source_joins=[]) + + report = engine.validate(recently_touched={"my_source"}) + + assert any("my_source" in e and "LISTINGS" in e for e in report.errors), ( + f"Two-part suffix should match: {report.errors}" + ) + + def test_not_recently_touched_means_no_check(self): + # Same buggy SQL as above, but the source isn't in + # `recently_touched` — coverage check skipped. + sql = """ + select l.id from ANALYTICS.MARTS.LISTINGS l + join ANALYTICS.MARTS.ACCOUNTS a on l.account_id = a.id + """ + engine = self._build_engine(new_source_sql=sql, new_source_joins=[]) + + report = engine.validate(recently_touched=None) + + coverage_errors = [ + e for e in report.errors if "my_source" in e and "joins[]" in e + ] + assert coverage_errors == [] + + def test_table_only_source_skipped(self): + # A source with `table:` (no SQL) cannot be coverage-checked. 
+ listings = _table_src("LISTINGS", "ANALYTICS.MARTS.LISTINGS") + bare = _table_src("bare", "public.bare", columns=["id"]) + engine = SemanticEngine.from_sources({"LISTINGS": listings, "bare": bare}) + + report = engine.validate(recently_touched={"bare"}) + + assert not any("bare" in e and "joins[]" in e for e in report.errors), ( + f"Table-only source must not be flagged: {report.errors}" + ) + + def test_self_reference_not_flagged(self): + # If `my_source` somehow names its own table in the manifest, we + # shouldn't flag itself. + my_source = _sql_src("my_source", sql="select id from public.my_source") + # Not realistic for SQL sources, but make sure self-refs are + # filtered defensively. + engine = SemanticEngine.from_sources({"my_source": my_source}) + + report = engine.validate(recently_touched={"my_source"}) + + assert not any("my_source" in e and "joins[]" in e for e in report.errors) diff --git a/python/klo-sl/tests/test_table_identifier_parser.py b/python/klo-sl/tests/test_table_identifier_parser.py new file mode 100644 index 00000000..ef18e990 --- /dev/null +++ b/python/klo-sl/tests/test_table_identifier_parser.py @@ -0,0 +1,77 @@ +from semantic_layer.table_identifier_parser import ( + ParseTableIdentifierItem, + parse_table_identifier_batch, + parse_table_identifier_one, +) + + +def test_parse_table_identifier_supported_dialects_and_aliases() -> None: + response = parse_table_identifier_batch( + [ + ParseTableIdentifierItem( + key="pg", + sql_table_name="public.orders AS o", + dialect="postgres", + ), + ParseTableIdentifierItem( + key="bq", + sql_table_name="analytics.orders", + dialect="bigquery", + ), + ParseTableIdentifierItem( + key="sf", + sql_table_name="RAW.PUBLIC.ORDERS", + dialect="snowflake", + ), + ] + ) + + assert response["pg"].ok is True + assert response["pg"].schema_ == "public" + assert response["pg"].name == "orders" + assert response["pg"].canonical_table == "public.orders" + assert response["bq"].ok is True + assert 
response["bq"].schema_ == "analytics" + assert response["bq"].name == "orders" + assert response["sf"].ok is True + assert response["sf"].catalog == "RAW" + assert response["sf"].schema_ == "PUBLIC" + assert response["sf"].name == "ORDERS" + + +def test_parse_table_identifier_rejects_non_physical_inputs() -> None: + assert ( + parse_table_identifier_one("${orders.SQL_TABLE_NAME}", "postgres").reason + == "looker_template_unresolved" + ) + assert ( + parse_table_identifier_one("(select * from public.orders)", "postgres").reason + == "derived_table_not_supported" + ) + assert ( + parse_table_identifier_one( + "public.orders join public.users on true", "postgres" + ).reason + == "multiple_table_references" + ) + assert ( + parse_table_identifier_one("public.orders", "not-a-dialect").reason + == "unsupported_dialect" + ) + + +def test_parse_table_identifier_preserves_batch_keys() -> None: + response = parse_table_identifier_batch( + [ + ParseTableIdentifierItem( + key="z", sql_table_name="public.z", dialect="postgres" + ), + ParseTableIdentifierItem( + key="a", sql_table_name="public.a", dialect="postgres" + ), + ] + ) + + assert list(response) == ["z", "a"] + assert response["z"].name == "z" + assert response["a"].name == "a" diff --git a/python/klo-sl/tests/test_tpch.py b/python/klo-sl/tests/test_tpch.py new file mode 100644 index 00000000..a276afa9 --- /dev/null +++ b/python/klo-sl/tests/test_tpch.py @@ -0,0 +1,360 @@ +"""TPC-H schema tests: loading, graph, planning, and SQL execution against DuckDB.""" + +from __future__ import annotations + +from pathlib import Path + +import pytest + +from semantic_layer.engine import SemanticEngine +from semantic_layer.graph import JoinGraph +from semantic_layer.loader import SourceLoader +from semantic_layer.models import SourceDefinition + +TPCH_DIR = Path(__file__).parent.parent / "sources" / "tpch" +TPCH_TABLES = [ + "region", + "nation", + "supplier", + "customer", + "part", + "partsupp", + "orders", + "lineitem", +] + 
+try: + import duckdb + + HAS_DUCKDB = True +except ImportError: + HAS_DUCKDB = False + + +# ── Fixtures ───────────────────────────────────────────────────────── + + +@pytest.fixture(scope="module") +def sources() -> dict[str, SourceDefinition]: + return SourceLoader(TPCH_DIR).load_all() + + +@pytest.fixture(scope="module") +def graph(sources: dict[str, SourceDefinition]) -> JoinGraph: + g = JoinGraph(sources) + g.build() + return g + + +@pytest.fixture(scope="module") +def engine() -> SemanticEngine: + return SemanticEngine(str(TPCH_DIR), dialect="duckdb") + + +@pytest.fixture(scope="module") +def tpch_conn(): + if not HAS_DUCKDB: + pytest.skip("duckdb not installed") + conn = duckdb.connect() + conn.execute("INSTALL tpch; LOAD tpch") + conn.execute("CALL dbgen(sf=0.01)") + conn.execute("CREATE SCHEMA IF NOT EXISTS public") + for t in TPCH_TABLES: + conn.execute(f"CREATE VIEW public.{t} AS SELECT * FROM main.{t}") + return conn + + +# ── Loader Tests ───────────────────────────────────────────────────── + + +class TestTpchLoader: + def test_all_sources_loaded(self, sources): + assert set(sources.keys()) == set(TPCH_TABLES) + + def test_lineitem_columns(self, sources): + li = sources["lineitem"] + col_names = {c.name for c in li.columns} + assert "l_orderkey" in col_names + assert "l_extendedprice" in col_names + assert "l_shipdate" in col_names + assert len(li.columns) == 16 + + def test_lineitem_composite_grain(self, sources): + assert sources["lineitem"].grain == ["l_orderkey", "l_linenumber"] + + def test_partsupp_composite_grain(self, sources): + assert sources["partsupp"].grain == ["ps_partkey", "ps_suppkey"] + + def test_lineitem_measures(self, sources): + measure_names = {m.name for m in sources["lineitem"].measures} + assert "revenue" in measure_names + assert "returned_revenue" in measure_names + assert "charge" in measure_names + assert len(sources["lineitem"].measures) == 8 + + def test_returned_revenue_has_filter(self, sources): + m = next( + m for m 
in sources["lineitem"].measures if m.name == "returned_revenue" + ) + assert m.filter == "l_returnflag = 'R'" + + def test_lineitem_joins(self, sources): + join_targets = {j.to for j in sources["lineitem"].joins} + assert join_targets == {"orders", "part", "supplier"} + + def test_region_is_leaf(self, sources): + assert sources["region"].joins == [] + assert sources["region"].measures == [] + + def test_orders_measures(self, sources): + measure_names = {m.name for m in sources["orders"].measures} + assert measure_names == {"order_count", "total_price", "avg_order_value"} + + +# ── Graph Tests ────────────────────────────────────────────────────── + + +class TestTpchGraph: + def test_all_sources_in_graph(self, graph): + assert set(graph.adjacency.keys()) >= set(TPCH_TABLES) + + def test_lineitem_to_region_path(self, graph): + """Shortest path: lineitem → supplier → nation → region (3 hops).""" + path = graph.find_path("lineitem", "region") + assert path is not None + source_chain = [path.edges[0].from_source] + [e.to_source for e in path.edges] + assert "lineitem" in source_chain + assert "region" in source_chain + assert len(path.edges) == 3 + + def test_lineitem_to_part_direct(self, graph): + path = graph.find_path("lineitem", "part") + assert path is not None + assert len(path.edges) == 1 + + def test_part_to_supplier_via_lineitem(self, graph): + """Shortest path: part → lineitem → supplier (2 hops, shorter than via partsupp).""" + path = graph.find_path("part", "supplier") + assert path is not None + assert len(path.edges) == 2 + + def test_partsupp_bridges_part_and_supplier(self, graph): + """partsupp has direct edges to both part and supplier.""" + path_to_part = graph.find_path("partsupp", "part") + path_to_supplier = graph.find_path("partsupp", "supplier") + assert path_to_part is not None and len(path_to_part.edges) == 1 + assert path_to_supplier is not None and len(path_to_supplier.edges) == 1 + + def test_graph_is_single_component(self, graph): + 
components = graph.find_components() + assert len(components) == 1 + + +# ── Plan-only Tests (no DuckDB needed) ─────────────────────────────── + + +class TestTpchPlanning: + def test_q1_plan(self, engine): + plan = engine.plan_only( + { + "measures": ["lineitem.revenue"], + "dimensions": ["lineitem.l_returnflag", "lineitem.l_linestatus"], + } + ) + assert plan.anchor_source == "lineitem" + assert len(plan.sources_used) == 1 + + def test_q5_plan_multi_hop(self, engine): + plan = engine.plan_only( + { + "measures": ["lineitem.revenue"], + "dimensions": ["nation.n_name"], + "filters": ["region.r_name = 'ASIA'"], + } + ) + assert "lineitem" in plan.sources_used + assert "nation" in plan.sources_used + assert "region" in plan.sources_used + + def test_filtered_measure_plan(self, engine): + plan = engine.plan_only( + { + "measures": ["lineitem.returned_revenue"], + "dimensions": ["customer.c_name"], + } + ) + assert any(m.filter for m in plan.measures) + + def test_time_granularity_plan(self, engine): + plan = engine.plan_only( + { + "measures": ["lineitem.revenue"], + "dimensions": [{"field": "orders.o_orderdate", "granularity": "month"}], + } + ) + col_names = [c.name for c in plan.columns] + # Column may be named "o_orderdate" with granularity metadata + assert "o_orderdate" in col_names + dim_col = next(c for c in plan.columns if c.name == "o_orderdate") + assert dim_col.granularity == "month" + + def test_suggest_valid_query(self, engine): + result = engine.suggest( + { + "measures": ["lineitem.revenue"], + "dimensions": ["lineitem.l_returnflag"], + } + ) + assert result["success"] is True + + def test_suggest_missing_source(self, engine): + result = engine.suggest( + { + "measures": ["sum(lineitem.l_quantity)"], + "dimensions": ["nonexistent.col"], + } + ) + assert result["success"] is False + + +# ── Execution Tests (require DuckDB) ──────────────────────────────── + + +@pytest.mark.skipif(not HAS_DUCKDB, reason="duckdb not installed") +class TestTpchExecution: + 
def test_q1_pricing_summary(self, tpch_conn, engine): + result = engine.query( + { + "measures": [ + "lineitem.revenue", + "lineitem.total_quantity", + "lineitem.avg_discount", + "lineitem.line_count", + ], + "dimensions": ["lineitem.l_returnflag", "lineitem.l_linestatus"], + } + ) + rows = tpch_conn.execute(result.sql).fetchall() + assert len(rows) > 0 + # TPC-H has exactly 4 combinations: A/F, N/F, N/O, R/F + assert len(rows) <= 4 + + def test_q5_revenue_by_nation_asia(self, tpch_conn, engine): + """4-hop join with filter: lineitem→supplier→nation→region.""" + result = engine.query( + { + "measures": ["lineitem.revenue"], + "dimensions": ["nation.n_name"], + "filters": ["region.r_name = 'ASIA'"], + } + ) + rows = tpch_conn.execute(result.sql).fetchall() + assert len(rows) > 0 + # ASIA has 5 nations + assert len(rows) <= 5 + + def test_q3_revenue_by_month(self, tpch_conn, engine): + """DATE_TRUNC + multi-table filter.""" + result = engine.query( + { + "measures": ["lineitem.revenue"], + "dimensions": [{"field": "orders.o_orderdate", "granularity": "month"}], + "filters": ["customer.c_mktsegment = 'BUILDING'"], + "limit": 12, + } + ) + rows = tpch_conn.execute(result.sql).fetchall() + assert len(rows) > 0 + assert len(rows) <= 12 + + def test_q10_returned_revenue(self, tpch_conn, engine): + """Filtered measure with CASE WHEN.""" + result = engine.query( + { + "measures": ["lineitem.returned_revenue"], + "dimensions": ["customer.c_name"], + "limit": 10, + } + ) + rows = tpch_conn.execute(result.sql).fetchall() + assert len(rows) > 0 + assert len(rows) <= 10 + + def test_order_count(self, tpch_conn, engine): + result = engine.query( + { + "measures": ["orders.order_count"], + "dimensions": ["orders.o_orderstatus"], + } + ) + rows = tpch_conn.execute(result.sql).fetchall() + assert len(rows) > 0 + # Sum of counts should equal total orders at SF=0.01 + total = sum(r[1] for r in rows) + assert total == 15000 # SF=0.01 → 15000 orders + + def 
test_supply_cost_by_nation(self, tpch_conn, engine): + """Bridge table path: partsupp → supplier → nation.""" + result = engine.query( + { + "measures": ["partsupp.total_supply_cost"], + "dimensions": ["nation.n_name"], + } + ) + rows = tpch_conn.execute(result.sql).fetchall() + assert len(rows) == 25 # 25 nations + + def test_avg_order_value(self, tpch_conn, engine): + result = engine.query( + { + "measures": ["orders.avg_order_value"], + "dimensions": ["customer.c_mktsegment"], + } + ) + rows = tpch_conn.execute(result.sql).fetchall() + assert len(rows) == 5 # 5 market segments + # avg values should be positive + for row in rows: + assert row[1] > 0 + + def test_lineitem_charge(self, tpch_conn, engine): + """Complex expression: sum(price * (1 - discount) * (1 + tax)).""" + result = engine.query( + { + "measures": ["lineitem.charge"], + "dimensions": ["lineitem.l_returnflag"], + } + ) + rows = tpch_conn.execute(result.sql).fetchall() + assert len(rows) > 0 + for row in rows: + assert row[1] > 0 + + def test_order_by_desc(self, tpch_conn, engine): + result = engine.query( + { + "measures": ["lineitem.revenue"], + "dimensions": ["nation.n_name"], + "order_by": [{"field": "lineitem.revenue", "direction": "desc"}], + "limit": 5, + } + ) + rows = tpch_conn.execute(result.sql).fetchall() + assert len(rows) == 5 + # Revenue should be descending + revenues = [r[1] for r in rows] + assert revenues == sorted(revenues, reverse=True) + + def test_multiple_filters(self, tpch_conn, engine): + result = engine.query( + { + "measures": ["lineitem.revenue"], + "dimensions": ["orders.o_orderpriority"], + "filters": [ + "customer.c_mktsegment = 'BUILDING'", + "nation.n_name = 'FRANCE'", + ], + } + ) + rows = tpch_conn.execute(result.sql).fetchall() + assert len(rows) > 0 diff --git a/python/klo-sl/tests/test_validator.py b/python/klo-sl/tests/test_validator.py new file mode 100644 index 00000000..376b5381 --- /dev/null +++ b/python/klo-sl/tests/test_validator.py @@ -0,0 +1,299 @@ 
+from __future__ import annotations + +import pytest + +from semantic_layer.engine import SemanticEngine +from semantic_layer.models import ( + JoinDeclaration, + SourceColumn, + SourceDefinition, +) + + +def _src( + name: str, + columns: list[str] | None = None, + grain: list[str] | None = None, + joins: list[JoinDeclaration] | None = None, +) -> SourceDefinition: + """Minimal-boilerplate source factory for validator tests.""" + columns = columns or ["id"] + grain = grain or ["id"] + return SourceDefinition( + name=name, + table=f"public.{name}", + grain=grain, + columns=[SourceColumn(name=c, type="number") for c in columns], + joins=joins or [], + ) + + +class TestValidatorValid: + def test_valid_connected_model(self): + orders = _src( + "orders", + columns=["id", "customer_id"], + joins=[ + JoinDeclaration( + to="customers", + on="customer_id = customers.id", + relationship="many_to_one", + ) + ], + ) + customers = _src("customers") + engine = SemanticEngine.from_sources({"orders": orders, "customers": customers}) + + report = engine.validate() + + assert report.valid + assert report.errors == [] + assert report.warnings == [] + + +class TestOrphanJoinTarget: + def test_orphan_join_target_is_error(self): + orders = _src( + "orders", + columns=["id", "customer_id"], + joins=[ + JoinDeclaration( + to="customers", + on="customer_id = customers.id", + relationship="many_to_one", + ) + ], + ) + # `customers` deliberately not defined + engine = SemanticEngine.from_sources({"orders": orders}) + + report = engine.validate() + + assert not report.valid + assert any( + "orders" in e and "customers" in e and "not defined" in e + for e in report.errors + ) + + def test_query_with_orphan_target_raises_before_sql(self): + """Query path must reject orphan targets, not silently emit SQL + that references the undefined table name (which could read a real + unmodeled table sharing that name).""" + orders = _src( + "orders", + columns=["id", "amount", "customer_id"], + joins=[ + 
JoinDeclaration( + to="customers", + on="customer_id = customers.id", + relationship="many_to_one", + ) + ], + ) + engine = SemanticEngine.from_sources({"orders": orders}) + + with pytest.raises(ValueError) as exc: + engine.query( + { + "measures": ["sum(orders.amount)"], + "dimensions": ["customers.id"], + } + ) + msg = str(exc.value) + assert "orders" in msg + assert "customers" in msg + assert "not defined" in msg + + +class TestInvalidGrain: + def test_grain_column_missing_from_columns(self): + bad = _src( + "bad", + columns=["id"], + grain=["nonexistent_col"], + ) + engine = SemanticEngine.from_sources({"bad": bad}) + + report = engine.validate() + + assert not report.valid + assert any("bad" in e and "nonexistent_col" in e for e in report.errors) + + +class TestDisconnectedComponents: + def test_two_components_produce_warning_not_error(self): + a = _src("a") + b = _src("b") + engine = SemanticEngine.from_sources({"a": a, "b": b}) + + report = engine.validate() + + assert report.valid + assert report.errors == [] + assert len(report.warnings) >= 1 + disconnection = next( + (w for w in report.warnings if "disconnected components" in w), None + ) + assert disconnection is not None + assert "2 disconnected components" in disconnection + assert "Component 1" in disconnection + assert "Component 2" in disconnection + + def test_aliases_do_not_create_false_disconnection(self): + """Two aliases of the same base source must count as one component + with the base, not as separate islands.""" + orders = SourceDefinition( + name="orders", + table="public.orders", + grain=["id"], + columns=[ + SourceColumn(name="id", type="number"), + SourceColumn(name="amount", type="number"), + SourceColumn(name="billing_customer_id", type="number"), + SourceColumn(name="shipping_customer_id", type="number"), + ], + joins=[ + JoinDeclaration( + to="customers", + alias="billing_customer", + on="billing_customer_id = billing_customer.id", + relationship="many_to_one", + ), + 
JoinDeclaration( + to="customers", + alias="shipping_customer", + on="shipping_customer_id = shipping_customer.id", + relationship="many_to_one", + ), + ], + ) + customers = _src("customers", columns=["id", "segment"]) + engine = SemanticEngine.from_sources({"orders": orders, "customers": customers}) + + report = engine.validate() + + assert report.valid + assert not any("disconnected components" in w for w in report.warnings) + + def test_large_component_is_truncated(self): + many = {f"s{i}": _src(f"s{i}") for i in range(10)} + # Join them sequentially so they form one big component + for i in range(9): + many[f"s{i}"].joins.append( + JoinDeclaration( + to=f"s{i + 1}", + on=f"id = s{i + 1}.id", + relationship="many_to_one", + ) + ) + many["island"] = _src("island") + engine = SemanticEngine.from_sources(many) + + report = engine.validate() + + disconnection = next( + w for w in report.warnings if "disconnected components" in w + ) + assert "(10 sources)" in disconnection + assert "... (+8 more)" in disconnection + assert "(1 sources): island" in disconnection + + def test_singleton_component_warning_names_recently_touched_source(self): + orders = _src( + "orders", + columns=["id", "customer_id"], + joins=[ + JoinDeclaration( + to="customers", + on="customer_id = customers.id", + relationship="many_to_one", + ) + ], + ) + customers = _src("customers") + lonely_source = _src("lonely_source") + engine = SemanticEngine.from_sources( + { + "orders": orders, + "customers": customers, + "lonely_source": lonely_source, + } + ) + + report = engine.validate(recently_touched={"lonely_source"}) + + assert report.per_source_warnings["lonely_source"] + msg = report.per_source_warnings["lonely_source"][0] + assert "lonely_source" in msg + assert "singleton" in msg.lower() or "no joins" in msg.lower() + + def test_no_per_source_warning_for_connected_recently_touched_source(self): + orders = _src( + "orders", + columns=["id", "customer_id"], + joins=[ + JoinDeclaration( + 
to="customers", + on="customer_id = customers.id", + relationship="many_to_one", + ) + ], + ) + customers = _src("customers") + engine = SemanticEngine.from_sources({"orders": orders, "customers": customers}) + + report = engine.validate(recently_touched={"orders"}) + + assert report.per_source_warnings.get("orders", []) == [] + + def test_recently_touched_default_none_preserves_existing_behavior(self): + lonely = _src("lonely") + other = _src("other") + engine = SemanticEngine.from_sources({"lonely": lonely, "other": other}) + + report = engine.validate() + + assert any("disconnected components" in w for w in report.warnings) + assert report.per_source_warnings == {} + + +class TestEcommerceSmoke: + def test_ecommerce_fixtures_validate_cleanly(self, ecommerce_sources): + engine = SemanticEngine.from_sources(ecommerce_sources) + + report = engine.validate() + + assert report.valid, f"Expected clean report, got errors: {report.errors}" + assert report.warnings == [], f"Expected no warnings, got: {report.warnings}" + + +class TestMultipleIssuesCollected: + def test_errors_and_warnings_coexist(self): + bad_grain = _src("bad_grain", columns=["id"], grain=["missing"]) + orphan_target = _src( + "with_orphan", + columns=["id", "fk"], + joins=[ + JoinDeclaration( + to="doesnt_exist", + on="fk = doesnt_exist.id", + relationship="many_to_one", + ) + ], + ) + isolated = _src("isolated") + engine = SemanticEngine.from_sources( + { + "bad_grain": bad_grain, + "with_orphan": orphan_target, + "isolated": isolated, + } + ) + + report = engine.validate() + + assert not report.valid + assert len(report.errors) >= 2 + assert any("missing" in e for e in report.errors) + assert any("doesnt_exist" in e for e in report.errors) + assert len(report.warnings) >= 1 diff --git a/release-policy.json b/release-policy.json new file mode 100644 index 00000000..0f0a7c73 --- /dev/null +++ b/release-policy.json @@ -0,0 +1,38 @@ +{ + "schemaVersion": 1, + "releaseMode": "ci-artifact-only", + "npm": { 
/**
 * Download every fixture listed in the public benchmark manifest into
 * `<fixturesRoot>/<fixture.id>/data.sqlite`, verifying each payload against the
 * manifest's pinned SHA-256 when one is present.
 *
 * A fixture whose on-disk file already matches its pinned hash is skipped. A
 * fixture without a pinned hash can never be verified, so it is always
 * re-downloaded.
 *
 * @param {object} [options] Dependency overrides (primarily for tests).
 * @param {Function} [options.fetch] fetch-compatible function; defaults to global `fetch`.
 * @param {Function} [options.writeFile] `(dest, buffer) => void`; defaults to `writeFileSync`.
 * @param {Function} [options.readFile] `(file, encoding?) => string | Buffer`; defaults to `readFileSync`.
 * @param {Function} [options.fileExists] `(file) => boolean`; defaults to `existsSync`.
 * @param {Function} [options.ensureDir] `(dir) => void`; defaults to a recursive `mkdirSync`.
 * @param {string} [options.manifestPath] Manifest location; defaults to the module-level `manifestPath`.
 * @param {string} [options.fixturesRoot] Output root; defaults to the module-level `fixturesRoot`.
 * @param {Function} [options.log] Line logger; defaults to `console.log`.
 * @returns {Promise<Array<{id: string, action: 'skip' | 'downloaded', sha256: string, bytes?: number}>>}
 *   One entry per manifest fixture, in manifest order.
 * @throws {Error} When the manifest has no `fixtures` array, a download returns
 *   a non-OK status, or a downloaded payload does not match its pinned hash.
 */
export async function acquirePublicBenchmarkFixtures(options = {}) {
  const fetchImpl = options.fetch ?? fetch;
  const writeFile = options.writeFile ?? writeFileSync;
  const readFile = options.readFile ?? readFileSync;
  const fileExists = options.fileExists ?? existsSync;
  const ensureDir = options.ensureDir ?? ((dir) => mkdirSync(dir, { recursive: true }));
  const manifestPathOverride = options.manifestPath ?? manifestPath;
  const fixturesRootOverride = options.fixturesRoot ?? fixturesRoot;
  const log = options.log ?? console.log;

  const sha256Hex = (bytes) => createHash('sha256').update(bytes).digest('hex');

  const manifest = JSON.parse(readFile(manifestPathOverride, 'utf8'));
  if (!Array.isArray(manifest?.fixtures)) {
    // Fail with an actionable message instead of "manifest.fixtures is not iterable".
    throw new Error(`Manifest ${manifestPathOverride} must contain a "fixtures" array`);
  }

  const results = [];
  for (const fixture of manifest.fixtures) {
    const fixtureDir = path.join(fixturesRootOverride, fixture.id);
    const dest = path.join(fixtureDir, 'data.sqlite');
    ensureDir(fixtureDir);

    // digest('hex') is always lower-case, so normalise the pinned value before
    // comparing; otherwise an upper-case pin would never match.
    const isPinned = Boolean(fixture.sha256);
    const pinnedHash = isPinned ? String(fixture.sha256).trim().toLowerCase() : '';

    if (fileExists(dest)) {
      const existingHash = sha256Hex(readFile(dest));
      if (isPinned && existingHash === pinnedHash) {
        log(`[skip] ${fixture.id}: hash matches`);
        results.push({ id: fixture.id, action: 'skip', sha256: existingHash });
        continue;
      }
      if (isPinned) {
        log(`[refresh] ${fixture.id}: hash mismatch (${existingHash}), re-downloading from ${fixture.url}`);
      } else {
        // Nothing to compare against: say so rather than reporting a mismatch.
        log(`[refresh] ${fixture.id}: no pinned sha256 (existing ${existingHash}), re-downloading from ${fixture.url}`);
      }
    } else {
      log(`[download] ${fixture.id} from ${fixture.url}`);
    }

    const res = await fetchImpl(fixture.url);
    if (!res.ok) {
      throw new Error(`Failed to download ${fixture.id} from ${fixture.url}: HTTP ${res.status}`);
    }
    const buf = Buffer.from(await res.arrayBuffer());
    const hash = sha256Hex(buf);
    // Verify before writing so a bad payload never replaces a file on disk.
    if (isPinned && hash !== pinnedHash) {
      throw new Error(`Hash mismatch for ${fixture.id}: expected ${fixture.sha256}, got ${hash}`);
    }
    writeFile(dest, buf);
    log(`[done] ${fixture.id}: sha256=${hash} bytes=${buf.length}`);
    results.push({ id: fixture.id, action: 'downloaded', sha256: hash, bytes: buf.length });
  }
  return results;
}
mkdtempSync, readFileSync, rmSync, writeFileSync } from 'node:fs'; +import { tmpdir } from 'node:os'; +import path from 'node:path'; +import { describe, it } from 'node:test'; +import { acquirePublicBenchmarkFixtures } from './acquire-public-benchmark-fixtures.mjs'; + +function tempRoot() { + return mkdtempSync(path.join(tmpdir(), 'klo-acquire-')); +} + +function writeManifest(dir, fixtures) { + const p = path.join(dir, 'manifest.json'); + writeFileSync(p, JSON.stringify({ fixtures }), 'utf8'); + return p; +} + +describe('acquirePublicBenchmarkFixtures', () => { + it('downloads, hashes, and writes data.sqlite for each manifest entry', async () => { + const root = tempRoot(); + try { + const fixturesRoot = path.join(root, 'fixtures'); + const manifestPath = writeManifest(root, [ + { id: 'foo_fixture', url: 'https://example.invalid/foo', sha256: '' }, + ]); + const calls = []; + const result = await acquirePublicBenchmarkFixtures({ + manifestPath, + fixturesRoot, + fetch: async (url) => { + calls.push(url); + return { + ok: true, + status: 200, + async arrayBuffer() { + return Buffer.from('hello-sqlite'); + }, + }; + }, + log: () => {}, + }); + assert.equal(calls.length, 1); + assert.equal(calls[0], 'https://example.invalid/foo'); + assert.equal(result.length, 1); + assert.equal(result[0].action, 'downloaded'); + const dest = path.join(fixturesRoot, 'foo_fixture', 'data.sqlite'); + assert.ok(existsSync(dest)); + assert.equal(readFileSync(dest, 'utf8'), 'hello-sqlite'); + } finally { + rmSync(root, { recursive: true, force: true }); + } + }); + + it('skips when existing file matches the manifest sha256', async () => { + const root = tempRoot(); + try { + const fixturesRoot = path.join(root, 'fixtures'); + const fixtureDir = path.join(fixturesRoot, 'foo_fixture'); + const dest = path.join(fixtureDir, 'data.sqlite'); + const { mkdirSync } = await import('node:fs'); + mkdirSync(fixtureDir, { recursive: true }); + writeFileSync(dest, Buffer.from('hello-sqlite')); + const 
expectedHash = '52a3e2d435cdf97a44eca3dd4882d008b9ef73b63bc75476d320fdd665c812c0'; // pragma: allowlist secret + const manifestPath = writeManifest(root, [ + { id: 'foo_fixture', url: 'https://example.invalid/foo', sha256: expectedHash }, + ]); + let fetchCalls = 0; + const result = await acquirePublicBenchmarkFixtures({ + manifestPath, + fixturesRoot, + fetch: async () => { + fetchCalls += 1; + throw new Error('should not fetch'); + }, + log: () => {}, + }); + assert.equal(result[0].action, 'skip'); + assert.equal(fetchCalls, 0); + assert.equal(readFileSync(dest, 'utf8'), 'hello-sqlite'); + } finally { + rmSync(root, { recursive: true, force: true }); + } + }); + + it('throws when the downloaded payload sha256 does not match the manifest', async () => { + const root = tempRoot(); + try { + const fixturesRoot = path.join(root, 'fixtures'); + const manifestPath = writeManifest(root, [ + { + id: 'foo_fixture', + url: 'https://example.invalid/foo', + sha256: '0000000000000000000000000000000000000000000000000000000000000000', + }, + ]); + await assert.rejects( + acquirePublicBenchmarkFixtures({ + manifestPath, + fixturesRoot, + fetch: async () => ({ + ok: true, + status: 200, + async arrayBuffer() { + return Buffer.from('different-payload'); + }, + }), + log: () => {}, + }), + /Hash mismatch/, + ); + } finally { + rmSync(root, { recursive: true, force: true }); + } + }); + + it('surfaces non-OK HTTP statuses with the fixture id', async () => { + const root = tempRoot(); + try { + const fixturesRoot = path.join(root, 'fixtures'); + const manifestPath = writeManifest(root, [ + { id: 'foo_fixture', url: 'https://example.invalid/foo', sha256: '' }, + ]); + await assert.rejects( + acquirePublicBenchmarkFixtures({ + manifestPath, + fixturesRoot, + fetch: async () => ({ + ok: false, + status: 404, + async arrayBuffer() { + return Buffer.alloc(0); + }, + }), + log: () => {}, + }), + /foo_fixture .* HTTP 404/, + ); + } finally { + rmSync(root, { recursive: true, force: true }); 
+ } + }); + + it('pins every checked-in public benchmark fixture download in the manifest', () => { + const manifestPath = new URL('./public-benchmark-manifest.json', import.meta.url); + const manifest = JSON.parse(readFileSync(manifestPath, 'utf8')); + const fixtureIds = manifest.fixtures.map((fixture) => fixture.id).sort(); + + assert.deepEqual(fixtureIds, [ + 'adventureworkslt_with_declared_metadata', + 'chinook_with_declared_metadata', + 'northwind_with_declared_metadata', + 'sakila_with_declared_metadata', + ]); + + const adventureWorks = manifest.fixtures.find( + (fixture) => fixture.id === 'adventureworkslt_with_declared_metadata', + ); + assert.ok(adventureWorks); + assert.equal(adventureWorks.displayName, 'AdventureWorksLT (SQLite, declared metadata)'); + assert.equal( + adventureWorks.url, + 'https://github.com/nuitsjp/AdventureWorks-for-SQLite/releases/download/Release-1_0_0/AdventureWorksLT.db', + ); + assert.equal(adventureWorks.sha256, 'f1a87a31f4efb5654f57a3b1ca47fac338972ceb7553673d66ea0bd9d55a7008'); // pragma: allowlist secret + assert.equal(adventureWorks.license, 'MIT'); + assert.equal(adventureWorks.source, 'https://github.com/nuitsjp/AdventureWorks-for-SQLite'); + }); +}); diff --git a/scripts/adventureworks-oltp-source.json b/scripts/adventureworks-oltp-source.json new file mode 100644 index 00000000..e8fa52d2 --- /dev/null +++ b/scripts/adventureworks-oltp-source.json @@ -0,0 +1,13 @@ +{ + "id": "adventureworks_oltp_with_declared_metadata", + "displayName": "AdventureWorks OLTP (SQL Server 2022, declared metadata)", + "installScriptUrl": "https://github.com/microsoft/sql-server-samples/releases/download/adventureworks/AdventureWorks-oltp-install-script.zip", + "installScriptSha256": "58962e94ea386ef7cd3d8a08211bfd42a79d9b81bdd68fd4b6b0051de6c5bd42", "_allowlist": "// pragma: allowlist secret", + "source": "https://github.com/microsoft/sql-server-samples/tree/master/samples/databases/adventure-works", + "license": "MIT", + "expectedTables": 
71, + "expectedPrimaryKeys": 71, + "expectedForeignKeys": 90, + "expectedCsvFiles": 69, + "notes": "Full OLTP AdventureWorks corpus. Do not replace with AdventureWorksLT; the LT SQLite source is already covered by adventureworkslt_with_declared_metadata." +} diff --git a/scripts/adventureworks-oltp-source.test.mjs b/scripts/adventureworks-oltp-source.test.mjs new file mode 100644 index 00000000..f2677715 --- /dev/null +++ b/scripts/adventureworks-oltp-source.test.mjs @@ -0,0 +1,25 @@ +import assert from 'node:assert/strict'; +import { readFileSync } from 'node:fs'; +import { describe, it } from 'node:test'; + +describe('AdventureWorks OLTP benchmark source metadata', () => { + it('pins the full OLTP source instead of the lightweight LT source', () => { + const source = JSON.parse(readFileSync(new URL('./adventureworks-oltp-source.json', import.meta.url), 'utf8')); + + assert.equal(source.id, 'adventureworks_oltp_with_declared_metadata'); + assert.equal(source.displayName, 'AdventureWorks OLTP (SQL Server 2022, declared metadata)'); + assert.equal( + source.installScriptUrl, + 'https://github.com/microsoft/sql-server-samples/releases/download/adventureworks/AdventureWorks-oltp-install-script.zip', + ); + assert.equal(source.installScriptSha256, '58962e94ea386ef7cd3d8a08211bfd42a79d9b81bdd68fd4b6b0051de6c5bd42'); // pragma: allowlist secret + assert.equal(source.license, 'MIT'); + assert.equal(source.source, 'https://github.com/microsoft/sql-server-samples/tree/master/samples/databases/adventure-works'); + assert.equal(source.expectedTables, 71); + assert.equal(source.expectedPrimaryKeys, 71); + assert.equal(source.expectedForeignKeys, 90); + assert.equal(source.expectedCsvFiles, 69); + assert.match(source.notes, /full OLTP/i); + assert.doesNotMatch(JSON.stringify(source), /AdventureWorksLT\.db|Release-1_0_0|nuitsjp/); + }); +}); diff --git a/scripts/anti-fixture-conditional.test.mjs b/scripts/anti-fixture-conditional.test.mjs new file mode 100644 index 
00000000..d7d678a9 --- /dev/null +++ b/scripts/anti-fixture-conditional.test.mjs @@ -0,0 +1,66 @@ +import assert from 'node:assert/strict'; +import { readdir, readFile } from 'node:fs/promises'; +import { describe, it } from 'node:test'; + +const KLO_ROOT = new URL('../', import.meta.url); + +const RELATIONSHIP_RUNTIME_SOURCES = Object.freeze([ + 'packages/context/src/scan/relationship-benchmarks.ts', + 'packages/context/src/scan/relationship-budget.ts', + 'packages/context/src/scan/relationship-candidates.ts', + 'packages/context/src/scan/relationship-composite-candidates.ts', + 'packages/context/src/scan/relationship-graph-resolver.ts', + 'packages/context/src/scan/relationship-locality.ts', + 'packages/context/src/scan/relationship-name-similarity.ts', + 'packages/context/src/scan/relationship-discovery.ts', + 'packages/context/src/scan/relationship-profiling.ts', + 'packages/context/src/scan/relationship-scoring.ts', + 'packages/context/src/scan/relationship-validation.ts', +]); + +async function checkedInFixtureIds() { + const fixtureRoot = new URL('packages/context/test/fixtures/relationship-benchmarks/', KLO_ROOT); + const entries = await readdir(fixtureRoot, { withFileTypes: true }); + return entries + .filter((entry) => entry.isDirectory()) + .map((entry) => entry.name) + .sort((left, right) => left.localeCompare(right)); +} + +async function readRuntimeSources() { + return Promise.all( + RELATIONSHIP_RUNTIME_SOURCES.map(async (relativePath) => ({ + relativePath, + source: await readFile(new URL(relativePath, KLO_ROOT), 'utf8'), + })), + ); +} + +describe('relationship evidence-fusion source guardrails', () => { + it('keeps runtime relationship modules free of fixture-id conditionals', async () => { + const fixtureIds = await checkedInFixtureIds(); + const sources = await readRuntimeSources(); + const hits = []; + + for (const { relativePath, source } of sources) { + for (const fixtureId of fixtureIds) { + if (source.includes(fixtureId)) { + 
hits.push(`${relativePath}: ${fixtureId}`); + } + } + } + + assert.deepEqual(hits, []); + }); + + it('keeps runtime relationship modules free of length-threshold drop-all cliffs', async () => { + const sources = await readRuntimeSources(); + const dropAllPattern = /if\s*\([^)]*\.length\s*>\s*\d+[^)]*\)\s*(?:\{\s*)?return\s*\[\];/gs; + const hits = sources.flatMap(({ relativePath, source }) => { + const matches = Array.from(source.matchAll(dropAllPattern)); + return matches.map((match) => `${relativePath}: ${match[0].replace(/\s+/g, ' ').trim()}`); + }); + + assert.deepEqual(hits, []); + }); +}); diff --git a/scripts/build-adventureworks-oltp-fixture.mjs b/scripts/build-adventureworks-oltp-fixture.mjs new file mode 100644 index 00000000..52c0f3db --- /dev/null +++ b/scripts/build-adventureworks-oltp-fixture.mjs @@ -0,0 +1,260 @@ +#!/usr/bin/env node +import { mkdirSync, readFileSync, rmSync, writeFileSync } from 'node:fs'; +import { createRequire } from 'node:module'; +import path from 'node:path'; +import { fileURLToPath } from 'node:url'; +import { expectedLinksFromSnapshot, normalizeSqliteType } from './build-benchmark-snapshot.mjs'; + +const scriptDir = path.dirname(fileURLToPath(import.meta.url)); +const repoRoot = path.resolve(scriptDir, '..'); +const require = createRequire(new URL('../packages/context/package.json', import.meta.url)); +const Database = require('better-sqlite3'); +const { stringify: yamlStringify } = require('yaml'); + +const fixtureId = 'adventureworks_oltp_with_declared_metadata'; +const defaultFixtureDir = path.join( + repoRoot, + 'packages', + 'context', + 'test', + 'fixtures', + 'relationship-benchmarks', + fixtureId, +); + +function quoteSqliteIdentifier(value) { + return `"${String(value).replaceAll('"', '""')}"`; +} + +function quoteSqlServerIdentifier(value) { + return `[${String(value).replaceAll(']', ']]')}]`; +} + +function flattenTableName(table) { + return `${table.db}.${table.name}`; +} + +function 
sqliteDimensionType(nativeType, columnName) { + const type = normalizeSqliteType(nativeType); + const name = columnName.toLowerCase(); + if (/date|time/.test(name) || /date|time/.test(String(nativeType).toLowerCase())) { + return 'time'; + } + if (type === 'integer' || type === 'real') { + return 'number'; + } + return 'string'; +} + +function sqliteValue(value) { + if (value === undefined) { + return null; + } + if (value instanceof Date) { + return value.toISOString(); + } + if (typeof value === 'boolean') { + return value ? 1 : 0; + } + if (Buffer.isBuffer(value)) { + return value; + } + if (typeof value === 'object' && value !== null) { + return JSON.stringify(value); + } + return value; +} + +export function snapshotForSqliteBenchmark(sqlServerSnapshot) { + const tableNameByOriginal = new Map( + sqlServerSnapshot.tables + .filter((table) => table.kind === 'table') + .map((table) => [`${table.db}.${table.name}`, flattenTableName(table)]), + ); + + return { + connectionId: fixtureId, + driver: 'sqlite', + extractedAt: sqlServerSnapshot.extractedAt, + scope: { catalogs: ['main'], schemas: ['main'] }, + metadata: { + ...sqlServerSnapshot.metadata, + source_driver: 'sqlserver', + source_connection_id: sqlServerSnapshot.connectionId, + source_database: sqlServerSnapshot.metadata?.database ?? null, + }, + tables: sqlServerSnapshot.tables + .filter((table) => table.kind === 'table') + .map((table) => ({ + catalog: null, + db: 'main', + name: flattenTableName(table), + kind: 'table', + comment: table.comment ?? null, + estimatedRows: table.estimatedRows ?? 0, + columns: table.columns.map((column) => ({ + name: column.name, + nativeType: column.nativeType, + normalizedType: normalizeSqliteType(column.nativeType), + dimensionType: sqliteDimensionType(column.nativeType, column.name), + nullable: column.nullable, + primaryKey: column.primaryKey, + comment: column.comment ?? null, + })), + foreignKeys: (table.foreignKeys ?? 
/**
 * Materialise the rows of every SQL Server base table into a fresh
 * `data.sqlite` file inside `fixtureDir`.
 *
 * For each source table with `kind === 'table'` that has a counterpart in
 * `sqliteSnapshot` (matched on the flattened `<db>.<name>` table name), this
 * creates the SQLite table, reads the rows via `connector.executeReadOnly`,
 * and inserts them. All work happens in a single transaction, so a failure
 * leaves no partially committed data.
 *
 * @param {object} input
 * @param {object} input.connector Scan connector exposing `executeReadOnly(request, context)`.
 * @param {object} input.sourceSnapshot SQL Server snapshot (`connectionId`, `tables`).
 * @param {object} input.sqliteSnapshot Flattened snapshot, as produced by `snapshotForSqliteBenchmark`.
 * @param {string} input.fixtureDir Directory that receives `data.sqlite`.
 * @returns {Promise<void>}
 * @throws Rethrows any connector or SQLite error after rolling back the open transaction.
 */
export async function copySqlServerRowsToSqlite(input) {
  const { connector, sourceSnapshot, sqliteSnapshot, fixtureDir } = input;
  const sqlitePath = path.join(fixtureDir, 'data.sqlite');

  // Remove the old database together with any WAL/SHM sidecars left behind by
  // an aborted earlier run, so a stale WAL is never paired with the new file.
  for (const suffix of ['', '-wal', '-shm']) {
    rmSync(`${sqlitePath}${suffix}`, { force: true });
  }

  // Index target tables once instead of scanning the list for every source
  // table. The first entry wins, matching the previous `find` semantics.
  const sqliteTablesByName = new Map();
  for (const table of sqliteSnapshot.tables) {
    if (!sqliteTablesByName.has(table.name)) {
      sqliteTablesByName.set(table.name, table);
    }
  }

  const db = new Database(sqlitePath);
  try {
    db.pragma('journal_mode = WAL');
    db.exec('BEGIN');
    for (const sourceTable of sourceSnapshot.tables) {
      if (sourceTable.kind !== 'table') {
        continue;
      }
      const sqliteTable = sqliteTablesByName.get(flattenTableName(sourceTable));
      if (!sqliteTable) {
        continue;
      }
      const columns = sqliteTable.columns;
      const quotedTable = quoteSqliteIdentifier(sqliteTable.name);
      const createColumns = columns
        .map((column) => `${quoteSqliteIdentifier(column.name)} ${normalizeSqliteType(column.nativeType).toUpperCase()}`)
        .join(', ');
      db.exec(`CREATE TABLE ${quotedTable} (${createColumns})`);

      const selectSql = `SELECT * FROM ${quoteSqlServerIdentifier(sourceTable.db)}.${quoteSqlServerIdentifier(sourceTable.name)}`;
      const result = await connector.executeReadOnly(
        {
          connectionId: sourceSnapshot.connectionId,
          sql: selectSql,
          // NOTE(review): presumably the connector truncates at maxRows — confirm
          // that no table exceeds max(estimatedRows, 1,000,000).
          maxRows: Math.max(sourceTable.estimatedRows ?? 0, 1000000),
        },
        { runId: `adventureworks-oltp-copy:${sqliteTable.name}` },
      );

      const bindSlots = columns.map(() => '?').join(', ');
      const insert = db.prepare(
        `INSERT INTO ${quotedTable} (${columns
          .map((column) => quoteSqliteIdentifier(column.name))
          .join(', ')}) VALUES (${bindSlots})`,
      );
      // Rows are passed to map(), so they are expected to be positional arrays.
      for (const row of result.rows) {
        insert.run(row.map(sqliteValue));
      }
    }
    db.exec('COMMIT');
  } catch (error) {
    // Roll back only when a transaction is actually open. An unconditional
    // ROLLBACK throws "no transaction is active" when the failure happened
    // before BEGIN, which would hide the original error.
    if (db.inTransaction) {
      db.exec('ROLLBACK');
    }
    throw error;
  } finally {
    db.close();
  }
}
defaultFixtureDir; + mkdirSync(fixtureDir, { recursive: true }); + + const sourceSnapshot = await input.connector.introspect( + { connectionId: input.connectionId, driver: 'sqlserver' }, + { runId: 'adventureworks-oltp-fixture:introspect' }, + ); + const sqliteSnapshot = snapshotForSqliteBenchmark(sourceSnapshot); + + writeAdventureWorksFixtureConfig(fixtureDir); + writeAdventureWorksSnapshotAndLabels(fixtureDir, sqliteSnapshot); + await copySqlServerRowsToSqlite({ connector: input.connector, sourceSnapshot, sqliteSnapshot, fixtureDir }); + + return { + fixtureDir, + tableCount: sqliteSnapshot.tables.length, + expected: expectedLinksFromSnapshot(sqliteSnapshot), + }; +} + +async function main() { + const url = process.env.KLO_ADVENTUREWORKS_SQLSERVER_URL; + if (!url) { + throw new Error( + 'Set KLO_ADVENTUREWORKS_SQLSERVER_URL to a read-only SQL Server URL for a full AdventureWorks OLTP database before running this script.', + ); + } + + const source = JSON.parse(readFileSync(path.join(scriptDir, 'adventureworks-oltp-source.json'), 'utf8')); + const { KloSqlServerScanConnector } = await import('../packages/connector-sqlserver/dist/index.js'); + const connector = new KloSqlServerScanConnector({ + connectionId: fixtureId, + connection: { + driver: 'sqlserver', + url, + schemas: ['dbo', 'HumanResources', 'Person', 'Production', 'Purchasing', 'Sales'], + readonly: true, + trustServerCertificate: true, + }, + now: () => new Date('2026-05-07T00:00:00.000Z'), + }); + + const result = await buildAdventureWorksOltpFixture({ connector, connectionId: fixtureId }); + if (result.tableCount !== source.expectedTables) { + throw new Error(`Expected ${source.expectedTables} tables, generated ${result.tableCount}`); + } + if (result.expected.expectedPks.length !== source.expectedPrimaryKeys) { + throw new Error(`Expected ${source.expectedPrimaryKeys} PK entries, generated ${result.expected.expectedPks.length}`); + } + if (result.expected.expectedLinks.length !== 
/**
 * Maps a declared column type to one of the storage classes used in snapshots:
 * 'integer', 'text', 'real' or 'blob'.
 *
 * The declared type is lower-cased and trimmed, then checked against the rules
 * below in order; the first matching rule wins. A missing/empty type, or a type
 * that matches no rule, yields 'text'.
 *
 * @param {string | null | undefined} rawType Declared type as reported by the database.
 * @returns {'integer' | 'text' | 'real' | 'blob'}
 */
export function normalizeSqliteType(rawType) {
  const declared = (rawType || '').toLowerCase().trim();
  // Order matters: e.g. anything containing "int" is an integer before later rules are tried.
  const orderedRules = [
    [/int/, 'integer'],
    [/char|text|clob/, 'text'],
    [/real|float|double|numeric|decimal/, 'real'],
    [/blob/, 'blob'],
    [/bool/, 'integer'],
    [/date|time/, 'text'],
  ];
  for (const [pattern, storageClass] of orderedRules) {
    if (declared && pattern.test(declared)) {
      return storageClass;
    }
  }
  return 'text';
}
/**
 * Collapses `PRAGMA foreign_key_list` rows into one entry per foreign key.
 *
 * Rows sharing the same `id` belong to one (possibly composite) key; within a
 * key the columns are ordered by `seq`. Keys are returned in the order their
 * `id` first appears in `rawFks`.
 *
 * @param {Array<{id: number, seq: number, table: string, from: string, to: string}>} rawFks
 * @param {string} table Name of the referencing table; used to synthesize `constraintName`.
 * @returns {Array<{from: string[], toTable: string, to: string[], constraintName: string}>}
 */
function groupedForeignKeys(rawFks, table) {
  const groups = rawFks.reduce((acc, fkRow) => {
    if (!acc.has(fkRow.id)) {
      acc.set(fkRow.id, []);
    }
    acc.get(fkRow.id).push(fkRow);
    return acc;
  }, new Map());

  return Array.from(groups.values(), (members) => {
    members.sort((left, right) => left.seq - right.seq);
    const fromColumns = members.map((member) => member.from);
    return {
      from: fromColumns,
      toTable: members[0].table,
      to: members.map((member) => member.to),
      constraintName: `${table}_${fromColumns.join('_')}_fkey`,
    };
  });
}
/**
 * Derives the expected primary keys and foreign-key links from a schema snapshot.
 *
 * Only entries whose `kind` is 'table' are considered. A table contributes a PK
 * entry when at least one of its columns has `primaryKey` set; its links come
 * from `groupedSnapshotForeignKeys(table)`. Both result lists are sorted with
 * `localeCompare`: PKs by table name, links by a
 * `fromTable.fromColumns->toTable.toColumns` key.
 *
 * @param {{tables?: Array<object>}} snapshot
 * @returns {{expectedPks: Array<{table: string, columns: string[]}>, expectedLinks: Array<object>}}
 */
export function expectedLinksFromSnapshot(snapshot) {
  const baseTables = (snapshot.tables ?? []).filter((entry) => entry.kind === 'table');

  const expectedPks = baseTables
    .map((entry) => ({
      table: entry.name,
      columns: (entry.columns ?? []).filter((col) => col.primaryKey).map((col) => col.name),
    }))
    .filter((pk) => pk.columns.length > 0)
    .sort((a, b) => a.table.localeCompare(b.table));

  const sortKey = (link) =>
    `${link.fromTable}.${link.fromColumns.join(',')}->${link.toTable}.${link.toColumns.join(',')}`;
  const expectedLinks = baseTables
    .flatMap((entry) => groupedSnapshotForeignKeys(entry))
    .sort((a, b) => sortKey(a).localeCompare(sortKey(b)));

  return { expectedPks, expectedLinks };
}
/**
 * Writes the two generated artifacts of a fixture into `fixtureDir`:
 * `snapshot.json` (pretty-printed JSON with a trailing newline) and
 * `expected-links.yaml`.
 *
 * @param {object} input
 * @param {string} input.fixtureDir Existing directory to write into.
 * @param {object} input.snapshot Value serialized to `snapshot.json`.
 * @param {object} input.expected Value serialized to `expected-links.yaml`.
 * @returns {void}
 */
export function writeFixtureFiles(input) {
  const target = (fileName) => path.join(input.fixtureDir, fileName);

  const snapshotJson = `${JSON.stringify(input.snapshot, null, 2)}\n`;
  writeFileSync(target('snapshot.json'), snapshotJson, 'utf8');

  const expectedYaml = yamlStringify(input.expected);
  writeFileSync(target('expected-links.yaml'), expectedYaml, 'utf8');
}
// CLI entry point: runs only when this module is the script Node was started with,
// not when it is imported (e.g. by the test suite).
//
// The check compares filesystem paths instead of hand-building a `file://` URL:
// `import.meta.url` is percent-encoded and uses `file:///C:/...` on Windows, so a
// string-built URL never matches for paths containing spaces, `#`, non-ASCII
// characters, or drive letters, and the script would silently do nothing.
if (process.argv[1] && fileURLToPath(import.meta.url) === path.resolve(process.argv[1])) {
  const args = process.argv.slice(2);
  if (args[0] === '--rebuild-all') {
    // Rebuild every fixture listed in the public benchmark manifest.
    rebuildAllPublicSnapshots();
  } else if (args.length === 2) {
    // Build a single fixture; its id is the basename of the target directory.
    const [dataPath, fixtureDir] = args;
    const db = new Database(dataPath, { readonly: true });
    try {
      const fixtureId = path.basename(fixtureDir);
      const result = buildBenchmarkSnapshot({ db, fixtureId });
      writeFixtureFiles({ fixtureDir, snapshot: result.snapshot, expected: result.expected });
      console.log(`[built] ${fixtureId}`);
    } finally {
      db.close();
    }
  } else {
    // Exit code 2 signals a usage error.
    console.error('Usage: build-benchmark-snapshot.mjs <data.sqlite> <fixture-dir> | --rebuild-all');
    process.exit(2);
  }
}
URL('../packages/context/package.json', import.meta.url)); +const Database = require('better-sqlite3'); + +describe('buildBenchmarkSnapshot', () => { + it('emits a KloSchemaSnapshot-shaped object plus expected-links from declared FKs', () => { + const db = new Database(':memory:'); + db.exec(` + PRAGMA foreign_keys = ON; + CREATE TABLE accounts ( + id INTEGER PRIMARY KEY, + name TEXT NOT NULL + ); + CREATE TABLE orders ( + id INTEGER PRIMARY KEY, + account_id INTEGER NOT NULL REFERENCES accounts(id), + total REAL, + created_at TEXT + ); + INSERT INTO accounts (id, name) VALUES (1, 'a'), (2, 'b'); + INSERT INTO orders (id, account_id, total, created_at) VALUES + (1, 1, 10.0, '2024-01-01'), (2, 1, 20.0, '2024-01-02'), (3, 2, 30.0, '2024-01-03'); + `); + + const result = buildBenchmarkSnapshot({ db, fixtureId: 'fixture_x' }); + db.close(); + + assert.equal(result.snapshot.connectionId, 'fixture_x'); + assert.equal(result.snapshot.driver, 'sqlite'); + assert.equal(result.snapshot.tables.length, 2); + + const accounts = result.snapshot.tables.find((t) => t.name === 'accounts'); + assert.ok(accounts); + assert.equal(accounts.estimatedRows, 2); + assert.deepEqual(accounts.foreignKeys, []); + const idCol = accounts.columns.find((c) => c.name === 'id'); + assert.equal(idCol.primaryKey, true); + assert.equal(idCol.normalizedType, 'integer'); + assert.equal(idCol.dimensionType, 'number'); + + const orders = result.snapshot.tables.find((t) => t.name === 'orders'); + assert.equal(orders.foreignKeys.length, 1); + assert.equal(orders.foreignKeys[0].fromColumn, 'account_id'); + assert.equal(orders.foreignKeys[0].toTable, 'accounts'); + assert.equal(orders.foreignKeys[0].toColumn, 'id'); + + const createdAt = orders.columns.find((c) => c.name === 'created_at'); + assert.equal(createdAt.dimensionType, 'time'); + + const total = orders.columns.find((c) => c.name === 'total'); + assert.equal(total.dimensionType, 'number'); + assert.equal(total.nullable, true); + + assert.deepEqual( + 
result.expected.expectedPks.sort((a, b) => a.table.localeCompare(b.table)), + [ + { table: 'accounts', columns: ['id'] }, + { table: 'orders', columns: ['id'] }, + ], + ); + assert.deepEqual(result.expected.expectedLinks, [ + { + fromTable: 'orders', + fromColumns: ['account_id'], + toTable: 'accounts', + toColumns: ['id'], + relationship: 'many_to_one', + }, + ]); + }); + + it('skips internal SQLite tables (sqlite_*) and views', () => { + const db = new Database(':memory:'); + db.exec(` + CREATE TABLE keep_me (id INTEGER PRIMARY KEY); + CREATE VIEW keep_me_view AS SELECT id FROM keep_me; + INSERT INTO keep_me (id) VALUES (1); + `); + const result = buildBenchmarkSnapshot({ db, fixtureId: 'fx' }); + db.close(); + assert.equal(result.snapshot.tables.length, 1); + assert.equal(result.snapshot.tables[0].name, 'keep_me'); + }); + + it('groups composite foreign keys into a single ordered link', () => { + const db = new Database(':memory:'); + db.exec(` + PRAGMA foreign_keys = ON; + CREATE TABLE order_lines ( + order_id INTEGER NOT NULL, + line_number INTEGER NOT NULL, + sku TEXT NOT NULL, + PRIMARY KEY (order_id, line_number) + ); + CREATE TABLE allocations ( + id INTEGER PRIMARY KEY, + order_id INTEGER NOT NULL, + line_number INTEGER NOT NULL, + FOREIGN KEY (order_id, line_number) REFERENCES order_lines(order_id, line_number) + ); + `); + const result = buildBenchmarkSnapshot({ db, fixtureId: 'fx' }); + db.close(); + + const composite = result.expected.expectedLinks.find((l) => l.fromTable === 'allocations'); + assert.deepEqual(composite, { + fromTable: 'allocations', + fromColumns: ['order_id', 'line_number'], + toTable: 'order_lines', + toColumns: ['order_id', 'line_number'], + relationship: 'many_to_one', + }); + + const compositePk = result.expected.expectedPks.find((p) => p.table === 'order_lines'); + assert.deepEqual(compositePk.columns, ['order_id', 'line_number']); + }); + + it('derives expected PKs and grouped FKs from an existing snapshot', async () => { + 
const { expectedLinksFromSnapshot } = await import('./build-benchmark-snapshot.mjs'); + + const expected = expectedLinksFromSnapshot({ + connectionId: 'fixture', + driver: 'sqlite', + extractedAt: '2026-05-07T00:00:00.000Z', + scope: {}, + metadata: {}, + tables: [ + { + catalog: null, + db: 'main', + name: 'Sales.SalesOrderHeader', + kind: 'table', + comment: null, + estimatedRows: 3, + columns: [ + { + name: 'SalesOrderID', + nativeType: 'int', + normalizedType: 'integer', + dimensionType: 'number', + nullable: false, + primaryKey: true, + comment: null, + }, + { + name: 'CustomerID', + nativeType: 'int', + normalizedType: 'integer', + dimensionType: 'number', + nullable: false, + primaryKey: false, + comment: null, + }, + ], + foreignKeys: [ + { + fromColumn: 'CustomerID', + toCatalog: null, + toDb: 'main', + toTable: 'Sales.Customer', + toColumn: 'CustomerID', + constraintName: 'FK_SalesOrderHeader_Customer_CustomerID', + }, + ], + }, + { + catalog: null, + db: 'main', + name: 'Sales.Customer', + kind: 'table', + comment: null, + estimatedRows: 2, + columns: [ + { + name: 'CustomerID', + nativeType: 'int', + normalizedType: 'integer', + dimensionType: 'number', + nullable: false, + primaryKey: true, + comment: null, + }, + ], + foreignKeys: [], + }, + { + catalog: null, + db: 'main', + name: 'Sales.SalesOrderDetail', + kind: 'table', + comment: null, + estimatedRows: 6, + columns: [ + { + name: 'SalesOrderID', + nativeType: 'int', + normalizedType: 'integer', + dimensionType: 'number', + nullable: false, + primaryKey: true, + comment: null, + }, + { + name: 'SalesOrderDetailID', + nativeType: 'int', + normalizedType: 'integer', + dimensionType: 'number', + nullable: false, + primaryKey: true, + comment: null, + }, + ], + foreignKeys: [ + { + fromColumn: 'SalesOrderID', + toCatalog: null, + toDb: 'main', + toTable: 'Sales.SalesOrderHeader', + toColumn: 'SalesOrderID', + constraintName: 'FK_SalesOrderDetail_SalesOrderHeader_SalesOrderID', + }, + ], + }, + ], + 
/**
 * Renders a JavaScript value as a SQL literal for the generated fixture scripts.
 *
 * `null` becomes `NULL`, numbers are emitted bare, and every other value is
 * stringified and wrapped in single quotes with embedded single quotes doubled.
 *
 * @param {unknown} value
 * @returns {string}
 */
function sqlValue(value) {
  if (value === null) {
    return 'NULL';
  }
  const rendered = String(value);
  return typeof value === 'number' ? rendered : `'${rendered.replaceAll("'", "''")}'`;
}
/**
 * Rebuilds one synthetic fixture directory from scratch.
 *
 * Steps: wipe and recreate `<fixtureRoot>/<config.id>`, write `fixture.yaml`,
 * create `data.sqlite` by executing `config.sql`, derive `snapshot.json` from the
 * resulting database, and write `expected-links.yaml` from `config.expected`
 * (the hand-written labels, not the ones derived from the database). When
 * `config.compressArtifacts` is set, `snapshot.json` and `data.sqlite` are
 * replaced by gzip-compressed `.gz` files.
 *
 * @param {object} config Fixture definition (`id`, `sql`, `expected`, optional `compressArtifacts`, ...).
 * @returns {void}
 * @throws {Error} If `config.id` does not resolve to a directory strictly inside the fixture root.
 */
function writeFixture(config) {
  const fixtureDir = path.join(fixtureRoot, config.id);

  // The directory is deleted recursively below, so refuse ids that resolve to the
  // fixture root itself (empty id, '.') or to anything outside it ('..', absolute paths).
  const relativeDir = path.relative(fixtureRoot, fixtureDir);
  const escapesRoot =
    relativeDir === '' ||
    relativeDir === '..' ||
    relativeDir.startsWith(`..${path.sep}`) ||
    path.isAbsolute(relativeDir);
  if (escapesRoot) {
    throw new Error(`Refusing to rebuild fixture outside ${fixtureRoot}: ${JSON.stringify(config.id)}`);
  }

  rmSync(fixtureDir, { recursive: true, force: true });
  mkdirSync(fixtureDir, { recursive: true });
  writeFileSync(path.join(fixtureDir, 'fixture.yaml'), fixtureYaml(config), 'utf8');

  const dataPath = path.join(fixtureDir, 'data.sqlite');
  const db = new Database(dataPath);
  try {
    db.pragma('foreign_keys = OFF');
    db.exec(config.sql);
    const { snapshot } = buildBenchmarkSnapshot({ db, fixtureId: config.id });
    writeFixtureFiles({ fixtureDir, snapshot, expected: config.expected });
  } finally {
    db.close();
  }

  if (config.compressArtifacts) {
    for (const fileName of ['snapshot.json', 'data.sqlite']) {
      const rawPath = path.join(fixtureDir, fileName);
      // gzipSync returns a Buffer; a text encoding argument would be ignored, so none is passed.
      writeFileSync(`${rawPath}.gz`, gzipSync(readFileSync(rawPath)));
      unlinkSync(rawPath);
    }
  }

  console.log(`[built] ${config.id}: ${config.expected.expectedPks.length} PKs, ${config.expected.expectedLinks.length} links`);
}
'Berlin'], + ['K-003', 'Nord Handel', 'Hamburg'], + ]), + 'CREATE TABLE bestellungen (bestellnummer TEXT NOT NULL, "kaeufer_nummer" TEXT NOT NULL, betrag INTEGER NOT NULL);', + insertSql('bestellungen', ['bestellnummer', 'kaeufer_nummer', 'betrag'], [ + ['B-100', 'K-001', 420], + ['B-101', 'K-002', 300], + ['B-102', 'K-001', 125], + ]), + 'CREATE TABLE seihin (seihin_bango TEXT NOT NULL, bezeichnung TEXT NOT NULL, kategorie TEXT NOT NULL);', + insertSql('seihin', ['seihin_bango', 'bezeichnung', 'kategorie'], [ + ['S-01', 'ocha', 'drink'], + ['S-02', 'pan', 'food'], + ['S-03', 'miso', 'food'], + ]), + 'CREATE TABLE uriage (verkauf_nr TEXT NOT NULL, hinban TEXT NOT NULL, menge INTEGER NOT NULL);', + insertSql('uriage', ['verkauf_nr', 'hinban', 'menge'], [ + ['U-1', 'S-01', 7], + ['U-2', 'S-02', 3], + ['U-3', 'S-01', 5], + ]), + ].join('\n'), + expected: { + expectedPks: [ + { table: 'kundenstamm', columns: ['kundennummer'] }, + { table: 'seihin', columns: ['seihin_bango'] }, + ], + expectedLinks: [ + { + fromTable: 'bestellungen', + fromColumns: ['kaeufer_nummer'], + toTable: 'kundenstamm', + toColumns: ['kundennummer'], + relationship: 'many_to_one', + }, + { + fromTable: 'uriage', + fromColumns: ['hinban'], + toTable: 'seihin', + toColumns: ['seihin_bango'], + relationship: 'many_to_one', + }, + ], + }, + }; +} + +function abbreviatedLegacyFixture() { + return { + id: 'abbreviated_legacy_no_declared_constraints', + name: 'Abbreviated legacy naming fixture with no declared constraints', + tier: 'row_bearing', + sql: [ + 'CREATE TABLE cust (cust_id TEXT NOT NULL, nm TEXT NOT NULL, stat_cd TEXT NOT NULL);', + insertSql('cust', ['cust_id', 'nm', 'stat_cd'], [ + ['C001', 'Acme', 'A'], + ['C002', 'Globex', 'A'], + ['C003', 'Initech', 'I'], + ]), + 'CREATE TABLE prod (prod_cd TEXT NOT NULL, prod_nm TEXT NOT NULL, cat_cd TEXT NOT NULL);', + insertSql('prod', ['prod_cd', 'prod_nm', 'cat_cd'], [ + ['P10', 'Seat', 'FURN'], + ['P11', 'Desk', 'FURN'], + ['P12', 'Lamp', 'HOME'], 
+ ]), + 'CREATE TABLE ord_hdr (ord_id TEXT NOT NULL, cust_id TEXT NOT NULL, ord_dt TEXT NOT NULL);', + insertSql('ord_hdr', ['ord_id', 'cust_id', 'ord_dt'], [ + ['O900', 'C001', '2026-01-01'], + ['O901', 'C001', '2026-01-02'], + ['O902', 'C002', '2026-01-03'], + ]), + 'CREATE TABLE ord_ln (ln_id TEXT NOT NULL, ord_id TEXT NOT NULL, prod_cd TEXT NOT NULL, qty INTEGER NOT NULL);', + insertSql('ord_ln', ['ln_id', 'ord_id', 'prod_cd', 'qty'], [ + ['L1', 'O900', 'P10', 2], + ['L2', 'O900', 'P12', 1], + ['L3', 'O901', 'P11', 4], + ]), + ].join('\n'), + expected: { + expectedPks: [ + { table: 'cust', columns: ['cust_id'] }, + { table: 'ord_hdr', columns: ['ord_id'] }, + { table: 'prod', columns: ['prod_cd'] }, + ], + expectedLinks: [ + { + fromTable: 'ord_hdr', + fromColumns: ['cust_id'], + toTable: 'cust', + toColumns: ['cust_id'], + relationship: 'many_to_one', + }, + { + fromTable: 'ord_ln', + fromColumns: ['ord_id'], + toTable: 'ord_hdr', + toColumns: ['ord_id'], + relationship: 'many_to_one', + }, + { + fromTable: 'ord_ln', + fromColumns: ['prod_cd'], + toTable: 'prod', + toColumns: ['prod_cd'], + relationship: 'many_to_one', + }, + ], + }, + }; +} + +function analyticalWarehouseFixture() { + return { + id: 'analytical_warehouse_no_naming_convention', + name: 'Analytical warehouse fixture with no naming convention', + tier: 'row_bearing', + sql: [ + 'CREATE TABLE dim_signup_country (country_code TEXT NOT NULL, country_name TEXT NOT NULL, region_name TEXT NOT NULL);', + insertSql('dim_signup_country', ['country_code', 'country_name', 'region_name'], [ + ['US', 'United States', 'americas'], + ['DE', 'Germany', 'emea'], + ['JP', 'Japan', 'apac'], + ]), + 'CREATE TABLE dim_commercial_plan (plan_code TEXT NOT NULL, plan_family TEXT NOT NULL, sales_motion TEXT NOT NULL);', + insertSql('dim_commercial_plan', ['plan_code', 'plan_family', 'sales_motion'], [ + ['FREE', 'free', 'self_serve'], + ['TEAM', 'team', 'sales_assisted'], + ['ENT', 'enterprise', 'sales_led'], + ]), + 
'CREATE TABLE mart_revenue_daily (revenue_event_key TEXT NOT NULL, signup_country_code TEXT NOT NULL, commercial_plan_code TEXT NOT NULL, booked_revenue INTEGER NOT NULL);', + insertSql( + 'mart_revenue_daily', + ['revenue_event_key', 'signup_country_code', 'commercial_plan_code', 'booked_revenue'], + [ + ['R1', 'US', 'TEAM', 200], + ['R2', 'DE', 'ENT', 900], + ['R3', 'US', 'FREE', 0], + ], + ), + 'CREATE TABLE mart_activation_cohort (cohort_key TEXT NOT NULL, first_touch_country TEXT NOT NULL, purchased_plan TEXT NOT NULL, activated_accounts INTEGER NOT NULL);', + insertSql( + 'mart_activation_cohort', + ['cohort_key', 'first_touch_country', 'purchased_plan', 'activated_accounts'], + [ + ['C1', 'JP', 'TEAM', 7], + ['C2', 'DE', 'ENT', 2], + ['C3', 'US', 'FREE', 30], + ], + ), + ].join('\n'), + expected: { + expectedPks: [ + { table: 'dim_commercial_plan', columns: ['plan_code'] }, + { table: 'dim_signup_country', columns: ['country_code'] }, + ], + expectedLinks: [ + { + fromTable: 'mart_activation_cohort', + fromColumns: ['first_touch_country'], + toTable: 'dim_signup_country', + toColumns: ['country_code'], + relationship: 'many_to_one', + }, + { + fromTable: 'mart_activation_cohort', + fromColumns: ['purchased_plan'], + toTable: 'dim_commercial_plan', + toColumns: ['plan_code'], + relationship: 'many_to_one', + }, + { + fromTable: 'mart_revenue_daily', + fromColumns: ['commercial_plan_code'], + toTable: 'dim_commercial_plan', + toColumns: ['plan_code'], + relationship: 'many_to_one', + }, + { + fromTable: 'mart_revenue_daily', + fromColumns: ['signup_country_code'], + toTable: 'dim_signup_country', + toColumns: ['country_code'], + relationship: 'many_to_one', + }, + ], + }, + }; +} + +function mixedCaseFixture() { + return { + id: 'mixed_case_within_schema_no_declared_constraints', + name: 'Mixed case within schema fixture with no declared constraints', + tier: 'row_bearing', + sql: [ + 'CREATE TABLE CustomerAccount (AccountID TEXT NOT NULL, AccountName TEXT NOT 
NULL, accountTier TEXT NOT NULL);', + insertSql('CustomerAccount', ['AccountID', 'AccountName', 'accountTier'], [ + ['A-1', 'Acme', 'team'], + ['A-2', 'Globex', 'enterprise'], + ['A-3', 'Initech', 'free'], + ]), + 'CREATE TABLE subscriptionPlans (planId TEXT NOT NULL, display_name TEXT NOT NULL, BillingCadence TEXT NOT NULL);', + insertSql('subscriptionPlans', ['planId', 'display_name', 'BillingCadence'], [ + ['P-free', 'Free', 'none'], + ['P-team', 'Team', 'monthly'], + ['P-ent', 'Enterprise', 'annual'], + ]), + 'CREATE TABLE order_events (event_id TEXT NOT NULL, accountId TEXT NOT NULL, plan_id TEXT NOT NULL, amount INTEGER NOT NULL);', + insertSql('order_events', ['event_id', 'accountId', 'plan_id', 'amount'], [ + ['E1', 'A-1', 'P-team', 120], + ['E2', 'A-2', 'P-ent', 1000], + ['E3', 'A-1', 'P-free', 0], + ]), + 'CREATE TABLE InvoiceHeader (InvoiceID TEXT NOT NULL, CustomerAccountID TEXT NOT NULL, invoice_total INTEGER NOT NULL);', + insertSql('InvoiceHeader', ['InvoiceID', 'CustomerAccountID', 'invoice_total'], [ + ['I1', 'A-1', 120], + ['I2', 'A-2', 1000], + ['I3', 'A-1', 20], + ]), + 'CREATE TABLE line_items (line_item_id TEXT NOT NULL, invoice_id TEXT NOT NULL, skuCode TEXT NOT NULL);', + insertSql('line_items', ['line_item_id', 'invoice_id', 'skuCode'], [ + ['L1', 'I1', 'SKU1'], + ['L2', 'I1', 'SKU2'], + ['L3', 'I2', 'SKU3'], + ]), + ].join('\n'), + expected: { + expectedPks: [ + { table: 'CustomerAccount', columns: ['AccountID'] }, + { table: 'InvoiceHeader', columns: ['InvoiceID'] }, + { table: 'subscriptionPlans', columns: ['planId'] }, + ], + expectedLinks: [ + { + fromTable: 'InvoiceHeader', + fromColumns: ['CustomerAccountID'], + toTable: 'CustomerAccount', + toColumns: ['AccountID'], + relationship: 'many_to_one', + }, + { + fromTable: 'line_items', + fromColumns: ['invoice_id'], + toTable: 'InvoiceHeader', + toColumns: ['InvoiceID'], + relationship: 'many_to_one', + }, + { + fromTable: 'order_events', + fromColumns: ['accountId'], + toTable: 
'CustomerAccount', + toColumns: ['AccountID'], + relationship: 'many_to_one', + }, + { + fromTable: 'order_events', + fromColumns: ['plan_id'], + toTable: 'subscriptionPlans', + toColumns: ['planId'], + relationship: 'many_to_one', + }, + ], + }, + }; +} + +function polymorphicFixture() { + return { + id: 'polymorphic_partial_overlap_no_declared_constraints', + name: 'Polymorphic partial-overlap fixture with no declared constraints', + tier: 'row_bearing', + sql: [ + 'CREATE TABLE users (user_id TEXT NOT NULL, email TEXT NOT NULL, lifecycle TEXT NOT NULL);', + insertSql('users', ['user_id', 'email', 'lifecycle'], [ + ['U1', 'ada@example.com', 'active'], + ['U2', 'grace@example.com', 'active'], + ['U3', 'alan@example.com', 'inactive'], + ]), + 'CREATE TABLE organizations (organization_id TEXT NOT NULL, organization_name TEXT NOT NULL, market TEXT NOT NULL);', + insertSql('organizations', ['organization_id', 'organization_name', 'market'], [ + ['O1', 'Acme', 'midmarket'], + ['O2', 'Globex', 'enterprise'], + ['O3', 'Initech', 'smb'], + ]), + 'CREATE TABLE activity_events (event_id TEXT NOT NULL, entity_id TEXT NOT NULL, entity_type TEXT NOT NULL, action_name TEXT NOT NULL);', + insertSql('activity_events', ['event_id', 'entity_id', 'entity_type', 'action_name'], [ + ['E1', 'U1', 'user', 'login'], + ['E2', 'O1', 'organization', 'workspace_created'], + ['E3', 'U2', 'user', 'invite_sent'], + ['E4', 'O2', 'organization', 'billing_updated'], + ]), + ].join('\n'), + expected: { + expectedPks: [ + { table: 'organizations', columns: ['organization_id'] }, + { table: 'users', columns: ['user_id'] }, + ], + expectedLinks: [ + { + fromTable: 'activity_events', + fromColumns: ['entity_id'], + toTable: 'organizations', + toColumns: ['organization_id'], + relationship: 'many_to_one', + }, + { + fromTable: 'activity_events', + fromColumns: ['entity_id'], + toTable: 'users', + toColumns: ['user_id'], + relationship: 'many_to_one', + }, + ], + }, + }; +} + +function padded(value, 
/**
 * Builds the scale-stress fixture definition: 20 dimension tables and 380 fact
 * tables, each with 50 columns and 3 rows, and no declared constraints.
 *
 * Every dimension table has one key column plus 49 attribute columns. Every
 * fact table has `event_id`, five reference columns pointing at five
 * consecutive dimensions (wrapping around), and 44 integer metric columns.
 * The expected labels are one PK per dimension table and one many-to-one link
 * per fact reference column.
 *
 * @returns {object} Fixture config consumed by `writeFixture`.
 */
function scaleFixture() {
  const DIMENSION_TABLES = 20;
  const FACT_TABLES = 380;
  const ROWS_PER_TABLE = 3;
  const DIMENSION_ATTRIBUTES = 49;
  const FACT_METRICS = 44;
  const REFERENCES_PER_FACT = 5;

  // 0 .. count-1
  const indexes = (count) => Array.from({ length: count }, (_, position) => position);
  const dimensionTable = (dimTag) => `dim_entity_${dimTag}`;
  const dimensionKey = (dimTag) => `entity_${dimTag}_key`;

  const sqlParts = [];
  const pks = [];
  const links = [];

  indexes(DIMENSION_TABLES).forEach((dimIndex) => {
    const dimTag = padded(dimIndex, 2);
    const tableName = dimensionTable(dimTag);
    const keyColumn = dimensionKey(dimTag);
    const columnNames = [
      keyColumn,
      ...indexes(DIMENSION_ATTRIBUTES).map((attr) => `attribute_${padded(attr, 2)}`),
    ];
    const columnDdl = columnNames.map((name) => `${q(name)} TEXT NOT NULL`).join(', ');
    const rows = indexes(ROWS_PER_TABLE).map((rowNo) => [
      `D${dimTag}-${rowNo}`,
      ...indexes(DIMENSION_ATTRIBUTES).map((attr) => `dim${dimTag}_attr${attr}_${rowNo}`),
    ]);

    sqlParts.push(`CREATE TABLE ${q(tableName)} (${columnDdl});`);
    sqlParts.push(insertSql(tableName, columnNames, rows));
    pks.push({ table: tableName, columns: [keyColumn] });
  });

  indexes(FACT_TABLES).forEach((factIndex) => {
    const factTag = padded(factIndex, 3);
    const tableName = `fact_activity_${factTag}`;
    // Five consecutive dimensions starting at this fact's index, wrapping at the dimension count.
    const dimTags = indexes(REFERENCES_PER_FACT).map((offset) =>
      padded((factIndex + offset) % DIMENSION_TABLES, 2),
    );
    const referenceNames = dimTags.map(dimensionKey);
    const metricNames = indexes(FACT_METRICS).map((metric) => `metric_${padded(metric, 2)}`);
    const columnNames = ['event_id', ...referenceNames, ...metricNames];
    const columnDdl = [
      `${q('event_id')} TEXT NOT NULL`,
      ...referenceNames.map((name) => `${q(name)} TEXT NOT NULL`),
      ...metricNames.map((name) => `${q(name)} INTEGER NOT NULL`),
    ].join(', ');
    const rows = indexes(ROWS_PER_TABLE).map((rowNo) => [
      `F${factTag}-${rowNo}`,
      ...dimTags.map((dimTag) => `D${dimTag}-${rowNo}`),
      ...metricNames.map((_, metric) => factIndex * 1000 + metric * 10 + rowNo),
    ]);

    sqlParts.push(`CREATE TABLE ${q(tableName)} (${columnDdl});`);
    sqlParts.push(insertSql(tableName, columnNames, rows));

    for (const dimTag of dimTags) {
      links.push({
        fromTable: tableName,
        fromColumns: [dimensionKey(dimTag)],
        toTable: dimensionTable(dimTag),
        toColumns: [dimensionKey(dimTag)],
        relationship: 'many_to_one',
      });
    }
  });

  return {
    id: 'scale_stress_no_declared_constraints',
    name: 'Scale stress fixture with no declared constraints',
    tier: 'row_bearing',
    validationBudget: 800,
    compressArtifacts: true,
    sql: sqlParts.join('\n'),
    expected: { expectedPks: pks, expectedLinks: links },
  };
}
const codeExtensions = new Set(['.ts', '.tsx', '.js', '.jsx', '.mjs', '.cjs', '.py']);
const runtimeAssetPatterns = [/^packages\/[^/]+\/prompts\/.+\.md$/, /^packages\/[^/]+\/skills\/.+\.md$/];
const identifierSkipPrefixes = ['docs/', 'examples/', 'python/klo-sl/plans/', 'python/klo-sl/openspec/'];
// Terms are split so this script does not flag itself.
const forbiddenIdentifierTerms = ['kae' + 'lio', 'Kae' + 'lio', 'KAE' + 'LIO_'];

// Each rule reports at most once per file. A rule with `extensions` is only
// evaluated for files whose extension is in that set.
const appImportPatterns = [
  {
    label: 'server source import',
    pattern: /(?:from\s+['"][^'"]*|import\s*\(\s*['"][^'"]*|import\s+['"][^'"]*)(?:@server\/|server\/src|(?:\.\.\/)+server\/src)/,
  },
  {
    label: 'frontend source import',
    pattern: /(?:from\s+['"][^'"]*|import\s*\(\s*['"][^'"]*|import\s+['"][^'"]*)(?:@frontend\/|frontend\/src|(?:\.\.\/)+frontend\/src)/,
  },
  {
    // Path- or module-qualified references to the python service package.
    label: 'python service app import',
    pattern: /(?:from\s+['"][^'"]*|import\s*\(\s*['"][^'"]*|import\s+['"][^'"]*|from\s+)(?:python-service\/app|python_service\.app)/,
  },
  {
    // Python `from app.<module> import ...`. Restricted to line-leading
    // statements in .py files: matching `app.` after any quoted specifier
    // prefix flagged ordinary JS imports such as `from './app.js'`, and the
    // bare form flagged text such as SQL `from app.users`.
    label: 'python service app import',
    pattern: /^[ \t]*from\s+app\./m,
    extensions: new Set(['.py']),
  },
];

const llmBoundaryPatterns = [
  {
    label: 'direct Anthropic provider construction',
    pattern: /\bcreateAnthropic\b/,
  },
  {
    label: 'direct Vertex Anthropic provider construction',
    pattern: /\bcreateVertexAnthropic\b/,
  },
  {
    label: 'direct AI SDK gateway construction',
    pattern: /\bcreateGateway\b/,
  },
  {
    label: 'direct AI SDK embedding execution',
    pattern: /\bembedMany\b/,
  },
  {
    label: 'legacy context LLM provider port',
    pattern: /\bLlmProviderPort\b/,
  },
  {
    label: 'legacy scan LLM provider port',
    pattern: /\bKloScanLlmPort\b/,
  },
  {
    label: 'legacy gateway LLM provider helper',
    pattern: /\bcreateGatewayLlmProvider\b/,
  },
];

const contextProductionLlmBoundaryPatterns = [
  {
    label: 'context getModelByName call',
    pattern: /\.\s*getModelByName\s*\(/,
  },
];

/** Convert platform path separators to forward slashes. */
function normalizePath(filePath) {
  return filePath.split(path.sep).join('/');
}

/** True when the file extension is one of the scanned code extensions. */
function isCodeSource(relativePath) {
  return codeExtensions.has(path.extname(relativePath));
}

/** True for markdown files under a package's `prompts/` or `skills/` directory. */
function isRuntimeAsset(relativePath) {
  return runtimeAssetPatterns.some((pattern) => pattern.test(relativePath));
}

function scansForAppImports(relativePath) {
  return isCodeSource(relativePath);
}

function scansForLlmBoundaries(relativePath) {
  return isCodeSource(relativePath) && relativePath.startsWith('packages/context/src/');
}

/** True for `*.test.*` / `*.spec.*` JS/TS files (including .mjs/.cjs/.tsx/.jsx variants). */
function isTestSource(relativePath) {
  return /(?:^|\/)[^/]+\.(?:test|spec)\.[cm]?[jt]sx?$/.test(relativePath);
}

function scansForContextProductionLlmBoundaries(relativePath) {
  return scansForLlmBoundaries(relativePath) && !isTestSource(relativePath);
}

function scansForForbiddenIdentifiers(relativePath) {
  return isCodeSource(relativePath) || isRuntimeAsset(relativePath);
}

function skipsIdentifierScan(relativePath) {
  return identifierSkipPrefixes.some((prefix) => relativePath.startsWith(prefix));
}

/**
 * Check one file's text against the boundary rules that apply to its path.
 *
 * @param {string} relativePath Path relative to the repository root; platform
 *   separators are normalized to `/` before rules are selected.
 * @param {string} content File contents.
 * @returns {{ file: string, kind: string, message: string }[]} One entry per
 *   violated rule (not per occurrence); empty when the file is clean or no
 *   rule applies to the path.
 */
export function scanFileContent(relativePath, content) {
  const normalizedPath = normalizePath(relativePath);
  const violations = [];

  if (scansForAppImports(normalizedPath)) {
    const extension = path.extname(normalizedPath);
    // Several rules may share a label; report each label once per file.
    const reportedLabels = new Set();

    for (const appImportPattern of appImportPatterns) {
      const appliesToFile = !appImportPattern.extensions || appImportPattern.extensions.has(extension);
      if (!appliesToFile || reportedLabels.has(appImportPattern.label)) {
        continue;
      }

      if (appImportPattern.pattern.test(content)) {
        reportedLabels.add(appImportPattern.label);
        violations.push({
          file: normalizedPath,
          kind: 'app-import',
          message: `Forbidden ${appImportPattern.label}`,
        });
      }
    }
  }

  if (scansForLlmBoundaries(normalizedPath)) {
    for (const llmBoundaryPattern of llmBoundaryPatterns) {
      if (llmBoundaryPattern.pattern.test(content)) {
        violations.push({
          file: normalizedPath,
          kind: 'llm-boundary',
          message: `Forbidden ${llmBoundaryPattern.label}; use @klo/llm`,
        });
      }
    }
  }

  if (scansForContextProductionLlmBoundaries(normalizedPath)) {
    for (const llmBoundaryPattern of contextProductionLlmBoundaryPatterns) {
      if (llmBoundaryPattern.pattern.test(content)) {
        violations.push({
          file: normalizedPath,
          kind: 'llm-boundary',
          message: `Forbidden ${llmBoundaryPattern.label}; use getModel(role) inside @klo/context`,
        });
      }
    }
  }

  if (scansForForbiddenIdentifiers(normalizedPath) && !skipsIdentifierScan(normalizedPath)) {
    for (const term of forbiddenIdentifierTerms) {
      if (content.includes(term)) {
        violations.push({
          file: normalizedPath,
          kind: 'identifier',
          message: `Forbidden product identifier "${term}"`,
        });
      }
    }
  }

  return violations;
}
/**
 * Walk `rootDir` and collect boundary violations from every scannable file.
 *
 * Only code sources and runtime prompt/skill assets are read: no rule in
 * `scanFileContent` applies to any other path, so reading them (binary
 * assets, large data files) as UTF-8 would be wasted work.
 *
 * @param {string} rootDir Directory to scan recursively.
 * @returns {Promise<{ file: string, kind: string, message: string }[]>}
 *   Violations in file-walk order.
 */
export async function collectViolations(rootDir) {
  const files = await collectFiles(rootDir);
  const violations = [];

  for (const file of files) {
    const relativePath = normalizePath(path.relative(rootDir, file));

    // Decide from the path alone whether any rule can apply before touching the file.
    if (!isCodeSource(relativePath) && !isRuntimeAsset(relativePath)) {
      continue;
    }

    const content = await readFile(file, 'utf8');
    violations.push(...scanFileContent(relativePath, content));
  }

  return violations;
}
identifiers in code source files', () => { + const violations = scanFileContent('packages/context/src/index.ts', `export const owner = '${lowerProductName()}';`); + + assert.equal(violations.length, 1); + assert.equal(violations[0]?.kind, 'identifier'); + }); + + it('rejects forbidden product identifiers in shipped runtime prompt assets', () => { + const violations = scanFileContent( + 'packages/context/prompts/memory_agent_bundle_ingest_work_unit.md', + `Write output for ${productName()}.`, + ); + + assert.equal(violations.length, 1); + assert.equal(violations[0]?.kind, 'identifier'); + assert.equal(violations[0]?.file, 'packages/context/prompts/memory_agent_bundle_ingest_work_unit.md'); + }); + + it('rejects forbidden product identifiers in shipped runtime skill assets', () => { + const violations = scanFileContent( + 'packages/context/skills/metabase_ingest/SKILL.md', + `Use ${productName()} project conventions.`, + ); + + assert.equal(violations.length, 1); + assert.equal(violations[0]?.kind, 'identifier'); + assert.equal(violations[0]?.file, 'packages/context/skills/metabase_ingest/SKILL.md'); + }); + + it('allows product identifiers in docs, examples, and transition metadata', () => { + const name = productName(); + + assert.equal(scanFileContent('docs/transition.md', name).length, 0); + assert.equal(scanFileContent('examples/transition.md', name).length, 0); + assert.equal(scanFileContent('python/klo-sl/plans/brainstorm.md', name).length, 0); + assert.equal(scanFileContent('python/klo-sl/openspec/specs/semantic-layer/spec.md', name).length, 0); + }); + + it('allows clean source files and clean runtime prompt assets', () => { + assert.deepEqual( + scanFileContent('packages/context/src/index.ts', "export const packageName = '@klo/context';"), + [], + ); + assert.deepEqual( + scanFileContent('packages/context/prompts/memory_agent_bundle_ingest_work_unit.md', 'Write output for KLO.'), + [], + ); + }); + + it('rejects context-owned LLM provider construction after 
@klo/llm migration', () => { + const violations = [ + ...scanFileContent( + 'packages/context/src/agent/local-llm-provider.ts', + "import { createAnthropic } from '@ai-sdk/anthropic';", + ), + ...scanFileContent('packages/context/src/scan/local-ai-gateway-enrichment.ts', "import { createGateway } from 'ai';"), + ...scanFileContent('packages/context/src/core/local-embedding-provider.ts', "import { embedMany } from 'ai';"), + ]; + + assert.deepEqual( + violations.map((violation) => violation.kind), + ['llm-boundary', 'llm-boundary', 'llm-boundary'], + ); + }); + + it('rejects old KLO LLM port declarations in context', () => { + const violations = [ + ...scanFileContent('packages/context/src/agent/agent-runner.service.ts', 'export interface LlmProviderPort {}'), + ...scanFileContent('packages/context/src/scan/types.ts', 'export interface KloScanLlmPort {}'), + ...scanFileContent('packages/context/src/agent/gateway-llm-provider.ts', 'export function createGatewayLlmProvider() {}'), + ]; + + assert.deepEqual( + violations.map((violation) => violation.kind), + ['llm-boundary', 'llm-boundary', 'llm-boundary'], + ); + }); + + it('rejects getModelByName calls in context production source', () => { + const violations = scanFileContent( + 'packages/context/src/ingest/page-triage/page-triage.service.ts', + "const model = this.deps.llmProvider.getModelByName('claude-sonnet-4-6');", + ); + + assert.equal(violations.length, 1); + assert.equal(violations[0]?.kind, 'llm-boundary'); + assert.equal( + violations[0]?.message, + 'Forbidden context getModelByName call; use getModel(role) inside @klo/context', + ); + }); + + it('allows role-driven getModel calls, test calls, and provider shape declarations', () => { + assert.deepEqual( + scanFileContent( + 'packages/context/src/ingest/page-triage/page-triage.service.ts', + "const model = this.deps.llmProvider.getModel('triage');", + ), + [], + ); + + assert.deepEqual( + scanFileContent( + 
/**
 * Read the root CI workflow, or mark the running test as skipped when the
 * workflow file does not exist.
 *
 * The file is read directly instead of probing with `access()` first, which
 * avoids a check-then-use race and a redundant filesystem call.
 *
 * @param {{ skip(message: string): void }} testContext node:test context of the calling test.
 * @returns {Promise<string | null>} Workflow text, or `null` after the test was skipped.
 * @throws Any read error other than ENOENT.
 */
async function readCiWorkflowOrSkip(testContext) {
  try {
    return await readFile(ciWorkflowPath, 'utf-8');
  } catch (error) {
    if (error?.code === 'ENOENT') {
      testContext.skip('root CI workflow is absent from sparse klo checkout');
      return null;
    }
    throw error;
  }
}
/klo\/dist\/artifacts\/manifest\.json/); + assert.match(workflow, /klo\/dist\/artifacts\/npm\/\*\.tgz/); + assert.match(workflow, /klo\/dist\/artifacts\/python\/\*\.whl/); + assert.match(workflow, /klo\/dist\/artifacts\/python\/\*\.tar\.gz/); + assert.match(workflow, /if-no-files-found: error/); + assert.match(workflow, /retention-days: 7/); + }); + + it('runs packed demo artifact smoke on Linux and macOS', async (testContext) => { + const workflow = await readCiWorkflowOrSkip(testContext); + if (workflow === null) { + return; + } + + assert.match(workflow, /check-klo-packed-demo:/); + assert.match(workflow, /matrix:\s+os: \[ubuntu-latest, macos-latest\]/s); + assert.match(workflow, /name: Download klo package artifacts/); + assert.match(workflow, /path: klo\/dist\/artifacts/); + assert.match(workflow, /run: cd klo && pnpm run artifacts:verify-demo/); + }); + + it('includes packed demo artifact smoke in ci-success', async (testContext) => { + const workflow = await readCiWorkflowOrSkip(testContext); + if (workflow === null) { + return; + } + + assert.match( + workflow, + /needs: \[check-klo-subtree, check-klo-packed-demo, build-python-service, test-server, build-frontend, run-pre-commit, build-docker-images\]/, + ); + assert.match(workflow, /needs\.check-klo-packed-demo\.result.*== "failure"/); + assert.match(workflow, /needs\.check-klo-packed-demo\.result.*== "cancelled"/); + }); +}); diff --git a/scripts/examples-docs.test.mjs b/scripts/examples-docs.test.mjs new file mode 100644 index 00000000..1fc338f3 --- /dev/null +++ b/scripts/examples-docs.test.mjs @@ -0,0 +1,174 @@ +import assert from 'node:assert/strict'; +import { readFile } from 'node:fs/promises'; +import { describe, it } from 'node:test'; + +async function readText(relativePath) { + return readFile(new URL(`../${relativePath}`, import.meta.url), 'utf8'); +} + +describe('standalone example docs', () => { + it('documents the local warehouse example from the examples index', async () => { + const 
examples = await readText('examples/README.md'); + + assert.match(examples, /local-warehouse/); + assert.match(examples, /fake ingest adapter/); + assert.doesNotMatch(examples, /will contain standalone examples/); + }); + + it('documents the Orbit relationship verification example project', async () => { + const examples = await readText('examples/README.md'); + const readme = await readText('examples/orbit-relationship-verification/README.md'); + const config = await readText('examples/orbit-relationship-verification/klo.yaml'); + + assert.match(examples, /orbit-relationship-verification/); + assert.match(examples, /relationships:verify-orbit/); + assert.match(readme, /Orbit-style relationship discovery verification/); + assert.match(readme, /pnpm run relationships:verify-orbit/); + assert.match(readme, /Accepted: 9/); + assert.match(readme, /Review: 0/); + assert.match(readme, /Rejected: 0/); + assert.match(config, /project: orbit-relationship-verification/); + assert.match(config, /orbit:/); + assert.match(config, /driver: sqlite/); + assert.match( + config, + /path: \.\.\/\.\.\/packages\/context\/test\/fixtures\/relationship-benchmarks\/orbit_style_product_no_declared_constraints\/data\.sqlite/, + ); + assert.match(config, /readonly: true/); + assert.match(config, /llm_proposals: false/); + assert.match(config, /validation_required_for_manifest: true/); + }); + + it('documents the Postgres historic SQL smoke example', async () => { + const examples = await readText('examples/README.md'); + const readme = await readText('examples/postgres-historic/README.md'); + const compose = await readText('examples/postgres-historic/docker-compose.yml'); + const initSql = await readText('examples/postgres-historic/init/001-schema.sql'); + const workload = await readText('examples/postgres-historic/scripts/generate-workload.sh'); + const smoke = await readText('examples/postgres-historic/scripts/smoke.sh'); + + assert.match(examples, /postgres-historic/); + 
assert.match(examples, /pg_stat_statements/); + assert.match(readme, /--enable-historic-sql/); + assert.match(readme, /--historic-sql-min-calls 2/); + assert.match(readme, /klo dev doctor --project-dir/); + assert.match(readme, /Postgres Historic SQL/); + assert.match(readme, /dev ingest run/); + assert.match(compose, /postgres:14/); + assert.match(compose, /shared_preload_libraries=pg_stat_statements/); + assert.match(compose, /pg_stat_statements.track=top/); + assert.match(initSql, /CREATE EXTENSION IF NOT EXISTS pg_stat_statements/); + assert.match(initSql, /GRANT pg_read_all_stats TO klo_reader/); + assert.match(workload, /JOIN customers/); + assert.match(workload, /app_user/); + assert.match(workload, /etl_user/); + assert.match(smoke, /pg_stat_statements_reset/); + assert.match(smoke, /assert_manifest "\$FIRST_MANIFEST" true/); + assert.match(smoke, /assert_manifest "\$SECOND_MANIFEST" false/); + assert.match(smoke, /assert_manifest "\$RESET_MANIFEST" true/); + }); + + it('lists every published TypeScript package in the package root README', async () => { + const rootReadme = await readText('README.md'); + + assert.match(rootReadme, /`packages\/context`/); + assert.match(rootReadme, /`packages\/cli`/); + assert.match(rootReadme, /`packages\/connector-bigquery`/); + assert.match(rootReadme, /`packages\/connector-clickhouse`/); + assert.match(rootReadme, /`packages\/connector-mysql`/); + assert.match(rootReadme, /`packages\/connector-postgres`/); + assert.match(rootReadme, /`packages\/connector-posthog`/); + assert.match(rootReadme, /`packages\/connector-snowflake`/); + assert.match(rootReadme, /`packages\/connector-sqlite`/); + assert.match(rootReadme, /`packages\/connector-sqlserver`/); + assert.match(rootReadme, /`python\/klo-sl`/); + assert.match(rootReadme, /`python\/klo-daemon`/); + }); + + it('documents every standalone MCP tool that the CLI server exposes', async () => { + const rootReadme = await readText('README.md'); + + assert.match(rootReadme, 
/`connection_list`/); + assert.match(rootReadme, /`knowledge_search`/); + assert.match(rootReadme, /`knowledge_read`/); + assert.match(rootReadme, /`knowledge_write`/); + assert.match(rootReadme, /`sl_list_sources`/); + assert.match(rootReadme, /`sl_read_source`/); + assert.match(rootReadme, /`sl_write_source`/); + assert.match(rootReadme, /`sl_validate`/); + assert.match(rootReadme, /`sl_query`/); + assert.match(rootReadme, /`ingest_trigger`/); + assert.match(rootReadme, /`ingest_status`/); + assert.match(rootReadme, /`ingest_report`/); + assert.match(rootReadme, /`ingest_replay`/); + }); + + it('walks through klo connection list and klo connection test in the README quickstart', async () => { + const rootReadme = await readText('README.md'); + + assert.match(rootReadme, /connection list --project-dir/); + assert.match(rootReadme, /connection test warehouse --project-dir/); + assert.match(rootReadme, /Driver: sqlite/); + assert.match(rootReadme, /Tables: 1/); + }); + + it('replaces the fake-ingest smoke with a klo scan walkthrough in the README', async () => { + const rootReadme = await readText('README.md'); + + assert.match(rootReadme, /### Scan the demo warehouse/); + assert.match(rootReadme, /scan warehouse --project-dir/); + assert.match(rootReadme, /scan status --project-dir/); + assert.match(rootReadme, /scan report --project-dir/); + assert.match(rootReadme, /raw-sources\/warehouse\/live-database/); + assert.doesNotMatch(rootReadme, /Run a local ingest smoke test/); + assert.doesNotMatch(rootReadme, /klo dev ingest run --project-dir/); + assert.doesNotMatch(rootReadme, /klo ingest status --project-dir/); + }); + + it('documents pnpm setup as a prerequisite when optional dev linking fails', async () => { + const rootReadme = await readText('README.md'); + + assert.match(rootReadme, /pnpm run link:dev/); + assert.match(rootReadme, /klo-dev --help/); + assert.doesNotMatch( + rootReadme, + /If the setup command reports that pnpm's global bin directory is not 
on your\n`PATH`, add the printed directory to your shell profile/, + ); + }); + + it('runs the example smoke in the cli smoke script', async () => { + const packageJson = JSON.parse(await readText('packages/cli/package.json')); + + assert.match(packageJson.scripts.smoke, /src\/standalone-smoke\.test\.ts/); + assert.match(packageJson.scripts.smoke, /src\/example-smoke\.test\.ts/); + }); + + it('documents daemon HTTP database, source generation, LookML, embedding, and code execution support', async () => { + const readme = await readText('python/klo-daemon/README.md'); + + assert.match(readme, /semantic-generate-sources/); + assert.match(readme, /database-introspect/); + assert.match(readme, /POST \/database\/introspect/); + assert.match(readme, /Introspect a Postgres database schema/); + assert.match(readme, /lookml-parse/); + assert.match(readme, /embedding-compute/); + assert.match(readme, /embedding-compute-bulk/); + assert.match(readme, /code-execute/); + assert.match(readme, /--enable-code-execution/); + assert.match(readme, /POST \/semantic-layer\/generate-sources/); + assert.match(readme, /POST \/lookml\/parse/); + assert.match(readme, /POST \/embeddings\/compute/); + assert.match(readme, /POST \/embeddings\/compute-bulk/); + assert.match(readme, /POST \/code\/execute/); + assert.match(readme, /Generate semantic-layer sources from schema scan data/); + assert.match(readme, /Parse LookML projects into resolved, KSL-ready structures/); + assert.match(readme, /Compute text embeddings locally/); + assert.match(readme, /Execute Python code with the current in-process boundary/); + assert.match(readme, /Code execution is off by default/); + assert.match(readme, /does not provide OS-level sandboxing/); + assert.doesNotMatch(readme, /source generation are not exposed through this/); + assert.doesNotMatch(readme, /LookML parsing are not exposed through this/); + assert.doesNotMatch(readme, /embeddings are not exposed through this server mode/); + 
const POSTGRES_IMAGE = process.env.KLO_ARTIFACT_POSTGRES_IMAGE ?? 'postgres:16-alpine';
const POSTGRES_USER = 'klo';
const POSTGRES_PASSWORD = 'postgres'; // pragma: allowlist secret
const POSTGRES_DB = 'warehouse';

/**
 * Name for the throwaway Postgres container, made unique per process and
 * timestamp.
 */
export function smokeContainerName(pid = process.pid, now = Date.now()) {
  return ['klo-live-db-smoke', pid, now].join('-');
}

/** Connection URL for the smoke database published on the given loopback port. */
export function buildPostgresUrl(hostPort) {
  const credentials = `${POSTGRES_USER}:${POSTGRES_PASSWORD}`; // pragma: allowlist secret
  const endpoint = `127.0.0.1:${hostPort}`;
  return `postgresql://${credentials}@${endpoint}/${POSTGRES_DB}`; // pragma: allowlist secret
}

/**
 * `docker run` arguments that start a detached, auto-removed Postgres
 * container with its port 5432 published on 127.0.0.1:<hostPort>.
 */
export function buildDockerRunArgs({ containerName, hostPort, image = POSTGRES_IMAGE }) {
  const environment = [
    `POSTGRES_PASSWORD=${POSTGRES_PASSWORD}`,
    `POSTGRES_USER=${POSTGRES_USER}`,
    `POSTGRES_DB=${POSTGRES_DB}`,
  ];
  const envFlags = environment.flatMap((assignment) => ['-e', assignment]);
  const publish = ['-p', `127.0.0.1:${hostPort}:5432`];

  return ['run', '--rm', '-d', '--name', containerName, ...envFlags, ...publish, image];
}

/** `docker exec` arguments that run `SELECT 1;` through psql inside the container. */
export function buildPostgresReadyArgs(containerName) {
  const psql = ['psql', '-U', POSTGRES_USER, '-d', POSTGRES_DB, '-v', 'ON_ERROR_STOP=1', '-c', 'SELECT 1;'];
  return ['exec', containerName, ...psql];
}
/** SQL script that recreates and seeds the `customers` and `orders` tables, with comments. */
export function buildSeedSql() {
  return [
    'DROP TABLE IF EXISTS orders;',
    'DROP TABLE IF EXISTS customers;',
    'CREATE TABLE customers (',
    '  id integer PRIMARY KEY,',
    '  name text NOT NULL',
    ');',
    "COMMENT ON TABLE customers IS 'Customers captured by the artifact smoke';",
    "COMMENT ON COLUMN customers.name IS 'Customer display name';",
    'CREATE TABLE orders (',
    '  id integer PRIMARY KEY,',
    '  customer_id integer NOT NULL REFERENCES customers(id),',
    '  status text NOT NULL,',
    '  amount integer NOT NULL',
    ');',
    "COMMENT ON TABLE orders IS 'Orders captured by the artifact smoke';",
    "COMMENT ON COLUMN orders.amount IS 'Order amount in cents';",
    "INSERT INTO customers (id, name) VALUES (1, 'Acme'), (2, 'Globex');",
    "INSERT INTO orders (id, customer_id, status, amount) VALUES (10, 1, 'paid', 2000), (11, 2, 'open', 3500);",
    '',
  ].join('\n');
}

/**
 * Render the smoke project's `klo.yaml` with one read-only Postgres connection.
 *
 * @param {string} postgresUrl Connection URL for the `warehouse` connection.
 * @returns {string} YAML document text ending in a newline.
 */
export function buildKloYaml(postgresUrl) {
  // JSON string syntax is a valid YAML double-quoted scalar, so this escapes
  // any `"` or `\` in the URL instead of emitting broken YAML.
  const quotedUrl = JSON.stringify(String(postgresUrl));

  return [
    'project: artifact-live-database',
    'connections:',
    '  warehouse:',
    '    driver: postgres',
    `    url: ${quotedUrl}`,
    '    readonly: true',
    'storage:',
    '  state: sqlite',
    '  search: sqlite-fts5',
    'ingest:',
    '  adapters:',
    '    - live-database',
    '',
  ].join('\n');
}

/** `pnpm` arguments for `klo dev ingest run` against the `warehouse` connection with the live-database adapter. */
export function buildLiveDatabaseIngestArgs(projectDir, databaseIntrospectionUrl) {
  return [
    'exec',
    'klo',
    'dev',
    'ingest',
    'run',
    '--project-dir',
    projectDir,
    '--connection-id',
    'warehouse',
    '--adapter',
    'live-database',
    '--database-introspection-url',
    databaseIntrospectionUrl,
  ];
}

/** `pnpm` arguments for `klo ingest status` of one run. */
export function buildLiveDatabaseStatusArgs(projectDir, runId) {
  return ['exec', 'klo', 'ingest', 'status', '--project-dir', projectDir, runId];
}

/**
 * Run a command, echoing the command line and its output, and resolve with the
 * outcome. The returned promise never rejects.
 *
 * @param {string} command Executable to run.
 * @param {string[]} args Arguments.
 * @param {{ cwd?: string, env?: object, timeout?: number, input?: string }} [options]
 *   `timeout` defaults to 60 s; `input`, when given, is written to stdin.
 * @returns {Promise<{ code: number, stdout: string, stderr: string }>} `code`
 *   is the numeric exit code, or 1 for any failure without one.
 */
async function run(command, args, options = {}) {
  process.stdout.write(`$ ${command} ${args.join(' ')}\n`);
  return new Promise((resolve) => {
    const child = execFile(
      command,
      args,
      {
        cwd: options.cwd,
        env: options.env ?? process.env,
        encoding: 'utf8',
        maxBuffer: 1024 * 1024 * 20,
        timeout: options.timeout ?? 60_000,
      },
      (error, stdout, stderr) => {
        if (stdout) {
          process.stdout.write(stdout);
        }
        if (stderr) {
          process.stderr.write(stderr);
        }
        resolve({
          // error.code may be a string such as 'ENOENT' when the command could not be spawned.
          code: error && typeof error.code === 'number' ? error.code : error ? 1 : 0,
          stdout,
          stderr: stderr || (error instanceof Error ? error.message : ''),
        });
      },
    );
    if (options.input !== undefined) {
      child.stdin?.end(options.input);
    }
  });
}

/** Throw with the captured output when a `run` result has a non-zero code. */
function requireSuccess(label, result) {
  if (result.code !== 0) {
    throw new Error(
      `${label} failed with code ${result.code}\nstdout:\n${result.stdout}\nstderr:\n${result.stderr}`,
    );
  }
}

/** Throw when a `run` result's stdout does not match `pattern`. */
function requireOutput(label, result, pattern) {
  if (!pattern.test(result.stdout)) {
    throw new Error(`${label} output did not match ${pattern}\nstdout:\n${result.stdout}`);
  }
}

/** Extract the value of the `Run: <id>` line from ingest output; throws when it is absent. */
function getRunId(stdout) {
  const match = stdout.match(/^Run: (.+)$/m);
  if (!match) {
    throw new Error(`ingest run output did not include a run id\nstdout:\n${stdout}`);
  }
  return match[1];
}
/**
 * Issue a GET request and report whether the response status is 2xx.
 *
 * @param {string} url URL to request.
 * @param {number} [timeoutMs=2000] Socket inactivity timeout. Without it a
 *   peer that accepts the connection but never responds would leave the
 *   promise pending forever, defeating the caller's own polling deadline.
 * @returns {Promise<boolean>} Resolves once the response body has been
 *   drained; rejects on connection errors and on timeout.
 */
function httpGetOk(url, timeoutMs = 2_000) {
  return new Promise((resolve, reject) => {
    const request = httpRequest(url, { method: 'GET', timeout: timeoutMs }, (response) => {
      const status = response.statusCode ?? 0;
      // Drain the body so 'end' fires and the socket is released.
      response.resume();
      response.on('end', () => resolve(status >= 200 && status < 300));
      response.on('error', reject);
    });
    // The 'timeout' event only notifies; the request must be torn down explicitly.
    request.on('timeout', () => {
      request.destroy(new Error(`GET ${url} timed out after ${timeoutMs}ms`));
    });
    request.on('error', reject);
    request.end();
  });
}
stopDaemon(daemon) { + if (daemon.child.exitCode !== null || daemon.child.signalCode !== null) { + return; + } + daemon.child.kill('SIGTERM'); + const closed = once(daemon.child, 'close').then(() => true); + const timedOut = new Promise((resolve) => setTimeout(() => resolve(false), 5_000)); + if (!(await Promise.race([closed, timedOut]))) { + daemon.child.kill('SIGKILL'); + await once(daemon.child, 'close'); + } +} + +async function assertPathExists(path, label) { + try { + await access(path); + } catch { + throw new Error(`Missing ${label}: ${path}`); + } +} + +async function prepareCleanInstall(layout, cleanInstallDir) { + const pythonArtifacts = await findPythonArtifacts(layout.pythonDir); + await assertPathExists(layout.contextTarball, '@klo/context tarball'); + await assertPathExists(layout.cliTarball, '@klo/cli tarball'); + await mkdir(cleanInstallDir, { recursive: true }); + await writeFile(join(cleanInstallDir, 'package.json'), `${JSON.stringify(npmSmokePackageJson(layout), null, 2)}\n`); + await run('pnpm', ['install'], { cwd: cleanInstallDir, timeout: 120_000 }).then((result) => + requireSuccess('pnpm install clean artifact project', result), + ); + await run('uv', ['venv', '.venv'], { cwd: cleanInstallDir, timeout: 120_000 }).then((result) => + requireSuccess('uv venv clean artifact project', result), + ); + await run( + 'uv', + pythonArtifactInstallArgs( + join(cleanInstallDir, '.venv', process.platform === 'win32' ? 
'Scripts/python.exe' : 'bin/python'), + pythonArtifacts, + ), + { + cwd: cleanInstallDir, + timeout: 120_000, + }, + ).then((result) => requireSuccess('install Python artifacts', result)); +} + +async function main() { + const layout = packageArtifactLayout(); + const root = await mkdtemp(join(tmpdir(), 'klo-live-db-artifact-smoke-')); + const containerName = smokeContainerName(); + let daemon; + try { + const postgresPort = await getAvailablePort(); + const daemonPort = await getAvailablePort(); + const postgresUrl = buildPostgresUrl(postgresPort); + const cleanInstallDir = join(root, 'npm-clean-install'); + const projectDir = join(root, 'project'); + const databaseIntrospectionUrl = `http://127.0.0.1:${daemonPort}`; + + await startPostgresContainer(containerName, postgresPort); + await waitForPostgres(containerName); + await seedPostgres(containerName); + await prepareCleanInstall(layout, cleanInstallDir); + + await mkdir(projectDir, { recursive: true }); + const init = await run('pnpm', ['exec', 'klo', 'init', projectDir, '--name', 'artifact-live-database'], { + cwd: cleanInstallDir, + timeout: 30_000, + }); + requireSuccess('klo init', init); + await writeFile(join(projectDir, 'klo.yaml'), buildKloYaml(postgresUrl), 'utf8'); + + daemon = await startDaemon(daemonPort, cleanInstallDir); + + const ingestRun = await run('pnpm', buildLiveDatabaseIngestArgs(projectDir, databaseIntrospectionUrl), { + cwd: cleanInstallDir, + env: npmSmokePythonEnv(cleanInstallDir), + timeout: 120_000, + }); + requireSuccess('klo dev ingest run live-database', ingestRun); + requireOutput('klo dev ingest run live-database', ingestRun, /Status: done/); + requireOutput('klo dev ingest run live-database', ingestRun, /Adapter: live-database/); + requireOutput('klo dev ingest run live-database', ingestRun, /Diff: \+4\/~0\/-0\/=0/); + requireOutput('klo dev ingest run live-database', ingestRun, /Raw files: 4/); + requireOutput('klo dev ingest run live-database', ingestRun, /Work units: 2/); + 
+ const runId = getRunId(ingestRun.stdout); + const ingestStatus = await run('pnpm', buildLiveDatabaseStatusArgs(projectDir, runId), { + cwd: cleanInstallDir, + env: npmSmokePythonEnv(cleanInstallDir), + timeout: 30_000, + }); + requireSuccess('klo ingest status live-database', ingestStatus); + requireOutput('klo ingest status live-database', ingestStatus, new RegExp(`Run: ${runId}`)); + requireOutput('klo ingest status live-database', ingestStatus, /Status: done/); + requireOutput('klo ingest status live-database', ingestStatus, /Raw files: 4/); + requireOutput('klo ingest status live-database', ingestStatus, /Work units: 2/); + await assertPathExists(join(projectDir, '.klo', 'db.sqlite'), 'SQLite local ingest state'); + process.stdout.write(`Installed live-database artifact smoke passed: ${runId}\n`); + } finally { + if (daemon) { + await stopDaemon(daemon); + } + await stopPostgresContainer(containerName); + await rm(root, { recursive: true, force: true }); + } +} + +if (import.meta.url === pathToFileURL(process.argv[1] ?? '').href) { + try { + await main(); + } catch (error) { + process.stderr.write(`${error instanceof Error ? 
error.stack : String(error)}\n`); + process.exitCode = 1; + } +} diff --git a/scripts/installed-live-database-smoke.test.mjs b/scripts/installed-live-database-smoke.test.mjs new file mode 100644 index 00000000..58da8222 --- /dev/null +++ b/scripts/installed-live-database-smoke.test.mjs @@ -0,0 +1,128 @@ +import assert from 'node:assert/strict'; +import { describe, it } from 'node:test'; + +import { + buildDockerRunArgs, + buildKloYaml, + buildLiveDatabaseIngestArgs, + buildLiveDatabaseStatusArgs, + buildPostgresUrl, + buildPostgresReadyArgs, + buildSeedSql, + smokeContainerName, +} from './installed-live-database-smoke.mjs'; + +describe('installed live-database artifact smoke helpers', () => { + it('builds a deterministic disposable Postgres container command', () => { + assert.deepEqual( + buildDockerRunArgs({ + containerName: 'klo-live-db-smoke-test', + hostPort: 15432, + image: 'postgres:16-alpine', + }), + [ + 'run', + '--rm', + '-d', + '--name', + 'klo-live-db-smoke-test', + '-e', + 'POSTGRES_PASSWORD=postgres', // pragma: allowlist secret + '-e', + 'POSTGRES_USER=klo', + '-e', + 'POSTGRES_DB=warehouse', + '-p', + '127.0.0.1:15432:5432', + 'postgres:16-alpine', + ], + ); + }); + + it('uses a collision-resistant Docker container name prefix', () => { + assert.match(smokeContainerName(1234, 5678), /^klo-live-db-smoke-1234-5678$/); + }); + + it('builds the Postgres URL used by klo.yaml and daemon introspection', () => { + assert.equal( + buildPostgresUrl(15432), + 'postgresql://klo:postgres@127.0.0.1:15432/warehouse', // pragma: allowlist secret + ); + }); + + it('writes a live-database-only KLO project config with SQLite local state', () => { + assert.equal( + buildKloYaml('postgresql://klo:postgres@127.0.0.1:15432/warehouse'), // pragma: allowlist secret + [ + 'project: artifact-live-database', + 'connections:', + ' warehouse:', + ' driver: postgres', + ' url: "postgresql://klo:postgres@127.0.0.1:15432/warehouse"', // pragma: allowlist secret + ' readonly: 
true', + 'storage:', + ' state: sqlite', + ' search: sqlite-fts5', + 'ingest:', + ' adapters:', + ' - live-database', + '', + ].join('\n'), + ); + }); + + it('seeds comments and a foreign key for daemon catalog introspection', () => { + const sql = buildSeedSql(); + + assert.match(sql, /CREATE TABLE customers/); + assert.match(sql, /CREATE TABLE orders/); + assert.match(sql, /REFERENCES customers\(id\)/); + assert.match(sql, /COMMENT ON TABLE orders IS 'Orders captured by the artifact smoke'/); + assert.match(sql, /COMMENT ON COLUMN orders.amount IS 'Order amount in cents'/); + assert.match(sql, /INSERT INTO orders/); + }); + + it('waits for a real SQL connection to the target Postgres database', () => { + assert.deepEqual(buildPostgresReadyArgs('klo-live-db-smoke-test'), [ + 'exec', + 'klo-live-db-smoke-test', + 'psql', + '-U', + 'klo', + '-d', + 'warehouse', + '-v', + 'ON_ERROR_STOP=1', + '-c', + 'SELECT 1;', + ]); + }); + + it('builds installed CLI live-database ingest and status commands', () => { + assert.deepEqual(buildLiveDatabaseIngestArgs('/tmp/project', 'http://127.0.0.1:8765'), [ + 'exec', + 'klo', + 'dev', + 'ingest', + 'run', + '--project-dir', + '/tmp/project', + '--connection-id', + 'warehouse', + '--adapter', + 'live-database', + '--database-introspection-url', + 'http://127.0.0.1:8765', + ]); + + assert.deepEqual(buildLiveDatabaseStatusArgs('/tmp/project', 'local-run-1'), [ + 'exec', + 'klo', + 'ingest', + 'status', + '--project-dir', + '/tmp/project', + 'local-run-1', + ]); + }); +}); diff --git a/scripts/link-dev-cli.mjs b/scripts/link-dev-cli.mjs new file mode 100644 index 00000000..96e95486 --- /dev/null +++ b/scripts/link-dev-cli.mjs @@ -0,0 +1,197 @@ +#!/usr/bin/env node + +import { execFile } from 'node:child_process'; +import { constants } from 'node:fs'; +import { access as fsAccess, chmod as fsChmod, writeFile as fsWriteFile } from 'node:fs/promises'; +import { delimiter, join } from 'node:path'; +import { pathToFileURL } from 'node:url'; 
+import { promisify } from 'node:util'; +import { ensureCliBinExecutable, kloRootDir } from './prepare-cli-bin.mjs'; + +const execFileAsync = promisify(execFile); + +function hasFlag(flag) { + return process.argv.includes(flag); +} + +function optionValue(flag, fallback) { + const index = process.argv.indexOf(flag); + if (index === -1) { + return fallback; + } + const value = process.argv[index + 1]; + if (!value || value.startsWith('-')) { + throw new Error(`${flag} requires a value`); + } + return value; +} + +function commandEnv(extraPath) { + if (!extraPath) { + return process.env; + } + + return { + ...process.env, + PATH: `${extraPath}${delimiter}${process.env.PATH ?? ''}`, + }; +} + +async function execText(command, args, options = {}) { + const result = await execFileAsync(command, args, { + cwd: options.cwd, + env: options.env, + maxBuffer: 1024 * 1024, + }); + return `${result.stdout}${result.stderr}`.trim(); +} + +async function optionalText(command, args, options = {}) { + try { + return await execText(command, args, options); + } catch { + return ''; + } +} + +async function findPnpmGlobalBin() { + const output = await optionalText('pnpm', ['bin', '--global']); + return output.split(/\r?\n/).find((line) => line.trim().length > 0)?.trim() ?? 
''; +} + +function shellDoubleQuote(value) { + return `"${value.replaceAll('\\', '\\\\').replaceAll('"', '\\"').replaceAll('$', '\\$').replaceAll('`', '\\`')}"`; +} + +function assertBinaryName(binaryName) { + if (!/^[A-Za-z][A-Za-z0-9._-]*$/.test(binaryName)) { + throw new Error(`Invalid binary name: ${binaryName}`); + } +} + +async function writePinnedPosixLauncher(globalBin, binPath, binaryName, writeFile, chmod) { + const launcherPath = join(globalBin, binaryName); + const script = [ + '#!/bin/sh', + '# Generated by `pnpm run link:dev` in the KLO workspace.', + '# Keep this launcher pinned to the Node binary that built native dependencies.', + `exec ${shellDoubleQuote(process.execPath)} ${shellDoubleQuote(binPath)} "$@"`, + '', + ].join('\n'); + + await writeFile(launcherPath, script, 'utf-8'); + await chmod(launcherPath, 0o755); + return launcherPath; +} + +async function writePinnedWindowsLauncher(globalBin, binPath, binaryName, writeFile) { + const launcherPath = join(globalBin, `${binaryName}.cmd`); + const script = [ + '@echo off', + 'REM Generated by `pnpm run link:dev` in the KLO workspace.', + `"${process.execPath}" "${binPath}" %*`, + '', + ].join('\r\n'); + + await writeFile(launcherPath, script, 'utf-8'); + return launcherPath; +} + +async function writePinnedLauncher(globalBin, binPath, binaryName, deps) { + if (!globalBin) { + throw new Error('Could not find pnpm global bin directory. 
Run `pnpm setup`, restart your shell, then retry.'); + } + + if (process.platform === 'win32') { + return writePinnedWindowsLauncher(globalBin, binPath, binaryName, deps.writeFile); + } + + return writePinnedPosixLauncher(globalBin, binPath, binaryName, deps.writeFile, deps.chmod); +} + +async function verifyBinaryOnPath(binaryName, globalBin, execTextFn) { + try { + const output = await execTextFn(binaryName, ['--version']); + return { ok: true, output }; + } catch (error) { + if (!globalBin) { + return { ok: false, output: '', error }; + } + + try { + const output = await execTextFn(binaryName, ['--version'], { env: commandEnv(globalBin) }); + return { ok: false, output, globalBin, error }; + } catch { + return { ok: false, output: '', error }; + } + } +} + +async function assertBuiltCli(rootDir, access, binPathOverride) { + const binPath = binPathOverride ?? (await ensureCliBinExecutable(rootDir)); + await access(binPath, constants.X_OK); + return binPath; +} + +export async function linkDevCli(options = {}) { + const rootDir = options.rootDir ?? kloRootDir(); + const binaryName = options.binaryName ?? 'klo-dev'; + const access = options.access ?? fsAccess; + const chmod = options.chmod ?? fsChmod; + const writeFile = options.writeFile ?? fsWriteFile; + const execTextFn = options.execText ?? execText; + assertBinaryName(binaryName); + + const binPath = await assertBuiltCli(rootDir, access, options.binPath); + const globalBin = options.globalBin ?? (await findPnpmGlobalBin()); + + if (options.checkOnly) { + return { + binaryName, + binPath, + linked: false, + verification: await verifyBinaryOnPath(binaryName, globalBin, execTextFn), + }; + } + + const launcherPath = await writePinnedLauncher(globalBin, binPath, binaryName, { writeFile, chmod }); + const verification = await verifyBinaryOnPath(binaryName, globalBin, execTextFn); + if (!verification.ok) { + const pathHint = verification.globalBin + ? 
`\nAdd pnpm's global bin directory to PATH, then retry:\n\n export PATH="${verification.globalBin}:$PATH"\n\n` + : '\nRun `pnpm setup`, restart your shell, then rerun `pnpm run link:dev`.\n\n'; + + throw new Error(`${binaryName} was linked at ${launcherPath}, but it is not available on PATH.${pathHint}`); + } + + return { + binaryName, + binPath, + launcherPath, + linked: true, + verification, + }; +} + +if (import.meta.url === pathToFileURL(process.argv[1]).href) { + try { + const result = await linkDevCli({ + checkOnly: hasFlag('--check-only'), + binaryName: optionValue('--name', 'klo-dev'), + }); + process.stdout.write(`KLO CLI bin: ${result.binPath}\n`); + if (result.linked) { + process.stdout.write(`Linked binary: ${result.binaryName}\n`); + process.stdout.write(`Verified: ${result.verification.output}\n`); + process.stdout.write(`Pinned Node: ${process.execPath} ${process.version} ABI ${process.versions.modules}\n`); + process.stdout.write(`You can now run \`${result.binaryName} --help\` from any directory.\n`); + } else if (result.verification.ok) { + process.stdout.write(`Already available: ${result.verification.output}\n`); + } else { + process.stdout.write(`${result.binaryName} is not linked on PATH yet.\n`); + } + } catch (error) { + process.stderr.write(`${error instanceof Error ? 
error.message : String(error)}\n`); + process.exitCode = 1; + } +} diff --git a/scripts/link-dev-cli.test.mjs b/scripts/link-dev-cli.test.mjs new file mode 100644 index 00000000..3db235e8 --- /dev/null +++ b/scripts/link-dev-cli.test.mjs @@ -0,0 +1,45 @@ +import assert from 'node:assert/strict'; +import { test } from 'node:test'; +import { linkDevCli } from './link-dev-cli.mjs'; + +test('linkDevCli writes a klo-dev launcher by default', async () => { + const writes = []; + const chmods = []; + + const result = await linkDevCli({ + rootDir: '/workspace/klo', + globalBin: '/pnpm/bin', + binPath: '/workspace/klo/packages/cli/dist/bin.js', + execText: async (command, args) => { + assert.equal(command, 'klo-dev'); + assert.deepEqual(args, ['--version']); + return '@klo/cli 0.0.0-private'; + }, + writeFile: async (path, content) => writes.push({ path, content }), + chmod: async (path, mode) => chmods.push({ path, mode }), + access: async () => undefined, + }); + + assert.equal(result.binaryName, 'klo-dev'); + assert.equal(writes[0].path, '/pnpm/bin/klo-dev'); + assert.match(writes[0].content, /packages\/cli\/dist\/bin.js/); + assert.deepEqual(chmods, [{ path: '/pnpm/bin/klo-dev', mode: 0o755 }]); +}); + +test('linkDevCli can explicitly write klo when requested', async () => { + const writes = []; + + const result = await linkDevCli({ + rootDir: '/workspace/klo', + binaryName: 'klo', + globalBin: '/pnpm/bin', + binPath: '/workspace/klo/packages/cli/dist/bin.js', + execText: async () => '@klo/cli 0.0.0-private', + writeFile: async (path, content) => writes.push({ path, content }), + chmod: async () => undefined, + access: async () => undefined, + }); + + assert.equal(result.binaryName, 'klo'); + assert.equal(writes[0].path, '/pnpm/bin/klo'); +}); diff --git a/scripts/package-artifacts.mjs b/scripts/package-artifacts.mjs new file mode 100644 index 00000000..56c79a1d --- /dev/null +++ b/scripts/package-artifacts.mjs @@ -0,0 +1,1686 @@ +#!/usr/bin/env node + +import { 
createHash } from 'node:crypto'; +import { execFile } from 'node:child_process'; +import { access, mkdir, mkdtemp, readFile, readdir, rm, writeFile } from 'node:fs/promises'; +import { tmpdir } from 'node:os'; +import { delimiter, dirname, isAbsolute, join, relative, resolve, sep } from 'node:path'; +import { fileURLToPath, pathToFileURL } from 'node:url'; + +const PACKAGE_VERSION = '0.0.0-private'; +const PYTHON_PACKAGE_VERSION = '0.1.0'; + +export const NPM_ARTIFACT_PACKAGES = [ + { name: '@klo/context', packageRoot: 'packages/context' }, + { name: '@klo/llm', packageRoot: 'packages/llm' }, + { name: '@klo/connector-bigquery', packageRoot: 'packages/connector-bigquery' }, + { name: '@klo/connector-clickhouse', packageRoot: 'packages/connector-clickhouse' }, + { name: '@klo/connector-mysql', packageRoot: 'packages/connector-mysql' }, + { name: '@klo/connector-postgres', packageRoot: 'packages/connector-postgres' }, + { name: '@klo/connector-posthog', packageRoot: 'packages/connector-posthog' }, + { name: '@klo/connector-snowflake', packageRoot: 'packages/connector-snowflake' }, + { name: '@klo/connector-sqlite', packageRoot: 'packages/connector-sqlite' }, + { name: '@klo/connector-sqlserver', packageRoot: 'packages/connector-sqlserver' }, + { name: '@klo/cli', packageRoot: 'packages/cli' }, +]; + +const CONNECTOR_PACKAGE_NAMES = NPM_ARTIFACT_PACKAGES + .map((packageInfo) => packageInfo.name) + .filter((packageName) => packageName.startsWith('@klo/connector-')); + +const ordersSource = { + name: 'orders', + table: 'public.orders', + grain: ['id'], + columns: [ + { name: 'id', type: 'number' }, + { name: 'status', type: 'string' }, + { name: 'amount', type: 'number' }, + ], + measures: [{ name: 'order_count', expr: 'count(*)' }], + joins: [], +}; + +function scriptRootDir() { + return resolve(dirname(fileURLToPath(import.meta.url)), '..'); +} + +function npmPackageTarballName(packageName) { + return `${packageName.replace('@klo/', 'klo-')}-${PACKAGE_VERSION}.tgz`; 
+} + +function npmPackageTarballs(npmDir) { + return Object.fromEntries( + NPM_ARTIFACT_PACKAGES.map((packageInfo) => [packageInfo.name, join(npmDir, npmPackageTarballName(packageInfo.name))]), + ); +} + +export function packageArtifactLayout(rootDir = scriptRootDir()) { + const artifactDir = join(rootDir, 'dist', 'artifacts'); + const npmDir = join(artifactDir, 'npm'); + const pythonDir = join(artifactDir, 'python'); + const npmTarballs = npmPackageTarballs(npmDir); + + return { + rootDir, + artifactDir, + npmDir, + pythonDir, + npmTarballs, + contextTarball: npmTarballs['@klo/context'], + cliTarball: npmTarballs['@klo/cli'], + connectorTarballs: Object.fromEntries( + CONNECTOR_PACKAGE_NAMES.map((packageName) => [packageName, npmTarballs[packageName]]), + ), + manifestPath: join(artifactDir, 'manifest.json'), + }; +} + +export function buildArtifactCommands(layout) { + const npmBuildCommands = NPM_ARTIFACT_PACKAGES.map((packageInfo) => ({ + command: 'pnpm', + args: ['--filter', packageInfo.name, 'run', 'build'], + cwd: layout.rootDir, + })); + const npmPackCommands = NPM_ARTIFACT_PACKAGES.map((packageInfo) => ({ + command: 'pnpm', + args: ['--filter', packageInfo.name, 'pack', '--out', layout.npmTarballs[packageInfo.name]], + cwd: layout.rootDir, + })); + + return [ + ...npmBuildCommands, + ...npmPackCommands, + { + command: 'uv', + args: ['build', '--package', 'klo-sl', '--out-dir', layout.pythonDir], + cwd: layout.rootDir, + }, + { + command: 'uv', + args: ['build', '--package', 'klo-daemon', '--out-dir', layout.pythonDir], + cwd: layout.rootDir, + }, + ]; +} + +async function pathExists(path) { + try { + await access(path); + return true; + } catch { + return false; + } +} + +async function assertPathExists(path, label) { + if (!(await pathExists(path))) { + throw new Error(`Missing ${label}: ${path}`); + } +} + +function normalizePythonDistributionName(name) { + return name.replaceAll('-', '_'); +} + +function findOne(files, distributionName, suffix, label, 
pythonDir) { + const normalized = normalizePythonDistributionName(distributionName); + const found = files.find((file) => file.startsWith(`${normalized}-${PYTHON_PACKAGE_VERSION}`) && file.endsWith(suffix)); + if (!found) { + throw new Error(`Missing Python artifact: ${label}`); + } + return join(pythonDir, found); +} + +export async function findPythonArtifacts(pythonDir) { + const files = await readdir(pythonDir); + + return { + kloSlWheel: findOne(files, 'klo-sl', '.whl', 'klo-sl wheel', pythonDir), + kloSlSdist: findOne(files, 'klo-sl', '.tar.gz', 'klo-sl source distribution', pythonDir), + kloDaemonWheel: findOne(files, 'klo-daemon', '.whl', 'klo-daemon wheel', pythonDir), + kloDaemonSdist: findOne(files, 'klo-daemon', '.tar.gz', 'klo-daemon source distribution', pythonDir), + }; +} + +export function artifactManifestPath(layout) { + return layout.manifestPath ?? join(layout.artifactDir, 'manifest.json'); +} + +async function readJson(path) { + return JSON.parse(await readFile(path, 'utf-8')); +} + +function readProjectBlock(toml, sourcePath) { + const lines = toml.split(/\r?\n/); + const block = []; + let inProject = false; + + for (const line of lines) { + if (/^\[project\]\s*$/.test(line)) { + inProject = true; + continue; + } + if (inProject && /^\[.*\]\s*$/.test(line)) { + break; + } + if (inProject) { + block.push(line); + } + } + + if (!inProject) { + throw new Error(`Missing [project] table in ${sourcePath}`); + } + return block.join('\n'); +} + +function readTomlStringField(projectBlock, fieldName, sourcePath) { + const match = projectBlock.match(new RegExp(`^${fieldName}\\s*=\\s*"([^"]+)"\\s*$`, 'm')); + if (!match) { + throw new Error(`Missing project.${fieldName} in ${sourcePath}`); + } + return match[1]; +} + +async function readPyprojectMetadata(path) { + const toml = await readFile(path, 'utf-8'); + const projectBlock = readProjectBlock(toml, path); + return { + name: readTomlStringField(projectBlock, 'name', path), + version: 
readTomlStringField(projectBlock, 'version', path), + }; +} + +function releaseMetadataEntry({ ecosystem, packageName, packageRoot, packageVersion, privatePackage }) { + return { + ecosystem, + packageName, + packageRoot, + packageVersion, + private: privatePackage, + releaseMode: 'ci-artifact-only', + }; +} + +async function readNpmPackageMetadata(rootDir, packageInfo) { + const packageJson = await readJson(join(rootDir, packageInfo.packageRoot, 'package.json')); + if (packageJson.name !== packageInfo.name) { + throw new Error( + `Unexpected package name in ${packageInfo.packageRoot}/package.json: expected ${packageInfo.name}, got ${packageJson.name}`, + ); + } + return releaseMetadataEntry({ + ecosystem: 'npm', + packageName: packageJson.name, + packageRoot: packageInfo.packageRoot, + packageVersion: packageJson.version, + privatePackage: packageJson.private === true, + }); +} + +export async function packageReleaseMetadata(rootDir = scriptRootDir()) { + const npmPackages = await Promise.all( + NPM_ARTIFACT_PACKAGES.map((packageInfo) => readNpmPackageMetadata(rootDir, packageInfo)), + ); + const kloSlPackage = await readPyprojectMetadata(join(rootDir, 'python', 'klo-sl', 'pyproject.toml')); + const kloDaemonPackage = await readPyprojectMetadata(join(rootDir, 'python', 'klo-daemon', 'pyproject.toml')); + + return [ + ...npmPackages, + releaseMetadataEntry({ + ecosystem: 'python', + packageName: kloSlPackage.name, + packageRoot: 'python/klo-sl', + packageVersion: kloSlPackage.version, + privatePackage: false, + }), + releaseMetadataEntry({ + ecosystem: 'python', + packageName: kloDaemonPackage.name, + packageRoot: 'python/klo-daemon', + packageVersion: kloDaemonPackage.version, + privatePackage: false, + }), + ]; +} + +function packageMetadataByName(packages) { + return new Map(packages.map((metadata) => [metadata.packageName, metadata])); +} + +function requirePackageMetadata(packagesByName, packageName) { + const metadata = packagesByName.get(packageName); + if 
(!metadata) { + throw new Error(`Missing package release metadata for ${packageName}`); + } + return metadata; +} + +function artifactPackageRecords(layout, pythonArtifacts, packages) { + const packagesByName = packageMetadataByName(packages); + const npmRecords = NPM_ARTIFACT_PACKAGES.map((packageInfo) => ({ + artifactKind: 'tarball', + artifactPath: layout.npmTarballs[packageInfo.name], + metadata: requirePackageMetadata(packagesByName, packageInfo.name), + })); + + return [ + ...npmRecords, + { + artifactKind: 'wheel', + artifactPath: pythonArtifacts.kloSlWheel, + metadata: requirePackageMetadata(packagesByName, 'klo-sl'), + }, + { + artifactKind: 'sdist', + artifactPath: pythonArtifacts.kloSlSdist, + metadata: requirePackageMetadata(packagesByName, 'klo-sl'), + }, + { + artifactKind: 'wheel', + artifactPath: pythonArtifacts.kloDaemonWheel, + metadata: requirePackageMetadata(packagesByName, 'klo-daemon'), + }, + { + artifactKind: 'sdist', + artifactPath: pythonArtifacts.kloDaemonSdist, + metadata: requirePackageMetadata(packagesByName, 'klo-daemon'), + }, + ]; +} + +function artifactRelativePath(layout, artifactPath) { + return relative(layout.artifactDir, artifactPath).split(sep).join('/'); +} + +function formatJson(value) { + return JSON.stringify(value, null, 2); +} + +function assertJsonEqual(actual, expected, label) { + if (JSON.stringify(actual) !== JSON.stringify(expected)) { + throw new Error(`${label} do not match\nExpected:\n${formatJson(expected)}\nActual:\n${formatJson(actual)}`); + } +} + +function isPlainObject(value) { + return typeof value === 'object' && value !== null && !Array.isArray(value); +} + +function assertString(value, label) { + if (typeof value !== 'string') { + throw new Error(`${label} must be a string`); + } +} + +function artifactPathFromManifest(layout, manifestPath) { + assertString(manifestPath, 'Artifact manifest file path'); + + if ( + manifestPath.length === 0 || + manifestPath.startsWith('/') || + 
manifestPath.includes('\\') || + manifestPath.split('/').some((part) => part.length === 0 || part === '..') + ) { + throw new Error(`Unsafe artifact manifest path: ${manifestPath}`); + } + + const resolvedPath = resolve(layout.artifactDir, manifestPath); + const relativePath = relative(layout.artifactDir, resolvedPath); + if (relativePath.startsWith('..') || isAbsolute(relativePath)) { + throw new Error(`Unsafe artifact manifest path: ${manifestPath}`); + } + + return resolvedPath; +} + +function sortedManifestFiles(files) { + return [...files].sort((a, b) => a.path.localeCompare(b.path)); +} + +function assertManifestShape(manifest) { + if (!isPlainObject(manifest)) { + throw new Error('Artifact manifest must be a JSON object'); + } + if (manifest.schemaVersion !== 2) { + throw new Error(`Unsupported artifact manifest schemaVersion: ${manifest.schemaVersion}`); + } + assertString(manifest.generatedAt, 'Artifact manifest generatedAt'); + if (Number.isNaN(Date.parse(manifest.generatedAt))) { + throw new Error(`Artifact manifest generatedAt is not an ISO timestamp: ${manifest.generatedAt}`); + } + if (manifest.sourceRevision !== null && typeof manifest.sourceRevision !== 'string') { + throw new Error('Artifact manifest sourceRevision must be a string or null'); + } + if (!Array.isArray(manifest.packages)) { + throw new Error('Artifact manifest packages must be an array'); + } + if (!Array.isArray(manifest.files)) { + throw new Error('Artifact manifest files must be an array'); + } +} + +async function artifactManifestEntry(layout, record) { + const contents = await readFile(record.artifactPath); + return { + path: artifactRelativePath(layout, record.artifactPath), + ecosystem: record.metadata.ecosystem, + artifactKind: record.artifactKind, + packageName: record.metadata.packageName, + packageVersion: record.metadata.packageVersion, + bytes: contents.byteLength, + sha256: createHash('sha256').update(contents).digest('hex'), + }; +} + +export async function 
buildArtifactManifest(layout, generatedAt = new Date(), options = {}) { + const pythonArtifacts = await findPythonArtifacts(layout.pythonDir); + const packages = await packageReleaseMetadata(layout.rootDir); + const artifactRecords = artifactPackageRecords(layout, pythonArtifacts, packages); + const files = await Promise.all(artifactRecords.map((record) => artifactManifestEntry(layout, record))); + + return { + schemaVersion: 2, + generatedAt: generatedAt.toISOString(), + sourceRevision: options.sourceRevision ?? process.env.GITHUB_SHA ?? null, + packages, + files: files.sort((a, b) => a.path.localeCompare(b.path)), + }; +} + +export async function writeArtifactManifest(layout, generatedAt = new Date(), options = {}) { + const manifest = await buildArtifactManifest(layout, generatedAt, options); + await writeFile(artifactManifestPath(layout), `${JSON.stringify(manifest, null, 2)}\n`); + return manifest; +} + +export async function verifyArtifactManifest(layout, options = {}) { + const manifest = await readJson(artifactManifestPath(layout)); + assertManifestShape(manifest); + + const expectedSourceRevision = options.expectedSourceRevision ?? 
process.env.KLO_EXPECTED_SOURCE_REVISION; + if (expectedSourceRevision !== undefined && manifest.sourceRevision !== expectedSourceRevision) { + throw new Error( + `Artifact manifest sourceRevision mismatch: expected ${expectedSourceRevision}, got ${manifest.sourceRevision}`, + ); + } + + const expectedPackages = await packageReleaseMetadata(layout.rootDir); + assertJsonEqual(manifest.packages, expectedPackages, 'Artifact manifest packages'); + + for (const file of manifest.files) { + if (!isPlainObject(file)) { + throw new Error('Artifact manifest file entries must be JSON objects'); + } + artifactPathFromManifest(layout, file.path); + } + + const pythonArtifacts = await findPythonArtifacts(layout.pythonDir); + const expectedFiles = await Promise.all( + artifactPackageRecords(layout, pythonArtifacts, expectedPackages).map((record) => artifactManifestEntry(layout, record)), + ); + assertJsonEqual( + sortedManifestFiles(manifest.files), + sortedManifestFiles(expectedFiles), + 'Artifact manifest files do not match artifact contents', + ); + + return manifest; +} + +export function pythonArtifactInstallArgs(python, pythonArtifacts) { + return [ + 'pip', + 'install', + '--python', + python, + pythonArtifacts.kloSlWheel, + pythonArtifacts.kloDaemonWheel, + ]; +} + +function runCommand(command, args, options = {}) { + const cwd = options.cwd ?? 
process.cwd(); + process.stdout.write(`$ ${command} ${args.join(' ')}\n`); + + return new Promise((resolvePromise, reject) => { + const child = execFile( + command, + args, + { + cwd, + env: { ...process.env, ...options.env }, + maxBuffer: 1024 * 1024 * 20, + }, + (error, stdout, stderr) => { + if (stdout) { + process.stdout.write(stdout); + } + if (stderr) { + process.stderr.write(stderr); + } + if (error) { + reject(error); + return; + } + resolvePromise({ stdout, stderr }); + }, + ); + + if (options.input !== undefined) { + child.stdin?.end(options.input); + } + }); +} + +function npmTarballDependencyEntries(layout) { + return Object.fromEntries( + NPM_ARTIFACT_PACKAGES.map((packageInfo) => [ + packageInfo.name, + `file:${layout.npmTarballs[packageInfo.name]}`, + ]), + ); +} + +export function npmSmokePackageJson(layout) { + const npmTarballDependencies = npmTarballDependencyEntries(layout); + return { + name: 'klo-artifact-npm-smoke', + version: '0.0.0', + private: true, + type: 'module', + dependencies: { + ...npmTarballDependencies, + '@modelcontextprotocol/sdk': '^1.27.1', + }, + pnpm: { + overrides: npmTarballDependencies, + onlyBuiltDependencies: ['better-sqlite3'], + }, + }; +} + +export function npmVerifySource() { + return ` +const context = await import('@klo/context'); +const project = await import('@klo/context/project'); +const mcp = await import('@klo/context/mcp'); +const memory = await import('@klo/context/memory'); +const daemon = await import('@klo/context/daemon'); +const ingest = await import('@klo/context/ingest'); +const search = await import('@klo/context/search'); +const llm = await import('@klo/llm'); +const cli = await import('@klo/cli'); +const bigqueryConnector = await import('@klo/connector-bigquery'); +const clickhouseConnector = await import('@klo/connector-clickhouse'); +const mysqlConnector = await import('@klo/connector-mysql'); +const postgresConnector = await import('@klo/connector-postgres'); +const posthogConnector = await 
/**
 * Return the source text of the ES module that `verifyNpmArtifacts` writes to
 * `verify-npm.mjs` and runs with `node` inside the clean-install project.
 *
 * The generated script dynamically imports `@klo/context` (root plus the
 * `project`, `mcp`, `memory`, `daemon`, `ingest` and `search` subpaths),
 * `@klo/llm`, `@klo/cli` and the eight `@klo/connector-*` packages, then throws
 * an `Error` when:
 *   - the `@klo/context` or `@klo/cli` package info name is unexpected,
 *   - any listed export is not a function (LLM, project, MCP, memory, search,
 *     daemon, dbt/MetricFlow extraction helpers, connector scan
 *     connectors and dialects),
 *   - `parseMetricflowPullConfig` does not default `branch` to `'main'` and
 *     `path` to `null` for a config that only sets `repoUrl`.
 *
 * The whole body is a single template literal, so every character inside it is
 * runtime data: edit it as script source, not as code of this module.
 *
 * @returns {string} JavaScript (ESM, top-level await) source code.
 */
export function npmVerifySource() {
  return `
const context = await import('@klo/context');
const project = await import('@klo/context/project');
const mcp = await import('@klo/context/mcp');
const memory = await import('@klo/context/memory');
const daemon = await import('@klo/context/daemon');
const ingest = await import('@klo/context/ingest');
const search = await import('@klo/context/search');
const llm = await import('@klo/llm');
const cli = await import('@klo/cli');
const bigqueryConnector = await import('@klo/connector-bigquery');
const clickhouseConnector = await import('@klo/connector-clickhouse');
const mysqlConnector = await import('@klo/connector-mysql');
const postgresConnector = await import('@klo/connector-postgres');
const posthogConnector = await import('@klo/connector-posthog');
const snowflakeConnector = await import('@klo/connector-snowflake');
const sqliteConnector = await import('@klo/connector-sqlite');
const sqlserverConnector = await import('@klo/connector-sqlserver');

if (context.kloContextPackageInfo.name !== '@klo/context') {
  throw new Error('Unexpected @klo/context package info');
}
if (typeof llm.createKloLlmProvider !== 'function') {
  throw new Error('Missing createKloLlmProvider export');
}
if (typeof llm.KloMessageBuilder !== 'function') {
  throw new Error('Missing KloMessageBuilder export');
}
if (typeof llm.createKloEmbeddingProvider !== 'function') {
  throw new Error('Missing createKloEmbeddingProvider export');
}
if (typeof project.initKloProject !== 'function') {
  throw new Error('Missing initKloProject export');
}
if (typeof mcp.createDefaultKloMcpServer !== 'function') {
  throw new Error('Missing createDefaultKloMcpServer export');
}
if (typeof memory.createLocalProjectMemoryCapture !== 'function') {
  throw new Error('Missing createLocalProjectMemoryCapture export');
}
if (typeof search.HybridSearchCore !== 'function') {
  throw new Error('Missing HybridSearchCore export from @klo/context/search');
}
if (typeof search.assertSearchBackendConformanceCase !== 'function') {
  throw new Error('Missing assertSearchBackendConformanceCase export from @klo/context/search');
}
if (typeof search.assertSearchBackendCapabilities !== 'function') {
  throw new Error('Missing assertSearchBackendCapabilities export from @klo/context/search');
}
if (typeof daemon.createPythonSemanticLayerComputePort !== 'function') {
  throw new Error('Missing createPythonSemanticLayerComputePort export');
}
const dbtExtractionExports = [
  ['parseMetricflowFiles', ingest.parseMetricflowFiles],
  ['parseMetricflowPullConfig', ingest.parseMetricflowPullConfig],
  ['importMetricflowSemanticModels', ingest.importMetricflowSemanticModels],
  ['parseDbtSchemaFiles', ingest.parseDbtSchemaFiles],
  ['toDescriptionUpdates', ingest.toDescriptionUpdates],
  ['toRelationshipUpdates', ingest.toRelationshipUpdates],
  ['mergeSemanticModelTables', ingest.mergeSemanticModelTables],
  ['loadProjectInfo', ingest.loadProjectInfo],
  ['loadDbtSchemaFiles', ingest.loadDbtSchemaFiles],
];

for (const [exportName, exportValue] of dbtExtractionExports) {
  if (typeof exportValue !== 'function') {
    throw new Error('Missing dbt extraction export: ' + exportName);
  }
}

const metricflowConfig = ingest.parseMetricflowPullConfig({
  repoUrl: 'https://example.com/acme/analytics.git',
});
if (metricflowConfig.branch !== 'main' || metricflowConfig.path !== null) {
  throw new Error('Unexpected MetricFlow pull-config defaults from installed @klo/context/ingest');
}
if (cli.getKloCliPackageInfo().name !== '@klo/cli') {
  throw new Error('Unexpected @klo/cli package info');
}

const connectorExports = [
  ['@klo/connector-bigquery', bigqueryConnector.KloBigQueryScanConnector, bigqueryConnector.KloBigQueryDialect],
  ['@klo/connector-clickhouse', clickhouseConnector.KloClickHouseScanConnector, clickhouseConnector.KloClickHouseDialect],
  ['@klo/connector-mysql', mysqlConnector.KloMysqlScanConnector, mysqlConnector.KloMysqlDialect],
  ['@klo/connector-postgres', postgresConnector.KloPostgresScanConnector, postgresConnector.KloPostgresDialect],
  ['@klo/connector-posthog', posthogConnector.KloPostHogScanConnector, posthogConnector.KloPostHogDialect],
  ['@klo/connector-snowflake', snowflakeConnector.KloSnowflakeScanConnector, snowflakeConnector.KloSnowflakeDialect],
  ['@klo/connector-sqlite', sqliteConnector.KloSqliteScanConnector, sqliteConnector.KloSqliteDialect],
  ['@klo/connector-sqlserver', sqlserverConnector.KloSqlServerScanConnector, sqlserverConnector.KloSqlServerDialect],
];

for (const [packageName, ScanConnector, Dialect] of connectorExports) {
  if (typeof ScanConnector !== 'function') {
    throw new Error('Missing scan connector export from ' + packageName);
  }
  if (typeof Dialect !== 'function') {
    throw new Error('Missing dialect export from ' + packageName);
  }
}
`;
}
error.message, + }; + } +} + +function requireSuccess(label, result) { + assert.equal( + result.code, + 0, + label + ' failed with code ' + result.code + '\\nstdout:\\n' + result.stdout + '\\nstderr:\\n' + result.stderr, + ); + assert.equal(result.stderr, '', label + ' wrote unexpected stderr'); +} + +function requireOutput(label, result, text) { + assert.match(result.stdout, text, label + ' output did not match ' + text); +} + +function parseJsonResult(label, result) { + requireSuccess(label, result); + return JSON.parse(result.stdout); +} + +function parseJsonFailure(label, result) { + assert.equal(result.code, 1, label + ' should fail with exit code 1'); + assert.equal(result.stdout, '', label + ' should not write stdout when failing'); + return JSON.parse(result.stderr); +} + +function requireIncludes(values, expected, label) { + assert.ok(Array.isArray(values), label + ' must be an array'); + assert.ok(values.includes(expected), label + ' did not include ' + expected + ': ' + values.join(', ')); +} + +function getRunId(stdout) { + const match = stdout.match(/^Run: (.+)$/m); + assert.ok(match, 'ingest run output did not include a run id'); + return match[1]; +} + +function requireToolNames(tools, expectedNames) { + const names = tools.tools.map((tool) => tool.name).sort(); + for (const expectedName of expectedNames) { + assert.ok(names.includes(expectedName), 'MCP tool list did not include ' + expectedName + ': ' + names.join(', ')); + } +} + +function structuredContent(result) { + assert.ok(result.structuredContent, 'MCP result did not include structuredContent'); + return result.structuredContent; +} + +function sleep(ms) { + return new Promise((resolve) => setTimeout(resolve, ms)); +} + +async function getAvailablePort() { + const server = createServer(); + server.listen(0, '127.0.0.1'); + await once(server, 'listening'); + const address = server.address(); + if (!address || typeof address === 'string') { + server.close(); + throw new Error('expected TCP 
server address for daemon smoke'); + } + const port = address.port; + server.close(); + await once(server, 'close'); + return port; +} + +function httpGetOk(url) { + return new Promise((resolve, reject) => { + const request = httpRequest(url, { method: 'GET' }, (response) => { + response.resume(); + response.on('end', () => resolve((response.statusCode ?? 0) >= 200 && (response.statusCode ?? 0) < 300)); + }); + request.on('error', reject); + request.end(); + }); +} + +function spawnLogged(command, args, options = {}) { + const stdout = []; + const stderr = []; + let spawnError; + const child = spawn(command, args, { + cwd: options.cwd, + env: options.env ?? process.env, + stdio: ['ignore', 'pipe', 'pipe'], + }); + child.stdout.on('data', (chunk) => stdout.push(chunk)); + child.stderr.on('data', (chunk) => stderr.push(chunk)); + child.on('error', (error) => { + spawnError = error; + }); + return { + child, + error() { + return spawnError; + }, + output() { + return { + stdout: Buffer.concat(stdout).toString('utf8'), + stderr: Buffer.concat(stderr).toString('utf8'), + }; + }, + }; +} + +async function waitForHttpHealth(url, daemon) { + const deadline = Date.now() + 15_000; + while (Date.now() < deadline) { + if (daemon.error()) { + const output = daemon.output(); + throw new Error( + 'Failed to start klo-daemon serve-http: ' + + daemon.error().message + + '\\nstdout:\\n' + + output.stdout + + '\\nstderr:\\n' + + output.stderr, + ); + } + if (daemon.child.exitCode !== null || daemon.child.signalCode !== null) { + const output = daemon.output(); + throw new Error( + 'klo-daemon serve-http exited before health check passed\\nstdout:\\n' + + output.stdout + + '\\nstderr:\\n' + + output.stderr, + ); + } + try { + if (await httpGetOk(url)) { + return; + } + } catch { + await sleep(100); + continue; + } + await sleep(100); + } + const output = daemon.output(); + throw new Error('Timed out waiting for ' + url + '\\nstdout:\\n' + output.stdout + '\\nstderr:\\n' + 
// Launch klo-daemon serve-http on 127.0.0.1:<port> and wait until its /health
// endpoint answers with a 2xx status.
//
// Returns the spawnLogged handle ({ child, error(), output() }) once healthy.
// If the health wait fails (spawn error, early exit, or timeout) the child is
// stopped before the error is rethrown, so a failed smoke run does not leave an
// orphaned daemon process holding the port. stopSemanticDaemon is a no-op when
// the child has already exited.
async function startSemanticDaemon(port) {
  const daemon = spawnLogged('klo-daemon', [
    'serve-http',
    '--host',
    '127.0.0.1',
    '--port',
    String(port),
    '--log-level',
    'warning',
  ]);
  try {
    await waitForHttpHealth('http://127.0.0.1:' + port + '/health', daemon);
  } catch (error) {
    await stopSemanticDaemon(daemon);
    throw error;
  }
  return daemon;
}
'agent', + 'sl', + 'list', + '--json', + '--query', + 'revenue', + '--project-dir', + missingProjectDir, + ]); + const missingProjectError = parseJsonFailure('klo agent sl list missing project', missingProjectSearch); + assert.equal(missingProjectError.error.code, 'agent_sl_search_missing_project'); + assert.deepEqual(missingProjectError.error.nextSteps, [ + 'klo demo', + 'klo setup --project-dir ' + missingProjectDir, + 'klo ingest ', + 'klo agent sl list --json --query "revenue" --project-dir ' + missingProjectDir, + ]); + process.stdout.write('klo agent sl list missing project guidance verified\\n'); + + const init = await run('pnpm', [ + 'exec', + 'klo', + 'setup', + '--project-dir', + projectDir, + '--new', + '--no-input', + '--yes', + '--skip-llm', + '--skip-embeddings', + '--skip-databases', + '--skip-sources', + '--skip-agents', + ]); + requireSuccess('klo setup', init); + requireOutput('klo setup', init, /Project: /); + + const emptyProjectDir = join(root, 'empty-project'); + const emptyInit = await run('pnpm', [ + 'exec', + 'klo', + 'setup', + '--project-dir', + emptyProjectDir, + '--new', + '--no-input', + '--yes', + '--skip-llm', + '--skip-embeddings', + '--skip-databases', + '--skip-sources', + '--skip-agents', + ]); + requireSuccess('klo setup empty project', emptyInit); + const emptySearch = await run('pnpm', [ + 'exec', + 'klo', + 'agent', + 'sl', + 'list', + '--json', + '--query', + 'revenue', + '--project-dir', + emptyProjectDir, + ]); + const emptySearchError = parseJsonFailure('klo agent sl list no connections', emptySearch); + assert.equal(emptySearchError.error.code, 'agent_sl_search_no_connections'); + assert.deepEqual(emptySearchError.error.nextSteps, [ + 'klo demo', + 'klo setup --project-dir ' + emptyProjectDir, + 'klo ingest ', + 'klo agent sl list --json --query "revenue" --project-dir ' + emptyProjectDir, + ]); + process.stdout.write('klo agent sl list no connections guidance verified\\n'); + + await writeFile( + join(projectDir, 
'klo.yaml'), + [ + 'project: warehouse', + 'connections:', + ' warehouse:', + ' driver: sqlite', + ' path: warehouse.db', + ' readonly: true', + 'storage:', + ' state: sqlite', + ' search: sqlite-fts5', + 'scan:', + ' enrichment:', + ' mode: deterministic', + 'ingest:', + ' adapters:', + ' - fake', + ' - live-database', + '', + ].join('\\n'), + 'utf-8', + ); + await writeSqliteWarehouse(projectDir); + + const lookerStore = new LocalLookerRuntimeStore({ dbPath: join(projectDir, '.klo', 'db.sqlite') }); + await lookerStore.setCursors('prod-looker', { + dashboardsLastSyncedAt: null, + looksLastSyncedAt: null, + }); + await lookerStore.upsertConnectionMapping({ + lookerConnectionId: 'prod-looker', + lookerConnectionName: 'analytics', + kloConnectionId: 'warehouse', + source: 'cli', + }); + const lookerMappings = await lookerStore.readMappings('prod-looker'); + assert.equal(lookerMappings.length, 1); + assert.equal(lookerMappings[0].kloConnectionId, 'warehouse'); + process.stdout.write('Looker local runtime store verified\\n'); + + await mkdir(join(projectDir, 'knowledge', 'global'), { recursive: true }); + await writeFile( + join(projectDir, 'knowledge', 'global', 'revenue.md'), + [ + '---', + 'summary: Paid order value', + 'tags:', + ' - finance', + 'refs: []', + 'sl_refs: []', + 'usage_mode: auto', + '---', + '', + 'Revenue is the sum of paid order amounts.', + '', + ].join('\\n'), + 'utf-8', + ); + + const agentWikiSearch = await run('pnpm', [ + 'exec', + 'klo', + 'agent', + 'wiki', + 'search', + 'revenue', + '--json', + '--limit', + '5', + '--project-dir', + projectDir, + ]); + const agentWikiSearchJson = parseJsonResult('klo agent wiki search', agentWikiSearch); + assert.equal(agentWikiSearchJson.totalFound, 1); + assert.equal(agentWikiSearchJson.results[0].key, 'revenue'); + assert.equal(agentWikiSearchJson.results[0].path, 'knowledge/global/revenue.md'); + assert.equal(typeof agentWikiSearchJson.results[0].score, 'number'); + 
requireIncludes(agentWikiSearchJson.results[0].matchReasons, 'lexical', 'agent wiki search match reasons'); + process.stdout.write('klo agent wiki search hybrid metadata verified\\n'); + await access(join(projectDir, '.klo', 'db.sqlite')); + process.stdout.write('SQLite knowledge index: ' + join(projectDir, '.klo', 'db.sqlite') + '\\n'); + + const noSourceSearch = await run('pnpm', [ + 'exec', + 'klo', + 'agent', + 'sl', + 'list', + '--json', + '--connection-id', + 'warehouse', + '--query', + 'revenue', + '--project-dir', + projectDir, + ]); + const noSourceSearchError = parseJsonFailure('klo agent sl list no indexed sources', noSourceSearch); + assert.equal(noSourceSearchError.error.code, 'agent_sl_search_no_indexed_sources'); + assert.deepEqual(noSourceSearchError.error.nextSteps, [ + 'klo demo', + 'klo setup --project-dir ' + projectDir, + 'klo ingest ', + 'klo agent sl list --json --query "revenue" --project-dir ' + projectDir, + ]); + process.stdout.write('klo agent sl list no indexed sources guidance verified\\n'); + + const slYaml = [ + 'name: orders', + 'table: orders', + 'grain:', + ' - id', + 'columns:', + ' - name: id', + ' type: number', + ' - name: amount', + ' type: number', + 'measures:', + ' - name: order_count', + ' expr: count(*)', + 'joins: []', + '', + ].join('\\n'); + + await mkdir(join(projectDir, 'semantic-layer', 'warehouse'), { recursive: true }); + await writeFile(join(projectDir, 'semantic-layer', 'warehouse', 'orders.yaml'), slYaml, 'utf-8'); + + const agentSlSearch = await run('pnpm', [ + 'exec', + 'klo', + 'agent', + 'sl', + 'list', + '--json', + '--connection-id', + 'warehouse', + '--query', + 'orders', + '--project-dir', + projectDir, + ]); + const agentSlSearchJson = parseJsonResult('klo agent sl list', agentSlSearch); + assert.equal(agentSlSearchJson.totalSources, 1); + assert.equal(agentSlSearchJson.sources[0].connectionId, 'warehouse'); + assert.equal(agentSlSearchJson.sources[0].name, 'orders'); + assert.equal(typeof 
agentSlSearchJson.sources[0].score, 'number'); + requireIncludes(agentSlSearchJson.sources[0].matchReasons, 'lexical', 'agent sl search match reasons'); + process.stdout.write('klo agent sl list hybrid metadata verified\\n'); + + const slQueryFile = join(projectDir, 'sl-query.json'); + await writeFile(slQueryFile, '{"measures":["orders.order_count"],"dimensions":[]}\\n', 'utf-8'); + + const slQuery = await run('pnpm', ['exec', 'klo', 'agent', 'sl', 'query', + '--json', + '--connection-id', + 'warehouse', + '--query-file', + slQueryFile, + '--project-dir', + projectDir, + ]); + requireSuccess('klo agent sl query', slQuery); + requireOutput('klo agent sl query', slQuery, /"mode": "compile_only"/); + requireOutput('klo agent sl query', slQuery, /orders/); + + const sqliteSlQuery = await run('pnpm', ['exec', 'klo', 'agent', 'sl', 'query', + '--json', + '--connection-id', + 'warehouse', + '--query-file', + slQueryFile, + '--execute', + '--max-rows', + '100', + '--project-dir', + projectDir, + ]); + requireSuccess('klo agent sl query sqlite execute', sqliteSlQuery); + requireOutput('klo agent sl query sqlite execute', sqliteSlQuery, /"dialect": "sqlite"/); + requireOutput('klo agent sl query sqlite execute', sqliteSlQuery, /"mode": "executed"/); + requireOutput('klo agent sl query sqlite execute', sqliteSlQuery, /"driver": "sqlite"/); + requireOutput('klo agent sl query sqlite execute', sqliteSlQuery, /"rows": \\[\\s*\\[\\s*3\\s*\\]\\s*\\]/); + process.stdout.write('klo agent sl query sqlite execute verified\\n'); + + const structuralScan = await run('pnpm', ['exec', 'klo', 'dev', 'scan', 'warehouse', + '--project-dir', + projectDir, + ]); + requireSuccess('klo scan structural', structuralScan); + requireOutput('klo scan structural', structuralScan, /Status: done/); + requireOutput('klo scan structural', structuralScan, /Mode: structural/); + requireOutput('klo scan structural', structuralScan, /Needs attention\\s+None/); + const structuralScanRunId = 
getRunId(structuralScan.stdout); + + const scanStatus = await run('pnpm', ['exec', 'klo', 'dev', 'scan', 'status', + '--project-dir', + projectDir, + structuralScanRunId, + ]); + requireSuccess('klo scan status', scanStatus); + requireOutput('klo scan status', scanStatus, new RegExp('Run: ' + structuralScanRunId)); + requireOutput('klo scan status', scanStatus, /Status: done/); + requireOutput('klo scan status', scanStatus, /Mode: structural/); + + const scanReport = await run('pnpm', ['exec', 'klo', 'dev', 'scan', 'report', + '--project-dir', + projectDir, + '--json', + structuralScanRunId, + ]); + requireSuccess('klo scan report', scanReport); + const scanReportJson = JSON.parse(scanReport.stdout); + assert.equal(scanReportJson.mode, 'structural'); + assert.equal(scanReportJson.connectionId, 'warehouse'); + assert.equal(scanReportJson.manifestShardsWritten, 1); + assert.deepEqual(scanReportJson.artifactPaths.enrichmentArtifacts, []); + assert.deepEqual(scanReportJson.artifactPaths.manifestShards, ['semantic-layer/warehouse/_schema/public.yaml']); + await access(join(projectDir, 'semantic-layer', 'warehouse', '_schema', 'public.yaml')); + process.stdout.write('klo scan structural verified: ' + structuralScanRunId + '\\n'); + + const enrichedScan = await run('pnpm', ['exec', 'klo', 'dev', 'scan', 'warehouse', + '--project-dir', + projectDir, + '--mode', + 'enriched', + ]); + requireSuccess('klo scan enriched', enrichedScan); + requireOutput('klo scan enriched', enrichedScan, /Status: done/); + requireOutput('klo scan enriched', enrichedScan, /Mode: enriched/); + const enrichedScanRunId = getRunId(enrichedScan.stdout); + const enrichedScanReport = await run('pnpm', ['exec', 'klo', 'dev', 'scan', 'report', + '--project-dir', + projectDir, + '--json', + enrichedScanRunId, + ]); + requireSuccess('klo scan enriched report', enrichedScanReport); + const enrichedScanReportJson = JSON.parse(enrichedScanReport.stdout); + assert.equal(enrichedScanReportJson.mode, 
'enriched'); + assert.ok(enrichedScanReportJson.artifactPaths.enrichmentArtifacts.length > 0); + assert.deepEqual(enrichedScanReportJson.artifactPaths.manifestShards, ['semantic-layer/warehouse/_schema/public.yaml']); + process.stdout.write('klo scan enriched verified: ' + enrichedScanRunId + '\\n'); + + await mkdir(join(sourceDir, 'orders'), { recursive: true }); + await writeFile(join(sourceDir, 'orders', 'orders.json'), '{"name":"orders"}\\n', 'utf-8'); + + const ingestRun = await run('pnpm', ['exec', 'klo', 'dev', 'ingest', 'run', + '--project-dir', + projectDir, + '--connection-id', + 'warehouse', + '--adapter', + 'fake', + '--source-dir', + sourceDir, + ]); + assert.equal(ingestRun.code, 1, 'klo dev ingest run without an LLM provider must fail'); + assert.match( + ingestRun.stderr, + /klo dev ingest run requires llm\\.provider\\.backend: anthropic, vertex, or gateway, or an injected agentRunner/, + ); + + await access(join(projectDir, '.klo', 'db.sqlite')); + process.stdout.write('klo dev ingest provider guard verified\\n'); + + await writeFile( + join(projectDir, 'klo.yaml'), + [ + 'project: warehouse', + 'connections:', + ' warehouse:', + ' driver: sqlite', + ' path: warehouse.db', + ' readonly: true', + 'storage:', + ' state: sqlite', + ' search: sqlite-fts5', + 'scan:', + ' enrichment:', + ' mode: deterministic', + 'llm:', + ' provider:', + ' backend: gateway', + ' gateway:', + ' api_key: env:AI_GATEWAY_API_KEY', + ' models:', + ' default: smoke/provider', + 'ingest:', + ' adapters:', + ' - fake', + ' - live-database', + '', + ].join('\\n'), + 'utf-8', + ); + + const daemonPort = await getAvailablePort(); + const semanticComputeUrl = 'http://127.0.0.1:' + daemonPort; + process.stdout.write('klo-daemon serve-http --host 127.0.0.1 --port ' + daemonPort + '\\n'); + const daemon = await startSemanticDaemon(daemonPort); + const lookerParser = createDaemonLookerTableIdentifierParser({ baseUrl: semanticComputeUrl }); + const parsedLookerTables = await 
lookerParser.parse([ + { key: 'orders', sql_table_name: 'orders', dialect: 'sqlite' }, + ]); + assert.equal(parsedLookerTables.orders.ok, true); + assert.equal(parsedLookerTables.orders.name, 'orders'); + assert.equal(parsedLookerTables.orders.canonical_table, 'orders'); + process.stdout.write('Looker daemon table identifier parser verified\\n'); + const client = new Client({ name: 'klo-artifact-smoke-client', version: '0.0.0' }); + process.stdout.write('klo serve --mcp stdio --semantic-compute-url ' + semanticComputeUrl + ' --execute-queries\\n'); + const transport = new StdioClientTransport({ + command: 'pnpm', + args: [ + 'exec', + 'klo', + 'serve', '--mcp', 'stdio', + '--project-dir', + projectDir, + '--user-id', + 'artifact-smoke-user', + '--semantic-compute-url', + semanticComputeUrl, + '--execute-queries', + '--memory-capture', '--memory-model', 'smoke/provider', + ], + cwd: process.cwd(), + stderr: 'pipe', + env: { + ...process.env, + AI_GATEWAY_API_KEY: process.env.AI_GATEWAY_API_KEY ?? 
'artifact-smoke-token', + }, + }); + const mcpServerStderr = []; + transport.stderr?.on('data', (chunk) => mcpServerStderr.push(chunk)); + + try { + await client.connect(transport); + const tools = await client.listTools(); + requireToolNames(tools, [ + 'connection_list', + 'connection_test', + 'ingest_status', + 'ingest_trigger', + 'knowledge_read', + 'knowledge_search', + 'knowledge_write', + 'memory_capture', + 'memory_capture_status', + 'scan_list_artifacts', + 'scan_read_artifact', + 'scan_report', + 'scan_status', + 'scan_trigger', + 'sl_list_sources', + 'sl_query', + 'sl_read_source', + 'sl_validate', + 'sl_write_source', + ]); + const slValidateResult = structuredContent(await client.callTool({ + name: 'sl_validate', + arguments: { + connectionId: 'warehouse', + names: ['orders'], + }, + })); + assert.equal(slValidateResult.success, true); + assert.deepEqual(slValidateResult.errors, []); + const slQueryResult = structuredContent(await client.callTool({ + name: 'sl_query', + arguments: { + connectionId: 'warehouse', + measures: ['orders.order_count'], + limit: 5, + }, + })); + assert.equal(slQueryResult.connectionId, 'warehouse'); + assert.equal(slQueryResult.dialect, 'sqlite'); + assert.match(slQueryResult.sql, /orders/); + assert.deepEqual(slQueryResult.headers, ['order_count']); + assert.deepEqual(slQueryResult.rows, [[3]]); + assert.equal(slQueryResult.totalRows, 1); + assert.equal(slQueryResult.plan.execution.mode, 'executed'); + assert.equal(slQueryResult.plan.execution.driver, 'sqlite'); + + const connectionTest = structuredContent(await client.callTool({ + name: 'connection_test', + arguments: { + connectionId: 'warehouse', + }, + })); + assert.equal(connectionTest.id, 'warehouse'); + assert.equal(connectionTest.ok, true); + + const mcpScanTrigger = structuredContent(await client.callTool({ + name: 'scan_trigger', + arguments: { + connectionId: 'warehouse', + mode: 'structural', + }, + })); + assert.equal(mcpScanTrigger.connectionId, 'warehouse'); + 
/**
 * Return the source text of the ES module written to
 * `verify-installed-demo.mjs` by `verifyNpmArtifacts` and
 * `verifyNpmDemoArtifacts`, which exercises the packaged demo through the
 * installed `klo` binary.
 *
 * The generated script works inside a fresh temp directory (removed in a
 * `finally`) and, via `pnpm exec klo ...`:
 *   1. checks `--help` output,
 *   2. runs `setup demo --no-input --plain` and asserts the seeded-mode
 *      summary lines, the absence of `--mode deterministic` / `KLO memory flow`
 *      in stdout, and empty stderr,
 *   3. runs `agent wiki search` and `agent sl list` with `--json` and asserts
 *      results exist and at least one exposes non-empty `matchReasons`,
 *   4. runs `dev doctor setup --no-input`, accepting exit code 0 or 1.
 *
 * Each command is run with a 45 second timeout. Backslashes are doubled
 * because the script lives inside a template literal; the emitted file
 * contains single backslashes.
 *
 * @returns {string} JavaScript (ESM, top-level await) source code.
 */
export function npmDemoSmokeSource() {
  return `
import assert from 'node:assert/strict';
import { execFile } from 'node:child_process';
import { mkdtemp, rm } from 'node:fs/promises';
import { tmpdir } from 'node:os';
import { join } from 'node:path';
import { promisify } from 'node:util';

const execFileAsync = promisify(execFile);

async function run(command, args, options = {}) {
  process.stdout.write('$ ' + command + ' ' + args.join(' ') + '\\n');
  try {
    const result = await execFileAsync(command, args, {
      cwd: options.cwd,
      env: options.env ?? process.env,
      encoding: 'utf8',
      timeout: 45_000,
    });
    return { code: 0, stdout: result.stdout, stderr: result.stderr };
  } catch (error) {
    return {
      code: typeof error.code === 'number' ? error.code : 1,
      stdout: error.stdout ?? '',
      stderr: error.stderr ?? error.message,
    };
  }
}

function requireSuccess(label, result) {
  assert.equal(
    result.code,
    0,
    label + ' failed with code ' + result.code + '\\nstdout:\\n' + result.stdout + '\\nstderr:\\n' + result.stderr,
  );
}

function requireStdout(label, result, pattern) {
  assert.match(result.stdout, pattern, label + ' stdout did not match ' + pattern);
}

const root = await mkdtemp(join(tmpdir(), 'klo-packed-demo-smoke-'));
try {
  const projectDir = join(root, 'demo-project');

  const help = await run('pnpm', ['exec', 'klo', '--help']);
  requireSuccess('klo --help', help);
  requireStdout('klo --help', help, /Usage: klo/);
  requireStdout('klo --help', help, /setup/);

  const seeded = await run(
    'pnpm',
    ['exec', 'klo', 'setup', 'demo', '--project-dir', projectDir, '--no-input', '--plain'],
  );
  requireSuccess('klo setup demo seeded', seeded);
  requireStdout('klo setup demo seeded', seeded, /Mode: seeded/);
  requireStdout('klo setup demo seeded', seeded, /Source: packaged demo project/);
  requireStdout('klo setup demo seeded', seeded, /LLM calls: none/);
  requireStdout('klo setup demo seeded', seeded, /klo serve --mcp stdio/);
  assert.doesNotMatch(seeded.stdout, new RegExp(['--mode', 'deterministic'].join(' ')));
  assert.doesNotMatch(seeded.stdout, /KLO memory flow/);
  assert.equal(seeded.stderr, '', 'klo setup demo seeded wrote unexpected stderr');

  const demoWikiSearch = await run('pnpm', [
    'exec',
    'klo',
    'agent',
    'wiki',
    'search',
    'ARR contract',
    '--json',
    '--limit',
    '5',
    '--project-dir',
    projectDir,
  ]);
  requireSuccess('klo seeded demo agent wiki search', demoWikiSearch);
  const demoWikiSearchJson = JSON.parse(demoWikiSearch.stdout);
  assert.ok(demoWikiSearchJson.totalFound > 0, 'seeded demo wiki search should find results');
  assert.ok(
    demoWikiSearchJson.results.some((result) => Array.isArray(result.matchReasons) && result.matchReasons.length > 0),
    'seeded demo wiki search should expose match reasons',
  );
  process.stdout.write('klo seeded demo agent wiki search verified\\n');

  const demoSlSearch = await run('pnpm', [
    'exec',
    'klo',
    'agent',
    'sl',
    'list',
    '--json',
    '--query',
    'ARR',
    '--project-dir',
    projectDir,
  ]);
  requireSuccess('klo seeded demo agent sl search', demoSlSearch);
  const demoSlSearchJson = JSON.parse(demoSlSearch.stdout);
  assert.ok(demoSlSearchJson.totalSources > 0, 'seeded demo semantic-layer search should find sources');
  assert.ok(
    demoSlSearchJson.sources.some((source) => Array.isArray(source.matchReasons) && source.matchReasons.length > 0),
    'seeded demo semantic-layer search should expose match reasons',
  );
  process.stdout.write('klo seeded demo agent sl search verified\\n');

  const doctor = await run('pnpm', ['exec', 'klo', 'dev', 'doctor', 'setup', '--no-input']);
  assert.ok([0, 1].includes(doctor.code), 'klo dev doctor setup exit code must be 0 or 1');
  requireStdout('klo dev doctor setup', doctor, /KLO setup doctor/);
  requireStdout('klo dev doctor setup', doctor, /Node 22\\+/);
  assert.equal(doctor.stderr, '', 'klo dev doctor setup wrote unexpected stderr');
} finally {
  await rm(root, { recursive: true, force: true });
}
`;
}
pythonVerifySource() { + return ` +import importlib.metadata +import klo_daemon +import semantic_layer + +assert importlib.metadata.version("klo-sl") == "0.1.0" +assert importlib.metadata.version("klo-daemon") == "0.1.0" +assert semantic_layer is not None +assert klo_daemon.PACKAGE_NAME == "klo-daemon" +`; +} + +function pythonExecutable(projectDir) { + if (process.platform === 'win32') { + return join(projectDir, '.venv', 'Scripts', 'python.exe'); + } + return join(projectDir, '.venv', 'bin', 'python'); +} + +export function npmSmokePythonEnv(projectDir, baseEnv = process.env) { + const binDir = process.platform === 'win32' ? join(projectDir, '.venv', 'Scripts') : join(projectDir, '.venv', 'bin'); + const existingPath = baseEnv.PATH ?? ''; + + return Object.assign({}, baseEnv, { + PATH: existingPath ? `${binDir}${delimiter}${existingPath}` : binDir, + }); +} + +async function buildArtifacts(layout) { + await rm(layout.artifactDir, { recursive: true, force: true }); + await mkdir(layout.npmDir, { recursive: true }); + await mkdir(layout.pythonDir, { recursive: true }); + + for (const command of buildArtifactCommands(layout)) { + await runCommand(command.command, command.args, { cwd: command.cwd }); + } + + for (const packageInfo of NPM_ARTIFACT_PACKAGES) { + await assertPathExists(layout.npmTarballs[packageInfo.name], `${packageInfo.name} tarball`); + } + await findPythonArtifacts(layout.pythonDir); + await writeArtifactManifest(layout); + await assertPathExists(artifactManifestPath(layout), 'artifact manifest'); +} + +async function verifyNpmArtifacts(layout, tmpRoot) { + for (const packageInfo of NPM_ARTIFACT_PACKAGES) { + await assertPathExists(layout.npmTarballs[packageInfo.name], `${packageInfo.name} tarball`); + } + const pythonArtifacts = await findPythonArtifacts(layout.pythonDir); + + const projectDir = join(tmpRoot, 'npm-clean-install'); + const python = pythonExecutable(projectDir); + await mkdir(projectDir, { recursive: true }); + await writeFile( + 
join(projectDir, 'package.json'), + `${JSON.stringify(npmSmokePackageJson(layout), null, 2)}\n`, + ); + await writeFile(join(projectDir, 'verify-npm.mjs'), npmVerifySource()); + await writeFile(join(projectDir, 'verify-installed-cli.mjs'), npmRuntimeSmokeSource()); + await writeFile(join(projectDir, 'verify-installed-demo.mjs'), npmDemoSmokeSource()); + + await runCommand('pnpm', ['install'], { cwd: projectDir }); + await runCommand('pnpm', ['rebuild', 'better-sqlite3'], { cwd: projectDir }); + await runCommand('uv', ['venv', '.venv'], { cwd: projectDir }); + await runCommand('uv', pythonArtifactInstallArgs(python, pythonArtifacts), { + cwd: projectDir, + }); + await runCommand('node', ['verify-npm.mjs'], { cwd: projectDir }); + await runCommand('pnpm', ['exec', 'klo', '--version'], { cwd: projectDir }); + await runCommand('node', ['verify-installed-cli.mjs'], { + cwd: projectDir, + env: npmSmokePythonEnv(projectDir), + }); + await runCommand('node', ['verify-installed-demo.mjs'], { + cwd: projectDir, + env: npmSmokePythonEnv(projectDir), + }); +} + +async function verifyNpmDemoArtifacts(layout, tmpRoot) { + for (const packageInfo of NPM_ARTIFACT_PACKAGES) { + await assertPathExists(layout.npmTarballs[packageInfo.name], `${packageInfo.name} tarball`); + } + + const projectDir = join(tmpRoot, 'npm-demo-clean-install'); + await mkdir(projectDir, { recursive: true }); + await writeFile(join(projectDir, 'package.json'), `${JSON.stringify(npmSmokePackageJson(layout), null, 2)}\n`); + await writeFile(join(projectDir, 'verify-installed-demo.mjs'), npmDemoSmokeSource()); + + await runCommand('pnpm', ['install'], { cwd: projectDir }); + await runCommand('node', ['verify-installed-demo.mjs'], { cwd: projectDir }); +} + +async function verifyPythonArtifacts(layout, tmpRoot) { + const pythonArtifacts = await findPythonArtifacts(layout.pythonDir); + + const projectDir = join(tmpRoot, 'python-clean-install'); + await mkdir(projectDir, { recursive: true }); + const python = 
pythonExecutable(projectDir); + await writeFile(join(projectDir, 'verify_python.py'), pythonVerifySource()); + + await runCommand('uv', ['venv', '.venv'], { cwd: projectDir }); + await runCommand('uv', pythonArtifactInstallArgs(python, pythonArtifacts), { + cwd: projectDir, + }); + await runCommand(python, ['verify_python.py'], { cwd: projectDir }); + await runCommand(python, ['-m', 'klo_daemon', 'semantic-validate'], { + cwd: projectDir, + input: `${JSON.stringify({ sources: [ordersSource], dialect: 'postgres' })}\n`, + }); +} + +async function verifyArtifacts(layout) { + await verifyArtifactManifest(layout); + + const tmpRoot = await mkdtemp(join(tmpdir(), 'klo-artifacts-')); + try { + await verifyNpmArtifacts(layout, tmpRoot); + await verifyPythonArtifacts(layout, tmpRoot); + } finally { + await rm(tmpRoot, { recursive: true, force: true }); + } +} + +async function verifyDemoArtifacts(layout) { + await verifyArtifactManifest(layout); + + const tmpRoot = await mkdtemp(join(tmpdir(), 'klo-demo-artifacts-')); + try { + await verifyNpmDemoArtifacts(layout, tmpRoot); + } finally { + await rm(tmpRoot, { recursive: true, force: true }); + } +} + +async function main() { + const command = process.argv[2] ?? 'check'; + const layout = packageArtifactLayout(); + + if (command === 'build') { + await buildArtifacts(layout); + return; + } + if (command === 'verify') { + await verifyArtifacts(layout); + return; + } + if (command === 'verify-demo') { + await verifyDemoArtifacts(layout); + return; + } + if (command === 'verify-manifest') { + await verifyArtifactManifest(layout); + return; + } + if (command === 'check') { + await buildArtifacts(layout); + await verifyArtifacts(layout); + return; + } + + throw new Error(`Unknown package artifact command: ${command}`); +} + +if (import.meta.url === pathToFileURL(process.argv[1] ?? '').href) { + try { + await main(); + } catch (error) { + process.stderr.write(`${error instanceof Error ? 
error.stack : String(error)}\n`); + process.exitCode = 1; + } +} diff --git a/scripts/package-artifacts.test.mjs b/scripts/package-artifacts.test.mjs new file mode 100644 index 00000000..58d308b6 --- /dev/null +++ b/scripts/package-artifacts.test.mjs @@ -0,0 +1,655 @@ +import assert from 'node:assert/strict'; +import { createHash } from 'node:crypto'; +import { mkdir, mkdtemp, readFile, rm, writeFile } from 'node:fs/promises'; +import { tmpdir } from 'node:os'; +import { join } from 'node:path'; +import { describe, it } from 'node:test'; + +import { + artifactManifestPath, + buildArtifactCommands, + findPythonArtifacts, + NPM_ARTIFACT_PACKAGES, + npmDemoSmokeSource, + npmRuntimeSmokeSource, + npmSmokePackageJson, + npmSmokePythonEnv, + npmVerifySource, + packageArtifactLayout, + packageReleaseMetadata, + pythonArtifactInstallArgs, + pythonVerifySource, + verifyArtifactManifest, + writeArtifactManifest, +} from './package-artifacts.mjs'; + +const STALE_METABASE_UNSUPPORTED = ['Standalone Metabase scheduled fetch', 'is intentionally unsupported'].join(' '); + +async function writeJson(path, value) { + await writeFile(path, `${JSON.stringify(value, null, 2)}\n`); +} + +const CONNECTOR_PACKAGE_NAMES = [ + '@klo/connector-bigquery', + '@klo/connector-clickhouse', + '@klo/connector-mysql', + '@klo/connector-postgres', + '@klo/connector-posthog', + '@klo/connector-snowflake', + '@klo/connector-sqlite', + '@klo/connector-sqlserver', +]; + +function packageRootForName(packageName) { + return `packages/${packageName.replace('@klo/', '')}`; +} + +function expectedNpmArtifactPath(packageName) { + return `npm/${packageName.replace('@klo/', 'klo-')}-0.0.0-private.tgz`; +} + +async function writeReleaseMetadataInputs(root) { + const npmPackages = ['@klo/context', '@klo/llm', ...CONNECTOR_PACKAGE_NAMES, '@klo/cli']; + + for (const packageName of npmPackages) { + const packageRoot = packageName === '@klo/context' ? 
'packages/context' : packageRootForName(packageName); + await mkdir(join(root, packageRoot), { recursive: true }); + await writeJson(join(root, packageRoot, 'package.json'), { + name: packageName, + version: '0.0.0-private', + private: true, + }); + } + + await mkdir(join(root, 'python', 'klo-sl'), { recursive: true }); + await mkdir(join(root, 'python', 'klo-daemon'), { recursive: true }); + await writeFile( + join(root, 'python', 'klo-sl', 'pyproject.toml'), + ['[project]', 'name = "klo-sl"', 'version = "0.1.0"', ''].join('\n'), + ); + await writeFile( + join(root, 'python', 'klo-daemon', 'pyproject.toml'), + ['[project]', 'name = "klo-daemon"', 'version = "0.1.0"', ''].join('\n'), + ); +} + +async function writeUploadableArtifactFixtures(layout) { + await mkdir(layout.npmDir, { recursive: true }); + await mkdir(layout.pythonDir, { recursive: true }); + + const fileContents = new Map([ + ...NPM_ARTIFACT_PACKAGES.map((packageInfo) => [ + layout.npmTarballs[packageInfo.name], + `${packageInfo.name}-tarball`, + ]), + [join(layout.pythonDir, 'klo_sl-0.1.0-py3-none-any.whl'), 'klo-sl-wheel'], + [join(layout.pythonDir, 'klo_sl-0.1.0.tar.gz'), 'klo-sl-sdist'], + [join(layout.pythonDir, 'klo_daemon-0.1.0-py3-none-any.whl'), 'klo-daemon-wheel'], + [join(layout.pythonDir, 'klo_daemon-0.1.0.tar.gz'), 'klo-daemon-sdist'], + ]); + + for (const [path, contents] of fileContents) { + await writeFile(path, contents); + } +} + +describe('packageArtifactLayout', () => { + it('uses stable artifact paths under klo/dist/artifacts', () => { + const layout = packageArtifactLayout('/repo/klo'); + + assert.equal(layout.artifactDir, '/repo/klo/dist/artifacts'); + assert.equal(layout.npmDir, '/repo/klo/dist/artifacts/npm'); + assert.equal(layout.pythonDir, '/repo/klo/dist/artifacts/python'); + assert.equal(layout.contextTarball, '/repo/klo/dist/artifacts/npm/klo-context-0.0.0-private.tgz'); + assert.equal(layout.cliTarball, '/repo/klo/dist/artifacts/npm/klo-cli-0.0.0-private.tgz'); + 
assert.equal( + layout.connectorTarballs['@klo/connector-sqlite'], + '/repo/klo/dist/artifacts/npm/klo-connector-sqlite-0.0.0-private.tgz', + ); + assert.equal( + layout.connectorTarballs['@klo/connector-postgres'], + '/repo/klo/dist/artifacts/npm/klo-connector-postgres-0.0.0-private.tgz', + ); + assert.deepEqual( + Object.keys(layout.npmTarballs), + NPM_ARTIFACT_PACKAGES.map((packageInfo) => packageInfo.name), + ); + }); +}); + +describe('buildArtifactCommands', () => { + it('builds all TypeScript packages before packing npm artifacts and builds both Python packages', () => { + const layout = packageArtifactLayout('/repo/klo'); + const commands = buildArtifactCommands(layout); + + assert.deepEqual( + commands.slice(0, NPM_ARTIFACT_PACKAGES.length).map((command) => [command.command, command.args]), + NPM_ARTIFACT_PACKAGES.map((packageInfo) => ['pnpm', ['--filter', packageInfo.name, 'run', 'build']]), + ); + assert.deepEqual( + commands + .slice(NPM_ARTIFACT_PACKAGES.length, NPM_ARTIFACT_PACKAGES.length * 2) + .map((command) => [command.command, command.args]), + NPM_ARTIFACT_PACKAGES.map((packageInfo) => [ + 'pnpm', + ['--filter', packageInfo.name, 'pack', '--out', layout.npmTarballs[packageInfo.name]], + ]), + ); + assert.deepEqual( + commands.slice(NPM_ARTIFACT_PACKAGES.length * 2).map((command) => [command.command, command.args]), + [ + ['uv', ['build', '--package', 'klo-sl', '--out-dir', '/repo/klo/dist/artifacts/python']], + ['uv', ['build', '--package', 'klo-daemon', '--out-dir', '/repo/klo/dist/artifacts/python']], + ], + ); + }); +}); + +describe('packageReleaseMetadata', () => { + it('reads package identities and versions from package manifests', async () => { + const root = await mkdtemp(join(tmpdir(), 'klo-release-metadata-test-')); + try { + await writeReleaseMetadataInputs(root); + + assert.deepEqual(await packageReleaseMetadata(root), [ + ...NPM_ARTIFACT_PACKAGES.map((packageInfo) => ({ + ecosystem: 'npm', + packageName: packageInfo.name, + 
packageRoot: packageInfo.packageRoot, + packageVersion: '0.0.0-private', + private: true, + releaseMode: 'ci-artifact-only', + })), + { + ecosystem: 'python', + packageName: 'klo-sl', + packageRoot: 'python/klo-sl', + packageVersion: '0.1.0', + private: false, + releaseMode: 'ci-artifact-only', + }, + { + ecosystem: 'python', + packageName: 'klo-daemon', + packageRoot: 'python/klo-daemon', + packageVersion: '0.1.0', + private: false, + releaseMode: 'ci-artifact-only', + }, + ]); + } finally { + await rm(root, { recursive: true, force: true }); + } + }); +}); + +describe('findPythonArtifacts', () => { + it('finds one wheel and one source distribution for each Python package', async () => { + const root = await mkdtemp(join(tmpdir(), 'klo-artifacts-test-')); + try { + await writeFile(join(root, 'klo_sl-0.1.0-py3-none-any.whl'), ''); + await writeFile(join(root, 'klo_sl-0.1.0.tar.gz'), ''); + await writeFile(join(root, 'klo_daemon-0.1.0-py3-none-any.whl'), ''); + await writeFile(join(root, 'klo_daemon-0.1.0.tar.gz'), ''); + + assert.deepEqual(await findPythonArtifacts(root), { + kloSlWheel: join(root, 'klo_sl-0.1.0-py3-none-any.whl'), + kloSlSdist: join(root, 'klo_sl-0.1.0.tar.gz'), + kloDaemonWheel: join(root, 'klo_daemon-0.1.0-py3-none-any.whl'), + kloDaemonSdist: join(root, 'klo_daemon-0.1.0.tar.gz'), + }); + } finally { + await rm(root, { recursive: true, force: true }); + } + }); + + it('throws when a required Python artifact is missing', async () => { + const root = await mkdtemp(join(tmpdir(), 'klo-artifacts-test-')); + try { + await assert.rejects(() => findPythonArtifacts(root), /Missing Python artifact: klo-sl wheel/); + } finally { + await rm(root, { recursive: true, force: true }); + } + }); +}); + +describe('artifact manifest', () => { + it('writes release metadata, source revision, checksums, and byte counts for every uploadable artifact', async () => { + const root = await mkdtemp(join(tmpdir(), 'klo-artifacts-manifest-test-')); + const layout = 
packageArtifactLayout(root); + try { + await writeReleaseMetadataInputs(root); + await writeUploadableArtifactFixtures(layout); + + const manifest = await writeArtifactManifest(layout, new Date('2026-04-28T12:00:00.000Z'), { + sourceRevision: 'abc123', + }); + + assert.equal(artifactManifestPath(layout), join(root, 'dist', 'artifacts', 'manifest.json')); + assert.equal(manifest.schemaVersion, 2); + assert.equal(manifest.generatedAt, '2026-04-28T12:00:00.000Z'); + assert.equal(manifest.sourceRevision, 'abc123'); + assert.deepEqual( + manifest.packages.filter((entry) => entry.ecosystem === 'npm'), + NPM_ARTIFACT_PACKAGES.map((packageInfo) => ({ + ecosystem: 'npm', + packageName: packageInfo.name, + packageRoot: packageInfo.packageRoot, + packageVersion: '0.0.0-private', + private: true, + releaseMode: 'ci-artifact-only', + })), + ); + assert.deepEqual( + manifest.packages.filter((entry) => entry.ecosystem === 'python'), + [ + { + ecosystem: 'python', + packageName: 'klo-sl', + packageRoot: 'python/klo-sl', + packageVersion: '0.1.0', + private: false, + releaseMode: 'ci-artifact-only', + }, + { + ecosystem: 'python', + packageName: 'klo-daemon', + packageRoot: 'python/klo-daemon', + packageVersion: '0.1.0', + private: false, + releaseMode: 'ci-artifact-only', + }, + ], + ); + assert.deepEqual( + manifest.files + .filter((file) => file.ecosystem === 'npm') + .map((file) => ({ + artifactKind: file.artifactKind, + ecosystem: file.ecosystem, + packageName: file.packageName, + packageVersion: file.packageVersion, + path: file.path, + })) + .sort((left, right) => left.packageName.localeCompare(right.packageName)), + NPM_ARTIFACT_PACKAGES.map((packageInfo) => ({ + artifactKind: 'tarball', + ecosystem: 'npm', + packageName: packageInfo.name, + packageVersion: '0.0.0-private', + path: expectedNpmArtifactPath(packageInfo.name), + })).sort((left, right) => left.packageName.localeCompare(right.packageName)), + ); + assert.deepEqual( + manifest.files + .filter((file) => 
file.ecosystem === 'python') + .map((file) => ({ + artifactKind: file.artifactKind, + ecosystem: file.ecosystem, + packageName: file.packageName, + packageVersion: file.packageVersion, + path: file.path, + })), + [ + { + artifactKind: 'wheel', + ecosystem: 'python', + packageName: 'klo-daemon', + packageVersion: '0.1.0', + path: 'python/klo_daemon-0.1.0-py3-none-any.whl', + }, + { + artifactKind: 'sdist', + ecosystem: 'python', + packageName: 'klo-daemon', + packageVersion: '0.1.0', + path: 'python/klo_daemon-0.1.0.tar.gz', + }, + { + artifactKind: 'wheel', + ecosystem: 'python', + packageName: 'klo-sl', + packageVersion: '0.1.0', + path: 'python/klo_sl-0.1.0-py3-none-any.whl', + }, + { + artifactKind: 'sdist', + ecosystem: 'python', + packageName: 'klo-sl', + packageVersion: '0.1.0', + path: 'python/klo_sl-0.1.0.tar.gz', + }, + ], + ); + + const sqliteEntry = manifest.files.find((file) => file.path === 'npm/klo-connector-sqlite-0.0.0-private.tgz'); + assert.ok(sqliteEntry); + assert.equal(sqliteEntry.bytes, Buffer.byteLength('@klo/connector-sqlite-tarball')); + assert.equal(sqliteEntry.sha256, createHash('sha256').update('@klo/connector-sqlite-tarball').digest('hex')); + + const writtenManifest = JSON.parse(await readFile(artifactManifestPath(layout), 'utf-8')); + assert.deepEqual(writtenManifest, manifest); + } finally { + await rm(root, { recursive: true, force: true }); + } + }); +}); + +describe('verifyArtifactManifest', () => { + it('accepts a schema version 2 manifest that matches the artifact directory', async () => { + const root = await mkdtemp(join(tmpdir(), 'klo-artifacts-verify-manifest-test-')); + const layout = packageArtifactLayout(root); + try { + await writeReleaseMetadataInputs(root); + await writeUploadableArtifactFixtures(layout); + await writeArtifactManifest(layout, new Date('2026-04-28T12:00:00.000Z'), { + sourceRevision: 'abc123', + }); + + const manifest = await verifyArtifactManifest(layout, { + expectedSourceRevision: 'abc123', + }); + + 
assert.equal(manifest.schemaVersion, 2); + assert.equal(manifest.sourceRevision, 'abc123'); + assert.equal(manifest.files.length, NPM_ARTIFACT_PACKAGES.length + 4); + } finally { + await rm(root, { recursive: true, force: true }); + } + }); + + it('rejects a manifest when a file checksum has drifted', async () => { + const root = await mkdtemp(join(tmpdir(), 'klo-artifacts-checksum-drift-test-')); + const layout = packageArtifactLayout(root); + try { + await writeReleaseMetadataInputs(root); + await writeUploadableArtifactFixtures(layout); + await writeArtifactManifest(layout, new Date('2026-04-28T12:00:00.000Z'), { + sourceRevision: 'abc123', + }); + await writeFile(layout.contextTarball, 'changed-context-tarball'); + + await assert.rejects( + () => verifyArtifactManifest(layout), + /Artifact manifest files do not match artifact contents/, + ); + } finally { + await rm(root, { recursive: true, force: true }); + } + }); + + it('rejects a manifest with an unsafe artifact path', async () => { + const root = await mkdtemp(join(tmpdir(), 'klo-artifacts-path-test-')); + const layout = packageArtifactLayout(root); + try { + await writeReleaseMetadataInputs(root); + await writeUploadableArtifactFixtures(layout); + const manifest = await writeArtifactManifest(layout, new Date('2026-04-28T12:00:00.000Z'), { + sourceRevision: 'abc123', + }); + manifest.files[0].path = '../outside.tgz'; + await writeFile(artifactManifestPath(layout), `${JSON.stringify(manifest, null, 2)}\n`); + + await assert.rejects(() => verifyArtifactManifest(layout), /Unsafe artifact manifest path: \.\.\/outside\.tgz/); + } finally { + await rm(root, { recursive: true, force: true }); + } + }); + + it('rejects a manifest from the wrong source revision when one is required', async () => { + const root = await mkdtemp(join(tmpdir(), 'klo-artifacts-revision-test-')); + const layout = packageArtifactLayout(root); + try { + await writeReleaseMetadataInputs(root); + await 
writeUploadableArtifactFixtures(layout); + await writeArtifactManifest(layout, new Date('2026-04-28T12:00:00.000Z'), { + sourceRevision: 'abc123', + }); + + await assert.rejects( + () => + verifyArtifactManifest(layout, { + expectedSourceRevision: 'def456', + }), + /Artifact manifest sourceRevision mismatch: expected def456, got abc123/, + ); + } finally { + await rm(root, { recursive: true, force: true }); + } + }); +}); + +describe('pythonArtifactInstallArgs', () => { + it('installs the built Python wheels by artifact path', () => { + const args = pythonArtifactInstallArgs('/tmp/smoke/.venv/bin/python', { + kloSlWheel: '/repo/klo/dist/artifacts/python/klo_sl-0.1.0-py3-none-any.whl', + kloSlSdist: '/repo/klo/dist/artifacts/python/klo_sl-0.1.0.tar.gz', + kloDaemonWheel: '/repo/klo/dist/artifacts/python/klo_daemon-0.1.0-py3-none-any.whl', + kloDaemonSdist: '/repo/klo/dist/artifacts/python/klo_daemon-0.1.0.tar.gz', + }); + + assert.deepEqual(args, [ + 'pip', + 'install', + '--python', + '/tmp/smoke/.venv/bin/python', + '/repo/klo/dist/artifacts/python/klo_sl-0.1.0-py3-none-any.whl', + '/repo/klo/dist/artifacts/python/klo_daemon-0.1.0-py3-none-any.whl', + ]); + assert.equal(args.includes('klo-daemon'), false); + assert.equal(args.includes('--find-links'), false); + }); +}); + +describe('npmSmokePythonEnv', () => { + it('prepends the npm smoke virtualenv bin directory to PATH', () => { + const env = npmSmokePythonEnv('/tmp/klo-npm-smoke', { PATH: '/usr/bin' }); + + assert.match(env.PATH, /^\/tmp\/klo-npm-smoke\/\.venv\/(bin|Scripts)/); + assert.match(env.PATH, /\/usr\/bin$/); + }); +}); + +describe('verification snippets', () => { + it('pins smoke dependencies and connector packages to clean-install-safe artifacts', () => { + const layout = packageArtifactLayout('/repo/klo'); + const packageJson = npmSmokePackageJson(layout); + + for (const packageInfo of NPM_ARTIFACT_PACKAGES) { + assert.equal(packageJson.dependencies[packageInfo.name], 
`file:${layout.npmTarballs[packageInfo.name]}`); + assert.equal(packageJson.pnpm.overrides[packageInfo.name], `file:${layout.npmTarballs[packageInfo.name]}`); + } + assert.equal(packageJson.dependencies['@modelcontextprotocol/sdk'], '^1.27.1'); + assert.deepEqual(packageJson.pnpm.onlyBuiltDependencies, ['better-sqlite3']); + }); + + it('exposes manifest verification as a package artifact command', async () => { + const source = await readFile(new URL('./package-artifacts.mjs', import.meta.url), 'utf8'); + const packageJson = JSON.parse(await readFile(new URL('../package.json', import.meta.url), 'utf8')); + + assert.match(source, /if \(command === 'verify-manifest'\)/); + assert.match(source, /await verifyArtifactManifest\(layout\)/); + assert.equal(packageJson.scripts['artifacts:verify-demo'], 'node scripts/package-artifacts.mjs verify-demo'); + assert.equal(packageJson.scripts['artifacts:verify-manifest'], 'node scripts/package-artifacts.mjs verify-manifest'); + }); + + it('verifies installed dbt extraction exports from @klo/context/ingest', () => { + const source = npmVerifySource(); + + assert.match(source, /const ingest = await import\('@klo\/context\/ingest'\);/); + assert.match(source, /const dbtExtractionExports = \[/); + assert.match(source, /throw new Error\('Missing dbt extraction export: ' \+ exportName\);/); + + for (const exportName of [ + 'parseMetricflowFiles', + 'parseMetricflowPullConfig', + 'importMetricflowSemanticModels', + 'parseDbtSchemaFiles', + 'toDescriptionUpdates', + 'toRelationshipUpdates', + 'mergeSemanticModelTables', + 'loadProjectInfo', + 'loadDbtSchemaFiles', + ]) { + assert.match(source, new RegExp(`\\['${exportName}', ingest\\.${exportName}\\]`)); + } + }); + + it('asserts the public npm and connector entry points that clean installs must expose', () => { + const source = npmVerifySource(); + + assert.match(source, /@klo\/context/); + assert.match(source, /@klo\/context\/project/); + assert.match(source, /@klo\/context\/mcp/); + 
assert.match(source, /@klo\/context\/memory/); + assert.match(source, /@klo\/context\/daemon/); + assert.match(source, /@klo\/cli/); + assert.match(source, /@klo\/llm/); + assert.match(source, /createKloLlmProvider/); + assert.match(source, /KloMessageBuilder/); + assert.match(source, /createKloEmbeddingProvider/); + assert.doesNotMatch(source, /createGatewayLlmProvider/); + assert.match(source, /createLocalProjectMemoryCapture/); + for (const packageName of CONNECTOR_PACKAGE_NAMES) { + assert.match(source, new RegExp(packageName.replace('/', '\\/'))); + } + assert.match(source, /KloSqliteScanConnector/); + assert.match(source, /KloPostgresScanConnector/); + assert.match(source, /KloBigQueryScanConnector/); + assert.match(source, /KloSnowflakeScanConnector/); + assert.match(source, /KloPostHogScanConnector/); + }); + + it('asserts installed hybrid search exports and CLI smoke coverage', () => { + const verifySource = npmVerifySource(); + const runtimeSource = npmRuntimeSmokeSource(); + const demoSource = npmDemoSmokeSource(); + + assert.match(verifySource, /const search = await import\('@klo\/context\/search'\);/); + assert.match(verifySource, /HybridSearchCore/); + assert.match(verifySource, /assertSearchBackendConformanceCase/); + assert.match(verifySource, /assertSearchBackendCapabilities/); + + assert.match(runtimeSource, /klo agent wiki search hybrid metadata verified/); + assert.match(runtimeSource, /klo agent sl list hybrid metadata verified/); + assert.match(runtimeSource, /agent_sl_search_missing_project/); + assert.match(runtimeSource, /agent_sl_search_no_connections/); + assert.match(runtimeSource, /agent_sl_search_no_indexed_sources/); + + assert.match(demoSource, /klo seeded demo agent wiki search verified/); + assert.match(demoSource, /klo seeded demo agent sl search verified/); + }); + + it('runs installed CLI commands and MCP through an installed daemon HTTP server', () => { + const source = npmRuntimeSmokeSource(); + + assert.match(source, 
/@modelcontextprotocol\/sdk\/client\/index\.js/); + assert.match(source, /@modelcontextprotocol\/sdk\/client\/stdio\.js/); + assert.match(source, /spawn\(command, args/); + assert.match(source, /createServer/); + assert.match(source, /request as httpRequest/); + assert.match(source, /getAvailablePort/); + assert.match(source, /startSemanticDaemon/); + assert.match(source, /waitForHttpHealth/); + assert.match(source, /stopSemanticDaemon/); + assert.match(source, /'klo-daemon'/); + assert.match(source, /'serve-http'/); + assert.match(source, /'--host'/); + assert.match(source, /'127\.0\.0\.1'/); + assert.match(source, /'--port'/); + assert.match(source, /\/health/); + assert.match(source, /--semantic-compute-url/); + assert.match(source, /createDaemonLookerTableIdentifierParser/); + assert.match(source, /LocalLookerRuntimeStore/); + assert.match(source, /Looker daemon table identifier parser verified/); + assert.match(source, /Looker local runtime store verified/); + assert.match(source, /semanticComputeUrl/); + assert.match(source, /run\('pnpm', \[\s*'exec',\s*'klo',\s*'setup'/); + assert.match(source, /knowledge', 'global', 'revenue\.md'/); + assert.match(source, /run\('pnpm', \[\s*'exec',\s*'klo',\s*'agent',\s*'wiki',\s*'search'/); + assert.match(source, /semantic-layer', 'warehouse', 'orders\.yaml'/); + assert.match(source, /run\('pnpm', \[\s*'exec',\s*'klo',\s*'agent',\s*'sl',\s*'list'/); + assert.match(source, /run\('pnpm', \[\s*'exec',\s*'klo',\s*'agent',\s*'sl',\s*'query'/); + assert.match(source, /orders\.order_count/); + assert.match(source, /sqlite3/); + assert.match(source, /driver: sqlite/); + assert.match(source, /path: warehouse\.db/); + assert.match(source, /live-database/); + assert.match(source, /'--execute'/); + assert.match(source, /'--execute-queries'/); + assert.match(source, /slValidateResult\.success, true/); + assert.match(source, /slQueryResult\.dialect, 'sqlite'/); + assert.match(source, /slQueryResult\.plan\.execution\.driver, 'sqlite'/); 
+ assert.match(source, /"mode": "compile_only"/); + assert.match(source, /"mode": "executed"/); + assert.match(source, /klo agent sl query sqlite execute/); + assert.match(source, /run\('pnpm', \[\s*'exec',\s*'klo',\s*'dev',\s*'scan',\s*'warehouse'/); + assert.match(source, /'--mode',\s*'enriched'/); + assert.doesNotMatch(source, /'--enrich'/); + assert.match(source, /klo scan structural verified/); + assert.match(source, /klo scan enriched verified/); + assert.match(source, /scanReportJson\.artifactPaths\.manifestShards/); + assert.match(source, /scanReportJson\.artifactPaths\.enrichmentArtifacts/); + assert.match(source, /enrichment:/); + assert.match(source, /mode: deterministic/); + assert.match(source, /backend: gateway/); + assert.match(source, /models:/); + assert.match(source, /default: smoke\/provider/); + assert.match(source, /api_key: env:AI_GATEWAY_API_KEY/); + assert.match(source, /run\('pnpm', \['exec', 'klo', 'dev', 'ingest', 'run'/); + assert.match(source, /'serve', '--mcp', 'stdio'/); + assert.doesNotMatch(source, /'--semantic-compute',\n\s*'--execute-queries'/); + assert.match(source, /'--memory-capture', '--memory-model', 'smoke\/provider'/); + assert.match(source, /mcpServerStderr/); + assert.match(source, /klo serve stderr/); + assert.match(source, /sl_validate/); + assert.match(source, /sl_query/); + assert.match(source, /memory_capture/); + assert.match(source, /memory_capture_status/); + assert.match(source, /connection_test/); + assert.match(source, /scan_trigger/); + assert.match(source, /scan_status/); + assert.match(source, /scan_report/); + assert.match(source, /scan_list_artifacts/); + assert.match(source, /scan_read_artifact/); + assert.match(source, /mcpScanArtifacts\.artifacts\.find/); + assert.match(source, /AI_GATEWAY_API_KEY/); + assert.match(source, /access\(join\(projectDir, '\.klo', 'db\.sqlite'\)\)/); + assert.match(source, /SQLite knowledge index/); + assert.match(source, /klo dev ingest run requires 
llm\\.provider\\.backend: anthropic, vertex, or gateway/); + assert.match(source, /klo dev ingest provider guard verified/); + }); + + describe('npmDemoSmokeSource', () => { + it('exercises the public packed-demo first-run contract', () => { + const source = npmDemoSmokeSource(); + + assert.match(source, /pnpm', \['exec', 'klo', '--help'\]/); + assert.match(source, /'demo', '--project-dir', projectDir, '--no-input', '--plain'/); + assert.match(source, /Mode: seeded/); + assert.match(source, /Source: packaged demo project/); + assert.match(source, /LLM calls: none/); + assert.match(source, /klo serve --mcp stdio/); + assert.doesNotMatch(source, new RegExp(["'demo'", "'--mode'", "'deterministic'"].join(', '))); + assert.match(source, /'dev', 'doctor', 'setup', '--no-input'/); + assert.match(source, /'--plain'/); + assert.match(source, /klo setup demo seeded wrote unexpected stderr/); + }); + }); + + it('checks packaged ingest runtime assets in the installed npm smoke', () => { + const source = npmRuntimeSmokeSource(); + + assert.match(source, /notion_synthesize\/SKILL\.md/); + assert.match(source, /skills\/page_triage_classifier\.md/); + assert.match(source, /skills\/light_extraction\.md/); + }); + + it('asserts the Python modules that clean installs must expose', () => { + const source = pythonVerifySource(); + + assert.match(source, /semantic_layer/); + assert.match(source, /klo_daemon/); + assert.match(source, /importlib.metadata/); + }); +}); diff --git a/scripts/precommit-check.mjs b/scripts/precommit-check.mjs new file mode 100644 index 00000000..feedf28f --- /dev/null +++ b/scripts/precommit-check.mjs @@ -0,0 +1,195 @@ +#!/usr/bin/env node +import { spawnSync } from 'node:child_process'; +import { existsSync, readFileSync } from 'node:fs'; +import { dirname, join, relative, sep } from 'node:path'; +import { fileURLToPath } from 'node:url'; + +const scriptPath = fileURLToPath(import.meta.url); +const kloRoot = dirname(dirname(scriptPath)); +const repoRoot = 
dirname(kloRoot); + +const packageNameByDir = new Map( + [ + 'cli', + 'connector-bigquery', + 'connector-clickhouse', + 'connector-mysql', + 'connector-postgres', + 'connector-posthog', + 'connector-snowflake', + 'connector-sqlite', + 'connector-sqlserver', + 'context', + 'llm', + ].map((packageDir) => { + const manifestPath = join(kloRoot, 'packages', packageDir, 'package.json'); + const manifest = JSON.parse(readFileSync(manifestPath, 'utf8')); + return [packageDir, manifest.name]; + }), +); + +const packageCodePattern = /\.(?:ts|tsx|js|jsx|json)$/; +const scriptPattern = /\.(?:mjs|js|json)$/; +const pythonPackageTests = new Map([ + ['klo-sl', 'python/klo-sl/tests'], + ['klo-daemon', 'python/klo-daemon/tests'], +]); + +function normalizeFilePath(filePath) { + return filePath.replaceAll('\\', '/').replace(/^\.\//, ''); +} + +function stablePush(commands, key, cmd, args) { + if (commands.some((command) => command.key === key)) { + return; + } + + commands.push({ key, cmd, args }); +} + +function maybeScriptTest(scriptFile) { + if (scriptFile.endsWith('.test.mjs')) { + return scriptFile; + } + + if (!scriptFile.endsWith('.mjs')) { + return null; + } + + const testFile = scriptFile.replace(/\.mjs$/, '.test.mjs'); + return existsSync(join(kloRoot, testFile)) ? 
testFile : null; +} + +export function planChecks(files) { + const commands = []; + const packageNames = new Set(); + const pythonPackages = new Set(); + let runBoundaryCheck = false; + let runAllTypeChecks = false; + let runAllPythonTests = false; + + for (const rawFile of files) { + const file = normalizeFilePath(rawFile); + + if (!file.startsWith('klo/')) { + continue; + } + + const kloFile = file.slice('klo/'.length); + + if (kloFile.startsWith('packages/')) { + const [, packageDir, ...rest] = kloFile.split('/'); + const packageName = packageNameByDir.get(packageDir); + const packageFile = rest.join('/'); + + if (packageName && packageCodePattern.test(packageFile)) { + packageNames.add(packageName); + runBoundaryCheck = true; + } + + continue; + } + + if (kloFile.startsWith('scripts/') && scriptPattern.test(kloFile)) { + const testFile = maybeScriptTest(kloFile); + + if (testFile) { + stablePush(commands, `script-test:${testFile}`, 'node', ['--test', testFile]); + } + + continue; + } + + if (kloFile.startsWith('python/')) { + const [, packageDir] = kloFile.split('/'); + + if (pythonPackageTests.has(packageDir)) { + pythonPackages.add(packageDir); + } + + continue; + } + + if ( + ['package.json', 'pnpm-lock.yaml', 'pnpm-workspace.yaml', 'release-policy.json', 'tsconfig.base.json'].includes( + kloFile, + ) + ) { + runBoundaryCheck = true; + runAllTypeChecks = true; + continue; + } + + if (['pyproject.toml', 'uv.lock', 'uv.toml'].includes(kloFile)) { + runAllPythonTests = true; + } + } + + if (runBoundaryCheck) { + stablePush(commands, 'boundary-check', 'node', ['scripts/check-boundaries.mjs']); + } + + if (runAllTypeChecks) { + stablePush(commands, 'type-check:all', 'pnpm', ['--filter', './packages/*', 'run', 'type-check']); + } else { + for (const packageName of [...packageNames].sort()) { + stablePush(commands, `type-check:${packageName}`, 'pnpm', ['--filter', packageName, 'run', 'type-check']); + stablePush(commands, `build:${packageName}`, 'pnpm', 
['--filter', `${packageName}...`, 'run', 'build']); + stablePush(commands, `test:${packageName}`, 'pnpm', ['--filter', packageName, 'run', 'test']); + } + } + + if (runAllPythonTests) { + stablePush(commands, 'pytest:all', 'uv', ['run', 'pytest']); + } else { + for (const packageDir of [...pythonPackages].sort()) { + stablePush(commands, `pytest:${packageDir}`, 'uv', [ + 'run', + '--package', + packageDir, + 'pytest', + pythonPackageTests.get(packageDir), + ]); + } + } + + return commands; +} + +function printCommand(command) { + console.log(`\n$ ${command.cmd} ${command.args.join(' ')}`); +} + +export function runChecks(files) { + const commands = planChecks(files); + + if (commands.length === 0) { + console.log('No KLO package checks needed for these files.'); + return 0; + } + + for (const command of commands) { + printCommand(command); + + const result = spawnSync(command.cmd, command.args, { + cwd: kloRoot, + stdio: 'inherit', + env: process.env, + }); + + if (result.error) { + console.error(result.error.message); + return 1; + } + + if (result.status !== 0) { + return result.status ?? 
1; + } + } + + return 0; +} + +if (process.argv[1] && relative(repoRoot, process.argv[1]).split(sep).join('/') === 'klo/scripts/precommit-check.mjs') { + process.exitCode = runChecks(process.argv.slice(2)); +} diff --git a/scripts/precommit-check.test.mjs b/scripts/precommit-check.test.mjs new file mode 100644 index 00000000..ee5366b2 --- /dev/null +++ b/scripts/precommit-check.test.mjs @@ -0,0 +1,33 @@ +import assert from 'node:assert/strict'; +import { describe, it } from 'node:test'; + +import { planChecks } from './precommit-check.mjs'; + +function commandKeys(files) { + return planChecks(files).map((command) => command.key); +} + +describe('precommit-check', () => { + it('skips files outside klo', () => { + assert.deepEqual(commandKeys(['server/src/app.ts']), []); + }); + + it('runs only the touched package checks for package code', () => { + assert.deepEqual(commandKeys(['klo/packages/cli/src/index.ts']), [ + 'boundary-check', + 'type-check:@klo/cli', + 'build:@klo/cli', + 'test:@klo/cli', + ]); + }); + + it('runs the matching script test when a script changes', () => { + assert.deepEqual(commandKeys(['klo/scripts/check-boundaries.mjs']), [ + 'script-test:scripts/check-boundaries.test.mjs', + ]); + }); + + it('runs the touched python package tests', () => { + assert.deepEqual(commandKeys(['klo/python/klo-sl/semantic_layer/parser.py']), ['pytest:klo-sl']); + }); +}); diff --git a/scripts/prepare-cli-bin.mjs b/scripts/prepare-cli-bin.mjs new file mode 100644 index 00000000..99ffceac --- /dev/null +++ b/scripts/prepare-cli-bin.mjs @@ -0,0 +1,44 @@ +#!/usr/bin/env node + +import { constants } from 'node:fs'; +import { access, chmod } from 'node:fs/promises'; +import { dirname, resolve } from 'node:path'; +import { fileURLToPath, pathToFileURL } from 'node:url'; + +export function kloRootDir() { + return resolve(dirname(fileURLToPath(import.meta.url)), '..'); +} + +export function cliBinPath(rootDir = kloRootDir()) { + return resolve(rootDir, 'packages', 'cli', 
'dist', 'bin.js'); +} + +async function canExecute(path) { + try { + await access(path, constants.X_OK); + return true; + } catch { + return false; + } +} + +export async function ensureCliBinExecutable(rootDir = kloRootDir()) { + const binPath = cliBinPath(rootDir); + await access(binPath, constants.R_OK); + + if (process.platform !== 'win32' && !(await canExecute(binPath))) { + await chmod(binPath, 0o755); + } + + return binPath; +} + +if (import.meta.url === pathToFileURL(process.argv[1]).href) { + try { + const binPath = await ensureCliBinExecutable(); + process.stdout.write(`Prepared KLO CLI bin: ${binPath}\n`); + } catch (error) { + process.stderr.write(`${error instanceof Error ? error.message : String(error)}\n`); + process.exitCode = 1; + } +} diff --git a/scripts/public-benchmark-manifest.json b/scripts/public-benchmark-manifest.json new file mode 100644 index 00000000..e106e24e --- /dev/null +++ b/scripts/public-benchmark-manifest.json @@ -0,0 +1,36 @@ +{ + "fixtures": [ + { + "id": "chinook_with_declared_metadata", + "displayName": "Chinook (SQLite, declared metadata)", + "url": "https://raw.githubusercontent.com/lerocha/chinook-database/master/ChinookDatabase/DataSources/Chinook_Sqlite.sqlite", + "sha256": "7651ba378ac2fcd0dfc3c66fb101f7a7eed3ba39a612ec642b96e20702061f15", + "license": "MIT", + "source": "https://github.com/lerocha/chinook-database" + }, + { + "id": "northwind_with_declared_metadata", + "displayName": "Northwind (SQLite, declared metadata)", + "url": "https://github.com/jpwhite3/northwind-SQLite3/raw/main/dist/northwind.db", + "sha256": "2f4f5c68dfcd33ba27373eae48c7a4869800c68095ee0f9f0da494f83382a877", + "license": "MIT", + "source": "https://github.com/jpwhite3/northwind-SQLite3" + }, + { + "id": "sakila_with_declared_metadata", + "displayName": "Sakila (SQLite, declared metadata)", + "url": "https://raw.githubusercontent.com/bradleygrant/sakila-sqlite3/master/sakila_master.db", + "sha256": 
"88c91a4a1a6b61f9d3f35904c0a173c887b25e73f20c3c2fdb073818c06f4268", + "license": "BSD-2-Clause", + "source": "https://github.com/bradleygrant/sakila-sqlite3" + }, + { + "id": "adventureworkslt_with_declared_metadata", + "displayName": "AdventureWorksLT (SQLite, declared metadata)", + "url": "https://github.com/nuitsjp/AdventureWorks-for-SQLite/releases/download/Release-1_0_0/AdventureWorksLT.db", + "sha256": "f1a87a31f4efb5654f57a3b1ca47fac338972ceb7553673d66ea0bd9d55a7008", "_allowlist": "// pragma: allowlist secret", + "license": "MIT", + "source": "https://github.com/nuitsjp/AdventureWorks-for-SQLite" + } + ] +} diff --git a/scripts/published-package-smoke-config.mjs b/scripts/published-package-smoke-config.mjs new file mode 100644 index 00000000..148dd0e4 --- /dev/null +++ b/scripts/published-package-smoke-config.mjs @@ -0,0 +1,152 @@ +import assert from 'node:assert/strict'; +import { readFile } from 'node:fs/promises'; + +export const DEFAULT_VERSION_TAG = 'latest'; +export const NO_PACKAGE_REASON = + 'Set KLO_PUBLISHED_KLO_PACKAGE or release-policy.json publishedPackageSmoke.packageName to the published npm package name after the release decision.'; + +function optionalTrimmedString(value) { + return typeof value === 'string' && value.trim().length > 0 ? 
value.trim() : null; +} + +function assertSafePackageName(packageName, label) { + if (!/^(?:@[a-z0-9][a-z0-9._-]*\/)?[a-z0-9][a-z0-9._-]*$/.test(packageName)) { + throw new Error(`Invalid ${label}: ${packageName}`); + } +} + +function assertSafeVersionTag(version, label) { + if (!/^[a-zA-Z0-9][a-zA-Z0-9._+-]*$/.test(version)) { + throw new Error(`Invalid ${label}: ${version}`); + } +} + +function assertHttpRegistry(registry, label) { + const parsed = new URL(registry); + if (parsed.protocol !== 'https:' && parsed.protocol !== 'http:') { + throw new Error(`${label} must be an http(s) URL`); + } +} + +function normalizePolicyConfig(policyConfig = {}) { + if (policyConfig === null || policyConfig === undefined) { + return { packageName: null, version: DEFAULT_VERSION_TAG, registry: null }; + } + + if (typeof policyConfig !== 'object' || Array.isArray(policyConfig)) { + throw new Error('release-policy.json publishedPackageSmoke must be a JSON object'); + } + + const normalized = { + packageName: optionalTrimmedString(policyConfig.packageName), + version: optionalTrimmedString(policyConfig.version) ?? DEFAULT_VERSION_TAG, + registry: optionalTrimmedString(policyConfig.registry), + }; + assertSafeVersionTag(normalized.version, 'release-policy.json publishedPackageSmoke.version'); + if (normalized.registry) { + assertHttpRegistry(normalized.registry, 'release-policy.json publishedPackageSmoke.registry'); + } + return normalized; +} + +export function readPublishedPackageSmokeConfig(env = process.env, args = process.argv.slice(2), policyConfig = {}) { + const requireConfig = args.includes('--require-config'); + const policy = normalizePolicyConfig(policyConfig); + + const envPackageName = optionalTrimmedString(env.KLO_PUBLISHED_KLO_PACKAGE); + const packageName = envPackageName ?? policy.packageName; + + if (!packageName) { + return { + enabled: false, + requireConfig, + reason: NO_PACKAGE_REASON, + }; + } + + const configSource = envPackageName ? 
'environment' : 'release-policy'; + assertSafePackageName( + packageName, + configSource === 'environment' + ? 'KLO_PUBLISHED_KLO_PACKAGE' + : 'release-policy.json publishedPackageSmoke.packageName', + ); + + const packageVersion = optionalTrimmedString(env.KLO_PUBLISHED_KLO_VERSION) ?? policy.version; + assertSafeVersionTag( + packageVersion, + optionalTrimmedString(env.KLO_PUBLISHED_KLO_VERSION) + ? 'KLO_PUBLISHED_KLO_VERSION' + : 'release-policy.json publishedPackageSmoke.version', + ); + + const registry = optionalTrimmedString(env.KLO_PUBLISHED_KLO_REGISTRY) ?? policy.registry; + if (registry) { + assertHttpRegistry( + registry, + optionalTrimmedString(env.KLO_PUBLISHED_KLO_REGISTRY) + ? 'KLO_PUBLISHED_KLO_REGISTRY' + : 'release-policy.json publishedPackageSmoke.registry', + ); + } + + return { + enabled: true, + requireConfig, + configSource, + packageName, + packageVersion, + registry, + }; +} + +export async function readPublishedPackageSmokeConfigFromPolicyFile( + policyPath, + env = process.env, + args = process.argv.slice(2), +) { + const policy = JSON.parse(await readFile(policyPath, 'utf8')); + return readPublishedPackageSmokeConfig(env, args, policy.publishedPackageSmoke ?? {}); +} + +export function publishedPackageSpec(config) { + assert.equal(config.enabled, true, 'publishedPackageSpec requires an enabled smoke config'); + return `${config.packageName}@${config.packageVersion}`; +} + +export function buildPublishedPackageNpxCommand(config, args, label = 'published package command') { + const env = config.registry ? 
{ npm_config_registry: config.registry } : {}; + + return { + label, + command: 'npx', + args: ['--yes', publishedPackageSpec(config), ...args], + env, + }; +} + +export function buildPublishedPackageSmokeCommands(config, projectDir, emptyProjectDir) { + return [ + buildPublishedPackageNpxCommand(config, ['--version'], 'published package version'), + buildPublishedPackageNpxCommand( + config, + ['demo', '--project-dir', projectDir, '--no-input', '--plain'], + 'published package demo', + ), + buildPublishedPackageNpxCommand( + config, + ['agent', 'wiki', 'search', 'ARR contract', '--json', '--limit', '5', '--project-dir', projectDir], + 'published package wiki hybrid search', + ), + buildPublishedPackageNpxCommand( + config, + ['agent', 'sl', 'list', '--json', '--query', 'ARR', '--project-dir', projectDir], + 'published package semantic-layer hybrid search', + ), + buildPublishedPackageNpxCommand( + config, + ['agent', 'sl', 'list', '--json', '--query', 'revenue', '--project-dir', emptyProjectDir], + 'published package missing-project readiness', + ), + ]; +} diff --git a/scripts/published-package-smoke.mjs b/scripts/published-package-smoke.mjs new file mode 100644 index 00000000..e9d18f0d --- /dev/null +++ b/scripts/published-package-smoke.mjs @@ -0,0 +1,164 @@ +#!/usr/bin/env node + +import assert from 'node:assert/strict'; +import { execFile } from 'node:child_process'; +import { mkdir, mkdtemp, rm } from 'node:fs/promises'; +import { tmpdir } from 'node:os'; +import { dirname, join, resolve } from 'node:path'; +import { fileURLToPath, pathToFileURL } from 'node:url'; +import { promisify } from 'node:util'; + +import { + buildPublishedPackageSmokeCommands, + readPublishedPackageSmokeConfigFromPolicyFile, +} from './published-package-smoke-config.mjs'; + +export { + buildPublishedPackageNpxCommand, + buildPublishedPackageSmokeCommands, + publishedPackageSpec, + readPublishedPackageSmokeConfig, +} from './published-package-smoke-config.mjs'; + +const execFileAsync 
= promisify(execFile); +const SMOKE_TIMEOUT_MS = 180_000; + +function scriptRootDir() { + return resolve(dirname(fileURLToPath(import.meta.url)), '..'); +} + +function releasePolicyPath(rootDir = scriptRootDir()) { + return join(rootDir, 'release-policy.json'); +} + +async function runCommand(command, args, options = {}) { + process.stdout.write(`$ ${command} ${args.join(' ')}\n`); + try { + const result = await execFileAsync(command, args, { + cwd: options.cwd, + env: Object.assign({}, process.env, options.env ?? {}), + encoding: 'utf8', + maxBuffer: 10 * 1024 * 1024, + timeout: SMOKE_TIMEOUT_MS, + }); + return { code: 0, stdout: result.stdout, stderr: result.stderr }; + } catch (error) { + return { + code: typeof error.code === 'number' ? error.code : 1, + stdout: error.stdout ?? '', + stderr: error.stderr ?? error.message, + }; + } +} + +function requireSuccess(label, result) { + assert.equal( + result.code, + 0, + `${label} failed with code ${result.code}\nstdout:\n${result.stdout}\nstderr:\n${result.stderr}`, + ); +} + +function parseJson(label, text) { + try { + return JSON.parse(text); + } catch (error) { + throw new Error(`${label} did not produce JSON: ${error instanceof Error ? 
error.message : String(error)}\n${text}`); + } +} + +function assertHybridWikiSearch(result) { + const payload = parseJson('published package wiki search', result.stdout); + assert.ok(payload.totalFound > 0, 'published package wiki search should return results'); + assert.ok( + payload.results.some((entry) => Array.isArray(entry.matchReasons) && entry.matchReasons.length > 0), + 'published package wiki search should expose match reasons', + ); +} + +function assertHybridSlSearch(result) { + const payload = parseJson('published package semantic-layer search', result.stdout); + assert.ok(payload.totalSources > 0, 'published package semantic-layer search should return sources'); + assert.ok( + payload.sources.some((entry) => Array.isArray(entry.matchReasons) && entry.matchReasons.length > 0), + 'published package semantic-layer search should expose match reasons', + ); +} + +function assertMissingProjectReadiness(result, emptyProjectDir) { + assert.equal(result.code, 1, 'missing-project semantic-layer search should exit 1'); + assert.equal(result.stdout, '', 'missing-project semantic-layer search should not write JSON errors to stdout'); + + const payload = parseJson('published package missing-project semantic-layer search', result.stderr); + assert.deepEqual(payload, { + ok: false, + error: { + code: 'agent_sl_search_missing_project', + message: `Semantic-layer search needs an initialized KLO project at ${emptyProjectDir}.`, + nextSteps: [ + 'klo demo', + `klo setup --project-dir ${emptyProjectDir}`, + 'klo ingest ', + `klo agent sl list --json --query "revenue" --project-dir ${emptyProjectDir}`, + ], + }, + }); +} + +export async function runPublishedPackageSmoke(config) { + const root = await mkdtemp(join(tmpdir(), 'klo-published-package-smoke-')); + try { + const projectDir = join(root, 'demo-project'); + const emptyProjectDir = join(root, 'empty-project'); + await mkdir(emptyProjectDir, { recursive: true }); + + const commands = 
buildPublishedPackageSmokeCommands(config, projectDir, emptyProjectDir); + for (const command of commands.slice(0, 4)) { + const result = await runCommand(command.command, command.args, { env: command.env }); + requireSuccess(command.label, result); + if (command.label === 'published package wiki hybrid search') { + assertHybridWikiSearch(result); + } + if (command.label === 'published package semantic-layer hybrid search') { + assertHybridSlSearch(result); + } + } + + const missingProjectCommand = commands[4]; + const missingProject = await runCommand(missingProjectCommand.command, missingProjectCommand.args, { + env: missingProjectCommand.env, + }); + assertMissingProjectReadiness(missingProject, emptyProjectDir); + + process.stdout.write('published package hybrid search smoke verified\n'); + } finally { + await rm(root, { recursive: true, force: true }); + } +} + +async function main() { + const config = await readPublishedPackageSmokeConfigFromPolicyFile( + releasePolicyPath(), + process.env, + process.argv.slice(2), + ); + + if (!config.enabled) { + if (config.requireConfig) { + throw new Error(config.reason); + } + process.stdout.write(`Published KLO package smoke skipped: ${config.reason}\n`); + return; + } + + await runPublishedPackageSmoke(config); +} + +if (import.meta.url === pathToFileURL(process.argv[1] ?? '').href) { + try { + await main(); + } catch (error) { + process.stderr.write(`${error instanceof Error ? 
error.stack : String(error)}\n`); + process.exitCode = 1; + } +} diff --git a/scripts/published-package-smoke.test.mjs b/scripts/published-package-smoke.test.mjs new file mode 100644 index 00000000..cf33524e --- /dev/null +++ b/scripts/published-package-smoke.test.mjs @@ -0,0 +1,256 @@ +import assert from 'node:assert/strict'; +import { readFile } from 'node:fs/promises'; +import { describe, it } from 'node:test'; + +import { + buildPublishedPackageNpxCommand, + buildPublishedPackageSmokeCommands, + publishedPackageSpec, + readPublishedPackageSmokeConfig, +} from './published-package-smoke.mjs'; + +describe('published package smoke config', () => { + it('skips by default until a published package name is supplied', () => { + assert.deepEqual(readPublishedPackageSmokeConfig({}, []), { + enabled: false, + requireConfig: false, + reason: + 'Set KLO_PUBLISHED_KLO_PACKAGE or release-policy.json publishedPackageSmoke.packageName to the published npm package name after the release decision.', + }); + }); + + it('can require the published package config for post-publication CI', () => { + assert.deepEqual(readPublishedPackageSmokeConfig({}, ['--require-config']), { + enabled: false, + requireConfig: true, + reason: + 'Set KLO_PUBLISHED_KLO_PACKAGE or release-policy.json publishedPackageSmoke.packageName to the published npm package name after the release decision.', + }); + }); + + it('reads the package, version, and registry from environment variables', () => { + assert.deepEqual( + readPublishedPackageSmokeConfig( + { + KLO_PUBLISHED_KLO_PACKAGE: '@klo/cli-public', + KLO_PUBLISHED_KLO_VERSION: 'latest', + KLO_PUBLISHED_KLO_REGISTRY: 'https://registry.npmjs.org/', + }, + [], + ), + { + enabled: true, + requireConfig: false, + configSource: 'environment', + packageName: '@klo/cli-public', + packageVersion: 'latest', + registry: 'https://registry.npmjs.org/', + }, + ); + }); + + it('reads the package, version, and registry from release policy when env vars are absent', () 
=> { + assert.deepEqual( + readPublishedPackageSmokeConfig( + {}, + [], + { + packageName: '@klo/cli-public', + version: '2026.5.8', + registry: 'https://registry.npmjs.org/', + }, + ), + { + enabled: true, + requireConfig: false, + configSource: 'release-policy', + packageName: '@klo/cli-public', + packageVersion: '2026.5.8', + registry: 'https://registry.npmjs.org/', + }, + ); + }); + + it('lets environment variables override release policy values', () => { + assert.deepEqual( + readPublishedPackageSmokeConfig( + { + KLO_PUBLISHED_KLO_PACKAGE: '@klo/cli-from-env', + KLO_PUBLISHED_KLO_VERSION: 'latest', + }, + [], + { + packageName: '@klo/cli-from-policy', + version: '2026.5.8', + registry: 'https://registry.npmjs.org/', + }, + ), + { + enabled: true, + requireConfig: false, + configSource: 'environment', + packageName: '@klo/cli-from-env', + packageVersion: 'latest', + registry: 'https://registry.npmjs.org/', + }, + ); + }); + + it('rejects package names that would be unsafe as npx package specs', () => { + assert.throws( + () => readPublishedPackageSmokeConfig({ KLO_PUBLISHED_KLO_PACKAGE: '--package=@evil/pkg' }, []), + /Invalid KLO_PUBLISHED_KLO_PACKAGE/, + ); + assert.throws( + () => readPublishedPackageSmokeConfig({ KLO_PUBLISHED_KLO_PACKAGE: '@klo/cli public' }, []), + /Invalid KLO_PUBLISHED_KLO_PACKAGE/, + ); + assert.throws( + () => + readPublishedPackageSmokeConfig( + {}, + [], + { + packageName: '@klo/cli public', + version: 'latest', + registry: null, + }, + ), + /Invalid release-policy\.json publishedPackageSmoke\.packageName/, + ); + }); + + it('rejects unsafe version tags and non-HTTP registries', () => { + assert.throws( + () => + readPublishedPackageSmokeConfig( + { + KLO_PUBLISHED_KLO_PACKAGE: '@klo/cli-public', + KLO_PUBLISHED_KLO_VERSION: '--tag latest', + }, + [], + ), + /Invalid KLO_PUBLISHED_KLO_VERSION/, + ); + assert.throws( + () => + readPublishedPackageSmokeConfig( + { + KLO_PUBLISHED_KLO_PACKAGE: '@klo/cli-public', + 
KLO_PUBLISHED_KLO_REGISTRY: 'file:///tmp/npm', + }, + [], + ), + /KLO_PUBLISHED_KLO_REGISTRY must be an http\(s\) URL/, + ); + }); +}); + +describe('published package smoke command construction', () => { + const config = { + enabled: true, + requireConfig: false, + packageName: '@klo/cli-public', + packageVersion: 'latest', + registry: 'https://registry.npmjs.org/', + }; + + it('builds the npx package spec from package name and version tag', () => { + assert.equal(publishedPackageSpec(config), '@klo/cli-public@latest'); + }); + + it('builds npx commands with a registry env patch instead of shell interpolation', () => { + assert.deepEqual(buildPublishedPackageNpxCommand(config, ['--version']), { + label: 'published package command', + command: 'npx', + args: ['--yes', '@klo/cli-public@latest', '--version'], + env: { npm_config_registry: 'https://registry.npmjs.org/' }, + }); + }); + + it('builds the full hybrid-search smoke command list', () => { + assert.deepEqual(buildPublishedPackageSmokeCommands(config, '/tmp/klo-smoke/demo', '/tmp/klo-smoke/empty'), [ + { + label: 'published package version', + command: 'npx', + args: ['--yes', '@klo/cli-public@latest', '--version'], + env: { npm_config_registry: 'https://registry.npmjs.org/' }, + }, + { + label: 'published package demo', + command: 'npx', + args: [ + '--yes', + '@klo/cli-public@latest', + 'demo', + '--project-dir', + '/tmp/klo-smoke/demo', + '--no-input', + '--plain', + ], + env: { npm_config_registry: 'https://registry.npmjs.org/' }, + }, + { + label: 'published package wiki hybrid search', + command: 'npx', + args: [ + '--yes', + '@klo/cli-public@latest', + 'agent', + 'wiki', + 'search', + 'ARR contract', + '--json', + '--limit', + '5', + '--project-dir', + '/tmp/klo-smoke/demo', + ], + env: { npm_config_registry: 'https://registry.npmjs.org/' }, + }, + { + label: 'published package semantic-layer hybrid search', + command: 'npx', + args: [ + '--yes', + '@klo/cli-public@latest', + 'agent', + 'sl', + 'list', 
+ '--json', + '--query', + 'ARR', + '--project-dir', + '/tmp/klo-smoke/demo', + ], + env: { npm_config_registry: 'https://registry.npmjs.org/' }, + }, + { + label: 'published package missing-project readiness', + command: 'npx', + args: [ + '--yes', + '@klo/cli-public@latest', + 'agent', + 'sl', + 'list', + '--json', + '--query', + 'revenue', + '--project-dir', + '/tmp/klo-smoke/empty', + ], + env: { npm_config_registry: 'https://registry.npmjs.org/' }, + }, + ]); + }); + + it('exposes the smoke through the package release script', async () => { + const packageJson = JSON.parse(await readFile(new URL('../package.json', import.meta.url), 'utf8')); + + assert.equal( + packageJson.scripts['release:published-smoke'], + 'node scripts/published-package-smoke.mjs --require-config', + ); + }); +}); diff --git a/scripts/relationship-orbit-verification.mjs b/scripts/relationship-orbit-verification.mjs new file mode 100644 index 00000000..8bc6e51c --- /dev/null +++ b/scripts/relationship-orbit-verification.mjs @@ -0,0 +1,330 @@ +#!/usr/bin/env node + +import { mkdir as fsMkdir, writeFile as fsWriteFile } from 'node:fs/promises'; +import { execFile as childExecFile } from 'node:child_process'; +import { dirname, resolve } from 'node:path'; +import { fileURLToPath, pathToFileURL } from 'node:url'; +import { promisify } from 'node:util'; +import { runWorkspaceKlo } from './run-klo.mjs'; + +const scriptDir = dirname(fileURLToPath(import.meta.url)); +const kloRootDir = resolve(scriptDir, '..'); +const repoRootDir = resolve(kloRootDir, '..'); +const defaultProjectDir = resolve(kloRootDir, 'examples/orbit-relationship-verification'); +const defaultReportPath = resolve( + kloRootDir, + 'examples/orbit-relationship-verification/reports/orbit-verification.md', +); +const defaultExecFile = promisify(childExecFile); + +class BufferWriter { + chunks = []; + + write(chunk) { + this.chunks.push(String(chunk)); + } + + text() { + return this.chunks.join(''); + } +} + +function dateOnly(date) 
{ + return date.toISOString().slice(0, 10); +} + +function trimForReport(value) { + const trimmed = value.trim(); + return trimmed.length > 0 ? trimmed : 'none'; +} + +export function defaultOrbitVerificationProjectDir() { + return defaultProjectDir; +} + +function shellCommand(argv) { + return ['pnpm', 'run', 'klo', '--', ...argv].join(' '); +} + +function firstNonEmptyLine(...values) { + for (const value of values) { + const line = value + .split('\n') + .map((candidate) => candidate.trim()) + .find((candidate) => candidate.length > 0); + if (line) { + return line; + } + } + return 'Orbit scan command failed before producing diagnostic output'; +} + +function parseArgs(argv) { + const options = { + connectionId: process.env.KLO_ORBIT_CONNECTION_ID ?? 'orbit', + projectDir: process.env.KLO_ORBIT_PROJECT_DIR ?? defaultProjectDir, + reportPath: defaultReportPath, + }; + + for (let index = 0; index < argv.length; index += 1) { + const arg = argv[index]; + if (arg === '--connection-id' || arg === '--connection') { + options.connectionId = argv[index + 1]; + index += 1; + continue; + } + if (arg === '--project-dir') { + options.projectDir = argv[index + 1]; + index += 1; + continue; + } + if (arg === '--report-path') { + options.reportPath = argv[index + 1]; + index += 1; + continue; + } + throw new Error(`Unknown option: ${arg}`); + } + + return options; +} + +export function buildOrbitScanArgv(input) { + return ['dev', 'scan', input.connectionId, '--enrich', '--project-dir', input.projectDir]; +} + +export function buildOrbitReportArgv(input) { + return ['dev', 'scan', 'report', '--json', '--project-dir', input.projectDir, input.runId]; +} + +export function extractRunId(stdout) { + const match = stdout.match(/^Run:\s*(\S+)/m); + return match?.[1] ?? 
null; +} + +function listLines(values) { + if (!values || values.length === 0) { + return ['- none']; + } + return values.map((value) => `- \`${value}\``); +} + +function warningLines(report) { + if (!Array.isArray(report.warnings) || report.warnings.length === 0) { + return ['- none']; + } + return report.warnings.map((warning) => `- \`${warning.code}\`: ${warning.message}`); +} + +function formatSuccess(result) { + const relationships = result.report.relationships ?? { accepted: 0, review: 0, rejected: 0, skipped: 0 }; + const enrichment = result.report.enrichment ?? {}; + const artifactPaths = result.report.artifactPaths ?? {}; + + return [ + '## Outcome', + '', + '- Exit code: 0', + `- Run: \`${result.report.runId ?? 'unknown'}\``, + `- Connection: \`${result.report.connectionId ?? result.connectionId}\``, + `- Mode: \`${result.report.mode ?? 'unknown'}\``, + `- Sync: \`${result.report.syncId ?? 'unknown'}\``, + '', + '## Relationship Summary', + '', + `- Accepted: ${relationships.accepted ?? 0}`, + `- Review: ${relationships.review ?? 0}`, + `- Rejected: ${relationships.rejected ?? 0}`, + `- Skipped: ${relationships.skipped ?? 0}`, + '', + '## Enrichment Summary', + '', + `- Deterministic relationships: \`${enrichment.deterministicRelationships ?? 'unknown'}\``, + `- Statistical validation: \`${enrichment.statisticalValidation ?? 'unknown'}\``, + `- LLM relationship validation: \`${enrichment.llmRelationshipValidation ?? 'unknown'}\``, + '', + '## Artifacts', + '', + `- Report: \`${artifactPaths.reportPath ?? 'none'}\``, + `- Raw sources: \`${artifactPaths.rawSourcesDir ?? 
/**
 * Runs the Orbit relationship verification: executes the KLO scan, then the
 * JSON scan report for the printed run id, and writes Markdown evidence to
 * `reportPath`.
 *
 * Every failure mode is recorded as a `blocked` result (and still written to
 * the report) rather than thrown: a non-zero scan exit code, a scan that did
 * not print a run id, a non-zero report exit code, and report output that is
 * not valid JSON.
 *
 * @param {object} [options] Overrides; every field is optional.
 * @param {string} [options.connectionId] Defaults to `KLO_ORBIT_CONNECTION_ID`, then `'orbit'`.
 * @param {string} [options.projectDir] Defaults to `KLO_ORBIT_PROJECT_DIR`, then the default project dir.
 * @param {string} [options.reportPath] Destination of the Markdown report.
 * @param {string} [options.rootDir] Root directory handed to the workspace runner.
 * @param {Function} [options.runWorkspaceKlo] Runner invoked as `(argv, runnerOptions)`; resolves to an exit code.
 * @param {Function} [options.execFile] Passed through to the runner so output is buffered.
 * @param {Function} [options.now] Clock returning a `Date`.
 * @param {Function} [options.mkdir] Replacement for the fs `mkdir`.
 * @param {Function} [options.writeFile] Replacement for the fs `writeFile`.
 * @param {object} [options.env] Environment for the runner; defaults to `orbitVerificationEnv(projectDir)`.
 * @returns {Promise<object>} The `success` or `blocked` result that was written to the report.
 */
export async function runOrbitVerification(options = {}) {
  const connectionId = options.connectionId ?? process.env.KLO_ORBIT_CONNECTION_ID ?? 'orbit';
  const projectDir = options.projectDir ?? process.env.KLO_ORBIT_PROJECT_DIR ?? defaultProjectDir;
  const reportPath = options.reportPath ?? defaultReportPath;
  const rootDir = options.rootDir ?? kloRootDir;
  const runner = options.runWorkspaceKlo ?? runWorkspaceKlo;
  const execFile = options.execFile ?? defaultExecFile;
  const now = options.now ?? (() => new Date());
  const mkdir = options.mkdir ?? fsMkdir;
  const writeFile = options.writeFile ?? fsWriteFile;
  const date = dateOnly(now());
  const env = options.env ?? orbitVerificationEnv(projectDir);
  const runWithEnv = (argv, runnerOptions) => runner(argv, { ...runnerOptions, env });

  const scanArgv = buildOrbitScanArgv({ connectionId, projectDir });

  // Single place that shapes a blocked result so every failure path reports the same fields.
  const blocked = (details) => ({
    status: 'blocked',
    date,
    connectionId,
    projectDir,
    scanCommand: shellCommand(scanArgv),
    ...details,
  });

  const verify = async () => {
    const scan = await runBufferedWorkspaceKlo(runWithEnv, scanArgv, rootDir, execFile);
    if (scan.exitCode !== 0) {
      return blocked({
        scanExitCode: scan.exitCode,
        blocker: firstNonEmptyLine(scan.stderr, scan.stdout),
        scanStdout: scan.stdout,
        scanStderr: scan.stderr,
      });
    }

    const runId = extractRunId(scan.stdout);
    if (!runId) {
      return blocked({
        scanExitCode: scan.exitCode,
        blocker: 'KLO scan completed without printing a Run id',
        scanStdout: scan.stdout,
        scanStderr: scan.stderr,
      });
    }

    const reportArgv = buildOrbitReportArgv({ projectDir, runId });
    const reportOutput = await runBufferedWorkspaceKlo(runWithEnv, reportArgv, rootDir, execFile);
    // Failures after the scan succeeded carry both commands' output as evidence.
    const combinedOutput = {
      scanStdout: `${scan.stdout}\n${reportOutput.stdout}`.trim(),
      scanStderr: `${scan.stderr}\n${reportOutput.stderr}`.trim(),
    };
    if (reportOutput.exitCode !== 0) {
      return blocked({
        scanExitCode: reportOutput.exitCode,
        blocker: firstNonEmptyLine(reportOutput.stderr, reportOutput.stdout),
        ...combinedOutput,
      });
    }

    let report;
    try {
      report = JSON.parse(reportOutput.stdout);
    } catch (error) {
      // A report command that exits 0 but prints non-JSON must still produce
      // written evidence instead of aborting before the report file exists.
      const reason = error instanceof Error ? error.message : String(error);
      return blocked({
        scanExitCode: reportOutput.exitCode,
        blocker: `KLO scan report did not print valid JSON: ${reason}`,
        ...combinedOutput,
      });
    }

    return {
      status: 'success',
      date,
      connectionId,
      projectDir,
      scanCommand: shellCommand(scanArgv),
      reportCommand: shellCommand(reportArgv),
      scanExitCode: scan.exitCode,
      reportExitCode: reportOutput.exitCode,
      scanStdout: scan.stdout,
      scanStderr: scan.stderr,
      report,
    };
  };

  const result = await verify();

  await mkdir(dirname(reportPath), { recursive: true });
  await writeFile(reportPath, formatOrbitVerificationMarkdown(result));
  return result;
}
enrichment because scan.enrichment.mode is not configured; relationship discovery still ran.', + recoverable: true, + }, + ], + artifactPaths: { + reportPath: 'raw-sources/orbit/live-database/2026-05-07-100000-scan-enriched-1/reports/scan-report.json', + rawSourcesDir: 'raw-sources/orbit/live-database/2026-05-07-100000-scan-enriched-1', + manifestShards: ['semantic-layer/orbit/_schema/orbit_analytics.yaml'], + enrichmentArtifacts: [ + 'raw-sources/orbit/live-database/2026-05-07-100000-scan-enriched-1/enrichment/relationships.json', + 'raw-sources/orbit/live-database/2026-05-07-100000-scan-enriched-1/enrichment/relationship-profile.json', + 'raw-sources/orbit/live-database/2026-05-07-100000-scan-enriched-1/enrichment/relationship-diagnostics.json', + ], + }, + }); +} + +describe('relationship Orbit verification helper', () => { + it('exposes the Orbit verification command from the KLO workspace package', async () => { + const packageJson = JSON.parse(await readFile(new URL('../package.json', import.meta.url), 'utf8')); + + assert.equal( + packageJson.scripts['relationships:verify-orbit'], + 'node scripts/relationship-orbit-verification.mjs', + ); + }); + + it('builds the current KLO launcher arguments for scan and JSON report commands', () => { + assert.deepEqual(buildOrbitScanArgv({ connectionId: 'orbit', projectDir: '/tmp/orbit-project' }), [ + 'dev', + 'scan', + 'orbit', + '--enrich', + '--project-dir', + '/tmp/orbit-project', + ]); + assert.deepEqual(buildOrbitReportArgv({ projectDir: '/tmp/orbit-project', runId: 'scan-orbit-1' }), [ + 'dev', + 'scan', + 'report', + '--json', + '--project-dir', + '/tmp/orbit-project', + 'scan-orbit-1', + ]); + }); + + it('uses the checked-in Orbit verification project by default', async () => { + const calls = []; + const envs = []; + const writes = []; + const defaultProjectDir = defaultOrbitVerificationProjectDir(); + + const result = await runOrbitVerification({ + reportPath: '/tmp/orbit-report.md', + now: () => new 
Date('2026-05-07T10:00:00.000Z'), + mkdir: async () => {}, + writeFile: async (path, content) => { + writes.push({ path, content }); + }, + runWorkspaceKlo: async (argv, options) => { + calls.push(argv); + envs.push(options.env); + if (argv[2] === 'report') { + options.stdout.write(successReportJson()); + return 0; + } + options.stdout.write('KLO scan completed\nRun: scan-orbit-1\nConnection: orbit\n'); + return 0; + }, + }); + + assert.equal(result.status, 'success'); + assert.deepEqual(calls, [ + ['dev', 'scan', 'orbit', '--enrich', '--project-dir', defaultProjectDir], + ['dev', 'scan', 'report', '--json', '--project-dir', defaultProjectDir, 'scan-orbit-1'], + ]); + assert.equal(envs[0].GIT_CEILING_DIRECTORIES, dirname(defaultProjectDir)); + assert.equal(envs[1].GIT_CEILING_DIRECTORIES, dirname(defaultProjectDir)); + assert.equal(writes.length, 1); + assert.match(writes[0].content, new RegExp(defaultProjectDir.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'))); + }); + + it('extracts the run id from human scan output', () => { + assert.equal(extractRunId(`KLO scan completed\nStatus: done\nRun: scan-orbit-1\nConnection: orbit\n`), 'scan-orbit-1'); + assert.equal(extractRunId('KLO scan completed without a run line\n'), null); + }); + + it('formats successful Orbit verification evidence from the JSON report', () => { + const markdown = formatOrbitVerificationMarkdown({ + status: 'success', + date: '2026-05-07', + connectionId: 'orbit', + projectDir: '/tmp/orbit-project', + scanCommand: 'pnpm run klo -- dev scan orbit --enrich --project-dir /tmp/orbit-project', + reportCommand: 'pnpm run klo -- dev scan report --json --project-dir /tmp/orbit-project scan-orbit-1', + scanExitCode: 0, + reportExitCode: 0, + scanStdout: 'KLO scan completed\nRun: scan-orbit-1\n', + scanStderr: '', + report: JSON.parse(successReportJson()), + }); + + assert.match(markdown, /# KLO Relationship Discovery Orbit Verification/); + assert.match(markdown, /Outcome/); + assert.match(markdown, /Exit code: 
0/); + assert.match(markdown, /Accepted: 14/); + assert.match(markdown, /Review: 8/); + assert.match(markdown, /Rejected: 91/); + assert.match(markdown, /semantic-layer\/orbit\/_schema\/orbit_analytics\.yaml/); + assert.match(markdown, /relationship-diagnostics\.json/); + assert.match(markdown, /scan_enrichment_backend_not_configured/); + }); + + it('formats blocked Orbit verification evidence from the current failing command', () => { + const markdown = formatOrbitVerificationMarkdown({ + status: 'blocked', + date: '2026-05-07', + connectionId: 'orbit', + projectDir: '/tmp/orbit-project', + scanCommand: 'pnpm run klo -- dev scan orbit --enrich --project-dir /tmp/orbit-project', + scanExitCode: 1, + blocker: 'Connection "orbit" was not found', + scanStdout: '', + scanStderr: 'Connection "orbit" was not found\n', + }); + + assert.match(markdown, /Exit code: 1/); + assert.match(markdown, /Connection "orbit" was not found/); + assert.match(markdown, /Orbit verification was not executed because the current local Orbit scan command failed/); + assert.doesNotMatch(markdown, /scan\.enrichment\.mode is required/); + }); + + it('runs scan then JSON report and writes success Markdown', async () => { + const calls = []; + const writes = []; + const result = await runOrbitVerification({ + connectionId: 'orbit', + projectDir: '/tmp/orbit-project', + reportPath: '/tmp/orbit-report.md', + now: () => new Date('2026-05-07T10:00:00.000Z'), + mkdir: async () => {}, + writeFile: async (path, content) => { + writes.push({ path, content }); + }, + runWorkspaceKlo: async (argv, options) => { + calls.push(argv); + if (argv[2] === 'report') { + options.stdout.write(successReportJson()); + return 0; + } + options.stdout.write('KLO scan completed\nRun: scan-orbit-1\nConnection: orbit\n'); + return 0; + }, + }); + + assert.equal(result.status, 'success'); + assert.deepEqual(calls, [ + ['dev', 'scan', 'orbit', '--enrich', '--project-dir', '/tmp/orbit-project'], + ['dev', 'scan', 'report', 
'--json', '--project-dir', '/tmp/orbit-project', 'scan-orbit-1'], + ]); + assert.equal(writes.length, 1); + assert.equal(writes[0].path, '/tmp/orbit-report.md'); + assert.match(writes[0].content, /Accepted: 14/); + }); + + it('writes blocked Markdown when the scan command fails before a run id exists', async () => { + const writes = []; + const result = await runOrbitVerification({ + connectionId: 'orbit', + projectDir: '/tmp/orbit-project', + reportPath: '/tmp/orbit-report.md', + now: () => new Date('2026-05-07T10:00:00.000Z'), + mkdir: async () => {}, + writeFile: async (path, content) => { + writes.push({ path, content }); + }, + runWorkspaceKlo: async (_argv, options) => { + options.stderr.write('Connection "orbit" was not found\n'); + return 1; + }, + }); + + assert.equal(result.status, 'blocked'); + assert.equal(result.scanExitCode, 1); + assert.equal(writes.length, 1); + assert.match(writes[0].content, /Connection "orbit" was not found/); + }); + + it('runs the workspace launcher in buffered mode so real scan errors are captured', async () => { + let sawExecFile = false; + const result = await runOrbitVerification({ + connectionId: 'orbit', + projectDir: '/tmp/orbit-project', + reportPath: '/tmp/orbit-report.md', + now: () => new Date('2026-05-07T10:00:00.000Z'), + mkdir: async () => {}, + writeFile: async () => {}, + execFile: async () => ({ stdout: '', stderr: '' }), + runWorkspaceKlo: async (_argv, options) => { + sawExecFile = typeof options.execFile === 'function'; + options.stderr.write('ENOENT: no such file or directory, open \'/tmp/orbit-project/klo.yaml\'\n'); + return 1; + }, + }); + + assert.equal(sawExecFile, true); + assert.equal(result.blocker, "ENOENT: no such file or directory, open '/tmp/orbit-project/klo.yaml'"); + }); +}); diff --git a/scripts/release-readiness.mjs b/scripts/release-readiness.mjs new file mode 100644 index 00000000..24839687 --- /dev/null +++ b/scripts/release-readiness.mjs @@ -0,0 +1,246 @@ +#!/usr/bin/env node + +import 
/**
 * Throws unless `value` is an array whose entries are all strings.
 *
 * @param {unknown} value Candidate value taken from the release policy.
 * @param {string} label Human-readable field name used as the error prefix.
 * @throws {Error} `<label> must be an array of strings` when validation fails.
 */
function assertStringArray(value, label) {
  const onlyStrings = Array.isArray(value) && !value.some((item) => typeof item !== 'string');
  if (onlyStrings) {
    return;
  }
  throw new Error(`${label} must be an array of strings`);
}
/**
 * Throws unless `actual` and `expected` hold the same members, ignoring order.
 *
 * Both inputs are copied before sorting, so the caller's arrays are never
 * mutated. Equality is decided by comparing the JSON form of the sorted copies.
 *
 * @param {Iterable<string>} actual Members found in the release policy.
 * @param {Iterable<string>} expected Members derived from package metadata.
 * @param {string} label Field name used as the error prefix.
 * @throws {Error} `<label> mismatch: expected …, got …` when the members differ.
 */
function assertSameMembers(actual, expected, label) {
  const sortedCopy = (members) => [...members].sort();
  const got = sortedCopy(actual);
  const wanted = sortedCopy(expected);

  if (JSON.stringify(got) === JSON.stringify(wanted)) {
    return;
  }
  throw new Error(`${label} mismatch: expected ${wanted.join(', ')}, got ${got.join(', ')}`);
}
/**
 * Collects the `packageName` of every metadata entry in the given ecosystem,
 * keeping the entries' original order.
 *
 * @param {Array<{ecosystem: string, packageName: string}>} metadata Package release metadata entries.
 * @param {string} ecosystem Ecosystem to select; compared with strict equality.
 * @returns {string[]} Package names of the matching entries.
 */
function metadataNames(metadata, ecosystem) {
  return metadata.reduce((names, entry) => {
    if (entry.ecosystem === ecosystem) {
      names.push(entry.packageName);
    }
    return names;
  }, []);
}
'ci-artifact-only policy' : `${policy.releaseMode} policy`; + + if (policy.npm.publish !== false) { + throw new Error(`${policyLabel} must keep npm.publish false`); + } + if (policy.python.publish !== false) { + throw new Error(`${policyLabel} must keep python.publish false`); + } + if (policy.npm.registry !== null) { + throw new Error(`${policyLabel} must keep npm.registry null`); + } + if (policy.python.repository !== null) { + throw new Error(`${policyLabel} must keep python.repository null`); + } + + assertSameMembers(policy.npm.packages, metadataNames(metadata, 'npm'), 'Release policy npm.packages'); + assertSameMembers(policy.python.packages, metadataNames(metadata, 'python'), 'Release policy python.packages'); + + for (const entry of metadata) { + if (entry.releaseMode !== CI_ARTIFACT_ONLY_RELEASE_MODE) { + throw new Error(`Package ${entry.packageName} releaseMode must remain ci-artifact-only`); + } + if (entry.ecosystem === 'npm') { + if (entry.private !== true) { + throw new Error(`${policyLabel} npm package ${entry.packageName} must remain private`); + } + if (!entry.packageVersion.endsWith('-private')) { + throw new Error(`${policyLabel} npm package ${entry.packageName} must use a private version suffix`); + } + } + } +} + +export async function releaseReadinessReport(rootDir = scriptRootDir()) { + const policy = validateReleasePolicy(await readReleasePolicy(rootDir)); + const layout = packageArtifactLayout(rootDir); + const manifest = await verifyArtifactManifest(layout); + const metadata = await packageReleaseMetadata(rootDir); + + assertNonPublishingArtifactPolicy(policy, metadata); + + return { + schemaVersion: 1, + releaseMode: policy.releaseMode, + sourceRevision: manifest.sourceRevision, + npmPublishEnabled: policy.npm.publish, + pythonPublishEnabled: policy.python.publish, + packageNames: metadata.map((entry) => entry.packageName), + publishedPackageSmokeGate: publishedPackageSmokeGate(policy), + blockedPublishingDecisions: 
/**
 * CLI entry point: builds the release readiness report and prints it to stdout.
 *
 * With `--json` anywhere in `process.argv` the report is printed as indented
 * JSON and nothing else. Otherwise a line-oriented summary is printed.
 */
async function main() {
  const report = await releaseReadinessReport();

  if (process.argv.includes('--json')) {
    process.stdout.write(`${JSON.stringify(report, null, 2)}\n`);
    return;
  }

  const gate = report.publishedPackageSmokeGate;
  const summaryLines = [
    `KLO release mode: ${report.releaseMode}`,
    `KLO source revision: ${report.sourceRevision ?? 'local'}`,
    `KLO packages: ${report.packageNames.join(', ')}`,
    `Published package smoke: ${gate.status}`,
    `Published package smoke script: ${gate.script}`,
    `Published package smoke reason: ${gate.reason}`,
    `Published package smoke package: ${gate.packageName ?? 'not configured'}`,
    `Published package smoke version: ${gate.version}`,
    `Published package smoke registry: ${gate.registry ?? 'default npm registry'}`,
    'Registry publishing remains disabled by release-policy.json.',
    'Required decisions before publishing:',
  ];
  for (const line of summaryLines) {
    process.stdout.write(`${line}\n`);
  }

  // An empty list is a valid state, so say so explicitly rather than leaving
  // the header with nothing under it.
  if (report.blockedPublishingDecisions.length === 0) {
    process.stdout.write('- none\n');
    return;
  }
  for (const decision of report.blockedPublishingDecisions) {
    process.stdout.write(`- ${decision}\n`);
  }
}
error.stack : String(error)}\n`); + process.exitCode = 1; + } +} diff --git a/scripts/release-readiness.test.mjs b/scripts/release-readiness.test.mjs new file mode 100644 index 00000000..f22fe42e --- /dev/null +++ b/scripts/release-readiness.test.mjs @@ -0,0 +1,376 @@ +import assert from 'node:assert/strict'; +import { mkdir, mkdtemp, rm, writeFile } from 'node:fs/promises'; +import { tmpdir } from 'node:os'; +import { join } from 'node:path'; +import { describe, it } from 'node:test'; + +import { NPM_ARTIFACT_PACKAGES, packageArtifactLayout, writeArtifactManifest } from './package-artifacts.mjs'; +import { readReleasePolicy, releasePolicyPath, releaseReadinessReport } from './release-readiness.mjs'; + +async function writeJson(path, value) { + await writeFile(path, `${JSON.stringify(value, null, 2)}\n`); +} + +async function writeReleaseMetadataInputs(root, options = {}) { + for (const packageInfo of NPM_ARTIFACT_PACKAGES) { + await mkdir(join(root, packageInfo.packageRoot), { recursive: true }); + await writeJson(join(root, packageInfo.packageRoot, 'package.json'), { + name: packageInfo.name, + version: '0.0.0-private', + private: + packageInfo.name === '@klo/context' + ? (options.contextPrivate ?? true) + : packageInfo.name === '@klo/cli' + ? (options.cliPrivate ?? 
/**
 * Builds a release-policy fixture for the tests.
 *
 * `npm` and `python` overrides are merged field by field into their defaults.
 * Every other override replaces the corresponding top-level key wholesale,
 * including `publishedPackageSmoke` and `requiredBeforePublishing`.
 *
 * @param {object} [overrides] Partial policy to layer over the defaults.
 * @returns {object} A complete release policy object.
 */
function releasePolicy(overrides = {}) {
  const { npm: npmOverrides = {}, python: pythonOverrides = {}, ...policyOverrides } = overrides;

  const npm = Object.assign(
    {
      publish: false,
      registry: null,
      packages: NPM_ARTIFACT_PACKAGES.map((packageInfo) => packageInfo.name),
    },
    npmOverrides,
  );
  const python = Object.assign(
    {
      publish: false,
      repository: null,
      packages: ['klo-sl', 'klo-daemon'],
    },
    pythonOverrides,
  );

  const defaults = {
    schemaVersion: 1,
    releaseMode: 'ci-artifact-only',
    npm,
    python,
    publishedPackageSmoke: {
      packageName: null,
      version: 'latest',
      registry: null,
    },
    requiredBeforePublishing: [
      'Choose npm registry and package visibility.',
      'Choose Python package repository.',
      'Choose public release versions.',
      'Configure registry credentials outside source control.',
      'Choose release tag and provenance policy.',
    ],
  };

  return Object.assign(defaults, policyOverrides);
}
package visibility.', + 'Choose Python package repository.', + 'Choose public release versions.', + 'Configure registry credentials outside source control.', + 'Choose release tag and provenance policy.', + ], + }); + } finally { + await rm(root, { recursive: true, force: true }); + } + }); + + it('reports policy-controlled published package smoke config when present', async () => { + const root = await mkdtemp(join(tmpdir(), 'klo-release-smoke-config-test-')); + try { + await writeReadyFixture(root, { + policy: releasePolicy({ + publishedPackageSmoke: { + packageName: '@klo/cli-public', + version: '2026.5.8', + registry: 'https://registry.npmjs.org/', + }, + }), + }); + + const report = await releaseReadinessReport(root); + + assert.deepEqual(report.publishedPackageSmokeGate, { + status: 'not_required', + script: 'pnpm run release:published-smoke', + reason: 'Published package smoke remains pending until release-policy.json enables npm registry publishing.', + configSource: 'release-policy', + packageName: '@klo/cli-public', + version: '2026.5.8', + registry: 'https://registry.npmjs.org/', + }); + } finally { + await rm(root, { recursive: true, force: true }); + } + }); + + it('reports required published package smoke when release mode requires it', async () => { + const root = await mkdtemp(join(tmpdir(), 'klo-release-smoke-required-test-')); + try { + await writeReadyFixture(root, { + policy: releasePolicy({ + releaseMode: 'published-package-smoke-required', + publishedPackageSmoke: { + packageName: '@klo/cli-public', + version: '2026.5.8', + registry: 'https://registry.npmjs.org/', + }, + requiredBeforePublishing: [], + }), + }); + + const report = await releaseReadinessReport(root); + + assert.deepEqual(report, { + schemaVersion: 1, + releaseMode: 'published-package-smoke-required', + sourceRevision: 'abc123', + npmPublishEnabled: false, + pythonPublishEnabled: false, + packageNames: [...NPM_ARTIFACT_PACKAGES.map((packageInfo) => packageInfo.name), 'klo-sl', 
'klo-daemon'], + publishedPackageSmokeGate: { + status: 'required', + script: 'pnpm run release:published-smoke', + reason: 'Run the published package smoke before accepting the hybrid-search release.', + configSource: 'release-policy', + packageName: '@klo/cli-public', + version: '2026.5.8', + registry: 'https://registry.npmjs.org/', + }, + blockedPublishingDecisions: [], + }); + } finally { + await rm(root, { recursive: true, force: true }); + } + }); + + it('rejects required published smoke mode without a package name', async () => { + const root = await mkdtemp(join(tmpdir(), 'klo-release-smoke-required-missing-config-test-')); + try { + await writeReadyFixture(root, { + policy: releasePolicy({ + releaseMode: 'published-package-smoke-required', + requiredBeforePublishing: [], + }), + }); + + await assert.rejects( + () => releaseReadinessReport(root), + /published-package-smoke-required release mode requires release-policy\.json publishedPackageSmoke\.packageName/, + ); + } finally { + await rm(root, { recursive: true, force: true }); + } + }); + + it('rejects required published smoke mode while publishing decisions remain', async () => { + const root = await mkdtemp(join(tmpdir(), 'klo-release-smoke-required-blocked-test-')); + try { + await writeReadyFixture(root, { + policy: releasePolicy({ + releaseMode: 'published-package-smoke-required', + publishedPackageSmoke: { + packageName: '@klo/cli-public', + version: 'latest', + registry: null, + }, + }), + }); + + await assert.rejects( + () => releaseReadinessReport(root), + /published-package-smoke-required release mode requires requiredBeforePublishing to be empty/, + ); + } finally { + await rm(root, { recursive: true, force: true }); + } + }); + + it('rejects unsupported release modes', async () => { + const root = await mkdtemp(join(tmpdir(), 'klo-release-unsupported-mode-test-')); + try { + await writeReadyFixture(root, { + policy: releasePolicy({ + releaseMode: 'experimental-publish', + }), + }); + + await 
assert.rejects( + () => releaseReadinessReport(root), + /Unsupported release policy releaseMode: experimental-publish/, + ); + } finally { + await rm(root, { recursive: true, force: true }); + } + }); + + it('rejects publish-enabled npm policy while releaseMode is ci-artifact-only', async () => { + const root = await mkdtemp(join(tmpdir(), 'klo-release-npm-publish-test-')); + try { + await writeReadyFixture(root, { + policy: releasePolicy({ + npm: { publish: true, registry: 'https://registry.npmjs.org/' }, + }), + }); + + await assert.rejects( + () => releaseReadinessReport(root), + /ci-artifact-only policy must keep npm.publish false/, + ); + } finally { + await rm(root, { recursive: true, force: true }); + } + }); + + it('rejects publish-enabled Python policy while releaseMode is ci-artifact-only', async () => { + const root = await mkdtemp(join(tmpdir(), 'klo-release-python-publish-test-')); + try { + await writeReadyFixture(root, { + policy: releasePolicy({ + python: { publish: true, repository: 'pypi' }, + }), + }); + + await assert.rejects( + () => releaseReadinessReport(root), + /ci-artifact-only policy must keep python.publish false/, + ); + } finally { + await rm(root, { recursive: true, force: true }); + } + }); + + it('rejects unsafe release-policy published package smoke config', async () => { + const root = await mkdtemp(join(tmpdir(), 'klo-release-smoke-invalid-test-')); + try { + await writeReadyFixture(root, { + policy: releasePolicy({ + publishedPackageSmoke: { + packageName: '@klo/cli public', + version: 'latest', + registry: null, + }, + }), + }); + + await assert.rejects( + () => releaseReadinessReport(root), + /Invalid release-policy\.json publishedPackageSmoke\.packageName/, + ); + } finally { + await rm(root, { recursive: true, force: true }); + } + }); + + it('rejects a public npm package while releaseMode is ci-artifact-only', async () => { + const root = await mkdtemp(join(tmpdir(), 'klo-release-public-npm-test-')); + try { + await 
/**
 * Resolves the absolute path of the CLI's built entry point,
 * `packages/cli/dist/bin.js`, under the given workspace root.
 *
 * @param {string} rootDir Workspace root directory.
 * @returns {string} Absolute path to the built `bin.js`.
 */
function cliBinPath(rootDir) {
  const builtEntryPoint = ['packages', 'cli', 'dist', 'bin.js'];
  return resolve(rootDir, ...builtEntryPoint);
}
'packages', entry.name); + paths.push(resolve(packageDir, 'package.json'), resolve(packageDir, 'tsconfig.json'), resolve(packageDir, 'src')); + } + return paths; +} + +async function newestMtimeMs(path, fs) { + let stats; + try { + stats = await fs.stat(path); + } catch { + return 0; + } + if (!stats.isDirectory()) { + return stats.mtimeMs; + } + + let newest = stats.mtimeMs; + let entries = []; + try { + entries = await fs.readdir(path, { withFileTypes: true }); + } catch { + return newest; + } + for (const entry of entries) { + newest = Math.max(newest, await newestMtimeMs(resolve(path, entry.name), fs)); + } + return newest; +} + +async function isBuildStale(rootDir, binPath, fs) { + let binStats; + try { + binStats = await fs.stat(binPath); + } catch { + return true; + } + + const inputPaths = await packageBuildInputPaths(rootDir, fs.readdir); + for (const inputPath of inputPaths) { + if ((await newestMtimeMs(inputPath, fs)) > binStats.mtimeMs) { + return true; + } + } + return false; +} + +function isShellCompletionRequest(argv) { + return argv[0] === '__complete' || (argv[0] === 'dev' && argv[1] === '__complete'); +} + +async function runBuffered(execFile, stdout, stderr, command, args, options) { + try { + const result = await execFile(command, args, { cwd: options.cwd, env: options.env, maxBuffer: 1024 * 1024 * 16 }); + if (result.stdout) { + stdout.write(result.stdout); + } + if (result.stderr) { + stderr.write(result.stderr); + } + return 0; + } catch (error) { + if (typeof error?.stdout === 'string' && error.stdout.length > 0) { + stdout.write(error.stdout); + } + if (typeof error?.stderr === 'string' && error.stderr.length > 0) { + stderr.write(error.stderr); + } + return typeof error?.code === 'number' ? error.code : 1; + } +} + +function runInherited(command, args, options) { + return new Promise((resolveExitCode) => { + const child = spawn(command, args, { + cwd: options.cwd, + stdio: 'inherit', + env: options.env ?? 
process.env, + }); + + child.on('error', (error) => { + process.stderr.write(`${error.message}\n`); + resolveExitCode(1); + }); + child.on('exit', (code, signal) => { + if (code !== null) { + resolveExitCode(code); + return; + } + process.stderr.write(`Command terminated by signal ${signal ?? 'unknown'}\n`); + resolveExitCode(1); + }); + }); +} + +export async function runWorkspaceKlo(argv, options = {}) { + const cliArgv = argv[0] === '--' ? argv.slice(1) : argv; + const rootDir = options.rootDir ?? kloRootDir(); + const stdout = options.stdout ?? process.stdout; + const stderr = options.stderr ?? process.stderr; + const access = options.access ?? fsAccess; + const fs = { + stat: options.stat ?? fsStat, + readdir: options.readdir ?? fsReaddir, + }; + const binPath = cliBinPath(rootDir); + const runCommand = + options.runCommand ?? + (options.execFile + ? (command, args, commandOptions) => runBuffered(options.execFile, stdout, stderr, command, args, commandOptions) + : (command, args, commandOptions) => runInherited(command, args, commandOptions)); + const commandEnv = options.env; + + const binExists = await fileExists(binPath, access); + const skipStaleBuildCheck = binExists && isShellCompletionRequest(cliArgv); + const needsBuild = !binExists || (!skipStaleBuildCheck && (await isBuildStale(rootDir, binPath, fs))); + if (needsBuild) { + stderr.write( + binExists + ? 'KLO CLI build output is stale. Rebuilding it now with `pnpm run build`...\n' + : 'KLO CLI build output is missing. Building it now with `pnpm run build`...\n', + ); + const buildExitCode = await runCommand('pnpm', ['run', 'build'], { cwd: rootDir, env: commandEnv }); + if (buildExitCode !== 0) { + stderr.write( + '\nKLO CLI build failed. 
Run `pnpm run setup:dev` from the KLO directory, then retry this command.\n', + ); + return buildExitCode; + } + } + + return await runCommand(process.execPath, [binPath, ...cliArgv], { cwd: rootDir, env: commandEnv }); +} + +if (import.meta.url === pathToFileURL(process.argv[1]).href) { + process.exitCode = await runWorkspaceKlo(process.argv.slice(2)); +} diff --git a/scripts/run-klo.test.mjs b/scripts/run-klo.test.mjs new file mode 100644 index 00000000..f0175ce8 --- /dev/null +++ b/scripts/run-klo.test.mjs @@ -0,0 +1,243 @@ +import assert from 'node:assert/strict'; +import { test } from 'node:test'; +import { runWorkspaceKlo } from './run-klo.mjs'; + +function freshBuildFs() { + return { + stat: async (path) => ({ + mtimeMs: path.endsWith('/packages/cli/dist/bin.js') ? 2000 : 1000, + isDirectory: () => path.endsWith('/src') || path.endsWith('/packages'), + }), + readdir: async (path) => { + if (path.endsWith('/packages')) { + return [{ name: 'cli', isDirectory: () => true }]; + } + if (path.endsWith('/src')) { + return [{ name: 'bin.ts', isDirectory: () => false }]; + } + return []; + }, + }; +} + +test('runWorkspaceKlo runs the built CLI when it already exists', async () => { + const calls = []; + const logs = []; + const fs = freshBuildFs(); + + const exitCode = await runWorkspaceKlo(['--version'], { + rootDir: '/workspace/klo', + access: async () => undefined, + stat: fs.stat, + readdir: fs.readdir, + execFile: async (command, args, options) => { + calls.push({ command, args, cwd: options.cwd }); + return { stdout: '@klo/cli 0.0.0-private\n', stderr: '' }; + }, + stdout: { write: (chunk) => logs.push(['stdout', chunk]) }, + stderr: { write: (chunk) => logs.push(['stderr', chunk]) }, + }); + + assert.equal(exitCode, 0); + assert.deepEqual(calls, [ + { + command: process.execPath, + args: ['/workspace/klo/packages/cli/dist/bin.js', '--version'], + cwd: '/workspace/klo', + }, + ]); + assert.deepEqual(logs, [['stdout', '@klo/cli 0.0.0-private\n']]); +}); + 
+test('runWorkspaceKlo forwards a caller-provided environment to buffered commands', async () => {
+  const calls = [];
+  const fs = freshBuildFs();
+
+  const exitCode = await runWorkspaceKlo(['--version'], {
+    rootDir: '/workspace/klo',
+    access: async () => undefined,
+    stat: fs.stat,
+    readdir: fs.readdir,
+    env: { PATH: '/bin', GIT_CEILING_DIRECTORIES: '/workspace/klo/examples' },
+    execFile: async (command, args, options) => {
+      calls.push({ command, args, cwd: options.cwd, env: options.env });
+      return { stdout: '@klo/cli 0.0.0-private\n', stderr: '' };
+    },
+    stdout: { write: () => undefined },
+    stderr: { write: () => undefined },
+  });
+
+  assert.equal(exitCode, 0);
+  assert.deepEqual(calls, [
+    {
+      command: process.execPath,
+      args: ['/workspace/klo/packages/cli/dist/bin.js', '--version'],
+      cwd: '/workspace/klo',
+      env: { PATH: '/bin', GIT_CEILING_DIRECTORIES: '/workspace/klo/examples' },
+    },
+  ]);
+});
+
+test('runWorkspaceKlo drops a leading npm argument separator', async () => {
+  const calls = [];
+  const fs = freshBuildFs();
+
+  const exitCode = await runWorkspaceKlo(['--', 'connection', 'test', 'warehouse', '--help'], {
+    rootDir: '/workspace/klo',
+    access: async () => undefined,
+    stat: fs.stat,
+    readdir: fs.readdir,
+    execFile: async (command, args, options) => {
+      calls.push({ command, args, cwd: options.cwd });
+      return { stdout: 'Usage: klo connection test\n', stderr: '' };
+    },
+    stdout: { write: () => undefined },
+    stderr: { write: () => undefined },
+  });
+
+  assert.equal(exitCode, 0);
+  assert.deepEqual(calls, [
+    {
+      command: process.execPath,
+      args: ['/workspace/klo/packages/cli/dist/bin.js', 'connection', 'test', 'warehouse', '--help'],
+      cwd: '/workspace/klo',
+    },
+  ]);
+});
+
+test('runWorkspaceKlo skips stale-build checks for shell completion when dist exists', async () => {
+  const calls = [];
+  let statCalls = 0;
+
+  const exitCode = await runWorkspaceKlo(['dev', '__complete', '--shell', 'zsh', '--position', '2', '--', 'klo', ''], {
+    rootDir: '/workspace/klo',
+    access: async () => undefined,
+    stat: async (path) => {
+      statCalls += 1;
+      return {
+        mtimeMs: path.endsWith('/packages/cli/dist/bin.js') ? 2000 : 3000,
+        isDirectory: () => path.endsWith('/src') || path.endsWith('/packages'),
+      };
+    },
+    readdir: async () => {
+      throw new Error('completion should not scan source directories');
+    },
+    execFile: async (command, args, options) => {
+      calls.push({ command, args, cwd: options.cwd });
+      return { stdout: 'connect:Add, list, test, and map data sources\n', stderr: '' };
+    },
+    stdout: { write: () => undefined },
+    stderr: { write: () => undefined },
+  });
+
+  assert.equal(exitCode, 0);
+  assert.equal(statCalls, 0);
+  assert.deepEqual(calls, [
+    {
+      command: process.execPath,
+      args: [
+        '/workspace/klo/packages/cli/dist/bin.js',
+        'dev',
+        '__complete',
+        '--shell',
+        'zsh',
+        '--position',
+        '2',
+        '--',
+        'klo',
+        '',
+      ],
+      cwd: '/workspace/klo',
+    },
+  ]);
+});
+
+test('runWorkspaceKlo builds the workspace CLI before running it when dist is missing', async () => {
+  const calls = [];
+  const logs = [];
+  let binExists = false;
+
+  const exitCode = await runWorkspaceKlo(['setup', 'demo', '--mode', 'replay', '--no-input', '--viz'], {
+    rootDir: '/workspace/klo',
+    access: async () => {
+      if (!binExists) {
+        throw Object.assign(new Error('missing'), { code: 'ENOENT' });
+      }
+    },
+    execFile: async (command, args, options) => {
+      calls.push({ command, args, cwd: options.cwd });
+      if (command === 'pnpm') {
+        binExists = true;
+        return { stdout: 'build ok\n', stderr: '' };
+      }
+      return { stdout: 'Replay complete\n', stderr: '' };
+    },
+    stdout: { write: (chunk) => logs.push(['stdout', chunk]) },
+    stderr: { write: (chunk) => logs.push(['stderr', chunk]) },
+  });
+
+  assert.equal(exitCode, 0);
+  assert.deepEqual(
+    calls.map((call) => [call.command, call.args]),
+    [
+      ['pnpm', ['run', 'build']],
+      [
+        process.execPath,
+        ['/workspace/klo/packages/cli/dist/bin.js', 'setup', 'demo', '--mode', 'replay', '--no-input', '--viz'],
+      ],
+    ],
+  );
+  assert.deepEqual(logs, [
+    ['stderr', 'KLO CLI build output is missing. Building it now with `pnpm run build`...\n'],
+    ['stdout', 'build ok\n'],
+    ['stdout', 'Replay complete\n'],
+  ]);
+});
+
+test('runWorkspaceKlo rebuilds before running when workspace sources are newer than dist', async () => {
+  const calls = [];
+  const logs = [];
+  let sourceMtimeMs = 3000;
+
+  const exitCode = await runWorkspaceKlo(['dev', 'scan', 'orbit', '--enrich'], {
+    rootDir: '/workspace/klo',
+    access: async () => undefined,
+    stat: async (path) => ({
+      mtimeMs: path.endsWith('/packages/cli/dist/bin.js') ? 2000 : sourceMtimeMs,
+      isDirectory: () => path.endsWith('/src') || path.endsWith('/packages'),
+    }),
+    readdir: async (path) => {
+      if (path.endsWith('/packages')) {
+        return [{ name: 'context', isDirectory: () => true }];
+      }
+      if (path.endsWith('/src')) {
+        return [{ name: 'scan.ts', isDirectory: () => false }];
+      }
+      return [];
+    },
+    execFile: async (command, args, options) => {
+      calls.push({ command, args, cwd: options.cwd });
+      if (command === 'pnpm') {
+        sourceMtimeMs = 1000;
+        return { stdout: 'build ok\n', stderr: '' };
+      }
+      return { stdout: 'scan ok\n', stderr: '' };
+    },
+    stdout: { write: (chunk) => logs.push(['stdout', chunk]) },
+    stderr: { write: (chunk) => logs.push(['stderr', chunk]) },
+  });
+
+  assert.equal(exitCode, 0);
+  assert.deepEqual(
+    calls.map((call) => [call.command, call.args]),
+    [
+      ['pnpm', ['run', 'build']],
+      [process.execPath, ['/workspace/klo/packages/cli/dist/bin.js', 'dev', 'scan', 'orbit', '--enrich']],
+    ],
+  );
+  assert.deepEqual(logs, [
+    ['stderr', 'KLO CLI build output is stale. Rebuilding it now with `pnpm run build`...\n'],
+    ['stdout', 'build ok\n'],
+    ['stdout', 'scan ok\n'],
+  ]);
+});
diff --git a/scripts/setup-dev.mjs b/scripts/setup-dev.mjs
new file mode 100644
index 00000000..aed414c6
--- /dev/null
+++ b/scripts/setup-dev.mjs
@@ -0,0 +1,92 @@
+#!/usr/bin/env node
+
+import { execFile as execFileCallback } from 'node:child_process';
+import { dirname, resolve } from 'node:path';
+import { fileURLToPath, pathToFileURL } from 'node:url';
+import { promisify } from 'node:util';
+
+const execFileAsync = promisify(execFileCallback);
+
+// Cap on captured stdout/stderr per phase. execFile terminates the child and rejects once
+// this is exceeded, so a chatty install or build must not hit it; run-klo.mjs uses the same
+// 16 MiB for its buffered commands.
+const PHASE_MAX_BUFFER_BYTES = 1024 * 1024 * 16;
+
+/** Returns the repository root, i.e. the parent of the directory that holds this script. */
+function kloRootDir() {
+  return resolve(dirname(fileURLToPath(import.meta.url)), '..');
+}
+
+/** Returns the first non-empty text among the error's stderr, stdout, and message. */
+function failureText(error) {
+  const stdout = typeof error?.stdout === 'string' ? error.stdout.trim() : '';
+  const stderr = typeof error?.stderr === 'string' ? error.stderr.trim() : '';
+  const message = error instanceof Error ? error.message.trim() : String(error);
+  return [stderr, stdout, message].find((line) => line.length > 0) ?? 'Command failed';
+}
+
+/**
+ * Runs the developer setup phases in order (dependency install, native SQLite rebuild,
+ * TypeScript package build, doctor setup) and stops at the first phase that fails.
+ *
+ * @param {object} [options]
+ * @param {string} [options.rootDir] Working directory for every phase; defaults to the repo root.
+ * @param {Function} [options.execFile] Promise-returning execFile replacement, e.g. a test double.
+ * @param {(line: string) => void} [options.log] Line logger; defaults to writing to stdout.
+ * @returns {Promise<{ ok: true } | { ok: false, failedPhase: object }>} Outcome of the run.
+ */
+export async function runSetupDev(options = {}) {
+  const rootDir = options.rootDir ?? kloRootDir();
+  const execFile = options.execFile ?? execFileAsync;
+  const log = options.log ?? ((line) => process.stdout.write(`${line}\n`));
+  const phases = [
+    {
+      name: 'dependency install',
+      command: 'pnpm',
+      args: ['install', '--frozen-lockfile'],
+      retry: 'pnpm install --frozen-lockfile',
+    },
+    {
+      name: 'native SQLite rebuild',
+      command: 'pnpm',
+      args: ['run', 'native:rebuild'],
+      retry: 'pnpm run native:rebuild',
+    },
+    {
+      name: 'TypeScript package build',
+      command: 'pnpm',
+      args: ['run', 'build'],
+      retry: 'pnpm run build',
+    },
+    {
+      name: 'doctor setup',
+      command: process.execPath,
+      args: ['packages/cli/dist/bin.js', 'dev', 'doctor', 'setup', '--no-input'],
+      retry: 'pnpm run klo -- dev doctor setup --no-input',
+    },
+  ];
+
+  for (const phase of phases) {
+    log(`RUN ${phase.name}: ${phase.command} ${phase.args.join(' ')}`);
+    try {
+      await execFile(phase.command, phase.args, { cwd: rootDir, maxBuffer: PHASE_MAX_BUFFER_BYTES });
+      log(`PASS ${phase.name}`);
+    } catch (error) {
+      log(`FAIL ${phase.name}: ${failureText(error)}`);
+      log(`Retry: ${phase.retry}`);
+      return { ok: false, failedPhase: phase };
+    }
+  }
+
+  log('Workspace CLI: pnpm run klo -- --help');
+  log('Optional global dev link: pnpm run link:dev');
+  return { ok: true };
+}
+
+// Run the setup only when this file is the entry script, not when it is imported.
+if (import.meta.url === pathToFileURL(process.argv[1]).href) {
+  const result = await runSetupDev();
+  if (!result.ok) {
+    process.exitCode = 1;
+  }
+}
diff --git a/scripts/setup-dev.test.mjs b/scripts/setup-dev.test.mjs
new file mode 100644
index 00000000..d27a5f05
--- /dev/null
+++ b/scripts/setup-dev.test.mjs
@@ -0,0 +1,56 @@
+import assert from 'node:assert/strict';
+import { test } from 'node:test';
+import { runSetupDev } from './setup-dev.mjs';
+
+test('runSetupDev runs phased setup without global linking', async () => {
+  const calls = [];
+  const logs = [];
+
+  const result = await runSetupDev({
+    rootDir: '/workspace/klo',
+    execFile: async (command, args, options) => {
+      calls.push({ command, args, cwd: options.cwd });
+      return { stdout: `${command} ${args.join(' ')}`, stderr: '' };
+    },
+    log: (line) => logs.push(line),
+  });
+
+  assert.equal(result.ok, true);
+  assert.deepEqual(
+    calls.map((call) => [call.command, call.args]),
+    [
+      ['pnpm', ['install', '--frozen-lockfile']],
+      ['pnpm', ['run', 'native:rebuild']],
+      ['pnpm', ['run', 'build']],
+      [process.execPath, ['packages/cli/dist/bin.js', 'dev', 'doctor', 'setup', '--no-input']],
+    ],
+  );
+  assert.equal(calls.some((call) => call.args.includes('link')), false);
+  assert.equal(logs.some((line) => line.includes('PASS doctor setup')), true);
+});
+
+test('runSetupDev stops at the failed phase and prints a retry command', async () => {
+  const calls = [];
+  const logs = [];
+
+  const result = await runSetupDev({
+    rootDir: '/workspace/klo',
+    execFile: async (command, args) => {
+      calls.push({ command, args });
+      if (args.includes('native:rebuild')) {
+        const error = new Error('native rebuild failed');
+        error.stdout = '';
+        error.stderr = 'better-sqlite3 rebuild failed';
+        throw error;
+      }
+      return { stdout: '', stderr: '' };
+    },
+    log: (line) => logs.push(line),
+  });
+
+  assert.equal(result.ok, false);
+  assert.equal(result.failedPhase.name, 'native SQLite rebuild');
+  assert.equal(result.failedPhase.retry, 'pnpm run native:rebuild');
+  assert.equal(calls.length, 2);
+  assert.equal(logs.some((line) => line.includes('Retry: pnpm run native:rebuild')), true);
+});
diff --git a/scripts/standalone-ci-workflow.test.mjs b/scripts/standalone-ci-workflow.test.mjs
new file mode 100644
index 00000000..b1b90b57
--- /dev/null
+++ b/scripts/standalone-ci-workflow.test.mjs
@@ -0,0 +1,67 @@
+import assert from 'node:assert/strict';
+import { readFile } from 'node:fs/promises';
+import { describe, it } from 'node:test';
+
+async function readText(relativePath) {
+  return readFile(new URL(`../${relativePath}`, import.meta.url), 'utf8');
+}
+
+function assertIncludesAll(text, values) {
+  for (const value of values) {
+    assert.match(text, new RegExp(value.replace(/[.*+?^${}()|[\]\\]/g, '\\$&')));
+  }
+}
+
+describe('standalone KLO CI workflow', () => {
+  it('runs the package checks from a filtered repository root', async () => {
+    const workflow = await readText('.github/workflows/ci.yml');
+    // Each snippet below must appear literally in the workflow file.
+    assert.match(workflow, /^name: KLO CI/m);
+    assertIncludesAll(workflow, [
+      'permissions:',
+      'contents: read',
+      'actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd',
+      'pnpm/action-setup@41ff72655975bd51cab0327fa583b6e92b6d3061',
+      'actions/setup-node@6044e13b5dc448c55e2357c09f80417699197238',
+      'node-version: "24"',
+      'cache-dependency-path: "pnpm-lock.yaml"',
+      'pnpm install --frozen-lockfile',
+      'pnpm run check',
+      'actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405',
+      'python-version: "3.13"',
+      'astral-sh/setup-uv@eac588ad8def6316056a12d4907a9d4d84ff7a3b',
+      'cache-dependency-glob: "uv.lock"',
+      'uv sync --all-packages',
+      'uv run pytest',
+      'pnpm run artifacts:check',
+    ]);
+    // The workflow must not use sparse checkout, `cd klo`, or `klo/`-prefixed lockfile paths.
+    assert.doesNotMatch(workflow, /sparse-checkout/);
+    assert.doesNotMatch(workflow, /cd klo/);
+    assert.doesNotMatch(workflow, /klo\/pnpm-lock\.yaml/);
+    assert.doesNotMatch(workflow, /klo\/uv\.lock/);
+  });
+
+  it('uploads verified artifacts from root-relative paths', async () => {
+    const workflow = await readText('.github/workflows/ci.yml');
+    // The upload step and its artifact paths must appear literally, relative to the repo root.
+    assertIncludesAll(workflow, [
+      'actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f',
+      'name: klo-package-artifacts-${{ github.sha }}',
+      'dist/artifacts/manifest.json',
+      'dist/artifacts/npm/*.tgz',
+      'dist/artifacts/python/*.whl',
+      'dist/artifacts/python/*.tar.gz',
+      'if-no-files-found: error',
+      'retention-days: 7',
+    ]);
+    // No artifact path may carry a `klo/` prefix.
+    assert.doesNotMatch(workflow, /klo\/dist\/artifacts/);
+  });
+
+  it('syncs injected workspace packages after package builds', async () => {
+    const workspace = await readText('pnpm-workspace.yaml');
+    // `- build` must be the entry directly under `syncInjectedDepsAfterScripts:`.
+    assert.match(workspace, /syncInjectedDepsAfterScripts:\n\s+- build/);
+  });
+});
diff --git a/scripts/validate-llm-debug-jsonl.mjs b/scripts/validate-llm-debug-jsonl.mjs
new file mode 100644
index 00000000..27c70a28
--- /dev/null
+++ b/scripts/validate-llm-debug-jsonl.mjs
@@ -0,0 +1,118 @@
+#!/usr/bin/env node
+
+// Validates a sanitized LLM debug JSONL capture. It rejects prompt-, schema-, and
+// credential-shaped keys, then checks that cacheControl markers with an allowed ttl were
+// recorded for every required target. Exit status: 0 valid, 1 validation failure, 2 bad usage.
+
+import { readFileSync } from 'node:fs';
+
+const [backend, filePath] = process.argv.slice(2);
+
+/** Prints the command-line usage line to stderr. */
+function usage() {
+  process.stderr.write('Usage: node klo/scripts/validate-llm-debug-jsonl.mjs anthropic|vertex /path/to/debug.jsonl\n');
+}
+
+/** Writes a one-line failure message to stderr and exits with status 1. */
+function fail(message) {
+  process.stderr.write(`${message}\n`);
+  process.exit(1);
+}
+
+if (!['anthropic', 'vertex'].includes(backend) || !filePath) {
+  usage();
+  process.exit(2);
+}
+
+let raw = '';
+try {
+  raw = readFileSync(filePath, 'utf8').trim();
+} catch (error) {
+  // Report an unreadable path as a normal validation failure instead of a stack trace.
+  fail(`cannot read debug JSONL ${filePath}: ${error.message}`);
+}
+if (!raw) {
+  fail(`debug JSONL is empty: ${filePath}`);
+}
+
+// Split on runs of CR/LF so blank lines in CRLF files do not become "\r"-only records.
+const records = raw.split(/[\r\n]+/).map((line, index) => {
+  try {
+    return JSON.parse(line);
+  } catch (error) {
+    // fail() exits the process; a thrown Error here would surface as an uncaught stack trace.
+    return fail(`line ${index + 1} is not valid JSON: ${error.message}`);
+  }
+});
+
+const serialized = JSON.stringify(records);
+// Key names that indicate prompt text, tool schemas, or credentials in the capture.
+const bannedKeyPattern = /"(content|text|prompt|toolSchema|parameters|apiKey|api_key|password|token)"\s*:/i;
+if (bannedKeyPattern.test(serialized)) {
+  fail('debug JSONL contains a prompt, schema, credential, or token-shaped field');
+}
+
+const providerOptionEntries = records.flatMap((record) => {
+  // Optional chaining keeps a `null` record on the fail() path instead of raising a TypeError.
+  if (!Array.isArray(record?.providerOptions)) {
+    fail(`record ${record?.operationName ?? ''} is missing providerOptions array`);
+  }
+  return record.providerOptions;
+});
+
+const cacheMarkerEntries = providerOptionEntries.filter((entry) => {
+  // JSON.stringify(undefined) returns undefined, so fall back to '' for entries without providerOptions.
+  return (JSON.stringify(entry?.providerOptions) ?? '').includes('"cacheControl"');
+});
+
+if (cacheMarkerEntries.length === 0) {
+  fail('no cacheControl providerOptions were recorded');
+}
+
+// Every one of these targets must carry at least one cacheControl marker.
+const requiredMarkerTargets = ['message', 'message-part', 'tool'];
+const markerTargets = new Set(cacheMarkerEntries.map((entry) => entry.target));
+for (const target of requiredMarkerTargets) {
+  if (!markerTargets.has(target)) {
+    fail(`missing cacheControl marker target: ${target}`);
+  }
+}
+
+// Collect every ttl string found anywhere inside each marker's serialized providerOptions.
+const ttlValues = new Set();
+for (const marker of cacheMarkerEntries) {
+  const markerJson = JSON.stringify(marker.providerOptions);
+  for (const match of markerJson.matchAll(/"ttl":"([^"]+)"/g)) {
+    ttlValues.add(match[1]);
+  }
+}
+
+if (ttlValues.size === 0) {
+  fail('cacheControl markers did not expose ttl values');
+}
+
+for (const ttl of ttlValues) {
+  if (ttl !== '1h' && ttl !== '5m') {
+    fail(`unexpected cache ttl: ${ttl}`);
+  }
+}
+
+if (backend === 'vertex' && !ttlValues.has('1h')) {
+  fail('vertex debug capture did not include a default 1h cache marker');
+}
+
+if (backend === 'vertex' && serialized.includes('extended-cache-ttl-2025-04-11')) {
+  fail('vertex debug capture included the direct-Anthropic extended cache TTL beta header');
+}
+
+// Emit a one-line JSON summary on success.
+process.stdout.write(
+  `${JSON.stringify({
+    backend,
+    records: records.length,
+    providerOptionEntries: providerOptionEntries.length,
+    cacheMarkerEntries: cacheMarkerEntries.length,
+    markerTargets: [...markerTargets].sort(),
+    ttlValues: [...ttlValues].sort(),
+  })}\n`,
+);
diff --git a/scripts/validate-llm-debug-jsonl.test.mjs b/scripts/validate-llm-debug-jsonl.test.mjs
new file mode 100644
index 00000000..d5a049c5
--- /dev/null
+++ b/scripts/validate-llm-debug-jsonl.test.mjs
@@ -0,0 +1,112 @@
+import { spawnSync } from 'node:child_process';
+import assert from 'node:assert/strict';
+import { mkdtempSync, writeFileSync } from 'node:fs'; +import { tmpdir } from 'node:os'; +import { join } from 'node:path'; +import { test } from 'node:test'; + +const scriptPath = new URL('./validate-llm-debug-jsonl.mjs', import.meta.url).pathname; + +function runValidator(args) { + return spawnSync(process.execPath, [scriptPath, ...args], { + encoding: 'utf8', + }); +} + +function writeDebugJsonl(records) { + const dir = mkdtempSync(join(tmpdir(), 'klo-llm-debug-validator-')); + const filePath = join(dir, 'debug.jsonl'); + writeFileSync(filePath, `${records.map((record) => JSON.stringify(record)).join('\n')}\n`, 'utf8'); + return filePath; +} + +const validRecord = { + operationName: 'ingest-bundle-wu', + modelRole: 'candidateExtraction', + modelId: 'claude-sonnet-4-6', + messageCount: 2, + toolNames: ['emit_candidate'], + providerOptions: [ + { + target: 'message', + index: 0, + role: 'system', + providerOptions: { anthropic: { cacheControl: { type: 'ephemeral', ttl: '1h' } } }, + }, + { + target: 'message-part', + index: 1, + role: 'user', + partIndex: 0, + providerOptions: { anthropic: { cacheControl: { type: 'ephemeral', ttl: '5m' } } }, + }, + { + target: 'tool', + name: 'emit_candidate', + providerOptions: { anthropic: { cacheControl: { type: 'ephemeral', ttl: '1h' } } }, + }, + ], +}; + +test('prints usage and exits 2 when required arguments are missing', () => { + const result = runValidator([]); + + assert.equal(result.status, 2); + assert.match(result.stderr, /Usage: node klo\/scripts\/validate-llm-debug-jsonl\.mjs anthropic\|vertex/); +}); + +test('accepts sanitized debug JSONL with message, message-part, and tool cache markers', () => { + const filePath = writeDebugJsonl([validRecord]); + const result = runValidator(['anthropic', filePath]); + + assert.equal(result.status, 0, result.stderr); + const parsed = JSON.parse(result.stdout); + assert.equal(parsed.backend, 'anthropic'); + assert.equal(parsed.records, 1); + 
assert.equal(parsed.providerOptionEntries, 3); + assert.equal(parsed.cacheMarkerEntries, 3); + assert.deepEqual(parsed.markerTargets, ['message', 'message-part', 'tool']); + assert.deepEqual(parsed.ttlValues, ['1h', '5m']); +}); + +test('rejects debug JSONL that lacks nested message-part cache marker evidence', () => { + const filePath = writeDebugJsonl([ + { + ...validRecord, + providerOptions: validRecord.providerOptions.filter((entry) => entry.target !== 'message-part'), + }, + ]); + const result = runValidator(['anthropic', filePath]); + + assert.notEqual(result.status, 0); + assert.match(result.stderr, /missing cacheControl marker target: message-part/); +}); + +test('rejects prompt-shaped fields in debug JSONL', () => { + const filePath = writeDebugJsonl([{ ...validRecord, text: 'SECRET PROMPT' }]); + const result = runValidator(['anthropic', filePath]); + + assert.notEqual(result.status, 0); + assert.match(result.stderr, /prompt, schema, credential, or token-shaped field/); +}); + +test('rejects direct-Anthropic extended cache beta header in Vertex debug summaries', () => { + const filePath = writeDebugJsonl([ + { + ...validRecord, + providerOptions: [ + ...validRecord.providerOptions, + { + target: 'message', + index: 0, + role: 'system', + providerOptions: { header: 'extended-cache-ttl-2025-04-11' }, + }, + ], + }, + ]); + const result = runValidator(['vertex', filePath]); + + assert.notEqual(result.status, 0); + assert.match(result.stderr, /direct-Anthropic extended cache TTL beta header/); +}); diff --git a/tsconfig.base.json b/tsconfig.base.json new file mode 100644 index 00000000..1976d77e --- /dev/null +++ b/tsconfig.base.json @@ -0,0 +1,18 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "compilerOptions": { + "module": "NodeNext", + "moduleResolution": "NodeNext", + "target": "ES2023", + "lib": ["ES2023"], + "declaration": true, + "strict": true, + "strictNullChecks": true, + "esModuleInterop": true, + "isolatedModules": true, + 
"skipLibCheck": true, + "forceConsistentCasingInFileNames": true, + "resolveJsonModule": true, + "types": ["node"] + } +} diff --git a/uv.lock b/uv.lock new file mode 100644 index 00000000..f44ea7d3 --- /dev/null +++ b/uv.lock @@ -0,0 +1,1723 @@ +version = 1 +revision = 3 +requires-python = ">=3.13" +resolution-markers = [ + "python_full_version >= '3.14' and sys_platform == 'win32'", + "python_full_version >= '3.14' and sys_platform == 'emscripten'", + "python_full_version >= '3.14' and sys_platform != 'darwin' and sys_platform != 'emscripten' and sys_platform != 'win32'", + "python_full_version < '3.14' and sys_platform == 'win32'", + "python_full_version < '3.14' and sys_platform == 'emscripten'", + "python_full_version < '3.14' and sys_platform != 'darwin' and sys_platform != 'emscripten' and sys_platform != 'win32'", + "python_full_version >= '3.14' and sys_platform == 'darwin'", + "python_full_version < '3.14' and sys_platform == 'darwin'", +] + +[manifest] +members = [ + "klo-daemon", + "klo-sl", + "klo-workspace", +] + +[[package]] +name = "annotated-doc" +version = "0.0.4" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/57/ba/046ceea27344560984e26a590f90bc7f4a75b06701f653222458922b558c/annotated_doc-0.0.4.tar.gz", hash = "sha256:fbcda96e87e9c92ad167c2e53839e57503ecfda18804ea28102353485033faa4", size = 7288, upload-time = "2025-11-10T22:07:42.062Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/1e/d3/26bf1008eb3d2daa8ef4cacc7f3bfdc11818d111f7e2d0201bc6e3b49d45/annotated_doc-0.0.4-py3-none-any.whl", hash = "sha256:571ac1dc6991c450b25a9c2d84a3705e2ae7a53467b5d111c24fa8baabbed320", size = 5303, upload-time = "2025-11-10T22:07:40.673Z" }, +] + +[[package]] +name = "annotated-types" +version = "0.7.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = 
"https://files.pythonhosted.org/packages/ee/67/531ea369ba64dcff5ec9c3402f9f51bf748cec26dde048a2f973a4eea7f5/annotated_types-0.7.0.tar.gz", hash = "sha256:aff07c09a53a08bc8cfccb9c85b05f1aa9a2a6f23728d790723543408344ce89", size = 16081, upload-time = "2024-05-20T21:33:25.928Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/78/b6/6307fbef88d9b5ee7421e68d78a9f162e0da4900bc5f5793f6d3d0e34fb8/annotated_types-0.7.0-py3-none-any.whl", hash = "sha256:1f02e8b43a8fbbc3f3e0d4f0f4bfc8131bcb4eebe8849b8e5c773f3a1c582a53", size = 13643, upload-time = "2024-05-20T21:33:24.1Z" }, +] + +[[package]] +name = "anyio" +version = "4.13.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "idna" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/19/14/2c5dd9f512b66549ae92767a9c7b330ae88e1932ca57876909410251fe13/anyio-4.13.0.tar.gz", hash = "sha256:334b70e641fd2221c1505b3890c69882fe4a2df910cba14d97019b90b24439dc", size = 231622, upload-time = "2026-03-24T12:59:09.671Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/da/42/e921fccf5015463e32a3cf6ee7f980a6ed0f395ceeaa45060b61d86486c2/anyio-4.13.0-py3-none-any.whl", hash = "sha256:08b310f9e24a9594186fd75b4f73f4a4152069e3853f1ed8bfbf58369f4ad708", size = 114353, upload-time = "2026-03-24T12:59:08.246Z" }, +] + +[[package]] +name = "certifi" +version = "2026.4.22" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/25/ee/6caf7a40c36a1220410afe15a1cc64993a1f864871f698c0f93acb72842a/certifi-2026.4.22.tar.gz", hash = "sha256:8d455352a37b71bf76a79caa83a3d6c25afee4a385d632127b6afb3963f1c580", size = 137077, upload-time = "2026-04-22T11:26:11.191Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/22/30/7cd8fdcdfbc5b869528b079bfb76dcdf6056b1a2097a662e5e8c04f42965/certifi-2026.4.22-py3-none-any.whl", hash = "sha256:3cb2210c8f88ba2318d29b0388d1023c8492ff72ecdde4ebdaddbb13a31b1c4a", size = 135707, 
upload-time = "2026-04-22T11:26:09.372Z" }, +] + +[[package]] +name = "cfgv" +version = "3.5.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/4e/b5/721b8799b04bf9afe054a3899c6cf4e880fcf8563cc71c15610242490a0c/cfgv-3.5.0.tar.gz", hash = "sha256:d5b1034354820651caa73ede66a6294d6e95c1b00acc5e9b098e917404669132", size = 7334, upload-time = "2025-11-19T20:55:51.612Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/db/3c/33bac158f8ab7f89b2e59426d5fe2e4f63f7ed25df84c036890172b412b5/cfgv-3.5.0-py2.py3-none-any.whl", hash = "sha256:a8dc6b26ad22ff227d2634a65cb388215ce6cc96bbcc5cfde7641ae87e8dacc0", size = 7445, upload-time = "2025-11-19T20:55:50.744Z" }, +] + +[[package]] +name = "charset-normalizer" +version = "3.4.7" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/e7/a1/67fe25fac3c7642725500a3f6cfe5821ad557c3abb11c9d20d12c7008d3e/charset_normalizer-3.4.7.tar.gz", hash = "sha256:ae89db9e5f98a11a4bf50407d4363e7b09b31e55bc117b4f7d80aab97ba009e5", size = 144271, upload-time = "2026-04-02T09:28:39.342Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/c1/3b/66777e39d3ae1ddc77ee606be4ec6d8cbd4c801f65e5a1b6f2b11b8346dd/charset_normalizer-3.4.7-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:f496c9c3cc02230093d8330875c4c3cdfc3b73612a5fd921c65d39cbcef08063", size = 309627, upload-time = "2026-04-02T09:26:45.198Z" }, + { url = "https://files.pythonhosted.org/packages/2e/4e/b7f84e617b4854ade48a1b7915c8ccfadeba444d2a18c291f696e37f0d3b/charset_normalizer-3.4.7-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:0ea948db76d31190bf08bd371623927ee1339d5f2a0b4b1b4a4439a65298703c", size = 207008, upload-time = "2026-04-02T09:26:46.824Z" }, + { url = 
"https://files.pythonhosted.org/packages/c4/bb/ec73c0257c9e11b268f018f068f5d00aa0ef8c8b09f7753ebd5f2880e248/charset_normalizer-3.4.7-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:a277ab8928b9f299723bc1a2dabb1265911b1a76341f90a510368ca44ad9ab66", size = 228303, upload-time = "2026-04-02T09:26:48.397Z" }, + { url = "https://files.pythonhosted.org/packages/85/fb/32d1f5033484494619f701e719429c69b766bfc4dbc61aa9e9c8c166528b/charset_normalizer-3.4.7-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:3bec022aec2c514d9cf199522a802bd007cd588ab17ab2525f20f9c34d067c18", size = 224282, upload-time = "2026-04-02T09:26:49.684Z" }, + { url = "https://files.pythonhosted.org/packages/fa/07/330e3a0dda4c404d6da83b327270906e9654a24f6c546dc886a0eb0ffb23/charset_normalizer-3.4.7-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:e044c39e41b92c845bc815e5ae4230804e8e7bc29e399b0437d64222d92809dd", size = 215595, upload-time = "2026-04-02T09:26:50.915Z" }, + { url = "https://files.pythonhosted.org/packages/e3/7c/fc890655786e423f02556e0216d4b8c6bcb6bdfa890160dc66bf52dee468/charset_normalizer-3.4.7-cp313-cp313-manylinux_2_31_armv7l.whl", hash = "sha256:f495a1652cf3fbab2eb0639776dad966c2fb874d79d87ca07f9d5f059b8bd215", size = 201986, upload-time = "2026-04-02T09:26:52.197Z" }, + { url = "https://files.pythonhosted.org/packages/d8/97/bfb18b3db2aed3b90cf54dc292ad79fdd5ad65c4eae454099475cbeadd0d/charset_normalizer-3.4.7-cp313-cp313-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:e712b419df8ba5e42b226c510472b37bd57b38e897d3eca5e8cfd410a29fa859", size = 211711, upload-time = "2026-04-02T09:26:53.49Z" }, + { url = "https://files.pythonhosted.org/packages/6f/a5/a581c13798546a7fd557c82614a5c65a13df2157e9ad6373166d2a3e645d/charset_normalizer-3.4.7-cp313-cp313-musllinux_1_2_aarch64.whl", hash = 
"sha256:7804338df6fcc08105c7745f1502ba68d900f45fd770d5bdd5288ddccb8a42d8", size = 210036, upload-time = "2026-04-02T09:26:54.975Z" }, + { url = "https://files.pythonhosted.org/packages/8c/bf/b3ab5bcb478e4193d517644b0fb2bf5497fbceeaa7a1bc0f4d5b50953861/charset_normalizer-3.4.7-cp313-cp313-musllinux_1_2_armv7l.whl", hash = "sha256:481551899c856c704d58119b5025793fa6730adda3571971af568f66d2424bb5", size = 202998, upload-time = "2026-04-02T09:26:56.303Z" }, + { url = "https://files.pythonhosted.org/packages/e7/4e/23efd79b65d314fa320ec6017b4b5834d5c12a58ba4610aa353af2e2f577/charset_normalizer-3.4.7-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:f59099f9b66f0d7145115e6f80dd8b1d847176df89b234a5a6b3f00437aa0832", size = 230056, upload-time = "2026-04-02T09:26:57.554Z" }, + { url = "https://files.pythonhosted.org/packages/b9/9f/1e1941bc3f0e01df116e68dc37a55c4d249df5e6fa77f008841aef68264f/charset_normalizer-3.4.7-cp313-cp313-musllinux_1_2_riscv64.whl", hash = "sha256:f59ad4c0e8f6bba240a9bb85504faa1ab438237199d4cce5f622761507b8f6a6", size = 211537, upload-time = "2026-04-02T09:26:58.843Z" }, + { url = "https://files.pythonhosted.org/packages/80/0f/088cbb3020d44428964a6c97fe1edfb1b9550396bf6d278330281e8b709c/charset_normalizer-3.4.7-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:3dedcc22d73ec993f42055eff4fcfed9318d1eeb9a6606c55892a26964964e48", size = 226176, upload-time = "2026-04-02T09:27:00.437Z" }, + { url = "https://files.pythonhosted.org/packages/6a/9f/130394f9bbe06f4f63e22641d32fc9b202b7e251c9aef4db044324dac493/charset_normalizer-3.4.7-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:64f02c6841d7d83f832cd97ccf8eb8a906d06eb95d5276069175c696b024b60a", size = 217723, upload-time = "2026-04-02T09:27:02.021Z" }, + { url = "https://files.pythonhosted.org/packages/73/55/c469897448a06e49f8fa03f6caae97074fde823f432a98f979cc42b90e69/charset_normalizer-3.4.7-cp313-cp313-win32.whl", hash = "sha256:4042d5c8f957e15221d423ba781e85d553722fc4113f523f2feb7b188cc34c5e", 
size = 148085, upload-time = "2026-04-02T09:27:03.192Z" }, + { url = "https://files.pythonhosted.org/packages/5d/78/1b74c5bbb3f99b77a1715c91b3e0b5bdb6fe302d95ace4f5b1bec37b0167/charset_normalizer-3.4.7-cp313-cp313-win_amd64.whl", hash = "sha256:3946fa46a0cf3e4c8cb1cc52f56bb536310d34f25f01ca9b6c16afa767dab110", size = 158819, upload-time = "2026-04-02T09:27:04.454Z" }, + { url = "https://files.pythonhosted.org/packages/68/86/46bd42279d323deb8687c4a5a811fd548cb7d1de10cf6535d099877a9a9f/charset_normalizer-3.4.7-cp313-cp313-win_arm64.whl", hash = "sha256:80d04837f55fc81da168b98de4f4b797ef007fc8a79ab71c6ec9bc4dd662b15b", size = 147915, upload-time = "2026-04-02T09:27:05.971Z" }, + { url = "https://files.pythonhosted.org/packages/97/c8/c67cb8c70e19ef1960b97b22ed2a1567711de46c4ddf19799923adc836c2/charset_normalizer-3.4.7-cp314-cp314-macosx_10_15_universal2.whl", hash = "sha256:c36c333c39be2dbca264d7803333c896ab8fa7d4d6f0ab7edb7dfd7aea6e98c0", size = 309234, upload-time = "2026-04-02T09:27:07.194Z" }, + { url = "https://files.pythonhosted.org/packages/99/85/c091fdee33f20de70d6c8b522743b6f831a2f1cd3ff86de4c6a827c48a76/charset_normalizer-3.4.7-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:1c2aed2e5e41f24ea8ef1590b8e848a79b56f3a5564a65ceec43c9d692dc7d8a", size = 208042, upload-time = "2026-04-02T09:27:08.749Z" }, + { url = "https://files.pythonhosted.org/packages/87/1c/ab2ce611b984d2fd5d86a5a8a19c1ae26acac6bad967da4967562c75114d/charset_normalizer-3.4.7-cp314-cp314-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:54523e136b8948060c0fa0bc7b1b50c32c186f2fceee897a495406bb6e311d2b", size = 228706, upload-time = "2026-04-02T09:27:09.951Z" }, + { url = "https://files.pythonhosted.org/packages/a8/29/2b1d2cb00bf085f59d29eb773ce58ec2d325430f8c216804a0a5cd83cbca/charset_normalizer-3.4.7-cp314-cp314-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = 
"sha256:715479b9a2802ecac752a3b0efa2b0b60285cf962ee38414211abdfccc233b41", size = 224727, upload-time = "2026-04-02T09:27:11.175Z" }, + { url = "https://files.pythonhosted.org/packages/47/5c/032c2d5a07fe4d4855fea851209cca2b6f03ebeb6d4e3afdb3358386a684/charset_normalizer-3.4.7-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:bd6c2a1c7573c64738d716488d2cdd3c00e340e4835707d8fdb8dc1a66ef164e", size = 215882, upload-time = "2026-04-02T09:27:12.446Z" }, + { url = "https://files.pythonhosted.org/packages/2c/c2/356065d5a8b78ed04499cae5f339f091946a6a74f91e03476c33f0ab7100/charset_normalizer-3.4.7-cp314-cp314-manylinux_2_31_armv7l.whl", hash = "sha256:c45e9440fb78f8ddabcf714b68f936737a121355bf59f3907f4e17721b9d1aae", size = 200860, upload-time = "2026-04-02T09:27:13.721Z" }, + { url = "https://files.pythonhosted.org/packages/0c/cd/a32a84217ced5039f53b29f460962abb2d4420def55afabe45b1c3c7483d/charset_normalizer-3.4.7-cp314-cp314-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:3534e7dcbdcf757da6b85a0bbf5b6868786d5982dd959b065e65481644817a18", size = 211564, upload-time = "2026-04-02T09:27:15.272Z" }, + { url = "https://files.pythonhosted.org/packages/44/86/58e6f13ce26cc3b8f4a36b94a0f22ae2f00a72534520f4ae6857c4b81f89/charset_normalizer-3.4.7-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:e8ac484bf18ce6975760921bb6148041faa8fef0547200386ea0b52b5d27bf7b", size = 211276, upload-time = "2026-04-02T09:27:16.834Z" }, + { url = "https://files.pythonhosted.org/packages/8f/fe/d17c32dc72e17e155e06883efa84514ca375f8a528ba2546bee73fc4df81/charset_normalizer-3.4.7-cp314-cp314-musllinux_1_2_armv7l.whl", hash = "sha256:a5fe03b42827c13cdccd08e6c0247b6a6d4b5e3cdc53fd1749f5896adcdc2356", size = 201238, upload-time = "2026-04-02T09:27:18.229Z" }, + { url = 
"https://files.pythonhosted.org/packages/6a/29/f33daa50b06525a237451cdb6c69da366c381a3dadcd833fa5676bc468b3/charset_normalizer-3.4.7-cp314-cp314-musllinux_1_2_ppc64le.whl", hash = "sha256:2d6eb928e13016cea4f1f21d1e10c1cebd5a421bc57ddf5b1142ae3f86824fab", size = 230189, upload-time = "2026-04-02T09:27:19.445Z" }, + { url = "https://files.pythonhosted.org/packages/b6/6e/52c84015394a6a0bdcd435210a7e944c5f94ea1055f5cc5d56c5fe368e7b/charset_normalizer-3.4.7-cp314-cp314-musllinux_1_2_riscv64.whl", hash = "sha256:e74327fb75de8986940def6e8dee4f127cc9752bee7355bb323cc5b2659b6d46", size = 211352, upload-time = "2026-04-02T09:27:20.79Z" }, + { url = "https://files.pythonhosted.org/packages/8c/d7/4353be581b373033fb9198bf1da3cf8f09c1082561e8e922aa7b39bf9fe8/charset_normalizer-3.4.7-cp314-cp314-musllinux_1_2_s390x.whl", hash = "sha256:d6038d37043bced98a66e68d3aa2b6a35505dc01328cd65217cefe82f25def44", size = 227024, upload-time = "2026-04-02T09:27:22.063Z" }, + { url = "https://files.pythonhosted.org/packages/30/45/99d18aa925bd1740098ccd3060e238e21115fffbfdcb8f3ece837d0ace6c/charset_normalizer-3.4.7-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:7579e913a5339fb8fa133f6bbcfd8e6749696206cf05acdbdca71a1b436d8e72", size = 217869, upload-time = "2026-04-02T09:27:23.486Z" }, + { url = "https://files.pythonhosted.org/packages/5c/05/5ee478aa53f4bb7996482153d4bfe1b89e0f087f0ab6b294fcf92d595873/charset_normalizer-3.4.7-cp314-cp314-win32.whl", hash = "sha256:5b77459df20e08151cd6f8b9ef8ef1f961ef73d85c21a555c7eed5b79410ec10", size = 148541, upload-time = "2026-04-02T09:27:25.146Z" }, + { url = "https://files.pythonhosted.org/packages/48/77/72dcb0921b2ce86420b2d79d454c7022bf5be40202a2a07906b9f2a35c97/charset_normalizer-3.4.7-cp314-cp314-win_amd64.whl", hash = "sha256:92a0a01ead5e668468e952e4238cccd7c537364eb7d851ab144ab6627dbbe12f", size = 159634, upload-time = "2026-04-02T09:27:26.642Z" }, + { url = 
"https://files.pythonhosted.org/packages/c6/a3/c2369911cd72f02386e4e340770f6e158c7980267da16af8f668217abaa0/charset_normalizer-3.4.7-cp314-cp314-win_arm64.whl", hash = "sha256:67f6279d125ca0046a7fd386d01b311c6363844deac3e5b069b514ba3e63c246", size = 148384, upload-time = "2026-04-02T09:27:28.271Z" }, + { url = "https://files.pythonhosted.org/packages/94/09/7e8a7f73d24dba1f0035fbbf014d2c36828fc1bf9c88f84093e57d315935/charset_normalizer-3.4.7-cp314-cp314t-macosx_10_15_universal2.whl", hash = "sha256:effc3f449787117233702311a1b7d8f59cba9ced946ba727bdc329ec69028e24", size = 330133, upload-time = "2026-04-02T09:27:29.474Z" }, + { url = "https://files.pythonhosted.org/packages/8d/da/96975ddb11f8e977f706f45cddd8540fd8242f71ecdb5d18a80723dcf62c/charset_normalizer-3.4.7-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:fbccdc05410c9ee21bbf16a35f4c1d16123dcdeb8a1d38f33654fa21d0234f79", size = 216257, upload-time = "2026-04-02T09:27:30.793Z" }, + { url = "https://files.pythonhosted.org/packages/e5/e8/1d63bf8ef2d388e95c64b2098f45f84758f6d102a087552da1485912637b/charset_normalizer-3.4.7-cp314-cp314t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:733784b6d6def852c814bce5f318d25da2ee65dd4839a0718641c696e09a2960", size = 234851, upload-time = "2026-04-02T09:27:32.44Z" }, + { url = "https://files.pythonhosted.org/packages/9b/40/e5ff04233e70da2681fa43969ad6f66ca5611d7e669be0246c4c7aaf6dc8/charset_normalizer-3.4.7-cp314-cp314t-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:a89c23ef8d2c6b27fd200a42aa4ac72786e7c60d40efdc76e6011260b6e949c4", size = 233393, upload-time = "2026-04-02T09:27:34.03Z" }, + { url = "https://files.pythonhosted.org/packages/be/c1/06c6c49d5a5450f76899992f1ee40b41d076aee9279b49cf9974d2f313d5/charset_normalizer-3.4.7-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = 
"sha256:6c114670c45346afedc0d947faf3c7f701051d2518b943679c8ff88befe14f8e", size = 223251, upload-time = "2026-04-02T09:27:35.369Z" }, + { url = "https://files.pythonhosted.org/packages/2b/9f/f2ff16fb050946169e3e1f82134d107e5d4ae72647ec8a1b1446c148480f/charset_normalizer-3.4.7-cp314-cp314t-manylinux_2_31_armv7l.whl", hash = "sha256:a180c5e59792af262bf263b21a3c49353f25945d8d9f70628e73de370d55e1e1", size = 206609, upload-time = "2026-04-02T09:27:36.661Z" }, + { url = "https://files.pythonhosted.org/packages/69/d5/a527c0cd8d64d2eab7459784fb4169a0ac76e5a6fc5237337982fd61347e/charset_normalizer-3.4.7-cp314-cp314t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:3c9a494bc5ec77d43cea229c4f6db1e4d8fe7e1bbffa8b6f0f0032430ff8ab44", size = 220014, upload-time = "2026-04-02T09:27:38.019Z" }, + { url = "https://files.pythonhosted.org/packages/7e/80/8a7b8104a3e203074dc9aa2c613d4b726c0e136bad1cc734594b02867972/charset_normalizer-3.4.7-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:8d828b6667a32a728a1ad1d93957cdf37489c57b97ae6c4de2860fa749b8fc1e", size = 218979, upload-time = "2026-04-02T09:27:39.37Z" }, + { url = "https://files.pythonhosted.org/packages/02/9a/b759b503d507f375b2b5c153e4d2ee0a75aa215b7f2489cf314f4541f2c0/charset_normalizer-3.4.7-cp314-cp314t-musllinux_1_2_armv7l.whl", hash = "sha256:cf1493cd8607bec4d8a7b9b004e699fcf8f9103a9284cc94962cb73d20f9d4a3", size = 209238, upload-time = "2026-04-02T09:27:40.722Z" }, + { url = "https://files.pythonhosted.org/packages/c2/4e/0f3f5d47b86bdb79256e7290b26ac847a2832d9a4033f7eb2cd4bcf4bb5b/charset_normalizer-3.4.7-cp314-cp314t-musllinux_1_2_ppc64le.whl", hash = "sha256:0c96c3b819b5c3e9e165495db84d41914d6894d55181d2d108cc1a69bfc9cce0", size = 236110, upload-time = "2026-04-02T09:27:42.33Z" }, + { url = "https://files.pythonhosted.org/packages/96/23/bce28734eb3ed2c91dcf93abeb8a5cf393a7b2749725030bb630e554fdd8/charset_normalizer-3.4.7-cp314-cp314t-musllinux_1_2_riscv64.whl", hash = 
"sha256:752a45dc4a6934060b3b0dab47e04edc3326575f82be64bc4fc293914566503e", size = 219824, upload-time = "2026-04-02T09:27:43.924Z" }, + { url = "https://files.pythonhosted.org/packages/2c/6f/6e897c6984cc4d41af319b077f2f600fc8214eb2fe2d6bcb79141b882400/charset_normalizer-3.4.7-cp314-cp314t-musllinux_1_2_s390x.whl", hash = "sha256:8778f0c7a52e56f75d12dae53ae320fae900a8b9b4164b981b9c5ce059cd1fcb", size = 233103, upload-time = "2026-04-02T09:27:45.348Z" }, + { url = "https://files.pythonhosted.org/packages/76/22/ef7bd0fe480a0ae9b656189ec00744b60933f68b4f42a7bb06589f6f576a/charset_normalizer-3.4.7-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:ce3412fbe1e31eb81ea42f4169ed94861c56e643189e1e75f0041f3fe7020abe", size = 225194, upload-time = "2026-04-02T09:27:46.706Z" }, + { url = "https://files.pythonhosted.org/packages/c5/a7/0e0ab3e0b5bc1219bd80a6a0d4d72ca74d9250cb2382b7c699c147e06017/charset_normalizer-3.4.7-cp314-cp314t-win32.whl", hash = "sha256:c03a41a8784091e67a39648f70c5f97b5b6a37f216896d44d2cdcb82615339a0", size = 159827, upload-time = "2026-04-02T09:27:48.053Z" }, + { url = "https://files.pythonhosted.org/packages/7a/1d/29d32e0fb40864b1f878c7f5a0b343ae676c6e2b271a2d55cc3a152391da/charset_normalizer-3.4.7-cp314-cp314t-win_amd64.whl", hash = "sha256:03853ed82eeebbce3c2abfdbc98c96dc205f32a79627688ac9a27370ea61a49c", size = 174168, upload-time = "2026-04-02T09:27:49.795Z" }, + { url = "https://files.pythonhosted.org/packages/de/32/d92444ad05c7a6e41fb2036749777c163baf7a0301a040cb672d6b2b1ae9/charset_normalizer-3.4.7-cp314-cp314t-win_arm64.whl", hash = "sha256:c35abb8bfff0185efac5878da64c45dafd2b37fb0383add1be155a763c1f083d", size = 153018, upload-time = "2026-04-02T09:27:51.116Z" }, + { url = "https://files.pythonhosted.org/packages/db/8f/61959034484a4a7c527811f4721e75d02d653a35afb0b6054474d8185d4c/charset_normalizer-3.4.7-py3-none-any.whl", hash = "sha256:3dce51d0f5e7951f8bb4900c257dad282f49190fdbebecd4ba99bcc41fef404d", size = 61958, upload-time = 
"2026-04-02T09:28:37.794Z" }, +] + +[[package]] +name = "click" +version = "8.3.3" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "colorama", marker = "sys_platform == 'win32'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/bb/63/f9e1ea081ce35720d8b92acde70daaedace594dc93b693c869e0d5910718/click-8.3.3.tar.gz", hash = "sha256:398329ad4837b2ff7cbe1dd166a4c0f8900c3ca3a218de04466f38f6497f18a2", size = 328061, upload-time = "2026-04-22T15:11:27.506Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/ae/44/c1221527f6a71a01ec6fbad7fa78f1d50dfa02217385cf0fa3eec7087d59/click-8.3.3-py3-none-any.whl", hash = "sha256:a2bf429bb3033c89fa4936ffb35d5cb471e3719e1f3c8a7c3fff0b8314305613", size = 110502, upload-time = "2026-04-22T15:11:25.044Z" }, +] + +[[package]] +name = "colorama" +version = "0.4.6" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/d8/53/6f443c9a4a8358a93a6792e2acffb9d9d5cb0a5cfd8802644b7b1c9a02e4/colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44", size = 27697, upload-time = "2022-10-25T02:36:22.414Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/d1/d6/3965ed04c63042e047cb6a3e6ed1a63a35087b6a609aa3a15ed8ac56c221/colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6", size = 25335, upload-time = "2022-10-25T02:36:20.889Z" }, +] + +[[package]] +name = "coverage" +version = "7.13.5" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/9d/e0/70553e3000e345daff267cec284ce4cbf3fc141b6da229ac52775b5428f1/coverage-7.13.5.tar.gz", hash = "sha256:c81f6515c4c40141f83f502b07bbfa5c240ba25bbe73da7b33f1e5b6120ff179", size = 915967, upload-time = "2026-03-17T10:33:18.341Z" } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/74/8c/74fedc9663dcf168b0a059d4ea756ecae4da77a489048f94b5f512a8d0b3/coverage-7.13.5-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:5ec4af212df513e399cf11610cc27063f1586419e814755ab362e50a85ea69c1", size = 219576, upload-time = "2026-03-17T10:31:09.045Z" }, + { url = "https://files.pythonhosted.org/packages/0c/c9/44fb661c55062f0818a6ffd2685c67aa30816200d5f2817543717d4b92eb/coverage-7.13.5-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:941617e518602e2d64942c88ec8499f7fbd49d3f6c4327d3a71d43a1973032f3", size = 219942, upload-time = "2026-03-17T10:31:10.708Z" }, + { url = "https://files.pythonhosted.org/packages/5f/13/93419671cee82b780bab7ea96b67c8ef448f5f295f36bf5031154ec9a790/coverage-7.13.5-cp313-cp313-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:da305e9937617ee95c2e39d8ff9f040e0487cbf1ac174f777ed5eddd7a7c1f26", size = 250935, upload-time = "2026-03-17T10:31:12.392Z" }, + { url = "https://files.pythonhosted.org/packages/ac/68/1666e3a4462f8202d836920114fa7a5ee9275d1fa45366d336c551a162dd/coverage-7.13.5-cp313-cp313-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:78e696e1cc714e57e8b25760b33a8b1026b7048d270140d25dafe1b0a1ee05a3", size = 253541, upload-time = "2026-03-17T10:31:14.247Z" }, + { url = "https://files.pythonhosted.org/packages/4e/5e/3ee3b835647be646dcf3c65a7c6c18f87c27326a858f72ab22c12730773d/coverage-7.13.5-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:02ca0eed225b2ff301c474aeeeae27d26e2537942aa0f87491d3e147e784a82b", size = 254780, upload-time = "2026-03-17T10:31:16.193Z" }, + { url = "https://files.pythonhosted.org/packages/44/b3/cb5bd1a04cfcc49ede6cd8409d80bee17661167686741e041abc7ee1b9a9/coverage-7.13.5-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:04690832cbea4e4663d9149e05dba142546ca05cb1848816760e7f58285c970a", size = 256912, 
upload-time = "2026-03-17T10:31:17.89Z" }, + { url = "https://files.pythonhosted.org/packages/1b/66/c1dceb7b9714473800b075f5c8a84f4588f887a90eb8645282031676e242/coverage-7.13.5-cp313-cp313-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:0590e44dd2745c696a778f7bab6aa95256de2cbc8b8cff4f7db8ff09813d6969", size = 251165, upload-time = "2026-03-17T10:31:19.605Z" }, + { url = "https://files.pythonhosted.org/packages/b7/62/5502b73b97aa2e53ea22a39cf8649ff44827bef76d90bf638777daa27a9d/coverage-7.13.5-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:d7cfad2d6d81dd298ab6b89fe72c3b7b05ec7544bdda3b707ddaecff8d25c161", size = 252908, upload-time = "2026-03-17T10:31:21.312Z" }, + { url = "https://files.pythonhosted.org/packages/7d/37/7792c2d69854397ca77a55c4646e5897c467928b0e27f2d235d83b5d08c6/coverage-7.13.5-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:e092b9499de38ae0fbfbc603a74660eb6ff3e869e507b50d85a13b6db9863e15", size = 250873, upload-time = "2026-03-17T10:31:23.565Z" }, + { url = "https://files.pythonhosted.org/packages/a3/23/bc866fb6163be52a8a9e5d708ba0d3b1283c12158cefca0a8bbb6e247a43/coverage-7.13.5-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:48c39bc4a04d983a54a705a6389512883d4a3b9862991b3617d547940e9f52b1", size = 255030, upload-time = "2026-03-17T10:31:25.58Z" }, + { url = "https://files.pythonhosted.org/packages/7d/8b/ef67e1c222ef49860701d346b8bbb70881bef283bd5f6cbba68a39a086c7/coverage-7.13.5-cp313-cp313-musllinux_1_2_riscv64.whl", hash = "sha256:2d3807015f138ffea1ed9afeeb8624fd781703f2858b62a8dd8da5a0994c57b6", size = 250694, upload-time = "2026-03-17T10:31:27.316Z" }, + { url = "https://files.pythonhosted.org/packages/46/0d/866d1f74f0acddbb906db212e096dee77a8e2158ca5e6bb44729f9d93298/coverage-7.13.5-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:ee2aa19e03161671ec964004fb74b2257805d9710bf14a5c704558b9d8dbaf17", size = 252469, upload-time = "2026-03-17T10:31:29.472Z" }, + { url = 
"https://files.pythonhosted.org/packages/7a/f5/be742fec31118f02ce42b21c6af187ad6a344fed546b56ca60caacc6a9a0/coverage-7.13.5-cp313-cp313-win32.whl", hash = "sha256:ce1998c0483007608c8382f4ff50164bfc5bd07a2246dd272aa4043b75e61e85", size = 222112, upload-time = "2026-03-17T10:31:31.526Z" }, + { url = "https://files.pythonhosted.org/packages/66/40/7732d648ab9d069a46e686043241f01206348e2bbf128daea85be4d6414b/coverage-7.13.5-cp313-cp313-win_amd64.whl", hash = "sha256:631efb83f01569670a5e866ceb80fe483e7c159fac6f167e6571522636104a0b", size = 222923, upload-time = "2026-03-17T10:31:33.633Z" }, + { url = "https://files.pythonhosted.org/packages/48/af/fea819c12a095781f6ccd504890aaddaf88b8fab263c4940e82c7b770124/coverage-7.13.5-cp313-cp313-win_arm64.whl", hash = "sha256:f4cd16206ad171cbc2470dbea9103cf9a7607d5fe8c242fdf1edf36174020664", size = 221540, upload-time = "2026-03-17T10:31:35.445Z" }, + { url = "https://files.pythonhosted.org/packages/23/d2/17879af479df7fbbd44bd528a31692a48f6b25055d16482fdf5cdb633805/coverage-7.13.5-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:0428cbef5783ad91fe240f673cc1f76b25e74bbfe1a13115e4aa30d3f538162d", size = 220262, upload-time = "2026-03-17T10:31:37.184Z" }, + { url = "https://files.pythonhosted.org/packages/5b/4c/d20e554f988c8f91d6a02c5118f9abbbf73a8768a3048cb4962230d5743f/coverage-7.13.5-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:e0b216a19534b2427cc201a26c25da4a48633f29a487c61258643e89d28200c0", size = 220617, upload-time = "2026-03-17T10:31:39.245Z" }, + { url = "https://files.pythonhosted.org/packages/29/9c/f9f5277b95184f764b24e7231e166dfdb5780a46d408a2ac665969416d61/coverage-7.13.5-cp313-cp313t-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:972a9cd27894afe4bc2b1480107054e062df08e671df7c2f18c205e805ccd806", size = 261912, upload-time = "2026-03-17T10:31:41.324Z" }, + { url = 
"https://files.pythonhosted.org/packages/d5/f6/7f1ab39393eeb50cfe4747ae8ef0e4fc564b989225aa1152e13a180d74f8/coverage-7.13.5-cp313-cp313t-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:4b59148601efcd2bac8c4dbf1f0ad6391693ccf7a74b8205781751637076aee3", size = 263987, upload-time = "2026-03-17T10:31:43.724Z" }, + { url = "https://files.pythonhosted.org/packages/a0/d7/62c084fb489ed9c6fbdf57e006752e7c516ea46fd690e5ed8b8617c7d52e/coverage-7.13.5-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:505d7083c8b0c87a8fa8c07370c285847c1f77739b22e299ad75a6af6c32c5c9", size = 266416, upload-time = "2026-03-17T10:31:45.769Z" }, + { url = "https://files.pythonhosted.org/packages/a9/f6/df63d8660e1a0bff6125947afda112a0502736f470d62ca68b288ea762d8/coverage-7.13.5-cp313-cp313t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:60365289c3741e4db327e7baff2a4aaacf22f788e80fa4683393891b70a89fbd", size = 267558, upload-time = "2026-03-17T10:31:48.293Z" }, + { url = "https://files.pythonhosted.org/packages/5b/02/353ca81d36779bd108f6d384425f7139ac3c58c750dcfaafe5d0bee6436b/coverage-7.13.5-cp313-cp313t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:1b88c69c8ef5d4b6fe7dea66d6636056a0f6a7527c440e890cf9259011f5e606", size = 261163, upload-time = "2026-03-17T10:31:50.125Z" }, + { url = "https://files.pythonhosted.org/packages/2c/16/2e79106d5749bcaf3aee6d309123548e3276517cd7851faa8da213bc61bf/coverage-7.13.5-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:5b13955d31d1633cf9376908089b7cebe7d15ddad7aeaabcbe969a595a97e95e", size = 263981, upload-time = "2026-03-17T10:31:51.961Z" }, + { url = "https://files.pythonhosted.org/packages/29/c7/c29e0c59ffa6942030ae6f50b88ae49988e7e8da06de7ecdbf49c6d4feae/coverage-7.13.5-cp313-cp313t-musllinux_1_2_i686.whl", hash = "sha256:f70c9ab2595c56f81a89620e22899eea8b212a4041bd728ac6f4a28bf5d3ddd0", size = 261604, 
upload-time = "2026-03-17T10:31:53.872Z" }, + { url = "https://files.pythonhosted.org/packages/40/48/097cdc3db342f34006a308ab41c3a7c11c3f0d84750d340f45d88a782e00/coverage-7.13.5-cp313-cp313t-musllinux_1_2_ppc64le.whl", hash = "sha256:084b84a8c63e8d6fc7e3931b316a9bcafca1458d753c539db82d31ed20091a87", size = 265321, upload-time = "2026-03-17T10:31:55.997Z" }, + { url = "https://files.pythonhosted.org/packages/bb/1f/4994af354689e14fd03a75f8ec85a9a68d94e0188bbdab3fc1516b55e512/coverage-7.13.5-cp313-cp313t-musllinux_1_2_riscv64.whl", hash = "sha256:ad14385487393e386e2ea988b09d62dd42c397662ac2dabc3832d71253eee479", size = 260502, upload-time = "2026-03-17T10:31:58.308Z" }, + { url = "https://files.pythonhosted.org/packages/22/c6/9bb9ef55903e628033560885f5c31aa227e46878118b63ab15dc7ba87797/coverage-7.13.5-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:7f2c47b36fe7709a6e83bfadf4eefb90bd25fbe4014d715224c4316f808e59a2", size = 262688, upload-time = "2026-03-17T10:32:00.141Z" }, + { url = "https://files.pythonhosted.org/packages/14/4f/f5df9007e50b15e53e01edea486814783a7f019893733d9e4d6caad75557/coverage-7.13.5-cp313-cp313t-win32.whl", hash = "sha256:67e9bc5449801fad0e5dff329499fb090ba4c5800b86805c80617b4e29809b2a", size = 222788, upload-time = "2026-03-17T10:32:02.246Z" }, + { url = "https://files.pythonhosted.org/packages/e1/98/aa7fccaa97d0f3192bec013c4e6fd6d294a6ed44b640e6bb61f479e00ed5/coverage-7.13.5-cp313-cp313t-win_amd64.whl", hash = "sha256:da86cdcf10d2519e10cabb8ac2de03da1bcb6e4853790b7fbd48523332e3a819", size = 223851, upload-time = "2026-03-17T10:32:04.416Z" }, + { url = "https://files.pythonhosted.org/packages/3d/8b/e5c469f7352651e5f013198e9e21f97510b23de957dd06a84071683b4b60/coverage-7.13.5-cp313-cp313t-win_arm64.whl", hash = "sha256:0ecf12ecb326fe2c339d93fc131816f3a7367d223db37817208905c89bded911", size = 222104, upload-time = "2026-03-17T10:32:06.65Z" }, + { url = 
"https://files.pythonhosted.org/packages/8e/77/39703f0d1d4b478bfd30191d3c14f53caf596fac00efb3f8f6ee23646439/coverage-7.13.5-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:fbabfaceaeb587e16f7008f7795cd80d20ec548dc7f94fbb0d4ec2e038ce563f", size = 219621, upload-time = "2026-03-17T10:32:08.589Z" }, + { url = "https://files.pythonhosted.org/packages/e2/3e/51dff36d99ae14639a133d9b164d63e628532e2974d8b1edb99dd1ebc733/coverage-7.13.5-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:9bb2a28101a443669a423b665939381084412b81c3f8c0fcfbac57f4e30b5b8e", size = 219953, upload-time = "2026-03-17T10:32:10.507Z" }, + { url = "https://files.pythonhosted.org/packages/6a/6c/1f1917b01eb647c2f2adc9962bd66c79eb978951cab61bdc1acab3290c07/coverage-7.13.5-cp314-cp314-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:bd3a2fbc1c6cccb3c5106140d87cc6a8715110373ef42b63cf5aea29df8c217a", size = 250992, upload-time = "2026-03-17T10:32:12.41Z" }, + { url = "https://files.pythonhosted.org/packages/22/e5/06b1f88f42a5a99df42ce61208bdec3bddb3d261412874280a19796fc09c/coverage-7.13.5-cp314-cp314-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:6c36ddb64ed9d7e496028d1d00dfec3e428e0aabf4006583bb1839958d280510", size = 253503, upload-time = "2026-03-17T10:32:14.449Z" }, + { url = "https://files.pythonhosted.org/packages/80/28/2a148a51e5907e504fa7b85490277734e6771d8844ebcc48764a15e28155/coverage-7.13.5-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:380e8e9084d8eb38db3a9176a1a4f3c0082c3806fa0dc882d1d87abc3c789247", size = 254852, upload-time = "2026-03-17T10:32:16.56Z" }, + { url = "https://files.pythonhosted.org/packages/61/77/50e8d3d85cc0b7ebe09f30f151d670e302c7ff4a1bf6243f71dd8b0981fa/coverage-7.13.5-cp314-cp314-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:e808af52a0513762df4d945ea164a24b37f2f518cbe97e03deaa0ee66139b4d6", size = 257161, 
upload-time = "2026-03-17T10:32:19.004Z" }, + { url = "https://files.pythonhosted.org/packages/3b/c4/b5fd1d4b7bf8d0e75d997afd3925c59ba629fc8616f1b3aae7605132e256/coverage-7.13.5-cp314-cp314-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:e301d30dd7e95ae068671d746ba8c34e945a82682e62918e41b2679acd2051a0", size = 251021, upload-time = "2026-03-17T10:32:21.344Z" }, + { url = "https://files.pythonhosted.org/packages/f8/66/6ea21f910e92d69ef0b1c3346ea5922a51bad4446c9126db2ae96ee24c4c/coverage-7.13.5-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:800bc829053c80d240a687ceeb927a94fd108bbdc68dfbe505d0d75ab578a882", size = 252858, upload-time = "2026-03-17T10:32:23.506Z" }, + { url = "https://files.pythonhosted.org/packages/9e/ea/879c83cb5d61aa2a35fb80e72715e92672daef8191b84911a643f533840c/coverage-7.13.5-cp314-cp314-musllinux_1_2_i686.whl", hash = "sha256:0b67af5492adb31940ee418a5a655c28e48165da5afab8c7fa6fd72a142f8740", size = 250823, upload-time = "2026-03-17T10:32:25.516Z" }, + { url = "https://files.pythonhosted.org/packages/8a/fb/616d95d3adb88b9803b275580bdeee8bd1b69a886d057652521f83d7322f/coverage-7.13.5-cp314-cp314-musllinux_1_2_ppc64le.whl", hash = "sha256:c9136ff29c3a91e25b1d1552b5308e53a1e0653a23e53b6366d7c2dcbbaf8a16", size = 255099, upload-time = "2026-03-17T10:32:27.944Z" }, + { url = "https://files.pythonhosted.org/packages/1c/93/25e6917c90ec1c9a56b0b26f6cad6408e5f13bb6b35d484a0d75c9cf000d/coverage-7.13.5-cp314-cp314-musllinux_1_2_riscv64.whl", hash = "sha256:cff784eef7f0b8f6cb28804fbddcfa99f89efe4cc35fb5627e3ac58f91ed3ac0", size = 250638, upload-time = "2026-03-17T10:32:29.914Z" }, + { url = "https://files.pythonhosted.org/packages/fc/7b/dc1776b0464145a929deed214aef9fb1493f159b59ff3c7eeeedf91eddd0/coverage-7.13.5-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:68a4953be99b17ac3c23b6efbc8a38330d99680c9458927491d18700ef23ded0", size = 252295, upload-time = "2026-03-17T10:32:31.981Z" }, + { url = 
"https://files.pythonhosted.org/packages/ea/fb/99cbbc56a26e07762a2740713f3c8f9f3f3106e3a3dd8cc4474954bccd34/coverage-7.13.5-cp314-cp314-win32.whl", hash = "sha256:35a31f2b1578185fbe6aa2e74cea1b1d0bbf4c552774247d9160d29b80ed56cc", size = 222360, upload-time = "2026-03-17T10:32:34.233Z" }, + { url = "https://files.pythonhosted.org/packages/8d/b7/4758d4f73fb536347cc5e4ad63662f9d60ba9118cb6785e9616b2ce5d7fa/coverage-7.13.5-cp314-cp314-win_amd64.whl", hash = "sha256:2aa055ae1857258f9e0045be26a6d62bdb47a72448b62d7b55f4820f361a2633", size = 223174, upload-time = "2026-03-17T10:32:36.369Z" }, + { url = "https://files.pythonhosted.org/packages/2c/f2/24d84e1dfe70f8ac9fdf30d338239860d0d1d5da0bda528959d0ebc9da28/coverage-7.13.5-cp314-cp314-win_arm64.whl", hash = "sha256:1b11eef33edeae9d142f9b4358edb76273b3bfd30bc3df9a4f95d0e49caf94e8", size = 221739, upload-time = "2026-03-17T10:32:38.736Z" }, + { url = "https://files.pythonhosted.org/packages/60/5b/4a168591057b3668c2428bff25dd3ebc21b629d666d90bcdfa0217940e84/coverage-7.13.5-cp314-cp314t-macosx_10_15_x86_64.whl", hash = "sha256:10a0c37f0b646eaff7cce1874c31d1f1ccb297688d4c747291f4f4c70741cc8b", size = 220351, upload-time = "2026-03-17T10:32:41.196Z" }, + { url = "https://files.pythonhosted.org/packages/f5/21/1fd5c4dbfe4a58b6b99649125635df46decdfd4a784c3cd6d410d303e370/coverage-7.13.5-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:b5db73ba3c41c7008037fa731ad5459fc3944cb7452fc0aa9f822ad3533c583c", size = 220612, upload-time = "2026-03-17T10:32:43.204Z" }, + { url = "https://files.pythonhosted.org/packages/d6/fe/2a924b3055a5e7e4512655a9d4609781b0d62334fa0140c3e742926834e2/coverage-7.13.5-cp314-cp314t-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:750db93a81e3e5a9831b534be7b1229df848b2e125a604fe6651e48aa070e5f9", size = 261985, upload-time = "2026-03-17T10:32:45.514Z" }, + { url = 
"https://files.pythonhosted.org/packages/d7/0d/c8928f2bd518c45990fe1a2ab8db42e914ef9b726c975facc4282578c3eb/coverage-7.13.5-cp314-cp314t-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:9ddb4f4a5479f2539644be484da179b653273bca1a323947d48ab107b3ed1f29", size = 264107, upload-time = "2026-03-17T10:32:47.971Z" }, + { url = "https://files.pythonhosted.org/packages/ef/ae/4ae35bbd9a0af9d820362751f0766582833c211224b38665c0f8de3d487f/coverage-7.13.5-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:d8a7a2049c14f413163e2bdabd37e41179b1d1ccb10ffc6ccc4b7a718429c607", size = 266513, upload-time = "2026-03-17T10:32:50.1Z" }, + { url = "https://files.pythonhosted.org/packages/9c/20/d326174c55af36f74eac6ae781612d9492f060ce8244b570bb9d50d9d609/coverage-7.13.5-cp314-cp314t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:e1c85e0b6c05c592ea6d8768a66a254bfb3874b53774b12d4c89c481eb78cb90", size = 267650, upload-time = "2026-03-17T10:32:52.391Z" }, + { url = "https://files.pythonhosted.org/packages/7a/5e/31484d62cbd0eabd3412e30d74386ece4a0837d4f6c3040a653878bfc019/coverage-7.13.5-cp314-cp314t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:777c4d1eff1b67876139d24288aaf1817f6c03d6bae9c5cc8d27b83bcfe38fe3", size = 261089, upload-time = "2026-03-17T10:32:54.544Z" }, + { url = "https://files.pythonhosted.org/packages/e9/d8/49a72d6de146eebb0b7e48cc0f4bc2c0dd858e3d4790ab2b39a2872b62bd/coverage-7.13.5-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:6697e29b93707167687543480a40f0db8f356e86d9f67ddf2e37e2dfd91a9dab", size = 263982, upload-time = "2026-03-17T10:32:56.803Z" }, + { url = "https://files.pythonhosted.org/packages/06/3b/0351f1bd566e6e4dd39e978efe7958bde1d32f879e85589de147654f57bb/coverage-7.13.5-cp314-cp314t-musllinux_1_2_i686.whl", hash = "sha256:8fdf453a942c3e4d99bd80088141c4c6960bb232c409d9c3558e2dbaa3998562", size = 261579, 
upload-time = "2026-03-17T10:32:59.466Z" }, + { url = "https://files.pythonhosted.org/packages/5d/ce/796a2a2f4017f554d7810f5c573449b35b1e46788424a548d4d19201b222/coverage-7.13.5-cp314-cp314t-musllinux_1_2_ppc64le.whl", hash = "sha256:32ca0c0114c9834a43f045a87dcebd69d108d8ffb666957ea65aa132f50332e2", size = 265316, upload-time = "2026-03-17T10:33:01.847Z" }, + { url = "https://files.pythonhosted.org/packages/3d/16/d5ae91455541d1a78bc90abf495be600588aff8f6db5c8b0dae739fa39c9/coverage-7.13.5-cp314-cp314t-musllinux_1_2_riscv64.whl", hash = "sha256:8769751c10f339021e2638cd354e13adeac54004d1941119b2c96fe5276d45ea", size = 260427, upload-time = "2026-03-17T10:33:03.945Z" }, + { url = "https://files.pythonhosted.org/packages/48/11/07f413dba62db21fb3fad5d0de013a50e073cc4e2dc4306e770360f6dfc8/coverage-7.13.5-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:cec2d83125531bd153175354055cdb7a09987af08a9430bd173c937c6d0fba2a", size = 262745, upload-time = "2026-03-17T10:33:06.285Z" }, + { url = "https://files.pythonhosted.org/packages/91/15/d792371332eb4663115becf4bad47e047d16234b1aff687b1b18c58d60ae/coverage-7.13.5-cp314-cp314t-win32.whl", hash = "sha256:0cd9ed7a8b181775459296e402ca4fb27db1279740a24e93b3b41942ebe4b215", size = 223146, upload-time = "2026-03-17T10:33:08.756Z" }, + { url = "https://files.pythonhosted.org/packages/db/51/37221f59a111dca5e85be7dbf09696323b5b9f13ff65e0641d535ed06ea8/coverage-7.13.5-cp314-cp314t-win_amd64.whl", hash = "sha256:301e3b7dfefecaca37c9f1aa6f0049b7d4ab8dd933742b607765d757aca77d43", size = 224254, upload-time = "2026-03-17T10:33:11.174Z" }, + { url = "https://files.pythonhosted.org/packages/54/83/6acacc889de8987441aa7d5adfbdbf33d288dad28704a67e574f1df9bcbb/coverage-7.13.5-cp314-cp314t-win_arm64.whl", hash = "sha256:9dacc2ad679b292709e0f5fc1ac74a6d4d5562e424058962c7bb0c658ad25e45", size = 222276, upload-time = "2026-03-17T10:33:13.466Z" }, + { url = 
"https://files.pythonhosted.org/packages/9e/ee/a4cf96b8ce1e566ed238f0659ac2d3f007ed1d14b181bcb684e19561a69a/coverage-7.13.5-py3-none-any.whl", hash = "sha256:34b02417cf070e173989b3db962f7ed56d2f644307b2cf9d5a0f258e13084a61", size = 211346, upload-time = "2026-03-17T10:33:15.691Z" }, +] + +[[package]] +name = "distlib" +version = "0.4.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/96/8e/709914eb2b5749865801041647dc7f4e6d00b549cfe88b65ca192995f07c/distlib-0.4.0.tar.gz", hash = "sha256:feec40075be03a04501a973d81f633735b4b69f98b05450592310c0f401a4e0d", size = 614605, upload-time = "2025-07-17T16:52:00.465Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/33/6b/e0547afaf41bf2c42e52430072fa5658766e3d65bd4b03a563d1b6336f57/distlib-0.4.0-py2.py3-none-any.whl", hash = "sha256:9659f7d87e46584a30b5780e43ac7a2143098441670ff0a49d5f9034c54a6c16", size = 469047, upload-time = "2025-07-17T16:51:58.613Z" }, +] + +[[package]] +name = "duckdb" +version = "1.5.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/0c/66/744b4931b799a42f8cb9bc7a6f169e7b8e51195b62b246db407fd90bf15f/duckdb-1.5.2.tar.gz", hash = "sha256:638da0d5102b6cb6f7d47f83d0600708ac1d3cb46c5e9aaabc845f9ba4d69246", size = 18017166, upload-time = "2026-04-13T11:30:09.065Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/98/f2/e3d742808f138d374be4bb516fade3d1f33749b813650810ab7885cdc363/duckdb-1.5.2-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:4420b3f47027a7849d0e1815532007f377fa95ee5810b47ea717d35525c12f79", size = 30064879, upload-time = "2026-04-13T11:29:30.763Z" }, + { url = "https://files.pythonhosted.org/packages/72/0d/f3dc1cf97e1267ca15e4307d456f96ce583961f0703fd75e62b2ad8d64fa/duckdb-1.5.2-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:bb42e6ed543902e14eae647850da24103a89f0bc2587dec5601b1c1f213bd2ed", size = 15969327, upload-time = 
"2026-04-13T11:29:33.481Z" }, + { url = "https://files.pythonhosted.org/packages/b1/e0/d5418def53ae4e05a63075705ff44ed5af5a1a5932627eb2b600c5df1c93/duckdb-1.5.2-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:98c0535cd6d901f61a5ea3c2e26a1fd28482953d794deb183daf568e3aa5dda6", size = 14225107, upload-time = "2026-04-13T11:29:35.882Z" }, + { url = "https://files.pythonhosted.org/packages/16/a7/15aaa59dbecc35e9711980fcdbf525b32a52470b32d18ef678193a146213/duckdb-1.5.2-cp313-cp313-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:486c862bf7f163c0110b6d85b3e5c031d224a671cca468f12ebb1d3a348f6b39", size = 19313433, upload-time = "2026-04-13T11:29:38.367Z" }, + { url = "https://files.pythonhosted.org/packages/bd/21/d903cc63a5140c822b7b62b373a87dc557e60c29b321dfb435061c5e67cf/duckdb-1.5.2-cp313-cp313-manylinux_2_26_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:70631c847ca918ee710ec874241b00cf9d2e5be90762cbb2a0389f17823c08f7", size = 21429837, upload-time = "2026-04-13T11:29:41.135Z" }, + { url = "https://files.pythonhosted.org/packages/e3/0a/b770d1f60c70597302130d6247f418549b7094251a02348fbaf1c7e147ae/duckdb-1.5.2-cp313-cp313-win_amd64.whl", hash = "sha256:52a21823f3fbb52f0f0e5425e20b07391ad882464b955879499b5ff0b45a376b", size = 13107699, upload-time = "2026-04-13T11:29:43.905Z" }, + { url = "https://files.pythonhosted.org/packages/d9/cf/e200fe431d700962d1a908d2ce89f53ccee1cc8db260174ae663ba09686b/duckdb-1.5.2-cp313-cp313-win_arm64.whl", hash = "sha256:411ad438bd4140f189a10e7f515781335962c5d18bd07837dc6d202e3985253d", size = 13927646, upload-time = "2026-04-13T11:29:46.598Z" }, + { url = "https://files.pythonhosted.org/packages/83/a1/f6286c67726cc1ea60a6e3c0d9fbc66527dde24ae089a51bbe298b13ca78/duckdb-1.5.2-cp314-cp314-macosx_10_15_universal2.whl", hash = "sha256:6b0fe75c148000f060aa1a27b293cacc0ea08cc1cad724fbf2143d56070a3785", size = 30078598, upload-time = "2026-04-13T11:29:49.828Z" }, + { url = 
"https://files.pythonhosted.org/packages/de/6a/59febb02f21a4a5c6b0b0099ef7c965fdd5e61e4904cf813809bb792e35f/duckdb-1.5.2-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:35579b8e3a064b5eaf15b0eafc558056a13f79a0a62e34cc4baf57119daecfec", size = 15975120, upload-time = "2026-04-13T11:29:52.631Z" }, + { url = "https://files.pythonhosted.org/packages/09/70/ce750854d37bb5a45cccbb2c3cb04df4af56aea8fc30a2499bb643b4a9c0/duckdb-1.5.2-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:ea58ff5b0880593a280cf5511734b17711b32ee1f58b47d726e8600848358160", size = 14227762, upload-time = "2026-04-13T11:29:55.564Z" }, + { url = "https://files.pythonhosted.org/packages/28/dc/ad45ac3c0b6c4687dc649e8f6cf01af1c8b0443932a39b2abb4ebcb3babd/duckdb-1.5.2-cp314-cp314-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:ef461bca07313412dc09961c4a4757a851f56b95ac01c58fac6007632b7b94f2", size = 19315668, upload-time = "2026-04-13T11:29:58.427Z" }, + { url = "https://files.pythonhosted.org/packages/cc/b1/1464f468d2e5813f5808de95df9d3113a645a5bfa2ffcaecbc542ddae272/duckdb-1.5.2-cp314-cp314-manylinux_2_26_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:be37680ddb380015cb37318e378c53511c45c4f0d8fac5599d22b7d092b9217a", size = 21434056, upload-time = "2026-04-13T11:30:01.238Z" }, + { url = "https://files.pythonhosted.org/packages/ce/32/6673607e024722473fa7aafdd29c0e3dd231dd528f6cd8b5797fbeeb229d/duckdb-1.5.2-cp314-cp314-win_amd64.whl", hash = "sha256:0b291786014df1133f8f18b9df4d004484613146e858d71a21791e0fcca16cf4", size = 13633667, upload-time = "2026-04-13T11:30:04.05Z" }, + { url = "https://files.pythonhosted.org/packages/7a/e3/9d34173ec068631faea3ea6e73050700729363e7e33306a9a3218e5cdc61/duckdb-1.5.2-cp314-cp314-win_arm64.whl", hash = "sha256:c9f3e0b71b8a50fccfb42794899285d9d318ce2503782b9dd54868e5ecd0ad31", size = 14402513, upload-time = "2026-04-13T11:30:06.609Z" }, +] + +[[package]] +name = "fastapi" +version = "0.136.1" +source = { registry = "https://pypi.org/simple" } 
+dependencies = [ + { name = "annotated-doc" }, + { name = "pydantic" }, + { name = "starlette" }, + { name = "typing-extensions" }, + { name = "typing-inspection" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/5d/45/c130091c2dfa061bbfe3150f2a5091ef1adf149f2a8d2ae769ecaf6e99a2/fastapi-0.136.1.tar.gz", hash = "sha256:7af665ad7acfa0a3baf8983d393b6b471b9da10ede59c60045f49fbc89a0fa7f", size = 397448, upload-time = "2026-04-23T16:49:44.046Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/5a/ff/2e4eca3ade2c22fe1dea7043b8ee9dabe47753349eb1b56a202de8af6349/fastapi-0.136.1-py3-none-any.whl", hash = "sha256:a6e9d7eeada96c93a4d69cb03836b44fa34e2854accb7244a1ece36cd4781c3f", size = 117683, upload-time = "2026-04-23T16:49:42.437Z" }, +] + +[[package]] +name = "filelock" +version = "3.29.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/b5/fe/997687a931ab51049acce6fa1f23e8f01216374ea81374ddee763c493db5/filelock-3.29.0.tar.gz", hash = "sha256:69974355e960702e789734cb4871f884ea6fe50bd8404051a3530bc07809cf90", size = 57571, upload-time = "2026-04-19T15:39:10.068Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/81/47/dd9a212ef6e343a6857485ffe25bba537304f1913bdbed446a23f7f592e1/filelock-3.29.0-py3-none-any.whl", hash = "sha256:96f5f6344709aa1572bbf631c640e4ebeeb519e08da902c39a001882f30ac258", size = 39812, upload-time = "2026-04-19T15:39:08.752Z" }, +] + +[[package]] +name = "fsspec" +version = "2026.3.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/e1/cf/b50ddf667c15276a9ab15a70ef5f257564de271957933ffea49d2cdbcdfb/fsspec-2026.3.0.tar.gz", hash = "sha256:1ee6a0e28677557f8c2f994e3eea77db6392b4de9cd1f5d7a9e87a0ae9d01b41", size = 313547, upload-time = "2026-03-27T19:11:14.892Z" } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/d5/1f/5f4a3cd9e4440e9d9bc78ad0a91a1c8d46b4d429d5239ebe6793c9fe5c41/fsspec-2026.3.0-py3-none-any.whl", hash = "sha256:d2ceafaad1b3457968ed14efa28798162f1638dbb5d2a6868a2db002a5ee39a4", size = 202595, upload-time = "2026-03-27T19:11:13.595Z" }, +] + +[[package]] +name = "h11" +version = "0.16.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/01/ee/02a2c011bdab74c6fb3c75474d40b3052059d95df7e73351460c8588d963/h11-0.16.0.tar.gz", hash = "sha256:4e35b956cf45792e4caa5885e69fba00bdbc6ffafbfa020300e549b208ee5ff1", size = 101250, upload-time = "2025-04-24T03:35:25.427Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/04/4b/29cac41a4d98d144bf5f6d33995617b185d14b22401f75ca86f384e87ff1/h11-0.16.0-py3-none-any.whl", hash = "sha256:63cf8bbe7522de3bf65932fda1d9c2772064ffb3dae62d55932da54b31cb6c86", size = 37515, upload-time = "2025-04-24T03:35:24.344Z" }, +] + +[[package]] +name = "hf-xet" +version = "1.4.3" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/53/92/ec9ad04d0b5728dca387a45af7bc98fbb0d73b2118759f5f6038b61a57e8/hf_xet-1.4.3.tar.gz", hash = "sha256:8ddedb73c8c08928c793df2f3401ec26f95be7f7e516a7bee2fbb546f6676113", size = 670477, upload-time = "2026-03-31T22:40:07.874Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/72/43/724d307b34e353da0abd476e02f72f735cdd2bc86082dee1b32ea0bfee1d/hf_xet-1.4.3-cp313-cp313t-macosx_10_12_x86_64.whl", hash = "sha256:7551659ba4f1e1074e9623996f28c3873682530aee0a846b7f2f066239228144", size = 3800935, upload-time = "2026-03-31T22:39:49.618Z" }, + { url = "https://files.pythonhosted.org/packages/2b/d2/8bee5996b699262edb87dbb54118d287c0e1b2fc78af7cdc41857ba5e3c4/hf_xet-1.4.3-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:bee693ada985e7045997f05f081d0e12c4c08bd7626dc397f8a7c487e6c04f7f", size = 3558942, upload-time = "2026-03-31T22:39:47.938Z" }, + 
{ url = "https://files.pythonhosted.org/packages/c3/a1/e993d09cbe251196fb60812b09a58901c468127b7259d2bf0f68bf6088eb/hf_xet-1.4.3-cp313-cp313t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:21644b404bb0100fe3857892f752c4d09642586fd988e61501c95bbf44b393a3", size = 4207657, upload-time = "2026-03-31T22:39:39.69Z" }, + { url = "https://files.pythonhosted.org/packages/64/44/9eb6d21e5c34c63e5e399803a6932fa983cabdf47c0ecbcfe7ea97684b8c/hf_xet-1.4.3-cp313-cp313t-manylinux_2_28_aarch64.whl", hash = "sha256:987f09cfe418237812896a6736b81b1af02a3a6dcb4b4944425c4c4fca7a7cf8", size = 3986765, upload-time = "2026-03-31T22:39:37.936Z" }, + { url = "https://files.pythonhosted.org/packages/ea/7b/8ad6f16fdb82f5f7284a34b5ec48645bd575bdcd2f6f0d1644775909c486/hf_xet-1.4.3-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:60cf7fc43a99da0a853345cf86d23738c03983ee5249613a6305d3e57a5dca74", size = 4188162, upload-time = "2026-03-31T22:39:58.382Z" }, + { url = "https://files.pythonhosted.org/packages/1b/c4/39d6e136cbeea9ca5a23aad4b33024319222adbdc059ebcda5fc7d9d5ff4/hf_xet-1.4.3-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:2815a49a7a59f3e2edf0cf113ae88e8cb2ca2a221bf353fb60c609584f4884d4", size = 4424525, upload-time = "2026-03-31T22:40:00.225Z" }, + { url = "https://files.pythonhosted.org/packages/46/f2/adc32dae6bdbc367853118b9878139ac869419a4ae7ba07185dc31251b76/hf_xet-1.4.3-cp313-cp313t-win_amd64.whl", hash = "sha256:42ee323265f1e6a81b0e11094564fb7f7e0ec75b5105ffd91ae63f403a11931b", size = 3671610, upload-time = "2026-03-31T22:40:10.42Z" }, + { url = "https://files.pythonhosted.org/packages/e2/19/25d897dcc3f81953e0c2cde9ec186c7a0fee413eb0c9a7a9130d87d94d3a/hf_xet-1.4.3-cp313-cp313t-win_arm64.whl", hash = "sha256:27c976ba60079fb8217f485b9c5c7fcd21c90b0367753805f87cb9f3cdc4418a", size = 3528529, upload-time = "2026-03-31T22:40:09.106Z" }, + { url = 
"https://files.pythonhosted.org/packages/ec/36/3e8f85ca9fe09b8de2b2e10c63b3b3353d7dda88a0b3d426dffbe7b8313b/hf_xet-1.4.3-cp314-cp314t-macosx_10_12_x86_64.whl", hash = "sha256:5251d5ece3a81815bae9abab41cf7ddb7bcb8f56411bce0827f4a3071c92fdc6", size = 3801019, upload-time = "2026-03-31T22:39:56.651Z" }, + { url = "https://files.pythonhosted.org/packages/b5/9c/defb6cb1de28bccb7bd8d95f6e60f72a3d3fa4cb3d0329c26fb9a488bfe7/hf_xet-1.4.3-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:1feb0f3abeacee143367c326a128a2e2b60868ec12a36c225afb1d6c5a05e6d2", size = 3558746, upload-time = "2026-03-31T22:39:54.766Z" }, + { url = "https://files.pythonhosted.org/packages/c1/bd/8d001191893178ff8e826e46ad5299446e62b93cd164e17b0ffea08832ec/hf_xet-1.4.3-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:8b301fc150290ca90b4fccd079829b84bb4786747584ae08b94b4577d82fb791", size = 4207692, upload-time = "2026-03-31T22:39:46.246Z" }, + { url = "https://files.pythonhosted.org/packages/ce/48/6790b402803250e9936435613d3a78b9aaeee7973439f0918848dde58309/hf_xet-1.4.3-cp314-cp314t-manylinux_2_28_aarch64.whl", hash = "sha256:d972fbe95ddc0d3c0fc49b31a8a69f47db35c1e3699bf316421705741aab6653", size = 3986281, upload-time = "2026-03-31T22:39:44.648Z" }, + { url = "https://files.pythonhosted.org/packages/51/56/ea62552fe53db652a9099eda600b032d75554d0e86c12a73824bfedef88b/hf_xet-1.4.3-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:c5b48db1ee344a805a1b9bd2cda9b6b65fe77ed3787bd6e87ad5521141d317cd", size = 4187414, upload-time = "2026-03-31T22:40:04.951Z" }, + { url = "https://files.pythonhosted.org/packages/7d/f5/bc1456d4638061bea997e6d2db60a1a613d7b200e0755965ec312dc1ef79/hf_xet-1.4.3-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:22bdc1f5fb8b15bf2831440b91d1c9bbceeb7e10c81a12e8d75889996a5c9da8", size = 4424368, upload-time = "2026-03-31T22:40:06.347Z" }, + { url = 
"https://files.pythonhosted.org/packages/e4/76/ab597bae87e1f06d18d3ecb8ed7f0d3c9a37037fc32ce76233d369273c64/hf_xet-1.4.3-cp314-cp314t-win_amd64.whl", hash = "sha256:0392c79b7cf48418cd61478c1a925246cf10639f4cd9d94368d8ca1e8df9ea07", size = 3672280, upload-time = "2026-03-31T22:40:16.401Z" }, + { url = "https://files.pythonhosted.org/packages/62/05/2e462d34e23a09a74d73785dbed71cc5dbad82a72eee2ad60a72a554155d/hf_xet-1.4.3-cp314-cp314t-win_arm64.whl", hash = "sha256:681c92a07796325778a79d76c67011764ecc9042a8c3579332b61b63ae512075", size = 3528945, upload-time = "2026-03-31T22:40:14.995Z" }, + { url = "https://files.pythonhosted.org/packages/ac/9f/9c23e4a447b8f83120798f9279d0297a4d1360bdbf59ef49ebec78fe2545/hf_xet-1.4.3-cp37-abi3-macosx_10_12_x86_64.whl", hash = "sha256:d0da85329eaf196e03e90b84c2d0aca53bd4573d097a75f99609e80775f98025", size = 3805048, upload-time = "2026-03-31T22:39:53.105Z" }, + { url = "https://files.pythonhosted.org/packages/0b/f8/7aacb8e5f4a7899d39c787b5984e912e6c18b11be136ef13947d7a66d265/hf_xet-1.4.3-cp37-abi3-macosx_11_0_arm64.whl", hash = "sha256:e23717ce4186b265f69afa66e6f0069fe7efbf331546f5c313d00e123dc84583", size = 3562178, upload-time = "2026-03-31T22:39:51.295Z" }, + { url = "https://files.pythonhosted.org/packages/df/9a/a24b26dc8a65f0ecc0fe5be981a19e61e7ca963b85e062c083f3a9100529/hf_xet-1.4.3-cp37-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:fc360b70c815bf340ed56c7b8c63aacf11762a4b099b2fe2c9bd6d6068668c08", size = 4212320, upload-time = "2026-03-31T22:39:42.922Z" }, + { url = "https://files.pythonhosted.org/packages/53/60/46d493db155d2ee2801b71fb1b0fd67696359047fdd8caee2c914cc50c79/hf_xet-1.4.3-cp37-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:39f2d2e9654cd9b4319885733993807aab6de9dfbd34c42f0b78338d6617421f", size = 3991546, upload-time = "2026-03-31T22:39:41.335Z" }, + { url = 
"https://files.pythonhosted.org/packages/bc/f5/067363e1c96c6b17256910830d1b54099d06287e10f4ec6ec4e7e08371fc/hf_xet-1.4.3-cp37-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:49ad8a8cead2b56051aa84d7fce3e1335efe68df3cf6c058f22a65513885baac", size = 4193200, upload-time = "2026-03-31T22:40:01.936Z" }, + { url = "https://files.pythonhosted.org/packages/42/4b/53951592882d9c23080c7644542fda34a3813104e9e11fa1a7d82d419cb8/hf_xet-1.4.3-cp37-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:7716d62015477a70ea272d2d68cd7cad140f61c52ee452e133e139abfe2c17ba", size = 4429392, upload-time = "2026-03-31T22:40:03.492Z" }, + { url = "https://files.pythonhosted.org/packages/8a/21/75a6c175b4e79662ad8e62f46a40ce341d8d6b206b06b4320d07d55b188c/hf_xet-1.4.3-cp37-abi3-win_amd64.whl", hash = "sha256:6b591fcad34e272a5b02607485e4f2a1334aebf1bc6d16ce8eb1eb8978ac2021", size = 3677359, upload-time = "2026-03-31T22:40:13.619Z" }, + { url = "https://files.pythonhosted.org/packages/8a/7c/44314ecd0e89f8b2b51c9d9e5e7a60a9c1c82024ac471d415860557d3cd8/hf_xet-1.4.3-cp37-abi3-win_arm64.whl", hash = "sha256:7c2c7e20bcfcc946dc67187c203463f5e932e395845d098cc2a93f5b67ca0b47", size = 3533664, upload-time = "2026-03-31T22:40:12.152Z" }, +] + +[[package]] +name = "httpcore" +version = "1.0.9" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "certifi" }, + { name = "h11" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/06/94/82699a10bca87a5556c9c59b5963f2d039dbd239f25bc2a63907a05a14cb/httpcore-1.0.9.tar.gz", hash = "sha256:6e34463af53fd2ab5d807f399a9b45ea31c3dfa2276f15a2c3f00afff6e176e8", size = 85484, upload-time = "2025-04-24T22:06:22.219Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/7e/f5/f66802a942d491edb555dd61e3a9961140fd64c90bce1eafd741609d334d/httpcore-1.0.9-py3-none-any.whl", hash = "sha256:2d400746a40668fc9dec9810239072b40b4484b640a8c38fd654a024c7a1bf55", size = 78784, upload-time = "2025-04-24T22:06:20.566Z" }, +] + +[[package]] +name = 
"httptools" +version = "0.7.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/b5/46/120a669232c7bdedb9d52d4aeae7e6c7dfe151e99dc70802e2fc7a5e1993/httptools-0.7.1.tar.gz", hash = "sha256:abd72556974f8e7c74a259655924a717a2365b236c882c3f6f8a45fe94703ac9", size = 258961, upload-time = "2025-10-10T03:55:08.559Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/09/8f/c77b1fcbfd262d422f12da02feb0d218fa228d52485b77b953832105bb90/httptools-0.7.1-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:6babce6cfa2a99545c60bfef8bee0cc0545413cb0018f617c8059a30ad985de3", size = 202889, upload-time = "2025-10-10T03:54:47.089Z" }, + { url = "https://files.pythonhosted.org/packages/0a/1a/22887f53602feaa066354867bc49a68fc295c2293433177ee90870a7d517/httptools-0.7.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:601b7628de7504077dd3dcb3791c6b8694bbd967148a6d1f01806509254fb1ca", size = 108180, upload-time = "2025-10-10T03:54:48.052Z" }, + { url = "https://files.pythonhosted.org/packages/32/6a/6aaa91937f0010d288d3d124ca2946d48d60c3a5ee7ca62afe870e3ea011/httptools-0.7.1-cp313-cp313-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:04c6c0e6c5fb0739c5b8a9eb046d298650a0ff38cf42537fc372b28dc7e4472c", size = 478596, upload-time = "2025-10-10T03:54:48.919Z" }, + { url = "https://files.pythonhosted.org/packages/6d/70/023d7ce117993107be88d2cbca566a7c1323ccbaf0af7eabf2064fe356f6/httptools-0.7.1-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:69d4f9705c405ae3ee83d6a12283dc9feba8cc6aaec671b412917e644ab4fa66", size = 473268, upload-time = "2025-10-10T03:54:49.993Z" }, + { url = "https://files.pythonhosted.org/packages/32/4d/9dd616c38da088e3f436e9a616e1d0cc66544b8cdac405cc4e81c8679fc7/httptools-0.7.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:44c8f4347d4b31269c8a9205d8a5ee2df5322b09bbbd30f8f862185bb6b05346", size = 455517, 
upload-time = "2025-10-10T03:54:51.066Z" }, + { url = "https://files.pythonhosted.org/packages/1d/3a/a6c595c310b7df958e739aae88724e24f9246a514d909547778d776799be/httptools-0.7.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:465275d76db4d554918aba40bf1cbebe324670f3dfc979eaffaa5d108e2ed650", size = 458337, upload-time = "2025-10-10T03:54:52.196Z" }, + { url = "https://files.pythonhosted.org/packages/fd/82/88e8d6d2c51edc1cc391b6e044c6c435b6aebe97b1abc33db1b0b24cd582/httptools-0.7.1-cp313-cp313-win_amd64.whl", hash = "sha256:322d00c2068d125bd570f7bf78b2d367dad02b919d8581d7476d8b75b294e3e6", size = 85743, upload-time = "2025-10-10T03:54:53.448Z" }, + { url = "https://files.pythonhosted.org/packages/34/50/9d095fcbb6de2d523e027a2f304d4551855c2f46e0b82befd718b8b20056/httptools-0.7.1-cp314-cp314-macosx_10_13_universal2.whl", hash = "sha256:c08fe65728b8d70b6923ce31e3956f859d5e1e8548e6f22ec520a962c6757270", size = 203619, upload-time = "2025-10-10T03:54:54.321Z" }, + { url = "https://files.pythonhosted.org/packages/07/f0/89720dc5139ae54b03f861b5e2c55a37dba9a5da7d51e1e824a1f343627f/httptools-0.7.1-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:7aea2e3c3953521c3c51106ee11487a910d45586e351202474d45472db7d72d3", size = 108714, upload-time = "2025-10-10T03:54:55.163Z" }, + { url = "https://files.pythonhosted.org/packages/b3/cb/eea88506f191fb552c11787c23f9a405f4c7b0c5799bf73f2249cd4f5228/httptools-0.7.1-cp314-cp314-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:0e68b8582f4ea9166be62926077a3334064d422cf08ab87d8b74664f8e9058e1", size = 472909, upload-time = "2025-10-10T03:54:56.056Z" }, + { url = "https://files.pythonhosted.org/packages/e0/4a/a548bdfae6369c0d078bab5769f7b66f17f1bfaa6fa28f81d6be6959066b/httptools-0.7.1-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:df091cf961a3be783d6aebae963cc9b71e00d57fa6f149025075217bc6a55a7b", size = 470831, upload-time = "2025-10-10T03:54:57.219Z" 
}, + { url = "https://files.pythonhosted.org/packages/4d/31/14df99e1c43bd132eec921c2e7e11cda7852f65619bc0fc5bdc2d0cb126c/httptools-0.7.1-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:f084813239e1eb403ddacd06a30de3d3e09a9b76e7894dcda2b22f8a726e9c60", size = 452631, upload-time = "2025-10-10T03:54:58.219Z" }, + { url = "https://files.pythonhosted.org/packages/22/d2/b7e131f7be8d854d48cb6d048113c30f9a46dca0c9a8b08fcb3fcd588cdc/httptools-0.7.1-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:7347714368fb2b335e9063bc2b96f2f87a9ceffcd9758ac295f8bbcd3ffbc0ca", size = 452910, upload-time = "2025-10-10T03:54:59.366Z" }, + { url = "https://files.pythonhosted.org/packages/53/cf/878f3b91e4e6e011eff6d1fa9ca39f7eb17d19c9d7971b04873734112f30/httptools-0.7.1-cp314-cp314-win_amd64.whl", hash = "sha256:cfabda2a5bb85aa2a904ce06d974a3f30fb36cc63d7feaddec05d2050acede96", size = 88205, upload-time = "2025-10-10T03:55:00.389Z" }, +] + +[[package]] +name = "httpx" +version = "0.28.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "anyio" }, + { name = "certifi" }, + { name = "httpcore" }, + { name = "idna" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/b1/df/48c586a5fe32a0f01324ee087459e112ebb7224f646c0b5023f5e79e9956/httpx-0.28.1.tar.gz", hash = "sha256:75e98c5f16b0f35b567856f597f06ff2270a374470a5c2392242528e3e3e42fc", size = 141406, upload-time = "2024-12-06T15:37:23.222Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/2a/39/e50c7c3a983047577ee07d2a9e53faf5a69493943ec3f6a384bdc792deb2/httpx-0.28.1-py3-none-any.whl", hash = "sha256:d909fcccc110f8c7faf814ca82a9a4d816bc5a6dbfea25d6591d6985b8ba59ad", size = 73517, upload-time = "2024-12-06T15:37:21.509Z" }, +] + +[[package]] +name = "huggingface-hub" +version = "1.12.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "filelock" }, + { name = "fsspec" }, + { name = "hf-xet", marker = "platform_machine == 'AMD64' or 
platform_machine == 'aarch64' or platform_machine == 'amd64' or platform_machine == 'arm64' or platform_machine == 'x86_64'" }, + { name = "httpx" }, + { name = "packaging" }, + { name = "pyyaml" }, + { name = "tqdm" }, + { name = "typer" }, + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/56/52/1b54cb569509c725a32c1315261ac9fd0e6b91bbbf74d86fca10d3376164/huggingface_hub-1.12.0.tar.gz", hash = "sha256:7c3fe85e24b652334e5d456d7a812cd9a071e75630fac4365d9165ab5e4a34b6", size = 763091, upload-time = "2026-04-24T13:32:08.674Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/7e/2b/ef03ddb96bd1123503c2bd6932001020292deea649e9bf4caa2cb65a85bf/huggingface_hub-1.12.0-py3-none-any.whl", hash = "sha256:d74939969585ee35748bd66de09baf84099d461bda7287cd9043bfb99b0e424d", size = 646806, upload-time = "2026-04-24T13:32:06.717Z" }, +] + +[[package]] +name = "identify" +version = "2.6.19" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/52/63/51723b5f116cc04b061cb6f5a561790abf249d25931d515cd375e063e0f4/identify-2.6.19.tar.gz", hash = "sha256:6be5020c38fcb07da56c53733538a3081ea5aa70d36a156f83044bfbf9173842", size = 99567, upload-time = "2026-04-17T18:39:50.265Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/94/84/d9273cd09688070a6523c4aee4663a8538721b2b755c4962aafae0011e72/identify-2.6.19-py2.py3-none-any.whl", hash = "sha256:20e6a87f786f768c092a721ad107fc9df0eb89347be9396cadf3f4abbd1fb78a", size = 99397, upload-time = "2026-04-17T18:39:49.221Z" }, +] + +[[package]] +name = "idna" +version = "3.13" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/ce/cc/762dfb036166873f0059f3b7de4565e1b5bc3d6f28a414c13da27e442f99/idna-3.13.tar.gz", hash = "sha256:585ea8fe5d69b9181ec1afba340451fba6ba764af97026f92a91d4eef164a242", size = 194210, upload-time = "2026-04-22T16:42:42.314Z" } +wheels = [ + { 
url = "https://files.pythonhosted.org/packages/5d/13/ad7d7ca3808a898b4612b6fe93cde56b53f3034dcde235acb1f0e1df24c6/idna-3.13-py3-none-any.whl", hash = "sha256:892ea0cde124a99ce773decba204c5552b69c3c67ffd5f232eb7696135bc8bb3", size = 68629, upload-time = "2026-04-22T16:42:40.909Z" }, +] + +[[package]] +name = "iniconfig" +version = "2.3.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/72/34/14ca021ce8e5dfedc35312d08ba8bf51fdd999c576889fc2c24cb97f4f10/iniconfig-2.3.0.tar.gz", hash = "sha256:c76315c77db068650d49c5b56314774a7804df16fee4402c1f19d6d15d8c4730", size = 20503, upload-time = "2025-10-18T21:55:43.219Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/cb/b1/3846dd7f199d53cb17f49cba7e651e9ce294d8497c8c150530ed11865bb8/iniconfig-2.3.0-py3-none-any.whl", hash = "sha256:f631c04d2c48c52b84d0d0549c99ff3859c98df65b3101406327ecc7d53fbf12", size = 7484, upload-time = "2025-10-18T21:55:41.639Z" }, +] + +[[package]] +name = "jinja2" +version = "3.1.6" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "markupsafe" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/df/bf/f7da0350254c0ed7c72f3e33cef02e048281fec7ecec5f032d4aac52226b/jinja2-3.1.6.tar.gz", hash = "sha256:0137fb05990d35f1275a587e9aee6d56da821fc83491a0fb838183be43f66d6d", size = 245115, upload-time = "2025-03-05T20:05:02.478Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/62/a1/3d680cbfd5f4b8f15abc1d571870c5fc3e594bb582bc3b64ea099db13e56/jinja2-3.1.6-py3-none-any.whl", hash = "sha256:85ece4451f492d0c13c5dd7c13a64681a86afae63a5f347908daf103ce6d2f67", size = 134899, upload-time = "2025-03-05T20:05:00.369Z" }, +] + +[[package]] +name = "joblib" +version = "1.5.3" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/41/f2/d34e8b3a08a9cc79a50b2208a93dce981fe615b64d5a4d4abee421d898df/joblib-1.5.3.tar.gz", hash = 
"sha256:8561a3269e6801106863fd0d6d84bb737be9e7631e33aaed3fb9ce5953688da3", size = 331603, upload-time = "2025-12-15T08:41:46.427Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/7b/91/984aca2ec129e2757d1e4e3c81c3fcda9d0f85b74670a094cc443d9ee949/joblib-1.5.3-py3-none-any.whl", hash = "sha256:5fc3c5039fc5ca8c0276333a188bbd59d6b7ab37fe6632daa76bc7f9ec18e713", size = 309071, upload-time = "2025-12-15T08:41:44.973Z" }, +] + +[[package]] +name = "klo-daemon" +version = "0.1.0" +source = { editable = "python/klo-daemon" } +dependencies = [ + { name = "fastapi" }, + { name = "klo-sl" }, + { name = "lkml" }, + { name = "numpy" }, + { name = "orjson" }, + { name = "pandas" }, + { name = "psycopg", extra = ["binary"] }, + { name = "pydantic" }, + { name = "requests" }, + { name = "sentence-transformers" }, + { name = "sqlglot" }, + { name = "torch", version = "2.11.0", source = { registry = "https://download.pytorch.org/whl/cpu" }, marker = "sys_platform == 'darwin'" }, + { name = "torch", version = "2.11.0+cpu", source = { registry = "https://download.pytorch.org/whl/cpu" }, marker = "sys_platform != 'darwin'" }, + { name = "uvicorn", extra = ["standard"] }, +] + +[package.dev-dependencies] +dev = [ + { name = "httpx" }, + { name = "pytest" }, +] + +[package.metadata] +requires-dist = [ + { name = "fastapi", specifier = ">=0.115.0" }, + { name = "klo-sl", editable = "python/klo-sl" }, + { name = "lkml", specifier = ">=1.3.7" }, + { name = "numpy", specifier = ">=2.2.6" }, + { name = "orjson", specifier = ">=3.11.4" }, + { name = "pandas", specifier = ">=2.2.3" }, + { name = "psycopg", extras = ["binary"], specifier = ">=3.2.0" }, + { name = "pydantic", specifier = ">=2.9.0" }, + { name = "requests", specifier = ">=2.32.0" }, + { name = "sentence-transformers", specifier = ">=5.1.1" }, + { name = "sqlglot", specifier = ">=26" }, + { name = "torch", specifier = ">=2.2.0", index = "https://download.pytorch.org/whl/cpu" }, + { name = "uvicorn", extras = 
["standard"], specifier = ">=0.32.0" }, +] + +[package.metadata.requires-dev] +dev = [ + { name = "httpx", specifier = ">=0.28.1" }, + { name = "pytest", specifier = ">=9.0.2" }, +] + +[[package]] +name = "klo-sl" +version = "0.1.0" +source = { editable = "python/klo-sl" } +dependencies = [ + { name = "pydantic" }, + { name = "pyyaml" }, + { name = "sqlglot" }, +] + +[package.optional-dependencies] +dev = [ + { name = "pre-commit" }, + { name = "pytest" }, + { name = "pytest-cov" }, + { name = "ruff" }, +] +tpch = [ + { name = "duckdb" }, +] + +[package.dev-dependencies] +dev = [ + { name = "pytest" }, + { name = "pytest-cov" }, +] + +[package.metadata] +requires-dist = [ + { name = "duckdb", marker = "extra == 'tpch'", specifier = ">=1.0" }, + { name = "pre-commit", marker = "extra == 'dev'" }, + { name = "pydantic", specifier = ">=2" }, + { name = "pytest", marker = "extra == 'dev'", specifier = ">=8" }, + { name = "pytest-cov", marker = "extra == 'dev'" }, + { name = "pyyaml", specifier = ">=6" }, + { name = "ruff", marker = "extra == 'dev'" }, + { name = "sqlglot", specifier = ">=26" }, +] +provides-extras = ["dev", "tpch"] + +[package.metadata.requires-dev] +dev = [ + { name = "pytest", specifier = ">=9.0.2" }, + { name = "pytest-cov", specifier = ">=7.1.0" }, +] + +[[package]] +name = "klo-workspace" +version = "0.0.0" +source = { virtual = "." 
} + +[package.dev-dependencies] +dev = [ + { name = "pytest" }, + { name = "ruff" }, +] + +[package.metadata] + +[package.metadata.requires-dev] +dev = [ + { name = "pytest", specifier = ">=9.0.2" }, + { name = "ruff", specifier = ">=0.8.4" }, +] + +[[package]] +name = "lkml" +version = "1.3.7" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/bf/18/18a3d0281c5e209156b877796096d4ac7259f03465409673056386c99221/lkml-1.3.7.tar.gz", hash = "sha256:51dc9f1b7e74cd7a00e0dbbf06fb573952015328f1f4a3a0730d444444a8ae7a", size = 28763, upload-time = "2025-01-31T02:30:35.472Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/c7/15/e7124d4ec54fdcafa801b55d6b67d6196ed6c8a0de554e1a8b67b66fec65/lkml-1.3.7-py2.py3-none-any.whl", hash = "sha256:ce54c517f81fbd21d452038be9e2504fa02951a5bc30f7d7f1eb552c1f3f2b39", size = 23062, upload-time = "2025-01-31T02:30:34.377Z" }, +] + +[[package]] +name = "markdown-it-py" +version = "4.0.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "mdurl" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/5b/f5/4ec618ed16cc4f8fb3b701563655a69816155e79e24a17b651541804721d/markdown_it_py-4.0.0.tar.gz", hash = "sha256:cb0a2b4aa34f932c007117b194e945bd74e0ec24133ceb5bac59009cda1cb9f3", size = 73070, upload-time = "2025-08-11T12:57:52.854Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/94/54/e7d793b573f298e1c9013b8c4dade17d481164aa517d1d7148619c2cedbf/markdown_it_py-4.0.0-py3-none-any.whl", hash = "sha256:87327c59b172c5011896038353a81343b6754500a08cd7a4973bb48c6d578147", size = 87321, upload-time = "2025-08-11T12:57:51.923Z" }, +] + +[[package]] +name = "markupsafe" +version = "3.0.3" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/7e/99/7690b6d4034fffd95959cbe0c02de8deb3098cc577c67bb6a24fe5d7caa7/markupsafe-3.0.3.tar.gz", hash = 
"sha256:722695808f4b6457b320fdc131280796bdceb04ab50fe1795cd540799ebe1698", size = 80313, upload-time = "2025-09-27T18:37:40.426Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/38/2f/907b9c7bbba283e68f20259574b13d005c121a0fa4c175f9bed27c4597ff/markupsafe-3.0.3-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:e1cf1972137e83c5d4c136c43ced9ac51d0e124706ee1c8aa8532c1287fa8795", size = 11622, upload-time = "2025-09-27T18:36:41.777Z" }, + { url = "https://files.pythonhosted.org/packages/9c/d9/5f7756922cdd676869eca1c4e3c0cd0df60ed30199ffd775e319089cb3ed/markupsafe-3.0.3-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:116bb52f642a37c115f517494ea5feb03889e04df47eeff5b130b1808ce7c219", size = 12029, upload-time = "2025-09-27T18:36:43.257Z" }, + { url = "https://files.pythonhosted.org/packages/00/07/575a68c754943058c78f30db02ee03a64b3c638586fba6a6dd56830b30a3/markupsafe-3.0.3-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:133a43e73a802c5562be9bbcd03d090aa5a1fe899db609c29e8c8d815c5f6de6", size = 24374, upload-time = "2025-09-27T18:36:44.508Z" }, + { url = "https://files.pythonhosted.org/packages/a9/21/9b05698b46f218fc0e118e1f8168395c65c8a2c750ae2bab54fc4bd4e0e8/markupsafe-3.0.3-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:ccfcd093f13f0f0b7fdd0f198b90053bf7b2f02a3927a30e63f3ccc9df56b676", size = 22980, upload-time = "2025-09-27T18:36:45.385Z" }, + { url = "https://files.pythonhosted.org/packages/7f/71/544260864f893f18b6827315b988c146b559391e6e7e8f7252839b1b846a/markupsafe-3.0.3-cp313-cp313-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:509fa21c6deb7a7a273d629cf5ec029bc209d1a51178615ddf718f5918992ab9", size = 21990, upload-time = "2025-09-27T18:36:46.916Z" }, + { url = 
"https://files.pythonhosted.org/packages/c2/28/b50fc2f74d1ad761af2f5dcce7492648b983d00a65b8c0e0cb457c82ebbe/markupsafe-3.0.3-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:a4afe79fb3de0b7097d81da19090f4df4f8d3a2b3adaa8764138aac2e44f3af1", size = 23784, upload-time = "2025-09-27T18:36:47.884Z" }, + { url = "https://files.pythonhosted.org/packages/ed/76/104b2aa106a208da8b17a2fb72e033a5a9d7073c68f7e508b94916ed47a9/markupsafe-3.0.3-cp313-cp313-musllinux_1_2_riscv64.whl", hash = "sha256:795e7751525cae078558e679d646ae45574b47ed6e7771863fcc079a6171a0fc", size = 21588, upload-time = "2025-09-27T18:36:48.82Z" }, + { url = "https://files.pythonhosted.org/packages/b5/99/16a5eb2d140087ebd97180d95249b00a03aa87e29cc224056274f2e45fd6/markupsafe-3.0.3-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:8485f406a96febb5140bfeca44a73e3ce5116b2501ac54fe953e488fb1d03b12", size = 23041, upload-time = "2025-09-27T18:36:49.797Z" }, + { url = "https://files.pythonhosted.org/packages/19/bc/e7140ed90c5d61d77cea142eed9f9c303f4c4806f60a1044c13e3f1471d0/markupsafe-3.0.3-cp313-cp313-win32.whl", hash = "sha256:bdd37121970bfd8be76c5fb069c7751683bdf373db1ed6c010162b2a130248ed", size = 14543, upload-time = "2025-09-27T18:36:51.584Z" }, + { url = "https://files.pythonhosted.org/packages/05/73/c4abe620b841b6b791f2edc248f556900667a5a1cf023a6646967ae98335/markupsafe-3.0.3-cp313-cp313-win_amd64.whl", hash = "sha256:9a1abfdc021a164803f4d485104931fb8f8c1efd55bc6b748d2f5774e78b62c5", size = 15113, upload-time = "2025-09-27T18:36:52.537Z" }, + { url = "https://files.pythonhosted.org/packages/f0/3a/fa34a0f7cfef23cf9500d68cb7c32dd64ffd58a12b09225fb03dd37d5b80/markupsafe-3.0.3-cp313-cp313-win_arm64.whl", hash = "sha256:7e68f88e5b8799aa49c85cd116c932a1ac15caaa3f5db09087854d218359e485", size = 13911, upload-time = "2025-09-27T18:36:53.513Z" }, + { url = 
"https://files.pythonhosted.org/packages/e4/d7/e05cd7efe43a88a17a37b3ae96e79a19e846f3f456fe79c57ca61356ef01/markupsafe-3.0.3-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:218551f6df4868a8d527e3062d0fb968682fe92054e89978594c28e642c43a73", size = 11658, upload-time = "2025-09-27T18:36:54.819Z" }, + { url = "https://files.pythonhosted.org/packages/99/9e/e412117548182ce2148bdeacdda3bb494260c0b0184360fe0d56389b523b/markupsafe-3.0.3-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:3524b778fe5cfb3452a09d31e7b5adefeea8c5be1d43c4f810ba09f2ceb29d37", size = 12066, upload-time = "2025-09-27T18:36:55.714Z" }, + { url = "https://files.pythonhosted.org/packages/bc/e6/fa0ffcda717ef64a5108eaa7b4f5ed28d56122c9a6d70ab8b72f9f715c80/markupsafe-3.0.3-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:4e885a3d1efa2eadc93c894a21770e4bc67899e3543680313b09f139e149ab19", size = 25639, upload-time = "2025-09-27T18:36:56.908Z" }, + { url = "https://files.pythonhosted.org/packages/96/ec/2102e881fe9d25fc16cb4b25d5f5cde50970967ffa5dddafdb771237062d/markupsafe-3.0.3-cp313-cp313t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:8709b08f4a89aa7586de0aadc8da56180242ee0ada3999749b183aa23df95025", size = 23569, upload-time = "2025-09-27T18:36:57.913Z" }, + { url = "https://files.pythonhosted.org/packages/4b/30/6f2fce1f1f205fc9323255b216ca8a235b15860c34b6798f810f05828e32/markupsafe-3.0.3-cp313-cp313t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:b8512a91625c9b3da6f127803b166b629725e68af71f8184ae7e7d54686a56d6", size = 23284, upload-time = "2025-09-27T18:36:58.833Z" }, + { url = "https://files.pythonhosted.org/packages/58/47/4a0ccea4ab9f5dcb6f79c0236d954acb382202721e704223a8aafa38b5c8/markupsafe-3.0.3-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:9b79b7a16f7fedff2495d684f2b59b0457c3b493778c9eed31111be64d58279f", size = 24801, upload-time = "2025-09-27T18:36:59.739Z" }, + { 
url = "https://files.pythonhosted.org/packages/6a/70/3780e9b72180b6fecb83a4814d84c3bf4b4ae4bf0b19c27196104149734c/markupsafe-3.0.3-cp313-cp313t-musllinux_1_2_riscv64.whl", hash = "sha256:12c63dfb4a98206f045aa9563db46507995f7ef6d83b2f68eda65c307c6829eb", size = 22769, upload-time = "2025-09-27T18:37:00.719Z" }, + { url = "https://files.pythonhosted.org/packages/98/c5/c03c7f4125180fc215220c035beac6b9cb684bc7a067c84fc69414d315f5/markupsafe-3.0.3-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:8f71bc33915be5186016f675cd83a1e08523649b0e33efdb898db577ef5bb009", size = 23642, upload-time = "2025-09-27T18:37:01.673Z" }, + { url = "https://files.pythonhosted.org/packages/80/d6/2d1b89f6ca4bff1036499b1e29a1d02d282259f3681540e16563f27ebc23/markupsafe-3.0.3-cp313-cp313t-win32.whl", hash = "sha256:69c0b73548bc525c8cb9a251cddf1931d1db4d2258e9599c28c07ef3580ef354", size = 14612, upload-time = "2025-09-27T18:37:02.639Z" }, + { url = "https://files.pythonhosted.org/packages/2b/98/e48a4bfba0a0ffcf9925fe2d69240bfaa19c6f7507b8cd09c70684a53c1e/markupsafe-3.0.3-cp313-cp313t-win_amd64.whl", hash = "sha256:1b4b79e8ebf6b55351f0d91fe80f893b4743f104bff22e90697db1590e47a218", size = 15200, upload-time = "2025-09-27T18:37:03.582Z" }, + { url = "https://files.pythonhosted.org/packages/0e/72/e3cc540f351f316e9ed0f092757459afbc595824ca724cbc5a5d4263713f/markupsafe-3.0.3-cp313-cp313t-win_arm64.whl", hash = "sha256:ad2cf8aa28b8c020ab2fc8287b0f823d0a7d8630784c31e9ee5edea20f406287", size = 13973, upload-time = "2025-09-27T18:37:04.929Z" }, + { url = "https://files.pythonhosted.org/packages/33/8a/8e42d4838cd89b7dde187011e97fe6c3af66d8c044997d2183fbd6d31352/markupsafe-3.0.3-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:eaa9599de571d72e2daf60164784109f19978b327a3910d3e9de8c97b5b70cfe", size = 11619, upload-time = "2025-09-27T18:37:06.342Z" }, + { url = 
"https://files.pythonhosted.org/packages/b5/64/7660f8a4a8e53c924d0fa05dc3a55c9cee10bbd82b11c5afb27d44b096ce/markupsafe-3.0.3-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:c47a551199eb8eb2121d4f0f15ae0f923d31350ab9280078d1e5f12b249e0026", size = 12029, upload-time = "2025-09-27T18:37:07.213Z" }, + { url = "https://files.pythonhosted.org/packages/da/ef/e648bfd021127bef5fa12e1720ffed0c6cbb8310c8d9bea7266337ff06de/markupsafe-3.0.3-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:f34c41761022dd093b4b6896d4810782ffbabe30f2d443ff5f083e0cbbb8c737", size = 24408, upload-time = "2025-09-27T18:37:09.572Z" }, + { url = "https://files.pythonhosted.org/packages/41/3c/a36c2450754618e62008bf7435ccb0f88053e07592e6028a34776213d877/markupsafe-3.0.3-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:457a69a9577064c05a97c41f4e65148652db078a3a509039e64d3467b9e7ef97", size = 23005, upload-time = "2025-09-27T18:37:10.58Z" }, + { url = "https://files.pythonhosted.org/packages/bc/20/b7fdf89a8456b099837cd1dc21974632a02a999ec9bf7ca3e490aacd98e7/markupsafe-3.0.3-cp314-cp314-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:e8afc3f2ccfa24215f8cb28dcf43f0113ac3c37c2f0f0806d8c70e4228c5cf4d", size = 22048, upload-time = "2025-09-27T18:37:11.547Z" }, + { url = "https://files.pythonhosted.org/packages/9a/a7/591f592afdc734f47db08a75793a55d7fbcc6902a723ae4cfbab61010cc5/markupsafe-3.0.3-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:ec15a59cf5af7be74194f7ab02d0f59a62bdcf1a537677ce67a2537c9b87fcda", size = 23821, upload-time = "2025-09-27T18:37:12.48Z" }, + { url = "https://files.pythonhosted.org/packages/7d/33/45b24e4f44195b26521bc6f1a82197118f74df348556594bd2262bda1038/markupsafe-3.0.3-cp314-cp314-musllinux_1_2_riscv64.whl", hash = "sha256:0eb9ff8191e8498cca014656ae6b8d61f39da5f95b488805da4bb029cccbfbaf", size = 21606, upload-time = "2025-09-27T18:37:13.485Z" }, + { url = 
"https://files.pythonhosted.org/packages/ff/0e/53dfaca23a69fbfbbf17a4b64072090e70717344c52eaaaa9c5ddff1e5f0/markupsafe-3.0.3-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:2713baf880df847f2bece4230d4d094280f4e67b1e813eec43b4c0e144a34ffe", size = 23043, upload-time = "2025-09-27T18:37:14.408Z" }, + { url = "https://files.pythonhosted.org/packages/46/11/f333a06fc16236d5238bfe74daccbca41459dcd8d1fa952e8fbd5dccfb70/markupsafe-3.0.3-cp314-cp314-win32.whl", hash = "sha256:729586769a26dbceff69f7a7dbbf59ab6572b99d94576a5592625d5b411576b9", size = 14747, upload-time = "2025-09-27T18:37:15.36Z" }, + { url = "https://files.pythonhosted.org/packages/28/52/182836104b33b444e400b14f797212f720cbc9ed6ba34c800639d154e821/markupsafe-3.0.3-cp314-cp314-win_amd64.whl", hash = "sha256:bdc919ead48f234740ad807933cdf545180bfbe9342c2bb451556db2ed958581", size = 15341, upload-time = "2025-09-27T18:37:16.496Z" }, + { url = "https://files.pythonhosted.org/packages/6f/18/acf23e91bd94fd7b3031558b1f013adfa21a8e407a3fdb32745538730382/markupsafe-3.0.3-cp314-cp314-win_arm64.whl", hash = "sha256:5a7d5dc5140555cf21a6fefbdbf8723f06fcd2f63ef108f2854de715e4422cb4", size = 14073, upload-time = "2025-09-27T18:37:17.476Z" }, + { url = "https://files.pythonhosted.org/packages/3c/f0/57689aa4076e1b43b15fdfa646b04653969d50cf30c32a102762be2485da/markupsafe-3.0.3-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:1353ef0c1b138e1907ae78e2f6c63ff67501122006b0f9abad68fda5f4ffc6ab", size = 11661, upload-time = "2025-09-27T18:37:18.453Z" }, + { url = "https://files.pythonhosted.org/packages/89/c3/2e67a7ca217c6912985ec766c6393b636fb0c2344443ff9d91404dc4c79f/markupsafe-3.0.3-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:1085e7fbddd3be5f89cc898938f42c0b3c711fdcb37d75221de2666af647c175", size = 12069, upload-time = "2025-09-27T18:37:19.332Z" }, + { url = 
"https://files.pythonhosted.org/packages/f0/00/be561dce4e6ca66b15276e184ce4b8aec61fe83662cce2f7d72bd3249d28/markupsafe-3.0.3-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:1b52b4fb9df4eb9ae465f8d0c228a00624de2334f216f178a995ccdcf82c4634", size = 25670, upload-time = "2025-09-27T18:37:20.245Z" }, + { url = "https://files.pythonhosted.org/packages/50/09/c419f6f5a92e5fadde27efd190eca90f05e1261b10dbd8cbcb39cd8ea1dc/markupsafe-3.0.3-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:fed51ac40f757d41b7c48425901843666a6677e3e8eb0abcff09e4ba6e664f50", size = 23598, upload-time = "2025-09-27T18:37:21.177Z" }, + { url = "https://files.pythonhosted.org/packages/22/44/a0681611106e0b2921b3033fc19bc53323e0b50bc70cffdd19f7d679bb66/markupsafe-3.0.3-cp314-cp314t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:f190daf01f13c72eac4efd5c430a8de82489d9cff23c364c3ea822545032993e", size = 23261, upload-time = "2025-09-27T18:37:22.167Z" }, + { url = "https://files.pythonhosted.org/packages/5f/57/1b0b3f100259dc9fffe780cfb60d4be71375510e435efec3d116b6436d43/markupsafe-3.0.3-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:e56b7d45a839a697b5eb268c82a71bd8c7f6c94d6fd50c3d577fa39a9f1409f5", size = 24835, upload-time = "2025-09-27T18:37:23.296Z" }, + { url = "https://files.pythonhosted.org/packages/26/6a/4bf6d0c97c4920f1597cc14dd720705eca0bf7c787aebc6bb4d1bead5388/markupsafe-3.0.3-cp314-cp314t-musllinux_1_2_riscv64.whl", hash = "sha256:f3e98bb3798ead92273dc0e5fd0f31ade220f59a266ffd8a4f6065e0a3ce0523", size = 22733, upload-time = "2025-09-27T18:37:24.237Z" }, + { url = "https://files.pythonhosted.org/packages/14/c7/ca723101509b518797fedc2fdf79ba57f886b4aca8a7d31857ba3ee8281f/markupsafe-3.0.3-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:5678211cb9333a6468fb8d8be0305520aa073f50d17f089b5b4b477ea6e67fdc", size = 23672, upload-time = "2025-09-27T18:37:25.271Z" }, + 
{ url = "https://files.pythonhosted.org/packages/fb/df/5bd7a48c256faecd1d36edc13133e51397e41b73bb77e1a69deab746ebac/markupsafe-3.0.3-cp314-cp314t-win32.whl", hash = "sha256:915c04ba3851909ce68ccc2b8e2cd691618c4dc4c4232fb7982bca3f41fd8c3d", size = 14819, upload-time = "2025-09-27T18:37:26.285Z" }, + { url = "https://files.pythonhosted.org/packages/1a/8a/0402ba61a2f16038b48b39bccca271134be00c5c9f0f623208399333c448/markupsafe-3.0.3-cp314-cp314t-win_amd64.whl", hash = "sha256:4faffd047e07c38848ce017e8725090413cd80cbc23d86e55c587bf979e579c9", size = 15426, upload-time = "2025-09-27T18:37:27.316Z" }, + { url = "https://files.pythonhosted.org/packages/70/bc/6f1c2f612465f5fa89b95bead1f44dcb607670fd42891d8fdcd5d039f4f4/markupsafe-3.0.3-cp314-cp314t-win_arm64.whl", hash = "sha256:32001d6a8fc98c8cb5c947787c5d08b0a50663d139f1305bac5885d98d9b40fa", size = 14146, upload-time = "2025-09-27T18:37:28.327Z" }, +] + +[[package]] +name = "mdurl" +version = "0.1.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/d6/54/cfe61301667036ec958cb99bd3efefba235e65cdeb9c84d24a8293ba1d90/mdurl-0.1.2.tar.gz", hash = "sha256:bb413d29f5eea38f31dd4754dd7377d4465116fb207585f97bf925588687c1ba", size = 8729, upload-time = "2022-08-14T12:40:10.846Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/b3/38/89ba8ad64ae25be8de66a6d463314cf1eb366222074cfda9ee839c56a4b4/mdurl-0.1.2-py3-none-any.whl", hash = "sha256:84008a41e51615a49fc9966191ff91509e3c40b939176e643fd50a5c2196b8f8", size = 9979, upload-time = "2022-08-14T12:40:09.779Z" }, +] + +[[package]] +name = "mpmath" +version = "1.3.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/e0/47/dd32fa426cc72114383ac549964eecb20ecfd886d1e5ccf5340b55b02f57/mpmath-1.3.0.tar.gz", hash = "sha256:7a28eb2a9774d00c7bc92411c19a89209d5da7c4c9a9e227be8330a23a25b91f", size = 508106, upload-time = "2023-03-07T16:47:11.061Z" } +wheels = [ + 
{ url = "https://files.pythonhosted.org/packages/43/e3/7d92a15f894aa0c9c4b49b8ee9ac9850d6e63b03c9c32c0367a13ae62209/mpmath-1.3.0-py3-none-any.whl", hash = "sha256:a0b2b9fe80bbcd81a6647ff13108738cfb482d481d826cc0e02f5b35e5c88d2c", size = 536198, upload-time = "2023-03-07T16:47:09.197Z" }, +] + +[[package]] +name = "networkx" +version = "3.6.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/6a/51/63fe664f3908c97be9d2e4f1158eb633317598cfa6e1fc14af5383f17512/networkx-3.6.1.tar.gz", hash = "sha256:26b7c357accc0c8cde558ad486283728b65b6a95d85ee1cd66bafab4c8168509", size = 2517025, upload-time = "2025-12-08T17:02:39.908Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/9e/c9/b2622292ea83fbb4ec318f5b9ab867d0a28ab43c5717bb85b0a5f6b3b0a4/networkx-3.6.1-py3-none-any.whl", hash = "sha256:d47fbf302e7d9cbbb9e2555a0d267983d2aa476bac30e90dfbe5669bd57f3762", size = 2068504, upload-time = "2025-12-08T17:02:38.159Z" }, +] + +[[package]] +name = "nodeenv" +version = "1.10.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/24/bf/d1bda4f6168e0b2e9e5958945e01910052158313224ada5ce1fb2e1113b8/nodeenv-1.10.0.tar.gz", hash = "sha256:996c191ad80897d076bdfba80a41994c2b47c68e224c542b48feba42ba00f8bb", size = 55611, upload-time = "2025-12-20T14:08:54.006Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/88/b2/d0896bdcdc8d28a7fc5717c305f1a861c26e18c05047949fb371034d98bd/nodeenv-1.10.0-py2.py3-none-any.whl", hash = "sha256:5bb13e3eed2923615535339b3c620e76779af4cb4c6a90deccc9e36b274d3827", size = 23438, upload-time = "2025-12-20T14:08:52.782Z" }, +] + +[[package]] +name = "numpy" +version = "2.4.4" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/d7/9f/b8cef5bffa569759033adda9481211426f12f53299629b410340795c2514/numpy-2.4.4.tar.gz", hash = 
"sha256:2d390634c5182175533585cc89f3608a4682ccb173cc9bb940b2881c8d6f8fa0", size = 20731587, upload-time = "2026-03-29T13:22:01.298Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/14/1d/d0a583ce4fefcc3308806a749a536c201ed6b5ad6e1322e227ee4848979d/numpy-2.4.4-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:08f2e31ed5e6f04b118e49821397f12767934cfdd12a1ce86a058f91e004ee50", size = 16684933, upload-time = "2026-03-29T13:19:22.47Z" }, + { url = "https://files.pythonhosted.org/packages/c1/62/2b7a48fbb745d344742c0277f01286dead15f3f68e4f359fbfcf7b48f70f/numpy-2.4.4-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:e823b8b6edc81e747526f70f71a9c0a07ac4e7ad13020aa736bb7c9d67196115", size = 14694532, upload-time = "2026-03-29T13:19:25.581Z" }, + { url = "https://files.pythonhosted.org/packages/e5/87/499737bfba066b4a3bebff24a8f1c5b2dee410b209bc6668c9be692580f0/numpy-2.4.4-cp313-cp313-macosx_14_0_arm64.whl", hash = "sha256:4a19d9dba1a76618dd86b164d608566f393f8ec6ac7c44f0cc879011c45e65af", size = 5199661, upload-time = "2026-03-29T13:19:28.31Z" }, + { url = "https://files.pythonhosted.org/packages/cd/da/464d551604320d1491bc345efed99b4b7034143a85787aab78d5691d5a0e/numpy-2.4.4-cp313-cp313-macosx_14_0_x86_64.whl", hash = "sha256:d2a8490669bfe99a233298348acc2d824d496dee0e66e31b66a6022c2ad74a5c", size = 6547539, upload-time = "2026-03-29T13:19:30.97Z" }, + { url = "https://files.pythonhosted.org/packages/7d/90/8d23e3b0dafd024bf31bdec225b3bb5c2dbfa6912f8a53b8659f21216cbf/numpy-2.4.4-cp313-cp313-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:45dbed2ab436a9e826e302fcdcbe9133f9b0006e5af7168afb8963a6520da103", size = 15668806, upload-time = "2026-03-29T13:19:33.887Z" }, + { url = "https://files.pythonhosted.org/packages/d1/73/a9d864e42a01896bb5974475438f16086be9ba1f0d19d0bb7a07427c4a8b/numpy-2.4.4-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c901b15172510173f5cb310eae652908340f8dede90fff9e3bf6c0d8dfd92f83", size = 
16632682, upload-time = "2026-03-29T13:19:37.336Z" }, + { url = "https://files.pythonhosted.org/packages/34/fb/14570d65c3bde4e202a031210475ae9cde9b7686a2e7dc97ee67d2833b35/numpy-2.4.4-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:99d838547ace2c4aace6c4f76e879ddfe02bb58a80c1549928477862b7a6d6ed", size = 17019810, upload-time = "2026-03-29T13:19:40.963Z" }, + { url = "https://files.pythonhosted.org/packages/8a/77/2ba9d87081fd41f6d640c83f26fb7351e536b7ce6dd9061b6af5904e8e46/numpy-2.4.4-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:0aec54fd785890ecca25a6003fd9a5aed47ad607bbac5cd64f836ad8666f4959", size = 18357394, upload-time = "2026-03-29T13:19:44.859Z" }, + { url = "https://files.pythonhosted.org/packages/a2/23/52666c9a41708b0853fa3b1a12c90da38c507a3074883823126d4e9d5b30/numpy-2.4.4-cp313-cp313-win32.whl", hash = "sha256:07077278157d02f65c43b1b26a3886bce886f95d20aabd11f87932750dfb14ed", size = 5959556, upload-time = "2026-03-29T13:19:47.661Z" }, + { url = "https://files.pythonhosted.org/packages/57/fb/48649b4971cde70d817cf97a2a2fdc0b4d8308569f1dd2f2611959d2e0cf/numpy-2.4.4-cp313-cp313-win_amd64.whl", hash = "sha256:5c70f1cc1c4efbe316a572e2d8b9b9cc44e89b95f79ca3331553fbb63716e2bf", size = 12317311, upload-time = "2026-03-29T13:19:50.67Z" }, + { url = "https://files.pythonhosted.org/packages/ba/d8/11490cddd564eb4de97b4579ef6bfe6a736cc07e94c1598590ae25415e01/numpy-2.4.4-cp313-cp313-win_arm64.whl", hash = "sha256:ef4059d6e5152fa1a39f888e344c73fdc926e1b2dd58c771d67b0acfbf2aa67d", size = 10222060, upload-time = "2026-03-29T13:19:54.229Z" }, + { url = "https://files.pythonhosted.org/packages/99/5d/dab4339177a905aad3e2221c915b35202f1ec30d750dd2e5e9d9a72b804b/numpy-2.4.4-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:4bbc7f303d125971f60ec0aaad5e12c62d0d2c925f0ab1273debd0e4ba37aba5", size = 14822302, upload-time = "2026-03-29T13:19:57.585Z" }, + { url = 
"https://files.pythonhosted.org/packages/eb/e4/0564a65e7d3d97562ed6f9b0fd0fb0a6f559ee444092f105938b50043876/numpy-2.4.4-cp313-cp313t-macosx_14_0_arm64.whl", hash = "sha256:4d6d57903571f86180eb98f8f0c839fa9ebbfb031356d87f1361be91e433f5b7", size = 5327407, upload-time = "2026-03-29T13:20:00.601Z" }, + { url = "https://files.pythonhosted.org/packages/29/8d/35a3a6ce5ad371afa58b4700f1c820f8f279948cca32524e0a695b0ded83/numpy-2.4.4-cp313-cp313t-macosx_14_0_x86_64.whl", hash = "sha256:4636de7fd195197b7535f231b5de9e4b36d2c440b6e566d2e4e4746e6af0ca93", size = 6647631, upload-time = "2026-03-29T13:20:02.855Z" }, + { url = "https://files.pythonhosted.org/packages/f4/da/477731acbd5a58a946c736edfdabb2ac5b34c3d08d1ba1a7b437fa0884df/numpy-2.4.4-cp313-cp313t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:ad2e2ef14e0b04e544ea2fa0a36463f847f113d314aa02e5b402fdf910ef309e", size = 15727691, upload-time = "2026-03-29T13:20:06.004Z" }, + { url = "https://files.pythonhosted.org/packages/e6/db/338535d9b152beabeb511579598418ba0212ce77cf9718edd70262cc4370/numpy-2.4.4-cp313-cp313t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:5a285b3b96f951841799528cd1f4f01cd70e7e0204b4abebac9463eecfcf2a40", size = 16681241, upload-time = "2026-03-29T13:20:09.417Z" }, + { url = "https://files.pythonhosted.org/packages/e2/a9/ad248e8f58beb7a0219b413c9c7d8151c5d285f7f946c3e26695bdbbe2df/numpy-2.4.4-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:f8474c4241bc18b750be2abea9d7a9ec84f46ef861dbacf86a4f6e043401f79e", size = 17085767, upload-time = "2026-03-29T13:20:13.126Z" }, + { url = "https://files.pythonhosted.org/packages/b5/1a/3b88ccd3694681356f70da841630e4725a7264d6a885c8d442a697e1146b/numpy-2.4.4-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:4e874c976154687c1f71715b034739b45c7711bec81db01914770373d125e392", size = 18403169, upload-time = "2026-03-29T13:20:17.096Z" }, + { url = 
"https://files.pythonhosted.org/packages/c2/c9/fcfd5d0639222c6eac7f304829b04892ef51c96a75d479214d77e3ce6e33/numpy-2.4.4-cp313-cp313t-win32.whl", hash = "sha256:9c585a1790d5436a5374bac930dad6ed244c046ed91b2b2a3634eb2971d21008", size = 6083477, upload-time = "2026-03-29T13:20:20.195Z" }, + { url = "https://files.pythonhosted.org/packages/d5/e3/3938a61d1c538aaec8ed6fd6323f57b0c2d2d2219512434c5c878db76553/numpy-2.4.4-cp313-cp313t-win_amd64.whl", hash = "sha256:93e15038125dc1e5345d9b5b68aa7f996ec33b98118d18c6ca0d0b7d6198b7e8", size = 12457487, upload-time = "2026-03-29T13:20:22.946Z" }, + { url = "https://files.pythonhosted.org/packages/97/6a/7e345032cc60501721ef94e0e30b60f6b0bd601f9174ebd36389a2b86d40/numpy-2.4.4-cp313-cp313t-win_arm64.whl", hash = "sha256:0dfd3f9d3adbe2920b68b5cd3d51444e13a10792ec7154cd0a2f6e74d4ab3233", size = 10292002, upload-time = "2026-03-29T13:20:25.909Z" }, + { url = "https://files.pythonhosted.org/packages/6e/06/c54062f85f673dd5c04cbe2f14c3acb8c8b95e3384869bb8cc9bff8cb9df/numpy-2.4.4-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:f169b9a863d34f5d11b8698ead99febeaa17a13ca044961aa8e2662a6c7766a0", size = 16684353, upload-time = "2026-03-29T13:20:29.504Z" }, + { url = "https://files.pythonhosted.org/packages/4c/39/8a320264a84404c74cc7e79715de85d6130fa07a0898f67fb5cd5bd79908/numpy-2.4.4-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:2483e4584a1cb3092da4470b38866634bafb223cbcd551ee047633fd2584599a", size = 14704914, upload-time = "2026-03-29T13:20:33.547Z" }, + { url = "https://files.pythonhosted.org/packages/91/fb/287076b2614e1d1044235f50f03748f31fa287e3dbe6abeb35cdfa351eca/numpy-2.4.4-cp314-cp314-macosx_14_0_arm64.whl", hash = "sha256:2d19e6e2095506d1736b7d80595e0f252d76b89f5e715c35e06e937679ea7d7a", size = 5210005, upload-time = "2026-03-29T13:20:36.45Z" }, + { url = "https://files.pythonhosted.org/packages/63/eb/fcc338595309910de6ecabfcef2419a9ce24399680bfb149421fa2df1280/numpy-2.4.4-cp314-cp314-macosx_14_0_x86_64.whl", hash = 
"sha256:6a246d5914aa1c820c9443ddcee9c02bec3e203b0c080349533fae17727dfd1b", size = 6544974, upload-time = "2026-03-29T13:20:39.014Z" }, + { url = "https://files.pythonhosted.org/packages/44/5d/e7e9044032a716cdfaa3fba27a8e874bf1c5f1912a1ddd4ed071bf8a14a6/numpy-2.4.4-cp314-cp314-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:989824e9faf85f96ec9c7761cd8d29c531ad857bfa1daa930cba85baaecf1a9a", size = 15684591, upload-time = "2026-03-29T13:20:42.146Z" }, + { url = "https://files.pythonhosted.org/packages/98/7c/21252050676612625449b4807d6b695b9ce8a7c9e1c197ee6216c8a65c7c/numpy-2.4.4-cp314-cp314-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:27a8d92cd10f1382a67d7cf4db7ce18341b66438bdd9f691d7b0e48d104c2a9d", size = 16637700, upload-time = "2026-03-29T13:20:46.204Z" }, + { url = "https://files.pythonhosted.org/packages/b1/29/56d2bbef9465db24ef25393383d761a1af4f446a1df9b8cded4fe3a5a5d7/numpy-2.4.4-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:e44319a2953c738205bf3354537979eaa3998ed673395b964c1176083dd46252", size = 17035781, upload-time = "2026-03-29T13:20:50.242Z" }, + { url = "https://files.pythonhosted.org/packages/e3/2b/a35a6d7589d21f44cea7d0a98de5ddcbb3d421b2622a5c96b1edf18707c3/numpy-2.4.4-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:e892aff75639bbef0d2a2cfd55535510df26ff92f63c92cd84ef8d4ba5a5557f", size = 18362959, upload-time = "2026-03-29T13:20:54.019Z" }, + { url = "https://files.pythonhosted.org/packages/64/c9/d52ec581f2390e0f5f85cbfd80fb83d965fc15e9f0e1aec2195faa142cde/numpy-2.4.4-cp314-cp314-win32.whl", hash = "sha256:1378871da56ca8943c2ba674530924bb8ca40cd228358a3b5f302ad60cf875fc", size = 6008768, upload-time = "2026-03-29T13:20:56.912Z" }, + { url = "https://files.pythonhosted.org/packages/fa/22/4cc31a62a6c7b74a8730e31a4274c5dc80e005751e277a2ce38e675e4923/numpy-2.4.4-cp314-cp314-win_amd64.whl", hash = "sha256:715d1c092715954784bc79e1174fc2a90093dc4dc84ea15eb14dad8abdcdeb74", size = 12449181, upload-time = 
"2026-03-29T13:20:59.548Z" }, + { url = "https://files.pythonhosted.org/packages/70/2e/14cda6f4d8e396c612d1bf97f22958e92148801d7e4f110cabebdc0eef4b/numpy-2.4.4-cp314-cp314-win_arm64.whl", hash = "sha256:2c194dd721e54ecad9ad387c1d35e63dce5c4450c6dc7dd5611283dda239aabb", size = 10496035, upload-time = "2026-03-29T13:21:02.524Z" }, + { url = "https://files.pythonhosted.org/packages/b1/e8/8fed8c8d848d7ecea092dc3469643f9d10bc3a134a815a3b033da1d2039b/numpy-2.4.4-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:2aa0613a5177c264ff5921051a5719d20095ea586ca88cc802c5c218d1c67d3e", size = 14824958, upload-time = "2026-03-29T13:21:05.671Z" }, + { url = "https://files.pythonhosted.org/packages/05/1a/d8007a5138c179c2bf33ef44503e83d70434d2642877ee8fbb230e7c0548/numpy-2.4.4-cp314-cp314t-macosx_14_0_arm64.whl", hash = "sha256:42c16925aa5a02362f986765f9ebabf20de75cdefdca827d14315c568dcab113", size = 5330020, upload-time = "2026-03-29T13:21:08.635Z" }, + { url = "https://files.pythonhosted.org/packages/99/64/ffb99ac6ae93faf117bcbd5c7ba48a7f45364a33e8e458545d3633615dda/numpy-2.4.4-cp314-cp314t-macosx_14_0_x86_64.whl", hash = "sha256:874f200b2a981c647340f841730fc3a2b54c9d940566a3c4149099591e2c4c3d", size = 6650758, upload-time = "2026-03-29T13:21:10.949Z" }, + { url = "https://files.pythonhosted.org/packages/6e/6e/795cc078b78a384052e73b2f6281ff7a700e9bf53bcce2ee579d4f6dd879/numpy-2.4.4-cp314-cp314t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c9b39d38a9bd2ae1becd7eac1303d031c5c110ad31f2b319c6e7d98b135c934d", size = 15729948, upload-time = "2026-03-29T13:21:14.047Z" }, + { url = "https://files.pythonhosted.org/packages/5f/86/2acbda8cc2af5f3d7bfc791192863b9e3e19674da7b5e533fded124d1299/numpy-2.4.4-cp314-cp314t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:b268594bccac7d7cf5844c7732e3f20c50921d94e36d7ec9b79e9857694b1b2f", size = 16679325, upload-time = "2026-03-29T13:21:17.561Z" }, + { url = 
"https://files.pythonhosted.org/packages/bc/59/cafd83018f4aa55e0ac6fa92aa066c0a1877b77a615ceff1711c260ffae8/numpy-2.4.4-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:ac6b31e35612a26483e20750126d30d0941f949426974cace8e6b5c58a3657b0", size = 17084883, upload-time = "2026-03-29T13:21:21.106Z" }, + { url = "https://files.pythonhosted.org/packages/f0/85/a42548db84e65ece46ab2caea3d3f78b416a47af387fcbb47ec28e660dc2/numpy-2.4.4-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:8e3ed142f2728df44263aaf5fb1f5b0b99f4070c553a0d7f033be65338329150", size = 18403474, upload-time = "2026-03-29T13:21:24.828Z" }, + { url = "https://files.pythonhosted.org/packages/ed/ad/483d9e262f4b831000062e5d8a45e342166ec8aaa1195264982bca267e62/numpy-2.4.4-cp314-cp314t-win32.whl", hash = "sha256:dddbbd259598d7240b18c9d87c56a9d2fb3b02fe266f49a7c101532e78c1d871", size = 6155500, upload-time = "2026-03-29T13:21:28.205Z" }, + { url = "https://files.pythonhosted.org/packages/c7/03/2fc4e14c7bd4ff2964b74ba90ecb8552540b6315f201df70f137faa5c589/numpy-2.4.4-cp314-cp314t-win_amd64.whl", hash = "sha256:a7164afb23be6e37ad90b2f10426149fd75aee07ca55653d2aa41e66c4ef697e", size = 12637755, upload-time = "2026-03-29T13:21:31.107Z" }, + { url = "https://files.pythonhosted.org/packages/58/78/548fb8e07b1a341746bfbecb32f2c268470f45fa028aacdbd10d9bc73aab/numpy-2.4.4-cp314-cp314t-win_arm64.whl", hash = "sha256:ba203255017337d39f89bdd58417f03c4426f12beed0440cfd933cb15f8669c7", size = 10566643, upload-time = "2026-03-29T13:21:34.339Z" }, +] + +[[package]] +name = "orjson" +version = "3.11.8" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/9d/1b/2024d06792d0779f9dbc51531b61c24f76c75b9f4ce05e6f3377a1814cea/orjson-3.11.8.tar.gz", hash = "sha256:96163d9cdc5a202703e9ad1b9ae757d5f0ca62f4fa0cc93d1f27b0e180cc404e", size = 5603832, upload-time = "2026-03-31T16:16:27.878Z" } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/66/7f/95fba509bb2305fab0073558f1e8c3a2ec4b2afe58ed9fcb7d3b8beafe94/orjson-3.11.8-cp313-cp313-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl", hash = "sha256:3f23426851d98478c8970da5991f84784a76682213cd50eb73a1da56b95239dc", size = 229180, upload-time = "2026-03-31T16:15:36.426Z" }, + { url = "https://files.pythonhosted.org/packages/f6/9d/b237215c743ca073697d759b5503abd2cb8a0d7b9c9e21f524bcf176ab66/orjson-3.11.8-cp313-cp313-macosx_15_0_arm64.whl", hash = "sha256:ebaed4cef74a045b83e23537b52ef19a367c7e3f536751e355a2a394f8648559", size = 128754, upload-time = "2026-03-31T16:15:38.049Z" }, + { url = "https://files.pythonhosted.org/packages/42/3d/27d65b6d11e63f133781425f132807aef793ed25075fec686fc8e46dd528/orjson-3.11.8-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:97c8f5d3b62380b70c36ffacb2a356b7c6becec86099b177f73851ba095ef623", size = 131877, upload-time = "2026-03-31T16:15:39.484Z" }, + { url = "https://files.pythonhosted.org/packages/dd/cc/faee30cd8f00421999e40ef0eba7332e3a625ce91a58200a2f52c7fef235/orjson-3.11.8-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:436c4922968a619fb7fef1ccd4b8b3a76c13b67d607073914d675026e911a65c", size = 130361, upload-time = "2026-03-31T16:15:41.274Z" }, + { url = "https://files.pythonhosted.org/packages/5c/bb/a6c55896197f97b6d4b4e7c7fd77e7235517c34f5d6ad5aadd43c54c6d7c/orjson-3.11.8-cp313-cp313-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1ab359aff0436d80bfe8a23b46b5fea69f1e18aaf1760a709b4787f1318b317f", size = 135521, upload-time = "2026-03-31T16:15:42.758Z" }, + { url = "https://files.pythonhosted.org/packages/9c/7c/ca3a3525aa32ff636ebb1778e77e3587b016ab2edb1b618b36ba96f8f2c0/orjson-3.11.8-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f89b6d0b3a8d81e1929d3ab3d92bbc225688bd80a770c49432543928fe09ac55", size = 146862, upload-time = "2026-03-31T16:15:44.341Z" }, + { url = 
"https://files.pythonhosted.org/packages/3c/0c/18a9d7f18b5edd37344d1fd5be17e94dc652c67826ab749c6e5948a78112/orjson-3.11.8-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:29c009e7a2ca9ad0ed1376ce20dd692146a5d9fe4310848904b6b4fee5c5c137", size = 132847, upload-time = "2026-03-31T16:15:46.368Z" }, + { url = "https://files.pythonhosted.org/packages/23/91/7e722f352ad67ca573cee44de2a58fb810d0f4eb4e33276c6a557979fd8a/orjson-3.11.8-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:705b895b781b3e395c067129d8551655642dfe9437273211d5404e87ac752b53", size = 133637, upload-time = "2026-03-31T16:15:48.123Z" }, + { url = "https://files.pythonhosted.org/packages/af/04/32845ce13ac5bd1046ddb02ac9432ba856cc35f6d74dde95864fe0ad5523/orjson-3.11.8-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:88006eda83858a9fdf73985ce3804e885c2befb2f506c9a3723cdeb5a2880e3e", size = 141906, upload-time = "2026-03-31T16:15:49.626Z" }, + { url = "https://files.pythonhosted.org/packages/02/5e/c551387ddf2d7106d9039369862245c85738b828844d13b99ccb8d61fd06/orjson-3.11.8-cp313-cp313-musllinux_1_2_armv7l.whl", hash = "sha256:55120759e61309af7fcf9e961c6f6af3dde5921cdb3ee863ef63fd9db126cae6", size = 423722, upload-time = "2026-03-31T16:15:51.176Z" }, + { url = "https://files.pythonhosted.org/packages/00/a3/ecfe62434096f8a794d4976728cb59bcfc4a643977f21c2040545d37eb4c/orjson-3.11.8-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:98bdc6cb889d19bed01de46e67574a2eab61f5cc6b768ed50e8ac68e9d6ffab6", size = 147801, upload-time = "2026-03-31T16:15:52.939Z" }, + { url = "https://files.pythonhosted.org/packages/18/6d/0dce10b9f6643fdc59d99333871a38fa5a769d8e2fc34a18e5d2bfdee900/orjson-3.11.8-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:708c95f925a43ab9f34625e45dcdadf09ec8a6e7b664a938f2f8d5650f6c090b", size = 136460, upload-time = "2026-03-31T16:15:54.431Z" }, + { url = 
"https://files.pythonhosted.org/packages/01/d6/6dde4f31842d87099238f1f07b459d24edc1a774d20687187443ab044191/orjson-3.11.8-cp313-cp313-win32.whl", hash = "sha256:01c4e5a6695dc09098f2e6468a251bc4671c50922d4d745aff1a0a33a0cf5b8d", size = 131956, upload-time = "2026-03-31T16:15:56.081Z" }, + { url = "https://files.pythonhosted.org/packages/c1/f9/4e494a56e013db957fb77186b818b916d4695b8fa2aa612364974160e91b/orjson-3.11.8-cp313-cp313-win_amd64.whl", hash = "sha256:c154a35dd1330707450bb4d4e7dd1f17fa6f42267a40c1e8a1daa5e13719b4b8", size = 127410, upload-time = "2026-03-31T16:15:57.54Z" }, + { url = "https://files.pythonhosted.org/packages/57/7f/803203d00d6edb6e9e7eef421d4e1adbb5ea973e40b3533f3cfd9aeb374e/orjson-3.11.8-cp313-cp313-win_arm64.whl", hash = "sha256:4861bde57f4d253ab041e374f44023460e60e71efaa121f3c5f0ed457c3a701e", size = 127338, upload-time = "2026-03-31T16:15:59.106Z" }, + { url = "https://files.pythonhosted.org/packages/6d/35/b01910c3d6b85dc882442afe5060cbf719c7d1fc85749294beda23d17873/orjson-3.11.8-cp314-cp314-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl", hash = "sha256:ec795530a73c269a55130498842aaa762e4a939f6ce481a7e986eeaa790e9da4", size = 229171, upload-time = "2026-03-31T16:16:00.651Z" }, + { url = "https://files.pythonhosted.org/packages/c2/56/c9ec97bd11240abef39b9e5d99a15462809c45f677420fd148a6c5e6295e/orjson-3.11.8-cp314-cp314-macosx_15_0_arm64.whl", hash = "sha256:c492a0e011c0f9066e9ceaa896fbc5b068c54d365fea5f3444b697ee01bc8625", size = 128746, upload-time = "2026-03-31T16:16:02.673Z" }, + { url = "https://files.pythonhosted.org/packages/3b/e4/66d4f30a90de45e2f0cbd9623588e8ae71eef7679dbe2ae954ed6d66a41f/orjson-3.11.8-cp314-cp314-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:883206d55b1bd5f5679ad5e6ddd3d1a5e3cac5190482927fdb8c78fb699193b5", size = 131867, upload-time = "2026-03-31T16:16:04.342Z" }, + { url = 
"https://files.pythonhosted.org/packages/19/30/2a645fc9286b928675e43fa2a3a16fb7b6764aa78cc719dc82141e00f30b/orjson-3.11.8-cp314-cp314-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:5774c1fdcc98b2259800b683b19599c133baeb11d60033e2095fd9d4667b82db", size = 124664, upload-time = "2026-03-31T16:16:05.837Z" }, + { url = "https://files.pythonhosted.org/packages/db/44/77b9a86d84a28d52ba3316d77737f6514e17118119ade3f91b639e859029/orjson-3.11.8-cp314-cp314-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:8ac7381c83dd3d4a6347e6635950aa448f54e7b8406a27c7ecb4a37e9f1ae08b", size = 129701, upload-time = "2026-03-31T16:16:07.407Z" }, + { url = "https://files.pythonhosted.org/packages/b3/ea/eff3d9bfe47e9bc6969c9181c58d9f71237f923f9c86a2d2f490cd898c82/orjson-3.11.8-cp314-cp314-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:14439063aebcb92401c11afc68ee4e407258d2752e62d748b6942dad20d2a70d", size = 141202, upload-time = "2026-03-31T16:16:09.48Z" }, + { url = "https://files.pythonhosted.org/packages/52/c8/90d4b4c60c84d62068d0cf9e4d8f0a4e05e76971d133ac0c60d818d4db20/orjson-3.11.8-cp314-cp314-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:fa72e71977bff96567b0f500fc5bfd2fdf915f34052c782a4c6ebbdaa97aa858", size = 127194, upload-time = "2026-03-31T16:16:11.02Z" }, + { url = "https://files.pythonhosted.org/packages/8d/c7/ea9e08d1f0ba981adffb629811148b44774d935171e7b3d780ae43c4c254/orjson-3.11.8-cp314-cp314-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7679bc2f01bb0d219758f1a5f87bb7c8a81c0a186824a393b366876b4948e14f", size = 133639, upload-time = "2026-03-31T16:16:13.434Z" }, + { url = "https://files.pythonhosted.org/packages/6c/8c/ddbbfd6ba59453c8fc7fe1d0e5983895864e264c37481b2a791db635f046/orjson-3.11.8-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:14f7b8fcb35ef403b42fa5ecfa4ed032332a91f3dc7368fbce4184d59e1eae0d", size = 141914, upload-time = "2026-03-31T16:16:14.955Z" }, + { url = 
"https://files.pythonhosted.org/packages/4e/31/dbfbefec9df060d34ef4962cd0afcb6fa7a9ec65884cb78f04a7859526c3/orjson-3.11.8-cp314-cp314-musllinux_1_2_armv7l.whl", hash = "sha256:c2bdf7b2facc80b5e34f48a2d557727d5c5c57a8a450de122ae81fa26a81c1bc", size = 423800, upload-time = "2026-03-31T16:16:16.594Z" }, + { url = "https://files.pythonhosted.org/packages/87/cf/f74e9ae9803d4ab46b163494adba636c6d7ea955af5cc23b8aaa94cfd528/orjson-3.11.8-cp314-cp314-musllinux_1_2_i686.whl", hash = "sha256:ccd7ba1b0605813a0715171d39ec4c314cb97a9c85893c2c5c0c3a3729df38bf", size = 147837, upload-time = "2026-03-31T16:16:18.585Z" }, + { url = "https://files.pythonhosted.org/packages/64/e6/9214f017b5db85e84e68602792f742e5dc5249e963503d1b356bee611e01/orjson-3.11.8-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:cdbc8c9c02463fef4d3c53a9ba3336d05496ec8e1f1c53326a1e4acc11f5c600", size = 136441, upload-time = "2026-03-31T16:16:20.151Z" }, + { url = "https://files.pythonhosted.org/packages/24/dd/3590348818f58f837a75fb969b04cdf187ae197e14d60b5e5a794a38b79d/orjson-3.11.8-cp314-cp314-win32.whl", hash = "sha256:0b57f67710a8cd459e4e54eb96d5f77f3624eba0c661ba19a525807e42eccade", size = 131983, upload-time = "2026-03-31T16:16:21.823Z" }, + { url = "https://files.pythonhosted.org/packages/3f/0f/b6cb692116e05d058f31ceee819c70f097fa9167c82f67fabe7516289abc/orjson-3.11.8-cp314-cp314-win_amd64.whl", hash = "sha256:735e2262363dcbe05c35e3a8869898022af78f89dde9e256924dc02e99fe69ca", size = 127396, upload-time = "2026-03-31T16:16:23.685Z" }, + { url = "https://files.pythonhosted.org/packages/c0/d1/facb5b5051fabb0ef9d26c6544d87ef19a939a9a001198655d0d891062dd/orjson-3.11.8-cp314-cp314-win_arm64.whl", hash = "sha256:6ccdea2c213cf9f3d9490cbd5d427693c870753df41e6cb375bd79bcbafc8817", size = 127330, upload-time = "2026-03-31T16:16:25.496Z" }, +] + +[[package]] +name = "packaging" +version = "26.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = 
"https://files.pythonhosted.org/packages/d7/f1/e7a6dd94a8d4a5626c03e4e99c87f241ba9e350cd9e6d75123f992427270/packaging-26.2.tar.gz", hash = "sha256:ff452ff5a3e828ce110190feff1178bb1f2ea2281fa2075aadb987c2fb221661", size = 228134, upload-time = "2026-04-24T20:15:23.917Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/df/b2/87e62e8c3e2f4b32e5fe99e0b86d576da1312593b39f47d8ceef365e95ed/packaging-26.2-py3-none-any.whl", hash = "sha256:5fc45236b9446107ff2415ce77c807cee2862cb6fac22b8a73826d0693b0980e", size = 100195, upload-time = "2026-04-24T20:15:22.081Z" }, +] + +[[package]] +name = "pandas" +version = "3.0.2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "numpy" }, + { name = "python-dateutil" }, + { name = "tzdata", marker = "sys_platform == 'emscripten' or sys_platform == 'win32'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/da/99/b342345300f13440fe9fe385c3c481e2d9a595ee3bab4d3219247ac94e9a/pandas-3.0.2.tar.gz", hash = "sha256:f4753e73e34c8d83221ba58f232433fca2748be8b18dbca02d242ed153945043", size = 4645855, upload-time = "2026-03-31T06:48:30.816Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/bf/ca/3e639a1ea6fcd0617ca4e8ca45f62a74de33a56ae6cd552735470b22c8d3/pandas-3.0.2-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:b5918ba197c951dec132b0c5929a00c0bf05d5942f590d3c10a807f6e15a57d3", size = 10321105, upload-time = "2026-03-31T06:46:57.327Z" }, + { url = "https://files.pythonhosted.org/packages/0b/77/dbc82ff2fb0e63c6564356682bf201edff0ba16c98630d21a1fb312a8182/pandas-3.0.2-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:d606a041c89c0a474a4702d532ab7e73a14fe35c8d427b972a625c8e46373668", size = 9864088, upload-time = "2026-03-31T06:46:59.935Z" }, + { url = "https://files.pythonhosted.org/packages/5c/2b/341f1b04bbca2e17e13cd3f08c215b70ef2c60c5356ef1e8c6857449edc7/pandas-3.0.2-cp313-cp313-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = 
"sha256:710246ba0616e86891b58ab95f2495143bb2bc83ab6b06747c74216f583a6ac9", size = 10369066, upload-time = "2026-03-31T06:47:02.792Z" }, + { url = "https://files.pythonhosted.org/packages/12/c5/cbb1ffefb20a93d3f0e1fdcda699fb84976210d411b008f97f48bf6ce27e/pandas-3.0.2-cp313-cp313-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:5d3cfe227c725b1f3dff4278b43d8c784656a42a9325b63af6b1492a8232209e", size = 10876780, upload-time = "2026-03-31T06:47:06.205Z" }, + { url = "https://files.pythonhosted.org/packages/98/fe/2249ae5e0a69bd0ddf17353d0a5d26611d70970111f5b3600cdc8be883e7/pandas-3.0.2-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:c3b723df9087a9a9a840e263ebd9f88b64a12075d1bf2ea401a5a42f254f084d", size = 11375181, upload-time = "2026-03-31T06:47:09.383Z" }, + { url = "https://files.pythonhosted.org/packages/de/64/77a38b09e70b6464883b8d7584ab543e748e42c1b5d337a2ee088e0df741/pandas-3.0.2-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:a3096110bf9eac0070b7208465f2740e2d8a670d5cb6530b5bb884eca495fd39", size = 11928899, upload-time = "2026-03-31T06:47:12.686Z" }, + { url = "https://files.pythonhosted.org/packages/5e/52/42855bf626868413f761addd574acc6195880ae247a5346477a4361c3acb/pandas-3.0.2-cp313-cp313-win_amd64.whl", hash = "sha256:07a10f5c36512eead51bc578eb3354ad17578b22c013d89a796ab5eee90cd991", size = 9746574, upload-time = "2026-03-31T06:47:15.64Z" }, + { url = "https://files.pythonhosted.org/packages/88/39/21304ae06a25e8bf9fc820d69b29b2c495b2ae580d1e143146c309941760/pandas-3.0.2-cp313-cp313-win_arm64.whl", hash = "sha256:5fdbfa05931071aba28b408e59226186b01eb5e92bea2ab78b65863ca3228d84", size = 9047156, upload-time = "2026-03-31T06:47:18.595Z" }, + { url = "https://files.pythonhosted.org/packages/72/20/7defa8b27d4f330a903bb68eea33be07d839c5ea6bdda54174efcec0e1d2/pandas-3.0.2-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:dbc20dea3b9e27d0e66d74c42b2d0c1bed9c2ffe92adea33633e3bedeb5ac235", size = 10756238, upload-time = 
"2026-03-31T06:47:22.012Z" }, + { url = "https://files.pythonhosted.org/packages/e9/95/49433c14862c636afc0e9b2db83ff16b3ad92959364e52b2955e44c8e94c/pandas-3.0.2-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:b75c347eff42497452116ce05ef461822d97ce5b9ff8df6edacb8076092c855d", size = 10408520, upload-time = "2026-03-31T06:47:25.197Z" }, + { url = "https://files.pythonhosted.org/packages/3b/f8/462ad2b5881d6b8ec8e5f7ed2ea1893faa02290d13870a1600fe72ad8efc/pandas-3.0.2-cp313-cp313t-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:d1478075142e83a5571782ad007fb201ed074bdeac7ebcc8890c71442e96adf7", size = 10324154, upload-time = "2026-03-31T06:47:28.097Z" }, + { url = "https://files.pythonhosted.org/packages/0a/65/d1e69b649cbcddda23ad6e4c40ef935340f6f652a006e5cbc3555ac8adb3/pandas-3.0.2-cp313-cp313t-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:5880314e69e763d4c8b27937090de570f1fb8d027059a7ada3f7f8e98bdcb677", size = 10714449, upload-time = "2026-03-31T06:47:30.85Z" }, + { url = "https://files.pythonhosted.org/packages/47/a4/85b59bc65b8190ea3689882db6cdf32a5003c0ccd5a586c30fdcc3ffc4fc/pandas-3.0.2-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:b5329e26898896f06035241a626d7c335daa479b9bbc82be7c2742d048e41172", size = 11338475, upload-time = "2026-03-31T06:47:34.026Z" }, + { url = "https://files.pythonhosted.org/packages/1e/c4/bc6966c6e38e5d9478b935272d124d80a589511ed1612a5d21d36f664c68/pandas-3.0.2-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:81526c4afd31971f8b62671442a4b2b51e0aa9acc3819c9f0f12a28b6fcf85f1", size = 11786568, upload-time = "2026-03-31T06:47:36.941Z" }, + { url = "https://files.pythonhosted.org/packages/e8/74/09298ca9740beed1d3504e073d67e128aa07e5ca5ca2824b0c674c0b8676/pandas-3.0.2-cp313-cp313t-win_amd64.whl", hash = "sha256:7cadd7e9a44ec13b621aec60f9150e744cfc7a3dd32924a7e2f45edff31823b0", size = 10488652, upload-time = "2026-03-31T06:47:40.612Z" }, + { url = 
"https://files.pythonhosted.org/packages/bb/40/c6ea527147c73b24fc15c891c3fcffe9c019793119c5742b8784a062c7db/pandas-3.0.2-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:db0dbfd2a6cdf3770aa60464d50333d8f3d9165b2f2671bcc299b72de5a6677b", size = 10326084, upload-time = "2026-03-31T06:47:43.834Z" }, + { url = "https://files.pythonhosted.org/packages/95/25/bdb9326c3b5455f8d4d3549fce7abcf967259de146fe2cf7a82368141948/pandas-3.0.2-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:0555c5882688a39317179ab4a0ed41d3ebc8812ab14c69364bbee8fb7a3f6288", size = 9914146, upload-time = "2026-03-31T06:47:46.67Z" }, + { url = "https://files.pythonhosted.org/packages/8d/77/3a227ff3337aa376c60d288e1d61c5d097131d0ac71f954d90a8f369e422/pandas-3.0.2-cp314-cp314-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:01f31a546acd5574ef77fe199bc90b55527c225c20ccda6601cf6b0fd5ed597c", size = 10444081, upload-time = "2026-03-31T06:47:49.681Z" }, + { url = "https://files.pythonhosted.org/packages/15/88/3cdd54fa279341afa10acf8d2b503556b1375245dccc9315659f795dd2e9/pandas-3.0.2-cp314-cp314-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:deeca1b5a931fdf0c2212c8a659ade6d3b1edc21f0914ce71ef24456ca7a6535", size = 10897535, upload-time = "2026-03-31T06:47:53.033Z" }, + { url = "https://files.pythonhosted.org/packages/06/9d/98cc7a7624f7932e40f434299260e2917b090a579d75937cb8a57b9d2de3/pandas-3.0.2-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:0f48afd9bb13300ffb5a3316973324c787054ba6665cda0da3fbd67f451995db", size = 11446992, upload-time = "2026-03-31T06:47:56.193Z" }, + { url = "https://files.pythonhosted.org/packages/9a/cd/19ff605cc3760e80602e6826ddef2824d8e7050ed80f2e11c4b079741dc3/pandas-3.0.2-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:6c4d8458b97a35717b62469a4ea0e85abd5ed8687277f5ccfc67f8a5126f8c53", size = 11968257, upload-time = "2026-03-31T06:47:59.137Z" }, + { url = 
"https://files.pythonhosted.org/packages/db/60/aba6a38de456e7341285102bede27514795c1eaa353bc0e7638b6b785356/pandas-3.0.2-cp314-cp314-win_amd64.whl", hash = "sha256:b35d14bb5d8285d9494fe93815a9e9307c0876e10f1e8e89ac5b88f728ec8dcf", size = 9865893, upload-time = "2026-03-31T06:48:02.038Z" }, + { url = "https://files.pythonhosted.org/packages/08/71/e5ec979dd2e8a093dacb8864598c0ff59a0cee0bbcdc0bfec16a51684d4f/pandas-3.0.2-cp314-cp314-win_arm64.whl", hash = "sha256:63d141b56ef686f7f0d714cfb8de4e320475b86bf4b620aa0b7da89af8cbdbbb", size = 9188644, upload-time = "2026-03-31T06:48:05.045Z" }, + { url = "https://files.pythonhosted.org/packages/f1/6c/7b45d85db19cae1eb524f2418ceaa9d85965dcf7b764ed151386b7c540f0/pandas-3.0.2-cp314-cp314t-macosx_10_15_x86_64.whl", hash = "sha256:140f0cffb1fa2524e874dde5b477d9defe10780d8e9e220d259b2c0874c89d9d", size = 10776246, upload-time = "2026-03-31T06:48:07.789Z" }, + { url = "https://files.pythonhosted.org/packages/a8/3e/7b00648b086c106e81766f25322b48aa8dfa95b55e621dbdf2fdd413a117/pandas-3.0.2-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:ae37e833ff4fed0ba352f6bdd8b73ba3ab3256a85e54edfd1ab51ae40cca0af8", size = 10424801, upload-time = "2026-03-31T06:48:10.897Z" }, + { url = "https://files.pythonhosted.org/packages/da/6e/558dd09a71b53b4008e7fc8a98ec6d447e9bfb63cdaeea10e5eb9b2dabe8/pandas-3.0.2-cp314-cp314t-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:4d888a5c678a419a5bb41a2a93818e8ed9fd3172246555c0b37b7cc27027effd", size = 10345643, upload-time = "2026-03-31T06:48:13.7Z" }, + { url = "https://files.pythonhosted.org/packages/be/e3/921c93b4d9a280409451dc8d07b062b503bbec0531d2627e73a756e99a82/pandas-3.0.2-cp314-cp314t-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:b444dc64c079e84df91baa8bf613d58405645461cabca929d9178f2cd392398d", size = 10743641, upload-time = "2026-03-31T06:48:16.659Z" }, + { url = 
"https://files.pythonhosted.org/packages/56/ca/fd17286f24fa3b4d067965d8d5d7e14fe557dd4f979a0b068ac0deaf8228/pandas-3.0.2-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:4544c7a54920de8eeacaa1466a6b7268ecfbc9bc64ab4dbb89c6bbe94d5e0660", size = 11361993, upload-time = "2026-03-31T06:48:19.475Z" }, + { url = "https://files.pythonhosted.org/packages/e4/a5/2f6ed612056819de445a433ca1f2821ac3dab7f150d569a59e9cc105de1d/pandas-3.0.2-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:734be7551687c00fbd760dc0522ed974f82ad230d4a10f54bf51b80d44a08702", size = 11815274, upload-time = "2026-03-31T06:48:22.695Z" }, + { url = "https://files.pythonhosted.org/packages/00/2f/b622683e99ec3ce00b0854bac9e80868592c5b051733f2cf3a868e5fea26/pandas-3.0.2-cp314-cp314t-win_amd64.whl", hash = "sha256:57a07209bebcbcf768d2d13c9b78b852f9a15978dac41b9e6421a81ad4cdd276", size = 10888530, upload-time = "2026-03-31T06:48:25.806Z" }, + { url = "https://files.pythonhosted.org/packages/cb/2b/f8434233fab2bd66a02ec014febe4e5adced20e2693e0e90a07d118ed30e/pandas-3.0.2-cp314-cp314t-win_arm64.whl", hash = "sha256:5371b72c2d4d415d08765f32d689217a43227484e81b2305b52076e328f6f482", size = 9455341, upload-time = "2026-03-31T06:48:28.418Z" }, +] + +[[package]] +name = "platformdirs" +version = "4.9.6" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/9f/4a/0883b8e3802965322523f0b200ecf33d31f10991d0401162f4b23c698b42/platformdirs-4.9.6.tar.gz", hash = "sha256:3bfa75b0ad0db84096ae777218481852c0ebc6c727b3168c1b9e0118e458cf0a", size = 29400, upload-time = "2026-04-09T00:04:10.812Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/75/a6/a0a304dc33b49145b21f4808d763822111e67d1c3a32b524a1baf947b6e1/platformdirs-4.9.6-py3-none-any.whl", hash = "sha256:e61adb1d5e5cb3441b4b7710bea7e4c12250ca49439228cc1021c00dcfac0917", size = 21348, upload-time = "2026-04-09T00:04:09.463Z" }, +] + +[[package]] +name = "pluggy" +version = "1.6.0" +source = 
{ registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/f9/e2/3e91f31a7d2b083fe6ef3fa267035b518369d9511ffab804f839851d2779/pluggy-1.6.0.tar.gz", hash = "sha256:7dcc130b76258d33b90f61b658791dede3486c3e6bfb003ee5c9bfb396dd22f3", size = 69412, upload-time = "2025-05-15T12:30:07.975Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/54/20/4d324d65cc6d9205fabedc306948156824eb9f0ee1633355a8f7ec5c66bf/pluggy-1.6.0-py3-none-any.whl", hash = "sha256:e920276dd6813095e9377c0bc5566d94c932c33b27a3e3945d8389c374dd4746", size = 20538, upload-time = "2025-05-15T12:30:06.134Z" }, +] + +[[package]] +name = "pre-commit" +version = "4.6.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "cfgv" }, + { name = "identify" }, + { name = "nodeenv" }, + { name = "pyyaml" }, + { name = "virtualenv" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/8e/22/2de9408ac81acbb8a7d05d4cc064a152ccf33b3d480ebe0cd292153db239/pre_commit-4.6.0.tar.gz", hash = "sha256:718d2208cef53fdc38206e40524a6d4d9576d103eb16f0fec11c875e7716e9d9", size = 198525, upload-time = "2026-04-21T20:31:41.613Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/80/6e/4b28b62ecb6aae56769c34a8ff1d661473ec1e9519e2d5f8b2c150086b26/pre_commit-4.6.0-py2.py3-none-any.whl", hash = "sha256:e2cf246f7299edcabcf15f9b0571fdce06058527f0a06535068a86d38089f29b", size = 226472, upload-time = "2026-04-21T20:31:40.092Z" }, +] + +[[package]] +name = "psycopg" +version = "3.3.3" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "tzdata", marker = "sys_platform == 'win32'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/d3/b6/379d0a960f8f435ec78720462fd94c4863e7a31237cf81bf76d0af5883bf/psycopg-3.3.3.tar.gz", hash = "sha256:5e9a47458b3c1583326513b2556a2a9473a1001a56c9efe9e587245b43148dd9", size = 165624, upload-time = "2026-02-18T16:52:16.546Z" } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/c8/5b/181e2e3becb7672b502f0ed7f16ed7352aca7c109cfb94cf3878a9186db9/psycopg-3.3.3-py3-none-any.whl", hash = "sha256:f96525a72bcfade6584ab17e89de415ff360748c766f0106959144dcbb38c698", size = 212768, upload-time = "2026-02-18T16:46:27.365Z" }, +] + +[package.optional-dependencies] +binary = [ + { name = "psycopg-binary", marker = "implementation_name != 'pypy'" }, +] + +[[package]] +name = "psycopg-binary" +version = "3.3.3" +source = { registry = "https://pypi.org/simple" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/03/0a/cac9fdf1df16a269ba0e5f0f06cac61f826c94cadb39df028cdfe19d3a33/psycopg_binary-3.3.3-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:05f32239aec25c5fb15f7948cffdc2dc0dac098e48b80a140e4ba32b572a2e7d", size = 4590414, upload-time = "2026-02-18T16:50:01.441Z" }, + { url = "https://files.pythonhosted.org/packages/9c/c0/d8f8508fbf440edbc0099b1abff33003cd80c9e66eb3a1e78834e3fb4fb9/psycopg_binary-3.3.3-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:7c84f9d214f2d1de2fafebc17fa68ac3f6561a59e291553dfc45ad299f4898c1", size = 4669021, upload-time = "2026-02-18T16:50:08.803Z" }, + { url = "https://files.pythonhosted.org/packages/04/05/097016b77e343b4568feddf12c72171fc513acef9a4214d21b9478569068/psycopg_binary-3.3.3-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:e77957d2ba17cada11be09a5066d93026cdb61ada7c8893101d7fe1c6e1f3925", size = 5467453, upload-time = "2026-02-18T16:50:14.985Z" }, + { url = "https://files.pythonhosted.org/packages/91/23/73244e5feb55b5ca109cede6e97f32ef45189f0fdac4c80d75c99862729d/psycopg_binary-3.3.3-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:42961609ac07c232a427da7c87a468d3c82fee6762c220f38e37cfdacb2b178d", size = 5151135, upload-time = "2026-02-18T16:50:24.82Z" }, + { url = 
"https://files.pythonhosted.org/packages/11/49/5309473b9803b207682095201d8708bbc7842ddf3f192488a69204e36455/psycopg_binary-3.3.3-cp313-cp313-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:ae07a3114313dd91fce686cab2f4c44af094398519af0e0f854bc707e1aeedf1", size = 6737315, upload-time = "2026-02-18T16:50:35.106Z" }, + { url = "https://files.pythonhosted.org/packages/d4/5d/03abe74ef34d460b33c4d9662bf6ec1dd38888324323c1a1752133c10377/psycopg_binary-3.3.3-cp313-cp313-manylinux_2_38_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:d257c58d7b36a621dcce1d01476ad8b60f12d80eb1406aee4cf796f88b2ae482", size = 4979783, upload-time = "2026-02-18T16:50:42.067Z" }, + { url = "https://files.pythonhosted.org/packages/f0/6c/3fbf8e604e15f2f3752900434046c00c90bb8764305a1b81112bff30ba24/psycopg_binary-3.3.3-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:07c7211f9327d522c9c47560cae00a4ecf6687f4e02d779d035dd3177b41cb12", size = 4509023, upload-time = "2026-02-18T16:50:50.116Z" }, + { url = "https://files.pythonhosted.org/packages/9c/6b/1a06b43b7c7af756c80b67eac8bfaa51d77e68635a8a8d246e4f0bb7604a/psycopg_binary-3.3.3-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:8e7e9eca9b363dbedeceeadd8be97149d2499081f3c52d141d7cd1f395a91f83", size = 4185874, upload-time = "2026-02-18T16:50:55.97Z" }, + { url = "https://files.pythonhosted.org/packages/2b/d3/bf49e3dcaadba510170c8d111e5e69e5ae3f981c1554c5bb71c75ce354bb/psycopg_binary-3.3.3-cp313-cp313-musllinux_1_2_riscv64.whl", hash = "sha256:cb85b1d5702877c16f28d7b92ba030c1f49ebcc9b87d03d8c10bf45a2f1c7508", size = 3925668, upload-time = "2026-02-18T16:51:03.299Z" }, + { url = "https://files.pythonhosted.org/packages/f8/92/0aac830ed6a944fe334404e1687a074e4215630725753f0e3e9a9a595b62/psycopg_binary-3.3.3-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:4d4606c84d04b80f9138d72f1e28c6c02dc5ae0c7b8f3f8aaf89c681ce1cd1b1", size = 4234973, upload-time = "2026-02-18T16:51:09.097Z" }, + { url = 
"https://files.pythonhosted.org/packages/2e/96/102244653ee5a143ece5afe33f00f52fe64e389dfce8dbc87580c6d70d3d/psycopg_binary-3.3.3-cp313-cp313-win_amd64.whl", hash = "sha256:74eae563166ebf74e8d950ff359be037b85723d99ca83f57d9b244a871d6c13b", size = 3551342, upload-time = "2026-02-18T16:51:13.892Z" }, + { url = "https://files.pythonhosted.org/packages/a2/71/7a57e5b12275fe7e7d84d54113f0226080423a869118419c9106c083a21c/psycopg_binary-3.3.3-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:497852c5eaf1f0c2d88ab74a64a8097c099deac0c71de1cbcf18659a8a04a4b2", size = 4607368, upload-time = "2026-02-18T16:51:19.295Z" }, + { url = "https://files.pythonhosted.org/packages/c7/04/cb834f120f2b2c10d4003515ef9ca9d688115b9431735e3936ae48549af8/psycopg_binary-3.3.3-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:258d1ea53464d29768bf25930f43291949f4c7becc706f6e220c515a63a24edd", size = 4687047, upload-time = "2026-02-18T16:51:23.84Z" }, + { url = "https://files.pythonhosted.org/packages/40/e9/47a69692d3da9704468041aa5ed3ad6fc7f6bb1a5ae788d261a26bbca6c7/psycopg_binary-3.3.3-cp314-cp314-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:111c59897a452196116db12e7f608da472fbff000693a21040e35fc978b23430", size = 5487096, upload-time = "2026-02-18T16:51:29.645Z" }, + { url = "https://files.pythonhosted.org/packages/0b/b6/0e0dd6a2f802864a4ae3dbadf4ec620f05e3904c7842b326aafc43e5f464/psycopg_binary-3.3.3-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:17bb6600e2455993946385249a3c3d0af52cd70c1c1cdbf712e9d696d0b0bf1b", size = 5168720, upload-time = "2026-02-18T16:51:36.499Z" }, + { url = "https://files.pythonhosted.org/packages/6f/0d/977af38ac19a6b55d22dff508bd743fd7c1901e1b73657e7937c7cccb0a3/psycopg_binary-3.3.3-cp314-cp314-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:642050398583d61c9856210568eb09a8e4f2fe8224bf3be21b67a370e677eead", size = 6762076, upload-time = "2026-02-18T16:51:43.167Z" }, + { url = 
"https://files.pythonhosted.org/packages/34/40/912a39d48322cf86895c0eaf2d5b95cb899402443faefd4b09abbba6b6e1/psycopg_binary-3.3.3-cp314-cp314-manylinux_2_38_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:533efe6dc3a7cba5e2a84e38970786bb966306863e45f3db152007e9f48638a6", size = 4997623, upload-time = "2026-02-18T16:51:47.707Z" }, + { url = "https://files.pythonhosted.org/packages/98/0c/c14d0e259c65dc7be854d926993f151077887391d5a081118907a9d89603/psycopg_binary-3.3.3-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:5958dbf28b77ce2033482f6cb9ef04d43f5d8f4b7636e6963d5626f000efb23e", size = 4532096, upload-time = "2026-02-18T16:51:51.421Z" }, + { url = "https://files.pythonhosted.org/packages/39/21/8b7c50a194cfca6ea0fd4d1f276158307785775426e90700ab2eba5cd623/psycopg_binary-3.3.3-cp314-cp314-musllinux_1_2_ppc64le.whl", hash = "sha256:a6af77b6626ce92b5817bf294b4d45ec1a6161dba80fc2d82cdffdd6814fd023", size = 4208884, upload-time = "2026-02-18T16:51:57.336Z" }, + { url = "https://files.pythonhosted.org/packages/c7/2c/a4981bf42cf30ebba0424971d7ce70a222ae9b82594c42fc3f2105d7b525/psycopg_binary-3.3.3-cp314-cp314-musllinux_1_2_riscv64.whl", hash = "sha256:47f06fcbe8542b4d96d7392c476a74ada521c5aebdb41c3c0155f6595fc14c8d", size = 3944542, upload-time = "2026-02-18T16:52:04.266Z" }, + { url = "https://files.pythonhosted.org/packages/60/e9/b7c29b56aa0b85a4e0c4d89db691c1ceef08f46a356369144430c155a2f5/psycopg_binary-3.3.3-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:e7800e6c6b5dc4b0ca7cc7370f770f53ac83886b76afda0848065a674231e856", size = 4254339, upload-time = "2026-02-18T16:52:10.444Z" }, + { url = "https://files.pythonhosted.org/packages/98/5a/291d89f44d3820fffb7a04ebc8f3ef5dda4f542f44a5daea0c55a84abf45/psycopg_binary-3.3.3-cp314-cp314-win_amd64.whl", hash = "sha256:165f22ab5a9513a3d7425ffb7fcc7955ed8ccaeef6d37e369d6cc1dff1582383", size = 3652796, upload-time = "2026-02-18T16:52:14.02Z" }, +] + +[[package]] +name = "pydantic" +version = "2.13.3" +source = { 
registry = "https://pypi.org/simple" } +dependencies = [ + { name = "annotated-types" }, + { name = "pydantic-core" }, + { name = "typing-extensions" }, + { name = "typing-inspection" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/d9/e4/40d09941a2cebcb20609b86a559817d5b9291c49dd6f8c87e5feffbe703a/pydantic-2.13.3.tar.gz", hash = "sha256:af09e9d1d09f4e7fe37145c1f577e1d61ceb9a41924bf0094a36506285d0a84d", size = 844068, upload-time = "2026-04-20T14:46:43.632Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/f3/0a/fd7d723f8f8153418fb40cf9c940e82004fce7e987026b08a68a36dd3fe7/pydantic-2.13.3-py3-none-any.whl", hash = "sha256:6db14ac8dfc9a1e57f87ea2c0de670c251240f43cb0c30a5130e9720dc612927", size = 471981, upload-time = "2026-04-20T14:46:41.402Z" }, +] + +[[package]] +name = "pydantic-core" +version = "2.46.3" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/2a/ef/f7abb56c49382a246fd2ce9c799691e3c3e7175ec74b14d99e798bcddb1a/pydantic_core-2.46.3.tar.gz", hash = "sha256:41c178f65b8c29807239d47e6050262eb6bf84eb695e41101e62e38df4a5bc2c", size = 471412, upload-time = "2026-04-20T14:40:56.672Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/9b/3c/9b5e8eb9821936d065439c3b0fb1490ffa64163bfe7e1595985a47896073/pydantic_core-2.46.3-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:12bc98de041458b80c86c56b24df1d23832f3e166cbaff011f25d187f5c62c37", size = 2102109, upload-time = "2026-04-20T14:41:24.219Z" }, + { url = "https://files.pythonhosted.org/packages/91/97/1c41d1f5a19f241d8069f1e249853bcce378cdb76eec8ab636d7bc426280/pydantic_core-2.46.3-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:85348b8f89d2c3508b65b16c3c33a4da22b8215138d8b996912bb1532868885f", size = 1951820, upload-time = "2026-04-20T14:42:14.236Z" }, + { url = 
"https://files.pythonhosted.org/packages/30/b4/d03a7ae14571bc2b6b3c7b122441154720619afe9a336fa3a95434df5e2f/pydantic_core-2.46.3-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1105677a6df914b1fb71a81b96c8cce7726857e1717d86001f29be06a25ee6f8", size = 1977785, upload-time = "2026-04-20T14:42:31.648Z" }, + { url = "https://files.pythonhosted.org/packages/ae/0c/4086f808834b59e3c8f1aa26df8f4b6d998cdcf354a143d18ef41529d1fe/pydantic_core-2.46.3-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:87082cd65669a33adeba5470769e9704c7cf026cc30afb9cc77fd865578ebaad", size = 2062761, upload-time = "2026-04-20T14:40:37.093Z" }, + { url = "https://files.pythonhosted.org/packages/fa/71/a649be5a5064c2df0db06e0a512c2281134ed2fcc981f52a657936a7527c/pydantic_core-2.46.3-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:60e5f66e12c4f5212d08522963380eaaeac5ebd795826cfd19b2dfb0c7a52b9c", size = 2232989, upload-time = "2026-04-20T14:42:59.254Z" }, + { url = "https://files.pythonhosted.org/packages/a2/84/7756e75763e810b3a710f4724441d1ecc5883b94aacb07ca71c5fb5cfb69/pydantic_core-2.46.3-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b6cdf19bf84128d5e7c37e8a73a0c5c10d51103a650ac585d42dd6ae233f2b7f", size = 2303975, upload-time = "2026-04-20T14:41:32.287Z" }, + { url = "https://files.pythonhosted.org/packages/6c/35/68a762e0c1e31f35fa0dac733cbd9f5b118042853698de9509c8e5bf128b/pydantic_core-2.46.3-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:031bb17f4885a43773c8c763089499f242aee2ea85cf17154168775dccdecf35", size = 2095325, upload-time = "2026-04-20T14:42:47.685Z" }, + { url = "https://files.pythonhosted.org/packages/77/bf/1bf8c9a8e91836c926eae5e3e51dce009bf495a60ca56060689d3df3f340/pydantic_core-2.46.3-cp313-cp313-manylinux_2_31_riscv64.whl", hash = "sha256:bcf2a8b2982a6673693eae7348ef3d8cf3979c1d63b54fca7c397a635cc68687", size = 2133368, upload-time = 
"2026-04-20T14:41:22.766Z" }, + { url = "https://files.pythonhosted.org/packages/e5/50/87d818d6bab915984995157ceb2380f5aac4e563dddbed6b56f0ed057aba/pydantic_core-2.46.3-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:28e8cf2f52d72ced402a137145923a762cbb5081e48b34312f7a0c8f55928ec3", size = 2173908, upload-time = "2026-04-20T14:42:52.044Z" }, + { url = "https://files.pythonhosted.org/packages/91/88/a311fb306d0bd6185db41fa14ae888fb81d0baf648a761ae760d30819d33/pydantic_core-2.46.3-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:17eaface65d9fc5abb940003020309c1bf7a211f5f608d7870297c367e6f9022", size = 2186422, upload-time = "2026-04-20T14:43:29.55Z" }, + { url = "https://files.pythonhosted.org/packages/8f/79/28fd0d81508525ab2054fef7c77a638c8b5b0afcbbaeee493cf7c3fef7e1/pydantic_core-2.46.3-cp313-cp313-musllinux_1_1_armv7l.whl", hash = "sha256:93fd339f23408a07e98950a89644f92c54d8729719a40b30c0a30bb9ebc55d23", size = 2332709, upload-time = "2026-04-20T14:42:16.134Z" }, + { url = "https://files.pythonhosted.org/packages/b3/21/795bf5fe5c0f379308b8ef19c50dedab2e7711dbc8d0c2acf08f1c7daa05/pydantic_core-2.46.3-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:23cbdb3aaa74dfe0837975dbf69b469753bbde8eacace524519ffdb6b6e89eb7", size = 2372428, upload-time = "2026-04-20T14:41:10.974Z" }, + { url = "https://files.pythonhosted.org/packages/45/b3/ed14c659cbe7605e3ef063077680a64680aec81eb1a04763a05190d49b7f/pydantic_core-2.46.3-cp313-cp313-win32.whl", hash = "sha256:610eda2e3838f401105e6326ca304f5da1e15393ae25dacae5c5c63f2c275b13", size = 1965601, upload-time = "2026-04-20T14:41:42.128Z" }, + { url = "https://files.pythonhosted.org/packages/ef/bb/adb70d9a762ddd002d723fbf1bd492244d37da41e3af7b74ad212609027e/pydantic_core-2.46.3-cp313-cp313-win_amd64.whl", hash = "sha256:68cc7866ed863db34351294187f9b729964c371ba33e31c26f478471c52e1ed0", size = 2071517, upload-time = "2026-04-20T14:43:36.096Z" }, + { url = 
"https://files.pythonhosted.org/packages/52/eb/66faefabebfe68bd7788339c9c9127231e680b11906368c67ce112fdb47f/pydantic_core-2.46.3-cp313-cp313-win_arm64.whl", hash = "sha256:f64b5537ac62b231572879cd08ec05600308636a5d63bcbdb15063a466977bec", size = 2035802, upload-time = "2026-04-20T14:43:38.507Z" }, + { url = "https://files.pythonhosted.org/packages/7f/db/a7bcb4940183fda36022cd18ba8dd12f2dff40740ec7b58ce7457befa416/pydantic_core-2.46.3-cp314-cp314-macosx_10_12_x86_64.whl", hash = "sha256:afa3aa644f74e290cdede48a7b0bee37d1c35e71b05105f6b340d484af536d9b", size = 2097614, upload-time = "2026-04-20T14:44:38.374Z" }, + { url = "https://files.pythonhosted.org/packages/24/35/e4066358a22e3e99519db370494c7528f5a2aa1367370e80e27e20283543/pydantic_core-2.46.3-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:ced3310e51aa425f7f77da8bbbb5212616655bedbe82c70944320bc1dbe5e018", size = 1951896, upload-time = "2026-04-20T14:40:53.996Z" }, + { url = "https://files.pythonhosted.org/packages/87/92/37cf4049d1636996e4b888c05a501f40a43ff218983a551d57f9d5e14f0d/pydantic_core-2.46.3-cp314-cp314-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e29908922ce9da1a30b4da490bd1d3d82c01dcfdf864d2a74aacee674d0bfa34", size = 1979314, upload-time = "2026-04-20T14:41:49.446Z" }, + { url = "https://files.pythonhosted.org/packages/d8/36/9ff4d676dfbdfb2d591cf43f3d90ded01e15b1404fd101180ed2d62a2fd3/pydantic_core-2.46.3-cp314-cp314-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:0c9ff69140423eea8ed2d5477df3ba037f671f5e897d206d921bc9fdc39613e7", size = 2056133, upload-time = "2026-04-20T14:42:23.574Z" }, + { url = "https://files.pythonhosted.org/packages/bc/f0/405b442a4d7ba855b06eec8b2bf9c617d43b8432d099dfdc7bf999293495/pydantic_core-2.46.3-cp314-cp314-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:b675ab0a0d5b1c8fdb81195dc5bcefea3f3c240871cdd7ff9a2de8aa50772eb2", size = 2228726, upload-time = "2026-04-20T14:44:22.816Z" }, + { url = 
"https://files.pythonhosted.org/packages/e7/f8/65cd92dd5a0bd89ba277a98ecbfaf6fc36bbd3300973c7a4b826d6ab1391/pydantic_core-2.46.3-cp314-cp314-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:0087084960f209a9a4af50ecd1fb063d9ad3658c07bb81a7a53f452dacbfb2ba", size = 2301214, upload-time = "2026-04-20T14:44:48.792Z" }, + { url = "https://files.pythonhosted.org/packages/fd/86/ef96a4c6e79e7a2d0410826a68fbc0eccc0fd44aa733be199d5fcac3bb87/pydantic_core-2.46.3-cp314-cp314-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ed42e6cc8e1b0e2b9b96e2276bad70ae625d10d6d524aed0c93de974ae029f9f", size = 2099927, upload-time = "2026-04-20T14:41:40.196Z" }, + { url = "https://files.pythonhosted.org/packages/6d/53/269caf30e0096e0a8a8f929d1982a27b3879872cca2d917d17c2f9fdf4fe/pydantic_core-2.46.3-cp314-cp314-manylinux_2_31_riscv64.whl", hash = "sha256:f1771ce258afb3e4201e67d154edbbae712a76a6081079fe247c2f53c6322c22", size = 2128789, upload-time = "2026-04-20T14:41:15.868Z" }, + { url = "https://files.pythonhosted.org/packages/00/b0/1a6d9b6a587e118482910c244a1c5acf4d192604174132efd12bf0ac486f/pydantic_core-2.46.3-cp314-cp314-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:a7610b6a5242a6c736d8ad47fd5fff87fcfe8f833b281b1c409c3d6835d9227f", size = 2173815, upload-time = "2026-04-20T14:44:25.152Z" }, + { url = "https://files.pythonhosted.org/packages/87/56/e7e00d4041a7e62b5a40815590114db3b535bf3ca0bf4dca9f16cef25246/pydantic_core-2.46.3-cp314-cp314-musllinux_1_1_aarch64.whl", hash = "sha256:ff5e7783bcc5476e1db448bf268f11cb257b1c276d3e89f00b5727be86dd0127", size = 2181608, upload-time = "2026-04-20T14:41:28.933Z" }, + { url = "https://files.pythonhosted.org/packages/e8/22/4bd23c3d41f7c185d60808a1de83c76cf5aeabf792f6c636a55c3b1ec7f9/pydantic_core-2.46.3-cp314-cp314-musllinux_1_1_armv7l.whl", hash = "sha256:9d2e32edcc143bc01e95300671915d9ca052d4f745aa0a49c48d4803f8a85f2c", size = 2326968, upload-time = "2026-04-20T14:42:03.962Z" }, + { url = 
"https://files.pythonhosted.org/packages/24/ac/66cd45129e3915e5ade3b292cb3bc7fd537f58f8f8dbdaba6170f7cabb74/pydantic_core-2.46.3-cp314-cp314-musllinux_1_1_x86_64.whl", hash = "sha256:6e42d83d1c6b87fa56b521479cff237e626a292f3b31b6345c15a99121b454c1", size = 2369842, upload-time = "2026-04-20T14:41:35.52Z" }, + { url = "https://files.pythonhosted.org/packages/a2/51/dd4248abb84113615473aa20d5545b7c4cd73c8644003b5259686f93996c/pydantic_core-2.46.3-cp314-cp314-win32.whl", hash = "sha256:07bc6d2a28c3adb4f7c6ae46aa4f2d2929af127f587ed44057af50bf1ce0f505", size = 1959661, upload-time = "2026-04-20T14:41:00.042Z" }, + { url = "https://files.pythonhosted.org/packages/20/eb/59980e5f1ae54a3b86372bd9f0fa373ea2d402e8cdcd3459334430f91e91/pydantic_core-2.46.3-cp314-cp314-win_amd64.whl", hash = "sha256:8940562319bc621da30714617e6a7eaa6b98c84e8c685bcdc02d7ed5e7c7c44e", size = 2071686, upload-time = "2026-04-20T14:43:16.471Z" }, + { url = "https://files.pythonhosted.org/packages/8c/db/1cf77e5247047dfee34bc01fa9bca134854f528c8eb053e144298893d370/pydantic_core-2.46.3-cp314-cp314-win_arm64.whl", hash = "sha256:5dcbbcf4d22210ced8f837c96db941bdb078f419543472aca5d9a0bb7cddc7df", size = 2026907, upload-time = "2026-04-20T14:43:31.732Z" }, + { url = "https://files.pythonhosted.org/packages/57/c0/b3df9f6a543276eadba0a48487b082ca1f201745329d97dbfa287034a230/pydantic_core-2.46.3-cp314-cp314t-macosx_10_12_x86_64.whl", hash = "sha256:d0fe3dce1e836e418f912c1ad91c73357d03e556a4d286f441bf34fed2dbeecf", size = 2095047, upload-time = "2026-04-20T14:42:37.982Z" }, + { url = "https://files.pythonhosted.org/packages/66/57/886a938073b97556c168fd99e1a7305bb363cd30a6d2c76086bf0587b32a/pydantic_core-2.46.3-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:9ce92e58abc722dac1bf835a6798a60b294e48eb0e625ec9fd994b932ac5feee", size = 1934329, upload-time = "2026-04-20T14:43:49.655Z" }, + { url = 
"https://files.pythonhosted.org/packages/0b/7c/b42eaa5c34b13b07ecb51da21761297a9b8eb43044c864a035999998f328/pydantic_core-2.46.3-cp314-cp314t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a03e6467f0f5ab796a486146d1b887b2dc5e5f9b3288898c1b1c3ad974e53e4a", size = 1974847, upload-time = "2026-04-20T14:42:10.737Z" }, + { url = "https://files.pythonhosted.org/packages/e6/9b/92b42db6543e7de4f99ae977101a2967b63122d4b6cf7773812da2d7d5b5/pydantic_core-2.46.3-cp314-cp314t-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:2798b6ba041b9d70acfb9071a2ea13c8456dd1e6a5555798e41ba7b0790e329c", size = 2041742, upload-time = "2026-04-20T14:40:44.262Z" }, + { url = "https://files.pythonhosted.org/packages/0f/19/46fbe1efabb5aa2834b43b9454e70f9a83ad9c338c1291e48bdc4fecf167/pydantic_core-2.46.3-cp314-cp314t-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:9be3e221bdc6d69abf294dcf7aff6af19c31a5cdcc8f0aa3b14be29df4bd03b1", size = 2236235, upload-time = "2026-04-20T14:41:27.307Z" }, + { url = "https://files.pythonhosted.org/packages/77/da/b3f95bc009ad60ec53120f5d16c6faa8cabdbe8a20d83849a1f2b8728148/pydantic_core-2.46.3-cp314-cp314t-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:f13936129ce841f2a5ddf6f126fea3c43cd128807b5a59588c37cf10178c2e64", size = 2282633, upload-time = "2026-04-20T14:44:33.271Z" }, + { url = "https://files.pythonhosted.org/packages/cc/6e/401336117722e28f32fb8220df676769d28ebdf08f2f4469646d404c43a3/pydantic_core-2.46.3-cp314-cp314t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:28b5f2ef03416facccb1c6ef744c69793175fd27e44ef15669201601cf423acb", size = 2109679, upload-time = "2026-04-20T14:44:41.065Z" }, + { url = "https://files.pythonhosted.org/packages/fc/53/b289f9bc8756a32fe718c46f55afaeaf8d489ee18d1a1e7be1db73f42cc4/pydantic_core-2.46.3-cp314-cp314t-manylinux_2_31_riscv64.whl", hash = "sha256:830d1247d77ad23852314f069e9d7ddafeec5f684baf9d7e7065ed46a049c4e6", size = 2108342, upload-time = 
"2026-04-20T14:42:50.144Z" }, + { url = "https://files.pythonhosted.org/packages/10/5b/8292fc7c1f9111f1b2b7c1b0dcf1179edcd014fc3ea4517499f50b829d71/pydantic_core-2.46.3-cp314-cp314t-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:d0793c90c1a3c74966e7975eaef3ed30ebdff3260a0f815a62a22adc17e4c01c", size = 2157208, upload-time = "2026-04-20T14:42:08.133Z" }, + { url = "https://files.pythonhosted.org/packages/2b/9e/f80044e9ec07580f057a89fc131f78dda7a58751ddf52bbe05eaf31db50f/pydantic_core-2.46.3-cp314-cp314t-musllinux_1_1_aarch64.whl", hash = "sha256:d2d0aead851b66f5245ec0c4fb2612ef457f8bbafefdf65a2bf9d6bac6140f47", size = 2167237, upload-time = "2026-04-20T14:42:25.412Z" }, + { url = "https://files.pythonhosted.org/packages/f8/84/6781a1b037f3b96be9227edbd1101f6d3946746056231bf4ac48cdff1a8d/pydantic_core-2.46.3-cp314-cp314t-musllinux_1_1_armv7l.whl", hash = "sha256:2f40e4246676beb31c5ce77c38a55ca4e465c6b38d11ea1bd935420568e0b1ab", size = 2312540, upload-time = "2026-04-20T14:40:40.313Z" }, + { url = "https://files.pythonhosted.org/packages/3e/db/19c0839feeb728e7df03255581f198dfdf1c2aeb1e174a8420b63c5252e5/pydantic_core-2.46.3-cp314-cp314t-musllinux_1_1_x86_64.whl", hash = "sha256:cf489cf8986c543939aeee17a09c04d6ffb43bfef8ca16fcbcc5cfdcbed24dba", size = 2369556, upload-time = "2026-04-20T14:41:09.427Z" }, + { url = "https://files.pythonhosted.org/packages/e0/15/3228774cb7cd45f5f721ddf1b2242747f4eb834d0c491f0c02d606f09fed/pydantic_core-2.46.3-cp314-cp314t-win32.whl", hash = "sha256:ffe0883b56cfc05798bf994164d2b2ff03efe2d22022a2bb080f3b626176dd56", size = 1949756, upload-time = "2026-04-20T14:41:25.717Z" }, + { url = "https://files.pythonhosted.org/packages/b8/2a/c79cf53fd91e5a87e30d481809f52f9a60dd221e39de66455cf04deaad37/pydantic_core-2.46.3-cp314-cp314t-win_amd64.whl", hash = "sha256:706d9d0ce9cf4593d07270d8e9f53b161f90c57d315aeec4fb4fd7a8b10240d8", size = 2051305, upload-time = "2026-04-20T14:43:18.627Z" }, + { url = 
"https://files.pythonhosted.org/packages/0b/db/d8182a7f1d9343a032265aae186eb063fe26ca4c40f256b21e8da4498e89/pydantic_core-2.46.3-cp314-cp314t-win_arm64.whl", hash = "sha256:77706aeb41df6a76568434701e0917da10692da28cb69d5fb6919ce5fdb07374", size = 2026310, upload-time = "2026-04-20T14:41:01.778Z" }, +] + +[[package]] +name = "pygments" +version = "2.20.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/c3/b2/bc9c9196916376152d655522fdcebac55e66de6603a76a02bca1b6414f6c/pygments-2.20.0.tar.gz", hash = "sha256:6757cd03768053ff99f3039c1a36d6c0aa0b263438fcab17520b30a303a82b5f", size = 4955991, upload-time = "2026-03-29T13:29:33.898Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/f4/7e/a72dd26f3b0f4f2bf1dd8923c85f7ceb43172af56d63c7383eb62b332364/pygments-2.20.0-py3-none-any.whl", hash = "sha256:81a9e26dd42fd28a23a2d169d86d7ac03b46e2f8b59ed4698fb4785f946d0176", size = 1231151, upload-time = "2026-03-29T13:29:30.038Z" }, +] + +[[package]] +name = "pytest" +version = "9.0.3" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "colorama", marker = "sys_platform == 'win32'" }, + { name = "iniconfig" }, + { name = "packaging" }, + { name = "pluggy" }, + { name = "pygments" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/7d/0d/549bd94f1a0a402dc8cf64563a117c0f3765662e2e668477624baeec44d5/pytest-9.0.3.tar.gz", hash = "sha256:b86ada508af81d19edeb213c681b1d48246c1a91d304c6c81a427674c17eb91c", size = 1572165, upload-time = "2026-04-07T17:16:18.027Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/d4/24/a372aaf5c9b7208e7112038812994107bc65a84cd00e0354a88c2c77a617/pytest-9.0.3-py3-none-any.whl", hash = "sha256:2c5efc453d45394fdd706ade797c0a81091eccd1d6e4bccfcd476e2b8e0ab5d9", size = 375249, upload-time = "2026-04-07T17:16:16.13Z" }, +] + +[[package]] +name = "pytest-cov" +version = "7.1.0" +source = { registry = "https://pypi.org/simple" } 
+dependencies = [ + { name = "coverage" }, + { name = "pluggy" }, + { name = "pytest" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/b1/51/a849f96e117386044471c8ec2bd6cfebacda285da9525c9106aeb28da671/pytest_cov-7.1.0.tar.gz", hash = "sha256:30674f2b5f6351aa09702a9c8c364f6a01c27aae0c1366ae8016160d1efc56b2", size = 55592, upload-time = "2026-03-21T20:11:16.284Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/9d/7a/d968e294073affff457b041c2be9868a40c1c71f4a35fcc1e45e5493067b/pytest_cov-7.1.0-py3-none-any.whl", hash = "sha256:a0461110b7865f9a271aa1b51e516c9a95de9d696734a2f71e3e78f46e1d4678", size = 22876, upload-time = "2026-03-21T20:11:14.438Z" }, +] + +[[package]] +name = "python-dateutil" +version = "2.9.0.post0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "six" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/66/c0/0c8b6ad9f17a802ee498c46e004a0eb49bc148f2fd230864601a86dcf6db/python-dateutil-2.9.0.post0.tar.gz", hash = "sha256:37dd54208da7e1cd875388217d5e00ebd4179249f90fb72437e91a35459a0ad3", size = 342432, upload-time = "2024-03-01T18:36:20.211Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/ec/57/56b9bcc3c9c6a792fcbaf139543cee77261f3651ca9da0c93f5c1221264b/python_dateutil-2.9.0.post0-py2.py3-none-any.whl", hash = "sha256:a8b2bc7bffae282281c8140a97d3aa9c14da0b136dfe83f850eea9a5f7470427", size = 229892, upload-time = "2024-03-01T18:36:18.57Z" }, +] + +[[package]] +name = "python-discovery" +version = "1.2.2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "filelock" }, + { name = "platformdirs" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/de/ef/3bae0e537cfe91e8431efcba4434463d2c5a65f5a89edd47c6cf2f03c55f/python_discovery-1.2.2.tar.gz", hash = "sha256:876e9c57139eb757cb5878cbdd9ae5379e5d96266c99ef731119e04fffe533bb", size = 58872, upload-time = "2026-04-07T17:28:49.249Z" } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/d8/db/795879cc3ddfe338599bddea6388cc5100b088db0a4caf6e6c1af1c27e04/python_discovery-1.2.2-py3-none-any.whl", hash = "sha256:e1ae95d9af875e78f15e19aed0c6137ab1bb49c200f21f5061786490c9585c7a", size = 31894, upload-time = "2026-04-07T17:28:48.09Z" }, +] + +[[package]] +name = "python-dotenv" +version = "1.2.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/82/ed/0301aeeac3e5353ef3d94b6ec08bbcabd04a72018415dcb29e588514bba8/python_dotenv-1.2.2.tar.gz", hash = "sha256:2c371a91fbd7ba082c2c1dc1f8bf89ca22564a087c2c287cd9b662adde799cf3", size = 50135, upload-time = "2026-03-01T16:00:26.196Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/0b/d7/1959b9648791274998a9c3526f6d0ec8fd2233e4d4acce81bbae76b44b2a/python_dotenv-1.2.2-py3-none-any.whl", hash = "sha256:1d8214789a24de455a8b8bd8ae6fe3c6b69a5e3d64aa8a8e5d68e694bbcb285a", size = 22101, upload-time = "2026-03-01T16:00:25.09Z" }, +] + +[[package]] +name = "pyyaml" +version = "6.0.3" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/05/8e/961c0007c59b8dd7729d542c61a4d537767a59645b82a0b521206e1e25c2/pyyaml-6.0.3.tar.gz", hash = "sha256:d76623373421df22fb4cf8817020cbb7ef15c725b9d5e45f17e189bfc384190f", size = 130960, upload-time = "2025-09-25T21:33:16.546Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/d1/11/0fd08f8192109f7169db964b5707a2f1e8b745d4e239b784a5a1dd80d1db/pyyaml-6.0.3-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:8da9669d359f02c0b91ccc01cac4a67f16afec0dac22c2ad09f46bee0697eba8", size = 181669, upload-time = "2025-09-25T21:32:23.673Z" }, + { url = "https://files.pythonhosted.org/packages/b1/16/95309993f1d3748cd644e02e38b75d50cbc0d9561d21f390a76242ce073f/pyyaml-6.0.3-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:2283a07e2c21a2aa78d9c4442724ec1eb15f5e42a723b99cb3d822d48f5f7ad1", size = 173252, upload-time = 
"2025-09-25T21:32:25.149Z" }, + { url = "https://files.pythonhosted.org/packages/50/31/b20f376d3f810b9b2371e72ef5adb33879b25edb7a6d072cb7ca0c486398/pyyaml-6.0.3-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:ee2922902c45ae8ccada2c5b501ab86c36525b883eff4255313a253a3160861c", size = 767081, upload-time = "2025-09-25T21:32:26.575Z" }, + { url = "https://files.pythonhosted.org/packages/49/1e/a55ca81e949270d5d4432fbbd19dfea5321eda7c41a849d443dc92fd1ff7/pyyaml-6.0.3-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:a33284e20b78bd4a18c8c2282d549d10bc8408a2a7ff57653c0cf0b9be0afce5", size = 841159, upload-time = "2025-09-25T21:32:27.727Z" }, + { url = "https://files.pythonhosted.org/packages/74/27/e5b8f34d02d9995b80abcef563ea1f8b56d20134d8f4e5e81733b1feceb2/pyyaml-6.0.3-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:0f29edc409a6392443abf94b9cf89ce99889a1dd5376d94316ae5145dfedd5d6", size = 801626, upload-time = "2025-09-25T21:32:28.878Z" }, + { url = "https://files.pythonhosted.org/packages/f9/11/ba845c23988798f40e52ba45f34849aa8a1f2d4af4b798588010792ebad6/pyyaml-6.0.3-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:f7057c9a337546edc7973c0d3ba84ddcdf0daa14533c2065749c9075001090e6", size = 753613, upload-time = "2025-09-25T21:32:30.178Z" }, + { url = "https://files.pythonhosted.org/packages/3d/e0/7966e1a7bfc0a45bf0a7fb6b98ea03fc9b8d84fa7f2229e9659680b69ee3/pyyaml-6.0.3-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:eda16858a3cab07b80edaf74336ece1f986ba330fdb8ee0d6c0d68fe82bc96be", size = 794115, upload-time = "2025-09-25T21:32:31.353Z" }, + { url = "https://files.pythonhosted.org/packages/de/94/980b50a6531b3019e45ddeada0626d45fa85cbe22300844a7983285bed3b/pyyaml-6.0.3-cp313-cp313-win32.whl", hash = "sha256:d0eae10f8159e8fdad514efdc92d74fd8d682c933a6dd088030f3834bc8e6b26", size = 137427, upload-time = 
"2025-09-25T21:32:32.58Z" }, + { url = "https://files.pythonhosted.org/packages/97/c9/39d5b874e8b28845e4ec2202b5da735d0199dbe5b8fb85f91398814a9a46/pyyaml-6.0.3-cp313-cp313-win_amd64.whl", hash = "sha256:79005a0d97d5ddabfeeea4cf676af11e647e41d81c9a7722a193022accdb6b7c", size = 154090, upload-time = "2025-09-25T21:32:33.659Z" }, + { url = "https://files.pythonhosted.org/packages/73/e8/2bdf3ca2090f68bb3d75b44da7bbc71843b19c9f2b9cb9b0f4ab7a5a4329/pyyaml-6.0.3-cp313-cp313-win_arm64.whl", hash = "sha256:5498cd1645aa724a7c71c8f378eb29ebe23da2fc0d7a08071d89469bf1d2defb", size = 140246, upload-time = "2025-09-25T21:32:34.663Z" }, + { url = "https://files.pythonhosted.org/packages/9d/8c/f4bd7f6465179953d3ac9bc44ac1a8a3e6122cf8ada906b4f96c60172d43/pyyaml-6.0.3-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:8d1fab6bb153a416f9aeb4b8763bc0f22a5586065f86f7664fc23339fc1c1fac", size = 181814, upload-time = "2025-09-25T21:32:35.712Z" }, + { url = "https://files.pythonhosted.org/packages/bd/9c/4d95bb87eb2063d20db7b60faa3840c1b18025517ae857371c4dd55a6b3a/pyyaml-6.0.3-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:34d5fcd24b8445fadc33f9cf348c1047101756fd760b4dacb5c3e99755703310", size = 173809, upload-time = "2025-09-25T21:32:36.789Z" }, + { url = "https://files.pythonhosted.org/packages/92/b5/47e807c2623074914e29dabd16cbbdd4bf5e9b2db9f8090fa64411fc5382/pyyaml-6.0.3-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:501a031947e3a9025ed4405a168e6ef5ae3126c59f90ce0cd6f2bfc477be31b7", size = 766454, upload-time = "2025-09-25T21:32:37.966Z" }, + { url = "https://files.pythonhosted.org/packages/02/9e/e5e9b168be58564121efb3de6859c452fccde0ab093d8438905899a3a483/pyyaml-6.0.3-cp314-cp314-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:b3bc83488de33889877a0f2543ade9f70c67d66d9ebb4ac959502e12de895788", size = 836355, upload-time = "2025-09-25T21:32:39.178Z" }, + { url = 
"https://files.pythonhosted.org/packages/88/f9/16491d7ed2a919954993e48aa941b200f38040928474c9e85ea9e64222c3/pyyaml-6.0.3-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c458b6d084f9b935061bc36216e8a69a7e293a2f1e68bf956dcd9e6cbcd143f5", size = 794175, upload-time = "2025-09-25T21:32:40.865Z" }, + { url = "https://files.pythonhosted.org/packages/dd/3f/5989debef34dc6397317802b527dbbafb2b4760878a53d4166579111411e/pyyaml-6.0.3-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:7c6610def4f163542a622a73fb39f534f8c101d690126992300bf3207eab9764", size = 755228, upload-time = "2025-09-25T21:32:42.084Z" }, + { url = "https://files.pythonhosted.org/packages/d7/ce/af88a49043cd2e265be63d083fc75b27b6ed062f5f9fd6cdc223ad62f03e/pyyaml-6.0.3-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:5190d403f121660ce8d1d2c1bb2ef1bd05b5f68533fc5c2ea899bd15f4399b35", size = 789194, upload-time = "2025-09-25T21:32:43.362Z" }, + { url = "https://files.pythonhosted.org/packages/23/20/bb6982b26a40bb43951265ba29d4c246ef0ff59c9fdcdf0ed04e0687de4d/pyyaml-6.0.3-cp314-cp314-win_amd64.whl", hash = "sha256:4a2e8cebe2ff6ab7d1050ecd59c25d4c8bd7e6f400f5f82b96557ac0abafd0ac", size = 156429, upload-time = "2025-09-25T21:32:57.844Z" }, + { url = "https://files.pythonhosted.org/packages/f4/f4/a4541072bb9422c8a883ab55255f918fa378ecf083f5b85e87fc2b4eda1b/pyyaml-6.0.3-cp314-cp314-win_arm64.whl", hash = "sha256:93dda82c9c22deb0a405ea4dc5f2d0cda384168e466364dec6255b293923b2f3", size = 143912, upload-time = "2025-09-25T21:32:59.247Z" }, + { url = "https://files.pythonhosted.org/packages/7c/f9/07dd09ae774e4616edf6cda684ee78f97777bdd15847253637a6f052a62f/pyyaml-6.0.3-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:02893d100e99e03eda1c8fd5c441d8c60103fd175728e23e431db1b589cf5ab3", size = 189108, upload-time = "2025-09-25T21:32:44.377Z" }, + { url = 
"https://files.pythonhosted.org/packages/4e/78/8d08c9fb7ce09ad8c38ad533c1191cf27f7ae1effe5bb9400a46d9437fcf/pyyaml-6.0.3-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:c1ff362665ae507275af2853520967820d9124984e0f7466736aea23d8611fba", size = 183641, upload-time = "2025-09-25T21:32:45.407Z" }, + { url = "https://files.pythonhosted.org/packages/7b/5b/3babb19104a46945cf816d047db2788bcaf8c94527a805610b0289a01c6b/pyyaml-6.0.3-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6adc77889b628398debc7b65c073bcb99c4a0237b248cacaf3fe8a557563ef6c", size = 831901, upload-time = "2025-09-25T21:32:48.83Z" }, + { url = "https://files.pythonhosted.org/packages/8b/cc/dff0684d8dc44da4d22a13f35f073d558c268780ce3c6ba1b87055bb0b87/pyyaml-6.0.3-cp314-cp314t-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:a80cb027f6b349846a3bf6d73b5e95e782175e52f22108cfa17876aaeff93702", size = 861132, upload-time = "2025-09-25T21:32:50.149Z" }, + { url = "https://files.pythonhosted.org/packages/b1/5e/f77dc6b9036943e285ba76b49e118d9ea929885becb0a29ba8a7c75e29fe/pyyaml-6.0.3-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:00c4bdeba853cc34e7dd471f16b4114f4162dc03e6b7afcc2128711f0eca823c", size = 839261, upload-time = "2025-09-25T21:32:51.808Z" }, + { url = "https://files.pythonhosted.org/packages/ce/88/a9db1376aa2a228197c58b37302f284b5617f56a5d959fd1763fb1675ce6/pyyaml-6.0.3-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:66e1674c3ef6f541c35191caae2d429b967b99e02040f5ba928632d9a7f0f065", size = 805272, upload-time = "2025-09-25T21:32:52.941Z" }, + { url = "https://files.pythonhosted.org/packages/da/92/1446574745d74df0c92e6aa4a7b0b3130706a4142b2d1a5869f2eaa423c6/pyyaml-6.0.3-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:16249ee61e95f858e83976573de0f5b2893b3677ba71c9dd36b9cf8be9ac6d65", size = 829923, upload-time = "2025-09-25T21:32:54.537Z" }, + { url 
= "https://files.pythonhosted.org/packages/f0/7a/1c7270340330e575b92f397352af856a8c06f230aa3e76f86b39d01b416a/pyyaml-6.0.3-cp314-cp314t-win_amd64.whl", hash = "sha256:4ad1906908f2f5ae4e5a8ddfce73c320c2a1429ec52eafd27138b7f1cbe341c9", size = 174062, upload-time = "2025-09-25T21:32:55.767Z" }, + { url = "https://files.pythonhosted.org/packages/f1/12/de94a39c2ef588c7e6455cfbe7343d3b2dc9d6b6b2f40c4c6565744c873d/pyyaml-6.0.3-cp314-cp314t-win_arm64.whl", hash = "sha256:ebc55a14a21cb14062aa4162f906cd962b28e2e9ea38f9b4391244cd8de4ae0b", size = 149341, upload-time = "2025-09-25T21:32:56.828Z" }, +] + +[[package]] +name = "regex" +version = "2026.4.4" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/cb/0e/3a246dbf05666918bd3664d9d787f84a9108f6f43cc953a077e4a7dfdb7e/regex-2026.4.4.tar.gz", hash = "sha256:e08270659717f6973523ce3afbafa53515c4dc5dcad637dc215b6fd50f689423", size = 416000, upload-time = "2026-04-03T20:56:28.155Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/9d/83/c4373bc5f31f2cf4b66f9b7c31005bd87fe66f0dce17701f7db4ee79ee29/regex-2026.4.4-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:62f5519042c101762509b1d717b45a69c0139d60414b3c604b81328c01bd1943", size = 490273, upload-time = "2026-04-03T20:54:11.202Z" }, + { url = "https://files.pythonhosted.org/packages/46/f8/fe62afbcc3cf4ad4ac9adeaafd98aa747869ae12d3e8e2ac293d0593c435/regex-2026.4.4-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:3790ba9fb5dd76715a7afe34dbe603ba03f8820764b1dc929dd08106214ed031", size = 291954, upload-time = "2026-04-03T20:54:13.412Z" }, + { url = "https://files.pythonhosted.org/packages/5a/92/4712b9fe6a33d232eeb1c189484b80c6c4b8422b90e766e1195d6e758207/regex-2026.4.4-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:8fae3c6e795d7678963f2170152b0d892cf6aee9ee8afc8c45e6be38d5107fe7", size = 289487, upload-time = "2026-04-03T20:54:15.824Z" }, + { url = 
"https://files.pythonhosted.org/packages/88/2c/f83b93f85e01168f1070f045a42d4c937b69fdb8dd7ae82d307253f7e36e/regex-2026.4.4-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:298c3ec2d53225b3bf91142eb9691025bab610e0c0c51592dde149db679b3d17", size = 796646, upload-time = "2026-04-03T20:54:18.229Z" }, + { url = "https://files.pythonhosted.org/packages/df/55/61a2e17bf0c4dc57e11caf8dd11771280d8aaa361785f9e3bc40d653f4a7/regex-2026.4.4-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:e9638791082eaf5b3ac112c587518ee78e083a11c4b28012d8fe2a0f536dfb17", size = 865904, upload-time = "2026-04-03T20:54:20.019Z" }, + { url = "https://files.pythonhosted.org/packages/45/32/1ac8ed1b5a346b5993a3d256abe0a0f03b0b73c8cc88d928537368ac65b6/regex-2026.4.4-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:ae3e764bd4c5ff55035dc82a8d49acceb42a5298edf6eb2fc4d328ee5dd7afae", size = 912304, upload-time = "2026-04-03T20:54:22.403Z" }, + { url = "https://files.pythonhosted.org/packages/26/47/2ee5c613ab546f0eddebf9905d23e07beb933416b1246c2d8791d01979b4/regex-2026.4.4-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:ffa81f81b80047ba89a3c69ae6a0f78d06f4a42ce5126b0eb2a0a10ad44e0b2e", size = 801126, upload-time = "2026-04-03T20:54:24.308Z" }, + { url = "https://files.pythonhosted.org/packages/75/cd/41dacd129ca9fd20bd7d02f83e0fad83e034ac8a084ec369c90f55ef37e2/regex-2026.4.4-cp313-cp313-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:f56ebf9d70305307a707911b88469213630aba821e77de7d603f9d2f0730687d", size = 776772, upload-time = "2026-04-03T20:54:26.319Z" }, + { url = "https://files.pythonhosted.org/packages/89/6d/5af0b588174cb5f46041fa7dd64d3fd5cd2fe51f18766703d1edc387f324/regex-2026.4.4-cp313-cp313-musllinux_1_2_aarch64.whl", hash = 
"sha256:773d1dfd652bbffb09336abf890bfd64785c7463716bf766d0eb3bc19c8b7f27", size = 785228, upload-time = "2026-04-03T20:54:28.387Z" }, + { url = "https://files.pythonhosted.org/packages/b7/3b/f5a72b7045bd59575fc33bf1345f156fcfd5a8484aea6ad84b12c5a82114/regex-2026.4.4-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:d51d20befd5275d092cdffba57ded05f3c436317ee56466c8928ac32d960edaf", size = 860032, upload-time = "2026-04-03T20:54:30.641Z" }, + { url = "https://files.pythonhosted.org/packages/39/a4/72a317003d6fcd7a573584a85f59f525dfe8f67e355ca74eb6b53d66a5e2/regex-2026.4.4-cp313-cp313-musllinux_1_2_riscv64.whl", hash = "sha256:0a51cdb3c1e9161154f976cb2bef9894bc063ac82f31b733087ffb8e880137d0", size = 765714, upload-time = "2026-04-03T20:54:32.789Z" }, + { url = "https://files.pythonhosted.org/packages/25/1e/5672e16f34dbbcb2560cc7e6a2fbb26dfa8b270711e730101da4423d3973/regex-2026.4.4-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:ae5266a82596114e41fb5302140e9630204c1b5f325c770bec654b95dd54b0aa", size = 852078, upload-time = "2026-04-03T20:54:34.546Z" }, + { url = "https://files.pythonhosted.org/packages/f7/0d/c813f0af7c6cc7ed7b9558bac2e5120b60ad0fa48f813e4d4bd55446f214/regex-2026.4.4-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:c882cd92ec68585e9c1cf36c447ec846c0d94edd706fe59e0c198e65822fd23b", size = 789181, upload-time = "2026-04-03T20:54:36.642Z" }, + { url = "https://files.pythonhosted.org/packages/ea/6d/a344608d1adbd2a95090ddd906cec09a11be0e6517e878d02a5123e0917f/regex-2026.4.4-cp313-cp313-win32.whl", hash = "sha256:05568c4fbf3cb4fa9e28e3af198c40d3237cf6041608a9022285fe567ec3ad62", size = 266690, upload-time = "2026-04-03T20:54:38.343Z" }, + { url = "https://files.pythonhosted.org/packages/31/07/54049f89b46235ca6f45cd6c88668a7050e77d4a15555e47dd40fde75263/regex-2026.4.4-cp313-cp313-win_amd64.whl", hash = "sha256:3384df51ed52db0bea967e21458ab0a414f67cdddfd94401688274e55147bb81", size = 277733, upload-time = "2026-04-03T20:54:40.11Z" }, + { url = 
"https://files.pythonhosted.org/packages/0e/21/61366a8e20f4d43fb597708cac7f0e2baadb491ecc9549b4980b2be27d16/regex-2026.4.4-cp313-cp313-win_arm64.whl", hash = "sha256:acd38177bd2c8e69a411d6521760806042e244d0ef94e2dd03ecdaa8a3c99427", size = 270565, upload-time = "2026-04-03T20:54:41.883Z" }, + { url = "https://files.pythonhosted.org/packages/f1/1e/3a2b9672433bef02f5d39aa1143ca2c08f311c1d041c464a42be9ae648dc/regex-2026.4.4-cp313-cp313t-macosx_10_13_universal2.whl", hash = "sha256:f94a11a9d05afcfcfa640e096319720a19cc0c9f7768e1a61fceee6a3afc6c7c", size = 494126, upload-time = "2026-04-03T20:54:43.602Z" }, + { url = "https://files.pythonhosted.org/packages/4e/4b/c132a4f4fe18ad3340d89fcb56235132b69559136036b845be3c073142ed/regex-2026.4.4-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:36bcb9d6d1307ab629edc553775baada2aefa5c50ccc0215fbfd2afcfff43141", size = 293882, upload-time = "2026-04-03T20:54:45.41Z" }, + { url = "https://files.pythonhosted.org/packages/f4/5f/eaa38092ce7a023656280f2341dbbd4ad5f05d780a70abba7bb4f4bea54c/regex-2026.4.4-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:261c015b3e2ed0919157046d768774ecde57f03d8fa4ba78d29793447f70e717", size = 292334, upload-time = "2026-04-03T20:54:47.051Z" }, + { url = "https://files.pythonhosted.org/packages/5f/f6/dd38146af1392dac33db7074ab331cec23cced3759167735c42c5460a243/regex-2026.4.4-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c228cf65b4a54583763645dcd73819b3b381ca8b4bb1b349dee1c135f4112c07", size = 811691, upload-time = "2026-04-03T20:54:49.074Z" }, + { url = "https://files.pythonhosted.org/packages/7a/f0/dc54c2e69f5eeec50601054998ec3690d5344277e782bd717e49867c1d29/regex-2026.4.4-cp313-cp313t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:dd2630faeb6876fb0c287f664d93ddce4d50cd46c6e88e60378c05c9047e08ca", size = 871227, upload-time = "2026-04-03T20:54:51.035Z" }, + { url = 
"https://files.pythonhosted.org/packages/a1/af/cb16bd5dc61621e27df919a4449bbb7e5a1034c34d307e0a706e9cc0f3e3/regex-2026.4.4-cp313-cp313t-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:6a50ab11b7779b849472337191f3a043e27e17f71555f98d0092fa6d73364520", size = 917435, upload-time = "2026-04-03T20:54:52.994Z" }, + { url = "https://files.pythonhosted.org/packages/5c/71/8b260897f22996b666edd9402861668f45a2ca259f665ac029e6104a2d7d/regex-2026.4.4-cp313-cp313t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:0734f63afe785138549fbe822a8cfeaccd1bae814c5057cc0ed5b9f2de4fc883", size = 816358, upload-time = "2026-04-03T20:54:54.884Z" }, + { url = "https://files.pythonhosted.org/packages/1c/60/775f7f72a510ef238254906c2f3d737fc80b16ca85f07d20e318d2eea894/regex-2026.4.4-cp313-cp313t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:c4ee50606cb1967db7e523224e05f32089101945f859928e65657a2cbb3d278b", size = 785549, upload-time = "2026-04-03T20:54:57.01Z" }, + { url = "https://files.pythonhosted.org/packages/58/42/34d289b3627c03cf381e44da534a0021664188fa49ba41513da0b4ec6776/regex-2026.4.4-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:6c1818f37be3ca02dcb76d63f2c7aaba4b0dc171b579796c6fbe00148dfec6b1", size = 801364, upload-time = "2026-04-03T20:54:58.981Z" }, + { url = "https://files.pythonhosted.org/packages/fc/20/f6ecf319b382a8f1ab529e898b222c3f30600fcede7834733c26279e7465/regex-2026.4.4-cp313-cp313t-musllinux_1_2_ppc64le.whl", hash = "sha256:f5bfc2741d150d0be3e4a0401a5c22b06e60acb9aa4daa46d9e79a6dcd0f135b", size = 866221, upload-time = "2026-04-03T20:55:00.88Z" }, + { url = "https://files.pythonhosted.org/packages/92/6a/9f16d3609d549bd96d7a0b2aee1625d7512ba6a03efc01652149ef88e74d/regex-2026.4.4-cp313-cp313t-musllinux_1_2_riscv64.whl", hash = "sha256:504ffa8a03609a087cad81277a629b6ce884b51a24bd388a7980ad61748618ff", size = 772530, upload-time = "2026-04-03T20:55:03.213Z" }, + { url = 
"https://files.pythonhosted.org/packages/fa/f6/aa9768bc96a4c361ac96419fbaf2dcdc33970bb813df3ba9b09d5d7b6d96/regex-2026.4.4-cp313-cp313t-musllinux_1_2_s390x.whl", hash = "sha256:70aadc6ff12e4b444586e57fc30771f86253f9f0045b29016b9605b4be5f7dfb", size = 856989, upload-time = "2026-04-03T20:55:05.087Z" }, + { url = "https://files.pythonhosted.org/packages/4d/b4/c671db3556be2473ae3e4bb7a297c518d281452871501221251ea4ecba57/regex-2026.4.4-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:f4f83781191007b6ef43b03debc35435f10cad9b96e16d147efe84a1d48bdde4", size = 803241, upload-time = "2026-04-03T20:55:07.162Z" }, + { url = "https://files.pythonhosted.org/packages/2a/5c/83e3b1d89fa4f6e5a1bc97b4abd4a9a97b3c1ac7854164f694f5f0ba98a0/regex-2026.4.4-cp313-cp313t-win32.whl", hash = "sha256:e014a797de43d1847df957c0a2a8e861d1c17547ee08467d1db2c370b7568baa", size = 269921, upload-time = "2026-04-03T20:55:09.62Z" }, + { url = "https://files.pythonhosted.org/packages/28/07/077c387121f42cdb4d92b1301133c0d93b5709d096d1669ab847dda9fe2e/regex-2026.4.4-cp313-cp313t-win_amd64.whl", hash = "sha256:b15b88b0d52b179712632832c1d6e58e5774f93717849a41096880442da41ab0", size = 281240, upload-time = "2026-04-03T20:55:11.521Z" }, + { url = "https://files.pythonhosted.org/packages/9d/22/ead4a4abc7c59a4d882662aa292ca02c8b617f30b6e163bc1728879e9353/regex-2026.4.4-cp313-cp313t-win_arm64.whl", hash = "sha256:586b89cdadf7d67bf86ae3342a4dcd2b8d70a832d90c18a0ae955105caf34dbe", size = 272440, upload-time = "2026-04-03T20:55:13.365Z" }, + { url = "https://files.pythonhosted.org/packages/f0/f5/ed97c2dc47b5fbd4b73c0d7d75f9ebc8eca139f2bbef476bba35f28c0a77/regex-2026.4.4-cp314-cp314-macosx_10_13_universal2.whl", hash = "sha256:2da82d643fa698e5e5210e54af90181603d5853cf469f5eedf9bfc8f59b4b8c7", size = 490343, upload-time = "2026-04-03T20:55:15.241Z" }, + { url = 
"https://files.pythonhosted.org/packages/80/e9/de4828a7385ec166d673a5790ad06ac48cdaa98bc0960108dd4b9cc1aef7/regex-2026.4.4-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:54a1189ad9d9357760557c91103d5e421f0a2dabe68a5cdf9103d0dcf4e00752", size = 291909, upload-time = "2026-04-03T20:55:17.558Z" }, + { url = "https://files.pythonhosted.org/packages/b4/d6/5cfbfc97f3201a4d24b596a77957e092030dcc4205894bc035cedcfce62f/regex-2026.4.4-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:76d67d5afb1fe402d10a6403bae668d000441e2ab115191a804287d53b772951", size = 289692, upload-time = "2026-04-03T20:55:20.561Z" }, + { url = "https://files.pythonhosted.org/packages/8e/ac/f2212d9fd56fe897e36d0110ba30ba2d247bd6410c5bd98499c7e5a1e1f2/regex-2026.4.4-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:e7cd3e4ee8d80447a83bbc9ab0c8459781fa77087f856c3e740d7763be0df27f", size = 796979, upload-time = "2026-04-03T20:55:22.56Z" }, + { url = "https://files.pythonhosted.org/packages/c9/e3/a016c12675fbac988a60c7e1c16e67823ff0bc016beb27bd7a001dbdabc6/regex-2026.4.4-cp314-cp314-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:2e19e18c568d2866d8b6a6dfad823db86193503f90823a8f66689315ba28fbe8", size = 866744, upload-time = "2026-04-03T20:55:24.646Z" }, + { url = "https://files.pythonhosted.org/packages/af/a4/0b90ca4cf17adc3cb43de80ec71018c37c88ad64987e8d0d481a95ca60b5/regex-2026.4.4-cp314-cp314-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:7698a6f38730fd1385d390d1ed07bb13dce39aa616aca6a6d89bea178464b9a4", size = 911613, upload-time = "2026-04-03T20:55:27.033Z" }, + { url = "https://files.pythonhosted.org/packages/8e/3b/2b3dac0b82d41ab43aa87c6ecde63d71189d03fe8854b8ca455a315edac3/regex-2026.4.4-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:173a66f3651cdb761018078e2d9487f4cf971232c990035ec0eb1cdc6bf929a9", size = 800551, 
upload-time = "2026-04-03T20:55:29.532Z" }, + { url = "https://files.pythonhosted.org/packages/25/fe/5365eb7aa0e753c4b5957815c321519ecab033c279c60e1b1ae2367fa810/regex-2026.4.4-cp314-cp314-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:fa7922bbb2cc84fa062d37723f199d4c0cd200245ce269c05db82d904db66b83", size = 776911, upload-time = "2026-04-03T20:55:31.526Z" }, + { url = "https://files.pythonhosted.org/packages/aa/b3/7fb0072156bba065e3b778a7bc7b0a6328212be5dd6a86fd207e0c4f2dab/regex-2026.4.4-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:59f67cd0a0acaf0e564c20bbd7f767286f23e91e2572c5703bf3e56ea7557edb", size = 785751, upload-time = "2026-04-03T20:55:33.797Z" }, + { url = "https://files.pythonhosted.org/packages/02/1a/9f83677eb699273e56e858f7bd95acdbee376d42f59e8bfca2fd80d79df3/regex-2026.4.4-cp314-cp314-musllinux_1_2_ppc64le.whl", hash = "sha256:475e50f3f73f73614f7cba5524d6de49dee269df00272a1b85e3d19f6d498465", size = 860484, upload-time = "2026-04-03T20:55:35.745Z" }, + { url = "https://files.pythonhosted.org/packages/3b/7a/93937507b61cfcff8b4c5857f1b452852b09f741daa9acae15c971d8554e/regex-2026.4.4-cp314-cp314-musllinux_1_2_riscv64.whl", hash = "sha256:a1c0c7d67b64d85ac2e1879923bad2f08a08f3004055f2f406ef73c850114bd4", size = 765939, upload-time = "2026-04-03T20:55:37.972Z" }, + { url = "https://files.pythonhosted.org/packages/86/ea/81a7f968a351c6552b1670ead861e2a385be730ee28402233020c67f9e0f/regex-2026.4.4-cp314-cp314-musllinux_1_2_s390x.whl", hash = "sha256:1371c2ccbb744d66ee63631cc9ca12aa233d5749972626b68fe1a649dd98e566", size = 851417, upload-time = "2026-04-03T20:55:39.92Z" }, + { url = "https://files.pythonhosted.org/packages/4c/7e/323c18ce4b5b8f44517a36342961a0306e931e499febbd876bb149d900f0/regex-2026.4.4-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:59968142787042db793348a3f5b918cf24ced1f23247328530e063f89c128a95", size = 789056, upload-time = "2026-04-03T20:55:42.303Z" }, + { url = 
"https://files.pythonhosted.org/packages/c0/af/e7510f9b11b1913b0cd44eddb784b2d650b2af6515bfce4cffcc5bfd1d38/regex-2026.4.4-cp314-cp314-win32.whl", hash = "sha256:59efe72d37fd5a91e373e5146f187f921f365f4abc1249a5ab446a60f30dd5f8", size = 272130, upload-time = "2026-04-03T20:55:44.995Z" }, + { url = "https://files.pythonhosted.org/packages/9a/51/57dae534c915e2d3a21490e88836fa2ae79dde3b66255ecc0c0a155d2c10/regex-2026.4.4-cp314-cp314-win_amd64.whl", hash = "sha256:e0aab3ff447845049d676827d2ff714aab4f73f340e155b7de7458cf53baa5a4", size = 280992, upload-time = "2026-04-03T20:55:47.316Z" }, + { url = "https://files.pythonhosted.org/packages/0a/5e/abaf9f4c3792e34edb1434f06717fae2b07888d85cb5cec29f9204931bf8/regex-2026.4.4-cp314-cp314-win_arm64.whl", hash = "sha256:a7a5bb6aa0cf62208bb4fa079b0c756734f8ad0e333b425732e8609bd51ee22f", size = 273563, upload-time = "2026-04-03T20:55:49.273Z" }, + { url = "https://files.pythonhosted.org/packages/ff/06/35da85f9f217b9538b99cbb170738993bcc3b23784322decb77619f11502/regex-2026.4.4-cp314-cp314t-macosx_10_13_universal2.whl", hash = "sha256:97850d0638391bdc7d35dc1c1039974dcb921eaafa8cc935ae4d7f272b1d60b3", size = 494191, upload-time = "2026-04-03T20:55:51.258Z" }, + { url = "https://files.pythonhosted.org/packages/54/5b/1bc35f479eef8285c4baf88d8c002023efdeebb7b44a8735b36195486ae7/regex-2026.4.4-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:ee7337f88f2a580679f7bbfe69dc86c043954f9f9c541012f49abc554a962f2e", size = 293877, upload-time = "2026-04-03T20:55:53.214Z" }, + { url = "https://files.pythonhosted.org/packages/39/5b/f53b9ad17480b3ddd14c90da04bfb55ac6894b129e5dea87bcaf7d00e336/regex-2026.4.4-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:7429f4e6192c11d659900c0648ba8776243bf396ab95558b8c51a345afeddde6", size = 292410, upload-time = "2026-04-03T20:55:55.736Z" }, + { url = 
"https://files.pythonhosted.org/packages/bb/56/52377f59f60a7c51aa4161eecf0b6032c20b461805aca051250da435ffc9/regex-2026.4.4-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:dc4f10fbd5dd13dcf4265b4cc07d69ca70280742870c97ae10093e3d66000359", size = 811831, upload-time = "2026-04-03T20:55:57.802Z" }, + { url = "https://files.pythonhosted.org/packages/dd/63/8026310bf066f702a9c361f83a8c9658f3fe4edb349f9c1e5d5273b7c40c/regex-2026.4.4-cp314-cp314t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:a152560af4f9742b96f3827090f866eeec5becd4765c8e0d3473d9d280e76a5a", size = 871199, upload-time = "2026-04-03T20:56:00.333Z" }, + { url = "https://files.pythonhosted.org/packages/20/9f/a514bbb00a466dbb506d43f187a04047f7be1505f10a9a15615ead5080ee/regex-2026.4.4-cp314-cp314t-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:54170b3e95339f415d54651f97df3bff7434a663912f9358237941bbf9143f55", size = 917649, upload-time = "2026-04-03T20:56:02.445Z" }, + { url = "https://files.pythonhosted.org/packages/cb/6b/8399f68dd41a2030218839b9b18360d79b86d22b9fab5ef477c7f23ca67c/regex-2026.4.4-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:07f190d65f5a72dcb9cf7106bfc3d21e7a49dd2879eda2207b683f32165e4d99", size = 816388, upload-time = "2026-04-03T20:56:04.595Z" }, + { url = "https://files.pythonhosted.org/packages/1e/9c/103963f47c24339a483b05edd568594c2be486188f688c0170fd504b2948/regex-2026.4.4-cp314-cp314t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:9a2741ce5a29d3c84b0b94261ba630ab459a1b847a0d6beca7d62d188175c790", size = 785746, upload-time = "2026-04-03T20:56:07.13Z" }, + { url = "https://files.pythonhosted.org/packages/fa/ee/7f6054c0dec0cee3463c304405e4ff42e27cff05bf36fcb34be549ab17bd/regex-2026.4.4-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = 
"sha256:b26c30df3a28fd9793113dac7385a4deb7294a06c0f760dd2b008bd49a9139bc", size = 801483, upload-time = "2026-04-03T20:56:09.365Z" }, + { url = "https://files.pythonhosted.org/packages/30/c2/51d3d941cf6070dc00c3338ecf138615fc3cce0421c3df6abe97a08af61a/regex-2026.4.4-cp314-cp314t-musllinux_1_2_ppc64le.whl", hash = "sha256:421439d1bee44b19f4583ccf42670ca464ffb90e9fdc38d37f39d1ddd1e44f1f", size = 866331, upload-time = "2026-04-03T20:56:12.039Z" }, + { url = "https://files.pythonhosted.org/packages/16/e8/76d50dcc122ac33927d939f350eebcfe3dbcbda96913e03433fc36de5e63/regex-2026.4.4-cp314-cp314t-musllinux_1_2_riscv64.whl", hash = "sha256:b40379b53ecbc747fd9bdf4a0ea14eb8188ca1bd0f54f78893a39024b28f4863", size = 772673, upload-time = "2026-04-03T20:56:14.558Z" }, + { url = "https://files.pythonhosted.org/packages/a5/6e/5f6bf75e20ea6873d05ba4ec78378c375cbe08cdec571c83fbb01606e563/regex-2026.4.4-cp314-cp314t-musllinux_1_2_s390x.whl", hash = "sha256:08c55c13d2eef54f73eeadc33146fb0baaa49e7335eb1aff6ae1324bf0ddbe4a", size = 857146, upload-time = "2026-04-03T20:56:16.663Z" }, + { url = "https://files.pythonhosted.org/packages/0b/33/3c76d9962949e487ebba353a18e89399f292287204ac8f2f4cfc3a51c233/regex-2026.4.4-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:9776b85f510062f5a75ef112afe5f494ef1635607bf1cc220c1391e9ac2f5e81", size = 803463, upload-time = "2026-04-03T20:56:18.923Z" }, + { url = "https://files.pythonhosted.org/packages/19/eb/ef32dcd2cb69b69bc0c3e55205bce94a7def48d495358946bc42186dcccc/regex-2026.4.4-cp314-cp314t-win32.whl", hash = "sha256:385edaebde5db5be103577afc8699fea73a0e36a734ba24870be7ffa61119d74", size = 275709, upload-time = "2026-04-03T20:56:20.996Z" }, + { url = "https://files.pythonhosted.org/packages/a0/86/c291bf740945acbf35ed7dbebf8e2eea2f3f78041f6bd7cdab80cb274dc0/regex-2026.4.4-cp314-cp314t-win_amd64.whl", hash = "sha256:5d354b18839328927832e2fa5f7c95b7a3ccc39e7a681529e1685898e6436d45", size = 285622, upload-time = "2026-04-03T20:56:23.641Z" }, + { 
url = "https://files.pythonhosted.org/packages/d5/e7/ec846d560ae6a597115153c02ca6138a7877a1748b2072d9521c10a93e58/regex-2026.4.4-cp314-cp314t-win_arm64.whl", hash = "sha256:af0384cb01a33600c49505c27c6c57ab0b27bf84a74e28524c92ca897ebdac9d", size = 275773, upload-time = "2026-04-03T20:56:26.07Z" }, +] + +[[package]] +name = "requests" +version = "2.33.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "certifi" }, + { name = "charset-normalizer" }, + { name = "idna" }, + { name = "urllib3" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/5f/a4/98b9c7c6428a668bf7e42ebb7c79d576a1c3c1e3ae2d47e674b468388871/requests-2.33.1.tar.gz", hash = "sha256:18817f8c57c6263968bc123d237e3b8b08ac046f5456bd1e307ee8f4250d3517", size = 134120, upload-time = "2026-03-30T16:09:15.531Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/d7/8e/7540e8a2036f79a125c1d2ebadf69ed7901608859186c856fa0388ef4197/requests-2.33.1-py3-none-any.whl", hash = "sha256:4e6d1ef462f3626a1f0a0a9c42dd93c63bad33f9f1c1937509b8c5c8718ab56a", size = 64947, upload-time = "2026-03-30T16:09:13.83Z" }, +] + +[[package]] +name = "rich" +version = "15.0.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "markdown-it-py" }, + { name = "pygments" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/c0/8f/0722ca900cc807c13a6a0c696dacf35430f72e0ec571c4275d2371fca3e9/rich-15.0.0.tar.gz", hash = "sha256:edd07a4824c6b40189fb7ac9bc4c52536e9780fbbfbddf6f1e2502c31b068c36", size = 230680, upload-time = "2026-04-12T08:24:00.75Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/82/3b/64d4899d73f91ba49a8c18a8ff3f0ea8f1c1d75481760df8c68ef5235bf5/rich-15.0.0-py3-none-any.whl", hash = "sha256:33bd4ef74232fb73fe9279a257718407f169c09b78a87ad3d296f548e27de0bb", size = 310654, upload-time = "2026-04-12T08:24:02.83Z" }, +] + +[[package]] +name = "ruff" +version = "0.15.12" +source = { registry = "https://pypi.org/simple" } 
+sdist = { url = "https://files.pythonhosted.org/packages/99/43/3291f1cc9106f4c63bdce7a8d0df5047fe8422a75b091c16b5e9355e0b11/ruff-0.15.12.tar.gz", hash = "sha256:ecea26adb26b4232c0c2ca19ccbc0083a68344180bba2a600605538ce51a40a6", size = 4643852, upload-time = "2026-04-24T18:17:14.305Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/c3/6e/e78ffb61d4686f3d96ba3df2c801161843746dcbcbb17a1e927d4829312b/ruff-0.15.12-py3-none-linux_armv6l.whl", hash = "sha256:f86f176e188e94d6bdbc09f09bfd9dc729059ad93d0e7390b5a73efe19f8861c", size = 10640713, upload-time = "2026-04-24T18:17:22.841Z" }, + { url = "https://files.pythonhosted.org/packages/ae/08/a317bc231fb9e7b93e4ef3089501e51922ff88d6936ce5cf870c4fe55419/ruff-0.15.12-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:e3bcd123364c3770b8e1b7baaf343cc99a35f197c5c6e8af79015c666c423a6c", size = 11069267, upload-time = "2026-04-24T18:17:30.105Z" }, + { url = "https://files.pythonhosted.org/packages/aa/a4/f828e9718d3dce1f5f11c39c4f65afd32783c8b2aebb2e3d259e492c47bd/ruff-0.15.12-py3-none-macosx_11_0_arm64.whl", hash = "sha256:fe87510d000220aa1ed530d4448a7c696a0cae1213e5ec30e5874287b66557b5", size = 10397182, upload-time = "2026-04-24T18:17:07.177Z" }, + { url = "https://files.pythonhosted.org/packages/71/e0/3310fc6d1b5e1fdea22bf3b1b807c7e187b581021b0d7d4514cccdb5fb71/ruff-0.15.12-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:84a1630093121375a3e2a95b4a6dc7b59e2b4ee76216e32d81aae550a832d002", size = 10758012, upload-time = "2026-04-24T18:16:55.759Z" }, + { url = "https://files.pythonhosted.org/packages/11/c1/a606911aee04c324ddaa883ae418f3569792fd3c4a10c50e0dd0a2311e1e/ruff-0.15.12-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:fb129f40f114f089ebe0ca56c0d251cf2061b17651d464bb6478dc01e69f11f5", size = 10447479, upload-time = "2026-04-24T18:16:51.677Z" }, + { url = 
"https://files.pythonhosted.org/packages/9d/68/4201e8444f0894f21ab4aeeaee68aa4f10b51613514a20d80bd628d57e88/ruff-0.15.12-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b0c862b172d695db7598426b8af465e7e9ac00a3ea2a3630ee67eb82e366aaa6", size = 11234040, upload-time = "2026-04-24T18:17:16.529Z" }, + { url = "https://files.pythonhosted.org/packages/34/ff/8a6d6cf4ccc23fd67060874e832c18919d1557a0611ebef03fdb01fff11e/ruff-0.15.12-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:2849ea9f3484c3aca43a82f484210370319e7170df4dfe4843395ddf6c57bc33", size = 12087377, upload-time = "2026-04-24T18:17:04.944Z" }, + { url = "https://files.pythonhosted.org/packages/85/f6/c669cf73f5152f623d34e69866a46d5e6185816b19fcd5b6dd8a2d299922/ruff-0.15.12-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:9e77c7e51c07fe396826d5969a5b846d9cd4c402535835fb6e21ce8b28fef847", size = 11367784, upload-time = "2026-04-24T18:17:25.409Z" }, + { url = "https://files.pythonhosted.org/packages/e8/39/c61d193b8a1daaa8977f7dea9e8d8ba866e02ea7b65d32f6861693aa4c12/ruff-0.15.12-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:83b2f4f2f3b1026b5fb449b467d9264bf22067b600f7b6f41fc5958909f449d0", size = 11344088, upload-time = "2026-04-24T18:17:12.258Z" }, + { url = "https://files.pythonhosted.org/packages/c2/8d/49afab3645e31e12c590acb6d3b5b69d7aab5b81926dbaf7461f9441f37a/ruff-0.15.12-py3-none-manylinux_2_31_riscv64.whl", hash = "sha256:9ba3b8f1afd7e2e43d8943e55f249e13f9682fde09711644a6e7290eb4f3e339", size = 11271770, upload-time = "2026-04-24T18:17:02.457Z" }, + { url = "https://files.pythonhosted.org/packages/46/06/33f41fe94403e2b755481cdfb9b7ef3e4e0ed031c4581124658d935d52b4/ruff-0.15.12-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:e852ba9fdc890655e1d78f2df1499efbe0e54126bd405362154a75e2bde159c5", size = 10719355, upload-time = "2026-04-24T18:17:27.648Z" }, + { url = 
"https://files.pythonhosted.org/packages/0d/59/18aa4e014debbf559670e4048e39260a85c7fcee84acfd761ac01e7b8d35/ruff-0.15.12-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:dd8aed930da53780d22fc70bdf84452c843cf64f8cb4eb38984319c24c5cd5fd", size = 10462758, upload-time = "2026-04-24T18:17:32.347Z" }, + { url = "https://files.pythonhosted.org/packages/25/e7/cc9f16fd0f3b5fddcbd7ec3d6ae30c8f3fde1047f32a4093a98d633c6570/ruff-0.15.12-py3-none-musllinux_1_2_i686.whl", hash = "sha256:01da3988d225628b709493d7dc67c3b9b12c0210016b08690ef9bd27970b262b", size = 10953498, upload-time = "2026-04-24T18:17:20.674Z" }, + { url = "https://files.pythonhosted.org/packages/72/7a/a9ba7f98c7a575978698f4230c5e8cc54bbc761af34f560818f933dafa0c/ruff-0.15.12-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:9cae0f92bd5700d1213188b31cd3bdd2b315361296d10b96b8e2337d3d11f53e", size = 11447765, upload-time = "2026-04-24T18:17:09.755Z" }, + { url = "https://files.pythonhosted.org/packages/ea/f9/0ae446942c846b8266059ad8a30702a35afae55f5cdc54c5adf8d7afdc27/ruff-0.15.12-py3-none-win32.whl", hash = "sha256:d0185894e038d7043ba8fd6aee7499ece6462dc0ea9f1e260c7451807c714c20", size = 10657277, upload-time = "2026-04-24T18:17:18.591Z" }, + { url = "https://files.pythonhosted.org/packages/33/f1/9614e03e1cdcbf9437570b5400ced8a720b5db22b28d8e0f1bda429f660d/ruff-0.15.12-py3-none-win_amd64.whl", hash = "sha256:c87a162d61ab3adca47c03f7f717c68672edec7d1b5499e652331780fe74950d", size = 11837758, upload-time = "2026-04-24T18:17:00.113Z" }, + { url = "https://files.pythonhosted.org/packages/c0/98/6beb4b351e472e5f4c4613f7c35a5290b8be2497e183825310c4c3a3984b/ruff-0.15.12-py3-none-win_arm64.whl", hash = "sha256:a538f7a82d061cee7be55542aca1d86d1393d55d81d4fcc314370f4340930d4f", size = 11120821, upload-time = "2026-04-24T18:16:57.979Z" }, +] + +[[package]] +name = "safetensors" +version = "0.7.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = 
"https://files.pythonhosted.org/packages/29/9c/6e74567782559a63bd040a236edca26fd71bc7ba88de2ef35d75df3bca5e/safetensors-0.7.0.tar.gz", hash = "sha256:07663963b67e8bd9f0b8ad15bb9163606cd27cc5a1b96235a50d8369803b96b0", size = 200878, upload-time = "2025-11-19T15:18:43.199Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/fa/47/aef6c06649039accf914afef490268e1067ed82be62bcfa5b7e886ad15e8/safetensors-0.7.0-cp38-abi3-macosx_10_12_x86_64.whl", hash = "sha256:c82f4d474cf725255d9e6acf17252991c3c8aac038d6ef363a4bf8be2f6db517", size = 467781, upload-time = "2025-11-19T15:18:35.84Z" }, + { url = "https://files.pythonhosted.org/packages/e8/00/374c0c068e30cd31f1e1b46b4b5738168ec79e7689ca82ee93ddfea05109/safetensors-0.7.0-cp38-abi3-macosx_11_0_arm64.whl", hash = "sha256:94fd4858284736bb67a897a41608b5b0c2496c9bdb3bf2af1fa3409127f20d57", size = 447058, upload-time = "2025-11-19T15:18:34.416Z" }, + { url = "https://files.pythonhosted.org/packages/f1/06/578ffed52c2296f93d7fd2d844cabfa92be51a587c38c8afbb8ae449ca89/safetensors-0.7.0-cp38-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e07d91d0c92a31200f25351f4acb2bc6aff7f48094e13ebb1d0fb995b54b6542", size = 491748, upload-time = "2025-11-19T15:18:09.79Z" }, + { url = "https://files.pythonhosted.org/packages/ae/33/1debbbb70e4791dde185edb9413d1fe01619255abb64b300157d7f15dddd/safetensors-0.7.0-cp38-abi3-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:8469155f4cb518bafb4acf4865e8bb9d6804110d2d9bdcaa78564b9fd841e104", size = 503881, upload-time = "2025-11-19T15:18:16.145Z" }, + { url = "https://files.pythonhosted.org/packages/8e/1c/40c2ca924d60792c3be509833df711b553c60effbd91da6f5284a83f7122/safetensors-0.7.0-cp38-abi3-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:54bef08bf00a2bff599982f6b08e8770e09cc012d7bba00783fc7ea38f1fb37d", size = 623463, upload-time = "2025-11-19T15:18:21.11Z" }, + { url = 
"https://files.pythonhosted.org/packages/9b/3a/13784a9364bd43b0d61eef4bea2845039bc2030458b16594a1bd787ae26e/safetensors-0.7.0-cp38-abi3-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:42cb091236206bb2016d245c377ed383aa7f78691748f3bb6ee1bfa51ae2ce6a", size = 532855, upload-time = "2025-11-19T15:18:25.719Z" }, + { url = "https://files.pythonhosted.org/packages/a0/60/429e9b1cb3fc651937727befe258ea24122d9663e4d5709a48c9cbfceecb/safetensors-0.7.0-cp38-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dac7252938f0696ddea46f5e855dd3138444e82236e3be475f54929f0c510d48", size = 507152, upload-time = "2025-11-19T15:18:33.023Z" }, + { url = "https://files.pythonhosted.org/packages/3c/a8/4b45e4e059270d17af60359713ffd83f97900d45a6afa73aaa0d737d48b6/safetensors-0.7.0-cp38-abi3-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:1d060c70284127fa805085d8f10fbd0962792aed71879d00864acda69dbab981", size = 541856, upload-time = "2025-11-19T15:18:31.075Z" }, + { url = "https://files.pythonhosted.org/packages/06/87/d26d8407c44175d8ae164a95b5a62707fcc445f3c0c56108e37d98070a3d/safetensors-0.7.0-cp38-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:cdab83a366799fa730f90a4ebb563e494f28e9e92c4819e556152ad55e43591b", size = 674060, upload-time = "2025-11-19T15:18:37.211Z" }, + { url = "https://files.pythonhosted.org/packages/11/f5/57644a2ff08dc6325816ba7217e5095f17269dada2554b658442c66aed51/safetensors-0.7.0-cp38-abi3-musllinux_1_2_armv7l.whl", hash = "sha256:672132907fcad9f2aedcb705b2d7b3b93354a2aec1b2f706c4db852abe338f85", size = 771715, upload-time = "2025-11-19T15:18:38.689Z" }, + { url = "https://files.pythonhosted.org/packages/86/31/17883e13a814bd278ae6e266b13282a01049b0c81341da7fd0e3e71a80a3/safetensors-0.7.0-cp38-abi3-musllinux_1_2_i686.whl", hash = "sha256:5d72abdb8a4d56d4020713724ba81dac065fedb7f3667151c4a637f1d3fb26c0", size = 714377, upload-time = "2025-11-19T15:18:40.162Z" }, + { url = 
"https://files.pythonhosted.org/packages/4a/d8/0c8a7dc9b41dcac53c4cbf9df2b9c83e0e0097203de8b37a712b345c0be5/safetensors-0.7.0-cp38-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:b0f6d66c1c538d5a94a73aa9ddca8ccc4227e6c9ff555322ea40bdd142391dd4", size = 677368, upload-time = "2025-11-19T15:18:41.627Z" }, + { url = "https://files.pythonhosted.org/packages/05/e5/cb4b713c8a93469e3c5be7c3f8d77d307e65fe89673e731f5c2bfd0a9237/safetensors-0.7.0-cp38-abi3-win32.whl", hash = "sha256:c74af94bf3ac15ac4d0f2a7c7b4663a15f8c2ab15ed0fc7531ca61d0835eccba", size = 326423, upload-time = "2025-11-19T15:18:45.74Z" }, + { url = "https://files.pythonhosted.org/packages/5d/e6/ec8471c8072382cb91233ba7267fd931219753bb43814cbc71757bfd4dab/safetensors-0.7.0-cp38-abi3-win_amd64.whl", hash = "sha256:d1239932053f56f3456f32eb9625590cc7582e905021f94636202a864d470755", size = 341380, upload-time = "2025-11-19T15:18:44.427Z" }, +] + +[[package]] +name = "scikit-learn" +version = "1.8.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "joblib" }, + { name = "numpy" }, + { name = "scipy" }, + { name = "threadpoolctl" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/0e/d4/40988bf3b8e34feec1d0e6a051446b1f66225f8529b9309becaeef62b6c4/scikit_learn-1.8.0.tar.gz", hash = "sha256:9bccbb3b40e3de10351f8f5068e105d0f4083b1a65fa07b6634fbc401a6287fd", size = 7335585, upload-time = "2025-12-10T07:08:53.618Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/03/aa/e22e0768512ce9255eba34775be2e85c2048da73da1193e841707f8f039c/scikit_learn-1.8.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:0d6ae97234d5d7079dc0040990a6f7aeb97cb7fa7e8945f1999a429b23569e0a", size = 8513770, upload-time = "2025-12-10T07:08:03.251Z" }, + { url = "https://files.pythonhosted.org/packages/58/37/31b83b2594105f61a381fc74ca19e8780ee923be2d496fcd8d2e1147bd99/scikit_learn-1.8.0-cp313-cp313-macosx_12_0_arm64.whl", hash = 
"sha256:edec98c5e7c128328124a029bceb09eda2d526997780fef8d65e9a69eead963e", size = 8044458, upload-time = "2025-12-10T07:08:05.336Z" }, + { url = "https://files.pythonhosted.org/packages/2d/5a/3f1caed8765f33eabb723596666da4ebbf43d11e96550fb18bdec42b467b/scikit_learn-1.8.0-cp313-cp313-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:74b66d8689d52ed04c271e1329f0c61635bcaf5b926db9b12d58914cdc01fe57", size = 8610341, upload-time = "2025-12-10T07:08:07.732Z" }, + { url = "https://files.pythonhosted.org/packages/38/cf/06896db3f71c75902a8e9943b444a56e727418f6b4b4a90c98c934f51ed4/scikit_learn-1.8.0-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:8fdf95767f989b0cfedb85f7ed8ca215d4be728031f56ff5a519ee1e3276dc2e", size = 8900022, upload-time = "2025-12-10T07:08:09.862Z" }, + { url = "https://files.pythonhosted.org/packages/1c/f9/9b7563caf3ec8873e17a31401858efab6b39a882daf6c1bfa88879c0aa11/scikit_learn-1.8.0-cp313-cp313-win_amd64.whl", hash = "sha256:2de443b9373b3b615aec1bb57f9baa6bb3a9bd093f1269ba95c17d870422b271", size = 7989409, upload-time = "2025-12-10T07:08:12.028Z" }, + { url = "https://files.pythonhosted.org/packages/49/bd/1f4001503650e72c4f6009ac0c4413cb17d2d601cef6f71c0453da2732fc/scikit_learn-1.8.0-cp313-cp313-win_arm64.whl", hash = "sha256:eddde82a035681427cbedded4e6eff5e57fa59216c2e3e90b10b19ab1d0a65c3", size = 7619760, upload-time = "2025-12-10T07:08:13.688Z" }, + { url = "https://files.pythonhosted.org/packages/d2/7d/a630359fc9dcc95496588c8d8e3245cc8fd81980251079bc09c70d41d951/scikit_learn-1.8.0-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:7cc267b6108f0a1499a734167282c00c4ebf61328566b55ef262d48e9849c735", size = 8826045, upload-time = "2025-12-10T07:08:15.215Z" }, + { url = "https://files.pythonhosted.org/packages/cc/56/a0c86f6930cfcd1c7054a2bc417e26960bb88d32444fe7f71d5c2cfae891/scikit_learn-1.8.0-cp313-cp313t-macosx_12_0_arm64.whl", hash = 
"sha256:fe1c011a640a9f0791146011dfd3c7d9669785f9fed2b2a5f9e207536cf5c2fd", size = 8420324, upload-time = "2025-12-10T07:08:17.561Z" }, + { url = "https://files.pythonhosted.org/packages/46/1e/05962ea1cebc1cf3876667ecb14c283ef755bf409993c5946ade3b77e303/scikit_learn-1.8.0-cp313-cp313t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:72358cce49465d140cc4e7792015bb1f0296a9742d5622c67e31399b75468b9e", size = 8680651, upload-time = "2025-12-10T07:08:19.952Z" }, + { url = "https://files.pythonhosted.org/packages/fe/56/a85473cd75f200c9759e3a5f0bcab2d116c92a8a02ee08ccd73b870f8bb4/scikit_learn-1.8.0-cp313-cp313t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:80832434a6cc114f5219211eec13dcbc16c2bac0e31ef64c6d346cde3cf054cb", size = 8925045, upload-time = "2025-12-10T07:08:22.11Z" }, + { url = "https://files.pythonhosted.org/packages/cc/b7/64d8cfa896c64435ae57f4917a548d7ac7a44762ff9802f75a79b77cb633/scikit_learn-1.8.0-cp313-cp313t-win_amd64.whl", hash = "sha256:ee787491dbfe082d9c3013f01f5991658b0f38aa8177e4cd4bf434c58f551702", size = 8507994, upload-time = "2025-12-10T07:08:23.943Z" }, + { url = "https://files.pythonhosted.org/packages/5e/37/e192ea709551799379958b4c4771ec507347027bb7c942662c7fbeba31cb/scikit_learn-1.8.0-cp313-cp313t-win_arm64.whl", hash = "sha256:bf97c10a3f5a7543f9b88cbf488d33d175e9146115a451ae34568597ba33dcde", size = 7869518, upload-time = "2025-12-10T07:08:25.71Z" }, + { url = "https://files.pythonhosted.org/packages/24/05/1af2c186174cc92dcab2233f327336058c077d38f6fe2aceb08e6ab4d509/scikit_learn-1.8.0-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:c22a2da7a198c28dd1a6e1136f19c830beab7fdca5b3e5c8bba8394f8a5c45b3", size = 8528667, upload-time = "2025-12-10T07:08:27.541Z" }, + { url = "https://files.pythonhosted.org/packages/a8/25/01c0af38fe969473fb292bba9dc2b8f9b451f3112ff242c647fee3d0dfe7/scikit_learn-1.8.0-cp314-cp314-macosx_12_0_arm64.whl", hash = 
"sha256:6b595b07a03069a2b1740dc08c2299993850ea81cce4fe19b2421e0c970de6b7", size = 8066524, upload-time = "2025-12-10T07:08:29.822Z" }, + { url = "https://files.pythonhosted.org/packages/be/ce/a0623350aa0b68647333940ee46fe45086c6060ec604874e38e9ab7d8e6c/scikit_learn-1.8.0-cp314-cp314-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:29ffc74089f3d5e87dfca4c2c8450f88bdc61b0fc6ed5d267f3988f19a1309f6", size = 8657133, upload-time = "2025-12-10T07:08:31.865Z" }, + { url = "https://files.pythonhosted.org/packages/b8/cb/861b41341d6f1245e6ca80b1c1a8c4dfce43255b03df034429089ca2a2c5/scikit_learn-1.8.0-cp314-cp314-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:fb65db5d7531bccf3a4f6bec3462223bea71384e2cda41da0f10b7c292b9e7c4", size = 8923223, upload-time = "2025-12-10T07:08:34.166Z" }, + { url = "https://files.pythonhosted.org/packages/76/18/a8def8f91b18cd1ba6e05dbe02540168cb24d47e8dcf69e8d00b7da42a08/scikit_learn-1.8.0-cp314-cp314-win_amd64.whl", hash = "sha256:56079a99c20d230e873ea40753102102734c5953366972a71d5cb39a32bc40c6", size = 8096518, upload-time = "2025-12-10T07:08:36.339Z" }, + { url = "https://files.pythonhosted.org/packages/d1/77/482076a678458307f0deb44e29891d6022617b2a64c840c725495bee343f/scikit_learn-1.8.0-cp314-cp314-win_arm64.whl", hash = "sha256:3bad7565bc9cf37ce19a7c0d107742b320c1285df7aab1a6e2d28780df167242", size = 7754546, upload-time = "2025-12-10T07:08:38.128Z" }, + { url = "https://files.pythonhosted.org/packages/2d/d1/ef294ca754826daa043b2a104e59960abfab4cf653891037d19dd5b6f3cf/scikit_learn-1.8.0-cp314-cp314t-macosx_10_15_x86_64.whl", hash = "sha256:4511be56637e46c25721e83d1a9cea9614e7badc7040c4d573d75fbe257d6fd7", size = 8848305, upload-time = "2025-12-10T07:08:41.013Z" }, + { url = "https://files.pythonhosted.org/packages/5b/e2/b1f8b05138ee813b8e1a4149f2f0d289547e60851fd1bb268886915adbda/scikit_learn-1.8.0-cp314-cp314t-macosx_12_0_arm64.whl", hash = 
"sha256:a69525355a641bf8ef136a7fa447672fb54fe8d60cab5538d9eb7c6438543fb9", size = 8432257, upload-time = "2025-12-10T07:08:42.873Z" }, + { url = "https://files.pythonhosted.org/packages/26/11/c32b2138a85dcb0c99f6afd13a70a951bfdff8a6ab42d8160522542fb647/scikit_learn-1.8.0-cp314-cp314t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c2656924ec73e5939c76ac4c8b026fc203b83d8900362eb2599d8aee80e4880f", size = 8678673, upload-time = "2025-12-10T07:08:45.362Z" }, + { url = "https://files.pythonhosted.org/packages/c7/57/51f2384575bdec454f4fe4e7a919d696c9ebce914590abf3e52d47607ab8/scikit_learn-1.8.0-cp314-cp314t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:15fc3b5d19cc2be65404786857f2e13c70c83dd4782676dd6814e3b89dc8f5b9", size = 8922467, upload-time = "2025-12-10T07:08:47.408Z" }, + { url = "https://files.pythonhosted.org/packages/35/4d/748c9e2872637a57981a04adc038dacaa16ba8ca887b23e34953f0b3f742/scikit_learn-1.8.0-cp314-cp314t-win_amd64.whl", hash = "sha256:00d6f1d66fbcf4eba6e356e1420d33cc06c70a45bb1363cd6f6a8e4ebbbdece2", size = 8774395, upload-time = "2025-12-10T07:08:49.337Z" }, + { url = "https://files.pythonhosted.org/packages/60/22/d7b2ebe4704a5e50790ba089d5c2ae308ab6bb852719e6c3bd4f04c3a363/scikit_learn-1.8.0-cp314-cp314t-win_arm64.whl", hash = "sha256:f28dd15c6bb0b66ba09728cf09fd8736c304be29409bd8445a080c1280619e8c", size = 8002647, upload-time = "2025-12-10T07:08:51.601Z" }, +] + +[[package]] +name = "scipy" +version = "1.17.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "numpy" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/7a/97/5a3609c4f8d58b039179648e62dd220f89864f56f7357f5d4f45c29eb2cc/scipy-1.17.1.tar.gz", hash = "sha256:95d8e012d8cb8816c226aef832200b1d45109ed4464303e997c5b13122b297c0", size = 30573822, upload-time = "2026-02-23T00:26:24.851Z" } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/76/27/07ee1b57b65e92645f219b37148a7e7928b82e2b5dbeccecb4dff7c64f0b/scipy-1.17.1-cp313-cp313-macosx_10_14_x86_64.whl", hash = "sha256:5e3c5c011904115f88a39308379c17f91546f77c1667cea98739fe0fccea804c", size = 31590199, upload-time = "2026-02-23T00:19:17.192Z" }, + { url = "https://files.pythonhosted.org/packages/ec/ae/db19f8ab842e9b724bf5dbb7db29302a91f1e55bc4d04b1025d6d605a2c5/scipy-1.17.1-cp313-cp313-macosx_12_0_arm64.whl", hash = "sha256:6fac755ca3d2c3edcb22f479fceaa241704111414831ddd3bc6056e18516892f", size = 28154001, upload-time = "2026-02-23T00:19:22.241Z" }, + { url = "https://files.pythonhosted.org/packages/5b/58/3ce96251560107b381cbd6e8413c483bbb1228a6b919fa8652b0d4090e7f/scipy-1.17.1-cp313-cp313-macosx_14_0_arm64.whl", hash = "sha256:7ff200bf9d24f2e4d5dc6ee8c3ac64d739d3a89e2326ba68aaf6c4a2b838fd7d", size = 20325719, upload-time = "2026-02-23T00:19:26.329Z" }, + { url = "https://files.pythonhosted.org/packages/b2/83/15087d945e0e4d48ce2377498abf5ad171ae013232ae31d06f336e64c999/scipy-1.17.1-cp313-cp313-macosx_14_0_x86_64.whl", hash = "sha256:4b400bdc6f79fa02a4d86640310dde87a21fba0c979efff5248908c6f15fad1b", size = 22683595, upload-time = "2026-02-23T00:19:30.304Z" }, + { url = "https://files.pythonhosted.org/packages/b4/e0/e58fbde4a1a594c8be8114eb4aac1a55bcd6587047efc18a61eb1f5c0d30/scipy-1.17.1-cp313-cp313-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:2b64ca7d4aee0102a97f3ba22124052b4bd2152522355073580bf4845e2550b6", size = 32896429, upload-time = "2026-02-23T00:19:35.536Z" }, + { url = "https://files.pythonhosted.org/packages/f5/5f/f17563f28ff03c7b6799c50d01d5d856a1d55f2676f537ca8d28c7f627cd/scipy-1.17.1-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:581b2264fc0aa555f3f435a5944da7504ea3a065d7029ad60e7c3d1ae09c5464", size = 35203952, upload-time = "2026-02-23T00:19:42.259Z" }, + { url = 
"https://files.pythonhosted.org/packages/8d/a5/9afd17de24f657fdfe4df9a3f1ea049b39aef7c06000c13db1530d81ccca/scipy-1.17.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:beeda3d4ae615106d7094f7e7cef6218392e4465cc95d25f900bebabfded0950", size = 34979063, upload-time = "2026-02-23T00:19:47.547Z" }, + { url = "https://files.pythonhosted.org/packages/8b/13/88b1d2384b424bf7c924f2038c1c409f8d88bb2a8d49d097861dd64a57b2/scipy-1.17.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:6609bc224e9568f65064cfa72edc0f24ee6655b47575954ec6339534b2798369", size = 37598449, upload-time = "2026-02-23T00:19:53.238Z" }, + { url = "https://files.pythonhosted.org/packages/35/e5/d6d0e51fc888f692a35134336866341c08655d92614f492c6860dc45bb2c/scipy-1.17.1-cp313-cp313-win_amd64.whl", hash = "sha256:37425bc9175607b0268f493d79a292c39f9d001a357bebb6b88fdfaff13f6448", size = 36510943, upload-time = "2026-02-23T00:20:50.89Z" }, + { url = "https://files.pythonhosted.org/packages/2a/fd/3be73c564e2a01e690e19cc618811540ba5354c67c8680dce3281123fb79/scipy-1.17.1-cp313-cp313-win_arm64.whl", hash = "sha256:5cf36e801231b6a2059bf354720274b7558746f3b1a4efb43fcf557ccd484a87", size = 24545621, upload-time = "2026-02-23T00:20:55.871Z" }, + { url = "https://files.pythonhosted.org/packages/6f/6b/17787db8b8114933a66f9dcc479a8272e4b4da75fe03b0c282f7b0ade8cd/scipy-1.17.1-cp313-cp313t-macosx_10_14_x86_64.whl", hash = "sha256:d59c30000a16d8edc7e64152e30220bfbd724c9bbb08368c054e24c651314f0a", size = 31936708, upload-time = "2026-02-23T00:19:58.694Z" }, + { url = "https://files.pythonhosted.org/packages/38/2e/524405c2b6392765ab1e2b722a41d5da33dc5c7b7278184a8ad29b6cb206/scipy-1.17.1-cp313-cp313t-macosx_12_0_arm64.whl", hash = "sha256:010f4333c96c9bb1a4516269e33cb5917b08ef2166d5556ca2fd9f082a9e6ea0", size = 28570135, upload-time = "2026-02-23T00:20:03.934Z" }, + { url = 
"https://files.pythonhosted.org/packages/fd/c3/5bd7199f4ea8556c0c8e39f04ccb014ac37d1468e6cfa6a95c6b3562b76e/scipy-1.17.1-cp313-cp313t-macosx_14_0_arm64.whl", hash = "sha256:2ceb2d3e01c5f1d83c4189737a42d9cb2fc38a6eeed225e7515eef71ad301dce", size = 20741977, upload-time = "2026-02-23T00:20:07.935Z" }, + { url = "https://files.pythonhosted.org/packages/d9/b8/8ccd9b766ad14c78386599708eb745f6b44f08400a5fd0ade7cf89b6fc93/scipy-1.17.1-cp313-cp313t-macosx_14_0_x86_64.whl", hash = "sha256:844e165636711ef41f80b4103ed234181646b98a53c8f05da12ca5ca289134f6", size = 23029601, upload-time = "2026-02-23T00:20:12.161Z" }, + { url = "https://files.pythonhosted.org/packages/6d/a0/3cb6f4d2fb3e17428ad2880333cac878909ad1a89f678527b5328b93c1d4/scipy-1.17.1-cp313-cp313t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:158dd96d2207e21c966063e1635b1063cd7787b627b6f07305315dd73d9c679e", size = 33019667, upload-time = "2026-02-23T00:20:17.208Z" }, + { url = "https://files.pythonhosted.org/packages/f3/c3/2d834a5ac7bf3a0c806ad1508efc02dda3c8c61472a56132d7894c312dea/scipy-1.17.1-cp313-cp313t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:74cbb80d93260fe2ffa334efa24cb8f2f0f622a9b9febf8b483c0b865bfb3475", size = 35264159, upload-time = "2026-02-23T00:20:23.087Z" }, + { url = "https://files.pythonhosted.org/packages/4d/77/d3ed4becfdbd217c52062fafe35a72388d1bd82c2d0ba5ca19d6fcc93e11/scipy-1.17.1-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:dbc12c9f3d185f5c737d801da555fb74b3dcfa1a50b66a1a93e09190f41fab50", size = 35102771, upload-time = "2026-02-23T00:20:28.636Z" }, + { url = "https://files.pythonhosted.org/packages/bd/12/d19da97efde68ca1ee5538bb261d5d2c062f0c055575128f11a2730e3ac1/scipy-1.17.1-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:94055a11dfebe37c656e70317e1996dc197e1a15bbcc351bcdd4610e128fe1ca", size = 37665910, upload-time = "2026-02-23T00:20:34.743Z" }, + { url = 
"https://files.pythonhosted.org/packages/06/1c/1172a88d507a4baaf72c5a09bb6c018fe2ae0ab622e5830b703a46cc9e44/scipy-1.17.1-cp313-cp313t-win_amd64.whl", hash = "sha256:e30bdeaa5deed6bc27b4cc490823cd0347d7dae09119b8803ae576ea0ce52e4c", size = 36562980, upload-time = "2026-02-23T00:20:40.575Z" }, + { url = "https://files.pythonhosted.org/packages/70/b0/eb757336e5a76dfa7911f63252e3b7d1de00935d7705cf772db5b45ec238/scipy-1.17.1-cp313-cp313t-win_arm64.whl", hash = "sha256:a720477885a9d2411f94a93d16f9d89bad0f28ca23c3f8daa521e2dcc3f44d49", size = 24856543, upload-time = "2026-02-23T00:20:45.313Z" }, + { url = "https://files.pythonhosted.org/packages/cf/83/333afb452af6f0fd70414dc04f898647ee1423979ce02efa75c3b0f2c28e/scipy-1.17.1-cp314-cp314-macosx_10_14_x86_64.whl", hash = "sha256:a48a72c77a310327f6a3a920092fa2b8fd03d7deaa60f093038f22d98e096717", size = 31584510, upload-time = "2026-02-23T00:21:01.015Z" }, + { url = "https://files.pythonhosted.org/packages/ed/a6/d05a85fd51daeb2e4ea71d102f15b34fedca8e931af02594193ae4fd25f7/scipy-1.17.1-cp314-cp314-macosx_12_0_arm64.whl", hash = "sha256:45abad819184f07240d8a696117a7aacd39787af9e0b719d00285549ed19a1e9", size = 28170131, upload-time = "2026-02-23T00:21:05.888Z" }, + { url = "https://files.pythonhosted.org/packages/db/7b/8624a203326675d7746a254083a187398090a179335b2e4a20e2ddc46e83/scipy-1.17.1-cp314-cp314-macosx_14_0_arm64.whl", hash = "sha256:3fd1fcdab3ea951b610dc4cef356d416d5802991e7e32b5254828d342f7b7e0b", size = 20342032, upload-time = "2026-02-23T00:21:09.904Z" }, + { url = "https://files.pythonhosted.org/packages/c9/35/2c342897c00775d688d8ff3987aced3426858fd89d5a0e26e020b660b301/scipy-1.17.1-cp314-cp314-macosx_14_0_x86_64.whl", hash = "sha256:7bdf2da170b67fdf10bca777614b1c7d96ae3ca5794fd9587dce41eb2966e866", size = 22678766, upload-time = "2026-02-23T00:21:14.313Z" }, + { url = 
"https://files.pythonhosted.org/packages/ef/f2/7cdb8eb308a1a6ae1e19f945913c82c23c0c442a462a46480ce487fdc0ac/scipy-1.17.1-cp314-cp314-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:adb2642e060a6549c343603a3851ba76ef0b74cc8c079a9a58121c7ec9fe2350", size = 32957007, upload-time = "2026-02-23T00:21:19.663Z" }, + { url = "https://files.pythonhosted.org/packages/0b/2e/7eea398450457ecb54e18e9d10110993fa65561c4f3add5e8eccd2b9cd41/scipy-1.17.1-cp314-cp314-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:eee2cfda04c00a857206a4330f0c5e3e56535494e30ca445eb19ec624ae75118", size = 35221333, upload-time = "2026-02-23T00:21:25.278Z" }, + { url = "https://files.pythonhosted.org/packages/d9/77/5b8509d03b77f093a0d52e606d3c4f79e8b06d1d38c441dacb1e26cacf46/scipy-1.17.1-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:d2650c1fb97e184d12d8ba010493ee7b322864f7d3d00d3f9bb97d9c21de4068", size = 35042066, upload-time = "2026-02-23T00:21:31.358Z" }, + { url = "https://files.pythonhosted.org/packages/f9/df/18f80fb99df40b4070328d5ae5c596f2f00fffb50167e31439e932f29e7d/scipy-1.17.1-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:08b900519463543aa604a06bec02461558a6e1cef8fdbb8098f77a48a83c8118", size = 37612763, upload-time = "2026-02-23T00:21:37.247Z" }, + { url = "https://files.pythonhosted.org/packages/4b/39/f0e8ea762a764a9dc52aa7dabcfad51a354819de1f0d4652b6a1122424d6/scipy-1.17.1-cp314-cp314-win_amd64.whl", hash = "sha256:3877ac408e14da24a6196de0ddcace62092bfc12a83823e92e49e40747e52c19", size = 37290984, upload-time = "2026-02-23T00:22:35.023Z" }, + { url = "https://files.pythonhosted.org/packages/7c/56/fe201e3b0f93d1a8bcf75d3379affd228a63d7e2d80ab45467a74b494947/scipy-1.17.1-cp314-cp314-win_arm64.whl", hash = "sha256:f8885db0bc2bffa59d5c1b72fad7a6a92d3e80e7257f967dd81abb553a90d293", size = 25192877, upload-time = "2026-02-23T00:22:39.798Z" }, + { url = 
"https://files.pythonhosted.org/packages/96/ad/f8c414e121f82e02d76f310f16db9899c4fcde36710329502a6b2a3c0392/scipy-1.17.1-cp314-cp314t-macosx_10_14_x86_64.whl", hash = "sha256:1cc682cea2ae55524432f3cdff9e9a3be743d52a7443d0cba9017c23c87ae2f6", size = 31949750, upload-time = "2026-02-23T00:21:42.289Z" }, + { url = "https://files.pythonhosted.org/packages/7c/b0/c741e8865d61b67c81e255f4f0a832846c064e426636cd7de84e74d209be/scipy-1.17.1-cp314-cp314t-macosx_12_0_arm64.whl", hash = "sha256:2040ad4d1795a0ae89bfc7e8429677f365d45aa9fd5e4587cf1ea737f927b4a1", size = 28585858, upload-time = "2026-02-23T00:21:47.706Z" }, + { url = "https://files.pythonhosted.org/packages/ed/1b/3985219c6177866628fa7c2595bfd23f193ceebbe472c98a08824b9466ff/scipy-1.17.1-cp314-cp314t-macosx_14_0_arm64.whl", hash = "sha256:131f5aaea57602008f9822e2115029b55d4b5f7c070287699fe45c661d051e39", size = 20757723, upload-time = "2026-02-23T00:21:52.039Z" }, + { url = "https://files.pythonhosted.org/packages/c0/19/2a04aa25050d656d6f7b9e7b685cc83d6957fb101665bfd9369ca6534563/scipy-1.17.1-cp314-cp314t-macosx_14_0_x86_64.whl", hash = "sha256:9cdc1a2fcfd5c52cfb3045feb399f7b3ce822abdde3a193a6b9a60b3cb5854ca", size = 23043098, upload-time = "2026-02-23T00:21:56.185Z" }, + { url = "https://files.pythonhosted.org/packages/86/f1/3383beb9b5d0dbddd030335bf8a8b32d4317185efe495374f134d8be6cce/scipy-1.17.1-cp314-cp314t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6e3dcd57ab780c741fde8dc68619de988b966db759a3c3152e8e9142c26295ad", size = 33030397, upload-time = "2026-02-23T00:22:01.404Z" }, + { url = "https://files.pythonhosted.org/packages/41/68/8f21e8a65a5a03f25a79165ec9d2b28c00e66dc80546cf5eb803aeeff35b/scipy-1.17.1-cp314-cp314t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:a9956e4d4f4a301ebf6cde39850333a6b6110799d470dbbb1e25326ac447f52a", size = 35281163, upload-time = "2026-02-23T00:22:07.024Z" }, + { url = 
"https://files.pythonhosted.org/packages/84/8d/c8a5e19479554007a5632ed7529e665c315ae7492b4f946b0deb39870e39/scipy-1.17.1-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:a4328d245944d09fd639771de275701ccadf5f781ba0ff092ad141e017eccda4", size = 35116291, upload-time = "2026-02-23T00:22:12.585Z" }, + { url = "https://files.pythonhosted.org/packages/52/52/e57eceff0e342a1f50e274264ed47497b59e6a4e3118808ee58ddda7b74a/scipy-1.17.1-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:a77cbd07b940d326d39a1d1b37817e2ee4d79cb30e7338f3d0cddffae70fcaa2", size = 37682317, upload-time = "2026-02-23T00:22:18.513Z" }, + { url = "https://files.pythonhosted.org/packages/11/2f/b29eafe4a3fbc3d6de9662b36e028d5f039e72d345e05c250e121a230dd4/scipy-1.17.1-cp314-cp314t-win_amd64.whl", hash = "sha256:eb092099205ef62cd1782b006658db09e2fed75bffcae7cc0d44052d8aa0f484", size = 37345327, upload-time = "2026-02-23T00:22:24.442Z" }, + { url = "https://files.pythonhosted.org/packages/07/39/338d9219c4e87f3e708f18857ecd24d22a0c3094752393319553096b98af/scipy-1.17.1-cp314-cp314t-win_arm64.whl", hash = "sha256:200e1050faffacc162be6a486a984a0497866ec54149a01270adc8a59b7c7d21", size = 25489165, upload-time = "2026-02-23T00:22:29.563Z" }, +] + +[[package]] +name = "sentence-transformers" +version = "5.4.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "huggingface-hub" }, + { name = "numpy" }, + { name = "scikit-learn" }, + { name = "scipy" }, + { name = "torch", version = "2.11.0", source = { registry = "https://download.pytorch.org/whl/cpu" }, marker = "sys_platform == 'darwin'" }, + { name = "torch", version = "2.11.0+cpu", source = { registry = "https://download.pytorch.org/whl/cpu" }, marker = "sys_platform != 'darwin'" }, + { name = "tqdm" }, + { name = "transformers" }, + { name = "typing-extensions" }, +] +sdist = { url = 
"https://files.pythonhosted.org/packages/4d/68/7f98c221940ce783b492ad6140384daf2e2918cd7175009d6a362c22b9ee/sentence_transformers-5.4.1.tar.gz", hash = "sha256:436bcb1182a0ff42a8fb2b1c43498a70d0a75b688d182f2cd0d1dd115af61ddc", size = 428910, upload-time = "2026-04-14T13:34:59.006Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/c5/d9/3a9b6f2ccdedc9dc00fe37b2fc58f58f8efbff44565cf4bf39d8568bb13a/sentence_transformers-5.4.1-py3-none-any.whl", hash = "sha256:a6d640fc363849b63affb8e140e9d328feabab86f83d58ac3e16b1c28140b790", size = 571311, upload-time = "2026-04-14T13:34:57.731Z" }, +] + +[[package]] +name = "setuptools" +version = "81.0.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/0d/1c/73e719955c59b8e424d015ab450f51c0af856ae46ea2da83eba51cc88de1/setuptools-81.0.0.tar.gz", hash = "sha256:487b53915f52501f0a79ccfd0c02c165ffe06631443a886740b91af4b7a5845a", size = 1198299, upload-time = "2026-02-06T21:10:39.601Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/e1/e3/c164c88b2e5ce7b24d667b9bd83589cf4f3520d97cad01534cd3c4f55fdb/setuptools-81.0.0-py3-none-any.whl", hash = "sha256:fdd925d5c5d9f62e4b74b30d6dd7828ce236fd6ed998a08d81de62ce5a6310d6", size = 1062021, upload-time = "2026-02-06T21:10:37.175Z" }, +] + +[[package]] +name = "shellingham" +version = "1.5.4" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/58/15/8b3609fd3830ef7b27b655beb4b4e9c62313a4e8da8c676e142cc210d58e/shellingham-1.5.4.tar.gz", hash = "sha256:8dbca0739d487e5bd35ab3ca4b36e11c4078f3a234bfce294b0a0291363404de", size = 10310, upload-time = "2023-10-24T04:13:40.426Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/e0/f9/0595336914c5619e5f28a1fb793285925a8cd4b432c9da0a987836c7f822/shellingham-1.5.4-py2.py3-none-any.whl", hash = "sha256:7ecfff8f2fd72616f7481040475a65b2bf8af90a56c89140852d1120324e8686", size = 9755, upload-time = 
"2023-10-24T04:13:38.866Z" }, +] + +[[package]] +name = "six" +version = "1.17.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/94/e7/b2c673351809dca68a0e064b6af791aa332cf192da575fd474ed7d6f16a2/six-1.17.0.tar.gz", hash = "sha256:ff70335d468e7eb6ec65b95b99d3a2836546063f63acc5171de367e834932a81", size = 34031, upload-time = "2024-12-04T17:35:28.174Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/b7/ce/149a00dd41f10bc29e5921b496af8b574d8413afcd5e30dfa0ed46c2cc5e/six-1.17.0-py2.py3-none-any.whl", hash = "sha256:4721f391ed90541fddacab5acf947aa0d3dc7d27b2e1e8eda2be8970586c3274", size = 11050, upload-time = "2024-12-04T17:35:26.475Z" }, +] + +[[package]] +name = "sqlglot" +version = "30.6.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/3c/66/6ece15f197874e56c76e1d0269cebf284ba992a80dfadca9d1972fdf7edf/sqlglot-30.6.0.tar.gz", hash = "sha256:246d34d39927422a50a3fa155f37b2f6346fba85f1a755b13c941eb32ef93361", size = 5835307, upload-time = "2026-04-20T20:11:08.164Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/dc/e7/64fe971cbca33a0446b06f4a5ff8e3fa4a1dbd0a039ceabcc3e6cf4087a9/sqlglot-30.6.0-py3-none-any.whl", hash = "sha256:e005fc2f47994f90d7d8df341f1cbe937518497b0b7b1507d4c03c4c9dfd2778", size = 673920, upload-time = "2026-04-20T20:11:05.758Z" }, +] + +[[package]] +name = "starlette" +version = "1.0.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "anyio" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/81/69/17425771797c36cded50b7fe44e850315d039f28b15901ab44839e70b593/starlette-1.0.0.tar.gz", hash = "sha256:6a4beaf1f81bb472fd19ea9b918b50dc3a77a6f2e190a12954b25e6ed5eea149", size = 2655289, upload-time = "2026-03-22T18:29:46.779Z" } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/0b/c9/584bc9651441b4ba60cc4d557d8a547b5aff901af35bda3a4ee30c819b82/starlette-1.0.0-py3-none-any.whl", hash = "sha256:d3ec55e0bb321692d275455ddfd3df75fff145d009685eb40dc91fc66b03d38b", size = 72651, upload-time = "2026-03-22T18:29:45.111Z" }, +] + +[[package]] +name = "sympy" +version = "1.14.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "mpmath" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/83/d3/803453b36afefb7c2bb238361cd4ae6125a569b4db67cd9e79846ba2d68c/sympy-1.14.0.tar.gz", hash = "sha256:d3d3fe8df1e5a0b42f0e7bdf50541697dbe7d23746e894990c030e2b05e72517", size = 7793921, upload-time = "2025-04-27T18:05:01.611Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/a2/09/77d55d46fd61b4a135c444fc97158ef34a095e5681d0a6c10b75bf356191/sympy-1.14.0-py3-none-any.whl", hash = "sha256:e091cc3e99d2141a0ba2847328f5479b05d94a6635cb96148ccb3f34671bd8f5", size = 6299353, upload-time = "2025-04-27T18:04:59.103Z" }, +] + +[[package]] +name = "threadpoolctl" +version = "3.6.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/b7/4d/08c89e34946fce2aec4fbb45c9016efd5f4d7f24af8e5d93296e935631d8/threadpoolctl-3.6.0.tar.gz", hash = "sha256:8ab8b4aa3491d812b623328249fab5302a68d2d71745c8a4c719a2fcaba9f44e", size = 21274, upload-time = "2025-03-13T13:49:23.031Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/32/d5/f9a850d79b0851d1d4ef6456097579a9005b31fea68726a4ae5f2d82ddd9/threadpoolctl-3.6.0-py3-none-any.whl", hash = "sha256:43a0b8fd5a2928500110039e43a5eed8480b918967083ea48dc3ab9f13c4a7fb", size = 18638, upload-time = "2025-03-13T13:49:21.846Z" }, +] + +[[package]] +name = "tokenizers" +version = "0.22.2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "huggingface-hub" }, +] +sdist = { url = 
"https://files.pythonhosted.org/packages/73/6f/f80cfef4a312e1fb34baf7d85c72d4411afde10978d4657f8cdd811d3ccc/tokenizers-0.22.2.tar.gz", hash = "sha256:473b83b915e547aa366d1eee11806deaf419e17be16310ac0a14077f1e28f917", size = 372115, upload-time = "2026-01-05T10:45:15.988Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/92/97/5dbfabf04c7e348e655e907ed27913e03db0923abb5dfdd120d7b25630e1/tokenizers-0.22.2-cp39-abi3-macosx_10_12_x86_64.whl", hash = "sha256:544dd704ae7238755d790de45ba8da072e9af3eea688f698b137915ae959281c", size = 3100275, upload-time = "2026-01-05T10:41:02.158Z" }, + { url = "https://files.pythonhosted.org/packages/2e/47/174dca0502ef88b28f1c9e06b73ce33500eedfac7a7692108aec220464e7/tokenizers-0.22.2-cp39-abi3-macosx_11_0_arm64.whl", hash = "sha256:1e418a55456beedca4621dbab65a318981467a2b188e982a23e117f115ce5001", size = 2981472, upload-time = "2026-01-05T10:41:00.276Z" }, + { url = "https://files.pythonhosted.org/packages/d6/84/7990e799f1309a8b87af6b948f31edaa12a3ed22d11b352eaf4f4b2e5753/tokenizers-0.22.2-cp39-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2249487018adec45d6e3554c71d46eb39fa8ea67156c640f7513eb26f318cec7", size = 3290736, upload-time = "2026-01-05T10:40:32.165Z" }, + { url = "https://files.pythonhosted.org/packages/78/59/09d0d9ba94dcd5f4f1368d4858d24546b4bdc0231c2354aa31d6199f0399/tokenizers-0.22.2-cp39-abi3-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:25b85325d0815e86e0bac263506dd114578953b7b53d7de09a6485e4a160a7dd", size = 3168835, upload-time = "2026-01-05T10:40:38.847Z" }, + { url = "https://files.pythonhosted.org/packages/47/50/b3ebb4243e7160bda8d34b731e54dd8ab8b133e50775872e7a434e524c28/tokenizers-0.22.2-cp39-abi3-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:bfb88f22a209ff7b40a576d5324bf8286b519d7358663db21d6246fb17eea2d5", size = 3521673, upload-time = "2026-01-05T10:40:56.614Z" }, + { url = 
"https://files.pythonhosted.org/packages/e0/fa/89f4cb9e08df770b57adb96f8cbb7e22695a4cb6c2bd5f0c4f0ebcf33b66/tokenizers-0.22.2-cp39-abi3-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1c774b1276f71e1ef716e5486f21e76333464f47bece56bbd554485982a9e03e", size = 3724818, upload-time = "2026-01-05T10:40:44.507Z" }, + { url = "https://files.pythonhosted.org/packages/64/04/ca2363f0bfbe3b3d36e95bf67e56a4c88c8e3362b658e616d1ac185d47f2/tokenizers-0.22.2-cp39-abi3-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:df6c4265b289083bf710dff49bc51ef252f9d5be33a45ee2bed151114a56207b", size = 3379195, upload-time = "2026-01-05T10:40:51.139Z" }, + { url = "https://files.pythonhosted.org/packages/2e/76/932be4b50ef6ccedf9d3c6639b056a967a86258c6d9200643f01269211ca/tokenizers-0.22.2-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:369cc9fc8cc10cb24143873a0d95438bb8ee257bb80c71989e3ee290e8d72c67", size = 3274982, upload-time = "2026-01-05T10:40:58.331Z" }, + { url = "https://files.pythonhosted.org/packages/1d/28/5f9f5a4cc211b69e89420980e483831bcc29dade307955cc9dc858a40f01/tokenizers-0.22.2-cp39-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:29c30b83d8dcd061078b05ae0cb94d3c710555fbb44861139f9f83dcca3dc3e4", size = 9478245, upload-time = "2026-01-05T10:41:04.053Z" }, + { url = "https://files.pythonhosted.org/packages/6c/fb/66e2da4704d6aadebf8cb39f1d6d1957df667ab24cff2326b77cda0dcb85/tokenizers-0.22.2-cp39-abi3-musllinux_1_2_armv7l.whl", hash = "sha256:37ae80a28c1d3265bb1f22464c856bd23c02a05bb211e56d0c5301a435be6c1a", size = 9560069, upload-time = "2026-01-05T10:45:10.673Z" }, + { url = "https://files.pythonhosted.org/packages/16/04/fed398b05caa87ce9b1a1bb5166645e38196081b225059a6edaff6440fac/tokenizers-0.22.2-cp39-abi3-musllinux_1_2_i686.whl", hash = "sha256:791135ee325f2336f498590eb2f11dc5c295232f288e75c99a36c5dbce63088a", size = 9899263, upload-time = "2026-01-05T10:45:12.559Z" }, + { url = 
"https://files.pythonhosted.org/packages/05/a1/d62dfe7376beaaf1394917e0f8e93ee5f67fea8fcf4107501db35996586b/tokenizers-0.22.2-cp39-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:38337540fbbddff8e999d59970f3c6f35a82de10053206a7562f1ea02d046fa5", size = 10033429, upload-time = "2026-01-05T10:45:14.333Z" }, + { url = "https://files.pythonhosted.org/packages/fd/18/a545c4ea42af3df6effd7d13d250ba77a0a86fb20393143bbb9a92e434d4/tokenizers-0.22.2-cp39-abi3-win32.whl", hash = "sha256:a6bf3f88c554a2b653af81f3204491c818ae2ac6fbc09e76ef4773351292bc92", size = 2502363, upload-time = "2026-01-05T10:45:20.593Z" }, + { url = "https://files.pythonhosted.org/packages/65/71/0670843133a43d43070abeb1949abfdef12a86d490bea9cd9e18e37c5ff7/tokenizers-0.22.2-cp39-abi3-win_amd64.whl", hash = "sha256:c9ea31edff2968b44a88f97d784c2f16dc0729b8b143ed004699ebca91f05c48", size = 2747786, upload-time = "2026-01-05T10:45:18.411Z" }, + { url = "https://files.pythonhosted.org/packages/72/f4/0de46cfa12cdcbcd464cc59fde36912af405696f687e53a091fb432f694c/tokenizers-0.22.2-cp39-abi3-win_arm64.whl", hash = "sha256:9ce725d22864a1e965217204946f830c37876eee3b2ba6fc6255e8e903d5fcbc", size = 2612133, upload-time = "2026-01-05T10:45:17.232Z" }, +] + +[[package]] +name = "torch" +version = "2.11.0" +source = { registry = "https://download.pytorch.org/whl/cpu" } +resolution-markers = [ + "python_full_version >= '3.14' and sys_platform == 'darwin'", + "python_full_version < '3.14' and sys_platform == 'darwin'", +] +dependencies = [ + { name = "filelock", marker = "sys_platform == 'darwin'" }, + { name = "fsspec", marker = "sys_platform == 'darwin'" }, + { name = "jinja2", marker = "sys_platform == 'darwin'" }, + { name = "networkx", marker = "sys_platform == 'darwin'" }, + { name = "setuptools", marker = "sys_platform == 'darwin'" }, + { name = "sympy", marker = "sys_platform == 'darwin'" }, + { name = "typing-extensions", marker = "sys_platform == 'darwin'" }, +] +wheels = [ + { url = 
"https://download-r2.pytorch.org/whl/cpu/torch-2.11.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:442ec9dc78592564fdad69cf0beaa9da2f82ab810ccb4f13903869a90bf3f15d", upload-time = "2026-03-23T15:17:02Z" }, + { url = "https://download-r2.pytorch.org/whl/cpu/torch-2.11.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:cc3a195701bba2239c313ee311487f80f8aaebe9e89b9073dddbcf2f93b5a0ba", upload-time = "2026-03-23T15:17:06Z" }, + { url = "https://download-r2.pytorch.org/whl/cpu/torch-2.11.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:072a0d6e4865e8b0dc0dbfe6ebed68fae235124222835ef03e5814d414d8c012", upload-time = "2026-03-23T15:17:10Z" }, + { url = "https://download-r2.pytorch.org/whl/cpu/torch-2.11.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:23ec7789017da9d95b6d543d790814785e6f30905c5443efa8257d1490d73f79", upload-time = "2026-03-23T15:17:14Z" }, +] + +[[package]] +name = "torch" +version = "2.11.0+cpu" +source = { registry = "https://download.pytorch.org/whl/cpu" } +resolution-markers = [ + "python_full_version >= '3.14' and sys_platform == 'win32'", + "python_full_version >= '3.14' and sys_platform == 'emscripten'", + "python_full_version >= '3.14' and sys_platform != 'darwin' and sys_platform != 'emscripten' and sys_platform != 'win32'", + "python_full_version < '3.14' and sys_platform == 'win32'", + "python_full_version < '3.14' and sys_platform == 'emscripten'", + "python_full_version < '3.14' and sys_platform != 'darwin' and sys_platform != 'emscripten' and sys_platform != 'win32'", +] +dependencies = [ + { name = "filelock", marker = "sys_platform != 'darwin'" }, + { name = "fsspec", marker = "sys_platform != 'darwin'" }, + { name = "jinja2", marker = "sys_platform != 'darwin'" }, + { name = "networkx", marker = "sys_platform != 'darwin'" }, + { name = "setuptools", marker = "sys_platform != 'darwin'" }, + { name = "sympy", marker = "sys_platform != 'darwin'" }, + { name = "typing-extensions", marker = "sys_platform != 'darwin'" }, +] 
+wheels = [ + { url = "https://download-r2.pytorch.org/whl/cpu/torch-2.11.0%2Bcpu-cp313-cp313-linux_s390x.whl", hash = "sha256:d1eff25ccc454faf21c9666c81bfab8e405e87c12d300708d4559620bc191a36", upload-time = "2026-04-28T00:06:42Z" }, + { url = "https://download-r2.pytorch.org/whl/cpu/torch-2.11.0%2Bcpu-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:48b3e21a311445acdd0b27f13830e21d93adef70d4721e051e9f059baeb9b8f9", upload-time = "2026-04-28T00:06:51Z" }, + { url = "https://download-r2.pytorch.org/whl/cpu/torch-2.11.0%2Bcpu-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:45025d7752dbc6b4c784c03afaee9c5f19730ce084b2e43fc9a2fe1677d9ff86", upload-time = "2026-04-28T00:07:02Z" }, + { url = "https://download-r2.pytorch.org/whl/cpu/torch-2.11.0%2Bcpu-cp313-cp313-win_amd64.whl", hash = "sha256:ed70d4a4fc9f8b826c02fa1a9800a83820fb2fa6ae607680b53390f9ef394d85", upload-time = "2026-04-28T00:07:12Z" }, + { url = "https://download-r2.pytorch.org/whl/cpu/torch-2.11.0%2Bcpu-cp313-cp313t-linux_s390x.whl", hash = "sha256:65d427a196ab0abe359b93c5bffedd76ded02df2b1b1d2d9f11a2609b69f426a", upload-time = "2026-04-28T00:07:19Z" }, + { url = "https://download-r2.pytorch.org/whl/cpu/torch-2.11.0%2Bcpu-cp313-cp313t-manylinux_2_28_aarch64.whl", hash = "sha256:8f13dc7075ae04ca5f876a9f40b4e47522a04c23e30824b4409f42a3f3e57aa4", upload-time = "2026-04-28T00:07:27Z" }, + { url = "https://download-r2.pytorch.org/whl/cpu/torch-2.11.0%2Bcpu-cp313-cp313t-manylinux_2_28_x86_64.whl", hash = "sha256:8713bb8679376ea0ec25742100b6cfb8447e0904c48bddefb9eb0ac1abbfa60a", upload-time = "2026-04-28T00:07:37Z" }, + { url = "https://download-r2.pytorch.org/whl/cpu/torch-2.11.0%2Bcpu-cp313-cp313t-win_amd64.whl", hash = "sha256:62ec1f1694c185f601eab74eb7fc0e8e10c64c06ae82f13c3592774c231c4877", upload-time = "2026-04-28T00:07:47Z" }, + { url = "https://download-r2.pytorch.org/whl/cpu/torch-2.11.0%2Bcpu-cp314-cp314-linux_s390x.whl", hash = 
"sha256:c9a14c367f470623b978e273a4e1915995b4ba7a0ae999178b06c273eea3536f", upload-time = "2026-04-28T00:07:54Z" }, + { url = "https://download-r2.pytorch.org/whl/cpu/torch-2.11.0%2Bcpu-cp314-cp314-manylinux_2_28_aarch64.whl", hash = "sha256:71676f6a9a84bbd385e010198b51fa1c2324fb8f3c512a32d2c81af65f68f4c9", upload-time = "2026-04-28T00:08:02Z" }, + { url = "https://download-r2.pytorch.org/whl/cpu/torch-2.11.0%2Bcpu-cp314-cp314-manylinux_2_28_x86_64.whl", hash = "sha256:f8481ea9088e4e5b81178a75aabdbb658bde8639bc1a15fd5d8f930abc966735", upload-time = "2026-04-28T00:08:11Z" }, + { url = "https://download-r2.pytorch.org/whl/cpu/torch-2.11.0%2Bcpu-cp314-cp314-win_amd64.whl", hash = "sha256:7575af4c9f7f7500ed62b1dafeb069aa0ba35b368a5f09793b3976b3d50f4fe4", upload-time = "2026-04-28T00:08:20Z" }, + { url = "https://download-r2.pytorch.org/whl/cpu/torch-2.11.0%2Bcpu-cp314-cp314t-linux_s390x.whl", hash = "sha256:825f1596878280a3a4c861441674888bc2d792e4ab7b045cb35feeab3f4f5dd7", upload-time = "2026-04-28T00:08:27Z" }, + { url = "https://download-r2.pytorch.org/whl/cpu/torch-2.11.0%2Bcpu-cp314-cp314t-manylinux_2_28_aarch64.whl", hash = "sha256:c8a0bdfb2fd915b6c2cd27c856f63f729c366a4917772eba6b2b02aa3bce70d5", upload-time = "2026-04-28T00:08:36Z" }, + { url = "https://download-r2.pytorch.org/whl/cpu/torch-2.11.0%2Bcpu-cp314-cp314t-manylinux_2_28_x86_64.whl", hash = "sha256:768f22924a25cad2adeb9c6cbac5159e71067c8d4019b1511960d7435a5ca652", upload-time = "2026-04-28T00:08:47Z" }, + { url = "https://download-r2.pytorch.org/whl/cpu/torch-2.11.0%2Bcpu-cp314-cp314t-win_amd64.whl", hash = "sha256:6db45e7b2526d996fbf47c3d08737807a60a4e17996a6d91a97027fe260832c8", upload-time = "2026-04-28T00:08:57Z" }, +] + +[[package]] +name = "tqdm" +version = "4.67.3" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "colorama", marker = "sys_platform == 'win32'" }, +] +sdist = { url = 
"https://files.pythonhosted.org/packages/09/a9/6ba95a270c6f1fbcd8dac228323f2777d886cb206987444e4bce66338dd4/tqdm-4.67.3.tar.gz", hash = "sha256:7d825f03f89244ef73f1d4ce193cb1774a8179fd96f31d7e1dcde62092b960bb", size = 169598, upload-time = "2026-02-03T17:35:53.048Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/16/e1/3079a9ff9b8e11b846c6ac5c8b5bfb7ff225eee721825310c91b3b50304f/tqdm-4.67.3-py3-none-any.whl", hash = "sha256:ee1e4c0e59148062281c49d80b25b67771a127c85fc9676d3be5f243206826bf", size = 78374, upload-time = "2026-02-03T17:35:50.982Z" }, +] + +[[package]] +name = "transformers" +version = "5.6.2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "huggingface-hub" }, + { name = "numpy" }, + { name = "packaging" }, + { name = "pyyaml" }, + { name = "regex" }, + { name = "safetensors" }, + { name = "tokenizers" }, + { name = "tqdm" }, + { name = "typer" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/a4/e9/c6c80a07690142a7d05444271f47b9f3c8aac7dea01d52e1137ee480ad78/transformers-5.6.2.tar.gz", hash = "sha256:e657134c3e5a6bc00a3c35f4e2674bb51adfcd89898495b788a18552bac2b91a", size = 8311867, upload-time = "2026-04-23T18:33:29.332Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/5d/95/0b0218149b0d6f14df35f5b8f676fa83df4f19ed253c3cc447107ef86eca/transformers-5.6.2-py3-none-any.whl", hash = "sha256:f8d3a1bb96778fed9b8aabfd0dd6e19843e4b0f2bb6b59f32b8a92051b0f348f", size = 10364898, upload-time = "2026-04-23T18:33:26.081Z" }, +] + +[[package]] +name = "typer" +version = "0.25.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "annotated-doc" }, + { name = "click" }, + { name = "rich" }, + { name = "shellingham" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/7b/27/ede8cec7596e0041ba7e7b80b47d132562f56ff454313a16f6084e555c9f/typer-0.25.0.tar.gz", hash = "sha256:123eaf9f19bb40fd268310e12a542c0c6b4fab9c98d9d23342a01ff95e3ce930", size = 
120150, upload-time = "2026-04-26T08:46:14.767Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/9a/72/193d4e586ec5a4db834a36bbeb47641a62f951f114ffd0fe5b1b46e8d56f/typer-0.25.0-py3-none-any.whl", hash = "sha256:ac01b48823d3db9a83c9e164338057eadbb1c9957a2a6b4eeb486669c560b5dc", size = 55993, upload-time = "2026-04-26T08:46:15.889Z" }, +] + +[[package]] +name = "typing-extensions" +version = "4.15.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/72/94/1a15dd82efb362ac84269196e94cf00f187f7ed21c242792a923cdb1c61f/typing_extensions-4.15.0.tar.gz", hash = "sha256:0cea48d173cc12fa28ecabc3b837ea3cf6f38c6d1136f85cbaaf598984861466", size = 109391, upload-time = "2025-08-25T13:49:26.313Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/18/67/36e9267722cc04a6b9f15c7f3441c2363321a3ea07da7ae0c0707beb2a9c/typing_extensions-4.15.0-py3-none-any.whl", hash = "sha256:f0fa19c6845758ab08074a0cfa8b7aecb71c999ca73d62883bc25cc018c4e548", size = 44614, upload-time = "2025-08-25T13:49:24.86Z" }, +] + +[[package]] +name = "typing-inspection" +version = "0.4.2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/55/e3/70399cb7dd41c10ac53367ae42139cf4b1ca5f36bb3dc6c9d33acdb43655/typing_inspection-0.4.2.tar.gz", hash = "sha256:ba561c48a67c5958007083d386c3295464928b01faa735ab8547c5692e87f464", size = 75949, upload-time = "2025-10-01T02:14:41.687Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/dc/9b/47798a6c91d8bdb567fe2698fe81e0c6b7cb7ef4d13da4114b41d239f65d/typing_inspection-0.4.2-py3-none-any.whl", hash = "sha256:4ed1cacbdc298c220f1bd249ed5287caa16f34d44ef4e9c3d0cbad5b521545e7", size = 14611, upload-time = "2025-10-01T02:14:40.154Z" }, +] + +[[package]] +name = "tzdata" +version = "2026.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = 
"https://files.pythonhosted.org/packages/ba/19/1b9b0e29f30c6d35cb345486df41110984ea67ae69dddbc0e8a100999493/tzdata-2026.2.tar.gz", hash = "sha256:9173fde7d80d9018e02a662e168e5a2d04f87c41ea174b139fbef642eda62d10", size = 198254, upload-time = "2026-04-24T15:22:08.651Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/ce/e4/dccd7f47c4b64213ac01ef921a1337ee6e30e8c6466046018326977efd95/tzdata-2026.2-py2.py3-none-any.whl", hash = "sha256:bbe9af844f658da81a5f95019480da3a89415801f6cc966806612cc7169bffe7", size = 349321, upload-time = "2026-04-24T15:22:05.876Z" }, +] + +[[package]] +name = "urllib3" +version = "2.6.3" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/c7/24/5f1b3bdffd70275f6661c76461e25f024d5a38a46f04aaca912426a2b1d3/urllib3-2.6.3.tar.gz", hash = "sha256:1b62b6884944a57dbe321509ab94fd4d3b307075e0c2eae991ac71ee15ad38ed", size = 435556, upload-time = "2026-01-07T16:24:43.925Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/39/08/aaaad47bc4e9dc8c725e68f9d04865dbcb2052843ff09c97b08904852d84/urllib3-2.6.3-py3-none-any.whl", hash = "sha256:bf272323e553dfb2e87d9bfd225ca7b0f467b919d7bbd355436d3fd37cb0acd4", size = 131584, upload-time = "2026-01-07T16:24:42.685Z" }, +] + +[[package]] +name = "uvicorn" +version = "0.46.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "click" }, + { name = "h11" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/1f/93/041fca8274050e40e6791f267d82e0e2e27dd165627bd640d3e0e378d877/uvicorn-0.46.0.tar.gz", hash = "sha256:fb9da0926999cc6cb22dc7cd71a94a632f078e6ae47ff683c5c420750fb7413d", size = 88758, upload-time = "2026-04-23T07:16:00.151Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/31/a3/5b1562db76a5a488274b2332a97199b32d0442aca0ed193697fd47786316/uvicorn-0.46.0-py3-none-any.whl", hash = "sha256:bbebbcbed972d162afca128605223022bedd345b7bc7855ce66deb31487a9048", size = 70926, 
upload-time = "2026-04-23T07:15:58.355Z" }, +] + +[package.optional-dependencies] +standard = [ + { name = "colorama", marker = "sys_platform == 'win32'" }, + { name = "httptools" }, + { name = "python-dotenv" }, + { name = "pyyaml" }, + { name = "uvloop", marker = "platform_python_implementation != 'PyPy' and sys_platform != 'cygwin' and sys_platform != 'win32'" }, + { name = "watchfiles" }, + { name = "websockets" }, +] + +[[package]] +name = "uvloop" +version = "0.22.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/06/f0/18d39dbd1971d6d62c4629cc7fa67f74821b0dc1f5a77af43719de7936a7/uvloop-0.22.1.tar.gz", hash = "sha256:6c84bae345b9147082b17371e3dd5d42775bddce91f885499017f4607fdaf39f", size = 2443250, upload-time = "2025-10-16T22:17:19.342Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/89/8c/182a2a593195bfd39842ea68ebc084e20c850806117213f5a299dfc513d9/uvloop-0.22.1-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:561577354eb94200d75aca23fbde86ee11be36b00e52a4eaf8f50fb0c86b7705", size = 1358611, upload-time = "2025-10-16T22:16:36.833Z" }, + { url = "https://files.pythonhosted.org/packages/d2/14/e301ee96a6dc95224b6f1162cd3312f6d1217be3907b79173b06785f2fe7/uvloop-0.22.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:1cdf5192ab3e674ca26da2eada35b288d2fa49fdd0f357a19f0e7c4e7d5077c8", size = 751811, upload-time = "2025-10-16T22:16:38.275Z" }, + { url = "https://files.pythonhosted.org/packages/b7/02/654426ce265ac19e2980bfd9ea6590ca96a56f10c76e63801a2df01c0486/uvloop-0.22.1-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6e2ea3d6190a2968f4a14a23019d3b16870dd2190cd69c8180f7c632d21de68d", size = 4288562, upload-time = "2025-10-16T22:16:39.375Z" }, + { url = 
"https://files.pythonhosted.org/packages/15/c0/0be24758891ef825f2065cd5db8741aaddabe3e248ee6acc5e8a80f04005/uvloop-0.22.1-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:0530a5fbad9c9e4ee3f2b33b148c6a64d47bbad8000ea63704fa8260f4cf728e", size = 4366890, upload-time = "2025-10-16T22:16:40.547Z" }, + { url = "https://files.pythonhosted.org/packages/d2/53/8369e5219a5855869bcee5f4d317f6da0e2c669aecf0ef7d371e3d084449/uvloop-0.22.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:bc5ef13bbc10b5335792360623cc378d52d7e62c2de64660616478c32cd0598e", size = 4119472, upload-time = "2025-10-16T22:16:41.694Z" }, + { url = "https://files.pythonhosted.org/packages/f8/ba/d69adbe699b768f6b29a5eec7b47dd610bd17a69de51b251126a801369ea/uvloop-0.22.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:1f38ec5e3f18c8a10ded09742f7fb8de0108796eb673f30ce7762ce1b8550cad", size = 4239051, upload-time = "2025-10-16T22:16:43.224Z" }, + { url = "https://files.pythonhosted.org/packages/90/cd/b62bdeaa429758aee8de8b00ac0dd26593a9de93d302bff3d21439e9791d/uvloop-0.22.1-cp314-cp314-macosx_10_13_universal2.whl", hash = "sha256:3879b88423ec7e97cd4eba2a443aa26ed4e59b45e6b76aabf13fe2f27023a142", size = 1362067, upload-time = "2025-10-16T22:16:44.503Z" }, + { url = "https://files.pythonhosted.org/packages/0d/f8/a132124dfda0777e489ca86732e85e69afcd1ff7686647000050ba670689/uvloop-0.22.1-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:4baa86acedf1d62115c1dc6ad1e17134476688f08c6efd8a2ab076e815665c74", size = 752423, upload-time = "2025-10-16T22:16:45.968Z" }, + { url = "https://files.pythonhosted.org/packages/a3/94/94af78c156f88da4b3a733773ad5ba0b164393e357cc4bd0ab2e2677a7d6/uvloop-0.22.1-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:297c27d8003520596236bdb2335e6b3f649480bd09e00d1e3a99144b691d2a35", size = 4272437, upload-time = "2025-10-16T22:16:47.451Z" }, + { url = 
"https://files.pythonhosted.org/packages/b5/35/60249e9fd07b32c665192cec7af29e06c7cd96fa1d08b84f012a56a0b38e/uvloop-0.22.1-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c1955d5a1dd43198244d47664a5858082a3239766a839b2102a269aaff7a4e25", size = 4292101, upload-time = "2025-10-16T22:16:49.318Z" }, + { url = "https://files.pythonhosted.org/packages/02/62/67d382dfcb25d0a98ce73c11ed1a6fba5037a1a1d533dcbb7cab033a2636/uvloop-0.22.1-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:b31dc2fccbd42adc73bc4e7cdbae4fc5086cf378979e53ca5d0301838c5682c6", size = 4114158, upload-time = "2025-10-16T22:16:50.517Z" }, + { url = "https://files.pythonhosted.org/packages/f0/7a/f1171b4a882a5d13c8b7576f348acfe6074d72eaf52cccef752f748d4a9f/uvloop-0.22.1-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:93f617675b2d03af4e72a5333ef89450dfaa5321303ede6e67ba9c9d26878079", size = 4177360, upload-time = "2025-10-16T22:16:52.646Z" }, + { url = "https://files.pythonhosted.org/packages/79/7b/b01414f31546caf0919da80ad57cbfe24c56b151d12af68cee1b04922ca8/uvloop-0.22.1-cp314-cp314t-macosx_10_13_universal2.whl", hash = "sha256:37554f70528f60cad66945b885eb01f1bb514f132d92b6eeed1c90fd54ed6289", size = 1454790, upload-time = "2025-10-16T22:16:54.355Z" }, + { url = "https://files.pythonhosted.org/packages/d4/31/0bb232318dd838cad3fa8fb0c68c8b40e1145b32025581975e18b11fab40/uvloop-0.22.1-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:b76324e2dc033a0b2f435f33eb88ff9913c156ef78e153fb210e03c13da746b3", size = 796783, upload-time = "2025-10-16T22:16:55.906Z" }, + { url = "https://files.pythonhosted.org/packages/42/38/c9b09f3271a7a723a5de69f8e237ab8e7803183131bc57c890db0b6bb872/uvloop-0.22.1-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:badb4d8e58ee08dad957002027830d5c3b06aea446a6a3744483c2b3b745345c", size = 4647548, upload-time = "2025-10-16T22:16:57.008Z" }, + { url = 
"https://files.pythonhosted.org/packages/c1/37/945b4ca0ac27e3dc4952642d4c900edd030b3da6c9634875af6e13ae80e5/uvloop-0.22.1-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:b91328c72635f6f9e0282e4a57da7470c7350ab1c9f48546c0f2866205349d21", size = 4467065, upload-time = "2025-10-16T22:16:58.206Z" }, + { url = "https://files.pythonhosted.org/packages/97/cc/48d232f33d60e2e2e0b42f4e73455b146b76ebe216487e862700457fbf3c/uvloop-0.22.1-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:daf620c2995d193449393d6c62131b3fbd40a63bf7b307a1527856ace637fe88", size = 4328384, upload-time = "2025-10-16T22:16:59.36Z" }, + { url = "https://files.pythonhosted.org/packages/e4/16/c1fd27e9549f3c4baf1dc9c20c456cd2f822dbf8de9f463824b0c0357e06/uvloop-0.22.1-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:6cde23eeda1a25c75b2e07d39970f3374105d5eafbaab2a4482be82f272d5a5e", size = 4296730, upload-time = "2025-10-16T22:17:00.744Z" }, +] + +[[package]] +name = "virtualenv" +version = "21.2.4" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "distlib" }, + { name = "filelock" }, + { name = "platformdirs" }, + { name = "python-discovery" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/0c/98/3a7e644e19cb26133488caff231be390579860bbbb3da35913c49a1d0a46/virtualenv-21.2.4.tar.gz", hash = "sha256:b294ef68192638004d72524ce7ef303e9d0cf5a44c95ce2e54a7500a6381cada", size = 5850742, upload-time = "2026-04-14T22:15:31.438Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/27/8d/edd0bd910ff803c308ee9a6b7778621af0d10252219ad9f19ef4d4982a61/virtualenv-21.2.4-py3-none-any.whl", hash = "sha256:29d21e941795206138d0f22f4e45ff7050e5da6c6472299fb7103318763861ac", size = 5831232, upload-time = "2026-04-14T22:15:29.342Z" }, +] + +[[package]] +name = "watchfiles" +version = "1.1.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "anyio" }, +] +sdist = { url = 
"https://files.pythonhosted.org/packages/c2/c9/8869df9b2a2d6c59d79220a4db37679e74f807c559ffe5265e08b227a210/watchfiles-1.1.1.tar.gz", hash = "sha256:a173cb5c16c4f40ab19cecf48a534c409f7ea983ab8fed0741304a1c0a31b3f2", size = 94440, upload-time = "2025-10-14T15:06:21.08Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/bb/f4/f750b29225fe77139f7ae5de89d4949f5a99f934c65a1f1c0b248f26f747/watchfiles-1.1.1-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:130e4876309e8686a5e37dba7d5e9bc77e6ed908266996ca26572437a5271e18", size = 404321, upload-time = "2025-10-14T15:05:02.063Z" }, + { url = "https://files.pythonhosted.org/packages/2b/f9/f07a295cde762644aa4c4bb0f88921d2d141af45e735b965fb2e87858328/watchfiles-1.1.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:5f3bde70f157f84ece3765b42b4a52c6ac1a50334903c6eaf765362f6ccca88a", size = 391783, upload-time = "2025-10-14T15:05:03.052Z" }, + { url = "https://files.pythonhosted.org/packages/bc/11/fc2502457e0bea39a5c958d86d2cb69e407a4d00b85735ca724bfa6e0d1a/watchfiles-1.1.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:14e0b1fe858430fc0251737ef3824c54027bedb8c37c38114488b8e131cf8219", size = 449279, upload-time = "2025-10-14T15:05:04.004Z" }, + { url = "https://files.pythonhosted.org/packages/e3/1f/d66bc15ea0b728df3ed96a539c777acfcad0eb78555ad9efcaa1274688f0/watchfiles-1.1.1-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:f27db948078f3823a6bb3b465180db8ebecf26dd5dae6f6180bd87383b6b4428", size = 459405, upload-time = "2025-10-14T15:05:04.942Z" }, + { url = "https://files.pythonhosted.org/packages/be/90/9f4a65c0aec3ccf032703e6db02d89a157462fbb2cf20dd415128251cac0/watchfiles-1.1.1-cp313-cp313-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:059098c3a429f62fc98e8ec62b982230ef2c8df68c79e826e37b895bc359a9c0", size = 488976, upload-time = "2025-10-14T15:05:05.905Z" }, + { url = 
"https://files.pythonhosted.org/packages/37/57/ee347af605d867f712be7029bb94c8c071732a4b44792e3176fa3c612d39/watchfiles-1.1.1-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:bfb5862016acc9b869bb57284e6cb35fdf8e22fe59f7548858e2f971d045f150", size = 595506, upload-time = "2025-10-14T15:05:06.906Z" }, + { url = "https://files.pythonhosted.org/packages/a8/78/cc5ab0b86c122047f75e8fc471c67a04dee395daf847d3e59381996c8707/watchfiles-1.1.1-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:319b27255aacd9923b8a276bb14d21a5f7ff82564c744235fc5eae58d95422ae", size = 474936, upload-time = "2025-10-14T15:05:07.906Z" }, + { url = "https://files.pythonhosted.org/packages/62/da/def65b170a3815af7bd40a3e7010bf6ab53089ef1b75d05dd5385b87cf08/watchfiles-1.1.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c755367e51db90e75b19454b680903631d41f9e3607fbd941d296a020c2d752d", size = 456147, upload-time = "2025-10-14T15:05:09.138Z" }, + { url = "https://files.pythonhosted.org/packages/57/99/da6573ba71166e82d288d4df0839128004c67d2778d3b566c138695f5c0b/watchfiles-1.1.1-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:c22c776292a23bfc7237a98f791b9ad3144b02116ff10d820829ce62dff46d0b", size = 630007, upload-time = "2025-10-14T15:05:10.117Z" }, + { url = "https://files.pythonhosted.org/packages/a8/51/7439c4dd39511368849eb1e53279cd3454b4a4dbace80bab88feeb83c6b5/watchfiles-1.1.1-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:3a476189be23c3686bc2f4321dd501cb329c0a0469e77b7b534ee10129ae6374", size = 622280, upload-time = "2025-10-14T15:05:11.146Z" }, + { url = "https://files.pythonhosted.org/packages/95/9c/8ed97d4bba5db6fdcdb2b298d3898f2dd5c20f6b73aee04eabe56c59677e/watchfiles-1.1.1-cp313-cp313-win32.whl", hash = "sha256:bf0a91bfb5574a2f7fc223cf95eeea79abfefa404bf1ea5e339c0c1560ae99a0", size = 272056, upload-time = "2025-10-14T15:05:12.156Z" }, + { url = 
"https://files.pythonhosted.org/packages/1f/f3/c14e28429f744a260d8ceae18bf58c1d5fa56b50d006a7a9f80e1882cb0d/watchfiles-1.1.1-cp313-cp313-win_amd64.whl", hash = "sha256:52e06553899e11e8074503c8e716d574adeeb7e68913115c4b3653c53f9bae42", size = 288162, upload-time = "2025-10-14T15:05:13.208Z" }, + { url = "https://files.pythonhosted.org/packages/dc/61/fe0e56c40d5cd29523e398d31153218718c5786b5e636d9ae8ae79453d27/watchfiles-1.1.1-cp313-cp313-win_arm64.whl", hash = "sha256:ac3cc5759570cd02662b15fbcd9d917f7ecd47efe0d6b40474eafd246f91ea18", size = 277909, upload-time = "2025-10-14T15:05:14.49Z" }, + { url = "https://files.pythonhosted.org/packages/79/42/e0a7d749626f1e28c7108a99fb9bf524b501bbbeb9b261ceecde644d5a07/watchfiles-1.1.1-cp313-cp313t-macosx_10_12_x86_64.whl", hash = "sha256:563b116874a9a7ce6f96f87cd0b94f7faf92d08d0021e837796f0a14318ef8da", size = 403389, upload-time = "2025-10-14T15:05:15.777Z" }, + { url = "https://files.pythonhosted.org/packages/15/49/08732f90ce0fbbc13913f9f215c689cfc9ced345fb1bcd8829a50007cc8d/watchfiles-1.1.1-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:3ad9fe1dae4ab4212d8c91e80b832425e24f421703b5a42ef2e4a1e215aff051", size = 389964, upload-time = "2025-10-14T15:05:16.85Z" }, + { url = "https://files.pythonhosted.org/packages/27/0d/7c315d4bd5f2538910491a0393c56bf70d333d51bc5b34bee8e68e8cea19/watchfiles-1.1.1-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ce70f96a46b894b36eba678f153f052967a0d06d5b5a19b336ab0dbbd029f73e", size = 448114, upload-time = "2025-10-14T15:05:17.876Z" }, + { url = "https://files.pythonhosted.org/packages/c3/24/9e096de47a4d11bc4df41e9d1e61776393eac4cb6eb11b3e23315b78b2cc/watchfiles-1.1.1-cp313-cp313t-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:cb467c999c2eff23a6417e58d75e5828716f42ed8289fe6b77a7e5a91036ca70", size = 460264, upload-time = "2025-10-14T15:05:18.962Z" }, + { url = 
"https://files.pythonhosted.org/packages/cc/0f/e8dea6375f1d3ba5fcb0b3583e2b493e77379834c74fd5a22d66d85d6540/watchfiles-1.1.1-cp313-cp313t-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:836398932192dae4146c8f6f737d74baeac8b70ce14831a239bdb1ca882fc261", size = 487877, upload-time = "2025-10-14T15:05:20.094Z" }, + { url = "https://files.pythonhosted.org/packages/ac/5b/df24cfc6424a12deb41503b64d42fbea6b8cb357ec62ca84a5a3476f654a/watchfiles-1.1.1-cp313-cp313t-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:743185e7372b7bc7c389e1badcc606931a827112fbbd37f14c537320fca08620", size = 595176, upload-time = "2025-10-14T15:05:21.134Z" }, + { url = "https://files.pythonhosted.org/packages/8f/b5/853b6757f7347de4e9b37e8cc3289283fb983cba1ab4d2d7144694871d9c/watchfiles-1.1.1-cp313-cp313t-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:afaeff7696e0ad9f02cbb8f56365ff4686ab205fcf9c4c5b6fdfaaa16549dd04", size = 473577, upload-time = "2025-10-14T15:05:22.306Z" }, + { url = "https://files.pythonhosted.org/packages/e1/f7/0a4467be0a56e80447c8529c9fce5b38eab4f513cb3d9bf82e7392a5696b/watchfiles-1.1.1-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3f7eb7da0eb23aa2ba036d4f616d46906013a68caf61b7fdbe42fc8b25132e77", size = 455425, upload-time = "2025-10-14T15:05:23.348Z" }, + { url = "https://files.pythonhosted.org/packages/8e/e0/82583485ea00137ddf69bc84a2db88bd92ab4a6e3c405e5fb878ead8d0e7/watchfiles-1.1.1-cp313-cp313t-musllinux_1_1_aarch64.whl", hash = "sha256:831a62658609f0e5c64178211c942ace999517f5770fe9436be4c2faeba0c0ef", size = 628826, upload-time = "2025-10-14T15:05:24.398Z" }, + { url = "https://files.pythonhosted.org/packages/28/9a/a785356fccf9fae84c0cc90570f11702ae9571036fb25932f1242c82191c/watchfiles-1.1.1-cp313-cp313t-musllinux_1_1_x86_64.whl", hash = "sha256:f9a2ae5c91cecc9edd47e041a930490c31c3afb1f5e6d71de3dc671bfaca02bf", size = 622208, upload-time = "2025-10-14T15:05:25.45Z" }, + { url = 
"https://files.pythonhosted.org/packages/c3/f4/0872229324ef69b2c3edec35e84bd57a1289e7d3fe74588048ed8947a323/watchfiles-1.1.1-cp314-cp314-macosx_10_12_x86_64.whl", hash = "sha256:d1715143123baeeaeadec0528bb7441103979a1d5f6fd0e1f915383fea7ea6d5", size = 404315, upload-time = "2025-10-14T15:05:26.501Z" }, + { url = "https://files.pythonhosted.org/packages/7b/22/16d5331eaed1cb107b873f6ae1b69e9ced582fcf0c59a50cd84f403b1c32/watchfiles-1.1.1-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:39574d6370c4579d7f5d0ad940ce5b20db0e4117444e39b6d8f99db5676c52fd", size = 390869, upload-time = "2025-10-14T15:05:27.649Z" }, + { url = "https://files.pythonhosted.org/packages/b2/7e/5643bfff5acb6539b18483128fdc0ef2cccc94a5b8fbda130c823e8ed636/watchfiles-1.1.1-cp314-cp314-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7365b92c2e69ee952902e8f70f3ba6360d0d596d9299d55d7d386df84b6941fb", size = 449919, upload-time = "2025-10-14T15:05:28.701Z" }, + { url = "https://files.pythonhosted.org/packages/51/2e/c410993ba5025a9f9357c376f48976ef0e1b1aefb73b97a5ae01a5972755/watchfiles-1.1.1-cp314-cp314-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:bfff9740c69c0e4ed32416f013f3c45e2ae42ccedd1167ef2d805c000b6c71a5", size = 460845, upload-time = "2025-10-14T15:05:30.064Z" }, + { url = "https://files.pythonhosted.org/packages/8e/a4/2df3b404469122e8680f0fcd06079317e48db58a2da2950fb45020947734/watchfiles-1.1.1-cp314-cp314-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b27cf2eb1dda37b2089e3907d8ea92922b673c0c427886d4edc6b94d8dfe5db3", size = 489027, upload-time = "2025-10-14T15:05:31.064Z" }, + { url = "https://files.pythonhosted.org/packages/ea/84/4587ba5b1f267167ee715b7f66e6382cca6938e0a4b870adad93e44747e6/watchfiles-1.1.1-cp314-cp314-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:526e86aced14a65a5b0ec50827c745597c782ff46b571dbfe46192ab9e0b3c33", size = 595615, upload-time = "2025-10-14T15:05:32.074Z" }, + { url = 
"https://files.pythonhosted.org/packages/6a/0f/c6988c91d06e93cd0bb3d4a808bcf32375ca1904609835c3031799e3ecae/watchfiles-1.1.1-cp314-cp314-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:04e78dd0b6352db95507fd8cb46f39d185cf8c74e4cf1e4fbad1d3df96faf510", size = 474836, upload-time = "2025-10-14T15:05:33.209Z" }, + { url = "https://files.pythonhosted.org/packages/b4/36/ded8aebea91919485b7bbabbd14f5f359326cb5ec218cd67074d1e426d74/watchfiles-1.1.1-cp314-cp314-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5c85794a4cfa094714fb9c08d4a218375b2b95b8ed1666e8677c349906246c05", size = 455099, upload-time = "2025-10-14T15:05:34.189Z" }, + { url = "https://files.pythonhosted.org/packages/98/e0/8c9bdba88af756a2fce230dd365fab2baf927ba42cd47521ee7498fd5211/watchfiles-1.1.1-cp314-cp314-musllinux_1_1_aarch64.whl", hash = "sha256:74d5012b7630714b66be7b7b7a78855ef7ad58e8650c73afc4c076a1f480a8d6", size = 630626, upload-time = "2025-10-14T15:05:35.216Z" }, + { url = "https://files.pythonhosted.org/packages/2a/84/a95db05354bf2d19e438520d92a8ca475e578c647f78f53197f5a2f17aaf/watchfiles-1.1.1-cp314-cp314-musllinux_1_1_x86_64.whl", hash = "sha256:8fbe85cb3201c7d380d3d0b90e63d520f15d6afe217165d7f98c9c649654db81", size = 622519, upload-time = "2025-10-14T15:05:36.259Z" }, + { url = "https://files.pythonhosted.org/packages/1d/ce/d8acdc8de545de995c339be67711e474c77d643555a9bb74a9334252bd55/watchfiles-1.1.1-cp314-cp314-win32.whl", hash = "sha256:3fa0b59c92278b5a7800d3ee7733da9d096d4aabcfabb9a928918bd276ef9b9b", size = 272078, upload-time = "2025-10-14T15:05:37.63Z" }, + { url = "https://files.pythonhosted.org/packages/c4/c9/a74487f72d0451524be827e8edec251da0cc1fcf111646a511ae752e1a3d/watchfiles-1.1.1-cp314-cp314-win_amd64.whl", hash = "sha256:c2047d0b6cea13b3316bdbafbfa0c4228ae593d995030fda39089d36e64fc03a", size = 287664, upload-time = "2025-10-14T15:05:38.95Z" }, + { url = 
"https://files.pythonhosted.org/packages/df/b8/8ac000702cdd496cdce998c6f4ee0ca1f15977bba51bdf07d872ebdfc34c/watchfiles-1.1.1-cp314-cp314-win_arm64.whl", hash = "sha256:842178b126593addc05acf6fce960d28bc5fae7afbaa2c6c1b3a7b9460e5be02", size = 277154, upload-time = "2025-10-14T15:05:39.954Z" }, + { url = "https://files.pythonhosted.org/packages/47/a8/e3af2184707c29f0f14b1963c0aace6529f9d1b8582d5b99f31bbf42f59e/watchfiles-1.1.1-cp314-cp314t-macosx_10_12_x86_64.whl", hash = "sha256:88863fbbc1a7312972f1c511f202eb30866370ebb8493aef2812b9ff28156a21", size = 403820, upload-time = "2025-10-14T15:05:40.932Z" }, + { url = "https://files.pythonhosted.org/packages/c0/ec/e47e307c2f4bd75f9f9e8afbe3876679b18e1bcec449beca132a1c5ffb2d/watchfiles-1.1.1-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:55c7475190662e202c08c6c0f4d9e345a29367438cf8e8037f3155e10a88d5a5", size = 390510, upload-time = "2025-10-14T15:05:41.945Z" }, + { url = "https://files.pythonhosted.org/packages/d5/a0/ad235642118090f66e7b2f18fd5c42082418404a79205cdfca50b6309c13/watchfiles-1.1.1-cp314-cp314t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3f53fa183d53a1d7a8852277c92b967ae99c2d4dcee2bfacff8868e6e30b15f7", size = 448408, upload-time = "2025-10-14T15:05:43.385Z" }, + { url = "https://files.pythonhosted.org/packages/df/85/97fa10fd5ff3332ae17e7e40e20784e419e28521549780869f1413742e9d/watchfiles-1.1.1-cp314-cp314t-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:6aae418a8b323732fa89721d86f39ec8f092fc2af67f4217a2b07fd3e93c6101", size = 458968, upload-time = "2025-10-14T15:05:44.404Z" }, + { url = "https://files.pythonhosted.org/packages/47/c2/9059c2e8966ea5ce678166617a7f75ecba6164375f3b288e50a40dc6d489/watchfiles-1.1.1-cp314-cp314t-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f096076119da54a6080e8920cbdaac3dbee667eb91dcc5e5b78840b87415bd44", size = 488096, upload-time = "2025-10-14T15:05:45.398Z" }, + { url = 
"https://files.pythonhosted.org/packages/94/44/d90a9ec8ac309bc26db808a13e7bfc0e4e78b6fc051078a554e132e80160/watchfiles-1.1.1-cp314-cp314t-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:00485f441d183717038ed2e887a7c868154f216877653121068107b227a2f64c", size = 596040, upload-time = "2025-10-14T15:05:46.502Z" }, + { url = "https://files.pythonhosted.org/packages/95/68/4e3479b20ca305cfc561db3ed207a8a1c745ee32bf24f2026a129d0ddb6e/watchfiles-1.1.1-cp314-cp314t-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:a55f3e9e493158d7bfdb60a1165035f1cf7d320914e7b7ea83fe22c6023b58fc", size = 473847, upload-time = "2025-10-14T15:05:47.484Z" }, + { url = "https://files.pythonhosted.org/packages/4f/55/2af26693fd15165c4ff7857e38330e1b61ab8c37d15dc79118cdba115b7a/watchfiles-1.1.1-cp314-cp314t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8c91ed27800188c2ae96d16e3149f199d62f86c7af5f5f4d2c61a3ed8cd3666c", size = 455072, upload-time = "2025-10-14T15:05:48.928Z" }, + { url = "https://files.pythonhosted.org/packages/66/1d/d0d200b10c9311ec25d2273f8aad8c3ef7cc7ea11808022501811208a750/watchfiles-1.1.1-cp314-cp314t-musllinux_1_1_aarch64.whl", hash = "sha256:311ff15a0bae3714ffb603e6ba6dbfba4065ab60865d15a6ec544133bdb21099", size = 629104, upload-time = "2025-10-14T15:05:49.908Z" }, + { url = "https://files.pythonhosted.org/packages/e3/bd/fa9bb053192491b3867ba07d2343d9f2252e00811567d30ae8d0f78136fe/watchfiles-1.1.1-cp314-cp314t-musllinux_1_1_x86_64.whl", hash = "sha256:a916a2932da8f8ab582f242c065f5c81bed3462849ca79ee357dd9551b0e9b01", size = 622112, upload-time = "2025-10-14T15:05:50.941Z" }, +] + +[[package]] +name = "websockets" +version = "16.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/04/24/4b2031d72e840ce4c1ccb255f693b15c334757fc50023e4db9537080b8c4/websockets-16.0.tar.gz", hash = "sha256:5f6261a5e56e8d5c42a4497b364ea24d94d9563e8fbd44e78ac40879c60179b5", size = 179346, 
upload-time = "2026-01-10T09:23:47.181Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/cc/9c/baa8456050d1c1b08dd0ec7346026668cbc6f145ab4e314d707bb845bf0d/websockets-16.0-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:878b336ac47938b474c8f982ac2f7266a540adc3fa4ad74ae96fea9823a02cc9", size = 177364, upload-time = "2026-01-10T09:22:59.333Z" }, + { url = "https://files.pythonhosted.org/packages/7e/0c/8811fc53e9bcff68fe7de2bcbe75116a8d959ac699a3200f4847a8925210/websockets-16.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:52a0fec0e6c8d9a784c2c78276a48a2bdf099e4ccc2a4cad53b27718dbfd0230", size = 175039, upload-time = "2026-01-10T09:23:01.171Z" }, + { url = "https://files.pythonhosted.org/packages/aa/82/39a5f910cb99ec0b59e482971238c845af9220d3ab9fa76dd9162cda9d62/websockets-16.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:e6578ed5b6981005df1860a56e3617f14a6c307e6a71b4fff8c48fdc50f3ed2c", size = 175323, upload-time = "2026-01-10T09:23:02.341Z" }, + { url = "https://files.pythonhosted.org/packages/bd/28/0a25ee5342eb5d5f297d992a77e56892ecb65e7854c7898fb7d35e9b33bd/websockets-16.0-cp313-cp313-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:95724e638f0f9c350bb1c2b0a7ad0e83d9cc0c9259f3ea94e40d7b02a2179ae5", size = 184975, upload-time = "2026-01-10T09:23:03.756Z" }, + { url = "https://files.pythonhosted.org/packages/f9/66/27ea52741752f5107c2e41fda05e8395a682a1e11c4e592a809a90c6a506/websockets-16.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c0204dc62a89dc9d50d682412c10b3542d748260d743500a85c13cd1ee4bde82", size = 186203, upload-time = "2026-01-10T09:23:05.01Z" }, + { url = "https://files.pythonhosted.org/packages/37/e5/8e32857371406a757816a2b471939d51c463509be73fa538216ea52b792a/websockets-16.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:52ac480f44d32970d66763115edea932f1c5b1312de36df06d6b219f6741eed8", size = 185653, upload-time = 
"2026-01-10T09:23:06.301Z" }, + { url = "https://files.pythonhosted.org/packages/9b/67/f926bac29882894669368dc73f4da900fcdf47955d0a0185d60103df5737/websockets-16.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:6e5a82b677f8f6f59e8dfc34ec06ca6b5b48bc4fcda346acd093694cc2c24d8f", size = 184920, upload-time = "2026-01-10T09:23:07.492Z" }, + { url = "https://files.pythonhosted.org/packages/3c/a1/3d6ccdcd125b0a42a311bcd15a7f705d688f73b2a22d8cf1c0875d35d34a/websockets-16.0-cp313-cp313-win32.whl", hash = "sha256:abf050a199613f64c886ea10f38b47770a65154dc37181bfaff70c160f45315a", size = 178255, upload-time = "2026-01-10T09:23:09.245Z" }, + { url = "https://files.pythonhosted.org/packages/6b/ae/90366304d7c2ce80f9b826096a9e9048b4bb760e44d3b873bb272cba696b/websockets-16.0-cp313-cp313-win_amd64.whl", hash = "sha256:3425ac5cf448801335d6fdc7ae1eb22072055417a96cc6b31b3861f455fbc156", size = 178689, upload-time = "2026-01-10T09:23:10.483Z" }, + { url = "https://files.pythonhosted.org/packages/f3/1d/e88022630271f5bd349ed82417136281931e558d628dd52c4d8621b4a0b2/websockets-16.0-cp314-cp314-macosx_10_15_universal2.whl", hash = "sha256:8cc451a50f2aee53042ac52d2d053d08bf89bcb31ae799cb4487587661c038a0", size = 177406, upload-time = "2026-01-10T09:23:12.178Z" }, + { url = "https://files.pythonhosted.org/packages/f2/78/e63be1bf0724eeb4616efb1ae1c9044f7c3953b7957799abb5915bffd38e/websockets-16.0-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:daa3b6ff70a9241cf6c7fc9e949d41232d9d7d26fd3522b1ad2b4d62487e9904", size = 175085, upload-time = "2026-01-10T09:23:13.511Z" }, + { url = "https://files.pythonhosted.org/packages/bb/f4/d3c9220d818ee955ae390cf319a7c7a467beceb24f05ee7aaaa2414345ba/websockets-16.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:fd3cb4adb94a2a6e2b7c0d8d05cb94e6f1c81a0cf9dc2694fb65c7e8d94c42e4", size = 175328, upload-time = "2026-01-10T09:23:14.727Z" }, + { url = 
"https://files.pythonhosted.org/packages/63/bc/d3e208028de777087e6fb2b122051a6ff7bbcca0d6df9d9c2bf1dd869ae9/websockets-16.0-cp314-cp314-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:781caf5e8eee67f663126490c2f96f40906594cb86b408a703630f95550a8c3e", size = 185044, upload-time = "2026-01-10T09:23:15.939Z" }, + { url = "https://files.pythonhosted.org/packages/ad/6e/9a0927ac24bd33a0a9af834d89e0abc7cfd8e13bed17a86407a66773cc0e/websockets-16.0-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:caab51a72c51973ca21fa8a18bd8165e1a0183f1ac7066a182ff27107b71e1a4", size = 186279, upload-time = "2026-01-10T09:23:17.148Z" }, + { url = "https://files.pythonhosted.org/packages/b9/ca/bf1c68440d7a868180e11be653c85959502efd3a709323230314fda6e0b3/websockets-16.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:19c4dc84098e523fd63711e563077d39e90ec6702aff4b5d9e344a60cb3c0cb1", size = 185711, upload-time = "2026-01-10T09:23:18.372Z" }, + { url = "https://files.pythonhosted.org/packages/c4/f8/fdc34643a989561f217bb477cbc47a3a07212cbda91c0e4389c43c296ebf/websockets-16.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:a5e18a238a2b2249c9a9235466b90e96ae4795672598a58772dd806edc7ac6d3", size = 184982, upload-time = "2026-01-10T09:23:19.652Z" }, + { url = "https://files.pythonhosted.org/packages/dd/d1/574fa27e233764dbac9c52730d63fcf2823b16f0856b3329fc6268d6ae4f/websockets-16.0-cp314-cp314-win32.whl", hash = "sha256:a069d734c4a043182729edd3e9f247c3b2a4035415a9172fd0f1b71658a320a8", size = 177915, upload-time = "2026-01-10T09:23:21.458Z" }, + { url = "https://files.pythonhosted.org/packages/8a/f1/ae6b937bf3126b5134ce1f482365fde31a357c784ac51852978768b5eff4/websockets-16.0-cp314-cp314-win_amd64.whl", hash = "sha256:c0ee0e63f23914732c6d7e0cce24915c48f3f1512ec1d079ed01fc629dab269d", size = 178381, upload-time = "2026-01-10T09:23:22.715Z" }, + { url = 
"https://files.pythonhosted.org/packages/06/9b/f791d1db48403e1f0a27577a6beb37afae94254a8c6f08be4a23e4930bc0/websockets-16.0-cp314-cp314t-macosx_10_15_universal2.whl", hash = "sha256:a35539cacc3febb22b8f4d4a99cc79b104226a756aa7400adc722e83b0d03244", size = 177737, upload-time = "2026-01-10T09:23:24.523Z" }, + { url = "https://files.pythonhosted.org/packages/bd/40/53ad02341fa33b3ce489023f635367a4ac98b73570102ad2cdd770dacc9a/websockets-16.0-cp314-cp314t-macosx_10_15_x86_64.whl", hash = "sha256:b784ca5de850f4ce93ec85d3269d24d4c82f22b7212023c974c401d4980ebc5e", size = 175268, upload-time = "2026-01-10T09:23:25.781Z" }, + { url = "https://files.pythonhosted.org/packages/74/9b/6158d4e459b984f949dcbbb0c5d270154c7618e11c01029b9bbd1bb4c4f9/websockets-16.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:569d01a4e7fba956c5ae4fc988f0d4e187900f5497ce46339c996dbf24f17641", size = 175486, upload-time = "2026-01-10T09:23:27.033Z" }, + { url = "https://files.pythonhosted.org/packages/e5/2d/7583b30208b639c8090206f95073646c2c9ffd66f44df967981a64f849ad/websockets-16.0-cp314-cp314t-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:50f23cdd8343b984957e4077839841146f67a3d31ab0d00e6b824e74c5b2f6e8", size = 185331, upload-time = "2026-01-10T09:23:28.259Z" }, + { url = "https://files.pythonhosted.org/packages/45/b0/cce3784eb519b7b5ad680d14b9673a31ab8dcb7aad8b64d81709d2430aa8/websockets-16.0-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:152284a83a00c59b759697b7f9e9cddf4e3c7861dd0d964b472b70f78f89e80e", size = 186501, upload-time = "2026-01-10T09:23:29.449Z" }, + { url = "https://files.pythonhosted.org/packages/19/60/b8ebe4c7e89fb5f6cdf080623c9d92789a53636950f7abacfc33fe2b3135/websockets-16.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:bc59589ab64b0022385f429b94697348a6a234e8ce22544e3681b2e9331b5944", size = 186062, upload-time = "2026-01-10T09:23:31.368Z" }, + { url = 
"https://files.pythonhosted.org/packages/88/a8/a080593f89b0138b6cba1b28f8df5673b5506f72879322288b031337c0b8/websockets-16.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:32da954ffa2814258030e5a57bc73a3635463238e797c7375dc8091327434206", size = 185356, upload-time = "2026-01-10T09:23:32.627Z" }, + { url = "https://files.pythonhosted.org/packages/c2/b6/b9afed2afadddaf5ebb2afa801abf4b0868f42f8539bfe4b071b5266c9fe/websockets-16.0-cp314-cp314t-win32.whl", hash = "sha256:5a4b4cc550cb665dd8a47f868c8d04c8230f857363ad3c9caf7a0c3bf8c61ca6", size = 178085, upload-time = "2026-01-10T09:23:33.816Z" }, + { url = "https://files.pythonhosted.org/packages/9f/3e/28135a24e384493fa804216b79a6a6759a38cc4ff59118787b9fb693df93/websockets-16.0-cp314-cp314t-win_amd64.whl", hash = "sha256:b14dc141ed6d2dde437cddb216004bcac6a1df0935d79656387bd41632ba0bbd", size = 178531, upload-time = "2026-01-10T09:23:35.016Z" }, + { url = "https://files.pythonhosted.org/packages/6f/28/258ebab549c2bf3e64d2b0217b973467394a9cea8c42f70418ca2c5d0d2e/websockets-16.0-py3-none-any.whl", hash = "sha256:1637db62fad1dc833276dded54215f2c7fa46912301a24bd94d45d46a011ceec", size = 171598, upload-time = "2026-01-10T09:23:45.395Z" }, +]